# Outer-join df1 and df2 on their common 'id' column: every id that
# appears in either frame is kept, and columns that have no match on one
# side are filled with NaN.
# NOTE: df1 and df2 are assumed to be existing DataFrames that both have
# an 'id' column.
df_outer = pd.merge(df1, df2, on='id', how='outer')

# In a notebook/REPL a bare expression displays the merged frame.
df_outer

# Suppose you have two DataFrames, df1 and df2, and you need to merge
# them along their common 'id' column. With no `how` argument, pd.merge
# performs an inner join, so only ids present in both frames are kept.
df_merge_col = pd.merge(df1, df2, on='id')
# Joins with another DataFrame (PySpark DataFrame.join).
# NOTE: df, df2, df3 and df4 are assumed to be existing Spark DataFrames;
# the comment under each call is the result recorded for the example data.

# Outer join on an explicit column-equality expression.
df.join(df2, df.name == df2.name, 'outer').select(
    df.name, df2.height).collect()
# [Row(name=None, height=80), Row(name=u'Bob', height=85), Row(
# name=u'Alice', height=None)]

# Outer join on a column name; the recorded result shows a single merged
# 'name' column ('Tom' appears here where the expression join gave None).
df.join(df2, 'name', 'outer').select('name', 'height').collect()
# [Row(name=u'Tom', height=80), Row(name=u'Bob', height=85), Row(
# name=u'Alice', height=None)]

# Outer join on a list of conditions (all of them must hold for a match).
cond = [df.name == df3.name, df.age == df3.age]
df.join(df3, cond, 'outer').select(df.name, df3.age).collect()
# [Row(name=u'Alice', age=2), Row(name=u'Bob', age=5)]

# Join on a column name with the default join type.
df.join(df2, 'name').select(df.name, df2.height).collect()
# [Row(name=u'Bob', height=85)]

# Join on several column names at once.
df.join(df4, ['name', 'age']).select(df.name, df.age).collect()
# [Row(name=u'Bob', age=5)]
import pandas as pd
import numpy as np

# Left frame: nine rows cycling through three (fruit, weight) pairs, each
# with a random integer price drawn from [0, 15).
df1 = pd.DataFrame({
    'fruit': ['apple', 'banana', 'orange'] * 3,
    'weight': ['high', 'medium', 'low'] * 3,
    'price': np.random.randint(0, 15, 9),
})

# Right frame: six rows whose key columns carry different names
# ('pazham'/'kilo') from the left frame's ('fruit'/'weight').
df2 = pd.DataFrame({
    'pazham': ['apple', 'orange', 'pine'] * 2,
    'kilo': ['high', 'low'] * 3,
    'price': np.random.randint(0, 15, 6),
})

# Inner-join on two key pairs whose names differ between the frames:
# fruit <-> pazham and weight <-> kilo. Lists (not tuples) are used for the
# keys, as documented for left_on/right_on. Both frames have a 'price'
# column, so the suffixes tell them apart in the result. Only the first
# ten rows are kept.
out = df1.merge(
    df2,
    left_on=['fruit', 'weight'],
    right_on=['pazham', 'kilo'],
    how='inner',
    suffixes=('_left', '_right'),
).head(10)