# Stack the DataFrames on top of each other (axis=0 concatenates along the
# rows; each frame keeps its original index labels because ignore_index is
# left at its default).
vertical_stack = pd.concat([survey_sub, survey_sub_last10], axis=0)

# Place the DataFrames side by side (axis=1 concatenates along the columns;
# rows are matched up by index label, not by position).
horizontal_stack = pd.concat([survey_sub, survey_sub_last10], axis=1)

# Joins with another DataFrame.
# The second argument of `join` may be a join expression, a column name, or a
# list of either; the third argument selects the join type and is omitted in
# the last two examples, which then use the default (inner) join.

# Outer join on an explicit column expression. df.name is taken from the left
# side only, so the row that exists only in df2 comes back with name=None.
df.join(df2, df.name == df2.name, 'outer').select(
    df.name, df2.height).collect()
# [Row(name=None, height=80), Row(name=u'Bob', height=85), Row(
# name=u'Alice', height=None)]

# Same outer join keyed by column name: the result carries a single `name`
# column, so the df2-only row keeps its name (Tom).
df.join(df2, 'name', 'outer').select('name', 'height').collect()
# [Row(name=u'Tom', height=80), Row(name=u'Bob', height=85), Row(
# name=u'Alice', height=None)]

# A list of expressions: every condition in the list must hold for a match.
cond = [df.name == df3.name, df.age == df3.age]
df.join(df3, cond, 'outer').select(df.name, df3.age).collect()
# [Row(name=u'Alice', age=2), Row(name=u'Bob', age=5)]

# Join on a column name with the join type left at its default.
df.join(df2, 'name').select(df.name, df2.height).collect()
# [Row(name=u'Bob', height=85)]

# Join on several column names at once.
df.join(df4, ['name', 'age']).select(df.name, df.age).collect()
# [Row(name=u'Bob', age=5)]
>>> df1.merge(df2, left_on='lkey', right_on='rkey')
  lkey  value_x rkey  value_y
0  foo        1  foo        5
1  foo        1  foo        8
2  foo        5  foo        5
3  foo        5  foo        8
4  bar        2  bar        6
5  baz        3  baz        7

# Three example frames that share the columns A-D but carry disjoint integer
# index labels (0-3, 4-7 and 8-11).
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                    'B': ['B0', 'B1', 'B2', 'B3'],
                    'C': ['C0', 'C1', 'C2', 'C3'],
                    'D': ['D0', 'D1', 'D2', 'D3']},
                   index=[0, 1, 2, 3])

df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
                    'B': ['B4', 'B5', 'B6', 'B7'],
                    'C': ['C4', 'C5', 'C6', 'C7'],
                    'D': ['D4', 'D5', 'D6', 'D7']},
                   index=[4, 5, 6, 7])

df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],
                    'B': ['B8', 'B9', 'B10', 'B11'],
                    'C': ['C8', 'C9', 'C10', 'C11'],
                    'D': ['D8', 'D9', 'D10', 'D11']},
                   index=[8, 9, 10, 11])

frames = [df1, df2, df3]

# With no axis given, concat stacks the frames row-wise in list order and
# keeps each row's original index label, giving a 12-row frame indexed 0-11.
result = pd.concat(frames)
