# Split every record of an existing RDD of strings on single spaces and
# flatten the resulting pieces into one RDD of words.
# NOTE: `rdd` is not created in this snippet; it must already exist
# (the complete example that follows builds it with parallelize()).
rdd2 = rdd.flatMap(lambda x: x.split(" "))

# collect() brings every element back to the driver, which is fine for
# this tiny demo data set.
for element in rdd2.collect():
    print(element)

from pyspark.sql import SparkSession

# Complete flatMap() example: build an RDD of text lines, print it, then
# split each line into words and print the flattened result.
spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()

data = [
    "Project Gutenberg’s",
    "Alice’s Adventures in Wonderland",
    "Project Gutenberg’s",
    "Adventures in Wonderland",
    "Project Gutenberg’s",
]
rdd = spark.sparkContext.parallelize(data)
for element in rdd.collect():
    print(element)

# flatMap: each input line yields several words, and the per-line lists
# are flattened into a single RDD of words.
rdd2 = rdd.flatMap(lambda x: x.split(" "))
for element in rdd2.collect():
    print(element)

import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import explode

# DataFrame counterpart of flatMap: explode() an array column so that each
# array element becomes its own row.
spark = SparkSession.builder.appName('pyspark-by-examples').getOrCreate()

# Each record is (name, knownLanguages array, properties map); some arrays
# and maps are None or contain None/'' to show how those values behave.
arrayData = [
    ('James', ['Java', 'Scala'], {'hair': 'black', 'eye': 'brown'}),
    ('Michael', ['Spark', 'Java', None], {'hair': 'brown', 'eye': None}),
    ('Robert', ['CSharp', ''], {'hair': 'red', 'eye': ''}),
    ('Washington', None, None),
    ('Jefferson', ['1', '2'], {}),
]  # the closing bracket was missing in the original listing
df = spark.createDataFrame(
    data=arrayData,
    schema=['name', 'knownLanguages', 'properties'],
)

# explode() emits one output row per array element. Per the PySpark docs,
# rows whose array is null or empty (e.g. 'Washington') produce no output;
# explode_outer() would keep them with a null element instead.
df2 = df.select(df.name, explode(df.knownLanguages))
df2.printSchema()
df2.show()

# Build a small RDD of text lines from an in-memory list and print it.
# NOTE: relies on the `spark` session created earlier in this file.
data = [
    "Project Gutenberg’s",
    "Alice’s Adventures in Wonderland",
    "Project Gutenberg’s",
    "Adventures in Wonderland",
    "Project Gutenberg’s",
]
rdd = spark.sparkContext.parallelize(data)

# collect() returns all elements to the driver; acceptable for demo data.
for element in rdd.collect():
    print(element)
