# Small demo dataset of (id, hour) pairs used to build a two-column DataFrame.
x = [(0, 18.0), (1, 19.0), (2, 8.0), (3, 5.0), (4, 2.2), (5, 4.0)]

# NOTE(review): `spark` is not defined in this snippet; presumably an active
# SparkSession provided by the surrounding environment -- confirm.
d = spark.createDataFrame(x, ["id", "hour"])

# Print the DataFrame contents for a quick visual check.
d.show()
from pyspark.ml.feature import Bucketizer

# Bucket boundaries: consecutive pairs define the buckets, so these six
# split points yield five buckets (0-1, 1-2, 2-3, 3-4, and 4 and above).
splits = [0, 1, 2, 3, 4, float("Inf")]

# Map the continuous "BATHSTOTAL" column to a bucket index column "baths".
buck = Bucketizer(splits=splits, inputCol="BATHSTOTAL", outputCol="baths")

# NOTE(review): `df` is not created anywhere in this snippet (only `d`, with
# columns "id" and "hour", is). Presumably `df` is a DataFrame with a
# "BATHSTOTAL" column loaded elsewhere -- confirm before running.
df = buck.transform(df)

# Show the original values next to their assigned bucket index.
df[["BATHSTOTAL", "baths"]].show()