Gradient-Boosted Trees (GBTs) learning algorithm for classification

Solutions on MaxInterview for the Gradient-Boosted Trees (GBTs) learning algorithm for classification, by the best coders in the world.

Showing results for "Gradient-Boosted Trees (GBTs) learning algorithm for classification"
Lucia
17 Aug 2016
# Gradient-Boosted Trees (GBTs) learning algorithm for classification
#
# Walk-through of fitting a GBT classifier, querying the fitted model, and
# round-tripping both the estimator and the model through save/load.
# Lines of the form "# <value>" show the expected result of the expression
# directly above them.
#
# NOTE(review): `spark` and `temp_path` are not defined in this snippet; it
# presumably expects an active SparkSession and a writable path prefix to be
# supplied by the surrounding environment -- confirm before running standalone.

from numpy import allclose
from pyspark.ml.classification import GBTClassificationModel, GBTClassifier
from pyspark.ml.feature import StringIndexer
from pyspark.ml.linalg import Vectors

# Two-row training set: one dense and one (empty) sparse feature vector.
df = spark.createDataFrame([
    (1.0, Vectors.dense(1.0)),
    (0.0, Vectors.sparse(1, [], []))], ["label", "features"])

# Index the raw "label" column into "indexed", which the classifier is
# configured to read through labelCol below.
stringIndexer = StringIndexer(inputCol="label", outputCol="indexed")
si_model = stringIndexer.fit(df)
td = si_model.transform(df)

gbt = GBTClassifier(maxIter=5, maxDepth=2, labelCol="indexed", seed=42)
model = gbt.fit(td)

model.featureImportances
# SparseVector(1, {0: 1.0})

# One weight per tree: the model reports 5 trees (see the debug string
# further down), so the reference list needs five entries.
allclose(model.treeWeights, [1.0, 0.1, 0.1, 0.1, 0.1])
# True

# Each row handed to createDataFrame is a one-element tuple, hence the
# trailing comma inside the inner parentheses.
test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
model.transform(test0).head().prediction
# 0.0
test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
model.transform(test1).head().prediction
# 1.0

model.totalNumNodes
# 15
print(model.toDebugString)
# GBTClassificationModel (uid=...)...with 5 trees...

# Save and reload the (unfitted) estimator.
gbtc_path = temp_path + "gbtc"
gbt.save(gbtc_path)
gbt2 = GBTClassifier.load(gbtc_path)
gbt2.getMaxDepth()
# 2

# Save and reload the fitted model, then check it matches the original.
model_path = temp_path + "gbtc_model"
model.save(model_path)
model2 = GBTClassificationModel.load(model_path)
model.featureImportances == model2.featureImportances
# True
model.treeWeights == model2.treeWeights
# True
model.trees
# [DecisionTreeRegressionModel (uid=...) of depth..., DecisionTreeRegressionModel...]