1from sklearn.ensemble import RandomForestClassifier
2from numpy import genfromtxt, savetxt
3
def _load_rows(path):
    """Read a comma-separated numeric file and return its rows minus the header.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes, instead of being left for the garbage collector.
    """
    with open(path, 'r') as handle:
        rows = genfromtxt(handle, delimiter=',', dtype='f8')
    # The header line is parsed like any other row and then dropped here;
    # slicing (rather than skip_header) keeps the result 2-D even when the
    # file holds a single data row.
    return rows[1:]


def main(train_path='Data/train.csv', test_path='Data/test.csv',
         output_path='Data/submission2.csv'):
    """Train a random forest on the training CSV and write test predictions.

    The first column of the training file is used as the label and the
    remaining columns as features. Every column of the test file is treated
    as a feature. Both files are expected to start with a header row.

    Args:
        train_path: CSV file with the label in column 0 and features after it.
        test_path: CSV file containing only feature columns.
        output_path: Destination file for the predictions, written with the
            ``%f`` format.
    """
    # create the training & test sets (header rows are dropped by _load_rows)
    dataset = _load_rows(train_path)
    target = dataset[:, 0]
    train = dataset[:, 1:]
    test = _load_rows(test_path)

    # create and train the random forest
    # multi-core CPUs can use: rf = RandomForestClassifier(n_estimators=100, n_jobs=2)
    rf = RandomForestClassifier(n_estimators=100)
    rf.fit(train, target)

    savetxt(output_path, rf.predict(test), delimiter=',', fmt='%f')
17
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
20