import sklearn.metrics as metrics

# Calculate the FPR and TPR for all thresholds of the classification.
# NOTE(review): `model`, `X_test`, `y_test` must be defined earlier in the
# notebook/session — they are not defined in this snippet; verify upstream.
probs = model.predict_proba(X_test)
preds = probs[:, 1]  # column 1 = probability of the positive class
fpr, tpr, threshold = metrics.roc_curve(y_test, preds)
roc_auc = metrics.auc(fpr, tpr)

# Method I: matplotlib
import matplotlib.pyplot as plt

plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')  # chance diagonal
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

# Method II: ggplot
# NOTE(review): the `ggplot` package is unmaintained; `plotnine` is its
# maintained successor. Also requires `pd` (pandas) imported earlier.
from ggplot import *

df = pd.DataFrame(dict(fpr=fpr, tpr=tpr))
g = ggplot(df, aes(x='fpr', y='tpr')) + geom_line() + geom_abline(linetype='dashed')
# A bare plot expression only renders in a notebook; print it so plain
# scripts also display the chart.
print(g)
# ROC curve code snippet from external source (module notes)
def draw_roc(actual, probs):
    """Plot the ROC curve and report the AUC for binary predictions.

    Parameters
    ----------
    actual : array-like
        Ground-truth binary labels.
    probs : array-like
        Predicted probabilities (scores) for the positive class.

    Returns
    -------
    None — displays the figure as a side effect.
    """
    # drop_intermediate=False keeps every threshold point on the curve.
    fpr, tpr, thresholds = metrics.roc_curve(actual, probs,
                                             drop_intermediate=False)
    auc_score = metrics.roc_auc_score(actual, probs)
    plt.figure(figsize=(5, 5))
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auc_score)
    plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate or [1 - True Negative Rate]')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.show()
18
# Compute ROC points on the training predictions, then draw the curve.
# NOTE(review): `y_train_pred_final` must be built earlier with `Converted`
# (actual label) and `Converted_prob` (predicted probability) columns.
fpr, tpr, thresholds = metrics.roc_curve(y_train_pred_final.Converted,
                                         y_train_pred_final.Converted_prob,
                                         drop_intermediate=False)

draw_roc(y_train_pred_final.Converted, y_train_pred_final.Converted_prob)
# ROC curve for the logistic-regression classifier on the test set.
# NOTE(review): `classifier_logreg`, `X_test`, `y_test` and `plt` must be
# defined/imported earlier in the session.
y_pred_logreg_proba = classifier_logreg.predict_proba(X_test)

from sklearn.metrics import roc_curve

# Column 1 of predict_proba is the positive-class probability.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_logreg_proba[:, 1])
plt.figure(figsize=(6, 4))
plt.plot(fpr, tpr, '-g', linewidth=1)
plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
plt.title('ROC curve for Logistic Regression Model')
plt.xlabel("False Positive Rate")
plt.ylabel('True Positive Rate')
plt.show()
# Now let's calculate accuracy, sensitivity and specificity for various
# probability cutoffs. NOTE(review): `y_train_pred_final[i]` is assumed to
# hold the 0/1 prediction at cutoff i (columns created earlier) — confirm.
cutoff_df = pd.DataFrame(columns=['prob', 'accuracy', 'sensi', 'speci'])
from sklearn.metrics import confusion_matrix

# Confusion-matrix layout (rows = actual, cols = predicted):
# TN = cm[0, 0], FP = cm[0, 1], FN = cm[1, 0], TP = cm[1, 1]

num = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
for i in num:
    cm1 = metrics.confusion_matrix(y_train_pred_final.Churn,
                                   y_train_pred_final[i])
    total1 = cm1.sum()  # total observations; clearer than sum(sum(cm1))
    accuracy = (cm1[0, 0] + cm1[1, 1]) / total1
    speci = cm1[0, 0] / (cm1[0, 0] + cm1[0, 1])  # TN / (TN + FP)
    sensi = cm1[1, 1] / (cm1[1, 0] + cm1[1, 1])  # TP / (TP + FN)
    cutoff_df.loc[i] = [i, accuracy, sensi, speci]
print(cutoff_df)