
from sklearn import metrics
from sklearn import datasets
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
def positive_class_scores(clf, X):
    """Return continuous positive-class scores from a fitted binary classifier.

    Ranking metrics such as ROC AUC need a continuous score per sample;
    feeding them hard 0/1 predictions collapses the ROC curve to a single
    operating point.

    Args:
        clf: A fitted estimator exposing ``decision_function``,
            ``predict_proba`` or, as a last resort, ``predict``.
        X: Samples to score.

    Returns:
        ``clf.decision_function(X)`` when available, otherwise column 1 of
        ``clf.predict_proba(X)``, otherwise ``clf.predict(X)``.
    """
    if hasattr(clf, 'decision_function'):
        return clf.decision_function(X)
    if hasattr(clf, 'predict_proba'):
        # Column 1 holds the probability of the second (positive) class.
        return clf.predict_proba(X)[:, 1]
    # No continuous score available: fall back to the hard labels.
    return clf.predict(X)


def main():
    """Run 10-fold cross-validation of three classifiers on synthetic data.

    For every fold and every classifier, prints the classifier followed by
    its accuracy, F1 score and ROC AUC on the held-out part of the fold.
    """
    X, y = datasets.make_classification(n_samples=1000, n_features=10)
    kf = KFold(n_splits=10)
    for tr_ind, te_ind in kf.split(X):
        X_tr, X_te = X[tr_ind], X[te_ind]
        Y_tr, Y_te = y[tr_ind], y[te_ind]
        # Fresh estimators for every fold so no fitted state carries over.
        clfs = [
            GaussianNB(),
            SVC(C=0.1, kernel='rbf', gamma=0.1),
            RandomForestClassifier(n_estimators=100),
        ]
        for clf in clfs:
            clf.fit(X_tr, Y_tr)
            pred = clf.predict(X_te)
            # ROC AUC is computed from continuous scores, not hard labels.
            scores = positive_class_scores(clf, X_te)
            print(str(clf))
            print('Accuracy: ', metrics.accuracy_score(Y_te, pred))
            print('F1_score: ', metrics.f1_score(Y_te, pred))
            print('AUC ROC: ', metrics.roc_auc_score(Y_te, scores), '\n')


if __name__ == '__main__':
    main()
本文使用 Scikit-learn 库中的多种分类器(朴素贝叶斯、支持向量机及随机森林),在合成数据集上进行 10 折交叉验证,并展示了如何计算准确率、F1 分数及 AUC-ROC 等评价指标。

1045

被折叠的 条评论
为什么被折叠?



