Exemplo n.º 1
0
def get_degree():
    """Compute percentile cut points for four face features.

    The first 73 entries of the epicanthus, forehead, nasal-bridge and ocular
    features returned by ``load_facedata()`` are used (the mole-count feature
    is not part of this computation).  For each feature the 20th, 40th, 60th
    and 80th percentiles are computed and rounded to two decimals.

    The four lists are printed and then returned.

    :return: a ``Bunch`` with the keys ``epicanthus_feature_degree``,
             ``forehead_feature_degree``, ``nasalBridge_feature_degree`` and
             ``ocular_feature_degree``, each a list of four rounded values
    """
    dataset = load_facedata()

    # NOTE(review): only the forehead feature is indexed with [0] before
    # slicing -- presumably it is stored with one extra nesting level; confirm.
    epicanthus_samples = dataset.epicanthus_feature[:73]
    forehead_samples = dataset.forehead_feature[0][:73]
    nasal_bridge_samples = dataset.nasalBridge_feature[:73]
    ocular_samples = dataset.ocular_feature[:73]

    levels = range(20, 100, 20)  # 20, 40, 60, 80

    def _cut_points(samples):
        # One rounded percentile value per level, in ascending level order.
        return [round(np.percentile(samples, level), 2) for level in levels]

    epicanthus_cuts = _cut_points(epicanthus_samples)
    forehead_cuts = _cut_points(forehead_samples)
    nasal_bridge_cuts = _cut_points(nasal_bridge_samples)
    ocular_cuts = _cut_points(ocular_samples)

    print('forehead_feature_degree:%s' % forehead_cuts)
    print('nasalBridge_feature_degree:%s' % nasal_bridge_cuts)
    print('ocular_feature_degree:%s' % ocular_cuts)
    print('epicanthus_feature_degree:%s' % epicanthus_cuts)

    return Bunch(epicanthus_feature_degree=epicanthus_cuts,
                 forehead_feature_degree=forehead_cuts,
                 nasalBridge_feature_degree=nasal_bridge_cuts,
                 ocular_feature_degree=ocular_cuts)
Exemplo n.º 2
0
def save_model(num=100, rate=0.5):
    """Train an AdaBoost classifier on the face data and save it to disk.

    The classifier boosts depth-2 decision trees with the SAMME algorithm, is
    fitted on everything ``load_facedata()`` returns, and is written with
    ``joblib.dump`` to the file ``abc.model`` in the current directory.

    :param num: number of boosting estimators (``n_estimators``)
    :param rate: learning rate of the booster
    """
    dataset = load_facedata()
    features = dataset.data
    labels = dataset.target

    weak_learner = DecisionTreeClassifier(max_depth=2,
                                          min_samples_split=20,
                                          min_samples_leaf=5)
    booster = AdaBoostClassifier(weak_learner,
                                 algorithm="SAMME",
                                 n_estimators=num,
                                 learning_rate=rate)

    booster.fit(features, labels)
    joblib.dump(booster, 'abc.model')
Exemplo n.º 3
0
def get_pre_acc(csv_path, num=50, rate=0.6):
    """Print the fraction of samples that the saved model labels as 1.

    The classifier is loaded from ``abc.model``, applied to the data loaded
    from ``csv_path``, and both the raw predictions and the share of
    predictions equal to 1 are printed.  Nothing is returned.

    :param csv_path: passed through to ``load_facedata``
    :param num: not used by this function
    :param rate: not used by this function
    """
    features = load_facedata(csv_path).data
    model = joblib.load('abc.model')

    predictions = model.predict(features)
    print(predictions)

    # NOTE(review): this ratio equals sensitivity only if every sample in
    # csv_path is a true positive -- confirm against the caller's data.
    positives = sum(1 for label in predictions if label == 1)
    ratio = float(positives) / float(len(predictions))
    print('sensitivity:{}'.format(ratio))
Exemplo n.º 4
0
def get_proba(sample, num=100, rate=0.5):
    """Return the predicted class probabilities for ``sample``.

    A new AdaBoost classifier (depth-2 decision trees, SAMME) is fitted on the
    full data set returned by ``load_facedata()`` on every call, then asked for
    the class probabilities of ``sample``.

    :param sample: the 5 feature values of one sample, e.g. ``[1, 2, 3, 4, 5]``;
                   a 2-D ``(n_samples, n_features)`` array-like is accepted
                   as well and is passed through unchanged
    :param num: number of boosting iterations (``n_estimators``)
    :param rate: learning rate (step size) of the booster
    :return: the ``predict_proba`` output, one row per sample; per the
             original documentation each row is
             ``[probability of 0, probability of 1]``.  A single flat sample
             yields a result with exactly one row.
    """
    # Local import keeps this function self-contained.
    import numpy as np

    fd = load_facedata()
    X = fd.data
    y = fd.target

    clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2,
                                                    min_samples_split=20,
                                                    min_samples_leaf=5),
                             algorithm="SAMME",
                             n_estimators=num,
                             learning_rate=rate)

    clf.fit(X, y)

    # predict_proba requires a 2-D (n_samples, n_features) input; a flat list
    # of feature values is promoted to a single-row matrix so the documented
    # call form works.
    sample_2d = np.atleast_2d(sample)
    return clf.predict_proba(sample_2d)
Exemplo n.º 5
0
def get_spec_acc(num=50, rate=0.6):
    """Train an AdaBoost classifier and print its evaluation metrics.

    The data from ``load_facedata()`` is split 75/25 into training and test
    sets.  An AdaBoost classifier (depth-2 decision trees, SAMME) is fitted on
    the training part, and the following are printed: the test labels, the
    test predictions, the test accuracy, the ten-fold cross-validation scores
    on the full data set with their mean, and a classification report.

    :param num: number of boosting estimators (``n_estimators``)
    :param rate: learning rate of the booster
    """
    fd = load_facedata()
    X = fd.data
    y = fd.target

    # Split into training and test data (25% held out for testing).
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    print(y_test)

    # Train the AdaBoost classifier on the training split.
    clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2,
                                                    min_samples_split=20,
                                                    min_samples_leaf=5),
                             algorithm="SAMME",
                             n_estimators=num,
                             learning_rate=rate)
    clf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    answer = clf.predict(X_test)
    print(answer)
    print('预测准确度:{}'.format(clf.score(X_test, y_test)))

    # Ten-fold cross-validation on the full data set.
    scores = cross_val_score(clf, X, y, cv=10)
    print('十折交叉验证scores:{}'.format(scores))
    # Report the measured mean as-is; no offset may be added to an accuracy.
    print('十折交叉验证平均准确度:{}'.format(np.mean(scores)))

    # NOTE(review): the curve is built from hard predictions on the training
    # data; the three results are not used in the visible part of the function.
    precision, recall, thresholds = precision_recall_curve(
        y_train, clf.predict(X_train))

    print('分类报告:')
    print(classification_report(y_test, answer, target_names=['0', '1']))
    print '''
Exemplo n.º 6
0
def ana_roc(num=50, rate=0.6):
    """Plot the ROC curves of three classifiers on a 75/25 split of the face data.

    AdaBoost (depth-2 decision trees, SAMME), logistic regression and an RBF
    SVM are fitted, in that order, on the training split.  Their test-set
    probabilities for class column 1 are turned into ROC curves.  Test
    accuracies and the AdaBoost ROC arrays are printed, and all three curves
    are shown in one matplotlib figure together with the chance diagonal.

    :param num: number of boosting estimators for AdaBoost
    :param rate: learning rate for AdaBoost
    """
    dataset = load_facedata()
    features = dataset.data
    labels = dataset.target

    # Hold out 25% of the samples for testing.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.25)
    print(len(y_test))

    def _roc_points(positive_scores):
        # ROC curve of the held-out labels, keeping every threshold.
        return roc_curve(y_test, positive_scores, drop_intermediate=False)

    # --- AdaBoost ---------------------------------------------------------
    weak_learner = DecisionTreeClassifier(max_depth=2,
                                          min_samples_split=20,
                                          min_samples_leaf=5)
    boosted = AdaBoostClassifier(weak_learner,
                                 algorithm="SAMME",
                                 n_estimators=num,
                                 learning_rate=rate)
    boosted.fit(X_train, y_train)
    boosted_proba = boosted.predict_proba(X_test)
    print(boosted_proba)
    print(boosted_proba[:, 1])
    ada_fpr, ada_tpr, ada_thresholds = _roc_points(boosted_proba[:, 1])
    print('abc:{}'.format(boosted.score(X_test, y_test)))
    # Area under the AdaBoost curve; computed but not reported here.
    abc_auc = auc(ada_fpr, ada_tpr)

    # --- Logistic regression ----------------------------------------------
    logreg = LogisticRegression(C=1., solver='lbfgs')
    logreg.fit(X_train, y_train)
    logreg_proba = logreg.predict_proba(X_test)
    lr_fpr, lr_tpr, _ = _roc_points(logreg_proba[:, 1])
    print('lr:{}'.format(logreg.score(X_test, y_test)))

    # --- RBF support vector machine ---------------------------------------
    svm_settings = dict(C=1.0,
                        cache_size=200,
                        class_weight=None,
                        coef0=0.0,
                        decision_function_shape='ovr',
                        degree=3,
                        gamma='auto',
                        kernel='rbf',
                        max_iter=-1,
                        probability=True,
                        random_state=None,
                        shrinking=True,
                        tol=0.001,
                        verbose=False)
    svm = SVC(**svm_settings)
    svm.fit(X_train, y_train)
    svm_proba = svm.predict_proba(X_test)
    svm_fpr, svm_tpr, _ = _roc_points(svm_proba[:, 1])
    print('svm:{}'.format(svm.score(X_test, y_test)))

    print(ada_fpr)
    print(len(ada_fpr))
    print(ada_tpr)
    print(ada_thresholds)

    # --- Plot -------------------------------------------------------------
    line_width = 2
    # NOTE(review): the legend labels do not name the fitted model types
    # (the logistic-regression curve is labelled "SVM") -- confirm intent.
    curves = [
        (svm_fpr, svm_tpr, dict(color='b',
                                label='GGF + PCA + SVM',
                                marker='*',
                                linestyle='--')),
        (lr_fpr, lr_tpr, dict(color='g',
                              label='GTF + PCA + SVM',
                              marker='o',
                              linestyle=':')),
        (ada_fpr, ada_tpr, dict(color='darkorange',
                                label='LF + PCA + AdaBoost',
                                marker='D',
                                linestyle='-.')),
    ]

    plt.figure()
    for false_pos, true_pos, look in curves:
        plt.plot(false_pos, true_pos, lw=line_width, **look)
    # Chance diagonal for reference.
    plt.plot([0, 1], [0, 1], color='navy', lw=line_width, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('1 - Specificity')
    plt.ylabel('Sensitivity')
    plt.title(' ROC curves of different methods')
    plt.legend(loc="lower right")
    plt.show()
    '''