示例#1
0
def _timed_predict(predict_fn, tr_data_arr, tr_label_arr,
                   pred_data_arr, pred_label_arr):
    """Call one model's ``predict`` function and time the call.

    Args:
        predict_fn: Callable that accepts the four arrays below and returns a
            ``(pred_data_len, wrong, accuracy)`` triple.
        tr_data_arr: Training samples.
        tr_label_arr: Training labels.
        pred_data_arr: Samples to predict.
        pred_label_arr: Expected labels for ``pred_data_arr``.

    Returns:
        ``(pred_data_len, wrong, accuracy, elapsed)`` where ``elapsed`` is the
        duration of the call in seconds as measured with ``time.time()``.
    """
    started = time.time()
    pred_data_len, wrong, accuracy = predict_fn(
        tr_data_arr, tr_label_arr, pred_data_arr, pred_label_arr)
    elapsed = time.time() - started
    return pred_data_len, wrong, accuracy, elapsed


def main():
    """Run LR, SVM and AdaBoost on the same data and print a comparison.

    Loads the training and test files through ``lds.load``, calls the three
    ``predict`` entry points one after another with identical arguments, and
    prints the number of test samples followed by each model's error count,
    accuracy and elapsed time.
    """
    tr_data_arr, tr_label_arr = lds.load('./data_set/adult.data')
    pred_data_arr, pred_label_arr = lds.load('./data_set/adult.test')
    data_sets = (tr_data_arr, tr_label_arr, pred_data_arr, pred_label_arr)

    lr_pred_data_len, lr_wrong, lr_accuracy, lr_elapsed = _timed_predict(
        lr_predict.predict, *data_sets)
    _, svm_wrong, svm_accuracy, svm_elapsed = _timed_predict(
        svm_predict.predict, *data_sets)
    _, ada_wrong, ada_accuracy, ada_elapsed = _timed_predict(
        ada_predict.predict, *data_sets)

    # Each print receives ONE pre-formatted string, so the emitted text is the
    # same whether ``print`` is the Python 2 statement or the Python 3
    # function. The separating spaces the old comma-form inserted are now part
    # of the format strings.
    print('测试样本总数: %s' % (lr_pred_data_len,))
    print('LR预测错误数: %s' % (lr_wrong,))
    print('LR预测准确率:%s %%' % lr_accuracy)
    print('LR训练模型以及预测共耗时:%s秒' % lr_elapsed)
    print('---------------------')
    print('SVM预测错误数: %s' % (svm_wrong,))
    print('SVM预测准确率:%s %%' % svm_accuracy)
    print('SVM训练模型以及预测共耗时:%s秒' % svm_elapsed)
    print('---------------------')
    print('AdaBoost预测错误数: %s' % (ada_wrong,))
    print('AdaBoost预测准确率:%s %%' % ada_accuracy)
    print('AdaBoost训练模型以及预测共耗时:%s秒' % ada_elapsed)
示例#2
0
def get_cv_data_file():
    """Write the training samples to ``cv_data.smp`` for cross-validation.

    Every sample loaded from ``./data_set/adult.data`` becomes one text line:
    the label, then space-separated ``index:value`` pairs in which the
    feature index starts at 1.
    """
    samples, labels = lds.load('./data_set/adult.data')

    with open('cv_data.smp', 'w+') as out_file:
        for row_no, sample in enumerate(samples):
            # Feature numbering in the output is 1-based.
            pairs = ['%s:%s' % (col_no, value)
                     for col_no, value in enumerate(sample, 1)]
            out_file.write('%s %s\n' % (labels[row_no], ' '.join(pairs)))
示例#3
0
def test_adaboost_roc():
    """Compute the ROC and AUC of AdaBoost on the training data.

    Trains with ``ada_boost_train_ds`` and hands the aggregated class
    estimates to ``plotROC`` (presumably the routine that draws the curve and
    reports the AUC -- confirm in ``ada_boost.adaboost``).
    """
    from ada_boost.adaboost import ada_boost_train_ds, plotROC

    samples, raw_labels = lds.load('./data_set/adult.data')
    sample_mat = np.mat(samples)
    # The trainer gets +1/-1 style labels: entries equal to 1 are kept as they
    # are, every other value is replaced by -1.
    signed_labels = [lbl if lbl == 1 else -1 for lbl in raw_labels]
    # 30 is the third argument of the trainer (presumably the number of
    # boosting rounds -- confirm against its signature).
    _classifiers, agg_class_est = ada_boost_train_ds(
        sample_mat, signed_labels, 30)
    # Note: plotROC is given the labels as loaded, not the converted ones.
    plotROC(agg_class_est.T, raw_labels)
示例#4
0
        data_matrix.T * error就是目标函数
        每一次迭代得出的weights都会使得目标函数增长
        """
        weights += alpha * np.dot(data_matrix.T, error)
    return weights


def stoc_grad_ascent(data_matrix, class_labels, num_iter=150):
    """Fit the weights of a sigmoid model by stochastic gradient ascent.

    On every pass each sample is visited exactly once, in random order, and
    the weights are updated right after each sample with a step size that
    decays as training progresses.

    Args:
        data_matrix: 2-D array-like of samples, one row per sample.
        class_labels: Array-like holding one label per row of
            ``data_matrix``. Presumably 0/1 values, since they are compared
            with the sigmoid output -- confirm against the caller.
        num_iter: Number of passes over the whole data set.

    Returns:
        1-D numpy array with one weight per column of ``data_matrix``.
    """
    data_matrix = np.array(data_matrix)
    class_labels = np.array(class_labels)
    m, n = data_matrix.shape
    # If the weights were accepted as a parameter instead of being reset
    # here, this routine could be used for online learning.
    weights = np.ones(n)
    for j in range(num_iter):
        # Rows that have not been used yet during this pass.
        data_index = list(range(m))
        for i in range(m):
            # The step size shrinks as j and i grow but stays above 0.01.
            alpha = 4 / (1.0 + j + i) + 0.01
            rand_pos = int(random.uniform(0, len(data_index)))
            # rand_pos is a position inside the shrinking pool, not a row
            # number: translate it to the real row and drop it from the pool
            # so the same sample is not drawn twice in one pass.
            sample_idx = data_index.pop(rand_pos)
            sample = data_matrix[sample_idx]
            h = sigmoid(np.dot(sample, weights))
            error = class_labels[sample_idx] - h
            weights += alpha * error * sample
    return weights


if __name__ == '__main__':
    # Script entry point: load the training file, fit the weights with
    # grad_ascent and show the result.
    samples, labels = lds.load('./data_set/adult.data')
    print('weights:', grad_ascent(samples, labels))