Exemplo n.º 1
0
def bootstrap():
    """Run the GA-based LTR experiment once per dataset and print its FPA.

    For every ``(dataset, filename)`` pair yielded by
    ``Processing().import_single_data()`` the data is split into training
    and testing parts, a 20-element weight vector is searched with
    ``pyGaft``, the newest record of the reloaded ``best_fit`` module is
    taken as that vector, and the FPA of the dot-product scores on the
    testing part is printed.
    """
    for dataset, filename in Processing().import_single_data():
        print(filename)
        split = Processing().separate_data(dataset)
        train_X, train_y, testing_data_X, testing_data_y = split

        # The optimiser is handed plain Python lists, not arrays.
        feature_rows = train_X.tolist()
        target_values = train_y.tolist()

        from PyOptimize.General_Opt import Test_function

        def LTR(a, **kwargs):
            # Adapter: a new Test_function is built for every evaluation.
            return Test_function().LTR(a, **kwargs)

        optimiser = pyGaft(objfunc=LTR,
                           var_bounds=[(-2, 2) for _ in range(20)],
                           individual_size=50,
                           max_iter=10,
                           max_or_min='max',
                           X=feature_rows,
                           y=target_values)
        optimiser.run()

        # Re-execute best_fit so the values read below are current.
        importlib.reload(best_fit)
        newest_record = best_fit.best_fit[-1]
        a, _fitness_value = newest_record[-2], newest_record[-1]
        print('a = {0}'.format(a))

        # Score each test row with the learned linear weights.
        pred_y = [np.dot(test_x, a) for test_x in testing_data_X]

        fpa = PerformanceMeasure(testing_data_y, pred_y).FPA()
        print('fpa = {0}'.format(fpa))
Exemplo n.º 2
0
def _latest_best_fit():
    """Return ``(solution, fitness)`` from the newest ``best_fit`` record.

    The ``best_fit`` module is re-executed on every call, so the values read
    always reflect the file currently on disk instead of a copy cached in
    ``sys.modules`` by an earlier optimisation run (or an earlier call of
    ``bootstrap``).
    """
    import sys

    module = sys.modules.get('best_fit')
    if module is None:
        # First use in this process: make sure a file that was created after
        # interpreter start-up is visible to the import system.
        importlib.invalidate_caches()
        module = importlib.import_module('best_fit')
    else:
        module = importlib.reload(module)
    newest = module.best_fit[-1]
    return newest[-2], newest[-1]


def _run_ga(objfunc, bound, individual_size, max_or_min,
            n_vars=20, max_iter=2, **data):
    """Run one ``pyGaft`` optimisation and return the best solution found.

    ``bound`` is the ``(low, high)`` pair applied to each of the ``n_vars``
    variables; ``data`` is forwarded unchanged to ``pyGaft`` as keyword
    arguments.  The solution is read back from the ``best_fit`` module --
    presumably written by the optimiser run; NOTE(review): confirm.
    """
    pyGaft(objfunc=objfunc,
           var_bounds=[bound] * n_vars,
           individual_size=individual_size,
           max_iter=max_iter,
           max_or_min=max_or_min,
           **data).run()
    solution, _fitness = _latest_best_fit()
    return solution


def _ga_rank_svm_scores(un_type, P, r, training_data_y, testing_data_X):
    """Fit ranking-SVM weights with the GA for ``un_type`` and score the test set."""
    u, n = PerformanceMeasure(training_data_y).calc_UN(type=un_type)
    w = _run_ga(Loss, (-1, 1), 500, 'min', P=P, r=r, u=u, n=n)
    return RankSVM(w=w).predict3(testing_data_X)


def _ga_ltr_scores(train_X, train_y, testing_data_X):
    """Fit linear LTR weights with the GA and return dot-product test scores."""
    a = _run_ga(LTR, (-20, 20), 100, 'max',
                X=train_X.tolist(), y=train_y.tolist())
    return [np.dot(test_x, a) for test_x in testing_data_X]


def _shuffled_rank_svm_scores(train_X, train_y, testing_data_X):
    """Fit ``RankSVM(C=1.0)`` on shuffled training data and score the test set."""
    # Shuffling keeps the pairwise labels r from all being 1, which makes
    # the SVM raise an error.
    shuf_X, shuf_y = shuffle(train_X, train_y)
    return RankSVM(C=1.0).fit(shuf_X, shuf_y).predict2(testing_data_X)


def _record_metrics(testing_data_y, pred_y,
                    fpa_list, pofb_list, pofd_list, ranking_list):
    """Compute FPA, PofB20, PofD20 and ranking and append each to its list.

    A fresh ``PerformanceMeasure`` is built for every metric so that no
    state is shared between them.  All four values are computed before
    anything is appended, so a failing metric cannot leave the result lists
    with different lengths.
    """
    fpa = PerformanceMeasure(testing_data_y, pred_y).FPA()
    pofb = PerformanceMeasure(testing_data_y, pred_y).PofB20()
    pofd = PerformanceMeasure(testing_data_y, pred_y).PofD20()
    ranking = PerformanceMeasure(testing_data_y, pred_y).ranking()
    fpa_list.append(fpa)
    pofb_list.append(pofb)
    pofd_list.append(pofd)
    ranking_list.append(ranking)


def bootstrap(dataset):
    """Train and evaluate every model variant on one split of ``dataset``.

    ``dataset`` is split with ``Processing().separate_data``.  The following
    models are then trained on the training part and scored on the testing
    part, in this order:

    * GA-trained ranking SVMs for the ``'cs'``, ``'ir'`` and ``'svm'``
      ``calc_UN`` types;
    * GA-trained linear LTR;
    * RankSVM, DecisionTreeRegressor, LinearRegression and BayesianRidge on
      the untouched training data;
    * LTR plus the same four models after random under-sampling;
    * LTR plus the same four models after SMOTE over-sampling.

    For each model the FPA, PofB20, PofD20 and ranking results are appended
    to the matching module-level ``<prefix>_{fpa,pofb,pofd,ranking}_list``.
    Nothing is returned.
    """
    training_data_X, training_data_y, testing_data_X, testing_data_y = (
        Processing().separate_data(dataset))

    # ------------------------------------------------------------------
    # GA-trained ranking SVMs; they all share one pairwise transform.
    # ------------------------------------------------------------------
    P, r = RankSVM().transform_pairwise(training_data_X, training_data_y)
    P = P.tolist()
    r = r.tolist()

    # Cost-sensitive ranking SVM.
    csrs_pred_y = _ga_rank_svm_scores('cs', P, r, training_data_y,
                                      testing_data_X)
    _record_metrics(testing_data_y, csrs_pred_y,
                    csrs_fpa_list, csrs_pofb_list,
                    csrs_pofd_list, csrs_ranking_list)

    # IR SVM.
    irsvm_pred_y = _ga_rank_svm_scores('ir', P, r, training_data_y,
                                       testing_data_X)
    _record_metrics(testing_data_y, irsvm_pred_y,
                    irsvm_fpa_list, irsvm_pofb_list,
                    irsvm_pofd_list, irsvm_ranking_list)

    # Variant with the other parameter removed as well (plain SVM).
    svm_pred_y = _ga_rank_svm_scores('svm', P, r, training_data_y,
                                     testing_data_X)
    _record_metrics(testing_data_y, svm_pred_y,
                    svm_fpa_list, svm_pofb_list,
                    svm_pofd_list, svm_ranking_list)

    # ------------------------------------------------------------------
    # LTR on the original training data.
    # ------------------------------------------------------------------
    ltr_pred_y = _ga_ltr_scores(training_data_X, training_data_y,
                                testing_data_X)
    _record_metrics(testing_data_y, ltr_pred_y,
                    ltr_fpa_list, ltr_pofb_list,
                    ltr_pofd_list, ltr_ranking_list)

    # ------------------------------------------------------------------
    # Ranking SVM, DTR, LR and BRR on the original training data.
    # ------------------------------------------------------------------
    # NOTE(review): only this RankSVM variant rounds its predictions; the
    # RUS and SMOTE variants below do not.  Kept as found -- confirm intent.
    rs_pred_y = np.around(_shuffled_rank_svm_scores(
        training_data_X, training_data_y, testing_data_X))
    _record_metrics(testing_data_y, rs_pred_y,
                    rs_fpa_list, rs_pofb_list,
                    rs_pofd_list, rs_ranking_list)

    dtr = DecisionTreeRegressor().fit(training_data_X, training_data_y)
    _record_metrics(testing_data_y, dtr.predict(testing_data_X),
                    dtr_fpa_list, dtr_pofb_list,
                    dtr_pofd_list, dtr_ranking_list)

    lr = linear_model.LinearRegression().fit(training_data_X, training_data_y)
    _record_metrics(testing_data_y, lr.predict(testing_data_X),
                    lr_fpa_list, lr_pofb_list,
                    lr_pofd_list, lr_ranking_list)

    brr = BayesianRidge().fit(training_data_X, training_data_y)
    _record_metrics(testing_data_y, brr.predict(testing_data_X),
                    brr_fpa_list, brr_pofb_list,
                    brr_pofd_list, brr_ranking_list)

    # ------------------------------------------------------------------
    # Random under-sampling first, then LTR, Ranking SVM, DTR, LR and BRR.
    # ------------------------------------------------------------------
    # NOTE(review): `ratio`, `return_indices` and `fit_sample` are the
    # spelling of older imbalanced-learn releases; check the pinned version
    # before upgrading.
    rus_X, rus_y, _ = RandomUnderSampler(ratio=1.0,
                                         return_indices=True).fit_sample(
                                             training_data_X,
                                             training_data_y)

    rus_ltr_pred_y = _ga_ltr_scores(rus_X, rus_y, testing_data_X)
    _record_metrics(testing_data_y, rus_ltr_pred_y,
                    rus_ltr_fpa_list, rus_ltr_pofb_list,
                    rus_ltr_pofd_list, rus_ltr_ranking_list)

    rus_rs_pred_y = _shuffled_rank_svm_scores(rus_X, rus_y, testing_data_X)
    _record_metrics(testing_data_y, rus_rs_pred_y,
                    rus_rs_fpa_list, rus_rs_pofb_list,
                    rus_rs_pofd_list, rus_rs_ranking_list)

    rus_dtr = DecisionTreeRegressor().fit(rus_X, rus_y)
    _record_metrics(testing_data_y, rus_dtr.predict(testing_data_X),
                    rus_dtr_fpa_list, rus_dtr_pofb_list,
                    rus_dtr_pofd_list, rus_dtr_ranking_list)

    rus_lr = linear_model.LinearRegression().fit(rus_X, rus_y)
    _record_metrics(testing_data_y, rus_lr.predict(testing_data_X),
                    rus_lr_fpa_list, rus_lr_pofb_list,
                    rus_lr_pofd_list, rus_lr_ranking_list)

    rus_brr = BayesianRidge().fit(rus_X, rus_y)
    _record_metrics(testing_data_y, rus_brr.predict(testing_data_X),
                    rus_brr_fpa_list, rus_brr_pofb_list,
                    rus_brr_pofd_list, rus_brr_ranking_list)

    # ------------------------------------------------------------------
    # SMOTE over-sampling first, then LTR, Ranking SVM, DTR, LR and BRR.
    # ------------------------------------------------------------------
    smote_X, smote_y = Smote(training_data_X, training_data_y, ratio=1.0,
                             k=5).over_sampling_addorginaldata()

    smote_ltr_pred_y = _ga_ltr_scores(smote_X, smote_y, testing_data_X)
    _record_metrics(testing_data_y, smote_ltr_pred_y,
                    smote_ltr_fpa_list, smote_ltr_pofb_list,
                    smote_ltr_pofd_list, smote_ltr_ranking_list)

    smote_rs_pred_y = _shuffled_rank_svm_scores(smote_X, smote_y,
                                                testing_data_X)
    _record_metrics(testing_data_y, smote_rs_pred_y,
                    smote_rs_fpa_list, smote_rs_pofb_list,
                    smote_rs_pofd_list, smote_rs_ranking_list)

    smote_dtr = DecisionTreeRegressor().fit(smote_X, smote_y)
    _record_metrics(testing_data_y, smote_dtr.predict(testing_data_X),
                    smote_dtr_fpa_list, smote_dtr_pofb_list,
                    smote_dtr_pofd_list, smote_dtr_ranking_list)

    smote_lr = linear_model.LinearRegression().fit(smote_X, smote_y)
    # NOTE(review): this line used to append the undefined name
    # `smote_lr_rankig` to `smote_lr_rankig_list` (NameError).  Both are
    # spelled "ranking" now, like every other result list; confirm that the
    # module-level list is declared as `smote_lr_ranking_list`.
    _record_metrics(testing_data_y, smote_lr.predict(testing_data_X),
                    smote_lr_fpa_list, smote_lr_pofb_list,
                    smote_lr_pofd_list, smote_lr_ranking_list)

    smote_brr = BayesianRidge().fit(smote_X, smote_y)
    _record_metrics(testing_data_y, smote_brr.predict(testing_data_X),
                    smote_brr_fpa_list, smote_brr_pofb_list,
                    smote_brr_pofd_list, smote_brr_ranking_list)