# Example #1 (votes: 0)
def run_voting(training_set, train_set_labels, validation_set,
               validation_set_labels):
    from sklearn.ensemble import VotingClassifier
    standard_train_inputs = standard_data(training_set)
    standard_valid_inputs = standard_data(validation_set)
    kknn_class = KNeighborsClassifier(weights='uniform', n_neighbors=5)

    logistic_regression_solver = sklearn.linear_model.LogisticRegression(
        penalty='l2',
        dual=False,
        tol=0.01,
        C=1.0,
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=None,
        random_state=None,
        solver='newton-cg',
        max_iter=100,
        multi_class='ovr',
        verbose=0,
        warm_start=False,
        n_jobs=2)
    svm_class = svm.SVC(decision_function_shape='ovo', tol=0.001)
    eclf1 = VotingClassifier(estimators=[('knn', kknn_class),
                                         ('lr', logistic_regression_solver),
                                         ('svm', svm_class)],
                             voting='hard')
    eclf1.fit(standard_train_inputs, train_set_labels.ravel())

    accuracy = eclf1.score(standard_valid_inputs,
                           validation_set_labels.ravel())
    print accuracy
def run_voting(training_set, train_set_labels, validation_set, validation_set_labels):
    from sklearn.ensemble import VotingClassifier
    standard_train_inputs = standard_data(training_set)
    standard_valid_inputs = standard_data(validation_set)
    kknn_class = KNeighborsClassifier(weights='uniform', n_neighbors=5)

    logistic_regression_solver = sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.01, C=1.0, fit_intercept=True,
                                                                         intercept_scaling=1, class_weight=None, random_state=None, solver='newton-cg',
                                                                         max_iter=100, multi_class='ovr', verbose=0, warm_start=False, n_jobs=2)
    svm_class = svm.SVC(decision_function_shape='ovo', tol=0.001)
    eclf1 = VotingClassifier(estimators=[('knn', kknn_class), ('lr', logistic_regression_solver), ('svm', svm_class)], voting='hard')
    eclf1.fit(standard_train_inputs,train_set_labels.ravel())

    accuracy = eclf1.score(standard_valid_inputs,validation_set_labels.ravel())
    print accuracy
def logistic_regression(training_inputs, training_labels, valid_inputs, valid_label, pre=True):
    logistic_regression_solver = sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.008, C=1.2, fit_intercept=True,
                                                                         intercept_scaling=1, class_weight=None, random_state=None, solver='newton-cg',
                                                                         max_iter=150, multi_class='ovr', verbose=0, warm_start=False, n_jobs=1)

    if pre:
        standard_train_inputs = standard_data(training_inputs)
        standard_valid_inputs = standard_data(valid_inputs)
    else:
        standard_train_inputs = training_inputs
        standard_valid_inputs = valid_inputs

    fl = logistic_regression_solver.fit(standard_train_inputs, training_labels.ravel())
    res_f = open('trained_lr.dump', 'w')
    pickle.dump(fl,res_f )
    res_f.close()

    accuracy = fl.score(standard_valid_inputs, np.ravel(valid_label))
    print 'the accuracy for logistic regression is:',accuracy
    return accuracy
# Example #4 (votes: 0)
def make_data_for_prepro():
    """Compare preprocessing variants — raw pixels, standardized, and
    pixel-fixed — by training the neural-net classifier on each pair,
    then save the accuracies to CSV and draw the preprocessing bar plot.
    """
    accuracys = []
    training_sett, train_set_labelts, validation_set, validation_set_labels = LoadData(
        'labeled_images.mat', True, True)
    # Alternative loaders / classifiers kept for reference:
    # training_set, train_set_labels, idst = LoadData('labeled_images.mat', True, False)
    # kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=5)
    # logistic_regression_solver = sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.001, C=1.2, fit_intercept=True,
    #                                                                          intercept_scaling=1, class_weight=None, random_state=None, solver='newton-cg',
    #                                                                          max_iter=200, multi_class='ovr', verbose=0, warm_start=False, n_jobs=2)
    # svm_class = svm.SVC(kernel='rbf', C=50, shrinking = False,decision_function_shape='ovr', tol=0.001, max_iter=-1)

    standard_train_inputs = standard_data(training_sett)
    standard_valid_inputs = standard_data(validation_set)

    fixed_train_set = fix_pixels(training_sett)
    fixed_valid = fix_pixels(validation_set)

    # garbored_train_set = gabor_filter(training_sett)
    # garbored_valid_set = gabor_filter(validation_set)

    # One (train, validation) pair per preprocessing pipeline.
    data_list = [(training_sett, validation_set),
                 (standard_train_inputs, standard_valid_inputs),
                 (fixed_train_set, fixed_valid)
                 ]  #,(garbored_train_set,garbored_valid_set)]
    for (t, v) in data_list:

        # accuracys.append(knn(t, train_set_labelts, v, validation_set_labels, False))
        # accuracys.append(logistic_regression(t,train_set_labelts , v, validation_set_labels, False))
        # accuracys.append(run_svm(t, train_set_labelts, v, validation_set_labels, False))
        # pre=False: data is already preprocessed (or deliberately raw).
        net_clf = net_class(t, train_set_labelts, v, validation_set_labels,
                            False)
        net_preds = []
        for in_data in v:
            # activate() returns the net's per-class output vector
            # (presumably softmax scores — see net_class; verify).
            net_preds.append(net_clf.activate(in_data))
        accuracys.append(get_acc(net_preds, validation_set_labels, True))
        print "done iter"

    create_csv(accuracys, 'barplot_pre_accuracy.csv')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    barplot_preprocess(ax, accuracys)
def make_data_for_barplot():
    accuracys = []
    training_set, train_set_labels, validation_set, validation_set_labels = LoadData('labeled_images.mat', True, True)
    # training_set, train_set_labels, idst = LoadData('labeled_images.mat', True, False)

    kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=5)
    logistic_regression_solver = sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.001, C=1.2, fit_intercept=True,
                                                                             intercept_scaling=1, class_weight=None, random_state=None, solver='newton-cg',
                                                                             max_iter=200, multi_class='ovr', verbose=0, warm_start=False, n_jobs=2)
    svm_class = svm.SVC(kernel='rbf', C=50, shrinking = False,decision_function_shape='ovr', tol=0.001, max_iter=-1)

    standard_train_inputs = standard_data(training_set)
    standard_valid_inputs = standard_data(validation_set)

    fixed_train_set = fix_pixels(training_set)
    fixed_valid = fix_pixels(validation_set)


    accuracys.append(knn(training_sett, train_set_labels, validation_set, validation_set_labels))
    print"knn"
    accuracys.append(logistic_regression(training_sett, train_set_labels, validation_set, validation_set_labels))
    print"logistic_regression"
    accuracys.append(run_svm(training_sett, train_set_labels, validation_set, validation_set_labels))
    print"run_svm"

    accuracys.append( run_bagging(fixed_train_set, train_set_labels, kknn_class,fixed_valid, validation_set_labels, True))
    print" knn B"
    accuracys.append( run_bagging(standard_train_inputs, train_set_labels, logistic_regression_solver,standard_valid_inputs, validation_set_labels, True))
    print"logistic_regression  B"
    accuracys.append( run_bagging(fixed_train_set, train_set_labels, svm_class,fixed_valid, validation_set_labels, True))
    print"run_svm  B"

    create_csv(accuracys,'barplot_bagg_accuracy.csv')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    barplot_bagging(ax,accuracys)

    return accuracys
def make_data_for_prepro():
    """Compare preprocessing variants — raw pixels, standardized, and
    pixel-fixed — by training the neural-net classifier on each pair,
    then save the accuracies to CSV and draw the preprocessing bar plot.
    """
    accuracys = []
    training_sett, train_set_labelts, validation_set, validation_set_labels = LoadData('labeled_images.mat', True, True)
    # Alternative loaders / classifiers kept for reference:
    # training_set, train_set_labels, idst = LoadData('labeled_images.mat', True, False)
    # kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=5)
    # logistic_regression_solver = sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.001, C=1.2, fit_intercept=True,
    #                                                                          intercept_scaling=1, class_weight=None, random_state=None, solver='newton-cg',
    #                                                                          max_iter=200, multi_class='ovr', verbose=0, warm_start=False, n_jobs=2)
    # svm_class = svm.SVC(kernel='rbf', C=50, shrinking = False,decision_function_shape='ovr', tol=0.001, max_iter=-1)

    standard_train_inputs = standard_data(training_sett)
    standard_valid_inputs = standard_data(validation_set)

    fixed_train_set = fix_pixels(training_sett)
    fixed_valid = fix_pixels(validation_set)

    # garbored_train_set = gabor_filter(training_sett)
    # garbored_valid_set = gabor_filter(validation_set)

    # One (train, validation) pair per preprocessing pipeline.
    data_list = [(training_sett,validation_set), (standard_train_inputs, standard_valid_inputs),
                 (fixed_train_set,fixed_valid)]#,(garbored_train_set,garbored_valid_set)]
    for (t,v) in data_list:

        # accuracys.append(knn(t, train_set_labelts, v, validation_set_labels, False))
        # accuracys.append(logistic_regression(t,train_set_labelts , v, validation_set_labels, False))
        # accuracys.append(run_svm(t, train_set_labelts, v, validation_set_labels, False))
        # pre=False: data is already preprocessed (or deliberately raw).
        net_clf = net_class(t, train_set_labelts, v, validation_set_labels, False)
        net_preds =[]
        for in_data in v:
            # activate() returns the net's per-class output vector
            # (presumably softmax scores — see net_class; verify).
            net_preds.append(net_clf.activate(in_data))
        accuracys.append(get_acc(net_preds,validation_set_labels, True))
        print"done iter"

    create_csv(accuracys,'barplot_pre_accuracy.csv')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    barplot_preprocess(ax,accuracys)
# Example #7 (votes: 0)
def logistic_regression(training_inputs,
                        training_labels,
                        valid_inputs,
                        valid_label,
                        pre=True):
    logistic_regression_solver = sklearn.linear_model.LogisticRegression(
        penalty='l2',
        dual=False,
        tol=0.008,
        C=1.2,
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=None,
        random_state=None,
        solver='newton-cg',
        max_iter=150,
        multi_class='ovr',
        verbose=0,
        warm_start=False,
        n_jobs=1)

    if pre:
        standard_train_inputs = standard_data(training_inputs)
        standard_valid_inputs = standard_data(valid_inputs)
    else:
        standard_train_inputs = training_inputs
        standard_valid_inputs = valid_inputs

    fl = logistic_regression_solver.fit(standard_train_inputs,
                                        training_labels.ravel())
    res_f = open('trained_lr.dump', 'w')
    pickle.dump(fl, res_f)
    res_f.close()

    accuracy = fl.score(standard_valid_inputs, np.ravel(valid_label))
    print 'the accuracy for logistic regression is:', accuracy
    return accuracy
# Example #8 (votes: 0)
def load_net_and_check_errorate(X,Y):

    res_f = open('bestNet.dump', 'r')
    nnet = pickle.load(res_f)
    nnet.sorted = False
    nnet.sortModules()
    vds = ClassificationDataSet(1024, 7, nb_classes=7)
    lX = standard_data(X)
    for vd, vt in zip(lX, Y):
        vtarr = [int(i==vt-1) for i in range(0,7)]
        vds.addSample(vd, vtarr)
    ttrainer = BackpropTrainer(nnet, vds, learningrate=0.005, momentum=0, weightdecay=0.05, batchlearning=False,verbose=True)
    ttstresult = percentError( ttrainer.testOnClassData(), Y )



    print " Classification rate for the trained Neural net is: %5.2f%%" % (100 - ttstresult)
    res_f.close()
    return ttrainer.testOnClassData()
# Example #9 (votes: 0)
def load_net_and_check_errorate(X, Y):

    res_f = open('bestNet.dump', 'r')
    nnet = pickle.load(res_f)
    nnet.sorted = False
    nnet.sortModules()
    vds = ClassificationDataSet(1024, 7, nb_classes=7)
    lX = standard_data(X)
    for vd, vt in zip(lX, Y):
        vtarr = [int(i == vt - 1) for i in range(0, 7)]
        vds.addSample(vd, vtarr)
    ttrainer = BackpropTrainer(nnet,
                               vds,
                               learningrate=0.005,
                               momentum=0,
                               weightdecay=0.05,
                               batchlearning=False,
                               verbose=True)
    ttstresult = percentError(ttrainer.testOnClassData(), Y)

    print " Classification rate for the trained Neural net is: %5.2f%%" % (
        100 - ttstresult)
    res_f.close()
    return ttrainer.testOnClassData()
# Example #10 (votes: 0)
def run_my_votin(training_set, train_set_labels, validation_set=None, validation_set_labels=None, train=True):
    from sklearn.ensemble import VotingClassifier
    from pybrain.datasets import ClassificationDataSet


    standard_valid_inputs = standard_data(validation_set)
    fixed_valid = fix_pixels(validation_set)
    equalize_and_standard_validation= standard_data(fixed_valid)
    if train:
        standard_train_inputs = standard_data(training_set)
        fixed_train_set = fix_pixels(training_set)
        equalize_and_standard = standard_data(fixed_train_set)

        kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=11)
        # kknn_class.fit(standard_train_inputs, train_set_labels.ravel())
        logistic_regression_solver = sklearn.linear_model.LogisticRegression(penalty='l2', dual=False, tol=0.01, C=1.0, fit_intercept=True,
                                                                             intercept_scaling=1, class_weight=None, random_state=None, solver='newton-cg',
                                                                             max_iter=200, multi_class='ovr', verbose=0, warm_start=False, n_jobs=2)
        svm_class = svm.SVC(kernel='rbf', C=50, shrinking = False,decision_function_shape='ovr', tol=0.001, max_iter=-1)

        print"train knn"
        bg1 = run_bagging(fixed_train_set, train_set_labels, kknn_class, None, None, False)
        res_f = open('bg1knn.dump', 'w')
        pickle.dump(bg1,res_f )
        res_f.close()
        print "Knn done"
        print"train Logistic Regression"
        bg2 = run_bagging(standard_train_inputs, train_set_labels, logistic_regression_solver, None, None, False)
        res_f = open('bg2lr.dump', 'w')
        pickle.dump(bg2,res_f )
        res_f.close()
        print "done bg LR"
        print"train SVM"
        bg3 = run_bagging(equalize_and_standard, train_set_labels ,svm_class,  None, None, False)
        res_f = open('bg3svm.dump', 'w')
        pickle.dump(bg3,res_f )
        res_f.close()
        print "done bg svm"
        print"train Neural-Nets"
        net_clf = net_class(standard_train_inputs,train_set_labels, None, None, False)
        res_f = open('net.dump', 'w')
        pickle.dump(net_clf,res_f)
        res_f.close()
        print "nets done"
    else:
        print"Load knn"
        res_1 = open('bg1knn.dump', 'r')
        bg1 = pickle.load(res_1)
        res_1.close()
        print "knn done"
        print"Load LR"
        res_2 = open('bg2lr.dump', 'r')
        bg2 = pickle.load(res_2)
        res_2.close()
        print "LR done"
        print"Load SVM"
        res_3 = open('bg3svm.dump', 'r')
        bg3 = pickle.load(res_3)
        res_3.close()
        print "svm done"
        print"Load Neural-nets"
        res_4 = open('net.dump', 'r')
        net_clf = pickle.load(res_4)
        res_4.close()
        print "net done"

    preds_arr = []
    pred_weights = [0.1, 0.26,0.34]
    net_weight = 0.30

    preds_arr.append(bg1.predict_proba(fixed_valid))
    preds_arr.append(bg2.predict_proba(standard_valid_inputs))
    preds_arr.append(bg3.predict_proba(equalize_and_standard_validation))

    net_preds =[]
    for in_data in standard_valid_inputs:
        net_preds.append(net_clf.activate(in_data))

    # preds_arr.append(net_preds)
    fin_pred = []
    for i in range(len(standard_valid_inputs)):
        tmp_np = np.zeros(7)
        for w ,pp in zip(pred_weights, preds_arr):
            tmp_np += pp[i] * w
        tmp_np += net_preds[i] * net_weight

        fin_pred.append(tmp_np)

    fin_labels = [(np.argmax(ar, axis=0)+1) for ar in fin_pred]
    create_csv(fin_labels,'test_csv.csv')
    if validation_set_labels:
        fin_acc, err = get_acc(fin_labels, validation_set_labels)
        print 'The final accuracy after bagging and votig is :', fin_acc

    fin_one_of_k = []
    for c in fin_labels:
        carr = [int(i==c-1) for i in range(0,7)]
        fin_one_of_k.append(carr)
    return fin_one_of_k
# Example #11 (votes: 0)
def net_class(ustraining_set,
              train_set_labels,
              usvalidation_set=None,
              validation_set_labels=None,
              pre=True):
    # print (validation_set_labels - 1)
    if pre:
        # ltraining_set = gabor_filter(ustraining_set)
        ltraining_set = standard_data(ustraining_set)
    else:
        ltraining_set = ustraining_set
    if not usvalidation_set == None:
        if pre:
            # lvalidation_set = gabor_filter(usvalidation_set)
            lvalidation_set = standard_data(usvalidation_set)
        else:
            lvalidation_set = usvalidation_set
        vds = ClassificationDataSet(1024, 7, nb_classes=7)
        for vd, vt in zip(lvalidation_set, validation_set_labels):
            vtarr = [int(i == vt - 1) for i in range(0, 7)]
            vds.addSample(vd, vtarr)
    # net = buildNetwork(1024, 100, 8,outclass=SoftmaxLayer)

    ds = ClassificationDataSet(1024, 7, nb_classes=7)
    for d, t in zip(ltraining_set, train_set_labels):
        tarr = [int(i == t - 1) for i in range(0, 7)]
        ds.addSample(d, tarr)

    tot_min_err = 100.0
    best_l = 0.0
    best_w = 0.0
    obest_e = 0
    for l in [0.005]:
        for w in [0.01]:
            net = buildNetwork(1024,
                               320,
                               7,
                               outclass=SoftmaxLayer,
                               hiddenclass=SigmoidLayer)
            net.sortModules()
            trainer = BackpropTrainer(net,
                                      ds,
                                      learningrate=l,
                                      momentum=0,
                                      weightdecay=w,
                                      batchlearning=False,
                                      verbose=True)
            cmin_err = 100.0
            flag = True
            best_e = 0
            e = 0

            flag = False
            trnresult = 100.0
            tstresult = 100.0
            for i in range(10):
                e += 1
                trainer.trainEpochs(1)

                trnresult = percentError(trainer.testOnClassData(),
                                         train_set_labels - 1)

                if not usvalidation_set == None:
                    tstresult = percentError(
                        trainer.testOnClassData(dataset=vds),
                        validation_set_labels - 1)
                    if cmin_err >= tstresult:
                        cmin_err = tstresult
                        print "copt err ", tstresult
                        best_e = e
                        flag = True
                    if tot_min_err > cmin_err:

                        tot_min_err = cmin_err

                        best_l = l
                        best_w = w
                        obest_e = best_e
                        print "new opt err:{}, for LR: {}, WD:{}, NE:{} ".format(
                            tot_min_err, best_l, best_w, obest_e)
    net.sorted = False
    net.sortModules()
    res_f = open('net.dump', 'w')
    pickle.dump(net, res_f)
    res_f.close()
    return net
# Example #12 (votes: 0)
def make_data_for_barplot():
    accuracys = []
    training_set, train_set_labels, validation_set, validation_set_labels = LoadData(
        'labeled_images.mat', True, True)
    # training_set, train_set_labels, idst = LoadData('labeled_images.mat', True, False)

    kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=5)
    logistic_regression_solver = sklearn.linear_model.LogisticRegression(
        penalty='l2',
        dual=False,
        tol=0.001,
        C=1.2,
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=None,
        random_state=None,
        solver='newton-cg',
        max_iter=200,
        multi_class='ovr',
        verbose=0,
        warm_start=False,
        n_jobs=2)
    svm_class = svm.SVC(kernel='rbf',
                        C=50,
                        shrinking=False,
                        decision_function_shape='ovr',
                        tol=0.001,
                        max_iter=-1)

    standard_train_inputs = standard_data(training_set)
    standard_valid_inputs = standard_data(validation_set)

    fixed_train_set = fix_pixels(training_set)
    fixed_valid = fix_pixels(validation_set)

    accuracys.append(
        knn(training_sett, train_set_labels, validation_set,
            validation_set_labels))
    print "knn"
    accuracys.append(
        logistic_regression(training_sett, train_set_labels, validation_set,
                            validation_set_labels))
    print "logistic_regression"
    accuracys.append(
        run_svm(training_sett, train_set_labels, validation_set,
                validation_set_labels))
    print "run_svm"

    accuracys.append(
        run_bagging(fixed_train_set, train_set_labels, kknn_class, fixed_valid,
                    validation_set_labels, True))
    print " knn B"
    accuracys.append(
        run_bagging(standard_train_inputs, train_set_labels,
                    logistic_regression_solver, standard_valid_inputs,
                    validation_set_labels, True))
    print "logistic_regression  B"
    accuracys.append(
        run_bagging(fixed_train_set, train_set_labels, svm_class, fixed_valid,
                    validation_set_labels, True))
    print "run_svm  B"

    create_csv(accuracys, 'barplot_bagg_accuracy.csv')
    fig = plt.figure()
    ax = fig.add_subplot(111)
    barplot_bagging(ax, accuracys)

    return accuracys
# Example #13 (votes: 0)
def run_my_votin(training_set,
                 train_set_labels,
                 validation_set=None,
                 validation_set_labels=None,
                 train=True):
    from sklearn.ensemble import VotingClassifier
    from pybrain.datasets import ClassificationDataSet

    standard_valid_inputs = standard_data(validation_set)
    fixed_valid = fix_pixels(validation_set)
    equalize_and_standard_validation = standard_data(fixed_valid)
    if train:
        standard_train_inputs = standard_data(training_set)
        fixed_train_set = fix_pixels(training_set)
        equalize_and_standard = standard_data(fixed_train_set)

        kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=11)
        # kknn_class.fit(standard_train_inputs, train_set_labels.ravel())
        logistic_regression_solver = sklearn.linear_model.LogisticRegression(
            penalty='l2',
            dual=False,
            tol=0.01,
            C=1.0,
            fit_intercept=True,
            intercept_scaling=1,
            class_weight=None,
            random_state=None,
            solver='newton-cg',
            max_iter=200,
            multi_class='ovr',
            verbose=0,
            warm_start=False,
            n_jobs=2)
        svm_class = svm.SVC(kernel='rbf',
                            C=50,
                            shrinking=False,
                            decision_function_shape='ovr',
                            tol=0.001,
                            max_iter=-1)

        print "train knn"
        bg1 = run_bagging(fixed_train_set, train_set_labels, kknn_class, None,
                          None, False)
        res_f = open('bg1knn.dump', 'w')
        pickle.dump(bg1, res_f)
        res_f.close()
        print "Knn done"
        print "train Logistic Regression"
        bg2 = run_bagging(standard_train_inputs, train_set_labels,
                          logistic_regression_solver, None, None, False)
        res_f = open('bg2lr.dump', 'w')
        pickle.dump(bg2, res_f)
        res_f.close()
        print "done bg LR"
        print "train SVM"
        bg3 = run_bagging(equalize_and_standard, train_set_labels, svm_class,
                          None, None, False)
        res_f = open('bg3svm.dump', 'w')
        pickle.dump(bg3, res_f)
        res_f.close()
        print "done bg svm"
        print "train Neural-Nets"
        net_clf = net_class(standard_train_inputs, train_set_labels, None,
                            None, False)
        res_f = open('net.dump', 'w')
        pickle.dump(net_clf, res_f)
        res_f.close()
        print "nets done"
    else:
        print "Load knn"
        res_1 = open('bg1knn.dump', 'r')
        bg1 = pickle.load(res_1)
        res_1.close()
        print "knn done"
        print "Load LR"
        res_2 = open('bg2lr.dump', 'r')
        bg2 = pickle.load(res_2)
        res_2.close()
        print "LR done"
        print "Load SVM"
        res_3 = open('bg3svm.dump', 'r')
        bg3 = pickle.load(res_3)
        res_3.close()
        print "svm done"
        print "Load Neural-nets"
        res_4 = open('net.dump', 'r')
        net_clf = pickle.load(res_4)
        res_4.close()
        print "net done"

    preds_arr = []
    pred_weights = [0.1, 0.26, 0.34]
    net_weight = 0.30

    preds_arr.append(bg1.predict_proba(fixed_valid))
    preds_arr.append(bg2.predict_proba(standard_valid_inputs))
    preds_arr.append(bg3.predict_proba(equalize_and_standard_validation))

    net_preds = []
    for in_data in standard_valid_inputs:
        net_preds.append(net_clf.activate(in_data))

    # preds_arr.append(net_preds)
    fin_pred = []
    for i in range(len(standard_valid_inputs)):
        tmp_np = np.zeros(7)
        for w, pp in zip(pred_weights, preds_arr):
            tmp_np += pp[i] * w
        tmp_np += net_preds[i] * net_weight

        fin_pred.append(tmp_np)

    fin_labels = [(np.argmax(ar, axis=0) + 1) for ar in fin_pred]
    create_csv(fin_labels, 'test_csv.csv')
    if validation_set_labels:
        fin_acc, err = get_acc(fin_labels, validation_set_labels)
        print 'The final accuracy after bagging and votig is :', fin_acc

    fin_one_of_k = []
    for c in fin_labels:
        carr = [int(i == c - 1) for i in range(0, 7)]
        fin_one_of_k.append(carr)
    return fin_one_of_k
# Example #14 (votes: 0)
def net_class(ustraining_set, train_set_labels, usvalidation_set=None, validation_set_labels=None, pre = True):
    # print (validation_set_labels - 1)
    if pre:
        # ltraining_set = gabor_filter(ustraining_set)
        ltraining_set = standard_data(ustraining_set)
    else:
        ltraining_set = ustraining_set
    if not usvalidation_set == None:
        if pre:
            # lvalidation_set = gabor_filter(usvalidation_set)
            lvalidation_set = standard_data(usvalidation_set)
        else:
            lvalidation_set = usvalidation_set
        vds = ClassificationDataSet(1024, 7, nb_classes=7)
        for vd, vt in zip(lvalidation_set, validation_set_labels):
            vtarr = [int(i==vt-1) for i in range(0,7)]
            vds.addSample(vd, vtarr)
    # net = buildNetwork(1024, 100, 8,outclass=SoftmaxLayer)

    ds = ClassificationDataSet(1024, 7, nb_classes=7)
    for d,t in zip(ltraining_set, train_set_labels):
        tarr = [int(i==t-1) for i in range(0,7)]
        ds.addSample(d, tarr)


    tot_min_err = 100.0
    best_l = 0.0
    best_w = 0.0
    obest_e = 0
    for l in [ 0.005 ]:
        for w in [0.01]:
            net = buildNetwork(1024, 320, 7, outclass=SoftmaxLayer, hiddenclass=SigmoidLayer)
            net.sortModules()
            trainer = BackpropTrainer(net, ds, learningrate=l, momentum=0, weightdecay=w, batchlearning=False,verbose=True)
            cmin_err = 100.0
            flag = True
            best_e = 0
            e = 0
            
            flag = False
            trnresult = 100.0
            tstresult = 100.0
            for i in range(10):
                e += 1
                trainer.trainEpochs(1)

                trnresult = percentError( trainer.testOnClassData(),
                                          train_set_labels-1 )

                if not usvalidation_set == None:
                    tstresult = percentError( trainer.testOnClassData(dataset=vds ), validation_set_labels-1 )
                    if cmin_err >= tstresult:
                        cmin_err = tstresult
                        print "copt err ", tstresult
                        best_e = e
                        flag = True
                    if tot_min_err > cmin_err:

                        tot_min_err = cmin_err

                        best_l = l
                        best_w = w
                        obest_e = best_e
                        print "new opt err:{}, for LR: {}, WD:{}, NE:{} ".format(tot_min_err, best_l, best_w, obest_e)
    net.sorted = False
    net.sortModules()
    res_f = open('net.dump', 'w')
    pickle.dump(net,res_f )
    res_f.close()
    return net