Пример #1
0
def marginPerceptron_cvRate(train_matrix, train_actual_labels, rateRange,
                            num_folds):
    """Cross-validate the margin perceptron over the candidates in rateRange.

    For every candidate the rows are cut into ``num_folds`` contiguous folds
    of ``len(rows) // num_folds`` rows each; the last fold additionally takes
    the remainder rows.  Each fold is held out once, a margin perceptron is
    trained on the remaining rows, and the accuracy on the held-out fold is
    recorded.  The per-candidate score is the mean accuracy over the folds.

    Args:
        train_matrix: object exposing ``tolist()`` that yields the feature
            rows (presumably a 2-D numpy array -- confirm against callers).
        train_actual_labels: object exposing ``tolist()`` that yields one
            label per row.
        rateRange: indexable sequence of candidate values.
        num_folds: number of folds.

    Returns:
        Tuple ``(best_accuracy, best_rate, av_accuracy_list)``: the highest
        mean accuracy, the first candidate that reached it, and the mean
        accuracy of every candidate in ``rateRange`` order.

    Side effects:
        Prints progress to stdout and shows a matplotlib plot (blocking on
        ``plt.show()``).
    """
    av_accuracy_list = []
    for rate in rateRange:
        # Single-argument print: same output under Python 2 and Python 3.
        print('rate:  %s' % (rate,))
        # tolist() already builds fresh Python lists, so no extra deepcopy is
        # needed; converting once per candidate keeps candidates isolated
        # from each other as in the original code.
        training_lab = train_actual_labels.tolist()
        training_mat = train_matrix.tolist()
        # Floor division keeps the fold size an int on Python 3 as well.
        subset_size = len(training_mat) // num_folds

        accuracy_list = []
        for i in range(num_folds):
            start = i * subset_size
            if i == num_folds - 1:
                # Last fold: everything from `start` on is held out.
                temp_test_Matrix = training_mat[start:]
                temp_test_Labels = training_lab[start:]
                temp_train_Matrix = training_mat[:start]
                temp_train_Labels = training_lab[:start]
            else:
                stop = start + subset_size
                temp_test_Matrix = training_mat[start:stop]
                temp_test_Labels = training_lab[start:stop]
                temp_train_Matrix = training_mat[:start] + training_mat[stop:]
                temp_train_Labels = training_lab[:start] + training_lab[stop:]

            # NOTE(review): the candidate is passed as the 5th argument and a
            # constant 1 as the 4th.  algoFORepochs calls marginPerceptron as
            # (..., learningRate, margin), which would make this a search
            # over the margin with learning rate 1 -- confirm the intended
            # argument order against percepFunctions.marginPerceptron.
            w_train, numUpdates_simpleP = percepFunctions.marginPerceptron(
                temp_train_Matrix, temp_train_Labels, 'zeros', 1, rate)
            predicted_labels = projectFunctions.prediction_Perceptron(
                temp_test_Matrix, temp_test_Labels, w_train)
            accuracy_list.append(
                projectFunctions.accuracyMETRIC(temp_test_Labels,
                                                predicted_labels))

        av_accuracy = np.mean(accuracy_list)
        av_accuracy_list.append(av_accuracy)
        print(av_accuracy)

    best_accuracy = max(av_accuracy_list)
    # index() returns the first maximum, so ties go to the earliest candidate.
    best_rate = rateRange[av_accuracy_list.index(best_accuracy)]

    plt.plot(rateRange, av_accuracy_list)
    plt.xlabel('Learning Rate')
    plt.ylabel('Accuracy')
    # The title used to say "Simple perceptron" (copy-paste from the sibling).
    plt.title('Accuracy v/s Learning Rate, for Margin perceptron')
    plt.grid(True)
    plt.show()
    return best_accuracy, best_rate, av_accuracy_list
Пример #2
0
def eliminateBADfeaturesWITHperceptron(finalFeatureCASE, labelCASE,
                                       thresholdBIAS):
    """Drop feature columns based on leave-one-column-out perceptron F1.

    The data sets come from ``eliminateZEROfetures('w', labelCASE)``.  For
    every column ``c`` a weighted perceptron (initial weights ``'zeros'``,
    rate 0.675) is trained on the training matrix without column ``c`` and
    its F1 on that same training data is recorded.  Every column whose
    "without this column" F1 is below ``mean(F1) + thresholdBIAS`` is removed
    from the train, test and eval matrices.

    NOTE(review): a low F1 after removing a column suggests the column was
    useful, yet those are the columns that get deleted here -- confirm that
    the comparison direction is intended.

    Args:
        finalFeatureCASE: ``'01'`` sets every positive entry to 1;
            ``'normalize'`` divides every column by its (max - min) range,
            computed separately for each of the three matrices; any other
            value leaves the remaining columns untouched.
        labelCASE: forwarded unchanged to ``eliminateZEROfetures``.
        thresholdBIAS: offset added to the mean F1 to form the cut-off.

    Returns:
        ``(train_matrix, train_labels, test_matrix, test_labels,
        eval_matrix, eval_labels)`` with the reduced / transformed matrices.
    """
    featureCASE = 'w'
    (train_matrix, train_labels, test_matrix, test_labels, eval_matrix,
     eval_labels) = eliminateZEROfetures(featureCASE, labelCASE)

    # F1 on the training data when column c is left out.  Only the F1 score
    # takes part in the selection, so accuracy is no longer computed.
    f1_without_column = []
    for c in range(len(train_matrix[0])):
        # np.delete returns a new array; the input matrix is not modified.
        reduced_matrix = np.delete(train_matrix, c, axis=1)
        w, dummy = percepFunctions.weightedPerceptron(reduced_matrix,
                                                      train_labels, 'zeros',
                                                      0.675)
        train_pred_labels = projectFunctions.prediction_Perceptron(
            reduced_matrix, train_labels, w)
        dummyP, dummyR, f1_train = projectFunctions.f1METRIC(
            train_labels, train_pred_labels)
        f1_without_column.append(f1_train)

    meanF1 = np.sum(f1_without_column) / len(f1_without_column)
    cutoff = meanF1 + thresholdBIAS
    delLIST = [c for c, f1 in enumerate(f1_without_column) if f1 < cutoff]

    # np.delete hands back fresh arrays, so the in-place edits below never
    # touch the arrays returned by eliminateZEROfetures.
    train_matrix = np.delete(train_matrix, delLIST, axis=1)
    test_matrix = np.delete(test_matrix, delLIST, axis=1)
    eval_matrix = np.delete(eval_matrix, delLIST, axis=1)

    if finalFeatureCASE == '01':
        for matrix in (train_matrix, test_matrix, eval_matrix):
            matrix[matrix > 0] = 1
    elif finalFeatureCASE == 'normalize':
        for matrix in (train_matrix, test_matrix, eval_matrix):
            colRange = matrix.max(axis=0) - matrix.min(axis=0)
            # Constant columns have range 0; divide them by 1 instead.  The
            # training matrix previously lacked this guard and divided by 0.
            colRange[colRange == 0] = 1
            # Assign in place so the array keeps its dtype, as the original
            # element-by-element assignment did.
            matrix[...] = matrix / colRange
    return train_matrix, train_labels, test_matrix, test_labels, eval_matrix, eval_labels
Пример #3
0
def algoFORepochs(numEpochs,
                  trainMatrix,
                  trainLabels,
                  testMatrix,
                  testLabels,
                  algo_type,
                  learningRate,
                  initialWcondition,
                  margin=None):
    """Train for several epochs and report the epoch with the best train F1.

    Epoch 0 trains on the data in the given order starting from
    ``initialWcondition``.  Every later epoch reshuffles the previous epoch's
    ordering with ``shuffle(..., random_state=0)`` and continues from the
    weights returned by the previous epoch.  After each epoch the weights are
    scored (accuracy and F1) on the unshuffled training set and on the test
    set.

    Args:
        numEpochs: number of training passes.
        trainMatrix, trainLabels: training examples and their labels.
        testMatrix, testLabels: held-out examples and their labels.
        algo_type: one of ``'simple'``, ``'margin'``, ``'aggressive'``,
            ``'weightedAggressive'``, ``'weightedSimple'``, ``'winnow'``.
        learningRate: forwarded to the selected trainer (not to winnow).
        initialWcondition: weight initialisation passed to the first epoch.
        margin: forwarded to the margin-based trainers only.

    Returns:
        ``[shuffle_type, epoch_index, acc_train, f1_train, acc_test,
        f1_test, weights]`` for the epoch with the highest training F1
        (``epoch_index`` is 0-based; on ties the later epoch wins).  All
        seven entries are ``None`` when no epoch qualified, e.g. when
        ``numEpochs`` is 0.

    Raises:
        ValueError: if ``algo_type`` is not one of the supported names.
    """
    known_algos = ('simple', 'margin', 'aggressive', 'weightedAggressive',
                   'weightedSimple', 'winnow')
    if algo_type not in known_algos:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError('unknown algo_type: %r' % (algo_type,))

    # Working copies that carry the shuffled ordering from epoch to epoch.
    tempTrainMatrix = copy.deepcopy(trainMatrix)
    tempTrainLabels = copy.deepcopy(trainLabels)

    # Placeholder result so that "no qualifying epoch" returns cleanly
    # instead of failing on names that were never assigned.
    best_LIST = [None, None, None, None, None, None, None]
    best_f1_train = 0
    w = None

    for e_ind in range(numEpochs):
        if e_ind == 0:
            shuffleTYPE = 'no shuffle'
            trainingExMat = trainMatrix
            trainExLabels = trainLabels
            weightCondition = initialWcondition
        else:
            shuffleTYPE = 'with shuffle'
            trainingExMat, trainExLabels = shuffle(tempTrainMatrix,
                                                   tempTrainLabels,
                                                   random_state=0)
            tempTrainMatrix = trainingExMat
            tempTrainLabels = trainExLabels
            # Warm start from the previous epoch's weights.
            weightCondition = w

        # Kept as an if/elif chain so that only the selected trainer's name
        # is looked up.
        if algo_type == 'simple':
            w, numUpdates = simplePerceptron(trainingExMat, trainExLabels,
                                             weightCondition, learningRate)
        elif algo_type == 'margin':
            w, numUpdates = marginPerceptron(trainingExMat, trainExLabels,
                                             weightCondition, learningRate,
                                             margin)
        elif algo_type == 'aggressive':
            w, numUpdates = agrresiveMarginPerceptron(trainingExMat,
                                                      trainExLabels,
                                                      weightCondition,
                                                      learningRate, margin)
        elif algo_type == 'weightedAggressive':
            w, numUpdates = WeightedAgrresiveMarginPerceptron(
                trainingExMat, trainExLabels, weightCondition, learningRate,
                margin)
        elif algo_type == 'weightedSimple':
            w, numUpdates = weightedPerceptron(trainingExMat, trainExLabels,
                                               weightCondition, learningRate)
        else:
            # NOTE(review): winnow always receives 'ones' and 2, so neither
            # the previous epoch's weights nor learningRate are forwarded --
            # confirm this restart-per-epoch behaviour is intended.
            w, numUpdates = winnowFunctions.simpleWinnow(
                trainingExMat, trainExLabels, 'ones', 2)

        # Winnow and the perceptron variants differ only in the predictor.
        if algo_type == 'winnow':
            predict = projectFunctions.prediction_Winnow
        else:
            predict = projectFunctions.prediction_Perceptron

        train_pred_labels = predict(trainMatrix, trainLabels, w)
        acc_train = projectFunctions.accuracyMETRIC(trainLabels,
                                                    train_pred_labels)
        test_pred_labels = predict(testMatrix, testLabels, w)
        acc_test = projectFunctions.accuracyMETRIC(testLabels,
                                                   test_pred_labels)
        dummyP, dummyR, f1_train = projectFunctions.f1METRIC(
            trainLabels, train_pred_labels)
        dummyP, dummyR, f1_test = projectFunctions.f1METRIC(
            testLabels, test_pred_labels)

        if f1_train >= best_f1_train:
            best_f1_train = f1_train
            # Snapshot the weights in case a later epoch updates the same
            # object in place while continuing from it.
            best_LIST = [
                shuffleTYPE, e_ind, acc_train, f1_train, acc_test, f1_test,
                copy.deepcopy(w)
            ]

    return best_LIST
Пример #4
0
#trainMatrix, testMatrix, evalMatrix  = projectFunctions.featureTransformation(train_matrix, test_matrix, eval_matrix,1)

# Script section: run the 'simple' perceptron for maxEpochs epochs, print the
# reported metrics and write the predictions for the eval set to a
# leaderboard CSV.

learningRate = 0.675
margin = 0
maxEpochs = 1
# NOTE(review): this call passes 10 positional arguments (including the eval
# matrix and labels) and the code below reads bestLIST up to index 13.  It
# therefore targets a variant of algoFORepochs that also scores the eval set
# and returns a longer list -- verify against the percepFunctions version in
# use.
bestLIST = percepFunctions.algoFORepochs(maxEpochs, trainMatrix, train_actual_labels, testMatrix, test_actual_labels, evalMatrix, eval_actual_labels, 'simple', learningRate, margin)
# Print entries 1..6 and 8..13; entries 0 and 7 are skipped here (entry 7 is
# used as the weight vector further down).
for i in range(1,7):
  print bestLIST[i]
for i in range(8,14):
  print bestLIST[i]
#Write leaderboard file
# NOTE(review): the prompt mentions ./solutions_log/x__x_solutions.csv, but
# the file is written to ./solutions_log/solutions/<name>.solutions.csv.
solution_filename = raw_input('Enter x__x in ./solutions_log/x__x_solutions.csv: ')
w_val = bestLIST[7]
# NOTE(review): training above used evalMatrix while the prediction below
# uses eval_matrix -- confirm both names refer to the same feature
# representation.
eval_pred_labels = projectFunctions.prediction_Perceptron(eval_matrix, eval_actual_labels, w_val)
projectFunctions.write_solutions('perceptron',eval_pred_labels,'./solutions_log/solutions/'+solution_filename+'.solutions.csv')


'''
filepath = './solutions_log/simple_50e_infolist.txt'
thefile = open('./solutions_log/infoARRAY/'+solution_filename+'.infoARRAY.csv', 'w')
for item in simple_weighted_info:
  thefile.write("%s\n" % item)
thefile.close()
'''
#end_time = timeit.default_timer()


#Simple or Weighted Perceptron
'''
Пример #5
0
def simplePerceptron_cvWeight(train_matrix, train_actual_labels, weightRange,
                              num_folds):
    """Cross-validate the simple perceptron's initial-weight setting.

    For every candidate in ``weightRange`` the rows are cut into
    ``num_folds`` contiguous folds of ``len(rows) // num_folds`` rows each
    (the last fold additionally takes the remainder rows).  Each fold is held
    out once, a simple perceptron is trained on the remaining rows with the
    candidate as its weight initialisation and learning rate 1, and the
    accuracy on the held-out fold is recorded.  The per-candidate score is
    the mean accuracy over all folds.

    Args:
        train_matrix: object exposing ``tolist()`` that yields the feature
            rows (presumably a 2-D numpy array -- confirm against callers).
        train_actual_labels: object exposing ``tolist()`` that yields one
            label per row.
        weightRange: indexable sequence of candidate initialisations.
        num_folds: number of folds.

    Returns:
        Tuple ``(best_accuracy, best_weight, av_accuracy_list)``: the highest
        mean accuracy, the first candidate that reached it, and the mean
        accuracy of every candidate in ``weightRange`` order.

    Side effects:
        Prints progress to stdout and shows a matplotlib plot (blocking on
        ``plt.show()``).
    """
    av_accuracy_list = []
    for weight in weightRange:
        # Single-argument print: same output under Python 2 and Python 3.
        print('weight:  %s' % (weight,))
        # tolist() already builds fresh Python lists, so no extra deepcopy is
        # needed; converting once per candidate keeps candidates isolated.
        training_lab = train_actual_labels.tolist()
        training_mat = train_matrix.tolist()
        # Floor division keeps the fold size an int on Python 3 as well.
        subset_size = len(training_mat) // num_folds

        accuracy_list = []
        for i in range(num_folds):
            start = i * subset_size
            if i == num_folds - 1:
                # Last fold: everything from `start` on is held out.  This
                # fold used to be skipped by a stray `continue`; it is now
                # evaluated like in the sibling cross-validation functions.
                temp_test_Matrix = training_mat[start:]
                temp_test_Labels = training_lab[start:]
                temp_train_Matrix = training_mat[:start]
                temp_train_Labels = training_lab[:start]
            else:
                stop = start + subset_size
                temp_test_Matrix = training_mat[start:stop]
                temp_test_Labels = training_lab[start:stop]
                temp_train_Matrix = training_mat[:start] + training_mat[stop:]
                temp_train_Labels = training_lab[:start] + training_lab[stop:]

            # Train on the remaining folds, score on the held-out fold.
            w_train, numUpdates_simpleP = percepFunctions.simplePerceptron(
                temp_train_Matrix, temp_train_Labels, weight, 1)
            predicted_labels = projectFunctions.prediction_Perceptron(
                temp_test_Matrix, temp_test_Labels, w_train)
            accuracy_list.append(
                projectFunctions.accuracyMETRIC(temp_test_Labels,
                                                predicted_labels))

        av_accuracy = np.mean(accuracy_list)
        av_accuracy_list.append(av_accuracy)
        print(av_accuracy)

    best_accuracy = max(av_accuracy_list)
    # index() returns the first maximum, so ties go to the earliest candidate.
    best_weight = weightRange[av_accuracy_list.index(best_accuracy)]

    plt.plot(weightRange, av_accuracy_list)
    plt.xlabel('initialization weight')
    plt.ylabel('Accuracy')
    plt.title('Accuracy v/s Learning weight, for Simple perceptron')
    plt.grid(True)
    plt.show()
    # Return the winning candidate, not the loop variable (which is always
    # the last candidate tried).
    return best_accuracy, best_weight, av_accuracy_list
Пример #6
0
def simplePerceptron_cvRate(train_matrix, train_actual_labels, rateRange,
                            num_folds):
    """Cross-validate the weighted perceptron's learning rate by F1 score.

    For every rate in ``rateRange`` the rows are cut into ``num_folds``
    contiguous folds of ``len(rows) // num_folds`` rows each (the last fold
    additionally takes the remainder rows).  Each fold is held out once, a
    weighted perceptron is trained on the remaining rows from ``'zeros'``
    initial weights with that rate, and the F1 score on the held-out fold is
    recorded.  The per-rate score is the mean F1 over the folds.

    Args:
        train_matrix: object exposing ``tolist()`` that yields the feature
            rows (presumably a 2-D numpy array -- confirm against callers).
        train_actual_labels: object exposing ``tolist()`` that yields one
            label per row.
        rateRange: indexable sequence of candidate learning rates.
        num_folds: number of folds.

    Returns:
        Tuple ``(best_f1, best_rate, av_f1_list)``: the highest mean F1, the
        first rate that reached it, and the mean F1 of every rate in
        ``rateRange`` order.

    Side effects:
        Prints progress to stdout and shows a matplotlib plot (blocking on
        ``plt.show()``).
    """
    av_f1_list = []
    for rate in rateRange:
        # Single-argument print: same output under Python 2 and Python 3.
        print('rate:  %s' % (rate,))
        # tolist() already builds fresh Python lists, so no extra deepcopy is
        # needed; converting once per candidate keeps candidates isolated.
        training_lab = train_actual_labels.tolist()
        training_mat = train_matrix.tolist()
        # Floor division keeps the fold size an int on Python 3 as well.
        subset_size = len(training_mat) // num_folds

        f1_list = []
        for i in range(num_folds):
            start = i * subset_size
            if i == num_folds - 1:
                # Last fold: everything from `start` on is held out.
                temp_test_Matrix = training_mat[start:]
                temp_test_Labels = training_lab[start:]
                temp_train_Matrix = training_mat[:start]
                temp_train_Labels = training_lab[:start]
            else:
                stop = start + subset_size
                temp_test_Matrix = training_mat[start:stop]
                temp_test_Labels = training_lab[start:stop]
                temp_train_Matrix = training_mat[:start] + training_mat[stop:]
                temp_train_Labels = training_lab[:start] + training_lab[stop:]

            # Train on the remaining folds, score on the held-out fold.
            w_train, numUpdates_simpleP = percepFunctions.weightedPerceptron(
                temp_train_Matrix, temp_train_Labels, 'zeros', rate)
            predicted_labels = projectFunctions.prediction_Perceptron(
                temp_test_Matrix, temp_test_Labels, w_train)
            # f1METRIC is unpacked as (precision, recall, f1) at its other
            # call sites; keep only the F1 component.  Appending the whole
            # return value made np.mean average precision, recall and F1
            # together.
            dummyP, dummyR, test_f1 = projectFunctions.f1METRIC(
                temp_test_Labels, predicted_labels)
            f1_list.append(test_f1)

        av_f1 = np.mean(f1_list)
        av_f1_list.append(av_f1)
        print(av_f1)

    best_f1 = max(av_f1_list)
    # index() returns the first maximum, so ties go to the earliest rate.
    best_rate = rateRange[av_f1_list.index(best_f1)]

    plt.plot(rateRange, av_f1_list)
    plt.xlabel('Learning Rate')
    plt.ylabel('F1 score')
    plt.title('F1 score v/s Learning Rate, for Simple Weighted perceptron')
    plt.grid(True)
    plt.show()
    return best_f1, best_rate, av_f1_list
Пример #7
0
        bestEpochs = epochsCV ; bestGamma = gamma_; bestC = C_; bestAccuracy = accuracy
wCV = svmFunctions.SVM(train_matrix, train_labels, epochsCV, bestC, bestGamma)
pred_train_list = svmFunctions.prediction(train_matrix, train_labels, wCV)
pred_test_list = svmFunctions.prediction(test_matrix, test_labels, wCV)
acc_train = svmFunctions.accuracyMETRIC(train_labels, pred_train_list)
acc_test = svmFunctions.accuracyMETRIC(test_labels, pred_test_list)
dummyP, dummyR, f1_train = projectFunctions.f1METRIC(train_labels, pred_train_list)
dummyP, dummyR, f1_test = projectFunctions.f1METRIC(test_labels, pred_test_list)
print 'Best gamma(0) : ', bestGamma,'    Best C: ', bestC,'    Best epochs: ', bestEpochs,'   Train Acc:', acc_train,'   Train f1:', f1_train,'    Test Acc', acc_test,'%   Test f1:', f1_test
endTime = timeit.default_timer(); print 'Total run time: ', endTime - startTime,'secs'
'''

# Script section: train one SVM with fixed hyper-parameters and report
# accuracy and F1 on the training and test sets.
print 'Final Support Vector Machines:'
# Fixed settings passed to svmFunctions.SVM below.
epochsFINAL = 20
CFINAL = 11000
gammaFINAL = 1e-06
wFINAL = svmFunctions.SVM(train_matrix, train_labels, epochsFINAL, CFINAL,
                          gammaFINAL)
# The SVM weights are scored with the perceptron prediction helper.
# NOTE(review): presumably both models share the same linear decision rule --
# confirm that prediction_Perceptron is appropriate for SVM weights.
pred_train_list = projectFunctions.prediction_Perceptron(
    train_matrix, train_labels, wFINAL)
pred_test_list = projectFunctions.prediction_Perceptron(
    test_matrix, test_labels, wFINAL)
acc_train = projectFunctions.accuracyMETRIC(train_labels, pred_train_list)
acc_test = projectFunctions.accuracyMETRIC(test_labels, pred_test_list)
# f1METRIC returns three values; only the third (F1) is reported.
dummyP, dummyR, f1_train = projectFunctions.f1METRIC(train_labels,
                                                     pred_train_list)
dummyP, dummyR, f1_test = projectFunctions.f1METRIC(test_labels,
                                                    pred_test_list)
# NOTE(review): only the test accuracy is followed by a '%' sign in the
# output; the train accuracy is printed without one.
print '\tBest gamma(0) : ', gammaFINAL, '    Best C: ', CFINAL, '   Train Acc:', acc_train, '   Train f1:', f1_train, '    Test Acc', acc_test, '%   Test f1:', f1_test

1e-06