Пример #1
0
def TrainPerceptronRandomWeight(train_data, test_data, train_label, test_label,
                                numFeatures, itr):
    """Train 100 randomly initialised perceptrons and report the best one.

    Each restart fits a perceptron on the first ``numFeatures`` columns of
    ``train_data`` starting from random weights.  The restart with the highest
    training accuracy is then used to initialise one more perceptron, which is
    fitted again and scored on the test set.

    Args:
        train_data: 2-D array of training samples (sliced as
            ``train_data[:, :numFeatures]``).
        test_data: 2-D array of test samples, sliced the same way.
        train_label: labels for ``train_data``.
        test_label: labels for ``test_data``.
        numFeatures: number of leading feature columns to use.
        itr: value passed to the perceptron as ``max_iter``.

    Returns:
        None.  Results are printed.
    """
    num_restarts = 100
    train_x = train_data[:, :numFeatures]
    test_x = test_data[:, :numFeatures]

    weight = []
    intercept = []
    train_accuracy = np.empty([num_restarts])

    for i in range(num_restarts):
        net = perceptron.Perceptron(max_iter=itr, shuffle=True)
        # The (3, numFeatures) / (3,) shapes hard-code a three-class problem.
        net.fit(train_x,
                train_label,
                coef_init=np.random.rand(3, numFeatures),
                intercept_init=np.random.rand(3))
        # Scored once per restart (the original computed this twice).
        train_accuracy[i] = net.score(train_x, train_label)
        weight.append(net.coef_)
        intercept.append(net.intercept_)

    # Re-fit a model starting from the weights of the best restart.
    max_accuracy = np.argmax(train_accuracy)
    net1 = perceptron.Perceptron(max_iter=itr, shuffle=True)
    net1.fit(train_x,
             train_label,
             coef_init=weight[max_accuracy],
             intercept_init=intercept[max_accuracy])

    # Print the results
    print('Using', numFeatures, 'features:')
    print("Maximum Training Accuracy:  " +
          str(train_accuracy[max_accuracy] * 100) + " %")
    print("Corresponding Testing Accuracy:   " +
          str(net1.score(test_x, test_label) * 100) + " %")
    # Report the parameters of the model that produced the test accuracy
    # above; the original printed those of the last random restart instead.
    print('\nWeight vectors: ')
    print(net1.coef_)
    print('\nIntercept vector: ')
    print(net1.intercept_, '\n')
Пример #2
0
def getModels(x_train, x_test, y_train, y_test):
    """Fit nine classifiers on the training split and return them as a list.

    The estimators are created and fitted one after another in this order:
    decision tree, k-nearest neighbours, logistic regression, Gaussian naive
    Bayes, multi-layer perceptron, perceptron, random forest, SVC and gradient
    boosting.  ``x_test`` and ``y_test`` are accepted but not used.

    Returns:
        list: the nine fitted estimators, in the order given above.
    """
    # Each entry builds a fresh estimator only when it is its turn to be
    # fitted, so construction and fitting stay interleaved.
    builders = [
        lambda: tree.DecisionTreeClassifier(),
        lambda: neighbors.KNeighborsClassifier(10, 'uniform'),
        lambda: linear_model.LogisticRegression(penalty='l1', verbose=0, random_state=None, fit_intercept=True),
        lambda: GaussianNB(),
        lambda: MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=0),
        lambda: perceptron.Perceptron(penalty='l1', n_iter=50, verbose=0, random_state=None, fit_intercept=True, eta0=0.02),
        lambda: RandomForestClassifier(n_estimators=10),
        lambda: svm.SVC(),
        lambda: GradientBoostingClassifier(),
    ]
    return [build().fit(x_train, y_train) for build in builders]
def GenPerceptronLayer(vals):
    """Train one binary perceptron per category name found in ``vals``.

    Rows 1 through 37 of ``vals`` are read.  Columns 0-10 of a row are
    converted to floats and form the feature vector (the vector has 12 slots;
    the last one is left at 0).  Columns 13, 15 and 17 hold category names and
    the column right after each name holds its label: the string ``"1"`` is
    the positive class, anything else is the negative class.

    Categories that only ever see one label are reported and skipped.

    Args:
        vals: indexable table of rows as described above.

    Returns:
        dict: maps ``str(category name)`` to its fitted perceptron.
    """
    nets = {}
    trainingData = {}
    trainingValues = {}
    for indx in range(1, 38):
        row = vals[indx]
        data = [0] * 12
        for j in range(0, 11):
            data[j] = float(row[j])
        for j in [13, 15, 17]:
            name = row[j]
            # Compare by value: identity (`is`) on a string literal only
            # works by accident of interning.
            label = 1 if row[j + 1] == "1" else 0
            trainingData.setdefault(name, []).append(data)
            trainingValues.setdefault(name, []).append(label)

    for string in trainingData:
        labels = trainingValues[string]
        if 1 not in labels or 0 not in labels:
            print("-E- Only one label exists for " + string + ": " +
                  str(labels))
            continue
        key = str(string)
        nets[key] = perceptron.Perceptron(max_iter=100,
                                          tol=None,
                                          verbose=0,
                                          random_state=None,
                                          fit_intercept=True,
                                          eta0=0.002)
        nets[key].fit(trainingData[string], labels)
        if debug is True:
            # The score is computed on the training data itself.
            print(string + ": " + str(nets[key].score(
                trainingData[string], labels) * 100) + "% testing accuracy")
    return nets
Пример #4
0
def main():
    """Standardise the HIGGS data and train the Keras network on one chunk.

    The test set is read from ``HIGGS/HIGGS_1.csv`` and the training set is
    streamed from ``HIGGS/HIGGS_0.csv`` in chunks of 2,000,000 rows.  In both
    files column 0 is taken as the label and the remaining columns as
    features.  Features are standardised with the hard-coded per-column
    ``mean`` and ``stdDev`` vectors (28 values each).  Only the first training
    chunk is passed to ``trainKerasNetwork``.

    An ``svm.SVC(gamma=0.001, C=100)`` and a warm-started
    ``perceptron.Perceptron(eta0=0.01)`` used to be constructed here but were
    never used; they have been removed.
    """
    dataTest = pd.read_csv('HIGGS/HIGGS_1.csv', header=None)
    dataTestX = dataTest.values
    dataTestY = dataTestX[:, 0]
    dataTestX = np.delete(dataTestX, 0, 1)

    dataChunksTrain = pd.read_csv('HIGGS/HIGGS_0.csv',
                                  header=None,
                                  chunksize=2000000)

    mean = np.array([
        0.9914658435843994, -8.2976178820622e-06, -1.3272252572679215e-05,
        0.9985363574312471, 2.6134592495411797e-05, 0.9909152318068567,
        -2.0275203997251415e-05, 7.71619920710906e-06, 0.9999687478206591,
        0.9927294304430038, -1.0264440172703127e-05, -2.0768873493851226e-05,
        1.0000080177052564, 0.9922590513707101, 1.459561349773536e-05,
        3.678631990462732e-06, 1.0000114192497513, 0.9861086617144861,
        -5.756954065664269e-06, 1.7449033596108414e-05, 1.0000001559677123,
        1.0342903040056053, 1.0248048350282475, 1.0505538681766282,
        1.009741840750048, 0.972959616608593, 1.033035574431563,
        0.9598119879373501
    ])
    stdDev = np.array([
        0.5653776754096951, 1.0088264812855468, 1.006346283885119,
        0.6000184644551814, 1.0063261640156402, 0.47497472589232176,
        1.009302952852424, 1.0059010877868422, 1.0278075278204606,
        0.49999384024846355, 1.0093304676767396, 1.0061543903728194,
        1.049397999042849, 0.4876623258003873, 1.0087467092311453,
        1.0063049450318349, 1.193675521568018, 0.5057776635500334,
        1.0076942258109045, 1.0063655876039794, 1.4002093224446897,
        0.6746353374867367, 0.38080739505009764, 0.16457624382242395,
        0.39744529874617945, 0.5254062490071941, 0.3652556048435137,
        0.3133377767062806
    ])
    dataTestX = (dataTestX - mean) / stdDev

    for chunk in dataChunksTrain:
        chunkX = chunk.values
        chunkY = chunkX[:, 0]
        chunkX = np.delete(chunkX, 0, 1)
        chunkX = (chunkX - mean) / stdDev
        trainKerasNetwork(chunkX, chunkY.copy(), dataTestX, dataTestY.copy())
        # Only the first chunk is used for training.
        break
def adaboost_avg_run_new(max_classes, avg_num_of_run, training_set,
                         testing_set):
    """Average the errors of the custom AdaBoost and scikit-learn's AdaBoost.

    The custom ``AdaBoost`` is fitted ``avg_num_of_run`` times with
    ``max_classes`` classifiers; each run provides one training and one
    testing error per classifier count.  For every classifier count from 1 to
    ``max_classes`` a scikit-learn ``AdaBoostClassifier`` over perceptrons is
    also fitted ``avg_num_of_run`` times and its test error recorded.

    Returns:
        list: one ``ErrorWrapper`` per classifier count, holding the count and
        the three averaged errors (custom train, custom test, scikit-learn).
    """
    # Datasets may keep the class attribute anywhere in the row, so the
    # attribute vectors (x) are separated from the class labels (y) first.
    train_x, train_y = split_attribute_and_label(training_set)
    test_x, test_y = split_attribute_and_label(testing_set)
    train_subset_num = int(len(train_y) * 0.2)

    counts = range(1, max_classes + 1)
    our_ada_training_errors = {count: [] for count in counts}
    our_ada_testing_errors = {count: [] for count in counts}

    # Collect the per-count errors of the custom implementation.
    for _ in range(avg_num_of_run):
        ada_obj = AdaBoost(max_classes, train_subset_num, THRESHOLD, ETA,
                           UPPER_BOUND, ETA_WEIGHTS, False)
        ada_obj.fit_with_errors(train_x, train_y, test_x, test_y)

        for position in range(max_classes):
            count = position + 1
            our_ada_training_errors[count].append(
                ada_obj.training_error[position])
            our_ada_testing_errors[count].append(
                ada_obj.testing_error[position])

    all_error_list = []
    for cl in counts:
        scikit_error = []
        for _ in range(avg_num_of_run):
            pada = perceptron.Perceptron(max_iter=UPPER_BOUND,
                                         verbose=0,
                                         random_state=None,
                                         fit_intercept=True,
                                         eta0=ETA)
            bdt = AdaBoostClassifier(pada, algorithm="SAMME", n_estimators=cl)
            bdt.fit(train_x, train_y)
            scikit_error.append(calculate_error(test_y, bdt.predict(test_x)))

        train_runs = our_ada_training_errors[cl]
        test_runs = our_ada_testing_errors[cl]
        errors = ErrorWrapper(cl,
                              sum(train_runs) / len(train_runs),
                              sum(test_runs) / len(test_runs),
                              sum(scikit_error) / len(scikit_error))
        all_error_list.append(errors)

        print("Train avg for %s   %s" % (cl, errors.train_error))
        print("Testing avg for %s   %s" % (cl, errors.test_error))
        print("Scikit adaboost avg for %s   %s" % (cl, errors.scikit_error))

    return all_error_list
def adaboost_avg_run(max_classes, avg_num_of_run, training_set, testing_set):
    """Compare the custom AdaBoost with scikit-learn's for odd ensemble sizes.

    For every classifier count ``cl`` in ``1, 3, 5, ...`` up to
    ``max_classes`` the custom ``AdaBoost`` and a scikit-learn
    ``AdaBoostClassifier`` over perceptrons are each fitted
    ``avg_num_of_run`` times, and their error rates are averaged.

    Args:
        max_classes: largest classifier count to try.
        avg_num_of_run: number of repetitions to average over.
        training_set: rows passed to ``split_attribute_and_label`` for
            training.
        testing_set: rows passed to ``split_attribute_and_label`` for testing.

    Returns:
        list: one ``ErrorWrapper`` per classifier count, holding the count and
        the averaged custom training error, custom testing error and
        scikit-learn testing error.
    """
    all_error_list = []

    # Datasets may keep the class attribute anywhere in the row, so the
    # attribute vectors (x) are separated from the class labels (y) first.
    (train_x, train_y) = split_attribute_and_label(training_set)
    (test_x, test_y) = split_attribute_and_label(testing_set)
    train_subset_num = int(len(train_y) * 0.2)

    for cl in range(1, max_classes + 1, 2):
        train_error = []
        testing_error = []
        scikit_error = []
        for _ in range(avg_num_of_run):
            ada_obj = AdaBoost(cl, train_subset_num, THRESHOLD, ETA,
                               UPPER_BOUND, ETA_WEIGHTS, False)
            ada_obj.fit(train_x, train_y)

            mistakes = ada_obj.xor_tuples(train_y, ada_obj.predict(train_x))
            error_rate_train = classifier_error_rate(mistakes)

            mistakes = ada_obj.xor_tuples(test_y, ada_obj.predict(test_x))
            error_rate_test = classifier_error_rate(mistakes)

            train_error.append(error_rate_train)
            testing_error.append(error_rate_test)

            pada = perceptron.Perceptron(max_iter=UPPER_BOUND,
                                         verbose=0,
                                         random_state=None,
                                         fit_intercept=True,
                                         eta0=ETA)

            bdt = AdaBoostClassifier(pada, algorithm="SAMME", n_estimators=cl)
            bdt.fit(train_x, train_y)
            result_list = bdt.predict(test_x)
            scikit_error.append(calculate_error(test_y, result_list))

        errors = ErrorWrapper(cl,
                              sum(train_error) / len(train_error),
                              sum(testing_error) / len(testing_error),
                              sum(scikit_error) / len(scikit_error))

        all_error_list.append(errors)
        print("Train avg for %s   %s" % (cl, errors.train_error))
        print("Testing avg for %s   %s" % (cl, errors.test_error))
        print("Scikit adaboost avg for %s   %s" % (cl, errors.scikit_error))

    return all_error_list
Пример #7
0
def perceptron_train(trainData, feaSt=0, feaEnd=-1):
    """Fit a perceptron on a column range of ``trainData`` and return it.

    Features are the columns ``feaSt:feaEnd`` of ``trainData``; the labels are
    always taken from the last column.  With the default ``feaEnd=-1`` the
    label column is excluded from the features.
    """
    model = perceptron.Perceptron(n_iter=15,
                                  shuffle=False,
                                  verbose=0,
                                  random_state=None,
                                  fit_intercept=True)
    features = trainData[:, feaSt:feaEnd]
    labels = trainData[:, -1]
    model.fit(features, labels)
    return model
Пример #8
0
def run_perceptron(x_train, x_test, y_train, y_test):
    """Fit an L1-penalised perceptron and print its mean 5-fold CV score.

    The model is first fitted on the training split; the printed score is the
    mean of ``cross_val_score`` run with ``cv=5`` on the test split.
    """
    settings = dict(penalty='l1',
                    n_iter=50,
                    verbose=0,
                    random_state=None,
                    fit_intercept=True,
                    eta0=0.02)
    clf = perceptron.Perceptron(**settings)
    clf.fit(x_train, y_train)
    mean_score = cross_val_score(clf, x_test, y_test, cv=5).mean()
    print("perceptron: %.15f" % mean_score)
    def __init__(self, training_data, test_data, num_classifiers, combo_method, classifier_layout, single_classifier):
        """Store the data and build the list of base classifiers and the combiner.

        ``classifier_layout`` selects how ``self.classifier_list`` is filled:

        * ``"random"``  - ``num_classifiers`` results of
          ``self.get_random_classifier()``.
        * ``"uniform"`` - positions are split into four consecutive quarters:
          decision tree, SVC, Gaussian naive Bayes, then perceptron.
        * ``"single"``  - ``num_classifiers`` copies of the type named by
          ``single_classifier`` (``"perceptron"``, ``"svm"``, ``"gaussian"``
          or ``"decisiontree"``); an unknown name prints a message once per
          slot and adds nothing.

        Any other layout leaves the list empty.  ``combo_method`` is stored
        and, when it matches one of the ``Combiner`` constants checked below,
        the matching estimator becomes ``self.combiner``; otherwise
        ``self.combiner`` stays ``None``.
        """
        self.train = training_data
        self.test = test_data
        self.true_test, self.testing_labels = self.format_test_data()
        self.classifier_list = []

        if classifier_layout == "random":
            for _ in range(num_classifiers):
                self.classifier_list.append(self.get_random_classifier())
        elif classifier_layout == "uniform":
            # 4 is the number of classifier types
            quarter = num_classifiers/4
            for position in range(num_classifiers):
                if position < quarter:
                    member = tree.DecisionTreeClassifier()
                elif position < 2 * quarter:
                    member = svm.SVC()
                elif position < 3 * quarter:
                    member = GaussianNB()
                else:
                    member = perceptron.Perceptron()
                self.classifier_list.append(member)
        elif classifier_layout == "single":
            # Lambdas defer the lookup of each estimator class until used.
            makers = {
                "perceptron": lambda: perceptron.Perceptron(),
                "svm": lambda: svm.SVC(),
                "gaussian": lambda: GaussianNB(),
                "decisiontree": lambda: tree.DecisionTreeClassifier(),
            }
            make = makers.get(single_classifier)
            for _ in range(num_classifiers):
                if make is None:
                    print("NO SUCH CLASSIFIER")
                else:
                    self.classifier_list.append(make())

        self.combination = combo_method
        self.combiner = None
        # The checks are independent; a later match overrides an earlier one.
        if combo_method == Combiner.NEURAL_NET:
            self.combiner = MLPClassifier()
        if combo_method == Combiner.DECISION_TREE:
            self.combiner = tree.DecisionTreeClassifier()
        if combo_method == Combiner.SVM:
            self.combiner = svm.SVC()
    def get_random_classifier(self):
        """Return a new, unfitted classifier of a randomly chosen type.

        The type is drawn uniformly from decision tree, SVC, Gaussian naive
        Bayes and perceptron.  The earlier version drew from ``randint(0, 2)``
        so its perceptron branch (index 3) could never be selected.
        """
        factories = (
            tree.DecisionTreeClassifier,
            svm.SVC,
            GaussianNB,
            perceptron.Perceptron,
        )
        # random.randint includes both end points, hence the ``- 1``.
        return factories[random.randint(0, len(factories) - 1)]()
Пример #11
0
def main():
    """Train a perceptron on keyword feature vectors and print its results.

    Keywords and the three data splits are loaded with ``readFile``.  Column 0
    of each split is turned into a feature vector by ``getFeatureVector`` and
    column 1 is used as the label.  Predictions, true labels and accuracy are
    printed for the training, validation and test splits.
    """
    keywords = readFile('./Keywords.csv')
    train_data = readFile('./Datasets/Shuffled/Training.csv')
    validation_data = readFile('./Datasets/Shuffled/Validation.csv')
    test_data = readFile('./Datasets/Shuffled/Testing.csv')

    def build_matrix(rows):
        # Stack all feature vectors in one call; growing the matrix with
        # np.vstack inside a loop copies it on every iteration.
        return np.vstack([
            getFeatureVector(rows[i, 0], keywords)
            for i in range(rows.shape[0])
        ])

    X_Train = build_matrix(train_data)
    X_Validation = build_matrix(validation_data)
    X_Test = build_matrix(test_data)

    Y_Train = train_data[:, 1]
    Y_Validation = validation_data[:, 1]
    Y_Test = test_data[:, 1]

    # Create the model
    model = perceptron.Perceptron(n_iter=490,
                                  class_weight="balanced",
                                  penalty='l2',
                                  alpha=0.0001)
    model.fit(X_Train, Y_Train)

    # Predict the result.  Each print takes one string so the output is the
    # same whether print is a statement or a function.
    print("\n Training Data:")
    print("Prediction " + str(model.predict(X_Train)))
    print("Actual     " + str(Y_Train))
    print("Accuracy   " + str(model.score(X_Train, Y_Train) * 100) + "%")

    print("\n Vaildation Data:")
    print("Prediction " + str(model.predict(X_Validation)))
    print("Actual     " + str(Y_Validation))
    print("Accuracy   " + str(
        model.score(X_Validation, Y_Validation) * 100) + "%")

    print("\n Test Data:")
    print("Prediction " + str(model.predict(X_Test)))
    print("Actual     " + str(Y_Test))
    print("Accuracy   " + str(model.score(X_Test, Y_Test) * 100) + "%")
Пример #12
0
def perceptron_avg_run(avg_num_of_run, training_set, testing_set):
    """Fit a perceptron ``avg_num_of_run`` times and average its errors.

    Returns:
        tuple: ``(mean testing error, mean training error)`` - note that the
        testing error comes first.
    """
    train_x, train_y = split_attribute_and_label(training_set)
    test_x, test_y = split_attribute_and_label(testing_set)
    train_errors = []
    test_errors = []

    for _ in range(avg_num_of_run):
        model = perceptron.Perceptron(max_iter=UPPER_BOUND,
                                      verbose=0,
                                      random_state=None,
                                      fit_intercept=True,
                                      eta0=ETA)
        model.fit(train_x, train_y)
        train_errors.append(calculate_error(train_y, model.predict(train_x)))
        test_errors.append(calculate_error(test_y, model.predict(test_x)))

    mean_test = sum(test_errors) / len(test_errors)
    mean_train = sum(train_errors) / len(train_errors)
    return mean_test, mean_train
Пример #13
0
def slp(x_train, x_test, y_train, y_test, dataset):
    """Train a single-layer perceptron, print its evaluation, return accuracy.

    The estimator itself, the test accuracy (labelled with ``dataset``), the
    confusion matrix and the classification report are printed.

    Returns:
        The fraction of test samples whose prediction equals ``y_test``.
    """
    config = dict(max_iter=500,
                  verbose=0,
                  random_state=None,
                  fit_intercept=True,
                  eta0=0.002)
    net = p.Perceptron(**config)
    print(net)

    model = net.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    hits = (y_pred == y_test).sum()
    accuracy = hits / len(y_test)

    print('Accuracy on ', dataset, ' with single layer perceptron: ',
          accuracy * 100, '%')
    print('Confusion matrix:\n', confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))

    return accuracy
Пример #14
0
def TrainPerceptron(train_data, test_data, train_label, test_label,
                    numFeatures, itr):
    """Fit one perceptron on the first ``numFeatures`` columns and print results.

    Training accuracy, testing accuracy, the weight vectors and the intercept
    vector are printed.  ``itr`` is passed to the perceptron as ``max_iter``.
    """
    train_x = train_data[:, :numFeatures]

    net = perceptron.Perceptron(max_iter=itr, shuffle=True)
    net.fit(train_x, train_label)

    # Print the results
    print('Using', numFeatures, 'features:')

    train_pct = net.score(train_x, train_label) * 100
    print("Training Accuracy:  " + str(train_pct) + " %")

    test_pct = net.score(test_data[:, :numFeatures], test_label) * 100
    print("Testing Accuracy:   " + str(test_pct) + " %")

    print('\nWeight vectors: ')
    print(net.coef_)
    print('\nIntercept vector: ')
    print(net.intercept_, '\n')
Пример #15
0
def mainworker(limit1, limit2):
    """Cross-validate a perceptron on ``pdata.txt`` with a feature range removed.

    Every line of ``pdata.txt`` is a comma-separated list of integers whose
    last value is the class label.  Features at positions ``limit1`` through
    ``limit2`` (inclusive) are dropped.  For k = 2, 3, 4 and 5 a k-fold
    cross-validation is run and the mean accuracy in percent is printed.

    Args:
        limit1: index of the first feature to drop.
        limit2: index of the last feature to drop.
    """
    rows = []
    labels = []
    # The context manager closes the file even if a line fails to parse.
    with open("pdata.txt") as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines
            values = [int(token) for token in line.strip("\n").split(",")]
            label = values.pop()
            rows.append(values[:limit1] + values[limit2 + 1:])
            labels.append(label)

    X = np.array(rows)
    y = np.array(labels)

    for k in [2, 3, 4, 5]:
        # Use the real sample count instead of a hard-coded 11054.
        kf = cross_validation.KFold(len(X), n_folds=k)
        averager = []
        for train_index, test_index in kf:
            net = perceptron.Perceptron(n_iter=100, verbose=0,
                                        random_state=None,
                                        fit_intercept=True, eta0=0.002)
            net.fit(X[train_index], y[train_index])
            averager.append(net.score(X[test_index], y[test_index]) * 100)
        answer = np.mean(averager)
        # A single pre-formatted string prints identically whether print is a
        # statement or a function.
        print("The accuracy for %s th fold is: %s \n" % (k, answer))
Пример #16
0
def run():
    """Write the perceptron weights after 1, 2, 3, ... iterations to a file.

    Command-line arguments: ``input`` (CSV read by ``load_csv``) and
    ``output`` (file to write).  For increasing ``n_iter`` a new perceptron is
    fitted; its first weight vector and intercept, truncated to integers, are
    written as one comma-separated line.  The loop stops as soon as weights
    and intercept equal those of the previous iteration count.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser(
        description="Project 3 Part I: Perceptron")
    parser.add_argument('input', type=str)
    parser.add_argument('output', type=str)
    args = parser.parse_args()

    x, y = load_csv(args.input)
    weights, bias = None, None
    # The context manager closes the output file even if fitting raises.
    with open(args.output, 'w') as target:
        for i in range(1, 99999):
            net = perceptron.Perceptron(n_iter=i, verbose=0,
                                        fit_intercept=True)
            net.fit(x, y)
            # np.array_equal is False while the previous values are still
            # None and avoids the ambiguous truth value of an element-wise
            # comparison.
            if (np.array_equal(weights, net.coef_[0])
                    and np.array_equal(bias, net.intercept_)):
                break
            weights = net.coef_[0]
            bias = net.intercept_
            output = np.concatenate([weights.astype(int), bias.astype(int)])
            target.write(",".join(str(value) for value in output) + '\n')

    return None
 def get_weights(self, S1, S2_dict):
     """Return a dict mapping (i, j) to the learned weights [w_ij, theta_ij].

     For every pair ``i < j`` below ``self.m`` the samples ``S2_dict[(i, j)]``
     are labelled -1 when ``self.oracle.compare(sample, S1[i])`` returns -1
     and 1 otherwise.  A perceptron is fitted only when both labels occur,
     and the pair is kept only when the training score is exactly 1.  The
     stored value is the first weight vector with the first intercept
     appended.
     """
     R = {}
     for i in range(self.m):
         for j in range(i + 1, self.m):
             samples = S2_dict[(i, j)]
             labels = []
             for s in samples:
                 verdict = self.oracle.compare(s, S1[i])
                 labels.append(-1 if verdict == -1 else 1)
             matrix = set_to_matrix(samples, self.n)
             net = perceptron.Perceptron(max_iter=100,
                                         fit_intercept=True,
                                         eta0=0.002)
             # A fit needs two distinct labels.
             if len(np.unique(labels)) != 2:
                 continue
             net.fit(matrix, labels)
             # Keep only perfectly separated pairs (no training error).
             if not net.score(matrix, labels) == 1:
                 continue
             w = net.coef_[0]
             theta = net.intercept_[0]
             R[(i, j)] = np.append(w, theta)
     return R
Пример #18
0
def fperceptron(x, y):
    """Fit a perceptron with default settings on ``(x, y)`` and return it."""
    model = perceptron.Perceptron()
    model.fit(x, y)
    return model
Пример #19
0
# Train the hand-written Perceptron on the two-input data set (X, y).
ninputs = 2
net = Perceptron(ninputs)
net.fit(X, y)

# weights[0] is used as the bias, the remaining entries as feature weights.
w = net.weights[1:]
b = net.weights[0]
plot_dboundary(w, b)
plot_dboundary_contourf(net)
'''
H = net.weights_hist
niter = len(H)
snapshots = [int(t) for t in np.linspace(0, 20, 5)]
#snapshots = [15,  20, 25]
for t in snapshots:
    w = H[t][1:]
    b = H[t][0]
    plot_dboundary(w, b)
    plt.title('iter {}'.format(t))
'''

# sklearn version
from sklearn.linear_model import perceptron
net_sklearn = perceptron.Perceptron()
net_sklearn.fit(X, y)
w = net_sklearn.coef_[0]
offset = net_sklearn.intercept_[0]

# Plot the sklearn boundary with the sklearn intercept; the earlier code
# passed the stale bias ``b`` of the hand-written model here.
plot_dboundary(w, offset)
# NOTE(review): this still plots the hand-written ``net``; switch to
# ``net_sklearn`` once plot_dboundary_contourf is confirmed to accept a
# scikit-learn estimator.
plot_dboundary_contourf(net)
Пример #20
0
# Labels: ten samples of class 1 followed by ten samples of class 0
t = [1] * 10 + [0] * 10

# Data
# NOTE(review): ``d`` is not defined in this snippet; it is presumably a
# 2 x 20 array with one row per coordinate - confirm against the full script.

colormap = np.array(['r', 'k'])
plt.scatter(d[0], d[1], c=colormap[t], s=40)
#plt.show()
# Rotate the data by three quarter-turns (270 degrees counter-clockwise),
# not 180 degrees as the original comment said.
d90 = np.rot90(d, 3)

# Create the model
net = perceptron.Perceptron(n_iter=100, eta0=0.002)
net.fit(d90, t)

# Print the results
print("Prediction %s" % (net.predict(d90),))
print("Actual     %s" % (t,))
print("Accuracy   %s%%" % (net.score(d90, t) * 100,))
print(d90)
print(t)
# Plot the original data
plt.scatter(d[0], d[1], c=colormap[t], s=40)

# Output the values
print("Coefficient 0 %s" % (net.coef_[0, 0],))
print("Coefficient 1 %s" % (net.coef_[0, 1],))
print("Bias %s" % (net.intercept_,))
            w = [x + y for x, y in zip(w, w0)]
            num_mistakes = num_mistakes + 1
        else:
            pass
    print("Iteration-" + str(index) + " No-of-Mistakes: " + str(num_mistakes))
    accuracy = (len(Train_Data) - num_mistakes) / len(Train_Data)
    print("Iteration-" + str(index) + " Training-Accuracy: " + str(accuracy))

# Evaluate the learned weight vector ``w`` on the held-out test data.
print("**************TESTING DATA********************")
num_mistakes = 0
accuracy = 0
for x in range(len(Test_Data)):
    Xt = Test_Feature_Vector[x]
    Yt = int(Test_Labels[x])
    # Predicted label: sign of the dot product of weights and features.
    Yht = sign(dot(w, Xt))
    if Yht != Yt:
        num_mistakes += 1

accuracy = (len(Test_Data) - num_mistakes) / len(Test_Data)
print("Testing Mistakes: %s" % (num_mistakes,))
print("Testing Accuracy: %s" % (accuracy,))

# Reference result: scikit-learn's perceptron capped at 20 iterations,
# scored on the training data.
print("**************REAL RESULTS********************")
practice = perceptron.Perceptron(max_iter=20)
practice.fit(Feature_Vector, Train_Labels)
print("Real Iteration-20 Accuracy:")
print(practice.score(Feature_Vector, Train_Labels) * 100)
Пример #22
0
import numpy as np
from sklearn.linear_model import perceptron
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# Load the text data from the files
train_data = np.loadtxt("vertigo_train.txt", dtype=int)
test_data = np.loadtxt("vertigo_predict.txt", dtype=int)
true_class = np.loadtxt("vertigo_answers.txt", dtype=int)

# Column 0 of the training file is the class label.  Remove that same column
# from the features; the earlier code deleted column 1 instead, which left
# the label in the feature matrix and dropped a real feature.
train_class = train_data[:, 0]
train_data = np.delete(train_data, 0, 1)

p = perceptron.Perceptron()
perceptron_trained = p.fit(train_data, train_class)

test_result_perceptron = perceptron_trained.predict(test_data)

# Confusion matrix; its diagonal counts the correctly classified samples.
cm = confusion_matrix(true_class, test_result_perceptron)
accuracy_perceptron = sum(np.diag(cm)) / len(true_class) * 100

# Nearest-neighbour classifier with Manhattan distance.
neigh = KNeighborsClassifier(metric="manhattan")
nn_fitted = neigh.fit(train_data, train_class)

nn_predict = nn_fitted.predict(test_data)
confu_mat_nn = confusion_matrix(true_class, nn_predict)

# Accuracy in percent.
accuracy_nn = sum(np.diag(confu_mat_nn)) / len(true_class) * 100
print("Perceptron: {0:.2f}% correct".format(accuracy_perceptron))
Пример #23
0
# Encode the two class names as integers.
y_test = y_test.replace('ALL', 0)
y_test = y_test.replace('AML', 1)

# Drop the last row of every frame; features lose their first and last
# column, labels are taken from column 1.
x_train = x_train.iloc[0:-1, 1:-1]
y_train = y_train.iloc[0:-1, 1]
x_test = x_test.iloc[0:-1, 1:-1]
y_test = y_test.iloc[0:-1, 1]

# Data scaling: fit the scaler on the training features only.  The earlier
# code called fit() a second time on the test features, which overwrote the
# training statistics.
sc = StandardScaler()
sc.fit(x_train)
x_train_scale = sc.transform(x_train)
x_test_scale = sc.transform(x_test)

ppn = perceptron.Perceptron(n_iter=1000, eta0=0.1, random_state=20)
ppn.fit(x_train, y_train)

pred = ppn.predict(x_test)

A = accuracy_score(pred, y_test)
M = confusion_matrix(y_test, pred)

# Single-argument print calls behave the same as the former print statements.
print('Accuracy for Perceptron using unscaled features: ')
print(A)

print('Confusion Matrix : ')
print(M)

# Train again on the scaled features and score on the scaled test set.
ppn.fit(x_train_scale, y_train)
Acc = accuracy_score(ppn.predict(x_test_scale), y_test)
Пример #24
0
]

# Held-out samples used to compare the classifiers
real_X = [[190, 70, 43], [184, 84, 44], [198, 92, 48], [183, 83, 44],
          [166, 47, 36], [170, 60, 38], [172, 64, 39], [182, 80, 42],
          [180, 80, 43]]
real_Y = ['male'] * 4 + ['female'] * 3 + ['male'] * 2

# Classifiers with (mostly) default hyperparameters
clf_tree = tree.DecisionTreeClassifier()
clf_svc = svm.SVC(gamma='auto')
clf_knn = neighbors.KNeighborsClassifier()
clf_per = perceptron.Perceptron()
clf_nb = GaussianNB()
clf_nn = MLPClassifier(max_iter=1000)
classifiers = [clf_tree, clf_svc, clf_knn, clf_per, clf_nb, clf_nn]

# Train every model on (X, Y)
for classifier in classifiers:
    classifier.fit(X, Y)

# Predict and compare results.
# NOTE(review): both lists have 7 slots but only 6 classifiers exist, so the
# last slot keeps its placeholder value 6.
preditions = list(range(7))
accuracy = list(range(7))

for i, classifier in enumerate(classifiers):
    preditions[i] = classifier.predict(real_X)
    accuracy[i] = accuracy_score(real_Y, preditions[i])
Пример #25
0
#6.3
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.linear_model import perceptron

# Set up the data and the target: the logical AND of the two inputs.
data = np.array([[0, 1], [0, 0], [1, 0], [1, 1]])
target = np.array([0, 0, 0, 1])

#6.4
p = perceptron.Perceptron(n_iter=100)
p_out = p.fit(data, target)
print(p_out)
msg = ("Coefficients: %s, Intercept: %s")
print(msg % (str(p.coef_), str(p.intercept_)))

#6.5
colors = np.array(['k', 'r'])
markers = np.array(['*', 'o'])
# Separate loop names keep ``data`` and ``target`` intact; the earlier loop
# reused those names and left them bound to the last sample and label.
for point, label in zip(data, target):
    plt.scatter(point[0],
                point[1],
                s=100,
                c=colors[label],
                marker=markers[label])

# Next step: compute the straight line where the separating plane meets z = 0.
# The optimisation solves z = m1*x + m2*y + c, so in the x/y viewing plane
# (where z = 0) the line is
# 0 = m1*x + m2*y + c
Пример #26
0
# coding:utf-8
'''
Created on July 8, 2015

@author: Administrator
'''
import numpy as np
from sklearn.cross_validation import train_test_split
from ANN.multilayer_perceptron import MultilayerPerceptronClassifier
from sklearn.linear_model import perceptron

# XOR truth table repeated 1000 times: inputs and matching labels
X = np.tile(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), (1000, 1))
y = [0, 1, 1, 0] * 1000
X_train, X_test, y_trian, y_test = train_test_split(X, y, random_state=3)

# Multilayer perceptron
clf = MultilayerPerceptronClassifier()
clf.fit(X_train, y_trian)
# Single-layer perceptron (fitted only; its predictions are not used below)
clf2 = perceptron.Perceptron()
clf2.fit(X_train, y_trian)
prediction = clf.predict(X_test)

# Show the true label next to the prediction for the first ten test samples
for i, p in enumerate(prediction[:10]):
    print("%s %s" % (y_test[i], p))
Пример #27
0
from sklearn.linear_model import perceptron
import numpy as np

# Function NOT
# NOT(1) = 0
# NOT(0) = 1

X = np.array([[1], [0]])
y = np.array([0, 1])

net = perceptron.Perceptron(n_iter=10, verbose=0)
net.fit(X, y)

# predict() expects a 2-D array of samples, so each scalar input is wrapped
# as one sample with one feature.
print("Prediction:")
print("0 -> " + str(net.predict([[0]])))
print("1 -> " + str(net.predict([[1]])))
Пример #28
0
def get_perceptron_classifier(penalty_param, fitIntercept):
    """Return an unfitted perceptron with the given penalty and intercept flag."""
    options = {
        'penalty': penalty_param,
        'fit_intercept': fitIntercept,
    }
    return perceptron.Perceptron(**options)
Пример #29
0
			templist[x] = lmtzr.lemmatize(templist[x])
		filetext=' '.join(templist)
		#End Lemmatizing.......................................................
		data.append(filetext)
		f.close()
#At this point, data[] contains the vector of all messages , i.e., it is a list of message strings.
#Also, spam[] stores the true labels. This corresponds to Y in the training set.


# Turn the messages into count vectors, ignoring the configured stop words.
# (This vectorizer line is what sets this variant apart; ``binary`` is left
# at its default.)
CV = feature_extraction.text.CountVectorizer(stop_words=stopwords)
vec = CV.fit_transform(data)
# vec holds one vector per message, i.e. the X of the learning set.

# Build and train the perceptron on the vectors and the true labels.
clf = perceptron.Perceptron(n_iter=100, verbose=0, random_state=None, fit_intercept=True, eta0=0.002)
clf.fit(vec, spam)

# Validation: read the list of files that belong to the validation fold.
testfolder = 'part{}'.format(folder_validate)
tflist = 'p{}flist'.format(folder_validate)
with open(tflist, 'r') as tf:
    testflist = tf.read().split()
testdata = []
testspam = []
actualspam = []
# print(testflist)
for fl in testflist:
	# print(fl)
	if 'spm' in fl:
Пример #30
0
# the problem of perceptron classifier
from sklearn.linear_model import perceptron
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

categories = ['alt.atheism', 'sci.med']
train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True)
# Use a separate name for the estimator so the imported ``perceptron`` module
# is not shadowed by its own instance.
classifier = perceptron.Perceptron(max_iter=100)
cv = CountVectorizer()
x_train_counts = cv.fit_transform(train.data)
tfidf_tf = TfidfTransformer()
x_train_tfidf = tfidf_tf.fit_transform(x_train_counts)

classifier.fit(x_train_tfidf, train.target)

test_docs = ['Religion is widespread, even in modern times',
             'His kidney failed', 'The pope is a controversial leader',
             'White blood cells fight off infections',
             'The reverend had a heart attack in church']


# Reuse the fitted vectorizer and transformer for the test documents.
x_test_counts = cv.transform(test_docs)
x_test_tfidf = tfidf_tf.transform(x_test_counts)

pred = classifier.predict(x_test_tfidf)

for doc, category in zip(test_docs, pred):
    print('%r => %s' % (doc, train.target_names[category]))

# Show the estimator's help once, after the results, rather than after every
# printed document.
help(classifier)