Example #1
File: 4.py Project: muneebshahid/MSCS
def main():
    means = [[-1, -1], [1.0, 1.0]]
    variances = [np.eye(2), np.eye(2)]  # unused; np.eye(2) is passed directly to the generator below
    knn_models = [3, 5, 10]
    data_sizes = [10, 25, 50, 75, 100, 125, 150, 175, 200]
    points_per_class = 500
    data = dg.generate_gaussian_mixture(class_means=means, class_variances=np.eye(2),
                                        num_components=5, num_desired_points_per_class=points_per_class)
    class_0 = np.hstack((data[0], np.zeros((len(data[0]), 1))))
    class_1 = np.hstack((data[1], np.ones((len(data[1]), 1))))
    results_train = np.empty((len(knn_models), len(data_sizes)))
    results_test = np.empty((len(knn_models), len(data_sizes)))
    train_data_class_0, test_data_class_0 = split_train_test(class_0)
    train_data_class_1, test_data_class_1 = split_train_test(class_1)
    print('train size, test size', len(train_data_class_1), len(test_data_class_1))
    train_data = np.vstack((train_data_class_0, train_data_class_1))
    test_data = np.vstack((test_data_class_0, test_data_class_1))
    for i, knn_model in enumerate(knn_models):
        kncs = KNeighborsClassifier(n_neighbors=knn_model)
        for j, data_size in enumerate(data_sizes):
            curr_train_class_0, curr_train_class_1 = train_data_class_0[:data_size], train_data_class_1[:data_size]
            curr_train_data = np.vstack((curr_train_class_0, curr_train_class_1))
            kncs.fit(curr_train_data[:, :2], curr_train_data[:, -1])
            predictions_train = kncs.predict(train_data[:, :2])
            predictions_test = kncs.predict(test_data[:, :2])
            results_train[i][j] = len(np.where(predictions_train != train_data[:, -1])[0]) / float(len(train_data))
            results_test[i][j] = len(np.where(predictions_test != test_data[:, -1])[0]) / float(len(test_data))

    plt.plot(data_sizes, results_test[0, :], 'r')
    plt.plot(data_sizes, results_test[1, :], 'b')
    plt.plot(data_sizes, results_test[2, :], 'g')
    plt.plot(data_sizes, results_train[0, :], 'r--')
    plt.plot(data_sizes, results_train[1, :], 'b--')
    plt.plot(data_sizes, results_train[2, :], 'g--')
    plt.show()
Example #2
File: dcs_rank.py Project: hippozhu/dcs
def knn_est_cv(X, y, clf, n_neigh):
    knn_est = KNeighborsClassifier(n_neigh, metric="manhattan", algorithm="brute")
    knn_est1 = KNeighborsClassifier(n_neigh, metric="manhattan", algorithm="brute")
    knn = KNeighborsClassifier(n_neigh, metric="euclidean", algorithm="brute")
    acc_folds = []
    for train, test in StratifiedKFold(n_splits=5).split(X, y):
        X_train = X[train]
        y_train = y[train]
        X_test = X[test]
        y_test = y[test]
        clf.fit(X_train, y_train)
        estimators = clf.estimators_
        preds_train = np.array([e.predict(X_train) for e in estimators]).T
        preds_test = np.array([e.predict(X_test) for e in estimators]).T
        preds_train_proba = np.array([e.predict_proba(X_train) for e in estimators])
        preds_test_proba = np.array([e.predict_proba(X_test) for e in estimators])
        p_train = preds_train_proba.swapaxes(0, 1)[:, :, 0]
        p_test = preds_test_proba.swapaxes(0, 1)[:, :, 0]
        acc = []
        for nn in range(1, n_neigh, 2):
            knn.set_params(n_neighbors=nn)
            knn_est.set_params(n_neighbors=nn)
            knn_est1.set_params(n_neighbors=nn)
            knn.fit(X_train, y_train)
            knn_est.fit(preds_train, y_train)
            knn_est1.fit(p_train, y_train)
            acc.append(
                [
                    accuracy_score(y_test, knn.predict(X_test)),
                    accuracy_score(y_test, knn_est.predict(preds_test)),
                    accuracy_score(y_test, knn_est1.predict(p_test)),
                ]
            )
        acc_folds.append(acc)
    return np.mean(acc_folds, axis=0)
Example #3
def DecisionTreeClassifier(TrainData):  # note: despite the name, this fits a KNN classifier
    features=['Month','Date','Year']
    season=['Fall','Spring','Summer','Winter']
    district=['BAYVIEW', 'CENTRAL', 'INGLESIDE', 'MISSION','NORTHERN', 'PARK', 'RICHMOND', 'SOUTHERN', 'TARAVAL', 'TENDERLOIN']
    days=['Friday', 'Monday', 'Saturday', 'Sunday', 'Thursday', 'Tuesday','Wednesday']
    time=['first','second','third']
    features2 = [x for x in range(0,24)]
    Minute=[x for x in range(100,160)]
    latitude=[x for x in range(948,964)]
    longitude=[x for x in range(2070,2083)]
    features=district+Minute+features2+season+time

    train,validation= train_test_split(TrainData, test_size=0.4)

    knn = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                               metric_params=None, n_jobs=1, n_neighbors=5, p=2,
                               weights='uniform')  # KNeighborsClassifier has no 'multilabel' parameter
    knn.fit(train[features], train['Category'])
    predicted=np.array(knn.predict_proba(validation[features]))
    model=knn.predict(validation[features])
    model1=knn.predict(train[features])

    print "Precision is ",precision_score(validation['Category'].values.tolist(),model,average='macro')
    print "Recall is ",recall_score(validation['Category'].values.tolist(),model,average='macro')
    print "Accuracy is ", accuracy_score(validation['Category'].values.tolist(),model)
    print "Training Accuracy is ", accuracy_score(train['Category'].values.tolist(),model1)


    result=pd.DataFrame(predicted, columns=le_crime.classes_)
    result['Predicted']=model
    result.to_csv('knnProbabilities.csv', index = True, index_label = 'Id' )
Example #4
def knnSimulate(param):
    trainSet = SimData.simulate2Group(
        n = int(param['n']),
        p = int(param['p']),
        effect = [param['effect']] * int(param['p'])
    )
    knnFit = KNeighborsClassifier(n_neighbors=int(param['k']))
    knnFit.fit(np.array(trainSet['x']), np.array(trainSet['y']))
    testSet = SimData.simulate2Group(
        n = int(param['n']),
        p = int(param['p']),
        effect = [param['effect']] * int(param['p'])
    )
    out = OrderedDict()
    out['p'] = int(param['p'])
    out['k'] = int(param['k'])
    out['train'] = trainSet
    out['test'] = testSet
    out['resubPreds'] = knnFit.predict(trainSet['x'])
    out['resubProbs'] = knnFit.predict_proba(trainSet['x'])
    out['testPreds'] = knnFit.predict(testSet['x'])
    out['testProbs'] = knnFit.predict_proba(testSet['x'])
    out['resubTable'] = pd.crosstab(
        Series(out['resubPreds'], index=trainSet['y'].index),
        trainSet['y']
    )
    out['resubAccuracy'] = (np.sum(np.diag(out['resubTable'])) /
                            (1.0 * np.sum(np.sum(out['resubTable']))))
    out['testTable'] = pd.crosstab(
        Series(out['testPreds'], index=testSet['y'].index),
        testSet['y']
    )
    out['testAccuracy'] = (np.sum(np.diag(out['testTable'])) /
                           (1.0 * np.sum(np.sum(out['testTable']))))
    return out
Example #5
def knn(train, test, labels, neighbours=10, runAll=None, median=True, runPCA=True, components=80):
	print("Putting training data into matrix")
	trainM = np.mat(train)
	print("Running K nearest Neighbour, default = 10")
	knn = KNeighborsClassifier(n_neighbors=neighbours, algorithm="kd_tree")

	if median:
		print("Running data through Median filter...")
		trainfilter = medianfilter(train)
		knn.fit(trainfilter, labels)
		result = knn.predict(trainfilter)
		print("Writing to output file output.knn-kdtree-median.csv\n")
		fwrite(result, fname='output.knn-kdtree-median.csv')
		return 0

	if runPCA:
		trainReduce, testReduce = pca(train, test, components)
		knn.fit(trainReduce, labels)
		result = knn.predict(testReduce)
		print("Writing output to file output.knn-kdtree-pca.csv\n")
		fwrite(result, fname='output.knn-kdtree-pca.csv')

	print("Running without PCA\n")
	knn.fit(trainM, labels)
	result = knn.predict(test)
	print("Writing output to file output.knn-kdtree.csv\n")
	fwrite(result, fname="output.knn-kdtree.csv")
Example #6
File: 6.py Project: muneebshahid/MSCS
def main():
    init_means = [-1, 1]
    mean_dimensions = 10
    points_per_class = 250
    knc = KNeighborsClassifier(n_neighbors=5)
    means = [[init_mean for mean_dim in range(mean_dimensions)] for init_mean in init_means]
    variances = [np.eye(len(means[0])), np.eye(len(means[0]))]
    data = dg.generate_prob_mixture(class_means=means, class_variances=variances, num_components=5,
                                    num_desired_points=points_per_class, dim_uniform=2)
    class_0 = np.hstack((data[0], np.zeros((len(data[0]), 1))))
    class_1 = np.hstack((data[1], np.ones((len(data[1]), 1))))
    train_data_class_0, test_data_class_0 = split_train_test(class_0)
    train_data_class_1, test_data_class_1 = split_train_test(class_1)
    train_data = np.vstack((train_data_class_0, train_data_class_1))
    test_data = np.vstack((test_data_class_0, test_data_class_1))
    corr_ranked_features, _ = CorrelationCoefficient.rank_features(train_data[:, :-1], train_data[:, -1])
    relief_ranked_features, _ = Relief.rank_features(train_data[:, :-1], train_data[:, -1])

    knc.fit(train_data[:, :-1], train_data[:, -1])
    pred = pred_test_default = knc.predict(test_data[:, :-1])
    print(len(np.where(pred != test_data[:, -1])[0]))

    corr_train_removed_features = remove_features(train_data[:, :-1], corr_ranked_features)
    corr_test_removed_features = remove_features(test_data[:, :-1], corr_ranked_features)
    knc.fit(corr_train_removed_features, train_data[:, -1])
    pred = knc.predict(corr_test_removed_features)
    print(len(np.where(pred != test_data[:, -1])[0]))

    relief_train_removed_features = remove_features(train_data[:, :-1], relief_ranked_features)
    relief_test_removed_features = remove_features(test_data[:, :-1], relief_ranked_features)
    knc.fit(relief_train_removed_features, train_data[:, -1])
    pred = knc.predict(relief_test_removed_features)
    print(len(np.where(pred != test_data[:, -1])[0]))
    return
Example #7
class KNN_strings(object):
    '''
    KNN over Freeman-encoded strings, using Levenshtein (edit) distance
    between samples looked up by index.
    '''

    def __init__(self, n_neighbors=1):
        '''
        Constructor
        '''
        self.dsr = DatasetReader()
        self.fenc = FreemanEncoder()
        self.data = []
        self.knn = KNeighborsClassifier(n_neighbors=n_neighbors, algorithm='auto', metric=self.lev_metric)
        
    def lev_metric(self, x, y):
        i, j = int(x[0]), int(y[0])     # extract indices
#         if self.data[i] == self.data[j]:
#             print self.data[i], self.data[j], edit_dist(self.data[i], self.data[j])
        return edit_dist(self.data[i], self.data[j])
    
    def knn_train(self, dataset, cv=1, datasplit=0.7):
        
        images_dataset= self.dsr.read_dataset_images(dataset)
        freeman_code_dict = self.fenc.encode_freeman_dataset(images_dataset)
        _, codes, labels = self.dsr.gen_labelled_arrays(freeman_code_dict)
        
        self.data = codes
        
        X = np.arange(len(self.data)).reshape(-1, 1)
        
        if cv <= 1:
            self.knn.fit(X, labels)
        elif cv > 1:
            # cross_val_score from sklearn.model_selection (formerly sklearn.cross_validation)
            cv_result = cross_val_score(self.knn, X, labels, cv=cv)
            print(cv_result)

        print('Training Done!')
            
    def knn_predict(self, test_data, score=False):
        images_dataset = self.dsr.read_dataset_images(test_data)
        freeman_code_dict = self.fenc.encode_freeman_dataset(images_dataset)
        _, codes, labels = self.dsr.gen_labelled_arrays(freeman_code_dict)

        # the custom metric looks strings up in self.data by index, so the
        # test codes must be appended there and addressed by their new indices
        offset = len(self.data)
        self.data.extend(codes)
        X_pred = np.arange(offset, offset + len(codes)).reshape(-1, 1)
        predictions = self.knn.predict(X_pred)

        if score:
            accuracy = self.knn.score(X_pred, labels)
            print("Test Accuracy: ", accuracy)

        return predictions
    
    def knn_predict_one(self, test_image):
        image_code = self.fenc.encode_freeman(test_image)
        print(image_code)
        # append the new code to self.data so the index-based metric can reach it
        self.data.append(image_code)
        X_pred = np.array([[len(self.data) - 1]])
        prediction = self.knn.predict(X_pred)

        return prediction
Example #8
def knnRun(folds,train,test,features):
	'''
	Performs k-nearest-neighbors classification with cross validation based upon
	a list of features chosen from the data set.
	'''
	from sklearn.neighbors import KNeighborsClassifier
	clf = KNeighborsClassifier(n_neighbors = 5)
	performanceList = []
	knnFits = []
	for m in range(1,folds+1):
		#Generate this fold's test and train
		foldTrain = train[train['fold'] != m]
		foldTest = train[train['fold'] == m]
		#fit the train for this fold	
		knnFits.append(clf.fit(foldTrain[features],foldTrain['grantstatus']))
		#Test the fit against the fold's test
		Response = clf.predict(foldTest[features]) == foldTest['grantstatus']
		foldTest['Response'] = Response
		#Out of the response groupby grantid so we get one record for each grantid, and
		#sum the result and divide by the total number of records in a single grantid
		resultArray = foldTest['Response'].groupby(foldTest['grantid']).sum().apply(float)/foldTest.groupby(foldTest['grantid']).size().apply(float)
		#append performance for this fold to a list
		performanceList.append(sum(resultArray)/len(resultArray))
	#	print 'In Sample Performance: for fold %d: %f' % (m, np.mean(performanceList))
	Bestfitfold = performanceList.index(max(performanceList)) + 1
	BestfitTrain = train[train['fold'] != Bestfitfold]
	Bestfit = clf.fit(BestfitTrain[features],BestfitTrain['grantstatus'])
	testResponse = clf.predict(test[features]) == test['grantstatus']
	test['Response'] = testResponse
	#Out of the response groupby grantid so we get one record for each grantid, and
	#sum the result and divide by the total number of records in a single grantid
	resultArray = test['Response'].groupby(test['grantid']).sum().apply(float)/test.groupby(test['grantid']).size().apply(float)
	#append performance for this fold to a list
	testResult = sum(resultArray)/len(resultArray)	
	return Bestfit, np.mean(performanceList), testResult
Example #9
def supervisedTest01():
	import numpy as np
	from sklearn import datasets
	iris = datasets.load_iris()
	iris_X = iris.data   # iris_X is the 150x4 feature matrix (2-D)
	iris_Y = iris.target # iris_Y is the 150x1 label vector (1-D)

	# print(len(iris_X))
	# print(len(iris_Y))
	# print(np.unique(iris_Y))  # the distinct label values; 3 classes here

	np.random.seed(0)
	indices = np.random.permutation(len(iris_X))  # a random permutation of 0-149
	# print(indices)

	iris_x_train = iris_X[indices[:-10]] # everything up to the last ten samples
	iris_y_train = iris_Y[indices[:-10]] # the labels matching iris_x_train
	iris_x_test  = iris_X[indices[-10:]] # the last ten samples as test data
	iris_y_test  = iris_Y[indices[-10:]] # the labels matching iris_x_test

	from sklearn.neighbors import KNeighborsClassifier
	knn = KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
               metric_params=None, n_neighbors=3, p=2, weights='uniform')
	knn.fit(iris_x_train, iris_y_train) # the inputs here are just train_x and train_y
	print(knn.predict(iris_x_test))
	print(iris_y_test)
Example #10
    def startknn(self):
        temp = []
        ff = open("knnout.txt", "w+")
        if not self.wbr:
            from sklearn.neighbors import KNeighborsClassifier
            neigh = KNeighborsClassifier(n_neighbors=self.k)
            neigh.fit(self.tr, self.classlabels)
            for i in self.te:
                temp.append(i)
            print(neigh.predict(temp))
        elif not self.ngraph:
            from sklearn.neighbors import KNeighborsClassifier
            neigh = KNeighborsClassifier(n_neighbors=self.k)
            neigh.fit(self.tr, self.classlabels)
            for i in self.te:
                temp.append(i)
            print(neigh.predict(temp), file=ff)
            A = neigh.kneighbors_graph(self.tr)
            print(A)
        else:
            a = train(self.tr, self.classlabels, self.k)
            out = open("output.txt", "w+")
            for i in self.te:
                print(i, classify(a, i, distance_fn=self.dm), file=out)
        print("-------KNN --------Done------------")
Example #11
class KNNClassifier:
	
	def __init__(self):
		"""
		This is the constructor for the KNN Classifier
		"""
		self.outputHeader = "#knn"
		self.clf = None
		self.n_neighbors = 5
		self.weights = "uniform"
		self.algorithm = "auto"

	def buildModel(self):
		"""
		This builds the model of the KNN Classifier
		"""
		self.clf = KNeighborsClassifier(n_neighbors=self.n_neighbors,
					 	weights = self.weights, algorithm=self.algorithm)

	def setNeighbors(self, param):
		"""
		This sets the number of neighbors for the KNN Classifier.
		"""
		self.n_neighbors = param

	def setAlgorithm(self, param):
		"""
		This sets the algorithm parameter for the KNN Classifier
		"""
		if param in ["auto", "ball_tree", "kd_tree", "brute"]:
			self.algorithm = param
		else:
			print "unknown parameter defaulting to auto."

	def setWeights(self, param):
		"""
		This sets the weights parameter for KNN Classifier 
		"""
		self.weights = param

	def trainKNN(self,X, Y):
		"""
		Training the KNN Classifier
		"""
		self.clf.fit(X, Y)

	def validateKNN(self,X, Y):
		"""
		Validate the KNN Classifier
		"""
		YPred = self.clf.predict(X)
		print(accuracy_score(Y, YPred))

	def testKNN(self,X, Y):
		"""
		Test the KNN Classifier
		"""
		YPred = self.clf.predict(X)
		print(accuracy_score(Y, YPred))
Example #12
def runKNNSimulation(dataTrain, dataTest, holdout, train_M, test_M, hold_M):
    outFile = open('knnLog25.txt','a')
    print('running mashable knn simulation')
    outFile.write('train==> %d, %d \n'%(train_M.shape[0],train_M.shape[1]))
    outFile.write('test==>  %d, %d \n'%(test_M.shape[0],test_M.shape[1]))
    with SimpleTimer('time to train', outFile):
        clf = KNeighborsClassifier(weights='distance', ).fit(train_M, dataTrain.target)
    plot_learning_curve(clf, 'knn (default n_neighbors)', train_M, dataTrain.target, cv=5, n_jobs=4)
    
    baseScore = clf.score(test_M, dataTest.target)
    baseParams = clf.get_params(True)
    baseNeighbors = baseParams['n_neighbors']
    print('baseline score %.3f base n_neighbors %d' % (baseScore, baseNeighbors))
    outFile.write('baseline score %.3f base n_neighbors %d \n' % (baseScore, baseNeighbors))
    
    res = []
    with SimpleTimer('time to fine tune number of neighbors', outFile):
        for neighbors in range(2,baseNeighbors * 10):
#             print 'training for neighbors %d' % neighbors
            clf = KNeighborsClassifier(n_neighbors=neighbors, weights='distance').fit(train_M, dataTrain.target)
            score = clf.score(hold_M, holdout.target)
            res.append((score, neighbors))
            outFile.write('%d %.3f \n' % (neighbors, score))
    res = sorted(res, key=lambda x:x[0], reverse=True)
    print(res[:5])
    bestNeighbors = res[0][1]
    print ('best number of neighbors is %d' % bestNeighbors)
    outFile.write('best number of neighbors is %d  and score is %.3f\n' % (bestNeighbors, res[0][0]))
    
    bestClf = KNeighborsClassifier(n_neighbors=bestNeighbors, weights='distance')
    bestClf.fit(train_M, dataTrain.target)
    
    predicted = bestClf.predict(test_M)
    trainPredict = bestClf.predict(train_M)
    print('testing score')
    outFile.write('testing score')
    outputScores(dataTest.target, predicted, outFile)
    print('training score')
    outFile.write('training score')
    outputScores(dataTrain.target, trainPredict, outFile)
    
    results = predicted == dataTest.target
    print(numpy.mean(results))
    res = []
    for i in range(len(results)):
        if not results[i]:
            res.append(i)
    print('classifier got these wrong:')
    for i in res[:10]:
        print(dataTest.data[i], dataTest.target[i])
        outFile.write('%s %d \n' % (dataTest.data[i], dataTest.target[i]))
    '''
    train_sizes, train_scores, valid_scores = learning_curve(DecisionTreeClassifier(), train_M, dataTrain.target, train_sizes=[50, 80, 110], cv=5)
    print train_sizes
    print train_scores
    print valid_scores
    '''
       
    plot_learning_curve(bestClf, 'knn with %d neighbors' % bestNeighbors, train_M, dataTrain.target, cv=5, n_jobs=4)
Example #13
def predictAction(testMoments, trainMoments, trainLabels):
    neigh = KNeighborsClassifier(n_neighbors=5)
    # neigh.fit([trainMHI[:,:,i].flatten() for i in range(20)], trainLabels)
    # print(neigh.predict([testMHI.flatten()]))
    neigh.fit(trainMoments, trainLabels)
    print(neigh.predict(testMoments))
    print(neigh.predict(trainMoments))
    print(neigh.predict_proba(trainMoments))
    return neigh.predict(testMoments)
Example #14
def knn():

    X = [[125,1], [200,0], [70,0], [240,1], [114,0], [120,0], [264,1], [85,0], [150,0], [90,0]]
    y = [ 0, 0, 0, 0, 1, 0, 0, 1, 0, 1 ]
    model = KN(n_neighbors=3, weights='distance')  # KN is presumably an alias for KNeighborsClassifier
    model.fit(X, y)
    # predict() expects a 2-D array, one row per sample
    print('80k, Single, HO=Y', model.predict([[80, 1]]))
    print('98k, Single, HO=N', model.predict([[98, 0]]))
    print('260k, Married, HO=N', model.predict([[260, 0]]))
Example #15
def knn(X_vectors, t):
    # leave-one-out strategy to get average accuracy
    n = len(t)
    true_num = 0
    for i in range(n):
        X_train = list(X_vectors)
        del X_train[i]
        t_train = list(t)
        del t_train[i]
        X_test = X_vectors[i]
        t_test = t[i]

        clf = KNeighborsClassifier(n_neighbors=5)
        clf.fit(X_train, t_train)
        y = clf.predict([X_test])[0]  # predict() expects a 2-D array
        if y == t_test:
            true_num += 1
    accuracy = 1.0 * true_num / n

    # 8/2 split
    X = np.array(X_vectors)
    tt = list(t)
    pre = []
    rec = []
    for _ in range(100):
        X_train, X_test, t_train, t_test = train_test_split(X, tt, test_size=0.2)
        clf = KNeighborsClassifier(n_neighbors=5)
        clf.fit(X_train, t_train)
        y_test = clf.predict(X_test)
        t_pos = 0
        f_pos = 0
        t_neg = 0
        f_neg = 0
        for i in range(len(y_test)):
            if t_test[i] == 1 and y_test[i] == 1:
                t_pos += 1
            elif t_test[i] == 0 and y_test[i] == 1:
                f_pos += 1
            elif t_test[i] == 0 and y_test[i] == 0:
                t_neg += 1
            elif t_test[i] == 1 and y_test[i] == 0:
                f_neg += 1

        # compute precision/recall once per split, after counting all samples
        if t_pos == 0:
            precision = 0
            recall = 0
        else:
            precision = 1.0 * t_pos / (t_pos + f_pos)
            recall = 1.0 * t_pos / (t_pos + f_neg)
        pre.append(precision)
        rec.append(recall)

    pre = sum(pre) / len(pre)
    rec = sum(rec) / len(rec)
    F = 2 / (1/pre + 1/rec)

    return accuracy, pre, rec, F
Example #16
    def knn_mesh_fitter(self):

        # get the minimum and maximum values for each of the predictor variables
        v1_min, v1_max = np.min(self.X[:,0]), np.max(self.X[:,0])
        v2_min, v2_max = np.min(self.X[:,1]), np.max(self.X[:,1])

        # get the range of each variable
        v1_range = v1_max - v1_min
        v2_range = v2_max - v2_min

        # set up the min and max ranges of the axes of the plot
        # I add a buffer here (1/15th of the range) so no points are on the axes
        self.x_min = v1_min - (v1_range/self.buffer_denom)
        self.x_max = v1_max + (v1_range/self.buffer_denom)

        self.y_min = v2_min - (v2_range/self.buffer_denom)
        self.y_max = v2_max + (v2_range/self.buffer_denom)

        # use the numpy meshgrid function to make a bunch of points across the range
        # of values.

        self.xx, self.yy = np.meshgrid(np.linspace(self.x_min, self.x_max,
                                                   self.granularity),
                                       np.linspace(self.y_min, self.y_max,
                                                   self.granularity))

        # meshgrids:
        self.Zs = {'uniform':{},
                   'distance':{}}

        for nn in self.nn_range:
            # fit a knn on the data with the nearest neighbors number passed into the function
            knn_mod_euc = KNeighborsClassifier(n_neighbors=nn, weights='uniform')
            knn_mod_euc.fit(self.X, self.y)

            knn_mod_w = KNeighborsClassifier(n_neighbors=nn, weights='distance')
            knn_mod_w.fit(self.X, self.y)

            # Predict using the knn model on all the meshgrid points. This will let us see
            # the knn boundary of where it predicts between one class and another!
            Z = knn_mod_euc.predict(np.c_[self.xx.ravel(), self.yy.ravel()])
            Z = Z.reshape(self.xx.shape)
            self.Zs['uniform'][nn] = Z

            Z = knn_mod_w.predict(np.c_[self.xx.ravel(), self.yy.ravel()])
            Z = Z.reshape(self.xx.shape)
            self.Zs['distance'][nn] = Z

        if len(np.unique(self.X[:,0]))+50 < self.X.shape[0]:
            self.v1_points = self.rand_jitter(self.X[:,0])
        else:
            self.v1_points = self.X[:,0]

        if len(np.unique(self.X[:,1]))+50 < self.X.shape[0]:
            self.v2_points = self.rand_jitter(self.X[:,1])
        else:
            self.v2_points = self.X[:,1]
Example #17
def predictAction(testMoments, trainMoments, trainLabels):
    neigh = KNeighborsClassifier(n_neighbors=5)
    # neigh.fit([trainMHI[:,:,i].flatten() for i in xrange(20)], trainLabels)
    # print(neigh.predict([testMHI.flatten()]))
    #    trainMoments_norm = trainMoments/np.linalg.norm(trainMoments) #normalize(trainMoments, axis=1)
    #    testMoments_norm = testMoments/np.linalg.norm(trainMoments) #normalize(testMoments, axis=1)
    neigh.fit(trainMoments, trainLabels)
    print(neigh.predict(trainMoments))
    #    print(neigh.predict_proba(trainMoments))
    return neigh.predict(testMoments)
Example #18
def knn(X_train, y_train, X_test, y_test):
    cpu_count = multiprocessing.cpu_count()
    knn = KNeighborsClassifier(n_neighbors=1, n_jobs=max(1, cpu_count // 3))
    knn.fit(X_train, y_train)

    y_pred_train = knn.predict(X_train)
    acc_train = skl_metrics.accuracy_score(y_true=y_train, y_pred=y_pred_train)
    y_pred_test = knn.predict(X_test)
    acc_test = skl_metrics.accuracy_score(y_true=y_test, y_pred=y_pred_test)

    return acc_train, acc_test
Example #19
File: 3.py Project: sandeepgithubrepo/ML
def apply_knn(k_set, training_data, train_labels, test_data, test_labels):
    misclass = []
    for k in k_set:
        # the p=2 and metric='minkowski' combination is the Euclidean distance
        knn_train_test = KNeighborsClassifier(n_neighbors=k, weights='uniform', p=2, metric='minkowski')
        knn_train_test.fit(training_data, train_labels)
        acc = knn_train_test.score(test_data, test_labels)
        misclass.append(round(1 - acc, 3))

    return misclass
Example #20
def expAlgorithm(X,y,X_val, y_val,p):
    '''
        FOR FUTURE REFERENCE:
        "train" refers to the data set which is trained
        "test" refers to the untrained portion of the data set on which the training is validated
        "quiz" refers to the unlabeled points which we attempt to label and then submit to kaggle
    '''
    start = time.time()

    """
    
    X = data[train_start_idx:train_end_idx,0:-1]
    y = [lbl for lbl in data[train_start_idx:train_end_idx,-1]]
    """
    print('Received data, took this many seconds: ' + str(time.time() - start))
    # Training classifier

    # TODO: ExtraTreesClassifier

    clf1 = KNeighborsClassifier(n_neighbors=5,
                                weights="distance",
                                algorithm="auto")
   # fit sub-classifiers
    clf1.fit(X,y)
    # pickle.dump(clf1, open('experimental_classifier.pickle', 'wb'))

    # fit voting classifier

    # predict & calculate training error
    y_hat = clf1.predict(X)
    train_err = 0
    for yi, y_hati in zip(y, y_hat):
        train_err += (yi != y_hati)
    train_err = float(train_err)/float(len(y))
    
    print("Train err: " + str(train_err))

    print("Beginning test validation...")
  
        
    y_val_hat = clf1.predict(X_val)
    test_err = 0
    for yi, y_hati in zip(y_val, y_val_hat):
        test_err += (yi != y_hati)
    print(len(y_val))
    print(len(y_val_hat))

    test_err = float(test_err)/float(len((y_val))) 
    print("Test error: " + str(test_err))
    
    return test_err
Example #21
File: test.py Project: zedoul/air
def classifier(train,test,train_target,test_target):

    
    kclass = KNeighborsClassifier(n_neighbors=13,algorithm='kd_tree',weights='uniform',p=1)
    kclass.fit(train,train_target)
    res = kclass.predict(train)
    
    print(classification_report(train_target, res))
    
    res1 = kclass.predict(test)
    print(classification_report(test_target, res1))
    return kclass
Example #22
def predict_author(arr, yazar_features, yazar_classes):
    results = []

    print "\n[DEBUG] K-NN result (neighbors: 10)"
    knn = KNeighborsClassifier(n_neighbors=10)
    knn.fit(yazar_features, yazar_classes)
    print knn.predict(arr)
    results.append(knn.predict(arr)[0])

    print "\n[DEBUG] SVC result (linear) (degree=3)"
    svc = svm.SVC(kernel='linear', degree=3)
    svc.fit(yazar_features, yazar_classes)
    print svc.predict(arr)
    results.append(svc.predict(arr)[0])

    print "\n[DEBUG] Logistic Regression result ()"
    regr = linear_model.LogisticRegression()
    regr.fit(yazar_features, yazar_classes)
    print regr.predict(arr)
    results.append(regr.predict(arr)[0])

    print "\n[DEBUG] Gaussian Naive Bayes"
    gnb = GaussianNB()
    gnb.fit(yazar_features, yazar_classes)
    print gnb.predict(arr)
    results.append(gnb.predict(arr)[0])

    print "\n[DEBUG] Decision Tree Classifier"
    dtc = tree.DecisionTreeClassifier()
    dtc.fit(yazar_features, yazar_classes)
    print dtc.predict(arr)
    results.append(dtc.predict(arr)[0])

    print "\n[DEBUG] Gradient Boosting Classification"
    gbc = GradientBoostingClassifier()
    gbc.fit(yazar_features, yazar_classes)
    print gbc.predict(arr)
    results.append(gbc.predict(arr)[0])

    # output = open('features.pkl', 'wb')
    # pickle.dump(yazar_features, output)
    # output.close()

    # output = open('classes.pkl', 'wb')
    # pickle.dump(yazar_classes, output)
    # output.close()

    # test_yazar_features = []        # for test data
    # test_yazar_classes = []         # for test classes
    # # yazar_features = []             # for train data
    # # yazar_classes = []              # for train classes

    return results
Example #23
def knnclassifier(train, train_target, test, test_target, k):
    classif = KNeighborsClassifier(n_neighbors = k,algorithm='kd_tree',weights='uniform',p=1)
    classif.fit(train,train_target)
    res = classif.predict(train)
    
    print('*************************** knn ****************')
    print(classification_report(train_target, res))
    
    res1 = classif.predict(test)
    
    print(classification_report(test_target, res1))
    return classif
Example #24
def get_iris_dataset():
    iris_dataset = load_iris()

    #1. The format of the dataset
    print("Keys of iris_dataset: \n{}".format(iris_dataset.keys()))
    print("Target names: {}".format(iris_dataset['target_names']))
    print("Feature names: \n{}".format(iris_dataset['feature_names']))
    # data -> numpy.ndarray
    # rows -> the samples
    # columns -> the features
    print("Shape of data: {}".format(iris_dataset['data'].shape))     # (150,4)
    print("Shape of target: {}".format(iris_dataset['target'].shape)) # (150,)

    #import pdb; pdb.set_trace()

    #2. split the dataset into training set and testing set
    # y = f(X)
    X_train, X_test, y_train, y_test = train_test_split(iris_dataset['data'], iris_dataset['target'],test_size=0.2, random_state=0)
    print("X_train shape: {}".format(X_train.shape))
    print("y_train shape: {}".format(y_train.shape))

    print("X_test shape: {}".format(X_test.shape))
    print("y_test shape: {}".format(y_test.shape))


    #import pdb; pdb.set_trace()

    # 3. inspect the data - visualize it
    # convert the NumPy array into a pandas DataFrame
    iris_dataframe = pd.DataFrame(X_train, columns=iris_dataset.feature_names)

    # pdb; pdb.set_trace()
    grr = pd.plotting.scatter_matrix(iris_dataframe, c=y_train, figsize=(15,15), marker='o', hist_kwds={'bins':20}, s=60, alpha=.8, cmap=mglearn.cm3)
    plt.show()


    #import pdb; pdb.set_trace()

    # The model
    from sklearn.neighbors import KNeighborsClassifier
    knn = KNeighborsClassifier(n_neighbors=1)
    # build the model on the training set
    knn.fit(X_train, y_train)

    # the prediction
    X_new = np.array([[5, 2.9, 1, 0.2]])
    prediction = knn.predict(X_new)
    print("Prediction: {}".format(prediction))
    print("Predicted target name: {}".format(iris_dataset['target_names'][prediction]))

    y_pred = knn.predict(X_test)
    print("Test set predictions:\n {}".format(y_pred))
    print("Test set score: {:.2f}".format(np.mean(y_pred==y_test)))
Example #25
def train_and_test(X_train, y_train, X_test, y_test, distance, n_neighbors):
  knn = KNeighborsClassifier(n_neighbors=n_neighbors,
                             algorithm='auto', metric=distance)

  knn.fit(X_train, y_train)

  y_pred = knn.predict(X_train)
  train_acc = '%.2f' % (accuracy_score(y_train, y_pred) * 100)

  y_pred = knn.predict(X_test)
  test_acc = '%.2f' % (accuracy_score(y_test, y_pred) * 100)

  return train_acc, test_acc
Example #26
def classify(train_feature_matrix, train_classes, test_features, classifier):
    if classifier == 1:
        model = KNeighborsClassifier(23, weights='distance', p=1)
        model.fit(train_feature_matrix, train_classes.ravel())
        prediction = model.predict(test_features)[0]
    elif classifier == 2:
        model = VotingClassifier(estimators=[
            ('knn1', KNeighborsClassifier(10, weights='distance', p=1)),
            ('knn2', KNeighborsClassifier(30, weights='distance', p=1)),
            ('knn3', KNeighborsClassifier(50, weights='distance', p=1)),
            ('knn4', KNeighborsClassifier(70, weights='distance', p=1)),
            ('knn5', KNeighborsClassifier(90, weights='distance', p=1))],
            voting='soft')  # estimator names must be unique
        model.fit(train_feature_matrix, train_classes.ravel())
        prediction = model.predict(test_features)[0]
    elif classifier == 3:
        model = GaussianNB()
        model.fit(train_feature_matrix, train_classes.ravel())
        prediction = model.predict(test_features)[0]
    elif classifier == 4:
        model = SGDClassifier(loss='modified_huber', class_weight='balanced', penalty='l1')
        model.fit(train_feature_matrix, train_classes.ravel())
        prediction = model.predict(test_features)[0]
    elif classifier == 5:
        model = RandomForestClassifier()
        model.fit(train_feature_matrix, train_classes.ravel())
        prediction = model.predict(test_features)[0]
    elif classifier == 6:
        model = DecisionTreeClassifier()
        model.fit(train_feature_matrix, train_classes.ravel())
        prediction = model.predict(test_features)[0]
    elif classifier == 7:
        model = KNeighborsClassifier()
        model.fit(train_feature_matrix, train_classes.ravel())
        prediction = model.predict(test_features)[0]
    elif classifier == 8:
        model = SGDClassifier()
        model.fit(train_feature_matrix, train_classes.ravel())
        prediction = model.predict(test_features)[0]
    elif classifier == 9:
        model = AdaBoostClassifier(n_estimators=100)
        model.fit(train_feature_matrix, train_classes.ravel())
        prediction = model.predict(test_features)[0]
    elif classifier == 10:
        model = SVC(kernel='precomputed')
        model.fit(gram(train_feature_matrix, train_feature_matrix), train_classes.ravel())
        prediction = model.predict(gram(train_feature_matrix, test_features))[0]
    
    return genre_from_int(prediction)
Example #27
def main(output=RESULTS1B):
    """
    Using 1 nearest neighbor, predicts NYC Taxi trip times based on feature 
    vectors (pickup latitude, pickup longitude, dropoff latitude, dropoff latitude). 

    Tests on a subset of trip_data_1.csv

    Uses sklearn to implement nearest neighbors
    """
    features = ['pickup_latitude', 'pickup_longitude', 'dropoff_latitude', 
               'dropoff_longitude', 'trip_time_in_secs']

    ## Extract necessary data into pandas dataframes
    numrows = 100000
    df_train_read = pd.read_csv(TRAIN_DATA)
    df_test_read = pd.read_csv(TRIP_DATA_1, nrows = numrows)    # first 100k rows, for speed
    df_test = df_test_read[features].dropna()
    df_train = df_train_read[features].dropna() 


    ## Use sklearn to run nearest neighbors
    k = 1 
    clf = KNeighborsClassifier(n_neighbors=k)                   # default distance metric: euclidean
    clf.fit(df_train[features[0:4]], df_train[features[-1]])
    preds = clf.predict(df_test[features[0:4]])

    ## Calculate statistics (Root Mean Squared Error, Correlation Coefficient, Mean Absolute Error)
    print("Calculating statistics")
    with open(output, "a+") as outputFile:
        outputFile.write("Ran knn with k={}".format(k) + \
            " Trained on {}. Tested on first".format(TRAIN_DATA) + \
            " {} rows of {}. Stats:".format(numrows, TRIP_DATA_1))
    calcAndLogStats( numpy.array(preds), 
                     numpy.array(df_test[features[-1]]), 
                     output=output)
Example #28
def knn_classify(train_data, train_label, test_data):

    knnClf = KNeighborsClassifier(n_neighbors=5)
    knnClf.fit(train_data, ravel(train_label))
    test_label = knnClf.predict(test_data)
    save_result(test_label, 'sklearn_knn_Result.csv')
    return test_label
Example #29
File: knn.py Project: kbai/uss
def main():
    print("k nearest neighbours classifier!")

    X,Y,Xtest = importdata()
    print(Y.shape)
    param_grid={
            "n_neighbors":[10,20,50,100,200],
            "algorithm":['auto','ball_tree','kd_tree','brute'],
            "weights":['uniform','distance']
            }

    knn = KNeighborsClassifier() 
    Gridsearch_impl(X,Y,knn,param_grid,5)

#    for i in range(10,11,5):
#        clf = DecisionTreeClassifier(min_samples_split=i)
#        rf = RandomForestClassifier(n_estimators = 100,random_state=0,min_samples_split=i)
#        ab = AdaBoostClassifier(rf,n_estimators = 10)
        #ab = GradientBoostingClassifier(n_estimators = 100)
#        score = cross_validation.cross_val_score(ab,X,Y,cv=3)
      #  print(score)
      #  print("average score %f"%np.mean(score))
      #  print("std %f"%np.std(score))
      #  ab.fit(X,Y)
   


    # assumes Gridsearch_impl fits `knn` in place; otherwise refit the best
    # estimator found by the grid search before predicting
    Ytest = knn.predict(Xtest)
    output(Ytest,'submit3.csv')
Example #30
    def analyze_image(self):
        '''
        Load the image and analyze it with KNN

        im_file - pre-processed with histogram specification
        '''

        if self._avg_pixels.size == 0:
            self._process_annotations()        
            self._get_initial_classes()
        
        im = self._image
        rows = im.shape[0]

        clf = KNeighborsClassifier(n_neighbors = self._n_neighbors)
        clf.fit(self._avg_pixels, self._labels)

        im_1d = im.reshape(-1, 3)

        # calculate the prediction, then reshape it back into image shape
        prediction = clf.predict(im_1d)
        prediction = prediction.reshape(rows, -1)

        prediction[self._mask == 0] = Labels.Masked
        self.display_current(prediction)
        return prediction
Example #31
File: KNN.py Project: Khidora/ML
    mobile_data = pd.read_csv('clearDataset.csv')

    X, Y = mobile_data.drop(['price_range'],
                            axis=1), mobile_data['price_range']

    x_train, x_test, y_train, y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.33,
                                                        random_state=42)

    x_1_train = copy.deepcopy(x_train)
    x_train = pd.concat([x_train, y_train], axis=1)

    num_neighbors = 5
    n_folds = 5
    model = KNN(num_neighbors, n_folds)
    # scores = model.evaluate_algorithm(x_train.values, model.k_nearest_neighbors, n_folds, num_neighbors)
    scores2 = model.fit(x_train, y_train)
    y_pred = model.predict(x_train, x_test)
    print(f'CV scores: {scores2}')
    mn = sum(scores2) / float(len(scores2))
    print(f'Train data accuracy: {mn}')

    test_score = model.accuracy_metric(y_test.values, y_pred)
    print(f'Test data accuracy: {test_score}')

    neigh = KNeighborsClassifier(n_neighbors=5)
    neigh.fit(x_1_train, y_train)
    y_pred = neigh.predict(x_test)
    print(f'Sklearn accuracy score: {accuracy_score(y_test, y_pred)}')
Example #32
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=1)
#print(X_train.shape)
#print(X_val.shape)
#print(X_test.shape)

#comparing models
logr=linear_model.LogisticRegression(solver= 'liblinear').fit(X_train, y_train)
logrPred= logr.predict(X_val)
print("LR :", accuracy_score(y_val, logrPred))

result =[]
result.append(logrPred)


knn = KNeighborsClassifier().fit(X_train, y_train)
knnPred= knn.predict(X_val)
print("knn :", accuracy_score(y_val, knnPred))
result.append(knnPred)


gus = GaussianNB().fit(X_train, y_train)
gusPred= gus.predict(X_val)
print("Gaussian :", accuracy_score(y_val, gusPred))
result.append(gusPred)

svm = SVC().fit(X_train, y_train)
svmPred = svm.predict(X_val)
print("SVC :", accuracy_score(y_val, svmPred))
result.append(svmPred)

Example #33
labels = []
for d in dirs:
    files = glob.glob('C:/img/dataset/' + d + '/*.jpg')
    for file in files:
        img = cv2.imread(file)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        (t, img) = cv2.threshold(img, 200, 1, cv2.THRESH_BINARY)
        imgs.append(cv2.resize(img, (45, 45)))
        labels.append(d)
for img in imgs:
    img = img.flatten()
    dataset.append(np.array(img, dtype=int))
knn = KNN(n_neighbors=1)
#print(dataset)
knn.fit(dataset, labels)

imagepath = "C:/img/1502099642.3551896('192.168.8.119', 53690)"
files1 = next(os.walk(imagepath + "/"))[2]
files1 = sorted(files1, key=lambda x: sortfiles(x))
images1 = []
for file in files1:
    img1 = cv2.resize(cv2.imread(imagepath + '/' + file), (45, 45))
    img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    (t, img1) = cv2.threshold(img1, 200, 1, cv2.THRESH_BINARY)
    img1 = img1.flatten()
    img1 = np.array(img1, dtype=int)
    images1.append(img1)
arr = knn.predict(images1)
print(arr)
end = time.time()
print(end - start)
Example #34
def Question3_B(Xapp, Yapp, Xdev, Ydev):
    start_time = time()
    print("Taille de l'échantillon : ", Xdev.shape[0])
    print("Classifieur plus proches voisins")

    neigh = KNeighborsClassifier(n_jobs=-1)

    neigh.fit(Xapp, Yapp)

    print("START")
    erreur = 0
    result = neigh.predict(Xdev)
    tiBase = (time() - start_time)

    for i in range(0, Xdev.shape[0]):
        if (result[i] != Ydev[i]):
            erreur += 1

    teBase = (erreur / Xdev.shape[0])
    print("--- ", tiBase, " seconds ---")
    print("END")
    print("Taux d\'erreur : ", teBase, "\n")

    teList = []
    timeList = []
    sizeList = [i for i in range(10, 50, 10)
                ] + [i for i in range(50, Xapp.shape[1], 50)]

    for size in sizeList:
        start_time = time()

        pca = PCA(size)
        pca.fit(Xapp)
        XappPCA = pca.transform(Xapp)

        X = pca.transform(Xdev)
        Y = Ydev
        print("Taille de l'échantillon : ", X.shape[0])
        print("Classifieur plus proches voisins, ACP de taille ", size)

        neigh = KNeighborsClassifier(n_jobs=-1)
        neigh.fit(XappPCA, Yapp)

        print("START")
        erreur = 0
        result = neigh.predict(X)
        ti = (time() - start_time)

        for i in range(0, X.shape[0]):
            if (result[i] != Y[i]):
                erreur += 1

        print("--- ", ti, " seconds ---")
        print("END")
        te = (erreur / X.shape[0])
        print("Taux d'erreur : ", te, "\n")
        teList.append(te)
        timeList.append(ti)

    fig, ax1 = plt.subplots()

    ax2 = ax1.twinx()
    lgd1_1 = ax1.plot(sizeList, teList, 'r-o',
                      label="Error rate vs PCA dimension")
    lgd1_2 = ax1.plot(Xdev.shape[1], teBase, 'g-o',
                      label="Error rate without PCA")
    lgd2_1 = ax2.plot(sizeList, timeList, 'b-x',
                      label="Processing time vs PCA dimension")
    lgd2_2 = ax2.plot(Xdev.shape[1], tiBase, 'g-x',
                      label="Processing time without PCA")

    lgds = lgd1_1 + lgd1_2 + lgd2_1 + lgd2_2

    lbls = [lgd.get_label() for lgd in lgds]
    ax1.legend(lgds, lbls, loc=2)

    ax1.set_xlabel("PCA dimension")
    ax1.set_ylabel("Error rate", color='r')
    ax2.set_ylabel("Sample processing time\n(seconds)", color='b')

    plt.show()
    fig.savefig("question3_B.png")
Example #35
def assign_labels(X_total,X_pred,y_pred):
    knn = KNeighborsClassifier(n_neighbors=1)    
    knn.fit(X_pred, y_pred)
    return knn.predict(X_total)
Example #36
Y_test = test_file['Speaker'].values

#Standardising the values.
'''std_scaler_x = preprocessing.StandardScaler().fit(X_train)
std_scaler_y = preprocessing.StandardScaler().fit(y_train)
X_train_std = std_scaler_x.transform(X_train)
X_test_std = std_scaler_x.transform(X_test)
y_train_std = std_scaler_y.transform(y_train)
y_test_std = std_scaler_y.transform(y_test)'''

# Prediction

#KNN algorithm
KNN_clf= KNeighborsClassifier(n_neighbors=1)
KNN_clf.fit(X_train, Y_train)
Y_pred= KNN_clf.predict(X_test)
print(Y_pred)
KNN_accuracy = accuracy_score(Y_test, Y_pred)
print(KNN_accuracy)




Date = input_file['Date'].values
test_date = np.array(Date[456:])
datetimes = [datetime.strptime(t, "%Y-%m-%d") for t in test_date]
date = matplotlib.dates.date2num(datetimes)
hfmt = matplotlib.dates.DateFormatter('%d-%m-%Y')
fig = plt.figure()
fig.canvas.set_window_title('Moisture Prediction using Machine Learning') 
ax = fig.add_subplot(1,1,1)
Example #37
trainFeatures = train.drop([0], axis=1)
trainLabels   = train[0]

valFeatures = val.drop([0], axis=1)
valLabels   = val[0]

testFeatures = test.drop([0], axis=1)
testLabels   = test[0]

#----------KNN----------
print("\n\n----------KNN----------")
knnClassifier = KNeighborsClassifier()
knnClassifier.fit(trainFeatures, trainLabels)

knnTrainPredictions = knnClassifier.predict(trainFeatures)
knnTrainAccuracy = metrics.accuracy_score(knnTrainPredictions, trainLabels)
print('Train Accuracy : ', knnTrainAccuracy)

knnValPredictions = knnClassifier.predict(valFeatures)
knnValAccuracy = metrics.accuracy_score(knnValPredictions, valLabels)
print('Val Accuracy   : ', knnValAccuracy)

knnTestPredictions = knnClassifier.predict(testFeatures)
knnTestAccuracy = metrics.accuracy_score(knnTestPredictions, testLabels)
print('Test Accuracy  : ', knnTestAccuracy)

#----------SVM----------
print("\n\n----------SVM----------")
svmClassifier = svm.LinearSVC()
#svmClassifier = svm.SVC()
Example #38
from sklearn.neighbors import KNeighborsClassifier

# make an instance of a KNeighborsClassifier object
knn = KNeighborsClassifier(n_neighbors=1)
type(knn)

print(knn)

# fit the knn model. What might the function be called? Documentation...
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X, y) 

# make predictions on this input: [3, 5, 4, 2]
# Again, what might the prediction function be called for knn?
X1 = [[3, 5, 4, 2]]
print(knn.predict(X1))

# now make predictions for [3, 5, 4, 2], [5, 4, 3, 2]
X2 = [[3, 5, 4, 2], [5, 4, 3, 2]]
print(knn.predict(X2))

# confirm the prediction is a numpy array
print(type(knn.predict(X)))

# instantiate the model (using the value K=5)
knn = KNeighborsClassifier(n_neighbors=5, weights='distance')

# fit the model with data
knn.fit(X, y)

X_new = [[3, 5, 4, 2], [5, 4, 3, 2]]
Example #39
def main():

    folder, image_path = sys.argv[1], sys.argv[2]
    train_loader, val_loader = get_dataloader(folder, batch_size=1)
    #draw_train_loader, draw_val_loader = get_dataloader10(folder, batch_size = 1)

    if torch.cuda.is_available():
        extractor = alexnet(pretrained=True).features.cuda()
    else:
        extractor = alexnet(pretrained=True).features

    feats_train = []
    train_y = []

    for batch, (x, label) in enumerate(tqdm(train_loader), 1):

        if torch.cuda.is_available():
            x = x.cuda()
            label = label.cuda()

        extractor.eval()
        feat = extractor(x).view(x.size(0), FEATURES_EXTRACTED, -1)
        feat = torch.mean(feat, 2)
        feat = feat.cpu().detach().numpy()
        feats_train.append(feat)
        train_y.append(label.item())

    feats_train = np.array(feats_train)
    feats_train = feats_train.reshape(6987, FEATURES_EXTRACTED)
    train_y = np.array(train_y)

    #pca_feats_train = do_pca(feats_train)
    scaler = StandardScaler().fit(feats_train)  # keep the scaler to reuse on validation
    x_train_std = scaler.transform(feats_train)
    pca = PCA(n_components=25).fit(x_train_std)
    pca_feats_train = pca.transform(x_train_std)

    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(pca_feats_train, train_y)

    # parsing valid data
    valid_y = []
    feats_valid = []
    for batch, (x, label) in enumerate(tqdm(val_loader), 1):
        if torch.cuda.is_available():
            x = x.cuda()
            label = label.cuda()

        extractor.eval()
        feat = extractor(x).view(x.size(0), FEATURES_EXTRACTED, -1)
        feat = torch.mean(feat, 2)
        feat = feat.cpu().detach().numpy()
        feats_valid.append(feat)
        valid_y.append(label.item())

    feats_valid = np.array(feats_valid)
    feats_valid = feats_valid.reshape(1526, FEATURES_EXTRACTED)

    # apply the scaler and PCA fitted on the training data
    x_valid_std = scaler.transform(feats_valid)
    pca_feats_valid = pca.transform(x_valid_std)

    train_pred = knn.predict(pca_feats_train)
    val_pred = knn.predict(pca_feats_valid)

    train_accuracy = accuracy_score(train_pred, train_y)
    val_accuracy = accuracy_score(val_pred, valid_y)

    print("accuracy on training data: {}".format(train_accuracy))
    print("accuracy on validation data: {}".format(val_accuracy))
Example #40
    test_scores.append(knn1.score(X_test, y_test))

#training
max_train_score = max(train_scores)
train_scores_ind = [
    i for i, v in enumerate(train_scores) if v == max_train_score
]
print('Max train score {} % and k = {}'.format(
    max_train_score * 100, list(map(lambda x: x + 1, train_scores_ind))))
#testing
max_test_score = max(test_scores)
test_scores_ind = [i for i, v in enumerate(test_scores) if v == max_test_score]
print('Max test score {} % and k = {}'.format(
    max_test_score * 100, list(map(lambda x: x + 1, test_scores_ind))))

plt.figure(figsize=(12, 5))
p = sns.lineplot(x=range(1, 20), y=train_scores, marker='*', label='Train Score')
p = sns.lineplot(x=range(1, 20), y=test_scores, marker='o', label='Test Score')
plt.show()

y_pred = knn1.predict(X_test)
confusion_matrix(y_test, y_pred)
print(
    pd.crosstab(y_test,
                y_pred,
                rownames=['True'],
                colnames=['Predicted'],
                margins=True))

print(classification_report(y_test, y_pred))
Example #41
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC()))

# Evaluate each model in turn
results = []
names = []

for name, model in models:
	kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)  # random_state requires shuffle=True in current scikit-learn
	cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold)
	results.append(cv_results)
	names.append(name)
	msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
	print(msg)

# Compare Algorithms
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(names)
plt.show()

# Make predictions on validation dataset
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)
predictions = knn.predict(X_validation)
print(accuracy_score(Y_validation, predictions))
print(confusion_matrix(Y_validation, predictions))
print(classification_report(Y_validation, predictions))
Example #42
print(metrics.classification_report(expected,predicted_svm))
print(metrics.confusion_matrix(expected,predicted_svm))
cm_svm=metrics.confusion_matrix(expected,predicted_svm)
cm_svm_list=cm_svm.tolist()
cm_svm_list[0].insert(0,'Real True')
cm_svm_list[1].insert(0,'Real False')
print(tabulate(cm_svm_list, headers=['Real/Pred', 'Pred True', 'Pred False']))




K_NN=KNeighborsClassifier()

K_NN.fit(x_train,y_train.ravel())

predicted_knn=K_NN.predict(x_test)

accuracy_knn=K_NN.score(x_test,y_test)
print(accuracy_knn)
print(metrics.classification_report(expected,predicted_knn))
print(metrics.confusion_matrix(expected,predicted_knn))
cm_knn=metrics.confusion_matrix(expected,predicted_knn)
cm_knn_list=cm_knn.tolist()
cm_knn_list[0].insert(0,'Real True')
cm_knn_list[1].insert(0,'Real False')
print(tabulate(cm_knn_list, headers=['Real/Pred', 'Pred True', 'Pred False']))




dtc=DecisionTreeClassifier(random_state=42)
Example #43
# -*- coding:utf-8 -*-
from sklearn.datasets import load_iris
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

iris = load_iris()
type(iris)
type(iris.data)
type(iris.target)
iris.data.shape
print(iris.DESCR)
x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                    iris.target,
                                                    test_size=0.25,
                                                    random_state=33)

ss = StandardScaler()
x_train = ss.fit_transform(x_train)
x_test = ss.transform(x_test)

kn = KNeighborsClassifier()
kn.fit(x_train, y_train)
kn.score(x_test, y_test)
y_predict = kn.predict(x_test)
print(classification_report(y_test, y_predict, target_names=iris.target_names))
Example #44
test_data = pd.read_csv('dataset/balanced_noTimestamp_mixTest.csv')
test_labels = test_data.iloc[:, -1]  # separate labels of testing set
test_data.drop(test_data.columns[label_index], axis=1, inplace=True)

dt_clf = DecisionTreeClassifier()  # Train DecisionTreeClassifier
selector_dt = RFE(dt_clf, None, step=1).fit(train_data, train_labels)
predicted_test_dt = selector_dt.predict(test_data)

rf_clf = RandomForestClassifier(n_estimators=100, max_depth=6,
                                random_state=0)  # RandomForestClassifier
selector_rf = RFE(rf_clf, None, step=1).fit(train_data, train_labels)
predicted_test_rf = selector_rf.predict(test_data)

knn_clf = KNeighborsClassifier(n_neighbors=5).fit(
    train_data, train_labels)  # Train KNN classifier
predicted_test_knn = knn_clf.predict(test_data)

# Train SVM classifier
svc_clf = svm.SVC(gamma='auto',
                  kernel='rbf',
                  decision_function_shape='ovo',
                  max_iter=-1,
                  probability=False,
                  random_state=None,
                  shrinking=True,
                  tol=0.001,
                  verbose=False).fit(train_data, train_labels)
predicted_test_svc = svc_clf.predict(test_data)

nn_clf = MLPClassifier(solver='lbfgs',
                       alpha=1e-5,
Example #45
                                                    random_state=0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting classifier to the Training set
# Create your classifier here
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1,
              step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
              stop=X_set[:, 1].max() + 1,
              step=0.01))
Example #46
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=42)

# In[99]:

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(X_train, y_train)

# In[100]:

pred = knn.predict(X_test)
knn.score(X_test, y_test)

# In[101]:

neighbors = []
cv_scores = []

from sklearn.model_selection import cross_val_score
# perform 20-fold cross validation over odd k from 1 to 49
for k in range(1, 51, 2):
    neighbors.append(k)
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_train, y_train, cv=20, scoring='accuracy')
    cv_scores.append(scores.mean())
Example #47
0
#iris.data.shape
'''
Inspect the dataset description
'''
print(iris.DESCR)
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,test_size=0.25,random_state=33)
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier


knc = KNeighborsClassifier()  # default settings, for comparison
knc1 = KNeighborsClassifier(n_neighbors=5,p=1,metric='minkowski')  # k neighbours; p=1 Manhattan distance, p=2 Euclidean distance
knc.fit(X_train,y_train)
knc1.fit(X_train,y_train)
y_predict = knc.predict(X_test)
y_predict1 = knc1.predict(X_test)
from sklearn.metrics import classification_report
print('Accuracy of K-Nearest Neighbor Classifier is:', knc.score(X_test,y_test))
print(classification_report(y_test,y_predict,target_names=iris.target_names))

print('Accuracy of K-Nearest Neighbor Classifier with setting is:', knc1.score(X_test,y_test))
print(classification_report(y_test,y_predict1,target_names=iris.target_names))
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
#def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
#
#    # setup marker generator and color map
#    markers = ('s', 'x', 'o', '^', 'v')
#    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
#    cmap = ListedColormap(colors[:len(np.unique(y))])
#
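# The commented-out helper above is cut off; here is a completed sketch
# (an adaptation, not from the original source) of such a decision-region
# plot for a fitted classifier over two features. Assumes NumPy arrays and
# that numpy is imported as np; plt and ListedColormap are imported above.
import numpy as np

def plot_decision_regions(X, y, classifier, resolution=0.02):
    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # evaluate the classifier on a grid spanning both features
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # overlay the samples, one marker per class
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(X[y == cl, 0], X[y == cl, 1],
                    color=colors[idx], marker=markers[idx], label=str(cl))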
Example #48
0
plt.legend(labels=['Female', 'Male'])

female_data = df[df['sex'] == 0]  # Only consider female patients
female_data.info()

feature_lis = ['trestbps', 'oldpeak', 'age']
feature_1 = female_data[feature_lis[0]]
feature_2 = female_data[feature_lis[1]]
feature_3 = female_data[feature_lis[2]]
target = female_data['target']

from sklearn.neighbors import KNeighborsClassifier

X = np.column_stack([feature_1, feature_2, feature_3])  # feature matrix built from the three selected columns
knn_clf = KNeighborsClassifier(n_neighbors=5)
knn_clf.fit(X, target)
y_pred = knn_clf.predict(X)
fig3 = plt.figure(10, figsize=(8, 5))
ax = fig3.add_subplot(111, projection='3d')
scatter = ax.scatter(feature_1,
                     feature_2,
                     feature_3,
                     c=y_pred,
                     cmap=mcolors.ListedColormap(colors))
plt.legend(handles=scatter.legend_elements()[0],
           labels=['No Heart Disease', 'Heart Disease'])

ax.set_xlabel(feature_lis[0])
ax.set_ylabel(feature_lis[1])
ax.set_zlabel(feature_lis[2])
ax.set_title('KNN')
plt.show()
Example #49
0
import numpy as np

X = df_new
print(np.shape(X))
print(np.shape(y))
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
y_predict = knn.predict(X_test)
from sklearn.metrics import accuracy_score
y_test = np.array(y_test)
print(accuracy_score(y_test, y_predict))
Example #50
0
from sklearn.model_selection import train_test_split

training_data, testing_data, training_labels, testing_labels = train_test_split(
    iris_dataset['data'], iris_dataset['target'])

print(len(training_data))
print(testing_data.shape)

iris_dataframe = pd.DataFrame(training_data,
                              columns=iris_dataset.feature_names)

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=1)

knn.fit(training_data, training_labels)
import numpy as np

mystery_iris = np.array([[5, 2.9, 1, 0.2]])

print(mystery_iris.shape)

prediction = knn.predict(mystery_iris)
print(iris_dataset['target_names'][prediction])

test_predictions = knn.predict(testing_data)
print(test_predictions)

print("Score: {:.2f}".format(np.mean(testing_labels == test_predictions)))

print(knn.score(testing_data, testing_labels))
Example #51
0
def knncls():
    """
    Predict users' check-in locations with k-nearest neighbours.
    :return: None
    """
    # Read the data
    data = pd.read_csv("./data/FBlocation/train.csv")

    print(data.head(10))

    # Preprocess the data
    # 1. Shrink the dataset: keep only points inside a small bounding box
    data = data.query("x > 1.0 &  x < 1.25 & y > 2.5 & y < 2.75")

    # Parse the timestamp column
    time_value = pd.to_datetime(data['time'], unit='s')

    print(time_value)

    # Convert the datetimes to a DatetimeIndex
    time_value = pd.DatetimeIndex(time_value)

    # Engineer a few date-based features
    data['day'] = time_value.day
    data['hour'] = time_value.hour
    data['weekday'] = time_value.weekday

    # Drop the raw timestamp feature
    data = data.drop(['time'], axis=1)

    print(data)

    # Drop target places with fewer than n check-ins
    place_count = data.groupby('place_id').count()

    tf = place_count[place_count.row_id > 3].reset_index()

    data = data[data['place_id'].isin(tf.place_id)]

    # Split the data into features and target
    y = data['place_id']

    x = data.drop(['place_id'], axis=1)

    # Split into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

    # Feature engineering (standardisation)
    std = StandardScaler()

    # Standardise the features of the training and test sets
    x_train = std.fit_transform(x_train)

    x_test = std.transform(x_test)

    # Build the estimator (the hyperparameters are tuned below)
    knn = KNeighborsClassifier()

    # fit, predict, score
    knn.fit(x_train, y_train)

    # Make predictions
    y_predict = knn.predict(x_test)
    #
    # print("Predicted check-in locations:", y_predict)
    #
    # # Accuracy
    # print("Prediction accuracy:", knn.score(x_test, y_test))

    # Candidate hyperparameter values to search over
    param = {"n_neighbors": [3, 5, 10]}

    # Grid search
    gc = GridSearchCV(knn, param_grid=param, cv=2)

    gc.fit(x_train, y_train)

    # Accuracy of the best model on the test set
    print("Accuracy on the test set:", gc.score(x_test, y_test))

    print("Best cross-validation score:", gc.best_score_)

    print("Best estimator:", gc.best_estimator_)

    print("Cross-validation results for each hyperparameter setting:", gc.cv_results_)

    return None
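# A minimal entry point (an addition, not in the original) for running the
# helper above as a script:
if __name__ == "__main__":
    knncls()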
Example #52
0
logreg = LogisticRegression()
logreg.fit(X, y)
logreg.predict(X)

# Store the response values
yPredictions = logreg.predict(X)

# We then evaluate the training accuracy
# Classification accuracy is the proportion of correct predictions
score = metrics.accuracy_score(y, yPredictions)
print(score)

# We can then repeat this procedure for KNN with K = 5 and K = 1
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)
yPredictions = knn.predict(X)
score = metrics.accuracy_score(y, yPredictions)
print(score)

knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X, y)
yPredictions = knn.predict(X)
score = metrics.accuracy_score(y, yPredictions)
print(score)

# We conclude that KNN with K = 1 is the best model to use with this data.
# Note that obtaining 100% accuracy with K = 1 is expected, since we are testing
# on the training data.
# We therefore conclude that training and testing our models on the exact same
# data is not a useful procedure for deciding which models to choose.
# Our goal here is to estimate how well each model is likely to perform on
# out-of-sample data.
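# A minimal sketch (not in the original) of the out-of-sample procedure this
# motivates: hold out a test set and score on it instead of on the training
# data. Assumes the same X, y, KNeighborsClassifier and metrics names used above.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=4)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
print(metrics.accuracy_score(y_test, knn.predict(X_test)))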
Example #53
0
        image = cv.imread(imagePath)
        try:
            # Convert to Gray and Resize
            gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
            logo = cv.resize(gray, (200, 100))

            # Compute the HOG descriptor of the test image
            hist = feature.hog(logo,
                               orientations=9,
                               pixels_per_cell=(10, 10),
                               cells_per_block=(2, 2),
                               transform_sqrt=True,
                               block_norm="L1")

            # Predict in model
            predict = model.predict(hist.reshape(1, -1))[0]

            # Make pictures default Height
            height, width = image.shape[:2]
            reWidth = int((300 / height) * width)
            image = cv.resize(image, (reWidth, 300))

            # Write predicted label over the Image
            cv.putText(image, predict.title(), (10, 30),
                       cv.FONT_HERSHEY_TRIPLEX, 1.2, (0, 255, 0), 4)

            # Get the image name and show the image (convert BGR to RGB for matplotlib)
            imageName = imagePath.split("/")[-1]
            plt.imshow(cv.cvtColor(image, cv.COLOR_BGR2RGB))
            plt.show()
            cv.waitKey(0)
Example #54
0
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(bumpy_slow, grade_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
################################################################################

### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary
from sklearn.neighbors import KNeighborsClassifier

neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(features_train, labels_train)

pred = neigh.predict(features_test)

from sklearn.metrics import accuracy_score
print accuracy_score(pred, labels_test)

########################
### adaboost algorithm
########################
from time import time
from sklearn.ensemble import AdaBoostClassifier
print "-:: adaboost ::------------------"
t0 = time()
adab = AdaBoostClassifier(n_estimators=100, learning_rate=1)
adab.fit(features_train, labels_train)  # train on the training set, not the test set
print "training time adaboost:", round(time() - t0, 3), "s"
Example #55
0
similarity_matrix = np.zeros([n_subs, n_subs])
disparity_matrix = np.zeros([n_subs, n_subs])

for s2 in range(n_subs):
    neigh = KNeighborsClassifier(n_neighbors=1)
    sub_feat = feat[s2, :, :, :].reshape(feat.shape[1] * feat.shape[2],
                                         feat.shape[-1])
    neigh.fit(sub_feat, labels)

    for s1 in range(n_subs):
        equal_label = 0
        diff_label = 0
        for cond in range(n_conditions):
            for sample in range(n_samples):
                data = feat[s1, cond, sample, :].reshape(1, -1)
                pred = neigh.predict(data)
                if pred == cond:
                    equal_label += 1
                else:
                    diff_label += 1

        similarity_matrix[s1, s2] = equal_label / (2 * n_samples)  # assumes n_conditions == 2
        disparity_matrix[s1, s2] = diff_label / (2 * n_samples)

for s1 in range(n_subs):
    for s2 in range(n_subs):
        similarity_matrix[s1, s2] = min(similarity_matrix[s1, s2],
                                        similarity_matrix[s2, s1])
        similarity_matrix[s2, s1] = similarity_matrix[s1, s2]

        disparity_matrix[s1, s2] = max(disparity_matrix[s1, s2],
                                       disparity_matrix[s2, s1])
        disparity_matrix[s2, s1] = disparity_matrix[s1, s2]
Example #56
0
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.neighbors import KNeighborsClassifier

data = pd.read_csv("data.csv")
X = data.iloc[:, :-1].values
y = data.iloc[:, 2].values

Knn = KNeighborsClassifier(n_neighbors=3)
Knn.fit(X, y)

X_pred = np.array([6, 6])
Y_pred = Knn.predict(X_pred.reshape(1, -1))
print("\nClass of [6,6] using KNN:- ", Y_pred[0])

#Weighted KNN
Knn = KNeighborsClassifier(n_neighbors=3, weights='distance')
Knn.fit(X, y)

Y_pred = Knn.predict(X_pred.reshape(1, -1))
print("\nClass of [6,6] using weighted KNN:- ", Y_pred[0])

h = 0.02
clf = Knn
cmap_light = ListedColormap(['lightgreen', 'yellow'])
cmap_bold = ListedColormap(['b', 'r'])

# calculate min, max and limits
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
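# A plausible completion of the boundary plot (not in the original excerpt),
# following the usual meshgrid pattern; assumes y holds numeric class labels:
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.show()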
Example #57
0
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# manually make a data directory and download sonar.all-data
URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data'
df = pd.read_csv('data/sonar.all-data', header=None, prefix="X")
# df = pd.read_csv(URL, header=None, prefix="X")
print(df.shape)
print(df.columns)
df.rename(columns={'X60': 'Label'}, inplace=True)
# try other values such as n_neighbors=2, 4
clf1 = KNeighborsClassifier(n_neighbors=6)
data, labels = df.iloc[:, :-1], df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3)
clf1.fit(X_train, y_train)
y_predict = clf1.predict(X_test)
print("score=", clf1.score(X_test, y_test))
# get the confusion matrix
result_cm1 = confusion_matrix(y_test, y_predict)
print(result_cm1)

# groups= is only honoured by group-aware CV splitters, so it is dropped here
scores = cross_val_score(clf1, data, labels, cv=5)
print(scores)
from joblib import dump, load

dump(clf1, "knn1.joblib")
knn2 = load("knn1.joblib")
y_predict2 = knn2.predict(X_test)
# compare the original and reloaded models: the matrix should be diagonal
result2 = confusion_matrix(y_predict, y_predict2)
print(result2)
Example #58
0
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting classifier to the Training set
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric='minkowski', p = 2)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
Example #59
0
# Declare empty arrays for accuracy and number of neighbours
accuracy = np.zeros((6, 1))
accuracy_val = np.zeros((6, 1))
neighbours = np.linspace(2, 7, 6)

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

for x in neighbours:

    # KNN model training
    lr = KNeighborsClassifier(n_neighbors=int(x))
    lr = lr.fit(X_train_lda, y_train)

    # Classification report
    y_pred = lr.predict(X_train_lda)
    y_pred_test = lr.predict(X_test_lda)
    #print(classification_report(y_train, y_pred))
    accuracy[int(x - 2), :] = accuracy_score(y_train, y_pred)
    accuracy_val[int(x - 2), :] = accuracy_score(y_test, y_pred_test)
    print(accuracy_score(y_train, y_pred))

plt.plot(neighbours, accuracy, '-r', label='Training')
plt.plot(neighbours, accuracy_val, '-k', label='Validation')
plt.xlabel('Number of neighbours')
plt.ylabel('Accuracy')
plt.title('Sensitivity study')
plt.legend()
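# A minimal sketch (not in the original) of reading the best k off the
# sensitivity study above; np, neighbours and accuracy_val are as defined:
best_idx = int(np.argmax(accuracy_val))
print('Best k:', int(neighbours[best_idx]),
      'validation accuracy:', float(accuracy_val[best_idx]))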
Example #60
0
le = preprocessing.LabelEncoder()

buying = le.fit_transform(list(data['buying']))
maint = le.fit_transform(list(data['maint']))
door = le.fit_transform(list(data['door']))
persons = le.fit_transform(list(data['persons']))
lug_boot = le.fit_transform(list(data['lug_boot']))
safety = le.fit_transform(list(data['safety']))
clss = le.fit_transform(list(data['class']))

X = list(zip(buying, maint, door, persons, lug_boot, safety))
Y = list(clss)

x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, Y, test_size=0.1)

model = KNeighborsClassifier(n_neighbors=9)

model.fit(x_train, y_train)

acc = model.score(x_test, y_test)
print(acc)

predicted = model.predict(x_test)
names = ['unacc', 'acc', 'good', 'vgood']

for x in range(len(predicted)):
    print(
        f'Predicted: {names[predicted[x]]}, Data: {x_test[x]}, Actual: {names[y_test[x]]}')