def knn_learn(nneighbors=1, data_train=None, target_train=None, data_test=None):
    """Estimate position using the K nearest neighbors (KNN) technique.

    Parameters
    ----------
    nneighbors : int
        Number of neighbors to use (default 1).
    data_train : numpy.ndarray
        Training feature vectors (default: empty array).
    target_train : numpy.ndarray
        Training target values (default: empty array).
    data_test : numpy.ndarray
        Feature vectors to classify (default: empty array).

    Returns
    -------
    targets : numpy.ndarray
        Predicted targets for ``data_test``.
    """
    # None sentinels replace the original mutable-array defaults, which were
    # built once at definition time and shared across calls; fresh empty
    # arrays keep the original behavior for callers relying on the defaults.
    if data_train is None:
        data_train = np.array([])
    if target_train is None:
        target_train = np.array([])
    if data_test is None:
        data_test = np.array([])
    # NOTE(review): `ngb` is presumably the (old) scikits.learn neighbors
    # module imported elsewhere in this file — confirm at the import site.
    clf = ngb.NeighborsClassifier(nneighbors).fit(data_train, target_train)
    targets = clf.predict(data_test)
    return targets
def test_pipeline():
    """Check that LocallyLinearEmbedding works fine as a Pipeline."""
    from sklearn import pipeline, datasets
    iris = datasets.load_iris()
    steps = [('filter', manifold.LocallyLinearEmbedding()),
             ('clf', neighbors.NeighborsClassifier())]
    estimator = pipeline.Pipeline(steps)
    estimator.fit(iris.data, iris.target)
    score = estimator.score(iris.data, iris.target)
    assert_lower(.7, score)
def test_pipeline():
    """Check that Isomap works fine as a transformer in a Pipeline."""
    iris = datasets.load_iris()
    steps = [('isomap', manifold.Isomap()),
             ('neighbors_clf', neighbors.NeighborsClassifier())]
    estimator = pipeline.Pipeline(steps)
    estimator.fit(iris.data, iris.target)
    score = estimator.score(iris.data, iris.target)
    assert_lower(.7, score)
def bench_skl(X, y, T, valid):
    """Benchmark scikits.learn KNN.

    Fits a NeighborsClassifier on (X, y), scores its predictions for T
    against `valid`, and returns ``(accuracy, elapsed)`` where `elapsed`
    is the wall-clock time of fit + predict as a ``timedelta``.
    """
    from sklearn import neighbors
    start = datetime.now()
    # `n_neighbors` is a module-level configuration global.
    clf = neighbors.NeighborsClassifier(n_neighbors=n_neighbors,
                                        algorithm='brute_inplace')
    clf.fit(X, y)
    accuracy = np.mean(clf.predict(T) == valid)
    elapsed = datetime.now() - start
    return accuracy, elapsed
def run():
    """Bag-of-visual-words pipeline over SHOT descriptors.

    Loads precomputed normalization stats, PCA components, and a k-means
    codebook from disk; builds one normalized codeword histogram per object
    listed in ``listSingleSample.txt``; then trains/evaluates a 1-NN
    classifier on a 2/3-vs-1/3 split and prints the accuracy and a
    confusion matrix.

    NOTE(review): relies on star-imported numpy names (loadtxt, zeros,
    dot, ...) plus `decomposition`, `neighbors`, `labelFromFileName`, and
    `printConfMat` from elsewhere in this file — confirm at the imports.
    Paths are hard-coded to one machine.
    """
    # pdb.set_trace();
    numComponents = 10;
    numCodeWords = 200;
    # -- data loading and normalization --
    print 'loading data'
    pcaMean = loadtxt('pcamean.txt');
    pcaComps = loadtxt('pcacomps.txt');
    mean = loadtxt('mean.txt');
    std = loadtxt('std.txt');
    data = loadtxt('/home/mohsen/Downloads/shotFiles/shot.txt');
    # Stats below were computed once and cached to the .txt files above:
    # mean = data.mean(axis=0);
    # std = sqrt(data.var(axis=0));
    # std [where(std==0)] = 1;
    data = (data - mean ) / std;
    # -- dimension reduction --
    print 'calculating pca'
    pca = decomposition.PCA(n_components=numComponents);
    pca.fit(data);
    projection = pca.transform(data);
    # savetxt('mean.txt',mean);
    # savetxt('std.txt',std);
    # savetxt('pcamean.txt',pca.mean_);
    # savetxt('pcacomps.txt',pca.components_);
    # raw_input(':');
    # -- codebook generation (k-means was run once, then cached) --
    print 'fnding k-means'
    # kmeans = cluster.KMeans(k=numCodeWords);
    # kmeans.fit(projection);
    # codeBook = kmeans.cluster_centers_;
    # savetxt('codebook.txt',codeBook)
    codeBook = loadtxt('codebook.txt');
    # -- build one codeword histogram per object --
    print 'building histograms'
    root = '/home/mohsen/Downloads/shotFiles/';
    f = open(root + 'listSingleSample.txt','r');
    # Row 0 is a dummy all-zeros histogram (label 0) so concatenate has a
    # base; the split slices below start at index 1 and skip it.
    data = zeros((1,shape(codeBook)[0]));
    target = [0];
    for line in f:
        shotFileName = root + line.replace('\n','');
        objectFeatures = loadtxt(shotFileName);
        objectLabel = labelFromFileName(shotFileName);
        if objectLabel==-1:
            print 'Error: Cannot determine label from file name!!!'
            return;
        # Apply the same normalization and (manual) PCA projection as the
        # training data; pca.transform is bypassed to use cached stats.
        objectFeatures = objectFeatures - mean;
        objectFeatures = objectFeatures / std;
        #objectPca = pca.transform(objectFeatures);
        temp = objectFeatures - pcaMean;
        objectPca = dot(temp, pcaComps.T);
        # Vector-quantize each projected feature to its nearest codeword.
        objectHist = zeros((1,shape(codeBook)[0]));
        for idx in range(shape(objectPca)[0]):
            dist = sum( (codeBook - objectPca[idx,:])**2 , axis=1 );
            objectHist[0,argmin(dist)] = objectHist[0,argmin(dist)] + 1;
        # Normalize counts to frequencies so histograms are comparable.
        objectHist = objectHist / sum(objectHist);
        data = concatenate((data,objectHist),axis=0);
        target.append(objectLabel);
    # pdb.set_trace();
    # dataTrain = concatenate((data,target),axis=1);
    # savetxt('dataTrain.txt',dataTrain);
    # Every 3rd sample starting at 3 is test; the two before it are train.
    train = concatenate( (data[1::3,:],data[2::3,:]),axis=0 );
    test = data[3::3,:];
    traint = concatenate((target[1::3],target[2::3]));
    testt = target[3::3];
    print 'knn'
    knn1 = neighbors.NeighborsClassifier(1)
    knn1.fit(train, traint)
    out = knn1.predict(test);
    correct = sum(out==testt)/float(shape(out)[0])
    print "correct percent is:", correct
    printConfMat(testt,out);