def KNNclassifier(training, test, tLabels, k, d, *args):
    '''Label every test point by a vote among its k nearest training points.

    The distances from one test point to all training points are computed
    through a Spark RDD built from ``training`` (using the module-level
    SparkContext ``sc``) and collected back to the driver. The labels of the
    ``k`` closest training points are then combined with ``stat.mode``.

    Args:
        training: sequence of training points.
        test: iterable of points to be labelled.
        tLabels: labels aligned by position with ``training``.
        k: number of neighbours that take part in the vote. If fewer than
            ``k`` training points exist, all of them vote.
        d: distance function, called as ``d(train_point, test_point, *args)``.
        *args: extra positional arguments forwarded to ``d``.

    Returns:
        A list with one predicted label per element of ``test``, in order.

    Raises:
        Whatever ``stat.mode`` raises when it cannot pick a label (for
        example when no neighbour is available).
    '''
    trainingRDD = sc.parallelize(training)
    labels = []
    for point in test:
        # Bind the current point as a default argument so the closure shipped
        # to the workers does not depend on the loop variable.
        dist = trainingRDD.map(lambda x, p=point: d(x, p, *args)).collect()

        # Rank training indices by distance instead of repeatedly removing the
        # minimum: removing entries shifts the remaining positions, so they
        # would no longer line up with tLabels. sorted() is stable, so ties
        # go to the training point with the lowest index.
        nearest = sorted(range(len(dist)), key=dist.__getitem__)[:k]
        k_labels = [tLabels[idx] for idx in nearest]

        labels.append(stat.mode(k_labels))
    return labels
def bisect(lines):
    """Append a line halfway across the widest gap between neighbouring lines.

    A gap is the difference between the first coordinate of one entry and the
    first coordinate of the entry that precedes it in ``lines``. The new entry
    ``[m, m]`` is appended to the end of ``lines`` (the list is not re-sorted),
    where ``m`` is the start of the widest gap plus half its width (floor
    division). If several gaps share the maximum width, the first one is used.

    Args:
        lines: list of indexable entries whose element ``0`` is the
            coordinate. The list is modified in place.

    Returns:
        The same ``lines`` list. With fewer than two entries there is no gap
        to split, so the list is returned unchanged.
    """
    if len(lines) < 2:
        return lines

    gaps = [nxt[0] - cur[0] for cur, nxt in zip(lines, lines[1:])]
    widest = max(gaps)
    # gaps[i] is the gap that starts at lines[i].
    start = lines[gaps.index(widest)][0]
    middle = start + widest // 2
    lines.append([middle, middle])
    return lines
def threesect(lines):
    """Add two lines inside the widest gap between neighbouring lines.

    A gap is the difference between the first coordinate of one entry and the
    first coordinate of the entry that precedes it in ``lines``.

    * If the widest gap is at least 45 (the width of three characters plus a
      small margin), it is cut into three parts: entries are appended at one
      third and at two thirds of the gap (floor division).
    * Otherwise ``bisect`` is applied twice.

    Args:
        lines: list of indexable entries whose element ``0`` is the
            coordinate. The list is modified in place; new entries are
            appended at the end and the list is not re-sorted.

    Returns:
        The ``lines`` list. With fewer than two entries there is no gap to
        split, so the list is returned unchanged.
    """
    if len(lines) < 2:
        return lines

    gaps = [nxt[0] - cur[0] for cur, nxt in zip(lines, lines[1:])]
    widest = max(gaps)
    # gaps[i] is the gap that starts at lines[i].
    start = lines[gaps.index(widest)][0]

    # 45 is the width of three characters plus a small margin.
    if widest >= 45:
        # Splitting into three needs steps of one third; a step of one half
        # would put the second line on the far edge of the gap.
        third = widest // 3
        lines.append([start + third, start + third])
        lines.append([start + 2 * third, start + 2 * third])
    else:
        # NOTE(review): bisect() appends without re-sorting, so the second
        # call finds the same widest gap and appends the same midpoint again.
        # Confirm whether that is intended.
        lines = bisect(lines)
        lines = bisect(lines)
    return lines
def filtering(self, image):
    """Run one filtering step on ``image`` and return the tracked result.

    The method calls, in order, ``modeling``, ``calcLikelihood``,
    ``x_means``, ``normalize`` and ``resampling``. It then builds a weighted
    estimate ``[x, y, w, h]`` per cluster in ``self.px``, assigns ids to the
    clusters, and finally passes the ids through ``frameout``.

    Id assignment depends on ``self.frame_count`` and ``self.cluster``:

    * frame_count > 1 and more than one cluster: ids come from
      ``self.hungarian()``; ``self.pre_px`` / ``self.pre_id`` are refreshed
      and ``self.flag_count`` is reset to 0.
    * frame_count > 1 and exactly one cluster: the id of the previous
      estimate (``self.pre_px``) closest to the current one is reused;
      ``pre_px`` / ``pre_id`` are only overwritten once ``flag_count``
      exceeds 5.
    * otherwise: ids are simply ``range(self.cluster)``.

    Returns:
        The pair ``(average, id)`` returned by ``self.frameout(id)``.
    """
    # Call each processing step
    self.modeling()
    intensity, intensity_all = self.calcLikelihood(image)
    self.x_means(intensity, intensity_all)
    self.normalize()
    # Prepare the per-cluster lists
    self.X = [''] * self.cluster
    self.Y = [''] * self.cluster
    self.W = [''] * self.cluster
    self.H = [''] * self.cluster
    self.bx = np.zeros((self.cluster, 1))
    self.by = np.zeros((self.cluster, 1))
    self.bw = np.zeros((self.cluster, 1))
    self.bh = np.zeros((self.cluster, 1))
    self.px = [''] * self.cluster
    # Resampling
    self.resampling()
    # Columns 0..3 of each cluster's data are taken as x, y, w, h and
    # restricted to the entries selected by self.sample[i].
    for i in range(self.cluster):
        self.X[i] = self.x_kmeans[i][0][:, 0][self.sample[i]]
        self.Y[i] = self.x_kmeans[i][0][:, 1][self.sample[i]]
        self.W[i] = self.x_kmeans[i][0][:, 2][self.sample[i]]
        self.H[i] = self.x_kmeans[i][0][:, 3][self.sample[i]]
    # Target estimation: weighted sum of every component with self.weights
    for i in range(self.cluster):
        for j in range(len(self.X[i])):
            self.bx[i][0] += float(self.X[i][j]) * float(
                self.weights[i][j])
        for j in range(len(self.Y[i])):
            self.by[i][0] += float(self.Y[i][j]) * float(
                self.weights[i][j])
        for j in range(len(self.W[i])):
            self.bw[i][0] += float(self.W[i][j]) * float(
                self.weights[i][j])
        for j in range(len(self.H[i])):
            self.bh[i][0] += float(self.H[i][j]) * float(
                self.weights[i][j])
        self.px[i] = [
            self.bx[i][0], self.by[i][0], self.bw[i][0], self.bh[i][0]
        ]  # x, y, w, h of each cluster's bounding box
    # Hungarian method
    if self.frame_count > 1 and self.cluster > 1:
        id = self.hungarian()  # call the Hungarian function
        self.pre_px = np.array([x[:] for x in self.px
                                ])  # copy self.px -> self.pre_px
        self.pre_id = id[:]  # copy id -> self.pre_id
        self.flag_count = 0
    # When there is only one cluster
    elif self.frame_count > 1 and self.cluster == 1:
        self.flag_count += 1
        p1 = np.array(self.px)
        p2 = self.pre_px
        print "p1:{}".format(len(p1))
        print "p2:{}".format(len(p2))
        distance = []
        # Compare Euclidean distances and reuse the id of the previous
        # estimate with the smallest distance
        for px in p2:
            distance.append(np.linalg.norm(p1[0] - px))
        id = [self.pre_id[distance.index(min(distance))]]
        # Once flag_count is large enough, judge that only one tracked
        # target remains and save the id. (The original comment says
        # "5 or more"; the code tests flag_count > 5.)
        if self.flag_count > 5:
            self.pre_px = np.array([x[:] for x in self.px])
            self.pre_id = id[:]
    # First frame (also reached whenever neither condition above holds)
    else:
        id = range(self.cluster)
    # Frame-out handling
    average, id = self.frameout(id)
    return average, id
# Reference spectrograms for the recordings of "eight" and "nine".
wav_file = '/number/eight.wav'
print(str(8) + ' making spectrogram arr')
eight = graph_spectrogram(wav_file)

wav_file = '/number/nine.wav'
print(str(9) + ' making spectrogram arr')
nine = graph_spectrogram(wav_file)

# Spectrogram of the sample named by the user (the typed text is handed to
# graph_spectrogram exactly like the wav paths above).
userInput = input()
print('user input making spectrogram arr')
example = graph_spectrogram(userInput)

# Replace every reference spectrogram by the value find_similarity returns
# for it against the user's sample. The calls run in the order one..nine and
# then zero.
(one, two, three, four, five,
 six, seven, eight, nine, zero) = [
    find_similarity(example, reference)
    for reference in (one, two, three, four, five,
                      six, seven, eight, nine, zero)
]

print('\n minimal gap distance arr')
# Ordered zero..nine so that a position in the list equals the digit.
distance = [zero, one, two, three, four, five, six, seven, eight, nine]
print(distance)
# Position (= digit) of the smallest value.
print(distance.index(min(distance)))
# K-means style clustering of train_data, repeated for every cluster count in
# number_of_clusters. For each count the final centroids are saved to
# "<count>_centroids.npz" and handed to vis.visualize.
# The print calls use the single-argument form, which behaves the same under
# Python 2 and Python 3.
labels = np.zeros(train_data.shape[0])
for cluster in number_of_clusters:
    # Not used further in this section.
    directory_path = './' + str(cluster) + '_centroids_images/'

    # Start from `cluster` distinct random samples. The range starts at 0 so
    # that the first sample can be chosen as well.
    centroid_index = random.sample(range(train_data.shape[0]), cluster)
    centroid = train_data[centroid_index]
    # NOTE(review): centroid inherits train_data's dtype; if that is an
    # integer type the means assigned below are truncated. Confirm the dtype.

    for iterations in range(0, iteration):
        print("Iteration " + str(iterations))

        # Assignment step: label each sample with its nearest centroid
        # (first one wins on ties).
        for idx, data in enumerate(train_data):
            dist = [np.linalg.norm(data - center) for center in centroid]
            labels[idx] = dist.index(min(dist))
        print(labels)

        # Update step: move each centroid to the mean of its members.
        for cluster_number in range(0, cluster):
            index = [
                idx for idx, label in enumerate(labels)
                if label == cluster_number
            ]
            print('Updating cluster ' + str(cluster_number))
            if not index:
                # No sample was assigned to this cluster: keep the previous
                # centroid, because the mean of an empty selection is NaN.
                continue
            temp_data = train_data[index]
            centroid[cluster_number] = np.mean(temp_data, axis=0)
        print(centroid)

    np.savez(str(cluster) + '_centroids.npz', centroids=centroid)
    vis.visualize(cluster)
# Matching phase: look through the stored feature files for the first one
# whose nearest centre belongs to the group `part`.
print("")
print(
    "-------------------------------------------ici commence la correspondance-------------------------------------------"
)
for file in fichiers:
    # Skip entries of the feature folder that are directories.
    if os.path.isdir("./" + dir_features + "/" + file):
        continue

    # NOTE: pickle.load can execute arbitrary code; only use it on feature
    # files that this project produced itself.
    with open("./" + dir_features + "/" + file, "rb") as fic:
        data = pickle.load(fic)

    # Distance from this file's descriptor (element 2 of the pickled record)
    # to every centre.
    dist = [DistanceHu(data[2], c) for c in centres]
    minima = min(dist)
    kindice = dist.index(minima)
    # Group label of the closest centre.
    groupe = labels[kindice]

    if groupe == part:
        print("voici son fichier de correspondance:", file,
              "et son groupe est ", groupe)
        break

# Disabled histogram / co-occurrence experiment, kept for reference:
#while i <nbr_img-1:
#print("voici le probleme:",all_files[i])
# histB=histogramme(rep, all_files[i])
# img2 = cv2.imread('images/' + all_files[i], 0)  # histogram-distance computation
# img2_couleur = cv2.imread('images/' + all_files[i], 1)
# matrix_co=coocurrence(img2_couleur)
# i=i+1