def task5b(self, id, t):
    """Print and display the ``t`` LSH candidates closest to image ``id``.

    The position of ``id`` is looked up in ``self.data_ids``, the matching
    vector from ``self.data`` is used to query ``self.lsh_index``, the
    returned candidate indices are de-duplicated and ranked by Euclidean
    distance to the query vector, and the best ones are printed and passed
    to ``PA.display_images``.

    Args:
        id: Image identifier; converted with ``float`` before the lookup in
            ``self.data_ids``.
        t: Number of nearest images to report. If the index yields fewer
            unique candidates than ``t``, only those candidates are reported.

    Raises:
        ValueError: Presumably, when ``float(id)`` is not in
            ``self.data_ids`` (assumes ``data_ids`` is a list -- confirm).
    """
    given_image_index = self.data_ids.index(float(id))
    # Hoisted: the query vector is the same for every distance computation.
    query_vector = self.data[given_image_index]

    res = self.lsh_index.query(query_vector)
    print('With repetition Overall considered images= ', len(res))
    res = set(res)
    print('Total unique considered images= ', len(res))

    # Rank the candidates by distance to the query image.
    # NOTE(review): sorted_list is defined elsewhere; presumably it keeps up
    # to `t` entries ordered by their 'distance' key -- confirm.
    distances = sorted_list(t, 'distance', True)
    for i in res:
        dist = np.linalg.norm(self.data[i] - query_vector)
        distances.add({'id': self.data_ids[i], 'distance': dist})

    # The banner now reports the requested count instead of a fixed "5".
    print(f'Top {t!s} similar images and similarity score using LSH')
    print()

    pic_info = []
    print('Image id - Distance')
    # Never extract more entries than were added: the candidate set can be
    # smaller than `t`.
    for _ in range(min(t, len(res))):
        o = distances.extract()
        image_id = str(int(o['id']))
        print(image_id + ' - ' + str(o['distance']))
        pic_info.append({'id': image_id, 'info': image_id})

    PA.display_images(pic_info, 'Task 5b - ' + str(id) + ' - ' + str(t))

# To delete  (marker kept from the original source; presumably it refers to
# the method that follows -- confirm before removing anything)
def task_2b(self, data, c):
    """Display every cluster produced by the normalised-cut algorithm.

    ``Clustering_Algorithms.normalised_cut(data, c)`` is iterated as a
    sequence of clusters; each cluster is iterated as a sequence of indices
    into ``data.img_ids``. For every cluster its size is printed and its
    images are handed to ``PA.display_images``.

    Args:
        data: Object exposing ``img_ids``; also passed to ``normalised_cut``.
        c: Forwarded to ``normalised_cut`` and shown in the display title
            (presumably the number of clusters -- confirm).
    """
    algorithms = Clustering_Algorithms()
    for members in algorithms.normalised_cut(data, c):
        print(len(members))
        cluster_images = [
            {'id': data.img_ids[member], 'info': ''}
            for member in members
        ]
        PA.display_images(cluster_images, f'Task 2b - {c!s}')
def task_2a(self, data, c):
    """Group images by spectral-clustering label and display each group.

    ``Clustering_Algorithms.spectral_clustering(data, c)`` supplies one
    label per position. Each label is used directly as an index into a list
    of ``c`` buckets, and position ``i`` contributes ``data.img_ids[i]``.
    For every bucket its size is printed and its images are handed to
    ``PA.display_images``.

    Args:
        data: Object exposing ``img_ids``; also passed to
            ``spectral_clustering``.
        c: Number of buckets to create; labels must be valid indices into
            a list of that length.
    """
    assignments = Clustering_Algorithms().spectral_clustering(data, c)

    buckets = [[] for _ in range(c)]
    for position in range(len(assignments)):
        bucket = buckets[assignments[position]]
        bucket.append({'id': data.img_ids[position], 'info': ''})

    for bucket in buckets:
        print(len(bucket))
        PA.display_images(bucket, f'Task 2a - {c!s}')
def task_3(self, data, k):
    """Display the ``k`` images with the highest PageRank score.

    ``PageRanks.page_rank(data)`` is expected to return an object providing
    ``nlargest`` and ``items`` (presumably a pandas Series indexed by image
    id -- confirm). The ``k`` largest entries are shown through
    ``PA.display_images`` with the score appended to each caption.

    Args:
        data: Passed unchanged to ``PageRanks.page_rank``.
        k: Number of top-ranked entries to display.
    """
    ranks = PageRanks().page_rank(data)
    top_ranked = ranks.nlargest(k)

    # Series.iteritems() was removed in pandas 2.0; items() is the
    # equivalent that exists in both old and new releases.
    # str(idx) keeps the caption working when the index is not a string.
    pic_info = [
        {'id': idx, 'info': str(idx) + ' :' + str(val)}
        for idx, val in top_ranked.items()
    ]
    PA.display_images(pic_info, 'Task 3 - ' + str(k))
def task6a(self, fileName, k=None):
    """Classify images from a labelled file and display each cluster.

    The whitespace-separated file is read into an ``image -> label``
    mapping, which is passed to ``self.process_distances``. Afterwards
    ``self.cluster_dict`` is read (presumably populated by
    ``process_distances`` -- confirm): for every cluster the images are
    displayed and the cluster name, members and size are printed, followed
    by the running total of images seen so far.

    Args:
        fileName: Path of the labelled file; it must provide ``image`` and
            ``label`` columns.
        k: Unused; kept so existing callers keep working.
    """
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True.
    labelled_data = pd.read_csv(fileName, sep=r'\s+')
    # Drops the row whose index label is 0 -- with read_csv's default index
    # that is the first data row (presumably a non-data line in the input
    # file -- confirm).
    labelled_data = labelled_data.drop(0)

    labelled_dict = {
        float(row['image']): row['label']
        for _, row in labelled_data.iterrows()
    }
    self.process_distances(labelled_dict)

    total = 0
    for key in self.cluster_dict:
        members = self.cluster_dict[key]
        pic_info = []
        for image_id in members:
            caption_id = str(int(image_id))
            # str(key) keeps the caption working for non-string cluster names.
            pic_info.append({
                'id': caption_id,
                'info': caption_id + " " + str(key),
            })
        PA.display_images(pic_info, 'Task 6a - ' + str(fileName))

        print("cluster name=", key)
        print("images in this cluster= ", members)
        print("size of cluster= ", len(members))
        total += len(members)
        print("total=", total)
def task6b(self, data, fileName):
    """Label every image with the class whose personalised PageRank is highest.

    The whitespace-separated file is read and its ``image`` values are
    grouped by ``label``. For each label a personalised PageRank is computed
    with that label's images as the third argument. Every position ``i`` in
    ``data.img_ids`` is then captioned with the label(s) whose rank at ``i``
    is largest, and the result is handed to ``PA.display_images``.

    Only ranks strictly greater than 0 can win outright. Labels that tie
    with the current best are all listed (including every label when all
    ranks are 0).

    Args:
        data: Object exposing ``img_ids``; also passed to
            ``PageRanks.personalized_page_rank``.
        fileName: Path of the labelled file; it must provide ``image`` and
            ``label`` columns.
    """
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True.
    labelled_data = pd.read_csv(fileName, sep=r'\s+')
    # Drops the row whose index label is 0 -- with read_csv's default index
    # that is the first data row (presumably a non-data line in the input
    # file -- confirm).
    labelled_data = labelled_data.drop(0)

    images_by_label = {}
    for _, row in labelled_data.iterrows():
        images_by_label.setdefault(row['label'], []).append(row['image'])

    # One rank vector per label, kept in matching order.
    page_labels = []
    page_ranks = []
    for label, seed_images in images_by_label.items():
        pr = PageRanks()
        page_ranks.append(pr.personalized_page_rank(data, seed_images, False))
        page_labels.append(label)

    pic_info = []
    for i in range(len(data.img_ids)):
        best_score = 0
        best_labels = []
        for label, ranks in zip(page_labels, page_ranks):
            score = ranks[i]
            if score > best_score:
                best_labels = [label]
                best_score = score
            elif score == best_score:
                best_labels.append(label)
        image_id = str(data.img_ids[i])
        pic_info.append({
            'id': image_id,
            # map(str, ...) keeps the caption working for non-string labels.
            'info': image_id + " : " + ",".join(map(str, best_labels)),
        })

    PA.display_images(pic_info, 'Task 6b - ' + str(fileName))