def calculate_label_counts(self, distances, records, train_labels, k, list_of_all_labels):
    """Tally, per label, how often each neighbour count occurs over the records.

    For every index in ``records`` the labels of its ``k + 1`` nearest
    neighbours are fetched with ``knn_repetitions_multilabel`` and counted with
    ``count_distinct_objects``; the record's own labels are then subtracted
    once, and the remaining count of each known label is recorded.

    Args:
        distances: indexable by record index; ``distances[ind]`` is handed to
            ``knn_repetitions_multilabel`` as the distance vector.
        records: iterable of record indices.
        train_labels: indexable by record index; each entry is an iterable of
            the labels assigned to that record.
        k: number of neighbours to consider (``k + 1`` are requested, see below).
        list_of_all_labels: iterable of every label to tally.

    Returns:
        A dict mapping each label to ``{"c": ..., "c_prim": ...}``. Both values
        are ``defaultdict`` counters keyed by neighbour count: ``"c"`` counts
        the records that carry the label, ``"c_prim"`` the records that do not.
    """
    # ``int`` yields 0 for a missing key just like ``lambda: 0`` did, but it
    # keeps the returned structure picklable (lambdas cannot be pickled).
    label_counts = {
        label: {"c": defaultdict(int), "c_prim": defaultdict(int)}
        for label in list_of_all_labels
    }

    for ind in records:
        distance_vector = distances[ind]
        # k + 1 because the sample itself is expected among its own neighbours.
        labels = knn_repetitions_multilabel(distance_vector, records, train_labels, k + 1)
        neigh_counts = count_distinct_objects(labels)

        # Subtract the labels of the sample being examined.
        # NOTE(review): this presumes the sample really is one of the k + 1
        # neighbours returned above - confirm how ties are handled upstream.
        own_labels = train_labels[ind]
        for self_label in own_labels:
            neigh_counts[self_label] -= 1

        oracle_ans = set(own_labels)
        for label in list_of_all_labels:
            # "c" when the oracle labelling contains the label, "c_prim" otherwise.
            bucket = "c" if label in oracle_ans else "c_prim"
            label_counts[label][bucket][neigh_counts.get(label, 0)] += 1
    return label_counts
def classify(self, sample_distances):
    """Return the labels whose neighbour count reaches their threshold.

    The labels of the ``self.k`` nearest neighbours are fetched with
    ``knn_repetitions_multilabel`` and counted with ``count_distinct_objects``.
    A label from ``self.list_of_all_labels`` is accepted when its count is at
    least ``self.fraction_knn_thresholds[label]``.

    Args:
        sample_distances: distance data for the sample, passed straight to
            ``knn_repetitions_multilabel``.

    Returns:
        A list of the accepted labels, in the order of
        ``self.list_of_all_labels``.
    """
    nearest_labels = knn_repetitions_multilabel(
        sample_distances, self.records, self.train_labels, self.k
    )
    neigh_counts = count_distinct_objects(nearest_labels)

    result = []
    # For each label check the threshold.
    for label in self.list_of_all_labels:
        # A label that no neighbour carries counts as zero rather than raising
        # KeyError; this mirrors the lookup used in calculate_label_counts.
        count = neigh_counts.get(label, 0)
        threshold = self.fraction_knn_thresholds[label]
        PRINTER(
            '[classify]: condidering label, neigh_counts[label], self.fraction_knn_thresholds[label] '
            + str(label) + ", " + str(count) + ", " + str(threshold)
        )
        if count >= threshold:
            result.append(label)
    PRINTER('[classify]: result ' + str(result))
    return result
def classify(self, sample_distances):
    """Pick every known label that is frequent enough among the neighbours.

    ``knn_repetitions_multilabel`` supplies the labels of the ``self.k``
    nearest neighbours and ``count_distinct_objects`` turns them into per-label
    counts. Each label in ``self.list_of_all_labels`` is kept when its count is
    greater than or equal to ``self.fraction_knn_thresholds[label]``.

    Args:
        sample_distances: distance data for the sample being classified; it is
            forwarded unchanged to ``knn_repetitions_multilabel``.

    Returns:
        The kept labels as a list, ordered as in ``self.list_of_all_labels``.
    """
    nearest_labels = knn_repetitions_multilabel(
        sample_distances, self.records, self.train_labels, self.k
    )
    neigh_counts = count_distinct_objects(nearest_labels)
    thresholds = self.fraction_knn_thresholds

    result = []
    for label in self.list_of_all_labels:
        # Use .get so a label missing from the neighbourhood is treated as a
        # zero count (as calculate_label_counts does) instead of a KeyError.
        occurrences = neigh_counts.get(label, 0)
        required = thresholds[label]
        PRINTER(
            "[classify]: condidering label, neigh_counts[label], self.fraction_knn_thresholds[label] "
            + str(label)
            + ", "
            + str(occurrences)
            + ", "
            + str(required)
        )
        if occurrences >= required:
            result.append(label)
    PRINTER("[classify]: result " + str(result))
    return result
def calculate_label_counts(self, distances, records, train_labels, k, list_of_all_labels):
    '''
    Calculates the label counts c and c_prim.

    For each index in `records`, the labels of its k + 1 nearest neighbours
    (from `knn_repetitions_multilabel`) are counted with
    `count_distinct_objects`, the record's own labels are subtracted once, and
    the resulting count of every label in `list_of_all_labels` is tallied.

    Args:
        distances: indexable by record index; each entry is the distance
            vector given to `knn_repetitions_multilabel`.
        records: iterable of record indices.
        train_labels: indexable by record index; each entry is an iterable of
            that record's labels.
        k: number of neighbours to consider.
        list_of_all_labels: iterable of all labels to tally.

    Returns:
        dict: label -> {'c': counter, 'c_prim': counter}, where each counter is
        a defaultdict keyed by neighbour count. 'c' is incremented for records
        that have the label, 'c_prim' for records that do not.
    '''
    # Init c and c' per class. `int` is used as the factory instead of a
    # lambda: it also starts at 0, and the result can then be pickled.
    label_counts = {}
    for label in list_of_all_labels:
        label_counts[label] = {
            'c': defaultdict(int),
            'c_prim': defaultdict(int),
        }

    # Find c and c' per class.
    for ind in records:
        # k + 1 because the sample itself is taken into account as well.
        labels = knn_repetitions_multilabel(
            distances[ind], records, train_labels, k + 1)
        neigh_counts = count_distinct_objects(labels)

        # Subtract all the labels of the sample being classified.
        # NOTE(review): assumes the sample is among the neighbours returned
        # above; verify the behaviour for tied distances.
        sample_labels = train_labels[ind]
        for self_label in sample_labels:
            neigh_counts[self_label] -= 1

        oracle_ans = set(sample_labels)
        for label in list_of_all_labels:
            seen = neigh_counts.get(label, 0)
            # Label in the oracle labelling -> 'c', otherwise -> 'c_prim'.
            key = 'c' if label in oracle_ans else 'c_prim'
            label_counts[label][key][seen] += 1
    return label_counts