def k_nearest_neighbor(distance_matrix,labels,exemplars,k,sourcedir,destdir): counter = 0 tp = fp = 0 knn_index = [] selection = set() for row in distance_matrix: idx = argsort(row) # print "idx = ",idx # print "k = ",k knn_index = idx[:int(k)] w_cat = 0 ## wrong category r_cat = 0 ## right category tp_once = 0 for x in knn_index: # print("datapoint = %s, index = %s , exemplar/neighbor = %s") % (labels[counter] , idx , exemplars[idx]) part1Class, part1Details = labels[counter].split("-") category1 = dynamic.categorisePayload(part1Class, categories) # Classify the header -> return 14 for 14.1, 14.1.1, 14.1.2 part2Class, part2Details = exemplars[x].split("-") category2 = dynamic.categorisePayload(part2Class, categories) # Classify the header -> return 14 for 14.1, 14.1.1, 14.1.2 if category1 != category2: w_cat = w_cat + 1 print("datapoint = %s, index = %s , exemplar/neighbor = %s , FP") % (labels[counter] , idx[:int(k)] , exemplars[x]) else: r_cat = r_cat + 1 print("datapoint = %s, index = %s , exemplar/neighbor = %s , TP") % (labels[counter] , idx[:int(k)] , exemplars[x]) if r_cat > w_cat: tp = tp + 1 print "Majority vote True: ", r_cat print "Adding to set: ", labels[counter] selection.add(labels[counter]) else: fp = fp + 1 print "Majority vote False: ", w_cat counter = counter + 1 dimension_c = len(exemplars) dimension_r = len(labels) print "Exemplars: ", dimension_c print "Datapoints: ", dimension_r print "Total TP = ",tp print "Total FP = ",fp selection = list(selection) selection.sort() s = open("knn-select.txt",'a') for item in selection: s.write("\n" + str(item)) os.system("cp " + sourcedir + "/" + str(item) + " " + destdir) s.close() print "Selection set size: ", len(selection) f = open("knn.txt",'a') # f.write("\n####### NEW RECORD ########") # f.write( "\nExemplars: " + str(dimension_c) ) # f.write( "\nDatapoints: " + str(dimension_r) ) f.write( "\n" + k + "," + str(dimension_c) + "," + str(dimension_r) + "," + str(tp) + "," + str(fp) ) # f.write( 
"\nTotal FP = " + str(fp) ) f.close()
def sum_labels(labels, distance_matrix):
    """Split each label's distances into same-category and other-category sets.

    Every label has the form ``<class>-<details>``; the class part is mapped
    to a category with ``dynamic.categorisePayload`` (using the module-level
    ``categories``).  For each label, the entries of its distance row are
    sorted into two groups depending on whether the column's label falls in
    the same category.

    Args:
        labels: list of label strings; position ``i`` corresponds to row
            ``i`` and column ``i`` of ``distance_matrix``.
        distance_matrix: indexable as ``distance_matrix[row][col]``.

    Returns:
        A 5-tuple of dicts, all keyed by label:
        ``(possfpval, possfp_labels, res, val, match_labels)`` where
          * ``val`` / ``match_labels`` hold the distances to, and labels of,
            the columns in the same category;
          * ``possfpval`` / ``possfp_labels`` hold the same for columns in a
            different category (a label with no such column has no entry);
          * ``res[label]`` is a list holding the sum of ``val[label]``.

    Raises:
        ValueError: if a label does not contain exactly one ``-``.

    If the same label text occurs more than once, every occurrence reads the
    row of its first occurrence (``labels.index``) and appends to the same
    dictionary entries, so ``res[label]`` then holds one sum per occurrence.

    NOTE(review): the flattened source does not show whether the append to
    ``res`` sat inside or after the summing loop; this version records one
    total per visit of a label -- confirm against callers.
    """
    # Categorise every label once up front instead of once per pair.
    label_categories = []
    for name in labels:
        header, _ = name.split("-")
        label_categories.append(dynamic.categorisePayload(header, categories))

    val = {}
    match_labels = {}
    possfpval = {}
    possfp_labels = {}
    res = {}

    for label, own_category in zip(labels, label_categories):
        # Looked up once per label rather than once per column; index() keeps
        # the first-occurrence row selection for repeated labels.
        row = distance_matrix[labels.index(label)]

        for col, other_category in enumerate(label_categories):
            if own_category == other_category:  ## match main category or sub-cat
                val.setdefault(label, []).append(row[col])
                match_labels.setdefault(label, []).append(labels[col])
            else:  ## categories differ: candidate false positive
                possfpval.setdefault(label, []).append(row[col])
                possfp_labels.setdefault(label, []).append(labels[col])

        res.setdefault(label, []).append(sum(val[label]))

    return possfpval, possfp_labels, res, val, match_labels  ## return result and val dict
def k_nearest_neighbor(distance_matrix,labels,exemplars,k): counter = 0 tp = fp = 0 knn_index = [] for row in distance_matrix: idx = argsort(row) # print "idx = ",idx # print "k = ",k knn_index = idx[:int(k)] w_cat = 0 ## wrong category r_cat = 0 ## right category tp_once = 0 for x in knn_index: # print("datapoint = %s, index = %s , exemplar/neighbor = %s") % (labels[counter] , idx , exemplars[idx]) part1Class, part1Details = labels[counter].split("-") category1 = dynamic.categorisePayload(part1Class, categories) # Classify the header -> return 14 for 14.1, 14.1.1, 14.1.2 part2Class, part2Details = exemplars[x].split("-") category2 = dynamic.categorisePayload(part2Class, categories) # Classify the header -> return 14 for 14.1, 14.1.1, 14.1.2 if category1 != category2: w_cat = w_cat + 1 print("datapoint = %s, index = %s , exemplar/neighbor = %s , FP") % (labels[counter] , idx[:int(k)] , exemplars[x]) else: r_cat = r_cat + 1 print("datapoint = %s, index = %s , exemplar/neighbor = %s , TP") % (labels[counter] , idx[:int(k)] , exemplars[x]) counter = counter + 1 if r_cat > w_cat: tp = tp + 1 print "Majority vote True: ", r_cat else: fp = fp + 1 print "Majority vote False: ", w_cat dimension_c = len(exemplars) dimension_r = len(labels) print "Exemplars: ", dimension_c print "Datapoints: ", dimension_r print "Total TP = ",tp print "Total FP = ",fp f = open("knn.txt",'a') # f.write("\n####### NEW RECORD ########") # f.write( "\nExemplars: " + str(dimension_c) ) # f.write( "\nDatapoints: " + str(dimension_r) ) f.write( "\n" + k + "," + str(dimension_c) + "," + str(dimension_r) + "," + str(tp) + "," + str(fp) ) # f.write( "\nTotal FP = " + str(fp) ) f.close()
def loadModel(filename,categories):
    """Load exemplar thresholds from ``filename`` into the global ``threshDict``.

    Each non-blank line must look like ``<label>,<threshold>`` where the label
    itself is ``<class>-<details>``.  The class part is mapped to a category
    with ``dynamic.categorisePayload`` and the pair ``[label, threshold]`` is
    appended to ``threshDict[category]``, creating the list on first use.
    The threshold is stored exactly as read, i.e. as a string.

    Args:
        filename: path of the model file to read.
        categories: passed through to ``dynamic.categorisePayload``.

    Raises:
        ValueError: if a non-blank line does not contain exactly one ``,`` or
            its label does not contain exactly one ``-``.
    """
    global threshDict
    with open(filename,'r') as model_file:
        for raw_line in model_file:
            # Strip before testing: lines read from a file still carry their
            # newline, so comparing the raw line with "" never matched and a
            # blank line used to crash on the unpacking below.
            line = raw_line.strip()
            if not line:
                continue  ## ignore empty lines
            ex_label, ex_thresh = line.split(',')
            label_class, _ = ex_label.split("-")
            categ = dynamic.categorisePayload(label_class, categories)  # e.g. 14 for 14.1, 14.1.1, 14.1.2
            ## Append the model to the dictionary for the category
            threshDict.setdefault(categ, []).append([ex_label,ex_thresh])
temp = [] counter = 0 for row in distance_matrix: if counter <= max: temp = list(row) temp.insert(0,labels[counter]) writer.writerow(temp) counter = counter + 1 counter = 0 tp = fp = 0 for row in distance_matrix: idx = nonzero(row==min(row))[0][0] # print("datapoint = %s, index = %s , exemplar/neighbor = %s") % (labels[counter] , idx , exemplars[idx]) part1Class, part1Details = labels[counter].split("-") category1 = dynamic.categorisePayload(part1Class, categories) # Classify the header -> return 14 for 14.1, 14.1.1, 14.1.2 part2Class, part2Details = exemplars[idx].split("-") category2 = dynamic.categorisePayload(part2Class, categories) # Classify the header -> return 14 for 14.1, 14.1.1, 14.1.2 if category1 == category2: print("datapoint = %s, index = %s , exemplar/neighbor = %s , TP") % (labels[counter] , idx , exemplars[idx]) tp = tp + 1 if category1 != category2: print("datapoint = %s, index = %s , exemplar/neighbor = %s , FP") % (labels[counter] , idx , exemplars[idx]) fp = fp + 1 counter = counter + 1 print "Exemplars: ", dimension_c print "Datapoints: ", dimension_r print "Total TP = ",tp print "Total FP = ",fp