try: aux_file = sys.argv[1] except: print "Argument expected: weka-csv (inst#,actual,predicted,error,prediction) (with header) file with labels" sys.exit(-1) try: label_no = int(sys.argv[2]) except: print "Argument expected: label to be overwritten no." sys.exit(-1) print "Loading labels' (weka-csv) file:", aux_file #inst#,actual,predicted,error,prediction lines = open(aux_file).readlines() labels2bool = list( line.split(",")[2].find("present")>=0 for line in lines[1:] if len(line.strip())>0 ) print "",len(labels2bool),"rows loaded" #print labels2bool labels = [] for ocur in labels2bool: if ocur: labels.append([label_no]) else: labels.append([]) aux_file = aux_file + "_jrs.txt" print "Writing to file:", aux_file jrs_io.store_labels(open(aux_file,"w"), labels)
# NOTE(review): fragment -- it begins inside the `except` handler of an argument
# check whose `try` is not visible here (presumably the one assigning labels1_path).
# Visible behaviour: reads the second labels path (sys.argv[2]) and the output path
# (sys.argv[3]), exiting with -1 when either is missing. Loads both label files with
# jrs_io.load_labels; when loading the second file fails, one empty label list per
# row of labels1 is used instead. Row i of the output is the sorted union of
# labels1[i] and labels2[i]; the result is written to out_path via jrs_io.store_labels.
# NOTE(review): the bare `except:` clauses catch every error, not only the expected
# one, and rows are paired by index, so labels2 must be at least as long as labels1.
print "Argument expected: path to a labels' file." sys.exit(-1) try: labels2_path = sys.argv[2] except: print "Argument expected: path to a second labels' file." sys.exit(-1) try: out_path = sys.argv[3] except: print "Argument expected: output labels' file." sys.exit(-1) print "Loading labels' 1 file:", labels1_path labels1 = jrs_io.load_labels(open(labels1_path)) try: print "Loading labels' 2 file:", labels2_path labels2 = jrs_io.load_labels(open(labels2_path)) except: print "Failed loading file", labels2_path print "Using empty file" labels2 = [[] for i in xrange(len(labels1))] labels12 = [] for i in xrange(len(labels1)): labels12.append(sorted(set(labels1[i] + labels2[i]))) print "Writing to 1+2 file:", out_path jrs_io.store_labels(open(out_path, "w"), labels12)
# NOTE(review): fragment -- it starts with the tail of an argument check that is not
# visible here, and the script may continue past the end of this line.
# Visible behaviour: applies one shared random permutation to the label sets and to
# the distance-matrix columns.
#  - Loads the label sets from labels_path, shuffles the index list range(n) with
#    random.shuffle and stores the reordered labels to labels_path + "_shuffled".
#  - Loads the distance matrix from distance_matrix_path, passing an identity function
#    as the second argument of jrs_io.load_data (presumably the per-value cast -- confirm).
#  - Appends the untouched indices n..len(distances)-1 to the permutation and rebuilds
#    every row in that column order, collecting the rows in distances_tmp.
# NOTE(review): shuffling and concatenating `range(...)` relies on Python 2, where
# range returns a list; the bare `except: pass` hides any failure of the size print.
sys.exit(-1) print "The program shuffles distances and trainingLabels." print "Loading labels' file:", labels_path labels = jrs_io.load_labels(open(labels_path)) n = len(labels) print "",n," labels' sets loaded." order = range(n) random.shuffle(order) print "Random order:", order[:30],"..." print "Shuffling labels..." labels_shuffled = [labels[ix] for ix in order] jrs_io.store_labels(open(labels_path+"_shuffled","w"), labels_shuffled) print "Loading distances' file:", distance_matrix_path distances = jrs_io.load_data(open(distance_matrix_path), lambda x: x) try: print "",len(distances), "x",len(distances[0]) except: pass print "Extending order..." order = order + range(n, len(distances)) print "Extended order:", order print "Shuffling columns" distances_tmp = [] for row in distances: new_row = [ row[ix] for ix in order ] distances_tmp.append(new_row)
# NOTE(review): fragment -- it starts with the tail of an argument check that is not
# visible here and ends on the `for row in distances:` header, whose body is cut off.
# Visible behaviour: loads the label sets from labels_path, shuffles the index list
# range(n) with random.shuffle and stores the reordered labels to
# labels_path + "_shuffled". It then loads the distance matrix from
# distance_matrix_path, passing an identity function as the second argument of
# jrs_io.load_data (presumably the per-value cast -- confirm), and extends the
# permutation with the untouched indices n..len(distances)-1 before iterating
# over the rows.
# NOTE(review): shuffling and concatenating `range(...)` relies on Python 2, where
# range returns a list; the bare `except: pass` hides any failure of the size print.
sys.exit(-1) print "The program shuffles distances and trainingLabels." print "Loading labels' file:", labels_path labels = jrs_io.load_labels(open(labels_path)) n = len(labels) print "", n, " labels' sets loaded." order = range(n) random.shuffle(order) print "Random order:", order[:30], "..." print "Shuffling labels..." labels_shuffled = [labels[ix] for ix in order] jrs_io.store_labels(open(labels_path + "_shuffled", "w"), labels_shuffled) print "Loading distances' file:", distance_matrix_path distances = jrs_io.load_data(open(distance_matrix_path), lambda x: x) try: print "", len(distances), "x", len(distances[0]) except: pass print "Extending order..." order = order + range(n, len(distances)) print "Extended order:", order print "Shuffling columns" distances_tmp = [] for row in distances:
# NOTE(review): fragment -- it begins inside the `except` handler of the check for the
# second labels path; the assignments of labels1_path and labels2_path are not visible.
# Visible behaviour: reads the output path from sys.argv[3] (exits with -1 when it is
# missing) and loads both label files with jrs_io.load_labels. When loading the second
# file fails, one empty label list per row of labels1 is used instead. Row i of the
# output is the sorted union of labels1[i] and labels2[i]; the result is written to
# out_path via jrs_io.store_labels.
# NOTE(review): the bare `except:` clauses catch every error, not only the expected
# one, and rows are paired by index, so labels2 must be at least as long as labels1.
print "Argument expected: path to a second labels' file." sys.exit(-1) try: out_path = sys.argv[3] except: print "Argument expected: output labels' file." sys.exit(-1) print "Loading labels' 1 file:", labels1_path labels1 = jrs_io.load_labels(open(labels1_path)) try: print "Loading labels' 2 file:", labels2_path labels2 = jrs_io.load_labels(open(labels2_path)) except: print "Failed loading file", labels2_path print "Using empty file" labels2 = [[] for i in xrange(len(labels1))] labels12 = [] for i in xrange(len(labels1)): labels12.append( sorted(set(labels1[i]+labels2[i])) ) print "Writing to 1+2 file:", out_path jrs_io.store_labels(open(out_path,"w"), labels12)
# NOTE(review): fragment -- this is the tail of a voting loop whose header(s) are not
# visible here (presumably one pass per sample and per candidate label -- confirm).
# The leading `#subs ...` / `#votes_for_... ` / `#print ...` statements are disabled code.
# Visible behaviour: votes_for_yes and votes_for_no are each summed, falling back to
# 0.0 when summing raises; `label` is appended to selected_sample_labels when
# yes > no. The sorted selection is appended to predicted_labels and printed next to
# all_sample_labels. Finally jrs_io is imported (after adding ../ and ../../ to
# sys.path) and predicted_labels is written to out_path with jrs_io.store_labels.
# NOTE(review): the disabled lines inside the try blocks show that an average was
# used before the plain sum; the bare `except:` clauses hide any summing error.
#subs = max(votes[0] - diff,0.0) #print "","subs=",subs #votes_for_yes = [v-subs for v in votes_for_yes] #votes_for_no = [v-subs for v in votes_for_no] #print "","sample_no:",sample_no," label:",label,"votes_for_yes:",votes_for_yes,"votes_for_no:",votes_for_no try: #yes = float(sum(votes_for_yes))/len(votes_for_yes) yes = sum(votes_for_yes) except: yes = 0.0 try: #no = float(sum(votes_for_no))/len(votes_for_no) no = sum(votes_for_no) except: no = 0.0 if yes > no: selected_sample_labels.append(label) predicted_labels.append(sorted(selected_sample_labels)) print "", "all labels:", all_sample_labels, " -> sel:", sorted( selected_sample_labels), "\n" print "STORING TO FILE:", out_path import sys sys.path.append(r'../') sys.path.append(r'../../') import jrs_io jrs_io.store_labels(open(out_path, "w"), predicted_labels)
# NOTE(review): fragment -- the `else` branch (def eval) continues past the end of
# this line, so only its first statements are visible.
# FINALTEST mode: streams distance_matrix_path line by line, skips rows with index
# below FINALTEST_START and stops at the first row with index >= FINALTEST_END.
# Each kept row is split on whitespace and cast with cast_method, the columns
# training_range[0]:training_range[1] are passed with training_labels to
# multilabel_classifier, and the result is collected in predicted_labels. The
# predictions are stored to FINAL_RESULT_PATH, then the average number of labels per
# prediction and the elapsed time are printed.
# Otherwise: defines eval(), which classifies testing2training_distances with
# jrs_multilabel_classifier.classify_multilabel and scores the result with
# jrs_evaluation.jrs_evaluate.
# NOTE(review): the average divides by len(lcount) and raises ZeroDivisionError when
# no row falls inside the range; `eval` shadows the builtin of the same name.
if FINALTEST: start = time.clock() print "Final predicting..." print " loading from file:", distance_matrix_path, " in range", FINALTEST_START, "-", FINALTEST_END predicted_labels = [] for i, line in enumerate(open(distance_matrix_path).xreadlines()): if i % 1000 == 0: print "", i, "rows processed..." if i >= FINALTEST_END: break if i < FINALTEST_START: continue row = [cast_method(x) for x in line.split()] final2training_distances = row[training_range[0]:training_range[1]] predicted_labels.append( multilabel_classifier(final2training_distances, training_labels)) jrs_io.store_labels(open(FINAL_RESULT_PATH, "w"), predicted_labels) lcount = [len(ll) for ll in predicted_labels] print " avg labels in predicted:", float(sum(lcount)) / (len(lcount)) print " done in", (time.clock() - start), "sec..." print "------------------------------------------" else: def eval(): start = time.clock() print "Calculating predictions of ", len( testing_labels), " labels' sets..." predicted_labels = jrs_multilabel_classifier.classify_multilabel( testing2training_distances, training_labels, multilabel_classifier) accuracy, precision, recall, hammingloss, subset01loss, fmeasure = jrs_evaluation.jrs_evaluate( testing_labels, predicted_labels)
print "------------------------------------------" print "Classifying file:", features_matrix_path f = open(features_matrix_path) predicted_labels = [] for i,line in enumerate(f.xreadlines()): if i%1000==0: print "",i,"..." row = [int(x) for x in line.split()] ll = [] for label,feature_ixs in label2feature_ixs.iteritems(): says_yes = sum(row[ix]>0 for ix in feature_ixs) if says_yes >= len(feature_ixs)*MIN_FRACTION_OF_VOTES: ll.append(label) ll = sorted(ll) predicted_labels.append(ll) print "",i," oracle",labels[i]," pred",ll print "","len=",len(labels[:(i+1)]), len(predicted_labels) accuracy, precision, recall, hammingloss, subset01loss, fmeasure = jrs_evaluation.jrs_evaluate(labels[:(i+1)], predicted_labels) print "\t\t\t\t\t","%.2f" %precision,"%.2f" %recall,"%.2f" %fmeasure print "------------------------------------------" accuracy, precision, recall, hammingloss, subset01loss, fmeasure = jrs_evaluation.jrs_evaluate(labels, predicted_labels) print "\t\t\t\t\t","%.2f" %precision,"%.2f" %recall,"%.2f" %fmeasure print "Wrining results to", out_path jrs_io.store_labels(open(out_path,"w"), predicted_labels)
# NOTE(review): fragment -- the `else` branch (def eval) may continue past the end of
# this line, so only part of eval() is visible.
# FINALTEST mode: streams distance_matrix_path line by line, skips rows with index
# below FINALTEST_START and stops at the first row with index >= FINALTEST_END.
# Each kept row is split on whitespace and cast with cast_method, the columns
# training_range[0]:training_range[1] are passed with training_labels to
# multilabel_classifier, and the result is collected in predicted_labels. The
# predictions are stored to FINAL_RESULT_PATH, then the average number of labels per
# prediction and the elapsed time are printed.
# Otherwise: defines eval(), which classifies testing2training_distances with
# jrs_multilabel_classifier.classify_multilabel, prints the first ten oracle and
# predicted label sets, scores them with jrs_evaluation.jrs_evaluate and prints
# accuracy, precision, recall and fmeasure.
# NOTE(review): the average divides by len(lcount) and raises ZeroDivisionError when
# no row falls inside the range; `eval` shadows the builtin of the same name; if
# jrs_evaluate raises, the metrics printed afterwards are presumably unbound -- confirm.
############################################################################################################################# ############################################################################################################################# if FINALTEST: start = time.clock() print "Final predicting..." print " loading from file:",distance_matrix_path," in range",FINALTEST_START,"-",FINALTEST_END predicted_labels = [] for i,line in enumerate(open(distance_matrix_path).xreadlines()): if i%1000 == 0: print "",i,"rows processed..." if i>=FINALTEST_END: break if i<FINALTEST_START: continue row = [cast_method(x) for x in line.split()] final2training_distances = row[training_range[0]:training_range[1]] predicted_labels.append(multilabel_classifier(final2training_distances, training_labels)) jrs_io.store_labels(open(FINAL_RESULT_PATH,"w"), predicted_labels) lcount = [len(ll) for ll in predicted_labels] print " avg labels in predicted:", float(sum(lcount))/(len(lcount)) print " done in", (time.clock() - start), "sec..." print "------------------------------------------" else: def eval(): start = time.clock() print "Calculating predictions of ",len(testing_labels)," labels' sets..." predicted_labels = jrs_multilabel_classifier.classify_multilabel(testing2training_distances, training_labels, multilabel_classifier) try: print testing_labels[:10],"\n", predicted_labels[:10] accuracy, precision, recall, hammingloss, subset01loss, fmeasure = jrs_evaluation.jrs_evaluate(testing_labels, predicted_labels) except: print "[knn] Error in jrs_evaluation.jrs_evaluate(testing_labels, predicted_labels):",testing_labels, predicted_labels print " accuracy:", accuracy,"\n precision:", precision,"\n recall:", recall,"\n fmeasure:", fmeasure
try: aux_file = sys.argv[1] except: print "Argument expected: weka-csv (inst#,actual,predicted,error,prediction) (with header) file with labels" sys.exit(-1) try: label_no = int(sys.argv[2]) except: print "Argument expected: label to be overwritten no." sys.exit(-1) print "Loading labels' (weka-csv) file:", aux_file #inst#,actual,predicted,error,prediction lines = open(aux_file).readlines() labels2bool = list( line.split(",")[2].find("present") >= 0 for line in lines[1:] if len(line.strip()) > 0) print "", len(labels2bool), "rows loaded" #print labels2bool labels = [] for ocur in labels2bool: if ocur: labels.append([label_no]) else: labels.append([]) aux_file = aux_file + "_jrs.txt" print "Writing to file:", aux_file jrs_io.store_labels(open(aux_file, "w"), labels)