def process():
    """Compare clustering results against the known inlier/outlier loop closures.

    Reads the pose-graph input file (whose last N edges are the loop
    closures), then the clustering output file (CLUSTER / CLUSTER_R records),
    and reports which clusters are consistent, i.e. contain only inliers or
    only outliers.  Relies on module-level ``sys``, ``np`` and ``readreverse``.
    """
    # folder_path = "/home/amber/stew/slam++/bin"
    # inlier_quantity = 20   # manhattan: 2097 garage: 4615 mit: 20 intel: 256
    # outlier_quantity = 20
    # read input file, find last N edges (where N = outlier_quantity + inlier_quantity)
    print(sys.argv)
    if len(sys.argv) > 1:
        posegraph_input_path = sys.argv[1]
        output_path = sys.argv[2]
        inlier_quantity = int(sys.argv[3])
        outlier_quantity = int(sys.argv[4])
    lc_edge_quantity = inlier_quantity + outlier_quantity

    # Collect the ID pairs of the last lc_edge_quantity edges.  The file is
    # read back-to-front, so the first outlier_quantity lines are outliers.
    i = 0
    lc_pair_search_phrases = []       # [vertex_from, vertex_to] per loop closure
    outlier_pair_search_phrases = []
    inlier_pair_search_phrases = []
    with readreverse(posegraph_input_path, encoding="utf-8") as obj:
        while i < lc_edge_quantity:
            line_splitted = obj.readline().strip('\n').split(' ')
            lc_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            if i < outlier_quantity:
                outlier_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            else:
                inlier_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            i += 1

    # Edges present in both the inlier and the outlier list count as inliers.
    edge_twice = [e for e in outlier_pair_search_phrases
                  if e in inlier_pair_search_phrases]
    outlier_pair_search_phrases = [p for p in outlier_pair_search_phrases
                                   if p not in edge_twice]

    # Read the output file and group CLUSTER records into clusters; any other
    # non-blank, non-CLUSTER_R line starts a new cluster.
    clusters = [[]]
    edge_counter = 0
    cluster_index = 0
    with open(output_path, encoding="utf-8") as obj:
        # Should always terminate: some edges are deleted during the
        # incremental clustering, so edge_counter eventually catches up.
        while edge_counter < lc_edge_quantity:
            line = obj.readline()
            if line != '\n':
                edge_counter += 1
                line_splitted = line.strip('\n').split(' ')
                if line_splitted[0] == 'CLUSTER':
                    clusters[-1].append([int(line_splitted[1]), int(line_splitted[2])])
                elif line_splitted[0] != 'CLUSTER_R':
                    clusters.append([])
                    cluster_index += 1
    clusters.remove([])

    # Collect the rejected loops (CLUSTER_R records).  BUG FIX: the file
    # handle was previously opened without ever being closed.
    rejected_loops = []
    with open(output_path, 'r') as content:
        for line in content:
            line_splitted = line.strip('\n').split(' ')
            if line_splitted[0] == 'CLUSTER_R':
                rejected_loops.append([line_splitted[1], line_splitted[2]])

    rejected_outliers = []
    rejected_inliers = []
    for loop in rejected_loops:
        if loop in outlier_pair_search_phrases:
            rejected_outliers.append(loop)
        else:
            rejected_inliers.append(loop)

    # Attach an extra column to every cluster to hold the per-edge decision.
    clusters_array_list = []
    for cluster in clusters:
        np_array = np.array(cluster)
        empty_column = np.zeros([len(cluster), 1], dtype=int)
        clusters_array_list.append(np.append(np_array, empty_column, axis=1))

    # Mark each edge: -1 for a known outlier, +1 for a known inlier.
    # (The original mixed clusters[] and clusters_array_list[] indexing here;
    # both hold the same vertex ids, so using one array is equivalent.)
    for i in range(0, len(clusters_array_list)):
        for j in range(0, len(clusters_array_list[i])):
            pair = [str(clusters_array_list[i][j][0]),
                    str(clusters_array_list[i][j][1])]
            if pair in outlier_pair_search_phrases:
                clusters_array_list[i][j][2] = -1
            else:
                clusters_array_list[i][j][2] = 1

    # A cluster is consistent when every edge carries the same mark.
    decision = []
    cluster_nature = []        # +1: cluster full of inliers, -1: full of outliers
    inconsistant_cluster = []
    for i in range(0, len(clusters_array_list)):
        column_sum = sum(clusters_array_list[i][:, 2])
        if abs(column_sum) == len(clusters_array_list[i]):
            decision.append(True)
            if column_sum > 0:
                cluster_nature.append(1)
            else:
                cluster_nature.append(-1)
        else:
            decision.append(False)
            inconsistant_cluster.append(clusters_array_list[i])

    wrong_clusters = []
    print('rejected outliers (', len(rejected_outliers), ') includes: ', rejected_outliers)
    print('rejected inliers (', len(rejected_inliers), ') includes: ', rejected_inliers)
    print('consistant clusters/all clusters: ', sum(decision), '/', len(decision))
    if sum(decision) != len(decision):
        for i in range(0, len(decision)):
            if decision[i] == False:
                print('wrong clusters: ', i)
                wrong_clusters.append(clusters_array_list[i])
                print('includes ', clusters_array_list[i])
    print('sum of all edges', sum([len(cluster) for cluster in clusters_array_list]))
def process():
    """Analyse per-edge scale (s) values across optimizer iterations.

    Reads the pose graph (last N edges are loop closures) and the s-value
    log, then reports inliers rejected / outliers accepted relative to the
    module-level ``threshold``.  Also relies on module-level ``sys``, ``pd``,
    ``plt``, ``readreverse`` and ``iteration``.
    """
    # inlier_quantity = 256
    # outlier_quantity = 256
    print(sys.argv)
    if len(sys.argv) > 1:
        inlier_quantity = int(sys.argv[1])
        outlier_quantity = int(sys.argv[2])
    posegraph_input_path = "input.g2o"  # e.g. .../MIT_random10_dcs50/mit_seed_1/input.g2o
    text_output_path = "s_value.txt"
    lc_edge_quantity = inlier_quantity + outlier_quantity

    # Read the input file back-to-front to find the last N edges.
    # BUG FIX: dropped an unused (and never closed) open() of the input file.
    i = 0
    all_lc_edge_pair_search_phrases = []   # [str0, str1] per loop closure
    outlier_pair_search_phrases = []
    # Row 0: vertex_from, row 1: vertex_to, rows 2..iteration+1: scale values.
    all_lc_edge_value = pd.DataFrame(columns=range(0, lc_edge_quantity),
                                     index=range(0, iteration + 2))
    with readreverse(posegraph_input_path, encoding="utf-8") as obj:
        while i < outlier_quantity + inlier_quantity:
            line_splitted = obj.readline().strip('\n').split(' ')
            all_lc_edge_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            # Fill from the last column, since the file is read in reverse.
            all_lc_edge_value[lc_edge_quantity - i - 1].iloc[0] = int(line_splitted[1])
            all_lc_edge_value[lc_edge_quantity - i - 1].iloc[1] = int(line_splitted[2])
            if i < outlier_quantity:
                outlier_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            i += 1
    all_lc_edge_pair_search_phrases = all_lc_edge_pair_search_phrases[::-1]  # reverse
    outlier_pair_search_phrases = outlier_pair_search_phrases[::-1]          # reverse

    # Sort columns by vertex_from and remember the permutation in an extra
    # row so the sorting can be undone later.
    all_lc_edge_value = all_lc_edge_value.sort_values(by=0, axis=1)
    input_edge_sequence = pd.DataFrame(all_lc_edge_value.columns)
    all_lc_edge_value.columns = range(0, lc_edge_quantity)  # refresh the columns
    # BUG FIX: DataFrame.append() was deprecated and removed in pandas 2.0.
    all_lc_edge_value = pd.concat(
        [all_lc_edge_value, input_edge_sequence.transpose()], ignore_index=True)

    # Read the s-value log: one value per edge per iteration.
    # BUG FIX: the file handle was never closed; use a with-block.
    i = 0
    with open(text_output_path, "r") as f:
        while i < lc_edge_quantity * iteration:
            line_splitted = f.readline().strip('\n').split(' ')
            if len(line_splitted) > 1:
                if i < lc_edge_quantity:
                    all_lc_edge_value[i].iloc[2] = float(line_splitted[1])
                else:
                    all_lc_edge_value[i % lc_edge_quantity].iloc[
                        i // lc_edge_quantity + 2] = float(line_splitted[1])
                # NOTE(review): i counts only non-blank value lines — confirm
                # the log uses blank lines purely as iteration separators.
                i += 1

    # Undo the sorting: the last row (index iteration + 2) holds the original
    # column order.  BUG FIX: generalized the hard-coded indices 31/32, which
    # were only valid for iteration == 30.
    all_lc_edge_value = all_lc_edge_value.sort_values(by=iteration + 2, axis=1)

    # Final-iteration value (row iteration + 1) of every inlier edge; values
    # below threshold mean the inlier was (wrongly) rejected.
    inlier_value = all_lc_edge_value.iloc[iteration + 1, 0:inlier_quantity]
    rejected_inlier = 0
    print('rejected inliers: ')
    for i in range(0, inlier_quantity):
        if inlier_value.iloc[i] < threshold:
            print([all_lc_edge_value.iloc[0, i], all_lc_edge_value.iloc[1, i]],
                  inlier_value.iloc[i])
            rejected_inlier += 1
    print('Num of rejected inliers: ', rejected_inlier)
    fig, ax = plt.subplots()
    plt.plot(range(0, inlier_quantity), inlier_value, 'o-')
    plt.legend(("inlier value", ))
    plt.savefig("inlier_value_iteration_" + str(iteration))
    plt.show()

    # Outliers whose final value exceeds the threshold were (wrongly) accepted.
    acceptted_outlier = 0
    print('acceptted outliers: ')
    outlier_value = all_lc_edge_value.iloc[iteration + 1,
                                           inlier_quantity:lc_edge_quantity]
    for i in range(0, outlier_quantity):
        if outlier_value.iloc[i] > threshold:
            print([all_lc_edge_value.iloc[0, i + inlier_quantity],
                   all_lc_edge_value.iloc[1, i + inlier_quantity]],
                  outlier_value.iloc[i])
            acceptted_outlier += 1
    print('Num of acceptted outliers: ', acceptted_outlier)
    fig, ax = plt.subplots()
    plt.plot(range(0, outlier_quantity), outlier_value, 'ro-')
    plt.legend(("outlier value", ))
    plt.savefig("outlier_value_iteration_" + str(iteration))
    plt.show()
def process(options):
    """Evaluate switchable-constraint optimization results for a pose graph.

    options: "2d" or "3d"; selects the EDGE_SE2/SE3_SWITCHABLE prefix.
    Reads <folder>/input.g2o, output.g2o and output.txt, prints outlier/inlier
    acceptance statistics, and optionally plots convergence curves.  Relies on
    module-level ``sys``, ``np``, ``pd``, ``plt``, ``readreverse``,
    ``threshold``, ``iteration``, ``generate_figure`` and
    ``examine_edge_option``.
    """
    # folder_path = "/home/amber/stew/test_backend/garage_seed_3"
    # pose_quantity = 1661; inlier_quantity = 4615; outlier_quantity = 20
    # options = "3d"
    print(sys.argv)
    if len(sys.argv) > 1:
        folder_path = sys.argv[1]
        pose_quantity = int(sys.argv[2])
        inlier_quantity = int(sys.argv[3])
        outlier_quantity = int(sys.argv[4])
    posegraph_input_path = folder_path + "/input.g2o"
    output_path = folder_path + "/output.g2o"
    text_output_path = folder_path + "/output.txt"
    lc_edge_quantity = inlier_quantity + outlier_quantity

    # Loop-invariant: pick the switchable-edge prefix once.
    if options == "2d":
        prefix = "EDGE_SE2_SWITCHABLE"
    elif options == "3d":
        prefix = "EDGE_SE3_SWITCHABLE"

    # Read the input file back-to-front: the last lc_edge_quantity edges are
    # loop closures, the first outlier_quantity of them the outliers.
    # BUG FIX: dropped an unused (and never closed) open() of the input file.
    i = 0
    all_lc_edge_pair_search_phrases = []   # [str0, str1] per loop closure
    outlier_pair_search_phrases = []
    with readreverse(posegraph_input_path, encoding="utf-8") as obj:
        while i < outlier_quantity + inlier_quantity:
            line_splitted = obj.readline().strip('\n').split(' ')
            if i < outlier_quantity:
                outlier_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            # Garage dataset edges are already sorted, so sequential edges
            # (|from - to| == 1) are odometry, not loop closures.
            if abs(int(line_splitted[1]) - int(float(line_splitted[2]))) != 1:
                all_lc_edge_pair_search_phrases.append(
                    [line_splitted[1], str(int(float(line_splitted[2])))])
                i += 1  # only increment when it's a nonsequential edge

    # Read output.g2o and map each outlier edge to its switch-variable id.
    # Array rows: [switch_counter, vertex_from, vertex_to].
    outlier_id_pair = np.zeros((outlier_quantity, 3), dtype=int)
    i = 0
    with open(output_path, "r") as f:
        while i < outlier_quantity:
            x_splitted = f.readline().strip('\n').split(' ')
            if x_splitted[0] == prefix:
                if [x_splitted[1], x_splitted[2]] in outlier_pair_search_phrases:
                    outlier_id_pair[i] = np.array([int(x_splitted[3]),
                                                   int(x_splitted[1]),
                                                   int(x_splitted[2])])
                    i += 1

    # DataFrame: column name = switch counter, row 0 = [vertex_from, vertex_to].
    outlier_pair_df = pd.DataFrame(columns=outlier_id_pair[:, 0], index=[0])
    for i in range(0, outlier_quantity):
        outlier_id = outlier_id_pair[:, 0][i]
        outlier_pair_df[outlier_id].iloc[0] = outlier_id_pair[i, 1:3]

    # 's<switch counter>:' strings for searching output.txt.
    outlier_id_search_phrases = []
    for rows in outlier_id_pair:
        outlier_id_search_phrases.append("s" + str(rows[0]) + ":")

    # Same switch-variable mapping for every loop-closure edge.
    all_edge_id_pair = np.zeros((lc_edge_quantity, 3), dtype=int)
    i = 0
    with open(output_path, "r") as f:
        while i < lc_edge_quantity:
            x_splitted = f.readline().strip('\n').split(' ')
            if x_splitted[0] == prefix:
                if [x_splitted[1], x_splitted[2]] in all_lc_edge_pair_search_phrases:
                    all_edge_id_pair[i] = np.array([int(x_splitted[3]),
                                                    int(x_splitted[1]),
                                                    int(x_splitted[2])])
                    i += 1

    all_edge_pair_df = pd.DataFrame(columns=all_edge_id_pair[:, 0], index=[0])
    for i in range(0, lc_edge_quantity):
        e_id = all_edge_id_pair[:, 0][i]
        all_edge_pair_df[e_id].iloc[0] = all_edge_id_pair[i, 1:3]

    # Search phrases for the switch variables of all LC edges; pose zero is
    # dropped in rtabmap, hence the offset by pose_quantity.
    lc_edge_search_phrases = []
    for i in range(pose_quantity, pose_quantity + lc_edge_quantity):
        lc_edge_search_phrases.append("s" + str(i) + ":")

    # Row 0: switch value, row 1: [vertex_from, vertex_to],
    # row 2: bool, switched on/off relative to threshold.
    all_lc_edge_value = pd.DataFrame(columns=lc_edge_search_phrases, index=[0, 1, 2])
    i = 0
    with readreverse(text_output_path, encoding="utf-8") as obj:
        while i < lc_edge_quantity:  # only read the last iteration
            line_splitted = obj.readline().strip('\n').split(' ')
            if len(line_splitted) > 1:
                if str(line_splitted[1]) in lc_edge_search_phrases:
                    key = str(line_splitted[1])
                    all_lc_edge_value[key][0] = float(line_splitted[4])
                    # 's1234:' -> 1234
                    id_number_only = int(key.strip('s').strip(':'))
                    all_lc_edge_value[key][1] = all_edge_pair_df[id_number_only].iloc[0]
                    i += 1

    # Precision/recall style analysis of outliers vs inliers.
    for column in all_lc_edge_value:
        all_lc_edge_value[column][2] = (all_lc_edge_value[column][0] > threshold)

    outlier_analysis = all_lc_edge_value[outlier_id_search_phrases]
    if generate_figure == True:
        fig, ax = plt.subplots()
        plt.plot(range(0, outlier_quantity), (outlier_analysis.iloc[0]), 'ro-')
        plt.legend(("outlier value", ))
        plt.savefig("outlier_variance")
        plt.show()

    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    # BUG FIX: -1 is no longer accepted for max_colwidth; None = unlimited.
    pd.set_option('display.max_colwidth', None)
    print("threshold: ", threshold)
    print("outliers acceptted: ", sum(outlier_analysis.iloc[2]), "/", outlier_quantity)
    print("accepted outliers: ",
          outlier_analysis.loc[:, outlier_analysis.iloc[2] == True].columns)
    print("accepted outlier: ")
    print(outlier_analysis.loc[:, outlier_analysis.iloc[2] == True])

    # BUG FIX: the positional axis argument to drop() was removed in pandas 2.0.
    inlier_analysis = all_lc_edge_value.drop(outlier_id_search_phrases, axis=1)
    if generate_figure == True:
        fig, ax = plt.subplots()
        plt.plot(range(0, inlier_quantity), (inlier_analysis.iloc[0]), 'o-')
        plt.legend(("inlier value", ))
        plt.savefig("inlier_variance")
        plt.show()
    print("inliers acceptted: ", sum(inlier_analysis.iloc[2]), "/",
          lc_edge_quantity - outlier_quantity)
    print("rejected inlier: ")
    print(inlier_analysis.loc[:, inlier_analysis.iloc[2] == False])
    print()

    # Convergence curve of all LC edge values over every iteration.
    if examine_edge_option == 1:
        # Row 0 of each column accumulates the value list over iterations.
        lc_edge_value_convergence = pd.DataFrame(columns=lc_edge_search_phrases,
                                                 index=[0])
        i = 0
        with open(text_output_path, "r") as f:
            while i < lc_edge_quantity * (iteration + 1):  # incl. initial estimate
                line_splitted = f.readline().strip('\n').split(' ')
                if len(line_splitted) > 1:
                    key = str(line_splitted[1])
                    if key in lc_edge_search_phrases:
                        if type(lc_edge_value_convergence[key][0]) != list:
                            lc_edge_value_convergence[key][0] = []
                        lc_edge_value_convergence[key][0].append(float(line_splitted[4]))
                    # NOTE(review): i counts every non-blank s-line, not just
                    # LC edges — confirm against the log format.
                    i += 1

        # Examine user-specified edges (extra argv entries are switch ids).
        # NOTE(review): kept inside the examine_edge_option guard, since it
        # reads lc_edge_value_convergence which only exists here.
        examine_edge = []   # e.g. 3625, 3781, 4139, 4291, 6441
        new_search_phases = []
        if len(sys.argv) > 5:
            for i in range(5, len(sys.argv)):
                examine_edge.append(sys.argv[i])
        for edge in examine_edge:
            new_search_phases.append("s" + str(edge) + ":")
        for j in new_search_phases:
            plt.plot(lc_edge_value_convergence[j][0])
            plt.title(j + str(all_lc_edge_value[j][1][0]) + ',' +
                      str(all_lc_edge_value[j][1][1]))
            plt.savefig(j)
            plt.show()
def process():
    """Analyse per-edge s-values keyed by '<vertex_from>e<vertex_to>' strings.

    Reads input.g2o (last N edges are loop closures) and s_value.txt, prints
    rejected inliers / accepted outliers relative to the module-level
    ``threshold``, and optionally plots the value history of edges named on
    the command line.  Also relies on module-level ``sys``, ``pd``, ``plt``,
    ``readreverse``, ``iteration``, ``generate_figure`` and
    ``examine_edge_option``.
    """
    # inlier_quantity = 256
    # outlier_quantity = 256
    print(sys.argv)
    if len(sys.argv) > 1:
        inlier_quantity = int(sys.argv[1])
        outlier_quantity = int(sys.argv[2])
    posegraph_input_path = "input.g2o"  # e.g. .../MIT_random10_dcs50/mit_seed_1/input.g2o
    text_output_path = "s_value.txt"
    lc_edge_quantity = inlier_quantity + outlier_quantity

    # Read the input file back-to-front to find the last N edges; key each
    # edge as '<vertex_from>e<vertex_to>'.
    # BUG FIX: dropped an unused (and never closed) open() of the input file.
    i = 0
    all_lc_edge_pair_search_phrases = []
    outlier_pair_search_phrases = []
    with readreverse(posegraph_input_path, encoding="utf-8") as obj:
        while i < outlier_quantity + inlier_quantity:
            line_splitted = obj.readline().strip('\n').split(' ')
            key = 'e'.join([line_splitted[1], str(int(float(line_splitted[2])))])
            all_lc_edge_pair_search_phrases.append(key)
            if i < outlier_quantity:
                outlier_pair_search_phrases.append(key)
            i += 1
    all_lc_edge_pair_search_phrases = all_lc_edge_pair_search_phrases[::-1]  # reverse
    outlier_pair_search_phrases = outlier_pair_search_phrases[::-1]          # reverse

    # Derive the inlier set; shrunken sets reveal duplicate edges.
    all_lc_edge_set = set(all_lc_edge_pair_search_phrases)
    outlier_set = set(outlier_pair_search_phrases)
    if len(outlier_set) != outlier_quantity:
        print('possible redundant edges')
    inlier_set = all_lc_edge_set - outlier_set
    if len(inlier_set) != inlier_quantity:
        print('possible redundant edges')
    inlier_pair_search_phrases = list(inlier_set)

    # Read the s-value log; row 0 of each column collects the per-iteration
    # value list of that edge.  BUG FIX: the handle was never closed.
    all_lc_edge_value = pd.DataFrame(columns=all_lc_edge_pair_search_phrases,
                                     index=range(0, 1))
    i = 0
    with open(text_output_path, "r") as f:
        while i < lc_edge_quantity * iteration:
            line_splitted = f.readline().strip('\n').split(' ')
            if len(line_splitted) > 1:
                edge_joined = 'e'.join([line_splitted[1], line_splitted[2]])
                # First value starts the list, later values are appended.
                if type(all_lc_edge_value[edge_joined].iloc[0]) != list:
                    all_lc_edge_value.loc[0, edge_joined] = [float(line_splitted[4])]
                else:
                    all_lc_edge_value.loc[0, edge_joined].append(float(line_splitted[4]))
                i += 1

    # Inliers whose final value fell below the threshold were rejected.
    inlier_value = all_lc_edge_value[inlier_pair_search_phrases]
    inlier_final_value = []
    rejected_inlier = 0
    print('rejected inliers: ')
    for column in inlier_value.columns:
        final_value = inlier_value.loc[0, column][-1]
        inlier_final_value.append(final_value)
        if final_value < threshold:
            edge_list = column.split('e')
            print(edge_list, final_value)
            rejected_inlier += 1
    print('Num of rejected inliers: ', rejected_inlier)
    if generate_figure == True:
        fig, ax = plt.subplots()
        plt.plot(range(0, inlier_quantity), inlier_final_value, 'o-')
        plt.legend(("inlier value", ))
        plt.savefig("inlier_value_iteration_" +
                    str(len(all_lc_edge_value.iloc[0, 0])))
        plt.show()

    # Outliers whose final value reached the threshold were accepted.
    acceptted_outlier = 0
    print('acceptted outliers: ')
    outlier_value = all_lc_edge_value[outlier_pair_search_phrases]
    outlier_final_value = []
    for column in outlier_value.columns:
        final_value = outlier_value.loc[0, column][-1]
        outlier_final_value.append(final_value)
        if final_value >= threshold:
            edge_list = column.split('e')
            print(edge_list, final_value)
            acceptted_outlier += 1
    print('Num of acceptted outliers: ', acceptted_outlier)
    if generate_figure == True:
        fig, ax = plt.subplots()
        plt.plot(range(0, outlier_quantity), outlier_final_value, 'ro-')
        plt.legend(("outlier value", ))
        plt.savefig("outlier_value_iteration_" +
                    str(len(all_lc_edge_value.iloc[0, 0])))
        plt.show()

    # Plot the full value history of edges named on the command line
    # (argv holds vertex_from/vertex_to pairs from index 3 on).
    if examine_edge_option == 1:
        examine_edge = []   # e.g. 3625, 3781, 4139, 4291, 6441
        if len(sys.argv) > 3:
            for i in range(3, len(sys.argv), 2):
                examine_edge.append('e'.join([sys.argv[i], sys.argv[i + 1]]))
        for j in examine_edge:
            plt.plot(all_lc_edge_value.loc[0, j])
            plt.title(j)
            plt.savefig(j)
            plt.show()
def process():
    """Cluster incremental scores from full_analysis.txt and check purity.

    Reads the pose graph (last N edges are loop closures) and a per-step
    analysis file (columns: vertex_from vertex_to ofc score), groups
    consecutive scores into clusters, and reports clusters that mix known
    inliers with known outliers.  Relies on module-level ``sys``, ``np``,
    ``pd``, ``plt`` and ``readreverse``.
    """
    # folder_path = "/home/amber/stew/test_backend/analysis_chamber"
    # threshold = 0.01; outlier_quantity = 10
    # pose_quantity = 808   # manhattan: 3499 garage: 1660 mit: 808 intel: 1228
    # inlier_quantity = 20  # manhattan: 2097 garage: 4615 mit: 20 intel: 256
    # read input file, find last N edges (N = outlier_quantity + inlier_quantity)
    print(sys.argv)
    if len(sys.argv) > 1:
        folder_path = sys.argv[1]
        inlier_quantity = int(sys.argv[2])
        outlier_quantity = int(sys.argv[3])
        threshold = float(sys.argv[4])
        zero_out_threshold = float(sys.argv[5])
        print('threshold value: ', threshold)
    posegraph_input_path = folder_path + "/input.g2o"
    output_path = folder_path + "/full_analysis.txt"
    lc_edge_quantity = inlier_quantity + outlier_quantity
    df_full = pd.read_csv(output_path, delimiter=" ", header=None,
                          names=['vertex_from', 'vertex_to', 'ofc', 'score'])

    # Collect the ID pairs of the last lc_edge_quantity edges (reverse read:
    # the first outlier_quantity lines are the outliers).
    i = 0
    lc_pair_search_phrases = []       # [str0, str1] per loop closure
    outlier_pair_search_phrases = []
    inlier_pair_search_phrases = []
    with readreverse(posegraph_input_path, encoding="utf-8") as obj:
        while i < lc_edge_quantity:
            line_splitted = obj.readline().strip('\n').split(' ')
            lc_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            if i < outlier_quantity:
                outlier_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            else:
                inlier_pair_search_phrases.append([line_splitted[1], line_splitted[2]])
            i += 1

    # Edges present in both the inlier and outlier lists count as inliers.
    edge_twice = [e for e in outlier_pair_search_phrases
                  if e in inlier_pair_search_phrases]
    outlier_pair_search_phrases = [p for p in outlier_pair_search_phrases
                                   if p not in edge_twice]

    # Rows: [vertex_from, vertex_to, ofc_or_score, incremental_step].
    lc_id_score_pair = np.zeros((lc_edge_quantity, 4), dtype=float)
    lc_id_ofc_pair = np.zeros((lc_edge_quantity, 4), dtype=float)
    for i in df_full.index:
        if [str(df_full['vertex_from'].iloc[i]),
                str(df_full['vertex_to'].iloc[i])] in lc_pair_search_phrases:
            lc_id_ofc_pair[i] = np.array([df_full['vertex_from'].iloc[i],
                                          df_full['vertex_to'].iloc[i],
                                          df_full['ofc'].iloc[i], float(i)])
            # Scores at or below zero_out_threshold are clamped to 0.
            if float(df_full['score'].iloc[i]) > zero_out_threshold:
                lc_id_score_pair[i] = np.array([df_full['vertex_from'].iloc[i],
                                                df_full['vertex_to'].iloc[i],
                                                df_full['score'].iloc[i], float(i)])
            else:
                lc_id_score_pair[i] = np.array([df_full['vertex_from'].iloc[i],
                                                df_full['vertex_to'].iloc[i],
                                                0, float(i)])
    print('std_score of all edges: ', lc_id_score_pair[:, 2].std())

    # Split ofc rows into inlier/outlier arrays; sizes are adjusted for the
    # edges that appeared in both lists.
    inlier_id_ofc_pair = np.zeros((inlier_quantity + len(edge_twice), 4), dtype=float)
    outlier_id_ofc_pair = np.zeros((outlier_quantity - len(edge_twice), 4), dtype=float)
    j = 0
    k = 0
    for i in range(0, lc_id_ofc_pair.shape[0]):
        pair = [str(int(lc_id_ofc_pair[i, 0])), str(int(lc_id_ofc_pair[i, 1]))]
        if pair in outlier_pair_search_phrases:
            outlier_id_ofc_pair[j] = lc_id_ofc_pair[i]
            j += 1
        elif pair in inlier_pair_search_phrases:
            # BUG FIX: this assignment was split across a broken line wrap
            # in the original ('inlier_id_ofc_pair[k]' / '= lc_id_ofc_pair[i]').
            inlier_id_ofc_pair[k] = lc_id_ofc_pair[i]
            k += 1

    print('outlier_ofc_minimum: ', outlier_id_ofc_pair[:, 2].min())
    print('outlier_ofc_maximum: ', outlier_id_ofc_pair[:, 2].max())
    print('inlier_ofc_minimum: ', inlier_id_ofc_pair[:, 2].min())
    print('inlier_ofc_maximum: ', inlier_id_ofc_pair[:, 2].max())
    print(' ')

    # Same inlier/outlier split for the score rows.
    inlier_id_score_pair = np.zeros((inlier_quantity + len(edge_twice), 4), dtype=float)
    outlier_id_score_pair = np.zeros((outlier_quantity - len(edge_twice), 4), dtype=float)
    j = 0
    k = 0
    for i in range(0, lc_id_score_pair.shape[0]):
        pair = [str(int(lc_id_score_pair[i, 0])), str(int(lc_id_score_pair[i, 1]))]
        if pair in outlier_pair_search_phrases:
            outlier_id_score_pair[j] = lc_id_score_pair[i]
            j += 1
        elif pair in inlier_pair_search_phrases:
            inlier_id_score_pair[k] = lc_id_score_pair[i]
            k += 1
    print('outlier_score_minimum: ', outlier_id_score_pair[:, 2].min())
    print('outlier_score_maximum: ', outlier_id_score_pair[:, 2].max())
    print('inlier_score_minimum: ', inlier_id_score_pair[:, 2].min())
    print('inlier_score_maximum: ', inlier_id_score_pair[:, 2].max())

    fig = plt.figure()
    plt.plot(outlier_id_score_pair[:, 3], outlier_id_score_pair[:, 2], 'ro')
    plt.plot(inlier_id_score_pair[:, 3], inlier_id_score_pair[:, 2], 'o')
    plt.savefig("combined_score_plot")
    plt.show()

    # Greedy 1-D clustering of consecutive scores: start a new cluster when
    # the next score strays more than `threshold` from the running average,
    # or crosses the 0.99 boundary in either direction.
    clusters = [[lc_id_score_pair[0]]]
    for i in range(0, len(lc_id_score_pair) - 1):
        last_cluster = clusters[-1]
        sumsup = 0
        for member in last_cluster:
            sumsup += member[2]
        ave = sumsup / len(last_cluster)
        next_score = lc_id_score_pair[i + 1, 2]
        if abs(ave - next_score) > threshold or ((next_score > 0.99) != (ave > 0.99)):
            clusters.append([lc_id_score_pair[i + 1]])
        else:
            clusters[-1].append(lc_id_score_pair[i + 1])
    for i in range(0, len(clusters)):
        print('cluster begins: ', clusters[i][0][0], ' ', clusters[i][0][1])

    # Mark each clustered edge: -1 known outlier, +1 known inlier (column 4).
    for i in range(0, len(clusters)):
        for j in range(0, len(clusters[i])):
            pair = [str(int(clusters[i][j][0])), str(int(clusters[i][j][1]))]
            if pair in outlier_pair_search_phrases:
                clusters[i][j] = np.append(clusters[i][j], -1)
            else:
                clusters[i][j] = np.append(clusters[i][j], 1)

    # A cluster is consistent when all its marks agree.
    decision = []
    inconsistant_cluster = []
    for i in range(0, len(clusters)):
        if abs(sum(np.array(clusters[i])[:, 4])) == len(clusters[i]):
            decision.append(True)
        else:
            decision.append(False)
            inconsistant_cluster.append(clusters[i])
    print('consistant clusters/all clusters: ', sum(decision), '/', len(decision))
    if sum(decision) != len(decision):
        for i in range(0, len(decision)):
            if decision[i] == False:
                print('wrong clusters: ', i)
                print('includes ', clusters[i])
    print('sum of all edges', sum([len(cluster) for cluster in clusters]))