def graph_characteristics_csv(pattern_path, output_path, predicate): csv_folder_summary = os.path.join(output_path, 'graph_characteristics_csv') if not os.path.exists(csv_folder_summary): os.makedirs(csv_folder_summary) batch_number = pattern_path.split("/")[-1] file = csv_folder_summary + "/" + batch_number + '_results_final_limit.csv' print "Making file: ", file b = open(file, 'w') if predicate != None: field_names = [ 'pattern_name', 'nr_randvar_values', 'nr_targets', 'has_cycles', 'max_degree', 'average_degree', 'predicate' + predicate, 'invalid' ] else: field_names = [ 'pattern_name', 'nr_randvar_values', 'nr_targets', 'has_cycles', 'max_degree', 'average_degree', 'invalid' ] writer = csv.DictWriter(b, fieldnames=field_names) writer.writeheader() pattern_file_gml = None print "Number of patterns: ", len( sorted(os.listdir(pattern_path), key=lambda x: x[:-5])) for patt in sorted(os.listdir(pattern_path), key=lambda x: x[:-5]): #print os.path.join(pattern_path,patt) if (os.path.isfile(os.path.join(pattern_path, patt))): continue pattern_file_gml = os.path.join(pattern_path, patt, patt + ".gml") #if patt.endswith(".gml"): # pattern_file_gml=os.path.join(pattern_path,patt) #print "Pattern file gml",pattern_file_gml if pattern_file_gml != None: pattern = nx.read_gml(pattern_file_gml) else: continue pattern_file_name = patt #some general pattern charactersitics nr_randvar_values = man.count_nr_randvars_in_graph(pattern) cycles = man.is_there_cycle_in_graph(pattern) max_degree = man.get_maximum_node_degree(pattern) average_degree = man.get_average_node_degree(pattern) n_target_nodes = man.get_nr_target_nodes_other_than_head(pattern) contains_target_predicate = contains_predicate(pattern, predicate) row = {} row['pattern_name'] = pattern_file_name row['nr_randvar_values'] = str(nr_randvar_values) row['nr_targets'] = str(n_target_nodes) row['has_cycles'] = str(cycles) row['max_degree'] = str(max_degree) row['average_degree'] = str(average_degree) row['invalid'] = 
str(is_invalid(os.path.join(pattern_path, patt))) if predicate != None: row['predicate' + predicate] = str(contains_target_predicate) writer.writerow(row) print "Finished writing csv ...to", file return file
def makecsv_file_for_final_limits(pattern_path, output_path, redo): print "Output path: ", output_path print "Pattern path: ", pattern_path csv_folder_summary = os.path.join(output_path, 'csv_results') batch_number = pattern_path.split("/")[-1] print batch_number print "Does exist csv folder summary", csv_folder_summary, os.path.exists( csv_folder_summary) if not os.path.exists(csv_folder_summary): os.makedirs(csv_folder_summary) file = csv_folder_summary + "/" + batch_number + '_results_final_limit.csv' if os.path.exists(file) and redo == False: print "Results for this batch already exist" return b = open(file, 'w') field_names = [ 'pattern_name', 'selected', 'nr_randvar_values', 'nr_targets', 'has_cycles', 'exh_emb', 'rnd_emb', 'furer_emb', 'ffurer_emb', 'limit16_rnd_emb', 'limit16_fur_emb', 'limit16_ff_emb', 'rnd_KLD_16', 'furer_KLD_16', 'ff_KLD_16', 'exh_rt', 'rnd_avgRT_16', 'furer_avgRT_16', 'ff_avgRT_16' ] writer = csv.DictWriter(b, fieldnames=field_names) writer.writeheader() print "Number of patterns: ", len( sorted(os.listdir(pattern_path), key=lambda x: x[:-5])) counter = 1 nr_patterns = len(os.listdir(pattern_path)) for patt in sorted(os.listdir(pattern_path), key=lambda x: x[:-5]): print "CSV processing :", nr_patterns, " th pattern" nr_patterns -= 1 if (os.path.isfile(os.path.join(pattern_path, patt))): continue pattern_file_gml = None print "Path", os.path.join(pattern_path, patt) if os.path.exists( os.path.join(pattern_path, patt, "results_furer", "input_pattern.gml")): pattern_file_gml = os.path.join(pattern_path, patt, "results_furer", "input_pattern.gml") elif os.path.exists( os.path.join(pattern_path, patt, 'exhaustive_approach', 'input_pattern.gml')): pattern_file_gml = os.path.join(pattern_path, patt, 'exhaustive_approach', 'input_pattern.gml') elif patt.endswith(".gml"): pattern_file_gml = os.path.join(pattern_path, patt) print "Pattern file gml", pattern_file_gml if pattern_file_gml != None: pattern = nx.read_gml(pattern_file_gml) else: 
continue pattern_file_name = patt #PICKLES RESULTS PATH exhaustive_file_result = os.path.join( pattern_path, patt, 'exhaustive_approach', 'results_' + str(pattern_file_name) + ".res") random_dict_result = os.path.join(pattern_path, patt, 'random_vertex_approach', 'rndicts.pickle') furer_dict_result = os.path.join(pattern_path, patt, 'results_furer', 'fudicts.pickle') false_furer_dict_result = os.path.join(pattern_path, patt, 'results_false_furer', 'fudicts.pickle') #NLIMITS RESULTS PATH random_nlimits_result = os.path.join(pattern_path, patt, 'random_vertex_approach', 'n_limits') furer_nlimits_result = os.path.join(pattern_path, patt, 'results_furer', 'n_limits') false_furer_nlimits_result = os.path.join(pattern_path, patt, 'results_false_furer', 'n_limits') #some general pattern charactersitics nr_randvar_values = man.count_nr_randvars_in_graph(pattern) cycles = man.is_there_cycle_in_graph(pattern) max_degree = man.get_maximum_node_degree(pattern) average_degree = man.get_average_node_degree(pattern) n_target_nodes = man.get_nr_target_nodes_other_than_head(pattern) nr_embeddings_exhaustive, exhaustive_running_time = extract_number_of_embeddings_and_rt_exhaustive( exhaustive_file_result) nr_embeddings_random_final_limit = [] nr_embeddings_furer_final_limit = [] nr_embeddings_false_furer_final_limit = [] nr_embeddings_furer_final = -1 nr_embeddings_false_furer_final = -1 nr_embeddings_random_final = -1 furer_klds = [] furer_SSTDs = [] false_furer_kld = [] false_furer_SSTDs = [] random_klds = [] random_SSTDs = [] furer_avg_rt = [] false_furer_avg_rt = [] random_avg_rt = [] #FIRST CHECK IF EXPPERIMENTS WERE RUN IN SEQUENTIALL OR PARALLEL MODE. 
IF IT'S PARALLEL MODE RESULTS HAVE TO BE #EXTRACTED FROM RUN DIRECTORIES #EXTRACT RANDOM VERTEX RESULTS if os.path.exists( os.path.join(pattern_path, patt, 'random_vertex_approach')): files_random = sorted([ f for f in os.listdir( os.path.join(pattern_path, patt, 'random_vertex_approach')) if re.match('run_*', f) ]) if (len(files_random) != 0): random_klds, random_SSTDs, random_avg_rt = extract_KLD_sampling_approach_parallel_run( os.path.join(pattern_path, patt, 'random_vertex_approach'), files_random) nr_embeddings_random_final = extract_final_number_of_embeddings_sampling_approach( os.path.join(pattern_path, patt, 'random_vertex_approach')) else: random_klds, random_SSTDs, random_avg_rt = extract_KLD_sampling_approach( random_nlimits_result) #nr_embeddings_random_final_limit=extract_number_of_embeddings_sampling_approach(random_dict_result) nr_embeddings_random_final = extract_final_number_of_embeddings_sampling_approach( os.path.join(pattern_path, patt, 'random_vertex_approach')) #EXTRACT FURER RESULTS if os.path.exists(os.path.join(pattern_path, patt, 'results_furer')): print "FURER" files_furer = sorted([ f for f in os.listdir( os.path.join(pattern_path, patt, 'results_furer')) if re.match('run_*', f) ]) if (len(files_furer) != 0): furer_klds, furer_SSTDs, furer_avg_rt = extract_KLD_sampling_approach_parallel_run( os.path.join(pattern_path, patt, 'results_furer'), files_furer) nr_embeddings_furer_final = extract_final_number_of_embeddings_sampling_approach( os.path.join(pattern_path, patt, 'results_furer')) else: furer_klds, furer_SSTDs, furer_avg_rt = extract_KLD_sampling_approach( furer_nlimits_result) #nr_embeddings_furer_final_limit=extract_number_of_embeddings_sampling_approach(furer_dict_result) nr_embeddings_furer_final = extract_final_number_of_embeddings_sampling_approach( os.path.join(pattern_path, patt, 'results_furer')) #EXTRACT FALSE FURER TIMES if os.path.exists( os.path.join(pattern_path, patt, 'results_false_furer')): files_false_furer = 
sorted([ f for f in os.listdir( os.path.join(pattern_path, patt, 'results_false_furer')) if re.match('run_*', f) ]) if (len(files_false_furer) != 0): false_furer_kld, false_furer_SSTDs, false_furer_avg_rt = extract_KLD_sampling_approach_parallel_run( os.path.join(pattern_path, patt, 'results_furer'), files_false_furer) nr_embeddings_false_furer_final_limit = extract_number_of_embeddings_sampling_approach( random_dict_result) else: false_furer_kld, false_furer_SSTDs, false_furer_avg_rt = extract_KLD_sampling_approach( false_furer_nlimits_result) #nr_embeddings_false_furer_final_limit=extract_number_of_embeddings_sampling_approach(false_furer_dict_result) nr_embeddings_false_furer_final = extract_final_number_of_embeddings_sampling_approach( os.path.join(pattern_path, patt, 'results_false_furer')) print "PATH ", os.path.join(pattern_path, patt, 'selected.info'), os.path.exists( os.path.join(pattern_path, patt, 'selected.info')) selected = False #check if pattern selected if os.path.exists(os.path.join(pattern_path, patt, 'selected.info')): selected = True row = {} row['pattern_name'] = pattern_file_name row['selected'] = selected row['nr_randvar_values'] = str(nr_randvar_values) row['nr_targets'] = str(n_target_nodes) row['has_cycles'] = str(cycles) if (nr_embeddings_exhaustive == 'NC'): row['exh_emb'] = 'NC' else: row['exh_emb'] = nr_embeddings_exhaustive row['rnd_emb'] = nr_embeddings_random_final row['furer_emb'] = nr_embeddings_furer_final row['ffurer_emb'] = nr_embeddings_false_furer_final row['limit16_rnd_emb'] = "None" row['limit16_fur_emb'] = "None" row['limit16_ff_emb'] = "None" row['rnd_KLD_16'] = str( str(getNTH_limit_value(15, random_klds)) + " +- " + str(getNTH_limit_value(15, random_SSTDs))) row['furer_KLD_16'] = str( str(getNTH_limit_value(15, furer_klds)) + " +- " + str(getNTH_limit_value(15, furer_SSTDs))) row['ff_KLD_16'] = str( str(getNTH_limit_value(15, false_furer_kld)) + " +- " + str(getNTH_limit_value(15, false_furer_SSTDs))) row['exh_rt'] = 
exhaustive_running_time row['rnd_avgRT_16'] = getNTH_limit_value(15, random_avg_rt) row['furer_avgRT_16'] = getNTH_limit_value(15, furer_avg_rt) row['ff_avgRT_16'] = getNTH_limit_value(15, false_furer_avg_rt) writer.writerow(row) counter += 1 #return path tocreated csv file print "Finished writing csv ...to", file return file
def get_row_NS(general_path, pattern_result, experiment_name):
    """Build the result-CSV row for a not-selected pattern.

    When ``general_path`` lacks an ``input_pattern.gml``, every statistic is
    the placeholder string "NC"; otherwise the pattern graph is loaded and
    its characteristics are computed.
    """
    gml_path = os.path.join(general_path, 'input_pattern.gml')
    if not os.path.exists(gml_path):
        # pattern graph unavailable: report every statistic as "not computed"
        nc_row = dict((key, "NC") for key in (
            'nr_randvar_values', 'nodes', 'edges', 'has_cycles', 'density',
            'shape', 'max_degree', 'avg_deg', 'nr_targets', 'nr_emb',
            'has_obd', 'unequal_size_warn', 'OBD'))
        nc_row['pattern_name'] = pattern_result
        return nc_row

    pattern = nx.read_gml(gml_path)
    # basic graph characteristics
    nr_randvar_values = man.count_nr_randvars_in_graph(pattern)
    cycles = man.is_there_cycle_in_graph(pattern)
    max_degree = man.get_maximum_node_degree(pattern)
    average_degree = man.get_average_node_degree(pattern)
    n_target_nodes = man.get_nr_target_nodes_other_than_head(pattern)

    # marker files produced by the experiment runs
    has_obd = not os.path.exists(os.path.join(pattern_result, 'no_obdecomp.info'))
    not_selected_info = os.path.join(general_path, 'not_selected.info')
    nr_emb = None
    if os.path.exists(not_selected_info):
        nr_emb = extract_nr_embeddings_NS(not_selected_info)
    nodes, edges = man.get_readable_text_format(pattern)
    unequal_size_warning = os.path.exists(
        os.path.join(general_path, 'results_furer', 'unequal_size.warning'))
    obd_info = os.path.join(general_path, 'results_furer', 'OBDDecomp.info')
    OBD = getOBDecomp(obd_info) if os.path.exists(obd_info) else None

    return {
        'pattern_name': pattern_result,
        'nr_randvar_values': nr_randvar_values,
        'nodes': nodes,
        'edges': edges,
        'has_cycles': cycles,
        'density': nx.density(pattern),
        'shape': man.get_graph_shape(pattern),
        'max_degree': max_degree,
        'avg_deg': average_degree,
        'nr_targets': n_target_nodes,
        'nr_emb': nr_emb,
        # 'has_obd' / 'unequal_size_warn' are computed above but deliberately
        # not emitted, matching the disabled assignments in the original
        'OBD': OBD,
    }
def get_row_exhaustive(general_path, pattern_result, pattern_path):
    """Build a result-CSV row for the exhaustive-counting experiment of one pattern.

    Loads ``<general_path>/input_pattern.gml``, computes graph statistics,
    reads the exhaustive results file (embedding count, runtime, number of
    observations) when present, and appends one ``emb_<i>`` column per
    monitoring observation.

    :param general_path: pattern directory holding input_pattern.gml and
        the exhaustive_approach sub-directory
    :param pattern_result: result directory (also used as the row's name)
    :param pattern_path: path handed to get_parent_id for the parent id
    :return: dict keyed by CSV column name
    """
    row = {}
    print "Pattern exhaustive ", pattern_result
    print "Pattern path: ", pattern_path
    pattern = nx.read_gml(os.path.join(general_path, 'input_pattern.gml'))
    # basic graph characteristics of the pattern
    nr_randvar_values = man.count_nr_randvars_in_graph(pattern)
    cycles = man.is_there_cycle_in_graph(pattern)
    max_degree = man.get_maximum_node_degree(pattern)
    average_degree = man.get_average_node_degree(pattern)
    n_target_nodes = man.get_nr_target_nodes_other_than_head(pattern)
    parent_id = get_parent_id(os.path.join(pattern_path))
    #get nr embeddings of exhaustive
    nr_emb = None
    time = None
    print general_path.split('/')
    # last path component is the pattern name; handle a trailing '/'
    pattern_name = general_path.split('/')[-1]
    if pattern_name == "":
        pattern_name = general_path.split('/')[-2]
    nr_obs = None
    print "Exists? ", os.path.join(
        general_path, 'exhaustive_approach',
        'results_' + pattern_name + '.res'), os.path.exists(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + pattern_name + '.res'))
    if os.path.exists(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + pattern_name + '.res')):
        nr_emb, time, nr_obs = extract_nr_embeddings(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + pattern_name + '.res'))
    #get the results
    if os.path.exists(os.path.join(pattern_result, 'monitoring')):
        embeddings, stdev, klds = get_stat(
            os.path.join(pattern_result, 'monitoring'), 'exhaustive')
    else:
        # no monitoring data: pad the 120 expected observation slots with None
        embeddings = [None] * 120
        klds = [None] * 120
    # a timeout.info marker means the exhaustive run was cut off
    is_timeout = False
    if os.path.exists(
            os.path.join(general_path, 'exhaustive_approach', 'timeout.info')):
        is_timeout = True
    print "Nr of records for embeddings: ", len(embeddings)
    nodes, edges = man.get_readable_text_format(pattern)
    row['pattern_name'] = pattern_result
    row['parent_id'] = parent_id
    row['nr_randvar_values'] = int(nr_randvar_values)
    row['nodes'] = nodes
    row['edges'] = edges
    row['has_cycles'] = cycles
    row['density'] = nx.density(pattern)
    row['max_degree'] = float(max_degree)
    row['avg_deg'] = float(average_degree)
    row['nr_targets'] = int(n_target_nodes)
    # keep None/0 as-is; only coerce a real count to float
    if nr_emb:
        row['exh_emb'] = float(nr_emb)
    else:
        row['exh_emb'] = nr_emb
    row['time'] = time
    row['timeout'] = is_timeout
    row['nr_observations'] = nr_obs
    # one emb_<i> column per monitoring observation (1-based)
    for i in xrange(1, len(embeddings) + 1):
        if embeddings[i - 1] == None:
            row["emb_" + str(i)] = None
        else:
            row["emb_" + str(i)] = float(embeddings[i - 1])
    return row
def get_row(general_path, pattern_result, experiment_name, pattern_path):
    """Build a result-CSV row for one sampling experiment of one pattern.

    Loads ``<general_path>/input_pattern.gml``, computes graph statistics,
    reads selected/exhaustive embedding counts when their files exist, and
    appends per-observation ``emb_<i>``, ``std_<i>`` and ``KLD_<i>`` columns
    from the monitoring data of ``experiment_name``.

    :param general_path: pattern directory holding input_pattern.gml
    :param pattern_result: experiment result directory (also the row's name)
    :param experiment_name: experiment label passed through to get_stat
    :param pattern_path: path handed to get_parent_id for the parent id
    :return: dict keyed by CSV column name
    """
    row = {}
    pattern = nx.read_gml(os.path.join(general_path, 'input_pattern.gml'))
    parent_id = get_parent_id(os.path.join(pattern_path))
    # basic graph characteristics of the pattern
    nr_randvar_values = man.count_nr_randvars_in_graph(pattern)
    cycles = man.is_there_cycle_in_graph(pattern)
    max_degree = man.get_maximum_node_degree(pattern)
    average_degree = man.get_average_node_degree(pattern)
    n_target_nodes = man.get_nr_target_nodes_other_than_head(pattern)
    #get nr embeddings of exhaustive
    nr_emb = None
    sel_emb = None
    has_obd = True
    emb_stds = []
    # a no_obdecomp.info marker means no OBD decomposition was found
    if os.path.exists(os.path.join(pattern_result, 'no_obdecomp.info')):
        has_obd = False
    if os.path.exists(
            os.path.join(os.path.dirname(pattern_result), "selected.info")):
        sel_emb = extract_nr_embeddings_NS(
            os.path.join(os.path.dirname(pattern_result), "selected.info"))
    print "General path: ", general_path
    print os.path.join(
        general_path, 'exhaustive_approach',
        'results_' + general_path.split('/')[-1] +
        '.res'), "exists?", os.path.exists(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + general_path.split('/')[-1] + '.res'))
    # last path component is the pattern name; handle a trailing '/'
    pattern_name = None
    print general_path.split('/')
    if general_path.split('/')[-1] == "":
        pattern_name = general_path.split('/')[-2]
    else:
        pattern_name = general_path.split('/')[-1]
    print pattern_name
    if os.path.exists(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + pattern_name + '.res')):
        nr_emb, time, nr_obs = extract_nr_embeddings(
            os.path.join(general_path, 'exhaustive_approach',
                         'results_' + pattern_name + '.res'))
    #get the results
    if os.path.exists(os.path.join(pattern_result, 'monitoring')):
        embeddings, emb_stds, klds = get_stat(
            os.path.join(pattern_result, 'monitoring'), experiment_name)
    else:
        # no monitoring data: pad the 120 expected observation slots with None
        embeddings = [None] * 120
        klds = [None] * 120
    print "EMBEDDINGS: ", embeddings
    unequal_size_warning = False
    OBD = None
    if os.path.exists(
            os.path.join(general_path, 'results_furer', 'OBDDecomp.info')):
        OBD = getOBDecomp(
            os.path.join(general_path, 'results_furer', 'OBDDecomp.info'))
    nodes, edges = man.get_readable_text_format(pattern)
    print "PATTERN NAME: ", pattern_result
    row['pattern_name'] = pattern_result
    row['parent_id'] = parent_id
    row['nr_randvar_values'] = int(nr_randvar_values)
    row['nodes'] = nodes
    row['edges'] = edges
    row['has_cycles'] = cycles
    row['density'] = float(nx.density(pattern))
    row['shape'] = man.get_graph_shape(pattern)
    row['max_degree'] = float(max_degree)
    row['avg_deg'] = float(average_degree)
    row['nr_targets'] = n_target_nodes
    # keep None/0 as-is; only coerce real counts to float
    if sel_emb:
        row['sel_emb'] = float(sel_emb)
    else:
        row['sel_emb'] = sel_emb
    if nr_emb:
        row['exh_emb'] = float(nr_emb)
    else:
        row['exh_emb'] = nr_emb
    row['has_obd'] = has_obd
    #row['unequal_size_warn']=unequal_size_warning
    row['OBD'] = OBD
    print "Nr embeddingS: ", len(embeddings)
    # per-observation columns (1-based)
    for i in xrange(0, len(embeddings)):
        row["emb_" + str(i + 1)] = embeddings[i]
    for i in xrange(0, len(emb_stds)):
        row["std_" + str(i + 1)] = emb_stds[i]
    for i in xrange(0, len(klds)):
        row["KLD_" + str(i + 1)] = klds[i]
    return row