# Python 2 reporting utilities for the sampling experiments.
# Standard-library / third-party imports used throughout this file; the
# project-local modules referenced as smplr (sampler utilities), su
# (statistics utilities) and report are assumed to be imported elsewhere in
# the package, since their exact import paths are not shown here.
import math
import os
import pickle
import time
from decimal import Decimal

import numpy

import experiments.globals


def martin_version_report(fdict_exhaustive, data_graph, pattern, monitoring_marks, output_path,
                          detailed_result_path, monitoring_reports, exhaustive_approach_results_path,
                          Plist, nr, pattern_file_name):
    print "LEN FDICT_EXHAUSTIVE (BEFORE): ", len(fdict_exhaustive)
    size_fdict = len(fdict_exhaustive)
    num_embeddings = 0
    for k in fdict_exhaustive.keys():
        num_embeddings = num_embeddings + fdict_exhaustive[k]
    start_time = time.time()
    nr_possible_combinations = smplr.complete_combinations(fdict_exhaustive, data_graph, pattern, Plist)  # add zeros to all not present combinations
    smplr.smooth(fdict_exhaustive, fdict_exhaustive)
    report.report_monitoring(monitoring_marks, output_path, detailed_result_path, monitoring_reports,
                             exhaustive_approach_results_path, data_graph, pattern, Plist, nr,
                             pattern_file_name, fdict_exhaustive)
    #print monitoring_reports
    print "ELAPSED TIME: ", time.time() - start_time
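# Shape assumed for monitoring_reports by the report_monitoring variants below
# (inferred from the attribute accesses in this file; the report class itself
# is defined elsewhere):
#
#   monitoring_reports : {run_id: [snapshot_report_0, snapshot_report_1, ...]}
#
# where each snapshot report exposes at least .nr_iterations,
# .sum_nr_embeddings, .sum_of_the_square_embeddings, .current_fdict and
# .number_of_observed_nodes, with one entry per monitoring mark.  Runs that
# stopped early simply have shorter lists, which is why the collection loops
# catch IndexError.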
def report_monitoring(monitoring_marks, output_path, detailed_result_path, monitoring_reports,
                      exhaustive_approach_result_file, data_graph, pattern, Plist, repetitions,
                      pattern_file_name, fdict_exhaustive):
    # Create the directory that will contain results for each time instance.
    snapshot_dict = {}
    duration = []
    begin = 0
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    embeddings_estimate = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int
    # The problem might be that some runs finished earlier, and some later.
    for i in xrange(len(monitoring_marks)):
        for key_iter in monitoring_reports.keys():
            if not (monitoring_marks[i] in snapshot_dict.keys()):
                snapshot_dict[monitoring_marks[i]] = []
            try:
                snapshot_dict[monitoring_marks[i]].append(monitoring_reports[key_iter][i])
                nr_iterations.append(monitoring_reports[key_iter][i].nr_iterations)
                sum_of_embeddings.append(monitoring_reports[key_iter][i].sum_nr_embeddings)
                sum_of_squares.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings)
                embeddings_estimate.append(monitoring_reports[key_iter][i].embeddings_estimate)
                sum_of_root_node_emb.append(monitoring_reports[key_iter][i].sum_nr_extra_embeddings)
                sum_of_squares_root_node_emb.append(monitoring_reports[key_iter][i].sum_of_the_extra_square_embeddings)
            except IndexError:
                break
    print "NR ITERATIONS: ", nr_iterations
    print "sum_of_embeddings: ", sum_of_embeddings
    print "sum_of_squares: ", sum_of_squares
    snapshot_inits = []
    for i in range(repetitions):
        snapshot_inits.append(0)
    counter_duration = 0
    counter = 0
    for time_snapshot in monitoring_marks:
        if counter == 1:
            break  # only the first snapshot is written in this variant
        print "Processed ", counter, " out of: ", len(monitoring_marks)
        furer_results_KLD = []
        furer_results_bhatta = []
        furer_results_hellinger = []
        furer_times = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []
        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(snapshot_directory_path, 'res_time_' + str(time_snapshot) + '.info')
        fdict_furer_temp = snapshot_dict[time_snapshot]
        fdicts_Furer = []
        for f in fdict_furer_temp:
            fdicts_Furer.append(f.current_fdict)
            observed_nodes.append(f.number_of_observed_nodes)
        if len(fdict_furer_temp) == 0:
            continue
        for i in range(len(fdict_furer_temp)):
            fdict_limited = fdicts_Furer[i]
            smplr.smooth(fdict_limited, fdict_exhaustive)  # smoothing to avoid zeros
            fdict_Furer = fdicts_Furer[i]
            observed_nodes_difference_per_snapshot.append(observed_nodes[i] - snapshot_inits[i])
            snapshot_inits[i] = observed_nodes[i]
            [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default(fdict_exhaustive, trash_factor=0.01)
            #print "Exhaustive dict: ", pde
            if len(pde) < 1:
                print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                break
            [pdf, tl, dk] = smplr.make_pd_general_kickout_default_limited(fdict_Furer, trash_list, default_key)
            #with open('make_pdf_general_kickout_default_MARTIN.csv', 'w') as f:
            #    for k in pdf.keys():
            #        f.write(str(k) + ';' + str(pdf[k]) + '\n')
            furer_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
            #print "KLD: ", su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf))
            furer_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
            furer_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
        print "Writing to: ", snapshot_directory_file
        resultfile = open(snapshot_directory_file, 'w')
        resultfile.write('Furer\n')
        resultfile.write("experiment on graph: " + str(pattern_file_name) + " and pattern: " + pattern_file_name + "\n")
        resultfile.write("repetitions (for this time snapshot): " + str(repetitions) + "\n")
        resultfile.write(" " + "\n")
        print "KLD: ", str(numpy.mean(furer_results_KLD))
        resultfile.write("average average KLD on furer: " + str(numpy.mean(furer_results_KLD)) + " with SSTD: " + str(numpy.std(furer_results_KLD, ddof=1)) + "\n")
        resultfile.write("average average bhatta on furer: " + str(numpy.mean(furer_results_bhatta)) + " with SSTD: " + str(numpy.std(furer_results_bhatta, ddof=1)) + "\n")
        resultfile.write("average average hellinger on furer: " + str(numpy.mean(furer_results_hellinger)) + " with SSTD: " + str(numpy.std(furer_results_hellinger, ddof=1)) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write('-----DETAILED RESULTS-----' + "\n")
        resultfile.write('furer_results_KLD : ' + str(furer_results_KLD) + "\n")
        resultfile.write('furer_results_bhatta : ' + str(furer_results_bhatta) + "\n")
        resultfile.write('furer_results_hellinger : ' + str(furer_results_hellinger) + "\n")
        resultfile.write('avg #nodes observed : ' + str(numpy.mean(observed_nodes)) + "\n")
        resultfile.write('# nodes per time interval per run : ' + str(numpy.mean(observed_nodes_difference_per_snapshot) / duration[counter_duration]) + "\n")
        resultfile.write('avg difference of nodes observed from previous snapshot : ' + str(numpy.mean(observed_nodes_difference_per_snapshot)) + "\n")
        resultfile.write("------------------------------------ Sampling info ------------------------------\n")
        resultfile.write('number of sampling iterations : ' + str(nr_iterations[counter]) + "\n")
        if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[counter] == 0:
            nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[counter]
        else:
            nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[counter]
            #embeddings_estimate[counter]
        print "Writing to file: ", nr_embeddings_temp
        resultfile.write('average of embeddings : ' + str(nr_embeddings_temp) + "\n")
        if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[counter] == 0:
            # we do the old standard deviation (plain embedding counts)
            print "Old stdev"
            print sum_of_squares[counter]
            print sum_of_embeddings[counter]
            a = Decimal(sum_of_squares[counter]) - (Decimal(math.pow(sum_of_embeddings[counter], 2)) / Decimal(float(nr_iterations[counter])))
            print a
            stdeviation = math.sqrt(a / Decimal(float(nr_iterations[counter] - 1)))
        else:
            print "here"
            a = Decimal(sum_of_squares_root_node_emb[counter]) - (Decimal(math.pow(sum_of_root_node_emb[counter], 2)) / Decimal(float(nr_iterations[counter])))
            stdeviation = math.sqrt(a / Decimal(float(nr_iterations[counter] - 1)))
        print "old stdev: ", stdeviation
        resultfile.write('stdeviation of # embeddings: ' + str(stdeviation) + "\n")
        resultfile.close()
        counter += 1
        counter_duration += 1
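# Illustrative only (hypothetical helper, not called by the pipeline): the
# standard deviation above is derived from running sums, i.e. the sample
# variance is (sum_of_squares - sum_of_embeddings**2 / n) / (n - 1), computed
# with Decimal to limit precision loss for large counts.
def _sample_std_from_sums(sum_x, sum_x2, n):
    """Sample standard deviation from sum(x), sum(x**2) and the sample size n."""
    a = Decimal(sum_x2) - (Decimal(math.pow(sum_x, 2)) / Decimal(float(n)))
    return math.sqrt(a / Decimal(float(n - 1)))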
def report(output_path, detailed_result_path, fudicts, plot_result_dict, all_furer_times,
           exhaustive_approach_result_file, data_graph, pattern, Plist, NLIMIT_values, repetitions,
           pattern_file_name, fdict_exhaustive, iteration_counter_n_limit, n_limit_embeddings):
    if len(fudicts) == 0:
        with open(os.path.join(output_path, 'no_results.info'), 'wb') as result_info:
            result_info.write("No results for random - empty fudicts!")
        return  # nothing to report
    pickout = open(os.path.join(output_path, 'fudicts.pickle'), 'wb')
    pickle.dump(fudicts, pickout)
    pickout.close()
    pickout = open(os.path.join(output_path, 'all_furer_times.pickle'), 'wb')
    pickle.dump(all_furer_times, pickout)
    pickout.close()
    picklename = os.path.join(exhaustive_approach_result_file, "fdict_exhaustive_%s.pickle" % pattern_file_name)
    pickin = open(picklename, 'rb')
    fdict_exhaustive = pickle.load(pickin)
    smplr.complete_combinations(fdict_exhaustive, data_graph, pattern, Plist)  # add zeros to all not present combinations
    smplr.smooth(fdict_exhaustive, fdict_exhaustive)  # Laplace smoothing also for the exhaustive
    for nli in range(len(NLIMIT_values)):
        print "ITERATION COUNTER FOR THIS LIMIT: ", iteration_counter_n_limit[nli]
        print "REPORTING LIMIT: ", NLIMIT_values[nli]
        plot_result_dict[NLIMIT_values[nli]] = {}
        furer_results_KLD = []
        furer_results_bhatta = []
        furer_results_hellinger = []
        furer_times = []
        for i in range(repetitions):
            furer_times.append(all_furer_times[i][nli])
            fdict_limited = fudicts[i][nli]
            smplr.smooth(fdict_limited, fdict_exhaustive)  # smoothing to avoid zeros
            fdict_Furer = fudicts[i][nli]
            [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default(fdict_exhaustive, trash_factor=0.01)  # we remove rows where frequencies do not reach 1%
            if len(pde) < 1:
                print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                break
            emb = n_limit_embeddings[nli]
            # pdl is computed for parity with the random-node report, but only pdf is used below.
            [pdl, tl, dk] = smplr.make_pd_general_kickout_default_limited(fdict_limited, trash_list, default_key)
            [pdf, tl, dk] = smplr.make_pd_general_kickout_default_limited(fdict_Furer, trash_list, default_key)
            print "Appending results ..."
            furer_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
            furer_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
            furer_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
        plot_result_dict[NLIMIT_values[nli]]["furer_KLD"] = (numpy.mean(furer_results_KLD), numpy.std(furer_results_KLD, ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["furer_BHT"] = (numpy.mean(furer_results_bhatta), numpy.std(furer_results_bhatta, ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["furer_HEL"] = (numpy.mean(furer_results_hellinger), numpy.std(furer_results_hellinger, ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["furer_times"] = (numpy.mean(furer_times), numpy.std(furer_times, ddof=1))
        result_file_name = detailed_result_path + "/" + "res_" + pattern_file_name + pattern_file_name + "." + str(repetitions) + "x" + str(NLIMIT_values[nli]) + ".result"
        resultfile = open(result_file_name, 'w')
        resultfile.write('Furer\n')
        resultfile.write("experiment on graph: " + str(pattern_file_name) + " and pattern: " + pattern_file_name + "\n")
        resultfile.write("NLIMIT: " + str(NLIMIT_values[nli]) + "\n")
        resultfile.write("repetitions: " + str(repetitions) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("average average KLD on furer: " + str(numpy.mean(furer_results_KLD)) + " with SSTD: " + str(numpy.std(furer_results_KLD, ddof=1)) + "\n")
        resultfile.write("average average bhatta on furer: " + str(numpy.mean(furer_results_bhatta)) + " with SSTD: " + str(numpy.std(furer_results_bhatta, ddof=1)) + "\n")
        resultfile.write("average average hellinger on furer: " + str(numpy.mean(furer_results_hellinger)) + " with SSTD: " + str(numpy.std(furer_results_hellinger, ddof=1)) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("Furer took per run on average: " + str(numpy.mean(furer_times)) + " seconds." + "\n")
        resultfile.write('-----DETAILED RESULTS-----' + "\n")
        resultfile.write('furer_results_KLD :' + str(furer_results_KLD) + "\n")
        resultfile.write('furer_results_bhatta :' + str(furer_results_bhatta) + "\n")
        resultfile.write('furer_results_hellinger :' + str(furer_results_hellinger) + "\n")
        resultfile.write('furer_times :' + str(furer_times) + "\n")
        resultfile.write('Nr embeddings for limit: ' + str(emb))
        resultfile.close()
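# After the call above, plot_result_dict is keyed by NLIMIT value and holds
# (mean, sample-std) pairs per metric; for example (hypothetical values):
#
#   plot_result_dict[100]["furer_KLD"]   == (0.12, 0.03)
#   plot_result_dict[100]["furer_times"] == (4.7, 0.9)
#
# which is the shape the downstream plotting code is assumed to consume.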
def getStatistics_furer(fudicts, fdict_exhaustive, pattern, data_graph, target_indices, targe_ids,
                        head_node, target_nodes, detailed_result_path, pattern_file_name):
    # Create the directory that will contain results for each time instance.
    furer_results_KLD = []
    furer_results_bhatta = []
    furer_results_hellinger = []
    furer_times = []
    observed_nodes = []
    observed_nodes_difference_per_snapshot = []
    snapshot_directory_path = os.path.join(detailed_result_path)
    if not os.path.exists(snapshot_directory_path):
        os.mkdir(snapshot_directory_path)
    snapshot_directory_file = os.path.join(snapshot_directory_path, 'statistics.info')
    # Project the exhaustive dictionary onto the target indices (1-based).
    filtered_f_dict_exhaustive = {}
    for key in fdict_exhaustive.keys():
        new_key = ()
        for target in target_indices:
            new_key += (key[target - 1],)  # trailing comma: append as a 1-tuple
        if new_key not in filtered_f_dict_exhaustive:
            filtered_f_dict_exhaustive[new_key] = 0
        filtered_f_dict_exhaustive[new_key] += fdict_exhaustive[key]
    # Project the Furer dictionaries in the same way.
    filtered_fudicts = []
    fudict_monitors = fudicts[0]
    for fu_dict in fudict_monitors:
        temp_fudict = {}
        for key in fu_dict.keys():
            new_key = ()
            for target in target_indices:
                new_key += (key[target - 1],)
            if new_key not in temp_fudict:
                temp_fudict[new_key] = 0
            temp_fudict[new_key] += fu_dict[key]
        filtered_fudicts.append(temp_fudict)
    smplr.complete_combinations(filtered_f_dict_exhaustive, data_graph, pattern, targe_ids)  # add zeros to all not present combinations
    smplr.smooth(filtered_f_dict_exhaustive, filtered_f_dict_exhaustive)
    fdict_limited = filtered_fudicts[1]
    smplr.smooth(fdict_limited, filtered_f_dict_exhaustive)
    fdict_Furer = filtered_fudicts[1]
    [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default(filtered_f_dict_exhaustive, trash_factor=0.01)
    [pdl, tl, dk] = smplr.make_pd_general_kickout_default_limited(fdict_limited, trash_list, default_key)
    [pdf, tl, dk] = smplr.make_pd_general_kickout_default_limited(fdict_Furer, trash_list, default_key)
    furer_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
    furer_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
    furer_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
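# Illustrative only (hypothetical helper, not used by the pipeline): the same
# 1-based key projection performed in getStatistics_furer above, written as a
# standalone function.
def _project_fdict(fdict, target_indices):
    """Aggregate a tuple-keyed frequency dict onto the given 1-based positions."""
    projected = {}
    for key, count in fdict.items():
        new_key = tuple(key[t - 1] for t in target_indices)
        projected[new_key] = projected.get(new_key, 0) + count
    return projected
# e.g. _project_fdict({('a', 'b', 'c'): 4, ('a', 'x', 'c'): 1}, [1, 3]) == {('a', 'c'): 5}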
def report_monitoring(monitoring_marks, output_path, detailed_result_path, monitoring_reports,
                      exhaustive_approach_result_file, data_graph, pattern, Plist, repetitions,
                      pattern_file_name):
    # Create the directory that will contain results for each time instance.
    picklename = os.path.join(exhaustive_approach_result_file, "fdict_exhaustive_%s.pickle" % pattern_file_name)
    pickin = open(picklename, 'rb')
    fdict_exhaustive = pickle.load(pickin)
    #smplr.complete_combinations(fdict_exhaustive, data_graph, pattern, Plist)  # add zeros to all not present combinations
    #smplr.smooth(fdict_exhaustive, fdict_exhaustive)  # Laplace smoothing also for the exhaustive
    snapshot_dict = {}
    duration = []
    nr_iterations = []
    sum_number_of_embeddings = []
    sum_of_embeddings_vers1 = []
    sum_of_squares_vers1 = []
    sum_of_the_square_embeddings = []
    sum_of_embeddings_random_old = []
    sum_of_square_emb_random_old = []
    nr_root_nodes = []
    begin = 0
    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int
    # The problem might be that some runs finished earlier, and some later.
    for i in xrange(len(monitoring_marks)):
        for key_iter in monitoring_reports.keys():
            if not (monitoring_marks[i] in snapshot_dict.keys()):
                snapshot_dict[monitoring_marks[i]] = []
            try:
                snapshot_dict[monitoring_marks[i]].append(monitoring_reports[key_iter][i])
                nr_iterations.append(monitoring_reports[key_iter][i].nr_iterations)
                sum_number_of_embeddings.append(monitoring_reports[key_iter][i].sum_nr_embeddings)
                sum_of_the_square_embeddings.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings)
                nr_root_nodes.append(monitoring_reports[key_iter][i].nr_root_nodes)
                try:
                    # older report objects may lack the *_random fields
                    sum_of_embeddings_random_old.append(monitoring_reports[key_iter][i].sum_number_of_embeddings_random)
                    sum_of_square_emb_random_old.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings_random)
                except AttributeError:
                    continue
                try:
                    sum_of_embeddings_vers1.append(monitoring_reports[key_iter][i].sum_nr_embeddings_aux)
                    sum_of_squares_vers1.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings_aux)
                except AttributeError:
                    continue
            except IndexError:
                break
    print "NR ITERATIONS: ", nr_iterations
    print "sum_of_embeddings: ", sum_of_embeddings_vers1
    print "sum_of_squares: ", sum_of_squares_vers1
    snapshot_inits = []
    for i in range(repetitions):
        snapshot_inits.append(0)
    counter_duration = 0
    counter = 0
    interval = 0
    acc_nr_emb = 0
    acc_nr_emb_minus_average = 0
    nr_emb_per_interval = []
    for time_snapshot in monitoring_marks:
        interval += 1
        randnode_results_KLD = []
        randnode_results_bhatta = []
        randnode_results_hellinger = []
        furer_times = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []
        snapshot_directory_path = os.path.join(detailed_result_path)
        if not os.path.exists(snapshot_directory_path):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(snapshot_directory_path, 'res_time_' + str(time_snapshot) + '.info')
        fdict_furer_temp = snapshot_dict[time_snapshot]
        fdicts_Furer = []
        for f in fdict_furer_temp:
            fdicts_Furer.append(f.current_fdict)
            observed_nodes.append(f.number_of_observed_nodes)
        if len(fdict_furer_temp) == 0:
            continue
        for i in range(len(fdict_furer_temp)):
            fdict_limited = fdicts_Furer[i]
            smplr.smooth(fdict_limited, fdict_exhaustive)  # smoothing to avoid zeros
            fdict_Furer = fdicts_Furer[i]
            observed_nodes_difference_per_snapshot.append(observed_nodes[i] - snapshot_inits[i])
            snapshot_inits[i] = observed_nodes[i]
            [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default_old(fdict_exhaustive, trash_factor=0.01)
            if len(pde) < 1:
                print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                break
            print "random vertex counts: "
            #for k in fdict_limited.keys():
            #    print k, fdict_limited[k]
            [pdl, tl, dk] = smplr.make_pd_general_kickout_default_limited_old(fdict_limited, trash_list, default_key)
            randnode_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
            randnode_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
            randnode_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
        print "Writing to: ", snapshot_directory_file
        resultfile = open(snapshot_directory_file, 'w')
        resultfile.write('Random\n')
        resultfile.write("experiment on graph: " + str(pattern_file_name) + " and pattern: " + pattern_file_name + "\n")
        resultfile.write("repetitions (for this time snapshot): " + str(repetitions) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("average KLD on random: " + str(numpy.mean(randnode_results_KLD)) + " with SSTD: " + str(numpy.std(randnode_results_KLD, ddof=1)) + "\n")
        print "KLD: ", str(numpy.mean(randnode_results_KLD))
        resultfile.write("average bhatta on random: " + str(numpy.mean(randnode_results_bhatta)) + " with SSTD: " + str(numpy.std(randnode_results_bhatta, ddof=1)) + "\n")
        resultfile.write("average hellinger on random: " + str(numpy.mean(randnode_results_hellinger)) + " with SSTD: " + str(numpy.std(randnode_results_hellinger, ddof=1)) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write('-----DETAILED RESULTS-----' + "\n")
        resultfile.write('random_results_KLD :' + str(randnode_results_KLD) + "\n")
        resultfile.write('random_results_bhatta :' + str(randnode_results_bhatta) + "\n")
        resultfile.write('random_results_hellinger :' + str(randnode_results_hellinger) + "\n")
        resultfile.write('avg #nodes observed :' + str(numpy.mean(observed_nodes)) + "\n")
        resultfile.write('# nodes per time interval per run:' + str(numpy.mean(observed_nodes_difference_per_snapshot) / duration[counter_duration]) + "\n")
        resultfile.write('avg difference of nodes observed from previous snapshot :' + str(numpy.mean(observed_nodes_difference_per_snapshot)) + "\n")
        resultfile.write("------------------------------------ Sampling info ------------------------------\n")
        resultfile.write('number of sampling iterations :' + str(nr_iterations[counter]) + "\n")
        avg = (float(Decimal(sum_number_of_embeddings[counter])) / nr_iterations[counter]) * experiments.globals.nr_root_nodes
        print "HALOOO:", Decimal(sum_number_of_embeddings[counter])
        old = False
        if avg < 0:
            print "Handling old data structures"  # this means we handle the old version
            avg = float(sum_of_embeddings_random_old[counter]) / nr_iterations[counter]
            old = True
        #nr_emb_per_interval.append(avg)
        #print "Nr emb per interval: ", nr_emb_per_interval
        #resultfile.write('average of embeddings w.r.t sampling iterations:' + str(avg) + "\n")
        #print "Avg old method: ", avg
        #acc_nr_emb += Decimal(avg)
        #print "Interval: ", interval, " Accnr emg: ", acc_nr_emb
        #avg_emb_Jan = acc_nr_emb / interval
        #acc_nr_emb_minus_average = 0
        #for i in xrange(0, interval):
        #    acc_nr_emb_minus_average += math.pow(Decimal(nr_emb_per_interval[i]) - Decimal(avg_emb_Jan), 2)
        #std_Jan = math.sqrt(acc_nr_emb_minus_average) / interval
        #print "STDEV Jan: ", std_Jan
        #print "Average Nr emb: Jan Method: ", avg_emb_Jan
        sum1 = Decimal(sum_of_the_square_embeddings[counter])
        sum2 = Decimal(sum_number_of_embeddings[counter])
        #variance = (Decimal(sum1) - Decimal(sum2)) / (nr_iterations[counter] - 1)
        print "sum 1: ", sum1
        print "sum 2: ", sum2
        #print "Variance: ", variance
        #stdev1 = math.sqrt(variance)
        var = Decimal(sum1) - (Decimal(math.pow(Decimal(sum2), 2)) / nr_iterations[counter])
        stdev2 = math.sqrt(var / (nr_iterations[counter] - 1))
        stdev = Decimal(stdev2) * Decimal(math.sqrt(nr_iterations[counter]))
        if old:
            print "Handling old data structures"  # this means we handle the old version
            variance = sum_of_square_emb_random_old[counter] / nr_iterations[counter]
            stdev2 = math.sqrt(variance / (nr_iterations[counter] - 1))
        print "STDEV: ", stdev
        print "STDEV 2: ", stdev2
        print "Nr embeddingS: ", avg
        resultfile.write('average of embeddings w.r.t sampling iterations:' + str(avg) + "\n")
        resultfile.write('stdeviation of # embeddings:' + str(stdev2) + "\n")
        resultfile.write('2 stdeviation of # embeddings:' + str(stdev) + "\n")
        #resultfile.write('overal nr. of embeddings:' + str(sum_of_embeddings[counter]) + "\n")
        #a = sum_of_squares[counter] - (math.pow(sum_of_embeddings[counter], 2) / nr_iterations[counter])
        #stdeviation = math.sqrt(a / (nr_iterations[counter] - 1))
        #a1 = sum_of_squares_vers1[counter] - (math.pow(sum_of_embeddings_vers1[counter], 2) / nr_iterations[counter])
        #stdeviation1 = math.sqrt(a1 / (nr_iterations[counter] - 1))
        #resultfile.write('stdeviation of # embeddings:' + str(stdeviation) + "\n")
        counter += 1
        resultfile.close()
        counter_duration += 1
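# Worked example of the estimator used above (hypothetical numbers): with
# sum_number_of_embeddings = 120 accumulated over nr_iterations = 40 and
# experiments.globals.nr_root_nodes = 1000, the per-iteration average is
# 120 / 40 = 3.0 embeddings, and the extrapolated estimate written to the
# report is 3.0 * 1000 = 3000 embeddings.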
def report(rndicts, all_randnode_times, NLIMIT_values, plot_result_dict, repetitions,
           detailed_result_path, output_path, exhaustive_approach_result_file, pattern_file_name,
           nr_embeddings_n_limits):
    if len(rndicts) == 0:
        with open(os.path.join(output_path, 'no_results.info'), 'wb') as result_info:
            result_info.write("No results for random - empty rndicts!")
        return  # nothing to report
    pickout = open(os.path.join(output_path, 'rndicts.pickle'), 'wb')
    pickle.dump(rndicts, pickout)
    pickout.close()
    pickout = open(os.path.join(output_path, 'all_randnode_times.pickle'), 'wb')
    pickle.dump(all_randnode_times, pickout)
    pickout.close()
    picklename = os.path.join(exhaustive_approach_result_file, "fdict_exhaustive_%s.pickle" % pattern_file_name)
    pickin = open(picklename, 'rb')
    fdict_exhaustive = pickle.load(pickin)
    for nli in range(len(NLIMIT_values)):
        plot_result_dict[NLIMIT_values[nli]] = {}
        randnode_results_KLD = []
        randnode_results_bhatta = []
        randnode_results_hellinger = []
        randnode_times = []
        for i in range(repetitions):
            emb = nr_embeddings_n_limits[nli]
            randnode_times.append(all_randnode_times[i][nli])
            fdict_limited = rndicts[i][nli]
            smplr.smooth(fdict_limited, fdict_exhaustive)  # smoothing to avoid zeros
            [pde, trash_list, default_key] = smplr.make_pd_general_kickout_default(fdict_exhaustive, trash_factor=0.01)  # we remove rows where frequencies do not reach 1%
            if len(pde) < 1:
                print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                break
            #print "pde length: ", len(pde)
            [pdl, tl, dk] = smplr.make_pd_general_kickout_default_limited(fdict_limited, trash_list, default_key)
            #print "fdict exhaustive: ", len(fdict_exhaustive), "fdict limited", len(fdict_limited)
            # new function also for limited ones: make_pd_general_kickout_default_limited(fdict, trash, default_key)
            randnode_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
            randnode_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
            randnode_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
        plot_result_dict[NLIMIT_values[nli]]["randomnode_KLD"] = (numpy.mean(randnode_results_KLD), numpy.std(randnode_results_KLD, ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["randomnode_BHT"] = (numpy.mean(randnode_results_bhatta), numpy.std(randnode_results_bhatta, ddof=1))
        plot_result_dict[NLIMIT_values[nli]]["randomnode_HEL"] = (numpy.mean(randnode_results_hellinger), numpy.std(randnode_results_hellinger, ddof=1))
        # added to store and plot the times
        plot_result_dict[NLIMIT_values[nli]]["randomnode_times"] = (numpy.mean(randnode_times), numpy.std(randnode_times, ddof=1))
        result_file_name = detailed_result_path + "/" + "ultimex_ICDM_" + pattern_file_name + pattern_file_name + "." + str(repetitions) + "x" + str(NLIMIT_values[nli]) + ".result"
        print "RESULT FILE NAME: ", result_file_name
        resultfile = open(result_file_name, 'w')
        resultfile.write('Random Vertex\n')
        resultfile.write("experiment on graph: " + str(pattern_file_name) + " and pattern: " + pattern_file_name + "\n")
        resultfile.write("NLIMIT: " + str(NLIMIT_values[nli]) + "\n")
        resultfile.write("repetitions: " + str(repetitions) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("average average KLD on randomnode: " + str(numpy.mean(randnode_results_KLD)) + " with SSTD: " + str(numpy.std(randnode_results_KLD, ddof=1)) + "\n")
        resultfile.write("average average bhatta on randomnode: " + str(numpy.mean(randnode_results_bhatta)) + " with SSTD: " + str(numpy.std(randnode_results_bhatta, ddof=1)) + "\n")
        resultfile.write("average average hellinger on randomnode: " + str(numpy.mean(randnode_results_hellinger)) + " with SSTD: " + str(numpy.std(randnode_results_hellinger, ddof=1)) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("Random node took per run on average: " + str(numpy.mean(randnode_times)) + " seconds." + "\n")
        resultfile.write('-----DETAILED RESULTS-----' + "\n")
        resultfile.write('randnode_results_KLD :' + str(randnode_results_KLD) + "\n")
        resultfile.write('randnode_results_bhatta :' + str(randnode_results_bhatta) + "\n")
        resultfile.write('randnode_results_hellinger :' + str(randnode_results_hellinger) + "\n")
        resultfile.write('randnode_times :' + str(randnode_times) + "\n")
        resultfile.write('Nr embeddings for limit: ' + str(emb))
        resultfile.close()
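# Illustrative only: both report() variants summarise each metric as
# (numpy.mean(xs), numpy.std(xs, ddof=1)); ddof=1 gives the sample standard
# deviation reported as "SSTD" in the result files.  For example
# (hypothetical values):
#
#   >>> numpy.mean([0.10, 0.14, 0.12]), numpy.std([0.10, 0.14, 0.12], ddof=1)
#   approximately (0.12, 0.02)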