def replica_vs_original(seed=None, figpath=None, generator_func=None, G=None,
                        params=None, num_replicas=150, title_infix='',
                        metrics=None, intermediates=False, n_jobs=-1):
    """Generate one or more replicas of G and compare them to the original graph.

    Parameters:
        seed: RNG seed; drawn at random when omitted.
        figpath: base path for the deviation plot and result files.
        generator_func: replica generator; defaults to algorithms.generate_graph.
        G: original graph; a sample dataset is loaded when omitted.
        params: replication parameters (edit rates, connectivity flags, ...).
        num_replicas: number of replicas to generate.
        title_infix: text inserted into plot titles.
        metrics: metric descriptor dicts; defaults to graphutils.default_metrics.
        intermediates: also evaluate the coarsened intermediate graphs level by level.
        n_jobs: parallelism passed through to the evaluation helpers.

    Returns:
        dict with keys 'metrics', 'name', 'params', 'num_replicas', 'figpath',
        plus an integer key per level (0 = final graphs) holding that level's
        replica statistics.
    """
    if seed is None:
        seed = npr.randint(1E6)
    print('rand seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    if generator_func is None:
        generator_func = algorithms.generate_graph
    if G is None:
        G = graphutils.load_graph(path='data-social/potterat_Hiv250.elist')
    # BUGFIX: default params *before* it is consulted below; the original code
    # tested "'metric_runningtime_bound' in params" while params could still be
    # None, raising TypeError.
    if params is None:
        params = {'verbose': False,
                  'node_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'edge_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'node_growth_rate': [0],
                  'locality_bias_correction': 0.,
                  'enforce_connected': True,
                  'accept_chance_edges': 1.0,
                  'retain_intermediates': intermediates}
    if intermediates:
        params['retain_intermediates'] = True

    if metrics is None:
        metrics = graphutils.default_metrics[:]
    metrics = [m for m in metrics if m['optional'] < 2]
    if 'metric_runningtime_bound' in params:
        mrtb = params['metric_runningtime_bound']
        metrics = [m for m in metrics if m['runningtime'] <= mrtb]
    metrics = [m for m in metrics if m['name'] not in ['avg flow closeness']]  #broken in NX 1.6
    metrics.reverse()

    print('Params:')
    print(params)
    print('Metrics:')
    print([metric['name'] for metric in metrics])

    replicas = replicate_graph(G=G, generator_func=generator_func,
                               num_replicas=num_replicas, params=params,
                               title_infix=title_infix, n_jobs=n_jobs)
    jaccard_edges = evaluate_similarity(base_graphs=G, graphs=replicas, n_jobs=n_jobs)  #this is actually a mean
    # First entry of every metric row is the original graph, the rest are replicas.
    vals_of_all = evaluate_metrics(graphs=[G] + replicas, metrics=metrics, n_jobs=n_jobs)
    vals_of_graph = [metric_data[0] for metric_data in vals_of_all]
    vals_of_replicas = [metric_data[1:] for metric_data in vals_of_all]

    replica_statistics, figpath = plot_deviation(vals_of_replicas, vals_of_graph,
                                                 metrics, figpath, jaccard_edges,
                                                 title_infix, seed,
                                                 getattr(G, 'name', ''))
    #pylab.show()
    data = {'metrics': [met['name'] for met in metrics],
            'name': getattr(G, 'name', ''),
            'params': params,
            'num_replicas': num_replicas,
            'figpath': figpath}
    data[0] = replica_statistics
    data[0].update({'vals_of_replicas': vals_of_replicas,
                    'val_of_models': vals_of_graph,
                    'avg_jaccard_edges': jaccard_edges})

    if intermediates:
        # Walk down the coarsening hierarchy retained on each replica and
        # compare every level's replicas against that level's model graphs.
        current_replicas = replicas
        num_levels = max(len(params.get('node_edit_rate', [])),
                         len(params.get('edge_edit_rate', [])),
                         len(params.get('node_growth_rate', [])),
                         len(params.get('edge_growth_rate', [])))
        for level in range(1, num_levels):
            print('LEVEL: %d' % level)
            coarse_models = [r.coarser_graph.model_graph for r in current_replicas]
            coarse_replicas = [r.coarser_graph for r in current_replicas]
            vals_of_models = evaluate_metrics(graphs=coarse_models, metrics=metrics, n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas, metrics=metrics, n_jobs=n_jobs)
            jaccard_edges = evaluate_similarity(base_graphs=coarse_models,
                                                graphs=coarse_replicas, n_jobs=n_jobs)
            replica_statistics, dummy \
                = plot_deviation(vals_of_replicas=vals_of_replicas,
                                 vals_of_graph=vals_of_models,
                                 metrics=metrics,
                                 figpath=figpath + 'level%d' % level,
                                 jaccard_edges=jaccard_edges)
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({'vals_of_replicas': vals_of_replicas,
                                'vals_of_models': vals_of_models,
                                'avg_jaccard_edges': jaccard_edges})

    graphutils.safe_pickle(path=figpath + '.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath + '.csv', seed=seed, data=data)
    return data
    # NOTE(review): fragment of a larger visualization routine — the enclosing
    # 'def' line and the opening branch of this if/elif chain are outside this
    # view, so the indentation context below is reconstructed, not original.
        if verbose:
            print('Writing graph image: %s ..' % image_path)
            sys.stdout.flush()
        # run the external rendering command built earlier (return code unused)
        retCode = os.system(visualizer_cmdl)
        if verbose:
            print(visualizer_cmdl)
        if os.name == 'nt':
            # Windows: 'start' opens the PDF with the default associated viewer
            pdf_cmld = 'start %s' % image_path
            if verbose:
                print(pdf_cmld)
            os.system(pdf_cmld)
        elif os.name == 'posix':
            #aside: file --mime-type -b my.pdf
            open_in_unix(image_path, verbose=verbose, ext='pdf')
    elif write_graph and visualizer != None:
        # Alternative path: a user-supplied visualizer command is run on the
        # graph file that was written earlier, instead of producing a PDF.
        if verbose:
            print('Running visualizer: ' + str(visualizer))
            sys.stdout.flush()
        visualizer_cmdl = visualizer + ' ' + output_path
        retCode = os.system(visualizer_cmdl)
        if verbose:
            print(visualizer_cmdl)
    # Persist the replica alongside its parameters for later reloading.
    graphutils.safe_pickle(path=output_path + '.pkl', data=new_G, params=params)
    if verbose:
        print('Replica is referenced by variable: new_G')
def replica_vs_original(seed=None, figpath=None, generator_func=None, G=None,
                        params=None, num_replicas=150, title_infix='',
                        metrics=None, intermediates=False, n_jobs=-1,
                        store_replicas=False):
    """Generate one or more replicas and compare them to the original graph.

    Parameters:
        seed: RNG seed; drawn at random when omitted.
        figpath: base path for the deviation plot and result files.
        generator_func: replica generator; defaults to algorithms.generate_graph.
            NOTE(review): currently unused — replicas are loaded from disk via
            read_all_files() (see below).
        G: original graph; a sample dataset is loaded when omitted.
        params: replication parameters (edit rates, connectivity flags, ...).
        num_replicas: number of replicas requested.
        title_infix: text inserted into plot titles.
        metrics: metric descriptor dicts; defaults to graphutils.default_metrics.
        intermediates: also evaluate the coarsened intermediate graphs.
        n_jobs: parallelism passed through to the evaluation helpers.
        store_replicas: additionally write every replica to a .gml file.

    Returns:
        dict with keys 'metrics', 'name', 'params', 'num_replicas', 'figpath',
        plus an integer key per level (0 = final graphs) holding that level's
        replica statistics.
    """
    if seed is None:
        seed = npr.randint(1E6)
    print('rand seed: %d' % seed)
    npr.seed(seed)
    random.seed(seed)

    if generator_func is None:
        generator_func = algorithms.generate_graph
    if G is None:
        G = graphutils.load_graph(path='data-social/potterat_Hiv250.elist')
    # BUGFIX: default params *before* it is consulted below; the original code
    # tested "'metric_runningtime_bound' in params" while params could still be
    # None, raising TypeError.
    if params is None:
        params = {'verbose': False,
                  'node_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'edge_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
                  'node_growth_rate': [0],
                  'locality_bias_correction': 0.,
                  'enforce_connected': True,
                  'accept_chance_edges': 1.0,
                  'retain_intermediates': intermediates}
    if intermediates:
        params['retain_intermediates'] = True

    if metrics is None:
        metrics = graphutils.default_metrics[:]
    metrics = [m for m in metrics if m['optional'] < 2]
    if 'metric_runningtime_bound' in params:
        mrtb = params['metric_runningtime_bound']
        metrics = [m for m in metrics if m['runningtime'] <= mrtb]
    metrics = [m for m in metrics if m['name'] not in ['avg flow closeness']]  #broken in NX 1.6
    metrics.reverse()

    print('Params:')
    print(params)
    print('Metrics:')
    print([metric['name'] for metric in metrics])

    # FIXME(review): the generator is bypassed — replicas are read from disk
    # instead of being generated.  The original call is kept for reference:
    #replicas = replicate_graph(G=G, generator_func=generator_func,
    #                           num_replicas=num_replicas, params=params,
    #                           title_infix=title_infix, n_jobs=n_jobs)
    replicas = read_all_files()

    jaccard_edges = evaluate_similarity(base_graphs=G, graphs=replicas, n_jobs=n_jobs)  #this is actually a mean
    # First entry of every metric row is the original graph, the rest are replicas.
    vals_of_all = evaluate_metrics(graphs=[G] + replicas, metrics=metrics, n_jobs=n_jobs)
    vals_of_graph = [metric_data[0] for metric_data in vals_of_all]
    vals_of_replicas = [metric_data[1:] for metric_data in vals_of_all]

    replica_statistics, figpath = plot_deviation(vals_of_replicas, vals_of_graph,
                                                 metrics, figpath, jaccard_edges,
                                                 title_infix, seed,
                                                 getattr(G, 'name', ''))
    #pylab.show()
    data = {'metrics': [met['name'] for met in metrics],
            'name': getattr(G, 'name', ''),
            'params': params,
            'num_replicas': num_replicas,
            'figpath': figpath}
    data[0] = replica_statistics
    data[0].update({'vals_of_replicas': vals_of_replicas,
                    'val_of_models': vals_of_graph,
                    'avg_jaccard_edges': jaccard_edges})

    # Dump replica metric values normalized by the original graph's value.
    # FIXME(review): hard-coded, user-specific output path.
    out_dir = "/home/varsha/Documents/final_results/Krongen/Boeing_normalized" + timeNow()
    # 'with' guarantees the file is closed even on error (original leaked the
    # handle on exception).
    with open(out_dir, 'w') as myfile:
        for i, repl in enumerate(vals_of_replicas):
            for elem in repl:
                # Normalize only when the original's value is nonzero.
                nor_value = float(elem) / vals_of_graph[i] if vals_of_graph[i] != 0 else elem
                myfile.write(str(nor_value))
                myfile.write('\t')
            myfile.write('\n')

    if intermediates:
        # Walk down the coarsening hierarchy retained on each replica and
        # compare every level's replicas against that level's model graphs.
        current_replicas = replicas
        num_levels = max(len(params.get('node_edit_rate', [])),
                         len(params.get('edge_edit_rate', [])),
                         len(params.get('node_growth_rate', [])),
                         len(params.get('edge_growth_rate', [])))
        for level in range(1, num_levels):
            print('LEVEL: %d' % level)
            coarse_models = [r.coarser_graph.model_graph for r in current_replicas]
            coarse_replicas = [r.coarser_graph for r in current_replicas]
            vals_of_models = evaluate_metrics(graphs=coarse_models, metrics=metrics, n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas, metrics=metrics, n_jobs=n_jobs)
            jaccard_edges = evaluate_similarity(base_graphs=coarse_models,
                                                graphs=coarse_replicas, n_jobs=n_jobs)
            replica_statistics, dummy \
                = plot_deviation(vals_of_replicas=vals_of_replicas,
                                 vals_of_graph=vals_of_models,
                                 metrics=metrics,
                                 figpath=figpath + 'level%d' % level,
                                 jaccard_edges=jaccard_edges)
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({'vals_of_replicas': vals_of_replicas,
                                'vals_of_models': vals_of_models,
                                'avg_jaccard_edges': jaccard_edges})

    graphutils.safe_pickle(path=figpath + '.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath + '.csv', seed=seed, data=data)

    # optionally store replica graphs in files
    if store_replicas:
        out_dir = "output/replicas_{0}_{1}".format(getattr(G, "name", ""), timeNow())
        os.mkdir(out_dir)
        # BUGFIX: the original loop variable was named G, shadowing the
        # original-graph parameter; renamed to 'replica'.
        for replica_no, replica in enumerate(replicas):
            graphutils.write_graph(replica, path="{0}/{1}.gml".format(out_dir, replica_no))

    return data
    # NOTE(review): legacy Python 2 fragment (print statements) of the same
    # visualization routine that appears elsewhere in this file in Python 3
    # form; the enclosing 'def' and opening if-branch are outside this view,
    # so the indentation context is reconstructed, not original.
        # build the sfdp command: render output_path to a PDF, capturing stderr
        visualizer_cmdl = sfdp_default_cmd + " -Tpdf %s > %s 2> %s " % (output_path, image_path, stderr_path)
        if verbose:
            print "Writing graph image: %s .." % image_path
            sys.stdout.flush()
        retCode = os.system(visualizer_cmdl)
        if verbose:
            print visualizer_cmdl
        if os.name == "nt":
            # Windows: 'start' opens the PDF with the default associated viewer
            pdf_cmld = "start %s" % image_path
            if verbose:
                print pdf_cmld
            os.system(pdf_cmld)
        elif os.name == "posix":
            # aside: file --mime-type -b my.pdf
            open_in_unix(image_path, verbose=verbose, ext="pdf")
    elif write_graph and visualizer != None:
        # Alternative path: run a user-supplied visualizer command on the
        # written graph file instead of producing a PDF.
        if verbose:
            print "Running visualizer: " + str(visualizer)
            sys.stdout.flush()
        visualizer_cmdl = visualizer + " " + output_path
        retCode = os.system(visualizer_cmdl)
        if verbose:
            print visualizer_cmdl
    # Persist the replica alongside its parameters for later reloading.
    graphutils.safe_pickle(path=output_path + ".pkl", data=new_G, params=params)
    if verbose:
        print "Replica is referenced by variable: new_G"
    # NOTE(review): Python 2 fragment — the enclosing function's 'def' line is
    # not visible here; this is the intermediate-levels tail of a
    # replica_vs_original variant (indentation reconstructed).
        # Walk down the coarsening hierarchy retained on each replica and
        # compare every level's replicas against that level's model graphs.
        current_replicas = replicas
        for level in xrange(1, max(len(params.get('node_edit_rate', [])),
                                   len(params.get('edge_edit_rate', [])),
                                   len(params.get('node_growth_rate', [])),
                                   len(params.get('edge_growth_rate', [])))):
            print 'LEVEL: %d'%level
            coarse_models = [r.coarser_graph.model_graph for r in current_replicas]
            coarse_replicas = [r.coarser_graph for r in current_replicas]
            vals_of_models = evaluate_metrics(graphs=coarse_models, metrics=metrics, n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas, metrics=metrics, n_jobs=n_jobs)
            jaccard_edges = evaluate_similarity(base_graphs=coarse_models, graphs=coarse_replicas, n_jobs=n_jobs)
            replica_statistics, dummy \
                = plot_deviation(vals_of_replicas=vals_of_replicas, vals_of_graph=vals_of_models, metrics=metrics, figpath=figpath + 'level%d'%level, jaccard_edges=jaccard_edges)
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({'vals_of_replicas':vals_of_replicas, 'vals_of_models':vals_of_models, 'avg_jaccard_edges':jaccard_edges})
    # Persist results: pickle of all data, parameter record, and CSV of stats.
    graphutils.safe_pickle(path=figpath+'.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath+'.csv', seed=seed, data=data)
    return data

def safe_metrics(graph, metrics):
    # Evaluate every metric's 'function' on graph, substituting
    # graphutils.METRIC_ERROR for any metric that raises, so one broken
    # metric cannot abort the whole evaluation.  (Python 2 except syntax.)
    rets = []
    for met_num,metric in enumerate(metrics):
        try:
            rets.append(metric['function'](graph))
        except Exception, inst:
            print 'error in computing: '+metric['name']
            print inst
            rets.append(graphutils.METRIC_ERROR)
    # NOTE(review): no 'return rets' is visible in this chunk — the return
    # statement may have been lost in formatting; verify against the original.
def replica_vs_original(seed=None, figpath=None, generator_func=None, G=None, params=None, num_replicas=150, title_infix='', metrics=None, intermediates=False, n_jobs=-1):
    #generate one or more replicas and compare them to the original graph
    # NOTE(review): legacy Python 2 variant (print statements, xrange,
    # list-returning filter) of the same routine that also appears elsewhere
    # in this file in Python 3 form.
    if seed == None:
        seed = npr.randint(1E6)
    print 'rand seed: %d' % seed
    npr.seed(seed)
    random.seed(seed)
    if generator_func == None:
        generator_func = algorithms.generate_graph
    if G == None:
        G = graphutils.load_graph(path='data-social/potterat_Hiv250.elist')
    if metrics == None:
        metrics = graphutils.default_metrics[:]
    metrics = filter(lambda m: m['optional'] < 2, metrics)
    # NOTE(review): params may still be None here — it is only defaulted
    # further down, so this membership test would raise TypeError for the
    # default call; confirm callers always pass params.
    if 'metric_runningtime_bound' in params:
        mrtb = params['metric_runningtime_bound']
        metrics = filter(lambda m: m['runningtime'] <= mrtb, metrics)
    metrics = filter(lambda m: m['name'] not in ['avg flow closeness'], metrics)  #broken in NX 1.6
    metrics.reverse()
    if params == None:
        params = {
            'verbose': False,
            'node_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
            'edge_edit_rate': [0.05, 0.04, 0.03, 0.02, 0.01],
            'node_growth_rate': [0],
            'locality_bias_correction': 0.,
            'enforce_connected': True,
            'accept_chance_edges': 1.0,
            'retain_intermediates': intermediates
        }
    if intermediates:
        params['retain_intermediates'] = True
    print 'Params:'
    print params
    print 'Metrics:'
    print [metric['name'] for metric in metrics]
    replicas = replicate_graph(G=G, generator_func=generator_func,
                               num_replicas=num_replicas, params=params,
                               title_infix=title_infix, n_jobs=n_jobs)
    jaccard_edges = evaluate_similarity(base_graphs=G, graphs=replicas, n_jobs=n_jobs)  #this is actually a mean
    # First entry of every metric row is the original graph, the rest are replicas.
    vals_of_all = evaluate_metrics(graphs=[G] + replicas, metrics=metrics, n_jobs=n_jobs)
    vals_of_graph = [metric_data[0] for metric_data in vals_of_all]
    vals_of_replicas = [metric_data[1:] for metric_data in vals_of_all]
    replica_statistics, figpath = plot_deviation(vals_of_replicas, vals_of_graph,
                                                 metrics, figpath, jaccard_edges,
                                                 title_infix, seed,
                                                 getattr(G, 'name', ''))
    #pylab.show()
    data = {
        'metrics': [met['name'] for met in metrics],
        'name': getattr(G, 'name', ''),
        'params': params,
        'num_replicas': num_replicas,
        'figpath': figpath
    }
    data[0] = replica_statistics
    data[0].update({
        'vals_of_replicas': vals_of_replicas,
        'val_of_models': vals_of_graph,
        'avg_jaccard_edges': jaccard_edges
    })
    if intermediates:
        # Walk down the coarsening hierarchy retained on each replica and
        # compare every level's replicas against that level's model graphs.
        current_replicas = replicas
        for level in xrange(
                1, max(len(params.get('node_edit_rate', [])),
                       len(params.get('edge_edit_rate', [])),
                       len(params.get('node_growth_rate', [])),
                       len(params.get('edge_growth_rate', [])))):
            print 'LEVEL: %d' % level
            coarse_models = [
                r.coarser_graph.model_graph for r in current_replicas
            ]
            coarse_replicas = [r.coarser_graph for r in current_replicas]
            vals_of_models = evaluate_metrics(graphs=coarse_models, metrics=metrics, n_jobs=n_jobs)
            vals_of_replicas = evaluate_metrics(graphs=coarse_replicas, metrics=metrics, n_jobs=n_jobs)
            jaccard_edges = evaluate_similarity(base_graphs=coarse_models,
                                                graphs=coarse_replicas, n_jobs=n_jobs)
            replica_statistics, dummy \
                = plot_deviation(vals_of_replicas=vals_of_replicas, vals_of_graph=vals_of_models, metrics=metrics, figpath=figpath + 'level%d'%level, jaccard_edges=jaccard_edges)
            current_replicas = coarse_replicas
            data[level] = replica_statistics
            data[level].update({
                'vals_of_replicas': vals_of_replicas,
                'vals_of_models': vals_of_models,
                'avg_jaccard_edges': jaccard_edges
            })
    # Persist results: pickle of all data, parameter record, and CSV of stats.
    graphutils.safe_pickle(path=figpath + '.pkl', data=data)
    save_param_set(params, seed, figpath)
    save_stats_csv(path=figpath + '.csv', seed=seed, data=data)
    return data