import logging
import random
from glob import glob
from os.path import join
from pathlib import Path

import networkx as nx
import numpy as np

# Project-local helpers (parallel_async, get_graph, get_clustering, get_grammars, generate_graphs,
# get_mixing_dict, load_pickle, read_batched_graphs, motif_counter_runner, make_all_graph_dfs_new)
# and the grammar classes (VRG, AttributedVRG) are assumed to be imported from the package's own modules.


def get_final_graph_dfs():
    """Build the final stats DataFrames for the selected datasets, in parallel."""
    basedir = '/data/ssikdar/Attributed-VRG'
    names = ['polbooks', 'football', 'wisconsin', 'texas', 'cornell', 'cora', 'citeseer', 'airports',
             'polblogs', 'film', 'chameleon', 'squirrel'][:-3]  # drop film, chameleon, squirrel
    # names = ['cora']
    clusterings = ['leiden']
    extract_types = ['mu-random']
    mus = [5]
    # models = ['AVRG', 'NetGAN', 'CELL', 'CL', 'AGM']  # , 'linear_ae', 'linear_vae', 'gcn_ae', 'gcn_vae']
    models = ['gcn_ae', 'gcn_vae']
    final, slow_stats = True, False

    # make_all_graph_dfs_new(basedir, name, clusterings, models=None, final=False, slow_stats=False,
    #                        mus=None, extract_types=None)
    args = []
    for name in names:
        args.append((basedir, name, clusterings, models, final, slow_stats, mus, extract_types))
    parallel_async(func=make_all_graph_dfs_new, args=args, num_workers=5)
    # make_all_graph_dfs_new(basedir=basedir, name=name, clusterings=clusterings, mus=mus, models=models,
    #                        extract_types=extract_types, final=True, slow_stats=True)
    return

def batch_synthetic_generator_runner():
    # frac = np.linspace(0, 1, 21, endpoint=True) * 100
    frac = np.linspace(0, 100, 11, endpoint=True, dtype=int)  # change it to increments of 10 for now
    names = [f'toy-comm-{f}' for f in frac]
    # names = ['karate', 'football', 'polbooks', 'eucore', 'flights', 'chess', 'polblogs']
    num_graphs = 5
    outdir = '/data/ssikdar/attributed-vrg/dumps'
    use_pickle = True
    save_snapshots = False
    shuffle = 'edges'

    args = []
    for name in names:
        # input_graph, attr_name = get_graph(name)
        input_graph, attr_name = nx.read_gexf(f'./input/shuffled/{shuffle}/{name}.gexf', node_type=int), 'block'
        name = f'{name}-{shuffle}'
        if attr_name == '':
            mix_dict, inp_deg_ast, inp_attr_ast = None, None, None
        else:
            mix_dict = get_mixing_dict(input_graph, attr_name=attr_name)
            inp_deg_ast = nx.degree_assortativity_coefficient(input_graph)
            inp_attr_ast = nx.attribute_assortativity_coefficient(input_graph, attr_name)

        for grammar_filename in glob(f'{outdir}/grammars/{name}/*'):
            grammar = load_pickle(grammar_filename)
            if isinstance(grammar, AttributedVRG):
                grammar_type = 'AVRG'
                fancy = True
                args.append((name, grammar, num_graphs, grammar_type, outdir, mix_dict, attr_name, fancy,
                             inp_deg_ast, inp_attr_ast, use_pickle, save_snapshots))

                grammar_type = 'AVRG-greedy'
                # args.append((name, grammar, num_graphs, grammar_type, outdir, mix_dict, attr_name, fancy,
                #              inp_deg_ast, inp_attr_ast, use_pickle, save_snapshots))
                for alpha in (0, 0.5, 1):
                    args.append((name, grammar, num_graphs, grammar_type, outdir, mix_dict, attr_name, fancy,
                                 inp_deg_ast, inp_attr_ast, use_pickle, save_snapshots, alpha))
            else:
                assert isinstance(grammar, VRG)
                grammar_type = 'VRG'
                fancy = None
                args.append((name, grammar, num_graphs, grammar_type, outdir, mix_dict, attr_name, fancy,
                             inp_deg_ast, inp_attr_ast, use_pickle, save_snapshots))

    parallel_async(func=generate_graphs, args=args, num_workers=10)
    # generate_graphs(grammar: Union[VRG, NCE, AttributedVRG], num_graphs: int, grammar_type: str, outdir: str = 'dumps',
    #                 mixing_dict: Union[None, Dict] = None, attr_name: Union[str, None] = None, fancy=None,
    #                 inp_deg_ast: float = None, inp_attr_ast: float = None)
    return

def batch_grammar_runner(names, clusterings, outdir, mus=None, extract_types=None, num_workers=8, shuffle=False):
    # grammar_types_1 = ['VRG', 'AVRG']
    grammar_types = ['AVRG']
    if extract_types is None:
        extract_types = ['mu_random', 'mu_level', 'all_tnodes']
    if mus is None:
        mus = range(3, 11)
        # mus = [5, 6]
    use_cluster_pickle = True
    use_grammar_pickle = True
    count = 1
    write_pickle = True

    args = []
    for name in names:
        input_graph, attr_name = get_graph(name, basedir=outdir)
        for clustering in clusterings:
            for grammar_type in grammar_types:
                for extract_type in extract_types:
                    for mu in mus:
                        extract = extract_type.replace('_', '-')
                        if extract_type == 'all_tnodes':
                            mu = -1
                        grammar_filename = join(outdir, 'output', 'grammars', name,
                                                f'{grammar_type}_{extract}_{clustering}_{mu}.pkl')
                        arg = (name, grammar_type, extract_type, clustering, mu, input_graph, use_grammar_pickle,
                               use_cluster_pickle, attr_name, outdir, count, grammar_filename, write_pickle)
                        args.append(arg)
                        if extract_type == 'all_tnodes':  # here mu is not important for all_tnodes
                            break

    print(args[:3])
    if shuffle:
        random.shuffle(args)
    try:
        parallel_async(func=get_grammars, args=args, num_workers=num_workers)
    except Exception as e:
        print(e)
    # get_grammars(name: str, grammar_type: str, extract_type: str, clustering: str, mu: int, input_graph: nx.Graph,
    #              use_grammar_pickle: bool, use_cluster_pickle: bool, attr_name: str, outdir: str, count: int = 1,
    #              grammar_filename: str = '', write_pickle: bool = True, list_of_list_clusters=None)
    return

def batched_graphs_clusters(basedir, name, clusterings, num_workers=5):
    input_graphs = read_batched_graphs(basedir=basedir, name=name)
    use_pickle = True

    args = []
    for i, g in enumerate(input_graphs):
        g.name = f'{name}-{i}'
        for clustering in clusterings:
            filename = join(basedir, 'output', 'trees', name, f'{clustering}_{i}.pkl')
            args.append((g, join(basedir, 'output', 'trees'), clustering, use_pickle, filename))

    parallel_async(func=get_clustering, args=args, num_workers=num_workers)
    # get_clustering(g: nx.Graph, outdir: str, clustering: str, use_pickle: bool, filename='') -> Any
    return

def batch_cluster_runner(names, outdir, clusterings=None):
    if clusterings is None:
        clusterings = ['cond', 'spectral', 'leiden', 'louvain', 'infomap', 'labelprop', 'random',
                       'leading_eig', 'consensus'][:-1]
    use_pickle = True

    args = []
    for name in names:
        g, _ = get_graph(name, basedir=outdir)
        g.name = name
        for clustering in clusterings:
            args.append((g, outdir, clustering, use_pickle, ''))

    random.shuffle(args)
    parallel_async(func=get_clustering, args=args)
    return

def batched_graphs_generator(basedir, clusterings, name, mus=None):
    # num_graphs = 5 if 'polblogs' in name else 10
    num_graphs = 10
    use_pickle = True
    save_snapshots = False
    attr_name = 'value'
    if mus is None:
        mus = [5]
    alpha = None
    input_graphs = read_batched_graphs(basedir=basedir, name=name)
    extract_types = ['mu_random']

    args = []
    for i, input_graph in enumerate(input_graphs):
        mix_dict = get_mixing_dict(input_graph, attr_name=attr_name)
        inp_deg_ast = nx.degree_assortativity_coefficient(input_graph)
        inp_attr_ast = nx.attribute_assortativity_coefficient(input_graph, attr_name)

        for grammar_filename in glob(f'{basedir}/output/grammars/{name}/*_{i}.pkl'):
            grammar = load_pickle(grammar_filename)
            if (grammar.mu not in mus or grammar.clustering not in clusterings
                    or grammar.extract_type not in extract_types):
                continue
            extract_type = grammar.extract_type.replace('_', '-')
            if isinstance(grammar, AttributedVRG):
                for gen_type, fancy in zip(('AVRG-regular', 'AVRG-fancy'), (False, True)):
                    graphs_filename = f'{basedir}/output/graphs/{name}/{gen_type}_{extract_type}_{grammar.clustering}_{grammar.mu}_{num_graphs}_{i}.pkl'
                    args.append((name, grammar, num_graphs, extract_type, gen_type, basedir, graphs_filename,
                                 mix_dict, attr_name, fancy, inp_deg_ast, inp_attr_ast, use_pickle,
                                 save_snapshots, alpha))
                # note: fancy keeps the value from the loop above for the greedy generators
                for alpha, gen_type in zip((0, 0.5, 1),
                                           ('AVRG-greedy-attr', 'AVRG-greedy-50', 'AVRG-greedy-deg')):
                    graphs_filename = f'{basedir}/output/graphs/{name}/{gen_type}_{extract_type}_{grammar.clustering}_{grammar.mu}_{num_graphs}_{i}.pkl'
                    args.append((name, grammar, num_graphs, extract_type, gen_type, basedir, graphs_filename,
                                 mix_dict, attr_name, fancy, inp_deg_ast, inp_attr_ast, use_pickle,
                                 save_snapshots, alpha))

    # random.shuffle(args)
    parallel_async(func=generate_graphs, args=args, num_workers=8)
    return

def batch_motif_counter(name, model, basedir, overwrite=False, graphs=None, motif_filename=None):
    # dont overwrite by default
    args = []
    # motif_counter_runner(name, nx_graph, basedir, overwrite, motif_fname=None)
    for i, graph in enumerate(graphs):
        # if motif_filename is None:
        if model == 'original':
            model_ = model
        else:
            model_ = f'{model}_{i}'
        motif_filename = join(basedir, 'output/motifs/', name, f'{model_}.pkl')
        args.append((name, graph, basedir, overwrite, motif_filename))

    try:
        parallel_async(func=motif_counter_runner, args=args, num_workers=5)
    except Exception as e:
        logging.error(e)
    return

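# A minimal usage sketch for batch_motif_counter. Hedged: the dataset name, directory layout, and the
# generated-graphs pickle path below are assumptions based on the filename patterns used in this module.
def _motif_counting_example():
    basedir = '/data/ssikdar/Attributed-VRG'
    name = 'polbooks'
    # count motifs once on the original graph ...
    original_graph, _ = get_graph(name, basedir=basedir)
    batch_motif_counter(name=name, model='original', basedir=basedir, graphs=[original_graph])
    # ... and once per generated graph of a model (path is illustrative)
    generated = load_pickle(f'{basedir}/output/graphs/{name}/AVRG-fancy_mu-random_leiden_5_10.pkl')
    batch_motif_counter(name=name, model='AVRG-fancy', basedir=basedir, graphs=generated)
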
def batch_cluster_shuffler_runner():
    shuffle_kind = 'edges'
    # clusterings = ['cond', 'spectral', 'leiden', 'louvain', 'infomap', 'labelprop', 'random', 'consensus']
    clusterings = ['cond', 'leiden', 'louvain', 'leadingeig']
    use_pickle = True

    args = []
    # for graph_filename in glob(f'./input/shuffled/{shuffle_kind}/toy-comm-*.gexf'):
    shuffle_kind = 'attrs'
    for graph_filename in glob(f'./input/shuffled/{shuffle_kind}/toy-comm-0.gexf'):
        path = Path(graph_filename)
        g = nx.read_gexf(graph_filename, node_type=int)
        # name = f'{path.stem}-{shuffle_kind}'
        name = 'toy-comm-attr'
        g.name = name
        for clustering in clusterings:
            args.append((g, f'/data/ssikdar/attributed-vrg/dumps/trees/{name}', clustering, use_pickle))

    parallel_async(func=get_clustering, args=args)
    return

def batched_graphs_grammars(basedir, name, clusterings):
    input_graphs = read_batched_graphs(basedir=basedir, name=name)
    attr_name = 'value'
    grammar_types = ['AVRG']  # ['VRG', 'AVRG']
    extract_types = ['mu_random']  # , 'mu_level', 'all_tnodes']
    mus = [5]
    use_cluster_pickle = True
    use_grammar_pickle = True
    write_pickle = True
    count = 1

    args = []
    for i, input_graph in enumerate(input_graphs):
        for clustering in clusterings:
            list_of_list_clusters = load_pickle(join(basedir, 'output', 'trees', name, f'{clustering}_{i}.pkl'))
            for grammar_type in grammar_types:
                for extract_type in extract_types:
                    extract = extract_type.replace('_', '-')
                    for mu in mus:
                        grammar_filename = f'{basedir}/output/grammars/{name}/{grammar_type}_{extract}_{clustering}_{mu}_{i}.pkl'
                        arg = (name, grammar_type, extract_type, clustering, mu, input_graph, use_grammar_pickle,
                               use_cluster_pickle, attr_name, basedir, count, grammar_filename, write_pickle,
                               list_of_list_clusters)
                        args.append(arg)
                        if extract_type == 'all_tnodes':  # here mu is not important for all_tnodes
                            break

    # print(args[:3])
    try:
        parallel_async(func=get_grammars, args=args, num_workers=5)
    except Exception as e:
        print(e)
    return

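# A minimal end-to-end sketch of the batched-graphs pipeline defined above: cluster trees, then grammars,
# then generated graphs. The basedir, dataset name, and clustering below are placeholder assumptions.
def _batched_pipeline_example():
    basedir = '/data/ssikdar/Attributed-VRG'
    name = 'toy-comm'          # assumed to be readable by read_batched_graphs
    clusterings = ['leiden']
    batched_graphs_clusters(basedir=basedir, name=name, clusterings=clusterings)   # one tree per graph & clustering
    batched_graphs_grammars(basedir=basedir, name=name, clusterings=clusterings)   # one AVRG grammar per tree
    batched_graphs_generator(basedir=basedir, clusterings=clusterings, name=name)  # graphs from each grammar
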
def batch_generator_runner(names, basedir, clusterings, mus=None, extract_types=None, save_snapshots=False,
                           num_workers=10, shuffle=False):
    num_graphs = 10  # we need 1 graph to chart the progress  # TODO: change this in the future?
    use_pickle = True
    if mus is None:
        mus = list(range(3, 11)) + [-1]
    if extract_types is None:
        # assumed default: accept all extraction strategies, hyphenated to match the comparison below
        extract_types = ['mu-random', 'mu-level', 'all-tnodes']
    alpha = None

    args = []
    for name in names:
        input_graph, attr_name = get_graph(name, basedir=basedir)
        if input_graph.size() > 3_000:
            save_snapshots = False
        mix_dict = get_mixing_dict(input_graph, attr_name=attr_name)
        inp_deg_ast = nx.degree_assortativity_coefficient(input_graph)
        inp_attr_ast = nx.attribute_assortativity_coefficient(input_graph, attr_name)

        for grammar_filename in glob(f'{basedir}/output/grammars/{name}/*'):
            grammar = load_pickle(grammar_filename)
            extract_type = grammar.extract_type.replace('_', '-')
            if grammar.mu not in mus or grammar.clustering not in clusterings or extract_type not in extract_types:
                continue
            print(Path(grammar_filename).stem)

            if isinstance(grammar, AttributedVRG):
                for gen_type, fancy in zip(('AVRG-regular', 'AVRG-fancy'), (False, True)):
                    graphs_filename = f'{basedir}/output/graphs/{name}/{gen_type}_{extract_type}_{grammar.clustering}_{grammar.mu}_{num_graphs}.pkl'
                    args.append((name, grammar, num_graphs, extract_type, gen_type, basedir, graphs_filename,
                                 mix_dict, attr_name, fancy, inp_deg_ast, inp_attr_ast, use_pickle,
                                 save_snapshots, alpha))
                for alpha, gen_type in zip((0, 0.5, 1),
                                           ('AVRG-greedy-attr', 'AVRG-greedy-50', 'AVRG-greedy-deg')):
                    fancy = None
                    graphs_filename = f'{basedir}/output/graphs/{name}/{gen_type}_{extract_type}_{grammar.clustering}_{grammar.mu}_{num_graphs}.pkl'
                    args.append((name, grammar, num_graphs, extract_type, gen_type, basedir, graphs_filename,
                                 mix_dict, attr_name, fancy, inp_deg_ast, inp_attr_ast, use_pickle,
                                 save_snapshots, alpha))
            else:
                continue  # skip VRGs
                # assert isinstance(grammar, VRG)
                # grammar_type = 'VRG'
                # fancy = None
                # graphs_filename = f'{basedir}/output/graphs/{name}/{grammar_type}_{grammar.clustering}_{grammar.mu}_{num_graphs}.pkl'
                # args.append((name, grammar, num_graphs, grammar_type, outdir, mix_dict, attr_name, fancy,
                #              inp_deg_ast, inp_attr_ast, use_pickle, save_snapshots, alpha, graphs_filename))

    if shuffle:
        random.shuffle(args)
    try:
        parallel_async(func=generate_graphs, args=args, num_workers=num_workers)
    except Exception as e:
        print(e)
    return

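# A minimal driver sketch for the single-graph pipeline: clustering -> grammar extraction -> generation.
# Paths, dataset names, mus, and worker counts below are illustrative assumptions, not fixed project settings.
# Note the naming convention: the extractor takes underscore extract types ('mu_random'), while the
# generator filters on the hyphenated form ('mu-random').
if __name__ == '__main__':
    basedir = '/data/ssikdar/Attributed-VRG'
    names = ['polbooks', 'football']
    clusterings = ['leiden']
    batch_cluster_runner(names=names, outdir=basedir, clusterings=clusterings)
    batch_grammar_runner(names=names, clusterings=clusterings, outdir=basedir,
                         mus=[5], extract_types=['mu_random'], num_workers=4)
    batch_generator_runner(names=names, basedir=basedir, clusterings=clusterings,
                           mus=[5], extract_types=['mu-random'], num_workers=4)
    # get_final_graph_dfs() can then build the stats DataFrames, but note that it hard-codes its own
    # dataset and model lists at the top of this module.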