def tst_prod_rules_level1_individual(in_path): # files = glob("ProdRules/moreno_lesmis_lesmis.*_iprules.tsv") mdf = pd.DataFrame() for f in sorted(files, reverse=True): df = pd.read_csv(f, header=None, sep="\t") mdf = pd.concat([mdf, df]) # print f, mdf.shape # print mdf.head() g = pcfg.Grammar('S') from td_isom_jaccard_sim import listify_rhs for (id, lhs, rhs, prob) in df.values: rhs = listify_rhs(rhs) # print (id), (lhs), (rhs), (prob) g.add_rule(pcfg.Rule(id, lhs, rhs, float(prob))) num_nodes = 16 # G.number_of_nodes() print "Starting max size", 'n=', num_nodes g.set_max_size(num_nodes) print "Done with max size" Hstars = [] print '-' * 40 try: rule_list = g.sample(num_nodes) except Exception, e: print str(e) continue hstar = phrg.grow(rule_list, g)[0] Hstars.append(hstar) print '+' * 40
def get_phrg_production_rules_onsubgraphs(argmnts): args = argmnts gn = graph_name(args['orig'][0]) f = "../datasets/" + gn + "*.p" files = glob(f) prod_rules = {} rules = [] id = 0 for f in files: Gprime = nx.read_gpickle(f) Gprime = reset_graph_nodes(Gprime) pp.pprint(Gprime.nodes()) T = td.quickbb(Gprime) root = list(T)[0] T = td.make_rooted(T, root) T = phrg.binarize(T) root = list(T)[0] root, children = T # td.new_visit(T, G, prod_rules, TD) td.new_visit(T, Gprime, prod_rules) # Process(target=td.new_visit, args=(T, Gprime, prod_rules,)).start() if DBG: print if DBG: print "--------------------" if DBG: print "- Production Rules -" if DBG: print "--------------------" for k in prod_rules.iterkeys(): if DBG: print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float(s) # normailization step to create probs not counts. if DBG: print '\t -> ', d, prod_rules[k][d] for k, v in prod_rules.iteritems(): sid = 0 for x in prod_rules[k]: rhs = re.findall("[^()]+", x) rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])) if DBG: print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]) sid += 1 id += 1 df = pd.DataFrame(rules) # pp.pprint(df.values.tolist()); exit() df.to_csv('../ProdRules/{}.tsv.phrg.prs'.format(gn), header=False, index=False, sep="\t") if os.path.exists('../ProdRules/{}.tsv.phrg.prs'.format(gn)): print 'Saved', '../ProdRules/{}.tsv.phrg.prs'.format(gn) else: print "Trouble saving" '''
def Hstar_Graphs_Control(G, graph_name, axs=None): # Derive the prod rules in a naive way, where prod_rules = phrg.probabilistic_hrg_learning(G) pp.pprint(prod_rules) g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) num_nodes = G.number_of_nodes() print "Starting max size", 'n=', num_nodes g.set_max_size(num_nodes) print "Done with max size" Hstars = [] num_samples = 20 print '*' * 40 for i in range(0, num_samples): rule_list = g.sample(num_nodes) hstar = phrg.grow(rule_list, g)[0] Hstars.append(hstar) # if 0: # g = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=['ts']) # draw_degree_whole_graph(g,axs) # draw_degree(Hstars, axs=axs, col='r') # #axs.set_title('Rules derived by ignoring time') # axs.set_ylabel('Frequency') # axs.set_xlabel('degree') if 0: # metricx = [ 'degree','hops', 'clust', 'assort', 'kcore','eigen','gcd'] metricx = ['clust'] # g = nx.from_pandas_dataframe(df, 'src', 'trg',edge_attr=['ts']) # graph_name = os.path.basename(f_path).rstrip('.tel') if DBG: print ">", graph_name metrics.network_properties([G], metricx, Hstars, name=graph_name, out_tsv=True)
def main_inf_mirr(gFname, gName, rnbr=1): retVal = None G = xt.load_edgelist(gFname) # rnbr = 1 # for j in range(1, rnbr + 1): # prs = get_hrg_production_rules_given(G) Hstars = [ ] # synthetic (stochastically generate) graphs using the graph grammars ProdRulesKth = [] # Note that therem might not be convergence, b/c the graph may degenerate early for j in range(0, 10): # nbr of times to do Inf. Mirr. tst for k in range(0, 1): # nbr of times to feedback the resulting graph # print ("\tGraph #:",k+1) prdrls = {} prod_rules = phrg.probabilistic_hrg_deriving_prod_rules(G) # print len(prod_rules) # initialize the Grammar g g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) num_nodes = G.number_of_nodes() g.set_max_size(num_nodes) print "Done initializing the grammar data-structure" # Generate a synthetic graph using HRGs try: rule_list = g.sample(num_nodes) except Exception, e: print str(e) rule_list = g.sample(num_nodes) break hstar = phrg.grow(rule_list, g)[0] G = hstar # feed back the newly created graph # store the last synth graph & restart Hstars.append(hstar) #
def grow_exact_size_hrg_graphs_from_prod_rules(prod_rules, gname, n, runs=1): """ Args: rules: production rules (model) gname: graph name n: target graph order (number of nodes) runs: how many graphs to generate Returns: list of synthetic graphs """ DBG = True if n <= 0: sys.exit(1) g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) print print "Added rules HRG (pr", len(prod_rules), ", n,", n, ")" num_nodes = n if DBG: print "Starting max size ..." t_start = time.time() g.set_max_size(num_nodes) print "Done with max size, took %s seconds" % (time.time() - t_start) hstars_lst = [] print " ", i = 0 max_tries = 10000 tries = 0 failed = False while i != runs: tries += 1 if tries > max_tries: failed = True break print '>', rule_list = g.sample(num_nodes) hstar = phrg.grow(rule_list, g)[0] if n * 0.99 <= hstar.order() <= n * 1.01: hstars_lst.append(hstar) i += 1 if len(hstars_lst) != runs or failed: print('HRG failed') return None return hstars_lst
def grow_hrg_graphs_with_infinity( prod_rules, gname, n, runs=1, rnbr=1, ): """ Args: rules: production rules (model) gname: graph name n: target graph order (number of nodes) runs: how many graphs to generate Returns: list of synthetic graphs """ DBG = True if n <= 0: sys.exit(1) g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) print print "Added rules HRG (pr", len(prod_rules), ", n,", n, ")" num_nodes = n if DBG: print "Starting max size ..." t_start = time.time() g.set_max_size(num_nodes) print "Done with max size, took %s seconds" % (time.time() - t_start) hstars_lst = [] print " ", for i in range(0, runs): rule_list = g.sample(num_nodes) hstar = phrg.grow(rule_list, g)[0] hstars_lst.append(hstar) return hstars_lst
def get_hrg_production_rules(edgelist_data_frame, graph_name, tw=False, trials=10, n_subg=2, n_nodes=300, nstats=False): from core.growing import derive_prules_from t_start = time.time() df = edgelist_data_frame if df.shape[1] == 4: G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=True) # whole graph elif df.shape[1] == 3: G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts']) # whole graph else: G = nx.from_pandas_dataframe(df, 'src', 'trg') G.name = graph_name print "==> read in graph took: {} seconds".format(time.time() - t_start) G.remove_edges_from(G.selfloop_edges()) giant_nodes = max(nx.connected_component_subgraphs(G), key=len) G = nx.subgraph(G, giant_nodes) num_nodes = G.number_of_nodes() phrg.graph_checks(G) if DBG: print if DBG: print "--------------------" if not DBG: print "-Tree Decomposition-" if DBG: print "--------------------" prod_rules = {} K = n_subg n = n_nodes if num_nodes >= 500: print 'Grande' t_start = time.time() for Gprime in gs.rwr_sample(G, K, n): T = td.quickbb(Gprime) root = list(T)[0] T = td.make_rooted(T, root) T = phrg.binarize(T) root = list(T)[0] root, children = T # td.new_visit(T, G, prod_rules, TD) td.new_visit(T, G, prod_rules) Process(target=td.new_visit, args=( T, G, prod_rules, )).start() else: T = td.quickbb(G) root = list(T)[0] T = td.make_rooted(T, root) T = phrg.binarize(T) root = list(T)[0] root, children = T # td.new_visit(T, G, prod_rules, TD) td.new_visit(T, G, prod_rules) # print_treewidth(T) # exit() if DBG: print if DBG: print "--------------------" if DBG: print "- Production Rules -" if DBG: print "--------------------" for k in prod_rules.iterkeys(): if DBG: print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float( s) # normailization step to create probs not counts. 
if DBG: print '\t -> ', d, prod_rules[k][d] rules = [] id = 0 for k, v in prod_rules.iteritems(): sid = 0 for x in prod_rules[k]: rhs = re.findall("[^()]+", x) rules.append( ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])) if DBG: print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]) sid += 1 id += 1 df = pd.DataFrame(rules) '''print "++++++++++" df.to_csv('ProdRules/{}_prs.tsv'.format(G.name), header=False, index=False, sep="\t") if os.path.exists('ProdRules/{}_prs.tsv'.format(G.name)): print 'Saved', 'ProdRules/{}_prs.tsv'.format(G.name) else: print "Trouble saving" print "-----------" print [type(x) for x in rules[0]] ''' ''' Graph Generation of Synthetic Graphs Grow graphs usigng the union of rules from sampled sugbgraphs to predict the target order of the original graph ''' hStars = grow_exact_size_hrg_graphs_from_prod_rules( rules, graph_name, G.number_of_nodes(), trials) print '... hStart graphs:', len(hStars) if not os.path.exists(r"Results/"): os.makedirs(r"Results/") with open(r"Results/{}_hstars.pickle".format(graph_name), "wb") as output_file: cPickle.dump(hStars, output_file) if os.path.exists(r"Results/{}_hstars.pickle".format(graph_name)): print "File saved" '''if nstats:
# graph.name="kchop" print nx.info(graph) import core.PHRG as phrg import core.probabilistic_cfg as pcfg G = graph Hstars = [ ] # synthetic (stochastically generate) graphs using the graph grammars ProdRulesKth = [] # Note that therem might not be convergence, b/c the graph may degenerate early for j in range(0, 10): # nbr of times to do Inf. Mirr. tst for k in range(1, rnbr + 1): # nbr of times to feedback the resulting graph prdrls = {} prod_rules = phrg.probabilistic_hrg_deriving_prod_rules(G) # print len(prod_rules) # initialize the Grammar g g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in prod_rules: g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) num_nodes = G.number_of_nodes() g.set_max_size(num_nodes) print "Done initializing the grammar data-structure" # Generate a synthetic graph using HRGs try: rule_list = g.sample(num_nodes) except Exception, e:
def dimacs_td_ct_fast(oriG, tdfname): """ tree decomp to clique-tree parameters: orig: filepath to orig (input) graph in edgelist tdfname: filepath to tree decomposition from INDDGO synthg: when the input graph is a syth (orig) graph Todo: currently not handling sythg in this version of dimacs_td_ct """ G = oriG if G is None: return (1) prod_rules = {} t_basename = os.path.basename(tdfname) out_tdfname = os.path.basename(t_basename) + ".prs" if os.path.exists("../ProdRules/" + out_tdfname): # print "==> exists:", out_tdfname return out_tdfname if 0: print "../ProdRules/" + out_tdfname, tdfname with open(tdfname, 'r') as f: # read tree decomp from inddgo lines = f.readlines() lines = [x.rstrip('\r\n') for x in lines] cbags = {} bags = [x.split() for x in lines if x.startswith('B')] for b in bags: cbags[int(b[1])] = [int(x) for x in b[3:]] # what to do with bag size? edges = [x.split()[1:] for x in lines if x.startswith('e')] edges = [[int(k) for k in x] for x in edges] tree = defaultdict(set) for s, t in edges: tree[frozenset(cbags[s])].add(frozenset(cbags[t])) if DEBUG: print '.. # of keys in `tree`:', len(tree.keys()) root = list(tree)[0] root = frozenset(cbags[1]) T = td.make_rooted(tree, root) # nfld.unfold_2wide_tuple(T) # lets me display the tree's frozen sets T = phrg.binarize(T) root = list(T)[0] root, children = T # td.new_visit(T, G, prod_rules, TD) # print ">>",len(T) td.new_visit(T, G, prod_rules) if 0: print "--------------------" if 0: print "- Production Rules -" if 0: print "--------------------" for k in prod_rules.iterkeys(): if DEBUG: print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float( s) # normailization step to create probs not counts. 
if DEBUG: print '\t -> ', d, prod_rules[k][d] rules = [] id = 0 for k, v in prod_rules.iteritems(): sid = 0 for x in prod_rules[k]: rhs = re.findall("[^()]+", x) rules.append( ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])) if 0: print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]) sid += 1 id += 1 # print rules if 0: print "--------------------" if 0: print '- P. Rules', len(rules) if 0: print "--------------------" # ToDo. # Let's save these rules to file or print proper write_prod_rules_to_tsv(rules, out_tdfname) # g = pcfg.Grammar('S') # for (id, lhs, rhs, prob) in rules: # g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) # Synthetic Graphs # hStars = grow_exact_size_hrg_graphs_from_prod_rules(rules, graph_name, G.number_of_nodes(), 20) # # metricx = ['degree', 'hops', 'clust', 'assort', 'kcore', 'gcd'] # 'eigen' # metricx = ['gcd','avgdeg'] # metrics.network_properties([G], metricx, hStars, name=graph_name, out_tsv=True) return out_tdfname
def get_phrg_production_rules (argmnts): args = argmnts t_start = time.time() df = tdf.Pandas_DataFrame_From_Edgelist(args['orig'])[0] if df.shape[1] == 4: G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=True) # whole graph elif df.shape[1] == 3: G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts']) # whole graph else: G = nx.from_pandas_dataframe(df, 'src', 'trg') G.name = graph_name(args['orig'][0]) print "==> read in graph took: {} seconds".format(time.time() - t_start) G.remove_edges_from(G.selfloop_edges()) giant_nodes = max(nx.connected_component_subgraphs(G), key=len) G = nx.subgraph(G, giant_nodes) num_nodes = G.number_of_nodes() phrg.graph_checks(G) if DBG: print if DBG: print "--------------------" if not DBG: print "-Tree Decomposition-" if DBG: print "--------------------" prod_rules = {} K = 2 n = 300 if num_nodes >= 500: print 'Grande' t_start = time.time() for Gprime in gs.rwr_sample(G, K, n): T = td.quickbb(Gprime) root = list(T)[0] T = td.make_rooted(T, root) T = phrg.binarize(T) root = list(T)[0] root, children = T # td.new_visit(T, G, prod_rules, TD) td.new_visit(T, G, prod_rules) Process(target=td.new_visit, args=(T, G, prod_rules,)).start() else: T = td.quickbb(G) root = list(T)[0] T = td.make_rooted(T, root) T = phrg.binarize(T) root = list(T)[0] root, children = T # td.new_visit(T, G, prod_rules, TD) td.new_visit(T, G, prod_rules) # print_treewidth(T) # TODO: needs to be fixed # exit() if DBG: print if DBG: print "--------------------" if DBG: print "- Production Rules -" if DBG: print "--------------------" for k in prod_rules.iterkeys(): if DBG: print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float( s) # normailization step to create probs not counts. 
if DBG: print '\t -> ', d, prod_rules[k][d] rules = [] id = 0 for k, v in prod_rules.iteritems(): sid = 0 for x in prod_rules[k]: rhs = re.findall("[^()]+", x) rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])) if DBG: print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]) sid += 1 id += 1 df = pd.DataFrame(rules) # pp.pprint(df.values.tolist()); exit() df.to_csv('../ProdRules/{}.tsv.phrg.prs'.format(G.name), header=False, index=False, sep="\t") if os.path.exists('../ProdRules/{}.tsv.phrg.prs'.format(G.name)): print 'Saved', '../ProdRules/{}.tsv.phrg.prs'.format(G.name) else: print "Trouble saving" print "-----------" print [type(x) for x in rules[0]] '''
def get_hrg_production_rules_given(G, tw=False, n_subg=2, n_nodes=300, nstats=False):
    """Learn HRG rules from an existing graph object and grow graphs from them.

    The graph is stripped of self loops and reduced to its largest
    connected component, tree decomposed (directly below 500 nodes,
    otherwise through `n_subg` random-walk samples of `n_nodes` nodes) and
    the rule counts are normalized into probabilities.  Ten graphs are then
    grown with `grow_hrg_graphs_with_infinity` and pickled, wrapped in a
    one-entry dict, to ``Results/<graph_name>_hstars.pickle``.

    `tw` is not read in the visible code; `nstats` appears only in the
    disabled block that starts on the last line.

    NOTE(review): indentation was lost in this copy of the file; the
    nesting below is reconstructed from syntax and should be confirmed.
    """
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()
    phrg.graph_checks(G)

    if DBG: print
    if DBG: print "--------------------"
    # NOTE(review): this banner line is printed when DBG is false, unlike
    # its neighbours -- possibly a typo for `if DBG`; confirm.
    if not DBG: print "-Tree Decomposition-"
    if DBG: print "--------------------"

    prod_rules = {}  # filled in place by td.new_visit
    K = n_subg
    n = n_nodes
    if num_nodes >= 500:
        print 'Grande'
        t_start = time.time()
        for Gprime in gs.rwr_sample(G, K, n):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = phrg.binarize(T)
            root = list(T)[0]
            root, children = T
            # td.new_visit(T, G, prod_rules, TD)
            td.new_visit(T, G, prod_rules)
            # NOTE(review): the same visit is started again in a child
            # process that is never joined -- looks redundant; confirm.
            Process(target=td.new_visit, args=(T, G, prod_rules, )).start()
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = phrg.binarize(T)
        root = list(T)[0]
        root, children = T
        # td.new_visit(T, G, prod_rules, TD)
        td.new_visit(T, G, prod_rules)
    # print_treewidth(T)
    # NOTE(review): this exit() appears to be live (sibling functions have
    # it commented out); if so, nothing below it ever runs -- confirm.
    exit()

    if DBG: print
    if DBG: print "--------------------"
    if DBG: print "- Production Rules -"
    if DBG: print "--------------------"

    for k in prod_rules.iterkeys():
        if DBG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(s)  # normalization step to create probs not counts.
            if DBG: print '\t -> ', d, prod_rules[k][d]

    # Flatten into (id, lhs, rhs, prob) rows; ids look like r<lhs#>.<rhs#>.
    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]))
            if DBG: print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
            sid += 1
        id += 1

    df = pd.DataFrame(rules)
    ''' Graph Generation of Synthetic Graphs Grow graphs usigng the union of rules from sampled sugbgraphs to predict the target order of the original graph exact change to fixed '''
    # hStars = grow_exact_size_hrg_graphs_from_prod_rules(rules, graph_name, G.number_of_nodes(), 10)
    # NOTE(review): `graph_name` is not a parameter or local of this
    # function; elsewhere in this file it is called as a function, which
    # would make the string operations below fail -- confirm.
    hStars = grow_hrg_graphs_with_infinity(rules, graph_name, G.number_of_nodes(), 10, rnbr=1)
    print '... hStart graphs:', len(hStars)
    d = {graph_name + "_hstars": hStars}
    with open(r"Results/{}_hstars.pickle".format(graph_name), "wb") as output_file:
        cPickle.dump(d, output_file)
    if os.path.exists(r"Results/{}_hstars.pickle".format(graph_name)):
        print "File saved"
    # The line below opens a disabled block whose remainder lies beyond
    # this definition.
    '''if nstats:
def tst_prod_rules_isom_intrxn(fname, origfname): """ Test the isomorphic subset of rules :param fname: isom intersection rules file :param origfname: reference input network (dataset) edgelist file :return: """ # Get the original file fdf = Pandas_DataFrame_From_Edgelist([origfname]) origG = nx.from_pandas_dataframe(fdf[0], 'src', 'trg') origG.name = graph_name(origfname) print origG.name, "+" * 80 # Read the subset of prod rules df = pd.read_csv(fname, header=None, sep="\t", dtype={ 0: str, 1: list, 2: list, 3: float }) g = pcfg.Grammar('S') if not willFire_check(df): print "-" * 10, fname, "contains production rules that WillNotFire" return None else: print "+" * 40 # Process dataframe from td_isom_jaccard_sim import listify_rhs for (id, lhs, rhs, prob) in df.values: rhs = listify_rhs(rhs) g.add_rule(pcfg.Rule(id, lhs, rhs, float(prob))) print "\n", "." * 40 #print 'Added the rules to the datastructure' num_nodes = origG.number_of_nodes() # print "Starting max size", 'n=', num_nodes g.set_max_size(num_nodes) # print "Done with max size" Hstars = [] ofname = "FakeGraphs/" + origG.name + "_isom_ntrxn.shl" database = shelve.open(ofname) num_samples = 20 # print '~' * 40 for i in range(0, num_samples): rule_list = g.sample(num_nodes) hstar = phrg.grow(rule_list, g)[0] Hstars.append(hstar) print hstar.number_of_nodes(), hstar.number_of_edges() print '-' * 40 database['hstars'] = Hstars database.close()