def PHRG(G, gname):
    """Build a probabilistic grammar from ``G`` and grow one graph from it.

    The nested mapping returned by ``learn_grammars_production_rules(G)``
    (left-hand side -> right-hand side -> count) is normalized in place so
    that the values under each left-hand side sum to 1, loaded into a
    ``pcfg.Grammar`` with start symbol ``'S'``, and a rule sequence sampled
    for ``G.number_of_nodes()`` is handed to ``grow``.

    Args:
        G: input graph; it must provide ``number_of_nodes()`` and is passed
            unchanged to ``learn_grammars_production_rules``.
        gname: not used by this function.

    Returns:
        The first element of the value returned by ``grow``.
    """
    target_nodes = G.number_of_nodes()
    prod_rules = learn_grammars_production_rules(G)

    if dbg:
        print("")
        print("--------------------")
        print("- Production Rules -")
        print("--------------------")

    # Normalization step: turn the per-LHS counts into probabilities.
    for lhs_key in prod_rules:
        rhs_counts = prod_rules[lhs_key]
        total = float(sum(rhs_counts[rhs_key] for rhs_key in rhs_counts))
        for rhs_key in rhs_counts:
            rhs_counts[rhs_key] = float(rhs_counts[rhs_key]) / total

    # Flatten to (rule id, LHS symbol, RHS symbol list, probability) tuples.
    # Keys are split on parentheses; only the first LHS fragment is kept.
    rules = []
    for rule_idx, lhs_key in enumerate(prod_rules):
        for sub_idx, rhs_key in enumerate(prod_rules[lhs_key]):
            rhs_symbols = re.findall("[^()]+", rhs_key)
            rules.append((
                "r%d.%d" % (rule_idx, sub_idx),
                "%s" % re.findall("[^()]+", lhs_key)[0],
                rhs_symbols,
                prod_rules[lhs_key][rhs_key],
            ))

    grammar = pcfg.Grammar('S')
    for rule_id, lhs, rhs, prob in rules:
        grammar.add_rule(pcfg.Rule(rule_id, lhs, rhs, prob))

    # These two progress messages are printed regardless of ``dbg``.
    print("Starting max size")
    grammar.set_max_size(target_nodes)
    print("Done with max size")

    sampled_rules = grammar.sample(target_nodes)
    return grow(sampled_rules, grammar)[0]
# Pull the stored rule mapping for the "karate" entry, then release the shelf.
prod_rules = shelf["karate"]
shelf.close()

# Flatten the nested mapping (LHS -> RHS -> value) into
# (rule id, LHS symbol, RHS symbol list, value) tuples. Keys are split on
# parentheses; only the first LHS fragment is kept. The loop variables are
# deliberately not called ``id`` so the builtin ``id()`` stays usable in the
# rest of the module.
rules = []
for rule_idx, lhs_key in enumerate(prod_rules):
    for sub_idx, rhs_key in enumerate(prod_rules[lhs_key]):
        rules.append((
            "r%d.%d" % (rule_idx, sub_idx),
            "%s" % re.findall("[^()]+", lhs_key)[0],
            re.findall("[^()]+", rhs_key),
            prod_rules[lhs_key][rhs_key],
        ))

# Load every rule into a grammar whose start symbol is 'S'; the stored value
# is passed to pcfg.Rule as the rule's probability argument.
g = pcfg.Grammar('S')
for rule_id, lhs, rhs, prob in rules:
    g.add_rule(pcfg.Rule(rule_id, lhs, rhs, prob))

print("Starting max size")
G = nx.karate_club_graph()
# NOTE(review): the max size here is the graph's *edge* count, whereas the
# functions in this file that build a grammar use the node count -- confirm
# this is intended.
g.set_max_size(G.number_of_edges())
print("Done with max size")

# Empty accumulators; nothing in this run of statements fills them.
graphletG = []
graphletH = []
multiGraphs = []
def _collect_production_rules(decomp_graph, full_graph, prod_rules):
    """Tree-decompose ``decomp_graph`` and record rules into ``prod_rules``.

    Runs ``td.quickbb`` -> ``td.make_rooted`` (rooted at the first element of
    the decomposition) -> ``binarize`` and passes the resulting tree to
    ``td.new_visit`` together with ``full_graph``. ``td.new_visit`` is
    expected to add its findings to ``prod_rules`` in place; nothing is
    returned.
    """
    tree = td.quickbb(decomp_graph)
    root = list(tree)[0]
    tree = td.make_rooted(tree, root)
    tree = binarize(tree)
    td.new_visit(tree, full_graph, prod_rules)


def probabilistic_hrg(G, num_samples=1, n=None):
    """Learn a probabilistic grammar from ``G`` and grow synthetic graphs.

    Steps: strip self-loops, keep the largest connected component, collect
    production-rule counts from a tree decomposition, normalize the counts
    per left-hand side into probabilities, load them into a ``pcfg.Grammar``
    with start symbol ``'S'``, then sample rule sequences and ``grow`` one
    graph per sample.

    Side effects: self-loop edges are removed from the graph object passed
    in by the caller. When the target size is at least 500 the line
    ``' -- subgraphs'`` is printed regardless of ``DEBUG``; every other
    message is printed only when ``DEBUG`` is truthy.

    Args:
        G: input graph (networkx object).
        num_samples: (int) number of synthetic graphs to generate in the
            'grow' process.
        n: (int) number of nodes requested for the resulting graphs; when
            ``None`` the node count of the largest connected component of
            ``G`` is used. The value also selects the decomposition path:
            at 500 or more, rules are gathered from the subgraphs yielded by
            ``gs.rwr_sample(G, 2, 300)`` instead of from ``G`` as a whole.

    Returns:
        List with ``num_samples`` synthetic graphs (H^stars), each being the
        first element of what ``grow`` returns.
    """
    if DEBUG:
        print(G.number_of_nodes())
        print(G.number_of_edges())

    start_time = time.time()

    # NOTE: this mutates the caller's graph before G is rebound to the
    # largest connected component.
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes() if n is None else n

    if DEBUG:
        print(G.number_of_nodes())
        print(G.number_of_edges())

    graph_checks(G)

    if DEBUG:
        print("")
        print("--------------------")
        print("-Tree Decomposition-")
        print("--------------------")

    prod_rules = {}
    if num_nodes >= 500:
        print(' -- subgraphs')
        for Gprime in gs.rwr_sample(G, 2, 300):
            # Each sample is decomposed on its own, but the visit is still
            # given the full component G (as in the original code).
            _collect_production_rules(Gprime, G, prod_rules)
    else:
        _collect_production_rules(G, G, prod_rules)

    if DEBUG:
        print("")
        print("--------------------")
        print("- Production Rules -")
        print("--------------------")

    # Normalization step: create probabilities, not counts, per LHS.
    for lhs_key in prod_rules:
        if DEBUG:
            print(lhs_key)
        rhs_counts = prod_rules[lhs_key]
        total = float(sum(rhs_counts[rhs_key] for rhs_key in rhs_counts))
        for rhs_key in rhs_counts:
            rhs_counts[rhs_key] = float(rhs_counts[rhs_key]) / total
            if DEBUG:
                print("\t ->  %s %s" % (rhs_key, rhs_counts[rhs_key]))

    # Flatten to (rule id, LHS symbol, RHS symbol list, probability) tuples.
    # Keys are split on parentheses; only the first LHS fragment is kept.
    rules = []
    for rule_idx, lhs_key in enumerate(prod_rules):
        for sub_idx, rhs_key in enumerate(prod_rules[lhs_key]):
            rule = (
                "r%d.%d" % (rule_idx, sub_idx),
                "%s" % re.findall("[^()]+", lhs_key)[0],
                re.findall("[^()]+", rhs_key),
                prod_rules[lhs_key][rhs_key],
            )
            rules.append(rule)
            if DEBUG:
                print(rule)

    if DEBUG:
        print(" --- Inference (PHRG) %s seconds ---" % (time.time() - start_time))
    start_time = time.time()

    g = pcfg.Grammar('S')
    for rule_id, lhs, rhs, prob in rules:
        if DEBUG:
            print("  %s %s %s %s" % (rule_id, lhs, rhs, prob))
        g.add_rule(pcfg.Rule(rule_id, lhs, rhs, prob))

    if DEBUG:
        print("Starting max size")
    g.set_max_size(num_nodes)
    if DEBUG:
        print("Done with max size")

    Hstars = []
    for _ in range(num_samples):
        rule_list = g.sample(num_nodes)
        if DEBUG:
            pp.pprint(rule_list)
        Hstars.append(grow(rule_list, g)[0])

    if DEBUG:
        print(" --- Graph gen (Fixed-size) %s seconds ---" % (time.time() - start_time))
    return Hstars