Пример #1
0
def PHRG(G, gname):
    """Learn a probabilistic HRG from ``G`` and grow one graph from it.

    The production rules returned by ``learn_grammars_production_rules(G)``
    are normalised from counts into per-LHS probabilities, loaded into a
    ``pcfg.Grammar`` created with ``'S'``, and a derivation sampled for
    ``G.number_of_nodes()`` is expanded with ``grow``.

    Args:
        G: input graph; this function itself only calls
            ``number_of_nodes()`` on it and hands it to
            ``learn_grammars_production_rules``.
        gname: not used by this function; kept so existing callers keep
            working.

    Returns:
        The first element of the value returned by ``grow(rule_list, g)``.
    """
    target_nodes = G.number_of_nodes()

    prod_rules = learn_grammars_production_rules(G)
    if dbg:
        # print("") emits the same single newline as a bare print statement.
        print("")
        print("--------------------")
        print("- Production Rules -")
        print("--------------------")

    # Normalisation step: turn the per-LHS counts into probabilities.
    for rhs_counts in prod_rules.values():
        total = float(sum(rhs_counts.values()))
        for rhs_key in rhs_counts:
            rhs_counts[rhs_key] = float(rhs_counts[rhs_key]) / total

    # Every run of non-parenthesis characters is one symbol of a rule side.
    split_symbols = re.compile("[^()]+").findall

    g = pcfg.Grammar('S')
    for lhs_idx, (lhs_key, rhs_probs) in enumerate(prod_rules.items()):
        # The LHS does not depend on the RHS, so parse it once per LHS.
        lhs = split_symbols(lhs_key)[0]
        for rhs_idx, (rhs_key, prob) in enumerate(rhs_probs.items()):
            rule_id = "r%d.%d" % (lhs_idx, rhs_idx)
            g.add_rule(pcfg.Rule(rule_id, lhs, split_symbols(rhs_key), prob))

    print("Starting max size")
    g.set_max_size(target_nodes)
    print("Done with max size")

    rule_list = g.sample(target_nodes)

    # PHRG: grow() returns a sequence; only its first element is wanted.
    return grow(rule_list, g)[0]
Пример #2
0
# Read the production rules stored under the "karate" key of the shelf that
# was opened earlier in this script, then close the shelf.
prod_rules = shelf["karate"]
shelf.close()

# Flatten the nested mapping {lhs_key: {rhs_key: value}} into a list of
# (rule_id, lhs, rhs, value) tuples. Rule ids look like
# "r<lhs index>.<rhs index>".
# NOTE(review): `id` shadows the builtin of the same name, and `v` is never
# used (the inner mapping is re-read through prod_rules[k]).
rules = []
id = 0
for k, v in prod_rules.iteritems():
    sid = 0
    for x in prod_rules[k]:
        # Every run of non-parenthesis characters in the key becomes one
        # element of the right-hand side.
        rhs = re.findall("[^()]+", x)
        # The left-hand side is the first such run found in the outer key.
        rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x]))
        #print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
        sid += 1
    id += 1

# Create the grammar ('S' is presumably the start symbol - confirm against
# pcfg.Grammar) and register every collected rule with it.
g = pcfg.Grammar('S')
for (id, lhs, rhs, prob) in rules:
    g.add_rule(pcfg.Rule(id, lhs, rhs, prob))

print "Starting max size"

G = nx.karate_club_graph()

# NOTE(review): the size passed here is the edge count of the karate-club
# graph, not its node count - confirm that is what set_max_size expects.
g.set_max_size(G.number_of_edges())

print "Done with max size"

# Empty lists; nothing in the visible part of this script fills them.
graphletG = []
graphletH = []
multiGraphs = []
Пример #3
0
def probabilistic_hrg(G, num_samples=1, n=None):
    """Learn a probabilistic HRG from ``G`` and sample synthetic graphs.

    Args:
        G: input graph (nx obj). Its self-loop edges are removed in place,
            so the caller's graph is modified; everything after that works
            on the subgraph induced by the largest connected component.
        num_samples: (int) in the 'grow' process, this is the number of
            synthetic graphs to generate.
        n: (int) num_nodes; number of nodes in the resulting graphs.
            When ``None``, the node count of the largest connected
            component is used.

    Returns:
        List of synthetic graphs (H^stars), one per sample.
    """
    if DEBUG:
        print(G.number_of_nodes())
        print(G.number_of_edges())
    start_time = time.time()

    # Drop self-loops, then keep only the giant component.
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes() if n is None else n

    if DEBUG:
        print(G.number_of_nodes())
        print(G.number_of_edges())

    graph_checks(G)

    if DEBUG:
        # print("") emits the same single newline as a bare print statement.
        print("")
        print("--------------------")
        print("-Tree Decomposition-")
        print("--------------------")

    prod_rules = {}
    if num_nodes >= 500:
        # At this size the rules are gathered from sampled subgraphs
        # instead of decomposing the whole graph at once.
        print('  -- subgraphs')
        for Gprime in gs.rwr_sample(G, 2, 300):
            _collect_production_rules(Gprime, G, prod_rules)
    else:
        _collect_production_rules(G, G, prod_rules)

    if DEBUG:
        print("")
        print("--------------------")
        print("- Production Rules -")
        print("--------------------")

    # Normalisation step: turn the per-LHS counts into probabilities.
    for lhs_key, rhs_counts in prod_rules.items():
        if DEBUG:
            print(lhs_key)
        total = float(sum(rhs_counts.values()))
        for rhs_key in rhs_counts:
            rhs_counts[rhs_key] = float(rhs_counts[rhs_key]) / total
            if DEBUG:
                print("%s %s %s" % ('\t -> ', rhs_key, rhs_counts[rhs_key]))

    # Every run of non-parenthesis characters is one symbol of a rule side.
    split_symbols = re.compile("[^()]+").findall

    rules = []
    for lhs_idx, (lhs_key, rhs_probs) in enumerate(prod_rules.items()):
        # The LHS does not depend on the RHS, so parse it once per LHS.
        lhs = split_symbols(lhs_key)[0]
        for rhs_idx, (rhs_key, prob) in enumerate(rhs_probs.items()):
            rule = ("r%d.%d" % (lhs_idx, rhs_idx), lhs,
                    split_symbols(rhs_key), prob)
            rules.append(rule)
            if DEBUG:
                # Printed as a tuple, which is what the original
                # parenthesised Python 2 print statement produced.
                print(rule)

    if DEBUG:
        print("  --- Inference (PHRG) %s seconds ---" %
              (time.time() - start_time))
    start_time = time.time()

    g = pcfg.Grammar('S')
    for rule_id, lhs, rhs, prob in rules:
        if DEBUG:
            print("%s %s %s %s %s" % (' ', rule_id, lhs, rhs, prob))
        g.add_rule(pcfg.Rule(rule_id, lhs, rhs, prob))

    if DEBUG:
        print("Starting max size")
    g.set_max_size(num_nodes)
    if DEBUG:
        print("Done with max size")

    Hstars = []
    for _ in range(num_samples):
        rule_list = g.sample(num_nodes)
        if DEBUG:
            pp.pprint(rule_list)
        # grow() returns a sequence; only its first element is kept.
        Hstars.append(grow(rule_list, g)[0])

    if DEBUG:
        print("  --- Graph gen (Fixed-size) %s seconds ---" %
              (time.time() - start_time))
    return Hstars


def _collect_production_rules(graph, G, prod_rules):
    """Tree-decompose ``graph`` and pass the result to ``td.new_visit``.

    The decomposition from ``td.quickbb`` is rooted at its first element and
    binarized; ``td.new_visit`` then receives it together with ``G`` and the
    ``prod_rules`` dict, which it is expected to fill in place.
    """
    T = td.quickbb(graph)
    T = td.make_rooted(T, list(T)[0])
    T = binarize(T)
    td.new_visit(T, G, prod_rules)