def hrg_clique_tree (G):
  """Return the rooted, binarized tree decomposition of ``G`` as ``(root, children)``.

  Graphs with fewer than 500 nodes are decomposed whole.  Larger graphs are
  decomposed one subgraph at a time, using the subgraphs produced by
  ``gs.rwr_sample(G, 2, 300)``; the tree of the last subgraph is the one
  that is returned.

  Every tree is computed with ``td.quickbb``, rooted at the first element
  obtained by iterating the decomposition, and binarized with
  ``phrg.binarize``.  ``td.new_visit`` is run on every tree, but the
  production rules it collects live in a local dict that is not returned.

  Returns ``None`` when ``G`` is ``None``.
  """
  if G is None:
    return

  #  ------------------ ##
  #  tree decomposition
  #  ------------------ ##
  if G.number_of_nodes() >= 500:
    graphs = gs.rwr_sample(G, 2, 300)
  else:
    graphs = [G]

  prod_rules = {}
  for graph in graphs:
    T = td.quickbb(graph)
    T = td.make_rooted(T, list(T)[0])
    T = phrg.binarize(T)
    root, children = T
    # The rules are always extracted against the full graph G, not the sample.
    td.new_visit(T, G, prod_rules)

  return root, children
示例#2
0
def derive_production_rules(G):
    """Derive HRG production rules from ``G`` and normalize them per left-hand side.

    The self-loop edges of ``G`` are removed in place and the work continues
    on its largest connected component.  Graphs with fewer than 500 nodes are
    tree-decomposed whole; larger graphs are decomposed one subgraph at a
    time, using the subgraphs produced by ``gs.rwr_sample(G, 2, 100)``.
    Every decomposition is rooted at its first element, binarized and handed
    to ``td.new_visit``, which fills the rule dictionary.

    Parameters
    ----------
    G : input graph (modified in place: its self-loop edges are removed)

    Returns
    -------
    dict
        ``{lhs: {rhs: probability}}``; for every ``lhs`` the values are the
        collected counts divided by their sum.
    """
    from PHRG import graph_checks, binarize

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()

    graph_checks(G)

    print("")
    print("--------------------")
    print("-Tree Decomposition-")
    print("--------------------")

    if num_nodes >= 500:
        graphs = gs.rwr_sample(G, 2, 100)
    else:
        graphs = [G]

    prod_rules = {}
    for graph in graphs:
        T = td.quickbb(graph)
        T = td.make_rooted(T, list(T)[0])
        T = binarize(T)
        # Rules are extracted against the full component G, not the sample.
        td.new_visit(T, G, prod_rules)

    print("")
    print("--------------------")
    print("- Production Rules -")
    print("--------------------")

    for lhs in prod_rules:
        print(lhs)
        rhs_counts = prod_rules[lhs]
        total = float(sum(rhs_counts.values()))
        for rhs in rhs_counts:
            # normalization step: turn counts into probabilities
            rhs_counts[rhs] = float(rhs_counts[rhs]) / total

    return prod_rules
示例#3
0
def probabilistic_hrg(G, num_samples=1):
    """Learn an HRG from ``G`` and grow ``num_samples`` synthetic graphs from it.

    The self-loop edges of ``G`` are removed in place and the work continues
    on its largest connected component.  Graphs with fewer than 500 nodes are
    tree-decomposed whole; larger graphs are decomposed one subgraph at a
    time, using the subgraphs produced by ``gs.rwr_sample(G, 2, 100)``.  The
    rule counts gathered by ``td.new_visit`` are normalized per left-hand
    side, numbered ``r<lhs>.<rhs>`` and loaded into a ``pcfg.Grammar`` whose
    start symbol is ``'S'``.

    Parameters
    ----------
    G : input graph (modified in place: its self-loop edges are removed)
    num_samples : number of graphs to generate

    Returns
    -------
    list
        One graph per sample, each taken from ``grow(rule_list, g)[0]``.
    """
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()

    graph_checks(G)

    print("")
    print("--------------------")
    print("-Tree Decomposition-")
    print("--------------------")

    if num_nodes >= 500:
        graphs = gs.rwr_sample(G, 2, 100)
    else:
        graphs = [G]

    # NOTE(review): `prod_rules` is never bound inside this function, so it
    # must resolve to a module-level dict -- confirm.  If it does, counts from
    # earlier calls are still in it when this call starts.
    for graph in graphs:
        T = td.quickbb(graph)
        T = td.make_rooted(T, list(T)[0])
        T = binarize(T)
        # Rules are extracted against the full component G, not the sample.
        td.new_visit(T, G, prod_rules)

    print("")
    print("--------------------")
    print("- Production Rules -")
    print("--------------------")

    for lhs in prod_rules:
        rhs_counts = prod_rules[lhs]
        total = float(sum(rhs_counts.values()))
        for rhs in rhs_counts:
            # normalization step: turn counts into probabilities
            rhs_counts[rhs] = float(rhs_counts[rhs]) / total

    # Number the rules: one major index per left-hand side, one minor index
    # per right-hand side of that left-hand side.
    rules = []
    for lhs_index, lhs in enumerate(prod_rules):
        for rhs_index, rhs in enumerate(prod_rules[lhs]):
            rhs_parts = re.findall("[^()]+", rhs)
            rules.append(("r%d.%d" % (lhs_index, rhs_index),
                          "%s" % re.findall("[^()]+", lhs)[0],
                          rhs_parts,
                          prod_rules[lhs][rhs]))

    g = pcfg.Grammar('S')
    for (rule_id, lhs, rhs, prob) in rules:
        g.add_rule(pcfg.Rule(rule_id, lhs, rhs, prob))

    print("Starting max size")

    g.set_max_size(num_nodes)

    print("Done with max size")

    Hstars = []
    for _ in range(num_samples):
        rule_list = g.sample(num_nodes)
        Hstars.append(grow(rule_list, g)[0])

    return (Hstars)
def get_hrg_production_rules(edgelist_data_frame,
                             graph_name,
                             tw=False,
                             n_subg=2,
                             n_nodes=300,
                             nstats=False):
    """Derive HRG production rules from an edgelist and pickle graphs grown from them.

    Only the part of this function that is visible here is documented; the
    source continues after the final pickle check.

    Steps performed by the visible code:
      1. Build a networkx graph from the ``'src'``/``'trg'`` columns of the
         data frame (edge attributes depend on the number of columns).
      2. Remove self-loops and keep the largest connected component.
      3. Tree-decompose the graph (or sampled subgraphs when it has 500 or
         more nodes) and collect rule counts with ``td.new_visit``.
      4. Normalize the counts per left-hand side and number the rules.
      5. Call ``grow_exact_size_hrg_graphs_from_prod_rules`` and pickle the
         result to ``Results/<graph_name>_hstars.pickle``.

    Parameters
    ----------
    edgelist_data_frame : data frame with at least ``'src'`` and ``'trg'`` columns
    graph_name : name stored on the graph and used in the output file name
    tw : not referenced in the visible part of the function
    n_subg, n_nodes : second and third argument of ``gs.rwr_sample``
    nstats : not referenced in the visible part of the function
    """
    # NOTE(review): `derive_prules_from` is not used in the visible part.
    from growing import derive_prules_from

    t_start = time.time()
    df = edgelist_data_frame
    # 4 columns: keep all edge attributes; 3 columns: keep only 'ts';
    # anything else: plain edges.
    if df.shape[1] == 4:
        G = nx.from_pandas_dataframe(df, 'src', 'trg',
                                     edge_attr=True)  # whole graph
    elif df.shape[1] == 3:
        G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])  # whole graph
    else:
        G = nx.from_pandas_dataframe(df, 'src', 'trg')
    G.name = graph_name
    print "==> read in graph took: {} seconds".format(time.time() - t_start)

    # Drop self-loops, then continue with the largest connected component.
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes()

    phrg.graph_checks(G)

    # NOTE(review): the title line is printed when DBG is falsy while the
    # surrounding rule lines are printed when DBG is truthy -- confirm that
    # the `not` is intended.
    if DBG: print
    if DBG: print "--------------------"
    if not DBG: print "-Tree Decomposition-"
    if DBG: print "--------------------"

    prod_rules = {}
    K = n_subg
    n = n_nodes
    if num_nodes >= 500:
        print 'Grande'
        t_start = time.time()
        for Gprime in gs.rwr_sample(G, K, n):
            T = td.quickbb(Gprime)
            root = list(T)[0]
            T = td.make_rooted(T, root)
            T = phrg.binarize(T)
            root = list(T)[0]
            root, children = T
            # td.new_visit(T, G, prod_rules, TD)
            td.new_visit(T, G, prod_rules)
            # NOTE(review): the same visit is started a second time in a
            # separate Process that is never joined.  Presumably a leftover
            # experiment; whether its updates to `prod_rules` ever reach this
            # process depends on what `Process` is -- confirm.
            Process(target=td.new_visit, args=(
                T,
                G,
                prod_rules,
            )).start()
    else:
        T = td.quickbb(G)
        root = list(T)[0]
        T = td.make_rooted(T, root)
        T = phrg.binarize(T)
        root = list(T)[0]
        root, children = T
        # td.new_visit(T, G, prod_rules, TD)
        td.new_visit(T, G, prod_rules)

        # NOTE(review): exit() ends the run right here, so for graphs with
        # fewer than 500 nodes nothing below this branch is executed --
        # looks like debugging residue; confirm.
        print_treewidth(T)
        exit()

    if DBG: print
    if DBG: print "--------------------"
    if DBG: print "- Production Rules -"
    if DBG: print "--------------------"

    # Replace every count by count / (sum of the counts sharing its key).
    for k in prod_rules.iterkeys():
        if DBG: print k
        s = 0
        for d in prod_rules[k]:
            s += prod_rules[k][d]
        for d in prod_rules[k]:
            prod_rules[k][d] = float(prod_rules[k][d]) / float(
                s)  # normalization step to create probs not counts.
            if DBG: print '\t -> ', d, prod_rules[k][d]

    # Number the rules "r<id>.<sid>": `id` counts left-hand sides, `sid`
    # counts the right-hand sides of one left-hand side.
    rules = []
    id = 0
    for k, v in prod_rules.iteritems():
        sid = 0
        for x in prod_rules[k]:
            rhs = re.findall("[^()]+", x)
            rules.append(
                ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs,
                 prod_rules[k][x]))
            if DBG:
                print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0],
                      rhs, prod_rules[k][x])
            sid += 1
        id += 1

    # `df` is rebound to the rule table; in the visible code it is only
    # referenced inside the quoted-out block that follows.
    df = pd.DataFrame(rules)
    '''print "++++++++++"
    df.to_csv('ProdRules/{}_prs.tsv'.format(G.name), header=False, index=False, sep="\t")
    if os.path.exists('ProdRules/{}_prs.tsv'.format(G.name)): 
        print 'Saved', 'ProdRules/{}_prs.tsv'.format(G.name)
    else:
        print "Trouble saving"
    print "-----------"
    print [type(x) for x in rules[0]] '''
    '''
    Graph Generation of Synthetic Graphs
    Grow graphs usigng the union of rules from sampled sugbgraphs to predict the target order of the 
    original graph
    '''
    hStars = grow_exact_size_hrg_graphs_from_prod_rules(
        rules, graph_name, G.number_of_nodes(), 10)
    print '... hStart graphs:', len(hStars)
    # Persist the generated graphs under a single key named after the graph.
    d = {graph_name + "_hstars": hStars}
    with open(r"Results/{}_hstars.pickle".format(graph_name),
              "wb") as output_file:
        cPickle.dump(d, output_file)
    if os.path.exists(r"Results/{}_hstars.pickle".format(graph_name)):
        print "File saved"
    '''if nstats:
示例#5
0
# Script fragment: `G`, `num_nodes` and `prod_rules` are expected to exist
# already when these statements run.
graph_checks(G)

print("")
print("--------------------")
print("-Tree Decomposition-")
print("--------------------")

if num_nodes < 500:
    # Small graph: decompose it in one piece.
    T = td.quickbb(G)
    root = list(T)[0]
    T = hrg.binarize(td.make_rooted(T, root))
    root = list(T)[0]
    root, children = T
    td.new_visit(T, G, prod_rules)
else:
    # Large graph: decompose each subgraph returned by the sampler, but
    # always extract the rules against the full graph G.
    for Gprime in gs.rwr_sample(G, 2, 100):
        T = td.quickbb(Gprime)
        root = list(T)[0]
        T = hrg.binarize(td.make_rooted(T, root))
        root = list(T)[0]
        root, children = T
        td.new_visit(T, G, prod_rules)


def flatten(tup):
    """Debug helper: print type information about ``tup``; returns ``None``.

    For a value whose type is exactly ``frozenset`` the type itself is
    printed.  For anything else the type of ``tup[0]`` is printed, followed
    by the type of the literal list ``[1]``.
    """
    # NOTE(review): `type([1])` is always the list type; presumably
    # `type(tup[1])` was meant -- confirm before relying on the output.
    if type(tup) != frozenset:
        print("%s %s" % (type(tup[0]), type([1])))
    else:
        print(type(tup))
示例#6
0
文件: PHRG.py 项目: nddsg/PHRG
def probabilistic_hrg_learning(G, num_samples=1, n=None, prod_rules=None):
    """Learn numbered, probability-weighted HRG production rules from ``G``.

    The self-loop edges of ``G`` are removed in place and the work continues
    on its largest connected component.  When the node count is below 500
    the graph is tree-decomposed whole; otherwise it is decomposed one
    subgraph at a time, using the subgraphs produced by
    ``gs.rwr_sample(G, 2, 300)``.

    Parameters
    ----------
    G : input graph (modified in place: its self-loop edges are removed)
    num_samples : unused; kept so existing callers keep working
    n : node count used for the 500-node threshold instead of the size of
        ``G`` when given
    prod_rules : optional dict that receives the rule counts; it is updated
        and normalized in place.  A fresh dict is used when omitted.

    Returns
    -------
    list of tuple
        ``(rule_id, lhs, rhs_parts, probability)`` with ``rule_id`` of the
        form ``"r<lhs index>.<rhs index>"``.
    """
    # The default of None used to reach td.new_visit and .iterkeys() as-is.
    if prod_rules is None:
        prod_rules = {}

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    # number_of_nodes must be *called*: comparing the bound method itself
    # with 500 is always true on Python 2 and a TypeError on Python 3.
    num_nodes = G.number_of_nodes() if n is None else n

    graph_checks(G)

    if num_nodes >= 500:
        graphs = gs.rwr_sample(G, 2, 300)
    else:
        graphs = [G]

    for graph in graphs:
        T = td.quickbb(graph)
        T = td.make_rooted(T, list(T)[0])
        T = binarize(T)
        # Rules are extracted against the full component G, not the sample.
        td.new_visit(T, G, prod_rules)

    for lhs in prod_rules:
        rhs_counts = prod_rules[lhs]
        total = float(sum(rhs_counts.values()))
        for rhs in rhs_counts:
            # normalization step: turn counts into probabilities
            rhs_counts[rhs] = float(rhs_counts[rhs]) / total

    # Number the rules: one major index per left-hand side, one minor index
    # per right-hand side of that left-hand side.
    rules = []
    for lhs_index, lhs in enumerate(prod_rules):
        for rhs_index, rhs in enumerate(prod_rules[lhs]):
            rhs_parts = re.findall("[^()]+", rhs)
            rules.append(("r%d.%d" % (lhs_index, rhs_index),
                          "%s" % re.findall("[^()]+", lhs)[0],
                          rhs_parts,
                          prod_rules[lhs][rhs]))

    return rules
示例#7
0
文件: PHRG.py 项目: nddsg/PHRG
def probabilistic_hrg(G, n=None):
    """Rule extraction procedure.

    The self-loop edges of ``G`` are removed in place and the work continues
    on its largest connected component.  When the node count is below 500
    the graph is tree-decomposed whole; otherwise it is decomposed one
    subgraph at a time, using the subgraphs produced by
    ``gs.rwr_sample(G, 2, 300)``.  The counts gathered by ``td.new_visit``
    are normalized per left-hand side and numbered.

    Parameters
    ----------
    G : input graph (modified in place); ``None`` makes the function return
        ``None`` immediately
    n : node count used for the 500-node threshold instead of the size of
        ``G`` when given

    Returns
    -------
    list of tuple
        ``(rule_id, lhs, rhs_parts, probability)`` with ``rule_id`` of the
        form ``"r<lhs index>.<rhs index>"``.
    """
    if G is None:
        return

    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)

    num_nodes = G.number_of_nodes() if n is None else n

    graph_checks(G)

    if DEBUG:
        print("")
        print("--------------------")
        print("-Tree Decomposition-")
        print("--------------------")

    if num_nodes >= 500:
        graphs = gs.rwr_sample(G, 2, 300)
    else:
        graphs = [G]

    prod_rules = {}
    for graph in graphs:
        T = td.quickbb(graph)
        T = td.make_rooted(T, list(T)[0])
        T = binarize(T)
        # Rules are extracted against the full component G, not the sample.
        td.new_visit(T, G, prod_rules)

    if DEBUG:
        print("")
        print("--------------------")
        print("- Production Rules -")
        print("--------------------")

    for lhs in prod_rules:
        if DEBUG:
            print(lhs)
        rhs_counts = prod_rules[lhs]
        total = float(sum(rhs_counts.values()))
        for rhs in rhs_counts:
            # normalization step: turn counts into probabilities
            rhs_counts[rhs] = float(rhs_counts[rhs]) / total
            if DEBUG:
                print('\t ->  %s %s' % (rhs, rhs_counts[rhs]))

    # Number the rules: one major index per left-hand side, one minor index
    # per right-hand side of that left-hand side.
    rules = []
    for lhs_index, lhs in enumerate(prod_rules):
        for rhs_index, rhs in enumerate(prod_rules[lhs]):
            rhs_parts = re.findall("[^()]+", rhs)
            rule = ("r%d.%d" % (lhs_index, rhs_index),
                    "%s" % re.findall("[^()]+", lhs)[0],
                    rhs_parts,
                    prod_rules[lhs][rhs])
            rules.append(rule)
            if DEBUG:
                print(rule)

    return rules
示例#8
0
def sampled_subgraphs_cliquetree(orig, tree_path):
    """Union the production rules of every ``<tree_path>*.dimacs.tree`` file.

    For each matching tree-decomposition file the reference graph is loaded
    from the edgelist ``orig``, the bag lines (starting with ``B``) and the
    tree-edge lines (starting with ``e``) are parsed, the tree is rooted at
    bag 1, binarized and visited with ``td.new_visit`` so that its rule
    counts are added to one shared dictionary.

    The counts are normalized once, after all files have been visited.
    Normalizing inside the loop would add the raw counts of a later file to
    the already-normalized probabilities of the earlier ones.

    The numbered rules are written to ``./ProdRules/<basename of orig>.prules.bz2``
    unless that file already exists.

    Parameters
    ----------
    orig : path of the original graph's edgelist
    tree_path : path prefix of the tree files to read

    Returns
    -------
    None
    """
    files = glob(tree_path + "*.dimacs.tree")
    prod_rules = {}
    graph_name = orig

    for fname in files:
        print('... input file: %s' % fname)

        # NOTE(review): the graph is reloaded for every tree file; kept that
        # way because it is not visible here whether td.new_visit modifies G.
        df = Pandas_DataFrame_From_Edgelist([orig])[0]
        if df.shape[1] == 3:
            G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])
        else:
            G = nx.from_pandas_dataframe(df, 'src', 'trg')
        print(nx.info(G))

        with open(fname, 'r') as f:  # read tree decomp from inddgo
            lines = [x.rstrip('\r\n') for x in f.readlines()]

        # "B <bag id> <size> <node> ..." -> bag id : member nodes
        cbags = {}
        for b in (x.split() for x in lines if x.startswith('B')):
            cbags[int(b[1])] = [int(v) for v in b[3:]]  # what to do with bag size?

        # "e <bag id> <bag id>" -> pairs of bag ids
        edges = [[int(k) for k in x.split()[1:]]
                 for x in lines if x.startswith('e')]

        tree = defaultdict(set)
        for s, t in edges:
            tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
            if DEBUG:
                print('.. # of keys in `tree`: %s' % len(tree.keys()))
        if DEBUG:
            print(tree.keys())

        root = frozenset(cbags[1])
        if DEBUG:
            print('.. Root: %s' % (root,))
        T = td.make_rooted(tree, root)
        if DEBUG:
            print('.. T rooted: %s' % len(T))

        T = phrg.binarize(T)
        td.new_visit(T, G, prod_rules)
        print('... prod_rules size %s' % len(prod_rules.keys()))

    if DEBUG:
        print("--------------------")
        print("- Production Rules -")
        print("--------------------")

    for lhs in prod_rules:
        if DEBUG:
            print(lhs)
        rhs_counts = prod_rules[lhs]
        total = float(sum(rhs_counts.values()))
        for rhs in rhs_counts:
            # normalization step: turn counts into probabilities
            rhs_counts[rhs] = float(rhs_counts[rhs]) / total
            if DEBUG:
                print('\t ->  %s %s' % (rhs, rhs_counts[rhs]))

    #  - production rules number -
    rules = []
    for lhs_index, lhs in enumerate(prod_rules):
        for rhs_index, rhs in enumerate(prod_rules[lhs]):
            rhs_parts = re.findall("[^()]+", rhs)
            rule = ("r%d.%d" % (lhs_index, rhs_index),
                    "%s" % re.findall("[^()]+", lhs)[0],
                    rhs_parts,
                    prod_rules[lhs][rhs])
            rules.append(rule)
            if DEBUG:
                print(rule)

    df = pd.DataFrame(rules)

    print(graph_name)
    graph_name = os.path.basename(graph_name)
    print(graph_name)
    outdf_fname = "./ProdRules/" + graph_name + ".prules"
    if not os.path.isfile(outdf_fname + ".bz2"):
        # Write first, then report, so "written" is only printed on success.
        df.to_csv(outdf_fname + ".bz2", compression="bz2")
        print('... %s written' % outdf_fname)
    else:
        print('... %s file exists' % outdf_fname)

    return
def dimacs_td_ct(tdfname):
    """Tree decomposition to clique tree: derive and save production rules.

    The file name of ``tdfname`` is split on ``.``: the first part is the
    graph name and the second part the tree-decomposition heuristic.  The
    graph is loaded from ``datasets/out.<graph name>``.  The bag lines
    (starting with ``B``) and tree-edge lines (starting with ``e``) of the
    file are parsed, the tree is rooted at bag 1, binarized and visited with
    ``td.new_visit``.  The resulting counts are normalized per left-hand
    side, numbered, and written to
    ``./ProdRules/<graph name>.<heuristic>.prules.bz2`` unless that file
    already exists.

    Parameters
    ----------
    tdfname : path of the tree-decomposition file

    Returns
    -------
    None
    """
    print('... input file: %s' % tdfname)
    fname = tdfname
    name_parts = os.path.basename(fname).split('.')
    gname = name_parts[0]
    gfname = "datasets/out." + gname
    tdh = name_parts[1]  # tree decomp heuristic
    tfname = gname + "." + tdh

    G = load_edgelist(gfname)

    if DEBUG:
        print(nx.info(G))
    print("")
    with open(fname, 'r') as f:  # read tree decomp from inddgo
        lines = [x.rstrip('\r\n') for x in f.readlines()]

    # "B <bag id> <size> <node> ..." -> bag id : member nodes
    cbags = {}
    for b in (x.split() for x in lines if x.startswith('B')):
        cbags[int(b[1])] = [int(v) for v in b[3:]]  # what to do with bag size?

    # "e <bag id> <bag id>" -> pairs of bag ids
    edges = [[int(k) for k in x.split()[1:]]
             for x in lines if x.startswith('e')]

    tree = defaultdict(set)
    for s, t in edges:
        tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
        if DEBUG:
            print('.. # of keys in `tree`: %s' % len(tree.keys()))
    if DEBUG:
        print(tree.keys())

    # The root is always bag 1.  The former `list(tree)[0]` lookup was
    # overwritten right away and raised IndexError when the file had no
    # edge lines.
    root = frozenset(cbags[1])
    if DEBUG:
        print('.. Root: %s' % (root,))
    T = td.make_rooted(tree, root)
    if DEBUG:
        print('.. T rooted: %s' % len(T))

    T = phrg.binarize(T)

    prod_rules = {}
    td.new_visit(T, G, prod_rules)

    if DEBUG:
        print("--------------------")
        print("- Production Rules -")
        print("--------------------")

    for lhs in prod_rules:
        if DEBUG:
            print(lhs)
        rhs_counts = prod_rules[lhs]
        total = float(sum(rhs_counts.values()))
        for rhs in rhs_counts:
            # normalization step: turn counts into probabilities
            rhs_counts[rhs] = float(rhs_counts[rhs]) / total
            if DEBUG:
                print('\t ->  %s %s' % (rhs, rhs_counts[rhs]))

    # Number the rules: one major index per left-hand side, one minor index
    # per right-hand side of that left-hand side.
    rules = []
    for lhs_index, lhs in enumerate(prod_rules):
        for rhs_index, rhs in enumerate(prod_rules[lhs]):
            rhs_parts = re.findall("[^()]+", rhs)
            rule = ("r%d.%d" % (lhs_index, rhs_index),
                    "%s" % re.findall("[^()]+", lhs)[0],
                    rhs_parts,
                    prod_rules[lhs][rhs])
            rules.append(rule)
            if DEBUG:
                print(rule)

    df = pd.DataFrame(rules)

    outdf_fname = "./ProdRules/" + tfname + ".prules"
    if not os.path.isfile(outdf_fname + ".bz2"):
        # Write first, then report, so "written" is only printed on success.
        df.to_csv(outdf_fname + ".bz2", compression="bz2")
        print('... %s written' % outdf_fname)
    else:
        print('... %s file exists' % outdf_fname)
    return
def dimacs_td_ct_fast(oriG, tdfname):
    """Tree decomposition to clique tree: derive production rules for one tree file.

    Parameters
    ----------
    oriG : the original (input) graph; it is handed to ``td.new_visit``
        unchanged.  ``None`` makes the function return ``1``.
    tdfname : path of the tree-decomposition file (INDDGO output)

    Returns
    -------
    ``1`` when ``oriG`` is ``None``; otherwise the output file name
    ``<basename of tdfname>.prs``.  When ``../ProdRules/<that name>`` already
    exists the name is returned without any further work.

    Notes
    -----
    The rules are computed but not yet written to ``../ProdRules``: saving
    them is still a ToDo, so the returned name may refer to a file that does
    not exist.  A leftover debugging ``print type(G); exit()`` that ended the
    whole interpreter before the rules were extracted has been removed.
    Synthetic input graphs are not handled by this version.
    """
    G = oriG
    if G is None:
        return (1)

    out_tdfname = os.path.basename(tdfname) + ".prs"
    if os.path.exists("../ProdRules/" + out_tdfname):
        return out_tdfname

    print("%s %s" % ("../ProdRules/" + out_tdfname, tdfname))

    with open(tdfname, 'r') as f:  # read tree decomp from inddgo
        lines = [x.rstrip('\r\n') for x in f.readlines()]

    # "B <bag id> <size> <node> ..." -> bag id : member nodes
    cbags = {}
    for b in (x.split() for x in lines if x.startswith('B')):
        cbags[int(b[1])] = [int(v) for v in b[3:]]  # what to do with bag size?

    # "e <bag id> <bag id>" -> pairs of bag ids
    edges = [[int(k) for k in x.split()[1:]]
             for x in lines if x.startswith('e')]

    tree = defaultdict(set)
    for s, t in edges:
        tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
        if DEBUG:
            print('.. # of keys in `tree`: %s' % len(tree.keys()))

    # The root is always bag 1.
    root = frozenset(cbags[1])
    T = td.make_rooted(tree, root)
    T = phrg.binarize(T)

    prod_rules = {}
    td.new_visit(T, G, prod_rules)

    for lhs in prod_rules:
        if DEBUG:
            print(lhs)
        rhs_counts = prod_rules[lhs]
        total = float(sum(rhs_counts.values()))
        for rhs in rhs_counts:
            # normalization step: turn counts into probabilities
            rhs_counts[rhs] = float(rhs_counts[rhs]) / total
            if DEBUG:
                print('\t ->  %s %s' % (rhs, rhs_counts[rhs]))

    # Number the rules: one major index per left-hand side, one minor index
    # per right-hand side of that left-hand side.
    rules = []
    for lhs_index, lhs in enumerate(prod_rules):
        for rhs_index, rhs in enumerate(prod_rules[lhs]):
            rhs_parts = re.findall("[^()]+", rhs)
            rules.append(("r%d.%d" % (lhs_index, rhs_index),
                          "%s" % re.findall("[^()]+", lhs)[0],
                          rhs_parts,
                          prod_rules[lhs][rhs]))

    # ToDo.
    # Let's save these rules to file or print proper
    # write_prod_rules_to_tsv(rules, out_tdfname)

    return out_tdfname
def probabilistic_hrg (G, num_samples=1, n=None):
  """Learn an HRG from ``G`` and grow synthetic graphs from it.

  The self-loop edges of ``G`` are removed in place and the work continues
  on its largest connected component.  When the node count is below 500 the
  graph is tree-decomposed whole; otherwise it is decomposed one subgraph at
  a time, using the subgraphs produced by ``gs.rwr_sample(G, 2, 300)``.

  A stray ``exit()`` that used to follow the rule numbering has been
  removed: it ended the interpreter before any graph could be grown, which
  contradicted the documented return value.

  Args:
  ------------
  G: input graph (nx obj); modified in place
  num_samples: (int) in the 'grow' process, this is the number of
               synthetic graphs to generate
  n: (int) num_nodes; number of nodes in the resulting graphs.  Defaults to
     the size of the largest connected component of ``G``.

  Returns: List of synthetic graphs (H^stars)
  """
  if DEBUG:
    print(G.number_of_nodes())
    print(G.number_of_edges())

  G.remove_edges_from(G.selfloop_edges())
  giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
  G = nx.subgraph(G, giant_nodes)

  num_nodes = G.number_of_nodes() if n is None else n

  if DEBUG:
    print(G.number_of_nodes())
    print(G.number_of_edges())

  graph_checks(G)

  if DEBUG:
    print("")
    print("--------------------")
    print("-Tree Decomposition-")
    print("--------------------")

  if num_nodes >= 500:
    graphs = gs.rwr_sample(G, 2, 300)
  else:
    graphs = [G]

  prod_rules = {}
  for graph in graphs:
    T = td.quickbb(graph)
    T = td.make_rooted(T, list(T)[0])
    T = binarize(T)
    # NOTE(review): `TD` is not defined in this function; presumably a
    # module-level value expected by this variant of td.new_visit -- confirm.
    td.new_visit(T, G, prod_rules, TD)

  if DEBUG:
    print("")
    print("--------------------")
    print("- Production Rules -")
    print("--------------------")

  for lhs in prod_rules:
    if DEBUG:
      print(lhs)
    rhs_counts = prod_rules[lhs]
    total = float(sum(rhs_counts.values()))
    for rhs in rhs_counts:
      # normalization step: turn counts into probabilities
      rhs_counts[rhs] = float(rhs_counts[rhs]) / total
      if DEBUG:
        print('\t ->  %s %s' % (rhs, rhs_counts[rhs]))

  # Number the rules: one major index per left-hand side, one minor index
  # per right-hand side of that left-hand side.
  rules = []
  for lhs_index, lhs in enumerate(prod_rules):
    for rhs_index, rhs in enumerate(prod_rules[lhs]):
      rhs_parts = re.findall("[^()]+", rhs)
      rule = ("r%d.%d" % (lhs_index, rhs_index),
              "%s" % re.findall("[^()]+", lhs)[0],
              rhs_parts,
              prod_rules[lhs][rhs])
      rules.append(rule)
      if DEBUG:
        print(rule)

  g = pcfg.Grammar('S')
  for (rule_id, lhs, rhs, prob) in rules:
    g.add_rule(pcfg.Rule(rule_id, lhs, rhs, prob))

  if DEBUG:
    print("Starting max size")

  g.set_max_size(num_nodes)

  if DEBUG:
    print("Done with max size")

  Hstars = []
  for _ in range(num_samples):
    rule_list = g.sample(num_nodes)
    Hstars.append(grow(rule_list, g)[0])

  return Hstars
示例#12
0
def isomorphic_test_from_dimacs_tree(orig, tdfname, gname=""):
    """Stack the production rules of several tree decompositions and compare them.

    The reference graph is loaded from ``orig``, stripped of self-loops and
    reduced to its largest connected component.  Every file matching
    ``tdfname + "*.dimacs.tree"`` is parsed (bag lines start with ``B``,
    tree-edge lines with ``e``), rooted at bag 1, binarized and visited with
    ``td.new_visit``.  After each file the numbered rules are put in a data
    frame tagged (column ``cate``) with the first two dot-separated parts of
    the file name, and stacked.

    The stacked frame is written to ``../Results/<gname>_stacked_df.tsv`` and
    passed to ``isomorph_intersection_2dfstacked``; the intersection it
    returns is written to ``../Results/<gname>_isom_interxn.tsv``.

    orig: path to original/reference input graph
    tdfname: path fragment for a set of td prod rules
    gname: graph name (str)
    returns: None.  The process exits with status 1 when the graph cannot be
        loaded.
    """
    # if whole tree path
    # else, assume a path fragment
    print('... input graph  : %s' % os.path.basename(orig))
    print('... td path frag : %s' % tdfname)

    G = load_edgelist(orig)  # load edgelist into a graph obj
    # +++ Graph Checks
    # The None check has to come before the first use of G.
    if G is None:
        sys.exit(1)
    G.remove_edges_from(G.selfloop_edges())
    giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
    G = nx.subgraph(G, giant_nodes)
    graph_checks(G)
    # --- graph checks

    G.name = gname

    files = sorted(glob(tdfname + "*.dimacs.tree"))
    if not files:
        # Nothing to stack; the column assignment below would fail.
        print('... no files match: %s' % (tdfname + "*.dimacs.tree"))
        return

    if DBG:
        for x in files:
            print(os.path.basename(x).split(".")[0].split("_")[-1])

    # NOTE(review): `prod_rules` is shared by all files and is normalized
    # after each one, so every later file adds raw counts to the previous
    # files' probabilities and its data frame also contains their rules.
    # Confirm whether the dict should be reset per file instead.
    prod_rules = {}
    stacked_df = pd.DataFrame()

    for tfname in files:
        tname = "_".join(os.path.basename(tfname).split(".")[:2])

        with open(tfname, 'r') as f:  # read tree decomp from inddgo
            lines = [x.rstrip('\r\n') for x in f.readlines()]

        # "B <bag id> <size> <node> ..." -> bag id : member nodes
        cbags = {}
        for b in (x.split() for x in lines if x.startswith('B')):
            cbags[int(b[1])] = [int(v) for v in b[3:]]  # what to do with bag size?

        # "e <bag id> <bag id>" -> pairs of bag ids
        edges = [[int(k) for k in x.split()[1:]]
                 for x in lines if x.startswith('e')]

        tree = defaultdict(set)
        for s, t in edges:
            tree[frozenset(cbags[s])].add(frozenset(cbags[t]))
            if DBG:
                print('.. # of keys in `tree`: %s' % len(tree.keys()))

        # The root is always bag 1.
        root = frozenset(cbags[1])
        T = td.make_rooted(tree, root)
        T = phrg.binarize(T)

        td.new_visit(T, G, prod_rules)

        for lhs in prod_rules:
            if DBG:
                print(lhs)
            rhs_counts = prod_rules[lhs]
            total = float(sum(rhs_counts.values()))
            for rhs in rhs_counts:
                # normalization step: turn counts into probabilities
                rhs_counts[rhs] = float(rhs_counts[rhs]) / total
                if DBG:
                    print('\t ->  %s %s' % (rhs, rhs_counts[rhs]))

        if DBG:
            print("--------------------")
            print('- Prod. Rules')
            print("--------------------")

        # Number the rules: one major index per left-hand side, one minor
        # index per right-hand side of that left-hand side.
        rules = []
        for lhs_index, lhs in enumerate(prod_rules):
            for rhs_index, rhs in enumerate(prod_rules[lhs]):
                rhs_parts = re.findall("[^()]+", rhs)
                rule = ("r%d.%d" % (lhs_index, rhs_index),
                        "%s" % re.findall("[^()]+", lhs)[0],
                        rhs_parts,
                        prod_rules[lhs][rhs])
                rules.append(rule)
                if DBG:
                    print('%s %s %s %s' % rule)

        df = pd.DataFrame(rules)
        df['cate'] = tname
        stacked_df = pd.concat([df, stacked_df])

    stacked_path = "../Results/{}_stacked_df.tsv".format(gname)
    interx_path = '../Results/{}_isom_interxn.tsv'.format(gname)

    print("\nStacked prod rules\n" + "~" * 20)
    print("   %s" % (stacked_df.shape,))
    if args['verb']:
        print(stacked_df.to_string())
    stacked_df.to_csv(stacked_path, sep="\t")
    if os.path.exists(stacked_path):
        print('Wrote: %s' % stacked_path)

    print("\nisomorphic union of the rules (_mod probs)\n" + "~" * 20)
    stacked_df.columns = ['rnbr', 'lhs', 'rhs', 'pr', 'cate']
    iso_union, iso_interx = isomorph_intersection_2dfstacked(stacked_df)
    print("   %s" % (iso_union.shape,))
    if args['verb']:
        print(iso_union.to_string())

    print("\nIsomorphic intersection of the prod rules\n" + "~" * 20)
    print("   %s" % (iso_interx.shape,))
    iso_interx.to_csv(interx_path)
    if os.path.exists(interx_path):
        print('Wrote: %s' % interx_path)
示例#13
0
def main(add_edge_events=None, return_dict=None):
    """Learn rule pairs from timed edge additions, then parse and derive a graph.

    All event times except the last are replayed in sorted order.  At each
    step the edges of that time are added to ``g_next``, the largest weakly
    connected component is kept, the union of the previous and the current
    graph is tree-decomposed, and ``td.new_visit`` records rule pairs in
    ``shrg_rules``.  Afterwards:

      * nodes of terminal-only rules that belong to the last step and are
        not flagged ``iso`` are relabelled with their original ids;
      * rule weights are divided by the summed weight of the first rule of
        every pair sharing the same left-hand side;
      * ``g_next`` is parsed with the first rules of the pairs, and the best
        derivation is replayed with the second rules.

    Progress messages are appended to the module-level ``logfile``.

    Parameters
    ----------
    add_edge_events : dict mapping an event time to an iterable of ``(u, v)``
        edges; defaults to an empty dict
    return_dict : optional dict that receives the keys ``'status'``,
        ``'graph'``, ``'shrg_rules'`` and ``'time'``.  A fresh dict is used
        per call when omitted; the previous ``{}`` default was one object
        shared, and mutated, by all calls.

    Returns
    -------
    tuple
        ``(status, graph, shrg_rules, elapsed_seconds)``; ``status`` is
        ``'pass'``, or ``'fail'`` with ``graph`` set to ``None`` when the
        derivation raises ``KeyError``.
    """
    start = time()
    if add_edge_events is None:
        add_edge_events = {}
    if return_dict is None:
        return_dict = {}

    def log(*items):
        # Append one line to the log and close the file right away; the
        # handle used to be left open for every message.
        with open(logfile, 'a') as log_fh:
            print(*items, file=log_fh)

    del_edge_events = {}
    log(add_edge_events)

    g_prev = nx.DiGraph()
    g_next = nx.DiGraph()

    events = sorted(set(add_edge_events) | set(del_edge_events))

    shrg_rules = {}
    i = 0
    # The last event time is deliberately left out of the replay.
    for t in events[:-1]:
        decomp_time = time()

        if t in add_edge_events:
            for u, v in add_edge_events[t]:
                g_next.add_edge(u, v, label='e')
        if t in del_edge_events:
            for u, v in del_edge_events[t]:
                if (u, v) in g_next.edges():
                    g_next.remove_edge(u, v)
        nx.set_node_attributes(g_next, 'label', 'u')

        # get WCC
        if not nx.is_weakly_connected(g_next):
            g_next = max(nx.weakly_connected_component_subgraphs(g_next),
                         key=len)

        g_union = union_graph(g_prev, g_next)
        tree_decomp_l = tree_decomposition(g_union)

        i += 1

        tree_decomp = tree_decomp_l[0]
        tree_decomp = prune(tree_decomp, frozenset())
        tree_decomp = binarize(tree_decomp)
        tree_decomp = prune(tree_decomp, frozenset())

        td.new_visit(tree_decomp, g_prev, g_next, shrg_rules, i)
        g_prev = g_next.copy()
        log('tree decomp #{} done in {} sec'.format(t, time() - decomp_time))

    prev_rules = []
    next_rules = []
    anchor_candidates = []

    # Anchor candidates: non-external nodes of rules without nonterminals
    # whose second rule belongs to the last step and is not flagged `iso`.
    for lhs_set in shrg_rules.values():
        for rule_tuple in lhs_set:
            has_nonterminal = any(
                isinstance(n[1]['label'], grammar.Nonterminal)
                for n in rule_tuple[0].rhs.nodes(data=True))
            if (not has_nonterminal and rule_tuple[1].time == i
                    and rule_tuple[1].iso == False):
                for n in rule_tuple[0].rhs.nodes(data=True):
                    if 'external' not in n[1] and not isinstance(
                            n[1]['label'], grammar.Nonterminal):
                        anchor_candidates.append((n[1]['oid'], rule_tuple))

    log('Number of Anchors', len(anchor_candidates))
    # Sampling the whole population yields a shuffled copy of the list.
    anchors = random.sample(anchor_candidates, len(anchor_candidates))
    for oid, rule in anchors:
        prev_rule, next_rule = rule
        for n in prev_rule.rhs.nodes(data=True):
            if 'oid' in n[1] and n[1]['oid'] == oid:
                n[1]['label'] = oid
        for n in next_rule.rhs.nodes(data=True):
            if 'oid' in n[1] and n[1]['oid'] == oid:
                n[1]['label'] = oid
                log('label changed to oid', next_rule.id, next_rule.time, n)

        for n in g_next.nodes(data=True):
            if n[0] == oid:
                n[1]['label'] = oid

        for n in g_prev.nodes(data=True):
            if n[0] == oid:
                n[1]['label'] = oid

    # Normalize the weights of all pairs that share a left-hand side by the
    # summed weight of their first rules.
    for lhs_set in shrg_rules.values():
        s = 0
        for prev_rule, next_rule in lhs_set:
            s += prev_rule.weight

        for rule_tuple in lhs_set:
            rule_tuple[1].weight /= float(s)
            next_rules.append(rule_tuple[1])

            rule_tuple[0].weight /= float(s)
            prev_rules.append(rule_tuple[0])

    assert len(prev_rules) == len(next_rules)

    log('Parse start, time elapsed: {} sec'.format(time() - start))
    log('Number of Rules ', len(prev_rules))

    forest = p.parse(prev_rules, [grammar.Nonterminal('0')], g_next)
    log('Parse end, time elapsed: {} sec'.format(time() - start))

    try:
        new_g = p.derive(p.viterbi(forest), next_rules)
    except KeyError:
        log('Goal error!')
        elapsed = time() - start
        return_dict['status'] = 'fail'
        return_dict['graph'] = None
        return_dict['shrg_rules'] = shrg_rules
        return_dict['time'] = elapsed
        return 'fail', None, shrg_rules, elapsed

    h_shrg = nx.DiGraph()
    for e in hypergraphs.edges(new_g):
        h_shrg.add_edge(e.h[0], e.h[1])

    elapsed = time() - start
    return_dict['status'] = 'pass'
    return_dict['graph'] = h_shrg
    return_dict['shrg_rules'] = shrg_rules
    return_dict['time'] = elapsed
    return 'pass', h_shrg, shrg_rules, elapsed