Example #1
def can_the_intersection_fire(args):
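	"""Check whether the intersecting (isomorphic) subset of production
	rules in args['fire'] can fire (i.e., derive a graph) against the
	original edgelist in args['orig']; pickle the resulting Hstars."""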
	print
	print "Test intersectin (isomorphic) production rules subset"
	print "-" * 40
	from core.baseball import recompute_probabilities
	from core.will_prod_rules_fire import probe_stacked_prs_likelihood_tofire
	from explodingTree import graph_name


	prs_file = args['fire']
	origG = load_edgelist(args['orig'])
	origG.name = graph_name(args['orig'])
	nbrnodes = origG.number_of_nodes()
	# edgelist base info dict
	# from core.utils import edgelist_basic_info
	# el_base_info_d = edgelist_basic_info([args['orig']])
	# import pprint as pp
	# pp.pprint(el_base_info_d)

	from core.stacked_prod_rules import stack_prod_rules_bygroup_into_list
	stacked_prs_fsf = stack_prod_rules_bygroup_into_list([prs_file])
	df = recompute_probabilities(stacked_prs_fsf)  # from core.baseball, recompute the probabilities
	stck_fired = probe_stacked_prs_likelihood_tofire(
		df, graph_name(args['orig']), nbr_nodes=nbrnodes)  # can the stacked prod rules fire?
	# Info("{}".format(stck_fired['fired_b']))
	# print type(stck_fired)
	# exit()
	pickle.dump({'origG': origG, 'Hstars': stck_fired[1]},
				open('../Results/{}_hstr_from_prs_intxn.p'.format(origG.name), "wb"))
	if os.path.exists('../Results/{}_hstr_from_prs_intxn.p'.format(origG.name)):
		print ("Pickle written")
Example #2
def explode_to_trees(files, results_trees):
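    """Run NDDGO tree decompositions over each input file's DIMACS graph,
    one job per variable-elimination heuristic, collecting the resulting
    trees through the collect_results callback."""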
    print("\nExplode to trees")
    print("-" * 40)

    var_els = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
    if len(files) == 1:
        gn = xt.graph_name(files[0])
        dimacs_file = "../datasets/{}.dimacs".format(gn)
        print(" ", gn)
        p = mp.Pool(processes=2)
        for vael in var_els:
            p.apply_async(xt.dimacs_nddgo_tree_simple,
                          args=(dimacs_file, vael),
                          callback=collect_results)
        p.close()
        p.join()
        print(results_lst)
        return
    for j, f in enumerate(files):
        gn = xt.graph_name(f)
        dimacs_file = "../datasets/{}.dimacs".format(gn)
        print(" ", gn)
        p = mp.Pool(processes=2)
        for vael in var_els:
            p.apply_async(xt.dimacs_nddgo_tree_simple,
                          args=(dimacs_file, vael),
                          callback=collect_results)
        p.close()
        p.join()
        print(results_lst)

        if j == 0:
            asp_arr = np.array(results_trees)
            continue

        prs_np = np.array(results_trees)
        asp_arr = np.append(asp_arr, prs_np)
Example #3
def main():
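    """Driver: parse args and evaluate the union of production rules
    (EvalUnion) for the input graph."""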
    parser = get_parser()
    inargs = vars(parser.parse_args())
    print inargs

    ifname = inargs['orig'][0]
    gname = graph_name(ifname)
    #fgFiles = glob('FakeGraphs/*'+gname +"*")
    #print (len(fgFiles), "number of files")
    print("%% EvalUnion %%")
    runEvalUnion(gname)
    exit()  # early exit: the prod-rules concat step below is unreachable

    print("%%")
    prsfiles = ['ProdRules/{}_lcc_{}.prs'.format(gname, x) for x in [0, 1]]
    mdf = pd.DataFrame()  # masterDF
    for f in prsfiles:  # concat prod rules files
        df = pd.read_csv(f, sep="\t", header=None)
        mdf = pd.concat([df, mdf])
    mdf.to_csv('ProdRules/{}_concat.prs'.format(gname),
               sep="\t",
               header=None,
               index=None)
    return
Example #4
def main(args):
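    """Pipeline driver: gpickle the original edgelist if needed, convert
    it to DIMACS, explode it into decomposition trees, and derive
    production rules from those trees."""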
    orig_fname = args['orig'][0]
    gname = graph_name(orig_fname)
    Info(os.getcwd())
    dir = "../datasets"
    p_files = [
        x[0] + "/" + f for x in os.walk(dir) for f in x[2] if f.endswith(".p")
    ]
    orig_p = [x for x in p_files if gname in x]
    print
    if not len(orig_p):
        print("converting to gpickle", "\n", "-" * 40)
        g = load_edgelist(orig_fname)
        nx.write_gpickle(g, dir + "/{}.p".format(gname))
        orig_p = dir + "/{}.p".format(gname)
    results = []

    transform_edgelist_to_dimacs([orig_fname])
    # files = [x.rstrip(".p") for x in orig_p]
    # print files
    # exit()
    # print
    results_trees = []
    explode_to_tree(orig_fname, results_trees)
    # pp.pprint( [x[0]+"/"+f for x in os.walk(dir) for f in x[2] if f.endswith(".tree")])

    results_prs = []
    star_dot_trees_to_prod_rules([orig_fname], results_prs)
Example #5
def main():
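    """Build a concatenated HRG production-rules file for the input
    edgelist by sampling subgraphs of its largest connected component
    and deriving PHRG rules from each."""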
    gname = graph_name(sys.argv[1])
    print gname
    concat_prs = "ProdRules/{}_concat.prs".format(gname)

    if not os.path.exists(concat_prs):
        G = load_edgelist(sys.argv[1])
        print "[<>]", "red the graph"
        lcc = max(nx.connected_component_subgraphs(G),
                  key=len)  # find largest conn component
        Glst = sample_rand_subgraphs_in(lcc)
        print "[<>]", "got the Glst LCCs"

        concat_phrg_prod_rules(list(Glst), G.name)  # subgraph base prod rules

        dimacs_files = glob("datasets/{}*.dimacs".format(gname))
        var_el_lst = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        for gfname in dimacs_files:
            for ve in var_el_lst:
                multiprocessing.Process(target=dimacs_inddgo_tree_decomps,
                                        args=(
                                            ve,
                                            gfname,
                                        )).start()
        print "[<>]", "checks on the edgelist vs the orig graph"

        ## --
        convert_dimacs_trees_to_cliquetrees(gname)
        print "[<>]", "convert_dimacs_trees_to_cliquetrees"

        ## --
        elfiles = glob(".tmp_edgelists/{}*tsv".format(gname))
        subgraphs = [load_edgelist(f) for f in elfiles]
        prod_rules = [
            phrg.probabilistic_hrg_deriving_prod_rules(sg) for sg in subgraphs
        ]
        import itertools
        prod_rules = list(itertools.chain.from_iterable(prod_rules))
        pd.DataFrame(prod_rules).to_csv(concat_prs,
                                        sep="\t",
                                        header=False,
                                        index=False)

        ## --
        dimacs_files = glob("datasets/{}*.dimacs".format(gname))
        var_el_lst = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']
        for gfname in dimacs_files:
            for ve in var_el_lst:
                multiprocessing.Process(target=dimacs_inddgo_tree_decomps,
                                        args=(
                                            ve,
                                            gfname,
                                        )).start()

        print "[<>]", "checks on the edgelist vs the orig graph"

    print "[<>]", "concat hrg prod_rules:", concat_prs
Example #6
def get_phrg_production_rules_onsubgraphs(argmnts):
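	"""Derive PHRG production rules from each pickled subgraph of the
	input graph via tree decomposition, normalize rule counts into
	probabilities, and save them as a tab-separated .prs file."""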
	args = argmnts
	gn = graph_name(args['orig'][0])
	f = "../datasets/" + gn + "*.p"
	files = glob(f)
	prod_rules = {}
	rules = []
	id = 0

	for f in files:
		Gprime = nx.read_gpickle(f)
		Gprime = reset_graph_nodes(Gprime)
		pp.pprint(Gprime.nodes())
		T = td.quickbb(Gprime)
		root = list(T)[0]
		T = td.make_rooted(T, root)
		T = phrg.binarize(T)
		root = list(T)[0]
		root, children = T
		# td.new_visit(T, G, prod_rules, TD)
		td.new_visit(T, Gprime, prod_rules)
		# Process(target=td.new_visit, args=(T, Gprime, prod_rules,)).start()
	if DBG: print
	if DBG: print "--------------------"
	if DBG: print "- Production Rules -"
	if DBG: print "--------------------"

	for k in prod_rules.iterkeys():
		if DBG: print k
		s = 0
		for d in prod_rules[k]:
			s += prod_rules[k][d]
		for d in prod_rules[k]:
			prod_rules[k][d] = float(prod_rules[k][d]) / float(s)  # normalization step to create probs not counts.
			if DBG: print '\t -> ', d, prod_rules[k][d]

	for k, v in prod_rules.iteritems():
		sid = 0
		for x in prod_rules[k]:
			rhs = re.findall("[^()]+", x)
			rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]))
			if DBG: print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
			sid += 1
		id += 1

	df = pd.DataFrame(rules)
	# pp.pprint(df.values.tolist()); exit()

	df.to_csv('../ProdRules/{}.tsv.phrg.prs'.format(gn), header=False, index=False, sep="\t")
	if os.path.exists('../ProdRules/{}.tsv.phrg.prs'.format(gn)):
		print 'Saved', '../ProdRules/{}.tsv.phrg.prs'.format(gn)
	else:
		print "Trouble saving"


Example #7
def transform_edgelist_to_dimacs(files):
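    """Convert each input graph (read from its gpickle) to a DIMACS
    file, skipping graphs whose DIMACS file already exists."""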
    print("Transform to dimacs")
    print("-" * 40)

    p = mp.Pool(processes=2)
    for f in files:
        print("  {}".format(f))
        gn = xt.graph_name(f)
        if os.path.exists('../datasets/{}.dimacs'.format(gn)): continue
        gfname = "../datasets/{}.p".format(gn)
        g = nx.read_gpickle(gfname)
        g.name = gn
        p.apply_async(xt.convert_nx_gObjs_to_dimacs_gObjs,
                      args=([g], ),
                      callback=collect_results)
        # xt.convert_nx_gObjs_to_dimacs_gObjs([g])
    p.close()
    p.join()
Example #8
def explode_to_tree(fname, results_trees):
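    """Run NDDGO tree decompositions for a single graph's DIMACS file,
    one per variable-elimination heuristic."""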
    print("\nExplode to tree")
    print("-" * 40)

    var_els = ['mcs', 'mind', 'minf', 'mmd', 'lexm', 'mcsm']

    gn = xt.graph_name(str(fname))
    dimacs_file = "../datasets/{}.dimacs".format(gn)

    p = mp.Pool(processes=2)
    for vael in var_els:
        p.apply_async(xt.dimacs_nddgo_tree_simple,
                      args=(
                          dimacs_file,
                          vael,
                      ),
                      callback=collect_results)
    # xt.dimacs_nddgo_tree_simple(f, vael)
    p.close()
    p.join()
    if os.path.exists(dimacs_file): print("\n  {}".format(dimacs_file))
Example #9
def star_dot_trees_to_prod_rules(files, results_prs):
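    """Turn each *.tree decomposition of the input graphs into a
    production-rules (.prs) file, skipping outputs that already exist."""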
    print("Star dot trees to Production Rules")
    print("-" * 40)

    for j, f in enumerate(files):
        gn = xt.graph_name(f)
        trees = glob("../datasets/{}*.tree".format(gn))

        oriG = xt.load_edgelist(f)  # load the reference graph once per input file
        pool = mp.Pool(processes=2)
        for t in trees:
            prs_fname = "../ProdRules/{}.prs".format(os.path.basename(t))
            if os.path.exists(prs_fname):
                print("  {} file exists".format(prs_fname))
                continue
            pool.apply_async(dimacs_td_ct_fast,
                             args=(oriG, t),
                             callback=collect_results)
        pool.close()
        pool.join()
        print(results_lst)
Example #10
def get_phrg_production_rules(argmnts):
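	"""Read an edgelist into a graph, keep its giant component, derive
	PHRG production rules via tree decomposition (sampling subgraphs when
	the graph has 500+ nodes), and save the rules as a .prs file."""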
	args = argmnts

	t_start = time.time()
	df = tdf.Pandas_DataFrame_From_Edgelist(args['orig'])[0]
	if df.shape[1] == 4:
		G = nx.from_pandas_dataframe(df, 'src', 'trg', edge_attr=True)	# whole graph
	elif df.shape[1] == 3:
		G = nx.from_pandas_dataframe(df, 'src', 'trg', ['ts'])	# whole graph
	else:
		G = nx.from_pandas_dataframe(df, 'src', 'trg')
	G.name = graph_name(args['orig'][0])
	print "==> read in graph took: {} seconds".format(time.time() - t_start)
	G.remove_edges_from(G.selfloop_edges())
	giant_nodes = max(nx.connected_component_subgraphs(G), key=len)
	G = nx.subgraph(G, giant_nodes)

	num_nodes = G.number_of_nodes()

	phrg.graph_checks(G)

	if DBG: print
	if DBG: print "--------------------"
	if DBG: print "-Tree Decomposition-"
	if DBG: print "--------------------"

	prod_rules = {}
	K = 2
	n = 300
	if num_nodes >= 500:
		print 'Grande'
		t_start = time.time()
		for Gprime in gs.rwr_sample(G, K, n):
			T = td.quickbb(Gprime)
			root = list(T)[0]
			T = td.make_rooted(T, root)
			T = phrg.binarize(T)
			root = list(T)[0]
			root, children = T
			# td.new_visit(T, G, prod_rules, TD)
			td.new_visit(T, G, prod_rules)
			# Process(target=td.new_visit, args=(T, G, prod_rules,)).start()
	else:
		T = td.quickbb(G)
		root = list(T)[0]
		T = td.make_rooted(T, root)
		T = phrg.binarize(T)
		root = list(T)[0]
		root, children = T
		# td.new_visit(T, G, prod_rules, TD)
		td.new_visit(T, G, prod_rules)

		# print_treewidth(T) # TODO: needs to be fixed
		# exit()

	if DBG: print
	if DBG: print "--------------------"
	if DBG: print "- Production Rules -"
	if DBG: print "--------------------"

	for k in prod_rules.iterkeys():
		if DBG: print k
		s = 0
		for d in prod_rules[k]:
			s += prod_rules[k][d]
		for d in prod_rules[k]:
			prod_rules[k][d] = float(prod_rules[k][d]) / float(s)  # normalization step to create probs not counts.
			if DBG: print '\t -> ', d, prod_rules[k][d]

	rules = []
	id = 0
	for k, v in prod_rules.iteritems():
		sid = 0
		for x in prod_rules[k]:
			rhs = re.findall("[^()]+", x)
			rules.append(("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]))
			if DBG: print ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])
			sid += 1
		id += 1

	df = pd.DataFrame(rules)
	# pp.pprint(df.values.tolist()); exit()

	df.to_csv('../ProdRules/{}.tsv.phrg.prs'.format(G.name), header=False, index=False, sep="\t")
	if os.path.exists('../ProdRules/{}.tsv.phrg.prs'.format(G.name)):
		print 'Saved', '../ProdRules/{}.tsv.phrg.prs'.format(G.name)
	else:
		print "Trouble saving"
	print "-----------"
	print [type(x) for x in rules[0]]

Example #11
    df = pd.concat([df,x])
    if 0: print j, "shape", df.shape

  gb = df.groupby(['k'])
  print (gb['cc'].mean().to_string())
  print "-"*10
  orig__clust_coef = metrics.clustering_coefficients_single(G)
  gb = orig__clust_coef.groupby(['k'])
  print (gb['cc'].mean().to_string())
  #synth_clust_coef = results
  '''
if __name__ == '__main__':
	parser = get_parser()
	args = vars(parser.parse_args())

	gname = graph_name(args['orig'][0])

	if args['nstats']:
		# main_network_stats(args)
		exit()
	elif args['chunglu']:
		print 'Generate chunglu graphs given an edgelist'
		sys.exit(0)
	elif args['kron']:
		print 'Generate kron graphs given an edgelist'
		sys.exit(0)
	elif args['prs']:
		print ('get_phrg_production_rules_onsubgraphs')
		get_phrg_production_rules_onsubgraphs(args)
	#elif args['samp']:
	#	print 'Sample K subgraphs of n nodes'
Example #12
def tst_prod_rules_isom_intrxn(fname, origfname):
    """
	Test the isomorphic subset of rules

	:param fname:	isom intersection rules file
	:param origfname: reference input network (dataset) edgelist file
	:return:
	"""
    # Get the original file
    fdf = Pandas_DataFrame_From_Edgelist([origfname])
    origG = nx.from_pandas_dataframe(fdf[0], 'src', 'trg')
    origG.name = graph_name(origfname)

    print origG.name, "+" * 80

    # Read the subset of prod rules
    df = pd.read_csv(fname,
                     header=None,
                     sep="\t",
                     dtype={
                         0: str,
                         1: list,
                         2: list,
                         3: float
                     })
    g = pcfg.Grammar('S')

    if not willFire_check(df):
        print "-" * 10, fname, "contains production rules that WillNotFire"
        return None
    else:
        print "+" * 40
    # Process dataframe
    from td_isom_jaccard_sim import listify_rhs
    for (id, lhs, rhs, prob) in df.values:
        rhs = listify_rhs(rhs)
        g.add_rule(pcfg.Rule(id, lhs, rhs, float(prob)))

    print "\n", "." * 40  #print 'Added the rules to the datastructure'

    num_nodes = origG.number_of_nodes()

    # print "Starting max size", 'n=', num_nodes
    g.set_max_size(num_nodes)
    # print "Done with max size"

    Hstars = []

    ofname = "FakeGraphs/" + origG.name + "_isom_ntrxn.shl"
    database = shelve.open(ofname)

    num_samples = 20
    print '~' * 40
    for i in range(0, num_samples):
        rule_list = g.sample(num_nodes)
        hstar = phrg.grow(rule_list, g)[0]
        Hstars.append(hstar)
        print hstar.number_of_nodes(), hstar.number_of_edges()

    print '-' * 40
    database['hstars'] = Hstars
    database.close()