def edgelist_in_dimacs_out(ifname):
    '''
    Convert an edge-list file into a DIMACS-style graph file.

    The graph is read with networkx, named via graph_name(ifname) and
    written to ../datasets/<name>.dimacs with edges sorted by their first
    endpoint. Every node id is shifted by +1 in the output.

    args: ifname is the path of the input edge-list file; node labels
          must be convertible with int()
    returns: output filename, or None when that file already exists
             (an existing file is never overwritten)
    '''
    g = nx.read_edgelist(ifname, data=False)
    g.name = graph_name(ifname)
    ofname = '../datasets/{}.dimacs'.format(g.name)
    if path.exists(ofname):
        return None
    n = g.number_of_nodes()
    m = g.number_of_edges()
    edges = [(int(u), int(v)) for u, v in g.edges()]
    df = pd.DataFrame(edges)
    # An edgeless graph yields a frame without column 0; sorting it by
    # that column would raise KeyError, so only sort when there is data.
    if not df.empty:
        df.sort_values(by=[0], inplace=True)
    with open(ofname, 'w') as f:
        f.write('c {}\n'.format(g.name))
        # NOTE(review): the header declares n + 1 nodes, presumably to
        # cover the +1 shift applied to the ids below -- confirm.
        f.write('p edge\t{}\t{}\n'.format(n + 1, m))
        # Plain loop instead of DataFrame.apply: we only want the writes.
        for u, v in df.values.tolist():
            f.write("e\t{}\t{}\n".format(u + 1, v + 1))
    if path.exists(ofname):
        Info("Wrote: %s" % ofname)
    # ToDo: a few extra checks could be added
    return ofname
# Example #2
# 0
def proc_prod_rules_orig(fname):
    '''
    Load a tab-separated production-rules file and print a short summary.

    Column 2 is parsed with listify_rhs into a list stored under 'rhs';
    'rhs_n' counts the entries of that list containing 'N', and 'lhs_n'
    counts the comma-separated items of column 1. The head of the
    filtered 'rhs' lists and the head of the augmented frame are printed.

    args: fname is the path of the headerless, tab-separated rules file
    returns: None (output goes to stdout)
    '''
    df = pd.read_csv(fname, header=None, sep="\t")

    df['rhs'] = df[2].apply(listify_rhs)
    # Filter once and reuse the result for both the preview and the count.
    rhs_with_n = df['rhs'].apply(lambda x: [k for k in x if 'N' in k])
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(rhs_with_n.head())
    df['rhs_n'] = rhs_with_n.apply(len)
    df['lhs_n'] = df[1].apply(lambda x: len(x.split(",")))
    print(df.head())
# Example #3
# 0
def proc_prod_rules_single(fname):
    '''
    Plot histograms of rule sizes for every production-rules file of a graph.

    Files matching ../ProdRules/*<graph name>*prs* are read as headerless
    tab-separated tables. Each row is tagged with 'varel' (third
    dot-separated component of the file's basename), 'lhs_n' (number of
    comma-separated items in column 1) and 'rhs_n' (number of entries of
    the parsed column 2 that contain 'N'). One subplot per 'varel' value
    shows the 'lhs_n'/'rhs_n' histograms; the figure is saved as 'tmpfig'.

    args: fname is the dataset path the graph name is derived from
    returns: None; nothing is plotted when no rules file matches
    '''
    gn = graph_name(fname)
    PRS_dir = "../ProdRules/"
    files = glob(PRS_dir + "*{}*prs*".format(gn))

    pp.pprint(files)
    if not files:
        # Without input there is no frame to preview and no group to plot.
        return

    frames = []
    for f in files:
        df = pd.read_csv(f, header=None, sep="\t")
        df['varel'] = os.path.basename(f).split(".")[2]
        df['rhs'] = df[2].apply(listify_rhs)
        df['lhs_n'] = df[1].apply(lambda x: len(x.split(",")))
        df['rhs_n'] = df['rhs'].apply(
            lambda x: len([k for k in x if 'N' in k]))
        frames.append(df)
    # Concatenate once; reversed so the newest file comes first, which is
    # the row order the former per-iteration concat([df, mdf]) produced.
    mdf = pd.concat(frames[::-1])
    # Preview of the last file read.
    print(df.head())
    gb = mdf.groupby('varel').groups

    # squeeze=False always returns a 2-D array of axes, so indexing also
    # works when there is only a single group.
    fig, xa = plt.subplots(1, len(gb), squeeze=False)
    numBins = 4
    for j, ve in enumerate(gb):
        print(ve)
        ax = xa[0][j]
        ax.hist(mdf[mdf['varel'] == ve][['lhs_n', 'rhs_n']],
                numBins,
                alpha=0.8)
        if j == 0:
            ax.legend(('lhs_n', 'rhs_n'))
        ax.set_title(ve)

    plt.savefig('tmpfig', bbox_inches='tight')
# Example #4
# 0
def hstar_fixed_graph_gen(args):
    '''
    Sample graphs from stored production rules and pickle them.

    The reference graph is loaded from ../datasets/<name>.p when that
    pickle exists, otherwise from the edge list itself. For every
    ../ProdRules/<name>*prs file a pcfg grammar is built, its max size
    is set to the reference graph's node count, and 20 graphs are grown
    with PHRG.grow. The reference graph and the sampled graphs are
    written to ../Results/<name>_hstars.p.

    args: args is a mapping whose 'grow' entry is a sequence with the
          edge-list path as first element
    returns: None; nothing is written when no rules file matches
    '''
    import pickle

    import networkx as nx

    orig_fname = args['grow'][0]
    gn = graph_name(orig_fname)
    graph_pickle = "../datasets/{}.p".format(gn)
    if os.path.exists(graph_pickle):
        # Plain pickle.load is what nx.read_gpickle did for an
        # uncompressed file, and it still works where networkx no longer
        # ships that helper.
        # NOTE: unpickling runs arbitrary code -- only load trusted files.
        with open(graph_pickle, "rb") as fh:
            origG = pickle.load(fh)
    else:
        print("we load edgelist into an nx.obj")
        # Previously this branch only printed the message and left origG
        # unbound, which failed with NameError further down.
        origG = nx.read_edgelist(orig_fname,
                                 comments="%",
                                 data=False,
                                 nodetype=int)

    prs_files = glob("../ProdRules/{}*prs".format(gn))
    if not prs_files:
        # No grammar means no samples; avoid the former NameError on Hstars.
        print("No production rules found for {}".format(gn))
        return

    num_nodes = origG.number_of_nodes()
    num_samples = 20
    for f in prs_files:
        prod_rules = get_prod_rules(f)
        g = pcfg.Grammar('S')
        for (rule_id, lhs, rhs, prob) in prod_rules:
            g.add_rule(pcfg.Rule(rule_id, lhs, rhs, prob))

        # ToDo: We need to get these rules in the right format

        print("Starting max size")
        g.set_max_size(num_nodes)

        print("Done with max size")

        # NOTE(review): the list is reset for every rules file, so only
        # the samples of the last file reach the pickle below -- confirm
        # this is intended.
        Hstars = []

        print('*' * 40)
        for _ in range(num_samples):
            rule_list = g.sample(num_nodes)
            hstar = PHRG.grow(rule_list, g)[0]
            Hstars.append(hstar)

    results_fname = '../Results/{}_hstars.p'.format(gn)
    # The context manager closes (and flushes) the file before we check it.
    with open(results_fname, "wb") as fh:
        pickle.dump({
            'origG': origG,
            'hstars': Hstars
        }, fh)
    if os.path.exists(results_fname):
        print("Pickle written")
def edgelist_to_dimacs(fname):
    '''
    Read an edge list and hand the graph to the DIMACS converter.

    Lines starting with '%' are treated as comments, edge data is
    ignored and node labels are parsed as integers. The graph is named
    via graph_name(fname) before conversion.

    args: fname is the path of the edge-list file
    returns: whatever convert_nx_gObjs_to_dimacs_gObjs returns for the
             one-element list holding the graph
    '''
    graph = nx.read_edgelist(fname, nodetype=int, data=False, comments="%")
    graph.name = graph_name(fname)
    return convert_nx_gObjs_to_dimacs_gObjs([graph])
# Example #6
# 0
from utils import Info, graph_name
import sys, os
import pprint as pp

from glob import glob
from isomorph_overlap_hl import stack_prod_rules_bygroup_into_list
from prs import proc_prod_rules_orig

# Module-level list; nothing in the visible part of this script reads
# from it or appends to it.
results = []


def prs_count_per(prs_lst):
    '''
    Print and collect the number of lines of each given file.

    args: prs_lst is an iterable of file paths
    returns: list of [basename, line count] entries, one per path and in
             input order; each entry is also pretty-printed
    '''
    counts = []
    for f in prs_lst:
        # Close the handle deterministically and count lines lazily
        # instead of loading the whole file into memory.
        with open(f) as fh:
            n_lines = sum(1 for _ in fh)
        entry = [os.path.basename(f), n_lines]
        pp.pprint(entry)
        counts.append(entry)
    return counts


# Script entry point: report the line count of every production-rules
# file that belongs to the dataset given on the command line.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        Info("add an out.* dataset with its full path")
        # Usage error: exit with a non-zero status via sys.exit rather
        # than the interactive-only exit() helper.
        sys.exit(1)

    dataset_path = sys.argv[1]
    gn = graph_name(dataset_path)

    prs_pattern = "../ProdRules/" + gn + "*.prs"
    files = glob(prs_pattern)

    prs_count_per(files)
    # prs_stack = stack_prod_rules_bygroup_into_list(files)

    sys.exit(0)