Пример #1
0
def mk2set(ind, randseed):
    """Fetch one graph split for ``randseed`` and write it out under index ``ind``.

    ``main.alt_lc_get_graphs(randseed)`` is expected to yield six values; the
    first two are stored with ``ba.dumpfile`` as ``ptest``/``ntest`` files and
    the last two are exported through ``rdk.nx_to_moses`` as ``ptrain``/
    ``ntrain`` CSV files.  The third and fourth values are not used here.
    All files are placed in the module-level ``foldername`` directory.
    """
    pos_test, neg_test, _psamp, _nsamp, pos_train, neg_train = \
        main.alt_lc_get_graphs(randseed)

    # Output locations; the index distinguishes the files of different splits.
    pos_test_path = f"{foldername}/ptest_{ind}.pick"
    neg_test_path = f"{foldername}/ntest_{ind}.pick"
    pos_train_path = f"{foldername}/ptrain_{ind}.csv"
    neg_train_path = f"{foldername}/ntrain_{ind}.csv"

    ba.dumpfile(pos_test, pos_test_path)
    ba.dumpfile(neg_test, neg_test_path)
    rdk.nx_to_moses(pos_train, pos_train_path)
    rdk.nx_to_moses(neg_train, neg_train_path)
Пример #2
0
def getnx(fname):
    """Return the graphs for the gzip file ``fname``, caching the result.

    If ``fname + ".cache"`` exists it is loaded with ``ba.loadfile`` and
    returned as-is.  Otherwise the gzip file is read, the second
    whitespace-separated field of every line is handed to
    ``rut.smiles_strings_to_nx``, the resulting list is shuffled with the
    fixed seed 123, written to the cache and returned.

    Note: this reseeds the global ``random`` generator when the cache is
    missing.
    """
    cache_path = fname + ".cache"
    if os.path.isfile(cache_path):
        return ba.loadfile(cache_path)

    with gzip.open(fname, 'rb') as handle:
        raw = handle.read()

    # The last element of the split (whatever follows the final newline)
    # is discarded; fields stay as bytes.
    rows = raw.split(b'\n')[:-1]
    second_fields = [row.split()[1] for row in rows]
    graphs = list(rut.smiles_strings_to_nx(second_fields))

    # Shuffle once with a fixed seed so the cached order is reproducible.
    random.seed(123)
    random.shuffle(graphs)
    ba.dumpfile(graphs, cache_path)
    return graphs
Пример #3
0
def loadsmi(fname, randseed=123):
    """Load graphs for ``fname`` (via a ``.cache`` file if present), shuffled.

    On a cache miss the graphs come from ``rut.smi_to_nx(fname)``, are passed
    through ``lu.pre_process`` and the processed result is written to
    ``fname + ".cache"``.  In both cases the list is shuffled in place after
    seeding the global ``random`` generator with ``randseed``; the cache
    itself holds the unshuffled order.
    """
    cache_path = fname + ".cache"
    if not os.path.isfile(cache_path):
        # No cache yet: build the graphs and store them for next time.
        raw_graphs = list(rut.smi_to_nx(fname))
        graphs = lu.pre_process(raw_graphs)
        ba.dumpfile(graphs, cache_path)
    else:
        graphs = ba.loadfile(cache_path)

    random.seed(randseed)
    random.shuffle(graphs)
    return graphs
Пример #4
0
def getnx(fname, randseed=123):
    """Load graphs from the gzip file ``fname`` (cached), then shuffle them.

    When ``fname + ".cache"`` exists it is loaded with ``ba.loadfile``.
    Otherwise the gzip file is read, the second whitespace-separated field of
    each line is converted with ``rut.smiles_strings_to_nx``, the result is
    run through ``lu.pre_process`` and written to the cache.  The returned
    list is shuffled in place after seeding the global ``random`` generator
    with ``randseed``; the cache stores the unshuffled order.
    """
    cache_path = fname + ".cache"
    if not os.path.isfile(cache_path):
        # Cache miss: parse the file normally and write a cache.
        with gzip.open(fname, 'rb') as handle:
            raw = handle.read()
        # Drop the final split element (the text after the last newline).
        rows = raw.split(b'\n')[:-1]
        second_fields = [row.split()[1] for row in rows]
        graphs = list(rut.smiles_strings_to_nx(second_fields))
        graphs = lu.pre_process(graphs)
        ba.dumpfile(graphs, cache_path)
    else:
        graphs = ba.loadfile(cache_path)

    # Shuffle and return.
    random.seed(randseed)
    random.shuffle(graphs)
    return graphs
Пример #5
0
def format_abc(a, b, c, sav='res.pickle'):
    """Log per-column mean/std of three result tables and dump everything.

    ``a``, ``b`` and ``c`` are each transposed with ``zip(*...)`` and the mean
    and standard deviation of every column are computed.  They are logged
    under the labels "combined", "originals" and "generated" respectively
    (means at numeric level 51, deviations at level 41).

    The file ``sav`` receives, via ``ba.dumpfile``, a list of four tuples:
    the sizes ``(ts + gen, ts, gen)``, the means, the standard deviations and
    the raw inputs.  ``ts`` and ``gen`` are derived from the module-level
    ``args`` (``trainsizes``, ``n_steps``, ``burnin``, ``emit``).
    """
    def per_column(stat, table):
        # One statistic per column of the table (rows are transposed by zip).
        return [stat(column) for column in zip(*table)]

    cm, om, gm = (per_column(np.mean, table) for table in (a, b, c))
    # 'o_std' rather than 'os' so the standard-library module name stays free.
    cs, o_std, gs = (per_column(np.std, table) for table in (a, b, c))

    logger.log(51, f'combined  {cm}')
    logger.log(51, f'originals {om}')
    logger.log(51, f'generated {gm}')
    logger.log(41, f'combined{cs}')
    logger.log(41, f'originals only{o_std}')
    logger.log(41, f'generated only{gs}')

    ts = np.array(args.trainsizes)
    gen = np.array([
        size * ((args.n_steps - args.burnin) // args.emit + 1)
        for size in args.trainsizes
    ])
    sizes = (ts + gen, ts, gen)
    means = (cm, om, gm)
    deviations = (cs, o_std, gs)
    ba.dumpfile([sizes, means, deviations, (a, b, c)], sav)
Пример #6
0
def get_params_punk():
    """Return ``(dnames, loader, trve)`` for the "punk" datasets.

    * ``dnames`` -- list of the seven dataset names.
    * ``loader(x, seed)`` -- calls ``load.loadgruen_single`` on
      ``../data/punk/<x>`` with ``subsample=sampnum`` (a module-level value)
      and the given seed.
    * ``trve(pp)`` -- concatenates ``obs['true'].values`` of ``pp.a`` and
      ``pp.b`` into one list.
    """
    def trve(pp):
        labels = []
        for part in (pp.a, pp.b):
            labels.extend(part.obs['true'].values)
        return labels

    def loader(x, seed):
        return load.loadgruen_single(
            f"../data/punk/{x}", subsample=sampnum, seed=seed)

    dnames = [
        "human1", "human2", "human3", "human4",
        "smartseq2", "celseq2", "celseq",
    ]
    return dnames, loader, trve


if __name__ == "__main__":
    # The single command-line argument holds three space-separated integers:
    # index of the first dataset, index of the second dataset, and the
    # repeat number (used as the seed).
    cli_arg = sys.argv[1]
    task, t2, rep = (int(token) for token in cli_arg.strip().split(' '))

    # NOTE(review): get_params_100 is not defined in the visible part of the
    # file -- confirm it exists alongside get_params_punk.
    dnames, loader, trve = get_params_100()
    other = dnames[t2]
    self = dnames[task]

    result = get_score(self, other, loader, trve, seed=rep)
    print("res: ", result)

    # Result file name is the raw argument with spaces turned into underscores.
    out_path = "res/" + cli_arg.replace(" ", '_')
    ba.dumpfile(result, out_path)
    print("all good")


# use median instead of mean! TODO
# add error bars to plot -> quantiles! plot points! fit forrcoeff linear
# check the seed value for subsampling
# look at 100
def res(indices, reps):
    """Print the mean over repeats of the stored results for each index.

    For every ``i`` in ``range(indices)`` the files ``res/<i>_<r>`` for
    ``r`` in ``range(reps)`` are loaded with ``ba.loadfile``, stacked into an
    array and averaged along axis 0; the mean is printed as a list.  The
    module-level ``dnames`` is printed first.
    """
    print(dnames)
    for idx in range(indices):
        loaded = []
        for rep in range(reps):
            loaded.append(ba.loadfile(f"res/{idx}_{rep}"))
        stacked = np.array(loaded)
        print(stacked.mean(axis=0).tolist())
Пример #7
0
 def save(self):
     """Write ``self.data`` to ``self.fname`` and ``self.data_int`` to ``self.fname_int``.

     The first pair goes through ``ba.dumpfile``, the second through
     ``ba.jdumpfile``.
     """
     payload, target = self.data, self.fname
     ba.dumpfile(payload, target)

     payload_int, target_int = self.data_int, self.fname_int
     ba.jdumpfile(payload_int, target_int)