Exemplo n.º 1
0
def tensorize_cond(mol_batch, vocab):
    """Tensorize a batch of (source, target, condition) triples.

    Args:
        mol_batch: iterable of 3-item entries ``(x, y, cond)`` where ``cond``
            is a comma-separated string of integers (e.g. ``"1,0,1"``).
        vocab: vocabulary object forwarded to ``MolGraph.tensorize``.

    Returns:
        The converted source arrays without their last element, followed by
        all converted target arrays, followed by the condition matrix as a
        ``numpy`` array with one row per batch entry.
    """
    x, y, cond = zip(*mol_batch)
    # Build real lists of ints. On Python 3 ``map`` returns a lazy iterator,
    # so ``numpy.array`` over a list of map objects would produce an object
    # array of iterators instead of an integer matrix.
    cond = numpy.array([[int(v) for v in c.split(',')] for c in cond])
    x = MolGraph.tensorize(x, vocab, common_atom_vocab)
    y = MolGraph.tensorize(y, vocab, common_atom_vocab)
    # The last element of the source result is dropped; per the original
    # author's note it is the ordering information, which is not needed for x.
    return to_numpy(x)[:-1] + to_numpy(y) + (cond, )
Exemplo n.º 2
0
def tensorize(mol_batch, vocab):
    """Tensorize one batch of molecules and convert the result via ``to_numpy``.

    Args:
        mol_batch: batch of molecules forwarded to ``MolGraph.tensorize``.
        vocab: vocabulary object forwarded to ``MolGraph.tensorize``.

    Returns:
        Whatever ``to_numpy`` produces for the tensorized batch.
    """
    return to_numpy(MolGraph.tensorize(mol_batch, vocab, common_atom_vocab))
Exemplo n.º 3
0

# Script entry point: parse CLI arguments, load the vocabulary, then read,
# shuffle and batch the training data ready for tensorization.
if __name__ == "__main__":
    # Raise the RDKit logger threshold to CRITICAL.
    lg = rdkit.RDLogger.logger()
    lg.setLevel(rdkit.RDLogger.CRITICAL)

    parser = argparse.ArgumentParser()
    parser.add_argument('--train', required=True)  # path to the training data file
    parser.add_argument('--vocab', required=True)  # path to the vocabulary file
    parser.add_argument('--batch_size', type=int, default=20)  # entries per batch
    parser.add_argument('--ncpu', type=int, default=4)  # worker processes in the pool
    args = parser.parse_args()

    # Each vocabulary line is split on whitespace into its columns.
    with open(args.vocab) as f:
        vocab = [x.strip("\r\n ").split() for x in f]
    # Only lines whose last column evaluates truthy contribute their first
    # column to the fragment list.
    # NOTE(review): eval() executes arbitrary expressions read from the vocab
    # file; only safe if that file is trusted. Consider a stricter parser.
    MolGraph.load_fragments([x[0] for x in vocab if eval(x[-1])])
    # The unpacking requires exactly three columns per line; the third is
    # dropped here. args.vocab is rebound from the path to the PairVocab.
    args.vocab = PairVocab([(x, y) for x, y, _ in vocab], cuda=False)

    pool = Pool(args.ncpu)
    # Fixed seed so the shuffle below is reproducible across runs.
    random.seed(1)

    # Keep only the first whitespace-separated token of every line.
    # NOTE(review): a blank line would raise IndexError here.
    with open(args.train) as f:
        data = [line.strip("\r\n ").split()[0] for line in f]

    random.shuffle(data)

    # Consecutive slices of at most batch_size entries; the last may be shorter.
    batches = [
        data[i:i + args.batch_size]
        for i in range(0, len(data), args.batch_size)
    ]
    # Bind the vocabulary so the callable takes just a batch.
    func = partial(tensorize, vocab=args.vocab)
Exemplo n.º 4
0
def tensorize(mol_batch, vocab, include_smiles=False):
    """Tensorize one batch of molecules and convert the result via ``to_numpy``.

    Args:
        mol_batch: batch of molecules forwarded to ``MolGraph.tensorize``.
        vocab: vocabulary object forwarded to ``MolGraph.tensorize``.
        include_smiles: flag passed through unchanged to both
            ``MolGraph.tensorize`` and ``to_numpy``.

    Returns:
        Whatever ``to_numpy`` produces for the tensorized batch.
    """
    return to_numpy(
        MolGraph.tensorize(mol_batch, vocab, common_atom_vocab, include_smiles),
        include_smiles,
    )
Exemplo n.º 5
0
# Script entry point: parse CLI arguments, load the vocabulary, then read,
# shuffle, batch and tensorize the training data in a process pool.
if __name__ == "__main__":
    # Raise the RDKit logger threshold to CRITICAL.
    lg = rdkit.RDLogger.logger()
    lg.setLevel(rdkit.RDLogger.CRITICAL)

    parser = argparse.ArgumentParser()
    parser.add_argument('--train', required=True)  # path to the training data file
    parser.add_argument('--vocab', required=True)  # path to the vocabulary file
    parser.add_argument('--batch_size', type=int, default=20)  # entries per batch
    parser.add_argument('--ncpu', type=int, default=4)  # worker processes in the pool
    args = parser.parse_args()

    # Each vocabulary line is split on whitespace into its columns.
    with open(args.vocab) as f:
        vocab = [x.strip("\r\n ").split() for x in f]
    # args.vocab is rebound from the file path to the PairVocab object.
    args.vocab = PairVocab(vocab, cuda=False)
    # The first column of every vocabulary line is passed as the fragment list.
    MolGraph.load_fragments([x[0] for x in vocab])

    pool = Pool(args.ncpu)
    # Fixed seed so the shuffle below is reproducible across runs.
    random.seed(1)

    # Keep only the first whitespace-separated token of every line.
    # NOTE(review): a blank line would raise IndexError here.
    with open(args.train) as f:
        data = [line.strip("\r\n ").split()[0] for line in f]

    random.shuffle(data)

    # Consecutive slices of at most batch_size entries; the last may be shorter.
    batches = [
        data[i:i + args.batch_size]
        for i in range(0, len(data), args.batch_size)
    ]
    # Bind the vocabulary so each worker call takes just a batch.
    func = partial(tensorize, vocab=args.vocab)
    # One result per batch, in the same order as `batches`.
    all_data = pool.map(func, batches)
Exemplo n.º 6
0
def tensorize_pair(mol_batch, vocab):
    """Tensorize a batch of (source, target) molecule pairs.

    Args:
        mol_batch: iterable of 2-item entries ``(x, y)``.
        vocab: vocabulary object forwarded to ``MolGraph.tensorize``.

    Returns:
        The converted source arrays without their last element, followed by
        all converted target arrays.
    """
    sources, targets = zip(*mol_batch)
    source_tensors = MolGraph.tensorize(sources, vocab, common_atom_vocab)
    target_tensors = MolGraph.tensorize(targets, vocab, common_atom_vocab)
    source_arrays = to_numpy(source_tensors)
    target_arrays = to_numpy(target_tensors)
    # The trailing source element is dropped; per the original author's note
    # it is the ordering information, which is not needed for x.
    return source_arrays[:-1] + target_arrays