def __init__(self, num_iterations, keep_top_n, time_per_iteration_minutes,
             kenlm_bin_dir="/Users/luis/kenlm/build/bin"):
        """Store the run settings and build the KenLM trainer and the converter.

        Args:
            num_iterations: Stored unchanged on ``self.num_iterations``.
            keep_top_n: Stored unchanged on ``self.keep_top_n``.
            time_per_iteration_minutes: Stored unchanged on
                ``self.time_per_iteration_minutes``.
            kenlm_bin_dir: Directory placed at the front of ``PATH`` in the
                environment handed to ``KenLMTrainer``. The default keeps the
                location this code originally hard-coded; pass another
                directory on machines where KenLM is installed elsewhere.
        """
        self.num_iterations = num_iterations
        self.keep_top_n = keep_top_n
        self.time_per_iteration_minutes = time_per_iteration_minutes

        # No language model is attached at construction time.
        self.lm = None

        # Work on a copy so the PATH change does not leak into os.environ.
        env = os.environ.copy()
        inherited_path = env.get("PATH")
        if inherited_path:
            env["PATH"] = kenlm_bin_dir + os.pathsep + inherited_path
        else:
            # PATH unset or empty: indexing env["PATH"] would raise KeyError,
            # and a trailing separator would add an empty PATH entry.
            env["PATH"] = kenlm_bin_dir
        self.lm_trainer = KenLMTrainer(env)

        self.converter = Converter(rings=True, branches=True)
# Example #2
# 0
# Module-level setup: log the configuration, load the language model, and
# build the scorer, converter and KenLM trainer used by the rest of the script.

# The value 5000 is hard-coded in the message text, not read from a variable.
logger.info("keep_top_n = 5000")

logger.info("loading language model...")

# Both files share one base name: the .arpa file supplies the vocabulary, and
# the .klm file is handed to the language model together with that vocabulary.
vocab = get_arpa_vocab(
    '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.arpa')
lm = KenLMDeepSMILESLanguageModel(
    '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.klm', vocab)

# SMILES string the scorer is built around.
# NOTE(review): the name suggests the drug Abilify (aripiprazole) — confirm.
abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
# NOTE(review): radius=6 is presumably a fingerprint radius used for the
# Tanimoto similarity — confirm against TanimotoScorer.
scorer = TanimotoScorer(abilify, radius=6)

# Module-level converter; smiles_to_deepsmiles() calls converter.encode().
converter = Converter(rings=True, branches=True)
# Copy the process environment and put a KenLM build directory at the front of
# PATH in the copy that is passed to the trainer.
# NOTE(review): the directory is machine-specific, and env["PATH"] raises
# KeyError if PATH is not set in the environment.
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def log_best(j, all_best, n_valid, lggr):
    """Emit a progress report on every 1000th iteration.

    When ``j`` is a multiple of 1000 (0 included), the iteration number and
    ``n_valid`` are written through ``lggr.info`` and the call is then handed
    on to ``log_top_best(all_best, 5, lggr)``. For any other ``j`` the function
    does nothing and returns ``None``.
    """
    # Guard clause: stay silent between reporting points.
    if j % 1000 != 0:
        return

    iteration_line = "--iteration: %d--" % j
    lggr.info(iteration_line)

    valid_line = "num valid: %d" % n_valid
    lggr.info(valid_line)

    log_top_best(all_best, 5, lggr)


def smiles_to_deepsmiles(smiles):
    """Canonicalize a SMILES string and encode it with the module converter.

    The input is parsed by ``pybel.readstring`` using the ``"smi"`` format,
    written back out in the ``"can"`` format, stripped of surrounding
    whitespace, and passed to ``converter.encode``. The return value is
    whatever ``converter.encode`` produces (presumably a DeepSMILES string,
    going by the function name — confirm).
    """
    molecule = pybel.readstring("smi", smiles)
    # TODO do we need to canonicalize?
    canonical_smiles = molecule.write("can").strip()
    return converter.encode(canonical_smiles)