Exemplo n.º 1
0
)
logger.info("LanguageModelMCTSWithPUCTTerminating")
logger.info("TanimotoScorer(abilify, radius=6)")
logger.info("num_iterations = 100")
logger.info("simulations_per_iteration = 50000")
logger.info("keep_top_n = 5000")

logger.info("loading language model...")

vocab = get_arpa_vocab(
    '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.arpa')
lm = KenLMDeepSMILESLanguageModel(
    '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.klm', vocab)

abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
scorer = TanimotoScorer(abilify, radius=6)

converter = Converter(rings=True, branches=True)
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def log_best(j, all_best, n_valid, lggr):
    if j % 1000 == 0:
        lggr.info("--iteration: %d--" % j)
        lggr.info("num valid: %d" % n_valid)
        log_top_best(all_best, 5, lggr)


def smiles_to_deepsmiles(smiles):
Exemplo n.º 2
0
logger.info(os.path.basename(__file__))
logger.info("KenLMDeepSMILESLanguageModel('../models/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)")
logger.info("width = 12, max_depth = 50, start_state = ['<s>'], c = 100")
logger.info("score: -1.0 if invalid; -1.0 if seen in all smiles; tanimoto distance from abilify if valid")
logger.info("LanguageModelMCTSWithPUCTTerminating")
logger.info("TanimotoScorer(abilify, radius=6)")
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 20000")

vocab = get_arpa_vocab('../models/chembl_25_deepsmiles_klm_10gram_200503.arpa')
lm = KenLMDeepSMILESLanguageModel('../models/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)

abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
distance_scorer = TanimotoScorer(abilify, radius=6)

cycle_scorer = CycleScorer()

converter = Converter(rings=True, branches=True)
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    canonical = pybel.readstring("smi", smiles).write("can").strip()
    return converter.encode(canonical)

logger.info("deleting any existing molexit directory, and creating a new one...")
path = Path("../models/molexit/")
Exemplo n.º 3
0
)
logger.info("LanguageModelMCTSWithPUCTTerminating")
logger.info(
    "TanimotoScorer(abilify, radius=6); distance only (no SA or cycle scoring)"
)
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 20000 unique")

vocab = get_arpa_vocab(
    '../resources/chembl_25_deepsmiles_klm_10gram_200503.arpa')
lm = KenLMDeepSMILESLanguageModel(
    '../resources/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)

abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
distance_scorer = TanimotoScorer(abilify, radius=6)

converter = Converter(rings=True, branches=True)
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    canonical = pybel.readstring("smi", smiles).write("can").strip()
    return converter.encode(canonical)


logger.info(
    "deleting any existing molexit directory, and creating a new one...")
path = Path("../models/molexit/")
logger = get_logger('chemgrams.log')

THIS_DIR = os.path.dirname(os.path.abspath(__file__))

logger.info(os.path.basename(__file__))
logger.info("KenLMDeepSMILESLanguageModel('../models/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)")
logger.info("TanimotoScorer(abilify, radius=6)")
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 20000")

vocab = get_arpa_vocab('../models/chembl_25_deepsmiles_klm_10gram_200503.arpa')
lm = KenLMDeepSMILESLanguageModel('../models/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)

abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
distance_scorer = TanimotoScorer(abilify, radius=6)

cycle_scorer = CycleScorer()

converter = Converter(rings=True, branches=True)
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    canonical = pybel.readstring("smi", smiles).write("can").strip()
    return converter.encode(canonical)

logger.info("deleting any existing molexit directory, and creating a new one...")
path = Path("../models/molexit/")
Exemplo n.º 5
0
    "KenLMDeepSMILESLanguageModel('../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.klm', vocab)"
)
logger.info("TanimotoScorer(abilify, radius=6)")
logger.info("num_iterations = 100")
logger.info("attempts_per_iteration = 400000")
logger.info("keep_top_n = 20000")

logger.info("loading language model...")

vocab = get_arpa_vocab(
    '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.arpa')
lm = KenLMDeepSMILESLanguageModel(
    '../resources/zinc12_fragments_deepsmiles_klm_10gram_200502.klm', vocab)

abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
distance_scorer = TanimotoScorer(abilify, radius=6)

cycle_scorer = CycleScorer()

converter = Converter(rings=True, branches=True)
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    canonical = pybel.readstring("smi", smiles).write("can").strip()
    return converter.encode(canonical)


logger.info(
Exemplo n.º 6
0
logger.info("KenLMDeepSMILESLanguageModel('../resources/zinc12_fragments_deepsmiles_klm_6gram_190421.klm', vocab)")
logger.info("width = 12, max_depth = 35, start_state = ['<s>'], c = 5")
logger.info("score: -1.0 if invalid; -1.0 if seen previously; tanimoto distance from abilify if valid")
logger.info("LanguageModelMCTSWithPUCTTerminating")
logger.info("TanimotoScorer(abilify)")
logger.info("num_iterations = 15")
logger.info("simulations_per_iteration = 50000")
logger.info("keep_top_n = 5000")

logger.info("loading language model...")

vocab = get_arpa_vocab('../resources/zinc12_fragments_deepsmiles_klm_6gram_190421.arpa')
lm = KenLMDeepSMILESLanguageModel('../resources/zinc12_fragments_deepsmiles_klm_6gram_190421.klm', vocab)

abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
scorer = TanimotoScorer(abilify)

converter = Converter(rings=True, branches=True)
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def log_best(j, all_best, n_valid, lggr):
    if j % 1000 == 0:
        lggr.info("--iteration: %d--" % j)
        lggr.info("num valid: %d" % n_valid)
        log_top_best(all_best, 5, lggr)


def smiles_to_deepsmiles(smiles):
Exemplo n.º 7
0
logger = get_logger('chemgrams.log')

THIS_DIR = os.path.dirname(os.path.abspath(__file__))

logger.info(os.path.basename(__file__))
logger.info("KenLMDeepSMILESLanguageModel('../resources/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)")
logger.info("TanimotoScorer(celecoxib, radius=6); distance only (no SA or cycle scoring)")
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 20000 of all (including duplicates)")

vocab = get_arpa_vocab('../resources/chembl_25_deepsmiles_klm_10gram_200503.arpa')
lm = KenLMDeepSMILESLanguageModel('../resources/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)

celecoxib = "O=S(=O)(c3ccc(n1nc(cc1c2ccc(cc2)C)C(F)(F)F)cc3)N"
distance_scorer = TanimotoScorer(celecoxib, radius=6)

converter = Converter(rings=True, branches=True)
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    canonical = pybel.readstring("smi", smiles).write("can").strip()
    return converter.encode(canonical)

logger.info("deleting any existing molexit directory, and creating a new one...")
path = Path("../models/molexit/")
if os.path.exists(path) and os.path.isdir(path):
    shutil.rmtree(path)
logger.info(
    "TanimotoScorer(celecoxib, radius=6); distance only (no SA or cycle scoring)"
)
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 200000 of all (including duplicates)")

vocab = get_arpa_vocab(
    '../resources/chembl_25_deepsmiles_klm_10gram_200503.arpa')
lm = KenLMDeepSMILESLanguageModel(
    '../resources/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)

# abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
# distance_scorer = TanimotoScorer(abilify, radius=6)
celecoxib = "O=S(=O)(c3ccc(n1nc(cc1c2ccc(cc2)C)C(F)(F)F)cc3)N"
distance_scorer = TanimotoScorer(celecoxib, radius=6)

converter = Converter(rings=True, branches=True)
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    canonical = pybel.readstring("smi", smiles).write("can").strip()
    return converter.encode(canonical)


logger.info(
    "deleting any existing molexit directory, and creating a new one...")
path = Path("../models/molexit/")
Exemplo n.º 9
0
logger.info(os.path.basename(__file__))
logger.info("KenLMDeepSMILESLanguageModel('../resources/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)")
logger.info("width = 12, max_depth = 50, start_state = ['<s>'], c = 2")
logger.info("score: -1.0 if invalid; -1.0 if seen in iteration; tanimoto distance from abilify if valid; rescaling from [0,1] to [-1,1]")
logger.info("LanguageModelMCTSWithPUCTTerminating")
logger.info("TanimotoScorer(abilify, radius=6); distance only (no SA or cycle scoring)")
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep all valid in current iteration (including duplicates)")

vocab = get_arpa_vocab('../resources/chembl_25_deepsmiles_klm_10gram_200503.arpa')
lm = KenLMDeepSMILESLanguageModel('../resources/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)

abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
distance_scorer = TanimotoScorer(abilify, radius=6)
query_scorer = TanimotoScorer(abilify, radius=6) # QueryScorer(abilify, radius=6, k=1.0)
# query_scorer = QueryScorer(abilify, k=1.0)  # roughly equivalent to FCFP4

# celecoxib = "O=S(=O)(c3ccc(n1nc(cc1c2ccc(cc2)C)C(F)(F)F)cc3)N"
# distance_scorer = TanimotoScorer(celecoxib, radius=6)

converter = Converter(rings=True, branches=True)
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def smiles_to_deepsmiles(smiles):
    canonical = pybel.readstring("smi", smiles).write("can").strip()
    return converter.encode(canonical)
Exemplo n.º 10
0
)
logger.info("LanguageModelMCTSWithPUCTTerminating")
logger.info("TanimotoScorer(abilify, radius=6)")
logger.info("num_iterations = 100")
logger.info("simulations_per_iteration = 50000")
logger.info("keep_top_n = 5000")

logger.info("loading language model...")

vocab = get_arpa_vocab(
    '../resources/zinc12_fragments_deepsmiles_klm_6gram_190421.arpa')
lm = KenLMDeepSMILESLanguageModel(
    '../resources/zinc12_fragments_deepsmiles_klm_6gram_190421.klm', vocab)

abilify = "Clc4cccc(N3CCN(CCCCOc2ccc1c(NC(=O)CC1)c2)CC3)c4Cl"
distance_scorer = TanimotoScorer(abilify, radius=6)

cycle_scorer = CycleScorer()

converter = Converter(rings=True, branches=True)
env = os.environ.copy()
env["PATH"] = "/Users/luis/kenlm/build/bin:" + env["PATH"]
lm_trainer = KenLMTrainer(env)


def log_best(j, all_best, n_valid, lggr):
    if j % 10000 == 0:
        lggr.info("--iteration: %d--" % j)
        lggr.info("num valid: %d" % n_valid)
        log_top_best(all_best, 5, lggr)