Пример #1
0
    start = time.time()
    mcts.search(state, num_simulations)
    end = time.time()

    logger.info("--done--")
    logger.info("num valid: %d" % num_valid)

    best = mcts.get_best_sequence()
    generated_text = ''.join(best[0])
    logger.info("best generated text: %s" % generated_text)
    decoded = DeepSMILESLanguageModelUtils.decode(generated_text,
                                                  start='<s>',
                                                  end='</s>')
    smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
    logger.info("best SMILES: %s, J: %s (%s seconds)" %
                (smiles, scorer.score(smiles), str((end - start))))

    log_top_best(all_smiles, 5, logger)

    logger.info("writing dataset...")
    name = 'molexit-%d' % n
    dataset = '../models/molexit/%s.txt' % name
    with open(dataset, 'w') as f:
        for smi in list(
                reversed(sorted(all_smiles.items(),
                                key=lambda kv: kv[1][0])))[:keep_top_n]:
            dsmi = smiles_to_deepsmiles(smi[0].strip())
            tok = DeepSMILESTokenizer(dsmi)
            tokens = tok.get_tokens()
            f.write(' '.join([t.value for t in tokens]))
            f.write("\n")
Пример #2
0
    start = time.time()
    mcts.search(state, num_simulations)
    end = time.time()

    logger.info("--done--")
    logger.info("num valid: %d" % num_valid)

    best = mcts.get_best_sequence()
    generated_text = ''.join(best[0])
    logger.info("best generated text: %s" % generated_text)
    decoded = DeepSMILESLanguageModelUtils.decode(generated_text,
                                                  start='<s>',
                                                  end='</s>')
    smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
    logger.info("best SMILES: %s, J: %s (%s seconds)" %
                (smiles, distance_scorer.score(smiles), str((end - start))))

    log_top_best(all_smiles, 5, logger)

    logger.info("writing dataset...")
    name = 'molexit-%d' % n
    dataset = '../models/molexit/%s.txt' % name
    with open(dataset, 'w') as f:
        for smi in list(
                reversed(sorted(all_smiles.items(),
                                key=lambda kv: kv[1][0])))[:keep_top_n]:
            dsmi = smiles_to_deepsmiles(smi[0].strip())
            tok = DeepSMILESTokenizer(dsmi)
            tokens = tok.get_tokens()
            f.write(' '.join([t.value for t in tokens]))
            f.write("\n")
Пример #3
0
            generated = lm.generate(num_chars=100, text_seed='<s>')

            decoded = DeepSMILESLanguageModelUtils.decode(generated,
                                                          start='<s>',
                                                          end='</s>')
            sanitized = DeepSMILESLanguageModelUtils.sanitize(decoded)

            num_valid += 1

            # synthetic accessibility score is a number between 1 (easy to make) and 10 (very difficult to make)
            sascore = sascorer.calculateScore(
                Chem.MolFromSmiles(sanitized)) / 10.
            # cycle score, squashed between 0 and 1
            cyclescore = cycle_scorer.score(sanitized)
            cyclescore = cyclescore / (1 + cyclescore)
            distance_score = distance_scorer.score(sanitized)
            score = (0.75 *
                     distance_score) + (0.15 *
                                        (1 - sascore)) + (0.10 *
                                                          (1 - cyclescore))

            all_smiles[sanitized] = (score, generated)

            if current_best_score is None or beats_current(score):
                current_best_score = score
                current_best_smiles = sanitized

        except Exception as e:
            pass

        if (i + 1) % 50000 == 0:
Пример #4
0
    t.cancel()
    end = time.time()

    logger.info("--done--")
    logger.info("num simulations: %s" % simulations)
    logger.info("num valid (in this iteration): %d" % num_valid)
    logger.info("num unique (over all iterations): %s" % len(all_smiles))
    logger.info("num unique (in this iteration): %s" % len(seen))

    best = mcts.get_best_sequence()
    generated_text = ''.join(best[0])
    logger.info("best generated text: %s" % generated_text)
    decoded = DeepSMILESLanguageModelUtils.decode(generated_text, start='<s>', end='</s>')
    smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
    logger.info("best SMILES: %s, J: %s (%s seconds)" % (smiles, distance_scorer.score(smiles), str((end - start))))

    log_top_best(all_smiles, 5, logger)

    logger.info("writing dataset...")
    name = 'molexit-%d' % n
    dataset = '../models/molexit/%s.txt' % name
    dataset_scores = []
    with open(dataset, 'w') as f:
        for smi in list(reversed(sorted(all_smiles.items(), key=lambda kv: kv[1][0])))[:keep_top_n]:
            dsmi = smiles_to_deepsmiles(smi[0].strip())
            tok = DeepSMILESTokenizer(dsmi)
            tokens = tok.get_tokens()
            f.write(' '.join([t.value for t in tokens]))
            f.write("\n")
            dataset_scores.append(smi[1][0])
Пример #5
0
    logger.info("beginning search...")
    start = time.time()
    mcts.search(state, num_simulations)
    end = time.time()

    logger.info("--done--")
    logger.info("num valid: %d" % num_valid)

    best = mcts.get_best_sequence()
    generated_text = ''.join(best[0])
    logger.info("best generated text: %s" % generated_text)
    decoded = DeepSMILESLanguageModelUtils.decode(generated_text,
                                                  start='<s>',
                                                  end='</s>')
    smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
    best_score = distance_scorer.score(smiles)
    logger.info("best SMILES: %s, J: %s (%s seconds)" %
                (smiles, best_score, str((end - start))))

    log_top_best(all_smiles, 5, logger)

    logger.info("writing dataset...")
    name = 'molexit-%d' % n
    dataset = '../models/molexit/%s.txt' % name
    with open(dataset, 'w') as f:
        for smi in list(
                reversed(sorted(all_smiles.items(),
                                key=lambda kv: kv[1][0])))[:keep_top_n]:
            dsmi = smiles_to_deepsmiles(smi[0].strip())
            tok = DeepSMILESTokenizer(dsmi)
            tokens = tok.get_tokens()