start = time.time()
mcts.search(state, num_simulations)
end = time.time()

logger.info("--done--")
logger.info("num valid: %d" % num_valid)

# report the best sequence found by the search
best = mcts.get_best_sequence()
generated_text = ''.join(best[0])
logger.info("best generated text: %s" % generated_text)

decoded = DeepSMILESLanguageModelUtils.decode(generated_text, start='<s>', end='</s>')
smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
logger.info("best SMILES: %s, J: %s (%s seconds)" % (smiles, scorer.score(smiles), str(end - start)))

log_top_best(all_smiles, 5, logger)

# write the top-scoring molecules, as space-separated DeepSMILES tokens, to the next training dataset
logger.info("writing dataset...")
name = 'molexit-%d' % n
dataset = '../models/molexit/%s.txt' % name
with open(dataset, 'w') as f:
    for smi in list(reversed(sorted(all_smiles.items(), key=lambda kv: kv[1][0])))[:keep_top_n]:
        dsmi = smiles_to_deepsmiles(smi[0].strip())
        tok = DeepSMILESTokenizer(dsmi)
        tokens = tok.get_tokens()
        f.write(' '.join([t.value for t in tokens]))
        f.write("\n")
start = time.time()
mcts.search(state, num_simulations)
end = time.time()

logger.info("--done--")
logger.info("num valid: %d" % num_valid)

best = mcts.get_best_sequence()
generated_text = ''.join(best[0])
logger.info("best generated text: %s" % generated_text)

decoded = DeepSMILESLanguageModelUtils.decode(generated_text, start='<s>', end='</s>')
smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
logger.info("best SMILES: %s, J: %s (%s seconds)" % (smiles, distance_scorer.score(smiles), str(end - start)))

log_top_best(all_smiles, 5, logger)

logger.info("writing dataset...")
name = 'molexit-%d' % n
dataset = '../models/molexit/%s.txt' % name
with open(dataset, 'w') as f:
    for smi in list(reversed(sorted(all_smiles.items(), key=lambda kv: kv[1][0])))[:keep_top_n]:
        dsmi = smiles_to_deepsmiles(smi[0].strip())
        tok = DeepSMILESTokenizer(dsmi)
        tokens = tok.get_tokens()
        f.write(' '.join([t.value for t in tokens]))
        f.write("\n")
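# The dataset-writing step above calls a smiles_to_deepsmiles() helper that is not defined in this
# excerpt. A minimal sketch of such a helper, assuming the conversion uses the `deepsmiles` package
# (an assumption -- the actual implementation is not shown here):
import deepsmiles

_converter = deepsmiles.Converter(rings=True, branches=True)

def smiles_to_deepsmiles(smiles):
    # encode a SMILES string into its DeepSMILES form
    return _converter.encode(smiles)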
try:
    generated = lm.generate(num_chars=100, text_seed='<s>')
    decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
    sanitized = DeepSMILESLanguageModelUtils.sanitize(decoded)
    num_valid += 1

    # synthetic accessibility score is a number between 1 (easy to make) and 10 (very difficult to make)
    sascore = sascorer.calculateScore(Chem.MolFromSmiles(sanitized)) / 10.

    # cycle score, squashed between 0 and 1
    cyclescore = cycle_scorer.score(sanitized)
    cyclescore = cyclescore / (1 + cyclescore)

    distance_score = distance_scorer.score(sanitized)

    # composite objective J: weighted blend of the distance-based score with the inverted SA and cycle scores
    score = (0.75 * distance_score) + (0.15 * (1 - sascore)) + (0.10 * (1 - cyclescore))

    all_smiles[sanitized] = (score, generated)

    if current_best_score is None or beats_current(score):
        current_best_score = score
        current_best_smiles = sanitized

except Exception:
    # the generated string could not be decoded into a valid molecule; skip it
    pass

if (i + 1) % 50000 == 0:
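# The sascorer used above is RDKit's synthetic-accessibility contrib module. Its import is not
# shown in this excerpt; one common way to make it available (an assumption about this project's
# setup, not something shown in the code above) is:
import os
import sys

from rdkit import Chem
from rdkit.Chem import RDConfig

sys.path.append(os.path.join(RDConfig.RDContribDir, 'SA_Score'))
import sascorer  # provides calculateScore(mol), returning a value between 1 and 10

# cycle_scorer is likewise not defined here. A minimal sketch, assuming it implements the usual
# "penalized logP"-style cycle penalty (atoms beyond six in the largest ring), might look like:
class CycleScorer:
    def score(self, smiles):
        mol = Chem.MolFromSmiles(smiles)
        ring_sizes = [len(ring) for ring in mol.GetRingInfo().AtomRings()]
        largest_ring = max(ring_sizes) if ring_sizes else 0
        return max(largest_ring - 6, 0)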
t.cancel()
end = time.time()

logger.info("--done--")
logger.info("num simulations: %s" % simulations)
logger.info("num valid (in this iteration): %d" % num_valid)
logger.info("num unique (over all iterations): %s" % len(all_smiles))
logger.info("num unique (in this iteration): %s" % len(seen))

best = mcts.get_best_sequence()
generated_text = ''.join(best[0])
logger.info("best generated text: %s" % generated_text)

decoded = DeepSMILESLanguageModelUtils.decode(generated_text, start='<s>', end='</s>')
smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
logger.info("best SMILES: %s, J: %s (%s seconds)" % (smiles, distance_scorer.score(smiles), str(end - start)))

log_top_best(all_smiles, 5, logger)

# write the top-scoring molecules, as space-separated DeepSMILES tokens, to the next training
# dataset, keeping their scores for reporting
logger.info("writing dataset...")
name = 'molexit-%d' % n
dataset = '../models/molexit/%s.txt' % name
dataset_scores = []
with open(dataset, 'w') as f:
    for smi in list(reversed(sorted(all_smiles.items(), key=lambda kv: kv[1][0])))[:keep_top_n]:
        dsmi = smiles_to_deepsmiles(smi[0].strip())
        tok = DeepSMILESTokenizer(dsmi)
        tokens = tok.get_tokens()
        f.write(' '.join([t.value for t in tokens]))
        f.write("\n")
        dataset_scores.append(smi[1][0])
logger.info("beginning search...") start = time.time() mcts.search(state, num_simulations) end = time.time() logger.info("--done--") logger.info("num valid: %d" % num_valid) best = mcts.get_best_sequence() generated_text = ''.join(best[0]) logger.info("best generated text: %s" % generated_text) decoded = DeepSMILESLanguageModelUtils.decode(generated_text, start='<s>', end='</s>') smiles = DeepSMILESLanguageModelUtils.sanitize(decoded) best_score = distance_scorer.score(smiles) logger.info("best SMILES: %s, J: %s (%s seconds)" % (smiles, best_score, str((end - start)))) log_top_best(all_smiles, 5, logger) logger.info("writing dataset...") name = 'molexit-%d' % n dataset = '../models/molexit/%s.txt' % name with open(dataset, 'w') as f: for smi in list( reversed(sorted(all_smiles.items(), key=lambda kv: kv[1][0])))[:keep_top_n]: dsmi = smiles_to_deepsmiles(smi[0].strip()) tok = DeepSMILESTokenizer(dsmi) tokens = tok.get_tokens()