def generate_optimized_molecules(self,
                                     scoring_function,
                                     number_molecules,
                                     starting_population=None):
        """Search for high-scoring molecules and return the best ones found.

        Runs a ``LanguageModelMCTSWithPUCTTerminating`` search over ``self.lm``
        for a fixed number of simulations, starting from the ``'<s>'`` token.
        Every terminated sequence is decoded and sanitized to a SMILES string
        and scored with ``scoring_function.score``.

        Side effects: ``self.best_smiles`` and ``self.best_score`` are reset
        and then updated to the first molecule that reached the highest score
        above ``-1.0``.

        Args:
            scoring_function: object exposing ``score(smiles)``; called once
                per distinct SMILES string.
            number_molecules: maximum number of molecules to return.
            starting_population: currently ignored.

        Returns:
            Up to ``number_molecules`` distinct SMILES strings, ordered from
            highest to lowest score (ties keep discovery order). The list is
            empty when the search produced no valid molecule.
        """
        print("generating %s samples..." % number_molecules)
        width = 24
        max_depth = 100
        c = 5
        num_simulations = 10000

        # Distinct SMILES -> score. Dicts keep insertion order, so iteration
        # order is the order in which molecules were first discovered.
        scored_smiles = {}

        self.best_smiles = None
        self.best_score = -1.0

        def eval_function(text):
            generated = ''.join(text)
            try:
                decoded = DeepSMILESLanguageModelUtils.decode(generated,
                                                              start='<s>',
                                                              end='</s>')
                smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
            except Exception:
                # Deliberate best-effort: a sequence that cannot be decoded or
                # sanitized gets the minimum reward.
                return -1.0

            if smiles in scored_smiles:
                # Re-generating a known molecule gets the minimum reward and
                # is not scored a second time.
                return -1.0

            score = scoring_function.score(smiles)
            scored_smiles[smiles] = score

            if self.best_score < score:
                self.best_score = score
                self.best_smiles = smiles

            return score

        mcts = LanguageModelMCTSWithPUCTTerminating(self.lm,
                                                    width,
                                                    max_depth,
                                                    eval_function,
                                                    cpuct=c,
                                                    terminating_symbol='</s>')
        mcts.search(["<s>"], num_simulations)

        # sorted() is stable even with reverse=True, so among equal scores the
        # molecule found first stays first (matching best_smiles).
        ranked = sorted(scored_smiles, key=scored_smiles.get, reverse=True)
        return ranked[:max(number_molecules, 0)]
示例#2
0
    def generate(self, number_samples):
        """Sample molecules by running an MCTS search over ``self.lm``.

        The search is started from ``'<s>'`` and run for ``number_samples``
        simulations. Each time the search evaluates a sequence, one entry is
        appended to the result: the sanitized SMILES string, or the literal
        ``"invalid"`` when decoding/sanitizing raised. The reward fed back to
        the search is ``1.0`` for a SMILES string seen for the first time and
        ``-1.0`` for an invalid or repeated one.

        Returns:
            The list of collected entries, in evaluation order.
        """
        print("generating %s samples..." % number_samples)

        search_width, depth_limit, exploration = 24, 100, 5
        seen_smiles = set()
        collected = []

        def eval_function(text):
            candidate = ''.join(text)
            try:
                smiles = DeepSMILESLanguageModelUtils.sanitize(
                    DeepSMILESLanguageModelUtils.decode(candidate,
                                                        start='<s>',
                                                        end='</s>'))
            except Exception:
                collected.append("invalid")
                return -1.0

            collected.append(smiles)

            # Repeats are recorded above but penalised.
            if smiles in seen_smiles:
                return -1.0

            seen_smiles.add(smiles)
            return 1.0

        searcher = LanguageModelMCTSWithPUCTTerminating(
            self.lm,
            search_width,
            depth_limit,
            eval_function,
            cpuct=exploration,
            terminating_symbol='</s>',
        )
        searcher.search(["<s>"], number_samples)

        return collected
示例#3
0
        num_valid += 1

        if smiles in all_smiles:
            score = -1.0
        else:
            score = scorer.score(smiles)
            all_smiles[smiles] = (score, generated)

        logger.debug("%s, %s" % (smiles, str(score)))
        log_best(i, all_smiles, num_valid, logger)
        return score

    # Build the search object from the language model, the width and max_depth
    # settings, the eval_function callback, the cpuct constant, and '</s>' as
    # the terminating symbol.
    mcts = LanguageModelMCTSWithPUCTTerminating(lm,
                                                width,
                                                max_depth,
                                                eval_function,
                                                cpuct=c,
                                                terminating_symbol='</s>')
    state = start_state

    logger.info("beginning search...")
    # Take wall-clock timestamps immediately before and after the search call.
    start = time.time()
    mcts.search(state, num_simulations)
    end = time.time()

    logger.info("--done--")
    logger.info("num valid: %d" % num_valid)

    # Element 0 of the get_best_sequence() result is joined into one string
    # for logging.
    best = mcts.get_best_sequence()
    generated_text = ''.join(best[0])
    logger.info("best generated text: %s" % generated_text)
        # in practice, the log probs are rarely less than -45; so the min tot_score can be: -45 + (sigma*-1.0)
        rescale_min = -45 - sigma
        if tot_score < rescale_min:
            logger.info("WARNING: total score lower than %s" % rescale_min)
        # because probabilities are in the range [0,1], the max log prob is log(1) i.e. 0
        #  so the max tot_score can be: 0 + sigma*1.0
        rescale_max = sigma
        # scaling x into [a,b]: (b-a)*((x - min(x))/(max(x) - min(x))) + a
        ret_score = (1 - (-1)) * ((tot_score - rescale_min) /
                                  (rescale_max - rescale_min)) + (-1)

        return ret_score

    # Build the search object from the prior model, the width and max_depth
    # settings, the eval_function callback, the cpuct constant, and '</s>' as
    # the terminating symbol.
    mcts = LanguageModelMCTSWithPUCTTerminating(prior,
                                                width,
                                                max_depth,
                                                eval_function,
                                                cpuct=c,
                                                terminating_symbol='</s>')
    state = start_state

    logger.info("beginning search...")
    # Take wall-clock timestamps around the search call.
    start = time.time()

    mcts.search(state, num_simulations)

    # NOTE(review): `t` is created outside this excerpt; presumably a timer
    # started before the search -- confirm against the code above.
    t.cancel()
    end = time.time()

    logger.info("--done--")
    logger.info("num valid: %d" % num_valid)
    # all_smiles holds one entry per distinct SMILES string, so its length is
    # the unique count.
    logger.info("num unique: %s" % len(all_smiles))