Exemplo n.º 1
0
def sc_hard_fexofenadine() -> GoalDirectedBenchmark:
    """Build the 'SC_fexofenadine' goal-directed benchmark.

    The objective of ``hard_fexofenadine()`` is wrapped in a
    ``ScoringFunctionSAWrapper`` together with a default-constructed
    ``SCScoreModifier``; the contribution specification comes from
    ``uniform_specification(1, 10, 100)``.
    """
    contribution = uniform_specification(1, 10, 100)
    base_objective = hard_fexofenadine().objective
    sc_biased = ScoringFunctionSAWrapper(base_objective, SCScoreModifier())
    return GoalDirectedBenchmark(
        name='SC_fexofenadine',
        objective=sc_biased,
        contribution_specification=contribution,
    )
Exemplo n.º 2
0
def sa_ranolazine() -> GoalDirectedBenchmark:
    """Build the 'SA_ranolazine' goal-directed benchmark.

    The objective of ``start_pop_ranolazine()`` is wrapped in a
    ``ScoringFunctionSAWrapper`` together with a default-constructed
    ``SAScoreModifier``; the contribution specification comes from
    ``uniform_specification(1, 10, 100)``.
    """
    contribution = uniform_specification(1, 10, 100)
    base_objective = start_pop_ranolazine().objective
    wrapped_objective = ScoringFunctionSAWrapper(base_objective,
                                                 SAScoreModifier())
    return GoalDirectedBenchmark(
        name='SA_ranolazine',
        objective=wrapped_objective,
        contribution_specification=contribution,
    )
Exemplo n.º 3
0
def sa_hard_osimertinib() -> GoalDirectedBenchmark:
    """Build the 'SA_osimertinib' goal-directed benchmark.

    The objective of ``hard_osimertinib()`` is wrapped in a
    ``ScoringFunctionSAWrapper`` together with a default-constructed
    ``SAScoreModifier``; the contribution specification comes from
    ``uniform_specification(1, 10, 100)``.
    """
    contribution = uniform_specification(1, 10, 100)
    base_objective = hard_osimertinib().objective
    wrapped_objective = ScoringFunctionSAWrapper(base_objective,
                                                 SAScoreModifier())
    return GoalDirectedBenchmark(
        name='SA_osimertinib',
        objective=wrapped_objective,
        contribution_specification=contribution,
    )
Exemplo n.º 4
0
def sa_qed_benchmark() -> GoalDirectedBenchmark:
    """Build the 'SA_QED' goal-directed benchmark.

    An ``RdkitScoringFunction`` over the ``qed`` descriptor is wrapped in a
    ``ScoringFunctionSAWrapper`` together with a default-constructed
    ``SAScoreModifier``; the contribution specification comes from
    ``uniform_specification(1, 10, 100)``.
    """
    contribution = uniform_specification(1, 10, 100)
    qed_scorer = RdkitScoringFunction(descriptor=qed)
    wrapped_objective = ScoringFunctionSAWrapper(qed_scorer, SAScoreModifier())
    return GoalDirectedBenchmark(
        name='SA_QED',
        objective=wrapped_objective,
        contribution_specification=contribution,
    )
Exemplo n.º 5
0
    def generate_optimized_molecules(
            self,
            scoring_function: ScoringFunction,
            number_molecules: int,
            starting_population: Optional[List[str]] = None) -> List[str]:
        """Evolve a population of molecules and return the best SMILES found.

        The given scoring function is wrapped in a ``ScoringFunctionSAWrapper``
        with a ``SmilesModifier`` built from ``self.sigma`` and ``self.mu``;
        every score used for selection comes from that wrapped function.

        Args:
            scoring_function: Objective to optimise (before wrapping).
            number_molecules: Number of SMILES to return. If it exceeds
                ``self.population_size``, the instance's population size is
                raised to match (this mutates ``self``).
            starting_population: Optional SMILES used to seed the search.
                When ``None``, the seed is taken from ``self.all_smiles``:
                sampled with ``np.random.choice`` if ``self.random_start`` is
                set, otherwise selected with ``self.top_k``.

        Returns:
            At most ``number_molecules`` SMILES strings taken from the head
            of the final population.
        """
        # NOTE: earlier revisions kept commented-out alternatives here that
        # used SAScoreModifier(mu=3.2356, sigma=1.0156),
        # SCScoreModifier(mu=2.9308, sigma=0.1803), or either of them with
        # mu=self.mu, sigma=self.sigma instead of SmilesModifier.
        # NOTE(review): SmilesModifier receives (sigma, mu) positionally while
        # those alternatives used mu=/sigma= keywords -- confirm this order
        # matches SmilesModifier's signature.
        sa_scoring_function = ScoringFunctionSAWrapper(
            scoring_function, SmilesModifier(self.sigma, self.mu))

        # The population must be at least as large as the requested output.
        if number_molecules > self.population_size:
            self.population_size = number_molecules
            print(
                f'Benchmark requested more molecules than expected: new population is {number_molecules}'
            )

        # fetch initial population?
        if starting_population is None:
            print('selecting initial population...')
            if self.random_start:
                starting_population = np.random.choice(self.all_smiles,
                                                       self.population_size)
            else:
                starting_population = self.top_k(self.all_smiles,
                                                 sa_scoring_function,
                                                 self.population_size)

        # select initial population: keep the best-scoring seeds only
        population_smiles = heapq.nlargest(self.population_size,
                                           starting_population,
                                           key=sa_scoring_function.score)
        population_mol = [Chem.MolFromSmiles(s) for s in population_smiles]
        population_scores = self.pool(
            delayed(score_mol)(m, sa_scoring_function.score)
            for m in population_mol)

        # evolution: go go go!!
        t0 = time()

        # Number of consecutive generations whose kept scores did not change.
        patience = 0

        for generation in range(self.generations):

            # new_population
            mating_pool = make_mating_pool(population_mol, population_scores,
                                           self.offspring_size)
            offspring_mol = self.pool(
                delayed(reproduce)(mating_pool, self.mutation_rate)
                for _ in range(self.population_size))

            # add new_population
            population_mol += offspring_mol
            population_mol = sanitize(population_mol)

            # stats: the timer is read (and restarted) before this
            # generation is scored, so scoring time is attributed to the
            # next reading.
            gen_time = time() - t0
            # Guard against a zero elapsed time (coarse clock resolution) so
            # the throughput statistic cannot raise ZeroDivisionError.
            mol_sec = (self.population_size / gen_time
                       if gen_time > 0 else float('inf'))
            t0 = time()

            old_scores = population_scores
            population_scores = self.pool(
                delayed(score_mol)(m, sa_scoring_function.score)
                for m in population_mol)
            # Keep only the population_size best (score, molecule) pairs.
            population_tuples = list(zip(population_scores, population_mol))
            population_tuples = sorted(population_tuples,
                                       key=lambda x: x[0],
                                       reverse=True)[:self.population_size]
            population_mol = [t[1] for t in population_tuples]
            population_scores = [t[0] for t in population_tuples]

            # early stopping: triggered when the kept scores are exactly
            # equal to the previous generation's for self.patience
            # consecutive generations
            if population_scores == old_scores:
                patience += 1
                print(f'Failed to progress: {patience}')
                if patience >= self.patience:
                    print('No more patience, bailing...')
                    break
            else:
                patience = 0

            print(f'{generation} | '
                  f'max: {np.max(population_scores):.3f} | '
                  f'avg: {np.mean(population_scores):.3f} | '
                  f'min: {np.min(population_scores):.3f} | '
                  f'std: {np.std(population_scores):.3f} | '
                  f'sum: {np.sum(population_scores):.3f} | '
                  f'{gen_time:.2f} sec/gen | '
                  f'{mol_sec:.2f} mol/sec')

        # finally
        return [Chem.MolToSmiles(m) for m in population_mol][:number_molecules]