def sc_hard_fexofenadine() -> GoalDirectedBenchmark:
    """Build the 'SC_fexofenadine' goal-directed benchmark.

    The objective of ``hard_fexofenadine()`` is wrapped in a
    ``ScoringFunctionSAWrapper`` together with a default-constructed
    ``SCScoreModifier``.

    Returns:
        A ``GoalDirectedBenchmark`` whose contribution specification is
        ``uniform_specification(1, 10, 100)``.
    """
    # presumably the top-1 / top-10 / top-100 weighting; confirm against
    # uniform_specification.
    contribution_spec = uniform_specification(1, 10, 100)
    base_objective = hard_fexofenadine().objective
    return GoalDirectedBenchmark(
        name='SC_fexofenadine',
        objective=ScoringFunctionSAWrapper(base_objective, SCScoreModifier()),
        contribution_specification=contribution_spec,
    )
def sa_ranolazine() -> GoalDirectedBenchmark:
    """Build the 'SA_ranolazine' goal-directed benchmark.

    Only the ``objective`` of ``start_pop_ranolazine()`` is reused; it is
    wrapped in a ``ScoringFunctionSAWrapper`` together with a
    default-constructed ``SAScoreModifier``. No other attribute of the base
    benchmark is forwarded to the returned object.

    Returns:
        A ``GoalDirectedBenchmark`` whose contribution specification is
        ``uniform_specification(1, 10, 100)``.
    """
    contribution_spec = uniform_specification(1, 10, 100)
    base_objective = start_pop_ranolazine().objective
    return GoalDirectedBenchmark(
        name='SA_ranolazine',
        objective=ScoringFunctionSAWrapper(base_objective, SAScoreModifier()),
        contribution_specification=contribution_spec,
    )
def sa_hard_osimertinib() -> GoalDirectedBenchmark:
    """Build the 'SA_osimertinib' goal-directed benchmark.

    The objective of ``hard_osimertinib()`` is wrapped in a
    ``ScoringFunctionSAWrapper`` together with a default-constructed
    ``SAScoreModifier``.

    Returns:
        A ``GoalDirectedBenchmark`` whose contribution specification is
        ``uniform_specification(1, 10, 100)``.
    """
    contribution_spec = uniform_specification(1, 10, 100)
    base_objective = hard_osimertinib().objective
    return GoalDirectedBenchmark(
        name='SA_osimertinib',
        objective=ScoringFunctionSAWrapper(base_objective, SAScoreModifier()),
        contribution_specification=contribution_spec,
    )
def sa_qed_benchmark() -> GoalDirectedBenchmark:
    """Build the 'SA_QED' goal-directed benchmark.

    The objective is an ``RdkitScoringFunction`` over the ``qed`` descriptor,
    wrapped in a ``ScoringFunctionSAWrapper`` together with a
    default-constructed ``SAScoreModifier``.

    Returns:
        A ``GoalDirectedBenchmark`` whose contribution specification is
        ``uniform_specification(1, 10, 100)``.
    """
    contribution_spec = uniform_specification(1, 10, 100)
    qed_scorer = RdkitScoringFunction(descriptor=qed)
    return GoalDirectedBenchmark(
        name='SA_QED',
        objective=ScoringFunctionSAWrapper(qed_scorer, SAScoreModifier()),
        contribution_specification=contribution_spec,
    )
def generate_optimized_molecules(
        self,
        scoring_function: ScoringFunction,
        number_molecules: int,
        starting_population: Optional[List[str]] = None) -> List[str]:
    """Evolve a population of molecules towards a high biased score.

    ``scoring_function`` is wrapped in a ``ScoringFunctionSAWrapper`` with a
    ``SmilesModifier``; every selection and ranking step below uses the
    wrapped score, never the raw one.

    Args:
        scoring_function: Objective to optimise (wrapped before use).
        number_molecules: Number of SMILES to return. When it exceeds
            ``self.population_size`` the instance attribute is raised to
            this value, and that change persists after the call.
        starting_population: Optional SMILES to seed the search. When
            ``None`` the seed is taken from ``self.all_smiles``: sampled
            with ``np.random.choice`` if ``self.random_start`` is truthy,
            otherwise selected by ``self.top_k``.

    Returns:
        SMILES of the final population in descending score order, truncated
        to ``number_molecules`` entries.
    """
    # Modifier configurations tried previously (kept for reference):
    #   SAScoreModifier(mu=3.2356, sigma=1.0156)
    #   SCScoreModifier(mu=2.9308, sigma=0.1803)
    #   SAScoreModifier(mu=self.mu, sigma=self.sigma)
    #   SCScoreModifier(mu=self.mu, sigma=self.sigma)
    # NOTE(review): SmilesModifier is given (sigma, mu) positionally, whereas
    # the alternatives above pass mu/sigma by keyword -- confirm this order
    # matches SmilesModifier's signature.
    biased_scorer = ScoringFunctionSAWrapper(
        scoring_function, SmilesModifier(self.sigma, self.mu))
    score_fn = biased_scorer.score

    def score_population(molecules):
        """Score every molecule through the worker pool."""
        return self.pool(
            delayed(score_mol)(mol, score_fn) for mol in molecules)

    if number_molecules > self.population_size:
        self.population_size = number_molecules
        print(
            f'Benchmark requested more molecules than expected: new population is {number_molecules}'
        )

    # No seed supplied: draw one from the reference SMILES set.
    if starting_population is None:
        print('selecting initial population...')
        starting_population = (
            np.random.choice(self.all_smiles, self.population_size)
            if self.random_start
            else self.top_k(self.all_smiles, biased_scorer,
                            self.population_size)
        )

    # Keep only the best-scoring seeds as the initial population.
    seed_smiles = heapq.nlargest(
        self.population_size, starting_population, key=score_fn)
    population_mol = [Chem.MolFromSmiles(smi) for smi in seed_smiles]
    population_scores = score_population(population_mol)

    # Evolution loop.
    clock_start = time()
    stalled_generations = 0

    for generation in range(self.generations):
        # Breed offspring from the current population.
        mating_pool = make_mating_pool(
            population_mol, population_scores, self.offspring_size)
        offspring = self.pool(
            delayed(reproduce)(mating_pool, self.mutation_rate)
            for _ in range(self.population_size))

        # Merge parents and offspring, then sanitize the combined pool.
        population_mol = sanitize([*population_mol, *offspring])

        # The clock is restarted *before* scoring, so the reported time runs
        # from the previous restart up to the end of this generation's
        # reproduction/sanitization step.
        gen_time = time() - clock_start
        mol_sec = self.population_size / gen_time
        clock_start = time()

        previous_scores = population_scores
        candidate_scores = score_population(population_mol)

        # Rank by score (descending) and keep the top population_size.
        ranked = sorted(
            zip(candidate_scores, population_mol),
            key=lambda pair: pair[0],
            reverse=True)
        survivors = ranked[:self.population_size]
        population_mol = [mol for _, mol in survivors]
        population_scores = [score for score, _ in survivors]

        # Early stopping: count consecutive generations whose surviving
        # scores are exactly the same as before.
        if population_scores == previous_scores:
            stalled_generations += 1
            print(f'Failed to progress: {stalled_generations}')
            if stalled_generations >= self.patience:
                print('No more patience, bailing...')
                break
        else:
            stalled_generations = 0

        report_fields = [
            f'{generation}',
            f'max: {np.max(population_scores):.3f}',
            f'avg: {np.mean(population_scores):.3f}',
            f'min: {np.min(population_scores):.3f}',
            f'std: {np.std(population_scores):.3f}',
            f'sum: {np.sum(population_scores):.3f}',
            f'{gen_time:.2f} sec/gen',
            f'{mol_sec:.2f} mol/sec',
        ]
        print(' | '.join(report_fields))

    # Convert the whole final population, then truncate to the request.
    final_smiles = [Chem.MolToSmiles(mol) for mol in population_mol]
    return final_smiles[:number_molecules]