Пример #1
0
    def get_outcomes(self, smiles, start_at=-1, end_at=-1,
                     singleonly=True, stop_if=False, template_count=10000, max_cum_prob=1.0):
        """Performs a one-step synthesis reaction for a given SMILES string.

        The templates in ``self.templates`` between ``start_at`` and
        ``end_at`` are applied one at a time through
        ``self.apply_one_template`` and every product they yield is
        flattened into a plain dict.

        Args:
            smiles (str): SMILES string of the reactant(s) to expand.
            start_at (int, optional): Index of first template to use. Any
                negative value (including the default) means "start with
                the first template". (default: {-1})
            end_at (int, optional): Index of template to stop before. ``-1``
                or any value past the end means "through the last
                template". (default: {-1})
            singleonly (bool, optional): Whether to reduce each product to the
                largest (longest) one; forwarded to
                ``apply_one_template``. (default: {True})
            stop_if (bool or string, optional): SMILES string of molecule to
                stop at if found, or False for no target; forwarded to
                ``apply_one_template``. (default: {False})
            template_count (int, optional): Accepted for signature
                compatibility; not used by this method. (default: {10000})
            max_cum_prob (float, optional): Accepted for signature
                compatibility; not used by this method. (default: {1.0})

        Returns:
            tuple: ``(smiles, result)`` where ``smiles`` is the SMILES string
            regenerated from the parsed molecule and ``result`` is a list of
            dicts with the keys ``smiles_list``, ``smiles``, ``edits``,
            ``template_ids`` and ``num_examples``, one per product.
        """
        # The raw arguments are recorded on the instance unchanged.
        self.start_at = start_at
        self.singleonly = singleonly
        self.stop_if = stop_if

        n_templates = len(self.templates)
        if end_at == -1 or end_at >= n_templates:
            self.end_at = n_templates
        else:
            self.end_at = end_at

        # A negative start is a "from the beginning" sentinel. Handing it to
        # range() unchanged would yield -1 first, and self.templates[-1] is
        # the *last* template, which would then be applied out of order (and
        # a second time once the loop reaches the end of the list).
        first_index = max(start_at, 0)

        # Define mol to operate on
        mol = Chem.MolFromSmiles(smiles)
        clean_reactant_mapping(mol)
        # Tag every atom with its 1-based position as an isotope label.
        for position, atom in enumerate(mol.GetAtoms()):
            atom.SetIsotope(position + 1)
        smiles = Chem.MolToSmiles(
            mol, isomericSmiles=USE_STEREOCHEMISTRY)  # to canonicalize

        result = []
        for i in range(first_index, self.end_at):
            # only use templates between the specified boundaries.
            template = self.templates[i]
            if not self.load_all:
                # Stored entries are converted on demand when templates were
                # not all loaded up front.
                template = self.doc_to_template(template, retro=False)
            products = self.apply_one_template(
                mol, smiles, template,
                singleonly=singleonly, stop_if=stop_if
            )
            result.extend(
                {
                    'smiles_list': product.smiles_list,
                    'smiles': product.smiles,
                    'edits': product.edits,
                    'template_ids': product.template_ids,
                    'num_examples': product.num_examples,
                }
                for product in products
            )

        return (smiles, result)
Пример #2
0
    def evaluate(self, reactants_smiles, contexts, **kwargs):
        """Score the candidate outcomes of a reaction under each context.

        Args:
            reactants_smiles (str): SMILES string of the reactants.
            contexts (list): Reaction contexts to evaluate, one result entry
                is produced per context.
            **kwargs: Optional settings, each consumed with ``pop``:
                nproc (int): Number of ``True`` entries appended to
                    ``self.idle`` when not running through celery.
                    (default: 1)
                batch_size (int): Passed to ``self.initialize``.
                    (default: 250)
                template_prioritization: Stored on
                    ``self.template_prioritization``.
                    (default: ``gc.popularity``)
                soft_max (bool): Whether the model scores are passed through
                    ``softmax`` to obtain probabilities; applies to every
                    context. (default: True)

        Returns:
            list: If the reactants cannot be parsed or no candidate tensor
            is produced, ``[[placeholder]]`` with a single zero-score dict.
            Otherwise one entry per context: a list of dicts with the keys
            ``rank``, ``outcome``, ``score`` and ``prob`` sorted by
            descending ``prob``. Contexts that are empty or cannot be
            converted contribute a single bare placeholder dict instead of
            a list.
        """
        self.reset()
        self.nproc = kwargs.pop('nproc', 1)
        batch_size = kwargs.pop('batch_size', 250)
        # Read the flag once. Popping it inside the per-context loop would
        # consume the caller's value on the first scored context and fall
        # back to the default (True) for all remaining contexts.
        use_softmax = kwargs.pop('soft_max', True)
        if not self.celery:
            for _ in range(self.nproc):
                self.idle.append(True)
            # One queue is enough; it is only created when at least one
            # worker slot was registered above.
            if self.nproc > 0:
                self.expansion_queue = Queue()

        mol = Chem.MolFromSmiles(reactants_smiles)
        if mol is None:
            MyLogger.print_and_log('Reactants smiles not parsible: {}'.format(
                    reactants_smiles), template_nn_scorer_loc, level=1)
            return [[{
                        'rank': 1,
                        'outcome': '',
                        'score': 0,
                        'prob': 0,
                        }]]

        clean_reactant_mapping(mol)

        reactants_smiles = Chem.MolToSmiles(mol)
        # Outside celery a dummy context manager stands in for
        # allow_join_result so the code path below is shared.
        if self.celery:
            from celery.result import allow_join_result
        else:
            from makeit.utilities.with_dummy import with_dummy as allow_join_result
        with allow_join_result():
            self.template_prioritization = kwargs.pop('template_prioritization', gc.popularity)
            self.prepare()
            self.initialize(reactants_smiles, batch_size)
            (all_results, candidate_edits) = self.get_candidate_edits(reactants_smiles)
            reactants = Chem.MolFromSmiles(reactants_smiles)
            atom_desc_dict = edits_to_vectors(
                [], reactants, return_atom_desc_dict=True)
            candidate_tensor = edits_to_tensor(
                candidate_edits, reactants, atom_desc_dict)

            if not candidate_tensor:
                return [[{
                        'rank': 1,
                        'outcome': '',
                        'score': 0,
                        'prob': 0,
                        }]]

            all_outcomes = []
            for context in contexts:
                if context == []:
                    # NOTE(review): this placeholder is a bare dict while
                    # scored contexts append a list of dicts - confirm that
                    # callers expect the mixed shape.
                    all_outcomes.append({'rank': 0.0,
                                         'outcome': None,
                                         'score': 0.0,
                                         'prob': 0.0,
                                         })
                    continue
                # prediction_context = context_cleaner.clean_context(context) ## move this step to tree evaluator
                prediction_context = context
                context_tensor = context_cleaner.context_to_edit(
                    prediction_context, self.solvent_name_to_smiles, self.solvent_smiles_to_params)
                if not context_tensor:
                    all_outcomes.append({'rank': 0.0,
                                         'outcome': None,
                                         'score': 0.0,
                                         'prob': 0.0,
                                         })
                    continue
                scores = self.model.predict(candidate_tensor + context_tensor)
                probs = softmax(scores) if use_softmax else scores

                # Pair every candidate with its score and probability, most
                # probable first.
                this_outcome = sorted(
                    zip(all_results, scores[0], probs[0]),
                    key=lambda x: x[2], reverse=True)

                # Convert to outcome dict, canonicalizing by SMILES
                outcome_dict = {}
                for i, outcome in enumerate(this_outcome):
                    # Candidates may be objects with a .smiles attribute or
                    # dicts with a 'smiles' key.
                    try:
                        outcome_smiles = outcome[0].smiles
                    except AttributeError:
                        outcome_smiles = outcome[0]['smiles']
                    if outcome_smiles not in outcome_dict:
                        outcome_dict[outcome_smiles] = {
                            'rank': i + 1,
                            'outcome': outcome[0],
                            'score': float(outcome[1]),
                            'prob': float(outcome[2]),
                        }
                    else:  # just add probability
                        outcome_dict[outcome_smiles]['prob'] += float(outcome[2])

                # Re-sort because merged duplicates may have changed the
                # probability ordering; 'rank' keeps the pre-merge position.
                all_outcomes.append(
                    sorted(outcome_dict.values(), key=lambda x: x['prob'], reverse=True))

            return all_outcomes
Пример #3
0
    def get_outcomes(self,
                     smiles,
                     mincount,
                     template_prioritization,
                     start_at=-1,
                     end_at=-1,
                     singleonly=True,
                     stop_if=False,
                     template_count=10000,
                     max_cum_prob=1.0):
        """Apply prioritized forward templates to a reactant SMILES string.

        Each candidate in self.result.products is of type ForwardProduct

        Args:
            smiles (str): SMILES string of the reactant(s) to expand.
            mincount (int): Only templates whose ``'count'`` is strictly
                greater than this value are applied.
            template_prioritization: Method used to order the templates.
                ``gc.popularity`` uses ``self.templates`` as loaded; any
                other value delegates to ``self.template_prioritizer``.
            start_at (int, optional): Index of first prioritized template to
                use. Any negative value (including the default) means
                "start with the first template". (default: {-1})
            end_at (int, optional): Index of prioritized template to stop
                before. ``-1`` or any value past the end means "through the
                last template". (default: {-1})
            singleonly (bool, optional): Forwarded to
                ``apply_one_template``. (default: {True})
            stop_if (bool or string, optional): Forwarded to
                ``apply_one_template``. (default: {False})
            template_count (int, optional): Maximum number of templates,
                handed to the prioritizer when one is used.
                (default: {10000})
            max_cum_prob (float, optional): Maximum cumulative probability,
                handed to the prioritizer when one is used. (default: {1.0})

        Returns:
            tuple: ``(smiles, result)`` where ``smiles`` is the SMILES string
            regenerated from the parsed molecule. ``result`` is a list of
            product dicts when ``self.celery`` is set, otherwise a
            ``ForwardResult`` that the products were added to.
        """
        self.get_template_prioritizers(template_prioritization)
        # Get sorted by popularity during loading.
        if template_prioritization == gc.popularity:
            prioritized_templates = self.templates
        else:
            self.template_prioritizer.set_max_templates(template_count)
            self.template_prioritizer.set_max_cum_prob(max_cum_prob)
            prioritized_templates = self.template_prioritizer.get_priority(
                (self.templates, smiles))

        # The raw arguments are recorded on the instance unchanged.
        self.mincount = mincount
        self.start_at = start_at
        self.singleonly = singleonly
        self.stop_if = stop_if

        if end_at == -1 or end_at >= len(self.templates):
            self.end_at = len(self.templates)
        else:
            self.end_at = end_at

        # A negative start is a "from the beginning" sentinel. Handing it to
        # range() unchanged would yield -1 first and index the *last*
        # prioritized template out of order.
        first_index = max(start_at, 0)
        # The bound above is derived from self.templates, but the loop
        # indexes prioritized_templates; clamp so a shorter prioritized list
        # cannot be indexed past its end.
        last_index = min(self.end_at, len(prioritized_templates))

        # Define mol to operate on
        mol = Chem.MolFromSmiles(smiles)
        clean_reactant_mapping(mol)
        smiles = Chem.MolToSmiles(
            mol, isomericSmiles=USE_STEREOCHEMISTRY)  # to canonicalize

        # Initialize results object
        if self.celery:
            result = []
        else:
            result = ForwardResult(smiles)

        for i in range(first_index, last_index):
            # only use templates between the specified boundaries.
            template = prioritized_templates[i]
            if not template['count'] > mincount:
                continue
            products = self.apply_one_template(mol,
                                               smiles,
                                               template,
                                               singleonly=singleonly,
                                               stop_if=stop_if)
            if self.celery:
                # Plain dicts are collected in celery mode instead of
                # product objects.
                result.extend(
                    {
                        'smiles_list': product.smiles_list,
                        'smiles': product.smiles,
                        'edits': product.edits,
                        'template_ids': product.template_ids,
                        'num_examples': product.num_examples,
                    }
                    for product in products
                )
            else:
                result.add_products(products)

        return (smiles, result)