示例#1
0
def _to_sliced_mol(row):
    """
    Builds a SlicedMol from one tab-separated row.
    :param row: String with exactly three tab-separated fields: the scaffold SMILES, the decoration
    SMILES joined by ";" and a third field that is ignored. Any other number of fields makes the
    unpacking raise a ValueError.
    :return : A usc.SlicedMol created from the converted scaffold and a dict that maps the position of
    each decoration in the row (starting at 0) to its converted form.
    """
    scaffold_field, decorations_field, _ignored = row.split("\t")
    scaffold_mol = uc.to_mol(scaffold_field)

    decoration_mols = {}
    for position, decoration_smi in enumerate(decorations_field.split(";")):
        decoration_mols[position] = uc.to_mol(decoration_smi)

    return usc.SlicedMol(scaffold_mol, decoration_mols)
示例#2
0
def add_attachment_point_numbers(mol_or_smi, canonicalize=True):
    """
    Numbers every attachment point of a molecule consecutively, starting at 0.
    :param mol_or_smi: Either a SMILES string or a molecule object. Strings are relabelled textually,
    anything else is relabelled atom by atom.
    :param canonicalize: Round-trip the input through to_smiles/uc.to_mol first, so that the attachment
    points are always visited in the same order. When False and a molecule object is given, the labels
    are written onto the atoms of that very object.
    :return : A SMILES string with numbered attachment points.
    """
    if not isinstance(mol_or_smi, str):
        mol = uc.to_mol(to_smiles(mol_or_smi)) if canonicalize else mol_or_smi
        next_label = 0
        for atom in mol.GetAtoms():
            if atom.GetSymbol() != ATTACHMENT_POINT_TOKEN:
                continue
            atom.SetProp("molAtomMapNumber", str(next_label))
            next_label += 1
        return to_smiles(mol)

    smi = to_smiles(uc.to_mol(mol_or_smi)) if canonicalize else mol_or_smi

    # labels follow the order in which the attachment points appear in the SMILES text;
    # the length of this list is the number of attachment points labelled so far
    labelled = []

    def _label_next(_match):
        replacement = "[{}:{}]".format(ATTACHMENT_POINT_TOKEN, len(labelled))
        labelled.append(None)
        return replacement

    return re.sub(ATTACHMENT_POINT_REGEXP, _label_next, smi)
def join(scaffold_smi, decoration_smi, keep_label_on_atoms=False):
    """
    Joins a SMILES scaffold with a decoration. They must be labelled.
    :param scaffold_smi: SMILES of the scaffold.
    :param decoration_smi: SMILES of the decoration. It must carry exactly one attachment point.
    :param keep_label_on_atoms: Add the labels to the atoms after attaching the molecule.
    This is useful when debugging, but it can give problems.
    :return: A Mol object of the joined scaffold, or None when the join cannot be done: either input
    does not convert to a molecule, the decoration does not have exactly one attachment point or that
    attachment point has no label, the label is not found exactly twice in the combined molecule,
    an attachment point does not have exactly one neighbor, or the joined molecule fails sanitization.
    """
    scaffold = uc.to_mol(scaffold_smi)
    decoration = uc.to_mol(decoration_smi)
    if not (scaffold and decoration):
        return None

    # obtain id in the decoration
    try:
        attachment_points = [atom.GetProp("molAtomMapNumber") for atom in decoration.GetAtoms()
                             if atom.GetSymbol() == ATTACHMENT_POINT_TOKEN]
    except KeyError:
        return None  # presumably an attachment point without a label
    if len(attachment_points) != 1:
        return None  # zero or more than one attachment point
    attachment_point = attachment_points[0]

    combined_scaffold = rkc.RWMol(rkc.CombineMols(decoration, scaffold))
    attachments = [atom for atom in combined_scaffold.GetAtoms()
                   if atom.GetSymbol() == ATTACHMENT_POINT_TOKEN and
                   atom.HasProp("molAtomMapNumber") and atom.GetProp("molAtomMapNumber") == attachment_point]
    if len(attachments) != 2:
        return None  # the label must appear once in the decoration and once in the scaffold

    neighbors = []
    for atom in attachments:
        if atom.GetDegree() != 1:
            return None  # the attachment is wrongly generated
        neighbors.append(atom.GetNeighbors()[0])

    # the new bond is double as soon as one of the two replaced bonds is double
    bonds = [atom.GetBonds()[0] for atom in attachments]
    if any(bond.GetBondType() == rkc.BondType.DOUBLE for bond in bonds):
        bond_type = rkc.BondType.DOUBLE
    else:
        bond_type = rkc.BondType.SINGLE

    combined_scaffold.AddBond(neighbors[0].GetIdx(), neighbors[1].GetIdx(), bond_type)
    # each index is read right before its own removal, never cached up front
    combined_scaffold.RemoveAtom(attachments[0].GetIdx())
    combined_scaffold.RemoveAtom(attachments[1].GetIdx())

    if keep_label_on_atoms:
        for neigh in neighbors:
            _add_attachment_point_num(neigh, attachment_point)

    joined = combined_scaffold.GetMol()
    try:
        rkc.SanitizeMol(joined)
    except ValueError:  # sanitization error
        return None
    return joined
 def _generate_randomized_not_repeated(
         smi,
         num_rand=self.num_randomized_smiles,
         max_rand=self.max_randomized_smiles_sample):
     """
     Collects distinct SMILES obtained from usc.to_smiles with variant="random".
     :param smi: SMILES of the molecule to convert.
     :param num_rand: Number of distinct SMILES at which the sampling stops.
     :param max_rand: Maximum number of sampling attempts.
     :return : A list with the distinct SMILES collected; it can hold fewer than num_rand
     entries when max_rand attempts were not enough. The order is that of a set.
     """
     parsed = uc.to_mol(smi)
     unique_smiles = set()
     for _attempt in range(max_rand):
         candidate = usc.to_smiles(parsed, variant="random")
         unique_smiles.add(candidate)
         if len(unique_smiles) == num_rand:
             return list(unique_smiles)
     return list(unique_smiles)
def join_joined_attachments(scaffold_smi, decorations_smi):
    """
    Attaches a series of decorations, given as one string, to a scaffold.
    :param scaffold_smi: SMILES of the scaffold; its attachment points are numbered here.
    :param decorations_smi: Decoration SMILES separated by ATTACHMENT_SEPARATOR_TOKEN. The decoration
    at position i gets the number i on its first attachment point.
    :return : The result of the last join, or None when the number of decorations differs from the
    number of attachment points in the scaffold or when any of the joins fails.
    """
    labelled_decorations = []
    for position, decoration in enumerate(decorations_smi.split(ATTACHMENT_SEPARATOR_TOKEN)):
        labelled_decorations.append(add_first_attachment_point_number(decoration, position))

    labelled_scaffold = add_attachment_point_numbers(scaffold_smi)
    if len(get_attachment_points(labelled_scaffold)) != len(labelled_decorations):
        return None

    joined = uc.to_mol(labelled_scaffold)
    for decoration in labelled_decorations:
        # join works on SMILES, so the intermediate molecule is serialized at each step
        joined = join(to_smiles(joined), decoration)
        if not joined:
            return None
    return joined
示例#6
0
 def enumerate(row: ps.Row, enumerator: FragmentReactionSliceEnumerator, max_cuts: int) -> List[ps.Row]:
     """
     Slices the molecule found in the first tab-separated field of a row.
     :param row: Input line; only the text before the first tab is used, as a SMILES.
     :param enumerator: Object whose enumerate(mol, cuts=...) yields the sliced molecules.
     :param max_cuts: Passed to the enumerator as "cuts" and stored in every output row.
     :return : One ps.Row per sliced molecule with the scaffold (attachment point numbers removed),
     decorations, original SMILES and max_cuts; an empty list when the SMILES gives no molecule.
     """
     attachment_helper = AttachmentPoints()
     smiles = row.split("\t")[0]
     mol = uc.to_mol(smiles)
     if not mol:
         return []

     return [
         ps.Row(**{
             DataframeColumnsEnum.SCAFFOLDS:
                 attachment_helper.remove_attachment_point_numbers(sliced.scaffold_smiles),
             DataframeColumnsEnum.DECORATIONS: sliced.decorations_smiles,
             DataframeColumnsEnum.ORIGINAL: sliced.original_smiles,
             DataframeColumnsEnum.MAX_CUTS: max_cuts,
         })
         for sliced in enumerator.enumerate(mol, cuts=max_cuts)
     ]
示例#7
0
 def collect_failures(
         self, row: ps.Row,
         enumerator: FailingReactionsEnumerator) -> List[ps.Row]:
     """
     Gathers the failed reactions reported by the enumerator for one input row.
     :param row: Input line; only the text before the first tab is used, as a SMILES.
     :param enumerator: Object whose enumerate(mol, failures_limit=...) yields failed reactions.
     :return : One ps.Row per failed reaction with its reaction SMIRKS and molecule SMILES. Collection
     stops once self.configuration.failures_limit rows are gathered; the list is empty when the
     SMILES gives no molecule.
     """
     smiles = row.split("\t")[0]
     mol = uc.to_mol(smiles)
     failures = []
     if not mol:
         return failures

     reported = enumerator.enumerate(
         mol, failures_limit=self.configuration.failures_limit)
     for failure in reported:
         columns = {
             self._columns.REACTION: failure.reaction_smirks,
             self._columns.ORIGINAL: failure.molecule_smiles,
         }
         print("found failed reaction")
         failures.append(ps.Row(**columns))
         # stop on our side too, independently of what the enumerator does with the limit
         if len(failures) >= self.configuration.failures_limit:
             break
     return failures
 def _enumerate(row,
                max_cuts=self.max_cuts,
                enumerator=self.enumerator):
     """
     Slices the molecule of a row with every number of cuts from 1 up to max_cuts.
     :param row: Input line; only the text before the first tab is used, as a SMILES.
     :param max_cuts: Highest number of cuts to try (inclusive).
     :param enumerator: Object whose enumerate(mol, cuts=...) yields the sliced molecules.
     :return : One ps.Row per sliced molecule with fields scaffold, decorations (ordered by their key),
     smiles and cuts; an empty list when the SMILES gives no molecule.
     """
     smiles = row.split("\t")[0]
     mol = uc.to_mol(smiles)
     rows = []
     if not mol:
         return rows

     for num_cuts in range(1, max_cuts + 1):
         for sliced in enumerator.enumerate(mol, cuts=num_cuts):
             # normalize scaffold and decorations
             scaffold, decorations_by_num = sliced.to_smiles()
             ordered_decorations = [
                 dec for _, dec in sorted(decorations_by_num.items())
             ]
             new_row = ps.Row(scaffold=scaffold,
                              decorations=ordered_decorations,
                              smiles=uc.to_smiles(mol),
                              cuts=num_cuts)
             rows.append(new_row)
     return rows
    def __init__(self,
                 model_path,
                 log_path,
                 validation_set_path,
                 training_set_path,
                 epoch,
                 sample_size=10000,
                 summary_writer=None,
                 with_weights=False,
                 smiles_type="smiles"):
        """
        Creates a CollectStatsFromModelRunner.
        :param model_path: The input model path; the model is loaded from it with sampling_mode=True.
        :param log_path: Log directory given to the SummaryWriter created when none is supplied.
        :param validation_set_path: Path of the validation set (stored as is).
        :param training_set_path: Path of the training set (stored as is).
        :param epoch: Epoch value (stored as is).
        :param sample_size: Sample size; values below 1 are raised to 1.
        :param summary_writer: Writer to reuse. Any falsy value makes the runner create its own.
        :param with_weights: Flag stored as is.
        :param smiles_type: A value of the form "deepsmiles.<converter>" makes molecules be read through
        uc.from_deepsmiles with that converter; any value not starting with "deepsmiles" uses uc.to_mol.
        :return:
        """
        self._log_path = log_path
        self._training_set_path = training_set_path
        self._validation_set_path = validation_set_path
        self._with_weights = with_weights

        self._model = mm.Model.load_from_file(model_path, sampling_mode=True)
        self._epoch = epoch
        self._sample_size = max(sample_size, 1)

        # optionally reuse summary writer to prevent device errors
        self.summary_writer = summary_writer or tbx.SummaryWriter(log_dir=self._log_path)
        self.data = {}

        if not smiles_type.startswith("deepsmiles"):
            self._to_mol_func = uc.to_mol
        else:
            # exactly one "." is expected here; anything else fails the unpacking
            _, deepsmiles_type = smiles_type.split(".")

            def _deepsmiles_to_mol(deepsmi):
                return uc.to_mol(
                    uc.from_deepsmiles(deepsmi, converter=deepsmiles_type))

            self._to_mol_func = _deepsmiles_to_mol
 def _generate_randomized_repeated(smi,
                                   num_rand=self.num_randomized_smiles):
     """
     Produces a fixed number of SMILES from usc.to_smiles with variant="random".
     :param smi: SMILES of the molecule to convert.
     :param num_rand: Number of SMILES to generate.
     :return : A list with num_rand SMILES; nothing is done to remove duplicates.
     """
     parsed = uc.to_mol(smi)
     randomized = []
     for _ in range(num_rand):
         randomized.append(usc.to_smiles(parsed, variant="random"))
     return randomized
 def _format_attachment_point(smi, num):
     """
     Numbers the first attachment point of a SMILES and canonicalizes the result.
     :param smi: SMILES to label.
     :param num: Number handed to usc.add_first_attachment_point_number.
     :return : The labelled SMILES after a uc.to_mol / usc.to_smiles round trip.
     """
     labelled_smi = usc.add_first_attachment_point_number(smi, num)
     labelled_mol = uc.to_mol(labelled_smi)
     return usc.to_smiles(labelled_mol)  # canonicalize
def _cleanup_decoration(dec_smi):
    """
    Rewrites a decoration SMILES after passing it through usc.remove_attachment_point_numbers.
    :param dec_smi: SMILES of the decoration.
    :return : The resulting SMILES, or None when the input does not convert to a molecule.
    """
    decoration = uc.to_mol(dec_smi)
    if decoration:
        unlabelled = usc.remove_attachment_point_numbers(decoration)
        return usc.to_smiles(unlabelled)
    return None