Пример #1
0
def get_atom_count(mol: rdkit.Chem.rdchem.Mol,
                   radical_check: bool = False) -> collections.Counter:
    """Count the atoms of each element in a molecule.

    The counts are parsed from the molecular formula string returned by
    ``AllChem.CalcMolFormula``.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule whose atoms are counted.
    radical_check : bool, optional
        When True, add one to the ``"*"`` entry if any atom of the molecule
        reports radical electrons. Defaults to False.

    Returns
    -------
    atoms : collections.Counter
        Element symbol mapped to the number of atoms of that element.
    """
    formula = AllChem.CalcMolFormula(mol)
    atoms = collections.Counter()
    # Every match is a (symbol, digits) pair. An empty digit string means
    # the element occurs once, since ones are implicit in chemical formulas.
    for symbol, digits in re.findall(r"([A-Z][a-z]*)(\d*)", formula):
        atoms[symbol] += int(digits) if digits else 1
    if radical_check and any(
            atom.GetNumRadicalElectrons() for atom in mol.GetAtoms()):
        # A single "*" marks the molecule as a radical, regardless of how
        # many atoms carry radical electrons.
        atoms["*"] += 1
    return atoms
Пример #2
0
 def set_computable(self):
     """Fill the computed fields of this object from ``self._mol``.

     The stored structure is parsed with ``tool_chemical.read_string`` using
     the "mol" format, and the result is used to set the SMILES (written
     with ``isomericSmiles=False``), InChI, InChIKey, molecular formula and
     exact molecular weight attributes.
     """
     parsed = tool_chemical.read_string("mol", self._mol)
     self._smiles = Chem.MolToSmiles(parsed, isomericSmiles=False)
     self._inchi = inchi.MolToInchi(parsed)
     self._inchikey = inchi.MolToInchiKey(parsed)
     self._molecular_formula = Chem.CalcMolFormula(parsed)
     self._molecular_weight = Chem.CalcExactMolWt(parsed)
Пример #3
0
    def insert_core_compound(self, compound_dict, requests=None):
        """Create the request that saves a compound into the core database.

        A shallow copy of ``compound_dict`` is stripped of the
        expansion-specific fields and completed with the fields computed
        from its SMILES. The copy is written with ``$setOnInsert`` and
        ``upsert=True``, so a compound that is already in the core database
        is left unchanged.

        :param compound_dict: Compound dictionary; must contain ``'_id'``
            and ``'SMILES'``
        :type compound_dict: dict
        :param requests: List collecting requests for a later bulk write.
            When None, the compound is written immediately.
        :type requests: list or None
        :raises KeyError: If ``'_id'`` or ``'SMILES'`` is missing
        """
        core_dict = copy(compound_dict)
        cpd_id = core_dict['_id']
        mol_object = AllChem.MolFromSmiles(core_dict['SMILES'])

        # These fields describe a single expansion and are not stored in
        # the core entry.
        for key in ('Generation', 'Expand', 'Type', 'Product_of',
                    'Reactant_in'):
            core_dict.pop(key, None)

        # 'SMILES' is always present at this point (it was read above), so
        # only the InChI representations may still need to be computed.
        if 'Inchi' not in core_dict:
            core_dict['Inchi'] = AllChem.MolToInchi(mol_object)
        if 'Inchikey' not in core_dict:
            core_dict['Inchikey'] = AllChem.InchiToInchiKey(core_dict['Inchi'])
        core_dict['Mass'] = AllChem.CalcExactMolWt(mol_object)
        core_dict['Formula'] = AllChem.CalcMolFormula(mol_object)
        core_dict['logP'] = AllChem.CalcCrippenDescriptors(mol_object)[0]
        core_dict['NP_likeness'] = nps.scoreMol(mol_object, self.nps_model)
        core_dict['Spectra'] = {}
        # Record which expansion it's coming from
        core_dict['MINES'] = []

        selector = {'_id': cpd_id}
        update = {'$setOnInsert': core_dict}
        # Compare against None explicitly: an empty request list is still a
        # request for bulk collection.
        if requests is not None:
            requests.append(pymongo.UpdateOne(selector, update, upsert=True))
        else:
            self.core_compounds.update_one(selector, update, upsert=True)

        return None
    def test_api_addMolecule(self):
        """POST a molfile to /api/addMolecule and compare the stored fields.

        The expected values are recomputed from ``self.propane`` and checked
        against the last ``Molecule`` row.
        """
        response = self.client.post(path="/api/addMolecule",
                                    data={"molfile": self.propane})
        self.assertEqual(response.status_code, 200)

        expected_mol = AllChem.MolFromMolBlock(self.propane)
        stored = Molecule.objects.last()

        # The weight is compared after formatting to two decimals.
        expected_mw = float(format(AllChem.CalcExactMolWt(expected_mol), ".2f"))
        self.assertEqual(expected_mw, stored.mw)

        expected_smiles = AllChem.MolToSmiles(expected_mol)
        self.assertEqual(expected_smiles, stored.smiles)

        expected_formula = AllChem.CalcMolFormula(expected_mol)
        self.assertEqual(expected_formula, stored.sum_formula)

        expected_inchi = AllChem.MolToInchi(expected_mol)
        self.assertEqual(expected_inchi, stored.inchi)

        expected_key = AllChem.InchiToInchiKey(expected_inchi)
        self.assertEqual(expected_key, stored.inchi_key)
def _make_compound_info(mol_object):
    """Return a dictionary of identifiers and properties for a molecule.

    The result holds the SMILES, InChIKey, molecular weight, exact mass,
    formula and formal charge, the MACCS and RDKit fingerprints (each as a
    mapping of the on-bit index, as a string, to 1), and an empty
    ``'dblinks'`` dictionary.
    """
    smiles = AllChem.MolToSmiles(mol_object, True)
    inchikey = AllChem.InchiToInchiKey(AllChem.MolToInchi(mol_object))
    mass = Descriptors.MolWt(mol_object)
    exact_mass = AllChem.CalcExactMolWt(mol_object)
    formula = AllChem.CalcMolFormula(mol_object)
    charge = AllChem.GetFormalCharge(mol_object)

    maccs_bits = AllChem.GetMACCSKeysFingerprint(mol_object).GetOnBits()
    maccs = {str(bit): 1 for bit in maccs_bits}
    rdkit_bits = AllChem.RDKFingerprint(mol_object).GetOnBits()
    rdkit_fp = {str(bit): 1 for bit in rdkit_bits}

    return {
        'smiles': smiles,
        'inchikey': inchikey,
        'mass': mass,
        'exactmass': exact_mass,
        'formula': formula,
        'charge': charge,
        'fingerprints': {
            'maccs': maccs,
            'rdkit': rdkit_fp,
        },
        'dblinks': {},
    }
Пример #6
0
 def testMolFormula(self):
   """Check the formula computed by CalcMolFormula for several SMILES."""
   cases = (
     ("[NH4+]", "H4N+"),
     ("c1ccccc1", "C6H6"),
     ("C1CCCCC1", "C6H12"),
     ("c1ccccc1O", "C6H6O"),
     ("C1CCCCC1O", "C6H12O"),
     ("C1CCCCC1=O", "C6H10O"),
     ("N[Na]", "H2NNa"),
     ("[C-][C-]", "C2-2"),
     ("[H]", "H"),
     ("[H-1]", "H-"),
     ("[CH2]", "CH2"),
     ("[He-2]", "He-2"),
     ("[U+3]", "U+3"),
   )
   for smiles, expected in cases:
     mol = Chem.MolFromSmiles(smiles)
     actual = AllChem.CalcMolFormula(mol)
     # Name the input in the failure message so a failing case can be
     # identified without a debugger.
     self.assertEqual(actual, expected, msg="SMILES: %s" % smiles)
Пример #7
0
def sdf_parser(soubor):
    """Store the molecules of an SD file as ``Molecule`` rows.

    Every readable molecule is looked up by its InChIKey. Molecules that are
    not stored yet are saved together with their SMILES, formula, exact
    molecular weight, InChI and InChIKey. The name is the first line of the
    ``PUBCHEM_SUBSTANCE_SYNONYM`` property, or an empty string when the
    molecule has no such property.

    :param soubor: SD file passed to ``Chem.SDMolSupplier``
    :return: Number of molecules that could be read from the file,
        including those that were already stored
    """
    mol_counter = 0
    suppl = Chem.SDMolSupplier(soubor)
    for mol in suppl:
        # The supplier yields None for records it cannot parse.
        if mol is None:
            continue
        print(mol.GetNumAtoms())
        mol_counter += 1
        new_inchi = Chem.MolToInchi(mol)
        new_inchikey = Chem.InchiToInchiKey(new_inchi)

        # Check by InChIKey whether the molecule is already in the
        # database; the key should be unique.
        if Molecule.objects.filter(inchikey=new_inchikey).exists():
            print(mol, "already exists")
            continue

        new_smiles = Chem.MolToSmiles(mol)
        new_summaryForm = AllChem.CalcMolFormula(mol)
        new_molweigth = AllChem.CalcExactMolWt(mol)

        # Reset the name for every molecule. Otherwise a molecule without
        # the synonym property would raise UnboundLocalError (first record)
        # or silently reuse the previous molecule's name.
        new_name = ""
        if mol.HasProp('PUBCHEM_SUBSTANCE_SYNONYM'):
            new_name = mol.GetProp('PUBCHEM_SUBSTANCE_SYNONYM').split("\n")[0]

        newInsertedMolecule = Molecule(name=new_name,
                                       smiles=new_smiles,
                                       mol_weight=new_molweigth,
                                       inchi=new_inchi,
                                       inchikey=new_inchikey,
                                       summary_formula=new_summaryForm)
        newInsertedMolecule.save()

    return mol_counter
Пример #8
0
def convert(structure: str, fmt: Format, get3d: bool) -> str:
    """Convert a structure string into the requested output format.

    The structure is parsed with ``smi_to_mol``, reordered with
    ``rdkit_atom_order`` and named after its molecular formula. The name of
    the chosen format is printed before the conversion.

    :param structure: Input structure string; it is also appended to SDF
        output under a ``SMILES`` tag
    :param fmt: Target format (``Format.sdf``, ``Format.smiles``,
        ``Format.inchi`` or ``Format.svg``)
    :param get3d: For SDF output only, embed the molecule with a fixed
        random seed before writing it
    :return: The converted structure, or ``"Broken"`` for any other format
    """
    canonical = rdkit_atom_order(smi_to_mol(structure))
    formula = AllChem.CalcMolFormula(canonical)
    canonical.SetProp("_Name", formula)

    if fmt == Format.sdf:
        print("SDF")
        if get3d:
            AllChem.EmbedMolecule(canonical, randomSeed=0xF00D)
        sdf_block = mol_to_sdf(canonical)
        return sdf_block + f"\n> <SMILES>\n{structure}\n\n$$$$"
    elif fmt == Format.smiles:
        print("SMILES")
        return mol_to_smi(canonical)
    elif fmt == Format.inchi:
        print("InChI")
        return mol_to_inchi(canonical)
    elif fmt == Format.svg:
        print("SVG")
        return mol_to_svg(canonical)
    else:
        return "Broken"
Пример #9
0
 def parse_comps(field):
     """Parse one side of a reaction into stoichiometry and atom counts.

     ``field`` is split on ``' // '``; the number of times a compound name
     occurs is used as its stoichiometric coefficient. Each compound is
     looked up in ``metacyc2hash`` and inserted into ``mine_db`` the first
     time it is seen (tracked in ``inserted``).

     :param field: Compound names separated by ``' // '``
     :return: Tuple of the list of stoichiometry tuples and a Counter of
         element symbol to atom count, weighted by stoichiometry
     :raises ValueError: If a compound name is not in ``metacyc2hash``
     """
     atoms = collections.Counter()
     compounds = collections.Counter(field.split(' // '))
     half_rxn = []
     for comp, stoich in compounds.items():
         if comp not in metacyc2hash:
             raise ValueError('Undefined Compound: %s' % comp)
         mol = metacyc2hash[comp]
         # Raw string: '\d' in a normal string literal is an invalid
         # escape sequence. An empty count means the element occurs once.
         for element, count in re.findall(r'([A-Z][a-z]*)(\d*)',
                                          AllChem.CalcMolFormula(mol)):
             atoms[element] += (int(count) if count else 1) * stoich
         if comp not in inserted:
             mine_db.insert_compound(mol, {'Generation': 0})
             inserted.add(comp)
         half_rxn.append(
             utils.stoich_tuple(stoich, utils.get_compound_hash(mol)))
     return half_rxn, atoms
Пример #10
0
 def _add_compound(self, id, smi, mol=None, type='Predicted'):
     """Register a compound in the internal compound dictionary.

     The compound is hashed from its SMILES and always recorded in
     ``self._raw_compounds``. A new entry in ``self.compounds`` is created
     only when the hashed id is not present yet, so a compound from a prior
     generation is never overwritten.

     :param id: Value stored under the ``'ID'`` key of the entry
     :param smi: SMILES string of the compound
     :param mol: Mol object of the compound; built from ``smi`` when falsy
     :param type: Compound type; ``'Coreactant'`` is passed on to the hash
     :return: The hashed id of the compound
     """
     _id = utils.compound_hash(smi, type == 'Coreactant')
     self._raw_compounds[smi] = _id
     if _id in self.compounds:
         return _id

     if not mol:
         mol = AllChem.MolFromSmiles(smi)
     inchi_key = AllChem.InchiToInchiKey(AllChem.MolToInchi(mol))
     entry = {
         'ID': id,
         '_id': _id,
         "SMILES": smi,
         'Inchikey': inchi_key,
         'Type': type,
         'Generation': self.generation,
         'Formula': AllChem.CalcMolFormula(mol),
         '_atom_count': self._get_atom_count(mol),
         'Charge': AllChem.GetFormalCharge(mol),
         'Reactant_in': [],
         'Product_of': [],
         "Sources": []
     }
     self.compounds[_id] = entry
     # Sources are not tracked for ids starting with 'X' (coreactants).
     if _id[0] == 'X':
         del self.compounds[_id]['Sources']

     # When building a mine with an image directory, draw the SVG now.
     if self.image_dir and self.mine:
         svg_path = os.path.join(self.image_dir, _id + '.svg')
         try:
             with open(svg_path, 'w') as outfile:
                 drawable = rdMolDraw2D.PrepareMolForDrawing(mol)
                 drawer = rdMolDraw2D.MolDraw2DSVG(1000, 1000)
                 drawer.DrawMolecule(drawable)
                 drawer.FinishDrawing()
                 outfile.write(drawer.GetDrawingText())
         except OSError:
             print("Unable to generate image for %s" % smi)
     return _id
Пример #11
0
 def _get_atom_count(self, mol):
     """Count the atoms of each element in a mol object.

     The counts are parsed from the molecular formula string returned by
     ``AllChem.CalcMolFormula``. When ``self.radical_check`` is set and any
     atom reports radical electrons, the ``"*"`` entry is incremented once.

     :param mol: Mol object to count atoms for
     :return: Counter mapping element symbol to atom count
     """
     atoms = collections.Counter()
     # Find all strings of the form A# in the molecular formula where A
     # is the element (e.g. C) and # is the number of atoms of that
     # element in the molecule. The pattern is a raw string because '\d'
     # in a normal string literal is an invalid escape sequence.
     for element, count in re.findall(r'([A-Z][a-z]*)(\d*)',
                                      AllChem.CalcMolFormula(mol)):
         # An empty count means there is just one atom of that element,
         # as ones are implicit in chemical formulas.
         atoms[element] += int(count) if count else 1
     if self.radical_check:
         # The generator lets any() stop at the first radical atom.
         if any(atom.GetNumRadicalElectrons() for atom in mol.GetAtoms()):
             atoms["*"] += 1
     return atoms
Пример #12
0
def _get_core_cpd_insert(cpd_dict: dict) -> pymongo.UpdateOne:
    """Build the upsert request that adds a compound to the core database.

    The core fields that are present and not None in ``cpd_dict`` are copied
    and completed with properties computed from the SMILES. The request
    uses ``$setOnInsert`` with ``upsert=True``, so it only writes when no
    document with the same ``_id`` exists.

    Parameters
    ----------
    cpd_dict : dict
        Compound dictionary; must provide non-None ``"_id"`` and
        ``"SMILES"`` values.

    Returns
    -------
    pymongo.UpdateOne
        Request to be passed to a bulk write.

    Raises
    ------
    KeyError
        If ``"SMILES"`` or ``"_id"`` is missing or None in ``cpd_dict``.
    """
    # NOTE(review): "InchiKey" here differs in case from the "Inchikey" key
    # checked below, so a supplied "Inchikey" is never reused and a supplied
    # "InchiKey" is stored next to the recomputed one. Confirm which
    # spelling is intended before changing the stored schema.
    core_keys = ["_id", "SMILES", "Inchi", "InchiKey", "Mass", "Formula"]
    core_dict = {
        key: cpd_dict.get(key)
        for key in core_keys if cpd_dict.get(key) is not None
    }

    mol_object = AllChem.MolFromSmiles(core_dict["SMILES"])
    # Indices of the bits that are set in the 512-bit RDKit fingerprint.
    rdk_fp = [
        i for i, val in enumerate(
            list(AllChem.RDKFingerprint(mol_object, fpSize=512))) if val
    ]

    # "SMILES" is always present at this point (it was read above), so only
    # the InChI representations may still need to be computed.
    if "Inchi" not in core_dict:
        core_dict["Inchi"] = AllChem.MolToInchi(mol_object)
    if "Inchikey" not in core_dict:
        core_dict["Inchikey"] = AllChem.InchiToInchiKey(core_dict["Inchi"])

    core_dict["Mass"] = AllChem.CalcExactMolWt(mol_object)
    core_dict["Charge"] = AllChem.GetFormalCharge(mol_object)
    core_dict["Formula"] = AllChem.CalcMolFormula(mol_object)
    core_dict["logP"] = AllChem.CalcCrippenDescriptors(mol_object)[0]
    core_dict["RDKit_fp"] = rdk_fp
    core_dict["len_RDKit_fp"] = len(rdk_fp)
    # core_dict['NP_likeness'] = nps.scoreMol(mol_object, nps_model)
    core_dict["Spectra"] = {}
    # Record which expansion it's coming from
    core_dict["MINES"] = []

    return pymongo.UpdateOne({"_id": core_dict["_id"]},
                             {"$setOnInsert": core_dict},
                             upsert=True)
Пример #13
0
    def insert_compound(self,
                        mol_object,
                        compound_dict=None,
                        bulk=None,
                        kegg_db="KEGG",
                        pubchem_db='PubChem-8-28-2015',
                        modelseed_db='ModelSEED'):
        """Save an RDKit molecule as a compound entry in the MINE.

        The fields needed by the API are calculated and merged into
        ``compound_dict``; calculated values overwrite entries that are
        already there. A preexisting compound with the same _id is
        overwritten.

        :param mol_object: The compound to be stored
        :type mol_object: RDKit Mol object
        :param compound_dict: Additional information about the compound to
            be stored. Modified in place when given.
        :type compound_dict: dict
        :param bulk: A pymongo bulk operation object. If falsy, the compound
            is saved to the database immediately
        :param kegg_db: The ID of the KEGG Mongo database
        :type kegg_db: str
        :param pubchem_db: The ID of the PubChem Mongo database
        :type pubchem_db: str
        :param modelseed_db: The ID of the ModelSEED Mongo database
        :type modelseed_db: str
        :return: The hashed _id of the compound
        :rtype: str
        """
        if compound_dict is None:
            compound_dict = {}

        # Representations of the molecule and its computed properties.
        compound_dict['SMILES'] = AllChem.MolToSmiles(mol_object, True)
        inchi = AllChem.MolToInchi(mol_object)
        compound_dict['Inchi'] = inchi
        compound_dict['Inchikey'] = AllChem.InchiToInchiKey(inchi)
        compound_dict['Mass'] = AllChem.CalcExactMolWt(mol_object)
        compound_dict['Formula'] = AllChem.CalcMolFormula(mol_object)
        compound_dict['Charge'] = AllChem.GetFormalCharge(mol_object)

        # Fingerprints are stored as the indices of the bits that are set.
        maccs_bits = list(
            AllChem.GetMACCSKeysFingerprint(mol_object).GetOnBits())
        compound_dict['MACCS'] = maccs_bits
        compound_dict['len_MACCS'] = len(maccs_bits)
        rdkit_bits = list(AllChem.RDKFingerprint(mol_object).GetOnBits())
        compound_dict['RDKit'] = rdkit_bits
        compound_dict['len_RDKit'] = len(rdkit_bits)

        compound_dict['logP'] = AllChem.CalcCrippenDescriptors(mol_object)[0]
        is_coreactant = compound_dict.get('Type') == 'Coreactant'
        compound_dict['_id'] = utils.compound_hash(compound_dict['SMILES'],
                                                   is_coreactant)
        compound_dict.pop('_atom_count', None)
        # Cached for rapid reaction mass change calculation.
        self._mass_cache[compound_dict['_id']] = compound_dict['Mass']

        # Reaction links given as a non-empty string are parsed back into
        # Python objects.
        for link_key in ('Reactant_in', 'Product_of'):
            link_value = compound_dict.get(link_key)
            if isinstance(link_value, str) and link_value:
                compound_dict[link_key] = ast.literal_eval(link_value)

        # Copy fields from each enabled external database, in this order.
        if compound_dict['Inchikey']:
            external_sources = (
                (kegg_db, [('Pathways', 'Pathways'),
                           ('Names', 'Names'),
                           ('DB_links', 'DB_links'),
                           ('Enzymes', 'Enzymes')]),
                (pubchem_db, [('COMPOUND_CID', 'DB_links.PubChem')]),
                (modelseed_db, [('DB_links', 'DB_links')]),
            )
            for db_name, fields in external_sources:
                if db_name:
                    compound_dict = self.link_to_external_database(
                        db_name, compound=compound_dict,
                        fields_to_copy=fields)

        # The natural product likeness model is loaded on first use.
        if not self.np_model:
            self.np_model = np.readNPModel()
        compound_dict["NP_likeness"] = np.scoreMol(mol_object, self.np_model)

        compound_dict = utils.convert_sets_to_lists(compound_dict)

        # Assign a MINE id to the compound.
        if self.id_db:
            projection = {
                'MINE_id': 1,
                "Pos_CFM_spectra": 1,
                "Neg_CFM_spectra": 1
            }
            mine_comp = self.id_db.compounds.find_one(
                {"Inchikey": compound_dict['Inchikey']}, projection)
            if mine_comp:
                # Known compound: reuse its id and any stored spectra.
                compound_dict['MINE_id'] = mine_comp['MINE_id']
                for spectra_key in ('Pos_CFM_spectra', 'Neg_CFM_spectra'):
                    if spectra_key in mine_comp:
                        compound_dict[spectra_key] = mine_comp[spectra_key]
            else:
                # Unknown compound: the new id is the current number of
                # compounds in the id database.
                compound_dict['MINE_id'] = self.id_db.compounds.count()
                self.id_db.compounds.save(compound_dict)

        # Upsert through the bulk object when one is given, otherwise save
        # directly.
        if bulk:
            bulk.find({'_id': compound_dict['_id']}).upsert().replace_one(
                compound_dict)
        else:
            self.compounds.save(compound_dict)
        return compound_dict['_id']
Пример #14
0
def get_molecular_formula(mol):
    """Return the molecular formula string of an rdkit.Mol."""
    formula = AllChem.CalcMolFormula(mol)
    return formula
Пример #15
0
    def save(self,
             smiles=None,
             molfile=None,
             rdmol=None,
             inchi=None,
             name=None,
             update=False,
             *args,
             **kwargs):
        """Create the molecule from a structure input and save it.

        With ``update=True`` the object is saved as it is. Otherwise the
        molecule is built from the first input given, in the order
        ``molfile``, ``smiles``, ``rdmol``, ``inchi``; all derived fields
        (InChI, weight, formula, fingerprint, molfile, SVG image, name,
        internal id) are computed before saving.

        :param smiles: SMILES string of the molecule
        :param molfile: Mol block of the molecule
        :param rdmol: RDKit Mol object
        :param inchi: InChI string of the molecule
        :param name: Name to store; when empty, the ``LONGNAME`` property of
            the molecule is used if it exists
        :param update: Save the object without recomputing anything
        :raises MoleculeCreationError: If no input was given or the input
            could not be turned into a molecule
        :raises MoleculeExistsInDatabase: If the InChI is empty, too short,
            or already stored
        """
        if update:
            super(Molecule, self).save(*args, **kwargs)
            return

        # Start from None so that a call without any structure input raises
        # MoleculeCreationError instead of UnboundLocalError.
        mol = None
        if molfile:
            mol = AllChem.MolFromMolBlock(molfile)
        elif smiles:
            mol = AllChem.MolFromSmiles(smiles)
        elif rdmol:
            mol = rdmol
        elif inchi:
            mol = AllChem.MolFromInchi(inchi)

        if not mol:
            raise self.MoleculeCreationError

        inchi = AllChem.MolToInchi(mol)
        smiles = AllChem.MolToSmiles(mol)

        # exists() answers the uniqueness question without counting rows.
        is_new = (inchi and len(inchi) > 1
                  and not Molecule.objects.filter(inchi=inchi).exists())
        if not is_new:
            raise self.MoleculeExistsInDatabase(smiles)

        self.inchi = inchi
        # The weight is stored rounded to two decimals.
        self.mw = float("{0:.2f}".format(AllChem.CalcExactMolWt(mol)))
        self.sum_formula = AllChem.CalcMolFormula(mol)
        self.fingerprint = AllChem.GetMorganFingerprintAsBitVect(
            mol, 4, nBits=1024).ToBitString()
        self.inchi_key = AllChem.InchiToInchiKey(self.inchi)
        self.molfile = AllChem.MolToMolBlock(mol)
        self.smiles = smiles
        self.rdmol = mol

        # generating SVG image
        if self.smiles not in self.EXCLUDED_MOLECULES:
            binMol = AllChem.Mol(self.rdmol.ToBinary())

            # 2D coordinates are computed only when the molecule has no
            # conformer yet.
            if not binMol.GetNumConformers():
                rdDepictor.Compute2DCoords(self.rdmol)

            drawer = rdMolDraw2D.MolDraw2DSVG(100, 100)
            drawer.DrawMolecule(self.rdmol)
            drawer.FinishDrawing()
            svg = drawer.GetDrawingText().replace('svg:', '')

            # remove first line containing XML meta information
            self.image_svg = "\n".join(svg.split("\n")[1:]).strip()
        else:
            self.image_svg = None

        if name:
            self.name = name
        else:
            try:
                self.name = mol.GetProp("LONGNAME")
            except KeyError:
                self.name = None

        # The internal id continues from the highest existing primary key.
        if Molecule.objects.all().count() == 0:
            self.internal_id = "MI-J-1"
        else:
            self.internal_id = "MI-J-{}".format(
                Molecule.objects.latest("id").id + 1)

        super(Molecule, self).save(*args, **kwargs)
                            if (mol):
                                mol_source = "OpenBabel"
                        else:
                            mol_source = "RDKit"
                except Exception as e:
                    pass

                if (mol is None):
                    unresolved_structures.write(external_id + "\t" +
                                                struct_stage + "\t" +
                                                structure + "\n")
                    continue

                new_formula = ""
                if (mol_source == "RDKit"):
                    new_formula = AllChem.CalcMolFormula(mol)
                elif (mol_source == "OpenBabel"):
                    new_formula = mol.formula

                new_charge = 0
                if (mol_source == "RDKit"):
                    new_charge = AllChem.GetFormalCharge(mol)
                    match = re.search('([-+]\d?)$', new_formula)
                    if (match):
                        new_formula = new_formula.replace(match.group(), '')
                elif (mol_source == "OpenBabel"):
                    new_charge = mol.charge
                    match = re.search('([-+]+)$', new_formula)
                    if (match):
                        new_formula = new_formula.replace(match.group(), '')
Пример #17
0
def pipe_calc_props(stream,
                    props,
                    force2d=False,
                    summary=None,
                    comp_id="pipe_calc_props"):
    """Calculate properties for the records of a stream.

    ``props`` can be a single property name or a list of names; names are
    matched case-insensitively. Only records that contain a "mol" entry are
    processed and yielded; all other records are dropped.

    Handled properties:
        2d, date, formula, hba, hbd, logp, mw, smiles, rotb,
        sa (synthetic accessibility), tpsa

    "rotb" and "smiles" are written as properties of the mol object; every
    other calculated value is written into the record itself.

    Synthetic Accessibility (normalized):
        0: hard to synthesize; 1: easy access

        as described in:
            | Estimation of Synthetic Accessibility Score of Drug-like Molecules based on Molecular Complexity and Fragment Contributions
            | *Peter Ertl and Ansgar Schuffenhauer*
            | Journal of Cheminformatics 1:8 (2009) (`link <http://www.jcheminf.com/content/1/1/8>`_)

    When ``summary`` is given, ``summary[comp_id]`` is set to the number of
    records yielded so far.
    """
    if not isinstance(props, list):
        props = [props]

    # Property names are compared in lower case.
    props = [prop.lower() for prop in props]

    rec_counter = 0
    for rec in stream:
        if "mol" not in rec:
            continue

        mol = rec["mol"]
        if "2d" in props:
            check_2d_coords(mol, force2d)

        if "date" in props:
            rec["Date"] = time.strftime("%Y%m%d")

        if "formula" in props:
            rec["Formula"] = Chem.CalcMolFormula(mol)

        if "hba" in props:
            rec["HBA"] = str(Desc.NOCount(mol))

        if "hbd" in props:
            rec["HBD"] = str(Desc.NHOHCount(mol))

        if "logp" in props:
            rec["LogP"] = "{:.2f}".format(Desc.MolLogP(mol))

        if "mw" in props:
            rec["MW"] = "{:.2f}".format(Desc.MolWt(mol))

        if "rotb" in props:
            mol.SetProp("RotB", str(Desc.NumRotatableBonds(mol)))

        if "smiles" in props:
            mol.SetProp("Smiles", Chem.MolToSmiles(mol))

        if SASCORER and "sa" in props:
            raw_score = sascorer.calculateScore(mol)
            # Map the raw score onto the normalized scale described above.
            normalized = 1 - (raw_score / 10)
            rec["SA"] = "{:.2f}".format(normalized)

        if "tpsa" in props:
            rec["TPSA"] = str(int(Desc.TPSA(mol)))

        rec_counter += 1
        if summary is not None:
            summary[comp_id] = rec_counter

        yield rec
Пример #18
0
        # Check for missing or unknown formulas.
        if cpd['formula'] == '' or cpd['formula'] == 'noformula' or cpd[
                'formula'] == 'unknown':
            noFormula.append(index)

        # Check for duplicate and missing compound structures.
        mol = AllChem.MolFromInchi(cpd['smiles'])
        if mol:
            inchikey = AllChem.InchiToInchiKey(cpd['smiles'])
            if inchikey in structureDict:
                if inchikey not in duplicateStructure:
                    duplicateStructure[inchikey] = [structureDict[inchikey]]
                duplicateStructure[inchikey].append(index)
            else:
                structureDict[inchikey] = index
            if cpd['formula'] != AllChem.CalcMolFormula(mol):
                inconsistentFormula[index] = (cpd['formula'],
                                              AllChem.CalcMolFormula(mol))
        else:
            noStructure.append(index)

        # Check for charges that are too big.
        if 'charge' in cpd:
            if abs(cpd['charge']) > args.charge:
                largeCharge.append(index)
        else:
            noCharge.append(index)

        # Check for invalid is_core flags.
        if cpd['is_core'] != 0 and cpd['is_core'] != 1:
            badCore.append(index)