Пример #1
0
    def insert_core_compound(self, compound_dict, requests=None):
        """Upsert a compound into the core database, or queue the upsert.

        A shallow copy of ``compound_dict`` is stripped of its
        expansion-specific fields, completed with the calculated fields the
        API needs, and written with ``$setOnInsert`` and ``upsert=True``, so
        a compound whose ``_id`` is already in the core database is left
        untouched.

        :param compound_dict: Compound dictionary. Must contain ``'_id'`` and
            ``'SMILES'``; a ``KeyError`` is raised otherwise.
        :type compound_dict: dict
        :param requests: List collecting requests for a later bulk write. If
            given (even when empty) a ``pymongo.UpdateOne`` is appended to it;
            if None the write is executed immediately on
            ``self.core_compounds``.
        :type requests: list or None
        :return: None
        """
        core_dict = copy(compound_dict)
        cpd_id = core_dict['_id']
        mol_object = AllChem.MolFromSmiles(core_dict['SMILES'])

        # These fields are removed from the copy before it is stored in the
        # core database.
        for key in ('Generation', 'Expand', 'Type', 'Product_of',
                    'Reactant_in'):
            core_dict.pop(key, None)

        # Store the different representations of the molecule (InChI,
        # InChI key) unless the caller already supplied them. 'SMILES' is
        # always present at this point because it was indexed above.
        if 'Inchi' not in core_dict:
            core_dict['Inchi'] = AllChem.MolToInchi(mol_object)
        if 'Inchikey' not in core_dict:
            core_dict['Inchikey'] = AllChem.InchiToInchiKey(core_dict['Inchi'])

        # Calculated properties always overwrite any supplied values.
        core_dict['Mass'] = AllChem.CalcExactMolWt(mol_object)
        core_dict['Formula'] = AllChem.CalcMolFormula(mol_object)
        core_dict['logP'] = AllChem.CalcCrippenDescriptors(mol_object)[0]
        core_dict['NP_likeness'] = nps.scoreMol(mol_object, self.nps_model)
        core_dict['Spectra'] = {}
        # Record which expansion it's coming from
        core_dict['MINES'] = []

        query = {'_id': cpd_id}
        update = {'$setOnInsert': core_dict}
        # Identity check: an empty request list must still be appended to.
        if requests is not None:
            requests.append(pymongo.UpdateOne(query, update, upsert=True))
        else:
            self.core_compounds.update_one(query, update, upsert=True)
Пример #2
0
def _get_core_cpd_insert(cpd_dict: dict) -> pymongo.UpdateOne:
    """Build the upsert request that adds a compound to the core database.

    Only the keys listed in ``core_keys`` are carried over from ``cpd_dict``
    (and only when their value is not None); every other stored field is
    calculated from the molecule parsed from the SMILES string.

    :param cpd_dict: Compound dictionary. Must contain non-None ``"_id"``
        and ``"SMILES"`` entries; a ``KeyError`` is raised otherwise.
    :type cpd_dict: dict
    :return: An ``UpdateOne`` using ``$setOnInsert`` with ``upsert=True``,
        keyed on the compound ``_id``.
    :rtype: pymongo.UpdateOne
    """
    # NOTE(review): "InchiKey" here and "Inchikey" below differ in case, so
    # a supplied "InchiKey" is copied as-is and "Inchikey" is still computed.
    # Confirm which spelling the database readers expect.
    core_keys = ["_id", "SMILES", "Inchi", "InchiKey", "Mass", "Formula"]
    core_dict = {
        key: cpd_dict[key]
        for key in core_keys if cpd_dict.get(key) is not None
    }

    mol_object = AllChem.MolFromSmiles(core_dict["SMILES"])
    # Indices of the bits that are set in the 512-bit RDKit fingerprint
    rdk_fp = list(AllChem.RDKFingerprint(mol_object, fpSize=512).GetOnBits())

    # Store the different representations of the molecule (InChI, InChI key)
    # unless they were supplied. "SMILES" is always present at this point
    # because it was indexed above.
    if "Inchi" not in core_dict:
        core_dict["Inchi"] = AllChem.MolToInchi(mol_object)
    if "Inchikey" not in core_dict:
        core_dict["Inchikey"] = AllChem.InchiToInchiKey(core_dict["Inchi"])

    # Calculated properties always overwrite any supplied values.
    core_dict["Mass"] = AllChem.CalcExactMolWt(mol_object)
    core_dict["Charge"] = AllChem.GetFormalCharge(mol_object)
    core_dict["Formula"] = AllChem.CalcMolFormula(mol_object)
    core_dict["logP"] = AllChem.CalcCrippenDescriptors(mol_object)[0]
    core_dict["RDKit_fp"] = rdk_fp
    core_dict["len_RDKit_fp"] = len(rdk_fp)
    # core_dict['NP_likeness'] = nps.scoreMol(mol_object, nps_model)
    core_dict["Spectra"] = {}
    # Record which expansion it's coming from
    core_dict["MINES"] = []

    return pymongo.UpdateOne({"_id": core_dict["_id"]},
                             {"$setOnInsert": core_dict},
                             upsert=True)
Пример #3
0
    def insert_compound(self,
                        mol_object,
                        compound_dict=None,
                        bulk=None,
                        kegg_db="KEGG",
                        pubchem_db='PubChem-8-28-2015',
                        modelseed_db='ModelSEED'):
        """Save an RDKit molecule as a compound entry in the MINE.

        The fields needed by the API are calculated from ``mol_object`` and
        written into ``compound_dict`` (which is modified in place), taking
        precedence over any values already stored under the same keys. An
        existing compound with the same ``_id`` is replaced.

        :param mol_object: The compound to be stored
        :type mol_object: RDKit Mol object
        :param compound_dict: Additional information about the compound to be
            stored. Overwritten by calculated values.
        :type compound_dict: dict
        :param bulk: A pymongo bulk operation object. If falsy, the compound
            is immediately saved to ``self.compounds``
        :param kegg_db: The ID of the KEGG Mongo database
        :type kegg_db: str
        :param pubchem_db: The ID of the PubChem Mongo database
        :type pubchem_db: str
        :param modelseed_db: The ID of the ModelSEED Mongo database
        :type modelseed_db: str
        :return: The ``_id`` of the compound as produced by
            ``utils.compound_hash``
        :rtype: str
        """
        if compound_dict is None:
            compound_dict = {}

        # Molecule representations and properties, stored in a fixed order
        compound_dict['SMILES'] = AllChem.MolToSmiles(mol_object, True)
        inchi = AllChem.MolToInchi(mol_object)
        compound_dict['Inchi'] = inchi
        compound_dict['Inchikey'] = AllChem.InchiToInchiKey(inchi)
        compound_dict['Mass'] = AllChem.CalcExactMolWt(mol_object)
        compound_dict['Formula'] = AllChem.CalcMolFormula(mol_object)
        compound_dict['Charge'] = AllChem.GetFormalCharge(mol_object)

        # Fingerprints are stored as the indices of their set bits
        maccs_bits = list(
            AllChem.GetMACCSKeysFingerprint(mol_object).GetOnBits())
        compound_dict['MACCS'] = maccs_bits
        compound_dict['len_MACCS'] = len(maccs_bits)
        rdkit_bits = list(AllChem.RDKFingerprint(mol_object).GetOnBits())
        compound_dict['RDKit'] = rdkit_bits
        compound_dict['len_RDKit'] = len(rdkit_bits)

        compound_dict['logP'] = AllChem.CalcCrippenDescriptors(mol_object)[0]

        is_coreactant = compound_dict.get('Type') == 'Coreactant'
        compound_dict['_id'] = utils.compound_hash(compound_dict['SMILES'],
                                                   is_coreactant)
        compound_dict.pop('_atom_count', None)
        # Caching this for rapid reaction mass change calculation
        self._mass_cache[compound_dict['_id']] = compound_dict['Mass']

        # Reaction membership may arrive as the string form of a Python
        # literal; a non-empty string is parsed back into the literal value.
        for membership_key in ('Reactant_in', 'Product_of'):
            raw_value = compound_dict.get(membership_key)
            if isinstance(raw_value, str) and raw_value:
                compound_dict[membership_key] = ast.literal_eval(raw_value)

        # Store links to external databases where compound is present
        if compound_dict['Inchikey']:
            external_sources = (
                (kegg_db, [('Pathways', 'Pathways'),
                           ('Names', 'Names'),
                           ('DB_links', 'DB_links'),
                           ('Enzymes', 'Enzymes')]),
                (pubchem_db, [('COMPOUND_CID', 'DB_links.PubChem')]),
                (modelseed_db, [('DB_links', 'DB_links')]),
            )
            for db_name, field_pairs in external_sources:
                # A falsy database name disables that link
                if db_name:
                    compound_dict = self.link_to_external_database(
                        db_name,
                        compound=compound_dict,
                        fields_to_copy=field_pairs)

        # Natural product likeness score; the model is loaded on first use
        if not self.np_model:
            self.np_model = np.readNPModel()
        compound_dict["NP_likeness"] = np.scoreMol(mol_object, self.np_model)

        compound_dict = utils.convert_sets_to_lists(compound_dict)

        # Assign a MINE id to the compound
        if self.id_db:
            projection = {
                'MINE_id': 1,
                "Pos_CFM_spectra": 1,
                "Neg_CFM_spectra": 1
            }
            mine_comp = self.id_db.compounds.find_one(
                {"Inchikey": compound_dict['Inchikey']}, projection)
            if mine_comp:
                # Known compound: reuse its id and any stored spectra
                compound_dict['MINE_id'] = mine_comp['MINE_id']
                for spectra_key in ('Pos_CFM_spectra', 'Neg_CFM_spectra'):
                    if spectra_key in mine_comp:
                        compound_dict[spectra_key] = mine_comp[spectra_key]
            else:
                # New compound: the id is the current number of stored ids
                compound_dict['MINE_id'] = self.id_db.compounds.count()
                self.id_db.compounds.save(compound_dict)

        # With a bulk object the upsert is queued, otherwise saved right away
        if bulk:
            selector = bulk.find({'_id': compound_dict['_id']})
            selector.upsert().replace_one(compound_dict)
        else:
            self.compounds.save(compound_dict)
        return compound_dict['_id']
Пример #4
0
def CalcCrippenLogPAndMR(mol):
    """Return the two Crippen descriptors of *mol* as a ``(LogP, MR)`` tuple.

    Thin wrapper around ``AllChem.CalcCrippenDescriptors``; its result is
    unpacked into exactly two values before being returned.
    """
    log_p, mr = AllChem.CalcCrippenDescriptors(mol)
    return log_p, mr
Пример #5
0
    def calculate_descriptors(self, mol):
        """
        Calculate MUV descriptors for a molecule.

        Explicit hydrogens are added and stereochemistry is reassigned
        before anything is measured.

        Parameters
        ----------
        mol : Mol
            Molecule.

        Returns
        -------
        ndarray
            In order: total atom count, heavy atom count, the counts of
            B, Br, C, Cl, F, I, N, O, P and S, hydrogen bond acceptors,
            hydrogen bond donors, cLogP, number of tetrahedral chiral
            centers, number of ring systems.
        """
        # prep
        mol = Chem.AddHs(mol)
        Chem.AssignStereochemistry(mol,
                                   cleanIt=True,
                                   force=True,
                                   flagPossibleStereoCenters=True)

        # element symbol -> atomic number, used as the key into the counts
        atomic_numbers = {
            'B': 5,
            'Br': 35,
            'C': 6,
            'Cl': 17,
            'F': 9,
            'I': 53,
            'N': 7,
            'O': 8,
            'P': 15,
            'S': 16
        }
        element_counts = self.atom_counts(mol)

        # atom counts: total, heavy, then one per element sorted by symbol
        descriptors = [mol.GetNumAtoms(), mol.GetNumHeavyAtoms()]
        for _, atomic_num in sorted(atomic_numbers.items()):
            descriptors.append(element_counts[atomic_num]
                               if atomic_num in element_counts else 0)

        # hydrogen bond acceptors / donors
        descriptors.append(AllChem.CalcNumHBA(mol))
        descriptors.append(AllChem.CalcNumHBD(mol))

        # cLogP (the second Crippen descriptor is not used)
        crippen_log_p, _unused = AllChem.CalcCrippenDescriptors(mol)
        descriptors.append(crippen_log_p)

        # number of chiral centers
        tetrahedral_tags = (ChiralType.CHI_TETRAHEDRAL_CW,
                            ChiralType.CHI_TETRAHEDRAL_CCW)
        descriptors.append(
            sum(1 for atom in mol.GetAtoms()
                if atom.GetChiralTag() in tetrahedral_tags))

        # number of ring systems (not the number of rings)
        descriptors.append(self.count_ring_systems(mol))

        return np.asarray(descriptors)