Пример #1
0
def read_db_from_sd_3d(infile, gz=False):
    """Load the 3D molecules of an SD file into a dictionary.

    The file is split into records on ``$$$$`` separator lines. For each
    record the mol block (everything up to and including ``M  END``) is
    parsed with RDKit; the remaining lines are passed to ``read_tags``
    together with the molecule name.

    :param infile: path of the SD file to read.
    :param gz: when true, ``infile`` is opened with ``gzip`` in text mode.
    :return: ``(sub, failed)``. ``sub`` maps the record index to a dict with
        the keys ``"confs"`` (molecule with hydrogens added), ``"props"``
        (result of ``read_tags``) and ``"pattern"`` (SMILES of that
        molecule). ``failed`` lists the indices of records that have no
        ``M  END`` line or that RDKit could not parse. Records that parse but
        are not flagged as 3D appear in neither container.
    """
    sub = {}
    failed = []
    opener = gzip.open if gz else open
    with opener(infile, "rt") as handle:
        content = handle.readlines()
    sd_blocks = [
        list(group)
        for is_separator, group in groupby(content, lambda line: line == "$$$$\n")
        if not is_separator
    ]
    del content
    for i, block in enumerate(sd_blocks):
        try:
            end = block.index("M  END\n") + 1
        except ValueError:
            # No mol block terminator: whitespace-only residue (e.g. blank
            # lines after the last record) is ignored, anything else is a
            # malformed record.
            if any(line.strip() for line in block):
                failed.append(i)
            continue
        mol = Chem.MolFromMolBlock("".join(block[:end]))
        if not mol:
            failed.append(i)
            continue
        if not mol.GetConformer().Is3D():
            continue
        name = mol.GetProp("_Name")
        mol = Chem.AddHs(mol, addCoords=True)
        props = read_tags(name, block[end:])
        sub[i] = {
            "confs": mol,
            "props": props,
            "pattern": Chem.MolToSmiles(mol)
        }
    return sub, failed
Пример #2
0
    def index(self, smiles):
        """Build the numeric index vector of a molecule given as SMILES.

        The vector holds, in order: heavy-atom count, molecular weight
        (rounded to one decimal, times ten, as int), the first element of
        ``self.get_shortest_wiener``, the rotatable-bond count of the
        hydrogen-added molecule, the number of SMARTS hits for each ring
        pattern below, and finally the values of ``self.get_ring_info``.

        :param smiles: SMILES string of the molecule.
        :return: ``numpy`` array of the values listed above.
        """
        # In output order: bridged atoms, spiro atoms, linked rings,
        # separated rings.
        ring_patterns = ('[x3]', '[x4]', '[R]!@[R]', '[R]!@*!@[R]')
        matchers = [pybel.Smarts(pattern) for pattern in ring_patterns]

        rd_mol = Chem.MolFromSmiles(smiles)
        py_mol = pybel.readstring('smi', smiles)

        features = [
            py_mol.OBMol.NumHvyAtoms(),
            int(round(py_mol.molwt, 1) * 10),
            self.get_shortest_wiener(rd_mol)[0],
            Chem.CalcNumRotatableBonds(Chem.AddHs(rd_mol)),
        ]
        features.extend(len(matcher.findall(py_mol)) for matcher in matchers)
        features.extend(self.get_ring_info(py_mol))

        return np.array(features)
Пример #3
0
    def to_mol(self, add_hs: bool = False, num_confs: int = 10) -> "Mol":
        """Turn the current widget value into an RDKit molecule.

        The SMILES of the widget value is used when it is non-empty,
        otherwise its mol block. When hydrogens are requested, conformers are
        embedded afterwards so that the added atoms receive coordinates.

        :param add_hs: Add Hydrogen atoms (only acted on when exactly ``True``)
        :type add_hs: bool
        :param num_confs: Number of conformers to generate
        :type num_confs: int
        :return: RDKit molecule object
        :rtype: Mol
        :raises ValueError: if the widget value is empty or cannot be parsed
        """
        if self.value.smiles != "":
            mol = Chem.MolFromSmiles(self.value.smiles)
        elif self.value.molblock == "":
            raise ValueError("Cannot create Mol object: JSME value is empty")
        else:
            mol = Chem.MolFromMolBlock(self.value.molblock)

        # Parsing failures surface as None; report them before touching the
        # molecule any further.
        if mol is None:
            raise ValueError(f"Cannot convert JSME widget value to Mol object:\
                \n{self.value}\
                \nPlease try clicking 'Save' button on widget or remove \
                Hydrogen atoms from SMILES string and try again.")

        if add_hs is True:
            mol = Chem.AddHs(mol)
            # Conformers are generated only after the hydrogens exist.
            Chem.EmbedMultipleConfs(mol, numConfs=num_confs)
        return mol
Пример #4
0
    def hygrogen_ends(self, macro_mol):
        """
        Strip atoms carrying the ``'del'`` property, then add hydrogens.

        Used on polymers, where the functional groups at the chain ends
        are to be replaced with hydrogen atoms. ``macro_mol.mol`` is
        replaced by the edited molecule.

        Parameters
        ----------
        macro_mol : :class:`.Polymer`
            The polymer being assembled.

        Returns
        -------
        None : :class:`NoneType`

        """

        tagged = [
            atom.GetIdx()
            for atom in macro_mol.mol.GetAtoms()
            if atom.HasProp('del')
        ]
        editable = rdkit.EditableMol(macro_mol.mol)
        # Highest index first, so earlier removals do not shift the
        # indices still waiting to be removed.
        for atom_idx in sorted(tagged, reverse=True):
            editable.RemoveAtom(atom_idx)

        stripped = remake(editable.GetMol())
        macro_mol.mol = rdkit.AddHs(stripped, addCoords=True)
Пример #5
0
 def from_mol(self) -> dict:
     """Parse the submitted molecule block and return its params as a dict.

     The parser is chosen from the ``extension`` field (mol/sdf/mdl, mol2 or
     pdb); hydrogens are added when the ``protons`` flag is set. Any other
     extension raises ``exc.HTTPClientError``.
     """
     extension = self.get('extension')
     # (accepted extensions, RDKit parser, keyword options)
     parsers = (
         (('mol', 'sdf', 'mdl'), Chem.MolFromMolBlock,
          {'sanitize': True, 'removeHs': False, 'strictParsing': True}),
         (('mol2', ), Chem.MolFromMol2Block,
          {'sanitize': True, 'removeHs': False}),
         (('pdb', ), Chem.MolFromPDBBlock,
          {'sanitize': True, 'removeHs': False, 'proximityBonding': False}),
     )
     for extensions, parse, options in parsers:
         if extension in extensions:
             mol = parse(self.get('block'), **options)
             break
     else:
         raise exc.HTTPClientError(
             f"Format {extension} not supported")
     if self.get_bool('protons') is True:
         mol = AllChem.AddHs(mol)
     params = Params.from_mol(mol,
                              self.name,
                              generic=self.generic,
                              atomnames=self.atomnames)
     return self.to_dict(params)
Пример #6
0
def GenerateSDFFromMols(mols, inp_type):
    """Embed each molecule in 3D and write it to its own SDF file.

    For every molecule, hydrogens are added, a conformer is embedded and
    MMFF-optimised, stereochemistry is assigned from the 3D structure, and
    the result is written to ``<inp_type>_Mol_<index>_.sdf`` in the current
    working directory.

    :param mols: iterable of RDKit molecules.
    :param inp_type: prefix used for the generated file names.
    :return: list of the generated file names without the ``.sdf`` suffix.
    """
    files = []
    cwd = os.getcwd()

    for ind, m in enumerate(mols):
        m = AllChem.AddHs(m, addCoords=True)
        AllChem.EmbedMolecule(m)
        AllChem.MMFFOptimizeMolecule(m)
        Chem.rdmolops.AssignStereochemistryFrom3D(m)

        stem = inp_type + '_Mol_' + str(ind) + '_'
        files.append(stem)

        save3d = Chem.SDWriter(os.path.join(cwd, stem + '.sdf'))
        try:
            save3d.write(m)
        finally:
            # Closing flushes the record to disk and releases the handle.
            save3d.close()

    return files
Пример #7
0
 def _from_smiles_w_pdb(cls, pdb: Chem.Mol, smiles, generic, name):
     """Build an instance from SMILES, renaming atoms after a PDB residue.

     The residue called ``name`` is extracted from ``pdb`` and used as the
     naming template; the chemistry itself comes from ``smiles``, which is
     embedded in 3D and MMFF-optimised. CHI data is cleared on the result
     and a warning is emitted to say so.
     """
     template = Chem.SplitMolByPDBResidues(pdb, whiteList=[name])[name]
     AllChem.SanitizeMol(template)
     ligand = Chem.MolFromSmiles(smiles)
     ligand.SetProp('_Name', name)
     # Dummy atoms (*) are switched to atomic number 9 for the sanitise /
     # embed / optimise steps and restored to 0 afterwards.
     dummy_indices = [
         atom.GetIdx() for atom in ligand.GetAtoms()
         if atom.GetSymbol() == '*'
     ]
     for idx in dummy_indices:
         ligand.GetAtomWithIdx(idx).SetAtomicNum(9)
     Chem.SanitizeMol(ligand)
     ligand = AllChem.AddHs(ligand)
     AllChem.EmbedMolecule(ligand)
     AllChem.ComputeGasteigerCharges(ligand)
     AllChem.MMFFOptimizeMolecule(ligand)
     for idx in dummy_indices:
         ligand.GetAtomWithIdx(idx).SetAtomicNum(0)
     instance = cls.load_mol(ligand, generic=generic, name=name)
     instance.move_aside()
     instance.rename_from_template(template)
     instance.move_back()
     instance.convert_mol()
     # TODO: CHI is disabled in this mode until the underlying issue is fixed.
     warnings.warn('CHI DISABLED. - has issues with this mode')
     instance.CHI.data = []
     return instance
Пример #8
0
def smile_to_pdb(smile, pdb_out, mol_name, method_3d='rdkit', iter_num=5000):
    """Generate a 3D structure from a SMILES string and save it as PDB.

    The structure is built and MMFF94-optimised with either Open Babel or
    RDKit and written to ``pdb_out``. The file is then re-read through
    ``pdb_manip`` and the residue name ``UNL`` is replaced by ``mol_name``.

    :param smile: SMILES string of the molecule.
    :param pdb_out: path of the PDB file to write.
    :param mol_name: residue name to assign in the written file.
    :param method_3d: ``'rdkit'`` (default) or ``'openbabel'``.
    :param iter_num: maximum number of force-field optimisation steps.
    :return: formal charge of the molecule.
    :raises ValueError: if ``method_3d`` names an unsupported backend.
    """
    # Fail early: otherwise no file is written and ``charge`` is never bound.
    if method_3d not in ('openbabel', 'rdkit'):
        raise ValueError(
            "Unknown method_3d {!r}; expected 'rdkit' or 'openbabel'".format(
                method_3d))

    if method_3d == 'openbabel':

        from openbabel import pybel

        conf = pybel.readstring("smi", smile)
        charge = conf.charge
        conf.make3D(forcefield='mmff94', steps=iter_num)
        conf.write(format='pdb', filename=pdb_out, overwrite=True)

    else:

        from rdkit.Chem import AllChem as Chem

        conf = Chem.MolFromSmiles(smile)
        conf = Chem.AddHs(conf)
        charge = Chem.GetFormalCharge(conf)
        Chem.EmbedMolecule(conf)
        Chem.MMFFOptimizeMolecule(conf,
                                  mmffVariant='MMFF94',
                                  maxIters=iter_num)
        Chem.MolToPDBFile(conf, filename=pdb_out)

    # Replace the residue name 'UNL' found in the written file by `mol_name`.
    coor = pdb_manip.Coor(pdb_out)
    index_list = coor.get_index_selection(selec_dict={'res_name': ['UNL']})
    coor.change_index_pdb_field(index_list, change_dict={'res_name': mol_name})
    coor.write_pdb(pdb_out, check_file_out=False)

    return (charge)
Пример #9
0
def generateXYZ(smiPath, ogMonomer):
	"""Build a 3D structure from a SMILES file and write it as XYZ.

	The first line of ``smiPath`` is read as SMILES, hydrogens are added, a
	conformer is embedded and UFF-optimised, and the coordinates are written
	to a file whose name is ``smiPath`` with its last four characters
	replaced by ``.xyz``. The same coordinates are echoed to stdout and
	``grabAndSpin`` is then run on the written file.

	``ogMonomer`` is not used by the current implementation.

	Returns the path of the XYZ file.
	"""
	# The file objects are managed by ``with`` so they are closed even when
	# a later step raises. ``next()`` and single-argument ``print(...)``
	# behave the same on Python 2 and Python 3.
	with open(smiPath, "r") as smiFile:
		smiles = str(next(smiFile))
	mol1 = Chem.MolFromSmiles(smiles)

	# Add missing Hs - recommended for 3D structure generation
	mol = Chem.AddHs(mol1)

	# EmbedMolecule sets up a 3D representation of the molecule
	Chem.EmbedMolecule(mol)

	# Force-field geometry optimisation
	Chem.UFFOptimizeMolecule(mol)

	xyzPath = smiPath[:-4] + ".xyz"
	with open(xyzPath, 'w') as newFile:
		newFile.write(str(mol.GetNumAtoms())+"\n" + "$Comment" + " \n")
		# Print the generated conformation in xyz format
		print("%d" % mol.GetNumAtoms())
		print("Generated by %s" % argv[0])
		for i in range(0,mol.GetNumAtoms()):
			atom = mol.GetAtomWithIdx(i).GetSymbol()
			coords = mol.GetConformer().GetAtomPosition(i)
			print("%6s%12.4f%12.4f%12.4f" % (atom,coords.x,coords.y,coords.z))
			newFile.write(str(atom) + " " + str(coords.x) +" " + str(coords.y) +" " +str (coords.z) + "\n")
	print("			Recusive spinning Dx")
	grabAndSpin(xyzPath)

	return xyzPath
Пример #10
0
def prepare_mol_2(mol, property_name="", do_charge=False):
    """Attach a per-atom weighting property to ``mol``.

    * ``do_charge`` true: Gasteiger charges are computed and validated with
      ``check_mol``; the property name becomes ``"_GasteigerCharge"``.
    * otherwise, with a ``property_name``: the molecule-level property of
      that name is split on newlines, converted to floats and copied onto
      the atoms (hydrogens are removed first).
    * otherwise: hydrogens are added and every counted atom receives the
      weight ``1 / n_atoms`` under the name ``"equal_w"``.

    :param mol: RDKit molecule.
    :param property_name: name of the molecule property holding the values.
    :param do_charge: compute Gasteiger charges instead of reading values.
    :return: ``(mol, property_name, err)`` where ``err`` is the result of
        ``check_mol`` in the charge branch and ``0`` otherwise.
    """
    err = 0
    if do_charge:
        Chem.ComputeGasteigerCharges(mol)
        property_name = "_GasteigerCharge"
        err = check_mol(mol, property_name, do_charge)
        return mol, property_name, err

    # NOTE(review): the atom count is taken before hydrogens are removed or
    # added below, so it can differ from the final atom count - confirm this
    # is intended.
    n_at = mol.GetNumAtoms()
    if property_name:
        mol = Chem.RemoveHs(mol)
        string_values = mol.GetPropsAsDict()[property_name].split("\n")
        # A list is materialised explicitly: on Python 3 ``map`` is lazy and
        # ``np.asarray(map(...))`` would yield a 0-d object array.
        w = np.asarray([float(value) for value in string_values])
    else:
        mol = Chem.AddHs(mol)
        w = np.ones(n_at) / n_at
        property_name = 'equal_w'

    for atom in range(n_at):
        mol.GetAtomWithIdx(atom).SetDoubleProp(property_name, float(w[atom]))

    return mol, property_name, err
Пример #11
0
def CleanUp(InputFiles):
    """Regenerate 3D structures for SDF inputs and save cleaned copies.

    Each input (a trailing ``.sdf`` is stripped if present) is loaded with
    ``GenerateMolFromSDF``, protonated, embedded, MMFF-optimised, given
    stereochemistry from the 3D structure and written to
    ``<name>cleaned.sdf`` in the current working directory.

    :param InputFiles: iterable of SDF file names, with or without suffix.
    :return: list of the cleaned file names without the ``.sdf`` suffix.
    """
    CleanedInputFiles = []
    cwd = os.getcwd()

    for f in InputFiles:
        if f.endswith('.sdf'):
            f = f[:-4]

        fullf = os.path.join(cwd, f + 'cleaned.sdf')

        m = GenerateMolFromSDF(f)
        m = AllChem.AddHs(m, addCoords=True)
        AllChem.EmbedMolecule(m)
        AllChem.MMFFOptimizeMolecule(m)
        Chem.rdmolops.AssignStereochemistryFrom3D(m)

        save3d = Chem.SDWriter(fullf)
        try:
            save3d.write(m)
        finally:
            # Closing flushes the record to disk and releases the handle.
            save3d.close()

        CleanedInputFiles.append(f + 'cleaned')

    return CleanedInputFiles
Пример #12
0
def hmol(input, output):
    """Read the mol file ``input``, add hydrogens with coordinates and
    write the protonated molecule to the mol file ``output``."""
    # Existing hydrogens in the input are kept (removeHs=False).
    Chem.MolToMolFile(
        AllChem.AddHs(
            Chem.MolFromMolFile(input, removeHs=False),
            addCoords=True,
        ),
        output,
    )
Пример #13
0
    def smiles_to_pdb(smiles_string, name=None):
        """
        Build a 3D molecule from a SMILES string and write it to disk.

        Hydrogens are added, a conformer is embedded with ETKDG, and the
        result is written as ``<name>.mol`` and ``<name>.pdb``.

        :param smiles_string: The hydrogen free smiles string
        :param name: The name of the molecule this will be used when writing the pdb file;
            asked for interactively when omitted
        :return: The name of the written pdb file
        :raises SyntaxError: if the smiles string contains the letter ``H``
        """
        # Originally written by venkatakrishnan; rewritten and extended by Chris Ringrose

        if 'H' in smiles_string:
            raise SyntaxError(
                'Smiles string contains hydrogen atoms; try again.')

        m = AllChem.MolFromSmiles(smiles_string)
        if name is None:
            name = input('Please enter a name for the molecule:\n>')
        m.SetProp('_Name', name)
        mol_hydrogens = AllChem.AddHs(m)
        AllChem.EmbedMolecule(mol_hydrogens, AllChem.ETKDG())
        AllChem.SanitizeMol(mol_hydrogens)

        # The context manager closes (and thereby flushes) the mol file.
        with open(f'{name}.mol', 'w+') as mol_file:
            print(AllChem.MolToMolBlock(mol_hydrogens), file=mol_file)
        AllChem.MolToPDBFile(mol_hydrogens, f'{name}.pdb')

        return f'{name}.pdb'
def make_entry(
    mol: rdkit.Mol, sa_scorer: SyntheticAccesibilityScorer,
):
    """Assemble one result row for ``mol``.

    Hydrogens are added, the three scores are computed with the functions
    listed in ``sa_scorer.sa_funcs`` (exactly three are expected), and the
    first entry of ``fg_names`` whose SMARTS matches the molecule is taken
    as the functional-group name.

    :param mol: molecule to score.
    :param sa_scorer: scorer providing ``sa_funcs`` and ``calculate_sa``.
    :return: tuple ``(smiles, fg_name, sascore, scscore, rfmodel)``;
        ``fg_name`` is ``""`` when nothing matches or matching fails.
    """
    # Ensure hydrogens are added to molecule.
    mol = rdkit.AddHs(mol)
    sascore, scscore, rfmodel = [
        sa_scorer.calculate_sa(mol, func) for func in sa_scorer.sa_funcs
    ]
    try:
        # Stop at the first matching pattern instead of testing them all.
        fg_name = next(
            (
                str(name)
                for name in fg_names
                if mol.GetSubstructMatch(rdkit.MolFromSmarts(fg_names[name]))
            ),
            "",
        )
    except Exception:
        # Best effort: a broken pattern must not lose the scores. Unlike a
        # bare ``except``, KeyboardInterrupt/SystemExit still propagate.
        fg_name = ""
    return (
        rdkit.MolToSmiles(mol),
        str(fg_name),
        float(sascore),
        float(scscore),
        float(rfmodel),
    )
Пример #15
0
    def create_from_smiles(self, smiles_code):
        """Populate ``self.atoms`` and ``self.bonds`` from a SMILES string.

        The molecule is protonated and laid out in 2D with RDKit. One
        ``atom`` object is created per RDKit atom from its atomic number and
        2D position; the structure is then centred and scaled, and one
        ``bond`` object is created per RDKit bond.
        """
        # Base RDKit molecule with hydrogens and a 2D depiction
        rd_mol = Chem.MolFromSmiles(smiles_code)
        rd_mol = AllChem.AddHs(rd_mol, False, False)
        depiction_id = AllChem.Compute2DCoords(rd_mol, False, True)
        AllChem.WedgeMolBonds(rd_mol, rd_mol.GetConformer(depiction_id))

        # Atoms: element plus (x, y) position
        for idx in range(rd_mol.GetNumAtoms()):
            atomic_number = rd_mol.GetAtomWithIdx(idx).GetAtomicNum()
            x = rd_mol.GetConformer().GetAtomPosition(idx).x
            y = rd_mol.GetConformer().GetAtomPosition(idx).y
            self.atoms.append(
                atom(atomic_number, self.atom_radii, np.array([x, y])))
        self.calculate_center()
        self.scale(self.scale_factor)

        # Bonds: end points and bond order
        for rd_bond in rd_mol.GetBonds():
            begin_atom = self.atoms[rd_bond.GetBeginAtomIdx()]
            end_atom = self.atoms[rd_bond.GetEndAtomIdx()]
            bond_order = rd_bond.GetBondTypeAsDouble()
            self.bonds.append(
                bond(begin_atom, end_atom, bond_order, self.scale_factor))
Пример #16
0
    def descriptor_calc(smiles, mol_name):
        '''
        Compute RDKit descriptors for one molecule and store them in ``d``.

        Every descriptor of ``Descriptors.descList`` is evaluated on the
        molecule parsed from ``smiles``; the 3D descriptors listed below are
        evaluated on a protonated, embedded and MMFF-optimised copy. All
        values are stored under ``d[mol_name]``, keyed by descriptor name.
        '''
        parsed = Chem.MolFromSmiles(smiles)

        # Fresh entry for this molecule
        d[mol_name] = {}
        for descriptor_name, descriptor_fn in Descriptors.descList:
            d[mol_name][descriptor_name] = descriptor_fn(parsed)

        mol_3d = AllChem.AddHs(parsed)
        AllChem.EmbedMolecule(mol_3d)
        AllChem.MMFFOptimizeMolecule(mol_3d)

        # Each key equals the name of the Descriptors3D function used.
        shape_descriptors = (
            'Asphericity',
            'PMI1',
            'PMI2',
            'PMI3',
            'NPR1',
            'NPR2',
            'RadiusOfGyration',
            'InertialShapeFactor',
            'Eccentricity',
            'SpherocityIndex',
        )
        for descriptor_name in shape_descriptors:
            descriptor_fn = getattr(Descriptors3D, descriptor_name)
            d[mol_name][descriptor_name] = descriptor_fn(mol_3d)
Пример #17
0
    def run(self):
        """Protonate each ligand mol file and convert it to mol2.

        For every ``ProasisOut`` row belonging to this crystal/refinement,
        the ligand mol file is read, hydrogens are added, and the result is
        written next to it as ``<start>_<ligand>_h.mol``. ``antechamber`` is
        then run on that file (sybyl atom types, bcc charges, net charge
        taken from the protonated molecule) to produce
        ``<start>_<ligand>.mol2``. The two file names are stored on the row.

        :raises Exception: with the tool's stderr text if ``antechamber``
            writes anything to stderr.
        """
        proasis_out = ProasisOut.objects.filter(proasis=ProasisHits.objects.get(crystal_name_id=self.crystal_id,
                                                                                refinement_id=self.refinement_id))
        for o in proasis_out:
            lig = o.ligand
            # All three paths share one stem; deriving them from it avoids
            # rewriting every "mol" substring elsewhere in the path.
            stem = os.path.join(o.root, o.start, str(o.start + '_' + lig.replace(' ', '')))
            infile = stem + '.mol'
            h_mol_file = stem + '_h.mol'
            outfile = stem + '.mol2'

            rd_mol = Chem.MolFromMolFile(infile, removeHs=False)
            h_rd_mol = AllChem.AddHs(rd_mol, addCoords=True)

            Chem.MolToMolFile(h_rd_mol, h_mol_file)
            o.h_mol = os.path.basename(h_mol_file)
            rd_mol = Chem.MolFromMolFile(h_mol_file, removeHs=False)

            net_charge = AllChem.GetFormalCharge(rd_mol)
            # Argument list without a shell: paths containing spaces or
            # shell metacharacters are passed through verbatim.
            command = ["antechamber", "-i", h_mol_file, "-fi", "mdl", "-o", outfile,
                       "-fo", "mol2", "-at", "sybyl", "-c", "bcc", "-nc", str(net_charge)]
            print(" ".join(command))
            process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = process.communicate()
            out = out.decode('ascii', errors='replace')
            if err:
                raise Exception(err.decode('ascii', errors='replace'))

            print(out)
            print(err)
            o.mol2 = os.path.basename(outfile)
            o.save()
Пример #18
0
    def get_properties(self):
        """
        Derive general molecular properties from ``self.SMILES``.

        Sets ``self.logP``, ``self.logS`` and ``self.Synth_score``.

        Background on the quantities:
            - synthetic accesibility:
                https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3225829/
                (1 = easy to make, 10 = harder)
            - logP (hydrophobicity):
                https://pubs.acs.org/doi/10.1021/ci990307l
                (smaller = more hydrophilic)
            - logS (aqueous solubility):
                https://github.com/PatWalters/solubility
                (smaller = less water soluble)
            - purchasable (ZINC purchasability, not computed here):
                https://github.com/stevenbennett96/chemcost
                (True = has at least 3 vendors on ZINC)

        """

        print('>>> collect molecular properties using RDKit.')
        # All three properties are computed on the same protonated molecule
        # with 2D coordinates.
        mol_with_hs = Chem.AddHs(Chem.MolFromSmiles(self.SMILES))
        mol_with_hs.Compute2DCoords()
        self.logP = Descriptors.MolLogP(mol_with_hs, includeHs=True)
        self.logS = rdkf.get_logSw(mol_with_hs)
        self.Synth_score = rdkf.get_SynthA_score(mol_with_hs)
Пример #19
0
def align_mcs(mols, num_confs,
              ref_sdf='/Users/tom/code_test_repository/arrow_testing/cdk2.sdf'):
    """Align conformers of each molecule onto a reference and export the best.

    For every molecule, hydrogens are added and up to ``num_confs``
    conformers are embedded. The maximum common substructure with the
    reference (first record of ``ref_sdf``) defines the atom mapping; every
    conformer is aligned on the reference and the one with the lowest
    alignment score is canonicalised and exported as a mol block.

    :param mols: iterable of RDKit molecules.
    :param num_confs: number of conformers to request per molecule.
    :param ref_sdf: SD file whose first molecule is the alignment reference.
    :return: ``pyarrow`` array with one mol block per input molecule.
    :raises ValueError: if no conformer could be embedded for a molecule.
    """
    ref_mol = AllChem.SDMolSupplier(ref_sdf, removeHs=False)[0]
    print(f'ref mol has atoms = {ref_mol.GetNumAtoms()}')
    mols_b = copy.deepcopy(mols)
    mol_blocks = []
    for mol in mols_b:
        mol = AllChem.AddHs(mol)
        # Embedding may yield fewer conformers than requested, so the ids
        # actually returned are used instead of range(num_confs).
        conf_ids = list(AllChem.EmbedMultipleConfs(mol, numConfs=num_confs))
        if not conf_ids:
            raise ValueError('No conformer could be embedded for a molecule')
        mcs = rdFMCS.FindMCS([mol, ref_mol])
        match = Chem.MolFromSmarts(mcs.smartsString)
        atom_map = list(zip(mol.GetSubstructMatch(match),
                            ref_mol.GetSubstructMatch(match)))

        # Alignment score of every conformer against the reference
        alignments_scores = [
            rdMolAlign.AlignMol(mol, ref_mol, prbCid=conf_id, atomMap=atom_map)
            for conf_id in conf_ids
        ]

        best_conf_id = conf_ids[int(np.argmin(alignments_scores))]
        AllChem.CanonicalizeConformer(mol.GetConformer(best_conf_id))
        # Export the selected conformer; the default would always be the
        # first one regardless of its score.
        mol_blocks.append(Chem.MolToMolBlock(mol, confId=best_conf_id))
    return pa.array(mol_blocks)
Пример #20
0
def smiles2conformers(smiles, N=10, optimize=True):
    """
    Convert smiles string to N conformers.

    Keyword Arguments:
        smiles (str) - smiles string for molecule
        N (int) - number of conformers to generate using the ETKDG
            algorithm
        optimize (bool) - flag for UFF optimization (default=True)

    Returns:
        mol (RDKit molecule ::class::) - contains the generated conformers,
            or None if the SMILES cannot be parsed or RDKit raises a
            RuntimeError

    """
    # Read SMILES and add Hs
    mol = rdkit.MolFromSmiles(smiles)
    if mol is None:
        print('RDKit error for', smiles)
        return None
    mol = rdkit.AddHs(mol)
    # try based on RuntimeError from RDKit
    try:
        # 2D to 3D with multiple conformers
        cids = rdkit.EmbedMultipleConfs(
            mol=mol,
            numConfs=N,
            useExpTorsionAnglePrefs=True,
            useBasicKnowledge=True,
        )
        # quick UFF optimize, only when requested
        if optimize:
            for cid in cids:
                rdkit.UFFOptimizeMolecule(mol, confId=cid)
    except RuntimeError:
        print('RDKit error for', smiles)
        return None
    return mol
def get_similar_compound(condon):
    """Return the compounds most similar to the query SMILES.

    The query image is rendered with ``save_img``; every compound of
    ``data//kegg_smiles2.txt`` (lines of ``<kegg id> <smiles>``) is compared
    with the query by Dice similarity of radius-2 Morgan fingerprints on the
    protonated molecules, rounded to two decimals.

    :param condon: mapping with the keys ``'smiles'`` (query) and
        ``'MaxLength'`` (maximum number of hits).
    :return: list of ``[kegg_id, name, smiles, score]`` sorted by descending
        score, where ``name`` is ``compound_dict[kegg_id][0]``. Only
        compounds with a score above zero are reported, so the list may be
        shorter than ``MaxLength``; it is empty when the query cannot be
        parsed.
    """
    import heapq

    com = condon['smiles']

    save_img(com, 'static//compound_img//smiles_img.png', 300, 300)

    output_num = condon['MaxLength']
    smiles_file_path = 'data//kegg_smiles2.txt'

    with open(smiles_file_path) as file:
        fields = [line.split() for line in file]
    # Blank or truncated lines carry no compound and are skipped.
    records = [(row[0], row[1]) for row in fields if len(row) >= 2]

    mol1 = Chem.MolFromSmiles(com)
    if mol1 is None:
        print('input smiles not exist')
        return []
    fps1 = AllChem.GetMorganFingerprint(AllChem.AddHs(mol1), 2)

    scored = []
    for kegg_id, smiles in records:
        mol2 = Chem.MolFromSmiles(smiles)
        if mol2 is None:
            continue
        fps2 = AllChem.GetMorganFingerprint(AllChem.AddHs(mol2), 2)
        score = round(DataStructs.DiceSimilarity(fps1, fps2), 2)
        if score > 0:
            scored.append((kegg_id, smiles, score))

    # nlargest returns the hits sorted by descending score and copes with a
    # limit of zero or a limit larger than the number of hits.
    best = heapq.nlargest(max(output_num, 0), scored, key=lambda hit: hit[2])
    return [
        [kegg_id, compound_dict[kegg_id][0], smiles, score]
        for kegg_id, smiles, score in best
    ]
Пример #22
0
def predict(mol, uncharged=True):
    """Run the base and acid predictors on a protonated molecule.

    When ``uncharged`` is true the molecule is first passed through RDKit's
    ``Uncharger``. Returns ``(base_dict, acid_dict)`` as produced by
    ``predict_base`` and ``predict_acid``.
    """
    if uncharged:
        mol = rdMolStandardize.Uncharger().uncharge(mol)
    protonated = AllChem.AddHs(mol)
    return predict_base(protonated), predict_acid(protonated)
Пример #23
0
def _calc_3d_coord(mol):
    """Give ``mol`` a 3D conformer, in place.

    ``AddHs`` returns a new molecule rather than modifying its argument, so
    the embedding and MMFF optimisation are carried out on a protonated copy
    and the coordinates of the original atoms are then copied back onto
    ``mol`` (hydrogens are appended after the existing atoms, so the original
    atoms keep their indices). ``mol`` is left untouched when embedding
    fails.

    :param mol: RDKit molecule to receive the conformer.
    :return: None
    """
    n_atoms = mol.GetNumAtoms()
    mol_h = AllChem.AddHs(mol)
    conf_id = AllChem.EmbedMolecule(mol_h, useRandomCoords=True)
    if conf_id < 0:
        # EmbedMolecule reports failure with a negative conformer id.
        logging.warning("Unable to make 3d cords.")
        return
    try:
        AllChem.MMFFOptimizeMolecule(mol_h)
    except ValueError:
        logging.warning("Unable to make 3d cords.")

    embedded = mol_h.GetConformer(conf_id)
    conformer = AllChem.Conformer(n_atoms)
    for atom_idx in range(n_atoms):
        conformer.SetAtomPosition(atom_idx, embedded.GetAtomPosition(atom_idx))
    mol.RemoveAllConformers()
    mol.AddConformer(conformer, assignId=True)
Пример #24
0
def prepare_mol(mol, property_name, do_charge):
    """
    Sets atomic properties if they are specified in the sdf, otherwise computes them.
    ====================================================================================================================
    :param
    mol: molecule to be analyzed (from rdkit supplier)
    property_name: name of the property to be used ('' for equal weights)
    do_charge: if True, Gasteiger partial charges are computed; if False, the property is read or equal weights
               are assigned
    :return:
    mol: molecule with the atomic property set (H depleted when do_charge is False and no error occurred)
    property_name: updated on the basis of the settings
    err: result of check_mol, or 0 when no check was needed
    ====================================================================================================================
    Francesca Grisoni, 12/2016, v. alpha
    ETH Zurich
    """

    from rdkit.Chem import AllChem as Chem
    err = 0

    if do_charge is False:
        if property_name != '':
            err = check_mol(mol, property_name, do_charge)
            if err != 0:
                # The property cannot be used: there is nothing to assign.
                return mol, property_name, err
            mol = Chem.RemoveHs(mol)
            n_at = mol.GetNumAtoms()
            # extracts the property according to the set name
            string_values = mol.GetPropsAsDict()[property_name].split("\n")
            # A list is materialised explicitly: on Python 3 ``map`` is lazy
            # and ``np.asarray(map(...))`` would yield a 0-d object array.
            w = np.asarray([float(value) for value in string_values])
        else:
            mol = Chem.AddHs(mol)
            n_at = mol.GetNumAtoms()
            w = np.ones(n_at) / n_at
            property_name = 'equal_w'

        # copy the values onto the atoms
        for atom in range(n_at):
            mol.GetAtomWithIdx(atom).SetDoubleProp(property_name, float(w[atom]))

        mol = Chem.RemoveHs(mol)

    # Gasteiger-Marsili Charges
    elif do_charge is True:
        Chem.ComputeGasteigerCharges(mol)
        property_name = '_GasteigerCharge'
        err = check_mol(mol, property_name, do_charge)

    return mol, property_name, err
Пример #25
0
def gen_mol_blocks_from_confs(mols, num_confs):
    """Embed conformers for each molecule and return them as mol blocks.

    :param mols: iterable of RDKit molecules (a deep copy is processed).
    :param num_confs: number of conformers to request per molecule.
    :return: flat list with one mol block per generated conformer, grouped
        by molecule in input order.
    """
    mols_b = copy.deepcopy(mols)
    mol_blocks = []
    for mol in mols_b:
        mol = AllChem.AddHs(mol)
        # Embedding may yield fewer conformers than requested; export the
        # ids that were actually created.
        conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs)
        mol_blocks.extend(
            Chem.MolToMolBlock(mol, confId=conf_id) for conf_id in conf_ids
        )
    return mol_blocks
Пример #26
0
 def smiles2fps(self, smiles):
     """Return the Morgan bit fingerprint of ``smiles`` as a NumPy array.

     The fingerprint (radius 3, ``self.state_size`` bits) is computed on the
     protonated molecule and returned wrapped in an outer array, i.e. with a
     leading axis of length one.
     """
     protonated = AllChem.AddHs(Chem.MolFromSmiles(smiles))
     fingerprint = AllChem.GetMorganFingerprintAsBitVect(
         protonated, 3, nBits=self.state_size)
     bits = np.zeros((1, ))
     DataStructs.ConvertToNumpyArray(fingerprint, bits)
     return np.array([bits])
Пример #27
0
    def index(self, smiles):
        """Build the numeric index vector of a molecule given as SMILES.

        Computed on the protonated molecule, in order: atom count, molecular
        weight (rounded to one decimal, times ten, as int), first element of
        ``self.get_shortest_wiener`` and the rotatable-bond count.

        :param smiles: SMILES string of the molecule.
        :return: ``numpy`` array of the four values.
        """
        protonated = Chem.AddHs(Chem.MolFromSmiles(smiles))
        atom_count = protonated.GetNumAtoms()
        scaled_weight = int(round(Descriptors.MolWt(protonated), 1) * 10)
        wiener = self.get_shortest_wiener(protonated)[0]
        rotatable_bonds = Chem.CalcNumRotatableBonds(protonated)

        # Ring information (pybel based) is currently not part of the index.

        return np.array([atom_count, scaled_weight, wiener, rotatable_bonds])
Пример #28
0
    def from_smiles(cls,
                    smiles: str,
                    add_hs: bool = True,
                    num_confs: int = 10):
        """Create an instance from a SMILES string.

        :param smiles: SMILES string to parse.
        :param add_hs: add hydrogens and embed conformers when true.
        :param num_confs: number of conformers to request.
        :return: ``cls(mol=...)`` built from the resulting molecule.
        """
        parsed = Chem.MolFromSmiles(smiles)
        if not add_hs:
            return cls(mol=parsed)

        protonated = Chem.AddHs(parsed)
        # Conformers are generated only after the hydrogens exist.
        Chem.EmbedMultipleConfs(protonated, numConfs=num_confs)
        return cls(mol=protonated)
Пример #29
0
def calculate_all_MW(molecules):
    """
    Print the molecular weight of every molecule in a DB dictionary.

    ``molecules`` maps a name to its SMILES string: {name: SMILES}. The
    weight is computed on the protonated molecule.

    """
    for name, smiles in molecules.items():
        protonated = rdkit.AddHs(rdkit.MolFromSmiles(smiles))
        weight = Descriptors.MolWt(protonated)
        print(name, '---', smiles, '---', 'MW =', weight, 'g/mol')
Пример #30
0
def calculate_drug_similarity(input_dir='../data/DrugBank5.0_Approved_drugs/',
                              drug_dir='../data/DrugBank5.0_Approved_drugs/',
                              output_file='../data/output.csv'):
    """Write the pairwise Dice similarity of two sets of mol files as CSV.

    Every file under ``drug_dir`` is compared with every file under
    ``input_dir`` using radius-2 Morgan fingerprints of the protonated
    molecules. Ids are the file names up to their first dot.

    :param input_dir: directory prefix of the query mol files.
    :param drug_dir: directory prefix of the reference mol files.
    :param output_file: path of the CSV file to write.
    :return: None
    """
    drugbank_drugs = glob.glob(drug_dir + '*')
    input_drugs = glob.glob(input_dir + '*')

    def _drug_id(path):
        return os.path.basename(path).split('.')[0]

    def _fingerprint(path):
        mol = AllChem.AddHs(Chem.MolFromMolFile(path))
        return AllChem.GetMorganFingerprint(mol, 2)

    # Each input file is parsed and fingerprinted once (on first use)
    # instead of once per reference drug.
    input_fps = None
    drug_similarity_info = {}
    for each_drug_id1 in drugbank_drugs:
        fps = _fingerprint(each_drug_id1)
        if input_fps is None:
            input_fps = [(_drug_id(path), _fingerprint(path))
                         for path in input_drugs]
        drug_similarity_info[_drug_id(each_drug_id1)] = {
            input_drug_id: DataStructs.DiceSimilarity(fps, fps2)
            for input_drug_id, fps2 in input_fps
        }

    df = pd.DataFrame.from_dict(drug_similarity_info)
    df.to_csv(output_file)