def __iter__(self):
        """Iterate over the molecules delivered by the underlying reader ``self.r``.

        Yields the sanitized molecule when ``self.properties`` is None, otherwise a
        ``(sanitized molecule, extracted properties)`` tuple. If an IOError is
        raised while handling a record, a bare ``None`` is yielded for it (in both
        modes). Iteration stops when the reader reports no further record or once
        ``self.nr_mols`` records have been handled.
        """
        with_properties = self.properties is not None
        handled = 0

        while True:
            mol = Chem.BasicMolecule()
            try:
                if not self.r.read(mol):
                    break

                if with_properties:
                    read_properties = self._extract_properties_from_mol(mol)
                    yield sanitize_mol(mol), read_properties
                else:
                    yield sanitize_mol(mol)
            except IOError:
                # A failing record is reported as None instead of aborting.
                yield None

            handled += 1
            if handled == self.nr_mols:
                break
def cdfMol_pdb(pdb, output, name):
    """Convert a PDB file into a CDF file plus a residue listing.

    The PDB text is read with the residue names 'WAT' and 'HIE' replaced by
    'HOH' and 'HIS', parsed into a molecule, prepared with several perception
    calls and written to ``output + name + ".cdf"``. The residue IDs are
    written to ``output + name + "_residue_info.txt"``.

    :param pdb: Path of the PDB file to read.
    :param output: Prefix (e.g. a directory ending in a path separator) that is
                   concatenated in front of ``name`` to build the output paths.
    :param name: Base name of the generated files.
    :return: None. Returns early (without writing anything) if the PDB data
             cannot be read.
    :raises: Re-raises whatever the CDF writer raises after printing a notice.
    """
    initial_time = time.time()
    cdf_mol = Chem.BasicMolecule()
    pdb_mol = Chem.BasicMolecule()

    # Use a context manager so the file handle is closed deterministically.
    with open(pdb, 'r') as pdb_file:
        pdb_str = pdb_file.read().replace('WAT', 'HOH').replace('HIE', 'HIS')
    pdb_reader = Biomol.PDBMoleculeReader(Base.StringIOStream(pdb_str))

    Biomol.setPDBApplyDictAtomBondingToNonStdResiduesParameter(
        pdb_reader, True)
    if not pdb_reader.read(pdb_mol):
        return None

    Chem.calcImplicitHydrogenCounts(pdb_mol, False)
    Chem.perceiveHybridizationStates(pdb_mol, False)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.perceiveSSSR(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, False)
    Chem.setAromaticityFlags(pdb_mol, False)

    cdf_mol.assign(pdb_mol)

    # Give every atom of the copy a fresh coordinates array ...
    for atom in cdf_mol.atoms:
        Chem.set3DCoordinatesArray(atom, Math.Vector3DArray())

    # ... and store the 3D coordinates of the matching PDB atom as its only entry.
    for i in range(cdf_mol.numAtoms):
        Chem.get3DCoordinatesArray(cdf_mol.getAtom(i)).addElement(
            Chem.get3DCoordinates(pdb_mol.getAtom(i)))

    tmp_output = output + name + ".cdf"
    try:
        Chem.FileCDFMolecularGraphWriter(tmp_output).write(cdf_mol)
    except:
        # Only announce the failure here; the exception itself is propagated.
        print('> Cdf_mol writing failure.')
        raise

    residues = Biomol.ResidueList(cdf_mol)
    tmp_output = output + name + "_residue_info.txt"
    with open(tmp_output, 'w') as txt_writer:
        txt_writer.write('residue name_resid_chain\n')
        for res in residues:
            res_id = getResidueID(res)
            txt_writer.write('{}: \n'.format(res_id))

    calc_time = time.time() - initial_time
    print('> Cdf and amino acid residue number list files generated in {}s'.
          format(int(calc_time)))
Exemplo n.º 3
0
def getMoleculeFromAtom(atom: Chem.BasicAtom, protein: Chem.BasicMolecule) -> (Chem.BasicMolecule, list):
    """
    Collect the bonded substructure (ligand) that contains the given atom.

    Starting at ``atom``, the bond graph is walked until no unvisited atom is
    reachable any more. Every visited atom and every traversed bond is put
    into a fragment, which is finally copied into a new molecule.
    :param atom: Seed atom of the walk.
    :param protein: Parent molecule; used to look up the index of each visited atom.
    :return: The found ligand as a new molecule and the list of the ligand atoms'
             indices in the parent molecule.
    """
    ligand = Chem.Fragment()
    pending = {atom}  # discovered atoms that still have to be processed
    visited = set()  # atoms that were already processed
    atomsToRemove = []

    while pending:
        current = pending.pop()
        visited.add(current)
        ligand.addAtom(current)
        atomsToRemove.append(protein.getAtomIndex(current))

        # look at the partner atom of every bond of the current atom
        for bond in current.bonds:
            for partner in bond.atoms:
                if partner != current:
                    if partner not in pending and partner not in visited:
                        pending.add(partner)

                    ligand.addBond(bond)  # ignored if already exists

    Chem.perceiveComponents(ligand, True)
    mol = Chem.BasicMolecule()
    mol.assign(ligand)
    return mol, atomsToRemove
def getSurfaceAtoms(mol):
    """Return a new molecule holding what Chem.SurfaceAtomExtractor extracts from ``mol``.

    :param mol: Structure handed to the extractor (as both of its input arguments).
    :return: A Chem.BasicMolecule assigned from the extracted fragment.
    """
    extracted = Chem.Fragment()
    Chem.SurfaceAtomExtractor().extract(mol, mol, extracted)

    result = Chem.BasicMolecule()
    result.assign(extracted)
    return result
def readPDBFromStream(stream: Base.IOStream):
    """Read one PDB record from ``stream`` and wrap it in a Protein.

    The molecule is sanitized with ``makeHydrogenComplete=True`` before it is
    handed to the Protein constructor. The result of the read call is not
    checked.

    :param stream: Input stream providing the PDB data.
    :return: A Protein built from the molecule that was read.
    """
    # Imported locally, as in the original code (NOTE(review): presumably to
    # avoid import cycles - confirm).
    from Protein import Protein
    from MoleculeTools import sanitize_mol

    structure = Chem.BasicMolecule()
    Biomol.PDBMoleculeReader(stream).read(structure)
    sanitize_mol(structure, makeHydrogenComplete=True)
    return Protein(structure)
Exemplo n.º 6
0
def process():
    """Copy an SDF file while dropping all molecules listed in a second SDF file.

    Command line: ``[input.sdf] [exclude-molecules.sdf] [output.sdf]``.
    Molecules are compared via ``Chem.calcHashCode`` after ``setupMolecule``
    was applied. Progress and a summary are printed to stderr; the process
    exits with status 2 if too few arguments are given.
    """
    def report(*parts):
        # All diagnostics of this tool go to stderr.
        print(*parts, file=sys.stderr)

    if len(sys.argv) < 4:
        report('Usage:', sys.argv[0],
               '[input.sdf] [exclude-molecules.sdf] [output.sdf]')
        sys.exit(2)

    ifs = Base.FileIOStream(sys.argv[1], 'r')
    xifs = Base.FileIOStream(sys.argv[2], 'r')
    ofs = Base.FileIOStream(sys.argv[3], 'w')

    reader = Chem.SDFMoleculeReader(ifs)
    xreader = Chem.SDFMoleculeReader(xifs)
    writer = Chem.SDFMolecularGraphWriter(ofs)
    mol = Chem.BasicMolecule()

    Chem.setMultiConfImportParameter(reader, False)
    Chem.setMultiConfImportParameter(xreader, False)
    Chem.setMultiConfExportParameter(writer, False)

    stats = Stats()
    stats.read = 0
    stats.dropped = 0

    # Pass 1: collect the hash codes of all molecules that must be excluded.
    excluded_hashes = set()

    while xreader.read(mol):
        setupMolecule(mol)
        excluded_hashes.add(Chem.calcHashCode(mol))

    # Pass 2: write every input molecule whose hash code is not excluded.
    while reader.read(mol):
        setupMolecule(mol)

        if Chem.calcHashCode(mol) not in excluded_hashes:
            writer.write(mol)
        else:
            stats.dropped += 1
            report('Dropped Molecule ' + str(stats.read) + ': ' +
                   Chem.generateSMILES(mol) + ' ' + Chem.getName(mol))

        stats.read += 1

        if stats.read % 10000 == 0:
            report('Processed ' + str(stats.read) + ' Molecules...')

    report('')
    report('-- Summary --')
    report('Molecules processed: ' + str(stats.read))
    report('Molecules dropped: ' + str(stats.dropped))
Exemplo n.º 7
0
    def __init__(self, structure: Chem.BasicMolecule = None):
        """Set up the protein's helper attributes and optionally copy a structure.

        :param structure: Molecule whose content is assigned to this instance;
                          skipped when it is None (or otherwise falsy).
        """
        # Helper containers are created before the base class is initialised.
        self.shape: Shape.GaussianShape = Shape.GaussianShape()
        self.shapeFunc: Shape.GaussianShapeFunction = Shape.GaussianShapeFunction()
        self.coordinates: Math.Vector3DArray = Math.Vector3DArray()
        self.ligands: List[Chem.BasicMolecule] = []
        self.surfaceAtoms: Chem.BasicMolecule = Chem.BasicMolecule()

        super(Protein, self).__init__()

        if not structure:
            return

        # NOTE(review): sanitize_mol is imported but not called here.
        from MoleculeTools import sanitize_mol
        self.assign(structure)
def remove_components(mol: Chem.BasicMolecule) -> Chem.BasicMolecule:
    """Return a new molecule consisting of the largest component of ``mol``.

    "Largest" means the highest ``numAtoms``; on a tie the component that
    comes first wins. Structure data attached to ``mol`` is carried over.

    :param mol: Molecule whose components are inspected.
    :return: A new Chem.BasicMolecule assigned from the largest component.
    """
    # default=None mirrors the case of an empty component list.
    largest_component = max(Chem.getComponents(mol),
                            key=lambda comp: comp.numAtoms,
                            default=None)

    result = Chem.BasicMolecule()
    result.assign(largest_component)

    if Chem.hasStructureData(mol):
        Chem.setStructureData(result, Chem.getStructureData(mol))

    return result
Exemplo n.º 9
0
def loadCDFMolecule(fname):
    """Read the first molecule of a CDF file and run the standard preparation calls.

    :param fname: Path of the CDF file.
    :return: The prepared Chem.BasicMolecule, or None if nothing could be read.
    """
    mol = Chem.BasicMolecule()

    if not Chem.FileCDFMoleculeReader(fname).read(mol):
        return None

    # Applied in exactly this order, each with False as second argument.
    preparation_steps = (
        Chem.calcImplicitHydrogenCounts,
        Chem.perceiveHybridizationStates,
        Chem.setAtomSymbolsFromTypes,
        Chem.perceiveSSSR,
        Chem.setRingFlags,
        Chem.setAromaticityFlags,
    )
    for step in preparation_steps:
        step(mol, False)

    return mol
Exemplo n.º 10
0
    def getSurfaceExposedAtoms(self, copy=True) -> Chem.BasicMolecule:
        """
        Recompute the surface atoms of this protein and return them as a molecule.

        The result of getSurfaceAtoms(self) is stored in self.surfaceAtoms on every call.
        :param copy: Whether to return a copy of the surface atoms or the surface atom object itself.
        :return: A new molecule assigned from self.surfaceAtoms if copy is true, otherwise
                 self.surfaceAtoms itself.
        """
        from ProteinTools import getSurfaceAtoms

        self.surfaceAtoms.assign(getSurfaceAtoms(self))

        if not copy:
            return self.surfaceAtoms

        duplicate = Chem.BasicMolecule()
        duplicate.assign(self.surfaceAtoms)
        return duplicate
Exemplo n.º 11
0
def CDPLmolFromSdf(sdf_path, conformation):
    '''
    Read a single CDPL molecule from an SDF file. If conformation is true,
    the molecule is passed through _CDPLgenerateConformation before it is
    returned.

    Input:
    sdf_path (string): path to the sdf file
    conformation (boolean): whether to generate a conformation for the molecule

    Return:
    (CDPL BasicMolecule): the molecule read (or the result of
    _CDPLgenerateConformation); False if the file could not be read
    '''
    mol = Chem.BasicMolecule()
    ifs = Base.FileIOStream(sdf_path, 'r')
    sdf_reader = Chem.SDFMoleculeReader(ifs)

    if not sdf_reader.read(mol):
        # The path needs a placeholder; assuming `log` follows the stdlib logging
        # interface, an extra positional argument without one makes the record
        # fail to format instead of showing the path.
        log.error("COULD NOT READ SDF %s", sdf_path)
        return False
    if conformation:
        return _CDPLgenerateConformation(mol)
    return mol
Exemplo n.º 12
0
def CDPLmolFromSmiles(smiles_path, conformation):
    '''
    Create a CDPL molecule from SMILES. If the argument contains ".smi" it is
    treated as the path of a SMILES file and the first record is read;
    otherwise it is parsed directly as a SMILES string. If conformation is
    true, the molecule is passed through _CDPLgenerateConformation.

    Input:
    smiles_path (string): path to a .smi file or a SMILES string
    conformation (boolean): whether to generate a conformation for the molecule

    Return:
    (CDPL BasicMolecule): the molecule (or the result of
    _CDPLgenerateConformation); False if the SMILES file could not be read
    '''
    mol = Chem.BasicMolecule()
    if ".smi" in smiles_path:
        smi_reader = Chem.FileSMILESMoleculeReader(smiles_path)
        if not smi_reader.read(mol):
            # The path needs a placeholder; assuming `log` follows the stdlib
            # logging interface, an extra positional argument without one makes
            # the record fail to format instead of showing the path.
            log.error("COULD NOT READ Smiles %s", smiles_path)
            return False
    else:
        mol = Chem.parseSMILES(smiles_path)

    if conformation:
        return _CDPLgenerateConformation(mol)
    return mol
Exemplo n.º 13
0
def _CDPLreadFromPDBFile(pdb_file):
    '''
    PRIVATE METHOD
    Read the first molecule of a PDB file. According to the original note it
    is used by the CDPLreadProteinFile method.

    Input:
    pdb_file (string): the path to the pdb file

    Return:
    (CDPL BasicMolecule): the corresponding pdb molecule; False if the file
    could not be read
    '''
    ifs = Base.FileIOStream(pdb_file, 'r')
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    Biomol.setPDBApplyDictAtomBondingToNonStdResiduesParameter(
        pdb_reader, False
    )  #TODO Should this be there for the pdb readin? or also in the config?

    if not pdb_reader.read(pdb_mol):
        # The path needs a placeholder; assuming `log` follows the stdlib logging
        # interface, an extra positional argument without one makes the record
        # fail to format instead of showing the path.
        log.error("COULD NOT READ PDB %s", pdb_file)
        return False

    return pdb_mol
Exemplo n.º 14
0
        def process(sdf_file, psd_file_path):
            """Feed all molecules of an SDF file into a PSD screening database.

            Every molecule read from `sdf_file` is prepared with setupMolecule()
            and handed to a Pharm.PSDScreeningDBCreator opened on
            `psd_file_path`. Progress and a final summary are printed to stdout.
            (Python 2 code: uses print statements and time.clock().)

            :param sdf_file: Path of the SDF input file.
            :param psd_file_path: Path of the PSD database file to create.
            """

            ifs = Base.FileIOStream(sdf_file, 'r')

            reader = Chem.SDFMoleculeReader(ifs)

            mol = Chem.BasicMolecule()

            # Switch the reader's multi-conformer import parameter on.
            Chem.setMultiConfImportParameter(reader, True)

            # NOTE(review): the meaning of the trailing True is not visible here -
            # check the PSDScreeningDBCreator documentation.
            psd_creator = Pharm.PSDScreeningDBCreator(
                psd_file_path, Pharm.PSDScreeningDBCreator.CREATE, True)
            i = 0
            t0 = time.clock()

            while reader.read(mol):
                setupMolecule(mol)

                psd_creator.process(mol)
                i += 1

                # Report every 100 molecules; t0 is reset afterwards, so the
                # printed time covers only the most recent batch of 100.
                if i % 100 == 0:
                    print 'Processed ' + str(i) + ' molecules (' + str(
                        time.clock() - t0), 's elapsed)...'
                    t0 = time.clock()

                # Empty the molecule object before it is reused for the next read.
                mol.clear()

            print ''
            print '-- Summary --'
            print 'Molecules processed: ' + str(psd_creator.numProcessed)
            print 'Molecules rejected: ' + str(psd_creator.numRejected)
            print 'Molecules deleted: ' + str(psd_creator.numDeleted)
            print 'Molecules inserted: ' + str(psd_creator.numInserted)

            psd_creator.close()
Exemplo n.º 15
0
        def generate_ph(pdb, key):
            """Build the ligand/environment interaction pharmacophore for one PDB file.

            The structure is read from `pdb`, the residue whose code equals
            `self.ligand_3_letter_code` is extracted together with its
            environment residues, pharmacophores are generated for both, their
            interactions are analyzed and exclusion volumes are added.
            As a side effect the key of every retained feature is added to
            `self.unique_feature_vector`. (Python 2 code: uses print statements.)

            :param pdb: Path of the PDB file to read.
            :param key: Not used inside this function.
            :return: The interaction pharmacophore, additionally carrying the
                     attributes `fv` (list of feature key strings) and
                     `path_to_pdb`; None if the input cannot be read or the
                     ligand is not found.
            """

            ifs = Base.FileIOStream(pdb, 'r')
            tlc = self.ligand_3_letter_code
            pdb_reader = Biomol.PDBMoleculeReader(ifs)
            pdb_mol = Chem.BasicMolecule()

            print '- Reading input: ', pdb, ' ...'

            if not pdb_reader.read(pdb_mol):
                print '!! Could not read input molecule'
                return

            print '- Processing macromolecule', pdb, ' ...'

            # Remove every bond that has a metal atom at either end. The index is
            # only advanced when no bond was removed (presumably because a removal
            # shifts the following bonds down by one - confirm).
            i = 0

            while i < pdb_mol.getNumBonds():
                bond = pdb_mol.getBond(i)

                if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
                    pdb_mol.removeBond(i)
                else:
                    i += 1

            # Structure preparation; the implicit hydrogen counts are calculated
            # a second time after the hydrogens have been made complete.
            Chem.calcImplicitHydrogenCounts(pdb_mol, True)
            Chem.perceiveHybridizationStates(pdb_mol, True)
            Chem.makeHydrogenComplete(pdb_mol)
            Chem.setAtomSymbolsFromTypes(pdb_mol, False)
            Chem.calcImplicitHydrogenCounts(pdb_mol, True)
            Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
            Chem.setRingFlags(pdb_mol, True)
            Chem.setAromaticityFlags(pdb_mol, True)
            Chem.generateHydrogen3DCoordinates(pdb_mol, True)
            ligand = Chem.Fragment()

            print '- Extracting ligand ', tlc, ' ...'

            # Only the first atom carrying the ligand's residue code is used as
            # seed for the residue extraction.
            for atom in pdb_mol.atoms:
                if Biomol.getResidueCode(atom) == tlc:
                    Biomol.extractResidueSubstructure(atom, pdb_mol, ligand,
                                                      False)
                    break

            if ligand.numAtoms == 0:
                print '!! Could not find ligand', tlc, 'in input file'
                return

            Chem.perceiveSSSR(ligand, True)

            lig_env = Chem.Fragment()

            # NOTE(review): 7.0 is presumably the environment cutoff distance in
            # Angstrom - confirm against the Biomol documentation.
            Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
            Chem.perceiveSSSR(lig_env, True)
            print '- Constructing pharmacophore ...'
            lig_pharm = Pharm.BasicPharmacophore()
            env_pharm = Pharm.BasicPharmacophore()
            pharm_gen = Pharm.DefaultPharmacophoreGenerator(False)
            pharm_gen.generate(ligand, lig_pharm)
            pharm_gen.generate(lig_env, env_pharm)
            analyzer = Pharm.DefaultInteractionAnalyzer()
            interactions = Pharm.FeatureMapping()
            analyzer.analyze(lig_pharm, env_pharm, interactions)

            #------------------------- XVOLS

            # Split the interaction mapping into two feature sets (getFeatures is
            # called once with False and once with True); the set obtained with
            # True seeds the resulting pharmacophore.
            int_env_ftrs = Pharm.FeatureSet()
            Pharm.getFeatures(int_env_ftrs, interactions, False)
            int_core_ftrs = Pharm.FeatureSet()
            Pharm.getFeatures(int_core_ftrs, interactions, True)
            int_pharm = Pharm.BasicPharmacophore(int_core_ftrs)

            # H-bond donor/acceptor features get a tolerance of 1.0, all other
            # environment features 1.5.
            for ftr in int_env_ftrs:
                if Pharm.getType(
                        ftr
                ) == Pharm.FeatureType.H_BOND_DONOR or Pharm.getType(
                        ftr) == Pharm.FeatureType.H_BOND_ACCEPTOR:
                    Pharm.setTolerance(ftr, 1.0)
                else:
                    Pharm.setTolerance(ftr, 1.5)

            # First set of exclusion volumes: derived from the environment features.
            Pharm.createExclusionVolumes(int_pharm, int_env_ftrs, 0.0, 0.1,
                                         False)
            int_env_ftr_atoms = Chem.Fragment()
            Pharm.getFeatureAtoms(int_env_ftrs, int_env_ftr_atoms)
            int_residue_atoms = Chem.Fragment()
            Biomol.extractResidueSubstructures(int_env_ftr_atoms, lig_env,
                                               int_residue_atoms, True)
            Chem.makeHydrogenDeplete(int_residue_atoms)

            def isAlphaAtom(atom):
                # True for atoms whose residue atom name is 'CA'.
                return Biomol.getResidueAtomName(atom) == 'CA'

            # Second set of exclusion volumes: placed on the 'CA' atoms that
            # remain after all other atoms were removed.
            Chem.removeAtomsIfNot(int_residue_atoms, isAlphaAtom)
            Pharm.createExclusionVolumes(int_pharm, int_residue_atoms,
                                         Chem.Atom3DCoordinatesFunctor(), 1.0,
                                         2.0, False)

            # Collect the keys of all features that have a substructure and are
            # not of the type named 'XV' in ftype_names.
            features_in_ph = []
            for int_ftr in int_pharm:
                if Pharm.hasSubstructure(int_ftr) == False:
                    continue
                elif ftype_names[Pharm.getType(int_ftr)] == 'XV':
                    continue
                feature_id = generate_key(int_ftr)
                features_in_ph.append(str(feature_id))
                self.unique_feature_vector.add(str(feature_id))

            # Attach the collected keys and the source path to the result object.
            int_pharm.fv = features_in_ph
            int_pharm.path_to_pdb = pdb

            return int_pharm
Exemplo n.º 16
0
import CDPL.Chem as Chem
import CDPL.Math as Math


def process():
    """Read a training set and experimental logP values (start of the routine).

    Command line: ``training-set.sdf logP-data regression-coeff-file``.
    For every molecule of the SDF file one line is read from the logP data
    file and converted to float, then the molecule is prepared with several
    perception calls. Exits with status 2 if too few arguments are given.

    NOTE(review): only the beginning of this routine is visible in this
    excerpt; the use of mlr_model, xlogp_calc, histo and coeff_os is not shown.
    The indentation was normalised to four spaces because the original mixed
    tabs and spaces, which Python rejects.
    """
    if len(sys.argv) < 4:
        print('Usage:', sys.argv[0], 'training-set.sdf logP-data regression-coeff-file', file=sys.stderr)
        sys.exit(2)

    struct_is = Base.FileIOStream(sys.argv[1], 'r')
    exp_logp_is = Base.FileIOStream(sys.argv[2], 'r')
    coeff_os = Base.FileIOStream(sys.argv[3], 'w')

    mlr_model = Math.DMLRModel()
    sdf_reader = Chem.SDFMoleculeReader(struct_is)
    mol = Chem.BasicMolecule()
    xlogp_calc = Chem.XLogPCalculator()

    histo = Math.DVector()
    histo.resize(Chem.XLogPCalculator.FEATURE_VECTOR_SIZE)

    Chem.setMultiConfImportParameter(sdf_reader, False)

    while sdf_reader.read(mol):
        # One experimental value per molecule, in file order.
        exp_logp = float(exp_logp_is.readline())

        Chem.perceiveComponents(mol, False)
        Chem.perceiveSSSR(mol, False)
        Chem.setRingFlags(mol, False)
        Chem.calcImplicitHydrogenCounts(mol, False)
        Chem.perceiveHybridizationStates(mol, False)
def process():
    """Export every conformation of a CDF molecule as VTK line data plus a PVD index.

    Command line: ``[input.cdf] [output directory]``. For each conformation
    (frame) one line data set is written via pyevtk; it consists of a spline
    through the backbone 'C' atoms followed by two half-bond segments per
    retained bond. A ``.pvd`` file referencing all frames is written alongside.
    Exits with status 2 on wrong usage, unreadable input or a failed write.
    (Python 2 code: uses print statements.)
    """
    if len(sys.argv) < 3:
        print >> sys.stderr, 'Usage:', sys.argv[
            0], '[input.cdf] [output directory]'
        sys.exit(2)

    # Base name of the input file without directory and extension.
    in_fname = path.splitext(path.basename(sys.argv[1]))[0]
    mol = Chem.BasicMolecule()
    cdf_reader = Chem.FileCDFMoleculeReader(sys.argv[1])
    # NOTE(review): pvd_file is never closed explicitly in this function.
    pvd_file = open(path.join(sys.argv[2], in_fname + '.pvd'), 'w')

    Util.writePVDHeader(pvd_file)

    print >> sys.stderr, '- Processing CDF-file:', sys.argv[1], '...'

    if not cdf_reader.read(mol):
        print '!! Could not read file'
        sys.exit(2)

    # Control points of the backbone spline: PDB backbone atoms named 'C'.
    backbone_atoms = []

    for atom in mol.atoms:
        if Biomol.isPDBBackboneAtom(atom) and Biomol.getResidueAtomName(
                atom) == 'C':
            backbone_atoms.append(atom)

    # Bonds to draw: everything except bonds touching a 'HOH' residue or a
    # hydrogen atom.
    bond_list = []

    for bond in mol.bonds:
        if Biomol.getResidueCode(
                bond.getAtom(0)) == 'HOH' or Biomol.getResidueCode(
                    bond.getAtom(1)) == 'HOH':
            continue

        if Chem.getType(bond.getAtom(0)) == Chem.AtomType.H or Chem.getType(
                bond.getAtom(1)) == Chem.AtomType.H:
            continue

        bond_list.append(bond)

    num_confs = Chem.getNumConformations(mol)

    # Four points per bond (atom 1, midpoint, midpoint, atom 2) plus two points
    # per spline segment; this matches the entries written in the loops below.
    num_coords = len(bond_list) * 4 + (
        len(backbone_atoms) * SPLINE_POINTS_PER_BB_ATOM - 1) * 2
    bond_ctr = Math.Vector3D()
    i = 0

    # One iteration per conformation; i is the conformation (frame) index.
    while i < num_confs:
        line_x_coords = numpy.ndarray(num_coords, numpy.float32)
        line_y_coords = numpy.ndarray(num_coords, numpy.float32)
        line_z_coords = numpy.ndarray(num_coords, numpy.float32)
        atom_types = numpy.ndarray(num_coords, numpy.uint32)

        spline_ctrl_points = numpy.ndarray((len(backbone_atoms), 3),
                                           numpy.float32)
        j = 0

        # Fill the control point array with the backbone atom positions of
        # conformation i.
        for atom in backbone_atoms:
            atom_pos = Chem.getConformer3DCoordinates(atom, i)

            spline_ctrl_points[j, 0] = atom_pos(0)
            spline_ctrl_points[j, 1] = atom_pos(1)
            spline_ctrl_points[j, 2] = atom_pos(2)
            j += 1

        spline_pts = spline(spline_ctrl_points,
                            len(backbone_atoms) * SPLINE_POINTS_PER_BB_ATOM)
        # j now indexes the next free slot of the output arrays, k the spline point.
        j = 0
        k = 0

        # Each pair of consecutive spline points becomes one line segment;
        # spline points are stored with atom type 0.
        while k < (len(backbone_atoms) * SPLINE_POINTS_PER_BB_ATOM - 1):
            line_x_coords[j] = spline_pts[0][k]
            line_y_coords[j] = spline_pts[1][k]
            line_z_coords[j] = spline_pts[2][k]
            atom_types[j] = 0
            j += 1

            line_x_coords[j] = spline_pts[0][k + 1]
            line_y_coords[j] = spline_pts[1][k + 1]
            line_z_coords[j] = spline_pts[2][k + 1]
            atom_types[j] = 0
            j += 1
            k += 1

        # Each bond becomes two segments that meet at the bond midpoint; every
        # half carries the type of the atom it starts/ends at.
        for bond in bond_list:
            atom1 = bond.getAtom(0)
            atom2 = bond.getAtom(1)

            atom1_pos = Chem.getConformer3DCoordinates(atom1, i)
            atom2_pos = Chem.getConformer3DCoordinates(atom2, i)

            atom1_type = Chem.getType(atom1)
            atom2_type = Chem.getType(atom2)

            # Midpoint of the bond: (pos1 + pos2) * 0.5
            bond_ctr.assign(atom1_pos)
            bond_ctr += atom2_pos
            bond_ctr *= 0.5

            line_x_coords[j] = atom1_pos(0)
            line_y_coords[j] = atom1_pos(1)
            line_z_coords[j] = atom1_pos(2)
            atom_types[j] = atom1_type
            j += 1

            line_x_coords[j] = bond_ctr(0)
            line_y_coords[j] = bond_ctr(1)
            line_z_coords[j] = bond_ctr(2)
            atom_types[j] = atom1_type
            j += 1

            line_x_coords[j] = bond_ctr(0)
            line_y_coords[j] = bond_ctr(1)
            line_z_coords[j] = bond_ctr(2)
            atom_types[j] = atom2_type
            j += 1

            line_x_coords[j] = atom2_pos(0)
            line_y_coords[j] = atom2_pos(1)
            line_z_coords[j] = atom2_pos(2)
            atom_types[j] = atom2_type
            j += 1

        # Resize all arrays to j, the number of entries written above.
        line_x_coords.resize(j)
        line_y_coords.resize(j)
        line_z_coords.resize(j)
        atom_types.resize(j)

        out_fname = in_fname + '_frame_no_' + str(i)
        out_path = path.join(sys.argv[2], out_fname)
        line_data = {'atom_type': atom_types}

        print >> sys.stderr, '- Writing structure data for frame', i, '...'

        if not pyevtk.hl.linesToVTK(out_path,
                                    line_x_coords,
                                    line_y_coords,
                                    line_z_coords,
                                    pointData=line_data):
            print '!! Could not write output file'
            sys.exit(2)

        # Register the frame's file in the PVD index.
        Util.writePVDEntry(pvd_file, i, out_fname, 'vtu')

        i += 1

    Util.writePVDFooter(pvd_file)
Exemplo n.º 18
0
def generate_ph(pdb, args, df_constructor, ts):
    """Analyze the ligand/environment interactions of one PDB structure.

    The structure is read from `pdb`, the residue whose code equals
    `args.ligand_three_letter_code` is extracted together with its environment
    residues, pharmacophores are generated for both and their interactions are
    passed to outputInteractions(). (Python 2 code: uses print statements.)

    :param pdb: Path of the PDB file to read.
    :param args: Object providing the attribute `ligand_three_letter_code`.
    :param df_constructor: Passed to and returned (updated) by outputInteractions().
    :param ts: Only referenced in commented-out debugging code.
    :return: Tuple ``(df_constructor, interaction_at_ts)`` as returned by
             outputInteractions(). NOTE(review): on a read failure or a missing
             ligand a bare None is returned instead of a tuple - callers that
             unpack the result need to handle that.
    """

    ifs = Base.FileIOStream(pdb, 'r')
    tlc = args.ligand_three_letter_code
    pdb_reader = Biomol.PDBMoleculeReader(ifs)
    pdb_mol = Chem.BasicMolecule()

    print '- Reading input: ', pdb, ' ...'

    if not pdb_reader.read(pdb_mol):
        print '!! Could not read input molecule'
        return

    print '- Processing macromolecule', pdb, ' ...'

    # Remove every bond that has a metal atom at either end. The index is only
    # advanced when no bond was removed (presumably because a removal shifts
    # the following bonds down by one - confirm).
    i = 0

    while i < pdb_mol.getNumBonds():
        bond = pdb_mol.getBond(i)

        if Chem.isMetal(bond.atoms[0]) or Chem.isMetal(bond.atoms[1]):
            pdb_mol.removeBond(i)
        else:
            i += 1

    # Start from an implicit hydrogen count of 0 on every atom before the
    # counts are calculated below.
    for a in pdb_mol.atoms:
        Chem.setImplicitHydrogenCount(a, 0)

    # Structure preparation; the implicit hydrogen counts are calculated a
    # second time after the hydrogens have been made complete.
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Chem.perceiveHybridizationStates(pdb_mol, True)
    Chem.makeHydrogenComplete(pdb_mol)
    Chem.setAtomSymbolsFromTypes(pdb_mol, False)
    Chem.calcImplicitHydrogenCounts(pdb_mol, True)
    Biomol.setHydrogenResidueSequenceInfo(pdb_mol, False)
    Chem.setRingFlags(pdb_mol, True)
    Chem.setAromaticityFlags(pdb_mol, True)
    Chem.generateHydrogen3DCoordinates(pdb_mol, True)
    Chem.calcFormalCharges(pdb_mol, True)
    ligand = Chem.Fragment()

    print '- Extracting ligand ', tlc, ' ...'

    # Only the first atom carrying the ligand's residue code is used as seed
    # for the residue extraction.
    for atom in pdb_mol.atoms:
        if Biomol.getResidueCode(atom) == tlc:
            Biomol.extractResidueSubstructure(atom, pdb_mol, ligand, False)
            break

    if ligand.numAtoms == 0:
        print '!! Could not find ligand', tlc, 'in input file'
        return

    Chem.perceiveSSSR(ligand, True)

    lig_env = Chem.Fragment()

    # NOTE(review): 7.0 is presumably the environment cutoff distance in
    # Angstrom - confirm against the Biomol documentation.
    Biomol.extractEnvironmentResidues(ligand, pdb_mol, lig_env, 7.0)
    Chem.perceiveSSSR(lig_env, True)
    print '- Constructing pharmacophore ...'
    lig_pharm = Pharm.BasicPharmacophore()
    env_pharm = Pharm.BasicPharmacophore()
    pharm_gen = Pharm.DefaultPharmacophoreGenerator(True)
    pharm_gen.generate(ligand, lig_pharm)
    pharm_gen.generate(lig_env, env_pharm)
    #Pharm.FilePMLFeatureContainerWriter('./test/lig_ph_' + str(ts) + '.pml').write(lig_pharm)

    # Map ligand features onto environment features and hand the mapping to
    # outputInteractions(), which returns the updated constructor and the
    # interactions of this structure.
    analyzer = Pharm.DefaultInteractionAnalyzer()
    interactions = Pharm.FeatureMapping()
    analyzer.analyze(lig_pharm, env_pharm, interactions)
    df_constructor, interaction_at_ts = outputInteractions(
        lig_pharm, env_pharm, interactions, df_constructor)
    #Chem.FileSDFMolecularGraphWriter('./test/ligand_' + str(ts) + '.sdf').write(ligand)

    return df_constructor, interaction_at_ts
Exemplo n.º 19
0
def cleanStructures():
    """Run processMolecule() over an SDF file and split kept from dropped molecules.

    Command line:
    ``[input.sdf] [output.sdf] [dropped.sdf] [start_index] [[count]]``.
    Reading starts at record ``start_index``; if ``count`` is given and
    positive, at most that many records are handled. Molecules for which
    processMolecule() returns a truthy result are written to the output file,
    all others to the dropped file. Progress and a summary go to stderr; the
    process exits with status 2 if too few arguments are given.
    """
    def report(*parts):
        # All diagnostics of this tool go to stderr.
        print(*parts, file=sys.stderr)

    if len(sys.argv) < 5:
        report('Usage:', sys.argv[0],
               '[input.sdf] [output.sdf] [dropped.sdf] [start_index] [[count]]')
        sys.exit(2)

    ifs = Base.FileIOStream(sys.argv[1], 'r')
    ofs = Base.FileIOStream(sys.argv[2], 'w')
    dofs = Base.FileIOStream(sys.argv[3], 'w')
    offset = int(sys.argv[4])
    # A count of 0 means "no limit".
    count = int(sys.argv[5]) if len(sys.argv) > 5 else 0

    reader = Chem.SDFMoleculeReader(ifs)
    writer = Chem.SDFMolecularGraphWriter(ofs)
    dwriter = Chem.SDFMolecularGraphWriter(dofs)
    mol = Chem.BasicMolecule()

    stats = Stats()
    stats.read = 0
    stats.dropped = 0
    stats.modified = 0

    Chem.setMultiConfImportParameter(reader, False)
    Chem.setMultiConfExportParameter(writer, False)
    Chem.setMultiConfExportParameter(dwriter, False)

    if offset > 0:
        report('Skipping Molecules to Start Index ' + str(offset))
        reader.setRecordIndex(offset)

    # stats.read counts absolute record indices, hence it starts at the offset.
    stats.read = offset

    while reader.read(mol):
        proc_mol = processMolecule(mol, stats)

        if not proc_mol:
            stats.dropped += 1
            dwriter.write(mol)
            report('Dropped Molecule ' + str(stats.read) + ': ' +
                   generateSMILES(mol) + ' ' + Chem.getName(mol))
        else:
            writer.write(proc_mol)

        stats.read += 1
        handled = stats.read - offset

        if stats.read % 10000 == 0:
            report('Processed ' + str(handled) + ' Molecules...')

        if count > 0 and handled >= count:
            break

    report('')
    report('-- Summary --')
    report('Molecules processed: ' + str(stats.read - offset))
    report('Molecules dropped: ' + str(stats.dropped))
    report('Molecules modified: ' + str(stats.modified))
def mol_from_smiles(smiles: str) -> Chem.BasicMolecule:
    """Parse a SMILES string and return the result of sanitize_mol() for it.

    The result of the read call is not checked.

    :param smiles: SMILES string to parse.
    :return: Whatever sanitize_mol() returns for the molecule that was read.
    """
    parsed = Chem.BasicMolecule()
    Chem.SMILESMoleculeReader(Base.StringIOStream(smiles)).read(parsed)
    return sanitize_mol(parsed)
Exemplo n.º 21
0
def readPDBFromStream(stream: Base.IOStream):
    """Read one PDB record from ``stream`` and wrap it in a Protein.

    The result of the read call is not checked.

    :param stream: Input stream providing the PDB data.
    :return: A Protein built from the molecule that was read.
    """
    structure = Chem.BasicMolecule()
    Biomol.PDBMoleculeReader(stream).read(structure)
    return Protein(structure)
Exemplo n.º 22
0
    # remove ligand and water, just to be sure
    protein.separateLigandFromProtein()

    # calculate surface area for a single carbon atom
    protein.getGaussianShape()
    shapeFunc = protein.shapeFunc
    carbonProteinIndex = None
    for a in protein.atoms:
        if Chem.getType(a) == 6:
            carbonProteinIndex = protein.getAtomIndex(a)
            break
    surfAreaCarbonProtein = shapeFunc.calcSurfaceArea(carbonProteinIndex)  # calculate the surface area contribution here?

    # create a simple carbon molecule with coordinates
    carbon = Chem.BasicMolecule()
    cAtom = carbon.addAtom()
    Chem.setType(cAtom, 6)
    coords = Math.Vector3D()
    coords.assign([1, 2, 3])
    Chem.set3DCoordinates(cAtom, coords)

    # calculate shape and surface area of carbon molecule
    carbonShape, carbonShapeFunc = getGaussianShapeOfMolecule(carbon)
    surfAreaCarbon = carbonShapeFunc.surfaceArea

    # assert that contribution of carbon atom in protein is in fact the entire surface area of a single carbon
    assert surfAreaCarbon == surfAreaCarbonProtein

    # What am I missing here?
    # Summing the surface area of all atoms in the protein yields the surface area of the protein. I find it hard to
Exemplo n.º 23
0
def process():
    """Build a torsion histogram library from torsion rules and 3D structures.

    Command line:
    ``[input torsion rules.xml] [structures.sdf] [output torsion histogram library.sdf]``.
    The torsion rules are loaded, every eligible bond of every input molecule
    is matched against them, each match is passed to processMatch(), and the
    collected data is turned into histograms by processHistograms() before the
    library is saved. All messages go to stderr; the process exits with
    status 2 on wrong usage or when loading/saving the library fails.
    """
    if len(sys.argv) < 4:
        print(
            'Usage:',
            sys.argv[0],
            '[input torsion rules.xml] [structures.sdf] [output torsion histogram library.sdf]',
            file=sys.stderr)
        sys.exit(2)

    tor_lib = ConfGen.TorsionLibrary()

    try:
        tor_lib.load(Base.FileIOStream(sys.argv[1], 'r'))
    except Exception:
        # Narrowed from a bare except so that KeyboardInterrupt/SystemExit are
        # not reported as load errors.
        print('Error while loading input torsion rules:',
              sys.exc_info()[0],
              file=sys.stderr)
        sys.exit(2)

    tor_matcher = ConfGen.TorsionRuleMatcher(tor_lib)

    tor_matcher.findAllRuleMappings(True)
    tor_matcher.findUniqueMappingsOnly(True)
    tor_matcher.stopAtFirstMatchingRule(True)

    mol = Chem.BasicMolecule()
    mol_reader = Chem.FileSDFMoleculeReader(sys.argv[2])

    Chem.setMultiConfImportParameter(mol_reader, False)

    print('- Analyzing input structures...', file=sys.stderr)

    # i is the 1-based number of the current input molecule; it is advanced on
    # every path through the loop, including the error paths.
    i = 1
    rule_to_angle_hists = {}
    coords = Math.Vector3DArray()

    while True:
        try:
            if not mol_reader.read(mol):
                break

        except IOError as e:
            # An unreadable record is reported and skipped.
            print('Error while reading input molecule',
                  i,
                  ':',
                  e,
                  file=sys.stderr)
            i += 1
            continue

        if i % 500 == 0:
            print('   ... At input molecule', i, '...', file=sys.stderr)

        Chem.initSubstructureSearchTarget(mol, False)

        try:
            Chem.get3DCoordinates(mol, coords)

        except Base.ItemNotFound:
            # Molecules without 3D coordinates are reported and skipped.
            print('Could not get 3D-coordinates for molecule',
                  i,
                  file=sys.stderr)
            i += 1
            continue

        for bond in mol.bonds:
            # Skip ring bonds, hydrogen bonds and bonds where either end atom
            # has at most one explicit bond.
            if Chem.getRingFlag(bond):
                continue

            if Chem.isHydrogenBond(bond):
                continue

            if Chem.getExplicitBondCount(
                    bond.getBegin()) <= 1 or Chem.getExplicitBondCount(
                        bond.getEnd()) <= 1:
                continue

            tor_matcher.findMatches(bond, mol, False)

            for match in tor_matcher:
                processMatch(i, match, mol, coords, rule_to_angle_hists)

        i += 1

    print('- Processing torsion angle histograms...', file=sys.stderr)

    processHistograms(tor_lib, rule_to_angle_hists)

    print('- Writing output torsion library...', file=sys.stderr)

    try:
        tor_lib.save(Base.FileIOStream(sys.argv[3], 'w+'))

    except Exception:
        # Narrowed from a bare except, see the load step.
        print('Error while writing torsion library:',
              sys.exc_info()[0],
              file=sys.stderr)
        sys.exit(2)

    print('DONE!', file=sys.stderr)
Exemplo n.º 24
0
def process():
    """Convert an MD topology/trajectory pair into a multi-frame CDF molecule.

    Command line:
    ``[input topology-file] [input coordinates-file] [output CDF-file]``.
    Atoms and bonds are created from the MDAnalysis universe, missing O-H
    bonds of water residues are added, the structure is prepared with several
    perception calls, and the coordinates of every trajectory frame are
    appended to the per-atom coordinates arrays before the molecule is
    written. All messages go to stderr; the process exits with status 2 on
    wrong usage or a failed write. (Python 2 code: uses print statements.)
    """
    if len(sys.argv) < 4:
        print >> sys.stderr, 'Usage:', sys.argv[
            0], '[input topology-file] [input coordinates-file] [output CDF-file]'
        sys.exit(2)

    print >> sys.stderr, '- Processing topology-file', sys.argv[
        1], 'and coordinates-file', sys.argv[2], '...'

    u = MDAnalysis.Universe(sys.argv[1], sys.argv[2])
    cdf_mol = Chem.BasicMolecule()

    cdf_mol.reserveMemoryForAtoms(len(u.atoms))
    cdf_mol.reserveMemoryForBonds(len(u.bonds))

    print >> sys.stderr, '- Num. atoms:', len(u.atoms)
    print >> sys.stderr, '- Num. bonds:', len(u.bonds)

    num_frames = len(u.trajectory)

    print >> sys.stderr, '- Num. frames:', num_frames

    # construct atoms

    print >> sys.stderr, '- Building atoms ...'

    # Maps a water residue id to the indices of the atoms belonging to it.
    waters = {}

    for i, md_atom in enumerate(u.atoms):
        atom = cdf_mol.addAtom()
        sym = MDAnalysis.topology.guessers.guess_atom_element(md_atom.name)

        Chem.setSymbol(atom, sym.title())
        Chem.setImplicitHydrogenCount(atom, 0)
        Biomol.setChainID(atom, md_atom.segid)

        # The residue name 'WAT' is stored as 'HOH'.
        if md_atom.resname == 'WAT':
            Biomol.setResidueCode(atom, 'HOH')
        else:
            Biomol.setResidueCode(atom, md_atom.resname)

        if Biomol.getResidueCode(atom) == 'HOH':
            waters.setdefault(md_atom.resid, []).append(i)

        Biomol.setResidueSequenceNumber(atom, int(md_atom.resid))
        Biomol.setResidueAtomName(atom, md_atom.name)

        # fix positive charge on arginine nitrogen
        if md_atom.resname == 'ARG' and md_atom.name == 'NH2':
            Chem.setFormalCharge(atom, 1)

        coords = [float(coord) for coord in md_atom.position]

        Chem.set3DCoordinates(atom, coords)

        # Per-atom array that later receives one entry per trajectory frame.
        coords_array = Math.Vector3DArray()
        coords_array.reserve(num_frames)

        Chem.set3DCoordinatesArray(atom, coords_array)
        Chem.setPEOECharge(atom, float(md_atom.charge))

    Chem.setAtomTypesFromSymbols(cdf_mol, True)

    # construct bonds

    print >> sys.stderr, '- Building bonds ...'

    for md_bond in u.bonds:
        cdf_mol.addBond(int(md_bond.atoms[0].index),
                        int(md_bond.atoms[1].index))

    print >> sys.stderr, '- Building water atom bonds ...'

    for water in waters.values():
        if len(water) < 2:
            continue

        # Only the first oxygen found in the residue is considered.
        for atom_idx in water:
            water_atom = cdf_mol.atoms[atom_idx]

            if Chem.getType(water_atom) == Chem.AtomType.O:
                # Check the bond count of this oxygen. The previous code tested
                # the stale variable `atom` left over from the atom-building
                # loop (always the last atom created) instead.
                if water_atom.numBonds > 1:
                    break

                for atom_idx2 in water:
                    if Chem.getType(
                            cdf_mol.atoms[atom_idx2]) == Chem.AtomType.H:
                        cdf_mol.addBond(atom_idx, atom_idx2)

                break

    # make sane biomolecule

    Chem.perceiveSSSR(cdf_mol, True)
    Chem.setRingFlags(cdf_mol, True)
    Chem.perceiveBondOrders(cdf_mol, True)
    Chem.perceiveHybridizationStates(cdf_mol, True)
    Chem.setAromaticityFlags(cdf_mol, True)
    Chem.calcFormalCharges(cdf_mol, True)

    # read timesteps and write cdf

    print >> sys.stderr, '- Importing coordinates ...'

    atom_coords = Math.Vector3D()

    # Iterating the trajectory moves the universe to the next frame, so the
    # atom positions read inside the loop belong to frame i; `ts` itself is
    # not needed.
    for i, ts in enumerate(u.trajectory):
        print >> sys.stderr, '- Processing time step', i, '...'

        for md_atom in u.atoms:
            traj_coords = [float(coord) for coord in md_atom.position]

            coords_array = Chem.get3DCoordinatesArray(
                cdf_mol.getAtom(int(md_atom.index)))

            atom_coords[0] = traj_coords[0]
            atom_coords[1] = traj_coords[1]
            atom_coords[2] = traj_coords[2]

            coords_array.addElement(atom_coords)

    print >> sys.stderr, '- Writing output file:'

    if not Chem.FileCDFMolecularGraphWriter(sys.argv[3]).write(cdf_mol):
        print >> sys.stderr, '!! Could not write output file'
        sys.exit(2)