Example #1
File: conv.py Project: rhara/cbi
 def read(self, ag):
     sio = io.StringIO()
     prody.writePDBStream(sio, ag)
     cont = sio.getvalue().rstrip()
     sio.close()
     mol = openbabel.OBMol()
     self.ReadString(mol, cont)
     return mol
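Note that read() calls self.ReadString, so the surrounding class presumably derives from openbabel.OBConversion. Below is a hedged, self-contained sketch of the same ProDy-to-OpenBabel round trip using an explicit OBConversion; the function name and the 'pdb' format setup are assumptions, not part of the original example:

    import io
    import prody
    from openbabel import openbabel  # OpenBabel 3.x Python bindings

    def atomgroup_to_obmol(ag):
        conv = openbabel.OBConversion()
        conv.SetInFormat('pdb')            # we feed it PDB-formatted text
        sio = io.StringIO()
        prody.writePDBStream(sio, ag)      # serialize the AtomGroup as PDB records
        mol = openbabel.OBMol()
        conv.ReadString(mol, sio.getvalue().rstrip())
        return mol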
Example #2
    def add_mol(self, mol, keep_chains=False, keep_resi=False):
        """
        This behaves badly when the molecules share chain names
        """
        ag1 = self.ag.copy()
        ag2 = mol.ag.copy()
        if ag1.numCoordsets() != ag2.numCoordsets():
            raise RuntimeError('Atom groups have different numbers of coordinate sets')

        nsets = ag1.numCoordsets()

        chains1 = list(set(ag1.getChids()))
        chains2 = list(set(ag2.getChids()))
        all_chains = chains1 + chains2
        if len(set(all_chains)) != len(all_chains) and keep_chains:
            logger.warning('Two atom groups contain same chain IDs, merging can go wrong')
            if keep_resi and len(set(ag1.getResnums()).intersection(set(ag2.getResnums()))) > 0:
                raise RuntimeError('Refusing to merge atom groups which contain same chain IDs AND residue IDs')

        if not keep_chains:
            if len(chains1) + len(chains2) > len(self._chain_order):
                raise RuntimeError('Total number of chains is too large, out of chain ID letters')

            iter_chains = iter(self._chain_order)
            map1 = {x: next(iter_chains) for x in chains1}
            map2 = {x: next(iter_chains) for x in chains2}

            for old, new in map1.items():
                ag1.select('chain ' + old).setChids(new)
            for old, new in map2.items():
                ag2.select('chain ' + old).setChids(new)

        if not keep_resi:
            resi = 1
            for r in ag1.getHierView().iterResidues():
                r.setResnum(resi)
                resi += 1
            for r in ag2.getHierView().iterResidues():
                r.setResnum(resi)
                resi += 1

        buf = StringIO()
        for i in range(nsets):
            if nsets > 1:
                buf.write('MODEL%9i\n' % (i + 1))
            prody.writePDBStream(buf, ag1, csets=i)
            prody.writePDBStream(buf, ag2, csets=i)
            if nsets > 1:
                buf.write('ENDMDL\n')
            else:
                buf.write('END\n')

        buf.seek(0)
        joint = BasePDB(ag=prody.parsePDBStream(buf))
        joint.renumber(keep_resi=True, keep_chains=True)
        buf.close()
        return joint
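A hedged usage sketch for add_mol; the constructor arguments and file names are hypothetical (the method itself only assumes both objects wrap prody AtomGroups with equal numbers of coordinate sets):

    receptor = BasePDB('receptor.pdb')   # hypothetical constructor arguments
    ligand = BasePDB('ligand.pdb')
    # Chains and residues are renumbered from scratch, avoiding ID clashes:
    merged = receptor.add_mol(ligand, keep_chains=False, keep_resi=False)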
Example #3
def RDKit_Mol_from_ProDy(prody_instance, removeHs=True):
    """
    Creates an RDKit Mol object from a ProDy AtomGroup instance
    :return:
    """
    residue_io = io.StringIO()
    prody.writePDBStream(residue_io, prody_instance)

    return Chem.MolFromPDBBlock(residue_io.getvalue(), removeHs=removeHs)
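A hedged round-trip example for RDKit_Mol_from_ProDy; the input file name is illustrative, and the selection is assumed to be non-empty:

    import prody
    from rdkit import Chem

    protein = prody.parsePDB('1abc.pdb')               # hypothetical input file
    ligand = protein.select('hetero and not water')    # assumes this selection exists
    mol = RDKit_Mol_from_ProDy(ligand, removeHs=False)
    if mol is not None:                                # MolFromPDBBlock returns None on failure
        print(Chem.MolToSmiles(mol))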
Example #4
    def add_hydrogens(self, trim=True, csets=None):
        raise NotImplementedError()  # kept for reference; the code below is unreachable

        output = []
        natoms = -1
        csets = self._make_csets(csets)

        for i in csets:
            if trim:
                p_start = Popen([define.REDUCE_EXE, '-Quiet', '-Trim', '-'], stdin=PIPE, stdout=PIPE,
                                stderr=STDOUT, universal_newlines=True)
                p_finish = Popen([define.REDUCE_EXE, '-Quiet', '-FLIP', '-'], stdin=p_start.stdout, stdout=PIPE,
                                 stderr=STDOUT, universal_newlines=True)
            else:
                p_start = Popen([define.REDUCE_EXE, '-Quiet', '-FLIP', '-'], stdin=PIPE, stdout=PIPE,
                                stderr=STDOUT, universal_newlines=True)
                p_finish = p_start

            prody.writePDBStream(p_start.stdin, self.ag, csets=i)
            p_start.stdin.close()

            output += ['MODEL%9i\n' % (i + 1)]
            # readlines() blocks until Reduce closes its stdout, so no polling loop is needed
            reduced = p_finish.stdout.readlines()

            p_start.wait()
            p_finish.wait()

            natoms_cur = len(list(filter(lambda x: x.startswith('ATOM') or x.startswith('HETATM'), reduced)))
            if i == csets[0]:
                natoms = natoms_cur
            elif natoms != natoms_cur:
                raise RuntimeError('Number of atoms in reduced model %i is different from the first model (%i, %i)' %
                                   (i, natoms_cur, natoms))

            output += reduced
            output += ['ENDMDL\n']

            status = p_finish.returncode
            if status != 0:
                logger.error('Called process returned ' + str(status))

        self.ag = prody.parsePDBStream(StringIO(''.join(output)))
        self.renumber()
        return self
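The two-stage pipe above runs Reduce twice: '-Trim' strips existing hydrogens, then '-FLIP' re-adds them while optimizing flippable groups. A minimal sketch of the same stream-through-Reduce pattern with subprocess.run, assuming the reduce executable is on PATH (define.REDUCE_EXE in the original):

    import subprocess
    from io import StringIO
    import prody

    def run_reduce(ag, cset=0):
        buf = StringIO()
        prody.writePDBStream(buf, ag, csets=cset)
        # '-' tells Reduce to read the PDB from stdin; text=True keeps the pipes in text mode
        proc = subprocess.run(['reduce', '-Quiet', '-FLIP', '-'],
                              input=buf.getvalue(), capture_output=True, text=True)
        return prody.parsePDBStream(StringIO(proc.stdout))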
Example #5
 def print_pdb(self, ifg, parsed_pdb, comb):
     vdm_renum = renumber_chids_resnums(self, 'X')
     ifg_renum = renumber_chids_resnums(ifg, 'Y')
     filename = comb.output_dir_pdb + 'iFG_' + str(ifg.count) + '_vdM_' + str(ifg.vdm_count) \
                + '_' + comb.file_tag + '.pdb.gz'
     with gzip.open(filename, 'wt') as pdbfile:
         pr.writePDBStream(pdbfile, vdm_renum)
         pr.writePDBStream(pdbfile, ifg_renum)
         if ifg.contact_atoms_water:
             pr.writePDBStream(pdbfile, ifg.contact_atoms_water)
         if ifg.contact_atoms_metal:
             pr.writePDBStream(pdbfile, ifg.contact_atoms_metal)
         if ifg.contact_atoms_ligand:
             pr.writePDBStream(
                 pdbfile,
                 parsed_pdb.prody_pdb.select('resindex ' + ' '.join(
                     str(ri) for ri in np.unique(
                         ifg.contact_atoms_ligand.getResindices()))))
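writePDBStream accepts any text-mode file object, which is what lets print_pdb append several selections into a single compressed file. The same pattern in isolation (file names are illustrative):

    import gzip
    import prody as pr

    ag = pr.parsePDB('input.pdb')                  # hypothetical input
    with gzip.open('output.pdb.gz', 'wt') as fh:   # 'wt' = text mode, required by writePDBStream
        pr.writePDBStream(fh, ag)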
Example #6
 def __str__(self):
     ss = io.StringIO()
     prody.writePDBStream(ss, self._pdb)
     return ss.getvalue()
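A note on the buffer handling above: after writePDBStream the stream position sits at EOF, so read() would return an empty string; getvalue() returns the whole buffer regardless of position. A minimal demonstration, assuming ag is any prody.AtomGroup:

    import io
    import prody

    buf = io.StringIO()
    prody.writePDBStream(buf, ag)
    assert buf.read() == ''        # position is at EOF after the write
    pdb_text = buf.getvalue()      # full contents regardless of position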
Example #7
def searchDali(pdb, chain=None, subset='fullPDB', daliURL=None, **kwargs):
    """Search Dali server with input of PDB ID (or local PDB file) and chain ID.
    Dali server: http://ekhidna2.biocenter.helsinki.fi/dali/
    
    :arg pdb: PDB code or local PDB file for the protein to be searched
    :type pdb: str, Atomic

    :arg chain: chain identifier (only one chain can be assigned for PDB)
    :type chain: str

    :arg subset: fullPDB, PDB25, PDB50, PDB90
    :type subset: str
    """
    
    import requests
    
    LOGGER.timeit('_dali')
    timeout = kwargs.pop('timeout', 120)
    
    if daliURL is None:
        daliURL = "http://ekhidna2.biocenter.helsinki.fi/cgi-bin/sans/dump.cgi"
    
    if isinstance(pdb, Atomic):
        atoms = pdb
        chain_set = set(atoms.getChids())
        if chain and chain not in chain_set:
            raise ValueError('input structure (%s) does not have chain %s'%(atoms.getTitle(), chain))
        
        if len(chain_set) > 1:
            if not chain:
                raise TypeError('the structure (%s) contains more than one chain, therefore a chain identifier '
                                'needs to be specified'%pdb.getTitle())
            atoms = atoms.select('chain '+chain)
        else:
            chain = chain_set.pop()
            
        stream = createStringIO()
        writePDBStream(stream, atoms)
        data = stream.getvalue()
        stream.close()
        files = {"file1" : data}

        pdbId = atoms.getTitle()
        pdb_chain = ''
        dali_title = 'Title_'+pdbId+chain
    elif isinstance(pdb, str):
        if os.path.isfile(pdb):
            atoms = parsePDB(pdb)
            chain_set = set(atoms.getChids())
            # pdbId = "s001"
            filename = os.path.basename(pdb)
            filename, ext = os.path.splitext(filename)
            if ext.lower() == '.gz':
                filename2, ext2 = os.path.splitext(filename)
                if ext2.lower() == '.pdb':
                    filename = filename2
            pdbId = filename
            if chain and chain not in chain_set:
                raise ValueError('input PDB file does not have chain ' + chain)
            
            if len(chain_set) > 1:
                if not chain:
                    raise TypeError('PDB file (%s) contains more than one chain, therefore a chain identifier '
                                    'needs to be specified'%pdb)
                atoms = atoms.select('chain '+chain)
                stream = createStringIO()
                writePDBStream(stream, atoms)
                data = stream.getvalue()
                stream.close()
            else:
                data = open(pdb, "rb")
                chain = chain_set.pop()
            files = {"file1" : data}
            pdb_chain = ''
            dali_title = 'Title_' + pdbId + chain
        else:
            pdbId, ch = _getPDBid(pdb)
            if not chain:
                chain = ch
            if not chain:
                raise TypeError('a chain identifier is needed for the search')
            pdb_chain = pdbId + chain
            dali_title = 'Title_' + pdb_chain
            files = ''
    parameters = { 'cd1' : pdb_chain, 'method': 'search', 'title': dali_title, 'address': '' }
    request = requests.post(daliURL, parameters, files=files)
    try_error = 3
    while try_error >= 0:
        try:
            url = request.url
            break
        except Exception:
            try_error -= 1
            if try_error >= 0:
                LOGGER.sleep(2, '. Connection error happened. Trying to reconnect...')
                continue
            else:
                url = request.url
                break
    if url.split('.')[-1].lower() in ['html', 'php']:
        url = url.replace(url.split('/')[-1], '')
    LOGGER.debug('Submitted Dali search for PDB "{0}{1}".'.format(pdbId, chain))
    LOGGER.info(url)
    LOGGER.clear()
    
    return DaliRecord(url, pdbId, chain, subset=subset, timeout=timeout, **kwargs)
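A hedged usage sketch following ProDy's documented Dali workflow; the PDB ID is illustrative. The search runs server-side, so the returned DaliRecord is polled until results are ready:

    dali_rec = searchDali('1p38', chain='A')
    while not dali_rec.isSuccess:
        dali_rec.fetch()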
Example #9
def save_cluster_elements(elements,
                          ids,
                          out_pdb_name,
                          data_handler,
                          options,
                          cluster_sizes = None):
    """
    Saves a pdb file containing the most representative elements of the clustering.

    @param elements: A list of the representative elements of the clustering we want to extract.

    @param ids: A list with the cluster ids (1 to 1 mapping with 'elements').

    @param out_pdb_name: The complete path of the produced file.

    @param data_handler: The trajectory handler for this run or an array with pdb file paths.

    @param options: postprocessing options to generate the file. Currently a dict with any of these:
        "keep_remarks" - Will add each model's remarks before the model header if present.
            Possible values are:
            - "NONE": do not store remarks (default)
            - "STANDARD": stores remarks that follow the PDB standard
            - "NOT STANDARD": stores remarks not following the PDB standard
            - "ALL": stores all remarks
        "add_source_details" - Will add two remarks before the model tag: the path of the source file and
            the original model number.

    @param cluster_sizes: specific for the representatives case. Each element of this array holds the
    size of its cluster.
        
    """
    keep_remarks = options.get_value("keep_remarks", default_value = "NONE")
    add_source_details = options.get_value("add_source_details", default_value = False)

    file_handler_out = open(out_pdb_name, "w")
    
    data = data_handler.get_data()
    
    merged_structure = data.get_all_elements()
    
    file_handler_out.write("REMARK 000 File created using Prody and pyProCT\n")
    
    if keep_remarks == "NONE" and not add_source_details:
        prody.writePDBStream(file_handler_out, merged_structure, csets =  elements)
    else:
        all_remarks = filter_remarks(data.get_all_remarks(), subset= keep_remarks)
        all_model_numbers = data.get_all_model_numbers()
        
        current_model = 1  # PDB MODEL serial numbers start at 1
        for i, element_id in enumerate(elements): 
            if keep_remarks != "NONE":
                remarks = all_remarks[element_id]
                file_handler_out.write("".join(remarks))
            
            if add_source_details:
                model_number = all_model_numbers[element_id]
                conf_source = data_handler.get_source_of_element(element_id).get_path()
                file_handler_out.write("REMARK 000  source            : %s\n"%conf_source)
                file_handler_out.write("REMARK 000  original model nr : %d\n"%model_number)
                file_handler_out.write("REMARK 000  cluster id : %s\n"%ids[i])
                file_handler_out.write("REMARK 000  cluster element : %d\n"%element_id)
                if cluster_sizes is not None:
                    file_handler_out.write("REMARK 000  cluster population : %s\n"%(cluster_sizes[i]))
            
            file_handler_out.write("MODEL"+str(current_model).rjust(9)+"\n")
            pdb_handler = cStringIO.StringIO()
            prody.writePDBStream(pdb_handler, merged_structure, csets=  element_id)
            # strip the REMARK/MODEL/ENDMDL records that writePDBStream adds for this cset
            lines = filter(lambda line: line[0:6]!="REMARK" and line[0:5]!="MODEL" and line[0:6]!="ENDMDL",
                           pdb_handler.getvalue().splitlines(True))
            pdb_handler.close()
            file_handler_out.write("".join(lines))
            file_handler_out.write("ENDMDL\n")
            current_model += 1

    file_handler_out.close()
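A hypothetical invocation of save_cluster_elements; the handler and options objects are assumptions (pyProCT-style objects exposing get_data()/get_source_of_element() and get_value() respectively), and all values are illustrative:

    save_cluster_elements(
        elements=[10, 42, 97],                 # representative frame indices
        ids=['cluster_0', 'cluster_1', 'cluster_2'],
        out_pdb_name='representatives.pdb',
        data_handler=trajectory_handler,       # assumed pyProCT trajectory handler
        options=postprocess_options,           # e.g. keep_remarks="STANDARD", add_source_details=True
        cluster_sizes=[120, 75, 31],
    )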
Example #10
    def alignment_monstrosity(self,
                              rmsd_cutoff=0.5,
                              use_local_pdb_database=False,
                              verify_substructure=True):
        """
        Consequences of not thinking ahead...
        For each fragment, align all fragment-containing ligands to fragment
        Generate PDBs with aligned coordinate systems
        :param use_local_pdb_database: use structures from a local PDB database instead of downloading via FTP
        :param verify_substructure: passed through to Align_PDB
        :param rmsd_cutoff: fragment alignment RMSD cutoff, anything higher gets rejected
        :return:
        """

        # Create directory for processed PDBs
        rejected_dict = self.load_previously_rejected_pdbs()

        # Create directories...
        if not use_local_pdb_database:
            os.makedirs(self.pdb_bank_dir, exist_ok=True)
        os.makedirs(self.processed_PDBs_path, exist_ok=True)

        # If use_local_pdb_database=False, use PDB FTP to download all structures
        # Otherwise, all relevant structures should be found in the local PDB database
        if not use_local_pdb_database:
            prody.pathPDBFolder(folder=self.pdb_bank_dir)

            for current_fragment in self.pdb_ligand_json:

                # Only download PDBs that aren't already in PDB bank directory
                existing_PDBs = [
                    pdb[:4].lower() for pdb in os.listdir(self.pdb_bank_dir)
                ]
                PDBs_to_download = list(
                    set(self.pdb_ligand_json[current_fragment]['PDBs']) -
                    set(existing_PDBs))

                if len(PDBs_to_download) > 0:
                    print(f'Downloading PDBs for {current_fragment}...\n')
                    prody.fetchPDBviaFTP(*PDBs_to_download)
                else:
                    print(
                        f'All relevant PDBs for {current_fragment} found in {self.pdb_bank_dir}!\n'
                    )

        # Fragment_1, Fragment_2, ...
        for current_fragment in self.pdb_ligand_json:

            # Create directory for processed PDBs
            processed_dir = os.path.join(self.processed_PDBs_path,
                                         current_fragment)
            processed_dir_exists = os.path.exists(processed_dir)
            os.makedirs(processed_dir, exist_ok=True)

            # Get list of already processed PDBs for current_fragment
            already_processed_pdbs = [
                file[:4].lower() for file in os.listdir(processed_dir)
            ]

            # Save ideal_ligand_containers for each fragment so things are only downloaded once
            ideal_ligand_dict = dict()
            ideal_ligand_dict['Ligands'] = dict()
            ideal_ligand_dict['Failed'] = list()

            # Align_PDB class holds all information for the current fragment
            align = Align_PDB(self.user_defined_dir,
                              current_fragment,
                              self.sanitized_smiles_dict[current_fragment],
                              verify_substructure=verify_substructure)

            # Get PDB IDs that are viable for extracting protein-fragment contacts
            reject_pdbs = rejected_dict[current_fragment] if current_fragment in rejected_dict else list()
            if not processed_dir_exists:
                reject_pdbs = list()

            viable_pdbs = list(
                set(self.pdb_ligand_json[current_fragment]['PDBs']) -
                set(reject_pdbs) - set(already_processed_pdbs))

            # For each PDB containing a fragment-containing compound
            for pdbid in viable_pdbs:

                # Return path of PDB file to use for processing
                found_pdb, pdb_path = self.return_PDB_to_use_for_alignments(
                    pdbid, use_local_pdb_database=use_local_pdb_database)

                if not found_pdb:
                    print(f'Cannot find {pdbid}!')
                    continue

                # Proceed with processing if the current PDB passes all filters
                print("\n\nProcessing {}...".format(pdbid))

                # --- Check which ligands contain relevant fragments --- #

                relevant_ligands = self.return_substructure_containing_ligands(
                    pdb_path, self.pdb_ligand_json, current_fragment)

                # Set things up! Get ligands from Ligand Expo if we haven't already tried and failed
                for ligand in relevant_ligands:

                    if not ideal_ligand_dict['Ligands'].get(
                            ligand
                    ) and ligand not in ideal_ligand_dict['Failed']:
                        ideal_ligand_container = Ideal_Ligand_PDB_Container(
                            ligand)

                        if ideal_ligand_container.success:
                            ideal_ligand_dict['Ligands'][
                                ligand] = ideal_ligand_container
                        else:
                            ideal_ligand_dict['Failed'].append(ligand)

                # Create a temp list for ligands that will be pulled from the current PDB
                ligand_container_dict_for_current_pdb = {
                    lig: ideal_ligand_dict['Ligands'][lig]
                    for lig in ideal_ligand_dict['Ligands']
                    if lig in relevant_ligands
                }
                relevant_ligands_prody_dict = align.extract_ligand_records(
                    pdb_path, ligand_container_dict_for_current_pdb)

                # Reject if no ligands with all atoms represented can be found for the given PDB
                if len(relevant_ligands_prody_dict) < 1:
                    if current_fragment in rejected_dict.keys():
                        rejected_dict[current_fragment].append(pdbid)
                    else:
                        rejected_dict[current_fragment] = [pdbid]
                    print(
                        'REJECTED - no target ligands were fully represented in the PDB'
                    )
                    continue

                # --- Perform alignment of PDB fragment substructure (mobile) onto defined fragment (target) --- #

                # ...if PDB has not been processed, rejected, or excluded by the user

                else:

                    # Iterate over ligands found to contain fragments as substructures
                    for ligand_resname, ligand_chain, ligand_resnum in relevant_ligands_prody_dict:

                        # Mapping of fragment atoms to target ligand atoms
                        target_ligand_ideal_smiles = ligand_container_dict_for_current_pdb[
                            ligand_resname].smiles

                        # todo: catch ligands with missing SMILES strings earlier...
                        if target_ligand_ideal_smiles is None:
                            continue

                        target_ligand_pdb_string = io.StringIO()
                        target_ligand_prody = relevant_ligands_prody_dict[(
                            ligand_resname, ligand_chain,
                            ligand_resnum)].select('not hydrogen')
                        prody.writePDBStream(target_ligand_pdb_string,
                                             target_ligand_prody)

                        mapping_successful, fragment_target_map = align.fragment_target_mapping(
                            target_ligand_ideal_smiles,
                            target_ligand_pdb_string)

                        if not mapping_successful:
                            if current_fragment in rejected_dict.keys():
                                rejected_dict[current_fragment].append(pdbid)
                            else:
                                rejected_dict[current_fragment] = [pdbid]
                            print(
                                'REJECTED - failed atom mapping between target and reference fragment'
                            )
                            continue

                        print(
                            f'\n{len(fragment_target_map)} possible mapping(s) of fragment onto {pdbid}:{ligand_resname} found...\n'
                        )

                        # Iterate over possible mappings of fragment onto current ligand
                        rmsd_success = False
                        for count, mapping in enumerate(fragment_target_map):

                            # todo: refactor to use RDKit's atom.GetMonomerInfo() for atom selections...
                            # Determine translation vector and rotation matrix
                            target_coords_and_serials, frag_atom_coords, transformation_matrix = align.determine_rotation_and_translation(
                                mapping, target_ligand_prody)
                            trgt_atom_coords, target_fragment_atom_serials = target_coords_and_serials

                            # Apply transformation to the protein-ligand complex if the RMSD is below the cutoff
                            # Use information from PubChem fragment SMILES in determining correct mappings
                            # Actually, map fragment onto source ligand and use valence information to determine correct mappings
                            rmsd = prody.calcRMSD(
                                frag_atom_coords,
                                prody.applyTransformation(
                                    transformation_matrix, trgt_atom_coords))
                            print(
                                'RMSD of target onto reference fragment:\t{}'.
                                format(rmsd))

                            if rmsd < rmsd_cutoff:
                                transformed_pdb = align.apply_transformation(
                                    pdb_path, ligand_resnum,
                                    target_fragment_atom_serials,
                                    transformation_matrix)

                                # Skip if nothing remains after removing the ligand from transformed_pdb
                                if transformed_pdb.select(
                                        f'not (resname {ligand_resname})'
                                ) is None:
                                    continue

                                transformed_pdb_name = f'{pdbid}_{ligand_resname}_{ligand_chain}_{ligand_resnum}-{count}.pdb'
                                prody.writePDB(
                                    os.path.join(processed_dir,
                                                 transformed_pdb_name),
                                    transformed_pdb)
                                rmsd_success = True

                            else:
                                print(
                                    'REJECTED - high RMSD upon alignment to reference fragment'
                                )

                        if rmsd_success is False:
                            if current_fragment in rejected_dict.keys():
                                rejected_dict[current_fragment].append(pdbid)
                            else:
                                rejected_dict[current_fragment] = [pdbid]

        # Remember rejected PDBs
        with open(self.rejected_dict_pickle, 'wb') as reject_pickle:
            pickle.dump(rejected_dict, reject_pickle)
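The accept/reject core of the loop above, in isolation: superpose the mobile fragment atoms onto the reference and gate on RMSD. A self-contained sketch using ProDy's superposition helpers on plain coordinate arrays (the arrays here are synthetic, not from the pipeline):

    import numpy as np
    import prody

    frag_coords = np.random.rand(8, 3)          # reference fragment atom coordinates
    target_coords = frag_coords + 0.1           # mobile atoms, offset for illustration
    t = prody.calcTransformation(target_coords, frag_coords)   # mobile -> reference
    rmsd = prody.calcRMSD(frag_coords, t.apply(target_coords))
    print('aligned RMSD:', rmsd)                # accept when rmsd < rmsd_cutoff, as above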