def test_pdb_to_xyzr(self):
        """Compare Biopython's united-atom radii with a reference xyzr file.

        The structure ``PDB/1A8O.pdb`` is parsed permissively (construction
        warnings silenced) and the radius of every atom of its first model is
        compared, in order, with the fourth column of ``PDB/1A8O.xyzr``.
        """
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", PDBConstructionWarning)
            parser = PDBParser(PERMISSIVE=1)
            structure = parser.get_structure("example", "PDB/1A8O.pdb")

        # Reference radii produced with the original shell script; the radius
        # is the fourth whitespace-separated field of each line.
        with open("PDB/1A8O.xyzr") as handle:
            reference_radii = [float(line.split()[3]) for line in handle]

        first_model = structure[0]
        computed_radii = [
            _get_atom_radius(atom, rtype="united")
            for atom in first_model.get_atoms()
        ]
        self.assertEqual(reference_radii, computed_radii)
示例#2
0
def aa_seq_from_coords(pdb_file):
    """
    Gets the sequence of each chain from the residues parsed out of a PDB
    file. Every residue the parser yields for a chain contributes its
    three-letter name; the names are converted to one-letter codes with
    ``seq1``, and names it cannot map are dropped (``undef_code=''``).

    :param pdb_file:
        Path of the PDB file to read.
    :return:
        A dictionary mapping each chain id to its one-letter sequence.
        If the same chain id occurs more than once (e.g. in several
        models), the last occurrence wins.
    :rtype: dict
    """
    structure_id = filename_no_extension(pdb_file)
    structure = PDBParser().get_structure(structure_id, pdb_file)

    return {
        chain.get_id(): seq1(
            ''.join(residue.get_resname()
                    for residue in chain.get_residues()),
            undef_code='')
        for chain in structure.get_chains()
    }
示例#3
0
def load_pdb(fileName, ident=None):
	"""
	Parse a PDB file into a structure object built by CustomStructureBuilder

	Parameters
	----------
	fileName : str
		the path to the file
	ident : str (optional)
		the identity given to the structure object; when omitted (or
		otherwise falsy) the file path itself is used

	Returns
	-------
	values : :class:`paramagpy.protein.CustomStructure`
		a structure object containing the atomic coordinates
	"""
	structure_id = ident if ident else fileName
	builder = CustomStructureBuilder()
	return PDBParser(structure_builder=builder).get_structure(
		structure_id, fileName)
    def GetUniqueChains(self, pdir, pdbID, chains_to_check):
        """Group chains that share the same C-alpha derived sequence.

        The entry file ``pdir + 'pdb' + self.pdbID + '.ent'`` is parsed and,
        for every chain id in ``chains_to_check``, a one-letter string is
        built from the residues of the first model that own a ``CA`` atom
        (letters not listed in ``amino_dict.amino`` are discarded). Chains
        with identical strings fall into the same group.

        Note that the ``pdbID`` argument is not read; ``self.pdbID`` is used
        throughout.

        Returns a 2-tuple: the sorted list holding the first chain key of
        every group, and the sorted list of all groups. Chain keys have the
        form ``<code>_<chain id>``.
        """
        entry_file = 'pdb' + self.pdbID + '.ent'
        # Characters 3..6 of the file name, i.e. what follows the 'pdb' prefix.
        pdb_code = entry_file[3:7]

        bio_parser = PDBParser(PERMISSIVE=True, QUIET=True)
        first_model = bio_parser.get_structure(self.pdbID,
                                               pdir + entry_file)[0]

        chain_sequences = {}
        for chain_id in chains_to_check:
            chain_key = pdb_code + '_' + chain_id
            ca_letters = [
                amino_dict.replace_all(residue.resname, amino_dict.one_letter)
                for residue in first_model[chain_id]
                for atom in residue
                if atom.name == 'CA'
            ]
            sequence = "".join(
                letter for letter in ca_letters if letter in amino_dict.amino)
            # A chain id listed twice keeps the sequence seen first.
            chain_sequences.setdefault(chain_key, sequence)

        # Invert the mapping: sequence -> chain keys carrying that sequence.
        chains_by_sequence = {}
        for chain_key, sequence in chain_sequences.items():
            chains_by_sequence.setdefault(sequence, []).append(chain_key)

        groups = list(chains_by_sequence.values())
        representatives = [group[0] for group in groups]
        return sorted(representatives), sorted(groups)
def main():
    """Extract the x, y coordinates for a list of residues from the given
    PDB file on the command line.

    The residues are specified by their residue sequence positions in a
    file given on the command line. One position per line; blank lines are
    ignored. A PDB file from which to extract coordinates is required on
    the command line. The extracted x, y coordinates of each matching
    residue's alpha carbon (first model only) are written to the output
    file in the CSV format. One pair of coordinates per line.

    A position that matches residues in several chains yields one output
    line per match. A matching residue without a ``CA`` atom raises
    ``KeyError``.
    """
    # parse command-line arguments
    logging.info('Parsing command-line arguments ...')
    args = parse_cmd_args()

    # read in the list of residue IDs
    logging.info('Reading in the list of residue sequence positions from %s',
                 args.input)
    with open(args.input, 'rt') as ipf:
        # Skip blank lines so a stray empty line does not crash int().
        res_ids = [int(line) for line in ipf if line.strip()]

    # parse the PDB file
    logging.info('Parsing the PDB file %s', args.pdb)
    pdb_parser = PDBParser()
    pdb_id = os.path.basename(args.pdb).split('.')[0]
    structure = pdb_parser.get_structure(id=pdb_id, file=args.pdb)
    model = structure[0]

    # get x, y coordinates of alpha carbon for each residue
    logging.info('Extracting x, y coordinates ...')
    # Index the residues once by sequence position instead of rescanning the
    # whole model for every requested position; model order is preserved
    # within each position.
    residues_by_position = {}
    for residue in model.get_residues():
        residues_by_position.setdefault(residue.get_id()[1],
                                        []).append(residue)
    xy_coords = [
        residue['CA'].coord[:2]
        for position in res_ids
        for residue in residues_by_position.get(position, ())
    ]

    # write to file
    logging.info('Writing extracted coordinates to %s', args.output)
    with open(args.output, 'wt') as opf:
        opf.write('\n'.join('%.2f, %.2f' % tuple(coord)
                            for coord in xy_coords))
示例#6
0
    def __init__(self, structure, name='structure', path='.'):
        """Wrap a structure given either as a file name or as a Bio.PDB entity.

        ``structure`` may be a string file name, resolved relative to
        ``path`` and parsed according to its extension (``pdb``/``ent`` or
        ``cif``, case-insensitive), or an existing ``Bio.PDB.Entity.Entity``
        which is stored as-is. ``name`` is used as the structure id when
        parsing and is kept on the instance.

        Raises ``ModuleNotFoundError`` when BioPython cannot be imported and
        ``ValueError`` for an unknown file extension or argument type.
        """
        try:
            from Bio.PDB import PDBParser, MMCIFParser
            from Bio.PDB.Entity import Entity
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "BioPython is a required dependency for structure-related functions!"
            )

        if isinstance(structure, str):
            # Pick the parser from the (lower-cased) file extension.
            extension = structure.split('.')[-1].lower()
            if extension in ('pdb', 'ent'):
                reader = PDBParser(PERMISSIVE=1, QUIET=True)
            elif extension == 'cif':
                reader = MMCIFParser(QUIET=True)
            else:
                raise ValueError(
                    "Unknown filetype for structure file name: {}".format(
                        structure))
            self.structure = reader.get_structure(
                name, os.path.join(path, structure))
        elif isinstance(structure, Entity):
            # Already parsed: keep the entity untouched.
            self.structure = structure
        else:
            raise ValueError(
                "Unknown type for input argument 'structure': {}".format(
                    str(structure)))

        # properties
        self.name = name

        # lazily filled caches
        self.cache = {}
        self._atom_KDTree = None
        self._atom_list = None
        self._surface_residues = None
示例#7
0
def dlSortedStrucs(prots: pd.DataFrame) -> None:
    '''
    Downloads the structures listed in column ``0`` of ``prots`` in PDB
    format into a fresh directory named ``ds_<timestamp>`` and renames each
    downloaded ``pdbXXXX.ent`` file to ``XXXX.pdb``.

    The timestamp is the current local time written as
    ``year_month_day_hour_minute_second`` without zero padding.
    Nothing is done (and nothing is created) when ``prots`` is empty.
    The function always returns None.
    '''
    # Nothing to download for an empty frame.
    if prots.empty:
        return

    now = datetime.datetime.now()
    timestamp = "_".join(
        str(part) for part in (now.year, now.month, now.day, now.hour,
                               now.minute, now.second))
    PDB_dl_dir = "ds_" + timestamp

    pdbl = PDBList()

    # Download every structure named in column 0 into the timestamped dir.
    for pdbid in prots[0]:
        pdbl.retrieve_pdb_file(pdb_code=pdbid,
                               file_format='pdb',
                               pdir=PDB_dl_dir)

    print('\n#############~DOWNLOAD COMPLETE~#############\n')

    # Snapshot the directory first so files are not renamed while it is
    # being scanned, and so the scandir iterator is closed promptly.
    with os.scandir(PDB_dl_dir) as entries:
        downloaded = [
            entry for entry in entries
            if entry.path.endswith(".ent") and entry.is_file()
        ]

    for entry in downloaded:
        stem = entry.name[:-len(".ent")]
        # Strip only the leading 'pdb' prefix: a blanket replace would also
        # eat 'pdb' inside the identifier itself (e.g. entry 1pdb).
        if stem.startswith("pdb"):
            stem = stem[len("pdb"):]
        os.rename(entry.path, os.path.join(PDB_dl_dir, stem + ".pdb"))

    return
示例#8
0
def build_complex(file_1, file_2):
    """
    This function takes the complex output file (or in the first iteration one of the pairwise interactions)
    and another pairwise interaction PDB complex. It superimposes the second structure onto the first using
    the atoms returned by Compute_equal_chain and, if check_clash accepts it, appends the other chain of
    the second structure to the complex.

    @ Input - Two file paths for PDB interactions. file_1 is overwritten with the extended complex on success.
    @ Output - True when the chain was added and file_1 rewritten; False when the superimposition fails,
               when the second structure has no chain other than the superimposed one, or when
               check_clash rejects the moved chain.
    """

    parser = PDBParser(PERMISSIVE=1)

    structure_1 = parser.get_structure('Complex', file_1)
    structure_2 = parser.get_structure('Complex', file_2)

    sup = Superimposer()
    io = PDBIO()

    atoms_fixed, atoms_moving = Compute_equal_chain(structure_1, structure_2)

    try:
        sup.set_atoms(atoms_fixed, atoms_moving)
    except Exception:
        # Superimposition is best effort; a bare except would also swallow
        # KeyboardInterrupt/SystemExit, which must propagate.
        return False

    sup.apply(list(structure_2.get_atoms()))

    # Chain id of the atoms that were superimposed; the chain to add is the
    # (last) chain of the first model that is NOT that one.
    superimposed_chain_id = list(atoms_moving)[0].get_full_id()[2]
    moved_chain = None
    for chain in structure_2[0].get_chains():
        if chain.id != superimposed_chain_id:
            moved_chain = chain

    if moved_chain is None:
        # No partner chain to add (previously an UnboundLocalError).
        return False

    # NOTE(review): despite its name, a truthy check_clash result is treated
    # here as "safe to add" -- confirm against check_clash's definition.
    if check_clash(structure_1, moved_chain):
        with open(file_1, "wt") as out_file:

            for chain in list(structure_1.get_chains()) + [moved_chain]:
                io.set_structure(chain)
                io.save(out_file)

        rename_complex_chains(file_1)

        return True
    return False
示例#9
0
 def test_model_numbering(self):
     """Preserve model serial numbers during I/O.

     Parses the 20-model file ``PDB/1MOT.pdb``, checks that every model's
     ``serial_num`` is one more than its zero-based position/id, writes the
     structure to a temporary file, parses it again and repeats the check.
     """
     def confirm_numbering(struct):
         self.assertEqual(len(struct), 20)
         for idx, model in enumerate(struct):
             # assertEqual, not assertTrue: assertTrue(a, b) only checks
             # that ``a`` is truthy and uses ``b`` as the failure message.
             self.assertEqual(model.serial_num, idx + 1)
             self.assertEqual(model.serial_num, model.id + 1)
     parser = PDBParser()
     struct1 = parser.get_structure("1mot", "PDB/1MOT.pdb")
     confirm_numbering(struct1)
     # Round trip: serialize and parse again
     io = PDBIO()
     io.set_structure(struct1)
     filenumber, filename = tempfile.mkstemp()
     os.close(filenumber)
     try:
         io.save(filename)
         struct2 = parser.get_structure("1mot", filename)
         confirm_numbering(struct2)
     finally:
         # Always remove the temporary file, even when an assertion fails.
         os.remove(filename)
def pdb2fasta(pdb_file, num_chains=None):
    """Build a FASTA-formatted string from the chains of a PDB file.

    The structure id is the file's base name up to its first dot. Each
    chain produces a header ``>{id}:{chain}\\t{length}`` followed by its
    one-letter sequence wrapped at 80 characters per line.

    When ``num_chains`` is given and differs from the number of chains
    found, a warning is printed and an empty string is returned.
    """
    pdb_id = basename(pdb_file).split('.')[0]
    structure = PDBParser().get_structure(pdb_id, pdb_file)

    chain_count = sum(1 for _ in structure.get_chains())
    if num_chains is not None and num_chains != chain_count:
        print('WARNING: Skipping {}. Expected {} chains, got {}'.format(
            pdb_file, num_chains, chain_count))
        return ''

    line_width = 80
    pieces = []
    for chain in structure.get_chains():
        sequence = seq1(''.join(residue.resname for residue in chain))
        pieces.append('>{}:{}\t{}\n'.format(pdb_id, chain.id, len(sequence)))
        pieces.extend(
            sequence[start:start + line_width] + '\n'
            for start in range(0, len(sequence), line_width))
    return ''.join(pieces)
 def test_2(self):
     """Check the parsed header fields of PDB/2BEG.pdb.

     Every listed header entry is compared case-insensitively with the
     expected text.
     """
     structure = PDBParser().get_structure("2BEG", "PDB/2BEG.pdb")
     header = structure.header
     expected_header = {
         "author":
         "T.Luhrs,C.Ritter,M.Adrian,D.Riek-Loher,B.Bohrmann,H.Dobeli,D.Schubert,R.Riek",
         "deposition_date": "2005-10-24",
         "head": "protein fibril",
         "journal":
         "AUTH   T.LUHRS,C.RITTER,M.ADRIAN,D.RIEK-LOHER,B.BOHRMANN,AUTH 2 H.DOBELI,D.SCHUBERT,R.RIEKTITL   3D STRUCTURE OF ALZHEIMER'S AMYLOID-{BETA}(1-42)TITL 2 FIBRILS.REF    PROC.NATL.ACAD.SCI.USA        V. 102 17342 2005REFN                   ISSN 0027-8424PMID   16293696DOI    10.1073/PNAS.0506723102",
         "journal_reference":
         "t.luhrs,c.ritter,m.adrian,d.riek-loher,b.bohrmann, h.dobeli,d.schubert,r.riek 3d structure of alzheimer's amyloid-{beta}(1-42) fibrils. proc.natl.acad.sci.usa v. 102 17342 2005 issn 0027-8424 16293696 10.1073/pnas.0506723102 ",
         "keywords":
         "alzheimer's, fibril, protofilament, beta-sandwich, quenched hydrogen/deuterium exchange, pairwise mutagenesis, protein fibril",
         "name": "3d structure of alzheimer's abeta(1-42) fibrils",
         "release_date": "2005-11-22",
         "structure_method": "solution nmr",
     }
     for field, wanted in expected_header.items():
         # Case is normalised on both sides before comparing.
         self.assertEqual(header[field].lower(), wanted.lower())
示例#12
0
 def extract_structures(self, infolder):
     """
     Runs the Pyry_cleanPDB script on infolder, then parses every *.pdb
     file found there and appends one InStructure per file to
     self.structures.

     Raises PyRy3D_IG_Error when the folder contains no *.pdb file or when
     a parsed file contains no residues.
     """
     # NOTE(review): infolder is interpolated into a shell command line;
     # make sure it never comes from untrusted input.
     os.system("python Modules/Trans/Pyry_cleanPDB.py -q -d " +
               str(infolder))
     pdb_files = glob.glob(str(infolder) + '/*.pdb')
     if not pdb_files:
         raise PyRy3D_IG_Error("The files you provided are not pdb files")
     parser = PDBParser(PERMISSIVE=False, QUIET=True)
     for pdbfile in pdb_files:
         structure = parser.get_structure(str(pdbfile), pdbfile)
         # Parenthesised form prints the same text on Python 2 and 3.
         print(pdbfile)
         # basename works for any path depth and separator, unlike taking
         # the second component of a backslash split.
         filename = os.path.basename(pdbfile)
         struc = InStructure(structure, filename)
         if len(list(structure.get_residues())) == 0:
             raise PyRy3D_IG_Error(
                 "The file you provided for structure %s is not a valid pdb file"
                 % (structure.id))
         self.structures.append(struc)
 def create(self, pdb):
     """Build the amino acid network of ``pdb`` with biographs.

     The network (cutoff ``self.cutoff``, weighted) is stored in
     ``self.net`` and the first model of the parsed file in
     ``self.structure``. Nodes are then relabelled as
     ``<residue code><node label without its first character>:<first
     character of the node label>``, where the residue code is looked up
     in ``self.three2one`` and falls back to the raw residue name. Nodes
     and residues are paired positionally.

     Returns the relabelled network.
     """
     molecule = bg.Pmolecule(pdb)
     self.net = molecule.network(cutoff=self.cutoff, weight=True)
     self.structure = PDBParser().get_structure('X', pdb)[0]
     # (A disabled filter restricting nodes to the pos1..pos2 range used to
     # live here; it is intentionally not applied.)
     residue_codes = [
         self.three2one[res.resname]
         if res.resname in self.three2one else res.resname
         for res in self.structure.get_residues()
     ]
     current_labels = self.net.nodes
     new_labels = [
         code + label[1:] + ':' + label[0]
         for code, label in zip(residue_codes, current_labels)
     ]
     self.net = nx.relabel_nodes(self.net,
                                 dict(zip(current_labels, new_labels)))
     return self.net
示例#14
0
def split_pdb_by_chain(pdb_id):
    """Write every chain of a PDB entry to its own file and collect sequences.

    Reads ``ent_files/pdb<id>.ent`` (id lower-cased) and, for each chain of
    each model, saves ``pdb_chains/<ID>/<ID>-<model id + 1>_<chain>.pdb``
    unless that file already exists. The directory ``pdb_chains/<ID>`` is
    created when missing.

    Returns a dict mapping each output file name to the concatenation of
    the peptide sequences PPBuilder finds in that chain.
    """
    entry = pdb_id.upper()
    chain_dir = "pdb_chains/" + entry
    if not os.path.isdir(chain_dir):
        os.mkdir(chain_dir)

    structure = PDBParser().get_structure(
        pdb_id, "ent_files/pdb" + pdb_id.lower() + ".ent")

    sequences = dict()
    for model in structure:
        model_number = str(model.get_id() + 1)
        for chain in model:
            outfilename = (entry + "-" + model_number + "_" +
                           str(chain.get_id()) + ".pdb")
            target = chain_dir + "/" + outfilename
            if not os.path.isfile(target):
                writer = PDBIO()
                writer.set_structure(chain)
                writer.save(target)
            # Join all peptide fragments of the chain into one sequence.
            chain_seq = Seq("", generic_protein)
            for peptide in PPBuilder().build_peptides(chain):
                chain_seq += peptide.get_sequence()
            sequences[outfilename] = chain_seq
    return sequences
示例#15
0
文件: pdb.py 项目: xiangf/conkit
    def read(self, f_handle, f_id="pdb", distance_cutoff=8, atom_type='CB'):
        """Read a contact file

        The handle is parsed quietly as a PDB structure (always with the
        structure id ``"pdb"``) and the result is handed to ``_read``
        together with the remaining arguments.

        Parameters
        ----------
        f_handle
           Open file handle [read permissions]
        f_id : str, optional
           Unique contact file identifier
        distance_cutoff : int, optional
           Distance cutoff for which to determine contacts [default: 8]
        atom_type : str, optional
           Atom type between which distances are calculated [default: CB]

        Returns
        -------
        :obj:`ContactFile <conkit.core.contactfile.ContactFile>`

        """
        quiet_parser = PDBParser(QUIET=True)
        parsed = quiet_parser.get_structure("pdb", f_handle)
        return self._read(parsed, f_id, distance_cutoff, atom_type)
示例#16
0
def get_mol_from_ligandpdb(ligand):
	"""Collect atom names, indices and elements of a ligand's ideal PDB file.

	Reads ``./pdb_files/<ligand>_ideal.pdb`` and visits every residue whose
	name equals ``ligand``. Returns a 3-tuple:

	* list of atom ids in file order,
	* dict atom id -> zero-based index (serial number minus one),
	* dict atom id -> element.

	Returns ``(None, None, None)`` when the file does not exist or when no
	atom of a matching residue was found.
	"""
	ideal_path = './pdb_files/' + ligand + '_ideal.pdb'
	if not os.path.exists(ideal_path):
		return None, None, None

	atom_names = []
	index_by_name = {}
	element_by_name = {}
	structure = PDBParser().get_structure(ligand, ideal_path)
	for model in structure:
		for chain in model:
			for residue in chain:
				# Only residues named exactly like the ligand are of interest.
				if residue.get_resname() != ligand:
					continue
				for atom in residue:
					atom_name = atom.get_id()
					atom_names.append(atom_name)
					element_by_name[atom_name] = atom.element
					index_by_name[atom_name] = atom.get_serial_number() - 1
	if not index_by_name:
		return None, None, None
	return atom_names, index_by_name, element_by_name
 def setUp(self):
     """Load model 1 of PDB/a_structure.pdb and sanity-check chain A.

     Chain "B" is detached from the model first. The fixture then verifies
     that chain A holds 86 residues with the expected names at both ends,
     and stores the model, the residue list and the search radius on the
     test instance.
     """
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", PDBConstructionWarning)
         parser = PDBParser(PERMISSIVE=True)
         structure = parser.get_structure("X", "PDB/a_structure.pdb")
     model = structure[1]
     model.detach_child("B")
     self.model = model
     # Look at first chain only
     chain_a = list(model["A"].child_list)
     self.assertEqual(86, len(chain_a))
     # (index, expected residue name) for the first four and last three.
     expected_names = (
         (0, "CYS"),
         (1, "ARG"),
         (2, "CYS"),
         (3, "GLY"),
         (-3, "TYR"),
         (-2, "ARG"),
         (-1, "CYS"),
     )
     for index, resname in expected_names:
         self.assertEqual(chain_a[index].get_resname(), resname)
     self.a_residues = chain_a
     self.radius = 13.0
示例#18
0
def get_atoms(pdb_file):
    """Return the names of all atoms found in the given pdb file.

    The file is parsed with Biopython's PDBParser (so Biopython must be
    installed); the structure id is the file name without its last four
    characters. Atom names are collected from the whole structure, in the
    order the parser yields them.

    Prints a message and raises NoProtError when no atom was found.
    """
    structure = PDBParser().get_structure(pdb_file[:-4], pdb_file)
    atom_names = [atom.get_name() for atom in structure.get_atoms()]
    if not atom_names:
        print('404 - No Atoms Found!!!')
        raise NoProtError('404 - No Atoms Found!!!')
    return atom_names
示例#19
0
文件: stats.py 项目: sienkie/cgRNA
def get_bonds(dir, atoms_list):
    """Collect bond lengths between alternating atom types in every chain.

    Every entry matched by ``dir + '/*'`` is parsed as a PDB file. Within
    each chain of the first model the atoms are scanned in order, looking
    alternately for ``atoms_list[0]`` and ``atoms_list[1]``; the
    coordinates found are grouped into consecutive pairs (an unpaired
    trailing coordinate is dropped) and each pair is passed to
    ``bond_length``.

    Returns the list of all values returned by ``bond_length``.
    """
    import os

    p = PDBParser()
    # NOTE: recursive=True has no effect without a '**' in the pattern.
    pdbs = glob.glob(dir + '/*', recursive=True)
    points = []

    for struct in pdbs:
        print(struct)
        # Use the file's base name as structure id; taking the second
        # '/'-separated component breaks for nested or absolute dirs.
        structure_id = os.path.basename(struct).split('.')[0]
        structure = p.get_structure(structure_id, struct)
        for chain in structure[0]:  # model1
            wanted = 0  # index into atoms_list of the atom expected next
            coords = []
            for residue in chain:
                for atom in residue:
                    if atom.get_id() == atoms_list[wanted]:
                        coords.append(list(atom.get_vector()))
                        wanted = (wanted + 1) % 2
            # Only complete pairs are usable.
            paired_count = len(coords) // 2 * 2
            for start in range(0, paired_count, 2):
                # Exactly two coordinates per bond (the previous slice of
                # 100 handed up to 100 coordinates to bond_length).
                points.append(bond_length(coords[start:start + 2]))
    return points
示例#20
0
def check_NMR():
    """Record the experimental method of every PDB entry listed in a CSV.

    Reads the ``pdb`` column of ``pdb_no_missing_residue.csv``, makes sure
    ``./pdbs/pdb<code>.ent`` exists for the first four characters of each
    id (downloading it when missing), and writes ``pdbs_methods.csv`` with
    one row per id holding the header's ``structure_method``, or
    ``'no pdb'`` when the file could not be obtained.
    """
    from Bio.PDB import PDBList, PDBParser
    import pandas as pd
    import os

    pdb_list = pd.read_csv('pdb_no_missing_residue.csv')['pdb'].values
    pdbl = PDBList()
    parser = PDBParser()
    method_list = []
    for pdb_id in pdb_list:
        code = pdb_id[:4]
        ent_path = f'./pdbs/pdb{code.lower()}.ent'
        # Only the download is skipped for files already on disk; every id
        # must still contribute one entry to method_list, otherwise the two
        # columns differ in length and the DataFrame cannot be built.
        if not os.path.exists(ent_path):
            pdbl.retrieve_pdb_file(code, pdir='./pdbs', file_format='pdb')
        if not os.path.exists(ent_path):
            method_list.append('no pdb')
            continue
        structure = parser.get_structure('X', ent_path)
        method_list.append(structure.header['structure_method'])

    df = pd.DataFrame({'pdb': pdb_list, 'method': method_list})
    df.to_csv('pdbs_methods.csv', index=False)
示例#21
0
def get_around_residue(listresids, pdb, chainid, cutoff):
    """Return the residues of a chain lying near the given residues.

    For every atom of the residues ``listresids`` (looked up in chain
    ``chainid`` of the first model of ``pdb``), the C-alpha atoms of the
    same chain within ``cutoff`` of that atom are searched and mapped to
    their residues.

    Returns a 2-tuple: the sorted residue sequence numbers of all residues
    found, and the number of C-alpha atoms in the chain.
    """

    structure = PDBParser(QUIET=True).get_structure('X', pdb)
    chain = structure[0][str(chainid)]
    center_residues = [chain[resi] for resi in listresids]

    # The target level must be 'A' (atoms). Passing the chain id only
    # worked by coincidence for a chain that happens to be called "A".
    center_atoms = Selection.unfold_entities(center_residues, 'A')
    atom_list = [atom for atom in chain.get_atoms() if atom.name == 'CA']

    ns = NeighborSearch(atom_list)
    nearby_residues = {
        res
        for center_atom in center_atoms
        for res in ns.search(center_atom.coord, float(cutoff), 'R')
    }

    return sorted(res.id[1] for res in nearby_residues), len(atom_list)
示例#22
0
def check_file_format(pdb_file: Union[str, Path], make_parser: bool = False):
    """Guess a structure file's format and entry code from its name.

    * ``pdb<code>.ent``  -> format ``'pdb'``, code is the stem without the
      leading ``pdb``
    * ``<code>.cif``     -> format ``'mmcif'``, code is the stem
    * anything else      -> format ``''``, code is the stem

    Only the file name is inspected; the file is never opened.

    Returns ``(file_format, code)``, or ``(file_format, code, parser)``
    when ``make_parser`` is true. In the latter case ``ValueError`` is
    raised for an unrecognised format.
    """
    pdb_file = Path(pdb_file)
    stem, suffix = pdb_file.stem, pdb_file.suffix

    # TODO: reliable check needs to peek into the file
    if suffix == '.ent' and stem.startswith('pdb'):
        file_format, code = 'pdb', stem[3:]
    elif suffix == '.cif':
        file_format, code = 'mmcif', stem
    else:
        file_format, code = '', stem

    if not make_parser:
        return file_format, code

    if file_format == 'pdb':
        parser = PDBParser(PERMISSIVE=True, QUIET=True)
    elif file_format == 'mmcif':
        parser = MMCIFParser(QUIET=True)
    else:
        raise ValueError(f'parser does not support the file format: {str(pdb_file)}')
    return file_format, code, parser
示例#23
0
def extract_beads(pdb_file):
    """
    Write one bead (centre coordinate) per residue of a PDB file to CSV.

    Reads ``data/dock/pdb/{pdb_file}.pdb``, keeps the residues whose name
    is listed (upper-cased) in the ``AA3C`` column of
    ``data/amino_acids.csv``, computes each bead centre with
    ``get_bead_center`` and writes chain id, residue number, residue name
    and x/y/z to ``data/dock/beads/{pdb_file}_bead.csv``.

    :param pdb_file: file name without directory and without extension
    :return: None
    """
    amino_acids = pd.read_csv('data/amino_acids.csv')
    known_residues = {name.upper() for name in amino_acids.AA3C}

    structure = PDBParser().get_structure('X',
                                          f'data/dock/pdb/{pdb_file}.pdb')
    # Residues outside the vocabulary are skipped silently.
    kept = [
        res for res in Selection.unfold_entities(structure, 'R')
        if res.get_resname() in known_residues
    ]

    centers = np.vstack([get_bead_center(res) for res in kept])

    table = pd.DataFrame({
        'chain_id': [res.parent.id for res in kept],
        'group_num': [res.id[1] for res in kept],
        'group_name': [res.get_resname() for res in kept],
        'x': centers[:, 0],
        'y': centers[:, 1],
        'z': centers[:, 2]
    })

    table.to_csv(f'data/dock/beads/{pdb_file}_bead.csv', index=False)
示例#24
0
    def testBioPDB(self):
        """Check that structures built from Bio.PDB match natively read ones.

        The test returns early (after printing a notice) when Bio.PDB
        cannot be imported.
        """
        try:
            from Bio.PDB import PDBParser
        except ImportError:
            print("Can't import Bio.PDB, tests skipped")
            return

        pdb_path = "lib/tests/data/1a0q.pdb"
        bp_structure = PDBParser(QUIET=True).get_structure("Ubiquitin", pdb_path)
        s1 = structureFromBioPDB(bp_structure)
        s2 = Structure(pdb_path)
        self.assertTrue(s1.nAtoms() == s2.nAtoms())

        # make sure we got the insertion code
        self.assertEqual(s1.residueNumber(2286), '82A')

        coord_tolerance = 1e-5
        for i in range(0, s2.nAtoms()):
            self.assertTrue(s1.radius(i) == s2.radius(i))

            # there can be tiny errors here
            for axis in (0, 1, 2):
                deviation = math.fabs(s1.coord(i)[axis] - s2.coord(i)[axis])
                self.assertTrue(deviation < coord_tolerance)

            # whitespace won't match
            self.assertIn(s1.residueNumber(i), s2.residueNumber(i))

        # because Bio.PDB structures will have slightly different
        # coordinates (due to rounding errors) we set the
        # tolerance as high as 1e-3
        area_tolerance = 1e-3
        result = calc(s1, Parameters({'algorithm' : LeeRichards, 'n-slices' : 20}))
        self.assertTrue(math.fabs(result.totalArea() - 18923.280586) < area_tolerance)
        sasa_classes = classifyResults(result, s1)
        self.assertTrue(math.fabs(sasa_classes['Polar'] - 9143.066411) < area_tolerance)
        self.assertTrue(math.fabs(sasa_classes['Apolar'] - 9780.2141746) < area_tolerance)

        result, sasa_classes = calcBioPDB(bp_structure, Parameters({'algorithm' : ShrakeRupley}))
        self.assertTrue(math.fabs(result.totalArea() - 18908.900192) < area_tolerance)
        self.assertTrue(math.fabs(sasa_classes['Polar'] - 9120.7423269) < area_tolerance)
        self.assertTrue(math.fabs(sasa_classes['Apolar'] - 9788.157865) < area_tolerance)
示例#25
0
def get_dfrominterface(pdb_fh):
    """
    For a two-chain structure, this pairs the amino-acid residues of chain
    A and chain B positionally and, for every pair with the same residue
    number, stores half of the CA-CA value obtained by subtracting the two
    Bio.PDB atoms (their distance).

    :param pdb_fh: path to .pdb file.
    :returns dinter: pandas table indexed by residue number ('refi') with
        the column 'Distance from dimer interface'; None when the first
        model does not contain exactly two chains.
    """
    junk_residues = [
        "HOH", " MG", "CA", " NA", "SO4", "IOD", "NA", "CL", "GOL", "PO4"
    ]
    model = PDBParser().get_structure("pdb_name", pdb_fh)[0]

    if len(model.child_dict) != 2:
        return None

    def amino_acid_residues(chain):
        # keep residues that are not junk and are known amino acids
        return [
            residue for residue in chain
            if residue.get_resname() not in junk_residues
            and residue.get_resname() in aas_21_3letter
        ]

    residues_a = amino_acid_residues(model["A"])
    residues_b = amino_acid_residues(model["B"])

    column = 'Distance from dimer interface'
    dfrominter = pd.DataFrame(columns=[column])
    for res_a, res_b in zip(residues_a, residues_b):
        position = res_a.get_id()[1]
        # only residues carrying the same number in both chains are used
        if position == res_b.get_id()[1]:
            dfrominter.loc[position, column] = (res_a['CA'] - res_b['CA']) / 2
    dfrominter.index.name = 'refi'
    return dfrominter
def protein_dist_angle_matrix(pdb_file, mask=None):
    """Stack the distance and angle matrices computed for a PDB file.

    Per residue, the CB (falling back to CA), CA, CB and N coordinates are
    collected; a missing atom is represented by ``[0, 0, 0]``. Residues
    whose CB coordinates sum to zero are masked out of the three angle
    matrices. ``mask`` defaults to all ones.

    Returns the ``torch.stack`` of, in order, the outputs of
    ``generate_dist_matrix``, ``generate_cb_cb_dihedral``,
    ``generate_ca_cb_dihedral`` and ``generate_ca_cb_cb_planar``.
    """
    structure_id = splitext(basename(pdb_file))[0]
    structure = PDBParser().get_structure(structure_id, pdb_file)
    residues = list(structure.get_residues())

    def first_available_coord(residue, *atom_names):
        # Coordinate of the first listed atom the residue owns, else zeros.
        for atom_name in atom_names:
            if atom_name in residue:
                return residue[atom_name].get_coord()
        return [0, 0, 0]

    def coords_tensor(*atom_names):
        return torch.tensor(
            [first_available_coord(r, *atom_names) for r in residues])

    cb_ca_coords = coords_tensor('CB', 'CA')
    ca_coords = coords_tensor('CA')
    cb_coords = coords_tensor('CB')
    n_coords = coords_tensor('N')

    cb_mask = torch.ByteTensor(
        [1 if sum(row) != 0 else 0 for row in cb_coords])
    if mask is None:
        mask = torch.ByteTensor([1] * len(cb_coords))

    distances = generate_dist_matrix(cb_ca_coords, mask=mask)
    cb_cb_dihedral = generate_cb_cb_dihedral(ca_coords,
                                             cb_coords,
                                             mask=(mask & cb_mask))
    ca_cb_dihedral = generate_ca_cb_dihedral(ca_coords,
                                             cb_coords,
                                             n_coords,
                                             mask=(mask & cb_mask))
    ca_cb_cb_planar = generate_ca_cb_cb_planar(ca_coords,
                                               cb_coords,
                                               mask=(mask & cb_mask))

    return torch.stack(
        [distances, cb_cb_dihedral, ca_cb_dihedral, ca_cb_cb_planar])
示例#27
0
    def test_1_warnings(self):
        """Check warnings: Parse a flawed PDB file in permissive mode.

        All warnings emitted while parsing ``PDB/a_structure.pdb`` are
        recorded; exactly 14 are expected and each must contain the
        corresponding message below, in order.
        """
        # catch_warnings restores both the filter list and showwarning on
        # exit, so the 'always' filter no longer leaks into other tests.
        with warnings.catch_warnings(record=True) as all_warns:
            warnings.simplefilter('always', PDBConstructionWarning)
            # Trigger warnings
            p = PDBParser(PERMISSIVE=True)
            p.get_structure("example", "PDB/a_structure.pdb")
            self.assertEqual(len(all_warns), 14)
            for wrn, msg in zip(all_warns, [
              # Expected warning messages:
              "Used element 'N' for Atom (name=N) with given element ''",
              "Used element 'C' for Atom (name=CA) with given element ''",
              "Atom names ' CA ' and 'CA  ' differ only in spaces at line 17.",
              "Used element 'CA' for Atom (name=CA  ) with given element ''",
              'Atom N defined twice in residue <Residue ARG het=  resseq=2 icode= > at line 21.',
              'disordered atom found with blank altloc before line 33.',
              "Residue (' ', 4, ' ') redefined at line 43.",
              "Blank altlocs in duplicate residue SER (' ', 4, ' ') at line 43.",
              "Residue (' ', 10, ' ') redefined at line 75.",
              "Residue (' ', 14, ' ') redefined at line 106.",
              "Residue (' ', 16, ' ') redefined at line 135.",
              "Residue (' ', 80, ' ') redefined at line 633.",
              "Residue (' ', 81, ' ') redefined at line 646.",
              'Atom O defined twice in residue <Residue HOH het=W resseq=67 icode= > at line 822.'
              ]):
                # Recorded entries wrap the warning; .message is the
                # Warning instance the old showwarning hook received.
                self.assertIn(msg, str(wrn.message))
示例#28
0
def pdb_sequence(pdb_file, id=None, method="order"):
    """Extract the per-chain sequences of a PDB file.

    ``id`` defaults to ``util.make_id_from_file_name(pdb_file)``.

    ``method`` selects how a chain's sequence is built:

    * ``"order"`` - every residue in file order, converted with
      ``three_to_one``; a ``pdb_seq_spec`` (chain, residue name, residue
      number, insertion code) is recorded per residue.
    * ``"distance"`` - concatenation of the peptides found by
      ``CaPPBuilder``; no per-residue spec is produced (``None``).

    Returns ``pdb_seqs(id, chains, chains_map)`` where ``chains`` is a list
    of dicts (``id_chain``, ``seq_rec``, ``seq_spec``) and ``chains_map``
    maps chain id to the same dict. Raises ``ValueError`` for an unknown
    ``method``.
    """
    from Bio.PDB import PDBParser, CaPPBuilder
    from Bio.PDB.Polypeptide import three_to_one
    if id is None:
        id = util.make_id_from_file_name(pdb_file)
    parser = PDBParser()
    structure = parser.get_structure(id, pdb_file)
    seq_chains = []
    for chain in structure.get_chains():
        id_chain = chain.get_id()
        if method == "distance":
            ppb = CaPPBuilder()
            seq = sum((pp.get_sequence() for pp in ppb.build_peptides(chain)),
                      Seq("", IUPAC.protein))
            seq_spec = None  #TODO: implement
        elif method == "order":
            seq = []
            seq_spec = []
            for res in chain.get_residues():
                seq.append(three_to_one(res.get_resname()))
                ## from Bio docs, res.get_full_id() returns: ("1abc", 0, "A", (" ", 10, "A"))
                fid = res.get_full_id()
                seq_spec.append(
                    pdb_seq_spec(chain=fid[-2].strip(),
                                 resn=res.get_resname(),
                                 resi=fid[-1][-2],
                                 ins=fid[-1][-1].strip()))

            seq = Seq("".join(seq), IUPAC.protein)
        else:
            raise ValueError("Unknown method: {}".format(method))

        seq_chains.append(
            dict(id_chain=id_chain,
                 seq_rec=SeqRecord(seq,
                                   id="{}_{}".format(id, id_chain),
                                   description=""),
                 seq_spec=seq_spec))
    # Built once after the loop: inside the loop it was recomputed for every
    # chain and left undefined for a structure without chains.
    chains_map = {x["id_chain"]: x for x in seq_chains}
    return pdb_seqs(id=id, chains=seq_chains, chains_map=chains_map)
示例#29
0
def SecStr(pdb_id, chain_id, start, stop):
    """Return DSSP annotations for residues ``start``..``stop`` of a chain.

    The structure is read from ``/home/m.pak/pdb/pdb<pdb_id>.pdb``
    (``pdb_id`` lower-cased) and DSSP is run on its first model.

    Returns a 3-tuple ``(sec_str, phi_lst, psi_lst)`` built from fields 2,
    4 and 5 of each DSSP record, one entry per residue found in the
    inclusive range. Residues missing from the DSSP result are reported
    and skipped. ``(None, None, None)`` is returned when the file is
    missing or DSSP cannot process the structure.
    """
    #Change pdb_id to lower cases - as in local pdb db.
    pdb_id = pdb_id.lower()
    pdb_path = f'/home/m.pak/pdb/pdb{pdb_id}.pdb'

    #Read pdb structure if it exists.
    p = PDBParser()
    try:
        structure = p.get_structure(pdb_id, pdb_path)
    except FileNotFoundError:
        print(f'File not found, proceed...  {pdb_id}')
        return None, None, None
    model = structure[0]

    #Run DSSP.
    try:
        dssp = DSSP(model, pdb_path)
    except Exception:
        # Best effort: any DSSP failure skips the structure, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print(f'DSSP unable to process the structure {pdb_id}, proceed...')
        return None, None, None

    #Keep annotation of secondary structure elements, Phi and Psi angles for defined region of structure.
    sec_str = ''
    phi_lst = []
    psi_lst = []

    #INCLUDES STOP!!!!
    for num in range(start, stop+1):
        res_key = (chain_id, (' ', num, ' ')) #Can not deal with hetero-flag and insertion code
        try:
            res = dssp[res_key]
        except KeyError:
            print(f'{res_key} not found in {pdb_id}, proceed...')
            continue

        sec_str += res[2]
        phi_lst.append(res[4])
        psi_lst.append(res[5])

    return sec_str, phi_lst, psi_lst
示例#30
0
    def test_StructAlign(self):
        """Align two structures according to a FASTA alignment file.

        Builds a StructureAlignment of the first models of 2XHE and 1A8O
        from ``PDB/alignment_file.fa`` and spot-checks the residue maps and
        a few residue names of chain A in both models.
        """
        parser = PDBParser(QUIET=1)

        with open("PDB/alignment_file.fa") as handle:
            alignment = AlignIO.read(handle, "fasta")

        structure_1 = parser.get_structure("1", "PDB/2XHE.pdb")
        structure_2 = parser.get_structure("2", "PDB/1A8O.pdb")
        model_1 = structure_1[0]
        model_2 = structure_2[0]
        aligned = StructureAlignment(alignment, model_1, model_2)
        self.assertNotEqual(aligned.map12, aligned.map21)
        # NOTE(review): assertTrue(x, y) only checks that x is truthy and
        # uses y as the failure message, so the lengths 566 and 70 are not
        # actually compared here. Switch to assertEqual once the expected
        # values have been confirmed against the data files.
        self.assertTrue(len(aligned.map12), 566)
        self.assertTrue(len(aligned.map21), 70)
        chain_a_1 = model_1["A"]
        chain_a_2 = model_2["A"]
        self.assertEqual(chain_a_1[202].get_resname(), "ILE")
        self.assertEqual(chain_a_2[202].get_resname(), "LEU")
        self.assertEqual(chain_a_1[291].get_resname(), chain_a_2[180].get_resname())
        self.assertNotEqual(chain_a_1[291].get_resname(), chain_a_2[181].get_resname())