Python PDB示例，Bio.PDB Python示例

示例#1

0

显示文件

文件： compile_trimers2.py 项目： mcprentiss/rama_score

def calc_dihedral(chain, child_res_id):
    """
    Calculate the backbone dihedral angles (phi, psi) for the residue at
    index child_res_id of chain.child_list.

    phi needs the C atom of the preceding residue and psi needs the N atom
    of the following residue, so the first and the last residue of the chain
    have no complete (phi, psi) pair.

    Returns a tuple (phi, psi) in degrees, or an empty tuple when the angles
    cannot be computed (terminal residue, missing backbone atom, degenerate
    geometry). The value returned by Bio.PDB.calc_dihedral is multiplied by
    -180/pi, i.e. the sign is inverted relative to Bio.PDB.
    """
    # A zero or negative index would silently wrap around to the end of
    # child_list and pair this residue with an unrelated "previous" residue.
    if child_res_id < 1:
        return ()

    try:
        residue = chain.child_list[child_res_id]
        CP = chain.child_list[child_res_id - 1]['C'].get_vector()
        N = residue['N'].get_vector()
        CA = residue['CA'].get_vector()
        C = residue['C'].get_vector()
        NA = chain.child_list[child_res_id + 1]['N'].get_vector()
    except (KeyError, IndexError):
        # KeyError: a backbone atom is missing (e.g. non-a.a. 'residues').
        # IndexError: the residue has no neighbour on one side.
        return ()

    # Imported only once the atoms are known to exist, so the failure paths
    # above do not depend on Biopython being importable.
    from math import pi
    from Bio import PDB

    try:
        phi = PDB.calc_dihedral(CP, N, CA, C) * -180 / pi
        psi = PDB.calc_dihedral(N, CA, C, NA) * -180 / pi
    except ZeroDivisionError:
        return ()
    return (phi, psi)

示例#2

0

显示文件

文件： compile_trimers2.py 项目： mcprentiss/rama_score

def calc_all_dihedrals(chain, child_res_id):
    """
    Calculate the dihedral angles (phi, psi, chia, chib) for the residue at
    index child_res_id of chain.child_list.

    Returns a tuple (phi, psi, chia, chib) in degrees, or an empty tuple when
    the angles cannot be computed (terminal residue, missing atoms, irregular
    PDB file). As in calc_dihedral, the values are Bio.PDB.calc_dihedral
    results multiplied by -180/pi.

    Both chi values are computed as the dihedral C-CA-CB-X:
    chia takes X from position 5 of the residue's atom list; for VAL, ILE
    and THR chib takes X from position 6, otherwise chib == chia.
    Where ambiguity in the chi angle definition exists, chia is meant to
    refer to the longer side chain or the heavier atom and chib to the
    shorter.
    NOTE(review): this relies on the atoms being stored in the order
    N, CA, C, O, CB, <gamma atoms> - confirm for the input files.

    For GLY and ALA the result is (phi, psi, 0, 0).
    """
    # A zero or negative index would wrap around to the end of child_list
    # and silently use an unrelated residue as the previous neighbour.
    if child_res_id < 1:
        return ()

    try:
        residue = chain.child_list[child_res_id]
        name = residue.get_resname()
        if name == 'GLY' or name == 'ALA':
            dih = calc_dihedral(chain, child_res_id)
            # dih is () when phi/psi are unavailable; indexing it raises
            # IndexError, which is reported by the handler below.
            return (dih[0], dih[1], 0, 0)
    except KeyError:
        print("key error line 103")
        return ()  # no dihedral angles for corner residues or non-a.a. 'residues'
    except IndexError:
        print('IndexError line 106, probable cause: irregular PDB file')
        return ()

    try:
        CP = chain.child_list[child_res_id - 1]['C'].get_vector()
        N = residue['N'].get_vector()
        CA = residue['CA'].get_vector()
        C = residue['C'].get_vector()
        NA = chain.child_list[child_res_id + 1]['N'].get_vector()
        CB = residue['CB'].get_vector()
        fourth_chi_atom = residue.child_list[5].get_vector()
        if name == 'VAL' or name == 'ILE' or name == 'THR':
            # Branched at the beta carbon: a second gamma atom exists.
            alt_fourth_chi_atom = residue.child_list[6].get_vector()
        else:
            alt_fourth_chi_atom = fourth_chi_atom
    except KeyError:
        print('KeyError line 119')
        return ()  # no dihedral angles for corner residues or non-a.a. 'residues'
    except IndexError:
        print('IndexError line 122, probable cause: irregular PDB file')
        return ()

    # Imported only once all atoms are known to exist, so the failure paths
    # above do not depend on Biopython being importable.
    from math import pi
    from Bio import PDB

    try:
        phi = PDB.calc_dihedral(CP, N, CA, C) * -180 / pi
        psi = PDB.calc_dihedral(N, CA, C, NA) * -180 / pi
        chia = PDB.calc_dihedral(C, CA, CB, fourth_chi_atom) * -180 / pi
        chib = PDB.calc_dihedral(C, CA, CB, alt_fourth_chi_atom) * -180 / pi
    except ZeroDivisionError:
        return ()
    return (phi, psi, chia, chib)

示例#3

0

显示文件

文件： SB_Protein_part_exam_Rudolfs_Berzins_ckj464.py 项目： rudolfsberzins/Various_code_examples

                def calc_phi_psi(structure):
                    '''Calculate the phi and psi dihedral angles of a structure.

                    The N, C-alpha and C atom vectors are collected for every residue that has all
                    three backbone atoms, so the three vectors of one residue always share one index.
                    Residues with an incomplete backbone are skipped instead of shifting the vectors
                    of all following residues.

                    Returns a tuple (dihedral_phi, dihedral_psi) of two lists holding the raw values
                    of PDB.calc_dihedral. The first and the last collected residue are left out
                    because they have no phi and no psi angle respectively.

                    NOTE(review): residues of all chains are concatenated into one list, so angles
                    are also computed across chain ends and across skipped residues, as before.'''

                    backbone = []  # one (N, CA, C) vector triple per complete residue
                    for chain in structure.get_chains():
                        for res in chain:
                            if res.has_id('N') and res.has_id('CA') and res.has_id('C'):
                                backbone.append((res['N'].get_vector(),
                                                 res['CA'].get_vector(),
                                                 res['C'].get_vector()))

                    dihedral_phi = []
                    dihedral_psi = []

                    # So we don't include first amino acid which has no phi angle and last amino acid which has no psi angle!
                    for i in range(1, len(backbone) - 1):
                        prev_c = backbone[i - 1][2]
                        n, ca, c = backbone[i]
                        next_n = backbone[i + 1][0]
                        dihedral_phi.append(PDB.calc_dihedral(prev_c, n, ca, c))
                        dihedral_psi.append(PDB.calc_dihedral(n, ca, c, next_n))
                    return (dihedral_phi, dihedral_psi)

示例#4

0

显示文件

文件： pdb.py 项目： alisterburt/ABTT

def read(pdb_file):
    """
    Parse a structure file into a structure object.

    File names ending in '.cif' (compared case-insensitively) are read with
    PDB.MMCIFParser, every other name with PDB.PDBParser. The file name is
    also used as the id of the returned structure.

    :param pdb_file: path of a pdb (or mmCIF) format file
    :return: structure
    """
    logging.info(f'reading pdb file: {pdb_file}')
    is_cif = pdb_file.lower().endswith('.cif')
    if is_cif:
        logging.info(f'switched to cif modus for file: {pdb_file}')
        parser = PDB.MMCIFParser()
    else:
        parser = PDB.PDBParser()
    return parser.get_structure(pdb_file, pdb_file)

示例#5

0

显示文件

def calc_ramachandran(file_name_list):
    """
    Classify the backbone torsion angles of the given PDB files.

    For every residue with both phi and psi defined, the residue is assigned
    one of the types "PRE-PRO", "PRO", "GLY" or "General" and its angles are
    looked up in RAMA_PREF_VALUES. Angles whose preference value is below
    RAMA_PREFERENCES[type]["bounds"][1] are collected as outliers, all others
    as normals.

    :param file_name_list: List of PDB files to analyse; entries that are not
        existing files are skipped
    :return: tuple (normals, outliers); each maps a residue type to
        {"x": [phi in degrees], "y": [psi in degrees]}
    """
    global RAMA_PREF_VALUES

    if RAMA_PREF_VALUES is None:
        RAMA_PREF_VALUES = _cache_RAMA_PREF_VALUES()

    # One bucket per residue type for the expected torsion angles
    normals = {key: {"x": [], "y": []} for key in RAMA_PREFERENCES}
    outliers = {key: {"x": [], "y": []} for key in RAMA_PREFERENCES}

    parser = PDB.PDBParser()
    builder = PDB.PPBuilder()

    # Calculate the torsion angle of the inputs
    for inp in file_name_list:
        if not os.path.isfile(inp):
            continue
        structure = parser.get_structure('input_structure', inp)
        for model in structure:
            for chain in model:
                for poly in builder.build_peptides(chain):
                    phi_psi = poly.get_phi_psi_list()
                    for res_index, residue in enumerate(poly):
                        phi, psi = phi_psi[res_index]
                        # Undefined angles are None; an angle of exactly 0.0
                        # is valid and must not be dropped by a truth test.
                        if phi is None or psi is None:
                            continue
                        res_name = str(residue.resname)
                        # psi is defined, so a following residue exists.
                        if str(poly[res_index + 1].resname) == "PRO":
                            aa_type = "PRE-PRO"
                        elif res_name == "PRO":
                            aa_type = "PRO"
                        elif res_name == "GLY":
                            aa_type = "GLY"
                        else:
                            aa_type = "General"
                        phi_deg = math.degrees(phi)
                        psi_deg = math.degrees(psi)
                        preference = RAMA_PREF_VALUES[aa_type][int(psi_deg) + 180][int(phi_deg) + 180]
                        if preference < RAMA_PREFERENCES[aa_type]["bounds"][1]:
                            target = outliers
                        else:
                            target = normals
                        target[aa_type]["x"].append(phi_deg)
                        target[aa_type]["y"].append(psi_deg)
    return normals, outliers

示例#6

0

显示文件

文件： process_pdb.py 项目： tayste5000/phipsidb

def get_dihedral( residue_list ):

	'''
	returns the phi and psi angles of the middle residue of a residue triplet
	together with the amino acid code of that residue

	residue_list - []Bio.PDB.Residue - list of 3 *hopefully* continuous residues

	returns ( [phi, psi], res_name ); the angles are the raw results of
	PDB.calc_dihedral and res_name is SeqUtils.seq1 of the middle residue name

	raises BackboneError when two neighbouring residue numbers do not differ
	by exactly 1, when is_aa rejects the middle residue, or when check_dict
	returns False for one of the collected atom tables
	'''

	# residue numbers of neighbours must increase by exactly one
	for position in range( 1, len( residue_list ) ):

		before = residue_list[position - 1]
		after = residue_list[position]

		if ( after.get_id()[1] - before.get_id()[1] ) != 1:

			raise BackboneError( "Discontinuous residues", after.get_id()[1] )

	# atoms needed from the previous, the middle and the next residue;
	# False marks an atom that has not been found yet
	atoms = (
		dict.fromkeys( ["C"], False ),
		dict.fromkeys( ["N", "CA", "C"], False ),
		dict.fromkeys( ["N"], False )
	)

	for i, residue in enumerate( residue_list ):

		if i == 1:

			res_name = SeqUtils.seq1( residue.get_resname() )

			if not is_aa( res_name ):

				raise BackboneError( "Not a valid amino acid", residue.get_id()[1] )

		for atom in residue.get_unpacked_list():

			if atom.name in atoms[i]:

				atoms[i][ atom.name ] = atom.get_vector()

	if False in map( check_dict, atoms ):

		# residue is the last residue visited by the loop above
		raise BackboneError( "Missing backbone atoms", residue.get_id()[1] )

	previous_atoms, middle_atoms, next_atoms = atoms

	phi = PDB.calc_dihedral( previous_atoms["C"], middle_atoms["N"], middle_atoms["CA"], middle_atoms["C"] )
	psi = PDB.calc_dihedral( middle_atoms["N"], middle_atoms["CA"], middle_atoms["C"], next_atoms["N"] )

	return ( [phi, psi], res_name )

示例#7

0

显示文件

def Separate_Chains(pdb_file):
    """Separate the two chains and return their name in a list
    Input: 
    -pdb file = target file (format: something_chains.pdb, where "chains"
     are the chain letters in the same order as in the pdb file)
    Output:
    -interaction = list with chain information, or False when the output
     folder check fails

    Every chain is written to pdb_chains/<pair>_chain_<letter>.pdb, where
    <letter> is the letter from the file name at the chain's position.
    """
    folder = "pdb_chains"

    if not Check_folder(folder):
        return False

    pdb_parser = pdb.PDBParser(PERMISSIVE=True, QUIET=True)
    pdb_structure = pdb_parser.get_structure("pdb_file", pdb_file)

    # Chain letters taken from the last "_"-separated part of the file name
    # (without its 4-character extension).
    interaction = list(pdb_file[:-4].split("_")[-1])

    if len(interaction) != 2:  # if the length is not true, something goes wrong
        print(settings.IncorrectName(interaction))

    class _SingleChain(pdb.Select):
        """Selection that keeps only the chain with the given id."""

        def __init__(self, chain_id):
            self.chain_id = chain_id

        def accept_chain(self, chain):
            return chain.get_id() == self.chain_id

    pair_name = "".join(interaction[:2])

    io = pdb.PDBIO()
    io.set_structure(pdb_structure)

    for model in pdb_structure:
        # The original indexed the letters with an undefined name "i";
        # the position of the chain inside the model is what the file-name
        # convention describes.
        for index, chain in enumerate(model):
            chain_id = chain.get_id()
            # Fall back to the chain's own id when the file name provides
            # fewer letters than the model has chains.
            label = interaction[index] if index < len(interaction) else chain_id
            name = "%s_chain_%s.pdb" % (pair_name, label)
            file_name = os.path.join(folder, name)
            io.save(file_name, _SingleChain(chain_id))

    return interaction

示例#8

0

显示文件

文件： input_output.py 项目： noelcjr/OIPD

def _read_structure(path, pdb_id='pdb', cif_id='cif'):
    file_name = os.path.basename(path).split('.')[0]
    file_sufix = os.path.basename(path).split('.')[1]
    dir_path = os.path.dirname(path)
    if file_sufix == 'pdb':
        parser = struct.PDBParser(QUIET=True)
        structure = parser.get_structure(pdb_id, path)
    elif file_sufix == 'cif':
        parser = struct.MMCIFParser()
        structure = parser.get_structure(cif_id, path)
    else:
        print("ERROR: Unreognized file type " + file_sufix + " in " +
              file_name)
        sys.exit(1)
    return structure, dir_path, file_name

示例#9

0

显示文件

def compute_chi3(structure_, model_, chain_, curr_residue_):
    """Return the chi3 side-chain dihedral of a residue in degrees.

    The angle is rounded to two decimals. It is computed for
    ARG (CB-CG-CD-NE), GLN and GLU (CB-CG-CD-OE1), LYS (CB-CG-CD-CE) and
    MET (CB-CG-SD-CE) when all four atoms are present in the residue.
    For every other residue, or when an atom is missing, 999.00 is returned.

    Atoms are looked up through structure_[model_.id][chain_.id] using the
    id of curr_residue_.
    """

    def _vector(atom_name):
        # Atom coordinates fetched through the structure hierarchy.
        return structure_[model_.id][chain_.id][
            curr_residue_.id][atom_name].get_vector()

    def _degrees(first, second, third, fourth):
        return round(
            math.degrees(PDB.calc_dihedral(first, second, third, fourth)), 2)

    has = curr_residue_.has_id
    chi3 = 999.00

    if has('CB') and has('CG') and has('CD'):
        curr_cb = _vector('CB')
        curr_cg = _vector('CG')
        curr_cd = _vector('CD')

        # Fourth atom of the dihedral and the residue names it applies to.
        fourth_atoms = (
            ('NE', ('ARG',)),
            ('OE1', ('GLN', 'GLU')),
            ('CE', ('LYS',)),
        )
        for atom_name, residue_names in fourth_atoms:
            if has(atom_name) and curr_residue_.resname in residue_names:
                chi3 = _degrees(curr_cb, curr_cg, curr_cd, _vector(atom_name))

    # Methionine has a sulphur (SD) in place of CD.
    if (has('CB') and has('CG') and has('SD') and has('CE')
            and curr_residue_.resname == 'MET'):
        chi3 = _degrees(_vector('CB'), _vector('CG'), _vector('SD'),
                        _vector('CE'))
    return chi3

示例#10

0

显示文件

文件： tools.py 项目： mcbeaker/pro-min

def calc_vecsum(metVal, ox):
    """Return the normalised vector sum (nVECSUM) of the bonds of metVal[ox].

    Every key of metVal[ox] other than 'coordNum' and 'valence' is treated
    as a bond entry with the fields 'dist', 'metVec', 'ligVec', 'ligOcc' and
    'bond_val'. Each bond contributes the vector (ligVec - metVec) / dist
    scaled by float(ligOcc) * bond_val; the length of the summed vector is
    divided by metVal[ox]['valence'].
    (Assumes the vectors are Bio.PDB Vector objects, for which "*" between
    two vectors is the dot product - TODO confirm.)
    """
    # The borderline and outlier thresholds are >0.10 and >0.23, respectively, for nVECSUM,
    # >10% and >25%, respectively, for the vacancy parameter, which is the percentage of all
    # expected coordination sites left vacant (Supplementary Fig. 2 and Supplementary Table 2).
    # For example, ions with all coordination sites occupied by ligands (vacancy = 0) are
    # classified as acceptable. For geometry with an expected coordination number greater than
    # four, metals with one vacant coordination site (vacancy <= 25%) are borderline, and metals
    # with two or more vacant coordination sites (vacancy > 25%)
    total = PDB.Vector(x=0, y=0, z=0)
    entry = metVal[ox]
    bond_keys = [
        key for key in entry.keys() if key not in ['coordNum', 'valence']
    ]
    for key in bond_keys:
        bond = entry[key]
        distance = bond['dist']
        metVec = bond['metVec']
        ligVec = bond['ligVec']
        # Unit vector pointing from the metal to the ligand.
        direction = (ligVec - metVec) / distance
        ligOcc = bond['ligOcc']
        bondValence = bond['bond_val']
        weight = float(ligOcc) * bondValence
        total = total + np.multiply(direction.get_array(), weight)
    return math.sqrt(total * total) / entry['valence']

示例#11

0

显示文件

文件： ICPTransformation.py 项目： rmslick/HelixAxisRegistration

 def PDBToNPY(self, fpathin):
     """Return the coordinates of every atom in a PDB file.

     :param fpathin: path of the PDB file to parse
     :return: list with one [x, y, z] list per atom, in file order over all
         models, chains and residues (a plain Python list, not an array)
     """
     parser = PDB.PDBParser()
     # The structure id is only a label for the parsed structure.
     struct = parser.get_structure('1ABZ', fpathin)
     return [list(atom.get_coord()) for atom in struct.get_atoms()]

示例#12

0

显示文件

def AngleFinder(Atom1,Atom2,Atom3):
	"""Return the angle between three atoms in degrees.

	The position vectors of the atoms are passed, in the given order, to
	bp.calc_angle and its result is converted from radians to degrees.
	"""
	vectors = [atom.get_vector() for atom in (Atom1, Atom2, Atom3)]

	return math.degrees(bp.calc_angle(*vectors))

示例#13

0

显示文件

文件： distance.py 项目： sky-xian/DeeProtein

def assign_sensitivity(structure, md_df, chain, pdb_path, go):
    """
    Attach per-residue sensitivity values from md_df to the residues of a chain.

    Changed:
    lookup the sensitivities directly in the df, no dict.

    The one-letter sequence of the amino-acid residues of structure[0][chain]
    is aligned (with water) against the 'AA' column of md_df, skipping its
    first row. For every alignment column without a gap, the residue taken
    from the chain receives a ``sensitivity`` dict holding the value of each
    'GO:' column of md_df at that alignment position.

    NOTE(review): residues are consumed in step with the alignment columns,
    including non-amino-acid residues and gap columns, and md_df is indexed
    by alignment position - verify that this matches the intended mapping.

    :param structure: parsed structure; only model 0 is used
    :param md_df: dataframe with an 'AA' column and 'GO:...' columns
    :param chain: id of the chain to annotate
    :param pdb_path: not used by this function
    :param go: not used by this function
    :return: the same structure object, with residues annotated in place
    """
    residues = structure[0][chain]
    # move along the protein chain, keeping amino acids only
    seq_pdb = [
        three2single[res.get_resname()] for res in residues if pdb.is_aa(res)
    ]

    # align
    seq_md = ''.join(md_df['AA'][1:])
    aligned_md, aligned_pdb, _identity = water(seq_md, seq_pdb)

    go_columns = [c for c in md_df.columns if c.startswith('GO:')]

    for pos, (aa_md, aa_pdb, res) in enumerate(
            zip(aligned_md, aligned_pdb, residues)):
        if aa_md == '-' or aa_pdb == '-':
            continue
        res.sensitivity = {
            column: md_df.loc[pos, column] for column in go_columns
        }
    return structure

示例#14

0

显示文件

    def test_is_protein(self):
        """Smoke test: call ftup.is_protein on every chain of the 1MFQ structure.

        The results are not checked; the test only fails if a call raises.
        """
        parser = bpdb.PDBParser()
        struct = parser.get_structure(
            "temp", 'test/forgi/threedee/data/1MFQ.pdb')

        for chain in struct.get_chains():
            ftup.is_protein(chain)

示例#15

0

显示文件

文件： Alignment.py 项目： pp-ct/CS8170-Project1

    def generate_hit_distance_matrix(self, type='CA'):
        """Build the matrix of inter-residue distances inside the hit range.

        :param type: 'NO' measures from the N atom of the first residue to
            the O atom of the second; any other value measures CA to CA
        :return: array of shape (hit_span, hit_span); the entry at
            [r1 - hit_range[0]][r2 - hit_range[0]] is the distance between
            residues numbered r1 and r2, both strictly inside hit_range.
            Entries of residues lacking the required atom stay 0.

        The chain self.chain_id of the first model is used, falling back to
        chain 'A' when that id is not present.
        """
        if type == 'NO':
            r1_type, r2_type = 'N', 'O'
        else:
            r1_type, r2_type = 'CA', 'CA'

        hit_distance_matrix = np.zeros((self.hit_span, self.hit_span))

        parser = PDB.PDBParser()
        chains = parser.get_structure(id='temp', file=self.pdb_path)[0]
        if self.chain_id in chains:
            chain = chains[self.chain_id]
        else:
            chain = chains['A']

        for residue1 in chain.get_residues():
            r1 = residue1.id[1]
            if not self.hit_range[0] < r1 < self.hit_range[1]:
                continue
            for residue2 in chain.get_residues():
                r2 = residue2.id[1]
                if not self.hit_range[0] < r2 < self.hit_range[1]:
                    continue
                if r1_type in residue1 and r2_type in residue2:
                    # Subtracting two atoms yields their distance.
                    distance = residue1[r1_type] - residue2[r2_type]
                    row = r1 - self.hit_range[0]
                    column = r2 - self.hit_range[0]
                    hit_distance_matrix[row][column] = distance

        return hit_distance_matrix

示例#16

0

显示文件

文件： Pipeline-for-examining-Ramachandran-plot.py 项目： jammygill/Python-Projects

def get_phi_psi(structure):
    """
    Calculate phi,psi dihedral angles and return lists.
    Uses the polypeptide class.

    Returns a tuple (phi_angles_list, psi_angles_list). Each list holds one
    inner list per polypeptide built by PPBuilder, containing the phi (or
    psi) values exactly as reported by get_phi_psi_list().
    """
    phi_angles_list = []
    psi_angles_list = []

    # One polypeptide object per continuous peptide found in the structure
    for pp in PDB.PPBuilder().build_peptides(structure):
        angle_pairs = pp.get_phi_psi_list()
        # Split the (phi, psi) pairs into two parallel lists
        phi_angles_list.append([phi for phi, _ in angle_pairs])
        psi_angles_list.append([psi for _, psi in angle_pairs])

    return phi_angles_list, psi_angles_list

示例#17

0

显示文件

文件： test_pdb.py 项目： jlumpe/ipqb-ramachandran

def test_parse_chain(pdbfile):
	"""Check parse_pdb_chain against BioPython's parser on the same PDB file.

	Both parsers must yield the same number of residues, and each pair of
	residues must agree on name, sequence number and, atom by atom, on atom
	name, coordinates and serial number.
	"""

	# Residues from the code under test
	with open(pdbfile) as fobj:
		ours = list(parse_pdb_chain(fobj))

	# Reference residues from BioPython
	reference = list(PDB.PDBParser().get_structure('test', pdbfile).get_residues())

	assert len(ours) == len(reference)

	for res1, res2 in zip(ours, reference):

		# Residue attributes
		assert res1.name == res2.resname
		assert res1.seq == res2.id[1]

		# Same number of atoms in both residues
		assert len(res1.atoms) == len(res2)

		# Both should be in the same order they were in in the file...
		for a1, a2 in zip(res1.atoms, res2):
			assert a1.name == a2.name
			assert np.allclose(a1.coord, a2.coord)
			assert a1.serial == a2.serial_number

示例#18

0

显示文件

文件： working.py 项目： eckhaus/Thesis

class PASS(PredictionAlgorithm):
    """Prediction algorithm wrapper that runs the external "./algo/pass" program.

    After each run, the input PDB file is copied and the files written by
    the program are moved into the output folder.
    """

    # Parser shared by all instances.
    pdbParser = PDB.PDBParser(PERMISSIVE=1)

    # Suffixes of the result files collected after a run.
    _RESULT_SUFFIXES = ("_asps.pdb", "_lig1.pdb", "_lig2.pdb", "_lig3.pdb",
                        "_probes.pdb")

    def __init__(self, pdbLoader, outputFolder):
        # Command template; "%s" is a placeholder filled in elsewhere.
        self.executionString = "./algo/pass %s"
        PredictionAlgorithm.__init__(self, pdbLoader, outputFolder)

    def run_one(self, structure):
        """Run the algorithm on one structure and collect its result files."""
        PredictionAlgorithm.run_one(self, structure)
        print(structure.pdbID + "_asps.pdb")
        # cleanup
        # TODO: test whether the files is present...
        # Both steps are deliberately best-effort: a failure is ignored, but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        try:
            copyfile(structure.fileName,
                     self.outputFolder + "/" + structure.pdbID + ".pdb")
        except Exception:
            pass
        try:
            # As before, the first failing move skips the remaining ones.
            for suffix in self._RESULT_SUFFIXES:
                tryMove(structure.pdbID + suffix,
                        self.outputFolder + "/" + structure.pdbID + suffix)
        except Exception:
            pass

示例#19

0

显示文件

文件： structure_anomaly_recognition.py 项目： magdchat/protwis

 def __init__(self, xtal=False, num_range=False, verbose=False):
     """Load a structure and the residue ranges to examine.

     :param xtal: an object with a ``pdb_code`` attribute, used as the
         structure directly, or a PDB code string that is upper-cased and
         looked up with Structure.objects.get. When falsy, nothing is
         initialised.
     :param num_range: optional "start-end" string giving a single residue
         range; when falsy, one [first, last] sequence-number range is
         collected per GPCR helix segment that has residues
     :param verbose: stored as self.verbose
     """
     if xtal:
         self.verbose = verbose
         try:
             xtal.pdb_code
             self.structure = xtal
         except AttributeError:
             # xtal has no pdb_code attribute: treat it as a PDB code.
             self.structure = Structure.objects.get(
                 pdb_code__index=xtal.upper())
         parent_protein = self.structure.protein_conformation.protein.parent
         self.parent_prot_conf = ProteinConformation.objects.get(
             protein=parent_protein)
         io = StringIO(self.structure.pdb_data.pdb)
         # Only the first model of the parsed PDB data is kept.
         self.pdb_struct = PDB.PDBParser(QUIET=True).get_structure(
             self.structure.pdb_code.index, io)[0]
         self.range = []
         if num_range:
             self.range = [[int(i) for i in num_range.split('-')]]
         else:
             for t in ProteinSegment.objects.filter(proteinfamily='GPCR',
                                                    category='helix'):
                 resis = Residue.objects.filter(
                     protein_conformation__protein=parent_protein,
                     protein_segment=t)
                 if len(resis) == 0:
                     continue
                 self.range.append([
                     resis[0].sequence_number,
                     resis.reverse()[0].sequence_number
                 ])

示例#20

0

显示文件

文件： find_cid_from_pname.py 项目： neksa/Descriptor_Preprocessor

def _extract_seq_from_pdb(pdb_filepath, AA3_to_AA1=generic.AA3_to_AA1):
    """Return the one-letter sequence of every chain of the first model.

    The sequence is positional: the residue numbered n ends up at index
    n - 1. Missing residue numbers are filled with "X". Residues whose
    number is not greater than the current sequence length are skipped.
    Hetero residues and residues whose name is not in AA3_to_AA1 are not
    added; their position is filled with "X" when a later residue follows.

    :param pdb_filepath: path of the PDB file
    :param AA3_to_AA1: mapping from three-letter to one-letter residue codes
    :return: dict mapping chain id to sequence string
    """
    parser = PDB.PDBParser(QUIET=True)
    with open(pdb_filepath, 'r') as file:
        struct = parser.get_structure('placeholder', file)
    cid_seq_map = dict()
    for model in struct:
        for chain in model:
            seq = []
            for residue in chain:
                hetero_flag, res_id = residue.get_id()[:2]
                # res_id should start from 1
                if res_id < len(seq) + 1:
                    continue
                while res_id > len(seq) + 1:
                    seq.append("X")
                if hetero_flag == " ":
                    try:
                        res_1 = AA3_to_AA1[residue.resname]
                    except (KeyError, IndexError):
                        # A mapping raises KeyError for an unknown residue
                        # name; IndexError is kept for compatibility.
                        continue
                    seq.append(res_1)
            cid_seq_map[chain.id] = "".join(seq)
        # Only the first model is used.
        break
    return cid_seq_map

#
# if __name__ == "__main__":
#     path = list(os.listdir(paths.PDB_FOLDER))[0]
#     path = os.path.join(paths.PDB_FOLDER, path)
#     print(path)
#     print(_extract_seq_from_pdb(path))

示例#21

0

显示文件

def pdb2xyz(inputfile, outputPrefix, keepIntermediate=False):
    """pdb2xyz: Transform a pdb file to a goccs compatible xyz file.

    The input is first passed through PDBFixer (heterogens are removed with
    removeHeterogens(False)) and written to outputPrefix + "_fixed.pdb".
    That file is then parsed, and the number of atoms, a comment line and
    one "element x y z" line per atom are written to outputPrefix + ".xyz".

    :param inputfile: input file handed to PDBFixer
    :param outputPrefix: prefix of the output file names
    :param keepIntermediate: if True, the pdb file written by PDBFixer is
        kept; otherwise it is deleted at the end
    """
    pdbfixedfilename = outputPrefix + "_fixed.pdb"
    xyzoutfilename = outputPrefix + ".xyz"
    fixer = pdbfixer.PDBFixer(inputfile)
    fixer.removeHeterogens(False)
    # Close the intermediate file before it is parsed below, so that all
    # data is guaranteed to be flushed to disk.
    with open(pdbfixedfilename, 'w') as fixedhandle:
        PDBFile.writeFile(fixer.topology, fixer.positions, fixedhandle)

    parser = PDB.PDBParser()
    #parser = PDB.MMCIFParser() #in case it's a cif file

    structure = parser.get_structure("input", pdbfixedfilename)

    natoms = sum(1 for _ in structure.get_atoms())

    with open(xyzoutfilename, "w") as outputhandle:
        # Header: atom count followed by the comment line of the xyz format.
        outputhandle.write("%d\n    empty line\n" % (natoms))

        for atom in structure.get_atoms():
            element = atom.element
            coords = atom.get_coord()
            outputhandle.write("%s     %.3f     %.3f     %.3f\n" %
                               (element, coords[0], coords[1], coords[2]))
    if not keepIntermediate:
        os.remove(pdbfixedfilename)

示例#22

0

显示文件

def save_results(out_models, output, directory, verbose):
    """Saves the resulting models into PDB files. Creates the output directory if it does not exist.
    Additionally, each chain receives a new ID in order to distinguish those chains that were equivalent.

    Files are named <directory>/<output>_<number>.pdb, numbered from 1 in the order of out_models.

    Keyword arguments:
    out_models -- list of the resulting model objects created by the program
    output -- name of the output model/file given by the user
    directory -- folder the PDB files are written to
    verbose -- boolean, prints to stderr the progress of the program"""
    if verbose:
        sys.stderr.write("Saving models...\n")
    io = PDB.PDBIO()
    if not os.path.exists(directory):
        os.makedirs(directory)
    for index, old_model in enumerate(out_models):
        # File numbers start at 1 while the model ids start at 0.
        file_label = output + "_" + str(index + 1) + ".pdb"
        used_ids = []
        final_model = UpdModel(str(index))
        for chain in old_model.get_chains():
            renamed_chain = chain.copy()
            renamed_chain.id = new_id(used_ids)
            used_ids.append(renamed_chain.id)
            final_model.add(renamed_chain)
        io.set_structure(final_model)
        io.save(directory + "/" + file_label)
        if verbose:
            sys.stderr.write("  " + file_label + " saved\n")

示例#23

0

显示文件

文件： energies.py 项目： maxemil/InteractionPotential

def ligandfilter(pdb):
    """
    Remove water and other ligands from pdb, in place, and reduce it to the
    first chain of the first model.

    :param pdb: PDB.Structure.Structure
    :return: None
    :raises IndexError: if no model is left after the filtering
    """
    # Remove non amino acid residues
    # To upkeep the integrity due to detaching, iterate over child_list copy!
    for model in pdb.child_list[:]:
        for chain in model.child_list[:]:
            for res in chain.child_list[:]:
                if not PDB.is_aa(res):
                    chain.detach_child(res.id)
            if len(chain) == 0:
                # detach_child expects the id of the child, not the child.
                model.detach_child(chain.id)
        if len(model) == 0:
            pdb.detach_child(model.id)
    # if the pdb still has more than one model, it's probably an NMR structure
    # simply keep the first model
    if len(pdb) > 1:
        for model in pdb.child_list[1:]:
            pdb.detach_child(model.id)
    if len(pdb.child_list[0]) > 1:
        model = pdb.child_list[0]
        for chain in model.child_list[1:]:
            model.detach_child(chain.id)
    # There is only one model left
    assert len(pdb) == 1
    # This model has only one chain
    assert len(pdb.child_list[0]) == 1
示例#24

0

显示文件

文件： Dihedral_Analysis.py 项目： EkaterinaOsipova/CovaLib

 def calc_dihedral(self):
     """Compute the dihedral angles around the covalent bond, in degrees.

     The results are stored in self.angles: first the dihedral
     CA - CB - cov_receptor - cov_ligand, then one dihedral
     CB - cov_receptor - cov_ligand - neighbour for every atom returned by
     get_atom_neighbors for the covalent ligand atom.
     CA and CB are taken from the parent of the covalent receptor atom.
     """
     cb = self.cov_receptor.parent['CB']
     ca = self.cov_receptor.parent['CA']
     ligand_atoms = list(self.ligand.get_atoms())
     neighbors = self.get_atom_neighbors(self.cov_ligand, ligand_atoms)
     self.angles = []
     first_angle = bp.calc_dihedral(ca.get_vector(),
                                    cb.get_vector(),
                                    self.cov_receptor.get_vector(),
                                    self.cov_ligand.get_vector())
     self.angles.append(math.degrees(first_angle))
     for neighbor in neighbors:
         angle = bp.calc_dihedral(cb.get_vector(),
                                  self.cov_receptor.get_vector(),
                                  self.cov_ligand.get_vector(),
                                  neighbor.get_vector())
         self.angles.append(math.degrees(angle))

示例#25

0

显示文件

文件： utils.py 项目： SKorablyov/Proteins

def parse_pdb(pdb_file):
    """Extract the chain sequences and residue numbers of one model.

    The model is chosen at random when the file holds more than one. For
    every chain of that model, only residues whose id starts with ' ' are
    used.

    :param pdb_file: path of the PDB file
    :return: tuple (seq_strs, seq_locs) with one entry per chain: the
        sequence string built from residue_letter_codes, and the sorted
        unique residue numbers of the atoms of the used residues
    """
    structure = bio.PDBParser().get_structure('X', pdb_file)

    models = list(structure.get_models())
    # choose random model when more than 1 exists
    chosen_model = models[np.random.randint(0, len(models))]

    seq_strs = []
    seq_locs = []
    for chain in chosen_model:
        letters = []
        positions = []
        for residue in chain:
            if residue.get_id()[0] != ' ':
                continue
            letters.append(residue_letter_codes[residue.get_resname()])
            # Element 3 of an atom's full id is the residue id; its second
            # field is the residue number.
            for atom in residue:
                positions.append(atom.get_full_id()[3][1])
        seq_strs.append(''.join(letters))
        seq_locs.append(np.unique(positions))

    return seq_strs, seq_locs

示例#26

0

显示文件

def annotate_fallback(chain_list):
    """
    If neither DSSR nor MC-Annotate are available, we use an ad-hoc implementation of canonical
    basepair detection as fallback.
    This does not work well for missing atoms or modified residues.

    :param chain_list: The chains to annotate. Each chain must provide
                       ``get_atoms()``, iteration over its residues and an ``id``.
    :returns: A tuple ``(bpseq, seq_ids)``. ``bpseq`` is a string with one line
              ``"<index> <residue name> <partner index>"`` per residue
              (1-based indices, partner 0 for unpaired residues), ordered by
              chain id. ``seq_ids`` is the list of residue ids in the same order.
    """
    kdtree = bpdb.NeighborSearch(
        [atom for chain in chain_list for atom in chain.get_atoms()])
    pairs = kdtree.search_all(10, "R")
    basepairs = {}
    # Sorted, so conflicting basepairs are deterministically solved
    for res1, res2 in sorted(pairs):
        if res1.resname.strip() not in RNA_RESIDUES or res1.id[0].startswith(
                "H_"):
            continue
        if res2.resname.strip() not in RNA_RESIDUES or res2.id[0].startswith(
                "H_"):
            continue
        try:
            if not is_basepair_pair(res1, res2):
                continue
            res1_id = fgr.resid_from_biopython(res1)
            res2_id = fgr.resid_from_biopython(res2)
            # A residue can only be part of one basepair: the first pair
            # found wins, res1 is checked before res2.
            for taken, other in ((res1_id, res2_id), (res2_id, res1_id)):
                if taken in basepairs:
                    warnings.warn("More than one basepair detected for {}."
                                  " Ignoring {}-{} because {}-{} is already"
                                  " part of the structure".format(
                                      taken, taken, other, taken,
                                      basepairs[taken]))
                    break
            else:
                basepairs[res1_id] = res2_id
                basepairs[res2_id] = res1_id
        except KeyError as e:
            # Best effort: a pair with missing atoms is simply not annotated.
            log.debug("Missing atom %s. %s has atoms %s, %s has atoms %s", e,
                      res1, res1.child_dict, res2, res2.child_dict)

    seq_ids = [
        fgr.resid_from_biopython(residue)
        for chain in sorted(chain_list, key=lambda x: x.id)
        for residue in chain
    ]
    # 1-based position of the first occurrence of every residue id, computed
    # once instead of calling seq_ids.index() for every paired residue.
    position = {}
    for number, seqid in enumerate(seq_ids, 1):
        position.setdefault(seqid, number)

    chain_dict = {c.id: c for c in chain_list}
    lines = []
    for number, seqid in enumerate(seq_ids, 1):
        partner = position[basepairs[seqid]] if seqid in basepairs else 0
        lines.append("{} {} {}\n".format(
            number, chain_dict[seqid.chain][seqid.resid].resname.strip(),
            partner))
    return "".join(lines), seq_ids

示例#27

0

显示文件

 def __init__(self, out_dir=None):
     """Set up the PDB parser and writer and remember the output directory.

     When ``out_dir`` is None, a ``chain_PDBs`` directory below the current
     working directory is used as the output location.
     """
     self.parser = PDBParser(QUIET=True)
     self.writer = PDB.PDBIO()
     self.out_dir = (os.path.join(os.getcwd(), "chain_PDBs")
                     if out_dir is None else out_dir)

示例#28

0

显示文件

文件： rangefinder.py 项目： Yoshanuikabundi/rangefinder

def load_structures(files_to_load, quiet=False):
    """Load PDB files from a list and return a list of the structures.

    Every structure is named after its file path without the extension.
    Residues numbered 0 are removed from every chain of every model.

    Args:
        files_to_load: Sized collection of paths to PDB files.
        quiet: When false, progress is printed on a single, repeatedly
            overwritten line followed by a final summary line.

    Returns:
        The parsed structures, in the order of ``files_to_load``.
    """
    parser = PDB.PDBParser(QUIET=True, PERMISSIVE=True)
    structures = []
    longest_line_len = 0
    for path in files_to_load:
        name = os.path.splitext(path)[0]
        if not quiet:
            print_line = "Loading " + name + "..."
            print(print_line, end="\r")
            longest_line_len = max(longest_line_len, len(print_line))
        new_structure = parser.get_structure(name, path)
        # Remove residue 0 to dedicate it to the donor fluorophore
        for new_model in new_structure:
            for new_chain in new_model:
                # Iterate over a snapshot: detaching a residue changes the
                # chain while it is being walked, which would make the loop
                # skip the residue following each removed one.
                for residue in list(new_chain):
                    if residue.id[1] == 0:
                        new_chain.detach_child(residue.id)
        structures.append(new_structure)
    if not quiet:
        final_print_str = "Loaded " + str(len(files_to_load)) + " structures."
        # Pad with spaces so leftovers of the longest progress line vanish.
        num_spaces = max(0, longest_line_len - len(final_print_str))
        print(final_print_str + " " * num_spaces)
    return structures

示例#29

0

显示文件

文件： inputParsers.py 项目： npwilliams09/dockTact_data

def adjacencyMat(prot,
                 chainID,
                 seqIDs,
                 normalise=True,
                 mode='bool',
                 thresh=6.0):
    """Build a residue-by-residue matrix for one chain of a PPI4DOCK model.

    The chain is read from
    ``./PPI4DOCK/PPI4DOCK_docking_set//<prot>/<chainID>_model_st.pdb``.
    Entry ``[i][j]`` starts out as
    ``centralCarbon(chain[seqIDs[i]]) - centralCarbon(chain[seqIDs[j]])``
    (presumably the distance between the two atoms) and 0 when both ids
    are equal.

    Args:
        prot: Name of the protein directory.
        chainID: Chain identifier, also used as structure id and file prefix.
        seqIDs: Residue ids used to index the chain; they define the order
            of rows and columns.
        normalise: In ``'distance'`` mode, divide the result by ``thresh``.
        mode: ``'distance'`` maps values below ``thresh`` to
            ``thresh - value`` and everything else to 0; ``'bool'`` maps
            values below ``thresh`` to 1 and everything else to 0; any other
            value leaves the raw matrix untouched.
        thresh: Cut-off applied by both modes.

    Returns:
        The resulting ``len(seqIDs) x len(seqIDs)`` numpy array.
    """
    size = len(seqIDs)
    mat = np.zeros(shape=(size, size))

    prefix = "./PPI4DOCK/PPI4DOCK_docking_set/"
    chainFile = f"{prefix}/{prot}/{chainID}_model_st.pdb"
    chain = PDB.PDBParser().get_structure(chainID, chainFile)[0][chainID]

    for row, resA in enumerate(seqIDs):
        for col, resB in enumerate(seqIDs):
            if resA != resB:
                first = centralCarbon(chain[resA])
                second = centralCarbon(chain[resB])
                mat[row, col] = first - second
            else:
                # same residue, self loop
                mat[row, col] = 0

    if mode == 'distance':
        mat = np.where(mat < thresh, thresh - mat, 0)
        if normalise:
            mat = mat / thresh
    elif mode == 'bool':
        mat = np.where(mat < thresh, 1, 0)

    return mat

示例#30

0

显示文件

文件： 2019-04-12_HW2Q1.py 项目： OSSYULYYZ/COMP564

def get_dssp_info(PDB_file, model, dir):
    """Run DSSP on a model and return the resulting ``PDB.DSSP`` object.

    The PDB file is looked up as ``dir + '/' + PDB_file`` and the ``mkdssp``
    executable is used.

    Tip: make sure the secondary structure indexing matches the sequence
    order in the PDB file.
    """
    pdb_path = dir + '/' + PDB_file
    return PDB.DSSP(model, pdb_path, dssp='mkdssp')

示例#31

0

显示文件

文件： extract_fasta_from_PDB.py 项目： nitrogenase/contact_prediction

def main():
    parser = optparse.OptionParser()
    parser.add_option("-p",
                      "--pdb",
                      dest="pdb",
                      help="path to PDB file",
                      metavar="STRING")
    parser.add_option("-f",
                      "--pdb_fasta",
                      dest="pdb_fasta",
                      help="path to PDB fasta file (out)",
                      metavar="STRING")

    (options, args) = parser.parse_args()
    pdb_fasta = options.pdb_fasta
    pdb_file = options.pdb

    pdb_name = os.path.basename(pdb_file).split(".")[0]

    parser = BP.PDBParser()
    ppb = PPBuilder(radius=1000)  # retrieve all amino acids
    pdbseq = ""
    structure = parser.get_structure(pdb_name, pdb_file)
    model = structure[0]
    for chain in model:
        for pp in ppb.build_peptides(model[chain.id], aa_only=False):
            pdbseq += (pp.get_sequence())

    print ">", pdb_name, len(pdbseq)
    print pdbseq

    with open(pdb_fasta, "w") as o:
        o.write(">%s %i\n%s\n" % (pdb_name, len(pdbseq), pdbseq))

示例#32

0

显示文件

文件： tools.py 项目： Hkethar/Conformation_predict

def pdb2cd(name):
    """Return the [helix, sheet, coil] composition DSSP reports for a PDB file.

    ``name`` is the path of the PDB file without its ``.pdb`` extension.
    Every DSSP entry is classified as 0 ('H'), 1 ('E') or 2 ('-'); the
    returned list holds, for each class, its count divided by the number of
    entries.

    NOTE(review): the entry for position ``i`` (1-based) is read from DSSP
    key ``i - 3``, so the first two positions wrap around to the end of the
    key list, and a file with a single DSSP entry raises IndexError.
    NOTE(review): any other DSSP code (e.g. 'G', 'T', 'S') keeps the class
    of the previously processed entry, and class 0 if it comes first.
    Confirm both are intended.
    """
    pdb_path = name + ".pdb"
    dssp_tuple = dssp_dict_from_pdb_file(pdb_path)
    dssp_dict = dssp_tuple[0]
    dssp_keys = dssp_tuple[1]
    structure = PDBParser(QUIET=True).get_structure("file", pdb_path)

    # Chain ids of every model; they are looked up in the first model below.
    chain_ids = [chain.get_id() for model in structure for chain in model]

    # Count the amino acids; the number of DSSP keys wins on disagreement.
    n_entries = 0
    for chain_id in chain_ids:
        amino_acids = [res for res in structure[0][chain_id].get_residues()
                       if PDB.is_aa(res)]
        n_entries += len(amino_acids)
    if n_entries != len(dssp_keys):
        n_entries = len(dssp_keys)
    ss = np.arange(1, n_entries + 1)

    # Overwrite every position with the class of its secondary structure.
    class_of = {'H': 0, 'E': 1, '-': 2}
    ss_out = 0
    for position in range(1, n_entries + 1):
        code = dssp_dict[dssp_keys[position - 3]][1]
        ss_out = class_of.get(code, ss_out)
        ss[position - 1] = ss_out

    # Composition of alpha helix, beta sheet and random coil.
    total = len(ss)
    return [(ss == wanted).sum() / total for wanted in (0, 1, 2)]

示例#33

0

显示文件

 def create_g_alpha_pdb_array(signprot_complex):
     """Collect the atoms of every G alpha residue, grouped by protein segment.

     Args:
         signprot_complex: Object whose ``structure`` identifies both the
             protein entry (``<pdb code>_a``) and the rotamers to load.

     Returns:
         An ``OrderedDict`` keyed by segment slug.  Each value is an
         ``OrderedDict`` mapping the generic number label of a residue to the
         list of atoms parsed from its rotamer, or to the string ``'x'`` when
         no rotamer exists or loading it failed.
     """
     segments = ProteinSegment.objects.filter(proteinfamily='Alpha')
     entry_name = signprot_complex.structure.pdb_code.index.lower() + '_a'
     residues = Residue.objects.filter(
         protein_conformation__protein__entry_name=entry_name)
     pdb_array = OrderedDict()
     parse = GPCRDBParsingPDB()
     for s in segments:
         if s.slug not in pdb_array:
             pdb_array[s.slug] = OrderedDict()
         for r in residues.filter(protein_segment=s):
             # 'x' marks residues without usable coordinates.
             atoms = 'x'
             try:
                 rotamers = Rotamer.objects.filter(
                     structure=signprot_complex.structure,
                     residue__display_generic_number__label=r.
                     display_generic_number.label)
                 if len(rotamers) > 0:
                     rotamer = parse.right_rotamer_select(rotamers)
                     p = PDB.PDBParser(QUIET=True).get_structure(
                         'structure', StringIO(rotamer.pdbdata.pdb))[0]
                     atoms = [atom for chain in p for res in chain
                              for atom in res]
             except Exception:
                 # Deliberately best effort: any failure while loading the
                 # rotamer leaves the placeholder.  Unlike a bare ``except``
                 # this lets KeyboardInterrupt and SystemExit propagate.
                 atoms = 'x'
             pdb_array[r.protein_segment.slug][
                 r.display_generic_number.label] = atoms
     return pdb_array

示例#34

0

显示文件

def calc_vecsum(structure,metalName,valenceDictionary):
	"""Return the valence-weighted vector sum around a metal atom.

	For every atom of ``structure`` other than the metal (located with
	``get_metalRow``), the vector from the metal to that atom is divided by
	their distance and scaled by occupancy times the bond valence looked up
	in ``valenceDictionary`` under ``"<metalName>_<ATOM NAME>"`` at the
	position of ``valenceDictionary['oxNum']`` in that entry's ``'Ox'`` list.
	The length of the summed vector is divided by
	``valenceDictionary['Valency']``.
	"""
	atoms = list(structure.get_atoms())
	metalRow = get_metalRow(list(structure.get_atoms()),metalName)
	metalAtom = atoms[metalRow]

	weighted_sum = PDB.Vector(x=0,y=0,z=0)
	for position, ligandAtom in enumerate(atoms):
		if position == metalRow:
			continue
		pair_key = metalName+"_"+ligandAtom.get_name().upper()
		separation = abs(ligandAtom - metalAtom)
		offset = ligandAtom.get_vector() - metalAtom.get_vector()
		# explicit __truediv__ keeps the division independent of how the
		# interpreter resolves the `/` operator
		direction = offset.__truediv__(separation)
		occupancy = ligandAtom.get_occupancy()
		entry = valenceDictionary[pair_key]
		ox_position = entry['Ox'].index(valenceDictionary['oxNum'])
		bond_valence = float(entry['Valence'][ox_position])
		weight = float(occupancy) * bond_valence
		weighted_sum = weighted_sum + np.multiply(direction.get_array(),weight)

	length = math.sqrt(weighted_sum * weighted_sum)
	return length / float(valenceDictionary['Valency'])

示例#35

0

显示文件

文件： rna_clarna_run.py 项目： miamiasheep/RNA-Secondary-Prediction

 def load_pdb_fobject(self, fobject):
     """Parse a PDB structure from ``fobject`` and normalise starred atom ids.

     Atom ids made of one or two capital letters, an optional digit and a
     trailing ``*`` get the star replaced by a prime (``'``).

     Returns the parsed structure.
     """
     structure = PDB.PDBParser(QUIET = True).get_structure("c",fobject)
     starred_id = re.compile(r'^[A-Z]{1,2}[0-9]?\*$')
     for atom in structure.get_atoms():
         if starred_id.match(atom.id):
             atom.id = atom.id.replace("*","'")
     return structure

示例#36

0

显示文件

def from_structure(structure):
    """Return contact data from a 3D structure (in pdb format).

    Parameters
    ----------
    structure : str, object with ``get_atoms()``, or array-like
        The path of a PDB file (parsed with ``Bio.PDB.PDBParser``), an
        already parsed structure whose ``get_atoms()`` yields atoms with a
        ``get_coord()`` method, or an array-like of atom coordinates with
        one row per atom.

    Returns
    -------
    numpy.ndarray
        Square matrix ``M`` with ``M[i, j] = 1 / d(i, j)`` for every pair of
        atoms at a non-zero distance.  Entries at distance zero (the
        diagonal and coincident atoms) are set to the largest inverse
        distance found; if no pair has a non-zero distance, the matrix is
        all zeros.

    Raises
    ------
    ImportError
        If a file has to be parsed and Biopython is missing, or if SciPy is
        missing.
    """
    if isinstance(structure, str):
        # Biopython is only needed when a file has to be parsed.
        try:
            from Bio import PDB
        except ImportError:
            print("Biopython not found.")
            raise
        structure = PDB.PDBParser().get_structure('S', structure)

    if hasattr(structure, "get_atoms"):
        # Use the atom coordinates, collected once for the whole structure
        # (the structure object itself is not a coordinate array).
        coords = np.array(
            [np.array(atom.get_coord()) for atom in structure.get_atoms()],
            dtype=float)
    else:
        coords = np.asarray(structure, dtype=float)

    try:
        # Import the submodule explicitly: a bare ``import scipy`` does not
        # guarantee that ``scipy.spatial`` is loaded.
        from scipy.spatial import distance
    except ImportError:
        print("Scipy not found.")
        raise
    D = distance.squareform(distance.pdist(coords, 'euclidean'))

    M = np.zeros(D.shape)
    separated = D != 0
    if separated.any():
        M[separated] = 1 / D[separated]
        # Zero distances get the strongest contact value found.
        M[~separated] = M[separated].max()
    return M

示例#37

0

显示文件

文件： validate_seqatom.py 项目： susannvorberg/contact_prediction

def read_dssp(dssp_file):
    """Parse a DSSP output file with ``BP.make_dssp_dict``.

    Returns the ``(dssp, keys)`` pair produced by the parser, or ``None``
    (after printing a notice) when the parser raises ``PDBException``.
    """
    try:
        parsed = BP.make_dssp_dict(dssp_file)
        dssp, keys = parsed
    except PDBException:
        print("SKIPPING THIS protein: pdb exception occurred for  %s" % dssp_file)
        return None

    return dssp, keys

示例#38

0

显示文件

文件： main.py 项目： bioinf/proteomics2014

    def _get_system_vectors(rotation_axis, m_point, target_vector):
        """Build the helper vectors describing ``m_point`` relative to an axis.

        Returns ``(r, f, r_normd, s_normd)``: ``r`` is the result of
        ``pdb.vector_to_axis(rotation_axis, m_point)``, ``f`` is
        ``target_vector`` minus the axis point ``m_point - r``, ``r_normd``
        is ``r`` normalized and ``s_normd`` is ``r_normd ** `` the normalized
        rotation axis (presumably the cross product of the two).

        Returns ``None`` when ``m_point`` lies on the rotation axis, i.e. the
        norm of ``r`` is below 1e-9.
        """
        # the perpendicular projection m_point to rotation_axis
        radial = pdb.vector_to_axis(rotation_axis, m_point)
        # corresponded rotation axis vector
        axis_point = m_point - radial

        on_axis = radial.norm() < 1e-9
        if on_axis:
            return None

        radial_unit = radial.normalized()
        offset = target_vector - axis_point
        tangent_unit = radial_unit ** rotation_axis.normalized()
        return radial, offset, radial_unit, tangent_unit

示例#39

0

显示文件

文件： extract.py 项目： sven1103/bioinfo2-project

def compute_torsion_angles(previous_residue, residue, next_residue):
    """
    Compute the backbone torsion angles of an amino acid residue.

    :param previous_residue: The residue before ``residue``; its C atom is used
    :param residue: The amino acid residue the torsion angles shall be computed for
    :param next_residue: The residue after ``residue``; its N atom is used
    :return: Phi and psi backbone torsion angles, in degrees
    """
    carbonyl_before = previous_residue['C'].get_vector()
    nitrogen, alpha_carbon, carbonyl = (
        residue[atom_name].get_vector() for atom_name in ('N', 'CA', 'C'))
    nitrogen_after = next_residue['N'].get_vector()

    # phi: C(i-1) - N - CA - C, psi: N - CA - C - N(i+1)
    phi = PDB.calc_dihedral(carbonyl_before, nitrogen, alpha_carbon, carbonyl)
    psi = PDB.calc_dihedral(nitrogen, alpha_carbon, carbonyl, nitrogen_after)

    # convert into degrees
    return tuple(math.degrees(angle) for angle in (phi, psi))

示例#40

0

显示文件

文件： Prepare_benchmark.py 项目： EkaterinaOsipova/CovaLib

 def analyze_dihedral(self):
     """
     Deprecated. Please use class Dihedral_Analisys

     Computes the dihedral angles (in degrees) around the covalent bond and
     writes them to the file DIHEDRAL_OUTPUT inside ``self.path``, each
     angle preceded by a single space.
     """
     angles = []
     cov_atm_name = self.ligand_dict['cov_atm']
     cov_atm_lig = self.ligand.child_dict[cov_atm_name]
     ##dihedral between CA < CB < SG < ligand
     angle_1 = math.degrees( bp.calc_dihedral(self.covalent_res.child_dict['CA'].get_vector(),
                                              self.covalent_res.child_dict['CB'].get_vector(),
                                              self.covalent_atm_res.get_vector(),
                                              cov_atm_lig.get_vector()))
     angles.append(angle_1)
     ##all dihedral of CB < SG < ligand-covalent-atom < other ligand atoms
     # NOTE(review): get_atom() is kept as in the original; confirm it
     # exists in the Biopython version in use.
     ns  = bp.NeighborSearch(list(self.ligand.get_atom()))
     neigh = ns.search(cov_atm_lig.get_coord(), 2)
     # removes the atom itself
     neigh = [atom for atom in neigh if atom.name != cov_atm_name]
     for i in neigh:
         ang = math.degrees( bp.calc_dihedral(self.covalent_res.child_dict['CB'].get_vector(),
                                              self.covalent_atm_res.get_vector(),
                                              cov_atm_lig.get_vector(),
                                              i.get_vector()))
         angles.append(ang)
     # Same text the former reduce() call produced; the file is now closed
     # deterministically.
     with open('/'.join([self.path, DIHEDRAL_OUTPUT]), 'w') as out:
         out.write(''.join(' ' + str(ang) for ang in angles))

示例#41

0

显示文件

文件： __init__.py 项目： gieses/CLQC

    def __get_residues__(self, structure):
        """
        Collects the amino acid residues of a structure in an array.

        parameters:
        ----------------
        structure: PDB structure obj,
                   opened PDB structure file object

        Returns:
        ---------------------------------------
        array: np-arr,
               the residues for which PDB.is_aa is true, in the order
               structure.get_residues() yields them
        """
        amino_acids = [
            residue for residue in structure.get_residues()
            if PDB.is_aa(residue)
        ]
        return np.array(amino_acids)

示例#42

0

显示文件

文件： FeatureCollector.py 项目： biocad/ptm_prediction

 def calculate_torsion_psi(current_residue, next_residue):
     """Return the psi torsion of ``current_residue``.

     This is the dihedral N - CA - C - N(next) as computed by
     ``PDB.calc_dihedral``.
     """
     backbone = [current_residue[atom_name].get_vector()
                 for atom_name in ('N', 'CA', 'C')]
     backbone.append(next_residue['N'].get_vector())
     return PDB.calc_dihedral(*backbone)

示例#43

0

显示文件

文件： FeatureCollector.py 项目： biocad/ptm_prediction

 def calculate_torsion_phi(previous_residue, current_residue):
     """Return the phi torsion of ``current_residue``.

     This is the dihedral C(previous) - N - CA - C as computed by
     ``PDB.calc_dihedral``.
     """
     backbone = [previous_residue['C'].get_vector()]
     backbone.extend(current_residue[atom_name].get_vector()
                     for atom_name in ('N', 'CA', 'C'))
     return PDB.calc_dihedral(*backbone)

示例#44

0

显示文件

文件： generate_cst.py 项目： vorobieva/beta_solenoids

def get_pose_constraints(Pose, MaxDist, MinPositionSeperation, SasaRadius, SasaScale, UpstreamGrep, DownstreamGrep, NeedHydrogen=True):
    '''
    Build constraint strings for atom contacts found in Pose.

    Every atom whose name matches UpstreamGrep is paired with every atom
    within MaxDist whose name matches DownstreamGrep and whose residue number
    is at least MinPositionSeperation larger. Each accepted pair yields six
    constraint strings: one AtomPair, two Angle and three Dihedral, built
    with the two nearest qualifying non-hydrogen neighbors of each atom.

    Parameters:
      Pose                  - pose providing n_residue() and residue(i)
      MaxDist               - radius of the neighbor search around each atom
      MinPositionSeperation - minimum (downstream - upstream) residue number difference
      SasaRadius            - probe_radius handed to AtomicSasaCalculator
      SasaScale             - object providing weigh(), FloorSasa and CeilingSasa
      UpstreamGrep          - regular expression (re.match) for upstream atom names
      DownstreamGrep        - regular expression (re.match) for downstream atom names
      NeedHydrogen          - if True, a pair is only kept when an atom with 'H'
                              in its name lies within 1.1 of either partner

    Returns:
      (AllConstraints, SortedConstraints), where SortedConstraints is the tuple
      (backbone-backbone, backbone-sidechain, sidechain-sidechain) of
      constraint lists.
    '''
    # AlexsSasaCalculator is from Alex's interface_fragment_matching 
    # thanks Alex!
    #
    # This is used to give buried polar contacts more weight. Thanks Alex Ford!
    try:
      from interface_fragment_matching.utility.analysis import AtomicSasaCalculator
      # make instance of Alex's sasa calculator
      AlexsSasaCalculator = AtomicSasaCalculator(probe_radius=SasaRadius)
      ResidueAtomSasa = AlexsSasaCalculator.calculate_per_atom_sasa(Pose)    
    except ImportError:
      # NOTE(review): the string below is evaluated and discarded, nothing is
      # printed. ResidueAtomSasa then stays undefined and the lookups further
      # down would raise NameError, which their `except KeyError` handlers do
      # not catch.
      ' Error: SASA weighting of contacts requires interface_fragment_matching from Alex Ford '

    # for making full atom kd tree
    ResAtmCoordLists = []
    # for translating from kd tree index to ( residue, atom ) coord
    ResAtmRecordLists = []

    # loop through all residue numbers
    for Res in range(1, Pose.n_residue() + 1):
      # remade for each residue
      AtmRecordList = []
      AtmCoordList = []
      # loop through residue's atom numbers
      for Atm in range(1, Pose.residue(Res).natoms() + 1):
        # add (residue, atom) coord to residue's list
        AtmRecordList.append((Res, Atm))
        # add atom xyz coord to residue's list
        AtmCoordList.append( np.array(list(Pose.residue(Res).atom(Atm).xyz())) )
      
      # add residue's lists to respective global lists
      ResAtmCoordLists.extend(AtmCoordList)
      ResAtmRecordLists.extend(AtmRecordList)

    ResidueAtomArray = np.array( ResAtmCoordLists )
    ResidueAtomKDTree = spatial.KDTree( ResidueAtomArray )

    # per atom: indices of all atoms within MaxDist (candidate contacts) and
    # within 1.1 (candidate bonded hydrogens)
    ResidueAtomNeighbors = ResidueAtomKDTree.query_ball_point( ResidueAtomArray, MaxDist )
    # ResidueAtomNearNeighbors = ResidueAtomKDTree.query_ball_point( ResidueAtomArray, 2.0 )
    ResidueAtomHydrogens = ResidueAtomKDTree.query_ball_point( ResidueAtomArray, 1.1 )

    # holds constraints before printing
    AllConstraints = [] 
    # holds sorted cst
    AllBackboneBackboneCst = []
    AllBackboneSidechainCst = []
    AllSidechainSidechainCst = []

    # All contacts are from upstream to downstream residues to avoid double counting
    # (Upstream itself is not used below)
    Upstream = []
    for UpIndex, UpXyzCoords in enumerate(ResAtmCoordLists):
      UpRes, UpAtm = ResAtmRecordLists[UpIndex]

      # # loop through residues storing info on oxygens
      # for UpRes in range( 1, Pose.n_residue() + 1 ):
      #   # loop through atoms
      #   for UpAtm in range( 1, Pose.residue(UpRes).natoms() + 1 ):
      UpName = Pose.residue(UpRes).atom_name(UpAtm).replace(' ', '')

      # skip virtual residues
      if Pose.residue(UpRes).is_virtual(UpAtm):
        continue

      #                                this guy 
      #                                 /
      # checks upstream name           V
      if re.match(UpstreamGrep, UpName ): 
        # print '\n'*2
        # print 'UpRes, UpName', UpRes, UpName

        # get neighbors of upstream residues
        NeighborsOfUpstream = ResidueAtomNeighbors[UpIndex]
        
        # prep for loop (Downstreams is not used below)
        Downstreams = []

        Constraints = []
        BackboneBackboneCst = []
        BackboneSidechainCst = []
        SidechainSidechainCst = []

        # ArbitrayOrderOfAtomNames = {}
        for DownIndex in NeighborsOfUpstream:
          # name presumes downstream, checks with if immediately below
          DownRes, DownAtm = ResAtmRecordLists[DownIndex]

          # checks that downstream residue is downstream of upstream and passes min primary sequence spacing
          if DownRes - UpRes >= MinPositionSeperation:
            DownName = Pose.residue(DownRes).atom_name(DownAtm).replace(' ', '')
            
            # skip if same atom (only reachable when MinPositionSeperation <= 0)
            if UpRes == DownRes:
              if UpName == DownName:
                continue

            # skip virtual residues
            if Pose.residue(DownRes).is_virtual(DownAtm):
              continue

            # checks downstream name
            if re.match( DownstreamGrep, DownName ):
              # print 'DownRes, DownName', DownRes, DownName

              # atoms within 1.1 of the upstream atom whose name contains 'H'
              PotentialUpstreamHydrogens = ResidueAtomHydrogens[UpIndex]
              UpstreamHydrogens = []
              # print 'PotentialUpstreamHydrogens', PotentialUpstreamHydrogens
              for UpH_I in PotentialUpstreamHydrogens:
                UpH_Res, UpH_Atm = ResAtmRecordLists[UpH_I]
                UpH_Name  = Pose.residue(UpH_Res).atom_name(UpH_Atm).replace(' ', '')
                # print 'UpH_Name', UpH_Name
                if 'H' in UpH_Name:
                  UpstreamHydrogens.append((UpH_Res, UpH_Atm, UpH_Name))
                # print 'UpstreamHydrogens', UpstreamHydrogens

              # same for the downstream atom
              PotentialDownstreamHydrogens = ResidueAtomHydrogens[DownIndex]
              DownstreamHydrogens = []
              # print 'PotentialDownstreamHydrogens', PotentialDownstreamHydrogens
              for DownH_I in PotentialDownstreamHydrogens:
                DownH_Res, DownH_Atm = ResAtmRecordLists[DownH_I]
                DownH_Name = Pose.residue(DownH_Res).atom_name(DownH_Atm).replace(' ', '')
                # print 'DownH_Name', DownH_Name
                if 'H' in DownH_Name:
                  DownstreamHydrogens.append((DownH_Res, DownH_Atm, DownH_Name))
                # print 'DownstreamHydrogens', DownstreamHydrogens

              # check there is at least one hydrogen in system before adding constraint
              if len(UpstreamHydrogens) or len(DownstreamHydrogens) or NeedHydrogen == False:

                # these trys / excepts separate 
                # backbone-backbone from 
                # backbone-sidechain from
                # sidechain-sidechain interactions
                # 
                # in future maybe sort into separate lists, shouldn't rely on ResidueAtomSasa to know what is in backbone
                #
                # The classification relies on which lookups raise KeyError:
                # both atoms found -> BBBB, exactly one found -> BBSC,
                # neither found -> SCSC.
                try:
                  UpstreamSasa = ResidueAtomSasa[UpRes][UpName]
                  DownstreamSasa = ResidueAtomSasa[DownRes][DownName]
                  AverageSasa = np.mean([UpstreamSasa, DownstreamSasa])        
                  BBBB = 1
                  BBSC = SCSC = 0
                except KeyError:                
                  # These lines handle backbone to sidechain interactions
                  # AverageSasa is set to SasaScale.FloorSasa
                  try:
                    UpstreamSasa = ResidueAtomSasa[UpRes][UpName]
                    AverageSasa = SasaScale.FloorSasa
                    BBSC = 1
                    BBBB = SCSC = 0
                  except KeyError:
                    try:
                      DownstreamSasa = ResidueAtomSasa[DownRes][DownName]
                      AverageSasa = SasaScale.FloorSasa 
                      BBSC = 1
                      BBBB = SCSC = 0            
                    
                    # side chain to side chain: AverageSasa is set to SasaScale.CeilingSasa
                    except KeyError:
                      AverageSasa = SasaScale.CeilingSasa 
                      SCSC = 1
                      BBSC = BBBB = 0

                # use instance of sasa_scale to calculate weight based on avg sasa of N and O
                SasaBasedWeight = SasaScale.weigh(AverageSasa)
                # print 
                # print 'AverageSasa', AverageSasa
                # print 'SasaBasedWeight', SasaBasedWeight

                # print 'found downstream neighbor %s'%DownName
                DownXyzCoords = np.array( list(Pose.residue(DownRes).atom(DownAtm).xyz()) )
                # print 'DownRes, DownName', DownRes, DownName
                # print 'DownXyzCoords', DownXyzCoords

                # ## Get neighbors for angles and torsions to use with AtomPairs

                SelectUpNeighbors = []
                # iterates through upstream atom neighbors for references for angle
                for UpNeighborIndex in NeighborsOfUpstream:
                  UpNeighborRes, UpNeighborAtm = ResAtmRecordLists[UpNeighborIndex]
                  UpNeighborName = Pose.residue(UpNeighborRes).atom_name(UpNeighborAtm).replace(' ', '')

                  # keep looking if neighbor is hydrogen
                  if 'H' in UpNeighborName:
                    continue                

                  # skip virtual residues
                  if Pose.residue(UpNeighborRes).is_virtual(UpNeighborAtm):
                    continue

                  # keep looking if neighbor is self
                  if UpNeighborName == UpName and UpNeighborRes == UpRes:
                    continue
                  # keep looking if neighbor is downstream residue again
                  if UpNeighborName == DownName and UpNeighborRes == DownRes:
                    continue
                  UpNeighborCoords = ResAtmCoordLists[UpNeighborIndex]
                  DistanceToNeighbor = solenoid_tools.vector_magnitude( UpXyzCoords - UpNeighborCoords )
                  SelectUpNeighbors.append( (DistanceToNeighbor, UpNeighborName, UpNeighborRes, UpNeighborCoords) )

                # sort by distance to atom, nearest first
                # NOTE(review): indexing [0] and [1] raises IndexError when
                # fewer than two neighbors qualified.
                SelectUpNeighbors.sort()                
                UpNeighbor1Tuple = SelectUpNeighbors[0]
                UpNeighbor2Tuple = SelectUpNeighbors[1]
                # print '\n'*2
                # print 'UpRes, UpName', UpRes, UpName
                # print 'UpstreamHydrogens', UpstreamHydrogens
                # print 'SelectUpNeighbors', SelectUpNeighbors

                 # get neighbors of downstream residues
                NeighborsOfDownstream = ResidueAtomNeighbors[DownIndex]
                SelectDownNeighbors = []
                # iterates through downstream atom neighbors for references for angle
                for DownNeighborIndex in NeighborsOfDownstream:
                  DownNeighborRes, DownNeighborAtm = ResAtmRecordLists[DownNeighborIndex]
                  DownNeighborName = Pose.residue(DownNeighborRes).atom_name(DownNeighborAtm).replace(' ', '')

                  # keep looking if neighbor is hydrogen
                  if 'H' in DownNeighborName:
                    continue                

                  # skip virtual residues
                  if Pose.residue(DownNeighborRes).is_virtual(DownNeighborAtm):
                    continue

                  # keep looking if neighbor is self
                  if DownNeighborName == DownName and DownNeighborRes == DownRes:
                    continue
                  # keep looking if neighbor is upstream residue
                  if DownNeighborName == UpName and DownNeighborRes == UpRes:
                    continue

                  DownNeighborCoords = ResAtmCoordLists[DownNeighborIndex]
                  DistanceToNeighbor = solenoid_tools.vector_magnitude( DownXyzCoords - DownNeighborCoords )
                  SelectDownNeighbors.append( (DistanceToNeighbor, DownNeighborName, DownNeighborRes, DownNeighborCoords) )

                # sort by distance to atom, nearest first
                # NOTE(review): same IndexError caveat as for the upstream neighbors.
                SelectDownNeighbors.sort()
                DownNeighbor1Tuple = SelectDownNeighbors[0]
                DownNeighbor2Tuple = SelectDownNeighbors[1]
                # print 'DownRes, DownName', DownRes, DownName
                # print 'DownstreamHydrogens', DownstreamHydrogens
                # print 'SelectDownNeighbors', SelectDownNeighbors

                Distance = solenoid_tools.vector_magnitude(DownXyzCoords - UpXyzCoords)
                
                DistanceCst = 'AtomPair %s %d %s %d SCALARWEIGHTEDFUNC %f HARMONIC %.2f 1.0' %( UpName, UpRes, DownName, DownRes, SasaBasedWeight, Distance )

                # Use Biopython for angle and dihedral calculations
                # here 'Vec' means PDB.Vector of atom's xyz coord
                UpstreamVec = PDB.Vector(UpXyzCoords)
                DownstreamVec = PDB.Vector(DownXyzCoords)
                
                UpNeighbor1Vec = PDB.Vector(UpNeighbor1Tuple[3])
                UpNeighbor2Vec = PDB.Vector(UpNeighbor2Tuple[3])
                DownNeighbor1Vec = PDB.Vector(DownNeighbor1Tuple[3])
                DownNeighbor2Vec = PDB.Vector(DownNeighbor2Tuple[3])

                # the values returned by PDB.calc_angle / PDB.calc_dihedral are
                # written into the constraint strings without conversion
                Angle1 = PDB.calc_angle(UpNeighbor1Vec, UpstreamVec, DownstreamVec)
                AngleCst1 = 'Angle %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5' %( UpNeighbor1Tuple[1], UpNeighbor1Tuple[2], UpName, UpRes, DownName, DownRes, SasaBasedWeight, Angle1 )
                Angle2 = PDB.calc_angle(UpstreamVec, DownstreamVec, DownNeighbor1Vec)
                AngleCst2 = 'Angle %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5' %( UpName, UpRes, DownName, DownRes, DownNeighbor1Tuple[1], DownNeighbor1Tuple[2], SasaBasedWeight, Angle2 )

                Torsion1 = PDB.calc_dihedral(UpNeighbor2Vec, UpNeighbor1Vec, UpstreamVec, DownstreamVec)
                TorsionCst1 = 'Dihedral %s %d %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5' %( UpNeighbor2Tuple[1], UpNeighbor2Tuple[2], UpNeighbor1Tuple[1], UpNeighbor1Tuple[2], UpName, UpRes, DownName, DownRes, SasaBasedWeight, Torsion1 )
                Torsion2 = PDB.calc_dihedral(UpNeighbor1Vec, UpstreamVec, DownstreamVec, DownNeighbor1Vec)
                TorsionCst2 = 'Dihedral %s %d %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5' %( UpNeighbor1Tuple[1], UpNeighbor1Tuple[2], UpName, UpRes, DownName, DownRes, DownNeighbor1Tuple[1], DownNeighbor1Tuple[2], SasaBasedWeight, Torsion2 )
                Torsion3 = PDB.calc_dihedral(UpstreamVec, DownstreamVec, DownNeighbor1Vec, DownNeighbor2Vec)
                TorsionCst3 = 'Dihedral %s %d %s %d %s %d %s %d SCALARWEIGHTEDFUNC %f CIRCULARHARMONIC %.2f 0.5' %( UpName, UpRes, DownName, DownRes, DownNeighbor1Tuple[1], DownNeighbor1Tuple[2], DownNeighbor2Tuple[1], DownNeighbor2Tuple[2], SasaBasedWeight, Torsion3 )

                # adds constraint to running lists of constraints
                Constraints.extend( [DistanceCst, AngleCst1, AngleCst2, TorsionCst1, TorsionCst2, TorsionCst3] )
                if BBBB: BackboneBackboneCst.extend( [DistanceCst, AngleCst1, AngleCst2, TorsionCst1, TorsionCst2, TorsionCst3] )
                if BBSC: BackboneSidechainCst.extend( [DistanceCst, AngleCst1, AngleCst2, TorsionCst1, TorsionCst2, TorsionCst3] )
                if SCSC: SidechainSidechainCst.extend( [DistanceCst, AngleCst1, AngleCst2, TorsionCst1, TorsionCst2, TorsionCst3] )

              # else:
              #   print 'No hydrogen!'
              #   sys.exit()

        # merge this upstream atom's constraints into the overall lists
        AllConstraints.extend(Constraints)
        AllBackboneBackboneCst.extend(BackboneBackboneCst)
        AllBackboneSidechainCst.extend(BackboneSidechainCst)
        AllSidechainSidechainCst.extend(SidechainSidechainCst)

    SortedConstraints = (AllBackboneBackboneCst, AllBackboneSidechainCst, AllSidechainSidechainCst)

    return AllConstraints, SortedConstraints

示例#45

0

显示文件

文件： getnrpdblist.py 项目： NirBenTalLab/proorigami-ptgraph

def get_nr_pdb_list(TMPDIR):
    """
    The main program logic to get the nonredundant list of pdb identifiers,
    selecting one representative per cluster.
    See module docstring at top of file for description

    Clusters are read from stdin via yield_cluster_from_file(). For each
    protein cluster the candidates are narrowed step by step
    (discard_short_seqs, discard_non_xray, discard_lower_resolution,
    discard_lower_similarity); if more than one sequence still remains,
    the first one is used.

    Parameters:
       TMPDIR - name of temp directory to use (decompressed PDB files are
                written there and removed again)
    Return value:
       None.

       Output is to stdout, one line per cluster:
       the chosen representative id followed by ": ", then the ids of the
       other members of the original cluster separated by spaces.
    """
    # Local import so the block does not depend on a file-level import.
    import subprocess

    pdb_dict = {} # dict of {pdbid : MethodResolution} to cache info from PDB

    for cluster in yield_cluster_from_file(sys.stdin):
        if not cluster[0].is_protein(): # since clustered, if one not, all not
            continue # discard non-protein sequences
        orig_seqlist = list(cluster.seqlist) # keep copy before deleting some
        cluster.discard_short_seqs()
        if len(cluster) > 1:
            # now we need to look in PDB files to find highest res X-ray struct
            for seq in cluster.seqlist:
                pdbid = seq.descr[:4].lower()
                if pdbid in pdb_dict:
                    # already parsed this PDB entry for an earlier sequence
                    methres = pdb_dict[pdbid]
                    seq.is_xray = methres.is_xray
                    seq.resolution = methres.resolution
                else:
                    name = "pdb" + pdbid
                    pdbfile = os.path.join(PDBDIV_ROOT, pdbid[1:3],
                                           name + ".ent.gz")
                    tmp_pdbfilename = os.path.join(TMPDIR, name)
                    try:
                        # Run gzip without a shell: pdbid comes from input
                        # data, so it must never be interpolated into a
                        # shell command line. As before, a gzip failure is
                        # not fatal; it just leaves an empty temp file.
                        with open(tmp_pdbfilename, "wb") as tmp_out:
                            subprocess.call(["gzip", "-d", "-c", pdbfile],
                                            stdout=tmp_out)
                        pdbheader = PDB.parse_pdb_header(tmp_pdbfilename)
                    finally:
                        # always clean up, even if header parsing raised
                        if os.path.exists(tmp_pdbfilename):
                            os.unlink(tmp_pdbfilename)
                    if 'x-ray' in pdbheader['structure_method'].lower():
                        seq.is_xray = True
                        seq.resolution = float(pdbheader['resolution'])
                    # NOTE(review): for non-x-ray entries this relies on seq
                    # already carrying default is_xray/resolution values --
                    # confirm against the sequence class.
                    methres = MethodResolution()
                    methres.is_xray = seq.is_xray
                    methres.resolution = seq.resolution
                    pdb_dict[pdbid] = methres
            cluster.discard_non_xray()
        if len(cluster) > 1:
            cluster.discard_lower_resolution()
        if len(cluster) > 1:
            cluster.discard_lower_similarity()
        if len(cluster) > 1:
            cluster.seqlist = [cluster.seqlist[0]] # arbitrary: use first seq

        # NOTE(review): assumes at least one sequence survives the discard
        # steps above; an empty seqlist would raise IndexError here.
        repr_id =  cluster.seqlist[0].descr[:6].lower()
        sys.stdout.write(repr_id + ": ")
        for seq in orig_seqlist:
            other_id = seq.descr[:6].lower()
            if other_id != repr_id:
                sys.stdout.write(other_id + " ")
        sys.stdout.write("\n")

示例#46

0

显示文件

文件： backmapper.py 项目： bryan-lunt/PDB_Backmapper

def is_no_aa_chain(chain):
	"""
	Test if a chain contains no amino acids.

	Iterates over the residues of `chain` and returns True when
	PDB.is_aa() is false for every one of them (including the case of
	an empty chain), False as soon as one amino acid is found.
	"""
	# Generator expression: no throwaway list, and any() stops at the
	# first residue that is an amino acid.
	return not any(PDB.is_aa(r) for r in chain)