Example #1
def sec_str_rev_filter(PDBfile, model, chain, seq_index, sec_str):
    """
    Helper function.

    Reverse filter of sec_str_filter: checks the residues of `chain` listed
    in `seq_index` and removes those whose secondary structure is `sec_str`.

    Returns the filtered seq_index.
    """
    from Bio.PDB import DSSP
    dssp = DSSP(model, PDBfile)  # need to hide this output
    num = 0
    while num < len(seq_index):
        try:
            res = chain.child_list[seq_index[num]]
            sec = dssp[res][0]
            if sec == sec_str:
                seq_index.remove(seq_index[num])
            else:
                num += 1
        except KeyError:  # not an amino acid
            num += 1
    return seq_index
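A hypothetical invocation of the helper above, assuming a local file 1abc.pdb and chain A; the index list simply enumerates positions in chain.child_list.

from Bio.PDB import PDBParser

# Hypothetical usage: drop every helix ('H') residue from a list of
# positional indices into chain A of a local file (names are assumptions).
parser = PDBParser(QUIET=True)
structure = parser.get_structure("1abc", "1abc.pdb")
model = structure[0]
chain = model["A"]
seq_index = list(range(len(chain.child_list)))
kept = sec_str_rev_filter("1abc.pdb", model, chain, seq_index, "H")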
Example #2
def make_dssp(pdb_id, chain, chain_delimiter=':'):
    """
    Retrieve dssp string from PDB database

    :param pdb_id: pdb id
    :param chain: pdb chain id
    :param chain_delimiter: delimiter between id and chain id
    :return: id, aa sequence, ss sequence
    """
    url = PDB_URL + pdb_id + '.pdb'
    urllib.request.urlretrieve(url, pdb_id)

    parser = PDBParser()
    structure = parser.get_structure(pdb_id, pdb_id)
    dssp = DSSP(structure[0], pdb_id, dssp='mkdssp')

    aa = ''
    ss = ''
    for key in dssp.keys():
        if key[0] == chain:
            aa += dssp[key][1]
            ss += SS_MAP[dssp[key][2]]

    os.remove(pdb_id)
    return pdb_id + chain_delimiter + chain, aa, ss
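make_dssp relies on module-level names (PDB_URL, SS_MAP) and imports that are not part of the snippet above; a minimal sketch of plausible definitions, where the download URL and the 8-to-3-state collapse are assumptions rather than values from the source repository:

import os
import urllib.request

from Bio.PDB import PDBParser
from Bio.PDB.DSSP import DSSP

# Assumed download endpoint for PDB entries.
PDB_URL = 'https://files.rcsb.org/download/'

# Assumed reduction of DSSP's 8 secondary-structure codes to H/E/C.
SS_MAP = {'H': 'H', 'G': 'H', 'I': 'H',
          'E': 'E', 'B': 'E',
          'T': 'C', 'S': 'C', '-': 'C'}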
Example #3
    def run_DSSP(self, corresponding_gene_call, pdb_filepath):
        """
        DSSP is run using the API developed in Biopython. That means we don't work directly from the
        text output of DSSP, but rather a Biopython object.
        """
        # Determine the model name by loading the structure file
        p = PDBParser()
        structure = p.get_structure(corresponding_gene_call, pdb_filepath)
        model = structure[0]  # pdb files can have multiple models. DSSP assumes the first.

        # run DSSP
        residue_annotation = DSSP(model,
                                  pdb_filepath,
                                  dssp=self.DSSP_executable,
                                  acc_array="Wilke")

        if not len(residue_annotation.keys()):
            raise ConfigError("Your executable of DSSP, `{}`, exists but didn't return any meaningful output. This\
                               is a known issue with certain distributions of DSSP. For information on how to test\
                               that your version is working correctly, please visit\
                               http://merenlab.org/2016/06/18/installing-third-party-software/#dssp"\
                               .format(self.DSSP_executable, pdb_filepath))

        # convert to a digestible format
        return self.convert_DSSP_output_from_biopython_to_dataframe(
            residue_annotation)
Example #4
def run_dssp(biopdb, pdb_path, work_dir=None, job=None):
    work_dir = work_dir or os.getcwd()

    if apiDockerCall is not None and job is not None:
        if os.path.abspath(os.path.dirname(pdb_path)) != os.path.abspath(work_dir):
            shutil.copy(pdb_path, work_dir)
        parameters = [
            '-i',
            os.path.join("/data", os.path.basename(pdb_path)), '-o',
            os.path.join("/data",
                         os.path.basename(pdb_path) + ".dssp")
        ]
        apiDockerCall(job,
                      image='edraizen/dssp:latest',
                      working_dir=work_dir,
                      parameters=parameters)
        dssp = DSSP(biopdb[0],
                    os.path.join(work_dir,
                                 os.path.basename(pdb_path) + ".dssp"),
                    file_type='DSSP')
    else:
        try:
            dssp = DSSP(biopdb[0], pdb_path, dssp='dssp')  # DSSP expects a model, not the full structure
        except KeyboardInterrupt:
            raise
        except NameError:
            dssp = None
        except Exception as e:
            raise InvalidPDB("Cannot run dssp for {}".format(pdb_path))

    return dssp
Example #5
 def run(self):
     tmp = QTemporaryFile()
     result = {}
     io = None
     dssp = None
     prevChain = None
     key = None
     if tmp.open():
         io = PDBIO()
         io.set_structure(self.struct)
         io.save(tmp.fileName())
         try:
             dssp = DSSP(self.struct[0], tmp.fileName(), dssp='mkdssp')
             prevChain = next(iter(dssp.keys()))[0]
             for key in dssp.keys():
                 #print(key[0])
                 if key[0] == prevChain:
                     #print(key)
                     # I THINK I'M DOING THIS PART WRONG
                     result[dssp[key][0] + self.offset] = dssp[key][2]
             self.finished.emit([result, self.seq, self.node])
         except Exception:
             traceback.print_exc()
             print("SORRY, DSSP COULD NOT BE RUN (is mkdssp installed?)")
             self.finished.emit([None, None, None])
     del tmp, result, io, dssp, prevChain, key
Example #6
 def dssp(self):
     '''Get DSSP object'''
     if self._dssp is not None:
         return self._dssp
     #DSSP only works on the first model in the PDB file
     if isinstance(self._parent.pdb_file(), str) and self._id == 0:
         try:
             self._dssp = DSSP(self.model, self._parent.pdb_file())
         except OSError:
             self._dssp = DSSP(self.model,
                               self._parent.pdb_file(),
                               dssp="mkdssp")
     elif self._id == 0:
         if self._parent._mmcif:
             suffix = '.cif'
         else:
             suffix = '.pdb'
         with NamedTemporaryFile(mode='w', suffix=suffix) as temp_pdb_file:
             temp_pdb_file.write(self._parent.pdb_file().read())
             temp_pdb_file.flush()
             try:
                 self._dssp = DSSP(self.model, temp_pdb_file.name)
             except OSError:
                 self._dssp = DSSP(self.model,
                                   temp_pdb_file.name,
                                   dssp="mkdssp")
     else:
         self._dssp = {}
     return self._dssp
Example #8
    def test_dssp_with_mmcif_file_and_different_chain_ids(self):
        """Test DSSP generation from MMCIF which has different label and author chain IDs."""
        if self.dssp_version < StrictVersion("2.2.0"):
            self.skipTest("Test requires DSSP version 2.2.0 or greater")

        pdbfile = "PDB/1A7G.cif"
        model = self.cifparser.get_structure("1A7G", pdbfile)[0]
        dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 82)
        self.assertEqual(dssp.keys()[0][0], "E")
Example #9
 def ss_map_creator(self, struc_to_aln_index_mapping):
     '''
     Connects the alignment mapping index and the secondary structural
     assignments from DSSP.
     '''
     ss_aln_index_map = {}
     inv_map, model = self.structure_loader(struc_to_aln_index_mapping)
     dssp = DSSP(model, self.struc_path)
     for a_key in list(dssp.keys()):
         ss_aln_index_map[inv_map[a_key[1][1]]] = self.DSSP_code_mycode[
             dssp[a_key][2]]
     return ss_aln_index_map
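ss_map_creator (and both_map_creator further down) read self.DSSP_code_mycode, which is defined elsewhere in the class; a plausible sketch of such a mapping, with the reduced alphabet chosen here purely as an assumption:

# Assumed reduction of DSSP's 8 secondary-structure codes to a 3-letter alphabet.
DSSP_code_mycode = {'H': 'H', 'G': 'H', 'I': 'H',
                    'E': 'E', 'B': 'E',
                    'T': 'C', 'S': 'C', '-': 'C'}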
Example #10
    def depth_map_creator(self, struc_to_aln_index_mapping):
        '''Connects the alignment mapping index and residue depth (DSSP
        relative accessibility is used as a proxy: above 0.2 is labelled
        exposed 'E', otherwise buried 'B').'''

        res_depth_aln_index_map = {}
        inv_map, model = self.structure_loader(struc_to_aln_index_mapping)
        dssp = DSSP(model, self.struc_path)
        #rd = ResidueDepth(model)
        for a_key in list(dssp.keys()):
            if dssp[a_key][3] > 0.2:
                res_depth_aln_index_map[inv_map[a_key[1][1]]] = 'E'
            else:
                res_depth_aln_index_map[inv_map[a_key[1][1]]] = 'B'
        return res_depth_aln_index_map
Example #11
 def test_dssp_with_mmcif_file_and_nonstandard_residues(self):
     """Test DSSP generation from MMCIF with non-standard residues."""
     p = MMCIFParser()
     pdbfile = "PDB/1AS5.cif"
     model = p.get_structure("1AS5", pdbfile)[0]
     dssp = DSSP(model, pdbfile)
     self.assertEqual(len(dssp), 24)
Example #12
 def test_dssp(self):
     """Test DSSP generation from PDB."""
     p = PDBParser()
     pdbfile = "PDB/2BEG.pdb"
     model = p.get_structure("2BEG", pdbfile)[0]
     dssp = DSSP(model, pdbfile)
     self.assertEqual(len(dssp), 130)
Example #13
def CalculateHydrogenBonds(ProteinModel, Filename, EnergyCutoff, PathToDSSP):
	# Run DSSP algorithm
	DSSPOutput = DSSP(ProteinModel, Filename, dssp = PathToDSSP)

	# Assign structure
	HydrogenBonds = {}
	TotalNumberOfHydrogenBonds = 0

	for Chain in ProteinModel:
		ChainID = Chain.get_id()
		
		for Residue in Chain:
			ResidueID = Residue.get_id()

			if is_aa(Residue.get_resname(), standard = True):
				HydrogenBonds[Chain, Residue] = 0

				try:
					DSSPEntry = DSSPOutput[(ChainID, ResidueID)]

					# DSSP tuple indices 7 and 11 hold the two N-H-->O hydrogen
					# bond energies donated by this residue's backbone N-H group.
					if float(DSSPEntry[7]) < EnergyCutoff:
						HydrogenBonds[Chain, Residue] += 1
						TotalNumberOfHydrogenBonds += 1

					if float(DSSPEntry[11]) < EnergyCutoff:
						HydrogenBonds[Chain, Residue] += 1
						TotalNumberOfHydrogenBonds += 1

				except (KeyError, ValueError):
					sys.stderr.write("No DSSP entry generated for amino acid residue " + str(ResidueID[1]) + ". Ignoring the residue.\n")

	sys.stdout.write(str(TotalNumberOfHydrogenBonds) + " backbone N-O hydrogen bonds.\n")

	return HydrogenBonds
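A hypothetical call of the function above. DSSP itself counts a backbone hydrogen bond when its electrostatic energy is below about -0.5 kcal/mol, so that value is used as the cutoff here; the file name and the mkdssp path are assumptions.

from Bio.PDB import PDBParser

parser = PDBParser(QUIET=True)
model = parser.get_structure("example", "example.pdb")[0]
# Count backbone N-H-->O hydrogen bonds donated by each residue.
hbonds = CalculateHydrogenBonds(model, "example.pdb", EnergyCutoff=-0.5, PathToDSSP="mkdssp")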
Example #14
 def test_DSSP_in_model_obj(self):
     """All elements correctly added to xtra attribute of input model object."""
     p = PDBParser()
     s = p.get_structure("example", "PDB/2BEG.pdb")
     m = s[0]
     # Read the DSSP data into the pdb object:
     _ = DSSP(m, "PDB/2BEG.dssp", "dssp", "Sander", "DSSP")
     # Now compare the xtra attribute of the pdb object
     # residue by residue with the pre-computed values:
     i = 0
     with open("PDB/dssp_xtra_Sander.txt") as fh_ref:
         ref_lines = fh_ref.readlines()
         for chain in m:
             for res in chain:
                 # Split the pre-computed values into a list:
                 xtra_list_ref = ref_lines[i].rstrip().split("\t")
                 # Then convert each element to float where possible:
                 xtra_list_ref = list(map(will_it_float, xtra_list_ref))
                 # The xtra attribute is a dict.
                 # To compare with the pre-computed values first sort according to keys:
                 xtra_items = sorted(res.xtra.items(), key=lambda s: s[0])
                 # Then extract the list of xtra values for the residue
                 # and convert to floats where possible:
                 xtra_list = [t[1] for t in xtra_items]
                 xtra_list = list(map(will_it_float, xtra_list))
                 # The reason for converting to float is, that casting a float to a string in python2.6
                 # will include fewer decimals than python3 and an assertion error will be thrown.
                 self.assertEqual(xtra_list, xtra_list_ref)
                 i += 1
Example #15
 def test_dssp_with_mmcif_file(self):
     """Test DSSP generation from MMCIF."""
     p = MMCIFParser()
     pdbfile = "PDB/2BEG.cif"
     model = p.get_structure("2BEG", pdbfile)[0]
     dssp = DSSP(model, pdbfile)
     self.assertEqual(len(dssp), 130)
Example #16
    def test_DSSP_RSA(self):
        """Tests the usage of different ASA tables."""
        # Tests include Sander/default, Wilke and Miller
        p = PDBParser()
        # Sander/default:
        s = p.get_structure("example", "PDB/2BEG.pdb")
        m = s[0]
        # Read the DSSP data into the pdb object:
        _ = DSSP(m, "PDB/2BEG.dssp", "dssp", "Sander", "DSSP")
        # Then compare the RASA values for each residue with the pre-computed values:
        i = 0
        with open("PDB/Sander_RASA.txt") as fh_ref:
            ref_lines = fh_ref.readlines()
            for chain in m:
                for res in chain:
                    rasa_ref = float(ref_lines[i].rstrip())
                    rasa = float(res.xtra["EXP_DSSP_RASA"])
                    self.assertAlmostEqual(rasa, rasa_ref)
                    i += 1

        # Wilke (procedure similar as for the Sander values above):
        s = p.get_structure("example", "PDB/2BEG.pdb")
        m = s[0]
        _ = DSSP(m, "PDB/2BEG.dssp", "dssp", "Wilke", "DSSP")
        i = 0
        with open("PDB/Wilke_RASA.txt") as fh_ref:
            ref_lines = fh_ref.readlines()
            for chain in m:
                for res in chain:
                    rasa_ref = float(ref_lines[i].rstrip())
                    rasa = float(res.xtra["EXP_DSSP_RASA"])
                    self.assertAlmostEqual(rasa, rasa_ref)
                    i += 1

        # Miller (procedure similar as for the Sander values above):
        s = p.get_structure("example", "PDB/2BEG.pdb")
        m = s[0]
        _ = DSSP(m, "PDB/2BEG.dssp", "dssp", "Miller", "DSSP")
        i = 0
        with open("PDB/Miller_RASA.txt") as fh_ref:
            ref_lines = fh_ref.readlines()
            for chain in m:
                for res in chain:
                    rasa_ref = float(ref_lines[i].rstrip())
                    rasa = float(res.xtra["EXP_DSSP_RASA"])
                    self.assertAlmostEqual(rasa, rasa_ref)
                    i += 1
Example #17
 def both_map_creator(self, struc_to_aln_index_mapping):
     '''Connects the alignment mapping index with both residue burial
     (DSSP relative accessibility as a proxy) and secondary structure.'''
     sda = {}
     inv_map, model = self.structure_loader(struc_to_aln_index_mapping)
     try:
         dssp = DSSP(model, self.struc_path)
     except OSError as e:
         raise OSError("DSSP failed with the following error:\n" + str(e))
     for a_key in list(dssp.keys()):
         if a_key[1][1] in inv_map.keys():
             if dssp[a_key][3] > 0.2:
                 sda[inv_map[a_key[1][1]]] = 'E' + self.DSSP_code_mycode[
                     dssp[a_key][2]]
             else:
                 sda[inv_map[a_key[1][1]]] = 'B' + self.DSSP_code_mycode[
                     dssp[a_key][2]]
     return sda
Example #18
    def test_dssp_with_mmcif_file(self):
        """Test DSSP generation from MMCIF."""
        if self.dssp_version < StrictVersion("2.2.0"):
            self.skipTest("Test requires DSSP version 2.2.0 or greater")

        pdbfile = "PDB/2BEG.cif"
        model = self.cifparser.get_structure("2BEG", pdbfile)[0]
        dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 130)
Example #19
    def test_dssp_with_mmcif_file_and_nonstandard_residues(self):
        """Test DSSP generation from MMCIF with non-standard residues."""
        if self.dssp_version < StrictVersion("2.2.0"):
            self.skipTest("Test requires DSSP version 2.2.0 or greater")

        pdbfile = "PDB/1AS5.cif"
        model = self.cifparser.get_structure("1AS5", pdbfile)[0]
        dssp = DSSP(model, pdbfile)
        self.assertEqual(len(dssp), 24)
Example #20
def write_models(pdb_id, structure, config_params):
    # Write temporary PDB model files. One different file for each chain and model
    # Models can start from a number greater than 0

    chain_model = {}  # {chain: [(model_id, model_file, model_dssp), ...]}
    model_chain = {}  # {model_id: [(chain_id, model_file, model_dssp), ...]}

    io = PDBIO()

    # Split models
    model_files = []  # [(model_id, model_file), ...]
    for model in structure:
        i = model.serial_num
        model_file = "{}/model_{}_{}".format(tmp_dir, pdb_id, i)
        model_files.append((i, model_file))

        # Save model
        io.set_structure(structure)
        io.save(model_file, select=ModelSelect([i]))

    # Split chains. For each model save chain separately and get dssp
    for model_id, model_file in model_files:

        model_structure = PDBParser(QUIET=True).get_structure(
            model_id, model_file)

        # model_structure should have length 1
        for model in model_structure:
            # print(i, model_file, model_structure, len(model_structure))
            for chain in model:
                # Save chain models separately
                model_chain_file = "{}_{}".format(model_file, chain.id)

                io.set_structure(model_structure)
                io.save(model_chain_file, select=ChainSelect([chain.id]))

                # Calculate secondary structure for each chain separately (for Flipper)
                model_structure_chain = PDBParser(QUIET=True).get_structure(
                    "{}_{}_{}".format(pdb_id, model_id, chain.id),
                    model_chain_file)
                _dssp = None
                try:
                    _dssp = DSSP(model_structure_chain[0],
                                 model_chain_file,
                                 dssp=config_params.get('dssp'))
                except Exception as e:
                    logging.warning("{} DSSP error {} {} {}".format(
                        pdb_id, model_file, chain.id, e))

                chain_model.setdefault(chain.id, []).append(
                    (model_id, model_chain_file, _dssp))
                model_chain.setdefault(model_id, []).append(
                    (chain.id, model_chain_file, _dssp))

    return chain_model, model_chain
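write_models depends on ModelSelect and ChainSelect helpers that are not shown; a minimal sketch of how they might be built on Bio.PDB's Select interface (an assumption about the original code):

from Bio.PDB.PDBIO import Select

class ModelSelect(Select):
    """Keep only the models whose serial numbers are listed."""
    def __init__(self, model_ids):
        self.model_ids = set(model_ids)

    def accept_model(self, model):
        return model.serial_num in self.model_ids

class ChainSelect(Select):
    """Keep only the chains whose IDs are listed."""
    def __init__(self, chain_ids):
        self.chain_ids = set(chain_ids)

    def accept_chain(self, chain):
        return chain.id in self.chain_ids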
Example #21
 def get_secondary_structure_details(self, name, pdb_file, aa_only=False):
     parser = PDBParser()
     structure = parser.get_structure(name, pdb_file)
     dssp = DSSP(structure[0], pdb_file, acc_array="Wilke")
     ss = "".join([aa[2] for aa in dssp])
     # aa[3] is the relative ASA; `residues` is assumed to map one-letter codes
     # to maximal accessible surface areas, giving an absolute SASA estimate.
     sasa = [residues[aa[1]] * aa[3] for aa in dssp]
     builder = PPBuilder()
     seq = ""
     for chain in builder.build_peptides(structure, aa_only=aa_only):
         seq += chain.get_sequence()
     return name, seq, ss, sasa, structure
Example #22
def get_DSSPList(file):
	# Parse the PDB file and take the first model.
	p = PDBParser()
	s = p.get_structure('X', file)
	model = s[0]

	# Run DSSP through the Biopython wrapper (gives relative ASA values)...
	d = DSSP(model, file, dssp=fileExe, acc_array=asaName)

	# ...and also build the raw dssp dictionary (gives residue number, aa and absolute ACC values).
	dssp_dict, dssp_keys = dssp_dict_from_pdb_file(file, DSSP=fileExe)

	dssp_list = []
	a_keys = list(d.keys())
	for v in a_keys:
		rasa_values = d[v]         # tuple whose index 3 is the relative ASA
		acc_values = dssp_dict[v]  # (aa, ss, acc, ...) from the raw dictionary
		# residue number, amino acid, absolute ACC and relative ASA
		x = v[1][1], acc_values[0], acc_values[2], rasa_values[3]
		dssp_list.append(x)
	return dssp_list
Example #23
 def extract_feature(self):
     print_info_nn(
         " >>> Adding secondary structure for database {0} ... ".format(
             self._database.name))
     overall_time = datetime.now()
     if not os.path.exists(self.__get_dir_name()):
         os.mkdir(self.__get_dir_name())
     for complex_name in self._database.complexes.keys():
         protein_complex = self._database.complexes[complex_name]
         proteins = [
             protein_complex.unbound_formation.ligand,
             protein_complex.unbound_formation.receptor
         ]
         for protein in proteins:
             dssp_file = self.__get_dir_name() + protein.name + ".npy"
             if not os.path.exists(dssp_file):
                 print_info_nn("... running DSSP for protein " +
                               protein.name)
                 start_time = datetime.now()
                 dssp = DSSP(
                     protein.structure[0], self._database.directory +
                     pdb_directory + protein.name + ".pdb")
                 # zeros (rather than np.ndarray) so the unused first two columns are not uninitialized memory
                 dssp_array = np.zeros((len(protein.residues), 6))
                 for (i, res) in enumerate(protein.biopython_residues):
                     (_, _, cid, rid) = res.get_full_id()
                     key = (cid, rid)
                     if key in dssp:
                         dssp_array[i, 2:] = (dssp[key])[2:]
                     else:
                         dssp_array[i, 2:] = [0, 0, 0, 0]
                         # print_error("WTH")
                         # sys.exit(0)
                         # print('here')
                         # pdb.set_trace()
                         # self.SS[:, index] = np.nan
                         # self.ASA[index] = np.nan
                         # self.rASA[index] = np.nan
                         # self.Phi[index] = np.nan
                         # self.Psi[index] = np.nan
                 np.save(dssp_file, dssp_array)
                 print_info("took {0} seconds.".format(
                     (datetime.now() - start_time).seconds))
             dssp = np.load(dssp_file)
             for i, res in enumerate(protein.residues):
                 # (_, s, ASA, rASA, phi, psi)
                 res.add_feature(Features.ACCESSIBLE_SURFACE_AREA, dssp[i,
                                                                        2])
                 res.add_feature(Features.RELATIVE_ACCESSIBLE_SURFACE_AREA,
                                 dssp[i, 3])
                 res.add_feature(Features.PHI, dssp[i, 4])
                 res.add_feature(Features.PSI, dssp[i, 5])
     print_info("took {0} seconds.".format(
         (datetime.now() - overall_time).seconds))
Example #24
def get_asa(residues):
	pdb_id = residues[0].get_full_id()[0]
	chain_id = residues[0].get_full_id()[2]
	pdb_path = pdb_folder_path + "/pdb{}.ent".format(pdb_id)
	structure = PDBParser(QUIET=True).get_structure(pdb_id, pdb_path)
	dssp = DSSP(structure[0], pdb_path, dssp=src_folder_path + "bin/xssp-master/mkdssp")
	dssp = dict(dssp)  # Convert to dict to access residues

	tot_residues = len(residues)  # every residue passed in; not incremented again below
	ss_content = {}
	surface = 0
	for residue in residues:
		if dssp.get((chain_id, residue.id)):
			dssp_index, aa, ss, asa, phi, psi, NH_O_1_relidx, NH_O_1_energy, O_NH_1_relidx, O_NH_1_energy, NH_O_2_relidx, NH_O_2_energy, O_NH_2_relidx, O_NH_2_energy = dssp.get((chain_id, residue.id))
			surface += asa
			ss_content.setdefault(ss, 0)
			ss_content[ss] += 1
	#for ss in ss_content:
		#print "{} {} {} {:.2f}".format(chain.id, ss, ss_content[ss], float(ss_content[ss]) / tot_residues)
	#print "{} ASA {:.2f} {:.2f}\n".format(chain.id, surface, float(surface) / tot_residues)
	return float(surface) / tot_residues
Example #25
    def run_DSSP(self, corresponding_gene_call, pdb_filepath):
        """
        DSSP is run using the API developed in Biopython. That means we don't work directly from the
        text output of DSSP, but rather a Biopython object.
        """
        # Determine the model name by loading the structure file
        p = PDBParser()
        structure = p.get_structure(corresponding_gene_call, pdb_filepath)
        model = structure[0] # pdb files can have multiple models. DSSP assumes the first.

        # run DSSP
        residue_annotation = DSSP(model, pdb_filepath, dssp = self.DSSP_executable, acc_array = "Wilke")

        if not len(residue_annotation.keys()):
            raise ConfigError("Your executable of DSSP, `{}`, exists but didn't return any meaningful output. This\
                               is a known issue with certain distributions of DSSP. For information on how to test\
                               that your version is working correctly, please visit\
                               http://merenlab.org/2016/06/18/installing-third-party-software/#dssp"\
                               .format(self.DSSP_executable, pdb_filepath))

        # convert to a digestible format
        return self.convert_DSSP_output_from_biopython_to_dataframe(residue_annotation)
Example #26
    def __init__(self, pdbdata, mode='cpk'):

        #Startup
        app.Canvas.__init__(self, keys='interactive', size=(W, H))

        #Loading shaders
        self.program = gloo.Program(vertex, fragment)

        #Analyze pdb file
        self.parser = PDBParser(QUIET=True, PERMISSIVE=True)
        self.structure = self.parser.get_structure('model', pdbdata)

        #DSSP prediction
        self.pmodel = self.structure[0]
        self.dssp = DSSP(self.pmodel, pdbdata)

        #Mode selection
        if mode not in Canvas.visualization_modes:
            raise Exception('Not recognized visualization mode %s' % mode)
        self.mode = mode

        #Camera settings
        self.translate = 50
        self.translate = max(-1, self.translate)
        self.view = translate((0, 0, -self.translate), dtype=np.float32)

        self.model = np.eye(4, dtype=np.float32)
        self.projection = np.eye(4, dtype=np.float32)
        self.program['u_projection'] = self.projection

        self.quaternion = Quaternion()

        #Load data depending on the mode

        self.apply_zoom()
        self.atom_information()
        self.load_data()
        self.show()
Example #27
 def __init__(self, pdbdata, mode='cpk'):
     
     #Analyze pdb file
     self.parser = PDBParser(QUIET=True,PERMISSIVE=True)
     self.structure = self.parser.get_structure('model',pdbdata)
     
     #DSSP prediction
     self.model = self.structure[0]
     self.dssp = DSSP(self.model, pdbdata)
     
     #Mode selection
     if mode not in MatViewer.visualization_modes:
         raise Exception('Not recognized visualization mode %s' % mode)
     self.mode = mode
     
     #Make the plot
     if self.mode == 'cpk':
         self.cpk2d()
     elif self.mode == 'backbone':
         self.bb2d()
     elif self.mode == 'aminoacid':
         self.aa2d()
     elif self.mode == 'dssp':
         self.dssp2d()
Example #28
    def calculate_solvent_access_score(self, threshold):
        '''
             Calculate the accessibility score between the predicted model
             and the template pdb structure.
             Args:
                 threshold cutoff (int): Cutoff for relative accessibility residue values.
             Returns:
                 float: The Accessibility score calculated
         '''
        # Path to the PDBs of predicted and template models
        pred_model_pdb = "data/templates/" + self.template.name + "/" + self.template.modeller_pdb + ".atm"
        template_pdb = "data/templates/" + self.template.name + "/" + self.template.reindexed_pdb + ".atm"
        # Parse PDBs
        pred_model = PDBParser(QUIET=True).get_structure(
            "pred_model", pred_model_pdb)[0]
        template_model = PDBParser(QUIET=True).get_structure(
            "template_model", template_pdb)[0]
        # Run DSSP on both PDB files of the template and the Modeller's model
        dssp_pred_model = DSSP(pred_model,
                               pred_model_pdb,
                               dssp="bin/dssp-2.0.4-linux-amd64")
        dssp_template_model = DSSP(template_model,
                                   template_pdb,
                                   dssp="bin/dssp-2.0.4-linux-amd64")
        # Parse the DSSP output to retrieve the relative % of solvent accessible area for each CA.
        # get alignment index
        query_index_ali = [
            index for index, residue in enumerate(self.query.residues)
            if str(residue) != "-"
        ]
        template_index_ali = [
            index for index, residue in enumerate(self.template.residues)
            if str(residue) != "-"
        ]
        # assign alignment indices
        rsa_pred_model = dict(
            zip(query_index_ali,
                [dssp_pred_model[key][3] for key in dssp_pred_model.keys()]))
        rsa_template_model = dict(
            zip(template_index_ali, [
                dssp_template_model[key][3]
                for key in dssp_template_model.keys()
            ]))
        # Keep only the residues that pass the relative accessibility cutoff (see keep_accessible_residues)
        pred_access_residues = keep_accessible_residues(
            rsa_pred_model, threshold)
        template_access_residues = keep_accessible_residues(
            rsa_template_model, threshold)

        # Get the common buried residues
        common_residues_len = len(
            set(pred_access_residues).intersection(template_access_residues))
        # Normalization
        return common_residues_len / len(query_index_ali)
Example #29
def get_DSSP(pdb_ids, pdb_dir='.', dssp_path='/usr/local/bin/mkdssp', drop_features=['DSSP_ID', 'AA']):
    # Check parameters
    logging.debug('PDB ids: {}'.format(pdb_ids))
    logging.debug('PDB directory: \'{}\''.format(pdb_dir))
    # Define a list of dssp features (which will be stored in a list before being turned into DataFrame
    ds_dssp = list()
    # Loop through every protein
    for pdb_id in pdb_ids:
        # Parse structure of the protein
        structure = PDBParser(QUIET=True).get_structure(pdb_id, pdb_dir + '/pdb{}.ent'.format(pdb_id))
        # Get only first model
        model = structure[0]
        # Define DSSP instance of the 0-th model
        dssp = DSSP(model, pdb_dir + '/pdb{}.ent'.format(pdb_id), dssp=dssp_path)
        # Get DSSP features: dssp index, amino acid, secondary structure, relative ASA, phi, psi, NH_O_1_relidx,
        # NH_O_1_energy, O_NH_1_relidx, O_NH_1_energy, NH_O_2_relidx, NH_O_2_energy, O_NH_2_relidx, O_NH_2_energy
        # Get chain id and residue id
        for ids, res in zip(dict(dssp), dssp):
            # Create the DSSP row
            row = list()
            row.append(pdb_id)
            row.extend(list(ids))
            row.extend(list(res))
            # Add row to dssp list
            ds_dssp.append(row)
    # Define feature names
    columns = ['PDB_ID', 'CHAIN_ID', 'RES_ID', 'DSSP_ID', 'AA', 'SEC_STRUCT', 'REL_ASA', 'PHI', 'PSI',
               'NH_O_1_relidx', 'NH_O_1_energy', 'O_NH_1_relidx', 'O_NH_1_energy', 'NH_O_2_relidx',
               'NH_O_2_energy', 'O_NH_2_relidx', 'O_NH_2_energy']
    # Define DSSP DataFrame
    ds_dssp = pd.DataFrame(ds_dssp, columns=columns)
    # Turn RES_ID from tuple to integer (gets 1-st element)
    ds_dssp.RES_ID = ds_dssp.RES_ID.apply(lambda x: x[1])
    # Drop useless features, if any
    if drop_features:
        ds_dssp = ds_dssp.drop(drop_features, axis=1)
    # Turns NA to nan
    ds_dssp = ds_dssp.replace('NA', np.nan)
    # Handle nan
    ds_dssp.loc[ds_dssp.REL_ASA.isna(), 'REL_ASA'] = ds_dssp.REL_ASA.mean()
    # Return DSSP dataset
    return ds_dssp
Example #30
def get_ss(file):

    p = PDBParser()
    pdbl = PDBList()
    try:

        p.get_structure("structure",
                        pdbl.retrieve_pdb_file(file, file_format='pdb'))

        f = os.popen('find . -iname "*{}*.ent"'.format(file))
        path = f.read().replace('\n', '')

        structure = p.get_structure("", path)
        model = structure[0]
        dssp = DSSP(model, path)

        ss_holder = {
            '-': 0,
            'T': 0,
            'S': 0,
            'H': 0,
            'B': 0,
            'E': 0,
            'G': 0,
            'I': 0
        }

        for entry in dssp:
            if entry[2] not in ss_holder:
                ss_holder[entry[2]] = 1
            else:
                ss_holder[entry[2]] += 1


        return ss_holder

    except FileNotFoundError:
        pass
Example #31
def main():
    """ main function """
    parser = argparse.ArgumentParser(description='Convert PDB atom coordinates to spherical coordinates and compute DSSP.')
    parser.add_argument('pdb', help='Input PDB file')
    parser.add_argument('-o', '--output', help='Output directory')
    args = parser.parse_args()

    basename = os.path.splitext(os.path.basename(args.pdb))[0]

    parser = PDBParser()
    structure = parser.get_structure('structure', args.pdb)

    atom_output = []
    residue_output = []
    c_alpha_output = []
    model = structure[0]

    dssp = DSSP(model, args.pdb, dssp='mkdssp')

    for chain in model:
        for residue in chain:
            residue_sph_coord = []
            c_alpha_output.append(spherical(*residue['CA'].get_coord()))
            for atom in residue:
                sph_coord = spherical(*atom.get_coord())
                atom_output.append(sph_coord)
                residue_sph_coord.append(sph_coord)
            residue_output.append(np.mean(residue_sph_coord, axis=0))

    atom_output = np.array(atom_output).T
    residue_output = np.array(residue_output).T
    c_alpha_output = np.array(c_alpha_output).T

    if args.output:
        output_file = os.path.join(args.output, basename)
    else:
        output_file = basename
    np.savez(output_file, atom_wise=atom_output, residue_wise=residue_output,
             c_alpha=c_alpha_output, dssp=dssp)
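main() relies on a spherical() helper that is not included in the snippet; a minimal sketch of the Cartesian-to-spherical conversion it presumably performs (the return order r, theta, phi is an assumption):

import numpy as np

def spherical(x, y, z):
    """Convert Cartesian coordinates to spherical (radius, polar angle, azimuth)."""
    r = np.sqrt(x * x + y * y + z * z)
    theta = np.arccos(z / r) if r > 0 else 0.0  # polar angle
    phi = np.arctan2(y, x)                      # azimuth
    return r, theta, phi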
Example #32
    if os.path.exists(filename):
        pdb_path = filename
    elif os.path.exists(filename[:-3]):  # if the file is not gzipped
        pdb_path = filename[:-3]
    else:
        sys.stderr.write(bestmodel + " does not have a pdb file!\n")
        break

    pdb_parser = PDBParser()
    if pdb_path.endswith('.gz'):
        structure = pdb_parser.get_structure(bestmodel, GzipFile(pdb_path))
    else:
        structure = pdb_parser.get_structure(bestmodel, open(pdb_path))

    model = structure[0]
    dssp = DSSP(model, pdb_path, dssp="dssp", acc_array="Wilke")

    buried = []
    exposed = []

    for t in dssp.property_list:
        aa = t[1]  # one letter codes
        ss = t[2]  # secondary structure
        rsa = t[3]  # relative surface accessibility

        if rsa >= RSA_EXPOSED_THRESH:
            exposed.append(aa)
        else:
            buried.append(aa)

    exposed = Counter(exposed)
Example #33
import sys
from difflib import *
import numpy as np
import tables as tb
from Bio.PDB import PDBParser
from Bio.PDB.DSSP import DSSP

##############################################################################################################################
###################################################### PARSE DSSP ############################################################
##############################################################################################################################

fa= open(sys.argv[1]).read().splitlines()
filename=sys.argv[2]    #this will eventually become the pdb file that we run dssp on
pdb_handle = open(filename)

p = PDBParser()
structure = p.get_structure(filename, pdb_handle)
model = structure[0]        # only one model is used for dssp (NMR entries have several) and the dssp parser can only take one
dssp = DSSP(model, filename)
a_key = list(dssp.keys())

statedic= {'H':1, 'I':2 , 'G':3, 'E':4, 'B':5, 'T':6, 'S':7, '-':8} #, '-':0}
dsspAA=[ ]
states= [ ] 
for line in a_key:
#	print dssp[line]
	dsspAA.append(dssp[line][1])
	states.append(statedic[dssp[line][2]])


##############################################################################################################################
######################################################ONE HOT ENCODING #######################################################
##############################################################################################################################