Exemplo n.º 1
0
    def get_sequence(self, chain_name=None):
        """
        Returns a dictionary with chain names as keys and ProteinSequence
        objects as values

        If chain_name is given, the dictionary contains only that chain.
        Otherwise it contains every chain in self.chains, and an alert is
        printed when there is more than one chain.

        Each sequence is built from the residues of the chain, in the order
        given by self._get_sorted_residues, and its sequenceID is
        "<self.name>_<chain name>".
        """

        if chain_name is not None:
            selected_chains = [chain_name]
        else:
            if len(self.chains) > 1:
                # Single-argument form: prints the same on Python 2 and 3
                print("ALERT: Trying to get the sequence from a PDB with more than a single chain.")
            selected_chains = list(self.chains.keys())

        sequences = {}
        for current_chain in selected_chains:
            sorted_residue_ids = self._get_sorted_residues(chain_name=current_chain)
            residues = self.chains[current_chain]
            one_letter_codes = [ProteinSequence.get_aminoacid_code_3to1(residues[residue_id].get_residue_type())
                                for residue_id in sorted_residue_ids]
            sequences[current_chain] = ProteinSequence("".join(one_letter_codes),
                                                       sequenceID=self.name + "_" + current_chain)

        return sequences
Exemplo n.º 2
0
    def _parse_dssp_results(self, fp):
        """
        Reads DSSP output from fp (an iterable of lines) and stores the
        per-residue results on the residue objects held in self.chains

        Lines are ignored until the DSSP residue table header is found. For
        every residue line after it, the residue
        self.chains[chain][residue number] receives:

          - dssp_accessibility: the accessibility read from the line
          - dssp_rel_accessibility: that accessibility as a percentage of the
            value in the surface table for its amino acid type. It is left
            untouched when the type reported by DSSP has no entry in the
            table (e.g. "X")
          - dssp_ss: the secondary structure code of the line, or 'N' when
            DSSP leaves it blank

        Raises ValueError when the amino acid reported by DSSP does not match
        the residue type stored in the residue object (DSSP type "X" is
        accepted for any residue).
        """

        # Reference value per one-letter amino acid code, used as the
        # denominator of the relative accessibility
        surface = {'A': 115,
                   'C': 149,
                   'D': 170,
                   'E': 207,
                   'F': 230,
                   'G': 86,
                   'H': 206,
                   'I': 187,
                   'K': 222,
                   'L': 192,
                   'M': 210,
                   'N': 184,
                   'P': 140,
                   'Q': 208,
                   'R': 263,
                   'S': 140,
                   'T': 164,
                   'V': 161,
                   'W': 269,
                   'Y': 257}

        table_header = "  #  RESIDUE AA STRUCTURE BP1 BP2"
        in_results = False

        for line in fp:

            # Everything up to and including the table header is skipped
            if not in_results:
                if table_header in line:
                    in_results = True
                continue

            # A blank line carries no residue record
            if not line.strip():
                continue

            res_type = line[13]

            # Skipping chain breaks, which DSSP marks with "!"
            if res_type == "!":
                continue

            # DSSP writes cysteines that take part in an SS-bridge as lower
            # case letters (one letter per bridge); they are still cysteines
            if res_type.islower():
                res_type = "C"

            acc = int(line[35:38])
            chain = line[11]
            res_num = int(line[5:10].strip())

            residue_object = self.chains[chain][res_num]
            residue_object.dssp_accessibility = acc

            pdb_type = ProteinSequence.get_aminoacid_code_3to1(residue_object.get_residue_type())
            if pdb_type != res_type and res_type != "X":
                raise ValueError("Discordance between PDB and DSSP: PDB residue is %s, DSSP residue is %s. DSSP LINE: %s"
                                 % (pdb_type, res_type, line.rstrip()))

            # Types without an entry in the table (e.g. "X") cannot get a
            # relative accessibility, so the attribute is left as it was
            reference_surface = surface.get(res_type)
            if reference_surface is not None:
                residue_object.dssp_rel_accessibility = float(acc) * 100 / reference_surface

            ss_code = line[16]
            if ss_code == ' ':
                residue_object.dssp_ss = 'N'
            else:
                residue_object.dssp_ss = ss_code