def find_disulfides(self):
     ''' Finds the disulfide bridges declared in the SSBOND records of the model's PDB file.

         Only the first 10 lines of the file at self.path_to_pdb are inspected. An SSBOND
         record is used when it links two CYS residues of chain R; SSBOND lines that do not
         have that layout are skipped. Each bridge is reported by the generic numbers of its
         two residues (as returned by ggn), or by the sequence number as a string for a
         residue that has no generic number.

         @return: list of [gn1, gn2] pairs, one per parsed SSBOND record.
         @raise AssertionError: if self.path_to_pdb is an empty string.
     '''
     if self.path_to_pdb=='':
         raise AssertionError('Run find_files function first to locate the model source file.')
     # Raw string so that \s and \d reach the regex engine instead of being
     # treated as (invalid) Python string escapes.
     ssbond_pattern = re.compile(r'SSBOND\s+\d+\s+CYS\sR\s+(\d+)\s+CYS\sR\s+(\d+)')
     disulfide_pairs = []
     with open(self.path_to_pdb, 'r') as f:
         # Iterate lazily: only the first 10 lines are examined, so there is no
         # need to load the whole file.
         for line_number, line in enumerate(f, start=1):
             if line_number>10:
                 break
             if not line.startswith('SSBOND'):
                 continue
             pdb_re = ssbond_pattern.search(line)
             if pdb_re is None:
                 # SSBOND line that is not a CYS-CYS bridge inside chain R.
                 continue
             num1, num2 = pdb_re.group(1), pdb_re.group(2)
             # NOTE(review): the two residues are unpacked in queryset order, which
             # is not necessarily the order of the numbers in the record - confirm.
             res1, res2 = list(Residue.objects.filter(protein_conformation__protein__entry_name=self.receptor, sequence_number__in=[num1, num2]))
             pair = []
             for res in (res1, res2):
                 if res.display_generic_number is not None:
                     pair.append(ggn(res.display_generic_number.label))
                 else:
                     # No generic number available: fall back to the sequence number.
                     pair.append(str(res.sequence_number))
             disulfide_pairs.append(pair)
     print('FIND disulfides: {}'.format(disulfide_pairs))
     return disulfide_pairs
 def find_disulfides(self):
     ''' Finds the disulfide bridges declared in the SSBOND records of the model's PDB file.

         Only the first 10 lines of the file at self.path_to_pdb are inspected. An SSBOND
         record is used when it links two CYS residues of chain R; SSBOND lines that do not
         have that layout are skipped. Each bridge is reported by the generic numbers of its
         two residues (as returned by ggn), or by the sequence number as a string for a
         residue that has no generic number.

         @return: list of [gn1, gn2] pairs, one per parsed SSBOND record.
         @raise AssertionError: if self.path_to_pdb is an empty string.
     '''
     if self.path_to_pdb=='':
         raise AssertionError('Run find_files function first to locate the model source file.')
     # Raw string so that \s and \d reach the regex engine instead of being
     # treated as (invalid) Python string escapes.
     ssbond_pattern = re.compile(r'SSBOND\s+\d+\s+CYS\sR\s+(\d+)\s+CYS\sR\s+(\d+)')
     disulfide_pairs = []
     with open(self.path_to_pdb, 'r') as f:
         # Iterate lazily: only the first 10 lines are examined, so there is no
         # need to load the whole file.
         for line_number, line in enumerate(f, start=1):
             if line_number>10:
                 break
             if not line.startswith('SSBOND'):
                 continue
             pdb_re = ssbond_pattern.search(line)
             if pdb_re is None:
                 # SSBOND line that is not a CYS-CYS bridge inside chain R.
                 continue
             num1, num2 = pdb_re.group(1), pdb_re.group(2)
             # NOTE(review): the two residues are unpacked in queryset order, which
             # is not necessarily the order of the numbers in the record - confirm.
             res1, res2 = list(Residue.objects.filter(protein_conformation__protein__entry_name=self.receptor, sequence_number__in=[num1, num2]))
             pair = []
             for res in (res1, res2):
                 if res.display_generic_number is not None:
                     pair.append(ggn(res.display_generic_number.label))
                 else:
                     # No generic number available: fall back to the sequence number.
                     pair.append(str(res.sequence_number))
             disulfide_pairs.append(pair)
     print('FIND disulfides: {}'.format(disulfide_pairs))
     return disulfide_pairs
 def fetch_residues_from_pdb(self, structure, generic_numbers, modify_bulges=False, just_nums=False):
     ''' Fetches the atoms of selected residues of a structure from their stored Rotamer
         PDB records.

         A residue is looked up by generic number when the requested value contains 'x',
         otherwise by sequence number. For sequence number lookups the key is converted to
         the residue's generic number when one can be resolved, unless just_nums is set.

         @param structure: Structure, Structure object where residues should be fetched from
         @param generic_numbers: list, list of generic numbers (or sequence numbers) to be fetched
         @param modify_bulges: boolean, set it to true when used for bulge switching. E.g. you want a 5x461
         residue to be considered a 5x46 residue: the last character of a five character key is dropped.
         @param just_nums: boolean, when true sequence numbers are kept as keys and are not
         converted to generic numbers.
         @return: OrderedDict mapping each key (generic number with '.' instead of 'x', or the
         sequence number as a string) to the list of atoms parsed for that residue.
     '''
     output = OrderedDict()
     for gn in generic_numbers:
         if 'x' in str(gn):
             rotamer = list(Rotamer.objects.filter(structure__protein_conformation=structure.protein_conformation,
                     residue__display_generic_number__label=dgn(gn,structure.protein_conformation),
                     structure__preferred_chain=structure.preferred_chain))
         else:
             rotamer = list(Rotamer.objects.filter(structure__protein_conformation=structure.protein_conformation,
                     residue__sequence_number=gn, structure__preferred_chain=structure.preferred_chain))
             if not just_nums:
                 try:
                     gn = ggn(Residue.objects.get(protein_conformation=structure.protein_conformation,
                                                 sequence_number=gn).display_generic_number.label)
                 except Exception:
                     # Best effort: keep the sequence number as key when no generic
                     # number can be resolved for this residue.
                     pass
         if len(rotamer)>1:
             # Several records: take the first one that is not a COMPND record and
             # whose chain identifier (column 22 of the PDB line) is a preferred chain.
             # NOTE(review): if none qualifies, rotamer stays a list and the access
             # below fails - confirm whether a fallback is wanted.
             for candidate in rotamer:
                 pdb_text = candidate.pdbdata.pdb
                 if not pdb_text.startswith('COMPND') and pdb_text[21] in structure.preferred_chain:
                     rotamer = candidate
                     break
         else:
             rotamer = rotamer[0]
         # str() lets integer sequence numbers share the same key-building path.
         key = str(gn).replace('x','.')
         # Only string keys are shortened; an integer sequence number has no bulge digit
         # (the unguarded len() call used to raise TypeError for integers).
         if modify_bulges and isinstance(gn, str) and len(gn)==5:
             key = key[:-1]
         pdb_io = StringIO(rotamer.pdbdata.pdb)
         rota_struct = PDB.PDBParser(QUIET=True).get_structure('structure', pdb_io)[0]
         for chain in rota_struct:
             for residue in chain:
                 # A later residue of the same record overwrites an earlier one.
                 output[key] = list(residue)
     return output
 def fetch_residues_from_pdb(self, structure, generic_numbers, modify_bulges=False, just_nums=False):
     ''' Fetches the atoms of selected residues of a structure from their stored Rotamer
         PDB records.

         A residue is looked up by generic number when the requested value contains 'x',
         otherwise by sequence number. For sequence number lookups the key is converted to
         the residue's generic number when one can be resolved, unless just_nums is set.

         @param structure: Structure, Structure object where residues should be fetched from
         @param generic_numbers: list, list of generic numbers (or sequence numbers) to be fetched
         @param modify_bulges: boolean, set it to true when used for bulge switching. E.g. you want a 5x461
         residue to be considered a 5x46 residue: the last character of a five character key is dropped.
         @param just_nums: boolean, when true sequence numbers are kept as keys and are not
         converted to generic numbers.
         @return: OrderedDict mapping each key (generic number with '.' instead of 'x', or the
         sequence number as a string) to the list of atoms parsed for that residue.
     '''
     output = OrderedDict()
     for gn in generic_numbers:
         if 'x' in str(gn):
             rotamer = list(Rotamer.objects.filter(structure__protein_conformation=structure.protein_conformation,
                     residue__display_generic_number__label=dgn(gn,structure.protein_conformation),
                     structure__preferred_chain=structure.preferred_chain))
         else:
             rotamer = list(Rotamer.objects.filter(structure__protein_conformation=structure.protein_conformation,
                     residue__sequence_number=gn, structure__preferred_chain=structure.preferred_chain))
             if not just_nums:
                 try:
                     gn = ggn(Residue.objects.get(protein_conformation=structure.protein_conformation,
                                                 sequence_number=gn).display_generic_number.label)
                 except Exception:
                     # Best effort: keep the sequence number as key when no generic
                     # number can be resolved for this residue.
                     pass
         if len(rotamer)>1:
             # Several records: take the first one that is not a COMPND record and
             # whose chain identifier (column 22 of the PDB line) is a preferred chain.
             # NOTE(review): if none qualifies, rotamer stays a list and the access
             # below fails - confirm whether a fallback is wanted.
             for candidate in rotamer:
                 pdb_text = candidate.pdbdata.pdb
                 if not pdb_text.startswith('COMPND') and pdb_text[21] in structure.preferred_chain:
                     rotamer = candidate
                     break
         else:
             rotamer = rotamer[0]
         # str() lets integer sequence numbers share the same key-building path.
         key = str(gn).replace('x','.')
         # Only string keys are shortened; an integer sequence number has no bulge digit
         # (the unguarded len() call used to raise TypeError for integers).
         if modify_bulges and isinstance(gn, str) and len(gn)==5:
             key = key[:-1]
         pdb_io = StringIO(rotamer.pdbdata.pdb)
         rota_struct = PDB.PDBParser(QUIET=True).get_structure('structure', pdb_io)[0]
         for chain in rota_struct:
             for residue in chain:
                 # A later residue of the same record overwrites an earlier one.
                 output[key] = list(residue)
     return output
Пример #5
0
    def pdb_array_creator(self, structure=None, filename=None):
        ''' Builds a nested OrderedDict of the residues of a structure: the outer keys are
            segment labels, the inner keys are generic numbers in dot notation (e.g. 5.46)
            or, in one fallback case, the sequence number as a string. The inner values are
            lists of atoms.

            Two paths exist. When StructureSeqNumOverwrite yields a non-empty pdb_wt_table,
            the residues of segments TM1-TM7 and H8 are read from the stored Rotamer records
            and the result is pretty-printed before it is returned. Otherwise the PDB data
            goes through GenericNumbering (sequence parser) and the residues of the preferred
            chain are sorted into the segments listed in self.segment_coding.

            NOTE(review): structure is dereferenced unconditionally (for example
            structure.protein_conformation), so a call with structure=None fails although
            the signature allows it.

            @param structure: Structure, Structure object of protein. When using structure, leave filename=None.
            @param filename: str, filename of pdb to be parsed; handed to GenericNumbering as pdb_file
            whenever it is given.
            @return: OrderedDict of OrderedDicts as described above.
        '''
        # seq_nums_overwrite_cutoff_dict = {'4PHU':2000, '4LDL':1000, '4LDO':1000, '4QKX':1000, '5JQH':1000, '5TZY':2000, '5KW2':2000}
        # Input for the generic numbering step: the stored PDB text of the structure,
        # or the caller supplied filename.
        if structure != None and filename == None:
            io = StringIO(structure.pdb_data.pdb)
        else:
            io = filename
        # Parallel lists: key chosen for a residue and the residue's atom list.
        gn_array = []
        residue_array = []
        # pdb_struct = PDB.PDBParser(QUIET=True).get_structure(structure.pdb_code.index, io)[0]

        residues = Residue.objects.filter(
            protein_conformation=structure.protein_conformation)
        # Generic numbers (dot notation) known for this structure; residues for which
        # the conversion fails are left out.
        gn_list = []
        for i in residues:
            try:
                gn_list.append(
                    ggn(i.display_generic_number.label).replace('x', '.'))
            except:
                pass

        ssno = StructureSeqNumOverwrite(structure)
        ssno.seq_num_overwrite('pdb')
        if len(ssno.pdb_wt_table) > 0:
            # Path 1: build the array from the database rotamers of the helical segments.
            residues = residues.filter(protein_segment__slug__in=[
                'TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7', 'H8'
            ]).order_by('sequence_number')
            output = OrderedDict()
            for r in residues:
                if r.protein_segment.slug == None:
                    continue
                if r.protein_segment.slug not in output:
                    output[r.protein_segment.slug] = OrderedDict()
                rotamer = Rotamer.objects.filter(residue=r)
                rotamer = self.right_rotamer_select(rotamer)
                rota_io = StringIO(rotamer.pdbdata.pdb)
                p = PDB.PDBParser()
                parsed_rota = p.get_structure('rota', rota_io)
                for chain in parsed_rota[0]:
                    for res in chain:
                        atom_list = []
                        for atom in res:
                            # Skip hydrogens
                            if atom.get_id().startswith('H'):
                                continue
                            # The B-factor fields of N and CA are overwritten with the
                            # two halves of the display generic number label.
                            if atom.get_id() == 'N':
                                bw, gn = r.display_generic_number.label.split(
                                    'x')
                                atom.set_bfactor(bw)
                            elif atom.get_id() == 'CA':
                                bw, gn = r.display_generic_number.label.split(
                                    'x')
                                gn = "{}.{}".format(bw.split('.')[0], gn)
                                # A three digit position is written as a negative
                                # number without its last digit.
                                if len(gn.split('.')[1]) == 3:
                                    gn = '-' + gn[:-1]
                                atom.set_bfactor(gn)
                            atom_list.append(atom)
                        output[r.protein_segment.slug][ggn(
                            r.display_generic_number.label).replace(
                                'x', '.')] = atom_list
            # NOTE(review): prints the whole result to stdout on every call.
            pprint.pprint(output)
            return output
        else:
            # Path 2: assign generic numbers with the sequence parser and read them
            # back from the CA B-factors.
            assign_gn = as_gn.GenericNumbering(
                pdb_file=io,
                pdb_code=structure.pdb_code.index,
                sequence_parser=True)
            pdb_struct = assign_gn.assign_generic_numbers_with_sequence_parser(
            )
            pref_chain = structure.preferred_chain
            parent_prot_conf = ProteinConformation.objects.get(
                protein=structure.protein_conformation.protein.parent)
            parent_residues = Residue.objects.filter(
                protein_conformation=parent_prot_conf)
            # Sequence number of the last residue in the parent residue queryset.
            last_res = list(parent_residues)[-1].sequence_number
            # Only the first character of preferred_chain is used as chain id.
            if len(pref_chain) > 1:
                pref_chain = pref_chain[0]
            for residue in pdb_struct[pref_chain]:
                # Residues without a CA atom, or with a CA B-factor outside
                # (-9.1, 9.1), are skipped entirely.
                if 'CA' in residue and -9.1 < residue['CA'].get_bfactor(
                ) < 9.1:
                    use_resid = False
                    gn = str(residue['CA'].get_bfactor())
                    # Pad to two decimals, e.g. '5.5' becomes '5.50'.
                    if len(gn.split('.')[1]) == 1:
                        gn = gn + '0'
                    # A negative value loses its sign and gets a trailing '1'.
                    if gn[0] == '-':
                        gn = gn[1:] + '1'
                    # Exceptions: these residues are always keyed by residue number.
                    if structure.pdb_code.index == '3PBL' and residue.get_id(
                    )[1] == 331:
                        use_resid = True
                    elif structure.pdb_code.index == '6QZH' and residue.get_id(
                    )[1] == 1434:
                        use_resid = True
                    elif structure.pdb_code.index == '7M3E':
                        use_resid = True
                    #################################################
                    elif gn in gn_list:
                        gn_array.append(gn)
                        residue_array.append(residue.get_list())
                    else:
                        # Generic number unknown for this structure.
                        use_resid = True
                    if use_resid:
                        gn_array.append(str(residue.get_id()[1]))
                        residue_array.append(residue.get_list())
            # One (initially empty) inner dictionary per segment label.
            output = OrderedDict()
            for num, label in self.segment_coding.items():
                output[label] = OrderedDict()
            if len(gn_array) != len(residue_array):
                raise AssertionError()

            for gn, res in zip(gn_array, residue_array):
                if '.' in gn:
                    # Key is a generic number: the integer part selects the segment.
                    seg_num = int(gn.split('.')[0])
                    seg_label = self.segment_coding[seg_num]
                    # Segment 8 residues are dropped while TM7 is still empty.
                    if seg_num == 8 and len(output['TM7']) == 0:
                        continue
                    else:
                        output[seg_label][gn] = res
                else:
                    # Key is a residue number: try to resolve a generic number
                    # through the database.
                    try:
                        found_res, found_gn = None, None
                        try:
                            found_res = Residue.objects.get(
                                protein_conformation=structure.
                                protein_conformation,
                                sequence_number=gn)
                        except:
                            # Exception for res 317 in 5VEX, 5VEW
                            if structure.pdb_code.index in [
                                    '5VEX', '5VEW'
                            ] and gn == '317' and res[0].get_parent(
                            ).get_resname() == 'CYS':
                                found_res = Residue.objects.get(
                                    protein_conformation=parent_prot_conf,
                                    sequence_number=gn)
                            #####################################
                        # If found_res is still None this raises and control moves
                        # to the outer except below.
                        found_gn = str(
                            ggn(found_res.display_generic_number.label)
                        ).replace('x', '.')

                        # Exception for res 318 in 5VEX, 5VEW
                        if structure.pdb_code.index in [
                                '5VEX', '5VEW'
                        ] and gn == '318' and res[0].get_parent().get_resname(
                        ) == 'ILE' and found_gn == '5.47':
                            found_gn = '5.48'
                        #####################################
                        if -9.1 < float(found_gn) < 9.1:
                            # Skip residues whose list holds a single entry and
                            # residues numbered beyond last_res.
                            if len(res) == 1:
                                continue
                            if int(gn) > last_res:
                                continue
                            seg_label = self.segment_coding[int(
                                found_gn.split('.')[0])]
                            output[seg_label][found_gn] = res
                    except:
                        # Fallback for residues named YCM or CSD: look them up in
                        # the parent protein conformation.
                        if res[0].get_parent().get_resname() == 'YCM' or res[
                                0].get_parent().get_resname() == 'CSD':
                            try:
                                found_res = Residue.objects.get(
                                    protein_conformation=parent_prot_conf,
                                    sequence_number=gn)
                            except:
                                continue
                            # Keep only segments whose slug starts with 'T' or 'H'.
                            if found_res.protein_segment.slug[0] not in [
                                    'T', 'H'
                            ]:
                                continue
                            try:
                                found_gn = str(
                                    ggn(found_res.display_generic_number.label)
                                ).replace('x', '.')
                            except:
                                # No generic number: key by the residue number.
                                found_gn = str(gn)
                            output[
                                found_res.protein_segment.slug][found_gn] = res
        return output
    def pdb_array_creator(self, structure=None, filename=None):
        ''' Builds a nested OrderedDict of the residues of a structure: the outer keys are
            the segment labels of self.segment_coding (or a segment slug in the YCM/CSD
            fallback), the inner keys are generic numbers in dot notation (e.g. 5.46) or,
            in one fallback case, the sequence number as a string. The inner values are
            the residues' atom lists as returned by get_list().

            The PDB data goes through GenericNumbering (sequence parser); the generic
            number of each residue of the preferred chain is then read back from the
            B-factor of its CA atom.

            NOTE(review): structure is dereferenced unconditionally (for example
            structure.protein_conformation), so a call with structure=None fails although
            the signature allows it.

            @param structure: Structure, Structure object of protein. When using structure, leave filename=None.
            @param filename: str, filename of pdb to be parsed; handed to GenericNumbering as pdb_file
            whenever it is given.
            @return: OrderedDict of OrderedDicts as described above.
        '''
        # Residue number cutoffs per PDB code, used below both to accept generic
        # numbers of high-numbered residues and to strip the leading digit of
        # residue numbers above the cutoff.
        seq_nums_overwrite_cutoff_dict = {'4PHU':2000, '4LDL':1000, '4LDO':1000, '4QKX':1000, '5JQH':1000, '5TZY':2000, '5KW2':2000}
        # Input for the generic numbering step: the stored PDB text of the structure,
        # or the caller supplied filename.
        if structure!=None and filename==None:
            io = StringIO(structure.pdb_data.pdb)
        else:
            io = filename
        # Parallel lists: key chosen for a residue and the residue's atom list.
        gn_array = []
        residue_array = []
        # pdb_struct = PDB.PDBParser(QUIET=True).get_structure(structure.pdb_code.index, io)[0]

        residues = Residue.objects.filter(protein_conformation=structure.protein_conformation)
        # Generic numbers (dot notation) known for this structure; residues for which
        # the conversion fails are left out.
        gn_list = []
        for i in residues:
            try:
                gn_list.append(ggn(i.display_generic_number.label).replace('x','.'))
            except:
                pass

        assign_gn = as_gn.GenericNumbering(pdb_file=io, pdb_code=structure.pdb_code.index, sequence_parser=True)
        pdb_struct = assign_gn.assign_generic_numbers_with_sequence_parser()
        pref_chain = structure.preferred_chain
        parent_prot_conf = ProteinConformation.objects.get(protein=structure.protein_conformation.protein.parent)
        parent_residues = Residue.objects.filter(protein_conformation=parent_prot_conf)
        # Sequence number of the last residue in the parent residue queryset.
        last_res = list(parent_residues)[-1].sequence_number
        # Only the first character of preferred_chain is used as chain id.
        if len(pref_chain)>1:
            pref_chain = pref_chain[0]
        for residue in pdb_struct[pref_chain]:
            # The try block uses exceptions as control flow: every "raise Exception()"
            # (and any other error, presumably including a missing CA atom) sends the
            # residue to the except branch, where it is keyed by residue number.
            try:
                if -9.1 < residue['CA'].get_bfactor() < 9.1:
                    gn = str(residue['CA'].get_bfactor())
                    # Pad to two decimals, e.g. '5.5' becomes '5.50'.
                    if len(gn.split('.')[1])==1:
                        gn = gn+'0'
                    # A negative value loses its sign and gets a trailing '1'.
                    if gn[0]=='-':
                        gn = gn[1:]+'1'
                    # Exception for 3PBL 331, gn gets assigned wrong
                    if structure.pdb_code.index=='3PBL' and residue.get_id()[1]==331:
                        raise Exception()
                    #################################################
                    if gn in gn_list:
                        # For residue numbers above 1000 the generic number is only
                        # accepted when the PDB code has a cutoff entry and the
                        # residue number reaches that cutoff.
                        if int(residue.get_id()[1])>1000:
                            if structure.pdb_code.index in seq_nums_overwrite_cutoff_dict and int(residue.get_id()[1])>=seq_nums_overwrite_cutoff_dict[structure.pdb_code.index]:
                                gn_array.append(gn)
                                residue_array.append(residue.get_list())
                            else:
                                raise Exception()
                        else:
                            gn_array.append(gn)
                            residue_array.append(residue.get_list())
                    else:
                        raise Exception()
                else:
                    raise Exception()
            except:
                # Fallback: key the residue by its residue number. Above the cutoff
                # of a listed PDB code the first digit of the number is dropped.
                if structure!=None and structure.pdb_code.index in seq_nums_overwrite_cutoff_dict:
                    if int(residue.get_id()[1])>seq_nums_overwrite_cutoff_dict[structure.pdb_code.index]:
                        gn_array.append(str(int(str(residue.get_id()[1])[1:])))
                    else:
                        gn_array.append(str(residue.get_id()[1]))
                else:
                    gn_array.append(str(residue.get_id()[1]))
                residue_array.append(residue.get_list())
        # One (initially empty) inner dictionary per segment label.
        output = OrderedDict()
        for num, label in self.segment_coding.items():
            output[label] = OrderedDict()
        if len(gn_array)!=len(residue_array):
            raise AssertionError()
        for gn, res in zip(gn_array,residue_array):
            if '.' in gn:
                # Key is a generic number: the integer part selects the segment.
                seg_num = int(gn.split('.')[0])
                seg_label = self.segment_coding[seg_num]
                # Segment 8 residues are dropped while TM7 is still empty.
                if seg_num==8 and len(output['TM7'])==0:
                    continue
                else:
                    output[seg_label][gn] = res
            else:
                # Key is a residue number: try to resolve a generic number through
                # the database.
                try:
                    found_res, found_gn = None, None
                    try:
                        found_res = Residue.objects.get(protein_conformation=structure.protein_conformation,
                                                        sequence_number=gn)
                    except:
                        # Exception for res 317 in 5VEX, 5VEW
                        if structure.pdb_code.index in ['5VEX','5VEW'] and gn=='317' and res[0].get_parent().get_resname()=='CYS':
                            found_res = Residue.objects.get(protein_conformation=parent_prot_conf,
                                                            sequence_number=gn)
                        #####################################
                    # If found_res is still None this raises and control moves to the
                    # outer except below.
                    found_gn = str(ggn(found_res.display_generic_number.label)).replace('x','.')

                    # Exception for res 318 in 5VEX, 5VEW
                    if structure.pdb_code.index in ['5VEX','5VEW'] and gn=='318' and res[0].get_parent().get_resname()=='ILE' and found_gn=='5.47':
                        found_gn = '5.48'
                    #####################################
                    if -9.1 < float(found_gn) < 9.1:
                        # Skip residues whose list holds a single entry and residues
                        # numbered beyond last_res.
                        if len(res)==1:
                            continue
                        if int(gn)>last_res:
                            continue
                        seg_label = self.segment_coding[int(found_gn.split('.')[0])]
                        output[seg_label][found_gn] = res
                except:
                    # Fallback for residues named YCM or CSD: look them up in the
                    # parent protein conformation.
                    # NOTE(review): this lookup is not guarded, so a missing residue
                    # raises out of the method.
                    if res[0].get_parent().get_resname()=='YCM' or res[0].get_parent().get_resname()=='CSD':
                        found_res = Residue.objects.get(protein_conformation=parent_prot_conf, sequence_number=gn)
                        # Keep only segments whose slug starts with 'T' or 'H'.
                        if found_res.protein_segment.slug[0] not in ['T','H']:
                            continue
                        try:
                            found_gn = str(ggn(found_res.display_generic_number.label)).replace('x','.')
                        except:
                            # No generic number: key by the residue number.
                            found_gn = str(gn)
                        output[found_res.protein_segment.slug][found_gn] = res

        return output