Exemplo n.º 1
0
    def run_RMSD(self, file1, file2):
        ''' Calculates 4 RMSD values between two GPCR pdb files. It compares the two files using sequence numbers.
            1. overall all atoms RMSD
            2. overall backbone atoms RMSD
            3. 7TM all atoms RMSD
            4. 7TM backbone atoms RMSD

            Params:
                @file1: first pdb file, passed straight to PDBParser.get_structure
                @file2: second pdb file, passed straight to PDBParser.get_structure
            Returns:
                Tuple (rmsd1, rmsd2, rmsd3, rmsd4) in the order listed above, each
                being whatever self.calc_RMSD returns.
        '''
        parser = PDB.PDBParser(QUIET=True)
        # Only the first model of each file is compared.
        pdb1 = parser.get_structure('struct1', file1)[0]
        pdb2 = parser.get_structure('struct2', file2)[0]

        pdb1 = as_gn.GenericNumbering(structure=pdb1).assign_generic_numbers()
        pdb2 = as_gn.GenericNumbering(structure=pdb2).assign_generic_numbers()

        def collect_residues(model):
            '''Return (all residues, 7TM residues) of a model, keyed by sequence number.'''
            all_residues, tm_residues = OrderedDict(), OrderedDict()
            for chain in model:
                for residue in chain:
                    # Skip hetero residues and waters (non-blank hetero flag).
                    if residue.get_full_id()[3][0] != ' ':
                        continue
                    seq_num = int(residue.get_id()[1])
                    all_residues[seq_num] = residue
                    try:
                        ca_bfactor = residue['CA'].get_bfactor()
                    except KeyError:
                        # Residue has no CA atom, so it cannot be classified.
                        continue
                    # NOTE(review): presumably assign_generic_numbers stores the
                    # generic number in the CA B-factor, so this window selects
                    # the 7TM helices - confirm against GenericNumbering.
                    if -8.1 < ca_bfactor < 8.1:
                        tm_residues[seq_num] = residue
            return all_residues, tm_residues

        pdb_array1, pdb_array3 = collect_residues(pdb1)
        pdb_array2, pdb_array4 = collect_residues(pdb2)

        overall_all1, overall_all2, overall_backbone1, overall_backbone2, o_a, o_b = self.create_lists(
            pdb_array1, pdb_array2)
        TM_all1, TM_all2, TM_backbone1, TM_backbone2, t_a, t_b = self.create_lists(
            pdb_array3, pdb_array4)

        rmsd1 = self.calc_RMSD(overall_all1, overall_all2, o_a)
        rmsd2 = self.calc_RMSD(overall_backbone1, overall_backbone2, o_b)
        rmsd3 = self.calc_RMSD(TM_all1, TM_all2, t_a)
        rmsd4 = self.calc_RMSD(TM_backbone1, TM_backbone2, t_b)
        return rmsd1, rmsd2, rmsd3, rmsd4
Exemplo n.º 2
0
    def run(self):
        """Prepare the signalling-protein part of the complex model inputs.

        Steps, in order:
            1. Fetch the SignprotComplex of ``self.main_structure`` and choose the
               target protein (``self.signprot`` when set, otherwise the protein
               stored with the complex).
            2. Align the target against the structure's protein and assign CGN
               labels to the alpha chain atoms of the main structure.
            3. When the main structure is not listed by
               ``get_full_alpha_templates``, superpose segments H1-hfs2 from an
               alternative template onto the main structure and switch those
               segments' alignments to the alternative template.
            4. Rebuild every loop alignment; when reference and template loop
               lengths differ, the gap is placed in the middle of the loop.
            5. Copy alignments, atom arrays and template sources to ``self.a``,
               ``self.main_pdb_array`` and ``self.template_source``.
            6. Collect the residue keys to be remodelled in
               ``self.trimmed_residues``.
            7. Append the Beta and Gamma chains of the main structure as-is.

        Returns:
            None. All results are stored on ``self``.
        """
        parse = GPCRDBParsingPDB()
        self.signprot_complex = SignprotComplex.objects.get(
            structure=self.main_structure)
        structure_signprot = self.signprot_complex.protein
        # Kept as `!= False` on purpose: switching to a truthiness test would
        # change which values of self.signprot select the explicit target.
        if self.signprot != False:
            self.target_signprot = Protein.objects.get(
                entry_name=self.signprot)
        else:
            self.target_signprot = self.signprot_complex.protein
        self.signprot_protconf = ProteinConformation.objects.get(
            protein=self.target_signprot)
        sign_a = GProteinAlignment()
        sign_a.run_alignment(self.target_signprot, structure_signprot)
        io = StringIO(self.main_structure.pdb_data.pdb)
        assign_cgn = as_gn.GenericNumbering(
            pdb_file=io,
            pdb_code=self.main_structure.pdb_code.index,
            sequence_parser=True,
            signprot=structure_signprot)
        signprot_pdb_array = assign_cgn.assign_cgn_with_sequence_parser(
            self.signprot_complex.alpha)

        # Alignment exception in HN for 6OIJ, shifting alignment by 6 residues
        # NOTE(review): the first 8 positions are blanked while the shift is 6 -
        # confirm the two constants are meant to differ.
        if self.main_structure.pdb_code.index == '6OIJ':
            keys = list(signprot_pdb_array['HN'].keys())
            new_HN = OrderedDict()
            for i, k in enumerate(signprot_pdb_array['HN']):
                if i < 8:
                    new_HN[k] = 'x'
                else:
                    new_HN[k] = signprot_pdb_array['HN'][keys[i - 6]]
            signprot_pdb_array['HN'] = new_HN

        new_array = OrderedDict()

        # Initiate complex part of template source
        source_resis = Residue.objects.filter(
            protein_conformation__protein=self.target_signprot)
        for res in source_resis:
            if res.protein_segment.slug not in self.template_source:
                self.template_source[res.protein_segment.slug] = OrderedDict()
            if res.protein_segment.category == 'loop':
                # Loop residues are keyed by sequence number and start without a template.
                self.template_source[res.protein_segment.slug][str(
                    res.sequence_number)] = [None, None]
            else:
                self.template_source[res.protein_segment.slug][
                    res.display_generic_number.label] = [
                        self.main_structure, self.main_structure
                    ]

        # Superimpose missing regions H1 - hfs2
        alt_complex_struct = None
        # Bound up front because it is read after the branch below even when
        # that branch is skipped (previously an UnboundLocalError).
        alt_signprot_complex = None
        segs_for_alt_complex_struct = []
        alt_templates_H_domain = self.get_full_alpha_templates()

        if self.main_structure.id not in alt_templates_H_domain[1]:
            segs_for_alt_complex_struct = [
                'H1', 'h1ha', 'HA', 'hahb', 'HB', 'hbhc', 'HC', 'hchd', 'HD',
                'hdhe', 'HE', 'hehf', 'HF', 'hfs2'
            ]
            alt_complex_struct = self.find_h_domain_template(
                self.target_signprot, alt_templates_H_domain[0])
            io = StringIO(alt_complex_struct.pdb_data.pdb)
            alt_signprot_complex = SignprotComplex.objects.get(
                structure=alt_complex_struct)
            alt_assign_cgn = as_gn.GenericNumbering(
                pdb_file=io,
                pdb_code=alt_complex_struct.pdb_code.index,
                sequence_parser=True,
                signprot=alt_signprot_complex.protein)
            alt_signprot_pdb_array = alt_assign_cgn.assign_cgn_with_sequence_parser(
                alt_signprot_complex.alpha)
            # Four anchor residues on each side of the inserted region are
            # used for the superposition.
            before_cgns = ['G.HN.50', 'G.HN.51', 'G.HN.52', 'G.HN.53']
            after_cgns = ['G.H5.03', 'G.H5.04', 'G.H5.05', 'G.H5.06']
            orig_residues1 = parse.fetch_residues_from_array(
                signprot_pdb_array['HN'], before_cgns)
            orig_residues2 = parse.fetch_residues_from_array(
                signprot_pdb_array['H5'], after_cgns)
            orig_residues = parse.add_two_ordereddict(orig_residues1,
                                                      orig_residues2)

            alt_residues1 = parse.fetch_residues_from_array(
                alt_signprot_pdb_array['HN'], before_cgns)
            alt_residues2 = parse.fetch_residues_from_array(
                alt_signprot_pdb_array['H5'], after_cgns)

            alt_middle = OrderedDict()
            for s in segs_for_alt_complex_struct:
                alt_middle = parse.add_two_ordereddict(
                    alt_middle, alt_signprot_pdb_array[s])
                self.template_source = update_template_source(
                    self.template_source, list(self.template_source[s].keys()),
                    alt_complex_struct, s)

            alt_residues = parse.add_two_ordereddict(
                parse.add_two_ordereddict(alt_residues1, alt_middle),
                alt_residues2)
            # Positions without coordinates ('x') cannot be superposed.
            del_list = [r for r, t in alt_middle.items() if t == 'x']
            for r in del_list:
                del alt_residues[r]

            superpose = sp.LoopSuperpose(orig_residues, alt_residues)
            new_residues = superpose.run()
            # Drop the four anchors on each end; only the middle is transplanted.
            key_list = list(new_residues.keys())[4:-4]
            for key in key_list:
                seg = key.split('.')[1]
                signprot_pdb_array[seg][key] = new_residues[key]

            # alt local loop alignment
            alt_sign_a = GProteinAlignment()
            alt_sign_a.run_alignment(self.target_signprot,
                                     alt_signprot_complex.protein,
                                     segments=segs_for_alt_complex_struct)
            for alt_seg in segs_for_alt_complex_struct:
                sign_a.reference_dict[alt_seg] = alt_sign_a.reference_dict[
                    alt_seg]
                sign_a.template_dict[alt_seg] = alt_sign_a.template_dict[
                    alt_seg]
                sign_a.alignment_dict[alt_seg] = alt_sign_a.alignment_dict[
                    alt_seg]

            # fix h1ha and hahb and hbhc
            if self.target_signprot.entry_name != 'gnas2_human':
                h1ha = Residue.objects.filter(
                    protein_conformation__protein=alt_signprot_complex.protein,
                    protein_segment__slug='h1ha')
                h1ha_dict, hahb_dict = OrderedDict(), OrderedDict()
                for h in h1ha:
                    h1ha_dict[h.generic_number.label] = 'x'
                signprot_pdb_array['h1ha'] = h1ha_dict
                # Re-insert hahb entries in sorted key order.
                for r in sorted(signprot_pdb_array['hahb']):
                    hahb_dict[r] = signprot_pdb_array['hahb'][r]
                signprot_pdb_array['hahb'] = hahb_dict

            # Let Modeller model buffer regions
            self.trimmed_residues.extend([
                's1h1_6', 'hfs2_1', 'hfs2_2', 'hfs2_3', 'hfs2_4', 'hfs2_5',
                'hfs2_6', 'hfs2_7', 'G.S2.01', 'G.S2.02', 's4h3_4', 's4h3_5'
            ])

        # New loop alignments for signprot. If length differs between ref and temp, buffer is created in the middle of the loop
        loops = [
            i.slug
            for i in ProteinSegment.objects.filter(proteinfamily='Alpha',
                                                   category='loop')
        ]
        loops_to_model = []
        for r_seg, t_seg, a_seg in zip(sign_a.reference_dict,
                                       sign_a.template_dict,
                                       sign_a.alignment_dict):
            if r_seg in loops:
                loop_length = len(sign_a.reference_dict[r_seg])
                # Residue letters only; gap ('-') and missing ('x') markers removed.
                ref_loop = [
                    i for i in sign_a.reference_dict[r_seg].values()
                    if i not in ['x', '-']
                ]
                ref_loop_residues = Residue.objects.filter(
                    protein_conformation__protein=self.target_signprot,
                    protein_segment__slug=r_seg)
                temp_loop = [
                    i for i in sign_a.template_dict[t_seg].values()
                    if i not in ['x', '-']
                ]
                if alt_complex_struct and r_seg in segs_for_alt_complex_struct:
                    temp_loop_residues = Residue.objects.filter(
                        protein_conformation__protein=alt_signprot_complex.
                        protein,
                        protein_segment__slug=r_seg)
                else:
                    temp_loop_residues = Residue.objects.filter(
                        protein_conformation__protein=structure_signprot,
                        protein_segment__slug=r_seg)
                ref_out, temp_out, align_out = OrderedDict(), OrderedDict(
                ), OrderedDict()
                # ref is longer
                if len(ref_loop) > len(temp_loop):
                    mid_temp = math.ceil(len(temp_loop) / 2)
                    j = 0
                    for i in range(0, loop_length):
                        key = r_seg + '_' + str(i + 1)
                        if i + 1 <= mid_temp:
                            temp_out[key] = temp_loop[i]
                            self.template_source = compare_and_update_template_source(
                                self.template_source, r_seg,
                                signprot_pdb_array, i,
                                ref_loop_residues[i].display_generic_number.label,
                                ref_loop_residues[i].sequence_number,
                                segs_for_alt_complex_struct,
                                alt_complex_struct, self.main_structure)
                        elif mid_temp < i + 1 <= loop_length - mid_temp + 1:
                            if i + 1 == loop_length - mid_temp + 1 and len(
                                    temp_loop) % 2 == 0:
                                temp_out[key] = temp_loop[mid_temp + j]
                                self.template_source = compare_and_update_template_source(
                                    self.template_source, r_seg,
                                    signprot_pdb_array, mid_temp + j,
                                    ref_loop_residues[i].display_generic_number.label,
                                    ref_loop_residues[i].sequence_number,
                                    segs_for_alt_complex_struct,
                                    alt_complex_struct, self.main_structure)
                                j += 1
                            else:
                                # '?' in the key marks a position with no template residue.
                                temp_out[key.replace('_', '?')] = '-'
                                self.template_source = compare_and_update_template_source(
                                    self.template_source, r_seg,
                                    signprot_pdb_array, mid_temp + j,
                                    ref_loop_residues[i].display_generic_number.label,
                                    ref_loop_residues[i].sequence_number,
                                    segs_for_alt_complex_struct,
                                    alt_complex_struct, self.main_structure)
                        else:
                            temp_out[key] = temp_loop[mid_temp + j]
                            self.template_source = compare_and_update_template_source(
                                self.template_source, r_seg,
                                signprot_pdb_array, mid_temp + j,
                                ref_loop_residues[i].display_generic_number.label,
                                ref_loop_residues[i].sequence_number,
                                segs_for_alt_complex_struct,
                                alt_complex_struct, self.main_structure)
                            j += 1
                    # Mirror the template's key style ('_' vs '?') in the reference.
                    for i, j in enumerate(
                            sign_a.reference_dict[r_seg].values()):
                        key = r_seg + '_' + str(i + 1)
                        if key in temp_out:
                            ref_out[key] = j
                        else:
                            ref_out[key.replace('_', '?')] = j
                # temp is longer
                elif len(ref_loop) < len(temp_loop):
                    mid_ref = math.ceil(len(ref_loop) / 2)
                    j = 0
                    for i in range(0, loop_length):
                        key = r_seg + '_' + str(i + 1)
                        if i + 1 <= mid_ref:
                            ref_out[key] = ref_loop[i]
                            self.template_source = compare_and_update_template_source(
                                self.template_source, r_seg,
                                signprot_pdb_array, i,
                                temp_loop_residues[i].display_generic_number.label,
                                ref_loop_residues[i].sequence_number,
                                segs_for_alt_complex_struct,
                                alt_complex_struct, self.main_structure)
                        elif mid_ref < i + 1 <= loop_length - mid_ref + 1:
                            if i + 1 == loop_length - mid_ref + 1 and len(
                                    ref_loop) % 2 == 0:
                                ref_out[key] = ref_loop[mid_ref + j]
                                self.template_source = compare_and_update_template_source(
                                    self.template_source, r_seg,
                                    signprot_pdb_array, mid_ref + j,
                                    temp_loop_residues[i].display_generic_number.label,
                                    ref_loop_residues[mid_ref + j].sequence_number,
                                    segs_for_alt_complex_struct,
                                    alt_complex_struct, self.main_structure)
                                j += 1
                            else:
                                ref_out[key.replace('_', '?')] = '-'
                                self.template_source = compare_and_update_template_source(
                                    self.template_source, r_seg,
                                    signprot_pdb_array, mid_ref + j,
                                    temp_loop_residues[i].display_generic_number.label,
                                    ref_loop_residues[mid_ref + j].sequence_number,
                                    segs_for_alt_complex_struct,
                                    alt_complex_struct, self.main_structure)
                        else:
                            ref_out[key] = ref_loop[mid_ref + j]
                            self.template_source = compare_and_update_template_source(
                                self.template_source, r_seg,
                                signprot_pdb_array, mid_ref + j,
                                temp_loop_residues[i].display_generic_number.label,
                                ref_loop_residues[mid_ref + j].sequence_number,
                                segs_for_alt_complex_struct,
                                alt_complex_struct, self.main_structure)
                            j += 1
                    # Mirror the reference's key style ('_' vs '?') in the template.
                    for i, j in enumerate(
                            sign_a.template_dict[t_seg].values()):
                        key = r_seg + '_' + str(i + 1)
                        if key in ref_out:
                            temp_out[key] = j
                        else:
                            temp_out[key.replace('_', '?')] = j
                    # Only loops where the template is longer are flagged for full remodelling.
                    loops_to_model.append(r_seg)
                # ref and temp length equal
                else:
                    cr, ct = 1, 1
                    for i, j in zip(
                            list(sign_a.reference_dict[r_seg].values()),
                            list(sign_a.template_dict[t_seg].values())):
                        ref_out[r_seg + '_' + str(cr)] = i
                        temp_out[r_seg + '_' + str(ct)] = j
                        self.template_source = compare_and_update_template_source(
                            self.template_source, r_seg, signprot_pdb_array,
                            ct - 1,
                            temp_loop_residues[ct - 1].display_generic_number.label,
                            ref_loop_residues[cr - 1].sequence_number,
                            segs_for_alt_complex_struct, alt_complex_struct,
                            self.main_structure)
                        # Counters advance only past real residues, not gaps.
                        if i != '-':
                            cr += 1
                        if j != '-':
                            ct += 1

                c = 1

                # update alignment dict
                for i, j in zip(list(ref_out.values()),
                                list(temp_out.values())):
                    key = r_seg + '_' + str(c)
                    if i == '-' or j == '-':
                        align_out[key.replace('_', '?')] = '-'
                    elif i != j:
                        align_out[key] = '.'
                    elif i == j:
                        align_out[key] = i
                    c += 1
                # update pdb array
                new_pdb_array = OrderedDict()
                atoms_list = list(signprot_pdb_array[t_seg].values())
                j = 0
                for t_c, t in temp_out.items():
                    jplus1 = False
                    if t != '-':
                        # NOTE(review): the loop variable `i` is never used; the
                        # test always reads atoms_list[j], so a '-' entry at j is
                        # not skipped. Left unchanged - confirm the intent.
                        for i in range(j, len(atoms_list)):
                            if atoms_list[j] != '-':
                                new_pdb_array[t_c] = atoms_list[j]
                                jplus1 = True
                                break
                        if jplus1:
                            j += 1
                    else:
                        new_pdb_array[t_c] = 'x'

                # update dictionary keys with '?' if no backbone template
                ref_out_final, temp_out_final, align_out_final, new_pdb_array_final = OrderedDict(
                ), OrderedDict(), OrderedDict(), OrderedDict()
                for i, j in new_pdb_array.items():
                    if '?' not in i and j == 'x':
                        marked_key = i.replace('_', '?').replace('.', '?')
                        ref_out_final[marked_key] = ref_out[i]
                        temp_out_final[marked_key] = temp_out[i]
                        align_out_final[marked_key] = align_out[i]
                        new_pdb_array_final[marked_key] = new_pdb_array[i]
                    else:
                        ref_out_final[i] = ref_out[i]
                        temp_out_final[i] = temp_out[i]
                        align_out_final[i] = align_out[i]
                        new_pdb_array_final[i] = new_pdb_array[i]
                sign_a.reference_dict[r_seg] = ref_out_final
                sign_a.template_dict[t_seg] = temp_out_final
                sign_a.alignment_dict[a_seg] = align_out_final
                signprot_pdb_array[r_seg] = new_pdb_array_final

        # Rebind the alignment dicts to deep copies before writing into them.
        self.a.reference_dict = deepcopy(self.a.reference_dict)
        self.a.template_dict = deepcopy(self.a.template_dict)
        self.a.alignment_dict = deepcopy(self.a.alignment_dict)

        for seg, values in sign_a.reference_dict.items():
            new_array[seg] = OrderedDict()
            final_values = deepcopy(values)
            for key, res in values.items():
                try:
                    if signprot_pdb_array[seg][key] == 'x':
                        new_array[seg][key] = 'x'
                        self.template_source = update_template_source(
                            self.template_source, [key], None, seg)
                    else:
                        new_array[seg][key] = signprot_pdb_array[seg][key]
                except Exception:
                    # Usually a missing seg/key in signprot_pdb_array. Kept broad
                    # because update_template_source is opaque from here, but no
                    # longer traps KeyboardInterrupt/SystemExit.
                    if res != '-':
                        new_array[seg][key] = '-'
                        self.template_source = update_template_source(
                            self.template_source, [key], None, seg)
            self.a.reference_dict[seg] = final_values
        for seg, values in sign_a.template_dict.items():
            for key, res in values.items():
                if new_array[seg][key] == 'x':
                    sign_a.template_dict[seg][key] = 'x'
                else:
                    if new_array[seg][key] == '-':
                        sign_a.template_dict[seg][key] = '-'
                    else:
                        # Trust the residue found in the coordinates over the
                        # aligned sequence letter.
                        pdb_res = PDB.Polypeptide.three_to_one(
                            new_array[seg][key][0].get_parent().get_resname())
                        if pdb_res != sign_a.template_dict[seg][key]:
                            sign_a.template_dict[seg][key] = pdb_res
            self.a.template_dict[seg] = sign_a.template_dict[seg]

        for seg, values in sign_a.alignment_dict.items():
            for key, res in values.items():
                if new_array[seg][key] == 'x':
                    values[key] = 'x'
            self.a.alignment_dict[seg] = values
        signprot_pdb_array = new_array

        for seg, values in signprot_pdb_array.items():
            self.main_pdb_array[seg] = values

        # Drop every HN position that precedes G.HN.30.
        delete_HN_begin = []
        for i in self.a.reference_dict['HN']:
            if i == 'G.HN.30':
                break
            delete_HN_begin.append(i)

        for d in delete_HN_begin:
            del self.a.reference_dict['HN'][d]
            try:
                del self.a.template_dict['HN'][d]
            except KeyError:
                pass
            try:
                del self.a.alignment_dict['HN'][d]
            except KeyError:
                pass
            del self.main_pdb_array['HN'][d]
            try:
                del self.template_source['HN'][d]
            except KeyError:
                pass

        # add residues to model to self.trimmed_residues
        gprot_segments = [
            i.slug
            for i in ProteinSegment.objects.filter(proteinfamily='Alpha')
        ]
        for seg, seg_dict in self.a.reference_dict.items():
            if seg in gprot_segments:
                for key in seg_dict:
                    if '?' in key or self.main_pdb_array[seg][key] in ['-', 'x']:
                        self.trimmed_residues.append(key)
                    if seg in loops_to_model:
                        self.trimmed_residues.append(key)

        # custom mods
        long_HG_prots = Protein.objects.filter(family__name='Gs')
        if structure_signprot in long_HG_prots and self.target_signprot not in long_HG_prots:
            self.trimmed_residues.extend([
                'G.HG.08', 'G.HG.09', 'G.HG.12', 'G.HG.13', 'G.HG.14',
                'G.HG.16', 'G.HG.17'
            ])
        # alt_signprot_complex stays None when no alternative template was
        # needed, so it must be checked before its protein is read.
        if structure_signprot != self.target_signprot or (
                alt_signprot_complex is not None
                and alt_signprot_complex.protein not in [
                    None, self.target_signprot
                ]):
            # hbhc
            hbhc_keys = list(self.a.reference_dict['hbhc'].keys())
            self.trimmed_residues.append(hbhc_keys[2])
            self.trimmed_residues.append(hbhc_keys[3])
            self.trimmed_residues.append(hbhc_keys[-3])
            self.trimmed_residues.append(hbhc_keys[-2])
            # H1
            self.trimmed_residues.append('G.H1.07')
            self.trimmed_residues.append('G.H1.08')
        if 'hgh4' in loops_to_model:
            self.trimmed_residues.extend(['G.H4.01', 'G.H4.02', 'G.H4.03'])

        # Add mismatching residues to trimmed residues for modeling
        for seg, val in self.a.alignment_dict.items():
            if seg in gprotein_segment_slugs:
                for key, res in val.items():
                    if res == '.':
                        self.trimmed_residues.append(key)
        # Add residues with missing atom coordinates to trimmed residues for modeling
        for seg, val in self.main_pdb_array.items():
            if seg in gprotein_segment_slugs:
                for key, atoms in val.items():
                    if atoms not in ['-', 'x']:
                        if atom_num_dict[PDB.Polypeptide.three_to_one(
                                atoms[0].get_parent().get_resname())] > len(
                                    atoms):
                            self.trimmed_residues.append(key)

        # Add Beta and Gamma chains
        p = PDB.PDBParser(QUIET=True).get_structure(
            'structure', StringIO(self.main_structure.pdb_data.pdb))[0]
        beta = p[self.signprot_complex.beta_chain]
        gamma = p[self.signprot_complex.gamma_chain]
        for label in ('Beta', 'Gamma'):
            self.a.reference_dict[label] = OrderedDict()
            self.a.template_dict[label] = OrderedDict()
            self.a.alignment_dict[label] = OrderedDict()
            self.main_pdb_array[label] = OrderedDict()
            self.template_source[label] = OrderedDict()
        # These chains are copied unchanged, so reference, template and
        # alignment all carry the same residue letter.
        for label, chain in (('Beta', beta), ('Gamma', gamma)):
            for chain_res in chain:
                key = str(chain_res.get_id()[1])
                one_letter = PDB.Polypeptide.three_to_one(
                    chain_res.get_resname())
                self.a.reference_dict[label][key] = one_letter
                self.a.template_dict[label][key] = one_letter
                self.a.alignment_dict[label][key] = one_letter
                self.main_pdb_array[label][key] = list(chain_res)
                self.template_source[label][key] = [
                    self.main_structure, self.main_structure
                ]
Exemplo n.º 3
0
    def run_RMSD_list(self, files, receptor, seq_nums=None, force_chain=None, sp_7TM=False, only_backbone=False):
        """Superposes every model onto the reference and prints 3 RMSD values per model.

        It compares the files using sequence and generic numbers.
        First file in the list has to be the reference file. For each other file
        a Custom RMSD, a 7TM all atoms RMSD and a 7TM backbone (N, CA, C) RMSD are
        printed to stdout; the method itself returns None.
        Params:
            @files: pdb files to compare, the first one being the reference, list
            @receptor: UniProt entry name of GPCR, str
            @seq_nums: Specified list of sequence residue numbers for the Custom calculation, list
            @force_chain: Specify one letter chain name to use in the pdb files, str
            @sp_7TM: Superimpose only on 7TM backbone atoms (N, CA, C), boolean
            @only_backbone: Calculate RMSD for only the backbone atoms, boolean
        """
        parser = PDB.PDBParser(QUIET=True)
        pdbs = []
        for count, f in enumerate(files, start=1):
            # NOTE(review): the structure parsed here was always discarded in favour of
            # the GenericNumbering result below; the call is kept only so an unreadable
            # file keeps failing at the same point - confirm and drop if redundant.
            parser.get_structure('struct{}'.format(count), f)[0]
            assign_gn = as_gn.GenericNumbering(pdb_file=f, sequence_parser=True)
            pdbs.append(assign_gn.assign_generic_numbers_with_sequence_parser())

        ### Chain ids of the models that also exist in the reference
        chains = [[c.get_id() for c in p.get_chains()] for p in pdbs]
        usable_chains = [c for m in chains[1:] for c in m if c in chains[0]]

        ### Creating full arrays
        arrays = []
        # The loop position identifies the reference; list.index() would return the
        # first *equal* element, which is not guaranteed to be this very object.
        for model_index, p in enumerate(pdbs):
            try:
                if model_index == 0 and not usable_chains:
                    chain = [c.get_id() for c in p.get_chains()][0]
                else:
                    chain = p[usable_chains[0]].get_id()
            except (IndexError, KeyError):
                # No shared chain, or the shared chain is missing here: fall back to ' ', then 'A'
                try:
                    chain = p[' '].get_id()
                except KeyError:
                    chain = p['A'].get_id()
            # force_chain only overrides the chain of the reference (first file)
            if force_chain and model_index == 0:
                chain = force_chain
            pdb_array1 = OrderedDict()
            for residue in p[chain]:
                # Skip residues whose id carries a hetero flag
                if residue.get_full_id()[3][0] != ' ':
                    continue
                pdb_array1[int(residue.get_id()[1])] = residue
            arrays.append(pdb_array1)

        ### Checking available residues in target and models
        target, models = arrays[0], arrays[1:]
        # Reference residue numbers present in every model; empty when there is no model
        if models:
            all_keep = [res for res in target if all(res in m for m in models)]
        else:
            all_keep = []

        print('Residue sequence numbers present in all structures: {}'.format(len(all_keep)))
        print(all_keep)

        ### Checking available atoms in target and models
        # Per common residue: the atom ids found in the reference and in every model
        shared_atom_ids = OrderedDict()
        for resnum in all_keep:
            common_ids = {atom.id for atom in target[resnum]}
            for model in models:
                common_ids &= {atom.id for atom in model[resnum]}
            shared_atom_ids[resnum] = common_ids

        ### Creating atom lists of structures
        atom_lists = []
        for m in arrays:
            atom_list = []
            for num, res in m.items():
                if num not in shared_atom_ids:
                    continue
                for atom in res:
                    if atom.id in shared_atom_ids[num]:
                        atom_list.append(atom)
            atom_lists.append(atom_list)

        ### Fetching TM data from GPCRdb
        # Evaluate the queryset once; the set serves the per-atom membership tests
        TM_nums = list(Residue.objects.filter(protein_conformation__protein__entry_name=receptor, protein_segment__slug__in=['TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7']).values_list('sequence_number', flat=True))
        TM_num_set = set(TM_nums)
        TM_target_atom_list = [i for i in atom_lists[0] if i.get_parent().id[1] in TM_num_set]
        TM_target_backbone_atom_list = [i for i in TM_target_atom_list if i.id in ['N','CA','C']]
        TM_atom_num = len(TM_target_atom_list)
        print('TM_atom_num:',TM_atom_num)
        print('TM_backbone_atom_num:',len(TM_target_backbone_atom_list))

        # Residue numbers of the Custom calculation: the caller's selection, else all common residues
        if seq_nums:
            seq_nums = [int(s) for s in seq_nums]
        else:
            seq_nums = all_keep

        ### Running superposition and RMSD calculation
        for model_number, m in enumerate(atom_lists[1:], start=1):
            print('########################################')
            print('Model {}'.format(model_number))

            ### Custom calculation
            if sp_7TM:
                TM_superposed, atoms_used_sp = self.superpose(atom_lists[0], m, list(TM_nums))
                superposed = self.fetch_atoms_with_seqnum(TM_superposed, seq_nums, only_backbone)
                target_atoms = self.fetch_atoms_with_seqnum(atom_lists[0], seq_nums, only_backbone)
            else:
                # NOTE(review): seq_nums and only_backbone are not applied in this branch
                superposed, atoms_used_sp = self.superpose(atom_lists[0], m)
                target_atoms = atom_lists[0]
            rmsd = self.calc_RMSD(target_atoms, superposed)
            print('Num atoms sent for superposition: ', len(atom_lists[0]), len(m))
            print('Num atoms used for superposition: ', atoms_used_sp)
            print('Num atoms used for RMSD: ', len(target_atoms), len(superposed))
            print('Custom RMSD:', rmsd)

            ### 7TM all atoms calculation
            TM_model_atom_list = [i for i in m if i.get_parent().id[1] in TM_num_set]
            superposed2, atoms_used_sp = self.superpose(TM_target_atom_list, TM_model_atom_list, list(TM_nums))
            rmsd = self.calc_RMSD(TM_target_atom_list, superposed2)
            print('Num atoms sent for superposition: ', len(TM_target_atom_list), len(TM_model_atom_list))
            print('Num atoms used for superposition: ', atoms_used_sp)
            print('Num atoms used for RMSD: ', len(TM_target_atom_list), len(superposed2))
            print('7TM all RMSD:', rmsd)

            ### 7TM only backbone (N, CA, C) calculation
            superposed, atoms_used_sp = self.superpose(TM_target_atom_list, TM_model_atom_list, list(TM_nums))
            superposed3 = self.fetch_atoms_with_seqnum(superposed, list(TM_nums), True)
            rmsd = self.calc_RMSD(TM_target_backbone_atom_list, superposed3)
            print('Num atoms sent for superposition: ', len(TM_target_atom_list), len(TM_model_atom_list))
            print('Num atoms used for superposition: ', atoms_used_sp)
            print('Num atoms used for RMSD: ', len(TM_target_backbone_atom_list), len(superposed3))
            print('7TM backbone RMSD:', rmsd)
Exemplo n.º 4
0
    def run_RMSD_list_archived(self, files, seq_nums=None, force_chain=None):
        """Calculates 4 RMSD values between a list of GPCR pdb files.

        It compares the files using sequence and generic numbers.
        First file in the list has to be the reference file.
            1. overall all atoms RMSD
            2. overall backbone atoms RMSD
            3. 7TM all atoms RMSD
            4. 7TM backbone atoms RMSD

        Nothing is returned. Results are written to self.rmsds,
        self.number_of_residues_superposed and self.number_of_atoms_superposed,
        each keyed by 'reference' (first file) or 'file{n}' (n-th file, n >= 2) and
        then by the labels in self.four_scores. The reference RMSD entries are None.
        Params:
            @files: pdb files to compare, the first one being the reference, list
            @seq_nums: Residue numbers to restrict the overall calculation to; they are
                       matched against str(residue number), so they must be strings, list
            @force_chain: Chain id to use for the reference (first) file, str
        """
        # Create one result slot per file: 'reference' for the first, 'file{n}' for the others
        c = 0
        for f in files:
            c+=1
            if c==1:
                self.number_of_residues_superposed['reference'] = OrderedDict()
                self.number_of_atoms_superposed['reference'] = OrderedDict()
                self.rmsds['reference'] = OrderedDict()
            else:
                self.number_of_residues_superposed['file{}'.format(str(c))] = OrderedDict()
                self.number_of_atoms_superposed['file{}'.format(str(c))] = OrderedDict()
                self.rmsds['file{}'.format(str(c))] = OrderedDict()
        parser = PDB.PDBParser(QUIET=True)
        count = 0
        pdbs = []
        for f in files:
            count+=1
            # NOTE(review): this parsed model is overwritten two lines below and never used
            pdb = parser.get_structure('struct{}'.format(count), f)[0]
            assign_gn = as_gn.GenericNumbering(pdb_file=f, sequence_parser=True)
            pdb = assign_gn.assign_generic_numbers_with_sequence_parser()
            pdbs.append(pdb)
        # Chain ids per file
        chains = []
        for p in pdbs:
            this = []
            for c in p.get_chains():
                this.append(c.get_id())
            chains.append(this)
        # Chain ids of the non-reference files that also exist in the reference
        usable_chains = []
        for m in chains[1:]:
            for c in m:
                if c in chains[0]:
                    usable_chains.append(c)
        # NOTE(review): chains is not read again after this point, so this assignment has no effect
        if force_chain:
            chains[0] = [force_chain]

        # arrays[k] = [all selected residues, 7TM residues] of file k, keyed by residue number
        arrays = []
        model_counter = 0
        for p in pdbs:
            try:
                # NOTE(review): list.index() returns the first element *equal* to p, which
                # is only the loop position if the models compare unequal; model_counter
                # already tracks the position - confirm intent.
                if pdbs.index(p)==0 and len(usable_chains)==0:
                    chain = [c.get_id() for c in pdbs[0].get_chains()][0]
                else:
                    chain = p[usable_chains[0]].get_id()
            except:
                # No usable chain could be resolved: fall back to chain ' ', then to chain 'A'
                try:
                    chain = p[' '].get_id()
                except:
                    chain = p['A'].get_id()
            # force_chain only overrides the chain of the reference (first) file
            if force_chain and model_counter==0:
                chain = force_chain
            pdb_array1, pdb_array2 = OrderedDict(), OrderedDict()
            for residue in p[chain]:
                # Skip residues whose id carries a hetero flag
                if residue.get_full_id()[3][0]!=' ':
                    continue
                if seq_nums!=None and str(residue.get_id()[1]) in seq_nums:
                    pdb_array1[int(residue.get_id()[1])] = residue
                elif seq_nums==None:
                    pdb_array1[int(residue.get_id()[1])] = residue
                # Residues whose CA B-factor lies strictly between -8.1 and 8.1 form the
                # second (7TM) array - presumably the B-factor holds the generic number
                # assigned above; residues without a CA atom are silently skipped.
                try:
                    if -8.1 < residue['CA'].get_bfactor() < 8.1:
                        pdb_array2[int(residue.get_id()[1])] = residue
                except:
                    pass
            arrays.append([pdb_array1,pdb_array2])
            model_counter+=1

        # For both residue sets (i=0 overall, i=1 7TM): record each reference residue number
        # as a delete once per model lacking it and as a keep once per model having it
        all_deletes, TM_deletes = [], []
        all_keep, TM_keep = [], []
        for i in range(0,2):
            for res in arrays[0][i]:
                for m in arrays[1:]:
                    if res not in m[i]:
                        if i==0:
                            all_deletes.append(res)
                        else:
                            TM_deletes.append(res)
                    else:
                        if i==0:
                            all_keep.append(res)
                        else:
                            TM_keep.append(res)
        deletes = [all_deletes, TM_deletes]
        keeps = [all_keep, TM_keep]

        # num_atoms[i][res] holds, per retained residue, the shortest atom list seen so far
        num_atoms1, num_atoms2 = OrderedDict(), OrderedDict()
        num_atoms = [num_atoms1, num_atoms2]
        # Residue numbers whose residue name differs between files; shared by both passes
        mismatches = []
        resis_to_delete = []
        for m_i, m in enumerate(arrays):
            for i in range(0,2):
                for res in m[i]:
                    if res in deletes[i] or res not in keeps[i]:
                        # Not present in every file: schedule removal (done after iteration)
                        resis_to_delete.append([m_i,i,res])
                    else:
                        # Exception-driven control flow: the handler below runs when the residue
                        # has no entry yet (lookup fails) or when the names match (explicit
                        # raise); a name mismatch drops the entry and skips the handler.
                        try:
                            if m[i][res].get_resname()!=num_atoms[i][res][0].get_parent().get_resname():
                                del num_atoms[i][res]
                                mismatches.append(res)
                            else:
                                raise Exception()
                        except:
                            if res not in mismatches:
                                atoms = []
                                for atom in m[i][res]:
                                    atoms.append(atom)
                                if res not in num_atoms[i]:
                                    num_atoms[i][res] = atoms
                                else:
                                    # Keep the smaller atom set as the reference for this residue
                                    if len(atoms)<len(num_atoms[i][res]):
                                        num_atoms[i][res] = atoms
        for i in resis_to_delete:
            del arrays[i[0]][i[1]][i[2]]
        # atom_lists[k] = [overall all, overall backbone, 7TM all, 7TM backbone] atoms of file k
        atom_lists = []
        for m in arrays:
            this_model = []
            for i in range(0,2):
                this_list_all = []
                this_list_bb = []
                for res in m[i]:
                    if res in num_atoms[i]:
                        atoms = [a.get_id() for a in m[i][res].get_list()]
                        ref_atoms = [at.get_id() for at in num_atoms[i][res]]
                        # Sorted atom names give every file the same atom order within a residue
                        for atom in sorted(atoms):
                            if atom in ref_atoms:
                                this_list_all.append(m[i][res][atom])
                                if atom in ['N','CA','C']:
                                    this_list_bb.append(m[i][res][atom])
                this_model.append(this_list_all)
                this_model.append(this_list_bb)
            atom_lists.append(this_model)
        # NOTE(review): TM_keys is only referenced by the commented-out argument below
        TM_keys = list(num_atoms[1].keys())
        c = 0
        for m in atom_lists:
            c+=1
            for i in range(0,4):
                # Scores 0-1 use the overall residue set, scores 2-3 the 7TM residue set
                if i<2:
                    j=0
                else:
                    j=1
                if c>1:
                    self.number_of_residues_superposed['file{}'.format(str(c))][self.four_scores[i]] = len(num_atoms[j])
                    self.number_of_atoms_superposed['file{}'.format(str(c))][self.four_scores[i]] = len(m[i])
                    rmsd = self.calc_RMSD(atom_lists[0][i], m[i])#, TM_keys)
                    self.rmsds['file{}'.format(str(c))][self.four_scores[i]] = rmsd
                else:
                    self.number_of_residues_superposed['reference'][self.four_scores[i]] = len(num_atoms[j])
                    self.number_of_atoms_superposed['reference'][self.four_scores[i]] = len(m[i])
                    self.rmsds['reference'][self.four_scores[i]] = None
Exemplo n.º 5
0
    def pdb_array_creator(self, structure=None, filename=None):
        ''' Creates an OrderedDict() from the pdb of a Structure object where segment labels are keys for
            OrderedDicts in which residue numbers/generic numbers are keys for lists of Bio.PDB atoms.

            When the sequence number overwrite table of the structure (ssno.pdb_wt_table) is not empty, the
            atoms are taken from the stored rotamers of the TM1-TM7 and H8 residues. Otherwise the pdb is
            parsed and numbered with GenericNumbering and the atoms of its preferred chain are used.

            @param structure: Structure, Structure object of protein. When using structure, leave filename=None.
            @param filename: str, filename of pdb to be parsed.
            @return: OrderedDict, {segment label: OrderedDict {generic number or residue number: [atoms]}}
        '''
        # NOTE(review): structure is dereferenced unconditionally below, so a call with
        # structure=None fails with AttributeError; filename only replaces the pdb source.
        if structure is not None and filename is None:
            pdb_source = StringIO(structure.pdb_data.pdb)
        else:
            pdb_source = filename
        gn_array = []
        residue_array = []

        residues = Residue.objects.filter(
            protein_conformation=structure.protein_conformation)
        # Generic numbers ('1.50' style) known for this conformation; a set, because it is
        # only used for membership tests.
        known_gns = set()
        for db_res in residues:
            try:
                known_gns.add(
                    ggn(db_res.display_generic_number.label).replace('x', '.'))
            except Exception:
                # Best effort: residues whose label cannot be converted are left out
                continue

        ssno = StructureSeqNumOverwrite(structure)
        ssno.seq_num_overwrite('pdb')
        if len(ssno.pdb_wt_table) > 0:
            residues = residues.filter(protein_segment__slug__in=[
                'TM1', 'TM2', 'TM3', 'TM4', 'TM5', 'TM6', 'TM7', 'H8'
            ]).order_by('sequence_number')
            output = OrderedDict()
            # One parser serves every rotamer; get_structure() builds a new structure per call
            rota_parser = PDB.PDBParser()
            for r in residues:
                slug = r.protein_segment.slug
                if slug is None:
                    continue
                if slug not in output:
                    output[slug] = OrderedDict()
                rotamer = self.right_rotamer_select(
                    Rotamer.objects.filter(residue=r))
                parsed_rota = rota_parser.get_structure(
                    'rota', StringIO(rotamer.pdbdata.pdb))
                for chain in parsed_rota[0]:
                    for res in chain:
                        label = r.display_generic_number.label
                        atom_list = []
                        for atom in res:
                            # Skip hydrogens
                            if atom.get_id().startswith('H'):
                                continue
                            if atom.get_id() == 'N':
                                # N carries the part of the label before 'x' as B-factor
                                bw, gn = label.split('x')
                                atom.set_bfactor(bw)
                            elif atom.get_id() == 'CA':
                                # CA carries '<segment>.<part after x>'; a three character
                                # position is stored as a negative number without its last
                                # character.
                                bw, gn = label.split('x')
                                gn = "{}.{}".format(bw.split('.')[0], gn)
                                if len(gn.split('.')[1]) == 3:
                                    gn = '-' + gn[:-1]
                                atom.set_bfactor(gn)
                            atom_list.append(atom)
                        output[slug][ggn(label).replace('x', '.')] = atom_list
            return output

        pdb_code = structure.pdb_code.index
        assign_gn = as_gn.GenericNumbering(
            pdb_file=pdb_source,
            pdb_code=pdb_code,
            sequence_parser=True)
        pdb_struct = assign_gn.assign_generic_numbers_with_sequence_parser()
        pref_chain = structure.preferred_chain
        parent_prot_conf = ProteinConformation.objects.get(
            protein=structure.protein_conformation.protein.parent)
        parent_residues = Residue.objects.filter(
            protein_conformation=parent_prot_conf)
        last_res = list(parent_residues)[-1].sequence_number
        # Only the first listed chain is used
        if len(pref_chain) > 1:
            pref_chain = pref_chain[0]
        for residue in pdb_struct[pref_chain]:
            if 'CA' in residue and -9.1 < residue['CA'].get_bfactor() < 9.1:
                # Turn the CA B-factor back into a generic number string: pad to two
                # decimals, and map a negative value to the positive number with '1' appended.
                gn = str(residue['CA'].get_bfactor())
                if len(gn.split('.')[1]) == 1:
                    gn = gn + '0'
                if gn[0] == '-':
                    gn = gn[1:] + '1'
                res_num = residue.get_id()[1]
                # Hard-coded exceptions that are always keyed by residue number
                forced_resid = (
                    (pdb_code == '3PBL' and res_num == 331)
                    or (pdb_code == '6QZH' and res_num == 1434)
                    or pdb_code == '7M3E'
                )
                if not forced_resid and gn in known_gns:
                    gn_array.append(gn)
                else:
                    gn_array.append(str(res_num))
                residue_array.append(residue.get_list())
        output = OrderedDict()
        for label in self.segment_coding.values():
            output[label] = OrderedDict()
        if len(gn_array) != len(residue_array):
            raise AssertionError()

        for gn, res in zip(gn_array, residue_array):
            if '.' in gn:
                seg_num = int(gn.split('.')[0])
                seg_label = self.segment_coding[seg_num]
                # Segment 8 residues are dropped as long as TM7 is empty
                if seg_num == 8 and len(output['TM7']) == 0:
                    continue
                output[seg_label][gn] = res
            else:
                # Keyed by residue number: look the generic number up in the database.
                # Any failure in this lookup falls through to the handler below.
                try:
                    found_res = None
                    try:
                        found_res = Residue.objects.get(
                            protein_conformation=structure.
                            protein_conformation,
                            sequence_number=gn)
                    except Exception:
                        # Exception for res 317 in 5VEX, 5VEW
                        if pdb_code in [
                                '5VEX', '5VEW'
                        ] and gn == '317' and res[0].get_parent(
                        ).get_resname() == 'CYS':
                            found_res = Residue.objects.get(
                                protein_conformation=parent_prot_conf,
                                sequence_number=gn)
                    # found_res is still None when the lookup failed without a special
                    # case; the attribute access then raises into the outer handler.
                    found_gn = str(
                        ggn(found_res.display_generic_number.label)
                    ).replace('x', '.')

                    # Exception for res 318 in 5VEX, 5VEW
                    if pdb_code in [
                            '5VEX', '5VEW'
                    ] and gn == '318' and res[0].get_parent().get_resname(
                    ) == 'ILE' and found_gn == '5.47':
                        found_gn = '5.48'
                    if -9.1 < float(found_gn) < 9.1:
                        if len(res) == 1:
                            continue
                        if int(gn) > last_res:
                            continue
                        seg_label = self.segment_coding[int(
                            found_gn.split('.')[0])]
                        output[seg_label][found_gn] = res
                except Exception:
                    # Residues named YCM or CSD are retried against the parent protein
                    if res[0].get_parent().get_resname() in ('YCM', 'CSD'):
                        try:
                            found_res = Residue.objects.get(
                                protein_conformation=parent_prot_conf,
                                sequence_number=gn)
                        except Exception:
                            continue
                        # Only segments whose slug starts with 'T' or 'H' are kept
                        if found_res.protein_segment.slug[0] not in [
                                'T', 'H'
                        ]:
                            continue
                        try:
                            found_gn = str(
                                ggn(found_res.display_generic_number.label)
                            ).replace('x', '.')
                        except Exception:
                            found_gn = str(gn)
                        output[
                            found_res.protein_segment.slug][found_gn] = res
        return output