Python fetch_pdb_info示例，construct.functions.fetch_pdb_info Python示例

示例#1

0

显示文件

文件： build_construct_data.py 项目： fosfataza/protwis

    def create_construct_data(self, filenames=False):
        """Add constructs from JSON source files, then from every Structure.

        First, each file in ``filenames`` whose name ends in ``json`` is read
        from ``self.construct_data_dir`` and its parsed content is passed to
        ``add_construct``. Afterwards, for every ``Structure`` the matching
        ``Protein`` (entry name = lower-cased ``str(structure)``) is looked up,
        ``fetch_pdb_info`` is called and the result is passed to
        ``add_construct``.

        :param filenames: names of files inside ``self.construct_data_dir``;
            when falsy, every entry of that directory is considered.
        """
        self.logger.info('ADDING EXPERIMENTAL CONSTRUCT DATA')

        # read source files
        if not filenames:
            filenames = os.listdir(self.construct_data_dir)

        for filename in filenames:
            if filename[-4:]!='json':
                continue
            filepath = os.sep.join([self.construct_data_dir, filename])
            with open(filepath) as json_file:
                d = json.load(json_file)
            # The file is closed before the construct is added.
            add_construct(d)

        for s in Structure.objects.all():
            pdbname = str(s)
            try:
                protein = Protein.objects.filter(entry_name=pdbname.lower()).get()
                d = fetch_pdb_info(pdbname,protein)
                add_construct(d)
            except Exception:
                # Best effort: one failing structure must not abort the run,
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                print(pdbname,'failed')

        self.logger.info('COMPLETED CREATING EXPERIMENTAL CONSTRUCT DATA')

示例#2

0

显示文件

    def create_construct_data(self, filenames=False):
        """Add constructs from JSON source files, then from every Structure.

        Each file in ``filenames`` whose name ends in ``json`` is read from
        ``self.construct_data_dir`` and handed to ``add_construct``. Then, for
        every ``Structure``, the ``Protein`` whose entry name is the
        lower-cased ``str(structure)`` is fetched, ``fetch_pdb_info`` is called
        and its result is handed to ``add_construct``.

        :param filenames: names of files inside ``self.construct_data_dir``;
            when falsy, every entry of that directory is considered.
        """
        self.logger.info('ADDING EXPERIMENTAL CONSTRUCT DATA')

        # read source files
        if not filenames:
            filenames = os.listdir(self.construct_data_dir)

        for filename in filenames:
            if filename[-4:] != 'json':
                continue
            filepath = os.sep.join([self.construct_data_dir, filename])
            with open(filepath) as json_file:
                d = json.load(json_file)
            # The file is closed before the construct is added.
            add_construct(d)

        for s in Structure.objects.all():
            pdbname = str(s)
            try:
                protein = Protein.objects.filter(
                    entry_name=pdbname.lower()).get()
                d = fetch_pdb_info(pdbname, protein)
                add_construct(d)
            except Exception:
                # Best effort per structure; unlike a bare ``except`` this
                # lets KeyboardInterrupt/SystemExit propagate.
                print(pdbname, 'failed')

        self.logger.info('COMPLETED CREATING EXPERIMENTAL CONSTRUCT DATA')

示例#3

0

显示文件

文件： update_construct_mutations.py 项目： pszgaspar/protwis

    def replace_deletions(self):
        """Replace each construct's deletions with those reported for its PDB.

        For every ``Construct``: the ``schematics`` and ``snakecache`` fields
        are reset and saved, all existing deletions are removed, and the
        result of ``fetch_pdb_info`` (cached for 24 hours under
        ``<pdb>_auto_d``) is used to ``get_or_create`` one
        ``ConstructDeletion`` per entry in its ``'deletions'`` list.
        """
        for c in Construct.objects.all():
            pdbname = c.structure.pdb_code.index

            # reset caches
            c.schematics = None
            c.snakecache = None
            c.save()

            c.deletions.all().delete()

            protein = Protein.objects.filter(entry_name=pdbname.lower()).get()
            cache_key = pdbname+"_auto_d"
            d = cache.get(cache_key)
            # Refetch on a cache miss or when the cached value has no
            # 'deletions' key. The original condition
            # (`not d and 'deletions' in d`) raised TypeError on a miss
            # (None) and could never trigger a refetch.
            if not d or 'deletions' not in d:
                d = fetch_pdb_info(pdbname,protein)
                cache.set(cache_key,d,60*60*24)
            if 'deletions' in d:
                for deletion in d['deletions']:
                    ConstructDeletion.objects.get_or_create(construct=c, start=deletion['start'],end=deletion['end'])
            else:
                print('No deletions in d[]',pdbname)

示例#4

0

显示文件

    def replace_deletions(self):
        """Rebuild the deletion records of every construct.

        Per construct: clear the ``schematics``/``snakecache`` fields and
        save, drop all current deletions, obtain the ``fetch_pdb_info``
        result (cached for one day under ``<pdb>_deletions``) and
        ``get_or_create`` a ``ConstructDeletion`` for each listed deletion.
        """
        one_day = 60 * 60 * 24
        for construct in Construct.objects.all():
            pdb_code = construct.structure.pdb_code.index

            # Invalidate the fields cached on the construct itself.
            construct.schematics = None
            construct.snakecache = None
            construct.save()

            construct.deletions.all().delete()

            # These two values are read but not used further below; the
            # attribute accesses are kept as in the original.
            construct_name = construct.name
            protein = Protein.objects.filter(
                entry_name=pdb_code.lower()).get()
            parent_entry_name = protein.parent.entry_name

            cache_key = pdb_code + "_deletions"
            info = cache.get(cache_key)
            if not info:
                info = fetch_pdb_info(pdb_code, protein)
                cache.set(cache_key, info, one_day)

            for span in info['deletions']:
                ConstructDeletion.objects.get_or_create(
                    construct=construct,
                    start=span['start'],
                    end=span['end'])

示例#5

0

显示文件

    def check_deletions(self):
        """Compare stored construct deletions with those derived from the PDB.

        For every ``Construct`` the deleted positions from ``fetch_pdb_info``
        (cached for 24 hours under ``<pdb>_deletions``) are compared with the
        positions covered by the construct's own deletions. Differences are
        printed, and one row per construct is written to the tab-delimited
        file ``construct_del_issues.csv`` with the columns: pdb, uniprot
        entry name, construct name, ranges only in the database, ranges only
        in the PDB, and an empty remark field.
        """

        def consecutive_runs(positions):
            # Collapse positions into [first, last] runs of consecutive
            # integers. The input must be sorted for the index-minus-value
            # key to be constant within a run; a set has no guaranteed
            # order, which made the original grouping unreliable.
            runs = []
            for _, g in groupby(enumerate(sorted(positions)),
                                lambda x: x[0] - x[1]):
                group = list(map(itemgetter(1), g))
                runs.append([group[0], group[-1]])
            return runs

        constructs = Construct.objects.all()
        csv_rows = []
        for c in constructs:
            pdbname = c.structure.pdb_code.index
            cname = c.name
            protein = Protein.objects.filter(entry_name=pdbname.lower()).get()
            uniprot = protein.parent.entry_name
            cache_key = pdbname + "_deletions"
            d = cache.get(cache_key)
            if not d:
                d = fetch_pdb_info(pdbname, protein)
                cache.set(cache_key, d, 60 * 60 * 24)

            # Expand both sources into flat lists of deleted positions
            # (end positions are inclusive).
            pdb_deletions = []
            for deletion in d['deletions']:
                pdb_deletions += range(deletion['start'], deletion['end'] + 1)
            db_deletions = []
            for deletion in c.deletions.all():
                db_deletions += range(deletion.start, deletion.end + 1)

            present_in_pdb_only = set(pdb_deletions) - set(db_deletions)
            present_in_pdb_only_list = consecutive_runs(present_in_pdb_only)

            present_in_db_only = set(db_deletions) - set(pdb_deletions)
            present_in_db_only_list = consecutive_runs(present_in_db_only)

            if present_in_pdb_only or present_in_db_only:
                print(pdbname)
            if present_in_pdb_only:
                print("PDBONLY", present_in_pdb_only)
            if present_in_db_only:
                print("DBONLY", present_in_db_only)
            csv_rows.append([
                pdbname, uniprot, cname, present_in_db_only_list,
                present_in_pdb_only_list, ''
            ])
        import csv
        # newline='' is what the csv module expects for files it writes to.
        with open('construct_del_issues.csv', 'w', newline='') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerows(csv_rows)

示例#6

0

显示文件

    def create_construct_local_data(self, filenames=False):
        """Purge construct data and rebuild it from JSON files and the PDB.

        In order: ``self.purge_construct_data()`` is called; every file ending
        in ``json`` from ``self.construct_data_dir`` (restricted to
        ``filenames`` when given) is loaded and passed to ``add_construct``;
        the same is done for every such file in
        ``self.construct_data_local_dir``; finally, for each ``Structure``
        that has no ``Construct`` yet, ``fetch_pdb_info`` is called and its
        result is passed to ``add_construct``.

        :param filenames: names of files inside ``self.construct_data_dir``;
            when falsy, every entry of that directory is considered.
        """
        self.logger.info('ADDING EXPERIMENTAL CONSTRUCT DATA')

        #delete existing
        self.purge_construct_data()

        # read source files
        if not filenames:
            filenames = os.listdir(self.construct_data_dir)

        for filename in sorted(filenames):
            print('dealing with', filename)
            if filename[-4:] != 'json':
                continue
            filepath = os.sep.join([self.construct_data_dir, filename])
            with open(filepath) as json_file:
                d = json.load(json_file)
                add_construct(d)

        filenames = os.listdir(self.construct_data_local_dir)

        for filename in sorted(filenames):
            print('dealing with', filename)
            if filename[-4:] != 'json':
                continue
            filepath = os.sep.join([self.construct_data_local_dir, filename])
            with open(filepath) as json_file:
                d = json.load(json_file)
                add_construct(d)

        structures = Structure.objects.all()

        for s in structures:
            pdbname = str(s)
            try:
                exists = Construct.objects.filter(
                    structure__pdb_code__index=pdbname).exists()
                if not exists:
                    print(pdbname)
                    protein = Protein.objects.filter(
                        entry_name=pdbname.lower()).get()
                    d = fetch_pdb_info(pdbname, protein)
                    add_construct(d)
                else:
                    print("Entry for", pdbname, "already there")
            except Exception:
                # Best effort per structure; a bare ``except`` would also
                # swallow KeyboardInterrupt/SystemExit.
                print(pdbname, 'failed')

示例#7

0

显示文件

文件： build_construct_data.py 项目： protwis/protwis

    def create_construct_local_data(self, filenames=False):
        """Purge construct data and rebuild it from JSON files and the PDB.

        In order: ``self.purge_construct_data()`` is called; every file ending
        in ``json`` from ``self.construct_data_dir`` (restricted to
        ``filenames`` when given) is loaded and passed to ``add_construct``;
        the same is done for every such file in
        ``self.construct_data_local_dir``; finally, for each ``Structure``
        that has no ``Construct`` yet, ``fetch_pdb_info`` is called and its
        result is passed to ``add_construct``.

        :param filenames: names of files inside ``self.construct_data_dir``;
            when falsy, every entry of that directory is considered.
        """
        self.logger.info('ADDING EXPERIMENTAL CONSTRUCT DATA')

        #delete existing
        self.purge_construct_data()

        # read source files
        if not filenames:
            filenames = os.listdir(self.construct_data_dir)

        for filename in sorted(filenames):
            print('dealing with',filename)
            if filename[-4:]!='json':
                continue
            filepath = os.sep.join([self.construct_data_dir, filename])
            with open(filepath) as json_file:
                d = json.load(json_file)
                add_construct(d)

        filenames = os.listdir(self.construct_data_local_dir)

        for filename in sorted(filenames):
            print('dealing with',filename)
            if filename[-4:]!='json':
                continue
            filepath = os.sep.join([self.construct_data_local_dir, filename])
            with open(filepath) as json_file:
                d = json.load(json_file)
                add_construct(d)

        structures = Structure.objects.all()

        for s in structures:
            pdbname = str(s)
            try:
                exists = Construct.objects.filter(structure__pdb_code__index=pdbname).exists()
                if not exists:
                    print(pdbname)
                    protein = Protein.objects.filter(entry_name=pdbname.lower()).get()
                    d = fetch_pdb_info(pdbname,protein)
                    add_construct(d)
                else:
                    print("Entry for",pdbname,"already there")
            except Exception:
                # Best effort per structure; a bare ``except`` would also
                # swallow KeyboardInterrupt/SystemExit.
                print(pdbname,'failed')

示例#8

0

显示文件

文件： update_construct_mutations.py 项目： pszgaspar/protwis

    def check_deletions(self):
        """Compare stored construct deletions with those derived from the PDB.

        For every ``Construct`` the deleted positions from ``fetch_pdb_info``
        (cached for 24 hours under ``<pdb>_auto_d``) are compared with the
        positions covered by the construct's own deletions. Differences are
        printed, and one row per construct is written to the tab-delimited
        file ``construct_del_issues.csv`` with the columns: pdb, uniprot
        entry name, construct name, ranges only in the database, ranges only
        in the PDB, and an empty remark field.
        """

        def consecutive_runs(positions):
            # Collapse positions into [first, last] runs of consecutive
            # integers. Sorting is required: the index-minus-value key is
            # only constant within a run for ascending input, and a set has
            # no guaranteed iteration order.
            runs = []
            for _, g in groupby(enumerate(sorted(positions)), lambda x:x[0]-x[1]):
                group = list(map(itemgetter(1), g))
                runs.append([group[0], group[-1]])
            return runs

        constructs = Construct.objects.all()
        csv_rows = []
        for c in constructs:
            pdbname = c.structure.pdb_code.index
            cname = c.name
            protein = Protein.objects.filter(entry_name=pdbname.lower()).get()
            uniprot = protein.parent.entry_name
            cache_key = pdbname+"_auto_d"
            d = cache.get(cache_key)
            if not d:
                d = fetch_pdb_info(pdbname,protein)
                cache.set(cache_key,d,60*60*24)

            # Expand both sources into flat lists of deleted positions
            # (end positions are inclusive).
            pdb_deletions = []
            for deletion in d['deletions']:
                pdb_deletions += range(deletion['start'],deletion['end']+1)
            db_deletions = []
            for deletion in c.deletions.all():
                db_deletions += range(deletion.start,deletion.end+1)

            present_in_pdb_only = set(pdb_deletions)-set(db_deletions)
            present_in_pdb_only_list = consecutive_runs(present_in_pdb_only)

            present_in_db_only = set(db_deletions)-set(pdb_deletions)
            present_in_db_only_list = consecutive_runs(present_in_db_only)

            if present_in_pdb_only or present_in_db_only:
                print(pdbname)
            if present_in_pdb_only:
                print("PDBONLY",present_in_pdb_only)
            if present_in_db_only:
                print("DBONLY",present_in_db_only)
            csv_rows.append([pdbname,uniprot,cname,present_in_db_only_list,present_in_pdb_only_list,''])
        import csv
        # newline='' is what the csv module expects for files it writes to.
        with open('construct_del_issues.csv', 'w', newline='') as f:
            writer = csv.writer(f, delimiter = '\t')
            writer.writerows(csv_rows)

示例#9

0

显示文件

文件： build_construct_data.py 项目： hugwind/protwis

    def create_construct_data(self, filenames=False):
        """Add constructs from the given JSON files, or from the PDB for all.

        When ``filenames`` is truthy, each listed file ending in ``json`` is
        read from ``self.construct_data_dir`` and passed to ``add_construct``.
        Otherwise every ``Structure`` not marked ``refined=True`` that has no
        ``Construct`` yet gets one built from ``fetch_pdb_info``.

        :param filenames: names of files inside ``self.construct_data_dir``;
            falsy means "process all structures instead".
        """
        self.logger.info('ADDING EXPERIMENTAL CONSTRUCT DATA')

        if filenames:
            # read source files
            for filename in filenames:
                if filename[-4:] != 'json':
                    continue
                filepath = os.sep.join([self.construct_data_dir, filename])
                print('Adding ' + filepath)
                with open(filepath) as json_file:
                    d = json.load(json_file)
                    add_construct(d)
        else:
            structures = Structure.objects.all().exclude(refined=True)
            for s in structures:
                pdbname = str(s)
                try:
                    exists = Construct.objects.filter(
                        structure__pdb_code__index=pdbname).exists()
                    if not exists:
                        print(pdbname)
                        protein = Protein.objects.filter(
                            entry_name=pdbname.lower()).get()
                        d = fetch_pdb_info(pdbname, protein)
                        add_construct(d)
                    else:
                        print("Entry for", pdbname, "already there")
                except Exception:
                    # Best effort per structure; a bare ``except`` would
                    # also swallow KeyboardInterrupt/SystemExit.
                    print(pdbname, 'failed')

        self.logger.info('COMPLETED CREATING EXPERIMENTAL CONSTRUCT DATA')

示例#10

0

显示文件

文件： build_construct_data.py 项目： pszgaspar/protwis

    def create_construct_data(self, filenames=False):
        """Add constructs from the given JSON files, or from the PDB for all.

        When ``filenames`` is truthy, each listed file ending in ``json`` is
        read from ``self.construct_data_dir`` and passed to ``add_construct``.
        Otherwise every ``Structure`` not marked ``refined=True`` that has no
        ``Construct`` yet gets one built from ``fetch_pdb_info``.

        :param filenames: names of files inside ``self.construct_data_dir``;
            falsy means "process all structures instead".
        """
        self.logger.info('ADDING EXPERIMENTAL CONSTRUCT DATA')

        if filenames:
            # read source files
            for filename in filenames:
                if filename[-4:]!='json':
                    continue
                filepath = os.sep.join([self.construct_data_dir, filename])
                print('Adding '+filepath)
                with open(filepath) as json_file:
                    d = json.load(json_file)
                    add_construct(d)
        else:
            structures = Structure.objects.all().exclude(refined=True)
            for s in structures:
                pdbname = str(s)
                try:
                    exists = Construct.objects.filter(structure__pdb_code__index=pdbname).exists()
                    if not exists:
                        protein = Protein.objects.filter(entry_name=pdbname.lower()).get()
                        d = fetch_pdb_info(pdbname,protein)
                        add_construct(d)
                    else:
                        print("Entry for",pdbname,"already there")
                except Exception:
                    # Best effort per structure; a bare ``except`` would
                    # also swallow KeyboardInterrupt/SystemExit.
                    print(pdbname,'failed')

        self.logger.info('COMPLETED CREATING EXPERIMENTAL CONSTRUCT DATA')

示例#11

0

显示文件

    def check_mutations(self):
        """Reconcile construct mutations with the rows of self.excel_mutations.

        For each row, fields are read by column index (1: pdb, 6: entry name,
        8: gn, 9: position, 10: wild-type aa, 11: mutated aa, 12-16: effect
        annotations). The affected constructs are selected either by the
        listed PDB codes or, when the pdb field is empty or starts with '%',
        by the parent protein's entry name (codes listed after '%' are
        skipped). For every selected construct:

        * if no matching ConstructMutation exists and the row is not flagged
          'Non-receptor' / 'Wrong annotation - remove!', one is created when
          the position appears in the ``fetch_pdb_info`` mutations; otherwise
          the construct's mutations at that position are printed;
        * matching mutations flagged for removal are deleted; all others get
          their effects cleared and re-added from the row.

        Finally, every ConstructMutation that was not annotated above is
        written to the tab-delimited file 'construct_mut_missing.csv'.
        """
        track_annotated_mutations = []
        # Per-run memo of fetch_pdb_info results, keyed by PDB code.
        cached_mutations = {}
        for i, mut in enumerate(self.excel_mutations):
            # print("Progress ",i,len(self.excel_mutations))
            # continue
            #print(mut)
            m = {}
            m['gn'] = mut[8]
            m['mut_aa'] = mut[11]
            m['wt_aa'] = mut[10]
            m['entry_name'] = mut[6]
            m['pos'] = int(mut[9])
            m['pdb'] = mut[1]
            m['thermo_effect'] = mut[12]
            m['expression_effect'] = mut[13]
            m['site_effect'] = mut[14]
            m['site_effect_type'] = mut[15]
            m['other_effect'] = mut[16]
            # if m['entry_name']!='glp1r_human':
            #     continue

            # print(m)
            if m['pdb'] and m['pdb'][0] != '%':
                pdbs = m['pdb'].split(',')
                # print(pdbs)
                # NOTE(review): str.split(',') on a non-empty string always
                # returns at least one element, so this branch looks
                # unreachable.
                if len(pdbs) == 0:
                    cons = Construct.objects.filter(
                        structure__pdb_code__index=m['pdb'])
                else:
                    cons = Construct.objects.filter(
                        structure__pdb_code__index__in=pdbs)
            else:
                cons = Construct.objects.filter(
                    structure__protein_conformation__protein__parent__entry_name
                    =m['entry_name'])

            # NOTE(review): these two lists are only appended to; their only
            # readers are in the commented-out code at the end of the loop.
            pdbs_has = []
            pdbs_hasnot = []
            for c in cons:
                c_pdb = c.structure.pdb_code.index
                not_to_check = None
                if m['pdb'] and m['pdb'][0] == '%':
                    # if there are some pdbs not to check on this uniprot
                    not_to_check = m['pdb'].replace("%", "").split(",")
                    if c_pdb in not_to_check:
                        continue
                # if not_to_check:
                #     print(c_pdb,not_to_check,m['pdb'][0])
                protein = Protein.objects.filter(
                    entry_name=c_pdb.lower()).get()
                # Look in the in-memory memo first, then the cache backend
                # (24 h timeout), and only then call fetch_pdb_info.
                if c_pdb in cached_mutations:
                    d = cached_mutations[c_pdb]
                else:
                    d = cache.get(c_pdb + "_mutations")
                    if not d:
                        d = fetch_pdb_info(c_pdb, protein)
                        cache.set(c_pdb + "_mutations", d, 60 * 60 * 24)
                    cached_mutations[c_pdb] = d
                # Find construct mutation
                cons_muts = ConstructMutation.objects.filter(
                    construct=c,
                    sequence_number=m['pos'],
                    mutated_amino_acid=m['mut_aa'],
                    wild_type_amino_acid=m['wt_aa'])

                if not cons_muts.exists(
                ) and m['other_effect'] != 'Non-receptor' and m[
                        'other_effect'] != 'Wrong annotation - remove!':
                    # If no hits something is odd
                    # print(c.structure.pdb_code.index,' do not have following mutation:',mut)
                    # Only the position is compared here, not the amino acids.
                    found = False
                    for pdb_m in d['mutations']:
                        if int(pdb_m['pos']) == m['pos']:
                            found = True
                            break
                    if found:
                        # print('It was however found in pdb! ADDING')
                        res_wt = Residue.objects.get(
                            protein_conformation__protein=protein.parent,
                            sequence_number=m['pos'])
                        # NOTE: this rebinds the outer loop variable `mut`;
                        # the row's fields were already copied into `m`.
                        mut = ConstructMutation.objects.create(
                            construct=c,
                            sequence_number=m['pos'],
                            wild_type_amino_acid=m['wt_aa'],
                            mutated_amino_acid=m['mut_aa'],
                            residue=res_wt)
                        pdbs_has.append(c_pdb)
                    else:
                        # print('Was also not found in pdb!')
                        pdbs_hasnot.append("%" + c_pdb)
                        cons_muts_odd = ConstructMutation.objects.filter(
                            construct=c, sequence_number=m['pos'])
                        for cons_mut in cons_muts_odd:
                            print(c_pdb, cons_mut)
                else:
                    # print(c.structure.pdb_code.index,' HAS following mutation:',mut)
                    pdbs_has.append(c_pdb)

                # Query again so that a mutation created above is included.
                cons_muts = ConstructMutation.objects.filter(
                    construct=c,
                    sequence_number=m['pos'],
                    mutated_amino_acid=m['mut_aa'],
                    wild_type_amino_acid=m['wt_aa'])
                for cons_mut in cons_muts:
                    if m['other_effect'] == 'Non-receptor' or m[
                            'other_effect'] == 'Wrong annotation - remove!':
                        # print('Delete!',cons_mut.construct.structure.pdb_code.index,cons_mut)
                        cons_mut.delete()
                        continue
                    # Clear existing to replace with current
                    cons_mut.effects.clear()

                    if m['thermo_effect']:
                        mutation_type, created = ConstructMutationType.objects.get_or_create(
                            slug=slugify('Thermostabilising'),
                            name='Thermostabilising',
                            effect=m['thermo_effect'])
                        cons_mut.effects.add(mutation_type)

                    if m['expression_effect']:
                        mutation_type, created = ConstructMutationType.objects.get_or_create(
                            slug=slugify('Receptor Expression'),
                            name='Receptor Expression',
                            effect=m['expression_effect'])
                        cons_mut.effects.add(mutation_type)

                    if m['site_effect']:
                        mutation_type, created = ConstructMutationType.objects.get_or_create(
                            slug=slugify(m['site_effect']),
                            name=m['site_effect'],
                            effect=m['site_effect_type'])
                        cons_mut.effects.add(mutation_type)

                    if m['other_effect']:
                        # print(m['other_effect'])
                        mutation_type, created = ConstructMutationType.objects.get_or_create(
                            slug=slugify('Other effect'),
                            name='Other effect',
                            effect=m['other_effect'])
                        cons_mut.effects.add(mutation_type)

                    track_annotated_mutations.append(cons_mut.pk)
            # if not m['pdb'] and len(pdbs_hasnot) and len(pdbs_has):
            #     print(m['entry_name'],m['wt_aa']+str(m['pos'])+m['mut_aa'])
            #     print("has",",".join(pdbs_has))
            #     print("hasnot",",".join(pdbs_hasnot))
            # if not len(pdbs_has):
            #     print('NOONE HAS',m['entry_name'],m['wt_aa']+str(m['pos'])+m['mut_aa'])
        print(len(track_annotated_mutations), 'annotated mutations')

        # Everything not touched by the loop above is reported as missing
        # from the excel annotations.
        non_annotated_muts = ConstructMutation.objects.all().exclude(
            pk__in=track_annotated_mutations).order_by(
                'construct__protein__entry_name', 'sequence_number')
        print(len(non_annotated_muts), 'non-annotated mutations')
        # Header row; the data rows below follow the same column layout.
        csv_rows = [[
            'reference', 'pdb', 'construct name', 'class', 'lig type',
            'rec fam', 'uniprot', 'segment', 'gpcrdb#', 'AA no.', 'WT aa',
            'Mut aa', '', '', '', '', '', 'Remark'
        ]]

        for mut in non_annotated_muts:
            pdb = mut.construct.structure.pdb_code.index
            uniprot = mut.construct.protein.entry_name
            seg = mut.residue.protein_segment.slug
            if mut.residue.generic_number:
                gn = mut.residue.generic_number.label
            else:
                gn = ''
            pos = mut.sequence_number
            wt_aa = mut.wild_type_amino_acid
            mut_aa = mut.mutated_amino_acid

            annotated_effect = [e.slug for e in mut.effects.all()]

            # print(annotated_effect)

            csv_rows.append([
                '', pdb, '', '', '', '', uniprot, seg, gn, pos, wt_aa, mut_aa,
                '', '', '', '', '', ','.join(annotated_effect)
            ])
            # print(csv_rows[-1])

    #  print(csv_rows)
        import csv
        with open('construct_mut_missing.csv', 'w') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerows(csv_rows)

示例#12

0

显示文件

    def match_all_with_uniprot_mutations(self):
        """Cross-check construct mutations against PDB info and the excel rows.

        For every ``Construct`` the ``fetch_pdb_info`` result is obtained
        (cached for 24 hours under ``<pdb>_mutations``) and two passes run:

        1. Each stored ConstructMutation that has no exact match (position,
           wild type, mutant) in the PDB mutations is kept if a matching,
           non-ignored row exists in ``self.excel_mutations``; otherwise it
           is deleted.
        2. Each PDB mutation without a stored ConstructMutation is recorded
           as an issue and added to the CSV rows, unless an excel row for
           that position marks it 'Non-receptor' or
           'Wrong annotation - remove!'.

        Issues are printed per construct and the collected rows are written
        to the tab-delimited file 'construct_mut_issues.csv'.
        """
        constructs = Construct.objects.all()
        # Header row; the data rows below follow the same column layout.
        csv_rows = [[
            'reference', 'pdb', 'construct name', 'class', 'lig type',
            'rec fam', 'uniprot', 'segment', 'gpcrdb#', 'AA no.', 'WT aa',
            'Mut aa', '', '', '', '', '', 'Remark'
        ]]
        for c in constructs:
            issues = []
            pdbname = c.structure.pdb_code.index
            # if pdbname!='4GPO':
            #     continue
            protein = Protein.objects.filter(entry_name=pdbname.lower()).get()
            uniprot = protein.parent.entry_name
            # if uniprot !='glp1r_human':
            #     continue
            d = cache.get(pdbname + "_mutations")
            # d = None
            if not d:
                d = fetch_pdb_info(pdbname, protein)
                cache.set(pdbname + "_mutations", d, 60 * 60 * 24)
            # print('pdb',d['mutations'])
            # Pass 1: stored mutations that the PDB info does not confirm.
            cons_muts = ConstructMutation.objects.filter(construct=c)
            for m in cons_muts:
                seq_pos = m.sequence_number
                found = False
                for pdb_m in d['mutations']:
                    if int(pdb_m['pos']) == seq_pos and pdb_m[
                            'wt'] == m.wild_type_amino_acid and (
                                pdb_m['mut'] == m.mutated_amino_acid):
                        found = True
                        break
                if not found:
                    ignore = False
                    # Excel columns by index: 1 pdb list, 6 uniprot entry,
                    # 9 position, 10 wild type, 11 mutant, 16 other effect.
                    for m_xlx in self.excel_mutations:
                        if m_xlx[6] == uniprot and int(
                                m_xlx[9]) == seq_pos and m_xlx[
                                    11] == m.mutated_amino_acid and m_xlx[
                                        10] == m.wild_type_amino_acid:
                            # The row applies when it lists this pdb or lists
                            # none, unless the pdb appears with a '%' prefix.
                            found = False
                            if pdbname in m_xlx[1] or m_xlx[1] == '':
                                found = True
                            if '%' + pdbname in m_xlx[1]:
                                found = False
                            if found:
                                if m_xlx[16] != 'Non-receptor' and m_xlx[
                                        16] != 'Wrong annotation - remove!':
                                    ignore = True
                    if ignore:
                        issues.append(('In excel but missing in pdb?', seq_pos,
                                       m.mutated_amino_acid))
                    else:
                        issues.append(('Not in excel nor pdb, deleting',
                                       seq_pos, m.mutated_amino_acid))
                        m.delete()
                    print(issues)
                    continue

                    # NOTE(review): everything below, up to the end of this
                    # `if not found:` block, is unreachable because of the
                    # unconditional `continue` above.
                    issues.append(
                        ('missing in pdb?', seq_pos, m.mutated_amino_acid))
                    mut_aa = m.mutated_amino_acid
                    pos = m.sequence_number
                    wt_aa = m.wild_type_amino_acid
                    annotated_effect = "Not identified in PDB -- perhaps delete?"

                    res = Residue.objects.get(
                        protein_conformation__protein=protein.parent,
                        sequence_number=pos)
                    seg = res.protein_segment.slug
                    if res.generic_number:
                        gn = res.generic_number.label
                    else:
                        gn = ''
                    csv_rows.append([
                        '', pdbname, '', '', '', '', protein.parent.entry_name,
                        seg, gn, pos, wt_aa, mut_aa, '', '', '', '', '',
                        annotated_effect
                    ])

            # Pass 2: PDB mutations that are not stored for this construct.
            for m in d['mutations']:
                cons_muts = ConstructMutation.objects.filter(
                    construct=c,
                    sequence_number=m['pos'],
                    mutated_amino_acid=m['mut'],
                    wild_type_amino_acid=m['wt'])
                if not cons_muts.exists():
                    # print('missing',m)
                    ignore = False
                    # Here only uniprot entry and position are matched
                    # against the excel rows, not the amino acids.
                    for m_xlx in self.excel_mutations:
                        if m_xlx[6] == uniprot and int(m_xlx[9]) == m['pos']:
                            found = False
                            if pdbname in m_xlx[1] or m_xlx[1] == '':
                                found = True
                            if '%' + pdbname in m_xlx[1]:
                                found = False
                            if found:
                                if m_xlx[16] == 'Non-receptor' or m_xlx[
                                        16] == 'Wrong annotation - remove!':
                                    ignore = True
                    if ignore:
                        continue
                    issues.append(
                        ('{}{}{} ({})'.format(m['wt'], m['pos'], m['mut'],
                                              m['type']),
                         ' not in db, nor to be ignored in excel'))
                    mut_aa = m['mut']
                    pos = m['pos']
                    wt_aa = m['wt']
                    annotated_effect = m['type']
                    res = Residue.objects.get(
                        protein_conformation__protein=protein.parent,
                        sequence_number=pos)
                    seg = res.protein_segment.slug
                    if res.generic_number:
                        gn = res.generic_number.label
                    else:
                        gn = ''
                    csv_rows.append([
                        '', pdbname, '', '', '', '', protein.parent.entry_name,
                        seg, gn, pos, wt_aa, mut_aa, '', '', '', '', '',
                        annotated_effect
                    ])
            if issues:
                print(pdbname)
                for i in issues:
                    print(i)
        import csv
        with open('construct_mut_issues.csv', 'w') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerows(csv_rows)

示例#13

0

显示文件

文件： update_construct_mutations.py 项目： pszgaspar/protwis

    def check_mutations(self):
        """Sync the rows of ``self.excel_mutations`` with the stored construct mutations.

        Each row is read positionally: index 1 is the PDB cell, 6 the entry
        name, 8 the generic number, 9 the sequence position, 10/11 the
        wild-type and mutated amino acids and 12-16 the effect columns.

        For every row the targeted constructs are selected either by the PDB
        codes listed in the PDB cell or, when that cell is empty or starts
        with ``%``, by the parent entry name (a ``%`` cell lists PDB codes to
        skip). For each construct:

        * if the mutation is not stored and the row is not flagged for
          removal, it is created when the automatically fetched PDB info
          contains a mutation at the same position; otherwise the mutations
          stored at that position are printed;
        * stored mutations matching the row are deleted when the row is
          flagged for removal, else their effects are cleared and rebuilt
          from the non-empty effect columns.

        Finally the method prints summary counts, reports the PDB codes of
        ``self.all_pdbs`` that no row touched, and writes every mutation that
        was not annotated to the tab-separated ``construct_mut_missing.csv``.
        """
        # Values of the "other effect" column that mark a row for deletion.
        removal_markers = ('Non-receptor', 'Wrong annotation - remove!')

        track_annotated_mutations = []
        cached_mutations = {}   # per-run memo of fetched PDB info, keyed by PDB code
        mut_pdb_list = {}       # only the keys are used: PDB codes touched by any row

        for row in self.excel_mutations:
            m = {
                'gn': row[8],
                'mut_aa': row[11],
                'wt_aa': row[10],
                'entry_name': row[6],
                'pos': int(row[9]),
                'pdb': row[1],
                'thermo_effect': row[12],
                'expression_effect': row[13],
                'site_effect': row[14],
                'site_effect_type': row[15],
                'other_effect': row[16],
            }
            flagged_for_removal = m['other_effect'] in removal_markers

            # (enabled, name, effect) for each effect column; the slug is
            # always derived from the name.
            effect_specs = (
                (m['thermo_effect'], 'Thermostabilising', m['thermo_effect']),
                (m['expression_effect'], 'Receptor Expression', m['expression_effect']),
                (m['site_effect'], m['site_effect'], m['site_effect_type']),
                (m['other_effect'], 'Other effect', m['other_effect']),
            )

            pdb_cell = m['pdb']
            not_to_check = None
            if pdb_cell and pdb_cell[0] != '%':
                # Explicit list of PDB codes. str.split always yields at least
                # one element, so the "__in" lookup covers every case.
                pdbs = pdb_cell.split(',')
                for pdb in pdbs:
                    mut_pdb_list.setdefault(pdb, [])
                cons = Construct.objects.filter(structure__pdb_code__index__in=pdbs)
            else:
                cons = Construct.objects.filter(
                    structure__protein_conformation__protein__parent__entry_name=m['entry_name'])
                if pdb_cell:
                    # The cell starts with '%': these PDB codes must be skipped.
                    not_to_check = pdb_cell.replace("%", "").split(",")

            for c in cons:
                c_pdb = c.structure.pdb_code.index
                mut_pdb_list.setdefault(c_pdb, [])
                if not_to_check is not None:
                    # Excluded codes still count as "mentioned in the sheet",
                    # but only once at least one construct was selected.
                    for excluded in not_to_check:
                        mut_pdb_list.setdefault(excluded, [])
                    if c_pdb in not_to_check:
                        continue

                protein = Protein.objects.filter(entry_name=c_pdb.lower()).get()

                # Look up the automatic PDB info: local memo first, then the
                # shared cache, and only then fetch it.
                if c_pdb in cached_mutations:
                    d = cached_mutations[c_pdb]
                else:
                    d = cache.get(c_pdb + "_auto_d")
                    if not d:
                        d = fetch_pdb_info(c_pdb, protein)
                        cache.set(c_pdb + "_auto_d", d, 60 * 60 * 24)
                    cached_mutations[c_pdb] = d

                cons_muts = ConstructMutation.objects.filter(
                    construct=c, sequence_number=m['pos'],
                    mutated_amino_acid=m['mut_aa'], wild_type_amino_acid=m['wt_aa'])

                if not cons_muts.exists() and not flagged_for_removal:
                    # The row's mutation is not stored; check whether the
                    # fetched PDB info has a mutation at that position.
                    found_in_pdb = any(int(pdb_m['pos']) == m['pos'] for pdb_m in d['mutations'])
                    if found_in_pdb:
                        res_wt = Residue.objects.get(
                            protein_conformation__protein=protein.parent, sequence_number=m['pos'])
                        ConstructMutation.objects.create(
                            construct=c, sequence_number=m['pos'],
                            wild_type_amino_acid=m['wt_aa'], mutated_amino_acid=m['mut_aa'],
                            residue=res_wt)
                    else:
                        # Not confirmed by the PDB info either: show what is
                        # stored at this position to help manual curation.
                        for cons_mut in ConstructMutation.objects.filter(construct=c, sequence_number=m['pos']):
                            print(c_pdb, cons_mut)

                # Query again so that a mutation created just above is included.
                cons_muts = ConstructMutation.objects.filter(
                    construct=c, sequence_number=m['pos'],
                    mutated_amino_acid=m['mut_aa'], wild_type_amino_acid=m['wt_aa'])
                for cons_mut in cons_muts:
                    if flagged_for_removal:
                        cons_mut.delete()
                        continue

                    # Clear existing effects to replace them with the current row.
                    cons_mut.effects.clear()
                    for enabled, name, effect in effect_specs:
                        if enabled:
                            mutation_type, _ = ConstructMutationType.objects.get_or_create(
                                slug=slugify(name), name=name, effect=effect)
                            cons_mut.effects.add(mutation_type)

                    track_annotated_mutations.append(cons_mut.pk)

        print(len(track_annotated_mutations),'annotated mutations')

        non_annotated_muts = ConstructMutation.objects.all().exclude(
            pk__in=track_annotated_mutations).order_by('construct__protein__entry_name', 'sequence_number')
        print(len(non_annotated_muts),'non-annotated mutations')
        csv_rows = [['reference','pdb','construct name','class','lig type','rec fam','uniprot','segment','gpcrdb#','AA no.','WT aa','Mut aa','','','','','','Remark']]

        # PDB codes known to the instance that no row referred to.
        missing = list(set(self.all_pdbs) - set(mut_pdb_list))
        print(sorted(missing)," do not have any mutations annotated -- add them to sheet with NONE if they have none")

        # Subset of the above for which the fetched PDB info does list mutations.
        missing2 = []
        for c_pdb in missing:
            d = cache.get(c_pdb + "_auto_d")
            if not d:
                protein = Protein.objects.filter(entry_name=c_pdb.lower()).get()
                d = fetch_pdb_info(c_pdb, protein)
                cache.set(c_pdb + "_auto_d", d, 60 * 60 * 24)
            if len(d['mutations']):
                missing2.append(c_pdb)
        print(sorted(missing2)," do not have any mutations annotated (but auto has them having) -- add them to sheet with NONE if they have none")

        for stored in non_annotated_muts:
            residue = stored.residue
            gn = residue.generic_number.label if residue.generic_number else ''
            annotated_effect = [e.slug for e in stored.effects.all()]
            csv_rows.append([
                '', stored.construct.structure.pdb_code.index, '', '', '', '',
                stored.construct.protein.entry_name, residue.protein_segment.slug, gn,
                stored.sequence_number, stored.wild_type_amino_acid, stored.mutated_amino_acid,
                '', '', '', '', '', ','.join(annotated_effect)])

        import csv
        # newline='' lets the csv module control line endings itself, as its
        # documentation requires for files passed to csv.writer.
        with open('construct_mut_missing.csv', 'w', newline='') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerows(csv_rows)

示例#14

0

显示文件

文件： update_construct_mutations.py 项目： pszgaspar/protwis

    def match_all_with_uniprot_mutations(self):
        """Cross-check stored construct mutations against the fetched PDB info.

        For every construct two passes are made:

        1. Each stored ``ConstructMutation`` that the PDB info does not list
           (same position, wild-type and mutated amino acid) is looked up in
           ``self.excel_mutations``. If an applicable row exists that is not
           flagged for removal, the mutation is kept and reported; otherwise
           it is deleted.
        2. Each mutation listed in the PDB info that is not stored is reported
           and added to the CSV output, unless an applicable row of
           ``self.excel_mutations`` at the same position flags it for removal.

        Issues are printed per construct, and the collected rows are written
        to the tab-separated file ``construct_mut_issues.csv``.
        """
        # Values of column 16 that mark a row as "should not be stored".
        removal_markers = ('Non-receptor', 'Wrong annotation - remove!')

        def row_applies_to(m_xlx, pdbname):
            """Return True if the row's PDB cell (column 1) covers ``pdbname``."""
            pdb_cell = m_xlx[1]
            # A '%'-prefixed code explicitly excludes that PDB.
            if '%' + pdbname in pdb_cell:
                return False
            # NOTE(review): a cell that only lists *other* '%'-excluded codes
            # does not count as applying here, although check_mutations treats
            # such a cell as "all constructs of the entry except these" --
            # confirm that this difference is intended.
            return pdbname in pdb_cell or pdb_cell == ''

        csv_rows = [['reference','pdb','construct name','class','lig type','rec fam','uniprot','segment','gpcrdb#','AA no.','WT aa','Mut aa','','','','','','Remark']]

        for c in Construct.objects.all():
            issues = []
            pdbname = c.structure.pdb_code.index
            protein = Protein.objects.filter(entry_name=pdbname.lower()).get()
            uniprot = protein.parent.entry_name

            # Fetched PDB info is shared through the cache under "<pdb>_auto_d".
            d = cache.get(pdbname + "_auto_d")
            if not d:
                d = fetch_pdb_info(pdbname, protein)
                cache.set(pdbname + "_auto_d", d, 60 * 60 * 24)

            # Pass 1: stored mutations that the PDB info does not confirm.
            for stored in ConstructMutation.objects.filter(construct=c):
                seq_pos = stored.sequence_number
                in_pdb = any(
                    int(pdb_m['pos']) == seq_pos
                    and pdb_m['wt'] == stored.wild_type_amino_acid
                    and pdb_m['mut'] == stored.mutated_amino_acid
                    for pdb_m in d['mutations'])
                if in_pdb:
                    continue

                keep = False
                for m_xlx in self.excel_mutations:
                    same_mutation = (
                        m_xlx[6] == uniprot
                        and int(m_xlx[9]) == seq_pos
                        and m_xlx[11] == stored.mutated_amino_acid
                        and m_xlx[10] == stored.wild_type_amino_acid)
                    if same_mutation and row_applies_to(m_xlx, pdbname) and m_xlx[16] not in removal_markers:
                        keep = True

                if keep:
                    issues.append(('In excel but missing in pdb?', seq_pos, stored.mutated_amino_acid))
                else:
                    issues.append(('Not in excel nor pdb, deleting', seq_pos, stored.mutated_amino_acid))
                    stored.delete()
                print(issues)

            # Pass 2: mutations in the PDB info that are not stored.
            for pdb_m in d['mutations']:
                stored_matches = ConstructMutation.objects.filter(
                    construct=c, sequence_number=pdb_m['pos'],
                    mutated_amino_acid=pdb_m['mut'], wild_type_amino_acid=pdb_m['wt'])
                if stored_matches.exists():
                    continue

                ignore = False
                for m_xlx in self.excel_mutations:
                    # NOTE(review): pdb_m['pos'] is compared without the int()
                    # coercion used in pass 1 -- confirm it is always an int.
                    if m_xlx[6] == uniprot and int(m_xlx[9]) == pdb_m['pos']:
                        if row_applies_to(m_xlx, pdbname) and m_xlx[16] in removal_markers:
                            ignore = True
                if ignore:
                    continue

                issues.append(('{}{}{} ({})'.format(pdb_m['wt'],pdb_m['pos'],pdb_m['mut'],pdb_m['type']),' not in db, nor to be ignored in excel'))
                pos = pdb_m['pos']
                res = Residue.objects.get(protein_conformation__protein=protein.parent, sequence_number=pos)
                gn = res.generic_number.label if res.generic_number else ''
                csv_rows.append([
                    '', pdbname, '', '', '', '', uniprot, res.protein_segment.slug, gn,
                    pos, pdb_m['wt'], pdb_m['mut'], '', '', '', '', '', pdb_m['type']])

            if issues:
                print(pdbname)
                for issue in issues:
                    print(issue)

        import csv
        # newline='' lets the csv module control line endings itself, as its
        # documentation requires for files passed to csv.writer.
        with open('construct_mut_issues.csv', 'w', newline='') as f:
            writer = csv.writer(f, delimiter='\t')
            writer.writerows(csv_rows)