Example No. 1
 def test_get_recent_changes(self):
     """Tests the Bio.PDB.PDBList.get_recent_changes method."""
     # obsolete_pdb is declared to prevent creating the "obsolete" directory
     pdblist = PDBList(obsolete_pdb="unimportant")
     url = pdblist.pdb_server + "/pub/pdb/data/status/latest/added.pdb"
     entries = pdblist.get_status_list(url)
     self.assertIsNotNone(entries)
Example No. 2
def DoFetchObsoletes(filename):

    obs = []
    fetch_ok = True

    try:
        pdblist = PDBList()
        sys.stdout.write(
            "INFO: Fetching obsolete structure information online...\n")
        obs = pdblist.get_all_obsolete()
    except Exception:  # avoid a bare except, which would also swallow KeyboardInterrupt
        fetch_ok = False
        sys.stderr.write(
            "[FAILED]\nUnable to fetch obsolete structures information online.\n"
        )

    if fetch_ok:
        sys.stdout.write("[OK].\n")

        try:
            with open(filename, 'w') as f:
                for ob in obs:
                    f.write("%s\n" % ob)
            fetch_ok = True

        except IOError:
            sys.stderr.write(
                "ERROR: Could not write obsoletes into file: '%s'.\n" %
                filename)
            fetch_ok = False

    return fetch_ok, obs
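
A minimal companion sketch (not part of the original snippet) for reading the obsolete IDs written by DoFetchObsoletes back into a list, assuming the one-ID-per-line format used above; the function name is hypothetical:

def LoadObsoletes(filename):
    # hypothetical reader for the file produced by DoFetchObsoletes above
    with open(filename) as f:
        return [line.strip() for line in f if line.strip()]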
Example No. 3
 def test_get_all_entries(self):
     """Tests the Bio.PDB.PDBList.get_all_entries method."""
     # obsolete_pdb is declared to prevent creating the "obsolete" directory
     pdblist = PDBList(obsolete_pdb="unimportant")
     entries = pdblist.get_all_entries()
     # As the number of entries grows constantly, the test checks whether a
     # certain number has been exceeded
     self.assertGreater(len(entries), 100000)
Example No. 4
 def test_get_all_obsolete(self):
     """Tests the Bio.PDB.PDBList.get_all_obsolete method."""
     pdblist = PDBList(
         obsolete_pdb="unimportant"
     )  # obsolete_pdb is declared to prevent creating the "obsolete" directory
     entries = pdblist.get_all_obsolete()
     # As the number of obsolete entries grows constantly, the test checks whether a certain number has been exceeded
     self.assertGreater(len(entries), 3000)
Example No. 5
 def check(self, structure, filename, file_format, obsolete=False, pdir=None):
     with self.make_temp_directory(os.getcwd()) as tmp:
         pdblist = PDBList(pdb=tmp, obsolete_pdb=os.path.join(tmp, "obsolete"))
         path = os.path.join(tmp, filename)
         if pdir:
             pdir = os.path.join(tmp, pdir)
         pdblist.retrieve_pdb_file(structure, obsolete=obsolete, pdir=pdir, file_format=file_format)
         self.assertTrue(os.path.isfile(path))
         os.remove(path)
Example No. 6
def download_PDB_file():
    fichiers = os.listdir("balibase/RV11.unaligned")
    for file in fichiers:
        records = saveFASTA("balibase/RV11.unaligned/" + file)
        ids = []
        for record in records:
            ids.append(record.id.split("_")[0])
        pdbl = PDBList()  # one PDBList instance is enough for all downloads
        for i in ids:
            pdbl.retrieve_pdb_file(i, pdir="PDB")
Example No. 7
                                                    poly[residue].id)][6:])

                        featuresList = [
                            resCode, phi, psi, depth,
                            len(residues),
                            len(curCharged),
                            len(curPolar),
                            len(curNonPolar)
                        ] + [secondary] + energyList

                        if None not in featuresList:  #removes residues for which phi/psi cannot be calculated
                            result[poly[residue].id[1]] = featuresList
    return result


pdbList = PDBList()


#returns dict mapping PDB ID to (input array, binding output list, catalytic output list)
def readAnnotations(path):
    result = dict()

    string = readFile(path)
    for protein in string.splitlines():

        #parsing ANNOTATIONS file table
        elems = protein.split("  ")
        # drop all empty fields; list.remove("") would raise ValueError when none are present
        elems = [i for i in elems if i != ""]
        PDBId = elems[0]
        print("extracting features for PDB ID: ", PDBId)
Example No. 8
def getpdbs(names):
    cwd = os.getcwd()
    pdbl = PDBList()
    pdbl.download_pdb_files(names, obsolete=False, file_format="pdb", pdir=cwd)
    pdbl.download_pdb_files(names, obsolete=True, file_format="pdb", pdir=cwd)
Example No. 9
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList

pdbl = PDBList()
parser = PDBParser()

for i in ["ID"]:
    pdbl.retrieve_pdb_file(pdb_code=i, file_format="pdb", pdir="./")
    structure_id = i
    filename = "pdb"+i.lower()+".ent"
    structure = parser.get_structure(structure_id, filename)
    print("id: ",structure_id)
    print("name: ", structure.header["name"])
    print("deposition date :", structure.header["deposition_date"])
    print("release date :", structure.header["release_date"])
    print("structure method : ", structure.header["structure_method"])
    print("resolution : ", structure.header["resolution"]) 
    print("")
Example No. 10
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList
import urllib
import xml.etree.ElementTree as ET

parser = PDBParser()
pdbl = PDBList()
structure = parser.get_structure(
    '4igk', 'p3_website/p3_app/static/p3_app/pdb_files/1JM7.pdb')
#structure = pdbl.retrieve_pdb_file('4igk')
residues = structure.get_residues()
compound = structure.header['compound']
title = structure.header['name']
journal = structure.header['journal_reference']
dict_keys = ['synonym', 'chain', 'fragment', 'molecule']
print compound
if 'unp residues' in compound['1']:
    start_pos, end_pos = compound['1']['fragment'].split(
        'unp residues ')[1].split('-')
    start_pos = int(start_pos)
    end_pos = int(end_pos)
print journal.split()
pdb_dict = {}

for comp in range(0, len(compound)):
    key = str(comp + 1)
    for dkey in dict_keys:
        compound[key][dkey]

#if residue is within start and end positions
Example No. 11
def index(request):
    errors = []
    which_error = None

    if request.POST:

        try:
            rsid = request.POST['rs_id']

        except MultiValueDictKeyError:
            rsid = False

        try:
            chr_num = request.POST['chr_num']
            chr_loc = request.POST['chr_loc']

        except MultiValueDictKeyError:
            chr_num = False
            chr_loc = False

        snpsub = request.POST.get('snpsubstitution', '')

        try:
            protein_id = request.POST['protein_id']
            aa_change = request.POST['aa_change']
        except MultiValueDictKeyError:
            protein_id = False
            aa_change = False

        try:
            gene_id = request.POST['gene_id']
        except MultiValueDictKeyError:
            gene_id = False

        if rsid != False:
            if rsid.startswith('rs') == False:
                errors.append('Please enter a valid rs number')
            elif not Brca1New.objects.filter(rsid=rsid).exists():
                errors.append('This ID is not currently in our database.')

            if not errors:
                brca1_objects = Brca1New.objects.filter(rsid=rsid)
                brca1_object = ''
                for obj in brca1_objects:
                    sub = obj.hgvs_cdna.split('>')[1]
                    if sub == snpsub:
                        brca1_object = obj
                        break

                if brca1_object == '':
                    errors.append(
                        'You have not entered a missense mutation for this position.'
                    )

        elif chr_loc != False:
            try:
                int(chr_loc)
            except ValueError:
                errors.append(
                    'You have not entered a valid integer for the chromosome location.'
                )
            try:
                int(chr_num)
            except ValueError:
                errors.append(
                    'You have not entered a valid integer for the chromosome number.'
                )
            if not Brca1New.objects.filter(hg19_pos=chr_loc).exists():
                errors.append(
                    'This position does not currently exist in our database.')
            var_loc_objects = Brca1New.objects.filter(hg19_pos=chr_loc)
            brca1_object = ''
            for obj in var_loc_objects:
                if obj.hg19_chr != chr_num:
                    # skip entries on a different chromosome; a QuerySet has no remove()
                    continue
                sub = obj.hgvs_cdna.split('>')[1]
                if sub == snpsub:
                    brca1_object = obj
                    break

            if brca1_object == '':
                errors.append(
                    'You have not entered a missense mutation for this position.'
                )

        elif protein_id != False:

            if not Brca1New.objects.filter(gene=protein_id).exists():
                errors.append(
                    'You have entered an ID that does not currently exist in our database.'
                )
            else:
                brca1_object = None
                for obj in Brca1New.objects.filter(gene=protein_id):
                    if aa_change == obj.hgvs_prot.split(
                            '.')[1] or aa_change == obj.hgvs_prot_code1.split(
                                '.')[1]:
                        brca1_object = obj
                        break

                if brca1_object == None:
                    errors.append(
                        'That amino acid change was not found in the database.'
                    )

        elif gene_id != False:
            if not Brca1New.objects.filter(gene=gene_id).exists():
                errors.append(
                    'You have entered an ID that does not currently exist in our database.'
                )

        if errors:
            if rsid != False:
                which_error = 'rsid'
            elif chr_loc != False:
                which_error = 'chr'
            elif protein_id != False:
                which_error = 'protein_id'
            elif gene_id != False:
                which_error = 'gene_id'

            return render(request, 'p3_app/index.html', {
                'errors': errors,
                'which_error': which_error
            })

        context = {}

        context['rsid'] = rsid
        context['chr_loc'] = chr_loc
        context['chr_num'] = chr_num
        context['protein_id'] = protein_id
        context['gene_id'] = gene_id
        context['aa_change'] = aa_change

        #get right gene dict based on gene_id -- dictionary?
        context['gene_dict'] = gene_dict

        pdb_list_dict = {
            'BRCA1': ['1JM7', '4IGK'],
        }

        pdb_stop_start_dict = {
            '1JM7': [1, 103],
            '4IGK': [1646, 1859],
        }

        context['pdb_stop_start_dict'] = pdb_stop_start_dict

        if gene_id is False:
            resi_num = brca1_object.hgvs_prot_code1.split('.')[1][:-1][1:]
            context['resi_num'] = resi_num
            resi_string = 'resi:' + str(resi_num) + ';chain:A'
            context['resi_string'] = resi_string

            pdb_entry_list = pdb_list_dict[brca1_object.gene]

            pdb_entry = None

            for pdbe in pdb_entry_list:
                pdb_stop_start_list = pdb_stop_start_dict[pdbe]
                if int(resi_num) >= pdb_stop_start_list[0] and int(
                        resi_num) <= pdb_stop_start_list[1]:
                    pdb_entry = pdbe

        else:
            pdb_entry = pdb_list_dict[gene_id]
            resi_num = 0
            #create dictionary for this list

        context['pdb_entry'] = pdb_entry

        #list of pdb information for each gene for protein sequence diagram

        pdb_translate_list = [
            'ring', 'brca1', 'bard1', 'brct', 'unp', 'atrip', 'atm', 'rad3'
        ]
        do_not_translate_list = ['of', 'being', 'and', 'in', 'with', 'the']

        def correct_pdb_capitalization(title):
            title_split = title.split()
            title_list = []
            for t in title_split:
                if '/' in t:
                    t = t.split('/')
                    for ts in t:
                        for x in range(0, len(pdb_translate_list)):
                            if pdb_translate_list[x] == ts:
                                index = t.index(ts)
                                ts = ts.upper()
                                t[index] = ts
                        if ts not in do_not_translate_list and ts not in pdb_translate_list:
                            ts = ts.title()

                    t = "/".join(t)

                    title_list.append(t)

                elif '-' in t:
                    t = t.split('-')
                    for ts in t:
                        for x in range(0, len(pdb_translate_list)):
                            if pdb_translate_list[x] == ts:
                                index = t.index(ts)
                                ts = ts.upper()
                                t[index] = ts
                        if ts not in do_not_translate_list and ts not in pdb_translate_list:
                            ts = ts.title()

                    t = '-'.join(t)

                    title_list.append(t)

                else:
                    # for x in range(0,len(pdb_translate_list)):
                    #     if pdb_translate_list[x] == t:
                    #         t = t.upper()
                    if (t not in do_not_translate_list) and (
                            t not in pdb_translate_list):
                        t = t.title()
                    if t in pdb_translate_list:
                        t = t.upper()

                    title_list.append(t)

            title = ' '.join(title_list)
            return title

        if pdb_entry is not None:
            parser = PDBParser()
            pdbl = PDBList()
            module_dir = os.path.dirname(__file__)  # get current directory
            if type(pdb_entry) == str:
                file_path = os.path.join(
                    module_dir,
                    'static/p3_app/pdb_files/' + pdb_entry + '.pdb')
                pdb_file = open(file_path)
                structure = parser.get_structure(pdb_entry, pdb_file)
                compound = structure.header['compound']
                title = structure.header['name']

                title = correct_pdb_capitalization(title)

                for keys, values in compound.items():
                    for k, v in values.items():
                        if k != 'other_details':
                            values[k] = correct_pdb_capitalization(v)

                journal = structure.header['journal_reference']
                pdb_list = [compound, title, journal]
            elif type(pdb_entry) == list:
                pdb_dict = {}
                for pdbe in pdb_entry:
                    file_path = os.path.join(
                        module_dir, 'static/p3_app/pdb_files/' + pdbe + '.pdb')
                    pdb_file = open(file_path)
                    structure = parser.get_structure(pdbe, pdb_file)  # use the individual entry id, not the whole list
                    compound = structure.header['compound']
                    title = structure.header['name']
                    title = correct_pdb_capitalization(title)
                    journal = structure.header['journal_reference']
                    pdb_list = [compound, title, journal]
                    pdb_dict[pdbe] = pdb_list

                pdb_list = []
                context['pdb_dict'] = pdb_dict

        else:
            pdb_list = []

        context['pdb_list'] = pdb_list

        #if gene_id is not False:
        #get gene list

        #create a list of synonyms and fragments so they're properly capitalized for site

        pfam_id = 'P38398'

        #create dictionary for uniprot/gene names when all gene names are present

        module_dir = os.path.dirname(__file__)  # get current directory
        file_path = os.path.join(
            module_dir, 'static/p3_app/pfam_files/' + pfam_id + '.xml')
        xml_file = open(file_path)
        tree = ET.parse(xml_file)
        root = tree.getroot()
        count = 0
        match_dict = {}

        resi_in_domain = None

        for child in root.iter('match'):
            type_attr = child.attrib['type']
            acc_attr = child.attrib['accession']
            id_attr = child.attrib['id']

            for grandchild in child.iter('location'):
                start = grandchild.attrib['start']
                end = grandchild.attrib['end']
                start = int(start)
                end = int(end)
                # resi_num may be a string here; compare numerically within the domain bounds
                if start <= int(resi_num) <= end:
                    resi_in_domain = id_attr
                match_dict[count] = [type_attr, acc_attr, id_attr, start, end]
                count += 1

        for seq in root.iter('sequence'):
            sequence = seq.text

        context['sequence'] = sequence
        context['match_dict'] = match_dict
        context['resi_in_domain'] = resi_in_domain

        #additional pfam info for mouseover feature

        file_path = os.path.join(
            module_dir, 'static/p3_app/uniprot_files/' + pfam_id + '.xml')
        record = SeqIO.read(open(file_path), 'uniprot-xml')
        uniprot_comment_dict = {}

        uniprot_comment_dict['domain'] = record.annotations['comment_domain']
        uniprot_comment_dict['tissue_specificity'] = record.annotations[
            'comment_tissuespecificity']
        uniprot_comment_dict['enzyme_regulation'] = record.annotations[
            'comment_enzymeregulation']
        uniprot_comment_dict['disease'] = record.annotations['comment_disease']

        oi_dict = {}
        for oi in record.annotations['comment_onlineinformation']:
            oi = oi.split('@')
            oi_dict[oi[0]] = oi[1]

        uniprot_comment_dict['subunit'] = record.annotations['comment_subunit']
        uniprot_comment_dict['function'] = record.annotations[
            'comment_function']
        uniprot_comment_dict['polymorphism'] = record.annotations[
            'comment_polymorphism']
        uniprot_comment_dict['ptm'] = record.annotations['comment_PTM']

        context['uniprot_comment_dict'] = uniprot_comment_dict
        context['oi_dict'] = oi_dict

        uniprot_references = record.annotations['references']
        context['uniprot_references'] = uniprot_references

        secondary_structure_features = []
        variant_features = []
        other_uniprot_features = []

        for feature in record.features:
            if feature.qualifiers['type'] == 'strand' or feature.qualifiers[
                    'type'] == 'helix' or feature.qualifiers['type'] == 'turn':
                if feature.qualifiers['type'] == 'strand':
                    feature.type = 'beta_strand'
                secondary_structure_features.append(feature)
            elif feature.type == 'cross-link' or feature.type == 'modified residue' or feature.type == 'mutagenesis site' or feature.type == 'sequence variant' or feature.type == 'sequence conflict':
                variant_features.append(feature)
            else:
                other_uniprot_features.append(feature)

        context['uniprot_features'] = record.features

        context['secondary_structure_features'] = secondary_structure_features
        context['variant_features'] = variant_features
        context['other_uniprot_features'] = other_uniprot_features

        variation_dict = {}

        for rsid in Brca1New.objects.values('rsid').distinct():
            rsid = rsid['rsid']
            if rsid.startswith('rs'):
                variation_dict[rsid] = True
            else:
                continue
            '''
            if not rsid.startswith('rs'):
                continue
            else:
                obj = Brca1New.objects.filter(rsid=rsid)[0]
                pos = obj.codon
                variation_dict[rsid] = pos
            '''

        context['variation_dict'] = variation_dict
        #add info for pubmed list - turn into dictionary of citation values

        #add checkpoint to make sure residues match of pdb file and residue of variation in database
        #Brca1New.objects.values('swissprot_type').distinct()

        count = 0
        swissprot_dict = {}
        for sp_ran in Brca1New.objects.values_list(
                'swissprot_range').distinct():
            sp_ran = ''.join(sp_ran)
            obj = Brca1New.objects.filter(swissprot_range=sp_ran)[0]
            sp_type = obj.swissprot_type.encode('ascii')
            sp_desc = obj.swissprot_desc.encode('ascii')
            if sp_desc == '':
                continue

            sp_ran = sp_ran.split('[')[1].split(']')[0]

            if '-' in sp_ran:
                start, end = sp_ran.split('-')
                start = int(start)
                end = int(end)
            else:
                start = int(sp_ran)
                end = int(sp_ran)

            swissprot_dict[count] = [sp_type, start, end, sp_desc]
            count += 1

        context['swissprot_dict'] = swissprot_dict

        count = 0
        alamut_pd1_dict = {}
        ala_list = []
        ala_indices = []
        cdna_pos_list = []
        end = 0

        for ala_dom in Brca1New.objects.values_list('alamut_proteindomain1'):
            ala_dom = ''.join(ala_dom)
            ala_list.append(ala_dom)

        for cdna_pos in Brca1New.objects.values_list('codon'):
            cdna_pos = ''.join(cdna_pos)
            cdna_pos_list.append(cdna_pos)

        for ala_ind in range(1, len(ala_list)):
            if ala_list[ala_ind - 1] == ala_list[ala_ind]:
                continue
            else:
                # if ala_list[ala_ind-1] == '':
                #     continue

                if end == 0:
                    for ai in range(1, len(ala_list)):
                        if ala_list[ai] == ala_list[ala_ind - 1]:
                            start = int(cdna_pos_list[ai])
                            break

                else:
                    start = end + 1

                end = int(cdna_pos_list[ala_ind])
                domain = ala_list[ala_ind - 1]
                if domain != '':
                    alamut_pd1_dict[count] = [domain, start, end]
                    count += 1

        context['alamut_pd1_dict'] = alamut_pd1_dict

        ala_list = []
        alamut_pd2_dict = {}
        end = 0
        count = 0
        for ala_dom in Brca1New.objects.values_list('alamut_proteindomain2'):
            ala_dom = ''.join(ala_dom)
            ala_list.append(ala_dom)

        for ala_ind in range(1, len(ala_list)):
            if ala_list[ala_ind - 1] == ala_list[ala_ind]:
                continue
            else:
                # if ala_list[ala_ind-1] == '':
                #     continue

                if end == 0:
                    for ai in range(1, len(ala_list)):
                        if ala_list[ai] == ala_list[ala_ind - 1]:
                            start = int(cdna_pos_list[ai])
                            break
                else:
                    start = end + 1

                end = int(cdna_pos_list[ala_ind])
                domain = ala_list[ala_ind - 1]
                if domain != '':
                    alamut_pd2_dict[count] = [domain, start, end]
                    count += 1

        context['alamut_pd2_dict'] = alamut_pd2_dict

        ala_list = []
        alamut_pd3_dict = {}
        end = 0
        count = 0
        for ala_dom in Brca1New.objects.values_list('alamut_proteindomain3'):
            ala_dom = ''.join(ala_dom)
            ala_list.append(ala_dom)

        for ala_ind in range(1, len(ala_list)):
            if ala_list[ala_ind - 1] == ala_list[ala_ind]:
                continue
            else:
                # if ala_list[ala_ind-1] == '':
                #     continue

                if end == 0:
                    for ai in range(1, len(ala_list)):
                        if ala_list[ai] == ala_list[ala_ind - 1]:
                            start = int(cdna_pos_list[ai])
                            break
                else:
                    start = end + 1

                end = int(cdna_pos_list[ala_ind])
                domain = ala_list[ala_ind - 1]
                if domain != '':
                    alamut_pd3_dict[count] = [domain, start, end]
                    count += 1

        context['alamut_pd3_dict'] = alamut_pd3_dict

        #make condition if last entry is part of a domain
        ala_list = []
        alamut_pd4_dict = {}
        end = 0
        count = 0
        for ala_dom in Brca1New.objects.values_list('alamut_proteindomain4'):
            ala_dom = ''.join(ala_dom)
            ala_list.append(ala_dom)

        for ala_ind in range(1, len(ala_list)):
            if ala_list[ala_ind - 1] == ala_list[ala_ind]:
                continue
            else:
                # if ala_list[ala_ind-1] == '':
                #     continue

                if end == 0:
                    for ai in range(1, len(ala_list)):
                        if ala_list[ai] == ala_list[ala_ind - 1]:
                            start = int(cdna_pos_list[ai])
                            break
                else:
                    start = end + 1

                end = int(cdna_pos_list[ala_ind])
                domain = ala_list[ala_ind - 1]
                if domain != '':
                    alamut_pd4_dict[count] = [domain, start, end]
                    count += 1

        context['alamut_pd4_dict'] = alamut_pd4_dict

        if gene_id is not False:
            return render(request, 'p3_app/gene_results.html', context)

        if brca1_object.hgmd_pubmed == 'Pubmed':
            hgmd_pubmed_list = brca1_object.hgmd_pubmed_list.split(' ')
        else:
            hgmd_pubmed_list = None

        context['hgmd_pubmed_list'] = hgmd_pubmed_list

        pubmed_dict = {}
        if hgmd_pubmed_list is not None:
            for pmid in hgmd_pubmed_list:
                if pmid in pmid_dict.keys():
                    pubmed_dict[pmid] = pmid_dict[pmid]

        context['pubmed_dict'] = pubmed_dict

        if brca1_object.alamut_siftprediction == '':
            brca1_object.alamut_siftprediction = None

        if '_' in brca1_object.muttaster_prediction:
            brca1_object.muttaster_prediction = brca1_object.muttaster_prediction.replace(
                '_', ' ')

        if brca1_object.muttaster_features != '':
            brca1_object.muttaster_features = brca1_object.muttaster_features.split(
                ',')

        ss_img_loc = int(brca1_object.suspect_score) * 4
        context['ss_img_loc'] = ss_img_loc

        agvgd_dict = {
            'C0': 40,
            'C15': 92,
            'C25': 148,
            'C35': 205,
            'C45': 261,
            'C55': 317,
            'C65': 375
        }
        context['agvgd_dict'] = agvgd_dict

        muttaster_model_dict = {
            'complex_aa': 'mutation introducing a premature stop codon',
            'simple_aae':
            'substitution/insertion/deletion of a single amino acid'
        }
        context['muttaster_model_dict'] = muttaster_model_dict

        context['brca1_object'] = brca1_object
        '''
        context = {'rsid':rsid, 'brca1_object':brca1_object,'resi_string':resi_string, 'resi_num':resi_num, 'pdb_entry':pdb_entry, 'chr_num':chr_num,'chr_loc':chr_loc, 'pdb_list':pdb_list, 'match_dict':match_dict, 'sequence':sequence, 'hgmd_pubmed_list':hgmd_pubmed_list, 'swissprot_dict':swissprot_dict, 'alamut_pd1_dict':alamut_pd1_dict,'alamut_pd2_dict':alamut_pd2_dict,'alamut_pd3_dict':alamut_pd3_dict,
'alamut_pd4_dict':alamut_pd4_dict,'agvgd_dict':agvgd_dict,
'ss_img_loc':ss_img_loc,'muttaster_model_dict':muttaster_model_dict, 
'pubmed_dict':pubmed_dict, 'gene_dict':gene_dict, 'protein_id':protein_id,'aa_change':aa_change}
        '''

        return render(request, 'p3_app/results_page2.html', context)

    return render(request, 'p3_app/index.html', {})
Example No. 12
"""
prots,enzsites = findRestr("orf_coding_all.fasta")
print("Non restrictive proteins : ",prots)
for enz in enzsites:
    print("ID ",enz[0])
    print("EcoRI ",enz[1][0])
    print("XhoI ",enz[1][1])
    print("TaqI ",enz[1][2])"""

#######################################################################################################
from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB.PDBList import PDBList
from Bio.PDB.MMCIF2Dict import MMCIF2Dict

pdbl = PDBList()
pdbl.retrieve_pdb_file("2GAA")


def readPDBFile(filename):
    mmcif_dict = MMCIF2Dict(filename)
    nbchains, nbres, nbatoms, res = mmcif_dict[
        '_struct_sheet.number_strands'], mmcif_dict[
            '_struct_site.pdbx_num_residues'], mmcif_dict[
                '_refine_hist.number_atoms_total'], mmcif_dict['_exptl.method']
    return sum([int(nbchains[i])
                for i in range(len(nbchains))]), nbres, nbatoms, res


print(readPDBFile("ga/2gaa.cif"))
Example No. 13
def DoRetrievePDBFile(aPDB_Code, aFolder):

    global USE_ALT_PDB_SERVER

    done = False
    errors_before_quit = 20
    seconds_between_retries = 30
    fetchedfile = ""
    alt_server = "http://www.rcsb.org/pdb/files/"

    while done == False:

        pdblist = None
        if USE_ALT_PDB_SERVER: pdblist = PDBList(server=alt_server)
        else: pdblist = PDBList()
        #pdblist = PDBList( server='ftp://ftp.wwpdb.org')
        #server = 'ftp://ftp.rcsb.org'
        #server = "ftp.ebi.ac.uk/pub/databases/pdb/"

        try:
            #http://biopython.org/DIST/docs/api/Bio.PDB.PDBList%27-pysrc.html
            #fetchedfile = pdblist.retrieve_pdb_file( pdb_code=aPDB_Code, pdir=aFolder, file_format="pdb", obsolete=False)
            fetchedfile = pdblist.retrieve_pdb_file(pdb_code=aPDB_Code,
                                                    pdir=aFolder,
                                                    file_format="pdb",
                                                    obsolete=False)
            done = True
            if fetchedfile and len(fetchedfile) and (
                    fetchedfile.find(".ent") > 0
                    or fetchedfile.find(".pdb") > 0):
                #print "Structure fetched, PDB code: " + aPDB_Code
                print "INFO: Structure " + aPDB_Code + " fetched. [OK]"
                #io = PDBIO()
                #io.set_structure( s)
                #io.save( filename)
            else:
                print "WARNING: Fetch failed [FAIL]"

        except IOError as ex:
            sys.stderr.write(
                "WARNING: Could not download structure {0}. An exception of type {1} occured.\n       Arguments: {2!r}\n"
                .format(aPDB_Code,
                        type(ex).__name__, ex.args))
            sys.stderr.write("INFO: Retrying connection in %i seconds...\n" %
                             seconds_between_retries)

            for a in ex.args:
                #Downloading too many structures too fast?
                if str(a).lower().find("too many") >= 0:
                    seconds_between_retries += 10
                    break
                if str(a).lower().find("No such file") >= 0:
                    #No need to retry
                    return fetchedfile
                if str(a).lower().find("did not properly respond") >= 0:
                    #No need to retry
                    sys.stderr.write(
                        "INFO: Switching download thread to alternative server '%s'.\n"
                        % alt_server)
                    USE_ALT_PDB_SERVER = True

            time.sleep(seconds_between_retries)
            done = False
            errors_before_quit -= 1
            if errors_before_quit <= 0:
                sys.stderr.write("ERROR: Failed too many times. Quitting...\n")
                break

    return fetchedfile
Example No. 14
def generate_structural_statistics(jobId,
                                   dom,
                                   pdb_code,
                                   selchain,
                                   uploaded_str,
                                   modeled_str=False,
                                   savequeue="jobinfo"):
    try:
        tdata = TripleMapping.objects.get(pk=jobId)
    except (KeyError, TripleMapping.DoesNotExist):
        return "str stats gen error!"
    threeList = [
        "ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS", "LEU",
        "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", "TRP", "TYR"
    ]
    if uploaded_str == False:
        pdbl = PDBList()
        pdbl.retrieve_pdb_file(pdb_code, pdir='./PDB', file_format="pdb")
        pdb_filename = "./PDB/pdb" + pdb_code.lower() + ".ent"
    else:
        if modeled_str == False:
            pdb_filename = "./PDB/" + jobId + "___" + pdb_code
        else:
            pdb_filename = "./PDB/model/" + pdb_code
    pdbsequencefull = []
    pdbsequencenum = []
    structure = Bio.PDB.PDBParser().get_structure(pdb_code, pdb_filename)
    model = structure[0]
    dssp = DSSP(model, pdb_filename, dssp='mkdssp', acc_array="Wilke")
    for chain in model:
        if chain.id == selchain:
            for residue in chain:
                if Bio.PDB.Polypeptide.is_aa(residue) == True:
                    number = residue.get_id()
                    try:
                        num = str(number[1]) + str((number[2].rstrip())[0])
                    except IndexError:
                        num = str(number[1])
                    pdbsequencenum.append(residue.get_resname() + num)
                    #new_id = (" ", residue.get_id()[1], residue.get_id()[2])
                    pdbsequencefull.append(residue.get_id())
    pdbsequencenum = pdbsequencenum[1:-1]
    pdbsequencefull = pdbsequencefull[1:-1]
    dssp_info = []
    for i in range(0, len(pdbsequencenum)):
        chain_res = pdbsequencenum[i]
        residue_key = pdbsequencefull[i]
        if (chain_res[0:3] in threeList):
            dssp_res = dssp[selchain, residue_key]
            dssp_info.append({
                "name": chain_res,
                "sec": str(dssp_res[2]),
                "phi": str(dssp_res[4]),
                "psi": str(dssp_res[5]),
                "depth": str(dssp_res[3])
            })

    pdb_coded = pdb_code
    if (modeled_str == True):
        pdb_coded = pdb_code.split("_")[3] + "_" + pdb_code.split("_")[4]

    full_dssp_info = {"_".join([dom, pdb_coded, selchain]): dssp_info}
    prev_dsspinfo = getattr(tdata, "dsspinfo")
    if prev_dsspinfo:
        prev_dsspinfo = prev_dsspinfo.split("]}]")[0] + "]},"
    else:
        prev_dsspinfo = "["
    setattr(tdata, "dsspinfo", prev_dsspinfo + str(full_dssp_info) + "]")
    tdata.save()

    # run ring software and obtain results
    process = Popen([
        "./bin/Ring", "-i", pdb_filename, "-c", selchain, "-N",
        "./jobs/nodes/" + jobId + "_" + dom + "_" + pdb_code + "_" + selchain +
        ".nds", "-E", "./jobs/edges/" + jobId + "_" + dom + "_" + pdb_code +
        "_" + selchain + ".eds", "-g", "1"
    ],
                    stdout=PIPE)
    (output, err) = process.communicate()
    exit_code = process.wait()

    # read ring software results and generate json objects
    f1 = open(
        "./jobs/edges/" + jobId + "_" + dom + "_" + pdb_code + "_" + selchain +
        ".eds", "r+")
    lines = f1.readlines()
    G = nx.MultiGraph()
    G2 = nx.Graph()
    pairs = []
    singlegraph = {}
    for l in range(1, len(lines)):
        line = lines[l]
        res1 = line.split()[0].split(":")[-1] + line.split()[0].split(":")[1]
        res2 = line.split()[2].split(":")[-1] + line.split()[2].split(":")[1]
        order_pair = sorted([res1, res2])
        interaction = line.split()[1]
        energy = float(line.split()[5])
        if "NLA" not in res1 and "NLA" not in res1:
            G.add_edge(res1, res2, weight=energy, itype=interaction)
            if order_pair not in pairs:
                singlegraph["".join(order_pair)] = (res1, res2, energy)
                pairs.append(order_pair)
            else:
                new_energy = singlegraph["".join(order_pair)][2] + energy
                singlegraph["".join(order_pair)] = (res1, res2, new_energy)
    G2.add_weighted_edges_from(singlegraph.values())

    g_distance_dict1 = {(e1, e2, w): 1 / w
                        for e1, e2, w in G.edges(data='weight')}
    nx.set_edge_attributes(G, g_distance_dict1, 'distance')

    g_distance_dict = {(e1, e2): 1 / weight
                       for e1, e2, weight in G2.edges(data='weight')}
    nx.set_edge_attributes(G2, g_distance_dict, 'distance')
    graph_stats = []

    weighted_degree = G.degree(weight='weight')

    between = nx.betweenness_centrality(G2, weight='weight')

    closeness = nx.closeness_centrality(G, distance='distance')

    mutstats = None
    for k in between:
        graph_stats.append({
            "res": k,
            "betweeness": between[k],
            "closeness": closeness[k],
            "wdegree": weighted_degree[k]
        })
        if (modeled_str == True and k == pdb_code.split("_")[5] +
                pdb_code.split("_")[6].split(".")[0]):
            mutstats = {
                "res": k,
                "betweeness": between[k],
                "closeness": closeness[k],
                "wdegree": weighted_degree[k]
            }

    graph_stats_full = {"_".join([dom, pdb_coded, selchain]): graph_stats}
    if (modeled_str == False):
        prev_gstats = getattr(tdata, "graph_stats")
        if prev_gstats:
            prev_gstats = prev_gstats.replace("];",
                                              ", ")  #.split("}}]")[0] + "}},"
        else:
            prev_gstats = "["
        setattr(tdata, "graph_stats",
                prev_gstats + str(graph_stats_full) + "];")
        tdata.save()
    else:
        prev_muts = getattr(tdata, "mut_stats")
        if prev_muts:
            prev_muts = prev_muts.replace("]", " , ")
        else:
            prev_muts = "["
        setattr(tdata, "mut_stats", prev_muts + str(mutstats) + "]")
        tdata.save()

        #pass
    #print(graph_stats)
    #quit()

    datag = json_graph.node_link_data(G)
    s = json.dumps(datag)
    datag_full = {"_".join([dom, pdb_coded, selchain]): s}
    if (modeled_str == False):
        prev_datag = getattr(tdata, "graph_json")
        if prev_datag:
            prev_datag = prev_datag.replace("];",
                                            ", ")  #.split("]}]")[0] + "]},"
        else:
            prev_datag = "["
        setattr(tdata, "graph_json", prev_datag + str(datag_full) + "];")
        tdata.save()
    else:
        mNode = {
            "id": pdb_code.split("_")[5] + pdb_code.split("_")[6].split(".")[0]
        }
        #nodesAt5 = [x for x,y in G.nodes(data=True) if y['id']== pdb_code.split("_")[5] + pdb_code.split("_")[6].split(".")[0]]
        #mNode = pdb_code.split("_")[5] + pdb_code.split("_")[6].split(".")[0]
        #newedges = [(u,v,d) for u,v,d in G.edges(data = True) if ((u['id'] == mNode) or (v['id'] == mNode))]
        nodesAt5 = [x for x in G.nodes() if x == mNode]
        #H = nx.MultiGraph()
        #H.add_edges_from(newedges)
        H = G.subgraph(nodesAt5)
        datam = json_graph.node_link_data(H)
        sm = json.dumps(datam)
        datam_full = {
            "_".join([
                dom, pdb_coded,
                pdb_code.split("_")[5] + pdb_code.split("_")[6].split(".")[0]
            ]):
            sm  # the JSON of the mutation subgraph, not the full graph
        }
        prevdatam = getattr(tdata, "mut_json")
        if prevdatam:
            prevdatam = prevdatam.replace("];end;", ", ")
        else:
            prevdatam = "["
        setattr(tdata, "mut_json", prevdatam + str(datam_full) + "];end;")
        tdata.save()
        pass
    f1.close()

    #save objects into attributes

    jobs = getattr(tdata, savequeue)
    print("jobs")
    print(jobs)
    job_this = dom + "_" + pdb_code + "_" + selchain
    if (modeled_str == True):
        job_this = dom + "_" + pdb_code.split("_")[3] + "_" + pdb_code.split(
            "_")[4] + "_" + pdb_code.split("_")[6].split(
                ".")[0] + "_" + pdb_code.split("_")[5]
    print("job_this")
    print(job_this)
    new_jobs = []
    for job in jobs.split(","):
        if job_this in job:
            new_job = "_".join(job.split("_")[:-1]) + "_done"
            new_jobs.append(new_job)
        else:
            new_jobs.append(job)
    print("new_jobs")
    print(new_jobs)
    setattr(tdata, savequeue, ",".join(new_jobs))
    tdata.save()
    return "str stats gen!"
Example No. 15
 def test_get_recent_changes(self):
     """Tests the Bio.PDB.PDBList.get_recent_changes method."""
     pdblist = PDBList(obsolete_pdb="unimportant")  # obsolete_pdb is declared to prevent creating the "obsolete" directory
     url = pdblist.pdb_server + '/pub/pdb/data/status/latest/added.pdb'
     entries = pdblist.get_status_list(url)
     self.assertIsNotNone(entries)
Example No. 16
from Bio.PDB.PDBList import PDBList

pdblist = PDBList()
pdblist.retrieve_pdb_file(
    "127d")  # downloads structure 127D in PDBx/mmCif format
pdblist.retrieve_pdb_file(
    "127d", file_format="pdb")  # downloads structure 127D in PDB format
pdblist.retrieve_pdb_file(
    "127d", file_format="xml")  # downloads structure 127D in PDBML/XML format
pdblist.retrieve_pdb_file(
    "127d", file_format="mmtf")  # downloads structure 127D in mmtf format
pdblist.retrieve_pdb_file(
    "3k1q",
    file_format="bundle")  # downloads large structure 3K1Q in pdb-like bundle
pdblist.retrieve_pdb_file(
    "347d",
    obsolete=True)  # downloads obsolete structure 347D in PDBx/mmCif format
pdblist.download_pdb_files(
    ["1esy", "127D"])  # downloads structures 127D and 1ESY in PDBx/mmCif format; expects a list of codes
pdblist.download_entire_pdb(
)  # downloads entire PDB database in PDBx/mmCif format
pdblist.update_pdb()  # performs the weekly update of the database
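
As a hedged follow-up to the listing above (not part of the original example): retrieve_pdb_file returns the local path of the downloaded file, so the default PDBx/mmCIF download can be parsed directly; the PDB code and output directory below are just placeholders.

from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB.PDBList import PDBList

pdblist = PDBList()
cif_path = pdblist.retrieve_pdb_file("127d", pdir=".")  # returns the path of the downloaded mmCIF file
structure = MMCIFParser(QUIET=True).get_structure("127d", cif_path)
print(len(list(structure.get_atoms())))  # number of atoms parsed from the downloaded structure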
Example No. 17
#!/usr/bin/env python
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList
from Bio.PDB import vectors
import numpy as np
import os

# Reading a PDB file

#get PDB from rcsb.org
basePath = '/home/kenneth/proj/proMin/code/biopython'
filename = 'pdb1fdn.ent'
pdb = PDBList().retrieve_pdb_file("1FDN", file_format='pdb')

#Create a PDBParser object
parser = PDBParser()  # with PERMISSIVE=0 the parser raises errors on problematic records instead of warning
structure = parser.get_structure("1FDN", os.path.join(basePath, filename))

# print(type(structure)) #what type of object did the parser return
# <class 'Bio.PDB.Structure.Structure'>

# print(dir(structure)) #check what attributes exist
# ['__class__', '__contains__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_generate_full_id', '_id', '_reset_full_id', 'add', 'child_dict', 'child_list', 'copy', 'detach_child', 'detach_parent', 'full_id', 'get_atoms', 'get_chains', 'get_full_id', 'get_id', 'get_iterator', 'get_level', 'get_list', 'get_models', 'get_parent', 'get_residues', 'has_id', 'header', 'id', 'insert', 'level', 'parent', 'set_parent', 'transform', 'xtra']

# PDB Structure object layers: 1) model, which contains 2) chains, which contain 3) residues, which contain 4) atoms
model = structure[0]
chain = model["A"]
# print(list(chain.get_residues()))

#dealing with hetero atom - http://biopython.org/DIST/docs/tutorial/Tutorial.html#sec201
residue = chain[(
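
The snippet above is truncated; as a hedged illustration of the model/chain/residue/atom layers mentioned in its comments, a minimal sketch (assuming the file fetched earlier is in the working directory) might look like this:

from Bio.PDB.PDBParser import PDBParser

parser = PDBParser(QUIET=True)
structure = parser.get_structure("1FDN", "pdb1fdn.ent")  # assumes the file downloaded above
for model in structure:          # layer 1: models
    for chain in model:          # layer 2: chains
        for residue in chain:    # layer 3: residues
            for atom in residue: # layer 4: atoms
                pass             # e.g. atom.get_name(), atom.get_coord()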
Example No. 18
from autopack.Ingredient import MultiSphereIngr

from upy import hostHelper

autopack.helper = hostHelper.Helper()
autopack.helper.host = "none"
autopack.forceFetch = False

from scipy.cluster.vq import kmeans, vq
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.Polypeptide import is_aa

fetch = PDBList(pdb=data_folder)
p = PDBParser(PERMISSIVE=1)


def getMWFromSequence(sequence):
    X = ProteinAnalysis(sequence)
    mw = X.molecular_weight()
    return mw


def getSequenceStructure(s):
    seq = ""
    for r in s.get_residues():
        if is_aa(r.get_resname(), standard=True):
            seq += three_to_one(r.get_resname())
        else:
Example No. 19
from Bio.PDB.PDBList import PDBList
pdbl = PDBList()
pdbl.retrieve_pdb_file("6WO1", file_format="mmtf", pdir="/home/koreanraichu/")
# If no file format is specified, the structure is downloaded as a CIF file.
# Passing file_format="<extension>" downloads it in that specific file format.
# Passing pdir="<path>" also lets you choose the download directory.
Example No. 20
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList

pdbl = PDBList()

for vrstica in open('./structures lists/new structures.txt'):
    structure_id = vrstica.strip('\n')
    pdbl.retrieve_pdb_file(structure_id,
                           file_format='pdb',
                           pdir='pdb structures')
Example No. 21
 def test_get_all_obsolete(self):
     """Tests the Bio.PDB.PDBList.get_all_obsolete method."""
     pdblist = PDBList(obsolete_pdb="unimportant")  # obsolete_pdb is declared to prevent creating the "obsolete" directory
     entries = pdblist.get_all_obsolete()
     # As the number of obsolete entries grows constantly, the test checks whether a certain number has been exceeded
     self.assertGreater(len(entries), 3000)