def analyze_construct(self, filenames=None):
        self.logger.info("ANALYZING CONSTRUCT STRUCTURES")

        # read source files
        if not filenames:
            filenames = os.listdir(self.construct_data_dir)

        for filename in filenames:
            if filename[-3:] != 'pdb' and filename[-3:] != 'ent':
                continue
            root, ext = os.path.splitext(os.path.basename(filename))
            print(filename)
            print(root)
            filepath = os.sep.join([self.construct_data_dir, filename])
            self.logger.info("Working on a file: {}".format(filename))
            header = parse_pdb_header(filepath)
            parser = SequenceParser(filepath)

            json_data = OrderedDict()
            json_data["header"] = header
            json_data.update(parser.get_fusions())
            json_data.update(parser.get_mutations())
            json_data.update(parser.get_deletions())
            json.dump(json_data,
                      open(
                          os.sep.join(
                              [settings.DATA_DIR,
                               "{}_auto.json".format(root)]), 'w'),
                      indent=4,
                      separators=(',', ': '))
    def handle(self, *args, **options):

        q = QueryPDB()
        q.list_xtals(verbose=False)

        for record in q.new_structures:
            pdb_code = record[0]
            wt_id = Protein.objects.get(entry_name=record[1]).id
            if not os.path.exists(os.sep.join([self.pdb_data_dir, "{}.pdb".format(pdb_code)])):
                self.download_pdb(pdb_code)
            self.parser = SequenceParser(os.sep.join([self.pdb_data_dir, "{}.pdb".format(pdb_code)]), wt_protein_id=wt_id)
            header = parse_pdb_header(os.sep.join([self.pdb_data_dir, "{}.pdb".format(pdb_code)]))
            self.create_yaml(pdb_code, record[1], header)
Example #4
 def handle(self, *args, **options):
     print("Working on file {}".format(options['pdb_file']))
     header = parse_pdb_header(options['pdb_file'])
     print(header['compound'])
     sp = SequenceParser(options['pdb_file'])
     c = list(sp.mapping.keys())[0]
     poly = sp.get_chain_peptides(c)
     for peptide in poly:
         print("Start: {} Stop: {} Len: {}".format(peptide[0].id[1], peptide[-1].id[1], len(peptide)))
         sp.map_to_wt_blast(c, peptide, None, int(peptide[0].id[1]))
     sp.map_seqres()
     sp.save_excel_report("test.xlsx")
     #sp.get_report()
Example #5
    def post(self, request):
        # root, ext = os.path.splitext(request._request.FILES['pdb_file'].name)
        pdb_file = StringIO(
            request._request.FILES['pdb_file'].file.read().decode(
                'UTF-8', "ignore"))
        header = parse_pdb_header(pdb_file)
        # rewind: parse_pdb_header consumes the handle up to the first coordinate record
        pdb_file.seek(0)
        parser = SequenceParser(pdb_file)

        json_data = OrderedDict()
        json_data["header"] = header
        json_data.update(parser.get_fusions())
        json_data.update(parser.get_mutations())
        json_data.update(parser.get_deletions())

        return Response(json_data)
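A minimal client-side sketch for exercising a view like the one above. The endpoint URL and the local file name are placeholders (the route for this view is not shown); only the multipart field name pdb_file and the "header" key come from the code:

import requests

url = "https://example.org/api/structure/parse"  # hypothetical route for the view above

with open("2b10.pdb", "rb") as fh:  # any local PDB file
    resp = requests.post(url, files={"pdb_file": fh})

data = resp.json()
print(data["header"].get("resolution"))  # header keys come from parse_pdb_header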
Example #6
 def handle(self, *args, **options):
     root, ext = os.path.splitext(os.path.basename(options['pdb_file']))
     print("Working on file {}".format(options['pdb_file']))
     header = parse_pdb_header(options['pdb_file'])
     sp = SequenceParser(options['pdb_file'])
     print(sp.get_fusions())
     print(sp.get_mutations())
     print(sp.get_deletions())
     json_data = {}
     json_data["header"] = header
     json_data.update(sp.get_fusions())
     json_data.update(sp.get_mutations())
     json_data.update(sp.get_deletions())
     json.dump(json_data, open(os.sep.join([settings.DATA_DIR, "{}_auto.json".format(root)]), 'w'), indent=4, separators=(',', ': '))
     #json.dump(json_data, open("test.json", 'w'), indent=4, separators=(',', ': '))
Example #7
def Header_Data(AAAB14, pdbloc, ion_name):
    """Return crucial header data for a specific pdbid.

    Somewhat non-specific to the Ion Environments project.
    """
    Data = []
    Head = AAAB14[0]
    Tail = AAAB14[1:]
    Head.insert(1, 'StructMethod')
    Head.insert(1, 'Resolution')
    Head.insert(len(Head), 'ECNum')
    Head.insert(len(Head), 'Type')
    Head.insert(len(Head), 'Ion')
    Data.append(Head)
    for item in Tail:
        fname = item[0]
        fname = fname.split('_')
        pdbid = fname[0]
        pdb = 'pdb' + pdbid + '.ent'
        chain = fname[1]
        with open(pdbloc + pdb, 'r') as handle:
            header_dict = parse_pdb_header(handle)
        name = header_dict['name']
        head = header_dict['head']
        method = header_dict['structure_method']
        reso = header_dict['resolution']
        Comp = header_dict['compound']
        ec = FindEC(chain.lower(), 'ec_number', Comp)
        if ec.startswith('1.'):
            txt = 'Oxidoreductase'
        elif ec.startswith('2.'):
            txt = 'Transferase'
        elif ec.startswith('3.'):
            txt = 'Hydrolase'
        elif ec.startswith('4.'):
            txt = 'Lyase'
        elif ec.startswith('5.'):
            txt = 'Isomerase'
        elif ec.startswith('6.'):
            txt = 'Ligase'
        else:
            txt = 'Non_Enzyme'
        item.insert(1, method)
        item.insert(1, reso)
        item.insert(len(item), ec)
        item.insert(len(item), txt)
        item.insert(len(item), ion_name)
        Data.append(item)
    return Data
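The EC-class branching above can also be written as a table lookup. A minimal sketch of that alternative; the class names simply mirror the elif chain in Header_Data:

# Map the leading EC digit to an enzyme class, mirroring the elif chain above.
EC_CLASSES = {
    '1': 'Oxidoreductase',
    '2': 'Transferase',
    '3': 'Hydrolase',
    '4': 'Lyase',
    '5': 'Isomerase',
    '6': 'Ligase',
}

def ec_class(ec):
    """Return the enzyme class for an EC number string, or 'Non_Enzyme'."""
    for digit, name in EC_CLASSES.items():
        if ec.startswith(digit + '.'):
            return name
    return 'Non_Enzyme'

print(ec_class('3.4.21.4'))  # Hydrolase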
Example #8
def get_pdb(pss_result,
            if_full_match=True,
            if_best_resolution=True,
            if_download=True):
    outdir_pdb = './pdb_download'
    if not path.exists(outdir_pdb):
        makedirs(outdir_pdb)

    pdb_reso = []

    for r in pss_result:
        pdb_id = None
        pdb_full = None

        # fully matched or not
        if if_full_match:
            info = r['services'][0]['nodes'][0]['match_context'][0]
            if info['mismatches'] == 0 \
                    and info['gaps_opened'] == 0 \
                    and info['query_length'] == info['subject_length']:
                pdb_full = r['identifier']
                pdb_id = pdb_full.split('_')[0]
        else:
            pdb_full = r['identifier']
            pdb_id = pdb_full.split('_')[0]

        # if match, download pdb file
        if pdb_id and pdb_full:
            outfile = path.join(outdir_pdb,
                                str(pdb_id) +
                                '.pdb') if if_download else path.join(
                                    outdir_pdb, 'tmp.pdb')
            if download_pdb(pdb_id, outfile):
                structure = parse_pdb_header(outfile)
                pdb_reso.append((pdb_full, structure['resolution']))

    if if_best_resolution:
        # find the pdb with the best (numerically lowest) resolution
        tmp_dict = {r: p for p, r in pdb_reso}
        best_reso = min(tmp_dict.keys())
        best_pdb_id = tmp_dict[best_reso]
        return [(best_pdb_id, best_reso)]
        # write to file
        # with open('./dataset_pos.csv', 'a') as f:
        #     f.write("{}, {}, {}\n".format(best_pdb_id, seq, pdb_reso))
        #     print("{} - {}".format(i, pdb_reso))

    return pdb_reso
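A minimal usage sketch for get_pdb, assuming download_pdb is the module's own helper and that each search hit carries the fields read above (identifier plus the nested match_context record); the identifier and lengths are made-up illustration values:

# Hypothetical single-hit search result shaped like the fields get_pdb() reads.
pss_result = [{
    'identifier': '2B10_A',
    'services': [{'nodes': [{'match_context': [{
        'mismatches': 0,
        'gaps_opened': 0,
        'query_length': 120,
        'subject_length': 120,
    }]}]}],
}]

hits = get_pdb(pss_result, if_full_match=True, if_best_resolution=True)
# e.g. [('2B10_A', 2.2)]  -- (PDB identifier, resolution in Angstroms)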
Example #11
from Bio.PDB.PDBParser import PDBParser
parser = PDBParser(PERMISSIVE=1)
structure_id = "2b10"
filename = "/home/koreanraichu/2b10.pdb"
structure = parser.get_structure(structure_id, filename)
print(structure)
# Is this loading a file from the local machine?
# FileNotFoundError: [Errno 2] No such file or directory: 'pdb1fat.ent'

from Bio.PDB import parse_pdb_header
with open(filename, "r") as handle:
    header_dict = parse_pdb_header(handle)
    print(header_dict)
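The FileNotFoundError noted above simply means the entry has not been downloaded yet. A small sketch using Bio.PDB.PDBList to fetch it first (assumes network access; with file_format="pdb" the entry is saved as pdbXXXX.ent and the local path is returned):

from Bio.PDB import PDBList, parse_pdb_header

pdbl = PDBList()
# download entry 1FAT in the legacy PDB format into the current directory
local_path = pdbl.retrieve_pdb_file("1fat", pdir=".", file_format="pdb")

with open(local_path, "r") as handle:
    header_dict = parse_pdb_header(handle)
    print(header_dict["idcode"], header_dict["resolution"])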
Example #12
    def main_func(self, positions, iteration):
        # filenames
        if not positions[1]:
            filenames = self.filenames[positions[0]:]
        else:
            filenames = self.filenames[positions[0]:positions[1]]

        for source_file in filenames:
            source_file_path = os.sep.join([self.structure_data_dir, source_file])
            if os.path.isfile(source_file_path) and source_file[0] != '.':
                self.logger.info('Reading file {}'.format(source_file_path))
                # read the yaml file
                with open(source_file_path, 'r') as f:
                    sd = yaml.load(f)
                    
                    # is this a representative structure (will be used to guide structure-based alignments)?
                    representative = False
                    if 'representative' in sd and sd['representative']:
                        representative = True

                    # only process representative structures on first iteration
                    if not representative and iteration == 1:
                        continue

                    # skip representative structures on second iteration
                    if representative and iteration == 2:
                        continue

                    # is there a construct?
                    if 'construct' not in sd:
                        self.logger.error('No construct specified, skipping!')
                        continue

                    # does the construct exist?
                    try:
                        con = Protein.objects.get(entry_name=sd['construct'])
                    except Protein.DoesNotExist:
                        self.logger.error('Construct {} does not exist, skipping!'.format(sd['construct']))
                        continue

                    # create a structure record
                    try:
                        s = Structure.objects.get(protein_conformation__protein=con)
                    except Structure.DoesNotExist:
                        s = Structure()
                        s.representative = representative

                    # protein state
                    if 'state' not in sd:
                        self.logger.warning('State not defined, using default state {}'.format(
                            settings.DEFAULT_PROTEIN_STATE))
                        state = settings.DEFAULT_STATE.title()
                    else:
                        state = sd['state']
                    state_slug = slugify(state)
                    try:
                        ps, created = ProteinState.objects.get_or_create(slug=state_slug, defaults={'name': state})
                        if created:
                            self.logger.info('Created protein state {}'.format(ps.name))
                    except IntegrityError:
                        ps = ProteinState.objects.get(slug=state_slug)
                    s.state = ps

                    # protein conformation
                    try:
                        s.protein_conformation = ProteinConformation.objects.get(protein=con)
                    except ProteinConformation.DoesNotExist:
                        self.logger.error('Protein conformation for construct {} does not exist'.format(con))
                        continue
                    if s.protein_conformation.state is not state:
                        ProteinConformation.objects.filter(protein=con).update(state=ps)

                    # get the PDB file and save to DB
                    sd['pdb'] = sd['pdb'].upper()
                    if not os.path.exists(self.pdb_data_dir):
                        os.makedirs(self.pdb_data_dir)
                    
                    pdb_path = os.sep.join([self.pdb_data_dir, sd['pdb'] + '.pdb'])
                    if not os.path.isfile(pdb_path):
                        self.logger.info('Fetching PDB file {}'.format(sd['pdb']))
                        url = 'http://www.rcsb.org/pdb/files/%s.pdb' % sd['pdb']
                        pdbdata_raw = urlopen(url).read().decode('utf-8')
                        with open(pdb_path, 'w') as f:
                            f.write(pdbdata_raw)
                    else:
                        with open(pdb_path, 'r') as pdb_file:
                            pdbdata_raw = pdb_file.read()
                    
                    pdbdata, created = PdbData.objects.get_or_create(pdb=pdbdata_raw)
                    s.pdb_data = pdbdata

                    # UPDATE HETSYN with its PDB reference instead + GRAB PUB DATE, PMID, DOI AND RESOLUTION
                    hetsyn = {}
                    hetsyn_reverse = {}
                    for line in pdbdata_raw.splitlines():
                        if line.startswith('HETSYN'): 
                            m = re.match(r"HETSYN[\s]+([\w]{3})[\s]+(.+)", line) ### need to fix bad PDB formatting where col4 and col5 are put together for some reason -- usually seen when the id is +1000
                            if (m):
                                hetsyn[m.group(2).strip()] = m.group(1).upper()
                                hetsyn_reverse[m.group(1)] = m.group(2).strip().upper()
                        if line.startswith('HETNAM'): 
                            m = re.match(r"HETNAM[\s]+([\w]{3})[\s]+(.+)", line) ### need to fix bad PDB formatting where col4 and col5 are put together for some reason -- usually seen when the id is +1000
                            if (m):
                                hetsyn[m.group(2).strip()] = m.group(1).upper()
                                hetsyn_reverse[m.group(1)] = m.group(2).strip().upper()
                        if line.startswith('REVDAT   1'):
                            sd['publication_date'] = line[13:22]
                        if line.startswith('JRNL        PMID'):
                            sd['pubmed_id'] = line[19:].strip()
                        if line.startswith('JRNL        DOI'):
                            sd['doi_id'] = line[19:].strip()

                    if len(hetsyn) == 0:
                        self.logger.info("PDB file contained NO hetsyn")

                    with open(pdb_path,'r') as header:
                        header_dict = parse_pdb_header(header)
                    sd['publication_date'] = header_dict['release_date']
                    sd['resolution'] = str(header_dict['resolution']).strip()
                    sd['structure_method'] = header_dict['structure_method']

                    # structure type
                    if 'structure_method' in sd and sd['structure_method']:
                        structure_type = sd['structure_method'].capitalize()
                        structure_type_slug = slugify(sd['structure_method'])
                        
                        try:
                            st, created = StructureType.objects.get_or_create(slug=structure_type_slug,
                                defaults={'name': structure_type})
                            if created:
                                self.logger.info('Created structure type {}'.format(st))
                        except IntegrityError:
                            st = StructureType.objects.get(slug=structure_type_slug)
                        s.structure_type = st
                    else:
                        self.logger.warning('No structure type specified in PDB file {}'.format(sd['pdb']))

                    matched = 0
                    if 'ligand' in sd and sd['ligand']:
                        if isinstance(sd['ligand'], list):
                            ligands = sd['ligand']
                        else:
                            ligands = [sd['ligand']]
                        for ligand in ligands:
                            if 'name' in ligand:
                                if ligand['name'].upper() in hetsyn:
                                    self.logger.info('Ligand {} matched to PDB records'.format(ligand['name']))
                                    matched = 1
                                    ligand['name'] = hetsyn[ligand['name'].upper()]
                                elif ligand['name'].upper() in hetsyn_reverse:
                                    matched = 1

                    if matched==0 and len(hetsyn)>0:
                        self.logger.info('No ligand names found in HET in structure {}'.format(sd['pdb']))

                    # REMOVE? can be used to dump structure files with updated ligands
                    # yaml.dump(sd, open(source_file_path, 'w'), indent=4)

                    # pdb code
                    if 'pdb' in sd:
                        try:
                            web_resource = WebResource.objects.get(slug='pdb')
                        except:
                            # abort if pdb resource is not found
                            raise Exception('PDB resource not found, aborting!')
                        s.pdb_code, created = WebLink.objects.get_or_create(index=sd['pdb'],
                            web_resource=web_resource)
                    else:
                        self.logger.error('PDB code not specified for structure {}, skipping!'.format(sd['pdb']))
                        continue

                    # insert into plain text fields
                    if 'preferred_chain' in sd:
                        s.preferred_chain = sd['preferred_chain']
                    else:
                        self.logger.warning('Preferred chain not specified for structure {}'.format(sd['pdb']))
                    if 'resolution' in sd:
                        s.resolution = float(sd['resolution'])
                    else:
                        self.logger.warning('Resolution not specified for structure {}'.format(sd['pdb']))
                    if 'publication_date' in sd:
                        s.publication_date = sd['publication_date']
                    else:
                        self.logger.warning('Publication date not specified for structure {}'.format(sd['pdb']))

                    # publication
                    try:                     
                        if 'doi_id' in sd:
                            try:
                                s.publication = Publication.objects.get(web_link__index=sd['doi_id'])
                            except Publication.DoesNotExist as e:
                                p = Publication()
                                try:
                                    p.web_link = WebLink.objects.get(index=sd['doi_id'], web_resource__slug='doi')
                                except WebLink.DoesNotExist:
                                    wl = WebLink.objects.create(index=sd['doi_id'],
                                        web_resource = WebResource.objects.get(slug='doi'))
                                    p.web_link = wl
                                p.update_from_doi(doi=sd['doi_id'])
                                p.save()
                                s.publication = p
                        elif 'pubmed_id' in sd:
                            try:
                                s.publication = Publication.objects.get(web_link__index=sd['pubmed_id'])
                            except Publication.DoesNotExist as e:
                                p = Publication()
                                try:
                                    p.web_link = WebLink.objects.get(index=sd['pubmed_id'],
                                        web_resource__slug='pubmed')
                                except WebLink.DoesNotExist:
                                    wl = WebLink.objects.create(index=sd['pubmed_id'],
                                        web_resource = WebResource.objects.get(slug='pubmed'))
                                    p.web_link = wl
                                p.update_from_pubmed_data(index=sd['pubmed_id'])
                                p.save()
                                s.publication = p
                    except:
                        self.logger.error('Error saving publication')

                    # save structure before adding M2M relations
                    s.save()

                    #Delete previous interaction data to prevent errors.
                    ResidueFragmentInteraction.objects.filter(structure_ligand_pair__structure=s).delete()
                    StructureLigandInteraction.objects.filter(structure=s).delete()
                    #Remove previous Rotamers/Residues to prepare for repopulation
                    Fragment.objects.filter(structure=s).delete()
                    Rotamer.objects.filter(structure=s).all().delete()
                    Residue.objects.filter(protein_conformation=s.protein_conformation).all().delete()

                    # endogenous ligand(s)
                    default_ligand_type = 'Small molecule'
                    if representative and 'endogenous_ligand' in sd and sd['endogenous_ligand']:
                        if isinstance(sd['endogenous_ligand'], list):
                            endogenous_ligands = sd['endogenous_ligand']
                        else:
                            endogenous_ligands = [sd['endogenous_ligand']]
                        for endogenous_ligand in endogenous_ligands:
                            if endogenous_ligand['type']:
                                lt, created = LigandType.objects.get_or_create(slug=slugify(endogenous_ligand['type']),
                                    defaults={'name': endogenous_ligand['type']})
                            else:
                                lt, created = LigandType.objects.get_or_create(slug=slugify(default_ligand_type),
                                    defaults={'name': default_ligand_type})
                            ligand = Ligand()

                            if 'iupharId' not in endogenous_ligand:
                                endogenous_ligand['iupharId'] = 0

                            ligand = ligand.load_by_gtop_id(endogenous_ligand['name'], endogenous_ligand['iupharId'],
                                lt)
                            try:
                                s.protein_conformation.protein.parent.endogenous_ligands.add(ligand)
                            except IntegrityError:
                                self.logger.info('Endogenous ligand for protein {}, already added. Skipping.'.format(
                                    s.protein_conformation.protein.parent))

                    # ligands
                    if 'ligand' in sd and sd['ligand']:
                        if isinstance(sd['ligand'], list):
                            ligands = sd['ligand']
                        else:
                            ligands = [sd['ligand']]
                        for ligand in ligands:
                            l = False
                            peptide_chain = ""
                            if 'chain' in ligand:
                                peptide_chain = ligand['chain']
                                ligand['name'] = 'pep'
                            if ligand['name'] and ligand['name'] != 'None': # some inserted as none.

                                # use annotated ligand type or default type
                                if ligand['type']:
                                    lt, created = LigandType.objects.get_or_create(slug=slugify(ligand['type']),
                                        defaults={'name': ligand['type']})
                                else:
                                    lt, created = LigandType.objects.get_or_create(
                                        slug=slugify(default_ligand_type), defaults={'name': default_ligand_type})

                                # set pdb reference for structure-ligand interaction
                                pdb_reference = ligand['name']

                                # use pubchem_id
                                if 'pubchemId' in ligand and ligand['pubchemId'] and ligand['pubchemId'] != 'None':
                                    # create ligand
                                    l = Ligand()


                                    # update ligand by pubchem id
                                    ligand_title = False
                                    if 'title' in ligand and ligand['title']:
                                        ligand_title = ligand['title']
                                    l = l.load_from_pubchem('cid', ligand['pubchemId'], lt, ligand_title)


                                # if no pubchem id is specified, use name
                                else:
                                    # use ligand title, if specified
                                    if 'title' in ligand and ligand['title']:
                                        ligand['name'] = ligand['title']

                                    # create empty properties
                                    lp = LigandProperities.objects.create()
                                    
                                    # create the ligand
                                    try:
                                        l, created = Ligand.objects.get_or_create(name=ligand['name'], canonical=True,
                                            defaults={'properities': lp, 'ambigious_alias': False})
                                        if created:
                                            self.logger.info('Created ligand {}'.format(ligand['name']))
                                        else:
                                            pass
                                    except IntegrityError:
                                        l = Ligand.objects.get(name=ligand['name'], canonical=True)

                                    # save ligand
                                    l.save()
                            else:
                                continue

                            # structure-ligand interaction
                            if l and ligand['role']:
                                role_slug = slugify(ligand['role'])
                                try:
                                    lr, created = LigandRole.objects.get_or_create(slug=role_slug,
                                    defaults={'name': ligand['role']})
                                    if created:
                                        self.logger.info('Created ligand role {}'.format(ligand['role']))
                                except IntegrityError:
                                    lr = LigandRole.objects.get(slug=role_slug)

                                i, created = StructureLigandInteraction.objects.get_or_create(structure=s,
                                    ligand=l, ligand_role=lr, annotated=True,
                                    defaults={'pdb_reference': pdb_reference})
                                if i.pdb_reference != pdb_reference:
                                    i.pdb_reference = pdb_reference
                                    i.save()


                    
                    # structure segments
                    if 'segments' in sd and sd['segments']:
                        for segment, positions in sd['segments'].items():
                            # fetch (create if needed) sequence segment
                            try:
                                protein_segment = ProteinSegment.objects.get(slug=segment)
                            except ProteinSegment.DoesNotExist:
                                self.logger.error('Segment {} not found'.format(segment))
                                continue

                            struct_seg, created = StructureSegment.objects.update_or_create(structure=s,
                                protein_segment=protein_segment, defaults={'start': positions[0], 'end': positions[1]})
                    # all representative structures should have defined segments
                    elif representative:
                        self.logger.warning('Segments not defined for representative structure {}'.format(sd['pdb']))

                    # structure segments for modeling
                    if 'segments_in_structure' in sd and sd['segments_in_structure']:
                        for segment, positions in sd['segments_in_structure'].items():
                            # fetch (create if needed) sequence segment
                            try:
                                protein_segment = ProteinSegment.objects.get(slug=segment)
                            except ProteinSegment.DoesNotExist:
                                self.logger.error('Segment {} not found'.format(segment))
                                continue

                            struct_seg_mod, created = StructureSegmentModeling.objects.update_or_create(structure=s,
                                protein_segment=protein_segment, defaults={'start': positions[0], 'end': positions[1]})

                    # structure coordinates
                    if 'coordinates' in sd and sd['coordinates']:
                        for segment, coordinates in sd['coordinates'].items():
                            # fetch (create if needed) sequence segment
                            try:
                                protein_segment = ProteinSegment.objects.get(slug=segment)
                            except ProteinSegment.DoesNotExist:
                                self.logger.error('Segment {} not found'.format(segment))
                                continue

                            # fetch (create if needed) coordinates description
                            try:
                                description, created = StructureCoordinatesDescription.objects.get_or_create(
                                    text=coordinates)
                                if created:
                                    self.logger.info('Created structure coordinate description {}'.format(coordinates))
                            except IntegrityError:
                                description = StructureCoordinatesDescription.objects.get(text=coordinates)

                            sc = StructureCoordinates()
                            sc.structure = s
                            sc.protein_segment = protein_segment
                            sc.description = description
                            sc.save()

                    # structure engineering
                    if 'engineering' in sd and sd['engineering']:
                        for segment, engineering in sd['engineering'].items():
                            # fetch (create if needed) sequence segment
                            try:
                                protein_segment = ProteinSegment.objects.get(slug=segment)
                            except ProteinSegment.DoesNotExist:
                                self.logger.error('Segment {} not found'.format(segment))
                                continue

                            # fetch (create if needed) engineering description
                            try:
                                description, created = StructureEngineeringDescription.objects.get_or_create(
                                    text=engineering)
                                if created:
                                    self.logger.info('Created structure engineering description {}'.format(engineering))
                            except IntegrityError:
                                description = StructureEngineeringDescription.objects.get(text=engineering)

                            se = StructureEngineering()
                            se.structure = s
                            se.protein_segment = protein_segment
                            se.description = description
                            se.save()

                    # protein anomalies
                    scheme = s.protein_conformation.protein.residue_numbering_scheme
                    if 'bulges' in sd and sd['bulges']:
                        pa_slug = 'bulge'
                        try:
                            pab, created = ProteinAnomalyType.objects.get_or_create(slug=pa_slug, defaults={
                                'name': 'Bulge'})
                            if created:
                                self.logger.info('Created protein anomaly type {}'.format(pab))
                        except IntegrityError:
                            pab = ProteinAnomalyType.objects.get(slug=pa_slug)
                        
                        for segment, bulges in sd['bulges'].items():
                            for bulge in bulges:
                                try:
                                    gn, created = ResidueGenericNumber.objects.get_or_create(label=bulge,
                                        scheme=scheme, defaults={'protein_segment': ProteinSegment.objects.get(
                                        slug=segment)})
                                    if created:
                                        self.logger.info('Created generic number {}'.format(gn))
                                except IntegrityError:
                                    gn =  ResidueGenericNumber.objects.get(label=bulge, scheme=scheme)

                                try:
                                    pa, created = ProteinAnomaly.objects.get_or_create(anomaly_type=pab,
                                        generic_number=gn)
                                    if created:
                                        self.logger.info('Created protein anomaly {}'.format(pa))
                                except IntegrityError:
                                    pa = ProteinAnomaly.objects.get(anomaly_type=pab, generic_number=gn)

                                s.protein_anomalies.add(pa)
                    if 'constrictions' in sd and sd['constrictions']:
                        pa_slug = 'constriction'
                        try:
                            pac, created = ProteinAnomalyType.objects.get_or_create(slug=pa_slug, defaults={
                                'name': 'Constriction'})
                            if created:
                                self.logger.info('Created protein anomaly type {}'.format(pac))
                        except IntegrityError:
                            pac = ProteinAnomalyType.objects.get(slug=pa_slug)
                        
                        for segment, constrictions in sd['constrictions'].items():
                            for constriction in constrictions:
                                try:
                                    gn, created = ResidueGenericNumber.objects.get_or_create(label=constriction,
                                        scheme=scheme, defaults={'protein_segment': ProteinSegment.objects.get(
                                        slug=segment)})
                                    if created:
                                        self.logger.info('Created generic number {}'.format(gn))
                                except IntegrityError:
                                    gn =  ResidueGenericNumber.objects.get(label=constriction, scheme=scheme)

                                try:
                                    pa, created = ProteinAnomaly.objects.get_or_create(anomaly_type=pac,
                                        generic_number=gn)
                                    if created:
                                        self.logger.info('Created protein anomaly {}'.format(pa))
                                except IntegrityError:
                                    pa = ProteinAnomaly.objects.get(anomaly_type=pac, generic_number=gn)

                                s.protein_anomalies.add(pa)
                    
                    # stabilizing agents, FIXME - redesign this!
                    # fusion proteins moved to constructs, use this for G-proteins and other agents?
                    aux_proteins = []
                    if 'signaling_protein' in sd and sd['signaling_protein'] and sd['signaling_protein'] != 'None':
                        aux_proteins.append('signaling_protein')
                    if 'auxiliary_protein' in sd and sd['auxiliary_protein'] and sd['auxiliary_protein'] != 'None':
                        aux_proteins.append('auxiliary_protein')
                    for index in aux_proteins:
                        if isinstance(sd[index], list):
                            aps = sd[index]
                        else:
                            aps = [sd[index]]
                        for aux_protein in aps:
                            aux_protein_slug = slugify(aux_protein)[:50]
                            try:
                                sa, created = StructureStabilizingAgent.objects.get_or_create(
                                    slug=aux_protein_slug, defaults={'name': aux_protein})
                            except IntegrityError:
                                sa = StructureStabilizingAgent.objects.get(slug=aux_protein_slug)
                            s.stabilizing_agents.add(sa)

                    # save structure
                    s.save()

                    self.logger.info('Calculate rotamers / residues')
                    self.create_rotamers(s,pdb_path)

                    self.logger.info('Calculate interactions') #Should not error anymore. If it does, fix.
                    runcalculation(sd['pdb'],peptide_chain)
                    parsecalculation(sd['pdb'],False)
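For reference, the three header fields this loader pulls out of parse_pdb_header (release_date, resolution and structure_method) can be inspected on their own; a minimal sketch with a placeholder file path:

from Bio.PDB import parse_pdb_header

header_dict = parse_pdb_header("2RH1.pdb")  # placeholder path to a local PDB file
print(header_dict['release_date'])
print(header_dict['resolution'])
print(header_dict['structure_method'])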
Example #13
from Bio.PDB import *
from Bio.PDB import parse_pdb_header
from numpy import loadtxt
import numpy as np
from Bio.PDB import PDBList
from Bio.PDB.Entity import Entity
from Bio.PDB.Residue import Residue

parser = PDBParser()
# take a pdb file of the user's choice
# strip() removes stray spaces from the input and lower() normalises it,
# since most pdb files are saved with lower-case names
pdb_file = input("Select a pdb file of your choice?").strip().lower()
# open with a context manager so the header dictionary can be built and iterated over
with open(pdb_file, "r") as file:
    dict_file = parse_pdb_header(file)
    # rewind the handle: parse_pdb_header reads it up to the first coordinate record
    file.seek(0)
    structure = parser.get_structure(pdb_file, file)
    for key in dict_file:
        if key == "idcode":
            id = dict_file[key]
            #print(id)
# use Biopython's PPBuilder class
ppb = PPBuilder()
# use build_peptides() to get the sequence of the protein (pdb file)
for pp in ppb.build_peptides(structure):
    seq1 = pp.get_sequence()
    print("The sequence of the structure is: " + str(seq1))

# storing the model of the structure of the first pdb file
model = structure[0]
# asking the user for the second file
Example #14
            # full-match check (mirrors get_pdb above)
            info = r['services'][0]['nodes'][0]['match_context'][0]
            if info['mismatches'] == 0 and info['gaps_opened'] == 0 \
                    and info['query_length'] == info['subject_length']:
                pdb_id = r['identifier'].split('_')[0]
                pdb_full = r['identifier']

            # if match, download pdb file
            if pdb_id and pdb_full:
                page = 'http://files.rcsb.org/view/{}.pdb'.format(pdb_id)
                req = requests.get(page)
                if req.status_code == 200:
                    response = req.text
                    outfile = 'tmp.pdb'
                    if outfile:
                        with open(outfile, 'w') as f:
                            f.write(response)
                        # parse to get the resolution
                        structure = parse_pdb_header(outfile)
                        pdb_reso.append((pdb_full, structure['resolution']))

        # append to dataset file
        if pdb_reso:
            # find the pdb with the best (numerically lowest) resolution
            tmp_dict = {r: p for p, r in pdb_reso}
            best_pdb_id = tmp_dict[min(tmp_dict.keys())]
            # write to file
            with open('./all_targets.csv', 'a') as f:
                f.write("{}, {}, {}, {}\n".format(best_pdb_id, pdb_reso,
                                                  records_all[i].description,
                                                  seq))
                print("{} - {}".format(i, pdb_reso))
    except:
        pass