def analyze_construct(self, filenames=None):
    """Parse construct PDB/ENT files and dump per-structure JSON annotations.

    For every file in *filenames* (default: every entry in
    ``self.construct_data_dir``) ending in ``pdb`` or ``ent``, extract the
    header, fusions, mutations and deletions, and write them to
    ``<DATA_DIR>/<root>_auto.json``.
    """
    self.logger.info("ANALYZING CONSTRUCT STRUCTURES")
    # read source files
    if not filenames:
        filenames = os.listdir(self.construct_data_dir)
    for filename in filenames:
        # only PDB-format files (.pdb / .ent) can be parsed here
        if filename[-3:] != 'pdb' and filename[-3:] != 'ent':
            continue
        root, ext = os.path.splitext(os.path.basename(filename))
        filepath = os.sep.join([self.construct_data_dir, filename])
        self.logger.info("Working on a file: {}".format(filename))

        header = parse_pdb_header(filepath)
        parser = SequenceParser(filepath)

        json_data = OrderedDict()
        json_data["header"] = header
        json_data.update(parser.get_fusions())
        json_data.update(parser.get_mutations())
        json_data.update(parser.get_deletions())

        # context manager guarantees the output handle is closed
        # (the original leaked it inside the json.dump call)
        out_path = os.sep.join(
            [settings.DATA_DIR, "{}_auto.json".format(root)])
        with open(out_path, 'w') as out_fh:
            json.dump(json_data, out_fh, indent=4, separators=(',', ': '))
def analyze_construct(self, filenames=None):
    """Analyze construct structure files and emit JSON annotation files.

    Scans *filenames* (defaulting to the contents of
    ``self.construct_data_dir``) for ``pdb``/``ent`` files, parses header,
    fusion, mutation and deletion data, and serializes the result to
    ``<DATA_DIR>/<root>_auto.json``.
    """
    self.logger.info("ANALYZING CONSTRUCT STRUCTURES")
    # read source files
    if not filenames:
        filenames = os.listdir(self.construct_data_dir)
    for filename in filenames:
        # skip anything that is not a PDB-format file
        if filename[-3:] != 'pdb' and filename[-3:] != 'ent':
            continue
        root, ext = os.path.splitext(os.path.basename(filename))
        filepath = os.sep.join([self.construct_data_dir, filename])
        self.logger.info("Working on a file: {}".format(filename))

        header = parse_pdb_header(filepath)
        parser = SequenceParser(filepath)

        json_data = OrderedDict()
        json_data["header"] = header
        json_data.update(parser.get_fusions())
        json_data.update(parser.get_mutations())
        json_data.update(parser.get_deletions())

        # open the target file with a context manager so the handle is
        # released even if serialization fails (original never closed it)
        target = os.sep.join(
            [settings.DATA_DIR, "{}_auto.json".format(root)])
        with open(target, 'w') as json_fh:
            json.dump(json_data, json_fh, indent=4, separators=(',', ': '))
def post(self, request):
    """Parse an uploaded PDB file and respond with its annotations.

    Reads the ``pdb_file`` upload, extracts the header plus fusion,
    mutation and deletion data, and returns everything as one JSON body.
    """
    # root, ext = os.path.splitext(request._request.FILES['pdb_file'].name)
    # decode leniently: undecodable bytes are dropped rather than raising
    raw_bytes = request._request.FILES['pdb_file'].file.read()
    pdb_file = StringIO(raw_bytes.decode('UTF-8', "ignore"))

    header = parse_pdb_header(pdb_file)
    parser = SequenceParser(pdb_file)

    json_data = OrderedDict()
    json_data["header"] = header
    for annotations in (parser.get_fusions(),
                        parser.get_mutations(),
                        parser.get_deletions()):
        json_data.update(annotations)

    return Response(json_data)
def handle(self, *args, **options):
    """Blast-map the peptides of a PDB file's first chain and save a report.

    Prints the header compound, maps every peptide of the first mapped
    chain against the wild type, maps SEQRES records, and writes an Excel
    report to ``test.xlsx``.
    """
    pdb_path = options['pdb_file']
    print("Working on file {}".format(pdb_path))

    header = parse_pdb_header(pdb_path)
    print(header['compound'])

    sp = SequenceParser(pdb_path)
    # operate on the first chain found in the parser's mapping
    chain = list(sp.mapping.keys())[0]
    for peptide in sp.get_chain_peptides(chain):
        first_res, last_res = peptide[0], peptide[-1]
        print("Start: {} Stop: {} Len: {}".format(
            first_res.id[1], last_res.id[1], len(peptide)))
        sp.map_to_wt_blast(chain, peptide, None, int(first_res.id[1]))
    sp.map_seqres()
    sp.save_excel_report("test.xlsx")
def handle(self, *args, **options):
    """Fetch newly listed PDB structures and generate their YAML files.

    For each new structure reported by :class:`QueryPDB`, downloads the
    PDB file if missing, parses it against the matching wild-type protein,
    and writes the annotation YAML via ``create_yaml``.
    """
    q = QueryPDB()
    q.list_xtals(verbose=False)
    for record in q.new_structures:
        pdb_code = record[0]
        wt_id = Protein.objects.get(entry_name=record[1]).id
        # build the local path once; the original re-joined it three times
        pdb_path = os.sep.join(
            [self.pdb_data_dir, "{}.pdb".format(pdb_code)])
        if not os.path.exists(pdb_path):
            self.download_pdb(pdb_code)
        self.parser = SequenceParser(pdb_path, wt_protein_id=wt_id)
        header = parse_pdb_header(pdb_path)
        self.create_yaml(pdb_code, record[1], header)
def handle(self, *args, **options):
    """Process every new PDB structure and emit its annotation YAML.

    Queries :class:`QueryPDB` for new structures; for each record,
    downloads the missing PDB file, parses it with the wild-type protein
    id, and calls ``create_yaml`` with the parsed header.
    """
    q = QueryPDB()
    q.list_xtals(verbose=False)
    for record in q.new_structures:
        pdb_code = record[0]
        wt_id = Protein.objects.get(entry_name=record[1]).id
        # compute the cached-file location a single time (the original
        # constructed the identical path in three separate places)
        local_pdb = os.sep.join(
            [self.pdb_data_dir, "{}.pdb".format(pdb_code)])
        if not os.path.exists(local_pdb):
            self.download_pdb(pdb_code)
        self.parser = SequenceParser(local_pdb, wt_protein_id=wt_id)
        header = parse_pdb_header(local_pdb)
        self.create_yaml(pdb_code, record[1], header)
def handle(self, *args, **options):
    """Print and dump fusion/mutation/deletion annotations for a PDB file.

    Parses ``options['pdb_file']``, prints each annotation set, and writes
    the combined data to ``<DATA_DIR>/<root>_auto.json``.
    """
    root, ext = os.path.splitext(os.path.basename(options['pdb_file']))
    print("Working on file {}".format(options['pdb_file']))

    header = parse_pdb_header(options['pdb_file'])
    sp = SequenceParser(options['pdb_file'])

    # run each extraction once; the original invoked every parser method
    # twice (once to print, once to merge into json_data)
    fusions = sp.get_fusions()
    mutations = sp.get_mutations()
    deletions = sp.get_deletions()
    print(fusions)
    print(mutations)
    print(deletions)

    json_data = {}
    json_data["header"] = header
    json_data.update(fusions)
    json_data.update(mutations)
    json_data.update(deletions)

    # context manager closes the output file even if json.dump raises
    # (the original never closed the handle)
    out_path = os.sep.join(
        [settings.DATA_DIR, "{}_auto.json".format(root)])
    with open(out_path, 'w') as out_fh:
        json.dump(json_data, out_fh, indent=4, separators=(',', ': '))
def handle(self, *args, **options):
    """Report a PDB file's construct annotations and save them as JSON.

    Extracts fusion, mutation and deletion data from
    ``options['pdb_file']``, prints each set, and serializes everything to
    ``<DATA_DIR>/<root>_auto.json``.
    """
    root, ext = os.path.splitext(os.path.basename(options['pdb_file']))
    print("Working on file {}".format(options['pdb_file']))

    header = parse_pdb_header(options['pdb_file'])
    sp = SequenceParser(options['pdb_file'])

    # call each getter once and reuse the result — the original parsed
    # every annotation set twice (print + update)
    annotation_sets = (sp.get_fusions(), sp.get_mutations(),
                       sp.get_deletions())
    for annotations in annotation_sets:
        print(annotations)

    json_data = {}
    json_data["header"] = header
    for annotations in annotation_sets:
        json_data.update(annotations)

    # use "with" so the output file is always closed (original leaked it)
    target = os.sep.join([settings.DATA_DIR, "{}_auto.json".format(root)])
    with open(target, 'w') as json_fh:
        json.dump(json_data, json_fh, indent=4, separators=(',', ': '))
class Command(BaseCommand):
    """Discover new PDB crystal structures and emit build YAML files."""

    logger = logging.getLogger(__name__)

    # source file directories
    structure_data_dir = os.sep.join(
        [settings.DATA_DIR, 'structure_data', 'structures'])
    structure_build_data_dir = os.sep.join(
        [settings.DATA_DIR, 'structure_data'])
    pdb_data_dir = os.sep.join([settings.DATA_DIR, 'structure_data', 'pdbs'])

    def handle(self, *args, **options):
        """Fetch every new structure and create its YAML annotation files."""
        q = QueryPDB()
        q.list_xtals(verbose=False)
        for record in q.new_structures:
            pdb_code = record[0]
            wt_id = Protein.objects.get(entry_name=record[1]).id
            # build the cached-file path once instead of three times
            pdb_path = os.sep.join(
                [self.pdb_data_dir, "{}.pdb".format(pdb_code)])
            if not os.path.exists(pdb_path):
                self.download_pdb(pdb_code)
            self.parser = SequenceParser(pdb_path, wt_protein_id=wt_id)
            header = parse_pdb_header(pdb_path)
            self.create_yaml(pdb_code, record[1], header)

    def download_pdb(self, pdb_code):
        """Download a PDB entry into ``self.pdb_data_dir``."""
        # https, matching the sibling command's download URL
        url = "https://www.rcsb.org/pdb/files/{}.pdb".format(pdb_code)
        urllib.request.urlretrieve(
            url, os.sep.join([self.pdb_data_dir, "{}.pdb".format(pdb_code)]))

    def create_yaml(self, pdb_code, prot_name, data):
        """Write the structure YAML and the auto-construct YAML files.

        *data* is the parsed PDB header dict (resolution, release_date...).
        """
        yaml_pdb_data = {
            'pdb': pdb_code,
            'resolution': data['resolution'],
            'publication_date': data['release_date'][:10],
            # PDB header contains full citation
            #'pubmed_id' : data['journal_reference'],
        }
        yaml_struct_annotations = {
            'fusion_proteins': {
                x: [y[0], y[1]] for x, y in enumerate(self.parser.fusions)},
        }
        yaml_other_data = {
            'construct': pdb_code.lower(),
            'segments': self.get_segments_data(prot_name),
            'segments_in_structure': self.parser.get_segments(),
        }

        # context manager replaces the original's manual open/close
        struct_path = '{}.yaml'.format(os.sep.join(
            [self.structure_build_data_dir, 'structures', pdb_code]))
        with open(struct_path, 'w') as out_fh:
            out_fh.write('# PDB data\n\n')
            yaml.dump(yaml_pdb_data, out_fh, default_flow_style=False)
            out_fh.write('\n# Structure annotations\n\n')
            yaml.dump(yaml_struct_annotations, out_fh)
            out_fh.write('\n# Structure annotations\n\n')
            yaml.dump(yaml_other_data, out_fh)

        yaml_construct = {
            'name': pdb_code.lower(),
            'protein': prot_name,
        }
        construct_path = 'auto_{}.yaml'.format(os.sep.join(
            [self.structure_build_data_dir, 'constructs', pdb_code]))
        with open(construct_path, 'w') as construct_fh:
            yaml.dump(yaml_construct, construct_fh, indent=4)

    def get_segments_data(self, prot_entry_name):
        """Map each protein segment slug to [first, last] sequence numbers."""
        output = {}
        segments = ProteinSegment.objects.all()
        for segment in segments:
            resi = list(Residue.objects.filter(
                protein_segment=segment,
                protein_conformation__protein__entry_name=prot_entry_name
            ).order_by('sequence_number'))
            try:
                if resi:
                    output[segment.slug] = [resi[0].sequence_number,
                                            resi[-1].sequence_number]
            except Exception:
                # keep best-effort behavior: mark unreadable segments
                output[segment.slug] = ['-,-']
        return output
class Command(BaseCommand):
    """List new PDB crystal structures and produce their build YAML files."""

    logger = logging.getLogger(__name__)

    # source file directory
    structure_data_dir = os.sep.join(
        [settings.DATA_DIR, 'structure_data', 'structures'])
    structure_build_data_dir = os.sep.join(
        [settings.DATA_DIR, 'structure_data'])
    pdb_data_dir = os.sep.join([settings.DATA_DIR, 'structure_data', 'pdbs'])

    def handle(self, *args, **options):
        """Process each new structure record and write its YAML files."""
        q = QueryPDB()
        q.list_xtals(verbose=False)
        for record in q.new_structures:
            pdb_code = record[0]
            wt_id = Protein.objects.get(entry_name=record[1]).id
            # resolve the local file location once and reuse it below
            local_pdb = os.sep.join(
                [self.pdb_data_dir, "{}.pdb".format(pdb_code)])
            if not os.path.exists(local_pdb):
                self.download_pdb(pdb_code)
            self.parser = SequenceParser(local_pdb, wt_protein_id=wt_id)
            header = parse_pdb_header(local_pdb)
            self.create_yaml(pdb_code, record[1], header)

    def download_pdb(self, pdb_code):
        """Retrieve a PDB entry from RCSB into ``self.pdb_data_dir``."""
        url = "https://www.rcsb.org/pdb/files/{}.pdb".format(pdb_code)
        urllib.request.urlretrieve(
            url, os.sep.join([self.pdb_data_dir, "{}.pdb".format(pdb_code)]))

    def create_yaml(self, pdb_code, prot_name, data):
        """Emit the structure annotation YAML and the construct YAML.

        *data* is the parsed PDB header (keys include ``resolution`` and
        ``release_date``).
        """
        yaml_pdb_data = {
            'pdb': pdb_code,
            'resolution': data['resolution'],
            'publication_date': data['release_date'][:10],
            #PDB header contains full citation
            #'pubmed_id' : data['journal_reference'],
        }
        yaml_struct_annotations = {
            'fusion_proteins': {x: [y[0], y[1]]
                                for x, y in enumerate(self.parser.fusions)}
        }
        yaml_other_data = {
            'construct': pdb_code.lower(),
            'segments': self.get_segments_data(prot_name),
            'segments_in_structure': self.parser.get_segments(),
        }

        # "with" guarantees the handle is closed even if yaml.dump raises
        structure_yaml = '{}.yaml'.format(os.sep.join(
            [self.structure_build_data_dir, 'structures', pdb_code]))
        with open(structure_yaml, 'w') as out_fh:
            out_fh.write('# PDB data\n\n')
            yaml.dump(yaml_pdb_data, out_fh, default_flow_style=False)
            out_fh.write('\n# Structure annotations\n\n')
            yaml.dump(yaml_struct_annotations, out_fh)
            out_fh.write('\n# Structure annotations\n\n')
            yaml.dump(yaml_other_data, out_fh)

        yaml_construct = {
            'name': pdb_code.lower(),
            'protein': prot_name,
        }
        construct_yaml = 'auto_{}.yaml'.format(os.sep.join(
            [self.structure_build_data_dir, 'constructs', pdb_code]))
        with open(construct_yaml, 'w') as construct_fh:
            yaml.dump(yaml_construct, construct_fh, indent=4)

    def get_segments_data(self, prot_entry_name):
        """Return {segment slug: [first, last] residue sequence numbers}."""
        output = {}
        segments = ProteinSegment.objects.all()
        for segment in segments:
            resi = list(
                Residue.objects.filter(
                    protein_segment=segment,
                    protein_conformation__protein__entry_name=prot_entry_name
                ).order_by('sequence_number'))
            try:
                if resi:
                    output[segment.slug] = [
                        resi[0].sequence_number, resi[-1].sequence_number
                    ]
            except Exception:
                # preserve best-effort placeholder for unreadable segments
                output[segment.slug] = ['-,-']
        return output