def handle(self, *args, **options):
    """Download a single PDB entry and run pocket processing on it."""
    # Optional debug logging when verbose == 1.
    if options["verbose"] == 1:
        import logging
        logging.basicConfig(level=logging.DEBUG)

    pdbs = PDBs(options["pdbs_dir"])
    pdbs.url_pdb_entries = options["entries_url"]
    # Fetch the PDB entries index only when it is not already on disk.
    if not os.path.exists(options["entries_path"]):
        pdbs.download_pdb_entries()

    pdbio = PDBIO(options["pdbs_dir"] + "/", options["entries_path"], options["tmp"])
    pdbio.init()

    code = options["code"]
    try:
        pdbs.update_pdb(code)
        pdbio.process_pdb(
            code,
            force=options["force"],
            pocket_path=pdbs.pdb_pockets_path(code),
            pdb_path=pdbs.pdb_path(code),
        )
    except IOError as ex:
        # Pocket-processing I/O failures are reported but do not abort the command.
        traceback.print_exc()
        self.stderr.write("error processing pockets from %s: %s" % (code, str(ex)))
    except Exception as ex:
        traceback.print_exc()
        raise CommandError(ex)
def load_pdb_pocket(self, pdb, pdb_dir="/data/databases/pdb/"):
    """Return fpocket results for *pdb*, computing and caching them on demand.

    The JSON result file is created with FPocket when absent; it is then
    loaded and stored under ``self.pdb_data[pdb]["pockets"]``.
    """
    utils = PDBs(pdb_dir)
    pockets_path = utils.pdb_pockets_path(pdb)
    if not os.path.exists(pockets_path):
        # No cached result yet: fetch the structure and run fpocket on it.
        utils.update_pdb(pdb)
        hunt_result = FPocket(utils.pdb_path(pdb)).hunt_pockets()
        mkdir(os.path.dirname(pockets_path))
        hunt_result.save(pockets_path)
    with open(pockets_path) as handle:
        pockets = json.load(handle)
    self.pdb_data[pdb]["pockets"] = pockets
    return self.pdb_data[pdb]["pockets"]
def handle(self, *args, **options):
    """Load PDB entries into the database, downloading structures as needed.

    Iterates either over the annotated PDB accessions (Dbxref) or over every
    structure found under ``pdbs_dir``, creating a DB entry for each code
    that is not obsolete and not already loaded.
    """
    pdbs_utils = PDBs(pdb_dir=options['pdbs_dir'])
    pdbs_utils.url_pdb_entries = options["entries_url"]
    if not options['entries_path']:
        options['entries_path'] = options['pdbs_dir'] + "/entries.idx"
    # BUGFIX: also download when the index file does not exist yet —
    # os.path.getctime would raise FileNotFoundError on a fresh setup.
    # Otherwise refresh it when it is older than a week.
    if (not os.path.exists(options["entries_path"])
            or (datetime.now() - datetime.fromtimestamp(
                os.path.getctime(options["entries_path"]))).days > 7):
        pdbs_utils.download_pdb_entries()

    pdb2sql = PDB2SQL(options['pdbs_dir'], options['entries_path'])
    pdb2sql.load_entries()

    if options["only_annotated"]:
        self.stderr.write("only_annotated option activated by default")
        from bioseq.models.Dbxref import Dbxref
        pdbs = [(x.accession.lower(), pdbs_utils.pdb_path(x.accession.lower()))
                for x in Dbxref.objects.filter(dbname="PDB")]
    else:
        pdbs = list(tqdm(iterpdbs(options['pdbs_dir'])))
    # 4zux 42 mer 2lo7("5my5","/data/databases/pdb/divided/my/pdb5my5.ent")
    # ("4zu4", "/data/databases/pdb/divided/zu/pdb4zu4.ent")
    with tqdm(pdbs) as pbar:
        for code, pdb_path in pbar:
            code = code.lower()
            if PDBsWS.is_obsolete(code):
                self.stderr.write(f"{code} entry is obsolete")
                continue
            try:
                pdb_path = pdbs_utils.update_pdb(code)
            except KeyboardInterrupt:
                raise
            # BUGFIX: was a bare "except:", which would also swallow
            # SystemExit/GeneratorExit; download is best-effort per entry.
            except Exception:
                self.stderr.write("PDB %s could not be downloaded" % code)
                continue
            if PDB.objects.filter(code=code).exists():
                self.stderr.write("PDB %s already exists" % code)
                continue
            pbar.set_description(code)
            try:
                pdb2sql.create_pdb_entry(code, pdb_path)
                pdb2sql.update_entry_data(code, pdb_path)
            except KeyboardInterrupt:
                raise
            except Exception as ex:
                import traceback
                traceback.print_exc()
                raise CommandError(ex)
class StructureVariant:
    """Maps positions of a reference protein sequence onto PDB structures via
    an MSA, and annotates the mapped residues with binding-site and pocket
    data from a StructureAnnotator."""

    def __init__(self, pdb_dir="/data/databases/pdb/"):
        self.utils = PDBs(pdb_dir)
        # Scratch files used as mafft input/output.
        self.seqs_path = "/tmp/seq.faa"
        self.aln_path = "/tmp/msa.faa"
        # Reference record and PDB wrapper, populated by load_msa().
        # self.ref_seq is presumably a Bio SeqRecord — read via bpio.
        self.ref_seq = None
        self.pdbfile = None
        # Per-PDB annotation cache: code -> {"binding": ..., "pockets": ...}
        self.pdb_data = defaultdict(dict)

    def load_msa(self, input_sequence, pdb_code, pdb_chain=None):
        """Align the reference sequence against the PDB chain sequence with
        mafft and keep the resulting MSA map (self.msa) and the chain
        residue map (self.res_map)."""
        pdb_code = pdb_code.lower()
        self.utils.update_pdb(pdb_code)
        self.ref_seq = bpio.read(input_sequence, "fasta")
        self.pdbfile = PDBFile(pdb_code, self.utils.pdb_path(pdb_code))
        # Write both sequences into one FASTA file as mafft input.
        with open(self.seqs_path, "w") as h:
            bpio.write(self.ref_seq, h, "fasta")
            bpio.write(self.pdbfile.seq(selected_chain=pdb_chain), h, "fasta")
        # mafft is executed through a docker wrapper; output redirected to aln_path.
        cmd = docker_wrap_command(
            f'mafft --quiet --localpair --maxiterate 1000 {self.seqs_path} > {self.aln_path} '
        )
        execute(cmd)
        self.msa = MSAMap.from_msa(self.aln_path)
        self.res_map = self.pdbfile.residues_map(pdb_chain)

    def residues_from_pos(self, pos):
        """Return, for every non-reference MSA sample aligned at reference
        position *pos* (0-based), a dict describing the matching PDB residue.

        Requires load_msa() to have been called first.
        """
        pos_data = []
        for sample in self.msa.samples():
            if sample != self.ref_seq.id:
                # Sample ids look like "<pdb>_<chain>..." — split accordingly.
                pdb, chain = sample.split("_")[:2]
                if self.msa.exists_pos(self.ref_seq.id, pos, sample):
                    # Map reference position -> MSA column -> sample position.
                    msa_pos = self.msa.pos_seq_msa_map[self.ref_seq.id][pos]
                    sample_pos = self.msa.pos_from_seq(self.ref_seq.id, pos, sample)
                    line = {
                        "pos": pos + 1,  # report 1-based position
                        "ref": self.msa.seqs[self.ref_seq.id][msa_pos],
                        "alt": self.msa.seqs[sample][msa_pos],
                        "pdb": pdb,
                        "chain": chain,
                        # res_map entries are indexable; [1] and [2] are
                        # presumably residue number and insertion code —
                        # TODO confirm against PDBFile.residues_map.
                        "resid": str(self.res_map[chain][sample_pos][1]),
                        "icode": str(self.res_map[chain][sample_pos][2]),
                        "pdb_pos": sample_pos
                    }
                    pos_data.append(line)
        return pos_data

    def residues_from_aln_seq(self, input_sequence, pdb_code, pdb_chain=None):
        """Align *input_sequence* to *pdb_code* and return a DataFrame of
        PDB residues at every non-gap variant position of the reference."""
        self.load_msa(input_sequence, pdb_code, pdb_chain)
        # Variant keys look like "<ref>_<pos>"; sort numerically by position.
        variants = [
            (k, v)
            for k, v in sorted(self.msa.variants(self.ref_seq.id).items(),
                               key=lambda x: int(x[0].split("_")[1]))
        ]
        output = []
        for ref_pos, alt_samples in variants:
            ref, pos = ref_pos.split("_")
            pos = int(pos)
            for alt, samples in alt_samples.items():
                # Skip gap "variants"; only report residue substitutions.
                if alt != self.msa.gap_code:
                    pos_data = self.residues_from_pos(pos)
                    output += pos_data
        return pd.DataFrame(output)

    def annotate_resid(self, pdb: str,
                       resid: str, structure_annotator: StructureAnnotator):
        """Return binding-site and pocket annotations for residue *resid* of
        *pdb*, loading (and caching) the per-PDB annotation data on demand."""
        pdb = pdb.lower()
        data = {}
        if pdb not in self.pdb_data:
            self.load_pdb_ann(pdb, structure_annotator)
        if str(resid) in self.pdb_data[pdb]["binding"]:
            data["binding"] = self.pdb_data[pdb]["binding"][str(resid)]
        if str(resid) in self.pdb_data[pdb]["pockets"]:
            data["pockets"] = self.pdb_data[pdb]["pockets"][str(resid)]
        return data

    def load_pdb_ann(self, pdb, structure_annotator: StructureAnnotator):
        """Populate self.pdb_data[pdb] with "binding" (site residues keyed by
        residue number + insertion code) and "pockets" (fpocket residues)."""
        binding_data = structure_annotator.load_pdb_binding_data(pdb)
        binding_dict = defaultdict(list)
        for site in binding_data:
            for site_res in site["site_residues"]:
                # Key: residue number plus author insertion code (if any).
                res = str(site_res["residue_number"]) + (site_res.get(
                    "author_insertion_code", "") or "")
                binding_dict[res].append({
                    "site_id": site["site_id"],
                    "details": site["details"],
                    # NOTE(review): the filter tests chem_comp_id membership in
                    # binding_dict, whose keys are residue-number strings and
                    # which is still being filled by this very loop — this looks
                    # suspicious (order-dependent); confirm the intended filter.
                    "ligands": [{
                        c: x[c]
                        for c in
                        ["chain_id", "author_residue_number", "chem_comp_id"]
                    } for x in site["site_residues"]
                        if x["chem_comp_id"] in binding_dict and (
                            x["chem_comp_id"] != "HOH")]
                })
        self.pdb_data[pdb]["binding"] = binding_dict
        pockets_data = structure_annotator.load_pdb_pocket(
            pdb, self.utils.pdb_dir)
        pockets_dict = defaultdict(list)
        for pocket in pockets_data:
            for residue in set(pocket["residues"]):
                pockets_dict[residue].append({
                    "pocket_num": pocket["number"],
                    # NB: "druggabilitty" spelling is part of the emitted schema.
                    "druggabilitty": pocket["properties"]['Druggability Score']
                })
        self.pdb_data[pdb]["pockets"] = dict(pockets_dict)

    def annotate_residue_list(self, df,
                              structure_annotator: StructureAnnotator):
        """Annotate every residue row of *df*.

        :param df: columns=["pdb", "chain", "resid", "alt", "ref", "pos"]
            or generated by residues_from_aln_seq
        :return: dict keyed by "<pdb>_<chain>_<resid>_<alt>" with the
            annotate_resid() result for each row
        """
        output = {}
        for i, r in df.iterrows():
            output[
                f'{r.pdb}_{r.chain}_{r.resid}_{r.alt}'] = self.annotate_resid(
                    r.pdb, str(r.resid), structure_annotator)
        return output