def offtarget(organism, offtarget_databases, offtarget_names, tmp_dir=None):
    """Run the off-target search for *organism* and load the hits as features.

    The organism's proteins are exported to ``proteins.fasta`` inside the
    working directory (the export is skipped when the file already exists),
    ``Offtarget.offtargets`` is run against ``offtarget_databases``, and each
    result is loaded with ``load_blast_features`` under the corresponding
    entry of ``offtarget_names``.

    Args:
        organism: Organism identifier; also used to build the default
            working directory.
        offtarget_databases: Databases handed to ``Offtarget.offtargets``.
        offtarget_names: Feature names, one per result. ``results[i]`` is
            loaded under ``offtarget_names[i]`` -- presumably results come
            back in the same order as ``offtarget_databases``; confirm
            against ``Offtarget.offtargets``.
        tmp_dir: Working directory. When falsy, defaults to
            ``/data/organismos/<organism>/annotation/``. A trailing path
            separator is optional.

    Raises:
        IndexError: If ``offtarget_names`` has more entries than there are
            results (assuming the result is a sequence).
    """
    if not tmp_dir:
        tmp_dir = "/data/organismos/" + organism + "/annotation/"
    # Guarantee a trailing separator: the directory is passed on to
    # Offtarget.offtargets and used to build file paths, so "/tmp/run" must
    # not turn into "/tmp/runproteins.fasta". This is a no-op for paths that
    # already end with a separator (including the default above).
    tmp_dir = os.path.join(tmp_dir, "")
    mkdir(tmp_dir)

    proteins = os.path.join(tmp_dir, "proteins.fasta")
    if not os.path.exists(proteins):
        # Reuse a previously exported proteome instead of dumping it again.
        BioMongoDB.protein_fasta(proteins, organism)

    results = Offtarget.offtargets(proteins, tmp_dir, offtarget_databases)
    # Indexing (rather than zip) keeps a names/results length mismatch loud.
    for i, name in enumerate(offtarget_names):
        load_blast_features(
            organism,
            results[i],
            name,
            min_identity=0.4,
            min_query_coverage=0.4,
            min_hit_coverage=0.4,
        )
("MpyloriIndia", "Helicobacter pylori India7 (e-proteobacteria)", "/data/organismos/MpyloriIndia/GCF_000185185.1_ASM18518v1_genomic.gbff", 907238), ] for name, org, ann_path, tax in orgs: organism = name mkdir("/data/organismos/" + name + "/annotation/offtarget") mkdir("/data/organismos/" + name + "/annotation/pwtools") mkdir("/data/organismos/" + name + "/annotation/pathways") mkdir("/data/organismos/" + name + "/estructura/raw") mkdir("/data/organismos/" + name + "/estructura/sndg/modelos") mkdir("/data/organismos/" + name + "/estructura/sndg/pockets") from_ref_seq(name, ann_path, tax=tax, cpus=3) mdb.protein_fasta("/data/organismos/" + name + "/annotation/proteins.faa", name) update_proteins("/tmp/" + name + "/", "/data/organismos/" + name + "/annotation/proteins.faa", name, 1003200, db_init=mysqldb) Offtargeting.offtargets( "/data/organismos/" + name + "/annotation/proteins.faa", "/data/organismos/" + name + "/annotation/offtarget/", offtarget_dbs=[ "/data/databases/deg/degaa-p.dat", "/data/databases/human/gencode.v17.pc_translations.fa", "/data/databases/human/gut_microbiota.fasta" ]) import_prop_blast(mdb.db,
"options": ["No", "Yes"], "description": "Has a hit in Database of Essential Genes" }) } from SNDG.Sequence import read_blast_table from tqdm import tqdm # cols = list(SeqCollection.objects(name__nin=["cruzi","pdb"])) cols = list(SeqCollection.objects(name__nin=["cruzi", "pdb"])) cpus = 4 db = mdb.db for seqCol in tqdm(cols): mkdir("/data/organismos/" + seqCol.name + "/contigs") proteome = "/data/organismos/" + seqCol.name + "/contigs/genoma.fasta" if not os.path.exists(proteome): mdb.protein_fasta(proteome, seqCol.name) out = "/data/organismos/" + seqCol.name + "/annotation/offtarget/" mkdir(out) if not seqCol.has_druggability_param("human_offtarget"): seqCol.druggabilityParams.append(off_props["human_offtarget"]) db = "/data/databases/human/gencode.v17.pc_translations.fa" execute( "blastp -evalue 1e-5 -max_hsps 1 -outfmt 6 -max_target_seqs 1 -db {db} -query {query} -out {out} -num_threads {cpus}", db=db, query=proteome, out=out + "human_offtarget.tbl", cpus=cpus)