def selectPhageHostPairs(self,**kw):
    """Select phage/host pairs from the phage-host DB, stratified by genus.

    Steps, in execution order:

    1. Load the GI-to-taxa map and the taxonomy tree, map the FASTA records
       from ``opt.dbSeqInp`` onto the tree, and load the phage-host DB.
    2. Run a ``PhageHostSeqPicker`` over the tree: pick and group sequence
       hosts, print the groups, then pick pairs with all four limits
       (``maxMicSpe``, ``maxMicSeq``, ``maxVir``, ``maxVirSeq``) set to 1.
    3. Check the pairs, save them to the store path of ``opt.dbPhPairs``,
       load them back from that same file and check again, then write the
       selected sequence ids to the store path of ``opt.dbPhPairsSeqIds``.
    4. Run ``SeqImportApp`` in-process to import the selected sequences.
    5. Reload the pairs into a fresh picker, save id labels for the
       "mic", "vir" and "all" divisions, and call ``exportVirSamples``.

    ``**kw`` is accepted but not used by this method.
    """
    options = self.opt
    store = self.store
    gi2taxa = loadGiTaxBin()
    tree = self.getTaxaTree()
    #tree.getRootNode().setIsUnderUnclass()
    mapFastaRecordsToTaxaTree(inSeqs=options.dbSeqInp,
                              taxaTree=tree,
                              giToTaxa=gi2taxa)
    self.loadPhageHostDb()

    # Pick the pairs.
    picker = PhageHostSeqPicker(taxaTree=tree)
    picker.pickSeqHosts()
    picker.groupSeqHosts()
    picker.printGroupSeqHosts()
    picker.pickPairs(maxMicSpe=1, maxMicSeq=1, maxVir=1, maxVirSeq=1)
    picker.checkPairs(giToTaxa=gi2taxa)

    # Persist the pairs, then reload from the file just written and check
    # again (presumably to validate the serialized form -- TODO confirm).
    pairsPath = store.getObjPath(options.dbPhPairs)
    picker.save(pairsPath)
    picker.load(pairsPath)
    picker.checkPairs(giToTaxa=gi2taxa)
    seqIdsPath = store.getObjPath(options.dbPhPairsSeqIds)
    picker.saveSeqIds(seqIdsPath)

    # Import every selected FASTA sequence into the internal format.
    importOpt = Struct()
    importOpt.runMode = "inproc"
    importOpt.inSeq = options.dbSeqInp
    importOpt.outFeat = options.dbPhPairsSeq
    importOpt.inSeqIds = store.getObjPath(options.dbPhPairsSeqIds)
    importOpt.inFormat = "ncbi"
    SeqImportApp(opt=importOpt).run()

    # Start from a fresh picker fed from the saved pairs file and emit one
    # id-label set per division.
    reloaded = PhageHostSeqPicker(taxaTree=tree)
    pairsPath = store.getObjPath(options.dbPhPairs)
    reloaded.load(pairsPath)
    for division in ("mic", "vir", "all"):
        idLabs = reloaded.makeIdLabsPicks(div=division)
        store.saveIdLabs(idLabs, name="idlab." + division)

    self.exportVirSamples()
def run_SeqImportApp():
    """Build the options with ``makeOptSeqImportApp`` and run ``SeqImportApp``.

    Whatever ``SeqImportApp.run()`` returns is discarded; this function
    itself returns ``None``.
    """
    SeqImportApp(opt=makeOptSeqImportApp()).run()