def __init__(self, edge_list_file, outdir, log_file):
    """Configure the clustering step and run it immediately.

    All of the work happens during construction: the edge list is loaded
    with ``self.getEdgeListFromFile()`` and ``self.main()`` is called
    before the constructor returns.

    Args:
        edge_list_file: stored as ``self.edge_list_file`` (presumably the
            file consumed by ``getEdgeListFromFile`` -- confirm there).
        outdir: prefix of the result table path, which is built as
            ``outdir + '/louv_clusering.tab'``.
        log_file: handed to ``getLog`` to obtain the "Clustering" logger.
    """
    self.edge_list_file = edge_list_file

    logger = getLog(log_file, "Clustering")
    self.cl_log = logger
    logger.info("CLUSTERING started ...")

    self.edge_list = self.getEdgeListFromFile()

    # File name spelling is kept exactly; callers read this attribute.
    result_table = outdir + '/louv_clusering.tab'
    self.clustering_outTab = result_table

    self.main()
def __init__(self, TH_path, input_fasta, outFasta, THthreads, log_file):
    """Store the TideHunter settings, then run it and convert its output.

    The constructor finishes by calling ``self._run_TH()`` followed by
    ``self._tab2fasta()``.

    Args:
        TH_path: stored as ``self.TH_path`` (presumably the TideHunter
            executable -- confirm in ``_run_TH``).
        input_fasta: stored as ``self.input_fasta``.
        outFasta: output path; also the prefix for the derived
            ``.tab`` and ``_all_monomersTH.fasta`` paths.
        THthreads: stored as ``self.threads``.
        log_file: handed to ``getLog`` to obtain the "TideHunter" logger.
    """
    self.TH_path = TH_path
    self.input_fasta = input_fasta
    self.threads = THthreads

    # Output locations, all derived from the requested fasta path.
    self.outFasta = outFasta
    self.outTab = outFasta + '.tab'
    self.outFasta_all_monomersTH = outFasta + '_all_monomersTH.fasta'

    self.log_th = getLog(log_file, "TideHunter")

    self._run_TH()
    self._tab2fasta()
def __init__(self, out_dir, dir_clust, dir_canu, dir_reblast, opt_delete, log_file):
    """Record the directories of interest and run ``self.del_dir()``.

    Args:
        out_dir: stored as ``self.out_dir``.
        dir_clust: stored as ``self.outdir_clust``.
        dir_canu: stored as ``self.outdir_canu``.
        dir_reblast: stored as ``self.outdir_reblast``.
        opt_delete: stored as ``self.opt_delete`` (presumably the switch
            consulted by ``del_dir`` -- confirm there).
        log_file: handed to ``getLog`` to obtain the "DELETE" logger.
    """
    self.out_dir, self.outdir_clust = out_dir, dir_clust
    self.outdir_canu, self.outdir_reblast = dir_canu, dir_reblast

    logger = getLog(log_file, "DELETE")
    self.del_log = logger
    self.opt_delete = opt_delete

    self.del_dir()
    # Final message of the step; emitted only after del_dir() returns.
    logger.info("Exit.......\n Finished the work")
def __init__(self, TRF, consensus_name, outdir, log_name):
    """Prepare the TRF step and execute it during construction.

    After the attributes are set, the constructor calls
    ``self.createdir()``, ``self.TRF()`` and ``self.filt_tr()`` in that
    order.

    Args:
        TRF: stored as ``self.run_TRF`` (presumably the TRF executable --
            confirm in ``self.TRF``).
        consensus_name: stored as ``self.consensus_name``.
        outdir: base directory; the working directory is
            ``outdir + '/ReBlast/'``.
        log_name: handed to ``getLog`` to obtain the 'TRF' logger.
    """
    working_dir = outdir + '/ReBlast/'

    self.outdir = outdir
    self.dir_trf = working_dir
    self.run_TRF = TRF
    self.consensus_name = consensus_name

    # Both files live in the working directory built above.
    self.file_num = working_dir + '/TRF_seq_dr.fasta'
    self.filt_trf = working_dir + '/seqFilt_trf.fasta'

    self.TRF_log = getLog(log_name, 'TRF')
    self.TRF_log.info("Module Run_TRF has started the job...")

    self.createdir()
    self.TRF()
    self.filt_tr()
def __init__(self, clustering_outTab, singleton_list, outdir, reads, THall, minAbundancy, log_file):
    """Set up the cluster filtering step and run it during construction.

    ``self.createListRep()`` is called first and its result is passed to
    ``self.main``.

    Args:
        clustering_outTab: stored as ``self.clustering_outTab``.
        singleton_list: stored as ``self.singletonR``.
        outdir: prefix for the two output tables
            (``/louv_clust_filtering.tab`` and ``/clust_abund.tab``).
        reads: stored as ``self.reads``.
        THall: stored as ``self.THall_monomers``.
        minAbundancy: stored as ``self.minAbundancy``.
        log_file: handed to ``getLog`` to obtain the "Filtering" logger.
    """
    # Inputs.
    self.minAbundancy = minAbundancy
    self.reads = reads
    self.singletonR = singleton_list
    self.clustering_outTab = clustering_outTab

    # Outputs.
    self.filtering_outTab = outdir + '/louv_clust_filtering.tab'
    self.clust_abund = outdir + '/clust_abund.tab'

    logger = getLog(log_file, "Filtering")
    self.filt_log = logger
    logger.info("Filtering and preparing file with monomer sequences has started...")

    representatives = self.createListRep()
    self.list_Rep = representatives
    self.THall_monomers = THall

    self.main(representatives)
def __init__(self, canu, filtering_outTab, singleton_list, outFasta, outdir, log_file, min_overlap,
             consensus_name):
    """Set up the consensus assembly step and run it during construction.

    After the attributes are set, the constructor calls
    ``self.createfile()``, stores the result of ``self.runCanu()`` in
    ``self.dirFile_canu`` and finally calls ``self.writeFileCan()``.

    Args:
        canu: stored as ``self.canuRun`` (presumably the canu executable
            -- confirm in ``runCanu``).
        filtering_outTab: stored as ``self.filtering_outTab``.
        singleton_list: stored as ``self.singleton_list``.
        outFasta: stored as ``self.outFasta``.
        outdir: base directory; ``/clusters/`` and ``/canu/`` are
            appended to form the two working directories.
        log_file: handed to ``getLog`` to obtain the
            "Consensus assembly" logger.
        min_overlap: stored as ``self.min_overlap``.
        consensus_name: stored as ``self.consensus_name``.
    """
    self.filtering_outTab = filtering_outTab
    self.min_overlap = min_overlap
    self.consensus_name = consensus_name
    self.outFasta = outFasta
    self.canuRun = canu
    self.singleton_list = singleton_list

    # Working directories derived from the base output directory.
    self.outdir = outdir
    self.outdir_clust = outdir + '/clusters/'
    self.outdir_canu = outdir + '/canu/'

    self.canu_log = getLog(log_file, "Consensus assembly")
    self.canu_log.info("CONSENSUS ASSEMBLY has started...")

    self.createfile()
    self.dirFile_canu = self.runCanu()
    self.writeFileCan()
def __init__(self, blast_run, makedb, threads, word_size, trf_file, outdir, abund_f, perc_abund,
             log_file):
    """Set up the re-clustering step and run it during construction.

    Call sequence once the attributes are set: ``self.BLAST()``,
    ``self.Blast_parsing()``, ``self.createGraph(...)``,
    ``self.filt_clust(...)`` and ``self.nano_end()``.

    Args:
        blast_run: stored as ``self.blast_run``.
        makedb: stored as ``self.makedb``.
        threads: stored as ``self.threads``.
        word_size: stored as ``self.word_size``.
        trf_file: stored as ``self.trf_file``.
        outdir: base directory from which every output path is derived.
        abund_f: stored as ``self.abund_f``.
        perc_abund: stored as ``self.perc_abund``.
        log_file: handed to ``getLog`` to obtain the 'Reclustering' logger.
    """
    self.blast_run = blast_run
    self.makedb = makedb
    self.outdir = outdir
    self.threads = threads
    self.word_size = word_size
    self.trf_file = trf_file

    # Intermediate files go under <outdir>/ReBlast/.
    reblast_dir = outdir + '/ReBlast/'
    self.outdir_reblast = reblast_dir
    self.out_blast = reblast_dir + 'blast_sec.tab'
    self.out_clust = reblast_dir + '/seq_clust.clst'

    # Result files go directly under <outdir>.
    self.nanoTRF = outdir + '/TR_nanotrf.fasta'
    self.end_nano = outdir + '/nanoTRF.fasta'

    self.Reclust_log = getLog(log_file, 'Reclustering')

    self.abund_f = abund_f
    self.perc_abund = perc_abund
    self.nanoTRF_abund = outdir + '/abund_nanotrf.tab'

    self.BLAST()
    parsed_hits = self.Blast_parsing()
    self.list_BLAST = parsed_hits
    clusters = self.createGraph(parsed_hits)
    self.fasta_clust = clusters
    self.filt_clust(clusters)
    self.nano_end()
def _remove_files_except(directory, keep_names):
    """Delete regular files directly inside *directory* unless named in *keep_names*."""
    for entry in os.listdir(directory):
        target = os.path.join(directory, entry)
        # Sub-directories are skipped: os.remove() cannot delete them.
        if entry in keep_names or not os.path.isfile(target):
            continue
        os.remove(target)


def main():
    """Run the nanoTRF pipeline from the command-line arguments.

    Steps, in order: read preparation, TideHunter (or re-use of supplied
    TideHunter output when ``args.run_th`` is set), BLAST, Louvain
    clustering, cluster filtering, consensus assembly, TRF, re-clustering
    and finally clean-up of intermediate files.

    Each step is a class whose constructor performs the work; this
    function only wires one step's output attributes into the next.
    """
    import glob  # local import: only needed for the HTML report clean-up

    args = get_cmdline_args()
    w_TH = args.run_th
    outDirectory = '{0}/'.format(checkDir_or_create(args.out_directory))
    reads = args.reads
    log_file = outDirectory + args.log_file
    LOG = getLog(log_file, 'nanoTRF')
    LOG.info("nanoTRF started...")

    # --- TideHunter parameters ---
    # TideHunter output is converted from tab to fasta; sequence ids look
    # like: >readName*repN*consLen*copyNum
    TH_path = args.path_TH
    threads = args.threads
    outTH_fasta_name = outDirectory + "TH.out.fasta"

    # --- BLAST parameters ---
    blast_run = args.blast
    makedb = args.makedb
    outFile = outDirectory + "blast.out"
    wordsize = args.wordsize
    evalue = args.evalue

    # --- Clustering parameters ---
    # NOTE(review): a "min" variable is fed from a "max" option -- confirm.
    minAbundancy = args.max_abundancy

    # --- Canu parameters ---
    canu = args.canu
    min_overlap = args.min_Overlap
    consensus_name = outDirectory + args.consensus_name

    # --- TRF / re-clustering parameters ---
    path_TR = args.TRF_run
    wordsize_f = args.wordsize_f
    perc_abund = args.perc_abund

    # --- Read preparation ---
    read_data = read_preparation.PrepareReads(reads)

    # --- TideHunter ---
    if args.run_th:
        # args.run_th is indexed as a pair here: [0] goes to without_TH,
        # [1] is used as the all-monomers file.
        run_data = without_TH.without_TH(w_TH[0], outTH_fasta_name, log_file)
        TH_all_monomers = w_TH[1]
        TH_outFasta = run_data.outFasta
    else:
        TH_data = run_TideHunter.TideHunter_run(TH_path, read_data.read_file, outTH_fasta_name,
                                                threads, log_file)
        TH_all_monomers = TH_data.outFasta_all_monomersTH
        TH_outFasta = TH_data.outFasta

    # --- BLAST ---
    blast_module_data = run_BLAST.run_BLAST(blast_run, makedb, TH_outFasta, outFile, threads,
                                            wordsize, evalue, log_file)
    edge_list_after_blast_file = blast_module_data.edge_list_file
    singleton_list = blast_module_data.not_blast

    # --- Clustering ---
    louv_module_data = Louv_clustering.LouvClustering(edge_list_after_blast_file, outDirectory,
                                                      log_file)

    # --- Filtering ---
    clustering_outTab = louv_module_data.clustering_outTab
    Filt_data = FilterRep.FilteringLouvTab(clustering_outTab, singleton_list, outDirectory, reads,
                                           TH_all_monomers, minAbundancy, log_file)
    tableFilt = Filt_data.filtering_outTab
    abund_tab = Filt_data.clust_abund

    # --- Canu ---
    consensus_out = Consensus_Assembly.ConsAssembly(canu, tableFilt, singleton_list, TH_outFasta,
                                                    outDirectory, log_file, min_overlap,
                                                    consensus_name)
    dir_clust = consensus_out.outdir_clust
    dir_canu = consensus_out.outdir_canu

    # --- TRF ---
    TRF_out = Run_TRF.Run_TRF(path_TR, consensus_name, outDirectory, log_file)
    re_blast = TRF_out.dir_trf
    trf_seq = TRF_out.filt_trf

    # --- Re-clustering (the constructor does the work; result object unused) ---
    Reclustering.Reclustering(blast_run, makedb, threads, wordsize_f, trf_seq, outDirectory,
                              abund_tab, perc_abund, log_file)

    # --- Clean-up ---
    # Remove "*html" files from the output directory without going through
    # a shell, so directory names with spaces or metacharacters are safe.
    for report in glob.glob(glob.escape(outDirectory) + '*html'):
        if os.path.isfile(report):
            os.remove(report)

    del_log = getLog(log_file, "DELETE")
    # NOTE(review): clean-up runs when args.dir_cleanup is falsy -- confirm
    # that this matches how the option is declared.
    if not args.dir_cleanup:
        del_log.info("Removing directories has started...")
        # Delete the whole directory trees used by canu, clustering and ReBlast.
        shutil.rmtree(dir_canu)
        shutil.rmtree(dir_clust)
        shutil.rmtree(re_blast)
        # Delete remaining intermediate files but keep the listed results.
        # The former chain of `!=` joined with `or` was always true, so it
        # deleted every file, including the results and the log.
        keep_names = {
            'nanoTRF.fasta',
            'TH.out.fasta',
            'TH.out.fasta.tab',
            'TR_info.tab',
            'loging.log',
            args.log_file,  # the log currently being written to
        }
        _remove_files_except(outDirectory, keep_names)
    else:
        del_log.info("Directories are not removed")
def __init__(self, blast_run, makedb, inFile, outFile, threads, wordsize, evalue, log_file):
    """Store the BLAST settings and run the step via ``self.main()``.

    Args:
        blast_run: stored as ``self.blast_run``.
        makedb: stored as ``self.makedb``.
        inFile: stored as ``self.inFile``.
        outFile: stored as ``self.outFile``; also the prefix of the two
            derived paths (``_notBlast.list`` and ``edges.list``).
        threads: stored as ``self.threads``.
        wordsize: stored as ``self.wordsize``.
        evalue: stored as ``self.evalue``.
        log_file: handed to ``getLog`` to obtain the "BLAST module" logger.
    """
    self.blast_run = blast_run
    self.makedb = makedb
    self.inFile = inFile
    self.outFile = outFile
    self.threads = threads
    self.wordsize = wordsize
    self.evalue = evalue

    self.bl_log = getLog(log_file, "BLAST module")

    # Derived outputs; note that "edges.list" is appended with no separator.
    self.not_blast = outFile + "_notBlast.list"
    self.edge_list_file = outFile + "edges.list"

    self.main()