def main():
    """Seed IMM clustering from a CompostBin run on a sample of the reads.

    Steps, in order:
      1. Parse the command line.
      2. Write a random sample of the reads to ``sample.fa`` (or symlink the
         whole reads file there when no down-sampling is needed).
      3. Run ``compostbin.py`` on the sample, logging to ``cb.log``.
      4. Initialize clusters with ``init_clusters``.
      5. Run ``imm_cluster.py --seed_only``, appending stdout to ``cb.log``.

    External commands are run through the shell and their exit status is
    ignored.
    """
    parser = OptionParser()
    parser.add_option('-r', dest='readsf', help='Fasta file of reads')
    parser.add_option('-n', dest='numreads', type='int',
                      help='Number of reads to sample for MCMC')
    parser.add_option('-k', dest='clusters', type='int',
                      help='Number of clusters')
    parser.add_option('-m', dest='mers', type='int', help='Mers to count')
    parser.add_option('-p', dest='proc', type='int',
                      help='Number of processes to run')
    parser.add_option('--em', dest='soft_assign', action='store_true',
                      default=False,
                      help='Use a soft assignment of reads to clusters')
    (options, args) = parser.parse_args()

    # flag forwarded verbatim to imm_cluster.py
    em = '--em' if options.soft_assign else ''

    # randomly sample reads: count fasta headers first, closing the file
    # as soon as the count is done
    total_reads = 0
    with open(options.readsf) as reads_in:
        for line in reads_in:
            if line[0] == '>':
                total_reads += 1

    if options.numreads and options.numreads < total_reads:
        dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
    else:
        # No down-sampling: point sample.fa at the full reads file.
        # islink() is checked as well because isfile() is False for a
        # dangling symlink, which would still make os.symlink fail.
        if os.path.isfile('sample.fa') or os.path.islink('sample.fa'):
            os.remove('sample.fa')
        os.symlink(options.readsf, 'sample.fa')

    # CompostBin
    # '> file 2>&1' instead of '&>': shell=True runs /bin/sh, and '&>' is a
    # bash extension that other shells parse as "run in background".
    compostbin_cmd = '%s/compostbin.py -r sample.fa -c %d -k %d > cb.log 2>&1' % (
        bin_dir, options.clusters, options.mers)
    subprocess.call(compostbin_cmd, shell=True)

    # initialize clusters
    init_clusters(options.readsf, options.clusters, options.soft_assign)

    # run seed_only
    # NOTE: options.readsf is interpolated unquoted into the shell command.
    seed_cmd = '%s/imm_cluster.py -k %d -r %s -p %d -s --seed_only %s >> cb.log' % (
        bin_dir, options.clusters, options.readsf, options.proc, em)
    subprocess.call(seed_cmd, shell=True)
def main():
    """Seed IMM clustering from a LikelyBin run on a sample of the reads.

    Steps, in order:
      1. Parse the command line.
      2. Write a random sample of the reads to ``sample.fa`` (or symlink the
         whole reads file there when no down-sampling is needed).
      3. Run LikelyBin's ``mcmc.pl`` on the sample, logging to ``lb.log``.
      4. If ``sample.fa.binning.allprobs`` exists and is non-empty,
         initialize clusters, drop empty ones via ``drop_empty`` and run
         ``imm_cluster.py --seed_only``, appending stdout to ``lb.log``.

    External commands are run through the shell and their exit status is
    ignored; the presence of the probabilities file is the only success
    check.
    """
    parser = OptionParser()
    parser.add_option('-r', dest='readsf', help='Fasta file of reads')
    parser.add_option('-n', dest='numreads', type='int',
                      help='Number of reads to sample for MCMC')
    parser.add_option('-k', dest='k', type='int', help='Number of clusters')
    parser.add_option('-o', dest='order', type='int',
                      help='Order of Markov model')
    parser.add_option('-p', dest='proc', type='int',
                      help='Number of processes to run')
    parser.add_option('--em', dest='soft_assign', action='store_true',
                      default=False,
                      help='Use a soft assignment of reads to clusters')
    parser.add_option('--outfile', dest='outfile', help='Output file')
    (options, args) = parser.parse_args()

    # flag forwarded verbatim to imm_cluster.py
    em = '--em' if options.soft_assign else ''

    # randomly sample reads: count fasta headers first, closing the file
    # as soon as the count is done
    total_reads = 0
    with open(options.readsf) as reads_in:
        for line in reads_in:
            if line[0] == '>':
                total_reads += 1

    if options.numreads and options.numreads < total_reads:
        dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
    else:
        # No down-sampling: point sample.fa at the full reads file.
        # islink() is checked as well because isfile() is False for a
        # dangling symlink, which would still make os.symlink fail.
        if os.path.isfile('sample.fa') or os.path.islink('sample.fa'):
            os.remove('sample.fa')
        os.symlink(options.readsf, 'sample.fa')

    # LikelyBin
    # '> file 2>&1' instead of '&>': shell=True runs /bin/sh, and '&>' is a
    # bash extension that other shells parse as "run in background", which
    # would let the output check below run before mcmc.pl has finished.
    likelybin_cmd = (
        '%s/../likelybin-0.1/mcmc.pl sample.fa -outfile %s -num_sources %d '
        '-chain_order %d -num_threads %d > lb.log 2>&1'
        % (bin_dir, options.outfile, options.k, options.order, options.proc))
    subprocess.call(likelybin_cmd, shell=True)

    # All remaining steps are skipped when LikelyBin produced no (or an
    # empty) probabilities file.
    allprobs = 'sample.fa.binning.allprobs'
    if os.path.isfile(allprobs) and os.path.getsize(allprobs) > 0:
        # initialize clusters
        init_clusters(options.readsf, options.soft_assign)

        # check for k clusters
        new_k = drop_empty(options.k, options.soft_assign)

        # run seed_only
        # NOTE: options.readsf is interpolated unquoted into the command.
        seed_cmd = '%s/imm_cluster.py -k %d -r %s -p %d -s --seed_only %s >> lb.log' % (
            bin_dir, new_k, options.readsf, options.proc, em)
        subprocess.call(seed_cmd, shell=True)
def main():
    """Seed IMM clustering from a LikelyBin run on a sample of the reads.

    Steps, in order:
      1. Parse the command line.
      2. Write a random sample of the reads to ``sample.fa`` (or symlink the
         whole reads file there when no down-sampling is needed).
      3. Run ``mcmc.pl`` from ``bin_dir`` on the sample, logging to
         ``lb.log``.
      4. If ``sample.fa.binning.allprobs`` exists and is non-empty,
         initialize clusters, drop empty ones via ``drop_empty`` and run
         ``imm_cluster.py --seed_only``, appending stdout to ``lb.log``.

    External commands are run through the shell and their exit status is
    ignored; the presence of the probabilities file is the only success
    check.
    """
    parser = OptionParser()
    parser.add_option('-r', dest='readsf', help='Fasta file of reads')
    parser.add_option('-n', dest='numreads', type='int',
                      help='Number of reads to sample for MCMC')
    parser.add_option('-k', dest='k', type='int', help='Number of clusters')
    parser.add_option('-o', dest='order', type='int',
                      help='Order of Markov model')
    parser.add_option('-p', dest='proc', type='int',
                      help='Number of processes to run')
    parser.add_option('--em', dest='soft_assign', action='store_true',
                      default=False,
                      help='Use a soft assignment of reads to clusters')
    (options, args) = parser.parse_args()

    # flag forwarded verbatim to imm_cluster.py
    em = '--em' if options.soft_assign else ''

    # randomly sample reads: count fasta headers first, closing the file
    # as soon as the count is done
    total_reads = 0
    with open(options.readsf) as reads_in:
        for line in reads_in:
            if line[0] == '>':
                total_reads += 1

    if options.numreads and options.numreads < total_reads:
        dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
    else:
        # No down-sampling: point sample.fa at the full reads file.
        # islink() is checked as well because isfile() is False for a
        # dangling symlink, which would still make os.symlink fail.
        if os.path.isfile('sample.fa') or os.path.islink('sample.fa'):
            os.remove('sample.fa')
        os.symlink(options.readsf, 'sample.fa')

    # LikelyBin
    # '> file 2>&1' instead of '&>': shell=True runs /bin/sh, and '&>' is a
    # bash extension that other shells parse as "run in background", which
    # would let the output check below run before mcmc.pl has finished.
    likelybin_cmd = (
        '%s/mcmc.pl sample.fa -num_sources %d -chain_order %d '
        '-num_threads %d > lb.log 2>&1'
        % (bin_dir, options.k, options.order, options.proc))
    subprocess.call(likelybin_cmd, shell=True)

    # All remaining steps are skipped when LikelyBin produced no (or an
    # empty) probabilities file.
    allprobs = 'sample.fa.binning.allprobs'
    if os.path.isfile(allprobs) and os.path.getsize(allprobs) > 0:
        # initialize clusters
        init_clusters(options.readsf, options.soft_assign)

        # check for k clusters
        new_k = drop_empty(options.k, options.soft_assign)

        # run seed_only
        # NOTE: options.readsf is interpolated unquoted into the command.
        seed_cmd = '%s/imm_cluster.py -k %d -r %s -p %d -s --seed_only %s >> lb.log' % (
            bin_dir, new_k, options.readsf, options.proc, em)
        subprocess.call(seed_cmd, shell=True)
def main():
    """Seed IMM clustering from a CompostBin run on a sample of the reads.

    Steps, in order:
      1. Parse the command line.
      2. Write a random sample of the reads to ``sample.fa`` (or symlink the
         whole reads file there when no down-sampling is needed).
      3. Run ``compostbin.py`` on the sample, logging to ``cb.log``.
      4. Initialize clusters with ``init_clusters``.
      5. Run ``imm_cluster.py --seed_only``, appending stdout to ``cb.log``.

    External commands are run through the shell and their exit status is
    ignored.
    """
    parser = OptionParser()
    parser.add_option('-r', dest='readsf', help='Fasta file of reads')
    parser.add_option('-n', dest='numreads', type='int',
                      help='Number of reads to sample for MCMC')
    parser.add_option('-k', dest='clusters', type='int',
                      help='Number of clusters')
    parser.add_option('-m', dest='mers', type='int', help='Mers to count')
    parser.add_option('-p', dest='proc', type='int',
                      help='Number of processes to run')
    parser.add_option('--em', dest='soft_assign', action='store_true',
                      default=False,
                      help='Use a soft assignment of reads to clusters')
    (options, args) = parser.parse_args()

    # flag forwarded verbatim to imm_cluster.py
    em = '--em' if options.soft_assign else ''

    # randomly sample reads: count fasta headers first, closing the file
    # as soon as the count is done
    total_reads = 0
    with open(options.readsf) as reads_in:
        for line in reads_in:
            if line[0] == '>':
                total_reads += 1

    if options.numreads and options.numreads < total_reads:
        dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
    else:
        # No down-sampling: point sample.fa at the full reads file.
        # islink() is checked as well because isfile() is False for a
        # dangling symlink, which would still make os.symlink fail.
        if os.path.isfile('sample.fa') or os.path.islink('sample.fa'):
            os.remove('sample.fa')
        os.symlink(options.readsf, 'sample.fa')

    # CompostBin
    # '> file 2>&1' instead of '&>': shell=True runs /bin/sh, and '&>' is a
    # bash extension that other shells parse as "run in background".
    compostbin_cmd = '%s/compostbin.py -r sample.fa -c %d -k %d > cb.log 2>&1' % (
        bin_dir, options.clusters, options.mers)
    subprocess.call(compostbin_cmd, shell=True)

    # initialize clusters
    init_clusters(options.readsf, options.clusters, options.soft_assign)

    # run seed_only
    # NOTE: options.readsf is interpolated unquoted into the shell command.
    seed_cmd = '%s/imm_cluster.py -k %d -r %s -p %d -s --seed_only %s >> cb.log' % (
        bin_dir, options.clusters, options.readsf, options.proc, em)
    subprocess.call(seed_cmd, shell=True)
def main():
    """Cluster reads with IMMs, seeding the clusters from Phymm results.

    Steps, in order:
      1. Parse the command line and run ``data_integrity`` on the reads.
      2. Either reuse a supplied Phymm results file, or sample the reads
         into ``sample.fa`` and classify them with ``phymm_par.py``.
      3. Compute the minimum number of bp a class needs to become a
         cluster (``--minbp_pct`` times the bp in ``sample.fa``).
      4. Initialize clusters with ``init_clusters``, which returns the
         number of clusters.
      5. Unless ``--init`` was given, run ``imm_cluster.py``, logging to
         ``immc.log``.

    External commands are run through the shell and their exit status is
    ignored.
    """
    parser = OptionParser()

    # generic options
    parser.add_option('-s', dest='readsf', help='Fasta file of sequences')
    parser.add_option('-p', dest='proc', type='int', default=2,
                      help='Number of processes to run')

    # phymm options
    parser.add_option('--taxlevel', dest='taxlevel', default='family',
                      help='Taxonomic level at which to cluster reads with Phymm [Default=%default]')
    parser.add_option('--minbp_pct', dest='minbp_pct', type='float',
                      default=.01,
                      help='Minimum proportion of bp assigned to a class to become a cluster [Default=%default]')
    parser.add_option('-n', '--numreads', dest='numreads', type='int',
                      default=3000,
                      help='Number of reads to sample from the data set to classify with Phymm [Default=%default]')
    parser.add_option('-r', '--phymm_results', dest='phymm_results_file',
                      help='Phymm results file to be used rather than running Phymm from scratch.')

    # my testing options (hidden from --help; intended help text kept as
    # a comment above each one)
    # help='Use a soft assignment of reads to clusters [Default=%default]'
    parser.add_option('--em', dest='soft_assign', action='store_true',
                      default=False, help=SUPPRESS_HELP)
    # help='Ask Phymm to ignore the IMMs in the given file'
    parser.add_option('-i', dest='ignore', help=SUPPRESS_HELP)
    # help='Run Phymm and initialize clusters only'
    parser.add_option('--init', dest='init', action='store_true',
                      default=False, help=SUPPRESS_HELP)
    # help='Run my version of Phymm w/o Blast and w/ chromosomes only'
    parser.add_option('--bc', dest='bc', action='store_true', default=False,
                      help=SUPPRESS_HELP)

    (options, args) = parser.parse_args()

    # check data
    data_integrity(options.readsf)

    # make robust to directory changes
    options.readsf = os.path.abspath(options.readsf)
    if options.ignore:
        options.ignore = os.path.abspath(options.ignore)

    # flag forwarded verbatim to imm_cluster.py
    em = '--em' if options.soft_assign else ''

    if options.phymm_results_file:
        # Reuse existing Phymm output; only create sample.fa if nothing
        # (not even a dangling link) is there already.
        if not os.path.isfile('sample.fa') and not os.path.islink('sample.fa'):
            # sys.stderr.write emits the same text as the old print
            # statement and works on both Python 2 and 3
            sys.stderr.write('Assuming Phymm results file includes all reads\n')
            os.symlink(options.readsf, 'sample.fa')
        phymm_results_file = options.phymm_results_file

    else:
        # randomly sample reads: count fasta headers first, closing the
        # file as soon as the count is done
        total_reads = 0
        with open(options.readsf) as reads_in:
            for line in reads_in:
                if line[0] == '>':
                    total_reads += 1

        if options.numreads and options.numreads < total_reads:
            dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
        else:
            # NOTE: os.symlink raises OSError if sample.fa already exists.
            os.symlink(options.readsf, 'sample.fa')

        # classify; the results file name depends on the Phymm mode
        if options.bc:
            bc_str = '-b -c'
            phymm_results_file = 'results.01.phymm_sample_fa.txt'
        else:
            bc_str = ''
            phymm_results_file = 'results.03.phymmBL_sample_fa.txt'

        phymm_cmd = '%s/phymm_par.py -p %d %s sample.fa' % (
            bin_dir, options.proc, bc_str)
        subprocess.call(phymm_cmd, shell=True)

    # determine minimum bp for cluster: sum sequence-line lengths
    # (headers excluded, trailing whitespace stripped)
    total_bp = 0
    with open('sample.fa') as sample_in:
        for line in sample_in:
            if line[0] != '>':
                total_bp += len(line.rstrip())
    minbp = options.minbp_pct * total_bp

    # initialize clusters
    class_k = init_clusters(options.readsf, phymm_results_file,
                            options.taxlevel, minbp, options.soft_assign)

    # run IMM clustering
    if not options.init:
        # '> file 2>&1' instead of '&>': shell=True runs /bin/sh, and '&>'
        # is a bash extension that other shells parse as "run in
        # background".
        # NOTE: options.readsf is interpolated unquoted into the command.
        immc_cmd = '%s/imm_cluster.py -k %d -r %s -p %d -s %s > immc.log 2>&1' % (
            bin_dir, class_k, options.readsf, options.proc, em)
        subprocess.call(immc_cmd, shell=True)
def main():
    """Cluster reads with IMMs, seeding the clusters from Phymm results.

    Steps, in order:
      1. Parse the command line and run ``data_integrity`` on the reads.
      2. Either reuse a supplied Phymm results file, or sample the reads
         into ``sample.fa`` and classify them with ``phymm_par.py``.
      3. Compute the minimum number of bp a class needs to become a
         cluster (``--minbp_pct`` times the bp in ``sample.fa``).
      4. Initialize clusters with ``init_clusters``, which returns the
         number of clusters.
      5. Unless ``--init`` was given, run ``imm_cluster.py``, logging to
         ``immc.log``.

    External commands are run through the shell and their exit status is
    ignored.
    """
    parser = OptionParser()

    # generic options
    parser.add_option('-s', dest='readsf', help='Fasta file of sequences')
    parser.add_option('-p', dest='proc', type='int', default=2,
                      help='Number of processes to run')

    # phymm options
    parser.add_option('--taxlevel', dest='taxlevel', default='family',
                      help='Taxonomic level at which to cluster reads with Phymm [Default=%default]')
    parser.add_option('--minbp_pct', dest='minbp_pct', type='float',
                      default=.01,
                      help='Minimum proportion of bp assigned to a class to become a cluster [Default=%default]')
    parser.add_option('-n', '--numreads', dest='numreads', type='int',
                      default=3000,
                      help='Number of reads to sample from the data set to classify with Phymm [Default=%default]')
    parser.add_option('-r', '--phymm_results', dest='phymm_results_file',
                      help='Phymm results file to be used rather than running Phymm from scratch.')

    # my testing options (hidden from --help; intended help text kept as
    # a comment above each one)
    # help='Use a soft assignment of reads to clusters [Default=%default]'
    parser.add_option('--em', dest='soft_assign', action='store_true',
                      default=False, help=SUPPRESS_HELP)
    # help='Ask Phymm to ignore the IMMs in the given file'
    parser.add_option('-i', dest='ignore', help=SUPPRESS_HELP)
    # help='Run Phymm and initialize clusters only'
    parser.add_option('--init', dest='init', action='store_true',
                      default=False, help=SUPPRESS_HELP)
    # help='Run my version of Phymm w/o Blast and w/ chromosomes only'
    parser.add_option('--bc', dest='bc', action='store_true', default=False,
                      help=SUPPRESS_HELP)

    (options, args) = parser.parse_args()

    # check data
    data_integrity(options.readsf)

    # make robust to directory changes
    options.readsf = os.path.abspath(options.readsf)
    if options.ignore:
        options.ignore = os.path.abspath(options.ignore)

    # flag forwarded verbatim to imm_cluster.py
    em = '--em' if options.soft_assign else ''

    if options.phymm_results_file:
        # Reuse existing Phymm output; only create sample.fa if nothing
        # (not even a dangling link) is there already.
        if not os.path.isfile('sample.fa') and not os.path.islink('sample.fa'):
            # sys.stderr.write emits the same text as the old print
            # statement and works on both Python 2 and 3
            sys.stderr.write('Assuming Phymm results file includes all reads\n')
            os.symlink(options.readsf, 'sample.fa')
        phymm_results_file = options.phymm_results_file

    else:
        # randomly sample reads: count fasta headers first, closing the
        # file as soon as the count is done
        total_reads = 0
        with open(options.readsf) as reads_in:
            for line in reads_in:
                if line[0] == '>':
                    total_reads += 1

        if options.numreads and options.numreads < total_reads:
            dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
        else:
            # NOTE: os.symlink raises OSError if sample.fa already exists.
            os.symlink(options.readsf, 'sample.fa')

        # classify; the results file name depends on the Phymm mode
        if options.bc:
            bc_str = '-b -c'
            phymm_results_file = 'results.01.phymm_sample_fa.txt'
        else:
            bc_str = ''
            phymm_results_file = 'results.03.phymmBL_sample_fa.txt'

        phymm_cmd = '%s/phymm_par.py -p %d %s sample.fa' % (
            bin_dir, options.proc, bc_str)
        subprocess.call(phymm_cmd, shell=True)

    # determine minimum bp for cluster: sum sequence-line lengths
    # (headers excluded, trailing whitespace stripped)
    total_bp = 0
    with open('sample.fa') as sample_in:
        for line in sample_in:
            if line[0] != '>':
                total_bp += len(line.rstrip())
    minbp = options.minbp_pct * total_bp

    # initialize clusters
    class_k = init_clusters(options.readsf, phymm_results_file,
                            options.taxlevel, minbp, options.soft_assign)

    # run IMM clustering
    if not options.init:
        # '> file 2>&1' instead of '&>': shell=True runs /bin/sh, and '&>'
        # is a bash extension that other shells parse as "run in
        # background".
        # NOTE: options.readsf is interpolated unquoted into the command.
        immc_cmd = '%s/imm_cluster.py -k %d -r %s -p %d -s %s > immc.log 2>&1' % (
            bin_dir, class_k, options.readsf, options.proc, em)
        subprocess.call(immc_cmd, shell=True)