Пример #1
0
def main():
    """Seed IMM clustering from a CompostBin run on a sample of the reads.

    Steps, in order:
      1. Parse the command line; -r, -k, -m and -p are required.
      2. Create ``sample.fa``: a random sample of ``-n`` reads when ``-n`` is
         given and smaller than the number of reads in the file, otherwise a
         symlink to the reads file (replacing any existing ``sample.fa``).
      3. Run ``compostbin.py`` on the sample, logging to ``cb.log``.
      4. Call ``init_clusters`` and run ``imm_cluster.py --seed_only``,
         appending its stdout to ``cb.log``.

    Exit statuses of the child processes are not inspected.  Exits with a
    usage error (status 2) when a required option is missing.
    """
    parser = OptionParser()

    parser.add_option('-r', dest='readsf', help='Fasta file of reads')
    parser.add_option('-n',
                      dest='numreads',
                      type='int',
                      help='Number of reads to sample for MCMC')
    parser.add_option('-k',
                      dest='clusters',
                      type='int',
                      help='Number of clusters')
    parser.add_option('-m', dest='mers', type='int', help='Mers to count')
    parser.add_option('-p',
                      dest='proc',
                      type='int',
                      help='Number of processes to run')
    parser.add_option('--em',
                      dest='soft_assign',
                      action='store_true',
                      default=False,
                      help='Use a soft assignment of reads to clusters')

    options, _ = parser.parse_args()

    # These options have no defaults and are interpolated into the command
    # lines below; report a usage error now instead of a TypeError later.
    required = (('-r', options.readsf), ('-k', options.clusters),
                ('-m', options.mers), ('-p', options.proc))
    for flag, value in required:
        if value is None:
            parser.error('Missing required option %s' % flag)

    em = '--em' if options.soft_assign else ''

    # randomly sample reads; a read is counted per fasta header line
    with open(options.readsf) as reads_in:
        total_reads = sum(1 for line in reads_in if line.startswith('>'))

    if options.numreads and options.numreads < total_reads:
        dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
    else:
        # no sampling needed: point sample.fa at the full reads file,
        # replacing whatever an earlier run left behind
        if os.path.isfile('sample.fa') or os.path.islink('sample.fa'):
            os.remove('sample.fa')
        os.symlink(options.readsf, 'sample.fa')

    # CompostBin
    # '> file 2>&1' rather than '&>': shell=True runs /bin/sh, which is not
    # guaranteed to be bash and may parse '&>' as "background, then truncate".
    subprocess.call(
        '%s/compostbin.py -r sample.fa -c %d -k %d > cb.log 2>&1' %
        (bin_dir, options.clusters, options.mers),
        shell=True)

    # initialize clusters
    init_clusters(options.readsf, options.clusters, options.soft_assign)

    # run seed_only
    subprocess.call(
        '%s/imm_cluster.py -k %d -r %s -p %d -s --seed_only %s >> cb.log' %
        (bin_dir, options.clusters, options.readsf, options.proc, em),
        shell=True)
Пример #2
0
def main():
    """Seed IMM clustering from a LikelyBin run on a sample of the reads.

    Steps, in order:
      1. Parse the command line; -r, -k, -o and -p are required.
      2. Create ``sample.fa``: a random sample of ``-n`` reads when ``-n`` is
         given and smaller than the number of reads in the file, otherwise a
         symlink to the reads file (replacing any existing ``sample.fa``).
      3. Run LikelyBin's ``mcmc.pl`` on the sample, logging to ``lb.log``.
      4. Only if ``sample.fa.binning.allprobs`` exists and is non-empty:
         call ``init_clusters``, let ``drop_empty`` determine the cluster
         count, and run ``imm_cluster.py --seed_only`` with that count.

    Exit statuses of the child processes are not inspected.  Exits with a
    usage error (status 2) when a required option is missing.
    """
    parser = OptionParser()

    parser.add_option('-r', dest='readsf', help='Fasta file of reads')
    parser.add_option('-n', dest='numreads', type='int', help='Number of reads to sample for MCMC')
    parser.add_option('-k', dest='k', type='int', help='Number of clusters')
    parser.add_option('-o', dest='order', type='int', help='Order of Markov model')
    parser.add_option('-p', dest='proc', type='int', help='Number of processes to run')
    parser.add_option('--em', dest='soft_assign', action='store_true', default=False, help='Use a soft assignment of reads to clusters')
    parser.add_option('--outfile', dest='outfile', help='Output file')
    options, _ = parser.parse_args()

    # These options have no defaults and are interpolated into the command
    # lines below; report a usage error now instead of a TypeError later.
    required = (('-r', options.readsf), ('-k', options.k), ('-o', options.order), ('-p', options.proc))
    for flag, value in required:
        if value is None:
            parser.error('Missing required option %s' % flag)

    em = '--em' if options.soft_assign else ''

    # randomly sample reads; a read is counted per fasta header line
    with open(options.readsf) as reads_in:
        total_reads = sum(1 for line in reads_in if line.startswith('>'))

    if options.numreads and options.numreads < total_reads:
        dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
    else:
        # no sampling needed: point sample.fa at the full reads file,
        # replacing whatever an earlier run left behind
        if os.path.isfile('sample.fa') or os.path.islink('sample.fa'):
            os.remove('sample.fa')
        os.symlink(options.readsf, 'sample.fa')

    # LikelyBin
    # '> file 2>&1' rather than '&>': shell=True runs /bin/sh, which is not
    # guaranteed to be bash and may parse '&>' as "background, then truncate".
    # NOTE(review): when --outfile is omitted the literal text "None" is
    # passed to -outfile, as before -- confirm whether it should be required.
    subprocess.call('%s/../likelybin-0.1/mcmc.pl sample.fa -outfile %s -num_sources %d -chain_order %d -num_threads %d > lb.log 2>&1' % (bin_dir, options.outfile, options.k, options.order, options.proc), shell=True)

    allprobs = 'sample.fa.binning.allprobs'
    if os.path.isfile(allprobs) and os.path.getsize(allprobs) > 0:

        # initialize clusters
        init_clusters(options.readsf, options.soft_assign)

        # check for k clusters
        new_k = drop_empty(options.k, options.soft_assign)

        # run seed_only
        subprocess.call('%s/imm_cluster.py -k %d -r %s -p %d -s --seed_only %s >> lb.log' % (bin_dir, new_k, options.readsf, options.proc, em), shell=True)
Пример #3
0
def main():
    """Seed IMM clustering from a LikelyBin run on a sample of the reads.

    Steps, in order:
      1. Parse the command line; -r, -k, -o and -p are required.
      2. Create ``sample.fa``: a random sample of ``-n`` reads when ``-n`` is
         given and smaller than the number of reads in the file, otherwise a
         symlink to the reads file (replacing any existing ``sample.fa``).
      3. Run ``mcmc.pl`` on the sample, logging to ``lb.log``.
      4. Only if ``sample.fa.binning.allprobs`` exists and is non-empty:
         call ``init_clusters``, let ``drop_empty`` determine the cluster
         count, and run ``imm_cluster.py --seed_only`` with that count.

    Exit statuses of the child processes are not inspected.  Exits with a
    usage error (status 2) when a required option is missing.
    """
    parser = OptionParser()

    parser.add_option('-r', dest='readsf', help='Fasta file of reads')
    parser.add_option('-n', dest='numreads', type='int', help='Number of reads to sample for MCMC')
    parser.add_option('-k', dest='k', type='int', help='Number of clusters')
    parser.add_option('-o', dest='order', type='int', help='Order of Markov model')
    parser.add_option('-p', dest='proc', type='int', help='Number of processes to run')
    parser.add_option('--em', dest='soft_assign', action='store_true', default=False, help='Use a soft assignment of reads to clusters')

    options, _ = parser.parse_args()

    # These options have no defaults and are interpolated into the command
    # lines below; report a usage error now instead of a TypeError later.
    required = (('-r', options.readsf), ('-k', options.k), ('-o', options.order), ('-p', options.proc))
    for flag, value in required:
        if value is None:
            parser.error('Missing required option %s' % flag)

    em = '--em' if options.soft_assign else ''

    # randomly sample reads; a read is counted per fasta header line
    with open(options.readsf) as reads_in:
        total_reads = sum(1 for line in reads_in if line.startswith('>'))

    if options.numreads and options.numreads < total_reads:
        dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
    else:
        # no sampling needed: point sample.fa at the full reads file,
        # replacing whatever an earlier run left behind
        if os.path.isfile('sample.fa') or os.path.islink('sample.fa'):
            os.remove('sample.fa')
        os.symlink(options.readsf, 'sample.fa')

    # LikelyBin
    # '> file 2>&1' rather than '&>': shell=True runs /bin/sh, which is not
    # guaranteed to be bash and may parse '&>' as "background, then truncate".
    subprocess.call('%s/mcmc.pl sample.fa -num_sources %d -chain_order %d -num_threads %d > lb.log 2>&1' % (bin_dir, options.k, options.order, options.proc), shell=True)

    allprobs = 'sample.fa.binning.allprobs'
    if os.path.isfile(allprobs) and os.path.getsize(allprobs) > 0:

        # initialize clusters
        init_clusters(options.readsf, options.soft_assign)

        # check for k clusters
        new_k = drop_empty(options.k, options.soft_assign)

        # run seed_only
        subprocess.call('%s/imm_cluster.py -k %d -r %s -p %d -s --seed_only %s >> lb.log' % (bin_dir, new_k, options.readsf, options.proc, em), shell=True)
Пример #4
0
def main():
    """Seed IMM clustering from a CompostBin run on a sample of the reads.

    Steps, in order:
      1. Parse the command line; -r, -k, -m and -p are required.
      2. Create ``sample.fa``: a random sample of ``-n`` reads when ``-n`` is
         given and smaller than the number of reads in the file, otherwise a
         symlink to the reads file (replacing any existing ``sample.fa``).
      3. Run ``compostbin.py`` on the sample, logging to ``cb.log``.
      4. Call ``init_clusters`` and run ``imm_cluster.py --seed_only``,
         appending its stdout to ``cb.log``.

    Exit statuses of the child processes are not inspected.  Exits with a
    usage error (status 2) when a required option is missing.
    """
    parser = OptionParser()

    parser.add_option('-r', dest='readsf', help='Fasta file of reads')
    parser.add_option('-n', dest='numreads', type='int', help='Number of reads to sample for MCMC')
    parser.add_option('-k', dest='clusters', type='int', help='Number of clusters')
    parser.add_option('-m', dest='mers', type='int', help='Mers to count')
    parser.add_option('-p', dest='proc', type='int', help='Number of processes to run')
    parser.add_option('--em', dest='soft_assign', action='store_true', default=False, help='Use a soft assignment of reads to clusters')

    options, _ = parser.parse_args()

    # These options have no defaults and are interpolated into the command
    # lines below; report a usage error now instead of a TypeError later.
    required = (('-r', options.readsf), ('-k', options.clusters), ('-m', options.mers), ('-p', options.proc))
    for flag, value in required:
        if value is None:
            parser.error('Missing required option %s' % flag)

    em = '--em' if options.soft_assign else ''

    # randomly sample reads; a read is counted per fasta header line
    with open(options.readsf) as reads_in:
        total_reads = sum(1 for line in reads_in if line.startswith('>'))

    if options.numreads and options.numreads < total_reads:
        dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
    else:
        # no sampling needed: point sample.fa at the full reads file,
        # replacing whatever an earlier run left behind
        if os.path.isfile('sample.fa') or os.path.islink('sample.fa'):
            os.remove('sample.fa')
        os.symlink(options.readsf, 'sample.fa')

    # CompostBin
    # '> file 2>&1' rather than '&>': shell=True runs /bin/sh, which is not
    # guaranteed to be bash and may parse '&>' as "background, then truncate".
    subprocess.call('%s/compostbin.py -r sample.fa -c %d -k %d > cb.log 2>&1' % (bin_dir, options.clusters, options.mers), shell=True)

    # initialize clusters
    init_clusters(options.readsf, options.clusters, options.soft_assign)

    # run seed_only
    subprocess.call('%s/imm_cluster.py -k %d -r %s -p %d -s --seed_only %s >> cb.log' % (bin_dir, options.clusters, options.readsf, options.proc, em), shell=True)
Пример #5
0
def main():
    """Seed IMM clustering from Phymm classifications of a sample of reads.

    Steps, in order:
      1. Parse the command line; -s is required.
      2. Call ``data_integrity`` on the sequence file and make its path (and
         the ``-i`` path, if given) absolute.
      3. With ``-r``: use the given Phymm results file, symlinking
         ``sample.fa`` to the sequence file only if ``sample.fa`` does not
         exist yet.  Without ``-r``: create ``sample.fa`` (a random sample of
         ``-n`` reads when that is fewer than the file holds, otherwise a
         symlink to the sequence file) and run ``phymm_par.py`` on it; the
         expected results file name depends on ``--bc``.
      4. Compute the minimum bp per cluster as ``--minbp_pct`` times the
         number of sequence characters in ``sample.fa``.
      5. Call ``init_clusters`` and, unless ``--init`` was given, run
         ``imm_cluster.py`` with the returned cluster count, logging to
         ``immc.log``.

    Exit statuses of the child processes are not inspected.  Exits with a
    usage error (status 2) when -s is missing.
    """
    parser = OptionParser()

    # generic options
    parser.add_option('-s', dest='readsf', help='Fasta file of sequences')
    parser.add_option('-p',
                      dest='proc',
                      type='int',
                      default=2,
                      help='Number of processes to run')

    # phymm options
    parser.add_option(
        '--taxlevel',
        dest='taxlevel',
        default='family',
        help=
        'Taxonomic level at which to cluster reads with Phymm [Default=%default]'
    )
    parser.add_option(
        '--minbp_pct',
        dest='minbp_pct',
        type='float',
        default=.01,
        help=
        'Minimum proportion of bp assigned to a class to become a cluster [Default=%default]'
    )
    parser.add_option(
        '-n',
        '--numreads',
        dest='numreads',
        type='int',
        default=3000,
        help=
        'Number of reads to sample from the data set to classify with Phymm [Default=%default]'
    )
    parser.add_option(
        '-r',
        '--phymm_results',
        dest='phymm_results_file',
        help=
        'Phymm results file to be used rather than running Phymm from scratch.'
    )

    # my testing options
    # help='Use a soft assignment of reads to clusters [Default=%default]'
    parser.add_option('--em',
                      dest='soft_assign',
                      action='store_true',
                      default=False,
                      help=SUPPRESS_HELP)
    # help='Ask Phymm to ignore the IMMs in the given file'
    parser.add_option('-i', dest='ignore', help=SUPPRESS_HELP)
    # help='Run Phymm and initialize clusters only'
    parser.add_option('--init',
                      dest='init',
                      action='store_true',
                      default=False,
                      help=SUPPRESS_HELP)
    # help='Run my version of Phymm w/o Blast and w/ chromosomes only'
    parser.add_option('--bc',
                      dest='bc',
                      action='store_true',
                      default=False,
                      help=SUPPRESS_HELP)

    options, _ = parser.parse_args()

    # -s has no default; everything below needs it, so fail with a usage
    # message rather than passing None to data_integrity / os.path.abspath
    if options.readsf is None:
        parser.error('Missing required option -s')

    # check data
    data_integrity(options.readsf)

    # make robust to directory changes
    options.readsf = os.path.abspath(options.readsf)
    if options.ignore:
        options.ignore = os.path.abspath(options.ignore)

    em = '--em' if options.soft_assign else ''

    if options.phymm_results_file:
        # keep an existing sample.fa: it is assumed to match the results file
        if not os.path.isfile('sample.fa') and not os.path.islink('sample.fa'):
            sys.stderr.write(
                'Assuming Phymm results file includes all reads\n')
            os.symlink(options.readsf, 'sample.fa')
        phymm_results_file = options.phymm_results_file

    else:
        # randomly sample reads; a read is counted per fasta header line
        with open(options.readsf) as reads_in:
            total_reads = sum(1 for line in reads_in if line.startswith('>'))

        if options.numreads and options.numreads < total_reads:
            dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
        else:
            # os.symlink fails if the link name exists, so clear out any
            # sample.fa left behind by an earlier run first
            if os.path.isfile('sample.fa') or os.path.islink('sample.fa'):
                os.remove('sample.fa')
            os.symlink(options.readsf, 'sample.fa')

        # classify
        if options.bc:
            bc_str = '-b -c'
            phymm_results_file = 'results.01.phymm_sample_fa.txt'
        else:
            bc_str = ''
            phymm_results_file = 'results.03.phymmBL_sample_fa.txt'
        subprocess.call('%s/phymm_par.py -p %d %s sample.fa' %
                        (bin_dir, options.proc, bc_str),
                        shell=True)

    # determine minimum bp for cluster: count sequence characters only,
    # skipping header lines and trailing whitespace
    with open('sample.fa') as sample_in:
        total_bp = sum(
            len(line.rstrip()) for line in sample_in
            if not line.startswith('>'))
    minbp = options.minbp_pct * total_bp

    # initialize clusters
    class_k = init_clusters(options.readsf, phymm_results_file,
                            options.taxlevel, minbp, options.soft_assign)

    # run IMM clustering
    if not options.init:
        # '> file 2>&1' rather than '&>': shell=True runs /bin/sh, which is
        # not guaranteed to be bash and may parse '&>' as "background, then
        # truncate".
        subprocess.call(
            '%s/imm_cluster.py -k %d -r %s -p %d -s %s > immc.log 2>&1' %
            (bin_dir, class_k, options.readsf, options.proc, em),
            shell=True)
Пример #6
0
def main():
    """Seed IMM clustering from Phymm classifications of a sample of reads.

    Steps, in order:
      1. Parse the command line; -s is required.
      2. Call ``data_integrity`` on the sequence file and make its path (and
         the ``-i`` path, if given) absolute.
      3. With ``-r``: use the given Phymm results file, symlinking
         ``sample.fa`` to the sequence file only if ``sample.fa`` does not
         exist yet.  Without ``-r``: create ``sample.fa`` (a random sample of
         ``-n`` reads when that is fewer than the file holds, otherwise a
         symlink to the sequence file) and run ``phymm_par.py`` on it; the
         expected results file name depends on ``--bc``.
      4. Compute the minimum bp per cluster as ``--minbp_pct`` times the
         number of sequence characters in ``sample.fa``.
      5. Call ``init_clusters`` and, unless ``--init`` was given, run
         ``imm_cluster.py`` with the returned cluster count, logging to
         ``immc.log``.

    Exit statuses of the child processes are not inspected.  Exits with a
    usage error (status 2) when -s is missing.
    """
    parser = OptionParser()

    # generic options
    parser.add_option('-s', dest='readsf', help='Fasta file of sequences')
    parser.add_option('-p', dest='proc', type='int', default=2, help='Number of processes to run')

    # phymm options
    parser.add_option('--taxlevel', dest='taxlevel', default='family', help='Taxonomic level at which to cluster reads with Phymm [Default=%default]')
    parser.add_option('--minbp_pct', dest='minbp_pct', type='float', default=.01, help='Minimum proportion of bp assigned to a class to become a cluster [Default=%default]')
    parser.add_option('-n','--numreads', dest='numreads', type='int', default=3000, help='Number of reads to sample from the data set to classify with Phymm [Default=%default]')
    parser.add_option('-r','--phymm_results', dest='phymm_results_file', help='Phymm results file to be used rather than running Phymm from scratch.')

    # my testing options
    # help='Use a soft assignment of reads to clusters [Default=%default]'
    parser.add_option('--em',dest='soft_assign', action='store_true', default=False, help=SUPPRESS_HELP)
    # help='Ask Phymm to ignore the IMMs in the given file'
    parser.add_option('-i', dest='ignore', help=SUPPRESS_HELP)
    # help='Run Phymm and initialize clusters only'
    parser.add_option('--init', dest='init', action='store_true', default=False, help=SUPPRESS_HELP)
    # help='Run my version of Phymm w/o Blast and w/ chromosomes only'
    parser.add_option('--bc', dest='bc', action='store_true', default=False, help=SUPPRESS_HELP)

    options, _ = parser.parse_args()

    # -s has no default; everything below needs it, so fail with a usage
    # message rather than passing None to data_integrity / os.path.abspath
    if options.readsf is None:
        parser.error('Missing required option -s')

    # check data
    data_integrity(options.readsf)

    # make robust to directory changes
    options.readsf = os.path.abspath(options.readsf)
    if options.ignore:
        options.ignore = os.path.abspath(options.ignore)

    em = '--em' if options.soft_assign else ''

    if options.phymm_results_file:
        # keep an existing sample.fa: it is assumed to match the results file
        if not os.path.isfile('sample.fa') and not os.path.islink('sample.fa'):
            sys.stderr.write('Assuming Phymm results file includes all reads\n')
            os.symlink(options.readsf, 'sample.fa')
        phymm_results_file = options.phymm_results_file

    else:
        # randomly sample reads; a read is counted per fasta header line
        with open(options.readsf) as reads_in:
            total_reads = sum(1 for line in reads_in if line.startswith('>'))

        if options.numreads and options.numreads < total_reads:
            dna.fasta_rand_big(options.numreads, options.readsf, 'sample.fa')
        else:
            # os.symlink fails if the link name exists, so clear out any
            # sample.fa left behind by an earlier run first
            if os.path.isfile('sample.fa') or os.path.islink('sample.fa'):
                os.remove('sample.fa')
            os.symlink(options.readsf, 'sample.fa')

        # classify
        if options.bc:
            bc_str = '-b -c'
            phymm_results_file = 'results.01.phymm_sample_fa.txt'
        else:
            bc_str = ''
            phymm_results_file = 'results.03.phymmBL_sample_fa.txt'
        subprocess.call('%s/phymm_par.py -p %d %s sample.fa' % (bin_dir, options.proc, bc_str), shell=True)

    # determine minimum bp for cluster: count sequence characters only,
    # skipping header lines and trailing whitespace
    with open('sample.fa') as sample_in:
        total_bp = sum(len(line.rstrip()) for line in sample_in if not line.startswith('>'))
    minbp = options.minbp_pct * total_bp

    # initialize clusters
    class_k = init_clusters(options.readsf, phymm_results_file, options.taxlevel, minbp, options.soft_assign)

    # run IMM clustering
    if not options.init:
        # '> file 2>&1' rather than '&>': shell=True runs /bin/sh, which is
        # not guaranteed to be bash and may parse '&>' as "background, then
        # truncate".
        subprocess.call('%s/imm_cluster.py -k %d -r %s -p %d -s %s > immc.log 2>&1' % (bin_dir, class_k, options.readsf, options.proc, em), shell=True)