Пример #1
0
def fast_denoiser(sff_fps, fasta_fp, tmp_outdir, num_cpus, primer, verbose=True, titanium=False):
    """wrapper function calling methods from the Denoiser package."""
    if num_cpus > 1:
        denoise_seqs(
            sff_fps,
            fasta_fp,
            tmp_outdir,
            primer=primer,
            cluster=True,
            num_cpus=num_cpus,
            verbose=verbose,
            titanium=titanium,
        )
    else:
        denoise_seqs(sff_fps, fasta_fp, tmp_outdir, primer=primer, verbose=verbose, titanium=titanium)

    # read centroids and singletons
    centroids = parse_fasta(open(tmp_outdir + "/centroids.fasta"))
    singletons = parse_fasta(open(tmp_outdir + "/singletons.fasta"))

    seqs = chain(centroids, singletons)

    # read mapping
    mapping = {}
    cluster_mapping = open(tmp_outdir + "/denoiser_mapping.txt")
    for i, cluster in enumerate(cluster_mapping):
        cluster, members = cluster.split(":")
        members = members.split()
        clust = [cluster]
        clust.extend(members)
        mapping[i] = clust

    return seqs, mapping
Пример #2
0
def main(commandline_args=None):
    parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not specified')
    elif not files_exist(opts.sff_fp):
        parser.error('Flowgram file path does not exist:\n %s \n Pass a valid one via -i.'
                     % opts.sff_fp)

    if(opts.checkpoint_fp):
        bp_fp = opts.checkpoint_fp
        if not exists(bp_fp):
            parser.error('Specified checkpoint file does not exist: %s' % bp_fp)

    #peek into sff.txt files to make sure they are parseable
    #cat_sff_fles is lazy and only reads header
    flowgrams, header = cat_sff_files(map(open, opts.sff_fp.split(',')))
    
    if(opts.split and opts.preprocess_fp):
        parser.error('Options --split and --preprocess_fp are exclusive')

    if(opts.preprocess_fp):
        pp_fp = opts.preprocess_fp
        if not exists(opts.preprocess_fp):
            parser.error('Specified preprocess directory does not exist: %s' % opts.preprocess_fp)
        if not files_exist('%s/prefix_mapping.txt,%s/prefix_dereplicated.fasta' %(pp_fp, pp_fp)):
            parser.error('Specified preprocess directory does not contain expected files: ' +\
                             'prefix_mapping.txt and prefix_dereplicated.fasta')

    if opts.titanium:
        opts.error_profile = DENOISER_DATA_DIR+'Titanium_error_profile.dat'
        opts.low_cutoff = 4
        opts.high_cutoff = 5

    if not exists(opts.error_profile):
        parser.error('Specified error profile %s does not exist' % opts.error_profile)

    if opts.output_dir:
        #make sure it always ends on /
        tmpoutdir=opts.output_dir+"/"
    else:
        #make random dir in current dir
        tmpoutdir = get_tmp_filename(tmp_dir="", prefix="denoiser_", suffix="/")

    create_dir(tmpoutdir, not opts.force)
    
    log_fp = 'denoiser.log'
    
    if opts.split:
        denoise_per_sample(opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.cluster,
                           opts.num_cpus, opts.squeeze, opts.percent_id, opts.bail,
                           opts.primer, opts.low_cutoff, opts.high_cutoff, log_fp,
                           opts.low_memory, opts.verbose, opts.error_profile, opts.max_num_iter,
                           opts.titanium)
    else:
        denoise_seqs(opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.preprocess_fp, opts.cluster,
                     opts.num_cpus, opts.squeeze, opts.percent_id, opts.bail, opts.primer,
                     opts.low_cutoff, opts.high_cutoff, log_fp, opts.low_memory,
                     opts.verbose, opts.error_profile, opts.max_num_iter, opts.titanium,
                     opts.checkpoint_fp)
Пример #3
0
def fast_denoiser(
        sff_fps, fasta_fp, tmp_outdir, num_cpus, primer, verbose=True,
        titanium=False):
    """wrapper function calling methods from the Denoiser package."""
    if num_cpus > 1:
        denoise_seqs(sff_fps, fasta_fp, tmp_outdir,
                     primer=primer, cluster=True, num_cpus=num_cpus,
                     verbose=verbose, titanium=titanium)
    else:
        denoise_seqs(sff_fps, fasta_fp, tmp_outdir, primer=primer,
                     verbose=verbose, titanium=titanium)

    # read centroids and singletons
    centroids = MinimalFastaParser(open(tmp_outdir + "/centroids.fasta"))
    singletons = MinimalFastaParser(open(tmp_outdir + "/singletons.fasta"))

    seqs = chain(centroids, singletons)

    # read mapping
    mapping = {}
    cluster_mapping = open(tmp_outdir + "/denoiser_mapping.txt")
    for i, cluster in enumerate(cluster_mapping):
        cluster, members = cluster.split(':')
        members = members.split()
        clust = [cluster]
        clust.extend(members)
        mapping[i] = clust

    return seqs, mapping
Пример #4
0
def main(commandline_args=None):
    parser, opts, args = parse_command_line_parameters(**script_info)

    if not opts.sff_fp:
        parser.error('Required option flowgram file path (-i) not specified')
    elif not files_exist(opts.sff_fp):
        parser.error('Flowgram file path does not exist:\n %s \n Pass a valid one via -i.'
                     % opts.sff_fp)

    if(opts.checkpoint_fp):
        bp_fp = opts.checkpoint_fp
        if not exists(bp_fp):
            parser.error('Specified checkpoint file does not exist: %s' % bp_fp)

    #peek into sff.txt files to make sure they are parseable
    #cat_sff_fles is lazy and only reads header
    flowgrams, header = cat_sff_files(map(open, opts.sff_fp.split(',')))
    
    if(opts.split and opts.preprocess_fp):
        parser.error('Options --split and --preprocess_fp are exclusive')

    if(opts.preprocess_fp):
        pp_fp = opts.preprocess_fp
        if not exists(opts.preprocess_fp):
            parser.error('Specified preprocess directory does not exist: %s' % opts.preprocess_fp)
        if not files_exist('%s/prefix_mapping.txt,%s/prefix_dereplicated.fasta' %(pp_fp, pp_fp)):
            parser.error('Specified preprocess directory does not contain expected files: ' +\
                             'prefix_mapping.txt and prefix_dereplicated.fasta')

    if opts.titanium:
        opts.error_profile = DENOISER_DATA_DIR+'Titanium_error_profile.dat'
        opts.low_cutoff = 4
        opts.high_cutoff = 5

    if not exists(opts.error_profile):
        parser.error('Specified error profile %s does not exist' % opts.error_profile)

    if opts.output_dir:
        #make sure it always ends on /
        tmpoutdir=opts.output_dir+"/"
    else:
        #make random dir in current dir
        tmpoutdir = get_tmp_filename(tmp_dir="", prefix="denoiser_", suffix="/")

    create_dir(tmpoutdir, not opts.force)
    
    log_fp = 'denoiser.log'
    
    if opts.split:
        denoise_per_sample(opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.cluster,
                           opts.num_cpus, opts.squeeze, opts.percent_id, opts.bail,
                           opts.primer, opts.low_cutoff, opts.high_cutoff, log_fp,
                           opts.low_memory, opts.verbose, opts.error_profile, opts.max_num_iter,
                           opts.titanium)
    else:
        denoise_seqs(opts.sff_fp, opts.fasta_fp, tmpoutdir, opts.preprocess_fp, opts.cluster,
                     opts.num_cpus, opts.squeeze, opts.percent_id, opts.bail, opts.primer,
                     opts.low_cutoff, opts.high_cutoff, log_fp, opts.low_memory,
                     opts.verbose, opts.error_profile, opts.max_num_iter, opts.titanium,
                     opts.checkpoint_fp)