def main():
    """Command-line driver that batches barcode extraction over a directory.

    Parses the script options, recursively gathers every FASTQ file found
    under the input directory, optionally groups them into read pairs, asks
    ``create_commands_eb`` for the command list and finally either prints
    the commands (``--print_only``) or runs them serially.
    """
    option_parser, opts, args = parse_command_line_parameters(
        suppress_verbose=True, **script_info)

    # Pull every option we need off the parsed options object up front.
    output_dir = abspath(opts.output_dir)
    input_dir = opts.input_dir
    paired_data = opts.paired_data
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    # Removing the file path from the name only makes sense when the
    # input directory path is part of the name in the first place.
    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
            "--include_input_dir_path must also be enabled.")

    # Defaults apply when no parameter file was supplied.
    params_dict = {}
    params_str = ""
    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['extract_barcodes'])

    create_dir(output_dir)

    # Recursively collect the absolute path of every FASTQ file.
    fastq_suffixes = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')
    all_files = []
    for root, _subdirs, filenames in walk(input_dir):
        all_files.extend(abspath(join(root, filename))
                         for filename in filenames
                         if filename.endswith(fastq_suffixes))

    if paired_data:
        # The barcode pairing returned alongside the read pairs is not
        # needed by this script.
        all_files, _bc_pairs = get_pairs(all_files, read1_indicator,
                                         read2_indicator)

    commands = create_commands_eb(all_files, paired_data, output_dir,
                                  params_str, leading_text, trailing_text,
                                  include_input_dir_path,
                                  remove_filepath_in_name)

    qiime_config = load_qiime_config()
    command_handler = print_commands if print_only else call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)

    # Hand the full command list to the selected handler.
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
def main():
    """Command-line driver that batches paired-end joining over a directory.

    Parses the script options, recursively gathers every FASTQ file found
    under the input directory, groups them into read pairs (and, when
    requested, matching barcode files), asks ``create_commands_jpe`` for the
    command list and finally either prints the commands (``--print_only``)
    or runs them serially.
    """
    option_parser, opts, args = parse_command_line_parameters(
        suppress_verbose=True, **script_info)

    # Pull every option we need off the parsed options object up front.
    output_dir = abspath(opts.output_dir)
    input_dir = opts.input_dir
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    match_barcodes = opts.match_barcodes
    barcode_indicator = opts.barcode_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    # Removing the file path from the name only makes sense when the
    # input directory path is part of the name in the first place.
    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
            "--include_input_dir_path must also be enabled.")

    # Defaults apply when no parameter file was supplied.
    params_dict = {}
    params_str = ""
    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['join_paired_ends'])

    create_dir(output_dir)

    # Recursively collect the absolute path of every FASTQ file.
    fastq_suffixes = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')
    all_files = []
    for root, _subdirs, filenames in walk(input_dir):
        all_files.extend(abspath(join(root, filename))
                         for filename in filenames
                         if filename.endswith(fastq_suffixes))

    pairs, bc_pairs = get_pairs(all_files, read1_indicator, read2_indicator,
                                match_barcodes, barcode_indicator)

    commands = create_commands_jpe(pairs, output_dir, params_str,
                                   leading_text, trailing_text,
                                   include_input_dir_path,
                                   remove_filepath_in_name, match_barcodes,
                                   bc_pairs)

    qiime_config = load_qiime_config()
    command_handler = print_commands if print_only else call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)

    # Hand the full command list to the selected handler.
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
Exemplo n.º 3
0
def pick_denovo_otus(input_fp, output_dir, new_ref_set_id, otu_picking_method, params, logger):
    """Build the ``pick_otus.py`` command line for a de novo OTU picking run.

    Parameters
    ----------
    input_fp : path of the sequences to cluster (passed as ``-i``).
    output_dir : directory for the results (passed as ``-o``).
    new_ref_set_id : identifier used to build the ``uclust_otu_id_prefix``
        value, ``"<new_ref_set_id>.ReferenceOTU"``.
    otu_picking_method : method name (passed as ``-m``).
    params : mapping of script name to option dict; only the ``"pick_otus"``
        section is read and it is never modified.
    logger : accepted for interface compatibility; not used here.

    Returns
    -------
    The command string. Nothing is executed by this function.
    """
    # Work on a copy so the caller's parameters are left untouched. A
    # missing "pick_otus" section is treated as "no extra options"; the
    # original code left ``d`` unbound in that case and crashed with
    # UnboundLocalError on the next statement.
    try:
        d = params["pick_otus"].copy()
    except KeyError:
        d = {}
    # The method is always passed explicitly via -m, so drop any value
    # coming from the parameters to avoid specifying it twice.
    d.pop("otu_picking_method", None)

    d["uclust_otu_id_prefix"] = "%s.ReferenceOTU" % new_ref_set_id
    params_str = " %s" % get_params_str(d)
    # Build the OTU picking command
    result = "pick_otus.py -i %s -o %s -m %s %s" % (input_fp, output_dir, otu_picking_method, params_str)

    return result
Exemplo n.º 4
0
def pick_denovo_otus(input_fp, output_dir, new_ref_set_id, otu_picking_method,
                     params, logger):
    """Build the ``pick_otus.py`` command line for a de novo OTU picking run.

    Parameters
    ----------
    input_fp : path of the sequences to cluster (passed as ``-i``).
    output_dir : directory for the results (passed as ``-o``).
    new_ref_set_id : identifier used to build the ``uclust_otu_id_prefix``
        value, ``'<new_ref_set_id>.ReferenceOTU'``.
    otu_picking_method : method name (passed as ``-m``).
    params : mapping of script name to option dict; only the ``'pick_otus'``
        section is read and it is never modified.
    logger : accepted for interface compatibility; not used here.

    Returns
    -------
    The command string. Nothing is executed by this function.
    """
    # Copy the section so the caller's dict is not mutated. When the
    # section is absent fall back to an empty dict: previously ``d`` was
    # never assigned in that case and the function died with
    # UnboundLocalError.
    try:
        d = params['pick_otus'].copy()
    except KeyError:
        d = {}
    # -m already carries the method; remove it so it is not passed twice.
    d.pop('otu_picking_method', None)

    d['uclust_otu_id_prefix'] = '%s.ReferenceOTU' % new_ref_set_id
    params_str = ' %s' % get_params_str(d)
    # Build the OTU picking command
    result = 'pick_otus.py -i %s -o %s -m %s %s' %\
     (input_fp, output_dir, otu_picking_method, params_str)

    return result
Exemplo n.º 5
0
def pick_denovo_otus(input_fp,
                     output_dir,
                     new_ref_set_id,
                     otu_picking_method,
                     params,
                     logger):
    """Build the ``pick_otus.py`` command line for a de novo OTU picking run.

    Parameters
    ----------
    input_fp : path of the sequences to cluster (passed as ``-i``).
    output_dir : directory for the results (passed as ``-o``).
    new_ref_set_id : identifier used to build the ``uclust_otu_id_prefix``
        value, ``'<new_ref_set_id>.ReferenceOTU'``.
    otu_picking_method : method name (passed as ``-m``).
    params : mapping of script name to option dict; only the ``'pick_otus'``
        section is read and it is never modified.
    logger : accepted for interface compatibility; not used here.

    Returns
    -------
    The command string. Nothing is executed by this function.
    """
    # Take a private copy of the pick_otus options. If the parameters
    # carry no such section, start from an empty dict; the earlier
    # implementation swallowed the KeyError and then failed with
    # UnboundLocalError because ``d`` had never been bound.
    try:
        d = params['pick_otus'].copy()
    except KeyError:
        d = {}
    # The picking method is supplied through -m below, so it must not
    # appear a second time among the generated options.
    d.pop('otu_picking_method', None)

    d['uclust_otu_id_prefix'] = '%s.ReferenceOTU' % new_ref_set_id
    params_str = ' %s' % get_params_str(d)
    # Build the OTU picking command
    result = 'pick_otus.py -i %s -o %s -m %s %s' %\
     (input_fp, output_dir, otu_picking_method, params_str)

    return result
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout):
    """Run a set of diversity analyses and write an HTML index of the results.

    The workflow performs, in order:
     1) validation of the requested mapping file categories;
     2) 'biom summarize-table' on the input table;
     3) filtering of samples with fewer than sampling_depth observations
        (the filtered table is used for every following step);
     4) beta diversity through plots plus per-category distance boxplots
        (unless suppress_beta_diversity);
     5) alpha rarefaction plus per-category alpha diversity comparisons
        (unless suppress_alpha_diversity);
     6) taxa summary plots, overall and per category
        (unless suppress_taxa_summary);
     7) OTU category significance per category
        (unless suppress_otu_category_significance);
     8) gzip compression of the filtered table;
     9) generation of output_dir/index.html linking to all results.

    Steps whose output files already exist are skipped and a note is
    written to the log, so the function can be re-run on the same
    output_dir.

    biom_fp: path to the input BIOM table
    mapping_fp: path to the metadata mapping file
    sampling_depth: integer depth used for filtering, even sampling and as
     the maximum rarefaction depth
    output_dir: directory where all results are written (created if needed)
    qiime_config: QIIME configuration passed to the logger and sub-workflows
    command_handler: callable used to execute (or print) command lists
    tree_fp: optional tree path, forwarded to the diversity sub-workflows
    params: parsed parameters; when None, parse_qiime_parameters([]) is used
    categories: optional list of mapping file column headers to analyze
    arare_min_rare_depth, arare_num_steps: forwarded to alpha rarefaction
    parallel: forwarded to the beta diversity and alpha rarefaction workflows
    status_update_callback: forwarded to command_handler and sub-workflows

    Raises ValueError if a category is not a column header of the mapping
    file or contains only a single value.
    """
    if categories is not None:
        # Validate categories provided by the users. The mapping file is
        # opened in a with-block so the handle is closed once parsed.
        with open(mapping_fp, 'U') as mapping_f:
            mapping_data, mapping_comments = \
             parse_mapping_file_to_dict(mapping_f)
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError("Category '%s' is not a column header "
                 "in your mapping file. "
                 "Categories are case and white space sensitive. Valid "
                 "choices are: (%s)" % (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError("Category '%s' contains only one value. "
                 "Categories analyzed here require at least two values." % c)
    else:
        categories = []

    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []

    # begin logging; logs from earlier runs are collected before the new
    # log path is generated so they can be linked as "previous" logs
    old_log_fps = glob(join(output_dir, 'log_20*txt'))
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log', log_fp, _index_headers['run_summary']))
    for old_log_fp in old_log_fps:
        index_links.append(('Previous run log', old_log_fp, _index_headers['run_summary']))
    logger = WorkflowLogger(log_fp,
                            params=params,
                            qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params['biom-summarize-table'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = \
         "biom summarize-table -i %s -o %s --suppress-md5 %s" % \
         (biom_fp, biom_table_stats_output_fp, params_str)
        commands.append([('Generate BIOM table summary',
                          biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" \
                     % biom_table_stats_output_fp)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
         (biom_fp, filtered_biom_fp, sampling_depth)
        commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
                          filter_samples_cmd)])
    else:
        logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n" \
                     % filtered_biom_fp)
    # every later step works on the filtered table
    biom_fp = filtered_biom_fp

    # run initial commands and reset the command list; the logger must stay
    # open because later steps keep writing to it
    if commands:
        command_handler(commands,
                        status_update_callback,
                        logger,
                        close_logger_on_success=False)
        commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir, sampling_depth)
        # Need to check for the existence of any distance matrices, since the user
        # can select which will be generated.
        dm_suffix = '_dm.txt'
        existing_dm_fps = glob('%s/*%s' % (bdiv_even_output_dir, dm_suffix))
        if not existing_dm_fps:
            even_dm_fps = run_beta_diversity_through_plots(
             otu_table_fp=biom_fp,
             mapping_fp=mapping_fp,
             output_dir=bdiv_even_output_dir,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             sampling_depth=sampling_depth,
             tree_fp=tree_fp,
             parallel=parallel,
             logger=logger,
             suppress_md5=True,
             status_update_callback=status_update_callback)
        else:
            logger.write("Skipping beta_diversity_through_plots.py as %s exist(s).\n\n" \
                         % ', '.join(existing_dm_fps))
            # Recover the metric name by cutting the '_dm.txt' suffix off
            # the file name. str.strip() must not be used for this: it
            # removes any of the given *characters* from both ends and so
            # mangles names that begin or end with one of them.
            even_dm_fps = [(split(fp)[1][:-len(dm_suffix)], fp)
                           for fp in existing_dm_fps]

        # Get make_distance_boxplots parameters
        try:
            params_str = get_params_str(params['make_distance_boxplots'])
        except KeyError:
            params_str = ''

        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = '%s/%s_boxplots/' % (bdiv_even_output_dir, bdiv_metric)
                plot_output_fp = '%s/%s_Distances.pdf' % (boxplots_output_dir, category)
                stats_output_fp = '%s/%s_Stats.txt' % (boxplots_output_dir, category)
                if not exists(plot_output_fp):
                    boxplots_cmd = \
                     'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
                     (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
                    commands.append([('Boxplots (%s)' % category,
                                      boxplots_cmd)])
                else:
                    logger.write("Skipping make_distance_boxplots.py for %s as %s exists.\n\n" \
                                 % (category, plot_output_fp))
                index_links.append(('Distance boxplots (%s)' % bdiv_metric,
                                    plot_output_fp,
                                    _index_headers['beta_diversity_even'] % sampling_depth))
                index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
                                    stats_output_fp,
                                    _index_headers['beta_diversity_even'] % sampling_depth))

            index_links.append(('PCoA plot (%s)' % bdiv_metric,
                                '%s/%s_emperor_pcoa_plot/index.html' % \
                                 (bdiv_even_output_dir, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Distance matrix (%s)' % bdiv_metric,
                                '%s/%s_dm.txt' % \
                                 (bdiv_even_output_dir, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
                                '%s/%s_pc.txt' % \
                                 (bdiv_even_output_dir, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))

    if not suppress_alpha_diversity:
        ## Alpha rarefaction workflow
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir, sampling_depth)
        rarefaction_plots_output_fp = \
         '%s/alpha_rarefaction_plots/rarefaction_plots.html' % arare_full_output_dir
        if not exists(rarefaction_plots_output_fp):
            run_alpha_rarefaction(
             otu_table_fp=biom_fp,
             mapping_fp=mapping_fp,
             output_dir=arare_full_output_dir,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             tree_fp=tree_fp,
             num_steps=arare_num_steps,
             parallel=parallel,
             logger=logger,
             min_rare_depth=arare_min_rare_depth,
             max_rare_depth=sampling_depth,
             suppress_md5=True,
             status_update_callback=status_update_callback)
        else:
            logger.write("Skipping alpha_rarefaction.py as %s exists.\n\n" \
                         % rarefaction_plots_output_fp)

        index_links.append(('Alpha rarefaction plots',
                            rarefaction_plots_output_fp,
                            _index_headers['alpha_diversity']))

        # The collated files are looked up after the rarefaction workflow
        # above has had its chance to run.
        collated_alpha_diversity_fps = \
         glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''

        for category in categories:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                alpha_comparison_output_fp = '%s/%s_%s.txt' % \
                 (arare_full_output_dir, category, alpha_metric)
                if not exists(alpha_comparison_output_fp):
                    compare_alpha_cmd = \
                     'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' %\
                     (collated_alpha_diversity_fp, mapping_fp, category,
                      alpha_comparison_output_fp, params_str)
                    commands.append([('Compare alpha diversity (%s, %s)' %\
                                       (category, alpha_metric),
                                      compare_alpha_cmd)])
                else:
                    logger.write("Skipping compare_alpha_diversity.py for %s as %s exists.\n\n" \
                                 % (category, alpha_comparison_output_fp))
                index_links.append(
                 ('Alpha diversity statistics (%s, %s)' % (category, alpha_metric),
                  alpha_comparison_output_fp,
                  _index_headers['alpha_diversity']))

    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        # need to check for existence of any html files, since the user can
        # select only certain ones to be generated. The plots live below
        # taxa_plots_output_dir (see the index links below), so that is the
        # directory to inspect; looking directly under output_dir never
        # finds results of a previous run.
        existing_taxa_plot_html_fps = glob(
         join(taxa_plots_output_dir, 'taxa_summary_plots', '*.html'))
        if not existing_taxa_plot_html_fps:
            run_summarize_taxa_through_plots(
             otu_table_fp=biom_fp,
             mapping_fp=mapping_fp,
             output_dir=taxa_plots_output_dir,
             mapping_cat=None,
             sort=True,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             logger=logger,
             suppress_md5=True,
             status_update_callback=status_update_callback)
        else:
            logger.write("Skipping summarize_taxa_through_plots.py for as %s exist(s).\n\n" \
                         % ', '.join(existing_taxa_plot_html_fps))

        index_links.append(('Taxa summary bar plots',
                            '%s/taxa_summary_plots/bar_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        index_links.append(('Taxa summary area plots',
                            '%s/taxa_summary_plots/area_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir, category)
            # need to check for existence of any html files, since the user can
            # select only certain ones to be generated
            existing_taxa_plot_html_fps = glob('%s/taxa_summary_plots/*.html' % taxa_plots_output_dir)
            if not existing_taxa_plot_html_fps:
                run_summarize_taxa_through_plots(
                 otu_table_fp=biom_fp,
                 mapping_fp=mapping_fp,
                 output_dir=taxa_plots_output_dir,
                 mapping_cat=category,
                 sort=True,
                 command_handler=command_handler,
                 params=params,
                 qiime_config=qiime_config,
                 logger=logger,
                 suppress_md5=True,
                 status_update_callback=status_update_callback)
            else:
                logger.write("Skipping summarize_taxa_through_plots.py for %s as %s exist(s).\n\n" \
                             % (category, ', '.join(existing_taxa_plot_html_fps)))

            index_links.append(('Taxa summary bar plots',
                                '%s/taxa_summary_plots/bar_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
            index_links.append(('Taxa summary area plots',
                                '%s/taxa_summary_plots/area_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))

    if not suppress_otu_category_significance:
        try:
            params_str = get_params_str(params['otu_category_significance'])
        except KeyError:
            params_str = ''
        # OTU category significance
        for category in categories:
            category_signifance_fp = \
             '%s/category_significance_%s.txt' % (output_dir, category)
            if not exists(category_signifance_fp):
                # Build the OTU category significance command
                category_significance_cmd = \
                 'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\
                 (biom_fp, mapping_fp, category,
                  category_signifance_fp, params_str)
                commands.append([('OTU category significance (%s)' % category,
                                  category_significance_cmd)])
            else:
                logger.write("Skipping otu_category_significance.py for %s as %s exists.\n\n" \
                             % (category, category_signifance_fp))

            index_links.append(('Category significance (%s)' % category,
                        category_signifance_fp,
                        _index_headers['otu_category_sig']))

    filtered_biom_gzip_fp = '%s.gz' % filtered_biom_fp
    if not exists(filtered_biom_gzip_fp):
        commands.append([('Compress the filtered BIOM table', 'gzip %s' % filtered_biom_fp)])
    else:
        logger.write("Skipping compressing of filtered BIOM table as %s exists.\n\n" \
                     % filtered_biom_gzip_fp)
    # Link the compressed table whether it is created now or already
    # existed, like every other result; previously the link disappeared
    # from the index when the workflow was re-run.
    index_links.append(('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
                        filtered_biom_gzip_fp,
                        _index_headers['run_summary']))

    if commands:
        command_handler(commands, status_update_callback, logger)
    else:
        # nothing left to run, so the logger has to be closed here
        logger.close()

    generate_index_page(index_links, index_fp)
Exemplo n.º 7
0
def pick_reference_otus(
    input_fp, output_dir, otu_picking_method, refseqs_fp, parallel, params, logger, similarity_override=None
):
    """Build the command line for a reference-based OTU picking run.

    Parameters
    ----------
    input_fp : path of the sequences to cluster (passed as ``-i``).
    output_dir : directory for the results (passed as ``-o``).
    otu_picking_method : method name. Together with ``parallel`` it selects
        the script: ``parallel_pick_otus_<method>.py`` is used only when
        ``parallel`` is true and the method is ``"uclust_ref"``; otherwise
        ``pick_otus.py -m <method>`` is used.
    refseqs_fp : reference sequences path (passed as ``-r``).
    parallel : whether a parallel run was requested.
    params : mapping of script name to option dict. It is deep-copied, so
        the caller's object is never modified.
    logger : object with a ``write`` method; receives notes about forced or
        overridden options.
    similarity_override : optional number that replaces the
        ``pick_otus:similarity`` parameter.

    Returns
    -------
    The command string. Nothing is executed by this function.

    Raises
    ------
    WorkflowError
        If ``params`` contains ``pick_otus:refseqs_fp``.
    """
    params_copy = deepcopy(params)
    if "pick_otus" in params_copy and "refseqs_fp" in params_copy["pick_otus"]:
        raise WorkflowError(
            "Cannot pass pick_otus:refseqs_fp in parameters file. This can only be"
            " passed on the command line or through the API."
        )
    if similarity_override is not None:
        logger.write("Overridding similiary with %1.3f.\n" % similarity_override)
        if "pick_otus" in params_copy:
            params_copy["pick_otus"]["similarity"] = str(similarity_override)
        else:
            params_copy["pick_otus"] = {"similarity": str(similarity_override)}

    if parallel and otu_picking_method == "uclust_ref":
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params_copy["parallel"])
        except KeyError:
            params_str = ""

        # Grab the OTU picker parameters. A missing "pick_otus" section
        # means "no extra options"; the previous code guarded only the
        # removal of otu_picking_method and then indexed the section
        # unguarded, raising KeyError.
        try:
            pick_otus_params = params_copy["pick_otus"]
        except KeyError:
            pick_otus_params = {}
        # Want to find a cleaner strategy for this: the parallel script
        # is method-specific, so doesn't take a --otu_picking_method
        # option. This works for now though.
        pick_otus_params.pop("otu_picking_method", None)

        params_str += " %s" % get_params_str(pick_otus_params)
        otu_picking_script = "parallel_pick_otus_%s.py" % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = "%s -i %s -o %s -r %s -T %s" % (
            otu_picking_script,
            input_fp,
            output_dir,
            refseqs_fp,
            params_str,
        )
    else:
        try:
            params_str = get_params_str(params_copy["pick_otus"])
        except KeyError:
            params_str = ""
        # Since this is reference-based OTU picking we always want to
        # suppress new clusters -- force it here.
        params_str += " --suppress_new_clusters"
        logger.write("Forcing --suppress_new_clusters as this is reference-based OTU picking.\n\n")
        # Build the OTU picking command
        pick_otus_cmd = "pick_otus.py -i %s -o %s -r %s -m %s %s" % (
            input_fp,
            output_dir,
            refseqs_fp,
            otu_picking_method,
            params_str,
        )
    return pick_otus_cmd
def main():
    """Command-line driver that batches split_libraries_fastq over a directory.

    Parses the script options, recursively gathers every FASTQ file found
    under the input directory and, for the 'mapping_barcode_files'
    demultiplexing method, the mapping files matched to them. It then asks
    ``create_commands_slf`` for the command list and either prints the
    commands (``--print_only``) or runs them serially.
    """
    option_parser, opts, args = parse_command_line_parameters(
        suppress_verbose=True, **script_info)

    # Pull every option we need off the parsed options object up front.
    output_dir = abspath(opts.output_dir)
    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    mapping_extensions = opts.mapping_extensions.split(',')
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    # Removing the file path from the name only makes sense when the
    # input directory path is part of the name in the first place.
    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name enabled, "
            "--include_input_dir_path must be enabled.")

    # Defaults apply when no parameter file was supplied.
    params_dict = {}
    params_str = ""
    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['split_libraries_fastq'])

    create_dir(output_dir)

    # Recursively collect the absolute path of every FASTQ file.
    fastq_suffixes = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')
    all_fastq = []
    for root, _subdirs, filenames in walk(input_dir):
        all_fastq.extend(abspath(join(root, filename))
                         for filename in filenames
                         if filename.endswith(fastq_suffixes))

    if demultiplexing_method != 'mapping_barcode_files':
        all_files = all_fastq
    else:
        # Mapping files are gathered in a second pass over the tree. A file
        # is added once per extension it matches, so overlapping
        # user-supplied extensions yield repeated entries.
        all_mapping = []
        for root, _subdirs, filenames in walk(input_dir):
            for filename in filenames:
                all_mapping.extend(abspath(join(root, filename))
                                   for mapping_extension in mapping_extensions
                                   if filename.endswith(mapping_extension))

        all_files = get_matching_files(all_fastq, all_mapping,
                                       read_indicator, barcode_indicator,
                                       mapping_indicator)

    commands = create_commands_slf(all_files, demultiplexing_method,
                                   output_dir, params_str, leading_text,
                                   trailing_text, include_input_dir_path,
                                   remove_filepath_in_name,
                                   sampleid_indicator)

    qiime_config = load_qiime_config()
    command_handler = print_commands if print_only else call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)

    # Hand the full command list to the selected handler.
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
Exemplo n.º 9
0
def align_and_tree(repset_fasta_fp,
                   output_dir,
                   command_handler,
                   params,
                   qiime_config,
                   parallel=False,
                   logger=None,
                   status_update_callback=print_to_stdout):
    """Align sequences with PyNAST, filter the alignment and build a tree.

    Three commands are assembled and handed to command_handler in one call:
     1) align_seqs.py (or parallel_align_seqs_pynast.py when parallel is
        True), writing to output_dir/pynast_aligned_seqs;
     2) filter_alignment.py on the resulting alignment;
     3) make_phylogeny.py, writing output_dir/rep_set.tre.

    An existing pynast_aligned_seqs directory and an existing rep_set.tre
    are deleted before the commands are run.

    repset_fasta_fp: path of the fasta file to align
    output_dir: directory receiving the alignment directory and the tree
    command_handler: callable used to execute (or print) the commands
    params: mapping of script name to option dict; the 'parallel',
     'align_seqs', 'filter_alignment' and 'make_phylogeny' sections are
     read when present
    qiime_config: QIIME configuration, used only to create a logger
    parallel: use the parallel PyNAST aligner
    logger: existing logger to reuse; when None a new WorkflowLogger is
     created and closed by command_handler on success
    status_update_callback: forwarded to command_handler

    Returns the path of the '<basename>_failures.fasta' file inside the
    alignment directory (presumably written by the aligner; this function
    only computes the path).
    """
    # Only the base name of the input file (no directory, no extension) is
    # needed to derive the output file names.
    input_basename = splitext(split(repset_fasta_fp)[1])[0]
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        # A caller-supplied logger stays under the caller's control.
        close_logger_on_success = False

    ## Prep the pynast alignment command
    alignment_method = 'pynast'
    pynast_dir = '%s/%s_aligned_seqs' % (output_dir, alignment_method)
    aln_fp = '%s/%s_aligned.fasta' % (pynast_dir, input_basename)
    failures_fp = '%s/%s_failures.fasta' % (pynast_dir, input_basename)
    # Start from a clean alignment directory.
    if exists(pynast_dir):
        rmtree(pynast_dir)

    if parallel:
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the aligner parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --alignment_method
            # option. This works for now though.
            d = params['align_seqs'].copy()
            if 'alignment_method' in d:
                del d['alignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass

        # Build the parallel pynast alignment command
        align_seqs_cmd = 'parallel_align_seqs_pynast.py -i %s -o %s -T %s' %\
         (repset_fasta_fp, pynast_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['align_seqs'])
        except KeyError:
            params_str = ''
        # Build the pynast alignment command
        align_seqs_cmd = 'align_seqs.py -i %s -o %s %s' %\
         (repset_fasta_fp, pynast_dir, params_str)
    commands.append([('Align sequences', align_seqs_cmd)])

    ## Prep the alignment filtering command
    filtered_aln_fp = '%s/%s_aligned_pfiltered.fasta' %\
     (pynast_dir, input_basename)
    try:
        params_str = get_params_str(params['filter_alignment'])
    except KeyError:
        params_str = ''
    # Build the alignment filtering command
    filter_alignment_cmd = 'filter_alignment.py -o %s -i %s %s' %\
     (pynast_dir, aln_fp, params_str)
    commands.append([('Filter alignment', filter_alignment_cmd)])

    ## Prep the tree building command
    tree_fp = '%s/rep_set.tre' % output_dir
    try:
        params_str = get_params_str(params['make_phylogeny'])
    except KeyError:
        params_str = ''
    # Build the tree building command
    make_phylogeny_cmd = 'make_phylogeny.py -i %s -o %s %s' %\
     (filtered_aln_fp, tree_fp, params_str)
    commands.append([('Build phylogenetic tree', make_phylogeny_cmd)])
    # Remove a tree left over from an earlier run before rebuilding it.
    if exists(tree_fp):
        remove_files([tree_fp])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    return failures_fp
Exemplo n.º 10
0
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     histogram_categories=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_3d_plots=False,
                                     suppress_2d_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Run beta diversity, principal coordinates and plotting steps of Qiime

        The steps queued by this function are:
         1) Optionally sample the OTU table at an even depth (only when
          sampling_depth is provided);
         2) Build a prefs file used to color the plots;
         3) For each beta diversity metric:
          a) compute a beta diversity distance matrix and rename it to
           <output_dir>/<metric>_dm.txt;
          b) perform a principal coordinates analysis on that matrix;
          c) generate continuous- and discrete-colored 3D plots (unless
           suppress_3d_plots is set);
          d) generate continuous- and discrete-colored 2D plots (unless
           suppress_2d_plots is set);
          e) generate distance histograms (only when histogram_categories
           is provided).

        otu_table_fp: path to the input OTU table
        mapping_fp: path to the mapping file; it is parsed here to get the
         column headers
        output_dir: directory for all results (created via create_dir)
        command_handler: callable invoked as
         command_handler(commands, status_update_callback, logger=...,
         close_logger_on_success=...)
        params: dict mapping script names (e.g. 'beta_diversity') to dicts
         of options; a missing script name means "no extra options"
        qiime_config: must provide 'python_exe_fp'
        color_by_interesting_fields_only: if True, color by the result of
         get_interesting_mapping_fields, falling back to all headers when
         that result is empty
        sampling_depth: if provided, depth passed to single_rarefaction.py
        histogram_categories: list of mapping file headers to build
         distance histograms for
        tree_fp: if provided, passed to the beta diversity script via -t
        parallel: if True, use parallel_beta_diversity.py and append the
         options in params['parallel']
        logger: if None, a WorkflowLogger is created here and the command
         handler is told to close it on success
        suppress_md5: if True, don't log md5 sums of the input files

        Returns a list of (metric, distance_matrix_fp) tuples.

        Raises ValueError if histogram_categories contains values which
         are not mapping file column headers.
    """
    # Prepare some variables for the later steps
    otu_table_filename = split(otu_table_fp)[1]
    otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    # Use a context manager so the mapping file handle is always closed
    with open(mapping_fp, 'U') as mapping_f:
        mapping_data, mapping_header, mapping_comments = \
            parse_mapping_file(mapping_f)
    if histogram_categories:
        invalid_categories = set(histogram_categories) - set(mapping_header)
        if invalid_categories:
            raise ValueError(
                "Invalid histogram categories - these must exactly match "
                "mapping file column headers: %s" %
                ' '.join(sorted(invalid_categories)))
    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    if color_by_interesting_fields_only:
        mapping_fields = \
            get_interesting_mapping_fields(mapping_data, mapping_header) or \
            mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' % \
            (output_dir, otu_table_basename,
             sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            '%s %s/single_rarefaction.py -i %s -o %s -d %d' % \
            (python_exe_fp, script_dir, otu_table_fp,
             even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)])
        # All later steps work on the rarefied table
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_filename = split(even_sampled_otu_table_fp)[1]
        otu_table_basename, otu_table_ext = splitext(otu_table_filename)
    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    # Prep the prefs file generator command
    prefs_fp = '%s/prefs.txt' % output_dir
    try:
        prefs_params = params['make_prefs_file']
        params_str = get_params_str(prefs_params)
    except KeyError:
        # No make_prefs_file section: fall back to an empty option set
        # rather than indexing params a second time (which would raise)
        prefs_params = {}
        params_str = ''
    if 'mapping_headers_to_use' not in prefs_params:
        params_str = '%s --mapping_headers_to_use %s' \
            % (params_str, mapping_fields)
    # Build the prefs file generator command
    prefs_cmd = \
        '%s %s/make_prefs_file.py -m %s -o %s %s' % \
        (python_exe_fp, script_dir, mapping_fp, prefs_fp, params_str)
    commands.append([('Build prefs file', prefs_cmd)])

    # Prep the beta-diversity options. These don't depend on the metric,
    # so they are built once. The metrics option is dropped because each
    # metric is passed explicitly below.
    try:
        bdiv_params = params['beta_diversity'].copy()
    except KeyError:
        bdiv_params = {}
    bdiv_params.pop('metrics', None)
    bdiv_params_str = get_params_str(bdiv_params)
    if tree_fp:
        bdiv_params_str = '%s -t %s ' % (bdiv_params_str, tree_fp)
    if parallel:
        # Grab the parallel-specific parameters, separated by a space so
        # they can't run into the preceding option
        try:
            bdiv_params_str = '%s %s' % \
                (bdiv_params_str, get_params_str(params['parallel']))
        except KeyError:
            pass

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:

        # Build the beta-diversity command
        if parallel:
            beta_div_cmd = \
                '%s %s/parallel_beta_diversity.py -i %s -o %s --metrics %s -T %s' % \
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, bdiv_params_str)
        else:
            beta_div_cmd = \
                '%s %s/beta_diversity.py -i %s -o %s --metrics %s %s' % \
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, bdiv_params_str)
        commands.append(
            [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])

        # Rename the distance matrix to a name which doesn't depend on
        # the OTU table's filename
        orig_beta_div_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
            (output_dir, beta_diversity_metric)
        commands.append([('Rename distance matrix (%s)' % beta_diversity_metric,
                          'mv %s %s' % (orig_beta_div_fp, beta_div_fp))])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Prep the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the principal coordinates command
        pc_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' % \
            (python_exe_fp, script_dir, beta_div_fp, pc_fp, params_str)
        commands.append(
            [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])

        # Generate 3d plots
        if not suppress_3d_plots:
            # Both 3d plot commands take the same user-supplied options
            try:
                params_str = get_params_str(params['make_3d_plots'])
            except KeyError:
                params_str = ''

            # Build the continuous-coloring 3d plots command
            continuous_3d_dir = '%s/%s_3d_continuous/' % \
                (output_dir, beta_diversity_metric)
            create_dir(continuous_3d_dir)
            continuous_3d_command = \
                '%s %s/make_3d_plots.py -p %s -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, prefs_fp, pc_fp,
                 continuous_3d_dir, mapping_fp, params_str)

            # Build the discrete-coloring 3d plots command
            discrete_3d_dir = '%s/%s_3d_discrete/' % \
                (output_dir, beta_diversity_metric)
            create_dir(discrete_3d_dir)
            discrete_3d_command = \
                '%s %s/make_3d_plots.py -b "%s" -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, mapping_fields, pc_fp,
                 discrete_3d_dir, mapping_fp, params_str)

            commands.append([
                ('Make 3D plots (continuous coloring, %s)' %
                 beta_diversity_metric, continuous_3d_command),
                ('Make 3D plots (discrete coloring, %s)' %
                 beta_diversity_metric, discrete_3d_command)])

        # Generate 2d plots
        if not suppress_2d_plots:
            # Both 2d plot commands take the same user-supplied options
            try:
                params_str = get_params_str(params['make_2d_plots'])
            except KeyError:
                params_str = ''

            # Build the continuous-coloring 2d plots command
            continuous_2d_dir = '%s/%s_2d_continuous/' % \
                (output_dir, beta_diversity_metric)
            create_dir(continuous_2d_dir)
            continuous_2d_command = \
                '%s %s/make_2d_plots.py -p %s -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, prefs_fp, pc_fp,
                 continuous_2d_dir, mapping_fp, params_str)

            # Build the discrete-coloring 2d plots command
            discrete_2d_dir = '%s/%s_2d_discrete/' % \
                (output_dir, beta_diversity_metric)
            create_dir(discrete_2d_dir)
            discrete_2d_command = \
                '%s %s/make_2d_plots.py -b "%s" -i %s -o %s -m %s %s' % \
                (python_exe_fp, script_dir, mapping_fields, pc_fp,
                 discrete_2d_dir, mapping_fp, params_str)

            commands.append([
                ('Make 2D plots (continuous coloring, %s)' %
                 beta_diversity_metric, continuous_2d_command),
                ('Make 2D plots (discrete coloring, %s)' %
                 beta_diversity_metric, discrete_2d_command)])

        if histogram_categories:
            # Prep the distance histograms command
            histograms_dir = '%s/%s_histograms/' % \
                (output_dir, beta_diversity_metric)
            create_dir(histograms_dir)
            try:
                params_str = get_params_str(params['make_distance_histograms'])
            except KeyError:
                params_str = ''
            # Build the make_distance_histograms command
            distance_histograms_command = \
                '%s %s/make_distance_histograms.py -d %s -o %s -m %s -f "%s" %s' % \
                (python_exe_fp, script_dir, beta_div_fp,
                 histograms_dir, mapping_fp,
                 ','.join(histogram_categories), params_str)

            commands.append([
                ('Make Distance Histograms (%s)' %
                 beta_diversity_metric, distance_histograms_command)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps
Exemplo n.º 11
0
def align_and_tree(repset_fasta_fp,
                   output_dir,
                   command_handler,
                   params,
                   qiime_config,
                   parallel=False,
                   logger=None,
                   status_update_callback=print_to_stdout):
    """ Align sequences, filter the alignment and build a tree

        The steps queued by this function are:
          1) Align the sequences in repset_fasta_fp (align_seqs.py, or
             parallel_align_seqs_pynast.py if parallel is True);
          2) Filter the alignment (filter_alignment.py);
          3) Build a tree from the filtered alignment (make_phylogeny.py),
             written to <output_dir>/rep_set.tre.

        An existing pynast output directory or tree file under output_dir
        is removed before the commands are passed to command_handler.

        If logger is None, a WorkflowLogger is created here and the
        command handler is told to close it on success.

        Returns the failures fasta filepath within the alignment output
        directory (<pynast_dir>/<input_basename>_failures.fasta).
    """
    # Base name of the input file, used to name the alignment outputs
    input_filename = split(repset_fasta_fp)[1]
    input_basename = splitext(input_filename)[0]

    # Only close the logger on success if it was created here
    close_logger_on_success = logger is None
    if close_logger_on_success:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)

    # Work out the alignment output locations, clearing any stale results
    aligner = 'pynast'
    pynast_dir = '%s/%s_aligned_seqs' % (output_dir, aligner)
    aln_fp = '%s/%s_aligned.fasta' % (pynast_dir, input_basename)
    failures_fp = '%s/%s_failures.fasta' % (pynast_dir, input_basename)
    if exists(pynast_dir):
        rmtree(pynast_dir)

    # Step 1: the alignment command
    if not parallel:
        try:
            align_opts = get_params_str(params['align_seqs'])
        except KeyError:
            align_opts = ''
        align_seqs_cmd = 'align_seqs.py -i %s -o %s %s' % \
            (repset_fasta_fp, pynast_dir, align_opts)
    else:
        # Start from the parallel-specific options...
        try:
            align_opts = get_params_str(params['parallel'])
        except KeyError:
            align_opts = ''
        # ...then add the aligner options. The parallel script is
        # method-specific and takes no --alignment_method option, so that
        # one is dropped from a copy of the user's settings.
        try:
            aligner_settings = params['align_seqs'].copy()
            aligner_settings.pop('alignment_method', None)
            align_opts = '%s %s' % \
                (align_opts, get_params_str(aligner_settings))
        except KeyError:
            pass
        align_seqs_cmd = \
            'parallel_align_seqs_pynast.py -i %s -o %s -T %s' % \
            (repset_fasta_fp, pynast_dir, align_opts)

    # Step 2: the alignment filtering command
    filtered_aln_fp = '%s/%s_aligned_pfiltered.fasta' % \
        (pynast_dir, input_basename)
    try:
        filter_opts = get_params_str(params['filter_alignment'])
    except KeyError:
        filter_opts = ''
    filter_alignment_cmd = 'filter_alignment.py -o %s -i %s %s' % \
        (pynast_dir, aln_fp, filter_opts)

    # Step 3: the tree building command
    tree_fp = '%s/rep_set.tre' % output_dir
    try:
        phylogeny_opts = get_params_str(params['make_phylogeny'])
    except KeyError:
        phylogeny_opts = ''
    make_phylogeny_cmd = 'make_phylogeny.py -i %s -o %s %s' % \
        (filtered_aln_fp, tree_fp, phylogeny_opts)
    if exists(tree_fp):
        remove_files([tree_fp])

    # Hand the three steps, in order, to the command handler
    commands = [
        [('Align sequences', align_seqs_cmd)],
        [('Filter alignment', filter_alignment_cmd)],
        [('Build phylogenetic tree', make_phylogeny_cmd)],
    ]
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    return failures_fp
Exemplo n.º 12
0
def run_pick_closed_reference_otus(input_fp,
                                   refseqs_fp,
                                   output_dir,
                                   taxonomy_fp,
                                   command_handler,
                                   params,
                                   qiime_config,
                                   assign_taxonomy=False,
                                   parallel=False,
                                   logger=None,
                                   suppress_md5=False,
                                   status_update_callback=print_to_stdout):
    """ Run closed-reference OTU picking through OTU table construction

        The steps queued by this function are:
          1) Pick OTUs against the reference sequences;
          2) If assign_taxonomy is True, pick a representative sequence
             for each OTU and assign taxonomy to it with a classifier;
          3) Build an OTU table, using the taxonomy from step 2 (if
             assign_taxonomy is True), or else the taxonomy_fp passed in
             (if any).

        The OTU picking method is read from
        params['pick_otus']['otu_picking_method'] (default: uclust_ref)
        and is checked before any work is done.
    """
    # Validate the OTU picking method up front
    valid_methods = [
        'blast', 'uclust_ref', 'usearch61_ref', 'usearch_ref', 'sortmerna'
    ]
    try:
        otu_picking_method = params['pick_otus']['otu_picking_method']
    except KeyError:
        otu_picking_method = 'uclust_ref'
    assert otu_picking_method in valid_methods, \
        "Invalid OTU picking method supplied: %s. Valid choices are: %s" % (
            otu_picking_method, ' '.join(valid_methods))

    # Shared setup: input base name, output directory, logger
    input_filename = split(input_fp)[1]
    input_basename = splitext(input_filename)[0]
    create_dir(output_dir)
    commands = []
    close_logger_on_success = logger is None
    if close_logger_on_success:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)

    if not suppress_md5:
        log_input_md5s(logger, [input_fp, refseqs_fp, taxonomy_fp])

    # ---- OTU picking ----
    pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
    otu_fp = '%s/%s_otus.txt' % (pick_otu_dir, input_basename)
    use_parallel_picker = parallel and otu_picking_method in (
        'blast', 'uclust_ref', 'usearch61_ref')
    if use_parallel_picker:
        # Parallel-specific options come first
        try:
            picker_opts = get_params_str(params['parallel'])
        except KeyError:
            picker_opts = ''
        # The parallel script is method-specific and takes no
        # --otu_picking_method option, so drop it from a copy of the
        # user's settings before adding them
        try:
            picker_settings = params['pick_otus'].copy()
            picker_settings.pop('otu_picking_method', None)
            picker_opts = '%s %s' % \
                (picker_opts, get_params_str(picker_settings))
        except KeyError:
            pass
        pick_otus_cmd = 'parallel_pick_otus_%s.py -i %s -o %s -r %s -T %s' % (
            otu_picking_method, input_fp, pick_otu_dir, refseqs_fp,
            picker_opts)
    else:
        try:
            picker_opts = get_params_str(params['pick_otus'])
        except KeyError:
            picker_opts = ''
        # Closed-reference picking must never create new clusters, so the
        # flag is always added regardless of the user's settings
        picker_opts = picker_opts + ' --suppress_new_clusters'
        logger.write("Forcing --suppress_new_clusters as this is "
                     "closed-reference OTU picking.\n\n")
        pick_otus_cmd = 'pick_otus.py -i %s -o %s -r %s -m %s %s' % (
            input_fp, pick_otu_dir, refseqs_fp, otu_picking_method,
            picker_opts)
    commands.append([('Pick OTUs', pick_otus_cmd)])

    # ---- Optional: representative set + taxonomy assignment ----
    # When not requested, any taxonomy_fp passed in by the caller is used
    # as-is when building the OTU table.
    if assign_taxonomy:
        rep_set_dir = '%s/rep_set/' % output_dir
        create_dir(rep_set_dir)
        rep_set_fp = '%s/%s_rep_set.fasta' % (rep_set_dir, input_basename)
        rep_set_log_fp = '%s/%s_rep_set.log' % (rep_set_dir, input_basename)

        try:
            rep_set_opts = get_params_str(params['pick_rep_set'])
        except KeyError:
            rep_set_opts = ''
        pick_rep_set_cmd = 'pick_rep_set.py -i %s -f %s -l %s -o %s %s' % (
            otu_fp, input_fp, rep_set_log_fp, rep_set_fp, rep_set_opts)
        commands.append([('Pick representative set', pick_rep_set_cmd)])

        try:
            assignment_method = params['assign_taxonomy']['assignment_method']
        except KeyError:
            assignment_method = 'uclust'
        assign_taxonomy_dir = '%s/%s_assigned_taxonomy' % (
            output_dir, assignment_method)
        # From here on the taxonomy comes from the classifier output
        taxonomy_fp = '%s/%s_rep_set_tax_assignments.txt' % (
            assign_taxonomy_dir, input_basename)

        use_parallel_assigner = parallel and assignment_method in (
            'rdp', 'blast', 'uclust')
        if use_parallel_assigner:
            try:
                assigner_opts = get_params_str(params['parallel'])
            except KeyError:
                assigner_opts = ''
            # The parallel script is method-specific and takes no
            # --assignment_method option, so drop it from a copy of the
            # user's settings before adding them
            try:
                assigner_settings = params['assign_taxonomy'].copy()
                assigner_settings.pop('assignment_method', None)
                assigner_opts = '%s %s' % \
                    (assigner_opts, get_params_str(assigner_settings))
            except KeyError:
                pass
            assign_taxonomy_cmd = \
                'parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' % (
                    assignment_method, rep_set_fp, assign_taxonomy_dir,
                    assigner_opts)
        else:
            try:
                assigner_opts = get_params_str(params['assign_taxonomy'])
            except KeyError:
                assigner_opts = ''
            assign_taxonomy_cmd = 'assign_taxonomy.py -o %s -i %s %s' % (
                assign_taxonomy_dir, rep_set_fp, assigner_opts)

        commands.append([('Assign taxonomy', assign_taxonomy_cmd)])

    # ---- OTU table ----
    otu_table_fp = '%s/otu_table.biom' % output_dir
    try:
        otu_table_opts = get_params_str(params['make_otu_table'])
    except KeyError:
        otu_table_opts = ''
    # taxonomy_fp is the classifier output (assign_taxonomy=True), the
    # caller-supplied assignments, or None; only pass -t when there is one
    taxonomy_str = ('-t %s' % taxonomy_fp) if taxonomy_fp else ''
    make_otu_table_cmd = 'make_otu_table.py -i %s %s -o %s %s' % (
        otu_fp, taxonomy_str, otu_table_fp, otu_table_opts)
    commands.append([('Make OTU table', make_otu_table_cmd)])

    # Hand the queued steps to the command handler
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
Exemplo n.º 13
0
def run_summarize_taxa_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     mapping_cat,
                                     sort,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     logger=None,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Run the data preparation for summarizing taxonomies and generating plots

        The steps queued by this function are:
          1) Collapse samples in the OTU table by category (only when a
             mapping category is given, either via mapping_cat or via
             params['collapse_samples']['collapse_fields']);
          2) Sort the OTU table (only when sort is set);
          3) Summarize Taxonomy;
          4) Plot Taxonomy Summary.

        otu_table_fp: path to the input OTU table
        mapping_fp: path to the mapping file
        output_dir: directory for all results (created via create_dir)
        mapping_cat: mapping category (or comma-separated categories) to
         collapse samples by; if empty, the params value is used
        sort: if set, sort the OTU table before summarizing
        command_handler: callable invoked as
         command_handler(commands, status_update_callback, logger=...,
         close_logger_on_success=...)
        params: dict mapping script names to dicts of options; a missing
         script name (or params being None) means "no extra options"
        logger: if None, a WorkflowLogger is created here and the command
         handler is told to close it on success
        suppress_md5: if True, don't log md5 sums of the input files
    """
    create_dir(output_dir)

    commands = []

    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp])

    def _params_str_for(script_name):
        # Options for one script, or '' when none were supplied. Only the
        # "section missing" (KeyError) and "params not subscriptable"
        # (TypeError) cases are treated as "no options", so unrelated
        # failures and interrupts are no longer silently swallowed.
        try:
            return get_params_str(params[script_name])
        except (KeyError, TypeError):
            return ''

    # if mapping category not passed via command-line,
    # check if it is passed in params file
    if not mapping_cat:
        try:
            mapping_cat = params['collapse_samples']['collapse_fields']
        except (KeyError, TypeError):
            mapping_cat = None

    # Need to remove the mapping category option from the collapse_samples
    # options, since it is passed explicitly below. This is done on the
    # string since we don't want to change the params dict.
    params_str = '--'.join(
        [opt for opt in _params_str_for('collapse_samples').split('--')
         if not opt.startswith('collapse_fields')])

    if mapping_cat:
        base_filename = mapping_cat.replace(' ', '-').replace(',', '')
        output_biom_fp = join(output_dir, '%s_otu_table.biom' % base_filename)
        output_map_fp = join(output_dir, '%s_map.txt' % base_filename)
        # Build the collapse samples command
        collapse_samples_cmd = \
            "collapse_samples.py -m %s -b %s --output_biom_fp %s --output_mapping_fp %s --collapse_fields '%s' %s" %\
            (mapping_fp, otu_table_fp, output_biom_fp, output_map_fp, mapping_cat, params_str)

        commands.append([('Collapse samples in OTU table by categories',
                          collapse_samples_cmd)])

        # Later steps work on the collapsed table
        otu_table_fp = output_biom_fp

    # Build the sort OTU table command
    if sort:
        params_str = _params_str_for('sort_otu_table')

        # define output otu table
        sorted_fp = join(output_dir,
                         splitext(split(otu_table_fp)[-1])[0] + '_sorted.biom')

        if mapping_cat or params_str == '':
            # for this case the original mapping file no longer matches the
            # (collapsed) table, or there are no sort options, so sort
            # without a mapping file
            # NOTE(review): output_map_fp is requested from
            # collapse_samples.py above -- confirm whether it should be
            # passed here instead
            sort_otu_table_cmd = \
                "sort_otu_table.py -i %s -o %s" % (otu_table_fp, sorted_fp)
        else:
            sort_otu_table_cmd = \
                "sort_otu_table.py -i %s -o %s -m %s %s" %\
                (otu_table_fp, sorted_fp, mapping_fp, params_str)

        commands.append([('Sort OTU Table', sort_otu_table_cmd)])

        # redefine otu_table_fp to use
        otu_table_fp = sorted_fp

    # Prep the summarize taxonomy command
    params_str = _params_str_for('summarize_taxa')

    try:
        sum_taxa_levels = params['summarize_taxa']['level']
    except (KeyError, TypeError):
        sum_taxa_levels = None

    # Build the summarize taxonomy command
    summarize_taxa_cmd = 'summarize_taxa.py -i %s -o %s %s' %\
        (otu_table_fp, output_dir, params_str)

    commands.append([('Summarize Taxonomy', summarize_taxa_cmd)])

    # Work out the summarize_taxa output filepaths, one per level
    if sum_taxa_levels:
        levels = sum_taxa_levels.split(',')
    else:
        # this is the default levels from summarize_taxa, but cannot import
        # script to get these values
        levels = [2, 3, 4, 5, 6]
    basename = join(output_dir, splitext(split(otu_table_fp)[-1])[0])
    sum_taxa_fps = [basename + '_L%s.txt' % str(level) for level in levels]

    # Prep the plot taxa summary plot command(s)
    taxa_summary_plots_dir = '%s/taxa_summary_plots/' % output_dir
    create_dir(taxa_summary_plots_dir)

    params_str = _params_str_for('plot_taxa_summary')

    # Build the plot taxa summary plot command(s)
    plot_taxa_summary_cmd =\
        'plot_taxa_summary.py -i %s -o %s %s' %\
        (','.join(sum_taxa_fps), taxa_summary_plots_dir, params_str)

    commands.append([('Plot Taxonomy Summary', plot_taxa_summary_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
Exemplo n.º 14
0
def run_jackknifed_beta_diversity(otu_table_fp,
                                  tree_fp,
                                  seqs_per_sample,
                                  output_dir,
                                  command_handler,
                                  params,
                                  qiime_config,
                                  mapping_fp,
                                  parallel=False,
                                  logger=None,
                                  suppress_md5=False,
                                  status_update_callback=print_to_stdout,
                                  master_tree=None):
    """ Build and run the jackknifed beta diversity workflow

        The commands queued by this function are:
          1) Compute beta diversity distance matrix from otu table (and
           tree, if applicable)
          2) Build rarefied OTU tables;
          3) Build UPGMA tree from full distance matrix;
          4) Compute distance matrices for rarefied OTU tables;
          5) Build UPGMA trees from rarefied OTU table distance matrices;
          5.5) Build a consensus tree from the rarefied UPGMA trees
          6) Compare rarefied OTU table distance matrix UPGMA trees
           to the master tree and write support file and newick tree
           with support values as node labels;
          7) Run principal coordinates on the rarefied distance matrices;
          8) Build emperor plots from the principal coordinates results.

        Steps 3-8 are repeated for every beta diversity metric.

        otu_table_fp: path of the input OTU table
        tree_fp: path of the tree; when truthy it is passed with -t to
         the beta diversity commands
        seqs_per_sample: integer rarefaction depth (formatted with %d)
        output_dir: directory receiving all results (created here)
        command_handler: callable invoked once with the queued commands
        params: dict mapping script name to a dict of that script's
         options; missing sections fall back to no extra options
        qiime_config: passed to WorkflowLogger when no logger is given
        mapping_fp: path of the mapping file (passed to make_emperor.py)
        parallel: use parallel_beta_diversity.py for the rarefied tables
        logger: existing logger to reuse; when None a WorkflowLogger is
         created and closed on success
        suppress_md5: skip logging the md5 sums of the input files
        status_update_callback: forwarded to command_handler
        master_tree: 'full' or 'consensus', default 'full'

        Raises RuntimeError if master_tree is neither 'full' nor
        'consensus'.
    """
    # Validate the master tree choice up front so that an invalid value
    # fails before any directory or log file has been created
    if master_tree is None:
        master_tree = 'full'
    if master_tree not in ('full', 'consensus'):
        raise RuntimeError('master tree method "%s" not found' %
                           (master_tree, ))

    # Prepare some variables for the later steps
    otu_table_basename = splitext(split(otu_table_fp)[1])[0]
    create_dir(output_dir)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    # Prep the beta-diversity command
    try:
        params_str = get_params_str(params['beta_diversity'])
    except KeyError:
        params_str = ''
    if tree_fp:
        params_str = '%s -t %s' % (params_str, tree_fp)
    # Build the beta-diversity command
    beta_div_cmd = 'beta_diversity.py -i %s -o %s %s' %\
        (otu_table_fp, output_dir, params_str)
    commands.append([
        ('Beta Diversity (%s)' % ', '.join(beta_diversity_metrics),
         beta_div_cmd)
    ])

    # Prep rarefaction command
    rarefaction_dir = '%s/rarefaction/' % output_dir
    create_dir(rarefaction_dir)
    try:
        params_str = get_params_str(params['multiple_rarefactions_even_depth'])
    except KeyError:
        params_str = ''
    # Build the rarefaction command
    rarefaction_cmd = \
        'multiple_rarefactions_even_depth.py -i %s -d %d -o %s %s' %\
        (otu_table_fp, seqs_per_sample, rarefaction_dir, params_str)
    commands.append([('Rarefaction', rarefaction_cmd)])

    # The per-metric beta diversity options never change between
    # iterations, so build them once. The metrics parameter needs to be
    # ignored as we need to run beta_diversity one metric at a time to
    # keep the per-metric output files in separate directories
    try:
        rarefied_bdiv_params = params['beta_diversity'].copy()
    except KeyError:
        # no beta_diversity section at all: run with no extra options
        rarefied_bdiv_params = {}
    try:
        del rarefied_bdiv_params['metrics']
    except KeyError:
        pass
    rarefied_bdiv_params_str = get_params_str(rarefied_bdiv_params)

    # Begin iterating over beta diversity distance metrics, if more than one
    # was provided
    for beta_diversity_metric in beta_diversity_metrics:
        metric_output_dir = '%s/%s/' % (output_dir, beta_diversity_metric)
        distance_matrix_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)

        # Prep the hierarchical clustering command (for full distance matrix)
        full_tree_fp = '%s/%s_upgma.tre' % (metric_output_dir,
                                            otu_table_basename)
        try:
            params_str = get_params_str(params['upgma_cluster'])
        except KeyError:
            params_str = ''
        # Build the hierarchical clustering command (for full distance matrix)
        hierarchical_cluster_cmd = 'upgma_cluster.py -i %s -o %s %s' %\
            (distance_matrix_fp, full_tree_fp, params_str)
        commands.append([
            ('UPGMA on full distance matrix: %s' % beta_diversity_metric,
             hierarchical_cluster_cmd)
        ])

        # Prep the beta diversity command (for rarefied OTU tables)
        dm_dir = '%s/rare_dm/' % metric_output_dir
        create_dir(dm_dir)
        params_str = rarefied_bdiv_params_str + \
            ' -m %s ' % beta_diversity_metric
        if tree_fp:
            params_str = '%s -t %s' % (params_str, tree_fp)
        if parallel:
            # Grab the parallel-specific parameters, if any were provided
            try:
                params_str += ' %s' % get_params_str(params['parallel'])
            except KeyError:
                pass
            # Build the parallel beta diversity command (for rarefied OTU
            # tables)
            beta_div_rarefied_cmd = \
                'parallel_beta_diversity.py -T -i %s -o %s %s' %\
                (rarefaction_dir, dm_dir, params_str)
        else:
            # Build the serial beta diversity command (for rarefied OTU tables)
            beta_div_rarefied_cmd = \
                'beta_diversity.py -i %s -o %s %s' %\
                (rarefaction_dir, dm_dir, params_str)
        commands.append([('Beta diversity on rarefied OTU tables (%s)' %
                          beta_diversity_metric, beta_div_rarefied_cmd)])

        # Prep the hierarchical clustering command (for rarefied
        # distance matrices)
        upgma_dir = '%s/rare_upgma/' % metric_output_dir
        create_dir(upgma_dir)

        try:
            params_str = get_params_str(params['upgma_cluster'])
        except KeyError:
            params_str = ''
        # Build the hierarchical clustering command (for rarefied
        # distance matrices)
        hierarchical_cluster_cmd =\
            'upgma_cluster.py -i %s -o %s %s' % (dm_dir, upgma_dir, params_str)
        commands.append([
            ('UPGMA on rarefied distance matrix (%s)' % beta_diversity_metric,
             hierarchical_cluster_cmd)
        ])

        # Build the consensus tree command
        # NOTE(review): params_str still holds the upgma_cluster options
        # here, so they are forwarded to consensus_tree.py as well. This
        # is kept as in the original -- confirm it is intended.
        consensus_tree_fp = metric_output_dir + "/rare_upgma_consensus.tre"
        consensus_tree_cmd =\
            'consensus_tree.py -i %s -o %s %s' %\
            (upgma_dir, consensus_tree_fp, params_str)
        commands.append([('consensus on rarefied distance matrices (%s)' %
                          beta_diversity_metric, consensus_tree_cmd)])

        # Prep the tree compare command
        tree_compare_dir = '%s/upgma_cmp/' % metric_output_dir
        create_dir(tree_compare_dir)
        try:
            params_str = get_params_str(params['tree_compare'])
        except KeyError:
            params_str = ''

        # Build the tree compare command (master_tree was validated above)
        if master_tree == "full":
            master_tree_fp = full_tree_fp
        else:
            master_tree_fp = consensus_tree_fp
        tree_compare_cmd = 'tree_compare.py -s %s -m %s -o %s %s' %\
            (upgma_dir, master_tree_fp, tree_compare_dir, params_str)
        commands.append([('Tree compare (%s)' % beta_diversity_metric,
                          tree_compare_cmd)])

        # Prep the PCoA command
        pcoa_dir = '%s/pcoa/' % metric_output_dir
        create_dir(pcoa_dir)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the PCoA command
        pcoa_cmd = 'principal_coordinates.py -i %s -o %s %s' %\
            (dm_dir, pcoa_dir, params_str)
        commands.append([('Principal coordinates (%s)' % beta_diversity_metric,
                          pcoa_cmd)])

        # Prep the emperor plots command
        emperor_dir = '%s/emperor_pcoa_plots/' % metric_output_dir
        create_dir(emperor_dir)
        try:
            params_str = get_params_str(params['make_emperor'])
        except KeyError:
            params_str = ''
        emperor_cmd = 'make_emperor.py -i %s -o %s -m %s %s' %\
            (pcoa_dir, emperor_dir, mapping_fp, params_str)
        commands.append([('emperor plots (%s)' % beta_diversity_metric,
                          emperor_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
Exemplo n.º 15
0
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_emperor_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Compute beta diversity distance matrices, run PCoA, and generate emperor plots

        The steps performed by this function are:
         0) Optionally sample the OTU table at an even depth
          (only when sampling_depth is truthy)
         1) Compute a beta diversity distance matrix for each metric and
          rename it to <metric>_dm.txt
         2) Perform a principal coordinates analysis on the result of step 1
         3) Generate an emperor plot for each result of step 2
          (unless suppress_emperor_plots is set)

        otu_table_fp: path of the input OTU table
        mapping_fp: path of the mapping file
        output_dir: directory receiving all results (created here)
        command_handler: callable invoked once with the queued commands
        params: dict mapping script name to a dict of that script's
         options; missing sections fall back to no extra options
        qiime_config: passed to WorkflowLogger when no logger is given
        color_by_interesting_fields_only: selects which mapping fields
         are collected (see the note in the body)
        sampling_depth: integer even-sampling depth, or None to skip
        tree_fp: path of the tree; when truthy it is passed with -t
        parallel: use parallel_beta_diversity.py instead of
         beta_diversity.py
        logger: existing logger to reuse; when None a WorkflowLogger is
         created and closed on success
        suppress_emperor_plots: skip the make_emperor.py step
        suppress_md5: skip logging the md5 sums of the input files
        status_update_callback: forwarded to command_handler

        Returns a list of (metric, distance_matrix_fp) tuples, one per
        beta diversity metric.
    """
    # Prepare some variables for the later steps
    otu_table_basename, otu_table_ext = splitext(split(otu_table_fp)[1])
    create_dir(output_dir)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    # Use a context manager so the mapping file handle is always closed
    with open(mapping_fp, 'U') as mapping_f:
        mapping_data, mapping_header, mapping_comments =\
            parse_mapping_file(mapping_f)

    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    # NOTE(review): mapping_fields is not consumed by any command queued
    # below; the computation is kept as in the original.
    if color_by_interesting_fields_only:
        mapping_fields =\
            get_interesting_mapping_fields(mapping_data, mapping_header) or\
            mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' %\
            (output_dir, otu_table_basename,
             sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            'single_rarefaction.py -i %s -o %s -d %d' %\
            (otu_table_fp, even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)
        ])
        # All later steps work on the evenly sampled table
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_basename, otu_table_ext = \
            splitext(split(even_sampled_otu_table_fp)[1])
    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:

        # Prep the beta-diversity command; the metrics option is dropped
        # because each metric is passed explicitly via --metrics below
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        try:
            del bdiv_params_copy['metrics']
        except KeyError:
            pass

        params_str = get_params_str(bdiv_params_copy)

        if tree_fp:
            params_str = '%s -t %s ' % (params_str, tree_fp)

        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters. A separating space
            # is inserted so they cannot fuse onto the preceding option
            # when no tree was supplied.
            try:
                params_str = '%s %s' % (params_str,
                                        get_params_str(params['parallel']))
            except KeyError:
                pass
            beta_div_cmd = 'parallel_beta_diversity.py -i %s -o %s --metrics %s -T %s' %\
                (otu_table_fp, output_dir, beta_diversity_metric, params_str)
        else:
            beta_div_cmd = 'beta_diversity.py -i %s -o %s --metrics %s %s' %\
                (otu_table_fp, output_dir, beta_diversity_metric, params_str)
        commands.append([('Beta Diversity (%s)' % beta_diversity_metric,
                          beta_div_cmd)])

        orig_beta_div_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
            (output_dir, beta_diversity_metric)
        commands.append([
            ('Rename distance matrix (%s)' % beta_diversity_metric,
             'mv %s %s' % (orig_beta_div_fp, beta_div_fp))
        ])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Prep the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the principal coordinates command
        pc_cmd = 'principal_coordinates.py -i %s -o %s %s' %\
            (beta_div_fp, pc_fp, params_str)
        commands.append([('Principal coordinates (%s)' % beta_diversity_metric,
                          pc_cmd)])

        # Generate emperor plots
        if not suppress_emperor_plots:
            # Prep the emperor plots command
            emperor_dir = '%s/%s_emperor_pcoa_plot/' % (output_dir,
                                                        beta_diversity_metric)
            create_dir(emperor_dir)
            try:
                params_str = get_params_str(params['make_emperor'])
            except KeyError:
                params_str = ''
            # Build the emperor plots command
            emperor_command = \
                'make_emperor.py -i %s -o %s -m %s %s' % (pc_fp,
                                                          emperor_dir,
                                                          mapping_fp,
                                                          params_str)

            commands.append([
                ('Make emperor plots (%s)' % beta_diversity_metric,
                 emperor_command)
            ])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps
Exemplo n.º 16
0
def run_alpha_rarefaction(otu_table_fp,
                          mapping_fp,
                          output_dir,
                          command_handler,
                          params,
                          qiime_config,
                          tree_fp=None,
                          num_steps=10,
                          parallel=False,
                          logger=None,
                          min_rare_depth=10,
                          max_rare_depth=None,
                          suppress_md5=False,
                          status_update_callback=print_to_stdout,
                          plot_stderr_and_stddev=False,
                          retain_intermediate_files=True):
    """ Build and run the alpha rarefaction workflow

        The steps performed by this function are:
          1) Generate rarefied OTU tables;
          2) Compute alpha diversity metrics for each rarefied OTU table;
          3) Collate alpha diversity results;
          4) Optionally remove the rarefied tables and per-table alpha
           diversity results;
          5) Generate alpha rarefaction plots.

        otu_table_fp: path of the input OTU table
        mapping_fp: path of the mapping file
        output_dir: directory receiving all results (created here)
        command_handler: callable invoked once with the queued commands
        params: dict mapping script name to a dict of that script's
         options; missing sections fall back to no extra options
        qiime_config: passed to WorkflowLogger when no logger is given
        tree_fp: path of the tree; when truthy it is passed with -t to
         the alpha diversity command
        num_steps: number of rarefaction steps between min_rare_depth
         and max_rare_depth
        parallel: use the parallel_* variants of the rarefaction and
         alpha diversity scripts
        logger: existing logger to reuse; when None a WorkflowLogger is
         created and closed on success
        min_rare_depth: lowest rarefaction depth
        max_rare_depth: highest rarefaction depth; when None the median
         count reported by compute_counts_per_sample_stats is used
        suppress_md5: skip logging the md5 sums of the input files
        status_update_callback: forwarded to command_handler
        plot_stderr_and_stddev: when set, and no std_type option was
         given for make_rarefaction_plots, build one set of plots with
         --std_type stddev and one with --std_type stderr
        retain_intermediate_files: when False, queue an 'rm -r' of the
         rarefaction and alpha diversity directories
    """
    # Prepare some variables for the later steps
    create_dir(output_dir)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    if max_rare_depth is None:
        min_count, max_count, median_count, mean_count, counts_per_sample =\
            compute_counts_per_sample_stats(
                load_table(otu_table_fp))
        max_rare_depth = median_count
    # A step of zero would never advance, so fall back to 1
    step = int((max_rare_depth - min_rare_depth) / num_steps) or 1
    max_rare_depth = int(max_rare_depth)

    # Parallel-specific options are shared by the rarefaction and alpha
    # diversity commands; a missing 'parallel' section means no extra
    # options rather than an error
    parallel_params_str = ''
    if parallel:
        try:
            parallel_params_str = get_params_str(params['parallel'])
        except KeyError:
            pass

    rarefaction_dir = '%s/rarefaction/' % output_dir
    create_dir(rarefaction_dir)
    try:
        params_str = get_params_str(params['multiple_rarefactions'])
    except KeyError:
        params_str = ''
    if parallel:
        params_str += ' %s' % parallel_params_str
        # Build the rarefaction command
        rarefaction_cmd = \
            'parallel_multiple_rarefactions.py -T -i %s -m %s -x %s -s %s -o %s %s' %\
            (otu_table_fp, min_rare_depth, max_rare_depth, step,
             rarefaction_dir, params_str)
    else:
        # Build the rarefaction command
        rarefaction_cmd = \
            'multiple_rarefactions.py -i %s -m %s -x %s -s %s -o %s %s' %\
            (otu_table_fp, min_rare_depth, max_rare_depth, step,
             rarefaction_dir, params_str)
    commands.append([('Alpha rarefaction', rarefaction_cmd)])

    # Prep the alpha diversity command
    alpha_diversity_dir = '%s/alpha_div/' % output_dir
    create_dir(alpha_diversity_dir)
    try:
        params_str = get_params_str(params['alpha_diversity'])
    except KeyError:
        params_str = ''
    if tree_fp:
        params_str += ' -t %s' % tree_fp
    if parallel:
        params_str += ' %s' % parallel_params_str
        # Build the alpha diversity command
        alpha_diversity_cmd = \
            "parallel_alpha_diversity.py -T -i %s -o %s %s" %\
            (rarefaction_dir, alpha_diversity_dir, params_str)
    else:
        # Build the alpha diversity command
        alpha_diversity_cmd = \
            "alpha_diversity.py -i %s -o %s %s" %\
            (rarefaction_dir, alpha_diversity_dir, params_str)

    commands.append([('Alpha diversity on rarefied OTU tables',
                      alpha_diversity_cmd)])

    # Prep the alpha diversity collation command
    alpha_collated_dir = '%s/alpha_div_collated/' % output_dir
    create_dir(alpha_collated_dir)
    try:
        params_str = get_params_str(params['collate_alpha'])
    except KeyError:
        params_str = ''
    # Build the alpha diversity collation command
    alpha_collated_cmd = 'collate_alpha.py -i %s -o %s %s' %\
        (alpha_diversity_dir, alpha_collated_dir, params_str)
    commands.append([('Collate alpha', alpha_collated_cmd)])

    if not retain_intermediate_files:
        commands.append([
            ('Removing intermediate files',
             'rm -r %s %s' % (rarefaction_dir, alpha_diversity_dir))
        ])
    else:
        # An empty command is queued so the decision shows up in the
        # handler's step list
        commands.append([('Skipping removal of intermediate files.', '')])

    # Prep the make rarefaction plot command(s). The section is looked
    # up once so that the std_type test below cannot raise KeyError when
    # no make_rarefaction_plots options were provided.
    try:
        rarefaction_plot_params = params['make_rarefaction_plots']
        params_str = get_params_str(rarefaction_plot_params)
    except KeyError:
        rarefaction_plot_params = {}
        params_str = ''

    if 'std_type' in rarefaction_plot_params or not plot_stderr_and_stddev:
        rarefaction_plot_dir = '%s/alpha_rarefaction_plots/' % output_dir
        create_dir(rarefaction_plot_dir)

        # Build the make rarefaction plot command(s)
        make_rarefaction_plot_cmd =\
            'make_rarefaction_plots.py -i %s -m %s -o %s %s' %\
            (alpha_collated_dir, mapping_fp, rarefaction_plot_dir, params_str)
        commands.append([('Rarefaction plot: %s' % 'All metrics',
                          make_rarefaction_plot_cmd)])
    else:
        rarefaction_plot_dir_stddev = '%s/alpha_rarefaction_plots_stddev/' % output_dir
        rarefaction_plot_dir_stderr = '%s/alpha_rarefaction_plots_stderr/' % output_dir
        create_dir(rarefaction_plot_dir_stddev)
        create_dir(rarefaction_plot_dir_stderr)

        # Build the make rarefaction plot command(s), one per std_type
        make_rarefaction_plot_cmd =\
            'make_rarefaction_plots.py -i %s -m %s -o %s %s --std_type stddev' %\
            (alpha_collated_dir, mapping_fp, rarefaction_plot_dir_stddev,
             params_str)
        commands.append([('Rarefaction plot: %s' % 'All metrics',
                          make_rarefaction_plot_cmd)])
        make_rarefaction_plot_cmd =\
            'make_rarefaction_plots.py -i %s -m %s -o %s %s --std_type stderr' %\
            (alpha_collated_dir, mapping_fp, rarefaction_plot_dir_stderr,
             params_str)
        commands.append([('Rarefaction plot: %s' % 'All metrics',
                          make_rarefaction_plot_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
Exemplo n.º 17
0
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     histogram_categories=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_3d_plots=False,
                                     suppress_2d_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Compute beta diversity distance matrices, run PCoA, and generate plots

        The steps performed by this function are:
         0) Optionally sample the OTU table at an even depth
          (only when sampling_depth is truthy);
         1) Generate a prefs file for optimized coloring of continuous
          variables;
         2) Compute a beta diversity distance matrix for each metric and
          rename it to <metric>_dm.txt;
         3) Perform a principal coordinates analysis on the result of
          Step 2;
         4) Generate 3D plots for the selected mapping fields, once with
          continuous and once with discrete coloring
          (unless suppress_3d_plots is set);
         5) Generate 2D plots in the same two flavors
          (unless suppress_2d_plots is set);
         6) Generate distance histograms (only when histogram_categories
          is provided).

        otu_table_fp: path of the input OTU table
        mapping_fp: path of the mapping file
        output_dir: directory receiving all results (created here)
        command_handler: callable invoked once with the queued commands
        params: dict mapping script name to a dict of that script's
         options; missing sections fall back to no extra options
        qiime_config: must provide 'python_exe_fp'; also passed to
         WorkflowLogger when no logger is given
        color_by_interesting_fields_only: color by the fields returned
         by get_interesting_mapping_fields (falling back to all mapping
         headers when that is empty) instead of by all mapping headers
        sampling_depth: integer even-sampling depth, or None to skip
        histogram_categories: iterable of mapping file column headers
         to build distance histograms for
        tree_fp: path of the tree; when truthy it is passed with -t
        parallel: use parallel_beta_diversity.py instead of
         beta_diversity.py
        logger: existing logger to reuse; when None a WorkflowLogger is
         created and closed on success
        suppress_3d_plots: skip the make_3d_plots.py steps
        suppress_2d_plots: skip the make_2d_plots.py steps
        suppress_md5: skip logging the md5 sums of the input files
        status_update_callback: forwarded to command_handler

        Returns a list of (metric, distance_matrix_fp) tuples, one per
        beta diversity metric.

        Raises ValueError if any entry of histogram_categories is not a
        mapping file column header.
    """
    # Prepare some variables for the later steps
    otu_table_basename, otu_table_ext = splitext(split(otu_table_fp)[1])
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    # Use a context manager so the mapping file handle is always closed
    with open(mapping_fp, 'U') as mapping_f:
        mapping_data, mapping_header, mapping_comments =\
            parse_mapping_file(mapping_f)
    if histogram_categories:
        invalid_categories = set(histogram_categories) - set(mapping_header)
        if invalid_categories:
            # sorted so the message is reproducible between runs
            raise ValueError(
                "Invalid histogram categories - these must exactly match "
                "mapping file column headers: %s" %
                ' '.join(sorted(invalid_categories)))
    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    if color_by_interesting_fields_only:
        mapping_fields =\
            get_interesting_mapping_fields(mapping_data, mapping_header) or\
            mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' %\
            (output_dir, otu_table_basename,
             sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            '%s %s/single_rarefaction.py -i %s -o %s -d %d' %\
            (python_exe_fp, script_dir, otu_table_fp,
             even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)
        ])
        # All later steps work on the evenly sampled table
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_basename, otu_table_ext = \
            splitext(split(even_sampled_otu_table_fp)[1])
    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    # Prep the prefs file generator command. The section is looked up
    # once so that the mapping_headers_to_use test below cannot raise
    # KeyError when no make_prefs_file options were provided.
    prefs_fp = '%s/prefs.txt' % output_dir
    try:
        prefs_params = params['make_prefs_file']
        params_str = get_params_str(prefs_params)
    except KeyError:
        prefs_params = {}
        params_str = ''
    if 'mapping_headers_to_use' not in prefs_params:
        params_str = '%s --mapping_headers_to_use %s' \
            % (params_str, mapping_fields)
    # Build the prefs file generator command
    prefs_cmd = \
        '%s %s/make_prefs_file.py -m %s -o %s %s' %\
        (python_exe_fp, script_dir, mapping_fp, prefs_fp, params_str)
    commands.append([('Build prefs file', prefs_cmd)])

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:

        # Prep the beta-diversity command; the metrics option is dropped
        # because each metric is passed explicitly via --metrics below
        try:
            bdiv_params_copy = params['beta_diversity'].copy()
        except KeyError:
            bdiv_params_copy = {}
        try:
            del bdiv_params_copy['metrics']
        except KeyError:
            pass

        params_str = get_params_str(bdiv_params_copy)

        if tree_fp:
            params_str = '%s -t %s ' % (params_str, tree_fp)

        # Build the beta-diversity command
        if parallel:
            # Grab the parallel-specific parameters. A separating space
            # is inserted so they cannot fuse onto the preceding option
            # when no tree was supplied.
            try:
                params_str = '%s %s' % (params_str,
                                        get_params_str(params['parallel']))
            except KeyError:
                pass
            beta_div_cmd = '%s %s/parallel_beta_diversity.py -i %s -o %s --metrics %s -T %s' %\
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str)
        else:
            beta_div_cmd = '%s %s/beta_diversity.py -i %s -o %s --metrics %s %s' %\
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, params_str)
        commands.append(
            [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])

        orig_beta_div_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
            (output_dir, beta_diversity_metric)
        commands.append([
            ('Rename distance matrix (%s)' % beta_diversity_metric,
             'mv %s %s' % (orig_beta_div_fp, beta_div_fp))
        ])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Prep the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        try:
            params_str = get_params_str(params['principal_coordinates'])
        except KeyError:
            params_str = ''
        # Build the principal coordinates command
        pc_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, beta_div_fp, pc_fp, params_str)
        commands.append(
            [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])

        # Generate 3d plots
        if not suppress_3d_plots:
            # Both 3d commands share the same make_3d_plots options
            try:
                params_str = get_params_str(params['make_3d_plots'])
            except KeyError:
                params_str = ''

            # Build the continuous-coloring 3d plots command
            continuous_3d_dir = '%s/%s_3d_continuous/' %\
                (output_dir, beta_diversity_metric)
            create_dir(continuous_3d_dir)
            continuous_3d_command = \
                '%s %s/make_3d_plots.py -p %s -i %s -o %s -m %s %s' %\
                (python_exe_fp, script_dir, prefs_fp, pc_fp,
                 continuous_3d_dir, mapping_fp, params_str)

            # Build the discrete-coloring 3d plots command
            discrete_3d_dir = '%s/%s_3d_discrete/' %\
                (output_dir, beta_diversity_metric)
            create_dir(discrete_3d_dir)
            discrete_3d_command = \
                '%s %s/make_3d_plots.py -b "%s" -i %s -o %s -m %s %s' %\
                (python_exe_fp, script_dir, mapping_fields, pc_fp,
                 discrete_3d_dir, mapping_fp, params_str)

            # Both commands are queued together as a single group
            commands.append([
                ('Make 3D plots (continuous coloring, %s)' %
                 beta_diversity_metric, continuous_3d_command),
                ('Make 3D plots (discrete coloring, %s)' %
                 beta_diversity_metric, discrete_3d_command)])

        # Generate 2d plots
        if not suppress_2d_plots:
            # Both 2d commands share the same make_2d_plots options
            try:
                params_str = get_params_str(params['make_2d_plots'])
            except KeyError:
                params_str = ''

            # Build the continuous-coloring 2d plots command
            continuous_2d_dir = '%s/%s_2d_continuous/' %\
                (output_dir, beta_diversity_metric)
            create_dir(continuous_2d_dir)
            continuous_2d_command = \
                '%s %s/make_2d_plots.py -p %s -i %s -o %s -m %s %s' %\
                (python_exe_fp, script_dir, prefs_fp, pc_fp,
                 continuous_2d_dir, mapping_fp, params_str)

            # Build the discrete-coloring 2d plots command
            discrete_2d_dir = '%s/%s_2d_discrete/' %\
                (output_dir, beta_diversity_metric)
            create_dir(discrete_2d_dir)
            discrete_2d_command = \
                '%s %s/make_2d_plots.py -b "%s" -i %s -o %s -m %s %s' %\
                (python_exe_fp, script_dir, mapping_fields, pc_fp,
                 discrete_2d_dir, mapping_fp, params_str)

            # Both commands are queued together as a single group
            commands.append([
                ('Make 2D plots (continuous coloring, %s)' %
                 beta_diversity_metric, continuous_2d_command),
                ('Make 2D plots (discrete coloring, %s)' %
                 beta_diversity_metric, discrete_2d_command)])

        if histogram_categories:
            # Prep the distance histograms command
            histograms_dir = '%s/%s_histograms/' %\
                (output_dir, beta_diversity_metric)
            create_dir(histograms_dir)
            try:
                params_str = get_params_str(params['make_distance_histograms'])
            except KeyError:
                params_str = ''
            # Build the make_distance_histograms command
            distance_histograms_command = \
                '%s %s/make_distance_histograms.py -d %s -o %s -m %s -f "%s" %s' %\
                (python_exe_fp, script_dir, beta_div_fp,
                 histograms_dir, mapping_fp,
                 ','.join(histogram_categories), params_str)

            commands.append([
                ('Make Distance Histograms (%s)' %
                 beta_diversity_metric, distance_histograms_command)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps
Exemplo n.º 18
0
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_group_significance=False,
    status_update_callback=print_to_stdout,
):
    """Run the core diversity analyses workflow and write an HTML index page.

    The workflow summarizes the input BIOM table, filters out samples below
    ``sampling_depth``, rarefies the filtered table to ``sampling_depth`` and
    then (unless suppressed) runs the beta diversity, alpha diversity, taxa
    summary and group significance steps. Every step is skipped, and the skip
    is logged, when its expected output already exists under ``output_dir``,
    so the function can be re-run on a partially completed output directory.
    Links to all expected outputs are collected and passed to
    ``generate_index_page``.

    Parameters
    ----------
    biom_fp : str
        Path to the input BIOM table.
    mapping_fp : str
        Path to the metadata mapping file.
    sampling_depth : int
        Minimum per-sample count used for filtering, the rarefaction depth,
        and the maximum depth of the alpha rarefaction step.
    output_dir : str
        Directory (created if needed) that receives all outputs.
    qiime_config
        QIIME configuration, forwarded to the logger and the sub-workflows.
    command_handler : callable
        Called as ``command_handler(commands, status_update_callback,
        logger, ...)`` to execute the queued commands.
    tree_fp : str or None
        Optional tree file forwarded to the beta diversity and alpha
        rarefaction workflows.
    params : dict or None
        Parsed parameters keyed by script name; ``None`` means
        ``parse_qiime_parameters([])``.
    categories : list of str or None
        Mapping file column headers to use for the categorical analyses;
        ``None`` disables them.
    arare_min_rare_depth : int
        Forwarded to ``run_alpha_rarefaction`` as ``min_rare_depth``.
    arare_num_steps : int
        Forwarded to ``run_alpha_rarefaction`` as ``num_steps``.
    parallel : bool
        Forwarded to the beta diversity and alpha rarefaction workflows.
    suppress_taxa_summary, suppress_beta_diversity, suppress_alpha_diversity,
    suppress_group_significance : bool
        Skip the corresponding section of the workflow when true.
    status_update_callback : callable
        Forwarded to ``command_handler`` and the sub-workflows.

    Raises
    ------
    ValueError
        If a requested category is not a mapping file column header or
        contains only a single value.
    """
    if categories is not None:
        # Validate categories provided by the users. The mapping file is
        # opened in a ``with`` block so the handle is always released.
        with open(mapping_fp, "U") as mapping_f:
            mapping_data, mapping_comments = parse_mapping_file_to_dict(mapping_f)
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames))
                )
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values." % c
                )
    else:
        categories = []
    comma_separated_categories = ",".join(categories)

    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = "%s/index.html" % output_dir
    index_links = []
    commands = []

    # begin logging
    # Logs of earlier runs are globbed before the new log path is generated
    # (presumably so the new log is never listed as a previous one).
    old_log_fps = glob(join(output_dir, "log_20*txt"))
    log_fp = generate_log_fp(output_dir)
    index_links.append(("Master run log", log_fp, _index_headers["run_summary"]))
    for old_log_fp in old_log_fps:
        index_links.append(("Previous run log", old_log_fp, _index_headers["run_summary"]))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params["biom-summarize-table"])
    except KeyError:
        params_str = ""
    biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = "biom summarize-table -i %s -o %s --suppress-md5 %s" % (
            biom_fp,
            biom_table_stats_output_fp,
            params_str,
        )
        commands.append([("Generate BIOM table summary", biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" % biom_table_stats_output_fp)
    index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"]))

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
            biom_fp,
            filtered_biom_fp,
            sampling_depth,
        )
        commands.append(
            [
                (
                    "Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
                    filter_samples_cmd,
                )
            ]
        )
    else:
        logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n" % filtered_biom_fp)
    # All downstream steps operate on the filtered table.
    biom_fp = filtered_biom_fp

    # rarify the BIOM table to sampling_depth
    rarefied_biom_fp = "%s/table_even%d.biom" % (output_dir, sampling_depth)
    if not exists(rarefied_biom_fp):
        single_rarefaction_cmd = "single_rarefaction.py -i %s -o %s -d %d" % (biom_fp, rarefied_biom_fp, sampling_depth)
        commands.append([("Rarify the OTU table to %d sequences/sample" % sampling_depth, single_rarefaction_cmd)])
    else:
        logger.write("Skipping single_rarefaction.py as %s exists.\n\n" % rarefied_biom_fp)

    # run initial commands and reset the command list; the logger stays open
    # because later steps keep writing to it.
    if commands:
        command_handler(commands, status_update_callback, logger, close_logger_on_success=False)
        commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = "%s/bdiv_even%d/" % (output_dir, sampling_depth)
        # Need to check for the existence of any distance matrices, since the user
        # can select which will be generated.
        existing_dm_fps = glob("%s/*_dm.txt" % bdiv_even_output_dir)
        if not existing_dm_fps:
            even_dm_fps = run_beta_diversity_through_plots(
                otu_table_fp=rarefied_biom_fp,
                mapping_fp=mapping_fp,
                output_dir=bdiv_even_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                # Note: we pass sampling depth=None here as
                # we rarify the BIOM table above and pass that
                # in here.
                sampling_depth=None,
                tree_fp=tree_fp,
                parallel=parallel,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback,
            )
        else:
            logger.write("Skipping beta_diversity_through_plots.py as %s exist(s).\n\n" % ", ".join(existing_dm_fps))
            # Recover the metric name by slicing off the "_dm.txt" suffix.
            # str.strip() must not be used here: it removes any of the given
            # characters from both ends, which mangles metric names such as
            # "chord" or "manhattan". The glob pattern above guarantees that
            # every file name ends with the suffix.
            dm_suffix = "_dm.txt"
            even_dm_fps = [(split(fp)[1][: -len(dm_suffix)], fp) for fp in existing_dm_fps]

        # Get make_distance_boxplots parameters
        try:
            params_str = get_params_str(params["make_distance_boxplots"])
        except KeyError:
            params_str = ""

        for bdiv_metric, dm_fp in even_dm_fps:
            bdiv_header = _index_headers["beta_diversity_even"] % sampling_depth
            for category in categories:
                boxplots_output_dir = "%s/%s_boxplots/" % (bdiv_even_output_dir, bdiv_metric)
                plot_output_fp = "%s/%s_Distances.pdf" % (boxplots_output_dir, category)
                stats_output_fp = "%s/%s_Stats.txt" % (boxplots_output_dir, category)
                if not exists(plot_output_fp):
                    boxplots_cmd = "make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s" % (
                        dm_fp,
                        category,
                        boxplots_output_dir,
                        mapping_fp,
                        params_str,
                    )
                    commands.append([("Boxplots (%s)" % category, boxplots_cmd)])
                else:
                    logger.write(
                        "Skipping make_distance_boxplots.py for %s as %s exists.\n\n" % (category, plot_output_fp)
                    )
                index_links.append(("Distance boxplots (%s)" % bdiv_metric, plot_output_fp, bdiv_header))
                index_links.append(("Distance boxplots statistics (%s)" % bdiv_metric, stats_output_fp, bdiv_header))

            index_links.append(
                (
                    "PCoA plot (%s)" % bdiv_metric,
                    "%s/%s_emperor_pcoa_plot/index.html" % (bdiv_even_output_dir, bdiv_metric),
                    bdiv_header,
                )
            )
            index_links.append(
                (
                    "Distance matrix (%s)" % bdiv_metric,
                    "%s/%s_dm.txt" % (bdiv_even_output_dir, bdiv_metric),
                    bdiv_header,
                )
            )
            index_links.append(
                (
                    "Principal coordinate matrix (%s)" % bdiv_metric,
                    "%s/%s_pc.txt" % (bdiv_even_output_dir, bdiv_metric),
                    bdiv_header,
                )
            )

    if not suppress_alpha_diversity:
        # Alpha rarefaction workflow
        arare_full_output_dir = "%s/arare_max%d/" % (output_dir, sampling_depth)
        rarefaction_plots_output_fp = "%s/alpha_rarefaction_plots/rarefaction_plots.html" % arare_full_output_dir
        if not exists(rarefaction_plots_output_fp):
            run_alpha_rarefaction(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=arare_full_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                tree_fp=tree_fp,
                num_steps=arare_num_steps,
                parallel=parallel,
                logger=logger,
                min_rare_depth=arare_min_rare_depth,
                max_rare_depth=sampling_depth,
                suppress_md5=True,
                status_update_callback=status_update_callback,
                retain_intermediate_files=False,
            )
        else:
            logger.write("Skipping alpha_rarefaction.py as %s exists.\n\n" % rarefaction_plots_output_fp)

        index_links.append(("Alpha rarefaction plots", rarefaction_plots_output_fp, _index_headers["alpha_diversity"]))

        collated_alpha_diversity_fps = glob("%s/alpha_div_collated/*txt" % arare_full_output_dir)
        try:
            params_str = get_params_str(params["compare_alpha_diversity"])
        except KeyError:
            params_str = ""

        if categories:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                compare_alpha_output_dir = "%s/compare_%s" % (arare_full_output_dir, alpha_metric)
                if not exists(compare_alpha_output_dir):
                    compare_alpha_cmd = "compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s" % (
                        collated_alpha_diversity_fp,
                        mapping_fp,
                        comma_separated_categories,
                        compare_alpha_output_dir,
                        params_str,
                    )
                    commands.append([("Compare alpha diversity (%s)" % alpha_metric, compare_alpha_cmd)])
                else:
                    logger.write(
                        "Skipping compare_alpha_diversity.py"
                        " for %s as %s exists.\n\n" % (alpha_metric, compare_alpha_output_dir)
                    )
                # Link the comparison outputs whether or not the command is
                # re-run: the index page is regenerated on every call, so
                # links added only when the command is scheduled would
                # disappear from the index on a re-run.
                for category in categories:
                    alpha_comparison_stat_fp = "%s/%s_stats.txt" % (compare_alpha_output_dir, category)
                    alpha_comparison_boxplot_fp = "%s/%s_boxplots.pdf" % (compare_alpha_output_dir, category)
                    index_links.append(
                        (
                            "Alpha diversity statistics (%s, %s)" % (category, alpha_metric),
                            alpha_comparison_stat_fp,
                            _index_headers["alpha_diversity"],
                        )
                    )
                    index_links.append(
                        (
                            "Alpha diversity boxplots (%s, %s)" % (category, alpha_metric),
                            alpha_comparison_boxplot_fp,
                            _index_headers["alpha_diversity"],
                        )
                    )
        else:
            logger.write("Skipping compare_alpha_diversity.py as no categories were provided.\n\n")

    if not suppress_taxa_summary:
        taxa_plots_output_dir = "%s/taxa_plots/" % output_dir
        # need to check for existence of any html files, since the user can
        # select only certain ones to be generated
        existing_taxa_plot_html_fps = glob(join(taxa_plots_output_dir, "taxa_summary_plots", "*.html"))
        if not existing_taxa_plot_html_fps:
            run_summarize_taxa_through_plots(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=taxa_plots_output_dir,
                mapping_cat=None,
                sort=True,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback,
            )
        else:
            logger.write(
                "Skipping summarize_taxa_through_plots.py for as %s exist(s).\n\n"
                % ", ".join(existing_taxa_plot_html_fps)
            )

        index_links.append(
            (
                "Taxa summary bar plots",
                "%s/taxa_summary_plots/bar_charts.html" % taxa_plots_output_dir,
                _index_headers["taxa_summary"],
            )
        )
        index_links.append(
            (
                "Taxa summary area plots",
                "%s/taxa_summary_plots/area_charts.html" % taxa_plots_output_dir,
                _index_headers["taxa_summary"],
            )
        )
        # One additional set of taxa plots per category, each in its own
        # output directory.
        for category in categories:
            taxa_plots_output_dir = "%s/taxa_plots_%s/" % (output_dir, category)
            # need to check for existence of any html files, since the user can
            # select only certain ones to be generated
            existing_taxa_plot_html_fps = glob("%s/taxa_summary_plots/*.html" % taxa_plots_output_dir)
            if not existing_taxa_plot_html_fps:
                run_summarize_taxa_through_plots(
                    otu_table_fp=biom_fp,
                    mapping_fp=mapping_fp,
                    output_dir=taxa_plots_output_dir,
                    mapping_cat=category,
                    sort=True,
                    command_handler=command_handler,
                    params=params,
                    qiime_config=qiime_config,
                    logger=logger,
                    suppress_md5=True,
                    status_update_callback=status_update_callback,
                )
            else:
                logger.write(
                    "Skipping summarize_taxa_through_plots.py for %s as %s exist(s).\n\n"
                    % (category, ", ".join(existing_taxa_plot_html_fps))
                )

            index_links.append(
                (
                    "Taxa summary bar plots",
                    "%s/taxa_summary_plots/bar_charts.html" % taxa_plots_output_dir,
                    _index_headers["taxa_summary_categorical"] % category,
                )
            )
            index_links.append(
                (
                    "Taxa summary area plots",
                    "%s/taxa_summary_plots/area_charts.html" % taxa_plots_output_dir,
                    _index_headers["taxa_summary_categorical"] % category,
                )
            )

    if not suppress_group_significance:
        # Guarded like every other parameter lookup in this function so a
        # plain dict without a 'group_significance' section does not abort
        # the workflow.
        try:
            params_str = get_params_str(params["group_significance"])
        except KeyError:
            params_str = ""
        # group significance tests, aka category significance
        for category in categories:
            group_signifance_fp = "%s/group_significance_%s.txt" % (output_dir, category)
            if not exists(group_signifance_fp):
                # Build the OTU cateogry significance command
                group_significance_cmd = "group_significance.py -i %s -m %s -c %s -o %s %s" % (
                    rarefied_biom_fp,
                    mapping_fp,
                    category,
                    group_signifance_fp,
                    params_str,
                )
                commands.append([("Group significance (%s)" % category, group_significance_cmd)])
            else:
                logger.write(
                    "Skipping group_significance.py for %s as %s exists.\n\n" % (category, group_signifance_fp)
                )

            index_links.append(
                ("Category significance (%s)" % category, group_signifance_fp, _index_headers["group_significance"])
            )

    # Compress the intermediate tables last, after every step that reads them
    # has been queued.
    filtered_biom_gzip_fp = "%s.gz" % filtered_biom_fp
    if not exists(filtered_biom_gzip_fp):
        commands.append([("Compress the filtered BIOM table", "gzip %s" % filtered_biom_fp)])
    else:
        logger.write("Skipping compressing of filtered BIOM table as %s exists.\n\n" % filtered_biom_gzip_fp)
    index_links.append(
        (
            "Filtered BIOM table (minimum sequence count: %d)" % sampling_depth,
            filtered_biom_gzip_fp,
            _index_headers["run_summary"],
        )
    )

    rarified_biom_gzip_fp = "%s.gz" % rarefied_biom_fp
    if not exists(rarified_biom_gzip_fp):
        commands.append([("Compress the rarified BIOM table", "gzip %s" % rarefied_biom_fp)])
    else:
        logger.write("Skipping compressing of rarified BIOM table as %s exists.\n\n" % rarified_biom_gzip_fp)
    index_links.append(
        (
            "Rarified BIOM table (sampling depth: %d)" % sampling_depth,
            rarified_biom_gzip_fp,
            _index_headers["run_summary"],
        )
    )

    # When nothing is left to run the logger must be closed explicitly, as
    # no command handler call remains to do it.
    if commands:
        command_handler(commands, status_update_callback, logger)
    else:
        logger.close()

    generate_index_page(index_links, index_fp)
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout,
):
    """Run the core diversity analyses workflow and write an HTML index page.

    The workflow summarizes the input BIOM table, filters out samples below
    ``sampling_depth`` and then (unless suppressed) runs the beta diversity,
    alpha diversity, taxa summary and OTU category significance steps on the
    filtered table. Finally the filtered table is gzipped and links to all
    expected outputs are passed to ``generate_index_page``.

    Parameters
    ----------
    biom_fp : str
        Path to the input BIOM table.
    mapping_fp : str
        Path to the metadata mapping file.
    sampling_depth : int
        Minimum per-sample count used for filtering, the even sampling depth
        handed to the beta diversity workflow, and the maximum depth of the
        alpha rarefaction step.
    output_dir : str
        Directory (created if needed) that receives all outputs.
    qiime_config
        QIIME configuration, forwarded to the logger and the sub-workflows.
    command_handler : callable
        Called as ``command_handler(commands, status_update_callback,
        logger, ...)`` to execute the queued commands.
    tree_fp : str or None
        Optional tree file forwarded to the beta diversity and alpha
        rarefaction workflows.
    params : dict or None
        Parsed parameters keyed by script name; ``None`` means
        ``parse_qiime_parameters([])``.
    categories : list of str or None
        Mapping file column headers to use for the categorical analyses;
        ``None`` disables them.
    arare_min_rare_depth : int
        Forwarded to ``run_alpha_rarefaction`` as ``min_rare_depth``.
    arare_num_steps : int
        Forwarded to ``run_alpha_rarefaction`` as ``num_steps``.
    parallel : bool
        Forwarded to the beta diversity and alpha rarefaction workflows.
    suppress_taxa_summary, suppress_beta_diversity, suppress_alpha_diversity,
    suppress_otu_category_significance : bool
        Skip the corresponding section of the workflow when true.
    status_update_callback : callable
        Forwarded to ``command_handler`` and the sub-workflows.

    Raises
    ------
    ValueError
        If a requested category is not a mapping file column header or
        contains only a single value.
    """
    if categories is not None:
        # Validate categories provided by the users. The mapping file is
        # opened in a ``with`` block so the handle is always released.
        with open(mapping_fp, "U") as mapping_f:
            mapping_data, mapping_comments = parse_mapping_file_to_dict(mapping_f)
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                # Call-style raise: valid on both Python 2 and Python 3,
                # unlike the ``raise Exc, value`` statement form.
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames))
                )
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values." % c
                )
    else:
        categories = []

    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = "%s/index.html" % output_dir
    index_links = []
    commands = []

    # begin logging
    log_fp = generate_log_fp(output_dir)
    index_links.append(("Master run log", log_fp, _index_headers["run_summary"]))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run print_biom_table_summary.py on input BIOM table
    try:
        params_str = get_params_str(params["print_biom_table_summary"])
    except KeyError:
        params_str = ""
    biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir
    print_biom_table_summary_cmd = "print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % (
        biom_fp,
        biom_table_stats_output_fp,
        params_str,
    )
    index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"]))
    commands.append([("Generate BIOM table summary", print_biom_table_summary_cmd)])

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % (
        biom_fp,
        filtered_biom_fp,
        sampling_depth,
    )
    commands.append(
        [
            (
                "Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth,
                filter_samples_cmd,
            )
        ]
    )
    # All downstream steps operate on the filtered table.
    biom_fp = filtered_biom_fp

    # run initial commands and reset the command list; the logger stays open
    # because later steps keep writing to it.
    command_handler(commands, status_update_callback, logger, close_logger_on_success=False)
    commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = "%s/bdiv_even%d/" % (output_dir, sampling_depth)
        even_dm_fps = run_beta_diversity_through_plots(
            otu_table_fp=biom_fp,
            mapping_fp=mapping_fp,
            output_dir=bdiv_even_output_dir,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            sampling_depth=sampling_depth,
            # force suppression of distance histograms - boxplots work better
            # in this context, and are created below.
            histogram_categories=[],
            tree_fp=tree_fp,
            parallel=parallel,
            logger=logger,
            suppress_md5=True,
            status_update_callback=status_update_callback,
        )

        # The boxplot parameters do not depend on metric or category, so
        # they are looked up once instead of on every loop iteration.
        try:
            params_str = get_params_str(params["make_distance_boxplots"])
        except KeyError:
            params_str = ""

        for bdiv_metric, dm_fp in even_dm_fps:
            bdiv_header = _index_headers["beta_diversity_even"] % sampling_depth
            for category in categories:
                boxplots_output_dir = "%s/%s_boxplots/" % (bdiv_even_output_dir, bdiv_metric)
                boxplots_cmd = "make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s" % (
                    dm_fp,
                    category,
                    boxplots_output_dir,
                    mapping_fp,
                    params_str,
                )
                commands.append([("Boxplots (%s)" % category, boxplots_cmd)])
                index_links.append(
                    (
                        "Distance boxplots (%s)" % bdiv_metric,
                        "%s/%s_Distances.pdf" % (boxplots_output_dir, category),
                        bdiv_header,
                    )
                )
                index_links.append(
                    (
                        "Distance boxplots statistics (%s)" % bdiv_metric,
                        "%s/%s_Stats.txt" % (boxplots_output_dir, category),
                        bdiv_header,
                    )
                )

            index_links.append(
                (
                    "3D plot (%s, continuous coloring)" % bdiv_metric,
                    "%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                    bdiv_header,
                )
            )
            index_links.append(
                (
                    "3D plot (%s, discrete coloring)" % bdiv_metric,
                    "%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                    bdiv_header,
                )
            )
            index_links.append(
                (
                    "2D plot (%s, continuous coloring)" % bdiv_metric,
                    "%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                    bdiv_header,
                )
            )
            index_links.append(
                (
                    "2D plot (%s, discrete coloring)" % bdiv_metric,
                    "%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                    bdiv_header,
                )
            )
            index_links.append(
                (
                    "Distance matrix (%s)" % bdiv_metric,
                    "%s/%s_dm.txt" % (bdiv_even_output_dir, bdiv_metric),
                    bdiv_header,
                )
            )
            index_links.append(
                (
                    "Principal coordinate matrix (%s)" % bdiv_metric,
                    "%s/%s_pc.txt" % (bdiv_even_output_dir, bdiv_metric),
                    bdiv_header,
                )
            )

    if not suppress_alpha_diversity:
        ## Alpha rarefaction workflow
        arare_full_output_dir = "%s/arare_max%d/" % (output_dir, sampling_depth)
        run_alpha_rarefaction(
            otu_table_fp=biom_fp,
            mapping_fp=mapping_fp,
            output_dir=arare_full_output_dir,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            tree_fp=tree_fp,
            num_steps=arare_num_steps,
            parallel=parallel,
            logger=logger,
            min_rare_depth=arare_min_rare_depth,
            max_rare_depth=sampling_depth,
            suppress_md5=True,
            status_update_callback=status_update_callback,
        )

        index_links.append(
            (
                "Alpha rarefaction plots",
                "%s/alpha_rarefaction_plots/rarefaction_plots.html" % arare_full_output_dir,
                _index_headers["alpha_diversity"],
            )
        )

        # The collated files are produced by the alpha rarefaction workflow
        # that just ran, so they are globbed only afterwards.
        collated_alpha_diversity_fps = glob("%s/alpha_div_collated/*txt" % arare_full_output_dir)
        try:
            params_str = get_params_str(params["compare_alpha_diversity"])
        except KeyError:
            params_str = ""
        for category in categories:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                alpha_comparison_output_fp = "%s/%s_%s.txt" % (arare_full_output_dir, category, alpha_metric)
                compare_alpha_cmd = "compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s" % (
                    collated_alpha_diversity_fp,
                    mapping_fp,
                    category,
                    alpha_comparison_output_fp,
                    params_str,
                )
                commands.append([("Compare alpha diversity (%s, %s)" % (category, alpha_metric), compare_alpha_cmd)])
                index_links.append(
                    (
                        "Alpha diversity statistics (%s, %s)" % (category, alpha_metric),
                        alpha_comparison_output_fp,
                        _index_headers["alpha_diversity"],
                    )
                )

    if not suppress_taxa_summary:
        taxa_plots_output_dir = "%s/taxa_plots/" % output_dir
        run_summarize_taxa_through_plots(
            otu_table_fp=biom_fp,
            mapping_fp=mapping_fp,
            output_dir=taxa_plots_output_dir,
            mapping_cat=None,
            sort=True,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            logger=logger,
            suppress_md5=True,
            status_update_callback=status_update_callback,
        )

        index_links.append(
            (
                "Taxa summary bar plots",
                "%s/taxa_summary_plots/bar_charts.html" % taxa_plots_output_dir,
                _index_headers["taxa_summary"],
            )
        )
        index_links.append(
            (
                "Taxa summary area plots",
                "%s/taxa_summary_plots/area_charts.html" % taxa_plots_output_dir,
                _index_headers["taxa_summary"],
            )
        )
        # One additional set of taxa plots per category, each in its own
        # output directory.
        for category in categories:
            taxa_plots_output_dir = "%s/taxa_plots_%s/" % (output_dir, category)
            run_summarize_taxa_through_plots(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=taxa_plots_output_dir,
                mapping_cat=category,
                sort=True,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback,
            )

            index_links.append(
                (
                    "Taxa summary bar plots",
                    "%s/taxa_summary_plots/bar_charts.html" % taxa_plots_output_dir,
                    _index_headers["taxa_summary_categorical"] % category,
                )
            )
            index_links.append(
                (
                    "Taxa summary area plots",
                    "%s/taxa_summary_plots/area_charts.html" % taxa_plots_output_dir,
                    _index_headers["taxa_summary_categorical"] % category,
                )
            )

    if not suppress_otu_category_significance:
        # OTU category significance
        # The parameters are the same for every category; look them up once.
        try:
            params_str = get_params_str(params["otu_category_significance"])
        except KeyError:
            params_str = ""
        for category in categories:
            category_signifance_fp = "%s/category_significance_%s.txt" % (output_dir, category)
            # Build the OTU cateogry significance command
            category_significance_cmd = "otu_category_significance.py -i %s -m %s -c %s -o %s %s" % (
                biom_fp,
                mapping_fp,
                category,
                category_signifance_fp,
                params_str,
            )
            commands.append([("OTU category significance (%s)" % category, category_significance_cmd)])

            index_links.append(
                ("Category significance (%s)" % category, category_signifance_fp, _index_headers["otu_category_sig"])
            )

    # Compress the filtered table last, after every step that reads it has
    # been queued.
    commands.append([("Compress the filtered BIOM table", "gzip %s" % filtered_biom_fp)])
    index_links.append(
        (
            "Filtered BIOM table (minimum sequence count: %d)" % sampling_depth,
            "%s.gz" % filtered_biom_fp,
            _index_headers["run_summary"],
        )
    )

    command_handler(commands, status_update_callback, logger)
    generate_index_page(index_links, index_fp)
Exemplo n.º 20
0
def assign_tax(repset_fasta_fp,
               output_dir,
               command_handler,
               params,
               qiime_config,
               parallel=False,
               logger=None,
               status_update_callback=print_to_stdout):
    """ Build and run the taxonomy assignment command for a rep set

        repset_fasta_fp: path passed to the assigner via -i
        output_dir: directory under which the
         <assignment_method>_assigned_taxonomy directory is placed
        command_handler: callable that receives the command list
        params: dict of per-script option dicts; the 'assign_taxonomy'
         and 'parallel' entries are read, and a missing entry means
         no extra options
        qiime_config: passed to the WorkflowLogger created when no
         logger is supplied
        parallel: if True and the method is rdp, blast or uclust, the
         method-specific parallel script is used
        logger: existing logger to use; when None a new one is created
         and the command handler is asked to close it on success
        status_update_callback: passed through to command_handler

        Any existing assignment output directory is removed before the
         command handler is called.

        Returns the path built for the taxonomy assignments file.
    """
    # Only the extension-less file name of the input is needed, for naming
    # the assignments file.
    input_basename = splitext(split(repset_fasta_fp)[1])[0]

    # A logger created here is owned by this call, so it is closed on success;
    # a caller-supplied logger is left open.
    close_logger_on_success = logger is None
    if close_logger_on_success:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)

    # Work out the assignment method (uclust unless configured) and the
    # output locations derived from it.
    try:
        assignment_method = params['assign_taxonomy']['assignment_method']
    except KeyError:
        assignment_method = 'uclust'
    assign_taxonomy_dir = '%s/%s_assigned_taxonomy' % (output_dir,
                                                       assignment_method)
    taxonomy_fp = '%s/%s_tax_assignments.txt' % (assign_taxonomy_dir,
                                                 input_basename)

    if parallel and assignment_method in ('rdp', 'blast', 'uclust'):
        # Start from the parallel-specific options, if any.
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # The parallel script is method-specific and therefore takes no
        # --assignment_method option, so strip it from a copy of the
        # assign_taxonomy options before appending them.
        try:
            method_options = params['assign_taxonomy'].copy()
            method_options.pop('assignment_method', None)
            params_str = '%s %s' % (params_str, get_params_str(method_options))
        except KeyError:
            pass

        assign_taxonomy_cmd = (
            'parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' %
            (assignment_method, repset_fasta_fp, assign_taxonomy_dir,
             params_str))
    else:
        try:
            params_str = get_params_str(params['assign_taxonomy'])
        except KeyError:
            params_str = ''
        assign_taxonomy_cmd = (
            'assign_taxonomy.py -o %s -i %s %s' %
            (assign_taxonomy_dir, repset_fasta_fp, params_str))

    # Clear out results from any earlier run before the command is issued.
    if exists(assign_taxonomy_dir):
        rmtree(assign_taxonomy_dir)

    # Hand the single-command list to the command handler.
    command_handler([[('Assign taxonomy', assign_taxonomy_cmd)]],
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    return taxonomy_fp
Exemplo n.º 21
0
def run_pick_closed_reference_otus(
                              input_fp,
                              refseqs_fp,
                              output_dir,
                              taxonomy_fp,
                              command_handler,
                              params,
                              qiime_config,
                              parallel=False,
                              logger=None,
                              suppress_md5=False,
                              status_update_callback=print_to_stdout):
    """ Run the closed-reference OTU picking workflow

        The steps performed by this function are:
          1) Pick OTUs;
          2) Build an OTU table with optional pre-defined taxonomy.

        input_fp: path passed to the OTU picker via -i
        refseqs_fp: path passed to the OTU picker via -r
        output_dir: directory that is created and receives the picked
         OTUs directory and otu_table.biom
        taxonomy_fp: optional path passed to make_otu_table.py via -t;
         omitted from the command when falsy
        command_handler: callable that receives the command list
        params: dict of per-script option dicts; the 'pick_otus',
         'parallel' and 'make_otu_table' entries are read, and a missing
         entry means no extra options
        qiime_config: 'python_exe_fp' is read from it, and it is passed
         to the WorkflowLogger created when no logger is supplied
        parallel: if True, the method-specific parallel OTU picking
         script is used
        logger: existing logger to use; when None a new one is created
         and the command handler is asked to close it on success
        suppress_md5: if True, skip logging md5 sums of the input files
        status_update_callback: passed through to command_handler

        Raises AssertionError if the configured OTU picking method is
         not a reference-based method.
    """

    # confirm that a valid otu picking method was supplied before doing
    # any work
    reference_otu_picking_methods = ['blast', 'uclust_ref', 'usearch61_ref']

    try:
        otu_picking_method = params['pick_otus']['otu_picking_method']
    except KeyError:
        otu_picking_method = 'uclust_ref'
    # Raised explicitly rather than through an assert statement so the
    # check is still performed when Python runs with -O. AssertionError is
    # kept so callers see the same exception type as before.
    if otu_picking_method not in reference_otu_picking_methods:
        raise AssertionError(
            "Invalid OTU picking method supplied: %s. Valid choices are: %s"
            % (otu_picking_method, ' '.join(reference_otu_picking_methods)))

    # Prepare some variables for the later steps
    input_basename = splitext(split(input_fp)[1])[0]
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [input_fp, refseqs_fp, taxonomy_fp])

    # Prep the OTU picking command
    pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
    otu_fp = '%s/%s_otus.txt' % (pick_otu_dir, input_basename)
    if parallel and otu_picking_method in ('blast', 'uclust_ref',
                                           'usearch61_ref'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the OTU picker parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --otu_picking_method
            # option. This works for now though.
            d = params['pick_otus'].copy()
            if 'otu_picking_method' in d:
                del d['otu_picking_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass
        otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/%s -i %s -o %s -r %s -T %s' %\
            (python_exe_fp,
             script_dir,
             otu_picking_script,
             input_fp,
             pick_otu_dir,
             refseqs_fp,
             params_str)
    else:
        try:
            params_str = get_params_str(params['pick_otus'])
        except KeyError:
            params_str = ''
        # Since this is reference-based OTU picking we always want to
        # suppress new clusters -- force it here.
        params_str += ' --suppress_new_clusters'
        logger.write("Forcing --suppress_new_clusters as this is closed-reference OTU picking.\n\n")
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/pick_otus.py -i %s -o %s -r %s -m %s %s' %\
            (python_exe_fp,
             script_dir,
             input_fp,
             pick_otu_dir,
             refseqs_fp,
             otu_picking_method,
             params_str)

    commands.append([('Pick OTUs', pick_otus_cmd)])

    # Prep the OTU table building command
    otu_table_fp = '%s/otu_table.biom' % output_dir
    try:
        params_str = get_params_str(params['make_otu_table'])
    except KeyError:
        params_str = ''
    # Only pass -t when taxonomy assignments were provided.
    if taxonomy_fp:
        taxonomy_str = '-t %s' % taxonomy_fp
    else:
        taxonomy_str = ''
    # Build the OTU table building command
    make_otu_table_cmd = '%s %s/make_otu_table.py -i %s %s -o %s %s' %\
        (python_exe_fp, script_dir, otu_fp, taxonomy_str, otu_table_fp,
         params_str)

    commands.append([('Make OTU table', make_otu_table_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
Exemplo n.º 22
0
def pick_reference_otus(input_fp,
                        output_dir,
                        otu_picking_method,
                        refseqs_fp,
                        parallel,
                        params,
                        logger,
                        similarity_override=None):
    """ Build the reference-based OTU picking command string

        input_fp: path passed to the OTU picker via -i
        output_dir: path passed to the OTU picker via -o
        otu_picking_method: name of the OTU picking method; selects the
         parallel script name or is passed to pick_otus.py via -m
        refseqs_fp: path passed to the OTU picker via -r
        parallel: if True and the method is uclust_ref or sortmerna,
         the method-specific parallel script is used
        params: dict of per-script option dicts; a deep copy is made so
         the caller's dict is never modified
        logger: object with a write method, used for status messages
        similarity_override: if not None, replaces the pick_otus
         similarity option with this value

        Returns the command as a string; nothing is executed here.

        Raises WorkflowError if params contains pick_otus:refseqs_fp.
    """
    # Work on a private copy: the options are edited below.
    params_copy = deepcopy(params)
    if 'pick_otus' in params_copy and 'refseqs_fp' in params_copy['pick_otus']:
        raise WorkflowError(
            "Cannot pass pick_otus:refseqs_fp in parameters file. This can only be"
            " passed on the command line or through the API.")
    if similarity_override is not None:
        logger.write('Similiarity of %1.3f being used for pre-filtering.\n' %
                     similarity_override)
        if 'pick_otus' in params_copy:
            params_copy['pick_otus']['similarity'] = str(similarity_override)
        else:
            params_copy['pick_otus'] = {'similarity': str(similarity_override)}

    if parallel and otu_picking_method in ('uclust_ref', 'sortmerna'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params_copy['parallel'])
        except KeyError:
            params_str = ''

        # Grab the OTU picker parameters. A missing pick_otus section
        # means there is nothing to append; previously the lookup that
        # built the option string was outside the KeyError guard and
        # raised for a plain dict without that section.
        try:
            pick_otus_params = params_copy['pick_otus']
        except KeyError:
            pick_otus_params = None

        if pick_otus_params is not None:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --otu_picking_method
            # option. This works for now though.
            if 'otu_picking_method' in pick_otus_params:
                del pick_otus_params['otu_picking_method']
            params_str += ' %s' % get_params_str(pick_otus_params)

        otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = '%s -i %s -o %s -r %s -T %s' %\
            (otu_picking_script,
             input_fp,
             output_dir,
             refseqs_fp,
             params_str)
    else:
        try:
            params_str = get_params_str(params_copy['pick_otus'])
        except KeyError:
            params_str = ''
        # Since this is reference-based OTU picking we always want to
        # suppress new clusters -- force it here.
        params_str += ' --suppress_new_clusters'
        logger.write(
            "Forcing --suppress_new_clusters as this is reference-based OTU picking.\n\n"
        )
        # Build the OTU picking command
        pick_otus_cmd = 'pick_otus.py -i %s -o %s -r %s -m %s %s' %\
            (input_fp,
             output_dir,
             refseqs_fp,
             otu_picking_method,
             params_str)

    return pick_otus_cmd
Exemplo n.º 23
0
def run_pick_closed_reference_otus(
        input_fp,
        refseqs_fp,
        output_dir,
        taxonomy_fp,
        command_handler,
        params,
        qiime_config,
        assign_taxonomy=False,
        parallel=False,
        logger=None,
        suppress_md5=False,
        status_update_callback=print_to_stdout):
    """ Run the closed-reference OTU picking workflow

        The steps performed by this function are:
          1) Pick OTUs;
          2) If assign_taxonomy is True, choose representative sequence
             for OTUs and assign taxonomy using a classifier.
          3) Build an OTU table with optional predefined taxonomy
             (if assign_taxonomy=False) or taxonomic assignments from step 2
             (if assign_taxonomy=True).

        input_fp: path passed to the OTU picker via -i (and to
         pick_rep_set.py via -f when assign_taxonomy is True)
        refseqs_fp: path passed to the OTU picker via -r
        output_dir: directory that is created and receives all outputs
        taxonomy_fp: optional path passed to make_otu_table.py via -t;
         replaced by the path built for the step 2 assignments when
         assign_taxonomy is True
        command_handler: callable that receives the command list
        params: dict of per-script option dicts; the 'pick_otus',
         'parallel', 'pick_rep_set', 'assign_taxonomy' and
         'make_otu_table' entries are read, and a missing entry means
         no extra options
        qiime_config: passed to the WorkflowLogger created when no
         logger is supplied
        parallel: if True, method-specific parallel scripts are used
         for the methods that have one listed below
        logger: existing logger to use; when None a new one is created
         and the command handler is asked to close it on success
        suppress_md5: if True, skip logging md5 sums of the input files
        status_update_callback: passed through to command_handler

        Raises AssertionError if the configured OTU picking method is
         not a reference-based method.
    """

    # confirm that a valid otu picking method was supplied before doing
    # any work
    reference_otu_picking_methods = ['blast', 'uclust_ref', 'usearch61_ref',
                                     'usearch_ref', 'sortmerna']

    try:
        otu_picking_method = params['pick_otus']['otu_picking_method']
    except KeyError:
        otu_picking_method = 'uclust_ref'
    # Raised explicitly rather than through an assert statement so the
    # check is still performed when Python runs with -O. AssertionError is
    # kept so callers see the same exception type as before.
    if otu_picking_method not in reference_otu_picking_methods:
        raise AssertionError(
            "Invalid OTU picking method supplied: %s. Valid choices are: %s"
            % (otu_picking_method, ' '.join(reference_otu_picking_methods)))

    # Prepare some variables for the later steps
    input_basename = splitext(split(input_fp)[1])[0]
    create_dir(output_dir)
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [input_fp, refseqs_fp, taxonomy_fp])

    # Prep the OTU picking command
    pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
    otu_fp = '%s/%s_otus.txt' % (pick_otu_dir, input_basename)
    # usearch_ref is a valid method but is not in this list, so it always
    # takes the serial branch.
    if parallel and otu_picking_method in ('blast', 'uclust_ref',
                                           'usearch61_ref', 'sortmerna'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the OTU picker parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --otu_picking_method
            # option. This works for now though.
            d = params['pick_otus'].copy()
            if 'otu_picking_method' in d:
                del d['otu_picking_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass
        otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = '%s -i %s -o %s -r %s -T %s' %\
            (otu_picking_script,
             input_fp,
             pick_otu_dir,
             refseqs_fp,
             params_str)
    else:
        try:
            params_str = get_params_str(params['pick_otus'])
        except KeyError:
            params_str = ''
        # Since this is reference-based OTU picking we always want to
        # suppress new clusters -- force it here.
        params_str += ' --suppress_new_clusters'
        logger.write(
            "Forcing --suppress_new_clusters as this is "
            "closed-reference OTU picking.\n\n")
        # Build the OTU picking command
        pick_otus_cmd = 'pick_otus.py -i %s -o %s -r %s -m %s %s' %\
            (input_fp,
             pick_otu_dir,
             refseqs_fp,
             otu_picking_method,
             params_str)

    commands.append([('Pick OTUs', pick_otus_cmd)])

    # Assign taxonomy using a taxonomy classifier, if requested by the user.
    # (Alternatively predefined taxonomic assignments will be used, if provided.)
    if assign_taxonomy:
        # Prep the representative set picking command
        rep_set_dir = '%s/rep_set/' % output_dir
        create_dir(rep_set_dir)
        rep_set_fp = '%s/%s_rep_set.fasta' % (rep_set_dir, input_basename)
        rep_set_log_fp = '%s/%s_rep_set.log' % (rep_set_dir, input_basename)

        try:
            params_str = get_params_str(params['pick_rep_set'])
        except KeyError:
            params_str = ''
        # Build the representative set picking command
        pick_rep_set_cmd = 'pick_rep_set.py -i %s -f %s -l %s -o %s %s' %\
            (otu_fp, input_fp, rep_set_log_fp, rep_set_fp, params_str)
        commands.append([('Pick representative set', pick_rep_set_cmd)])

        # Prep the taxonomy assignment command
        try:
            assignment_method = params['assign_taxonomy']['assignment_method']
        except KeyError:
            assignment_method = 'uclust'
        assign_taxonomy_dir = '%s/%s_assigned_taxonomy' %\
            (output_dir, assignment_method)
        # From here on taxonomy_fp refers to the assignments produced by
        # this workflow, not to the value passed in by the caller.
        taxonomy_fp = '%s/%s_rep_set_tax_assignments.txt' % \
            (assign_taxonomy_dir, input_basename)
        if parallel and assignment_method in ('rdp', 'blast', 'uclust'):
            # Grab the parallel-specific parameters
            try:
                params_str = get_params_str(params['parallel'])
            except KeyError:
                params_str = ''

            # Grab the taxonomy assignment parameters
            try:
                # Want to find a cleaner strategy for this: the parallel script
                # is method-specific, so doesn't take a --assignment_method
                # option. This works for now though.
                d = params['assign_taxonomy'].copy()
                if 'assignment_method' in d:
                    del d['assignment_method']
                params_str += ' %s' % get_params_str(d)
            except KeyError:
                pass

            # Build the parallel taxonomy assignment command
            assign_taxonomy_cmd = \
                'parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' %\
                (assignment_method, rep_set_fp, assign_taxonomy_dir, params_str)
        else:
            try:
                params_str = get_params_str(params['assign_taxonomy'])
            except KeyError:
                params_str = ''
            # Build the taxonomy assignment command
            assign_taxonomy_cmd = 'assign_taxonomy.py -o %s -i %s %s' %\
                (assign_taxonomy_dir, rep_set_fp, params_str)

        commands.append([('Assign taxonomy', assign_taxonomy_cmd)])

    # Prep the OTU table building command
    otu_table_fp = '%s/otu_table.biom' % output_dir
    try:
        params_str = get_params_str(params['make_otu_table'])
    except KeyError:
        params_str = ''
    # If assign_taxonomy is True, this will be the path to the taxonomic
    # assignment results. If assign_taxonomy is False this will be either
    # the precomputed taxonomic assignments that the user passed in,
    # or None.
    if taxonomy_fp:
        taxonomy_str = '-t %s' % taxonomy_fp
    else:
        taxonomy_str = ''
    # Build the OTU table building command
    make_otu_table_cmd = 'make_otu_table.py -i %s %s -o %s %s' %\
        (otu_fp, taxonomy_str, otu_table_fp, params_str)

    commands.append([('Make OTU table', make_otu_table_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
Exemplo n.º 24
0
def run_core_diversity_analyses(biom_fp,
                                mapping_fp,
                                sampling_depth,
                                output_dir,
                                qiime_config,
                                command_handler=call_commands_serially,
                                tree_fp=None,
                                params=None,
                                categories=None,
                                arare_min_rare_depth=10,
                                arare_num_steps=10,
                                parallel=False,
                                suppress_taxa_summary=False,
                                suppress_beta_diversity=False,
                                suppress_alpha_diversity=False,
                                suppress_group_significance=False,
                                status_update_callback=print_to_stdout):
    """
    """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = \
            parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" %
                    (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. "
                    "Categories analyzed here require at least two values." %
                    c)

    else:
        categories = []
    comma_separated_categories = ','.join(categories)
    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []

    # begin logging
    old_log_fps = glob(join(output_dir, 'log_20*txt'))
    log_fp = generate_log_fp(output_dir)
    index_links.append(
        ('Master run log', log_fp, _index_headers['run_summary']))
    for old_log_fp in old_log_fps:
        index_links.append(
            ('Previous run log', old_log_fp, _index_headers['run_summary']))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params['biom-summarize-table'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = \
            "biom summarize-table -i %s -o %s %s" % \
            (biom_fp, biom_table_stats_output_fp, params_str)
        commands.append([('Generate BIOM table summary',
                          biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" %
                     biom_table_stats_output_fp)
    index_links.append(('BIOM table statistics', biom_table_stats_output_fp,
                        _index_headers['run_summary']))

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
            (biom_fp, filtered_biom_fp, sampling_depth)
        commands.append([(
            'Filter low sequence count samples from table (minimum sequence count: %d)'
            % sampling_depth, filter_samples_cmd)])
    else:
        logger.write(
            "Skipping filter_samples_from_otu_table.py as %s exists.\n\n" %
            filtered_biom_fp)
    biom_fp = filtered_biom_fp

    # rarify the BIOM table to sampling_depth
    rarefied_biom_fp = "%s/table_even%d.biom" % (output_dir, sampling_depth)
    if not exists(rarefied_biom_fp):
        single_rarefaction_cmd = "single_rarefaction.py -i %s -o %s -d %d" %\
            (biom_fp, rarefied_biom_fp, sampling_depth)
        commands.append([
            ('Rarify the OTU table to %d sequences/sample' % sampling_depth,
             single_rarefaction_cmd)
        ])
    else:
        logger.write("Skipping single_rarefaction.py as %s exists.\n\n" %
                     rarefied_biom_fp)

    # run initial commands and reset the command list
    if len(commands) > 0:
        command_handler(commands,
                        status_update_callback,
                        logger,
                        close_logger_on_success=False)
        commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir, sampling_depth)
        # Need to check for the existence of any distance matrices, since the user
        # can select which will be generated.
        existing_dm_fps = glob('%s/*_dm.txt' % bdiv_even_output_dir)
        if len(existing_dm_fps) == 0:
            even_dm_fps = run_beta_diversity_through_plots(
                otu_table_fp=rarefied_biom_fp,
                mapping_fp=mapping_fp,
                output_dir=bdiv_even_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                # Note: we pass sampling depth=None here as
                # we rarify the BIOM table above and pass that
                # in here.
                sampling_depth=None,
                tree_fp=tree_fp,
                parallel=parallel,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback)
        else:
            logger.write(
                "Skipping beta_diversity_through_plots.py as %s exist(s).\n\n"
                % ', '.join(existing_dm_fps))
            even_dm_fps = [(split(fp)[1].strip('_dm.txt'), fp)
                           for fp in existing_dm_fps]

        # Get make_distance_boxplots parameters
        try:
            params_str = get_params_str(params['make_distance_boxplots'])
        except KeyError:
            params_str = ''

        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = '%s/%s_boxplots/' % (
                    bdiv_even_output_dir, bdiv_metric)
                plot_output_fp = '%s/%s_Distances.pdf' % (boxplots_output_dir,
                                                          category)
                stats_output_fp = '%s/%s_Stats.txt' % (boxplots_output_dir,
                                                       category)
                if not exists(plot_output_fp):
                    boxplots_cmd = \
                        'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
                        (dm_fp, category, boxplots_output_dir,
                         mapping_fp, params_str)
                    commands.append([('Boxplots (%s)' % category, boxplots_cmd)
                                     ])
                else:
                    logger.write(
                        "Skipping make_distance_boxplots.py for %s as %s exists.\n\n"
                        % (category, plot_output_fp))
                index_links.append(
                    ('Distance boxplots (%s)' % bdiv_metric, plot_output_fp,
                     _index_headers['beta_diversity_even'] % sampling_depth))
                index_links.append(
                    ('Distance boxplots statistics (%s)' % bdiv_metric,
                     stats_output_fp,
                     _index_headers['beta_diversity_even'] % sampling_depth))

            index_links.append(
                ('PCoA plot (%s)' % bdiv_metric,
                 '%s/%s_emperor_pcoa_plot/index.html' %
                 (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(
                ('Distance matrix (%s)' % bdiv_metric,
                 '%s/%s_dm.txt' % (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(
                ('Principal coordinate matrix (%s)' % bdiv_metric,
                 '%s/%s_pc.txt' % (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))

    if not suppress_alpha_diversity:
        # Alpha rarefaction workflow
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir,
                                                     sampling_depth)
        rarefaction_plots_output_fp = \
            '%s/alpha_rarefaction_plots/rarefaction_plots.html' % arare_full_output_dir
        if not exists(rarefaction_plots_output_fp):
            run_alpha_rarefaction(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=arare_full_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                tree_fp=tree_fp,
                num_steps=arare_num_steps,
                parallel=parallel,
                logger=logger,
                min_rare_depth=arare_min_rare_depth,
                max_rare_depth=sampling_depth,
                suppress_md5=True,
                status_update_callback=status_update_callback,
                retain_intermediate_files=False)
        else:
            logger.write("Skipping alpha_rarefaction.py as %s exists.\n\n" %
                         rarefaction_plots_output_fp)

        index_links.append(
            ('Alpha rarefaction plots', rarefaction_plots_output_fp,
             _index_headers['alpha_diversity']))

        collated_alpha_diversity_fps = \
            glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''

        if len(categories) > 0:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(
                    split(collated_alpha_diversity_fp)[1])[0]
                compare_alpha_output_dir = '%s/compare_%s' % \
                    (arare_full_output_dir, alpha_metric)
                if not exists(compare_alpha_output_dir):
                    compare_alpha_cmd = \
                        'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' %\
                        (collated_alpha_diversity_fp,
                         mapping_fp,
                         comma_separated_categories,
                         compare_alpha_output_dir,
                         params_str)
                    commands.append([
                        ('Compare alpha diversity (%s)' % alpha_metric,
                         compare_alpha_cmd)
                    ])
                    for category in categories:
                        alpha_comparison_stat_fp = '%s/%s_stats.txt' % \
                            (compare_alpha_output_dir, category)
                        alpha_comparison_boxplot_fp = '%s/%s_boxplots.pdf' % \
                            (compare_alpha_output_dir, category)
                        index_links.append(
                            ('Alpha diversity statistics (%s, %s)' %
                             (category, alpha_metric),
                             alpha_comparison_stat_fp,
                             _index_headers['alpha_diversity']))
                        index_links.append(
                            ('Alpha diversity boxplots (%s, %s)' %
                             (category, alpha_metric),
                             alpha_comparison_boxplot_fp,
                             _index_headers['alpha_diversity']))
                else:
                    logger.write("Skipping compare_alpha_diversity.py"
                                 " for %s as %s exists.\n\n" %
                                 (alpha_metric, compare_alpha_output_dir))
        else:
            logger.write("Skipping compare_alpha_diversity.py as"
                         " no categories were provided.\n\n")

    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        # need to check for existence of any html files, since the user can
        # select only certain ones to be generated
        existing_taxa_plot_html_fps = glob(
            join(taxa_plots_output_dir, 'taxa_summary_plots', '*.html'))
        if len(existing_taxa_plot_html_fps) == 0:
            run_summarize_taxa_through_plots(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=taxa_plots_output_dir,
                mapping_cat=None,
                sort=True,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback)
        else:
            logger.write(
                "Skipping summarize_taxa_through_plots.py for as %s exist(s).\n\n"
                % ', '.join(existing_taxa_plot_html_fps))

        index_links.append(
            ('Taxa summary bar plots',
             '%s/taxa_summary_plots/bar_charts.html' % taxa_plots_output_dir,
             _index_headers['taxa_summary']))
        index_links.append(
            ('Taxa summary area plots',
             '%s/taxa_summary_plots/area_charts.html' % taxa_plots_output_dir,
             _index_headers['taxa_summary']))
        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,
                                                           category)
            # need to check for existence of any html files, since the user can
            # select only certain ones to be generated
            existing_taxa_plot_html_fps = glob('%s/taxa_summary_plots/*.html' %
                                               taxa_plots_output_dir)
            if len(existing_taxa_plot_html_fps) == 0:
                run_summarize_taxa_through_plots(
                    otu_table_fp=biom_fp,
                    mapping_fp=mapping_fp,
                    output_dir=taxa_plots_output_dir,
                    mapping_cat=category,
                    sort=True,
                    command_handler=command_handler,
                    params=params,
                    qiime_config=qiime_config,
                    logger=logger,
                    suppress_md5=True,
                    status_update_callback=status_update_callback)
            else:
                logger.write(
                    "Skipping summarize_taxa_through_plots.py for %s as %s exist(s).\n\n"
                    % (category, ', '.join(existing_taxa_plot_html_fps)))

            index_links.append(
                ('Taxa summary bar plots',
                 '%s/taxa_summary_plots/bar_charts.html' %
                 taxa_plots_output_dir,
                 _index_headers['taxa_summary_categorical'] % category))
            index_links.append(
                ('Taxa summary area plots',
                 '%s/taxa_summary_plots/area_charts.html' %
                 taxa_plots_output_dir,
                 _index_headers['taxa_summary_categorical'] % category))

    if not suppress_group_significance:
        params_str = get_params_str(params['group_significance'])
        # group significance tests, aka category significance
        for category in categories:
            group_signifance_fp = \
                '%s/group_significance_%s.txt' % (output_dir, category)
            if not exists(group_signifance_fp):
                # Build the OTU cateogry significance command
                group_significance_cmd = \
                    'group_significance.py -i %s -m %s -c %s -o %s %s' %\
                    (rarefied_biom_fp, mapping_fp, category,
                     group_signifance_fp, params_str)
                commands.append([('Group significance (%s)' % category,
                                  group_significance_cmd)])
            else:
                logger.write(
                    "Skipping group_significance.py for %s as %s exists.\n\n" %
                    (category, group_signifance_fp))

            index_links.append(
                ('Category significance (%s)' % category, group_signifance_fp,
                 _index_headers['group_significance']))

    filtered_biom_gzip_fp = '%s.gz' % filtered_biom_fp
    if not exists(filtered_biom_gzip_fp):
        commands.append([('Compress the filtered BIOM table',
                          'gzip %s' % filtered_biom_fp)])
    else:
        logger.write(
            "Skipping compressing of filtered BIOM table as %s exists.\n\n" %
            filtered_biom_gzip_fp)
    index_links.append(
        ('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
         filtered_biom_gzip_fp, _index_headers['run_summary']))

    rarified_biom_gzip_fp = '%s.gz' % rarefied_biom_fp
    if not exists(rarified_biom_gzip_fp):
        commands.append([('Compress the rarified BIOM table',
                          'gzip %s' % rarefied_biom_fp)])
    else:
        logger.write(
            "Skipping compressing of rarified BIOM table as %s exists.\n\n" %
            rarified_biom_gzip_fp)
    index_links.append(
        ('Rarified BIOM table (sampling depth: %d)' % sampling_depth,
         rarified_biom_gzip_fp, _index_headers['run_summary']))

    if len(commands) > 0:
        command_handler(commands, status_update_callback, logger)
    else:
        logger.close()

    generate_index_page(index_links, index_fp)
Exemplo n.º 25
0
def run_alpha_rarefaction(otu_table_fp,
                          mapping_fp,
                          output_dir,
                          command_handler,
                          params,
                          qiime_config,
                          tree_fp=None,
                          num_steps=10,
                          parallel=False,
                          logger=None,
                          min_rare_depth=10,
                          max_rare_depth=None,
                          suppress_md5=False,
                          status_update_callback=print_to_stdout,
                          plot_stderr_and_stddev=False,
                          retain_intermediate_files=True):
    """ Build the alpha rarefaction commands and hand them to command_handler

        The steps performed by this function are:
          1) Generate rarefied OTU tables;
          2) Compute alpha diversity metrics for each rarefied OTU table;
          3) Collate alpha diversity results;
          4) Generate alpha rarefaction plots.

        otu_table_fp: path to the input OTU table
        mapping_fp: path to the mapping file (passed to
         make_rarefaction_plots.py via -m)
        output_dir: directory that receives all outputs; created if needed
        command_handler: callable invoked as
         command_handler(commands, status_update_callback, logger=...,
                         close_logger_on_success=...)
        params: dict mapping script names to dicts of script options; every
         section read here is optional
        qiime_config: mapping from which 'python_exe_fp' is read
        tree_fp: if provided, passed to the alpha diversity script via -t
        num_steps: number of steps used to derive the rarefaction step size
        parallel: if True, the parallel_* variants of the rarefaction and
         alpha diversity scripts are used
        logger: logger to write to; if None, a WorkflowLogger is created in
         output_dir and the command handler is asked to close it on success
        min_rare_depth: lower rarefaction depth (-m)
        max_rare_depth: upper rarefaction depth (-x); if None, the median
         per-sample count of the OTU table is used
        suppress_md5: if True, input file md5s are not logged
        status_update_callback: forwarded to command_handler
        plot_stderr_and_stddev: if True, and 'std_type' is not set in
         params['make_rarefaction_plots'], two sets of plots are generated
         (--std_type stddev and --std_type stderr)
        retain_intermediate_files: if False, a command removing the
         rarefaction and alpha diversity directories is queued

    """
    # Prepare some variables for the later steps
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    close_logger_on_success = logger is None
    if close_logger_on_success:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    if max_rare_depth is None:
        # Default the upper depth to the median per-sample count. The table
        # is parsed and summarized inside the with-block so the file handle
        # is released as soon as the statistics are available.
        with open(otu_table_fp, 'U') as otu_table_f:
            min_count, max_count, median_count, mean_count, \
                counts_per_sample = compute_counts_per_sample_stats(
                    parse_biom_table(otu_table_f))
        max_rare_depth = median_count
    # "or 1" keeps the step from collapsing to zero when the depth range is
    # narrower than num_steps
    step = int((max_rare_depth - min_rare_depth) / num_steps) or 1
    max_rare_depth = int(max_rare_depth)

    if parallel:
        # A missing 'parallel' section means "no extra options", the same
        # way every other optional section of params is treated here.
        try:
            parallel_params_str = get_params_str(params['parallel'])
        except KeyError:
            parallel_params_str = ''

    # Prep the rarefaction command
    rarefaction_dir = '%s/rarefaction/' % output_dir
    create_dir(rarefaction_dir)
    try:
        params_str = get_params_str(params['multiple_rarefactions'])
    except KeyError:
        params_str = ''
    if parallel:
        params_str += ' %s' % parallel_params_str
        # Build the rarefaction command
        rarefaction_cmd = \
            '%s %s/parallel_multiple_rarefactions.py -T -i %s -m %s -x %s -s %s -o %s %s' %\
            (python_exe_fp, script_dir, otu_table_fp, min_rare_depth,
             max_rare_depth, step, rarefaction_dir, params_str)
    else:
        # Build the rarefaction command
        rarefaction_cmd = \
            '%s %s/multiple_rarefactions.py -i %s -m %s -x %s -s %s -o %s %s' %\
            (python_exe_fp, script_dir, otu_table_fp, min_rare_depth,
             max_rare_depth, step, rarefaction_dir, params_str)
    commands.append([('Alpha rarefaction', rarefaction_cmd)])

    # Prep the alpha diversity command
    alpha_diversity_dir = '%s/alpha_div/' % output_dir
    create_dir(alpha_diversity_dir)
    try:
        params_str = get_params_str(params['alpha_diversity'])
    except KeyError:
        params_str = ''
    if tree_fp:
        params_str += ' -t %s' % tree_fp
    if parallel:
        params_str += ' %s' % parallel_params_str
        # Build the alpha diversity command
        alpha_diversity_cmd = \
            "%s %s/parallel_alpha_diversity.py -T -i %s -o %s %s" %\
            (python_exe_fp, script_dir, rarefaction_dir, alpha_diversity_dir,
             params_str)
    else:
        # Build the alpha diversity command
        alpha_diversity_cmd = \
            "%s %s/alpha_diversity.py -i %s -o %s %s" %\
            (python_exe_fp, script_dir, rarefaction_dir, alpha_diversity_dir,
             params_str)

    commands.append(
        [('Alpha diversity on rarefied OTU tables', alpha_diversity_cmd)])

    # Prep the alpha diversity collation command
    alpha_collated_dir = '%s/alpha_div_collated/' % output_dir
    create_dir(alpha_collated_dir)
    try:
        params_str = get_params_str(params['collate_alpha'])
    except KeyError:
        params_str = ''
    # Build the alpha diversity collation command
    alpha_collated_cmd = '%s %s/collate_alpha.py -i %s -o %s %s' %\
        (python_exe_fp, script_dir, alpha_diversity_dir,
         alpha_collated_dir, params_str)
    commands.append([('Collate alpha', alpha_collated_cmd)])

    if not retain_intermediate_files:
        commands.append([('Removing intermediate files',
                          'rm -r %s %s' % (rarefaction_dir,
                                           alpha_diversity_dir))])
    else:
        # Label-only entry: the command string is intentionally empty
        commands.append([('Skipping removal of intermediate files.', '')])

    # Prep the make rarefaction plot command(s). The section is optional, so
    # remember the (possibly empty) option dict for the std_type check below
    # instead of indexing params a second time without a guard.
    try:
        plot_params = params['make_rarefaction_plots']
        params_str = get_params_str(plot_params)
    except KeyError:
        plot_params = {}
        params_str = ''

    if 'std_type' in plot_params or not plot_stderr_and_stddev:
        # A single set of plots: either the user chose std_type explicitly,
        # or both variants were not requested
        rarefaction_plot_dir = '%s/alpha_rarefaction_plots/' % output_dir
        create_dir(rarefaction_plot_dir)

        # Build the make rarefaction plot command(s)
        make_rarefaction_plot_cmd =\
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s' %\
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir, params_str)
        commands.append(
            [('Rarefaction plot: All metrics', make_rarefaction_plot_cmd)])
    else:
        rarefaction_plot_dir_stddev = \
            '%s/alpha_rarefaction_plots_stddev/' % output_dir
        rarefaction_plot_dir_stderr = \
            '%s/alpha_rarefaction_plots_stderr/' % output_dir
        create_dir(rarefaction_plot_dir_stddev)
        create_dir(rarefaction_plot_dir_stderr)

        # Build the make rarefaction plot command(s), one per std_type
        make_rarefaction_plot_cmd =\
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s --std_type stddev' %\
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir_stddev, params_str)
        commands.append(
            [('Rarefaction plot: All metrics', make_rarefaction_plot_cmd)])
        make_rarefaction_plot_cmd =\
            '%s %s/make_rarefaction_plots.py -i %s -m %s -o %s %s --std_type stderr' %\
            (python_exe_fp, script_dir, alpha_collated_dir, mapping_fp,
             rarefaction_plot_dir_stderr, params_str)
        commands.append(
            [('Rarefaction plot: All metrics', make_rarefaction_plot_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
Exemplo n.º 26
0
def run_pick_de_novo_otus(input_fp,
                               output_dir,
                               command_handler,
                               params,
                               qiime_config,
                               parallel=False,
                               logger=None,
                               suppress_md5=False,
                               status_update_callback=print_to_stdout):
    """ Build the de novo OTU picking commands and hand them to command_handler

        The commands are queued in this order:
          1) Pick OTUs (when running parallel uclust_ref without
             suppress_new_clusters, the failures are additionally clustered
             with uclust and the two OTU maps are merged);
          2) Pick a representative set;
          3) Assign taxonomy;
          4) Build an OTU table (plus a reference-only OTU table when
             failures were clustered);
          5) Align the representative set;
          6) Filter the alignment prior to tree building;
          7) Build a phylogenetic tree.

        input_fp: path to the input sequences
        output_dir: directory that receives all outputs; created if needed
        command_handler: callable invoked as
         command_handler(commands, status_update_callback, logger=...,
                         close_logger_on_success=...)
        params: dict mapping script names to dicts of script options; every
         section read here is optional
        qiime_config: mapping from which 'python_exe_fp' is read
        parallel: if True, the parallel variants of the OTU picking
         (blast, uclust_ref), taxonomy assignment (rdp, blast, uclust) and
         alignment (pynast) scripts are used
        logger: logger to write to; if None, a WorkflowLogger is created in
         output_dir and the command handler is asked to close it on success
        suppress_md5: if True, the input file md5 is not logged
        status_update_callback: forwarded to command_handler

        Returns (absolute tree filepath, absolute OTU table filepath).

    """

    # Prepare some variables for the later steps
    input_basename = splitext(split(input_fp)[1])[0]
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    cluster_failures = False
    close_logger_on_success = logger is None
    if close_logger_on_success:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)

    if not suppress_md5:
        log_input_md5s(logger, [input_fp])

    # Prep the OTU picking command
    try:
        otu_picking_method = params['pick_otus']['otu_picking_method']
    except KeyError:
        otu_picking_method = 'uclust'
    pick_otu_dir = '%s/%s_picked_otus' % (output_dir, otu_picking_method)
    otu_fp = '%s/%s_otus.txt' % (pick_otu_dir, input_basename)
    if parallel and (otu_picking_method == 'blast' or
                     otu_picking_method == 'uclust_ref'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the OTU picker parameters
        # Want to find a cleaner strategy for this: the parallel script
        # is method-specific, so doesn't take a --otu_picking_method
        # option. This works for now though.
        try:
            d = params['pick_otus'].copy()
        except KeyError:
            # keep d bound so the code below never hits an unbound name
            d = {}
        d.pop('otu_picking_method', None)

        if otu_picking_method == 'uclust_ref':
            # Without suppress_new_clusters, reads that fail to hit the
            # reference are clustered de novo further down. The option
            # itself is not forwarded to the parallel script.
            try:
                del d['suppress_new_clusters']
                cluster_failures = False
            except KeyError:
                cluster_failures = True
                failure_otu_picking_method = 'uclust'

        params_str += ' %s' % get_params_str(d)
        otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/%s -i %s -o %s -T %s' % (python_exe_fp,
                                                        script_dir,
                                                        otu_picking_script,
                                                        input_fp,
                                                        pick_otu_dir,
                                                        params_str)
    else:
        try:
            params_str = get_params_str(params['pick_otus'])
        except KeyError:
            params_str = ''
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/pick_otus.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, input_fp, pick_otu_dir, params_str)

    commands.append([('Pick OTUs', pick_otus_cmd)])

    if cluster_failures:
        reference_otu_fp = otu_fp
        clustered_failures_dir = '%s/failure_otus/' % pick_otu_dir

        try:
            d = params['pick_otus'].copy()
        except KeyError:
            d = {}
        d.pop('otu_picking_method', None)

        # Give de novo OTUs a recognizable id prefix unless the user
        # already picked one
        if 'uclust_otu_id_prefix' not in d:
            d['uclust_otu_id_prefix'] = 'DeNovoOTU'
        params_str = ' %s' % get_params_str(d)

        failures_list_fp = '%s/%s_failures.txt' % \
            (pick_otu_dir, input_basename)
        failures_fasta_fp = '%s/%s_failures.fasta' % \
            (pick_otu_dir, input_basename)

        filter_fasta_cmd = 'filter_fasta.py -f %s -s %s -o %s' %\
            (input_fp, failures_list_fp, failures_fasta_fp)

        commands.append([('Generate failures fasta file',
                          filter_fasta_cmd)])

        # Prep the OTU picking command for the failures
        failure_otu_fp = '%s/%s_failures_otus.txt' % \
            (clustered_failures_dir, input_basename)
        # Build the OTU picking command
        pick_otus_cmd = '%s %s/pick_otus.py -i %s -o %s -m %s %s' %\
            (python_exe_fp, script_dir, failures_fasta_fp,
             clustered_failures_dir, failure_otu_picking_method, params_str)

        commands.append(
            [('Pick de novo OTUs for new clusters', pick_otus_cmd)])

        merged_otu_map_fp = '%s/merged_otu_map.txt' % clustered_failures_dir
        # NOTE(review): '>>' appends, so a merged map left over from an
        # earlier run would be extended rather than replaced -- confirm
        # whether '>' is intended.
        cat_otu_tables_cmd = 'cat %s %s >> %s' %\
            (reference_otu_fp, failure_otu_fp, merged_otu_map_fp)
        commands.append([('Merge OTU maps', cat_otu_tables_cmd)])
        # Downstream steps work on the merged map
        otu_fp = merged_otu_map_fp

    # Prep the representative set picking command
    rep_set_dir = '%s/rep_set/' % output_dir
    create_dir(rep_set_dir)
    rep_set_fp = '%s/%s_rep_set.fasta' % (rep_set_dir, input_basename)
    rep_set_log_fp = '%s/%s_rep_set.log' % (rep_set_dir, input_basename)

    try:
        params_str = get_params_str(params['pick_rep_set'])
    except KeyError:
        params_str = ''
    # Build the representative set picking command
    pick_rep_set_cmd = '%s %s/pick_rep_set.py -i %s -f %s -l %s -o %s %s' %\
        (python_exe_fp, script_dir, otu_fp, input_fp, rep_set_log_fp,
         rep_set_fp, params_str)
    commands.append([('Pick representative set', pick_rep_set_cmd)])

    # Prep the taxonomy assignment command
    try:
        assignment_method = params['assign_taxonomy']['assignment_method']
    except KeyError:
        assignment_method = 'uclust'
    assign_taxonomy_dir = '%s/%s_assigned_taxonomy' %\
        (output_dir, assignment_method)
    taxonomy_fp = '%s/%s_rep_set_tax_assignments.txt' % \
        (assign_taxonomy_dir, input_basename)
    if parallel and (assignment_method == 'rdp' or
                     assignment_method == 'blast' or
                     assignment_method == 'uclust'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the taxonomy assignment parameters
        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --assignment_method
            # option. This works for now though.
            d = params['assign_taxonomy'].copy()
            if 'assignment_method' in d:
                del d['assignment_method']
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass

        # Build the parallel taxonomy assignment command
        assign_taxonomy_cmd = \
            '%s %s/parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' %\
            (python_exe_fp, script_dir, assignment_method, rep_set_fp,
             assign_taxonomy_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['assign_taxonomy'])
        except KeyError:
            params_str = ''
        # Build the taxonomy assignment command
        assign_taxonomy_cmd = '%s %s/assign_taxonomy.py -o %s -i %s %s' %\
            (python_exe_fp, script_dir, assign_taxonomy_dir,
             rep_set_fp, params_str)

    commands.append([('Assign taxonomy', assign_taxonomy_cmd)])

    # Prep the OTU table building command
    otu_table_fp = '%s/otu_table.biom' % output_dir
    try:
        params_str = get_params_str(params['make_otu_table'])
    except KeyError:
        params_str = ''
    # Build the OTU table building command
    make_otu_table_cmd = '%s %s/make_otu_table.py -i %s -t %s -o %s %s' %\
        (python_exe_fp, script_dir, otu_fp, taxonomy_fp, otu_table_fp,
         params_str)

    commands.append([('Make OTU table', make_otu_table_cmd)])

    if cluster_failures:
        # Also build a table from the reference hits alone (pre-merge map)
        reference_otu_table_fp = \
            '%s/reference_only_otu_table.biom' % output_dir
        # Build the OTU table building command
        make_otu_table_cmd = '%s %s/make_otu_table.py -i %s -t %s -o %s %s' %\
            (python_exe_fp, script_dir, reference_otu_fp, taxonomy_fp,
             reference_otu_table_fp, params_str)

        commands.append(
            [('Make reference-only OTU table', make_otu_table_cmd)])

    # Prep the pynast alignment command
    try:
        alignment_method = params['align_seqs']['alignment_method']
    except KeyError:
        alignment_method = 'pynast'
    pynast_dir = '%s/%s_aligned_seqs' % (output_dir, alignment_method)
    aln_fp = '%s/%s_rep_set_aligned.fasta' % (pynast_dir, input_basename)
    if parallel and alignment_method == 'pynast':
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        # Grab the alignment parameters
        # Want to find a cleaner strategy for this: the parallel script
        # is method-specific, so doesn't take a --alignment_method
        # option. This works for now though.
        try:
            d = params['align_seqs'].copy()
        except KeyError:
            d = {}
        d.pop('alignment_method', None)
        params_str += ' %s' % get_params_str(d)

        # Build the parallel pynast alignment command
        align_seqs_cmd = \
            '%s %s/parallel_align_seqs_pynast.py -i %s -o %s -T %s' %\
            (python_exe_fp, script_dir, rep_set_fp, pynast_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['align_seqs'])
        except KeyError:
            params_str = ''
        # Build the pynast alignment command
        align_seqs_cmd = '%s %s/align_seqs.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, rep_set_fp, pynast_dir, params_str)
    commands.append([('Align sequences', align_seqs_cmd)])

    # Prep the alignment filtering command
    filtered_aln_fp = '%s/%s_rep_set_aligned_pfiltered.fasta' %\
        (pynast_dir, input_basename)
    try:
        params_str = get_params_str(params['filter_alignment'])
    except KeyError:
        params_str = ''
    # Build the alignment filtering command
    filter_alignment_cmd = '%s %s/filter_alignment.py -o %s -i %s %s' %\
        (python_exe_fp, script_dir, pynast_dir, aln_fp, params_str)
    commands.append([('Filter alignment', filter_alignment_cmd)])

    # Prep the tree building command
    tree_fp = '%s/rep_set.tre' % output_dir
    try:
        params_str = get_params_str(params['make_phylogeny'])
    except KeyError:
        params_str = ''
    # Build the tree building command
    make_phylogeny_cmd = '%s %s/make_phylogeny.py -i %s -o %s %s' %\
        (python_exe_fp, script_dir, filtered_aln_fp, tree_fp,
         params_str)
    commands.append([('Build phylogenetic tree', make_phylogeny_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return abspath(tree_fp), abspath(otu_table_fp)
Exemplo n.º 27
0
def run_beta_diversity_through_plots(otu_table_fp,
                                     mapping_fp,
                                     output_dir,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     color_by_interesting_fields_only=True,
                                     sampling_depth=None,
                                     tree_fp=None,
                                     parallel=False,
                                     logger=None,
                                     suppress_emperor_plots=False,
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Compute beta diversity distance matrices, run PCoA, and generate emperor plots

        The steps performed by this function are:
         1) Compute a beta diversity distance matrix for each metric
         2) Perform a principal coordinates analysis on the result of step 1
         3) Generate an emperor plot for each result of step 2

        otu_table_fp: path to the input OTU table
        mapping_fp: path to the mapping file (passed to make_emperor.py)
        output_dir: directory that receives all outputs; created if needed
        command_handler: callable invoked as
         command_handler(commands, status_update_callback, logger=...,
                         close_logger_on_success=...)
        params: dict mapping script names to dicts of script options; every
         section read here is optional. params['beta_diversity']['metrics']
         is a comma-separated list of metrics (default: weighted_unifrac and
         unweighted_unifrac)
        qiime_config: mapping from which 'python_exe_fp' is read
        color_by_interesting_fields_only: selects which mapping fields are
         collected (see the note in the body; the result is currently not
         passed to any command)
        sampling_depth: if provided, the OTU table is first rarefied to this
         depth and the rarefied table is used for all later steps
        tree_fp: if provided, passed to the beta diversity script via -t
        parallel: if True, parallel_beta_diversity.py is used
        logger: logger to write to; if None, a WorkflowLogger is created in
         output_dir and the command handler is asked to close it on success
        suppress_emperor_plots: if True, step 3 is skipped
        suppress_md5: if True, input file md5s are not logged
        status_update_callback: forwarded to command_handler

        Returns a list of (metric, distance matrix filepath) tuples, one per
        metric, in the order the metrics were processed.

    """
    # Prepare some variables for the later steps
    otu_table_basename, otu_table_ext = splitext(split(otu_table_fp)[1])
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    close_logger_on_success = logger is None
    if close_logger_on_success:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    # Parse inside a with-block so the mapping file handle is released
    with open(mapping_fp, 'U') as mapping_f:
        mapping_data, mapping_header, mapping_comments =\
            parse_mapping_file(mapping_f)

    # Get the interesting mapping fields to color by -- if none are
    # interesting, take all of them. Interesting is defined as those
    # which have greater than one value and fewer values than the number
    # of samples
    # NOTE(review): mapping_fields is not passed to any command built
    # below; it is still computed so mapping-file problems surface here.
    if color_by_interesting_fields_only:
        mapping_fields =\
            get_interesting_mapping_fields(mapping_data, mapping_header) or\
            mapping_header
    else:
        mapping_fields = mapping_header
    mapping_fields = ','.join(mapping_fields)

    if sampling_depth:
        # Sample the OTU table at even depth
        even_sampled_otu_table_fp = '%s/%s_even%d%s' %\
            (output_dir, otu_table_basename,
             sampling_depth, otu_table_ext)
        single_rarefaction_cmd = \
            '%s %s/single_rarefaction.py -i %s -o %s -d %d' %\
            (python_exe_fp, script_dir, otu_table_fp,
             even_sampled_otu_table_fp, sampling_depth)
        commands.append([
            ('Sample OTU table at %d seqs/sample' % sampling_depth,
             single_rarefaction_cmd)])
        # All later steps operate on the evenly sampled table
        otu_table_fp = even_sampled_otu_table_fp
        otu_table_basename, otu_table_ext = \
            splitext(split(even_sampled_otu_table_fp)[1])
    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    # The option strings do not depend on the metric, so build them once.
    # 'metrics' is dropped because each command gets a single --metrics value.
    try:
        bdiv_params_copy = params['beta_diversity'].copy()
    except KeyError:
        bdiv_params_copy = {}
    bdiv_params_copy.pop('metrics', None)
    bdiv_params_str = get_params_str(bdiv_params_copy)
    if tree_fp:
        bdiv_params_str = '%s -t %s ' % (bdiv_params_str, tree_fp)
    if parallel:
        # Grab the parallel-specific parameters. The explicit separator
        # keeps the last beta diversity option from running into the first
        # parallel option when no tree is given.
        try:
            bdiv_params_str += ' %s' % get_params_str(params['parallel'])
        except KeyError:
            pass

    try:
        pc_params_str = get_params_str(params['principal_coordinates'])
    except KeyError:
        pc_params_str = ''

    if not suppress_emperor_plots:
        try:
            emperor_params_str = get_params_str(params['make_emperor'])
        except KeyError:
            emperor_params_str = ''

    dm_fps = []
    for beta_diversity_metric in beta_diversity_metrics:

        # Build the beta-diversity command
        if parallel:
            beta_div_cmd = '%s %s/parallel_beta_diversity.py -i %s -o %s --metrics %s -T %s' %\
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, bdiv_params_str)
        else:
            beta_div_cmd = '%s %s/beta_diversity.py -i %s -o %s --metrics %s %s' %\
                (python_exe_fp, script_dir, otu_table_fp,
                 output_dir, beta_diversity_metric, bdiv_params_str)
        commands.append(
            [('Beta Diversity (%s)' % beta_diversity_metric, beta_div_cmd)])

        # Rename <metric>_<table basename>.txt to the shorter
        # <metric>_dm.txt used by the later steps
        orig_beta_div_fp = '%s/%s_%s.txt' % \
            (output_dir, beta_diversity_metric, otu_table_basename)
        beta_div_fp = '%s/%s_dm.txt' % \
            (output_dir, beta_diversity_metric)
        commands.append(
            [('Rename distance matrix (%s)' % beta_diversity_metric,
              'mv %s %s' % (orig_beta_div_fp, beta_div_fp))])
        dm_fps.append((beta_diversity_metric, beta_div_fp))

        # Build the principal coordinates command
        pc_fp = '%s/%s_pc.txt' % (output_dir, beta_diversity_metric)
        pc_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' %\
            (python_exe_fp, script_dir, beta_div_fp, pc_fp, pc_params_str)
        commands.append(
            [('Principal coordinates (%s)' % beta_diversity_metric, pc_cmd)])

        # Generate emperor plots
        if not suppress_emperor_plots:
            # Prep the emperor plots command
            emperor_dir = '%s/%s_emperor_pcoa_plot/' % \
                (output_dir, beta_diversity_metric)
            create_dir(emperor_dir)
            emperor_command = \
                'make_emperor.py -i %s -o %s -m %s %s' % (pc_fp,
                                                          emperor_dir,
                                                          mapping_fp,
                                                          emperor_params_str)

            commands.append(
                [('Make emperor plots (%s)' % beta_diversity_metric,
                  emperor_command)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)

    return dm_fps
def main():
    """Build and run one split_libraries_fastq command set for a directory.

    Reads the command line options, collects the fastq files (and, for the
    'mapping_barcode_files' demultiplexing method, the mapping files) found
    under the input directory, builds the commands with create_commands_slf
    and either prints them (--print_only) or runs them serially.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    parameter_fp = opts.parameter_fp
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    mapping_extensions = opts.mapping_extensions.split(',')
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name enabled, "
                            "--include_input_dir_path must be enabled.")

    if parameter_fp:
        with open(parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        # A parameters file without a split_libraries_fastq section simply
        # contributes no extra options
        try:
            params_str = get_params_str(params_dict['split_libraries_fastq'])
        except KeyError:
            params_str = ""
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    all_fastq = []
    all_mapping = []

    fastq_extensions = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')
    # Mapping files are only needed for this demultiplexing method
    collect_mapping = demultiplexing_method == 'mapping_barcode_files'
    # str.endswith accepts a tuple of suffixes, so a file matching several
    # overlapping extensions is still recorded only once
    mapping_suffixes = tuple(mapping_extensions)

    # One pass over the tree collects both kinds of files
    for root, _, fps in walk(input_dir):
        for fp in fps:
            if fp.endswith(fastq_extensions):
                all_fastq.append(abspath(join(root, fp)))
            if collect_mapping and fp.endswith(mapping_suffixes):
                all_mapping.append(abspath(join(root, fp)))

    if collect_mapping:
        all_files = get_matching_files(all_fastq, all_mapping, read_indicator,
                                       barcode_indicator, mapping_indicator)
    else:
        all_files = all_fastq

    commands = create_commands_slf(all_files, demultiplexing_method,
                                   output_dir, params_str, leading_text,
                                   trailing_text, include_input_dir_path,
                                   remove_filepath_in_name, sampleid_indicator)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
Exemplo n.º 29
0
def run_jackknifed_beta_diversity(otu_table_fp,
                                  tree_fp,
                                  seqs_per_sample,
                                  output_dir,
                                  command_handler,
                                  params,
                                  qiime_config,
                                  mapping_fp,
                                  parallel=False,
                                  logger=None,
                                  suppress_md5=False,
                                  status_update_callback=print_to_stdout,
                                  master_tree=None):
    """ Run the jackknifed beta diversity workflow

        The steps performed by this function are:
          1) Compute beta diversity distance matrix from otu table (and
           tree, if applicable)
          2) Build rarefied OTU tables;
          3) Build UPGMA tree from full distance matrix;
          4) Compute distance matrices for rarefied OTU tables;
          5) Build UPGMA trees from rarefied OTU table distance matrices;
          5.5) Build a consensus tree from the rarefied UPGMA trees
          6) Compare rarefied OTU table distance matrix UPGMA trees
           to the master tree and write support file and newick tree
           with support values as node labels.
          7) Run principal coordinates on the rarefied distance matrices;
          8) Build emperor plots from the principal coordinates.

        Steps 3-8 are repeated for every beta diversity metric.

        otu_table_fp: OTU table passed as input to the first commands
        tree_fp: tree passed with -t to beta_diversity; skipped if falsy
        seqs_per_sample: integer depth passed to the rarefaction command
        output_dir: directory (created here) that receives all output
        command_handler: callable invoked once with the full command list
        params: dict-like mapping script names to their option dicts;
         scripts without a section get no extra options
        qiime_config: dict-like, 'python_exe_fp' is read from it
        mapping_fp: mapping file passed to make_emperor.py
        parallel: if True, use parallel_beta_diversity.py on the rarefied
         tables
        logger: existing logger to reuse; if None a WorkflowLogger is
         created and the command handler is asked to close it on success
        suppress_md5: if True, don't log md5 sums of the input files
        status_update_callback: forwarded to command_handler
        master_tree: 'full' or 'consensus', default 'full'

        Raises RuntimeError if master_tree is neither 'full' nor
         'consensus'.
    """
    def section_params_str(section):
        # A script without a section in the parameters gets no extra options
        try:
            return get_params_str(params[section])
        except KeyError:
            return ''

    # Prepare some variables for the later steps
    if master_tree is None:
        master_tree = 'full'
    otu_table_basename = splitext(split(otu_table_fp)[1])[0]
    create_dir(output_dir)
    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp, tree_fp])

    try:
        beta_diversity_metrics = params['beta_diversity']['metrics'].split(',')
    except KeyError:
        beta_diversity_metrics = ['weighted_unifrac', 'unweighted_unifrac']

    # Prep the beta-diversity command
    params_str = section_params_str('beta_diversity')
    if tree_fp:
        params_str = '%s -t %s' % (params_str, tree_fp)
    # Build the beta-diversity command
    beta_div_cmd = '%s %s/beta_diversity.py -i %s -o %s %s' %\
     (python_exe_fp, script_dir, otu_table_fp, output_dir, params_str)
    commands.append(
     [('Beta Diversity (%s)' % ', '.join(beta_diversity_metrics),
       beta_div_cmd)])

    # Prep rarefaction command
    rarefaction_dir = '%s/rarefaction/' % output_dir
    create_dir(rarefaction_dir)
    params_str = section_params_str('multiple_rarefactions_even_depth')
    # Build the rarefaction command
    rarefaction_cmd = \
     '%s %s/multiple_rarefactions_even_depth.py -i %s -d %d -o %s %s' %\
     (python_exe_fp, script_dir, otu_table_fp, seqs_per_sample,
      rarefaction_dir, params_str)
    commands.append([('Rarefaction', rarefaction_cmd)])

    # Begin iterating over beta diversity distance metrics, if more than one
    # was provided
    for beta_diversity_metric in beta_diversity_metrics:
        metric_output_dir = '%s/%s/' % (output_dir, beta_diversity_metric)
        distance_matrix_fp = '%s/%s_%s.txt' % \
         (output_dir, beta_diversity_metric, otu_table_basename)
        consensus_tree_fp = metric_output_dir + "/rare_upgma_consensus.tre"

        # Prep the hierarchical clustering command (for full distance matrix)
        full_tree_fp = '%s/%s_upgma.tre' % \
         (metric_output_dir, otu_table_basename)
        upgma_params_str = section_params_str('upgma_cluster')
        # Build the hierarchical clustering command (for full distance matrix)
        hierarchical_cluster_cmd = '%s %s/upgma_cluster.py -i %s -o %s %s' %\
         (python_exe_fp, script_dir, distance_matrix_fp, full_tree_fp,
          upgma_params_str)
        commands.append(
         [('UPGMA on full distance matrix: %s' % beta_diversity_metric,
           hierarchical_cluster_cmd)])

        # Prep the beta diversity command (for rarefied OTU tables)
        dm_dir = '%s/rare_dm/' % metric_output_dir
        create_dir(dm_dir)
        # the metrics parameter needs to be ignored as we need to run
        # beta_diversity one metric at a time to keep the per-metric
        # output files in separate directories
        try:
            bdiv_params = params['beta_diversity'].copy()
        except KeyError:
            # no beta_diversity section at all: nothing to pass through
            bdiv_params = {}
        bdiv_params.pop('metrics', None)
        params_str = get_params_str(bdiv_params) + \
         ' -m %s ' % beta_diversity_metric
        if tree_fp:
            params_str = '%s -t %s' % (params_str, tree_fp)
        if parallel:
            params_str += ' %s' % section_params_str('parallel')
            # Build the parallel beta diversity command (for rarefied OTU
            # tables)
            beta_div_rarefied_cmd = \
             '%s %s/parallel_beta_diversity.py -T -i %s -o %s %s' %\
             (python_exe_fp, script_dir, rarefaction_dir, dm_dir, params_str)
        else:
            # Build the serial beta diversity command (for rarefied OTU
            # tables)
            beta_div_rarefied_cmd = \
             '%s %s/beta_diversity.py -i %s -o %s %s' %\
             (python_exe_fp, script_dir, rarefaction_dir, dm_dir, params_str)
        commands.append(
         [('Beta diversity on rarefied OTU tables (%s)' % beta_diversity_metric,
           beta_div_rarefied_cmd)])

        # Prep and build the hierarchical clustering command (for rarefied
        # distance matrices)
        upgma_dir = '%s/rare_upgma/' % metric_output_dir
        create_dir(upgma_dir)
        hierarchical_cluster_cmd =\
         '%s %s/upgma_cluster.py -i %s -o %s %s' %\
         (python_exe_fp, script_dir, dm_dir, upgma_dir, upgma_params_str)
        commands.append(
         [('UPGMA on rarefied distance matrix (%s)' % beta_diversity_metric,
           hierarchical_cluster_cmd)])

        # Build the consensus tree command
        # NOTE(review): consensus_tree.py receives the upgma_cluster options
        # here (kept as-is); confirm whether it should have its own section.
        consensus_tree_cmd =\
         '%s %s/consensus_tree.py -i %s -o %s %s' %\
         (python_exe_fp, script_dir, upgma_dir, consensus_tree_fp,
          upgma_params_str)
        commands.append(
         [('consensus on rarefied distance matrices (%s)' % beta_diversity_metric,
           consensus_tree_cmd)])

        # Prep the tree compare command
        tree_compare_dir = '%s/upgma_cmp/' % metric_output_dir
        create_dir(tree_compare_dir)
        params_str = section_params_str('tree_compare')

        # Build the tree compare command
        if master_tree == "full":
            master_tree_fp = full_tree_fp
        elif master_tree == "consensus":
            master_tree_fp = consensus_tree_fp
        else:
            raise RuntimeError('master tree method "%s" not found' % (master_tree,))
        tree_compare_cmd = '%s %s/tree_compare.py -s %s -m %s -o %s %s' %\
         (python_exe_fp, script_dir, upgma_dir, master_tree_fp,
          tree_compare_dir, params_str)
        commands.append(
         [('Tree compare (%s)' % beta_diversity_metric, tree_compare_cmd)])

        # Prep the PCoA command
        pcoa_dir = '%s/pcoa/' % metric_output_dir
        create_dir(pcoa_dir)
        params_str = section_params_str('principal_coordinates')
        # Build the PCoA command
        pcoa_cmd = '%s %s/principal_coordinates.py -i %s -o %s %s' %\
         (python_exe_fp, script_dir, dm_dir, pcoa_dir, params_str)
        commands.append(
         [('Principal coordinates (%s)' % beta_diversity_metric, pcoa_cmd)])

        # Prep the emperor plots command
        emperor_dir = '%s/emperor_pcoa_plots/' % metric_output_dir
        create_dir(emperor_dir)
        params_str = section_params_str('make_emperor')
        emperor_cmd = 'make_emperor.py -i %s -o %s -m %s %s' %\
         (pcoa_dir, emperor_dir, mapping_fp, params_str)
        commands.append(
         [('emperor plots (%s)' % beta_diversity_metric, emperor_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
Exemplo n.º 30
0
def assign_tax(repset_fasta_fp,
                   output_dir,
                   command_handler,
                   params,
                   qiime_config,
                   parallel=False,
                   logger=None,
                   status_update_callback=print_to_stdout):
    """ Build and run the taxonomy assignment command for a rep set

        repset_fasta_fp: fasta file passed with -i to the assigner
        output_dir: parent directory; results go to
         <output_dir>/<assignment_method>_assigned_taxonomy
        command_handler: callable invoked once with the command list
        params: dict-like mapping script names to their option dicts; the
         'assign_taxonomy' and 'parallel' sections are read if present
        qiime_config: only passed to WorkflowLogger when one is created
        parallel: if True and the method is rdp, blast or uclust, the
         method-specific parallel_assign_taxonomy_<method>.py is used
        logger: existing logger to reuse; if None a WorkflowLogger is
         created and the command handler is asked to close it on success
        status_update_callback: forwarded to command_handler

        Any existing assignment output directory is removed before the
         command handler is called.

        Returns the expected path of the taxonomy assignments file.
    """
    input_basename = splitext(split(repset_fasta_fp)[1])[0]
    commands = []
    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    ## Prep the taxonomy assignment command
    try:
        assignment_method = params['assign_taxonomy']['assignment_method']
    except KeyError:
        assignment_method = 'rdp'
    assign_taxonomy_dir = '%s/%s_assigned_taxonomy' %\
     (output_dir, assignment_method)
    taxonomy_fp = '%s/%s_tax_assignments.txt' % \
     (assign_taxonomy_dir, input_basename)
    if parallel and assignment_method in ('rdp', 'blast', 'uclust'):
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params['parallel'])
        except KeyError:
            params_str = ''

        try:
            # Want to find a cleaner strategy for this: the parallel script
            # is method-specific, so doesn't take a --assignment_method
            # option. This works for now though.
            d = params['assign_taxonomy'].copy()
            d.pop('assignment_method', None)
            params_str += ' %s' % get_params_str(d)
        except KeyError:
            pass

        # Build the parallel taxonomy assignment command
        assign_taxonomy_cmd = \
         'parallel_assign_taxonomy_%s.py -i %s -o %s -T %s' %\
         (assignment_method, repset_fasta_fp, assign_taxonomy_dir, params_str)
    else:
        try:
            params_str = get_params_str(params['assign_taxonomy'])
        except KeyError:
            params_str = ''
        # Build the taxonomy assignment command
        assign_taxonomy_cmd = 'assign_taxonomy.py -o %s -i %s %s' %\
         (assign_taxonomy_dir, repset_fasta_fp, params_str)
    # Clear output left over from a previous run before queueing the command
    if exists(assign_taxonomy_dir):
        rmtree(assign_taxonomy_dir)
    commands.append([('Assign taxonomy', assign_taxonomy_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
    return taxonomy_fp
Exemplo n.º 31
0
def run_summarize_taxa_through_plots(otu_table_fp, 
                                     mapping_fp,
                                     output_dir,
                                     mapping_cat,
                                     sort,
                                     command_handler,
                                     params,
                                     qiime_config,
                                     logger=None, 
                                     suppress_md5=False,
                                     status_update_callback=print_to_stdout):
    """ Run the data preparation for summarizing taxonomies and generating plots

        The steps performed by this function are:
          1) Summarize OTU by Category (only if a mapping category is given
           or found in the parameters)
          2) Sort the OTU table (only if sort is true)
          3) Summarize Taxonomy
          4) Plot Taxonomy Summary

        otu_table_fp: input OTU table
        mapping_fp: mapping file passed to the summarize/sort scripts
        output_dir: directory (created here) that receives all output
        mapping_cat: category to collapse samples by; if falsy, the
         summarize_otu_by_cat:mapping_category parameter is used when set
        sort: if true, sort the OTU table before summarizing taxa
        command_handler: callable invoked once with the full command list
        params: dict-like mapping script names to their option dicts;
         scripts without a section get no extra options
        qiime_config: dict-like, 'python_exe_fp' is read from it
        logger: existing logger to reuse; if None a WorkflowLogger is
         created and the command handler is asked to close it on success
        suppress_md5: if True, don't log md5 sums of the input files
        status_update_callback: forwarded to command_handler
    """
    def section_option(section, option):
        # Single option value, or None when the section/option is absent
        # (or params is not subscriptable, e.g. None)
        try:
            return params[section][option]
        except (KeyError, TypeError):
            return None

    def section_params_str(section, exclude=()):
        # Command-line string for a script's options; '' when the section is
        # absent. Options named in exclude are dropped from a copy so the
        # caller's params are left untouched.
        try:
            options = params[section]
        except (KeyError, TypeError):
            return ''
        if exclude:
            options = options.copy()
            for name in exclude:
                options.pop(name, None)
        return get_params_str(options)

    # Prepare some variables for the later steps
    create_dir(output_dir)

    commands = []
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()

    if logger is None:
        logger = WorkflowLogger(generate_log_fp(output_dir),
                                params=params,
                                qiime_config=qiime_config)
        close_logger_on_success = True
    else:
        close_logger_on_success = False

    if not suppress_md5:
        log_input_md5s(logger, [otu_table_fp, mapping_fp])

    # if mapping category not passed via command-line,
    # check if it is passed in params file
    if not mapping_cat:
        mapping_cat = section_option('summarize_otu_by_cat',
                                     'mapping_category')

    if mapping_cat:
        # The mapping category is passed explicitly with -c below, so it must
        # not be repeated among the pass-through options.
        params_str = section_params_str('summarize_otu_by_cat',
                                        exclude=('mapping_category',))
        output_fp = join(output_dir,
                         '%s_otu_table.biom' % (mapping_cat.replace(' ', '-')))
        # Build the summarize otu by category command
        summarize_otu_by_cat_cmd = \
         "%s %s/summarize_otu_by_cat.py -m %s -i %s -o %s -c '%s' %s" %\
         (python_exe_fp, script_dir, mapping_fp, otu_table_fp, output_fp,
          mapping_cat, params_str)

        commands.append(
         [('Summarize OTU table by Category', summarize_otu_by_cat_cmd)])

        # later steps work on the collapsed table
        otu_table_fp = output_fp

    # Build the sort OTU table command
    if sort:
        # Prep the sort_otu_table command
        params_str = section_params_str('sort_otu_table')

        # define output otu table
        sorted_fp = join(output_dir,
                         splitext(split(otu_table_fp)[-1])[0] + '_sorted.biom')

        if mapping_cat or params_str == '':
            # for this case we don't have a collapsed mapping file so must
            # handle separately
            sort_otu_table_cmd = \
             "%s %s/sort_otu_table.py -i %s -o %s" %\
             (python_exe_fp, script_dir, otu_table_fp, sorted_fp)
        else:
            sort_otu_table_cmd = \
             "%s %s/sort_otu_table.py -i %s -o %s -m %s %s" %\
             (python_exe_fp, script_dir, otu_table_fp, sorted_fp,
              mapping_fp, params_str)

        commands.append([('Sort OTU Table', sort_otu_table_cmd)])

        # redefine otu_table_fp to use
        otu_table_fp = sorted_fp

    # Prep and build the summarize taxonomy command
    params_str = section_params_str('summarize_taxa')
    summarize_taxa_cmd = '%s %s/summarize_taxa.py -i %s -o %s %s' %\
     (python_exe_fp, script_dir, otu_table_fp, output_dir, params_str)

    commands.append([('Summarize Taxonomy', summarize_taxa_cmd)])

    # Work out the per-level summary files that the plotting step takes as
    # input
    sum_taxa_levels = section_option('summarize_taxa', 'level')
    if sum_taxa_levels:
        levels = sum_taxa_levels.split(',')
    else:
        # this is the default levels from summarize_taxa, but cannot import
        # script to get these values
        levels = [2, 3, 4, 5, 6]
    basename = join(output_dir, splitext(split(otu_table_fp)[-1])[0])
    sum_taxa_fps = ['%s_L%s.txt' % (basename, level) for level in levels]

    # Prep the plot taxa summary plot command(s)
    taxa_summary_plots_dir = '%s/taxa_summary_plots/' % output_dir
    create_dir(taxa_summary_plots_dir)

    params_str = section_params_str('plot_taxa_summary')
    # Build the plot taxa summary plot command(s)
    plot_taxa_summary_cmd =\
         '%s %s/plot_taxa_summary.py -i %s -o %s %s' %\
         (python_exe_fp, script_dir, ','.join(sum_taxa_fps),
          taxa_summary_plots_dir, params_str)

    commands.append(
         [('Plot Taxonomy Summary', plot_taxa_summary_cmd)])

    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback,
                    logger=logger,
                    close_logger_on_success=close_logger_on_success)
Exemplo n.º 32
0
def pick_reference_otus(input_fp,
                        output_dir,
                        otu_picking_method,
                        refseqs_fp,
                        parallel,
                        params,
                        logger,
                        similarity_override=None):
    """ Build (but do not run) a reference-based OTU picking command

        input_fp: sequences passed with -i
        output_dir: directory passed with -o
        otu_picking_method: OTU picking method name; with parallel it also
         selects the parallel_pick_otus_<method>.py script
        refseqs_fp: reference sequences passed with -r
        parallel: if true and the method is 'uclust_ref', the parallel
         script is used; otherwise pick_otus.py is used
        params: dict-like mapping script names to their option dicts; it is
         deep-copied, so the caller's object is not modified
        logger: object with a write() method, used to record forced or
         overridden options
        similarity_override: if not None, replaces pick_otus:similarity

        Returns the command as a string.

        Raises WorkflowError if params contains pick_otus:refseqs_fp.
    """
    params_copy = deepcopy(params)
    if 'pick_otus' in params_copy and 'refseqs_fp' in params_copy['pick_otus']:
        raise WorkflowError(
         "Cannot pass pick_otus:refseqs_fp in parameters file. This can only be"
         " passed on the command line or through the API.")
    if similarity_override is not None:
        logger.write('Overridding similiary with %1.3f.\n' % similarity_override)
        if 'pick_otus' in params_copy:
            params_copy['pick_otus']['similarity'] = str(similarity_override)
        else:
            params_copy['pick_otus'] = {'similarity':str(similarity_override)}

    if parallel and otu_picking_method == 'uclust_ref':
        # Grab the parallel-specific parameters
        try:
            params_str = get_params_str(params_copy['parallel'])
        except KeyError:
            params_str = ''

        # Grab the OTU picker parameters; a missing pick_otus section simply
        # means there are no options to pass through
        try:
            pick_otus_params = params_copy['pick_otus']
        except KeyError:
            pick_otus_params = {}
        # Want to find a cleaner strategy for this: the parallel script
        # is method-specific, so doesn't take a --otu_picking_method
        # option. This works for now though.
        pick_otus_params.pop('otu_picking_method', None)

        params_str += ' %s' % get_params_str(pick_otus_params)
        otu_picking_script = 'parallel_pick_otus_%s.py' % otu_picking_method
        # Build the OTU picking command
        pick_otus_cmd = '%s -i %s -o %s -r %s -T %s' %\
          (otu_picking_script,
           input_fp,
           output_dir,
           refseqs_fp,
           params_str)
    else:
        try:
            params_str = get_params_str(params_copy['pick_otus'])
        except KeyError:
            params_str = ''
        # Since this is reference-based OTU picking we always want to
        # suppress new clusters -- force it here.
        params_str += ' --suppress_new_clusters'
        logger.write("Forcing --suppress_new_clusters as this is reference-based OTU picking.\n\n")
        # Build the OTU picking command
        pick_otus_cmd = 'pick_otus.py -i %s -o %s -r %s -m %s %s' %\
         (input_fp,
          output_dir,
          refseqs_fp,
          otu_picking_method,
          params_str)
    return pick_otus_cmd
Exemplo n.º 33
0
def run_core_diversity_analyses(
    biom_fp,
    mapping_fp,
    sampling_depth,
    output_dir,
    qiime_config,
    command_handler=call_commands_serially,
    tree_fp=None,
    params=None,
    categories=None,
    arare_min_rare_depth=10,
    arare_num_steps=10,
    parallel=False,
    suppress_taxa_summary=False,
    suppress_beta_diversity=False,
    suppress_alpha_diversity=False,
    suppress_otu_category_significance=False,
    status_update_callback=print_to_stdout):
    """ Run the core diversity analyses and write an index page of results

        The steps performed by this function are:
          1) Summarize the input BIOM table and filter out samples with
           fewer than sampling_depth sequences (all later steps use the
           filtered table);
          2) Beta diversity through plots at even sampling, plus distance
           boxplots for each category (unless suppress_beta_diversity);
          3) Alpha rarefaction, plus alpha diversity comparisons for each
           category (unless suppress_alpha_diversity);
          4) Taxa summary plots, overall and per category (unless
           suppress_taxa_summary);
          5) OTU category significance for each category (unless
           suppress_otu_category_significance);
          6) Compress the filtered BIOM table and generate index.html in
           output_dir linking to the results.

        biom_fp: input BIOM table
        mapping_fp: mapping file
        sampling_depth: integer; minimum sample sequence count, even
         sampling depth for beta diversity and max rarefaction depth
        output_dir: directory (created here) that receives all output
        qiime_config: passed to the logger and the sub-workflows
        command_handler: callable used to run each batch of commands
        tree_fp: optional tree passed to the diversity sub-workflows
        params: dict-like mapping script names to their option dicts; if
         None, parse_qiime_parameters([]) is used
        categories: mapping file column headers to analyze; each must
         exist in the mapping file and have more than one value
        arare_min_rare_depth, arare_num_steps: passed to alpha rarefaction
        parallel: passed to the diversity sub-workflows
        status_update_callback: forwarded to the command handler and the
         sub-workflows

        Raises ValueError if a category is not a mapping file column
         header or contains only one value.
    """
    if categories is not None:
        # Validate categories provided by the users
        with open(mapping_fp, 'U') as mapping_f:
            mapping_data, mapping_comments = \
             parse_mapping_file_to_dict(mapping_f)
            metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError("Category '%s' is not a column header "
                 "in your mapping file. "
                 "Categories are case and white space sensitive. Valid "
                 "choices are: (%s)" % (c,', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError("Category '%s' contains only one value. "
                 "Categories analyzed here require at least two values." % c)
    else:
        categories = []

    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []

    # begin logging
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log',log_fp,_index_headers['run_summary']))
    logger = WorkflowLogger(log_fp,
                            params=params,
                            qiime_config=qiime_config)
    input_fps = [biom_fp,mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger,input_fps)

    # run print_biom_table_summary.py on input BIOM table
    try:
        params_str = get_params_str(params['print_biom_table_summary'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    print_biom_table_summary_cmd = \
     "print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % \
     (biom_fp, biom_table_stats_output_fp,params_str)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))
    commands.append([('Generate BIOM table summary',
                      print_biom_table_summary_cmd)])

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" %\
     (biom_fp,filtered_biom_fp,sampling_depth)
    commands.append([('Filter low sequence count samples from table (minimum sequence count: %d)' % sampling_depth,
                      filter_samples_cmd)])
    biom_fp = filtered_biom_fp

    # run initial commands and reset the command list
    command_handler(commands,
                    status_update_callback,
                    logger,
                    close_logger_on_success=False)
    commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,sampling_depth)
        even_dm_fps = run_beta_diversity_through_plots(
         otu_table_fp=biom_fp,
         mapping_fp=mapping_fp,
         output_dir=bdiv_even_output_dir,
         command_handler=command_handler,
         params=params,
         qiime_config=qiime_config,
         sampling_depth=sampling_depth,
         # force suppression of distance histograms - boxplots work better
         # in this context, and are created below.
         histogram_categories=[],
         tree_fp=tree_fp,
         parallel=parallel,
         logger=logger,
         suppress_md5=True,
         status_update_callback=status_update_callback)

        for bdiv_metric, dm_fp in even_dm_fps:
            bdiv_header = _index_headers['beta_diversity_even'] % sampling_depth
            # the boxplot directory and options are the same for every
            # category of a given metric
            boxplots_output_dir = '%s/%s_boxplots/' % (bdiv_even_output_dir,bdiv_metric)
            try:
                params_str = get_params_str(params['make_distance_boxplots'])
            except KeyError:
                params_str = ''
            for category in categories:
                boxplots_cmd = \
                 'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\
                 (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
                commands.append([('Boxplots (%s)' % category,
                                  boxplots_cmd)])
                index_links.append(('Distance boxplots (%s)' % bdiv_metric,
                                    '%s/%s_Distances.pdf' % \
                                     (boxplots_output_dir,category),
                                    bdiv_header))
                index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
                                    '%s/%s_Stats.txt' % \
                                     (boxplots_output_dir,category),
                                    bdiv_header))

            # links to the 3D then 2D plots, continuous then discrete coloring
            for dim in ('3', '2'):
                for coloring in ('continuous', 'discrete'):
                    index_links.append(
                     ('%sD plot (%s, %s coloring)' % (dim,bdiv_metric,coloring),
                      '%s/%s_%sd_%s/%s_pc_%sD_PCoA_plots.html' % \
                       (bdiv_even_output_dir,bdiv_metric,dim,coloring,
                        bdiv_metric,dim),
                      bdiv_header))
            index_links.append(('Distance matrix (%s)' % bdiv_metric,
                                '%s/%s_dm.txt' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                bdiv_header))
            index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
                                '%s/%s_pc.txt' % \
                                 (bdiv_even_output_dir,bdiv_metric),
                                bdiv_header))

    if not suppress_alpha_diversity:
        ## Alpha rarefaction workflow
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir,sampling_depth)
        run_alpha_rarefaction(
         otu_table_fp=biom_fp,
         mapping_fp=mapping_fp,
         output_dir=arare_full_output_dir,
         command_handler=command_handler,
         params=params,
         qiime_config=qiime_config,
         tree_fp=tree_fp,
         num_steps=arare_num_steps,
         parallel=parallel,
         logger=logger,
         min_rare_depth=arare_min_rare_depth,
         max_rare_depth=sampling_depth,
         suppress_md5=True,
         status_update_callback=status_update_callback)

        index_links.append(('Alpha rarefaction plots',
                            '%s/alpha_rarefaction_plots/rarefaction_plots.html'\
                              % arare_full_output_dir,
                            _index_headers['alpha_diversity']))

        # the collated files are looked up on disk, i.e. only those that
        # exist once the alpha rarefaction call above has returned
        collated_alpha_diversity_fps = \
         glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''
        for category in categories:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                alpha_comparison_output_fp = '%s/%s_%s.txt' % \
                 (arare_full_output_dir,category,alpha_metric)
                compare_alpha_cmd = \
                 'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' %\
                 (collated_alpha_diversity_fp, mapping_fp, category,
                  alpha_comparison_output_fp, params_str)
                commands.append([('Compare alpha diversity (%s, %s)' %\
                                   (category,alpha_metric),
                                  compare_alpha_cmd)])
                index_links.append(
                 ('Alpha diversity statistics (%s, %s)' % (category,alpha_metric),
                  alpha_comparison_output_fp,
                  _index_headers['alpha_diversity']))

    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        run_summarize_taxa_through_plots(
         otu_table_fp=biom_fp,
         mapping_fp=mapping_fp,
         output_dir=taxa_plots_output_dir,
         mapping_cat=None,
         sort=True,
         command_handler=command_handler,
         params=params,
         qiime_config=qiime_config,
         logger=logger,
         suppress_md5=True,
         status_update_callback=status_update_callback)

        index_links.append(('Taxa summary bar plots',
                            '%s/taxa_summary_plots/bar_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        index_links.append(('Taxa summary area plots',
                            '%s/taxa_summary_plots/area_charts.html'\
                              % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,category)
            run_summarize_taxa_through_plots(
             otu_table_fp=biom_fp,
             mapping_fp=mapping_fp,
             output_dir=taxa_plots_output_dir,
             mapping_cat=category,
             sort=True,
             command_handler=command_handler,
             params=params,
             qiime_config=qiime_config,
             logger=logger,
             suppress_md5=True,
             status_update_callback=status_update_callback)

            index_links.append(('Taxa summary bar plots',
                                '%s/taxa_summary_plots/bar_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
            index_links.append(('Taxa summary area plots',
                                '%s/taxa_summary_plots/area_charts.html'\
                                  % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))

    if not suppress_otu_category_significance:
        # OTU category significance
        for category in categories:
            category_signifance_fp = \
             '%s/category_significance_%s.txt' % (output_dir, category)
            try:
                params_str = get_params_str(params['otu_category_significance'])
            except KeyError:
                params_str = ''
            # Build the OTU category significance command
            category_significance_cmd = \
             'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\
             (biom_fp, mapping_fp, category,
              category_signifance_fp, params_str)
            commands.append([('OTU category significance (%s)' % category,
                              category_significance_cmd)])

            index_links.append(('Category significance (%s)' % category,
                        category_signifance_fp,
                        _index_headers['otu_category_sig']))

    # the filtered table is compressed last, after every command that reads it
    commands.append([('Compress the filtered BIOM table','gzip %s' % filtered_biom_fp)])
    index_links.append(('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
                        '%s.gz' % filtered_biom_fp,
                        _index_headers['run_summary']))

    command_handler(commands, status_update_callback, logger)
    generate_index_page(index_links,index_fp)