def test_run_core_diversity_analyses_error_on_invalid_category(self):
    """run_core_diversity_analyses: error raised on invalid categories"""
    # too few values in 'month' category
    self.assertRaises(ValueError,
                      run_core_diversity_analyses,
                      self.test_data['biom'][0],
                      self.test_data['map'][0],
                      20,
                      output_dir=self.test_out,
                      params=parse_qiime_parameters({}),
                      qiime_config=self.qiime_config,
                      categories=['SampleType', 'month'],
                      tree_fp=self.test_data['tree'][0],
                      parallel=False,
                      status_update_callback=no_status_updates)

    # invalid category name
    self.assertRaises(ValueError,
                      run_core_diversity_analyses,
                      self.test_data['biom'][0],
                      self.test_data['map'][0],
                      20,
                      output_dir=self.test_out,
                      params=parse_qiime_parameters({}),
                      qiime_config=self.qiime_config,
                      categories=['not-a-real-category'],
                      tree_fp=self.test_data['tree'][0],
                      parallel=False,
                      status_update_callback=no_status_updates)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose

    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    print_only = opts.print_only
    mapping_cat = opts.mapping_category
    sort = opts.sort

    if mapping_cat is not None and mapping_fp is None:
        option_parser.error("If passing -c must also pass -m.")

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_summarize_taxa_through_plots(
        otu_table_fp=otu_table_fp,
        mapping_fp=mapping_fp,
        output_dir=output_dir,
        mapping_cat=mapping_cat,
        sort=sort,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        status_update_callback=status_update_callback)
def run_command(self, options, arguments):
    verbose = options["verbose"]

    input_fp = options["input_fp"]
    output_dir = options["output_dir"]
    print_only = options["print_only"]
    parallel = options["parallel"]
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if options["parameter_fp"]:
        try:
            parameter_f = open(options["parameter_fp"])
        except IOError:
            raise QiimeCommandError("Can't open parameters file (%s). Does it "
                                    "exist? Do you have read access?"
                                    % options["parameter_fp"])
        params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    params["parallel"]["jobs_to_start"] = self._validate_jobs_to_start(
        options["jobs_to_start"], qiime_config["jobs_to_start"], parallel)

    try:
        makedirs(output_dir)
    except OSError:
        if options["force"]:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print "Output directory already exists. Please choose " + \
                  "a different directory, or force overwrite with -f."
            exit(1)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_qiime_data_preparation(
        input_fp,
        output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback,
    )
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose

    input_fp = opts.input_fp
    output_dir = opts.output_dir
    print_only = opts.print_only
    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp)
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config["jobs_to_start"]
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            print "Output directory already exists. Please choose " + \
                  "a different directory, or force overwrite with -f."
            exit(1)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_qiime_data_preparation(
        input_fp,
        output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        parallel=parallel,
        status_update_callback=status_update_callback,
    )
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose

    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    print_only = opts.print_only
    mapping_cat = opts.mapping_category
    sort = opts.sort

    if mapping_cat is not None and mapping_fp is None:
        option_parser.error("If passing -c must also pass -m.")

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, I put this check
            # in to help users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please choose"
                                " a different directory, or force overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_summarize_taxa_through_plots(
        otu_table_fp=otu_table_fp,
        mapping_fp=mapping_fp,
        output_dir=output_dir,
        mapping_cat=mapping_cat,
        sort=sort,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        status_update_callback=status_update_callback)
def test_run_core_diversity_analyses_parallel(self):
    """run_core_diversity_analyses functions with categories in parallel"""
    run_core_diversity_analyses(
        self.test_data['biom'][0],
        self.test_data['map'][0],
        20,
        output_dir=self.test_out,
        params=parse_qiime_parameters({}),
        arare_num_steps=3,
        qiime_config=self.qiime_config,
        categories=['SampleType', 'days_since_epoch'],
        tree_fp=self.test_data['tree'][0],
        parallel=True,
        status_update_callback=no_status_updates)

    # Basic sanity test that output directories and files exist
    fps = [
        '%s/bdiv_even20' % self.test_out,
        '%s/arare_max20' % self.test_out,
        '%s/taxa_plots' % self.test_out,
        '%s/bdiv_even20/unweighted_unifrac_dm.txt' % self.test_out,
        '%s/bdiv_even20/weighted_unifrac_pc.txt' % self.test_out,
        '%s/arare_max20/compare_chao1/days_since_epoch_stats.txt' % self.test_out,
        '%s/arare_max20/compare_PD_whole_tree/SampleType_boxplots.pdf' % self.test_out,
        '%s/index.html' % self.test_out,
        '%s/table_mc%d.biom.gz' % (self.test_out, 20)
    ]
    for fp in fps:
        self.assertTrue(exists(fp))
def test_run_core_diversity_analyses_no_categories(self):
    """run_core_diversity_analyses functions without categories"""
    # this takes a long time, so use a longer sigalrm
    run_core_diversity_analyses(
        self.test_data['biom'][0],
        self.test_data['map'][0],
        20,
        output_dir=self.test_out,
        params=parse_qiime_parameters({}),
        qiime_config=self.qiime_config,
        categories=None,
        tree_fp=self.test_data['tree'][0],
        parallel=False,
        status_update_callback=no_status_updates)

    # Basic sanity test that output directories and files exist
    fps = [
        '%s/bdiv_even20' % self.test_out,
        '%s/arare_max20' % self.test_out,
        '%s/taxa_plots' % self.test_out,
        '%s/bdiv_even20/unweighted_unifrac_dm.txt' % self.test_out,
        '%s/bdiv_even20/weighted_unifrac_pc.txt' % self.test_out,
        '%s/index.html' % self.test_out,
        '%s/table_mc%d.biom.gz' % (self.test_out, 20)
    ]
    for fp in fps:
        self.assertTrue(exists(fp))

    # categorical output files don't exist
    self.assertFalse(exists(
        '%s/arare_max20/compare_chao1/days_since_epoch_stats.txt' % self.test_out))
    self.assertFalse(exists(
        '%s/arare_max20/compare_PD_whole_tree/SampleType_boxplots.pdf' % self.test_out))
def test_run_core_diversity_analyses_no_tree(self):
    """run_core_diversity_analyses functions without tree"""
    # this takes a long time, so use a longer sigalrm
    run_core_diversity_analyses(
        self.test_data['biom'][0],
        self.test_data['map'][0],
        20,
        output_dir=self.test_out,
        params=parse_qiime_parameters(
            ['beta_diversity:metrics bray_curtis',
             'alpha_diversity:metrics observed_species,chao1']),
        qiime_config=self.qiime_config,
        categories=['SampleType'],
        tree_fp=None,
        parallel=False,
        status_update_callback=no_status_updates)

    # Basic sanity test that output directories and files exist
    fps = [
        '%s/bdiv_even20' % self.test_out,
        '%s/arare_max20' % self.test_out,
        '%s/taxa_plots' % self.test_out,
        '%s/bdiv_even20/bray_curtis_dm.txt' % self.test_out,
        '%s/arare_max20/compare_observed_species/SampleType_boxplots.pdf' % self.test_out,
        '%s/index.html' % self.test_out,
        '%s/table_mc%d.biom.gz' % (self.test_out, 20)
    ]
    for fp in fps:
        self.assertTrue(exists(fp))

    # phylogenetic diversity output files do not exist
    self.assertFalse(exists(
        '%s/bdiv_even20/unweighted_unifrac_dm.txt' % self.test_out))
def test_run_core_diversity_analyses_no_categories(self):
    """run_core_diversity_analyses functions without categories"""
    # this takes a long time, so use a longer sigalrm
    run_core_diversity_analyses(
        self.test_data['biom'][0],
        self.test_data['map'][0],
        20,
        output_dir=self.test_out,
        params=parse_qiime_parameters({}),
        qiime_config=self.qiime_config,
        categories=None,
        tree_fp=self.test_data['tree'][0],
        parallel=False,
        status_update_callback=no_status_updates)

    # Basic sanity test that output directories and files exist
    fps = [
        '%s/bdiv_even20' % self.test_out,
        '%s/arare_max20' % self.test_out,
        '%s/taxa_plots' % self.test_out,
        '%s/bdiv_even20/unweighted_unifrac_dm.txt' % self.test_out,
        '%s/bdiv_even20/weighted_unifrac_pc.txt' % self.test_out,
        '%s/index.html' % self.test_out,
        '%s/table_mc%d.biom.gz' % (self.test_out, 20)
    ]
    for fp in fps:
        self.assertTrue(exists(fp))

    # categorical output files don't exist
    self.assertFalse(exists(
        '%s/arare_max20/days_since_epoch_chao1.txt' % self.test_out))
    self.assertFalse(exists(
        '%s/arare_max20/SampleType_PD_whole_tree.txt' % self.test_out))
def setUp(self):
    """ """
    self.test_data = get_test_data_fps()
    self.files_to_remove = []
    self.dirs_to_remove = []

    # Create example output directory
    tmp_dir = get_qiime_temp_dir()
    self.test_out = mkdtemp(dir=tmp_dir,
                            prefix='core_qiime_analyses_test_',
                            suffix='')
    self.dirs_to_remove.append(self.test_out)

    self.qiime_config = load_qiime_config()
    self.params = parse_qiime_parameters(params_f1)

    # suppress stderr during tests (one of the system calls in the
    # workflow prints a warning, and we can't suppress that warning with
    # warnings.filterwarnings here because it comes from within the code
    # executed through the system call). Found this trick here:
    # http://stackoverflow.com/questions/9949633/suppressing-print-as-stdout-python
    self.saved_stderr = sys.stderr
    sys.stderr = StringIO()

    initiate_timeout(180)
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    paired_data = opts.paired_data
    parameter_fp = opts.parameter_fp
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
                            "--include_input_dir_path must also be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['extract_barcodes'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    all_files = []
    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']

    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_files += [abspath(join(root, fp))]

    if paired_data:
        all_files, bc_pairs = get_pairs(all_files, read1_indicator,
                                        read2_indicator)

    commands = create_commands_eb(all_files, paired_data, output_dir,
                                  params_str, leading_text, trailing_text,
                                  include_input_dir_path,
                                  remove_filepath_in_name)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
def test_run_core_diversity_analyses_parallel(self):
    """run_core_diversity_analyses functions with categories in parallel"""
    run_core_diversity_analyses(
        self.test_data['biom'][0],
        self.test_data['map'][0],
        20,
        output_dir=self.test_out,
        params=parse_qiime_parameters({}),
        qiime_config=self.qiime_config,
        categories=['SampleType', 'days_since_epoch'],
        tree_fp=self.test_data['tree'][0],
        parallel=True,
        status_update_callback=no_status_updates)

    # Basic sanity test that output directories and files exist
    fps = [
        '%s/bdiv_even20' % self.test_out,
        '%s/arare_max20' % self.test_out,
        '%s/taxa_plots' % self.test_out,
        '%s/bdiv_even20/unweighted_unifrac_dm.txt' % self.test_out,
        '%s/bdiv_even20/weighted_unifrac_pc.txt' % self.test_out,
        '%s/arare_max20/compare_chao1/days_since_epoch_stats.txt' % self.test_out,
        '%s/arare_max20/compare_PD_whole_tree/SampleType_boxplots.pdf' % self.test_out,
        '%s/index.html' % self.test_out,
        '%s/table_mc%d.biom.gz' % (self.test_out, 20)
    ]
    for fp in fps:
        self.assertTrue(exists(fp))
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    parameter_fp = opts.parameter_fp
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    match_barcodes = opts.match_barcodes
    barcode_indicator = opts.barcode_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
                            "--include_input_dir_path must also be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['join_paired_ends'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    all_files = []
    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']

    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_files += [abspath(join(root, fp))]

    pairs, bc_pairs = get_pairs(all_files, read1_indicator, read2_indicator,
                                match_barcodes, barcode_indicator)

    commands = create_commands_jpe(pairs, output_dir,
                                   params_str, leading_text, trailing_text,
                                   include_input_dir_path,
                                   remove_filepath_in_name,
                                   match_barcodes, bc_pairs)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
def test_run_core_diversity_analyses(self):
    """run_core_diversity_analyses functions with categories"""
    run_core_diversity_analyses(
        self.test_data["biom"][0],
        self.test_data["map"][0],
        20,
        output_dir=self.test_out,
        params=parse_qiime_parameters({}),
        qiime_config=self.qiime_config,
        categories=["SampleType", "days_since_epoch"],
        tree_fp=self.test_data["tree"][0],
        parallel=False,
        status_update_callback=no_status_updates,
    )

    # Basic sanity test that output directories and files exist
    fps = [
        "%s/bdiv_even20" % self.test_out,
        "%s/arare_max20" % self.test_out,
        "%s/taxa_plots" % self.test_out,
        "%s/bdiv_even20/unweighted_unifrac_dm.txt" % self.test_out,
        "%s/bdiv_even20/weighted_unifrac_pc.txt" % self.test_out,
        "%s/arare_max20/compare_chao1/days_since_epoch_stats.txt" % self.test_out,
        "%s/arare_max20/compare_PD_whole_tree/SampleType_boxplots.pdf" % self.test_out,
        "%s/index.html" % self.test_out,
        "%s/table_mc%d.biom.gz" % (self.test_out, 20),
    ]
    for fp in fps:
        self.assertTrue(exists(fp))
def setUp(self):
    """setup the test values"""
    self.qiime_config = load_qiime_config()
    self.dirs_to_remove = []
    self.files_to_remove = []

    # this is specific to the web-apps only
    test_dir = abspath(dirname(__file__))
    self.fna_original_fp = os.path.join(test_dir, 'support_files',
                                        'test.fna')

    tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
    if not exists(tmp_dir):
        makedirs(tmp_dir)
        # if test creates the temp dir, also remove it
        self.dirs_to_remove.append(tmp_dir)

    self.wf_out = get_tmp_filename(tmp_dir=tmp_dir,
                                   prefix='qiime_wf_out', suffix='',
                                   result_constructor=str)
    if not exists(self.wf_out):
        makedirs(self.wf_out)
        self.dirs_to_remove.append(self.wf_out)
    #print self.wf_out

    working_dir = self.qiime_config['working_dir'] or './'
    jobs_dir = join(working_dir, 'jobs')
    if not exists(jobs_dir):
        # only clean up the jobs dir if it doesn't already exist
        self.dirs_to_remove.append(jobs_dir)

    self.params = parse_qiime_parameters(qiime_parameters_f.split('\n'))

    signal.signal(signal.SIGALRM, timeout)
    # set the 'alarm' to go off in allowed_seconds seconds
    signal.alarm(allowed_seconds_per_test)
def setUp(self):
    """ """
    self.qiime_config = load_qiime_config()
    self.dirs_to_remove = []
    self.files_to_remove = []

    # this is specific to the web-apps only
    test_dir = abspath(dirname(__file__))
    sff_original_fp = os.path.join(test_dir, 'support_files',
                                   'Fasting_subset.sff')
    self.sff_fp = os.path.join('/%s/' % environ['HOME'],
                               'Fasting_subset.sff')
    self.files_to_remove.append(self.sff_fp)
    copy(sff_original_fp, self.sff_fp)

    self.illumina_fps = [os.path.join(test_dir, 'support_files',
                                      's_8_1_sequence_100_records.txt'),
                         os.path.join(test_dir, 'support_files',
                                      's_8_2_sequence_100_records.txt')]
    self.illumina_map_fp = os.path.join(test_dir, 'support_files',
                                        's8_map_incomplete.txt')

    self.fasta_fps = [os.path.join(test_dir, 'support_files',
                                   'test_split_lib_seqs.fasta')]
    self.fasta_map_fp = os.path.join(test_dir, 'support_files',
                                     'fasta_mapping_file.txt')

    tmp_dir = "/%s/test_wf" % environ['HOME']
    self.dirs_to_remove.append(tmp_dir)
    #self.qiime_config['temp_dir'] or '/tmp/'
    if not exists(tmp_dir):
        makedirs(tmp_dir)
        # if test creates the temp dir, also remove it
        #self.dirs_to_remove.append(tmp_dir)

    self.wf_out = "/%s/test_processed_data" % environ['HOME']
    #print self.wf_out
    self.dirs_to_remove.append(self.wf_out)

    self.gg_out = os.path.join(self.wf_out, 'gg_97_otus')
    if not exists(self.gg_out):
        makedirs(self.gg_out)
        #self.dirs_to_remove.append(self.gg_out)

    self.fasting_mapping_fp = get_tmp_filename(tmp_dir=tmp_dir,
                                               prefix='qiime_wf_mapping',
                                               suffix='.txt')
    fasting_mapping_f = open(self.fasting_mapping_fp, 'w')
    fasting_mapping_f.write(fasting_map)
    fasting_mapping_f.close()
    self.files_to_remove.append(self.fasting_mapping_fp)

    self.params = parse_qiime_parameters(qiime_parameters_f)

    signal.signal(signal.SIGALRM, timeout)
    # set the 'alarm' to go off in allowed_seconds seconds
    signal.alarm(allowed_seconds_per_test)
def setUp(self):
    """ """
    self.test_data = get_test_data_fps()
    self.files_to_remove = []
    self.dirs_to_remove = []

    # Create example output directory
    tmp_dir = get_qiime_temp_dir()
    self.test_out = mkdtemp(dir=tmp_dir,
                            prefix='core_qiime_analyses_test_',
                            suffix='')
    self.dirs_to_remove.append(self.test_out)

    self.qiime_config = load_qiime_config()
    self.params = parse_qiime_parameters([])
    self.params_sortmerna = parse_qiime_parameters(
        ['pick_otus:otu_picking_method\tsortmerna'])
    self.params_sumaclust = parse_qiime_parameters(
        ['pick_otus:otu_picking_method\tsumaclust'])

    initiate_timeout(60)
def setUp(self):
    """ """
    self.qiime_config = load_qiime_config()
    self.dirs_to_remove = []
    self.files_to_remove = []

    # this is specific to the web-apps only
    test_dir = abspath(dirname(__file__))
    sff_original_fp = os.path.join(test_dir, 'support_files',
                                   'Fasting_subset.sff')

    # copy sff file to working directory
    self.sff_dir = tempfile.mkdtemp()
    self.dirs_to_remove.append(self.sff_dir)
    self.sff_fp = os.path.join(self.sff_dir, 'Fasting_subset.sff')
    copy(sff_original_fp, self.sff_fp)
    self.files_to_remove.append(self.sff_fp)

    tmp_dir = self.qiime_config['temp_dir'] or '/tmp/'
    if not exists(tmp_dir):
        makedirs(tmp_dir)
        # if test creates the temp dir, also remove it
        self.dirs_to_remove.append(tmp_dir)

    self.wf_out = get_tmp_filename(tmp_dir=tmp_dir,
                                   prefix='qiime_wf_out', suffix='',
                                   result_constructor=str)
    self.dirs_to_remove.append(self.wf_out)

    self.fasting_mapping_fp = get_tmp_filename(tmp_dir=tmp_dir,
                                               prefix='qiime_wf_mapping',
                                               suffix='.txt')
    fasting_mapping_f = open(self.fasting_mapping_fp, 'w')
    fasting_mapping_f.write(fasting_map)
    fasting_mapping_f.close()
    self.files_to_remove.append(self.fasting_mapping_fp)

    working_dir = self.qiime_config['working_dir'] or './'
    jobs_dir = join(working_dir, 'jobs')
    if not exists(jobs_dir):
        # only clean up the jobs dir if it doesn't already exist
        self.dirs_to_remove.append(jobs_dir)

    self.params = parse_qiime_parameters(qiime_parameters_f.split('\n'))

    signal.signal(signal.SIGALRM, timeout)
    # set the 'alarm' to go off in allowed_seconds seconds
    signal.alarm(allowed_seconds_per_test)
def setUp(self):
    """ """
    self.test_data = get_test_data_fps()
    self.files_to_remove = []
    self.dirs_to_remove = []

    # Create example output directory
    tmp_dir = get_qiime_temp_dir()
    self.test_out = mkdtemp(dir=tmp_dir,
                            prefix='core_qiime_analyses_test_',
                            suffix='')
    self.dirs_to_remove.append(self.test_out)

    self.qiime_config = load_qiime_config()
    self.params = parse_qiime_parameters([])

    initiate_timeout(60)
def setUp(self):
    """ """
    self.test_data = get_test_data_fps()
    self.files_to_remove = []
    self.dirs_to_remove = []

    # Create example output directory
    tmp_dir = get_qiime_temp_dir()
    self.test_out = mkdtemp(dir=tmp_dir,
                            prefix='core_qiime_analyses_test_',
                            suffix='')
    self.dirs_to_remove.append(self.test_out)

    self.qiime_config = load_qiime_config()
    self.params = parse_qiime_parameters({})

    initiate_timeout(60)
def test_parse_qiime_parameters(self):
    """parse_qiime_parameters: functions with valid input"""
    lines = ["#Don't edit this file!",
             "pick_otus:similarity 0.94",
             "pick_otus:otu_picking_method\tcdhit",
             "align_seqs:verbose",
             "assign_taxonomy:use_rdp\ttRuE",
             "assign_taxonomy:something\tNone",
             "",
             "#some_script:fake_parameter\t99.0"]
    actual = parse_qiime_parameters(lines)
    expected = {'pick_otus':
                {'similarity': '0.94',
                 'otu_picking_method': 'cdhit'},
                'assign_taxonomy':
                {'use_rdp': None}}
    self.assertEqual(actual, expected)
    # default dict functions as expected -- looking up non-existent key
    # returns empty dict
    self.assertEqual(actual['some_other_script'], {})
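# A minimal sketch of the parsing behavior the test above pins down -- not
# QIIME's actual implementation, and the name parse_qiime_parameters_sketch
# is hypothetical. Comment and blank lines are skipped, the first whitespace
# run separates "script:parameter" from the value, a value of "True" is
# stored as None (flag-style option), values of "False"/"None" and valueless
# parameters are dropped, and the defaultdict makes lookups of unknown
# script names return an empty dict.
from collections import defaultdict


def parse_qiime_parameters_sketch(lines):
    result = defaultdict(dict)
    for line in lines:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        fields = line.split(None, 1)
        script_id, param_id = fields[0].split(':')
        if len(fields) < 2:
            # parameter listed without a value (e.g. "align_seqs:verbose")
            continue
        value = fields[1].strip()
        if value.upper() == 'TRUE':
            result[script_id][param_id] = None
        elif value.upper() in ('FALSE', 'NONE'):
            continue
        else:
            result[script_id][param_id] = value
    return result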
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose

    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    print_only = opts.print_only
    seqs_per_sample = opts.seqs_per_sample
    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    create_dir(output_dir, fail_on_exist=not opts.force)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_beta_diversity_through_plots(
        otu_table_fp=otu_table_fp,
        mapping_fp=mapping_fp,
        output_dir=output_dir,
        command_handler=command_handler,
        params=params,
        qiime_config=qiime_config,
        color_by_interesting_fields_only=not opts.color_by_all_fields,
        sampling_depth=seqs_per_sample,
        tree_fp=tree_fp,
        parallel=parallel,
        suppress_emperor_plots=opts.suppress_emperor_plots,
        status_update_callback=status_update_callback)
def run_core_diversity_analyses(
        biom_fp,
        mapping_fp,
        sampling_depth,
        output_dir,
        qiime_config,
        command_handler=call_commands_serially,
        tree_fp=None,
        params=None,
        categories=None,
        arare_min_rare_depth=10,
        arare_num_steps=10,
        parallel=False,
        suppress_taxa_summary=False,
        suppress_beta_diversity=False,
        suppress_alpha_diversity=False,
        suppress_otu_category_significance=False,
        status_update_callback=print_to_stdout):
    """ """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = \
            parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError("Category '%s' is not a column header "
                                 "in your mapping file. "
                                 "Categories are case and white space sensitive. "
                                 "Valid choices are: (%s)"
                                 % (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError("Category '%s' contains only one value. "
                                 "Categories analyzed here require at least "
                                 "two values." % c)
    else:
        categories = []

    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []

    # begin logging
    old_log_fps = glob(join(output_dir, 'log_20*txt'))
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log', log_fp,
                        _index_headers['run_summary']))
    for old_log_fp in old_log_fps:
        index_links.append(('Previous run log', old_log_fp,
                            _index_headers['run_summary']))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run 'biom summarize-table' on input BIOM table
    try:
        params_str = get_params_str(params['biom-summarize-table'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = \
            "biom summarize-table -i %s -o %s --suppress-md5 %s" % \
            (biom_fp, biom_table_stats_output_fp, params_str)
        commands.append([('Generate BIOM table summary',
                          biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n"
                     % biom_table_stats_output_fp)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % \
            (biom_fp, filtered_biom_fp, sampling_depth)
        commands.append([('Filter low sequence count samples from table '
                          '(minimum sequence count: %d)' % sampling_depth,
                          filter_samples_cmd)])
    else:
        logger.write("Skipping filter_samples_from_otu_table.py as %s exists.\n\n"
                     % filtered_biom_fp)
    biom_fp = filtered_biom_fp

    # run initial commands and reset the command list
    if len(commands) > 0:
        command_handler(commands,
                        status_update_callback,
                        logger,
                        close_logger_on_success=False)
        commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir, sampling_depth)
        # Need to check for the existence of any distance matrices, since the
        # user can select which will be generated.
        existing_dm_fps = glob('%s/*_dm.txt' % bdiv_even_output_dir)
        if len(existing_dm_fps) == 0:
            even_dm_fps = run_beta_diversity_through_plots(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=bdiv_even_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                sampling_depth=sampling_depth,
                tree_fp=tree_fp,
                parallel=parallel,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback)
        else:
            logger.write("Skipping beta_diversity_through_plots.py as %s exist(s).\n\n"
                         % ', '.join(existing_dm_fps))
            even_dm_fps = [(split(fp)[1].strip('_dm.txt'), fp)
                           for fp in existing_dm_fps]

        # Get make_distance_boxplots parameters
        try:
            params_str = get_params_str(params['make_distance_boxplots'])
        except KeyError:
            params_str = ''

        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = '%s/%s_boxplots/' % \
                    (bdiv_even_output_dir, bdiv_metric)
                plot_output_fp = '%s/%s_Distances.pdf' % \
                    (boxplots_output_dir, category)
                stats_output_fp = '%s/%s_Stats.txt' % \
                    (boxplots_output_dir, category)
                if not exists(plot_output_fp):
                    boxplots_cmd = \
                        'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' % \
                        (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
                    commands.append([('Boxplots (%s)' % category, boxplots_cmd)])
                else:
                    logger.write("Skipping make_distance_boxplots.py for %s as %s exists.\n\n"
                                 % (category, plot_output_fp))
                index_links.append(('Distance boxplots (%s)' % bdiv_metric,
                                    plot_output_fp,
                                    _index_headers['beta_diversity_even'] % sampling_depth))
                index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
                                    stats_output_fp,
                                    _index_headers['beta_diversity_even'] % sampling_depth))

            index_links.append(('PCoA plot (%s)' % bdiv_metric,
                                '%s/%s_emperor_pcoa_plot/index.html' %
                                (bdiv_even_output_dir, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Distance matrix (%s)' % bdiv_metric,
                                '%s/%s_dm.txt' %
                                (bdiv_even_output_dir, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
                                '%s/%s_pc.txt' %
                                (bdiv_even_output_dir, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))

    if not suppress_alpha_diversity:
        # Alpha rarefaction workflow
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir, sampling_depth)
        rarefaction_plots_output_fp = \
            '%s/alpha_rarefaction_plots/rarefaction_plots.html' % arare_full_output_dir
        if not exists(rarefaction_plots_output_fp):
            run_alpha_rarefaction(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=arare_full_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                tree_fp=tree_fp,
                num_steps=arare_num_steps,
                parallel=parallel,
                logger=logger,
                min_rare_depth=arare_min_rare_depth,
                max_rare_depth=sampling_depth,
                suppress_md5=True,
                status_update_callback=status_update_callback)
        else:
            logger.write("Skipping alpha_rarefaction.py as %s exists.\n\n"
                         % rarefaction_plots_output_fp)

        index_links.append(('Alpha rarefaction plots',
                            rarefaction_plots_output_fp,
                            _index_headers['alpha_diversity']))

        collated_alpha_diversity_fps = \
            glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''

        for category in categories:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                alpha_comparison_output_fp = '%s/%s_%s.txt' % \
                    (arare_full_output_dir, category, alpha_metric)
                if not exists(alpha_comparison_output_fp):
                    compare_alpha_cmd = \
                        'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' % \
                        (collated_alpha_diversity_fp, mapping_fp, category,
                         alpha_comparison_output_fp, params_str)
                    commands.append([('Compare alpha diversity (%s, %s)' %
                                      (category, alpha_metric),
                                      compare_alpha_cmd)])
                else:
                    logger.write("Skipping compare_alpha_diversity.py for %s as %s exists.\n\n"
                                 % (category, alpha_comparison_output_fp))
                index_links.append(
                    ('Alpha diversity statistics (%s, %s)' % (category, alpha_metric),
                     alpha_comparison_output_fp,
                     _index_headers['alpha_diversity']))

    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        # need to check for existence of any html files, since the user can
        # select only certain ones to be generated
        existing_taxa_plot_html_fps = \
            glob(join(output_dir, 'taxa_summary_plots', '*.html'))
        if len(existing_taxa_plot_html_fps) == 0:
            run_summarize_taxa_through_plots(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=taxa_plots_output_dir,
                mapping_cat=None,
                sort=True,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback)
        else:
            logger.write("Skipping summarize_taxa_through_plots.py as %s exist(s).\n\n"
                         % ', '.join(existing_taxa_plot_html_fps))

        index_links.append(('Taxa summary bar plots',
                            '%s/taxa_summary_plots/bar_charts.html'
                            % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        index_links.append(('Taxa summary area plots',
                            '%s/taxa_summary_plots/area_charts.html'
                            % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))

        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir, category)
            # need to check for existence of any html files, since the user can
            # select only certain ones to be generated
            existing_taxa_plot_html_fps = \
                glob('%s/taxa_summary_plots/*.html' % taxa_plots_output_dir)
            if len(existing_taxa_plot_html_fps) == 0:
                run_summarize_taxa_through_plots(
                    otu_table_fp=biom_fp,
                    mapping_fp=mapping_fp,
                    output_dir=taxa_plots_output_dir,
                    mapping_cat=category,
                    sort=True,
                    command_handler=command_handler,
                    params=params,
                    qiime_config=qiime_config,
                    logger=logger,
                    suppress_md5=True,
                    status_update_callback=status_update_callback)
            else:
                logger.write("Skipping summarize_taxa_through_plots.py for %s as %s exist(s).\n\n"
                             % (category, ', '.join(existing_taxa_plot_html_fps)))

            index_links.append(('Taxa summary bar plots',
                                '%s/taxa_summary_plots/bar_charts.html'
                                % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
            index_links.append(('Taxa summary area plots',
                                '%s/taxa_summary_plots/area_charts.html'
                                % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))

    if not suppress_otu_category_significance:
        try:
            params_str = get_params_str(params['otu_category_significance'])
        except KeyError:
            params_str = ''
        # OTU category significance
        for category in categories:
            category_significance_fp = \
                '%s/category_significance_%s.txt' % (output_dir, category)
            if not exists(category_significance_fp):
                # Build the OTU category significance command
                category_significance_cmd = \
                    'otu_category_significance.py -i %s -m %s -c %s -o %s %s' % \
                    (biom_fp, mapping_fp, category,
                     category_significance_fp, params_str)
                commands.append([('OTU category significance (%s)' % category,
                                  category_significance_cmd)])
            else:
                logger.write("Skipping otu_category_significance.py for %s as %s exists.\n\n"
                             % (category, category_significance_fp))

            index_links.append(('Category significance (%s)' % category,
                                category_significance_fp,
                                _index_headers['otu_category_sig']))

    filtered_biom_gzip_fp = '%s.gz' % filtered_biom_fp
    if not exists(filtered_biom_gzip_fp):
        commands.append([('Compress the filtered BIOM table',
                          'gzip %s' % filtered_biom_fp)])
        index_links.append(('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
                            filtered_biom_gzip_fp,
                            _index_headers['run_summary']))
    else:
        logger.write("Skipping compressing of filtered BIOM table as %s exists.\n\n"
                     % filtered_biom_gzip_fp)

    if len(commands) > 0:
        command_handler(commands, status_update_callback, logger)
    else:
        logger.close()

    generate_index_page(index_links, index_fp)
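# Hedged sketch of the get_params_str helper used throughout the workflow
# functions above: it is assumed here to turn one script's parameter dict
# into a command-line fragment, emitting "--name value" for valued
# parameters and a bare "--name" for flag-style parameters stored as None
# (see the parse_qiime_parameters test above). The name
# get_params_str_sketch is hypothetical; this is an illustration, not
# necessarily QIIME's exact implementation.
def get_params_str_sketch(params):
    result = []
    for param_id, param_value in params.items():
        result.append('--%s' % param_id)
        if param_value is not None:
            result.append(str(param_value))
    return ' '.join(result)

# e.g. {'metrics': 'bray_curtis', 'suppress_md5': None} ->
#      "--metrics bray_curtis --suppress_md5"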
def run_core_diversity_analyses(
        biom_fp,
        mapping_fp,
        sampling_depth,
        output_dir,
        qiime_config,
        command_handler=call_commands_serially,
        tree_fp=None,
        params=None,
        categories=None,
        arare_min_rare_depth=10,
        arare_num_steps=10,
        parallel=False,
        suppress_taxa_summary=False,
        suppress_beta_diversity=False,
        suppress_alpha_diversity=False,
        suppress_otu_category_significance=False,
        status_update_callback=print_to_stdout):
    """ """
    if categories is not None:
        # Validate categories provided by the users
        mapping_data, mapping_comments = \
            parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError("Category '%s' is not a column header "
                                 "in your mapping file. "
                                 "Categories are case and white space sensitive. "
                                 "Valid choices are: (%s)"
                                 % (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError("Category '%s' contains only one value. "
                                 "Categories analyzed here require at least "
                                 "two values." % c)
    else:
        categories = []

    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])
    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []

    # begin logging
    log_fp = generate_log_fp(output_dir)
    index_links.append(('Master run log', log_fp,
                        _index_headers['run_summary']))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run print_biom_table_summary.py on input BIOM table
    try:
        params_str = get_params_str(params['print_biom_table_summary'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    print_biom_table_summary_cmd = \
        "print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % \
        (biom_fp, biom_table_stats_output_fp, params_str)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))
    commands.append([('Generate BIOM table summary',
                      print_biom_table_summary_cmd)])

    # filter samples with fewer observations than the requested sampling_depth.
    # since these get filtered for some analyses (eg beta diversity after
    # even sampling) it's useful to filter them here so they're filtered
    # from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % \
        (biom_fp, filtered_biom_fp, sampling_depth)
    commands.append([('Filter low sequence count samples from table '
                      '(minimum sequence count: %d)' % sampling_depth,
                      filter_samples_cmd)])
    biom_fp = filtered_biom_fp

    # run initial commands and reset the command list
    command_handler(commands,
                    status_update_callback,
                    logger,
                    close_logger_on_success=False)
    commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir, sampling_depth)
        even_dm_fps = run_beta_diversity_through_plots(
            otu_table_fp=biom_fp,
            mapping_fp=mapping_fp,
            output_dir=bdiv_even_output_dir,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            sampling_depth=sampling_depth,
            # force suppression of distance histograms - boxplots work better
            # in this context, and are created below.
            histogram_categories=[],
            tree_fp=tree_fp,
            parallel=parallel,
            logger=logger,
            suppress_md5=True,
            status_update_callback=status_update_callback)

        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = '%s/%s_boxplots/' % \
                    (bdiv_even_output_dir, bdiv_metric)
                try:
                    params_str = get_params_str(params['make_distance_boxplots'])
                except KeyError:
                    params_str = ''
                boxplots_cmd = \
                    'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' % \
                    (dm_fp, category, boxplots_output_dir, mapping_fp, params_str)
                commands.append([('Boxplots (%s)' % category, boxplots_cmd)])
                index_links.append(('Distance boxplots (%s)' % bdiv_metric,
                                    '%s/%s_Distances.pdf' %
                                    (boxplots_output_dir, category),
                                    _index_headers['beta_diversity_even'] % sampling_depth))
                index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric,
                                    '%s/%s_Stats.txt' %
                                    (boxplots_output_dir, category),
                                    _index_headers['beta_diversity_even'] % sampling_depth))

            index_links.append(('3D plot (%s, continuous coloring)' % bdiv_metric,
                                '%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html' %
                                (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('3D plot (%s, discrete coloring)' % bdiv_metric,
                                '%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html' %
                                (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('2D plot (%s, continuous coloring)' % bdiv_metric,
                                '%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html' %
                                (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('2D plot (%s, discrete coloring)' % bdiv_metric,
                                '%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html' %
                                (bdiv_even_output_dir, bdiv_metric, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Distance matrix (%s)' % bdiv_metric,
                                '%s/%s_dm.txt' %
                                (bdiv_even_output_dir, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric,
                                '%s/%s_pc.txt' %
                                (bdiv_even_output_dir, bdiv_metric),
                                _index_headers['beta_diversity_even'] % sampling_depth))

    if not suppress_alpha_diversity:
        # Alpha rarefaction workflow
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir, sampling_depth)
        run_alpha_rarefaction(
            otu_table_fp=biom_fp,
            mapping_fp=mapping_fp,
            output_dir=arare_full_output_dir,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            tree_fp=tree_fp,
            num_steps=arare_num_steps,
            parallel=parallel,
            logger=logger,
            min_rare_depth=arare_min_rare_depth,
            max_rare_depth=sampling_depth,
            suppress_md5=True,
            status_update_callback=status_update_callback)

        index_links.append(('Alpha rarefaction plots',
                            '%s/alpha_rarefaction_plots/rarefaction_plots.html'
                            % arare_full_output_dir,
                            _index_headers['alpha_diversity']))

        collated_alpha_diversity_fps = \
            glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''
        for category in categories:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0]
                alpha_comparison_output_fp = '%s/%s_%s.txt' % \
                    (arare_full_output_dir, category, alpha_metric)
                compare_alpha_cmd = \
                    'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' % \
                    (collated_alpha_diversity_fp, mapping_fp, category,
                     alpha_comparison_output_fp, params_str)
                commands.append([('Compare alpha diversity (%s, %s)' %
                                  (category, alpha_metric),
                                  compare_alpha_cmd)])
                index_links.append(
                    ('Alpha diversity statistics (%s, %s)' % (category, alpha_metric),
                     alpha_comparison_output_fp,
                     _index_headers['alpha_diversity']))

    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        run_summarize_taxa_through_plots(
            otu_table_fp=biom_fp,
            mapping_fp=mapping_fp,
            output_dir=taxa_plots_output_dir,
            mapping_cat=None,
            sort=True,
            command_handler=command_handler,
            params=params,
            qiime_config=qiime_config,
            logger=logger,
            suppress_md5=True,
            status_update_callback=status_update_callback)

        index_links.append(('Taxa summary bar plots',
                            '%s/taxa_summary_plots/bar_charts.html'
                            % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))
        index_links.append(('Taxa summary area plots',
                            '%s/taxa_summary_plots/area_charts.html'
                            % taxa_plots_output_dir,
                            _index_headers['taxa_summary']))

        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir, category)
            run_summarize_taxa_through_plots(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=taxa_plots_output_dir,
                mapping_cat=category,
                sort=True,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback)

            index_links.append(('Taxa summary bar plots',
                                '%s/taxa_summary_plots/bar_charts.html'
                                % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))
            index_links.append(('Taxa summary area plots',
                                '%s/taxa_summary_plots/area_charts.html'
                                % taxa_plots_output_dir,
                                _index_headers['taxa_summary_categorical'] % category))

    if not suppress_otu_category_significance:
        # OTU category significance
        for category in categories:
            category_significance_fp = \
                '%s/category_significance_%s.txt' % (output_dir, category)
            try:
                params_str = get_params_str(params['otu_category_significance'])
            except KeyError:
                params_str = ''
            # Build the OTU category significance command
            category_significance_cmd = \
                'otu_category_significance.py -i %s -m %s -c %s -o %s %s' % \
                (biom_fp, mapping_fp, category,
                 category_significance_fp, params_str)
            commands.append([('OTU category significance (%s)' % category,
                              category_significance_cmd)])

            index_links.append(('Category significance (%s)' % category,
                                category_significance_fp,
                                _index_headers['otu_category_sig']))

    commands.append([('Compress the filtered BIOM table',
                      'gzip %s' % filtered_biom_fp)])
    index_links.append(('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
                        '%s.gz' % filtered_biom_fp,
                        _index_headers['run_summary']))

    command_handler(commands, status_update_callback, logger)
    generate_index_page(index_links, index_fp)
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(**script_info)

    parameters = {}

    # get the tree insertion method to use
    module = opts.insertion_method

    # create output directory
    output_dir = opts.output_dir
    create_dir(output_dir)

    # list of tree insertion methods
    tree_insertion_module_names = \
        {'raxml_v730': cogent.app.raxml_v730,
         'parsinsert': cogent.app.parsinsert,
         'pplacer': cogent.app.pplacer}

    # load input sequences and convert to phylip since the tools require
    # the query sequences to have phylip-compliant names
    load_aln = MinimalFastaParser(open(opts.input_fasta_fp, 'U'))
    aln = DenseAlignment(load_aln)
    seqs, align_map = aln.toPhylip()

    # param_dict stays empty if no method parameters file is provided
    param_dict = {}
    if opts.method_params_fp:
        param_dict = parse_qiime_parameters(open(opts.method_params_fp, 'U'))

    if module == 'raxml_v730':
        # load the reference sequences
        load_ref_aln = \
            DenseAlignment(MinimalFastaParser(open(opts.refseq_fp, 'U')))

        # combine and load the reference plus query
        combined_aln = MinimalFastaParser(StringIO(load_ref_aln.toFasta() +
                                                   '\n' + aln.toFasta()))

        # overwrite the alignment map
        aln = DenseAlignment(combined_aln)
        seqs, align_map = aln.toPhylip()

        try:
            parameters = param_dict['raxml']
        except KeyError:
            parameters = {}

        tree = convert_tree_tips(align_map, opts.starting_tree_fp)

        # write out the tree with phylip labels
        updated_tree_fp = join(output_dir,
                               '%s_phylip_named_tree.tre' % (module))
        write_updated_tree_file(updated_tree_fp, tree)

        # set the primary parameters for raxml
        parameters['-w'] = abspath(output_dir) + '/'
        parameters["-n"] = split(splitext(get_tmp_filename())[0])[-1]
        parameters["-t"] = updated_tree_fp

        if "-f" not in parameters:
            parameters["-f"] = 'v'
        if "-m" not in parameters:
            parameters["-m"] = 'GTRGAMMA'

    elif module == 'pplacer':
        try:
            parameters = param_dict['pplacer']
        except KeyError:
            parameters = {}

        # make sure stats file is passed
        if not opts.stats_fp:
            raise IOError(
                'When using pplacer, the RAxML produced info file is required.')

        # set the primary parameters for pplacer - allow for user-defined
        parameters['--out-dir'] = abspath(output_dir) + '/'
        parameters["-t"] = opts.starting_tree_fp
        parameters['-r'] = opts.refseq_fp
        parameters['-s'] = opts.stats_fp

    elif module == 'parsinsert':
        try:
            parameters = param_dict['parsinsert']
        except KeyError:
            parameters = {}

        # define log fp
        log_fp = join(output_dir, 'parsinsert.log')

        # define tax assignment values fp
        tax_assign_fp = join(output_dir, 'parsinsert_assignments.log')
        parameters["-l"] = log_fp
        parameters["-o"] = tax_assign_fp
        parameters["-s"] = opts.refseq_fp
        parameters["-t"] = opts.starting_tree_fp

    # call the module and return a tree object
    result = \
        tree_insertion_module_names[module].insert_sequences_into_tree(
            seqs, moltype=DNA, params=parameters)

    result_tree = strip_and_rename_unwanted_labels_from_tree(align_map, result)

    # write out the resulting (relabeled) tree
    final_tree = join(output_dir, '%s_final_placement.tre' % (module))
    write_updated_tree_file(final_tree, result_tree)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose

    input_biom_fp = opts.input_biom_fp
    output_dir = opts.output_dir
    categories = opts.categories
    if categories is not None:
        categories = categories.split(',')
    tree_fp = opts.tree_fp
    mapping_fp = opts.mapping_fp
    parallel = opts.parallel
    sampling_depth = opts.sampling_depth
    nonphylogenetic_diversity = opts.nonphylogenetic_diversity
    print_only = opts.print_only
    suppress_taxa_summary = opts.suppress_taxa_summary
    suppress_beta_diversity = opts.suppress_beta_diversity
    suppress_alpha_diversity = opts.suppress_alpha_diversity
    suppress_group_significance = opts.suppress_group_significance

    if opts.parameter_fp is not None:
        params = parse_qiime_parameters(open(opts.parameter_fp, 'U'))
    else:
        params = parse_qiime_parameters([])

    if nonphylogenetic_diversity:
        # if the user specified --nonphylogenetic_diversity and they
        # didn't define metrics in a parameters file, define them here
        if 'metrics' not in params['beta_diversity']:
            params['beta_diversity']['metrics'] = 'bray_curtis'
        if 'metrics' not in params['alpha_diversity']:
            params['alpha_diversity']['metrics'] = 'observed_otus,chao1'

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    # Create the output directory. If it already exists and the user
    # isn't trying to recover from a failed run, raise an error.
    create_dir(output_dir, fail_on_exist=not opts.recover_from_failure)

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_core_diversity_analyses(
        biom_fp=input_biom_fp,
        mapping_fp=mapping_fp,
        sampling_depth=sampling_depth,
        output_dir=output_dir,
        qiime_config=load_qiime_config(),
        command_handler=command_handler,
        tree_fp=tree_fp,
        params=params,
        categories=categories,
        arare_min_rare_depth=10,
        arare_num_steps=10,
        parallel=parallel,
        suppress_taxa_summary=suppress_taxa_summary,
        suppress_beta_diversity=suppress_beta_diversity,
        suppress_alpha_diversity=suppress_alpha_diversity,
        suppress_group_significance=suppress_group_significance,
        status_update_callback=status_update_callback)
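# The --nonphylogenetic_diversity defaults in main() above lean on
# parse_qiime_parameters returning a defaultdict(dict): indexing a section
# that never appeared in the parameters file yields an empty dict rather
# than raising KeyError, so the 'metrics' membership tests and assignments
# are safe even with an empty parameters file. A minimal demonstration,
# using the hypothetical sketch parser defined after the
# parse_qiime_parameters test above (which mimics this aspect of the real
# function):
params = parse_qiime_parameters_sketch([])
assert 'metrics' not in params['beta_diversity']   # empty dict, no KeyError
params['beta_diversity']['metrics'] = 'bray_curtis'
params['alpha_diversity']['metrics'] = 'observed_otus,chao1'
assert params['beta_diversity'] == {'metrics': 'bray_curtis'}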
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    parameter_fp = opts.parameter_fp
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    mapping_extensions = opts.mapping_extensions.split(',')
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
                            "--include_input_dir_path must also be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['split_libraries_fastq'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    all_fastq = []
    all_mapping = []
    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']

    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_fastq += [abspath(join(root, fp))]

    if demultiplexing_method == 'mapping_barcode_files':
        for root, dirs, fps in walk(input_dir):
            for fp in fps:
                for mapping_extension in mapping_extensions:
                    if fp.endswith(mapping_extension):
                        all_mapping += [abspath(join(root, fp))]

        all_files = get_matching_files(all_fastq, all_mapping,
                                       read_indicator, barcode_indicator,
                                       mapping_indicator)
    else:
        all_files = all_fastq

    commands = create_commands_slf(all_files, demultiplexing_method,
                                   output_dir, params_str, leading_text,
                                   trailing_text, include_input_dir_path,
                                   remove_filepath_in_name,
                                   sampleid_indicator)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
def write_mapping_and_otu_table(data_access, table_col_value, fs_fp, web_fp, file_name_prefix,user_id,meta_id,params_path, rarefied_at,otutable_rarefied_at, jobs_to_start,tax_name,tree_fp): """ Write out the mapping file and OTU-table """ tmp_prefix=get_tmp_filename('',suffix='').strip() total1 = time() unique_cols=[] # get the directory location for the files to write otu_table_file_dir=path.join(fs_fp,'otu_table_files') mapping_file_dir=path.join(fs_fp,'mapping_files') zip_file_dir=path.join(fs_fp,'zip_files') #pcoa_file_dir_loc=path.join(fs_fp,'pcoa_files') # get the url location for the files to write otu_table_file_dir_db=path.join(web_fp,'otu_table_files') mapping_file_dir_db=path.join(web_fp,'mapping_files') zip_file_dir_db=path.join(web_fp,'zip_files') pcoa_file_dir_loc_db=path.join(web_fp,'pcoa_files') # generate random directory name alphabet = "ABCDEFGHIJKLMNOPQRSTUZWXYZ" alphabet += alphabet.lower() alphabet += "01234567890" random_dir_name=''.join([choice(alphabet) for i in range(10)]) unique_name=strftime("%Y_%m_%d_%H_%M_%S")+random_dir_name #plot_unique_name=beta_metric+'_plots_'+unique_name #pcoa_file_dir=os.path.join(pcoa_file_dir_loc,plot_unique_name) #pcoa_file_dir_db=os.path.join(pcoa_file_dir_loc_db,plot_unique_name) #create_dir(pcoa_file_dir) map_files=[] t1 = time() # Get the user details user_details = data_access.getUserDetails(user_id) if not user_details: raise ValueError('No details found for this user') is_admin = user_details['is_admin'] # get mapping results results,cur_description=get_mapping_data(data_access, is_admin, table_col_value, user_id) # need to reconnect to data_access, since it gets closed due to con.close() try: from data_access_connections import data_access_factory from enums import ServerConfig import cx_Oracle data_access = data_access_factory(ServerConfig.data_access_type) except ImportError: print "NOT IMPORTING QIIMEDATAACCESS" pass # get filepaths for mapping files using the run-prefixes tmp_mapping_file = file(os.path.join(mapping_file_dir, file_name_prefix + \ '_map_tmp.txt'), 'w') map_filepath=os.path.join(mapping_file_dir, file_name_prefix + '_' + \ tmp_prefix + '_map.txt') map_filepath_db=os.path.join(mapping_file_dir_db, file_name_prefix + '_' + \ tmp_prefix + '_map.txt') # All mapping files start with an opening hash tmp_mapping_file.write('#') # determine if a column is a controlled vaocabulary columnn controlled_vocab_columns={} for i,column in enumerate(cur_description): if column in ['SAMPLE_NAME', 'BARCODE', 'LINKER', 'PRIMER', \ 'EXPERIMENT_TITLE']: pass else: valid_controlled_vocab=\ data_access.checkIfColumnControlledVocab(column[0]) if valid_controlled_vocab: controlled_vocab_columns[str(column[0])]=i # create a dictionary storing the controlled columns and their values controlled_vocab_lookup={} for column in controlled_vocab_columns: vocab_id_to_valid_term=data_access.getValidControlledVocabTerms(column) controlled_vocab_lookup[controlled_vocab_columns[column]] = \ dict(vocab_id_to_valid_term) # Write out the key field headers to_write = '' for column in cur_description: if column[0]=='SAMPLEID': to_write+='SampleID\t' elif column[0]=='BARCODE': to_write+='BarcodeSequence\t' elif column[0]=='DESCRIPTION': to_write+='Description\t' elif column[0]=='LINKERPRIMERSEQUENCE': to_write+='LinkerPrimerSequence\t' else: to_write += column[0] + '\t' tmp_mapping_file.write(to_write[0:len(to_write)-1] + '\n') sample_to_run_prefix=[] study_id_and_run_prefix=[] samples_list=[] map_file_write=[] duplicate_samples=[] samples_list=[] for 
row in results: # Can't use something like '\t'.join(row) because not all items in list # are string values, hence the explicit loop structure here. to_write = '' sample_to_run_prefix.append(list((str(row[0]),str(row[4]),str(row[3])))) if list((str(row[3]),str(row[4]))) not in study_id_and_run_prefix: study_id_and_run_prefix.append(list((str(row[3]),str(row[4])))) if str(row[0]) in samples_list: # Order of row goes as follows: SampleID, BarcodeSequence, # LinkerPrimerSequence,Run_Prefix, then Description is at the end row=list(row) row[0]=row[0]+'_'+str(row[4]) row=tuple(row) duplicate_samples.append(str(row[0])) else: samples_list.append(str(row[0])) # write out controlled vocabulary values for i,column in enumerate(row): if controlled_vocab_lookup.has_key(i): val = str(column) if val == 'None': new_val = '' else: new_val=controlled_vocab_lookup[i][int(val)] to_write += new_val + '\t' else: val = str(column) if val == 'None': val = '' to_write += val + '\t' # Write the row minus the last tab tmp_mapping_file.write(to_write[0:len(to_write)-1] + '\n') tmp_mapping_file.close() open_tmp_mapping_file=open(os.path.join(mapping_file_dir, file_name_prefix + '_map_tmp.txt')).readlines() mapping_file = file(os.path.join(mapping_file_dir, file_name_prefix + \ '_' + tmp_prefix + '_map.txt'), 'w') mapping_lines = [] all_headers = {} result = [] # iterate over mapping files, parsing each data, current_headers, current_comments = \ parse_mapping_file(open_tmp_mapping_file,strip_quotes=False) all_headers.update(dict.fromkeys(current_headers)) for d in data: current_values = {} for i,v in enumerate(d): if v !='': current_values[current_headers[i]] = v mapping_lines.append(current_values) # remove and place the fields whose order is important del all_headers['SampleID'] del all_headers['BarcodeSequence'] del all_headers['LinkerPrimerSequence'] del all_headers['Description'] all_headers = ['SampleID','BarcodeSequence','LinkerPrimerSequence'] \ + list(all_headers) + ['Description'] # generate the mapping file lines containing all fields result.append(all_headers) for mapping_line in mapping_lines: result.append([mapping_line.get(h,'NA') for h in all_headers if h!='']) #Create an array using multiple columns from mapping file try: parameter_f = open(params_path) except IOError: raise IOError,\ "Can't open parameters file (%s). Does it exist? 
Do you have read access?"\ % params_path # determine if columns should be combined qiime_params=parse_qiime_parameters(parameter_f) try: combined_mapping_categories = \ qiime_params['combine_metadata']['columns'].split(',') except: combined_mapping_categories='' # combine metadata columns if combined_mapping_categories: for mapping_category in combined_mapping_categories: combinecolorby=mapping_category.strip('\'').strip('"').split('_and_') result=combine_map_header_cols(combinecolorby,result) # write final mapping file final_mapping=[] for i,mdata in enumerate(result): if i==0: final_mapping.append('#'+'\t'.join(mdata)) else: final_mapping.append('\t'.join(mdata)) #test=merge_mapping_files([merged_file]) mapping_file.write('\n'.join(final_mapping)) mapping_file.close() #flush result final_mapping=[] result=[] t2 = time() print 'Making map file: %s' % (t2 - t1) t1 = time() # write the OTU-table otu_table_filepath, otu_table_filepath_db, otu_table_fname = \ write_otu_table(data_access, samples_list, tax_name, file_name_prefix, tmp_prefix, otu_table_file_dir, otu_table_file_dir_db) # zip up the files and add the paths to DB zip_fpath, zip_fpath_db = zip_and_add_filepaths(data_access, file_name_prefix, tmp_prefix, meta_id, otu_table_file_dir, mapping_file_dir, zip_file_dir, zip_file_dir_db, map_filepath, map_filepath_db, otu_table_filepath, otu_table_filepath_db, params_path) # rarefy OTU table if necessary if otutable_rarefied_at: rarefy_otu_table(data_access, otu_table_fname, otu_table_file_dir, otu_table_file_dir_db, otutable_rarefied_at, meta_id, otu_table_filepath, otu_table_filepath_db, zip_fpath) # run any additional analysis, such as heatmap, bdiv, alpha-rare run_other_qiime_analysis(data_access, fs_fp, web_fp, otu_table_filepath, map_filepath, file_name_prefix, user_id, meta_id, params_path, rarefied_at, jobs_to_start, tree_fp, zip_fpath, zip_fpath_db)
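The unique-run-directory naming above combines a timestamp with ten random alphanumeric characters. A minimal, standalone sketch of that scheme, using only the standard library (the helper name is illustrative, not part of this codebase):

from random import choice
from string import ascii_letters, digits
from time import strftime

def make_unique_name(n_random_chars=10):
    # timestamp prefix keeps names sortable; random suffix avoids collisions
    alphabet = ascii_letters + digits
    suffix = ''.join([choice(alphabet) for i in range(n_random_chars)])
    return strftime("%Y_%m_%d_%H_%M_%S") + suffix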
def run_core_diversity_analyses( biom_fp, mapping_fp, sampling_depth, output_dir, qiime_config, command_handler=call_commands_serially, tree_fp=None, params=None, categories=None, arare_min_rare_depth=10, arare_num_steps=10, parallel=False, suppress_taxa_summary=False, suppress_beta_diversity=False, suppress_alpha_diversity=False, suppress_otu_category_significance=False, status_update_callback=print_to_stdout, ): """ """ if categories != None: # Validate categories provided by the users mapping_data, mapping_comments = parse_mapping_file_to_dict(open(mapping_fp, "U")) metadata_map = MetadataMap(mapping_data, mapping_comments) for c in categories: if c not in metadata_map.CategoryNames: raise ValueError, ( "Category '%s' is not a column header " "in your mapping file. " "Categories are case and white space sensitive. Valid " "choices are: (%s)" % (c, ", ".join(metadata_map.CategoryNames)) ) if metadata_map.hasSingleCategoryValue(c): raise ValueError, ( "Category '%s' contains only one value. " "Categories analyzed here require at least two values." % c ) else: categories = [] # prep some variables if params == None: params = parse_qiime_parameters([]) create_dir(output_dir) index_fp = "%s/index.html" % output_dir index_links = [] commands = [] # begin logging log_fp = generate_log_fp(output_dir) index_links.append(("Master run log", log_fp, _index_headers["run_summary"])) logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config) input_fps = [biom_fp, mapping_fp] if tree_fp != None: input_fps.append(tree_fp) log_input_md5s(logger, input_fps) # run print_biom_table_summary.py on input BIOM table try: params_str = get_params_str(params["print_biom_table_summary"]) except KeyError: params_str = "" biom_table_stats_output_fp = "%s/biom_table_summary.txt" % output_dir print_biom_table_summary_cmd = "print_biom_table_summary.py -i %s -o %s --suppress_md5 %s" % ( biom_fp, biom_table_stats_output_fp, params_str, ) index_links.append(("BIOM table statistics", biom_table_stats_output_fp, _index_headers["run_summary"])) commands.append([("Generate BIOM table summary", print_biom_table_summary_cmd)]) # filter samples with fewer observations than the requested sampling_depth. # since these get filtered for some analyses (eg beta diversity after # even sampling) it's useful to filter them here so they're filtered # from all analyses. filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth) filter_samples_cmd = "filter_samples_from_otu_table.py -i %s -o %s -n %d" % ( biom_fp, filtered_biom_fp, sampling_depth, ) commands.append( [ ( "Filter low sequence count samples from table (minimum sequence count: %d)" % sampling_depth, filter_samples_cmd, ) ] ) biom_fp = filtered_biom_fp # run initial commands and reset the command list command_handler(commands, status_update_callback, logger, close_logger_on_success=False) commands = [] if not suppress_beta_diversity: bdiv_even_output_dir = "%s/bdiv_even%d/" % (output_dir, sampling_depth) even_dm_fps = run_beta_diversity_through_plots( otu_table_fp=biom_fp, mapping_fp=mapping_fp, output_dir=bdiv_even_output_dir, command_handler=command_handler, params=params, qiime_config=qiime_config, sampling_depth=sampling_depth, # force suppression of distance histograms - boxplots work better # in this context, and are created below. 
histogram_categories=[], tree_fp=tree_fp, parallel=parallel, logger=logger, suppress_md5=True, status_update_callback=status_update_callback, ) for bdiv_metric, dm_fp in even_dm_fps: for category in categories: boxplots_output_dir = "%s/%s_boxplots/" % (bdiv_even_output_dir, bdiv_metric) try: params_str = get_params_str(params["make_distance_boxplots"]) except KeyError: params_str = "" boxplots_cmd = "make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s" % ( dm_fp, category, boxplots_output_dir, mapping_fp, params_str, ) commands.append([("Boxplots (%s)" % category, boxplots_cmd)]) index_links.append( ( "Distance boxplots (%s)" % bdiv_metric, "%s/%s_Distances.pdf" % (boxplots_output_dir, category), _index_headers["beta_diversity_even"] % sampling_depth, ) ) index_links.append( ( "Distance boxplots statistics (%s)" % bdiv_metric, "%s/%s_Stats.txt" % (boxplots_output_dir, category), _index_headers["beta_diversity_even"] % sampling_depth, ) ) index_links.append( ( "3D plot (%s, continuous coloring)" % bdiv_metric, "%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric), _index_headers["beta_diversity_even"] % sampling_depth, ) ) index_links.append( ( "3D plot (%s, discrete coloring)" % bdiv_metric, "%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric), _index_headers["beta_diversity_even"] % sampling_depth, ) ) index_links.append( ( "2D plot (%s, continuous coloring)" % bdiv_metric, "%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric), _index_headers["beta_diversity_even"] % sampling_depth, ) ) index_links.append( ( "2D plot (%s, discrete coloring)" % bdiv_metric, "%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html" % (bdiv_even_output_dir, bdiv_metric, bdiv_metric), _index_headers["beta_diversity_even"] % sampling_depth, ) ) index_links.append( ( "Distance matrix (%s)" % bdiv_metric, "%s/%s_dm.txt" % (bdiv_even_output_dir, bdiv_metric), _index_headers["beta_diversity_even"] % sampling_depth, ) ) index_links.append( ( "Principal coordinate matrix (%s)" % bdiv_metric, "%s/%s_pc.txt" % (bdiv_even_output_dir, bdiv_metric), _index_headers["beta_diversity_even"] % sampling_depth, ) ) if not suppress_alpha_diversity: ## Alpha rarefaction workflow arare_full_output_dir = "%s/arare_max%d/" % (output_dir, sampling_depth) run_alpha_rarefaction( otu_table_fp=biom_fp, mapping_fp=mapping_fp, output_dir=arare_full_output_dir, command_handler=command_handler, params=params, qiime_config=qiime_config, tree_fp=tree_fp, num_steps=arare_num_steps, parallel=parallel, logger=logger, min_rare_depth=arare_min_rare_depth, max_rare_depth=sampling_depth, suppress_md5=True, status_update_callback=status_update_callback, ) index_links.append( ( "Alpha rarefaction plots", "%s/alpha_rarefaction_plots/rarefaction_plots.html" % arare_full_output_dir, _index_headers["alpha_diversity"], ) ) collated_alpha_diversity_fps = glob("%s/alpha_div_collated/*txt" % arare_full_output_dir) try: params_str = get_params_str(params["compare_alpha_diversity"]) except KeyError: params_str = "" for category in categories: for collated_alpha_diversity_fp in collated_alpha_diversity_fps: alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0] alpha_comparison_output_fp = "%s/%s_%s.txt" % (arare_full_output_dir, category, alpha_metric) compare_alpha_cmd = "compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s" % ( collated_alpha_diversity_fp, mapping_fp, category, alpha_comparison_output_fp, params_str, 
) commands.append([("Compare alpha diversity (%s, %s)" % (category, alpha_metric), compare_alpha_cmd)]) index_links.append( ( "Alpha diversity statistics (%s, %s)" % (category, alpha_metric), alpha_comparison_output_fp, _index_headers["alpha_diversity"], ) ) if not suppress_taxa_summary: taxa_plots_output_dir = "%s/taxa_plots/" % output_dir run_summarize_taxa_through_plots( otu_table_fp=biom_fp, mapping_fp=mapping_fp, output_dir=taxa_plots_output_dir, mapping_cat=None, sort=True, command_handler=command_handler, params=params, qiime_config=qiime_config, logger=logger, suppress_md5=True, status_update_callback=status_update_callback, ) index_links.append( ( "Taxa summary bar plots", "%s/taxa_summary_plots/bar_charts.html" % taxa_plots_output_dir, _index_headers["taxa_summary"], ) ) index_links.append( ( "Taxa summary area plots", "%s/taxa_summary_plots/area_charts.html" % taxa_plots_output_dir, _index_headers["taxa_summary"], ) ) for category in categories: taxa_plots_output_dir = "%s/taxa_plots_%s/" % (output_dir, category) run_summarize_taxa_through_plots( otu_table_fp=biom_fp, mapping_fp=mapping_fp, output_dir=taxa_plots_output_dir, mapping_cat=category, sort=True, command_handler=command_handler, params=params, qiime_config=qiime_config, logger=logger, suppress_md5=True, status_update_callback=status_update_callback, ) index_links.append( ( "Taxa summary bar plots", "%s/taxa_summary_plots/bar_charts.html" % taxa_plots_output_dir, _index_headers["taxa_summary_categorical"] % category, ) ) index_links.append( ( "Taxa summary area plots", "%s/taxa_summary_plots/area_charts.html" % taxa_plots_output_dir, _index_headers["taxa_summary_categorical"] % category, ) ) if not suppress_otu_category_significance: # OTU category significance for category in categories: category_significance_fp = "%s/category_significance_%s.txt" % (output_dir, category) try: params_str = get_params_str(params["otu_category_significance"]) except KeyError: params_str = "" # Build the OTU category significance command category_significance_cmd = "otu_category_significance.py -i %s -m %s -c %s -o %s %s" % ( biom_fp, mapping_fp, category, category_significance_fp, params_str, ) commands.append([("OTU category significance (%s)" % category, category_significance_cmd)]) index_links.append( ("Category significance (%s)" % category, category_significance_fp, _index_headers["otu_category_sig"]) ) commands.append([("Compress the filtered BIOM table", "gzip %s" % filtered_biom_fp)]) index_links.append( ( "Filtered BIOM table (minimum sequence count: %d)" % sampling_depth, "%s.gz" % filtered_biom_fp, _index_headers["run_summary"], ) ) command_handler(commands, status_update_callback, logger) generate_index_page(index_links, index_fp)
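A hypothetical minimal invocation of run_core_diversity_analyses as defined above. All filepaths and the 'Treatment' category are placeholders, and qiime_config is assumed to come from load_qiime_config():

run_core_diversity_analyses(
    biom_fp='otu_table.biom',        # placeholder input table
    mapping_fp='map.txt',            # placeholder mapping file
    sampling_depth=100,              # even-sampling depth
    output_dir='core_diversity_out',
    qiime_config=load_qiime_config(),
    tree_fp='rep_set.tre',           # needed for phylogenetic metrics
    categories=['Treatment'],        # mapping columns with at least two values
    parallel=False)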
def run_core_diversity_analyses( biom_fp, mapping_fp, sampling_depth, output_dir, qiime_config, command_handler=call_commands_serially, tree_fp=None, params=None, categories=None, arare_min_rare_depth=10, arare_num_steps=10, parallel=False, status_update_callback=print_to_stdout): """ """ if categories != None: # Validate categories provided by the users mapping_data, mapping_comments = \ parse_mapping_file_to_dict(open(mapping_fp,'U')) metadata_map = MetadataMap(mapping_data, mapping_comments) for c in categories: if c not in metadata_map.CategoryNames: raise ValueError, ("Category '%s' is not a column header " "in your mapping file. " "Categories are case and white space sensitive. Valid " "choices are: (%s)" % (c,', '.join(metadata_map.CategoryNames))) if metadata_map.hasSingleCategoryValue(c): raise ValueError, ("Category '%s' contains only one value. " "Categories analyzed here require at least two values." % c) else: categories= [] # prep some variables if params == None: params = parse_qiime_parameters([]) create_dir(output_dir) index_fp = '%s/index.html' % output_dir index_links = [] commands = [] python_exe_fp = qiime_config['python_exe_fp'] script_dir = get_qiime_scripts_dir() # begin logging log_fp = generate_log_fp(output_dir) index_links.append(('Master run log',log_fp,'Log files')) logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config) input_fps = [biom_fp,mapping_fp] if tree_fp != None: input_fps.append(tree_fp) log_input_md5s(logger,input_fps) bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,sampling_depth) even_dm_fps = run_beta_diversity_through_plots( otu_table_fp=biom_fp, mapping_fp=mapping_fp, output_dir=bdiv_even_output_dir, command_handler=command_handler, params=params, qiime_config=qiime_config, sampling_depth=sampling_depth, # force suppression of distance histograms - boxplots work better # in this context, and are created below. 
histogram_categories=[], tree_fp=tree_fp, parallel=parallel, logger=logger, status_update_callback=status_update_callback) for bdiv_metric, dm_fp in even_dm_fps: for category in categories: boxplots_output_dir = '%s/%s_boxplots/' % (bdiv_even_output_dir,bdiv_metric) try: params_str = get_params_str(params['make_distance_boxplots']) except KeyError: params_str = '' boxplots_cmd = \ 'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' %\ (dm_fp, category, boxplots_output_dir, mapping_fp, params_str) commands.append([('Boxplots (%s)' % category, boxplots_cmd)]) index_links.append(('Distance boxplots (%s)' % bdiv_metric, '%s/%s_Distances.pdf' % \ (boxplots_output_dir,category), 'Beta diversity results (even sampling: %d)' % sampling_depth)) index_links.append(('Distance boxplots statistics (%s)' % bdiv_metric, '%s/%s_Stats.txt' % \ (boxplots_output_dir,category), 'Beta diversity results (even sampling: %d)' % sampling_depth)) index_links.append(('3D plot (%s, continuous coloring)' % bdiv_metric, '%s/%s_3d_continuous/%s_pc_3D_PCoA_plots.html' % \ (bdiv_even_output_dir,bdiv_metric,bdiv_metric), 'Beta diversity results (even sampling: %d)' % sampling_depth)) index_links.append(('3D plot (%s, discrete coloring)' % bdiv_metric, '%s/%s_3d_discrete/%s_pc_3D_PCoA_plots.html' % \ (bdiv_even_output_dir,bdiv_metric,bdiv_metric), 'Beta diversity results (even sampling: %d)' % sampling_depth)) index_links.append(('2D plot (%s, continuous coloring)' % bdiv_metric, '%s/%s_2d_continuous/%s_pc_2D_PCoA_plots.html' % \ (bdiv_even_output_dir,bdiv_metric,bdiv_metric), 'Beta diversity results (even sampling: %d)' % sampling_depth)) index_links.append(('2D plot (%s, discrete coloring)' % bdiv_metric, '%s/%s_2d_discrete/%s_pc_2D_PCoA_plots.html' % \ (bdiv_even_output_dir,bdiv_metric,bdiv_metric), 'Beta diversity results (even sampling: %d)' % sampling_depth)) index_links.append(('Distance matrix (%s)' % bdiv_metric, '%s/%s_dm.txt' % \ (bdiv_even_output_dir,bdiv_metric), 'Beta diversity results (even sampling: %d)' % sampling_depth)) index_links.append(('Principal coordinate matrix (%s)' % bdiv_metric, '%s/%s_pc.txt' % \ (bdiv_even_output_dir,bdiv_metric), 'Beta diversity results (even sampling: %d)' % sampling_depth)) ## Alpha rarefaction workflow arare_full_output_dir = '%s/arare_max%d/' % (output_dir,sampling_depth) run_qiime_alpha_rarefaction( otu_table_fp=biom_fp, mapping_fp=mapping_fp, output_dir=arare_full_output_dir, command_handler=command_handler, params=params, qiime_config=qiime_config, tree_fp=tree_fp, num_steps=arare_num_steps, parallel=parallel, logger=logger, min_rare_depth=arare_min_rare_depth, max_rare_depth=sampling_depth, status_update_callback=status_update_callback) index_links.append(('Alpha rarefaction plots', '%s/alpha_rarefaction_plots/rarefaction_plots.html'\ % arare_full_output_dir, "Alpha rarefaction results")) collated_alpha_diversity_fps = \ glob('%s/alpha_div_collated/*txt' % arare_full_output_dir) try: params_str = get_params_str(params['compare_alpha_diversity']) except KeyError: params_str = '' for c in categories: for collated_alpha_diversity_fp in collated_alpha_diversity_fps: alpha_metric = splitext(split(collated_alpha_diversity_fp)[1])[0] alpha_comparison_output_fp = '%s/%s_%s.txt' % \ (arare_full_output_dir,c,alpha_metric) compare_alpha_cmd = \ 'compare_alpha_diversity.py -i %s -m %s -c %s -d %s -o %s -n 999 %s' %\ (collated_alpha_diversity_fp, mapping_fp, c, sampling_depth, alpha_comparison_output_fp, params_str) commands.append([('Compare alpha diversity (%s, %s)' %\ 
(c,alpha_metric), compare_alpha_cmd)]) index_links.append( ('Alpha diversity statistics (%s, %s)' % (c,alpha_metric), alpha_comparison_output_fp, "Alpha rarefaction results")) taxa_plots_output_dir = '%s/taxa_plots/' % output_dir run_summarize_taxa_through_plots( otu_table_fp=biom_fp, mapping_fp=mapping_fp, output_dir=taxa_plots_output_dir, mapping_cat=None, sort=True, command_handler=command_handler, params=params, qiime_config=qiime_config, logger=logger, status_update_callback=status_update_callback) index_links.append(('Taxa summary bar plots', '%s/taxa_summary_plots/bar_charts.html'\ % taxa_plots_output_dir, "Taxonomic summary results")) index_links.append(('Taxa summary area plots', '%s/taxa_summary_plots/area_charts.html'\ % taxa_plots_output_dir, "Taxonomic summary results")) for c in categories: taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,c) run_summarize_taxa_through_plots( otu_table_fp=biom_fp, mapping_fp=mapping_fp, output_dir=taxa_plots_output_dir, mapping_cat=c, sort=True, command_handler=command_handler, params=params, qiime_config=qiime_config, logger=logger, status_update_callback=status_update_callback) index_links.append(('Taxa summary bar plots', '%s/taxa_summary_plots/bar_charts.html'\ % taxa_plots_output_dir, "Taxonomic summary results (by %s)" % c)) index_links.append(('Taxa summary area plots', '%s/taxa_summary_plots/area_charts.html'\ % taxa_plots_output_dir, "Taxonomic summary results (by %s)" % c)) # OTU category significance for category in categories: category_significance_fp = \ '%s/category_significance_%s.txt' % (output_dir, category) try: params_str = get_params_str(params['otu_category_significance']) except KeyError: params_str = '' # Build the OTU category significance command category_significance_cmd = \ 'otu_category_significance.py -i %s -m %s -c %s -o %s %s' %\ (biom_fp, mapping_fp, category, category_significance_fp, params_str) commands.append([('OTU category significance (%s)' % category, category_significance_cmd)]) index_links.append(('Category significance (%s)' % category, category_significance_fp, "Category results")) command_handler(commands, status_update_callback, logger) generate_index_page(index_links,index_fp)
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) verbose = opts.verbose input_biom_fp = opts.input_biom_fp output_dir = opts.output_dir categories = opts.categories if categories is not None: categories = categories.split(',') tree_fp = opts.tree_fp mapping_fp = opts.mapping_fp verbose = opts.verbose parallel = opts.parallel sampling_depth = opts.sampling_depth nonphylogenetic_diversity = opts.nonphylogenetic_diversity print_only = opts.print_only suppress_taxa_summary = opts.suppress_taxa_summary suppress_beta_diversity = opts.suppress_beta_diversity suppress_alpha_diversity = opts.suppress_alpha_diversity suppress_group_significance = opts.suppress_group_significance if opts.parameter_fp is not None: params = parse_qiime_parameters(open(opts.parameter_fp, 'U')) else: params = parse_qiime_parameters([]) if nonphylogenetic_diversity: # if the user specified --nonphylogenetic_diversity and they # didn't define metrics in a parameters file, define them here if 'metrics' not in params['beta_diversity']: params['beta_diversity']['metrics'] = 'bray_curtis' if 'metrics' not in params['alpha_diversity']: params['alpha_diversity']['metrics'] = 'observed_otus,chao1' else: if tree_fp is None: option_parser.error( "--tree_fp is required unless --nonphylogenetic_diversity " "is passed.") jobs_to_start = opts.jobs_to_start default_jobs_to_start = qiime_config['jobs_to_start'] validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) # Create the output directory. If it already exists and the user # isn't trying to recover from a failed run, raise an error. create_dir(output_dir, fail_on_exist=not opts.recover_from_failure) if print_only: command_handler = print_commands else: command_handler = call_commands_serially if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates run_core_diversity_analyses( biom_fp=input_biom_fp, mapping_fp=mapping_fp, sampling_depth=sampling_depth, output_dir=output_dir, qiime_config=load_qiime_config(), command_handler=command_handler, tree_fp=tree_fp, params=params, categories=categories, arare_min_rare_depth=10, arare_num_steps=10, parallel=parallel, suppress_taxa_summary=suppress_taxa_summary, suppress_beta_diversity=suppress_beta_diversity, suppress_alpha_diversity=suppress_alpha_diversity, suppress_group_significance=suppress_group_significance, status_update_callback=status_update_callback)
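The parameters files consumed throughout these mains use a simple "script_name:parameter value" line format, and parse_qiime_parameters returns a two-level mapping keyed first by script name and then by parameter name. A small sketch of that convention, using the same metrics the main above sets for nonphylogenetic runs:

params = parse_qiime_parameters([
    'beta_diversity:metrics bray_curtis',
    'alpha_diversity:metrics observed_otus,chao1',
])
# sections are keyed by script name, then parameter name
assert params['beta_diversity']['metrics'] == 'bray_curtis'
assert 'metrics' in params['alpha_diversity']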
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) #get all the options cd_dir=path.join(opts.fs_fp,'arare') tmp_prefix=get_tmp_filename('',suffix='').strip() output_dir=path.join(opts.fs_fp,'arare','arare_'+tmp_prefix) web_fp=path.join(opts.web_fp,'arare','arare_'+tmp_prefix) otu_table_fp=opts.otu_table_fp mapping_file_fp=opts.mapping_file_fp file_name_prefix=opts.fname_prefix user_id=int(opts.user_id) meta_id=int(opts.meta_id) bdiv_rarefied_at=int(opts.bdiv_rarefied_at) jobs_to_start=opts.jobs_to_start tree_fp=opts.tree_fp command_handler=call_commands_serially status_update_callback=no_status_updates zip_fpath=opts.zip_fpath zip_fpath_db=opts.zip_fpath_db run_date=opts.run_date force=True try: from data_access_connections import data_access_factory from enums import ServerConfig import cx_Oracle data_access = data_access_factory(ServerConfig.data_access_type) except ImportError: print "NOT IMPORTING QIIMEDATAACCESS" pass try: parameter_f = open(opts.params_path) except IOError: raise IOError,\ "Can't open parameters file (%s). Does it exist? Do you have read access?"\ % opts.params_path params=parse_qiime_parameters(parameter_f) try: makedirs(output_dir) except OSError: if force: pass else: # Since the analysis can take quite a while, I put this check # in to help users avoid overwriting previous output. print "Output directory already exists. Please choose "+\ "a different directory, or force overwrite with -f." exit(1) commands=[] python_exe_fp = qiime_config['python_exe_fp'] script_dir = get_qiime_scripts_dir() logger = WorkflowLogger(generate_log_fp(output_dir), params=params, qiime_config=qiime_config) # determine whether to run alpha-diversity in serial or parallel serial_or_parallel = params['serial_or_parallel']['method'] if serial_or_parallel=='Serial': arare_cmd='%s %s/alpha_rarefaction.py -i %s -m %s -o %s -t %s -p %s -f' %\ (python_exe_fp, script_dir, otu_table_fp, mapping_file_fp, \ output_dir,tree_fp,opts.params_path) else: arare_cmd='%s %s/alpha_rarefaction.py -i %s -m %s -o %s -t %s -a -O 50 -p %s -f' %\ (python_exe_fp, script_dir, otu_table_fp, mapping_file_fp, \ output_dir,tree_fp,opts.params_path) commands.append([('Alpha-Rarefaction',arare_cmd)]) command_handler(commands, status_update_callback, logger) # zip the alpha rarefaction output cmd_call='cd %s; zip -r %s %s' % (cd_dir,zip_fpath,'arare_'+tmp_prefix) system(cmd_call) # convert the filesystem path into a web link web_link=path.join(web_fp, 'alpha_rarefaction_plots', 'rarefaction_plots.html') # add the alpha rarefaction results to the DB valid=data_access.addMetaAnalysisFiles(True, int(meta_id), web_link, 'ARARE', run_date, 'ARARE') if not valid: raise ValueError, 'There was an issue uploading the filepaths to the DB!'
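The zip step above shells out through os.system with a 'cd ...; zip ...' string. An equivalent sketch using subprocess with an explicit working directory, reusing the cd_dir, zip_fpath, and tmp_prefix names from the main above:

from subprocess import check_call

# zip the alpha rarefaction output from within cd_dir, matching the
# 'cd %s; zip -r %s %s' shell string used above
check_call(['zip', '-r', zip_fpath, 'arare_' + tmp_prefix], cwd=cd_dir)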
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) verbose = opts.verbose input_fps = opts.input_fps refseqs_fp = opts.reference_fp output_dir = opts.output_dir verbose = opts.verbose print_only = False percent_subsample = opts.percent_subsample new_ref_set_id = opts.new_ref_set_id prefilter_refseqs_fp = opts.prefilter_refseqs_fp prefilter_percent_id = opts.prefilter_percent_id if prefilter_percent_id == 0.0: prefilter_percent_id = None parallel = opts.parallel # No longer checking that jobs_to_start > 2, but # commenting as we may change our minds about this. #if parallel: raise_error_on_parallel_unavailable() if opts.parameter_fp: try: parameter_f = open(opts.parameter_fp) except IOError: raise IOError,\ "Can't open parameters file (%s). Does it exist? Do you have read access?"\ % opts.parameter_fp params = parse_qiime_parameters(parameter_f) else: params = parse_qiime_parameters([]) # empty list returns empty defaultdict for now jobs_to_start = opts.jobs_to_start default_jobs_to_start = qiime_config['jobs_to_start'] validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) try: makedirs(output_dir) except OSError: if opts.force: pass else: print "Output directory already exists. Please choose "+\ "a different directory, or force overwrite with -f." exit(1) if print_only: command_handler = print_commands else: command_handler = call_commands_serially if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates if len(input_fps) == 1: pick_subsampled_open_referenence_otus( input_fp=input_fps[0], refseqs_fp=refseqs_fp, output_dir=output_dir, percent_subsample=percent_subsample, new_ref_set_id=new_ref_set_id, command_handler=command_handler, params=params, min_otu_size=opts.min_otu_size, qiime_config=qiime_config, prefilter_refseqs_fp=prefilter_refseqs_fp, prefilter_percent_id=prefilter_percent_id, step1_otu_map_fp=opts.step1_otu_map_fp, step1_failures_fasta_fp=opts.step1_failures_fasta_fp, parallel=parallel, suppress_step4=opts.suppress_step4, logger=None, status_update_callback=status_update_callback) else: iterative_pick_subsampled_open_referenence_otus( input_fps=input_fps, refseqs_fp=refseqs_fp, output_dir=output_dir, percent_subsample=percent_subsample, new_ref_set_id=new_ref_set_id, command_handler=command_handler, params=params, min_otu_size=opts.min_otu_size, qiime_config=qiime_config, prefilter_refseqs_fp=prefilter_refseqs_fp, prefilter_percent_id=prefilter_percent_id, step1_otu_map_fp=opts.step1_otu_map_fp, step1_failures_fasta_fp=opts.step1_failures_fasta_fp, parallel=parallel, suppress_step4=opts.suppress_step4, logger=None, status_update_callback=status_update_callback)
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) verbose = opts.verbose input_fp = opts.input_fp output_dir = opts.output_dir verbose = opts.verbose print_only = opts.print_only parallel = opts.parallel # No longer checking that jobs_to_start > 2, but # commenting as we may change our minds about this. #if parallel: raise_error_on_parallel_unavailable() if opts.parameter_fp: try: parameter_f = open(opts.parameter_fp) except IOError: raise IOError,\ "Can't open parameters file (%s). Does it exist? Do you have read access?"\ % opts.parameter_fp params = parse_qiime_parameters(parameter_f) else: params = parse_qiime_parameters([]) # empty list returns empty defaultdict for now jobs_to_start = opts.jobs_to_start default_jobs_to_start = qiime_config['jobs_to_start'] validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) try: makedirs(output_dir) except OSError: if opts.force: pass else: # Since the analysis can take quite a while, I put this check # in to help users avoid overwriting previous output. print "Output directory already exists. Please choose "+\ "a different directory, or force overwrite with -f." exit(1) if print_only: command_handler = print_commands else: command_handler = call_commands_serially if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates run_qiime_data_preparation( input_fp, output_dir, command_handler=command_handler, params=params, qiime_config=qiime_config, parallel=parallel,\ status_update_callback=status_update_callback)
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) if opts.output_dir is None: opts.output_dir = opts.output_filepath + '_dir' if opts.parameter_fp: try: parameter_f = open(opts.parameter_fp, 'U') except IOError: raise IOError("Can't open parameters file (%s). Does it exist? Do you have read access?" % opts.parameter_fp) params = parse_qiime_parameters(parameter_f) parameter_f.close() else: params = parse_qiime_parameters([]) # empty list returns empty defaultdict for now try: makedirs(opts.output_dir) except OSError: if opts.force: pass else: # Since the analysis can take quite a while, I put this check # in to help users avoid overwriting previous output. option_parser.error("Output directory already exists. Please choose" " a different directory, or force overwrite with -f.") if opts.print_only: command_handler = print_commands else: command_handler = call_commands_serially if opts.verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates # set env variable if opts.platform == 'flx': existing_pyro_fp = os.environ['PYRO_LOOKUP_FILE'] new_pyro_fp = os.path.join( os.path.split(existing_pyro_fp)[0], 'LookUp_E123.dat') os.environ['PYRO_LOOKUP_FILE'] = new_pyro_fp elif opts.platform == 'titanium': existing_pyro_fp = os.environ['PYRO_LOOKUP_FILE'] new_pyro_fp = os.path.join( os.path.split(existing_pyro_fp)[0], 'LookUp_Titanium.dat') os.environ['PYRO_LOOKUP_FILE'] = new_pyro_fp else: raise RuntimeError( 'could not find PYRO_LOOKUP_FILE for platform ' + opts.platform) if opts.truncate_len: try: truncate_len_int_check = int(opts.truncate_len) truncate_len = str(truncate_len_int_check) except ValueError: raise ValueError("If specified, truncate_len must be int type.") else: truncate_len = None run_ampliconnoise( mapping_fp=opts.mapping_fp, output_dir=os.path.abspath(opts.output_dir), command_handler=command_handler, params=params, qiime_config=qiime_config, status_update_callback=status_update_callback, chimera_alpha=opts.chimera_alpha, chimera_beta=opts.chimera_beta, sff_txt_fp=opts.sff_filepath, numnodes=opts.np, suppress_perseus=opts.suppress_perseus, output_filepath=os.path.abspath(opts.output_filepath), platform=opts.platform, seqnoise_resolution=opts.seqnoise_resolution, truncate_len=truncate_len )
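The platform-specific PYRO_LOOKUP_FILE handling above duplicates the same three lines across the flx and titanium branches; it reduces to a table lookup. A sketch, with the helper name illustrative and the .dat filenames taken from the code above:

import os

def set_pyro_lookup_file(platform):
    lookup_fnames = {'flx': 'LookUp_E123.dat', 'titanium': 'LookUp_Titanium.dat'}
    try:
        fname = lookup_fnames[platform]
    except KeyError:
        raise RuntimeError('could not find PYRO_LOOKUP_FILE for platform ' + platform)
    # swap the filename while keeping the directory of the existing setting
    existing_pyro_fp = os.environ['PYRO_LOOKUP_FILE']
    os.environ['PYRO_LOOKUP_FILE'] = os.path.join(
        os.path.split(existing_pyro_fp)[0], fname)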
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) verbose = opts.verbose input_fps = opts.input_fps refseqs_fp = opts.reference_fp output_dir = opts.output_dir verbose = opts.verbose print_only = False percent_subsample = opts.percent_subsample new_ref_set_id = opts.new_ref_set_id prefilter_refseqs_fp = opts.prefilter_refseqs_fp prefilter_percent_id = opts.prefilter_percent_id if prefilter_percent_id == 0.0: prefilter_percent_id = None parallel = opts.parallel # No longer checking that jobs_to_start > 2, but # commenting as we may change our minds about this. #if parallel: raise_error_on_parallel_unavailable() if opts.parameter_fp: try: parameter_f = open(opts.parameter_fp, 'U') except IOError: raise IOError,\ "Can't open parameters file (%s). Does it exist? Do you have read access?"\ % opts.parameter_fp params = parse_qiime_parameters(parameter_f) parameter_f.close() else: params = parse_qiime_parameters([]) # empty list returns empty defaultdict for now jobs_to_start = opts.jobs_to_start default_jobs_to_start = qiime_config['jobs_to_start'] validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) try: makedirs(output_dir) except OSError: if opts.force: pass else: option_parser.error("Output directory already exists. Please choose" " a different directory, or force overwrite with -f.") if print_only: command_handler = print_commands else: command_handler = call_commands_serially if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates if len(input_fps) == 1: pick_subsampled_open_reference_otus(input_fp=input_fps[0], refseqs_fp=refseqs_fp, output_dir=output_dir, percent_subsample=percent_subsample, new_ref_set_id=new_ref_set_id, command_handler=command_handler, params=params, min_otu_size=opts.min_otu_size, run_assign_tax=not opts.suppress_taxonomy_assignment, run_align_and_tree=not opts.suppress_align_and_tree, qiime_config=qiime_config, prefilter_refseqs_fp=prefilter_refseqs_fp, prefilter_percent_id=prefilter_percent_id, step1_otu_map_fp=opts.step1_otu_map_fp, step1_failures_fasta_fp=opts.step1_failures_fasta_fp, parallel=parallel, suppress_step4=opts.suppress_step4, logger=None, status_update_callback=status_update_callback) else: iterative_pick_subsampled_open_reference_otus(input_fps=input_fps, refseqs_fp=refseqs_fp, output_dir=output_dir, percent_subsample=percent_subsample, new_ref_set_id=new_ref_set_id, command_handler=command_handler, params=params, min_otu_size=opts.min_otu_size, run_assign_tax=not opts.suppress_taxonomy_assignment, run_align_and_tree=not opts.suppress_align_and_tree, qiime_config=qiime_config, prefilter_refseqs_fp=prefilter_refseqs_fp, prefilter_percent_id=prefilter_percent_id, step1_otu_map_fp=opts.step1_otu_map_fp, step1_failures_fasta_fp=opts.step1_failures_fasta_fp, parallel=parallel, suppress_step4=opts.suppress_step4, logger=None, status_update_callback=status_update_callback)
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) study_id = opts.study_id for i in range(1,25): output_dir='/home/wwwdevuser/user_data/studies/study_%s/processed_data_%s/' % (study_id,i) if not exists(output_dir): break print output_dir sff_fname=opts.sff_fname map_fname = opts.map_fname verbose = opts.verbose print_only = opts.print_only parallel = opts.parallel denoise=opts.denoise if parallel: raise_error_on_parallel_unavailable() try: parameter_f = open(opts.parameter_fp) except IOError: raise IOError,\ "Can't open parameters file (%s). Does it exist? Do you have read access?"\ % opts.parameter_fp try: makedirs(output_dir) except OSError: if opts.force: pass else: # Since the analysis can take quite a while, I put this check # in to help users avoid overwriting previous output. print "Output directory already exists. Please choose "+\ "a different directory, or force overwrite with -f." exit(1) if print_only: command_handler = print_commands else: command_handler = web_app_call_commands_serially if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates run_process_sff_through_pick_otus(sff_input_fp=sff_fname,\ mapping_fp=map_fname,\ output_dir=output_dir,\ denoise=denoise,\ command_handler=command_handler,\ params=parse_qiime_parameters(parameter_f),\ qiime_config=qiime_config,\ parallel=parallel,\ status_update_callback=status_update_callback)
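The numbered-directory scan at the top of the main above can be factored into a helper. A sketch under the same assumptions (at most 24 processed_data directories per study; the function name and the parameterized base directory are illustrative):

from os.path import exists, join

def first_free_processed_dir(studies_dir, study_id, max_dirs=24):
    # return the first processed_data_<i> path that does not exist yet
    for i in range(1, max_dirs + 1):
        candidate = join(studies_dir, 'study_%s' % study_id,
                         'processed_data_%s/' % i)
        if not exists(candidate):
            return candidate
    raise ValueError('No free processed_data directory for study %s' % study_id)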
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) verbose = opts.verbose input_fps = opts.input_fps refseqs_fp = opts.reference_fp output_dir = opts.output_dir otu_picking_method = opts.otu_picking_method verbose = opts.verbose print_only = False percent_subsample = opts.percent_subsample new_ref_set_id = opts.new_ref_set_id prefilter_refseqs_fp = opts.prefilter_refseqs_fp prefilter_percent_id = opts.prefilter_percent_id if prefilter_percent_id == 0.0: prefilter_percent_id = None if otu_picking_method == 'uclust': denovo_otu_picking_method = 'uclust' reference_otu_picking_method = 'uclust_ref' elif otu_picking_method == 'usearch61': denovo_otu_picking_method = 'usearch61' reference_otu_picking_method = 'usearch61_ref' elif otu_picking_method == 'sortmerna_sumaclust': denovo_otu_picking_method = 'sumaclust' reference_otu_picking_method = 'sortmerna' # SortMeRNA uses the E-value to filter out erroneous # sequences; this option does not apply to this # tool if prefilter_percent_id > 0.0: prefilter_percent_id = None else: # it shouldn't be possible to get here option_parser.error('Unknown OTU picking method: %s' % otu_picking_method) parallel = opts.parallel # No longer checking that jobs_to_start > 2, but # commenting as we may change our minds about this. #if parallel: raise_error_on_parallel_unavailable() if opts.parameter_fp: try: parameter_f = open(opts.parameter_fp, 'U') except IOError: raise IOError("Can't open parameters file (%s). Does it exist? " "Do you have read access?" % opts.parameter_fp) params = parse_qiime_parameters(parameter_f) parameter_f.close() else: params = parse_qiime_parameters([]) # empty list returns empty defaultdict for now jobs_to_start = opts.jobs_to_start default_jobs_to_start = qiime_config['jobs_to_start'] validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) try: makedirs(output_dir) except OSError: if opts.force: pass else: option_parser.error("Output directory already exists. 
Please " "choose a different directory, or force overwrite with -f.") if print_only: command_handler = print_commands else: command_handler = call_commands_serially if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates if len(input_fps) == 1: pick_subsampled_open_reference_otus(input_fp=input_fps[0], refseqs_fp=refseqs_fp, output_dir=output_dir, percent_subsample=percent_subsample, new_ref_set_id=new_ref_set_id, command_handler=command_handler, params=params, min_otu_size=opts.min_otu_size, run_assign_tax=not opts.suppress_taxonomy_assignment, run_align_and_tree=not opts.suppress_align_and_tree, qiime_config=qiime_config, prefilter_refseqs_fp=prefilter_refseqs_fp, prefilter_percent_id=prefilter_percent_id, step1_otu_map_fp=opts.step1_otu_map_fp, step1_failures_fasta_fp=opts.step1_failures_fasta_fp, parallel=parallel, suppress_step4=opts.suppress_step4, logger=None, denovo_otu_picking_method=denovo_otu_picking_method, reference_otu_picking_method=reference_otu_picking_method, status_update_callback=status_update_callback) else: iterative_pick_subsampled_open_reference_otus(input_fps=input_fps, refseqs_fp=refseqs_fp, output_dir=output_dir, percent_subsample=percent_subsample, new_ref_set_id=new_ref_set_id, command_handler=command_handler, params=params, min_otu_size=opts.min_otu_size, run_assign_tax=not opts.suppress_taxonomy_assignment, run_align_and_tree=not opts.suppress_align_and_tree, qiime_config=qiime_config, prefilter_refseqs_fp=prefilter_refseqs_fp, prefilter_percent_id=prefilter_percent_id, step1_otu_map_fp=opts.step1_otu_map_fp, step1_failures_fasta_fp=opts.step1_failures_fasta_fp, parallel=parallel, suppress_step4=opts.suppress_step4, logger=None, denovo_otu_picking_method=denovo_otu_picking_method, reference_otu_picking_method=reference_otu_picking_method, status_update_callback=status_update_callback)
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) verbose = opts.verbose input_fp = opts.input_fp reference_fp = opts.reference_fp taxonomy_fp = opts.taxonomy_fp output_dir = opts.output_dir verbose = opts.verbose print_only = opts.print_only assign_taxonomy = opts.assign_taxonomy parallel = opts.parallel # No longer checking that jobs_to_start > 2, but # commenting as we may change our minds about this. # if parallel: raise_error_on_parallel_unavailable() if opts.parameter_fp: try: parameter_f = open(opts.parameter_fp, 'U') except IOError: raise IOError( "Can't open parameters file (%s). Does it exist? Do you have read access?" % opts.parameter_fp) params = parse_qiime_parameters(parameter_f) parameter_f.close() else: params = parse_qiime_parameters([]) # empty list returns empty defaultdict for now jobs_to_start = opts.jobs_to_start default_jobs_to_start = qiime_config['jobs_to_start'] validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) if print_only: command_handler = print_commands else: command_handler = call_commands_serially try: makedirs(output_dir) except OSError: if opts.force: pass else: option_parser.error( "Output directory already exists. Please choose" " a different directory, or force overwrite with -f.") if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates run_pick_closed_reference_otus( input_fp, reference_fp, output_dir, taxonomy_fp, assign_taxonomy=assign_taxonomy, command_handler=command_handler, params=params, qiime_config=qiime_config, parallel=parallel, status_update_callback=status_update_callback)
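The makedirs/force overwrite guard repeated across these mains can be expressed once. A sketch matching the behavior above; this helper is not part of the QIIME API:

from os import makedirs

def ensure_new_output_dir(output_dir, force=False):
    # Since the analyses can take quite a while, refuse to reuse an
    # existing directory unless the user explicitly forces it.
    try:
        makedirs(output_dir)
    except OSError:
        if not force:
            raise ValueError('Output directory already exists. Please choose '
                             'a different directory, or force overwrite with -f.')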
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) verbose = opts.verbose input_fnas = opts.input_fnas input_quals = opts.input_quals output_dir = opts.output_dir sampling_depth = opts.seqs_per_sample categories = opts.categories reference_tree_fp = opts.reference_tree_fp mapping_fp = opts.mapping_fp verbose = opts.verbose print_only = False # This feature is not currently supported suppress_split_libraries = opts.suppress_split_libraries even_sampling_keeps_all_samples = opts.even_sampling_keeps_all_samples parallel = opts.parallel if suppress_split_libraries and len(input_fnas) > 1: option_parser.error("Only a single fasta file can be passed with "+\ "--suppress_split_libraries") if opts.parameter_fp != None: try: parameter_f = open(opts.parameter_fp) except IOError: raise IOError,\ "Can't open parameters file (%s). Does it exist? Do you have read access?"\ % opts.parameter_fp params = parse_qiime_parameters(parameter_f) else: params = parse_qiime_parameters([]) jobs_to_start = opts.jobs_to_start default_jobs_to_start = qiime_config['jobs_to_start'] validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) try: makedirs(output_dir) except OSError: if opts.force: pass else: # Since the analysis can take quite a while, I put this check # in to help users avoid overwriting previous output. print "Output directory already exists. Please choose "+\ "a different directory, or force overwrite with -f." exit(1) if print_only: command_handler = print_commands else: command_handler = call_commands_serially if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates input_fnas_string = input_fnas[0] for inp_fna in input_fnas[1:]: input_fnas_string = input_fnas_string + ',' + inp_fna input_quals_string = None if input_quals: input_quals_string = input_quals[0] for inp_qual in input_quals[1:]: input_quals_string = input_quals_string + ',' + inp_qual run_core_qiime_analyses( fna_fps=input_fnas_string, qual_fps=input_quals_string, mapping_fp=mapping_fp, output_dir=output_dir, command_handler=command_handler, params=params, qiime_config=qiime_config, categories=categories, sampling_depth=sampling_depth, suppress_split_libraries=suppress_split_libraries, even_sampling_keeps_all_samples=even_sampling_keeps_all_samples, arare_min_rare_depth=10, arare_num_steps=10, reference_tree_fp=reference_tree_fp, parallel=parallel, status_update_callback=status_update_callback)
def main(): option_parser, opts, args =\ parse_command_line_parameters(**script_info) otu_table_fp = opts.otu_table_fp output_dir = opts.output_dir mapping_fp = opts.mapping_fp tree_fp = opts.tree_fp verbose = opts.verbose print_only = opts.print_only seqs_per_sample = int(opts.seqs_per_sample) parallel = opts.parallel min_seqs_sample = opts.min_seqs_sample subject_category = opts.subject_name try: makedirs(output_dir) except OSError: if opts.force: pass else: # Since the analysis can take quite a while, I put this check # in to help users avoid overwriting previous output. option_parser.error("Output directory already exists. Please choose" " a different directory, or force overwrite with -f.") ## ******************** make_evident_selectors ******************** ## The code for make_evident_selectors.py is here and has to go before the params ## validation as we need to know the main cats before creating the params file map_data, headers, comments = parse_mapping_file(open(mapping_fp, 'U')) biom_table = parse_biom_table(open(otu_table_fp, 'U')) # getting valid samples from biom file real_map_headers, real_map_data = filter_mapping_file(map_data, headers,\ biom_table.SampleIds, include_repeat_cols=False) if subject_category not in real_map_headers: option_parser.error('Column %s is not in the mapping file; valid choices are: %s'%\ (subject_category, real_map_headers)) sorted_counts_per_sample = get_sorted_counts_per_sample(biom_table) mapping_file_tuple = (real_map_data, real_map_headers) # calculate the available subjects at each rarefaction level results, main_map_cat = make_selectors(sorted_counts_per_sample, min_seqs_sample,\ mapping_file_tuple, subject_category, verbose=verbose) fout = open(join(output_dir,'selectors.txt'),'w') fout.write('#Sequences\tSubjects\tSamples\tMetadata\n') fout.write('\n'.join(results)) fout.close() fout = open(join(output_dir,'mapping_file.txt'),'w') fout.write(format_mapping_file(real_map_headers, real_map_data)) fout.close() ## ******************** make_evident_selectors ******************** fout = open(join(output_dir,'study_preferences.txt'),'w') fout.write('%d\n' % seqs_per_sample) fout.write('%s\n' % subject_category) fout.close() ## ******************** filter_samples_from_otu_table ******************** ## Filtering the original biom file to keep only samples at or above the requested ## sampling depth, to avoid ugly plots alpha_biom_file = join(output_dir,'filtered_otu_table_for_alpha.biom') fout = open(alpha_biom_file,'w') sample_ids_to_keep = biom_table.SampleIds filtered_otu_table = filter_samples_from_otu_table(biom_table, sample_ids_to_keep, min_count=seqs_per_sample, max_count=inf) fout.write(format_biom_table(filtered_otu_table)) fout.close() ## ******************** filter_samples_from_otu_table ******************** if opts.parameter_fp: try: parameter_f = open(opts.parameter_fp, 'U') except IOError: option_parser.error("Can't open parameters file (%s). Does it exist? " \ "Do you have read access?" 
% opts.parameter_fp) params = parse_qiime_parameters(parameter_f) parameter_f.close() else: params = parse_qiime_parameters( ['beta_diversity:metrics unweighted_unifrac',\ 'make_rarefaction_plots:prefs_path %s' % join(output_dir,'prefs.txt'), 'make_rarefaction_plots:colorby %s' % ','.join(main_map_cat), 'make_rarefaction_plots:output_type memory', 'multiple_rarefactions:min %d' % int(seqs_per_sample/4), 'multiple_rarefactions:max %d' % (seqs_per_sample+1), 'multiple_rarefactions:step %d' % int(seqs_per_sample/4), 'multiple_rarefactions:num-reps 4', ]) # empty list returns empty defaultdict for now jobs_to_start = opts.jobs_to_start default_jobs_to_start = qiime_config['jobs_to_start'] validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) if print_only: command_handler = print_commands else: command_handler = call_commands_serially if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates copyfile(otu_table_fp, join(output_dir,'raw.biom')) run_beta_diversity_through_plots(otu_table_fp=otu_table_fp, mapping_fp=mapping_fp, output_dir=output_dir, command_handler=command_handler, params=params, qiime_config=qiime_config, color_by_interesting_fields_only=False, sampling_depth=seqs_per_sample, histogram_categories=None, tree_fp=tree_fp, parallel=parallel, suppress_3d_plots=True, suppress_2d_plots=True, status_update_callback=status_update_callback) output_dir = join(output_dir,'alpha') run_alpha_rarefaction(otu_table_fp=alpha_biom_file,\ mapping_fp=mapping_fp,\ output_dir=output_dir,\ command_handler=command_handler,\ params=params, qiime_config=qiime_config,\ tree_fp=tree_fp,\ num_steps=4,\ parallel=parallel,\ min_rare_depth=10, max_rare_depth=20, status_update_callback=status_update_callback, plot_stderr_and_stddev=True)
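A worked example of the default rarefaction parameters built above: with seqs_per_sample = 1000, the multiple_rarefactions settings become min=250, max=1001, step=250, so four depths (250, 500, 750, 1000) are evaluated, each with the four replicates set by num-reps:

seqs_per_sample = 1000
print 'multiple_rarefactions:min %d' % int(seqs_per_sample / 4)    # 250
print 'multiple_rarefactions:max %d' % (seqs_per_sample + 1)       # 1001
print 'multiple_rarefactions:step %d' % int(seqs_per_sample / 4)   # 250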
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) verbose = opts.verbose otu_table_fp = opts.otu_table_fp output_dir = opts.output_dir mapping_fp = opts.mapping_fp tree_fp = opts.tree_fp num_steps = opts.num_steps verbose = opts.verbose print_only = opts.print_only parallel = opts.parallel min_rare_depth = opts.min_rare_depth max_rare_depth = opts.max_rare_depth if opts.parameter_fp: try: parameter_f = open(opts.parameter_fp) except IOError: raise IOError,\ "Can't open parameters file (%s). Does it exist? Do you have read access?"\ % opts.parameter_fp params = parse_qiime_parameters(parameter_f) else: params = parse_qiime_parameters([]) # empty list returns empty defaultdict for now jobs_to_start = opts.jobs_to_start default_jobs_to_start = qiime_config['jobs_to_start'] validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) try: makedirs(output_dir) except OSError: if opts.force: pass else: # Since the analysis can take quite a while, I put this check # in to help users avoid overwriting previous output. option_parser.error("Output directory already exists. Please choose" " a different directory, or force overwrite with -f.") if print_only: command_handler = print_commands else: command_handler = call_commands_serially if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates run_qiime_alpha_rarefaction(otu_table_fp=otu_table_fp,\ mapping_fp=mapping_fp,\ output_dir=output_dir,\ command_handler=command_handler,\ params=params, qiime_config=qiime_config,\ tree_fp=tree_fp,\ num_steps=num_steps,\ parallel=parallel,\ min_rare_depth=min_rare_depth, max_rare_depth=max_rare_depth, status_update_callback=status_update_callback)
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) verbose = opts.verbose otu_table_fp = opts.otu_table_fp output_dir = opts.output_dir mapping_fp = opts.mapping_fp tree_fp = opts.tree_fp verbose = opts.verbose print_only = opts.print_only seqs_per_sample = opts.seqs_per_sample parallel = opts.parallel # No longer checking that jobs_to_start > 2, but # commenting as we may change our minds about this. #if parallel: raise_error_on_parallel_unavailable() if opts.parameter_fp: try: parameter_f = open(opts.parameter_fp, 'U') except IOError: raise IOError,\ "Can't open parameters file (%s). Does it exist? Do you have read access?"\ % opts.parameter_fp params = parse_qiime_parameters(parameter_f) parameter_f.close() else: params = parse_qiime_parameters([]) # empty list returns empty defaultdict for now jobs_to_start = opts.jobs_to_start default_jobs_to_start = qiime_config['jobs_to_start'] validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) try: makedirs(output_dir) except OSError: if opts.force: pass else: # Since the analysis can take quite a while, I put this check # in to help users avoid overwriting previous output. option_parser.error("Output directory already exists. Please choose" " a different directory, or force overwrite with -f.") if print_only: command_handler = print_commands else: command_handler = call_commands_serially if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates run_beta_diversity_through_plots(otu_table_fp=otu_table_fp, mapping_fp=mapping_fp, output_dir=output_dir, command_handler=command_handler, params=params, qiime_config=qiime_config, color_by_interesting_fields_only=not opts.color_by_all_fields, sampling_depth=seqs_per_sample, tree_fp=tree_fp, parallel=parallel, suppress_emperor_plots=opts.suppress_emperor_plots, status_update_callback=status_update_callback)
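All of these workflows hand their commands to a handler with the same interface: commands is a list of steps, and each step is a list of (description, shell_command) pairs. A minimal print-only handler sketching that contract (the signature mirrors how command_handler is called above; this is an illustration, not the print_commands implementation itself):

def print_only_handler(commands, status_update_callback, logger,
                       close_logger_on_success=True):
    # echo each step instead of executing it
    for step in commands:
        for description, cmd in step:
            print '# %s' % description
            print cmd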
def main(): option_parser, opts, args = parse_command_line_parameters(**script_info) verbose = opts.verbose input_fp = opts.input_fp reference_fp = opts.reference_fp taxonomy_fp = opts.taxonomy_fp output_dir = opts.output_dir verbose = opts.verbose print_only = opts.print_only parallel = opts.parallel # No longer checking that jobs_to_start > 2, but # commenting as we may change our minds about this. #if parallel: raise_error_on_parallel_unavailable() if opts.parameter_fp: try: parameter_f = open(opts.parameter_fp, 'U') except IOError: raise IOError,\ "Can't open parameters file (%s). Does it exist? Do you have read access?"\ % opts.parameter_fp params = parse_qiime_parameters(parameter_f) parameter_f.close() else: params = parse_qiime_parameters([]) # empty list returns empty defaultdict for now jobs_to_start = opts.jobs_to_start default_jobs_to_start = qiime_config['jobs_to_start'] validate_and_set_jobs_to_start(params, jobs_to_start, default_jobs_to_start, parallel, option_parser) try: makedirs(output_dir) except OSError: if opts.force: pass else: option_parser.error("Output directory already exists. Please choose" " a different directory, or force overwrite with -f.") if print_only: command_handler = print_commands else: command_handler = call_commands_serially if verbose: status_update_callback = print_to_stdout else: status_update_callback = no_status_updates run_pick_closed_reference_otus( input_fp, reference_fp, output_dir, taxonomy_fp, command_handler=command_handler, params=params, qiime_config=qiime_config, parallel=parallel, status_update_callback=status_update_callback)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    parameters = {}

    # get the tree insertion method to use
    module = opts.insertion_method

    # create the output directory
    output_dir = opts.output_dir
    create_dir(output_dir)

    # dispatch table of supported tree insertion methods
    tree_insertion_module_names = {'raxml_v730': brokit.raxml_v730,
                                   'parsinsert': brokit.parsinsert,
                                   'pplacer': brokit.pplacer}

    # load the input sequences and convert them to phylip format, since
    # the insertion tools require phylip-compliant sequence names
    load_aln = parse_fasta(open(opts.input_fasta_fp, 'U'))
    aln = DenseAlignment(load_aln)
    seqs, align_map = aln.toPhylip()

    if opts.method_params_fp:
        param_dict = parse_qiime_parameters(open(opts.method_params_fp, 'U'))
    else:
        param_dict = {}

    if module == 'raxml_v730':
        # load the reference sequences
        load_ref_aln = DenseAlignment(parse_fasta(open(opts.refseq_fp, 'U')))

        # combine and load the reference plus query alignment
        combined_aln = parse_fasta(StringIO(load_ref_aln.toFasta() + '\n' +
                                            aln.toFasta()))

        # overwrite the alignment map
        aln = DenseAlignment(combined_aln)
        seqs, align_map = aln.toPhylip()

        parameters = param_dict.get('raxml', {})

        tree = convert_tree_tips(align_map, opts.starting_tree_fp)

        # write out the tree with phylip labels
        updated_tree_fp = join(output_dir,
                               '%s_phylip_named_tree.tre' % module)
        write_updated_tree_file(updated_tree_fp, tree)

        # set the primary parameters for raxml
        parameters['-w'] = abspath(output_dir) + '/'
        parameters['-n'] = split(splitext(get_tmp_filename())[0])[-1]
        parameters['-t'] = updated_tree_fp
        if '-f' not in parameters:
            parameters['-f'] = 'v'
        if '-m' not in parameters:
            parameters['-m'] = 'GTRGAMMA'

    elif module == 'pplacer':
        parameters = param_dict.get('pplacer', {})

        # make sure the stats file is passed
        if not opts.stats_fp:
            raise IOError('When using pplacer, the RAxML-produced info '
                          'file is required.')

        # set the primary parameters for pplacer, allowing for
        # user-defined parameters
        parameters['--out-dir'] = abspath(output_dir) + '/'
        parameters['-t'] = opts.starting_tree_fp
        parameters['-r'] = opts.refseq_fp
        parameters['-s'] = opts.stats_fp

    elif module == 'parsinsert':
        parameters = param_dict.get('parsinsert', {})

        # define the log and taxonomy-assignment output filepaths
        log_fp = join(output_dir, 'parsinsert.log')
        tax_assign_fp = join(output_dir, 'parsinsert_assignments.log')
        parameters['-l'] = log_fp
        parameters['-o'] = tax_assign_fp
        parameters['-s'] = opts.refseq_fp
        parameters['-t'] = opts.starting_tree_fp

    # call the module and return a tree object
    result = tree_insertion_module_names[module].insert_sequences_into_tree(
        seqs, moltype=DNA, params=parameters)

    # restore the original tip labels before writing the result
    result_tree = strip_and_rename_unwanted_labels_from_tree(align_map,
                                                             result)

    # write out the resulting tree
    final_tree_fp = join(output_dir, '%s_final_placement.tre' % module)
    write_updated_tree_file(final_tree_fp, result_tree)
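# Illustrative sketch (not part of QIIME): the phylip round-trip above
# depends on align_map from aln.toPhylip(), assumed here to map the
# generated phylip-safe labels back to the original sequence names.
# Restoring the original tip names on the result tree then amounts to:
def rename_tips_from_align_map(tree, align_map):
    # tree is assumed to be a PyCogent-style tree exposing iterTips()
    # with mutable .Name attributes
    for tip in tree.iterTips():
        if tip.Name in align_map:
            tip.Name = align_map[tip.Name]
    return tree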
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose

    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    num_steps = opts.num_steps
    print_only = opts.print_only
    parallel = opts.parallel
    min_rare_depth = opts.min_rare_depth
    max_rare_depth = opts.max_rare_depth
    retain_intermediate_files = opts.retain_intermediate_files

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, this check
            # helps users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please "
                                "choose a different directory, or force "
                                "overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_alpha_rarefaction(otu_table_fp=otu_table_fp,
                          mapping_fp=mapping_fp,
                          output_dir=output_dir,
                          command_handler=command_handler,
                          params=params,
                          qiime_config=qiime_config,
                          tree_fp=tree_fp,
                          num_steps=num_steps,
                          parallel=parallel,
                          min_rare_depth=min_rare_depth,
                          max_rare_depth=max_rare_depth,
                          status_update_callback=status_update_callback,
                          retain_intermediate_files=retain_intermediate_files)
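# Illustrative sketch (not part of QIIME): one plausible way min_rare_depth,
# max_rare_depth, and num_steps combine into a series of rarefaction depths.
# run_alpha_rarefaction's real step logic may differ; this only shows the
# intent of the three options.
def rarefaction_depths(min_rare_depth, max_rare_depth, num_steps):
    step = max(1, (max_rare_depth - min_rare_depth) // num_steps)
    return range(min_rare_depth, max_rare_depth + 1, step)

# e.g., rarefaction_depths(10, 100, 9) -> [10, 20, 30, ..., 100]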
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    verbose = opts.verbose

    otu_table_fp = opts.otu_table_fp
    output_dir = opts.output_dir
    tree_fp = opts.tree_fp
    seqs_per_sample = opts.seqs_per_sample
    print_only = opts.print_only
    master_tree = opts.master_tree
    parallel = opts.parallel
    # No longer checking that jobs_to_start > 2, but
    # commenting as we may change our minds about this.
    # if parallel: raise_error_on_parallel_unavailable()

    if opts.parameter_fp:
        try:
            parameter_f = open(opts.parameter_fp, 'U')
        except IOError:
            raise IOError("Can't open parameters file (%s). Does it exist? "
                          "Do you have read access?" % opts.parameter_fp)
        params = parse_qiime_parameters(parameter_f)
        parameter_f.close()
    else:
        params = parse_qiime_parameters([])
        # empty list returns empty defaultdict for now

    jobs_to_start = opts.jobs_to_start
    default_jobs_to_start = qiime_config['jobs_to_start']
    validate_and_set_jobs_to_start(params,
                                   jobs_to_start,
                                   default_jobs_to_start,
                                   parallel,
                                   option_parser)

    try:
        makedirs(output_dir)
    except OSError:
        if opts.force:
            pass
        else:
            # Since the analysis can take quite a while, this check
            # helps users avoid overwriting previous output.
            option_parser.error("Output directory already exists. Please "
                                "choose a different directory, or force "
                                "overwrite with -f.")

    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially

    if verbose:
        status_update_callback = print_to_stdout
    else:
        status_update_callback = no_status_updates

    run_jackknifed_beta_diversity(otu_table_fp=otu_table_fp,
                                  tree_fp=tree_fp,
                                  seqs_per_sample=seqs_per_sample,
                                  output_dir=output_dir,
                                  command_handler=command_handler,
                                  params=params,
                                  qiime_config=qiime_config,
                                  mapping_fp=opts.mapping_fp,
                                  parallel=parallel,
                                  status_update_callback=status_update_callback,
                                  master_tree=master_tree)
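# Illustrative sketch (not part of QIIME): the interface that print_commands
# and call_commands_serially both satisfy. As the workflow code below shows,
# commands is a list of command blocks, each a list of (description,
# shell_command) tuples, and the handler accepts close_logger_on_success.
# This sketch only echoes the commands, much like print_only mode.
def echo_commands(commands, status_update_callback, logger,
                  close_logger_on_success=True):
    for command_block in commands:
        for description, shell_command in command_block:
            status_update_callback(description)
            print(shell_command)
    if close_logger_on_success:
        logger.close()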
def run_core_diversity_analyses(biom_fp,
                                mapping_fp,
                                sampling_depth,
                                output_dir,
                                qiime_config,
                                command_handler=call_commands_serially,
                                tree_fp=None,
                                params=None,
                                categories=None,
                                arare_min_rare_depth=10,
                                arare_num_steps=10,
                                parallel=False,
                                suppress_taxa_summary=False,
                                suppress_beta_diversity=False,
                                suppress_alpha_diversity=False,
                                suppress_group_significance=False,
                                status_update_callback=print_to_stdout):
    """Run the core QIIME diversity analyses on a BIOM table and mapping
    file, writing an index.html page that links to all results.
    """
    if categories is not None:
        # validate the categories provided by the user
        mapping_data, mapping_comments = \
            parse_mapping_file_to_dict(open(mapping_fp, 'U'))
        metadata_map = MetadataMap(mapping_data, mapping_comments)
        for c in categories:
            if c not in metadata_map.CategoryNames:
                raise ValueError(
                    "Category '%s' is not a column header "
                    "in your mapping file. "
                    "Categories are case and white space sensitive. Valid "
                    "choices are: (%s)" %
                    (c, ', '.join(metadata_map.CategoryNames)))
            if metadata_map.hasSingleCategoryValue(c):
                raise ValueError(
                    "Category '%s' contains only one value. Categories "
                    "analyzed here require at least two values." % c)
    else:
        categories = []
    comma_separated_categories = ','.join(categories)

    # prep some variables
    if params is None:
        params = parse_qiime_parameters([])

    create_dir(output_dir)
    index_fp = '%s/index.html' % output_dir
    index_links = []
    commands = []

    # begin logging
    old_log_fps = glob(join(output_dir, 'log_20*txt'))
    log_fp = generate_log_fp(output_dir)
    index_links.append(
        ('Master run log', log_fp, _index_headers['run_summary']))
    for old_log_fp in old_log_fps:
        index_links.append(
            ('Previous run log', old_log_fp, _index_headers['run_summary']))
    logger = WorkflowLogger(log_fp, params=params, qiime_config=qiime_config)
    input_fps = [biom_fp, mapping_fp]
    if tree_fp is not None:
        input_fps.append(tree_fp)
    log_input_md5s(logger, input_fps)

    # run 'biom summarize-table' on the input BIOM table
    try:
        params_str = get_params_str(params['biom-summarize-table'])
    except KeyError:
        params_str = ''
    biom_table_stats_output_fp = '%s/biom_table_summary.txt' % output_dir
    if not exists(biom_table_stats_output_fp):
        biom_table_summary_cmd = \
            "biom summarize-table -i %s -o %s %s" % \
            (biom_fp, biom_table_stats_output_fp, params_str)
        commands.append([('Generate BIOM table summary',
                          biom_table_summary_cmd)])
    else:
        logger.write("Skipping 'biom summarize-table' as %s exists.\n\n" %
                     biom_table_stats_output_fp)
    index_links.append(('BIOM table statistics',
                        biom_table_stats_output_fp,
                        _index_headers['run_summary']))

    # filter samples with fewer observations than the requested
    # sampling_depth. since these get filtered for some analyses (e.g.,
    # beta diversity after even sampling) it's useful to filter them here
    # so they're excluded from all analyses.
    filtered_biom_fp = "%s/table_mc%d.biom" % (output_dir, sampling_depth)
    if not exists(filtered_biom_fp):
        filter_samples_cmd = \
            "filter_samples_from_otu_table.py -i %s -o %s -n %d" % \
            (biom_fp, filtered_biom_fp, sampling_depth)
        commands.append([(
            'Filter low sequence count samples from table '
            '(minimum sequence count: %d)' % sampling_depth,
            filter_samples_cmd)])
    else:
        logger.write(
            "Skipping filter_samples_from_otu_table.py as %s exists.\n\n" %
            filtered_biom_fp)
    biom_fp = filtered_biom_fp

    # rarefy the BIOM table to sampling_depth
    rarefied_biom_fp = "%s/table_even%d.biom" % (output_dir, sampling_depth)
    if not exists(rarefied_biom_fp):
        single_rarefaction_cmd = \
            "single_rarefaction.py -i %s -o %s -d %d" % \
            (biom_fp, rarefied_biom_fp, sampling_depth)
        commands.append([
            ('Rarefy the OTU table to %d sequences/sample' % sampling_depth,
             single_rarefaction_cmd)])
    else:
        logger.write("Skipping single_rarefaction.py as %s exists.\n\n" %
                     rarefied_biom_fp)

    # run the initial commands and reset the command list
    if len(commands) > 0:
        command_handler(commands,
                        status_update_callback,
                        logger,
                        close_logger_on_success=False)
        commands = []

    if not suppress_beta_diversity:
        bdiv_even_output_dir = '%s/bdiv_even%d/' % (output_dir,
                                                    sampling_depth)
        # Need to check for the existence of any distance matrices, since
        # the user can select which will be generated.
        existing_dm_fps = glob('%s/*_dm.txt' % bdiv_even_output_dir)
        if len(existing_dm_fps) == 0:
            even_dm_fps = run_beta_diversity_through_plots(
                otu_table_fp=rarefied_biom_fp,
                mapping_fp=mapping_fp,
                output_dir=bdiv_even_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                # Note: we pass sampling_depth=None here as we rarefy the
                # BIOM table above and pass that in here.
                sampling_depth=None,
                tree_fp=tree_fp,
                parallel=parallel,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback)
        else:
            logger.write(
                "Skipping beta_diversity_through_plots.py as %s exist(s).\n\n"
                % ', '.join(existing_dm_fps))
            # str.strip would remove *characters* rather than the suffix,
            # so slice the '_dm.txt' suffix off instead
            even_dm_fps = [(split(fp)[1][:-len('_dm.txt')], fp)
                           for fp in existing_dm_fps]

        # get the make_distance_boxplots parameters
        try:
            params_str = get_params_str(params['make_distance_boxplots'])
        except KeyError:
            params_str = ''

        for bdiv_metric, dm_fp in even_dm_fps:
            for category in categories:
                boxplots_output_dir = '%s/%s_boxplots/' % \
                    (bdiv_even_output_dir, bdiv_metric)
                plot_output_fp = '%s/%s_Distances.pdf' % \
                    (boxplots_output_dir, category)
                stats_output_fp = '%s/%s_Stats.txt' % \
                    (boxplots_output_dir, category)
                if not exists(plot_output_fp):
                    boxplots_cmd = \
                        'make_distance_boxplots.py -d %s -f %s -o %s -m %s -n 999 %s' % \
                        (dm_fp, category, boxplots_output_dir, mapping_fp,
                         params_str)
                    commands.append([('Boxplots (%s)' % category,
                                      boxplots_cmd)])
                else:
                    logger.write(
                        "Skipping make_distance_boxplots.py for %s as %s "
                        "exists.\n\n" % (category, plot_output_fp))
                index_links.append(
                    ('Distance boxplots (%s)' % bdiv_metric,
                     plot_output_fp,
                     _index_headers['beta_diversity_even'] % sampling_depth))
                index_links.append(
                    ('Distance boxplots statistics (%s)' % bdiv_metric,
                     stats_output_fp,
                     _index_headers['beta_diversity_even'] % sampling_depth))

            index_links.append(
                ('PCoA plot (%s)' % bdiv_metric,
                 '%s/%s_emperor_pcoa_plot/index.html' %
                 (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(
                ('Distance matrix (%s)' % bdiv_metric,
                 '%s/%s_dm.txt' % (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))
            index_links.append(
                ('Principal coordinate matrix (%s)' % bdiv_metric,
                 '%s/%s_pc.txt' % (bdiv_even_output_dir, bdiv_metric),
                 _index_headers['beta_diversity_even'] % sampling_depth))
    if not suppress_alpha_diversity:
        # alpha rarefaction workflow
        arare_full_output_dir = '%s/arare_max%d/' % (output_dir,
                                                     sampling_depth)
        rarefaction_plots_output_fp = \
            '%s/alpha_rarefaction_plots/rarefaction_plots.html' % \
            arare_full_output_dir
        if not exists(rarefaction_plots_output_fp):
            run_alpha_rarefaction(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=arare_full_output_dir,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                tree_fp=tree_fp,
                num_steps=arare_num_steps,
                parallel=parallel,
                logger=logger,
                min_rare_depth=arare_min_rare_depth,
                max_rare_depth=sampling_depth,
                suppress_md5=True,
                status_update_callback=status_update_callback,
                retain_intermediate_files=False)
        else:
            logger.write("Skipping alpha_rarefaction.py as %s exists.\n\n" %
                         rarefaction_plots_output_fp)

        index_links.append(
            ('Alpha rarefaction plots',
             rarefaction_plots_output_fp,
             _index_headers['alpha_diversity']))

        collated_alpha_diversity_fps = \
            glob('%s/alpha_div_collated/*txt' % arare_full_output_dir)
        try:
            params_str = get_params_str(params['compare_alpha_diversity'])
        except KeyError:
            params_str = ''

        if len(categories) > 0:
            for collated_alpha_diversity_fp in collated_alpha_diversity_fps:
                alpha_metric = splitext(
                    split(collated_alpha_diversity_fp)[1])[0]
                compare_alpha_output_dir = '%s/compare_%s' % \
                    (arare_full_output_dir, alpha_metric)
                if not exists(compare_alpha_output_dir):
                    compare_alpha_cmd = \
                        'compare_alpha_diversity.py -i %s -m %s -c %s -o %s -n 999 %s' % \
                        (collated_alpha_diversity_fp,
                         mapping_fp,
                         comma_separated_categories,
                         compare_alpha_output_dir,
                         params_str)
                    commands.append([
                        ('Compare alpha diversity (%s)' % alpha_metric,
                         compare_alpha_cmd)])
                    for category in categories:
                        alpha_comparison_stat_fp = '%s/%s_stats.txt' % \
                            (compare_alpha_output_dir, category)
                        alpha_comparison_boxplot_fp = '%s/%s_boxplots.pdf' % \
                            (compare_alpha_output_dir, category)
                        index_links.append(
                            ('Alpha diversity statistics (%s, %s)' %
                             (category, alpha_metric),
                             alpha_comparison_stat_fp,
                             _index_headers['alpha_diversity']))
                        index_links.append(
                            ('Alpha diversity boxplots (%s, %s)' %
                             (category, alpha_metric),
                             alpha_comparison_boxplot_fp,
                             _index_headers['alpha_diversity']))
                else:
                    logger.write("Skipping compare_alpha_diversity.py"
                                 " for %s as %s exists.\n\n" %
                                 (alpha_metric, compare_alpha_output_dir))
        else:
            logger.write("Skipping compare_alpha_diversity.py as"
                         " no categories were provided.\n\n")

    if not suppress_taxa_summary:
        taxa_plots_output_dir = '%s/taxa_plots/' % output_dir
        # need to check for the existence of any html files, since the user
        # can select only certain ones to be generated
        existing_taxa_plot_html_fps = glob(
            join(taxa_plots_output_dir, 'taxa_summary_plots', '*.html'))
        if len(existing_taxa_plot_html_fps) == 0:
            run_summarize_taxa_through_plots(
                otu_table_fp=biom_fp,
                mapping_fp=mapping_fp,
                output_dir=taxa_plots_output_dir,
                mapping_cat=None,
                sort=True,
                command_handler=command_handler,
                params=params,
                qiime_config=qiime_config,
                logger=logger,
                suppress_md5=True,
                status_update_callback=status_update_callback)
        else:
            logger.write(
                "Skipping summarize_taxa_through_plots.py as %s exist(s).\n\n"
                % ', '.join(existing_taxa_plot_html_fps))

        index_links.append(
            ('Taxa summary bar plots',
             '%s/taxa_summary_plots/bar_charts.html' % taxa_plots_output_dir,
             _index_headers['taxa_summary']))
        index_links.append(
            ('Taxa summary area plots',
             '%s/taxa_summary_plots/area_charts.html' % taxa_plots_output_dir,
             _index_headers['taxa_summary']))
        for category in categories:
            taxa_plots_output_dir = '%s/taxa_plots_%s/' % (output_dir,
                                                           category)
            # need to check for the existence of any html files, since the
            # user can select only certain ones to be generated
            existing_taxa_plot_html_fps = \
                glob('%s/taxa_summary_plots/*.html' % taxa_plots_output_dir)
            if len(existing_taxa_plot_html_fps) == 0:
                run_summarize_taxa_through_plots(
                    otu_table_fp=biom_fp,
                    mapping_fp=mapping_fp,
                    output_dir=taxa_plots_output_dir,
                    mapping_cat=category,
                    sort=True,
                    command_handler=command_handler,
                    params=params,
                    qiime_config=qiime_config,
                    logger=logger,
                    suppress_md5=True,
                    status_update_callback=status_update_callback)
            else:
                logger.write(
                    "Skipping summarize_taxa_through_plots.py for %s as "
                    "%s exist(s).\n\n" %
                    (category, ', '.join(existing_taxa_plot_html_fps)))

            index_links.append(
                ('Taxa summary bar plots',
                 '%s/taxa_summary_plots/bar_charts.html' %
                 taxa_plots_output_dir,
                 _index_headers['taxa_summary_categorical'] % category))
            index_links.append(
                ('Taxa summary area plots',
                 '%s/taxa_summary_plots/area_charts.html' %
                 taxa_plots_output_dir,
                 _index_headers['taxa_summary_categorical'] % category))

    if not suppress_group_significance:
        # get the group_significance parameters, if any were provided
        try:
            params_str = get_params_str(params['group_significance'])
        except KeyError:
            params_str = ''

        # group significance tests, aka category significance
        for category in categories:
            group_significance_fp = \
                '%s/group_significance_%s.txt' % (output_dir, category)
            if not exists(group_significance_fp):
                # build the group (category) significance command
                group_significance_cmd = \
                    'group_significance.py -i %s -m %s -c %s -o %s %s' % \
                    (rarefied_biom_fp, mapping_fp, category,
                     group_significance_fp, params_str)
                commands.append([('Group significance (%s)' % category,
                                  group_significance_cmd)])
            else:
                logger.write(
                    "Skipping group_significance.py for %s as %s exists.\n\n"
                    % (category, group_significance_fp))

            index_links.append(
                ('Category significance (%s)' % category,
                 group_significance_fp,
                 _index_headers['group_significance']))

    filtered_biom_gzip_fp = '%s.gz' % filtered_biom_fp
    if not exists(filtered_biom_gzip_fp):
        commands.append([('Compress the filtered BIOM table',
                          'gzip %s' % filtered_biom_fp)])
    else:
        logger.write(
            "Skipping compression of filtered BIOM table as %s exists.\n\n"
            % filtered_biom_gzip_fp)
    index_links.append(
        ('Filtered BIOM table (minimum sequence count: %d)' % sampling_depth,
         filtered_biom_gzip_fp,
         _index_headers['run_summary']))

    rarefied_biom_gzip_fp = '%s.gz' % rarefied_biom_fp
    if not exists(rarefied_biom_gzip_fp):
        commands.append([('Compress the rarefied BIOM table',
                          'gzip %s' % rarefied_biom_fp)])
    else:
        logger.write(
            "Skipping compression of rarefied BIOM table as %s exists.\n\n"
            % rarefied_biom_gzip_fp)
    index_links.append(
        ('Rarefied BIOM table (sampling depth: %d)' % sampling_depth,
         rarefied_biom_gzip_fp,
         _index_headers['run_summary']))

    # run any remaining commands
    if len(commands) > 0:
        command_handler(commands, status_update_callback, logger)
    else:
        logger.close()

    generate_index_page(index_links, index_fp)
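# Illustrative sketch (not part of QIIME): the shape of the module-level
# _index_headers dict that run_core_diversity_analyses relies on. Only the
# keys and the %-placeholders are inferred from the usage above; the actual
# header strings in QIIME differ.
_index_headers_sketch = {
    'run_summary': 'Run summary data',
    'beta_diversity_even': 'Beta diversity results (even sampling: %d)',
    'alpha_diversity': 'Alpha diversity results',
    'taxa_summary': 'Taxa summary results',
    'taxa_summary_categorical': 'Taxa summary results (by %s)',
    'group_significance': 'Group significance results',
}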