def test_plot_rank_abundance_graphs(self):
    """plot_rank_abundance_graphs works with any number of samples"""
    self.otu_table = otu_table_fake.split("\n")
    self.dir = get_tmp_filename(tmp_dir=self.tmp_dir,
                                prefix="test_plot_rank_abundance",
                                suffix="/")
    create_dir(self.dir)
    self._dirs_to_remove.append(self.dir)

    # test empty sample name
    self.assertRaises(ValueError, plot_rank_abundance_graphs, "",
                      iter(self.otu_table), self.dir)
    # test invalid sample name
    self.assertRaises(ValueError, plot_rank_abundance_graphs,
                      "Invalid_sample_name", iter(self.otu_table), self.dir)

    # test with two samples
    file_type = "pdf"
    plot_rank_abundance_graphs("S3,S5", iter(self.otu_table), self.dir,
                               file_type=file_type)
    tmp_file = abspath(self.dir + "rank_abundance_cols_0_2." + file_type)
    self.assertTrue(exists(tmp_file))
    self.files_to_remove.append(tmp_file)

    # test with all samples
    plot_rank_abundance_graphs("*", iter(self.otu_table), self.dir,
                               file_type=file_type)
    tmp_file = abspath(self.dir + "rank_abundance_cols_0_1_2." + file_type)
    self.files_to_remove.append(tmp_file)
    self.assertTrue(exists(tmp_file))
def setUp(self):
    """ """
    self.test_data = get_test_data_fps()
    self.files_to_remove = []
    self.dirs_to_remove = []

    # Create example output directory
    tmp_dir = get_qiime_temp_dir()
    self.test_out = get_tmp_filename(tmp_dir=tmp_dir,
                                     prefix='core_qiime_analyses_test_',
                                     suffix='',
                                     result_constructor=str)
    self.dirs_to_remove.append(self.test_out)
    create_dir(self.test_out)

    self.qiime_config = load_qiime_config()
    self.params = parse_qiime_parameters(params_f1)

    # suppress stderr during tests (one of the system calls in the
    # workflow prints a warning, and we can't suppress that warning with
    # warnings.filterwarnings here because it comes from within the code
    # executed through the system call). Found this trick here:
    # http://stackoverflow.com/questions/9949633/suppressing-print-as-stdout-python
    self.saved_stderr = sys.stderr
    sys.stderr = StringIO()

    initiate_timeout(180)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    out_dir = opts.output_dir
    create_dir(out_dir)

    if opts.type == 'gradient':
        subset_fn = choose_gradient_subset
    elif opts.type == 'cluster':
        subset_fn = choose_cluster_subsets

    subset_otu_table, subset_map_str = subset_fn(open(opts.otu_table_fp, 'U'),
                                                 open(opts.map_fp, 'U'),
                                                 opts.category,
                                                 opts.num_total_samples)

    subset_otu_table_fp = join(out_dir, basename(opts.otu_table_fp))
    subset_otu_table_f = open(subset_otu_table_fp, 'w')
    subset_otu_table.getBiomFormatJsonString('choose_data_subset.py '
                                             '(microbiogeo)',
                                             subset_otu_table_f)
    subset_otu_table_f.close()

    subset_map_fp = join(out_dir, basename(opts.map_fp))
    subset_map_f = open(subset_map_fp, 'w')
    subset_map_f.write(subset_map_str)
    subset_map_f.close()
def split_otu_table_on_taxonomy_to_files(otu_table_fp,
                                         level,
                                         output_dir,
                                         md_identifier='taxonomy',
                                         md_processor=process_md_as_list):
    """ Split OTU table by taxonomic level, writing otu tables to output dir
    """
    results = []
    otu_table = parse_biom_table(open(otu_table_fp, 'U'))
    create_dir(output_dir)

    def split_f(obs_md):
        try:
            result = md_processor(obs_md, md_identifier, level)
        except KeyError:
            raise KeyError("Metadata identifier (%s) is not associated with "
                           "all (or any) observations. You can modify the "
                           "key with the md_identifier parameter."
                           % md_identifier)
        except TypeError:
            raise TypeError("Can't correctly process the metadata string. If "
                            "your input file was generated from QIIME 1.4.0 "
                            "or earlier you may need to pass --md_as_string.")
        except AttributeError:
            raise AttributeError("Metadata category not found. If your input "
                                 "file was generated from QIIME 1.4.0 or "
                                 "earlier you may need to pass "
                                 "--md_identifier \"Consensus Lineage\".")
        return result

    for bin, sub_otu_table in otu_table.binObservationsByMetadata(split_f):
        output_fp = '%s/otu_table_%s.biom' % (output_dir, bin)
        output_f = open(output_fp, 'w')
        output_f.write(format_biom_table(sub_otu_table))
        output_f.close()
        results.append(output_fp)
    return results
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    mapping_fp = opts.mapping_fp
    fasta_dir = opts.fasta_dir
    output_dir = opts.output_dir
    count_start = int(opts.count_start)
    filename_column = opts.filename_column

    # Check input filepaths
    try:
        test_mapping_f = open(mapping_fp, "U")
    except IOError:
        raise IOError("Cannot open mapping filepath %s, please check "
                      "filepath and permissions." % mapping_fp)

    if not isdir(fasta_dir):
        raise IOError("Specified fasta dir %s does not exist." % fasta_dir)

    # Create output directory, check path/access to mapping file
    create_dir(output_dir)

    add_qiime_labels(open(mapping_fp, "U"), fasta_dir, filename_column,
                     output_dir, count_start)
def test_create_dir(self):
    """create_dir creates dir and fails meaningfully."""
    tmp_dir_path = get_random_directory_name()
    tmp_dir_path2 = get_random_directory_name(suppress_mkdir=True)
    tmp_dir_path3 = get_random_directory_name(suppress_mkdir=True)

    self.dirs_to_remove.append(tmp_dir_path)
    self.dirs_to_remove.append(tmp_dir_path2)
    self.dirs_to_remove.append(tmp_dir_path3)

    # create on existing dir raises OSError if fail_on_exist=True
    self.assertRaises(OSError, create_dir, tmp_dir_path,
                      fail_on_exist=True)
    self.assertEqual(create_dir(tmp_dir_path,
                                fail_on_exist=True,
                                handle_errors_externally=True), 1)

    # return should be 1 if dir exists and fail_on_exist=False
    self.assertEqual(create_dir(tmp_dir_path, fail_on_exist=False), 1)

    # if dir is not there, make it and always return 0
    self.assertEqual(create_dir(tmp_dir_path2), 0)
    self.assertEqual(create_dir(tmp_dir_path3, fail_on_exist=True), 0)
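# The assertions above pin down create_dir's contract. Below is a minimal
# sketch of a compatible implementation -- an inference from this test, not
# QIIME's actual code:
import os

def create_dir_sketch(dir_path, fail_on_exist=False,
                      handle_errors_externally=False):
    # Create dir_path and return 0. If it already exists: raise OSError
    # when fail_on_exist=True (unless errors are handled externally, in
    # which case return the error code 1); otherwise just return 1.
    if os.path.exists(dir_path):
        if fail_on_exist and not handle_errors_externally:
            raise OSError("Directory already exists: %s" % dir_path)
        return 1
    os.makedirs(dir_path)
    return 0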
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    if output_dir:
        create_dir(output_dir)
    else:
        if isfile(opts.input_dir):
            # if output_dir is empty after the split, then a relative path
            # was passed, and the input file is in the current directory
            output_dir = split(opts.input_dir)[0] or '.'
        else:
            # opts.input_dir is a directory
            output_dir = opts.input_dir

    if opts.no_trim and not opts.use_sfftools:
        raise ValueError("When using the --no_trim option you must have the "
                         "sfftools installed and must also pass the "
                         "--use_sfftools option")

    prep_sffs_in_dir(opts.input_dir,
                     output_dir,
                     make_flowgram=opts.make_flowgram,
                     convert_to_flx=opts.convert_to_FLX,
                     use_sfftools=opts.use_sfftools,
                     no_trim=opts.no_trim)
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    fasta_fp = opts.fasta_fp
    mapping_fp = opts.mapping_fp
    output_dir = opts.output_dir
    truncate_option = opts.truncate_option
    primer_mismatches = int(opts.primer_mismatches)

    create_dir(output_dir)

    if truncate_option not in ['truncate_only', 'truncate_remove']:
        raise ValueError('-z option must be either truncate_only or '
                         'truncate_remove')

    try:
        fasta_f = open(fasta_fp, "U")
        fasta_f.close()
    except IOError:
        raise IOError("Unable to open fasta file, please check path/"
                      "permissions.")

    try:
        mapping_f = open(mapping_fp, "U")
        mapping_f.close()
    except IOError:
        raise IOError("Unable to open mapping file, please check path/"
                      "permissions.")

    truncate_reverse_primer(fasta_fp, mapping_fp, output_dir,
                            truncate_option, primer_mismatches)
def setUp(self):
    self._files_to_remove = []

    self.fasta_file_path = get_tmp_filename(prefix='fastq_',
                                            suffix='.fastq')

    fastq_file = open(self.fasta_file_path, 'w')
    fastq_file.write(fastq_test_string)
    fastq_file.close()

    # Error testing files
    false_fasta_file = '/'
    false_qual_file = '/'

    self.read_only_output_dir = get_tmp_filename(prefix='read_only_',
                                                 suffix='/')
    create_dir(self.read_only_output_dir)
    chmod(self.read_only_output_dir, 0577)

    self.output_dir = get_tmp_filename(prefix='convert_fastaqual_fastq_',
                                       suffix='/')
    self.output_dir += sep
    create_dir(self.output_dir)

    self._files_to_remove.append(self.fasta_file_path)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    create_dir(output_dir)

    otu_table_fp = opts.otu_table
    otu_table_fh = open(otu_table_fp, 'U')
    otu_table = parse_biom_table(otu_table_fh)
    otu_table_fh.close()

    tree_fh = open(opts.tree_file, 'U')
    tree = DndParser(tree_fh)
    tree_fh.close()

    mapping_fp = opts.mapping_fp
    if mapping_fp:
        mapping_f = open(mapping_fp, 'U')
        input_map_basename = splitext(split(mapping_fp)[1])[0]
    else:
        mapping_f = None
        input_map_basename = None

    input_table_basename = splitext(split(otu_table_fp)[1])[0]

    simsam_range_to_files(otu_table,
                          tree,
                          simulated_sample_sizes=map(int, opts.num.split(',')),
                          dissimilarities=map(float, opts.dissim.split(',')),
                          output_dir=output_dir,
                          mapping_f=mapping_f,
                          output_table_basename=input_table_basename,
                          output_map_basename=input_map_basename)
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    paired_data = opts.paired_data
    parameter_fp = opts.parameter_fp
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
                            "--include_input_dir_path must also be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['extract_barcodes'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    all_files = []
    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']

    for root, dir, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_files += [abspath(join(root, fp))]

    if paired_data:
        all_files, bc_pairs = get_pairs(all_files, read1_indicator,
                                        read2_indicator)

    commands = create_commands_eb(all_files, paired_data, output_dir,
                                  params_str, leading_text, trailing_text,
                                  include_input_dir_path,
                                  remove_filepath_in_name)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
def test_make_plots(self):
    """make_plots: tests whether the average plots are generated and if
    the dictionary for the html generation is properly formatted"""
    filename1 = '/tmp/test/testSampleIDSample1_ave.png'
    filename2 = '/tmp/test/testSampleIDSample1_raw.png'
    folder1 = '/tmp/test/'

    self._paths_to_clean_up = [filename1, filename2]
    self._folders_to_cleanup = [folder1]

    exp1 = {'SampleID': {'Sample1': {'test': {'ave': [' 7.000', ' 2.052'],
                                              'err': [' nan', ' 0.000']}}}}
    exp2 = {'test': {'groups': {'SampleID': {'Sample1': {
                'groupcolor': '#0000ff',
                'raw_link': 'html_plots/testSampleIDSample1_raw.png',
                'groupsamples': ['Sample1'],
                'ave_link': 'html_plots/testSampleIDSample1_ave.png'}}},
            'samples': {'Sample1': {'color': '#0000ff',
                                    'link': 'html_plots/testSample1.png'}}}}

    create_dir('/tmp/test/', False)

    obs1, obs2 = make_plots(self.background_color, self.label_color,
                            self.rare_data, self.ymax, self.xmax,
                            '/tmp/test/', self.resolution, self.imagetype,
                            self.groups, self.colors, self.data_colors,
                            self.metric_name, self.labelname,
                            self.rarefaction_data_mat,
                            self.rarefaction_legend_mat, self.sample_dict,
                            self.data_colors, self.colors2)

    self.assertEqual(obs1, exp1)
    self.assertEqual(obs2, exp2)
    self.assertTrue(exists(filename1))
    self.assertTrue(exists(filename2))
    self.assertTrue(exists(folder1))
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    mapping_fp = opts.mapping_fp
    fasta_dir = opts.fasta_dir
    output_dir = opts.output_dir
    count_start = int(opts.count_start)
    filename_column = opts.filename_column

    # Check input filepaths
    try:
        test_mapping_f = open(mapping_fp, "U")
    except IOError:
        raise IOError("Cannot open mapping filepath %s, please check "
                      "filepath and permissions." % mapping_fp)

    if not isdir(fasta_dir):
        raise IOError("Specified fasta dir %s does not exist." % fasta_dir)

    # Create output directory, check path/access to mapping file
    create_dir(output_dir)

    add_qiime_labels(open(mapping_fp, "U"), fasta_dir, filename_column,
                     output_dir, count_start)
def split_otu_table_on_taxonomy_to_files(otu_table_fp,
                                         level,
                                         output_dir,
                                         md_identifier='taxonomy',
                                         md_processor=process_md_as_list):
    """ Split OTU table by taxonomic level, writing otu tables to output dir
    """
    results = []
    otu_table = parse_biom_table(open(otu_table_fp, 'U'))
    create_dir(output_dir)

    def split_f(obs_md):
        try:
            result = md_processor(obs_md, md_identifier, level)
        except KeyError:
            raise KeyError("Metadata identifier (%s) is not associated with "
                           "all (or any) observations. You can modify the "
                           "key with the md_identifier parameter."
                           % md_identifier)
        except TypeError:
            raise TypeError("Can't correctly process the metadata string. If "
                            "your input file was generated from QIIME 1.4.0 "
                            "or earlier you may need to pass --md_as_string.")
        except AttributeError:
            raise AttributeError("Metadata category not found. If your input "
                                 "file was generated from QIIME 1.4.0 or "
                                 "earlier you may need to pass "
                                 "--md_identifier \"Consensus Lineage\".")
        return result

    for bin, sub_otu_table in otu_table.binObservationsByMetadata(split_f):
        output_fp = '%s/otu_table_%s.biom' % (output_dir, bin)
        output_f = open(output_fp, 'w')
        output_f.write(format_biom_table(sub_otu_table))
        output_f.close()
        results.append(output_fp)
    return results
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    otu_table_fp = opts.biom_fp
    map_fp = opts.map_fp
    output_dir = opts.output_dir
    scolors = opts.scolors.split(',')
    ocolors = opts.ocolors.split(',')
    sshapes = opts.sshapes.split(',')
    oshapes = opts.oshapes.split(',')
    ssizes = opts.ssizes.split(',')
    osizes = opts.osizes.split(',')
    md_fields = opts.md_fields.split(',')

    # check that the otu fields asked for are available
    shared_options = ['NodeType', 'Abundance']
    if not all([i in md_fields + shared_options
                for i in ocolors + oshapes + osizes]):
        option_parser.error('The fields specified for observation colors, '
                            'sizes, or shapes are not in either the shared '
                            'options (NodeType,Abundance) or the supplied '
                            'md_fields. These fields must be a subset of the '
                            'union of these sets. Have you passed ocolors, '
                            'osizes or oshapes that are not in the '
                            'md_fields?')

    # check that the sample fields asked for are available. mapping file
    # elements should all have the same metadata keys
    sopts = parse_mapping_file_to_dict(map_fp)[0].items()[0][1].keys()
    if not all([i in sopts + shared_options
                for i in scolors + sshapes + ssizes]):
        option_parser.error('The fields specified for sample colors, sizes, '
                            'or shapes are not in either the shared options '
                            '(NodeType,Abundance) or the supplied mapping '
                            'file. These fields must be a subset of the '
                            'union of these sets. Have you passed scolors, '
                            'ssizes or sshapes that are not in the mapping '
                            'file headers?')

    # actual computation begins
    try:
        create_dir(output_dir, fail_on_exist=True)
    except OSError:
        option_parser.error('Directory already exists. Will not overwrite.')

    bt = load_table(otu_table_fp)
    pmf = parse_mapping_file_to_dict(map_fp)[0]  # [1] is comments, don't need

    sample_node_table = make_sample_node_table(bt, pmf)
    otu_node_table = make_otu_node_table(bt, opts.observation_md_header_key,
                                         md_fields)
    node_attr_table = make_node_attr_table(otu_node_table, sample_node_table,
                                           scolors, ocolors, ssizes, osizes,
                                           sshapes, oshapes)
    edge_table = make_edge_table(bt)

    _write_table(sample_node_table,
                 os.path.join(output_dir, 'SampleNodeTable.txt'))
    _write_table(otu_node_table,
                 os.path.join(output_dir, 'OTUNodeTable.txt'))
    _write_table(node_attr_table,
                 os.path.join(output_dir, 'NodeAttrTable.txt'))
    _write_table(edge_table, os.path.join(output_dir, 'EdgeTable.txt'))
def setUp(self):
    # create the temporary input files that will be used
    self.iupac = {'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C',
                  'R': '[AG]', 'Y': '[CT]', 'S': '[GC]', 'W': '[AT]',
                  'K': '[GT]', 'M': '[AC]', 'B': '[CGT]', 'D': '[AGT]',
                  'H': '[ACT]', 'V': '[ACG]', 'N': '[ACGT]'}

    self.output_dir = get_random_directory_name(prefix='/tmp/')
    self.output_dir += '/'

    create_dir(self.output_dir)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    create_dir(opts.output_dir)

    generate_passwords(open(opts.personal_ids_fp, 'U'),
                       opts.results_dir,
                       opts.password_dir,
                       opts.output_dir)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    create_dir(output_dir)

    otu_table_fp = opts.otu_table
    otu_table = load_table(otu_table_fp)

    tree_fh = open(opts.tree_file, 'U')
    tree = DndParser(tree_fh)
    tree_fh.close()

    mapping_fp = opts.mapping_fp
    if mapping_fp:
        mapping_f = open(mapping_fp, 'U')
        input_map_basename = splitext(split(mapping_fp)[1])[0]
    else:
        mapping_f = None
        input_map_basename = None

    input_table_basename = splitext(split(otu_table_fp)[1])[0]

    simsam_range_to_files(otu_table,
                          tree,
                          simulated_sample_sizes=map(int, opts.num.split(',')),
                          dissimilarities=map(float, opts.dissim.split(',')),
                          output_dir=output_dir,
                          mapping_f=mapping_f,
                          output_table_basename=input_table_basename,
                          output_map_basename=input_map_basename)
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    mapping_field = opts.mapping_field
    output_dir = opts.output_dir

    otu_table_base_name = splitext(split(otu_table_fp)[1])[0]

    mapping_data, headers, comments = parse_mapping_file(open(mapping_fp, 'U'))
    try:
        field_index = headers.index(mapping_field)
    except ValueError:
        option_parser.error("Field is not in mapping file (search is case "
                            "and white-space sensitive). \n\tProvided field: "
                            "%s. \n\tValid fields: %s"
                            % (mapping_field, ' '.join(headers)))

    mapping_values = set([e[field_index] for e in mapping_data])

    create_dir(output_dir)

    for v in mapping_values:
        v_fp_str = v.replace(' ', '_')
        otu_table_output_fp = join(output_dir, '%s_%s.txt'
                                   % (otu_table_base_name, v_fp_str))
        mapping_output_fp = join(output_dir, 'mapping_%s.txt' % v_fp_str)
        filter_otus_and_map(open(mapping_fp, 'U'),
                            open(otu_table_fp, 'U'),
                            open(mapping_output_fp, 'w'),
                            open(otu_table_output_fp, 'w'),
                            valid_states_str="%s:%s" % (mapping_field, v),
                            num_seqs_per_otu=1)
def main():
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    parameter_fp = opts.parameter_fp
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    match_barcodes = opts.match_barcodes
    barcode_indicator = opts.barcode_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
                            "--include_input_dir_path must also be enabled.")

    if opts.parameter_fp:
        with open(opts.parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['join_paired_ends'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    all_files = []
    extensions = ['.fastq.gz', '.fastq', '.fq.gz', '.fq']

    for root, dir, fps in walk(input_dir):
        for fp in fps:
            for extension in extensions:
                if fp.endswith(extension):
                    all_files += [abspath(join(root, fp))]

    pairs, bc_pairs = get_pairs(all_files, read1_indicator, read2_indicator,
                                match_barcodes, barcode_indicator)

    commands = create_commands_jpe(pairs, output_dir, params_str,
                                   leading_text, trailing_text,
                                   include_input_dir_path,
                                   remove_filepath_in_name,
                                   match_barcodes, bc_pairs)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
def split_otu_table_on_taxonomy_to_files(otu_table_fp, level, output_dir,
                                         md_identifier='taxonomy',
                                         md_processor=process_md_as_list):
    """ Split OTU table by taxonomic level, writing otu tables to output dir
    """
    results = []
    otu_table = load_table(otu_table_fp)
    create_dir(output_dir)

    def split_f(id_, obs_md):
        try:
            result = md_processor(obs_md, md_identifier, level)
        except KeyError:
            raise KeyError("Metadata identifier (%s) is not associated with "
                           "all (or any) observations. You can modify the "
                           "key with the md_identifier parameter."
                           % md_identifier)
        except TypeError:
            raise TypeError("Can't correctly process the metadata string. If "
                            "your input file was generated from QIIME 1.4.0 "
                            "or earlier you may need to pass --md_as_string.")
        except AttributeError:
            raise AttributeError("Metadata category not found. If your input "
                                 "file was generated from QIIME 1.4.0 or "
                                 "earlier you may need to pass "
                                 "--md_identifier \"Consensus Lineage\".")
        return result

    for bin, sub_otu_table in otu_table.partition(split_f, axis='observation'):
        output_fp = '%s/otu_table_%s.biom' % (output_dir, bin)
        write_biom_table(sub_otu_table, output_fp)
        results.append(output_fp)

    return results
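# A minimal usage sketch for the function above, assuming it is in scope.
# The input path and level are hypothetical, and the table's observation
# metadata is assumed to carry a 'taxonomy' list per OTU:
def example_split_by_phylum():
    # writes one otu_table_<taxon>.biom per bin and returns their paths
    result_fps = split_otu_table_on_taxonomy_to_files(
        'otu_table.biom',         # hypothetical input BIOM table
        level=2,                  # e.g. phylum in a greengenes-style lineage
        output_dir='taxa_split')  # created via create_dir if missing
    return result_fps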
def copy_support_files(file_path):
    """Copy the support files to a named destination

    file_path: path where you want the support files to be copied to

    Will raise EmperorSupportFilesError if a problem is found whilst trying
    to copy the files.
    """
    file_path = join(file_path, 'emperor_required_resources')

    if not exists(file_path):
        create_dir(file_path, False)

    # shutil.copytree does not provide an easy way to copy the contents of a
    # directory into another existing directory, hence the system call.
    # use double quotes for the paths to escape any invalid character(s)/spaces
    cmd = 'cp -R "%s/"* "%s"' % (get_emperor_support_files_dir(),
                                 abspath(file_path))
    cmd_o, cmd_e, cmd_r = qiime_system_call(cmd)

    if cmd_e:
        raise EmperorSupportFilesError("Error found whilst trying to copy "
                                       "the support files:\n%s\n Could not "
                                       "execute: %s" % (cmd_e, cmd))
    return
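# Usage sketch for copy_support_files with a hypothetical destination;
# assumes an environment where Emperor and the helpers above are importable:
def example_copy_emperor_resources():
    # after this call, <dest>/emperor_required_resources/ holds the
    # JS/CSS assets the generated Emperor HTML expects to find
    copy_support_files('/tmp/my_emperor_plot')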
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    mapping_field = opts.mapping_field
    output_dir = opts.output_dir
    # column_rename_ids = opts.column_rename_ids
    # include_repeat_cols = opts.include_repeat_cols

    create_dir(output_dir)

    # split mapping file
    mapping_f = open(mapping_fp, 'U')
    for fp_str, sub_mapping_s in split_mapping_file_on_field(mapping_f,
                                                             mapping_field):
        mapping_output_fp = join(output_dir, 'mapping_%s.txt' % fp_str)
        open(mapping_output_fp, 'w').write(sub_mapping_s)

    # split otu table
    otu_table_base_name = splitext(split(otu_table_fp)[1])[0]
    mapping_f = open(mapping_fp, 'U')
    otu_table_f = open(otu_table_fp, 'U')
    for fp_str, sub_otu_table_s in split_otu_table_on_sample_metadata(
            otu_table_f, mapping_f, mapping_field):
        otu_table_output_fp = join(output_dir, '%s_%s.biom'
                                   % (otu_table_base_name, fp_str))
        open(otu_table_output_fp, 'w').write(sub_otu_table_s)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    mapping_field = opts.mapping_field
    output_dir = opts.output_dir
    # column_rename_ids = opts.column_rename_ids
    # include_repeat_cols = opts.include_repeat_cols

    create_dir(output_dir)

    # split mapping file
    mapping_f = open(mapping_fp, 'U')
    for fp_str, sub_mapping_s in split_mapping_file_on_field(mapping_f,
                                                             mapping_field):
        mapping_output_fp = join(output_dir, 'mapping_%s.txt' % fp_str)
        open(mapping_output_fp, 'w').write(sub_mapping_s)

    # split otu table
    otu_table_base_name = splitext(split(otu_table_fp)[1])[0]
    mapping_f = open(mapping_fp, 'U')
    otu_table = load_table(otu_table_fp)

    try:
        for fp_str, sub_otu_table_s in split_otu_table_on_sample_metadata(
                otu_table, mapping_f, mapping_field):
            otu_table_output_fp = join(output_dir, '%s_%s.biom'
                                       % (otu_table_base_name, fp_str))
            write_biom_table(sub_otu_table_s, otu_table_output_fp)
    except OTUTableSplitError as e:
        option_parser.error(e)
def __call__(self, query_fasta_fp, database_fasta_fp, output_dir,
             observation_metadata_fp=None, params=None,
             HALT_EXEC=False):
    """ Call the DatabaseMapper """
    if params is None:
        params = {}

    create_dir(output_dir)
    raw_output_fp = self._get_raw_output_fp(output_dir,
                                            params)
    output_observation_map_fp = '%s/observation_map.txt' % output_dir
    output_biom_fp = '%s/observation_table.biom' % output_dir
    log_fp = '%s/observation_table.log' % output_dir

    self._assign_dna_reads_to_database(
        query_fasta_fp=query_fasta_fp,
        database_fasta_fp=database_fasta_fp,
        raw_output_fp=raw_output_fp,
        temp_dir=get_qiime_temp_dir(),
        params=params,
        HALT_EXEC=HALT_EXEC)

    self._process_raw_output(raw_output_fp,
                             log_fp,
                             output_observation_map_fp)

    self._generate_biom_output(output_observation_map_fp,
                               output_biom_fp,
                               observation_metadata_fp)
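# Usage sketch for the DatabaseMapper protocol above. The mapper argument is
# assumed to be an instance of a concrete subclass (one implementing
# _get_raw_output_fp, _assign_dna_reads_to_database, _process_raw_output and
# _generate_biom_output); the file paths are placeholders:
def example_run_database_mapper(mapper):
    mapper(query_fasta_fp='reads.fna',
           database_fasta_fp='reference.fna',
           output_dir='db_mapping_out')
    # db_mapping_out/ now contains observation_map.txt,
    # observation_table.biom and observation_table.log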
def copy_support_files(file_path):
    """Copy the support files to a named destination

    file_path: path where you want the support files to be copied to

    Will raise EmperorSupportFilesError if a problem is found whilst trying
    to copy the files.
    """
    file_path = join(file_path, "emperor_required_resources")

    if not exists(file_path):
        create_dir(file_path, False)

    # shutil.copytree does not provide an easy way to copy the contents of a
    # directory into another existing directory, hence the system call.
    # use double quotes for the paths to escape any invalid character(s)/spaces
    cmd = 'cp -R "%s/"* "%s"' % (get_emperor_support_files_dir(),
                                 abspath(file_path))
    cmd_o, cmd_e, cmd_r = qiime_system_call(cmd)

    if cmd_e:
        raise EmperorSupportFilesError("Error found whilst trying to copy "
                                       "the support files:\n%s\n Could not "
                                       "execute: %s" % (cmd_e, cmd))
    return
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    if output_dir:
        create_dir(output_dir)
    else:
        if isfile(opts.input_dir):
            # if output_dir is empty after the split, then a relative path
            # was passed, and the input file is in the current directory
            output_dir = split(opts.input_dir)[0] or '.'
        else:
            # opts.input_dir is a directory
            output_dir = opts.input_dir

    if opts.no_trim and not opts.use_sfftools:
        raise ValueError("When using the --no_trim option you must have the "
                         "sfftools installed and must also pass the "
                         "--use_sfftools option")

    prep_sffs_in_dir(opts.input_dir,
                     output_dir,
                     make_flowgram=opts.make_flowgram,
                     convert_to_flx=opts.convert_to_FLX,
                     use_sfftools=opts.use_sfftools,
                     no_trim=opts.no_trim)
def test_truncate_fasta_qual(self):
    """ Test for overall module functionality """
    base_pos = 80
    output_dir = '/tmp/truncate_fasta_qual_test/'
    create_dir(output_dir)

    truncate_fasta_qual(self.fasta_fp, self.qual_fp, output_dir, base_pos)

    actual_trunc_fasta_fp = output_dir +\
        basename(self.fasta_fp).replace(".fasta", "_filtered.fasta")
    actual_trunc_fasta_fp = open(actual_trunc_fasta_fp, "U")
    actual_trunc_fasta = [line.strip() for line in actual_trunc_fasta_fp]

    self.assertEqual(actual_trunc_fasta, expected_fasta_seqs)

    actual_trunc_qual_fp = output_dir +\
        basename(self.qual_fp).replace(".qual", "_filtered.qual")
    actual_trunc_qual_fp = open(actual_trunc_qual_fp, "U")
    actual_trunc_qual = [line.strip() for line in actual_trunc_qual_fp]

    self.assertEqual(actual_trunc_qual, expected_qual_scores)
def test_plot_rank_abundance_graphs_dense(self):
    """plot_rank_abundance_graphs works with any number of samples
    (DenseOTUTable)"""
    self.otu_table = parse_biom_table_str(otu_table_dense)
    self.dir = get_tmp_filename(tmp_dir=self.tmp_dir,
                                prefix="test_plot_rank_abundance",
                                suffix="/")
    create_dir(self.dir)
    self._dirs_to_remove.append(self.dir)

    # test empty sample name
    self.assertRaises(ValueError, plot_rank_abundance_graphs, '',
                      self.otu_table, self.dir)
    # test invalid sample name
    self.assertRaises(ValueError, plot_rank_abundance_graphs,
                      'Invalid_sample_name', self.otu_table, self.dir)

    # test with two samples
    file_type = "pdf"
    plot_rank_abundance_graphs('S3,S5', self.otu_table, self.dir,
                               file_type=file_type)
    tmp_file = abspath(self.dir + "rank_abundance_cols_0_2." + file_type)
    self.assertTrue(exists(tmp_file))
    self.files_to_remove.append(tmp_file)

    # test with all samples
    plot_rank_abundance_graphs('*', self.otu_table, self.dir,
                               file_type=file_type)
    tmp_file = abspath(self.dir + "rank_abundance_cols_0_1_2." + file_type)
    self.files_to_remove.append(tmp_file)
    self.assertTrue(exists(tmp_file))
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    mapping_fp = opts.mapping_fp
    has_barcodes = not opts.not_barcoded
    variable_len_barcodes = opts.variable_len_barcodes
    output_dir = opts.output_dir + "/"
    char_replace = opts.char_replace
    verbose = opts.verbose
    disable_primer_check = opts.disable_primer_check
    added_demultiplex_field = opts.added_demultiplex_field

    # Create output directory, check path/access to mapping file
    create_dir(output_dir)

    # Test for valid replacement characters
    valid_replacement_chars = digits + letters + "_" + "."
    if char_replace not in valid_replacement_chars:
        option_parser.error('-c option requires alphanumeric, period, or '
                            'underscore character.')
    if len(char_replace) != 1:
        option_parser.error('-c parameter must be a single character.')

    check_mapping_file(mapping_fp, output_dir, has_barcodes, char_replace,
                       verbose, variable_len_barcodes,
                       disable_primer_check, added_demultiplex_field)
def test_make_plots(self):
    """make_plots: tests whether the average plots are generated and if
    the dictionary for the html generation is properly formatted"""
    filename1 = '/tmp/test/testcol_0_row_0_ave.png'
    filename2 = '/tmp/test/testcol_0_row_0_raw.png'
    folder1 = '/tmp/test/'

    self._paths_to_clean_up = [filename1, filename2]
    self._folders_to_cleanup = [folder1]

    exp1 = {'SampleID': {'Sample1': {'test': {'ave': [' 7.000', ' 2.052'],
                                              'err': [' nan', ' 0.000']}}}}
    exp2 = {'test': {'groups': {'SampleID': {'Sample1': {
                'groupcolor': '#ff0000',
                'raw_link': 'html_plots/testcol_0_row_0_raw.png',
                'groupsamples': ['Sample1'],
                'ave_link': 'html_plots/testcol_0_row_0_ave.png'}}},
            'samples': {'Sample1': {'color': '#ff0000',
                                    'link': 'html_plots/testcol_0_row_0.png'}}}}

    create_dir('/tmp/test/', False)

    obs1, obs2 = make_plots(self.background_color, self.label_color,
                            self.rare_data, self.ymax, self.xmax,
                            '/tmp/test/', self.resolution, self.imagetype,
                            self.groups, self.colors, self.data_colors,
                            self.metric_name, self.labelname,
                            self.rarefaction_data_mat,
                            self.rarefaction_legend_mat, self.sample_dict,
                            self.data_colors, self.colors2,
                            self.mapping_lookup)

    self.assertEqual(obs1, exp1)
    self.assertEqual(obs2, exp2)
    self.assertTrue(exists(filename1))
    self.assertTrue(exists(filename2))
    self.assertTrue(exists(folder1))
def test_plot_rank_abundance_graphs_dense(self):
    """plot_rank_abundance_graphs works with any number of samples
    (DenseOTUTable)"""
    self.otu_table = parse_biom_table_str(otu_table_dense)
    self.dir = get_tmp_filename(tmp_dir=self.tmp_dir,
                                prefix="test_plot_rank_abundance",
                                suffix="/")
    create_dir(self.dir)
    self._dirs_to_remove.append(self.dir)
    tmp_fname = get_tmp_filename(tmp_dir=self.dir)

    # test empty sample name
    self.assertRaises(ValueError, plot_rank_abundance_graphs, tmp_fname,
                      '', self.otu_table)
    # test invalid sample name
    self.assertRaises(ValueError, plot_rank_abundance_graphs, tmp_fname,
                      'Invalid_sample_name', self.otu_table)

    # test with two samples
    file_type = "pdf"
    tmp_file = abspath(self.dir + "rank_abundance_cols_0_2." + file_type)
    plot_rank_abundance_graphs(tmp_file, 'S3,S5', self.otu_table,
                               file_type=file_type)
    self.assertTrue(exists(tmp_file))
    self.files_to_remove.append(tmp_file)

    # test with all samples
    tmp_file = abspath(self.dir + "rank_abundance_cols_0_1_2." + file_type)
    plot_rank_abundance_graphs(tmp_file, '*', self.otu_table,
                               file_type=file_type)
    self.files_to_remove.append(tmp_file)
    self.assertTrue(exists(tmp_file))
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    mapping_fp = opts.mapping_fp
    has_barcodes = not opts.not_barcoded
    variable_len_barcodes = opts.variable_len_barcodes
    output_dir = opts.output_dir + "/"
    char_replace = opts.char_replace
    verbose = opts.verbose
    disable_primer_check = opts.disable_primer_check
    added_demultiplex_field = opts.added_demultiplex_field
    suppress_html = opts.suppress_html

    # Create output directory, check path/access to mapping file
    create_dir(output_dir)

    # Test for valid replacement characters
    valid_replacement_chars = digits + letters + "_" + "."
    if char_replace not in valid_replacement_chars:
        option_parser.error('-c option requires alphanumeric, period, or '
                            'underscore character.')
    if len(char_replace) != 1:
        option_parser.error('-c parameter must be a single character.')

    check_mapping_file(mapping_fp, output_dir, has_barcodes, char_replace,
                       verbose, variable_len_barcodes,
                       disable_primer_check, added_demultiplex_field,
                       suppress_html)
def __call__(self, query_fasta_fp, database_fasta_fp, output_dir,
             observation_metadata_fp=None, params=None,
             HALT_EXEC=False):
    """ Call the DatabaseMapper """
    if params is None:
        params = {}

    create_dir(output_dir)
    raw_output_fp = self._get_raw_output_fp(output_dir, params)
    output_observation_map_fp = '%s/observation_map.txt' % output_dir
    output_biom_fp = '%s/observation_table.biom' % output_dir
    log_fp = '%s/observation_table.log' % output_dir

    self._assign_dna_reads_to_database(query_fasta_fp=query_fasta_fp,
                                       database_fasta_fp=database_fasta_fp,
                                       raw_output_fp=raw_output_fp,
                                       temp_dir=get_qiime_temp_dir(),
                                       params=params,
                                       HALT_EXEC=HALT_EXEC)

    self._process_raw_output(raw_output_fp, log_fp,
                             output_observation_map_fp)

    self._generate_biom_output(output_observation_map_fp, output_biom_fp,
                               observation_metadata_fp)
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    create_dir(opts.output_dir, fail_on_exist=False)

    post_process(opts.fasta_fp, opts.denoiser_map_file,
                 opts.denoised_fasta_fp, opts.otu_picker_map_file,
                 opts.output_dir)
def setUp(self):
    # create the temporary input files that will be used
    self._files_to_remove = []

    self.sample_fasta_file1_data = sample_fasta_file1
    self.sample_fasta_file_bad_labels_data =\
        sample_fasta_file_bad_labels

    self.sample_mapping_file1_data = sample_mapping_file1
    self.sample_mapping_file_no_revprimer_header =\
        sample_mapping_file_no_revprimer_header
    self.sample_mapping_file_bad_revprimer =\
        sample_mapping_file_bad_revprimer

    self.expected_truncation_default_settings =\
        expected_truncation_default_settings
    self.expected_truncation_zero_mismatches =\
        expected_truncation_zero_mismatches
    self.expected_truncation_zero_mismatches_truncate_remove =\
        expected_truncation_zero_mismatches_truncate_remove

    self.fasta_fp = get_tmp_filename(prefix='fasta_seqs_', suffix='.fna')
    seq_file = open(self.fasta_fp, 'w')
    seq_file.write(self.sample_fasta_file1_data)
    seq_file.close()

    self.fasta_badlabels_fp = get_tmp_filename(
        prefix="fasta_seqs_badlabels_", suffix=".fna")
    seq_file = open(self.fasta_badlabels_fp, "w")
    seq_file.write(self.sample_fasta_file_bad_labels_data)
    seq_file.close()

    self.mapping_fp = get_tmp_filename(prefix='sample_mapping_',
                                       suffix='.txt')
    mapping_file = open(self.mapping_fp, "w")
    mapping_file.write(self.sample_mapping_file1_data)
    mapping_file.close()

    self.mapping_bad_header_fp = get_tmp_filename(
        prefix='sample_mapping_badheader_', suffix=".txt")
    mapping_file = open(self.mapping_bad_header_fp, "w")
    mapping_file.write(self.sample_mapping_file_no_revprimer_header)
    mapping_file.close()

    self.mapping_bad_primer_fp = get_tmp_filename(
        prefix='sample_mapping_badprimer_', suffix=".txt")
    mapping_file = open(self.mapping_bad_primer_fp, "w")
    mapping_file.write(self.sample_mapping_file_bad_revprimer)
    mapping_file.close()

    self.output_dir = mkdtemp()
    self.output_dir += '/'
    create_dir(self.output_dir)

    self._files_to_remove =\
        [self.fasta_fp, self.mapping_fp, self.mapping_bad_header_fp,
         self.mapping_bad_primer_fp, self.fasta_badlabels_fp]
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    biom_table_fp = opts.biom_table_fp
    mapping_fp = opts.mapping_fp
    fields = opts.fields.split(',')
    output_dir = opts.output_dir
    suppress_mf = opts.suppress_mapping_file_output
    # column_rename_ids = opts.column_rename_ids
    # include_repeat_cols = opts.include_repeat_cols

    bt = load_table(biom_table_fp)
    mdata, mheaders, mcomments = parse_mapping_file(mapping_fp)
    mdata = array(mdata)

    # check that biom file and mapping file have matching sample names.
    # discard those samples that do not appear in both.
    shared_samples = list(set(mdata[:, 0]).intersection(bt.ids(axis='sample')))
    if len(shared_samples) == 0:
        raise ValueError('Mapping file and biom table share no samples.')
    elif len(shared_samples) == len(mdata[:, 0]):
        mdata = array(mdata)
    else:
        # we want to preserve the order of the samples in the biom table
        ss_bt_order = [s for s in bt.ids(axis='sample')
                       if s in shared_samples]
        bt = bt.filter(ss_bt_order, axis='sample', inplace=True)
        mdata = subset_mapping_data(mdata, shared_samples)

    # check that the specified fields are in the mapping file headers
    if not all([i in mheaders for i in fields]):
        raise ValueError('One or more of the specified fields was not found '
                         'in the mapping file.')

    # create output directory and create base names
    create_dir(output_dir)
    mf_base_name = join(output_dir, splitext(split(mapping_fp)[1])[0])
    bt_base_name = join(output_dir, splitext(split(biom_table_fp)[1])[0])

    # run code and append output
    sample_groups, value_groups = make_non_empty_sample_lists(fields,
                                                              mheaders,
                                                              mdata)

    for sg, vg in zip(sample_groups, value_groups):
        name_base = '__' + '%s_%s_' * len(vg) + '_'
        name_tmp = []
        for f, v in zip(fields, vg):
            name_tmp.extend([f, v])
        nb = name_base % tuple(name_tmp)

        tmp_mf_data = subset_mapping_data(mdata, sg)
        tmp_mf_str = format_mapping_file(mheaders, tmp_mf_data, mcomments)
        write_biom_table(bt.filter(sg, axis='sample', inplace=False),
                         bt_base_name + nb + '.biom')

        if not suppress_mf:
            o = open(mf_base_name + nb + '.txt', 'w')
            o.writelines(tmp_mf_str)
            o.close()
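# Worked example of the naming scheme used above (illustrative values only):
# with fields = ['Treatment', 'Site'] and value group vg = ('Control', 'Gut'),
#   name_base = '__' + '%s_%s_' * 2 + '_'  ->  '__%s_%s_%s_%s__'
#   nb        = '__Treatment_Control_Site_Gut__'
# so the outputs are named like otu_table__Treatment_Control_Site_Gut__.biom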
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    mapping_fp = opts.mapping_fp
    alpha_diversity_fp = opts.alpha_diversity_fp
    categories = opts.categories.split(',')
    depth = opts.depth
    output_dir = opts.output_dir
    correction_method = opts.correction_method
    test_type = opts.test_type
    num_permutations = opts.num_permutations

    if num_permutations < 10:
        option_parser.error('Number of permutations must be greater than or '
                            'equal to 10.')

    create_dir(output_dir)

    for category in categories:
        stat_output_fp = join(output_dir, '%s_stats.txt' % category)
        boxplot_output_fp = join(output_dir, '%s_boxplots.pdf' % category)

        alpha_diversity_f = open(alpha_diversity_fp, 'U')
        mapping_f = open(mapping_fp, 'U')
        ttest_result, alphadiv_avgs = \
            compare_alpha_diversities(alpha_diversity_f, mapping_f,
                                      category, depth, test_type,
                                      num_permutations)
        alpha_diversity_f.close()
        mapping_f.close()

        corrected_result = _correct_compare_alpha_results(ttest_result,
                                                          correction_method)

        # write stats results
        stat_output_f = open(stat_output_fp, 'w')
        header = ('Group1\tGroup2\tGroup1 mean\tGroup1 std\tGroup2 mean\t'
                  'Group2 std\tt stat\tp-value')
        lines = [header]
        for (t0, t1), v in corrected_result.items():
            lines.append('\t'.join(map(str, [t0, t1,
                                             alphadiv_avgs[t0][0],
                                             alphadiv_avgs[t0][1],
                                             alphadiv_avgs[t1][0],
                                             alphadiv_avgs[t1][1],
                                             v[0], v[1]])))
        stat_output_f.write('\n'.join(lines) + '\n')
        stat_output_f.close()

        # write box plots
        alpha_diversity_f = open(alpha_diversity_fp, 'U')
        mapping_f = open(mapping_fp, 'U')
        boxplot = generate_alpha_diversity_boxplots(alpha_diversity_f,
                                                    mapping_f, category,
                                                    depth)
        alpha_diversity_f.close()
        mapping_f.close()
        boxplot.savefig(boxplot_output_fp)
def main():
    """run denoiser on input flowgrams"""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    sff_files = opts.sff_fps

    for f in sff_files:
        if (not exists(f)):
            option_parser.error(('Flowgram file path does not exist:\n %s \n'
                                 'Pass a valid one via -i.') % f)

    outdir = opts.output_dir
    create_dir(outdir, fail_on_exist=not opts.force)

    log_fh = None

    if (not (opts.primer or opts.map_fname)):
        raise ApplicationError("Either mapping file or primer required")

    # Read primer from metadata file if not set on command line
    if not opts.primer:
        mapping_data, header, comments = \
            parse_mapping_file(open(opts.map_fname, "U"))

        index = header.index("LinkerPrimerSequence")
        all_primers = set(array(mapping_data)[:, index])

        if len(all_primers) != 1:
            raise ValueError(
                "Currently only data sets with one primer are allowed.\n"
                "Make separate mapping files with only one primer, re-run "
                "split_libraries and\ndenoise with each split_library "
                "output separately.")
        primer = list(all_primers)[0]
        last_char = primer[-1]
        if (last_char not in "ACGT"):
            raise ValueError("We currently do not support primers with "
                             "degenerate bases at their 3' end.")
    else:
        primer = opts.primer

    centroids, cluster_mapping = fast_denoiser(opts.sff_fps, opts.fasta_fp,
                                               outdir, opts.num_cpus, primer,
                                               titanium=opts.titanium)

    # store mapping file and centroids
    result_otu_path = '%s/denoised_clusters.txt' % outdir
    of = open(result_otu_path, 'w')
    for i, cluster in cluster_mapping.iteritems():
        of.write('%s\t%s\n' % (str(i), '\t'.join(cluster)))
    of.close()

    result_fasta_path = '%s/denoised_seqs.fasta' % outdir
    oh = open(result_fasta_path, 'w')
    write_Fasta_from_name_seq_pairs(centroids, oh)
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    output_dir = opts.out_fp
    output_basename = splitext(split(otu_table_fp)[1])[0]

    if not output_dir:
        output_dir = 'make_tep_output/'
    create_dir(output_dir)

    tep_fp = '%s/%s.tep' % (output_dir, output_basename)  # opts.out_fp+'.tep'
    jnlp_fp = '%s/%s.jnlp' % (output_dir, output_basename)
    tepfile = open(tep_fp, 'w')
    otu_lines = open(otu_table_fp, 'U').readlines()
    sample_ids, otu_ids, otu_table, metadata = parse_otu_table(otu_lines)
    mapping_lines = open(mapping_fp, 'U')
    tree_lines = open(tree_fp, 'U')

    lines = ['>>tre\n']
    lines += tree_lines.readlines()
    lines += '\n'
    if(metadata):
        lines += '>>otm\n#OTU ID\tOTU Metadata\n'
        for i in range(len(otu_ids)):
            lines += otu_ids[i] + '\t'
            for m in metadata[i]:
                lines += m + ';'
            # lines = lines[:len(lines)-1]
            lines += '\n'
    lines += '>>osm\n'
    lines += otu_lines
    lines += '\n>>sam\n'
    lines += mapping_lines.readlines()

    tepfile.writelines(lines)

    jnlpfile = open(jnlp_fp, 'w')
    lines = [jnlp_top_block]
    if(opts.web_flag):
        lines += 'http://topiaryexplorer.sourceforge.net/app/'
    else:
        lines += 'file:' + load_qiime_config()['topiaryexplorer_project_dir']
    lines += jnlp_middle_block
    if(opts.url):
        lines += opts.url
    else:
        lines += os.path.abspath(tep_fp)
    # lines += os.path.abspath(tep_fp)
    lines += jnlp_bottom_block
    jnlpfile.writelines(lines)
def run_process_illumina_through_split_lib(study_id, run_prefix, input_fp,
                                           mapping_fp, output_dir,
                                           command_handler, params,
                                           qiime_config,
                                           write_to_all_fasta=False,
                                           status_update_callback=print_to_stdout):
    """ NOTE: Parts of this function are directly copied from the
        run_qiime_data_preparation function from the workflow.py library
        file in QIIME.

        The steps performed by this function are:
          1) De-multiplex sequences. (split_libraries_fastq.py)
    """

    # Prepare some variables for the later steps
    filenames = input_fp.split(',')
    commands = []
    create_dir(output_dir)
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)

    # copy the mapping file
    copied_mapping = split(mapping_fp)[-1]
    mapping_input_fp_copy = join(output_dir, copied_mapping)
    copy_mapping_cmd = 'cp %s %s' % (mapping_fp, mapping_input_fp_copy)
    commands.append([('CopyMapping', copy_mapping_cmd)])

    # sort the filenames
    filenames.sort()

    # determine which file is the seq-file and which is the barcode-file,
    # and associate them with the mapping file
    if len(filenames) == 1:
        try:
            # Format of sample_id needs to be
            # seqs_<sample_name>.<sequence_prep_id>.fastq
            data_access = data_access_factory(ServerConfig.data_access_type)
            sql = """
                select s.sample_name || '.' || sp.sequence_prep_id
                from sample s
                    inner join sequence_prep sp
                    on s.sample_id = sp.sample_id
                where s.study_id = {0}
                    and sp.run_prefix = '{1}'
            """.format(study_id, run_prefix[:-1])
            sample_and_prep = data_access.dynamicMetadataSelect(
                sql).fetchone()[0]
            input_str = '-i {0} --sample_id {1}'.format(filenames[0],
                                                        sample_and_prep)
        except Exception, e:
            error = ('Failed to obtain sample and sequence prep info for '
                     'study_id {0} and run_prefix {1}\n').format(study_id,
                                                                 run_prefix)
            error += 'SQL was: \n {0} \n'.format(sql)
            error += 'Original exception was: \n {0}'.format(str(e))
            raise Exception(error)
def setUp(self):
    # create the temporary input files that will be used
    self.iupac = {'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C',
                  'R': '[AG]', 'Y': '[CT]', 'S': '[GC]', 'W': '[AT]',
                  'K': '[GT]', 'M': '[AC]', 'B': '[CGT]', 'D': '[AGT]',
                  'H': '[ACT]', 'V': '[ACG]', 'N': '[ACGT]'}

    self.output_dir = get_random_directory_name(prefix='/tmp/')
    self.output_dir += '/'

    create_dir(self.output_dir)
def main():
    # parse command line parameters
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create local copy of options
    forward_reads_fp = opts.forward_reads_fp
    reverse_reads_fp = opts.reverse_reads_fp
    pe_join_method = opts.pe_join_method
    output_dir = opts.output_dir
    # fastq-join only options:
    perc_max_diff = opts.perc_max_diff
    # SeqPrep only options:
    max_ascii_score = opts.max_ascii_score
    min_frac_match = opts.min_frac_match
    max_good_mismatch = opts.max_good_mismatch
    phred_64 = opts.phred_64
    # both fastq-join & SeqPrep options
    min_overlap = opts.min_overlap

    create_dir(output_dir, fail_on_exist=False)

    # send parameters to appropriate join method
    # currently only two join methods exist:
    # 'fastq-join' and 'SeqPrep'
    if pe_join_method == "fastq-join":
        join_func = join_method_names["fastq-join"]
        paths = join_func(
            forward_reads_fp,
            reverse_reads_fp,
            perc_max_diff=perc_max_diff,
            min_overlap=min_overlap,
            working_dir=output_dir,
        )

    if pe_join_method == "SeqPrep":
        join_func = join_method_names["SeqPrep"]
        paths = join_func(
            forward_reads_fp,
            reverse_reads_fp,
            max_overlap_ascii_q_score=max_ascii_score,
            min_overlap=min_overlap,
            max_mismatch_good_frac=max_good_mismatch,
            min_frac_matching=min_frac_match,
            phred_64=phred_64,
            working_dir=output_dir,
        )

    # If an index / barcode file is supplied, filter unused barcode reads
    # and write them to a new file. Name based on joined-pairs / assembled
    # outfile
    if opts.index_reads_fp:
        index_reads = opts.index_reads_fp
        assembly_fp = paths["Assembled"]  # grab joined-pairs output path
        write_synced_barcodes_fastq(assembly_fp, index_reads)
def make_per_sample_fasta(input_seqs_fp, mapping_file, output_dir):
    """ Creates per-sample fasta files from a multiplexed fasta file and a
    mapping file """
    mapping_data, header, comments = parse_mapping_file(
        mapping_file, suppress_stripping=False)

    create_dir(output_dir)
    negate = False
    for item in mapping_data:
        seqs_to_keep = item[0]
        output_file = join(output_dir, seqs_to_keep + '.fna')
        seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_prefix(
            open(input_seqs_fp), seqs_to_keep)
        filter_fasta_fp(input_seqs_fp, output_file,
                        seqs_to_keep_lookup, negate)
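# Usage sketch for make_per_sample_fasta with hypothetical paths; the
# mapping file's first column (SampleID) supplies both the per-sample
# label prefix and the output file name:
def example_split_per_sample():
    make_per_sample_fasta('seqs.fna',                # multiplexed fasta
                          open('mapping.txt', 'U'),  # QIIME mapping file
                          'per_sample_fastas')       # one <SampleID>.fna each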
def main():
    option_parser, options, args = parse_command_line_parameters(**script_info)

    create_dir(options.output_dir, fail_on_exist=False)

    master_tree, support_trees = load_tree_files(options.master_tree,
                                                 options.support_dir)

    # get the support of each node in the master tree
    new_master, bootstraps = bootstrap_support(master_tree, support_trees)

    write_bootstrap_support_files(new_master, bootstraps, options.output_dir,
                                  len(support_trees))
def main():
    """run denoiser on input flowgrams"""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    sff_files = opts.sff_fps

    for f in sff_files:
        if (not exists(f)):
            option_parser.error(('Flowgram file path does not exist:\n %s \n'
                                 'Pass a valid one via -i.') % f)

    outdir = opts.output_dir
    create_dir(outdir, fail_on_exist=not opts.force)

    log_fh = None

    if (not (opts.primer or opts.map_fname)):
        raise ApplicationError("Either mapping file or primer required")

    # Read primer from metadata file if not set on command line
    if not opts.primer:
        mapping_data, header, comments = \
            parse_mapping_file(open(opts.map_fname, "U"))

        index = header.index("LinkerPrimerSequence")
        all_primers = set(array(mapping_data)[:, index])

        if len(all_primers) != 1:
            raise ValueError(
                "Currently only data sets with one primer are allowed.\n"
                "Make separate mapping files with only one primer, re-run "
                "split_libraries and\ndenoise with each split_library "
                "output separately.")
        primer = list(all_primers)[0]
        last_char = primer[-1]
        if(last_char not in "ACGT"):
            raise ValueError("We currently do not support primers with "
                             "degenerate bases at their 3' end.")
    else:
        primer = opts.primer

    centroids, cluster_mapping = fast_denoiser(opts.sff_fps, opts.fasta_fp,
                                               outdir, opts.num_cpus, primer,
                                               titanium=opts.titanium)

    # store mapping file and centroids
    result_otu_path = '%s/denoised_clusters.txt' % outdir
    of = open(result_otu_path, 'w')
    for i, cluster in cluster_mapping.iteritems():
        of.write('%s\t%s\n' % (str(i), '\t'.join(cluster)))
    of.close()

    result_fasta_path = '%s/denoised_seqs.fasta' % outdir
    oh = open(result_fasta_path, 'w')
    write_Fasta_from_name_seq_pairs(centroids, oh)
def test_generate_heatmap_plots(self):
    """generate_heatmap_plots: create default output files"""
    # create directories and move js files to verify everything works
    # in the script file
    dir_path = join(self.output_dir, 'test')
    create_dir(dir_path)

    js_dir_path = join(dir_path, 'js')
    create_dir(js_dir_path)

    self._folders_to_cleanup.append(dir_path)

    qiime_dir = get_qiime_project_dir()
    js_path = join(qiime_dir, 'qiime/support_files/js')
    shutil.copyfile(join(js_path, 'overlib.js'),
                    join(js_dir_path, 'overlib.js'))
    shutil.copyfile(join(js_path, 'otu_count_display.js'),
                    join(js_dir_path, 'otu_count_display.js'))
    shutil.copyfile(join(js_path, 'jquery.js'),
                    join(js_dir_path, 'jquery.js'))
    shutil.copyfile(join(js_path, 'jquery.tablednd_0_5.js'),
                    join(js_dir_path, 'jquery.tablednd_0_5.js'))

    # generate otu_table object
    orig_data = array([[0, 1, 2], [1000, 0, 0]])
    orig_otu_table = table_factory(orig_data,
                                   ['Sample1', 'Sample2', 'Sample3'],
                                   ['OTU1', 'OTU2'],
                                   [None, None, None],
                                   [{"taxonomy": ["Bacteria"]},
                                    {"taxonomy": ["Archaea"]}])

    # put in an OTU sort order and sample order
    otu_sort = ['OTU2', 'OTU1']
    sample_sort = ['Sample2', 'Sample1', 'Sample3']
    num_otu_hits = 3

    # generate test files
    generate_heatmap_plots(num_otu_hits, orig_otu_table, otu_sort,
                           sample_sort, dir_path, js_dir_path, 'test',
                           fractional_values=False)

    self.assertEqual(open(join(js_dir_path, 'test.js'), 'U').read(),
                     exp_js_output_file)
def main():
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    fasta_fp = opts.fasta_fp
    qual_fp = opts.qual_fp
    output_dir = opts.output_dir
    base_pos = int(opts.base_pos)

    create_dir(output_dir)

    truncate_fasta_qual(fasta_fp, qual_fp, output_dir, base_pos)
def main():
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    create_dir(opts.output_dir, fail_on_exist=False)

    levels = map(int, opts.levels.split(','))

    results = generate_taxa_compare_table(opts.root_dir, opts.key_dir, levels)
    results = format_output(results, opts.separator)

    for level in levels:
        with open(join(opts.output_dir,
                       'compare_table_L' + str(level) + '.txt'), 'w') as f:
            f.writelines(results[level])
def main():
    # parse command line parameters
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create local copy of options
    input_fp = opts.input_fp
    output_dir = opts.output_dir
    forward_read_identifier = opts.forward_read_identifier
    reverse_read_identifier = opts.reverse_read_identifier

    create_dir(output_dir, fail_on_exist=False)

    extract_reads_from_interleaved(input_fp, forward_read_identifier,
                                   reverse_read_identifier, output_dir)
def main():
    # parse command line parameters
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create local copy of options
    forward_reads_fp = opts.forward_reads_fp
    reverse_reads_fp = opts.reverse_reads_fp
    pe_join_method = opts.pe_join_method
    output_dir = opts.output_dir
    # fastq-join only options:
    perc_max_diff = opts.perc_max_diff
    # SeqPrep only options:
    max_ascii_score = opts.max_ascii_score
    min_frac_match = opts.min_frac_match
    max_good_mismatch = opts.max_good_mismatch
    phred_64 = opts.phred_64
    # both fastq-join & SeqPrep options
    min_overlap = opts.min_overlap

    create_dir(output_dir, fail_on_exist=False)

    # send parameters to appropriate join method
    # currently only two join methods exist:
    # 'fastq-join' and 'SeqPrep'
    if pe_join_method == "fastq-join":
        join_func = join_method_names["fastq-join"]
        paths = join_func(forward_reads_fp,
                          reverse_reads_fp,
                          perc_max_diff=perc_max_diff,
                          min_overlap=min_overlap,
                          working_dir=output_dir)
    if pe_join_method == "SeqPrep":
        join_func = join_method_names["SeqPrep"]
        paths = join_func(forward_reads_fp,
                          reverse_reads_fp,
                          max_overlap_ascii_q_score=max_ascii_score,
                          min_overlap=min_overlap,
                          max_mismatch_good_frac=max_good_mismatch,
                          min_frac_matching=min_frac_match,
                          phred_64=phred_64,
                          working_dir=output_dir)

    # If an index / barcode file is supplied, filter unused barcode reads
    # and write them to a new file. Name based on joined-pairs / assembled
    # outfile
    if opts.index_reads_fp:
        index_reads = opts.index_reads_fp
        assembly_fp = paths['Assembled']  # grab joined-pairs output path
        write_synced_barcodes_fastq(assembly_fp, index_reads)
def setUp(self):
    """ Creates variables and tmp filepaths for use in unit testing """
    self.sample_fasta_fp = get_tmp_filename(prefix="sample_fasta_",
                                            suffix=".fna")
    seq_file = open(self.sample_fasta_fp, 'w')
    seq_file.write(sample_fasta_file)
    seq_file.close()

    self.sample_fasta_invalid_fp = get_tmp_filename(prefix="sample_fasta_",
                                                    suffix=".fna")
    seq_file = open(self.sample_fasta_invalid_fp, 'w')
    seq_file.write(sample_fasta_file_invalid)
    seq_file.close()

    self.sample_mapping_fp = get_tmp_filename(prefix="sample_mapping_",
                                              suffix=".txt")
    map_file = open(self.sample_mapping_fp, "w")
    map_file.write(sample_mapping_file)
    map_file.close()

    self.sample_tree_3tips_fp = get_tmp_filename(prefix="sample_tree3tips_",
                                                 suffix=".tre")
    tree_file = open(self.sample_tree_3tips_fp, "w")
    tree_file.write(sample_tree_file_3tips)
    tree_file.close()

    self.sample_tree_5tips_fp = get_tmp_filename(prefix="sample_tree5tips_",
                                                 suffix=".tre")
    tree_file = open(self.sample_tree_5tips_fp, "w")
    tree_file.write(sample_tree_file_5tips)
    tree_file.close()

    self.sample_mapping_file_errors_fp =\
        get_tmp_filename(prefix="error_mapping_", suffix=".txt")
    map_file = open(self.sample_mapping_file_errors_fp, "w")
    map_file.write(sample_mapping_file_errors)
    map_file.close()

    self._files_to_remove = [self.sample_fasta_fp,
                             self.sample_fasta_invalid_fp,
                             self.sample_mapping_fp,
                             self.sample_tree_3tips_fp,
                             self.sample_tree_5tips_fp,
                             self.sample_mapping_file_errors_fp]

    self.output_dir =\
        get_tmp_filename(prefix="validate_demultiplexed_fasta_", suffix="/")
    create_dir(self.output_dir)