def test_plot_rank_abundance_graphs(self):
        """plot_rank_abundance_graphs works with any number of samples"""

        # In-memory fake OTU table split into lines; each call below gets a
        # fresh iterator over it.
        self.otu_table = otu_table_fake.split("\n")
        self.dir = get_tmp_filename(tmp_dir=self.tmp_dir, prefix="test_plot_rank_abundance", suffix="/")
        create_dir(self.dir)
        self._dirs_to_remove.append(self.dir)
        # test empty sample name
        self.assertRaises(ValueError, plot_rank_abundance_graphs, "", iter(self.otu_table), self.dir)
        # test invalid sample name
        self.assertRaises(ValueError, plot_rank_abundance_graphs, "Invalid_sample_name", iter(self.otu_table), self.dir)

        # test with two samples
        file_type = "pdf"
        plot_rank_abundance_graphs("S3,S5", iter(self.otu_table), self.dir, file_type=file_type)
        # Output filename encodes the selected sample column indices (0 and 2).
        tmp_file = abspath(self.dir + "rank_abundance_cols_0_2." + file_type)

        self.assertTrue(exists(tmp_file))
        self.files_to_remove.append(tmp_file)
        # test with all samples ("*" selects every sample column)
        plot_rank_abundance_graphs("*", iter(self.otu_table), self.dir, file_type=file_type)
        tmp_file = abspath(self.dir + "rank_abundance_cols_0_1_2." + file_type)

        self.files_to_remove.append(tmp_file)
        self.assertTrue(exists(tmp_file))
Exemplo n.º 2
0
    def setUp(self):
        """Create a per-test output dir, load config/params, silence stderr."""
        self.test_data = get_test_data_fps()
        self.files_to_remove = []
        self.dirs_to_remove = []

        # Create example output directory
        tmp_dir = get_qiime_temp_dir()
        self.test_out = get_tmp_filename(tmp_dir=tmp_dir,
                                         prefix='core_qiime_analyses_test_',
                                         suffix='',
                                         result_constructor=str)
        self.dirs_to_remove.append(self.test_out)
        create_dir(self.test_out)

        self.qiime_config = load_qiime_config()
        self.params = parse_qiime_parameters(params_f1)

        # suppress stderr during tests (one of the systems calls in the 
        # workflow prints a warning, and we can't suppress that warning with 
        # warnings.filterwarnings) here because it comes from within the code 
        # executed through the system call. Found this trick here:
        # http://stackoverflow.com/questions/9949633/suppressing-print-as-stdout-python
        self.saved_stderr = sys.stderr
        sys.stderr = StringIO()

        # Abort any individual test running longer than 180 seconds.
        initiate_timeout(180)
Exemplo n.º 3
0
    def setUp(self):
        """Create a per-test output dir, load config/params, silence stderr."""
        self.test_data = get_test_data_fps()
        self.files_to_remove = []
        self.dirs_to_remove = []

        # Create example output directory
        tmp_dir = get_qiime_temp_dir()
        self.test_out = get_tmp_filename(tmp_dir=tmp_dir,
                                         prefix='core_qiime_analyses_test_',
                                         suffix='',
                                         result_constructor=str)
        self.dirs_to_remove.append(self.test_out)
        create_dir(self.test_out)

        self.qiime_config = load_qiime_config()
        self.params = parse_qiime_parameters(params_f1)

        # suppress stderr during tests (one of the systems calls in the 
        # workflow prints a warning, and we can't suppress that warning with 
        # warnings.filterwarnings) here because it comes from within the code 
        # executed through the system call. Found this trick here:
        # http://stackoverflow.com/questions/9949633/suppressing-print-as-stdout-python
        self.saved_stderr = sys.stderr
        sys.stderr = StringIO()

        # Abort any individual test running longer than 180 seconds.
        initiate_timeout(180)
def main():
    """Script entry point: write a subset of an OTU table / mapping file pair.

    Selects samples along a gradient or by cluster (per --type) and writes
    the subsetted OTU table (BIOM JSON) and mapping file to the output
    directory, reusing the input files' basenames.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    out_dir = opts.output_dir
    create_dir(out_dir)

    if opts.type == 'gradient':
        subset_fn = choose_gradient_subset
    elif opts.type == 'cluster':
        subset_fn = choose_cluster_subsets
    else:
        # Previously fell through with subset_fn unbound (NameError at the
        # call below); fail with a clear message instead.
        option_parser.error("Unknown --type %r (expected 'gradient' or "
                            "'cluster')" % opts.type)

    subset_otu_table, subset_map_str = subset_fn(open(opts.otu_table_fp, 'U'),
            open(opts.map_fp, 'U'), opts.category, opts.num_total_samples)

    # Write the subset OTU table in BIOM JSON format.
    subset_otu_table_fp = join(out_dir, basename(opts.otu_table_fp))
    subset_otu_table_f = open(subset_otu_table_fp, 'w')
    subset_otu_table.getBiomFormatJsonString('choose_data_subset.py '
                                             '(microbiogeo)',
                                             subset_otu_table_f)
    subset_otu_table_f.close()

    # Write the matching subset mapping file.
    subset_map_fp = join(out_dir, basename(opts.map_fp))
    subset_map_f = open(subset_map_fp, 'w')
    subset_map_f.write(subset_map_str)
    subset_map_f.close()
Exemplo n.º 5
0
def split_otu_table_on_taxonomy_to_files(otu_table_fp,
                                         level,
                                         output_dir,
                                         md_identifier='taxonomy',
                                         md_processor=process_md_as_list):
    """ Split OTU table by taxonomic level, writing otu tables to output dir

    otu_table_fp: path to the input BIOM-format OTU table
    level: taxonomic level to split at
    output_dir: directory the per-taxon tables are written to (created if
        absent)
    md_identifier: observation metadata key holding the taxonomy
    md_processor: callable extracting the bin value from observation metadata

    Returns the list of output filepaths written.
    """
    results = []
    # Close the input handle deterministically (it was leaked before).
    table_f = open(otu_table_fp, 'U')
    try:
        otu_table = parse_biom_table(table_f)
    finally:
        table_f.close()
    create_dir(output_dir)

    def split_f(obs_md):
        # Translate low-level processor failures into actionable messages.
        # Call-form raises are valid on both Python 2 and 3 (the comma form
        # is Python-2 only).
        try:
            result = md_processor(obs_md, md_identifier, level)
        except KeyError:
            raise KeyError(
                "Metadata identifier (%s) is not associated with all (or any) observerations. You can modify the key with the md_identifier parameter." % md_identifier)
        except TypeError:
            raise TypeError(
                "Can't correctly process the metadata string. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_as_string.")
        except AttributeError:
            raise AttributeError(
                "Metadata category not found. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_identifier \"Consensus Lineage\".")

        return result

    # 'bin_value' avoids shadowing the builtin bin().
    for bin_value, sub_otu_table in otu_table.binObservationsByMetadata(split_f):
        output_fp = '%s/otu_table_%s.biom' % (output_dir, bin_value)
        with open(output_fp, 'w') as output_f:
            output_f.write(format_biom_table(sub_otu_table))
        results.append(output_fp)
    return results
Exemplo n.º 6
0
def main():
    """Script entry point: relabel fasta files per a mapping-file column."""
    option_parser, opts, args =\
     parse_command_line_parameters(**script_info)

    mapping_fp = opts.mapping_fp
    fasta_dir = opts.fasta_dir
    output_dir = opts.output_dir
    count_start = int(opts.count_start)
    filename_column = opts.filename_column

    # Check input filepaths; fail early with a helpful message.
    try:
        test_mapping_f = open(mapping_fp, "U")
    except IOError:
        raise IOError("Cannot open mapping filepath "
                      "%s, please check filepath and permissions." % mapping_fp)
    else:
        # The open was only a readability probe; release the handle
        # (previously leaked).
        test_mapping_f.close()

    if not isdir(fasta_dir):
        raise IOError("Specified fasta dir "
                      "%s, does not exist" % fasta_dir)

    # Create output directory, check path/access to mapping file
    create_dir(output_dir)

    add_qiime_labels(open(mapping_fp, "U"), fasta_dir, filename_column,
                     output_dir, count_start)
Exemplo n.º 7
0
    def test_create_dir(self):
        """create_dir creates dir and fails meaningful."""

        # One pre-existing directory plus two paths that do not exist yet.
        tmp_dir_path = get_random_directory_name()
        tmp_dir_path2 = get_random_directory_name(suppress_mkdir=True)
        tmp_dir_path3 = get_random_directory_name(suppress_mkdir=True)

        self.dirs_to_remove.append(tmp_dir_path)
        self.dirs_to_remove.append(tmp_dir_path2)
        self.dirs_to_remove.append(tmp_dir_path3)

        # create on existing dir raises OSError if fail_on_exist=True
        self.assertRaises(OSError,
                          create_dir,
                          tmp_dir_path,
                          fail_on_exist=True)
        # ... unless errors are handled externally, in which case it returns 1.
        # (assertEqual replaces the deprecated assertEquals alias, matching
        # the assertions below.)
        self.assertEqual(
            create_dir(tmp_dir_path,
                       fail_on_exist=True,
                       handle_errors_externally=True), 1)

        # return should be 1 if dir exist and fail_on_exist=False
        self.assertEqual(create_dir(tmp_dir_path, fail_on_exist=False), 1)

        # if dir not there make it and return always 0
        self.assertEqual(create_dir(tmp_dir_path2), 0)
        self.assertEqual(create_dir(tmp_dir_path3, fail_on_exist=True), 0)
Exemplo n.º 8
0
def main():
    """CLI entry point: prepare the sff files found in the input directory."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    if not output_dir:
        if isfile(opts.input_dir):
            # if output_dir is empty after the split, then a relative path was
            # passed, and the input file is in the current directory
            output_dir = split(opts.input_dir)[0] or '.'
        else:
            # opts.input_dir names a directory; write outputs alongside it.
            output_dir = opts.input_dir
    else:
        create_dir(output_dir)

    if opts.no_trim and not opts.use_sfftools:
        raise ValueError(
            "When using the --no_trim option you must have the sfftools installed and must also pass the --use_sfftools option")

    prep_sffs_in_dir(opts.input_dir,
                     output_dir,
                     make_flowgram=opts.make_flowgram,
                     convert_to_flx=opts.convert_to_FLX,
                     use_sfftools=opts.use_sfftools,
                     no_trim=opts.no_trim)
Exemplo n.º 9
0
def main():
    """Script entry point: truncate reverse primers from a fasta file.

    Validates the -z option and that both input files are readable, then
    delegates to truncate_reverse_primer.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    fasta_fp = opts.fasta_fp
    mapping_fp = opts.mapping_fp
    output_dir = opts.output_dir
    truncate_option = opts.truncate_option
    primer_mismatches = int(opts.primer_mismatches)

    create_dir(output_dir)

    if truncate_option not in ['truncate_only', 'truncate_remove']:
        raise ValueError('-z option must be either truncate_only or ' +
                         'truncate_remove')

    # Readability probes: open and immediately close each input.
    try:
        fasta_f = open(fasta_fp, "U")
        fasta_f.close()
    except IOError:
        raise IOError("Unable to open fasta file, please check path/" +
                      "permissions.")
    try:
        # Bug fix: probe the mapping file; previously this reopened the
        # fasta file, so an unreadable mapping file went undetected here.
        mapping_f = open(mapping_fp, "U")
        mapping_f.close()
    except IOError:
        raise IOError("Unable to open mapping file, please check path/" +
                      "permissions.")

    truncate_reverse_primer(fasta_fp, mapping_fp, output_dir, truncate_option,
                            primer_mismatches)
    def setUp(self):
        """Create a fastq fixture file plus writable and read-only output dirs."""
        self._files_to_remove = []

        self.fasta_file_path = get_tmp_filename(prefix='fastq_', \
        suffix='.fastq')

        fastq_file = open(self.fasta_file_path, 'w')

        fastq_file.write(fastq_test_string)
        fastq_file.close()

        # Error testing files: '/' is an invalid input filepath on purpose.
        false_fasta_file = '/'
        false_qual_file = '/'
        self.read_only_output_dir = get_tmp_filename(prefix = 'read_only_', \
        suffix = '/')
        create_dir(self.read_only_output_dir)
        # 0577 = r-xrwxrwx: owner lacks write permission, used to exercise
        # permission-error handling.
        chmod(self.read_only_output_dir, 0577)

        self.output_dir = get_tmp_filename(prefix = 'convert_fastaqual_fastq_',\
         suffix = '/')
        self.output_dir += sep

        create_dir(self.output_dir)

        self._files_to_remove.append(self.fasta_file_path)
Exemplo n.º 11
0
def main():
    """CLI entry point: simulate samples over ranges of sizes/dissimilarities."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    create_dir(output_dir)

    # Parse the input OTU table and tree, closing each handle when done.
    otu_table_fp = opts.otu_table
    with open(otu_table_fp, 'U') as otu_table_fh:
        otu_table = parse_biom_table(otu_table_fh)

    with open(opts.tree_file, 'U') as tree_fh:
        tree = DndParser(tree_fh)

    mapping_fp = opts.mapping_fp
    if mapping_fp:
        mapping_f = open(mapping_fp, 'U')
        input_map_basename = splitext(split(mapping_fp)[1])[0]
    else:
        mapping_f = None
        input_map_basename = None

    input_table_basename = splitext(split(otu_table_fp)[1])[0]

    # Comma-separated CLI values -> numeric lists.
    sample_sizes = [int(n) for n in opts.num.split(',')]
    dissims = [float(d) for d in opts.dissim.split(',')]

    simsam_range_to_files(otu_table,
                          tree,
                          simulated_sample_sizes=sample_sizes,
                          dissimilarities=dissims,
                          output_dir=output_dir,
                          mapping_f=mapping_f,
                          output_table_basename=input_table_basename,
                          output_map_basename=input_map_basename)
def main():
    """CLI wrapper: run extract_barcodes over every fastq file in a tree.

    Collects all fastq/fq(.gz) files under --input_dir, optionally pairs
    read1/read2 files, builds extract_barcodes commands and runs (or just
    prints) them through the workflow command handler.
    """
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    paired_data = opts.paired_data
    parameter_fp = opts.parameter_fp
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
            "--include_input_dir_path must also be enabled.")

    # Use the local alias consistently (previously re-read opts.parameter_fp).
    if parameter_fp:
        with open(parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['extract_barcodes'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    # Collect every fastq(.gz) file under input_dir. str.endswith accepts a
    # tuple, and 'dirs' avoids shadowing the builtin dir().
    extensions = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')
    all_files = []
    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            if fp.endswith(extensions):
                all_files.append(abspath(join(root, fp)))

    if paired_data:
        all_files, bc_pairs = get_pairs(all_files, read1_indicator,
                                        read2_indicator)

    commands = create_commands_eb(all_files, paired_data, output_dir,
        params_str, leading_text, trailing_text, include_input_dir_path,
        remove_filepath_in_name)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
Exemplo n.º 13
0
    def test_make_plots(self):
        """make_plots: tests whether the average plots are generated and if
           dictionary for the html generation is properly formatted"""

        # Files and folder make_plots is expected to create under /tmp/test/.
        filename1='/tmp/test/testSampleIDSample1_ave.png'
        filename2='/tmp/test/testSampleIDSample1_raw.png'
        folder1='/tmp/test/'

        self._paths_to_clean_up = [filename1,filename2]
        self._folders_to_cleanup=[folder1]

        # exp1: rarefaction averages/errors keyed by category/sample/metric.
        # exp2: HTML-generation dict (colors, image links, group membership).
        exp1={'SampleID': {'Sample1': {'test': {'ave': ['     7.000', '     2.052'], 'err': ['       nan', '     0.000']}}}}
        exp2={'test': {'groups': {'SampleID': {'Sample1': {'groupcolor': '#0000ff', 'raw_link': 'html_plots/testSampleIDSample1_raw.png', 'groupsamples': ['Sample1'], 'ave_link': 'html_plots/testSampleIDSample1_ave.png'}}}, 'samples': {'Sample1': {'color': '#0000ff', 'link': 'html_plots/testSample1.png'}}}}

        create_dir('/tmp/test/',False)

        # Argument order matters: fixture attributes set up elsewhere in the
        # test class are passed positionally.
        obs1,obs2 = make_plots(self.background_color,self.label_color, \
                          self.rare_data,self.ymax, self.xmax,'/tmp/test/', \
                          self.resolution, self.imagetype,self.groups,\
                          self.colors,self.data_colors,self.metric_name,\
                          self.labelname,self.rarefaction_data_mat, \
                          self.rarefaction_legend_mat,self.sample_dict, \
                          self.data_colors,self.colors2)

        self.assertEqual(obs1,exp1)
        self.assertEqual(obs2,exp2)
        self.assertTrue(exists(filename1))
        self.assertTrue(exists(filename2))
        self.assertTrue(exists(folder1))
Exemplo n.º 14
0
def main():
    """Script entry point: relabel fasta files per a mapping-file column."""
    option_parser, opts, args =\
     parse_command_line_parameters(**script_info)

    mapping_fp = opts.mapping_fp
    fasta_dir = opts.fasta_dir
    output_dir = opts.output_dir
    count_start = int(opts.count_start)
    filename_column = opts.filename_column

    # Check input filepaths; fail early with a helpful message. Call-form
    # raises replace the Python-2-only comma form.
    try:
        test_mapping_f = open(mapping_fp, "U")
    except IOError:
        raise IOError("Cannot open mapping filepath "
                      "%s, please check filepath and permissions." % mapping_fp)
    else:
        # The open was only a readability probe; release the handle
        # (previously leaked).
        test_mapping_f.close()

    if not isdir(fasta_dir):
        raise IOError("Specified fasta dir "
                      "%s, does not exist" % fasta_dir)

    # Create output directory, check path/access to mapping file
    create_dir(output_dir)

    add_qiime_labels(open(mapping_fp, "U"), fasta_dir, filename_column,
                     output_dir, count_start)
Exemplo n.º 15
0
def split_otu_table_on_taxonomy_to_files(otu_table_fp,
                                         level,
                                         output_dir,
                                         md_identifier='taxonomy',
                                         md_processor=process_md_as_list):
    """ Split OTU table by taxonomic level, writing otu tables to output dir

    otu_table_fp: path to the input BIOM-format OTU table
    level: taxonomic level to split at
    output_dir: directory the per-taxon tables are written to (created if
        absent)
    md_identifier: observation metadata key holding the taxonomy
    md_processor: callable extracting the bin value from observation metadata

    Returns the list of output filepaths written.
    """
    results = []
    # Close the input handle deterministically (it was leaked before).
    table_f = open(otu_table_fp, 'U')
    try:
        otu_table = parse_biom_table(table_f)
    finally:
        table_f.close()
    create_dir(output_dir)

    def split_f(obs_md):
        # Translate low-level processor failures into actionable messages.
        # Call-form raises are valid on both Python 2 and 3 (the comma form
        # is Python-2 only).
        try:
            result = md_processor(obs_md, md_identifier, level)
        except KeyError:
            raise KeyError(
                "Metadata identifier (%s) is not associated with all (or any) observerations. You can modify the key with the md_identifier parameter." % md_identifier)
        except TypeError:
            raise TypeError(
                "Can't correctly process the metadata string. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_as_string.")
        except AttributeError:
            raise AttributeError(
                "Metadata category not found. If your input file was generated from QIIME 1.4.0 or earlier you may need to pass --md_identifier \"Consensus Lineage\".")

        return result

    # 'bin_value' avoids shadowing the builtin bin().
    for bin_value, sub_otu_table in otu_table.binObservationsByMetadata(split_f):
        output_fp = '%s/otu_table_%s.biom' % (output_dir, bin_value)
        with open(output_fp, 'w') as output_f:
            output_f.write(format_biom_table(sub_otu_table))
        results.append(output_fp)
    return results
Exemplo n.º 16
0
def main():
    """Script entry point: build network node/edge tables from a BIOM table
    and a mapping file.

    Validates that the requested color/shape/size fields exist for both
    observations and samples, then writes SampleNodeTable.txt,
    OTUNodeTable.txt, NodeAttrTable.txt and EdgeTable.txt into a fresh
    output directory.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    otu_table_fp = opts.biom_fp
    map_fp = opts.map_fp
    output_dir = opts.output_dir
    # Comma-separated CLI options -> lists of field names.
    scolors = opts.scolors.split(',')
    ocolors = opts.ocolors.split(',')
    sshapes = opts.sshapes.split(',')
    oshapes = opts.oshapes.split(',')
    ssizes = opts.ssizes.split(',')
    osizes = opts.osizes.split(',')
    md_fields = opts.md_fields.split(',')

    # check that the otu fields asked for are available
    shared_options = ['NodeType', 'Abundance']
    if not all(
        [i in md_fields + shared_options for i in ocolors + oshapes + osizes]):
        option_parser.error('The fields specified for observation colors, '
                            'sizes, or shapes are not in either the shared '
                            'options (NodeType,Abundance) or the supplied '
                            'md_fields. These fields must be a subset of the '
                            'union of these sets. Have you passed ocolors, '
                            'osizes or oshapes that are not in the md_fields?')
    # check that the sample fields asked for are available. mapping file
    # elements should all have same metadata keys
    sopts = parse_mapping_file_to_dict(map_fp)[0].items()[0][1].keys()
    if not all(
        [i in sopts + shared_options for i in scolors + sshapes + ssizes]):
        option_parser.error('The fields specified for sample colors, sizes, '
                            'or shapes are not in either the shared options '
                            '(NodeType,Abundance) or the supplied mapping '
                            'file. These fields must be a subset of the union '
                            'of these sets. Have you passed scolors, ssizes '
                            'or sshapes that are not in the mapping file '
                            'headers?')

    # actual compuation begins
    try:
        create_dir(output_dir, fail_on_exist=True)
    except OSError:
        option_parser.error('Directory already exists. Will not overwrite.')

    bt = load_table(otu_table_fp)
    pmf = parse_mapping_file_to_dict(map_fp)[0]  # [1] is comments, don't need
    # Build the four tables: per-sample nodes, per-OTU nodes, merged node
    # attributes, and sample-OTU edges.
    sample_node_table = make_sample_node_table(bt, pmf)
    otu_node_table = make_otu_node_table(bt, opts.observation_md_header_key,
                                         md_fields)
    node_attr_table = make_node_attr_table(otu_node_table, sample_node_table,
                                           scolors, ocolors, ssizes, osizes,
                                           sshapes, oshapes)
    edge_table = make_edge_table(bt)

    _write_table(sample_node_table,
                 os.path.join(output_dir, 'SampleNodeTable.txt'))
    _write_table(otu_node_table, os.path.join(output_dir, 'OTUNodeTable.txt'))
    _write_table(node_attr_table, os.path.join(output_dir,
                                               'NodeAttrTable.txt'))
    _write_table(edge_table, os.path.join(output_dir, 'EdgeTable.txt'))
Exemplo n.º 17
0
    def setUp(self):
        # Build the fixtures used by the tests: the IUPAC degeneracy map and
        # a scratch output directory.

        # IUPAC nucleotide codes mapped to equivalent regex character classes.
        self.iupac = dict(
            A='A', T='T', G='G', C='C',
            R='[AG]', Y='[CT]', S='[GC]', W='[AT]',
            K='[GT]', M='[AC]', B='[CGT]', D='[AGT]',
            H='[ACT]', V='[ACG]', N='[ACGT]',
        )

        # Random scratch directory under /tmp, with a trailing slash.
        self.output_dir = get_random_directory_name(prefix='/tmp/') + '/'

        create_dir(self.output_dir)
Exemplo n.º 18
0
def main():
    """CLI entry point: create per-person password files in the output dir."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    create_dir(opts.output_dir)

    personal_ids_f = open(opts.personal_ids_fp, 'U')
    generate_passwords(personal_ids_f, opts.results_dir,
                       opts.password_dir, opts.output_dir)
Exemplo n.º 19
0
def main():
    """CLI entry point: simulate samples over ranges of sizes/dissimilarities."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir
    create_dir(output_dir)

    # Load the input OTU table.
    otu_table_fp = opts.otu_table
    otu_table = load_table(otu_table_fp)

    # Parse the tree, closing the handle when done.
    with open(opts.tree_file, 'U') as tree_fh:
        tree = DndParser(tree_fh)

    mapping_fp = opts.mapping_fp
    if mapping_fp:
        mapping_f = open(mapping_fp, 'U')
        input_map_basename = splitext(split(mapping_fp)[1])[0]
    else:
        mapping_f = None
        input_map_basename = None

    input_table_basename = splitext(split(otu_table_fp)[1])[0]

    # Comma-separated CLI values -> numeric lists.
    sample_sizes = [int(n) for n in opts.num.split(',')]
    dissims = [float(d) for d in opts.dissim.split(',')]

    simsam_range_to_files(otu_table,
                          tree,
                          simulated_sample_sizes=sample_sizes,
                          dissimilarities=dissims,
                          output_dir=output_dir,
                          mapping_f=mapping_f,
                          output_table_basename=input_table_basename,
                          output_map_basename=input_map_basename)
Exemplo n.º 20
0
def main():
    """Script entry point: split an OTU table and mapping file per value of a
    mapping field.

    For each distinct value of --mapping_field, writes a filtered OTU table
    and a filtered mapping file into the output directory.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    mapping_field = opts.mapping_field
    output_dir = opts.output_dir

    otu_table_base_name = splitext(split(otu_table_fp)[1])[0]

    with open(mapping_fp, 'U') as mapping_f:
        mapping_data, headers, comments = parse_mapping_file(mapping_f)
    try:
        field_index = headers.index(mapping_field)
    except ValueError:
        option_parser.error("Field is not in mapping file (search is case "
                            "and white-space sensitive). \n\tProvided field: "
                            "%s. \n\tValid fields: %s"
                            % (mapping_field, ' '.join(headers)))

    # Distinct values of the chosen field across all samples.
    mapping_values = set(e[field_index] for e in mapping_data)

    create_dir(output_dir)

    for v in mapping_values:
        v_fp_str = v.replace(' ', '_')
        otu_table_output_fp = join(output_dir, '%s_%s.txt' %
                                   (otu_table_base_name, v_fp_str))
        mapping_output_fp = join(output_dir, 'mapping_%s.txt' % v_fp_str)
        # Close all four handles deterministically (previously leaked one
        # input and one output pair per loop iteration).
        with open(mapping_fp, 'U') as map_in, \
             open(otu_table_fp, 'U') as otu_in, \
             open(mapping_output_fp, 'w') as map_out, \
             open(otu_table_output_fp, 'w') as otu_out:
            filter_otus_and_map(map_in, otu_in, map_out, otu_out,
                                valid_states_str="%s:%s" % (mapping_field, v),
                                num_seqs_per_otu=1)
Exemplo n.º 21
0
def main():
    """CLI wrapper: run join_paired_ends over every fastq file in a tree.

    Collects all fastq/fq(.gz) files under --input_dir, pairs read1/read2
    (and optionally barcode) files, builds join_paired_ends commands and
    runs (or just prints) them through the workflow command handler.
    """
    option_parser, opts, args = \
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    parameter_fp = opts.parameter_fp
    read1_indicator = opts.read1_indicator
    read2_indicator = opts.read2_indicator
    match_barcodes = opts.match_barcodes
    barcode_indicator = opts.barcode_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name is enabled, "
            "--include_input_dir_path must also be enabled.")

    # Use the local alias consistently (previously re-read opts.parameter_fp).
    if parameter_fp:
        with open(parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['join_paired_ends'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    # Collect every fastq(.gz) file under input_dir. str.endswith accepts a
    # tuple, and 'dirs' avoids shadowing the builtin dir().
    extensions = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')
    all_files = []
    for root, dirs, fps in walk(input_dir):
        for fp in fps:
            if fp.endswith(extensions):
                all_files.append(abspath(join(root, fp)))

    pairs, bc_pairs = get_pairs(all_files, read1_indicator, read2_indicator,
        match_barcodes, barcode_indicator)

    commands = create_commands_jpe(pairs, output_dir,
        params_str, leading_text, trailing_text, include_input_dir_path,
        remove_filepath_in_name, match_barcodes, bc_pairs)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
Exemplo n.º 22
0
def split_otu_table_on_taxonomy_to_files(otu_table_fp, level, output_dir,
                                         md_identifier='taxonomy',
                                         md_processor=process_md_as_list):
    """ Split OTU table by taxonomic level, writing otu tables to output dir

    otu_table_fp: path to the input BIOM-format OTU table
    level: taxonomic level to split at
    output_dir: directory the per-taxon tables are written to (created if
        absent)
    md_identifier: observation metadata key holding the taxonomy
    md_processor: callable extracting the partition value from the metadata

    Returns the list of output filepaths written.
    """
    results = []
    otu_table = load_table(otu_table_fp)
    create_dir(output_dir)

    def split_f(id_, obs_md):
        # Translate low-level processor failures into actionable messages.
        try:
            result = md_processor(obs_md, md_identifier, level)
        except KeyError:
            raise KeyError("Metadata identifier (%s) is not associated with "
                           "all (or any) observerations. You can modify the "
                           "key with the md_identifier parameter." %
                           md_identifier)
        except TypeError:
            raise TypeError("Can't correctly process the metadata string. If "
                            "your input file was generated from QIIME 1.4.0 or"
                            " earlier you may need to pass --md_as_string.")
        except AttributeError:
            raise AttributeError("Metadata category not found. If your input "
                                 "file was generated from QIIME 1.4.0 or "
                                 "earlier you may need to pass --md_identifier"
                                 " \"Consensus Lineage\".")

        return result

    # 'bin_value' avoids shadowing the builtin bin().
    for bin_value, sub_otu_table in otu_table.partition(split_f,
                                                        axis='observation'):
        output_fp = '%s/otu_table_%s.biom' % (output_dir, bin_value)
        write_biom_table(sub_otu_table, output_fp)

        results.append(output_fp)
    return results
Exemplo n.º 23
0
def copy_support_files(file_path):
    """Copy the support files to a named destination 

    file_path: path where you want the support files to be copied to

    Will raise EmperorSupportFilesError if a problem is found whilst trying to
    copy the files.
    """
    file_path = join(file_path, 'emperor_required_resources')

    # 'not exists(...)' replaces the '== False' comparison anti-idiom.
    if not exists(file_path):
        create_dir(file_path, False)

    # shutil.copytree does not provide an easy way to copy the contents of a
    # directory into another existing directory, hence the system call.
    # use double quotes for the paths to escape any invalid chracter(s)/spaces
    cmd = 'cp -R "%s/"* "%s"' % (get_emperor_support_files_dir(),
                                 abspath(file_path))
    cmd_o, cmd_e, cmd_r = qiime_system_call(cmd)

    if cmd_e:
        # Call-form raise is valid on both Python 2 and 3 (the comma form is
        # Python-2 only).
        raise EmperorSupportFilesError(
            "Error found whilst trying to copy "
            "the support files:\n%s\n Could not execute: %s" % (cmd_e, cmd))

    return
Exemplo n.º 24
0
def main():
    """CLI entry point: split a mapping file and OTU table on one metadata
    field, writing one mapping/OTU-table pair per field value."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    mapping_field = opts.mapping_field
    output_dir = opts.output_dir

    create_dir(output_dir)

    # Write one mapping file per distinct field value.
    for fp_str, sub_mapping_s in split_mapping_file_on_field(
            open(mapping_fp, 'U'), mapping_field):
        mapping_output_fp = join(output_dir, 'mapping_%s.txt' % fp_str)
        open(mapping_output_fp, 'w').write(sub_mapping_s)

    # Write one OTU table per distinct field value.
    otu_table_base_name = splitext(split(otu_table_fp)[1])[0]
    for fp_str, sub_otu_table_s in split_otu_table_on_sample_metadata(
            open(otu_table_fp, 'U'),
            open(mapping_fp, 'U'),
            mapping_field):
        otu_table_output_fp = join(
            output_dir,
            '%s_%s.biom' % (otu_table_base_name, fp_str))
        open(otu_table_output_fp, 'w').write(sub_otu_table_s)
Exemplo n.º 25
0
    def setUp(self):
        """Create a fastq fixture file plus writable and read-only output dirs."""
        self._files_to_remove = []

        self.fasta_file_path = get_tmp_filename(prefix='fastq_', \
        suffix='.fastq')

        fastq_file = open(self.fasta_file_path, 'w')

        fastq_file.write(fastq_test_string)
        fastq_file.close()

        # Error testing files: '/' is an invalid input filepath on purpose.
        false_fasta_file = '/'
        false_qual_file = '/'
        self.read_only_output_dir = get_tmp_filename(prefix = 'read_only_', \
        suffix = '/')
        create_dir(self.read_only_output_dir)
        # 0577 = r-xrwxrwx: owner lacks write permission, used to exercise
        # permission-error handling.
        chmod(self.read_only_output_dir, 0577)

        self.output_dir = get_tmp_filename(prefix = 'convert_fastaqual_fastq_',\
         suffix = '/')
        self.output_dir += sep

        create_dir(self.output_dir)

        self._files_to_remove.append(self.fasta_file_path)
Exemplo n.º 26
0
def main():
    """Split an OTU table (BIOM) into per-group tables on a mapping field.

    Same flow as the text-based variant: splits the mapping file first, then
    loads the OTU table with load_table and writes one BIOM table per field
    value via write_biom_table. Errors from the splitter are reported through
    option_parser.error.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    mapping_field = opts.mapping_field
    output_dir = opts.output_dir
    # column_rename_ids = opts.column_rename_ids
    # include_repeat_cols = opts.include_repeat_cols

    create_dir(output_dir)

    # split mapping file: one sub-mapping file per distinct field value
    mapping_f = open(mapping_fp, 'U')
    for fp_str, sub_mapping_s in split_mapping_file_on_field(mapping_f, mapping_field):
        mapping_output_fp = join(output_dir, 'mapping_%s.txt' % fp_str)
        open(mapping_output_fp, 'w').write(sub_mapping_s)

    # split otu table: re-open the mapping file (first handle was consumed)
    otu_table_base_name = splitext(split(otu_table_fp)[1])[0]
    mapping_f = open(mapping_fp, 'U')

    otu_table = load_table(otu_table_fp)

    try:
        for fp_str, sub_otu_table_s in split_otu_table_on_sample_metadata(
                otu_table,
                mapping_f,
                mapping_field):
            otu_table_output_fp = join(output_dir, '%s_%s.biom' % (
                otu_table_base_name, fp_str))

            write_biom_table(sub_otu_table_s, otu_table_output_fp)
    except OTUTableSplitError as e:
        # surface splitter failures as a clean command-line error
        option_parser.error(e)
    def __call__(self,
                 query_fasta_fp,
                 database_fasta_fp,
                 output_dir,
                 observation_metadata_fp=None,
                 params=None,
                 HALT_EXEC=False):
        """Call the DatabaseMapper.

        Assigns the reads in query_fasta_fp to the references in
        database_fasta_fp, then converts the raw mapper output into an
        observation map and a BIOM table, all written under output_dir.

        observation_metadata_fp: optional metadata attached to observations
        params: dict of parameters for the underlying mapper (defaults to {})
        HALT_EXEC: debugging flag passed through to the wrapped application
        """
        # NOTE: the docstring above was previously a bare string placed after
        # the params check, where it was a no-op statement rather than the
        # method docstring; moved to the conventional position.
        if params is None:
            params = {}

        create_dir(output_dir)
        raw_output_fp = self._get_raw_output_fp(output_dir,
                                                params)
        output_observation_map_fp = '%s/observation_map.txt' % output_dir
        output_biom_fp = '%s/observation_table.biom' % output_dir
        log_fp = '%s/observation_table.log' % output_dir

        self._assign_dna_reads_to_database(
            query_fasta_fp=query_fasta_fp,
            database_fasta_fp=database_fasta_fp,
            raw_output_fp=raw_output_fp,
            temp_dir=get_qiime_temp_dir(),
            params=params,
            HALT_EXEC=HALT_EXEC)

        self._process_raw_output(raw_output_fp,
                                 log_fp,
                                 output_observation_map_fp)

        self._generate_biom_output(output_observation_map_fp,
                                   output_biom_fp,
                                   observation_metadata_fp)
Exemplo n.º 28
0
def copy_support_files(file_path):
    """Copy the Emperor support files to a named destination.

    file_path: path where you want the support files to be copied to; the
        files land in an 'emperor_required_resources' subdirectory, created
        if it does not already exist.

    Raises EmperorSupportFilesError if a problem is found whilst trying to
    copy the files.
    """
    file_path = join(file_path, "emperor_required_resources")

    if not exists(file_path):
        create_dir(file_path, False)

    # shutil.copytree does not provide an easy way to copy the contents of a
    # directory into another existing directory, hence the system call.
    # use double quotes for the paths to escape any invalid character(s)/spaces
    cmd = 'cp -R "%s/"* "%s"' % (get_emperor_support_files_dir(), abspath(file_path))
    cmd_o, cmd_e, cmd_r = qiime_system_call(cmd)

    if cmd_e:
        # call-style raise is valid under both Python 2 and Python 3, unlike
        # the old 'raise E, msg' statement form
        raise EmperorSupportFilesError(
            "Error found whilst trying to copy "
            "the support files:\n%s\n Could not execute: %s" % (cmd_e, cmd))

    return
Exemplo n.º 29
0
def main():
    """Truncate reverse primers (and optionally trailing bases) from reads.

    Validates the -z truncate option and that both input files are readable,
    then delegates the actual work to truncate_reverse_primer.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    fasta_fp = opts.fasta_fp
    mapping_fp = opts.mapping_fp
    output_dir = opts.output_dir
    truncate_option = opts.truncate_option
    primer_mismatches = int(opts.primer_mismatches)

    create_dir(output_dir)

    if truncate_option not in ['truncate_only', 'truncate_remove']:
        raise ValueError('-z option must be either truncate_only or ' +
                         'truncate_remove')

    # Fail early if either input file cannot be opened.
    try:
        fasta_f = open(fasta_fp, "U")
        fasta_f.close()
    except IOError:
        raise IOError("Unable to open fasta file, please check path/" +
                      "permissions.")
    try:
        # BUG FIX: this check previously opened fasta_fp again, so an
        # unreadable mapping file was never detected here.
        mapping_f = open(mapping_fp, "U")
        mapping_f.close()
    except IOError:
        raise IOError("Unable to open mapping file, please check path/" +
                      "permissions.")

    truncate_reverse_primer(fasta_fp, mapping_fp, output_dir, truncate_option,
                            primer_mismatches)
Exemplo n.º 30
0
def main():
    """Prepare sff files in a directory for processing.

    Determines the output directory (falling back to the input file's own
    directory, or the input directory itself, when -o is not given), checks
    the --no_trim/--use_sfftools constraint, and calls prep_sffs_in_dir.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    output_dir = opts.output_dir

    if output_dir:
        create_dir(output_dir)
    else:
        if isfile(opts.input_dir):
            # if output_dir is empty after the split, then a relative path was
            # passed, and the input file is in the current directory
            output_dir = split(opts.input_dir)[0] or '.'

        else:  # opts.input_dir is a directory
            output_dir = opts.input_dir

    # --no_trim relies on the external sfftools, so it requires --use_sfftools
    if opts.no_trim and not opts.use_sfftools:
        raise ValueError(
            "When using the --no_trim option you must have the sfftools installed and must also pass the --use_sfftools option"
        )

    prep_sffs_in_dir(opts.input_dir,
                     output_dir,
                     make_flowgram=opts.make_flowgram,
                     convert_to_flx=opts.convert_to_FLX,
                     use_sfftools=opts.use_sfftools,
                     no_trim=opts.no_trim)
Exemplo n.º 31
0
 def test_truncate_fasta_qual(self):
     """Truncate fasta/qual files at base 80 and check the filtered output."""
     # NOTE(review): this method is indented one space off from standard
     # class-method indentation in the original source.
     base_pos = 80
     output_dir = '/tmp/truncate_fasta_qual_test/'

     create_dir(output_dir)

     truncate_fasta_qual(self.fasta_fp, self.qual_fp, output_dir, base_pos)

     # truncate_fasta_qual writes *_filtered.* files next to the inputs'
     # basenames inside output_dir
     actual_trunc_fasta_fp = output_dir +\
      basename(self.fasta_fp).replace(".fasta", "_filtered.fasta")

     actual_trunc_fasta_fp = open(actual_trunc_fasta_fp, "U")

     actual_trunc_fasta = [line.strip() for line in actual_trunc_fasta_fp]

     self.assertEqual(actual_trunc_fasta, expected_fasta_seqs)

     actual_trunc_qual_fp = output_dir +\
      basename(self.qual_fp).replace(".qual", "_filtered.qual")

     actual_trunc_qual_fp = open(actual_trunc_qual_fp, "U")

     actual_trunc_qual = [line.strip() for line in actual_trunc_qual_fp]

     self.assertEqual(actual_trunc_qual, expected_qual_scores)
Exemplo n.º 32
0
    def test_plot_rank_abundance_graphs_dense(self):
        """plot_rank_abundance_graphs works with any number of samples (DenseOTUTable)"""
 
        self.otu_table = parse_biom_table_str(otu_table_dense)
        self.dir = get_tmp_filename(tmp_dir=self.tmp_dir,
                                   prefix="test_plot_rank_abundance",
                                   suffix="/")
        create_dir(self.dir)
        self._dirs_to_remove.append(self.dir)
        # an empty sample name must raise ValueError
        self.assertRaises(ValueError, plot_rank_abundance_graphs, '',
                          self.otu_table, self.dir)
        # a sample name absent from the table must raise ValueError
        self.assertRaises(ValueError, plot_rank_abundance_graphs,
                          'Invalid_sample_name',
                          self.otu_table, self.dir)

        # two samples -> output named after their column indices (0 and 2)
        file_type="pdf"
        plot_rank_abundance_graphs('S3,S5', self.otu_table, self.dir,
                                       file_type=file_type)
        tmp_file = abspath(self.dir+"rank_abundance_cols_0_2."+file_type)

        self.assertTrue(exists(tmp_file)) 
        self.files_to_remove.append(tmp_file)
        # '*' selects all samples -> columns 0, 1 and 2
        plot_rank_abundance_graphs('*', self.otu_table, self.dir,
                                       file_type=file_type)
        tmp_file = abspath(self.dir+"rank_abundance_cols_0_1_2."+file_type)
        
        self.files_to_remove.append(tmp_file)
        self.assertTrue(exists(tmp_file)) 
Exemplo n.º 33
0
def main():
    """Validate a QIIME mapping file and write checked results to output_dir.

    Verifies the -c replacement character (single alphanumeric, period, or
    underscore) before delegating to check_mapping_file.
    """
    option_parser, opts, args =\
     parse_command_line_parameters(suppress_verbose=True, **script_info)

    mapping_fp = opts.mapping_fp
    has_barcodes = not opts.not_barcoded
    variable_len_barcodes = opts.variable_len_barcodes
    output_dir = opts.output_dir + "/"
    char_replace = opts.char_replace
    verbose = opts.verbose
    disable_primer_check = opts.disable_primer_check
    added_demultiplex_field = opts.added_demultiplex_field

    # Create output directory, check path/access to mapping file
    create_dir(output_dir)

    # Test for valid replacement characters
    valid_replacement_chars = digits + letters + "_" + "."
    if char_replace not in valid_replacement_chars:
        option_parser.error('-c option requires alphanumeric, period, or '+\
        'underscore character.')
    if len(char_replace) != 1:
        option_parser.error('-c parameter must be a single character.')

    check_mapping_file(mapping_fp, output_dir, has_barcodes, char_replace,\
     verbose, variable_len_barcodes,
     disable_primer_check, added_demultiplex_field)
Exemplo n.º 34
0
    def test_make_plots(self):
        """make_plots: tests whether the average plots are generated and if
           dictionary for the html generation is properly formatted"""

        # expected output image paths produced by make_plots
        filename1='/tmp/test/testcol_0_row_0_ave.png'
        filename2='/tmp/test/testcol_0_row_0_raw.png'
        folder1='/tmp/test/'
        
        self._paths_to_clean_up = [filename1,filename2]
        self._folders_to_cleanup=[folder1]

        # exp1: rarefaction averages/errors; exp2: html-generation metadata
        exp1={'SampleID': {'Sample1': {'test': {'ave': ['     7.000', '     2.052'], 'err': ['       nan', '     0.000']}}}}
        exp2={'test': {'groups': {'SampleID': {'Sample1': {'groupcolor': '#ff0000', 'raw_link': 'html_plots/testcol_0_row_0_raw.png', 'groupsamples': ['Sample1'], 'ave_link': 'html_plots/testcol_0_row_0_ave.png'}}}, 'samples': {'Sample1': {'color': '#ff0000', 'link': 'html_plots/testcol_0_row_0.png'}}}}
        
        # second arg False: do not fail if the directory already exists
        create_dir('/tmp/test/',False)
        
        obs1,obs2 = make_plots(self.background_color,self.label_color, \
                          self.rare_data,self.ymax, self.xmax,'/tmp/test/', \
                          self.resolution, self.imagetype,self.groups,\
                          self.colors,self.data_colors,self.metric_name,\
                          self.labelname,self.rarefaction_data_mat, \
                          self.rarefaction_legend_mat,self.sample_dict, \
                          self.data_colors,self.colors2,self.mapping_lookup)
        
        self.assertEqual(obs1,exp1)
        self.assertEqual(obs2,exp2)
        self.assertTrue(exists(filename1))
        self.assertTrue(exists(filename2))
        self.assertTrue(exists(folder1))
    def test_plot_rank_abundance_graphs_dense(self):
        """plot_rank_abundance_graphs works with any number of samples (DenseOTUTable)"""
 
        # NOTE: this variant exercises the newer signature where the output
        # file path is the first argument to plot_rank_abundance_graphs
        self.otu_table = parse_biom_table_str(otu_table_dense)
        self.dir = get_tmp_filename(tmp_dir=self.tmp_dir,
                                   prefix="test_plot_rank_abundance",
                                   suffix="/")
        create_dir(self.dir)
        self._dirs_to_remove.append(self.dir)
        tmp_fname = get_tmp_filename(tmp_dir=self.dir)

        # an empty sample name must raise ValueError
        self.assertRaises(ValueError, plot_rank_abundance_graphs, tmp_fname,'',
                          self.otu_table)
        # a sample name absent from the table must raise ValueError
        self.assertRaises(ValueError, plot_rank_abundance_graphs, tmp_fname,
                          'Invalid_sample_name',
                          self.otu_table)

        # two samples -> output named after their column indices (0 and 2)
        file_type="pdf"
        tmp_file = abspath(self.dir+"rank_abundance_cols_0_2."+file_type)
        plot_rank_abundance_graphs(tmp_file, 'S3,S5', self.otu_table,
                                       file_type=file_type)

        self.assertTrue(exists(tmp_file)) 
        self.files_to_remove.append(tmp_file)
        # '*' selects all samples -> columns 0, 1 and 2
        tmp_file = abspath(self.dir+"rank_abundance_cols_0_1_2."+file_type)

        plot_rank_abundance_graphs(tmp_file,'*', self.otu_table,file_type=file_type)
        
        self.files_to_remove.append(tmp_file)
        self.assertTrue(exists(tmp_file)) 
Exemplo n.º 36
0
def main():
    """Validate a QIIME mapping file and write checked results to output_dir.

    Verifies that the -c replacement character is a single alphanumeric,
    period, or underscore before delegating to check_mapping_file.
    """
    option_parser, opts, args = parse_command_line_parameters(
        suppress_verbose=True, **script_info)

    mapping_fp = opts.mapping_fp
    has_barcodes = not opts.not_barcoded
    variable_len_barcodes = opts.variable_len_barcodes
    output_dir = opts.output_dir + "/"
    char_replace = opts.char_replace
    verbose = opts.verbose
    disable_primer_check = opts.disable_primer_check
    added_demultiplex_field = opts.added_demultiplex_field
    suppress_html = opts.suppress_html

    # Create output directory, check path/access to mapping file
    create_dir(output_dir)

    # The replacement character must be alphanumeric, '.', or '_' ...
    if char_replace not in digits + letters + "_" + ".":
        option_parser.error('-c option requires alphanumeric, period, or '
                            'underscore character.')
    # ... and exactly one character long
    if len(char_replace) != 1:
        option_parser.error('-c parameter must be a single character.')

    check_mapping_file(mapping_fp, output_dir, has_barcodes, char_replace,
                       verbose, variable_len_barcodes, disable_primer_check,
                       added_demultiplex_field, suppress_html)
Exemplo n.º 37
0
    def test_truncate_fasta_qual(self):
        """Truncate fasta/qual files at base 80 and check the filtered output."""

        base_pos = 80
        output_dir = '/tmp/truncate_fasta_qual_test/'

        create_dir(output_dir)

        truncate_fasta_qual(self.fasta_fp, self.qual_fp, output_dir, base_pos)

        # truncate_fasta_qual writes *_filtered.* files named after the
        # inputs' basenames inside output_dir
        actual_trunc_fasta_fp = output_dir +\
         basename(self.fasta_fp).replace(".fasta", "_filtered.fasta")

        actual_trunc_fasta_fp = open(actual_trunc_fasta_fp, "U")

        actual_trunc_fasta = [line.strip() for line in actual_trunc_fasta_fp]

        self.assertEqual(actual_trunc_fasta, expected_fasta_seqs)

        actual_trunc_qual_fp = output_dir +\
         basename(self.qual_fp).replace(".qual", "_filtered.qual")

        actual_trunc_qual_fp = open(actual_trunc_qual_fp, "U")

        actual_trunc_qual = [line.strip() for line in actual_trunc_qual_fp]

        self.assertEqual(actual_trunc_qual, expected_qual_scores)
Exemplo n.º 38
0
    def __call__(self,
                 query_fasta_fp,
                 database_fasta_fp,
                 output_dir,
                 observation_metadata_fp=None,
                 params=None,
                 HALT_EXEC=False):
        """Call the DatabaseMapper.

        Assigns the reads in query_fasta_fp to the references in
        database_fasta_fp, then converts the raw mapper output into an
        observation map and a BIOM table, all written under output_dir.

        observation_metadata_fp: optional metadata attached to observations
        params: dict of parameters for the underlying mapper (defaults to {})
        HALT_EXEC: debugging flag passed through to the wrapped application
        """
        # NOTE: the docstring above was previously a bare string placed after
        # the params check, where it was a no-op statement rather than the
        # method docstring; moved to the conventional position.
        if params is None:
            params = {}
        create_dir(output_dir)
        raw_output_fp = self._get_raw_output_fp(output_dir, params)
        output_observation_map_fp = '%s/observation_map.txt' % output_dir
        output_biom_fp = '%s/observation_table.biom' % output_dir
        log_fp = '%s/observation_table.log' % output_dir

        self._assign_dna_reads_to_database(query_fasta_fp=query_fasta_fp,
                                           database_fasta_fp=database_fasta_fp,
                                           raw_output_fp=raw_output_fp,
                                           temp_dir=get_qiime_temp_dir(),
                                           params=params,
                                           HALT_EXEC=HALT_EXEC)

        self._process_raw_output(raw_output_fp, log_fp,
                                 output_observation_map_fp)

        self._generate_biom_output(output_observation_map_fp, output_biom_fp,
                                   observation_metadata_fp)
def main():
    """Post-process denoiser results into the requested output directory."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # it is fine if the output directory already exists
    create_dir(opts.output_dir, fail_on_exist=False)

    post_process(opts.fasta_fp,
                 opts.denoiser_map_file,
                 opts.denoised_fasta_fp,
                 opts.otu_picker_map_file,
                 opts.output_dir)
Exemplo n.º 40
0
    def setUp(self):
        """Create the temporary fasta/mapping input files used by the tests.

        Writes a valid fasta file, a fasta file with bad labels, a valid
        mapping file, and two deliberately broken mapping files (missing
        ReversePrimer header / invalid primer), plus a temp output directory.
        All created files are registered for removal in tearDown.
        """
        self._files_to_remove = []

        self.sample_fasta_file1_data = sample_fasta_file1
        self.sample_fasta_file_bad_labels_data =\
            sample_fasta_file_bad_labels

        self.sample_mapping_file1_data = sample_mapping_file1
        self.sample_mapping_file_no_revprimer_header =\
            sample_mapping_file_no_revprimer_header
        self.sample_mapping_file_bad_revprimer =\
            sample_mapping_file_bad_revprimer
        self.expected_truncation_default_settings =\
            expected_truncation_default_settings
        self.expected_truncation_zero_mismatches =\
            expected_truncation_zero_mismatches
        self.expected_truncation_zero_mismatches_truncate_remove =\
            expected_truncation_zero_mismatches_truncate_remove

        self.fasta_fp = get_tmp_filename(prefix='fasta_seqs_', suffix='.fna')
        seq_file = open(self.fasta_fp, 'w')
        seq_file.write(self.sample_fasta_file1_data)
        seq_file.close()

        self.fasta_badlabels_fp = get_tmp_filename(
            prefix="fasta_seqs_badlabels_", suffix=".fna")
        seq_file = open(self.fasta_badlabels_fp, "w")
        seq_file.write(self.sample_fasta_file_bad_labels_data)
        seq_file.close()

        self.mapping_fp = get_tmp_filename(prefix='sample_mapping_',
                                           suffix='.txt')
        mapping_file = open(self.mapping_fp, "w")
        mapping_file.write(self.sample_mapping_file1_data)
        mapping_file.close()

        self.mapping_bad_header_fp = get_tmp_filename(
            prefix='sample_mapping_badheader_', suffix=".txt")
        mapping_file = open(self.mapping_bad_header_fp, "w")
        mapping_file.write(self.sample_mapping_file_no_revprimer_header)
        mapping_file.close()

        self.mapping_bad_primer_fp = get_tmp_filename(
            prefix='sample_mapping_badprimer_', suffix=".txt")
        mapping_file = open(self.mapping_bad_primer_fp, "w")
        mapping_file.write(self.sample_mapping_file_bad_revprimer)
        mapping_file.close()

        # trailing separator is expected by the code under test
        self.output_dir = mkdtemp()
        self.output_dir += '/'

        create_dir(self.output_dir)

        self._files_to_remove =\
            [self.fasta_fp, self.mapping_fp, self.mapping_bad_header_fp,
             self.mapping_bad_primer_fp, self.fasta_badlabels_fp]
Exemplo n.º 41
0
def main():
    """Split a BIOM table and mapping file on one or more metadata fields.

    Samples missing from either input are discarded; each unique combination
    of values of the requested fields produces one BIOM table (and, unless
    suppressed, one mapping file) in output_dir.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    biom_table_fp = opts.biom_table_fp
    mapping_fp = opts.mapping_fp
    fields = opts.fields.split(',')
    output_dir = opts.output_dir
    suppress_mf = opts.suppress_mapping_file_output
    # column_rename_ids = opts.column_rename_ids
    # include_repeat_cols = opts.include_repeat_cols

    bt = load_table(biom_table_fp)
    mdata, mheaders, mcomments = parse_mapping_file(mapping_fp)
    mdata = array(mdata)

    # check that biom file and mapping file have matching sample names;
    # discard those samples that do not appear in both
    shared_samples = list(set(mdata[:, 0]).intersection(bt.ids(axis='sample')))
    if len(shared_samples) == 0:
        raise ValueError('Mapping file and biom table share no samples.')
    elif len(shared_samples) == len(mdata[:, 0]):
        mdata = array(mdata)
    else:
        # we want to preserve the order of the samples in the biom table
        ss_bt_order = [s for s in bt.ids(axis='sample') if s in
                       shared_samples]
        bt = bt.filter(ss_bt_order, axis='sample', inplace=True)
        mdata = subset_mapping_data(mdata, shared_samples)
    # check that all requested fields exist in the mapping file headers
    if not all([i in mheaders for i in fields]):
        raise ValueError('One or more of the specified fields was not found ' +\
                         'in the mapping file.')

    # create output directory and create base names
    create_dir(output_dir)
    mf_base_name = join(output_dir, splitext(split(mapping_fp)[1])[0])
    bt_base_name = join(output_dir, splitext(split(biom_table_fp)[1])[0])

    # run code and append output
    sample_groups, value_groups = make_non_empty_sample_lists(fields, mheaders,
                                                              mdata)

    for sg, vg in zip(sample_groups, value_groups):
        # build a '__field_value_..._' suffix for the output file names
        name_base = '__' + '%s_%s_' * len(vg) + '_'
        name_tmp = []
        for f, v in zip(fields, vg):
            name_tmp.extend([f, v])
        nb = name_base % tuple(name_tmp)

        tmp_mf_data = subset_mapping_data(mdata, sg)
        tmp_mf_str = format_mapping_file(mheaders, tmp_mf_data, mcomments)
        write_biom_table(bt.filter(sg, axis='sample', inplace=False),
                         bt_base_name + nb + '.biom')
        
        if not suppress_mf:
            o = open(mf_base_name + nb + '.txt', 'w')
            o.writelines(tmp_mf_str)
            o.close()
Exemplo n.º 42
0
def main():
    """Split a BIOM table and mapping file on one or more metadata fields.

    Samples missing from either input are discarded; each unique combination
    of values of the requested fields produces one BIOM table (and, unless
    suppressed, one mapping file) in output_dir.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    biom_table_fp = opts.biom_table_fp
    mapping_fp = opts.mapping_fp
    fields = opts.fields.split(',')
    output_dir = opts.output_dir
    suppress_mf = opts.suppress_mapping_file_output
    # column_rename_ids = opts.column_rename_ids
    # include_repeat_cols = opts.include_repeat_cols

    bt = load_table(biom_table_fp)
    mdata, mheaders, mcomments = parse_mapping_file(mapping_fp)
    mdata = array(mdata)

    # check that biom file and mapping file have matching sample names;
    # discard those samples that do not appear in both
    shared_samples = list(set(mdata[:, 0]).intersection(bt.ids(axis='sample')))
    if len(shared_samples) == 0:
        raise ValueError('Mapping file and biom table share no samples.')
    elif len(shared_samples) == len(mdata[:, 0]):
        mdata = array(mdata)
    else:
        # we want to preserve the order of the samples in the biom table
        ss_bt_order = [s for s in bt.ids(axis='sample') if s in
                       shared_samples]
        bt = bt.filter(ss_bt_order, axis='sample', inplace=True)
        mdata = subset_mapping_data(mdata, shared_samples)
    # check that all requested fields exist in the mapping file headers
    if not all([i in mheaders for i in fields]):
        raise ValueError('One or more of the specified fields was not found ' +\
                         'in the mapping file.')

    # create output directory and create base names
    create_dir(output_dir)
    mf_base_name = join(output_dir, splitext(split(mapping_fp)[1])[0])
    bt_base_name = join(output_dir, splitext(split(biom_table_fp)[1])[0])

    # run code and append output
    sample_groups, value_groups = make_non_empty_sample_lists(fields, mheaders,
                                                              mdata)

    for sg, vg in zip(sample_groups, value_groups):
        # build a '__field_value_..._' suffix for the output file names
        name_base = '__' + '%s_%s_' * len(vg) + '_'
        name_tmp = []
        for f, v in zip(fields, vg):
            name_tmp.extend([f, v])
        nb = name_base % tuple(name_tmp)

        tmp_mf_data = subset_mapping_data(mdata, sg)
        tmp_mf_str = format_mapping_file(mheaders, tmp_mf_data, mcomments)
        write_biom_table(bt.filter(sg, axis='sample', inplace=False),
                         bt_base_name + nb + '.biom')
        
        if not suppress_mf:
            o = open(mf_base_name + nb + '.txt', 'w')
            o.writelines(tmp_mf_str)
            o.close()
Exemplo n.º 43
0
def main():
    """Compare alpha diversities across mapping-file categories.

    For each requested category, runs compare_alpha_diversities, applies the
    requested multiple-comparison correction, and writes a stats table and a
    per-category boxplot PDF into output_dir.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    mapping_fp = opts.mapping_fp
    alpha_diversity_fp = opts.alpha_diversity_fp
    categories = opts.categories.split(',')
    depth = opts.depth
    output_dir = opts.output_dir
    correction_method = opts.correction_method
    test_type = opts.test_type
    num_permutations = opts.num_permutations

    if num_permutations < 10:
        option_parser.error('Number of permuations must be greater than or '
                            'equal to 10.')

    create_dir(output_dir)
    for category in categories:
        stat_output_fp = join(output_dir, '%s_stats.txt' % category)
        boxplot_output_fp = join(output_dir, '%s_boxplots.pdf' % category)

        # fresh handles per category: the comparison consumes both files
        alpha_diversity_f = open(alpha_diversity_fp, 'U')
        mapping_f = open(mapping_fp, 'U')
        ttest_result, alphadiv_avgs = \
         compare_alpha_diversities(alpha_diversity_f,
                                   mapping_f,
                                   category,
                                   depth,
                                   test_type,
                                   num_permutations)
        alpha_diversity_f.close()
        mapping_f.close()

        corrected_result = _correct_compare_alpha_results(
            ttest_result, correction_method)

        # write stats results
        stat_output_f = open(stat_output_fp, 'w')
        header = ('Group1\tGroup2\tGroup1 mean\tGroup1 std\tGroup2 mean\t'
                  'Group2 std\tt stat\tp-value')
        lines = [header]
        for (t0, t1), v in corrected_result.items():
            lines.append('\t'.join(
                map(str, [
                    t0, t1, alphadiv_avgs[t0][0], alphadiv_avgs[t0][1],
                    alphadiv_avgs[t1][0], alphadiv_avgs[t1][1], v[0], v[1]
                ])))
        stat_output_f.write('\n'.join(lines) + '\n')
        stat_output_f.close()

        # write box plots (re-open inputs, consumed again by the generator)
        alpha_diversity_f = open(alpha_diversity_fp, 'U')
        mapping_f = open(mapping_fp, 'U')
        boxplot = generate_alpha_diversity_boxplots(alpha_diversity_f,
                                                    mapping_f, category, depth)
        alpha_diversity_f.close()
        mapping_f.close()
        boxplot.savefig(boxplot_output_fp)
Exemplo n.º 44
0
def main():
    """Run the denoiser on input flowgrams.

    Validates the input sff paths, determines the primer (from the command
    line or, failing that, from the mapping file's LinkerPrimerSequence
    column), denoises, and writes the cluster mapping and centroid sequences
    into the output directory.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    sff_files = opts.sff_fps

    for f in sff_files:
        if (not exists(f)):
            option_parser.error(('Flowgram file path does not exist:\n %s \n' +
                                 'Pass a valid one via -i.') % f)
    outdir = opts.output_dir

    # with --force, silently reuse an existing output directory
    create_dir(outdir, fail_on_exist=not opts.force)

    log_fh = None

    if (not (opts.primer or opts.map_fname)):
        raise ApplicationError("Either mapping file or primer required")
    # Read primer from Meta data file if not set on command line
    if not opts.primer:
        mapping_data, header, comments = \
            parse_mapping_file(open(opts.map_fname, "U"))

        index = header.index("LinkerPrimerSequence")
        all_primers = set(array(mapping_data)[:, index])

        if len(all_primers) != 1:
            raise ValueError(
                "Currently only data sets with one primer are allowed.\n" +
                "Make separate mapping files with only one primer, re-run split_libraries and\n"
                + "denoise with each split_library output separately.")
        primer = list(all_primers)[0]
        last_char = primer[-1]
        if (last_char not in "ACGT"):
            raise ValueError("We currently do not support primer with " +
                             "degenerate bases at it's 3' end.")

    else:
        primer = opts.primer

    centroids, cluster_mapping = fast_denoiser(opts.sff_fps,
                                               opts.fasta_fp,
                                               outdir,
                                               opts.num_cpus,
                                               primer,
                                               titanium=opts.titanium)

    # store mapping file and centroids
    result_otu_path = '%s/denoised_clusters.txt' % outdir
    of = open(result_otu_path, 'w')
    for i, cluster in cluster_mapping.iteritems():
        of.write('%s\t%s\n' % (str(i), '\t'.join(cluster)))
    of.close()

    result_fasta_path = '%s/denoised_seqs.fasta' % outdir
    oh = open(result_fasta_path, 'w')
    write_Fasta_from_name_seq_pairs(centroids, oh)
Exemplo n.º 45
0
def main():
    """Build TopiaryExplorer .tep and .jnlp files from OTU/mapping/tree inputs.

    The .tep file bundles the tree, optional OTU metadata, the OTU table,
    and the mapping file; the .jnlp file launches TopiaryExplorer (from the
    web or a local install) pointing at the .tep file (or a given URL).
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)
    
    otu_table_fp = opts.otu_table_fp
    mapping_fp = opts.mapping_fp
    tree_fp = opts.tree_fp
    output_dir = opts.out_fp
    output_basename = splitext(split(otu_table_fp)[1])[0]
    
    if not output_dir:
        output_dir = 'make_tep_output/'
    
    create_dir(output_dir)
    
    tep_fp = '%s/%s.tep' % (output_dir,output_basename)      # opts.out_fp+'.tep'
    jnlp_fp = '%s/%s.jnlp' % (output_dir,output_basename)
    tepfile = open(tep_fp, 'w')
    otu_lines = open(otu_table_fp, 'U').readlines()
    sample_ids, otu_ids, otu_table, metadata = parse_otu_table(otu_lines)
    mapping_lines = open(mapping_fp, 'U')    
    tree_lines = open(tree_fp, 'U')
    
    # NOTE: 'lines += <str>' extends the list character by character; the
    # final writelines() concatenates everything, so the output is correct.
    lines = ['>>tre\n']
    lines += tree_lines.readlines() 
    lines += '\n'
    if(metadata):
        lines += '>>otm\n#OTU ID\tOTU Metadata\n'
        for i in range(len(otu_ids)):
            lines += otu_ids[i] + '\t'
            for m in metadata[i]:
                lines += m + ';'
            # lines = lines[:len(lines)-1]
            lines += '\n'
    lines += '>>osm\n'
    lines += otu_lines
    lines += '\n>>sam\n'
    lines += mapping_lines.readlines()
    
    tepfile.writelines(lines)
    
    # the jnlp points at either the hosted or the local TopiaryExplorer app,
    # and at either the given URL or the local .tep file
    jnlpfile = open(jnlp_fp, 'w')
    lines = [jnlp_top_block]
    if(opts.web_flag):
        lines += 'http://topiaryexplorer.sourceforge.net/app/'
    else:
        lines += 'file:'+load_qiime_config()['topiaryexplorer_project_dir']
    lines += jnlp_middle_block
    if(opts.url):
        lines += opts.url
    else:
        lines += os.path.abspath(tep_fp)
    # lines += os.path.abspath(tep_fp)
    lines += jnlp_bottom_block
    jnlpfile.writelines(lines)
def run_process_illumina_through_split_lib(study_id,run_prefix,input_fp,
    mapping_fp, output_dir, 
    command_handler, params, qiime_config,
    write_to_all_fasta=False,
    status_update_callback=print_to_stdout):
    """ NOTE: Parts of this function are a directly copied from the
        run_qiime_data_preparation function from the workflow.py library file 
        in QIIME.
    
        The steps performed by this function are:
          1) De-multiplex sequences. (split_libraries_fastq.py)
    
    """

    # Prepare some variables for the later steps
    filenames=input_fp.split(',')
    commands = []
    create_dir(output_dir)
    python_exe_fp = qiime_config['python_exe_fp']
    script_dir = get_qiime_scripts_dir()
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params,
                            qiime_config=qiime_config)
    
    # copy the mapping file into the output directory
    copied_mapping=split(mapping_fp)[-1]
    mapping_input_fp_copy=join(output_dir, copied_mapping)
    copy_mapping_cmd='cp %s %s' % (mapping_fp,mapping_input_fp_copy)
    commands.append([('CopyMapping', copy_mapping_cmd)])

    # sort the filenames
    filenames.sort()
    
    # determine which file is seq-file and which is barcode-file and associate
    # to mapping file
    if len(filenames) == 1:
        try:
            # Format of sample_id needs to be seqs_<sample_name>.<sequence_prep_id>.fastq
            # look up the sample/prep identifier for this study + run prefix
            data_access = data_access_factory(ServerConfig.data_access_type)
            sql = """
            select  s.sample_name || '.' || sp.sequence_prep_id 
            from    sample s 
                    inner join sequence_prep sp 
                    on s.sample_id = sp.sample_id
            where   s.study_id = {0}
                    and sp.run_prefix = '{1}'
            """.format(study_id, run_prefix[:-1])
            sample_and_prep = data_access.dynamicMetadataSelect(sql).fetchone()[0]
            input_str = '-i {0} --sample_id {1}'.format(filenames[0], sample_and_prep)
        except Exception, e:
            # include the SQL and the original error in the failure report
            error = 'Failed to obtain sample and sequence prep info for study_id {0} and run_prefix {1}\n'.format(study_id, run_prefix)
            error += 'SQL was: \n {0} \n'.format(sql)
            error += 'Original exception was: \n {0}'.format(str(e))
            raise Exception(error)
Exemplo n.º 47
0
    def setUp(self):
        """Create the IUPAC degeneracy map and a temporary output directory."""
        # IUPAC nucleotide codes mapped to equivalent regex character classes
        self.iupac = {
            'A': 'A', 'T': 'T', 'G': 'G', 'C': 'C',
            'R': '[AG]', 'Y': '[CT]', 'S': '[GC]', 'W': '[AT]',
            'K': '[GT]', 'M': '[AC]', 'B': '[CGT]', 'D': '[AGT]',
            'H': '[ACT]', 'V': '[ACG]', 'N': '[ACGT]',
        }

        # trailing separator is expected by the code under test
        self.output_dir = get_random_directory_name(prefix = '/tmp/') + '/'

        create_dir(self.output_dir)
Exemplo n.º 48
0
def main():
    """CLI entry point: join paired-end reads with fastq-join or SeqPrep."""
    # parse command line parameters
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create local copy of options
    forward_reads_fp = opts.forward_reads_fp
    reverse_reads_fp = opts.reverse_reads_fp
    pe_join_method = opts.pe_join_method
    output_dir = opts.output_dir
    # fastq-join only options:
    perc_max_diff = opts.perc_max_diff
    # SeqPrep only options:
    max_ascii_score = opts.max_ascii_score
    min_frac_match = opts.min_frac_match
    max_good_mismatch = opts.max_good_mismatch
    phred_64 = opts.phred_64
    # both fastq-join & SeqPrep options
    min_overlap = opts.min_overlap

    create_dir(output_dir, fail_on_exist=False)

    # Dispatch to the requested join method. Using if/elif/else guarantees
    # 'paths' is always bound: previously an unexpected method name left
    # 'paths' undefined and crashed later with a NameError.
    if pe_join_method == "fastq-join":
        join_func = join_method_names["fastq-join"]
        paths = join_func(
            forward_reads_fp,
            reverse_reads_fp,
            perc_max_diff=perc_max_diff,
            min_overlap=min_overlap,
            working_dir=output_dir,
        )
    elif pe_join_method == "SeqPrep":
        join_func = join_method_names["SeqPrep"]
        paths = join_func(
            forward_reads_fp,
            reverse_reads_fp,
            max_overlap_ascii_q_score=max_ascii_score,
            min_overlap=min_overlap,
            max_mismatch_good_frac=max_good_mismatch,
            min_frac_matching=min_frac_match,
            phred_64=phred_64,
            working_dir=output_dir,
        )
    else:
        # Exits with a usage error instead of an obscure NameError below.
        option_parser.error("Unknown pe_join_method: %r" % pe_join_method)

    # If index / barcode file is supplied, filter unused barcode reads
    # and write them to a new file. Name based on joined-pairs / assembled
    # outfile
    if opts.index_reads_fp:
        index_reads = opts.index_reads_fp
        assembly_fp = paths["Assembled"]  # grab joined-pairs output path
        write_synced_barcodes_fastq(assembly_fp, index_reads)
def make_per_sample_fasta(input_seqs_fp, mapping_file, output_dir):
    """Create per-sample fasta files from a multiplexed fasta file.

    input_seqs_fp: path to the multiplexed fasta file
    mapping_file: mapping file data accepted by parse_mapping_file
    output_dir: directory that receives one <SampleID>.fna file per sample
    """
    mapping_data, header, comments = parse_mapping_file(
        mapping_file, suppress_stripping=False)

    # Loop-invariant setup: previously the output directory was re-created
    # and the negate flag re-assigned on every iteration.
    create_dir(output_dir)
    negate = False

    for item in mapping_data:
        # The first mapping-file column is the SampleID, which is also the
        # sequence-id prefix used to select this sample's reads.
        seqs_to_keep = item[0]
        output_file = join(output_dir, seqs_to_keep + '.fna')
        seqs_to_keep_lookup = get_seqs_to_keep_lookup_from_prefix(
            open(input_seqs_fp), seqs_to_keep)
        filter_fasta_fp(input_seqs_fp, output_file, seqs_to_keep_lookup,
                        negate)
Exemplo n.º 50
0
def main():
    """Annotate a master tree with bootstrap support from support trees."""
    option_parser, options, args = parse_command_line_parameters(**script_info)

    # The output directory may already exist; don't treat that as an error.
    create_dir(options.output_dir, fail_on_exist=False)

    # Load the master tree plus every support tree found in support_dir.
    master_tree, support_trees = load_tree_files(
        options.master_tree, options.support_dir)

    # Compute the support value of each node in the master tree.
    new_master, bootstraps = bootstrap_support(master_tree, support_trees)

    write_bootstrap_support_files(
        new_master, bootstraps, options.output_dir, len(support_trees))
Exemplo n.º 51
0
def main():
    """run denoiser on input flowgrams"""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    sff_files = opts.sff_fps

    # Fail early with a usage error if any input flowgram file is missing.
    for f in sff_files:
        if (not exists(f)):
            option_parser.error(('Flowgram file path does not exist:\n %s \n' +
                                 'Pass a valid one via -i.') % f)
    outdir = opts.output_dir

    create_dir(outdir, fail_on_exist=not opts.force)

    if (not (opts.primer or opts.map_fname)):
        raise ApplicationError("Either mapping file or primer required")
    # Read primer from Meta data file if not set on command line
    if not opts.primer:
        mapping_data, header, comments = \
            parse_mapping_file(open(opts.map_fname, "U"))

        index = header.index("LinkerPrimerSequence")
        all_primers = set(array(mapping_data)[:, index])

        # A single shared primer is required across the whole mapping file.
        if len(all_primers) != 1:
            raise ValueError("Currently only data sets with one primer are allowed.\n" +
                             "Make separate mapping files with only one primer, re-run split_libraries and\n"
                             + "denoise with each split_library output separately.")
        primer = list(all_primers)[0]
        last_char = primer[-1]
        # A degenerate 3' base would make downstream primer matching ambiguous.
        if(last_char not in "ACGT"):
            raise ValueError("We currently do not support primer with " +
                             "degenerate bases at it's 3' end.")

    else:
        primer = opts.primer

    centroids, cluster_mapping = fast_denoiser(opts.sff_fps, opts.fasta_fp,
                                               outdir, opts.num_cpus, primer,
                                               titanium=opts.titanium)

    # Store cluster mapping and centroids. 'with' guarantees both handles
    # are closed; previously the fasta handle was never closed (leak).
    # (Also removed the unused local 'log_fh'.)
    result_otu_path = '%s/denoised_clusters.txt' % outdir
    with open(result_otu_path, 'w') as of:
        for i, cluster in cluster_mapping.iteritems():
            of.write('%s\t%s\n' % (str(i), '\t'.join(cluster)))

    result_fasta_path = '%s/denoised_seqs.fasta' % outdir
    with open(result_fasta_path, 'w') as oh:
        write_Fasta_from_name_seq_pairs(centroids, oh)
Exemplo n.º 52
0
def main():
    """CLI entry point: write bootstrap support files for a master tree."""
    option_parser, options, args = parse_command_line_parameters(**script_info)

    # An already-existing output directory is acceptable here.
    create_dir(options.output_dir, fail_on_exist=False)

    # Load the master tree and every support tree from support_dir.
    master_tree, support_trees = load_tree_files(options.master_tree,
                                                 options.support_dir)
    # get support of each node in master
    new_master, bootstraps = bootstrap_support(master_tree, support_trees)

    write_bootstrap_support_files(new_master, bootstraps, options.output_dir,
                                  len(support_trees))
Exemplo n.º 53
0
    def test_generate_heatmap_plots(self):
        """generate_heatmap_plots: create default output files"""

        # Build a scratch output tree (plus a js/ subdir) so the file
        # layout the script expects exists during the test.
        dir_path = join(self.output_dir, 'test')
        create_dir(dir_path)

        js_dir_path = join(dir_path, 'js')
        create_dir(js_dir_path)

        self._folders_to_cleanup.append(dir_path)

        qiime_dir = get_qiime_project_dir()

        # Copy each javascript support file the generated page references.
        js_path = join(qiime_dir, 'qiime/support_files/js')
        for js_file in ('overlib.js', 'otu_count_display.js', 'jquery.js',
                        'jquery.tablednd_0_5.js'):
            shutil.copyfile(join(js_path, js_file),
                            join(js_dir_path, js_file))

        # Minimal two-OTU by three-sample table with taxonomy metadata.
        orig_data = array([[0, 1, 2], [1000, 0, 0]])
        orig_otu_table = table_factory(
            orig_data,
            ['Sample1', 'Sample2', 'Sample3'],
            ['OTU1', 'OTU2'],
            [None, None, None],
            [{"taxonomy": ["Bacteria"]}, {"taxonomy": ["Archaea"]}])

        # Explicit OTU and sample orderings exercised by the generator.
        otu_sort = ['OTU2', 'OTU1']
        sample_sort = ['Sample2', 'Sample1', 'Sample3']
        num_otu_hits = 3

        # generate test files
        generate_heatmap_plots(num_otu_hits,
                               orig_otu_table,
                               otu_sort,
                               sample_sort,
                               dir_path,
                               js_dir_path,
                               'test',
                               fractional_values=False)

        # The emitted javascript must match the reference output exactly.
        self.assertEqual(
            open(join(js_dir_path, 'test.js'), 'U').read(), exp_js_output_file)
Exemplo n.º 54
0
def main():
    """Truncate fasta (and matching qual) records at a fixed base position."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Option values arrive as strings; the cut position must be an int.
    target_base_pos = int(opts.base_pos)

    create_dir(opts.output_dir)

    truncate_fasta_qual(opts.fasta_fp, opts.qual_fp, opts.output_dir,
                        target_base_pos)
def main():
    """CLI entry point: truncate fasta/qual files at the requested base."""
    option_parser, opts, args =\
     parse_command_line_parameters(**script_info)

    fasta_fp = opts.fasta_fp
    qual_fp = opts.qual_fp
    output_dir = opts.output_dir
    # Option values are strings; the truncation position must be an int.
    base_pos = int(opts.base_pos)

    create_dir(output_dir)

    truncate_fasta_qual(fasta_fp, qual_fp, output_dir, base_pos)
Exemplo n.º 56
0
def main():
    """Write one taxa-comparison table per requested taxonomic level."""
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    create_dir(opts.output_dir, fail_on_exist=False)

    # Levels come in as a comma-separated string, e.g. "2,3,4".
    levels = [int(level) for level in opts.levels.split(',')]

    results = generate_taxa_compare_table(opts.root_dir, opts.key_dir, levels)
    results = format_output(results, opts.separator)

    # Emit one output file per level, named compare_table_L<level>.txt.
    for level in levels:
        out_fp = join(opts.output_dir,
                      'compare_table_L' + str(level) + '.txt')
        with open(out_fp, 'w') as out_f:
            out_f.writelines(results[level])
def main():
    """Split an interleaved fastq file into forward/reverse read files."""
    # parse command line parameters
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    create_dir(opts.output_dir, fail_on_exist=False)

    # The identifiers distinguish forward from reverse records in the
    # interleaved input stream.
    extract_reads_from_interleaved(opts.input_fp,
                                   opts.forward_read_identifier,
                                   opts.reverse_read_identifier,
                                   opts.output_dir)
Exemplo n.º 58
0
def main():
    """CLI entry point: join paired-end reads with fastq-join or SeqPrep."""
    # parse command line parameters
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Create local copy of options
    forward_reads_fp = opts.forward_reads_fp
    reverse_reads_fp = opts.reverse_reads_fp
    pe_join_method = opts.pe_join_method
    output_dir = opts.output_dir
    # fastq-join only options:
    perc_max_diff = opts.perc_max_diff
    # SeqPrep only options:
    max_ascii_score = opts.max_ascii_score
    min_frac_match = opts.min_frac_match
    max_good_mismatch = opts.max_good_mismatch
    phred_64 = opts.phred_64
    # both fastq-join & SeqPrep options
    min_overlap = opts.min_overlap

    create_dir(output_dir, fail_on_exist=False)

    # send parameters to appropriate join method
    # currently only two join methods exist:
    # 'fastq-join' and 'SeqPrep'
    # NOTE(review): if pe_join_method matched neither name, 'paths' would be
    # unbound below — presumably the option parser restricts the choices;
    # confirm against script_info.
    if pe_join_method == "fastq-join":
        join_func = join_method_names["fastq-join"]
        paths = join_func(forward_reads_fp,
                          reverse_reads_fp,
                          perc_max_diff=perc_max_diff,
                          min_overlap=min_overlap,
                          working_dir=output_dir)

    if pe_join_method == "SeqPrep":
        join_func = join_method_names["SeqPrep"]
        paths = join_func(forward_reads_fp,
                          reverse_reads_fp,
                          max_overlap_ascii_q_score=max_ascii_score,
                          min_overlap=min_overlap,
                          max_mismatch_good_frac=max_good_mismatch,
                          min_frac_matching=min_frac_match,
                          phred_64=phred_64,
                          working_dir=output_dir)

    # If index / barcode file is supplied, filter unused barcode reads
    # and write them to a new file. Name based on joined-pairs / assembled
    # outfile
    if opts.index_reads_fp:
        index_reads = opts.index_reads_fp
        assembly_fp = paths['Assembled']  # grab joined-pairs output path
        write_synced_barcodes_fastq(assembly_fp, index_reads)
Exemplo n.º 59
0
    def setUp(self):
        """ Creates variables and tmp filepaths for use in unit testing """

        def write_tmp(prefix, suffix, contents):
            # Write contents to a fresh tmp file and return its path.
            fp = get_tmp_filename(prefix=prefix, suffix=suffix)
            out_f = open(fp, 'w')
            out_f.write(contents)
            out_f.close()
            return fp

        self.sample_fasta_fp = write_tmp("sample_fasta_", ".fna",
                                         sample_fasta_file)
        self.sample_fasta_invalid_fp = write_tmp("sample_fasta_", ".fna",
                                                 sample_fasta_file_invalid)
        self.sample_mapping_fp = write_tmp("sample_mapping_", ".txt",
                                           sample_mapping_file)
        self.sample_tree_3tips_fp = write_tmp("sample_tree3tips_", ".tre",
                                              sample_tree_file_3tips)
        # Fixed copy-paste slip: the 5-tip tree previously reused the
        # "sample_tree3tips_" prefix, producing misleading tmp filenames.
        self.sample_tree_5tips_fp = write_tmp("sample_tree5tips_", ".tre",
                                              sample_tree_file_5tips)
        self.sample_mapping_file_errors_fp = write_tmp(
            "error_mapping_", ".txt", sample_mapping_file_errors)

        # All fixture files are deleted by the test teardown.
        self._files_to_remove = [
            self.sample_fasta_fp, self.sample_fasta_invalid_fp,
            self.sample_mapping_fp, self.sample_tree_3tips_fp,
            self.sample_tree_5tips_fp, self.sample_mapping_file_errors_fp
        ]

        # Scratch output directory for the validation script under test.
        self.output_dir = \
            get_tmp_filename(prefix="validate_demultiplexed_fasta_",
                             suffix="/")
        create_dir(self.output_dir)