示例#1
0
    def test_get_matching_files(self):
        """ get_matching_files pairs reads with barcode/mapping files """

        # Expected result, keyed by read file; each value is the
        # (barcode file, mapping file) pair that belongs to that read file.
        expected = {
            'sample1_r1_000.fastq':
                ('sample1_bc_000.fastq', 'sample1_mapping_000.txt'),
            'sample2_r1_000.fastq':
                ('sample2_bc_000.fastq', 'sample2_mapping_000.txt'),
        }

        fastq_fps = ['sample1_r1_000.fastq', 'sample2_r1_000.fastq',
                     'sample1_bc_000.fastq', 'sample2_bc_000.fastq']
        mapping_fps = ['sample1_mapping_000.txt', 'sample2_mapping_000.txt']

        observed = get_matching_files(fastq_fps, mapping_fps,
                                      'r1', 'bc', 'mapping')

        # Keys and values are compared separately, as unordered sets.
        self.assertEqual(set(observed.keys()), set(expected.keys()))
        self.assertEqual(set(observed.values()), set(expected.values()))
示例#2
0
    def test_get_matching_files(self):
        """ Reads are matched to the right barcode and mapping files """

        read_tag, barcode_tag, mapping_tag = 'r1', 'bc', 'mapping'

        fastq_inputs = [
            'sample1_r1_000.fastq',
            'sample2_r1_000.fastq',
            'sample1_bc_000.fastq',
            'sample2_bc_000.fastq',
        ]
        mapping_inputs = [
            'sample1_mapping_000.txt',
            'sample2_mapping_000.txt',
        ]

        matched = get_matching_files(fastq_inputs, mapping_inputs, read_tag,
                                     barcode_tag, mapping_tag)

        # The read files are expected as the keys of the result.
        wanted_reads = {'sample1_r1_000.fastq', 'sample2_r1_000.fastq'}
        self.assertEqual(set(matched.keys()), wanted_reads)

        # Each value is expected to be a (barcode file, mapping file) tuple.
        wanted_pairs = {
            ('sample1_bc_000.fastq', 'sample1_mapping_000.txt'),
            ('sample2_bc_000.fastq', 'sample2_mapping_000.txt'),
        }
        self.assertEqual(set(matched.values()), wanted_pairs)
def main():
    """Collect input files and run (or print) the commands built from them.

    Parses the command line described by ``script_info``, gathers every
    fastq file below ``opts.input_dir`` (and, for the
    'mapping_barcode_files' demultiplexing method, every mapping file),
    builds the command list with ``create_commands_slf`` and passes it to
    ``print_commands`` when ``opts.print_only`` is set, otherwise to
    ``call_commands_serially``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    parameter_fp = opts.parameter_fp
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    # Blank entries (e.g. from a trailing comma) are dropped: an empty
    # suffix makes str.endswith() true for every file name, which would
    # turn every file in the input directory into a "mapping file".
    mapping_extensions = tuple(
        extension.strip()
        for extension in opts.mapping_extensions.split(',')
        if extension.strip())

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name enabled, "
                            "--include_input_dir_path must be enabled.")

    if parameter_fp:
        # NOTE(review): mode 'U' is deprecated on Python 3 and rejected
        # from 3.11 on; it is left as-is so behaviour does not change on
        # the interpreter this script currently runs under.
        with open(parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['split_libraries_fastq'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    fastq_extensions = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')
    use_mapping_files = demultiplexing_method == 'mapping_barcode_files'

    all_fastq = []
    all_mapping = []

    # One pass over the tree. endswith() with a tuple is true if any suffix
    # matches, so a file is recorded at most once per list even when
    # several of the listed extensions match it.
    for root, _, fps in walk(input_dir):
        for fp in fps:
            if fp.endswith(fastq_extensions):
                all_fastq.append(abspath(join(root, fp)))
            if use_mapping_files and fp.endswith(mapping_extensions):
                all_mapping.append(abspath(join(root, fp)))

    if use_mapping_files:
        all_files = get_matching_files(all_fastq, all_mapping, read_indicator,
                                       barcode_indicator, mapping_indicator)
    else:
        all_files = all_fastq

    commands = create_commands_slf(all_files, demultiplexing_method,
                                   output_dir, params_str, leading_text,
                                   trailing_text, include_input_dir_path,
                                   remove_filepath_in_name, sampleid_indicator)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)
def main():
    """Collect input files and run (or print) the commands built from them.

    Parses the command line described by ``script_info``, gathers every
    fastq file below ``opts.input_dir`` (and, for the
    'mapping_barcode_files' demultiplexing method, every mapping file),
    builds the command list with ``create_commands_slf`` and passes it to
    ``print_commands`` when ``opts.print_only`` is set, otherwise to
    ``call_commands_serially``.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(suppress_verbose=True, **script_info)

    input_dir = opts.input_dir
    demultiplexing_method = opts.demultiplexing_method
    parameter_fp = opts.parameter_fp
    read_indicator = opts.read_indicator
    barcode_indicator = opts.barcode_indicator
    mapping_indicator = opts.mapping_indicator
    sampleid_indicator = opts.sampleid_indicator
    leading_text = opts.leading_text
    trailing_text = opts.trailing_text
    include_input_dir_path = opts.include_input_dir_path
    output_dir = abspath(opts.output_dir)
    remove_filepath_in_name = opts.remove_filepath_in_name
    print_only = opts.print_only

    # Blank entries (e.g. from a trailing comma) are dropped: an empty
    # suffix makes str.endswith() true for every file name, which would
    # turn every file in the input directory into a "mapping file".
    mapping_extensions = tuple(
        extension.strip()
        for extension in opts.mapping_extensions.split(',')
        if extension.strip())

    if remove_filepath_in_name and not include_input_dir_path:
        option_parser.error("If --remove_filepath_in_name enabled, "
                            "--include_input_dir_path must be enabled.")

    if parameter_fp:
        # NOTE(review): mode 'U' is deprecated on Python 3 and rejected
        # from 3.11 on; it is left as-is so behaviour does not change on
        # the interpreter this script currently runs under.
        with open(parameter_fp, 'U') as parameter_f:
            params_dict = parse_qiime_parameters(parameter_f)
        params_str = get_params_str(params_dict['split_libraries_fastq'])
    else:
        params_dict = {}
        params_str = ""

    create_dir(output_dir)

    fastq_extensions = ('.fastq.gz', '.fastq', '.fq.gz', '.fq')
    use_mapping_files = demultiplexing_method == 'mapping_barcode_files'

    all_fastq = []
    all_mapping = []

    # One pass over the tree. endswith() with a tuple is true if any suffix
    # matches, so a file is recorded at most once per list even when
    # several of the listed extensions match it.
    for root, _, fps in walk(input_dir):
        for fp in fps:
            if fp.endswith(fastq_extensions):
                all_fastq.append(abspath(join(root, fp)))
            if use_mapping_files and fp.endswith(mapping_extensions):
                all_mapping.append(abspath(join(root, fp)))

    if use_mapping_files:
        all_files = get_matching_files(all_fastq, all_mapping, read_indicator,
                                       barcode_indicator, mapping_indicator)
    else:
        all_files = all_fastq

    commands = create_commands_slf(all_files, demultiplexing_method,
                                   output_dir, params_str, leading_text,
                                   trailing_text, include_input_dir_path,
                                   remove_filepath_in_name, sampleid_indicator)

    qiime_config = load_qiime_config()
    if print_only:
        command_handler = print_commands
    else:
        command_handler = call_commands_serially
    logger = WorkflowLogger(generate_log_fp(output_dir),
                            params=params_dict,
                            qiime_config=qiime_config)
    # Call the command handler on the list of commands
    command_handler(commands,
                    status_update_callback=no_status_updates,
                    logger=logger,
                    close_logger_on_success=True)