def test_extract_barcodes_paired_end(self):
    """ Extracts barcodes from paired-end reads

    Passes one four-line fastq record per read file to extract_barcodes
    with input_type "barcode_paired_end", then checks the three files
    written to self.output_dir: barcodes.fastq, reads1.fastq and
    reads2.fastq.
    """

    fastq1_lines =\
        "@HWI-ST830\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF".split('\n')
    fastq2_lines =\
        "@HWI-ST830\nGGGGTTTTAAAACCCC\n+\n1234567890ABCDEF".split('\n')

    extract_barcodes(fastq1=fastq1_lines, fastq2=fastq2_lines,
                     input_type="barcode_paired_end",
                     output_dir=self.output_dir)

    # Each output file is read inside a with-block so its handle is closed
    # as soon as the lines have been collected, instead of being leaked.
    with open(join(self.output_dir, "barcodes.fastq"), "U") as output_bcs_fp:
        actual_bcs = list(output_bcs_fp)
    # Expected barcode is the first six bases of read 1 followed by the
    # first six bases of read 2, with the matching quality characters.
    expected_bcs =\
        ['@HWI-ST830\n', 'AAAATTGGGGTT\n', '+\n', '123456123456\n']
    self.assertEqual(actual_bcs, expected_bcs)

    # reads 1 output
    with open(join(self.output_dir, "reads1.fastq"), "U") as output_reads_fp:
        actual_reads = list(output_reads_fp)
    expected_reads = ['@HWI-ST830\n', 'TTCCCCGGGG\n', '+\n',
                      '7890ABCDEF\n']
    self.assertEqual(actual_reads, expected_reads)

    # reads 2 output
    with open(join(self.output_dir, "reads2.fastq"), "U") as output_reads_fp:
        actual_reads = list(output_reads_fp)
    expected_reads = ['@HWI-ST830\n', 'TTAAAACCCC\n', '+\n',
                      '7890ABCDEF\n']
    self.assertEqual(actual_reads, expected_reads)
def main():
    """Parse command line options, validate them and run extract_barcodes.

    Option combinations that cannot work are reported through
    option_parser.error before any input file is opened.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # Read reorientation is only accepted together with a mapping file.
    if opts.attempt_read_reorientation and not opts.mapping_fp:
        option_parser.error("To use --attempt_read_reorientation, one must "
                            "supply a mapping file that contains both LinkerPrimerSequence "
                            "and ReversePrimer columns.")

    # Paired-end barcode extraction needs the second fastq file.
    if opts.input_type == "barcode_paired_end" and not opts.fastq2:
        option_parser.error("To use input_type of barcode_paired_end, "
                            "a second fastq file must be specified with --fastq2")

    # With a single fastq file, header matching is always disabled;
    # otherwise the user's choice is honoured.
    if opts.fastq2:
        disable_header_match = opts.disable_header_match
    else:
        disable_header_match = True

    fastq1 = qiime_open(opts.fastq1)
    fastq2 = qiime_open(opts.fastq2) if opts.fastq2 else None

    create_dir(opts.output_dir)

    map_fp = qiime_open(opts.mapping_fp) if opts.mapping_fp else None

    extract_barcodes(
        fastq1,
        fastq2,
        opts.output_dir,
        opts.input_type,
        opts.bc1_len,
        opts.bc2_len,
        opts.rev_comp_bc1,
        opts.rev_comp_bc2,
        opts.char_delineator,
        opts.switch_bc_order,
        map_fp,
        opts.attempt_read_reorientation,
        disable_header_match)
def test_extract_barcodes_from_labels(self):
    """ Extracts barcodes from fastq labels

    Passes a single fastq record whose label ends in ":GTATCT" to
    extract_barcodes with input_type "barcode_in_label" and checks the
    barcodes.fastq file written to self.output_dir.
    """

    fastq_lines =\
        "@HWI-ST830:GTATCT\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF".split('\n')

    extract_barcodes(fastq_lines, input_type="barcode_in_label",
                     output_dir=self.output_dir, disable_header_match=True)

    # Read inside a with-block so the file handle is closed rather than
    # leaked for the remainder of the test run.
    with open(join(self.output_dir, "barcodes.fastq"), "U") as output_bcs_fp:
        actual_bcs = list(output_bcs_fp)
    # The barcode sequence comes from the label; the expected quality line
    # is one 'F' per barcode base.
    expected_bcs =\
        ['@HWI-ST830:GTATCT\n', 'GTATCT\n', '+\n', 'FFFFFF\n']
    self.assertEqual(actual_bcs, expected_bcs)
def test_extract_barcodes_from_labels(self):
    """ Extracts barcodes from fastq labels

    Passes a single fastq record whose label ends in ":GTATCT" to
    extract_barcodes with input_type "barcode_in_label" and checks the
    barcodes.fastq file written to self.output_dir.
    """

    fastq_lines =\
        "@HWI-ST830:GTATCT\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF".split('\n')

    extract_barcodes(fastq_lines, input_type="barcode_in_label",
                     output_dir=self.output_dir, disable_header_match=True)

    # Read inside a with-block so the file handle is closed rather than
    # leaked for the remainder of the test run.
    with open(join(self.output_dir, "barcodes.fastq"), "U") as output_bcs_fp:
        actual_bcs = list(output_bcs_fp)
    # The barcode sequence comes from the label; the expected quality line
    # is one "'" character per barcode base.
    expected_bcs =\
        ['@HWI-ST830:GTATCT\n', 'GTATCT\n', '+\n', "''''''\n"]
    self.assertEqual(actual_bcs, expected_bcs)
def test_extract_barcodes_single_end(self):
    """ Extracts barcodes from single-end read

    Passes a single fastq record to extract_barcodes using its default
    input type and checks barcodes.fastq and reads.fastq written to
    self.output_dir.
    """

    fastq_lines =\
        "@HWI-ST830\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF".split('\n')

    extract_barcodes(fastq_lines, output_dir=self.output_dir,
                     disable_header_match=True)

    # Both output files are read inside with-blocks so their handles are
    # closed immediately instead of being leaked.
    with open(join(self.output_dir, "barcodes.fastq"), "U") as output_bcs_fp:
        actual_bcs = list(output_bcs_fp)
    # Expected barcode is the first six bases (and quality characters).
    expected_bcs = ['@HWI-ST830\n', 'AAAATT\n', '+\n', '123456\n']
    self.assertEqual(actual_bcs, expected_bcs)

    with open(join(self.output_dir, "reads.fastq"), "U") as output_reads_fp:
        actual_reads = list(output_reads_fp)
    # Expected read is what remains after the six barcode bases.
    expected_reads = ['@HWI-ST830\n', 'TTCCCCGGGG\n', '+\n',
                      '7890ABCDEF\n']
    self.assertEqual(actual_reads, expected_reads)
def test_extract_barcodes_stitched_reads(self):
    """ Extracts barcodes from ends of a single read

    Passes a single fastq record to extract_barcodes with input_type
    "barcode_paired_stitched" and checks barcodes.fastq and reads.fastq
    written to self.output_dir.
    """

    # The trailing newline leaves an empty final element after the split.
    fastq_lines =\
        "@HWI-ST830\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF\n".split('\n')

    extract_barcodes(fastq_lines, input_type="barcode_paired_stitched",
                     output_dir=self.output_dir, disable_header_match=True)

    # Both output files are read inside with-blocks so their handles are
    # closed immediately instead of being leaked.
    with open(join(self.output_dir, "barcodes.fastq"), "U") as output_bcs_fp:
        actual_bcs = list(output_bcs_fp)
    # Expected barcode is the first six bases followed by the last six.
    expected_bcs =\
        ['@HWI-ST830\n', 'AAAATTCCGGGG\n', '+\n', '123456ABCDEF\n']
    self.assertEqual(actual_bcs, expected_bcs)

    with open(join(self.output_dir, "reads.fastq"), "U") as output_reads_fp:
        actual_reads = list(output_reads_fp)
    # Expected read is the middle four bases left between the barcodes.
    expected_reads = ['@HWI-ST830\n', 'TTCC\n', '+\n', '7890\n']
    self.assertEqual(actual_reads, expected_reads)
def test_extract_barcodes_paired_end(self):
    """ Extracts barcodes from paired-end reads

    Runs extract_barcodes with input_type "barcode_paired_end" on one
    fastq record per read file and verifies the barcodes.fastq,
    reads1.fastq and reads2.fastq files produced in self.output_dir.
    """

    fastq1_lines =\
        "@HWI-ST830\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF".split('\n')
    fastq2_lines =\
        "@HWI-ST830\nGGGGTTTTAAAACCCC\n+\n1234567890ABCDEF".split('\n')

    extract_barcodes(fastq1=fastq1_lines, fastq2=fastq2_lines,
                     input_type="barcode_paired_end",
                     output_dir=self.output_dir)

    # Output files are opened in with-blocks so no handle outlives the
    # read; previously all three handles were left open.
    barcodes_fp = join(self.output_dir, "barcodes.fastq")
    with open(barcodes_fp, "U") as output_bcs_fp:
        actual_bcs = list(output_bcs_fp)
    # First six bases of read 1 followed by first six bases of read 2.
    expected_bcs =\
        ['@HWI-ST830\n', 'AAAATTGGGGTT\n', '+\n', '123456123456\n']
    self.assertEqual(actual_bcs, expected_bcs)

    # reads 1 output
    reads1_fp = join(self.output_dir, "reads1.fastq")
    with open(reads1_fp, "U") as output_reads_fp:
        actual_reads = list(output_reads_fp)
    expected_reads = [
        '@HWI-ST830\n',
        'TTCCCCGGGG\n',
        '+\n',
        '7890ABCDEF\n']
    self.assertEqual(actual_reads, expected_reads)

    # reads 2 output
    reads2_fp = join(self.output_dir, "reads2.fastq")
    with open(reads2_fp, "U") as output_reads_fp:
        actual_reads = list(output_reads_fp)
    expected_reads = [
        '@HWI-ST830\n',
        'TTAAAACCCC\n',
        '+\n',
        '7890ABCDEF\n']
    self.assertEqual(actual_reads, expected_reads)