def test_process_barcode_paired_end_data(self):
    """ Splits paired fastq records into barcodes and trimmed reads

    Five bases are taken from read 1 and three from read 2. Both
    barcodes are reverse complemented, so the expected barcode
    qualities appear reversed relative to the input.
    """
    read1_record = ["HWI-ST830", "AAAATTTTCCCCGGGG", "1234567890ABCDEF"]
    read2_record = ["HWI-ST830", "TCCCCGGGG", "ABCDEFGHI"]
    read1_sink = FakeOutFile()
    read2_sink = FakeOutFile()
    barcode_sink = FakeOutFile()

    process_barcode_paired_end_data(
        read1_record, read2_record, barcode_sink, read1_sink, read2_sink,
        bc1_len=5, bc2_len=3, rev_comp_bc1=True, rev_comp_bc2=True)

    # (output handle, lines it should contain). The trailing '' is the
    # empty piece left after splitting on the final newline.
    expectations = [
        (barcode_sink, ['@HWI-ST830', 'ATTTTGGA', '+', '54321CBA', '']),
        (read1_sink, ['@HWI-ST830', 'TTTCCCCGGGG', '+', '67890ABCDEF', '']),
        (read2_sink, ['@HWI-ST830', 'CCGGGG', '+', 'DEFGHI', '']),
    ]
    for sink, wanted_lines in expectations:
        self.assertEqual(sink.data.split('\n'), wanted_lines)
def test_process_barcode_paired_end_data(self):
    """ Splits paired fastq records into barcodes and trimmed reads

    Qualities are supplied as int8 arrays; the expected output lines
    show them written as ASCII characters (score + 33). Both barcodes
    are reverse complemented, so their qualities appear reversed.
    """
    read1_quals = np.arange(3, 19, dtype=np.int8)
    read2_quals = np.arange(3, 12, dtype=np.int8)
    read1_record = ["HWI-ST830", "AAAATTTTCCCCGGGG", read1_quals]
    read2_record = ["HWI-ST830", "TCCCCGGGG", read2_quals]
    read1_sink = FakeOutFile()
    read2_sink = FakeOutFile()
    barcode_sink = FakeOutFile()

    process_barcode_paired_end_data(
        read1_record, read2_record, barcode_sink, read1_sink, read2_sink,
        bc1_len=5, bc2_len=3, rev_comp_bc1=True, rev_comp_bc2=True)

    # (output handle, lines it should contain). The trailing '' is the
    # empty piece left after splitting on the final newline.
    expectations = [
        (barcode_sink, ['@HWI-ST830', 'ATTTTGGA', '+', "('&%$&%$", '']),
        (read1_sink, ['@HWI-ST830', 'TTTCCCCGGGG', '+', ')*+,-./0123', '']),
        (read2_sink, ['@HWI-ST830', 'CCGGGG', '+', "'()*+,", '']),
    ]
    for sink, wanted_lines in expectations:
        self.assertEqual(sink.data.split('\n'), wanted_lines)
def test_process_barcode_paired_end_data(self):
    """ Splits paired fastq records into barcodes and trimmed reads

    Five bases are taken from read 1 and three from read 2. Both
    barcodes are reverse complemented, so the expected barcode
    qualities appear reversed relative to the input.
    """
    read1_record = ["HWI-ST830", "AAAATTTTCCCCGGGG", "1234567890ABCDEF"]
    read2_record = ["HWI-ST830", "TCCCCGGGG", "ABCDEFGHI"]
    read1_sink = FakeOutFile()
    read2_sink = FakeOutFile()
    barcode_sink = FakeOutFile()

    process_barcode_paired_end_data(
        read1_record, read2_record, barcode_sink, read1_sink, read2_sink,
        bc1_len=5, bc2_len=3, rev_comp_bc1=True, rev_comp_bc2=True)

    # (output handle, lines it should contain). The trailing '' is the
    # empty piece left after splitting on the final newline.
    expectations = [
        (barcode_sink, ['@HWI-ST830', 'ATTTTGGA', '+', '54321CBA', '']),
        (read1_sink, ['@HWI-ST830', 'TTTCCCCGGGG', '+', '67890ABCDEF', '']),
        (read2_sink, ['@HWI-ST830', 'CCGGGG', '+', 'DEFGHI', '']),
    ]
    for sink, wanted_lines in expectations:
        self.assertEqual(sink.data.split('\n'), wanted_lines)
def test_process_barcode_paired_end_data_orientation_reverse_in_read1(
        self):
    """ Swaps the paired reads when the reverse primer is in read 1 """
    read1_record = ["HWI-ST830", "ATCGATCGATCGATCGATCG",
                    "1234567890ABCDEFGHIJ"]
    read2_record = ["HWI-ST830", "GGTTCCAA", "ABCDEFGH"]
    read1_sink = FakeOutFile()
    read2_sink = FakeOutFile()
    barcode_sink = FakeOutFile()

    # Each primer is compiled from the per-symbol patterns looked up in
    # self.iupac.
    fwd_patterns = [
        compile(''.join(self.iupac[base] for base in 'TTTTT'))]
    rev_patterns = [
        compile(''.join(self.iupac[base] for base in 'CGATCGA'))]

    unoriented_barcode_sink = FakeOutFile()
    unoriented_read1_sink = FakeOutFile()
    unoriented_read2_sink = FakeOutFile()

    # The reverse primer occurs in read 1 (the forward primer occurs in
    # neither read), so the reads should be written in swapped order.
    process_barcode_paired_end_data(
        read1_record, read2_record, barcode_sink, read1_sink, read2_sink,
        bc1_len=5, bc2_len=3, rev_comp_bc1=False, rev_comp_bc2=False,
        attempt_read_orientation=True,
        forward_primers=fwd_patterns,
        reverse_primers=rev_patterns,
        output_bc_not_oriented=unoriented_barcode_sink,
        fastq1_out_not_oriented=unoriented_read1_sink,
        fastq2_out_not_oriented=unoriented_read2_sink)

    # (output handle, lines it should contain). The barcode is the first
    # five bases of the original read 2 followed by the first three of
    # the original read 1; the not-oriented outputs must stay empty.
    expectations = [
        (barcode_sink, ['@HWI-ST830', 'GGTTCATC', '+', 'ABCDE123', '']),
        (read1_sink, ['@HWI-ST830', 'CAA', '+', 'FGH', '']),
        (read2_sink, ['@HWI-ST830', 'GATCGATCGATCGATCG', '+',
                      '4567890ABCDEFGHIJ', '']),
        (unoriented_barcode_sink, ['']),
        (unoriented_read1_sink, ['']),
        (unoriented_read2_sink, ['']),
    ]
    for sink, wanted_lines in expectations:
        self.assertEqual(sink.data.split('\n'), wanted_lines)
def test_process_barcode_paired_end_data_orientation_no_match(self):
    """ Sends the pair to the not-oriented outputs when no primer hits """
    read1_record = ["HWI-ST830", "ATCGATCGATCGATCGATCG",
                    "1234567890ABCDEFGHIJ"]
    read2_record = ["HWI-ST830", "GGTTCCAA", "ABCDEFGH"]
    read1_sink = FakeOutFile()
    read2_sink = FakeOutFile()
    barcode_sink = FakeOutFile()

    # Each primer is compiled from the per-symbol patterns looked up in
    # self.iupac.
    fwd_patterns = [
        compile(''.join(self.iupac[base] for base in 'AYA'))]
    rev_patterns = [
        compile(''.join(self.iupac[base] for base in 'ATA'))]

    unoriented_barcode_sink = FakeOutFile()
    unoriented_read1_sink = FakeOutFile()
    unoriented_read2_sink = FakeOutFile()

    # Without any primer match the records should go to the not-oriented
    # outputs, keeping the original read 1 / read 2 order.
    process_barcode_paired_end_data(
        read1_record, read2_record, barcode_sink, read1_sink, read2_sink,
        bc1_len=5, bc2_len=3, rev_comp_bc1=False, rev_comp_bc2=False,
        attempt_read_orientation=True,
        forward_primers=fwd_patterns,
        reverse_primers=rev_patterns,
        output_bc_not_oriented=unoriented_barcode_sink,
        fastq1_out_not_oriented=unoriented_read1_sink,
        fastq2_out_not_oriented=unoriented_read2_sink)

    # (output handle, lines it should contain). The oriented outputs must
    # stay empty, which splits to a single empty string.
    expectations = [
        (barcode_sink, ['']),
        (read1_sink, ['']),
        (read2_sink, ['']),
        (unoriented_barcode_sink,
         ['@HWI-ST830', 'ATCGAGGT', '+', '12345ABC', '']),
        (unoriented_read1_sink,
         ['@HWI-ST830', 'TCGATCGATCGATCG', '+', '67890ABCDEFGHIJ', '']),
        (unoriented_read2_sink,
         ['@HWI-ST830', 'TCCAA', '+', 'DEFGH', '']),
    ]
    for sink, wanted_lines in expectations:
        self.assertEqual(sink.data.split('\n'), wanted_lines)
def test_process_barcode_paired_end_data_orientation_reverse_in_read1(
        self):
    """ Swaps the paired reads when the reverse primer is in read 1

    Qualities are supplied as int8 arrays; the expected output lines
    show them written as ASCII characters (score + 33).
    """
    read1_record = ["HWI-ST830", "ATCGATCGATCGATCGATCG",
                    np.arange(3, 23, dtype=np.int8)]
    read2_record = ["HWI-ST830", "GGTTCCAA",
                    np.arange(3, 11, dtype=np.int8)]
    read1_sink = FakeOutFile()
    read2_sink = FakeOutFile()
    barcode_sink = FakeOutFile()

    # Each primer is compiled from the per-symbol patterns looked up in
    # self.iupac.
    fwd_patterns = [
        compile(''.join(self.iupac[base] for base in 'TTTTT'))]
    rev_patterns = [
        compile(''.join(self.iupac[base] for base in 'CGATCGA'))]

    unoriented_barcode_sink = FakeOutFile()
    unoriented_read1_sink = FakeOutFile()
    unoriented_read2_sink = FakeOutFile()

    # The reverse primer occurs in read 1 (the forward primer occurs in
    # neither read), so the reads should be written in swapped order.
    process_barcode_paired_end_data(
        read1_record, read2_record, barcode_sink, read1_sink, read2_sink,
        bc1_len=5, bc2_len=3, rev_comp_bc1=False, rev_comp_bc2=False,
        attempt_read_orientation=True,
        forward_primers=fwd_patterns,
        reverse_primers=rev_patterns,
        output_bc_not_oriented=unoriented_barcode_sink,
        fastq1_out_not_oriented=unoriented_read1_sink,
        fastq2_out_not_oriented=unoriented_read2_sink)

    # (output handle, lines it should contain). The barcode is the first
    # five bases of the original read 2 followed by the first three of
    # the original read 1; the not-oriented outputs must stay empty.
    expectations = [
        (barcode_sink, ['@HWI-ST830', 'GGTTCATC', '+', "$%&'($%&", '']),
        (read1_sink, ['@HWI-ST830', 'CAA', '+', ')*+', '']),
        (read2_sink, ['@HWI-ST830', 'GATCGATCGATCGATCG', '+',
                      "'()*+,-./01234567", '']),
        (unoriented_barcode_sink, ['']),
        (unoriented_read1_sink, ['']),
        (unoriented_read2_sink, ['']),
    ]
    for sink, wanted_lines in expectations:
        self.assertEqual(sink.data.split('\n'), wanted_lines)
def test_process_barcode_paired_end_data_orientation_forward_in_read2(
        self):
    """ Swaps the paired reads when the forward primer is in read 2

    Qualities are supplied as int8 arrays; the expected output lines
    show them written as ASCII characters (score + 33).
    """
    read1_record = ["HWI-ST830", "ATCGATCGATCGATCGATCG",
                    np.arange(3, 23, dtype=np.int8)]
    read2_record = ["HWI-ST830", "GGTTCCAA",
                    np.arange(3, 11, dtype=np.int8)]
    read1_sink = FakeOutFile()
    read2_sink = FakeOutFile()
    barcode_sink = FakeOutFile()

    # Each primer is compiled from the per-symbol patterns looked up in
    # self.iupac.
    fwd_patterns = [
        compile(''.join(self.iupac[base] for base in 'TTCCA'))]
    rev_patterns = [
        compile(''.join(self.iupac[base] for base in 'ATA'))]

    unoriented_barcode_sink = FakeOutFile()
    unoriented_read1_sink = FakeOutFile()
    unoriented_read2_sink = FakeOutFile()

    # The forward primer occurs in read 2, so the reads should be
    # written in swapped order.
    process_barcode_paired_end_data(
        read1_record, read2_record, barcode_sink, read1_sink, read2_sink,
        bc1_len=5, bc2_len=3, rev_comp_bc1=False, rev_comp_bc2=False,
        attempt_read_orientation=True,
        forward_primers=fwd_patterns,
        reverse_primers=rev_patterns,
        output_bc_not_oriented=unoriented_barcode_sink,
        fastq1_out_not_oriented=unoriented_read1_sink,
        fastq2_out_not_oriented=unoriented_read2_sink)

    # (output handle, lines it should contain). The barcode is the first
    # five bases of the original read 2 followed by the first three of
    # the original read 1; the not-oriented outputs must stay empty.
    expectations = [
        (barcode_sink, ['@HWI-ST830', 'GGTTCATC', '+', "$%&'($%&", '']),
        (read1_sink, ['@HWI-ST830', 'CAA', '+', ')*+', '']),
        (read2_sink, ['@HWI-ST830', 'GATCGATCGATCGATCG', '+',
                      "'()*+,-./01234567", '']),
        (unoriented_barcode_sink, ['']),
        (unoriented_read1_sink, ['']),
        (unoriented_read2_sink, ['']),
    ]
    for sink, wanted_lines in expectations:
        self.assertEqual(sink.data.split('\n'), wanted_lines)
def test_process_barcode_paired_end_data_orientation_rev_in_read2(self):
    """ Keeps the read order when the reverse primer is in read 2 """
    read1_record = ["HWI-ST830", "ATCGATCGATCGATCGATCG",
                    "1234567890ABCDEFGHIJ"]
    read2_record = ["HWI-ST830", "GGTTCCAA", "ABCDEFGH"]
    read1_sink = FakeOutFile()
    read2_sink = FakeOutFile()
    barcode_sink = FakeOutFile()

    # Each primer is compiled from the per-symbol patterns looked up in
    # self.iupac.
    fwd_patterns = [
        compile(''.join(self.iupac[base] for base in 'TTTTTT'))]
    rev_patterns = [
        compile(''.join(self.iupac[base] for base in 'TCCAA'))]

    unoriented_barcode_sink = FakeOutFile()
    unoriented_read1_sink = FakeOutFile()
    unoriented_read2_sink = FakeOutFile()

    # The reverse primer occurs in read 2, so the reads should be
    # written in their current order.
    process_barcode_paired_end_data(
        read1_record, read2_record, barcode_sink, read1_sink, read2_sink,
        bc1_len=5, bc2_len=3, rev_comp_bc1=False, rev_comp_bc2=False,
        attempt_read_orientation=True,
        forward_primers=fwd_patterns,
        reverse_primers=rev_patterns,
        output_bc_not_oriented=unoriented_barcode_sink,
        fastq1_out_not_oriented=unoriented_read1_sink,
        fastq2_out_not_oriented=unoriented_read2_sink)

    # (output handle, lines it should contain). The barcode is the first
    # five bases of read 1 followed by the first three of read 2; the
    # not-oriented outputs must stay empty.
    expectations = [
        (barcode_sink, ['@HWI-ST830', 'ATCGAGGT', '+', '12345ABC', '']),
        (read1_sink,
         ['@HWI-ST830', 'TCGATCGATCGATCG', '+', '67890ABCDEFGHIJ', '']),
        (read2_sink, ['@HWI-ST830', 'TCCAA', '+', 'DEFGH', '']),
        (unoriented_barcode_sink, ['']),
        (unoriented_read1_sink, ['']),
        (unoriented_read2_sink, ['']),
    ]
    for sink, wanted_lines in expectations:
        self.assertEqual(sink.data.split('\n'), wanted_lines)