def test_read_old_style_counts_file(self):
    """BarcodeCounter: read in data from old-style 3 column '.counts' file

    Writes a counts file that has no lane column, loads it into a
    BarcodeCounter and checks the barcodes, lanes, per-barcode counts
    and total number of reads that are reported back.
    """
    self._make_working_dir()
    # (sequence, count) pairs, in the same order as the rows of the file
    expected_counts = (
        ("TATGCGCGGTA", 285302),
        ("TATGCGCGGTG", 532),
        ("ACCTACCGGTA", 315),
        ("CCCTTATGCGA", 22),
    )
    # NOTE(review): the whitespace inside this fixture was collapsed in
    # the copy under review; one record per line with tab-separated
    # columns is assumed here -- confirm against the BarcodeCounter reader
    old_style_counts_file = self._make_file(
        "old_style.counts",
        "#Rank\tSequence\tCount\n"
        "1\tTATGCGCGGTA\t285302\n"
        "2\tTATGCGCGGTG\t532\n"
        "3\tACCTACCGGTA\t315\n"
        "4\tCCCTTATGCGA\t22")
    # Load the file into a counter
    bc = BarcodeCounter(old_style_counts_file)
    # Barcodes are expected back in the order listed above
    self.assertEqual(bc.barcodes(),
                     [seq for seq, _ in expected_counts])
    # A file without a lane column should not define any lanes
    self.assertEqual(bc.lanes, [])
    # Counts for individual barcodes
    for seq, count in expected_counts:
        self.assertEqual(bc.counts(seq), count)
    # Nothing was assigned to lane 1, but the overall count is kept
    self.assertEqual(bc.counts("TATGCGCGGTA", lane=1), 0)
    self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302)
    # Total number of reads
    self.assertEqual(bc.nreads(), 286171)
def test_empty_counter(self):
    """BarcodeCounter: check empty counter

    A counter created with no arguments should report no barcodes and
    no lanes, and zero for every count or read-number query.
    """
    # Arbitrary barcode used to probe the empty counter
    barcode = "AGGCAGAATCTTACGC"
    bc = BarcodeCounter()
    # No barcodes or lanes have been seen
    self.assertEqual(bc.barcodes(), [])
    self.assertEqual(bc.lanes, [])
    self.assertEqual(bc.filter_barcodes(), [])
    # Counts are zero, with or without a lane
    self.assertEqual(bc.counts(barcode), 0)
    self.assertEqual(bc.counts(barcode, lane=1), 0)
    self.assertEqual(bc.counts_all(barcode), 0)
    # Numbers of reads are zero, overall and for lane 1
    self.assertEqual(bc.nreads(), 0)
    self.assertEqual(bc.nreads(1), 0)
def test_read_multiple_counts_file(self):
    """BarcodeCounter: read in data from multiple '.counts' files

    Writes one counts file per lane (lanes 1, 2 and 3), loads all three
    into a single BarcodeCounter and checks the combined barcodes,
    lanes, per-barcode counts and numbers of reads.
    """
    # NOTE(review): the whitespace inside these fixtures was collapsed
    # in the copy under review; one record per line with tab-separated
    # columns is assumed here -- confirm against the BarcodeCounter reader
    header = "#Lane\tRank\tSequence\tCount\n"
    counts_lane1 = self._make_file(
        "lane1.counts",
        header +
        "1\t1\tTATGCGCGGTA\t285302\n"
        "1\t2\tTATGCGCGGTG\t532\n"
        "1\t3\tACCTACCGGTA\t315\n"
        "1\t4\tCCCTTATGCGA\t22")
    counts_lane2 = self._make_file(
        "lane2.counts",
        header +
        "2\t1\tACCTAGCGGTA\t477\n"
        "2\t2\tACCTCTATGCT\t368")
    counts_lane3 = self._make_file(
        "lane3.counts",
        header +
        "3\t1\tACCCTNCGGTA\t312\n"
        "3\t2\tACCTTATGCGC\t248")
    # Load all three files into one counter
    bc = BarcodeCounter(counts_lane1, counts_lane2, counts_lane3)
    # (sequence, total count) pairs listed in descending order of
    # count, which is the order the test expects from barcodes()
    expected_counts = (
        ("TATGCGCGGTA", 285302),
        ("TATGCGCGGTG", 532),
        ("ACCTAGCGGTA", 477),
        ("ACCTCTATGCT", 368),
        ("ACCTACCGGTA", 315),
        ("ACCCTNCGGTA", 312),
        ("ACCTTATGCGC", 248),
        ("CCCTTATGCGA", 22),
    )
    self.assertEqual(bc.barcodes(),
                     [seq for seq, _ in expected_counts])
    # Lanes
    self.assertEqual(bc.lanes, [1, 2, 3])
    # Counts for individual barcodes across all lanes
    for seq, count in expected_counts:
        self.assertEqual(bc.counts(seq), count)
    # Barcode that only occurs in lane 1
    for lane, count in ((1, 285302), (2, 0), (3, 0)):
        self.assertEqual(bc.counts("TATGCGCGGTA", lane=lane), count)
    self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302)
    # Barcode that only occurs in lane 3
    for lane, count in ((1, 0), (2, 0), (3, 248)):
        self.assertEqual(bc.counts("ACCTTATGCGC", lane=lane), count)
    self.assertEqual(bc.counts_all("ACCTTATGCGC"), 248)
    # Numbers of reads, overall and then per lane
    self.assertEqual(bc.nreads(), 287576)
    for lane, nreads in ((1, 286171), (2, 845), (3, 560)):
        self.assertEqual(bc.nreads(lane), nreads)
def test_read_counts_file(self):
    """BarcodeCounter: read in data from '.counts' file

    Writes a single four-column counts file covering lanes 1, 2 and 3,
    loads it into a BarcodeCounter and checks the barcodes, lanes,
    per-barcode counts and numbers of reads.
    """
    # NOTE(review): the whitespace inside this fixture was collapsed in
    # the copy under review; one record per line with tab-separated
    # columns is assumed here -- confirm against the BarcodeCounter reader
    counts_file = self._make_file(
        "test.counts",
        "#Lane\tRank\tSequence\tCount\n"
        "1\t1\tTATGCGCGGTA\t285302\n"
        "1\t2\tTATGCGCGGTG\t532\n"
        "1\t3\tACCTACCGGTA\t315\n"
        "1\t4\tCCCTTATGCGA\t22\n"
        "2\t5\tACCTAGCGGTA\t477\n"
        "2\t6\tACCTCTATGCT\t368\n"
        "3\t7\tACCCTNCGGTA\t312\n"
        "3\t8\tACCTTATGCGC\t248")
    # Load the file into a counter
    bc = BarcodeCounter(counts_file)
    # (sequence, total count) pairs listed in descending order of
    # count, which is the order the test expects from barcodes()
    expected_counts = (
        ("TATGCGCGGTA", 285302),
        ("TATGCGCGGTG", 532),
        ("ACCTAGCGGTA", 477),
        ("ACCTCTATGCT", 368),
        ("ACCTACCGGTA", 315),
        ("ACCCTNCGGTA", 312),
        ("ACCTTATGCGC", 248),
        ("CCCTTATGCGA", 22),
    )
    self.assertEqual(bc.barcodes(),
                     [seq for seq, _ in expected_counts])
    # Lanes
    self.assertEqual(bc.lanes, [1, 2, 3])
    # Counts for individual barcodes across all lanes
    for seq, count in expected_counts:
        self.assertEqual(bc.counts(seq), count)
    # Barcode that only occurs in lane 1
    for lane, count in ((1, 285302), (2, 0), (3, 0)):
        self.assertEqual(bc.counts("TATGCGCGGTA", lane=lane), count)
    self.assertEqual(bc.counts_all("TATGCGCGGTA"), 285302)
    # Barcode that only occurs in lane 3
    for lane, count in ((1, 0), (2, 0), (3, 248)):
        self.assertEqual(bc.counts("ACCTTATGCGC", lane=lane), count)
    self.assertEqual(bc.counts_all("ACCTTATGCGC"), 248)
    # Numbers of reads, overall and then per lane
    self.assertEqual(bc.nreads(), 287576)
    for lane, nreads in ((1, 286171), (2, 845), (3, 560)):
        self.assertEqual(bc.nreads(lane), nreads)
def test_count_fastq_sequences(self):
    """BarcodeCounter: count barcode sequences

    Feeds barcode sequences into an initially empty BarcodeCounter one
    at a time via 'count_barcode', then checks the barcodes, lanes,
    per-barcode counts and numbers of reads that are reported back.
    """
    # Initialise counter object
    bc = BarcodeCounter()
    # (lane, sequence, number of times the sequence is counted)
    observations = (
        (1, "AGGCAGAATCTTACGC", 102),
        (1, "TCCTGAGCTCTTACGC", 10),
        (1, "ACAGTGATTCTTTCCC", 3),
        (1, "ATGCTCGTCTCGCATC", 1),
        (2, "CGTACTAGTCTTACGC", 95),
        (2, "ATGTCAGATCTTTCCC", 29),
        (2, "AGGCAGAATCTTACGC", 12),
        (2, "CAGATCATTCTTTCCC", 6),
        (3, "GGACTCCTTCTTACGC", 75),
        (3, "ACCGATTCGCGCGTAG", 74),
        (3, "CCAGCAATATCGCGAG", 2),
        (3, "CCGCGTAAGCAATAGA", 1),
    )
    # Populate with sequences
    for lane, seq, incr in observations:
        # 'range' (not the Python 2-only 'xrange') so that the test
        # also runs under Python 3
        for _ in range(incr):
            bc.count_barcode(seq, lane=lane)
    # Check contents
    self.assertEqual(bc.barcodes(), ["AGGCAGAATCTTACGC",
                                     "CGTACTAGTCTTACGC",
                                     "GGACTCCTTCTTACGC",
                                     "ACCGATTCGCGCGTAG",
                                     "ATGTCAGATCTTTCCC",
                                     "TCCTGAGCTCTTACGC",
                                     "CAGATCATTCTTTCCC",
                                     "ACAGTGATTCTTTCCC",
                                     "CCAGCAATATCGCGAG",
                                     "ATGCTCGTCTCGCATC",
                                     "CCGCGTAAGCAATAGA"])
    # Lanes
    self.assertEqual(bc.lanes, [1, 2, 3])
    # Barcode counted in both lane 1 and lane 2
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC"), 114)
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC", lane=1), 102)
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC", lane=2), 12)
    self.assertEqual(bc.counts("AGGCAGAATCTTACGC", lane=3), 0)
    self.assertEqual(bc.counts_all("AGGCAGAATCTTACGC"), 114)
    # Barcode counted once, in lane 3 only
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA"), 1)
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA", lane=1), 0)
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA", lane=2), 0)
    self.assertEqual(bc.counts("CCGCGTAAGCAATAGA", lane=3), 1)
    self.assertEqual(bc.counts_all("CCGCGTAAGCAATAGA"), 1)
    # Read counts, overall and then per lane
    self.assertEqual(bc.nreads(), 410)
    self.assertEqual(bc.nreads(1), 116)
    self.assertEqual(bc.nreads(2), 142)
    self.assertEqual(bc.nreads(3), 152)