def test_read_old_style_counts_file(self):
        """BarcodeCounter: read in data from old-style 3 column '.counts' file

        The file written here has 'Rank', 'Sequence' and 'Count' columns
        but no 'Lane' column; the test checks that the loaded counter
        reports no lanes, the expected per-barcode counts, and the
        expected total number of reads.
        """
        self._make_working_dir()
        # Three-column content (no lane information)
        content = """#Rank	Sequence	Count
1	TATGCGCGGTA	285302
2	TATGCGCGGTG	532
3	ACCTACCGGTA	315
4	CCCTTATGCGA	22"""
        counts_path = self._make_file("old_style.counts",content)
        # Load the file into a counter
        counter = BarcodeCounter(counts_path)
        # (barcode, count) pairs in the order barcodes() should list them
        expected = (("TATGCGCGGTA",285302),
                    ("TATGCGCGGTG",532),
                    ("ACCTACCGGTA",315),
                    ("CCCTTATGCGA",22))
        self.assertEqual(counter.barcodes(),
                         [barcode for barcode,_ in expected])
        # A file without a lane column should give an empty lane list
        self.assertEqual(counter.lanes,[])
        # Per-barcode counts
        for barcode,count in expected:
            self.assertEqual(counter.counts(barcode),count)
        # Nothing is attributed to lane 1, but the overall count is kept
        self.assertEqual(counter.counts("TATGCGCGGTA",lane=1),0)
        self.assertEqual(counter.counts_all("TATGCGCGGTA"),285302)
        # Total number of reads
        self.assertEqual(counter.nreads(),286171)
Exemplo n.º 2
0
    def test_read_old_style_counts_file(self):
        """BarcodeCounter: read in data from old-style 3 column '.counts' file
        """
        # Read old-style 3 column counts files
        self._make_working_dir()
        old_style_counts_file = self._make_file("old_style.counts",
                                                """#Rank	Sequence	Count
1	TATGCGCGGTA	285302
2	TATGCGCGGTG	532
3	ACCTACCGGTA	315
4	CCCTTATGCGA	22""")
        # Read the file
        bc = BarcodeCounter(old_style_counts_file)
        # Check the contents
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTACCGGTA",
                                        "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes,[])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"),532)
        self.assertEqual(bc.counts("ACCTACCGGTA"),315)
        self.assertEqual(bc.counts("CCCTTATGCGA"),22)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=1),0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"),285302)
        # Read counts
        self.assertEqual(bc.nreads(),286171)
Exemplo n.º 3
0
    def test_read_multiple_counts_file(self):
        """BarcodeCounter: read in data from multiple '.counts' files
        """
        # Read multiple counts files
        counts_lane1 = self._make_file("lane1.counts",
                                       """#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22""")
        counts_lane2 = self._make_file("lane2.counts",
                                       """#Lane	Rank	Sequence	Count
2	1	ACCTAGCGGTA	477
2	2	ACCTCTATGCT	368""")
        counts_lane3 = self._make_file("lane3.counts",
                                       """#Lane	Rank	Sequence	Count
3	1	ACCCTNCGGTA	312
3	2	ACCTTATGCGC	248""")
        # Read the file
        bc = BarcodeCounter(counts_lane1,counts_lane2,counts_lane3)
        # Check the contents
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "ACCCTNCGGTA",
                                        "ACCTTATGCGC",
                                        "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes,[1,2,3])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"),532)
        self.assertEqual(bc.counts("ACCTAGCGGTA"),477)
        self.assertEqual(bc.counts("ACCTCTATGCT"),368)
        self.assertEqual(bc.counts("ACCTACCGGTA"),315)
        self.assertEqual(bc.counts("ACCCTNCGGTA"),312)
        self.assertEqual(bc.counts("ACCTTATGCGC"),248)
        self.assertEqual(bc.counts("CCCTTATGCGA"),22)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=1),285302)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=2),0)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=3),0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=1),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=2),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=3),248)
        self.assertEqual(bc.counts_all("ACCTTATGCGC"),248)
        # Read counts
        self.assertEqual(bc.nreads(),287576)
        self.assertEqual(bc.nreads(1),286171)
        self.assertEqual(bc.nreads(2),845)
        self.assertEqual(bc.nreads(3),560)
    def test_read_multiple_counts_file(self):
        """BarcodeCounter: read in data from multiple '.counts' files
        """
        # Read multiple counts files
        counts_lane1 = self._make_file("lane1.counts",
                                       """#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22""")
        counts_lane2 = self._make_file("lane2.counts",
                                       """#Lane	Rank	Sequence	Count
2	1	ACCTAGCGGTA	477
2	2	ACCTCTATGCT	368""")
        counts_lane3 = self._make_file("lane3.counts",
                                       """#Lane	Rank	Sequence	Count
3	1	ACCCTNCGGTA	312
3	2	ACCTTATGCGC	248""")
        # Read the file
        bc = BarcodeCounter(counts_lane1,counts_lane2,counts_lane3)
        # Check the contents
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "ACCCTNCGGTA",
                                        "ACCTTATGCGC",
                                        "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes,[1,2,3])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"),532)
        self.assertEqual(bc.counts("ACCTAGCGGTA"),477)
        self.assertEqual(bc.counts("ACCTCTATGCT"),368)
        self.assertEqual(bc.counts("ACCTACCGGTA"),315)
        self.assertEqual(bc.counts("ACCCTNCGGTA"),312)
        self.assertEqual(bc.counts("ACCTTATGCGC"),248)
        self.assertEqual(bc.counts("CCCTTATGCGA"),22)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=1),285302)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=2),0)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=3),0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=1),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=2),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=3),248)
        self.assertEqual(bc.counts_all("ACCTTATGCGC"),248)
        # Read counts
        self.assertEqual(bc.nreads(),287576)
        self.assertEqual(bc.nreads(1),286171)
        self.assertEqual(bc.nreads(2),845)
        self.assertEqual(bc.nreads(3),560)
Exemplo n.º 5
0
 def test_empty_counter(self):
     """BarcodeCounter: check empty counter

     A counter created with no arguments should report no barcodes
     and no lanes, and zero counts and reads for any query.
     """
     counter = BarcodeCounter()
     barcode = "AGGCAGAATCTTACGC"
     # Nothing has been stored yet
     self.assertEqual(counter.barcodes(),[])
     self.assertEqual(counter.lanes,[])
     self.assertEqual(counter.filter_barcodes(),[])
     # Counts for an unseen barcode: without a lane, then for lane 1
     for lane_kwargs in ({},{"lane":1}):
         self.assertEqual(counter.counts(barcode,**lane_kwargs),0)
     self.assertEqual(counter.counts_all(barcode),0)
     # Read totals: overall, then for lane 1
     for lane_args in ((),(1,)):
         self.assertEqual(counter.nreads(*lane_args),0)
 def test_empty_counter(self):
     """BarcodeCounter: check empty counter

     A counter created with no arguments should report no barcodes
     and no lanes, and zero counts and reads for any query.
     """
     counter = BarcodeCounter()
     barcode = "AGGCAGAATCTTACGC"
     # Nothing has been stored yet
     self.assertEqual(counter.barcodes(),[])
     self.assertEqual(counter.lanes,[])
     self.assertEqual(counter.filter_barcodes(),[])
     # Counts for an unseen barcode: without a lane, then for lane 1
     for lane_kwargs in ({},{"lane":1}):
         self.assertEqual(counter.counts(barcode,**lane_kwargs),0)
     self.assertEqual(counter.counts_all(barcode),0)
     # Read totals: overall, then for lane 1
     for lane_args in ((),(1,)):
         self.assertEqual(counter.nreads(*lane_args),0)
Exemplo n.º 7
0
 def test_count_fastq_sequences(self):
     """BarcodeCounter: count barcode sequences

     Feeds barcodes into an empty counter one read at a time via
     'count_barcode', then checks the barcode list, the lanes, the
     per-lane and overall counts, and the read totals.
     """
     # Initialise counter object
     bc = BarcodeCounter()
     # Populate with sequences; each entry is
     # ((lane,barcode),number of reads to register)
     for (lane,seq),incr in (((1,"AGGCAGAATCTTACGC"),102),
                             ((1,"TCCTGAGCTCTTACGC"),10),
                             ((1,"ACAGTGATTCTTTCCC"),3),
                             ((1,"ATGCTCGTCTCGCATC"),1),
                             ((2,"CGTACTAGTCTTACGC"),95),
                             ((2,"ATGTCAGATCTTTCCC"),29),
                             ((2,"AGGCAGAATCTTACGC"),12),
                             ((2,"CAGATCATTCTTTCCC"),6),
                             ((3,"GGACTCCTTCTTACGC"),75),
                             ((3,"ACCGATTCGCGCGTAG"),74),
                             ((3,"CCAGCAATATCGCGAG"),2),
                             ((3,"CCGCGTAAGCAATAGA"),1)):
         # 'range' rather than 'xrange' so the loop runs under both
         # Python 2 and Python 3 (the repeat counts are small)
         for _ in range(incr):
             bc.count_barcode(seq,lane=lane)
     # Check contents
     self.assertEqual(bc.barcodes(),["AGGCAGAATCTTACGC",
                                     "CGTACTAGTCTTACGC",
                                     "GGACTCCTTCTTACGC",
                                     "ACCGATTCGCGCGTAG",
                                     "ATGTCAGATCTTTCCC",
                                     "TCCTGAGCTCTTACGC",
                                     "CAGATCATTCTTTCCC",
                                     "ACAGTGATTCTTTCCC",
                                     "CCAGCAATATCGCGAG",
                                     "ATGCTCGTCTCGCATC",
                                     "CCGCGTAAGCAATAGA"])
     # Lanes
     self.assertEqual(bc.lanes,[1,2,3])
     # Counts for individual barcodes: this one was seen in lanes 1
     # and 2 (102 + 12 reads)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC"),114)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=1),102)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=2),12)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=3),0)
     self.assertEqual(bc.counts_all("AGGCAGAATCTTACGC"),114)
     # ...and this one only once, in lane 3
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA"),1)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=1),0)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=2),0)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=3),1)
     self.assertEqual(bc.counts_all("CCGCGTAAGCAATAGA"),1)
     # Read counts: overall, then per lane
     self.assertEqual(bc.nreads(),410)
     self.assertEqual(bc.nreads(1),116)
     self.assertEqual(bc.nreads(2),142)
     self.assertEqual(bc.nreads(3),152)
 def test_count_fastq_sequences(self):
     """BarcodeCounter: count barcode sequences

     Feeds barcodes into an empty counter one read at a time via
     'count_barcode', then checks the barcode list, the lanes, the
     per-lane and overall counts, and the read totals.
     """
     # Initialise counter object
     bc = BarcodeCounter()
     # Populate with sequences; each entry is
     # ((lane,barcode),number of reads to register)
     for (lane,seq),incr in (((1,"AGGCAGAATCTTACGC"),102),
                             ((1,"TCCTGAGCTCTTACGC"),10),
                             ((1,"ACAGTGATTCTTTCCC"),3),
                             ((1,"ATGCTCGTCTCGCATC"),1),
                             ((2,"CGTACTAGTCTTACGC"),95),
                             ((2,"ATGTCAGATCTTTCCC"),29),
                             ((2,"AGGCAGAATCTTACGC"),12),
                             ((2,"CAGATCATTCTTTCCC"),6),
                             ((3,"GGACTCCTTCTTACGC"),75),
                             ((3,"ACCGATTCGCGCGTAG"),74),
                             ((3,"CCAGCAATATCGCGAG"),2),
                             ((3,"CCGCGTAAGCAATAGA"),1)):
         # 'range' rather than 'xrange' so the loop runs under both
         # Python 2 and Python 3 (the repeat counts are small)
         for _ in range(incr):
             bc.count_barcode(seq,lane=lane)
     # Check contents
     self.assertEqual(bc.barcodes(),["AGGCAGAATCTTACGC",
                                     "CGTACTAGTCTTACGC",
                                     "GGACTCCTTCTTACGC",
                                     "ACCGATTCGCGCGTAG",
                                     "ATGTCAGATCTTTCCC",
                                     "TCCTGAGCTCTTACGC",
                                     "CAGATCATTCTTTCCC",
                                     "ACAGTGATTCTTTCCC",
                                     "CCAGCAATATCGCGAG",
                                     "ATGCTCGTCTCGCATC",
                                     "CCGCGTAAGCAATAGA"])
     # Lanes
     self.assertEqual(bc.lanes,[1,2,3])
     # Counts for individual barcodes: this one was seen in lanes 1
     # and 2 (102 + 12 reads)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC"),114)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=1),102)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=2),12)
     self.assertEqual(bc.counts("AGGCAGAATCTTACGC",lane=3),0)
     self.assertEqual(bc.counts_all("AGGCAGAATCTTACGC"),114)
     # ...and this one only once, in lane 3
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA"),1)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=1),0)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=2),0)
     self.assertEqual(bc.counts("CCGCGTAAGCAATAGA",lane=3),1)
     self.assertEqual(bc.counts_all("CCGCGTAAGCAATAGA"),1)
     # Read counts: overall, then per lane
     self.assertEqual(bc.nreads(),410)
     self.assertEqual(bc.nreads(1),116)
     self.assertEqual(bc.nreads(2),142)
     self.assertEqual(bc.nreads(3),152)
Exemplo n.º 9
0
    def test_read_counts_file(self):
        """BarcodeCounter: read in data from '.counts' file
        """
        # Read a counts file
        counts_file = self._make_file("test.counts","""#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22
2	5	ACCTAGCGGTA	477
2	6	ACCTCTATGCT	368
3	7	ACCCTNCGGTA	312
3	8	ACCTTATGCGC	248""")
        # Read the file
        bc = BarcodeCounter(counts_file)
        # Check the contents
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "ACCCTNCGGTA",
                                        "ACCTTATGCGC",
                                        "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes,[1,2,3])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"),532)
        self.assertEqual(bc.counts("ACCTAGCGGTA"),477)
        self.assertEqual(bc.counts("ACCTCTATGCT"),368)
        self.assertEqual(bc.counts("ACCTACCGGTA"),315)
        self.assertEqual(bc.counts("ACCCTNCGGTA"),312)
        self.assertEqual(bc.counts("ACCTTATGCGC"),248)
        self.assertEqual(bc.counts("CCCTTATGCGA"),22)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=1),285302)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=2),0)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=3),0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=1),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=2),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=3),248)
        self.assertEqual(bc.counts_all("ACCTTATGCGC"),248)
        # Read counts
        self.assertEqual(bc.nreads(),287576)
        self.assertEqual(bc.nreads(1),286171)
        self.assertEqual(bc.nreads(2),845)
        self.assertEqual(bc.nreads(3),560)
    def test_read_counts_file(self):
        """BarcodeCounter: read in data from '.counts' file
        """
        # Read a counts file
        counts_file = self._make_file("test.counts","""#Lane	Rank	Sequence	Count
1	1	TATGCGCGGTA	285302
1	2	TATGCGCGGTG	532
1	3	ACCTACCGGTA	315
1	4	CCCTTATGCGA	22
2	5	ACCTAGCGGTA	477
2	6	ACCTCTATGCT	368
3	7	ACCCTNCGGTA	312
3	8	ACCTTATGCGC	248""")
        # Read the file
        bc = BarcodeCounter(counts_file)
        # Check the contents
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "ACCCTNCGGTA",
                                        "ACCTTATGCGC",
                                        "CCCTTATGCGA"])
        # Lanes
        self.assertEqual(bc.lanes,[1,2,3])
        # Counts for individual barcodes
        self.assertEqual(bc.counts("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("TATGCGCGGTG"),532)
        self.assertEqual(bc.counts("ACCTAGCGGTA"),477)
        self.assertEqual(bc.counts("ACCTCTATGCT"),368)
        self.assertEqual(bc.counts("ACCTACCGGTA"),315)
        self.assertEqual(bc.counts("ACCCTNCGGTA"),312)
        self.assertEqual(bc.counts("ACCTTATGCGC"),248)
        self.assertEqual(bc.counts("CCCTTATGCGA"),22)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=1),285302)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=2),0)
        self.assertEqual(bc.counts("TATGCGCGGTA",lane=3),0)
        self.assertEqual(bc.counts_all("TATGCGCGGTA"),285302)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=1),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=2),0)
        self.assertEqual(bc.counts("ACCTTATGCGC",lane=3),248)
        self.assertEqual(bc.counts_all("ACCTTATGCGC"),248)
        # Read counts
        self.assertEqual(bc.nreads(),287576)
        self.assertEqual(bc.nreads(1),286171)
        self.assertEqual(bc.nreads(2),845)
        self.assertEqual(bc.nreads(3),560)
Exemplo n.º 11
0
    def test_filter_barcodes(self):
        """BarcodeCounter: check filtering by lane and cutoff

        Populates a counter with four barcodes in lane 1 and two in
        lane 2, then checks 'filter_barcodes' with no arguments, with
        a lane, with a cutoff, and with both together.
        """
        # Populate the counter directly, registering each barcode's
        # reads in a single call via 'incr'
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA",lane=1,incr=285302)
        bc.count_barcode("TATGCGCGGTG",lane=1,incr=532)
        bc.count_barcode("ACCTACCGGTA",lane=1,incr=315)
        bc.count_barcode("CCCTTATGCGA",lane=1,incr=22)
        bc.count_barcode("ACCTAGCGGTA",lane=2,incr=477)
        bc.count_barcode("ACCTCTATGCT",lane=2,incr=368)
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "CCCTTATGCGA"])
        # No filtering
        self.assertEqual(bc.filter_barcodes(),["TATGCGCGGTA",
                                               "TATGCGCGGTG",
                                               "ACCTAGCGGTA",
                                               "ACCTCTATGCT",
                                               "ACCTACCGGTA",
                                               "CCCTTATGCGA"])
        # Filter by lane
        self.assertEqual(bc.filter_barcodes(lane=1),["TATGCGCGGTA",
                                                     "TATGCGCGGTG",
                                                     "ACCTACCGGTA",
                                                     "CCCTTATGCGA"])
        self.assertEqual(bc.filter_barcodes(lane=2),["ACCTAGCGGTA",
                                                     "ACCTCTATGCT"])
        # Filter by cutoff
        # NOTE(review): the expected results are consistent with
        # 'cutoff' being a minimum fraction of the reads (of the lane,
        # when one is given) - confirm against BarcodeCounter
        self.assertEqual(bc.filter_barcodes(cutoff=0.5),
                         ["TATGCGCGGTA",])
        self.assertEqual(bc.filter_barcodes(cutoff=0.0015,lane=1),
                         ["TATGCGCGGTA","TATGCGCGGTG"])
        self.assertEqual(bc.filter_barcodes(cutoff=0.5,lane=2),
                         ["ACCTAGCGGTA",])
    def test_filter_barcodes(self):
        """BarcodeCounter: check filtering by lane and cutoff

        Populates a counter with four barcodes in lane 1 and two in
        lane 2, then checks 'filter_barcodes' with no arguments, with
        a lane, with a cutoff, and with both together.
        """
        # Populate the counter directly, registering each barcode's
        # reads in a single call via 'incr'
        bc = BarcodeCounter()
        bc.count_barcode("TATGCGCGGTA",lane=1,incr=285302)
        bc.count_barcode("TATGCGCGGTG",lane=1,incr=532)
        bc.count_barcode("ACCTACCGGTA",lane=1,incr=315)
        bc.count_barcode("CCCTTATGCGA",lane=1,incr=22)
        bc.count_barcode("ACCTAGCGGTA",lane=2,incr=477)
        bc.count_barcode("ACCTCTATGCT",lane=2,incr=368)
        self.assertEqual(bc.barcodes(),["TATGCGCGGTA",
                                        "TATGCGCGGTG",
                                        "ACCTAGCGGTA",
                                        "ACCTCTATGCT",
                                        "ACCTACCGGTA",
                                        "CCCTTATGCGA"])
        # No filtering
        self.assertEqual(bc.filter_barcodes(),["TATGCGCGGTA",
                                               "TATGCGCGGTG",
                                               "ACCTAGCGGTA",
                                               "ACCTCTATGCT",
                                               "ACCTACCGGTA",
                                               "CCCTTATGCGA"])
        # Filter by lane
        self.assertEqual(bc.filter_barcodes(lane=1),["TATGCGCGGTA",
                                                     "TATGCGCGGTG",
                                                     "ACCTACCGGTA",
                                                     "CCCTTATGCGA"])
        self.assertEqual(bc.filter_barcodes(lane=2),["ACCTAGCGGTA",
                                                     "ACCTCTATGCT"])
        # Filter by cutoff
        # NOTE(review): the expected results are consistent with
        # 'cutoff' being a minimum fraction of the reads (of the lane,
        # when one is given) - confirm against BarcodeCounter
        self.assertEqual(bc.filter_barcodes(cutoff=0.5),
                         ["TATGCGCGGTA",])
        self.assertEqual(bc.filter_barcodes(cutoff=0.0015,lane=1),
                         ["TATGCGCGGTA","TATGCGCGGTG"])
        self.assertEqual(bc.filter_barcodes(cutoff=0.5,lane=2),
                         ["ACCTAGCGGTA",])