Exemplo n.º 1
0
    def testBin(self):
        # Liquidate everything in one go, then liquidate again in two runs
        # where the second run appends to the first run's counts file, and
        # check that both routes leave identical tables behind.
        bin_size = len(self.sequence1)

        # Run 1: the whole directory at once.  The liquidator objects are
        # never inspected below; only the counts files they leave on disk are.
        together_dir_path = os.path.join(self.dir_path, 'together')
        run = blb.BinLiquidator(bin_size=bin_size,
                                output_directory=together_dir_path,
                                bam_file_path=self.dir_path)

        # Run 2a: only the first bam file, into its own output directory.
        appending_dir = os.path.join(self.dir_path, 'appending')
        run = blb.BinLiquidator(bin_size=bin_size,
                                output_directory=appending_dir,
                                bam_file_path=self.bam1_file_path)

        # Run 2b: the second bam file.  The output directory is a different
        # one, but counts_file_path points at the counts file from run 2a.
        appending_h5_path = os.path.join(appending_dir, 'counts.h5')
        extra_output_dir = os.path.join(self.dir_path,
                                        'appending_extra_without_h5_file')
        run = blb.BinLiquidator(bin_size=bin_size,
                                output_directory=extra_output_dir,
                                bam_file_path=self.bam2_file_path,
                                counts_file_path=appending_h5_path)

        together_h5_path = os.path.join(together_dir_path, 'counts.h5')
        compared_tables = ('bin_counts', 'normalized_counts', 'summary',
                           'sorted_summary')

        with tables.open_file(together_h5_path) as together_h5, \
                tables.open_file(appending_h5_path) as appending_h5:
            # Tables are compared through their string renderings.
            for table_name in compared_tables:
                expected = getattr(together_h5.root, table_name)[:]
                actual = getattr(appending_h5.root, table_name)[:]
                self.assertEqual(str(expected), str(actual))
Exemplo n.º 2
0
    def test_bin_liquidation(self):
        # The bin is exactly as wide as the sequence, so the expected result
        # is one file entry and one bin record holding every base pair.
        sequence_length = len(self.sequence)
        liquidator = blb.BinLiquidator(
            bin_size=sequence_length,
            output_directory=os.path.join(self.dir_path, 'output'),
            bam_file_path=self.bam_file_path)

        liquidator.flatten()

        with tables.open_file(liquidator.counts_file_path) as counts:
            files_table = counts.root.files
            # only a single bam file was liquidated
            self.assertEqual(1, len(files_table))
            first_file = files_table[0]
            # that bam file holds a single read
            self.assertEqual(1, first_file['length'])
            self.assertEqual(1, first_file['key'])

            bin_counts = counts.root.bin_counts
            # one bin accommodates the full sequence
            self.assertEqual(1, len(bin_counts))

            first_bin = bin_counts[0]
            self.assertEqual(0, first_bin['bin_number'])
            # the chromosome field is stored as bytes, hence decode()
            self.assertEqual(self.chromosome,
                             first_bin['chromosome'].decode())
            # count represents how many base pair reads fell into the bin
            self.assertEqual(sequence_length, first_bin['count'])
Exemplo n.º 3
0
    def test_liquidation_in_long_directory(self):
        # Liquidate a bam that lives in a directory whose name is longer than
        # 15 characters and check that the recorded cell type is the first 15
        # characters of that directory name.
        # NOTE(review): self.dir_path is rebound here; if setUp already
        # created a directory, nothing in this method refers to it any more --
        # confirm that it still gets cleaned up.
        self.dir_path = tempfile.mkdtemp(prefix='blt_' + 'a' * 16)
        directory_name = os.path.basename(self.dir_path)
        truncated_cell_type = directory_name[:15]
        self.bam_file_path = create_bam(self.dir_path, [self.chromosome],
                                        self.sequence)

        sequence_length = len(self.sequence)
        liquidator = blb.BinLiquidator(
            bin_size=sequence_length,
            output_directory=os.path.join(self.dir_path, 'output'),
            bam_file_path=self.bam_file_path)

        with tables.open_file(liquidator.counts_file_path) as counts:
            files_table = counts.root.files
            # only a single bam file was liquidated
            self.assertEqual(1, len(files_table))
            first_file = files_table[0]
            # that bam file holds a single read
            self.assertEqual(1, first_file['length'])
            self.assertEqual(1, first_file['key'])

            bin_counts = counts.root.bin_counts
            # one bin accommodates the full sequence
            self.assertEqual(1, len(bin_counts))

            first_bin = bin_counts[0]
            self.assertEqual(0, first_bin['bin_number'])
            # string fields are stored as bytes, hence decode()
            self.assertEqual(truncated_cell_type,
                             first_bin['cell_type'].decode())
            self.assertEqual(self.chromosome,
                             first_bin['chromosome'].decode())
            # count represents how many base pair reads fell into the bin
            self.assertEqual(sequence_length, first_bin['count'])
Exemplo n.º 4
0
    def testOverridingBlackList(self):
        # With an explicitly empty blacklist, every chromosome in the bam --
        # 'chr2_random' included -- is expected to get its own bin record.
        chromosomes = ['chr1', 'chr2', 'chr2_random']
        sequence = 'ATTTAAAAATTAATTTAATGCTTGGCTAAATCTTAATTACATATATAATT'
        sequence_length = len(sequence)

        bam_file_path = create_bam(self.dir_path,
                                   chromosomes,
                                   sequence,
                                   file_name='multiple.bam')

        liquidator = blb.BinLiquidator(
            bin_size=sequence_length,
            output_directory=os.path.join(self.dir_path, 'output'),
            bam_file_path=bam_file_path,
            blacklist=[])

        with tables.open_file(liquidator.counts_file_path) as counts:
            files_table = counts.root.files
            # only a single bam file was liquidated
            self.assertEqual(1, len(files_table))
            first_file = files_table[0]
            # one read for each chromosome
            self.assertEqual(3, first_file['length'])
            self.assertEqual(1, first_file['key'])

            # one record for each chromosome that should be liquidated
            self.assertEqual(3, len(counts.root.bin_counts))

            # records are expected in the same order as `chromosomes`
            for position, expected_chromosome in enumerate(chromosomes):
                bin_record = counts.root.bin_counts[position]
                self.assertEqual(0, bin_record['bin_number'])
                self.assertEqual(expected_chromosome,
                                 bin_record['chromosome'].decode())
                # count represents how many base pair reads fell into the bin
                self.assertEqual(sequence_length, bin_record['count'])
Exemplo n.º 5
0
 def test_bin_liquidation_zero_bin_size(self):
     # A bin size of zero must be rejected.  Construction and batch() both
     # sit inside the assertRaises block, so an exception raised by either
     # step satisfies the test.
     with self.assertRaises(Exception):
         output_dir = os.path.join(self.dir_path, 'output')
         zero_bin_liquidator = blb.BinLiquidator(
             bin_size=0,
             output_directory=output_dir,
             bam_file_path=self.bam_file_path)
         zero_bin_liquidator.batch(extension=0, sense='.')
Exemplo n.º 6
0
 def test_bin_long_bam_file_name(self):
     # Copy the fixture bam and its .bai index to a 65-character file name
     # and liquidate the copy.
     # NOTE(review): the original remark on the length was "more than
     # Float64Col"; presumably 65 is meant to exceed the width of the
     # file-name column in the counts table -- confirm.
     long_file_path = os.path.join(self.dir_path, 'x' * 65)
     for suffix in ('', '.bai'):
         shutil.copyfile(self.bam_file_path + suffix,
                         long_file_path + suffix)

     sequence_length = len(self.sequence)
     bin_liquidator = blb.BinLiquidator(
         bin_size=sequence_length,
         output_directory=os.path.join(self.dir_path, 'bin_output'),
         bam_file_path=long_file_path)