def test_group_countfiles_raises_exception_if_missing_type(self):
    """
    Test that grouping countfiles fails when a countfile 'type' is missing.

    There may be multiple BAM files for each sample (e.g. primary
    alignments, deduplicated, etc.) and a countfile is generated for each
    one of those.  When assembling a count matrix, the files of a
    particular 'type' (e.g. those coming from deduplicated BAM files) are
    grouped together.

    Here sample B has no '.primary.counts' file and the mocked glob yields
    only two paths for that type, so get_countfile_groupings is expected
    to raise CountfileQuantityException.
    """
    p = Params()
    p.add(feature_counts_output_dir='/path/to/final/featureCounts')

    s1 = Sample('A', 'X')
    s1.countfiles = [
        '/path/to/final/featureCounts/A.counts',
        '/path/to/final/featureCounts/A.primary.counts',
        '/path/to/final/featureCounts/A.primary.dedup.counts',
    ]
    # sample B deliberately lacks the '.primary.counts' file
    s2 = Sample('B', 'Y')
    s2.countfiles = [
        '/path/to/final/featureCounts/B.counts',
        '/path/to/final/featureCounts/B.primary.dedup.counts',
    ]
    s3 = Sample('C', 'Z')
    s3.countfiles = [
        '/path/to/final/featureCounts/C.counts',
        '/path/to/final/featureCounts/C.primary.counts',
        '/path/to/final/featureCounts/C.primary.dedup.counts',
    ]

    project = Project()
    project.add_parameters(p)
    project.add_samples([s1, s2, s3])

    # each successive call to the mocked glob returns the next list below
    mock_case_insensitive_glob = mock.Mock()
    mock_case_insensitive_glob.side_effect = [
        [
            '/path/to/final/featureCounts/A.counts',
            '/path/to/final/featureCounts/B.counts',
            '/path/to/final/featureCounts/C.counts',
        ],
        [
            '/path/to/final/featureCounts/A.primary.counts',
            '/path/to/final/featureCounts/C.primary.counts',
        ],
        [
            '/path/to/final/featureCounts/A.primary.dedup.counts',
            '/path/to/final/featureCounts/B.primary.dedup.counts',
            '/path/to/final/featureCounts/C.primary.dedup.counts',
        ],
    ]

    # the return value is irrelevant; only the raised exception matters
    with self.assertRaises(self.module.CountfileQuantityException):
        self.module.get_countfile_groupings(
            project, mock_case_insensitive_glob)
def test_group_countfiles(self):
    """
    Test that countfiles are grouped by 'type' across all samples.

    There may be multiple BAM files for each sample (e.g. primary
    alignments, deduplicated, etc.) and a countfile is generated for each
    one of those.  When assembling a count matrix, the files of a
    particular 'type' (e.g. those coming from deduplicated BAM files) are
    grouped together.

    Three samples each carry three countfiles; the expected result is one
    list per countfile type, each listing the files for samples A, B, C.
    """
    project_params = Params()
    output_params = Params()
    output_params.add(
        feature_counts_output_dir='/path/to/final/featureCounts')

    # (constructor arguments, countfiles) for every sample in the project
    sample_specs = [
        (('A', 'X'), [
            '/path/to/final/featureCounts/A.counts',
            '/path/to/final/featureCounts/A.primary.counts',
            '/path/to/final/featureCounts/A.primary.dedup.counts',
        ]),
        (('B', 'Y'), [
            '/path/to/final/featureCounts/B.counts',
            '/path/to/final/featureCounts/B.primary.counts',
            '/path/to/final/featureCounts/B.primary.dedup.counts',
        ]),
        (('C', 'Z'), [
            '/path/to/final/featureCounts/C.counts',
            '/path/to/final/featureCounts/C.primary.counts',
            '/path/to/final/featureCounts/C.primary.dedup.counts',
        ]),
    ]
    samples = []
    for ctor_args, countfiles in sample_specs:
        sample = Sample(*ctor_args)
        sample.countfiles = countfiles
        samples.append(sample)

    project = Project()
    project.add_parameters(project_params)
    project.add_samples(samples)

    groupings = self.module.get_countfile_groupings(project, output_params)

    expected = [
        [
            '/path/to/final/featureCounts/A.counts',
            '/path/to/final/featureCounts/B.counts',
            '/path/to/final/featureCounts/C.counts',
        ],
        [
            '/path/to/final/featureCounts/A.primary.counts',
            '/path/to/final/featureCounts/B.primary.counts',
            '/path/to/final/featureCounts/C.primary.counts',
        ],
        [
            '/path/to/final/featureCounts/A.primary.dedup.counts',
            '/path/to/final/featureCounts/B.primary.dedup.counts',
            '/path/to/final/featureCounts/C.primary.dedup.counts',
        ],
    ]
    self.assertEqual(groupings, expected)
def test_countfile_merging(self):
    """
    Test that the correct files are used to merge.

    The grouping of countfiles and the reading of the individual files are
    both mocked out.  Checks that one output file per countfile 'type' is
    opened for writing at the expected location and that the expected
    header and rows are written to it.

    The module-level functions are patched only for the duration of the
    call under test, so the replacements do not leak into other tests.
    """
    # a dummy method to mock the reading/concatenating of the data in the
    # individual files
    def mock_read(matrix, f):
        dummy = [['geneA', '0', '100', '200'],
                 ['geneB', '1', '101', '201'],
                 ['geneC', '2', '102', '202']]
        if not matrix:
            # first call for this matrix: create one placeholder per row
            matrix.extend([] for _ in dummy)
        for i, row in enumerate(dummy):
            matrix[i] = row

    # NOTE(review): the second group lists only two files (there is no
    # B.primary.counts) although the expected output below is identical
    # for all three groups -- confirm this is intentional.
    groupings = [
        [
            '/path/to/final/featureCounts/A.counts',
            '/path/to/final/featureCounts/C.counts',
            '/path/to/final/featureCounts/B.counts',
        ],
        [
            '/path/to/final/featureCounts/A.primary.counts',
            '/path/to/final/featureCounts/C.primary.counts',
        ],
        [
            '/path/to/final/featureCounts/A.primary.dedup.counts',
            '/path/to/final/featureCounts/B.primary.dedup.counts',
            '/path/to/final/featureCounts/C.primary.dedup.counts',
        ],
    ]

    p = Params()
    p.add(raw_count_matrix_file_prefix='merged_counts')

    s1 = Sample('A', 'X')
    s1.countfiles = [
        '/path/to/final/featureCounts/A.primary.counts',
        '/path/to/final/featureCounts/A.counts',
        '/path/to/final/featureCounts/A.primary.dedup.counts',
    ]
    s2 = Sample('B', 'Y')
    s2.countfiles = [
        '/path/to/final/featureCounts/B.counts',
        '/path/to/final/featureCounts/B.primary.dedup.counts',
        '/path/to/final/featureCounts/B.primary.counts',
    ]
    s3 = Sample('C', 'Z')
    s3.countfiles = [
        '/path/to/final/featureCounts/C.counts',
        '/path/to/final/featureCounts/C.primary.counts',
        '/path/to/final/featureCounts/C.primary.dedup.counts',
    ]

    project = Project()
    project.add_parameters(p)
    project.add_samples([s1, s3, s2])

    m = mock.mock_open()
    # mock out the actual implementations; create=True because 'read' may
    # not exist on the module before this test assigns it
    with mock.patch.object(self.module, 'get_countfile_groupings',
                           mock.Mock(return_value=groupings)), \
            mock.patch.object(self.module, 'read', mock_read,
                              create=True), \
            mock.patch.object(__builtin__, 'open', m):
        self.module.create_count_matrices(project, mock.Mock())

    m.assert_any_call(
        '/path/to/final/featureCounts/merged_counts.counts', 'w')
    m.assert_any_call(
        '/path/to/final/featureCounts/merged_counts.primary.counts', 'w')
    m.assert_any_call(
        '/path/to/final/featureCounts/merged_counts.primary.dedup.counts',
        'w')

    # the same header and rows are expected once per output file
    handle = m()
    calls = [
        mock.call('Gene\tA\tB\tC\n'),
        mock.call('geneA\t0\t100\t200\n'),
        mock.call('geneB\t1\t101\t201\n'),
        mock.call('geneC\t2\t102\t202\n'),
    ] * 3
    handle.write.assert_has_calls(calls)