예제 #1
0
	def test_missing_countfile_raises_exception(self):
		"""
		Check the failure path of normalize(): the mocked os.path.isfile answers
		True on its first call and False on its second, so a
		MissingCountMatrixFileException is expected.  The single call made to the
		normalization script before the failure must carry the right arguments.
		"""
		# swap out the script launcher so nothing is actually executed
		self.module.call_script = mock.Mock()

		primary_matrix = '/path/to/raw_counts/raw_count_matrix.primary.counts'
		dedup_matrix = '/path/to/raw_counts/raw_count_matrix.primary.dedup.counts'

		project = Project()
		project.raw_count_matrices = [primary_matrix, dedup_matrix]

		project_params = Params()
		component_params = Params()
		project_params.add(raw_count_matrix_file_prefix='raw_count_matrix')
		component_params.add(normalized_counts_file_prefix='normalized_count_matrix')
		component_params.add(normalized_counts_output_dir='/path/to/final/norm_counts_dir')
		component_params.add(normalization_script='normalize.R')
		project_params.add(sample_annotation_file='/path/to/samples.txt')
		project.add_parameters(project_params)

		isfile_mock = mock.MagicMock(side_effect=[True, False])
		with mock.patch.object(self.module.os.path, 'isfile', isfile_mock):
			with self.assertRaises(self.module.MissingCountMatrixFileException):
				self.module.normalize(project, component_params)

			# only the first matrix should have reached the script
			expected_calls = [
				mock.call(
					'normalize.R',
					primary_matrix,
					'/path/to/final/norm_counts_dir/normalized_count_matrix.primary.counts',
					'/path/to/samples.txt'),
			]
			self.module.call_script.assert_has_calls(expected_calls)
    def test_bad_bamfile_path_raises_exception(self):
        """
        Check that execute_counting() raises MissingBamFileException when the
        mocked os.path.isfile reports a BAM file as absent.
        """
        # make sure no real subprocess can be launched
        self.module.subprocess = mock.Mock()

        params = Params()
        params.add(gtf='/path/to/GTF/mock.gtf')
        params.add(feature_counts='/path/to/bin/featureCounts')
        params.add(feature_counts_file_extension='counts')
        params.add(feature_counts_output_dir='/path/to/final/featureCounts')
        params.add(paired_alignment=False)

        sample_a = Sample('A', 'X')
        sample_a.bamfiles = [
            '/path/to/bamdir/A.bam',
            '/path/to/bamdir/A.primary.bam',
            '/path/to/bamdir/A.primary.dedup.bam',
        ]
        sample_b = Sample('B', 'X')
        sample_b.bamfiles = [
            '/path/to/bamdir/B.bam',
            '/bad/path/B.sort.bam',
        ]

        project = Project()
        project.add_parameters(params)
        project.add_samples([sample_a, sample_b])

        # five existence checks are scripted: four succeed, the fifth fails
        isfile_answers = [True] * 4 + [False]
        isfile_mock = mock.MagicMock(side_effect=isfile_answers)
        with mock.patch.object(self.module.os.path, 'isfile', isfile_mock):
            with self.assertRaises(self.module.MissingBamFileException):
                self.module.execute_counting(project, util_methods)
예제 #3
0
	def test_correct_calls_are_made(self):
		"""
		Check the arguments normalize() hands to the script launcher for every
		raw count matrix -- in particular the renamed output path of each
		normalized matrix.
		"""
		# swap out the script launcher so nothing is actually executed
		self.module.call_script = mock.Mock()

		raw_matrices = [
			'/path/to/raw_counts/raw_count_matrix.primary.counts',
			'/path/to/raw_counts/raw_count_matrix.primary.dedup.counts',
		]
		normalized_matrices = [
			'/path/to/final/norm_counts_dir/normalized_count_matrix.primary.counts',
			'/path/to/final/norm_counts_dir/normalized_count_matrix.primary.dedup.counts',
		]

		project = Project()
		# hand the project its own copy so the expectations below stay independent
		project.raw_count_matrices = list(raw_matrices)

		project_params = Params()
		component_params = Params()
		project_params.add(raw_count_matrix_file_prefix='raw_count_matrix')
		component_params.add(normalized_counts_file_prefix='normalized_count_matrix')
		component_params.add(normalized_counts_output_dir='/path/to/final/norm_counts_dir')
		component_params.add(normalization_script='normalize.R')
		project_params.add(sample_annotation_file='/path/to/samples.txt')
		project.add_parameters(project_params)

		# both existence checks succeed
		isfile_mock = mock.MagicMock(side_effect=[True, True])
		with mock.patch.object(self.module.os.path, 'isfile', isfile_mock):
			self.module.normalize(project, component_params)

			expected_calls = [
				mock.call('normalize.R', raw, normalized, '/path/to/samples.txt')
				for raw, normalized in zip(raw_matrices, normalized_matrices)
			]
			self.module.call_script.assert_has_calls(expected_calls)
    def test_group_countfiles_raises_exception_if_missing_type(self):
        """
        Test the method that aggregates all the countfiles generated from each 'type' of bam file.
        That is, we may have multiple bam files for each sample (e.g. primary alignments,
        deduplicated, etc) and a countfile is generated for each one of those.  When assembling a
        count matrix, the files of a particular 'type' (e.g. those coming from deduplicated BAM
        files) are grouped together.

        This test checks that CountfileQuantityException is raised if one of the countfile 'types'
        is missing for a sample.  Here, sample B has no countfile for the 'primary.counts' type.
        """

        p = Params()
        p.add(feature_counts_output_dir='/path/to/final/featureCounts')

        s1 = Sample('A', 'X')
        s1.countfiles = [
            '/path/to/final/featureCounts/A.counts',
            '/path/to/final/featureCounts/A.primary.counts',
            '/path/to/final/featureCounts/A.primary.dedup.counts'
        ]
        # sample B deliberately lacks the 'primary.counts' file
        s2 = Sample('B', 'Y')
        s2.countfiles = [
            '/path/to/final/featureCounts/B.counts',
            '/path/to/final/featureCounts/B.primary.dedup.counts'
        ]
        s3 = Sample('C', 'Z')
        s3.countfiles = [
            '/path/to/final/featureCounts/C.counts',
            '/path/to/final/featureCounts/C.primary.counts',
            '/path/to/final/featureCounts/C.primary.dedup.counts'
        ]

        project = Project()
        project.add_parameters(p)
        project.add_samples([s1, s2, s3])

        # scripted glob results, one list per countfile 'type'; the second
        # group is short by one file (sample B)
        mock_case_insensitive_glob = mock.Mock()
        mock_case_insensitive_glob.side_effect = [
            [
                '/path/to/final/featureCounts/A.counts',
                '/path/to/final/featureCounts/B.counts',
                '/path/to/final/featureCounts/C.counts'
            ],
            [
                '/path/to/final/featureCounts/A.primary.counts',
                '/path/to/final/featureCounts/C.primary.counts'
            ],
            [
                '/path/to/final/featureCounts/A.primary.dedup.counts',
                '/path/to/final/featureCounts/B.primary.dedup.counts',
                '/path/to/final/featureCounts/C.primary.dedup.counts'
            ]
        ]
        with self.assertRaises(self.module.CountfileQuantityException):
            self.module.get_countfile_groupings(
                project, mock_case_insensitive_glob)
예제 #5
0
    def test_group_countfiles(self):
        """
        Check that get_countfile_groupings() collects the countfiles of all
        samples into one group per countfile 'type' (plain, primary,
        primary.dedup), with the samples in order A, B, C inside each group.
        """

        project_params = Params()
        component_params = Params()
        component_params.add(
            feature_counts_output_dir='/path/to/final/featureCounts')

        files_a = [
            '/path/to/final/featureCounts/A.counts',
            '/path/to/final/featureCounts/A.primary.counts',
            '/path/to/final/featureCounts/A.primary.dedup.counts',
        ]
        files_b = [
            '/path/to/final/featureCounts/B.counts',
            '/path/to/final/featureCounts/B.primary.counts',
            '/path/to/final/featureCounts/B.primary.dedup.counts',
        ]
        files_c = [
            '/path/to/final/featureCounts/C.counts',
            '/path/to/final/featureCounts/C.primary.counts',
            '/path/to/final/featureCounts/C.primary.dedup.counts',
        ]

        # every sample receives its own copy of the file list so that the
        # expectation built below cannot be affected by the code under test
        sample_a = Sample('A', 'X')
        sample_a.countfiles = list(files_a)
        sample_b = Sample('B', 'Y')
        sample_b.countfiles = list(files_b)
        sample_c = Sample('C', 'Z')
        sample_c.countfiles = list(files_c)

        project = Project()
        project.add_parameters(project_params)
        project.add_samples([sample_a, sample_b, sample_c])

        actual_groups = self.module.get_countfile_groupings(
            project, component_params)

        # the i-th group holds the i-th countfile of A, B and C
        expected_groups = [
            list(group) for group in zip(files_a, files_b, files_c)
        ]
        self.assertEqual(actual_groups, expected_groups)
    def test_system_calls_single_end_experiment(self):
        """
        Check the featureCounts command lines issued through
        subprocess.check_call for a single-end experiment, and that the sample
        afterwards lists the new countfiles in the output directory.
        """
        # make sure no real subprocess can be launched
        self.module.subprocess = mock.Mock()

        params = Params()
        params.add(gtf='/path/to/GTF/mock.gtf')
        params.add(feature_counts='/path/to/bin/featureCounts')
        params.add(feature_counts_file_extension='counts')
        params.add(feature_counts_output_dir='/path/to/final/featureCounts')
        params.add(paired_alignment=False)

        sample = Sample('A', 'X')
        sample.bamfiles = [
            '/path/to/bamdir/A.bam',
            '/path/to/bamdir/A.primary.bam',
            '/path/to/bamdir/A.primary.dedup.bam',
        ]

        project = Project()
        project.add_parameters(params)
        project.add_samples([sample])

        # all three existence checks succeed
        isfile_mock = mock.MagicMock(side_effect=[True, True, True])
        with mock.patch.object(self.module.os.path, 'isfile', isfile_mock):
            self.module.execute_counting(project, util_methods)

            expected_commands = [
                '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -o /path/to/final/featureCounts/A.counts /path/to/bamdir/A.bam',
                '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -o /path/to/final/featureCounts/A.primary.counts /path/to/bamdir/A.primary.bam',
                '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -o /path/to/final/featureCounts/A.primary.dedup.counts /path/to/bamdir/A.primary.dedup.bam',
            ]
            expected_calls = [
                mock.call(command, shell=True)
                for command in expected_commands
            ]
            self.module.subprocess.check_call.assert_has_calls(expected_calls)

            # each BAM file name maps to a countfile name in the output dir
            expected_files = []
            for bamfile in sample.bamfiles:
                countfile_name = re.sub(
                    'bam', 'counts', os.path.basename(bamfile))
                expected_files.append(
                    os.path.join('/path/to/final/featureCounts',
                                 countfile_name))
            self.assertEqual(sample.countfiles, expected_files)
예제 #7
0
    def test_correct_calls_are_made(self):
        """
        Check the arguments call_deseq() hands to the script launcher: one
        call per (count matrix, contrast) pair, each with the expected output
        and heatmap file names.
        """
        # swap out the script launcher so nothing is actually executed
        self.module.call_script = mock.Mock()

        project = Project()
        project.raw_count_matrices = [
            '/path/to/raw_counts/raw_count_matrix.primary.counts',
            '/path/to/raw_counts/raw_count_matrix.primary.dedup.counts',
        ]

        project_params = Params()
        component_params = Params()
        project_params.add(raw_count_matrix_file_prefix='raw_count_matrix')
        project_params.add(feature_counts_file_extension='counts')
        component_params.add(deseq_output_dir='/path/to/final/deseq_dir')
        component_params.add(deseq_script='deseq_original.R')
        project_params.add(sample_annotation_file='/path/to/samples.txt')
        component_params.add(deseq_output_tag='deseq')
        component_params.add(deseq_contrast_flag='_vs_')
        component_params.add(number_of_genes_for_heatmap='30')
        component_params.add(heatmap_file_tag='heatmap.png')

        project.add_parameters(project_params)
        project.contrasts = [('X', 'Y'), ('X', 'Z')]

        # expected argument strings, in call order
        expected_arguments = [
            '/path/to/raw_counts/raw_count_matrix.primary.counts /path/to/samples.txt X Y /path/to/final/deseq_dir/Y_vs_X.primary.deseq /path/to/final/deseq_dir/Y_vs_X.primary.heatmap.png 30',
            '/path/to/raw_counts/raw_count_matrix.primary.counts /path/to/samples.txt X Z /path/to/final/deseq_dir/Z_vs_X.primary.deseq /path/to/final/deseq_dir/Z_vs_X.primary.heatmap.png 30',
            '/path/to/raw_counts/raw_count_matrix.primary.dedup.counts /path/to/samples.txt X Y /path/to/final/deseq_dir/Y_vs_X.primary.dedup.deseq /path/to/final/deseq_dir/Y_vs_X.primary.dedup.heatmap.png 30',
            '/path/to/raw_counts/raw_count_matrix.primary.dedup.counts /path/to/samples.txt X Z /path/to/final/deseq_dir/Z_vs_X.primary.dedup.deseq /path/to/final/deseq_dir/Z_vs_X.primary.dedup.heatmap.png 30',
        ]

        # both existence checks succeed
        isfile_mock = mock.MagicMock(side_effect=[True, True])
        with mock.patch.object(self.module.os.path, 'isfile', isfile_mock):
            self.module.call_deseq(project, component_params)
            expected_calls = [
                mock.call('deseq_original.R', arguments)
                for arguments in expected_arguments
            ]
            self.module.call_script.assert_has_calls(expected_calls)
예제 #8
0
    def test_missing_countfile_raises_exception(self):
        """
        Check the failure path of call_deseq(): the mocked os.path.isfile
        answers True on its first call and False on its second, so a
        MissingCountMatrixFileException is expected.  The script calls made
        before the failure must still carry the right arguments.
        """
        # swap out the script launcher so nothing is actually executed
        self.module.call_script = mock.Mock()

        project = Project()
        project.raw_count_matrices = [
            '/path/to/raw_counts/raw_count_matrix.primary.counts',
            '/path/to/raw_counts/raw_count_matrix.primary.dedup.counts',
        ]

        project_params = Params()
        component_params = Params()
        project_params.add(raw_count_matrix_file_prefix='raw_count_matrix')
        project_params.add(feature_counts_file_extension='counts')
        component_params.add(deseq_output_dir='/path/to/final/deseq_dir')
        component_params.add(deseq_script='deseq_original.R')
        project_params.add(sample_annotation_file='/path/to/samples.txt')
        component_params.add(deseq_output_tag='deseq')
        component_params.add(deseq_contrast_flag='_vs_')
        component_params.add(number_of_genes_for_heatmap='30')
        component_params.add(heatmap_file_tag='heatmap.png')

        project.add_parameters(project_params)
        project.contrasts = [('X', 'Y'), ('X', 'Z')]

        # expected argument strings for the two calls that should succeed
        expected_arguments = [
            '/path/to/raw_counts/raw_count_matrix.primary.counts /path/to/samples.txt X Y /path/to/final/deseq_dir/Y_vs_X.primary.deseq /path/to/final/deseq_dir/Y_vs_X.primary.heatmap.png 30',
            '/path/to/raw_counts/raw_count_matrix.primary.counts /path/to/samples.txt X Z /path/to/final/deseq_dir/Z_vs_X.primary.deseq /path/to/final/deseq_dir/Z_vs_X.primary.heatmap.png 30',
        ]

        isfile_mock = mock.MagicMock(side_effect=[True, False])
        with mock.patch.object(self.module.os.path, 'isfile', isfile_mock):
            with self.assertRaises(
                    self.module.MissingCountMatrixFileException):
                self.module.call_deseq(project, component_params)

            expected_calls = [
                mock.call('deseq_original.R', arguments)
                for arguments in expected_arguments
            ]
            self.module.call_script.assert_has_calls(expected_calls)
    def test_countfile_merging(self):
        """
        Check create_count_matrices(): with the countfile grouping and the
        file-reading step both replaced by stand-ins, one merged matrix file
        must be opened for writing per countfile 'type', and the expected
        header and data lines must be written.
        """

        # stand-in for the module's reader: fills `matrix` with fixed rows,
        # whatever file `f` is passed
        def mock_read(matrix, f):
            dummy = [
                ['geneA', '0', '100', '200'],
                ['geneB', '1', '101', '201'],
                ['geneC', '2', '102', '202'],
            ]
            # on first use, give the matrix one (empty) row per gene
            if len(matrix) == 0:
                matrix.extend([] for _ in dummy)

            for row_index, row in enumerate(dummy):
                matrix[row_index] = row

        # replace the real implementations with the stand-ins
        self.module.get_countfile_groupings = mock.Mock(return_value=[
            [
                '/path/to/final/featureCounts/A.counts',
                '/path/to/final/featureCounts/C.counts',
                '/path/to/final/featureCounts/B.counts',
            ],
            [
                '/path/to/final/featureCounts/A.primary.counts',
                '/path/to/final/featureCounts/C.primary.counts',
            ],
            [
                '/path/to/final/featureCounts/A.primary.dedup.counts',
                '/path/to/final/featureCounts/B.primary.dedup.counts',
                '/path/to/final/featureCounts/C.primary.dedup.counts',
            ],
        ])
        self.module.read = mock_read

        params = Params()
        params.add(raw_count_matrix_file_prefix='merged_counts')

        sample_a = Sample('A', 'X')
        sample_a.countfiles = [
            '/path/to/final/featureCounts/A.primary.counts',
            '/path/to/final/featureCounts/A.counts',
            '/path/to/final/featureCounts/A.primary.dedup.counts',
        ]
        sample_b = Sample('B', 'Y')
        sample_b.countfiles = [
            '/path/to/final/featureCounts/B.counts',
            '/path/to/final/featureCounts/B.primary.dedup.counts',
            '/path/to/final/featureCounts/B.primary.counts',
        ]
        sample_c = Sample('C', 'Z')
        sample_c.countfiles = [
            '/path/to/final/featureCounts/C.counts',
            '/path/to/final/featureCounts/C.primary.counts',
            '/path/to/final/featureCounts/C.primary.dedup.counts',
        ]

        project = Project()
        project.add_parameters(params)
        # samples are registered out of alphabetical order on purpose
        project.add_samples([sample_a, sample_c, sample_b])

        open_mock = mock.mock_open()
        with mock.patch.object(__builtin__, 'open', open_mock):
            self.module.create_count_matrices(project, mock.Mock())

            # one output file per countfile 'type'
            for matrix_path in (
                    '/path/to/final/featureCounts/merged_counts.counts',
                    '/path/to/final/featureCounts/merged_counts.primary.counts',
                    '/path/to/final/featureCounts/merged_counts.primary.dedup.counts'):
                open_mock.assert_any_call(matrix_path, 'w')

            # the same four lines are expected for each of the three files
            file_handle = open_mock()
            lines_per_file = [
                mock.call('Gene\tA\tB\tC\n'),
                mock.call('geneA\t0\t100\t200\n'),
                mock.call('geneB\t1\t101\t201\n'),
                mock.call('geneC\t2\t102\t202\n'),
            ]
            file_handle.write.assert_has_calls(lines_per_file * 3)
예제 #10
0
    def test_system_calls_paired_experiment(self):
        """
        Check the featureCounts command lines issued through subprocess.Popen
        for a paired-end experiment (note the extra '-p' flag), and that the
        sample afterwards lists the new countfiles in the output directory.
        """

        # a fake process that finishes successfully with empty output
        mock_process = mock.Mock(name='mock_process')
        mock_process.communicate.return_value = ('', '')
        mock_process.returncode = 0

        mock_popen = mock.Mock(name='mock_popen')
        mock_popen.return_value = mock_process

        # make sure no real subprocess can be launched
        fake_subprocess = mock.Mock()
        fake_subprocess.Popen = mock_popen
        fake_subprocess.STDOUT = ''
        fake_subprocess.PIPE = ''
        self.module.subprocess = fake_subprocess

        project_params = Params()
        component_params = Params()
        project_params.add(gtf='/path/to/GTF/mock.gtf')
        component_params.add(feature_counts='/path/to/bin/featureCounts')
        component_params.add(feature_counts_file_extension='counts')
        component_params.add(
            feature_counts_output_dir='/path/to/final/featureCounts')
        project_params.add(paired_alignment=True)

        sample = Sample('A', 'X')
        sample.bamfiles = [
            '/path/to/bamdir/A.bam',
            '/path/to/bamdir/A.primary.bam',
            '/path/to/bamdir/A.primary.dedup.bam',
        ]

        project = Project()
        project.add_parameters(project_params)
        project.add_samples([sample])

        # all three existence checks succeed
        isfile_mock = mock.MagicMock(side_effect=[True, True, True])
        with mock.patch.object(self.module.os.path, 'isfile', isfile_mock):
            self.module.execute_counting(
                project, component_params, util_methods)

            expected_commands = [
                '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -p -o /path/to/final/featureCounts/A.counts /path/to/bamdir/A.bam',
                '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -p -o /path/to/final/featureCounts/A.primary.counts /path/to/bamdir/A.primary.bam',
                '/path/to/bin/featureCounts -a /path/to/GTF/mock.gtf -t exon -g gene_name -p -o /path/to/final/featureCounts/A.primary.dedup.counts /path/to/bamdir/A.primary.dedup.bam',
            ]
            expected_calls = [
                mock.call(
                    command,
                    shell=True,
                    stderr=self.module.subprocess.STDOUT,
                    stdout=self.module.subprocess.PIPE)
                for command in expected_commands
            ]
            mock_popen.assert_has_calls(expected_calls)

        # each BAM file name maps to a countfile name in the output dir
        expected_files = []
        for bamfile in sample.bamfiles:
            countfile_name = re.sub(
                'bam', 'counts', os.path.basename(bamfile))
            expected_files.append(
                os.path.join('/path/to/final/featureCounts', countfile_name))
        self.assertEqual(sample.countfiles, expected_files)