예제 #1
0
    def test_trimmomatic_fastqc_start_no_reference_database_paired_end(self):
        """
        Test running the default flow of trimmomatic on paired end input as no
        reference database is provided
        Test running fastqc at the beginning of the workflow

        The same fastq file is passed twice as the paired input. The test
        passes when every (expression, message) check produced by
        utils.check_output for the expected files is true.
        """

        # create a temp directory for output
        tempdir = tempfile.mkdtemp(suffix="test_kneaddata_")

        try:
            # run kneaddata test
            command = [
                "kneaddata", "--input", cfg.fastq_file, "--input",
                cfg.fastq_file, "--output", tempdir, "--run-fastqc-start"
            ]
            utils.run_kneaddata(command)

            # get the basename of the input file
            basename = utils.file_basename(cfg.fastq_file)

            # fastqc reports are looked up in the "fastqc" subfolder of the
            # output directory; the remaining files are at its top level
            expected_output_files = [
                os.path.join("fastqc", basename + cfg.fastqc_extensions[0]),
                os.path.join("fastqc", basename + cfg.fastqc_extensions[1]),
                basename + cfg.log_extension,
                basename + cfg.paired_trim_extensions[0],
                basename + cfg.paired_trim_extensions[1]
            ]

            # check the output files are as expected
            for expression, message in utils.check_output(
                    expected_output_files, tempdir):
                self.assertTrue(expression, message)
        finally:
            # always remove the temp directory, even when the run or one of
            # the checks fails, so failed tests do not leak folders
            utils.remove_temp_folder(tempdir)
예제 #2
0
    def test_trimmomatic_and_trf_no_reference_database_single_end(self):
        """
        Test running the default flow of trimmomatic on single end input as no
        reference database is provided
        Test with also running trf

        The test passes when every (expression, message) check produced by
        utils.check_output for the expected log, trimmed and repeats-removed
        files is true.
        """

        # create a temp directory for output
        tempdir = tempfile.mkdtemp(suffix="test_kneaddata_")

        try:
            # run kneaddata test
            command = [
                "kneaddata", "--input", cfg.fastq_file, "--output", tempdir,
                "--run-trf"
            ]
            utils.run_kneaddata(command)

            # get the basename of the input file
            basename = utils.file_basename(cfg.fastq_file)

            expected_output_files = [
                basename + cfg.log_extension,
                basename + cfg.single_trim_extension,
                basename + cfg.repeats_removed_extension
            ]

            # check the output files are as expected
            for expression, message in utils.check_output(
                    expected_output_files, tempdir):
                self.assertTrue(expression, message)
        finally:
            # always remove the temp directory, even when the run or one of
            # the checks fails, so failed tests do not leak folders
            utils.remove_temp_folder(tempdir)
예제 #3
0
    def test_trf_only_paired_end(self):
        """
        Test running only trf on paired end input

        The trim step is bypassed with --bypass-trim and the same fastq file
        is passed twice as the paired input. The test passes when every
        (expression, message) check produced by utils.check_output for the
        expected log and paired repeats-removed files is true.
        """

        # create a temp directory for output
        tempdir = tempfile.mkdtemp(suffix="test_kneaddata_")

        try:
            # run kneaddata test
            command = [
                "kneaddata", "--input", cfg.fastq_file, "--input",
                cfg.fastq_file, "--output", tempdir, "--run-trf",
                "--bypass-trim"
            ]
            utils.run_kneaddata(command)

            # get the basename of the input file
            basename = utils.file_basename(cfg.fastq_file)

            expected_output_files = [
                basename + cfg.log_extension,
                basename + cfg.paired_repeats_removed_extensions[0],
                basename + cfg.paired_repeats_removed_extensions[1]
            ]

            # check the output files are as expected
            for expression, message in utils.check_output(
                    expected_output_files, tempdir):
                self.assertTrue(expression, message)
        finally:
            # always remove the temp directory, even when the run or one of
            # the checks fails, so failed tests do not leak folders
            utils.remove_temp_folder(tempdir)
예제 #4
0
    def test_trimmomatic_bowtie2_database_and_trf_single_end_gzipped_input(
            self):
        """
        Test running the default flow of trimmomatic on single end input with
        bowtie2 database provided
        Test with keeping temp files
        Test with TRF
        Test with gzipped input fastq file

        The test passes when every (expression, message) check produced by
        utils.check_output for the expected files is true.
        """

        # create a temp directory for output
        tempdir = tempfile.mkdtemp(suffix="test_kneaddata_")

        try:
            # run kneaddata test
            command = [
                "kneaddata", "--input", cfg.fastq_file_gzipped, "--output",
                tempdir, "--reference-db", cfg.bowtie2_db_folder,
                "--store-temp-output", "--run-trf"
            ]
            utils.run_kneaddata(command)

            # get the basename of the input file; the gzipped input's own
            # basename is also expected as a file in the output folder
            # (presumably the decompressed copy -- confirm against kneaddata)
            fastq_file_basename = utils.file_basename(cfg.fastq_file_gzipped)
            basename = utils.file_basename(cfg.fastq_file)
            filtered_file_basename = utils.get_filtered_file_basename(
                basename, cfg.bowtie2_db_folder, "bowtie2")

            expected_output_files = [
                fastq_file_basename,
                basename + cfg.log_extension,
                basename + cfg.single_trim_extension,
                filtered_file_basename + cfg.clean_extension,
                filtered_file_basename + cfg.contaminated_extension,
                filtered_file_basename + cfg.sam_extension,
                basename + cfg.final_extension,
                basename + cfg.repeats_removed_extension
            ]

            # check the output files are as expected
            for expression, message in utils.check_output(
                    expected_output_files, tempdir):
                self.assertTrue(expression, message)
        finally:
            # always remove the temp directory, even when the run or one of
            # the checks fails, so failed tests do not leak folders
            utils.remove_temp_folder(tempdir)
예제 #5
0
    def test_trimmomatic_bowtie2_database_fastqc_end_single_end(self):
        """
        Test running the default flow of trimmomatic on single end input with
        bowtie2 database provided
        Test with keeping temp files
        Test running fastqc at the end of the workflow

        The test passes when every (expression, message) check produced by
        utils.check_output for the expected files is true.
        """

        # create a temp directory for output
        tempdir = tempfile.mkdtemp(suffix="test_kneaddata_")

        try:
            # run kneaddata test
            command = [
                "kneaddata", "--input", cfg.fastq_file, "--output", tempdir,
                "--reference-db", cfg.bowtie2_db_folder,
                "--store-temp-output", "--run-fastqc-end"
            ]
            utils.run_kneaddata(command)

            # get the basename of the input file
            basename = utils.file_basename(cfg.fastq_file)
            # the fastqc reports are named after the final output file, so
            # take the basename of that file rather than of the input
            final_basename = utils.file_basename(
                basename + cfg.final_extension)
            filtered_file_basename = utils.get_filtered_file_basename(
                basename, cfg.bowtie2_db_folder, "bowtie2")

            expected_output_files = [
                os.path.join("fastqc",
                             final_basename + cfg.fastqc_extensions[0]),
                os.path.join("fastqc",
                             final_basename + cfg.fastqc_extensions[1]),
                basename + cfg.log_extension,
                basename + cfg.single_trim_extension,
                filtered_file_basename + cfg.clean_extension,
                filtered_file_basename + cfg.contaminated_extension,
                filtered_file_basename + cfg.sam_extension,
                basename + cfg.final_extension
            ]

            # check the output files are as expected
            for expression, message in utils.check_output(
                    expected_output_files, tempdir):
                self.assertTrue(expression, message)
        finally:
            # always remove the temp directory, even when the run or one of
            # the checks fails, so failed tests do not leak folders
            utils.remove_temp_folder(tempdir)
예제 #6
0
    def test_trimmomatic_bowtie2_database_and_trf_paired_end_remove_intermedite_temp_output(
            self):
        """
        Test running the default flow of trimmomatic on paired end input with a
        bowtie2 database provided
        Test running with remove intermediate temp output files
        Test running trf

        The same fastq file is passed twice as the paired input and
        --no-discordant is set. Besides the per-file checks from
        utils.check_output, the output folder must contain exactly as many
        entries as there are expected files (including the contaminated
        files, which may be empty).
        """

        # create a temp directory for output
        tempdir = tempfile.mkdtemp(suffix="test_kneaddata_")

        try:
            # run kneaddata test
            command = [
                "kneaddata", "--input", cfg.fastq_file, "--input",
                cfg.fastq_file, "--output", tempdir, "--reference-db",
                cfg.bowtie2_db_folder, "--run-trf", "--no-discordant"
            ]
            utils.run_kneaddata(command)

            # get the basename of the input file
            basename = utils.file_basename(cfg.fastq_file)
            # NOTE(review): the trailing True is passed positionally to the
            # helper; presumably it selects the paired-end naming -- confirm
            filtered_file_basename = utils.get_filtered_file_basename(
                basename, cfg.bowtie2_db_folder, "bowtie2", True)

            expected_non_empty_output_files = [
                basename + cfg.log_extension,
                basename + cfg.paired_trim_extensions[0],
                basename + cfg.paired_trim_extensions[1],
                basename + cfg.final_extensions_paired[0],
                basename + cfg.final_extensions_paired[1],
                basename + cfg.paired_repeats_removed_extensions[0],
                basename + cfg.paired_repeats_removed_extensions[1]
            ]

            # check the output files are as expected
            for expression, message in utils.check_output(
                    expected_non_empty_output_files, tempdir):
                self.assertTrue(expression, message)

            # add the expected output files which can be empty; build a new
            # list so the non-empty list above is not mutated via an alias
            expected_output_files = expected_non_empty_output_files + [
                filtered_file_basename + cfg.paired_contaminated_extension[0],
                filtered_file_basename + cfg.paired_contaminated_extension[1]
            ]

            # check only the expected number of files are in the output
            # folder (no intermediate temp files left behind)
            actual_output_files = os.listdir(tempdir)
            self.assertEqual(len(actual_output_files),
                             len(expected_output_files))
        finally:
            # always remove the temp directory, even when the run or one of
            # the checks fails, so failed tests do not leak folders
            utils.remove_temp_folder(tempdir)
예제 #7
0
    def test_trimmomatic_bowtie2_paired_end_remove_intermedite_temp_output_discordant_trf(
            self):
        """
        Test running the default flow of trimmomatic on paired end input with
        the same bowtie2 database folder provided twice
        Test running with remove intermediate temp output files
        Test with discordant alignments
        Test with TRF

        Besides the per-file checks from utils.check_output, the number of
        non-empty files in the output folder must equal the number of
        expected non-empty files.
        """

        # create a temp directory for output
        tempdir = tempfile.mkdtemp(suffix="test_kneaddata_")

        try:
            # run kneaddata test
            command = [
                "kneaddata", "--input", cfg.fastq_file, "--input",
                cfg.fastq_pair_file, "--output", tempdir, "--reference-db",
                cfg.bowtie2_db_folder, "--reference-db",
                cfg.bowtie2_db_folder, "--run-trf"
            ]
            utils.run_kneaddata(command)

            # get the basename of the input file
            basename = utils.file_basename(cfg.fastq_file)

            expected_non_empty_output_files = [
                basename + cfg.log_extension,
                basename + cfg.paired_trim_extensions[0],
                basename + cfg.paired_trim_extensions[1],
                basename + cfg.final_extensions_paired[0],
                basename + cfg.final_extensions_paired[1],
                basename + cfg.paired_repeats_removed_extensions[0],
                basename + cfg.paired_repeats_removed_extensions[1]
            ]

            # check the output files are as expected
            for expression, message in utils.check_output(
                    expected_non_empty_output_files, tempdir):
                self.assertTrue(expression, message)

            # check there are the expected number of files in the output
            # folder; entries of size zero are ignored because some outputs
            # are allowed to be empty
            output_paths = [
                os.path.join(tempdir, name) for name in os.listdir(tempdir)
            ]
            actual_output_files = [
                path for path in output_paths if os.path.getsize(path)
            ]
            self.assertEqual(len(actual_output_files),
                             len(expected_non_empty_output_files))
        finally:
            # always remove the temp directory, even when the run or one of
            # the checks fails, so failed tests do not leak folders
            utils.remove_temp_folder(tempdir)
예제 #8
0
    def test_trimmomatic_bowtie2_two_databases_paired_end_serial(self):
        """
        Test running the default flow of trimmomatic on paired end input with two
        bowtie2 database provided (both with the same name)
        Test running in serial alignment mode

        The same fastq file is passed twice as the paired input and
        --no-discordant is set. Besides the per-file checks from
        utils.check_output, the output folder must contain more entries than
        the list of main expected files.
        """

        # create a temp directory for output
        tempdir = tempfile.mkdtemp(suffix="test_kneaddata_")

        try:
            # run kneaddata test
            command = [
                "kneaddata", "--input", cfg.fastq_file, "--input",
                cfg.fastq_file, "--output", tempdir, "--reference-db",
                cfg.bowtie2_db_folder, "--reference-db",
                cfg.bowtie2_db_folder, "--no-discordant", "--serial"
            ]
            utils.run_kneaddata(command)

            # get the basename of the input file
            basename = utils.file_basename(cfg.fastq_file)
            filtered_file_basename = utils.get_filtered_file_basename(
                basename, cfg.bowtie2_db_folder, "bowtie2")

            expected_non_empty_output_files = [
                basename + cfg.log_extension,
                basename + cfg.paired_trim_extensions[0],
                basename + cfg.paired_trim_extensions[1],
                basename + cfg.final_extensions_paired[0],
                basename + cfg.final_extensions_paired[1]
            ]

            # check the output files are as expected
            for expression, message in utils.check_output(
                    expected_non_empty_output_files, tempdir):
                self.assertTrue(expression, message)

            # add the expected output files which can be empty; build a new
            # list so the non-empty list above is not mutated via an alias
            expected_output_files = expected_non_empty_output_files + [
                filtered_file_basename + cfg.paired_contaminated_extension[0],
                filtered_file_basename + cfg.paired_contaminated_extension[1]
            ]

            # check the output folder holds more files than the main
            # expected ones
            # NOTE(review): the original comment said "at least", but the
            # assertion is strictly greater-than; the comparison is kept
            # unchanged -- confirm which of the two is intended
            actual_output_files = os.listdir(tempdir)
            self.assertGreater(len(actual_output_files),
                               len(expected_output_files))
        finally:
            # always remove the temp directory, even when the run or one of
            # the checks fails, so failed tests do not leak folders
            utils.remove_temp_folder(tempdir)
예제 #9
0
    def test_bowtie2_only_single_end(self):
        """
        Test on single end input with bowtie2 database provided
        Test with keeping temp files
        Test bypassing trim step

        The test passes when every (expression, message) check produced by
        utils.check_output for the expected log, filtered and final files is
        true.
        """

        # create a temp directory for output
        tempdir = tempfile.mkdtemp(suffix="test_kneaddata_")

        try:
            # run kneaddata test
            command = [
                "kneaddata", "--input", cfg.fastq_file, "--output", tempdir,
                "--reference-db", cfg.bowtie2_db_folder,
                "--store-temp-output", "--bypass-trim"
            ]
            utils.run_kneaddata(command)

            # get the basename of the input file
            basename = utils.file_basename(cfg.fastq_file)
            filtered_file_basename = utils.get_filtered_file_basename(
                basename, cfg.bowtie2_db_folder, "bowtie2")

            expected_output_files = [
                basename + cfg.log_extension,
                filtered_file_basename + cfg.clean_extension,
                filtered_file_basename + cfg.contaminated_extension,
                filtered_file_basename + cfg.sam_extension,
                basename + cfg.final_extension
            ]

            # check the output files are as expected
            for expression, message in utils.check_output(
                    expected_output_files, tempdir):
                self.assertTrue(expression, message)
        finally:
            # always remove the temp directory, even when the run or one of
            # the checks fails, so failed tests do not leak folders
            utils.remove_temp_folder(tempdir)