Пример #1
0
    def test_space_in_sample_name(self):
        """
        Parse the "ngs_space_in_sample_name" fixture and make sure the parser does not raise
        :return:
        """
        run_dir = path.join(path_to_module, "ngs_space_in_sample_name")
        sample_sheet = path.join(run_dir, "SampleSheet.csv")

        # Any "*" in the parser's relative data directory is replaced with the
        # concrete "some_dir" name used by the fixture
        relative_data_dir = parsers.miniseq.Parser.get_relative_data_directory()
        data_dir = path.join(run_dir, relative_data_dir).replace("*", "some_dir")
        files_in_data_dir = parsers.common.get_file_list(data_dir)

        # No assertions: the test passes as long as parsing completes without an exception
        sample_parser.parse_sample_list(
            sample_sheet_file=sample_sheet,
            run_data_directory=data_dir,
            run_data_directory_file_list=files_in_data_dir)
Пример #2
0
    def test_build_valid_with_description_field(self):
        """
        Build a SequencingRun from the "iseq_with_desc_field" fixture and check that it
        contains the expected Project, Samples (with description) and SequenceFile
        :return:
        """
        run_dir = path.join(path_to_module, "iseq_with_desc_field")
        sample_sheet = path.join(run_dir, "SampleSheet.csv")
        run_metadata = sample_parser.parse_metadata(sample_sheet)

        # Any "*" in the parser's relative data directory is replaced with the
        # concrete "some_dir" name used by the fixture
        relative_data_dir = parsers.miniseq.Parser.get_relative_data_directory()
        data_dir = path.join(run_dir, relative_data_dir).replace("*", "some_dir")
        files_in_data_dir = parsers.common.get_file_list(data_dir)

        samples = sample_parser.parse_sample_list(
            sample_sheet_file=sample_sheet,
            run_data_directory=data_dir,
            run_data_directory_file_list=files_in_data_dir)
        sequencing_run = parsers.common.build_sequencing_run_from_samples(samples, run_metadata)

        # The builder hands back exactly a SequencingRun
        self.assertEqual(type(sequencing_run), model.SequencingRun)

        # There is one project, and it is a Project
        projects = sequencing_run.project_list
        self.assertEqual(len(projects), 1)
        project = projects[0]
        self.assertEqual(type(project), model.Project)

        # The project holds 3 samples, and they are Sample objects
        project_samples = project.sample_list
        self.assertEqual(len(project_samples), 3)
        first_sample = project_samples[0]
        self.assertEqual(type(first_sample), model.Sample)

        # The description column made it onto the sample
        self.assertEqual(first_sample.description, "desc1")
        # The sample carries a SequenceFile
        self.assertEqual(type(first_sample.sequence_file), model.SequenceFile)
Пример #3
0
    def test_valid(self):
        """
        Ensure a parsed valid directory matches the expected sample list
        :return:
        """
        directory = path.join(path_to_module, "fake_ngs_data")
        sheet_file = path.join(directory, "SampleSheet.csv")
        # Any "*" in the parser's relative data directory is replaced with the
        # concrete "some_dir" name used by the fixture
        data_dir = path.join(
            directory, parsers.miniseq.Parser.get_relative_data_directory())
        data_dir = data_dir.replace("*", "some_dir")
        file_list = parsers.common.get_file_list(data_dir)

        # Expected sample, built by hand to compare against the parser output
        sample = model.Sample(
            "01-1111", "", 1, {
                "index": "AAAAAAAA",
                "I7_Index_ID": "N01",
                "sample_project": "6",
                "sequencer_sample_ID": "01-1111-4004",
                "I5_Index_ID": "S01",
                "index2": "TTTTTTTT"
            })

        # Expected properties attached to the sample's sequence file
        sequence_file_properties = {
            "sequencer_sample_ID": "01-1111-4004",
            "index": "AAAAAAAA",
            "I7_Index_ID": "N01",
            "I5_Index_ID": "S01",
            "index2": "TTTTTTTT",
            "description": ""
        }

        # Expected forward / reverse read files for the sample
        file_path_1 = path.join(path_to_module, "fake_ngs_data", "Alignment_1",
                                "some_dir", "Fastq",
                                "01-1111_S1_L001_R1_001.fastq.gz")
        file_path_2 = path.join(path_to_module, "fake_ngs_data", "Alignment_1",
                                "some_dir", "Fastq",
                                "01-1111_S1_L001_R2_001.fastq.gz")
        raw_file_list = [file_path_1, file_path_2]

        res = sample_parser.parse_sample_list(
            sample_sheet_file=sheet_file,
            run_data_directory=data_dir,
            run_data_directory_file_list=file_list)

        # Check sample is the same
        self.assertEqual(res[0].get_uploadable_dict(),
                         sample.get_uploadable_dict())
        # Check sequencing file is correct
        self.assertEqual(res[0].sequence_file.properties_dict,
                         sequence_file_properties)
        # Compare order-independently. sorted() returns the sorted list, whereas
        # list.sort() returns None, which would make this assertion compare
        # None with None and pass no matter what the parser found.
        self.assertEqual(sorted(res[0].sequence_file.file_list),
                         sorted(raw_file_list))
Пример #4
0
    def test_not_valid_pf_list(self):
        """
        The file list in the sample sheet is invalid, so parsing the sample list
        must raise a SequenceFileError
        :return:
        """
        directory = path.join(path_to_module, "ngs_not_valid_pf_list")
        # Any "*" in the parser's relative data directory is replaced with the
        # concrete "some_dir" name used by the fixture
        data_dir = path.join(directory, parsers.miniseq.Parser.get_relative_data_directory())
        data_dir = data_dir.replace("*", "some_dir")
        file_list = parsers.common.get_file_list(data_dir)
        file_path = path.join(directory, "SampleSheet.csv")

        # The return value is deliberately not captured: the call is expected to raise
        with self.assertRaises(SequenceFileError):
            sample_parser.parse_sample_list(
                sample_sheet_file=file_path,
                run_data_directory=data_dir,
                run_data_directory_file_list=file_list)
Пример #5
0
    def get_sequencing_run(sample_sheet,
                           run_data_directory=None,
                           run_data_directory_file_list=None):
        """
        Validate the run directory / sample sheet locally and build a SequencingRun from them

        The steps run in order: resolve the data directory and its file list, validate the
        sample sheet, parse the run metadata, then parse the samples and build the run.
        A failure in any step is logged and raised as a ValidationError with a
        ValidationResult attached.

        :param sample_sheet: Sample Sheet File
        :param run_data_directory: Optional: Directory (including run directory) to data files.
                                   Can be provided for bypassing os calls when developing on cloud systems
        :param run_data_directory_file_list: Optional: List of files in data directory.
                                             Can be provided for bypassing os calls when developing on cloud systems
        :return: SequencingRun
        :raises exceptions.ValidationError: when the directory lookup raises DirectoryError,
                                            the sample sheet does not validate, metadata parsing
                                            raises SampleSheetError, or building the run raises
                                            SequenceFileError
        """
        files_failure = "Errors occurred while parsing files"
        sheet_failure = "Errors occurred while getting sample sheet"
        metadata_failure = "Errors occurred while parsing metadata"
        build_failure = "Errors occurred while building sequence run from sample sheet"

        # Step 1: resolve the data directory and file list, unless the caller supplied them
        directory_result = model.ValidationResult()
        data_directory = run_data_directory
        data_files = run_data_directory_file_list
        try:
            if data_directory is None:
                data_directory = Parser.get_full_data_directory(sample_sheet)
            if data_files is None:
                data_files = common.get_file_list(data_directory)
        except exceptions.DirectoryError as directory_error:
            directory_result.add_error(directory_error)
            logging.error(files_failure)
            raise exceptions.ValidationError(files_failure, directory_result)

        # Step 2: the sample sheet itself has to validate before anything is parsed from it
        sheet_result = validation.validate_sample_sheet(sample_sheet)
        if not sheet_result.is_valid():
            logging.error(sheet_failure)
            raise exceptions.ValidationError(sheet_failure, sheet_result)

        # Step 3: parse the run metadata; this result object is also used by step 4
        parse_result = model.ValidationResult()
        try:
            run_metadata = sample_parser.parse_metadata(sample_sheet)
        except exceptions.SampleSheetError as sheet_error:
            parse_result.add_error(sheet_error)
            logging.error(metadata_failure)
            raise exceptions.ValidationError(metadata_failure, parse_result)

        # Step 4: parse the samples and assemble the sequencing run
        try:
            samples = sample_parser.parse_sample_list(
                sample_sheet, data_directory, data_files)
            return common.build_sequencing_run_from_samples(samples, run_metadata)
        except exceptions.SequenceFileError as file_error:
            parse_result.add_error(file_error)
            logging.error(build_failure)
            raise exceptions.ValidationError(build_failure, parse_result)