def prepare_and_validate_for_upload(sequencing_run):
    """
    Prepares IRIDA to accept the sequencing run

    Validates that every project of the run exists, and creates each sample on its project
    if it does not exist there yet. Nothing is raised for these problems: all errors are
    collected in a ValidationResult so that one call reports every problem found.

    :param sequencing_run: SequencingRun object
    :return: ValidationResult object with all errors that were raised while prepping
    """
    api_instance = _get_api_instance()
    validation_result = model.ValidationResult()

    # Start online validation
    logging.debug("Checking existence of projects")
    for project in sequencing_run.project_list:
        logging.debug("Checking existence of project: {}".format(project.id))
        if not api_instance.project_exists(project.id):
            # No project: record the error and skip this project's samples
            logging.debug("Could not find project: {}".format(project.id))
            validation_result.add_error(
                api.exceptions.IridaResourceError("Project does not exist", project.id))
            continue
        logging.debug("Project {} exists".format(project.id))

        logging.debug("Checking existence of samples")
        for sample in project.sample_list:
            logging.debug("Checking existence of Sample {} on Project {}".format(
                sample.sample_name, project.id))
            if api_instance.sample_exists(sample.sample_name, project.id):
                logging.debug("Sample {} exists on Project {}".format(
                    sample.sample_name, project.id))
                continue

            logging.debug("Sample not found, creating new Sample")
            try:
                api_instance.send_sample(sample, project.id)
            except (api.exceptions.IridaResourceError,
                    api.exceptions.IridaConnectionError) as e:
                # Both failure kinds are handled the same way: record and move on
                logging.debug("Sample could not be created")
                validation_result.add_error(e)
                continue

            # A successful send is not trusted blindly, the sample is looked up again
            logging.debug("Verifying sample was created")
            if not api_instance.sample_exists(sample.sample_name, project.id):
                logging.debug("Sample was not created")
                # The placeholder must be filled in here, otherwise the error message
                # would carry a literal "{}" instead of the project id
                validation_result.add_error(
                    api.exceptions.IridaResourceError(
                        "Could not create new Sample on Project {}".format(project.id),
                        project.id))
                continue
            logging.debug("Sample Created")

    return validation_result
def validate_sequencing_run(sequencing_run):
    """
    Validate a SequencingRun object for upload to irida

    A parser that works as intended (and validates properly) should never produce a run
    that fails here. Use this when building a model from scratch, when writing a new
    parser, and as a final safety net behind existing parsers.

    The checks are nested: a failing sample is recorded and the next sample is checked,
    a failing project is recorded and the next project is checked, and a failing run
    stops the validation.

    :param sequencing_run: SequencingRun object to validate
    :return: ValidationResult object with list of errors if any
    """
    result = model.ValidationResult()

    # One validator per model level, all tolerant of unknown fields
    run_validator, project_validator, sample_validator, file_validator = [
        Validator(model_class.uploadable_schema, allow_unknown=True)
        for model_class in (model.SequencingRun, model.Project, model.Sample, model.SequenceFile)
    ]

    try:
        # Base SequencingRun object
        _validate_object(run_validator, sequencing_run)
        for project in sequencing_run.project_list:
            try:
                # Project, then every sample that belongs to it
                _validate_object(project_validator, project)
                for sample in project.sample_list:
                    try:
                        _validate_object(sample_validator, sample)
                        _validate_object(file_validator, sample.sequence_file)
                        # File name rule that the schema alone does not cover
                        _validate_sequence_file_names(sample.sequence_file)
                    except model.exceptions.ModelValidationError as sample_error:
                        result.add_error(sample_error)
            except model.exceptions.ModelValidationError as project_error:
                result.add_error(project_error)
    except model.exceptions.ModelValidationError as run_error:
        result.add_error(run_error)

    return result
def get_sequencing_run(sample_sheet, run_data_directory_file_list=None):
    """
    Does local validation on the integrity of the run directory / sample sheet

    Throws a ValidationError with a validation result attached if it cannot make a sequencing run

    :param sample_sheet: Sample sheet file
    :param run_data_directory_file_list: Optional: list of files in the data directory.
        When omitted, the files of the directory that holds the sample sheet are listed
    :return: SequencingRun
    """
    result = model.ValidationResult()

    # Fall back to the sample sheet's own directory for the file list
    if run_data_directory_file_list is None:
        try:
            sheet_directory = os.path.dirname(sample_sheet)
            run_data_directory_file_list = common.get_file_list(sheet_directory)
        except exceptions.DirectoryError as directory_error:
            message = "Errors occurred while parsing files"
            result.add_error(directory_error)
            logging.error(message)
            raise exceptions.ValidationError(message, result)

    # The sample sheet itself has to be valid before anything is parsed from it
    result = validation.validate_sample_sheet(sample_sheet)
    if not result.is_valid():
        message = "Errors occurred while getting sample sheet"
        logging.error(message)
        raise exceptions.ValidationError(message, result)

    # Build the run from the samples and the metadata derived from them
    try:
        samples = sample_parser.parse_sample_list(sample_sheet, run_data_directory_file_list)
        metadata = sample_parser.parse_metadata(samples)
        built_run = common.build_sequencing_run_from_samples(samples, metadata)
    except exceptions.SequenceFileError as file_error:
        message = "Errors occurred while building sequence run from sample sheet"
        result.add_error(file_error)
        logging.error(message)
        raise exceptions.ValidationError(message, result)

    return built_run
def get_sequencing_run(sample_sheet):
    """
    Does local validation on the integrity of the run directory / sample sheet

    Throws a ValidationError with a validation result attached if it cannot make a sequencing run

    :param sample_sheet: Sample sheet file
    :return: SequencingRun
    """
    # The sample sheet itself has to be valid before anything is parsed from it
    sheet_result = validation.validate_sample_sheet(sample_sheet)
    if not sheet_result.is_valid():
        message = "Errors occurred while getting sample sheet"
        logging.error(message)
        raise exceptions.ValidationError(message, sheet_result)

    # Fresh result that carries whichever parsing error happens below
    parse_result = model.ValidationResult()

    # Metadata comes straight from the sample sheet
    try:
        metadata = sample_parser.parse_metadata(sample_sheet)
    except exceptions.SampleSheetError as sheet_error:
        message = "Errors occurred while parsing metadata"
        parse_result.add_error(sheet_error)
        logging.error(message)
        raise exceptions.ValidationError(message, parse_result)

    # Build the run from the sample sheet and its metadata
    try:
        built_run = sample_parser.build_sequencing_run_from_samples(sample_sheet, metadata)
    except exceptions.SequenceFileError as file_error:
        message = "Errors occurred while building sequence run from sample sheet"
        parse_result.add_error(file_error)
        logging.error(message)
        raise exceptions.ValidationError(message, parse_result)

    return built_run
def validate_file_size_minimum(sequencing_run):
    """
    Check every sequence file of a SequencingRun against the configured minimum file size

    The minimum is read from the `minimum_file_size` config option (0 when not set).

    :param sequencing_run: SequencingRun object to validate
    :return: ValidationResult object with list of errors if any
    """
    size_limit = config.read_config_option("minimum_file_size", int, 0)
    result = model.ValidationResult()
    message_template = (
        "File size for sample `{}`is smaller than configured minimum of `{} KB`. "
        "Please verify your data."
    )

    for project in sequencing_run.project_list:
        for sample in project.sample_list:
            if _file_size_is_valid(sample.sequence_file, size_limit):
                continue
            # Too small: report it together with the offending sequence file
            message = message_template.format(sample.sample_name, size_limit)
            result.add_error(FileSizeError(message, sample.sequence_file))

    return result
def get_sequencing_run(self, sample_sheet, run_data_directory=None, run_data_directory_file_list=None):
    """
    Does local validation on the integrity of the run directory / sample sheet

    Throws a ValidationError with a validation result attached if it cannot make a sequencing run

    :param sample_sheet: Sample Sheet File
    :param run_data_directory: Optional: Directory (including run directory) to data files.
        Can be provided for bypassing os calls when developing on cloud systems
    :param run_data_directory_file_list: Optional: List of files in data directory.
        Can be provided for bypassing os calls when developing on cloud systems
    :return: SequencingRun
    """
    # Resolve the data directory and its file list unless the caller supplied them
    result = model.ValidationResult()
    try:
        if run_data_directory is None:
            run_data_directory = Parser.get_full_data_directory(sample_sheet)
        if run_data_directory_file_list is None:
            run_data_directory_file_list = common.get_file_list(run_data_directory)
    except exceptions.DirectoryError as directory_error:
        message = "Errors occurred while parsing files"
        result.add_error(directory_error)
        logging.error(message)
        raise exceptions.ValidationError(message, result)

    # The sample sheet itself has to be valid before anything is parsed from it
    result = validation.validate_sample_sheet(sample_sheet)
    if not result.is_valid():
        message = "Errors occurred while getting sample sheet"
        logging.error(message)
        raise exceptions.ValidationError(message, result)

    # Fresh result that carries whichever parsing error happens below
    result = model.ValidationResult()

    # Metadata comes straight from the sample sheet
    try:
        metadata = sample_parser.parse_metadata(sample_sheet)
    except exceptions.SampleSheetError as sheet_error:
        message = "Errors occurred while parsing metadata"
        result.add_error(sheet_error)
        logging.error(message)
        raise exceptions.ValidationError(message, result)

    # Build the run from the parsed samples, the metadata and this parser's type name
    try:
        samples = sample_parser.parse_sample_list(
            sample_sheet, run_data_directory, run_data_directory_file_list)
        built_run = common.build_sequencing_run_from_samples(
            samples, metadata, self.get_parser_type_name())
    except exceptions.SequenceFileError as file_error:
        message = "Errors occurred while building sequence run from sample sheet"
        result.add_error(file_error)
        logging.error(message)
        raise exceptions.ValidationError(message, result)

    return built_run
def validate_sample_sheet(sample_sheet_file):
    """
    Checks if the given sample_sheet_file can be parsed

    Requires a [Data] section, and requires the Sample_Name, Project_ID, File_Forward
    and File_Reverse table headers on the line that follows a [Data] line

    arguments:
        sample_sheet_file -- path to SampleSheet.csv

    returns ValidationResult object - holds a SampleSheetError for each failed check
    """
    required_headers = ("Sample_Name", "Project_ID", "File_Forward", "File_Reverse")

    csv_reader = common.get_csv_reader(sample_sheet_file)
    result = model.ValidationResult()

    # status of required data headers
    header_seen = dict.fromkeys(required_headers, False)
    data_section_seen = False
    expect_header_row = False

    for row in csv_reader:
        if "[Data]" in row:
            data_section_seen = True
            # the row right after [Data] holds the data headers
            expect_header_row = True
            continue
        if not expect_header_row:
            continue
        for header in required_headers:
            if header in row:
                header_seen[header] = True
        expect_header_row = False

    if not data_section_seen:
        result.add_error(
            exceptions.SampleSheetError(
                "[Data] section not found in SampleSheet", sample_sheet_file))

    if not all(header_seen.values()):
        missing_headers = ", ".join(
            header for header in required_headers if not header_seen[header])
        result.add_error(
            exceptions.SampleSheetError(
                "Missing required data header(s): " + missing_headers,
                sample_sheet_file))

    return result