Exemplo n.º 1
0
def validate_sequencing_run(sequencing_run):
    """
    Check a SequencingRun, and everything nested inside it, before upload to irida

    The run, each of its projects, each project's samples and each sample's
    sequence file are validated in turn. Errors are collected rather than raised,
    so problems at several levels can be reported together. A parser with proper
    validation should never produce errors here; this is meant as a final
    redundancy on parsers and as an aid when building a model or parser from scratch.

    :param sequencing_run: SequencingRun object to validate
    :return: ValidationResult object holding every error collected, if any
    """
    result = model.ValidationResult()

    # One validator per model type, each built from that type's uploadable schema
    run_validator = Validator(model.SequencingRun.uploadable_schema,
                              allow_unknown=True)
    project_validator = Validator(model.Project.uploadable_schema,
                                  allow_unknown=True)
    sample_validator = Validator(model.Sample.uploadable_schema,
                                 allow_unknown=True)
    file_validator = Validator(model.SequenceFile.uploadable_schema,
                               allow_unknown=True)

    def check_sample(sample):
        # A failing sample is recorded; the remaining samples are still checked
        try:
            _validate_object(sample_validator, sample)
            _validate_object(file_validator, sample.sequence_file)
            # Additional sequence file name check, done after the schema validation
            _validate_sequence_file_names(sample.sequence_file)
        except model.exceptions.ModelValidationError as error:
            result.add_error(error)

    def check_project(project):
        # A failing project is recorded and its samples are skipped;
        # the remaining projects are still checked
        try:
            _validate_object(project_validator, project)
            for sample in project.sample_list:
                check_sample(sample)
        except model.exceptions.ModelValidationError as error:
            result.add_error(error)

    # If the run itself is invalid, none of its projects are inspected
    try:
        _validate_object(run_validator, sequencing_run)
        for project in sequencing_run.project_list:
            check_project(project)
    except model.exceptions.ModelValidationError as error:
        result.add_error(error)

    return result
Exemplo n.º 2
0
def prepare_and_validate_for_upload(sequencing_run):
    """
    Prepares IRIDA to accept the sequencing run

    For every project in the run, checks that the project exists on IRIDA.
    A missing project is recorded as an error and its samples are skipped.
    For every sample of an existing project, checks that the sample exists on
    that project; if it does not, the sample is sent to IRIDA and its
    existence is checked again to confirm the creation.

    No error is raised from the checks themselves: IridaResourceError raised
    while sending a sample, and every failed check, is collected in the
    returned ValidationResult.

    :param sequencing_run: SequencingRun object
    :return: ValidationResult object with all errors that raised while prepping
    """
    # get api
    api_instance = _get_api_instance()

    validation_result = model.ValidationResult()
    # Start online validation
    logging.debug("Checking existence of projects")
    for project in sequencing_run.project_list:
        logging.debug("Checking existence of project: {}".format(project.id))
        if not api_instance.project_exists(project.id):
            # No project, add error to validation result and move on to the next project
            logging.debug("Could not find project: {}".format(project.id))
            err = api.exceptions.IridaResourceError("Project does not exist", project.id)
            validation_result.add_error(err)
            continue
        logging.debug("Project {} exists".format(project.id))

        logging.debug("Checking existence of samples")
        for sample in project.sample_list:
            logging.debug("Checking existence of Sample {} on Project {}".format(sample.sample_name, project.id))
            if api_instance.sample_exists(sample.sample_name, project.id):
                logging.debug("Sample {} exists on Project {}".format(sample.sample_name, project.id))
            else:
                logging.debug("Sample not found, creating new Sample")
                try:
                    api_instance.send_sample(sample, project.id)
                except api.exceptions.IridaResourceError as e:
                    # Creation failed outright; record it and try the next sample
                    logging.debug("Sample could not be created")
                    validation_result.add_error(e)
                    continue
                # The send did not raise, but confirm the sample is really there
                logging.debug("Verifying sample was created")
                if not api_instance.sample_exists(sample.sample_name, project.id):
                    logging.debug("Sample was not created")
                    # Fill in the project id; the message previously carried a literal "{}"
                    err = api.exceptions.IridaResourceError(
                        "Could not create new Sample on Project {}".format(project.id), project.id)
                    validation_result.add_error(err)
                    continue
                logging.debug("Sample Created")

    return validation_result
Exemplo n.º 3
0
    def get_sequencing_run(sample_sheet):
        """
        Build a SequencingRun from a sample sheet, validating locally along the way

        Raises a ValidationError with a ValidationResult attached when the sample
        sheet fails validation, when its metadata cannot be parsed, or when the
        sequencing run cannot be built from its samples.

        :param sample_sheet: sample sheet handed to the validator and the parser
        :return: SequencingRun
        """

        # Step 1: the sample sheet itself has to pass validation
        sheet_check = validation.validate_sample_sheet(sample_sheet)
        if not sheet_check.is_valid():
            message = "Errors occurred while getting sample sheet"
            logging.error(message)
            raise exceptions.ValidationError(message, sheet_check)

        # Errors from the parsing steps below are attached to this result
        parse_errors = model.ValidationResult()

        # Step 2: pull the run metadata out of the sample sheet
        try:
            metadata = sample_parser.parse_metadata(sample_sheet)
        except exceptions.SampleSheetError as err:
            parse_errors.add_error(err)
            message = "Errors occurred while parsing metadata"
            logging.error(message)
            raise exceptions.ValidationError(message, parse_errors)

        # Step 3: assemble the sequencing run from the sample sheet and its metadata
        try:
            return sample_parser.build_sequencing_run_from_samples(
                sample_sheet, metadata)
        except exceptions.SequenceFileError as err:
            parse_errors.add_error(err)
            message = "Errors occurred while building sequence run from sample sheet"
            logging.error(message)
            raise exceptions.ValidationError(message, parse_errors)
Exemplo n.º 4
0
def validate_sample_sheet(sample_sheet_file):
    """
    Checks if the given sample_sheet_file has what is needed to be parsed
    Requires a [Data] marker in the sheet
    Requires the row directly after the [Data] marker to contain the
        Sample_Name, Project_ID, File_Forward and File_Reverse table headers

    arguments:
            sample_sheet_file -- path to the sample sheet, passed to get_csv_reader

    returns ValidationResult object - holds one SampleSheetError for a missing
        [Data] section and one listing any missing required data headers
    """
    # Required data headers, in the order they are reported when missing
    required_headers = ("Sample_Name", "Project_ID", "File_Forward", "File_Reverse")

    csv_reader = get_csv_reader(sample_sheet_file)

    v_res = model.ValidationResult()

    data_sect_found = False
    check_data_headers = False
    found_headers = set()

    for line in csv_reader:

        if "[Data]" in line:
            data_sect_found = True
            check_data_headers = True  # next line contains data headers

        elif check_data_headers:
            # Headers only count when they appear on the row right after [Data]
            found_headers.update(
                header for header in required_headers if header in line)
            check_data_headers = False

    if not data_sect_found:
        v_res.add_error(exceptions.SampleSheetError("[Data] section not found in SampleSheet", sample_sheet_file))

    # Without a [Data] section no header row is ever read, so all headers are missing
    missing_headers = [header for header in required_headers
                       if header not in found_headers]
    if missing_headers:
        v_res.add_error(exceptions.SampleSheetError("Missing required data header(s): " +
                        ", ".join(missing_headers), sample_sheet_file))

    return v_res