Exemplo n.º 1
0
def organism_part_sex_check(sample: Dict, animal: Dict,
                            existing_results: VRR) -> VRR:
    """
    Context validation checking that the sample's organism part is compatible
    with the sex of the animal it was derived from.

    Only the organism part with ontology id UBERON_0001968 is checked: a
    female animal produces an error, an animal whose sex value contains
    'unknown sex' produces a warning, and any other sex value adds nothing.
    :param sample: the sample record
    :param animal: the animal record the sample is derived from
    :param existing_results: the existing validation result
    :return: the updated validation result (the same object that was passed in)
    """
    animal_sex: str = animal['attributes']['Sex'][0]['value']
    part_iri = sample['attributes']['Organism part'][0]['terms'][0]['url']
    part_term = misc.extract_ontology_id_from_iri(part_iri)

    # every other organism part is acceptable for any sex
    if part_term != 'UBERON_0001968':
        return existing_results

    normalised_sex = animal_sex.lower()
    if normalised_sex == "female":
        status = VRConstant.ERROR
        message = "Organism part (S***n) could not be taken from a female animal"
    elif 'unknown sex' in normalised_sex:
        # the third allowed sex value is 'record of unknown sex'
        status = VRConstant.WARNING
        message = ("Organism part (S***n) is expected to be taken from a male animal, "
                   "please check the sex value (record of unknown sex) is correct")
    else:
        return existing_results

    existing_results.add_validation_result_column(
        VRC(status, message, existing_results.record_id, "organism part",
            VRConstant.CONTEXT))
    return existing_results
Exemplo n.º 2
0
    def validate(self, record: Dict, id_field: str = 'Data source ID') -> VRR:
        """
        Validate one record against every section of the ruleset whose
        condition the record meets, then warn about every attribute that no
        applied section defines.
        :param record: the record data, must contain an 'attributes' mapping
        :param id_field: the name of the id field, in IMAGE ruleset it is Data source ID
        :return: the validation result collected for this record
        """
        logger.debug(f"got record: {record}, id_field: {id_field}")
        attributes = record['attributes']
        record_id = attributes[id_field][0]['value']
        record_result = VRR(record_id)

        # columns not claimed by any applied section; the id column is
        # always considered known
        leftover = attributes.copy()
        del leftover[id_field]

        for section_name in self.get_all_section_names():
            logger.debug(f"Processing section_name: {section_name}")
            section_rule = self.get_section_by_name(section_name)
            if not section_rule.meet_condition(record):
                logger.debug("section_rule %s doesn't meet_condition" %
                             section_name)
                continue

            logger.debug("Applying " + section_name +
                         " ruleset to record " + record_id)
            for column_result in section_rule.validate(attributes, record_id,
                                                       id_field):
                record_result.add_validation_result_column(column_result)

            # every field this section knows about is no longer unmapped
            for field_name in section_rule.get_rule_names():
                leftover.pop(field_name, None)

        # the unmapped column check can only happen after all sections have
        # been applied, so it lives here rather than in the section rules
        if not leftover:
            logger.debug("No unmapped columns left")
            return record_result

        logger.debug("found those unmapped keys: %s" % (leftover.keys()))
        for key in leftover:
            record_result.add_validation_result_column(
                VRC(VRConstants.WARNING,
                    f"Column {key} could not be found in ruleset",
                    record_id, key))
        return record_result
Exemplo n.º 3
0
def species_check(record: Dict, existing_results: VRR) -> VRR:
    """
    Context validation to check that the species specified in the USI
    structure (taxonId) matches the ontology term used in the species field.

    The taxon id must be the complete trailing number of the term URL. A
    plain suffix comparison is not enough: taxonId 823 would otherwise be
    accepted for a URL ending in ..._9823.
    :param record: the record data, providing 'taxonId' and the species attribute
    :param existing_results: the existing validation result
    :return: the updated validation result (the same object that was passed in)
    """
    taxon_id = record['taxonId']
    url = record['attributes'][SPECIES][0]['terms'][0]['url']
    taxon = str(taxon_id)

    matched = url.endswith(taxon)
    if matched:
        # reject partial matches: the character in front of the suffix must
        # not be another digit of a longer identifier
        preceding = url[:len(url) - len(taxon)]
        matched = not preceding[-1:].isdigit()

    if not matched:
        existing_results.add_validation_result_column(
            VRC(
                VRConstant.ERROR,
                f"taxonId {taxon_id} does not match ontology term used in species {url}",
                existing_results.record_id, "taxonomy", VRConstant.CONTEXT))
    return existing_results
Exemplo n.º 4
0
    def test_sample_relationship_issue(self):
        """Check that a relationship pointing to an alias which cannot be
        resolved is reported as an error. Not sure if it can happen or not"""

        # start from the sample fixture and point its only relationship to
        # an alias with no matching object
        sample = self.sample_record
        dangling_relationship = {
            "alias": "IMAGEA999999999",
            "relationshipNature": "derived from"
        }
        sample["sampleRelationships"] = [dangling_relationship]

        # an empty result record to collect the relationship issues
        empty_result = ValidationResultRecord(record_id=sample['title'])

        related, result = self.metadata.check_relationship(
            sample, empty_result)

        # nothing could be resolved, and this is flagged as an error
        self.assertEqual(related, [])
        self.assertEqual(result.get_overall_status(), 'Error')
        first_message = result.get_messages()[0]
        self.assertIn("Could not locate the referenced record", first_message)
Exemplo n.º 5
0
def check_value_equal(source: Dict, target: Dict, existing_results: VRR,
                      field: str) -> VRR:
    """
    Context validation to check that source and target records carry the same
    value in the given field; an error is added when they differ.

    The wording of the error depends on the Material of the source record:
    'organism' is reported as child/parent, anything else as sample/related
    animal.
    :param source: the record being validated
    :param target: the related record to compare against
    :param existing_results: the existing validation result
    :param field: the name of the attribute to compare
    :return: the updated validation result (the same object that was passed in)
    """
    expected = target['attributes'][field][0]['value']
    actual = source['attributes'][field][0]['value']

    if source['attributes']['Material'][0]['value'] == 'organism':
        source_label, target_label = 'child', 'parent'
    else:
        source_label, target_label = 'sample', 'related animal'

    if expected != actual:
        message = (
            f"The {field} of {source_label} ({actual}) does not "
            f"match to the {field} of {target_label} ({expected})")
        existing_results.add_validation_result_column(
            VRC(VRConstant.ERROR, message, existing_results.record_id, field,
                VRConstant.CONTEXT))
    return existing_results
Exemplo n.º 6
0
def context_validation(record: Dict,
                       existing_results: VRR,
                       related: List = None) -> VRR:
    """
    Run the validations which depend on context, i.e. where the value in one
    field affects the allowed values in another field, or where more than one
    record is involved.

    The coordinate and species checks always run. The relationship checks
    only run when related records are supplied: an organism may have at most
    two parents, any other material must derive from exactly one animal.
    :param record: the record data
    :param existing_results: the existing validation result
    :param related: list of the related records either parents or related animal, could be empty list
    :return: updated validation result
    """
    existing_results = coordinate_check(record['attributes'], existing_results)
    existing_results = species_check(record, existing_results)
    record_id = existing_results.record_id

    # no relationships, nothing else to check
    if not related:
        return existing_results

    is_organism = record['attributes']['Material'][0]['value'] == "organism"
    related_count = len(related)

    if is_organism:
        if related_count > 2:
            existing_results.add_validation_result_column(
                VRC(
                    VRConstant.ERROR,
                    "Having more than 2 parents defined in sampleRelationships",
                    existing_results.record_id, "sampleRelationships",
                    VRConstant.CONTEXT))
            return existing_results

        existing_results = child_of_check(record, related, existing_results)
        # the sex comparison needs both parents
        if related_count == 2:
            existing_results = parents_sex_check(related, existing_results)
        return existing_results

    if related_count != 1:
        existing_results.add_validation_result_column(
            VRC(VRConstant.ERROR,
                "Specimen can only derive from one animal", record_id,
                "sampleRelationships", VRConstant.CONTEXT))
        return existing_results

    return animal_sample_check(record, related[0], existing_results)
Exemplo n.º 7
0
def species_breed_check(animal: Dict, existing_results: VRR) -> VRR:
    """
    Check whether the mapped breed (recommended) belongs to the given species.

    The mapped breed must descend from either the general breed term or the
    general crossbreed term of the species, otherwise an error is added.
    When no mapped breed is present, a warning is added saying that no check
    has been carried out on the supplied breed (mandatory).
    :param animal: the animal record to be validated
    :param existing_results: the existing validation result
    :return: the updated validation result (the same object that was passed in)
    """
    attrs = animal['attributes']
    species = attrs[SPECIES][0]['value']

    # the root breed term of the species is resolved up front, before
    # looking at whether a mapped breed exists
    root_breed = use_ontology.get_general_breed_by_species(species)
    root_breed_term = root_breed['ontologyTerms'].rsplit("/", 1)[1]

    if 'Mapped breed' not in attrs:
        supplied_breed = attrs['Supplied breed'][0]['value']
        existing_results.add_validation_result_column(
            VRC(
                VRConstant.WARNING,
                f"No check has been carried out on whether "
                f"{supplied_breed} is a {species} breed as no mapped breed provided",
                existing_results.record_id, "Supplied breed",
                VRConstant.CONTEXT))
        return existing_results

    mapped_breed = attrs['Mapped breed'][0]['terms'][0]['url']
    if static_parameters.ontology_library.has_parent(mapped_breed,
                                                     root_breed_term):
        return existing_results

    # not a pure breed of this species, fall back to its crossbreed root
    root_crossbreed = use_ontology.get_general_breed_by_species(
        species, cross=True)
    root_crossbreed_term = root_crossbreed['ontologyTerms'].rsplit("/", 1)[1]
    if static_parameters.ontology_library.has_parent(mapped_breed,
                                                     root_crossbreed_term):
        return existing_results

    existing_results.add_validation_result_column(
        VRC(
            VRConstant.ERROR,
            f"The mapped breed {mapped_breed} does not match the given species {species}",
            existing_results.record_id, "Mapped breed",
            VRConstant.CONTEXT))
    return existing_results
Exemplo n.º 8
0
 def load_ruleset(self, ruleset_file: str) -> VRR:
     """
     Load the ruleset from the JSON file and check its integrity.

     The ruleset ready flag is cleared first and only set again when the
     ruleset has been read and the integrity check reports "Pass".
     :param ruleset_file: the JSON file containing the ruleset
     :return: a "general" result holding the KeyError message when reading
         fails, the integrity check result when that check does not pass,
         otherwise the "general" result with nothing added to it
     """
     self.ruleset_pass_flag = False
     outcome = VRR("general")

     try:
         self.ruleset = validation.read_in_ruleset(ruleset_file)
     except KeyError as e:
         outcome.add_validation_result_column(
             VRC(VRConstants.ERROR, str(e), outcome.record_id, "",
                 VRConstants.GENERAL))
         return outcome

     integrity: VRR = validation.check_ruleset(self.ruleset)
     if integrity.get_overall_status() == "Pass":
         logger.info("Ruleset loaded")
         self.ruleset_pass_flag = True
         return outcome

     # hand the integrity problems back to the caller as they are
     return integrity
Exemplo n.º 9
0
def parents_sex_check(related: List[Dict], existing_results: VRR) -> VRR:
    """
    Context validation to check that the two annotated parents have different
    sexes.

    When the sex value of at least one parent contains 'unknown sex' the
    comparison is skipped and a warning is added instead; otherwise two
    identical sex values produce an error.
    :param related: the list of two parent animals
    :param existing_results: the existing validation result
    :return: the updated validation result (the same object that was passed in)
    """
    first_parent, second_parent = related[0], related[1]
    first_sex: str = first_parent['attributes']['Sex'][0]['value']
    second_sex: str = second_parent['attributes']['Sex'][0]['value']

    if any("unknown sex" in sex.lower() for sex in (first_sex, second_sex)):
        existing_results.add_validation_result_column(
            VRC(
                VRConstant.WARNING,
                "At least one parent has unknown value for sex, thus could not be checked",
                existing_results.record_id, "parents sex", VRConstant.CONTEXT))
    elif first_sex == second_sex:
        existing_results.add_validation_result_column(
            VRC(VRConstant.ERROR, "Two parents could not have same sex",
                existing_results.record_id, "parents sex", VRConstant.CONTEXT))
    return existing_results
Exemplo n.º 10
0
def coordinate_check(record: Dict, existing_results: VRR) -> VRR:
    """
    Context validation to check whether the value in the place field agrees
    with the value in the matching accuracy field.

    The place field is "Birth location" when Material is "organism" and
    "Collection place" otherwise. An error is added when the place is absent
    while the accuracy is not "missing geographic information", or when the
    place is present while the accuracy is "missing geographic information".
    :param record: the attributes of the record, keyed by field name; any
        dict subclass is accepted
    :param existing_results: the existing validation result
    :return: the updated validation result (the same object that was passed in)
    :raises TypeError: when record is not a dict or existing_results is not a
        ValidationResultRecord
    :raises KeyError: when Material or the accuracy field is missing
    """
    # isinstance rather than an exact type comparison, so that subclasses
    # (e.g. OrderedDict) are not rejected
    if not isinstance(record, dict):
        raise TypeError("record needs to be a record represented as a Dict")
    if not isinstance(existing_results, VRR):
        raise TypeError(
            "The existing results parameter needs to be a ValidationResultRecord object"
        )

    material = record['Material'][0]['value']
    place_field_name = "Birth location" if material == "organism" else "Collection place"
    place_accuracy_field_name = place_field_name + " accuracy"

    accuracy_is_missing = (record[place_accuracy_field_name][0]['value'] ==
                           "missing geographic information")
    msg = None
    if place_field_name not in record:
        if not accuracy_is_missing:
            msg = f"No value provided for field {place_field_name} but value in field" \
                f" {place_accuracy_field_name} is not missing geographic information"
    elif accuracy_is_missing:
        msg = f"Value {record[place_field_name][0]['value']} provided for field {place_field_name} " \
            f"but value in field {place_accuracy_field_name} is missing geographic information"

    if msg is not None:
        existing_results.add_validation_result_column(
            VRC(VRConstant.ERROR, msg, existing_results.record_id,
                place_field_name, VRConstant.CONTEXT))
    return existing_results
Exemplo n.º 11
0
 def load_data(self, data_file: str, section: str = '') -> VRR:
     """
     Load the data from the JSON file which is to be validated and do the
     preliminary validation (usi structure and duplicate ids).

     The data ready flag is cleared first and only set again when every
     preliminary check succeeds.
     :param data_file: the JSON file contains the data
     :param section: optional, the name of the section which contains data;
         ignored when empty or when the loaded data has no such section
     :return: a "general" result holding the problem when the file is
         missing, is not valid JSON or contains duplicate ids; the usi
         structure check result when that check does not pass; otherwise
         the "general" result with nothing added to it
     """
     self.data_ready_flag = False
     general_errors = VRR("general")
     try:
         # JSON text is UTF-8, do not depend on the platform default encoding
         with open(data_file, encoding="utf-8") as infile:
             self.data = json.load(infile)
     except FileNotFoundError:
         msg = f"Could not find the file {data_file}"
         general_errors.add_validation_result_column(
             VRC(VRConstants.ERROR, msg, general_errors.record_id, "",
                 VRConstants.GENERAL))
         return general_errors
     except (json.decoder.JSONDecodeError, UnicodeDecodeError):
         # a file which cannot be decoded is reported like any other
         # malformed JSON file instead of crashing the caller
         msg = f"The provided file {data_file} is not a valid JSON file."
         general_errors.add_validation_result_column(
             VRC(VRConstants.ERROR, msg, general_errors.record_id, "",
                 VRConstants.GENERAL))
         return general_errors

     # narrow down to the requested section when it exists
     if section and section in self.data:
         self.data = self.data[section]

     # check usi structure
     usi_check_result = validation.check_usi_structure(self.data)
     if usi_check_result.get_overall_status() != "Pass":
         return usi_check_result

     # check duplicate id
     msgs = validation.check_duplicates(self.data, self.id_field)
     if msgs:
         for msg in msgs:
             # it is implicitly required that the id field holds unique
             # values; duplicates are reported against the id field
             general_errors.add_validation_result_column(
                 VRC(VRConstants.ERROR, msg, general_errors.record_id,
                     self.id_field, VRConstants.RELATIONSHIP))
         return general_errors

     logger.info("All sample records have unique data source ids")
     self.data_ready_flag = True
     return general_errors
Exemplo n.º 12
0
    def check_biosample_id(self, mock_get, status_code):
        """Shared driver for the biosample id tests: make the patched get
        answer with the given status code, run check_biosample_id_target and
        return the resulting record"""

        # the patched get returns a response carrying only the status code
        mock_get.return_value = Mock(status_code=status_code)

        # an empty result record to collect the outcome
        blank_result = ValidationResultRecord(record_id="test")

        validator = MetaDataValidation()
        outcome = validator.check_biosample_id_target(
            "FAKEA123456", "test", blank_result)

        # the ruleset helpers and the patched get must all have been used
        for patched in (self.check_ruleset, self.read_in_ruleset, mock_get):
            self.assertTrue(patched.called)

        return outcome
Exemplo n.º 13
0
    def test_validate_submission_errors(self, my_validate, my_check):
        """A submission with errors is a NEED_REVISION submission"""

        # check_usi_structure reports a clean structure
        usi_result = PickableMock()
        usi_result.get_overall_status.return_value = "Pass"
        usi_result.get_messages.return_value = []
        my_check.return_value = usi_result

        # what check_with_ruleset returns, one entry per record in call
        # order: (record id, status, message, column)
        outcomes = (
            ("animal_1", "warning", "warn message", "warn column"),
            ("animal_2", "pass", "a message", ""),
            ("animal_3", "pass", "a message", ""),
            ("sample_1", "error", "error message", "error column"),
        )

        responses = []
        for record_id, status, message, column in outcomes:
            record_result = ValidationResultRecord(record_id)
            record_result.add_validation_result_column(
                ValidationResultColumn(status, message, record_id, column))
            responses.append(record_result)

        my_validate.side_effect = responses

        # run the validation task
        task_result = self.my_task.run(submission_id=self.submission_id)

        # the task itself completes successfully
        self.assertEqual(task_result, "success")

        # reload the submission to see what the task stored
        self.submission.refresh_from_db()

        # the submission needs revision because of the sample error
        self.assertEqual(self.submission.status, NEED_REVISION)
        self.assertIn("Error in metadata", self.submission.message)

        # animals are all ok
        self.check_model_status(self.animal_qs, responses, READY)

        # the sample results follow the animal ones in the response list
        sample_responses = responses[self.animal_qs.count():]
        self.check_model_status(self.sample_qs, sample_responses,
                                NEED_REVISION)

        # both patched validation steps were used
        self.assertTrue(my_check.called)
        self.assertTrue(my_validate.called)

        # the ruleset was read and checked, and no retry was requested
        self.assertTrue(self.read_in_ruleset.called)
        self.assertTrue(self.check_ruleset.called)
        self.assertFalse(self.validate_retry.called)

        expected_summary = {
            'animals': self.n_animals,
            'samples': self.n_samples,
            'animal_unkn': 0,
            'sample_unkn': 0,
            'animal_issues': 0,
            'sample_issues': 1
        }
        self.check_message(
            message='Need Revision',
            notification_message=('Validation got errors: Error in '
                                  'metadata. Need revisions before submit'),
            validation_message=expected_summary,
            pk=1)
Exemplo n.º 14
0
    def test_validate_submission_wrong_json(self, my_validate, my_check):
        """Test an error in JSON format"""

        # check_usi_structure reports two structural errors for animal_1
        messages = [('Wrong JSON structure: no title field for record with '
                     'alias as animal_1'),
                    ('Wrong JSON structure: the values for attribute Person '
                     'role needs to be in an array for record animal_1')]

        usi_result = ValidationResultRecord("animal_1")
        for message in messages:
            usi_result.add_validation_result_column(
                ValidationResultColumn("error", message, "animal_1", ""))

        # the same failing result is handed out 4 times, and kept in a list
        # so it can be passed to check_model_status
        responses = [usi_result] * 4
        my_check.side_effect = responses

        # check_with_ruleset would pass, if it was ever called
        rule_result = Mock()
        rule_result.get_overall_status.return_value = "Pass"
        my_validate.return_value = rule_result

        # run the validation task
        task_result = self.my_task.run(submission_id=self.submission_id)

        # the task itself completes successfully
        self.assertEqual(task_result, "success")

        # reload the submission to see what the task stored
        self.submission.refresh_from_db()

        # the submission needs revision
        self.assertEqual(self.submission.status, NEED_REVISION)
        self.assertIn("Validation got errors", self.submission.message)

        # animals have issues
        self.check_model_status(self.animal_qs, responses, NEED_REVISION)

        # and so do samples
        sample_responses = responses[self.animal_qs.count():]
        self.check_model_status(self.sample_qs, sample_responses,
                                NEED_REVISION)

        # with an invalid JSON structure the ruleset validation is skipped
        self.assertTrue(my_check.called)
        self.assertFalse(my_validate.called)

        # the ruleset was read and checked, and no retry was requested
        self.assertTrue(self.read_in_ruleset.called)
        self.assertTrue(self.check_ruleset.called)
        self.assertFalse(self.validate_retry.called)

        # every animal and every sample is counted as having issues
        expected_summary = {
            'animals': self.n_animals,
            'samples': self.n_samples,
            'animal_unkn': 0,
            'sample_unkn': 0,
            'animal_issues': self.n_animals,
            'sample_issues': self.n_samples
        }
        self.check_message(
            'Need Revision', ('Validation got errors: Error in metadata. '
                              'Need revisions before submit'),
            expected_summary, 1)