Exemplo n.º 1
0
    def check(self, dir, args):
        """Check that every biobank carries its basic descriptive fields.

        For each biobank returned by ``dir.getBiobanks()`` this reports:

        * ERROR if 'juridical_person' is absent, blank, or "N/A" / "NA";
        * WARNING if 'head_firstname' or 'head_lastname' is absent or blank;
        * INFO if 'head_role' is absent or blank;
        * ERROR if 'contact' is absent or is not a dict.

        :param dir: directory object; ``getBiobanks()`` and
            ``getBiobankNN()`` are called on it.
        :param args: not used by this check.
        :return: list of DataCheckWarning objects, in biobank order.
        """
        warnings = []
        log.info("Running biobank fields checks (BiobankFields)")

        def is_blank(entity, key):
            # A field is blank when the key is absent or holds only whitespace.
            # Raw pattern: '\s' in a plain literal is an invalid escape
            # sequence and triggers a SyntaxWarning on recent Pythons.
            return key not in entity or bool(
                re.search(r'^\s*$', entity[key]))

        def report(biobank, level, message):
            # All warnings of this check share everything but level and text.
            warnings.append(
                DataCheckWarning(self.__class__.__name__, "",
                                 dir.getBiobankNN(biobank['id']), level,
                                 biobank['id'], DataCheckEntityType.BIOBANK,
                                 message))

        for biobank in dir.getBiobanks():
            # "N/A" (with or without the slash) is treated like an empty value.
            if is_blank(biobank, 'juridical_person') or re.search(
                    r'^\s*N/?A\s*$', biobank['juridical_person']):
                report(
                    biobank, DataCheckWarningLevel.ERROR,
                    "Missing juridical person ('juridical_person' attribute is empty)"
                )

            if (is_blank(biobank, 'head_firstname')
                    or is_blank(biobank, 'head_lastname')):
                report(
                    biobank, DataCheckWarningLevel.WARNING,
                    "Missing head person name ('head_firstname' and/or 'head_lastname' attributes are empty)"
                )

            if is_blank(biobank, 'head_role'):
                report(
                    biobank, DataCheckWarningLevel.INFO,
                    "Missing head person role ('head_role' attribute is empty)"
                )

            # isinstance also accepts dict subclasses as a valid contact.
            if 'contact' not in biobank or not isinstance(
                    biobank['contact'], dict):
                report(biobank, DataCheckWarningLevel.ERROR,
                       "Missing valid contact for the biobank")

        return warnings
 def check(self, dir, args):
     """Report every biobank that has no collection attached to it.

     A biobank is flagged with an ERROR when the graph returned by
     ``dir.getGraphBiobankCollectionsFromBiobank()`` for its id has no
     edges.

     :param dir: directory object queried for biobanks and their graphs.
     :param args: not used by this check.
     :return: list of DataCheckWarning objects, one per flagged biobank.
     """
     found = []
     log.info("Running collection existence checks (CollectionExistence)")
     for bb in dir.getBiobanks():
         graph = dir.getGraphBiobankCollectionsFromBiobank(bb['id'])
         # Guard clause: a biobank with at least one edge is fine.
         if len(graph.edges) >= 1:
             continue
         issue = DataCheckWarning(
             self.__class__.__name__,
             "",
             dir.getBiobankNN(bb['id']),
             DataCheckWarningLevel.ERROR,
             bb['id'],
             DataCheckEntityType.BIOBANK,
             "Missing at least one collection for biobank",
         )
         found.append(issue)
     return found
Exemplo n.º 3
0
 def check(self, dir, args):
     """Report collections that are not linked to any biobank.

     A collection is flagged with an ERROR when the graph returned by
     ``dir.getGraphBiobankCollectionsFromCollection()`` for its id has no
     edges.

     :param dir: directory object queried for collections and their graphs.
     :param args: not used by this check.
     :return: list of DataCheckWarning objects, one per orphaned collection.
     """
     warnings = []
     log.info("Running orphaned collection checks (OrphanedCollections)")
     for collection in dir.getCollections():
         graph = dir.getGraphBiobankCollectionsFromCollection(
             collection['id'])
         if len(graph.edges) < 1:
             # The national node is resolved from the collection itself:
             # no biobank variable exists in this method, so referring to
             # one raised NameError on the first orphaned collection.
             warnings.append(
                 DataCheckWarning(self.__class__.__name__, "",
                                  dir.getCollectionNN(collection['id']),
                                  DataCheckWarningLevel.ERROR,
                                  collection['id'],
                                  DataCheckEntityType.COLLECTION,
                                  "Orphaned collection"))
     return warnings
	def check(self, dir, args):
		"""Check 'description' and 'name' of every biobank and collection.

		For each entity this reports, in this order:

		* WARNING if 'description' is absent, blank, or "N/A" / "NA";
		* WARNING if 'description' is present and ``descriptionTooShort()``
		  returns true for it;
		* ERROR if 'name' is absent, blank, or "N/A" / "NA".

		Biobanks are processed first, then collections.

		:param dir: directory object queried for biobanks and collections.
		:param args: not used by this check.
		:return: list of DataCheckWarning objects.
		"""
		warnings = []
		log.info("Running empty or semi-empty fields checks (SemiemptyFields)")

		def is_empty(entity, key):
			# Absent, whitespace-only, or a literal "N/A" / "NA" placeholder.
			# Raw patterns avoid the invalid '\s' escape of a plain literal.
			return (
				key not in entity
				or re.search(r'^\s*$', entity[key])
				or re.search(r'^\s*N/?A\s*$', entity[key])
			)

		def check_entity(entity, get_nn, entity_type, label):
			# Same three checks for both entity kinds; only the national-node
			# lookup, the entity type and the word used in messages differ.
			def report(level, message):
				warnings.append(DataCheckWarning(self.__class__.__name__, "", get_nn(entity['id']), level, entity['id'], entity_type, message))

			if is_empty(entity, 'description'):
				report(DataCheckWarningLevel.WARNING, f"Missing description for {label} ('description' attribute is empty for the {label})")
			if 'description' in entity and descriptionTooShort(entity['description']):
				report(DataCheckWarningLevel.WARNING, f"Suspiciously short description for {label} ('description' attribute {entity['description']} has less than {str(minDescWords)} words)")
			if is_empty(entity, 'name'):
				# The collection variant of this message used to say
				# "for the biobank"; it now names the right entity kind.
				report(DataCheckWarningLevel.ERROR, f"Missing name for {label} ('name' attribute is empty for the {label})")

		for biobank in dir.getBiobanks():
			check_entity(biobank, dir.getBiobankNN, DataCheckEntityType.BIOBANK, "biobank")

		for collection in dir.getCollections():
			check_entity(collection, dir.getCollectionNN, DataCheckEntityType.COLLECTION, "collection")

		return warnings
Exemplo n.º 5
0
    def check(self, dir, args):
        """Test the URLs published by biobanks and collections.

        Returns immediately with no warnings when 'URLs' is listed in
        ``args.disableChecksRemote``. Otherwise:

        * every biobank without a non-blank 'url' gets a WARNING, and every
          biobank with one has it passed to ``testURL()``;
        * every collection has its non-blank 'data_access_uri',
          'sample_access_uri' and 'image_access_uri' passed to
          ``testURL()``; missing access URIs are not reported here.

        :param dir: directory object queried for biobanks and collections.
        :param args: options object; ``disableChecksRemote`` and
            ``purgeCaches`` are read from it.
        :return: list of DataCheckWarning objects.
        """
        warnings = []
        log.info("Running URL checks (CheckURLs)")
        assert 'URLs' in __main__.remoteCheckList
        if 'URLs' in args.disableChecksRemote:
            return warnings

        cache_dir = 'data-check-cache/URLs'
        os.makedirs(cache_dir, exist_ok=True)
        # The cache is published as a module-level global, as before;
        # presumably testURL() reads it -- confirm before changing.
        global cache
        cache = Cache(cache_dir)
        # Collection attributes to test, with the label handed to testURL().
        uri_checks = (
            ('data_access_uri', "Data access URL for collection"),
            ('sample_access_uri', "Sample access URL for collection"),
            ('image_access_uri', "Image access URL for collection"),
        )
        try:
            if 'URLs' in args.purgeCaches:
                cache.clear()

            log.info("Testing biobank URLs")
            for biobank in dir.getBiobanks():
                if 'url' not in biobank or re.search(r'^\s*$',
                                                     biobank['url']):
                    warnings.append(
                        DataCheckWarning(self.__class__.__name__, "",
                                         dir.getBiobankNN(biobank['id']),
                                         DataCheckWarningLevel.WARNING,
                                         biobank['id'],
                                         DataCheckEntityType.BIOBANK,
                                         "Missing URL"))
                else:
                    warnings += testURL(
                        biobank['url'],
                        DataCheckWarning(self.__class__.__name__, "",
                                         dir.getBiobankNN(biobank['id']),
                                         DataCheckWarningLevel.ERROR,
                                         biobank['id'],
                                         DataCheckEntityType.BIOBANK,
                                         "Biobank URL"))

            log.info("Testing collection URLs")
            # Iterate collections here: this loop used to walk the biobanks
            # again, so collection access URIs were never actually tested.
            for collection in dir.getCollections():
                # non-existence of access URIs is tested in the access policy
                # checks - here we only check validity of the URL if it exists
                for attribute, label in uri_checks:
                    if attribute in collection and not re.search(
                            r'^\s*$', collection[attribute]):
                        warnings += testURL(
                            collection[attribute],
                            DataCheckWarning(
                                self.__class__.__name__, "",
                                dir.getCollectionNN(collection['id']),
                                DataCheckWarningLevel.ERROR,
                                collection['id'],
                                DataCheckEntityType.COLLECTION, label))
        finally:
            # Close the cache even when one of the checks raises.
            cache.close()
        return warnings
    def check(self, dir, args):
        """Validate geographical coordinates of biobanks and collections.

        For every biobank and every collection that has non-blank 'latitude'
        and 'longitude' values:

        * each value is matched against a decimal-number pattern (optional
          minus, digits, period, digits); a mismatch yields an ERROR;
        * commas are then replaced by periods in the entity itself, so the
          entity dict is modified in place;
        * if both values match after the substitution and geocoding is not
          listed in ``args.disableChecksRemote``, the coordinates are
          reverse-geocoded (looking in the on-disk cache first) and the
          resulting country code is compared with the biobank's country id;
          a mismatch yields a WARNING. Country ids "IARC" and "EU" are
          never compared, and code "GB" is accepted for country id "UK".
        * any exception raised during that step yields a WARNING saying
          reverse geocoding failed.

        Biobanks with absent or blank coordinates get an INFO warning; the
        collection loop has no such branch.

        :param dir: directory object queried for biobanks and collections.
        :param args: options object; ``disableChecksRemote`` and
            ``purgeCaches`` are read from it.
        :return: list of DataCheckWarning objects.
        """
        warnings = []
        log.info("Running geographical location checks (BiobankGeo)")
        # This is to be enabled for real runs.
        assert 'geocoding' in __main__.remoteCheckList
        # With geocoding disabled only the coordinate format is validated.
        if 'geocoding' in args.disableChecksRemote:
            geoCodingEnabled = False
        else:
            geoCodingEnabled = True

        # Cache of "lat, lon" string -> country code, kept in cache_dir.
        cache_dir = 'data-check-cache/geolocator'
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        cache = Cache(cache_dir)
        if 'geocoding' in args.purgeCaches:
            cache.clear()

        # Optional minus sign, digits, a period, digits - nothing else.
        geocoords_pattern = '^-?\d+\.\d+$'
        # presumably geopy's Nominatim geocoder -- confirm against the imports
        geolocator = Nominatim(
            user_agent=
            'Mozilla/5.0 (X11; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0',
            timeout=15)

        for biobank in dir.getBiobanks():
            if 'latitude' in biobank and not re.search(
                    '^\s*$', biobank['latitude']
            ) and 'longitude' in biobank and not re.search(
                    '^\s*$', biobank['longitude']):
                # we check before doing any convenience substitutions
                if not re.search(geocoords_pattern, biobank['latitude']):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getBiobankNN(biobank['id']),
                            DataCheckWarningLevel.ERROR, biobank['id'],
                            DataCheckEntityType.BIOBANK,
                            "Invalid biobank latitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                            + biobank['latitude'] + "'"))
                if not re.search(geocoords_pattern, biobank['longitude']):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getBiobankNN(biobank['id']),
                            DataCheckWarningLevel.ERROR, biobank['id'],
                            DataCheckEntityType.BIOBANK,
                            "Invalid biobank longitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                            + biobank['longitude'] + "'"))
                # this is for convenience - if there are commas used instead of periods, we should still do the remaining checks
                # NOTE: this writes the substituted values back into the biobank dict.
                biobank['latitude'] = re.sub(r',', r'.', biobank['latitude'])
                biobank['longitude'] = re.sub(r',', r'.', biobank['longitude'])
                if re.search(geocoords_pattern,
                             biobank['latitude']) and re.search(
                                 geocoords_pattern, biobank['longitude']):
                    if geoCodingEnabled:
                        logMessage = "Checking reverse geocoding for " + biobank[
                            'latitude'] + ", " + biobank['longitude']
                        try:
                            loc_string = biobank['latitude'] + ", " + biobank[
                                'longitude']
                            # An empty cached value is ignored and looked up again.
                            if loc_string in cache and cache[loc_string] != "":
                                country_code = cache[loc_string]
                            else:
                                location = geolocator.reverse(loc_string,
                                                              language='en')
                                country_code = location.raw['address'][
                                    'country_code']
                                cache[loc_string] = country_code
                            logMessage += " -> OK"
                            # "IARC" and "EU" country ids are exempt; "GB" is accepted for "UK".
                            if ((biobank['country']['id'] != "IARC"
                                 and biobank['country']['id'] != "EU")
                                    and country_code.upper() !=
                                    biobank['country']['id'] and
                                    not (country_code.upper() == "GB" and
                                         biobank['country']['id'] == "UK")):
                                warnings.append(
                                    DataCheckWarning(
                                        self.__class__.__name__, "",
                                        dir.getBiobankNN(biobank['id']),
                                        DataCheckWarningLevel.WARNING,
                                        biobank['id'],
                                        DataCheckEntityType.BIOBANK,
                                        "Geolocation of the biobank is likely outside of its country "
                                        + biobank['country']['id'] +
                                        "; biobank seems to be in " +
                                        country_code.upper() +
                                        f" based on geographical coordinates 'latitude'={biobank['latitude']} 'longitude'={biobank['longitude']}"
                                    ))
                        # Any failure in the try body (lookup, cache access, missing
                        # keys) is reported as a reverse-geocoding failure.
                        except Exception as e:
                            logMessage += " -> failed (" + str(e) + ")"
                            warnings.append(
                                DataCheckWarning(
                                    self.__class__.__name__, "",
                                    dir.getBiobankNN(biobank['id']),
                                    DataCheckWarningLevel.WARNING,
                                    biobank['id'], DataCheckEntityType.BIOBANK,
                                    "Reverse geocoding of the biobank  location failed ("
                                    + str(e) + ")"))
                        log.info(logMessage)
            else:
                # Coordinates absent or blank: informational only.
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getBiobankNN(biobank['id']),
                        DataCheckWarningLevel.INFO, biobank['id'],
                        DataCheckEntityType.BIOBANK,
                        "Missing geographical coordinates ('latitude and/or 'longitude' attributes are empty)"
                    ))

        # Same validation for collections; collections without coordinates
        # are not reported (there is no else branch here).
        for collection in dir.getCollections():
            if 'latitude' in collection and not re.search(
                    '^\s*$', collection['latitude']
            ) and 'longitude' in collection and not re.search(
                    '^\s*$', collection['longitude']):
                # we check before doing any convenience substitutions
                if not re.search(geocoords_pattern, collection['latitude']):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Invalid collection latitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                            + collection['latitude'] + "'"))
                if not re.search(geocoords_pattern, collection['longitude']):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Invalid collection longitude (should be a decimal number with period without any spaces or stray characters around - the surrounding quotes are added in this report): offending value '"
                            + collection['longitude'] + "'"))
                # this is for convenience - if there are commas used instead of periods, we should still do the remaining checks
                # NOTE: this writes the substituted values back into the collection dict.
                collection['latitude'] = re.sub(r',', r'.',
                                                collection['latitude'])
                collection['longitude'] = re.sub(r',', r'.',
                                                 collection['longitude'])
                if re.search(geocoords_pattern,
                             collection['latitude']) and re.search(
                                 geocoords_pattern, collection['longitude']):
                    if geoCodingEnabled:
                        logMessage = "Checking reverse geocoding for " + collection[
                            'latitude'] + ", " + collection['longitude']
                        try:
                            loc_string = collection[
                                'latitude'] + ", " + collection['longitude']
                            # An empty cached value is ignored and looked up again.
                            if loc_string in cache and cache[loc_string] != "":
                                country_code = cache[loc_string]
                            else:
                                location = geolocator.reverse(loc_string,
                                                              language='en')
                                country_code = location.raw['address'][
                                    'country_code']
                                cache[loc_string] = country_code
                            logMessage += " -> OK"
                            # The country compared against is that of the collection's
                            # biobank; this rebinds the name `biobank` used by the loop above.
                            biobankId = dir.getCollectionBiobankId(
                                collection['id'])
                            biobank = dir.getBiobankById(biobankId)
                            if ((biobank['country']['id'] != "IARC"
                                 and biobank['country']['id'] != "EU")
                                    and country_code.upper() !=
                                    biobank['country']['id'] and
                                    not (country_code.upper() == "GB" and
                                         biobank['country']['id'] == "UK")):
                                # NOTE(review): the message below reads collection['country']['id']
                                # while the comparison above uses the biobank's country - confirm
                                # which one is intended. If a collection has no 'country', the
                                # resulting KeyError is caught below and reported as a geocoding failure.
                                warnings.append(
                                    DataCheckWarning(
                                        self.__class__.__name__, "",
                                        dir.getCollectionNN(collection['id']),
                                        DataCheckWarningLevel.WARNING,
                                        collection['id'],
                                        DataCheckEntityType.COLLECTION,
                                        "Geolocation of the collection is likely outside of its country "
                                        + collection['country']['id'] +
                                        "; collection seems to be in " +
                                        country_code.upper() +
                                        f" based on geographical coordinates 'latitude'={collection['latitude']} 'longitude'={collection['longitude']}"
                                    ))
                        # Any failure in the try body (lookup, cache access, biobank
                        # resolution, missing keys) is reported as a geocoding failure.
                        except Exception as e:
                            logMessage += " -> failed (" + str(e) + ")"
                            warnings.append(
                                DataCheckWarning(
                                    self.__class__.__name__, "",
                                    dir.getCollectionNN(collection['id']),
                                    DataCheckWarningLevel.WARNING,
                                    collection['id'],
                                    DataCheckEntityType.COLLECTION,
                                    "Reverse geocoding of the collection  location failed ("
                                    + str(e) + ")"))
                        log.info(logMessage)

        cache.close()
        return warnings
Exemplo n.º 7
0
    def check(self, dir, args):
        warnings = []
        log.info("Running collection content checks (CollectionContent)")
        orphacodes = dir.getOrphaCodesMapper()
        for collection in dir.getCollections():
            OoM = collection['order_of_magnitude']['id']
            materials = Directory.getListOfEntityAttributeIds(
                collection, 'materials')
            data_categories = Directory.getListOfEntityAttributeIds(
                collection, 'data_categories')
            types = Directory.getListOfEntityAttributeIds(collection, 'type')

            diags = []
            diags_icd10 = []
            diags_orpha = []
            if 'diagnosis_available' in collection:
                diag_ranges = []
                for d in collection['diagnosis_available']:
                    diags.append(d['id'])
                    if re.search('-', d['id']):
                        diag_ranges.append(d['id'])
                    if re.search('^urn:miriam:icd:', d['id']):
                        diags_icd10.append(
                            re.sub('^urn:miriam:icd:', '', d['id']))
                    elif re.search('^ORPHA:', d['id']):
                        if dir.issetOrphaCodesMapper():
                            if orphacodes.isValidOrphaCode(d):
                                diags_orpha.append(
                                    re.sub('^ORPHA:', '', d['id']))
                            else:
                                warnings.append(
                                    DataCheckWarning(
                                        self.__class__.__name__, "",
                                        dir.getCollectionNN(collection['id']),
                                        DataCheckWarningLevel.ERROR,
                                        collection['id'],
                                        DataCheckEntityType.COLLECTION,
                                        "Invalid ORPHA code found: %s" %
                                        (d['id'])))
                if diag_ranges:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "It seems that diagnoses contains range - this will render the diagnosis search ineffective for the given collection. Violating diagnosis term(s): "
                            + '; '.join(diag_ranges)))

            if len(types) < 1:
                warnings.append(
                    DataCheckWarning(self.__class__.__name__, "",
                                     dir.getCollectionNN(collection['id']),
                                     DataCheckWarningLevel.ERROR,
                                     collection['id'],
                                     DataCheckEntityType.COLLECTION,
                                     "Collection type not provided"))

            if 'size' in collection and isinstance(collection['size'], int):
                if OoM > 1 and collection['size'] < 10**OoM or collection[
                        'size'] > 10**(OoM + 1):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Size of the collection does not match its order of magnitude: size = "
                            + str(collection['size']) +
                            ", order of magnitude is %d (size between %d and %d)"
                            % (OoM, 10**OoM, 10**(OoM + 1))))

            if OoM > 4:
                subCollections = dir.getCollectionsDescendants(
                    collection['id'])
                if len(subCollections) < 1:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.INFO, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Suspicious situation: large collection (> 100,000 samples or cases) without subcollections; unless it is a really homogeneous collection, it is advisable to refine such a collection into sub-collections to give users better insight into what is stored there"
                        ))

            if OoM > 5:
                if (not 'size' in collection.keys()) or (collection['size']
                                                         == 0):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.INFO, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Suspicious situation: large collection (> 1,000,000 samples or cases) without exact size specified"
                        ))

            if any(x in types for x in ['HOSPITAL', 'DISEASE_SPECIFIC', 'RD'
                                        ]) and len(diags) < 1:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.ERROR, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "No diagnoses provide for HOSPITAL or DISEASE_SPECIFIC or RD collection"
                    ))

            if len(diags) > 0 and not any(
                    x in types
                    for x in ['HOSPITAL', 'DISEASE_SPECIFIC', 'RD']):
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.INFO, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "Diagnoses provided but none of HOSPITAL, DISEASE_SPECIFIC, RD is specified as collection type (this may be easily false positive check)"
                    ))

            if 'BIOLOGICAL_SAMPLES' in data_categories and len(materials) == 0:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.ERROR, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "No material types are provided while biological samples are collected"
                    ))

            if len(materials
                   ) > 0 and 'BIOLOGICAL_SAMPLES' not in data_categories:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.ERROR, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "Sample types advertised but BIOLOGICAL_SAMPLES missing among its data categories"
                    ))

            if 'MEDICAL_RECORDS' in data_categories and len(diags) < 1:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.WARNING, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "No diagnoses provide for a collection with MEDICAL_RECORDS among its data categories"
                    ))

            if len(diags) > 0 and 'MEDICAL_RECORDS' not in data_categories:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.WARNING, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "Diagnoses provided but no MEDICAL_RECORDS among its data categories"
                    ))

            if 'RD' in types and len(diags_orpha) == 0:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.WARNING, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "Rare disease (RD) collection without ORPHA code diagnoses"
                    ))
                if dir.issetOrphaCodesMapper():
                    for d in diags_icd10:
                        orpha = orphacodes.icd10ToOrpha(d)
                        if orpha is not None and len(orpha) > 0:
                            orphalist = [
                                "%(code)s(%(name)s)/%(mapping_type)s" % {
                                    'code':
                                    c['code'],
                                    'name':
                                    orphacodes.orphaToNamesString(c['code']),
                                    'mapping_type':
                                    c['mapping_type']
                                } for c in orpha
                            ]
                            warnings.append(
                                DataCheckWarning(
                                    self.__class__.__name__, "",
                                    dir.getCollectionNN(collection['id']),
                                    DataCheckWarningLevel.INFO,
                                    collection['id'],
                                    DataCheckEntityType.COLLECTION,
                                    "Consider adding following ORPHA code(s) to the RD collection - based on mapping ICD-10 code %s to ORPHA codes: %s"
                                    % (d, ",".join(orphalist))))

            if len(diags_orpha) > 0 and 'RD' not in types:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.WARNING, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "ORPHA code diagnoses provided, but collection not marked as rare disease (RD) collection"
                    ))

            if len(diags_orpha) > 0 and len(diags_icd10) == 0:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.WARNING, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "ORPHA code diagnoses specified, but no ICD-10 equivalents provided, thus making collection impossible to find for users using ICD-10 codes"
                    ))

            if len(diags_orpha) > 0 and dir.issetOrphaCodesMapper():
                for d in diags_orpha:
                    icd10codes = orphacodes.orphaToIcd10(d)
                    for c in icd10codes:
                        if 'urn:miriam:icd:' + c['code'] not in diags_icd10:
                            warnings.append(
                                DataCheckWarning(
                                    self.__class__.__name__, "",
                                    dir.getCollectionNN(collection['id']),
                                    DataCheckWarningLevel.INFO,
                                    collection['id'],
                                    DataCheckEntityType.COLLECTION,
                                    "ORPHA code %s provided, but its translation to ICD-10 as %s is not provided (mapping is of %s type). It is recommended to provide this translation explicitly until Directory implements full semantic mapping search."
                                    % (d, c['code'], c['mapping_type'])))

            modalities = []
            if 'imaging_modality' in collection:
                for m in collection['imaging_modality']:
                    modalities.append(m['id'])

            image_dataset_types = []
            if 'image_dataset_type' in collection:
                for idt in collection['image_dataset_type']:
                    image_dataset_types.append(idt['id'])

            if 'IMAGING_DATA' in data_categories:
                if len(modalities) < 1:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "No image modalities provided for image collection"
                        ))

                if len(image_dataset_types) < 1:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.WARNING, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "No image dataset types provided for image collection"
                        ))

            if (len(modalities) > 0 or len(image_dataset_types) > 0
                ) and 'IMAGING_DATA' not in data_categories:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.ERROR, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "Imaging modalities or image data set found, but IMAGING_DATA is not among data categories: image_modality = %s, image_dataset_type = %s"
                        % (modalities, image_dataset_types)))

            age_unit = None
            if 'age_unit' in collection:
                age_units = collection['age_unit']
                if len(age_units) > 1:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Ambiguous speification of age_unit - only one value is permitted. Provided values %s"
                            % (age_units)))
                elif len(age_units) == 1:
                    age_unit = age_units[0]
            if ('age_high' in collection or 'age_low' in collection) and (
                    'age_low' not in collection or len(age_units) < 1):
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.ERROR, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        f"Missing age_unit for provided age range: {collection.get('age_low')}-{collection.get('age_high')}"
                    ))

            age_min_limit = -1
            if age_unit == "MONTH":
                age_min_limit = age_min_limit * 12
            elif age_unit == "WEEK":
                age_min_limit = age_min_limit * 52.1775
            elif age_unit == "DAY":
                age_min_limit = age_min_limit * 365.2

            if ('age_high' in collection
                    and collection['age_high'] < age_min_limit):
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.ERROR, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "Age_high is below the minimum value limit (%d %s): offending value %d"
                        % (age_min_limit, age_unit, collection['age_high'])))
            if ('age_low' in collection
                    and collection['age_low'] < age_min_limit):
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getCollectionNN(collection['id']),
                        DataCheckWarningLevel.ERROR, collection['id'],
                        DataCheckEntityType.COLLECTION,
                        "Age_low is below the minimum value limit (%d %s): offending value %d"
                        % (age_min_limit, age_unit, collection['age_low'])))

            if ('age_high' in collection and 'age_low' in collection):
                if (collection['age_low'] > collection['age_high']):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Age_low (%d) is higher than age_high (%d)" %
                            (collection['age_low'], collection['age_high'])))
                elif (collection['age_low'] == collection['age_high']):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.INFO, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Suspect situation: age_low == age_high == (%d) (may be false positive)"
                            % (collection['age_low'])))

        return warnings
    def check(self, dir, args):
        """Check name, email and phone fields of every contact.

        For each contact returned by ``dir.getContacts()`` the following is
        verified:

        * ``first_name`` and ``last_name`` are present and not blank (WARNING),
        * ``email`` is present and not blank (ERROR) and passes
          ``validate_email`` (WARNING),
        * unless the ``'emails'`` remote check is disabled, the email also
          passes ``validate_email(..., check_mx=True)`` (WARNING); results of
          that lookup are stored in a cache under ``data-check-cache/emails``,
        * ``phone`` is present and not blank (WARNING) and matches the phone
          number pattern (ERROR).

        Args:
            dir: Directory accessor; ``getContacts()`` and ``getContactNN()``
                are the only members used here.
            args: Parsed options; ``disableChecksRemote`` and ``purgeCaches``
                are tested for the ``'emails'`` entry.

        Returns:
            List of ``DataCheckWarning`` objects, one per detected problem.
        """
        warnings = []
        log.info("Running contact fields checks (ContactFields)")
        # Developer sanity check: the name used below to enable/disable the
        # remote check must be one the main program knows about.
        assert 'emails' in __main__.remoteCheckList
        validate_remotely = 'emails' not in args.disableChecksRemote

        cache_dir = 'data-check-cache/emails'
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(cache_dir, exist_ok=True)
        cache = Cache(cache_dir)
        if 'emails' in args.purgeCaches:
            cache.clear()

        # Raw strings: '\s' and '\+' are invalid escapes in ordinary literals.
        blank = re.compile(r'^\s*$')
        phone_pattern = re.compile(r'^\+(?:[0-9]??){6,14}[0-9]$')

        def is_missing(entity, key):
            # True when the attribute is absent or contains only whitespace.
            return key not in entity or blank.search(entity[key])

        def make_warning(contact, level, message):
            # Build a warning attributed to the given contact.
            return DataCheckWarning(
                self.__class__.__name__, "",
                dir.getContactNN(contact['id']), level, contact['id'],
                DataCheckEntityType.CONTACT, message)

        for contact in dir.getContacts():
            if is_missing(contact, 'first_name'):
                warnings.append(
                    make_warning(
                        contact, DataCheckWarningLevel.WARNING,
                        "Missing first name for contact ('first_name' attribute is empty)"
                    ))
            if is_missing(contact, 'last_name'):
                warnings.append(
                    make_warning(
                        contact, DataCheckWarningLevel.WARNING,
                        "Missing last name for contact ('last_name' attribute is empty)"
                    ))

            if is_missing(contact, 'email'):
                warnings.append(
                    make_warning(
                        contact, DataCheckWarningLevel.ERROR,
                        "Missing email for contact ('email' attribute is empty)"
                    ))
            elif not validate_email(contact['email']):
                warnings.append(
                    make_warning(
                        contact, DataCheckWarningLevel.WARNING,
                        "Email for contact is invalid - offending  'email' attribute value: "
                        + contact['email']))
            elif validate_remotely:
                # This is pretty dramatic test and should be used sparingly.
                # Only the DNS MX lookup is performed; an earlier attempt with
                # verify=True was noted as not working in most cases.
                contact_email = contact['email']
                log_message = "Validating email " + contact_email
                try:
                    cached = contact_email in cache
                    if cached:
                        reachable = bool(cache[contact_email]['valid'])
                    else:
                        reachable = bool(
                            validate_email(contact_email, check_mx=True))

                    mx_warning = None
                    if not reachable:
                        # Always build the warning for the *current* contact:
                        # a warning object stored in the cache belongs to
                        # whichever contact first triggered the lookup and
                        # would report the wrong contact ID here.
                        mx_warning = make_warning(
                            contact, DataCheckWarningLevel.WARNING,
                            "Email for contact seems to be unreachable because of missing DNS MX record"
                        )
                        warnings.append(mx_warning)

                    if not cached:
                        # Same record layout as before, so existing cache
                        # entries and new ones stay interchangeable.
                        cache[contact_email] = {
                            'valid': reachable,
                            'warning': mx_warning
                        }

                    log_message += " -> OK" if reachable else " -> failed"
                    log.info(log_message)
                except (DNS.Base.TimeoutError, DNS.Base.ServerError,
                        DNS.Base.SocketError) as e:
                    # A DNS failure is logged but not reported as a data
                    # problem and not cached.
                    log_message += " -> failed with exception (" + str(
                        e) + ")"
                    log.error(log_message)

            if is_missing(contact, 'phone'):
                warnings.append(
                    make_warning(
                        contact, DataCheckWarningLevel.WARNING,
                        "Missing phone for contact ('phone' attribute is empty)"
                    ))
            elif not phone_pattern.search(contact['phone']):
                warnings.append(
                    make_warning(
                        contact, DataCheckWarningLevel.ERROR,
                        "Phone number for contact does not conform to the E.123 international standard (means starts with + sign, no spaces) - offending phone number in 'phone' attribute: "
                        + contact['phone']))
        return warnings
    def check(self, dir, args):
        """Validate identifiers of biobanks, collections, contacts and networks.

        For every entity the ID is checked for:

        * the expected prefix (``bbmri-eric:ID:``, ``bbmri-eric:contactID:``
          or ``bbmri-eric:networkID:`` followed by the node code returned by
          the matching ``dir.get*NN()`` call, or by ``EXT``),
        * characters outside ``A-Za-z0-9:_-``,
        * an empty hierarchy component (``::``).

        Collections are additionally expected to start with
        ``<biobank id>:collection:``.

        Prefixes are compared with ``str.startswith`` rather than by building
        regular expressions from IDs, so an ID containing regex
        metacharacters cannot break or distort the comparison.

        Args:
            dir: Directory accessor providing ``getBiobanks()``,
                ``getCollections()``, ``getContacts()``, ``getNetworks()`` and
                the corresponding ``get*NN()`` lookups.
            args: Parsed options; not used by this check.

        Returns:
            List of ``DataCheckWarning`` objects, one per detected problem.
        """
        warnings = []
        log.info("Running identifier validation checks (ValidateIDs)")

        illegal_chars = re.compile(r'[^A-Za-z0-9:_-]')
        error = DataCheckWarningLevel.ERROR

        def report(nn, level, entity_id, entity_type, message):
            # Append one warning produced by this check.
            warnings.append(
                DataCheckWarning(self.__class__.__name__, "", nn, level,
                                 entity_id, entity_type, message))

        for biobank in dir.getBiobanks():
            biobank_id = biobank['id']
            nn = dir.getBiobankNN(biobank_id)
            kind = DataCheckEntityType.BIOBANK
            if (nn not in NNContacts.NNtoEmails
                    and not biobank_id.startswith('bbmri-eric:ID:EXT_')):
                report(
                    nn, error, biobank_id, kind,
                    "BiobankID is not compliant with the specification " +
                    ' (shall start with "bbmri-eric:ID:EXT_" prefix for external biobanks that have no national node)'
                )
            if biobank_id.startswith('bbmri-eric:ID:EXT'):
                if not biobank_id.startswith('bbmri-eric:ID:EXT_'):
                    report(
                        nn, error, biobank_id, kind,
                        "BiobankID is not compliant with the specification "
                        +
                        ' (shall start with "bbmri-eric:ID:EXT_" prefix for external biobanks)'
                    )
            elif not biobank_id.startswith('bbmri-eric:ID:' + nn + '_'):
                report(
                    nn, error, biobank_id, kind,
                    "BiobankID is not compliant with the specification " +
                    ' (shall start with "bbmri-eric:ID:' + nn + '_' +
                    '" prefix)')
            if illegal_chars.search(biobank_id):
                report(
                    nn, error, biobank_id, kind,
                    "BiobankID contains illegal characters " +
                    ' (shall be "A-Za-z0-9:_-")')
            if '::' in biobank_id:
                report(
                    nn, error, biobank_id, kind,
                    "BiobankID contains :: indicating empty component in ID hierarchy"
                )

        for collection in dir.getCollections():
            collection_id = collection['id']
            nn = dir.getCollectionNN(collection_id)
            kind = DataCheckEntityType.COLLECTION
            if (nn not in NNContacts.NNtoEmails
                    and not collection_id.startswith('bbmri-eric:ID:EXT_')):
                report(
                    nn, error, collection_id, kind,
                    "CollectionID is not compliant with the specification " +
                    ' (shall start with "bbmri-eric:ID:EXT_" prefix for collections from external biobanks that have no national node)'
                )
            if collection_id.startswith('bbmri-eric:ID:EXT'):
                if not collection_id.startswith('bbmri-eric:ID:EXT_'):
                    report(
                        nn, error, collection_id, kind,
                        "CollectionID is not compliant with the specification "
                        +
                        ' (shall start with "bbmri-eric:ID:EXT_" prefix for collections from external biobanks)'
                    )
            elif not collection_id.startswith('bbmri-eric:ID:' + nn + '_'):
                report(
                    nn, error, collection_id, kind,
                    "CollectionID is not compliant with the specification " +
                    ' (shall start with "bbmri-eric:ID:' + nn + '_' +
                    '" prefix)')
            if illegal_chars.search(collection_id):
                report(
                    nn, error, collection_id, kind,
                    "CollectionID contains illegal characters " +
                    ' (shall be "A-Za-z0-9:_-")')
            # The owning biobank's ID is compared literally; it may itself be
            # malformed, so it must never be used as a regex pattern.
            biobank_id = collection['biobank']['id']
            if not collection_id.startswith(biobank_id + ':collection:'):
                report(
                    nn, DataCheckWarningLevel.WARNING, collection_id, kind,
                    "CollectionID does not contain expected biobank prefix " +
                    ' (should start with ' + biobank_id + ':collection:' +
                    ')')
            if '::' in collection_id:
                report(
                    nn, error, collection_id, kind,
                    "CollectionID contains :: indicating empty component in ID hierarchy"
                )

        for contact in dir.getContacts():
            contact_id = contact['id']
            nn = dir.getContactNN(contact_id)
            kind = DataCheckEntityType.CONTACT
            # Contacts use the "contactID" namespace. Requiring the biobank
            # "ID:EXT_" prefix here would contradict the contactID prefix
            # check below and make every such contact fail.
            if (nn not in NNContacts.NNtoEmails and
                    not contact_id.startswith('bbmri-eric:contactID:EXT_')):
                report(
                    nn, error, contact_id, kind,
                    "ContactID is not compliant with the specification " +
                    ' (shall start with "bbmri-eric:contactID:EXT_" prefix for contacts for external biobanks that have no national node)'
                )
            if contact_id.startswith('bbmri-eric:contactID:EXT'):
                if not contact_id.startswith('bbmri-eric:contactID:EXT_'):
                    report(
                        nn, error, contact_id, kind,
                        "ContactID is not compliant with the specification "
                        +
                        ' (shall start with "bbmri-eric:contactID:EXT_" prefix for contacts for external biobanks)'
                    )
            elif not contact_id.startswith('bbmri-eric:contactID:' + nn +
                                           '_'):
                report(
                    nn, error, contact_id, kind,
                    "ContactID is not compliant with the specification " +
                    ' (shall start with "bbmri-eric:contactID:' + nn + '_' +
                    '" prefix)')
            if illegal_chars.search(contact_id):
                report(
                    nn, error, contact_id, kind,
                    "ContactID contains illegal characters " +
                    ' (shall be "A-Za-z0-9:_-")')
            if '::' in contact_id:
                report(
                    nn, error, contact_id, kind,
                    "ContactID contains :: indicating empty component in ID hierarchy"
                )

        for network in dir.getNetworks():
            network_id = network['id']
            nn = dir.getNetworkNN(network_id)
            kind = DataCheckEntityType.NETWORK
            # Networks use the "networkID" namespace; see the contact loop
            # for why the biobank "ID:EXT_" prefix cannot be required here.
            # NOTE(review): whether EU-level networks can reach this branch
            # depends on getNetworkNN() and NNtoEmails - confirm.
            if (nn not in NNContacts.NNtoEmails and
                    not network_id.startswith('bbmri-eric:networkID:EXT_')):
                report(
                    nn, error, network_id, kind,
                    "NetworkID is not compliant with the specification " +
                    ' (shall start with "bbmri-eric:networkID:EXT_" prefix for networks from countries that have no national node)'
                )
            if not network_id.startswith('bbmri-eric:networkID:'):
                report(
                    nn, error, network_id, kind,
                    "NetworkID is not compliant with the specification " +
                    ' (shall start with "bbmri-eric:networkID:" prefix)')
            elif not network_id.startswith(
                ('bbmri-eric:networkID:' + nn + '_',
                 'bbmri-eric:networkID:EU_', 'bbmri-eric:networkID:EXT_')):
                report(
                    nn, DataCheckWarningLevel.WARNING, network_id, kind,
                    "NetworkID has suspicious country affiliation " +
                    ' (should start with "bbmri-eric:networkID:' + nn + '_' +
                    '" or "bbmri-eric:networkID:EU_" prefix)')
            if illegal_chars.search(network_id):
                report(
                    nn, error, network_id, kind,
                    "NetworkID contains illegal characters " +
                    ' (shall be "A-Za-z0-9:_-")')
            if '::' in network_id:
                report(
                    nn, error, network_id, kind,
                    "NetworkID contains :: indicating empty component in ID hierarchy"
                )

        return warnings
Exemplo n.º 10
0
    def check(self, dir, args):
        warnings = []
        log.info("Running COVID content checks (COVID)")
        biobankHasCovidCollection = {}
        biobankHasCovidProspectiveCollection = {}
        biobankHasCovidControls = {}

        for collection in dir.getCollections():
            biobankId = dir.getCollectionBiobankId(collection['id'])
            biobank = dir.getBiobankById(biobankId)
            biobank_capabilities = []
            if 'capabilities' in biobank:
                for c in biobank['capabilities']:
                    biobank_capabilities.append(c['id'])
            biobank_covid = []
            if 'covid19biobank' in biobank:
                for c in biobank['covid19biobank']:
                    biobank_covid.append(c['id'])
            biobank_networks = []
            if 'network' in biobank:
                for n in biobank['network']:
                    biobank_networks.append(n['id'])

            OoM = collection['order_of_magnitude']['id']

            materials = []
            if 'materials' in collection:
                for m in collection['materials']:
                    materials.append(m['id'])

            data_categories = []
            if 'data_categories' in collection:
                for c in collection['data_categories']:
                    data_categories.append(c['id'])

            types = []
            if 'type' in collection:
                for t in collection['type']:
                    types.append(t['id'])

            diags = []
            diag_ranges = []
            covid_diag = False
            covid_control = False

            for d in collection['diagnosis_available']:
                if re.search('-', d['id']):
                    diag_ranges.append(d['id'])
                else:
                    diags.append(d['id'])

            for d in diags + diag_ranges:
                # ICD-10
                if re.search('U07', d):
                    covid_diag = True
                # ICD-10
                if re.search('Z03.818', d):
                    covid_control = True
                # ICD-11
                if re.search('RA01', d):
                    covid_diag = True
                # SNOMED CT
                if re.search(
                        '(840533007|840534001|840535000|840536004|840539006|840544004|840546002)',
                        d):
                    covid_diag = True

            if covid_diag:
                biobankHasCovidCollection[biobank['id']] = True
            else:
                # just initialize the record if not yet set at all - otherwise don't touch!
                if not biobank['id'] in biobankHasCovidCollection:
                    biobankHasCovidCollection[biobank['id']] = False

            if covid_control:
                biobankHasCovidControls[biobank['id']] = True
            else:
                # just initialize the record if not yet set at all - otherwise don't touch!
                if not biobank['id'] in biobankHasCovidControls:
                    biobankHasCovidControls[biobank['id']] = False

            if (covid_diag or covid_control) and diag_ranges:
                warning = DataCheckWarning(
                    self.__class__.__name__, "",
                    dir.getCollectionNN(collection['id']),
                    DataCheckWarningLevel.ERROR, collection['id'],
                    DataCheckEntityType.COLLECTION,
                    "It seems that diagnoses contains range - this will render the diagnosis search ineffective for the given collection. Violating diagnosis term(s): "
                    + '; '.join(diag_ranges))
                warnings.append(warning)

            if covid_diag or covid_control:
                if not covidNetworkName in biobank_networks:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, biobank['id'],
                            DataCheckEntityType.BIOBANK,
                            "Biobank contains COVID collection " +
                            collection['id'] + ' but not marked as part of ' +
                            covidNetworkName))
                if not 'covid19' in biobank_covid:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, biobank['id'],
                            DataCheckEntityType.BIOBANK,
                            "Biobank contains COVID collection " +
                            collection['id'] +
                            ' but does not have "covid19" attribute in "covid19biobank" section of attributes'
                        ))

            if len(types) < 1:
                warnings.append(
                    DataCheckWarning(self.__class__.__name__, "",
                                     dir.getCollectionNN(collection['id']),
                                     DataCheckWarningLevel.ERROR,
                                     collection['id'],
                                     DataCheckEntityType.COLLECTION,
                                     "Collection type not provided"))

            if re.search(covidProspectiveCollectionIdPattern,
                         collection['id']):
                biobankHasCovidProspectiveCollection[biobank['id']] = True
                if not 'DISEASE_SPECIFIC' in types:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Prospective COVID-19 collections must have DISEASE_SPECIFIC as one of its types"
                        ))
                if not 'PROSPECTIVE_COLLECTION' in types:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Prospective COVID-19 collections must have PROSPECTIVE_COLLECTION as one of its types"
                        ))
                if OoM > 0:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.WARNING, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Prospective collection type represents capability of setting up prospective collections - hence it should have zero order of magnitude"
                        ))
                if not covid_diag and not covid_control:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "COVID19PROSPECTIVE collection misses COVID-19 diagnosis or COVID-19 controls filled in"
                        ))

            if re.search('^Ability to collect',
                         collection['name']) and (covid_diag or covid_control):
                if not re.search(covidProspectiveCollectionIdPattern,
                                 collection['id']):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            'Collection having "ability to collect" does not have COVID19PROSPECTIVE label'
                        ))
                    # only report the following if it hasn't been reported above (hence only if the COVID19PROSPECTIVE does not match)
                    if OoM > 0:
                        warnings.append(
                            DataCheckWarning(
                                self.__class__.__name__, "",
                                dir.getCollectionNN(collection['id']),
                                DataCheckWarningLevel.WARNING,
                                collection['id'],
                                DataCheckEntityType.COLLECTION,
                                "Prospective collection type represents capability of setting up prospective collections - hence it should have zero order of magnitude"
                            ))

            # also find other prospective collections containing COVID-19
            if not re.search(
                    covidProspectiveCollectionIdPattern, collection['id']
            ) and covid_diag and 'PROSPECTIVE_COLLECTION' in types:
                biobankHasCovidProspectiveCollection[biobank['id']] = True
                log.debug(
                    "Prospective COVID-19 collection found with non-standard identifier: %s (%s) in biobank %s (%s)"
                    % (collection['id'], collection['name'], biobank['id'],
                       biobank['name']))

            if re.search('.*:COVID19$', collection['id']):
                if not 'DISEASE_SPECIFIC' in types:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Existing COVID-19 collections must have DISEASE_SPECIFIC as one of its types"
                        ))
                if not 'DNA' in materials and not 'PATHOGEN' in materials and not 'PERIPHERAL_BLOOD_CELLS' in materials and not 'PLASMA' in materials and not 'RNA' in materials and not 'SALIVA' in materials and not 'SERUM' in materials and not 'WHOLE_BLOOD' in materials and not 'FECES' in materials and not 'BUFFY_COAT' in materials and not 'NASAL_SWAB' in materials and not 'THROAT_SWAB' in materials:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.WARNING, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Supect material types: existing COVID-19 collection does not have any of the common material types: DNA, PATHOGEN, PERIPHERAL_BLOOD_CELLS, PLASMA, RNA, SALIVA, SERUM, WHOLE_BLOOD, FECES, BUFFY_COAT, NASAL_SWAB, THROAT_SWAB"
                        ))
                if 'NASAL_SWAB' in materials or 'THROAT_SWAB' in materials or 'FECES' in materials and not (
                        'BSL2' in biobank_covid or 'BSL3' in biobank_covid):
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.WARNING, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "Suspect situation: collection contains infectious material (nasal/throat swabs, faeces) while the parent biobank does not indicate BSL2 nor BSL3 available"
                        ))
                if not covid_diag:
                    warnings.append(
                        DataCheckWarning(
                            self.__class__.__name__, "",
                            dir.getCollectionNN(collection['id']),
                            DataCheckWarningLevel.ERROR, collection['id'],
                            DataCheckEntityType.COLLECTION,
                            "COVID19 collection misses COVID-19 diagnosis filled in"
                        ))

        for biobank in dir.getBiobanks():
            biobank_capabilities = []
            if 'capabilities' in biobank:
                for c in biobank['capabilities']:
                    biobank_capabilities.append(c['id'])
            biobank_covid = []
            if 'covid19biobank' in biobank:
                for c in biobank['covid19biobank']:
                    biobank_covid.append(c['id'])
            biobank_networks = []
            if 'network' in biobank:
                for n in biobank['network']:
                    biobank_networks.append(n['id'])

            if covidNetworkName in biobank_networks and not 'covid19' in biobank_covid:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getBiobankNN(biobank['id']),
                        DataCheckWarningLevel.ERROR, biobank['id'],
                        DataCheckEntityType.BIOBANK,
                        "Biobank is part of " + covidNetworkName +
                        " but does not have covid19 among covid19biobank attributes"
                    ))
            if 'covid19' in biobank_covid and not covidNetworkName in biobank_networks:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getBiobankNN(biobank['id']),
                        DataCheckWarningLevel.ERROR, biobank['id'],
                        DataCheckEntityType.BIOBANK,
                        "Biobank has covid19 among covid19biobank attributes but is not part of "
                        + covidNetworkName))

            # This is a simple check if the biobank has other services than just the attribute of being a covid19 biobank
            other_covid_services = False
            for s in biobank_covid:
                if s != 'covid19':
                    other_covid_services = True

            if 'covid19' in biobank_covid and not (
                    biobank['id'] in biobankHasCovidCollection or biobank['id']
                    in biobankHasCovidControls or other_covid_services):
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getBiobankNN(biobank['id']),
                        DataCheckWarningLevel.ERROR, biobank['id'],
                        DataCheckEntityType.BIOBANK,
                        "Biobank has covid19 among covid19biobank but has no relevant services nor any collection of COVID-19 samples nor any collection of COVID-19 controls"
                    ))

            if 'ProspectiveCollections' in biobank_covid and not biobank[
                    'id'] in biobankHasCovidProspectiveCollection:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getBiobankNN(biobank['id']),
                        DataCheckWarningLevel.WARNING, biobank['id'],
                        DataCheckEntityType.BIOBANK,
                        "Biobank has ProspectiveCollections among covid19biobank attributes but has no prospective collection defined (collection ID matching '"
                        + covidProspectiveCollectionIdPattern +
                        "' regex pattern)"))

            if biobank[
                    'id'] in biobankHasCovidProspectiveCollection and not 'ProspectiveCollections' in biobank_covid:
                warnings.append(
                    DataCheckWarning(
                        self.__class__.__name__, "",
                        dir.getBiobankNN(biobank['id']),
                        DataCheckWarningLevel.ERROR, biobank['id'],
                        DataCheckEntityType.BIOBANK,
                        "Biobank has prospective COVID-19 collection defined but ProspectiveCollections is not among covid19biobank attributes"
                    ))

        return warnings