def run(repo: MLRepo):
    """Gather the test issues of all models referenced by the test definitions.

    Every test definition in the repository is resolved to its models and
    model versions, and each (model, version) pair is handed to
    ``Tests.__check_test``. Only non-empty findings are kept.

    Args:
        repo (MLRepo): repository that provides the label, test definition
            and test objects.

    Returns:
        dict: findings keyed by ``'<model>:<label name>'`` when a label
            object carries that version, otherwise by ``'<model>:<version>'``.
            Empty if no check reported anything.
    """
    definition_names = repo.get_names(MLObjectType.TEST_DEFINITION)

    # Reverse lookup: version stored on a label object -> name of that label.
    label_by_version = {}
    for label_name in repo.get_names(MLObjectType.LABEL):
        label = repo.get(label_name)
        label_by_version[label.version] = label.repo_info.name

    findings = {}
    for definition_name in definition_names:
        definition = repo.get(definition_name)
        for model, versions in definition._get_models(repo).items():
            for version in versions:
                issues = Tests.__check_test(repo, model, version, definition)
                if len(issues) == 0:
                    continue
                # Prefer the human-readable label over the raw version string.
                suffix = label_by_version.get(version, version)
                findings[model + ':' + suffix] = issues
    return findings
def __check_test(repo: MLRepo, model, model_version, test_definition):
    """Check the tests of one model version for a single test definition.

    For each data set returned by ``test_definition._get_data(repo)`` the
    matching test object is looked up in the repository. A finding is
    recorded when the test is missing, when its own ``_check`` reports
    something, or when its ``result`` is not ``'succeeded'``.

    Args:
        repo (MLRepo): repository the tests are read from.
        model (str): name of the model.
        model_version (str): version of the model.
        test_definition: test definition providing the data names and the
            base test name.

    Returns:
        dict: maps test name to the finding for that test; empty if every
            test exists, passes its check and succeeded.
    """
    findings = {}
    for data_name in test_definition._get_data(repo):
        # Build the test object name from the definition so that it can be
        # looked up for exactly this model/data combination.
        test_name = str(
            NamingConventions.Test(
                model=NamingConventions.get_model_from_name(model),
                test_name=test_definition.repo_info.name,
                data=data_name))
        logging.debug('Checking test ' + test_name + ' for ' + model
                      + ', version ' + model_version)
        found = repo.get(
            test_name,
            version=None,
            modifier_versions={model: model_version},
            throw_error_not_exist=False,
            throw_error_not_unique=False)

        if found == []:
            findings[test_name] = ('Test for model ' + model + ', version '
                                   + model_version + ' on latest data '
                                   + data_name + ' missing.')
            continue

        if isinstance(found, list):
            # Several tests matched: keep the one with the newest commit
            # date (the first one wins on ties).
            latest = found[0]
            for candidate in found[1:]:
                if candidate.repo_info.commit_date > latest.repo_info.commit_date:
                    latest = candidate
        else:
            latest = found

        check_outcome = latest._check(repo)
        if check_outcome is not None:
            findings[test_name] = check_outcome

        succeeded = latest.result == 'succeeded'
        if not succeeded:
            # A failed test replaces whatever the check above reported.
            findings[test_name] = ('Test for model ' + model + ', version '
                                   + model_version + ' on latest data '
                                   + data_name + ' failed, details: '
                                   + str(latest.details))
    return findings
def run(repo: MLRepo, model_name=None, correct=False, model_version=RepoStore.LAST_VERSION, model_label=None, check_for_latest=True):
    """Run the consistency checks for models selected by label and/or by name.

    Args:
        repo (MLRepo): ml repository.
        model_name (str, optional): Defaults to None. If given, the model
            with this name is checked for every entry of ``model_version``.
            A name without ``'/'`` gets ``'/model'`` appended.
        correct (bool, optional): Defaults to False. Forwarded unchanged to
            ``Model.__check_model``; according to the original
            documentation, True starts jobs that fix the issues found
            (not visible in this block).
        model_version (str or list of str, optional): Defaults to
            RepoStore.LAST_VERSION. Version(s) to check for ``model_name``;
            None is treated as RepoStore.LAST_VERSION.
        model_label (str or list of str, optional): Defaults to None. Label
            name(s) whose model/version are checked. ``'__ALL__'`` selects
            all labels of the repository. Values that are neither a list
            nor a str are ignored.
        check_for_latest (bool, optional): Defaults to True. Forwarded to
            ``Model.__check_model`` for the checks by name; the checks by
            label always pass False.

    Returns:
        dict: maps a label name, or ``'<model_name>:<version>'``, to the
            non-empty result of the corresponding check. May be empty.
    """
    logger.info('Start checking model.')
    issues = {}

    # Normalise the label selection into a list of label names.
    if model_label is None:
        label_names = []
    elif isinstance(model_label, list):
        label_names = model_label
    elif isinstance(model_label, str):
        if model_label == '__ALL__':
            label_names = repo.get_names(MLObjectType.LABEL)
        else:
            label_names = [model_label]
    else:
        label_names = []

    # Check the model version each label refers to.
    for label_name in label_names:
        label = repo.get(label_name)
        label_issues = Model.__check_model(
            repo, label.name, correct,
            model_version=label.version, check_for_latest=False)
        if len(label_issues) > 0:
            issues[label_name] = label_issues

    # Check the model given by name for all requested versions.
    if model_name is not None:
        if '/' not in model_name:
            model_name = model_name + '/model'
        latest_version = repo.get(model_name).repo_info.version
        if model_version is None:
            model_version = RepoStore.LAST_VERSION
        versions = [model_version] if isinstance(model_version, str) else model_version
        for version in versions:
            if str(version) == RepoStore.LAST_VERSION:
                logger.debug(
                    'Latest version found, check if latest version ran on latest data.'
                )
                # Resolve the placeholder to the concrete latest version.
                version_to_check = latest_version
            else:
                version_to_check = version
            version_issues = Model.__check_model(
                repo, model_name, correct, version_to_check,
                check_for_latest=check_for_latest)
            if len(version_issues) > 0:
                # The key keeps the version as requested by the caller.
                issues[model_name + ':' + str(version)] = version_issues

    logger.info('Finished checking model.')
    return issues