def create(self, ml_repo: MLRepo):
    """Create the test objects of this definition for all selected models and data.

    One test is built for every combination of model, model version and data
    name. Models (mapped to their versions) are taken from
    ``self._get_models(ml_repo)``, data names from ``self._get_data(ml_repo)``.

    Args:
        ml_repo (MLRepo): Repository handed on to ``_get_models`` and ``_get_data``.

    Returns:
        list: The created test objects. Each one has ``test_definition`` set to
        the name of this definition and its repo name set to the string built
        by ``NamingConventions.Test`` from model, definition name and data.
    """
    tests = []
    models = self._get_models(ml_repo)
    data_names = self._get_data(ml_repo)
    for model_name, versions in models.items():
        for model_version in versions:
            for data_name in data_names:
                test = self._create(model_name, data_name, model_version,
                                    LAST_VERSION)
                test.test_definition = self.repo_info.name
                test_name = NamingConventions.Test(
                    model=NamingConventions.get_model_from_name(model_name),
                    test_name=self.repo_info[RepoInfoKey.NAME],
                    data=data_name)
                test.repo_info[RepoInfoKey.NAME] = str(test_name)
                tests.append(test)
    return tests
def get_measure_history(ml_repo, measure_names):
    """Collect, per measure, all stored measure values with their version info.

    For every measure name all versions of the measure (from
    ``RepoStore.FIRST_VERSION`` to ``RepoStore.LAST_VERSION``) are fetched from
    the repository and turned into plain dictionaries.

    Args:
        ml_repo (MLRepo): the ml repo
        measure_names (str, list(str)): measure name or list of measure names
            (including the full path)

    Returns:
        dict: maps each measure name to a list of dictionaries with the keys
        ``model_version``, ``data_version``, ``train_data_version``, ``value``,
        ``datetime`` (derived from the model version via ``_time_from_version``)
        and, if the model version carries a label, ``model_label``.
    """
    label_checker = _LabelChecker(ml_repo)
    if isinstance(measure_names, str):
        measure_names = [measure_names]

    history = {}
    for measure_name in measure_names:
        data_name = str(
            NamingConventions.Data(
                NamingConventions.EvalData(
                    NamingConventions.Measure(measure_name))))
        version_range = (RepoStore.FIRST_VERSION, RepoStore.LAST_VERSION)
        measures = ml_repo.get(measure_name, version=version_range)
        if not isinstance(measures, list):
            measures = [measures]
        model_name = str(
            NamingConventions.CalibratedModel(
                NamingConventions.Measure(measure_name)))
        # the first training data name is used to look up the training data version
        train_data_name = ml_repo.get_names(MLObjectType.TRAINING_DATA)[0]

        entries = []
        for measure in measures:
            entry = {}
            entry['model_version'] = measure.repo_info[
                RepoInfoKey.MODIFICATION_INFO][model_name]
            entry['data_version'] = measure.repo_info[
                RepoInfoKey.MODIFICATION_INFO][data_name]
            entry['train_data_version'] = measure.repo_info[
                RepoInfoKey.MODIFICATION_INFO][train_data_name]
            entry['value'] = measure.value
            entry['datetime'] = _time_from_version(
                measure.repo_info[RepoInfoKey.MODIFICATION_INFO][model_name])
            model_label = label_checker.get_label(
                model_name,
                measure.repo_info[RepoInfoKey.MODIFICATION_INFO][model_name])
            if model_label is not None:
                entry['model_label'] = model_label
            entries.append(entry)
        history[measure_name] = entries
    return history
def _run_test(self, ml_repo: MLRepo, jobid):
    """Compare the measures of the tested model against those of the reference model.

    The regression test definition and the label it references are loaded from
    the repository in their latest version. For every measure type returned by
    ``self._get_measure_types`` the measure of the tested model version and the
    measure of the labeled reference model are fetched for ``self.data`` in
    version ``self.data_version``. Measure types for which the tested model has
    no measure are skipped.

    Args:
        ml_repo (MLRepo): repository the definitions and measures are read from
        jobid: not used inside this method

    Returns:
        dict: maps a measure type to ``{'reference_value': ..., 'value': ...}``
        for every measure type where ``value - reference_value`` is below the
        tolerance (``tol`` itself, or ``tol * reference_value`` if the
        regression test is relative).
    """
    logger.debug('Running regression test ' + self.repo_info.name +
                 ' on model ' +
                 str(NamingConventions.CalibratedModel(self.model)) +
                 ', version ' + self.model_version)
    regression_test = ml_repo.get(self.test_definition, version=LAST_VERSION)
    label = ml_repo.get(regression_test.reference, version=LAST_VERSION)
    measure_types = self._get_measure_types(ml_repo, regression_test)

    result = {}
    for measure_type in measure_types:
        # measure of the model under test
        tested_name = str(
            NamingConventions.Measure({
                'model': self.model.split('/')[0],
                'data': self.data,
                'measure_type': measure_type
            }))
        tested_modifiers = {
            str(NamingConventions.CalibratedModel(self.model)):
            self.model_version,
            self.data: self.data_version
        }
        measure = ml_repo.get(tested_name,
                              version=None,
                              modifier_versions=tested_modifiers,
                              throw_error_not_exist=False,
                              throw_error_not_unique=True)
        if measure == []:
            continue

        # measure of the labeled reference model on the same data version
        reference_name = str(
            NamingConventions.Measure({
                'model': label.name.split('/')[0],
                'data': self.data,
                'measure_type': measure_type
            }))
        reference_modifiers = {
            str(NamingConventions.CalibratedModel(label.name)): label.version,
            self.data: self.data_version
        }
        reference_value = ml_repo.get(reference_name,
                                      version=None,
                                      modifier_versions=reference_modifiers,
                                      adjust_modification_info=False)

        if regression_test.relative:
            threshold = regression_test.tol * reference_value.value
        else:
            threshold = regression_test.tol
        if measure.value - reference_value.value < threshold:
            result[measure_type] = {
                'reference_value': reference_value.value,
                'value': measure.value
            }
    return result
def measure_history(ml_repo, measure_name):
    """Plot the stored values of a measure against the datetime of the model version.

    The history is obtained from ``plot_helper.get_measure_history``. For each
    returned measure and each data version one marker trace is drawn; model
    labels are added as arrow annotations. The figure is shown with ``iplot``.

    Args:
        ml_repo (MLRepo): the ml repo
        measure_name (str): name of the measure to plot
    """
    history = plot_helper.get_measure_history(ml_repo, measure_name)
    traces = []
    label_annotations = []
    for name, records in history.items():
        data_name = str(
            NamingConventions.Data(
                NamingConventions.EvalData(
                    NamingConventions.Measure(measure_name))))
        versions = set()
        for record in records:
            versions.add(record['data_version'])
            if 'model_label' in record:
                label_annotations.append({
                    'x': str(record['datetime']),
                    'y': record['value'],
                    'xref': 'x',
                    'yref': 'y',
                    'text': record['model_label'],
                    'showarrow': True,
                    'arrowhead': 2,
                })
        frame = pd.DataFrame(records)
        for version in versions:
            subset = frame.loc[frame['data_version'] == version]
            # hover text: one entry per plotted point
            hover_text = [
                '<br>'.join([
                    "model version: " + str(row['model_version']),
                    data_name + ': ' + str(row['data_version']),
                    'train_data: ' + str(row['train_data_version']),
                ]) for _, row in subset.iterrows()
            ]
            traces.append(
                go.Scatter(x=subset['datetime'],
                           y=subset['value'],
                           text=hover_text,
                           name=name + ': ' + str(version),
                           mode='markers'))

    y_title = NamingConventions.Measure(measure_name).values['measure_type']
    layout = go.Layout(title='measure history',
                       annotations=label_annotations,
                       xaxis=dict(title='t'),
                       yaxis=dict(title=y_title))
    iplot(go.Figure(data=traces, layout=layout))
def get_model_dict(ml_repo, models, label_checker):
    """Determine the models (with their versions) that shall be plotted.

    Args:
        ml_repo: not used inside this function.
        models (None, list, str or dict): selection of models.
            - ``None``: all labeled models are used; the versions of all labels
              are collected in a list per model name.
            - ``list``: each entry is either a label (resolved to the labeled
              model name and version) or a model name (mapped to
              ``LAST_VERSION`` under its calibrated-model name).
            - ``str``: ``LAST_VERSION`` plus the keys found under this name in
              the label checker.
            - ``dict``: returned unchanged.
        label_checker: object whose ``_labels`` mapping is used to resolve labels.

    Returns:
        dict: model names mapped to the version(s) to be plotted. For any other
        type of ``models`` an empty dictionary is returned.
    """
    if isinstance(models, dict):
        return models

    _models = {}
    if models is None:
        logging.info('No model specified, use all labeled models.')
        for label in label_checker._labels.values():
            _models.setdefault(label.name, []).append(label.version)
    elif isinstance(models, list):
        for m in models:
            if m in label_checker._labels:
                label = label_checker._labels[m]
                _models[label.name] = label.version
            else:
                _models[str(NamingConventions.CalibratedModel(
                    model=m))] = LAST_VERSION
    elif isinstance(models, str):
        # if just a string is given, use all labels on this model and the latest model
        _models[models] = [LAST_VERSION]
        if models in label_checker._labels:
            for k in label_checker._labels[models].keys():
                _models[models].append(k)
        # log the message once (the former nested call additionally logged ``None``)
        logging.info('Only a model name given, using last version and ' +
                     str(len(_models[models]) - 1) +
                     ' labeled versions of this model.')
    return _models
def get_modifier_versions(self, ml_repo):
    """Return the name of this object and the versions of the objects it depends on.

    The dependencies are the calibrated model, the test definition, the label
    referenced by the (latest) test definition and the data.

    Args:
        ml_repo (MLRepo): repository used to load the test definition and the
            label it references (both in their latest version)

    Returns:
        tuple: ``(name, modifiers)`` where ``modifiers`` maps object names to
        versions.
    """
    model_key = str(NamingConventions.CalibratedModel(self.model))
    regression_test = ml_repo.get(self.test_definition, version=LAST_VERSION)
    label = ml_repo.get(regression_test.reference, version=LAST_VERSION)
    modifiers = {
        model_key: self.model_version,
        self.test_definition: self.test_definition_version,
        label.repo_info.name: label.repo_info.version,
        self.data: self.data_version,
    }
    return self.repo_info.name, modifiers
def __check_test(repo: MLRepo, model, model_version, test_definition):
    """Check the tests of one test definition for a given model version.

    For every data name returned by ``test_definition._get_data(repo)`` the
    test belonging to the model version is looked up. If several tests are
    found, the one with the latest commit date is used.

    Args:
        repo (MLRepo): the ml repo
        model (str): model name
        model_version (str): model version
        test_definition: test definition providing ``_get_data`` and ``repo_info``

    Returns:
        dict: maps test names to a finding: a message if the test is missing,
        the result of ``test._check(repo)`` if that is not None, or a failure
        message if the test result is not ``'succeeded'`` (the failure message
        takes precedence). Empty if nothing was found.
    """
    findings = {}
    for data_name in test_definition._get_data(repo):
        # build the name the test for this model and data would have
        test_name = str(
            NamingConventions.Test(
                model=NamingConventions.get_model_from_name(model),
                test_name=test_definition.repo_info.name,
                data=data_name))
        logging.debug('Checking test ' + test_name + ' for ' + model +
                      ', version ' + model_version)
        found = repo.get(test_name,
                         version=None,
                         modifier_versions={model: model_version},
                         throw_error_not_exist=False,
                         throw_error_not_unique=False)
        if found == []:
            findings[test_name] = ('Test for model ' + model + ', version ' +
                                   model_version + ' on latest data ' +
                                   data_name + ' missing.')
            continue
        if isinstance(found, list):
            # several tests: use the most recently committed one
            found = max(found, key=lambda t: t.repo_info.commit_date)
        check_result = found._check(repo)
        if check_result is not None:
            findings[test_name] = check_result
        if found.result != 'succeeded':
            findings[test_name] = ('Test for model ' + model + ', version ' +
                                   model_version + ' on latest data ' +
                                   data_name + ' failed, details: ' +
                                   str(found.details))
    return findings
def get_measure_by_parameter(ml_repo, measure_names, param_name, data_versions=LAST_VERSION, training_param=False):
    """Return, for one or more measures, the measure values and the corresponding value of a parameter.

    Args:
        ml_repo (MLRepo): the ml repo
        measure_names (str, list(str)): string or list of strings of measure names
        param_name (str): name of the parameter; it is resolved with
            ``_get_value_by_path`` on the result of ``get_params()``
        data_versions (version number, optional): Defaults to LAST_VERSION.
            Version(s) of the underlying data, passed as modifier version when
            the measures are retrieved.
        training_param (bool, optional): Defaults to False. If True, the
            training parameter object of the model is used, otherwise the
            model parameter object.

    Returns:
        dict: dictionary of measure name to list of dictionaries containing the result, i.e.
            model_version: version of the model
            <param_name>: the parameter value
            param_version: version of the parameter object of this data point
            data_version: version of the underlying data
            train_data_version: version of the training data used to calibrate the model
            measure_version: version of the measure
            value: measure value
            model_label: label of the model version (only if one exists)

    Measures whose parameter cannot be retrieved are skipped; each such case is
    logged and a single summarizing warning is issued per measure name.
    """
    label_checker = _LabelChecker(ml_repo)
    if isinstance(measure_names, str):
        measure_names = [measure_names]

    result_all = {}
    for measure_name in measure_names:
        data = str(NamingConventions.Data(NamingConventions.EvalData(
            NamingConventions.Measure(measure_name))))
        measures = ml_repo.get(measure_name, version=None,
                               modifier_versions={data: data_versions})
        if not isinstance(measures, list):
            measures = [measures]
        model_name = NamingConventions.CalibratedModel(
            NamingConventions.Measure(measure_name))
        if training_param:
            p_name = str(NamingConventions.TrainingParam(model_name))
        else:
            p_name = str(NamingConventions.ModelParam(model_name))
        train_data = ml_repo.get_names(MLObjectType.TRAINING_DATA)[0]
        model_name = str(model_name)

        result = []
        n_warnings = 0  # number of measures whose parameter could not be retrieved
        for x in measures:
            modification_info = x.repo_info[RepoInfoKey.MODIFICATION_INFO]
            p = ml_repo.get(p_name, version=modification_info[p_name])
            try:
                param_value = _get_value_by_path(p.get_params(), param_name)
                # the model is needed to get the train data version
                model = ml_repo.get(
                    model_name,
                    version=x.repo_info.modification_info[model_name])
                info = {
                    'model_version': modification_info[model_name],
                    param_name: param_value,
                    'param_version': p.repo_info[RepoInfoKey.VERSION],
                    'data_version': modification_info[data],
                    'train_data_version':
                    model.repo_info[RepoInfoKey.MODIFICATION_INFO][train_data],
                    'measure_version': x.repo_info[RepoInfoKey.VERSION],
                    'value': x.value
                }
                label = label_checker.get_label(model_name,
                                                modification_info[model_name])
                if label is not None:
                    info['model_label'] = label
                result.append(info)
            except Exception:
                # best effort: skip this data point, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare except would
                n_warnings += 1
                logger.warning('Could no retrieve parameter ' + p_name +
                               ' for ' + str(p.repo_info.name) + ', version ' +
                               str(p.repo_info.version))
        if n_warnings > 0:
            warnings.warn(
                'There are ' + str(n_warnings) +
                ' cases where the parameter could not be retrieved. See logging (logevel warning) for details.')
        result_all[measure_name] = result
    return result_all
def get_ptws_error_dist_mmd(ml_repo, model, data, x_coords=None, y_coords=None, start_index=0, end_index=-1, percentile=0.1, cache=True, scale=True, metric='rbf', **kwds):
    """Returns Squared Maximum Mean Distance (MMD) between the distributions of the x-data w.r.t. a percentile of the absolute pointwise errors along the y-coordinates.

    Args:
        ml_repo (MLRepo): Repository the models, data and evaluation data are read from.
        model (str or dict): A dictionary of model names (or labels) to versions (a single version number, a range of versions or a list of versions) or
            just a model name (in this case the latest version is used)
        data (str or dict): A dictionary of data names to versions (a single version number, a range of versions or a list of versions) or
            just a data name (in this case the latest version is used)
        x_coords (int, str or list, optional): x-coordinate or list of x-coordinates used to compute the squared MMD. Names are translated
            to indices using the coordinate names of the first data object. If None, all x-coordinates are used. A list passed in is not
            modified. Defaults to None.
        y_coords (int, str or list, optional): y-coordinate or list of y-coordinates used to compute the squared MMD. Handled like
            ``x_coords``. If None, all y-coordinates are used. Defaults to None.
        start_index (int, optional): Start index of data. Currently not used inside this function. Defaults to 0.
        end_index (int, optional): End index of data. Currently not used inside this function. Defaults to -1.
        percentile (float, optional): Percentile of absolute error defining the x-values. Defaults to 0.1.
        cache (bool, optional): If True, the given MLRepo is passed on as cache. Defaults to True.
        scale (bool, optional): If True, the x-coordinates will be scaled by sklearn StandardScaler. Defaults to True.
        metric (str or callable, optional): The metric to use when calculating kernel between instances in a feature array. Defaults to 'rbf'.
            If metric is a string, it must be one of the metrics in sklearn.metrics.pairwise.PAIRWISE_KERNEL_FUNCTIONS.
            If metric is precomputed, X is assumed to be a kernel matrix. Alternatively, if metric is a callable function,
            it is called on each pair of instances (rows) and the resulting value recorded.
            The callable should take two arrays from X as input and return a value indicating the distance between them.
            Currently, sklearn provides the following strings: 'additive_chi2', 'chi2', 'linear', 'poly', 'polynomial', 'rbf',
            'laplacian', 'sigmoid', 'cosine'
        **kwds: optional keyword parameters that are passed directly to the kernel function.

    Returns:
        list of dict: List of dictionaries where each dictionary contains the squared MMD as well as the name and version of underlying data and model and the
            x- and y-coordinates used.
    """
    label_checker = _LabelChecker(ml_repo)

    # resolve all requested model versions
    model_versions = _get_obj_dict(ml_repo, model, label_checker,
                                   MLObjectType.CALIBRATED_MODEL)
    _models = []
    for m_name, versions in model_versions.items():
        for m_v in versions:
            found = ml_repo.get(m_name, version=m_v,
                                throw_error_not_unique=False)
            if isinstance(found, list):
                _models.extend(found)
            else:
                _models.append(found)

    # resolve all requested data versions
    data_versions = _get_obj_dict(ml_repo, data, None, [
        MLObjectType.TRAINING_DATA, MLObjectType.TEST_DATA])
    _data = []
    for d_name, versions in data_versions.items():
        for d_v in versions:
            found = ml_repo.get(d_name, version=d_v,
                                throw_error_not_unique=False)
            if isinstance(found, list):
                _data.extend(found)
            else:
                _data.append(found)

    cache_ = ml_repo if cache else None

    # set coordinates: translate names into indices, building new lists so
    # that a list handed in by the caller is left untouched
    if x_coords is not None:
        if isinstance(x_coords, (int, str)):
            x_coords = [x_coords]
        x_coords = [
            _data[0].x_coord_names.index(c) if isinstance(c, str) else c
            for c in x_coords
        ]
    if y_coords is not None:
        if isinstance(y_coords, (int, str)):
            y_coords = [y_coords]
        y_coords = [
            _data[0].y_coord_names.index(c) if isinstance(c, str) else c
            for c in y_coords
        ]

    # loop over models and data
    result = []
    for m in _models:
        for d in _data:
            _eval_data_name = str(
                NamingConventions.EvalData(
                    data=d.repo_info.name,
                    model=m.repo_info.name.split('/')[0]))
            eval_data = ml_repo.get(
                _eval_data_name, None,
                modifier_versions={m.repo_info.name: m.repo_info.version},
                full_object=True)
            full_data = ml_repo.get(d.repo_info.name,
                                    version=d.repo_info.version,
                                    full_object=True)
            mmd = _get_MMD2_X_vs_abs_ptw_error_percentile(
                full_data, eval_data, x_coords, y_coords, cache=cache_,
                percentile=percentile, scale=scale, metric=metric, **kwds)
            if x_coords is not None:
                x_coord_names = [d.x_coord_names[i] for i in x_coords]
            else:
                x_coord_names = d.x_coord_names
            if y_coords is not None:
                y_coord_names = [d.y_coord_names[i] for i in y_coords]
            else:
                y_coord_names = d.y_coord_names
            for i, x_name in enumerate(x_coord_names):
                for j, y_name in enumerate(y_coord_names):
                    result.append({
                        'model': m.repo_info.name,
                        'model version': m.repo_info.version,
                        'data': d.repo_info.name,
                        'data version': d.repo_info.version,
                        'x-coord': x_name,
                        'y-coord': y_name,
                        'mmd': mmd[i, j]
                    })
    return result
def get_pointwise_model_errors(ml_repo, models, data, coord_name=None, data_version=LAST_VERSION, x_coord_name=None, start_index=0, end_index=-1):
    """Compute pointwise errors for given models and data.

    For every data set and every model (version) the evaluation data is
    fetched from the repository and the difference between the reference
    target values and the evaluated values of one y-coordinate is computed.
    Nothing is plotted here; the result is a dictionary that can be plotted.

    Args:
        ml_repo (MLRepo): Repository the data and evaluation data are read from.
        models (str or dict): A dictionary of model names to versions (a single version number, a range of versions or a list of versions) or
            just a model name (in this case the latest version is used)
        data (str or list of str): Name(s) of the input data to be used.
        coord_name (int or str, optional): Index or name of the y-coordinate used for the error. If None, the first coordinate is used. Defaults to None.
        data_version (str, optional): Version of the input data used. Defaults to LAST_VERSION.
        x_coord_name (int or str, optional): If specified, index or name of the x-coordinate whose values are returned in addition to the errors.
            If None, no x-values are returned. Defaults to None.
        start_index (int, optional): Start of the slice taken from the errors (and x-values). Defaults to 0.
        end_index (int, optional): End of the slice. Positive values are capped at the number of errors; other values are used as given,
            so the default of -1 excludes the last point. Defaults to -1.

    Returns:
        dict: contains ``title``, ``x0_name`` (and ``x1_name`` if x-values are requested) and ``data``, which maps
        ``'<eval data name>: <eval data version>'`` to a dictionary with the values (``x0``, optionally ``x1`` and ``x0_name``),
        ``info`` (data and model versions) and, if the model version is labeled, ``label``.
    """
    label_checker = _LabelChecker(ml_repo)
    data_names = [data] if isinstance(data, str) else data
    model_versions = _get_obj_dict(ml_repo, models, label_checker,
                                   MLObjectType.CALIBRATED_MODEL)

    # the first data set defines the coordinate names
    ref_data = ml_repo.get(data_names[0], version=data_version,
                           full_object=False)
    if coord_name is None:
        coord_name = ref_data.y_coord_names[0]
    if isinstance(coord_name, int):
        coord_name = ref_data.y_coord_names[coord_name]
    coord = ref_data.y_coord_names.index(coord_name)

    result = {'title': 'pointwise error (' + coord_name + ')', 'data': {}}
    error_axis_name = 'model-target [' + coord_name + ']'
    if x_coord_name is None:
        result['x0_name'] = error_axis_name
    else:
        if isinstance(x_coord_name, int):
            x_coord_name = ref_data.x_coord_names[x_coord_name]
        result['x0_name'] = x_coord_name
        result['x1_name'] = error_axis_name

    for d in data_names:
        ref_data = ml_repo.get(d, version=data_version, full_object=True)
        for m_name, m_versions in model_versions.items():
            if len(m_versions) == 1:
                m_versions = m_versions[0]
            model_main_name = m_name.split('/')[0]
            eval_data_name = str(
                NamingConventions.EvalData(data=d, model=model_main_name))
            logging.info('Retrieving eval data for model ' + model_main_name +
                         ', versions ' + str(m_versions) + ' and data ' + d +
                         ', versions ' + str(data_version))
            eval_data = ml_repo.get(
                eval_data_name, version=None,
                modifier_versions={m_name: m_versions, d: data_version},
                full_object=True)
            if not isinstance(eval_data, list):
                eval_data = [eval_data]
            for eval_d in eval_data:
                error = ref_data.y_data[:, coord] - eval_d.x_data[:, coord]
                end = min(end_index, error.shape[0]) if end_index > 0 else end_index
                entry = {}
                if x_coord_name is None:
                    entry['x0'] = error[start_index:end]
                else:
                    entry['x1'] = error[start_index:end]
                    entry['x0_name'] = x_coord_name
                    x_index = ref_data.x_coord_names.index(x_coord_name)
                    entry['x0'] = ref_data.x_data[start_index:end, x_index]
                used_model_version = eval_d.repo_info[
                    RepoInfoKey.MODIFICATION_INFO][m_name]
                entry['info'] = {d: str(data_version),
                                 m_name: str(used_model_version)}
                model_label = label_checker.get_label(
                    m_name,
                    eval_d.repo_info[RepoInfoKey.MODIFICATION_INFO][m_name])
                if model_label is not None:
                    entry['label'] = model_label
                key = eval_data_name + ': ' + str(
                    eval_d.repo_info[RepoInfoKey.VERSION])
                result['data'][key] = entry
    return result
def measure_by_parameter(ml_repo, measure_name, param_name, data_versions=None, training_param=False, logscale_y=False, logscale_x=False):
    """Plot a measure value vs a certain training or model parameter.

    The values are obtained from ``plot_helper.get_measure_by_parameter``; one
    marker trace is drawn per measure and data version and model labels are
    added as arrow annotations. The figure is shown with ``iplot``.

    Args:
        ml_repo (MLRepo): MLRepo
        measure_name (str): Name of measure to be plotted.
        param_name (str): Name of parameter to be plotted. To define a subparameter one can use the '/' to define the path to the parameter.
        data_versions (str, optional): Version of the dataset underlying the measure; passed on to
            ``plot_helper.get_measure_by_parameter``. Defaults to None.
        training_param (bool, optional): Boolean that defines if parameter of interest belongs to training or model parameter. Defaults to False.
        logscale_y (bool): If true, the y-axis will be log scale. Defaults to False.
        logscale_x (bool): If true, the x-axis will be log scale. Defaults to False.

    Examples:
        To plot the maximum error (which must have been defined in the measures) for the model ``DecisionTreeRegressor`` on the dataset ``sample1``
        against the parameter ``learning_rate`` contained in the subparameter ``optim_param`` we may call::

            >> measure_by_parameter(ml_repo, 'DecisionTreeRegressor/measure/sample1/max', 'optim_param/learning_rate')
    """

    def _identity(value):
        return value

    # annotation coordinates are given in log10 when the axis is logarithmic
    scale_x = math.log10 if logscale_x else _identity
    scale_y = math.log10 if logscale_y else _identity

    values = plot_helper.get_measure_by_parameter(ml_repo, measure_name,
                                                  param_name, data_versions,
                                                  training_param)
    traces = []
    label_annotations = []
    for name, records in values.items():
        data_name = str(
            NamingConventions.Data(
                NamingConventions.EvalData(
                    NamingConventions.Measure(measure_name))))
        versions = set()
        for record in records:
            versions.add(record['data_version'])
            if 'model_label' in record:
                label_annotations.append({
                    'x': scale_x(record[param_name]),
                    'y': scale_y(record['value']),
                    'xref': 'x',
                    'yref': 'y',
                    'text': record['model_label'],
                    'showarrow': True,
                    'arrowhead': 2,
                })
        frame = pd.DataFrame(records)
        for version in versions:
            subset = frame.loc[frame['data_version'] == version]
            hover_text = [
                '<br>'.join([
                    "model version: " + str(row['model_version']),
                    data_name + ': ' + str(row['data_version']),
                    'train_data: ' + str(row['train_data_version']),
                ]) for _, row in subset.iterrows()
            ]
            traces.append(
                go.Scatter(x=subset[param_name],
                           y=subset['value'],
                           text=hover_text,
                           name=name + ': ' + str(version),
                           mode='markers'))

    xaxis = {'title': param_name}
    if logscale_x:
        xaxis['type'] = 'log'
    yaxis = {
        'title': NamingConventions.Measure(measure_name).values['measure_type']
    }
    if logscale_y:
        yaxis['type'] = 'log'
    layout = go.Layout(title='measure by parameter',
                       annotations=label_annotations,
                       xaxis=xaxis,
                       yaxis=yaxis)
    iplot(go.Figure(data=traces, layout=layout))
def measure_history(ml_repo, measure_name, logscale_y=False):
    """Plots the history of the model w.r.t. a defined measure.

    The x-axis shows the datetime of each model version as returned by
    ``plot_helper.get_measure_history``. One marker trace is drawn per measure
    and data version; model labels are added as arrow annotations. The figure
    is shown with ``iplot``.

    Args:
        ml_repo (MLRepo): MLRepo.
        measure_name (str, iterable of str): Name (or iterable of names) of measure(s) to plot (a measure name includes the name of the underlying model and dataset).
        logscale_y (bool): If true, the y-axis will be log scale. Defaults to False.

    Examples:
        To plot the history of the maximum error (which must have been defined in the measures) for the model ``DecisionTreeRegressor`` on the dataset ``sample1``::

            >> measure_history(ml_repo, 'DecisionTreeRegressor/measure/sample1/max')
    """

    def _identity(value):
        return value

    # annotation coordinates are given in log10 when the axis is logarithmic
    scale_y = math.log10 if logscale_y else _identity

    history = plot_helper.get_measure_history(ml_repo, measure_name)
    traces = []
    label_annotations = []
    for name, records in history.items():
        data_name = str(
            NamingConventions.Data(
                NamingConventions.EvalData(
                    NamingConventions.Measure(measure_name))))
        versions = set()
        for record in records:
            versions.add(record['data_version'])
            if 'model_label' in record:
                label_annotations.append({
                    'x': str(record['datetime']),
                    'y': scale_y(record['value']),
                    'xref': 'x',
                    'yref': 'y',
                    'text': record['model_label'],
                    'showarrow': True,
                    'arrowhead': 2,
                })
        frame = pd.DataFrame(records)
        for version in versions:
            subset = frame.loc[frame['data_version'] == version]
            hover_text = [
                '<br>'.join([
                    "model version: " + str(row['model_version']),
                    data_name + ': ' + str(row['data_version']),
                    'train_data: ' + str(row['train_data_version']),
                ]) for _, row in subset.iterrows()
            ]
            traces.append(
                go.Scatter(x=subset['datetime'],
                           y=subset['value'],
                           text=hover_text,
                           name=name + ': ' + str(version),
                           mode='markers'))

    yaxis = {
        'title': NamingConventions.Measure(measure_name).values['measure_type']
    }
    if logscale_y:
        yaxis['type'] = 'log'
    layout = go.Layout(title='measure history',
                       annotations=label_annotations,
                       xaxis=dict(title='t'),
                       yaxis=yaxis)
    iplot(go.Figure(data=traces, layout=layout))
def __check_model(repo, model_name, correct, model_version, check_for_latest=False):
    """Check if the model is calibrated and evaluated on the latest versions

    Args:
        repo (MLRepo): the ml repo
        model_name (str): model name
        correct (bool): if true, training, evaluations, measures and tests are automatically triggered for failed checks.
        model_version (version): model version to check
        check_for_latest (bool): if true, it is additionally checked whether all modifiers of the model version are still the latest
            versions. Defaults to False.

    Returns:
        dict -- dictionary describing the inconsistencies found (possible keys: 'latest model version not on latest inputs',
            'evaluations missing', 'measures not calculated'); if the dictionary is empty no model inconsistencies could be found
    """
    logging.info('Checking model ' + model_name + ', version: ' +
                 str(model_version) + ', correct: ' + str(correct) +
                 ', check_for_latest: ' + str(check_for_latest))
    model_main_name = model_name.split('/')[0]
    result = {}
    repo_store = repo.get_ml_repo_store()

    # first check if all versions of the models modifiers are still the latest version
    if check_for_latest:
        outdated = {}
        model = repo.get(MLRepo.get_calibrated_model_name(model_name),
                         version=model_version)
        modification_info = model.repo_info[RepoInfoKey.MODIFICATION_INFO]
        for modifier, version in modification_info.items():
            latest_version = repo_store.get_latest_version(modifier)
            if str(version) != str(latest_version):
                outdated[modifier] = {
                    'modifier version': version,
                    'latest version': latest_version
                }
        if outdated:
            result['latest model version not on latest inputs'] = outdated
            # plain truth test, consistent with the other ``correct`` checks below
            if correct:
                job_id = repo.run_training(model_name)
                job_ids = repo.run_evaluation(
                    model_name,
                    message='running evaluations triggered by new training of '
                    + model_name,
                    predecessors=[job_id])
                job_ids = repo.run_measures(
                    model_name,
                    message='running measures triggered by new training of ' +
                    model_name,
                    predecessors=job_ids)
                repo.run_tests(
                    model_name,
                    message='running tests triggered by new training of ' +
                    model_name,
                    predecessors=job_ids)
                return result  # we can return here since everything was corrected

    # simply check if model has been evaluated on the latest data sets
    # (a new list is built so that the lists returned by the repo are not modified)
    data_names = [
        *repo.get_names(MLObjectType.TRAINING_DATA),
        *repo.get_names(MLObjectType.TEST_DATA),
    ]
    eval_check_result = {}
    data_corrected = set()
    for data in data_names:
        latest_data_version = repo_store.get_latest_version(data)
        eval_name = str(
            NamingConventions.EvalData(data=data, model=model_main_name))
        evaluation = repo.get(eval_name,
                              version=None,
                              modifier_versions={
                                  model_name: model_version,
                                  data: latest_data_version
                              },
                              throw_error_not_exist=False,
                              throw_error_not_unique=False)
        if evaluation == []:
            eval_check_result[data] = latest_data_version
            if correct:
                repo.run_evaluation(
                    model_name,
                    message='automatically corrected from _check_model',
                    model_version=model_version,
                    run_descendants=True)
                data_corrected.add(data)
    if eval_check_result:
        result['evaluations missing'] = eval_check_result

    # simply check if all measures have been computed for the respective model
    measure_check_result = set()
    for config_name in repo.get_names(MLObjectType.MEASURE_CONFIGURATION):
        measure_config = repo.get(config_name)
        for measure_type in measure_config.measures.keys():
            for data in data_names:
                measure_name = str(
                    NamingConventions.Measure(
                        str(
                            NamingConventions.EvalData(
                                data=data, model=model_main_name)),
                        measure_type=measure_type))
                measure = repo.get(
                    measure_name,
                    version=None,
                    modifier_versions={model_name: model_version},
                    throw_error_not_exist=False,
                    throw_error_not_unique=False)
                if measure == []:
                    measure_check_result.add(measure_name)
                    # we do only have to correct if the underlying evaluation data has not
                    # been corrected (then predecessors are included)
                    if correct and data not in data_corrected:
                        repo.run_measures(
                            model_name,
                            message='automatically corrected from _check_model',
                            model_version=model_version,
                            run_descendants=True)
    if measure_check_result:
        result['measures not calculated'] = measure_check_result
    return result
def measure_by_parameter(ml_repo, measure_name, param_name, data_versions=None, training_param=False):
    '''Plot a measure value against the value of a training or model parameter.

    The values are obtained from ``plot_helper.get_measure_by_parameter``; one
    marker trace is drawn per measure and data version and model labels are
    added as arrow annotations. The figure is shown with ``iplot``.

    Args:
        :param ml_repo (MLRepo): the ml repo
        :param measure_name (str): name of the measure to be plotted
        :param param_name (str): name of the parameter to be plotted
        :param data_versions (optional): Defaults to None. Version of the underlying data, passed on to
            ``plot_helper.get_measure_by_parameter``
        :param training_param (bool, optional): If True, training parameters are used otherwise model parameter (default is False)
    '''
    values = plot_helper.get_measure_by_parameter(ml_repo, measure_name,
                                                  param_name, data_versions,
                                                  training_param)
    traces = []
    label_annotations = []
    for name, records in values.items():
        data_name = str(
            NamingConventions.Data(
                NamingConventions.EvalData(
                    NamingConventions.Measure(measure_name))))
        versions = set()
        for record in records:
            versions.add(record['data_version'])
            if 'model_label' in record:
                label_annotations.append({
                    'x': record[param_name],
                    'y': record['value'],
                    'xref': 'x',
                    'yref': 'y',
                    'text': record['model_label'],
                    'showarrow': True,
                    'arrowhead': 2,
                })
        frame = pd.DataFrame(records)
        for version in versions:
            subset = frame.loc[frame['data_version'] == version]
            hover_text = [
                '<br>'.join([
                    "model version: " + str(row['model_version']),
                    data_name + ': ' + str(row['data_version']),
                    'train_data: ' + str(row['train_data_version']),
                ]) for _, row in subset.iterrows()
            ]
            traces.append(
                go.Scatter(x=subset[param_name],
                           y=subset['value'],
                           text=hover_text,
                           name=name + ': ' + str(version),
                           mode='markers'))

    y_title = NamingConventions.Measure(measure_name).values['measure_type']
    layout = go.Layout(title='measure by parameter',
                       annotations=label_annotations,
                       xaxis=dict(title=param_name),
                       yaxis=dict(title=y_title))
    iplot(go.Figure(data=traces, layout=layout))
def get_pointwise_model_errors(ml_repo, models, data, coord_name=None, data_version=LAST_VERSION, x_coord_name=None, start_index=0, end_index=-1):
    """Compute pointwise errors for given models and data.

    For every data set and every selected model the evaluation data (all
    versions matching the model and data versions) is fetched and the
    difference between the reference target values and the evaluated values
    of one y-coordinate is computed.

    Args:
        ml_repo (MLRepo): Repository the data and evaluation data are read from.
        models (None, list, str or dict): Models to use. None selects all labeled models, a list may contain labels or model names,
            a string selects the latest version plus the entries found for it in the label checker, a dict of model names to
            versions is used as given.
        data (str or list of str): Name(s) of the input data to be used.
        coord_name (int or str, optional): Index or name of the y-coordinate used for the error. If None, the first coordinate is used. Defaults to None.
        data_version (str, optional): Version of the input data used. Defaults to LAST_VERSION.
        x_coord_name (int or str, optional): If specified, index or name of the x-coordinate whose values are returned in addition to the errors.
            Defaults to None.
        start_index (int, optional): Start of the slice taken from the errors (and x-values). Defaults to 0.
        end_index (int, optional): End of the slice. Positive values are capped at the number of errors; other values are used as given,
            so the default of -1 excludes the last point. Defaults to -1.

    Returns:
        dict: contains ``title``, ``x0_name`` (and ``x1_name`` if x-values are requested) and ``data``, which maps
        ``'<eval data name>: <eval data version>'`` to a dictionary with the values (``x0``, optionally ``x1`` and ``x0_name``),
        ``info`` (data and model versions) and, if the model version is labeled, ``label``.
    """
    label_checker = _LabelChecker(ml_repo)

    def get_model_dict(ml_repo, models, label_checker):
        """Determine the models (including their respective versions) to be used."""
        if isinstance(models, dict):
            return models
        _models = {}
        if models is None:
            # if models is None, use all labeled models
            logging.info('No model specified, use all labeled models.')
            for label in label_checker._labels.values():
                _models.setdefault(label.name, []).append(label.version)
        elif isinstance(models, list):
            for m in models:
                if m in label_checker._labels:
                    label = label_checker._labels[m]
                    _models[label.name] = label.version
                else:
                    _models[str(NamingConventions.CalibratedModel(
                        model=m))] = LAST_VERSION
        elif isinstance(models, str):
            # if just a string is given, use all labels on this model and the latest model
            _models[models] = [LAST_VERSION]
            if models in label_checker._labels:
                for k in label_checker._labels[models].keys():
                    _models[models].append(k)
            # log the message once (the former nested call additionally logged ``None``)
            logging.info('Only a model name given, using last version and ' +
                         str(len(_models[models]) - 1) +
                         ' labeled versions of this model.')
        return _models

    _data = [data] if isinstance(data, str) else data
    _models = get_model_dict(ml_repo, models, label_checker)

    # the first data set defines the coordinate names
    ref_data = ml_repo.get(_data[0], version=data_version, full_object=False)
    if coord_name is None:
        coord_name = ref_data.y_coord_names[0]
    if isinstance(coord_name, int):
        # allow addressing the y-coordinate by index
        coord_name = ref_data.y_coord_names[coord_name]
    coord = ref_data.y_coord_names.index(coord_name)

    result = {'title': 'pointwise error (' + coord_name + ')', 'data': {}}
    if x_coord_name is None:
        result['x0_name'] = 'model-target [' + coord_name + ']'
    else:
        if isinstance(x_coord_name, int):
            # allow addressing the x-coordinate by index
            x_coord_name = ref_data.x_coord_names[x_coord_name]
        result['x0_name'] = x_coord_name
        result['x1_name'] = 'model-target [' + coord_name + ']'

    for d in _data:
        ref_data = ml_repo.get(d, version=data_version, full_object=True)
        for m_name, m_versions in _models.items():
            model_main_name = m_name.split('/')[0]
            eval_data_name = str(
                NamingConventions.EvalData(data=d, model=model_main_name))
            logging.info('Retrieving eval data for model ' + model_main_name +
                         ', versions ' + str(m_versions) + ' and data ' + d +
                         ', versions ' + str(data_version))
            eval_data = ml_repo.get(eval_data_name,
                                    version=(FIRST_VERSION, LAST_VERSION),
                                    modifier_versions={
                                        m_name: m_versions,
                                        d: data_version
                                    },
                                    full_object=True)
            if not isinstance(eval_data, list):
                eval_data = [eval_data]
            for eval_d in eval_data:
                error = ref_data.y_data[:, coord] - eval_d.x_data[:, coord]
                end = end_index
                if end > 0:
                    end = min(end, error.shape[0])
                entry = {}
                if x_coord_name is None:
                    entry['x0'] = error[start_index:end]
                else:
                    entry['x1'] = error[start_index:end]
                    entry['x0_name'] = x_coord_name
                    entry['x0'] = ref_data.x_data[
                        start_index:end,
                        ref_data.x_coord_names.index(x_coord_name)]
                used_model_version = eval_d.repo_info[
                    RepoInfoKey.MODIFICATION_INFO][m_name]
                entry['info'] = {
                    d: str(data_version),
                    m_name: str(used_model_version)
                }
                model_label = label_checker.get_label(m_name,
                                                      used_model_version)
                if model_label is not None:
                    entry['label'] = model_label
                result['data'][eval_data_name + ': ' + str(
                    eval_d.repo_info[RepoInfoKey.VERSION])] = entry
    return result