def preprocess_lcobj(_lcobj, band_names, thday,
    uses_magnitude=True,
    uses_band_d=False,
    ):
    lcobj = copy(_lcobj)  # work on a copy; the caller's object is not modified
    if uses_magnitude:
        lcobj.convert_to_magnitude()

    lightcurve = DFBuilder()
    band_d = {'g': 1, 'r': 2}
    for b in band_names:
        lcobjb = lcobj.get_b(b)
        lcobjb.clip_attrs_given_max_day(thday)  # clip observations up to the threshold day
        for k in range(len(lcobjb)):
            lightcurve.append(f'{b}.{k}', {
                'oid': '',
                'time': lcobjb.days[k],
                'magpsf': lcobjb.obs[k],      # same value under both column conventions
                'magnitude': lcobjb.obs[k],
                'sigmapsf': lcobjb.obse[k],
                'error': lcobjb.obse[k],
                'band': band_d[b] if uses_band_d else b,
                'isdiffpos': np.inf,  # patch
                })

    lightcurve = lightcurve.get_df().set_index('oid')
    return lightcurve
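# Usage sketch for preprocess_lcobj (assumption: `lcset` and the object name
# below are placeholders for a light-curve set loaded with this project's
# dataset tooling; `band_names` and `thday` follow the signature above).
# The result is a per-observation DataFrame indexed by 'oid':
#
#     lcobj = lcset['some_lcobj_name']  # hypothetical object name
#     lc_df = preprocess_lcobj(lcobj, band_names=['g', 'r'], thday=30)
#     print(lc_df[['time', 'magnitude', 'error', 'band']])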
def get_all_fat_features(lcdataset, lcset_name,
    backend=None,  # backend='multiprocessing'
    ):
    lcset = lcdataset[lcset_name]
    band_names = lcset.band_names
    thdays_features_df = DFBuilder()
    lcobj_names = lcset.get_lcobj_names()
    batches, n_jobs = get_joblib_config_batches(lcobj_names, backend)
    bar = ProgressBar(len(batches))
    for batch in batches:
        bar(f'lcset_name={lcset_name}; batch={batch}({len(batch)}#)')
        jobs = []
        for lcobj_name in batch:
            jobs.append(delayed(get_features)(
                lcset[lcobj_name],
                lcobj_name,
                lcset_name,
                lcset.get_info(),
                ))
        results = Parallel(n_jobs=n_jobs, backend=backend)(jobs)
        for thdays_features_list in results:
            for thdays_features in thdays_features_list:
                thdays_features_df.append(None, thdays_features)
    bar.done()
    return thdays_features_df.get_df()
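# The loop above follows joblib's standard delayed/Parallel idiom: build one
# delayed(...) call per object, then execute the whole batch with n_jobs
# workers. A minimal, self-contained illustration of that idiom (plain
# joblib, no project code):
from joblib import Parallel, delayed

def _square(x):  # stand-in for get_features
    return x * x

jobs = [delayed(_square)(x) for x in range(8)]  # one job per work item
results = Parallel(n_jobs=2)(jobs)              # dispatch to 2 workers
assert results == [x * x for x in range(8)]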
def get_ps_times_df(rootdir, cfilename, kf, set_name, model_names,
    train_mode='pre-training',
    ):
    # note: the original signature took an unused `method` parameter while the
    # body referenced an undefined `set_name`; `set_name` is assumed to be the
    # intended parameter
    info_df = DFBuilder()
    new_model_names = utils.get_sorted_model_names(model_names)
    for kmn, model_name in enumerate(new_model_names):
        load_roodir = f'{rootdir}/{model_name}/{train_mode}/model_info/{cfilename}'
        files, files_ids = ftfiles.gather_files_by_kfold(load_roodir, kf, set_name,
            fext='d',
            disbalanced_kf_mode='oversampling',  # error oversampling
            random_state=RANDOM_STATE,
            )
        print(f'{model_name} {files_ids}({len(files_ids)}#)')
        if len(files) == 0:
            continue

        survey = files[0]()['survey']
        band_names = files[0]()['band_names']
        class_names = files[0]()['class_names']
        is_parallel = 'Parallel' in model_name
        loss_name = 'wmse-xentropy'

        d = {}
        parameters = files[0]()['parameters']
        d['params'] = parameters
        d['time-per-iteration [segs]'] = sum([f()['monitors'][loss_name]['time_per_iteration'] for f in files])
        # total_time/1500: assumed fixed epoch budget used for normalization (kept from the original)
        d['time-per-epoch [segs]'] = XError([f()['monitors'][loss_name]['total_time'] / 1500 for f in files])
        d['total-time [mins]'] = XError([f()['monitors'][loss_name]['total_time'] / 60 for f in files])
        index = f'model={utils.get_fmodel_name(model_name)}'
        info_df.append(index, d)
    return info_df
def get_info_dict(rootdir, methods, cfilename, kf, lcset_name,
    band_names=['g', 'r'],
    ):
    info_df = DFBuilder()

    ### all-bands info
    d = {}
    for method in methods:
        _rootdir = f'{rootdir}/{method}/{cfilename}'
        files, files_ids = fcfiles.gather_files_by_kfold(_rootdir, kf, lcset_name)
        trace_time = [f()['segs'] for f in files]
        d[method] = XError(trace_time)
    info_df.append(f'metric=trace-time [segs]~band=.', d)

    ### per-band info
    for kb, b in enumerate(band_names):
        d = nested_dict()
        for method in methods:
            _rootdir = f'{rootdir}/{method}/{cfilename}'
            files, files_ids = fcfiles.gather_files_by_kfold(_rootdir, kf, lcset_name)
            traces = [f()['trace_bdict'][b] for f in files]
            trace_errors = flat_list([t.get_valid_errors() for t in traces])
            trace_errors_xe = XError(np.log(np.array(trace_errors) + _C.EPS))  # EPS guards log(0)
            d['error'][method] = trace_errors_xe
            d['success'][method] = len(trace_errors) / sum([len(t) for t in traces]) * 100  # % of valid fits
        d = d.to_dict()
        info_df.append(f'metric=fit-log-error~band={b}', d['error'])
        info_df.append(f'metric=fits-success [%]~band={b}', d['success'])

    return info_df.get_df()
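# The np.log(np.array(trace_errors) + _C.EPS) guard above keeps the log finite
# when a fit error is exactly zero. A self-contained numpy illustration (the
# EPS value below is an assumption; the project reads it from _C.EPS):
import numpy as np

EPS = 1e-10
errors = np.array([0.0, 1e-3, 2.5])
log_errors = np.log(errors + EPS)  # finite everywhere, no -inf
assert np.all(np.isfinite(log_errors))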
def get_ps_performance_df(rootdir, cfilename, kf, set_name, model_names, dmetrics,
    target_class=None,
    thday=None,
    train_mode='fine-tuning',
    n=1e3,
    uses_avg=False,
    baseline_roodir=None,
    ):
    info_df = DFBuilder()
    new_model_names = utils.get_sorted_model_names(model_names)
    if baseline_roodir is not None:
        new_model_names = [BASELINE_MODEL_NAME] + new_model_names
    for kmn, model_name in enumerate(new_model_names):
        is_baseline = 'BRF' in model_name
        load_roodir = f'{rootdir}/{model_name}/{train_mode}/performance/{cfilename}' if not is_baseline else baseline_roodir
        files, files_ids = ftfiles.gather_files_by_kfold(load_roodir, kf, set_name,
            fext='d',
            disbalanced_kf_mode='oversampling',  # error oversampling
            random_state=RANDOM_STATE,
            )
        print(f'{files_ids}({len(files_ids)}#); model={model_name}')
        if len(files) == 0:
            continue

        # assumed intent of the original `fixme = 'th' if kmn == 0 else ''`:
        # baseline (BRF) result files store their keys with a 'th' prefix
        # ('thdays', '_thday', ...) while neural-model files use the bare names
        key_prefix = 'th' if is_baseline else ''
        band_names = files[0]()['band_names']
        class_names = files[0]()['class_names']
        thdays = files[0]()[key_prefix + 'days']

        d = {}
        for km, metric_name in enumerate(dmetrics.keys()):
            mn = dmetrics[metric_name]['mn']
            new_metric_name = f'{"b" if target_class is None else target_class}-{metric_name if mn is None else mn}'
            if not uses_avg:
                # metric evaluated at the single threshold day `thday`
                if target_class is None:
                    xe_metric = XError([f()[key_prefix + 'days_class_metrics_df'].loc[f()[key_prefix + 'days_class_metrics_df']['_' + key_prefix + 'day'] == thday][f'b-{metric_name}'].item() for f in files])
                else:
                    xe_metric = XError([f()[key_prefix + 'days_class_metrics_cdf'][target_class].loc[f()[key_prefix + 'days_class_metrics_df']['_' + key_prefix + 'day'] == thday][f'{metric_name}'].item() for f in files])
                d[new_metric_name] = xe_metric
            else:
                # average of the metric curve over all threshold days
                if is_baseline:
                    d[new_metric_name] = XError([-999])  # curve average is not defined for the baseline
                else:
                    if target_class is None:
                        metric_curves = [f()[key_prefix + 'days_class_metrics_df'][f'b-{metric_name}'].values for f in files]
                    else:
                        metric_curves = [f()[key_prefix + 'days_class_metrics_cdf'][target_class][f'{metric_name}'].values for f in files]
                    xe_metric_curve_auc = XError(np.mean(np.concatenate([metric_curve[None] for metric_curve in metric_curves], axis=0), axis=-1))  # (b,t)>(b)
                    d[new_metric_name] = xe_metric_curve_auc

        index = f'model={utils.get_fmodel_name(model_name)}'
        info_df.append(index, d)
    return info_df
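# The uses_avg branch reduces each per-run metric curve to its mean over
# threshold days before aggregating across runs. A self-contained numpy
# sketch of that (runs, days) -> (runs,) collapse, mirroring the line above:
import numpy as np

metric_curves = [np.array([0.70, 0.75, 0.80]),   # run 1: metric vs. threshold day
                 np.array([0.68, 0.74, 0.82])]   # run 2
stacked = np.concatenate([c[None] for c in metric_curves], axis=0)  # (runs, days)
per_run_avg = np.mean(stacked, axis=-1)                             # (runs,)
print(per_run_avg)  # one average per run, then summarized across runs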
class LossMonitor(object):
    def __init__(self, loss, optimizer, metrics,
        save_mode: str = C_.SM_NO_SAVE,
        target_metric_crit: str = None,
        k_counter_duration: int = C_.K_COUNTER_DURATION,
        val_epoch_counter_duration: int = C_.VAL_EPOCH_COUNTER_DURATION,
        earlystop_epoch_duration: int = C_.EARLYSTOP_EPOCH_DURATION,
        **kwargs):
        ### CHECKS
        assert isinstance(loss, ft_losses.FTLoss)
        metrics = [metrics] if isinstance(metrics, ft_metrics.FTMetric) else metrics
        assert isinstance(metrics, list) and all([isinstance(metric, ft_metrics.FTMetric) for metric in metrics])
        assert len([metric.name for metric in metrics]) == len(set([metric.name for metric in metrics]))  # metric names must be unique
        assert isinstance(optimizer, ft_optimizers.LossOptimizer)

        self.loss = loss
        self.optimizer = optimizer
        self.metrics = metrics
        self.save_mode = save_mode
        self.target_metric_crit = metrics[0].name if target_metric_crit is None else target_metric_crit
        self.counter_k = Counter({'k': k_counter_duration})
        self.counter_epoch = Counter({'val_epoch': val_epoch_counter_duration, 'earlystop_epoch': earlystop_epoch_duration})
        self.name = loss.name
        self.best_epoch = np.inf  # np.infty is a deprecated alias of np.inf
        self.last_saved_filedir = None
        self.reset()

    def reset(self):
        self.best_value = None
        self.loss_df = DFBuilder()
        self.opt_df = DFBuilder()
        self.loss_df_epoch = DFBuilder()
        self.metrics_df_epoch = DFBuilder()
        self.counter_k.reset()
        self.counter_epoch.reset()

    ### repr

    def __repr__(self):
        def get_metrics_repr():
            return f' (target_metric_crit={self.target_metric_crit})' if self.save_mode in [C_.SM_ONLY_INF_METRIC, C_.SM_ONLY_SUP_METRIC] else ''
        txt = ''
        txt += f'[{self.name}]' + '\n'
        txt += f' - opt-parameters={len(self.optimizer):,}[p] - device={self.optimizer.get_device()}' + '\n'
        txt += f' - save-mode={self.save_mode}{get_metrics_repr()}' + '\n'
        txt += f' - counter_k={self.counter_k} - counter_epoch={self.counter_epoch}' + '\n'
        return txt[:-1]

    def get_save_dict(self):
        info = {
            'save_mode': self.save_mode,
            'target_metric_crit': self.target_metric_crit,
            'counter_k': self.counter_k,
            'counter_epoch': self.counter_epoch,
            'best_epoch': self.best_epoch,
            'last_saved_filedir': self.last_saved_filedir,
            }
        return {
            'info': info,
            'loss_df': self.loss_df,
            'opt_df': self.opt_df,
            'loss_df_epoch': self.loss_df_epoch,
            'metrics_df_epoch': self.metrics_df_epoch,
            }

    ### history methods

    def add_loss_history_k(self, loss,
        dt=0,
        ):
        if self.counter_k.check('k'):
            assert isinstance(loss, ft_losses.BatchLoss)
            d = loss.get_info()
            index = None  # rows are appended positionally
            d.update({
                '_dt': dt,
                })
            self.loss_df.append(index, d)

    def add_opt_history_epoch(self):
        d = self.optimizer.get_info()
        index = None
        d.update({
            '_k': self.counter_k.get_global_count(),
            })
        self.opt_df.append(index, d)

    def add_loss_history_epoch(self, loss,
        dt=0,
        set_name=None,
        ):
        if self.counter_epoch.check('val_epoch'):
            assert isinstance(loss, ft_losses.BatchLoss)
            d = loss.get_info()
            index = None
            d.update({
                '_dt': dt,
                '_set': set_name,
                })
            self.loss_df_epoch.append(index, d)

    def add_metric_history_epoch(self, metrics_dict,
        dt=0,
        set_name=None,
        ):
        if self.counter_epoch.check('val_epoch'):
            d = {}
            for mn in metrics_dict.keys():
                metric = metrics_dict[mn]
                assert isinstance(metric, ft_metrics.BatchMetric)
                d[mn] = metric.get_info()['_metric']
            d.update({
                '_dt': dt,
                '_set': set_name,
                })
            index = None
            self.metrics_df_epoch.append(index, d)

    def get_metric_names(self):
        return [m.name for m in self.metrics]

    ### along-training methods

    def k_update(self):
        self.counter_k.update()

    def epoch_update(self):
        self.optimizer.update()
        self.counter_epoch.update()
        if self.counter_epoch.check('earlystop_epoch'):
            raise ex.TrainingInterruptedError()

    def set_last_saved_filedir(self, last_saved_filedir):
        self.last_saved_filedir = last_saved_filedir

    def needs_save(self):
        return not self.save_mode == C_.SM_NO_SAVE

    def train(self):
        self.optimizer.train()

    def eval(self):
        self.optimizer.eval()

    def needs_evaluation(self):
        return self.counter_epoch.check('val_epoch')

    def reset_early_stop(self):
        self.counter_epoch.reset_cn('earlystop_epoch')

    ### get statistics

    def get_best_epoch(self):
        return self.best_epoch

    def set_best_epoch(self, best_epoch):
        self.best_epoch = best_epoch

    def get_time_per_iteration(self):
        loss_df = self.loss_df.get_df()
        return XError([v for v in loss_df['_dt'].values])

    def get_evaluation_set_names(self):
        loss_df_epoch = self.loss_df_epoch.get_df()
        return list(np.unique(loss_df_epoch['_set'].values))

    def get_time_per_epoch_set(self, set_name):
        loss_df_epoch = self.loss_df_epoch.get_df()
        return XError([v for v in loss_df_epoch['_dt'][loss_df_epoch['_set'].isin([set_name])].values])

    def get_time_per_epoch(self):  # fixme: only counts evaluation times
        evaluation_set_names = self.get_evaluation_set_names()
        return sum([self.get_time_per_epoch_set(set_name) for set_name in evaluation_set_names])

    def get_total_time(self):
        evaluation_set_names = self.get_evaluation_set_names()
        loss_df = self.loss_df.get_df()
        loss_df_epoch = self.loss_df_epoch.get_df()
        total_time = 0
        total_time += loss_df['_dt'].values.sum()
        total_time += sum([loss_df_epoch['_dt'][loss_df_epoch['_set'].isin([set_name])].values.sum() for set_name in evaluation_set_names])  # fixme
        return total_time

    ### file methods

    def remove_filedir(self, filedir):
        files.delete_filedir(filedir, verbose=0)  # remove last best model

    def check_save_condition(self, set_name):
        if self.save_mode == C_.SM_NO_SAVE:
            return False

        elif self.save_mode == C_.SM_ALL:
            return True

        elif self.save_mode == C_.SM_ONLY_ALL:
            self.remove_filedir(self.last_saved_filedir)  # remove last best model
            return True

        elif self.save_mode == C_.SM_ONLY_INF_LOSS:
            loss_df_epoch = self.loss_df_epoch.get_df()
            loss_evolution = loss_df_epoch['_loss'][loss_df_epoch['_set'].isin([set_name])].values
            if len(loss_evolution) <= 1:
                return True  # always save the first one and don't delete anything
            loss_history = loss_evolution[:-1]  # history
            actual_loss = loss_evolution[-1]  # last one
            if actual_loss < np.min(loss_history):  # must save and delete the old checkpoint
                self.remove_filedir(self.last_saved_filedir)  # remove last best model
                self.best_value = actual_loss
                return True
            return False

        elif self.save_mode == C_.SM_ONLY_INF_METRIC:
            metrics_df_epoch = self.metrics_df_epoch.get_df()
            metric_evolution = metrics_df_epoch[self.target_metric_crit][metrics_df_epoch['_set'].isin([set_name])].values
            if len(metric_evolution) <= 1:
                return True  # always save the first one and don't delete anything
            metric_history = metric_evolution[:-1]  # history
            actual_metric = metric_evolution[-1]  # last one
            if actual_metric < np.min(metric_history):  # must save and delete the old checkpoint
                self.remove_filedir(self.last_saved_filedir)  # remove last best model
                self.best_value = actual_metric
                return True
            return False

        elif self.save_mode == C_.SM_ONLY_SUP_METRIC:
            metrics_df_epoch = self.metrics_df_epoch.get_df()
            metric_evolution = metrics_df_epoch[self.target_metric_crit][metrics_df_epoch['_set'].isin([set_name])].values
            if len(metric_evolution) <= 1:
                return True  # always save the first one and don't delete anything
            metric_history = metric_evolution[:-1]  # history
            actual_metric = metric_evolution[-1]  # last one
            if actual_metric > np.max(metric_history):  # must save and delete the old checkpoint
                self.remove_filedir(self.last_saved_filedir)  # remove last best model
                self.best_value = actual_metric
                return True
            return False

        else:
            raise Exception(f'save mode {self.save_mode} not supported')
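# A minimal, self-contained sketch of the "save only on improvement" rule used
# by check_save_condition above (plain Python; `history` stands in for the
# per-epoch loss/metric column, newest value last):
def should_save(history, maximize=False):
    """Return True when the newest value beats every earlier one."""
    if len(history) <= 1:
        return True  # always save the first checkpoint
    past, current = history[:-1], history[-1]
    return current > max(past) if maximize else current < min(past)

assert should_save([0.9, 0.7, 0.5]) is True             # loss keeps improving
assert should_save([0.5, 0.7]) is False                 # got worse: keep old checkpoint
assert should_save([0.6, 0.8], maximize=True) is True   # metric mode (SM_ONLY_SUP_METRIC)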
def get_column_query_df_table(rootdir, cfilename, kf, lcset_name, model_names, metric_names, query_dict,
    day_to_metric=None,
    mode='fine-tuning',
    arch_modes=['Parallel', 'Serial'],
    ):
    # assumption: query_dict holds a single {query_key: [query_values]} pair;
    # the original body referenced `query_key`/`query_values` without defining them
    query_key = list(query_dict.keys())[0]
    query_values = query_dict[query_key]
    fext = 'd'  # assumed result-file extension, as elsewhere in this module
    info_df = {}  # plain dict: it is consumed by pd.DataFrame.from_dict below
    index_df = []
    for arch_mode in arch_modes:
        for query_value in query_values:
            info_df[f'{query_value} [{arch_mode}]'] = []

    for kmn, model_name in enumerate(model_names):
        new_rootdir = f'{rootdir}/{mode}/{model_name}'
        new_rootdir = new_rootdir.replace('mode=pre-training', f'mode={mode}')  # patch
        new_rootdir = new_rootdir.replace('mode=fine-tuning', f'mode={mode}')  # patch
        filedirs = search_for_filedirs(new_rootdir, fext=fext, verbose=0)
        print(f'[{kmn}][{len(filedirs)}#] {model_name}')
        mn_dict = strings.get_dict_from_string(model_name)
        rsc = mn_dict['rsc']
        mdl = mn_dict['mdl']
        is_parallel = 'Parallel' in mdl
        arch_mode = 'Parallel' if is_parallel else 'Serial'
        if arch_mode in arch_modes:
            for km, metric_name in enumerate(metric_names):
                day_metric = []
                day_metric_avg = []
                for filedir in filedirs:
                    rdict = load_pickle(filedir, verbose=0)
                    days = rdict['days']
                    survey = rdict['survey']
                    band_names = ''.join(rdict['band_names'])
                    class_names = rdict['class_names']
                    v, vs, _ = utils.get_metric_along_day(days, rdict, metric_name, day_to_metric)
                    day_metric += [v]
                    day_metric_avg += [vs.mean()]

                xe_day_metric = dstats.XError(day_metric, 0)
                xe_day_metric_avg = dstats.XError(day_metric_avg, 0)
                key = f'{mn_dict[query_key]} [{arch_mode}]'
                info_df[key] += [xe_day_metric]
                info_df[key] += [xe_day_metric_avg]
                key = f'metric={utils.get_mday_str(metric_name, day_to_metric)}'
                if not key in index_df:  # two index rows per metric: at-day and day-averaged
                    index_df += [key]
                    index_df += [f'metric={utils.get_mday_avg_str(metric_name, day_to_metric)}']

    info_df = pd.DataFrame.from_dict(info_df)
    info_df.index = index_df
    return info_df
def save_performance(train_handler, data_loader, save_rootdir,
    target_is_onehot: bool = False,
    target_y_key='target/y',
    pred_y_key='model/y',
    days_n: int = DEFAULT_DAYS_N,
    **kwargs):
    train_handler.load_model()  # important: refresh to the best saved model
    train_handler.model.eval()  # important: model eval mode
    dataset = data_loader.dataset  # get dataset
    dataset.reset_max_day()  # always reset max day

    days_rec_metrics_df = DFBuilder()
    days_class_metrics_df = DFBuilder()
    days_class_metrics_cdf = {c: DFBuilder() for c in dataset.class_names}
    days_predictions = {}
    days_cm = {}

    days = np.linspace(C_.DEFAULT_MIN_DAY, dataset.max_day, days_n)
    bar = ProgressBarMulti(len(days), 4)
    with torch.no_grad():
        can_be_in_loop = True
        for day in days:
            dataset.set_max_day(day)  # very important!!
            dataset.calcule_precomputed()  # very important!!
            try:
                if can_be_in_loop:
                    tdicts = []
                    for ki, in_tdict in enumerate(data_loader):
                        _tdict = train_handler.model(TDictHolder(in_tdict).to(train_handler.device))
                        tdicts += [_tdict]
                    tdict = minibatch_dict_collate(tdicts)

                    ### reconstruction mse
                    mse_loss_bdict = {}
                    for kb, b in enumerate(dataset.band_names):
                        p_onehot = tdict[f'input/onehot.{b}'][..., 0]  # (b,t)
                        p_rerror = tdict[f'target/rerror.{b}']  # (b,t,1)
                        p_rx = tdict[f'target/recx.{b}']  # (b,t,1)
                        p_rx_pred = tdict[f'model/decx.{b}']  # (b,t,1)
                        mse_loss_b = (p_rx - p_rx_pred)**2 / (C_.REC_LOSS_EPS + C_.REC_LOSS_K * (p_rerror**2))  # (b,t,1)
                        mse_loss_b = seq_utils.seq_avg_pooling(mse_loss_b, p_onehot)[..., 0]  # (b,t,1) > (b,t) > (b)
                        mse_loss_bdict[b] = mse_loss_b[..., 0]  # (b,1) > (b)

                    mse_loss = torch.cat([mse_loss_bdict[b][..., None] for b in dataset.band_names], dim=-1).mean(dim=-1)  # (b,d) > (b)
                    mse_loss = mse_loss.mean()
                    days_rec_metrics_df.append(day, {
                        '_day': day,
                        'mse': tensor_to_numpy(mse_loss),
                        })

                    ### class prediction
                    y_true = tdict[target_y_key]  # (b)
                    #y_pred_p = torch.nn.functional.softmax(tdict[pred_y_key], dim=-1)  # (b,c)
                    y_pred_p = torch.sigmoid(tdict[pred_y_key])  # (b,c)
                    if target_is_onehot:
                        assert y_pred_p.shape == y_true.shape  # fixed: the original asserted the undefined name `y_pred_`
                        y_true = torch.argmax(y_true, dim=-1)

                    y_true = tensor_to_numpy(y_true)
                    y_pred_p = tensor_to_numpy(y_pred_p)
                    days_predictions[day] = {'y_true': y_true, 'y_pred_p': y_pred_p}
                    metrics_cdict, metrics_dict, cm = fcm.get_multiclass_metrics(y_pred_p, y_true, dataset.class_names)
                    for c in dataset.class_names:
                        days_class_metrics_cdf[c].append(day, update_dicts([{'_day': day}, metrics_cdict[c]]))
                    days_class_metrics_df.append(day, update_dicts([{'_day': day}, metrics_dict]))
                    days_cm[day] = cm

                    ### progress bar
                    recall = {c: metrics_cdict[c]['recall'] for c in dataset.class_names}
                    bmetrics_dict = {k: metrics_dict[k] for k in metrics_dict.keys() if 'b-' in k}
                    bar([f'lcset_name={dataset.lcset_name}; day={day:.3f}', f'mse_loss={mse_loss}', f'bmetrics_dict={bmetrics_dict}', f'recall={recall}'])

            except KeyboardInterrupt:
                can_be_in_loop = False  # stop computing, but still save what was collected

    bar.done()
    d = {
        'model_name': train_handler.model.get_name(),
        'survey': dataset.survey,
        'band_names': dataset.band_names,
        'class_names': dataset.class_names,
        'lcobj_names': dataset.get_lcobj_names(),
        'days': days,
        'days_rec_metrics_df': days_rec_metrics_df.get_df(),
        'days_predictions': days_predictions,
        'days_class_metrics_df': days_class_metrics_df.get_df(),
        'days_class_metrics_cdf': {c: days_class_metrics_cdf[c].get_df() for c in dataset.class_names},
        'days_cm': days_cm,
        }

    ### save file
    save_filedir = f'{save_rootdir}/{dataset.lcset_name}/id={train_handler.id}.d'
    files.save_pickle(save_filedir, d)  # save file
    dataset.reset_max_day()  # very important!!
    dataset.calcule_precomputed()  # very important!!
    return
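# seq_avg_pooling above averages the per-band squared errors over valid time
# steps only. A self-contained sketch of that masked-mean idea (assumption:
# the project's seq_utils.seq_avg_pooling behaves like this, with the
# per-band onehot tensor acting as the validity mask):
import torch

def masked_time_mean(x, mask):
    """x: (b,t,1) values; mask: (b,t) 0/1 validity -> (b,) per-sequence means."""
    mask = mask.float().unsqueeze(-1)        # (b,t,1)
    total = (x * mask).sum(dim=1)            # (b,1): sum over valid steps only
    count = mask.sum(dim=1).clamp(min=1.0)   # (b,1): avoid division by zero
    return (total / count)[..., 0]           # (b,)

x = torch.tensor([[[1.0], [2.0], [3.0]]])    # one sequence, three steps
mask = torch.tensor([[1, 1, 0]])             # last step is padding
print(masked_time_mean(x, mask))             # tensor([1.5000])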
def evaluate_classifier(rf_d, fats_filedir, fats_mode, lcset_info,
    nan_mode=NAN_MODE,
    days_n=DEFAULT_DAYS_N,
    ):
    class_names = lcset_info['class_names']
    features = rf_d['features']
    thdays_class_metrics_df = DFBuilder()
    thdays_class_metrics_cdf = {c: DFBuilder() for c in class_names}
    thdays_predictions = {}
    thdays_cm = {}

    thdays = np.linspace(MIN_DAY, MAX_DAY, days_n)
    for thday in thdays:
        eval_df_x, eval_df_y = load_features(fats_filedir,
            mode=fats_mode,
            thday=thday,
            )
        rf = rf_d['rf']
        mean_train_df_x = rf_d['mean_train_df_x']
        y_true = eval_df_y[['_y']].values[..., 0]
        eval_df_x, _, _ = clean_df_nans(eval_df_x, mode=nan_mode, df_values=mean_train_df_x)  # fixed: use the `nan_mode` argument instead of the module-level NAN_MODE
        y_pred_p = rf.predict_proba(eval_df_x.values)
        thdays_predictions[thday] = {'y_true': y_true, 'y_pred_p': y_pred_p}
        metrics_cdict, metrics_dict, cm = get_multiclass_metrics(y_pred_p, y_true, class_names)
        for c in class_names:
            thdays_class_metrics_cdf[c].append(None, update_dicts([{'_thday': thday}, metrics_cdict[c]]))
        thdays_class_metrics_df.append(None, update_dicts([{'_thday': thday}, metrics_dict]))
        thdays_cm[thday] = cm

        ### progress
        bmetrics_dict = {k: metrics_dict[k] for k in metrics_dict.keys() if 'b-' in k}
        print(f'bmetrics_dict={bmetrics_dict}')

    d = {
        'model_name': f'mdl=brf',
        'survey': lcset_info['survey'],
        'band_names': lcset_info['band_names'],
        'class_names': class_names,
        'lcobj_names': list(eval_df_y.index),
        'thdays': thdays,
        'thdays_predictions': thdays_predictions,
        'thdays_class_metrics_df': thdays_class_metrics_df.get_df(),
        'thdays_class_metrics_cdf': {c: thdays_class_metrics_cdf[c].get_df() for c in class_names},
        'thdays_cm': thdays_cm,
        'features': features,
        'rank': rf_d['rank'],
        }
    return d
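# The rf.predict_proba call above yields one probability row per light curve
# and one column per class. A minimal, self-contained sklearn sketch of that
# contract (plain RandomForestClassifier; the project's `rf` is assumed to
# expose the same predict_proba interface):
import numpy as np
from sklearn.ensemble import RandomForestClassifier

X = np.random.RandomState(0).rand(100, 5)         # 100 objects, 5 features
y = np.random.RandomState(1).randint(0, 3, 100)   # 3 classes
rf = RandomForestClassifier(n_estimators=10, random_state=0).fit(X, y)
y_pred_p = rf.predict_proba(X[:4])
print(y_pred_p.shape)                             # (4, 3): one row per object
assert np.allclose(y_pred_p.sum(axis=-1), 1.0)    # rows are probability vectors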