Example #1
def preprocess_lcobj(_lcobj, band_names, thday,
	uses_magnitude=True,
	uses_band_d=False,
	):
	lcobj = copy(_lcobj)
	if uses_magnitude:
		lcobj.convert_to_magnitude()

	lightcurve = DFBuilder()
	band_d = {'g':1, 'r':2} # numeric band ids, used when uses_band_d
	for b in band_names:
		lcobjb = lcobj.get_b(b)
		lcobjb.clip_attrs_given_max_day(thday) # clip by max day
		for k in range(len(lcobjb)):
			lightcurve.append(f'{b}.{k}', {
				'oid':'',
				'time':lcobjb.days[k],
				'magpsf':lcobjb.obs[k],
				'magnitude':lcobjb.obs[k],
				'sigmapsf':lcobjb.obse[k],
				'error':lcobjb.obse[k],
				'band':band_d[b] if uses_band_d else b,
				'isdiffpos':np.inf, # patch
				})

	lightcurve = lightcurve.get_df().set_index('oid')
	return lightcurve
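DFBuilder is a repo utility; the contract these examples assume is append(index, row_dict) followed by get_df(). A rough illustrative stand-in under those assumed semantics, not the repo's implementation:

import pandas as pd

class DFBuilderSketch:
    """Illustrative stand-in: collects row dicts keyed by index, then builds a DataFrame."""
    def __init__(self):
        self._rows = {}
        self._auto_index = 0

    def append(self, index, d):
        if index is None:  # auto-number rows when called as append(None, d)
            index = self._auto_index
            self._auto_index += 1
        self._rows[index] = d

    def get_df(self):
        return pd.DataFrame.from_dict(self._rows, orient='index')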
Example #2
def get_all_fat_features(lcdataset, lcset_name,
	backend=None,
	# backend='multiprocessing'  # alternative joblib backend
	):
	lcset = lcdataset[lcset_name]
	band_names = lcset.band_names
	thdays_features_df = DFBuilder()
	lcobj_names = lcset.get_lcobj_names()
	batches, n_jobs = get_joblib_config_batches(lcobj_names, backend)
	bar = ProgressBar(len(batches))
	for batch in batches:
		bar(f'lcset_name={lcset_name}; batch={batch}({len(batch)}#)')
		jobs = []
		for lcobj_name in batch:
			jobs.append(delayed(get_features)(
				lcset[lcobj_name],
				lcobj_name,
				lcset_name,
				lcset.get_info(),
			))
		results = Parallel(n_jobs=n_jobs, backend=backend)(jobs)
		for thdays_features_list in results:
			for thdays_features in thdays_features_list:
				thdays_features_df.append(None, thdays_features)
				# print('thdays_features',thdays_features)

	bar.done()
	return thdays_features_df.get_df()
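get_joblib_config_batches is repo-specific, but the loop above is the standard joblib delayed/Parallel idiom; a self-contained toy version of the same batching structure (square is a stand-in for get_features):

from joblib import Parallel, delayed

def square(x):  # toy stand-in for the real per-object feature job
    return x * x

names = list(range(10))
batch_size, n_jobs = 4, 2
batches = [names[i:i + batch_size] for i in range(0, len(names), batch_size)]
for batch in batches:
    jobs = [delayed(square)(name) for name in batch]
    results = Parallel(n_jobs=n_jobs)(jobs)  # runs the batch across n_jobs workers
    print(results)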
Example #3
def get_ps_times_df(
    rootdir,
    cfilename,
    kf,
    set_name,
    method,
    model_names,
    train_mode='pre-training',
):
    info_df = DFBuilder()
    new_model_names = utils.get_sorted_model_names(model_names)
    for kmn, model_name in enumerate(new_model_names):
        load_rootdir = f'{rootdir}/{model_name}/{train_mode}/model_info/{cfilename}'
        files, files_ids = ftfiles.gather_files_by_kfold(
            load_rootdir,
            kf,
            set_name,
            fext='d',
            disbalanced_kf_mode='oversampling',  # balance folds by oversampling
            random_state=RANDOM_STATE,
        )
        print(f'{files_ids}({len(files_ids)}#); model={model_name}')
        if len(files) == 0:
            continue

        survey = files[0]()['survey']
        band_names = files[0]()['band_names']
        class_names = files[0]()['class_names']
        is_parallel = 'Parallel' in model_name

        loss_name = 'wmse-xentropy'
        print(files[0]()['monitors'][loss_name].keys())  # debug: available monitor keys

        #th = 1 # bug?
        d = {}
        parameters = files[0]()['parameters']
        d['params'] = parameters
        #d['best_epoch'] = XError([f()['monitors']['wmse-xentropy']['best_epoch'] for f in files])
        d['time-per-iteration [segs]'] = sum(
            [f()['monitors'][loss_name]['time_per_iteration'] for f in files])
        #print(d['time-per-iteration [segs]'].max())
        #d['time-per-iteration/params $1e6\\cdot$[segs]'] = sum([f()['monitors'][loss_name]['time_per_iteration']/parameters*1e6 for f in files])
        #d['time_per_epoch'] = sum([f()['monitors']['wmse-xentropy']['time_per_epoch'] for f in files])

        print(files[0]()['monitors'][loss_name]['time_per_epoch'])  # debug

        #d['time_per_epoch [segs]'] = sum([f()['monitors'][loss_name]['time_per_epoch'] for f in files])
        d['time-per-epoch [segs]'] = XError(
            [f()['monitors'][loss_name]['total_time'] / 1500 for f in files])  # note: 1500 looks like a hardcoded epoch count

        d['total-time [mins]'] = XError(
            [f()['monitors'][loss_name]['total_time'] / 60 for f in files])

        index = f'model={utils.get_fmodel_name(model_name)}'
        info_df.append(index, d)

    return info_df
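XError (from the repo's stats utilities) is used throughout as a sample container that prints mean +/- dispersion and supports sum(); a rough stand-in under those assumed semantics:

import numpy as np

class XErrorSketch:
    """Rough stand-in: wraps samples, reports mean +/- std, and supports sum()."""
    def __init__(self, values):
        self.values = np.asarray(values, dtype=float)

    def __add__(self, other):
        if isinstance(other, (int, float)):  # lets sum(), which starts at 0, work
            return XErrorSketch(self.values)
        return XErrorSketch(np.concatenate([self.values, other.values]))

    __radd__ = __add__

    def __repr__(self):
        return f'{self.values.mean():.3f}+/-{self.values.std():.3f}'

print(sum([XErrorSketch([1.0, 2.0]), XErrorSketch([3.0])]))  # 2.000+/-0.816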
Example #4
def get_info_dict(
    rootdir,
    methods,
    cfilename,
    kf,
    lcset_name,
    band_names=['g', 'r'],
):
    info_df = DFBuilder()

    ### all info
    d = {}
    for method in methods:
        _rootdir = f'{rootdir}/{method}/{cfilename}'
        files, files_ids = fcfiles.gather_files_by_kfold(
            _rootdir, kf, lcset_name)
        trace_time = [f()['segs'] for f in files]
        d[method] = XError(trace_time)

    info_df.append('metric=trace-time [segs]~band=.', d)

    ### per band info
    for kb, b in enumerate(band_names):
        d = nested_dict()
        for method in methods:
            _rootdir = f'{rootdir}/{method}/{cfilename}'
            files, files_ids = fcfiles.gather_files_by_kfold(
                _rootdir, kf, lcset_name)
            traces = [f()['trace_bdict'][b] for f in files]
            trace_errors = flat_list([t.get_valid_errors() for t in traces])
            trace_errors_xe = XError(np.log(np.array(trace_errors) + _C.EPS))
            d['error'][method] = trace_errors_xe
            d['success'][method] = len(trace_errors) / sum(
                [len(t) for t in traces]) * 100

        d = d.to_dict()
        info_df.append(f'metric=fit-log-error~band={b}', d['error'])
        info_df.append(f'metric=fits-success [%]~band={b}', d['success'])

    return info_df.get_df()
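nested_dict above behaves like a recursive defaultdict with a to_dict() conversion; a minimal equivalent sketch (names are illustrative):

from collections import defaultdict

def nested_dict_sketch():
    # each missing key materializes another nested dict, so d['error']['method'] just works
    return defaultdict(nested_dict_sketch)

d = nested_dict_sketch()
d['error']['methodA'] = 0.12
d['success']['methodA'] = 98.0
plain = {k: dict(v) for k, v in d.items()}  # rough analogue of d.to_dict()
print(plain)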
Example #6
def get_ps_performance_df(
    rootdir,
    cfilename,
    kf,
    set_name,
    model_names,
    dmetrics,
    target_class=None,
    thday=None,
    train_mode='fine-tuning',
    n=1e3,
    uses_avg=False,
    baseline_rootdir=None,
):
    info_df = DFBuilder()
    new_model_names = utils.get_sorted_model_names(model_names)
    if baseline_rootdir is not None:
        new_model_names = [BASELINE_MODEL_NAME] + new_model_names
    for kmn, model_name in enumerate(new_model_names):
        is_baseline = 'BRF' in model_name
        load_rootdir = (baseline_rootdir if is_baseline else
                        f'{rootdir}/{model_name}/{train_mode}/performance/{cfilename}')
        files, files_ids = ftfiles.gather_files_by_kfold(
            load_rootdir,
            kf,
            set_name,
            fext='d',
            disbalanced_kf_mode='oversampling',  # balance folds by oversampling
            random_state=RANDOM_STATE,
        )
        )
        print(f'{files_ids}({len(files_ids)}#); model={model_name}')
        if len(files) == 0:
            continue

        # baseline (BRF) result files store 'thdays*' keys; network files store 'days*'
        key_prefix = 'th' if is_baseline else ''
        # survey = files[0]()['survey'] # fixme
        band_names = files[0]()['band_names']
        class_names = files[0]()['class_names']
        thdays = files[0]()[key_prefix + 'days']

        d = {}
        for km, metric_name in enumerate(dmetrics.keys()):
            mn = dmetrics[metric_name]['mn']
            new_metric_name = f'{"b" if target_class is None else target_class}-{metric_name if mn is None else mn}'
            if not uses_avg:
                if target_class is None:
                    xe_metric = XError([
                        f()[key_prefix + 'days_class_metrics_df'].loc[
                            f()[key_prefix + 'days_class_metrics_df']
                            ['_' + key_prefix + 'day'] == thday]
                        [f'b-{metric_name}'].item() for f in files
                    ])
                else:
                    xe_metric = XError([
                        f()[key_prefix + 'days_class_metrics_cdf'][target_class].loc[
                            f()[key_prefix + 'days_class_metrics_df']
                            ['_' + key_prefix + 'day'] == thday]
                        [f'{metric_name}'].item() for f in files
                    ])
                d[new_metric_name] = xe_metric
            else:
                if is_baseline:
                    d[new_metric_name] = XError([-999])
                else:
                    if target_class is None:
                        metric_curves = [
                            f()[key_prefix + 'days_class_metrics_df']
                            [f'b-{metric_name}'].values for f in files
                        ]
                    else:
                        metric_curves = [
                            f()[key_prefix + 'days_class_metrics_cdf'][target_class]
                            [f'{metric_name}'].values for f in files
                        ]
                    stacked_curves = np.stack(metric_curves, axis=0)  # (runs, thdays)
                    xe_metric_curve_auc = XError(stacked_curves.mean(axis=-1))  # (b,t) > (b)
                    # interp_metric_curve = interp1d(thdays, metric_curve)(np.linspace(thdays.min(), thday, int(n)))
                    # xe_metric_curve_avg = XError(np.mean(interp_metric_curve, axis=-1))
                    d[new_metric_name] = xe_metric_curve_auc

        index = f'model={utils.get_fmodel_name(model_name)}'
        info_df.append(index, d)

    return info_df
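In the uses_avg branch, each run's metric curve over threshold days is reduced to one scalar before wrapping in XError; the reduction itself is just this numpy pattern (toy values):

import numpy as np

metric_curves = [np.array([0.50, 0.60, 0.70]),  # per-run curves over thdays
                 np.array([0.40, 0.55, 0.65])]
stacked = np.stack(metric_curves, axis=0)  # (runs, thdays)
print(stacked.mean(axis=-1))               # (runs,) -> [0.6 0.53333333]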
Example #7
class LossMonitor(object):
    def __init__(
            self,
            loss,
            optimizer,
            metrics,
            save_mode: str = C_.SM_NO_SAVE,
            target_metric_crit: str = None,
            k_counter_duration: int = C_.K_COUNTER_DURATION,
            val_epoch_counter_duration: int = C_.VAL_EPOCH_COUNTER_DURATION,
            earlystop_epoch_duration: int = C_.EARLYSTOP_EPOCH_DURATION,
            **kwargs):

        ### CHECKS
        assert isinstance(loss, ft_losses.FTLoss)
        metrics = [metrics] if isinstance(metrics, ft_metrics.FTMetric) else metrics
        assert isinstance(metrics, list) and all(
            isinstance(metric, ft_metrics.FTMetric) for metric in metrics)
        metric_names = [metric.name for metric in metrics]
        assert len(metric_names) == len(set(metric_names))  # no duplicate metric names
        assert isinstance(optimizer, ft_optimizers.LossOptimizer)

        self.loss = loss
        self.optimizer = optimizer
        self.metrics = metrics
        self.save_mode = save_mode
        self.target_metric_crit = metrics[0].name if target_metric_crit is None else target_metric_crit
        self.counter_k = Counter({'k': k_counter_duration})
        self.counter_epoch = Counter({
            'val_epoch': val_epoch_counter_duration,
            'earlystop_epoch': earlystop_epoch_duration,
        })

        self.name = loss.name
        self.best_epoch = np.inf
        self.last_saved_filedir = None
        self.reset()

    def reset(self):
        self.best_value = None
        self.loss_df = DFBuilder()
        self.opt_df = DFBuilder()
        self.loss_df_epoch = DFBuilder()
        self.metrics_df_epoch = DFBuilder()
        self.counter_k.reset()
        self.counter_epoch.reset()

    ### repr
    def __repr__(self):
        def get_metrics_repr():
            if self.save_mode in [C_.SM_ONLY_INF_METRIC, C_.SM_ONLY_SUP_METRIC]:
                return f' (target_metric_crit={self.target_metric_crit})'
            return ''

        txt = ''
        txt += f'[{self.name}]' + '\n'
        txt += f' - opt-parameters={len(self.optimizer):,}[p] - device={self.optimizer.get_device()}' + '\n'
        txt += f' - save-mode={self.save_mode}{get_metrics_repr()}' + '\n'
        txt += f' - counter_k={self.counter_k} - counter_epoch={self.counter_epoch}' + '\n'
        return txt[:-1]

    def get_save_dict(self):
        info = {
            'save_mode': self.save_mode,
            'target_metric_crit': self.target_metric_crit,
            'counter_k': self.counter_k,
            'counter_epoch': self.counter_epoch,
            'best_epoch': self.best_epoch,
            'last_saved_filedir': self.last_saved_filedir,
        }
        return {
            'info': info,
            'loss_df': self.loss_df,
            'opt_df': self.opt_df,
            'loss_df_epoch': self.loss_df_epoch,
            'metrics_df_epoch': self.metrics_df_epoch,
        }

    ### history methods
    def add_loss_history_k(
        self,
        loss,
        dt=0,
    ):
        if self.counter_k.check('k'):
            assert isinstance(loss, ft_losses.BatchLoss)
            d = loss.get_info()
            #index = self.counter_k.get_global_count()
            index = None
            d.update({
                '_dt': dt,
            })
            self.loss_df.append(index, d)

    def add_opt_history_epoch(self):
        d = self.optimizer.get_info()
        #index = self.counter_epoch.get_global_count()
        index = None
        d.update({
            '_k': self.counter_k.get_global_count(),
        })
        self.opt_df.append(index, d)

    def add_loss_history_epoch(
        self,
        loss,
        dt=0,
        set_name=None,
    ):
        if self.counter_epoch.check('val_epoch'):
            assert isinstance(loss, ft_losses.BatchLoss)
            d = loss.get_info()
            #index = self.counter_epoch.get_global_count()
            index = None
            d.update({
                '_dt': dt,
                '_set': set_name,
            })
            self.loss_df_epoch.append(index, d)

    def add_metric_history_epoch(
        self,
        metrics_dict,
        dt=0,
        set_name=None,
    ):
        if self.counter_epoch.check('val_epoch'):
            d = {}
            for mn in metrics_dict.keys():
                metric = metrics_dict[mn]
                assert isinstance(metric, ft_metrics.BatchMetric)
                d[mn] = metric.get_info()['_metric']
            d.update({
                '_dt': dt,
                '_set': set_name,
            })
            #index = f'{self.counter_epoch.get_global_count()}.set_name'
            index = None
            self.metrics_df_epoch.append(index, d)

        #print(self.metrics_df_epoch.get_df())

    def get_metric_names(self):
        return [m.name for m in self.metrics]

    ### along training methods
    def k_update(self):
        self.counter_k.update()

    def epoch_update(self):
        self.optimizer.update()
        self.counter_epoch.update()
        if self.counter_epoch.check('earlystop_epoch'):
            raise ex.TrainingInterruptedError()

    def set_last_saved_filedir(self, last_saved_filedir):
        self.last_saved_filedir = last_saved_filedir

    def needs_save(self):
        return self.save_mode != C_.SM_NO_SAVE

    def train(self):
        self.optimizer.train()

    def eval(self):
        self.optimizer.eval()

    def needs_evaluation(self):
        return self.counter_epoch.check('val_epoch')

    def reset_early_stop(self):
        self.counter_epoch.reset_cn('earlystop_epoch')

    ### get statistics
    def get_best_epoch(self):
        return self.best_epoch

    def set_best_epoch(self, best_epoch):
        self.best_epoch = best_epoch

    def get_time_per_iteration(self):
        loss_df = self.loss_df.get_df()
        return XError([v for v in loss_df['_dt'].values])

    def get_evaluation_set_names(self):
        loss_df_epoch = self.loss_df_epoch.get_df()
        return list(np.unique(loss_df_epoch['_set'].values))

    def get_time_per_epoch_set(self, set_name):
        loss_df_epoch = self.loss_df_epoch.get_df()
        mask = loss_df_epoch['_set'].isin([set_name])
        return XError([v for v in loss_df_epoch['_dt'][mask].values])

    def get_time_per_epoch(self):  # fixme only eval times
        evaluation_set_names = self.get_evaluation_set_names()
        return sum([
            self.get_time_per_epoch_set(set_name)
            for set_name in evaluation_set_names
        ])

    def get_total_time(self):
        evaluation_set_names = self.get_evaluation_set_names()
        loss_df = self.loss_df.get_df()
        loss_df_epoch = self.loss_df_epoch.get_df()
        total_time = 0
        total_time += loss_df['_dt'].values.sum()
        total_time += sum([
            loss_df_epoch['_dt'][loss_df_epoch['_set'].isin([set_name])].values.sum()
            for set_name in evaluation_set_names
        ])  # fixme
        return total_time

    ### file methods
    def remove_filedir(self, filedir):
        files.delete_filedir(filedir, verbose=0)  # remove last best model

    def check_save_condition(self, set_name):
        if self.save_mode == C_.SM_NO_SAVE:
            return False

        elif self.save_mode == C_.SM_ALL:
            return True

        elif self.save_mode == C_.SM_ONLY_ALL:
            self.remove_filedir(self.last_saved_filedir)  # remove last best model
            return True

        elif self.save_mode == C_.SM_ONLY_INF_LOSS:
            loss_df_epoch = self.loss_df_epoch.get_df()
            loss_evolution = loss_df_epoch['_loss'][loss_df_epoch['_set'].isin(
                [set_name])].values
            if len(loss_evolution) <= 1:
                return True  # always save the first; nothing to delete yet

            loss_history = loss_evolution[:-1]  # history
            actual_loss = loss_evolution[-1]  # last one

            if actual_loss < np.min(loss_history):  # save and delete the previous best
                self.remove_filedir(self.last_saved_filedir)  # remove last best model
                self.best_value = actual_loss
                return True
            else:
                return False

        elif self.save_mode == C_.SM_ONLY_INF_METRIC:
            metrics_df_epoch = self.metrics_df_epoch.get_df()
            metric_evolution = metrics_df_epoch[self.target_metric_crit][
                metrics_df_epoch['_set'].isin([set_name])].values
            if len(metric_evolution) <= 1:
                return True  # always save the first; nothing to delete yet

            metric_history = metric_evolution[:-1]  # history
            actual_metric = metric_evolution[-1]  # last one

            if actual_metric < np.min(metric_history):  # save and delete the previous best
                self.remove_filedir(self.last_saved_filedir)  # remove last best model
                self.best_value = actual_metric
                return True
            else:
                return False

        elif self.save_mode == C_.SM_ONLY_SUP_METRIC:
            metrics_df_epoch = self.metrics_df_epoch.get_df()
            metric_evolution = metrics_df_epoch[self.target_metric_crit][
                metrics_df_epoch['_set'].isin([set_name])].values
            if len(metric_evolution) <= 1:
                return True  # always save the first; nothing to delete yet

            metric_history = metric_evolution[:-1]  # history
            actual_metric = metric_evolution[-1]  # last one

            if actual_metric > np.max(metric_history):  # save and delete the previous best
                self.remove_filedir(self.last_saved_filedir)  # remove last best model
                self.best_value = actual_metric
                return True
            else:
                return False

        else:
            raise ValueError(f'save mode {self.save_mode} not supported')
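The SM_ONLY_INF_LOSS branch implements keep-only-the-best checkpointing; a self-contained sketch of the same decision rule (generic names, no repo dependencies):

import numpy as np

def should_save(loss_evolution):
    """Save iff the newest loss improves on all previous ones (first epoch always saves)."""
    if len(loss_evolution) <= 1:
        return True
    return loss_evolution[-1] < np.min(loss_evolution[:-1])

print(should_save([0.9]))            # True: first evaluation always saves
print(should_save([0.9, 0.7]))       # True: improved on the best so far
print(should_save([0.9, 0.7, 0.8]))  # False: worse than the best so far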
Example #8
def get_column_query_df_table(
    rootdir,
    cfilename,
    kf,
    lcset_name,
    model_names,
    metric_names,
    query_dict,
    day_to_metric=None,
    mode='fine-tuning',
    arch_modes=['Parallel', 'Serial'],
    fext='d',  # assumption: same file extension as in the other examples
):
    info_df = {}  # column -> list of values; turned into a DataFrame at the end
    index_df = []  # row labels, filled as metrics are computed
    # assumption: query_dict holds a single entry mapping the queried model-name
    # field to its values, e.g. {'rsc': [...]}
    [(query_key, query_values)] = query_dict.items()

    for arch_mode in arch_modes:
        for query_value in query_values:
            info_df[f'{query_value} [{arch_mode}]'] = []

    for kmn, model_name in enumerate(model_names):
        new_rootdir = f'{rootdir}/{mode}/{model_name}'
        new_rootdir = new_rootdir.replace('mode=pre-training',
                                          f'mode={mode}')  # patch
        new_rootdir = new_rootdir.replace('mode=fine-tuning',
                                          f'mode={mode}')  # patch
        filedirs = search_for_filedirs(new_rootdir, fext=fext, verbose=0)
        print(f'[{kmn}][{len(filedirs)}#] {model_name}')
        mn_dict = strings.get_dict_from_string(model_name)
        rsc = mn_dict['rsc']
        mdl = mn_dict['mdl']
        is_parallel = 'Parallel' in mdl
        arch_mode = 'Parallel' if is_parallel else 'Serial'

        if arch_mode in arch_modes:
            for km, metric_name in enumerate(metric_names):
                day_metric = []
                day_metric_avg = []
                for filedir in filedirs:
                    rdict = load_pickle(filedir, verbose=0)
                    #model_name = rdict['model_name']
                    days = rdict['days']
                    survey = rdict['survey']
                    band_names = ''.join(rdict['band_names'])
                    class_names = rdict['class_names']
                    v, vs, _ = utils.get_metric_along_day(
                        days, rdict, metric_name, day_to_metric)
                    day_metric += [v]
                    day_metric_avg += [vs.mean()]

                xe_day_metric = dstats.XError(day_metric, 0)
                xe_day_metric_avg = dstats.XError(day_metric_avg, 0)
                key = f'{mn_dict[query_key]} [{arch_mode}]'
                info_df[key] += [xe_day_metric]
                info_df[key] += [xe_day_metric_avg]

                key = f'metric={utils.get_mday_str(metric_name, day_to_metric)}'
                if key not in index_df:
                    index_df += [key]
                    index_df += [
                        f'metric={utils.get_mday_avg_str(metric_name, day_to_metric)}'
                    ]

    info_df = pd.DataFrame.from_dict(info_df)
    info_df.index = index_df
    return info_df
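The final from_dict step assumes equal-length column lists; a tiny pandas illustration of building such a table with a custom index (toy values, hypothetical labels):

import pandas as pd

cols = {'q=0 [Parallel]': [0.71, 0.69], 'q=0 [Serial]': [0.64, 0.62]}
df = pd.DataFrame.from_dict(cols)
df.index = ['metric=f1score@100', 'metric=f1score-avg@100']
print(df)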
Example #9
def save_performance(train_handler, data_loader, save_rootdir,
	target_is_onehot:bool=False,
	target_y_key='target/y',
	pred_y_key='model/y',
	days_n:int=DEFAULT_DAYS_N,
	**kwargs):
	train_handler.load_model() # important, refresh to best model
	train_handler.model.eval() # important, model eval mode
	dataset = data_loader.dataset # get dataset
	dataset.reset_max_day() # always reset max day

	days_rec_metrics_df = DFBuilder()
	days_class_metrics_df = DFBuilder()
	days_class_metrics_cdf = {c:DFBuilder() for c in dataset.class_names}
	days_predictions = {}
	days_cm = {}

	days = np.linspace(C_.DEFAULT_MIN_DAY, dataset.max_day, days_n)#[::-1]
	bar = ProgressBarMulti(len(days), 4)
	with torch.no_grad():
		can_be_in_loop = True
		for day in days:
			dataset.set_max_day(day) # very important!!
			dataset.calcule_precomputed() # very important!!
			try:
				if can_be_in_loop:
					tdicts = []
					for ki, in_tdict in enumerate(data_loader):
						_tdict = train_handler.model(TDictHolder(in_tdict).to(train_handler.device))
						tdicts += [_tdict]
					tdict = minibatch_dict_collate(tdicts)

					### mse
					mse_loss_bdict = {}
					for kb,b in enumerate(dataset.band_names):
						p_onehot = tdict[f'input/onehot.{b}'][...,0] # (b,t)
						#p_rtime = tdict[f'input/rtime.{b}'][...,0] # (b,t)
						#p_dtime = tdict[f'input/dtime.{b}'][...,0] # (b,t)
						#p_x = tdict[f'input/x.{b}'] # (b,t,f)
						p_rerror = tdict[f'target/rerror.{b}'] # (b,t,1)
						p_rx = tdict[f'target/recx.{b}'] # (b,t,1)

						p_rx_pred = tdict[f'model/decx.{b}'] # (b,t,1)
						mse_loss_b = (p_rx-p_rx_pred)**2/(C_.REC_LOSS_EPS+C_.REC_LOSS_K*(p_rerror**2)) # (b,t,1)
						mse_loss_b = seq_utils.seq_avg_pooling(mse_loss_b, p_onehot)[...,0] # (b,t,1) > (b,t) > (b)
						mse_loss_bdict[b] = mse_loss_b[...,0] # (b,1) > (b)

					mse_loss = torch.cat([mse_loss_bdict[b][...,None] for b in dataset.band_names], dim=-1).mean(dim=-1) # (b,d) > (b)
					mse_loss = mse_loss.mean()

					days_rec_metrics_df.append(day, {
						'_day':day,
						'mse':tensor_to_numpy(mse_loss),
						})

					### class prediction
					y_true = tdict[target_y_key] # (b)
					#y_pred_p = torch.nn.functional.softmax(tdict[pred_y_key], dim=-1) # (b,c)
					y_pred_p = torch.sigmoid(tdict[pred_y_key]) # (b,c)
					#print('y_pred_p',y_pred_p[0])

					if target_is_onehot:
						assert y_pred_p.shape == y_true.shape
						y_true = torch.argmax(y_true, dim=-1)

					y_true = tensor_to_numpy(y_true)
					y_pred_p = tensor_to_numpy(y_pred_p)
					days_predictions[day] = {'y_true':y_true, 'y_pred_p':y_pred_p}
					metrics_cdict, metrics_dict, cm = fcm.get_multiclass_metrics(y_pred_p, y_true, dataset.class_names)
					for c in dataset.class_names:
						days_class_metrics_cdf[c].append(day, update_dicts([{'_day':day}, metrics_cdict[c]]))
					days_class_metrics_df.append(day, update_dicts([{'_day':day}, metrics_dict]))
					days_cm[day] = cm

					### progress bar
					recall = {c:metrics_cdict[c]['recall'] for c in dataset.class_names}
					bmetrics_dict = {k:metrics_dict[k] for k in metrics_dict.keys() if 'b-' in k}
					bar([f'lcset_name={dataset.lcset_name}; day={day:.3f}', f'mse_loss={mse_loss}', f'bmetrics_dict={bmetrics_dict}', f'recall={recall}'])

			except KeyboardInterrupt:
				can_be_in_loop = False

	bar.done()
	d = {
		'model_name':train_handler.model.get_name(),
		'survey':dataset.survey,
		'band_names':dataset.band_names,
		'class_names':dataset.class_names,
		'lcobj_names':dataset.get_lcobj_names(),

		'days':days,
		'days_rec_metrics_df':days_rec_metrics_df.get_df(),
		'days_predictions':days_predictions,
		'days_class_metrics_df':days_class_metrics_df.get_df(),
		'days_class_metrics_cdf':{c:days_class_metrics_cdf[c].get_df() for c in dataset.class_names},
		'days_cm':days_cm,
		}

	### save file
	save_filedir = f'{save_rootdir}/{dataset.lcset_name}/id={train_handler.id}.d'
	files.save_pickle(save_filedir, d) # save file
	dataset.reset_max_day() # very important!!
	dataset.calcule_precomputed() # very important!!
	return
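seq_utils.seq_avg_pooling and the tdict keys are repo-specific; a toy torch sketch of the same masked, error-weighted reconstruction MSE computed above (names and constants are illustrative):

import torch

x = torch.randn(2, 5, 1)       # (b,t,1) target values
x_pred = torch.randn(2, 5, 1)  # (b,t,1) reconstruction
rerror = torch.rand(2, 5, 1)   # (b,t,1) per-point observational error
onehot = torch.tensor([[1, 1, 1, 0, 0],
                       [1, 1, 0, 0, 0]], dtype=torch.float32)  # (b,t) valid-step mask
eps, k = 1.0, 1.0  # stand-ins for C_.REC_LOSS_EPS / C_.REC_LOSS_K
se = ((x - x_pred) ** 2 / (eps + k * rerror ** 2))[..., 0]    # (b,t)
mse_per_obj = (se * onehot).sum(dim=-1) / onehot.sum(dim=-1)  # masked average over time
print(mse_per_obj.mean())  # scalar batch loss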
Example #10
def evaluate_classifier(
    rf_d,
    fats_filedir,
    fats_mode,
    lcset_info,
    nan_mode=NAN_MODE,
    days_n=DEFAULT_DAYS_N,
):
    class_names = lcset_info['class_names']
    features = rf_d['features']

    thdays_class_metrics_df = DFBuilder()
    thdays_class_metrics_cdf = {c: DFBuilder() for c in class_names}
    thdays_predictions = {}
    thdays_cm = {}

    thdays = np.linspace(MIN_DAY, MAX_DAY, days_n)
    for thday in thdays:
        eval_df_x, eval_df_y = load_features(
            fats_filedir,
            mode=fats_mode,
            thday=thday,
        )
        # print(eval_df_x.columns, eval_df_y.columns)
        rf = rf_d['rf']
        mean_train_df_x = rf_d['mean_train_df_x']
        y_true = eval_df_y[['_y']].values[..., 0]
        eval_df_x, _, _ = clean_df_nans(eval_df_x,
                                        mode=nan_mode,
                                        df_values=mean_train_df_x)
        y_pred_p = rf.predict_proba(eval_df_x.values)
        thdays_predictions[thday] = {'y_true': y_true, 'y_pred_p': y_pred_p}
        metrics_cdict, metrics_dict, cm = get_multiclass_metrics(
            y_pred_p, y_true, class_names)
        for c in class_names:
            thdays_class_metrics_cdf[c].append(
                None, update_dicts([{'_thday': thday}, metrics_cdict[c]]))
        thdays_class_metrics_df.append(
            None, update_dicts([{'_thday': thday}, metrics_dict]))
        thdays_cm[thday] = cm

        ### progress bar
        bmetrics_dict = {k: metrics_dict[k] for k in metrics_dict.keys() if 'b-' in k}
        print(f'bmetrics_dict={bmetrics_dict}')

    d = {
        'model_name': 'mdl=brf',
        'survey': lcset_info['survey'],
        'band_names': lcset_info['band_names'],
        'class_names': class_names,
        'lcobj_names': list(eval_df_y.index),
        'thdays': thdays,
        'thdays_predictions': thdays_predictions,
        'thdays_class_metrics_df': thdays_class_metrics_df.get_df(),
        'thdays_class_metrics_cdf':
            {c: thdays_class_metrics_cdf[c].get_df() for c in class_names},
        'thdays_cm': thdays_cm,
        'features': features,
        'rank': rf_d['rank'],
    }
    return d
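load_features and get_multiclass_metrics are repo helpers; the core of the per-threshold-day loop is plain scikit-learn, as in this self-contained toy (random data, macro-F1 as the balanced metric):

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

rng = np.random.default_rng(0)
x_train, y_train = rng.normal(size=(120, 4)), rng.integers(0, 3, size=120)
x_eval, y_eval = rng.normal(size=(40, 4)), rng.integers(0, 3, size=40)
rf = RandomForestClassifier(n_estimators=50, random_state=0).fit(x_train, y_train)
y_pred_p = rf.predict_proba(x_eval)               # (n,c) class probabilities
y_pred = np.argmax(y_pred_p, axis=-1)
print(f1_score(y_eval, y_pred, average='macro'))  # one balanced score per threshold day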