def send_log(key, value):
    global use_neptune
    if use_neptune:
        try:
            neptune.send_metric(key, value)
        except Exception:  # a bare except would also swallow KeyboardInterrupt
            print("Log failed: ", key, value)
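# A minimal sketch of the setup `send_log` assumes: `use_neptune` is a
# module-level flag so metric logging degrades to console-only output when no
# Neptune connection is available. The project name below is a placeholder,
# not something from the original snippet.
import neptune

use_neptune = True
try:
    neptune.init(project_qualified_name='my_workspace/sandbox')  # placeholder
    neptune.create_experiment(name='local-run')
except Exception:
    use_neptune = False  # run offline; send_log becomes a no-op

send_log('train_loss', 0.42)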
def train_evaluate_cv():
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)
    meta_train = meta[meta['is_train'] == 1]

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['train', 'evaluate', 'on_cv_folds'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):
        cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits, shuffle=PARAMS.shuffle, random_state=SEED)

        fold_auc = []
        for fold_id, (train_idx, valid_idx) in enumerate(cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
            train_data_split, valid_data_split = meta_train.iloc[train_idx], meta_train.iloc[valid_idx]

            if USE_AUXILIARY_DATA:
                auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
                train_auxiliary = auxiliary[auxiliary[ID_COLUMN].isin(valid_data_split[ID_COLUMN].tolist())]
                train_data_split = pd.concat([train_data_split, train_auxiliary], axis=0)

            LOGGER.info('Started fold {}'.format(fold_id))
            auc, _ = fold_fit_evaluate_loop(train_data_split, valid_data_split, fold_id)
            LOGGER.info('Fold {} AUC {}'.format(fold_id, auc))
            neptune.send_metric('Fold {} AUC'.format(fold_id), auc)
            fold_auc.append(auc)

        auc_mean, auc_std = np.mean(fold_auc), np.std(fold_auc)
        log_scores(auc_mean, auc_std)
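# `utils.KFoldBySortedValue` is used by the CV loops in this file but is not
# shown. A minimal sketch of a value-stratified splitter consistent with how it
# is called above (split() over a 1-D array of depths); the original
# implementation may differ in details such as shuffle handling:
import numpy as np

class KFoldBySortedValue:
    def __init__(self, n_splits=5, shuffle=False, random_state=None):
        self.n_splits = n_splits
        self.shuffle = shuffle
        self.random_state = random_state

    def split(self, values):
        # Sort by value, then deal indices round-robin so every fold
        # covers the whole value range (here: tile depth).
        order = np.argsort(values)
        fold_of_rank = np.arange(len(values)) % self.n_splits
        rng = np.random.RandomState(self.random_state)
        for fold_id in range(self.n_splits):
            valid_idx = order[fold_of_rank == fold_id]
            train_idx = order[fold_of_rank != fold_id]
            if self.shuffle:
                rng.shuffle(train_idx)
            yield train_idx, valid_idx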
def on_epoch_end(self, loss, rest):
    self.epoch_counter += 1
    neptune.send_metric(f'{self.prefix}train_loss', self.epoch_counter, loss)
    for metric, value in rest.items():
        neptune.send_metric(f'{self.prefix}train_{metric}', self.epoch_counter, value)
def epoch_end(self, learner: Learner):
    last_record = self.recorder.dataframe.tail(1).to_dict()
    for metric_name, data in last_record.items():
        for epoch, val in data.items():
            neptune.send_metric(self.metric_prefix + metric_name, x=epoch, y=val)
def evaluate_cv():
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)
    meta_train = meta[meta['is_train'] == 1]

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['evaluate', 'on_cv_folds'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):
        cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits, shuffle=PARAMS.shuffle, random_state=SEED)

        fold_iou, fold_iout = [], []
        for fold_id, (train_idx, valid_idx) in enumerate(cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
            valid_data_split = meta_train.iloc[valid_idx]

            LOGGER.info('Started fold {}'.format(fold_id))
            iou, iout, _ = fold_evaluate_loop(valid_data_split, fold_id)
            LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
            neptune.send_metric('Fold {} IOU'.format(fold_id), iou)
            LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
            neptune.send_metric('Fold {} IOUT'.format(fold_id), iout)
            fold_iou.append(iou)
            fold_iout.append(iout)

        iou_mean, iou_std = np.mean(fold_iou), np.std(fold_iou)
        iout_mean, iout_std = np.mean(fold_iout), np.std(fold_iout)
        log_scores(iou_mean, iou_std, iout_mean, iout_std)
def main(arguments):
    with open(arguments.filepath, 'r') as fp:
        json_exp = json.load(fp)

    neptune.init(api_token=arguments.neptune_api_token,
                 project_qualified_name=arguments.project_name)

    with neptune.create_experiment(name=json_exp['name'],
                                   description=json_exp['description'],
                                   params=json_exp['params'],
                                   properties=json_exp['properties'],
                                   tags=json_exp['tags'],
                                   upload_source_files=json_exp['upload_source_files']):
        for name, channel_xy in json_exp['send_metric'].items():
            for x, y in zip(channel_xy['x'], channel_xy['y']):
                neptune.send_metric(name, x=x, y=y)
        for name, channel_xy in json_exp['send_text'].items():
            for x, y in zip(channel_xy['x'], channel_xy['y']):
                neptune.send_text(name, x=x, y=y)
        for name, channel_xy in json_exp['send_image'].items():
            for x, y in zip(channel_xy['x'], channel_xy['y']):
                neptune.send_image(name, x=x, y=y)
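# The loader above implies an experiment-dump JSON shaped like the dict below
# (an illustrative sketch: the keys mirror the code, the values are made up):
example_json_exp = {
    'name': 'exp-42',
    'description': 're-upload of a finished run',
    'params': {'lr': 0.001},
    'properties': {'data_version': 'v1'},
    'tags': ['restored'],
    'upload_source_files': ['main.py'],
    'send_metric': {'loss': {'x': [0, 1, 2], 'y': [1.2, 0.9, 0.7]}},
    'send_text': {'note': {'x': [0], 'y': ['first epoch done']}},
    'send_image': {'preview': {'x': [0], 'y': ['img/preview_0.png']}},
}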
def evaluate(pipeline_name, dev_mode, chunk_size, logger, params, seed):
    logger.info('evaluating')
    meta = pd.read_csv(os.path.join(params.meta_dir, 'stage{}_metadata.csv'.format(params.competition_stage)))
    meta_valid = meta[meta['is_valid'] == 1]
    meta_valid = meta_valid.sample(int(params.evaluation_data_sample), random_state=seed)

    if dev_mode:
        meta_valid = meta_valid.sample(30, random_state=seed)

    pipeline = PIPELINES[pipeline_name]['inference'](SOLUTION_CONFIG)
    prediction = generate_prediction(meta_valid, pipeline, logger, CATEGORY_IDS, chunk_size, params.num_threads)

    prediction_filepath = os.path.join(params.experiment_dir, 'prediction.json')
    with open(prediction_filepath, "w") as fp:
        fp.write(json.dumps(prediction))

    annotation_file_path = os.path.join(params.data_dir, 'val', "annotation.json")

    logger.info('Calculating mean precision and recall')
    average_precision, average_recall = coco_evaluation(gt_filepath=annotation_file_path,
                                                        prediction_filepath=prediction_filepath,
                                                        image_ids=meta_valid[Y_COLUMNS_SCORING].values,
                                                        category_ids=CATEGORY_IDS[1:],
                                                        small_annotations_size=params.small_annotations_size)
    logger.info('Mean precision on validation is {}'.format(average_precision))
    logger.info('Mean recall on validation is {}'.format(average_recall))
    neptune.send_metric('Precision', average_precision)
    neptune.send_metric('Recall', average_recall)
def log_results(epoch, st, epoch_loss_train, epoch_loss_test, val_targets, pairings):
    losses_train.append(epoch_loss_train)
    losses_test.append(epoch_loss_test)
    print(f'Time for epoch {epoch}: {(time.time() - st) // 60}')
    print(f"Training Loss: {np.mean(epoch_loss_train)} , Validation Loss: {np.mean(epoch_loss_test)}")
    neptune.send_metric('training_loss', np.mean(epoch_loss_train))
    neptune.send_metric('val_loss', np.mean(epoch_loss_test))
    torch.save(
        {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'losses_test': losses_test,
            'validation_targets': val_targets
        },
        os.path.join(CHECKPATH, f'combinedlearning-{EXPERIMENT_NAME}-{epoch}.pth'))
    # prepare charts
    prepare_plots2(pairings, val_targets, epoch, METRICSPATH)
def evaluate_epoch(model):
    with torch.no_grad():
        model.eval()
        epoch_loss_test = []
        pairings, targets = [], []
        for X, classes_np, rankings_np in generator.next_validation():
            X = torch.from_numpy(X.astype('float32'))
            classes = torch.from_numpy(classes_np.astype('long'))
            rankings = torch.from_numpy(rankings_np.astype('long'))
            if torch.cuda.is_available():
                X = X.cuda()
                classes = classes.cuda()
                rankings = rankings.cuda()
            embeds, preds = model(X)
            batch_loss = loss_fun(embeds, preds, classes, rankings)
            cl_loss, rk_loss, _ = loss_fun.get_metrics()
            neptune.send_metric('classification_validation_loss', cl_loss)
            neptune.send_metric('Ranking_validation_loss', rk_loss)
            pairings.append(np.concatenate([p.reshape(-1, 4) for p in loss_fun.get_pairings()]))
            epoch_loss_test.append(batch_loss.detach().cpu().numpy())
            targets.append((classes_np, rankings_np,
                            preds.detach().cpu().numpy().squeeze(),
                            embeds.detach().cpu().numpy().squeeze()))
    return epoch_loss_test, targets, pairings
def _on_rollout_end(self):
    context = self._context
    neptune.send_metric("time/fps", context.num_timesteps / (time.time() - context.start_time))
    neptune.send_metric("time/iterations", self._iteration)
    neptune.send_metric("time/time_elapsed", time.time() - context.start_time)
    neptune.send_metric("time/total_timesteps", context.num_timesteps)

    rollout_infos = [context.ep_info_buffer[i] for i in range(min(context.n_envs, len(context.ep_info_buffer)))]
    name_to_key = {
        "rollout/ep_rew": "r",
        "rollout/ep_len": "l",
        "rollout/p1_rounds": "P1_rounds",
        "rollout/p2_rounds": "P2_rounds",
        "rollout/p1_health": "P1_health",
        "rollout/p2_health": "P2_health",
        "rollout/real_ep_len": "steps",
    }
    if len(rollout_infos):
        for k, v in name_to_key.items():
            self._log_3m(k, self._get_by_key(rollout_infos, v))

    for k, v in self.logger.get_log_dict().items():
        neptune.send_metric(k, v)

    if self._iteration % self._send_video_n_epoch == 0:
        self._generate_eval_video()
    self._iteration += 1
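# `_get_by_key` and `_log_3m` are called above but not defined in this snippet.
# A minimal sketch of what they could look like; the names come from the call
# sites and the mean/min/max reading of "3m" is an assumption:
def _get_by_key(self, ep_infos, key):
    # One scalar per episode-info dict; episodes missing the key are skipped.
    return [info[key] for info in ep_infos if key in info]

def _log_3m(self, channel, values):
    # Log mean/min/max summaries of the rollout statistic under derived channel names.
    if len(values):
        neptune.send_metric(channel + "_mean", np.mean(values))
        neptune.send_metric(channel + "_min", np.min(values))
        neptune.send_metric(channel + "_max", np.max(values))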
def evaluate_epoch(model, epoch):
    with torch.no_grad():
        model.eval()
        epoch_loss_test = []
        rankings_list = []
        pred_list = []
        cl_list = []
        targets = []
        for X, rankings_np, classes_np in generator.next_batch_valid():
            X = torch.from_numpy(X.astype('float32'))
            rankings = torch.from_numpy(rankings_np.astype('float32'))
            if torch.cuda.is_available():
                X = X.cuda()
                rankings = rankings.cuda()
            preds = model(X)
            batch_loss = loss_fun(preds, rankings)
            # .item() converts the 0-dim loss tensor to a plain float for logging
            neptune.send_metric('Ranking_validation_loss', batch_loss.item())
            # pairings.append(np.concatenate([p.reshape(-1, 4) for p in loss_fun.get_pairings()]))
            epoch_loss_test.append(batch_loss.detach().cpu().numpy())
            rankings_list.append(rankings.detach().cpu().numpy())
            pred_list.append(preds.detach().cpu().numpy())
            cl_list.append(classes_np)

        rankings = np.concatenate(rankings_list)
        preds = np.concatenate(pred_list)
        classes = np.concatenate(cl_list)
        bok_file = f"{METRICSPATH}/ranking_{epoch}.html"
        targets.append((classes.squeeze(), rankings.squeeze(), preds.squeeze(), preds.squeeze()))
        if epoch % 50 == 0:
            box_plot(rankings.squeeze(), preds.squeeze(), classes.squeeze(), bok_file, epoch)
    return epoch_loss_test, targets
def train(self):
    # Convert once, outside the loops; re-wrapping an existing tensor with
    # torch.tensor() on every iteration copies the data and raises a UserWarning.
    x = torch.tensor(np.random.randn(10, 50), device=device, dtype=torch.float)
    y = torch.tensor(np.random.randn(10, 10), device=device, dtype=torch.float)
    for epoch in range(10):
        self.epoch = epoch
        for i in range(100):
            y_hat = self.lin(x)
            cost = torch.mean((y - y_hat) ** 2)
            self.opt.zero_grad()
            cost.backward()
            self.opt.step()
            neptune.send_metric("epoch_cost", self.epoch, cost.item())
            self.step += 1
        self.validate()
        neptune.set_property("epoch", self.epoch)
        neptune.set_property("cost", cost.item())
        neptune.set_property("step", self.step)
def test(self):
    test_results = None
    current_test_loader = self.test_loaders[self.current_task]
    with torch.no_grad():
        for test_batch_count, test_batch in enumerate(current_test_loader, start=0):
            test_batch_results = self.test_on_batch(*test_batch)
            if test_results is None:
                test_results = test_batch_results.copy()
            else:
                for metric, result in test_batch_results.items():
                    test_results[metric] += result.data

    test_results = {'test_' + key: value / (test_batch_count + 1)
                    for key, value in test_results.items()}

    template = ("Task {}/{}x{}\tTest\tglobal iter: {} ({:.2f}%), metrics: "
                + "".join([key + ": {:.3f} " for key in test_results.keys()]))
    print(template.format(self.current_task + 1, self.num_tasks, self.num_cycles,
                          self.global_iters,
                          float(self.global_iters) / self.iters * 100.,
                          *[item.data for item in test_results.values()]))

    for metric, result in test_results.items():
        neptune.send_metric(metric, x=self.global_iters, y=result)
def on_epoch_end(self, epoch, logs={}):
    neptune.send_metric('val_loss', epoch, logs['val_loss'])
    neptune.send_metric('val_mse', epoch, logs['val_mse'])
    neptune.send_metric('loss', epoch, logs['loss'])
    neptune.send_metric('mse', epoch, logs['mse'])
    neptune.send_metric('learning_rate', epoch,
                        float(tf.keras.backend.get_value(self.model.optimizer.lr)))
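# A sketch of how a callback like the one above is typically attached to a
# Keras model (the class name `NeptuneLogger` and the fit() call are
# illustrative assumptions, not part of the original snippet):
import tensorflow as tf

class NeptuneLogger(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # One Neptune channel per Keras metric, indexed by epoch.
        for name, value in (logs or {}).items():
            neptune.send_metric(name, epoch, value)

# model.fit(x_train, y_train, validation_data=(x_val, y_val),
#           epochs=10, callbacks=[NeptuneLogger()])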
def lgbcv(lambda_l1, lambda_l2):
    # def lgbcv(feature_fraction, silent=True, seed=1234):
    NAME = 'baseline'
    param = {
        "objective": "binary",
        "metric": "auc",
        "boosting": 'gbdt',
        "tree_learner": "serial",
        "boost_from_average": "false",
        'device': 'gpu',
        'gpu_platform_id': 1,
        'gpu_device_id': 1,
        "verbosity": 1,
        "max_depth": -1,
        "learning_rate": 0.001,
        'min_split_gain': 0,
        "num_leaves": 8,
        'min_data_in_leaf': 10,
        'min_sum_hessian_in_leaf': 10,
        "lambda_l1": lambda_l1,
        "lambda_l2": lambda_l2,
        "bagging_freq": 1,
        "bagging_fraction": 0.7,
        "feature_fraction": 0.8,
        # "drop_rate": 0.4,
    }
    train_params = {
        'num_boosting_rounds': 5000,
        'early_stopping_rounds': 1000,
        'verbose_eval': 5000
    }
    params = {**param, **train_params}
    with neptune.create_experiment(name=NAME, params=params):
        monitor = neptune_monitor(prefix='train')
        clf = lgb.train(param, trn_data, train_params['num_boosting_rounds'],
                        valid_sets=[trn_data, val_data],
                        verbose_eval=train_params['verbose_eval'],
                        early_stopping_rounds=train_params['early_stopping_rounds'],
                        callbacks=[monitor])
        predictions = clf.predict(X_valid, num_iteration=clf.best_iteration)
        # Compute the AUC once and reuse it; note this function returns a score
        # to maximize, not a loss.
        score = roc_auc_score(y_valid['target'], predictions)
        print("CV score: {:<8.5f}".format(score))
        neptune.send_metric('roc_auc', score)
    return score
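# `neptune_monitor` is not defined in this snippet (neptune-contrib shipped one
# in neptunecontrib.monitoring.lightgbm). A minimal sketch of an equivalent
# LightGBM callback, relying only on the documented callback `env` interface:
def neptune_monitor(prefix=''):
    def callback(env):
        # env.evaluation_result_list holds tuples of
        # (dataset_name, metric_name, value, is_higher_better).
        for data_name, eval_name, result, _ in env.evaluation_result_list:
            neptune.send_metric('{}{}_{}'.format(prefix, data_name, eval_name),
                                x=env.iteration, y=result)
    return callback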
def main():
    print('loading data')
    train_features_path = os.path.join(FEATURES_DATA_PATH, 'train_features_' + FEATURE_NAME + '.csv')
    print('... train')
    train = pd.read_csv(train_features_path, nrows=TRAINING_PARAMS['nrows'])

    idx_split = int((1 - VALIDATION_PARAMS['validation_fraction']) * len(train))
    train, valid = train[:idx_split], train[idx_split:]
    train = sample_negative_class(train,
                                  fraction=TRAINING_PARAMS['negative_sample_fraction'],
                                  seed=TRAINING_PARAMS['negative_sample_seed'])

    @skopt.utils.use_named_args(SPACE)
    def objective(**params):
        model_params = {**params, **STATIC_PARAMS}
        valid_preds = fit_predict(train, valid, None, model_params, TRAINING_PARAMS, fine_tuning=True)
        valid_auc = roc_auc_score(valid['isFraud'], valid_preds)
        return -1.0 * valid_auc

    experiment_params = {**STATIC_PARAMS, **TRAINING_PARAMS, **HPO_PARAMS}

    with neptune.create_experiment(name='skopt forest sweep',
                                   params=experiment_params,
                                   tags=['skopt', 'forest', 'tune'],
                                   upload_source_files=get_filepaths()):
        print('logging data version')
        log_data_version(train_features_path, prefix='train_features_')

        results = skopt.forest_minimize(objective, SPACE,
                                        callback=[sk_utils.NeptuneMonitor()],
                                        **HPO_PARAMS)
        best_auc = -1.0 * results.fun
        best_params = results.x

        neptune.send_metric('valid_auc', best_auc)
        neptune.set_property('best_parameters', str(best_params))

        sk_utils.send_best_parameters(results)
        sk_utils.send_plot_convergence(results, channel_name='diagnostics_hpo')
        sk_utils.send_plot_evaluations(results, channel_name='diagnostics_hpo')
        sk_utils.send_plot_objective(results, channel_name='diagnostics_hpo')
def _send_numeric_channels(self, *args, **kwargs):
    for name, averager in self.epoch_loss_averagers.items():
        epoch_avg_loss = averager.value
        averager.reset()
        neptune.send_metric('{} epoch {} loss'.format(self.model_name, name),
                            x=self.epoch_id, y=epoch_avg_loss)

    self.model.eval()
    val_loss = self.get_validation_loss()
    self.model.train()
    for name, loss in val_loss.items():
        loss = loss.data.cpu().numpy()[0]
        neptune.send_metric('{} epoch_val {} loss'.format(self.model_name, name),
                            x=self.epoch_id, y=loss)
def train_evaluate_predict_cv():
    meta = pd.read_csv(PARAMS.metadata_filepath)
    if DEV_MODE:
        meta = meta.sample(PARAMS.dev_mode_size, random_state=SEED)
    meta_train = meta[meta['is_train'] == 1]
    meta_test = meta[meta['is_train'] == 0]

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['train', 'evaluate', 'predict', 'on_cv_folds'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):
        cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits, shuffle=PARAMS.shuffle, random_state=SEED)

        fold_iou, fold_iout = [], []
        out_of_fold_train_predictions, out_of_fold_test_predictions = [], []
        for fold_id, (train_idx, valid_idx) in enumerate(cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1))):
            train_data_split, valid_data_split = meta_train.iloc[train_idx], meta_train.iloc[valid_idx]

            if USE_AUXILIARY_DATA:
                auxiliary = pd.read_csv(PARAMS.auxiliary_metadata_filepath)
                train_auxiliary = auxiliary[auxiliary[ID_COLUMN].isin(valid_data_split[ID_COLUMN].tolist())]
                train_data_split = pd.concat([train_data_split, train_auxiliary], axis=0)

            LOGGER.info('Started fold {}'.format(fold_id))
            iou, iout, out_of_fold_prediction, test_prediction = fold_fit_evaluate_predict_loop(
                train_data_split, valid_data_split, meta_test, fold_id)

            LOGGER.info('Fold {} IOU {}'.format(fold_id, iou))
            neptune.send_metric('Fold {} IOU'.format(fold_id), iou)
            LOGGER.info('Fold {} IOUT {}'.format(fold_id, iout))
            neptune.send_metric('Fold {} IOUT'.format(fold_id), iout)

            fold_iou.append(iou)
            fold_iout.append(iout)
            out_of_fold_train_predictions.append(out_of_fold_prediction)
            out_of_fold_test_predictions.append(test_prediction)

        train_ids, train_predictions = [], []
        for idx_fold, train_pred_fold in out_of_fold_train_predictions:
            train_ids.extend(idx_fold)
            train_predictions.extend(train_pred_fold)

        iou_mean, iou_std = np.mean(fold_iou), np.std(fold_iou)
        iout_mean, iout_std = np.mean(fold_iout), np.std(fold_iout)
        log_scores(iou_mean, iou_std, iout_mean, iout_std)
        save_predictions(train_ids, train_predictions, meta_test, out_of_fold_test_predictions)
def on_batch_end(self, metrics, *args, **kwargs):
    for name, loss in metrics.items():
        loss = loss.data.cpu().numpy()[0]
        if name not in self.epoch_loss_averagers:
            self.epoch_loss_averagers[name] = Averager()
        self.epoch_loss_averagers[name].send(loss)
        neptune.send_metric('{} batch {} loss'.format(self.model_name, name),
                            x=self.batch_id, y=loss)
    self.batch_id += 1
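# `Averager` is used above but not defined in this snippet. A minimal
# running-mean sketch matching the send()/value/reset() usage seen here and in
# _send_numeric_channels (an assumption about the original implementation):
class Averager:
    def __init__(self):
        self.reset()

    def send(self, value):
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        # Mean of everything sent since the last reset (0 if nothing was sent).
        return self.current_total / self.iterations if self.iterations else 0.0

    def reset(self):
        self.current_total = 0.0
        self.iterations = 0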
def train_epoch(model):
    epoch_loss_train = []
    model.train()
    # train minibatches
    for X, classes, rankings in generator.next_batch():
        X = torch.from_numpy(X.astype('float32'))
        classes = torch.from_numpy(classes.astype('long'))
        rankings = torch.from_numpy(rankings.astype('long'))
        if torch.cuda.is_available():
            X = X.cuda()
            classes = classes.cuda()
            rankings = rankings.cuda()
        optimizer.zero_grad()
        embeds, preds = model(X)
        batch_loss = loss_fun(embeds, preds, classes, rankings)
        # .item() converts the 0-dim loss tensor to a plain float for logging
        neptune.send_metric('batch_loss', batch_loss.item())
        cl_loss, rk_loss, cur_weight = loss_fun.get_metrics()
        neptune.send_metric('classification_training_loss', cl_loss)
        neptune.send_metric('Ranking_training_loss', rk_loss)
        neptune.send_metric('current_weighting', cur_weight)
        epoch_loss_train.append(batch_loss.detach().cpu().numpy())
        batch_loss.backward()
        optimizer.step()
    return epoch_loss_train
def on_batch_end(self, metrics, *args, **kwargs):
    for name, loss in metrics.items():
        loss = loss.data.cpu().numpy()[0]
        current_lr = self.optimizer.state_dict()['param_groups'][0]['lr']
        logger.info('Learning Rate {} Loss {}'.format(current_lr, loss))
        neptune.send_metric('Learning Rate Finder', x=self.batch_id, y=current_lr)
        neptune.send_metric('Loss', x=self.batch_id, y=loss)
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = current_lr * self.multipy_factor + self.add_factor
    self.batch_id += 1
def log_scores(iou_mean, iou_std, iout_mean, iout_std):
    LOGGER.info('IOU mean {}, IOU std {}'.format(iou_mean, iou_std))
    neptune.send_metric('IOU', iou_mean)
    neptune.send_metric('IOU STD', iou_std)
    LOGGER.info('IOUT mean {}, IOUT std {}'.format(iout_mean, iout_std))
    neptune.send_metric('IOUT', iout_mean)
    neptune.send_metric('IOUT STD', iout_std)
def val_coco(self, data_loader, global_steps=None):
    self.network.eval()
    torch.cuda.empty_cache()
    val_loss_stats = {}

    num_samples = len(data_loader)
    all_preds = np.zeros((num_samples, cfg.num_joints, 3), dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0

    with torch.no_grad():
        for batch, meta in tqdm.tqdm(data_loader):
            for k in batch:
                if k != 'meta':
                    batch[k] = batch[k].cuda()
            output, loss, loss_stats, image_stats = self.network(batch)

            num_images = batch['inp'].size(0)
            c = meta['center'].numpy()
            s = meta['scale'].numpy()
            score = meta['score'].numpy()

            preds = output['kpt_2d'][:, :, 0:2].cpu().numpy().copy()
            for i in range(output['kpt_2d'].shape[0]):
                preds[i] = transform_preds(output['kpt_2d'][:, :, 0:2][i], c[i], s[i], [192, 256])

            all_preds[idx:idx + num_images, :, 0:2] = preds
            # all_preds[idx:idx + num_images, :, 2:3] = maxvals
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(s * 200, 1)
            all_boxes[idx:idx + num_images, 5] = score
            image_path.extend(meta['image'])
            idx += num_images

            loss_stats = self.reduce_loss_stats(loss_stats)
            for k, v in loss_stats.items():
                val_loss_stats.setdefault(k, 0)
                val_loss_stats[k] += v

    name_values, perf_indicator = data_loader.dataset.evaluate(
        cfg, all_preds, cfg.result_dir, all_boxes, image_path, filenames, imgnums)

    if global_steps:
        neptune_step = global_steps['valid_global_steps']
        neptune.send_metric('valid_loss', neptune_step, (val_loss_stats['loss'] / num_samples).item())
        neptune.send_metric('valid_loss_seg', neptune_step, (val_loss_stats['seg_loss'] / num_samples).item())
        neptune.send_metric('valid_loss_vote', neptune_step, (val_loss_stats['vote_loss'] / num_samples).item())
        for k, v in name_values.items():
            neptune.send_metric(k, neptune_step, v)
        global_steps['valid_global_steps'] = neptune_step + 1
def print_loss(self, iter):
    iter_infor = 'iter = {:6d}/{:6d}, exp = {}'.format(iter, self.config.num_steps, self.config.note)
    to_print = ['{}:{:.4f}'.format(key, self.losses[key].item()) for key in self.losses.keys()]
    loss_infor = ' '.join(to_print)
    if self.config.screen:
        print(iter_infor + ' ' + loss_infor)
    if self.config.neptune:
        for key in self.losses.keys():
            neptune.send_metric(key, self.losses[key].item())
    if self.config.tensorboard and self.writer is not None:
        for key in self.losses.keys():
            self.writer.add_scalar('train/' + key, self.losses[key], iter)
def log_results(epoch, st, epoch_loss_train, epoch_loss_test, val_targets=None, pairings=None):
    losses_train.append(epoch_loss_train)
    losses_test.append(epoch_loss_test)
    print(f'Time for epoch {epoch}: {(time.time() - st) // 60}')
    print(f"Training Loss: {np.mean(epoch_loss_train)} , Validation Loss: {np.mean(epoch_loss_test)}")
    neptune.send_metric('training_loss', np.mean(epoch_loss_train))
    neptune.send_metric('val_loss', np.mean(epoch_loss_test))
    if epoch % 50 == 0:
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'losses_test': losses_test,
                'validation_targets': val_targets
            },
            os.path.join(CHECKPATH, f'regressor-{EXPERIMENT_NAME}-{epoch}.pth'))
def evaluate():
    meta = pd.read_csv(PARAMS.metadata_filepath)
    meta_train = meta[meta['is_train'] == 1]

    cv = utils.KFoldBySortedValue(n_splits=PARAMS.n_cv_splits, shuffle=PARAMS.shuffle, random_state=SEED)
    for train_idx, valid_idx in cv.split(meta_train[DEPTH_COLUMN].values.reshape(-1)):
        break  # use only the first fold for validation

    meta_valid_split = meta_train.iloc[valid_idx]
    if DEV_MODE:
        meta_valid_split = meta_valid_split.sample(PARAMS.dev_mode_size, random_state=SEED)
    # Read ground-truth masks after the (optional) dev-mode subsampling so that
    # y_true_valid lines up with the predictions made on meta_valid_split.
    y_true_valid = utils.read_masks(meta_valid_split[Y_COLUMN].values)

    data = {'input': {'meta': meta_valid_split},
            'callback_input': {'meta_valid': None}
            }

    with neptune.create_experiment(name=EXPERIMENT_NAME,
                                   params=PARAMS,
                                   tags=TAGS + ['evaluate'],
                                   upload_source_files=get_filepaths(),
                                   properties={'experiment_dir': EXPERIMENT_DIR}):
        pipeline_network = network(config=CONFIG, train_mode=False)
        pipeline_postprocessing = pipelines.mask_postprocessing(config=CONFIG)

        pipeline_network.clean_cache()
        output = pipeline_network.transform(data)
        valid_masks = {'input_masks': output}
        output = pipeline_postprocessing.transform(valid_masks)
        pipeline_network.clean_cache()
        pipeline_postprocessing.clean_cache()
        y_pred_valid = output['binarized_images']

        LOGGER.info('Calculating IOU and IOUT Scores')
        iou_score, iout_score = calculate_scores(y_true_valid, y_pred_valid)
        LOGGER.info('IOU score on validation is {}'.format(iou_score))
        neptune.send_metric('IOU', iou_score)
        LOGGER.info('IOUT score on validation is {}'.format(iout_score))
        neptune.send_metric('IOUT', iout_score)

        results_filepath = os.path.join(EXPERIMENT_DIR, 'validation_results.pkl')
        LOGGER.info('Saving validation results to {}'.format(results_filepath))
        joblib.dump((meta_valid_split, y_true_valid, y_pred_valid), results_filepath)
def on_epoch_end(self, *args, **kwargs):
    self.model.eval()
    val_loss = self.get_validation_loss()
    metric = val_loss[self.metric_name]
    metric = metric.data.cpu().numpy()[0]
    self.model.train()

    self.lr_scheduler.step(metrics=metric, epoch=self.epoch_id)
    logger.info('epoch {0} current lr: {1}'.format(
        self.epoch_id + 1, self.optimizer.state_dict()['param_groups'][0]['lr']))
    neptune.send_metric('Learning Rate',
                        x=self.epoch_id,
                        y=self.optimizer.state_dict()['param_groups'][0]['lr'])
    self.epoch_id += 1
def trainer(num_epochs=10, ssr_steps=1):
    step = 0
    for epoch in range(num_epochs):
        # Reset the metrics at the start of the next epoch
        train_sup_loss.reset_states()
        train_reg_loss.reset_states()
        train_accuracy.reset_states()
        test_sup_loss.reset_states()
        test_reg_loss.reset_states()
        test_accuracy.reset_states()

        for images, labels in train_dataset:
            train_step(images, labels)
            for i in range(ssr_steps):
                ssr_step(images, labels)
            step += 1
            # global_step += 1

        for test_images, test_labels in test_dataset:
            test_step(test_images, test_labels)

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch + 1,
                              train_sup_loss.result(),
                              train_accuracy.result() * 100,
                              test_sup_loss.result(),
                              test_accuracy.result() * 100))

        neptune.send_metric('train_accuracy', x=step, y=train_accuracy.result())
        neptune.send_metric('test_accuracy', x=step, y=test_accuracy.result())
        neptune.send_metric('train_sup_loss', x=step, y=train_sup_loss.result())
        neptune.send_metric('train_reg_loss', x=step, y=train_reg_loss.result())
        neptune.send_metric('test_sup_loss', x=step, y=test_sup_loss.result())
        sys.stdout.flush()
def on_end_epoch(self, training, model, criterion, data_loader, optimizer=None, display=True):
    map = 100 * self.state['ap_meter'].value().mean()
    loss = self.state['meter_loss'].value()[0]
    OP, OR, OF1, CP, CR, CF1 = self.state['ap_meter'].overall()
    OP_k, OR_k, OF1_k, CP_k, CR_k, CF1_k = self.state['ap_meter'].overall_topk(3)
    if display:
        if training:
            print('Epoch: [{0}]\t'
                  'Loss {loss:.4f}\t'
                  'mAP {map:.3f}'.format(self.state['epoch'], loss=loss, map=map))
            print('OP: {OP:.4f}\t'
                  'OR: {OR:.4f}\t'
                  'OF1: {OF1:.4f}\t'
                  'CP: {CP:.4f}\t'
                  'CR: {CR:.4f}\t'
                  'CF1: {CF1:.4f}'.format(OP=OP, OR=OR, OF1=OF1, CP=CP, CR=CR, CF1=CF1))
            if self.state['neptune']:
                try:
                    neptune.send_metric('epoch', self.state['epoch'])
                    neptune.send_metric('train_loss', loss)
                    neptune.send_metric('train_map', map)
                except Exception:  # a bare except would also swallow KeyboardInterrupt
                    print("Neptune exception occurred")
        else:
            print('Test: \t Loss {loss:.4f}\t mAP {map:.3f}'.format(loss=loss, map=map))
            print('OP: {OP:.4f}\t'
                  'OR: {OR:.4f}\t'
                  'OF1: {OF1:.4f}\t'
                  'CP: {CP:.4f}\t'
                  'CR: {CR:.4f}\t'
                  'CF1: {CF1:.4f}'.format(OP=OP, OR=OR, OF1=OF1, CP=CP, CR=CR, CF1=CF1))
            print('OP_3: {OP:.4f}\t'
                  'OR_3: {OR:.4f}\t'
                  'OF1_3: {OF1:.4f}\t'
                  'CP_3: {CP:.4f}\t'
                  'CR_3: {CR:.4f}\t'
                  'CF1_3: {CF1:.4f}'.format(OP=OP_k, OR=OR_k, OF1=OF1_k, CP=CP_k, CR=CR_k, CF1=CF1_k))
            if self.state['neptune']:
                try:
                    neptune.send_metric('test_loss', loss)
                    neptune.send_metric('test_map', map)
                except Exception:
                    print("Neptune exception occurred")
    return map
def on_epoch_end(self, epoch, logs={}):
    if 'loss' in logs:
        neptune.send_metric('loss', epoch, logs['loss'])
    if 'classification_loss' in logs:
        neptune.send_metric('classification_loss', epoch, logs['classification_loss'])
    if 'regression_loss' in logs:
        neptune.send_metric('regression_loss', epoch, logs['regression_loss'])
    if 'mAP' in logs:
        neptune.send_metric('mAP', epoch, logs['mAP'])