def compute_and_log_metrics(model, train_dataloader, test_dataloader,
                            augmentations, device, step, dataset_version):
    training_accuracy, training_IoU = compute_metrics(
        model, train_dataloader, augmentations, device,
        dataset_version=dataset_version)
    test_accuracy, test_IoU = compute_metrics(
        model, test_dataloader, augmentations, device,
        dataset_version=dataset_version)
    # Log both splits at the same global step.
    wandb.log({
        'training IoU': training_IoU,
        'training accuracy': training_accuracy
    }, step=step)
    wandb.log({
        'test IoU': test_IoU,
        'test accuracy': test_accuracy
    }, step=step)
    return test_accuracy, test_IoU

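# Hedged sketch (not from the source): one plausible shape for the
# segmentation-style `compute_metrics(model, dataloader, augmentations,
# device, dataset_version=...)` called above, returning pixel accuracy and
# mean IoU. `augmentations`, `dataset_version`, and `num_classes` are
# assumptions; `augmentations` is presumed applied upstream and is unused here.
import torch

def segmentation_metrics_sketch(model, dataloader, augmentations, device,
                                dataset_version=None, num_classes=2):
    model.eval()
    correct = total = 0
    intersection = torch.zeros(num_classes)
    union = torch.zeros(num_classes)
    with torch.no_grad():
        for images, masks in dataloader:
            images, masks = images.to(device), masks.to(device)
            preds = model(images).argmax(dim=1)
            correct += (preds == masks).sum().item()
            total += masks.numel()
            for c in range(num_classes):
                intersection[c] += ((preds == c) & (masks == c)).sum().item()
                union[c] += ((preds == c) | (masks == c)).sum().item()
    # Mean IoU over classes; clamp avoids division by zero for absent classes.
    iou = (intersection / union.clamp(min=1)).mean().item()
    return correct / total, iou
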
def evaluate(nlp, task, docs_golds):
    tok2vec = nlp.get_pipe(PIPES.tok2vec)
    textcat = nlp.get_pipe(PIPES.textcat)
    right = 0
    total = 0
    guesses = []
    truths = []
    labels = textcat.labels
    for batch in minibatch(docs_golds, size=HP.eval_batch_size):
        docs, golds = zip(*batch)
        docs = list(textcat.pipe(tok2vec.pipe(docs)))
        for doc, gold in zip(docs, golds):
            guess, _ = max(doc.cats.items(), key=lambda it: it[1])
            truth, _ = max(gold.cats.items(), key=lambda it: it[1])
            if guess not in labels:
                msg = (f"Unexpected label {guess} predicted. "
                       f"Expected labels: {', '.join(labels)}")
                raise ValueError(msg)
            if truth not in labels:
                msg = (f"Unexpected label {truth} in gold data. "
                       f"Expected labels: {', '.join(labels)}")
                raise ValueError(msg)
            guesses.append(labels.index(guess))
            truths.append(labels.index(truth))
            right += guess == truth
            total += 1
            free_tensors(doc)
    main_name, metrics = compute_metrics(task, numpy.array(guesses),
                                         numpy.array(truths))
    metrics["_accuracy"] = right / total
    metrics["_right"] = right
    metrics["_total"] = total
    metrics["_main"] = metrics[main_name]
    return metrics[main_name], metrics

def validate(task, val_iter, model, logger, field, world_size, rank,
             num_print=10, args=None):
    model.eval()
    required_names = ['greedy', 'answer']
    optional_names = ['context', 'question']
    loss, predictions, answers, results = gather_results(
        model, val_iter, field, world_size, optional_names=optional_names)
    predictions = [p.replace('UNK', 'OOV') for p in predictions]
    names = required_names + optional_names
    if hasattr(val_iter.dataset.examples[0], 'wikisql_id') or \
            hasattr(val_iter.dataset.examples[0], 'squad_id') or \
            hasattr(val_iter.dataset.examples[0], 'woz_id'):
        answers = [val_iter.dataset.all_answers[sid]
                   for sid in answers.tolist()]
    metrics, answers = compute_metrics(predictions, answers,
                                       bleu='iwslt' in task or 'multi30k' in task,
                                       dialogue='woz' in task,
                                       rouge='cnn' in task,
                                       logical_form='sql' in task,
                                       corpus_f1='zre' in task,
                                       args=args)
    results = [predictions, answers] + results
    print_results(names, results, rank=rank, num_print=num_print)
    return loss, metrics

def train_one_epoch(train_loader, model, loss_fn, optimizer):
    losses = []
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = [], [], [], [], [], []
    for ref_cloud, src_cloud, gtR, gtt in tqdm(train_loader):
        ref_cloud, src_cloud, gtR, gtt = ref_cloud.cuda(), src_cloud.cuda(), \
                                         gtR.cuda(), gtt.cuda()
        optimizer.zero_grad()
        R, t, pred_ref_clouds = model(
            src_cloud.permute(0, 2, 1).contiguous(),
            ref_cloud.permute(0, 2, 1).contiguous())
        loss = compute_loss(ref_cloud, pred_ref_clouds, loss_fn)
        loss.backward()
        optimizer.step()
        cur_r_mse, cur_r_mae, cur_t_mse, cur_t_mae, cur_r_isotropic, \
            cur_t_isotropic = compute_metrics(R, t, gtR, gtt)
        losses.append(loss.item())
        r_mse.append(cur_r_mse)
        r_mae.append(cur_r_mae)
        t_mse.append(cur_t_mse)
        t_mae.append(cur_t_mae)
        r_isotropic.append(cur_r_isotropic.cpu().detach().numpy())
        t_isotropic.append(cur_t_isotropic.cpu().detach().numpy())
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = \
        summary_metrics(r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic)
    results = {
        'loss': np.mean(losses),
        'r_mse': r_mse,
        'r_mae': r_mae,
        't_mse': t_mse,
        't_mae': t_mae,
        'r_isotropic': r_isotropic,
        't_isotropic': t_isotropic
    }
    return results

def test_one_epoch(test_loader, model, loss_fn):
    model.eval()
    losses = []
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = [], [], [], [], [], []
    with torch.no_grad():
        for ref_cloud, src_cloud, gtR, gtt in tqdm(test_loader):
            ref_cloud, src_cloud, gtR, gtt = ref_cloud.cuda(), src_cloud.cuda(), \
                                             gtR.cuda(), gtt.cuda()
            R, t, pred_ref_clouds = model(
                src_cloud.permute(0, 2, 1).contiguous(),
                ref_cloud.permute(0, 2, 1).contiguous())
            loss = compute_loss(ref_cloud, pred_ref_clouds, loss_fn)
            cur_r_mse, cur_r_mae, cur_t_mse, cur_t_mae, cur_r_isotropic, \
                cur_t_isotropic = compute_metrics(R, t, gtR, gtt)
            losses.append(loss.item())
            r_mse.append(cur_r_mse)
            r_mae.append(cur_r_mae)
            t_mse.append(cur_t_mse)
            t_mae.append(cur_t_mae)
            r_isotropic.append(cur_r_isotropic.cpu().detach().numpy())
            t_isotropic.append(cur_t_isotropic.cpu().detach().numpy())
    model.train()
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = \
        summary_metrics(r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic)
    results = {
        'loss': np.mean(losses),
        'r_mse': r_mse,
        'r_mae': r_mae,
        't_mse': t_mse,
        't_mae': t_mae,
        'r_isotropic': r_isotropic,
        't_isotropic': t_isotropic
    }
    return results

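# Hedged sketch (not from the source): one common definition of the rigid-
# registration `compute_metrics(R, t, gtR, gtt)` used by the train/test loops
# above. Anisotropic errors compare Euler angles and translation vectors
# element-wise; the isotropic rotation error is the geodesic angle of the
# residual rotation. Assumed shapes: R, gtR (B, 3, 3); t, gtt (B, 3).
import torch
from scipy.spatial.transform import Rotation

def registration_metrics_sketch(R, t, gtR, gtt):
    euler_pred = Rotation.from_matrix(
        R.detach().cpu().numpy()).as_euler('xyz', degrees=True)
    euler_gt = Rotation.from_matrix(
        gtR.detach().cpu().numpy()).as_euler('xyz', degrees=True)
    r_mse = ((euler_pred - euler_gt) ** 2).mean()
    r_mae = abs(euler_pred - euler_gt).mean()
    t_mse = ((t - gtt) ** 2).mean().item()
    t_mae = (t - gtt).abs().mean().item()
    # Residual rotation gtR^T @ R; its rotation angle is the isotropic error.
    residual = gtR.transpose(-1, -2) @ R
    trace = residual.diagonal(dim1=-2, dim2=-1).sum(-1)
    r_isotropic = torch.rad2deg(torch.acos(((trace - 1) / 2).clamp(-1, 1)))
    t_isotropic = (t - gtt).norm(dim=-1)
    return r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic
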
def evaluate(test_loader, model, epoch, args, dataset_name):
    all_txt_embd = []
    all_video_embd = []
    model.eval()
    if args.rank == 0:
        log('Evaluating on {}'.format(dataset_name), args)
    with torch.no_grad():
        for i_batch, data in enumerate(test_loader):
            text = data['text'].cuda()
            video = data['video'].float().cuda()
            video = video / 255.0
            video = video.view(-1, video.shape[2], video.shape[3],
                               video.shape[4], video.shape[5])
            video_embd, text_embd = model(video, text)
            # Average the per-window video embeddings for each clip.
            video_embd = video_embd.view(text_embd.shape[0],
                                         args.num_windows_test,
                                         text_embd.shape[1])
            video_embd = video_embd.mean(dim=1)
            video_embd = allgather(video_embd, args)
            text_embd = allgather(text_embd, args)
            if args.rank == 0:
                text_embd = text_embd.cpu().numpy()
                video_embd = video_embd.cpu().numpy()
                all_txt_embd.append(text_embd)
                all_video_embd.append(video_embd)
    model.train()
    if args.rank == 0:
        all_txt_embd = np.concatenate(all_txt_embd, axis=0)
        all_video_embd = np.concatenate(all_video_embd, axis=0)
        metrics = compute_metrics(np.dot(all_txt_embd, all_video_embd.T))
        log('Epoch {} results: {}'.format(epoch, metrics), args)

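# Hedged sketch (not from the source): a plausible implementation of the
# similarity-matrix `compute_metrics(x)` used by the retrieval evaluators in
# this file, assuming row i of `x` holds text i's similarity to every video
# and the matching video sits on the diagonal. Metric names are assumptions.
import numpy as np

def retrieval_metrics_sketch(x):
    # Rank of the true pair within each row (0 = retrieved first).
    order = np.argsort(-x, axis=1)
    ranks = np.where(order == np.arange(x.shape[0])[:, None])[1]
    return {
        'R@1': float(np.mean(ranks == 0)),
        'R@5': float(np.mean(ranks < 5)),
        'R@10': float(np.mean(ranks < 10)),
        'median_rank': float(np.median(ranks) + 1),
    }
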
def eval_step(batch_imgs, depth_gt):
    # batch_imgs: (batch_size, height, width, 9), three images concatenated
    # along the channels dimension
    # depth_gt: (batch_size, height, width)
    img_before = batch_imgs[:, :, :, :3]
    img_target = batch_imgs[:, :, :, 3:6]
    img_after = batch_imgs[:, :, :, 6:]
    # Disparities at different scales, in increasing resolution.
    disps = depth_net(img_target)
    # Loss:
    T_before_target = pose_net(concat_images(img_before, img_target))  # (bs, 6)
    T_target_after = pose_net(concat_images(img_target, img_after))  # (bs, 6)
    matrixT_before_target = make_transformation_matrix(T_before_target, False)  # (bs, 4, 4)
    matrixT_after_target = make_transformation_matrix(T_target_after, True)  # (bs, 4, 4)
    loss_value, image_from_before, image_from_after = \
        loss_layer(disps, matrixT_before_target, matrixT_after_target,
                   img_before, img_target, img_after)
    metrics = compute_metrics(disps[-1], depth_gt)
    return loss_value, metrics, disps, image_from_before, image_from_after

def evaluate(args, model, tokenizer, prefix=""):
    eval_task_names = ("snli",)
    eval_outputs_dirs = (args.output_dir,)
    results = {}
    for eval_task, eval_output_dir in zip(eval_task_names, eval_outputs_dirs):
        eval_dataset = load_and_cache_examples(args, eval_task, tokenizer,
                                               evaluate=True)
        if not os.path.exists(eval_output_dir):
            os.makedirs(eval_output_dir)
        args.eval_batch_size = 16
        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)

        # Eval!
        logger.info("***** Running evaluation {} *****".format(prefix))
        logger.info("  Num examples = %d", len(eval_dataset))
        logger.info("  Batch size = %d", args.eval_batch_size)
        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        for batch in tqdm(eval_dataloader, desc="Evaluating", position=0,
                          leave=True, ncols=100):
            model.eval()
            batch = tuple(t.to(args.device) for t in batch)
            with torch.no_grad():
                inputs = {'input_ids': batch[0],
                          'attention_mask': batch[1],
                          'token_type_ids': batch[2],
                          'labels': batch[3],
                          'task': 0}
                outputs = model(**inputs)
                tmp_eval_loss, logits = outputs[:2]
                eval_loss += tmp_eval_loss.mean().item()
            nb_eval_steps += 1
            if preds is None:
                preds = logits.detach().cpu().numpy()
                out_label_ids = inputs['labels'].detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
                out_label_ids = np.append(
                    out_label_ids, inputs['labels'].detach().cpu().numpy(),
                    axis=0)

        eval_loss = eval_loss / nb_eval_steps
        preds = np.argmax(preds, axis=1)
        result = compute_metrics('snli', preds, out_label_ids)
        results.update(result)

        output_eval_file = os.path.join(eval_output_dir, prefix,
                                        "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results {} *****".format(prefix))
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
    return results

def evaluate(args, model, eval_dataset, prefix=""):
    eval_output_dir = args.output_dir
    results = {}
    if not os.path.exists(eval_output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(eval_output_dir)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(eval_dataset) if args.local_rank == -1 \
        else DistributedSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Eval!
    logger.info("***** Running evaluation {} *****".format(prefix))
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'labels': batch[3]}
            if args.model_type != 'distilbert':
                # XLM, DistilBERT and RoBERTa don't use segment_ids.
                inputs['token_type_ids'] = batch[2] \
                    if args.model_type in ['bert', 'xlnet'] else None
            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]
            eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs['labels'].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(
                out_label_ids, inputs['labels'].detach().cpu().numpy(), axis=0)

    eval_loss = eval_loss / nb_eval_steps
    if args.output_mode == "classification":
        preds = np.argmax(preds, axis=1)
    elif args.output_mode == "regression":
        preds = np.squeeze(preds)
    result = compute_metrics(preds, out_label_ids)
    results.update(result)

    output_eval_file = os.path.join(eval_output_dir, prefix,
                                    "eval_results.txt")
    with open(output_eval_file, "w") as writer:
        logger.info("***** Eval results {} *****".format(prefix))
        for key in sorted(result.keys()):
            logger.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))

    return results

def compute_classifier_loss(model, data, target, optimizer=dict()):
    shape = data.shape
    x = data.reshape(shape[0], numpy.prod(shape[1:]))
    out = model(x)
    output = out['output']
    # Binary cross entropy; the clamp keeps log() finite for saturated outputs.
    eps = 1e-7
    output = output.clamp(eps, 1 - eps)
    loss = -(target * torch.log(output)
             + (1 - target) * torch.log(1 - output)).mean()
    if optimizer:
        loss.backward()
        for opt in optimizer.values():
            opt.step()
    result = defaultdict(list)
    out_bin = output.detach().cpu().numpy() > 0.5
    compute_metrics(result, target.detach().cpu().numpy(), out_bin)
    result['loss'].append(loss.cpu().detach().numpy())
    return result

def test(nn_model, dataset):
    data_loader = data.DataLoader(dataset, batch_size=1)
    cosines_list = []
    similarity_vector_list = []
    for batch in tqdm(data_loader):
        cosines, similarity_vector = test_step(nn_model, batch)
        cosines_list.append(cosines)
        similarity_vector_list.append(similarity_vector)
    return compute_metrics(cosines_list, similarity_vector_list)

def xgboost_test(extractor, opt):
    import xgboost as xgb
    res = defaultdict(list)
    res_train = defaultdict(list)
    # Leave-one-study-out cross-validation over the seven studies.
    for study_num in range(7):
        train_set, test_set = get_merged_common_dataset(opt,
                                                        skip_study=study_num)
        train_data, train_labels = get_data(train_set)
        val_data, val_labels = get_data(test_set)
        train_features = extractor(train_data).detach().numpy()
        val_features = extractor(val_data).detach().numpy()
        # To use raw inputs instead of extracted features:
        # train_features = train_data
        # val_features = val_data

        # Train the model.
        model = xgb.XGBClassifier()
        clf = model.fit(train_features, train_labels.astype(int),
                        eval_set=[(val_features, val_labels)],
                        early_stopping_rounds=50, verbose=True,
                        eval_metric='auc')
        # Alternative classifiers that were tried:
        # model = LogisticRegression()
        # model = SVC(probability=True, class_weight='balanced')
        # clf = model.fit(train_features, train_labels.astype(int))
        print(val_data.shape)
        res['bias'].append(val_labels.sum() / len(val_labels))
        print(res['bias'][-1])
        y_pred = clf.predict_proba(val_features)[:, 1]
        x_pred = clf.predict_proba(train_features)[:, 1]
        compute_metrics(res, val_labels.flatten() > 0.5, y_pred > 0.5)
        compute_auc(res, val_labels.flatten() > 0.5, y_pred)
        compute_metrics(res_train, train_labels.flatten() > 0.5, x_pred > 0.5)
        compute_auc(res_train, train_labels.flatten() > 0.5, x_pred)
    for key in res_train:
        ave = numpy.asarray(res_train[key]).mean(axis=0)
        print('Train {0}: {1}'.format(key, ave))
    for key in res:
        ave = numpy.asarray(res[key]).mean(axis=0)
        print('Test {0}: {1}'.format(key, ave))

def get_test_score(task_eval, qa_results, score_dict):
    score = compute_metrics(
        qa_results,
        bleu='iwslt.en.de' in task_eval or 'multinli.in.out' in task_eval,
        dialogue='woz.en' in task_eval,
        rouge='cnn_dailymail' in task_eval,
        logical_form='wikisql' in task_eval,
        corpus_f1='zre' in task_eval
    )
    score_dict[task_eval] = score

def Eval_retrieval(model, eval_dataloader, dataset_name):
    model.eval()
    print('Evaluating Text-Video retrieval on {} data'.format(dataset_name))
    with th.no_grad():
        for i_batch, data in enumerate(eval_dataloader):
            text = data['text'].cuda()
            video = data['video'].cuda()
            m = model(video, text)
            m = m.cpu().detach().numpy()
            metrics = compute_metrics(m)
            print_computed_metrics(metrics)

def validation_epoch(cb, opt, model, val_loader):
    """Logic for each validation epoch."""
    model.eval()
    # Metrics to return.
    losses = []
    prec = []
    rec = []
    f1 = []
    ap = []
    iou = []
    l_ship = []
    l_bbox = []
    with torch.no_grad():
        for batch_idx, batch in enumerate(val_loader):
            for key in batch.keys():
                batch[key] = batch[key].to(opt.device)
            # Validation step.
            input, target = batch['input'], batch['target']
            output = model(input)
            loss, _l_ship, _l_bbox = compute_loss(output, target)
            _prec, _rec, _f1, _ap, _iou = compute_metrics(output, target)
            # Append in case the distribution is of interest for analysis.
            losses.append(loss)
            l_ship.append(_l_ship)
            l_bbox.append(_l_bbox)
            prec.append(_prec)
            rec.append(_rec)
            f1.append(_f1)
            ap.append(_ap)
            iou.append(_iou)
    loss_avg = torch.mean(torch.cat(losses))
    l_ship = torch.mean(torch.cat(l_ship))
    l_bbox = torch.mean(torch.cat(l_bbox))
    metrics = {}
    for k, m in zip(["prec", "rec", "f1", "ap", "iou"],
                    [prec, rec, f1, ap, iou]):
        m = sum(m) / len(m)
        metrics[k] = m
    cb.on_validation_end(opt=opt, output=loss_avg, metrics=metrics,
                         l_ship=l_ship, l_bbox=l_bbox)
    return loss_avg

def evaluate_benchmark_icp(args, test_loader):
    in_dim = 6 if args.normal else 3
    model = IterativeBenchmark(in_dim=in_dim, niters=args.niters, gn=args.gn)
    if args.cuda:
        model = model.cuda()
        model.load_state_dict(torch.load(args.checkpoint))
    else:
        model.load_state_dict(
            torch.load(args.checkpoint, map_location=torch.device('cpu')))
    model.eval()

    dura = []
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = [], [], [], [], [], []
    with torch.no_grad():
        for i, (ref_cloud, src_cloud, gtR, gtt) in tqdm(enumerate(test_loader)):
            if args.cuda:
                ref_cloud, src_cloud, gtR, gtt = ref_cloud.cuda(), src_cloud.cuda(), \
                                                 gtR.cuda(), gtt.cuda()
            tic = time.time()
            # Coarse alignment with the learned model, then ICP refinement.
            R1, t1, pred_ref_cloud = model(
                src_cloud.permute(0, 2, 1).contiguous(),
                ref_cloud.permute(0, 2, 1).contiguous())
            ref_cloud = torch.squeeze(ref_cloud).cpu().numpy()
            src_cloud_tmp = torch.squeeze(pred_ref_cloud[-1]).cpu().numpy()
            R2, t2, pred_ref_cloud = icp(npy2pcd(src_cloud_tmp),
                                         npy2pcd(ref_cloud))
            R2, t2 = torch.from_numpy(R2)[None, ...].to(R1), \
                     torch.from_numpy(t2)[None, ...].to(R1)
            # Compose the two transforms into the final estimate.
            R, t = R2 @ R1, torch.squeeze(R2 @ t1[:, :, None], dim=-1) + t2
            toc = time.time()
            dura.append(toc - tic)
            cur_r_mse, cur_r_mae, cur_t_mse, cur_t_mae, cur_r_isotropic, \
                cur_t_isotropic = compute_metrics(R, t, gtR, gtt)
            r_mse.append(cur_r_mse)
            r_mae.append(cur_r_mae)
            t_mse.append(cur_t_mse)
            t_mae.append(cur_t_mae)
            r_isotropic.append(cur_r_isotropic.cpu().detach().numpy())
            t_isotropic.append(cur_t_isotropic.cpu().detach().numpy())
            if args.show:
                src_cloud = torch.squeeze(src_cloud).cpu().numpy()
                pcd1 = npy2pcd(ref_cloud, 0)
                pcd2 = npy2pcd(src_cloud, 1)
                pcd3 = pred_ref_cloud
                o3d.visualization.draw_geometries([pcd1, pcd2, pcd3])
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = \
        summary_metrics(r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic)
    return dura, r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic

def main():
    granularity = 0.001
    trial_cnt = 10
    fout = open('trial_data.txt', 'w')
    print('DupRatio\tAUC\tBER', file=fout)
    for dup_ratio in np.arange(0, 1, granularity):
        print('==========DupRatio=%s==========' % dup_ratio)
        for _ in range(trial_cnt):
            generate_sample(dup_ratio)
            auc, ber = compute_metrics()
            print('%s\t%s\t%s' % (dup_ratio, auc, ber), file=fout)
    fout.close()

def compute_and_log_metrics(disp_pred, depth_gt, step_count):
    # disp_pred: (batch_size, height, width, 1)
    # depth_gt: (batch_size, depth_height, depth_width)
    abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 = compute_metrics(
        disp_pred, depth_gt)
    with train_summary_writer.as_default():
        tf.summary.scalar('abs_rel', abs_rel, step=step_count)
        tf.summary.scalar('sq_rel', sq_rel, step=step_count)
        tf.summary.scalar('rmse', rmse, step=step_count)
        tf.summary.scalar('rmse_log', rmse_log, step=step_count)
        tf.summary.scalar('a1', a1, step=step_count)
        tf.summary.scalar('a2', a2, step=step_count)
        tf.summary.scalar('a3', a3, step=step_count)

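# Hedged sketch (not from the source): the standard monocular-depth error set
# that a `compute_metrics(pred, gt)` returning (abs_rel, sq_rel, rmse,
# rmse_log, a1, a2, a3) usually denotes. It assumes `pred` and `gt` are
# positive depth maps of the same shape; converting disparity to depth and
# masking invalid ground truth would happen before this step.
import numpy as np

def depth_metrics_sketch(pred, gt):
    thresh = np.maximum(gt / pred, pred / gt)
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()
    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)
    rmse = np.sqrt(np.mean((gt - pred) ** 2))
    rmse_log = np.sqrt(np.mean((np.log(gt) - np.log(pred)) ** 2))
    return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3
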
def compute_metrics_i(i):
    if os.path.isfile(get_save_path_probs_i(i)):
        start = time.time()
        probs, gt, _ = probs_gt_load(i)
        metrics, components = compute_metrics(probs, gt)
        metrics_dump(metrics, i)
        components_dump(components, i)
        print("image", i,
              "processed in {}s\r".format(round(time.time() - start)))

def compute_baselines_part1():
    query_database = UbuntuDatabase()
    validation_set = query_database.get_validation_dataset()
    testing_set = query_database.get_testing_dataset()
    metrics_list = []
    for dataset in (validation_set, testing_set):
        similarity_vector_list = []
        bm25_scores_list = []
        for ind_sample in dataset:
            similarity_vector_list.append(ind_sample["similarity_vec"].numpy())
            bm25_scores_list.append(ind_sample["bm25_scores"].numpy())
        metrics_list.append(
            metrics.compute_metrics(bm25_scores_list, similarity_vector_list))
    return {"validation": metrics_list[0], "testing": metrics_list[1]}

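# Hedged sketch (not from the source): one plausible shape for the ranking
# `compute_metrics(scores_list, relevance_list)` used by `test` and
# `compute_baselines_part1` above, assuming each element pairs candidate
# scores with a binary relevance vector. MAP/MRR/P@1 are assumed metric names.
import numpy as np

def ranking_metrics_sketch(scores_list, relevance_list):
    aps, rrs, p1s = [], [], []
    for scores, rel in zip(scores_list, relevance_list):
        order = np.argsort(-np.asarray(scores).ravel())
        rel = np.asarray(rel).ravel()[order]
        if rel.sum() == 0:
            continue  # no relevant candidate; skip this query
        hits = np.flatnonzero(rel)
        # Precision at each relevant rank, averaged -> average precision.
        precisions = np.cumsum(rel)[hits] / (hits + 1)
        aps.append(precisions.mean())
        rrs.append(1.0 / (hits[0] + 1))  # reciprocal rank
        p1s.append(float(rel[0]))        # precision at 1
    return {'MAP': np.mean(aps), 'MRR': np.mean(rrs), 'P@1': np.mean(p1s)}
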
def validate(task, val_iter, model, logger, field, world_size, rank,
             num_print=10, args=None):
    with torch.no_grad():
        model.eval()
        required_names = ['greedy', 'answer']
        optional_names = ['context', 'question']
        loss, predictions, answers, results = gather_results(
            model, val_iter, field, world_size, optional_names=optional_names)
        predictions = [p.replace('UNK', 'OOV') for p in predictions]
        names = required_names + optional_names
        if hasattr(val_iter.dataset.examples[0], 'wikisql_id') or \
                hasattr(val_iter.dataset.examples[0], 'squad_id') or \
                hasattr(val_iter.dataset.examples[0], 'woz_id'):
            answers = [val_iter.dataset.all_answers[sid]
                       for sid in answers.tolist()]
        metrics, answers = compute_metrics(predictions, answers,
                                           bleu='iwslt' in task or 'multi30k' in task,
                                           dialogue='woz' in task,
                                           rouge='cnn' in task,
                                           logical_form='sql' in task,
                                           corpus_f1='zre' in task,
                                           args=args)
        results = [predictions, answers] + results
        print_results(names, results, rank=rank, num_print=num_print)
        return loss, metrics

def main(): """Ensemble all models inside the experiments folder""" # we assume all the experiments are saved # in the experiments folder path = Path('experiments') # get a list of all experiments name experiment_list = os.listdir(path) assert len(experiment_list) > 1, \ 'there is not enough experiments to ensemble' predictions = [] # for every experiment for experiment in experiment_list: # create a path to the valid prediction file path_to_pred = path.joinpath(experiment, 'prediction', 'valid.csv') if not os.path.exists(path_to_pred): continue # if this file exists, we read it and # set the experiment column to the name of this experiment pred_exp = load_data.read_csv(path_to_pred) pred_exp = pred_exp.assign(experiment=experiment) predictions.append(pred_exp) # concat all the predictions predictions = pd.concat(predictions) # create the target by dropping all duplicates target = predictions.drop_duplicates(subset=['period', 'timedelta']) target.reset_index(drop=True, inplace=True) target.drop(columns=default.yhat, inplace=True) # ensemble predictions_ensemble = ensemble(predictions) target_ensemble = target.merge(predictions_ensemble, on=['period', 'timedelta'], how='left') # check there is non nan values assert target_ensemble[default.yhat].isna().sum().sum() == 0 # compute the metrics ensemble_metrics = compute_metrics(target_ensemble) experiment_list = list(predictions['experiment'].unique()) ensemble_metrics['experiment'] = '__'.join(experiment_list) ensemble_metrics['n_model'] = len(experiment_list) results = pd.DataFrame([ensemble_metrics]) # print scores print(results.head()) # save the ensemble results in a CSV file results.to_csv(path / 'ensemble_summary.csv', index=False)
def evaluate_benchmark(args, test_loader):
    model = IterativeBenchmark(in_dim=args.in_dim, niters=args.niters,
                               gn=args.gn)
    if args.cuda:
        model = model.cuda()
        model.load_state_dict(torch.load(args.checkpoint))
    else:
        model.load_state_dict(
            torch.load(args.checkpoint, map_location=torch.device('cpu')))
    model.eval()

    dura = []
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = [], [], [], [], [], []
    with torch.no_grad():
        for i, (ref_cloud, src_cloud, gtR, gtt) in tqdm(enumerate(test_loader)):
            if args.cuda:
                ref_cloud, src_cloud, gtR, gtt = ref_cloud.cuda(), src_cloud.cuda(), \
                                                 gtR.cuda(), gtt.cuda()
            tic = time.time()
            R, t, pred_ref_cloud = model(
                src_cloud.permute(0, 2, 1).contiguous(),
                ref_cloud.permute(0, 2, 1).contiguous())
            toc = time.time()
            dura.append(toc - tic)
            cur_r_mse, cur_r_mae, cur_t_mse, cur_t_mae, cur_r_isotropic, \
                cur_t_isotropic = compute_metrics(R, t, gtR, gtt)
            r_mse.append(cur_r_mse)
            r_mae.append(cur_r_mae)
            t_mse.append(cur_t_mse)
            t_mae.append(cur_t_mae)
            r_isotropic.append(cur_r_isotropic.cpu().detach().numpy())
            t_isotropic.append(cur_t_isotropic.cpu().detach().numpy())
            if args.show:
                ref_cloud = torch.squeeze(ref_cloud).cpu().numpy()
                src_cloud = torch.squeeze(src_cloud).cpu().numpy()
                pred_ref_cloud = torch.squeeze(pred_ref_cloud[-1]).cpu().numpy()
                pcd1 = npy2pcd(ref_cloud, 0)
                pcd2 = npy2pcd(src_cloud, 1)
                pcd3 = npy2pcd(pred_ref_cloud, 2)
                o3d.visualization.draw_geometries([pcd1, pcd2, pcd3])
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = \
        summary_metrics(r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic)
    return dura, r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic

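# Hedged sketch (not from the source): a plausible `summary_metrics` matching
# how the registration evaluators in this file use it -- reducing per-batch
# lists to scalar errors over the whole split. Whether the MSE entries are
# square-rooted here is an assumption.
import numpy as np

def summary_metrics_sketch(r_mse, r_mae, t_mse, t_mae,
                           r_isotropic, t_isotropic):
    r_mse = np.sqrt(np.mean(r_mse))  # rotation RMSE (degrees)
    t_mse = np.sqrt(np.mean(t_mse))  # translation RMSE
    r_mae = np.mean(r_mae)
    t_mae = np.mean(t_mae)
    # Isotropic entries are per-batch arrays; flatten before averaging.
    r_isotropic = np.mean(np.concatenate(r_isotropic, axis=0))
    t_isotropic = np.mean(np.concatenate(t_isotropic, axis=0))
    return r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic
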
def evaluate(model, test_loader):
    model.eval()
    y_pred = []
    y_true = []
    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask,
                            labels=labels, return_dict=False)
            loss, output = outputs
            y_pred.extend(torch.argmax(output, 1).tolist())
            y_true.extend(labels.tolist())
    print('Classification Report:')
    metrics = compute_metrics(y_pred, y_true)
    print(metrics)

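# Hedged sketch (not from the source): a plausible
# `compute_metrics(y_pred, y_true)` for the classifier evaluation above,
# built on scikit-learn. The macro average is an assumption.
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def classification_metrics_sketch(y_pred, y_true):
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average='macro', zero_division=0)
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'precision': precision,
        'recall': recall,
        'f1': f1,
    }
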
def run(num_classes, learning_rate, width, depth, mini_batch_size):
    precision = accuracy = recall = f_score = np.array([])
    X_train, X_test, y_train, y_test, unknown_data = dp.load_data()
    X_train, X_test, y_train, y_test, unknown_data, dtype = dp.prepare_data(
        X_train, X_test, y_train, y_test, unknown_data)
    for _ in range(1):
        model = NN.Net1(num_classes, depth=depth, width=width).type(dtype)
        opt = optim.SGD(params=model.parameters(), lr=learning_rate,
                        momentum=rp.m, nesterov=True)
        train_losses, test_losses = model.train_validate(
            X_train, y_train, X_test, y_test, opt, mini_batch_size, dtype)
        model = torch.load("Models/Best_Model.pkl")
        y_pred, _ = model.test(X_test)
        # Calculate metrics.
        y_true = y_test.data.cpu().numpy()
        y_pred = y_pred.data.cpu().numpy()
        a, p, r, f = m.compute_metrics(y_true, y_pred)
        accuracy = np.append(accuracy, a)
        precision = np.append(precision, p)
        recall = np.append(recall, r)
        f_score = np.append(f_score, f)
    accuracy = np.mean(accuracy)
    precision = np.mean(precision)
    recall = np.mean(recall)
    f_score = np.mean(f_score)
    m.show_results(accuracy, precision, recall, f_score, num_classes,
                   train_losses, test_losses)
    # g.generate_graph(model, X_train)
    fw.create_data_csv(learning_rate, depth, width, mini_batch_size, rp.m,
                       len(test_losses) - 10, accuracy)
    # Store the unknown_data prediction.
    y_pred, _ = model.test(unknown_data)
    fw.store_prediction(y_pred.data.cpu().numpy())

def evaluate_fgr(args, test_loader):
    dura = []
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = [], [], [], [], [], []
    for i, (ref_cloud, src_cloud, gtR, gtt) in tqdm(enumerate(test_loader)):
        if args.cuda:
            ref_cloud, src_cloud, gtR, gtt = ref_cloud.cuda(), src_cloud.cuda(), \
                                             gtR.cuda(), gtt.cuda()
        # Split points and normals (columns 0:3 and 3:6).
        ref_points = torch.squeeze(ref_cloud).cpu().numpy()[:, :3]
        src_points = torch.squeeze(src_cloud).cpu().numpy()[:, :3]
        ref_normals = torch.squeeze(ref_cloud).cpu().numpy()[:, 3:]
        src_normals = torch.squeeze(src_cloud).cpu().numpy()[:, 3:]
        tic = time.time()
        R, t, pred_ref_cloud = fgr(source=npy2pcd(src_points),
                                   target=npy2pcd(ref_points),
                                   src_normals=src_normals,
                                   tgt_normals=ref_normals)
        toc = time.time()
        R = torch.from_numpy(np.expand_dims(R, 0)).to(gtR)
        t = torch.from_numpy(np.expand_dims(t, 0)).to(gtt)
        dura.append(toc - tic)
        cur_r_mse, cur_r_mae, cur_t_mse, cur_t_mae, cur_r_isotropic, \
            cur_t_isotropic = compute_metrics(R, t, gtR, gtt)
        r_mse.append(cur_r_mse)
        r_mae.append(cur_r_mae)
        t_mse.append(cur_t_mse)
        t_mae.append(cur_t_mae)
        r_isotropic.append(cur_r_isotropic.cpu().detach().numpy())
        t_isotropic.append(cur_t_isotropic.cpu().detach().numpy())
        if args.show:
            # Print the per-sample errors.
            print(cur_r_mse, cur_t_mse)
            pcd1 = npy2pcd(ref_points, 0)
            pcd2 = npy2pcd(src_points, 1)
            pcd3 = pred_ref_cloud
            o3d.visualization.draw_geometries([pcd1, pcd2, pcd3])
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = \
        summary_metrics(r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic)
    return dura, r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic

def do_eval(model, task_name, eval_dataloader, device, output_mode,
            eval_labels, num_labels):
    eval_loss = 0
    nb_eval_steps = 0
    preds = []
    for batch_ in tqdm(eval_dataloader, desc="Evaluating"):
        batch_ = tuple(t.to(device) for t in batch_)
        with torch.no_grad():
            input_ids, input_mask, segment_ids, label_ids, seq_lengths = batch_
            logits, _, _ = model(input_ids, segment_ids, input_mask)

        # Create eval loss and other metrics required by the task.
        if output_mode == "classification":
            loss_fct = CrossEntropyLoss()
            tmp_eval_loss = loss_fct(logits.view(-1, num_labels),
                                     label_ids.view(-1))
        elif output_mode == "regression":
            loss_fct = MSELoss()
            tmp_eval_loss = loss_fct(logits.view(-1), label_ids.view(-1))

        eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1
        if len(preds) == 0:
            preds.append(logits.detach().cpu().numpy())
        else:
            preds[0] = np.append(preds[0], logits.detach().cpu().numpy(),
                                 axis=0)

    eval_loss = eval_loss / nb_eval_steps
    preds = preds[0]
    if output_mode == "classification":
        preds = np.argmax(preds, axis=1)
    elif output_mode == "regression":
        preds = np.squeeze(preds)
    result = compute_metrics(task_name, preds, eval_labels.numpy())
    result['eval_loss'] = eval_loss
    return result

def evaluate(train_loader, model, args):
    all_txt_embd = []
    all_video_embd = []
    with torch.no_grad():
        for i_batch, data in enumerate(tqdm(train_loader)):
            text = data['text'].cuda()
            video = data['video'].float().cuda()
            video = video / 255.0
            video = video.view(-1, video.shape[2], video.shape[3],
                               video.shape[4], video.shape[5])
            video_embd, text_embd = model(video, text)
            text_embd = text_embd.cpu().numpy()
            # Average the per-window video embeddings for each clip.
            video_embd = video_embd.view(text_embd.shape[0],
                                         args.num_windows_test,
                                         text_embd.shape[1])
            video_embd = video_embd.mean(dim=1)
            video_embd = video_embd.cpu().numpy()
            all_txt_embd.append(text_embd)
            all_video_embd.append(video_embd)
    all_txt_embd = np.concatenate(all_txt_embd, axis=0)
    all_video_embd = np.concatenate(all_video_embd, axis=0)
    metrics = compute_metrics(np.dot(all_txt_embd, all_video_embd.T))
    print_computed_metrics(metrics)

def run_eval(sess, args, igraph, tgraph, data, set_name):
    """Runs one evaluation against the full epoch of data."""
    bg = utils.batch_generator(
        (data["encoded_data"][set_name], data["scores"][set_name]),
        args.batch_size, skip_last_batch=False, num_epochs=1, shuffle=False)

    # Get all the predicted and true labels in batches.
    predicted_scores = np.zeros(data["scores"][set_name].shape)
    true_scores = np.zeros(data["scores"][set_name].shape)
    start = time.time()
    for batch_num, batch_data in enumerate(bg):
        ed_batch, sc_batch = batch_data
        # Fill the feed dict with the next batch.
        feed_dict = {
            igraph["ph_inputs_dict"]["raw_seqs"]: ed_batch,
            igraph["ph_inputs_dict"]["scores"]: sc_batch
        }
        # Start and end index for this batch.
        start_index = batch_num * args.batch_size
        end_index = start_index + args.batch_size
        # Get predicted labels for evaluating metrics using sklearn.
        preds = sess.run(igraph["predictions"], feed_dict=feed_dict)
        predicted_scores[start_index:end_index] = preds
        true_scores[start_index:end_index] = sc_batch
    duration = time.time() - start
    evaluation_dict = metrics.compute_metrics(true_scores, predicted_scores)
    print("Evaluation ({} set) completed in {:.3} sec.".format(
        set_name, duration))
    return evaluation_dict

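# Hedged sketch (not from the source): a plausible regression-style
# `metrics.compute_metrics(true_scores, predicted_scores)` for the score-
# prediction evaluation above, using SciPy and scikit-learn. The exact metric
# set is an assumption.
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import mean_squared_error, r2_score

def regression_metrics_sketch(true_scores, predicted_scores):
    return {
        'mse': mean_squared_error(true_scores, predicted_scores),
        'r2': r2_score(true_scores, predicted_scores),
        'pearson_r': pearsonr(true_scores, predicted_scores)[0],
        'spearman_r': spearmanr(true_scores, predicted_scores)[0],
    }
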
def main(): """Main function""" inputs = [] with io.open("data/test.txt", mode="r", encoding="utf-8") as file_: for line in file_: if line is not None and len(line) > 0: inputs += [remove_diacritics(line)] wrapper = RegexWrapper(TEMPLATE) templates = [] for input_ in inputs: template = wrapper.fill_template(input_) print(exercise_output(template)) templates.append(template) precision, recall = metrics.compute_metrics(templates) avg_precision = precision.mean(skipna=True) avg_recall = recall.mean(skipna=True) print('Precision:\n' + str(precision)) print('\nRecall:\n' + str(recall)) print('\n\nAverage Precision: %.4f' % avg_precision) print('Average Recall: %.4f' % avg_recall)
def evaluate_icp(args, test_loader):
    dura = []
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = [], [], [], [], [], []
    for i, (ref_cloud, src_cloud, gtR, gtt) in tqdm(enumerate(test_loader)):
        if args.cuda:
            ref_cloud, src_cloud, gtR, gtt = ref_cloud.cuda(), src_cloud.cuda(), \
                                             gtR.cuda(), gtt.cuda()
        ref_cloud = torch.squeeze(ref_cloud).cpu().numpy()
        src_cloud = torch.squeeze(src_cloud).cpu().numpy()
        tic = time.time()
        R, t, pred_ref_cloud = icp(npy2pcd(src_cloud), npy2pcd(ref_cloud))
        toc = time.time()
        R = torch.from_numpy(np.expand_dims(R, 0)).to(gtR)
        t = torch.from_numpy(np.expand_dims(t, 0)).to(gtt)
        dura.append(toc - tic)
        cur_r_mse, cur_r_mae, cur_t_mse, cur_t_mae, cur_r_isotropic, \
            cur_t_isotropic = compute_metrics(R, t, gtR, gtt)
        r_mse.append(cur_r_mse)
        r_mae.append(cur_r_mae)
        t_mse.append(cur_t_mse)
        t_mae.append(cur_t_mae)
        r_isotropic.append(cur_r_isotropic.cpu().detach().numpy())
        t_isotropic.append(cur_t_isotropic.cpu().detach().numpy())
        if args.show:
            pcd1 = npy2pcd(ref_cloud, 0)
            pcd2 = npy2pcd(src_cloud, 1)
            pcd3 = pred_ref_cloud
            o3d.visualization.draw_geometries([pcd1, pcd2, pcd3])
    r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic = \
        summary_metrics(r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic)
    return dura, r_mse, r_mae, t_mse, t_mae, r_isotropic, t_isotropic

def run(args, field, val_sets, model):
    device = set_seed(args)
    print(f'Preparing iterators')
    if len(args.val_batch_size) == 1 and len(val_sets) > 1:
        args.val_batch_size *= len(val_sets)
    iters = [(name, to_iter(x, bs, device)) for name, x, bs in
             zip(args.tasks, val_sets, args.val_batch_size)]

    def mult(ps):
        r = 0
        for p in ps:
            this_r = 1
            for s in p.size():
                this_r *= s
            r += this_r
        return r

    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    num_param = mult(params)
    print(f'{args.model} has {num_param:,} parameters')
    model.to(device)

    decaScore = []
    model.eval()
    with torch.no_grad():
        for task, it in iters:
            print(task)
            prediction_file_name = os.path.join(
                os.path.splitext(args.best_checkpoint)[0], args.evaluate,
                task + '.txt')
            answer_file_name = os.path.join(
                os.path.splitext(args.best_checkpoint)[0], args.evaluate,
                task + '.gold.txt')
            results_file_name = answer_file_name.replace('gold', 'results')
            if 'sql' in task or 'squad' in task:
                ids_file_name = answer_file_name.replace('gold', 'ids')
            if os.path.exists(prediction_file_name):
                print('** ', prediction_file_name,
                      ' already exists -- this is where predictions are stored **')
                if args.overwrite:
                    print('**** overwriting ', prediction_file_name, ' ****')
            if os.path.exists(answer_file_name):
                print('** ', answer_file_name,
                      ' already exists -- this is where ground truth answers are stored **')
                if args.overwrite:
                    print('**** overwriting ', answer_file_name, ' ****')
            if os.path.exists(results_file_name):
                print('** ', results_file_name,
                      ' already exists -- this is where metrics are stored **')
                if args.overwrite:
                    print('**** overwriting ', results_file_name, ' ****')
                else:
                    with open(results_file_name) as results_file:
                        if not args.silent:
                            for l in results_file:
                                print(l)
                        metrics = json.loads(results_file.readlines()[0])
                    decaScore.append(metrics[args.task_to_metric[task]])
                    continue

            for x in [prediction_file_name, answer_file_name,
                      results_file_name]:
                os.makedirs(os.path.dirname(x), exist_ok=True)

            if not os.path.exists(prediction_file_name) or args.overwrite:
                with open(prediction_file_name, 'w') as prediction_file:
                    predictions = []
                    ids = []
                    for batch_idx, batch in enumerate(it):
                        _, p = model(batch)
                        p = field.reverse(p)
                        for i, pp in enumerate(p):
                            if 'sql' in task:
                                ids.append(int(batch.wikisql_id[i]))
                            if 'squad' in task:
                                ids.append(it.dataset.q_ids[int(batch.squad_id[i])])
                            prediction_file.write(pp + '\n')
                            predictions.append(pp)
                if 'sql' in task:
                    with open(ids_file_name, 'w') as id_file:
                        for i in ids:
                            id_file.write(json.dumps(i) + '\n')
                if 'squad' in task:
                    with open(ids_file_name, 'w') as id_file:
                        for i in ids:
                            id_file.write(i + '\n')
            else:
                with open(prediction_file_name) as prediction_file:
                    predictions = [x.strip()
                                   for x in prediction_file.readlines()]
                if 'sql' in task or 'squad' in task:
                    with open(ids_file_name) as id_file:
                        ids = [int(x.strip()) for x in id_file.readlines()]

            def from_all_answers(an):
                return [it.dataset.all_answers[sid] for sid in an.tolist()]

            if not os.path.exists(answer_file_name) or args.overwrite:
                with open(answer_file_name, 'w') as answer_file:
                    answers = []
                    for batch_idx, batch in enumerate(it):
                        if hasattr(batch, 'wikisql_id'):
                            a = from_all_answers(batch.wikisql_id.data.cpu())
                        elif hasattr(batch, 'squad_id'):
                            a = from_all_answers(batch.squad_id.data.cpu())
                        elif hasattr(batch, 'woz_id'):
                            a = from_all_answers(batch.woz_id.data.cpu())
                        else:
                            a = field.reverse(batch.answer.data)
                        for aa in a:
                            answers.append(aa)
                            answer_file.write(json.dumps(aa) + '\n')
            else:
                with open(answer_file_name) as answer_file:
                    answers = [json.loads(x.strip())
                               for x in answer_file.readlines()]

            if len(answers) > 0:
                if not os.path.exists(results_file_name) or args.overwrite:
                    metrics, answers = compute_metrics(
                        predictions, answers,
                        bleu='iwslt' in task or 'multi30k' in task or args.bleu,
                        dialogue='woz' in task,
                        rouge='cnn' in task or 'dailymail' in task or args.rouge,
                        logical_form='sql' in task,
                        corpus_f1='zre' in task,
                        args=args)
                    with open(results_file_name, 'w') as results_file:
                        results_file.write(json.dumps(metrics) + '\n')
                else:
                    with open(results_file_name) as results_file:
                        metrics = json.loads(results_file.readlines()[0])
                if not args.silent:
                    for i, (p, a) in enumerate(zip(predictions, answers)):
                        print(f'Prediction {i+1}: {p}\nAnswer {i+1}: {a}\n')
                    print(metrics)
                decaScore.append(metrics[args.task_to_metric[task]])

    print(f'Evaluated Tasks:\n')
    for i, (task, _) in enumerate(iters):
        print(f'{task}: {decaScore[i]}')
    print(f'-------------------')
    print(f'DecaScore: {sum(decaScore)}\n')
    print(f'\nSummary: | {sum(decaScore)} | {" | ".join([str(x) for x in decaScore])} |\n')

def run(args, field, val_sets, model):
    set_seed(args)
    print(f'Preparing iterators')
    iters = [(name, to_iter(x, bs)) for name, x, bs in
             zip(args.tasks, val_sets, args.val_batch_size)]

    def mult(ps):
        r = 0
        for p in ps:
            this_r = 1
            for s in p.size():
                this_r *= s
            r += this_r
        return r

    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    num_param = mult(params)
    print(f'{args.model} has {num_param:,} parameters')
    if args.gpus > -1:
        model.cuda()
    model.eval()

    for task, it in iters:
        prediction_file_name = os.path.join(
            os.path.splitext(args.best_checkpoint)[0], args.evaluate,
            task + '.txt')
        answer_file_name = os.path.join(
            os.path.splitext(args.best_checkpoint)[0], args.evaluate,
            task + '.gold.txt')
        results_file_name = answer_file_name.replace('gold', 'results')
        if os.path.exists(prediction_file_name):
            print('** ', prediction_file_name, ' already exists**')
        if os.path.exists(answer_file_name):
            print('** ', answer_file_name, ' already exists**')
        if os.path.exists(results_file_name):
            print('** ', results_file_name, ' already exists**')
            with open(results_file_name) as results_file:
                for l in results_file:
                    print(l)
            continue

        for x in [prediction_file_name, answer_file_name, results_file_name]:
            os.makedirs(os.path.dirname(x), exist_ok=True)

        if not os.path.exists(prediction_file_name):
            with open(prediction_file_name, 'a') as prediction_file:
                predictions = []
                for batch_idx, batch in enumerate(it):
                    _, p = model(batch)
                    p = field.reverse(p)
                    for pp in p:
                        prediction_file.write(pp + '\n')
                        predictions.append(pp)
        else:
            with open(prediction_file_name) as prediction_file:
                predictions = [x.strip() for x in prediction_file.readlines()]

        def from_all_answers(an):
            return [it.dataset.all_answers[sid] for sid in an.tolist()]

        if not os.path.exists(answer_file_name):
            with open(answer_file_name, 'a') as answer_file:
                answers = []
                for batch_idx, batch in enumerate(it):
                    if hasattr(batch, 'wikisql_id'):
                        a = from_all_answers(batch.wikisql_id.data.cpu())
                    elif hasattr(batch, 'squad_id'):
                        a = from_all_answers(batch.squad_id.data.cpu())
                    elif hasattr(batch, 'woz_id'):
                        a = from_all_answers(batch.woz_id.data.cpu())
                    else:
                        a = field.reverse(batch.answer.data)
                    for aa in a:
                        answers.append(aa)
                        answer_file.write(json.dumps(aa) + '\n')
        else:
            with open(answer_file_name) as answer_file:
                answers = [json.loads(x.strip())
                           for x in answer_file.readlines()]

        if len(answers) > 0:
            metrics, answers = compute_metrics(
                predictions, answers,
                bleu='iwslt' in task or 'multi30k' in task,
                dialogue='woz' in task,
                rouge='cnn' in task,
                logical_form='sql' in task,
                corpus_f1='zre' in task,
                args=args)
            print(metrics)
            with open(results_file_name, 'w') as results_file:
                results_file.write(json.dumps(metrics) + '\n')

def train(sess, model, optimizer, log_dir, batch_size,
          num_sweeps_per_summary, num_sweeps_per_save,
          train_input_seqs, train_reset_seqs, train_label_seqs,
          test_input_seqs, test_reset_seqs, test_label_seqs):
    """ Train a model and export summaries.

    `log_dir` will be *replaced* if it already exists, so it certainly
    shouldn't be anything generic like `/home/user`.

    Args:
        sess: A TensorFlow `Session`.
        model: An `LSTMModel`.
        optimizer: An `Optimizer`.
        log_dir: A string. The full path to the log directory.
        batch_size: An integer. The number of sequences in a batch.
        num_sweeps_per_summary: An integer. The number of sweeps between
            summaries.
        num_sweeps_per_save: An integer. The number of sweeps between saves.
        train_input_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, input_size]`.
        train_reset_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        train_label_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        test_input_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, input_size]`.
        test_reset_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
        test_label_seqs: A list of 2-D NumPy arrays, each with shape
            `[duration, 1]`.
    """
    ema = tf.train.ExponentialMovingAverage(decay=0.5)
    update_train_loss_ema = ema.apply([model.loss])
    train_loss_ema = ema.average(model.loss)
    tf.scalar_summary('train_loss_ema', train_loss_ema)

    train_accuracy = tf.placeholder(tf.float32, name='train_accuracy')
    train_edit_dist = tf.placeholder(tf.float32, name='train_edit_dist')
    test_accuracy = tf.placeholder(tf.float32, name='test_accuracy')
    test_edit_dist = tf.placeholder(tf.float32, name='test_edit_dist')
    values = [train_accuracy, train_edit_dist, test_accuracy, test_edit_dist]
    tags = [value.op.name for value in values]

    tf.scalar_summary('learning_rate', optimizer.learning_rate)
    tf.scalar_summary(tags, tf.pack(values))
    summary_op = tf.merge_all_summaries()

    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    summary_writer = tf.train.SummaryWriter(logdir=log_dir, graph=sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.initialize_all_variables())
    num_sweeps_visited = 0
    start_time = time.time()
    train_gen = data.sweep_generator(
        [train_input_seqs, train_reset_seqs, train_label_seqs],
        batch_size=batch_size, shuffle=True, num_sweeps=None)
    while num_sweeps_visited <= optimizer.num_train_sweeps:

        if num_sweeps_visited % num_sweeps_per_summary == 0:

            train_prediction_seqs = models.predict(
                sess, model, train_input_seqs, train_reset_seqs)
            train_accuracy_, train_edit_dist_ = metrics.compute_metrics(
                train_prediction_seqs, train_label_seqs)
            test_prediction_seqs = models.predict(
                sess, model, test_input_seqs, test_reset_seqs)
            test_accuracy_, test_edit_dist_ = metrics.compute_metrics(
                test_prediction_seqs, test_label_seqs)
            summary = sess.run(summary_op,
                               feed_dict={train_accuracy: train_accuracy_,
                                          train_edit_dist: train_edit_dist_,
                                          test_accuracy: test_accuracy_,
                                          test_edit_dist: test_edit_dist_})
            summary_writer.add_summary(summary, global_step=num_sweeps_visited)

            status_path = os.path.join(log_dir, 'status.txt')
            with open(status_path, 'w') as f:
                line = '%05.1f ' % ((time.time() - start_time)/60)
                line += '%04d ' % num_sweeps_visited
                line += '%.6f %08.3f ' % (train_accuracy_, train_edit_dist_)
                line += '%.6f %08.3f ' % (test_accuracy_, test_edit_dist_)
                print(line, file=f)

            label_path = os.path.join(log_dir, 'test_label_seqs.pkl')
            with open(label_path, 'w') as f:
                cPickle.dump(test_label_seqs, f)

            pred_path = os.path.join(log_dir, 'test_prediction_seqs.pkl')
            with open(pred_path, 'w') as f:
                cPickle.dump(test_prediction_seqs, f)

            vis_filename = 'test_visualizations_%06d.png' % num_sweeps_visited
            vis_path = os.path.join(log_dir, vis_filename)
            fig, axes = data.visualize_predictions(test_prediction_seqs,
                                                   test_label_seqs,
                                                   model.target_size)
            axes[0].set_title(line)
            plt.tight_layout()
            plt.savefig(vis_path)
            plt.close(fig)

        if num_sweeps_visited % num_sweeps_per_save == 0:
            saver.save(sess, os.path.join(log_dir, 'model.ckpt'))

        train_inputs, train_resets, train_labels = train_gen.next()
        # We squeeze here because otherwise the targets would have shape
        # [batch_size, duration, 1, num_classes].
        train_targets = data.one_hot(train_labels, model.target_size)
        train_targets = train_targets.squeeze(axis=2)
        _, _, num_sweeps_visited = sess.run(
            [optimizer.optimize_op, update_train_loss_ema,
             optimizer.num_sweeps_visited],
            feed_dict={model.inputs: train_inputs,
                       model.resets: train_resets,
                       model.targets: train_targets,
                       model.training: True})

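# Hedged sketch (not from the source): a plausible
# `metrics.compute_metrics(prediction_seqs, label_seqs)` for the trainer
# above, returning frame-level accuracy and a segment-level Levenshtein edit
# distance over run-length-encoded label sequences. Whether the real metric
# normalizes the edit distance is an assumption.
import numpy as np

def seq_metrics_sketch(prediction_seqs, label_seqs):
    def collapse(seq):
        # Run-length encode: keep only the points where the label changes.
        seq = np.asarray(seq).ravel()
        return seq[np.insert(np.diff(seq) != 0, 0, True)]

    def edit_distance(a, b):
        # Standard dynamic-programming Levenshtein distance.
        dp = np.zeros((len(a) + 1, len(b) + 1), dtype=int)
        dp[:, 0] = np.arange(len(a) + 1)
        dp[0, :] = np.arange(len(b) + 1)
        for i in range(1, len(a) + 1):
            for j in range(1, len(b) + 1):
                dp[i, j] = min(dp[i - 1, j] + 1, dp[i, j - 1] + 1,
                               dp[i - 1, j - 1] + (a[i - 1] != b[j - 1]))
        return dp[len(a), len(b)]

    correct = sum((np.asarray(p).ravel() == np.asarray(l).ravel()).sum()
                  for p, l in zip(prediction_seqs, label_seqs))
    total = sum(np.asarray(l).size for l in label_seqs)
    dists = [edit_distance(collapse(p), collapse(l))
             for p, l in zip(prediction_seqs, label_seqs)]
    return correct / float(total), float(np.mean(dists))
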