def on_epoch_end(self, epoch, logs=None):
    """Evaluate on the validation dataset at epoch end, push annotated
    images and metrics to Neptune, and remember the best-mAP checkpoint.
    """
    self._verbose_print("Calculating metrics...")
    weights_path = self._load_weights_for_model()
    images, gt_boxes, gt_class_ids, gt_masks, results = detect(
        self.inference_model, self.dataset)
    metrics = compute_metrics(images, gt_boxes, gt_class_ids, gt_masks, results)
    pprint.pprint(metrics)

    # Upload one annotated image per sample; channels reversed BGR -> RGB
    # before logging.
    for idx, frame in enumerate(images):
        if frame.shape[2] != 3:
            # Single-channel input: expand to 3 channels so drawing works.
            frame = cv.cvtColor(frame, cv.COLOR_GRAY2BGR)
        visualize_result(frame, results[idx], gt_masks[idx], scores=True)
        neptune.log_image(f'image_epoch_{epoch}', frame[..., ::-1])

    # Log every (name, value) pair against the epoch index.
    for metric_name, metric_value in metrics:
        neptune.log_metric(metric_name, epoch, metric_value)

    # The first metrics entry is treated as mAP; keep the best checkpoint.
    _, current_map = metrics[0]
    if current_map > self.best_mAP:
        self.best_mAP = current_map
        self.best_epoch = epoch
        self.best_model = weights_path
def on_epoch_end(self, epoch, logs=None):
    """Evaluate on the validation dataset at epoch end and write annotated
    images plus scalar metrics to the TensorBoard summary writer.
    """
    self._verbose_print("Calculating metrics...")
    self._load_weights_for_model()
    images, gt_boxes, gt_class_ids, gt_masks, results = detect(
        self.inference_model, self.dataset)
    metrics = compute_metrics(images, gt_boxes, gt_class_ids, gt_masks, results)
    pprint.pprint(metrics)

    # Build one image summary holding every annotated sample.
    image_summary = tf.Summary()
    for idx, frame in enumerate(images):
        if frame.shape[2] != 3:
            # Single-channel input: expand to 3 channels so drawing works.
            frame = cv.cvtColor(frame, cv.COLOR_GRAY2BGR)
        visualize_result(frame, results[idx], gt_masks[idx], scores=True)
        # Encode as PNG so TensorBoard can render it.
        _, encoded = cv.imencode('.png', frame)
        image_summary.value.add(
            tag=f'img/{idx}',
            image=tf.Summary.Image(encoded_image_string=encoded.tobytes()))

    # Build one scalar summary holding every metric.
    scalar_summary = tf.Summary()
    for metric_name, metric_value in metrics:
        entry = scalar_summary.value.add()
        entry.simple_value = metric_value
        entry.tag = 'Metrics/' + metric_name

    self.writer.add_summary(scalar_summary, epoch)
    self.writer.add_summary(image_summary, epoch)
    self.writer.flush()
def end_experiment():
    """Persist the accuracy/loss/Hessian logs to disk, report the summary
    score and forgetting metrics, upload the artifacts, and close the
    experiment.
    """
    # Accuracy and loss logs each get a CSV dump plus a rendered plot.
    for db, csv_name, png_name in (
        (acc_db, '/accs.csv', '/accs.png'),
        (loss_db, '/loss.csv', '/loss.png'),
    ):
        frame = pd.DataFrame(db)
        frame.to_csv(EXPERIMENT_DIRECTORY + csv_name)
        visualize_result(frame, EXPERIMENT_DIRECTORY + png_name)

    # Hessian eigenvalue log is CSV-only.
    pd.DataFrame(hessian_eig_db).to_csv(EXPERIMENT_DIRECTORY + '/hessian_eigs.csv')

    # Final accuracy averaged over every task seen.
    score = np.mean([acc_db[task][-1] for task in acc_db.keys()])
    # NOTE(review): forgetting averages over tasks 1..NUM_TASKS-1 while the
    # score averages over all keys — preserved as-is; confirm the asymmetry
    # is intentional.
    forget = np.mean(
        [max(acc_db[task]) - acc_db[task][-1] for task in range(1, NUM_TASKS)]
    ) / 100.0
    print('score = {}, forget = {}'.format(score, forget))

    for metric_name, metric_value in (('score', score), ('forget', forget)):
        experiment.log_metric(name=metric_name, value=metric_value)
    experiment.log_asset_folder(EXPERIMENT_DIRECTORY)
    experiment.end()
def main(ff_type, input_size, bi):
    """Predict a flow field with the RNN and report error statistics.

    Args:
        ff_type: field type — 'velocity', 'magnitude', or anything else
            (treated as a vector field visualized against true_v).
        input_size: side length of the square field (isq = input_size**2
            spatial locations).
        bi: the string 'true' selects the bidirectional model/target set.

    Side effects: prints diagnostics and calls the visualization helpers.
    """
    isq = input_size ** 2  # number of spatial locations in one field
    # The original drew num_cycle / start_ff with random.randint and then
    # immediately overwrote them with fixed values ("start_ff *= 1" was a
    # no-op); the dead randomness and the unused `hs` / `ff = []` are removed.
    num_cycle = 17
    print("Number of cycle is:", num_cycle)
    start_ff = 78  # other values used in past experiments: 42, 81, 90
    print("Start flow field is No.", start_ff * 100 + num_cycle)

    bidirectional = (bi == 'true')
    # Bidirectional targets live at offset 500, unidirectional at 900 —
    # the only difference between the two original branches.
    offset = 500 if bidirectional else 900
    print("Target flow field is No.", start_ff * 100 + offset + num_cycle)
    ff, true_ff, true_v = read_inputs(ff_type, num_cycle, start_ff, bidirectional)
    pred = predict_rnn(ff_type, ff, input_size, bi=bidirectional)
    visualize_seq(ff_type, ff, bidirectional, start_ff)

    target = true_ff.reshape(isq, -1)
    # Per-location error norms computed once and reused (originally the
    # same np.linalg.norm was evaluated twice).
    distance = np.linalg.norm(pred - target, axis=1)
    mnd = np.nanmean(distance)
    print("Mean normed distance is:", mnd)
    mag = np.linalg.norm(target, axis=1).reshape(1, -1)
    print("average magnitude for vectors in the target flow field is:", np.nanmean(mag))
    per_error = np.sum(distance) / np.sum(mag)
    print("Percentage error:", per_error)

    if ff_type == 'velocity':
        pred = pred.reshape(9, 9, 2)
        # NOTE(review): this branch passes `pred` as the 4th argument where
        # the final branch passes the ground-truth `true_v` — looks
        # suspicious but is preserved; confirm intent.
        utils.visualize_result(true_ff, pred, ff_type, pred)
    elif ff_type == 'magnitude':
        pred = pred.reshape(9, 9)
        utils.visualize_result(true_ff, pred, ff_type)
    else:
        pred = pred.reshape(9, 9, 2)
        utils.visualize_result(true_ff, pred, ff_type, true_v)
def test_leave_one_out(self):
    """Leave-one-out validation of influence-function loss-diff estimates.

    Trains a logistic-regression model (sklearn, LBFGS), copies the optimal
    weights into a PyTorch twin, estimates the test-loss change caused by
    removing each training point via influence functions, then actually
    retrains without the highest/lowest-influence samples and asserts the
    estimated and true loss differences agree (R^2 > 0.9).

    Fixes vs original:
    - `if gpus >= 0:` was always true (gpus is 0 or 1), so `.cuda()` crashed
      on CPU-only machines; GPU branches now test `gpus > 0`.
    - `calc_influence_single` was called with a hard-coded `gpu=1`; it now
      receives the detected `gpus`.
    - Removed a large block of commented-out dead code (manual s_test /
      per-sample gradient estimation superseded by calc_influence_single).
    """
    gpus = 1 if torch.cuda.is_available() else 0
    (x_train, y_train), (x_test, y_test) = get_2class_mnist(NUM_A, NUM_B)
    train_sample_num = len(x_train)

    class CreateData(torch.utils.data.Dataset):
        """Minimal in-memory dataset wrapping (data, targets) arrays."""

        def __init__(self, data, targets):
            self.data = data
            self.targets = targets

        def __len__(self):
            return len(self.data)

        def __getitem__(self, idx):
            return self.data[idx], self.targets[idx]

    train_data = CreateData(x_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=1, shuffle=False)

    # sklearn model trained to (near) optimality; C is the inverse of the
    # total L2 penalty so it matches the PyTorch model's weight decay.
    C = 1.0 / (train_sample_num * WEIGHT_DECAY)
    sklearn_model = linear_model.LogisticRegression(
        C=C, solver='lbfgs', tol=1e-8, fit_intercept=False)
    # PyTorch twin used only to compute influence functions.
    torch_model = LR(weight_decay=WEIGHT_DECAY)

    sklearn_model.fit(x_train, y_train.ravel())
    print('LBFGS training took %s iter.' % sklearn_model.n_iter_)

    # Copy the optimal weights into the PyTorch model.
    w_opt = sklearn_model.coef_.ravel()
    with torch.no_grad():
        torch_model.w = torch.nn.Parameter(torch.tensor(w_opt, dtype=torch.float))

    # Single held-out test point whose loss change is tracked.
    x_test_input = torch.FloatTensor(x_test[TEST_INDEX: TEST_INDEX + 1])
    y_test_input = torch.LongTensor(y_test[TEST_INDEX: TEST_INDEX + 1])
    test_data = CreateData(x_test[TEST_INDEX: TEST_INDEX + 1],
                           y_test[TEST_INDEX: TEST_INDEX + 1])
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=True)

    if gpus > 0:
        torch_model = torch_model.cuda()
        x_test_input = x_test_input.cuda()
        y_test_input = y_test_input.cuda()

    # Baseline: test loss of the fully-trained model.
    test_loss_ori = torch_model.loss(
        torch_model(x_test_input), y_test_input, train=False).detach().cpu().numpy()

    # Influence-function estimate of the per-sample loss change.
    loss_diff_approx, _, _, _ = calc_influence_single(
        torch_model, train_loader, test_loader, test_id_num=0, gpu=gpus,
        recursion_depth=RECURSION_DEPTH, r=R, damp=0, scale=SCALE,
        exact=EXACT, batch_size=128)
    loss_diff_approx = torch.FloatTensor(loss_diff_approx).cpu().numpy()

    # Keep the SAMPLE_NUM/2 most positive and most negative estimates.
    sorted_indice = np.argsort(loss_diff_approx)
    sample_indice = np.concatenate([sorted_indice[-int(SAMPLE_NUM / 2):],
                                    sorted_indice[:int(SAMPLE_NUM / 2)]])

    # True loss differences via leave-one-out retraining.
    loss_diff_true = np.zeros(SAMPLE_NUM)
    for i, index in enumerate(sample_indice):
        print('[{}/{}]'.format(i + 1, SAMPLE_NUM))
        # Drop sample `index` and retrain from scratch.
        x_train_minus_one = np.delete(x_train, index, axis=0)
        y_train_minus_one = np.delete(y_train, index, axis=0)
        # Rescale C for the (n-1)-sample dataset.
        C = 1.0 / ((train_sample_num - 1) * WEIGHT_DECAY)
        sklearn_model_minus_one = linear_model.LogisticRegression(
            C=C, fit_intercept=False, tol=1e-8, solver='lbfgs')
        sklearn_model_minus_one.fit(x_train_minus_one, y_train_minus_one.ravel())
        print('LBFGS training took {} iter.'.format(sklearn_model_minus_one.n_iter_))

        # Copy the retrained weights into the PyTorch model.
        w_retrain = sklearn_model_minus_one.coef_.T.ravel()
        with torch.no_grad():
            torch_model.w = torch.nn.Parameter(
                torch.tensor(w_retrain, dtype=torch.float))
        if gpus > 0:
            torch_model = torch_model.cuda()

        test_loss_retrain = torch_model.loss(
            torch_model(x_test_input), y_test_input, train=False).detach().cpu().numpy()
        loss_diff_true[i] = test_loss_retrain - test_loss_ori
        print('Original loss :{}'.format(test_loss_ori))
        print('Retrain loss :{}'.format(test_loss_retrain))
        print('True loss diff :{}'.format(loss_diff_true[i]))
        print('Estimated loss diff :{}'.format(loss_diff_approx[index]))

    # Estimated vs true differences should be strongly correlated.
    r2_score = visualize_result(loss_diff_true, loss_diff_approx[sample_indice])
    self.assertTrue(r2_score > 0.9)
def train_one_epoch(epoch, dataloader, model, criterion, optimizer, device,
                    log_interval_vis, tb_writer, args=None):
    """Train `model` for one epoch on an edge-detection dataset.

    Sums `criterion` over every prediction scale returned by the model,
    logs the loss to TensorBoard (when `tb_writer` is given), prints
    progress every 5 batches, and every `log_interval_vis` batches writes
    a composite visualization image to <args.output_dir>/current_res.

    FIX vs original: `tmp_preds = torch.cat(preds_list, dim=1)` was built
    every batch but only referenced by commented-out code — the wasted
    concatenation is removed.
    """
    imgs_res_folder = os.path.join(args.output_dir, 'current_res')
    os.makedirs(imgs_res_folder, exist_ok=True)
    # Put model in training mode
    model.train()
    for batch_id, sample_batched in enumerate(dataloader):
        images = sample_batched['images'].to(device)  # BxCxHxW
        labels = sample_batched['labels'].to(device)  # BxHxW
        preds_list = model(images)
        # Loss summed over all prediction scales.
        loss = sum([criterion(preds, labels) for preds in preds_list])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if tb_writer is not None:
            tb_writer.add_scalar('loss', loss.detach(),
                                 (len(dataloader) * epoch + batch_id))
        if batch_id % 5 == 0:
            print(
                time.ctime(), 'Epoch: {0} Sample {1}/{2} Loss: {3}'.format(
                    epoch, batch_id, len(dataloader), loss.item()))
        if batch_id % log_interval_vis == 0:
            # NOTE(review): sample index 2 of the batch is visualized —
            # assumes batch size >= 3; confirm against the dataloader config.
            res_data = []
            img = images.cpu().numpy()
            res_data.append(img[2])
            ed_gt = labels.cpu().numpy()
            res_data.append(ed_gt[2])
            for preds in preds_list:
                vis_pred = torch.sigmoid(preds[2]).unsqueeze(dim=0)
                res_data.append(vis_pred.cpu().detach().numpy())
            vis_imgs = visualize_result(res_data, arg=args)
            del res_data
            vis_imgs = cv2.resize(
                vis_imgs,
                (int(vis_imgs.shape[1] * 0.8), int(vis_imgs.shape[0] * 0.8)))
            img_test = 'Epoch: {0} Sample {1}/{2} Loss: {3}' \
                .format(epoch, batch_id, len(dataloader), loss.item())
            # NOTE(review): (0, 0, 255) is red in OpenCV's BGR order despite
            # the original name BLACK; value kept to preserve output.
            font_color = (0, 0, 255)
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_size = 1.1
            font_thickness = 2
            x, y = 30, 30
            vis_imgs = cv2.putText(vis_imgs, img_test, (x, y), font,
                                   font_size, font_color, font_thickness,
                                   cv2.LINE_AA)
            cv2.imwrite(os.path.join(imgs_res_folder, 'results.png'), vis_imgs)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) # Training model_name = 'malnet_model.{epoch:03d}.h5' if not os.path.isdir(save_dir): os.makedirs(save_dir) filepath = os.path.join(save_dir, model_name) checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_acc', verbose=1, save_best_only=True) history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_val, y_val), callbacks=[checkpoint]) # Visualize the result utils.visualize_result(history, save_dir) # ROC curve y_pred = model.predict(X_test) fpr, tpr, thresholds = roc_curve(np.argmax(y_test, axis=1), y_pred[:, 1], pos_label=1) acc = np.mean(np.equal(np.argmax(y_test, axis=1), np.argmax(y_pred, axis=1))) utils.visualize_roc(fpr, tpr, thresholds, acc, save_dir)
def train(train_loader, net, opt, lr_schd, epoch, save_dir, fig, global_iter):
    """Training procedure for one epoch.

    Accumulates gradients over `args.train_iter_size` batches before each
    optimizer step (Caffe-style iterations), logs the running loss to
    `pt_writer`, periodically shows a matplotlib visualization, and saves
    per-scale intermediate prediction images.

    Returns:
        (epoch-average batch loss, updated global_iter).
    """
    # Create the directory.
    if not isdir(save_dir):
        os.makedirs(save_dir)
    # Switch to train mode and clear the gradient.
    net.train()
    opt.zero_grad()
    # Initialize meter and list.
    batch_loss_meter = AverageMeter()
    # Note: The counter is used here to record number of batches in current training iteration has been processed.
    #       It aims to have large training iteration number even if GPU memory is not enough. However, such trick
    #       can be used because batch normalization is not used in the network architecture.
    counter = 0
    for batch_index, (images, edges) in enumerate(tqdm(train_loader)):
        # Adjust learning rate and modify counter following Caffe's way.
        if counter == 0:
            lr_schd.step()  # Step at the beginning of the iteration.
        counter += 1
        # Get images and edges from current batch.
        images, edges = images.to(device), edges.to(device)
        # Generate predictions.
        preds_list = net(images)
        # Calculate the loss of current batch (sum of all scales and fused).
        # Note: Here we mimic the "iteration" in official repository: iter_size batches will be considered together
        #       to perform one gradient update. To achieve the goal, we calculate the equivalent iteration loss
        #       eqv_iter_loss of current batch and generate the gradient. Then, instead of updating the weights,
        #       we continue to calculate eqv_iter_loss and add the newly generated gradient to current gradient.
        #       After iter_size batches, we will update the weights using the accumulated gradients and then zero
        #       the gradients.
        # Reference:
        #   https://github.com/s9xie/hed/blob/94fb22f10cbfec8d84fbc0642b224022014b6bd6/src/caffe/solver.cpp#L230
        #   https://www.zhihu.com/question/37270367
        batch_loss = sum([
            weighted_cross_entropy_loss(preds, edges) for preds in preds_list
        ])
        eqv_iter_loss = batch_loss / args.train_iter_size
        # Generate the gradient and accumulate (using equivalent average loss).
        eqv_iter_loss.backward()
        if counter == args.train_iter_size:
            opt.step()
            opt.zero_grad()
            counter = 0  # Reset the counter.
        # Record loss.
        batch_loss_meter.update(batch_loss.item())
        # `pt_writer` and `args` are module-level objects (not parameters).
        pt_writer.add_scalar('data/logs',
                             batch_loss_meter.avg,
                             global_step=global_iter)
        # Log and save intermediate images.
        # visualize results
        if batch_index % 200 == 0:
            rgb = images.cpu().numpy()
            edge = edges.cpu().numpy()
            # One array per prediction scale plus the fused output.
            pred1 = preds_list[0].cpu().detach().numpy()
            pred2 = preds_list[1].cpu().detach().numpy()
            pred3 = preds_list[2].cpu().detach().numpy()
            pred4 = preds_list[3].cpu().detach().numpy()
            pred5 = preds_list[4].cpu().detach().numpy()
            predf = preds_list[5].cpu().detach().numpy()
            vis_imgs = visualize_result(
                [rgb, edge, pred1, pred2, pred3, pred4, pred5, predf], args)
            fig.suptitle("Epoch:" + str(batch_index + 1) + " Loss:" +
                         '%.5f' % batch_loss_meter.avg + " training")
            fig.add_subplot(1, 1, 1)
            plt.imshow(vis_imgs)
            plt.draw()
            plt.pause(0.01)
            if (batch_index + 1) % 5000 == 0:
                print('updating visualisation')
                plt.close()
                # NOTE(review): this rebinds only the local `fig`; the new
                # figure is never returned to the caller — confirm intent.
                fig = plt.figure()
        # end result visualization
        if batch_index % args.print_freq == args.print_freq - 1:
            # Log.
            print('Train epoch:[', epoch, '/', args.max_epoch, 'batch: [',
                  batch_index, '/', len(train_loader), '] curr iter: ',
                  lr_schd.last_epoch,
                  ' batch_loss: %.5f' % batch_loss_meter.val,
                  ' epoch avg batch_loss: %.5f' % batch_loss_meter.avg,
                  ' lr_list: ', lr_schd.get_lr())
            # print(('Training epoch:{}/{}, batch:{}/{} current iteration:{}, ' +
            #        'current batch batch_loss:{}, epoch average batch_loss:{}, learning rate list:{}.').format(
            #     epoch, args.max_epoch, batch_index, len(train_loader), lr_schd.last_epoch,
            #     batch_loss_meter.val, batch_loss_meter.avg, lr_schd.get_lr()))
            # Generate intermediate images.
            preds_list_and_edges = preds_list + [edges]
            _, _, h, w = preds_list_and_edges[0].shape
            interm_images = torch.zeros((len(preds_list_and_edges), 1, h, w))
            for i in range(len(preds_list_and_edges)):
                # Only fetch the first image in the batch.
                interm_images[i, 0, :, :] = preds_list_and_edges[i][0, 0, :, :]
            # Save the images.
            torchvision.utils.save_image(
                interm_images,
                join(save_dir, 'batch-{}-1st-image.png'.format(batch_index)))
        global_iter += 1
    # Return the epoch average batch_loss.
    return batch_loss_meter.avg, global_iter
def train_one_epoch(epoch, dataloader, model, criterion, optimizer, device,
                    log_interval_vis, tb_writer, args=None):
    """Train `model` for one epoch using per-scale weighted losses.

    Each prediction scale gets its own loss weight from `l_weight` (the
    active setting targets the bdcn-style loss; alternative weight sets for
    other loss variants are kept commented out). Logs to TensorBoard when
    `tb_writer` is given, prints progress every 5 batches, and every
    `log_interval_vis` batches writes a composite visualization image to
    <args.output_dir>/current_res.
    """
    imgs_res_folder = os.path.join(args.output_dir, 'current_res')
    os.makedirs(imgs_res_folder, exist_ok=True)
    # Put model in training mode
    model.train()
    # l_weight = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1.1]  # for bdcn ori loss
    # before [0.6,0.6,1.1,1.1,0.4,0.4,1.3] [0.4,0.4,1.1,1.1,0.6,0.6,1.3],[0.4,0.4,1.1,1.1,0.8,0.8,1.3]
    l_weight = [0.7, 0.7, 1.1, 1.1, 0.3, 0.3, 1.3]  # for bdcn loss theory 3 before the last 1.3 0.6-0..5
    # l_weight = [[0.05, 2.], [0.05, 2.], [0.05, 2.],
    #             [0.1, 1.], [0.1, 1.], [0.1, 1.],
    #             [0.01, 4.]]  # for cats loss
    for batch_id, sample_batched in enumerate(dataloader):
        images = sample_batched['images'].to(device)  # BxCxHxW
        labels = sample_batched['labels'].to(device)  # BxHxW
        preds_list = model(images)
        # loss = sum([criterion(preds, labels, l_w, device) for preds, l_w in zip(preds_list, l_weight)])  # cats_loss
        # Per-scale weighted sum; assumes len(preds_list) matches len(l_weight).
        loss = sum([
            criterion(preds, labels, l_w)
            for preds, l_w in zip(preds_list, l_weight)
        ])  # bdcn_loss
        # loss = sum([criterion(preds, labels) for preds in preds_list])  # HED loss, rcf_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if tb_writer is not None:
            tb_writer.add_scalar('loss', loss.detach(),
                                 (len(dataloader) * epoch + batch_id))
        if batch_id % 5 == 0:
            print(
                time.ctime(), 'Epoch: {0} Sample {1}/{2} Loss: {3}'.format(
                    epoch, batch_id, len(dataloader), loss.item()))
        if batch_id % log_interval_vis == 0:
            # NOTE(review): sample index 2 of the batch is visualized below —
            # assumes batch size >= 3; confirm against the dataloader config.
            res_data = []
            img = images.cpu().numpy()
            res_data.append(img[2])
            ed_gt = labels.cpu().numpy()
            res_data.append(ed_gt[2])
            # tmp_pred = tmp_preds[2,...]
            for i in range(len(preds_list)):
                tmp = preds_list[i]
                tmp = tmp[2]
                # print(tmp.shape)
                tmp = torch.sigmoid(tmp).unsqueeze(dim=0)
                tmp = tmp.cpu().detach().numpy()
                res_data.append(tmp)
            vis_imgs = visualize_result(res_data, arg=args)
            del tmp, res_data
            vis_imgs = cv2.resize(
                vis_imgs,
                (int(vis_imgs.shape[1] * 0.8), int(vis_imgs.shape[0] * 0.8)))
            img_test = 'Epoch: {0} Sample {1}/{2} Loss: {3}' \
                .format(epoch, batch_id, len(dataloader), loss.item())
            # NOTE(review): (0, 0, 255) is red in OpenCV BGR order despite
            # the name BLACK.
            BLACK = (0, 0, 255)
            font = cv2.FONT_HERSHEY_SIMPLEX
            font_size = 1.1
            font_color = BLACK
            font_thickness = 2
            x, y = 30, 30
            vis_imgs = cv2.putText(vis_imgs, img_test, (x, y), font,
                                   font_size, font_color, font_thickness,
                                   cv2.LINE_AA)
            cv2.imwrite(os.path.join(imgs_res_folder, 'results.png'), vis_imgs)
file_obj["target"] = Target


def compute_performance(prediction, target, data):
    """Denormalize predictions/targets and compute MAE/MAPE/RMSE.

    Args:
        prediction: normalized model output (tensor with a .numpy() method).
        target: normalized ground truth (tensor with a .numpy() method).
        data: either a DataLoader (its .dataset attribute is used) or a
            Dataset carrying the `flow_norm` normalization basis.

    Returns:
        (performance, recovered_data) where performance = [mae, mape, rmse]
        and recovered_data = [prediction, target] in original units.
    """
    # FIX: the original bare `except:` swallowed every exception (including
    # KeyboardInterrupt/SystemExit); only the missing-attribute case of a
    # plain Dataset is expected here.
    try:
        dataset = data.dataset  # `data` is a DataLoader
    except AttributeError:
        dataset = data  # `data` is already a Dataset
    # Undo normalization: flow_norm[0] is the max basis, flow_norm[1] the min.
    prediction = LoadData.recover_data(dataset.flow_norm[0],
                                       dataset.flow_norm[1],
                                       prediction.numpy())
    target = LoadData.recover_data(dataset.flow_norm[0],
                                   dataset.flow_norm[1],
                                   target.numpy())
    # Flatten so the metrics are computed over all elements at once.
    mae, mape, rmse = Evaluation.total(target.reshape(-1),
                                       prediction.reshape(-1))
    performance = [mae, mape, rmse]
    recovered_data = [prediction, target]
    return performance, recovered_data


if __name__ == '__main__':
    # main()
    visualize_result(h5_file="GAT_result.h5",
                     nodes_id=120,
                     time_se=[0, 24 * 12 * 2],
                     visualize_file="gat_node_120")
# NOTE(review): fragment of a leave-one-out retraining loop — the enclosing
# function and loop header (defining `index`, `i`, `x_train`, `tf_model`,
# etc.) lie outside this view.
# Drop training sample `index` to form the leave-one-out dataset.
x_train_minus_one = np.delete(x_train, index, axis=0)
y_train_minus_one = np.delete(y_train, index, axis=0)
# retrain
# Inverse-regularization constant rescaled for the (n-1)-sample dataset.
C = 1.0 / ((train_sample_num - 1) * WEIGHT_DECAY)
sklearn_model_minus_one = linear_model.LogisticRegression(
    C=C, fit_intercept=False, tol=1e-8, solver='lbfgs')
sklearn_model_minus_one.fit(x_train_minus_one, y_train_minus_one.ravel())
print('LBFGS training took {} iter.'.format(
    sklearn_model_minus_one.n_iter_))
# assign w on tensorflow model
# coef_ is transposed then flattened to match the TF placeholder layout.
w_retrain = sklearn_model_minus_one.coef_.T.ravel()
tf_model.sess.run(tf_model.w_assign_op,
                  feed_dict={tf_model.w_ph: w_retrain})
# get retrain loss
test_loss_retrain = tf_model.sess.run(tf_model.loss,
                                      feed_dict=feed_dict_test)
# get true loss diff
loss_diff_true[i] = test_loss_retrain - test_loss_ori
print('Original loss :{}'.format(test_loss_ori))
print('Retrain loss :{}'.format(test_loss_retrain))
print('True loss diff :{}'.format(loss_diff_true[i]))
print('Estimated loss diff :{}'.format(loss_diff_approx[index]))
# Compare estimated vs true loss differences (presumably a scatter/R^2 plot
# — confirm against visualize_result's definition).
visualize_result(loss_diff_true, loss_diff_approx[sample_indice])
dataset = data.dataset # 数据为dataloader型,通过它下面的属性.dataset类变成dataset型数据 except: dataset = data # 数据为dataset型,直接赋值 # 下面就是对预测和目标数据进行逆归一化,recover_data()函数在上一小节的数据处理中 # flow_norm为归一化的基,flow_norm[0]为最大值,flow_norm[1]为最小值 # prediction.numpy()和target.numpy()是需要逆归一化的数据,转换成numpy型是因为 recover_data()函数中的数据都是numpy型,保持一致 prediction = LoadData.recover_data(dataset.flow_norm[0], dataset.flow_norm[1], prediction.numpy()) target = LoadData.recover_data(dataset.flow_norm[0], dataset.flow_norm[1], target.numpy()) # 对三种评价指标写了一个类,这个类封装在另一个文件中,在后面 mae, mape, rmse = Evaluation.total( target.reshape(-1), prediction.reshape(-1)) # 变成常向量才能计算这三种指标 performance = [mae, mape, rmse] recovered_data = [prediction, target] return performance, recovered_data # 返回评价结果,以及恢复好的数据(为可视化准备的) if __name__ == '__main__': main() visualize_result( h5_file="GAT_result.h5", # 可视化,在下面的 Evaluation()类中 nodes_id=120, time_se=[0, 24 * 12 * 2], # 是节点的时间范围 visualize_file="gat_node_120")