def define_graph(self): """ Sets up the model graph in TensorFlow. """ with tf.name_scope('generator'): ## # Data ## with tf.name_scope('data'): self.input_frames_train = tf.placeholder( tf.float32, shape=[None, self.height_train, self.width_train, 3 * c.HIST_LEN]) self.gt_frames_train = tf.placeholder( tf.float32, shape=[None, self.height_train, self.width_train, 3]) self.input_frames_test = tf.placeholder( tf.float32, shape=[None, self.height_test, self.width_test, 3 * c.HIST_LEN]) self.gt_frames_test = tf.placeholder( tf.float32, shape=[None, self.height_test, self.width_test, 3]) # use variable batch_size for more flexibility self.batch_size_train = tf.shape(self.input_frames_train)[0] self.batch_size_test = tf.shape(self.input_frames_test)[0] ## # Scale network setup and calculation ## self.summaries_train = [] self.scale_preds_train = [] # the generated images at each scale self.scale_gts_train = [] # the ground truth images at each scale self.d_scale_preds = [] # the predictions from the discriminator model self.summaries_test = [] self.scale_preds_test = [] # the generated images at each scale self.scale_gts_test = [] # the ground truth images at each scale for scale_num in range(self.num_scale_nets): with tf.name_scope('scale_' + str(scale_num)): with tf.name_scope('setup'): ws = [] bs = [] # create weights for kernels for i in range(len(self.scale_kernel_sizes[scale_num])): ws.append(w([self.scale_kernel_sizes[scale_num][i], self.scale_kernel_sizes[scale_num][i], self.scale_layer_fms[scale_num][i], self.scale_layer_fms[scale_num][i + 1]])) bs.append(b([self.scale_layer_fms[scale_num][i + 1]])) with tf.name_scope('calculation'): def calculate(height, width, inputs, gts, last_gen_frames): # scale inputs and gts scale_factor = 1. / 2 ** ((self.num_scale_nets - 1) - scale_num) scale_height = int(height * scale_factor) scale_width = int(width * scale_factor) inputs = tf.image.resize_images(inputs, [scale_height, scale_width]) scale_gts = tf.image.resize_images(gts, [scale_height, scale_width]) # for all scales but the first, add the frame generated by the last # scale to the input if scale_num > 0: last_gen_frames = tf.image.resize_images( last_gen_frames,[scale_height, scale_width]) print("inputs: {}, frames: {}".format(inputs.shape, last_gen_frames.shape)) inputs = tf.concat([inputs, last_gen_frames], 3) # generated frame predictions preds = inputs # perform convolutions with tf.name_scope('convolutions'): for i in range(len(self.scale_kernel_sizes[scale_num])): # Convolve layer preds = tf.nn.conv2d( preds, ws[i], [1, 1, 1, 1], padding=c.PADDING_G) # Activate with ReLU (or Tanh for last layer) if i == len(self.scale_kernel_sizes[scale_num]) - 1: preds = tf.nn.tanh(preds + bs[i]) else: preds = tf.nn.relu(preds + bs[i]) return preds, scale_gts ## # Perform train calculation ## # for all scales but the first, add the frame generated by the last # scale to the input if scale_num > 0: last_scale_pred_train = self.scale_preds_train[scale_num - 1] else: last_scale_pred_train = None # calculate train_preds, train_gts = calculate(self.height_train, self.width_train, self.input_frames_train, self.gt_frames_train, last_scale_pred_train) self.scale_preds_train.append(train_preds) self.scale_gts_train.append(train_gts) # We need to run the network first to get generated frames, run the # discriminator on those frames to get d_scale_preds, then run this # again for the loss optimization. 
if c.ADVERSARIAL: self.d_scale_preds.append(tf.placeholder(tf.float32, [None, 1])) ## # Perform test calculation ## # for all scales but the first, add the frame generated by the last # scale to the input if scale_num > 0: last_scale_pred_test = self.scale_preds_test[scale_num - 1] else: last_scale_pred_test = None # calculate test_preds, test_gts = calculate(self.height_test, self.width_test, self.input_frames_test, self.gt_frames_test, last_scale_pred_test) self.scale_preds_test.append(test_preds) self.scale_gts_test.append(test_gts) ## # Training ## with tf.name_scope('train'): # global loss is the combined loss from every scale network self.global_loss = combined_loss(self.scale_preds_train, self.scale_gts_train, self.d_scale_preds) self.global_step = tf.Variable(0, trainable=False) self.optimizer = tf.train.AdamOptimizer(learning_rate=c.LRATE_G, name='optimizer') self.train_op = self.optimizer.minimize(self.global_loss, global_step=self.global_step, name='train_op') # train loss summary loss_summary = tf.summary.scalar('train_loss_G', self.global_loss) self.summaries_train.append(loss_summary) ## # Error ## with tf.name_scope('error'): # error computation # get error at largest scale self.psnr_error_train = psnr_error(self.scale_preds_train[-1], self.gt_frames_train) self.sharpdiff_error_train = sharp_diff_error(self.scale_preds_train[-1], self.gt_frames_train) self.psnr_error_test = psnr_error(self.scale_preds_test[-1], self.gt_frames_test) self.sharpdiff_error_test = sharp_diff_error(self.scale_preds_test[-1], self.gt_frames_test) # train error summaries summary_psnr_train = tf.summary.scalar('train_PSNR', self.psnr_error_train) summary_sharpdiff_train = tf.summary.scalar('train_SharpDiff', self.sharpdiff_error_train) self.summaries_train += [summary_psnr_train, summary_sharpdiff_train] # test error summary_psnr_test = tf.summary.scalar('test_PSNR', self.psnr_error_test) summary_sharpdiff_test = tf.summary.scalar('test_SharpDiff', self.sharpdiff_error_test) self.summaries_test += [summary_psnr_test, summary_sharpdiff_test] # add summaries to visualize in TensorBoard self.summaries_train = tf.summary.merge(self.summaries_train) self.summaries_test = tf.summary.merge(self.summaries_test)
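# The w(...) and b(...) helpers used above are defined elsewhere in the repo. A
# minimal sketch of what they are assumed to do (an assumption, not the repo's
# exact code): create trainable weight and bias variables of the given shape.
def w(shape, name=None):
    # truncated-normal initialization for convolution kernels
    return tf.Variable(tf.truncated_normal(shape, stddev=0.01), name=name)

def b(shape, const=0.1):
    # small constant initialization for biases
    return tf.Variable(tf.constant(const, shape=shape))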
# define dataset
with tf.name_scope('dataset'):
    test_video_clips_tensor = tf.placeholder(shape=[1, height, width, 3 * (num_his + 1)],
                                             dtype=tf.float32)
    test_inputs = test_video_clips_tensor[..., 0:num_his * 3]
    test_gt = test_video_clips_tensor[..., -3:]
    print('test inputs = {}'.format(test_inputs))
    print('test prediction gt = {}'.format(test_gt))

# define testing generator function
# in testing there is only the generator network; there is no discriminator network or flownet.
with tf.variable_scope('generator', reuse=None):
    print('testing = {}'.format(tf.get_variable_scope().name))
    test_outputs = generator(test_inputs, layers=4, output_channel=3)
    test_psnr_error, shape = psnr_error(gen_frames=test_outputs, gt_frames=test_gt)
    print(shape, [shape])

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    # dataset
    data_loader = DataLoader(test_folder, height, width)

    # initialize weights
    sess.run(tf.global_variables_initializer())
    print('Init global successfully!')

    # tf saver
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=None)
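    # A sketch of how this session block is typically continued (the checkpoint
    # directory name and the DataLoader clip API are assumptions, not the repo's
    # confirmed interface): restore a checkpoint, then feed one test clip and
    # fetch the PSNR op defined above.
    ckpt = tf.train.latest_checkpoint(snapshot_dir)  # assumed checkpoint directory
    saver.restore(sess, ckpt)
    print('Restored from {}'.format(ckpt))

    video_clip = data_loader.get_video_clips(0)  # assumed loader API: one H x W x 3*(num_his+1) clip
    psnr = sess.run(test_psnr_error,
                    feed_dict={test_video_clips_tensor: video_clip[np.newaxis, ...]})
    print('psnr = {:.3f}'.format(psnr))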
def val(cfg, model=None):
    if model:  # This is for testing during training.
        generator = model
        generator.eval()
    else:
        generator = UNet(input_channels=12, output_channel=3).cuda().eval()
        generator.load_state_dict(torch.load('weights/' + cfg.trained_model)['net_g'])
        print(f'The pre-trained generator has been loaded from \'weights/{cfg.trained_model}\'.\n')

    # video_folders = os.listdir(cfg.test_data)
    # video_folders.sort()
    # video_folders = [os.path.join(cfg.test_data, aa) for aa in video_folders]
    with open(os.path.join(cfg.data_root, 'val_split_with_obj.txt')) as f:
        all_video_names = f.read().splitlines()
    video_folders = [os.path.join(cfg.data_root, 'frames', vid, 'images')
                     for vid in all_video_names]

    fps = 0
    psnr_group = []

    if not model:
        if cfg.show_curve:
            fig = plt.figure("Image")
            manager = plt.get_current_fig_manager()
            manager.window.setGeometry(550, 200, 600, 500)
            # This works for the QT backend; for other backends, check this ⬃⬃⬃.
            # https://stackoverflow.com/questions/7449585/how-do-you-set-the-absolute-position-of-figure-windows-with-matplotlib
            plt.xlabel('frames')
            plt.ylabel('psnr')
            plt.title('psnr curve')
            plt.grid(ls='--')

            cv2.namedWindow('target frames', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('target frames', 384, 384)
            cv2.moveWindow("target frames", 100, 100)

        if cfg.show_heatmap:
            cv2.namedWindow('difference map', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('difference map', 384, 384)
            cv2.moveWindow('difference map', 100, 550)

    # load gt labels
    gt_loader = Label_loader(cfg, video_folders)  # Get gt labels.
    gt, gt_bboxes = gt_loader()

    with torch.no_grad():
        for i, folder in tqdm(enumerate(video_folders)):
            dataset = Dataset.test_dataset(cfg, folder)
            test_dataloader = DataLoader(dataset=dataset, batch_size=cfg.batch_size,
                                         shuffle=False, num_workers=cfg.batch_size)
            vid = folder.split('/')[-2]

            if not model:
                name = folder.split('/')[-1]
                fourcc = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')

                if cfg.show_curve:
                    video_writer = cv2.VideoWriter(f'results/{name}_video.avi',
                                                   fourcc, 30, cfg.img_size)
                    curve_writer = cv2.VideoWriter(f'results/{name}_curve.avi',
                                                   fourcc, 30, (600, 430))

                    js = []
                    plt.clf()
                    ax = plt.axes(xlim=(0, len(dataset)), ylim=(30, 45))
                    line, = ax.plot([], [], '-b')

                if cfg.show_heatmap:
                    heatmap_writer = cv2.VideoWriter(f'results/{name}_heatmap.avi',
                                                     fourcc, 30, cfg.img_size)

            psnrs = []
            diff_maps = []
            # for j, clip in enumerate(dataset):
            for j, clip in enumerate(test_dataloader):
                input_frames = clip[:, 0:12, :, :].cuda()
                target_frame = clip[:, 12:15, :, :].cuda()
                # input_np = clip[0:12, :, :]
                # target_np = clip[12:15, :, :]
                # input_frames = torch.from_numpy(input_np).unsqueeze(0).cuda()
                # target_frame = torch.from_numpy(target_np).unsqueeze(0).cuda()

                G_frame = generator(input_frames)

                # TODO: save predicted frame or difference
                test_psnr = psnr_error(G_frame, target_frame,
                                       reduce_batch=False).cpu().detach().numpy()
                # NOTE: Save the squared difference so that it can be reused for
                # different evaluations.
                square_diff = (target_frame - G_frame).pow(2).mean(dim=1) \
                    .cpu().detach().numpy().astype('float16')
                diff_maps.append(square_diff)
                # psnrs.append(float(test_psnr))
                psnrs += list(test_psnr)

                if not model:
                    if cfg.show_curve:
                        # NOTE: the display path assumes batch_size == 1.
                        target_np = target_frame[0].cpu().numpy()
                        cv2_frame = ((target_np + 1) * 127.5).transpose(1, 2, 0).astype('uint8')
                        js.append(j)
                        line.set_xdata(js)     # This keeps the existing figure and updates the
                        line.set_ydata(psnrs)  # X-axis and Y-axis data, which is faster, but still not perfect.
                        plt.pause(0.001)  # show curve

                        cv2.imshow('target frames', cv2_frame)
                        cv2.waitKey(1)  # show video
                        video_writer.write(cv2_frame)  # Write original video frames.

                        buffer = io.BytesIO()  # Write curve frames from buffer.
                        fig.canvas.print_png(buffer)
                        buffer.write(buffer.getvalue())
                        curve_img = np.array(Image.open(buffer))[..., (2, 1, 0)]
                        curve_writer.write(curve_img)

                    if cfg.show_heatmap:
                        diff_map = torch.sum(torch.abs(G_frame - target_frame).squeeze(), 0)
                        diff_map -= diff_map.min()  # Normalize to 0 ~ 255.
                        diff_map /= diff_map.max()
                        diff_map *= 255
                        diff_map = diff_map.cpu().detach().numpy().astype('uint8')
                        heat_map = cv2.applyColorMap(diff_map, cv2.COLORMAP_JET)

                        cv2.imshow('difference map', heat_map)
                        cv2.waitKey(1)
                        heatmap_writer.write(heat_map)  # Write heatmap frames.

                torch.cuda.synchronize()
                # end = time.time()
                # if j > 1:  # Compute fps from the time used in one completed iteration;
                #     fps = 1 / (end - temp)  # this is more accurate.
                #     temp = end
                #     print(f'\rDetecting: [{i + 1:02d}] {j + 1}/{len(dataset)}, {fps:.2f} fps.', end='')

            diff_maps = np.concatenate(diff_maps, axis=0)
            np.save(os.path.join('saved_difference_map', vid + '.npy'), diff_maps)
            if len(psnrs) != len(gt[i]) - 4 or len(psnrs) != len(diff_maps):
                pdb.set_trace()
            psnr_group.append(np.array(psnrs))

            if not model:
                if cfg.show_curve:
                    video_writer.release()
                    curve_writer.release()
                if cfg.show_heatmap:
                    heatmap_writer.release()

    print('\nAll frames were detected, begin to compute AUC.')

    assert len(psnr_group) == len(gt), \
        f'Ground truth has {len(gt)} videos, but got {len(psnr_group)} detected videos.'

    # save psnr
    torch.save(psnr_group, 'results/psnr_group.pth')

    scores = np.array([], dtype=np.float32)
    labels = np.array([], dtype=np.int8)
    for i in range(len(psnr_group)):
        distance = psnr_group[i]
        distance -= min(distance)  # distance = (distance - min) / (max - min)
        distance /= max(distance)

        scores = np.concatenate((scores, distance), axis=0)
        labels = np.concatenate((labels, gt[i][4:]),
                                axis=0)  # Exclude the first 4 unpredictable frames in gt.
    torch.save(psnr_group, 'results/psnr_normalized.pth')

    assert scores.shape == labels.shape, \
        f'Ground truth has {labels.shape[0]} frames, but got {scores.shape[0]} detected frames.'

    fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=0)
    auc = metrics.auc(fpr, tpr)
    print(f'AUC: {auc}\n')
    return auc
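# A minimal sketch of the psnr_error helper used above (an assumption, not
# necessarily the repo's exact implementation): frames live in [-1, 1], so they
# are rescaled to [0, 1] before computing PSNR; reduce_batch=False returns one
# value per frame in the batch, as the per-video loop above expects.
import torch

def psnr_error(gen_frames, gt_frames, reduce_batch=True):
    # mean squared error per sample over C, H, W, after rescaling to [0, 1]
    mse = ((gen_frames + 1) / 2 - (gt_frames + 1) / 2).pow(2).mean(dim=(1, 2, 3))
    psnr = 10 * torch.log10(1 / mse)  # peak value is 1 after rescaling
    return psnr.mean() if reduce_batch else psnr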
def train(config):
    #### set the save and log path ####
    save_path = config['save_path']
    utils.set_save_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(config['save_path'], 'tensorboard'))
    yaml.dump(config, open(os.path.join(config['save_path'], 'classifier_config.yaml'), 'w'))

    device = torch.device('cuda:' + args.gpu)

    #### make datasets ####
    # train
    train_folder = config['dataset_path'] + config['train_dataset_type'] + "/training/frames"
    test_folder = config['dataset_path'] + config['train_dataset_type'] + "/testing/frames"

    # Loading dataset
    train_dataset_args = config['train_dataset_args']
    test_dataset_args = config['test_dataset_args']

    train_dataset = VadDataset(args, video_folder=train_folder,
                               bbox_folder=config['train_bboxes_path'],
                               flow_folder=config['train_flow_path'],
                               transform=transforms.Compose([transforms.ToTensor()]),
                               resize_height=train_dataset_args['h'],
                               resize_width=train_dataset_args['w'],
                               dataset=config['train_dataset_type'],
                               time_step=train_dataset_args['t_length'] - 1,
                               device=device)
    test_dataset = VadDataset(args, video_folder=test_folder,
                              bbox_folder=config['test_bboxes_path'],
                              flow_folder=config['test_flow_path'],
                              transform=transforms.Compose([transforms.ToTensor()]),
                              resize_height=train_dataset_args['h'],
                              resize_width=train_dataset_args['w'],
                              dataset=config['train_dataset_type'],
                              time_step=train_dataset_args['t_length'] - 1,
                              device=device)

    train_dataloader = DataLoader(train_dataset, batch_size=train_dataset_args['batch_size'],
                                  shuffle=True, num_workers=train_dataset_args['num_workers'],
                                  drop_last=True)
    test_dataloader = DataLoader(test_dataset, batch_size=test_dataset_args['batch_size'],
                                 shuffle=False, num_workers=test_dataset_args['num_workers'],
                                 drop_last=False)

    # for test ---- prepare labels
    labels = np.load('./data/frame_labels_' + config['test_dataset_type'] + '.npy')
    if config['test_dataset_type'] == 'shanghai':
        labels = np.expand_dims(labels, 0)
    videos = OrderedDict()
    videos_list = sorted(glob.glob(os.path.join(test_folder, '*')))
    labels_list = []
    label_length = 0
    psnr_list = {}
    for video in sorted(videos_list):
        video_name = video.split('/')[-1]
        videos[video_name] = {}
        videos[video_name]['path'] = video
        videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg'))
        videos[video_name]['frame'].sort()
        videos[video_name]['length'] = len(videos[video_name]['frame'])
        labels_list = np.append(labels_list,
                                labels[0][4 + label_length:
                                          videos[video_name]['length'] + label_length])
        label_length += videos[video_name]['length']
        psnr_list[video_name] = []

    # Model setting
    num_unet_layers = 4
    discriminator_num_filters = [128, 256, 512, 512]

    # for gradient loss
    alpha = 1
    # for int loss
    l_num = 2
    pretrain = False

    if config['generator'] == 'cycle_generator_convlstm':
        ngf = 64
        netG = 'resnet_6blocks'
        norm = 'instance'
        no_dropout = False
        init_type = 'normal'
        init_gain = 0.02
        gpu_ids = []
        model = define_G(train_dataset_args['c'], train_dataset_args['c'],
                         ngf, netG, norm, not no_dropout, init_type, init_gain, gpu_ids)
    elif config['generator'] == 'unet':
        # generator = UNet(n_channels=train_dataset_args['c'] * (train_dataset_args['t_length'] - 1),
        #                  layer_nums=num_unet_layers, output_channel=train_dataset_args['c'])
        model = PreAE(train_dataset_args['c'], train_dataset_args['t_length'],
                      **config['model_args'])
    else:
        raise Exception('The generator is not implemented')

    # generator = torch.load('save/avenue_cycle_generator_convlstm_flownet2_0103/generator-epoch-199.pth')
    if config['use_D']:
        discriminator = PixelDiscriminator(train_dataset_args['c'],
                                           discriminator_num_filters, use_norm=False)
        optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=0.00002)

    # optimizer setting
    params = list(model.parameters())
    optimizer_G, lr_scheduler = utils.make_optimizer(params, config['optimizer'],
                                                     config['optimizer_args'])

    # set loss; the value range differs from the source version, so the weights should change
    lam_int = 1.0 * 2
    lam_gd = 1.0 * 2
    # TODO: here we use no flow loss
    # lam_op = 0  # 2.0
    # op_loss = Flow_Loss()
    adversarial_loss = Adversarial_Loss()
    # TODO: if use adv
    lam_adv = 0.05
    discriminate_loss = Discriminate_Loss()
    alpha = 1
    l_num = 2
    gd_loss = Gradient_Loss(alpha, train_dataset_args['c'])
    int_loss = Intensity_Loss(l_num)
    object_loss = ObjectLoss(device, l_num)

    # parallel if multi-gpus
    if torch.cuda.is_available():
        model.cuda()
        if config['use_D']:
            discriminator.cuda()
    if config.get('_parallel'):
        model = nn.DataParallel(model)
        if config['use_D']:
            discriminator = nn.DataParallel(discriminator)

    # Training
    utils.log('Start train')
    max_frame_AUC, max_roi_AUC = 0, 0
    base_channel_num = train_dataset_args['c'] * (train_dataset_args['t_length'] - 1)
    save_epoch = 5 if config['save_epoch'] is None else config['save_epoch']
    for epoch in range(config['epochs']):
        model.train()
        for j, (imgs, bbox, flow) in enumerate(tqdm(train_dataloader, desc='train',
                                                    leave=False)):
            imgs = imgs.cuda()
            flow = flow.cuda()
            # input = imgs[:, :-1, ].view(imgs.shape[0], -1, imgs.shape[-2], imgs.shape[-1])
            input = imgs[:, :-1, ]
            target = imgs[:, -1, ]

            outputs = model(input)

            if config['use_D']:
                g_adv_loss = adversarial_loss(discriminator(outputs))
            else:
                g_adv_loss = 0

            g_object_loss = object_loss(outputs, target, flow, bbox)
            # g_int_loss = int_loss(outputs, target)
            g_gd_loss = gd_loss(outputs, target)
            g_loss = lam_adv * g_adv_loss + lam_gd * g_gd_loss + lam_int * g_object_loss

            optimizer_G.zero_grad()
            g_loss.backward()
            optimizer_G.step()

            train_psnr = utils.psnr_error(outputs, target)

            # ----------- update optim_D -------
            if config['use_D']:
                optimizer_D.zero_grad()
                d_loss = discriminate_loss(discriminator(target),
                                           discriminator(outputs.detach()))
                d_loss.backward()
                optimizer_D.step()

        lr_scheduler.step()

        utils.log('----------------------------------------')
        utils.log('Epoch:' + str(epoch + 1))
        utils.log('----------------------------------------')
        utils.log('Loss: Reconstruction {:.6f}'.format(g_loss.item()))

        # Testing
        utils.log('Evaluation of ' + config['test_dataset_type'])

        # Save the model
        if epoch % save_epoch == 0 or epoch == config['epochs'] - 1:
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            if not os.path.exists(os.path.join(save_path, "models")):
                os.makedirs(os.path.join(save_path, "models"))
            # TODO
            frame_AUC = ObjectLoss_evaluate(test_dataloader, model, labels_list, videos,
                                            dataset=config['test_dataset_type'],
                                            device=device,
                                            frame_height=train_dataset_args['h'],
                                            frame_width=train_dataset_args['w'],
                                            is_visual=False,
                                            mask_labels_path=config['mask_labels_path'],
                                            save_path=os.path.join(save_path, "./final"),
                                            labels_dict=labels)
            torch.save(model.state_dict(),
                       os.path.join(save_path, 'models/model-epoch-{}.pth'.format(epoch)))
            if config['use_D']:
                torch.save(discriminator.state_dict(),
                           os.path.join(save_path,
                                        'models/discriminator-epoch-{}.pth'.format(epoch)))
        else:
            frame_AUC = ObjectLoss_evaluate(test_dataloader, model, labels_list, videos,
                                            dataset=config['test_dataset_type'],
                                            device=device,
                                            frame_height=train_dataset_args['h'],
                                            frame_width=train_dataset_args['w'])

        utils.log('The result of ' + config['test_dataset_type'])
        utils.log("AUC: {}%".format(frame_AUC * 100))

        if frame_AUC > max_frame_AUC:
            max_frame_AUC = frame_AUC
            # TODO
            torch.save(model.state_dict(),
                       os.path.join(save_path, 'models/max-frame_auc-model.pth'))
            if config['use_D']:
                torch.save(discriminator.state_dict(),
                           os.path.join(save_path,
                                        'models/discriminator-epoch-{}.pth'.format(epoch)))
            # evaluate(test_dataloader, model, labels_list, videos, int_loss,
            #          config['test_dataset_type'], test_bboxes=config['test_bboxes'],
            #          frame_height=train_dataset_args['h'], frame_width=train_dataset_args['w'],
            #          is_visual=True, mask_labels_path=config['mask_labels_path'],
            #          save_path=os.path.join(save_path, "./frame_best"), labels_dict=labels)

    utils.log('----------------------------------------')
    utils.log('Training is finished')
    utils.log('max_frame_AUC: {}'.format(max_frame_AUC))
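# The utils.make_optimizer helper used above is not shown in this snippet. A
# minimal sketch, assuming config['optimizer'] names a torch optimizer ('adam'
# or 'sgd') and config['optimizer_args'] carries its kwargs plus an optional
# step-decay schedule (all of this is an assumption about the repo's utils):
import torch

def make_optimizer(params, name, optimizer_args):
    optimizer_args = dict(optimizer_args)
    milestones = optimizer_args.pop('milestones', [])  # epochs at which to decay the lr
    gamma = optimizer_args.pop('gamma', 0.1)           # decay factor
    if name == 'adam':
        optimizer = torch.optim.Adam(params, **optimizer_args)  # e.g. {'lr': 2e-4}
    elif name == 'sgd':
        optimizer = torch.optim.SGD(params, **optimizer_args)
    else:
        raise ValueError('unknown optimizer: ' + name)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=gamma)
    return optimizer, lr_scheduler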
test_it = test_dataset.make_one_shot_iterator()
test_videos_clips_tensor = test_it.get_next()
test_videos_clips_tensor.set_shape([batch_size, height, width, 3 * (num_his + 1)])
test_inputs = test_videos_clips_tensor[..., 0:num_his * 3]
test_gt = test_videos_clips_tensor[..., -3:]
print('test inputs = {}'.format(test_inputs))
print('test prediction gt = {}'.format(test_gt))

# define training generator function
with tf.variable_scope('generator', reuse=None):
    print('training = {}'.format(tf.get_variable_scope().name))
    train_outputs = generator(train_inputs, layers=4, output_channel=3)
    train_psnr_error = psnr_error(gen_frames=train_outputs, gt_frames=train_gt)

# define testing generator function
with tf.variable_scope('generator', reuse=True):
    print('testing = {}'.format(tf.get_variable_scope().name))
    test_outputs = generator(test_inputs, layers=4, output_channel=3)
    test_psnr_error = psnr_error(gen_frames=test_outputs, gt_frames=test_gt)

# define intensity loss
if lam_lp != 0:
    lp_loss = intensity_loss(gen_frames=train_outputs, gt_frames=train_gt, l_num=l_num)
else:
    lp_loss = tf.constant(0.0, dtype=tf.float32)
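# A minimal sketch of the intensity_loss referenced above (an assumption, not
# necessarily the repo's exact code): the mean l_num-norm distance between
# generated and ground-truth frames, i.e. L1 for l_num=1 and L2 for l_num=2.
def intensity_loss(gen_frames, gt_frames, l_num):
    return tf.reduce_mean(tf.abs((gen_frames - gt_frames) ** l_num))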
def val(cfg, model=None):
    if model:
        test_folder = cfg.test_folder
        print("The test folder", test_folder)
    else:
        model_path = '/project/bo/exp_data/FFP/%s_%d/' % (cfg.dataset_type, cfg.version)
        ckpt_path = model_path + "model-%d.pth" % cfg.ckpt_step
        if cfg.dataset_augment_test_type != "frames/testing/" and "venue" in cfg.dataset_type:
            rain_type = str(cfg.dataset_augment_test_type.strip().split('_')[0])
            brightness = int(cfg.dataset_augment_test_type.strip().split('_')[-1]) / 10
            data_dir = cfg.dataset_path + "Avenue/frames/%s_testing/bright_%.2f/" % (
                rain_type, brightness)
            if not os.path.exists(data_dir):
                aug_data.save_avenue_rain_or_bright(cfg.dataset_path, rain_type, True,
                                                    "testing", bright_space=brightness)
        else:
            data_dir = cfg.dataset_path + '/%s/%s/' % ("Avenue",
                                                       cfg.dataset_augment_test_type)
            rain_type = "original"
            brightness = 1.0
        test_folder = data_dir

        orig_stdout = sys.stdout
        f = open(os.path.join(model_path,
                              'output_rain_%s_bright_%s.txt' % (rain_type, brightness)), 'w')
        sys.stdout = f

        cfg.gt = np.load('/project/bo/anomaly_data/Avenue/gt_label.npy', allow_pickle=True)

    if model:  # This is for testing during training.
        generator = model
        generator.eval()
    else:
        generator = UNet(input_channels=12, output_channel=3).cuda().eval()
        generator.load_state_dict(torch.load(ckpt_path)['net_g'])
        # generator.load_state_dict(torch.load('weights/' + cfg.trained_model)['net_g'])
        print("The pre-trained generator has been loaded from", ckpt_path)
        # print(f'The pre-trained generator has been loaded from \'weights/{cfg.trained_model}\'.\n')

    videos = {}
    videos, video_string = input_utils.setup(test_folder, videos)

    fps = 0
    psnr_group = []

    if not model:
        if cfg.show_curve:
            fig = plt.figure("Image")
            manager = plt.get_current_fig_manager()
            manager.window.setGeometry(550, 200, 600, 500)
            # This works for the QT backend; for other backends, check this ⬃⬃⬃.
            # https://stackoverflow.com/questions/7449585/how-do-you-set-the-absolute-position-of-figure-windows-with-matplotlib
            plt.xlabel('frames')
            plt.ylabel('psnr')
            plt.title('psnr curve')
            plt.grid(ls='--')

            cv2.namedWindow('target frames', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('target frames', 384, 384)
            cv2.moveWindow("target frames", 100, 100)

        if cfg.show_heatmap:
            cv2.namedWindow('difference map', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('difference map', 384, 384)
            cv2.moveWindow('difference map', 100, 550)

    with torch.no_grad():
        for i, folder in enumerate(video_string):
            # build the per-video dataset first, since the curve axes below need len(dataset);
            # imh, imw: test frame height/width, defined elsewhere in this script
            dataset = input_utils.test_dataset(videos[folder]['frame'], [imh, imw])

            if not model:
                name = folder.split('/')[-1]
                fourcc = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')

                if cfg.show_curve:
                    video_writer = cv2.VideoWriter(f'results/{name}_video.avi',
                                                   fourcc, 30, cfg.img_size)
                    curve_writer = cv2.VideoWriter(f'results/{name}_curve.avi',
                                                   fourcc, 30, (600, 430))

                    js = []
                    plt.clf()
                    ax = plt.axes(xlim=(0, len(dataset)), ylim=(30, 45))
                    line, = ax.plot([], [], '-b')

                if cfg.show_heatmap:
                    heatmap_writer = cv2.VideoWriter(f'results/{name}_heatmap.avi',
                                                     fourcc, 30, cfg.img_size)

            print("Start video %s with %d frames...................."
                  % (folder, len(dataset)))

            psnrs = []
            for j, clip in enumerate(dataset):
                input_np = clip[0:12, :, :]
                target_np = clip[12:15, :, :]
                input_frames = torch.from_numpy(input_np).unsqueeze(0).cuda()
                target_frame = torch.from_numpy(target_np).unsqueeze(0).cuda()

                G_frame = generator(input_frames)
                test_psnr = psnr_error(G_frame, target_frame).cpu().detach().numpy()
                psnrs.append(float(test_psnr))

                if not model:
                    if cfg.show_curve:
                        cv2_frame = ((target_np + 1) * 127.5).transpose(1, 2, 0).astype('uint8')
                        js.append(j)
                        line.set_xdata(js)     # This keeps the existing figure and updates the
                        line.set_ydata(psnrs)  # X-axis and Y-axis data, which is faster, but still not perfect.
                        plt.pause(0.001)  # show curve

                        cv2.imshow('target frames', cv2_frame)
                        cv2.waitKey(1)  # show video
                        video_writer.write(cv2_frame)  # Write original video frames.

                        buffer = io.BytesIO()  # Write curve frames from buffer.
                        fig.canvas.print_png(buffer)
                        buffer.write(buffer.getvalue())
                        curve_img = np.array(Image.open(buffer))[..., (2, 1, 0)]
                        curve_writer.write(curve_img)

                    if cfg.show_heatmap:
                        diff_map = torch.sum(torch.abs(G_frame - target_frame).squeeze(), 0)
                        diff_map -= diff_map.min()  # Normalize to 0 ~ 255.
                        diff_map /= diff_map.max()
                        diff_map *= 255
                        diff_map = diff_map.cpu().detach().numpy().astype('uint8')
                        heat_map = cv2.applyColorMap(diff_map, cv2.COLORMAP_JET)

                        cv2.imshow('difference map', heat_map)
                        cv2.waitKey(1)
                        heatmap_writer.write(heat_map)  # Write heatmap frames.

                torch.cuda.synchronize()
                end = time.time()
                if j > 1:  # Compute fps from the time used in one completed iteration;
                    fps = 1 / (end - temp)  # this is more accurate.
                temp = end
                # print(f'\rDetecting: [{i + 1:02d}] {j + 1}/{len(dataset)}, {fps:.2f} fps.', end='')

            psnr_group.append(np.array(psnrs))

            if not model:
                if cfg.show_curve:
                    video_writer.release()
                    curve_writer.release()
                if cfg.show_heatmap:
                    heatmap_writer.release()

    print('\nAll frames were detected, begin to compute AUC.')
    auc = give_score(psnr_group, cfg.gt)
    if not model:
        sys.stdout = orig_stdout
        f.close()
    return auc
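# give_score is not shown here. A plausible sketch, assuming it mirrors the
# min-max normalization and frame-level ROC-AUC used by the other val() above
# (lower PSNR = more anomalous; the first 4 frames of each video have no
# prediction and are skipped in the labels):
import numpy as np
from sklearn import metrics

def give_score(psnr_group, gt):
    scores, labels = np.array([]), np.array([])
    for psnrs, video_gt in zip(psnr_group, gt):
        distance = psnrs - psnrs.min()
        distance /= distance.max()  # normalize each video's PSNRs to [0, 1]
        scores = np.concatenate((scores, distance))
        labels = np.concatenate((labels, video_gt[4:]))  # skip unpredictable frames
    fpr, tpr, _ = metrics.roc_curve(labels, scores, pos_label=0)
    return metrics.auc(fpr, tpr)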
def train(config):
    #### set the save and log path ####
    svname = args.name
    if svname is None:
        svname = config['train_dataset_type'] + '_' + config['generator'] \
                 + '_' + config['flow_model']
    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join('./save', svname)
    utils.set_save_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))
    yaml.dump(config, open(os.path.join(save_path, 'classifier_config.yaml'), 'w'))

    #### make datasets ####
    # train
    train_folder = config['dataset_path'] + config['train_dataset_type'] + "/training/frames"
    test_folder = config['dataset_path'] + config['train_dataset_type'] + "/testing/frames"

    # Loading dataset
    train_dataset_args = config['train_dataset_args']
    test_dataset_args = config['test_dataset_args']
    train_dataset = VadDataset(train_folder,
                               transforms.Compose([transforms.ToTensor()]),
                               resize_height=train_dataset_args['h'],
                               resize_width=train_dataset_args['w'],
                               time_step=train_dataset_args['t_length'] - 1)
    test_dataset = VadDataset(test_folder,
                              transforms.Compose([transforms.ToTensor()]),
                              resize_height=test_dataset_args['h'],
                              resize_width=test_dataset_args['w'],
                              time_step=test_dataset_args['t_length'] - 1)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=train_dataset_args['batch_size'],
                                  shuffle=True,
                                  num_workers=train_dataset_args['num_workers'],
                                  drop_last=True)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=test_dataset_args['batch_size'],
                                 shuffle=False,
                                 num_workers=test_dataset_args['num_workers'],
                                 drop_last=False)

    # for test ---- prepare labels
    labels = np.load('./data/frame_labels_' + config['test_dataset_type'] + '.npy')
    if config['test_dataset_type'] == 'shanghai':
        labels = np.expand_dims(labels, 0)
    videos = OrderedDict()
    videos_list = sorted(glob.glob(os.path.join(test_folder, '*')))
    labels_list = []
    label_length = 0
    psnr_list = {}
    for video in sorted(videos_list):
        # video_name = video.split('/')[-1]  # posix only
        video_name = os.path.split(video)[-1]  # also works on windows
        videos[video_name] = {}
        videos[video_name]['path'] = video
        videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg'))
        videos[video_name]['frame'].sort()
        videos[video_name]['length'] = len(videos[video_name]['frame'])
        labels_list = np.append(labels_list,
                                labels[0][4 + label_length:
                                          videos[video_name]['length'] + label_length])
        label_length += videos[video_name]['length']
        psnr_list[video_name] = []

    # Model setting
    num_unet_layers = 4
    discriminator_num_filters = [128, 256, 512, 512]

    # for gradient loss
    alpha = 1
    # for int loss
    l_num = 2
    pretrain = False

    if config['generator'] == 'cycle_generator_convlstm':
        ngf = 64
        netG = 'resnet_6blocks'
        norm = 'instance'
        no_dropout = False
        init_type = 'normal'
        init_gain = 0.02
        gpu_ids = []
        generator = define_G(train_dataset_args['c'], train_dataset_args['c'],
                             ngf, netG, norm, not no_dropout, init_type, init_gain, gpu_ids)
    elif config['generator'] == 'unet':
        # generator = UNet(n_channels=train_dataset_args['c'] * (train_dataset_args['t_length'] - 1),
        #                  layer_nums=num_unet_layers, output_channel=train_dataset_args['c'])
        generator = PreAE(train_dataset_args['c'], train_dataset_args['t_length'],
                          **config['model_args'])
    else:
        raise Exception('The generator is not implemented')

    # generator = torch.load('save/avenue_cycle_generator_convlstm_flownet2_0103/generator-epoch-199.pth')
    discriminator = PixelDiscriminator(train_dataset_args['c'],
                                       discriminator_num_filters, use_norm=False)
    # discriminator = torch.load('save/avenue_cycle_generator_convlstm_flownet2_0103/discriminator-epoch-199.pth')

    # if not pretrain:
    #     generator.apply(weights_init_normal)
    #     discriminator.apply(weights_init_normal)

    # if use flownet
    # if config['flow_model'] == 'flownet2':
    #     flownet2SD_model_path = 'flownet2/FlowNet2_checkpoint.pth.tar'
    #     flow_network = FlowNet2(args).eval()
    #     flow_network.load_state_dict(torch.load(flownet2SD_model_path)['state_dict'])
    # elif config['flow_model'] == 'liteflownet':
    #     lite_flow_model_path = 'liteFlownet/network-sintel.pytorch'
    #     flow_network = Network().eval()
    #     flow_network.load_state_dict(torch.load(lite_flow_model_path))

    # the value range differs from the source version, so the weights should change
    lam_int = 1.0 * 2
    lam_gd = 1.0 * 2
    # here we use no flow loss
    lam_op = 0  # 2.0
    lam_adv = 0.05
    adversarial_loss = Adversarial_Loss()
    discriminate_loss = Discriminate_Loss()
    gd_loss = Gradient_Loss(alpha, train_dataset_args['c'])
    op_loss = Flow_Loss()
    int_loss = Intensity_Loss(l_num)
    step = 0

    utils.log('initializing the model with Generator-Unet {} layers, '
              'PixelDiscriminator with filters {}'.format(num_unet_layers,
                                                          discriminator_num_filters))

    g_lr = 0.0002
    d_lr = 0.00002
    optimizer_G = torch.optim.Adam(generator.parameters(), lr=g_lr)
    optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=d_lr)

    # # optimizer setting
    # params_encoder = list(generator.encoder.parameters())
    # params_decoder = list(generator.decoder.parameters())
    # params = params_encoder + params_decoder
    # optimizer, lr_scheduler = utils.make_optimizer(
    #     params, config['optimizer'], config['optimizer_args'])
    # loss_func_mse = nn.MSELoss(reduction='none')

    # parallel if multi-gpus
    if torch.cuda.is_available():
        generator.cuda()
        discriminator.cuda()
        # if use flownet
        # flow_network.cuda()
        adversarial_loss.cuda()
        discriminate_loss.cuda()
        gd_loss.cuda()
        op_loss.cuda()
        int_loss.cuda()

    if config.get('_parallel'):
        generator = nn.DataParallel(generator)
        discriminator = nn.DataParallel(discriminator)
        # if use flownet
        # flow_network = nn.DataParallel(flow_network)
        adversarial_loss = nn.DataParallel(adversarial_loss)
        discriminate_loss = nn.DataParallel(discriminate_loss)
        gd_loss = nn.DataParallel(gd_loss)
        op_loss = nn.DataParallel(op_loss)
        int_loss = nn.DataParallel(int_loss)

    # Training
    utils.log('Start train')
    max_accuracy = 0
    base_channel_num = train_dataset_args['c'] * (train_dataset_args['t_length'] - 1)
    save_epoch = 5 if config['save_epoch'] is None else config['save_epoch']
    for epoch in range(config['epochs']):
        generator.train()
        for j, imgs in enumerate(tqdm(train_dataloader, desc='train', leave=False)):
            imgs = imgs.cuda()
            input = imgs[:, :-1, ]
            input_last = input[:, -1, ]
            target = imgs[:, -1, ]
            # input = input.view(input.shape[0], -1, input.shape[-2], input.shape[-1])

            # only for debug
            # input0 = imgs[:, 0, ]
            # input1 = imgs[:, 1, ]
            # gt_flow_esti_tensor = torch.cat([input0, input1], 1)
            # flow_gt = batch_estimate(gt_flow_esti_tensor, flow_network)[0]
            # objectOutput = open('./out_train.flo', 'wb')
            # np.array([80, 73, 69, 72], np.uint8).tofile(objectOutput)
            # np.array([flow_gt.size(2), flow_gt.size(1)], np.int32).tofile(objectOutput)
            # np.array(flow_gt.detach().cpu().numpy().transpose(1, 2, 0), np.float32).tofile(objectOutput)
            # objectOutput.close()
            # break

            # ------- update optim_G --------------
            outputs = generator(input)

            # pred_flow_tensor = torch.cat([input_last, outputs], 1)
            # gt_flow_tensor = torch.cat([input_last, target], 1)
            # flow_pred = batch_estimate(pred_flow_tensor, flow_network)
            # flow_gt = batch_estimate(gt_flow_tensor, flow_network)

            # if you want to use flownet2SD, comment out the part in front
            #### if use flownet ####
            # pred_flow_esti_tensor = torch.cat(
            #     [input_last.view(-1, 3, 1, input.shape[-2], input.shape[-1]),
            #      outputs.view(-1, 3, 1, input.shape[-2], input.shape[-1])], 2)
            # gt_flow_esti_tensor = torch.cat(
            #     [input_last.view(-1, 3, 1, input.shape[-2], input.shape[-1]),
            #      target.view(-1, 3, 1, input.shape[-2], input.shape[-1])], 2)
            # flow_gt = flow_network(gt_flow_esti_tensor * 255.0)
            # flow_pred = flow_network(pred_flow_esti_tensor * 255.0)
            ##############################
            # g_op_loss = op_loss(flow_pred, flow_gt)  # flow loss
            g_op_loss = 0
            g_adv_loss = adversarial_loss(discriminator(outputs))

            g_int_loss = int_loss(outputs, target)
            g_gd_loss = gd_loss(outputs, target)
            g_loss = lam_adv * g_adv_loss + lam_gd * g_gd_loss \
                     + lam_op * g_op_loss + lam_int * g_int_loss

            optimizer_G.zero_grad()
            g_loss.backward()
            optimizer_G.step()

            train_psnr = utils.psnr_error(outputs, target)

            # ----------- update optim_D -------
            optimizer_D.zero_grad()
            d_loss = discriminate_loss(discriminator(target),
                                       discriminator(outputs.detach()))
            d_loss.backward()
            optimizer_D.step()
            # break

        # lr_scheduler.step()
        utils.log('----------------------------------------')
        utils.log('Epoch:' + str(epoch + 1))
        utils.log('----------------------------------------')
        utils.log("g_loss: {} d_loss {}".format(g_loss, d_loss))
        utils.log('\t gd_loss {}, op_loss {}, int_loss {}'.format(
            g_gd_loss, g_op_loss, g_int_loss))
        utils.log('\t train psnr {}'.format(train_psnr))

        # Testing
        utils.log('Evaluation of ' + config['test_dataset_type'])
        for video in sorted(videos_list):
            # video_name = video.split('/')[-1]
            video_name = os.path.split(video)[-1]
            psnr_list[video_name] = []

        generator.eval()
        video_num = 0
        # label_length += videos[videos_list[video_num].split('/')[-1]]['length']
        label_length = videos[os.path.split(videos_list[video_num])[1]]['length']
        for k, imgs in enumerate(tqdm(test_dataloader, desc='test', leave=False)):
            if k == label_length - 4 * (video_num + 1):
                video_num += 1
                label_length += videos[os.path.split(videos_list[video_num])[1]]['length']
            imgs = imgs.cuda()
            input = imgs[:, :-1, ]
            target = imgs[:, -1, ]
            # input = input.view(input.shape[0], -1, input.shape[-2], input.shape[-1])

            outputs = generator(input)
            mse_imgs = int_loss((outputs + 1) / 2, (target + 1) / 2).item()
            # psnr_list[videos_list[video_num].split('/')[-1]].append(utils.psnr(mse_imgs))
            psnr_list[os.path.split(videos_list[video_num])[1]].append(utils.psnr(mse_imgs))

        # Measuring the abnormality score and the AUC
        anomaly_score_total_list = []
        for video in sorted(videos_list):
            # video_name = video.split('/')[-1]
            video_name = os.path.split(video)[1]
            anomaly_score_total_list += utils.anomaly_score_list(psnr_list[video_name])

        anomaly_score_total_list = np.asarray(anomaly_score_total_list)
        accuracy = utils.AUC(anomaly_score_total_list, np.expand_dims(1 - labels_list, 0))

        utils.log('The result of ' + config['test_dataset_type'])
        utils.log('AUC: ' + str(accuracy * 100) + '%')

        # Save the model
        if epoch % save_epoch == 0 or epoch == config['epochs'] - 1:
            # torch.save(model, os.path.join(save_path, 'model-epoch-{}.pth'.format(epoch)))
            torch.save(generator,
                       os.path.join(save_path, 'generator-epoch-{}.pth'.format(epoch)))
            torch.save(discriminator,
                       os.path.join(save_path, 'discriminator-epoch-{}.pth'.format(epoch)))
        if accuracy > max_accuracy:
            max_accuracy = accuracy
            torch.save(generator, os.path.join(save_path, 'generator-max'))
            torch.save(discriminator, os.path.join(save_path, 'discriminator-max'))

    utils.log('----------------------------------------')
    utils.log('Training is finished')
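# utils.anomaly_score_list and utils.AUC are external helpers. A minimal sketch
# of the common definitions (an assumption; the repo's versions may differ):
# each video's PSNRs are min-max normalized into scores where high = normal,
# and AUC is the frame-level ROC-AUC of the scores against the inverted labels.
import numpy as np
from sklearn import metrics

def anomaly_score_list(psnr_list):
    lo, hi = np.min(psnr_list), np.max(psnr_list)
    return [(p - lo) / (hi - lo) for p in psnr_list]  # per-video min-max normalization

def AUC(anomaly_scores, labels):
    return metrics.roc_auc_score(np.squeeze(labels, axis=0), np.squeeze(anomaly_scores))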
def define_graph(self):
    """
    Sets up the model graph in TensorFlow.
    """
    with tf.name_scope('generator'):
        ##
        # Data
        ##
        with tf.name_scope('data'):
            self.inputs = tf.placeholder(tf.float32, shape=[None, 6])
            self.gt_frames = tf.placeholder(
                tf.float32, shape=[None, self.height, self.width, 3])

            # use variable batch_size for more flexibility
            self.batch_size = tf.shape(self.inputs)[0]

        ##
        # Scale network setup and calculation
        ##
        self.summaries = []
        self.scale_preds = []    # the generated images at each scale
        self.scale_gts = []      # the ground truth images at each scale
        self.d_scale_preds = []  # the predictions from the discriminator model

        for scale_num in xrange(self.num_scale_nets):
            with tf.name_scope('scale_' + str(scale_num)):
                with tf.name_scope('setup'):
                    with tf.name_scope('fully-connected'):
                        fc_ws = []
                        fc_bs = []

                        # create weights for fc layers
                        for i in xrange(len(self.scale_fc_layer_sizes[scale_num]) - 1):
                            fc_ws.append(w([self.scale_fc_layer_sizes[scale_num][i],
                                            self.scale_fc_layer_sizes[scale_num][i + 1]]))
                            fc_bs.append(b([self.scale_fc_layer_sizes[scale_num][i + 1]]))

                    with tf.name_scope('convolutions'):
                        conv_ws = []
                        conv_bs = []

                        # create weights for kernels
                        for i in xrange(len(self.scale_kernel_sizes[scale_num])):
                            conv_ws.append(w([self.scale_kernel_sizes[scale_num][i],
                                              self.scale_kernel_sizes[scale_num][i],
                                              self.scale_conv_layer_fms[scale_num][i],
                                              self.scale_conv_layer_fms[scale_num][i + 1]]))
                            conv_bs.append(b([self.scale_conv_layer_fms[scale_num][i + 1]]))

                with tf.name_scope('calculation'):
                    def calculate(height, width, inputs, gts, last_gen_frames):
                        # scale inputs and gts
                        scale_factor = 1. / 2 ** ((self.num_scale_nets - 1) - scale_num)
                        scale_height = int(height * scale_factor)
                        scale_width = int(width * scale_factor)

                        scale_gts = tf.image.resize_images(gts, scale_height, scale_width)

                        # for all scales but the first, add the frame generated by the last
                        # scale to the input
                        # if scale_num > 0:
                        #     last_gen_frames = tf.image.resize_images(last_gen_frames,
                        #                                              scale_height,
                        #                                              scale_width)
                        #     inputs = tf.concat(3, [inputs, last_gen_frames])

                        # generated frame predictions
                        preds = inputs

                        # perform fc multiplications
                        with tf.name_scope('fully-connected'):
                            for i in xrange(len(self.scale_fc_layer_sizes[scale_num]) - 1):
                                preds = tf.nn.relu(tf.matmul(preds, fc_ws[i]) + fc_bs[i])

                        # reshape for convolutions
                        preds = tf.reshape(preds, [-1, c.FRAME_HEIGHT, c.FRAME_WIDTH,
                                                   self.scale_conv_layer_fms[scale_num][0]])

                        # perform convolutions
                        with tf.name_scope('convolutions'):
                            for i in xrange(len(self.scale_kernel_sizes[scale_num])):
                                # Convolve layer
                                preds = tf.nn.conv2d(preds, conv_ws[i], [1, 1, 1, 1],
                                                     padding=c.PADDING_G)

                                # Activate with ReLU (or Tanh for last layer)
                                if i == len(self.scale_kernel_sizes[scale_num]) - 1:
                                    preds = tf.nn.tanh(preds + conv_bs[i])
                                else:
                                    preds = tf.nn.relu(preds + conv_bs[i])

                        return preds, scale_gts

                    ##
                    # Perform train calculation
                    ##

                    # for all scales but the first, add the frame generated by the last
                    # scale to the input
                    if scale_num > 0:
                        last_scale_pred = self.scale_preds[scale_num - 1]
                    else:
                        last_scale_pred = None

                    # calculate
                    train_preds, train_gts = calculate(self.height, self.width,
                                                       self.inputs, self.gt_frames,
                                                       last_scale_pred)
                    self.scale_preds.append(train_preds)
                    self.scale_gts.append(train_gts)

                    # We need to run the network first to get generated frames, run the
                    # discriminator on those frames to get d_scale_preds, then run this
                    # again for the loss optimization.
                    if c.ADVERSARIAL:
                        self.d_scale_preds.append(tf.placeholder(tf.float32, [None, 1]))

        ##
        # Training
        ##
        with tf.name_scope('train'):
            # global loss is the combined loss from every scale network
            self.global_loss = combined_loss(self.scale_preds, self.scale_gts,
                                             self.d_scale_preds)
            self.global_step = tf.Variable(0, trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=c.LRATE_G,
                                                    name='optimizer')
            self.train_op = self.optimizer.minimize(self.global_loss,
                                                    global_step=self.global_step,
                                                    name='train_op')

            # train loss summary
            loss_summary = tf.scalar_summary('train_loss_G', self.global_loss)
            self.summaries.append(loss_summary)

        ##
        # Error
        ##
        with tf.name_scope('error'):
            # error computation
            # get error at largest scale
            self.psnr_error = psnr_error(self.scale_preds[-1], self.gt_frames)
            self.sharpdiff_error = sharp_diff_error(self.scale_preds[-1], self.gt_frames)

            # train error summaries
            summary_psnr = tf.scalar_summary('train_PSNR', self.psnr_error)
            summary_sharpdiff = tf.scalar_summary('train_SharpDiff', self.sharpdiff_error)
            self.summaries += [summary_psnr, summary_sharpdiff]

        # add summaries to visualize in TensorBoard
        self.summaries = tf.merge_summary(self.summaries)
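# combined_loss is defined elsewhere in the repo. A rough sketch of the usual
# multi-scale GAN objective it stands for (the weights and the gdl_loss helper
# are assumptions here, not the repo's confirmed code): a weighted sum of an
# image (Lp) loss, a gradient difference loss, and an adversarial loss,
# averaged over all scale networks.
def combined_loss(scale_preds, scale_gts, d_scale_preds,
                  lam_lp=1.0, lam_gdl=1.0, lam_adv=0.05):
    loss = tf.constant(0.0)
    for scale_num in range(len(scale_preds)):
        preds, gts = scale_preds[scale_num], scale_gts[scale_num]
        loss += lam_lp * tf.reduce_mean(tf.abs(preds - gts))  # L1 image loss
        loss += lam_gdl * gdl_loss(preds, gts)                # gradient difference loss (assumed helper)
        if d_scale_preds:
            # adversarial term: push the discriminator's predictions toward "real" (1)
            loss += lam_adv * tf.reduce_mean(-tf.log(d_scale_preds[scale_num] + 1e-12))
    return loss / len(scale_preds)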
def define_graph(self, discriminator):
    """
    Sets up the model graph in TensorFlow.

    @param discriminator: The discriminator model that discriminates frames
                          generated by this model.
    """
    with tf.name_scope('generator'):
        ##
        # Data
        ##
        with tf.name_scope('input'):
            self.input_frames_train = tf.placeholder(
                tf.float32,
                shape=[None, self.height_train, self.width_train, 3 * c.HIST_LEN],
                name='input_frames_train')
            self.gt_frames_train = tf.placeholder(
                tf.float32,
                shape=[None, self.height_train, self.width_train, 3 * c.GT_LEN],
                name='gt_frames_train')
            self.input_frames_test = tf.placeholder(
                tf.float32,
                shape=[None, self.height_test, self.width_test, 3 * c.HIST_LEN],
                name='input_frames_test')
            self.gt_frames_test = tf.placeholder(
                tf.float32,
                shape=[None, self.height_test, self.width_test, 3 * c.GT_LEN],
                name='gt_frames_test')

        # use variable batch_size for more flexibility
        with tf.name_scope('batch_size_train'):
            self.batch_size_train = tf.shape(self.input_frames_train,
                                             name='input_frames_train_shape')[0]
        with tf.name_scope('batch_size_test'):
            self.batch_size_test = tf.shape(self.input_frames_test,
                                            name='input_frames_test_shape')[0]

        ##
        # Scale network setup and calculation
        ##
        self.train_vars = []  # the variables to train in the optimization step

        self.summaries_train = []
        self.scale_preds_train = []  # the generated images at each scale
        self.scale_gts_train = []    # the ground truth images at each scale
        self.d_scale_preds = []      # the predictions from the discriminator model

        self.summaries_test = []
        self.scale_preds_test = []   # the generated images at each scale
        self.scale_gts_test = []     # the ground truth images at each scale

        self.ws = []
        self.bs = []

        for scale_num in xrange(self.num_scale_nets):
            with tf.name_scope('scale_net_' + str(scale_num)):
                with tf.name_scope('setup'):
                    scale_ws = []
                    scale_bs = []

                    # create weights for kernels
                    with tf.name_scope('weights'):
                        for i in xrange(len(self.scale_kernel_sizes[scale_num])):
                            scale_ws.append(w([self.scale_kernel_sizes[scale_num][i],
                                               self.scale_kernel_sizes[scale_num][i],
                                               self.scale_layer_fms[scale_num][i],
                                               self.scale_layer_fms[scale_num][i + 1]],
                                              'gen_' + str(scale_num) + '_' + str(i)))
                    with tf.name_scope('biases'):
                        for i in xrange(len(self.scale_kernel_sizes[scale_num])):
                            scale_bs.append(b([self.scale_layer_fms[scale_num][i + 1]]))

                    # add to trainable parameters
                    self.train_vars += scale_ws
                    self.train_vars += scale_bs

                    self.ws.append(scale_ws)
                    self.bs.append(scale_bs)

                with tf.name_scope('calculation'):
                    with tf.name_scope('calculation_train'):
                        ##
                        # Perform train calculation
                        ##
                        if scale_num > 0:
                            last_scale_pred_train = self.scale_preds_train[scale_num - 1]
                        else:
                            last_scale_pred_train = None

                        train_preds, train_gts = self.generate_predictions(
                            scale_num, self.height_train, self.width_train,
                            self.input_frames_train, self.gt_frames_train,
                            last_scale_pred_train)

                    with tf.name_scope('calculation_test'):
                        ##
                        # Perform test calculation
                        ##
                        if scale_num > 0:
                            last_scale_pred_test = self.scale_preds_test[scale_num - 1]
                        else:
                            last_scale_pred_test = None

                        test_preds, test_gts = self.generate_predictions(
                            scale_num, self.height_test, self.width_test,
                            self.input_frames_test, self.gt_frames_test,
                            last_scale_pred_test, 'test')

                    self.scale_preds_train.append(train_preds)
                    self.scale_gts_train.append(train_gts)
                    self.scale_preds_test.append(test_preds)
                    self.scale_gts_test.append(test_gts)

        ##
        # Get Discriminator Predictions
        ##
        if c.ADVERSARIAL:
            with tf.name_scope('d_preds'):
                # A list of the prediction tensors for each scale network
                self.d_scale_preds = []

                for scale_num in xrange(self.num_scale_nets):
                    with tf.name_scope('scale_' + str(scale_num)):
                        with tf.name_scope('calculation'):
                            input_scale_factor = \
                                1. / self.scale_gt_inverse_scale_factor[scale_num]
                            input_scale_height = int(self.height_train * input_scale_factor)
                            input_scale_width = int(self.width_train * input_scale_factor)

                            scale_inputs_train = tf.image.resize_images(
                                self.input_frames_train,
                                [input_scale_height, input_scale_width])

                            # get predictions from the d scale networks
                            self.d_scale_preds.append(
                                discriminator.scale_nets[scale_num]
                                .generate_all_predictions(
                                    scale_inputs_train,
                                    self.scale_preds_train[scale_num]))

        ##
        # Training
        ##
        with tf.name_scope('training'):
            # global loss is the combined loss from every scale network
            self.global_loss = temporal_combined_loss(self.scale_preds_train,
                                                      self.scale_gts_train,
                                                      self.d_scale_preds)

            with tf.name_scope('train_step'):
                self.global_step = tf.Variable(0, trainable=False, name='global_step')
                self.optimizer = tf.train.AdamOptimizer(learning_rate=c.LRATE_G,
                                                        name='optimizer')
                self.train_op = self.optimizer.minimize(self.global_loss,
                                                        global_step=self.global_step,
                                                        var_list=self.train_vars,
                                                        name='train_op')

            # train loss summary
            loss_summary = tf.summary.scalar('train_loss_G', self.global_loss)
            self.summaries_train.append(loss_summary)

        ##
        # Error
        ##
        with tf.name_scope('error'):
            # error computation
            # get error at largest scale
            with tf.name_scope('psnr_train'):
                self.psnr_error_train = []
                for gt_num in xrange(c.GT_LEN):
                    self.psnr_error_train.append(psnr_error(
                        self.scale_preds_train[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                        self.gt_frames_train[:, :, :, gt_num * 3:(gt_num + 1) * 3]))

            with tf.name_scope('sharpdiff_train'):
                self.sharpdiff_error_train = []
                for gt_num in xrange(c.GT_LEN):
                    self.sharpdiff_error_train.append(sharp_diff_error(
                        self.scale_preds_train[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                        self.gt_frames_train[:, :, :, gt_num * 3:(gt_num + 1) * 3]))

            with tf.name_scope('ssim_train'):
                self.ssim_error_train = []
                for gt_num in xrange(c.GT_LEN):
                    self.ssim_error_train.append(ssim_error(
                        self.scale_preds_train[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                        self.gt_frames_train[:, :, :, gt_num * 3:(gt_num + 1) * 3]))

            with tf.name_scope('psnr_test'):
                self.psnr_error_test = []
                for gt_num in xrange(c.GT_LEN):
                    self.psnr_error_test.append(psnr_error(
                        self.scale_preds_test[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                        self.gt_frames_test[:, :, :, gt_num * 3:(gt_num + 1) * 3]))

            with tf.name_scope('sharpdiff_test'):
                self.sharpdiff_error_test = []
                for gt_num in xrange(c.GT_LEN):
                    self.sharpdiff_error_test.append(sharp_diff_error(
                        self.scale_preds_test[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                        self.gt_frames_test[:, :, :, gt_num * 3:(gt_num + 1) * 3]))

            with tf.name_scope('ssim_test'):
                self.ssim_error_test = []
                for gt_num in xrange(c.GT_LEN):
                    self.ssim_error_test.append(ssim_error(
                        self.scale_preds_test[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                        self.gt_frames_test[:, :, :, gt_num * 3:(gt_num + 1) * 3]))

            for gt_num in xrange(c.GT_LEN):
                # train error summaries
                summary_psnr_train = tf.summary.scalar(
                    'train_PSNR_' + str(gt_num), self.psnr_error_train[gt_num])
                summary_sharpdiff_train = tf.summary.scalar(
                    'train_SharpDiff_' + str(gt_num), self.sharpdiff_error_train[gt_num])
                summary_ssim_train = tf.summary.scalar(
                    'train_SSIM_' + str(gt_num), self.ssim_error_train[gt_num])
                self.summaries_train += [summary_psnr_train,
                                         summary_sharpdiff_train,
                                         summary_ssim_train]

                # test error summaries
                summary_psnr_test = tf.summary.scalar(
                    'test_PSNR_' + str(gt_num), self.psnr_error_test[gt_num])
                summary_sharpdiff_test = tf.summary.scalar(
                    'test_SharpDiff_' + str(gt_num), self.sharpdiff_error_test[gt_num])
                summary_ssim_test = tf.summary.scalar(
                    'test_SSIM_' + str(gt_num), self.ssim_error_test[gt_num])
                self.summaries_test += [summary_psnr_test,
                                        summary_sharpdiff_test,
                                        summary_ssim_test]

        # add summaries to visualize in TensorBoard
        self.summaries_train = tf.summary.merge(self.summaries_train)
        self.summaries_test = tf.summary.merge(self.summaries_test)
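# ssim_error is an external helper. A minimal sketch (an assumption, not the
# repo's confirmed code): mean SSIM between predicted and ground-truth frames
# via tf.image.ssim, with max_val=2.0 because frames here live in [-1, 1].
def ssim_error(gen_frames, gt_frames):
    return tf.reduce_mean(tf.image.ssim(gen_frames, gt_frames, max_val=2.0))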