Example #1
    def define_graph(self):
        """
        Sets up the model graph in TensorFlow.
        """
        with tf.name_scope('generator'):
            ##
            # Data
            ##

            with tf.name_scope('data'):
                self.input_frames_train = tf.placeholder(
                    tf.float32, shape=[None, self.height_train, self.width_train, 3 * c.HIST_LEN])
                self.gt_frames_train = tf.placeholder(
                    tf.float32, shape=[None, self.height_train, self.width_train, 3])

                self.input_frames_test = tf.placeholder(
                    tf.float32, shape=[None, self.height_test, self.width_test, 3 * c.HIST_LEN])
                self.gt_frames_test = tf.placeholder(
                    tf.float32, shape=[None, self.height_test, self.width_test, 3])

                # use variable batch_size for more flexibility
                self.batch_size_train = tf.shape(self.input_frames_train)[0]
                self.batch_size_test = tf.shape(self.input_frames_test)[0]

            ##
            # Scale network setup and calculation
            ##

            self.summaries_train = []
            self.scale_preds_train = []  # the generated images at each scale
            self.scale_gts_train = []  # the ground truth images at each scale
            self.d_scale_preds = []  # the predictions from the discriminator model

            self.summaries_test = []
            self.scale_preds_test = []  # the generated images at each scale
            self.scale_gts_test = []  # the ground truth images at each scale

            for scale_num in range(self.num_scale_nets):
                with tf.name_scope('scale_' + str(scale_num)):
                    with tf.name_scope('setup'):
                        ws = []
                        bs = []

                        # create weights for kernels
                        for i in range(len(self.scale_kernel_sizes[scale_num])):
                            ws.append(w([self.scale_kernel_sizes[scale_num][i],
                                         self.scale_kernel_sizes[scale_num][i],
                                         self.scale_layer_fms[scale_num][i],
                                         self.scale_layer_fms[scale_num][i + 1]]))
                            bs.append(b([self.scale_layer_fms[scale_num][i + 1]]))

                    with tf.name_scope('calculation'):
                        def calculate(height, width, inputs, gts, last_gen_frames):
                            # scale inputs and gts
                            scale_factor = 1. / 2 ** ((self.num_scale_nets - 1) - scale_num)
                            scale_height = int(height * scale_factor)
                            scale_width = int(width * scale_factor)

                            inputs = tf.image.resize_images(inputs, [scale_height, scale_width])
                            scale_gts = tf.image.resize_images(gts, [scale_height, scale_width])

                            # for all scales but the first, add the frame generated by the last
                            # scale to the input
                            if scale_num > 0:
                                last_gen_frames = tf.image.resize_images(
                                    last_gen_frames, [scale_height, scale_width])
                                print("inputs: {}, frames: {}".format(inputs.shape, last_gen_frames.shape))
                                inputs = tf.concat([inputs, last_gen_frames], 3)

                            # generated frame predictions
                            preds = inputs

                            # perform convolutions
                            with tf.name_scope('convolutions'):
                                for i in range(len(self.scale_kernel_sizes[scale_num])):
                                    # Convolve layer
                                    preds = tf.nn.conv2d(
                                        preds, ws[i], [1, 1, 1, 1], padding=c.PADDING_G)

                                    # Activate with ReLU (or Tanh for last layer)
                                    if i == len(self.scale_kernel_sizes[scale_num]) - 1:
                                        preds = tf.nn.tanh(preds + bs[i])
                                    else:
                                        preds = tf.nn.relu(preds + bs[i])

                            return preds, scale_gts

                        ##
                        # Perform train calculation
                        ##

                        # for all scales but the first, add the frame generated by the last
                        # scale to the input
                        if scale_num > 0:
                            last_scale_pred_train = self.scale_preds_train[scale_num - 1]
                        else:
                            last_scale_pred_train = None

                        # calculate
                        train_preds, train_gts = calculate(self.height_train,
                                                           self.width_train,
                                                           self.input_frames_train,
                                                           self.gt_frames_train,
                                                           last_scale_pred_train)
                        self.scale_preds_train.append(train_preds)
                        self.scale_gts_train.append(train_gts)

                        # We need to run the network first to get generated frames, run the
                        # discriminator on those frames to get d_scale_preds, then run this
                        # again for the loss optimization.
                        if c.ADVERSARIAL:
                            self.d_scale_preds.append(tf.placeholder(tf.float32, [None, 1]))

                        ##
                        # Perform test calculation
                        ##

                        # for all scales but the first, add the frame generated by the last
                        # scale to the input
                        if scale_num > 0:
                            last_scale_pred_test = self.scale_preds_test[scale_num - 1]
                        else:
                            last_scale_pred_test = None

                        # calculate
                        test_preds, test_gts = calculate(self.height_test,
                                                         self.width_test,
                                                         self.input_frames_test,
                                                         self.gt_frames_test,
                                                         last_scale_pred_test)
                        self.scale_preds_test.append(test_preds)
                        self.scale_gts_test.append(test_gts)

            ##
            # Training
            ##

            with tf.name_scope('train'):
                # global loss is the combined loss from every scale network
                self.global_loss = combined_loss(self.scale_preds_train,
                                                 self.scale_gts_train,
                                                 self.d_scale_preds)
                self.global_step = tf.Variable(0, trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=c.LRATE_G, name='optimizer')
                self.train_op = self.optimizer.minimize(self.global_loss,
                                                        global_step=self.global_step,
                                                        name='train_op')

                # train loss summary
                loss_summary = tf.summary.scalar('train_loss_G', self.global_loss)
                self.summaries_train.append(loss_summary)

            ##
            # Error
            ##

            with tf.name_scope('error'):
                # error computation
                # get error at largest scale
                self.psnr_error_train = psnr_error(self.scale_preds_train[-1],
                                                   self.gt_frames_train)
                self.sharpdiff_error_train = sharp_diff_error(self.scale_preds_train[-1],
                                                              self.gt_frames_train)
                self.psnr_error_test = psnr_error(self.scale_preds_test[-1],
                                                  self.gt_frames_test)
                self.sharpdiff_error_test = sharp_diff_error(self.scale_preds_test[-1],
                                                             self.gt_frames_test)
                # train error summaries
                summary_psnr_train = tf.summary.scalar('train_PSNR',
                                                       self.psnr_error_train)
                summary_sharpdiff_train = tf.summary.scalar('train_SharpDiff',
                                                            self.sharpdiff_error_train)
                self.summaries_train += [summary_psnr_train, summary_sharpdiff_train]

                # test error
                summary_psnr_test = tf.summary.scalar('test_PSNR',
                                                      self.psnr_error_test)
                summary_sharpdiff_test = tf.summary.scalar('test_SharpDiff',
                                                           self.sharpdiff_error_test)
                self.summaries_test += [summary_psnr_test, summary_sharpdiff_test]

            # add summaries to visualize in TensorBoard
            self.summaries_train = tf.summary.merge(self.summaries_train)
            self.summaries_test = tf.summary.merge(self.summaries_test)
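Example #1 assumes helper functions w and b that build weight and bias variables from a shape list. A minimal sketch consistent with the call sites above; the initializer choices are assumptions, not taken from the source:

def w(shape, stddev=0.01):
    # truncated-normal weight variable; the initializer and stddev are assumptions
    return tf.Variable(tf.truncated_normal(shape, stddev=stddev))

def b(shape, const=0.1):
    # constant-initialized bias variable; the 0.1 constant is an assumption
    return tf.Variable(tf.constant(const, shape=shape))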
Example #2
# define dataset
with tf.name_scope('dataset'):
    test_video_clips_tensor = tf.placeholder(shape=[1, height, width, 3 * (num_his + 1)],
                                             dtype=tf.float32)
    test_inputs = test_video_clips_tensor[..., 0:num_his*3]
    test_gt = test_video_clips_tensor[..., -3:]
    print('test inputs = {}'.format(test_inputs))
    print('test prediction gt = {}'.format(test_gt))

# define the testing generator function;
# at test time only the generator runs -- there is no discriminator network and no FlowNet.
with tf.variable_scope('generator', reuse=None):
    print('testing = {}'.format(tf.get_variable_scope().name))
    test_outputs = generator(test_inputs, layers=4, output_channel=3)
    test_psnr_error, shape = psnr_error(gen_frames=test_outputs, gt_frames=test_gt)
    print(shape, [shape])


config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    # dataset
    data_loader = DataLoader(test_folder, height, width)

    # initialize weights
    sess.run(tf.global_variables_initializer())
    print('Init global successfully!')

    # tf saver
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=None)
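The snippet stops right after building the saver. A minimal sketch of how the restored graph might be driven from here, assuming numpy is imported as np; the checkpoint directory and the zero-filled clip are placeholders, not part of the original:

    # hypothetical checkpoint directory; restore the generator weights if present
    ckpt = tf.train.latest_checkpoint('checkpoints/')
    if ckpt:
        saver.restore(sess, ckpt)

    # feed one clip shaped [1, height, width, 3 * (num_his + 1)] through the test graph
    clip = np.zeros([1, height, width, 3 * (num_his + 1)], dtype=np.float32)
    psnr = sess.run(test_psnr_error, feed_dict={test_video_clips_tensor: clip})
    print('test PSNR = {}'.format(psnr))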
Example #3
def val(cfg, model=None):
    if model:  # This is for testing during training.
        generator = model
        generator.eval()
    else:
        generator = UNet(input_channels=12, output_channel=3).cuda().eval()
        generator.load_state_dict(torch.load('weights/' + cfg.trained_model)['net_g'])
        print(f'The pre-trained generator has been loaded from \'weights/{cfg.trained_model}\'.\n')

    # video_folders = os.listdir(cfg.test_data)
    # video_folders.sort()
    # video_folders = [os.path.join(cfg.test_data, aa) for aa in video_folders]
    
    with open(os.path.join(cfg.data_root, 'val_split_with_obj.txt')) as f:
        all_video_names = f.read().splitlines()
    video_folders = [os.path.join(cfg.data_root, 'frames', vid, 'images') for vid in all_video_names]
    
    fps = 0
    psnr_group = []

    if not model:
        if cfg.show_curve:
            fig = plt.figure("Image")
            manager = plt.get_current_fig_manager()
            manager.window.setGeometry(550, 200, 600, 500)
            # This works for QT backend, for other backends, check this ⬃⬃⬃.
            # https://stackoverflow.com/questions/7449585/how-do-you-set-the-absolute-position-of-figure-windows-with-matplotlib
            plt.xlabel('frames')
            plt.ylabel('psnr')
            plt.title('psnr curve')
            plt.grid(ls='--')

            cv2.namedWindow('target frames', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('target frames', 384, 384)
            cv2.moveWindow("target frames", 100, 100)

        if cfg.show_heatmap:
            cv2.namedWindow('difference map', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('difference map', 384, 384)
            cv2.moveWindow('difference map', 100, 550)

    # load gt labels
    gt_loader = Label_loader(cfg, video_folders)  # Get gt labels.
    gt, gt_bboxes = gt_loader()

    with torch.no_grad():
        for i, folder in enumerate(tqdm(video_folders)):
            dataset = Dataset.test_dataset(cfg, folder)
            test_dataloader = DataLoader(dataset=dataset, batch_size=cfg.batch_size,
                                         shuffle=False, num_workers=cfg.batch_size)
            vid = folder.split('/')[-2]

            if not model:
                name = folder.split('/')[-1]
                fourcc = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')

                if cfg.show_curve:
                    video_writer = cv2.VideoWriter(f'results/{name}_video.avi', fourcc, 30, cfg.img_size)
                    curve_writer = cv2.VideoWriter(f'results/{name}_curve.avi', fourcc, 30, (600, 430))

                    js = []
                    plt.clf()
                    ax = plt.axes(xlim=(0, len(dataset)), ylim=(30, 45))
                    line, = ax.plot([], [], '-b')

                if cfg.show_heatmap:
                    heatmap_writer = cv2.VideoWriter(f'results/{name}_heatmap.avi', fourcc, 30, cfg.img_size)

            psnrs = []
            diff_maps = []
            # for j, clip in enumerate(dataset):
            for j, clip in enumerate(test_dataloader):
                input_frames = clip[:, 0:12, :, :].cuda()
                target_frame = clip[:, 12:15, :, :].cuda()
                # input_np = clip[0:12, :, :]
                # target_np = clip[12:15, :, :]
                # input_frames = torch.from_numpy(input_np).unsqueeze(0).cuda()
                # target_frame = torch.from_numpy(target_np).unsqueeze(0).cuda()

                G_frame = generator(input_frames)
                # TODO: save predicted frame or difference
                test_psnr = psnr_error(G_frame, target_frame, reduce_batch=False).cpu().detach().numpy()
                # NOTE: save the squared diff so it can be reused for different evaluations
                square_diff = (target_frame - G_frame).pow(2).mean(dim=1).cpu().detach().numpy().astype('float16')
                diff_maps.append(square_diff)

                # psnrs.append(float(test_psnr))
                psnrs += list(test_psnr)
                
                if not model:
                    if cfg.show_curve:
                        # take the first frame in the batch, convert to HxWx3 uint8 for display
                        target_np = target_frame[0].cpu().numpy()
                        cv2_frame = ((target_np + 1) * 127.5).transpose(1, 2, 0).astype('uint8')
                        js.append(j)
                        line.set_xdata(js)  # This keeps the existing figure and updates the X-axis and Y-axis data,
                        line.set_ydata(psnrs)  # which is faster, but still not perfect.
                        plt.pause(0.001)  # show curve

                        cv2.imshow('target frames', cv2_frame)
                        cv2.waitKey(1)  # show video

                        video_writer.write(cv2_frame)  # Write original video frames.

                        buffer = io.BytesIO()  # Write curve frames from buffer.
                        fig.canvas.print_png(buffer)
                        buffer.write(buffer.getvalue())
                        curve_img = np.array(Image.open(buffer))[..., (2, 1, 0)]
                        curve_writer.write(curve_img)

                    if cfg.show_heatmap:
                        diff_map = torch.sum(torch.abs(G_frame - target_frame).squeeze(), 0)
                        diff_map -= diff_map.min()  # Normalize to 0 ~ 255.
                        diff_map /= diff_map.max()
                        diff_map *= 255
                        diff_map = diff_map.cpu().detach().numpy().astype('uint8')
                        heat_map = cv2.applyColorMap(diff_map, cv2.COLORMAP_JET)

                        cv2.imshow('difference map', heat_map)
                        cv2.waitKey(1)

                        heatmap_writer.write(heat_map)  # Write heatmap frames.

                torch.cuda.synchronize()
                # end = time.time()
                # if j > 1:  # Compute fps by calculating the time used in one completed iteration, this is more accurate.
                #     fps = 1 / (end - temp)
                # temp = end
                # print(f'\rDetecting: [{i + 1:02d}] {j + 1}/{len(dataset)}, {fps:.2f} fps.', end='')
            diff_maps = np.concatenate(diff_maps, axis=0)
            np.save(os.path.join('saved_difference_map', vid+'.npy'), diff_maps)
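            # sanity check: PSNR/diff-map counts should equal label count minus the 4 seed frames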
            if len(psnrs) != len(gt[i]) - 4 or len(psnrs) != len(diff_maps):
                pdb.set_trace()
            psnr_group.append(np.array(psnrs))

            if not model:
                if cfg.show_curve:
                    video_writer.release()
                    curve_writer.release()
                if cfg.show_heatmap:
                    heatmap_writer.release()

    print('\nAll frames detected, now computing AUC.')

    assert len(psnr_group) == len(gt), f'Ground truth has {len(gt)} videos, but got {len(psnr_group)} detected videos.'
    # save psnr
    torch.save(psnr_group, 'results/psnr_group.pth')

    scores = np.array([], dtype=np.float32)
    labels = np.array([], dtype=np.int8)
    for i in range(len(psnr_group)):
        distance = psnr_group[i]
        distance -= min(distance)  # distance = (distance - min) / (max - min)
        distance /= max(distance)

        scores = np.concatenate((scores, distance), axis=0)
        labels = np.concatenate((labels, gt[i][4:]), axis=0)  # Exclude the first 4 unpredictable frames in gt.
    
    torch.save(psnr_group, 'results/psnr_normalized.pth')
    assert scores.shape == labels.shape, \
        f'Ground truth has {labels.shape[0]} frames, but got {scores.shape[0]} detected frames.'

    fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=0)
    auc = metrics.auc(fpr, tpr)
    print(f'AUC: {auc}\n')
    return auc
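Example #3 calls psnr_error with a reduce_batch flag. A minimal PyTorch sketch of such a function, assuming frames scaled to [-1, 1] (peak-to-peak range 2); the definition in the source repository may differ:

import torch

def psnr_error(gen_frames, gt_frames, reduce_batch=True):
    # per-sample MSE over channel and spatial dimensions
    mse = ((gen_frames - gt_frames) ** 2).mean(dim=(1, 2, 3))
    psnr = 10 * torch.log10(4.0 / mse)  # 4.0 = (peak-to-peak range of 2) squared
    return psnr.mean() if reduce_batch else psnr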
Example #4
def train(config):
    #### set the save and log path ####
    save_path = config['save_path']
    utils.set_save_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(config['save_path'], 'tensorboard'))
    yaml.dump(config, open(os.path.join(config['save_path'], 'classifier_config.yaml'), 'w'))

    device = torch.device('cuda:' + args.gpu)

    #### make datasets ####
    # train
    train_folder = config['dataset_path'] + config['train_dataset_type'] + "/training/frames"
    test_folder = config['dataset_path'] + config['train_dataset_type'] + "/testing/frames"

    # Loading dataset
    train_dataset_args = config['train_dataset_args']
    test_dataset_args = config['test_dataset_args']

    train_dataset = VadDataset(args, video_folder=train_folder, bbox_folder=config['train_bboxes_path'],
                               flow_folder=config['train_flow_path'],
                               transform=transforms.Compose([transforms.ToTensor()]),
                               resize_height=train_dataset_args['h'], resize_width=train_dataset_args['w'],
                               dataset=config['train_dataset_type'], time_step=train_dataset_args['t_length'] - 1,
                               device=device)

    test_dataset = VadDataset(args, video_folder=test_folder, bbox_folder=config['test_bboxes_path'],
                              flow_folder=config['test_flow_path'],
                              transform=transforms.Compose([transforms.ToTensor()]),
                              resize_height=test_dataset_args['h'], resize_width=test_dataset_args['w'],
                              dataset=config['train_dataset_type'], time_step=test_dataset_args['t_length'] - 1,
                              device=device)


    train_dataloader = DataLoader(train_dataset, batch_size=train_dataset_args['batch_size'],
                                  shuffle=True, num_workers=train_dataset_args['num_workers'], drop_last=True)
    test_dataloader = DataLoader(test_dataset, batch_size=test_dataset_args['batch_size'],
                                 shuffle=False, num_workers=test_dataset_args['num_workers'], drop_last=False)

    # for test---- prepare labels
    labels = np.load('./data/frame_labels_' + config['test_dataset_type'] + '.npy')
    if config['test_dataset_type'] == 'shanghai':
        labels = np.expand_dims(labels, 0)
    videos = OrderedDict()
    videos_list = sorted(glob.glob(os.path.join(test_folder, '*')))
    labels_list = []
    label_length = 0
    psnr_list = {}
    for video in sorted(videos_list):
        video_name = video.split('/')[-1]
        videos[video_name] = {}
        videos[video_name]['path'] = video
        videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg'))
        videos[video_name]['frame'].sort()
        videos[video_name]['length'] = len(videos[video_name]['frame'])
        labels_list = np.append(labels_list, labels[0][4 + label_length:videos[video_name]['length'] + label_length])
        label_length += videos[video_name]['length']
        psnr_list[video_name] = []

    # Model setting
    num_unet_layers = 4
    discriminator_num_filters = [128, 256, 512, 512]

    # for gradient loss
    alpha = 1
    # for int loss
    l_num = 2
    pretrain = False

    if config['generator'] == 'cycle_generator_convlstm':
        ngf = 64
        netG = 'resnet_6blocks'
        norm = 'instance'
        no_dropout = False
        init_type = 'normal'
        init_gain = 0.02
        gpu_ids = []
        model = define_G(train_dataset_args['c'], train_dataset_args['c'],
                             ngf, netG, norm, not no_dropout, init_type, init_gain, gpu_ids)
    elif config['generator'] == 'unet':
        # generator = UNet(n_channels=train_dataset_args['c']*(train_dataset_args['t_length']-1),
        #                  layer_nums=num_unet_layers, output_channel=train_dataset_args['c'])
        model = PreAE(train_dataset_args['c'], train_dataset_args['t_length'], **config['model_args'])
    else:
        raise Exception('The generator is not implemented')

    # generator = torch.load('save/avenue_cycle_generator_convlstm_flownet2_0103/generator-epoch-199.pth')
    if config['use_D']:
        discriminator = PixelDiscriminator(train_dataset_args['c'], discriminator_num_filters, use_norm=False)
        optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=0.00002)

    # optimizer setting
    params = list(model.parameters())
    optimizer_G, lr_scheduler = utils.make_optimizer(
        params, config['optimizer'], config['optimizer_args'])


    # loss weights; the value range differs from the source version, so these may need adjusting
    lam_int = 1.0 * 2
    lam_gd = 1.0 * 2
    # TODO here we use no flow loss
    # lam_op = 0  # 2.0
    # op_loss = Flow_Loss()
    
    adversarial_loss = Adversarial_Loss()
    # TODO if use adv
    lam_adv = 0.05
    discriminate_loss = Discriminate_Loss()
    alpha = 1
    l_num = 2
    gd_loss = Gradient_Loss(alpha, train_dataset_args['c'])    
    int_loss = Intensity_Loss(l_num)
    object_loss = ObjectLoss(device, l_num)

    # move models to GPU; parallelize if multiple GPUs are configured
    if torch.cuda.is_available():
        model.cuda()
        if config['use_D']:
            discriminator.cuda()
    if config.get('_parallel'):
        model = nn.DataParallel(model)
        if config['use_D']:
            discriminator = nn.DataParallel(discriminator)
    # Training
    utils.log('Start train')
    max_frame_AUC, max_roi_AUC = 0, 0
    base_channel_num  = train_dataset_args['c'] * (train_dataset_args['t_length'] - 1)
    save_epoch = 5 if config['save_epoch'] is None else config['save_epoch']
    for epoch in range(config['epochs']):

        model.train()
        for j, (imgs, bbox, flow) in enumerate(tqdm(train_dataloader, desc='train', leave=False)):
            imgs = imgs.cuda()
            flow = flow.cuda()
            # input = imgs[:, :-1, ].view(imgs.shape[0], -1, imgs.shape[-2], imgs.shape[-1])
            input = imgs[:, :-1, ]
            target = imgs[:, -1, ]
            outputs = model(input)

            if config['use_D']:
                g_adv_loss = adversarial_loss(discriminator(outputs))
            else:
                g_adv_loss = 0 

            g_object_loss = object_loss(outputs, target, flow, bbox)
            # g_int_loss = int_loss(outputs, target)
            g_gd_loss = gd_loss(outputs, target)
            g_loss = lam_adv * g_adv_loss + lam_gd * g_gd_loss + lam_int * g_object_loss

            optimizer_G.zero_grad()
            g_loss.backward()

            optimizer_G.step()

            train_psnr = utils.psnr_error(outputs, target)

            # ----------- update optim_D -------
            if config['use_D']:
                optimizer_D.zero_grad()
                d_loss = discriminate_loss(discriminator(target), discriminator(outputs.detach()))
                d_loss.backward()
                optimizer_D.step()
        lr_scheduler.step()

        utils.log('----------------------------------------')
        utils.log('Epoch:' + str(epoch + 1))
        utils.log('----------------------------------------')
        utils.log('Loss: Reconstruction {:.6f}'.format(g_loss.item()))

        # Testing
        utils.log('Evaluation of ' + config['test_dataset_type'])   


        # Save the model
        if epoch % save_epoch == 0 or epoch == config['epochs'] - 1:
            if not os.path.exists(save_path):
                os.makedirs(save_path) 
            if not os.path.exists(os.path.join(save_path, "models")):
                os.makedirs(os.path.join(save_path, "models")) 
            # TODO 
            frame_AUC = ObjectLoss_evaluate(test_dataloader, model, labels_list, videos,
                dataset=config['test_dataset_type'], device=device,
                frame_height=train_dataset_args['h'], frame_width=train_dataset_args['w'],
                is_visual=False, mask_labels_path=config['mask_labels_path'],
                save_path=os.path.join(save_path, "./final"), labels_dict=labels)
            
            torch.save(model.state_dict(), os.path.join(save_path, 'models/model-epoch-{}.pth'.format(epoch)))
            if config['use_D']:
                torch.save(discriminator.state_dict(), os.path.join(save_path, 'models/discriminator-epoch-{}.pth'.format(epoch)))
        else:
            frame_AUC = ObjectLoss_evaluate(test_dataloader, model, labels_list, videos,
                dataset=config['test_dataset_type'], device=device,
                frame_height=train_dataset_args['h'], frame_width=train_dataset_args['w'])

        utils.log('The result of ' + config['test_dataset_type'])
        utils.log("AUC: {}%".format(frame_AUC*100))

        if frame_AUC > max_frame_AUC:
            max_frame_AUC = frame_AUC
            # TODO
            torch.save(model.state_dict(), os.path.join(save_path, 'models/max-frame_auc-model.pth'))
            if config['use_D']:
                torch.save(discriminator.state_dict(), os.path.join(save_path, 'models/discriminator-epoch-{}.pth'.format(epoch)))
            # evaluate(test_dataloader, model, labels_list, videos, int_loss, config['test_dataset_type'], test_bboxes=config['test_bboxes'],
            #     frame_height = train_dataset_args['h'], frame_width=train_dataset_args['w'], 
            #     is_visual=True, mask_labels_path = config['mask_labels_path'], save_path = os.path.join(save_path, "./frame_best"), labels_dict=labels) 
        
        utils.log('----------------------------------------')

    utils.log('Training is finished')
    utils.log('max_frame_AUC: {}'.format(max_frame_AUC))
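Examples #4 and #7 weight a Gradient_Loss term (lam_gd). A minimal sketch of an image gradient-difference loss in that spirit, after Mathieu et al.; the class name here is hypothetical and the constructor drops the channel argument the examples pass:

import torch
import torch.nn as nn

class GradientLossSketch(nn.Module):
    def __init__(self, alpha=1):
        super().__init__()
        self.alpha = alpha

    def forward(self, gen_frames, gt_frames):
        # absolute forward differences along width and height
        def grads(x):
            return (torch.abs(x[..., :, 1:] - x[..., :, :-1]),
                    torch.abs(x[..., 1:, :] - x[..., :-1, :]))

        gen_dx, gen_dy = grads(gen_frames)
        gt_dx, gt_dy = grads(gt_frames)
        return ((torch.abs(gt_dx - gen_dx) ** self.alpha).mean() +
                (torch.abs(gt_dy - gen_dy) ** self.alpha).mean())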
Example #5
    test_it = test_dataset.make_one_shot_iterator()
    test_videos_clips_tensor = test_it.get_next()
    test_videos_clips_tensor.set_shape(
        [batch_size, height, width, 3 * (num_his + 1)])

    test_inputs = test_videos_clips_tensor[..., 0:num_his * 3]
    test_gt = test_videos_clips_tensor[..., -3:]

    print('test inputs = {}'.format(test_inputs))
    print('test prediction gt = {}'.format(test_gt))

# define training generator function
with tf.variable_scope('generator', reuse=None):
    print('training = {}'.format(tf.get_variable_scope().name))
    train_outputs = generator(train_inputs, layers=4, output_channel=3)
    train_psnr_error = psnr_error(gen_frames=train_outputs, gt_frames=train_gt)

# define testing generator function
with tf.variable_scope('generator', reuse=True):
    print('testing = {}'.format(tf.get_variable_scope().name))
    test_outputs = generator(test_inputs, layers=4, output_channel=3)
    test_psnr_error = psnr_error(gen_frames=test_outputs, gt_frames=test_gt)

# define intensity loss
if lam_lp != 0:
    lp_loss = intensity_loss(gen_frames=train_outputs,
                             gt_frames=train_gt,
                             l_num=l_num)
else:
    lp_loss = tf.constant(0.0, dtype=tf.float32)
Example #6
def val(cfg, model=None):
    if model:
        test_folder = cfg.test_folder
        print("The test folder", test_folder)
    else:
        model_path = '/project/bo/exp_data/FFP/%s_%d/' % (cfg.dataset_type,
                                                          cfg.version)
        ckpt_path = model_path + "model-%d.pth" % cfg.ckpt_step
        if cfg.dataset_augment_test_type != "frames/testing/" and "venue" in cfg.dataset_type:
            rain_type = str(
                cfg.dataset_augment_test_type.strip().split('_')[0])
            brightness = int(
                cfg.dataset_augment_test_type.strip().split('_')[-1]) / 10
            data_dir = cfg.dataset_path + "Avenue/frames/%s_testing/bright_%.2f/" % (
                rain_type, brightness)
            if not os.path.exists(data_dir):
                aug_data.save_avenue_rain_or_bright(cfg.dataset_path,
                                                    rain_type,
                                                    True,
                                                    "testing",
                                                    bright_space=brightness)
        else:
            data_dir = cfg.dataset_path + '/%s/%s/' % (
                "Avenue", cfg.dataset_augment_test_type)
            rain_type = "original"
            brightness = 1.0
        test_folder = data_dir
        orig_stdout = sys.stdout
        f = open(
            os.path.join(
                model_path,
                'output_rain_%s_bright_%s.txt' % (rain_type, brightness)), 'w')
        sys.stdout = f
        cfg.gt = np.load('/project/bo/anomaly_data/Avenue/gt_label.npy',
                         allow_pickle=True)

    if model:  # This is for testing during training.
        generator = model
        generator.eval()
    else:
        generator = UNet(input_channels=12, output_channel=3).cuda().eval()
        generator.load_state_dict(torch.load(ckpt_path)['net_g'])
        #         generator.load_state_dict(torch.load('weights/' + cfg.trained_model)['net_g'])
        print("The pre-trained generator has been loaded from", ckpt_path)
#         print(f'The pre-trained generator has been loaded from \'weights/{cfg.trained_model}\'.\n')
    videos = {}
    videos, video_string = input_utils.setup(test_folder, videos)

    fps = 0
    psnr_group = []

    if not model:
        if cfg.show_curve:
            fig = plt.figure("Image")
            manager = plt.get_current_fig_manager()
            manager.window.setGeometry(550, 200, 600, 500)
            # This works for QT backend, for other backends, check this ⬃⬃⬃.
            # https://stackoverflow.com/questions/7449585/how-do-you-set-the-absolute-position-of-figure-windows-with-matplotlib
            plt.xlabel('frames')
            plt.ylabel('psnr')
            plt.title('psnr curve')
            plt.grid(ls='--')

            cv2.namedWindow('target frames', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('target frames', 384, 384)
            cv2.moveWindow("target frames", 100, 100)

        if cfg.show_heatmap:
            cv2.namedWindow('difference map', cv2.WINDOW_NORMAL)
            cv2.resizeWindow('difference map', 384, 384)
            cv2.moveWindow('difference map', 100, 550)

    with torch.no_grad():
        for i, folder in enumerate(video_string):
            # build the per-video dataset up front; len(dataset) is used below
            dataset = input_utils.test_dataset(videos[folder]['frame'],
                                               [imh, imw])

            if not model:
                name = folder.split('/')[-1]
                fourcc = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')

                if cfg.show_curve:
                    video_writer = cv2.VideoWriter(f'results/{name}_video.avi',
                                                   fourcc, 30, cfg.img_size)
                    curve_writer = cv2.VideoWriter(f'results/{name}_curve.avi',
                                                   fourcc, 30, (600, 430))

                    js = []
                    plt.clf()
                    ax = plt.axes(xlim=(0, len(dataset)), ylim=(30, 45))
                    line, = ax.plot([], [], '-b')

                if cfg.show_heatmap:
                    heatmap_writer = cv2.VideoWriter(
                        f'results/{name}_heatmap.avi', fourcc, 30,
                        cfg.img_size)

            psnrs = []
            print("Start video %s with %d frames...................." %
                  (folder, len(dataset)))
            for j, clip in enumerate(dataset):
                input_np = clip[0:12, :, :]
                target_np = clip[12:15, :, :]
                input_frames = torch.from_numpy(input_np).unsqueeze(0).cuda()
                target_frame = torch.from_numpy(target_np).unsqueeze(0).cuda()

                G_frame = generator(input_frames)
                test_psnr = psnr_error(G_frame,
                                       target_frame).cpu().detach().numpy()
                psnrs.append(float(test_psnr))

                if not model:
                    if cfg.show_curve:
                        cv2_frame = ((target_np + 1) * 127.5).transpose(1, 2, 0).astype('uint8')
                        js.append(j)
                        line.set_xdata(js)  # This keeps the existing figure and updates the X-axis and Y-axis data,
                        line.set_ydata(psnrs)  # which is faster, but still not perfect.
                        plt.pause(0.001)  # show curve

                        cv2.imshow('target frames', cv2_frame)
                        cv2.waitKey(1)  # show video

                        video_writer.write(cv2_frame)  # Write original video frames.

                        buffer = io.BytesIO()  # Write curve frames from buffer.
                        fig.canvas.print_png(buffer)
                        buffer.write(buffer.getvalue())
                        curve_img = np.array(Image.open(buffer))[..., (2, 1, 0)]
                        curve_writer.write(curve_img)

                    if cfg.show_heatmap:
                        diff_map = torch.sum(torch.abs(G_frame - target_frame).squeeze(), 0)
                        diff_map -= diff_map.min()  # Normalize to 0 ~ 255.
                        diff_map /= diff_map.max()
                        diff_map *= 255
                        diff_map = diff_map.cpu().detach().numpy().astype('uint8')
                        heat_map = cv2.applyColorMap(diff_map, cv2.COLORMAP_JET)

                        cv2.imshow('difference map', heat_map)
                        cv2.waitKey(1)

                        heatmap_writer.write(heat_map)  # Write heatmap frames.

                torch.cuda.synchronize()
                end = time.time()
                if j > 1:  # Compute fps by calculating the time used in one completed iteration, this is more accurate.
                    fps = 1 / (end - temp)
                temp = end


                # print(f'\rDetecting: [{i + 1:02d}] {j + 1}/{len(dataset)}, {fps:.2f} fps.', end='')

            psnr_group.append(np.array(psnrs))

            if not model:
                if cfg.show_curve:
                    video_writer.release()
                    curve_writer.release()
                if cfg.show_heatmap:
                    heatmap_writer.release()

    print('\nAll frames detected, now computing AUC.')

    auc = give_score(psnr_group, cfg.gt)
    if not model:
        sys.stdout = orig_stdout
        f.close()

    return auc
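give_score is not shown in this example. A plausible minimal sketch, assuming it min-max normalizes each video's PSNRs and computes a frame-level ROC AUC the same way Example #3 does; the real implementation may differ:

import numpy as np
from sklearn import metrics

def give_score(psnr_group, gt):
    scores = np.array([], dtype=np.float32)
    labels = np.array([], dtype=np.int8)
    for psnrs, video_gt in zip(psnr_group, gt):
        # min-max normalize per video, then skip the first 4 unpredictable frames in gt
        normalized = (psnrs - psnrs.min()) / (psnrs.max() - psnrs.min())
        scores = np.concatenate((scores, normalized))
        labels = np.concatenate((labels, video_gt[4:]))
    fpr, tpr, _ = metrics.roc_curve(labels, scores, pos_label=0)
    return metrics.auc(fpr, tpr)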
Example #7
def train(config):
    #### set the save and log path ####
    svname = args.name
    if svname is None:
        svname = config['train_dataset_type'] + '_' + config['generator'] + '_' + config['flow_model']

    if args.tag is not None:
        svname += '_' + args.tag
    save_path = os.path.join('./save', svname)
    utils.set_save_path(save_path)
    utils.set_log_path(save_path)
    writer = SummaryWriter(os.path.join(save_path, 'tensorboard'))
    yaml.dump(config,
              open(os.path.join(save_path, 'classifier_config.yaml'), 'w'))

    #### make datasets ####
    # train
    train_folder = config['dataset_path'] + config['train_dataset_type'] + "/training/frames"
    test_folder = config['dataset_path'] + config['train_dataset_type'] + "/testing/frames"

    # Loading dataset
    train_dataset_args = config['train_dataset_args']
    test_dataset_args = config['test_dataset_args']
    train_dataset = VadDataset(train_folder,
                               transforms.Compose([
                                   transforms.ToTensor(),
                               ]),
                               resize_height=train_dataset_args['h'],
                               resize_width=train_dataset_args['w'],
                               time_step=train_dataset_args['t_length'] - 1)

    test_dataset = VadDataset(test_folder,
                              transforms.Compose([
                                  transforms.ToTensor(),
                              ]),
                              resize_height=test_dataset_args['h'],
                              resize_width=test_dataset_args['w'],
                              time_step=test_dataset_args['t_length'] - 1)

    train_dataloader = DataLoader(
        train_dataset,
        batch_size=train_dataset_args['batch_size'],
        shuffle=True,
        num_workers=train_dataset_args['num_workers'],
        drop_last=True)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=test_dataset_args['batch_size'],
                                 shuffle=False,
                                 num_workers=test_dataset_args['num_workers'],
                                 drop_last=False)

    # for test---- prepare labels
    labels = np.load('./data/frame_labels_' + config['test_dataset_type'] +
                     '.npy')
    if config['test_dataset_type'] == 'shanghai':
        labels = np.expand_dims(labels, 0)
    videos = OrderedDict()
    videos_list = sorted(glob.glob(os.path.join(test_folder, '*')))
    labels_list = []
    label_length = 0
    psnr_list = {}
    for video in sorted(videos_list):
        # video_name = video.split('/')[-1]

        # windows
        video_name = os.path.split(video)[-1]
        videos[video_name] = {}
        videos[video_name]['path'] = video
        videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg'))
        videos[video_name]['frame'].sort()
        videos[video_name]['length'] = len(videos[video_name]['frame'])
        labels_list = np.append(
            labels_list,
            labels[0][4 + label_length:videos[video_name]['length'] + label_length])
        label_length += videos[video_name]['length']
        psnr_list[video_name] = []

    # Model setting
    num_unet_layers = 4
    discriminator_num_filters = [128, 256, 512, 512]

    # for gradient loss
    alpha = 1
    # for int loss
    l_num = 2
    pretrain = False

    if config['generator'] == 'cycle_generator_convlstm':
        ngf = 64
        netG = 'resnet_6blocks'
        norm = 'instance'
        no_dropout = False
        init_type = 'normal'
        init_gain = 0.02
        gpu_ids = []
        generator = define_G(train_dataset_args['c'], train_dataset_args['c'],
                             ngf, netG, norm, not no_dropout, init_type,
                             init_gain, gpu_ids)
    elif config['generator'] == 'unet':
        # generator = UNet(n_channels=train_dataset_args['c']*(train_dataset_args['t_length']-1),
        #                  layer_nums=num_unet_layers, output_channel=train_dataset_args['c'])
        generator = PreAE(train_dataset_args['c'], train_dataset_args['t_length'],
                          **config['model_args'])
    else:
        raise Exception('The generator is not implemented')

    # generator = torch.load('save/avenue_cycle_generator_convlstm_flownet2_0103/generator-epoch-199.pth')

    discriminator = PixelDiscriminator(train_dataset_args['c'],
                                       discriminator_num_filters,
                                       use_norm=False)
    # discriminator = torch.load('save/avenue_cycle_generator_convlstm_flownet2_0103/discriminator-epoch-199.pth')

    # if not pretrain:
    #     generator.apply(weights_init_normal)
    #     discriminator.apply(weights_init_normal)

    # if use flownet
    # if config['flow_model'] == 'flownet2':
    #     flownet2SD_model_path = 'flownet2/FlowNet2_checkpoint.pth.tar'
    #     flow_network = FlowNet2(args).eval()
    #     flow_network.load_state_dict(torch.load(flownet2SD_model_path)['state_dict'])
    # elif config['flow_model'] == 'liteflownet':
    #     lite_flow_model_path = 'liteFlownet/network-sintel.pytorch'
    #     flow_network = Network().eval()
    #     flow_network.load_state_dict(torch.load(lite_flow_model_path))

    # loss weights; the value range differs from the source version, so these may need adjusting
    lam_int = 1.0 * 2
    lam_gd = 1.0 * 2
    # here we use no flow loss
    lam_op = 0  # 2.0
    lam_adv = 0.05
    adversarial_loss = Adversarial_Loss()
    discriminate_loss = Discriminate_Loss()
    gd_loss = Gradient_Loss(alpha, train_dataset_args['c'])
    op_loss = Flow_Loss()
    int_loss = Intensity_Loss(l_num)
    step = 0

    utils.log('initializing the model with Generator-Unet {} layers, '
              'PixelDiscriminator with filters {}'.format(
                  num_unet_layers, discriminator_num_filters))

    g_lr = 0.0002
    d_lr = 0.00002
    optimizer_G = torch.optim.Adam(generator.parameters(), lr=g_lr)
    optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=d_lr)

    # # optimizer setting
    # params_encoder = list(generator.encoder.parameters())
    # params_decoder = list(generator.decoder.parameters())
    # params = params_encoder + params_decoder
    # optimizer, lr_scheduler = utils.make_optimizer(
    #     params, config['optimizer'], config['optimizer_args'])
    #
    # loss_func_mse = nn.MSELoss(reduction='none')

    # move models to GPU; parallelize if multiple GPUs are configured
    if torch.cuda.is_available():
        generator.cuda()
        discriminator.cuda()
        # # if use flownet
        # flow_network.cuda()
        adversarial_loss.cuda()
        discriminate_loss.cuda()
        gd_loss.cuda()
        op_loss.cuda()
        int_loss.cuda()

    if config.get('_parallel'):
        generator = nn.DataParallel(generator)
        discriminator = nn.DataParallel(discriminator)
        # if use flownet
        # flow_network = nn.DataParallel(flow_network)
        adversarial_loss = nn.DataParallel(adversarial_loss)
        discriminate_loss = nn.DataParallel(discriminate_loss)
        gd_loss = nn.DataParallel(gd_loss)
        op_loss = nn.DataParallel(op_loss)
        int_loss = nn.DataParallel(int_loss)

    # Training
    utils.log('Start train')
    max_accuracy = 0
    base_channel_num = train_dataset_args['c'] * (
        train_dataset_args['t_length'] - 1)
    save_epoch = 5 if config['save_epoch'] is None else config['save_epoch']
    for epoch in range(config['epochs']):

        generator.train()
        for j, imgs in enumerate(
                tqdm(train_dataloader, desc='train', leave=False)):
            imgs = imgs.cuda()
            input = imgs[:, :-1, ]
            input_last = input[:, -1, ]
            target = imgs[:, -1, ]
            # input = input.view(input.shape[0], -1, input.shape[-2],input.shape[-1])

            # only for debug
            # input0=imgs[:, 0,]
            # input1 = imgs[:, 1, ]
            # gt_flow_esti_tensor = torch.cat([input0, input1], 1)
            # flow_gt = batch_estimate(gt_flow_esti_tensor, flow_network)[0]
            # objectOutput = open('./out_train.flo', 'wb')
            # np.array([80, 73, 69, 72], np.uint8).tofile(objectOutput)
            # np.array([flow_gt.size(2), flow_gt.size(1)], np.int32).tofile(objectOutput)
            # np.array(flow_gt.detach().cpu().numpy().transpose(1, 2, 0), np.float32).tofile(objectOutput)
            # objectOutput.close()
            # break

            # ------- update optim_G --------------
            outputs = generator(input)
            # pred_flow_tensor = torch.cat([input_last, outputs], 1)
            # gt_flow_tensor = torch.cat([input_last, target], 1)
            # flow_pred = batch_estimate(pred_flow_tensor, flow_network)
            # flow_gt = batch_estimate(gt_flow_tensor, flow_network)

            # if you want to use flownet2SD, comment out the part in front

            # #### if use flownet ####
            # pred_flow_esti_tensor = torch.cat([input_last.view(-1,3,1,input.shape[-2],input.shape[-1]),
            #                                    outputs.view(-1,3,1,input.shape[-2],input.shape[-1])], 2)
            # gt_flow_esti_tensor = torch.cat([input_last.view(-1,3,1,input.shape[-2],input.shape[-1]),
            #                                  target.view(-1,3,1,input.shape[-2],input.shape[-1])], 2)

            # flow_gt=flow_network(gt_flow_esti_tensor*255.0)
            # flow_pred=flow_network(pred_flow_esti_tensor*255.0)
            ##############################
            # g_op_loss = op_loss(flow_pred, flow_gt) ## flow loss
            g_op_loss = 0
            g_adv_loss = adversarial_loss(discriminator(outputs))

            g_int_loss = int_loss(outputs, target)
            g_gd_loss = gd_loss(outputs, target)
            g_loss = lam_adv * g_adv_loss + lam_gd * g_gd_loss + lam_op * g_op_loss + lam_int * g_int_loss

            optimizer_G.zero_grad()
            g_loss.backward()
            optimizer_G.step()

            train_psnr = utils.psnr_error(outputs, target)

            # ----------- update optim_D -------
            optimizer_D.zero_grad()
            d_loss = discriminate_loss(discriminator(target),
                                       discriminator(outputs.detach()))
            d_loss.backward()
            optimizer_D.step()
            # break
        # lr_scheduler.step()

        utils.log('----------------------------------------')
        utils.log('Epoch:' + str(epoch + 1))
        utils.log('----------------------------------------')
        utils.log("g_loss: {} d_loss {}".format(g_loss, d_loss))
        utils.log('\t gd_loss {}, op_loss {}, int_loss {}'.format(
            g_gd_loss, g_op_loss, g_int_loss))
        utils.log('\t train psnr {}'.format(train_psnr))

        # Testing
        utils.log('Evaluation of ' + config['test_dataset_type'])
        for video in sorted(videos_list):
            # video_name = video.split('/')[-1]
            video_name = os.path.split(video)[-1]
            psnr_list[video_name] = []

        generator.eval()
        video_num = 0
        # label_length += videos[videos_list[video_num].split('/')[-1]]['length']
        label_length = videos[os.path.split(videos_list[video_num])[1]]['length']
        for k, imgs in enumerate(
                tqdm(test_dataloader, desc='test', leave=False)):
            if k == label_length - 4 * (video_num + 1):
                video_num += 1
                label_length += videos[os.path.split(videos_list[video_num])[1]]['length']
            imgs = imgs.cuda()
            input = imgs[:, :-1, ]
            target = imgs[:, -1, ]
            # input = input.view(input.shape[0], -1, input.shape[-2], input.shape[-1])

            outputs = generator(input)
            mse_imgs = int_loss((outputs + 1) / 2, (target + 1) / 2).item()
            # psnr_list[videos_list[video_num].split('/')[-1]].append(utils.psnr(mse_imgs))
            psnr_list[os.path.split(videos_list[video_num])[1]].append(
                utils.psnr(mse_imgs))

        # Measuring the abnormality score and the AUC
        anomaly_score_total_list = []
        for video in sorted(videos_list):
            # video_name = video.split('/')[-1]
            video_name = os.path.split(video)[1]
            anomaly_score_total_list += utils.anomaly_score_list(
                psnr_list[video_name])

        anomaly_score_total_list = np.asarray(anomaly_score_total_list)
        accuracy = utils.AUC(anomaly_score_total_list,
                             np.expand_dims(1 - labels_list, 0))

        utils.log('The result of ' + config['test_dataset_type'])
        utils.log('AUC: ' + str(accuracy * 100) + '%')

        # Save the model
        if epoch % save_epoch == 0 or epoch == config['epochs'] - 1:
            # torch.save(model, os.path.join(
            #     save_path, 'model-epoch-{}.pth'.format(epoch)))

            torch.save(
                generator,
                os.path.join(save_path,
                             'generator-epoch-{}.pth'.format(epoch)))
            torch.save(
                discriminator,
                os.path.join(save_path,
                             'discriminator-epoch-{}.pth'.format(epoch)))

        if accuracy > max_accuracy:
            max_accuracy = accuracy
            torch.save(generator, os.path.join(save_path, 'generator-max'))
            torch.save(discriminator,
                       os.path.join(save_path, 'discriminator-max'))

        utils.log('----------------------------------------')

    utils.log('Training is finished')
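The evaluation loop above relies on utils.psnr (PSNR from a precomputed MSE) and utils.anomaly_score_list (per-video score normalization). Minimal sketches consistent with how they are called, assuming frames scaled to [0, 1]; the actual helpers may differ:

import math
import numpy as np

def psnr(mse):
    # PSNR for frames in [0, 1], so the peak value is 1
    return 10 * math.log10(1 / mse)

def anomaly_score_list(psnr_list):
    # min-max normalize a video's PSNRs to [0, 1]; higher means more normal
    psnrs = np.asarray(psnr_list, dtype=np.float64)
    return list((psnrs - psnrs.min()) / (psnrs.max() - psnrs.min()))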
Example #8
    def define_graph(self):
        """
        Sets up the model graph in TensorFlow.
        """
        with tf.name_scope('generator'):
            ##
            # Data
            ##

            with tf.name_scope('data'):
                self.inputs = tf.placeholder(tf.float32, shape=[None, 6])
                self.gt_frames = tf.placeholder(
                    tf.float32, shape=[None, self.height, self.width, 3])

                # use variable batch_size for more flexibility
                self.batch_size = tf.shape(self.inputs)[0]

            ##
            # Scale network setup and calculation
            ##

            self.summaries = []
            self.scale_preds = []  # the generated images at each scale
            self.scale_gts = []  # the ground truth images at each scale
            self.d_scale_preds = []  # the predictions from the discriminator model

            for scale_num in range(self.num_scale_nets):
                with tf.name_scope('scale_' + str(scale_num)):
                    with tf.name_scope('setup'):
                        with tf.name_scope('fully-connected'):
                            fc_ws = []
                            fc_bs = []

                            # create weights for fc layers
                            for i in range(len(self.scale_fc_layer_sizes[scale_num]) - 1):
                                fc_ws.append(w([self.scale_fc_layer_sizes[scale_num][i],
                                                self.scale_fc_layer_sizes[scale_num][i + 1]]))
                                fc_bs.append(b([self.scale_fc_layer_sizes[scale_num][i + 1]]))

                        with tf.name_scope('convolutions'):
                            conv_ws = []
                            conv_bs = []

                            # create weights for kernels
                            for i in range(len(self.scale_kernel_sizes[scale_num])):
                                conv_ws.append(w([self.scale_kernel_sizes[scale_num][i],
                                                  self.scale_kernel_sizes[scale_num][i],
                                                  self.scale_conv_layer_fms[scale_num][i],
                                                  self.scale_conv_layer_fms[scale_num][i + 1]]))
                                conv_bs.append(b([self.scale_conv_layer_fms[scale_num][i + 1]]))

                    with tf.name_scope('calculation'):

                        def calculate(height, width, inputs, gts, last_gen_frames):
                            # scale inputs and gts
                            scale_factor = 1. / 2 ** ((self.num_scale_nets - 1) - scale_num)
                            scale_height = int(height * scale_factor)
                            scale_width = int(width * scale_factor)

                            scale_gts = tf.image.resize_images(gts, [scale_height, scale_width])

                            # for all scales but the first, add the frame generated by the last
                            # scale to the input
                            # if scale_num > 0:
                            #     last_gen_frames = tf.image.resize_images(last_gen_frames,
                            #                                              scale_height,
                            #                                              scale_width)
                            #     inputs = tf.concat(3, [inputs, last_gen_frames])

                            # generated frame predictions
                            preds = inputs

                            # perform fc multiplications
                            with tf.name_scope('fully-connected'):
                                for i in xrange(
                                        len(self.
                                            scale_fc_layer_sizes[scale_num]) -
                                        1):
                                    preds = tf.nn.relu(
                                        tf.matmul(preds, fc_ws[i]) + fc_bs[i])

                                # reshape for convolutions
                                preds = tf.reshape(preds, [
                                    -1, c.FRAME_HEIGHT, c.FRAME_WIDTH,
                                    self.scale_conv_layer_fms[scale_num][0]
                                ])

                            # perform convolutions
                            with tf.name_scope('convolutions'):
                                for i in xrange(len(self.scale_kernel_sizes[scale_num])):
                                    # Convolve layer
                                    preds = tf.nn.conv2d(preds, conv_ws[i], [1, 1, 1, 1],
                                                         padding=c.PADDING_G)

                                    # Activate with ReLU (or Tanh for last layer)
                                    if i == len(self.scale_kernel_sizes[scale_num]) - 1:
                                        preds = tf.nn.tanh(preds + conv_bs[i])
                                    else:
                                        preds = tf.nn.relu(preds + conv_bs[i])

                            return preds, scale_gts

                        ##
                        # Perform train calculation
                        ##

                        # for all scales but the first, add the frame generated by the last
                        # scale to the input
                        if scale_num > 0:
                            last_scale_pred = self.scale_preds[scale_num - 1]
                        else:
                            last_scale_pred = None

                        # calculate
                        train_preds, train_gts = calculate(
                            self.height, self.width, self.inputs,
                            self.gt_frames, last_scale_pred)
                        self.scale_preds.append(train_preds)
                        self.scale_gts.append(train_gts)

                        # We need to run the network first to get generated frames, run the
                        # discriminator on those frames to get d_scale_preds, then run this
                        # again for the loss optimization (see the driver sketch after this
                        # example).
                        if c.ADVERSARIAL:
                            self.d_scale_preds.append(
                                tf.placeholder(tf.float32, [None, 1]))

            ##
            # Training
            ##

            with tf.name_scope('train'):
                # global loss is the combined loss from every scale network
                self.global_loss = combined_loss(self.scale_preds,
                                                 self.scale_gts,
                                                 self.d_scale_preds)
                self.global_step = tf.Variable(0, trainable=False)
                self.optimizer = tf.train.AdamOptimizer(
                    learning_rate=c.LRATE_G, name='optimizer')
                self.train_op = self.optimizer.minimize(
                    self.global_loss,
                    global_step=self.global_step,
                    name='train_op')

                # train loss summary
                loss_summary = tf.scalar_summary('train_loss_G',
                                                 self.global_loss)
                self.summaries.append(loss_summary)

            ##
            # Error
            ##

            with tf.name_scope('error'):
                # error computation
                # get error at largest scale
                self.psnr_error = psnr_error(self.scale_preds[-1],
                                             self.gt_frames)
                self.sharpdiff_error = sharp_diff_error(
                    self.scale_preds[-1], self.gt_frames)

                # train error summaries
                summary_psnr = tf.scalar_summary('train_PSNR', self.psnr_error)
                summary_sharpdiff = tf.scalar_summary('train_SharpDiff',
                                                      self.sharpdiff_error)
                self.summaries += [summary_psnr, summary_sharpdiff]

            # add summaries to visualize in TensorBoard
            self.summaries = tf.merge_summary(self.summaries)
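
The d_scale_preds placeholders above imply a two-pass train step: run the generator once to get its frames, score those frames with the discriminator, then run the generator again with the scores fed in for the loss optimization. A minimal driver sketch of that loop, assuming a built generator g with the tensors defined above, a discriminator d, and a get_train_batch() helper; d.scale_preds, d.gen_frames and get_train_batch are hypothetical names, not part of this example:

def train_step(sess, g, d, get_train_batch):
    # hypothetical sketch; only g's attributes are taken from the example above
    inputs, gts = get_train_batch()

    # pass 1: run the generator alone to get predictions at every scale
    feed = {g.inputs: inputs, g.gt_frames: gts}
    scale_preds = sess.run(g.scale_preds, feed_dict=feed)

    # score each scale's generated frames with the discriminator and feed
    # the scores into the matching d_scale_preds placeholder
    for i in xrange(len(scale_preds)):
        feed[g.d_scale_preds[i]] = sess.run(
            d.scale_preds[i], feed_dict={d.gen_frames[i]: scale_preds[i]})

    # pass 2: run the generator again, now optimizing the combined loss
    _, loss = sess.run([g.train_op, g.global_loss], feed_dict=feed)
    return loss
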
Example #9
0
    def define_graph(self, discriminator):
        """
        Sets up the model graph in TensorFlow.

        @param discriminator: The discriminator model that discriminates frames generated by this
                              model.
        """
        with tf.name_scope('generator'):
            ##
            # Data
            ##

            with tf.name_scope('input'):
                self.input_frames_train = tf.placeholder(
                    tf.float32,
                    shape=[None, self.height_train, self.width_train, 3 * c.HIST_LEN],
                    name='input_frames_train')
                self.gt_frames_train = tf.placeholder(
                    tf.float32,
                    shape=[None, self.height_train, self.width_train, 3 * c.GT_LEN],
                    name='gt_frames_train')

                self.input_frames_test = tf.placeholder(
                    tf.float32,
                    shape=[None, self.height_test, self.width_test, 3 * c.HIST_LEN],
                    name='input_frames_test')
                self.gt_frames_test = tf.placeholder(
                    tf.float32,
                    shape=[None, self.height_test, self.width_test, 3 * c.GT_LEN],
                    name='gt_frames_test')

                # use variable batch_size for more flexibility
                with tf.name_scope('batch_size_train'):
                    self.batch_size_train = tf.shape(
                        self.input_frames_train,
                        name='input_frames_train_shape')[0]
                with tf.name_scope('batch_size_test'):
                    self.batch_size_test = tf.shape(
                        self.input_frames_test,
                        name='input_frames_test_shape')[0]

            ##
            # Scale network setup and calculation
            ##

            self.train_vars = []  # the variables to train in the optimization step

            self.summaries_train = []
            self.scale_preds_train = []  # the generated images at each scale
            self.scale_gts_train = []  # the ground truth images at each scale
            self.d_scale_preds = []  # the predictions from the discriminator model

            self.summaries_test = []
            self.scale_preds_test = []  # the generated images at each scale
            self.scale_gts_test = []  # the ground truth images at each scale

            self.ws = []
            self.bs = []
            for scale_num in xrange(self.num_scale_nets):
                with tf.name_scope('scale_net_' + str(scale_num)):
                    with tf.name_scope('setup'):
                        scale_ws = []
                        scale_bs = []

                        # create weights for kernels
                        with tf.name_scope('weights'):
                            for i in xrange(len(self.scale_kernel_sizes[scale_num])):
                                scale_ws.append(
                                    w([self.scale_kernel_sizes[scale_num][i],
                                       self.scale_kernel_sizes[scale_num][i],
                                       self.scale_layer_fms[scale_num][i],
                                       self.scale_layer_fms[scale_num][i + 1]],
                                      'gen_' + str(scale_num) + '_' + str(i)))

                        with tf.name_scope('biases'):
                            for i in xrange(len(self.scale_kernel_sizes[scale_num])):
                                scale_bs.append(b([self.scale_layer_fms[scale_num][i + 1]]))

                        # add to trainable parameters
                        self.train_vars += scale_ws
                        self.train_vars += scale_bs

                        self.ws.append(scale_ws)
                        self.bs.append(scale_bs)

                    with tf.name_scope('calculation'):
                        with tf.name_scope('calculation_train'):
                            ##
                            # Perform train calculation
                            ##
                            if scale_num > 0:
                                last_scale_pred_train = self.scale_preds_train[scale_num - 1]
                            else:
                                last_scale_pred_train = None

                            train_preds, train_gts = self.generate_predictions(
                                scale_num, self.height_train, self.width_train,
                                self.input_frames_train, self.gt_frames_train,
                                last_scale_pred_train)

                        with tf.name_scope('calculation_test'):
                            ##
                            # Perform test calculation
                            ##
                            if scale_num > 0:
                                last_scale_pred_test = self.scale_preds_test[scale_num - 1]
                            else:
                                last_scale_pred_test = None

                            test_preds, test_gts = self.generate_predictions(
                                scale_num, self.height_test, self.width_test,
                                self.input_frames_test, self.gt_frames_test,
                                last_scale_pred_test, 'test')

                        self.scale_preds_train.append(train_preds)
                        self.scale_gts_train.append(train_gts)

                        self.scale_preds_test.append(test_preds)
                        self.scale_gts_test.append(test_gts)

            ##
            # Get Discriminator Predictions
            ##

            if c.ADVERSARIAL:
                with tf.name_scope('d_preds'):
                    # A list of the prediction tensors for each scale network
                    self.d_scale_preds = []

                    for scale_num in xrange(self.num_scale_nets):
                        with tf.name_scope('scale_' + str(scale_num)):
                            with tf.name_scope('calculation'):
                                input_scale_factor = \
                                    1. / self.scale_gt_inverse_scale_factor[scale_num]
                                input_scale_height = int(self.height_train * input_scale_factor)
                                input_scale_width = int(self.width_train * input_scale_factor)

                                scale_inputs_train = tf.image.resize_images(
                                    self.input_frames_train,
                                    [input_scale_height, input_scale_width])
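                                # the discriminator's scale nets each operate at their
                                # own resolution, so the history frames are downscaled
                                # here to match this scale's generated frames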

                                # get predictions from the d scale networks
                                d_scale_net = discriminator.scale_nets[scale_num]
                                self.d_scale_preds.append(
                                    d_scale_net.generate_all_predictions(
                                        scale_inputs_train,
                                        self.scale_preds_train[scale_num]))

            ##
            # Training
            ##

            with tf.name_scope('training'):
                # global loss is the combined loss from every scale network
                self.global_loss = temporal_combined_loss(
                    self.scale_preds_train, self.scale_gts_train,
                    self.d_scale_preds)

                with tf.name_scope('train_step'):
                    self.global_step = tf.Variable(0,
                                                   trainable=False,
                                                   name='global_step')
                    self.optimizer = tf.train.AdamOptimizer(
                        learning_rate=c.LRATE_G, name='optimizer')

                    self.train_op = self.optimizer.minimize(
                        self.global_loss,
                        global_step=self.global_step,
                        var_list=self.train_vars,
                        name='train_op')

                    # train loss summary
                    loss_summary = tf.summary.scalar('train_loss_G',
                                                     self.global_loss)
                    self.summaries_train.append(loss_summary)

            ##
            # Error
            ##

            with tf.name_scope('error'):
                # error computation
                # get error at largest scale
                with tf.name_scope('psnr_train'):
                    self.psnr_error_train = []
                    for gt_num in xrange(c.GT_LEN):
                        self.psnr_error_train.append(psnr_error(
                            self.scale_preds_train[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                            self.gt_frames_train[:, :, :, gt_num * 3:(gt_num + 1) * 3]))
                with tf.name_scope('sharpdiff_train'):
                    self.sharpdiff_error_train = []
                    for gt_num in xrange(c.GT_LEN):
                        self.sharpdiff_error_train.append(sharp_diff_error(
                            self.scale_preds_train[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                            self.gt_frames_train[:, :, :, gt_num * 3:(gt_num + 1) * 3]))
                with tf.name_scope('ssim_train'):
                    self.ssim_error_train = []
                    for gt_num in xrange(c.GT_LEN):
                        self.ssim_error_train.append(ssim_error(
                            self.scale_preds_train[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                            self.gt_frames_train[:, :, :, gt_num * 3:(gt_num + 1) * 3]))
                with tf.name_scope('psnr_test'):
                    self.psnr_error_test = []
                    for gt_num in xrange(c.GT_LEN):
                        self.psnr_error_test.append(psnr_error(
                            self.scale_preds_test[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                            self.gt_frames_test[:, :, :, gt_num * 3:(gt_num + 1) * 3]))
                with tf.name_scope('sharpdiff_test'):
                    self.sharpdiff_error_test = []
                    for gt_num in xrange(c.GT_LEN):
                        self.sharpdiff_error_test.append(sharp_diff_error(
                            self.scale_preds_test[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                            self.gt_frames_test[:, :, :, gt_num * 3:(gt_num + 1) * 3]))
                with tf.name_scope('ssim_test'):
                    self.ssim_error_test = []
                    for gt_num in xrange(c.GT_LEN):
                        self.ssim_error_test.append(ssim_error(
                            self.scale_preds_test[-1][:, :, :, gt_num * 3:(gt_num + 1) * 3],
                            self.gt_frames_test[:, :, :, gt_num * 3:(gt_num + 1) * 3]))
                for gt_num in xrange(c.GT_LEN):
                    # train error summaries
                    summary_psnr_train = tf.summary.scalar(
                        'train_PSNR_' + str(gt_num),
                        self.psnr_error_train[gt_num])
                    summary_sharpdiff_train = tf.summary.scalar(
                        'train_SharpDiff_' + str(gt_num),
                        self.sharpdiff_error_train[gt_num])
                    summary_ssim_train = tf.summary.scalar(
                        'train_SSIM_' + str(gt_num),
                        self.ssim_error_train[gt_num])
                    self.summaries_train += [
                        summary_psnr_train, summary_sharpdiff_train,
                        summary_ssim_train
                    ]

                    # test error summaries
                    summary_psnr_test = tf.summary.scalar(
                        'test_PSNR_' + str(gt_num),
                        self.psnr_error_test[gt_num])
                    summary_sharpdiff_test = tf.summary.scalar(
                        'test_SharpDiff_' + str(gt_num),
                        self.sharpdiff_error_test[gt_num])
                    summary_ssim_test = tf.summary.scalar(
                        'test_SSIM_' + str(gt_num),
                        self.ssim_error_test[gt_num])
                    self.summaries_test += [
                        summary_psnr_test, summary_sharpdiff_test,
                        summary_ssim_test
                    ]

            # add summaries to visualize in TensorBoard
            self.summaries_train = tf.summary.merge(self.summaries_train)
            self.summaries_test = tf.summary.merge(self.summaries_test)
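
The error blocks above carve one RGB frame at a time out of the channel-stacked tensors with the slice gt_num * 3:(gt_num + 1) * 3, since the c.GT_LEN predicted frames are packed along the channel axis. psnr_error itself is defined elsewhere in this repository; purely as an illustration, a standard TF 1.x PSNR over such a slice could look like the sketch below, assuming pixel values scaled to [0, 1] (so MAX = 1 in 10 * log10(MAX^2 / MSE)):

import tensorflow as tf

def psnr_sketch(gen_frames, gt_frames):
    # illustrative stand-in for psnr_error, not the repository's definition;
    # assumes 4-D [batch, height, width, 3] tensors with values in [0, 1]
    shape = tf.shape(gen_frames)
    num_pixels = tf.to_float(shape[1] * shape[2] * shape[3])
    mse = tf.reduce_sum(tf.square(gen_frames - gt_frames), [1, 2, 3]) / num_pixels
    # TF 1.x has no log10, so compute it as log(x) / log(10)
    psnr = 10. * tf.log(1. / mse) / tf.log(10.)
    return tf.reduce_mean(psnr)

# e.g. PSNR of the first predicted frame at the largest scale:
# psnr_0 = psnr_sketch(preds[:, :, :, 0:3], gts[:, :, :, 0:3])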