Exemplo n.º 1
0
class Demo(BaseDemo):
    """Train and evaluate a network that predicts an image and a flow field.

    ``self.model`` is called with the stacked input frames and the target
    frame and returns ``(im_pred, flow)``; ``self.model_gt`` additionally
    receives the ground-truth motion. In every loop the prediction is compared
    against the last ``self.im_channel`` channels of the input (the
    "inverse warping loss").

    NOTE(review): the code uses the legacy PyTorch API (``Variable``,
    ``volatile=True``, ``loss.data[0]``) -- confirm the targeted PyTorch
    version before modernising those calls.
    """

    def __init__(self, args):
        """Run the base-class setup, then build the networks and visualizer."""
        super(Demo, self).__init__(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)

    def init_model(self, m_kernel):
        """Create ``self.model`` and ``self.model_gt`` and return both.

        Both networks are moved to the GPU when CUDA is available. When
        ``self.init_model_path`` is set (non-empty), the state dict stored
        there is loaded into ``self.model`` only.

        Args:
            m_kernel: object exposing ``shape``; only ``m_kernel.shape[1]``
                is read here.

        Returns:
            The tuple ``(self.model, self.model_gt)``.
        """
        self.model = Net(self.im_size, self.im_size, self.im_channel,
                         self.num_frame, m_kernel.shape[1], self.m_range,
                         self.net_depth)
        self.model_gt = GtNet(self.im_size, self.im_size, self.im_channel,
                              self.num_frame, m_kernel.shape[1], self.m_range)
        if torch.cuda.is_available():
            # model = torch.nn.DataParallel(model).cuda()
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Test the value, not the identity: `is not ''` compares object
        # identity and is flagged with a SyntaxWarning on Python 3.8+.
        if self.init_model_path:
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    def train(self):
        """Optimise ``self.model`` with Adam for ``self.train_epoch`` iterations.

        Each iteration draws one training batch, minimises the summed L1
        inverse-warping loss plus 0.1 times an L1 penalty on neighbouring
        flow differences, and logs the current loss together with averages
        over the last 100 iterations. ``self.validate()`` is called every
        ``self.test_interval`` iterations.

        Raises:
            SystemExit: if ``self.data.name`` is not a supported dataset.
        """
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        base_loss, train_loss = [], []
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            # get_next_batch returns a differently sized tuple per dataset
            # family; training only needs the image array.
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, _, _, _ = self.data.get_next_batch(self.data.train_images)
            elif self.data.name in ['box2', 'mnist2']:
                im, _, _ = self.data.get_next_batch(self.data.train_images)
            elif self.data.name in [
                    'robot64', 'mpii64', 'mpi128', 'nyuv2', 'robot128',
                    'viper64', 'viper128', 'robot128c'
            ]:
                im = self.data.get_next_batch(self.data.train_images)
            else:
                logging.error('%s data not supported', self.data.name)
                # Exit with a failure status; a bare sys.exit() reports success.
                sys.exit(1)
            # All but the last entry of axis 1 are stacked along the channel
            # axis as input; the last entry is the target frame.
            im_input = im[:, :-1, :, :, :].reshape(self.batch_size, -1,
                                                   self.im_size, self.im_size)
            im_output = im[:, -1, :, :, :]
            im_input = Variable(torch.from_numpy(im_input).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            if torch.cuda.is_available():
                im_input, im_output = im_input.cuda(), im_output.cuda()
            im_pred, flow = self.model(im_input, im_output)
            flow = flow * self.im_size / 2  # resize flow from [-1, 1] back to image scale
            im_diff = im_pred - im_input[:, -self.
                                         im_channel:, :, :]  # inverse warping loss
            loss = torch.abs(im_diff).sum()
            # loss = (im_diff * im_diff).sum()
            # Differences between neighbouring flow values along axes 2 and 3.
            flow_diff1 = flow[:, :, 1:, :] - flow[:, :, :-1, :]
            flow_diff2 = flow[:, :, :, 1:] - flow[:, :, :, :-1]
            loss = loss + 0.1 * (torch.abs(flow_diff1).sum() +
                                 torch.abs(flow_diff2).sum())
            # loss = loss + 0.1 * ((flow_diff1 * flow_diff1).sum() + (flow_diff2 * flow_diff2).sum())
            loss.backward()
            optimizer.step()

            # Keep a sliding window of at most 100 recent losses.
            train_loss.append(loss.data[0])
            if len(train_loss) > 100:
                train_loss.pop(0)
            ave_train_loss = sum(train_loss) / float(len(train_loss))
            # Baseline: L1 distance between the last input frame and the target.
            base_loss.append(
                torch.abs(im_input[:, -self.im_channel:, :, :] -
                          im_output).sum().data[0])
            if len(base_loss) > 100:
                base_loss.pop(0)
            ave_base_loss = sum(base_loss) / float(len(base_loss))
            logging.info(
                'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                epoch, loss.data[0], ave_train_loss, ave_base_loss)
            if (epoch + 1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test(self):
        """Evaluate ``self.model`` on ``self.test_epoch`` test batches.

        Logs the mean test loss, the mean baseline loss and their difference,
        and -- when the last batch carried ground-truth motion -- the mean
        endpoint error between the predicted flow and that motion.

        Returns:
            ``(base_loss - test_loss) / (base_loss + 1e-5)`` computed from the
            mean losses.

        Raises:
            SystemExit: if ``self.data.name`` is not a supported dataset.
        """
        base_loss, test_loss = [], []
        test_epe = []
        # Stays None for datasets without ground-truth motion (and when the
        # loop below does not run at all).
        motion = None
        for epoch in range(self.test_epoch):
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, motion, _, _ = self.data.get_next_batch(
                    self.data.test_images)
            elif self.data.name in ['box2', 'mnist2']:
                im, motion, _ = self.data.get_next_batch(self.data.test_images)
            elif self.data.name in [
                    'robot64', 'mpii64', 'mpi128', 'nyuv2', 'robot128',
                    'viper64', 'viper128', 'robot128c'
            ]:
                im, motion = self.data.get_next_batch(
                    self.data.test_images), None
            elif self.data.name in ['mpii64_sample']:
                im, motion = self.data.get_next_batch(
                    self.data.test_images), None
                # Keep only the last num_frame entries of axis 1.
                im = im[:, -self.num_frame:, :, :, :]
            else:
                logging.error('%s data not supported', self.data.name)
                # Exit with a failure status; a bare sys.exit() reports success.
                sys.exit(1)
            im_input = im[:, :-1, :, :, :].reshape(self.batch_size, -1,
                                                   self.im_size, self.im_size)
            im_output = im[:, -1, :, :, :]
            im_input = Variable(torch.from_numpy(im_input).float(),
                                volatile=True)
            im_output = Variable(torch.from_numpy(im_output).float(),
                                 volatile=True)
            if torch.cuda.is_available():
                im_input, im_output = im_input.cuda(), im_output.cuda()
            im_pred, flow = self.model(im_input, im_output)
            flow = flow * self.im_size / 2  # resize flow from [-1, 1] back to image scale
            im_diff = im_pred - im_input[:, -self.
                                         im_channel:, :, :]  # inverse warping loss
            loss = torch.abs(im_diff).sum()
            # loss = (im_diff * im_diff).sum()
            flow_diff1 = flow[:, :, 1:, :] - flow[:, :, :-1, :]
            flow_diff2 = flow[:, :, :, 1:] - flow[:, :, :, :-1]
            loss = loss + 0.1 * (torch.abs(flow_diff1).sum() +
                                 torch.abs(flow_diff2).sum())
            # loss = loss + 0.01 * ((flow_diff1 * flow_diff1).sum() + (flow_diff2 * flow_diff2).sum())

            test_loss.append(loss.data[0])
            base_loss.append(
                torch.abs(im_input[:, -self.im_channel:, :, :] -
                          im_output).sum().data[0])

            if motion is None:
                gt_motion = None
            else:
                # Ground-truth motion at the second-to-last index of axis 1.
                gt_motion = motion[:, -2, :, :, :]
                gt_motion = Variable(torch.from_numpy(gt_motion).float())
                if torch.cuda.is_available():
                    gt_motion = gt_motion.cuda()
                # Endpoint error: Euclidean norm over axis 1 of the flow
                # difference, averaged over all remaining elements.
                epe = (flow - gt_motion) * (flow - gt_motion)
                epe = torch.sqrt(epe.sum(1))
                epe = epe.sum() / epe.numel()
                test_epe.append(epe.cpu().data[0])
            if self.display:
                self.visualizer.visualize_result(im_input, im_output, im_pred,
                                                 flow, gt_motion,
                                                 'test_%d.png' % epoch)
            if self.display_all:
                # NOTE(review): these names are indexed by sample only, so
                # each epoch reuses them and they share the 'test_%d.png'
                # pattern used above -- confirm this is intended.
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(im_input, im_output,
                                                     im_pred, flow, gt_motion,
                                                     'test_%d.png' % i, i)
        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        # The small constant guards against division by a zero baseline.
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average test loss: %.2f, base loss: %.2f', test_loss,
                     base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss,
                     improve_percent)
        if motion is not None:
            test_epe = numpy.mean(numpy.asarray(test_epe))
            logging.info('average test endpoint error: %.2f', test_epe)
        return improve_percent

    def test_gt(self):
        """Evaluate ``self.model_gt``, which is fed the ground-truth motion.

        Only datasets that provide motion ('box', 'mnist', 'box_complex',
        'box2', 'mnist2') are accepted. Logs the mean loss, the mean baseline
        loss, their difference and the mean endpoint error.

        Returns:
            ``(base_loss - test_loss) / (base_loss + 1e-5)`` computed from the
            mean losses.

        Raises:
            SystemExit: if ``self.data.name`` is not a supported dataset.
        """
        base_loss, test_loss = [], []
        test_epe = []
        for epoch in range(self.test_epoch):
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, motion, _, _ = self.data.get_next_batch(
                    self.data.test_images)
            elif self.data.name in ['box2', 'mnist2']:
                im, motion, _ = self.data.get_next_batch(self.data.test_images)
            else:
                logging.error('%s data not supported in test_gt',
                              self.data.name)
                # Exit with a failure status; a bare sys.exit() reports success.
                sys.exit(1)
            im_input = im[:, :-1, :, :, :].reshape(self.batch_size, -1,
                                                   self.im_size, self.im_size)
            im_output = im[:, -1, :, :, :]
            # Ground-truth motion at the second-to-last index of axis 1.
            gt_motion = motion[:, -2, :, :, :]
            im_input = Variable(torch.from_numpy(im_input).float(),
                                volatile=True)
            im_output = Variable(torch.from_numpy(im_output).float(),
                                 volatile=True)
            gt_motion = Variable(torch.from_numpy(gt_motion).float())
            if torch.cuda.is_available():
                im_input, im_output = im_input.cuda(), im_output.cuda()
                gt_motion = gt_motion.cuda()
            # Every dataset that passes the check above uses the same call.
            im_pred, flow = self.model_gt(im_input, im_output, gt_motion)
            flow = flow * self.im_size / 2  # resize flow from [-1, 1] back to image scale
            im_diff = im_pred - im_input[:, -self.
                                         im_channel:, :, :]  # inverse warping loss
            loss = torch.abs(im_diff).sum()
            flow_diff1 = flow[:, :, 1:, :] - flow[:, :, :-1, :]
            flow_diff2 = flow[:, :, :, 1:] - flow[:, :, :, :-1]
            loss = loss + 0.1 * (torch.abs(flow_diff1).sum() +
                                 torch.abs(flow_diff2).sum())

            test_loss.append(loss.data[0])
            base_loss.append(
                torch.abs(im_input[:, -self.im_channel:, :, :] -
                          im_output).sum().data[0])
            # Endpoint error: Euclidean norm over axis 1 of the flow
            # difference, averaged over all remaining elements.
            epe = (flow - gt_motion) * (flow - gt_motion)
            epe = torch.sqrt(epe.sum(1))
            epe = epe.sum() / epe.numel()
            test_epe.append(epe.cpu().data[0])
            if self.display:
                self.visualizer.visualize_result(im_input, im_output, im_pred,
                                                 flow, gt_motion,
                                                 'test_gt.png')
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(im_input, im_output,
                                                     im_pred, flow, gt_motion,
                                                     'test_gt_%d.png' % i, i)
        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        # The small constant guards against division by a zero baseline.
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average ground truth test loss: %.2f, base loss: %.2f',
                     test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss,
                     improve_percent)
        test_epe = numpy.mean(numpy.asarray(test_epe))
        logging.info('average ground truth test endpoint error: %.2f',
                     test_epe)
        return improve_percent
Exemplo n.º 2
0
class Demo(BaseDemo):
    """Train and evaluate a network that predicts the next frame.

    ``self.model`` is called with the stacked input frames and returns
    ``(im_pred, m_mask, d_mask)``; the prediction is compared against the
    target frame with a summed L1 loss. ``self.model_gt`` is fed ground-truth
    motion and depth instead.

    ``self.motion2flow`` is used below but not defined in this class --
    presumably inherited from ``BaseDemo``.

    NOTE(review): the code uses the legacy PyTorch API (``Variable``,
    ``loss.data[0]``) -- confirm the targeted PyTorch version before
    modernising those calls.
    """

    def __init__(self, args):
        """Run the base-class setup, then build the networks and visualizer."""
        super(Demo, self).__init__(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)

    def init_model(self, m_kernel):
        """Create ``self.model`` and ``self.model_gt`` and return both.

        Both networks are moved to the GPU when CUDA is available. When
        ``self.init_model_path`` is set (non-empty), the state dict stored
        there is loaded into ``self.model`` only.

        Args:
            m_kernel: object exposing ``shape``; ``m_kernel.shape[1]`` and the
                object itself are passed to both network constructors.

        Returns:
            The tuple ``(self.model, self.model_gt)``.
        """
        self.model = Net(self.im_size, self.im_size, self.im_channel,
                         self.num_frame - 1, m_kernel.shape[1], self.m_range,
                         m_kernel)
        self.model_gt = GtNet(self.im_size, self.im_size, self.im_channel,
                              self.num_frame - 1, m_kernel.shape[1],
                              self.m_range, m_kernel)
        if torch.cuda.is_available():
            # model = torch.nn.DataParallel(model).cuda()
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Test the value, not the identity: `is not ''` compares object
        # identity and is flagged with a SyntaxWarning on Python 3.8+.
        if self.init_model_path:
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    def train(self):
        """Optimise ``self.model`` with Adam for ``self.train_epoch`` iterations.

        Each iteration draws one training batch, minimises the summed L1
        difference between prediction and target frame, and logs the current
        loss together with averages over the last 100 iterations.
        ``self.validate()`` is called every ``self.test_interval`` iterations.

        Raises:
            SystemExit: if ``self.data.name`` is not a supported dataset.
        """
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        base_loss, train_loss = [], []
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            # get_next_batch returns a differently sized tuple per dataset
            # family; training only needs the image array.
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, _, _, _ = self.data.get_next_batch(self.data.train_images)
            elif self.data.name in ['box2', 'mnist2']:
                im, _, _ = self.data.get_next_batch(self.data.train_images)
            elif self.data.name in ['robot64', 'mpii64', 'nyuv2']:
                im = self.data.get_next_batch(self.data.train_meta)
            else:
                logging.error('%s data not supported', self.data.name)
                # Exit with a failure status; a bare sys.exit() reports success.
                sys.exit(1)
            # All but the last entry of axis 1 are stacked along the channel
            # axis as input; the last entry is the target frame.
            im_input = im[:, :-1, :, :, :].reshape(self.batch_size, -1,
                                                   self.im_size, self.im_size)
            im_output = im[:, -1, :, :, :]
            im_input = Variable(torch.from_numpy(im_input).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            if torch.cuda.is_available():
                im_input, im_output = im_input.cuda(), im_output.cuda()
            im_pred, m_mask, d_mask = self.model(im_input)
            im_diff = im_pred - im_output
            loss = torch.abs(im_diff).sum()
            loss.backward()
            optimizer.step()

            # Keep a sliding window of at most 100 recent losses.
            train_loss.append(loss.data[0])
            if len(train_loss) > 100:
                train_loss.pop(0)
            ave_train_loss = sum(train_loss) / float(len(train_loss))
            # Baseline: L1 distance between the last input frame and the target.
            base_loss.append(
                torch.abs(im_input[:, -self.im_channel:, :, :] -
                          im_output).sum().data[0])
            if len(base_loss) > 100:
                base_loss.pop(0)
            ave_base_loss = sum(base_loss) / float(len(base_loss))
            logging.info(
                'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                epoch, loss.data[0], ave_train_loss, ave_base_loss)
            if (epoch + 1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test(self):
        """Evaluate ``self.model`` on ``self.test_epoch`` test batches.

        Logs the mean test loss, the mean baseline loss and their difference,
        and -- when the last batch carried ground-truth motion -- the mean
        endpoint error between the derived flow and that motion.

        Returns:
            ``(base_loss - test_loss) / (base_loss + 1e-5)`` computed from the
            mean losses.

        Raises:
            SystemExit: if ``self.data.name`` is not a supported dataset.
        """
        base_loss, test_loss = [], []
        test_epe = []
        # Bound up front so the check after the loop cannot raise NameError
        # when the loop body never runs (test_epoch == 0).
        gt_motion = None
        for epoch in range(self.test_epoch):
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, motion, _, _ = self.data.get_next_batch(
                    self.data.test_images)
            elif self.data.name in ['box2', 'mnist2']:
                im, motion, _ = self.data.get_next_batch(self.data.test_images)
            elif self.data.name in ['robot64', 'mpii64', 'nyuv2']:
                im, motion = self.data.get_next_batch(
                    self.data.test_meta), None
            elif self.data.name in ['mpii64_sample']:
                im, motion = self.data.get_next_batch(
                    self.data.test_meta), None
                # Keep only the last num_frame entries of axis 1.
                im = im[:, -self.num_frame:, :, :, :]
            else:
                logging.error('%s data not supported', self.data.name)
                # Exit with a failure status; a bare sys.exit() reports success.
                sys.exit(1)
            im_input = im[:, :-1, :, :, :].reshape(self.batch_size, -1,
                                                   self.im_size, self.im_size)
            im_output = im[:, -1, :, :, :]
            im_input = Variable(torch.from_numpy(im_input).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            if torch.cuda.is_available():
                im_input, im_output = im_input.cuda(), im_output.cuda()
            im_pred, m_mask, d_mask = self.model(im_input)
            im_diff = im_pred - im_output
            loss = torch.abs(im_diff).sum()

            test_loss.append(loss.data[0])
            base_loss.append(
                torch.abs(im_input[:, -self.im_channel:, :, :] -
                          im_output).sum().data[0])
            flow = self.motion2flow(m_mask)
            depth = self.mask2depth(d_mask)

            if motion is None:
                gt_motion = None
            else:
                # Ground-truth motion at the second-to-last index of axis 1.
                gt_motion = motion[:, -2, :, :, :]
                gt_motion = Variable(torch.from_numpy(gt_motion).float())
                if torch.cuda.is_available():
                    gt_motion = gt_motion.cuda()
                # Endpoint error: Euclidean norm over axis 1 of the flow
                # difference, averaged over all remaining elements.
                epe = (flow - gt_motion) * (flow - gt_motion)
                epe = torch.sqrt(epe.sum(1))
                epe = epe.sum() / epe.numel()
                test_epe.append(epe.cpu().data[0])
            if self.display:
                self.visualizer.visualize_result(im_input, im_output, im_pred,
                                                 flow, gt_motion, depth,
                                                 'test_%d.png' % epoch)
            if self.display_all:
                # NOTE(review): these names are indexed by sample only, so
                # each epoch reuses them and they share the 'test_%d.png'
                # pattern used above -- confirm this is intended.
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(im_input, im_output,
                                                     im_pred, flow, gt_motion,
                                                     depth, 'test_%d.png' % i,
                                                     i)
        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        # The small constant guards against division by a zero baseline.
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average test loss: %.2f, base loss: %.2f', test_loss,
                     base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss,
                     improve_percent)
        if gt_motion is not None:
            test_epe = numpy.mean(numpy.asarray(test_epe))
            logging.info('average test endpoint error: %.2f', test_epe)
        return improve_percent

    def test_gt(self):
        """Evaluate ``self.model_gt`` using ground-truth motion and depth.

        For 'box', 'mnist' and 'box_complex' the network receives the motion
        labels (called with the extra ``'label'`` argument); for 'box2' and
        'mnist2' it receives the motion array itself. Logs the mean loss, the
        mean baseline loss, their difference and the mean endpoint error.

        Returns:
            ``(base_loss - test_loss) / (base_loss + 1e-5)`` computed from the
            mean losses.

        Raises:
            SystemExit: if ``self.data.name`` is not a supported dataset.
        """
        base_loss, test_loss = [], []
        test_epe = []
        for epoch in range(self.test_epoch):
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, motion, motion_label, depth = self.data.get_next_batch(
                    self.data.test_images)
                # Only these datasets provide motion labels.
                gt_motion_label = motion_label[:, -2, :, :, :]
                gt_motion_label = Variable(torch.from_numpy(gt_motion_label))
                if torch.cuda.is_available():
                    gt_motion_label = gt_motion_label.cuda()
            elif self.data.name in ['box2', 'mnist2']:
                im, motion, depth = self.data.get_next_batch(
                    self.data.test_images)
            else:
                logging.error('%s data not supported in test_gt',
                              self.data.name)
                # Exit with a failure status; a bare sys.exit() reports success.
                sys.exit(1)
            im_input = im[:, :-1, :, :, :].reshape(self.batch_size, -1,
                                                   self.im_size, self.im_size)
            im_output = im[:, -1, :, :, :]
            # Ground truth at the second-to-last index of axis 1.
            gt_motion = motion[:, -2, :, :, :]
            gt_depth = depth[:, -2, :, :, :]
            im_input = Variable(torch.from_numpy(im_input).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            gt_motion = Variable(torch.from_numpy(gt_motion).float())
            gt_depth = Variable(torch.from_numpy(gt_depth).float())
            if torch.cuda.is_available():
                im_input, im_output = im_input.cuda(), im_output.cuda()
                gt_motion = gt_motion.cuda()
                gt_depth = gt_depth.cuda()
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im_pred, m_mask, d_mask = self.model_gt(
                    im_input, gt_motion_label, gt_depth, 'label')
            elif self.data.name in ['box2', 'mnist2']:
                im_pred, m_mask, d_mask = self.model_gt(
                    im_input, gt_motion, gt_depth)
            im_diff = im_pred - im_output
            loss = torch.abs(im_diff).sum()

            test_loss.append(loss.data[0])
            base_loss.append(
                torch.abs(im_input[:, -self.im_channel:, :, :] -
                          im_output).sum().data[0])
            flow = self.motion2flow(m_mask)
            # From here on `depth` holds the depth derived from the predicted
            # mask, replacing the array fetched from the dataset above.
            depth = self.mask2depth(d_mask)
            # Endpoint error: Euclidean norm over axis 1 of the flow
            # difference, averaged over all remaining elements.
            epe = (flow - gt_motion) * (flow - gt_motion)
            epe = torch.sqrt(epe.sum(1))
            epe = epe.sum() / epe.numel()
            test_epe.append(epe.cpu().data[0])
            if self.display:
                self.visualizer.visualize_result(im_input, im_output, im_pred,
                                                 flow, gt_motion, depth,
                                                 'test_gt.png')
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(im_input, im_output,
                                                     im_pred, flow, gt_motion,
                                                     depth,
                                                     'test_gt_%d.png' % i, i)
        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        # The small constant guards against division by a zero baseline.
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average ground truth test loss: %.2f, base loss: %.2f',
                     test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss,
                     improve_percent)
        test_epe = numpy.mean(numpy.asarray(test_epe))
        logging.info('average ground truth test endpoint error: %.2f',
                     test_epe)
        return improve_percent

    def mask2depth(self, d_mask):
        """Collapse a per-level mask into a single-channel depth map.

        Channel ``i`` of ``d_mask`` is weighted by ``i`` and the weighted
        channels are summed over axis 1.

        Args:
            d_mask: 4-D tensor whose ``size()`` unpacks as
                ``(batch_size, num_depth, height, width)``.

        Returns:
            A ``(batch_size, 1, height, width)`` ``Variable`` (on the GPU when
            CUDA is available) holding the weighted sum.
        """
        [batch_size, num_depth, height, width] = d_mask.size()
        # depth_number[:, i] is the constant i, i.e. the weight of level i.
        depth_number = Variable(
            torch.zeros(batch_size, num_depth, height, width))
        if torch.cuda.is_available():
            depth_number = depth_number.cuda()
        for i in range(num_depth):
            depth_number[:, i, :, :] = i
        depth = Variable(torch.zeros(batch_size, 1, height, width))
        if torch.cuda.is_available():
            depth = depth.cuda()
        depth[:, 0, :, :] = (d_mask * depth_number).sum(1)
        return depth
Exemplo n.º 3
0
class Demo(BaseDemo):
    """Train and evaluate a next-frame network with an occlusion-masked loss.

    ``self.model`` is called with the stacked input frames and returns
    ``(im_pred, m_mask, occlude, unocclude)``. The prediction error is
    weighted by ``unocclude`` before the L1 loss is taken. ``self.model_gt``
    is fed ground-truth motion labels instead.

    ``self.motion2flow`` is used below but not defined in this class --
    presumably inherited from ``BaseDemo``.

    NOTE(review): the code uses the legacy PyTorch API (``Variable``,
    ``loss.data[0]``), and ``unocclude.sum(3).sum(2).expand_as(...)`` appears
    to rely on reductions keeping the reduced dimension -- confirm the
    targeted PyTorch version before modernising.
    """

    def __init__(self, args):
        """Run the base-class setup, then build the networks and visualizer."""
        super(Demo, self).__init__(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)

    def init_model(self, m_kernel):
        """Create ``self.model`` and ``self.model_gt`` and return both.

        Both networks are built for 3 image channels and moved to the GPU
        when CUDA is available. When ``self.init_model_path`` is set
        (non-empty), the state dict stored there is loaded into
        ``self.model`` only.

        Args:
            m_kernel: object exposing ``shape``; ``m_kernel.shape[1]`` and the
                object itself are passed to both network constructors.

        Returns:
            The tuple ``(self.model, self.model_gt)``.
        """
        self.model = Net(self.im_size, self.im_size, 3, self.num_frame - 1,
                             m_kernel.shape[1], self.m_range, m_kernel)
        self.model_gt = GtNet(self.im_size, self.im_size, 3, self.num_frame - 1,
                                  m_kernel.shape[1], self.m_range, m_kernel)
        if torch.cuda.is_available():
            # model = torch.nn.DataParallel(model).cuda()
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Test the value, not the identity: `is not ''` compares object
        # identity and is flagged with a SyntaxWarning on Python 3.8+.
        if self.init_model_path:
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    def train_unsupervised(self):
        """Optimise ``self.model`` with Adam for ``self.train_epoch`` iterations.

        Each iteration draws one training batch, minimises the
        ``unocclude``-weighted L1 prediction error, and logs the current loss
        together with averages over the last 100 iterations.
        ``self.validate()`` is called every ``self.test_interval`` iterations.
        """
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        base_loss, train_loss = [], []
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            im, _, _, _ = self.data.get_next_batch(self.data.train_images)
            # All but the last entry of axis 1 are stacked along the channel
            # axis as input; the last entry is the target frame.
            im_input = im[:, :-1, :, :, :].reshape(self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, -1, :, :, :]
            im_input = Variable(torch.from_numpy(im_input).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            if torch.cuda.is_available():
                im_input, im_output = im_input.cuda(), im_output.cuda()
            im_pred, m_mask, occlude, unocclude = self.model(im_input)
            # Weight the error by the unocclude mask, divide by the mask's
            # sum over axes 3 and 2, then rescale by the image area.
            im_diff = unocclude.expand_as(im_output) * (im_pred - im_output)
            im_diff = im_diff / unocclude.sum(3).sum(2).expand_as(im_diff)
            loss = torch.abs(im_diff).sum() * im_diff.size(2) * im_diff.size(3)
            loss.backward()
            optimizer.step()

            # Keep a sliding window of at most 100 recent losses.
            train_loss.append(loss.data[0])
            if len(train_loss) > 100:
                train_loss.pop(0)
            ave_train_loss = sum(train_loss) / float(len(train_loss))
            # Baseline: L1 distance between the last 3 input channels and the target.
            base_loss.append(torch.abs(im_input[:, -3:, :, :] - im_output).sum().data[0])
            if len(base_loss) > 100:
                base_loss.pop(0)
            ave_base_loss = sum(base_loss) / float(len(base_loss))
            logging.info('epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                         epoch, loss.data[0], ave_train_loss, ave_base_loss)
            if (epoch+1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test_unsupervised(self):
        """Evaluate ``self.model`` on ``self.test_epoch`` test batches.

        Logs the mean test loss, the mean baseline loss, their difference and
        the mean endpoint error between the derived flow and the ground-truth
        motion.

        Returns:
            ``(base_loss - test_loss) / (base_loss + 1e-5)`` computed from the
            mean losses.
        """
        base_loss, test_loss = [], []
        test_epe = []
        for epoch in range(self.test_epoch):
            im, motion, _, _ = self.data.get_next_batch(self.data.test_images)
            im_input = im[:, :-1, :, :, :].reshape(self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, -1, :, :, :]
            # Ground-truth motion at the second-to-last index of axis 1.
            gt_motion = motion[:, -2, :, :, :]
            im_input = Variable(torch.from_numpy(im_input).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            gt_motion = Variable(torch.from_numpy(gt_motion).float())
            if torch.cuda.is_available():
                im_input, im_output = im_input.cuda(), im_output.cuda()
                gt_motion = gt_motion.cuda()
            im_pred, m_mask, occlude, unocclude = self.model(im_input)
            # Same unocclude-weighted loss as in train_unsupervised.
            im_diff = unocclude.expand_as(im_output) * (im_pred - im_output)
            im_diff = im_diff / unocclude.sum(3).sum(2).expand_as(im_diff)
            loss = torch.abs(im_diff).sum() * im_diff.size(2) * im_diff.size(3)

            test_loss.append(loss.data[0])
            base_loss.append(torch.abs(im_input[:, -3:, :, :] - im_output).sum().data[0])
            flow = self.motion2flow(m_mask)
            # Endpoint error: Euclidean norm over axis 1 of the flow
            # difference, averaged over all remaining elements.
            epe = (flow - gt_motion) * (flow - gt_motion)
            epe = torch.sqrt(epe.sum(1))
            epe = epe.sum() / epe.numel()
            test_epe.append(epe.cpu().data[0])
            if self.display:
                self.visualizer.visualize_result(im_input, im_output, im_pred, flow, gt_motion,
                                                 unocclude, occlude, 'test_%d.png' % epoch)
        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        # The small constant guards against division by a zero baseline.
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average test loss: %.2f, base loss: %.2f', test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss, improve_percent)
        test_epe = numpy.mean(numpy.asarray(test_epe))
        logging.info('average test endpoint error: %.2f', test_epe)
        return improve_percent

    def test_gt_unsupervised(self):
        """Evaluate ``self.model_gt``, which is fed ground-truth motion labels.

        Logs the mean loss, the mean baseline loss, their difference and the
        mean endpoint error between the derived flow and the ground-truth
        motion.

        Returns:
            ``(base_loss - test_loss) / (base_loss + 1e-5)`` computed from the
            mean losses.
        """
        base_loss, test_loss = [], []
        test_epe = []
        for epoch in range(self.test_epoch):
            im, motion, motion_label, _ = self.data.get_next_batch(self.data.test_images)
            im_input = im[:, :-1, :, :, :].reshape(self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, -1, :, :, :]
            # Ground truth at the second-to-last index of axis 1.
            gt_motion = motion[:, -2, :, :, :]
            gt_motion_label = motion_label[:, -2, :, :, :]
            im_input = Variable(torch.from_numpy(im_input).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            gt_motion = Variable(torch.from_numpy(gt_motion).float())
            # The labels keep their original dtype (no .float() conversion).
            gt_motion_label = Variable(torch.from_numpy(gt_motion_label))
            if torch.cuda.is_available():
                im_input, im_output = im_input.cuda(), im_output.cuda()
                gt_motion = gt_motion.cuda()
                gt_motion_label = gt_motion_label.cuda()
            im_pred, m_mask, occlude, unocclude = self.model_gt(im_input, gt_motion_label)
            # Same unocclude-weighted loss as in train_unsupervised.
            im_diff = unocclude.expand_as(im_output) * (im_pred - im_output)
            im_diff = im_diff / unocclude.sum(3).sum(2).expand_as(im_diff)
            loss = torch.abs(im_diff).sum() * im_diff.size(2) * im_diff.size(3)

            test_loss.append(loss.data[0])
            base_loss.append(torch.abs(im_input[:, -3:, :, :] - im_output).sum().data[0])
            flow = self.motion2flow(m_mask)
            # Endpoint error: Euclidean norm over axis 1 of the flow
            # difference, averaged over all remaining elements.
            epe = (flow - gt_motion) * (flow - gt_motion)
            epe = torch.sqrt(epe.sum(1))
            epe = epe.sum() / epe.numel()
            test_epe.append(epe.cpu().data[0])
            if self.display:
                self.visualizer.visualize_result(im_input, im_output, im_pred, flow, gt_motion,
                                                 unocclude, occlude, 'test_gt.png')
        test_loss = numpy.mean(numpy.asarray(test_loss))
        base_loss = numpy.mean(numpy.asarray(base_loss))
        improve_loss = base_loss - test_loss
        # The small constant guards against division by a zero baseline.
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average ground truth test loss: %.2f, base loss: %.2f', test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss, improve_percent)
        test_epe = numpy.mean(numpy.asarray(test_epe))
        logging.info('average ground truth test endpoint error: %.2f', test_epe)
        return improve_percent
Exemplo n.º 4
0
class Demo(BaseDemo):
    def __init__(self, args):
        """Set up the dataset, the two networks and the visualizer.

        Args:
            args: options object forwarded to the base class, the dataset
                constructors and the visualizer. ``args.data`` selects the
                bidirectional dataset ('box' or 'mnist').
        """
        super(Demo, self).__init__(args)
        # For any other args.data value self.data is not reassigned here
        # (presumably it was already set by the base class -- confirm).
        if args.data == 'box':
            self.data = BoxDataBidirect(args)
        elif args.data == 'mnist':
            self.data = MnistDataBidirect(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)
        # Floor division keeps this an int on Python 3 as well; the train and
        # test methods use it as a slice bound and as an array index.
        self.num_inputs = (self.num_frame - 1) // 2

    def init_model(self, m_kernel):
        """Create the prediction network and its ground-truth counterpart.

        Both networks are built for 3-channel images with
        ``(self.num_frame - 1) // 2`` input frames, moved to the GPU when
        CUDA is available, and the prediction network is optionally
        initialised from the checkpoint at ``self.init_model_path``.

        Args:
            m_kernel: array-like with a ``shape`` attribute;
                ``m_kernel.shape[1]`` and the kernel itself are passed to
                both network constructors.

        Returns:
            Tuple ``(self.model, self.model_gt)``.
        """
        # Integer division: the frame count must stay an int on Python 3.
        num_inputs = (self.num_frame - 1) // 2
        self.model = Net(self.im_size, self.im_size, 3, num_inputs,
                         m_kernel.shape[1], self.m_range, m_kernel)
        self.model_gt = GtNet(self.im_size, self.im_size, 3, num_inputs,
                              m_kernel.shape[1], self.m_range, m_kernel)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Compare by value; identity against a string literal is
        # implementation-dependent and warns on Python 3.8+.
        if self.init_model_path != '':
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    def train_unsupervised(self):
        """Train ``self.model`` with an L1 reconstruction loss.

        Each of the ``self.train_epoch`` iterations draws one training batch,
        feeds the frames before index ``self.num_inputs`` (forward input) and
        the frames after it in reverse order (backward input) to the model,
        and minimises the summed absolute difference between the prediction
        and the frame at index ``self.num_inputs``. Running averages of the
        training loss and of a copy-the-input baseline are logged, and
        ``self.validate()`` is run every ``self.test_interval`` iterations.
        """
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        base_loss, train_loss = [], []
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            im, _, _, _, _, _ = self.data.get_next_batch(
                self.data.train_images)
            im_input_f = im[:, :self.num_inputs, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            # Negative step: walks from the last frame back to, but not
            # including, index num_inputs.
            im_input_b = im[:, :self.num_inputs:-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, self.num_inputs, :, :, :]
            im_input_f = Variable(torch.from_numpy(im_input_f).float())
            im_input_b = Variable(torch.from_numpy(im_input_b).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            if torch.cuda.is_available():
                im_input_f, im_input_b = im_input_f.cuda(), im_input_b.cuda()
                im_output = im_output.cuda()
            im_pred, m_mask_f, disappear_f, attn_f, m_mask_b, disappear_b, attn_b = \
                self.model(im_input_f, im_input_b)
            loss = torch.abs(im_pred - im_output).sum()
            loss.backward()
            optimizer.step()

            train_loss.append(loss.data[0])
            if len(train_loss) > 100:
                train_loss.pop(0)
            ave_train_loss = sum(train_loss) / float(len(train_loss))
            # Baseline: reuse the last three channels of either input as the
            # prediction.
            base_loss.append(
                torch.abs(im_input_f[:, -3:, :, :] - im_output).sum().data[0])
            base_loss.append(
                torch.abs(im_input_b[:, -3:, :, :] - im_output).sum().data[0])
            # Two entries are added per iteration, so trim back to the window
            # size instead of popping once; a single pop lets the list grow
            # without bound.
            del base_loss[:-100]
            ave_base_loss = sum(base_loss) / float(len(base_loss))
            logging.info(
                'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                epoch, loss.data[0], ave_train_loss, ave_base_loss)
            if (epoch + 1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test_unsupervised(self):
        """Evaluate ``self.model`` on ``self.test_epoch`` test batches.

        For every batch the frames before index ``self.num_inputs`` (forward)
        and the frames after it in reverse order (backward) are fed to the
        model. The loss is the summed absolute difference between the
        prediction and the frame at index ``self.num_inputs``; the baseline is
        the same difference for the last three channels of the forward input.
        The endpoint error is computed for the forward flow only; the backward
        flow is only passed to the visualizer.

        Returns:
            ``(mean base loss - mean test loss) / (mean base loss + 1e-5)``.
        """
        def to_var(array):
            # numpy array -> float Variable, moved to the GPU when available.
            var = Variable(torch.from_numpy(array).float())
            if torch.cuda.is_available():
                var = var.cuda()
            return var

        losses, baselines, epes = [], [], []
        for batch_idx in range(self.test_epoch):
            im, motion, motion_r, _, _, _ = self.data.get_next_batch(
                self.data.test_images)
            mid = self.num_inputs
            stacked = (self.batch_size, -1, self.im_size, self.im_size)
            im_input_f = to_var(im[:, :mid, :, :, :].reshape(*stacked))
            im_input_b = to_var(im[:, :mid:-1, :, :, :].reshape(*stacked))
            im_output = to_var(im[:, mid, :, :, :])
            gt_motion_f = to_var(motion[:, mid - 1, :, :, :])
            gt_motion_b = to_var(motion_r[:, mid + 1, :, :, :])

            (im_pred, m_mask_f, disappear_f, attn_f,
             m_mask_b, disappear_b, attn_b) = self.model(im_input_f,
                                                         im_input_b)

            losses.append(torch.abs(im_pred - im_output).sum().data[0])
            baselines.append(
                torch.abs(im_input_f[:, -3:, :, :] - im_output).sum().data[0])

            # Mean per-pixel Euclidean distance between predicted and ground
            # truth forward flow.
            flow_f = self.motion2flow(m_mask_f)
            diff_f = flow_f - gt_motion_f
            dist_f = torch.sqrt((diff_f * diff_f).sum(1))
            epes.append((dist_f.sum() / dist_f.numel()).cpu().data[0])
            flow_b = self.motion2flow(m_mask_b)

            if self.display:
                self.visualizer.visualize_result_bidirect(
                    im_input_f, im_input_b, im_output, im_pred, flow_f,
                    gt_motion_f, disappear_f, attn_f, flow_b, gt_motion_b,
                    disappear_b, attn_b, 'test_%d.png' % batch_idx)

        mean_loss = numpy.mean(numpy.asarray(losses))
        mean_base = numpy.mean(numpy.asarray(baselines))
        improve_loss = mean_base - mean_loss
        improve_percent = improve_loss / (mean_base + 1e-5)
        logging.info('average test loss: %.2f, base loss: %.2f', mean_loss,
                     mean_base)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss,
                     improve_percent)
        mean_epe = numpy.mean(numpy.asarray(epes))
        logging.info('average test endpoint error: %.2f', mean_epe)
        return improve_percent

    def test_gt_unsupervised(self):
        """Evaluate ``self.model_gt`` when it is given ground-truth motion.

        Mirrors ``test_unsupervised`` but calls ``self.model_gt`` with the
        forward/backward inputs plus the ground-truth forward and backward
        motion. The endpoint error is computed for the forward flow only.

        Returns:
            ``(mean base loss - mean test loss) / (mean base loss + 1e-5)``.
        """
        def on_device(var):
            # Move to the GPU when CUDA is available.
            return var.cuda() if torch.cuda.is_available() else var

        def to_float_var(array):
            return on_device(Variable(torch.from_numpy(array).float()))

        losses, baselines, epes = [], [], []
        for _ in range(self.test_epoch):
            im, motion, motion_r, motion_label, motion_label_r, gt_depth = \
                self.data.get_next_batch(self.data.test_images)
            mid = self.num_inputs
            stacked = (self.batch_size, -1, self.im_size, self.im_size)
            im_input_f = to_float_var(im[:, :mid, :, :, :].reshape(*stacked))
            im_input_b = to_float_var(
                im[:, :mid:-1, :, :, :].reshape(*stacked))
            im_output = to_float_var(im[:, mid, :, :, :])
            gt_motion_f = to_float_var(motion[:, mid - 1, :, :, :])
            gt_motion_b = to_float_var(motion_r[:, mid + 1, :, :, :])
            # NOTE(review): the label tensors are prepared but the model call
            # below receives gt_motion_f/gt_motion_b instead -- confirm
            # against GtNet's signature which pair is intended.
            gt_motion_label_f = on_device(Variable(
                torch.from_numpy(motion_label[:, mid - 1, :, :, :])))
            gt_motion_label_b = on_device(Variable(
                torch.from_numpy(motion_label_r[:, mid + 1, :, :, :])))

            (im_pred, m_mask_f, disappear_f, attn_f,
             m_mask_b, disappear_b, attn_b) = self.model_gt(
                 im_input_f, im_input_b, gt_motion_f, gt_motion_b)

            losses.append(torch.abs(im_pred - im_output).sum().data[0])
            baselines.append(
                torch.abs(im_input_f[:, -3:, :, :] - im_output).sum().data[0])

            # Mean per-pixel Euclidean distance for the forward flow.
            flow_f = self.motion2flow(m_mask_f)
            diff_f = flow_f - gt_motion_f
            dist_f = torch.sqrt((diff_f * diff_f).sum(1))
            epes.append((dist_f.sum() / dist_f.numel()).cpu().data[0])
            flow_b = self.motion2flow(m_mask_b)

            if self.display:
                self.visualizer.visualize_result_bidirect(
                    im_input_f, im_input_b, im_output, im_pred, flow_f,
                    gt_motion_f, disappear_f, attn_f, flow_b, gt_motion_b,
                    disappear_b, attn_b, 'test_gt.png')

        mean_loss = numpy.mean(numpy.asarray(losses))
        mean_base = numpy.mean(numpy.asarray(baselines))
        improve_loss = mean_base - mean_loss
        improve_percent = improve_loss / (mean_base + 1e-5)
        logging.info('average ground truth test loss: %.2f, base loss: %.2f',
                     mean_loss, mean_base)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss,
                     improve_percent)
        mean_epe = numpy.mean(numpy.asarray(epes))
        logging.info('average ground truth test endpoint error: %.2f',
                     mean_epe)
        return improve_percent
Exemplo n.º 5
0
class Demo(BaseDemo):
    def __init__(self, args):
        """Run the base set-up, then build the networks and the visualizer.

        Args:
            args: options object forwarded to the base class and to the
                visualizer.
        """
        super(Demo, self).__init__(args)
        networks = self.init_model(self.data.m_kernel)
        self.model, self.model_gt = networks
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)

    def init_model(self, m_kernel):
        """Create the prediction network and its ground-truth counterpart.

        Both networks are moved to the GPU when CUDA is available, and the
        prediction network is optionally initialised from the checkpoint at
        ``self.init_model_path``.

        Args:
            m_kernel: array-like with a ``shape`` attribute;
                ``m_kernel.shape[1]`` and the kernel itself are passed to
                both network constructors.

        Returns:
            Tuple ``(self.model, self.model_gt)``.
        """
        self.model = Net(self.im_size, self.im_size, self.im_channel,
                         self.num_frame, m_kernel.shape[1], self.m_range,
                         m_kernel, self.net_depth)
        self.model_gt = GtNet(self.im_size, self.im_size, self.im_channel,
                              self.num_frame, m_kernel.shape[1], self.m_range,
                              m_kernel)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Compare by value; identity against a string literal is
        # implementation-dependent and warns on Python 3.8+.
        if self.init_model_path != '':
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    def train(self):
        """Train ``self.model`` with an L1 reconstruction loss.

        Each of the ``self.train_epoch`` iterations draws one training batch,
        feeds all frames but the last to the model and minimises the
        per-sample summed absolute difference between the prediction and the
        last frame. Losses are written to a ``SummaryWriter`` at
        ``self.tensorboard_path``, running averages are logged, a checkpoint
        is saved every ``self.save_interval`` iterations and validation is
        run every ``self.test_interval`` iterations.

        Exits the process via ``sys.exit()`` when ``self.data.name`` is not a
        supported dataset.
        """
        writer = SummaryWriter(self.tensorboard_path)
        # try/finally so the writer is also closed on sys.exit() or an
        # exception raised mid-training.
        try:
            optimizer = optim.Adam(self.model.parameters(),
                                   lr=self.learning_rate)
            base_loss_all, train_loss_all = [], []
            for epoch in range(self.train_epoch):
                optimizer.zero_grad()
                if self.data.name in ['box', 'mnist', 'chair']:
                    im, _, _, _ = self.data.get_next_batch(
                        self.data.train_images)
                elif self.data.name in [
                        'robot', 'mpii', 'viper', 'kitti', 'robotc'
                ]:
                    im = self.data.get_next_batch(self.data.train_images)
                else:
                    logging.error('%s data not supported' % self.data.name)
                    sys.exit()
                im_input = im[:, :-1, :, :, :].reshape(
                    self.batch_size, -1, self.im_size, self.im_size)
                im_output = im[:, -1, :, :, :]
                im_input = Variable(torch.from_numpy(im_input).float())
                im_output = Variable(torch.from_numpy(im_output).float())
                if torch.cuda.is_available():
                    im_input, im_output = im_input.cuda(), im_output.cuda()
                im_pred, m_mask = self.model(im_input, im_output)
                im_diff = im_pred - im_output
                loss = torch.abs(im_diff).sum() / self.batch_size
                loss.backward()
                optimizer.step()

                writer.add_scalar('train_loss', loss.data[0], epoch)
                train_loss_all.append(loss.data[0])
                if len(train_loss_all) > 100:
                    train_loss_all.pop(0)
                ave_train_loss = sum(train_loss_all) / float(
                    len(train_loss_all))
                # Baseline: reuse the last im_channel channels of the input
                # as the prediction.
                im_base = im_input[:, -self.im_channel:, :, :]
                base_loss = torch.abs(
                    im_base - im_output).sum() / self.batch_size
                base_loss_all.append(base_loss.data[0])
                if len(base_loss_all) > 100:
                    base_loss_all.pop(0)
                ave_base_loss = sum(base_loss_all) / float(len(base_loss_all))
                logging.info(
                    'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                    epoch, loss.data[0], ave_train_loss, ave_base_loss)
                if (epoch + 1) % self.save_interval == 0:
                    logging.info('epoch %d, saving model', epoch)
                    # torch.save writes bytes, so the file must be opened in
                    # binary mode.
                    with open(os.path.join(self.save_dir, '%d.pth' % epoch),
                              'wb') as handle:
                        torch.save(self.model.state_dict(), handle)
                if (epoch + 1) % self.test_interval == 0:
                    logging.info('epoch %d, testing', epoch)
                    # NOTE(review): validate() is defined outside this view;
                    # two values are unpacked here while test() in this class
                    # returns three -- confirm validate()'s return shape.
                    test_loss, test_epe = self.validate()
                    writer.add_scalar('test_loss', test_loss, epoch)
                    if test_epe is not None:
                        writer.add_scalar('test_epe', test_epe, epoch)
        finally:
            writer.close()

    def test(self):
        """Evaluate ``self.model`` on ``self.test_epoch`` test batches.

        All frames but the last are the input and the last frame is the
        target. The loss is the per-sample summed absolute prediction error;
        the baseline is the same error for the last ``self.im_channel``
        channels of the input. When the dataset provides motion, the endpoint
        error between the predicted flow and ``motion[:, -2]`` is also
        collected.

        Exits the process via ``sys.exit()`` for unsupported datasets.

        Returns:
            Tuple ``(test_loss, test_epe, improve_percent)``; ``test_epe`` is
            ``None`` when the last batch had no ground-truth motion.
        """
        with_motion = ['box', 'mnist', 'chair']
        images_only = ['robot', 'mpii', 'viper', 'kitti', 'robotc']
        sampled = ['mpii_sample', 'kitti_sample']

        def on_device(var):
            # Move to the GPU when CUDA is available.
            return var.cuda() if torch.cuda.is_available() else var

        losses, baselines, epes = [], [], []
        motion = None
        for batch_idx in range(self.test_epoch):
            if self.data.name in with_motion:
                im, motion, _, _ = self.data.get_next_batch(
                    self.data.test_images)
            elif self.data.name in images_only:
                im = self.data.get_next_batch(self.data.test_images)
                motion = None
            elif self.data.name in sampled:
                im = self.data.get_next_batch(self.data.test_images)
                motion = None
                # Keep only the trailing num_frame frames of the sequence.
                im = im[:, -self.num_frame:, :, :, :]
            else:
                logging.error('%s data not supported' % self.data.name)
                sys.exit()

            stacked = im[:, :-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_input = on_device(
                Variable(torch.from_numpy(stacked).float(), volatile=True))
            im_output = on_device(
                Variable(torch.from_numpy(im[:, -1, :, :, :]).float(),
                         volatile=True))

            im_pred, m_mask = self.model(im_input, im_output)
            flow = self.motion2flow(m_mask)
            loss = torch.abs(im_pred - im_output).sum() / self.batch_size
            losses.append(loss.data[0])

            im_base = im_input[:, -self.im_channel:, :, :]
            baseline = torch.abs(im_base - im_output).sum() / self.batch_size
            baselines.append(baseline.data[0])

            gt_motion = None
            if motion is not None:
                gt_motion = on_device(
                    Variable(torch.from_numpy(motion[:, -2, :, :, :]).float()))
                # Mean per-pixel Euclidean distance between the flows.
                diff = flow - gt_motion
                dist = torch.sqrt((diff * diff).sum(1))
                epes.append((dist.sum() / dist.numel()).cpu().data[0])

            if self.display:
                self.visualizer.visualize_result(im_input, im_output, im_pred,
                                                 flow, gt_motion,
                                                 'test_%d.png' % batch_idx)
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(im_input, im_output,
                                                     im_pred, flow, gt_motion,
                                                     'test_%d.png' % i, i)

        test_loss = numpy.mean(numpy.asarray(losses))
        base_loss = numpy.mean(numpy.asarray(baselines))
        improve_loss = base_loss - test_loss
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average test loss: %.2f, base loss: %.2f', test_loss,
                     base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss,
                     improve_percent)
        if motion is None:
            test_epe = None
        else:
            test_epe = numpy.mean(numpy.asarray(epes))
            logging.info('average test endpoint error: %.2f', test_epe)
        return test_loss, test_epe, improve_percent

    def test_gt(self):
        """Evaluate ``self.model_gt`` driven by ground-truth motion labels.

        Only the 'box', 'mnist' and 'chair' datasets are supported; any other
        dataset logs an error and exits the process via ``sys.exit()``. For
        each batch the ground-truth network receives the input frames, the
        target frame and ``motion_label[:, -2]`` together with the mode
        string ``'label'``; reconstruction loss, baseline loss and endpoint
        error are accumulated.

        Returns:
            ``(mean base loss - mean test loss) / (mean base loss + 1e-5)``.
        """
        supported = ['box', 'mnist', 'chair']

        def on_device(var):
            # Move to the GPU when CUDA is available.
            return var.cuda() if torch.cuda.is_available() else var

        def volatile_float(array):
            return on_device(
                Variable(torch.from_numpy(array).float(), volatile=True))

        losses, baselines, epes = [], [], []
        for _ in range(self.test_epoch):
            if self.data.name in supported:
                im, motion, motion_label, _ = self.data.get_next_batch(
                    self.data.test_images)
                gt_motion_label = on_device(Variable(
                    torch.from_numpy(motion_label[:, -2, :, :, :])))
            else:
                logging.error('%s data not supported in test_gt' %
                              self.data.name)
                sys.exit()

            im_input = volatile_float(im[:, :-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size))
            im_output = volatile_float(im[:, -1, :, :, :])
            gt_motion = volatile_float(motion[:, -2, :, :, :])

            # Always true at this point: unsupported datasets exited above.
            if self.data.name in supported:
                im_pred, m_mask = self.model_gt(im_input, im_output,
                                                gt_motion_label, 'label')
                flow = self.motion2flow(m_mask)

            loss = torch.abs(im_pred - im_output).sum() / self.batch_size
            losses.append(loss.data[0])

            im_base = im_input[:, -self.im_channel:, :, :]
            baseline = torch.abs(im_base - im_output).sum() / self.batch_size
            baselines.append(baseline.data[0])

            # Mean per-pixel Euclidean distance between the flows.
            diff = flow - gt_motion
            dist = torch.sqrt((diff * diff).sum(1))
            epes.append((dist.sum() / dist.numel()).cpu().data[0])

            if self.display:
                self.visualizer.visualize_result(im_input, im_output, im_pred,
                                                 flow, gt_motion,
                                                 'test_gt.png')
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(im_input, im_output,
                                                     im_pred, flow, gt_motion,
                                                     'test_gt_%d.png' % i, i)

        test_loss = numpy.mean(numpy.asarray(losses))
        base_loss = numpy.mean(numpy.asarray(baselines))
        improve_loss = base_loss - test_loss
        improve_percent = improve_loss / (base_loss + 1e-5)
        logging.info('average ground truth test loss: %.2f, base loss: %.2f',
                     test_loss, base_loss)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss,
                     improve_percent)
        test_epe = numpy.mean(numpy.asarray(epes))
        logging.info('average ground truth test endpoint error: %.2f',
                     test_epe)
        return improve_percent
Exemplo n.º 6
0
class Demo(BaseBiDemo):
    def __init__(self, args):
        """Run the base set-up, then build the networks and the visualizer.

        Args:
            args: options object forwarded to the base class and to the
                visualizer.
        """
        super(Demo, self).__init__(args)
        networks = self.init_model(self.data.m_kernel)
        self.model, self.model_gt = networks
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)

    def init_model(self, m_kernel):
        """Create the prediction network and its ground-truth counterpart.

        Both networks are built with ``self.num_inputs`` input frames, moved
        to the GPU when CUDA is available, and the prediction network is
        optionally initialised from the checkpoint at
        ``self.init_model_path``.

        Args:
            m_kernel: array-like with a ``shape`` attribute;
                ``m_kernel.shape[1]`` and the kernel itself are passed to
                both network constructors.

        Returns:
            Tuple ``(self.model, self.model_gt)``.
        """
        self.model = Net(self.im_size, self.im_size, self.im_channel, self.num_inputs,
                         m_kernel.shape[1], self.m_range, m_kernel)
        self.model_gt = GtNet(self.im_size, self.im_size, self.im_channel, self.num_inputs,
                              m_kernel.shape[1], self.m_range, m_kernel)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Compare by value; identity against a string literal is
        # implementation-dependent and warns on Python 3.8+.
        if self.init_model_path != '':
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    def train(self):
        """Train the bidirectional model with a cropped L1 loss.

        Each of the ``self.train_epoch`` iterations draws one training batch,
        feeds the frames before index ``self.num_inputs`` (forward input) and
        the frames after it in reverse order (backward input) to the model,
        and minimises the summed absolute prediction error for the frame at
        index ``self.num_inputs``, ignoring a border of ``self.m_range``
        pixels. Running averages are logged, a checkpoint is saved every
        ``self.save_interval`` iterations and ``self.validate()`` is run every
        ``self.test_interval`` iterations.

        Exits the process via ``sys.exit()`` when ``self.data.name`` is not a
        supported dataset.
        """
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        base_loss, train_loss = [], []
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, _, _, _, _, _ = self.data.get_next_batch(self.data.train_images)
            elif self.data.name in ['robot64', 'mpii64', 'mpi128', 'nyuv2', 'robot128', 'viper64', 'viper128']:
                im = self.data.get_next_batch(self.data.train_images)
            else:
                logging.error('%s data not supported' % self.data.name)
                sys.exit()
            im_input_f = im[:, :self.num_inputs, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            # Negative step: walks from the last frame back to, but not
            # including, index num_inputs.
            im_input_b = im[:, :self.num_inputs:-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, self.num_inputs, :, :, :]
            im_input_f = Variable(torch.from_numpy(im_input_f).float())
            im_input_b = Variable(torch.from_numpy(im_input_b).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            if torch.cuda.is_available():
                im_input_f, im_input_b = im_input_f.cuda(), im_input_b.cuda()
                im_output = im_output.cuda()
            im_pred, m_mask_f, d_mask_f, attn_f, m_mask_b, d_mask_b, attn_b = \
                self.model(im_input_f, im_input_b)
            im_diff = im_pred - im_output
            # Border of m_range pixels is excluded from the loss.
            # NOTE: for m_range == 0 the slice 0:-0 is empty.
            loss = torch.abs(im_diff[:, :, self.m_range:-self.m_range, self.m_range:-self.m_range]).sum()
            loss.backward()
            optimizer.step()

            train_loss.append(loss.data[0])
            if len(train_loss) > 100:
                train_loss.pop(0)
            ave_train_loss = sum(train_loss) / float(len(train_loss))
            # Baseline: average of the last im_channel channels of the
            # forward and backward inputs.
            im_base = 0.5 * im_input_f[:, -self.im_channel:, :, :] + \
                0.5 * im_input_b[:, -self.im_channel:, :, :]
            base_loss.append(torch.abs(im_base - im_output).sum().data[0])
            if len(base_loss) > 100:
                base_loss.pop(0)
            ave_base_loss = sum(base_loss) / float(len(base_loss))
            logging.info('epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                         epoch, loss.data[0], ave_train_loss, ave_base_loss)
            if (epoch + 1) % self.save_interval == 0:
                logging.info('epoch %d, saving model', epoch)
                # torch.save writes bytes, so the file must be opened in
                # binary mode.
                with open(os.path.join(self.save_dir, '%d.pth' % epoch), 'wb') as handle:
                    torch.save(self.model.state_dict(), handle)
            if (epoch + 1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test(self):
        """Evaluate the bidirectional model on ``self.test_epoch`` batches.

        The loss is the summed absolute prediction error for the frame at
        index ``self.num_inputs``, ignoring a border of ``self.m_range``
        pixels; the baseline is the error of the average of the last
        ``self.im_channel`` channels of the forward and backward inputs. When
        the dataset provides motion, the endpoint error is collected for both
        the forward and the backward flow.

        Exits the process via ``sys.exit()`` for unsupported datasets.

        Returns:
            ``(mean base loss - mean test loss) / (mean base loss + 1e-5)``.
        """
        def on_device(var):
            # Move to the GPU when CUDA is available.
            return var.cuda() if torch.cuda.is_available() else var

        def volatile_float(array):
            return on_device(
                Variable(torch.from_numpy(array).float(), volatile=True))

        def mean_epe(flow, gt_flow):
            # Mean per-pixel Euclidean distance between two flow fields.
            diff = flow - gt_flow
            dist = torch.sqrt((diff * diff).sum(1))
            return (dist.sum() / dist.numel()).cpu().data[0]

        losses, baselines, epes = [], [], []
        motion = None
        for batch_idx in range(self.test_epoch):
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, motion, motion_r, _, _, _ = self.data.get_next_batch(
                    self.data.test_images)
            elif self.data.name in ['robot64', 'mpii64', 'mpi128', 'nyuv2',
                                    'robot128', 'viper64', 'viper128']:
                im = self.data.get_next_batch(self.data.test_images)
                motion, motion_r = None, None
            elif self.data.name in ['mpii64_sample']:
                im = self.data.get_next_batch(self.data.test_images)
                motion, motion_r = None, None
                # Keep only the trailing num_frame frames of the sequence.
                im = im[:, -self.num_frame:, :, :, :]
            else:
                logging.error('%s data not supported' % self.data.name)
                sys.exit()

            mid = self.num_inputs
            stacked = (self.batch_size, -1, self.im_size, self.im_size)
            im_input_f = volatile_float(
                im[:, :mid, :, :, :].reshape(*stacked))
            im_input_b = volatile_float(
                im[:, :mid:-1, :, :, :].reshape(*stacked))
            im_output = volatile_float(im[:, mid, :, :, :])

            (im_pred, m_mask_f, d_mask_f, attn_f,
             m_mask_b, d_mask_b, attn_b) = self.model(im_input_f, im_input_b)

            border = self.m_range
            im_diff = im_pred - im_output
            cropped = im_diff[:, :, border:-border, border:-border]
            losses.append(torch.abs(cropped).sum().data[0])

            im_base = 0.5 * im_input_f[:, -self.im_channel:, :, :] + \
                0.5 * im_input_b[:, -self.im_channel:, :, :]
            baselines.append(torch.abs(im_base - im_output).sum().data[0])

            flow_f = self.motion2flow(m_mask_f)
            depth_f = self.mask2depth(d_mask_f)
            flow_b = self.motion2flow(m_mask_b)
            depth_b = self.mask2depth(d_mask_b)

            gt_motion_f, gt_motion_b = None, None
            if motion is not None:
                gt_motion_f = on_device(Variable(
                    torch.from_numpy(motion[:, mid - 1, :, :, :]).float()))
                epes.append(mean_epe(flow_f, gt_motion_f))
                gt_motion_b = on_device(Variable(
                    torch.from_numpy(motion_r[:, mid + 1, :, :, :]).float()))
                epes.append(mean_epe(flow_b, gt_motion_b))

            if self.display:
                self.visualizer.visualize_result(
                    im_input_f, im_input_b, im_output, im_pred, flow_f,
                    gt_motion_f, depth_f, attn_f, flow_b, gt_motion_b,
                    depth_b, attn_b, 'test_%d.png' % batch_idx)
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(
                        im_input_f, im_input_b, im_output, im_pred, flow_f,
                        gt_motion_f, depth_f, attn_f, flow_b, gt_motion_b,
                        depth_b, attn_b, 'test_%d.png' % i, i)

        mean_loss = numpy.mean(numpy.asarray(losses))
        mean_base = numpy.mean(numpy.asarray(baselines))
        improve_loss = mean_base - mean_loss
        improve_percent = improve_loss / (mean_base + 1e-5)
        logging.info('average test loss: %.2f, base loss: %.2f', mean_loss, mean_base)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss, improve_percent)
        if motion is not None:
            mean_epe_all = numpy.mean(numpy.asarray(epes))
            logging.info('average test endpoint error: %.2f', mean_epe_all)
        return improve_percent

    def test_gt(self):
        """Evaluate ``self.model_gt`` driven by ground-truth labels and depth.

        Only the 'box', 'mnist' and 'box_complex' datasets are supported; any
        other dataset logs an error and exits the process via ``sys.exit()``.
        The ground-truth network receives the forward/backward inputs, the
        forward and backward motion labels and depth, and the mode string
        ``'label'``. Reconstruction loss (ignoring a border of
        ``self.m_range`` pixels), baseline loss and the forward and backward
        endpoint errors are accumulated.

        Returns:
            ``(mean base loss - mean test loss) / (mean base loss + 1e-5)``.
        """
        def on_device(var):
            # Move to the GPU when CUDA is available.
            return var.cuda() if torch.cuda.is_available() else var

        def volatile_float(array):
            return on_device(
                Variable(torch.from_numpy(array).float(), volatile=True))

        def float_var(array):
            return on_device(Variable(torch.from_numpy(array).float()))

        def raw_var(array):
            # Keeps the array's own dtype (no float conversion).
            return on_device(Variable(torch.from_numpy(array)))

        def mean_epe(flow, gt_flow):
            # Mean per-pixel Euclidean distance between two flow fields.
            diff = flow - gt_flow
            dist = torch.sqrt((diff * diff).sum(1))
            return (dist.sum() / dist.numel()).cpu().data[0]

        losses, baselines, epes = [], [], []
        for _ in range(self.test_epoch):
            if self.data.name in ['box', 'mnist', 'box_complex']:
                im, motion, motion_r, motion_label, motion_label_r, depth = \
                    self.data.get_next_batch(self.data.test_images)
            else:
                logging.error('%s data not supported in test_gt' % self.data.name)
                sys.exit()

            mid = self.num_inputs
            stacked = (self.batch_size, -1, self.im_size, self.im_size)
            im_input_f = volatile_float(
                im[:, :mid, :, :, :].reshape(*stacked))
            im_input_b = volatile_float(
                im[:, :mid:-1, :, :, :].reshape(*stacked))
            im_output = volatile_float(im[:, mid, :, :, :])
            gt_motion_f = float_var(motion[:, mid - 1, :, :, :])
            gt_motion_b = float_var(motion_r[:, mid + 1, :, :, :])
            gt_motion_label_f = raw_var(motion_label[:, mid - 1, :, :, :])
            gt_motion_label_b = raw_var(motion_label_r[:, mid + 1, :, :, :])
            gt_depth_f = raw_var(depth[:, mid - 1, :, :])
            gt_depth_b = raw_var(depth[:, mid + 1, :, :])

            (im_pred, m_mask_f, d_mask_f, attn_f,
             m_mask_b, d_mask_b, attn_b) = self.model_gt(
                 im_input_f, im_input_b, gt_motion_label_f, gt_depth_f,
                 gt_motion_label_b, gt_depth_b, 'label')

            border = self.m_range
            im_diff = im_pred - im_output
            cropped = im_diff[:, :, border:-border, border:-border]
            losses.append(torch.abs(cropped).sum().data[0])

            im_base = 0.5 * im_input_f[:, -self.im_channel:, :, :] + \
                0.5 * im_input_b[:, -self.im_channel:, :, :]
            baselines.append(torch.abs(im_base - im_output).sum().data[0])

            flow_f = self.motion2flow(m_mask_f)
            depth_f = self.mask2depth(d_mask_f)
            epes.append(mean_epe(flow_f, gt_motion_f))
            flow_b = self.motion2flow(m_mask_b)
            depth_b = self.mask2depth(d_mask_b)
            epes.append(mean_epe(flow_b, gt_motion_b))

            if self.display:
                self.visualizer.visualize_result(
                    im_input_f, im_input_b, im_output, im_pred, flow_f,
                    gt_motion_f, depth_f, attn_f, flow_b, gt_motion_b,
                    depth_b, attn_b, 'test_gt.png')
            if self.display_all:
                for i in range(self.batch_size):
                    self.visualizer.visualize_result(
                        im_input_f, im_input_b, im_output, im_pred, flow_f,
                        gt_motion_f, depth_f, attn_f, flow_b, gt_motion_b,
                        depth_b, attn_b, 'test_gt_%d.png' % i, i)

        mean_loss = numpy.mean(numpy.asarray(losses))
        mean_base = numpy.mean(numpy.asarray(baselines))
        improve_loss = mean_base - mean_loss
        improve_percent = improve_loss / (mean_base + 1e-5)
        logging.info('average ground truth test loss: %.2f, base loss: %.2f', mean_loss, mean_base)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', improve_loss, improve_percent)
        mean_epe_all = numpy.mean(numpy.asarray(epes))
        logging.info('average ground truth test endpoint error: %.2f', mean_epe_all)
        return improve_percent

    @staticmethod
    def mask2depth(d_mask):
        [batch_size, num_depth, height, width] = d_mask.size()
        depth_number = Variable(torch.zeros(batch_size, num_depth, height, width))
        if torch.cuda.is_available():
            depth_number = depth_number.cuda()
        for i in range(num_depth):
            depth_number[:, i, :, :] = i
        depth = Variable(torch.zeros(batch_size, 1, height, width))
        if torch.cuda.is_available():
            depth = depth.cuda()
        depth[:, 0, :, :] = (d_mask * depth_number).sum(1)
        return depth
Exemplo n.º 7
0
class Demo(BaseDemo):
    def __init__(self, args):
        """Set up the dataset, the two networks and the visualizer.

        Args:
            args: options object forwarded to ``BaseDemo``, the data loaders
                and the ``Visualizer``. Only ``args.data`` is read directly
                here.
        """
        super(Demo, self).__init__(args)
        # Swap in the bidirectional loader for the datasets that have one.
        # Any other value of args.data leaves self.data as it was after the
        # base-class constructor (presumably set there -- verify in BaseDemo).
        if args.data == 'box':
            self.data = BoxDataBidirect(args)
        elif args.data == 'mnist':
            self.data = MnistDataBidirect(args)
        self.model, self.model_gt = self.init_model(self.data.m_kernel)
        self.visualizer = Visualizer(args, self.data.reverse_m_dict)
        # Floor division keeps this an int on Python 3 as well; the value is
        # used as a slice bound and index on the frame axis by the train/test
        # loops, where a float is not accepted.
        self.num_inputs = (self.num_frame - 1) // 2

    def init_model(self, m_kernel):
        """Build the prediction network and its ground-truth-motion variant.

        Both networks are stored on ``self`` and moved to the GPU when CUDA is
        available. If ``self.init_model_path`` is non-empty, the weights saved
        there are loaded into ``self.model`` (``self.model_gt`` is not
        restored from the checkpoint).

        Args:
            m_kernel: motion kernel array; ``m_kernel.shape[1]`` and the array
                itself are passed to both network constructors (presumably
                shape[1] is the number of motion classes -- confirm in Net).

        Returns:
            The tuple ``(self.model, self.model_gt)``.
        """
        # Floor division so the frame count handed to the networks stays an
        # int on Python 3 as well as Python 2.
        num_inputs = (self.num_frame - 1) // 2
        self.model = Net(self.im_size, self.im_size, 3, num_inputs,
                         m_kernel.shape[1], self.m_range, m_kernel)
        self.model_gt = GtNet(self.im_size, self.im_size, 3, num_inputs,
                              m_kernel.shape[1], self.m_range, m_kernel)
        if torch.cuda.is_available():
            self.model = self.model.cuda()
            self.model_gt = self.model_gt.cuda()
        # Compare by value: `is not ''` tests object identity, which is not
        # guaranteed for equal strings and raises a SyntaxWarning on 3.8+.
        if self.init_model_path != '':
            self.model.load_state_dict(torch.load(self.init_model_path))
        return self.model, self.model_gt

    def train_unsupervised(self):
        """Train ``self.model`` to predict a frame from both temporal sides.

        Each step draws one training batch, builds a forward input stack (the
        frames before index ``self.num_inputs``) and a backward input stack
        (the frames after it, walked from the last frame inwards), and
        minimises the summed absolute difference between the predicted frame
        and the frame at index ``self.num_inputs`` with Adam.

        Running averages of the training loss and of a baseline loss are
        logged every step. The baseline is the summed absolute difference
        between the last three channels of each input stack and the target
        frame. ``self.validate()`` is called every ``self.test_interval``
        steps.
        """
        # Number of most recent entries kept for each running average.
        window = 100
        optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        base_loss, train_loss = [], []
        for epoch in range(self.train_epoch):
            optimizer.zero_grad()
            # The motion annotations returned with the batch are not needed
            # for the unsupervised objective.
            im, _, _ = self.data.get_next_batch(self.data.train_images)
            im_input_f = im[:, :self.num_inputs, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_input_b = im[:, :self.num_inputs:-1, :, :, :].reshape(
                self.batch_size, -1, self.im_size, self.im_size)
            im_output = im[:, self.num_inputs, :, :, :]
            im_input_f = Variable(torch.from_numpy(im_input_f).float())
            im_input_b = Variable(torch.from_numpy(im_input_b).float())
            im_output = Variable(torch.from_numpy(im_output).float())
            if torch.cuda.is_available():
                im_input_f, im_input_b = im_input_f.cuda(), im_input_b.cuda()
                im_output = im_output.cuda()
            # Only the predicted frame is used here; the model also returns
            # six further outputs (masks, disappear maps, attention) that
            # training ignores.
            im_pred = self.model(im_input_f, im_input_b)[0]
            loss = torch.abs(im_pred - im_output).sum()
            loss.backward()
            optimizer.step()

            # NOTE(review): `.data[0]` on a scalar is the pre-0.4 PyTorch
            # idiom used throughout this file; newer releases need `.item()`.
            loss_value = loss.data[0]
            train_loss.append(loss_value)
            del train_loss[:-window]
            ave_train_loss = sum(train_loss) / float(len(train_loss))
            base_loss.append(
                torch.abs(im_input_f[:, -3:, :, :] - im_output).sum().data[0])
            base_loss.append(
                torch.abs(im_input_b[:, -3:, :, :] - im_output).sum().data[0])
            # Two entries are added per step, so trim back to the window size
            # rather than dropping a single element; otherwise the list (and
            # the averaging window) grows by one entry every step.
            del base_loss[:-window]
            ave_base_loss = sum(base_loss) / float(len(base_loss))
            logging.info(
                'epoch %d, train loss: %.2f, average train loss: %.2f, base loss: %.2f',
                epoch, loss_value, ave_train_loss, ave_base_loss)
            if (epoch + 1) % self.test_interval == 0:
                logging.info('epoch %d, testing', epoch)
                self.validate()

    def test_unsupervised(self):
        """Evaluate ``self.model`` on ``self.test_epoch`` test batches.

        For every batch the frame at index ``self.num_inputs`` is predicted
        from the forward and backward input stacks. Three quantities are
        collected: the summed absolute prediction error, a baseline error
        (last three channels of each input stack versus the target frame),
        and the fraction of positions where the arg-max over the class axis
        of each motion mask equals the ground-truth motion.

        When ``self.display`` is set, each batch is also rendered through the
        visualizer as ``test_<batch index>.png``.

        Returns:
            ``(base - test) / (base + 1e-5)``, the relative improvement of the
            mean prediction loss over the mean baseline loss.
        """
        losses, baselines, accuracies = [], [], []
        for step in range(self.test_epoch):
            frames, motion_fwd, motion_bwd = self.data.get_next_batch(
                self.data.test_images)
            mid = self.num_inputs
            stacked = (self.batch_size, -1, self.im_size, self.im_size)

            # Forward stack: frames before `mid`. Backward stack: frames after
            # `mid`, walked from the last frame inwards.
            fwd_np = frames[:, :mid, :, :, :].reshape(*stacked)
            bwd_np = frames[:, :mid:-1, :, :, :].reshape(*stacked)
            target_np = frames[:, mid, :, :, :]
            gt_fwd_np = motion_fwd[:, mid - 1, :, :, :]
            gt_bwd_np = motion_bwd[:, mid + 1, :, :, :]

            im_input_f = Variable(torch.from_numpy(fwd_np).float())
            im_input_b = Variable(torch.from_numpy(bwd_np).float())
            im_output = Variable(torch.from_numpy(target_np).float())
            gt_motion_f = Variable(torch.from_numpy(gt_fwd_np))
            gt_motion_b = Variable(torch.from_numpy(gt_bwd_np))
            if torch.cuda.is_available():
                im_input_f = im_input_f.cuda()
                im_input_b = im_input_b.cuda()
                im_output = im_output.cuda()
                gt_motion_f = gt_motion_f.cuda()
                gt_motion_b = gt_motion_b.cuda()

            (im_pred, m_mask_f, disappear_f, attn_f,
             m_mask_b, disappear_b, attn_b) = self.model(im_input_f, im_input_b)

            # Summed L1 error of the prediction against the target frame.
            losses.append(torch.abs(im_pred - im_output).sum().data[0])

            # Baseline: compare the last three channels of each stack with the
            # target frame (forward entry first, then backward).
            for stack in (im_input_f, im_input_b):
                baselines.append(
                    torch.abs(stack[:, -3:, :, :] - im_output).sum().data[0])

            # Motion accuracy, forward entry first, then backward.
            for mask, gt_motion in ((m_mask_f, gt_motion_f),
                                    (m_mask_b, gt_motion_b)):
                predicted = mask.max(1)[1]
                hits = predicted.eq(gt_motion).float().sum()
                accuracies.append((hits / gt_motion.numel()).cpu().data[0])

            if self.display:
                flow_f = self.motion2flow(m_mask_f)
                flow_b = self.motion2flow(m_mask_b)
                self.visualizer.visualize_result_bidirect(
                    im_input_f, im_input_b, im_output, im_pred, flow_f,
                    gt_motion_f, disappear_f, attn_f, flow_b, gt_motion_b,
                    disappear_b, attn_b, 'test_%d.png' % step)

        mean_test = numpy.mean(numpy.asarray(losses))
        mean_base = numpy.mean(numpy.asarray(baselines))
        gain = mean_base - mean_test
        improve_percent = gain / (mean_base + 1e-5)
        logging.info('average test loss: %.2f, base loss: %.2f', mean_test,
                     mean_base)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', gain,
                     improve_percent)
        mean_accuracy = numpy.mean(numpy.asarray(accuracies))
        logging.info('average test accuracy: %.2f', mean_accuracy)
        return improve_percent

    def test_gt_unsupervised(self):
        """Evaluate ``self.model_gt``, which is fed the ground-truth motion.

        Runs ``self.test_epoch`` test batches. Each batch is split into a
        forward stack, a backward stack and the target frame at index
        ``self.num_inputs``; the ground-truth motions for both directions are
        passed to the network together with the image stacks. The summed
        absolute prediction error, a baseline error (last three channels of
        each stack versus the target) and the motion-mask accuracy are
        averaged and logged.

        When ``self.display`` is set, each batch is rendered to
        ``test_gt.png`` (the same file name every time).

        Returns:
            ``(base - test) / (base + 1e-5)``, the relative improvement of the
            mean prediction loss over the mean baseline loss.
        """
        losses, baselines, accuracies = [], [], []
        for _ in range(self.test_epoch):
            frames, motion_fwd, motion_bwd = self.data.get_next_batch(
                self.data.test_images)
            mid = self.num_inputs
            stacked = (self.batch_size, -1, self.im_size, self.im_size)

            # Forward stack: frames before `mid`. Backward stack: frames after
            # `mid`, walked from the last frame inwards.
            fwd_np = frames[:, :mid, :, :, :].reshape(*stacked)
            bwd_np = frames[:, :mid:-1, :, :, :].reshape(*stacked)
            target_np = frames[:, mid, :, :, :]
            gt_fwd_np = motion_fwd[:, mid - 1, :, :, :]
            gt_bwd_np = motion_bwd[:, mid + 1, :, :, :]

            im_input_f = Variable(torch.from_numpy(fwd_np).float())
            im_input_b = Variable(torch.from_numpy(bwd_np).float())
            im_output = Variable(torch.from_numpy(target_np).float())
            gt_motion_f = Variable(torch.from_numpy(gt_fwd_np))
            gt_motion_b = Variable(torch.from_numpy(gt_bwd_np))
            if torch.cuda.is_available():
                im_input_f = im_input_f.cuda()
                im_input_b = im_input_b.cuda()
                im_output = im_output.cuda()
                gt_motion_f = gt_motion_f.cuda()
                gt_motion_b = gt_motion_b.cuda()

            (im_pred, m_mask_f, disappear_f, attn_f,
             m_mask_b, disappear_b, attn_b) = self.model_gt(
                 im_input_f, im_input_b, gt_motion_f, gt_motion_b)

            # Summed L1 error of the prediction against the target frame.
            losses.append(torch.abs(im_pred - im_output).sum().data[0])

            # Baseline: compare the last three channels of each stack with the
            # target frame (forward entry first, then backward).
            for stack in (im_input_f, im_input_b):
                baselines.append(
                    torch.abs(stack[:, -3:, :, :] - im_output).sum().data[0])

            # Motion accuracy, forward entry first, then backward.
            for mask, gt_motion in ((m_mask_f, gt_motion_f),
                                    (m_mask_b, gt_motion_b)):
                predicted = mask.max(1)[1]
                hits = predicted.eq(gt_motion).float().sum()
                accuracies.append((hits / gt_motion.numel()).cpu().data[0])

            if self.display:
                flow_f = self.motion2flow(m_mask_f)
                flow_b = self.motion2flow(m_mask_b)
                self.visualizer.visualize_result_bidirect(
                    im_input_f, im_input_b, im_output, im_pred, flow_f,
                    gt_motion_f, disappear_f, attn_f, flow_b, gt_motion_b,
                    disappear_b, attn_b, 'test_gt.png')

        mean_test = numpy.mean(numpy.asarray(losses))
        mean_base = numpy.mean(numpy.asarray(baselines))
        gain = mean_base - mean_test
        improve_percent = gain / (mean_base + 1e-5)
        logging.info('average groundtruth test loss: %.2f, base loss: %.2f',
                     mean_test, mean_base)
        logging.info('improve_loss: %.2f, improve_percent: %.2f', gain,
                     improve_percent)
        mean_accuracy = numpy.mean(numpy.asarray(accuracies))
        logging.info('average groundtruth test accuracy: %.2f', mean_accuracy)
        return improve_percent

    def motion2flow(self, m_mask):
        """Convert a per-class motion mask into a two-channel flow field.

        Each class index ``i`` is looked up in ``self.data.reverse_m_dict`` to
        obtain its ``(m_x, m_y)`` displacement. The flow at a position is the
        mask-weighted sum of those displacements over the class axis.

        Args:
            m_mask: 4-D tensor/Variable unpacked as
                (batch, num_class, height, width).

        Returns:
            A float Variable of size (batch, 2, height, width); channel 0
            holds the x component and channel 1 the y component.
        """
        reverse_m_dict = self.data.reverse_m_dict
        batch_size, num_class, height, width = m_mask.size()
        kernel_x = Variable(torch.zeros(batch_size, num_class, height, width))
        kernel_y = Variable(torch.zeros(batch_size, num_class, height, width))
        # Follow the mask's own device instead of global CUDA availability: a
        # CPU mask on a CUDA-capable machine must not be multiplied with GPU
        # buffers.
        if m_mask.is_cuda:
            device = m_mask.get_device()
            kernel_x = kernel_x.cuda(device)
            kernel_y = kernel_y.cuda(device)
        # Channel i of each kernel holds the constant displacement of class i.
        for i in range(num_class):
            m_x, m_y = reverse_m_dict[i]
            kernel_x[:, i, :, :] = m_x
            kernel_y[:, i, :, :] = m_y
        # NOTE(review): the result buffer is never moved to the GPU, so the
        # returned flow is allocated on the CPU even for CUDA masks. Kept
        # as-is to preserve what callers receive -- confirm that the
        # visualizer expects a CPU tensor.
        flow = Variable(torch.zeros(batch_size, 2, height, width))
        flow[:, 0, :, :] = (m_mask * kernel_x).sum(1)
        flow[:, 1, :, :] = (m_mask * kernel_y).sum(1)
        return flow