Example #1
File: util.py  Project: yycho0108/qpwcnet
    def on_epoch_end(self, epoch, logs={}):
        flows = self.model(self.data, training=False)
        flow_images = flow_to_image(flows, data_format=self.data_format)

        with self.writer.as_default():
            tf.summary.image('flow', flow_images, step=epoch)
        self.writer.flush()
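
A minimal usage sketch for a callback like the one above. Hedged: `FlowImageCallback` is a hypothetical wrapper class assumed to store `writer`, `data`, and `data_format`; `model`, `val_images`, and `train_dataset` are likewise illustrative names.

import tensorflow as tf

# Hypothetical wiring of the on_epoch_end hook above into model.fit().
writer = tf.summary.create_file_writer('/tmp/logs')
callback = FlowImageCallback(writer=writer, data=val_images,
                             data_format='channels_last')
model.fit(train_dataset, epochs=10, callbacks=[callback])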
Example #2
    def on_batch_end(self, batch, logs={}):
        self.batch_index += 1
        if (self.batch_index % self.log_period) != 0:
            return

        data_format = tf.keras.backend.image_data_format()

        val_ims, val_flo = self.val_data
        val_flow_img = self.val_flow_img

        flow_imgs = [val_flow_img]
        flows = self.model.predict(val_ims)
        for flow in flows:
            flow_img = flow_to_image(flow, data_format=data_format)
            if data_format == 'channels_first':
                # nchw -> nhwc
                flow_img = tf.transpose(flow_img, (0, 2, 3, 1))

            # NOTE(yycho0108):
            # interpolate nearest (tensorboard visualization applies
            # bilinear interpolation by default).
            flow_img = tf.image.resize(
                flow_img,
                size=val_flow_img.shape[1:3],
                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            flow_imgs.append(flow_img)

        step = self.batch_index * self.batch_size
        with self.writer.as_default():
            # will this work?
            for i, flow_img in enumerate(flow_imgs):
                name = 'flow-{:02d}'.format(i)
                tf.summary.image(name, flow_img, step=step, max_outputs=3)
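
The transposes above are needed because `tf.summary.image` only accepts batched NHWC tensors; a self-contained sketch of that conversion:

import tensorflow as tf

# tf.summary.image expects [batch, height, width, channels].
nchw = tf.random.uniform((2, 3, 64, 64))
nhwc = tf.transpose(nchw, (0, 2, 3, 1))
writer = tf.summary.create_file_writer('/tmp/logs')
with writer.as_default():
    tf.summary.image('example', nhwc, step=0, max_outputs=3)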
Example #3
def main():
    disable_gpu()

    # compute_stats()

    if False:
        filename = '/media/ssd/datasets/sintel-processed/sintel.tfrecord'
        reader = get_reader(filename).map(preprocess)
    else:
        reader = get_dataset_from_set().map(preprocess_fc3d)
        # reader = get_dataset().interleave(lambda x: Dataset.from_tensors(x).map(decode_files),
        #                                  cycle_length=tf.data.experimental.AUTOTUNE,
        #                                  num_parallel_calls=tf.data.experimental.AUTOTUNE).map(preprocess)

    reader = reader.shuffle(buffer_size=32)
    for entry in reader.as_numpy_iterator():
        ims, flo = entry
        flo_vis = flow_to_image(flo)
        prv = ims[..., :3]
        nxt = ims[..., 3:]

        #print('prv', prv.min(), prv.max())
        #print('nxt', nxt.min(), nxt.max())
        #print('flo', flo.min(), flo.max())
        #print('flo', np.linalg.norm(flo, axis=-1).mean())

        # show prev reconstructed from nxt.
        # nxt_w = tfa.image.dense_image_warp(nxt[None, ...].astype(
        #    np.float32)/255.0, -flo[None, ..., ::-1]).numpy()
        # nxt_w = tf_warp(nxt[None, ...].astype(
        #    np.float32)/255.0, flo[None, ...]).numpy()

        # flo order : (x,y) == (1,0)
        nxt_w = tfa.image.dense_image_warp(nxt[None, ...],
                                           -flo[None, ..., ::-1])[0].numpy()
        # nxt_w = tf_warp(nxt[None, ...], flo)[0].numpy()
        print(nxt_w.shape)

        cv2.imshow('prv', prv)
        cv2.imshow('nxt', nxt)
        # cv2.imshow('msk', prv_has_flo.astype(np.float32))
        cv2.imshow('nxt_w', nxt_w)
        cv2.imshow('nxt_w2', nxt_w - prv)

        # bgr, prv=b, nxt=g, r=warp
        overlay = np.stack([(prv).mean(axis=-1), (nxt).mean(axis=-1),
                            (nxt_w).mean(axis=-1)],
                           axis=-1)
        cv2.imshow('overlay', overlay)
        cv2.imshow('flo', normalize(flo[..., 0]))
        cv2.imshow('flo-vis', flo_vis.numpy())
        k = cv2.waitKey(0)
        if k == 27:
            break
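
On the warp above: `tfa.image.dense_image_warp` samples `image[b, y - flow[..., 0], x - flow[..., 1]]`, i.e. its flow argument is (dy, dx) and subtracted, while optical flow here is (dx, dy) pointing forward; hence `-flo[None, ..., ::-1]`. A standalone sketch:

import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

image = tf.random.uniform((1, 8, 8, 3))
flow_xy = np.zeros((1, 8, 8, 2), np.float32)
flow_xy[..., 0] = 1.0  # one-pixel forward shift along +x
# Negate and swap channels to match dense_image_warp's (dy, dx) convention.
warped = tfa.image.dense_image_warp(image, -flow_xy[..., ::-1])
print(warped.shape)  # (1, 8, 8, 3)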
Example #4
    def _get_test_data(self):
        batch_size = self.batch_size
        data_format = tf.keras.backend.image_data_format()
        val_data = next(get_dataset_from_set().map(preprocess_no_op).batch(
            batch_size).take(1).cache().as_numpy_iterator())

        # Might as well also precompute flow image.
        val_ims, val_flo = val_data
        val_flow_img = flow_to_image(val_flo, data_format=data_format)
        if data_format == 'channels_first':
            # nchw -> nhwc
            val_flow_img = tf.transpose(val_flow_img, (0, 2, 3, 1))
        return val_data, val_flow_img
Example #5
def main():
    disable_gpu()
    data_format = 'channels_first'
    tf.keras.backend.set_image_data_format(data_format)
    dataset = setup_input(8, data_format)

    for imgs, flows in dataset:
        idx = np.random.randint(8)

        prv = imgs[idx, :3]
        nxt = imgs[idx, 3:]
        flo = flows[idx]
        flo_rgb = flow_to_image(flo, data_format=data_format)

        show('prv', 0.5 + prv, True, data_format)
        show('nxt', 0.5 + nxt, True, data_format)
        # FLO corresponds to stuff in `prv`
        show('flo', flo_rgb, True, data_format)
        k = cv2.waitKey(0)
        if k in [27, ord('q')]:
            break
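
Setting the global data format, as done above, changes the layout every subsequently constructed layer assumes by default; a quick check:

import tensorflow as tf

tf.keras.backend.set_image_data_format('channels_first')
assert tf.keras.backend.image_data_format() == 'channels_first'
# Layers created from here on default to NCHW inputs.
conv = tf.keras.layers.Conv2D(8, 3)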
Example #6
def main(args: Settings):
    data_format = args.data_format
    tf.keras.backend.set_image_data_format(data_format)
    model_file = Path(args.model)

    # Define inference-only model.
    model = build_interpolator(
        input_shape=args.input_shape,
        output_multiscale=False)
    load_weights(model, args.model)
    multi_output = False

    logging.info('Done with model load')

    # Extract flow-only model for visualization.
    # NOTE(ycho): We're only extracting forward-directional flow,
    # i.e. flow : prv[i, j] == nxt[i+flo[i,j,1], j+flo[i,j,0]]
    flow_model = tf.keras.Model(
        inputs=model.inputs,
        outputs=model.get_layer('lambda_11').get_output_at(0)
        # print(model.get_layer('lambda_11').get_output_at(1))
    )

    # FIXME(ycho): Ability to select dataset
    # Select dataset.
    if args.dataset == 'ytvos':
        dataset = YoutubeVos(YoutubeVosSettings(data_type='train'))
    elif args.dataset == 'vimeo':
        dataset = VimeoTriplet(VimeoTripletSettings(data_type='train'))
    else:
        raise ValueError('Invalid dataset = {}'.format(args.dataset))
    # TripletDataset -> tf.data.Dataset
    dataset = read_triplet_dataset(dataset, dsize=args.input_shape,
                                   augment=False,
                                   batch_size=1)

    for img0, img1, img2 in dataset:
        img_pair = tf.concat([img0, img2], axis=3)

        # @see pre_train:preprocess()
        if data_format == 'channels_first':
            img_pair = einops.rearrange(img_pair, 'n h w c -> n c h w')
        img_pair -= 0.5

        if True:
            flow = flow_model(img_pair)
            flow_rgb = flow_to_image(flow, data_format=data_format)
            _show('5-flow', flow_rgb[0], data_format)

            # warp 1 -> 0, let's see how it fares.
            if data_format == 'channels_first':
                upflow = 2.0 * einops.repeat(flow,
                                             'n c h w -> n c (h h2) (w w2)',
                                             h2=2, w2=2)
            else:
                upflow = 2.0 * einops.repeat(flow,
                                             'n h w c -> n (h h2) (w w2) c',
                                             h2=2, w2=2)
            if data_format == 'channels_first':
                img1_ = einops.rearrange(img1, 'n h w c -> n c h w')
            else:
                img1_ = img1
            img1w = tf_warp(img1_, upflow, data_format)
            _show('6-warp(==0-prv)', img1w[0], data_format)

        if True:
            pred_img1 = model(img_pair)

            # Take the last (full-res) image in case of multi output.
            # This would be the case if e.g. model.output_multiscale==True.
            if multi_output:
                pred_img1 = pred_img1[-1]

            overlay = 0.5 * img0[0] + 0.5 * img2[0]
            _show('0-prv', img0[0], 'channels_last')
            _show('1-nxt', img2[0], 'channels_last')
            _show('2-ground-truth', img1[0], 'channels_last')
            _show('3-pred', 0.5 + pred_img1[0], data_format=data_format)
            _show('4-overlay', overlay, 'channels_last')

        k = cv2.waitKey(0)
        if k in [27, ord('q')]:
            break
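
Wrapping an intermediate layer's output in a fresh `tf.keras.Model`, as done with `lambda_11` above, is a generic Keras pattern; an illustrative sketch on a stock network (the layer name is specific to MobileNetV2, not to qpwcnet):

import tensorflow as tf

base = tf.keras.applications.MobileNetV2(weights=None)
feat = tf.keras.Model(inputs=base.inputs,
                      outputs=base.get_layer('block_6_expand').output)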
Example #7
def main():
    faulthandler.enable()

    # NOTE(ycho): Mysteriously, tflite segfaults if `channels_first`.
    tf.keras.backend.set_image_data_format('channels_last')

    # my_devices = tf.config.experimental.list_physical_devices(device_type='CPU')
    # tf.config.experimental.set_visible_devices(devices= my_devices, device_type='CPU')
    # disable_gpu()

    # Load the TFLite model and allocate tensors.
    interpreter = tf.lite.Interpreter(model_path="/tmp/qpwcnet.tflite")
    interpreter.allocate_tensors()

    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    print(input_details)
    output_details = interpreter.get_output_details()
    print(output_details)

    # Test the model on random input data.
    input_shape = input_details[0]['shape']
    input_data = np.array(np.random.random_sample(input_shape),
                          dtype=np.float32)
    print(input_data.shape)  # 1, 6, 256, 512

    print('set_tensor')
    interpreter.set_tensor(input_details[0]['index'], input_data)
    print('invoke')
    interpreter.invoke()
    print('?')

    # The function `get_tensor()` returns a copy of the tensor data.
    # Use `tensor()` in order to get a pointer to the tensor.
    output_data = interpreter.get_tensor(output_details[-1]['index'])
    print(output_data.shape)

    def preprocess(ims, flo):
        # 0-255 -> 0.0-1.0
        ims = tf.cast(ims, tf.float32) * tf.constant(1.0 / 255.0,
                                                     dtype=tf.float32)
        # resize, no augmentation.
        ims, flo = image_resize(ims, flo, (256, 512))
        # ims, flo = image_augment(ims, flo, (256, 512))
        # 0.0-1.0 -> -0.5, 0.5
        ims = ims - 0.5

        # Convert to correct data format
        data_format = tf.keras.backend.image_data_format()
        if data_format == 'channels_first':
            ims = einops.rearrange(ims, '... h w c -> ... c h w')
            flo = einops.rearrange(flo, '... h w c -> ... c h w')

        return ims, flo

    if True:
        # TODO(ycho): Cleanup dataset loading pattern for opt-flow datasets.
        glob_pattern = '/media/ssd/datasets/sintel-processed/shards/sintel-*.tfrecord'
        filenames = tf.data.Dataset.list_files(glob_pattern).shuffle(32)
        # dataset = get_reader(filenames).shuffle(buffer_size=1024).repeat().batch(8)
        # dataset = get_reader(filenames).batch(8).repeat()
        dataset = get_reader(filenames).shuffle(
            buffer_size=32).map(preprocess).batch(1)

        for ims, flo in dataset:
            # NOTE: `set_tensor` expects a numpy array rather than a tf.Tensor.
            interpreter.set_tensor(input_details[0]['index'], ims.numpy())
            interpreter.invoke()
            flo_pred = interpreter.get_tensor(output_details[-1]['index'])
            flo_pred_rgb = flow_to_image(flo_pred)

            show('flo_pred_rgb', flo_pred_rgb[0], True)
            cv2.waitKey(0)

            break
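
For reference, a `.tflite` file like the one loaded above would typically be produced with the TFLite converter; a minimal sketch (the Keras `model` variable and output path are assumptions):

import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open('/tmp/qpwcnet.tflite', 'wb') as f:
    f.write(tflite_model)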
Example #8
def main(cfg: Settings):
    if cfg.data_format is not None:
        tf.keras.backend.set_image_data_format(cfg.data_format)
    data_format = tf.keras.backend.image_data_format()

    # 1) Build inference-only network
    model = build_flower(False,
                         cfg.input_shape,
                         data_format)

    # 2) Restore model.
    load_weights(model, cfg.model_file)

    if False:
        # from image

        # x = np.random.uniform(0, 255, size=(1, 256, 512, 6)).astype(np.uint8)
        lhs = cv2.imread(
            '/media/ssd/datasets/MPI-Sintel-complete/test/final/ambush_3/frame_0014.png')
        rhs = cv2.imread(
            '/media/ssd/datasets/MPI-Sintel-complete/test/final/ambush_3/frame_0015.png')
        lhs = cv2.resize(lhs, (512, 256))
        rhs = cv2.resize(rhs, (512, 256))
        x = np.concatenate([lhs, rhs], axis=-1)[None, ...]
        # FIXME(yycho0108): the above operations should replicate the
        # preprocess() data-whitening procedure.
        y = model(x / 255.0 - 0.5).numpy()
        rhs_w = tf_warp(rhs[None, ...].astype(np.float32) / 255.0,
                        y)[0].numpy()

        cv2.imshow('lhs', lhs)
        cv2.imshow('rhs', rhs)
        cv2.imshow('overlay', rhs // 2 + lhs // 2)
        cv2.imshow('overlay-w', rhs_w / 2 + lhs / 255.0 / 2)
        cv2.imshow('flow-x', normalize(y[0, ..., 0]))
        cv2.imshow('flow-y', normalize(y[0, ..., 1]))
        cv2.imshow('rhs-w', rhs_w)
        cv2.waitKey(0)

    if True:
        # from tfrecord
        glob_pattern = '/media/ssd/datasets/sintel-processed/shards/sintel-*.tfrecord'
        filenames = tf.data.Dataset.list_files(glob_pattern).shuffle(32)
        # dataset = get_reader(filenames).shuffle(buffer_size=1024).repeat().batch(8)
        # dataset = get_reader(filenames).batch(8).repeat()
        dataset = get_reader(filenames).shuffle(
            buffer_size=32).map(preprocess).batch(1)

        for ims, flo in dataset:
            flo_pred = model.predict(ims)

            # Unstack `ims`.
            if data_format == 'channels_first':
                prv, nxt = einops.rearrange(
                    ims, 'n (k c) h w -> k n c h w', k=2)
            else:
                # Keep channels_last layout when unstacking.
                prv, nxt = einops.rearrange(
                    ims, 'n h w (k c) -> k n h w c', k=2)

            # NOTE(ycho): Maintain consistent `data_format` for sanity
            # preserving. Slightly inefficient but oh well...
            #if data_format == 'channels_first':
            #    nxt_nhwc = einops.rearrange(nxt, 'n c h w -> n h w c')
            #    flo_pred_nhwc = einops.rearrange(
            #        flo_pred, 'n c h w -> n h w c')
            #    nxt_w = tf_warp(nxt_nhwc, flo_pred_nhwc, data_format)
            #    nxt_w = einops.rearrange(nxt_w, 'n h w c -> n c h w')

            #    nxt_w_gt = tf_warp(nxt, flo)
            #else:
            nxt_w = tf_warp(nxt, flo_pred, data_format)
            nxt_w_gt = tf_warp(nxt, flo, data_format)

            # Undo `preprocess()`
            prv = 0.5 + prv
            nxt = 0.5 + nxt
            nxt_w = 0.5 + nxt_w
            nxt_w_gt = 0.5 + nxt_w_gt
            # (`flo_pred` is a flow field, not a mean-shifted image; no un-shift needed.)

            # Apply colorization.
            flo_rgb = flow_to_image(flo, data_format)
            flo_pred_rgb = flow_to_image(flo_pred, data_format)

            # Compute derived visualizations.
            overlay = 0.5 * prv + 0.5 * nxt
            overlay_warped = 0.5 * prv + 0.5 * nxt_w
            delta_warped = tf.abs(0.5 * prv - 0.5 * nxt_w)
            overlay_warped_gt = 0.5 * prv + 0.5 * nxt_w_gt
            delta_warped_gt = tf.abs(0.5 * prv - 0.5 * nxt_w_gt)

            # Show all.
            for name in ['prv', 'nxt', 'nxt_w', 'overlay',
                         'overlay_warped', 'overlay_warped_gt',
                         'delta_warped', 'delta_warped_gt',
                         'flo_rgb', 'flo_pred_rgb']:
                image = locals()[name]
                # NOTE(ycho): unbatch before showing.
                show(name, image[0], True, data_format)

            k = cv2.waitKey(0)
            if k == 27:
                break
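
The `einops.rearrange` unstacking above splits the concatenated 6-channel frame pair back into two 3-channel frames; a standalone sketch of the channels_last case:

import numpy as np
import einops

ims = np.zeros((1, 256, 512, 6), np.float32)
# '(k c)' makes k the slow axis, so channels [0:3] -> prv, [3:6] -> nxt.
prv, nxt = einops.rearrange(ims, 'n h w (k c) -> k n h w c', k=2)
assert prv.shape == (1, 256, 512, 3)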
Example #9
def train_custom(model, losses, dataset, path, config):
    """
    Custom training loop.
    """

    # Unroll config.
    (batch_size, num_epoch, update_freq, data_format, allow_memory_growth,
     use_custom_training) = config

    # Setup metrics.
    metrics = {}
    metrics['loss'] = tf.keras.metrics.Mean(name='loss', dtype=tf.float32)
    # metrics['epe'] = tf.keras.metrics.Mean(name='epe', dtype=tf.float32)
    for out in model.outputs:
        if data_format == 'channels_first':
            h = out.shape[2]
        else:
            h = out.shape[1]
        name = 'flow-loss-{:02d}'.format(h)
        metrics[name] = tf.keras.metrics.Mean(name=name, dtype=tf.float32)

    # Retrieve validation dataset (only used for visualization for now) ...
    val_data = next(get_dataset_from_set().map(preprocess_no_op).batch(
        batch_size).take(1).cache().as_numpy_iterator())

    # Setup handlers for training/logging.
    # lr = learning_rate_cyclic(batch_size)
    lr = 1e-4  # learning_rate_cyclic(batch_size)
    optim = tf.keras.optimizers.Adam(learning_rate=lr)
    writer = tf.summary.create_file_writer(str(path['log']))
    ckpt = tf.train.Checkpoint(optimizer=optim, model=model)
    ckpt_mgr = tf.train.CheckpointManager(ckpt,
                                          str(path['ckpt']),
                                          max_to_keep=8)

    # Load from checkpoint.
    ckpt.restore(tf.train.latest_checkpoint('/tmp/pwc/run/044/ckpt/'))

    # Iterate through train loop.
    for epoch in range(num_epoch):
        print('Epoch {:03d}/{:03d}'.format(epoch, num_epoch))
        # prepare epoch.
        for v in metrics.values():
            v.reset_states()

        # train epoch.
        for ims, flo in dataset:
            # Skip invalid inputs (unlikely but happens sometimes)
            if not (tf.reduce_all(tf.math.is_finite(ims))
                    and tf.reduce_all(tf.math.is_finite(flo))):
                continue

            opt_iter, flow_loss, step_loss = train_step(
                model, losses, optim, ims, flo)

            # update metrics.
            metrics['loss'].update_state(step_loss)
            for out, l in zip(model.outputs, flow_loss):
                if data_format == 'channels_first':
                    h = out.shape[2]
                else:
                    h = out.shape[1]
                name = 'flow-loss-{:02d}'.format(h)
                metrics[name].update_state(l)

            # log/save.
            if (opt_iter > 0) and ((opt_iter % update_freq) == 0):
                # compute flows and output image.
                val_ims, val_flo = val_data

                # First add ground truth flow ...
                val_flow_img = flow_to_image(val_flo, data_format=data_format)
                if data_format == 'channels_first':
                    # nchw -> nhwc
                    val_flow_img = tf.transpose(val_flow_img, (0, 2, 3, 1))
                flow_imgs = [val_flow_img]

                flows = model(val_ims, training=False)
                for flow in flows:
                    flow_img = flow_to_image(flow, data_format=data_format)
                    if data_format == 'channels_first':
                        # nchw -> nhwc
                        flow_img = tf.transpose(flow_img, (0, 2, 3, 1))

                    # NOTE(yycho0108):
                    # interpolate nearest (tensorboard visualization applies
                    # bilinear interpolation by default).
                    flow_img = tf.image.resize(
                        flow_img,
                        size=val_flow_img.shape[1:3],
                        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
                    flow_imgs.append(flow_img)

                with writer.as_default():
                    tf.summary.scalar('iter', opt_iter, step=opt_iter)
                    tf.summary.scalar('learning_rate', lr, step=opt_iter)
                    # tf.summary.scalar('learning_rate', lr(
                    #    tf.cast(opt_iter, tf.float32)), step=opt_iter)
                    for k, v in metrics.items():
                        tf.summary.scalar(k, v.result(), step=opt_iter)
                    # will this work?
                    for i, flow_img in enumerate(flow_imgs):
                        name = 'flow-{:02d}'.format(i)
                        tf.summary.image(name,
                                         flow_img,
                                         step=opt_iter,
                                         max_outputs=3)
        ckpt_mgr.save(epoch)
    model.save_weights(str(path['run'] / 'model.h5'))
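
The checkpoint plumbing above follows the standard `tf.train.Checkpoint` / `CheckpointManager` pattern; a condensed sketch (model and paths are illustrative):

import tensorflow as tf

optim = tf.keras.optimizers.Adam(1e-4)
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
ckpt = tf.train.Checkpoint(optimizer=optim, model=model)
mgr = tf.train.CheckpointManager(ckpt, '/tmp/ckpt', max_to_keep=8)
ckpt.restore(mgr.latest_checkpoint)  # no-op when nothing has been saved
mgr.save()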