Example #1
def main():
    config = Config.get_instance()
    cfg = config['colorization']['train']
    device_info = Devices.get_devices(gpu_ids=cfg['gpus'])
    tf.logging.info('\nargs: %s\nconfig: %s\ndevice info: %s', args, config,
                    device_info)

    # load centroids from results of clustering
    with open(cfg['centroids'], 'rb') as centroids_file:
        centroids = np.load(centroids_file)
    num_colors = centroids.shape[0]

    input_functions = {
        'train':
        get_input_fn('train',
                     centroids,
                     cfg['batch_size'],
                     num_refs=cfg['reference_frames_count'],
                     num_process=cfg['num_process']),
        'eval':
        get_input_fn('test',
                     centroids,
                     cfg['batch_size'],
                     num_refs=cfg['reference_frames_count'],
                     num_process=max(1, cfg['num_process'] // 4))
    }

    hparams = config['colorization']['hparams']
    hparams['optimizer'] = tf.train.AdamOptimizer(
        learning_rate=cfg['learning_rate'])
    hparams = tf.contrib.training.HParams(**hparams)

    config.clear()

    # configure ResNet colorizer model
    model_fn = model.Colorizer.get('resnet',
                                   model.ResNetColorizer,
                                   log_steps=1,
                                   num_refs=cfg['reference_frames_count'],
                                   num_colors=num_colors,
                                   predict_direction=cfg['direction'])

    tf_config = tf.estimator.RunConfig(model_dir=cfg['model_dir'],
                                       keep_checkpoint_max=100,
                                       save_checkpoints_secs=None,
                                       save_checkpoints_steps=1000,
                                       save_summary_steps=10,
                                       session_config=None)

    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       config=tf_config,
                                       params=hparams)

    for _ in range(cfg['epoch']):
        estimator.train(input_fn=input_functions['train'], steps=1000)
        estimator.evaluate(input_fn=input_functions['eval'], steps=50)
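
get_input_fn is not included in this collection; the sketch below is a guess
at its shape, assuming it wraps the tensorpack dataflow from Example #5 in a
tf.data.Dataset (the signature and defaults are assumptions, not the original
implementation):

def get_input_fn(mode, centroids, batch_size, num_refs=3, num_process=16):
    """Sketch only: build an Estimator input_fn from the Example #5 dataflow."""
    def input_fn():
        ds = dataflow(centroids, num_refs=num_refs, num_process=num_process,
                      shuffle=(mode == 'train'))
        ds.reset_state()
        # datapoints are (grayscale, cluster-index) tuples; see Example #5
        dataset = tf.data.Dataset.from_generator(
            ds.get_data,
            output_types=(tf.int32, tf.int32),
            output_shapes=((num_refs + 1, 256, 256, 1),
                           (num_refs + 1, 32, 32, 1)))
        return dataset.repeat().batch(batch_size)
    return input_fn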
Example #2
def dataflow(name='davis', scale=1):
    """Compute graph to retrieve index, grayscale index, annotation."""
    cfg = Config.get_instance()

    # load the test data one frame at a time
    if name == 'davis':
        data_dirpath = cfg['data_dir']['davis']
        data = Davis(data_dirpath, num_frames=1, shuffle=False)
    elif name == 'kinetics':
        data_dirpath = cfg['data_dir']['kinetics']
        data = Kinetics(data_dirpath, num_frames=1, skips=[0], shuffle=False)
    else:
        raise Exception('Dataset [%s] not supported.' % name)

    # duplicate the Kinetics frame so datapoints match Davis's [index, image, annotation] layout
    if name != 'davis':
        data = df.MapData(data, lambda dp: [dp[0], dp[1], dp[1]])

    # identity map: keep [index, image, annotation]
    data = df.MapData(data, lambda dp: [dp[0], dp[1], dp[2]])
    length = 256 * scale
    size = (length, length)

    # resize frames to length x length (256 * scale)
    data = df.MapDataComponent(data,
                               ImageProcessor.resize(small_axis=length),
                               index=1)
    data = df.MapDataComponent(data,
                               lambda images: cv2.resize(images[0], size),
                               index=2)

    # emit index, original image, grayscale image, annotation mask
    data = df.MapData(
        data, lambda dp: [
            dp[0],
            dp[1][0],
            cv2.cvtColor(cv2.resize(dp[1][0], size), cv2.COLOR_BGR2GRAY).
            reshape((length, length, 1)),
            dp[2],
        ])
    data = df.MultiProcessPrefetchData(data, nr_prefetch=32, nr_proc=1)
    return data
Example #3
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-k', '--num-clusters', type=int, default=16)
    parser.add_argument('-n', '--num-samples', type=int, default=50000)
    parser.add_argument('--name', type=str, default='kinetics')
    parser.add_argument('-l', '--log', type=str, default='')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    # TODO: refactor into functions taking o / k / n / name / log (with defaults):
    #   - a function to get the data flow
    #   - a function to render mp4 / gif?
    #   - a function to get the frames of one video?
    #   - a function to get the clustered results?
    # When called as a function, the default log level should be WARNING;
    # when run as a script, INFO.

    cfg = Config()
    kinetics_dirpath = cfg['data_dir']['kinetics']

    # configure logger
    log_format = '[%(asctime)s %(levelname)s] %(message)s'
    level = logging.DEBUG if args.debug else logging.INFO
    if not args.log:
        logging.basicConfig(level=level, format=log_format, stream=sys.stderr)
    else:
        logging.basicConfig(level=level, format=log_format, filename=args.log)
    logging.info('args: %s', args)

    if args.name == 'kinetics':
        # iterate over every frame of each video, one at a time
        ds = Kinetics(kinetics_dirpath, num_frames=1, skips=[0], shuffle=False)
Example #4
def main(args):
    cfg = Config(args.config) if args.config else Config()
    device_info = Devices.get_devices(gpu_ids=args.gpus)
    tf.logging.info('\nargs: %s\nconfig: %s\ndevice info: %s', args, cfg,
                    device_info)

    scale = args.scale
    image_len, label_len = 256 * scale, 32 * scale
    data = dataflow(args.name, scale)
    data.reset_state()

    num_inputs = args.num_ref_frames + 1  # reference frames plus one target frame
    placeholders = {
        'features':
        tf.placeholder(tf.int32, (None, num_inputs, image_len, image_len, 1),
                       'features'),
        'labels':
        tf.placeholder(tf.int32, (None, num_inputs, label_len, label_len, 1),
                       'labels'),
    }
    hparams = Config.get_instance()['hparams']
    hparams['optimizer'] = tf.train.AdamOptimizer()
    hparams = tf.contrib.training.HParams(**hparams)

    estimator_spec = model.Colorizer.get(
        'resnet',
        model.ResNetColorizer,
        num_ref_frames=args.num_ref_frames,
        predict_direction=args.direction)(
            features=placeholders['features'],
            labels=placeholders['labels'],
            mode=tf.estimator.ModeKeys.PREDICT,
            params=hparams,
        )

    session = tf.Session()
    saver = tf.train.Saver(tf.global_variables())
    saver.restore(session, args.checkpoint)

    # TODO change zeros
    dummy_labels = np.zeros((1, num_inputs, label_len, label_len, 1),
                            dtype=np.int32)

    num_images, video_index = 0, -1
    start_time = time.time()  # TODO replace with SplitTimer

    for idx, image, gray, color in data.get_data():
        curr = {'image': image, 'gray': gray, 'color': color}
        num_images += 1

        if idx == 0:
            tf.logging.info('Avg elapsed time per image: %.3f seconds',
                            (time.time() - start_time) / num_images)
            start_time = time.time()
            num_images = 0
            video_index += 1
            dummy_features = [
                np.zeros((image_len, image_len, 1), dtype=np.int32)
                for _ in range(num_inputs)
            ]
            dummy_references = [
                np.zeros((image_len, image_len, 3), dtype=np.int32)
                for _ in range(args.num_ref_frames)
            ]

            prev = copy.deepcopy(curr)
            dummy_features = dummy_features[1:] + [prev['gray']]
            tf.logging.info('Video index: %04d', video_index)

        # revise grayscale features and references
        if idx <= args.num_ref_frames:
            dummy_features = dummy_features[1:] + [curr['gray']]
            dummy_references = dummy_references[1:] + [curr['color']]

        features = np.expand_dims(np.stack(dummy_features[1:] + [curr['gray']],
                                           axis=0),
                                  axis=0)
        predictions = session.run(estimator_spec.predictions,
                                  feed_dict={
                                      placeholders['features']: features,
                                      placeholders['labels']: dummy_labels,
                                  })

        # predict color: for each target pixel, take the argmax over the
        # similarity matrix to find the best-matching reference pixel, then
        # build a cv2.remap coordinate map into the stacked reference frames
        matrix_size = label_len**2
        indices = np.argmax(predictions['similarity'], axis=-1).reshape((-1, ))
        mapping = np.zeros((matrix_size, 2))
        for i, index in enumerate(indices):
            f = (index // matrix_size) % args.num_ref_frames  # source frame
            y = index // label_len  # row within the stacked reference maps
            x = index % label_len   # column within the reference frame
            mapping[i, :] = [x, (args.num_ref_frames - f - 1) * label_len + y]

        mapping = np.array(mapping, dtype=np.float32).reshape(
            (label_len, label_len, 2))

        height, width = mapping.shape[:2]
        reference_colors = np.concatenate(dummy_references, axis=0)

        predicted = cv2.remap(
            cv2.resize(reference_colors,
                       (width, height * args.num_ref_frames)), mapping, None,
            cv2.INTER_LINEAR)

        predicted = cv2.resize(predicted, (image_len, image_len))
        # curr['color'] = np.copy(predicted)

        height, width = image.shape[:2]
        predicted = cv2.resize(predicted, (width, height))
        prev = copy.deepcopy(curr)

        if args.name == 'davis':
            _, mask = cv2.threshold(
                cv2.cvtColor(predicted, cv2.COLOR_BGR2GRAY), 10, 255,
                cv2.THRESH_BINARY)
            mask_inv = cv2.bitwise_not(mask)

            predicted = cv2.add(cv2.bitwise_and(image, image, mask=mask_inv),
                                predicted)
            predicted = cv2.addWeighted(image, 0.3, predicted, 0.7, 0)

        stacked = np.concatenate([image, predicted], axis=1)
        # render similarity as uint8 (cv2.dilate and cv2.resize reject int32)
        similarity = (np.copy(predictions['similarity']).reshape(
            (label_len**2 * args.num_ref_frames, -1)) * 255.0).astype(np.uint8)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (scale, scale))
        similarity = cv2.resize(
            cv2.dilate(similarity, kernel),
            (label_len * 2 * args.num_ref_frames, label_len * 2))
        output_dir = '%s/%04d' % (args.output, video_index)

        for name, result in [('image', stacked), ('similarity', similarity)]:
            folder = os.path.join(output_dir, name)
            if not os.path.exists(folder):
                os.makedirs(folder)
            cv2.imwrite('%s/%04d.jpg' % (folder, idx), result)
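
The loop above writes numbered JPEG frames per video; they can be stitched
into a clip afterwards (cf. the "mp4 / gif?" note in Example #3). A
hypothetical post-processing step, with output paths assumed to follow the
'%s/%04d' layout of output_dir:

import subprocess

# stitch <output>/<video>/image/%04d.jpg into a playable mp4 (hypothetical paths)
subprocess.call(['ffmpeg', '-framerate', '24',
                 '-i', 'results/0000/image/%04d.jpg',
                 '-pix_fmt', 'yuv420p', 'results/0000.mp4'])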
Example #5
def dataflow(centroids, num_refs=3, num_process=16, shuffle=False):
    """
    Compute graph to retrieve 3 reference frames and 1 target frame from Kinetics.

    Grayscale frames are downsampled to 256x256, and color frames to 32x32
    feature maps in Lab colorspace; the colors are then clustered.

    Returned tensors are of shape (num_refs + 1, 256, 256, 1)
    and (num_refs + 1, 32, 32, 1) respectively. Instead of color values,
    the nearest cluster centroid index is returned for each pixel.

    :return: (grayscale input, cluster indices for colorized output)
    """
    config = Config.get_instance()
    kinetics_dirpath = config['data_dir']['kinetics']

    # get num_refs prior reference frames plus the target frame, with inter-frame skips
    data = Kinetics(kinetics_dirpath,
                    num_frames=num_refs + 1,
                    skips=[0, 4, 4, 8][:num_refs + 1],
                    shuffle=shuffle)

    # downsample frames to 256x256
    data = df.MapDataComponent(data,
                               ImageProcessor.resize(small_axis=256),
                               index=1)
    data = df.MapDataComponent(data,
                               ImageProcessor.crop(shape=(256, 256)),
                               index=1)
    # data = df.MapDataComponent(
    #    data, lambda images: [cv2.resize(image, (256, 256)) for image in images], index=1)

    # split frames into 3 references and 1 target frame
    # create deep copies of each at odd indices
    data = df.MapData(
        data, lambda dp: [
            dp[1][:num_refs],
            copy.deepcopy(dp[1][:num_refs]), dp[1][num_refs:],
            copy.deepcopy(dp[1][num_refs:])
        ])

    # decolorize first set of reference and target frames as (256, 256, 1)
    for idx in [0, 2]:
        data = df.MapDataComponent(
            data,
            lambda images: [
                np.int32(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)).reshape(
                    256, 256, 1) for image in images
            ],
            index=idx)

    for idx in [1, 3]:
        # downsample to 32x32 feature map
        data = df.MapDataComponent(
            data,
            lambda images: [cv2.resize(image, (32, 32)) for image in images],
            index=idx)

        # discard grayscale L space, keep only 'ab' from Lab color space
        # scale from 0-255 to 0-1 for clustering in next step
        data = df.MapDataComponent(
            data,
            lambda images: [
                cv2.cvtColor(np.float32(image / 255.0), cv2.COLOR_BGR2Lab)
                [:, :, 1:] for image in images
            ],
            index=idx)

        # find nearest color cluster index for every pixel in ref and target
        data = df.MapDataComponent(
            data,
            lambda images:
            [get_cluster_labels(image, centroids) for image in images],
            index=idx)

    # combine ref and target frames into (num_refs + 1, dim, dim, 1) tensor
    # for both grayscale and colorized feature maps respectively
    # generates [input tensor, output tensor]
    data = df.MapData(
        data, lambda dp:
        [np.stack(dp[0] + dp[2], axis=0),
         np.stack(dp[1] + dp[3], axis=0)])

    # convert each datapoint from list to tuple, as tensorflow.data.Dataset's
    # generator interface expects; looks like a no-op but is required
    data = df.MapData(data, tuple)

    # prefetch 256 datapoints
    data = df.MultiProcessPrefetchData(data,
                                       nr_prefetch=256,
                                       nr_proc=num_process)
    data = df.PrefetchDataZMQ(data, nr_proc=1)

    return data
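
A minimal smoke test for the shapes promised by the docstring; the centroids
path is hypothetical and should point at the clustering output loaded in
Example #1:

centroids = np.load('datas/centroids.npy')  # hypothetical path
ds = dataflow(centroids, num_refs=3, num_process=1)
ds.reset_state()
grays, labels = next(ds.get_data())
assert grays.shape == (4, 256, 256, 1)   # 3 references + 1 target, grayscale
assert labels.shape == (4, 32, 32, 1)    # nearest-centroid index per pixel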
Example #6
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--num-process', type=int, default=16)
    parser.add_argument('-g', '--gpus', type=int, nargs='*', default=[0])
    parser.add_argument('-d', '--model-dir', type=str, default=None)
    parser.add_argument('--centroids', type=str, default=None)
    parser.add_argument('-f',
                        '--reference-frames-count',
                        type=int,
                        default=None)
    parser.add_argument('--direction',
                        type=str,
                        default=None,
                        help='[forward|backward]')
    parser.add_argument('--learning_rate', type=float, default=None)
    parser.add_argument('-e', '--epoch', type=int, default=None)
    parser.add_argument('--config', type=str, default=None)
    args = parser.parse_args()

    # update config from cli args
    config = Config(args.config) if args.config else Config()
    config['colorization']['train'].update(
        {key: val
         for key, val in vars(args).items() if val is not None})

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '5'
    tf.logging.set_verbosity(tf.logging.INFO)

    main()
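
A hypothetical invocation (the script name and paths are assumptions; the
flags match the parser above):

# python train.py -g 0 1 -d checkpoints/colorizer \
#     --centroids datas/centroids.npy -f 3 --direction backward -e 10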
Example #7
def main(args):
    logging.info('args: %s', args)

    # set default kinetics dir path from config.yaml
    if args.dir is None:
        cfg = Config()
        args.dir = cfg['data_dir']['kinetics']

    kinetics_filename = os.path.join(args.dir, 'kinetics_train.json')
    if not os.path.exists(kinetics_filename):
        raise Exception('File does not exist: "%s"' % kinetics_filename)

    # create video folders if they do not exist
    for foldername in ['original', 'processed']:
        if not os.path.exists(os.path.join(args.dir, foldername)):
            os.mkdir(os.path.join(args.dir, foldername))

    with open(kinetics_filename) as kinetics_file:
        kinetics = json.load(kinetics_file)
    keys = sorted(kinetics.keys())

    # download and/or process videos
    if not args.process:
        for i, key in enumerate(keys):
            value = kinetics[key]
            original_path = os.path.join(args.dir, 'original', key + '.mp4')
            if os.path.exists(original_path):
                logging.info('[%04d/%04d] file already exists for "%s"',
                             i, len(kinetics), key)
                continue
            try:
                logging.info('[%04d/%04d] downloading video "%s"',
                             i, len(kinetics), key)

                # download YouTube video
                command = [
                    'youtube-dl', '--quiet', '--no-warnings', '-f', 'mp4',
                    '-o', '"%s"' % original_path, '"%s"' % value['url'], '&',
                ]
                logging.info(' '.join(command))
                os.system(' '.join(command))
                time.sleep(0.5)
            except Exception as e:
                logging.error('[%04d/%04d] download failed for video "%s"',
                              i, len(kinetics), key)
                logging.error('%s: %s', type(e), str(e))

    else:
        for i, key in enumerate(keys):
            value = kinetics[key]
            original_path = os.path.join(args.dir, 'original', key + '.mp4')
            processed_path = os.path.join(args.dir, 'processed', key + '.mp4')
            if not os.path.exists(original_path):
                logging.info('[%04d/%04d] original file does not exist "%s"',
                             i, len(kinetics), key)
                continue
            if os.path.exists(processed_path):
                logging.info('[%04d/%04d] processed file already exists "%s"',
                             i, len(kinetics), key)
                continue
            try:
                logging.info('[%04d/%04d] processing video "%s"',
                             i, len(kinetics), key)

                # process video
                command = [
                    'ffmpeg', '-loglevel panic',
                    '-i', '"%s"' % original_path,
                    '-t', '%f' % value['duration'],
                    '-ss', '%f' % value['annotations']['segment'][0],
                    '-strict', '-2',
                    '"%s"' % processed_path,
                    '&'
                ]
                logging.info(' '.join(command))
                os.system(' '.join(command))
                time.sleep(1.5)
            except Exception as e:
                logging.error('[%04d/%04d] processing failed for video "%s"',
                              i, len(kinetics), key)
                logging.error('%s: %s', type(e), str(e))
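
Joining commands with os.system is fragile when paths contain quotes or
spaces; a sketch of the same ffmpeg step via subprocess, which blocks until
ffmpeg exits and so makes the trailing '&' and the sleep unnecessary:

import subprocess

subprocess.call(['ffmpeg', '-loglevel', 'panic',
                 '-i', original_path,
                 '-t', '%f' % value['duration'],
                 '-ss', '%f' % value['annotations']['segment'][0],
                 '-strict', '-2', processed_path])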