def main():
    config = Config.get_instance()
    cfg = config['colorization']['train']
    device_info = Devices.get_devices(gpu_ids=cfg['gpus'])
    tf.logging.info('\nconfig: %s\ndevice info: %s', config, device_info)

    # load centroids from the results of clustering
    with open(cfg['centroids'], 'rb') as centroids_file:
        centroids = np.load(centroids_file)
    num_colors = centroids.shape[0]

    input_functions = {
        'train': get_input_fn('train', centroids, cfg['batch_size'],
                              num_refs=cfg['reference_frames_count'],
                              num_process=cfg['num_process']),
        'eval': get_input_fn('test', centroids, cfg['batch_size'],
                             num_refs=cfg['reference_frames_count'],
                             num_process=max(1, cfg['num_process'] // 4)),
    }

    hparams = config['colorization']['hparams']
    hparams['optimizer'] = tf.train.AdamOptimizer(learning_rate=cfg['learning_rate'])
    hparams = tf.contrib.training.HParams(**hparams)
    # clear the config singleton; `cfg` still references the nested train section
    config.clear()

    # configure the ResNet colorizer model
    model_fn = model.Colorizer.get('resnet', model.ResNetColorizer,
                                   log_steps=1,
                                   num_refs=cfg['reference_frames_count'],
                                   num_colors=num_colors,
                                   predict_direction=cfg['direction'])
    tf_config = tf.estimator.RunConfig(model_dir=cfg['model_dir'],
                                       keep_checkpoint_max=100,
                                       save_checkpoints_secs=None,
                                       save_checkpoints_steps=1000,
                                       save_summary_steps=10,
                                       session_config=None)
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       config=tf_config,
                                       params=hparams)

    # alternate 1000 training steps with 50 evaluation steps per epoch
    for _ in range(cfg['epoch']):
        estimator.train(input_fn=input_functions['train'], steps=1000)
        estimator.evaluate(input_fn=input_functions['eval'], steps=50)
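
# A minimal sketch of an entry point for the training routine above. The
# verbosity call is standard TF 1.x logging; wiring main() up this way is an
# assumption about how the script is invoked, not part of the original code.
if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    main()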
def dataflow(name='davis', scale=1):
    """Compute graph to retrieve frame index, original image, grayscale image, and annotation."""
    cfg = Config.get_instance()

    # get test frames one at a time
    if name == 'davis':
        data_dirpath = cfg['data_dir']['davis']
        data = Davis(data_dirpath, num_frames=1, shuffle=False)
    elif name == 'kinetics':
        data_dirpath = cfg['data_dir']['kinetics']
        data = Kinetics(data_dirpath, num_frames=1, skips=[0], shuffle=False)
    else:
        raise Exception('Dataset [%s] not supported.' % name)

    # Davis datapoints carry both an image frame and an annotation frame;
    # repeat the Kinetics frame so both datasets share the same layout
    if name != 'davis':
        data = df.MapData(data, lambda dp: [dp[0], dp[1], dp[1]])
    data = df.MapData(data, lambda dp: [dp[0], dp[1], dp[2]])

    length = 256 * scale
    size = (length, length)

    # resize frames to length x length (256x256 at scale 1)
    data = df.MapDataComponent(data, ImageProcessor.resize(small_axis=length), index=1)
    data = df.MapDataComponent(data, lambda images: cv2.resize(images[0], size), index=2)

    # map to [frame index, original image, grayscale image, annotation mask]
    data = df.MapData(data, lambda dp: [
        dp[0],
        dp[1][0],
        cv2.cvtColor(cv2.resize(dp[1][0], size),
                     cv2.COLOR_BGR2GRAY).reshape((length, length, 1)),
        dp[2],
    ])
    data = df.MultiProcessPrefetchData(data, nr_prefetch=32, nr_proc=1)
    return data
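
# A hedged sanity-check helper for the dataflow above (hypothetical, not in
# the original code). It follows the tensorpack convention the test loop below
# also uses: reset_state() before iteration, get_data() yielding datapoints in
# the [index, image, gray, annotation] layout produced by the final MapData.
def _inspect_dataflow(name='davis', scale=1, num_samples=3):
    ds = dataflow(name, scale)
    ds.reset_state()
    for i, (idx, image, gray, annotation) in enumerate(ds.get_data()):
        print('datapoint %d (frame %d): image %s, gray %s, mask %s' %
              (i, idx, image.shape, gray.shape, annotation.shape))
        if i + 1 >= num_samples:
            break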
def main(args):
    cfg = Config(args.config) if args.config else Config()
    device_info = Devices.get_devices(gpu_ids=args.gpus)
    tf.logging.info('\nargs: %s\nconfig: %s\ndevice info: %s', args, cfg, device_info)

    scale = args.scale
    image_len, label_len = 256 * scale, 32 * scale

    data = dataflow(args.name, scale)
    data.reset_state()

    num_inputs = args.num_ref_frames + 1  # references + target; WHY? TODO
    placeholders = {
        'features': tf.placeholder(
            tf.int32, (None, num_inputs, image_len, image_len, 1), 'features'),
        'labels': tf.placeholder(
            tf.int32, (None, num_inputs, label_len, label_len, 1), 'labels'),
    }

    hparams = Config.get_instance()['hparams']
    hparams['optimizer'] = tf.train.AdamOptimizer()
    hparams = tf.contrib.training.HParams(**hparams)

    estimator_spec = model.Colorizer.get(
        'resnet', model.ResNetColorizer,
        num_ref_frames=args.num_ref_frames,
        predict_direction=args.direction)(
            features=placeholders['features'],
            labels=placeholders['labels'],
            mode=tf.estimator.ModeKeys.PREDICT,
            params=hparams,
        )

    session = tf.Session()
    saver = tf.train.Saver(tf.global_variables())
    saver.restore(session, args.checkpoint)

    # TODO change zeros
    dummy_labels = np.zeros((1, num_inputs, label_len, label_len, 1), dtype=np.int32)

    num_images, video_index = 0, -1
    start_time = time.time()  # TODO replace with SplitTimer
    for idx, image, gray, color in data.get_data():
        curr = {'image': image, 'gray': gray, 'color': color}
        num_images += 1

        # idx == 0 marks the first frame of a new video: log timing for the
        # previous video and reset the feature/reference buffers
        if idx == 0:
            tf.logging.info('Avg elapsed time per image: %.3f seconds',
                            (time.time() - start_time) / num_images)
            start_time = time.time()
            num_images = 0
            video_index += 1
            dummy_features = [
                np.zeros((image_len, image_len, 1), dtype=np.int32)
                for _ in range(num_inputs)
            ]
            dummy_references = [
                np.zeros((image_len, image_len, 3), dtype=np.int32)
                for _ in range(args.num_ref_frames)
            ]
            prev = copy.deepcopy(curr)
            dummy_features = dummy_features[1:] + [prev['gray']]
            tf.logging.info('Video index: %04d', video_index)

        # revise grayscale features and references
        if idx <= args.num_ref_frames:
            dummy_features = dummy_features[1:] + [curr['gray']]
            dummy_references = dummy_references[1:] + [curr['color']]

        features = np.expand_dims(
            np.stack(dummy_features[1:] + [curr['gray']], axis=0), axis=0)
        predictions = session.run(estimator_spec.predictions, feed_dict={
            placeholders['features']: features,
            placeholders['labels']: dummy_labels,
        })

        # predict color: map each target pixel to the argmax location in the
        # vertically stacked reference frames
        matrix_size = label_len ** 2
        indices = np.argmax(predictions['similarity'], axis=-1).reshape((-1,))
        mapping = np.zeros((matrix_size, 2))
        for i, index in enumerate(indices):
            f = (index // matrix_size) % args.num_ref_frames
            y = index // label_len
            x = index % label_len
            mapping[i, :] = [x, (args.num_ref_frames - f - 1) * label_len + y]
        mapping = np.array(mapping, dtype=np.float32).reshape((label_len, label_len, 2))

        # sample colors from the stacked reference frames via the mapping
        height, width = mapping.shape[:2]
        reference_colors = np.concatenate(dummy_references, axis=0)
        predicted = cv2.remap(
            cv2.resize(reference_colors, (width, height * args.num_ref_frames)),
            mapping, None, cv2.INTER_LINEAR)
        predicted = cv2.resize(predicted, (image_len, image_len))
        # curr['color'] = np.copy(predicted)

        height, width = image.shape[:2]
        predicted = cv2.resize(predicted, (width, height))

        prev = copy.deepcopy(curr)

        if args.name == 'davis':
            # overlay the predicted mask on the original frame
            _, mask = cv2.threshold(
                cv2.cvtColor(predicted, cv2.COLOR_BGR2GRAY), 10, 255,
                cv2.THRESH_BINARY)
            mask_inv = cv2.bitwise_not(mask)
            predicted = cv2.add(cv2.bitwise_and(image, image, mask=mask_inv),
                                predicted)
        predicted = cv2.addWeighted(image, 0.3, predicted, 0.7, 0)

        stacked = np.concatenate([image, predicted], axis=1)
        # scale similarities to 8-bit; cv2.dilate and cv2.imwrite do not accept int32
        similarity = (np.copy(predictions['similarity']).reshape(
            (label_len ** 2 * args.num_ref_frames, -1)) * 255.0).astype(np.uint8)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (scale, scale))
        similarity = cv2.resize(
            cv2.dilate(similarity, kernel),
            (label_len * 2 * args.num_ref_frames, label_len * 2))

        output_dir = '%s/%04d' % (args.output, video_index)
        for name, result in [('image', stacked), ('similarity', similarity)]:
            folder = os.path.join(output_dir, name)
            if not os.path.exists(folder):
                os.makedirs(folder)
            cv2.imwrite('%s/%04d.jpg' % (folder, idx), result)
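
# A sketch of the command-line interface implied by the args fields read in
# main() above (args.config, args.gpus, args.scale, args.name,
# args.num_ref_frames, args.direction, args.checkpoint, args.output). The
# flag names and defaults are assumptions reconstructed from those reads,
# not the original parser.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default=None)
    parser.add_argument('--gpus', type=int, nargs='*', default=None)
    parser.add_argument('--scale', type=int, default=1)
    parser.add_argument('--name', type=str, default='davis',
                        choices=['davis', 'kinetics'])
    parser.add_argument('--num-ref-frames', type=int, default=3)
    parser.add_argument('--direction', type=str, default='backward')  # assumed default
    parser.add_argument('--checkpoint', type=str, required=True)
    parser.add_argument('--output', type=str, default='results')
    tf.logging.set_verbosity(tf.logging.INFO)
    main(parser.parse_args())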
def dataflow(centroids, num_refs=3, num_process=16, shuffle=False):
    """Compute graph to retrieve `num_refs` reference frames and 1 target frame from Kinetics.

    Downsamples grayscale frames to 256x256 and colorized frames to 32x32
    feature maps in Lab colorspace, then clusters the colors in the colorized
    frames. The returned tensors are of shape (num_refs + 1, 256, 256, 1) and
    (num_refs + 1, 32, 32, 1) respectively. Instead of the colorized output
    itself, the nearest cluster centroid index per pixel is returned.

    :return: (grayscale input, cluster indices for colorized output)
    """
    config = Config.get_instance()
    kinetics_dirpath = config['data_dir']['kinetics']

    # get a target frame and `num_refs` prior reference frames with a fixed
    # pattern of skips
    data = Kinetics(kinetics_dirpath, num_frames=num_refs + 1,
                    skips=[0, 4, 4, 8][:num_refs + 1], shuffle=shuffle)

    # downsample frames to 256x256
    data = df.MapDataComponent(data, ImageProcessor.resize(small_axis=256), index=1)
    data = df.MapDataComponent(data, ImageProcessor.crop(shape=(256, 256)), index=1)
    # data = df.MapDataComponent(
    #     data, lambda images: [cv2.resize(image, (256, 256)) for image in images], index=1)

    # split frames into `num_refs` references and 1 target frame;
    # create deep copies of each at odd indices
    data = df.MapData(data, lambda dp: [
        dp[1][:num_refs],
        copy.deepcopy(dp[1][:num_refs]),
        dp[1][num_refs:],
        copy.deepcopy(dp[1][num_refs:]),
    ])

    # decolorize the first copies of reference and target frames to (256, 256, 1)
    for idx in [0, 2]:
        data = df.MapDataComponent(data, lambda images: [
            np.int32(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)).reshape(256, 256, 1)
            for image in images
        ], index=idx)

    for idx in [1, 3]:
        # downsample to 32x32 feature maps
        data = df.MapDataComponent(data, lambda images: [
            cv2.resize(image, (32, 32)) for image in images
        ], index=idx)
        # discard the grayscale L channel, keep only 'ab' from Lab color space;
        # scale from 0-255 to 0-1 for clustering in the next step
        data = df.MapDataComponent(data, lambda images: [
            cv2.cvtColor(np.float32(image / 255.0), cv2.COLOR_BGR2Lab)[:, :, 1:]
            for image in images
        ], index=idx)
        # find the nearest color cluster index for every pixel in refs and target
        data = df.MapDataComponent(data, lambda images: [
            get_cluster_labels(image, centroids) for image in images
        ], index=idx)

    # combine ref and target frames into (num_refs + 1, dim, dim, 1) tensors
    # for grayscale and colorized feature maps respectively:
    # generates [input tensor, output tensor]
    data = df.MapData(data, lambda dp: [
        np.stack(dp[0] + dp[2], axis=0),
        np.stack(dp[1] + dp[3], axis=0),
    ])

    # convert each datapoint to a tuple for tensorflow.data.Dataset
    # (NOTE from the original code: this "does not do what it is supposed to do")
    data = df.MapData(data, tuple)

    # prefetch 256 datapoints
    data = df.MultiProcessPrefetchData(data, nr_prefetch=256, nr_proc=num_process)
    data = df.PrefetchDataZMQ(data, nr_proc=1)
    return data
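
# get_cluster_labels() is referenced above but not defined in this section. A
# minimal sketch of the expected behavior, assuming `image` is an (H, W, 2)
# array of 'ab' values and `centroids` is a (num_colors, 2) array from the
# clustering step; the name and exact signature are assumptions.
def get_cluster_labels_sketch(image, centroids):
    height, width, channels = image.shape
    pixels = image.reshape((-1, channels))  # (H*W, 2)
    # squared euclidean distance from every pixel to every centroid: (H*W, K)
    distances = np.sum(
        (pixels[:, np.newaxis, :] - centroids[np.newaxis, :, :]) ** 2, axis=-1)
    # index of the nearest centroid per pixel, reshaped to (H, W, 1)
    labels = np.argmin(distances, axis=-1)
    return np.int32(labels).reshape((height, width, 1))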