def fast_rcnn_loss(class_outputs, box_outputs, class_targets, box_targets,
                   params):
  """Computes the class and box losses of the Fast-RCNN branch of Mask-RCNN.

  `box_outputs` carries one predicted box per class for every RoI. Rather than
  expanding `box_targets` to that shape (as the Detectron reference does), this
  function picks, for every RoI, the single predicted box that belongs to the
  RoI's target class and compares it with `box_targets` directly.

  The two loss terms are delegated to helpers defined elsewhere in this file:
    (1) `_fast_rcnn_class_loss` on the class logits and one-hot class targets.
    (2) `_fast_rcnn_box_loss` on the selected boxes, scaled by
        `params['fast_rcnn_box_loss_weight']`.

  References:
    https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py  # pylint: disable=line-too-long
    https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/fast_rcnn_heads.py  # pylint: disable=line-too-long

  Args:
    class_outputs: a float tensor representing the class prediction for each
      box with a shape of [batch_size, num_boxes, num_classes].
    box_outputs: a float tensor representing the box prediction for each box
      with a shape of [batch_size, num_boxes, num_classes * 4]. The first two
      dimensions must be statically known.
    class_targets: a float tensor representing the class label for each box
      with a shape of [batch_size, num_boxes].
    box_targets: a float tensor representing the box label for each box with a
      shape of [batch_size, num_boxes, 4].
    params: dictionary of training parameters. This function reads
      'num_classes' and 'fast_rcnn_box_loss_weight'.

  Returns:
    total_loss: a float tensor, the sum of the class and box losses.
    cls_loss: a float tensor representing total class loss.
    box_loss: a float tensor representing the weighted box regression loss.
  """
  num_classes = params['num_classes']
  with tf.name_scope('fast_rcnn_loss'):
    class_targets = tf.to_int32(class_targets)
    one_hot_labels = tf.one_hot(class_targets, num_classes)
    class_loss = _fast_rcnn_class_loss(class_outputs, one_hot_labels)

    # View the predictions as one 4-vector per (image, RoI, class).
    batch_size, num_rois, _ = box_outputs.get_shape().as_list()
    box_outputs = tf.reshape(box_outputs,
                             [batch_size, num_rois, num_classes, 4])

    # Position of the target-class box of every RoI inside the flattened
    # [batch_size * num_rois * num_classes, 4] table:
    #   image_offset + roi_offset + class_id.
    image_offsets = tf.tile(
        tf.expand_dims(tf.range(batch_size) * num_rois * num_classes, 1),
        [1, num_rois])
    roi_offsets = tf.tile(
        tf.expand_dims(tf.range(num_rois) * num_classes, 0),
        [batch_size, 1])
    flat_indices = tf.reshape(class_targets + image_offsets + roi_offsets,
                              [-1])

    # The rows are selected with a one-hot matrix product.
    row_selector = tf.one_hot(
        flat_indices,
        batch_size * num_rois * num_classes,
        dtype=box_outputs.dtype)
    box_outputs = tf.matmul(row_selector, tf.reshape(box_outputs, [-1, 4]))
    box_outputs = tf.reshape(box_outputs, [batch_size, -1, 4])

    unweighted_box_loss = _fast_rcnn_box_loss(box_outputs, box_targets,
                                              class_targets)
    box_loss = params['fast_rcnn_box_loss_weight'] * unweighted_box_loss
    total_loss = class_loss + box_loss
    return total_loss, class_loss, box_loss
def GetTargetSpec(
    name,
    num_dims=100,
    t_dof=1.0,
    regression_dataset="covertype",
    regression_num_points=0,
    regression_normalize=False,
    regression_hier_type="none",  # none, centered, non_centered
    regression_beta_prior="normal",  # normal, student_t
    regression_type="regular",  # regular, gamma_scales
    regression_use_beta_scales=True,
    eig_source="linear",
    batch_size=0,
    regression_stochastic_points=0,
    gamma_shape=0.5,
    precomputed_stats_path=None,
    **kwargs):
  """Builds a named target distribution together with its `TargetSpec`.

  Args:
    name: Which target to build. One of "funnel", "ill_cond_gaussian",
      "new_ill_cond_gaussian", "ill_cond_t", "new_ill_cond_t", "logistic_reg",
      "mog", "easy_gaussian" or "gp_reg".
    num_dims: Dimensionality of the synthetic targets. For "logistic_reg" and
      "gp_reg" it is recomputed from the dataset and the model type.
    t_dof: Degrees of freedom of the Student-t targets and of the "student_t"
      regression prior.
    regression_dataset: Dataset for "logistic_reg": "covertype" or "german".
    regression_num_points: If > 0, only this many rows are used, chosen
      without replacement with a fixed NumPy seed (10).
    regression_normalize: If true, features are divided by their per-column
      range and then mapped through `2 * x - 1`.
    regression_hier_type: Hierarchy of the "regular" regression prior: "none",
      "centered" or "non_centered".
    regression_beta_prior: Prior family of the "regular" regression weights:
      "normal", anything else selects a multivariate Student-t.
    regression_type: Regression log-density to use: "regular", "gamma_scales",
      "horseshoe" or "gamma_scales2".
    regression_use_beta_scales: For "gamma_scales", whether per-feature log
      scales are part of the parameter vector.
    eig_source: Eigenvalue source of the "new_ill_cond_*" targets: "linear" or
      "gamma".
    batch_size: If non-zero, the data log-likelihood is accumulated with
      `tf.scan` over row slices of this size instead of in one shot.
    regression_stochastic_points: If > 0, this many row indices are drawn with
      `tf.random.uniform` and gathered from the dataset.
    gamma_shape: Shape of the gamma distribution the eigenvalues are drawn
      from when `eig_source == "gamma"` in "new_ill_cond_gaussian".
    precomputed_stats_path: Optional path of a JSON file; when given, its
      contents (values converted to NumPy arrays) become `spec.stats`.
    **kwargs: `TargetSpec` field overrides, applied last via `_replace`.

  Returns:
    A `(target, spec)` tuple: the target distribution object and the
    `TargetSpec` describing it.

  Raises:
    ValueError: If `name`, `eig_source`, `regression_dataset` or
      `regression_type` is not one of the supported values (and, when the
      "regular" log-density is evaluated, for an unknown
      `regression_hier_type`).
  """
  if name == "funnel":
    spec = TargetSpec(
        name=name,
        num_dims=num_dims,
        x_min=-4.0,
        x_max=4.0,
        y_min=-10.0,
        y_max=10.0,
        stats=None,
        bijector=None)

    def funnel_forward(x):
      # No shift; the first coordinate is left unscaled and is reused as the
      # log-scale of all the remaining coordinates.
      shift = tf.zeros_like(x)
      log_scale = tf.concat([
          tf.zeros_like(x[Ellipsis, :1]),
          tf.tile(x[Ellipsis, :1], [1, num_dims - 1])
      ], -1)
      return shift, log_scale

    mg = tfd.MultivariateNormalDiag(
        loc=tf.zeros(num_dims), scale_identity_multiplier=1.0)
    target = tfd.TransformedDistribution(
        mg, bijector=tfb.MaskedAutoregressiveFlow(funnel_forward))
  elif name == "ill_cond_gaussian":
    # For backwards compatibility with earlier experiments.
    spec = TargetSpec(
        name=name,
        num_dims=num_dims,
        x_min=-5.0,
        x_max=5.0,
        y_min=-5.0,
        y_max=5.0,
        stats=None,
        bijector=None)
    rng = np.random.RandomState(seed=10)
    diag_precisions = np.linspace(1., 1000., num_dims)**-1
    q, _ = np.linalg.qr(rng.randn(num_dims, num_dims))
    scg_prec = (q * diag_precisions).dot(q.T)
    scg_prec = scg_prec.astype(np.float32)
    scg_var = np.linalg.inv(scg_prec) / 1000.0
    target = tfd.MultivariateNormalFullCovariance(
        loc=tf.zeros(num_dims), covariance_matrix=scg_var)
  elif name == "new_ill_cond_gaussian":
    spec = TargetSpec(
        name=name,
        num_dims=num_dims,
        x_min=-5.0,
        x_max=5.0,
        y_min=-5.0,
        y_max=5.0,
        stats=None,
        bijector=None)
    rng = np.random.RandomState(seed=10)
    if eig_source == "linear":
      eigenvalues = np.linspace(1., 1000., num_dims)**-1
    elif eig_source == "gamma":
      eigenvalues = np.sort(
          rng.gamma(shape=gamma_shape, scale=1.,
                    size=num_dims)).astype(np.float32)
    else:
      raise ValueError("Unknown eig_source: " + str(eig_source))
    q, _ = np.linalg.qr(rng.randn(num_dims, num_dims))
    covariance = (q * eigenvalues**-1).dot(q.T).astype(np.float32)
    target = tfd.MultivariateNormalFullCovariance(
        loc=tf.zeros(num_dims), covariance_matrix=covariance)
  elif name == "ill_cond_t":
    # For backwards compatibility with earlier experiments.
    spec = TargetSpec(
        name=name,
        num_dims=num_dims,
        x_min=-10.0,
        x_max=10.0,
        y_min=-10.0,
        y_max=10.0,
        stats=None,
        bijector=None)
    rng = np.random.RandomState(seed=10)
    diag_precisions = np.linspace(1., 1000., num_dims)**-1
    q, _ = np.linalg.qr(rng.randn(num_dims, num_dims))
    scg_prec = (q * diag_precisions).dot(q.T)
    scg_prec = scg_prec.astype(np.float32)
    scg_var = np.linalg.inv(scg_prec) / 1000.0
    scale = tf.linalg.LinearOperatorFullMatrix(scg_var)
    target = tfd.MultivariateStudentTLinearOperator(
        loc=tf.zeros(num_dims), scale=scale, df=t_dof)
  elif name == "new_ill_cond_t":
    spec = TargetSpec(
        name=name,
        num_dims=num_dims,
        x_min=-5.0,
        x_max=5.0,
        y_min=-5.0,
        y_max=5.0,
        stats=None,
        bijector=None)
    rng = np.random.RandomState(seed=10)
    if eig_source == "linear":
      eigenvalues = np.linspace(1., 1000., num_dims)**-1
    elif eig_source == "gamma":
      # NOTE(review): unlike "new_ill_cond_gaussian", the gamma shape is
      # hard-coded here and the `gamma_shape` argument is ignored. Kept as-is
      # so existing experiments are unaffected - confirm whether intended.
      eigenvalues = np.sort(rng.gamma(shape=0.5, scale=1.,
                                      size=num_dims)).astype(np.float32)
    else:
      raise ValueError("Unknown eig_source: " + str(eig_source))
    q, _ = np.linalg.qr(rng.randn(num_dims, num_dims))
    covariance = (q * eigenvalues**-1).dot(q.T).astype(np.float32)
    scale = tf.linalg.LinearOperatorFullMatrix(covariance)
    target = tfd.MultivariateStudentTLinearOperator(
        loc=tf.zeros(num_dims), scale=scale, df=t_dof)
  elif name == "logistic_reg":
    # The hierarchical variants carry two extra parameters (mu_0 and tau_0).
    if regression_hier_type == "none":
      extra_dims = 0
    else:
      extra_dims = 2

    if regression_dataset == "covertype":
      x, y = utils.LoadCovertype()
      if regression_num_points > 0:
        rng = np.random.RandomState(seed=10)
        chosen_rows = rng.choice(
            x.shape[0], regression_num_points, replace=False)
        x = x[chosen_rows]
        y = y[chosen_rows]

      # +1 for the constant (bias) column appended to x below.
      num_features = x.shape[-1] + 1
      num_classes = 7
      num_dims = num_features * num_classes + extra_dims

      x = tf.to_float(x)
      y = tf.to_int32(y)
    elif regression_dataset == "german":
      x, y = utils.LoadGerman()

      # +1 for the constant (bias) column appended to x below.
      num_features = int(x.shape[-1]) + 1
      num_classes = 2
      num_dims = num_features * num_classes + extra_dims

      x = tf.to_float(x)
      y = tf.to_int32(y)

      if regression_num_points > 0:
        rng = np.random.RandomState(seed=10)
        # x is a tensor at this point; hand NumPy a plain int row count, as
        # the stochastic-points code below does.
        chosen_rows = rng.choice(
            int(x.shape[0]), regression_num_points, replace=False)
        x = tf.gather(x, chosen_rows)
        y = tf.gather(y, chosen_rows)
    else:
      raise ValueError("Unknown regression_dataset: " +
                       str(regression_dataset))

    if regression_stochastic_points > 0:
      chosen_rows = tf.random.uniform([int(regression_stochastic_points)],
                                      0,
                                      int(x.shape[0]),
                                      dtype=tf.int32)
      x = tf.gather(x, chosen_rows)
      y = tf.gather(y, chosen_rows)

    if regression_normalize:
      x_min = tf.reduce_min(x, 0, keep_dims=True)
      x_max = tf.reduce_max(x, 0, keep_dims=True)

      x /= (x_max - x_min)
      x = 2.0 * x - 1.0

    # Append a column of ones so the last weight of each class acts as a bias.
    x = tf.concat([x, tf.ones([int(x.shape[0]), 1])], -1)

    def regular_log_prob_fn(params):
      """Log-density of the softmax regression with an optional hierarchy."""
      if regression_hier_type == "none":
        beta = params
        beta_scaled = beta
      elif regression_hier_type == "centered":
        mu_0 = params[Ellipsis, -1]
        tau_0 = tf.nn.softplus(params[Ellipsis, -2])
        beta = params[Ellipsis, :-2]
        beta_scaled = beta
      elif regression_hier_type == "non_centered":
        mu_0 = params[Ellipsis, -1]
        tau_0 = tf.nn.softplus(params[Ellipsis, -2])
        beta = params[Ellipsis, :-2]
        beta_scaled = beta / tf.expand_dims(tau_0, -1) + tf.expand_dims(
            mu_0, -1)
      else:
        raise ValueError("Unknown regression_hier_type:" +
                         regression_hier_type)

      if batch_size:

        def body(_, i):
          y_dist = tfd.Categorical(
              logits=tf.einsum(
                  "ij,kjm->kim", x[i:i + batch_size],
                  tf.reshape(beta_scaled, [-1, num_features, num_classes])))
          return tf.reduce_sum(y_dist.log_prob(y[i:i + batch_size]), -1)

        log_prob = tf.reduce_sum(
            tf.scan(
                body,
                tf.range(0, x.shape[0], batch_size),
                initializer=tf.zeros(tf.shape(params)[:1]),
                parallel_iterations=1), 0)
      else:
        y_dist = tfd.Categorical(
            logits=tf.einsum(
                "ij,kjm->kim", x,
                tf.reshape(beta_scaled, [-1, num_features, num_classes])))
        log_prob = tf.reduce_sum(y_dist.log_prob(y), -1)

      def make_beta_dist(loc, scale):
        if regression_beta_prior == "normal":
          return tfd.Normal(loc=loc, scale=scale)
        else:
          # Scalars are expanded to [num_chains, num_features * num_classes]
          # before building the diagonal scale operator.
          if tf.convert_to_tensor(loc).shape.ndims == 0:
            loc = tf.fill(
                tf.stack([tf.shape(params)[0], num_features * num_classes]),
                loc)
          if tf.convert_to_tensor(scale).shape.ndims == 0:
            scale = tf.fill(
                tf.stack([tf.shape(params)[0], num_features * num_classes]),
                scale)
          scale = tf.linalg.LinearOperatorDiag(scale)
          return tfd.MultivariateStudentTLinearOperator(
              loc=loc, scale=scale, df=t_dof)

      if regression_hier_type == "none":
        beta_dist = make_beta_dist(loc=0.0, scale=10.0)
      else:
        mu_0_dist = tfd.Normal(loc=0.0, scale=10.0)
        tau_0_dist = tfd.Gamma(2.0, 1.0)
        log_prob += mu_0_dist.log_prob(mu_0) + tau_0_dist.log_prob(tau_0)

        if regression_hier_type == "centered":
          mu_0 = tf.tile(
              tf.expand_dims(mu_0, -1), [1, num_features * num_classes])
          tau_0 = tf.tile(
              tf.expand_dims(tau_0, -1), [1, num_features * num_classes])
          beta_dist = make_beta_dist(loc=mu_0, scale=1.0 / tau_0)
        elif regression_hier_type == "non_centered":
          beta_dist = make_beta_dist(loc=0.0, scale=1.0)
      log_prob += tf.reduce_sum(beta_dist.log_prob(beta), -1)
      return log_prob

    def gamma_scales_log_prob_fn(params):
      """Log-density of binary regression with log-scale parameters."""
      assert num_classes == 2

      def unmarshal(params):
        # Splits the flat parameter vector into consecutive slices.
        results = []
        n_dimensions_used = 0
        if regression_use_beta_scales:
          dim_list = [num_features, num_features, 1]
        else:
          dim_list = [num_features, 1]
        for n_to_add in dim_list:
          results.append(
              params[Ellipsis,
                     n_dimensions_used:n_dimensions_used + n_to_add])
          n_dimensions_used += n_to_add
        return tuple(results)

      log_prob = 0.
      if regression_use_beta_scales:
        beta, beta_log_scales, overall_log_scale = unmarshal(params)
        # p(per-variable scales)
        log_prob += tf.reduce_sum(
            tfd.TransformedDistribution(
                tfd.Gamma(0.5, 0.5),
                tfb.Invert(tfb.Exp())).log_prob(beta_log_scales), -1)
      else:
        beta, overall_log_scale = unmarshal(params)
        beta_log_scales = 0.0
      # p(overall scale)
      log_prob += tf.reduce_sum(
          tfd.Normal(0., 10.).log_prob(overall_log_scale), -1)
      # p(beta)
      log_prob += tf.reduce_sum(tfd.Normal(0., 1.).log_prob(beta), -1)
      # p(y | x, beta)
      scaled_beta = beta * tf.exp(overall_log_scale) * tf.exp(beta_log_scales)
      if batch_size:

        def body(_, i):
          logits = tf.einsum("nd,md->mn", x[i:i + batch_size], scaled_beta)
          return tf.reduce_sum(
              tfd.Bernoulli(logits=logits).log_prob(y[i:i + batch_size]), -1)

        log_prob += tf.reduce_sum(
            tf.scan(
                body,
                tf.range(0, x.shape[0], batch_size),
                initializer=tf.zeros(tf.shape(params)[:1]),
                parallel_iterations=1), 0)
      else:
        logits = tf.einsum("nd,md->mn", x, scaled_beta)
        log_prob += tf.reduce_sum(
            tfd.Bernoulli(logits=logits).log_prob(y), -1)
      return log_prob

    def horseshoe_log_prob_fn(params):
      """Log-density of binary regression with local/global scale factors."""
      assert num_classes == 2

      (z, r1_local, r2_local, r1_global, r2_global) = tf.split(
          params, [num_features, num_features, num_features, 1, 1], axis=-1)

      def indep(d):
        return tfd.Independent(d, 1)

      zero = tf.zeros(num_features)
      one = tf.ones(num_features)
      half = 0.5 * one

      p_z = indep(tfd.Normal(zero, one))
      p_r1_local = indep(tfd.HalfNormal(one))
      p_r2_local = indep(tfd.InverseGamma(half, half))

      p_r1_global = indep(tfd.HalfNormal([1.]))
      p_r2_global = indep(tfd.InverseGamma([0.5], [0.5]))

      log_prob = (
          p_z.log_prob(z) + p_r1_local.log_prob(r1_local) +
          p_r2_local.log_prob(r2_local) + p_r1_global.log_prob(r1_global) +
          p_r2_global.log_prob(r2_global))

      lambda_ = r1_local * tf.sqrt(r2_local)
      tau = r1_global * tf.sqrt(r2_global)
      beta = z * lambda_ * tau

      if batch_size:

        def body(_, i):
          logits = tf.einsum("nd,md->mn", x[i:i + batch_size], beta)
          # Fixed: this previously read `tfd.Independen`, which fails with an
          # AttributeError as soon as batching is enabled.
          return tfd.Independent(tfd.Bernoulli(logits=logits),
                                 1).log_prob(y[i:i + batch_size])

        log_prob += tf.reduce_sum(
            tf.scan(
                body,
                tf.range(0, x.shape[0], batch_size),
                initializer=tf.zeros(tf.shape(params)[:1]),
                parallel_iterations=1), 0)
      else:
        logits = tf.einsum("nd,md->mn", x, beta)
        log_prob += tfd.Independent(tfd.Bernoulli(logits=logits),
                                    1).log_prob(y)
      return log_prob

    def gamma_scales2_log_prob_fn(params):
      """Log-density of binary regression with gamma-distributed scales."""
      assert num_classes == 2

      (z, local_scale, global_scale) = tf.split(
          params, [num_features, num_features, 1], axis=-1)

      def indep(d):
        return tfd.Independent(d, 1)

      zero = tf.zeros(num_features)
      one = tf.ones(num_features)
      half = 0.5 * one

      p_z = indep(tfd.Normal(zero, one))
      p_local_scale = indep(tfd.Gamma(half, half))
      p_global_scale = indep(tfd.Gamma([0.5], [0.5]))

      log_prob = (
          p_z.log_prob(z) + p_local_scale.log_prob(local_scale) +
          p_global_scale.log_prob(global_scale))

      beta = z * local_scale * global_scale

      if batch_size:

        def body(_, i):
          logits = tf.einsum("nd,md->mn", x[i:i + batch_size], beta)
          # Fixed: this previously read `tfd.Independen`, which fails with an
          # AttributeError as soon as batching is enabled.
          return tfd.Independent(tfd.Bernoulli(logits=logits),
                                 1).log_prob(y[i:i + batch_size])

        log_prob += tf.reduce_sum(
            tf.scan(
                body,
                tf.range(0, x.shape[0], batch_size),
                initializer=tf.zeros(tf.shape(params)[:1]),
                parallel_iterations=1), 0)
      else:
        logits = tf.einsum("nd,md->mn", x, beta)
        log_prob += tfd.Independent(tfd.Bernoulli(logits=logits),
                                    1).log_prob(y)
      return log_prob

    # Pick the log-density; every non-"regular" model has its own parameter
    # layout and therefore overrides num_dims.
    bijector = None
    if regression_type == "regular":
      log_prob_fn = regular_log_prob_fn
    elif regression_type == "gamma_scales":
      log_prob_fn = gamma_scales_log_prob_fn
      num_dims = num_features + 1
      if regression_use_beta_scales:
        num_dims += num_features
    elif regression_type == "horseshoe":
      log_prob_fn = horseshoe_log_prob_fn
      num_dims = num_features * 3 + 2
      # z is unconstrained; the remaining blocks go through Exp.
      bijector = tfb.Blockwise([tfb.Identity(), tfb.Exp()],
                               [num_features, num_features * 2 + 2])
    elif regression_type == "gamma_scales2":
      log_prob_fn = gamma_scales2_log_prob_fn
      num_dims = num_features * 2 + 1
      # z is unconstrained; the remaining blocks go through Exp.
      bijector = tfb.Blockwise([tfb.Identity(), tfb.Exp()],
                               [num_features, num_features + 1])
    else:
      raise ValueError("Unknown regression_type: " + str(regression_type))

    target = utils.LogProbDist(num_dims=num_dims, log_prob_fn=log_prob_fn)
    spec = TargetSpec(
        name=name,
        num_dims=num_dims,
        x_min=0.10,
        x_max=0.15,
        y_min=0.10,
        y_max=0.15,
        stats=None,
        bijector=bijector)
  elif name == "mog":
    comp_1 = tfd.MultivariateNormalDiag(
        loc=[-1., 1.] + [0.] * (num_dims - 2), scale_identity_multiplier=2.)
    comp_2 = tfd.MultivariateNormalDiag(
        loc=[1., 1.] + [0.] * (num_dims - 2), scale_identity_multiplier=4.)
    comp_3 = tfd.MultivariateNormalDiag(
        loc=[0., 0.] + [0.] * (num_dims - 2), scale_identity_multiplier=2.)
    cat = tfd.Categorical(logits=[0] * 3)
    target = tfd.Mixture(cat=cat, components=[comp_1, comp_2, comp_3])
    spec = TargetSpec(
        name=name,
        num_dims=num_dims,
        x_min=-2.,
        x_max=2.,
        y_min=-2.,
        y_max=2.,
        stats=None,
        bijector=None)
  elif name == "easy_gaussian":
    spec = TargetSpec(
        name=name,
        num_dims=num_dims,
        x_min=-5.0,
        x_max=5.0,
        y_min=-5.0,
        y_max=5.0,
        stats=None,
        bijector=None)
    rng = np.random.RandomState(seed=10)
    eigenvalues = np.linspace(0.5, 2., num_dims)**-1

    q, _ = np.linalg.qr(rng.randn(num_dims, num_dims))
    covariance = (q * eigenvalues**-1).dot(q.T).astype(np.float32)
    target = tfd.MultivariateNormalFullCovariance(
        loc=tf.zeros(num_dims), covariance_matrix=covariance)
  elif name == "gp_reg":
    x, y = utils.LoadCloud()

    if regression_num_points > 0:
      rng = np.random.RandomState(seed=10)
      chosen_rows = rng.choice(
          x.shape[0], regression_num_points, replace=False)
      x = x[chosen_rows]
      y = y[chosen_rows]

    x = tf.convert_to_tensor(x, dtype=tf.float32)
    y = tf.convert_to_tensor(y, dtype=tf.float32)

    num_features = int(x.shape[-1])
    # One rho per feature plus alpha and sigma.
    num_dims = num_features + 2

    def log_prob_fn(params):
      """Joint log-density of (rho, alpha, sigma) and the observations y."""
      rho, alpha, sigma = tf.split(params, [num_features, 1, 1], -1)

      one = tf.ones(num_features)

      def indep(d):
        return tfd.Independent(d, 1)

      p_rho = indep(tfd.InverseGamma(5. * one, 5. * one))
      p_alpha = indep(tfd.HalfNormal([1.]))
      p_sigma = indep(tfd.HalfNormal([1.]))

      rho_shape = tf.shape(rho)
      alpha_shape = tf.shape(alpha)

      # Pairwise squared differences between all rows of x, scaled per
      # feature by rho^2 and summed over features.
      x1 = tf.expand_dims(x, -2)
      x2 = tf.expand_dims(x, -3)
      exp = -0.5 * tf.squared_difference(x1, x2)
      exp /= tf.reshape(
          tf.square(rho), tf.concat([rho_shape[:1], [1, 1], rho_shape[1:]], 0))
      exp = tf.reduce_sum(exp, -1, keep_dims=True)
      # Adding 2 * log(alpha) inside the exponent multiplies by alpha^2.
      exp += 2. * tf.reshape(
          tf.log(alpha),
          tf.concat([alpha_shape[:1], [1, 1], alpha_shape[1:]], 0))
      exp = tf.exp(exp[Ellipsis, 0])
      # sigma^2 plus a small constant on the diagonal.
      exp += tf.matrix_diag(
          tf.tile(tf.square(sigma), [1, int(x.shape[0])]) + 1e-6)
      exp = tf.check_numerics(exp, "exp 2 has NaNs")
      # NOTE(review): this prints the first matrix on every evaluation; it
      # looks like a debugging aid - confirm before removing.
      with tf.control_dependencies([tf.print(exp[0], summarize=99999)]):
        exp = tf.identity(exp)

      p_y = tfd.MultivariateNormalFullCovariance(covariance_matrix=exp)

      log_prob = (
          p_rho.log_prob(rho) + p_alpha.log_prob(alpha) +
          p_sigma.log_prob(sigma) + p_y.log_prob(y))

      return log_prob

    bijector = tfb.Softplus()  # tfb.Exp()
    target = utils.LogProbDist(num_dims=num_dims, log_prob_fn=log_prob_fn)
    spec = TargetSpec(
        name=name,
        num_dims=num_dims,
        x_min=0.10,
        x_max=0.15,
        y_min=0.10,
        y_max=0.15,
        stats=None,
        bijector=bijector)
  else:
    # Previously an unknown name fell through and failed later with an
    # unbound-variable error on `target`/`spec`.
    raise ValueError("Unknown target name: " + str(name))

  if precomputed_stats_path is not None:
    with tf.gfile.Open(precomputed_stats_path) as f:
      stats = simplejson.load(f)
    stats = {k: np.array(v) for k, v in stats.items()}
    spec = spec._replace(stats=stats)

  return target, spec._replace(**kwargs)
def resampler_with_unstacked_warp(data,
                                  warp_x,
                                  warp_y,
                                  safe=True,
                                  name='resampler'):
  """Bilinearly resamples `data` at the given x / y coordinates.

  For every coordinate the four surrounding integer grid points are gathered
  from `data` and blended with weights given by the fractional parts of the
  coordinate.

  Args:
    data: Tensor of shape `[batch_size, data_height, data_width,
      data_num_channels]` containing 2D data that will be resampled.
    warp_x: Tensor of shape `[batch_size, dim_0, ... , dim_n]` containing the x
      coordinates at which resampling will be performed.
    warp_y: Tensor of the same shape as warp_x containing the y coordinates at
      which resampling will be performed.
    safe: A boolean. If True the lookups go through `safe_gather_nd`, otherwise
      through `tf.gather_nd`. Disable only if the coordinates are known to be
      within bounds, otherwise a runtime exception will be thrown.
    name: Optional name of the op.

  Returns:
    Tensor of resampled values from `data`. The output tensor shape is
    `[batch_size, dim_0, ... , dim_n, data_num_channels]`.

  Raises:
    ValueError: If warp_x, warp_y and data have incompatible shapes.
  """
  with tf.name_scope(name):
    warp_x = tf.convert_to_tensor(warp_x)
    warp_y = tf.convert_to_tensor(warp_y)
    data = tf.convert_to_tensor(data)

    if not warp_x.shape.is_compatible_with(warp_y.shape):
      raise ValueError(
          'warp_x and warp_y are of incompatible shapes: %s vs %s ' %
          (str(warp_x.shape), str(warp_y.shape)))
    dynamic_shape = tf.shape(warp_x)
    if warp_x.shape[0] != data.shape[0]:
      raise ValueError(
          '\'warp_x\' and \'data\' must have compatible first '
          'dimension (batch size), but their shapes are %s and %s ' %
          (str(warp_x.shape[0]), str(data.shape[0])))

    # Integer grid lines just below each coordinate (still floating point).
    x_floor = tf.floor(warp_x)
    y_floor = tf.floor(warp_y)

    # Fractional parts: how much weight the right / lower neighbours get.
    w_right = warp_x - x_floor
    w_down = warp_y - y_floor

    x_lo = tf.to_int32(x_floor)
    y_lo = tf.to_int32(y_floor)
    x_hi = tf.to_int32(tf.ceil(warp_x))
    y_hi = tf.to_int32(tf.ceil(warp_y))

    # The left / upper neighbours get the complementary weights.
    w_left = tf.subtract(tf.convert_to_tensor(1.0, w_right.dtype), w_right)
    w_up = tf.subtract(tf.convert_to_tensor(1.0, w_down.dtype), w_down)

    # Batch index of every coordinate: a range over the batch dimension,
    # shaped [batch_size, 1, ..., 1] and then broadcast to the warp shape.
    batch_only_shape = tf.concat(
        [dynamic_shape[0:1], tf.ones_like(dynamic_shape[1:])], 0)
    batch_index = tf.reshape(
        tf.range(dynamic_shape[0], dtype=tf.int32), batch_only_shape)
    batch_index += tf.zeros_like(warp_y, dtype=tf.int32)

    # Trailing unit axis so the weights broadcast over the channels.
    w_left = tf.expand_dims(w_left, axis=-1)
    w_down = tf.expand_dims(w_down, axis=-1)
    w_up = tf.expand_dims(w_up, axis=-1)
    w_right = tf.expand_dims(w_right, axis=-1)

    # (batch, y, x) lookup indices of the four neighbours.
    idx_up_left = tf.stack([batch_index, y_lo, x_lo], axis=-1)
    idx_up_right = tf.stack([batch_index, y_lo, x_hi], axis=-1)
    idx_down_left = tf.stack([batch_index, y_hi, x_lo], axis=-1)
    idx_down_right = tf.stack([batch_index, y_hi, x_hi], axis=-1)

    lookup = safe_gather_nd if safe else tf.gather_nd

    # Blend horizontally within the upper and the lower row, then vertically.
    upper_row = (lookup(data, idx_up_left) * w_left +
                 lookup(data, idx_up_right) * w_right)
    lower_row = (lookup(data, idx_down_left) * w_left +
                 lookup(data, idx_down_right) * w_right)
    result = upper_row * w_up + lower_row * w_down

    # Restore the static shape: the warp shape followed by the channel count.
    static_shape = warp_x.get_shape().as_list()
    static_shape = static_shape + data.get_shape().as_list()[-1:]
    result.set_shape(static_shape)
    return result
def unit(w, sparsity):
  """Unit-level magnitude pruning.

  Args:
    w: Weight tensor to prune.
    sparsity: Fraction of the units along the last dimension of `w` to target.

  Returns:
    `w` multiplied by `1 - mask`, where `mask` is whatever
    `common_layers.unit_targeting` returns for the requested number of units
    (presumably 1 for targeted units and 0 elsewhere - verify against that
    helper).
  """
  num_units = common_layers.shape_list(w)[-1]
  num_targeted = tf.to_int32(num_units * sparsity)
  targeted = common_layers.unit_targeting(w, num_targeted)
  keep = 1 - targeted
  return keep * w
def get_scheduled_sample_func(self, batch_size):
  """Builds the scheduled-sampling function selected by the hparams.

  `self.hparams.scheduled_sampling_mode` picks the strategy:
    * "ground_truth_only" / "prediction_only": always return the ground truth
      or the generated input respectively.
    * "prob", "prob_inverse_exp", "prob_inverse_lin": use
      `common_video.scheduled_sample_prob` with a probability that depends on
      the current iteration number.
    * "count": use `common_video.scheduled_sample_count` with a number of
      ground-truth items computed from `batch_size` and the iteration number.

  Args:
    batch_size: Batch size, bound into the returned function.

  Returns:
    A `functools.partial` of the chosen sampling function with `batch_size`
    and `scheduled_sample_var` already bound.

  Raises:
    ValueError: If the scheduled sampling mode is not recognised.
  """
  with tf.variable_scope("scheduled_sampling_func", reuse=tf.AUTO_REUSE):
    iter_num = self.get_iteration_num()

    def scheduled_sampling_simple(ground_truth_x, generated_x, batch_size,
                                  scheduled_sample_var):
      # Constant choice for the gt-only / pred-only modes; `batch_size` is
      # accepted only so the signature matches the other samplers.
      del batch_size
      return ground_truth_x if scheduled_sample_var else generated_x

    mode = self.hparams.scheduled_sampling_mode
    if mode in ("ground_truth_only", "prediction_only"):
      sampler = scheduled_sampling_simple
      sampler_var = (mode == "ground_truth_only")
    elif mode == "prob":
      decay_steps = self.hparams.scheduled_sampling_decay_steps
      sampler = common_video.scheduled_sample_prob
      sampler_var = tf.train.polynomial_decay(1.0, iter_num, decay_steps, 0.0)
    elif mode == "prob_inverse_exp":
      decay_steps = self.hparams.scheduled_sampling_decay_steps
      ramp = common_layers.inverse_exp_decay(decay_steps, step=iter_num)
      ramp *= self.hparams.scheduled_sampling_max_prob
      sampler = common_video.scheduled_sample_prob
      sampler_var = 1.0 - ramp
    elif mode == "prob_inverse_lin":
      decay_steps = self.hparams.scheduled_sampling_decay_steps
      # Very low at start.
      ramp = common_layers.inverse_exp_decay(decay_steps // 4, step=iter_num)
      ramp *= common_layers.inverse_lin_decay(decay_steps, step=iter_num)
      ramp *= self.hparams.scheduled_sampling_max_prob
      sampler = common_video.scheduled_sample_prob
      sampler_var = 1.0 - ramp
    elif mode == "count":
      # Number of ground-truth frames to pass in:
      #   round(batch_size * k / (k + exp(iter_num / k))).
      k = self.hparams.scheduled_sampling_k
      ground_truth_fraction = (
          k / (k + tf.exp(tf.to_float(iter_num) / tf.to_float(k))))
      sampler = common_video.scheduled_sample_count
      sampler_var = tf.to_int32(
          tf.round(tf.to_float(batch_size) * ground_truth_fraction))
    else:
      raise ValueError("unknown scheduled sampling method: %s" % mode)

    # Only tensor-valued schedules can be logged as a summary.
    if isinstance(sampler_var, tf.Tensor):
      tf.summary.scalar("scheduled_sampling_var", sampler_var)

    return functools.partial(
        sampler, batch_size=batch_size, scheduled_sample_var=sampler_var)
def hier_homography_fmask_estimator(color_inputs,
                                    num_param=8,
                                    num_layer=7,
                                    num_level=3,
                                    dropout_keep_prob=0.8,
                                    reuse=None,
                                    is_training=True,
                                    trainable=True,
                                    scope='hier_hmg'):
  """A hierarchical neural network with mask for homography estimation.

  The image pair is converted to grayscale and processed from the coarsest to
  the finest level. Each level after the first warps the first image with the
  previous level's homography, regresses a homography from the result and
  combines it with the previous one. At the last level the motion features
  are passed through `fmask_layers_semantic` together with VGG-16 'pool5'
  features computed from the second image.

  Args:
    color_inputs: batch of input image pairs of data type float32 and of shape
      [batch_size, height, width, 6]
    num_param: the number of parameters for homography (default 8)
    num_layer: the number of convolutional layers in the motion feature network
    num_level: the number of hierarchical levels
    dropout_keep_prob: the percentage of activation values that are kept
    reuse: whether to reuse this network weights
    is_training: whether used for training or testing
    trainable: whether this network is to be trained or not
    scope: the scope of variables in this function

  Returns:
    A tuple `(hmgs_list, warped_list)`: the homographies estimated at each
    level, and the pre-warped two-channel inputs of every level after the
    first.
  """
  _, h_input, w_input = color_inputs.get_shape().as_list()[0:3]

  # Semantic features of the second image from a VGG-16 built with
  # trainable=False.
  vgg_inputs = (color_inputs[Ellipsis, 3:6] * 256 + 128) - VGG_MEANS
  with slim.arg_scope([slim.conv2d, slim.max_pool2d], padding='SAME'):
    with slim.arg_scope([slim.conv2d, slim.fully_connected], trainable=False):
      with slim.arg_scope([slim.conv2d], normalizer_fn=None):
        with slim.arg_scope(contrib_slim_nets_vgg.vgg_arg_scope()):
          sfeature, _ = contrib_slim_nets_vgg.vgg_16(
              vgg_inputs,
              1000,
              predictions_fn=slim.softmax,
              global_pool=False,
              is_training=False,
              reuse=reuse,
              spatial_squeeze=True,
              final_endpoint='pool5',
              scope='vgg_16')

  first_gray = tf.image.rgb_to_grayscale(color_inputs[Ellipsis, 0:3])
  second_gray = tf.image.rgb_to_grayscale(color_inputs[Ellipsis, 3:6])
  inputs = tf.concat([first_gray, second_gray], 3)

  mode_kwargs = dict(is_training=is_training, trainable=trainable)
  last_level = num_level - 1

  hmgs_list = []
  warped_list = []
  with tf.variable_scope(scope, [inputs], reuse=reuse):
    for level_index in range(num_level):
      # The resolution doubles with every level; the last one is full size.
      scale = 2**(last_level - level_index)
      h = tf.to_float(tf.floordiv(h_input, scale))
      w = tf.to_float(tf.floordiv(w_input, scale))
      inputs_il = tf.image.resize_images(inputs, tf.to_int32([h, w]))
      layers_il = num_layer + 1 - num_level + level_index

      if level_index == 0:
        mfeature = hier_base_layers(inputs_il, layers_il, level_index,
                                    **mode_kwargs)
        hmgs_il = homography_regression(
            mfeature,
            num_param,
            level_index,
            dropout_keep_prob=dropout_keep_prob,
            **mode_kwargs)
        hmgs_list.append(hmgs_il)
        continue

      # Warp the first image with the homography of the previous level
      # before estimating the residual homography of this level.
      previous_hmgs = hmgs_list[level_index - 1]
      warped, _ = hmg_util.homography_scale_warp_per_batch(
          inputs_il[:, :, :, 0], w / 2, h / 2, previous_hmgs)
      pre_warped_inputs_il = tf.stack([warped, inputs_il[:, :, :, 1]], -1)
      warped_list.append(pre_warped_inputs_il)

      mfeature = hier_base_layers(pre_warped_inputs_il, layers_il,
                                  level_index, **mode_kwargs)
      if level_index == last_level:
        mfeature = fmask_layers_semantic(mfeature, sfeature, level_index,
                                         **mode_kwargs)
      hmgs_il = homography_regression(
          mfeature,
          num_param,
          level_index,
          dropout_keep_prob=dropout_keep_prob,
          **mode_kwargs)
      new_hmgs_il = hmg_util.homography_shift_mult_batch(
          previous_hmgs, w / 2, h / 2, hmgs_il, w, h, w, h)
      hmgs_list.append(new_hmgs_il)
  return hmgs_list, warped_list
def DecodeLabelAndImage(r):
  """Decodes a raw byte record into an (image, label) pair.

  The record is read as uint8 values: the first value is the label and the
  remaining values are reshaped to [3, 32, 32] and transposed to [32, 32, 3].

  Args:
    r: String tensor holding the raw record bytes.

  Returns:
    A tuple `(image, label)`: a float tensor of shape [32, 32, 3] scaled by
    1 / 255, and the int32 label.
  """
  raw = tf.decode_raw(r, tf.uint8)
  channels_first = tf.reshape(raw[1:], [3, 32, 32])
  channels_last = tf.transpose(channels_first, [1, 2, 0])
  image = tf.to_float(channels_last) / 255.0
  label = tf.to_int32(raw[0])
  return image, label
def compute_loss(self, y_true, y_pred):
  """Compute multibox loss.

  The confidence loss is summed over the positive priors and over the
  highest-scoring negative priors; the localization loss is summed over the
  positive priors only.

  # Arguments
      y_true: Ground truth targets,
          tensor of shape (?, num_boxes, 4 + num_classes + 8),
          priors in ground truth are fictitious,
          y_true[:, :, -8] has 1 if prior should be penalized
              or in other words is assigned to some ground truth box,
          y_true[:, :, -7:] are all 0.
      y_pred: Predicted logits,
          tensor of shape (?, num_boxes, 4 + num_classes + 8).

  # Returns
      loss: Loss for prediction, tensor of shape (?,).
  """
  dynamic_shape = tf.shape(y_true)
  batch_size = dynamic_shape[0]
  num_boxes = tf.to_float(tf.shape(y_true)[1])
  positive_mask = y_true[:, :, -8]

  # Per-prior losses for every prior.
  conf_loss = self._softmax_loss(y_true[:, :, 4:-8], y_pred[:, :, 4:-8])
  loc_loss = self._l1_smooth_loss(y_true[:, :, :4], y_pred[:, :, :4])

  # Positive priors contribute to both terms.
  num_pos = tf.reduce_sum(positive_mask, axis=-1)
  pos_loc_loss = tf.reduce_sum(loc_loss * positive_mask, axis=1)
  pos_conf_loss = tf.reduce_sum(conf_loss * positive_mask, axis=1)

  # Negatives are penalized on confidence only. Their per-image count is
  # capped by neg_pos_ratio * num_pos and by the number of non-positive
  # priors.
  num_neg = tf.minimum(self.neg_pos_ratio * num_pos, num_boxes - num_pos)
  any_negatives = tf.to_float(tf.reduce_any(tf.greater(num_neg, 0)))
  # When no image has a positive count, the appended entry falls back to
  # `negatives_for_hard`; otherwise the appended entry is 0 and is filtered
  # out by the mask below.
  fallback = [(1 - any_negatives) * self.negatives_for_hard]
  num_neg = tf.concat(axis=0, values=[num_neg, fallback])
  # A single count for the whole batch: the smallest positive entry.
  num_neg_batch = tf.to_int32(
      tf.reduce_min(tf.boolean_mask(num_neg, tf.greater(num_neg, 0))))

  # Rank the priors by the maximum prediction over the class slots that
  # follow the background slot, with the positive priors zeroed out.
  confs_start = 4 + self.background_label_id + 1
  confs_end = confs_start + self.num_classes - 1
  max_confs = tf.reduce_max(y_pred[:, :, confs_start:confs_end], axis=2)
  _, hard_indices = tf.nn.top_k(
      max_confs * (1 - positive_mask), k=num_neg_batch)

  # Convert (image, prior) pairs to offsets into the flattened loss.
  image_idx = tf.tile(
      tf.expand_dims(tf.range(0, batch_size), 1), (1, num_neg_batch))
  flat_indices = (
      tf.reshape(image_idx, [-1]) * tf.to_int32(num_boxes) +
      tf.reshape(hard_indices, [-1]))
  neg_conf_loss = tf.gather(tf.reshape(conf_loss, [-1]), flat_indices)
  neg_conf_loss = tf.reduce_sum(
      tf.reshape(neg_conf_loss, [batch_size, num_neg_batch]), axis=1)

  # Confidence term, normalized by the number of priors that contributed.
  total_loss = (pos_conf_loss + neg_conf_loss) / (
      num_pos + tf.to_float(num_neg_batch))

  # Localization term; images without positives divide by 1 instead of 0.
  safe_num_pos = tf.where(
      tf.not_equal(num_pos, 0), num_pos, tf.ones_like(num_pos))
  total_loss += (self.alpha * pos_loc_loss) / safe_num_pos
  return total_loss
def body(self,
         features,
         decode_step=None,
         cache=None,
         decoding_stats=None,
         add_summary=True):
  """Runs the (optional) encoder and the decoder and builds the loss.

  Args:
    features: Dict of input tensors. "targets" is required; "inputs",
      "token_bias_inputs" and "token_bias_targets" are optional.
    decode_step: Current decoding step; ignored (reset to None) unless
      `hparams.fast_decode` is set.
    cache: Optional dict used during decoding. The encoder output is stored
      under key -1 and reused from there when present.
    decoding_stats: Passed through to the decoder layers.
    add_summary: Whether to emit scalar summaries for the loss terms.

  Returns:
    In decoding mode, the logits. Otherwise a `(logits, loss_dict)` tuple in
    which the logits are reshaped to [..., 1, 1, vocab] and `loss_dict` holds
    "training" and, when extra losses were collected, "extra_loss".
  """
  hparams = self.hparams
  encoder_output = None
  padding_bias = None
  extra_losses = []
  if not hparams.fast_decode:
    decode_step = None

  if "inputs" in features:
    inputs = features["inputs"]
    # remove the last two dimensions that are always 1.
    inputs = tf.reshape(inputs,
                        utils.shape_list(inputs)[:2] + [self.hidden_size])
    # Padding bias only used for seq2seq models.
    padding_bias = utils.embedding_to_padding(inputs)

    # Mask random positions
    inputs_shape = utils.shape_list(inputs)
    if hparams.input_dropout:
      drop = tf.random.uniform(inputs_shape) < hparams.input_dropout
      inputs = tf.where(drop, tf.zeros_like(inputs), inputs)
    if hparams.add_timing_signal:
      inputs += utils.get_timing_signal_1d(hparams.max_length,
                                           self.hidden_size)

    # Reuse the encoder output stored in the cache (key -1) when available;
    # otherwise compute it and store it if a cache was supplied.
    if cache is not None and -1 in cache:
      encoder_output = cache[-1]
    else:
      encoder_output = utils.transformer_encoder_layers(
          inputs=inputs,
          num_layers=self.num_encoder_layers,
          hparams=hparams,
          losses=extra_losses,
          name="encoder",
          token_bias=features.get("token_bias_inputs"),
          padding_bias=padding_bias)
      if cache is not None:
        cache[-1] = encoder_output

  targets = tf.to_int32(features["targets"])
  # remove the last two dimensions that are always 1.
  targets = tf.reshape(targets, utils.shape_list(targets)[:2])
  # Clamp targets to max_target_length
  targets = targets[:, :hparams.max_target_length]
  if self.is_decode:
    targets = self.process_partial_targets_decoding(targets)

  decoder_input = self.prepare_decoder(targets)
  decoder_output = utils.transformer_decoder_layers(
      inputs=decoder_input,
      num_layers=self.num_decoder_layers,
      hparams=hparams,
      encoder_output=encoder_output,
      decode_step=decode_step,
      losses=extra_losses,
      cache=cache,
      name="decoder",
      decoding_stats=decoding_stats,
      token_bias_inputs=features.get("token_bias_inputs"),
      token_bias_targets=features.get("token_bias_targets"),
      padding_bias=padding_bias)
  logits = self.produce_output(decoder_output)

  # Return logits as-is in decoding mode
  if self.is_decode:
    return logits

  # Cross entropy averaged over the positions whose target id is non-zero.
  one_hot_targets = tf.one_hot(
      tf.cast(targets, dtype=tf.int32), self.vocab_size)
  x_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
      labels=one_hot_targets, logits=logits)
  weights = tf.to_float(tf.not_equal(targets, 0))
  loss = tf.reduce_sum(x_entropy * weights) / tf.reduce_sum(weights)

  if add_summary:
    tf.summary.scalar("losses/weight", tf.reduce_sum(weights))
    tf.summary.scalar("losses/x_entropy", tf.reduce_sum(x_entropy * weights))

  loss_dict = {"training": loss}
  if extra_losses:
    loss_dict["extra_loss"] = tf.add_n(extra_losses)

  # hack for T2T metrics
  logits_shape = utils.shape_list(logits)
  logits = tf.reshape(logits, logits_shape[:2] + [1, 1] + logits_shape[-1:])
  return logits, loss_dict
def randomly_crop_and_resize(image, masks, boxes, keypoints, image_size, probability=0.5):
    """
    With the given probability takes a random crop of the image,
    then resizes the result to `image_size`.

    Arguments:
        image: a float tensor with shape [height, width, 3].
        masks: a float tensor with shape [height / DOWNSAMPLE, width / DOWNSAMPLE, 2].
        boxes: a float tensor with shape [num_persons, 4].
        keypoints: an int tensor with shape [num_persons, 17, 3].
        image_size: a tuple of integers (h, w).
        probability: a float number.
    Returns:
        image: a float tensor with shape [h, w, 3].
        masks: a float tensor with shape [h / DOWNSAMPLE, w / DOWNSAMPLE, 2].
        boxes: a float tensor with shape [num_remaining, 4].
        keypoints: an int tensor with shape [num_remaining, 17, 3].
    """
    image_dims = tf.to_float(tf.shape(image))
    height, width = image_dims[0], image_dims[1]
    to_absolute = tf.stack([height, width, height, width])
    boxes /= to_absolute  # to the [0, 1] range

    def crop(image, boxes, keypoints):
        """
        Takes a random crop and moves the keypoints into its coordinate frame.

        Arguments:
            image: a float tensor with shape [height, width, 3].
            boxes: a float tensor with shape [num_persons, 4].
            keypoints: an int tensor with shape [num_persons, 17, 3].
        Returns:
            image: a float tensor with shape [None, None, 3].
            boxes: a float tensor with shape [num_remaining, 4].
            keypoints: an int tensor with shape [num_remaining, 17, 3].
            window: a float tensor with shape [4].
        """
        image, boxes, window, keep_indices = random_image_crop(
            image, boxes,
            min_object_covered=0.9,
            aspect_ratio_range=(0.95, 1.05),
            area_range=(0.5, 1.0),
            overlap_threshold=OVERLAP_THRESHOLD)

        # keep only the persons that survived the crop,
        # shape [num_remaining, 17, 3]
        remaining = tf.gather(keypoints, keep_indices)

        # the window is normalized, convert its corner to pixels
        ymin, xmin, _, _ = tf.unstack(window * to_absolute)
        coordinates, visibility = tf.split(remaining, [2, 1], axis=2)
        coordinates = tf.to_float(coordinates)  # shape [num_remaining, 17, 2]
        offset = tf.stack([ymin, xmin])
        coordinates = tf.to_int32(tf.round(coordinates - offset))

        # some keypoints now lie outside of the crop,
        # their visibility is fixed later by `correct_keypoints`
        shifted = tf.concat([coordinates, visibility], axis=2)
        return image, boxes, shifted, window

    full_window = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32)
    take_crop = tf.less(tf.random_uniform([]), probability)
    image, boxes, keypoints, window = tf.cond(
        take_crop,
        lambda: crop(image, boxes, keypoints),
        lambda: (image, boxes, keypoints, full_window))

    def correct_keypoints(image_shape, keypoints):
        """
        Zeroes the visibility of keypoints that fall outside of the image.

        Arguments:
            image_shape: an int tensor with shape [3].
            keypoints: an int tensor with shape [num_persons, 17, 3].
        Returns:
            an int tensor with shape [num_persons, 17, 3].
        """
        y, x, v = tf.split(keypoints, 3, axis=2)
        height = image_shape[0]
        width = image_shape[1]

        # shape [num_persons, 17, 4]
        out_of_bounds = tf.concat([
            tf.less(y, 0), tf.less(x, 0),
            tf.greater_equal(y, height),
            tf.greater_equal(x, width)
        ], axis=2)

        inside = tf.logical_not(tf.reduce_any(out_of_bounds, axis=2))
        inside = tf.expand_dims(inside, 2)  # shape [num_persons, 17, 1]
        v *= tf.to_int32(inside)
        return tf.concat([y, x, v], axis=2)

    def rescale(boxes, keypoints, old_shape, new_shape):
        """
        Moves boxes and keypoints to the coordinates of the resized image.

        Arguments:
            boxes: a float tensor with shape [num_persons, 4].
            keypoints: an int tensor with shape [num_persons, 17, 3].
            old_shape, new_shape: int tensors with shape [3].
        Returns:
            a float tensor with shape [num_persons, 4].
            an int tensor with shape [num_persons, 17, 3].
        """
        coordinates, visibility = tf.split(keypoints, [2, 1], axis=2)
        coordinates = tf.to_float(coordinates)

        old_shape = tf.to_float(old_shape)
        new_shape = tf.to_float(new_shape)
        old_height, old_width = old_shape[0], old_shape[1]
        new_height, new_width = new_shape[0], new_shape[1]

        # keypoints are in pixels of the old image
        ratio = tf.stack([new_height / old_height, new_width / old_width])
        coordinates *= ratio

        # boxes are normalized, so they only need the new size
        size = tf.stack([new_height, new_width])
        boxes *= tf.concat(2 * [size], axis=0)

        new_height = tf.to_int32(new_height)
        new_width = tf.to_int32(new_width)

        coordinates = tf.to_int32(tf.round(coordinates))
        y, x = tf.split(coordinates, 2, axis=2)
        y = tf.clip_by_value(y, 0, new_height - 1)
        x = tf.clip_by_value(x, 0, new_width - 1)
        return boxes, tf.concat([y, x, visibility], axis=2)

    cropped_shape = tf.shape(image)
    keypoints = correct_keypoints(cropped_shape, keypoints)

    h, w = image_size  # image size that will be used for training
    image = tf.image.resize_images(image, [h, w], method=RESIZE_METHOD)

    # the masks are cropped with the same (normalized) window
    # and resized to the downsampled training size
    masks_height = tf.to_int32(tf.ceil(h / DOWNSAMPLE))
    masks_width = tf.to_int32(tf.ceil(w / DOWNSAMPLE))
    masks = tf.image.crop_and_resize(
        image=tf.expand_dims(masks, 0),
        boxes=tf.expand_dims(window, 0),
        box_indices=tf.constant([0], dtype=tf.int32),
        crop_size=[masks_height, masks_width],
        method='nearest')
    masks = masks[0]

    boxes, keypoints = rescale(boxes, keypoints, cropped_shape, tf.shape(image))
    return image, masks, boxes, keypoints
def ae_transformer_internal(inputs, targets, target_space, hparams, cache=None):
    """Main step used for training.

    Encodes the inputs, compresses the targets into latents, passes the
    latents through a discrete bottleneck (or samples them in PREDICT mode),
    decompresses them and feeds the result to the decoder.

    Args:
      inputs: input tensor; flattened with `common_layers.flatten4d3d` before
        being encoded.
      targets: target tensor; a comment below states it is
        [batch, length, 1, depth].
      target_space: forwarded to `encode`.
      hparams: hyperparameters. Reads `mode`, `num_compress_steps`,
        `bottleneck_bits`, `hidden_size` and `mask_startup_steps`.
      cache: optional latent codes used in PREDICT mode; sampled with
        `ae_latent_sample_beam` when None.

    Returns:
      A tuple `(res, losses, cache)`: the decoder output, a dict with the
      "extra" and "latent_pred" losses, and the (possibly newly sampled)
      cache.
    """
    # Encoder.
    inputs = common_layers.flatten4d3d(inputs)
    inputs, ed = encode(inputs, target_space, hparams, "input_enc")

    # Autoencoding.
    # Both losses default to zero so that PREDICT mode returns the same keys.
    losses = {"extra": tf.constant(0.0), "latent_pred": tf.constant(0.0)}

    # Pad targets against a tensor twice as long as the inputs, to a length
    # divisible by 2**num_compress_steps.
    max_targets_len_from_inputs = tf.concat([inputs, inputs], axis=1)
    targets, _ = common_layers.pad_to_same_length(
        targets, max_targets_len_from_inputs,
        final_length_divisible_by=2**hparams.num_compress_steps)
    targets_c = compress(targets, hparams, "compress")
    if hparams.mode != tf.estimator.ModeKeys.PREDICT:
        # Compress and bottleneck.
        latents_discrete_hot, extra_loss = vq_discrete_bottleneck(
            x=targets_c, hparams=hparams)
        latents_dense = vq_discrete_unbottleneck(latents_discrete_hot,
                                                 hparams=hparams)
        # Forward value is the quantized latents; the gradient flows to
        # targets_c only, because the difference is wrapped in stop_gradient.
        latents_dense = targets_c + tf.stop_gradient(latents_dense - targets_c)
        latents_discrete = tf.argmax(latents_discrete_hot, axis=-1)
        tf.summary.histogram("codes", tf.reshape(latents_discrete[:, 0, :], [-1]))
        losses["extra"] = extra_loss

        # Extra loss predicting latent code from input.
        latents_pred = decode_transformer(inputs, ed, latents_dense, hparams,
                                          "extra")
        latent_pred_loss = get_latent_pred_loss(latents_pred,
                                                latents_discrete_hot, hparams)
        losses["latent_pred"] = tf.reduce_mean(latent_pred_loss)
    else:
        latent_len = common_layers.shape_list(targets_c)[1]
        embed = functools.partial(vq_discrete_unbottleneck, hparams=hparams)
        latents_dense = tf.zeros_like(targets_c[:, :latent_len, :, :])
        # Sample the latent codes unless the caller supplied them.
        if cache is None:
            cache = ae_latent_sample_beam(latents_dense, inputs, ed, embed,
                                          hparams)
        cache_hot = tf.one_hot(cache, depth=2**hparams.bottleneck_bits)
        latents_dense = embed(cache_hot)

    # Postprocess.
    # `d` keeps the latents as they are at this point; it is what gets
    # decompressed below.
    d = latents_dense
    pos = tf.get_variable("pos", [1, 1000, 1, hparams.hidden_size])
    pos = pos[:, :common_layers.shape_list(latents_dense)[1] + 1, :, :]
    # NOTE(review): the padded `latents_dense` is not read again in this
    # function; the statement is kept because `tf.get_variable` above creates
    # the "pos" variable.
    latents_dense = tf.pad(latents_dense,
                           [[0, 0], [1, 0], [0, 0], [0, 0]]) + pos

    # Decompressing the dense latents
    for i in range(hparams.num_compress_steps):
        # Scope names count down from num_compress_steps - 1 to 0.
        j = hparams.num_compress_steps - i - 1
        d = residual_conv(d, 1, (3, 1), hparams, "decompress_rc_%d" % j)
        d = decompress_step(d, hparams, i > 0, "decompress_%d" % j)

    masking = common_layers.inverse_lin_decay(hparams.mask_startup_steps)
    masking *= common_layers.inverse_exp_decay(
        hparams.mask_startup_steps // 4)  # Not much at start.
    masking = tf.minimum(tf.maximum(masking, 0.0), 1.0)
    if hparams.mode == tf.estimator.ModeKeys.PREDICT:
        masking = 1.0
    # mask is 1 where the uniform sample exceeds `masking`; with masking = 1.0
    # it is 0 everywhere, so prediction uses only the decompressed latents.
    mask = tf.less(masking,
                   tf.random_uniform(common_layers.shape_list(targets)[:-1]))
    mask = tf.expand_dims(tf.to_float(mask), 3)

    # targets is always [batch, length, 1, depth]
    # Keep the true targets where mask is 1, the decompressed latents elsewhere.
    targets = mask * targets + (1.0 - mask) * d

    res = decode_transformer(inputs, ed, targets, hparams, "decoder")
    # The latent prediction loss only counts once the global step has passed
    # mask_startup_steps.
    latent_time = tf.less(hparams.mask_startup_steps,
                          tf.to_int32(tf.train.get_global_step()))
    losses["latent_pred"] *= tf.to_float(latent_time)
    return res, losses, cache
def scale(x):
    """Scales a dimension and computes the padding it then needs.

    `scale_factor` and `divisor` are not parameters; they are read from the
    enclosing scope.

    Arguments:
        x: the size of a dimension.
    Returns:
        a tuple (scaled size as int32, padding that brings the scaled size
        up to the next multiple of `divisor`).
    """
    scaled = tf.to_int32(tf.round(tf.to_float(x) * scale_factor))
    # number of `divisor`-sized blocks needed to cover the scaled size
    num_blocks = tf.to_int32(tf.ceil(scaled / divisor))
    padding = divisor * num_blocks - scaled
    return (scaled, padding)
def resize_keeping_aspect_ratio(image, masks, boxes, keypoints, min_dimension, divisor):
    """
    This function resizes and possibly pads with zeros.
    When using a usual FPN, divisor must be equal to 128.

    The smaller side of the image is resized to `min_dimension`, the larger
    side is scaled by the same factor and then padded (at the bottom or at
    the right) up to the next multiple of `divisor`.

    Arguments:
        image: a float tensor with shape [height, width, 3].
        masks: a float tensor with shape [height / DOWNSAMPLE, width / DOWNSAMPLE, 2].
        boxes: a float tensor with shape [num_persons, 4].
        keypoints: an int tensor with shape [num_persons, 17, 3].
        min_dimension, divisor: integers.
    Returns:
        image: a float tensor with shape [h, w, 3],
            where `min_dimension = min(h, w)`,
            `h` and `w` are divisible by `divisor`.
        masks: a float tensor with shape [h / DOWNSAMPLE, w / DOWNSAMPLE, 2].
        boxes: a float tensor with shape [num_persons, 4].
        keypoints: an int tensor with shape [num_persons, 17, 3].
    Raises:
        ValueError: if `min_dimension` is not divisible by `divisor`.
    """
    # An explicit raise is used instead of `assert`,
    # because assertions are removed when python runs with -O.
    if min_dimension % divisor != 0:
        raise ValueError(
            'min_dimension (%d) must be divisible by divisor (%d)'
            % (min_dimension, divisor))

    min_dimension = tf.constant(min_dimension, dtype=tf.int32)
    divisor = tf.constant(divisor, dtype=tf.int32)

    shape = tf.shape(image)
    height, width = shape[0], shape[1]

    original_min_dim = tf.minimum(height, width)
    scale_factor = tf.to_float(min_dimension / original_min_dim)

    # RESIZE AND PAD IMAGE

    def scale(x):
        """Returns the scaled size of a side and the padding it needs."""
        unpadded_x = tf.to_int32(tf.round(tf.to_float(x) * scale_factor))
        x = tf.to_int32(tf.ceil(unpadded_x / divisor))
        pad = divisor * x - unpadded_x
        return (unpadded_x, pad)

    # the smaller side becomes exactly `min_dimension` and needs no padding,
    # only the larger side is scaled and padded
    zero = tf.constant(0, dtype=tf.int32)
    new_height, pad_height, new_width, pad_width = tf.cond(
        tf.greater_equal(height, width),
        lambda: scale(height) + (min_dimension, zero),
        lambda: (min_dimension, zero) + scale(width))

    # final image size
    h = new_height + pad_height
    w = new_width + pad_width

    # resize keeping aspect ratio
    image = tf.image.resize_images(image, [new_height, new_width], method=RESIZE_METHOD)

    # pad image at the bottom or at the right
    image = tf.image.pad_to_bounding_box(
        image, offset_height=0, offset_width=0,
        target_height=h, target_width=w)

    # RESIZE AND PAD MASKS

    # new size of masks with padding
    map_height = tf.to_int32(tf.ceil(h / DOWNSAMPLE))
    map_width = tf.to_int32(tf.ceil(w / DOWNSAMPLE))

    # new size of only masks without padding
    map_only_height = tf.to_int32(tf.ceil(new_height / DOWNSAMPLE))
    map_only_width = tf.to_int32(tf.ceil(new_width / DOWNSAMPLE))

    masks = tf.image.resize_images(
        masks, [map_only_height, map_only_width],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    masks = tf.image.pad_to_bounding_box(
        masks, offset_height=0, offset_width=0,
        target_height=map_height, target_width=map_width)

    # TRANSFORM KEYPOINTS

    keypoint_scaler = tf.stack([new_height / height, new_width / width])
    keypoint_scaler = tf.to_float(keypoint_scaler)

    points, v = tf.split(keypoints, [2, 1], axis=2)
    points = tf.to_int32(tf.round(tf.to_float(points) * keypoint_scaler))
    y, x = tf.split(points, 2, axis=2)
    # the keypoints are clipped to the padded image size
    y = tf.clip_by_value(y, 0, h - 1)
    x = tf.clip_by_value(x, 0, w - 1)
    keypoints = tf.concat([y, x, v], axis=2)

    # TRANSFORM BOXES

    box_scaler = tf.concat(2 * [keypoint_scaler], axis=0)
    boxes *= box_scaler

    return image, masks, boxes, keypoints
def parse(self, example_proto):
    """
    Decodes one serialized example.

    Arguments:
        example_proto: a serialized example, it is parsed
            with `tf.parse_single_example`.
    Returns:
        image: a float tensor with shape [height, width, 3],
            an RGB image with pixel values in the range [0, 1].
        masks: a float tensor with shape [height / DOWNSAMPLE, width / DOWNSAMPLE, 2].
        boxes: a float tensor with shape [num_persons, 4], in absolute coordinates.
        keypoints: an int tensor with shape [num_persons, 17, 3], in absolute coordinates.
    """
    schema = {
        'image': tf.FixedLenFeature([], tf.string),
        'num_persons': tf.FixedLenFeature([], tf.int64),
        'boxes': tf.FixedLenSequenceFeature([], tf.float32, allow_missing=True),
        'keypoints': tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
        'masks': tf.FixedLenFeature([], tf.string)
    }
    parsed = tf.parse_single_example(example_proto, schema)

    # decode the image, after the conversion
    # pixel values are scaled to the [0, 1] range
    image = tf.image.decode_jpeg(parsed['image'], channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)

    # number of people on the image,
    # it is assumed that num_persons > 0
    num_persons = tf.to_int32(parsed['num_persons'])

    # groundtruth boxes are in absolute coordinates, they are used
    # to guide the data augmentation (when doing a random crop)
    # and to choose sigmas for gaussian blobs
    boxes = tf.reshape(parsed['boxes'], [num_persons, 4])

    # keypoints are in absolute coordinates
    keypoints = tf.reshape(
        tf.to_int32(parsed['keypoints']),
        [num_persons, 17, 3])

    # the masks are downsampled
    # (we use the 'SAME' padding in the networks)
    image_shape = tf.shape(image)
    image_height, image_width = image_shape[0], image_shape[1]
    masks_height = tf.to_int32(tf.ceil(image_height / DOWNSAMPLE))
    masks_width = tf.to_int32(tf.ceil(image_width / DOWNSAMPLE))

    # masks (loss and segmentation masks) are stored as packed bits
    packed = tf.decode_raw(parsed['masks'], tf.uint8)

    # unpack bits (reverse np.packbits):
    # every byte is tested against each of its eight bits
    bit_values = tf.constant([128, 64, 32, 16, 8, 4, 2, 1], dtype=tf.uint8)
    bits = tf.reshape(tf.bitwise.bitwise_and(packed[:, None], bit_values), [-1])

    # drop the bits beyond the number of mask values
    bits = bits[:(masks_height * masks_width * 2)]
    bits = tf.cast(bits > 0, tf.uint8)

    # reshape to the initial form, it has binary values only
    masks = tf.to_float(tf.reshape(bits, [masks_height, masks_width, 2]))

    return image, masks, boxes, keypoints
def subsample(self, indicator, batch_size, labels, scope=None):
    """Returns subsampled minibatch.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be sampled.
      batch_size: desired batch size. If None, keeps all positive samples and
        randomly selects negative samples so that the positive sample fraction
        matches self._positive_fraction. It cannot be None if is_static is
        True.
      labels: boolean tensor of shape [N] denoting positive(=True) and
        negative (=False) examples.
      scope: name scope.

    Returns:
      sampled_idx_indicator: boolean tensor of shape [N], True for entries
        which are sampled.

    Raises:
      ValueError: if labels and indicator are not 1D boolean tensors.
    """
    indicator_rank = len(indicator.get_shape().as_list())
    if indicator_rank != 1:
        raise ValueError('indicator must be 1 dimensional, got a tensor of '
                         'shape %s' % indicator.get_shape())
    labels_rank = len(labels.get_shape().as_list())
    if labels_rank != 1:
        raise ValueError('labels must be 1 dimensional, got a tensor of '
                         'shape %s' % labels.get_shape())
    if labels.dtype != tf.bool:
        raise ValueError('labels should be of type bool. Received: %s' %
                         labels.dtype)
    if indicator.dtype != tf.bool:
        raise ValueError('indicator should be of type bool. Received: %s' %
                         indicator.dtype)

    with tf.name_scope(scope, 'BalancedPositiveNegativeSampler'):
        if self._is_static:
            return self._static_subsample(indicator, batch_size, labels)

        keep_all_positives = batch_size is None

        # Only entries allowed by `indicator` are candidates.
        negative_idx = tf.logical_not(labels)
        positive_idx = tf.logical_and(labels, indicator)
        negative_idx = tf.logical_and(negative_idx, indicator)

        # Positives are sampled first; the number of negatives depends on
        # how many positives were actually drawn.
        if keep_all_positives:
            max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx))
        else:
            max_num_pos = int(self._positive_fraction * batch_size)
        sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
        num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32))

        if keep_all_positives:
            # Pick negatives so that the positive fraction is matched.
            negative_positive_ratio = (
                1 - self._positive_fraction) / self._positive_fraction
            max_num_neg = tf.to_int32(
                negative_positive_ratio * tf.to_float(num_sampled_pos))
        else:
            # Fill the rest of the batch with negatives.
            max_num_neg = batch_size - num_sampled_pos
        sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)

        return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
def add_distance_loss_to_center(labels, logits, groundtruth_coords):
    """Add distance loss function for ClickRegression.

    Registers a loss that penalizes the distance between the predicted point
    (`logits`) and the object center. Nothing is returned: the loss is added
    through `tf.losses`, and nothing is added when `FLAGS.distance_metric` is
    none of the values handled below.

    Args:
      labels: integer label mask; entries equal to the dataset's ignore label
        are zeroed before use.
      logits: predicted coordinates; divided by `FLAGS.image_size` before the
        distance is computed.
      groundtruth_coords: dict with 'xmin', 'xmax', 'ymin' and 'ymax' entries;
        also passed to `calc_distance_to_edge`.
    """
    # Zero out the pixels that carry the ignore label.
    weights = tf.to_int32(
        tf.not_equal(
            labels,
            model_input.dataset_descriptors[FLAGS.dataset].ignore_label))
    labels *= weights

    # Use GT box to get center if it exists. Less computation required.
    # Otherwise, calculate from label mask.
    if FLAGS.use_groundtruth_box:
        center_x = (groundtruth_coords['xmin'] + groundtruth_coords['xmax']) / 2.0
        center_y = (groundtruth_coords['ymin'] + groundtruth_coords['ymax']) / 2.0
        center = tf.stack([center_y, center_x], axis=1)
    else:
        # Make array of coordinates (each row contains the two coordinates
        # (row, column) of one pixel)
        ii, jj = tf.meshgrid(tf.range(FLAGS.image_size),
                             tf.range(FLAGS.image_size),
                             indexing='ij')
        coords = tf.stack([tf.reshape(ii, (-1, )),
                           tf.reshape(jj, (-1, ))],
                          axis=-1)
        coords = tf.cast(coords, tf.int32)
        # Rearrange input into one vector per volume
        volumes_flat = tf.reshape(
            labels, [-1, FLAGS.image_size * FLAGS.image_size * 1, 1])
        # Compute total mass for each volume. Add ZERO_DIV_OFFSET to prevent
        # division by 0
        total_mass = tf.cast(tf.reduce_sum(volumes_flat, axis=1),
                             tf.float32) + ZERO_DIV_OFFSET
        # Compute centre of mass
        center = tf.cast(tf.reduce_sum(volumes_flat * coords, axis=1),
                         tf.float32) / total_mass
        # NOTE(review): only the mask-derived center is normalized here; this
        # presumes groundtruth_coords are already normalized - confirm.
        center = center / FLAGS.image_size

    # Normalize coordinates by size of image
    logits = logits / FLAGS.image_size

    # Calculate loss based on the distance metric specified
    # Loss added later in model_fn by tf.losses.get_total_loss()
    if FLAGS.distance_metric == 'mse':
        tf.losses.mean_squared_error(center, logits)
    elif FLAGS.distance_metric in [
            'euclidean', 'euclidean_sqrt', 'euclidean_iter'
    ]:
        # ZERO_DIV_OFFSET is added under the square root.
        distance_to_center = tf.sqrt(
            tf.reduce_sum(tf.square(logits - center), axis=-1) + ZERO_DIV_OFFSET)
        if FLAGS.ratio_box_distance:
            # Divide the distance to the center by its difference from the
            # distance returned by calc_distance_to_edge.
            distance_to_box = calc_distance_to_edge(groundtruth_coords, logits)
            box_distance_to_center = (tf.to_float(distance_to_center) -
                                      distance_to_box)
            loss = distance_to_center / (box_distance_to_center +
                                         ZERO_DIV_OFFSET)
        else:
            loss = distance_to_center

        if FLAGS.distance_metric == 'euclidean_sqrt':
            loss = tf.sqrt(loss)
        if FLAGS.distance_metric == 'euclidean_iter':
            # The exponent 1 / step shrinks as the global step grows.
            iter_num = tf.to_float(tf.train.get_or_create_global_step())
            step = (iter_num // FLAGS.euclidean_step) + 1.0
            loss = tf.pow(loss, tf.to_float(1.0 / step))
        tf.losses.compute_weighted_loss(loss)
def hier_homography_estimator(inputs, num_param=8, num_layer=7, num_level=3,
                              dropout_keep_prob=0.8, reuse=None,
                              is_training=True, trainable=True,
                              final_endpoint=None, scope='hier_hmg'):
    """A hierarchical VGG-style neural network for homograhy estimation.

    The input pair is processed from the coarsest level to the finest one.
    Level 0 regresses a homography directly; every later level first warps
    the first image with the homography of the previous level and then
    regresses a correction that is composed with it.

    Args:
      inputs: batch of input image pairs of data type float32 and of shape
        [batch_size, height, width, 2]
      num_param: the number of parameters for homography (default 8)
      num_layer: the number of convolutional layers in the motion feature
        network
      num_level: the number of hierarchical levels
      dropout_keep_prob: the percentage of activation values that are kept
      reuse: whether to reuse this network weights
      is_training: whether used for training or testing
      trainable: whether this network is to be trained or not
      final_endpoint: specifies the endpoint to construct the network up to
      scope: the scope of variables in this function

    Returns:
      a list of homographies at each level and motion feature maps if
      final_endpoint='mfeature'; otherwise a list of homographies at each
      level and a list of the warped image pairs built at levels > 0.
    """
    _, h_input, w_input = inputs.get_shape().as_list()[0:3]
    hmgs_list = []
    warped_list = []
    # `[inputs]` must go to `values`: the second positional argument of
    # tf.variable_scope is `default_name`, which has to be a string.
    with tf.variable_scope(scope, default_name='hier_hmg', values=[inputs],
                           reuse=reuse):
        for level_index in range(num_level):
            # Level 0 is the coarsest, the last level has full resolution.
            scale = 2 ** (num_level - 1 - level_index)
            h = tf.to_float(tf.floordiv(h_input, scale))
            w = tf.to_float(tf.floordiv(w_input, scale))
            inputs_il = tf.image.resize_images(inputs, tf.to_int32([h, w]))
            if level_index == 0:
                mfeature = hier_base_layers(
                    inputs_il, num_layer + 1 - num_level + level_index,
                    level_index, is_training=is_training, trainable=trainable)
                hmgs_il = homography_regression(
                    mfeature, num_param, level_index,
                    dropout_keep_prob=dropout_keep_prob,
                    is_training=is_training, trainable=trainable)
                hmgs_list.append(hmgs_il)
            else:
                # Warp the first image with the previous level's homography,
                # which was estimated at half of the current size.
                warped, _ = hmg_util.homography_scale_warp_per_batch(
                    inputs_il[:, :, :, 0], w / 2, h / 2,
                    hmgs_list[level_index - 1])
                pre_warped_inputs_il = tf.stack(
                    [warped, inputs_il[:, :, :, 1]], -1)
                warped_list.append(pre_warped_inputs_il)
                if level_index == num_level - 1 and final_endpoint == 'mfeature':
                    # NOTE(review): this early exit builds one layer fewer
                    # than the regular branch and is only reachable for
                    # level_index > 0, so it never triggers when
                    # num_level == 1 - confirm this is intended.
                    mfeature = hier_base_layers(
                        pre_warped_inputs_il,
                        num_layer - num_level + level_index, level_index,
                        is_training=is_training, trainable=trainable)
                    return hmgs_list, mfeature
                else:
                    mfeature = hier_base_layers(
                        pre_warped_inputs_il,
                        num_layer + 1 - num_level + level_index, level_index,
                        is_training=is_training, trainable=trainable)
                    hmgs_il = homography_regression(
                        mfeature, num_param, level_index,
                        dropout_keep_prob=dropout_keep_prob,
                        is_training=is_training, trainable=trainable)
                    # Compose the correction with the previous homography.
                    new_hmgs_il = hmg_util.homography_shift_mult_batch(
                        hmgs_list[level_index - 1], w / 2, h / 2,
                        hmgs_il, w, h, w, h)
                    hmgs_list.append(new_hmgs_il)
    return hmgs_list, warped_list
def _serving_model_fn(features, labels, mode, params):
    """Builds the serving model_fn.

    Runs the model in prediction mode and exports the per-pixel categories
    and thresholded scores as PNG-encoded strings. `output_image_info` and
    `export_tpu_model` are read from the enclosing scope.

    Args:
      features: dict with the entries 'images', 'image_info',
        'score_thresholds' and 'key'.
      labels: unused.
      mode: must be `tf.estimator.ModeKeys.PREDICT`.
      params: parameters wrapped in `params_dict.ParamsDict` to build the
        model.

    Returns:
      A `TPUEstimatorSpec` if `export_tpu_model` is set, otherwise an
      `EstimatorSpec`, holding the predictions.

    Raises:
      ValueError: if `mode` is not `tf.estimator.ModeKeys.PREDICT`.
    """
    del labels  # unused.
    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError('To build the serving model_fn, set '
                         'mode = `tf.estimator.ModeKeys.PREDICT`')

    model_params = params_dict.ParamsDict(params)

    images = features['images']
    _, height, width, _ = images.get_shape().as_list()

    model = factory.model_generator(model_params)
    outputs = model.build_outputs(
        features['images'], labels=None, mode=mode_keys.PREDICT)

    # Bring the logits back to the size of the network input.
    input_size = tf.shape(images)[1:3]
    logits = tf.image.resize_bilinear(
        outputs['logits'], input_size, align_corners=False)

    # Clip the predictions to original image size.
    original_image_size = tf.squeeze(features['image_info'][:, 0:1, :])
    height = original_image_size[0]
    width = original_image_size[1]
    offset_height = tf.zeros_like(height, dtype=tf.int32)
    offset_width = tf.zeros_like(width, dtype=tf.int32)
    logits = tf.image.crop_to_bounding_box(
        logits, offset_height, offset_width,
        tf.cast(height, dtype=tf.int32), tf.cast(width, dtype=tf.int32))

    probabilities = tf.nn.softmax(logits)

    score_threshold_placeholder = features['score_thresholds']
    key_placeholder = features['key']

    # Append trailing axes to the thresholds so that they broadcast against
    # the probabilities.
    thresholds = score_threshold_placeholder
    for _ in range(logits.shape.ndims - 1):
        thresholds = tf.expand_dims(thresholds, -1)

    # Probabilities that do not exceed the threshold count as zero; the
    # best remaining one is scaled to the uint8 range.
    above_threshold = probabilities > thresholds
    scores = tf.where(above_threshold, probabilities,
                      tf.zeros_like(probabilities))
    scores = tf.expand_dims(tf.reduce_max(scores, 3), -1)
    scores = tf.cast(tf.minimum(scores * 255.0, 255), tf.uint8)

    categories = tf.to_int32(tf.expand_dims(tf.argmax(probabilities, 3), -1))

    # Generate images for scores and categories.
    score_bytes = tf.map_fn(
        tf.image.encode_png, scores, back_prop=False, dtype=tf.string)
    category_bytes = tf.map_fn(
        tf.image.encode_png, tf.cast(categories, tf.uint8),
        back_prop=False, dtype=tf.string)

    predictions = {
        'category_bytes': tf.identity(category_bytes, name='category_bytes'),
        'score_bytes': tf.identity(score_bytes, name='score_bytes'),
        'key': tf.identity(key_placeholder, name='key'),
    }
    if output_image_info:
        predictions['image_info'] = tf.identity(
            features['image_info'], name='image_info')

    if export_tpu_model:
        return tf.estimator.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions)
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
def DecodeLabel(label):
    """Decodes a raw byte string that holds a single uint8 label.

    Args:
      label: a string tensor with the raw bytes of the label.

    Returns:
      The label as a scalar int32 tensor.
    """
    raw_bytes = tf.decode_raw(label, tf.uint8)
    # Reshaping to [] requires that exactly one byte was decoded.
    scalar = tf.reshape(raw_bytes, [])
    return tf.to_int32(scalar)
def get_center_index(response):
    """Get the index of the center in the response map.

    Args:
      response: a tensor; axes 1 and 2 are taken as the two axes of the map.

    Returns:
      A pair of int32 scalars `(size - 1) / 2`, one for axis 1 and one for
      axis 2.
    """
    dims = tf.shape(response)
    center_1, center_2 = [tf.to_int32((dims[axis] - 1) / 2) for axis in (1, 2)]
    return center_1, center_2
def apply_cmap(brightness, cmap):
    """Colors brightness values with a matplotlib colormap.

    Args:
      brightness: a float tensor. Values are multiplied by 255 and rounded to
        get the index into the color table (assumes values in [0, 1] -
        confirm against callers).
      cmap: the name of a matplotlib colormap.

    Returns:
      A float32 tensor with the shape of `brightness` plus one trailing axis
      that holds the color components.
    """
    indices = tf.to_int32(tf.round(brightness * 255))
    cm = matplotlib.cm.get_cmap(cmap)
    # Only ListedColormap (e.g. 'viridis') has a `colors` attribute. For the
    # other colormap classes (e.g. 'jet') the table is obtained by evaluating
    # the colormap at each of its integer indices; the alpha channel is
    # dropped.
    table = getattr(cm, 'colors', None)
    if table is None:
        table = cm(range(cm.N))[:, :3]
    colors = tf.constant(table, dtype=tf.float32)
    return tf.gather(colors, indices)
def compute_mel_filterbank_features(waveforms,
                                    sample_rate=16000,
                                    dither=1.0 / np.iinfo(np.int16).max,
                                    preemphasis=0.97,
                                    frame_length=25,
                                    frame_step=10,
                                    fft_length=None,
                                    window_fn=functools.partial(
                                        tf.signal.hann_window, periodic=True),
                                    lower_edge_hertz=80.0,
                                    upper_edge_hertz=7600.0,
                                    num_mel_bins=80,
                                    log_noise_floor=1e-3,
                                    apply_mask=True):
    """Implement mel-filterbank extraction using tf ops.

    Args:
      waveforms: float32 tensor with shape [batch_size, max_len]
      sample_rate: sampling rate of the waveform
      dither: stddev of Gaussian noise added to waveform to prevent
        quantization artefacts
      preemphasis: waveform high-pass filtering constant
      frame_length: frame length in ms
      frame_step: frame step in ms
      fft_length: number of fft bins; defaults to the smallest power of two
        that is not below the frame length in samples
      window_fn: windowing function
      lower_edge_hertz: lowest frequency of the filterbank
      upper_edge_hertz: highest frequency of the filterbank
      num_mel_bins: filterbank size
      log_noise_floor: lower bound applied to the mel energies before the log
      apply_mask: When working on a batch of samples, set padding frames to
        zero

    Returns:
      filterbanks: a float32 tensor with shape [batch_size, len, num_bins, 1]
    """
    # Find the wave length: the largest index for which the value is !=0
    # note that waveforms samples that are exactly 0.0 are quite common, so
    # simply doing sum(waveforms != 0, axis=-1) will not work correctly.
    sample_positions = tf.expand_dims(tf.range(tf.shape(waveforms)[1]), 0)
    is_nonzero = tf.to_int32(tf.not_equal(waveforms, 0.0))
    wav_lens = tf.reduce_max(sample_positions * is_nonzero, axis=-1) + 1

    if dither > 0:
        waveforms += tf.random_normal(tf.shape(waveforms), stddev=dither)
    if preemphasis > 0:
        # First-order difference filter; the signal gets one sample shorter.
        waveforms = waveforms[:, 1:] - preemphasis * waveforms[:, :-1]
        wav_lens -= 1

    # Convert the frame parameters from milliseconds to samples.
    samples_per_frame = int(frame_length * sample_rate / 1e3)
    samples_per_step = int(frame_step * sample_rate / 1e3)
    if fft_length is None:
        fft_length = int(2**(np.ceil(np.log2(samples_per_frame))))

    # `stfts` is a complex64 Tensor representing the short-time Fourier
    # Transform of each signal in `waveforms`. Its shape is
    # [batch_size, ?, fft_unique_bins]
    # where fft_unique_bins = fft_length // 2 + 1
    stfts = tf.contrib.signal.stft(
        waveforms,
        frame_length=samples_per_frame,
        frame_step=samples_per_step,
        fft_length=fft_length,
        window_fn=window_fn,
        pad_end=True)

    # Frames whose index does not exceed the frame count of the unpadded
    # signal are kept by the mask.
    stft_lens = (wav_lens + (samples_per_step - 1)) // samples_per_step
    frame_positions = tf.expand_dims(tf.range(tf.shape(stfts)[1]), 0)
    masks = tf.to_float(
        tf.less_equal(frame_positions, tf.expand_dims(stft_lens, 1)))

    # An energy spectrogram is the magnitude of the complex-valued STFT.
    # A float32 Tensor of shape [batch_size, ?, fft_unique_bins].
    magnitude_spectrograms = tf.abs(stfts)

    # Warp the linear-scale, magnitude spectrograms into the mel-scale.
    num_spectrogram_bins = magnitude_spectrograms.shape[-1].value
    mel_weights = tf.contrib.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
        upper_edge_hertz)
    mel_spectrograms = tf.tensordot(magnitude_spectrograms, mel_weights, 1)
    # Note: Shape inference for tensordot does not currently handle this case.
    mel_spectrograms.set_shape(
        magnitude_spectrograms.shape[:-1].concatenate(mel_weights.shape[-1:]))

    log_mel_sgram = tf.log(tf.maximum(log_noise_floor, mel_spectrograms))

    if apply_mask:
        log_mel_sgram *= tf.expand_dims(tf.to_float(masks), -1)

    return tf.expand_dims(log_mel_sgram, -1, name="mel_sgrams")
# Output layer: a 1x1 convolution with softmax that gives per-pixel class
# probabilities.
hidden.append(
    Conv2D(nClasses, (1), padding='same', activation='softmax')(hidden[-1]))
print('layer', len(hidden) - 1, ':', hidden[-1].shape, 'output')
sm = hidden[-1]

# The labels come at the input size; crop them symmetrically so that they
# match the size of the network output.
y0 = Input((imSize, imSize, nClasses))
toCrop = int((y0.shape[1] - sm.shape[1]) // 2)
y = Cropping2D(toCrop)(y0)
cropSize = y.shape[1]

# Accuracy: for every class, the fraction of pixels labeled with that class
# for which it is also the argmax prediction, averaged over the classes.
# NOTE(review): a class with no labeled pixels in the batch gives 0 / 0 here.
l = []
# nl = []
for iClass in range(nClasses):
    labels0 = tf.reshape(
        tf.to_int32(tf.slice(y, [0, 0, 0, iClass], [-1, -1, -1, 1])),
        [batchSize, cropSize, cropSize])
    predict0 = tf.reshape(tf.to_int32(tf.equal(tf.argmax(sm, 3), iClass)),
                          [batchSize, cropSize, cropSize])
    correct = tf.multiply(labels0, predict0)
    nCorrect0 = tf.reduce_sum(correct)
    nLabels0 = tf.reduce_sum(labels0)
    l.append(tf.to_float(nCorrect0) / tf.to_float(nLabels0))
    # nl.append(nLabels0)
acc = tf.add_n(l) / nClasses

# Cross entropy summed over all pixels and classes.
# NOTE(review): the softmax output is not clipped before the log.
loss = -tf.reduce_sum(tf.multiply(y, tf.log(sm)))

# Run the collected update ops together with every optimizer step.
updateOps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
optimizer = tf.train.AdamOptimizer(learningRate)
with tf.control_dependencies(updateOps):
    optOp = optimizer.minimize(loss)
def __init__(self, num_emb, batch_size, emb_dim, hidden_dim, sequence_length,
             start_token, learning_rate=0.01, reward_gamma=0.95):
    """Builds the generator graph: sampling, pretraining and reward training.

    Args:
      num_emb: number of rows of the embedding matrix; also used as the depth
        of the one-hot encoded tokens.
      batch_size: number of sequences per batch (fixed in the graph).
      emb_dim: size of a token embedding.
      hidden_dim: size of the hidden state.
      sequence_length: number of tokens per sequence (fixed in the graph).
      start_token: id of the token that is fed at the first step.
      learning_rate: initial value of the non-trainable learning rate variable.
      reward_gamma: stored on the instance; not used inside this method.
    """
    self.num_emb = num_emb
    self.batch_size = batch_size
    self.emb_dim = emb_dim
    self.hidden_dim = hidden_dim
    self.sequence_length = sequence_length
    self.start_token = tf.constant([start_token] * self.batch_size,
                                   dtype=tf.int32)
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.reward_gamma = reward_gamma
    self.g_params = []
    self.d_params = []
    self.temperature = 1.0
    self.grad_clip = 5.0

    self.expected_reward = tf.Variable(tf.zeros([self.sequence_length]))

    # The embedding matrix is the first entry of g_params; the two unit
    # builders below receive the same list.
    self.g_embeddings = tf.Variable(
        self.init_matrix([self.num_emb, self.emb_dim]))
    self.g_params.append(self.g_embeddings)
    self.g_recurrent_unit = self.create_recurrent_unit(
        self.g_params)  # maps h_tm1 to h_t for generator
    self.g_output_unit = self.create_output_unit(
        self.g_params)  # maps h_t to o_t (output token logits)

    # placeholder definition
    self.x = tf.placeholder(tf.int32, shape=[
        self.batch_size, self.sequence_length
    ])  # sequence of tokens generated by generator
    self.rewards = tf.placeholder(
        tf.float32, shape=[self.batch_size, self.sequence_length
                           ])  # get from rollout policy and discriminator

    # processed for batch
    self.processed_x = tf.transpose(
        tf.nn.embedding_lookup(self.g_embeddings, self.x),
        perm=[1, 0, 2])  # seq_length x batch_size x emb_dim

    # Initial states
    # Two stacked zero tensors of shape [batch_size, hidden_dim].
    self.h0 = tf.zeros([self.batch_size, self.hidden_dim])
    self.h0 = tf.stack([self.h0, self.h0])

    gen_o = tensor_array_ops.TensorArray(dtype=tf.float32,
                                         size=self.sequence_length,
                                         dynamic_size=False,
                                         infer_shape=True)
    gen_x = tensor_array_ops.TensorArray(dtype=tf.int32,
                                         size=self.sequence_length,
                                         dynamic_size=False,
                                         infer_shape=True)

    # One step of free-running generation: sample the next token from the
    # output distribution and feed its embedding to the next step.
    def _g_recurrence(i, x_t, h_tm1, gen_o, gen_x):
        h_t = self.g_recurrent_unit(x_t, h_tm1)  # hidden_memory_tuple
        o_t = self.g_output_unit(h_t)  # batch x vocab , logits not prob
        log_prob = tf.log(tf.nn.softmax(o_t))
        next_token = tf.cast(
            tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]),
            tf.int32)
        x_tp1 = tf.nn.embedding_lookup(self.g_embeddings,
                                       next_token)  # batch x emb_dim
        # Record the probability that was assigned to the sampled token.
        gen_o = gen_o.write(
            i,
            tf.reduce_sum(
                tf.multiply(tf.one_hot(next_token, self.num_emb, 1.0, 0.0),
                            tf.nn.softmax(o_t)), 1))  # [batch_size] , prob
        gen_x = gen_x.write(i, next_token)  # indices, batch_size
        return i + 1, x_tp1, h_t, gen_o, gen_x

    # NOTE(review): self.gen_o is left as a TensorArray; only self.gen_x is
    # stacked below.
    _, _, _, self.gen_o, self.gen_x = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, _3, _4: i < self.sequence_length,
        body=_g_recurrence,
        loop_vars=(tf.constant(0, dtype=tf.int32),
                   tf.nn.embedding_lookup(self.g_embeddings,
                                          self.start_token), self.h0, gen_o,
                   gen_x))

    self.gen_x = self.gen_x.stack()  # seq_length x batch_size
    self.gen_x = tf.transpose(self.gen_x,
                              perm=[1, 0])  # batch_size x seq_length

    # supervised pretraining for generator
    g_predictions = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                 size=self.sequence_length,
                                                 dynamic_size=False,
                                                 infer_shape=True)

    ta_emb_x = tensor_array_ops.TensorArray(dtype=tf.float32,
                                            size=self.sequence_length)
    ta_emb_x = ta_emb_x.unstack(self.processed_x)

    # One teacher-forced step: the next input is the embedding of the given
    # token of self.x instead of a sampled token.
    def _pretrain_recurrence(i, x_t, h_tm1, g_predictions):
        h_t = self.g_recurrent_unit(x_t, h_tm1)
        o_t = self.g_output_unit(h_t)
        g_predictions = g_predictions.write(
            i, tf.nn.softmax(o_t))  # batch x vocab_size
        x_tp1 = ta_emb_x.read(i)
        return i + 1, x_tp1, h_t, g_predictions

    _, _, _, self.g_predictions = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, _3: i < self.sequence_length,
        body=_pretrain_recurrence,
        loop_vars=(tf.constant(0, dtype=tf.int32),
                   tf.nn.embedding_lookup(self.g_embeddings,
                                          self.start_token), self.h0,
                   g_predictions))

    self.g_predictions = tf.transpose(
        self.g_predictions.stack(),
        perm=[1, 0, 2])  # batch_size x seq_length x vocab_size

    # pretraining loss
    # Negative log-likelihood of the tokens in self.x, averaged over all
    # positions; probabilities are clipped to [1e-20, 1.0] before the log.
    self.pretrain_loss = -tf.reduce_sum(
        tf.one_hot(tf.to_int32(tf.reshape(
            self.x, [-1])), self.num_emb, 1.0, 0.0) * tf.log(
                tf.clip_by_value(
                    tf.reshape(self.g_predictions, [-1, self.num_emb]),
                    1e-20, 1.0))) / (self.sequence_length * self.batch_size)

    # training updates
    pretrain_opt = self.g_optimizer(self.learning_rate)

    # Gradients are clipped by their global norm before being applied.
    self.pretrain_grad, _ = tf.clip_by_global_norm(
        tf.gradients(self.pretrain_loss, self.g_params), self.grad_clip)
    self.pretrain_updates = pretrain_opt.apply_gradients(
        zip(self.pretrain_grad, self.g_params))

    #######################################################################################################
    #  Unsupervised Training
    #######################################################################################################
    # Log-likelihood of every token in self.x weighted by its reward and
    # summed (not averaged) over all positions.
    self.g_loss = -tf.reduce_sum(
        tf.reduce_sum(
            tf.one_hot(tf.to_int32(tf.reshape(
                self.x, [-1])), self.num_emb, 1.0, 0.0) * tf.log(
                    tf.clip_by_value(
                        tf.reshape(self.g_predictions, [-1, self.num_emb]),
                        1e-20, 1.0)), 1) * tf.reshape(self.rewards, [-1]))

    g_opt = self.g_optimizer(self.learning_rate)

    self.g_grad, _ = tf.clip_by_global_norm(
        tf.gradients(self.g_loss, self.g_params), self.grad_clip)
    self.g_updates = g_opt.apply_gradients(zip(self.g_grad, self.g_params))
def parse_fn(filename, output_sequence_length=IMAGES_PER_SEQUENCE):
  """Reads the frames, segmentation masks and intrinsics of one sequence.

  The three input files share the prefix `filename` and are located by
  appending '.png' (frames), '-fseg.png' (segmentation masks) and '_cam.txt'
  (comma-separated intrinsics, exactly 9 numbers). Both PNG images are split
  into IMAGES_PER_SEQUENCE equal pieces along axis 1.

  Args:
    filename: the filename prefix of the set of files to be loaded.
    output_sequence_length: Length of the output sequence. If less than
      IMAGES_PER_SEQUENCE, only the first `output_sequence_length` frames will
      be kept.

  Returns:
    A dictionary that maps strings to tf.Tensors:
      'rgb': float32; the first `output_sequence_length` RGB frames stacked
        along a new leading axis. Each channel value is between 0.0 and 1.0.
      'intrinsics': `[IMAGE_WIDTH, IMAGE_HEIGHT, fx, fy, x0, y0]`, repeated
        `output_sequence_length` times along a new leading axis.
      'mask': int32; the first `output_sequence_length` mask frames stacked
        along a new leading axis, each with a trailing axis of size 1. Every
        value packs the mask's RGB bytes as r * 65536 + g * 256 + b.

  Raises:
    ValueError: if `output_sequence_length` is outside
      [1, IMAGES_PER_SEQUENCE].
  """
  if output_sequence_length > IMAGES_PER_SEQUENCE or output_sequence_length < 1:
    raise ValueError(
        'Invalid output_sequence_length %d: must be within [1, '
        '%d].' % (output_sequence_length, IMAGES_PER_SEQUENCE))

  image_file = tf.strings.join([filename, '.png'])
  intrinsics_file = tf.strings.join([filename, '_cam.txt'])
  mask_file = tf.strings.join([filename, '-fseg.png'])

  # Read files.
  encoded_image = tf.io.read_file(image_file)
  encoded_mask = tf.io.read_file(mask_file)
  intrinsics_content = tf.io.read_file(intrinsics_file)

  # When the intrinsics file is empty AND the filename ends with
  # KITTI_CORRUPT_FILE, fall back to the hard-coded intrinsics string.
  content_is_empty = tf.math.equal(intrinsics_content, '')
  filename_matches = tf.strings.regex_full_match(
      filename, '.*%s$' % KITTI_CORRUPT_FILE)
  file_is_corrupt = tf.math.logical_and(content_is_empty, filename_matches)
  intrinsics_content = tf.cond(file_is_corrupt,
                               lambda: KITTI_CORRUPT_FILE_INTRINSICS,
                               lambda: intrinsics_content)

  # Parse intrinsics data to a tensor representing a 3x3 matrix.
  intrinsics = tf.strings.split([intrinsics_content], ',').values
  intrinsics = tf.strings.to_number(intrinsics)
  intrinsics.set_shape([9])

  # Only the focal lengths and the principal point are kept from the matrix.
  fx, _, x0, _, fy, y0, _, _, _ = tf.unstack(intrinsics)
  intrinsics = tf.stack([IMAGE_WIDTH, IMAGE_HEIGHT, fx, fy, x0, y0])

  # Decode and normalize images. `tf.cast` replaces the deprecated
  # `tf.to_float` / `tf.to_int32` helpers; the result is the same.
  decoded_image = tf.image.decode_png(encoded_image, channels=3)
  decoded_image = tf.cast(decoded_image, tf.float32) * (1 / 255.0)
  split_image_sequence = tf.split(decoded_image, IMAGES_PER_SEQUENCE, axis=1)

  decoded_mask = tf.image.decode_png(encoded_mask, channels=3)
  mask_r, mask_g, mask_b = tf.unstack(
      tf.cast(decoded_mask, tf.int32), axis=-1)
  # Since TPU does not support images of type uint8, we encode the 3 RGB uint8
  # values into one int32 value.
  mask = mask_r * (256 * 256) + mask_g * 256 + mask_b
  # All images in our pipeline have 3 dimensions (height, width, channels), so
  # we add a third dimension to the mask too.
  mask = tf.expand_dims(mask, -1)
  split_mask_sequence = tf.split(mask, IMAGES_PER_SEQUENCE, axis=1)

  return {
      'rgb': tf.stack(split_image_sequence[:output_sequence_length]),
      'intrinsics': tf.stack([intrinsics] * output_sequence_length),
      'mask': tf.stack(split_mask_sequence[:output_sequence_length]),
  }
def average_bag_of_embeds(embeddings, mask, use_bigrams=False,
                          bigram_embed_scope=None, append_start_end=False):
  """Computes the masked mean of a sequence of embeddings.

  Positions where `mask` is False are zeroed before summing over axis 1. The
  sum is divided by the number of True entries in `mask` (at least 1). With
  `use_bigrams`, a dense projection of every adjacent pair of embeddings is
  added to the bag and the divisor becomes `2 * count - 1`.

  Args:
    embeddings: a float Tensor of shape [None, length, depth].
    mask: a boolean Tensor of shape [None, length].
    use_bigrams: whether to add projected bigram embeddings to the bag.
    bigram_embed_scope: the variable scope to use; "average_bow" when None.
    append_start_end: whether to surround each sequence with learned start and
      end embeddings before forming bigrams. Only read when `use_bigrams` is
      True.

  Returns:
    A float Tensor of shape [None, depth] holding the averaged embeddings.
  """
  scope = "average_bow" if bigram_embed_scope is None else bigram_embed_scope
  with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
    rank_checks = [
        tf.assert_equal(tf.rank(embeddings), 3, summarize=100),
        tf.assert_equal(tf.rank(mask), 2, summarize=100),
    ]
    with tf.control_dependencies(rank_checks):
      # Number of valid positions per row, kept as [None, 1] for broadcasting.
      valid_count = tf.cast(
          tf.reduce_sum(tf.cast(mask, tf.int32), -1, keepdims=True),
          tf.float32)
      batch_size = common_layers.shape_list(embeddings)[0]
      length = common_layers.shape_list(embeddings)[1]
      depth = common_layers.shape_list(embeddings)[2]

      # Zero out the masked positions.
      keep = tf.tile(tf.expand_dims(mask, 2), [1, 1, depth])
      embeddings = tf.where(keep, embeddings, tf.zeros_like(embeddings))

      if use_bigrams:
        if append_start_end:
          start_var = tf.get_variable(name="span_start_embed", shape=[depth])
          end_var = tf.get_variable(name="span_end_embed", shape=[depth])
          end_embed = tf.expand_dims(tf.expand_dims(end_var, 0), 0)
          start_embed = tf.expand_dims(
              tf.tile(tf.expand_dims(start_var, 0), [batch_size, 1]), 1)
          # Prefix the start
          embeddings = tf.concat([start_embed, embeddings], axis=1)
          # Pad for the end slot
          embeddings = tf.pad(embeddings, [[0, 0], [0, 1], [0, 0]])
          end_embed = tf.tile(end_embed, [batch_size, length + 2, 1])
          # Mask covering the start slot plus the original valid positions.
          mask_with_start = tf.pad(
              tf.pad(tf.to_int32(mask), [[0, 0], [1, 0]], constant_values=1),
              [[0, 0], [0, 1]],
              constant_values=0)
          # Shifting right by one additionally covers the end slot.
          mask_with_end = tf.pad(
              mask_with_start, [[0, 0], [1, 0]], constant_values=1)[:, :-1]
          mask = tf.cast(mask_with_end, tf.bool)
          # The difference of the two masks marks the end slot.
          mask_of_end = tf.expand_dims(mask_with_end - mask_with_start, 2)
          embeddings = embeddings + end_embed * tf.to_float(mask_of_end)

        adjacent_pairs = tf.concat(
            [embeddings[:, :-1, :], embeddings[:, 1:, :]], axis=-1)
        bigram_embeddings = tf.layers.dense(adjacent_pairs, units=depth)
        bigram_mask = tf.to_float(tf.expand_dims(mask[:, 1:], 2))
        masked_bigram_embeddings = bigram_embeddings * bigram_mask
        embeddings = tf.concat(
            [embeddings, masked_bigram_embeddings], axis=1)
        # NOTE(review): the divisor is derived from the original mask and is
        # not adjusted for the start/end slots -- confirm this is intended.
        valid_count = valid_count + valid_count - 1

      summed = tf.reduce_sum(embeddings, axis=1)
      divisor = tf.maximum(valid_count, 1.0)
      avg_embeddings = tf.div(summed, divisor)
    return avg_embeddings
def __init__(self,
             train_batch_size=4096,
             test_chain_batch_size=4096,
             bijector="iaf",
             log_dir="/tmp/neutra",
             base_learning_rate=1e-3,
             q_base_scale=1.,
             learning_rate_schedule=None):
  """Builds the training and evaluation graph.

  Args:
    train_batch_size: number of samples drawn from the training approximation
      per step; coerced with `int()`.
    test_chain_batch_size: default value of the `test_chain_batch_size`
      placeholder, which is passed to `MakeNeuTra` as its batch size.
    bijector: one of "rnvp", "iaf" or "affine". Any other value selects an
      identity bijector.
    log_dir: stored as `self.log_dir`.
    base_learning_rate: learning rate that the schedule factors multiply.
    q_base_scale: scale of the diagonal normal base distribution.
    learning_rate_schedule: sequence of `[step, factor]` pairs. The learning
      rate is `base_learning_rate` until the global step exceeds the first
      `step`, and `base_learning_rate * factor` for the matching pair
      afterwards. Defaults to `[[6000, 1e-1]]`.
  """
  # A `None` sentinel avoids sharing one mutable default list across calls.
  if learning_rate_schedule is None:
    learning_rate_schedule = [[6000, 1e-1]]

  target, target_spec = GetTargetSpec()
  self.target = target
  self.target_spec = target_spec
  # The training target is resolved under the "train" gin scope, so it may
  # be configured differently from the evaluation target.
  with gin.config_scope("train"):
    train_target, train_target_spec = GetTargetSpec()
    self.train_target = train_target
    self.train_target_spec = train_target_spec

  if bijector == "rnvp":
    bijector_fn = tf.make_template(
        "bijector", MakeRNVPBijectorFn, num_dims=self.target_spec.num_dims)
  elif bijector == "iaf":
    bijector_fn = tf.make_template(
        "bijector", MakeIAFBijectorFn, num_dims=self.target_spec.num_dims)
  elif bijector == "affine":
    bijector_fn = tf.make_template(
        "bijector", MakeAffineBijectorFn, num_dims=self.target_spec.num_dims)
  else:
    bijector_fn = lambda *args, **kwargs: tfb.Identity()

  self.train_bijector = bijector_fn(train=True)
  self.bijector = bijector_fn(train=False)
  if train_target_spec.bijector is not None:
    print("Using train target bijector")
    self.train_bijector = tfb.Chain(
        [train_target_spec.bijector, self.train_bijector])
  if target_spec.bijector is not None:
    print("Using target bijector")
    self.bijector = tfb.Chain([target_spec.bijector, self.bijector])

  q_base = tfd.Independent(
      tfd.Normal(
          loc=tf.zeros(self.target_spec.num_dims),
          scale=q_base_scale * tf.ones(self.target_spec.num_dims)), 1)
  self.q_x_train = tfd.TransformedDistribution(q_base, self.train_bijector)
  self.q_x = tfd.TransformedDistribution(q_base, self.bijector)

  # Params
  self.train_batch_size = int(train_batch_size)
  self.test_chain_batch_size = tf.placeholder_with_default(
      test_chain_batch_size, [], "test_chain_batch_size")
  self.test_batch_size = tf.placeholder_with_default(
      16384 * 8, [], "test_batch_size")
  self.test_num_steps = tf.placeholder_with_default(
      1000, [], "test_num_steps")
  self.test_num_leapfrog_steps = tf.placeholder_with_default(
      tf.to_int32(2), [], "test_num_leapfrog_steps")
  self.test_step_size = tf.placeholder_with_default(
      0.1, [], "test_step_size")

  # Test
  self.neutra_outputs = MakeNeuTra(
      target=self.target,
      q=self.q_x,
      batch_size=self.test_chain_batch_size,
      num_steps=self.test_num_steps,
      num_leapfrog_steps=self.test_num_leapfrog_steps,
      step_size=self.test_step_size,
  )
  # Map the chain back through the bijector; flatten to
  # [-1, num_dims] for the inverse and then restore the chain's shape.
  self.z_chain = tf.reshape(
      self.bijector.inverse(
          tf.reshape(self.neutra_outputs.x_chain,
                     [-1, self.target_spec.num_dims])),
      tf.shape(self.neutra_outputs.x_chain))
  self.target_samples = self.target.sample(self.test_batch_size)
  self.target_z = self.bijector.inverse(self.target_samples)
  self.q_samples = self.q_x.sample(self.test_batch_size)

  self.target_cov = utils.Covariance(self.target_samples)
  self.target_eigvals, self.target_eigvecs = tf.linalg.eigh(
      self.target_cov)

  # Local variables caching the eigendecomposition of the target covariance;
  # they are filled by running `cached_target_stats_update_op`.
  self.cached_target_eigvals = tf.get_local_variable(
      "cached_target_eigvals",
      self.target_eigvals.shape,
      initializer=tf.zeros_initializer())
  self.cached_target_eigvecs = tf.get_local_variable(
      "cached_target_eigvecs",
      self.target_eigvecs.shape,
      initializer=tf.zeros_initializer())
  self.cached_target_stats_update_op = [
      self.cached_target_eigvals.assign(self.target_eigvals),
      self.cached_target_eigvecs.assign(self.target_eigvecs),
      tf.print("Assigning target stats")
  ]

  # The two helpers below are only referenced by the commented-out entries
  # of `functions`.
  def variance(x):
    x -= tf.reduce_mean(x, 0, keepdims=True)
    x = tf.square(x)
    return x

  def rotated_variance(x):
    x2 = tf.reshape(x, [-1, self.target_spec.num_dims])
    x2 -= tf.reduce_mean(x2, 0, keepdims=True)
    x2 = tf.matmul(x2, self.cached_target_eigvecs)
    x2 = tf.square(x2)
    return tf.reshape(x2, tf.shape(x))

  functions = [
      ("mean", tf.identity),
      # ("var", variance),
      ("square", tf.square),
      # ("rot_square", rot_square),
      # ("rot_var", rotated_variance),
  ]

  self.cached_target_mean = {}
  self.cached_target_mean_update_op = [
      tf.print("Assigning target means.")
  ]
  self.neutra_stats = {}
  self.q_stats = {}

  for name, f in functions:
    target_mean = tf.reduce_mean(f(self.target_samples), 0)
    cached_target_mean = tf.get_local_variable(name + "_cached_mean",
                                               target_mean.shape)
    # Stats supplied by the target spec take precedence over the sample
    # estimate.
    if self.target_spec.stats is not None:
      self.cached_target_mean_update_op.append(
          cached_target_mean.assign(self.target_spec.stats[name]))
    else:
      self.cached_target_mean_update_op.append(
          cached_target_mean.assign(target_mean))

    self.cached_target_mean[name] = cached_target_mean
    self.q_stats[name] = ComputeQStats(f(self.q_samples), cached_target_mean)
    self.neutra_stats[name] = ComputeChainStats(
        f(self.neutra_outputs.x_chain), cached_target_mean,
        self.test_num_leapfrog_steps)

  # Training
  self.train_q_samples = self.q_x_train.sample(self.train_batch_size)
  self.train_log_q_x = self.q_x_train.log_prob(self.train_q_samples)
  # Mean of log q(x) - log p(x) over samples drawn from q.
  self.kl_q_p = tf.reduce_mean(
      self.train_log_q_x - self.target.log_prob(self.train_q_samples))

  loss = self.kl_q_p
  reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
  if reg_losses:
    tf.logging.info("Regularizing.")
    loss += tf.add_n(reg_losses)
  self.loss = tf.check_numerics(loss, "Loss has NaNs")

  self.global_step = tf.train.get_or_create_global_step()
  steps, factors = list(zip(*learning_rate_schedule))
  # The leading 1.0 is the factor in effect before the first boundary.
  learning_rate = base_learning_rate * tf.train.piecewise_constant(
      self.global_step, steps, [1.0] + list(factors))

  opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
  self.train_op = opt.minimize(self.loss, global_step=self.global_step)

  tf.summary.scalar("kl_q_p", self.kl_q_p)
  tf.summary.scalar("loss", self.loss)

  self.init = [
      tf.global_variables_initializer(),
      tf.local_variables_initializer(),
      tf.print("Initializing variables")
  ]

  self.saver = tf.train.Saver()
  self.log_dir = log_dir
def _build_sampler(self):
  """Builds the ops that sample an architecture and score the sample.

  For every layer the controller LSTM is stepped twice: once to pick a skip
  index among the earlier hidden states, and once to pick a function index
  from logits computed against `self.w_emb`.

  Sets the following attributes:
    sample_arc: the sampled indices, concatenated along axis 0 in the order
      skip, function, skip, function, ...
    sample_log_probs: the softmax cross-entropy of every sampled index.
    ppl: `exp` of the mean of `sample_log_probs`.
    sample_entropy: sum of the per-decision `log_prob * exp(-log_prob)`
      terms, with gradients stopped.
    all_h: list holding a zero state followed by the hidden state of the
      skip step of every layer.
  """
  params = self.params
  hidden_size = params.controller_hidden_size
  num_layers = params.controller_num_layers

  def _zero_state():
    return tf.zeros([1, hidden_size], dtype=tf.float32)

  arc_tokens = []
  log_probs = []
  entropies = []

  def _rescale(logits):
    # Optional temperature scaling followed by an optional tanh squashing.
    if params.controller_temperature:
      logits /= params.controller_temperature
    if params.controller_tanh_constant:
      logits = params.controller_tanh_constant * tf.tanh(logits)
    return logits

  def _draw(logits):
    # Samples one index from `logits` and records its statistics.
    choice = tf.multinomial(logits, 1)
    choice = tf.to_int32(choice)
    choice = tf.reshape(choice, [1])
    arc_tokens.append(choice)
    log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=choice)
    log_probs.append(log_prob)
    entropy = log_prob * tf.exp(-log_prob)
    entropies.append(tf.stop_gradient(entropy))
    return choice

  all_h = [_zero_state()]
  all_h_w = [_zero_state()]

  # sampler ops
  prev_c = _zero_state()
  prev_h = _zero_state()
  inputs = self.g_emb

  for layer_id in range(1, num_layers + 1):
    # Step 1: choose which earlier hidden state to connect to.
    prev_c, prev_h = _lstm(inputs, prev_c, prev_h, self.w_lstm)
    all_h.append(prev_h)
    all_h_w.append(tf.matmul(prev_h, self.attn_w_1))

    # The newest entry of `all_h_w` is left out of the attention keys.
    query = tf.tanh(
        tf.matmul(prev_h, self.attn_w_2) + tf.concat(all_h_w[:-1], axis=0))
    skip_logits = tf.reshape(tf.matmul(query, self.attn_v), [1, layer_id])
    skip_logits = _rescale(skip_logits)
    # Quadratic penalty on the distance between this layer and a candidate.
    diff = tf.to_float(layer_id - tf.range(0, layer_id)) ** 2
    skip_logits -= tf.reshape(diff, [1, layer_id]) / 6.0
    skip_index = _draw(skip_logits)

    inputs = tf.nn.embedding_lookup(
        tf.concat(all_h[:-1], axis=0), skip_index)
    inputs /= (0.1 + tf.to_float(layer_id - skip_index))

    # Step 2: choose the function applied at this layer.
    prev_c, prev_h = _lstm(inputs, prev_c, prev_h, self.w_lstm)
    func_logits = tf.matmul(prev_h, self.w_emb, transpose_b=True)
    func_logits = _rescale(func_logits)
    func = _draw(func_logits)
    inputs = tf.nn.embedding_lookup(self.w_emb, func)

  self.sample_arc = tf.concat(arc_tokens, axis=0)

  self.sample_log_probs = tf.concat(log_probs, axis=0)
  self.ppl = tf.exp(tf.reduce_mean(self.sample_log_probs))

  self.sample_entropy = tf.reduce_sum(tf.concat(entropies, axis=0))

  self.all_h = all_h
def _enas_layer(self, layer_id, prev_layers, arc, out_filters):
  """Builds one ENAS layer out of `self.num_cells` cells.

  Every cell adds the outputs of two branches ("x" and "y"). Each branch
  picks one of the layers built so far and applies an operation to it. The
  layers never picked as a branch input are concatenated along the channel
  axis and reduced to `out_filters` channels with a 1x1 convolution.

  Args:
    layer_id: current layer. Not read by this method.
    prev_layers: the two input layers; exactly two are required.
    arc: indexable architecture description. For cell `i`, entries `4 * i`
      to `4 * i + 3` hold, in order, the x input index, the x operation, the
      y input index and the y operation.
    out_filters: number of output channels.

  Returns:
    A tensor reshaped to the shape of the first size-calibrated input layer.
  """
  assert len(prev_layers) == 2, "need exactly 2 inputs"
  layers = [prev_layers[0], prev_layers[1]]
  layers = self._maybe_calibrate_size(layers, out_filters, is_training=True)

  used = []
  for cell_id in range(self.num_cells):
    # Stacking lets a (possibly tensor-valued) index select the input layer.
    stacked = tf.stack(layers, axis=0)
    with tf.variable_scope("cell_{0}".format(cell_id)):
      branch_outputs = []
      for branch, offset in (("x", 0), ("y", 2)):
        with tf.variable_scope(branch):
          src_id = arc[4 * cell_id + offset]
          src_op = arc[4 * cell_id + offset + 1]
          branch_input = stacked[src_id, :, :, :, :]
          branch_outputs.append(
              self._enas_cell(branch_input, cell_id, src_id, src_op,
                              out_filters))
          used.append(
              tf.one_hot(src_id, depth=self.num_cells + 2, dtype=tf.int32))
      layers.append(branch_outputs[0] + branch_outputs[1])

  # Indices of the layers that no branch consumed.
  used = tf.add_n(used)
  indices = tf.where(tf.equal(used, 0))
  indices = tf.cast(indices, tf.int32)
  indices = tf.reshape(indices, [-1])
  num_outs = tf.size(indices)
  out = tf.stack(layers, axis=0)
  out = tf.gather(out, indices, axis=0)

  # All stacked layers share one shape, so the first one provides N, H, W.
  inp = layers[0]
  if self.data_format == "NHWC":
    N = tf.shape(inp)[0]
    H = tf.shape(inp)[1]
    W = tf.shape(inp)[2]
    # [num_outs, N, H, W, C] -> [N, H, W, num_outs, C] -> merge last two.
    out = tf.transpose(out, [1, 2, 3, 0, 4])
    out = tf.reshape(out, [N, H, W, num_outs * out_filters])
  elif self.data_format == "NCHW":
    N = tf.shape(inp)[0]
    H = tf.shape(inp)[2]
    W = tf.shape(inp)[3]
    # [num_outs, N, C, H, W] -> [N, num_outs, C, H, W] -> merge axes 1, 2.
    out = tf.transpose(out, [1, 0, 2, 3, 4])
    out = tf.reshape(out, [N, num_outs * out_filters, H, W])
  else:
    raise ValueError("Unknown data_format '{0}'".format(self.data_format))

  with tf.variable_scope("final_conv"):
    # One weight slice per possible layer; only the slices of the unused
    # layers are gathered to form the 1x1 convolution kernel.
    w = create_weight("w", [self.num_cells + 2, out_filters * out_filters])
    w = tf.gather(w, indices, axis=0)
    w = tf.reshape(w, [1, 1, num_outs * out_filters, out_filters])
    out = tf.nn.relu(out)
    out = tf.nn.conv2d(out, w, strides=[1, 1, 1, 1], padding="SAME",
                       data_format=self.data_format)
    out = batch_norm(out, is_training=True, data_format=self.data_format)

  out = tf.reshape(out, tf.shape(layers[0]))

  return out
def build_train_graph(self,
                      inputs,
                      min_depth,
                      max_depth,
                      num_mpi_planes,
                      learning_rate=0.0002,
                      beta1=0.9,
                      vgg_model_file=None,
                      global_step=0):
  """Construct the training computation graph.

  The graph randomly picks a downsampling divisor and a number of MPI planes,
  resizes the input images, infers an initial and a refined MPI, renders both
  into the target view and trains on the sum of the two VGG losses.

  Note that `inputs["intrinsics"]` is overwritten in place with the rescaled
  intrinsics.

  Args:
    inputs: dictionary of tensors (see 'input_data' below) needed for training
    min_depth: minimum depth for the PSV and MPI planes
    max_depth: maximum depth for the PSV and MPI planes
    num_mpi_planes: number of MPI planes to infer. The value passed in is
      replaced by the randomly selected plane count before it is used.
    learning_rate: learning rate
    beta1: hyperparameter for Adam
    vgg_model_file: path to vgg weights (needed when vgg loss is used)
    global_step: current optimization step. Not read by this method.

  Returns:
    A train_op to be used for training: a one-element list holding the op
    that applies the gradients.
  """
  print("starting to build graph")
  with tf.name_scope("input_size_randomization"):
    # Each row is [size divisor, number of MPI planes]; one row is drawn at
    # random by shuffling the rows and taking the first.
    dim_choices = tf.constant([[1, 16], [2, 32], [4, 32], [4, 64], [4, 128],
                               [8, 32], [8, 64], [8, 128]],
                              dtype=tf.int32)
    rand_dim = tf.random_shuffle(dim_choices)[0, :]
    # Height and width share the same divisor.
    height_div = rand_dim[0]
    width_div = rand_dim[0]
    num_mpi_planes = rand_dim[1]
    tf.summary.scalar("num_mpi_planes", num_mpi_planes)

  with tf.name_scope("setup"):
    mpi_planes = self.inv_depths(min_depth, max_depth, num_mpi_planes)

  with tf.name_scope("input_data"):
    raw_tgt_image = inputs["tgt_image"]
    raw_ref_image = inputs["ref_image"]
    raw_src_images = inputs["src_images"]

    # The static size is divided by a tensor, so the new size is a tensor.
    _, img_height, img_width, _ = raw_src_images.get_shape().as_list(
    )
    img_height = img_height // height_div
    img_width = img_width // width_div

    raw_tgt_image = tf.image.convert_image_dtype(
        raw_tgt_image, dtype=tf.float32)
    raw_ref_image = tf.image.convert_image_dtype(
        raw_ref_image, dtype=tf.float32)
    raw_src_images = tf.image.convert_image_dtype(
        raw_src_images, dtype=tf.float32)
    raw_tgt_image = tf.image.resize_area(raw_tgt_image,
                                         [img_height, img_width])
    raw_ref_image = tf.image.resize_area(raw_ref_image,
                                         [img_height, img_width])
    raw_src_images = tf.image.resize_area(raw_src_images,
                                          [img_height, img_width])

    tgt_pose = inputs["tgt_pose"]
    ref_pose = inputs["ref_pose"]
    src_poses = inputs["src_poses"]
    intrinsics = inputs["intrinsics"]

    # Scale intrinsics based on size randomization
    # (row 0 by the width divisor, row 1 by the height divisor, row 2 kept).
    intrinsics = tf.concat([
        intrinsics[:, 0:1, :] / tf.to_float(width_div),
        intrinsics[:, 1:2, :] / tf.to_float(height_div), intrinsics[:, 2:3, :]
    ],
                           axis=1)
    # Side effect: the caller's dictionary now holds the scaled intrinsics.
    inputs["intrinsics"] = intrinsics

    _, num_source, _, _ = src_poses.get_shape().as_list()

  with tf.name_scope("inference"):
    print("setting up MPI inference")
    # Re-derive the plane count from the planes tensor actually built.
    num_mpi_planes = tf.shape(mpi_planes)[0]
    pred = self.infer_mpi(raw_src_images, raw_ref_image, ref_pose, src_poses,
                          intrinsics, num_mpi_planes, mpi_planes)
    rgba_layers = pred["rgba_layers"]
    rgba_layers_refine = pred["rgba_layers_refine"]
    stuff_behind = pred["stuff_behind"]
    refine_input_mpi = pred["refine_input_mpi"]
    psv = pred["psv"]

  with tf.name_scope("synthesis"):
    print("setting up rendering")
    # Pose of the target view relative to the reference view.
    rel_pose = tf.matmul(tgt_pose, tf.matrix_inverse(ref_pose))
    output_image, output_layers = self.mpi_render_view(
        rgba_layers, rel_pose, mpi_planes, intrinsics)
    output_alpha = output_layers[Ellipsis, -1]
    output_image_refine, _ = self.mpi_render_view(
        rgba_layers_refine, rel_pose, mpi_planes, intrinsics)

  with tf.name_scope("loss"):
    print("computing losses")
    # Mask loss for pixels outside reference frustum
    # The mask is 0 where, for at least one plane, the rendered layer sums to
    # exactly zero over its last axis, and 1 elsewhere.
    loss_mask = tf.where(
        tf.equal(
            tf.reduce_min(
                tf.abs(tf.reduce_sum(output_layers, axis=-1)),
                axis=3,
                keep_dims=True), 0.0),
        tf.zeros_like(output_alpha[:, :, :, 0:1]),
        tf.ones_like(output_alpha[:, :, :, 0:1]))
    loss_mask = tf.stop_gradient(loss_mask)
    tf.summary.image("loss_mask", loss_mask)

    # Helper functions for loss
    def compute_error(real, fake, mask):
      # Mean absolute difference, weighted by the mask.
      return tf.reduce_mean(mask * tf.abs(fake - real))

    # Normalized VGG loss (from
    # https://github.com/CQFIO/PhotographicImageSynthesis)

    # Average-pools by a factor `ds` so the mask matches deeper VGG features.
    downsample = lambda tensor, ds: tf.nn.avg_pool(tensor, [1, ds, ds, 1],
                                                   [1, ds, ds, 1], "SAME")

    def vgg_loss(raw_tgt_image, output_image, loss_mask):
      """Compute VGG loss.

      Returns the weighted sum of the masked mean absolute differences of the
      VGG input and five VGG feature maps, plus both feature dictionaries.
      """
      vgg_real = build_vgg19(raw_tgt_image * 255.0, vgg_model_file)
      # NOTE(review): this rescaling presumably assumes `output_image` lies
      # in [-1, 1] while the target lies in [0, 1] -- confirm.
      rescaled_output_image = (output_image + 1.)/2. * 255.0
      vgg_fake = build_vgg19(
          rescaled_output_image, vgg_model_file, reuse=True)
      p0 = compute_error(vgg_real["input"], vgg_fake["input"], loss_mask)
      p1 = compute_error(vgg_real["conv1_2"],
                         vgg_fake["conv1_2"],
                         loss_mask)/2.6
      p2 = compute_error(vgg_real["conv2_2"],
                         vgg_fake["conv2_2"],
                         downsample(loss_mask, 2))/4.8
      p3 = compute_error(vgg_real["conv3_2"],
                         vgg_fake["conv3_2"],
                         downsample(loss_mask, 4))/3.7
      p4 = compute_error(vgg_real["conv4_2"],
                         vgg_fake["conv4_2"],
                         downsample(loss_mask, 8))/5.6
      p5 = compute_error(vgg_real["conv5_2"],
                         vgg_fake["conv5_2"],
                         downsample(loss_mask, 16))*10/1.5
      total_loss = p0+p1+p2+p3+p4+p5
      return total_loss, vgg_real, vgg_fake

    # The total loss is the VGG loss of the initial render plus the VGG loss
    # of the refined render.
    vgg_loss_initial, _, _ = vgg_loss(raw_tgt_image, output_image, loss_mask)
    tf.summary.scalar("vgg_loss_initial", vgg_loss_initial)
    total_loss = vgg_loss_initial

    vgg_loss_refine, _, _ = vgg_loss(raw_tgt_image, output_image_refine,
                                     loss_mask)
    tf.summary.scalar("vgg_loss_refine", vgg_loss_refine)
    total_loss += vgg_loss_refine

  with tf.name_scope("train_op"):
    print("setting up train op")
    train_vars = [var for var in tf.trainable_variables()]
    optim = tf.train.AdamOptimizer(learning_rate, beta1)
    grads_and_vars = optim.compute_gradients(total_loss, var_list=train_vars)
    # Wrapped in a list; callers receive `[apply_gradients_op]`.
    train_op = [optim.apply_gradients(grads_and_vars)]

  # Summaries
  tf.summary.scalar("total_loss", total_loss)
  # Source images
  # (each source image occupies 3 consecutive channels of `raw_src_images`).
  for i in range(num_source):
    src_image = raw_src_images[:, :, :, i*3:(i+1)*3]
    tf.summary.image("src_image_%d" % i, src_image)
  # Output image
  tf.summary.image("output_image", self.deprocess_image(output_image))
  # Refined output image
  tf.summary.image("output_image_refine",
                   self.deprocess_image(output_image_refine))
  # Target image
  tf.summary.image("tgt_image", raw_tgt_image)
  # Ref image
  tf.summary.image("ref_image", raw_ref_image)
  # Predicted color and alpha layers, and PSV
  num_summ = 16  # Number of plane summaries to show in tensorboard
  for i in range(num_summ):
    # Plane index of the i-th summary, spread evenly over the planes.
    ind = tf.to_int32(i * num_mpi_planes/num_summ)
    rgb = rgba_layers[:, :, :, ind, :3]
    alpha = rgba_layers[:, :, :, ind, -1:]
    ref_plane = psv[:, :, :, ind, 3:6]
    source_plane = psv[:, :, :, ind, :3]
    output_rgb = output_layers[:, :, :, ind, :3]
    tf.summary.image("rgb_layer_%d" % i, self.deprocess_image(rgb))
    tf.summary.image("alpha_layer_%d" % i, alpha)
    tf.summary.image("rgba_layer_%d" % i, self.deprocess_image(rgb * alpha))
    tf.summary.image("psv_avg_%d" % i,
                     (self.deprocess_image(0.5*ref_plane + 0.5*source_plane)))
    tf.summary.image("output_rgb_%d" % i,
                     self.deprocess_image(output_rgb))
    tf.summary.image("psv_ref_%d" % i, self.deprocess_image(ref_plane))
    tf.summary.image("psv_source_%d" % i, self.deprocess_image(source_plane))

  # Cumulative rendered images and refined MPI
  for i in range(num_summ):
    ind = tf.to_int32(i * num_mpi_planes/num_summ)
    rgb = rgba_layers_refine[:, :, :, ind, :3]
    alpha = rgba_layers_refine[:, :, :, ind, 3:]
    render = stuff_behind[:, :, :, ind, :3]
    input_colors = refine_input_mpi[:, :, :, ind, :3]
    tf.summary.image("rgb_layer_refine_%d" % i, self.deprocess_image(rgb))
    tf.summary.image("alpha_layer_refine_%d" % i, alpha)
    tf.summary.image("rgba_layer_refine_%d" % i,
                     self.deprocess_image(rgb * alpha))
    tf.summary.image("cumulative_render_%d" % i,
                     self.deprocess_image(render))
    tf.summary.image("input_colors_refine_%d" % i,
                     self.deprocess_image(input_colors))

  return train_op