def minimize(self, loss, name=None):
    """Build the op that performs one optimization step on `loss`.

    Gradients of `loss` are taken with respect to every variable returned by
    `tf.trainable_variables()`. Non-finite gradient entries are replaced by
    zeros before the per-variable update is delegated to `_apply_dense`
    (for `tf.Tensor` gradients) or `_apply_sparse` (for anything else, which
    is expected to expose `.values` and `.indices`).

    Args:
        loss: Tensor to minimize; its base dtype must be `tf.float32`.
        name: Optional name for the update scope; `self.name.title()` is
            used as the default scope name.

    Returns:
        The operation that adds one to `self.global_step`, created under a
        control dependency on the result of `self._finish(caches)`.

    Raises:
        TypeError: If a trainable variable is not a `tf.Variable`.
        ValueError: If there are no trainable variables, if `loss` is not
            float32, or if a variable receiving a gradient is not float32.
    """
    # --- Validate inputs -------------------------------------------------
    variables = tf.trainable_variables()
    for variable in variables:
        if not isinstance(variable, tf.Variable):
            raise TypeError("Argument is not a tf.Variable: %s" % variable)
    if not variables:
        raise ValueError("No variables to optimize")
    if loss.dtype.base_dtype != tf.float32:
        raise ValueError('Loss is not float32')

    # --- Compute gradients -----------------------------------------------
    gradients = tf.gradients(
        loss,
        variables,
        colocate_gradients_with_ops=True,
        gate_gradients=True,
        aggregation_method=2)
    for variable, gradient in zip(variables, gradients):
        if gradient is None:
            continue
        if variable.dtype.base_dtype != tf.float32:
            raise ValueError('%s is not float32' % variable.name)

    # --- Apply gradients -------------------------------------------------
    # Accumulators are created outside of any pending control dependencies.
    with tf.control_dependencies(None):
        self._init_acc(variables, gradients)

    with tf.name_scope(name, self.name.title(), []) as name:
        # Only variables that actually received a gradient are updated.
        caches = [
            entry for entry in self._prepare(variables, gradients)
            if entry['g_t'] is not None
        ]
        for cache in caches:
            x_tm1 = cache['x_tm1']
            g_t = cache['g_t']
            with tf.name_scope("update_" + x_tm1.op.name), \
                    tf.device(x_tm1.device):
                if isinstance(g_t, tf.Tensor):
                    # Dense gradient: zero out inf/nan entries.
                    cache['g_t'] = tf.where(
                        tf.is_finite(g_t), g_t, tf.zeros_like(g_t))
                    self._apply_dense(cache)
                else:
                    # Sparse gradient: sanitize the values and keep the
                    # indices alongside them.
                    values = g_t.values
                    cache['g_t'] = tf.where(
                        tf.is_finite(values), values, tf.zeros_like(values))
                    cache['idxs'] = g_t.indices
                    self._apply_sparse(cache)

        with tf.control_dependencies([self._finish(caches)]):
            with tf.device(self.global_step.device):
                step_increment = tf.assign_add(self.global_step, 1, name=name)
                return step_increment.op
def stn_diffeo(self):
    """Run the transformation-parameter regressor on `self.X`.

    Stores the regressor outputs on `self.x_theta`, `self.theta` and
    `self.theta_exp`.

    Returns:
        Tuple `(self.x_theta, d, c)` where `c` is the mean over the finite
        entries of the reshaped input and `d` is the same tensor as `c`.
    """
    with tf.variable_scope("atn"):
        height, width = self.img_sz[0], self.img_sz[1]
        x_tensor = tf.reshape(self.X, [-1, height, width, self.num_channels])

        # Diagnostic value: mean of the input ignoring inf/nan entries.
        finite_entries = tf.boolean_mask(x_tensor, tf.is_finite(x_tensor))
        c = tf.reduce_mean(finite_entries, 0)
        d = c

        # NOTE: an earlier single-STN variant (regressor -> expm(-theta) ->
        # transformer) used to live here as commented-out code.
        # Working with recurrent STN: self.theta and self.theta_exp are
        # expected in shape [num_STN, batch_sz, 6] -- TODO confirm against
        # transfromation_parameters_regressor.
        regressor_outputs = transfromation_parameters_regressor(
            self.requested_transforms,
            self.X,
            self.keep_prob,
            self.img_sz,
            self.weight_stddev,
            self.num_channels,
            self.activation_func,
            self.num_stn)
        self.x_theta, self.theta, self.theta_exp = regressor_outputs
        return self.x_theta, d, c
def preprocess_device_grads(self, device_grads):
    """All-reduce the per-device gradients in one batch.

    Args:
        device_grads: One list of `(gradient, variable)` pairs per device.

    Returns:
        Tuple `(devices, reduced_device_grads)` where `devices` is
        `self.benchmark_cnn.devices` and `reduced_device_grads` mirrors
        `device_grads` with every gradient replaced by its reduced value.

    Side effects:
        Sets `self._warmup_ops`, and, when auto loss scaling is enabled,
        `self.grad_has_inf_nan`.
    """
    params = self.benchmark_cnn.params
    compact_grads = params.use_fp16 and params.compact_gradient_transfer
    defer_grads = params.variable_consistency == 'relaxed'

    # Strip the variables; only the gradients are reduced.
    grads_to_reduce = []
    for grad_vars in device_grads:
        grads_to_reduce.append([grad for grad, _ in grad_vars])

    algorithm = batch_allreduce.algorithm_from_params(params)
    reduced_grads, self._warmup_ops = algorithm.batch_all_reduce(
        grads_to_reduce,
        params.gradient_repacking,
        compact_grads,
        defer_grads,
        params.xla_compile)

    if self.benchmark_cnn.enable_auto_loss_scale:
        # Check for infs or nans
        is_finite_list = []
        with tf.name_scope('check_for_inf_and_nan'):
            for tower_grads in reduced_grads:
                with tf.colocate_with(tower_grads[0]):
                    # TODO(tanmingxing): Create fused op that takes in a list
                    # of tensors as input and returns scalar boolean True if
                    # there are any infs/nans.
                    per_grad_finite = [
                        tf.reduce_all(tf.is_finite(grad))
                        for grad in tower_grads
                    ]
                    is_finite_list.append(tf.reduce_all(per_grad_finite))
            self.grad_has_inf_nan = tf.logical_not(
                tf.reduce_all(is_finite_list))

    # Re-attach each reduced gradient to its original variable.
    reduced_device_grads = []
    for grads, grad_vars in zip(reduced_grads, device_grads):
        paired = [(grad, var) for grad, (_, var) in zip(grads, grad_vars)]
        reduced_device_grads.append(paired)
    return self.benchmark_cnn.devices, reduced_device_grads
def _apply_dense(self, cache):
    """Compute the dense update step and store it in `cache['s_t']`.

    Args:
        cache: Dict with at least the keys `'x_tm1'` (the variable),
            `'g_t'` (its gradient) and `'updates'` (a list that collects
            the moving-average tensors produced here).

    Returns:
        The same `cache`, with `'s_t'` set to the step in which non-finite
        entries have been replaced by zeros.
    """
    variable = cache['x_tm1']
    gradient = cache['g_t']
    updates = cache['updates']

    # First moment: mix the moving average with the raw gradient via gamma.
    if self.mu > 0:
        m_t, t_m = self._dense_moving_average(
            variable, gradient, 'm', beta=self.mu)
        numerator = (1 - self.gamma) * m_t + self.gamma * gradient
        updates.extend([m_t, t_m])
    else:
        numerator = gradient

    # Second moment: square root of the moving average of squared gradients.
    if self.nu > 0:
        v_t, t_v = self._dense_moving_average(
            variable, gradient**2, 'v', beta=self.nu)
        denominator = tf.sqrt(v_t + self.epsilon)
        updates.extend([v_t, t_v])
    else:
        denominator = 1

    step = self.learning_rate * numerator / denominator
    cache['s_t'] = tf.where(tf.is_finite(step), step, tf.zeros_like(step))
    return cache
def ProcessGradients(grads_and_vars,
                     global_gradient_clip=0.0,
                     sanitize_gradients=False,
                     normalize_gradients=False):
    """Optionally sanitize, normalize and clip a list of gradients.

    Args:
        grads_and_vars: Non-empty iterable of `(gradient, variable)` pairs;
            a gradient may be `None`.
        global_gradient_clip: If > 0, gradients are clipped with
            `tf.clip_by_global_norm` using this value.
        sanitize_gradients: If true, non-finite gradient entries are
            replaced by zeros.
        normalize_gradients: If true, each gradient is scaled by the
            reciprocal of its own L2 norm (the squared norm is floored at
            1e-12).

    Returns:
        A list of `(gradient, variable)` pairs reflecting every enabled
        processing step. When no step is enabled the input is returned
        unchanged.

    Side effects:
        Emits a `"global_grad_norm"` scalar summary.
    """
    tf.logging.info("Prcessing gradients")
    grads, vars_ = list(zip(*grads_and_vars))

    if sanitize_gradients:
        grads = [
            g if g is None else tf.where(
                tf.is_finite(g), g, tf.zeros_like(g))
            for g in grads
        ]

    if normalize_gradients:
        grads = [
            g if g is None else g * tf.rsqrt(
                tf.maximum(1e-12, tf.reduce_sum(tf.square(g))))
            for g in grads
        ]

    clip_enabled = global_gradient_clip > 0
    if clip_enabled:
        grads, grad_norm = tf.clip_by_global_norm(grads, global_gradient_clip)
    else:
        grad_norm = tf.global_norm(grads)
    tf.summary.scalar("global_grad_norm", grad_norm)

    # Re-pair whenever the gradients were touched. Previously this happened
    # only in the clipping branch, so sanitized / normalized gradients were
    # silently discarded when clipping was disabled.
    if sanitize_gradients or normalize_gradients or clip_enabled:
        grads_and_vars = list(zip(grads, vars_))
    return grads_and_vars
def _create_var(name: str, value_expr: TfExpression) -> TfExpression:
    """Internal helper for creating autosummary accumulators.

    Creates a non-trainable 3-element variable holding
    `[sum(1), sum(x), sum(x**2)]` for `value_expr`, registers it under
    `name` in the module-level `_vars` registry and returns the op that
    accumulates the current value into it. Values whose sum is not finite
    contribute zeros instead.
    """
    assert not _finalized
    name_id = name.replace("/", "_")
    value = tf.cast(value_expr, _dtype)

    # Element count: static when the shape is fully known, dynamic otherwise.
    static_size = None
    if value.shape.is_fully_defined():
        static_size = np.prod(value.shape.as_list())
        size_expr = tf.constant(static_size, dtype=_dtype)
    else:
        size_expr = tf.reduce_prod(tf.cast(tf.shape(value), _dtype))

    if static_size == 1:
        # Single element: no reduction needed, just drop the dimensions.
        if value.shape.ndims != 0:
            value = tf.reshape(value, [])
        moments = [size_expr, value, tf.square(value)]
    else:
        moments = [
            size_expr,
            tf.reduce_sum(value),
            tf.reduce_sum(tf.square(value)),
        ]

    # Skip the sample entirely when its sum is inf/nan.
    increment = tf.cond(
        tf.is_finite(moments[1]),
        lambda: tf.stack(moments),
        lambda: tf.zeros(3, dtype=_dtype))

    scope = tfutil.absolute_name_scope("Autosummary/" + name_id)
    with scope, tf.control_dependencies(None):
        # [sum(1), sum(x), sum(x**2)]
        var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False)

    # Accumulate if the variable already holds a value, otherwise set it.
    update_op = tf.cond(
        tf.is_variable_initialized(var),
        lambda: tf.assign_add(var, increment),
        lambda: tf.assign(var, increment))

    _vars.setdefault(name, []).append(var)
    return update_op
def __init__(self, idx, mean, var, **kwargs):
    """
    Set up the mean / log-variance variables of the posterior.

    :param idx: Passed through to ``Posterior.__init__``
    :param mean: Tensor of shape [W] containing the mean at each parameter vertex
    :param var: Tensor of shape [W] containing the variance at each parameter vertex

    Keyword arguments read here: ``name`` (default ``"NormPost"``),
    ``init`` (forwarded to ``_get_mean_var``) and ``suppress_nan``
    (default True; falls back to the initial values wherever a variable
    has become NaN).
    """
    Posterior.__init__(self, idx, **kwargs)
    self.nvertices = tf.shape(mean)[0]
    self.name = kwargs.get("name", "NormPost")

    init = kwargs.get("init", None)
    mean, var = self._get_mean_var(mean, var, init)
    mean = tf.cast(mean, tf.float32)
    var = tf.cast(var, tf.float32)

    # Sanitize the initial values: non-finite means become 0, NaN
    # variances become 1.
    finite_mean = tf.where(tf.is_finite(mean), mean, tf.zeros_like(mean))
    mean = self.log_tf(finite_mean)
    var = tf.where(tf.is_nan(var), tf.ones_like(var), var)

    # The variance is stored as its log and recovered through exp.
    mean_variable = tf.Variable(
        mean, validate_shape=False, name="%s_mean" % self.name)
    self.mean_variable = self.log_tf(mean_variable)
    log_var_variable = tf.Variable(
        tf.log(var), validate_shape=False, name="%s_log_var" % self.name)
    self.log_var = self.log_tf(log_var_variable)
    self.var_variable = self.log_tf(
        tf.exp(self.log_var, name="%s_var" % self.name))

    if not kwargs.get("suppress_nan", True):
        self.mean = self.mean_variable
        self.var = self.var_variable
    else:
        # Replace NaN entries with the (sanitized) initial values.
        self.mean = tf.where(
            tf.is_nan(self.mean_variable), mean, self.mean_variable)
        self.var = tf.where(
            tf.is_nan(self.var_variable), var, self.var_variable)

    self.std = self.log_tf(tf.sqrt(self.var, name="%s_std" % self.name))
def check_grads_finite(grads):
    """Return a scalar bool tensor that is True iff all gradients are finite.

    Args:
        grads: Sequence of gradient tensors; may be empty.

    Returns:
        `tf.constant(True)` for an empty sequence, otherwise a tensor that
        is True when the total count of non-finite entries is zero.
    """
    if len(grads) == 0:  # pylint: disable=g-explicit-length-test
        return tf.constant(True)

    # Count the inf/nan entries of every gradient, then compare the total.
    non_finite_counts = []
    for grad in grads:
        finite_as_float = tf.to_float(tf.is_finite(grad))
        non_finite_counts.append(tf.reduce_sum(1 - finite_as_float))
    return tf.equal(tf.add_n(non_finite_counts), 0.)
def row_normalize(x):
    """Clip `x` to [0, 1] and divide every row by its sum.

    Entries that end up non-finite (e.g. rows summing to zero) are replaced
    by `1 / num_columns`.

    Args:
        x: Tensor with at least two dimensions; axis 1 is the one summed
            over.

    Returns:
        The normalized tensor.
    """
    with tf.name_scope('row_norm'):
        clipped = tf.clip_by_value(x, 0., 1.)
        num_cols = tf.cast(tf.shape(clipped)[1], tf.float32)
        row_sums = tf.reduce_sum(clipped, axis=1)
        # `repeat` presumably expands the row sums to the width of x --
        # TODO confirm against its definition.
        normalized = clipped / repeat(row_sums, num_cols)
        is_valid = tf.is_finite(normalized)
        uniform = tf.ones(shape=tf.shape(normalized)) / num_cols
        return tf.where(is_valid, normalized, uniform)
def compute_gradients(loss, variables, loss_scale):
    """Compute loss-scaled gradients and report whether they are finite.

    The loss is multiplied by `loss_scale` before differentiation and the
    gradients are divided by it afterwards. A gradient that contains any
    inf/nan entry is replaced as a whole by zeros; `None` gradients stay
    `None`.

    Args:
        loss: Scalar loss tensor.
        variables: Variables to differentiate with respect to.
        loss_scale: Multiplier applied to the loss.

    Returns:
        Tuple `(gradients, all_finite)` where `gradients` is aligned with
        `variables` and `all_finite` is a scalar bool tensor that is True
        iff every non-None gradient was entirely finite.
    """
    with tf.name_scope("gradient_computation"):
        scaled = tf.gradients(loss * loss_scale, variables)

        # Zero replacements, one per variable (used only where needed).
        zeros = [tf.zeros_like(var) for var in variables]

        # Undo the loss scaling.
        unscaled = []
        for grad in scaled:
            unscaled.append(None if grad is None else grad / loss_scale)

        # One scalar finiteness flag per available gradient.
        finites = []
        for grad in unscaled:
            finites.append(
                None if grad is None else tf.reduce_all(tf.is_finite(grad)))

        # Drop a gradient entirely if any of its entries is inf/nan.
        gradients = []
        for finite, grad, zero in zip(finites, unscaled, zeros):
            gradients.append(
                None if grad is None else tf.where(finite, grad, zero))

        available_flags = [flag for flag in finites if flag is not None]
        all_finite = tf.reduce_all(available_flags)
        return gradients, all_finite
def _custom_recall_at_k(labels_as_multi_hot, predictions, k):
    """Calculates recall_at_k metric with multi-hot labels.

    For each example which contains at least one label, a recall-at-k is
    calculated by assessing what proportion of these labels are in the top k
    predictions. This metric is the mean of these values.

    Args:
      labels_as_multi_hot: a tensor of [batch_size, num_output_classes] where
        elements are zero (absent) or one (present).
      predictions: a tensor of [batch_size, num_output_classes] where
        elements are floats indicating the probability of class membership.
      k: number of top predictions to consider (must be <=
        num_output_classes).

    Returns:
      mean: A scalar `Tensor` representing the current mean, the value of
        `total` divided by `count` (of finite values).
      update_op: An operation that increments the `total` and `count`
        variables appropriately and whose (scalar) value matches the
        mean_value.
    """
    labels = tf.cast(labels_as_multi_hot, tf.float32)
    num_output_classes = tf.shape(labels)[1]

    # Multi-hot encoding of the k highest-scoring classes per example.
    _, top_k_indices = tf.math.top_k(predictions, k=k)
    top_k_multi_hot = _indices_to_multihot(top_k_indices, num_output_classes)

    # Hit: labelled and predicted. Miss: labelled but not predicted.
    hits = tf.math.logical_and(
        tf.cast(labels, tf.bool), tf.cast(top_k_multi_hot, tf.bool))
    misses = tf.math.greater(labels, top_k_multi_hot)

    hits_per_example = tf.count_nonzero(hits, axis=1)
    misses_per_example = tf.count_nonzero(misses, axis=1)
    total_labels_per_example = hits_per_example + misses_per_example
    recall_per_example = hits_per_example / total_labels_per_example

    # To filter out no label cases (0 / 0 is not finite).
    has_labels = tf.is_finite(recall_per_example)
    valid_recalls = tf.boolean_mask(recall_per_example, has_labels)
    return tf.metrics.mean(valid_recalls)
def get_S_fromtensor(W):
    """Symmetrically normalize a sparse matrix by its row sums.

    Every stored value `W[i, j]` is multiplied by `d[i] * d[j]`, where
    `d = 1 / sqrt(row_sum(W))`. Entries of `d` that are not finite (for
    example rows whose sum is zero) are replaced by 1, so those rows and
    columns are left unscaled.

    Args:
        W: 2-D `tf.SparseTensor`.

    Returns:
        A `tf.SparseTensor` with the same indices and dense shape as `W`
        and the rescaled values.
    """
    row_sums = tf.reshape(tf.sparse.reduce_sum(W, axis=1), (-1,))
    inv_sqrt = tf.reciprocal(tf.sqrt(row_sums))
    # `ones_like` keeps the dtype of the input, so the fallback also works
    # for non-float32 matrices (a float32-only `tf.ones` would make
    # `tf.where` fail on a dtype mismatch).
    inv_sqrt = tf.where(
        tf.is_finite(inv_sqrt), inv_sqrt, tf.ones_like(inv_sqrt))

    # Per-entry scale factors looked up by row and column index.
    row_scale = tf.gather(inv_sqrt, W.indices[:, 0])
    col_scale = tf.gather(inv_sqrt, W.indices[:, 1])

    # Use the public `dense_shape` property instead of the private
    # `_dense_shape` attribute.
    return tf.sparse.SparseTensor(
        indices=W.indices,
        values=W.values * row_scale * col_scale,
        dense_shape=W.dense_shape)
def postprocess(x, n_bits_x=8):
    """Converts x from [-0.5, 0.5], to [0, 255].

    Non-finite entries are replaced by 1 before clipping, so they map to the
    upper end of the range.

    Args:
      x: 3-D or 4-D Tensor normalized between [-0.5, 0.5]
      n_bits_x: Number of bits representing each pixel of the output.
        Defaults to 8, to default to 256 possible values.

    Returns:
      x: 3-D or 4-D Tensor representing images or videos.
    """
    sanitized = tf.where(tf.is_finite(x), x, tf.ones_like(x))
    # Shift [-0.5, 0.5] to [0, 1], then scale up to the pixel range.
    unit_range = tf.clip_by_value(sanitized, -0.5, 0.5) + 0.5
    scaled = unit_range * 2**n_bits_x
    pixels = tf.clip_by_value(scaled, 0, 255)
    return tf.cast(pixels, dtype=tf.uint8)
def get_gradients_to_apply(self, device_num, gradient_state):
    """Return the `(gradient, variable)` pairs for one device.

    Args:
        device_num: Index of the device whose gradients are requested.
        gradient_state: Per-device lists of `(gradient, variable)` pairs.

    Returns:
        `gradient_state[device_num]`, unchanged.

    Side effects:
        For device 0 with auto loss scaling enabled, sets
        `self.grad_has_inf_nan` to a bool tensor that is True when any of
        that device's gradients contains an inf or nan.
    """
    tower_grad = gradient_state[device_num]

    check_this_tower = (
        self.benchmark_cnn.enable_auto_loss_scale and device_num == 0)
    if check_this_tower:
        # Since we don't aggregate variables in --independent mode, we cannot
        # tell if there are NaNs on all GPUs. So we arbitrarily choose to only
        # check NaNs on the first GPU.
        is_finite_flags = [
            tf.reduce_all(tf.is_finite(grad)) for grad, _ in tower_grad
        ]
        self.grad_has_inf_nan = tf.logical_not(
            tf.reduce_all(is_finite_flags))
    return tower_grad
def alignment_loss(self):
    """Build the robust alignment loss against a running average image.

    `self.x_theta` is split along its last axis into image channels (all but
    the last) and a mask (the last channel). A per-pixel batch average is
    blended with `self.averages_prev` using `self.cnt`, and the masked
    difference to that average is penalized with a Huber loss that is then
    squashed by `h / (h + sigma**2)`.

    Returns:
        Tuple `(alignment_loss, a, b)`: the summed penalty divided by the
        sum of the mask, the mean over the finite entries of
        `self.x_theta`, and the sum of the mask.
    """
    # ------------------------ Our loss (with W) -------------------------
    mask_channel = self.num_channels - 1
    img = tf.slice(
        self.x_theta, [0, 0, 0, 0], [-1, -1, -1, mask_channel])
    mask = tf.slice(
        self.x_theta, [0, 0, 0, mask_channel], [-1, -1, -1, -1])

    # Batch sums of the image and of the mask.
    sum_weighted_imgs = tf.reduce_sum(img, 0)
    mask_sum = tf.reduce_sum(mask, 0)
    # Repeat the summed mask three times along axis 2 so it lines up with
    # the image channels (assumes three image channels -- TODO confirm).
    sum_weights = tf.concat([mask_sum] * 3, 2)

    # "Recursive" average: pixels with (almost) no mask weight get 0.
    unsupported = tf.less(sum_weights, 1e-3)
    average_batch = tf.where(
        unsupported,
        tf.zeros_like(sum_weighted_imgs),
        tf.divide(sum_weighted_imgs, sum_weights + 1e-7))
    running_total = tf.multiply(self.cnt, self.averages_prev) + average_batch
    averages_curr = tf.divide(running_total, tf.add(self.cnt, 1))
    # NOTE: self.averages_prev / self.cnt are not updated here.

    # Masked deviation from the running average, Huber-penalized.
    weighted_diff = tf.multiply(mask, tf.subtract(img, averages_curr))
    abs_diff_is_small = tf.less(tf.abs(weighted_diff), self.delta)
    quadratic_part = 0.5 * tf.square(weighted_diff)
    linear_part = self.delta * tf.abs(weighted_diff) - 0.5 * (self.delta**2)
    huber_diff = tf.where(abs_diff_is_small, quadratic_part, linear_part)
    huber_diff_robust = huber_diff / (huber_diff + self.sigma ** 2)

    # Total penalty, normalized by the total mask weight.
    loss_sum_per_pixel = tf.reduce_sum(huber_diff_robust, 0)
    total_loss = tf.reduce_sum(loss_sum_per_pixel)
    alignment_loss = total_loss / tf.reduce_sum(mask)

    # Diagnostics returned alongside the loss.
    finite_entries = tf.boolean_mask(
        self.x_theta, tf.is_finite(self.x_theta))
    a = tf.reduce_mean(finite_entries, 0)
    b = tf.reduce_sum(mask)  # need to remove
    return alignment_loss, a, b
def alignment_loss_median(self):
    """Build the alignment loss against the per-pixel median image.

    `self.x_theta` is split along its last axis into image channels (all but
    the last) and a mask (the last channel). For every pixel position the
    median over the batch is taken using only entries whose mask is
    non-zero, and the masked difference to that median is penalized with a
    Huber loss.

    Returns:
        Tuple `(alignment_loss, a, b)`: the summed penalty divided by the
        sum of the mask, the mean over the finite entries of
        `self.x_theta`, and the sum of the mask.
    """
    # ------------------------ Our loss (with W) -------------------------
    height, width = self.img_sz[0], self.img_sz[1]
    img_channels = self.img_sz[2] - 1
    num_of_img_pixels = height * width * img_channels

    mask_channel = self.num_channels - 1
    # Shape: (bs, h, w, 3)
    img = tf.slice(
        self.x_theta, [0, 0, 0, 0], [-1, -1, -1, mask_channel])
    # Shape: (bs, h, w, 1)
    mask = tf.slice(
        self.x_theta, [0, 0, 0, mask_channel], [-1, -1, -1, -1])

    # Compute median per pixel-stack:
    # masked-out entries are overwritten with a sentinel (-200) that the
    # `> -10` filter below removes again.
    is_masked_in = tf.logical_not(tf.equal(mask, 0))
    bool_mask = tf.concat([is_masked_in] * 3, 3)
    sentinel = tf.ones_like(img) * -200.
    x_theta_tmp = tf.where(bool_mask, img, sentinel)  # (bs_sz, h, w, 3)

    # One row per pixel, one column per batch element: (h*w*3, bs)
    batch_elements = tf.transpose(
        tf.reshape(x_theta_tmp, [-1, num_of_img_pixels]))
    # Append a 0 column; 0 always passes the `> -10` filter, so every row
    # keeps at least one value.
    zero_column = tf.zeros([num_of_img_pixels, 1])
    batch_elements = tf.concat([batch_elements, zero_column], 1)

    def _median_of_valid(pixel_stack):
        valid = tf.boolean_mask(pixel_stack, pixel_stack > -10.)
        return tf.contrib.distributions.percentile(valid, q=50., axis=[0])

    # Shape: (h*w*3, )
    medians_pixels_stack = tf.map_fn(
        _median_of_valid, batch_elements, dtype=tf.float32)
    # Shape: (h, w, 3)
    medians_in_img_shape = tf.reshape(
        medians_pixels_stack, [height, width, img_channels])

    # Compute loss per pixel-stack: Shape (bs, h, w, 3)
    weighted_diff = tf.multiply(
        mask, tf.subtract(img, medians_in_img_shape))
    abs_diff_is_small = tf.less(tf.abs(weighted_diff), self.delta)
    quadratic_part = 0.5 * tf.square(weighted_diff)
    linear_part = self.delta * tf.abs(weighted_diff) - 0.5 * (self.delta**2)
    huber_diff = tf.where(abs_diff_is_small, quadratic_part, linear_part)
    # The sigma-based squashing is intentionally disabled in this variant.
    huber_diff_robust = huber_diff

    # Compute total loss, normalized by the total mask weight:
    loss_sum_pixel_stack = tf.reduce_sum(huber_diff_robust, 0)
    total_loss = tf.reduce_sum(loss_sum_pixel_stack)
    alignment_loss = total_loss / tf.reduce_sum(mask)

    # Diagnostics returned alongside the loss.
    finite_entries = tf.boolean_mask(
        self.x_theta, tf.is_finite(self.x_theta))
    a = tf.reduce_mean(finite_entries, 0)
    b = tf.reduce_sum(mask)  # need to remove
    return alignment_loss, a, b
def aggregate(self, gradients):
    """Aggregate worker gradients by averaging the best-scoring ones.

    Each worker is scored by the sum of its negated squared distances to
    its closest `nbworkers - nbbyzwrks - 2` peers; the `nbselected` workers
    with the highest such score (i.e. the smallest summed distances) are
    averaged.

    Args:
        gradients: Non-empty list with one gradient tensor per worker.

    Returns:
        The mean of the selected gradients.
    """
    with tf.name_scope("GAR_krum_tf"):
        # Assertion
        assert len(gradients) > 0, "Empty list of gradient to aggregate"
        nb_workers = self.__nbworkers

        # Distance computations: distances[i][j - i - 1] holds the negated
        # squared distance between workers i and j (for i < j).
        distances = []
        for i in range(nb_workers - 1):
            row = []
            for j in range(i + 1, nb_workers):
                sqr_dst = tf.reduce_sum(
                    tf.squared_difference(gradients[i], gradients[j]))
                # Non-finite distances count as infinitely far away.
                safe_dst = tf.where(
                    tf.is_finite(sqr_dst),
                    sqr_dst,
                    tf.constant(np.inf, dtype=sqr_dst.dtype))
                # Use of 'negative' to get the smallest distances and score
                # indexes in 'nn.top_k'
                row.append(tf.negative(safe_dst))
            distances.append(row)

        # Score computations
        nb_neighbours = nb_workers - self.__nbbyzwrks - 2
        scores = []
        for i in range(nb_workers):
            to_others = []
            for j in range(nb_workers):
                if j == i:
                    continue
                # Look the pair up in the upper-triangular table.
                low, gap = min(i, j), abs(i - j)
                to_others.append(distances[low][gap - 1])
            stacked = tf.parallel_stack(to_others)
            closest, _ = tf.nn.top_k(stacked, k=nb_neighbours, sorted=False)
            scores.append(tf.reduce_sum(closest))

        # Average of the 'nbselected' smallest scoring gradients
        all_gradients = tf.parallel_stack(gradients)
        all_scores = tf.parallel_stack(scores)
        _, indexes = tf.nn.top_k(
            all_scores, k=self.__nbselected, sorted=False)
        return tf.reduce_mean(tf.gather(all_gradients, indexes), axis=0)
def _create_autosummary_var(name, value_expr):
    """Create an accumulator variable for the autosummary `name`.

    The variable holds `[numerator, denominator]`, i.e. the running sum of
    the values and the running element count. A value whose sum is not
    finite contributes zeros to both.

    Args:
        name: Key under which the variable is registered in the module-level
            `_autosummary_vars` registry.
        value_expr: Value to accumulate; it is cast to float32.

    Returns:
        The op that accumulates the current value into the variable (or
        assigns it if the variable is not initialized yet).
    """
    assert not _autosummary_finalized
    v = tf.cast(value_expr, tf.float32)

    # Compare the rank by value: `is 0` / `is 1` tests object identity,
    # which is implementation-dependent for ints and a SyntaxWarning on
    # Python >= 3.8. An unknown rank (None) still falls through to the
    # general branch.
    ndims = v.shape.ndims
    if ndims == 0:
        v = [v, np.float32(1.0)]
    elif ndims == 1:
        v = [tf.reduce_sum(v), tf.cast(tf.shape(v)[0], tf.float32)]
    else:
        v = [
            tf.reduce_sum(v),
            tf.reduce_prod(tf.cast(tf.shape(v), tf.float32)),
        ]

    # Skip the sample entirely when its sum is inf/nan.
    v = tf.cond(tf.is_finite(v[0]), lambda: tf.stack(v), lambda: tf.zeros(2))

    with tf.control_dependencies(None):
        var = tf.Variable(tf.zeros(2))  # [numerator, denominator]
    update_op = tf.cond(
        tf.is_variable_initialized(var),
        lambda: tf.assign_add(var, v),
        lambda: tf.assign(var, v))

    if name in _autosummary_vars:
        _autosummary_vars[name].append(var)
    else:
        _autosummary_vars[name] = [var]
    return update_op
def _apply_sparse(self, cache):
    """Compute the sparse update step and store it in `cache['s_t']`.

    Duplicate indices are merged first: gradient rows that share an index
    are summed.

    Args:
        cache: Dict with at least the keys `'x_tm1'` (the variable),
            `'g_t'` (gradient values), `'idxs'` (their indices) and
            `'updates'` (a list that collects the moving-average tensors
            produced here).

    Returns:
        The same `cache`, with `'s_t'` set to the step (non-finite entries
        replaced by zeros) and `'g_t'` / `'idxs'` replaced by their
        de-duplicated versions.
    """
    variable = cache['x_tm1']
    raw_gradient = cache['g_t']
    raw_indices = cache['idxs']

    # Collapse repeated indices by summing their gradient rows.
    idxs, segment_ids = tf.unique(raw_indices)
    g_t_ = tf.unsorted_segment_sum(raw_gradient, segment_ids, tf.size(idxs))
    updates = cache['updates']

    # First moment: mix the moving average with the raw gradient via gamma.
    if self.mu > 0:
        m_t, t_m = self._sparse_moving_average(
            variable, idxs, g_t_, 'm', beta=self.mu)
        m_rows = tf.gather(m_t, idxs)
        numerator = (1 - self.gamma) * m_rows + self.gamma * g_t_
        updates.extend([m_t, t_m])
    else:
        numerator = g_t_

    # Second moment: square root of the moving average of squared gradients.
    if self.nu > 0:
        v_t, t_v = self._sparse_moving_average(
            variable, idxs, g_t_**2, 'v', beta=self.nu)
        v_rows = tf.gather(v_t, idxs)
        denominator = tf.sqrt(v_rows + self.epsilon)
        updates.extend([v_t, t_v])
    else:
        denominator = 1

    step = self.learning_rate * numerator / denominator
    cache['s_t'] = tf.where(tf.is_finite(step), step, tf.zeros_like(step))
    cache['g_t'] = g_t_
    cache['idxs'] = idxs
    return cache
def aggregate_single_gradient_using_copy(grad_and_vars, use_mean,
                                         check_inf_nan):
    """Calculate the average gradient for a shared variable across all towers.

    Note that this function provides a synchronization point across all
    towers.

    Args:
      grad_and_vars: A list or tuple of (gradient, variable) tuples. Each
        (gradient, variable) pair within the outer list represents the
        gradient of the variable calculated for a single tower, and the
        number of pairs equals the number of towers.
      use_mean: if True, mean is taken, else sum of gradients is taken.
      check_inf_nan: check grads for nans and infs.

    Returns:
      The tuple ([(average_gradient, variable),], has_nan_or_inf) where the
      gradient has been averaged across all towers. The variable is chosen
      from the first tower. The has_nan_or_inf indicates the grads has nan or
      inf; it is None when `check_inf_nan` is false.
    """
    tower_grads = [grad for grad, _ in grad_and_vars]
    num_towers = len(tower_grads)

    has_indexed_slices = any(
        isinstance(grad, tf.IndexedSlices) for grad in tower_grads)
    if has_indexed_slices:
        # TODO(reedwm): All-reduce IndexedSlices more effectively.
        combined = aggregate_indexed_slices_gradients(tower_grads)
    else:
        combined = tf.add_n(tower_grads)

    if use_mean and num_towers > 1:
        combined = tf.scalar_mul(1.0 / num_towers, combined)

    # The variable is taken from the first tower.
    result = (combined, grad_and_vars[0][1])
    if not check_inf_nan:
        return result, None

    with tf.name_scope('check_for_inf_and_nan'):
        # Checked on the per-tower gradients, not on the combined one.
        all_finite = tf.reduce_all(tf.is_finite(tower_grads))
        has_nan_or_inf = tf.logical_not(all_finite)
    return result, has_nan_or_inf
def apply_updates(self):
    """Build the single training op that applies all registered gradients.

    May be called only once per instance (guarded by
    `self._updates_applied`). Gradients registered per device in
    `self._dev_grads` are cast to float32 and summed per variable, summed
    across devices when there is more than one, scaled, checked for
    overflow and finally applied with the per-device optimizer in
    `self._dev_opt`.

    Returns:
        A grouped op named 'TrainingOp' containing the weight updates, the
        loss-scaling adjustments and the autosummary ops.
    """
    assert not self._updates_applied
    self._updates_applied = True
    devices = list(self._dev_grads.keys())
    total_grads = sum(len(grads) for grads in self._dev_grads.values())
    assert len(devices) >= 1 and total_grads >= 1
    ops = []
    with absolute_name_scope(self.scope):

        # Cast gradients to FP32 and calculate partial sum within each device.
        dev_grads = OrderedDict()  # device => [(grad, var), ...]
        for dev_idx, dev in enumerate(devices):
            with tf.name_scope('ProcessGrads%d' % dev_idx), tf.device(dev):
                sums = []
                # zip(*...) regroups the registered gradient lists so that
                # `gv` holds every (grad, var) pair of one variable.
                for gv in zip(*self._dev_grads[dev]):
                    # All pairs in the group must refer to the same variable.
                    assert all(v is gv[0][1] for g, v in gv)
                    g = [tf.cast(g, tf.float32) for g, v in gv]
                    g = g[0] if len(g) == 1 else tf.add_n(g)
                    sums.append((g, gv[0][1]))
                dev_grads[dev] = sums

        # Sum gradients across devices.
        if len(devices) > 1:
            with tf.name_scope('SumAcrossGPUs'), tf.device(None):
                for var_idx, grad_shape in enumerate(self._grad_shapes):
                    g = [dev_grads[dev][var_idx][0] for dev in devices]
                    if np.prod(
                            grad_shape
                    ):  # nccl does not support zero-sized tensors
                        g = tf.contrib.nccl.all_sum(g)
                    # Write the (possibly summed) gradient back, keeping the
                    # device-local variable.
                    for dev, gg in zip(devices, g):
                        dev_grads[dev][var_idx] = (
                            gg, dev_grads[dev][var_idx][1])

        # Apply updates separately on each device.
        for dev_idx, (dev, grads) in enumerate(dev_grads.items()):
            with tf.name_scope('ApplyGrads%d' % dev_idx), tf.device(dev):

                # Scale gradients as needed.
                if self.use_loss_scaling or total_grads > 1:
                    with tf.name_scope('Scale'):
                        # Average over all registered gradient lists, then
                        # let undo_loss_scaling adjust the coefficient.
                        coef = tf.constant(np.float32(1.0 / total_grads),
                                           name='coef')
                        coef = self.undo_loss_scaling(coef)
                        grads = [(g * coef, v) for g, v in grads]

                # Check for overflows.
                with tf.name_scope('CheckOverflow'):
                    # True only if every gradient on this device is finite.
                    grad_ok = tf.reduce_all(
                        tf.stack([
                            tf.reduce_all(tf.is_finite(g)) for g, v in grads
                        ]))

                # Update weights and adjust loss scaling.
                with tf.name_scope('UpdateWeights'):
                    opt = self._dev_opt[dev]
                    ls_var = self.get_loss_scaling_var(dev)
                    if not self.use_loss_scaling:
                        # Skip the update entirely on overflow.
                        ops.append(
                            tf.cond(grad_ok,
                                    lambda: opt.apply_gradients(grads),
                                    tf.no_op))
                    else:
                        # On success: raise the loss scale and apply the
                        # update. On overflow: only lower the loss scale.
                        ops.append(
                            tf.cond(
                                grad_ok, lambda: tf.group(
                                    tf.assign_add(ls_var, self.
                                                  loss_scaling_inc),
                                    opt.apply_gradients(grads)),
                                lambda: tf.group(
                                    tf.assign_sub(ls_var, self.
                                                  loss_scaling_dec))))

                # Report statistics on the last device.
                if dev == devices[-1]:
                    with tf.name_scope('Statistics'):
                        ops.append(
                            autosummary(self.id + '/learning_rate',
                                        self.learning_rate))
                        # 0 when the gradients were finite, 1 on overflow.
                        ops.append(
                            autosummary(self.id + '/overflow_frequency',
                                        tf.where(grad_ok, 0, 1)))
                        if self.use_loss_scaling:
                            ops.append(
                                autosummary(self.id + '/loss_scaling_log2',
                                            ls_var))

        # Initialize variables and group everything into a single op.
        self.reset_optimizer_state()
        init_uninited_vars(list(self._dev_ls_var.values()))
        return tf.group(*ops, name='TrainingOp')
def create_train_op(optimizer,
                    grads_and_vars,
                    max_grad=1.0,
                    mixed_precision=False,
                    gradient_accumulation_steps=1):
    """Build the training op, optionally with gradient accumulation.

    Args:
        optimizer: Object providing `apply_gradients(grads_and_vars)`.
        grads_and_vars: List of `(gradient, variable)` pairs; pairs whose
            gradient is `None` are dropped.
        max_grad: Clip norm passed to `tf.clip_by_global_norm`.
        mixed_precision: If true, the gradients are checked for inf/nan and
            the global step only advances when they are all finite.
        gradient_accumulation_steps: Number of calls over which clipped
            gradients are accumulated before the optimizer is applied.

    Returns:
        An op grouping the (possibly conditional) optimizer update with the
        global-step assignment.
    """
    global_step = tf.train.get_or_create_global_step()
    if gradient_accumulation_steps > 1:
        # Counter of micro-steps inside the current accumulation window.
        # NOTE(review): the initializer is passed as a class here but as an
        # instance for the accumulators below.
        local_step = tf.get_variable(name="local_step",
                                     shape=[],
                                     dtype=tf.int32,
                                     trainable=False,
                                     initializer=tf.zeros_initializer)
        # Whether every micro-step of the current window had finite grads.
        batch_finite = tf.get_variable(name="batch_finite",
                                       shape=[],
                                       dtype=tf.bool,
                                       trainable=False,
                                       initializer=tf.ones_initializer)
        # One float32 accumulator per trainable variable.
        accum_vars = [
            tf.get_variable(name=tvar.name.split(":")[0] + "/accum",
                            shape=tvar.shape.as_list(),
                            dtype=tf.float32,
                            trainable=False,
                            initializer=tf.zeros_initializer())
            for tvar in tf.trainable_variables()
        ]
        # True on the first micro-step of a window (local_step is a multiple
        # of the window size).
        reset_step = tf.cast(tf.math.equal(
            local_step % gradient_accumulation_steps, 0),
                             dtype=tf.bool)
        # Restart the counter at 1 on a reset step, otherwise increment it.
        local_step = tf.cond(
            reset_step, lambda: local_step.assign(tf.ones_like(local_step)),
            lambda: local_step.assign_add(1))
        # Pairs accumulators with gradients by position.
        # NOTE(review): assumes grads_and_vars is ordered like
        # tf.trainable_variables() -- confirm against the caller.
        grads_and_vars_and_accums = [(gv[0], gv[1], accum_vars[i])
                                     for i, gv in enumerate(grads_and_vars)
                                     if gv[0] is not None]
        grads, tvars, accum_vars = list(zip(*grads_and_vars_and_accums))
        all_are_finite = tf.reduce_all([
            tf.reduce_all(tf.is_finite(g)) for g in grads
        ]) if mixed_precision else tf.constant(True, dtype=tf.bool)
        # Start the window flag from this step's result on a reset step,
        # otherwise AND it with the flag accumulated so far.
        batch_finite = tf.cond(
            reset_step, lambda: batch_finite.assign(
                tf.math.logical_and(tf.constant(True, dtype=tf.bool),
                                    all_are_finite)),
            lambda: batch_finite.assign(
                tf.math.logical_and(batch_finite, all_are_finite)))
        # This is how the model was pre-trained.
        # NOTE(review): the original comment here said the global norm is
        # kept finite for clip_by_global_norm, but no `use_norm` is passed
        # in this function -- confirm whether that is intended.
        (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=max_grad)
        # Overwrite the accumulators on a reset step, otherwise add to them.
        accum_vars = tf.cond(
            reset_step, lambda: [
                accum_vars[i].assign(grad)
                for i, grad in enumerate(clipped_grads)
            ], lambda: [
                accum_vars[i].assign_add(grad)
                for i, grad in enumerate(clipped_grads)
            ])

        def update(accum_vars):
            # Apply the accumulated gradients to their variables.
            return optimizer.apply_gradients(list(zip(accum_vars, tvars)))

        # True on the last micro-step of a window (evaluated on the updated
        # local_step).
        update_step = tf.identity(tf.cast(tf.math.equal(
            local_step % gradient_accumulation_steps, 0),
                                          dtype=tf.bool),
                                  name="update_step")
        update_op = tf.cond(update_step, lambda: update(accum_vars),
                            lambda: tf.no_op())
        # Advance the global step only for a finished, fully finite window.
        new_global_step = tf.cond(
            tf.math.logical_and(update_step, batch_finite),
            lambda: global_step + 1, lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')
        train_op = tf.group(update_op,
                            [global_step.assign(new_global_step)])
    else:
        grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
        grads, tvars = list(zip(*grads_and_vars))
        all_are_finite = tf.reduce_all([
            tf.reduce_all(tf.is_finite(g)) for g in grads
        ]) if mixed_precision else tf.constant(True, dtype=tf.bool)
        # This is how the model was pre-trained.
        # NOTE(review): as above, no `use_norm` is passed to
        # clip_by_global_norm here.
        (clipped_grads, _) = tf.clip_by_global_norm(grads, clip_norm=max_grad)
        # Do not pass global_step here: this Adam implementation does not
        # increment global_step internally, whereas TF's built-in optimizers
        # (Adam etc.) do, which would cause global_step to be incremented
        # twice.
        train_op = optimizer.apply_gradients(list(zip(clipped_grads, tvars)))
        # Advance the global step only when the gradients were finite.
        new_global_step = tf.cond(all_are_finite, lambda: global_step + 1,
                                  lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')
        train_op = tf.group(train_op, [global_step.assign(new_global_step)])
    return train_op
def create_optimizer(loss,
                     learning_rate,
                     num_train_steps,
                     weight_decay_rate=0.0,
                     warmup_steps=0,
                     warmup_proportion=0,
                     lr_decay_power=1.0,
                     layerwise_lr_decay_power=-1,
                     n_transformer_layers=None,
                     hvd=None,
                     use_fp16=False,
                     num_accumulation_steps=1,
                     allreduce_post_accumulation=False):
    """Creates an optimizer and training op.

    Args:
        loss: Scalar loss tensor to minimize.
        learning_rate: Peak learning rate; decayed polynomially to 0 over
            `num_train_steps` and ramped up linearly during warm-up.
        num_train_steps: Number of steps over which the rate decays.
        weight_decay_rate: Passed to `AdamWeightDecayOptimizer`.
        warmup_steps: Minimum number of warm-up steps.
        warmup_proportion: Warm-up length as a fraction of
            `num_train_steps`; the larger of the two warm-up settings wins.
        lr_decay_power: Power of the polynomial decay.
        layerwise_lr_decay_power: If > 0, `_get_layer_lrs` is used to derive
            per-layer learning rates.
        n_transformer_layers: Forwarded to `_get_layer_lrs`.
        hvd: Horovod module, or `None` for single-process training.
        use_fp16: Enables fp16 all-reduce compression, loss scaling and the
            finiteness checks on the gradients.
        num_accumulation_steps: Number of calls over which clipped gradients
            are accumulated before the optimizer is applied.
        allreduce_post_accumulation: If true (and `hvd` is given), gradients
            are all-reduced once after accumulation instead of wrapping the
            optimizer in `hvd.DistributedOptimizer`.

    Returns:
        An op grouping the (possibly conditional) optimizer update with the
        global-step assignment.
    """
    compression = Compression.fp16 if use_fp16 else Compression.none
    global_step = tf.train.get_or_create_global_step()
    learning_rate = tf.train.polynomial_decay(learning_rate,
                                              global_step,
                                              num_train_steps,
                                              end_learning_rate=0.0,
                                              power=lr_decay_power,
                                              cycle=False)
    warmup_steps = max(num_train_steps * warmup_proportion, warmup_steps)
    # Skip the warm-up factor when there is no warm-up: with the default
    # warmup_steps=0 the ratio below would be 0/0 at step 0 and x/0 after.
    if warmup_steps > 0:
        learning_rate *= tf.minimum(
            1.0,
            tf.cast(global_step, tf.float32) /
            tf.cast(warmup_steps, tf.float32))

    if layerwise_lr_decay_power > 0:
        learning_rate = _get_layer_lrs(learning_rate,
                                       layerwise_lr_decay_power,
                                       n_transformer_layers)
    optimizer = AdamWeightDecayOptimizer(
        learning_rate=learning_rate,
        weight_decay_rate=weight_decay_rate,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-6,
        exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])

    # Let Horovod reduce per-step gradients unless they are reduced once
    # after accumulation instead.
    if hvd is not None and (num_accumulation_steps == 1 or
                            (not allreduce_post_accumulation)):
        optimizer = hvd.DistributedOptimizer(optimizer,
                                             sparse_as_dense=True,
                                             compression=compression)
    if use_fp16:
        loss_scale_manager = (
            tf_contrib.mixed_precision.ExponentialUpdateLossScaleManager(
                init_loss_scale=2**32,
                incr_every_n_steps=1000,
                decr_every_n_nan_or_inf=2,
                decr_ratio=0.5))
        optimizer = tf_contrib.mixed_precision.LossScaleOptimizer(
            optimizer, loss_scale_manager)

    tvars = tf.trainable_variables()
    # The loss is divided by the number of accumulation steps so that the
    # accumulated gradient corresponds to the mean over the window.
    grads_and_vars = optimizer.compute_gradients(
        loss * 1.0 / num_accumulation_steps, tvars)

    if num_accumulation_steps > 1:
        # Counter of micro-steps inside the current accumulation window.
        local_step = tf.get_variable(name="local_step",
                                     shape=[],
                                     dtype=tf.int32,
                                     trainable=False,
                                     initializer=tf.zeros_initializer())
        # Whether every micro-step of the current window had finite grads.
        batch_finite = tf.get_variable(name="batch_finite",
                                       shape=[],
                                       dtype=tf.bool,
                                       trainable=False,
                                       initializer=tf.ones_initializer())
        # One float32 accumulator per trainable variable.
        accum_vars = [
            tf.get_variable(name=tvar.name.split(":")[0] + "/accum",
                            shape=tvar.shape.as_list(),
                            dtype=tf.float32,
                            trainable=False,
                            initializer=tf.zeros_initializer())
            for tvar in tvars
        ]

        # True on the first micro-step of a window.
        reset_step = tf.cast(tf.math.equal(
            local_step % num_accumulation_steps, 0),
                             dtype=tf.bool)
        local_step = tf.cond(
            reset_step, lambda: local_step.assign(tf.ones_like(local_step)),
            lambda: local_step.assign_add(1))

        # Keep only the variables that actually received a gradient.
        grads, tvars, accum_vars = zip(
            *[(g, v, g_acc)
              for (g, v), g_acc in zip(grads_and_vars, accum_vars)
              if g is not None])

        if use_fp16:
            # It turns out this condition can be False for many of the
            # first steps and then stay True for the rest of training.
            all_are_finite = tf.reduce_all(
                [tf.reduce_all(tf.is_finite(g)) for g in grads])
            # When training is resumed from a checkpoint, the gradients are
            # again non-finite for a large number of initial steps, which
            # causes a jump in the loss. To continue training without that,
            # the check can be disabled:
            # all_are_finite = tf.constant(True, dtype=tf.bool)
        else:
            all_are_finite = tf.constant(True, dtype=tf.bool)

        # Start the window flag from this step's result on a reset step,
        # otherwise AND it with the flag accumulated so far.
        batch_finite = tf.cond(
            reset_step, lambda: batch_finite.assign(
                tf.math.logical_and(tf.constant(True, dtype=tf.bool),
                                    all_are_finite)),
            lambda: batch_finite.assign(
                tf.math.logical_and(batch_finite, all_are_finite)))

        # This is how the model was pre-trained.
        # ensure global norm is a finite number
        # to prevent clip_by_global_norm from having a hizzy fit.
        (clipped_grads, _) = tf.clip_by_global_norm(
            grads,
            clip_norm=1.0,
            use_norm=tf.cond(all_are_finite, lambda: tf.global_norm(grads),
                             lambda: tf.constant(1.0)))

        # Overwrite the accumulators on a reset step, otherwise add to them.
        accum_vars = tf.cond(
            reset_step, lambda:
            [v.assign(grad) for v, grad in zip(accum_vars, clipped_grads)],
            lambda: [
                v.assign_add(grad)
                for v, grad in zip(accum_vars, clipped_grads)
            ])

        def update(accum_vars):
            # Optionally all-reduce the accumulated gradients, then apply.
            if allreduce_post_accumulation and hvd is not None:
                accum_vars = [
                    hvd.allreduce(tf.convert_to_tensor(accum_var),
                                  compression=compression) if isinstance(
                                      accum_var, tf.IndexedSlices) else
                    hvd.allreduce(accum_var, compression=compression)
                    for accum_var in accum_vars
                ]
            return optimizer.apply_gradients(list(zip(accum_vars, tvars)),
                                             global_step=global_step)

        # True on the last micro-step of a window.
        update_step = tf.identity(tf.cast(tf.math.equal(
            local_step % num_accumulation_steps, 0),
                                          dtype=tf.bool),
                                  name="update_step")
        update_op = tf.cond(update_step, lambda: update(accum_vars),
                            lambda: tf.no_op())

        # Combine the finiteness flag across workers only when Horovod is
        # in use; `hvd` defaults to None, and calling `hvd.allreduce`
        # unconditionally raised AttributeError in that case.
        if hvd is not None:
            window_finite = tf.cast(
                hvd.allreduce(tf.cast(batch_finite, tf.int32)), tf.bool)
        else:
            window_finite = batch_finite

        # Advance the global step only for a finished, fully finite window.
        new_global_step = tf.cond(
            tf.math.logical_and(update_step, window_finite),
            lambda: global_step + 1, lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')
        train_op = tf.group(update_op,
                            [global_step.assign(new_global_step)])
    else:
        grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None]
        grads, tvars = list(zip(*grads_and_vars))
        if use_fp16:
            all_are_finite = tf.reduce_all(
                [tf.reduce_all(tf.is_finite(g)) for g in grads])
        else:
            all_are_finite = tf.constant(True, dtype=tf.bool)

        # This is how the model was pre-trained.
        # ensure global norm is a finite number
        # to prevent clip_by_global_norm from having a hizzy fit.
        (clipped_grads, _) = tf.clip_by_global_norm(
            grads,
            clip_norm=1.0,
            use_norm=tf.cond(all_are_finite, lambda: tf.global_norm(grads),
                             lambda: tf.constant(1.0)))

        train_op = optimizer.apply_gradients(list(zip(clipped_grads, tvars)),
                                             global_step=global_step)
        # Advance the global step only when the gradients were finite.
        new_global_step = tf.cond(all_are_finite, lambda: global_step + 1,
                                  lambda: global_step)
        new_global_step = tf.identity(new_global_step, name='step_update')
        train_op = tf.group(train_op, [global_step.assign(new_global_step)])
    return train_op
def _check_regularizer(self, reg):
    """Assert that a regularizer yields a usable value on random weights.

    Args:
        reg: Callable applied to a randomly drawn (12, 16) weight tensor.
            Its result (the "prior nll" in the assertion messages) must
            convert to a finite, non-NaN boolean check.
    """
    prior_nll = reg(tf.random.normal((12, 16)))

    # Finite check first; the NaN check only runs if this one passes.
    finite = bool(tf.is_finite(prior_nll))
    self.assertTrue(finite, msg='Invalid prior nll returned.')

    has_nan = bool(tf.is_nan(prior_nll))
    self.assertFalse(has_nan, msg='Prior nll is NaN.')
def attention_layer(from_tensor,
                    to_tensor,
                    attention_mask=None,
                    num_attention_heads=1,
                    size_per_head=512,
                    query_act=None,
                    key_act=None,
                    value_act=None,
                    attention_probs_dropout_prob=0.0,
                    initializer_range=0.02,
                    softmax_temperature=1.0,
                    batch_size=None,
                    from_seq_length=None,
                    to_seq_length=None):
    """Multi-headed attention from `from_tensor` to `to_tensor`.

    `from_tensor` is projected to queries and `to_tensor` to keys and values
    (one `dense_layer_3d` call each). Query/key dot products are divided by
    `softmax_temperature`, scaled by 1/sqrt(size_per_head), optionally masked,
    and soft-maxed; the (dropped-out) probabilities then weight the values.
    Passing the same tensor for both inputs gives self-attention.

    Dimension letters used in the einsum equations and comments:
        B = batch size, F = `from_tensor` sequence length,
        T = `to_tensor` sequence length, N = `num_attention_heads`,
        H = `size_per_head`.

        scores  [B, N, F, T] = einsum('BFNH,BTNH->BNFT', Q, K)
        context [B, F, N, H] = einsum('BNFT,BTNH->BFNH', probs, V)

    Args:
        from_tensor: float Tensor of rank 2 or 3 (rank 3 is
            [batch_size, from_seq_length, from_width]).
        to_tensor: float Tensor of the same rank as `from_tensor` (rank 3 is
            [batch_size, to_seq_length, to_width]).
        attention_mask: (optional) Tensor that can be reshaped to
            [batch_size, -1, from_seq_length, to_seq_length]. Positions whose
            value is not greater than 0 receive an additive score of -10000.0;
            positions equal to 1 are left unchanged (log(1) == 0).
        num_attention_heads: int. Number of attention heads.
        size_per_head: int. Size of each attention head.
        query_act: (optional) Activation for the query projection.
        key_act: (optional) Activation for the key projection.
        value_act: (optional) Activation for the value projection.
        attention_probs_dropout_prob: (optional) float. Dropout probability
            applied to the attention probabilities.
        initializer_range: float. Passed to `create_initializer`.
        softmax_temperature: Divisor applied to the raw attention scores.
        batch_size: (optional) Required when the inputs are rank 2; ignored
            (overwritten from the input shape) when they are rank 3.
        from_seq_length: (optional) Same rule as `batch_size`.
        to_seq_length: (optional) Same rule as `batch_size`.

    Returns:
        A tuple `(context_layer, attention_probs)`:
        `context_layer` is [B, F, N, H]; `attention_probs` is the softmax
        output [B, N, F, T] *before* dropout is applied.

    Raises:
        ValueError: If the input ranks differ, or the inputs are rank 2 and
            any of `batch_size`, `from_seq_length`, `to_seq_length` is None.
    """
    from_dims = get_shape_list(from_tensor, expected_rank=[2, 3])
    to_dims = get_shape_list(to_tensor, expected_rank=[2, 3])
    rank = len(from_dims)

    if rank != len(to_dims):
        raise ValueError(
            "The rank of `from_tensor` must match the rank of `to_tensor`.")

    if rank == 3:
        # Rank-3 inputs carry their own dimensions; caller values are ignored.
        batch_size, from_seq_length = from_dims[0], from_dims[1]
        to_seq_length = to_dims[1]
    elif rank == 2:
        supplied = (batch_size, from_seq_length, to_seq_length)
        if any(dim is None for dim in supplied):
            raise ValueError(
                "When passing in rank 2 tensors to attention_layer, the values "
                "for `batch_size`, `from_seq_length`, and `to_seq_length` "
                "must all be specified.")

    def project(inputs, activation, scope_name):
        # One per-head dense projection; a new initializer is built per call.
        return dense_layer_3d(inputs, num_attention_heads, size_per_head,
                              create_initializer(initializer_range),
                              activation, scope_name)

    queries = project(from_tensor, query_act, "query")  # [B, F, N, H]
    keys = project(to_tensor, key_act, "key")           # [B, T, N, H]
    values = project(to_tensor, value_act, "value")     # [B, T, N, H]

    # Raw attention scores: query/key dot products, temperature, 1/sqrt(H).
    scores = tf.einsum(
        "BFNH,BTNH->BNFT", queries, keys, name="query_key_einsum")
    scores = scores / softmax_temperature
    scores = tf.multiply(scores, 1.0 / math.sqrt(float(size_per_head)))

    if attention_mask is not None:
        # [B, 1, F, T] for a shared mask or [B, N, F, T] for a per-head mask.
        mask = tf.reshape(
            attention_mask,
            shape=[batch_size, -1, from_seq_length, to_seq_length])
        mask = tf.cast(mask, tf.float32)

        # Keep this tf.where: it fixes back-propagation issues by removing
        # NaNs when tf.math.log is applied below.
        mask = tf.where(mask > 0.0, mask, tf.zeros_like(mask))

        # log(1) = 0 leaves attended positions alone; log(0) = -inf is
        # replaced by -10000.0, which effectively removes the position once
        # the softmax is taken.
        log_mask = tf.math.log(mask)
        finite = tf.is_finite(log_mask)
        penalty = tf.zeros_like(log_mask, dtype=tf.float32) - 10000.0
        scores = scores + tf.where(finite, log_mask, penalty)

    # Normalise to probabilities: [B, N, F, T].
    probs = tf.nn.softmax(scores)

    # Dropout here removes whole attended-to tokens, as in the original
    # Transformer paper.
    dropped_probs = dropout(probs, attention_probs_dropout_prob)

    # Weighted sum of the values: [B, F, N, H].
    context = tf.einsum(
        "BNFT,BTNH->BFNH", dropped_probs, values,
        name="attention_value_einsum")

    return context, probs
def get(self):
    """Build the input pipeline that reads, decodes and batches database records.

    Each fixed-length record is laid out as float32 keypoint xyz
    (3 * num_kp entries), float32 keypoint uv followed by visibility
    (3 * num_kp entries in total) and a uint8 RGB image of `self.image_size`.
    Derived items (normalised / local / canonical coordinates, optional hand
    crop, score maps) are computed from those and everything is batched with
    `tf.train.batch_join` or `tf.train.shuffle_batch_join`.

    Returns:
        dict mapping item names (e.g. 'image', 'keypoint_uv21', 'scoremap')
        to the corresponding batched tensors.

    Raises:
        AssertionError: If the number of bytes consumed while decoding does
            not match the computed record size.
    """
    # calculate size of each record (this lists what is contained in the db
    # and how many bytes are occupied)
    encoding_bytes = 4  # every keypoint entry is one float32
    kp_xyz_entries = 3 * self.num_kp
    kp_uv_entries = 2 * self.num_kp
    kp_vis_entries = self.num_kp
    image_bytes = self.image_size[0] * self.image_size[1] * 3

    record_bytes = encoding_bytes * (kp_xyz_entries + kp_uv_entries + kp_vis_entries)
    record_bytes += image_bytes

    # ---- READ DATA ITEMS ----
    # Start reader
    reader = tf.FixedLengthRecordReader(header_bytes=0, record_bytes=record_bytes)
    _, value = reader.read(tf.train.string_input_producer([self.path_to_db]))

    # decode to floats; `bytes_read` tracks the byte offset into the record
    bytes_read = 0
    data_dict = dict()
    record_bytes_float32 = tf.decode_raw(value, tf.float32)

    # 1. Read keypoint xyz
    keypoint_xyz21 = tf.reshape(
        tf.slice(record_bytes_float32, [bytes_read // 4], [kp_xyz_entries]),
        [self.num_kp, 3])
    bytes_read += encoding_bytes * kp_xyz_entries
    keypoint_xyz21 /= 1000.0  # scale to meters
    keypoint_xyz21 = self.convert_kp(keypoint_xyz21)

    # calculate wrist coord (extrapolated from keypoints 16 and 0)
    if self.use_wrist_coord:
        wrist_xyz = keypoint_xyz21[16, :] + 2.0 * (keypoint_xyz21[0, :] - keypoint_xyz21[16, :])
        keypoint_xyz21 = tf.concat([tf.expand_dims(wrist_xyz, 0), keypoint_xyz21[1:, :]], 0)

    data_dict['keypoint_xyz21'] = keypoint_xyz21

    # 2. Read keypoint uv AND VIS
    keypoint_uv_vis21 = tf.reshape(
        tf.slice(record_bytes_float32, [bytes_read // 4], [kp_uv_entries + kp_vis_entries]),
        [self.num_kp, 3])
    bytes_read += encoding_bytes * (kp_uv_entries + kp_vis_entries)
    keypoint_uv_vis21 = self.convert_kp(keypoint_uv_vis21)
    keypoint_uv21 = keypoint_uv_vis21[:, :2]
    keypoint_vis21 = tf.equal(keypoint_uv_vis21[:, 2], 1.0)

    # calculate wrist vis
    if self.use_wrist_coord:
        wrist_vis = tf.logical_or(keypoint_vis21[16], keypoint_vis21[0])
        keypoint_vis21 = tf.concat([tf.expand_dims(wrist_vis, 0), keypoint_vis21[1:]], 0)

        wrist_uv = keypoint_uv21[16, :] + 2.0 * (keypoint_uv21[0, :] - keypoint_uv21[16, :])
        keypoint_uv21 = tf.concat([tf.expand_dims(wrist_uv, 0), keypoint_uv21[1:, :]], 0)

    data_dict['keypoint_vis21'] = keypoint_vis21

    if self.coord_uv_noise:
        # Draw the noise with the shape of the keypoint tensor itself. The
        # shape used to be hard-coded to [42, 2], which cannot be added to a
        # 21-keypoint tensor.
        noise = tf.truncated_normal(tf.shape(keypoint_uv21), mean=0.0,
                                    stddev=self.coord_uv_noise_sigma)
        keypoint_uv21 += noise

    data_dict['keypoint_uv21'] = keypoint_uv21

    # decode to uint8
    record_bytes_uint8 = tf.decode_raw(value, tf.uint8)

    # 4. Read image
    image = tf.reshape(tf.slice(record_bytes_uint8, [bytes_read], [image_bytes]),
                       [self.image_size[0], self.image_size[1], 3])
    image = tf.cast(image, tf.float32)
    bytes_read += image_bytes

    # subtract mean
    image = image / 255.0 - 0.5
    if self.hue_aug:
        image = tf.image.random_hue(image, self.hue_aug_max)
    data_dict['image'] = image

    # ---- CONSTANTS ----
    # Camera intrinsics
    sx = 822.79041
    sy = 822.79041
    tx = 318.47345
    ty = 250.31296
    data_dict['cam_mat'] = tf.constant([[sx, 0.0, tx], [0.0, sy, ty], [0.0, 0.0, 1.0]])

    # Hand side: this dataset only contains left hands
    data_dict['hand_side'] = tf.one_hot(tf.constant(0, dtype=tf.int32), depth=2,
                                        on_value=1.0, off_value=0.0, dtype=tf.float32)

    # Sanity check on the offset bookkeeping above.
    assert bytes_read == record_bytes, "Doesnt add up."

    # ---- DEPENDENT DATA ITEMS: XYZ representations ----
    # make coords relative to root joint
    kp_coord_xyz_root = keypoint_xyz21[0, :]  # this is the palm coord
    kp_coord_xyz21_rel = keypoint_xyz21 - kp_coord_xyz_root  # relative coords in metric coords
    index_root_bone_length = tf.sqrt(
        tf.reduce_sum(tf.square(kp_coord_xyz21_rel[12, :] - kp_coord_xyz21_rel[11, :])))
    data_dict['keypoint_scale'] = index_root_bone_length
    # normalized by length of 12->11
    data_dict['keypoint_xyz21_normed'] = kp_coord_xyz21_rel / index_root_bone_length

    # calculate local coordinates
    kp_coord_xyz21_local = bone_rel_trafo(data_dict['keypoint_xyz21_normed'])
    kp_coord_xyz21_local = tf.squeeze(kp_coord_xyz21_local)
    data_dict['keypoint_xyz21_local'] = kp_coord_xyz21_local

    # calculate viewpoint and coords in canonical coordinates
    kp_coord_xyz21_rel_can, rot_mat = canonical_trafo(data_dict['keypoint_xyz21_normed'])
    kp_coord_xyz21_rel_can, rot_mat = tf.squeeze(kp_coord_xyz21_rel_can), tf.squeeze(rot_mat)
    data_dict['keypoint_xyz21_can'] = kp_coord_xyz21_rel_can
    data_dict['rot_mat'] = tf.matrix_inverse(rot_mat)

    # ---- DEPENDENT DATA ITEMS: HAND CROP ----
    if self.hand_crop:
        # Keypoint 12 in (v, u) order is the crop centre.
        crop_center = keypoint_uv21[12, ::-1]

        # catch problem, when no valid kp available (happens almost never)
        crop_center = tf.cond(tf.reduce_all(tf.is_finite(crop_center)),
                              lambda: crop_center,
                              lambda: tf.constant([0.0, 0.0]))
        crop_center.set_shape([2, ])

        if self.crop_center_noise:
            noise = tf.truncated_normal([2], mean=0.0, stddev=self.crop_center_noise_sigma)
            crop_center += noise

        crop_scale_noise = tf.constant(1.0)
        if self.crop_scale_noise:
            crop_scale_noise = tf.squeeze(tf.random_uniform([1], minval=1.0, maxval=1.2))

        # For sizing the crop only: use the wrist instead of the palm point.
        # data_dict['keypoint_uv21'] (stored above) is not affected.
        if not self.use_wrist_coord:
            wrist_uv = keypoint_uv21[16, :] + 2.0 * (keypoint_uv21[0, :] - keypoint_uv21[16, :])
            keypoint_uv21 = tf.concat([tf.expand_dims(wrist_uv, 0), keypoint_uv21[1:, :]], 0)

        # select visible coords only
        kp_coord_h = tf.boolean_mask(keypoint_uv21[:, 1], keypoint_vis21)
        kp_coord_w = tf.boolean_mask(keypoint_uv21[:, 0], keypoint_vis21)
        kp_coord_hw = tf.stack([kp_coord_h, kp_coord_w], 1)

        # determine size of crop (measure spatial extent of hw coords first)
        min_coord = tf.maximum(tf.reduce_min(kp_coord_hw, 0), 0.0)
        max_coord = tf.minimum(tf.reduce_max(kp_coord_hw, 0), self.image_size)

        # find out larger distance wrt the center of crop
        crop_size_best = 2 * tf.maximum(max_coord - crop_center, crop_center - min_coord)
        crop_size_best = tf.reduce_max(crop_size_best)
        crop_size_best = tf.minimum(tf.maximum(crop_size_best, 50.0), 500.0)

        # catch problem, when no valid kp available
        crop_size_best = tf.cond(tf.reduce_all(tf.is_finite(crop_size_best)),
                                 lambda: crop_size_best,
                                 lambda: tf.constant(200.0))
        crop_size_best.set_shape([])

        # calculate necessary scaling
        scale = tf.cast(self.crop_size, tf.float32) / crop_size_best
        scale = tf.minimum(tf.maximum(scale, 1.0), 10.0)
        scale *= crop_scale_noise
        data_dict['crop_scale'] = scale

        if self.crop_offset_noise:
            noise = tf.truncated_normal([2], mean=0.0, stddev=self.crop_offset_noise_sigma)
            crop_center += noise

        # Crop image
        img_crop = crop_image_from_xy(tf.expand_dims(image, 0), crop_center, self.crop_size, scale)
        data_dict['image_crop'] = tf.squeeze(img_crop)

        # Modify uv21 coordinates (starting from the stored, un-substituted set)
        crop_center_float = tf.cast(crop_center, tf.float32)
        keypoint_uv21_u = (data_dict['keypoint_uv21'][:, 0] - crop_center_float[1]) * scale + self.crop_size // 2
        keypoint_uv21_v = (data_dict['keypoint_uv21'][:, 1] - crop_center_float[0]) * scale + self.crop_size // 2
        keypoint_uv21 = tf.stack([keypoint_uv21_u, keypoint_uv21_v], 1)
        data_dict['keypoint_uv21'] = keypoint_uv21

        # Modify camera intrinsics: K' = T * S * K, where S scales and T shifts
        # the principal point into the crop.
        scale = tf.reshape(scale, [1, ])
        scale_matrix = tf.dynamic_stitch(
            [[0], [1], [2], [3], [4], [5], [6], [7], [8]],
            [scale, [0.0], [0.0], [0.0], scale, [0.0], [0.0], [0.0], [1.0]])
        scale_matrix = tf.reshape(scale_matrix, [3, 3])

        crop_center_float = tf.cast(crop_center, tf.float32)
        trans1 = crop_center_float[0] * scale - self.crop_size // 2
        trans2 = crop_center_float[1] * scale - self.crop_size // 2
        trans1 = tf.reshape(trans1, [1, ])
        trans2 = tf.reshape(trans2, [1, ])
        trans_matrix = tf.dynamic_stitch(
            [[0], [1], [2], [3], [4], [5], [6], [7], [8]],
            [[1.0], [0.0], -trans2, [0.0], [1.0], -trans1, [0.0], [0.0], [1.0]])
        trans_matrix = tf.reshape(trans_matrix, [3, 3])

        data_dict['cam_mat'] = tf.matmul(trans_matrix, tf.matmul(scale_matrix, data_dict['cam_mat']))

    # ---- DEPENDENT DATA ITEMS: Scoremap from the SUBSET of 21 keypoints ----
    # create scoremaps from the subset of 2D annotation
    keypoint_hw21 = tf.stack([keypoint_uv21[:, 1], keypoint_uv21[:, 0]], -1)

    scoremap_size = self.image_size
    if self.hand_crop:
        scoremap_size = (self.crop_size, self.crop_size)

    scoremap = self.create_multiple_gaussian_map(keypoint_hw21,
                                                 scoremap_size,
                                                 self.sigma,
                                                 valid_vec=keypoint_vis21)

    if self.scoremap_dropout:
        scoremap = tf.nn.dropout(scoremap, self.scoremap_dropout_prob,
                                 noise_shape=[1, 1, 21])
        # tf.nn.dropout rescales kept entries by 1/keep_prob; undo that.
        scoremap *= self.scoremap_dropout_prob

    data_dict['scoremap'] = scoremap

    if self.random_crop_to_size:
        # NOTE(review): 'hand_parts' and 'hand_mask' are never written to
        # data_dict in this method, so this branch looks like it raises
        # KeyError as written -- confirm whether it is meant to be usable here.
        tensor_stack = tf.concat([data_dict['image'],
                                  tf.expand_dims(tf.cast(data_dict['hand_parts'], tf.float32), -1),
                                  tf.cast(data_dict['hand_mask'], tf.float32)], 2)
        s = tensor_stack.get_shape().as_list()
        tensor_stack_cropped = tf.random_crop(tensor_stack,
                                              [self.random_crop_size, self.random_crop_size, s[2]])
        # delete everything else because the random cropping makes the data invalid anyway
        data_dict = dict()
        data_dict['image'] = tensor_stack_cropped[:, :, :3]
        data_dict['hand_parts'] = tf.cast(tensor_stack_cropped[:, :, 3], tf.int32)
        data_dict['hand_mask'] = tf.cast(tensor_stack_cropped[:, :, 4:], tf.int32)

    names, tensors = zip(*data_dict.items())

    if self.shuffle:
        tensors = tf.train.shuffle_batch_join([tensors],
                                              batch_size=self.batch_size,
                                              capacity=100,
                                              min_after_dequeue=50,
                                              enqueue_many=False)
    else:
        tensors = tf.train.batch_join([tensors],
                                      batch_size=self.batch_size,
                                      capacity=100,
                                      enqueue_many=False)

    return dict(zip(names, tensors))
if hvd is not None: optimizer = hvd.DistributedOptimizer(optimizer, sparse_as_dense=True, compression=Compression.fp16 if use_fp16 or manual_fp16 else Compression.none) tvars = tf.trainable_variables() # grads = tf.gradients( # loss, tvars, colocate_gradients_with_ops=colocate_gradients_with_ops) // Change 10 calculate gradients with horovod grads_and_vars = optimizer.compute_gradients(loss, tvars) # # This is how the model was pre-trained. #(grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0) // Change 11 clip grads grads_and_vars = [(g, v) for g, v in grads_and_vars if g is not None] grads, tvars = list(zip(*grads_and_vars)) all_are_finite = tf.reduce_all( [tf.reduce_all(tf.is_finite(g)) for g in grads]) if use_fp16 or manual_fp16 else tf.constant(True, dtype=tf.bool) # This is how the model was pre-trained. # ensure global norm is a finite number # to prevent clip_by_global_norm from having a hizzy fit. (clipped_grads, _) = tf.clip_by_global_norm( grads, clip_norm=1.0, use_norm=tf.cond( all_are_finite, lambda: tf.global_norm(grads), lambda: tf.constant(1.0))) #train_op = optimizer.apply_gradients( # list(zip(grads, tvars)), global_step=global_step) // Change 12 apply grads using the cliped grads train_op = optimizer.apply_gradients(
def calc_center_bb(binary_class_mask):
    """Compute per-sample bounding box, box centre and crop size of a mask.

    Pixels whose value (after casting to int32) equals 1 count as foreground.
    For every batch element the extent of the foreground pixels along the two
    spatial axes is measured.

    Args:
        binary_class_mask: Tensor with a fully known static shape of rank 3,
            or rank 4 with a trailing axis that can be squeezed. The batch
            dimension must be smaller than both spatial dimensions.

    Returns:
        Tuple `(center, bb, crop_size)` stacked over the batch:
        `center` holds the midpoint of the box per sample (falls back to
        [160.0, 160.0] when it is not finite), `bb` holds
        [[x_min, x_max], [y_min, y_max]] per sample, and `crop_size` holds the
        larger box side per sample (falls back to [100.0] when not finite).
    """
    with tf.variable_scope('calc_center_bb'):
        mask = tf.equal(tf.cast(binary_class_mask, tf.int32), 1)
        if len(mask.get_shape().as_list()) == 4:
            mask = tf.squeeze(mask, [3])

        dims = mask.get_shape().as_list()
        assert len(dims) == 3, "binary_class_mask must be 3D."
        assert (dims[0] < dims[1]) and (
            dims[0] < dims[2]), "binary_class_mask must be [Batch, Width, Height]"
        num_samples, size_x, size_y = dims[0], dims[1], dims[2]

        # Hand-rolled meshgrid: index of every pixel along each spatial axis.
        grid_x = tf.tile(tf.expand_dims(tf.range(size_x), 1), [1, size_y])
        grid_y = tf.tile(tf.expand_dims(tf.range(size_y), 0), [size_x, 1])

        boxes, centers, sizes = [], [], []
        for sample in range(num_samples):
            foreground = mask[sample, :, :]
            xs = tf.cast(tf.boolean_mask(grid_x, foreground), tf.float32)
            ys = tf.cast(tf.boolean_mask(grid_y, foreground), tf.float32)

            # Extent of the foreground pixels.
            lo_x, hi_x = tf.reduce_min(xs), tf.reduce_max(xs)
            lo_y, hi_y = tf.reduce_min(ys), tf.reduce_max(ys)

            corner_lo = tf.stack([lo_x, lo_y])
            corner_hi = tf.stack([hi_x, hi_y])
            boxes.append(tf.stack([corner_lo, corner_hi], 1))

            # Box midpoint, replaced by a fixed default if it is not finite.
            raw_center = tf.stack([0.5 * (hi_x + lo_x), 0.5 * (hi_y + lo_y)], 0)
            safe_center = tf.cond(tf.reduce_all(tf.is_finite(raw_center)),
                                  lambda: raw_center,
                                  lambda: tf.constant([160.0, 160.0]))
            safe_center.set_shape([2])
            centers.append(safe_center)

            # Larger side of the box, with the same kind of fallback.
            extent_x = hi_x - lo_x
            extent_y = hi_y - lo_y
            raw_size = tf.expand_dims(tf.maximum(extent_x, extent_y), 0)
            safe_size = tf.cond(tf.reduce_all(tf.is_finite(raw_size)),
                                lambda: raw_size,
                                lambda: tf.constant([100.0]))
            safe_size.set_shape([1])
            sizes.append(safe_size)

        bb = tf.stack(boxes)
        center = tf.stack(centers)
        crop_size = tf.stack(sizes)
        return center, bb, crop_size
def apply_updates(self, allow_no_op: bool = False) -> tf.Operation:
    """Build the training op that applies the registered gradients.

    The gradients are cleaned and scaled per device, summed across devices
    when there is more than one, optionally accumulated over several
    minibatches, and applied only if all of them are finite. Loss scaling is
    adjusted and statistics are reported along the way. May be called once
    per optimizer instance.

    Args:
        allow_no_op: When True and no devices are registered, return a plain
            no-op instead of building the update graph.

    Returns:
        A single op named "TrainingOp" that groups all update ops.
    """
    tfutil.assert_tf_initialized()
    assert not self._updates_applied
    self._updates_applied = True
    train_ops = []

    # Check for no-op.
    if allow_no_op and len(self._devices) == 0:
        with tfutil.absolute_name_scope(self.scope):
            return tf.no_op(name='TrainingOp')

    # Clean up gradients.
    for dev_index, dev in enumerate(self._devices.values()):
        clean_scope = self.scope + "/Clean%d" % dev_index
        with tfutil.absolute_name_scope(clean_scope), tf.device(dev.name):
            for variable, raw_grads in dev.grad_raw.items():

                # Drop disconnected gradients and convert the rest to float32.
                connected = [tf.cast(g, tf.float32) for g in raw_grads if g is not None]

                # Sum within the device.
                if not connected:
                    summed = tf.zeros(variable.shape)  # No gradients => zero.
                elif len(connected) == 1:
                    summed = connected[0]              # Single gradient => use as is.
                else:
                    summed = tf.add_n(connected)       # Multiple gradients => sum.

                # Scale as needed. The divisor counts every registered
                # gradient, including the disconnected ones.
                factor = tf.constant(1.0 / len(raw_grads) / len(self._devices),
                                     dtype=tf.float32, name="scale")
                if self.minibatch_multiplier is not None:
                    factor = factor / tf.cast(self.minibatch_multiplier, tf.float32)
                factor = self.undo_loss_scaling(factor)
                dev.grad_clean[variable] = summed * factor

    # Sum gradients across devices.
    if len(self._devices) > 1:
        with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None):
            # Windows => NCCL ops are not available.
            # TF 1.15 => NCCL ops are broken: https://github.com/tensorflow/tensorflow/issues/41539
            # Otherwise => NCCL ops are safe to use.
            if platform.system() == "Windows" or tf.VERSION.startswith("1.15."):
                self._broadcast_fallback()
            else:
                self._broadcast_nccl()

    # Apply updates separately on each device.
    for dev_index, dev in enumerate(self._devices.values()):
        apply_scope = self.scope + "/Apply%d" % dev_index
        with tfutil.absolute_name_scope(apply_scope), tf.device(dev.name):
            # pylint: disable=cell-var-from-loop
            # The closures below are handed straight to tf.cond, so capturing
            # loop variables is fine.

            # Accumulate gradients over time.
            if self.minibatch_multiplier is None:
                acc_ok = tf.constant(True, name='acc_ok')
                dev.grad_acc = OrderedDict(dev.grad_clean)
            else:
                # Create variables.
                with tf.control_dependencies(None):
                    for variable in dev.grad_clean.keys():
                        dev.grad_acc_vars[variable] = tf.Variable(
                            tf.zeros(variable.shape), trainable=False, name="grad_acc_var")
                    dev.grad_acc_count = tf.Variable(
                        tf.zeros([]), trainable=False, name="grad_acc_count")

                # Track counter.
                next_count = dev.grad_acc_count + 1.0

                def store_count():
                    return tf.assign(dev.grad_acc_count, next_count)

                def clear_count():
                    return tf.assign(dev.grad_acc_count, tf.zeros([]))

                acc_ok = (next_count >= tf.cast(self.minibatch_multiplier, tf.float32))
                train_ops.append(tf.cond(acc_ok, clear_count, store_count))

                # Track gradients.
                for variable, clean_grad in dev.grad_clean.items():
                    acc_var = dev.grad_acc_vars[variable]
                    acc_total = acc_var + clean_grad
                    dev.grad_acc[variable] = acc_total
                    with tf.control_dependencies([acc_total]):

                        def store_total():
                            return tf.assign(acc_var, acc_total)

                        def clear_total():
                            return tf.assign(acc_var, tf.zeros(variable.shape))

                        train_ops.append(tf.cond(acc_ok, clear_total, store_total))

            # No overflow => apply gradients.
            finite_flags = [tf.reduce_all(tf.is_finite(g)) for g in dev.grad_acc.values()]
            all_ok = tf.reduce_all(tf.stack([acc_ok] + finite_flags))

            def apply_grads():
                pairs = [(tf.cast(g, v.dtype), v) for v, g in dev.grad_acc.items()]
                return dev.optimizer.apply_gradients(pairs)

            train_ops.append(tf.cond(all_ok, apply_grads, tf.no_op))

            # Adjust loss scaling.
            if self.use_loss_scaling:

                def raise_scaling():
                    return tf.assign_add(dev.loss_scaling_var, self.loss_scaling_inc)

                def lower_scaling():
                    return tf.assign_sub(dev.loss_scaling_var, self.loss_scaling_dec)

                def update_scaling():
                    return tf.group(tf.cond(all_ok, raise_scaling, lower_scaling))

                train_ops.append(tf.cond(acc_ok, update_scaling, tf.no_op))

            # Last device => report statistics.
            if dev_index == len(self._devices) - 1:
                train_ops.append(autosummary.autosummary(
                    self.id + "/learning_rate", tf.convert_to_tensor(self.learning_rate)))
                train_ops.append(autosummary.autosummary(
                    self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok))
                if self.use_loss_scaling:
                    train_ops.append(autosummary.autosummary(
                        self.id + "/loss_scaling_log2", dev.loss_scaling_var))

    # Initialize variables.
    self.reset_optimizer_state()
    if self.use_loss_scaling:
        tfutil.init_uninitialized_vars(
            [entry.loss_scaling_var for entry in self._devices.values()])
    if self.minibatch_multiplier is not None:
        initializers = []
        for entry in self._devices.values():
            for acc in list(entry.grad_acc_vars.values()) + [entry.grad_acc_count]:
                initializers.append(acc.initializer)
        tfutil.run(initializers)

    # Group everything into a single op.
    with tfutil.absolute_name_scope(self.scope):
        return tf.group(*train_ops, name="TrainingOp")
def get(self):
    """ Provides input data to the graph.

    Reads fixed-length records from `self.path_to_db`, decodes keypoints
    (xyz, uv, visibility), camera intrinsics, the image and the hand-part
    segmentation mask, derives the 21-keypoint subset of the more prominent
    hand plus optional hand crop / score maps, and batches everything.

    Returns:
        dict mapping item names to batched tensors produced by
        `tf.train.batch_join` or `tf.train.shuffle_batch_join`.

    Raises:
        AssertionError: if the decoded byte count does not match the
            computed record size.
    """
    # calculate size of each record (this lists what is contained in the db and how many bytes are occupied)
    # The count starts at 2; the matching 2-byte skip happens right before
    # the uint8 section is decoded below.
    record_bytes = 2

    encoding_bytes = 4
    kp_xyz_entries = 3 * self.num_kp
    record_bytes += encoding_bytes * kp_xyz_entries

    encoding_bytes = 4
    kp_uv_entries = 2 * self.num_kp
    record_bytes += encoding_bytes * kp_uv_entries

    cam_matrix_entries = 9
    record_bytes += encoding_bytes * cam_matrix_entries

    image_bytes = self.image_size[0] * self.image_size[1] * 3
    record_bytes += image_bytes

    hand_parts_bytes = self.image_size[0] * self.image_size[1]
    record_bytes += hand_parts_bytes

    kp_vis_bytes = self.num_kp
    record_bytes += kp_vis_bytes

    """ READ DATA ITEMS"""
    # Start reader
    reader = tf.FixedLengthRecordReader(header_bytes=0,
                                        record_bytes=record_bytes)
    _, value = reader.read(
        tf.train.string_input_producer([self.path_to_db]))

    # decode to floats
    # `bytes_read` is the running byte offset into the record; float32
    # slices below index with bytes_read // 4.
    bytes_read = 0
    data_dict = dict()
    record_bytes_float32 = tf.decode_raw(value, tf.float32)

    # 1. Read keypoint xyz
    keypoint_xyz = tf.reshape(
        tf.slice(record_bytes_float32, [bytes_read // 4],
                 [kp_xyz_entries]), [self.num_kp, 3])
    bytes_read += encoding_bytes * kp_xyz_entries

    # calculate palm coord
    # Rows 0 and 21 are replaced by the midpoint of rows (0, 12) and
    # (21, 33) respectively; the indices assume 2 x 21 keypoints.
    if not self.use_wrist_coord:
        palm_coord_l = tf.expand_dims(
            0.5 * (keypoint_xyz[0, :] + keypoint_xyz[12, :]), 0)
        palm_coord_r = tf.expand_dims(
            0.5 * (keypoint_xyz[21, :] + keypoint_xyz[33, :]), 0)
        keypoint_xyz = tf.concat([
            palm_coord_l, keypoint_xyz[1:21, :], palm_coord_r,
            keypoint_xyz[-20:, :]
        ], 0)

    data_dict['keypoint_xyz'] = keypoint_xyz

    # 2. Read keypoint uv
    # The cast to int32 and back to float32 truncates the stored
    # coordinates to whole numbers.
    keypoint_uv = tf.cast(
        tf.reshape(
            tf.slice(record_bytes_float32, [bytes_read // 4],
                     [kp_uv_entries]), [self.num_kp, 2]), tf.int32)
    bytes_read += encoding_bytes * kp_uv_entries

    keypoint_uv = tf.cast(keypoint_uv, tf.float32)

    # calculate palm coord
    if not self.use_wrist_coord:
        palm_coord_uv_l = tf.expand_dims(
            0.5 * (keypoint_uv[0, :] + keypoint_uv[12, :]), 0)
        palm_coord_uv_r = tf.expand_dims(
            0.5 * (keypoint_uv[21, :] + keypoint_uv[33, :]), 0)
        keypoint_uv = tf.concat([
            palm_coord_uv_l, keypoint_uv[1:21, :], palm_coord_uv_r,
            keypoint_uv[-20:, :]
        ], 0)

    if self.coord_uv_noise:
        # NOTE(review): the noise shape is fixed to [42, 2], so this
        # presumably requires self.num_kp == 42 -- confirm.
        noise = tf.truncated_normal([42, 2],
                                    mean=0.0,
                                    stddev=self.coord_uv_noise_sigma)
        keypoint_uv += noise

    data_dict['keypoint_uv'] = keypoint_uv

    # 3. Camera intrinsics
    cam_mat = tf.reshape(
        tf.slice(record_bytes_float32, [bytes_read // 4],
                 [cam_matrix_entries]), [3, 3])
    bytes_read += encoding_bytes * cam_matrix_entries
    data_dict['cam_mat'] = cam_mat

    # decode to uint8
    # Skip the 2 extra bytes accounted for in `record_bytes` above.
    bytes_read += 2
    record_bytes_uint8 = tf.decode_raw(value, tf.uint8)

    # 4. Read image
    image = tf.reshape(
        tf.slice(record_bytes_uint8, [bytes_read], [image_bytes]),
        [self.image_size[0], self.image_size[1], 3])
    image = tf.cast(image, tf.float32)
    bytes_read += image_bytes

    # subtract mean
    image = image / 255.0 - 0.5
    if self.hue_aug:
        image = tf.image.random_hue(image, self.hue_aug_max)
    data_dict['image'] = image

    # 5. Read mask
    hand_parts_mask = tf.reshape(
        tf.slice(record_bytes_uint8, [bytes_read], [hand_parts_bytes]),
        [self.image_size[0], self.image_size[1]])
    hand_parts_mask = tf.cast(hand_parts_mask, tf.int32)
    bytes_read += hand_parts_bytes
    data_dict['hand_parts'] = hand_parts_mask
    # Labels greater than 1 are treated as hand, everything else as
    # background; the two masks are stacked as channels [bg, hand].
    hand_mask = tf.greater(hand_parts_mask, 1)
    bg_mask = tf.logical_not(hand_mask)
    data_dict['hand_mask'] = tf.cast(tf.stack([bg_mask, hand_mask], 2),
                                     tf.int32)

    # 6. Read visibility
    keypoint_vis = tf.reshape(
        tf.slice(record_bytes_uint8, [bytes_read], [kp_vis_bytes]),
        [self.num_kp])
    keypoint_vis = tf.cast(keypoint_vis, tf.bool)
    bytes_read += kp_vis_bytes

    # calculate palm visibility
    if not self.use_wrist_coord:
        palm_vis_l = tf.expand_dims(
            tf.logical_or(keypoint_vis[0], keypoint_vis[12]), 0)
        palm_vis_r = tf.expand_dims(
            tf.logical_or(keypoint_vis[21], keypoint_vis[33]), 0)
        keypoint_vis = tf.concat([
            palm_vis_l, keypoint_vis[1:21], palm_vis_r, keypoint_vis[-20:]
        ], 0)
    data_dict['keypoint_vis'] = keypoint_vis

    # Sanity check on the offset bookkeeping above.
    assert bytes_read == record_bytes, "Doesnt add up."

    """ DEPENDENT DATA ITEMS: SUBSET of 21 keypoints"""
    # figure out dominant hand by analysis of the segmentation mask
    # Labels 2..17 count as left-hand pixels, labels above 17 as right-hand.
    one_map, zero_map = tf.ones_like(hand_parts_mask), tf.zeros_like(
        hand_parts_mask)
    cond_l = tf.logical_and(tf.greater(hand_parts_mask, one_map),
                            tf.less(hand_parts_mask, one_map * 18))
    cond_r = tf.greater(hand_parts_mask, one_map * 17)
    hand_map_l = tf.where(cond_l, one_map, zero_map)
    hand_map_r = tf.where(cond_r, one_map, zero_map)
    num_px_left_hand = tf.reduce_sum(hand_map_l)
    num_px_right_hand = tf.reduce_sum(hand_map_r)

    # PRODUCE the 21 subset using the segmentation masks
    # We only deal with the more prominent hand for each frame and discard the second set of keypoints
    kp_coord_xyz_left = keypoint_xyz[:21, :]
    kp_coord_xyz_right = keypoint_xyz[-21:, :]

    # Boolean tensor shaped like the left xyz block, uniformly True when the
    # left hand covers more pixels; sliced below to select vis / uv as well.
    cond_left = tf.logical_and(
        tf.cast(tf.ones_like(kp_coord_xyz_left), tf.bool),
        tf.greater(num_px_left_hand, num_px_right_hand))
    kp_coord_xyz21 = tf.where(cond_left, kp_coord_xyz_left,
                              kp_coord_xyz_right)

    hand_side = tf.where(
        tf.greater(num_px_left_hand, num_px_right_hand),
        tf.constant(0, dtype=tf.int32),
        tf.constant(1, dtype=tf.int32))  # left hand = 0; right hand = 1
    data_dict['hand_side'] = tf.one_hot(hand_side,
                                        depth=2,
                                        on_value=1.0,
                                        off_value=0.0,
                                        dtype=tf.float32)

    data_dict['keypoint_xyz21'] = kp_coord_xyz21

    # make coords relative to root joint
    kp_coord_xyz_root = kp_coord_xyz21[0, :]  # this is the palm coord
    kp_coord_xyz21_rel = kp_coord_xyz21 - kp_coord_xyz_root  # relative coords in metric coords
    index_root_bone_length = tf.sqrt(
        tf.reduce_sum(
            tf.square(kp_coord_xyz21_rel[12, :] -
                      kp_coord_xyz21_rel[11, :])))
    data_dict['keypoint_scale'] = index_root_bone_length
    data_dict[
        'keypoint_xyz21_normed'] = kp_coord_xyz21_rel / index_root_bone_length  # normalized by length of 12->11

    # calculate local coordinates
    kp_coord_xyz21_local = bone_rel_trafo(
        data_dict['keypoint_xyz21_normed'])
    kp_coord_xyz21_local = tf.squeeze(kp_coord_xyz21_local)
    data_dict['keypoint_xyz21_local'] = kp_coord_xyz21_local

    # calculate viewpoint and coords in canonical coordinates
    kp_coord_xyz21_rel_can, rot_mat = canonical_trafo(
        data_dict['keypoint_xyz21_normed'])
    kp_coord_xyz21_rel_can, rot_mat = tf.squeeze(
        kp_coord_xyz21_rel_can), tf.squeeze(rot_mat)
    kp_coord_xyz21_rel_can = flip_right_hand(kp_coord_xyz21_rel_can,
                                             tf.logical_not(cond_left))
    data_dict['keypoint_xyz21_can'] = kp_coord_xyz21_rel_can
    data_dict['rot_mat'] = tf.matrix_inverse(rot_mat)

    # Set of 21 for visibility
    keypoint_vis_left = keypoint_vis[:21]
    keypoint_vis_right = keypoint_vis[-21:]
    keypoint_vis21 = tf.where(cond_left[:, 0], keypoint_vis_left,
                              keypoint_vis_right)
    data_dict['keypoint_vis21'] = keypoint_vis21

    # Set of 21 for UV coordinates
    keypoint_uv_left = keypoint_uv[:21, :]
    keypoint_uv_right = keypoint_uv[-21:, :]
    keypoint_uv21 = tf.where(cond_left[:, :2], keypoint_uv_left,
                             keypoint_uv_right)
    data_dict['keypoint_uv21'] = keypoint_uv21

    """ DEPENDENT DATA ITEMS: HAND CROP """
    if self.hand_crop:
        # Keypoint 12 with its two coordinates swapped is the crop centre.
        crop_center = keypoint_uv21[12, ::-1]

        # catch problem, when no valid kp available (happens almost never)
        crop_center = tf.cond(tf.reduce_all(tf.is_finite(crop_center)),
                              lambda: crop_center,
                              lambda: tf.constant([0.0, 0.0]))
        crop_center.set_shape([
            2,
        ])

        if self.crop_center_noise:
            noise = tf.truncated_normal(
                [2], mean=0.0, stddev=self.crop_center_noise_sigma)
            crop_center += noise

        crop_scale_noise = tf.constant(1.0)
        if self.crop_scale_noise:
            crop_scale_noise = tf.squeeze(
                tf.random_uniform([1], minval=1.0, maxval=1.2))

        # select visible coords only
        kp_coord_h = tf.boolean_mask(keypoint_uv21[:, 1], keypoint_vis21)
        kp_coord_w = tf.boolean_mask(keypoint_uv21[:, 0], keypoint_vis21)
        kp_coord_hw = tf.stack([kp_coord_h, kp_coord_w], 1)

        # determine size of crop (measure spatial extent of hw coords first)
        min_coord = tf.maximum(tf.reduce_min(kp_coord_hw, 0), 0.0)
        max_coord = tf.minimum(tf.reduce_max(kp_coord_hw, 0),
                               self.image_size)

        # find out larger distance wrt the center of crop
        crop_size_best = 2 * tf.maximum(max_coord - crop_center,
                                        crop_center - min_coord)
        crop_size_best = tf.reduce_max(crop_size_best)
        crop_size_best = tf.minimum(tf.maximum(crop_size_best, 50.0),
                                    500.0)

        # catch problem, when no valid kp available
        crop_size_best = tf.cond(
            tf.reduce_all(tf.is_finite(crop_size_best)),
            lambda: crop_size_best, lambda: tf.constant(200.0))
        crop_size_best.set_shape([])

        # calculate necessary scaling (clamped to the range [1, 10])
        scale = tf.cast(self.crop_size, tf.float32) / crop_size_best
        scale = tf.minimum(tf.maximum(scale, 1.0), 10.0)
        scale *= crop_scale_noise
        data_dict['crop_scale'] = scale

        if self.crop_offset_noise:
            noise = tf.truncated_normal(
                [2], mean=0.0, stddev=self.crop_offset_noise_sigma)
            crop_center += noise

        # Crop image
        img_crop = crop_image_from_xy(tf.expand_dims(image, 0),
                                      crop_center, self.crop_size, scale)
        data_dict['image_crop'] = tf.squeeze(img_crop)

        # Modify uv21 coordinates
        crop_center_float = tf.cast(crop_center, tf.float32)
        keypoint_uv21_u = (keypoint_uv21[:, 0] - crop_center_float[1]
                           ) * scale + self.crop_size // 2
        keypoint_uv21_v = (keypoint_uv21[:, 1] - crop_center_float[0]
                           ) * scale + self.crop_size // 2
        keypoint_uv21 = tf.stack([keypoint_uv21_u, keypoint_uv21_v], 1)
        data_dict['keypoint_uv21'] = keypoint_uv21

        # Modify camera intrinsics
        # Build a 3x3 scaling matrix and a 3x3 translation matrix from flat
        # 9-element vectors, then left-multiply them onto cam_mat.
        scale = tf.reshape(scale, [
            1,
        ])
        scale_matrix = tf.dynamic_stitch([
            [0], [1], [2], [3], [4], [5], [6], [7], [8]
        ], [scale, [0.0], [0.0], [0.0], scale, [0.0], [0.0], [0.0], [1.0]])
        scale_matrix = tf.reshape(scale_matrix, [3, 3])

        crop_center_float = tf.cast(crop_center, tf.float32)
        trans1 = crop_center_float[0] * scale - self.crop_size // 2
        trans2 = crop_center_float[1] * scale - self.crop_size // 2
        trans1 = tf.reshape(trans1, [
            1,
        ])
        trans2 = tf.reshape(trans2, [
            1,
        ])
        trans_matrix = tf.dynamic_stitch(
            [[0], [1], [2], [3], [4], [5], [6], [7], [8]],
            [[1.0], [0.0], -trans2, [0.0], [1.0], -trans1, [0.0], [0.0],
             [1.0]])
        trans_matrix = tf.reshape(trans_matrix, [3, 3])

        data_dict['cam_mat'] = tf.matmul(trans_matrix,
                                         tf.matmul(scale_matrix, cam_mat))

    """ DEPENDENT DATA ITEMS: Scoremap from the SUBSET of 21 keypoints"""
    # create scoremaps from the subset of 2D annotation
    keypoint_hw21 = tf.stack([keypoint_uv21[:, 1], keypoint_uv21[:, 0]],
                             -1)

    scoremap_size = self.image_size

    if self.hand_crop:
        scoremap_size = (self.crop_size, self.crop_size)

    scoremap = self.create_multiple_gaussian_map(keypoint_hw21,
                                                 scoremap_size,
                                                 self.sigma,
                                                 valid_vec=keypoint_vis21)

    if self.scoremap_dropout:
        scoremap = tf.nn.dropout(scoremap,
                                 self.scoremap_dropout_prob,
                                 noise_shape=[1, 1, 21])
        # tf.nn.dropout rescales kept entries by 1/keep_prob; multiplying by
        # the same probability undoes that rescaling.
        scoremap *= self.scoremap_dropout_prob

    data_dict['scoremap'] = scoremap

    if self.scale_to_size:
        image, keypoint_uv21, keypoint_vis21 = data_dict[
            'image'], data_dict['keypoint_uv21'], data_dict[
                'keypoint_vis21']
        s = image.get_shape().as_list()
        image = tf.image.resize_images(image, self.scale_target_size)
        # Per-axis resize factors: index 0 is derived from the first image
        # axis, index 1 from the second.
        scale = (self.scale_target_size[0] / float(s[0]),
                 self.scale_target_size[1] / float(s[1]))
        keypoint_uv21 = tf.stack([
            keypoint_uv21[:, 0] * scale[1], keypoint_uv21[:, 1] * scale[0]
        ], 1)

        data_dict = dict(
        )  # delete everything else because the scaling makes the data invalid anyway
        data_dict['image'] = image
        data_dict['keypoint_uv21'] = keypoint_uv21
        data_dict['keypoint_vis21'] = keypoint_vis21

    elif self.random_crop_to_size:
        # Stack image (3 channels), hand parts (1) and hand mask (2) so that
        # one random crop is applied consistently to all of them.
        tensor_stack = tf.concat([
            data_dict['image'],
            tf.expand_dims(tf.cast(data_dict['hand_parts'], tf.float32),
                           -1),
            tf.cast(data_dict['hand_mask'], tf.float32)
        ], 2)
        s = tensor_stack.get_shape().as_list()
        tensor_stack_cropped = tf.random_crop(
            tensor_stack,
            [self.random_crop_size, self.random_crop_size, s[2]])
        data_dict = dict(
        )  # delete everything else because the random cropping makes the data invalid anyway
        data_dict['image'], data_dict['hand_parts'], data_dict['hand_mask'] = tensor_stack_cropped[:, :, :3],\
                                                                              tf.cast(tensor_stack_cropped[:, :, 3], tf.int32),\
                                                                              tf.cast(tensor_stack_cropped[:, :, 4:], tf.int32)

    # Batch all remaining items together; names are zipped back afterwards.
    names, tensors = zip(*data_dict.items())

    if self.shuffle:
        tensors = tf.train.shuffle_batch_join([tensors],
                                              batch_size=self.batch_size,
                                              capacity=100,
                                              min_after_dequeue=50,
                                              enqueue_many=False)
    else:
        tensors = tf.train.batch_join([tensors],
                                      batch_size=self.batch_size,
                                      capacity=100,
                                      enqueue_many=False)

    return dict(zip(names, tensors))