def test_dense_output(self):
    """Check that a Dense layer computes `inputs @ kernel + bias`.

    The layer is applied to a random dense tensor and to a random sparse
    tensor; each output is compared against the affine map evaluated
    explicitly with the layer's own kernel and bias.
    """
    dense_inputs = tf.convert_to_tensor(
        np.random.uniform(size=(10, 10)).astype('f'))

    # Create some sparse data where multiple rows and columns are missing.
    # The random draws happen in the same order as before (indices first,
    # then values) so a seeded run produces the same data.
    sparse_indices = np.random.randint(low=0, high=10, size=(5, 2))
    sparse_values = np.random.uniform(size=(5,)).astype('f')
    sparse_inputs = tf.sparse.reorder(
        tf.SparseTensor(
            indices=sparse_indices,
            values=sparse_values,
            dense_shape=[10, 10]))

    layer = keras.layers.Dense(
        5,
        kernel_initializer=keras.initializers.RandomUniform(),
        bias_initializer=keras.initializers.RandomUniform(),
        dtype='float32')
    dense_outputs = layer(dense_inputs)
    sparse_outputs = layer(sparse_inputs)

    # The weights only exist once the layer has been called.
    kernel = keras.backend.get_value(layer.kernel)
    bias = keras.backend.get_value(layer.bias)

    def _expected_affine(inputs):
        return tf.add(tf.matmul(inputs, kernel), bias)

    expected_dense = _expected_affine(dense_inputs)
    expected_sparse = _expected_affine(tf.sparse.to_dense(sparse_inputs))

    self.assertAllClose(dense_outputs, expected_dense)
    self.assertAllClose(sparse_outputs, expected_sparse)
def mlp(self, x):
    """Apply three stacked affine + ReLU stages to `x`.

    Args:
        x: input passed to `tf.matmul` against `self.h1_weights`.

    Returns:
        The ReLU-activated output of the last stage (`out_weights`/`out_bias`).
    """
    def _affine_relu(inputs, weights, bias):
        # relu(inputs @ weights + bias)
        return tf.nn.relu(tf.add(tf.matmul(inputs, weights), bias))

    hidden_1 = _affine_relu(x, self.h1_weights, self.h1_bias)
    hidden_2 = _affine_relu(hidden_1, self.h2_weights, self.h2_bias)
    return _affine_relu(hidden_2, self.out_weights, self.out_bias)
def _apply_divergence_concrete(self, scale_factor, name):
    """Register a scaled divergence between `p_post` and `p_prior` as a loss.

    Computes, summed over all elements,
        pl * (log(pl + eps) - log(pr))
        + (1 - pl) * (log(1 - pl + eps) - log(pr))
    with pl = `self.p_post` and pr = `self.p_prior`, divides the sum by
    `scale_factor` (cast to float32), names the result `name` and passes it
    to `self.add_loss`.

    Args:
        scale_factor: divisor applied to the summed divergence.
        name: name given to the resulting tensor.
    """
    def _scaled_divergence(pl, pr):
        eps = tfk.backend.epsilon()
        one = tfk.backend.constant(1)
        log_pr = tf.math.log(pr)
        one_minus_pl = tf.subtract(one, pl)

        on_term = tf.multiply(
            pl, tf.subtract(tf.math.log(tf.add(pl, eps)), log_pr))
        # NOTE(review): this term subtracts log(pr); a Bernoulli KL would
        # use log(1 - pr) here. Behavior is kept as-is -- confirm intent.
        off_term = tf.multiply(
            one_minus_pl,
            tf.subtract(tf.math.log(tf.add(one_minus_pl, eps)), log_pr))

        total = tf.reduce_sum(tf.add(on_term, off_term))
        return total / tf.cast(scale_factor, dtype=tf.float32)

    divergence = tf.identity(
        _scaled_divergence(self.p_post, self.p_prior), name=name)
    self.add_loss(divergence)
def test_dense_output(self):
    """Check Dense against `inputs @ kernel + bias` for three input kinds.

    Dense, sparse and ragged inputs are each fed through the same layer and
    compared with the affine map evaluated explicitly from the layer's kernel
    and bias. For the ragged case the map is applied to `flat_values` and the
    result is re-wrapped with the same row splits.
    """
    dense_inputs = tf.convert_to_tensor(
        np.random.uniform(size=(10, 10)).astype("f"))

    # Create some sparse data where multiple rows and columns are missing.
    # Draw order (indices, then values) matches the original for seeded runs.
    sparse_indices = np.random.randint(low=0, high=10, size=(5, 2))
    sparse_values = np.random.uniform(size=(5, )).astype("f")
    sparse_inputs = tf.sparse.reorder(
        tf.SparseTensor(
            indices=sparse_indices,
            values=sparse_values,
            dense_shape=[10, 10],
        ))

    # Create some ragged data.
    row_splits = [0, 4, 6, 6, 9, 10]
    ragged_inputs = tf.RaggedTensor.from_row_splits(
        np.random.uniform(size=(10, 10)).astype("f"),
        row_splits=row_splits,
    )

    layer = keras.layers.Dense(
        5,
        kernel_initializer=keras.initializers.RandomUniform(),
        bias_initializer=keras.initializers.RandomUniform(),
        dtype="float32",
    )
    dense_outputs = layer(dense_inputs)
    sparse_outputs = layer(sparse_inputs)
    ragged_outputs = layer(ragged_inputs)

    # The weights only exist once the layer has been called.
    kernel = keras.backend.get_value(layer.kernel)
    bias = keras.backend.get_value(layer.bias)

    def _expected_affine(inputs):
        return tf.add(tf.matmul(inputs, kernel), bias)

    expected_dense = _expected_affine(dense_inputs)
    expected_sparse = _expected_affine(tf.sparse.to_dense(sparse_inputs))
    expected_ragged = tf.RaggedTensor.from_row_splits(
        _expected_affine(ragged_inputs.flat_values),
        row_splits=row_splits)

    self.assertAllClose(dense_outputs, expected_dense)
    self.assertAllClose(sparse_outputs, expected_sparse)
    self.assertAllClose(ragged_outputs, expected_ragged)
def _hash_values_to_bins(self, values):
    """Map a non-sparse tensor of values to hash-bin indices.

    When `self.mask_value` is set and more than one bin is configured, bin 0
    is reserved for the mask value: hashing then uses `num_bins - 1` buckets,
    every hashed index is shifted up by one, and positions equal to the mask
    value are forced to 0.

    Args:
        values: tensor of values to hash; integer tensors are converted to
            strings first.

    Returns:
        Tensor of bin indices.
    """
    reserve_mask_bin = self.mask_value is not None and self.num_bins > 1
    if reserve_mask_bin:
        bucket_count = self.num_bins - 1
        # The mask is taken before any string conversion of `values`.
        mask = tf.equal(values, self.mask_value)
    else:
        bucket_count = self.num_bins
        mask = None

    # Convert all values to strings before hashing.
    if values.dtype.is_integer:
        values = tf.as_string(values)

    if self.strong_hash:
        bins = tf.strings.to_hash_bucket_strong(
            values, bucket_count, name='hash', key=self.salt)
    else:
        bins = tf.strings.to_hash_bucket_fast(
            values, bucket_count, name='hash')

    if mask is None:
        return bins

    # Shift real values into bins 1..N-1 and put masked entries in bin 0.
    shifted = tf.add(bins, tf.ones_like(bins))
    return tf.where(mask, tf.zeros_like(shifted), shifted)
def call(self, inputs, training=True, survival_prob=None):
    """Run the block: optional expand, depthwise, optional SE, project.

    Args:
        inputs: the inputs tensor.
        training: boolean, forwarded to the batch-norm layers and to
            `utils.drop_connect`.
        survival_prob: float, between 0 to 1, drop connect rate. Drop connect
            is skipped when this is falsy.

    Returns:
        The output tensor. A residual connection to `inputs` is added only
        when every stride is 1 and input/output filter counts match.
    """
    block_args = self._block_args
    x = inputs

    if block_args.expand_ratio != 1:
        expanded = self._expand_conv(x)
        x = self._relu_fn(self._bn0(expanded, training=training))

    depthwise = self._depthwise_conv(x)
    x = self._relu_fn(self._bn1(depthwise, training=training))

    if self._has_se:
        squeezed = tf.reduce_mean(x, self._spatial_dims, keepdims=True)
        excited = self._se_expand(self._relu_fn(self._se_reduce(squeezed)))
        x = tf.sigmoid(excited) * x

    x = self._bn2(self._project_conv(x), training=training)
    # Add identity so that quantization-aware training can insert
    # quantization ops correctly.
    x = tf.identity(x)
    if self._clip_projection_output:
        x = tf.clip_by_value(x, -6, 6)

    is_residual = (
        all(stride == 1 for stride in block_args.strides)
        and block_args.input_filters == block_args.output_filters)
    if not is_residual:
        return x

    if survival_prob:
        x = utils.drop_connect(x, training, survival_prob)
    return tf.add(x, inputs)
def __call__(self, step):
    """Return the polynomially decayed learning rate for `step`.

    Computes
        (initial - end) * (1 - step / decay_steps) ** power + end
    in the dtype of the initial learning rate. Without `cycle`, `step` is
    clamped to `decay_steps`. With `cycle`, `decay_steps` is scaled by
    ceil(step / decay_steps), or by 1 when `step` is 0.

    Args:
        step: current step; cast to the learning-rate dtype.

    Returns:
        The decayed learning rate tensor, named after the name scope.
    """
    with tf.name_scope(self.name or "PolynomialDecay") as name:
        initial_learning_rate = tf.convert_to_tensor(
            self.initial_learning_rate, name="initial_learning_rate")
        dtype = initial_learning_rate.dtype
        end_learning_rate = tf.cast(self.end_learning_rate, dtype)
        power = tf.cast(self.power, dtype)

        current_step = tf.cast(step, dtype)
        horizon = tf.cast(self.decay_steps, dtype)

        if self.cycle:
            # Find the first multiple of decay_steps that is bigger than
            # the current step. If the step is zero set the multiplier to 1.
            multiplier = tf.where(
                tf.equal(current_step, 0),
                1.0,
                tf.math.ceil(current_step / self.decay_steps))
            horizon = tf.multiply(horizon, multiplier)
        else:
            # Make sure that the step used is not bigger than decay_steps.
            current_step = tf.minimum(current_step, horizon)

        progress = tf.divide(current_step, horizon)
        decayed_range = tf.multiply(
            initial_learning_rate - end_learning_rate,
            tf.pow(1 - progress, power))
        return tf.add(decayed_range, end_learning_rate, name=name)
def _apply_variational_kernel(self, inputs):
    """Convolve `inputs` with the variational kernel.

    If the kernel posterior is not an `Independent` wrapping a `Normal`, a
    kernel tensor is obtained via `kernel_posterior_tensor_fn` and convolved
    directly. Otherwise the output mean is the convolution with the posterior
    `loc`, and, when `self.is_mc` is set, noise scaled by the square root of
    the output variance (convolution of squared inputs with the squared
    posterior stddev, plus epsilon) is added.

    Side effects: sets `kernel_posterior_tensor`, `kernel_posterior_affine`
    and `kernel_posterior_affine_tensor` on `self` in both branches.

    Args:
        inputs: tensor passed to `self._convolution_op`.

    Returns:
        The convolution output tensor.
    """
    posterior = self.kernel_posterior
    is_independent_normal = (
        isinstance(posterior, independent_lib.Independent)
        and isinstance(posterior.distribution, normal_lib.Normal))

    if not is_independent_normal:
        self.kernel_posterior_tensor = self.kernel_posterior_tensor_fn(
            posterior)
        self.kernel_posterior_affine = None
        self.kernel_posterior_affine_tensor = None
        return self._convolution_op(inputs, self.kernel_posterior_tensor)

    normal = posterior.distribution
    self.kernel_posterior_affine = normal_lib.Normal(
        loc=tf.zeros_like(normal.loc),
        scale=normal.scale,
    )
    self.kernel_posterior_affine_tensor = self.kernel_posterior_tensor_fn(
        self.kernel_posterior_affine)
    self.kernel_posterior_tensor = None

    mean_out = self._convolution_op(inputs, normal.loc)
    variance_out = self._convolution_op(
        tf.square(inputs), tf.square(normal.stddev()))

    k_size = tf_layers_util.normalize_tuple(self.kernel_size, 3, "k_size")
    noise_shape = [1] * len(k_size) + [1, self.filters]
    # Created before the is_mc check, as in the original, so the random op
    # exists regardless of which value is returned.
    noise = tf.random.normal(shape=noise_shape, dtype=self.dtype)

    if not self.is_mc:
        return mean_out

    std_out = tf.sqrt(tf.add(variance_out, tf.keras.backend.epsilon()))
    return mean_out + std_out * noise
def _set_values_using_indicator(self, x, indicator, val):
    """Overwrite the indicated entries of `x` with `val`.

    Implemented arithmetically as `x * (1 - m) + val * m`, where `m` is
    `indicator` cast to the dtype of `x`.

    Args:
        x: tensor.
        indicator: boolean with same shape as x.
        val: scalar with value to set.

    Returns:
        The modified tensor.
    """
    mask = tf.cast(indicator, x.dtype)
    kept = tf.multiply(x, 1 - mask)
    replaced = val * mask
    return tf.add(kept, replaced)
def __call__(self, step):
    """Return the inverse-time decayed learning rate for `step`.

    Computes `initial / (1 + decay_rate * step / decay_steps)` in the dtype
    of the initial learning rate. With `staircase`, the ratio
    `step / decay_steps` is floored first.

    Args:
        step: current step; cast to the learning-rate dtype.

    Returns:
        The decayed learning rate tensor, named after the name scope.
    """
    with tf.name_scope(self.name or "InverseTimeDecay") as name:
        initial_learning_rate = tf.convert_to_tensor(
            self.initial_learning_rate, name="initial_learning_rate")
        dtype = initial_learning_rate.dtype
        decay_steps = tf.cast(self.decay_steps, dtype)
        decay_rate = tf.cast(self.decay_rate, dtype)

        elapsed_ratio = tf.cast(step, dtype) / decay_steps
        if self.staircase:
            elapsed_ratio = tf.floor(elapsed_ratio)

        one = tf.cast(tf.constant(1), dtype)
        denominator = tf.add(one, tf.multiply(decay_rate, elapsed_ratio))
        return tf.divide(initial_learning_rate, denominator, name=name)
def _hash_values_to_bins(self, values):
    """Map a non-sparse tensor of values to hash-bin indices.

    The hashing function comes from `self._get_string_to_hash_bucket_fn()`.
    When `self.mask_value` is set and more than one bin is configured, bin 0
    is reserved for the mask value: hashing uses `num_bins - 1` buckets, the
    hashed indices are shifted up by one, and positions equal to the mask
    value are forced to 0.

    Args:
        values: tensor of values to hash; integer tensors are converted to
            strings first.

    Returns:
        Tensor of bin indices.
    """
    str_to_hash_bucket = self._get_string_to_hash_bucket_fn()

    reserve_mask_bin = self.mask_value is not None and self.num_bins > 1
    if reserve_mask_bin:
        bucket_count = self.num_bins - 1
        # The mask is taken before any string conversion of `values`.
        mask = tf.equal(values, self.mask_value)
    else:
        bucket_count = self.num_bins
        mask = None

    # Convert all values to strings before hashing.
    if values.dtype.is_integer:
        values = tf.as_string(values)
    bins = str_to_hash_bucket(values, bucket_count, name='hash')

    if mask is None:
        return bins

    # Shift real values into bins 1..N-1 and put masked entries in bin 0.
    shifted = tf.add(bins, tf.compat.v1.ones_like(bins))
    return tf.compat.v1.where(
        mask, tf.compat.v1.zeros_like(shifted), shifted)
def _create_ensemble_logits_helper(self, weighted_subnetworks, bias,
                                   summary, key=None, index=None):
    """Return the ensemble logits for `key`.

    The result is `bias` plus the logits of every weighted subnetwork, each
    looked up through `_lookup_if_dict(..., key)`. The additions are created
    inside a variable scope named "logits_<index>" when `index` is truthy,
    and "logits" otherwise (including `index == 0`).

    Args:
        weighted_subnetworks: iterable of objects exposing `.logits`.
        bias: bias term, or a dict of bias terms.
        summary: not used by this helper.
        key: optional key handed to `_lookup_if_dict`.
        index: optional index used to build the scope name.

    Returns:
        The summed ensemble logits.
    """
    subnetwork_logits = [
        _lookup_if_dict(weighted_subnetwork.logits, key)
        for weighted_subnetwork in weighted_subnetworks
    ]
    scope_name = "logits_{}".format(index) if index else "logits"
    with tf_compat.v1.variable_scope(scope_name):
        ensemble_logits = _lookup_if_dict(bias, key)
        for logits in subnetwork_logits:
            ensemble_logits = tf.add(ensemble_logits, logits)
    return ensemble_logits
def _dft_magnitude(self, signal):
    """Compute the DFT magnitude of `signal` using plain matmuls.

    Real and imaginary parts are obtained by multiplying with
    `self.real_dft_tensor` and `self.imag_dft_tensor`, avoiding ops that are
    incompatible with tflite. The magnitude (not the squared magnitude) is
    always returned.

    Args:
        signal: has dims [..., frame_size]

    Returns:
        magnitude_spectrogram: with dims [..., fft_size]
    """
    real_part = tf.matmul(signal, self.real_dft_tensor)
    imag_part = tf.matmul(signal, self.imag_dft_tensor)
    squared_magnitude = tf.add(real_part * real_part, imag_part * imag_part)
    return tf.sqrt(squared_magnitude)
def cal_longest_subsequence(softmaxed_logits):
    """Build an increasing list from the per-row indices of the logits.

    Each row of `softmaxed_logits` is rounded to integers and multiplied by
    the column vector [0, 1, ..., C-1], giving one integer per row. Those
    integers are then scanned in order while maintaining an increasing list:

    * a value larger than the current last element is appended;
    * a value smaller than the current first element replaces it;
    * otherwise `binarySearch` picks the slot to overwrite (no change when it
      returns -1).

    The previous version also built several tensors from the result
    (index differences, their squares, a reciprocal) that were never
    returned; that dead work has been removed. The returned list is
    unchanged.

    Args:
        softmaxed_logits: eager tensor indexed as [row, class]; `.shape[1]`
            must be static because it sizes the index vector.

    Returns:
        The increasing list of integers described above.
    """
    num_classes = softmaxed_logits.shape[1]
    int_logits = tf.dtypes.cast(tf.round(softmaxed_logits), dtype=tf.int32)
    class_index_column = tf.reshape(
        tf.range(num_classes, dtype=tf.int32), [num_classes, 1])
    new_seq = tf.transpose(
        tf.matmul(int_logits, class_index_column))[0].numpy().tolist()

    subseq = []
    for value in new_seq:
        if not subseq or value > subseq[-1]:
            # First element, or strictly extends the list.
            subseq.append(value)
        elif value < subseq[0]:
            subseq[0] = value
        else:
            index = binarySearch(subseq, 0, len(subseq) - 1, value)
            if index != -1:
                subseq[index] = value
    return subseq
def main(_):
    """Compute and print an element-wise Fibonacci tensor.

    Raises ValueError when both --debug and --tensorboard_debug_address are
    set, and NotImplementedError when either one is set, since tfdbg v2
    support is not implemented here. Otherwise starts from two all-ones
    int32 tensors of shape [tensor_size, tensor_size] and repeatedly adds
    them until position `FLAGS.length`.
    """
    # TODO(anthonyjliu): Enable debugger from flags
    if FLAGS.debug and FLAGS.tensorboard_debug_address:
        raise ValueError(
            "The --debug and --tensorboard_debug_address flags are mutually "
            "exclusive.")
    if FLAGS.debug or FLAGS.tensorboard_debug_address:
        raise NotImplementedError(
            "tfdbg v2 support for debug_fibonacci is not implemented yet")

    # Construct the TensorFlow network.
    shape = [FLAGS.tensor_size] * 2
    previous = tf.constant(np.ones(shape), dtype=tf.int32)
    current = tf.constant(np.ones(shape), dtype=tf.int32)
    for _ in xrange(2, FLAGS.length):
        previous, current = current, tf.add(previous, current)

    print("Fibonacci number at position %d:\n%s" %
          (FLAGS.length, current.numpy()))
def maybe_update_alpha():
    """Tell whether the current training step is a compression step.

    A step qualifies when both hold:
      * the global step is at or after `begin_compression_step`, and either
        at or before `end_compression_step` or `end_compression_step` is
        negative;
      * at least `compression_frequency` steps have passed since
        `last_alpha_update_step`.

    Returns:
      Boolean tensor whether the training step is a compression step.
    """
    spec = self._spec

    def _current_step():
        return tf.cast(self._global_step, tf.int32)

    reached_begin = tf.greater_equal(
        _current_step(), spec.begin_compression_step)
    not_past_end = tf.logical_or(
        tf.less_equal(_current_step(), spec.end_compression_step),
        tf.less(spec.end_compression_step, 0))
    is_step_within_compression_range = tf.logical_and(
        reached_begin, not_past_end)

    next_update_step = tf.add(
        self.last_alpha_update_step, spec.compression_frequency)
    is_compression_step = tf.less_equal(next_update_step, _current_step())

    return tf.logical_and(
        is_step_within_compression_range, is_compression_step)
def from_importance_weights(log_rhos,
                            discounts,
                            rewards,
                            values,
                            bootstrap_value,
                            clip_rho_threshold=1.0,
                            clip_pg_rho_threshold=1.0,
                            name='vtrace_from_importance_weights'):
    r"""V-trace from log importance weights.

    Calculates V-trace actor critic targets as described in

    "IMPALA: Scalable Distributed Deep-RL with
    Importance Weighted Actor-Learner Architectures"
    by Espeholt, Soyer, Munos et al.

    In the notation used throughout documentation and comments, T refers to
    the time dimension ranging from 0 to T-1 and B refers to the batch size.
    This code also supports the case where all tensors have the same number
    of additional dimensions, e.g., `rewards` is [T, B, C], `values` is
    [T, B, C], `bootstrap_value` is [B, C].

    The backward recursion is unrolled in Python over the static length of
    the time dimension, so `discounts.shape[0]` must be known.

    Args:
      log_rhos: A float32 tensor of log importance sampling weights, i.e.
        log(target_policy(a) / behaviour_policy(a)). Must have the same rank
        as `values`, `discounts` and `rewards` (asserted below; usually
        [T, B]).
      discounts: A float32 tensor of shape [T, B] with discounts encountered
        when following the behaviour policy.
      rewards: A float32 tensor of shape [T, B] containing rewards generated
        by following the behaviour policy.
      values: A float32 tensor of shape [T, B] with the value function
        estimates wrt. the target policy.
      bootstrap_value: A float32 of shape [B] with the value function
        estimate at time T.
      clip_rho_threshold: A scalar float32 tensor with the clipping threshold
        for importance weights (rho) when calculating the baseline targets
        (vs). rho^bar in the paper. If None, no clipping is applied.
      clip_pg_rho_threshold: A scalar float32 tensor with the clipping
        threshold on rho_s in
        \rho_s \delta log \pi(a|x) (r + \gamma v_{s+1} - V(x_s)).
        If None, no clipping is applied.
      name: The name scope that all V-trace operations will be created in.

    Returns:
      A VTraceReturns namedtuple (vs, pg_advantages) where:
        vs: A float32 tensor of shape [T, B]. Can be used as target to
          train a baseline (V(x_t) - vs_t)^2.
        pg_advantages: A float32 tensor of shape [T, B]. Can be used as the
          advantage in the calculation of policy gradients.
    """
    def _as_float32(tensor_like):
        return tf.convert_to_tensor(tensor_like, dtype=tf.float32)

    log_rhos = _as_float32(log_rhos)
    discounts = _as_float32(discounts)
    rewards = _as_float32(rewards)
    values = _as_float32(values)
    bootstrap_value = _as_float32(bootstrap_value)
    if clip_rho_threshold is not None:
        clip_rho_threshold = _as_float32(clip_rho_threshold)
    if clip_pg_rho_threshold is not None:
        clip_pg_rho_threshold = _as_float32(clip_pg_rho_threshold)

    # Make sure tensor ranks are consistent.
    rho_rank = log_rhos.shape.ndims  # Usually 2.
    values.shape.assert_has_rank(rho_rank)
    bootstrap_value.shape.assert_has_rank(rho_rank - 1)
    discounts.shape.assert_has_rank(rho_rank)
    rewards.shape.assert_has_rank(rho_rank)
    if clip_rho_threshold is not None:
        clip_rho_threshold.shape.assert_has_rank(0)
    if clip_pg_rho_threshold is not None:
        clip_pg_rho_threshold.shape.assert_has_rank(0)

    with tf.name_scope(name):
        rhos = tf.exp(log_rhos)
        if clip_rho_threshold is None:
            clipped_rhos = rhos
        else:
            clipped_rhos = tf.minimum(
                clip_rho_threshold, rhos, name='clipped_rhos')
        cs = tf.minimum(1.0, rhos, name='cs')

        # Append bootstrapped value to get [v1, ..., v_t+1]
        bootstrap_row = tf.expand_dims(bootstrap_value, 0)
        values_t_plus_1 = tf.concat([values[1:], bootstrap_row], axis=0)
        deltas = clipped_rhos * (
            rewards + discounts * values_t_plus_1 - values)

        # Backward recursion: acc_t = delta_t + discount_t * c_t * acc_{t+1}.
        acc = tf.zeros_like(bootstrap_value)
        vs_minus_v_xs = []
        for t in reversed(range(int(discounts.shape[0]))):
            discount_t, c_t, delta_t = discounts[t], cs[t], deltas[t]
            acc = delta_t + discount_t * c_t * acc
            vs_minus_v_xs.append(acc)
        # Collected from the last step to the first; restore time order.
        vs_minus_v_xs.reverse()

        # Add V(x_s) to get v_s.
        vs = tf.add(vs_minus_v_xs, values, name='vs')

        # Advantage for policy gradient.
        vs_t_plus_1 = tf.concat(
            [vs[1:], tf.expand_dims(bootstrap_value, 0)], axis=0)
        if clip_pg_rho_threshold is None:
            clipped_pg_rhos = rhos
        else:
            clipped_pg_rhos = tf.minimum(
                clip_pg_rho_threshold, rhos, name='clipped_pg_rhos')
        pg_advantages = clipped_pg_rhos * (
            rewards + discounts * vs_t_plus_1 - values)

        # Make sure no gradients backpropagated through the returned values.
        return VTraceReturns(
            vs=tf.stop_gradient(vs),
            pg_advantages=tf.stop_gradient(pg_advantages))
def add_or_or(x1, x2):
    """Combine two tensors: logical OR for booleans, addition otherwise.

    The branch is chosen from the dtype of `x1`; when it is `tf.bool`, `x2`
    is asserted to be `tf.bool` as well.
    """
    first_is_bool = x1.dtype == tf.bool
    if not first_is_bool:
        return tf.add(x1, x2)
    assert x2.dtype == tf.bool
    return tf.logical_or(x1, x2)
def compute_loss_and_metrics(mu,
                             log_sigma_sq,
                             regression_targets,
                             labels,
                             task_type,
                             model_uncertainty,
                             loss_config,
                             regularization_loss=0.,
                             confidence_interval=95,
                             mode='train'):
    """Computes loss statistics and other metrics.

    For `TASK_CLASSIFICATION` the loss is the mean sparse softmax cross
    entropy of `mu` against `labels`. For every other task type the loss is
    built from `mse_loss(mu, regression_targets)`, optionally normalized when
    `loss_config['mse_normalize']` is truthy, and, when `model_uncertainty`
    is set, weighted by exp(-log_sigma_sq) and combined with the mean of
    `log_sigma_sq`. `regularization_loss` is added at the end and the total
    is logged as "<mode>_loss".

    Returns:
      A `(scalars_to_log, vectors_to_log)` pair of dicts. The scalar keys are
      asserted to equal `get_all_metric_names(...)` for this configuration.
    """
    scalars_to_log = {'regularization_loss': regularization_loss}
    vectors_to_log = {'mu': mu}

    if task_type == TASK_CLASSIFICATION:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=mu, labels=labels, name='cross_entropy')
        classification_loss = tf.reduce_mean(cross_entropy, name='class_loss')
        total_loss = classification_loss
        scalars_to_log['classification_loss'] = classification_loss

        predicted_labels = tf.argmax(mu, axis=1)
        correct_predictions = equal32(predicted_labels, labels)
    else:
        regression_loss = mse_loss(mu, regression_targets)
        if 'mse_normalize' in loss_config and loss_config['mse_normalize']:
            assert task_type in [
                TASK_GROUNDED_UNNORMALIZED_REGRESSION,
                TASK_NORMALIZED_REGRESSION
            ]
            regression_loss = normalize_regression_loss(regression_loss, mu)

        avg_regression_loss = tf.reduce_mean(regression_loss)
        vectors_to_log['regression_loss'] = regression_loss
        scalars_to_log['regression_loss'] = avg_regression_loss

        scalars_to_log['avg_mu'] = tf.reduce_mean(mu)
        scalars_to_log['var_mu'] = tf.reduce_mean(
            mse_loss(mu, tf.reduce_mean(mu)))

        # Predictions are thresholded at zero to compare against labels.
        predicted_labels = tf.cast(mu > 0, tf.int64)
        correct_predictions = equal32(predicted_labels, labels)

        if not model_uncertainty:
            total_loss = avg_regression_loss
        else:
            # This implements Eq. (1) in https://arxiv.org/pdf/1612.01474.pdf
            inv_sigma_sq = tf.math.exp(-log_sigma_sq)
            scaled_regression_loss = tf.reduce_mean(
                regression_loss * inv_sigma_sq)
            uncertainty_loss = tf.reduce_mean(log_sigma_sq)
            total_loss = uncertainty_loss + scaled_regression_loss

            scalars_to_log['uncertainty_loss'] = uncertainty_loss
            scalars_to_log['scaled_regression_loss'] = scaled_regression_loss
            scalars_to_log['uncertainty_plus_scaled_regression'] = total_loss

            sigma = tf.math.exp(log_sigma_sq / 2.)
            vectors_to_log['sigma'] = sigma
            scalars_to_log['avg_sigma'] = tf.reduce_mean(sigma)
            scalars_to_log['var_sigma'] = tf.reduce_mean(
                mse_loss(sigma, tf.reduce_mean(sigma)))

            # Compute # of labels that fall into the confidence interval.
            std_factor = get_std_factor_from_confidence_percent(
                confidence_interval)
            half_width = std_factor * sigma
            lower_bound = mu - half_width
            upper_bound = mu + half_width
            in_interval = tf.logical_and(
                tf.greater(regression_targets, lower_bound),
                tf.less(regression_targets, upper_bound))
            percent_in_conf_interval = tf.reduce_mean(
                tf.cast(in_interval, tf.float32))
            scalars_to_log['percent_in_conf_interval'] = (
                percent_in_conf_interval * 100)

            scalars_to_log['error_sigma_correlation'] = (
                tfp.stats.correlation(
                    x=regression_loss, y=sigma, event_axis=None))

            target_probs = tfp.distributions.Normal(mu, sigma).prob(
                regression_targets)
            scalars_to_log['avg_prob'] = tf.reduce_mean(target_probs)

    loss_name = str(mode) + '_loss'
    total_loss = tf.add(total_loss, regularization_loss, name=loss_name)
    scalars_to_log[loss_name] = total_loss

    vectors_to_log['correct_predictions'] = correct_predictions
    scalars_to_log['prediction_accuracy'] = tf.reduce_mean(
        correct_predictions)

    # Validate that metrics outputted are exactly what is expected
    expected = get_all_metric_names(task_type, model_uncertainty,
                                    loss_config, mode, False)
    assert set(expected) == set(scalars_to_log)
    return scalars_to_log, vectors_to_log
def train_loop_body(step):
    """Run one optimizer step and return the incremented step counter.

    In eager mode the loss function itself is handed to
    `optimizer.minimize`; otherwise it is called first and its result is
    passed. The returned `step + 1` carries a control dependency on the
    training op.
    """
    if tf.executing_eagerly():
        loss = build_loss_fn
    else:
        loss = build_loss_fn()
    train_op = optimizer.minimize(loss)
    next_step = tf.add(step, 1)
    return tf.tuple(tensors=[next_step], control_inputs=[train_op])