def _training(self):
    """Run the update steps on the stored episodes, then empty the memory.

    The graph built here first asserts that the memory index equals
    `config.update_every`, reads the stored episodes, normalizes
    observations and rewards with their filters, performs the update steps,
    adjusts the penalty, and finally clears the memory and resets the
    memory index to zero. Each stage is chained to the previous one through
    control dependencies.

    Returns:
      Summary tensor merging the update, penalty and weight summaries.
    """
    with tf.name_scope('training'):
        memory_is_full = tf.assert_equal(
            self._memory_index, self._config.update_every)
        with tf.control_dependencies([memory_is_full]):
            episodes = self._memory.data()
        (observ, action, old_mean, old_logstd, reward), length = episodes

        # Refuse to train on empty episodes.
        with tf.control_dependencies([tf.assert_greater(length, 0)]):
            length = tf.identity(length)

        observ = self._observ_filter.transform(observ)
        reward = self._reward_filter.transform(reward)

        update_summary = self._perform_update_steps(
            observ, action, old_mean, old_logstd, reward, length)

        # The penalty is adjusted only after the update steps have run.
        with tf.control_dependencies([update_summary]):
            penalty_summary = self._adjust_penalty(
                observ, old_mean, old_logstd, length)

        # The memory is wiped only after the penalty adjustment.
        with tf.control_dependencies([penalty_summary]):
            reset_index = self._memory.clear(), self._memory_index.assign(0)
            clear_memory = tf.group(*reset_index)

        with tf.control_dependencies([clear_memory]):
            weight_summary = utility.variable_summaries(
                tf.trainable_variables(), self._config.weight_summaries)
            summaries = [update_summary, penalty_summary, weight_summary]
            return tf.summary.merge(summaries)
def select_indices(size, scores, indices=None, soft=False) -> tf.Tensor:
    """Selects indices of the instances to label given the scores.

    Parameters
    ----------
    size : int
        Number of samples to label. It is clipped to the number of scores.
    scores : Tensor <float32> [num_samples]
        A vector of scores that are used to select which sample to label.
    indices : Tensor <int32> [num_instances], optional
        A vector of absolute indices of the samples in a larger collection.
        If not None, the method returns `selected_indices` from `indices`.
        Otherwise, `selected_indices` are relative.
    soft : bool, optional (default=False)
        Whether to select top indices softly by perturbing the scores with
        Gumbel noise before taking the top-k.

    Returns
    -------
    selected_indices : Tensor <int32> [size]
    """
    if soft:
        # `tf.random.uniform` samples from [0, 1). An exact 0 would give
        # log(0) = -inf and therefore a -inf Gumbel perturbation, so the
        # samples are bounded away from zero.
        uniform_samples = tf.random.uniform(
            tf.shape(scores), minval=1e-20, maxval=1.0)
        gumbel_noise = -tf.math.log(-tf.math.log(uniform_samples))
        scores = tf.add(scores, gumbel_noise)

    # Selecting from an empty score vector is treated as an error.
    with tf.control_dependencies([tf.assert_greater(tf.size(scores), 0)]):
        size = tf.minimum(size, tf.size(scores))

    _, selected_indices = tf.nn.top_k(scores, k=size)

    if indices is not None:
        # Map positions within `scores` back to the caller's absolute indices.
        selected_indices = tf.gather(indices, selected_indices, axis=0)

    return tf.cast(selected_indices, tf.int32)
def sample_from_logits(logits):
    """Draw one multinomial sample per logit vector after temperature scaling.

    `temperature` is not a parameter of this function; it is read from the
    surrounding scope and asserted to be strictly positive.

    Args:
      logits: Tensor whose last axis holds the per-class logits. All leading
        axes are flattened for sampling and restored afterwards.

    Returns:
      Tensor of sampled class indices with the shape of `logits` minus its
      last axis.
    """
    with tf.control_dependencies([tf.assert_greater(temperature, 0.0)]):
        logits = tf.identity(logits)

    logits_shape = tf.shape(logits)
    flat_logits = tf.reshape(logits, [-1, logits_shape[-1]]) / temperature
    choices = tf.multinomial(flat_logits, 1)

    # Slice the dynamic shape with [:-1] instead of the static rank, so that
    # logits with an unknown static rank (ndims is None) are handled too.
    return tf.reshape(choices, logits_shape[:-1])
def parse_reshape_logic(
        parsed_features: TensorDict,
        features: protein_features.FeaturesMetadata,
        key: Optional[str] = None) -> TensorDict:
    """Reshape every parsed (flat) feature to its declared shape.

    The target shape of each feature is obtained from
    `protein_features.shape`, given the number of residues, the number of
    alignments and the number of templates read from `parsed_features`.
    The dictionary is updated in place and also returned.

    Args:
      parsed_features: Mapping from feature name to parsed tensor. Must
        contain "seq_length".
      features: Feature metadata forwarded to `protein_features.shape`.
      key: Optional value stored under "key" when "key" is in `features`.

    Returns:
      `parsed_features`, with every value reshaped.
    """
    # Sizes that drive the target shapes.
    num_residues = tf.cast(
        _first(parsed_features["seq_length"]), dtype=tf.int32)

    num_msa = (
        tf.cast(_first(parsed_features["num_alignments"]), dtype=tf.int32)
        if "num_alignments" in parsed_features else 0)

    num_templates = (
        tf.cast(tf.shape(parsed_features["template_domain_names"])[0],
                dtype=tf.int32)
        if "template_domain_names" in parsed_features else 0)

    if key is not None and "key" in features:
        parsed_features["key"] = [key]  # Expand dims from () to (1,).

    for name, tensor in parsed_features.items():
        target_shape = protein_features.shape(
            feature_name=name,
            num_residues=num_residues,
            msa_length=num_msa,
            num_templates=num_templates,
            features=features)

        # Number of elements the target shape holds.
        expected_size = tf.constant(1, dtype=tf.int32)
        for dim in target_shape:
            expected_size = expected_size * tf.cast(dim, tf.int32)

        size_matches = tf.assert_equal(
            tf.size(tensor), expected_size,
            name="assert_%s_shape_correct" % name,
            message="The size of feature %s (%s) could not be reshaped "
                    "into %s" % (name, tf.size(tensor), target_shape))

        if "template" in name:
            # Template features are allowed to be empty.
            guards = [size_matches]
        else:
            # Make sure the feature we are reshaping is not empty.
            is_non_empty = tf.assert_greater(
                tf.size(tensor), 0,
                name="assert_%s_non_empty" % name,
                message="The feature %s is not set in the tf.Example. Either do not "
                        "request the feature or use a tf.Example that has the "
                        "feature set." % name)
            guards = [is_non_empty, size_matches]

        with tf.control_dependencies(guards):
            parsed_features[name] = tf.reshape(
                tensor, target_shape, name="reshape_%s" % name)

    return parsed_features
def _maybe_validate_args(outcomes, logits, probs, validate_args): """Validate `outcomes`, `logits` and `probs`'s shapes.""" assertions = [] def validate_equal_last_dim(tensor_a, tensor_b, message): if tensor_a.shape.is_fully_defined( ) and tensor_b.shape.is_fully_defined(): if tensor_a.shape[-1] != tensor_b.shape[-1]: raise ValueError(message) elif validate_args: assertions.append( tf1.assert_equal(tf.shape(tensor_a)[-1], tf.shape(tensor_b)[-1], message=message)) if logits is not None: validate_equal_last_dim( outcomes, logits, message='Last dimension of outcomes and logits must be equal size.' ) if probs is not None: validate_equal_last_dim( outcomes, probs, message='Last dimension of outcomes and probs must be equal size.') message = 'Rank of outcomes must be 1.' if outcomes.shape.ndims is not None: if outcomes.shape.ndims != 1: raise ValueError(message) elif validate_args: assertions.append(tf1.assert_rank(outcomes, 1, message=message)) message = 'Size of outcomes must be greater than 0.' if outcomes.shape.num_elements() is not None: if outcomes.shape.num_elements() == 0: raise ValueError(message) elif validate_args: assertions.append( tf1.assert_greater(tf.size(outcomes), 0, message=message)) if validate_args: assertions.append( tf1.assert_equal(tf.math.is_strictly_increasing(outcomes), True, message='outcomes is not strictly increasing.')) return assertions
def parse_tfexample(raw_data, features):
    """Parse one serialized tf.Example and reshape the requested features.

    Args:
      raw_data: A serialized tf.Example proto.
      features: A dictionary mapping string feature names to a tuple whose
        first element is the dtype used for parsing. It must contain
        'seq_length', which is read to obtain the number of residues.

    Returns:
      A dictionary mapping each requested feature name to its tensor,
      reshaped according to `shape(feature_name=..., num_residues=...)`.
    """
    # Every feature is parsed as a flat, variable-length sequence.
    parse_spec = {}
    for feature_name, spec in features.items():
        parse_spec[feature_name] = tf.io.FixedLenSequenceFeature(
            shape=(), dtype=spec[0], allow_missing=True)

    parsed_features = tf.io.parse_single_example(raw_data, parse_spec)

    # The sequence length determines the target shapes.
    num_residues = tf.cast(parsed_features['seq_length'][0], dtype=tf.int32)

    for feature_name, flat_tensor in parsed_features.items():
        target_shape = shape(
            feature_name=feature_name, num_residues=num_residues)

        # Make sure the feature we are reshaping is not empty.
        is_non_empty = tf.assert_greater(
            tf.size(flat_tensor), 0,
            name='assert_%s_non_empty' % feature_name,
            message='The feature %s is not set in the tf.Example. Either do not '
                    'request the feature or use a tf.Example that has the feature set.'
                    % feature_name)

        with tf.control_dependencies([is_non_empty]):
            parsed_features[feature_name] = tf.reshape(
                flat_tensor, target_shape, name='reshape_%s' % feature_name)

    return parsed_features
def __init__(self,
             learning_rate,
             preconditioner_decay_rate=0.95,
             data_size=1,
             burnin=25,
             diagonal_bias=1e-8,
             name=None,
             parallel_iterations=10):
    """Convert the hyperparameters to tensors and attach validity assertions.

    Args:
      learning_rate: Converted to a tensor; no assertion is attached.
      preconditioner_decay_rate: Asserted to lie in [0, 1].
      data_size: Asserted to be greater than zero.
      burnin: Asserted to be a non-negative integer.
      diagonal_bias: Asserted to be non-negative.
      name: Optional name; defaults to 'StochasticGradientLangevinDynamics'.
      parallel_iterations: Stored unchanged on the instance.

    Raises:
      NotImplementedError: If called while executing eagerly.
    """
    default_name = 'StochasticGradientLangevinDynamics'
    scope_values = [
        learning_rate, preconditioner_decay_rate, data_size, burnin,
        diagonal_bias
    ]
    with tf1.name_scope(name, default_name, scope_values):
        if tf.executing_eagerly():
            raise NotImplementedError(
                'Eager execution currently not supported for '
                ' SGLD optimizer.')

        # Step 1: turn every hyperparameter into a tensor.
        decay_rate = tf.convert_to_tensor(
            value=preconditioner_decay_rate,
            name='preconditioner_decay_rate')
        data_size_t = tf.convert_to_tensor(value=data_size, name='data_size')
        burnin_dtype = dtype_util.common_dtype([burnin],
                                               dtype_hint=tf.int64)
        burnin_t = tf.convert_to_tensor(
            value=burnin, name='burnin', dtype=burnin_dtype)
        diagonal_bias_t = tf.convert_to_tensor(
            value=diagonal_bias, name='diagonal_bias')
        # TODO(b/124800185): Consider migrating `learning_rate` to be a
        # hyperparameter handled by the base Optimizer class. This would
        # allow users to plug in a
        # `tf.keras.optimizers.schedules.LearningRateSchedule` object in
        # addition to Tensors.
        self._learning_rate = tf.convert_to_tensor(
            value=learning_rate, name='learning_rate')
        self._parallel_iterations = parallel_iterations

        # Step 2: gate each tensor behind its range assertions.
        decay_checks = [
            tf1.assert_non_negative(
                decay_rate,
                message='`preconditioner_decay_rate` must be non-negative'),
            tf1.assert_less_equal(
                decay_rate,
                1.,
                message='`preconditioner_decay_rate` must be at most 1.'),
        ]
        self._preconditioner_decay_rate = (
            distribution_util.with_dependencies(decay_checks, decay_rate))

        data_size_checks = [
            tf1.assert_greater(
                data_size_t,
                0,
                message='`data_size` must be greater than zero'),
        ]
        self._data_size = distribution_util.with_dependencies(
            data_size_checks, data_size_t)

        burnin_checks = [
            tf1.assert_non_negative(
                burnin_t, message='`burnin` must be non-negative'),
            tf1.assert_integer(
                burnin_t, message='`burnin` must be an integer'),
        ]
        self._burnin = distribution_util.with_dependencies(
            burnin_checks, burnin_t)

        bias_checks = [
            tf1.assert_non_negative(
                diagonal_bias_t,
                message='`diagonal_bias` must be non-negative'),
        ]
        self._diagonal_bias = distribution_util.with_dependencies(
            bias_checks, diagonal_bias_t)

        super(StochasticGradientLangevinDynamics,
              self).__init__(name=name or default_name)
def _potential_scale_reduction_single_state(state, independent_chain_ndims,
                                            split_chains, validate_args):
    """potential_scale_reduction for one single state `Tensor`.

    Args:
      state: Tensor (or value convertible to one). Exactly one leading
        dimension is assumed to index samples; the following
        `independent_chain_ndims` dimensions index chains.
      independent_chain_ndims: Number of dimensions, after the sample
        dimension, that index independent chains.
      split_chains: If true, each chain is split in half along the sample
        dimension and the halves are treated as separate chains. With an odd
        number of samples, the last sample is dropped.
      validate_args: If true, and the number of samples is not known
        statically, a runtime assertion on the minimum sample count is added.

    Returns:
      Tensor holding the potential scale reduction, reduced over the sample
      and chain dimensions.

    Raises:
      ValueError: If the number of samples is known statically and is fewer
        than 4 (when splitting chains) or fewer than 2 (otherwise).
    """
    with tf.name_scope('potential_scale_reduction_single_state'):
        # We assume exactly one leading dimension indexes e.g. correlated
        # samples from each Markov chain.
        state = tf.convert_to_tensor(state, name='state')

        if split_chains:
            min_samples = 4
            too_few_msg = (
                'Must provide at least 4 samples when splitting chains.')
        else:
            min_samples = 2
            too_few_msg = 'Must provide at least 2 samples.'

        n_samples_ = tf.compat.dimension_value(state.shape[0])
        if n_samples_ is not None:  # If available statically.
            if n_samples_ < min_samples:
                raise ValueError(
                    '{} Found {}'.format(too_few_msg, n_samples_))
        elif validate_args:
            # "At least" means >=. This must agree with the static check
            # above, so exactly `min_samples` samples is accepted here too.
            state = distribution_util.with_dependencies([
                tf1.assert_greater_equal(
                    tf.shape(state)[0], min_samples, message=too_few_msg)
            ], state)

        # Define so it's not a magic number.
        # Warning! `if split_chains` logic assumes this is 1!
        sample_ndims = 1

        if split_chains:
            # Split the sample dimension in half, doubling the number of
            # independent chains.

            # For odd number of samples, keep all but the last sample.
            state_shape = prefer_static.shape(state)
            n_samples = state_shape[0]
            state = state[:n_samples - n_samples % 2]

            # Suppose state = [0, 1, 2, 3, 4, 5]
            # Step 1: reshape into [[0, 1, 2], [3, 4, 5]]
            # E.g. reshape states of shape [a, b] into [2, a//2, b].
            state = tf.reshape(
                state,
                prefer_static.concat(
                    [[2, n_samples // 2], state_shape[1:]], axis=0))

            # Step 2: Put the size `2` dimension in the right place to be
            # treated as a chain, changing [[0, 1, 2], [3, 4, 5]] into
            # [[0, 3], [1, 4], [2, 5]], reshaping [2, a//2, b] into
            # [a//2, 2, b].
            state = tf.transpose(
                a=state,
                perm=prefer_static.concat(
                    [[1, 0], tf.range(2, tf.rank(state))], axis=0))

            # We're treating the new dim as indexing 2 chains, so increment.
            independent_chain_ndims += 1

        sample_axis = tf.range(0, sample_ndims)
        chain_axis = tf.range(sample_ndims,
                              sample_ndims + independent_chain_ndims)
        sample_and_chain_axis = tf.range(
            0, sample_ndims + independent_chain_ndims)

        n = _axis_size(state, sample_axis)
        m = _axis_size(state, chain_axis)

        # In the language of Brooks and Gelman (1998),
        # B / n is the between chain variance, the variance of the chain
        # means. W is the within sequence variance, the mean of the chain
        # variances.
        b_div_n = _reduce_variance(
            tf.reduce_mean(state, axis=sample_axis, keepdims=True),
            sample_and_chain_axis,
            biased=False)
        # NOTE(review): `w` uses the biased estimator and is also the
        # denominator of the returned ratio. Brooks and Gelman appear to
        # define W with the unbiased (n - 1) normalizer; confirm this is
        # intended before changing it.
        w = tf.reduce_mean(
            _reduce_variance(state, sample_axis, keepdims=True, biased=True),
            axis=sample_and_chain_axis)

        # sigma^2_+ is an estimate of the true variance, which would be
        # unbiased if each chain was drawn from the target. c.f. "law of
        # total variance."
        sigma_2_plus = w + b_div_n

        return ((m + 1.) / m) * sigma_2_plus / w - (n - 1.) / (m * n)
def __init__(self,
             batch_size,
             total_num_examples,
             max_learning_rate=1.,
             preconditioner_decay_rate=0.95,
             burnin=25,
             burnin_max_learning_rate=1e-6,
             use_single_learning_rate=False,
             name=None):
    """Convert the hyperparameters to tensors and attach validity assertions.

    Args:
      batch_size: Asserted to be greater than zero.
      total_num_examples: Asserted to be greater than zero.
      max_learning_rate: Asserted to be non-negative.
      preconditioner_decay_rate: Asserted to lie in [0, 1].
      burnin: Asserted to be a non-negative integer.
      burnin_max_learning_rate: Asserted to be non-negative.
      use_single_learning_rate: Stored unchanged on the instance.
      name: Optional name; defaults to 'VariationalSGD'.
    """
    default_name = 'VariationalSGD'
    scope_values = [
        max_learning_rate, preconditioner_decay_rate, batch_size, burnin,
        burnin_max_learning_rate
    ]
    with tf1.name_scope(name, default_name, scope_values):
        # Step 1: turn every hyperparameter into a tensor.
        decay_rate = tf.convert_to_tensor(
            value=preconditioner_decay_rate,
            name='preconditioner_decay_rate')
        batch_size_t = tf.convert_to_tensor(
            value=batch_size, name='batch_size')
        total_examples_t = tf.convert_to_tensor(
            value=total_num_examples, name='total_num_examples')
        burnin_dtype = dtype_util.common_dtype([burnin],
                                               dtype_hint=tf.int64)
        burnin_t = tf.convert_to_tensor(
            value=burnin, name='burnin', dtype=burnin_dtype)
        burnin_max_lr = tf.convert_to_tensor(
            value=burnin_max_learning_rate,
            name='burnin_max_learning_rate')
        max_lr = tf.convert_to_tensor(
            value=max_learning_rate, name='max_learning_rate')
        self._use_single_learning_rate = use_single_learning_rate

        # Step 2: gate each tensor behind its range assertions.
        decay_checks = [
            tf1.assert_non_negative(
                decay_rate,
                message='`preconditioner_decay_rate` must be non-negative'),
            tf1.assert_less_equal(
                decay_rate,
                1.,
                message='`preconditioner_decay_rate` must be at most 1.'),
        ]
        self._preconditioner_decay_rate = (
            distribution_util.with_dependencies(decay_checks, decay_rate))

        batch_size_checks = [
            tf1.assert_greater(
                batch_size_t,
                0,
                message='`batch_size` must be greater than zero'),
        ]
        self._batch_size = distribution_util.with_dependencies(
            batch_size_checks, batch_size_t)

        total_examples_checks = [
            tf1.assert_greater(
                total_examples_t,
                0,
                message='`total_num_examples` must be greater than zero'),
        ]
        self._total_num_examples = distribution_util.with_dependencies(
            total_examples_checks, total_examples_t)

        burnin_checks = [
            tf1.assert_non_negative(
                burnin_t, message='`burnin` must be non-negative'),
            tf1.assert_integer(
                burnin_t, message='`burnin` must be an integer'),
        ]
        self._burnin = distribution_util.with_dependencies(
            burnin_checks, burnin_t)

        burnin_lr_checks = [
            tf1.assert_non_negative(
                burnin_max_lr,
                message='`burnin_max_learning_rate` must be non-negative'),
        ]
        self._burnin_max_learning_rate = (
            distribution_util.with_dependencies(burnin_lr_checks,
                                                burnin_max_lr))

        max_lr_checks = [
            tf1.assert_non_negative(
                max_lr,
                message='`max_learning_rate` must be non-negative'),
        ]
        self._max_learning_rate = distribution_util.with_dependencies(
            max_lr_checks, max_lr)

        super(VariationalSGD, self).__init__(name=name or default_name)