def rel_seg_bias(q_head, seg_mat, n_head, d_head, initializer, func_mask=None,
                 dtype=tf.float32):
    """Compute the relative-attention segment bias.

    Args:
        q_head: Query heads; contracted with the segment embeddings through
            the einsum pattern ``"...inh,snh->...nis"``.
        seg_mat: Boolean tensor used as the `tf.where` condition: true picks
            the `same` bias, false picks the `diff` bias.
        n_head: Number of attention heads.
        d_head: Size of each head.
        initializer: Initializer for the two variables created here
            (``r_s_bias`` and ``seg_embed``).
        func_mask: Optional tensor multiplied into the result.
        dtype: dtype of the created variables and of the scale factor.

    Returns:
        The segment bias, shaped like `seg_mat` with an extra axis of size
        `n_head` inserted before the last two axes.
    """
    # Target shape [... x N x T x T]: seg_mat's dynamic shape with a head
    # axis spliced in before the last two axes.
    dyn_shape = tf.shape(seg_mat)
    tgt_shape = [dyn_shape[axis] for axis in range(seg_mat.shape.ndims)]
    tgt_shape.insert(-2, n_head)
    seg_mat = tf.expand_dims(seg_mat, -3)

    # Variables holding the query bias and the two (diff / same) embeddings.
    r_s_bias = tf.get_variable(
        "r_s_bias", [n_head, d_head], dtype=dtype, initializer=initializer)
    seg_embed = tf.get_variable(
        "seg_embed", [2, n_head, d_head], dtype=dtype,
        initializer=initializer)

    inv_sqrt_d = tf.cast(1.0 / np.sqrt(d_head), dtype)
    biased_query = q_head + r_s_bias * inv_sqrt_d

    # [... x N x T x 2], then one [... x N x T x 1] slice per segment relation.
    seg_biases = tf.einsum("...inh,snh->...nis", biased_query, seg_embed)
    diff_bias, same_bias = tf.split(seg_biases, 2, axis=-1)

    # Bring everything to the common target shape before selecting.
    seg_mat = tf.broadcast_to(seg_mat, tgt_shape)
    diff_bias = tf.broadcast_to(diff_bias, tgt_shape)
    same_bias = tf.broadcast_to(same_bias, tgt_shape)
    seg_bias = tf.where(seg_mat, same_bias, diff_bias)

    if func_mask is None:
        return seg_bias
    return seg_bias * func_mask
def _build(self, loc, raw_scale, batch_size):
    """Sample latents, decode them over the coordinate grid, split outputs.

    Args:
        loc: Location of the sampling distribution.
        raw_scale: Unconstrained scale; passed through softplus before use.
        batch_size: Leading dimension used for the noise and the final
            reshape.

    Returns:
        Tuple ``(apc, mask, log_mask, logits_mask)`` where `apc` is the
        decoded output rescaled by ``(v + 1) * 0.5``, and `mask` / `log_mask`
        are the softmax / log-softmax of `logits_mask` along axis 1.
    """
    with tf.name_scope('sample'):
        std = tf.math.softplus(raw_scale)
        eps = tf.random.normal((batch_size, *std.shape[1:]))
        sample = loc + std * eps

    with tf.name_scope('broadcast'):
        # Fold axis 1 of the sample into the batch axis.
        batch_mul_slot = batch_size * sample.shape[1]
        net = tf.reshape(sample, [batch_mul_slot, *sample.shape[2:]])
        # Insert two unit axes and tile the code over the grid's axes 1-2.
        net = tf.expand_dims(net, axis=1)
        net = tf.expand_dims(net, axis=2)
        tiled_shape = [batch_mul_slot, *self._grid.shape[1:3], net.shape[-1]]
        net = tf.broadcast_to(net, tiled_shape)
        grid = tf.broadcast_to(
            self._grid, [batch_mul_slot, *self._grid.shape[1:]])
        net = tf.concat([net, grid], axis=-1, name='concat')

    for idx, (layer, shape) in enumerate(zip(self._layers, self._shapes)):
        # Resize only when the current spatial size differs from the target.
        if net.shape[1:3] != shape:
            net = tf.image.resize_bilinear(net, shape)
        net = layer(net)
        net = tf.nn.elu(net, name='elu_{}'.format(idx))

    out_size = self._image_shape[:-1]
    if net.shape[1:3] != out_size:
        net = tf.image.resize_bilinear(net, out_size)
    net = self._conv_out(net)

    with tf.name_scope('convert'):
        # Undo the batch/slot folding done in the 'broadcast' scope.
        net = tf.reshape(net, [batch_size, sample.shape[1], *net.shape[1:]])
        apc, logits_mask = tf.split(
            net, [self._image_shape[-1], 1], axis=-1)
        apc = (apc + 1) * 0.5
        mask = tf.math.softmax(logits_mask, axis=1)
        log_mask = tf.math.log_softmax(logits_mask, axis=1)
    return apc, mask, log_mask, logits_mask
def _batch_slice(self, ary, start_ijk, w, batch_size):
    """Gather one `w`-wide cube per batch entry from a rank-3 grid.

    Args:
        ary: tensor, rank = 3.
        start_ijk: [batch_size, 3] tensor, starting index of each cube.
        w: width of the cube to extract.
        batch_size: int, batch size. NOTE: the value passed in is not used;
            it is replaced by the static leading dimension of `start_ijk`.

    Returns:
        batched_slices: [batch_size, w, w, w] tensor, batched slices of ary.
    """
    batch_size = start_ijk.shape[0]
    index_shape = [batch_size, w, w, w, 3]

    # Local (i, j, k) offsets inside a single cube: [w, w, w, 3].
    axis_range = tf.range(w, dtype=tf.int32)
    local_idx = tf.stack(
        tf.meshgrid(axis_range, axis_range, axis_range, indexing='ij'),
        axis=-1)

    # Replicate the offsets per batch entry and shift by each start corner.
    local_idx = tf.broadcast_to(local_idx[tf.newaxis], index_shape)
    corner = tf.broadcast_to(
        start_ijk[:, tf.newaxis, tf.newaxis, tf.newaxis, :], index_shape)
    gather_idx = local_idx + corner  # [batch_size, w, w, w, 3]

    return tf.gather_nd(ary, gather_idx)  # [batch_size, w, w, w]
def log_beta(a, x):
    """Return ``tf.lbeta`` of the pairs ``(x + 1, a)``, shaped [L-1, K, G, C].

    `a` and `x` are broadcast to ``[L - 1, K, G, C]`` (L, K, G, C come from
    the enclosing scope), flattened, paired along a trailing axis for
    `tf.lbeta`, and the result is reshaped back.
    """
    full_shape = [L - 1, K, G, C]
    a_flat = tf.reshape(tf.broadcast_to(a, full_shape), (-1, ))
    x_flat = tf.reshape(tf.broadcast_to(x, full_shape), (-1, ))
    # tf.lbeta reduces over the last axis, so stack each pair along axis 1.
    pairs = tf.stack([x_flat + 1.0, a_flat], axis=1)
    return tf.reshape(tf.lbeta(pairs), (L - 1, K, G, C))
def SGNN_loss(self):
    """Build the SGNN loss and store it in ``self.negative_sum``.

    For both the negative samples (`self.x_negative`) and the skip samples
    (`self.x_skip`), `self.x_origin` is broadcast to the matching shape, both
    sides are L2-normalized along axis 2, and their dot products are averaged
    along axis 1. The loss is the negated sum of
    ``log(sigmoid(-neg_mean)) + log(sigmoid(pos_mean))``.
    """

    def _mean_cosine(lhs, rhs):
        # Dot product along axis 2 followed by the mean along axis 1.
        return tf.reduce_mean(tf.reduce_sum(tf.multiply(lhs, rhs), 2), 1)

    # Negative-sample term.
    negative_norm = tf.math.l2_normalize(self.x_negative, axis=2)
    origin_for_negative = tf.broadcast_to(
        self.x_origin,
        [self.batch_size, self.negative_sample_size, self.latent_dim])
    origin_for_negative_norm = tf.math.l2_normalize(
        origin_for_negative, axis=2)
    negative_term = tf.math.log(tf.math.sigmoid(tf.math.negative(
        _mean_cosine(origin_for_negative_norm, negative_norm))))

    # Positive (skip) term.
    origin_for_positive = tf.broadcast_to(
        self.x_origin,
        [self.batch_size, self.walk_length, self.latent_dim])
    skip_norm = tf.math.l2_normalize(self.x_skip, axis=2)
    origin_for_positive_norm = tf.math.l2_normalize(
        origin_for_positive, axis=2)
    positive_term = tf.math.log(tf.math.sigmoid(
        _mean_cosine(skip_norm, origin_for_positive_norm)))

    total = tf.reduce_sum(tf.math.add(negative_term, positive_term))
    self.negative_sum = tf.math.negative(total)
def inputs(opts, index):
    """Build a synthetic batch whose every element equals `index`.

    Args:
        opts: Object exposing ``batch_size``.
        index: Value cast to float16 for the inputs and to int32 for the
            labels.

    Returns:
        Dict with ``"inputs"`` of shape ``[batch_size, 224, 224, 4]`` and
        ``"labels"`` of shape ``[batch_size]``.
    """
    batch = opts.batch_size
    images = tf.broadcast_to(
        tf.cast(index, tf.float16), [batch, 224, 224, 4])
    labels = tf.broadcast_to(tf.cast(index, tf.int32), [batch])
    return {"inputs": images, "labels": labels}
def _divide_no_nan(x, y, epsilon=1e-8):
    """Equivalent to tf.math.divide_no_nan but supports bfloat16.

    Returns ``x / y`` where ``|y| > epsilon`` and zero elsewhere.
    """

    def _near_zero(value):
        # True where -epsilon <= value <= epsilon.
        return tf.logical_and(
            tf.greater_equal(value, -epsilon),
            tf.less_equal(value, epsilon))

    # Replace near-zero denominators by one so the division itself is safe.
    safe_y = tf.where(_near_zero(y), tf.ones_like(y), y)
    # The outer selection needs a condition shaped like x, hence the manual
    # broadcast of y to x's static shape.
    y_like_x = tf.broadcast_to(y, x.get_shape())
    return tf.where(_near_zero(y_like_x), tf.zeros_like(x), x / safe_y)
def _build(self, inputs, batch_size):
    """Tile `inputs` over the grid, run the layers, and split the output.

    Args:
        inputs: Tensor that receives two unit axes (at positions 1 and 2)
            and is then broadcast over the grid's axes 1-2.
        batch_size: Leading dimension of the broadcast tensors.

    Returns:
        Tuple ``(apc, logits_mask)``: the first ``self._image_ch`` channels
        rescaled by ``(v + 1) * 0.5``, and the remaining single channel.
    """
    with tf.name_scope('broadcast'):
        net = tf.expand_dims(inputs, axis=1)
        net = tf.expand_dims(net, axis=2)
        tiled_shape = [batch_size, *self._grid.shape[1:3], net.shape[-1]]
        net = tf.broadcast_to(net, tiled_shape)
        grid = tf.broadcast_to(
            self._grid, [batch_size, *self._grid.shape[1:]])
        net = tf.concat([net, grid], axis=-1, name='concat')

    for layer in self._layers:
        net = layer(net)

    with tf.name_scope('convert'):
        apc, logits_mask = tf.split(net, [self._image_ch, 1], axis=-1)
        apc = (apc + 1) * 0.5
    return apc, logits_mask
def _eval_net(self, lat, weights, xloc, training=False):
    """Evaluate function values by querying the shared dense network.

    Args:
        lat: `[batch_size, num_points, 2**dim, in_features]` tensor, neighbor
            latent codes for each input point.
        weights: `[batch_size, num_points, 2**dim]` tensor, bi/tri-linear
            interpolation weights for each neighbor.
        xloc: `[batch_size, num_points, 2**dim, dim]` tensor, relative
            coordinates.
        training: bool, flag indicating training phase.

    Returns:
        values: `[batch_size, num_points, out_features]` tensor of query
        values. When ``self.method == "linear"`` and ``self.interp`` is
        false, a tuple ``(values, weights)`` is returned instead, with
        `values` of shape `[batch_size, num_points, 2**dim, out_features]`.
    """
    n_batch, n_pts, n_nbr, n_feat = lat.get_shape().as_list()
    flat_width = n_feat + self.dim

    if self.method != "linear":
        # Nearest neighbor: keep only the neighbor with the largest weight.
        nearest = tf.cast(tf.argmax(weights, axis=-1), tf.int32)
        batch_ids = tf.broadcast_to(
            tf.range(n_batch, dtype=tf.int32)[:, tf.newaxis],
            [n_batch, n_pts])
        point_ids = tf.broadcast_to(
            tf.range(n_pts, dtype=tf.int32)[tf.newaxis, :],
            [n_batch, n_pts])
        gather_id = tf.stack((batch_ids, point_ids, nearest), axis=-1)
        # [batch_size, num_points, in_features]
        nearest_lat = tf.gather_nd(lat, gather_id)
        # [batch_size, num_points, dim]
        nearest_xloc = tf.gather_nd(xloc, gather_id)
        inputs = tf.concat([nearest_xloc, nearest_lat], axis=-1)
        inputs = tf.reshape(inputs, [-1, flat_width])
        values = self.net(inputs, training=training)
        # [batch_size, num_points, out_features]
        return tf.reshape(values, [n_batch, n_pts, self.cout])

    # Linear: query the network for every neighbor of every point.
    # [batch_size, num_points, 2**dim, dim + in_features], then flattened.
    inputs = tf.concat([xloc, lat], axis=-1)
    inputs = tf.reshape(inputs, [-1, flat_width])
    values = self.net(inputs, training=training)
    # [batch_size, num_points, 2**dim, out_features]
    values = tf.reshape(values, [n_batch, n_pts, n_nbr, self.cout])
    if not self.interp:
        return (values, weights)
    # Weighted sum over neighbors: [batch_size, num_points, out_features].
    return tf.reduce_sum(tf.expand_dims(weights, axis=-1) * values, axis=2)
def make_argmax_indices(input_tensor, axis=1):
    """Given a tensor, find argmax over the axis and return indices.

    The output can be used as indices in tf.gather_nd and tf.scatter_nd ops.

    Args:
        input_tensor: A tensor to perform argmax over.
        axis: Which axis to take argmax over. Negative values count from the
            last axis, as in `tf.argmax`.

    Returns:
        indices: An index tensor that can be used in tf.gather_nd and
        tf.scatter_nd ops to gather from and scatter to the max index. Its
        shape is that of `input_tensor` with `axis` reduced to size 1, plus a
        trailing axis holding one coordinate per input axis.
    """
    extreme_idx = tf.argmax(input_tensor, axis=axis, output_type=tf.int32)
    # tf.argmax does not have keepdims argument, so we do it separately.
    extreme_idx = tf.expand_dims(extreme_idx, axis=axis)
    in_shape = tf.shape(extreme_idx)
    in_rank = len(extreme_idx.shape.as_list())

    # Normalize a negative axis. The loop below compares against
    # non-negative positions, so without this the argmax column would never
    # be selected and a range column (always 0) would be emitted instead.
    if axis < 0:
        axis += in_rank

    idx_list = []
    for dim in range(in_rank):
        if dim == axis:
            idx_list.append(extreme_idx)
            continue
        # Coordinate along `dim`: a range laid out on that axis and
        # broadcast to the full index shape.
        dim_len = in_shape[dim]
        pre_broadcast_shape = [1] * in_rank
        pre_broadcast_shape[dim] = dim_len
        dim_idx = tf.reshape(
            tf.cast(tf.range(dim_len), extreme_idx.dtype),
            pre_broadcast_shape)
        idx_list.append(tf.broadcast_to(dim_idx, in_shape))
    return tf.stack(idx_list, axis=-1)
def batch_segment_sum_embeddings(long_embeddings: tf.Tensor,
                                 long_word_idx: tf.Tensor,
                                 long_input_mask: tf.Tensor) -> tf.Tensor:
    """Sums wordpiece `long_embeddings` into word embeddings.

    Args:
        long_embeddings: <float32>[batch_size, long_max_length, hidden_size]
            Tensor of contextual embeddings for wordpieces, as output by ETC
            model.
        long_word_idx: <int32>[batch_size, long_max_length] Tensor
            representing the index of the word each wordpiece belongs to. The
            index for padding tokens can be any integer in the range
            [0, long_max_length) and will be ignored.
        long_input_mask: <int32>[batch_size, long_max_length] Tensor
            representing which *wordpiece* tokens in `long_embeddings` are
            present, with `1` for present tokens and `0` for padding.

    Returns:
        <float32>[batch_size, long_max_length, hidden_size] Tensor of
        embeddings for each word calculated by summing the embeddings of the
        wordpieces belonging to the word. The number of words is no greater
        than the number of wordpieces, but we keep `long_max_length`, so
        there may be an increase in padding. All padding embeddings will be
        0.
    """
    # Padding rows are zeroed first so they add nothing wherever they land.
    keep = tf.cast(long_input_mask, dtype=long_embeddings.dtype)
    long_embeddings = long_embeddings * keep[:, :, tf.newaxis]

    # Pair every wordpiece with (example index, word index) for the scatter.
    batch_size = tf.shape(long_embeddings)[0]
    batch_column = tf.range(batch_size)[:, tf.newaxis]
    example_idx = tf.broadcast_to(
        batch_column, shape=tf.shape(long_word_idx))
    scatter_indices = tf.stack([example_idx, long_word_idx], axis=-1)

    # scatter_nd sums updates that share an index.
    return tf.scatter_nd(
        indices=scatter_indices,
        updates=long_embeddings,
        shape=tf.shape(long_embeddings))
def outer_body(j, diffo, a, b, q):
    """Run one outer-loop step and return the updated loop variables.

    Besides its arguments this closure reads `x`, `sc`, `rho`, `vL_in`, `L`,
    `K`, `G`, `C`, `tol`, `n_inner_iter` and `log_beta` from the enclosing
    scope.

    Args:
        j: Remaining outer-iteration counter; returned decremented by one.
        diffo: Previous outer difference (not read in this body).
        a: Current value of the first parameter being updated.
        b: Current value of the second parameter being updated.
        q: Current value added to the stacked terms; replaced by `qt1`.

    Returns:
        ``[j - 1, diffo1, at1, bt1, qt1]`` where `diffo1` is the sum of the
        mean relative changes ``|a/at1 - 1|`` and ``|b/bt1 - 1|``.
    """
    def body(i, diff, at, bt):
        # `bgt` and `agt` are bound later in outer_body, before the
        # while_loop below invokes this function.
        bt1 = at * bgt
        # Updated `at`, clipped from below at 1e-5.
        at1 = tf.maximum(
            at + (tf.log(bt) + agt - tf.digamma(at)) / tf.polygamma(1.0, at),
            1e-5)
        # Sum of the mean relative changes of both parameters.
        diff1 = tf.reduce_mean(tf.abs(at/at1-1.0)) + \
            tf.reduce_mean(tf.abs(bt/bt1-1.0))
        return [i - 1, diff1, at1, bt1]

    def cond(i, diff, at, bt):
        # Continue while iterations remain and the change exceeds `tol`.
        return tf.logical_and(i > 0, diff > tol)

    vF = a * tf.log(b/sc) - (x+a) * tf.log(1+b/sc) - \
        log_beta(a, x) - tf.log(x+a)
    # Append `vL_in` (broadcast to [1, K, G, C]) as the last slice on axis 0.
    t = q + tf.concat([vF, tf.broadcast_to(vL_in, [1, K, G, C])], axis=0)
    eta = tf.reduce_logsumexp(t, axis=(0), keepdims=True)
    xi = rho + t - eta
    qt = tf.reduce_logsumexp(xi, axis=3, keepdims=True)  # sum over c
    qt1 = qt - tf.reduce_logsumexp(qt, axis=0, keepdims=True)  # sum over l
    # Subtract the max along axis 3 before exponentiating.
    xi_n = xi - tf.reduce_max(xi, axis=3, keepdims=True)
    # Only the first L - 1 slices on axis 0 enter the a/b statistics.
    p_n = tf.exp(xi_n[:L - 1, :, :, :])
    pt_n = tf.reduce_sum(p_n, axis=3, keepdims=True)
    bgt = pt_n / tf.reduce_sum(
        p_n * (a + x) / (b + sc), axis=3, keepdims=True)
    agt = tf.reduce_sum(
        p_n * (tf.digamma(x + a) - tf.log(sc + b)),
        axis=3, keepdims=True) / pt_n
    # Inner loop: starts from (a, b) with an initial difference of 1.0.
    i, diff_in, at1, bt1 = tf.while_loop(cond, body,
                                         (n_inner_iter, 1.0, a, b))
    diffo1 = tf.reduce_mean(tf.abs(a/at1-1.0)) + \
        tf.reduce_mean(tf.abs(b/bt1-1.0))
    return [j - 1, diffo1, at1, bt1, qt1]
def test_docstring_example(self):
    """Checks the values of the example in the `interpolate` docstring."""
    sigma_square_rows = [[0.15, 0.25, 0.35, 0.4, 0.45, 0.4],
                         [0.2, 0.35, 0.55, 0.45, 0.4, 0.6],
                         [0.3, 0.45, 0.25, 0.4, 0.5, 0.65],
                         [0.25, 0.25, 0.45, 0.25, 0.5, 0.55],
                         [0.35, 0.35, 0.25, 0.4, 0.55, 0.55]]
    expected_vols = np.array([[0.382399, 0.523347, 0.95],
                              [0.524797, 0.716694, 1.375],
                              [0.839203, 1.159248, 1.125],
                              [1.069968, 0.92655, 2.025]])
    for dtype in (np.float32, np.float64):
        # Query points.
        times = tf.constant([2., 2.5, 3, 4.5], dtype=dtype)
        strikes = tf.constant([16, 22, 35], dtype=dtype)
        # Grid data: one row of strikes and variances per time.
        times_data = tf.constant([1.5, 2.5, 3.5, 4.5, 5.5], dtype=dtype)
        sigma_square_data = tf.constant(sigma_square_rows, dtype=dtype)
        total_variance = tf.expand_dims(times_data, -1) * sigma_square_data
        strike_row = tf.constant([15, 25, 35, 40, 50, 55], dtype=dtype)
        strike_data = tf.broadcast_to(strike_row, [5, 6])

        interpolator = interpolation_2d.Interpolation2D(
            times_data, strike_data, total_variance, dtype=dtype)
        interpolated_vols = interpolator.interpolate(times, strikes)

        self.assertEqual(interpolated_vols.dtype.as_numpy_dtype, dtype)
        self.assertAllClose(interpolated_vols.shape.as_list(), [4, 3])
        self.assertAllClose(
            interpolated_vols, expected_vols, rtol=1e-04, atol=1e-04)
def _setup_graph(self, n_inp, n_out, drop_frac, start_iter=1, end_iter=4,
                 freq_iter=2):
    """Sets up a trivial training procedure for sparse training.

    Returns:
        Tuple ``(sess, train_op, masked_grad, expected_gradient)``. The
        session is created here and already has all global variables
        initialized.
    """
    tf.reset_default_graph()
    base_optimizer = tf.train.GradientDescentOptimizer(1e-3)
    global_step = tf.train.get_or_create_global_step()
    sparse_optim = sparse_optimizers.SparseRigLOptimizer(
        base_optimizer, start_iter, end_iter, freq_iter,
        drop_fraction=drop_frac)

    x = tf.ones((1, n_inp))
    y = layers.masked_fully_connected(x, n_out, activation_fn=None)

    # Multiplying the output with a range of constants gives constant but
    # different gradients at the masked weights. The extra global_step factor
    # makes the gradient grow linearly with time.
    output_ramp = tf.reshape(
        tf.cast(tf.range(tf.size(y)), dtype=y.dtype), y.shape)
    scale_vector = output_ramp * tf.cast(global_step, dtype=y.dtype)
    loss = tf.reduce_sum(y * scale_vector)

    global_step = tf.train.get_or_create_global_step()
    train_op = sparse_optim.minimize(loss, global_step)

    weight = pruning.get_weights()[0]
    expected_gradient = tf.broadcast_to(scale_vector, weight.shape)
    masked_grad = sparse_optim._weight2masked_grads[weight.name]

    # Init
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    return sess, train_op, masked_grad, expected_gradient
def _updated_cashflow(num_times, exercise_index, exercise_value,
                      expected_continuation, cashflow):
    """Revises the cashflow tensor where options will be exercised earlier.

    Wherever `exercise_value` exceeds `expected_continuation`, the exercise
    value is written at time slot ``exercise_index - 1`` and all cashflows in
    the last ``num_times - exercise_index`` slots are dropped. Elsewhere the
    cashflows in those last slots are kept. Slots before ``exercise_index``
    come only from the new exercise payoff.

    Returns:
        Tensor of shape `[num_samples, payoff_dim, num_times]`.
    """
    exercise_now = exercise_value > expected_continuation
    exercise_now_num = tf.cast(exercise_now, exercise_value.dtype)
    # Shape [num_samples, payoff_dim]: payoff where exercised, zero elsewhere.
    payoff_now = tf.where(
        exercise_now, exercise_value, tf.zeros_like(exercise_value))
    value_dtype = payoff_now.dtype
    base_shape = payoff_now.shape.as_list()
    steps_after = num_times - exercise_index

    # Newly exercised payoffs placed in their time slot, zeros elsewhere.
    new_cash = tf.concat([
        tf.zeros(base_shape + [exercise_index - 1], dtype=value_dtype),
        tf.expand_dims(payoff_now, 2),  # [num_samples, payoff_dim, 1]
        tf.zeros(base_shape + [steps_after], dtype=value_dtype),
    ], -1)

    # One where the option is NOT exercised now: [num_samples, payoff_dim, 1].
    keep_old = tf.expand_dims(1 - exercise_now_num, 2)
    # Shape [num_samples, payoff_dim, num_times - exercise_index].
    keep_old_after = tf.broadcast_to(keep_old, base_shape + [steps_after])
    # Shape [num_samples, payoff_dim, num_times]; first exercise_index slots
    # are always masked out.
    old_mask = tf.concat([
        tf.zeros(base_shape + [exercise_index], dtype=value_dtype),
        keep_old_after,
    ], -1)
    old_cash = old_mask * cashflow
    return new_cash + old_cash
def get_global_step(self):
    """Return the `global_step` variable tiled to the export batch size.

    Returns:
        int64 tensor of shape ``(self._export_batch_size,)`` whose every
        entry is the scalar `global_step` variable.
    """
    # tf.train.get_global_step() does not work well under model_fn for TPU,
    # so the variable is fetched (or created) by name in the root scope.
    with tf.variable_scope('', reuse=tf.AUTO_REUSE):
        step = tf.get_variable('global_step', shape=[], dtype=tf.int64)
        return tf.broadcast_to(step, shape=(self._export_batch_size, ))
def _init_graph(self):
    """Build the inference graph and restore weights from `self.ckpt`.

    Creates the refiner network, a `global_step` variable, placeholders for
    a batch of points and a single latent code, the output tensor
    `self.vals`, and a session with the checkpoint restored into it.
    """
    with self.graph.as_default():
        self.refiner = im.ImNet(
            dim=self.dim,
            in_features=self.codelen,
            out_features=self.out_features,
            num_filters=self.num_filters)
        self.global_step = tf.get_variable(
            'global_step', shape=[], dtype=tf.int64)

        self.pts_ph = tf.placeholder(
            tf.float32, shape=[self.point_batch, 3])
        self.lat_ph = tf.placeholder(tf.float32, shape=[self.codelen])

        # Repeat the single latent code for every point in the batch.
        tiled_lat = tf.broadcast_to(
            self.lat_ph[tf.newaxis], [self.point_batch, self.codelen])
        code = tf.concat((self.pts_ph, tiled_lat), axis=-1)  # [pb, 3+c]
        raw_vals = self.refiner(code, training=False)  # [pb, 1]
        self.vals = tf.squeeze(raw_vals, axis=1)  # [pb]

        self.saver = tf.train.Saver()
        self.sess = tf.Session()
        self.saver.restore(self.sess, self.ckpt)
def batch_random_blur(images, side_length=IMAGE_SIZE, blur_probability=0.5):
    """Probabilistically applies a Gaussian blur across a batch of images.

    Each image in the batch is blurred with probability `blur_probability`.

    Args:
        images: A Tensor representing a batch of images. Shape should be
            [batch_size, side_length, side_length, channels] and have float
            dtype.
        side_length: A python integer. The length, in pixels, of the height
            and width dimensions of `images`.
        blur_probability: The probability with which to apply the blur
            operator to each image in the batch. A python float between 0
            and 1.

    Returns:
        A batch of images of the same shape and dtype as the input `images`.
    """
    with tf.name_scope('batch_random_blur'):
        full_shape = tf.shape(images)
        # One uniform draw per image, broadcast over all of its pixels.
        per_image_shape = [full_shape[0], 1, 1, 1]
        blurred = _gaussian_blur(
            images, side_length=side_length, padding='SAME')
        draws = tf.random_uniform(per_image_shape, 0, 1, dtype=tf.float32)
        use_blur = tf.broadcast_to(
            tf.less(draws, blur_probability), shape=full_shape)
        return tf.where(use_blur, blurred, images)
def pred_input(params, logger, enc=None, path_to_prompt=""):
    """Build a one-element dataset holding the encoded prompt.

    Args:
        params: Dict providing ``"n_ctx"``, ``"padding_id"`` and
            ``"batch_size"``.
        logger: Logger used to report that the prompt was truncated.
        enc: Encoder handed to `encode` together with the prompt text.
        path_to_prompt: Path of a text file containing the prompt. When
            empty, a built-in default prompt is used.

    Returns:
        A `tf.data.Dataset` with a single ``(tokens, tokens)`` pair, where
        `tokens` has shape ``[batch_size, n_ctx]``.
    """
    unicorns = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, " \
               "previously unexplored valley, in the Andes Mountains. Even more surprising to the " \
               "researchers was the fact that the unicorns spoke perfect English."

    if path_to_prompt == "":
        text = unicorns
    else:
        # Use a context manager so the file handle is closed deterministically.
        with open(path_to_prompt, "r") as prompt_file:
            text = prompt_file.read()

    tokens = encode(enc, text)
    n_ctx = params["n_ctx"]

    if len(tokens) > n_ctx:
        logger.info(
            "The length of your input prompt is longer than the model's context length - truncating input."
        )
        # Keep the trailing n_ctx tokens.
        tokens = tokens[len(tokens) - n_ctx:]
    if len(tokens) < n_ctx:
        # Right-pad up to the context length.
        tokens = tf.pad(tokens, [[0, n_ctx - len(tokens)]],
                        constant_values=params["padding_id"])

    # Every batch row holds the same prompt.
    t = tf.broadcast_to(tokens, [params["batch_size"], n_ctx])
    dataset = tf.data.Dataset.from_tensors(t)

    def _dummy_labels(x):
        return x, x

    return dataset.map(_dummy_labels)
def apply_window_to_impulse_response(impulse_response: tf.Tensor,
                                     window_size: int = 0,
                                     causal: bool = False) -> tf.Tensor:
    """Apply a window to an impulse response and put in causal form.

    Args:
        impulse_response: A series of impulse response frames to window, of
            shape [batch, n_frames, ir_size].
        window_size: Size of the window to apply in the time domain. If
            window_size is less than 1 or larger than ir_size, it defaults
            to the impulse_response size.
        causal: If true, the input is treated as being in causal form and an
            fftshift along the last axis is applied to it first.

    Returns:
        impulse_response: Windowed impulse response in causal form. When
        window_size is smaller than ir_size, the zero padding along the last
        dimension is trimmed.
    """
    ir = tf_float32(impulse_response)

    # If IR is in causal form, put it in zero-phase form.
    if causal:
        ir = tf.signal.fftshift(ir, axes=-1)

    # Get a window for better time/frequency resolution than rectangular.
    # Window defaults to IR size, cannot be bigger.
    ir_size = int(ir.shape[-1])
    if not 0 < window_size <= ir_size:
        window_size = ir_size
    window = tf.signal.hann_window(window_size)

    # Zero pad the window and put it in zero-phase form.
    padding = ir_size - window_size
    is_padded = padding > 0
    if is_padded:
        half_idx = (window_size + 1) // 2
        window = tf.concat(
            [window[half_idx:], tf.zeros([padding]), window[:half_idx]],
            axis=0)
    else:
        window = tf.signal.fftshift(window, axes=-1)

    # Apply the window, to get new IR (both in zero-phase form).
    window = tf.broadcast_to(window, ir.shape)
    ir = window * tf.real(ir)

    # Put IR in causal form and trim zero padding.
    if is_padded:
        first_half_start = ir_size - half_idx + 2
        second_half_end = half_idx + 1
        ir = tf.concat(
            [ir[..., first_half_start:], ir[..., :second_half_end]],
            axis=-1)
    else:
        ir = tf.signal.fftshift(ir, axes=-1)
    return ir
def test_batch(self):
    """Checks interpolation on a batch of two grids with shared queries."""
    dtype = np.float64
    # Query points, shared by both batch entries.
    times = tf.constant([2., 2.5, 3, 4.5], dtype=dtype)
    strikes = tf.constant([16, 22, 35], dtype=dtype)
    # One row of grid times per batch entry.
    times_data = tf.constant([[1.5, 2.5, 3.5, 4.5, 5.5],
                              [1.2, 2.2, 3.5, 4.5, 5.5]], dtype=dtype)
    # Corresponding squared volatility values
    sigma_square_data = tf.constant(
        [[0.15, 0.25, 0.35, 0.4, 0.45, 0.4],
         [0.2, 0.35, 0.55, 0.45, 0.4, 0.6],
         [0.3, 0.45, 0.25, 0.4, 0.5, 0.65],
         [0.25, 0.25, 0.45, 0.25, 0.5, 0.55],
         [0.35, 0.35, 0.25, 0.4, 0.55, 0.65]], dtype=dtype)
    # Interpolation is done for the total variance
    total_variance = tf.expand_dims(times_data, -1) * sigma_square_data
    # Corresponding strike values, broadcast first to the grid shape and
    # then to the batched grid shape.
    strike_row = tf.constant([15, 25, 35, 40, 50, 55], dtype=dtype)
    strike_data = tf.broadcast_to(strike_row, [5, 6])
    sigma_square_data = tf.broadcast_to(sigma_square_data, [2, 5, 6])
    strike_data = tf.broadcast_to(strike_data, [2, 5, 6])

    # Interpolate total variance on the grid [times, strikes]
    interpolator = interpolation_2d.Interpolation2D(
        times_data, strike_data, total_variance, dtype=dtype)
    interpolated_values = interpolator.interpolate(times, strikes)

    expected_vols = np.array([[[0.38239871, 0.52334687, 0.95],
                               [0.52479743, 0.71669375, 1.375],
                               [0.83920309, 1.1592475, 1.125],
                               [1.06996765, 0.92655, 2.025]],
                              [[0.40785739, 0.5573524, 1.052],
                               [0.62146489, 0.85479298, 1.13269231],
                               [0.88753682, 1.22829712, 1.00384615],
                               [1.06996765, 0.92655, 2.025]]])
    self.assertEqual(interpolated_values.dtype.as_numpy_dtype, dtype)
    self.assertAllClose(interpolated_values.shape.as_list(), [2, 4, 3])
    self.assertAllClose(
        interpolated_values, expected_vols, rtol=1e-04, atol=1e-04)
def style_mixing_reg(self, w_input_1, w_input_2, style_mixing_prob, layers):
    """Mix two latents along a trailing per-layer axis at a random cut.

    Both inputs get a trailing axis of size ``layers + 1`` by tiling. With
    probability `style_mixing_prob` a cut index is drawn uniformly from
    ``[1, layers + 1)``; otherwise the cut is ``layers + 1``. Positions
    before the cut take `w_input_1`, the rest take `w_input_2`.

    Returns:
        The mixed latent, shaped like the tiled inputs.
    """
    num_slots = layers + 1
    tile_multiples = [1, 1, num_slots]
    w_latent_1 = tf.tile(w_input_1[:, :, np.newaxis], tile_multiples)
    w_latent_2 = tf.tile(w_input_2[:, :, np.newaxis], tile_multiples)

    with tf.variable_scope('style_mixing_reg'):
        possible_layers = np.arange(num_slots)[np.newaxis, np.newaxis, :]

        def _random_cut():
            return tf.random.uniform([], 1, num_slots, dtype=tf.int32)

        def _no_cut():
            return tf.constant(num_slots, dtype=tf.int32)

        do_mix = tf.random_uniform([], 0.0, 1.0) < style_mixing_prob
        layer_cut = tf.cond(do_mix, _random_cut, _no_cut)
        # Keep the ndarray-vs-tensor comparison operator form: it decides
        # how the operands are converted.
        before_cut = possible_layers < layer_cut
        use_first = tf.broadcast_to(before_cut, tf.shape(w_latent_1))
        w_latent = tf.where(use_first, w_latent_1, w_latent_2)
    return w_latent
def _build(self, batch_size, num_slots):
    """Create the initial posterior parameters and recurrent state.

    Args:
        batch_size: Leading dimension of the outputs.
        num_slots: Second dimension of the outputs.

    Returns:
        Tuple ``(loc, raw_scale, states)``. `loc` and `raw_scale` are the
        zero-initialized ``loc`` / ``scale`` variables broadcast to
        ``[batch_size, num_slots, latent_size]``. `states` is a zero
        `snt.LSTMState` with ``batch_size * num_slots`` rows, or ``None``
        when ``self._state_size`` is falsy.
    """
    var_shape = [1, 1, self._latent_size]
    init_loc = tf.get_variable(
        'loc', shape=var_shape, initializer=tf.zeros_initializer())
    init_scale = tf.get_variable(
        'scale', shape=var_shape, initializer=tf.zeros_initializer())

    with tf.name_scope('post_param'):
        param_shape = [batch_size, num_slots, self._latent_size]
        loc = tf.broadcast_to(init_loc, param_shape)
        raw_scale = tf.broadcast_to(init_scale, param_shape)

    with tf.name_scope('state'):
        states = None
        if self._state_size:
            zeros = tf.zeros((batch_size * num_slots, self._state_size))
            states = snt.LSTMState(hidden=zeros, cell=zeros)
    return loc, raw_scale, states
def call(self, images):
    """Embed image patches and prepend the classification embedding.

    Args:
        images: Batch of images handed to ``self.get_patches``.

    Returns:
        The projected patches with the classification embedding
        concatenated in front along axis 1 and ``self.position_embeddings``
        added.
    """
    batch_size = tf.shape(images)[0]
    patches = self.get_patches(images)
    # One copy of the classification embedding per batch entry.
    cls_tokens = tf.broadcast_to(
        self.classification_emb,
        [batch_size, 1, self.patch_embedding_dim])
    projected = self.projection(patches)
    sequence = tf.concat([cls_tokens, projected], axis=1)
    return sequence + self.position_embeddings
def test_batch(self):
    """Checks interpolation with batched grids and batched query points."""
    dtype = np.float64
    # One row of query points per batch entry.
    times = tf.constant([[2., 2.5, 3], [2., 2.5, 3]], dtype=dtype)
    strikes = tf.constant([[16, 22, 35], [16, 22, 35]], dtype=dtype)
    # One row of grid times per batch entry.
    times_data = tf.constant([[1.5, 2.5, 3.5, 4.5, 5.5],
                              [1.2, 2.2, 3.5, 4.5, 5.5]], dtype=dtype)
    # Corresponding squared volatility values
    sigma_square_data = tf.constant(
        [[0.15, 0.25, 0.35, 0.4, 0.45, 0.4],
         [0.2, 0.35, 0.55, 0.45, 0.4, 0.6],
         [0.3, 0.45, 0.25, 0.4, 0.5, 0.65],
         [0.25, 0.25, 0.45, 0.25, 0.5, 0.55],
         [0.35, 0.35, 0.25, 0.4, 0.55, 0.65]], dtype=dtype)
    # Interpolation is done for the total variance
    total_variance = tf.expand_dims(times_data, -1) * sigma_square_data
    # Corresponding strike values, broadcast first to the grid shape and
    # then to the batched grid shape.
    strike_row = tf.constant([15, 25, 35, 40, 50, 55], dtype=dtype)
    strike_data = tf.broadcast_to(strike_row, [5, 6])
    sigma_square_data = tf.broadcast_to(sigma_square_data, [2, 5, 6])
    strike_data = tf.broadcast_to(strike_data, [2, 5, 6])

    # Interpolate total variance on the grid [times, strikes]
    interpolator = interpolation_2d.Interpolation2D(
        times_data, strike_data, total_variance, dtype=dtype)
    interpolated_values = interpolator.interpolate(times, strikes)

    with self.subTest("CorrectDtype"):
        self.assertEqual(interpolated_values.dtype.as_numpy_dtype, dtype)
    with self.subTest("CorrectShape"):
        self.assertAllClose(interpolated_values.shape.as_list(), [2, 3])
    expected_vols = np.array([[0.38239871, 0.71669375, 1.125],
                              [0.40785739, 0.85479298, 1.00384615]])
    with self.subTest("CorrectResult"):
        self.assertAllClose(
            interpolated_values, expected_vols, rtol=1e-04, atol=1e-04)
def _binary_focal_loss_with_logits(labels, logits, gamma, pos_weight=None):
    """Compute focal loss from logits.

    Adapted from github.com/artemmavrin/focal-loss for TF 1.14.

    Args:
        labels: Tensor of binary labels; broadcast against `logits`.
        logits: Tensor of logits; broadcast against `labels`.
        gamma: Focusing exponent. Values with ``|gamma| < 1e-5`` disable the
            modulation and the plain cross-entropy is returned.
        pos_weight: Optional weight passed to
            `tf.nn.weighted_cross_entropy_with_logits`. When ``None`` the
            unweighted sigmoid cross-entropy is used.

    Returns:
        The element-wise loss tensor, in the broadcast shape of `labels` and
        `logits`.
    """
    import tensorflow.compat.v1 as tf

    # The labels and logits tensors' shapes need to be the same for the
    # built-in cross-entropy functions. Since we want to allow broadcasting,
    # we do some checks on the shapes and possibly broadcast explicitly.
    # Note: tensor.shape returns a tf.TensorShape, whereas tf.shape(tensor)
    # returns an int tf.Tensor; this is why both are used below.
    labels_shape = labels.shape
    logits_shape = logits.shape
    if not labels_shape.is_fully_defined() or labels_shape != logits_shape:
        labels_shape = tf.shape(labels)
        logits_shape = tf.shape(logits)
        shape = tf.broadcast_dynamic_shape(labels_shape, logits_shape)
        labels = tf.broadcast_to(labels, shape)
        logits = tf.broadcast_to(logits, shape)

    if pos_weight is None:
        loss_func = tf.nn.sigmoid_cross_entropy_with_logits
    else:
        from functools import partial
        loss_func = partial(tf.nn.weighted_cross_entropy_with_logits,
                            pos_weight=pos_weight)
    loss = loss_func(labels=labels, logits=logits)

    if abs(gamma) < 0.00001:
        # No modulation (original note: the loss returns NaN with ** 0).
        return loss

    # Probabilities for the positive class. They are computed from the
    # broadcast logits so that `p` has the same shape as `mask`; the v1
    # `tf.where` used below does not broadcast its value arguments.
    p = tf.math.sigmoid(logits)
    modulation_pos = (1 - p) ** gamma
    modulation_neg = p ** gamma
    mask = tf.dtypes.cast(labels, dtype=tf.bool)
    modulation = tf.where(mask, modulation_pos, modulation_neg)
    return modulation * loss
def tensor_ll_rho():
    """Return ``(ll, rho, p_n)`` computed from the enclosing-scope tensors.

    Reads `a`, `b`, `x`, `sc`, `q`, `vL_in`, `pi`, `K`, `G`, `C` and
    `log_beta` from the enclosing scope.

    Returns:
        ll: Scalar, sum over the remaining axes of the log-sum-exp of
            ``pi + ga`` along axis 1.
        rho: ``pi + ga`` normalized in log space along axis 1.
        p_n: ``exp(xi)`` normalized jointly over axes 0 and 1.
    """
    ratio = b / sc
    vF = (a * tf.log(ratio) - (x + a) * tf.log(1 + ratio)
          - log_beta(a, x) - tf.log(x + a))
    # Append `vL_in` (broadcast to [1, K, G, C]) as the last slice on axis 0.
    last_slice = tf.broadcast_to(vL_in, [1, K, G, C])
    t = q + tf.concat([vF, last_slice], axis=0)

    eta = tf.reduce_logsumexp(t, axis=0, keepdims=True)
    ga = tf.reduce_sum(eta, axis=2, keepdims=True)
    tt = pi + ga
    ll = tf.reduce_sum(tf.reduce_logsumexp(tt, axis=1))

    # Subtract the max along axis 1 before normalizing in log space.
    tt_shifted = tt - tf.reduce_max(tt, axis=1, keepdims=True)
    rho = tt_shifted - tf.reduce_logsumexp(tt_shifted, axis=1, keepdims=True)

    xi = rho + t - eta
    p_n = tf.exp(xi - tf.reduce_logsumexp(xi, axis=(0, 1), keepdims=True))
    return ll, rho, p_n
def compute_eval_losses_and_metrics_for_weights(self, inputs, weights_dict):
    """Returns losses and metrics computed by evaluation.

    Args:
        inputs: Inputs forwarded to the problem's `losses_and_metrics`.
        weights_dict: Mapping from each name in ``self._losses_names`` to its
            weight; validated with ``self._check_weights_dict``.

    Returns:
        Tuple ``(losses, metrics)`` from ``self._problem.losses_and_metrics``
        evaluated with ``training=False``.
    """
    self._check_weights_dict(weights_dict)
    loss_names = self._losses_names
    # TODO(josipd): Don't hard-code float32.
    weight_vector = tf.constant(
        [weights_dict[name] for name in loss_names], dtype=tf.float32)
    # Every example in the batch receives the same preprocessed weights.
    extra_inputs = tf.broadcast_to(
        self._preprocess(weight_vector),
        (self._batch_size, len(loss_names)))
    losses, metrics = self._problem.losses_and_metrics(
        inputs, extra_inputs, training=False)
    return losses, metrics
def _mean_image_subtraction(image, means, num_channels):
    """Subtract per-channel means from a rank-3 image.

    Args:
        image: Tensor of rank 3, ``[height, width, C]``.
        means: Sequence with one mean per channel.
        num_channels: Expected number of channels; must equal ``len(means)``.

    Returns:
        ``image`` minus `means` broadcast to the image's shape.

    Raises:
        ValueError: If `image` is not rank 3 or `means` has the wrong length.
    """
    if image.get_shape().ndims != 3:
        raise ValueError('Input must be of size [height, width, C>0]')
    if len(means) != num_channels:
        raise ValueError('len(means) must match the number of channels')

    # Expand the 1-D means to the full image shape before subtracting.
    means_like_image = tf.broadcast_to(means, tf.shape(image))
    return image - means_like_image
def ensemble_cross_entropy(labels, logits, binary=False, aggregate=True):
    """Cross-entropy of an ensemble distribution.

    For each datapoint (x,y), the ensemble's negative log-probability is:

    ```
    -log p(y|x) = -log sum_{m=1}^{ensemble_size} exp(log p(y|x,theta_m)) +
                  log ensemble_size.
    ```

    The cross entropy is the expected negative log-probability with respect
    to the true data distribution.

    Args:
        labels: tf.Tensor of shape [...].
        logits: tf.Tensor of shape [ensemble_size, ..., num_classes].
        binary: bool, whether it is a binary classification (sigmoid as
            activation).
        aggregate: bool, whether or not to average over the batch.

    Returns:
        The mean negative log-likelihood (a scalar) when `aggregate` is true,
        otherwise the per-datapoint values with the ensemble axis reduced.
    """
    logits = tf.convert_to_tensor(logits)
    ensemble_size = float(logits.shape[0])
    logits_shape = tf.shape(logits)

    # Give the labels a leading ensemble axis so each member is scored
    # against the same targets.
    if binary:
        member_labels = tf.broadcast_to(labels[tf.newaxis, ...], logits_shape)
        ce = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=member_labels, logits=logits)
    else:
        labels = tf.cast(labels, tf.int32)
        member_labels = tf.broadcast_to(
            labels[tf.newaxis, ...], logits_shape[:-1])
        ce = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=member_labels, logits=logits)

    # -ce is each member's log-probability; combine members in log space.
    log_sum_members = tf.reduce_logsumexp(-ce, axis=0)
    nll = -log_sum_members + tf.math.log(ensemble_size)
    if aggregate:
        nll = tf.reduce_mean(nll)
    return nll