def testQueryInModule(self):
    # Variable queries on a Linear module: they must fail before the module
    # is connected and return exactly {w, b} afterwards.
    linear = snt.Linear(output_size=42, name="linear")

    with self.assertRaisesRegexp(snt.Error, "not instantiated yet"):
        linear.get_variables()

    # Compare to the desired result set, after connection.
    placeholder = tf.placeholder(tf.float32, shape=[3, 4])
    linear(placeholder)

    via_method = set(linear.get_variables())
    self.assertEqual(via_method, {linear.w, linear.b})

    via_function = set(snt.get_variables_in_module(linear))
    self.assertEqual(via_function, {linear.w, linear.b})
def encoder_fn(net):
    """Encoder for VAE.

    Applies a stride-2 `ConvNet2D` (channel sizes from the enclosing
    `enc_units`), flattens all non-batch dimensions, projects to `2 * n_z`
    units with a linear layer and wraps the result in
    `generative_utils.LogStddevNormal`.
    """
    conv_stack = snt.nets.ConvNet2D(
        enc_units,
        kernel_shapes=[(3, 3)],
        strides=[2, 2],
        paddings=[snt.SAME],
        activation=activation_fn,
        activate_final=True)
    features = conv_stack(net)

    # Collapse every non-batch dimension into a single feature axis.
    non_batch_dims = features.shape.as_list()[1:]
    flat_size = int(np.prod(non_batch_dims))
    flattened = tf.reshape(features, [-1, flat_size])

    distribution_params = snt.Linear(2 * n_z)(flattened)
    return generative_utils.LogStddevNormal(distribution_params)
def __init__(self, dim, condx_dim, condz_dim, num_layers, name='HyperGen'):
    """Create the sub-modules of the hyper-generator.

    Args:
      dim: stored as `self.dim` and passed to `NormFlow`.
      condx_dim: stored as `self.condx_dim` (not otherwise used here).
      condz_dim: output size of `self.fc`; `2 * condz_dim` is the first
        argument given to `HyperNet`.
      num_layers: number of layers passed to `NormFlow`.
      name: module name.
    """
    super(HyperGen, self).__init__(name=name)
    self.dim = dim
    self.condx_dim = condx_dim
    self.condz_dim = condz_dim

    with self._enter_variable_scope():
        self.fc = snt.Linear(condz_dim)
        self.norm_flow = NormFlow(self.dim, num_layers, 'planar')
        # The hyper-network emits one value per parameter of the flow.
        flow_param_count = self.norm_flow.num_params
        self.hnet = HyperNet(2 * condz_dim, 256, flow_param_count, depth=2)
def _build(self, x, presence=None):
    """Embed `x`, run the attention layers and pool with learned queries.

    Args:
      x: input tensor; its leading dimension must be statically known because
        it is converted with `int(...)`.
      presence: optional tensor forwarded unchanged to every attention module.

    Returns:
      The output of `MultiHeadQKVAttention`, with the tiled `inducing_points`
      variable as queries and the transformed input as keys and values.
    """
    batch_size = int(x.shape[0])
    h = snt.BatchApply(snt.Linear(self._n_dims))(x)

    common_args = [self._n_heads, self._layer_norm, self._dropout_rate]
    if self._n_inducing_points > 0:
        attention_cls = InducedSelfAttention
        attention_args = [self._n_inducing_points] + common_args
    else:
        attention_cls = SelfAttention
        attention_args = common_args

    for _ in range(self._n_layers):
        h = attention_cls(*attention_args)(h, presence)

    z = snt.BatchApply(snt.Linear(self._n_output_dims))(h)

    # A single learned set of query points, tiled over the batch dimension.
    queries = tf.get_variable(
        'inducing_points',
        shape=[1, self._n_outputs, self._n_output_dims])
    queries = snt.TileByDim([0], [batch_size])(queries)

    pooling = MultiHeadQKVAttention(self._n_heads)
    return pooling(queries, z, z, presence)
def _fn(batch):
    """Make the loss from the given batch.

    Encodes `batch["image"]` with a strided conv net down to `num_latents`
    units, decodes back to image resolution with a transposed conv net and
    returns the mean squared error between the image and the sigmoid of the
    reconstruction logits.

    Raises:
      ValueError: if dimension 1 of the image is neither 28 nor 32.
    """
    image = batch["image"]

    encoder = snt.nets.ConvNet2D(
        enc_units,
        kernel_shapes=[(3, 3)],
        strides=[2, 2],
        paddings=[snt.SAME],
        activation=activation_fn,
        activate_final=True)
    net = encoder(image)

    flat_dims = int(np.prod(net.shape.as_list()[1:]))
    net = tf.reshape(net, [-1, flat_dims])
    net = snt.Linear(num_latents)(net)

    # The decoder starts at a quarter of the input resolution and doubles it
    # twice.
    side = image.shape.as_list()[1]
    if side == 28:
        start = 7
        shapes = [(14, 14), (28, 28)]
    elif side == 32:
        start = 8
        shapes = [(16, 16), (32, 32)]
    else:
        raise ValueError("Only 28x28, or 32x32 supported")
    net = snt.Linear(start * start * 32)(net)
    net = tf.reshape(net, [-1, start, start, 32])

    decoder = snt.nets.ConvNet2DTranspose(
        dec_units,
        shapes,
        kernel_shapes=[(3, 3)],
        strides=[2, 2],
        paddings=[snt.SAME],
        activation=activation_fn,
        activate_final=True)
    net = decoder(net)

    # 1x1 convolution back to the channel count of the input image.
    out_channels = image.shape.as_list()[3]
    net = snt.Conv2D(out_channels, kernel_shape=(1, 1))(net)

    squared_error = tf.square(image - tf.nn.sigmoid(net))
    per_example_loss = tf.reduce_mean(squared_error, [1, 2, 3])
    return tf.reduce_mean(per_example_loss)
def Linear(name, output_size):
    """Return a named `snt.Linear` with fixed initializers and regularizers.

    Weights use a truncated normal (stddev 0.1) and biases a constant 0.1;
    both carry an L2 regularizer with scale 0.1.
    """
    l2 = tf.contrib.layers.l2_regularizer
    return snt.Linear(
        output_size,
        initializers={
            "w": tf.truncated_normal_initializer(stddev=0.1),
            "b": tf.constant_initializer(value=0.1),
        },
        regularizers={
            "w": l2(scale=0.1),
            "b": l2(scale=0.1),
        },
        name=name)
def fuse_modality(vision_vectorized, text_embedded, config):
    """Fuse a vision vector with a text embedding.

    Args:
      vision_vectorized: vision feature tensor.
      text_embedded: text feature tensor.
      config: mapping whose "method" entry selects the fusion:
        "concat" concatenates (text, vision) along axis 1;
        "hadamard" linearly projects the text to the size of dimension 1 of
        the vision tensor and multiplies the two element-wise.

    Returns:
      The fused tensor.

    Raises:
      AssertionError: if `config["method"]` is not a supported method.
    """
    method = config["method"]

    if method == "concat":
        return tf.concat((text_embedded, vision_vectorized), axis=1)

    if method == "hadamard":
        projection = snt.Linear(output_size=vision_vectorized.get_shape()[1])
        proj = projection(text_embedded)
        return tf.multiply(proj, vision_vectorized)  # hadamard product

    # Report the key that was actually dispatched on. The previous message
    # looked up the non-existent "fusing_method" key and died with a KeyError.
    # Raised explicitly so the check also survives `python -O`.
    raise AssertionError(
        "Fusing can be 'concat' or 'hadamard' not '{}'".format(method))
def _build(self, inputs):
    """Run a 7-layer conv net followed by a linear head on `inputs`.

    All modules are created and connected inside a variable scope whose
    custom getter comes from `_sn_custom_getter()` (presumably spectral
    normalization, judging by the name; defined elsewhere).

    Returns:
      The output of a linear layer with `self._num_outputs` units.
    """
    leaky_relu = functools.partial(tf.nn.leaky_relu, alpha=0.1)

    with tf.variable_scope('', custom_getter=_sn_custom_getter()):
        conv_net = snt.nets.ConvNet2D(
            output_channels=[64, 64, 128, 128, 256, 256, 512],
            kernel_shapes=[
                (3, 3), (4, 4), (3, 3), (4, 4), (3, 3), (4, 4), (3, 3)],
            strides=[1, 2, 1, 2, 1, 2, 1],
            paddings=[snt.SAME],
            activate_final=True,
            activation=leaky_relu)
        head = snt.Linear(self._num_outputs)
        flattened = snt.BatchFlatten()(conv_net(inputs))
        return head(flattened)
def _build(self, inputs):
    """Apply the MLP stack selected by `self.typ` to `inputs`.

    Both supported types run an MLP with `self.n_layers` layers of
    `self.n_neurons` units (final activation controlled by
    `self.activation_final`) followed by layer normalization.
    "mlp_transform" additionally appends a linear layer with
    `self.output_size` units.

    Args:
      inputs: tensor fed to the stack.

    Returns:
      The output tensor of the sequential stack.

    Raises:
      ValueError: if `self.typ` is neither "mlp_transform" nor
        "mlp_layer_norm". Previously this fell through to an
        UnboundLocalError on the return statement.
    """
    supported = ("mlp_transform", "mlp_layer_norm")
    if self.typ not in supported:
        raise ValueError(
            "Unknown typ {!r}; expected one of {}".format(self.typ, supported))

    net = snt.nets.MLP([self.n_neurons] * self.n_layers,
                       activate_final=self.activation_final)
    layers = [net, snt.LayerNorm()]
    if self.typ == "mlp_transform":
        # Transforms the outputs into the appropriate shape.
        layers.append(snt.Linear(self.output_size))
    return snt.Sequential(layers)(inputs)
def encoder(self, inputs):
    """Project `inputs` to `self._num_latents` units after dropout.

    Runs inside the "encoder" variable scope. The projection is a bias-free
    linear layer, applied through `snt.BatchApply`, whose weights use a
    Glorot-uniform initializer and an L2 regularizer weighted by
    `self._l2_penalty_weight`.
    """
    with tf.variable_scope("encoder"):
        dropped = tf.nn.dropout(inputs, rate=self.dropout_rate)

        weight_regularizer = tf.contrib.layers.l2_regularizer(
            self._l2_penalty_weight)
        weight_initializer = tf.initializers.glorot_uniform(
            dtype=self._float_dtype)
        projection = snt.Linear(
            self._num_latents,
            use_bias=False,
            regularizers={"w": weight_regularizer},
            initializers={"w": weight_initializer},
        )
        return snt.BatchApply(projection)(dropped)
def _fn(batch):
    """Build the loss.

    A conv net (first layer stride 2, remaining layers stride 1) is applied to
    `batch["image"]` and flattened. One activated linear layer follows per
    entry of `hidden_layers`, then a linear classifier. The result is the mean
    softmax cross-entropy against `batch["label_onehot"]`.
    """
    conv_strides = [2] + [1] * (len(hidden_units) - 1)
    conv_net = snt.nets.ConvNet2D(
        hidden_units,
        kernel_shapes=[(3, 3)],
        strides=conv_strides,
        paddings=[snt.SAME],
        activation=activation_fn,
        initializers=initializers,
        activate_final=True)
    net = conv_net(batch["image"])

    flat_size = int(np.prod(net.shape.as_list()[1:]))
    net = tf.reshape(net, [-1, flat_size])

    for width in hidden_layers:
        dense = snt.Linear(width, initializers=initializers)
        net = activation_fn(dense(net))

    num_classes = batch["label_onehot"].shape.as_list()[1]
    logits = snt.Linear(num_classes, initializers=initializers)(net)

    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=batch["label_onehot"], logits=logits)
    return tf.reduce_mean(per_example_loss)
def _build(self, inputs):
    """Compute output Tensor from input Tensor.

    Returns:
      A `(mu, dist)` pair: `mu` is the tanh-squashed output of the layer
      stack and `dist` a diagonal multivariate normal centred on `mu` with
      every scale entry equal to `self._co_var`.
    """
    stack = [
        RNN(hidden_size=64, name="policy_rnn"),
        snt.Linear(self._hidden_size, name="layer_1"),
        tf.nn.relu,
        snt.Linear(self._hidden_size, name="layer_2"),
        tf.nn.relu,
        snt.Linear(self._output_size, name="layer_3"),
        tf.nn.tanh,
    ]
    mu = snt.Sequential(stack)(inputs)

    scale_diag = tf.ones_like(mu) * self._co_var
    dist = tf.contrib.distributions.MultivariateNormalDiag(
        loc=mu, scale_diag=scale_diag)

    # mu : MB x ACTION_DIM
    # dist: MB x ACTION_DIM
    return mu, dist
def __init__(self, latent_dimension, encoder_net, decoder_net, hvar_shape,
             prior_fn=STD_GAUSSIAN_FN, posterior_fn=SOFTPLUS_GAUSSIAN_FN,
             temperature=1.0, name='vae'):
    """Store the networks and create the latent projection layers.

    Args:
      latent_dimension: size of the `_loc` and `_scale` linear layers; also
        passed to `prior_fn`.
      encoder_net: stored as `self._encoder`.
      decoder_net: stored as `self._decoder`.
      hvar_shape: output size of the `_hvar` linear layer.
      prior_fn: callable taking an integer for latent dimensionality and
        returning a TF Distribution.
      posterior_fn: stored as `self._latent_posterior_fn`.
      temperature: stored as `self._temperature`.
      name: module name.
    """
    super(HVAE, self).__init__(name=name)

    self._encoder, self._decoder = encoder_net, decoder_net
    self._latent_posterior_fn = posterior_fn
    self._temperature = temperature

    with self._enter_variable_scope():
        self._loc = snt.Linear(latent_dimension)
        self._scale = snt.Linear(latent_dimension)
        self._hvar = snt.Linear(hvar_shape)
        # Consider using a parameterized GMM prior learned with backprop
        self.latent_prior = prior_fn(latent_dimension)
def testEndToEnd(self, predictor_cls, attack_cls, optimizer_cls, epsilon,
                 restarted=False):
    # Runs one attack that maximizes and one that minimizes the output of a
    # one-unit linear network (w = 1, b = 1) and checks the inputs they find.

    # l-\infty norm of perturbation ball.
    if isinstance(epsilon, list):
        # We test the ability to have different epsilons across dimensions.
        epsilon = tf.constant([epsilon], dtype=tf.float32)
    input_bounds = (-.5, 2.5)

    # Create a simple network.
    network = snt.Linear(1, initializers={
        'w': tf.constant_initializer(1.),
        'b': tf.constant_initializer(1.),
    })
    inputs = tf.constant([[1, 2]], dtype=tf.float32)
    predictor = predictor_cls(network, self)
    # Not important for the test but needed.
    labels = tf.constant([1], dtype=tf.int64)

    def run_attack(direction):
        # `direction` is the single coefficient of the linear specification:
        # 1. maximizes the network output, -1. minimizes it.
        spec = ibp.LinearSpecification(tf.constant([[[direction]]]))
        attack = attack_cls(predictor, spec, epsilon,
                            input_bounds=input_bounds,
                            optimizer_builder=optimizer_cls)
        if restarted:
            attack = ibp.RestartedAttack(attack, num_restarts=10)
        return attack(inputs, labels)

    # We create two attacks to maximize and then minimize the output.
    z_max = run_attack(1.)
    z_min = run_attack(-1.)

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        max_batch, min_batch = sess.run([z_max, z_min])
        found_max = max_batch[0]
        found_min = min_batch[0]
        self.assertAlmostEqual(2., found_max[0])
        self.assertAlmostEqual(2.5, found_max[1])
        self.assertAlmostEqual(0., found_min[0])
        self.assertAlmostEqual(1., found_min[1])
def _torso(self, input_):
    """Build the feature vector for one step from the observation and context.

    Args:
      input_: a `(last_action, env_output)` pair, where `env_output` unpacks
        as `(reward, _, _, (frame, instruction))`.

    Returns:
      The axis-1 concatenation of the conv-net features, the reward clipped to
      [-1, 1], the one-hot last action and the instruction encoding.
    """
    last_action, env_output = input_
    reward, _, _, (frame, instruction) = env_output

    # Convert to floats.
    pixels = tf.to_float(frame)
    pixels = pixels / 255

    with tf.variable_scope('convnet'):
        features = pixels
        sections = [(16, 2), (32, 2), (32, 2)]
        for section, (channels, n_residual) in enumerate(sections):
            # Downscale.
            features = snt.Conv2D(
                channels, 3, stride=1, padding='SAME')(features)
            features = tf.nn.pool(
                features,
                window_shape=[3, 3],
                pooling_type='MAX',
                padding='SAME',
                strides=[2, 2])

            # Residual block(s).
            for block in range(n_residual):
                with tf.variable_scope('residual_%d_%d' % (section, block)):
                    shortcut = features
                    # Two relu -> 3x3 conv stages, then the skip connection.
                    for _ in range(2):
                        features = tf.nn.relu(features)
                        features = snt.Conv2D(
                            channels, 3, stride=1, padding='SAME')(features)
                    features = features + shortcut

    features = tf.nn.relu(features)
    features = snt.BatchFlatten()(features)

    features = snt.Linear(256)(features)
    features = tf.nn.relu(features)

    instruction_out = self._instruction(instruction)

    # Append clipped last reward and one hot last action.
    clipped_reward = tf.expand_dims(tf.clip_by_value(reward, -1, 1), -1)
    one_hot_last_action = tf.one_hot(last_action, self._num_actions)
    return tf.concat(
        [features, clipped_reward, one_hot_last_action, instruction_out],
        axis=1)
def __init__(self,
             edge_output_size=None,
             node_output_size=None,
             global_output_size=None,
             network="GraphIndependent",
             name="EncodeProcessDecode"):
    """Create the encoder, core, decoder and output transform.

    Args:
      edge_output_size: size of the final edge linear layer, or None for no
        edge output transform.
      node_output_size: size of the final node linear layer, or None for no
        node output transform. Only used when `network` is
        "GraphIndependent"; the "RelationNetwork" output transform is built
        from the edge and global functions only.
      global_output_size: size of the final global linear layer, or None for
        no global output transform.
      network: "GraphIndependent" or "RelationNetwork"; selects the encoder,
        decoder and output-transform module types.
      name: module name.

    Raises:
      ValueError: if `network` is not one of the two supported values.
        Previously an unknown value left `_encoder`, `_core`, `_decoder` and
        `_output_transform` unset without any error.
    """
    supported_networks = ("GraphIndependent", "RelationNetwork")
    # Validate before any state is created, so a bad argument fails here
    # rather than as a missing attribute at a later point.
    if network not in supported_networks:
        raise ValueError(
            "Unknown network {!r}; expected one of {}".format(
                network, supported_networks))

    super(EncodeProcessDecode, self).__init__(name=name)

    if network == "GraphIndependent":
        self._encoder = MLPGraphIndependent()
        self._core = MLPGraphNetwork()
        self._decoder = MLPGraphIndependent()
    else:  # "RelationNetwork"
        self._encoder = MLPRelationNetwork()
        self._core = MLPGraphNetwork()
        self._decoder = MLPRelationNetwork()

    # Transforms the outputs into the appropriate shapes.
    if edge_output_size is None:
        edge_fn = None
    else:
        edge_fn = lambda: snt.Linear(edge_output_size, name="edge_output")
    if node_output_size is None:
        node_fn = None
    else:
        node_fn = lambda: snt.Linear(node_output_size, name="node_output")
    if global_output_size is None:
        global_fn = None
    else:
        global_fn = lambda: snt.Linear(global_output_size, name="global_output")

    with self._enter_variable_scope():
        if network == "GraphIndependent":
            self._output_transform = modules.GraphIndependent(
                edge_fn, node_fn, global_fn)
        else:  # "RelationNetwork"
            self._output_transform = modules.RelationNetwork(
                edge_fn, global_fn)
def __init__(self,
             edge_output_size=None,
             node_output_size=None,
             global_output_size=None,
             latent_size=16,
             num_layers=2,
             separate_edge_output=False,
             edge_output_layer_norm=False,
             skip_encoder_decoder=False,
             name="MyEncodeProcessDecode"):
    """Create the encoder, core, decoder and output transform.

    Args:
      edge_output_size: size of the final edge linear layer, or None for no
        edge output transform.
      node_output_size: same, for nodes.
      global_output_size: same, for globals.
      latent_size: forwarded to the encoder, core and decoder.
      num_layers: forwarded to the encoder, core and decoder.
      separate_edge_output: forwarded to the decoder and also stored as
        `self._edge_type_concat`.
      edge_output_layer_norm: forwarded to the decoder.
      skip_encoder_decoder: stored as `self._skip_encoder_decoder`.
      name: module name.
    """
    super(MyEncodeProcessDecode, self).__init__(name=name)

    self._encoder = MLPGraphIndependent(latent_size, num_layers)
    self._core = MLPGraphNetwork(latent_size, num_layers)
    self._edge_type_concat = separate_edge_output
    self._decoder = MLPGraphIndependent(
        latent_size,
        num_layers,
        output_independent=True,
        separate_edge_output=separate_edge_output,
        edge_output_layer_norm=edge_output_layer_norm)
    self._skip_encoder_decoder = skip_encoder_decoder

    # Transforms the outputs into the appropriate shapes. A size of None
    # means "no transform" for that graph element.
    edge_fn = None
    if edge_output_size is not None:
        edge_fn = lambda: snt.Linear(edge_output_size, name="edge_output")

    node_fn = None
    if node_output_size is not None:
        node_fn = lambda: snt.Linear(node_output_size, name="node_output")

    global_fn = None
    if global_output_size is not None:
        global_fn = lambda: snt.Linear(global_output_size, name="global_output")

    with self._enter_variable_scope():
        self._output_transform = modules.GraphIndependent(
            edge_fn, node_fn, global_fn)
def _build(self, padded_word_embeddings, length):
    """Encode padded word embeddings with a configurable Conv1D stack.

    Each entry of `self._config['conv_architecture']` is either a
    `(filters, kernel_size, pooling_size)` tuple/list (convolution plus
    optional max pooling) or the string 'relu' (activation plus optional
    dropout). The sequence axis is then reduced according to `self._pooling`
    and up to two optional fully connected layers are applied.

    Args:
      padded_word_embeddings: input tensor; axis 1 is the one pooled over.
      length: per-example lengths, used as the divisor for 'average' pooling.

    Returns:
      The encoded tensor.

    Raises:
      RuntimeError: if an architecture entry is neither a tuple/list nor
        'relu'.
    """
    keep_prob = self._keep_prob

    def maybe_dropout(tensor):
        # Dropout is only added to the graph when it would drop something.
        if keep_prob < 1:
            return tf.nn.dropout(tensor, keep_prob=keep_prob)
        return tensor

    x = padded_word_embeddings
    for layer in self._config['conv_architecture']:
        if isinstance(layer, (tuple, list)):
            filters, kernel_size, pooling_size = layer
            x = snt.Conv1D(
                output_channels=filters, kernel_shape=kernel_size)(x)
            if pooling_size and pooling_size > 1:
                x = _max_pool_1d(x, pooling_size)
        elif layer == 'relu':
            x = maybe_dropout(tf.nn.relu(x))
        else:
            raise RuntimeError('Bad layer type {} in conv'.format(layer))

    # Final layer pools over the remaining sequence length to get a
    # fixed sized vector.
    if self._pooling == 'max':
        x = tf.reduce_max(x, axis=1)
    elif self._pooling == 'average':
        summed = tf.reduce_sum(x, axis=1)
        lengths = tf.expand_dims(tf.cast(length, tf.float32), axis=1)
        x = summed / lengths

    # Optional fully connected layers; a falsy size disables the layer.
    for size_key in ('conv_fc1', 'conv_fc2'):
        if self._config[size_key]:
            dense = snt.Linear(output_size=self._config[size_key])
            x = maybe_dropout(tf.nn.relu(dense(x)))
    return x
def _build_layers_v2(self, input_dict, num_outputs, options):
    """Define the layers of a custom model.

    A stack of `config["n_layers"]` ReLU-activated convolutions is applied to
    `input_dict['obs']["image"]`, flattened, and followed by a ReLU hidden
    layer and a linear output layer.

    Arguments:
        input_dict (dict): Dictionary of input tensors, including "obs",
            "prev_action", "prev_reward".
        num_outputs (int): Output tensor must be of size
            [BATCH_SIZE, num_outputs].
        options (dict): Model options; the layer configuration is read from
            options["custom_options"].

    Returns:
        (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs]
            and [BATCH_SIZE, desired_feature_size].
    """
    config = options["custom_options"]
    conv_initializers = get_init_conv()
    mlp_initializers = get_init_mlp()

    # Features Extractor
    features = input_dict['obs']["image"]
    for index in range(config["n_layers"]):
        conv = snt.Conv2D(
            output_channels=config["n_channels"][index],
            kernel_shape=config["kernel"][index],
            stride=config["stride"][index],
            initializers=conv_initializers)
        features = tf.nn.relu(conv(features))

    # Flatten input then mlp
    flat_features = snt.BatchFlatten(preserve_dims=1)(features)
    hidden_layer = snt.Linear(
        config["last_layer_hidden"], initializers=mlp_initializers)
    hidden = tf.nn.relu(hidden_layer(flat_features))
    outputs = snt.Linear(num_outputs, initializers=mlp_initializers)(hidden)

    # The caller expects the output and the input of the last layer.
    return outputs, hidden
def __init__(self,
             num_dimensions: int,
             num_components: int,
             multivariate: bool,
             name: str = 'GaussianMixture'):
    """Initialization.

    Args:
      num_dimensions: dimensionality of the output distribution
      num_components: number of mixture components.
      multivariate: whether the resulting distribution is multivariate or not.
      name: name of the module passed to snt.Module parent class.
    """
    super().__init__(name=name)

    self._num_dimensions = num_dimensions
    self._num_components = num_components
    self._multivariate = multivariate

    w_init = tf.initializers.VarianceScaling(
        distribution='uniform', mode='fan_out', scale=0.333)

    # One value per dimension and per component.
    per_component_size = self._num_dimensions * self._num_components

    # Layer producing the unnormalized log-weights: one per component in the
    # multivariate case, one per dimension and component otherwise.
    logits_size = (
        self._num_components if self._multivariate else per_component_size)
    self._logit_layer = snt.Linear(logits_size, w_init=w_init)

    # Two layers that output a location and a scale, respectively, for each
    # dimension and each component.
    self._loc_layer = snt.Linear(per_component_size, w_init=w_init)
    self._scale_layer = snt.Linear(per_component_size, w_init=w_init)
def __init__(
    self,
    num_dimensions: int,
    init_scale: float = 0.3,
    min_scale: float = 1e-6,
    tanh_mean: bool = False,
    fixed_scale: bool = False,
    use_tfd_independent: bool = False,
    w_init: snt_init.Initializer = tf.initializers.VarianceScaling(1e-4),
    b_init: snt_init.Initializer = tf.initializers.Zeros()):
    """Initialization.

    Args:
      num_dimensions: Number of dimensions of MVN distribution.
      init_scale: Initial standard deviation.
      min_scale: Minimum standard deviation.
      tanh_mean: Whether to transform the mean (via tanh) before passing it to
        the distribution.
      fixed_scale: Whether to use a fixed variance.
      use_tfd_independent: Whether to use tfd.Independent or
        tfd.MultivariateNormalDiag class
      w_init: Initialization for linear layer weights.
      b_init: Initialization for linear layer biases.
    """
    super().__init__(name='MultivariateNormalDiagHead')

    self._init_scale = init_scale
    self._min_scale = min_scale
    self._tanh_mean = tanh_mean

    linear_kwargs = dict(w_init=w_init, b_init=b_init)
    self._mean_layer = snt.Linear(num_dimensions, **linear_kwargs)

    self._fixed_scale = fixed_scale
    # The scale layer is only created when the scale is not fixed.
    if not fixed_scale:
        self._scale_layer = snt.Linear(num_dimensions, **linear_kwargs)

    self._use_tfd_independent = use_tfd_independent
def test_recurrent(self):
    # Smoke test: a recurrent actor wrapping a greedy linear policy can be run
    # in an environment loop for 20 episodes.
    environment = _make_fake_env()
    env_spec = specs.make_environment_spec(environment)

    def greedy_action(logits):
        return tf.argmax(
            logits, axis=-1, output_type=env_spec.actions.dtype)

    network = snt.DeepRNN([
        snt.Flatten(),
        snt.Linear(env_spec.actions.num_values),
        greedy_action,
    ])

    actor = actors_tf2.RecurrentActor(network)
    environment_loop.EnvironmentLoop(environment, actor).run(20)
def test_feedforward(self):
    # Smoke test: a feed-forward actor wrapping a greedy linear policy can be
    # run in an environment loop for 20 episodes.
    environment = _make_fake_env()
    env_spec = specs.make_environment_spec(environment)

    def greedy_action(logits):
        return tf.argmax(
            logits, axis=-1, output_type=env_spec.actions.dtype)

    network = snt.Sequential([
        snt.Flatten(),
        snt.Linear(env_spec.actions.num_values),
        greedy_action,
    ])

    actor = actors_tf2.FeedForwardActor(network)
    environment_loop.EnvironmentLoop(environment, actor).run(20)
def _build(self, x):
    """Add linear projections of per-axis statistics of `x` back onto `x`.

    For each of axes 0 and 1, four statistics of `x` are computed along that
    axis with `keepdims=True`: mean absolute value, root mean square, mean and
    standard deviation. They are concatenated on axis 2, projected by a linear
    layer to the size of axis 2 of `x`, and added to the running output.
    """
    # [channel, bs, 1]
    result = x
    for axis in (0, 1):
        abs_mean = tf.reduce_mean(tf.abs(x), axis=axis, keepdims=True)
        root_mean_square = tf.sqrt(
            tf.reduce_mean(x**2, axis=axis, keepdims=True) + 1e-6)
        mean, var = tf.nn.moments(x, [axis], keepdims=True)
        std = tf.sqrt(var + 1e-8)

        # [channels/1, units/1, stats]
        to_add = tf.concat([abs_mean, root_mean_square, mean, std], axis=2)
        projection = snt.BatchApply(snt.Linear(x.shape.as_list()[2]))
        result = result + projection(to_add)
    return result
def make_value_func_bsuite(
    environment_spec: EnvironmentSpec,
    value_layer_sizes: str = '50,50',
    adversarial_layer_sizes: str = '50,50',
) -> Tuple[snt.Module, snt.Module]:
    """Build the value network and the adversarial network.

    Both networks have the same structure: a `CriticMultiplexer` that one-hot
    encodes actions, an MLP with a final activation, and a one-unit linear
    layer.

    Args:
      environment_spec: provides `actions.num_values`, the one-hot depth.
      value_layer_sizes: comma-separated MLP layer sizes of the value network.
      adversarial_layer_sizes: comma-separated MLP layer sizes of the
        adversarial network.

    Returns:
      A `(value_function, adversarial_function)` pair of modules.
    """
    action_network = functools.partial(
        tf.one_hot, depth=environment_spec.actions.num_values)

    def build_critic(size_spec):
        sizes = [int(token) for token in size_spec.split(',')]
        return snt.Sequential([
            networks.CriticMultiplexer(action_network=action_network),
            snt.nets.MLP(sizes, activate_final=True),
            snt.Linear(1),
        ])

    value_function = build_critic(value_layer_sizes)
    adversarial_function = build_critic(adversarial_layer_sizes)
    return value_function, adversarial_function
def _build(self, x):
    """Encode `x` into per-capsule pose, features and presence.

    The image embedding from `self._encoder` is mapped to
    `self._n_caps * n_dims` values according to `self._encoder_type`
    ('linear', 'conv' or 'conv_att'), reshaped to
    `[batch_size, n_caps, n_dims]` and split into pose, feature and presence
    logit.

    Returns:
      `self.OutputTuple(pose, feature, pres, pres_logit, img_embedding)`;
      `feature` is None when `self._n_features` is 0.

    Raises:
      ValueError: if `self._encoder_type` is not one of the supported types.
    """
    batch_size = x.shape[0]
    img_embedding = self._encoder(x)

    splits = [self._n_caps_dims, self._n_features, 1]  # 1 for presence
    n_dims = sum(splits)
    n_caps = self._n_caps
    encoder_type = self._encoder_type

    if encoder_type == 'linear':
        n_outputs = n_caps * n_dims
        h = snt.BatchFlatten()(img_embedding)
        h = snt.Linear(n_outputs)(h)
    else:
        h = snt.AddBias(bias_dims=[1, 2, 3])(img_embedding)

        if encoder_type == 'conv':
            # 1x1 convolution, then average over axes 1 and 2.
            h = snt.Conv2D(n_dims * n_caps, 1, 1)(h)
            h = tf.reduce_mean(h, (1, 2))
            h = tf.reshape(h, [batch_size, n_caps, n_dims])
        elif encoder_type == 'conv_att':
            # The extra `n_caps` channels are attention logits; they are
            # soft-maxed over the merged axis and used as a weighted sum.
            h = snt.Conv2D(n_dims * n_caps + n_caps, 1, 1)(h)
            h = snt.MergeDims(1, 2)(h)
            h, attention = tf.split(h, [n_dims * n_caps, n_caps], -1)

            h = tf.reshape(h, [batch_size, -1, n_dims, n_caps])
            attention = tf.nn.softmax(attention, 1)
            attention = tf.reshape(attention, [batch_size, -1, 1, n_caps])
            h = tf.reduce_sum(h * attention, 1)
        else:
            raise ValueError('Invalid encoder type="{}".'.format(
                encoder_type))

    h = tf.reshape(h, [batch_size, n_caps, n_dims])

    pose, feature, pres_logit = tf.split(h, splits, -1)
    if self._n_features == 0:
        feature = None

    pres_logit = tf.squeeze(pres_logit, -1)
    if self._noise_scale > 0.:
        # Zero-centred uniform noise scaled by `self._noise_scale`.
        noise = tf.random.uniform(pres_logit.shape) - .5
        pres_logit += (noise * self._noise_scale)

    pres = tf.nn.sigmoid(pres_logit)
    pose = math_ops.geometric_transform(pose, self._similarity_transform)
    return self.OutputTuple(pose, feature, pres, pres_logit, img_embedding)
def _build(self, inputs, is_training=True):
    """Encode `inputs` with four Conv1D blocks and a final linear layer.

    Each block is Conv1D(12 channels, kernel 10, stride 2) -> BatchNorm ->
    optional max pooling -> activation. The activation (tanh or relu) and the
    pooling switch are read from class attributes of `EncodeProcessDecode_v1`.

    Args:
      inputs: tensor of shape (batch_size, feature_length).
      is_training: forwarded to every `snt.BatchNorm` call.

    Returns:
      A tensor with `EncodeProcessDecode_v1.dimensions_latent_repr` units.
    """
    settings = EncodeProcessDecode_v1
    activation = tf.nn.tanh if settings.convnet_tanh else tf.nn.relu

    # input shape is (batch_size, feature_length) but CNN operates on depth
    # channels --> (batch_size, feature_length, 1)
    outputs = tf.expand_dims(inputs, axis=2)

    # layers 1-4: identical convolutional blocks
    for _ in range(4):
        outputs = snt.Conv1D(
            output_channels=12, kernel_shape=10, stride=2)(outputs)
        # todo: deal with train/test time
        outputs = snt.BatchNorm()(outputs, is_training=is_training)
        if settings.convnet_pooling:
            outputs = tf.layers.max_pooling1d(outputs, 2, 2)
        outputs = activation(outputs)

    # layer 5: flatten and project to the latent representation
    outputs = snt.BatchFlatten()(outputs)
    projection = snt.Linear(output_size=settings.dimensions_latent_repr)
    return projection(outputs)
def _build(self, queries, keys, values, presence=None):
    """Multi-head attention built from independent `QKVAttention` heads.

    For each head, queries, keys and values are each projected by a fresh
    linear layer to `ceil(last_dim / n_heads)` units and passed to
    `QKVAttention` (together with `presence` when given). Head outputs are
    concatenated on the last axis and projected to the last dimension of
    `values`.
    """
    n_heads = self._n_heads

    def project(tensor):
        # Per-head width: the last dimension split across heads, rounded up.
        head_dim = np.ceil(float(int(tensor.shape[-1])) / n_heads)
        return snt.BatchApply(snt.Linear(int(head_dim)))(tensor)

    head_outputs = []
    for _ in range(n_heads):
        head_args = [project(queries), project(keys), project(values)]
        if presence is not None:
            head_args.append(presence)
        head_outputs.append(QKVAttention()(*head_args))

    output_projection = snt.BatchApply(snt.Linear(values.shape[-1]))
    return output_projection(tf.concat(head_outputs, -1))
def __init__(self, filter_size=5, num_filters=32, pooling_stride=2,
             act='tanh', summ=None, name="mapper"):
    """Create the pooling, activation, flatten and linear sub-modules.

    Args:
      filter_size: accepted but not used in this constructor.
      num_filters: accepted but not used in this constructor.
      pooling_stride: argument passed to `Downsample1D`.
      act: activation identifier passed to `Activation`.
      summ: stored as `self._summ`.
      name: module name.
    """
    super(Mapper, self).__init__(name=name)

    self._pool = Downsample1D(pooling_stride)
    self._act = Activation(act, verbose=True)
    self._bf = snt.BatchFlatten()
    self._summ = summ

    # Both linear layers share the same initializer dictionary.
    linear_initializers = {
        'w': tf.truncated_normal_initializer(stddev=0.01),
        'b': tf.zeros_initializer(),
    }
    with self._enter_variable_scope():
        self._lin1 = snt.Linear(64, initializers=linear_initializers)
        self._lin2 = snt.Linear(1, initializers=linear_initializers)
def __init__(self, num_hidden, output_size, nonlinearity=tf.sigmoid,
             name='mlp'):
    """Construct a `MLP`.

    Args:
      num_hidden: Number of hidden units in first FC layer.
      output_size: Size of the output layer on top of the MLP.
      nonlinearity: Activation function.
      name: Name of the module.
    """
    super(MLP, self).__init__(name=name)

    self._num_hidden = num_hidden
    self._output_size = output_size
    self._nonlinearity = nonlinearity

    # Both layers are created inside the module's own variable scope.
    with self._enter_variable_scope():
        self._l1 = snt.Linear(output_size=num_hidden, name='l1')
        self._l2 = snt.Linear(output_size=output_size, name='l2')