def __init__(self, hparams=None):
    EncoderBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        self.multihead_attention_list = []
        self.poswise_networks = []
        for i in range(self._hparams.num_blocks):
            with tf.variable_scope("layer_{}".format(i)):

                with tf.variable_scope('attention'):
                    mh_attn = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attention_list.append(mh_attn)
                    if self._hparams.dim != mh_attn.hparams.output_dim:
                        raise ValueError(
                            'The "output_dim" in the hparams of '
                            '"multihead_attention" should be equal to the '
                            '"dim" of TransformerEncoder.')

                pw_net = FeedForwardNetwork(
                    hparams=self._hparams['poswise_feedforward'])
                final_dim = pw_net.hparams.layers[-1]['kwargs']['units']
                if self._hparams.dim != final_dim:
                    raise ValueError(
                        'The output dimension of '
                        '"poswise_feedforward" should be equal '
                        'to the "dim" of TransformerEncoder.')
                self.poswise_networks.append(pw_net)
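# Illustrative usage sketch (not part of the module). It shows hparams that
# satisfy the checks in __init__ above: "dim" must equal both
# "multihead_attention.output_dim" and the "units" of the last
# "poswise_feedforward" layer. The specific values and the helper name are
# assumptions for demonstration only.
def _example_build_transformer_encoder():
    encoder_hparams = {
        "num_blocks": 2,
        "dim": 512,
        "multihead_attention": {"output_dim": 512},
        "poswise_feedforward": {
            "layers": [
                {"type": "Dense",
                 "kwargs": {"units": 2048, "activation": "relu"}},
                {"type": "Dense", "kwargs": {"units": 512}},
            ]
        },
    }
    return TransformerEncoder(hparams=encoder_hparams)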
def __init__(self,
             vocab_size=None,
             output_layer=None,
             hparams=None):
    ModuleBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        # Make the output layer
        self._output_layer, self._vocab_size = _make_output_layer(
            output_layer, vocab_size, self._hparams.output_layer_bias,
            self.variable_scope)

        # Make attention and poswise networks
        self.multihead_attentions = {
            'self_att': [],
            'encdec_att': []
        }
        self.poswise_networks = []
        for i in range(self._hparams.num_blocks):
            layer_name = 'layer_{}'.format(i)
            with tf.variable_scope(layer_name):
                with tf.variable_scope("self_attention"):
                    multihead_attention = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attentions['self_att'].append(
                        multihead_attention)

                if self._hparams.dim != \
                        multihead_attention.hparams.output_dim:
                    raise ValueError(
                        'The output dimension of '
                        'MultiheadAttentionEncoder should be equal '
                        'to the "dim" of TransformerDecoder.')

                with tf.variable_scope('encdec_attention'):
                    multihead_attention = MultiheadAttentionEncoder(
                        self._hparams.multihead_attention)
                    self.multihead_attentions['encdec_att'].append(
                        multihead_attention)

                if self._hparams.dim != \
                        multihead_attention.hparams.output_dim:
                    raise ValueError(
                        'The output dimension of '
                        'MultiheadAttentionEncoder should be equal '
                        'to the "dim" of TransformerDecoder.')

                pw_net = FeedForwardNetwork(
                    hparams=self._hparams['poswise_feedforward'])
                final_dim = pw_net.hparams.layers[-1]['kwargs']['units']
                if self._hparams.dim != final_dim:
                    raise ValueError(
                        'The output dimension of '
                        '"poswise_feedforward" should be equal '
                        'to the "dim" of TransformerDecoder.')
                self.poswise_networks.append(pw_net)

        # Built in _build()
        self.context = None
        self.context_sequence_length = None
        self.embedding = None
        self._helper = None
        self._cache = None
        self.max_decoding_length = None
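# Illustrative usage sketch (not part of the module). The decoder applies the
# same "dim" consistency rules as the encoder, for both the self-attention and
# encoder-decoder attention sub-layers, and additionally needs a vocab_size
# (or an explicit output_layer) to build its output projection. All values
# below are assumptions for demonstration only.
def _example_build_transformer_decoder():
    decoder_hparams = {
        "num_blocks": 2,
        "dim": 512,
        "multihead_attention": {"output_dim": 512},
        "poswise_feedforward": {
            "layers": [
                {"type": "Dense",
                 "kwargs": {"units": 2048, "activation": "relu"}},
                {"type": "Dense", "kwargs": {"units": 512}},
            ]
        },
    }
    return TransformerDecoder(vocab_size=10000, hparams=decoder_hparams)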
def __init__(self,
             vocab_size=None,
             output_layer=None,
             tau=None,
             hparams=None):
    EncoderBase.__init__(self, hparams)

    with tf.variable_scope(self.variable_scope):
        if self._hparams.initializer:
            tf.get_variable_scope().set_initializer(
                layers.get_initializer(self._hparams.initializer))

        # Make the output layer
        self._output_layer, self._vocab_size = _make_output_layer(
            output_layer, vocab_size, self._hparams.output_layer_bias,
            self.variable_scope)

        # Make attention and poswise networks
        self.graph_multihead_attention_list = []
        self.poswise_networks = []
        for i in range(self._hparams.num_blocks):
            with tf.variable_scope("layer_{}".format(i)):

                with tf.variable_scope('attention'):
                    mh_attn = GraphMultiheadAttentionEncoder(
                        self._hparams.graph_multihead_attention)
                    self.graph_multihead_attention_list.append(mh_attn)
                    if self._hparams.dim != mh_attn.hparams.output_dim:
                        raise ValueError(
                            'The "output_dim" in the hparams of '
                            '"graph_multihead_attention" should be equal to '
                            'the "dim" of '
                            'CrossGraphTransformerFixedLengthDecoder.')

                pw_net = FeedForwardNetwork(
                    hparams=self._hparams['poswise_feedforward'])
                final_dim = pw_net.hparams.layers[-1]['kwargs']['units']
                if self._hparams.dim != final_dim:
                    raise ValueError(
                        'The output dimension of '
                        '"poswise_feedforward" should be equal to the '
                        '"dim" of CrossGraphTransformerFixedLengthDecoder.')
                self.poswise_networks.append(pw_net)

        self._helper = None
        self._tau = tau
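# Illustrative usage sketch (not part of the module). The graph decoder reads
# its attention hparams from "graph_multihead_attention" and takes a `tau`
# value that is simply stored as `self._tau` here (it is used later in
# _build, not shown). The values below are assumptions for demonstration.
def _example_build_cross_graph_decoder():
    decoder_hparams = {
        "num_blocks": 2,
        "dim": 384,
        "graph_multihead_attention": {"output_dim": 384},
        "poswise_feedforward": {
            "layers": [
                {"type": "Dense",
                 "kwargs": {"units": 1536, "activation": "relu"}},
                {"type": "Dense", "kwargs": {"units": 384}},
            ]
        },
    }
    return CrossGraphTransformerFixedLengthDecoder(
        vocab_size=10000, tau=1.0, hparams=decoder_hparams)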
def test_feedforward(self):
    """Tests feed-forward.
    """
    hparams = {
        "layers": [
            {
                "type": "Dense",
            },
            {
                "type": "Dense",
            }
        ]
    }

    nn = FeedForwardNetwork(hparams=hparams)

    self.assertEqual(len(nn.layers), len(hparams["layers"]))

    _ = nn(tf.ones([64, 16, 16]))

    self.assertEqual(len(nn.trainable_variables),
                     len(hparams["layers"]) * 2)
    self.assertEqual(len(nn.layer_outputs), len(hparams["layers"]))
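# Illustrative variant of the test above (an assumption, not an existing
# test): it pins the Dense "units" explicitly, which is the same field the
# Transformer modules above read via hparams.layers[-1]['kwargs']['units']
# when validating their "dim".
def test_feedforward_explicit_units(self):
    """Sketch: checks that the final layer's "units" determines the
    network's output dimension.
    """
    hparams = {
        "layers": [
            {"type": "Dense", "kwargs": {"units": 128}},
            {"type": "Dense", "kwargs": {"units": 64}},
        ]
    }
    nn = FeedForwardNetwork(hparams=hparams)
    self.assertEqual(nn.hparams.layers[-1]['kwargs']['units'], 64)

    outputs = nn(tf.ones([64, 16, 16]))
    self.assertEqual(outputs.shape[-1], 64)
    self.assertEqual(len(nn.layer_outputs), len(hparams["layers"]))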