Example #1
 def __init__(self, units, n_gaze, n_step, **kwargs):
     super(GGNN, self).__init__(**kwargs)
     self.units = units
     self.n_gaze = n_gaze
     self.n_edge = (self.n_gaze + 1) * 2
     self.n_step = n_step
     self.gru_cell = GRUCell(units=self.units)
Example #2
 def __call__(self, input):
     rnn_cell_1 = GRUCell(units=self.rnn_units_1,
                          dropout=self.dropout,
                          recurrent_dropout=self.recurrent_dropout,
                          name=self.name + '_rnn_cell_1' if self.name else None)
     rnn_cell_2 = GRUCell(units=self.rnn_units_2,
                          dropout=self.dropout,
                          recurrent_dropout=self.recurrent_dropout,
                          name=self.name + '_rnn_cell_2' if self.name else None)
     # gru_cell_3 = GRUCell(units= rnn_units_3, dropout= rnn_dropout, recurrent_dropout= rnn_recurrent_dropout, reset_after= False)
     rnn_stack_cell = StackedRNNCells(cells=[rnn_cell_1, rnn_cell_2],
                                      name=self.name + '_stacked_rnn_cell' if self.name else None)
     rnn = RNN(cell=rnn_stack_cell,
               return_state=self.return_state,
               return_sequences=self.return_sequence,
               unroll=self.unroll,
               name=self.name)(input)
     return rnn
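For reference, a minimal standalone sketch of the same idea: two GRUCells stacked inside a single RNN layer, so the second cell consumes the first cell's output at every timestep. It assumes standalone Keras imports (tf.keras exposes the same class names); the layer sizes, dropout rate, and input shape are placeholders, not taken from the example.

from keras.layers import GRUCell, StackedRNNCells, RNN, Input
from keras.models import Model

inputs = Input(shape=(None, 16))                        # (timesteps, features)
cells = StackedRNNCells([GRUCell(32, dropout=0.1),
                         GRUCell(64, dropout=0.1)])
outputs = RNN(cells, return_sequences=True)(inputs)     # (batch, timesteps, 64)
model = Model(inputs, outputs)
model.summary()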
Example #3
    def get_model(self):
        # Input text
        encoder_inputs = Input(shape=(None, ))
        # Input summary
        decoder_inputs = Input(shape=(None, ))

        # word embedding layer for text
        encoder_inputs_emb = Embedding(input_dim=self.num_encoder_tokens + 1,
                                       output_dim=self.embedding_dim,
                                       mask_zero=True)(encoder_inputs)
        # word embedding layer for summary
        decoder_inputs_emb = Embedding(input_dim=self.num_decoder_tokens + 1,
                                       output_dim=self.embedding_dim,
                                       mask_zero=True)(decoder_inputs)

        # Bidirectional LSTM encoder
        encoder_out = Bidirectional(LSTM(self.hidden_dim // 2,
                                         return_sequences=True,
                                         return_state=True),
                                    merge_mode='concat')(encoder_inputs_emb)

        encoder_o = encoder_out[0]
        initial_h_lstm = concatenate([encoder_out[1], encoder_out[2]])
        initial_c_lstm = concatenate([encoder_out[3], encoder_out[4]])
        initial_decoder_state = Dense(self.hidden_dim, activation='tanh')(
            concatenate([initial_h_lstm, initial_c_lstm]))

        # LSTM decoder + attention
        initial_attention_h = Lambda(lambda x: K.zeros_like(x)[:, 0, :])(
            encoder_o)
        initial_state = [initial_decoder_state, initial_attention_h]

        cell = DenseAnnotationAttention(cell=GRUCell(self.hidden_dim),
                                        units=self.hidden_dim,
                                        input_mode="concatenate",
                                        output_mode="cell_output")

        # TODO output_mode="concatenate", see TODO(3)/A
        decoder_o, decoder_h, decoder_c = RNN(cell=cell,
                                              return_sequences=True,
                                              return_state=True)(
                                                  decoder_inputs_emb,
                                                  initial_state=initial_state,
                                                  constants=encoder_o)
        decoder_o = Dense(self.hidden_dim * 2)(concatenate(
            [decoder_o, decoder_inputs_emb]))
        y_pred = TimeDistributed(
            Dense(self.num_decoder_tokens + 1,
                  activation='softmax'))(decoder_o)

        model = Model([encoder_inputs, decoder_inputs], y_pred)
        return model
Example #4
def select_cell(cell_type, hidden_dim, l1=0.0, l2=0.0):
    """Select an RNN cell and initialises it with hidden_dim units."""
    if cell_type == 'vanilla':
        return SimpleRNNCell(units=hidden_dim,
                             kernel_regularizer=l1_l2(l1=l1, l2=l2),
                             recurrent_regularizer=l1_l2(l1=l1, l2=l2))
    elif cell_type == 'gru':
        return GRUCell(units=hidden_dim,
                       kernel_regularizer=l1_l2(l1=l1, l2=l2),
                       recurrent_regularizer=l1_l2(l1=l1, l2=l2))
    elif cell_type == 'lstm':
        return LSTMCell(units=hidden_dim,
                        kernel_regularizer=l1_l2(l1=l1, l2=l2),
                        recurrent_regularizer=l1_l2(l1=l1, l2=l2))
    else:
        raise ValueError(
            'Unknown cell type. Please select one of: vanilla, gru, or lstm.')
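A hedged usage sketch for select_cell: the returned object is a cell, not a layer, so it is normally wrapped in an RNN layer to run over a sequence. select_cell itself assumes that SimpleRNNCell, GRUCell, LSTMCell and l1_l2 (from keras.regularizers) have been imported; the input shape and regularisation strength below are illustrative only.

from keras.layers import RNN, Input
from keras.models import Model

cell = select_cell('gru', hidden_dim=64, l2=1e-4)
x = Input(shape=(None, 16))        # (timesteps, features)
h = RNN(cell)(x)                   # final hidden state, shape (batch, 64)
model = Model(x, h)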
Example #5
 def call(self, inputs):
     """ Inputs should be [message, previous_state], returns [next_state]
     """
     return GRUCell.call(self, inputs[0], [inputs[1]])[0]
Example #6
 def build(self, input_shape):
     GRUCell.build(self, input_shape[0])
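Examples #5 and #6 read as two methods of the same GRUCell subclass: build receives a list of input shapes and forwards only the message shape to the underlying cell, while call accepts [message, previous_state] as a single list so the cell can be applied like an ordinary layer (for instance as the node-update step of a message-passing network). Put together, a minimal version of such a subclass might look like the following; the class name is hypothetical.

from keras.layers import GRUCell

class NodeUpdateGRUCell(GRUCell):
    def build(self, input_shape):
        # input_shape == [message_shape, state_shape]; the GRU kernels only
        # depend on the message (input) dimension.
        GRUCell.build(self, input_shape[0])

    def call(self, inputs):
        # inputs = [message, previous_state]; GRUCell.call expects
        # (inputs, states) and returns (output, [new_state]), and for a GRU
        # the output *is* the new state, so index [0] is the next state.
        return GRUCell.call(self, inputs[0], [inputs[1]])[0]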
Example #7
class GGNN(Layer):
    """
    Implementation of the adapted GGNN introduced in Ding et al.,
    "A Neural Multi-digraph Model for Chinese NER with Gazetteers".
    """
    def __init__(self, units, n_gaze, n_step, **kwargs):
        super(GGNN, self).__init__(**kwargs)
        self.units = units
        self.n_gaze = n_gaze
        self.n_edge = (self.n_gaze + 1) * 2
        self.n_step = n_step
        self.gru_cell = GRUCell(units=self.units)

    def build(self, input_shape):
        embed_dim = input_shape[0][-1]
        assert embed_dim == self.units
        self.alpha = self.add_weight(name=self.name +
                                     'contribution_coefficient',
                                     shape=(self.n_edge, ),
                                     initializer='ones')
        self.w = self.add_weight(name=self.name + '_w',
                                 shape=(self.n_edge, embed_dim, self.units),
                                 initializer=RandomNormal(0., 0.02))
        self.b = self.add_weight(name=self.name + '_b',
                                 shape=(self.n_edge, self.units),
                                 initializer='zeros')
        self.gru_cell.build([None, self.units * self.n_edge])
        super(GGNN, self).build(input_shape)

    def call(self, inputs, **kwargs):
        # init_state: [batch_size, n_node, embed_dim]
        # adj_matrix: [batch_size, n_edge, n_node, n_node]
        init_state, adj_matrix = inputs
        n_node = K.shape(init_state)[1]

        expand_alpha = K.expand_dims(K.expand_dims(self.alpha, axis=-1),
                                     axis=-1)
        weighted_adj_matrix = adj_matrix * K.sigmoid(expand_alpha)

        cur_state = K.identity(init_state)
        for _ in range(self.n_step):
            h = K.dot(cur_state,
                      self.w) + self.b  # [batch_size, n_node, n_edge, units]
            neigh_state = []
            for edge_idx in range(self.n_edge):
                neigh_state.append(
                    K.batch_dot(weighted_adj_matrix[:, edge_idx, :, :],
                                h[:, :, edge_idx, :],
                                axes=(2, 1)))  # [batch_size, n_node, units]
            neigh_state = K.concatenate(
                neigh_state, axis=-1)  # [batch_size, n_node, units*n_edge]

            gru_inputs = K.reshape(neigh_state, (-1, self.units * self.n_edge))
            gru_states = K.reshape(cur_state, (-1, self.units))
            # GRUCell.call takes (inputs, states) and returns (output, [new_state]);
            # see GRUCell's implementation for the state handling
            gru_output, _ = self.gru_cell.call(inputs=gru_inputs,
                                               states=[gru_states])
            cur_state = K.reshape(gru_output, (-1, n_node, self.units))
        return cur_state

    def compute_output_shape(self, input_shape):
        return input_shape[0][0], input_shape[0][1], self.units

    @property
    def trainable_weights(self):
        return self._trainable_weights + self.gru_cell.trainable_weights
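A hedged usage sketch for the GGNN layer above, assuming the standalone-Keras imports the class relies on (Layer, GRUCell, RandomNormal, backend as K) are in scope. The layer takes node embeddings of shape (batch, n_node, units) together with a stacked adjacency tensor of shape (batch, n_edge, n_node, n_node), where n_edge = (n_gaze + 1) * 2; the concrete sizes below are illustrative only.

from keras.layers import Input
from keras.models import Model

n_gaze, units, n_node = 3, 64, 20
node_emb = Input(shape=(n_node, units))                    # initial node states
adj = Input(shape=((n_gaze + 1) * 2, n_node, n_node))      # one slice per edge type
out = GGNN(units=units, n_gaze=n_gaze, n_step=2)([node_emb, adj])
model = Model([node_emb, adj], out)                        # out: (batch, n_node, units)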
Example #8
    def __init__(self,
                 units,
                 activation='tanh',
                 recurrent_activation='hard_sigmoid',
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 recurrent_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 recurrent_constraint=None,
                 bias_constraint=None,
                 dropout=0.,
                 recurrent_dropout=0.,
                 implementation=1,
                 return_sequences=False,
                 return_state=False,
                 go_backwards=False,
                 stateful=False,
                 unroll=False,
                 reset_after=False,
                 **kwargs):
        if implementation == 0:
            warnings.warn('`implementation=0` has been deprecated, '
                          'and now defaults to `implementation=1`. '
                          'Please update your layer call.')
        if K.backend() == 'theano' and (dropout or recurrent_dropout):
            warnings.warn(
                'RNN dropout is no longer supported with the Theano backend '
                'due to technical limitations. '
                'You can either set `dropout` and `recurrent_dropout` to 0, '
                'or use the TensorFlow backend.')
            dropout = 0.
            recurrent_dropout = 0.

        cell = GRUCell(units,
                       activation=activation,
                       recurrent_activation=recurrent_activation,
                       use_bias=use_bias,
                       kernel_initializer=kernel_initializer,
                       recurrent_initializer=recurrent_initializer,
                       bias_initializer=bias_initializer,
                       kernel_regularizer=kernel_regularizer,
                       recurrent_regularizer=recurrent_regularizer,
                       bias_regularizer=bias_regularizer,
                       kernel_constraint=kernel_constraint,
                       recurrent_constraint=recurrent_constraint,
                       bias_constraint=bias_constraint,
                       dropout=dropout,
                       recurrent_dropout=recurrent_dropout,
                       implementation=implementation,
                       reset_after=reset_after)
        super(AttGRU, self).__init__(cell,
                                     return_sequences=return_sequences,
                                     return_state=return_state,
                                     go_backwards=go_backwards,
                                     stateful=stateful,
                                     unroll=unroll,
                                     **kwargs)
        self.activity_regularizer = regularizers.get(activity_regularizer)
Example #9
                      mask_zero=True)(x)
    y_emb = Embedding(target_max_word_idx + 1, EMBEDDING_SIZE,
                      mask_zero=True)(y)

    encoder_rnn = Bidirectional(
        GRU(RECURRENT_UNITS, return_sequences=True, return_state=True))
    x_enc, h_enc_fwd_final, h_enc_bkw_final = encoder_rnn(x_emb)

    # the final state of the backward-GRU (closest to the start of the input
    # sentence) is used to initialize the state of the decoder
    initial_state_gru = Dense(RECURRENT_UNITS,
                              activation='tanh')(h_enc_bkw_final)
    initial_attention_h = Lambda(lambda x: K.zeros_like(x)[:, 0, :])(x_enc)
    initial_state = [initial_state_gru, initial_attention_h]

    cell = DenseAnnotationAttention(cell=GRUCell(RECURRENT_UNITS),
                                    units=DENSE_ATTENTION_UNITS,
                                    input_mode="concatenate",
                                    output_mode="cell_output")
    # TODO output_mode="concatenate", see TODO(3)/A
    decoder_rnn = RNN(cell=cell, return_sequences=True, return_state=True)
    h1_and_state = decoder_rnn(y_emb,
                               initial_state=initial_state,
                               constants=x_enc)
    h1 = h1_and_state[0]

    def dense_maxout(x_):
        """Implements a dense maxout layer where max is taken
        over _two_ units"""
        x_ = Dense(READOUT_HIDDEN_UNITS * 2)(x_)
        x_1 = x_[:, :READOUT_HIDDEN_UNITS]