Example #1
    def _rnn_encoder(model):
        """

        :type model: modeling.BERTModel
        """
        with tf.variable_scope('rnn_encoder'):
            # Embed clinical observations
            embedded_observations = layers.embedding_layer(model.observations, model.vocabulary_size,
                                                           model.embedding_size,
                                                           model.vocab_dropout,
                                                           training=model.training)

            # Reshape to (batch * seq_len) x doc_len x embedding
            flattened_embedded_obs = tf.reshape(embedded_observations,
                                                [model.batch_size * model.max_seq_len,
                                                 model.max_snapshot_size,
                                                 model.embedding_size],
                                                name='flat_emb_obs')
            flattened_snapshot_sizes = tf.reshape(model.snapshot_sizes, [model.batch_size * model.max_seq_len],
                                                  name='flat_snapshot_sizes')

            # Apply RNN to all documents in all batches (cell_fn and num_hidden are
            # captured from the enclosing scope)
            flattened_snapshot_encodings = layers.rnn_layer(cell_fn=cell_fn,
                                                            num_hidden=num_hidden,
                                                            inputs=flattened_embedded_obs,
                                                            lengths=flattened_snapshot_sizes,
                                                            return_interpretable_weights=False)

            # Reshape back to (batch x seq_len x encoding_size)
            return tf.reshape(flattened_snapshot_encodings,
                              [model.batch_size, model.max_seq_len, flattened_snapshot_encodings.shape[-1]],
                              name='rnn_snapshot_encoding')
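The flatten–encode–unflatten pattern above is generic: collapse the leading batch and sequence axes, run one RNN over every snapshot at once, and restore the structure afterwards. A minimal self-contained sketch in plain TensorFlow 1.x, with hypothetical dimensions and a GRU standing in for layers.rnn_layer:

import tensorflow as tf  # TF 1.x

batch_size, max_seq_len, max_snapshot_size, emb_dim, num_hidden = 8, 10, 20, 32, 64

# (batch, seq_len, snapshot, emb) -> collapse the two leading axes so a single
# dynamic_rnn call encodes every snapshot in every sequence at once
obs = tf.placeholder(tf.float32, [batch_size, max_seq_len, max_snapshot_size, emb_dim])
sizes = tf.placeholder(tf.int32, [batch_size, max_seq_len])

flat_obs = tf.reshape(obs, [batch_size * max_seq_len, max_snapshot_size, emb_dim])
flat_sizes = tf.reshape(sizes, [batch_size * max_seq_len])

cell = tf.nn.rnn_cell.GRUCell(num_hidden)
_, final_state = tf.nn.dynamic_rnn(cell, flat_obs, sequence_length=flat_sizes,
                                   dtype=tf.float32)

# Restore the (batch, seq_len) structure around the per-snapshot encodings
encodings = tf.reshape(final_state, [batch_size, max_seq_len, num_hidden])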
Example #2
def generator(source,
              target,
              sequence_length,
              vocab_size,
              decoder_fn=None,
              **opts):
    """
    Args:
        source: TensorFlow queue or placeholder tensor for word ids for source 
        target: TensorFlow queue or placeholder tensor for word ids for target
        sequence_length: TensorFlow queue or placeholder tensor for number of word ids for each sentence
        vocab_size: max vocab size determined from data
        decoder_fn: if using custom decoder_fn else use the default dynamic_rnn
    """
    tf.logging.info(" Setting up generator")

    embedding_layer = lay.embedding_layer(vocab_size,
                                          opts["embedding_dim"],
                                          name="embedding_matrix")

    # TODO: add batch norm?
    rnn_outputs = (source >> embedding_layer >> lay.word_dropout_layer(
        keep_prob=opts["word_dropout_keep_prob"]) >> lay.recurrent_layer(
            hidden_dims=opts["rnn_hidden_dim"],
            keep_prob=opts["recurrent_dropout_keep_prob"],
            sequence_length=sequence_length,
            decoder_fn=decoder_fn,
            name="rnn_cell"))

    output_projection_layer = lay.dense_layer(hidden_dims=vocab_size,
                                              name="output_projections")

    flat_logits = (rnn_outputs >> lay.reshape_layer(
        shape=(-1, opts["rnn_hidden_dim"])) >> output_projection_layer)

    probs = flat_logits >> lay.softmax_layer()

    embedding_matrix = embedding_layer.get_variables_in_scope()
    output_projections = output_projection_layer.get_variables_in_scope()

    if decoder_fn is not None:
        return GeneratorTuple(rnn_outputs=rnn_outputs,
                              flat_logits=flat_logits,
                              probs=probs,
                              loss=None,
                              embedding_matrix=embedding_matrix[0],
                              output_projections=output_projections)

    loss = (flat_logits >> lay.cross_entropy_layer(target=target) >>
            lay.reshape_layer(shape=tf.shape(target)) >>
            lay.mean_loss_by_example_layer(sequence_length=sequence_length))

    # TODO: add dropout penalty
    return GeneratorTuple(rnn_outputs=rnn_outputs,
                          flat_logits=flat_logits,
                          probs=probs,
                          loss=loss,
                          embedding_matrix=embedding_matrix[0],
                          output_projections=output_projections)
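The tail of the loss pipeline (lay.cross_entropy_layer >> lay.reshape_layer >> lay.mean_loss_by_example_layer) normalizes each sentence's loss by its true length. The internals of those layers aren't shown; a plausible plain-TF equivalent, assuming sparse integer targets:

import tensorflow as tf  # TF 1.x

# flat_logits: (batch * time, vocab); target: (batch, time) int ids;
# sequence_length: (batch,) counts of real (non-padding) tokens
def mean_loss_by_example(flat_logits, target, sequence_length):
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.reshape(target, [-1]), logits=flat_logits)
    xent = tf.reshape(xent, tf.shape(target))                   # (batch, time)
    mask = tf.sequence_mask(sequence_length,
                            maxlen=tf.shape(target)[1], dtype=tf.float32)
    # Average over the real tokens of each example, then over the batch
    lengths = tf.cast(tf.maximum(sequence_length, 1), tf.float32)
    per_example = tf.reduce_sum(xent * mask, axis=1) / lengths
    return tf.reduce_mean(per_example)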
Example #3
def generator(source, target, sequence_length, vocab_size, decoder_fn=None, **opts):
    """
    Args:
        source: TensorFlow queue or placeholder tensor for word ids for source 
        target: TensorFlow queue or placeholder tensor for word ids for target
        sequence_length: TensorFlow queue or placeholder tensor for number of word ids for each sentence
        vocab_size: max vocab size determined from data
        decoder_fn: if using custom decoder_fn else use the default dynamic_rnn
    """
    tf.logging.info(" Setting up generator")

    embedding_layer = lay.embedding_layer(vocab_size, opts["embedding_dim"], name="embedding_matrix")

    # TODO: add batch norm?
    rnn_outputs = (
        source >>
        embedding_layer >>
        lay.word_dropout_layer(keep_prob=opts["word_dropout_keep_prob"]) >>
        lay.recurrent_layer(hidden_dims=opts["rnn_hidden_dim"], keep_prob=opts["recurrent_dropout_keep_prob"],
                            sequence_length=sequence_length, decoder_fn=decoder_fn, name="rnn_cell")
    )

    output_projection_layer = lay.dense_layer(hidden_dims=vocab_size, name="output_projections")

    flat_logits = (
        rnn_outputs >>
        lay.reshape_layer(shape=(-1, opts["rnn_hidden_dim"])) >>
        output_projection_layer
    )

    probs = flat_logits >> lay.softmax_layer()

    embedding_matrix = embedding_layer.get_variables_in_scope()
    output_projections = output_projection_layer.get_variables_in_scope()

    if decoder_fn is not None:
        return GeneratorTuple(rnn_outputs=rnn_outputs, flat_logits=flat_logits, probs=probs, loss=None,
                              embedding_matrix=embedding_matrix[0], output_projections=output_projections)

    loss = (
        flat_logits >>
        lay.cross_entropy_layer(target=target) >>
        lay.reshape_layer(shape=tf.shape(target)) >>
        lay.mean_loss_by_example_layer(sequence_length=sequence_length)
    )

    # TODO: add dropout penalty
    return GeneratorTuple(rnn_outputs=rnn_outputs, flat_logits=flat_logits, probs=probs, loss=loss,
                          embedding_matrix=embedding_matrix[0], output_projections=output_projections)
Example #4
    def _dan_encoder(model):
        """
        :param model:
        :type model: modeling.CANTRIPModel
        :return:
        """
        with tf.variable_scope('dan_encoder'):
            embedded_observations = layers.embedding_layer(model.observations, model.vocabulary_size,
                                                           model.embedding_size, model.vocab_dropout,
                                                           training=model.training)

            # Reshape to (batch * seq_len * doc_len) x embedding
            flattened_embedded_observations = tf.reshape(
                embedded_observations,
                [model.batch_size * model.max_seq_len * model.max_snapshot_size,
                 model.embedding_size]
            )
            # Add dense observation layers (obs_hidden_units, avg_hidden_units, and
            # activation_fn are captured from the enclosing scope)
            obs_layer = flattened_embedded_observations
            for num_hidden in obs_hidden_units:
                obs_layer = tf.keras.layers.Dense(units=num_hidden, activation=activation_fn)(obs_layer)

            # Reshape final output by grouping observations in the same snapshot together
            obs_layer = tf.reshape(obs_layer, [model.batch_size * model.max_seq_len,
                                               model.max_snapshot_size,
                                               obs_hidden_units[-1]])

            # Divide by active number of observations rather than the padded snapshot size; requires reshaping to
            # (batch x seq_len) x 1 so we can divide by this
            flattened_snapshot_sizes = tf.reshape(model.snapshot_sizes, [model.batch_size * model.max_seq_len, 1])

            mask = tf.sequence_mask(model.snapshot_sizes, maxlen=model.max_snapshot_size, dtype=tf.float32)
            mask = tf.reshape(mask, [model.batch_size * model.max_seq_len, model.max_snapshot_size, 1])

            # Compute dynamic-size element-wise average
            avg_layer = tf.reduce_sum(obs_layer * mask, axis=1)
            avg_layer = avg_layer / tf.cast(tf.maximum(flattened_snapshot_sizes, 1), dtype=tf.float32)

            # More fun dense layers
            for num_hidden in avg_hidden_units:
                avg_layer = tf.keras.layers.Dense(num_hidden, activation_fn)(avg_layer)

            # Final output of the model
            output = tf.keras.layers.Dense(model.embedding_size, activation_fn)(avg_layer)

            # Reshape to [batch_size x seq_len x encoding_size]
            return tf.reshape(output, [model.batch_size, model.max_seq_len, model.embedding_size])
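The mask-and-divide step is the core of the dynamic-size average: sum only the real observations, then divide by the true count rather than the padded snapshot size. A standalone sketch with hypothetical shapes:

import tensorflow as tf  # TF 1.x

max_snapshot_size, dim = 20, 64
# obs: (groups, max_snapshot_size, dim); sizes: (groups,) true observation counts
obs = tf.placeholder(tf.float32, [None, max_snapshot_size, dim])
sizes = tf.placeholder(tf.int32, [None])

mask = tf.sequence_mask(sizes, maxlen=max_snapshot_size, dtype=tf.float32)
mask = tf.expand_dims(mask, -1)                       # broadcast over the feature axis

total = tf.reduce_sum(obs * mask, axis=1)             # sum of only the real observations
count = tf.cast(tf.maximum(tf.expand_dims(sizes, -1), 1), tf.float32)
avg = total / count                                   # (groups, dim) masked average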
Example #5
    def _dan_encoder(model):
        """

        :param model:
        :type model: modeling.PRONTOModel
        :return:
        """
        with tf.variable_scope('dan_encoder'):
            embedded_observations = layers.embedding_layer(model.observations, model.vocabulary_size,
                                                           model.embedding_size, model.vocab_dropout,
                                                           training=model.training)

            # Reshape to (batch * seq_len * doc_len) x embedding
            flattened_embedded_observations = tf.reshape(
                embedded_observations,
                [model.batch_size * model.max_seq_len * model.max_snapshot_size,
                 model.embedding_size]
            )
            # Add dense observation layers
            # TODO: switch back to ReLU as described in the paper
            obs_layer = flattened_embedded_observations
            for num_hidden in obs_hidden_units:
                obs_layer = tf.layers.dense(obs_layer, num_hidden, tf.nn.tanh)

            # Reshape final output by grouping observations in the same snapshot together
            obs_layer = tf.reshape(obs_layer, [model.batch_size * model.max_seq_len,
                                               model.max_snapshot_size,
                                               obs_layer.shape[-1]])

            # Divide by active number of observations rather than the padded snapshot size; requires reshaping to
            # (batch x seq_len) x 1 so we can divide by this
            flattened_snapshot_sizes = tf.reshape(model.snapshot_sizes, [model.batch_size * model.max_seq_len, 1])

            # Compute dynamic-size element-wise average: sum the observations, then divide by the
            # true snapshot size (padded positions are assumed to contribute zero; masking them
            # explicitly, as in the previous example, is stricter)
            avg_layer = tf.reduce_sum(obs_layer, axis=1) / tf.to_float(tf.maximum(flattened_snapshot_sizes, 1))

            # More fun dense layers
            # TODO: switch back to ReLU as described in the paper
            for num_hidden in avg_hidden_units:
                avg_layer = tf.layers.dense(avg_layer, num_hidden, tf.nn.tanh)

            # Final output of the model
            output = tf.layers.dense(avg_layer, model.embedding_size, tf.nn.tanh)

            # Reshape to [batch_size x seq_len x encoding_size]
            return tf.reshape(output, [model.batch_size, model.max_seq_len, model.embedding_size])
Example #6
    def _cnn_encoder(model):
        """

        :type model: BERTModel
        """
        with tf.variable_scope('cnn_encoder'):
            # Embed observations
            embedded_observations = layers.embedding_layer(model.observations, model.vocabulary_size,
                                                           model.embedding_size,
                                                           model.vocab_dropout,
                                                           training=model.training)

            # Reshape to (batch * seq_len) x snapshot_size x embedding
            flattened_embedded_obs = tf.reshape(embedded_observations,
                                                [model.batch_size * model.max_seq_len,
                                                 model.max_snapshot_size,
                                                 model.embedding_size])

            # Apply parallel convolutional and pooling layers (windows, kernels, and
            # dropout are captured from the enclosing scope)
            outputs = []
            for n in windows:
                if dropout > 0:
                    flattened_embedded_obs = \
                        tf.keras.layers.Dropout(rate=model.dropout)(flattened_embedded_obs, training=model.training)
                conv_layer = tf.keras.layers.Convolution1D(filters=kernels,
                                                           kernel_size=n,
                                                           activation=tf.nn.leaky_relu,
                                                           name="conv_%dgram" % n)(flattened_embedded_obs)
                # Max-over-time pooling: the pool must span the full convolution output
                # (length max_snapshot_size - n + 1), not a single position
                pool_layer = tf.keras.layers.MaxPooling1D(pool_size=model.max_snapshot_size - n + 1,
                                                          name="maxpool_%dgram" % n)(conv_layer)
                outputs.append(pool_layer)

            # Concatenate pooled outputs
            output = tf.concat(outputs, axis=-1)

            # Project the concatenated n-gram features with a dense ReLU layer
            embeddings = tf.keras.layers.Dense(units=model.embedding_size, activation=tf.nn.relu)(output)

            # Reshape back to [batch_size x max_seq_len x encoding_size]
            return tf.reshape(embeddings, [model.batch_size, model.max_seq_len, model.embedding_size])
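The per-width convolution plus max-over-time pooling branches form the familiar n-gram CNN encoder. A compact sketch using Keras layers, with GlobalMaxPooling1D doing the max-over-time step (the widths and filter count here are illustrative):

import tensorflow as tf  # TF 1.x-style Keras layers

# tokens: (batch, length, emb). One branch per n-gram width, each followed by
# max-over-time pooling; the pooled features are then concatenated.
def ngram_cnn(tokens, widths=(2, 3, 4), filters=100):
    pooled = []
    for n in widths:
        conv = tf.keras.layers.Conv1D(filters, n, activation=tf.nn.leaky_relu,
                                      name="conv_%dgram" % n)(tokens)
        # GlobalMaxPooling1D takes the max over every valid window position
        pooled.append(tf.keras.layers.GlobalMaxPooling1D(name="pool_%dgram" % n)(conv))
    return tf.concat(pooled, axis=-1)   # (batch, len(widths) * filters)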
Example #7
    def __init__(self,
                 corpus,
                 n_filters=(128, 256),
                 filter_width=3,
                 token_embeddings_dim=128,
                 char_embeddings_dim=50,
                 use_char_embeddins=True,
                 pretrained_model_filepath=None,
                 embeddings_dropout=False,
                 dense_dropout=False,
                 use_batch_norm=False,
                 logging=False,
                 use_crf=False,
                 net_type='cnn',
                 char_filter_width=5,
                 verbouse=True,
                 use_capitalization=False,
                 concat_embeddings=False,
                 cell_type=None):
        tf.reset_default_graph()

        n_tags = len(corpus.tag_dict)
        n_tokens = len(corpus.token_dict)
        n_chars = len(corpus.char_dict)
        embeddings_onethego = not concat_embeddings and \
                              corpus.embeddings is not None and \
                              not isinstance(corpus.embeddings, dict)

        # Create placeholders
        if embeddings_onethego:
            x_word = tf.placeholder(
                dtype=tf.float32,
                shape=[None, None, corpus.embeddings.vector_size],
                name='x_word')
        else:
            x_word = tf.placeholder(dtype=tf.int32,
                                    shape=[None, None],
                                    name='x_word')
        if concat_embeddings:
            x_emb = tf.placeholder(
                dtype=tf.float32,
                shape=[None, None, corpus.embeddings.vector_size],
                name='x_emb')
        x_char = tf.placeholder(dtype=tf.int32,
                                shape=[None, None, None],
                                name='x_char')
        y_true = tf.placeholder(dtype=tf.int32,
                                shape=[None, None],
                                name='y_tag')
        mask = tf.placeholder(dtype=tf.float32,
                              shape=[None, None],
                              name='mask')
        x_capi = tf.placeholder(dtype=tf.float32,
                                shape=[None, None],
                                name='x_capi')

        # Auxiliary placeholders
        learning_rate_ph = tf.placeholder(dtype=tf.float32,
                                          shape=[],
                                          name='learning_rate')
        dropout_ph = tf.placeholder_with_default(1.0, shape=[])
        training_ph = tf.placeholder_with_default(False, shape=[])
        learning_rate_decay_ph = tf.placeholder(dtype=tf.float32,
                                                shape=[],
                                                name='learning_rate_decay')

        # Embeddings
        if not embeddings_onethego:
            with tf.variable_scope('Embeddings'):
                w_emb = embedding_layer(
                    x_word,
                    n_tokens=n_tokens,
                    token_embedding_dim=token_embeddings_dim)
                if use_char_embeddins:
                    c_emb = character_embedding_network(
                        x_char,
                        n_characters=n_chars,
                        char_embedding_dim=char_embeddings_dim,
                        filter_width=char_filter_width)
                    emb = tf.concat([w_emb, c_emb], axis=-1)
                else:
                    emb = w_emb
        else:
            emb = x_word

        if concat_embeddings:
            emb = tf.concat([emb, x_emb], axis=2)

        if use_capitalization:
            cap = tf.expand_dims(x_capi, 2)
            emb = tf.concat([emb, cap], axis=2)

        # Dropout for embeddings (NB: tf.layers.dropout takes a drop *rate* as its
        # second argument, so dropout_ph must be fed as a rate, not a keep probability)
        if embeddings_dropout:
            emb = tf.layers.dropout(emb, dropout_ph, training=training_ph)

        if 'cnn' in net_type.lower():
            # Convolutional network
            with tf.variable_scope('ConvNet'):
                units = stacked_convolutions(emb,
                                             n_filters=n_filters,
                                             filter_width=filter_width,
                                             use_batch_norm=use_batch_norm,
                                             training_ph=training_ph)
        elif 'rnn' in net_type.lower():
            if cell_type is None or cell_type not in {'lstm', 'gru'}:
                raise RuntimeError(
                    'You must specify the type of the cell! It could be either "lstm" or "gru"'
                )
            units = stacked_rnn(emb, n_filters, cell_type=cell_type)

        elif 'cnn_highway' in net_type.lower():
            units = highway_convolutional_network(
                emb,
                n_filters=n_filters,
                filter_width=filter_width,
                use_batch_norm=use_batch_norm,
                training_ph=training_ph)
        else:
            raise KeyError(
                'There is no such type of network: {}'.format(net_type))

        # Classifier
        with tf.variable_scope('Classifier'):
            logits = tf.layers.dense(units,
                                     n_tags,
                                     kernel_initializer=xavier_initializer())

        if use_crf:
            # crf_log_likelihood expects integer sequence lengths; mask is float32 here
            sequence_lengths = tf.cast(tf.reduce_sum(mask, axis=1), tf.int32)
            log_likelihood, trainsition_params = tf.contrib.crf.crf_log_likelihood(
                logits, y_true, sequence_lengths)
            loss_tensor = -log_likelihood
            predictions = None
        else:
            ground_truth_labels = tf.one_hot(y_true, n_tags)
            loss_tensor = tf.nn.softmax_cross_entropy_with_logits(
                labels=ground_truth_labels, logits=logits)
            loss_tensor = loss_tensor * mask
            predictions = tf.argmax(logits, axis=-1)

        loss = tf.reduce_mean(loss_tensor)

        # Initialize session
        sess = tf.Session()
        if verbouse:
            self.print_number_of_parameters()
        if logging:
            self.train_writer = tf.summary.FileWriter('summary', sess.graph)

        self._use_crf = use_crf
        self.summary = tf.summary.merge_all()

        self._learning_rate_decay_ph = learning_rate_decay_ph
        self._x_w = x_word
        self._x_c = x_char
        self._y_true = y_true
        self._y_pred = predictions
        if concat_embeddings:
            self._x_emb = x_emb
        if use_crf:
            self._logits = logits
            self._trainsition_params = trainsition_params
            self._sequence_lengths = sequence_lengths
        self._learning_rate_ph = learning_rate_ph
        self._dropout = dropout_ph

        self._loss = loss
        self._sess = sess
        self.corpus = corpus

        self._loss_tensor = loss_tensor
        self._use_dropout = embeddings_dropout or dense_dropout

        self._training_ph = training_ph
        self._logging = logging

        # Get training op
        self._train_op = self.get_train_op(
            loss, learning_rate_ph, lr_decay_rate=learning_rate_decay_ph)
        self._embeddings_onethego = embeddings_onethego
        self.verbouse = verbouse
        sess.run(tf.global_variables_initializer())
        self._mask = mask
        if use_capitalization:
            self._x_capi = x_capi
        self._use_capitalization = use_capitalization
        self._concat_embeddings = concat_embeddings
        if pretrained_model_filepath is not None:
            self.load(pretrained_model_filepath)
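When use_crf is set, the model trains on the negative CRF log-likelihood, and decoding then needs the learned transition parameters. A minimal sketch of that pairing, using tf.contrib.crf with hypothetical tensor shapes:

import tensorflow as tf  # TF 1.x with tf.contrib

# logits: (batch, time, n_tags); y_true: (batch, time) gold tag ids;
# lengths: (batch,) int32 true sequence lengths
def crf_loss_and_decode(logits, y_true, lengths):
    log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
        logits, y_true, lengths)
    loss = tf.reduce_mean(-log_likelihood)
    # Viterbi decoding as an in-graph op
    viterbi_tags, _ = tf.contrib.crf.crf_decode(logits, transition_params, lengths)
    return loss, viterbi_tags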
Example #8
    def step_through_session(self, X, attention_mask, return_last_with_hidden_states=False, return_softmax=False, reuse=False):
        """
        Train for a batch of sessions in the HRED X can be a 3-D tensor (steps, batch, vocab)

        :param X: The input sessions. Lists of ints, ints correspond to words
        Shape: (max_length x batch_size)
        :return:
        """

        num_of_steps = tf.shape(X)[0]
        batch_size = tf.shape(X)[1]


        # Making embeddings for x
        embedder = layers.embedding_layer(X, vocab_dim=self.vocab_size, embedding_dim=self.embedding_dim, reuse=reuse)

        # Mask used to reset the query encoder when the symbol is the End-Of-Query symbol, and to retain the state
        # of the session encoder while the EoQ symbol has not yet been seen.
        eoq_mask = tf.expand_dims(tf.cast(tf.not_equal(X, self.eoq_symbol), tf.float32), 2)

        # eoq mask has size [MAX LEN x BATCH SIZE] --> we want to loop over batch size

        # BATCH_SIZE = 80
        # MAX_LEN = 50  # TODO: this shouldn't be as local

        # print((embedder, eoq_mask))
        # Computes the encoded query state. The tensorflow scan function repeatedly applies the gru_layer_with_reset
        # function to (embedder, eoq_mask), initializing the gru layer with the zero tensor.
        # In the query encoder we need the possibility to reset the gru layer, namely after the EoQ symbol has been
        # reached
        query_encoder_packed = tf.scan(
            lambda result_prev, x: layers.gru_layer_with_reset(
                result_prev[1],  # h_reset_prev
                x,
                name='forward_query_encoder',
                x_dim=self.embedding_dim,
                y_dim=self.query_dim,
                reuse=reuse
            ),
            (embedder, eoq_mask),  # scan does not accept multiple tensors so we need to pack and unpack
            initializer=tf.zeros((2, batch_size, self.query_dim))
        )
        # print(tf.shape(query_encoder_packed))

        query_encoder, hidden_query = tf.unstack(query_encoder_packed, axis=1)
        # query_encoder = tf.nn.dropout(query_encoder, keep_prob=0.5)



        # This part does the same, yet for the session encoder. Here we need the possibility to keep the current
        # state, namely while we have not yet seen a full query. Once we have, the session encoder state is updated.
        # session_encoder_packed = tf.scan(
        #     lambda result_prev, x: layers.gru_layer_with_retain(
        #         result_prev[1],  # h_retain_prev
        #         x,
        #         name='session_encoder',
        #         x_dim=self.query_dim,  # 2*
        #         y_dim=self.session_dim,
        #         reuse=reuse
        #     ),
        #     (query_encoder, eoq_mask),
        #     initializer=tf.zeros((2, batch_size, self.session_dim))
        # )
        #
        # session_encoder, hidden_session = tf.unstack(session_encoder_packed, axis=1)
        # session_encoder = layers.gnn_attention(session_encoder, attention_mask, query_encoder_gnn, self.session_dim,
        #                                        self.query_dim, reuse=reuse)

        # This part makes the decoder for a step. The decoder uses both the word embeddings, the reset/retain vector
        # and the session encoder, so we give three variables to the decoder GRU. The decoder GRU is somewhat special,
        # as it incorporates the session_encoder into each hidden state update
        # decoder = tf.scan(
        #     lambda result_prev, x: layers.gru_layer_with_state_reset(
        #         result_prev,
        #         x,
        #         name='decoder',
        #         x_dim=self.embedding_dim,
        #         h_dim=self.query_dim,
        #         y_dim=self.decoder_dim,
        #         reuse=reuse
        #     ),
        #     (embedder, eoq_mask, query_encoder),
        #     # scan does not accept multiple tensors so we need to pack and unpack
        #     initializer=tf.zeros((batch_size, self.decoder_dim))
        # )

        # After the decoder we add an additional output layer (with the decoder commented out, the query encoder
        # stands in for it, which assumes query_dim == decoder_dim)
        flatten_decoder = tf.reshape(query_encoder, (-1, self.decoder_dim))
        flatten_embedder = tf.reshape(embedder, (-1, self.embedding_dim))
        # flatten_session_encoder = tf.reshape(session_encoder, (-1, self.session_dim))

        # attention

        # expand to batch_size x num_of_steps x query_dim
        # query_encoder_T = tf.transpose(query_encoder, perm=[1, 0, 2])
        # query_decoder_T = tf.transpose(decoder, perm=[1, 0, 2])

        # expand to num_of_steps x batch_size x num_of_steps x query_dim
        # query_encoder_expanded = tf.tile(tf.expand_dims(query_encoder, 2), (1, 1, num_of_steps, 1))

        # query_encoder_expanded = query_encoder_expanded * tf.tile(tf.expand_dims(attention_mask, 3), (1, 1, 1, self.query_dim))  # 2*

        # flatten_decoder_with_attention = \
        #     layers.attention_session(query_encoder_expanded, flatten_decoder, enc_dim=self.query_dim, dec_dim=self.decoder_dim,
        #                              reuse=reuse)  # 2*

        output_layer = layers.output_layer(
            flatten_embedder,
            flatten_decoder,  #
            x_dim=self.embedding_dim,
            h_dim=self.decoder_dim,  # 2*
            y_dim=self.output_dim,
            reuse=reuse
        )

        # We compute the output logits based on the output layer above
        flatten_logits, self.l2_loss = layers.logits_layer(
            output_layer,
            self.l2_loss,
            x_dim=self.output_dim,
            y_dim=self.vocab_size,
            reuse=reuse
        )

        logits = tf.reshape(flatten_logits, (num_of_steps, batch_size, self.vocab_size))
        # logits = tf.Print(logits, [np.argmax(logits)], summarize=1500)

        # If we want the softmax back from this step, or just the logits for the loss function
        if return_softmax:
            output = self.softmax(logits)
        else:
            output = logits

        # If we want to continue decoding with single_step we need the hidden states of all GRU layers
        if return_last_with_hidden_states:
            # hidden_decoder = decoder  # there is no resetted decoder output
            # Note for attention mechanism
            return output[-1, :, :], hidden_query[:, :, :] # , hidden_decoder[-1, :, :]
        else:
            return output
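The scan-with-reset idiom above threads a GRU state through time while zeroing it at query boundaries. layers.gru_layer_with_reset isn't shown; a minimal stand-in using a plain GRUCell:

import tensorflow as tf  # TF 1.x

# inputs: (steps, batch, dim); reset: (steps, batch, 1), 0.0 exactly at
# End-Of-Query positions and 1.0 elsewhere
def scan_gru_with_reset(inputs, reset, num_units):
    cell = tf.nn.rnn_cell.GRUCell(num_units)

    def step(h_prev, elems):
        x, r = elems
        # Zero the carried state at query boundaries before the GRU update
        _, h = cell(x, h_prev * r)
        return h

    init = tf.zeros((tf.shape(inputs)[1], num_units))
    return tf.scan(step, (inputs, reset), initializer=init)   # (steps, batch, units)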
Example #9
    def single_step(self, X, prev_hidden_query_states, prev_hidden_session, prev_hidden_decoder, reuse=True):
        """
        Performs a step in the HRED X can be a 2-D tensor (batch, vocab), this can be used
        for beam search

        :param X: The input sessions. Lists of ints, ints correspond to words
        Shape: (max_length)
        :param start_hidden_query: The first hidden state of the query encoder. Initialized with zeros.
        Shape: (2 x query_dim)
        :param start_hidden_session: The first hidden state of the session encoder. Iniitalized with zeros.
        Shape: (2 x session_dim)
        :param start_hidden_decoder: The first hidden state of the decoder. Initialized with zeros.
        Shape: (output_dim)
        :return:
        """
        # Note that with the implementation of attention the object "prev_hidden_query_states" contains not only the
        # previous query encoded state but all previous states, therefore we need to get the last query state
        prev_hidden_query = prev_hidden_query_states[-1, :, :]
        # Making embeddings for x
        embedder = layers.embedding_layer(X, vocab_dim=self.vocab_size, embedding_dim=self.embedding_dim, reuse=reuse)

        # Mask used to reset the query encoder when the symbol is the End-Of-Query symbol, and to retain the state
        # of the session encoder while the EoQ symbol has not yet been seen.
        eoq_mask = tf.cast(tf.not_equal(X, self.eoq_symbol), tf.float32)

        query_encoder, hidden_query = tf.unstack(layers.gru_layer_with_reset(
            prev_hidden_query,  # h_reset_prev
            (embedder, eoq_mask),
            name='forward_query_encoder',
            x_dim=self.embedding_dim,
            y_dim=self.query_dim,
            reuse=reuse
        ))


        # This part does the same, yet for the session encoder. Here we need the possibility to keep the current
        # state, namely while we have not yet seen a full query. Once we have, the session encoder state is updated.
        session_encoder, hidden_session = tf.unstack(layers.gru_layer_with_retain(
            prev_hidden_session,  # h_retain_prev
            (query_encoder, eoq_mask),
            name='session_encoder',
            x_dim=self.query_dim,
            y_dim=self.session_dim,
            reuse=reuse
        ))

        # This part makes the decoder for a step. The decoder uses both the word embeddings, the reset/retain vector
        # and the session encoder, so we give three variables to the decoder GRU. The decoder GRU is somewhat special,
        # as it incorporates the session_encoder into each hidden state update
        hidden_decoder = layers.gru_layer_with_state_reset(
            prev_hidden_decoder,
            (embedder, eoq_mask, session_encoder),
            name='decoder',
            x_dim=self.embedding_dim,
            h_dim=self.session_dim,
            y_dim=self.decoder_dim,
            reuse=reuse
        )

        decoder = hidden_decoder
        flatten_decoder = tf.reshape(decoder, (-1, self.decoder_dim))

        # add attention layer over all previous query encoder states
        # transpose to batch_size x num_of_steps x query_dim
        num_of_atten_states = tf.shape(prev_hidden_query_states)[0]
        # tf.Print(num_of_atten_states, [num_of_atten_states], "INFO - single-step ")
        # tf.Print(flatten_decoder, [tf.shape(flatten_decoder)], "INFO - decoder.shape ")
        query_encoder_expanded = tf.transpose(prev_hidden_query_states, [1, 0, 2])

        flatten_decoder_with_attention = \
            layers.attention_step(query_encoder_expanded, flatten_decoder, enc_dim=self.query_dim, dec_dim=self.decoder_dim,
                                  reuse=reuse)

        # After the decoder we add an additional output layer
        output = layers.output_layer(
            embedder,
            flatten_decoder_with_attention,  #
            x_dim=self.embedding_dim,
            h_dim=self.decoder_dim + self.query_dim,  #
            y_dim=self.output_dim,
            reuse=reuse
        )

        # We compute the output logits based on the output layer above
        logits = layers.logits_layer(
            output,
            x_dim=self.output_dim,
            y_dim=self.vocab_size,
            reuse=reuse
        )

        softmax = self.softmax(logits)

        return softmax, tf.concat([prev_hidden_query_states, tf.expand_dims(hidden_query, 0)], 0), hidden_session, hidden_decoder
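layers.attention_step's internals aren't shown; a minimal dot-product stand-in that, like the code above, returns the decoder state concatenated with a query-encoder context (so the output layer sees decoder_dim + query_dim features):

import tensorflow as tf  # TF 1.x

# enc_states: (batch, steps, enc_dim); dec_state: (batch, dec_dim)
def attention_step(enc_states, dec_state, enc_dim, dec_dim):
    w = tf.get_variable('att_w', [dec_dim, enc_dim])
    query = tf.matmul(dec_state, w)                           # (batch, enc_dim)
    scores = tf.matmul(enc_states, tf.expand_dims(query, 2))  # (batch, steps, 1)
    weights = tf.nn.softmax(scores, axis=1)                   # attend over the steps
    context = tf.reduce_sum(enc_states * weights, axis=1)     # (batch, enc_dim)
    # Concatenate the decoder state with the context, giving
    # dec_dim + enc_dim features for the output layer
    return tf.concat([dec_state, context], axis=1)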
Example #10
    def __init__(self,
                 corpus,
                 n_filters=(128, 128),
                 filter_width=3,
                 token_embeddings_dim=100,
                 char_embeddings_dim=30,
                 use_char_embeddins=True,
                 embeddings_dropout=False,
                 use_crf=False,
                 char_filter_width=3,
                 pretrained_model_path=None,
                 char_max_len=30):

        tf.reset_default_graph()

        n_tags = len(corpus.tag_dict)
        n_tokens = len(corpus.token_dict)
        n_chars = len(corpus.char_dict)

        # Create placeholders
        x_word = tf.placeholder(dtype=tf.int32,
                                shape=[None, None],
                                name='x_word')
        x_char = tf.placeholder(dtype=tf.int32,
                                shape=[None, None, None],
                                name='x_char')
        y_true = tf.placeholder(dtype=tf.int32,
                                shape=[None, None],
                                name='y_tag')
        mask = tf.placeholder(dtype=tf.int32, shape=[None, None], name='mask')
        learning_rate_ph = tf.placeholder(dtype=tf.float32,
                                          shape=[],
                                          name='learning_rate')
        dropout_ph = tf.placeholder_with_default(1.0, shape=[])
        training_ph = tf.placeholder_with_default(False, shape=[])
        learning_rate_decay_ph = tf.placeholder(dtype=tf.float32,
                                                shape=[],
                                                name='learning_rate_decay')
        momentum_ph = tf.placeholder(dtype=tf.float32,
                                     shape=[],
                                     name='momentum')
        max_grad_ph = tf.placeholder(dtype=tf.float32,
                                     shape=[],
                                     name='max_grad')

        # Embeddings
        with tf.variable_scope('Embeddings'):
            w_emb = embedding_layer(x_word,
                                    n_tokens=n_tokens,
                                    token_embedding_dim=token_embeddings_dim,
                                    token_embedding_matrix=corpus.emb_mat)
            w_emb = tf.cast(w_emb, tf.float32)
            c_emb = character_embedding_network(
                x_char,
                n_characters=n_chars,
                char_embedding_dim=char_embeddings_dim,
                filter_width=char_filter_width,
                dropout_ph=dropout_ph)
            emb = tf.concat([w_emb, c_emb], axis=-1)
        # Dropout for embeddings
        emb = tf.layers.dropout(emb, dropout_ph, training=training_ph)
        # Make bi-LSTM
        sequence_lengths = tf.reduce_sum(mask, axis=1)
        units = biLSTM(emb, n_filters, sequence_lengths)
        # Dropout
        units = tf.layers.dropout(units, dropout_ph, training=training_ph)

        # Classifier
        with tf.variable_scope('Classifier'):
            logits = tf.layers.dense(units,
                                     n_tags,
                                     kernel_initializer=xavier_initializer())
        if use_crf:
            log_likelihood, trainsition_params = tf.contrib.crf.crf_log_likelihood(
                logits, y_true, sequence_lengths)
            loss_tensor = -log_likelihood
            predictions = None
        else:
            ground_truth_labels = tf.one_hot(y_true, n_tags)
            loss_tensor = tf.nn.softmax_cross_entropy_with_logits(
                labels=ground_truth_labels, logits=logits)
            # mask is int32 here, so cast it before weighting the loss
            loss_tensor = loss_tensor * tf.cast(mask, tf.float32)
            predictions = tf.argmax(logits, axis=-1)

        loss = tf.reduce_mean(loss_tensor)
        # Initialize session
        sess = tf.Session()

        self._use_crf = use_crf
        self._learning_rate_decay_ph = learning_rate_decay_ph
        self._x_w = x_word
        self._x_c = x_char
        self._y_true = y_true
        self._y_pred = predictions
        self._learning_rate_ph = learning_rate_ph
        self._dropout = dropout_ph
        self._loss = loss
        self._sess = sess
        self.corpus = corpus
        self._loss_tensor = loss_tensor
        self._use_dropout = embeddings_dropout
        self._training_ph = training_ph
        if use_crf:
            self._logits = logits
            self._trainsition_params = trainsition_params
            self._sequence_lengths = sequence_lengths
        self.filewriter = tf.summary.FileWriter('graphs', sess.graph)
        self.summary = tf.summary.merge_all()
        self._train_op = self.get_train_op(
            loss,
            learning_rate_ph,
            lr_decay_rate=learning_rate_decay_ph,
            momentum=momentum_ph,
            max_grad=max_grad_ph)
        sess.run(tf.global_variables_initializer())
        self._mask = mask
        if pretrained_model_path is not None:
            self.load(pretrained_model_path)
        self._momentum = momentum_ph
        self._max_grad = max_grad_ph
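get_train_op isn't shown, but the placeholders above (learning rate, decay rate, momentum, max gradient) suggest its shape: decayed momentum SGD with global-norm gradient clipping. A plausible sketch under those assumptions:

import tensorflow as tf  # TF 1.x

def get_train_op(loss, learning_rate, lr_decay_rate, momentum, max_grad):
    global_step = tf.train.get_or_create_global_step()
    # Exponential decay driven by the fed decay-rate placeholder
    lr = learning_rate * tf.pow(lr_decay_rate, tf.cast(global_step, tf.float32))
    opt = tf.train.MomentumOptimizer(lr, momentum)
    grads_and_vars = [(g, v) for g, v in opt.compute_gradients(loss) if g is not None]
    grads, variables = zip(*grads_and_vars)
    # Clip by global norm so the update magnitude is bounded by max_grad
    clipped, _ = tf.clip_by_global_norm(grads, max_grad)
    return opt.apply_gradients(zip(clipped, variables), global_step=global_step)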
Example #11
def build_model(placeholders,
                info,
                batch_size=4,
                adj_channel_num=1,
                embedding_dim=10):
    sequences = placeholders["sequences"]
    sequences_len = placeholders["sequences_len"]
    labels = placeholders["labels"]
    mask = placeholders["mask"]
    dropout_rate = placeholders["dropout_rate"]
    mask_label = placeholders["mask_label"]
    wd_b = None
    wd_w = 0.1
    is_train = placeholders["is_train"]
    dropout_rate = 1 - dropout_rate  # complement of the fed dropout rate (i.e. a keep probability)
    ###
    ### Sequence part
    ###
    with tf.variable_scope("seq_nn") as scope_part:
        # Embedding (NB: this local assignment overrides the embedding_dim argument)
        embedding_dim = 25
        layer = layers.embedding_layer("embedding",
                                       sequences,
                                       info.sequence_symbol_num,
                                       embedding_dim,
                                       init_params_flag=True,
                                       params=None)
        # CNN + Pooling ("stride" below is used as both the kernel width and the pooling size)
        stride = 4
        layer = klayer.convolutional.Conv1D(505,
                                            stride,
                                            padding="same",
                                            activation='relu')(layer)
        layer = klayer.pooling.MaxPooling1D(stride)(layer)

        stride = 3
        layer = klayer.convolutional.Conv1D(200,
                                            stride,
                                            padding="same",
                                            activation='relu')(layer)
        layer = klayer.pooling.MaxPooling1D(stride)(layer)

        stride = 2
        layer = klayer.convolutional.Conv1D(100,
                                            stride,
                                            padding="same",
                                            activation='relu')(layer)
        layer = klayer.pooling.MaxPooling1D(stride)(layer)

        layer = klayer.convolutional.Conv1D(1,
                                            stride,
                                            padding="same",
                                            activation='tanh')(layer)

        layer = tf.squeeze(layer)

        output_dim = info.label_dim

    logits = mu.multitask_logits(layer, labels.shape[1])
    model = logits
    # Compute the cost: the mean loss over the batch for each task (12 tasks)
    task_losses = mu.add_training_loss(logits=logits, label=labels, pos_weight=info.pos_weight,
                                       batch_size=batch_size, n_tasks=labels.shape[1], mask=mask_label)
    total_loss = tf.reduce_sum(task_losses)  # sum the losses over all tasks

    ### multi-task loss
    cost_opt = task_losses
    each_cost = task_losses

    # Binary probability predictions: 12 x 50 x 2
    prediction = mu.add_softmax(logits)

    metrics = {}
    cost_sum = total_loss
    # cost_sum = cost_opt
    metrics["each_cost"] = task_losses

    metrics["each_correct_count"] = {}
    for i in range(labels.shape[1]):
        equal_cnt = mask_label[:, i] * tf.cast(
            tf.equal(tf.cast(tf.argmax(prediction[i], 1), tf.int16),
                     tf.cast(labels[:, i], tf.int16)), tf.float32)

        each_correct_count = tf.cast(tf.reduce_sum(equal_cnt, axis=0),
                                     tf.float32)
        metrics["each_correct_count"][i] = each_correct_count

    # correct_count=0#mask*tf.cast(tf.reduce_all(tf.equal(tf.cast(tf.argmax(prediction,1),tf.int16), tf.cast(labels,tf.int16)),axis=1),tf.float32)
    metrics["correct_count"] = sum(
        [metrics["each_correct_count"][i] for i in range(labels.shape[1])])
    return model, prediction, cost_opt, cost_sum, metrics
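The metrics loop counts, per task, the predictions that match the labels, zeroing out positions whose task label is absent via mask_label. A vectorized sketch of the same bookkeeping, assuming the per-task predictions are stacked into one tensor:

import tensorflow as tf  # TF 1.x

# predictions: (n_tasks, batch, 2) softmax outputs; labels: (batch, n_tasks);
# mask_label: (batch, n_tasks), 1.0 where a task label is present
def masked_correct_counts(predictions, labels, mask_label):
    pred_ids = tf.argmax(predictions, axis=-1)            # (n_tasks, batch)
    gold = tf.transpose(tf.cast(labels, pred_ids.dtype))  # (n_tasks, batch)
    hits = tf.cast(tf.equal(pred_ids, gold), tf.float32)
    hits = hits * tf.transpose(mask_label)                # drop unlabeled positions
    return tf.reduce_sum(hits, axis=1)                    # correct count per task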