示例#1
0
 def _build_cnn(self, units, n_hidden_list, cnn_filter_width,
                use_batch_norm):
     """Run ``units`` through ``stacked_cnn`` and return the result.

     The first four arguments are forwarded positionally, unchanged;
     the training-mode placeholder is taken from ``self.training_ph``.

     Args:
         units: Input passed as the first argument of ``stacked_cnn``.
         n_hidden_list: Forwarded as the second positional argument.
         cnn_filter_width: Forwarded as the third positional argument.
         use_batch_norm: Forwarded as the fourth positional argument.

     Returns:
         Whatever ``stacked_cnn`` returns.
     """
     return stacked_cnn(
         units,
         n_hidden_list,
         cnn_filter_width,
         use_batch_norm,
         training_ph=self.training_ph,
     )
示例#2
0
    def __init__(self,
                 word_vocab,
                 char_vocab,
                 tag_vocab,
                 n_filters=(128, 256),
                 filter_width=3,
                 token_embeddings_dim=128,
                 char_embeddings_dim=50,
                 use_char_embeddins=True,
                 embeddings_dropout=False,
                 dense_dropout=False,
                 use_batch_norm=False,
                 logging=False,
                 use_crf=False,
                 net_type='cnn',
                 char_filter_width=5,
                 verbose=False,
                 sess=None,
                 cell_type='lstm',
                 embedder=None,
                 use_capitalization=False,
                 two_dense_layers=False):
        """Build the tagging graph, create the session and initialize variables.

        Args:
            word_vocab: Token vocabulary; its ``len()`` sizes the token
                embedding table. Stored as ``self.token_vocab``.
            char_vocab: Character vocabulary; its ``len()`` sizes the
                character embedding network. Stored as ``self.char_vocab``.
            tag_vocab: Tag vocabulary; its ``len()`` is the number of output
                classes. Stored as ``self.tag_vocab``.
            n_filters: Hidden sizes handed to the selected network builder.
                The last element is also the width of the optional extra
                dense layer.
            filter_width: Filter width for the (highway) CNN builders.
            token_embeddings_dim: Token embedding size, or the last dimension
                of the ``x_word`` placeholder when ``embedder`` is given.
            char_embeddings_dim: Character embedding size.
            use_char_embeddins: (sic) Concatenate character-level embeddings
                to token embeddings. Ignored when ``embedder`` is given.
            embeddings_dropout: Apply ``tf.layers.dropout`` to the embeddings.
            dense_dropout: Only recorded in ``self._use_dropout``; no dropout
                on the dense layers is built in this method.
            use_batch_norm: Forwarded to the CNN builders.
            logging: If true, create a ``tf.summary.FileWriter`` that writes
                to the ``'summary'`` directory.
            use_crf: Use a CRF log-likelihood loss instead of masked softmax
                cross-entropy. In that case ``self._y_pred`` is ``None``.
            net_type: Network selector, matched case-insensitively by
                substring: ``'cnn_highway'``, ``'cnn'`` or ``'rnn'``.
            char_filter_width: Filter width of the character network.
            verbose: Stored as ``self.verbose``.
            sess: Existing ``tf.Session`` to use; a new one is created when
                ``None``.
            cell_type: Forwarded to ``stacked_bi_rnn``.
            embedder: When not ``None``, ``x_word`` becomes a float
                placeholder of shape ``[None, None, token_embeddings_dim]``
                and no embedding layers are built. Stored as
                ``self._embedder``.
            use_capitalization: Append the ``x_cap`` placeholder as one extra
                feature per token.
            two_dense_layers: Insert an additional dense layer before the
                output projection.

        Raises:
            KeyError: If ``net_type`` matches none of the known network types.
        """
        tf.set_random_seed(SEED)
        n_tags = len(tag_vocab)
        n_tokens = len(word_vocab)
        n_chars = len(char_vocab)

        # Create placeholders
        if embedder is not None:
            # Externally embedded tokens are fed directly as float vectors.
            x_word = tf.placeholder(dtype=tf.float32,
                                    shape=[None, None, token_embeddings_dim],
                                    name='x_word')
        else:
            x_word = tf.placeholder(dtype=tf.int32,
                                    shape=[None, None],
                                    name='x_word')
        x_char = tf.placeholder(dtype=tf.int32,
                                shape=[None, None, None],
                                name='x_char')
        y_true = tf.placeholder(dtype=tf.int32,
                                shape=[None, None],
                                name='y_tag')
        # The graph name 'y_cap' is kept as-is so existing graphs/checkpoints
        # that refer to it keep working.
        x_cap = tf.placeholder(dtype=tf.float32,
                               shape=[None, None],
                               name='y_cap')

        # Auxiliary placeholders
        learning_rate_ph = tf.placeholder(dtype=tf.float32,
                                          shape=[],
                                          name='learning_rate')
        dropout_ph = tf.placeholder_with_default(1.0, shape=[])
        training_ph = tf.placeholder_with_default(False, shape=[])
        mask_ph = tf.placeholder(dtype=tf.float32, shape=[None, None])

        # Embeddings
        if embedder is None:
            with tf.variable_scope('Embeddings'):
                w_emb = embedding_layer(
                    x_word,
                    n_tokens=n_tokens,
                    token_embedding_dim=token_embeddings_dim)
                if use_char_embeddins:
                    c_emb = character_embedding_network(
                        x_char,
                        n_characters=n_chars,
                        char_embedding_dim=char_embeddings_dim,
                        filter_width=char_filter_width)
                    emb = tf.concat([w_emb, c_emb], axis=-1)
                else:
                    emb = w_emb
        else:
            emb = x_word
        if use_capitalization:
            emb = tf.concat([emb, tf.expand_dims(x_cap, 2)], 2)

        # Dropout for embeddings
        if embeddings_dropout:
            # NOTE(review): dropout_ph is passed as the second positional
            # argument of tf.layers.dropout (the drop `rate`) and defaults to
            # 1.0 -- confirm callers feed a drop rate, not a keep probability.
            emb = tf.layers.dropout(emb, dropout_ph, training=training_ph)

        # Select the network body. 'cnn_highway' must be tested before 'cnn':
        # the match is by substring and 'cnn' is contained in 'cnn_highway',
        # so the opposite order makes the highway branch unreachable.
        net_type_lower = net_type.lower()
        if 'cnn_highway' in net_type_lower:
            units = stacked_highway_cnn(emb,
                                        n_hidden_list=n_filters,
                                        filter_width=filter_width,
                                        use_batch_norm=use_batch_norm,
                                        training_ph=training_ph)
        elif 'cnn' in net_type_lower:
            # Convolutional network
            with tf.variable_scope('ConvNet'):
                units = stacked_cnn(emb,
                                    n_hidden_list=n_filters,
                                    filter_width=filter_width,
                                    use_batch_norm=use_batch_norm,
                                    training_ph=training_ph)
        elif 'rnn' in net_type_lower:
            units, _ = stacked_bi_rnn(emb, n_filters, cell_type)
        else:
            raise KeyError(
                'There is no such type of network: {}'.format(net_type))

        # Classifier
        with tf.variable_scope('Classifier'):
            if two_dense_layers:
                units = tf.layers.dense(
                    units,
                    n_filters[-1],
                    kernel_initializer=xavier_initializer())
            logits = tf.layers.dense(units,
                                     n_tags,
                                     kernel_initializer=xavier_initializer())

        # Loss with masking
        if use_crf:
            # Sequence lengths are recovered by summing the mask over axis 1.
            sequence_lengths = tf.reduce_sum(mask_ph, axis=1)
            log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
                logits, y_true, sequence_lengths)
            loss_tensor = -log_likelihood
            # No argmax predictions in CRF mode; logits, transition params
            # and sequence lengths are stored on the instance instead.
            predictions = None
        else:
            ground_truth_labels = tf.one_hot(y_true, n_tags)
            loss_tensor = tf.nn.softmax_cross_entropy_with_logits(
                labels=ground_truth_labels, logits=logits)
            # Multiply the per-token loss by the mask.
            loss_tensor = loss_tensor * mask_ph
            predictions = tf.argmax(logits, axis=-1)

        loss = tf.reduce_mean(loss_tensor)

        # Initialize session
        if sess is None:
            sess = tf.Session()
        if logging:
            self.train_writer = tf.summary.FileWriter('summary', sess.graph)

        self._token_embeddings_dim = token_embeddings_dim
        self.token_vocab = word_vocab
        self.tag_vocab = tag_vocab
        self.char_vocab = char_vocab
        self._use_crf = use_crf
        self.summary = tf.summary.merge_all()
        self._x_w = x_word
        self._x_c = x_char
        self._y_true = y_true
        self._y_pred = predictions
        self._x_cap = x_cap
        if use_crf:
            self._logits = logits
            # Attribute name (with its historical spelling) is kept for
            # backward compatibility with code that reads it.
            self._trainsition_params = transition_params
            self._sequence_lengths = sequence_lengths
        self._loss = loss
        self._sess = sess
        self._learning_rate_ph = learning_rate_ph
        self._dropout = dropout_ph
        self._loss_tensor = loss_tensor
        # Kept as True/None (not True/False) to preserve the original value.
        self._use_dropout = True if embeddings_dropout or dense_dropout else None
        self._training_ph = training_ph
        self._logging = logging
        # The train op is built before variable initialization; presumably so
        # that any variables it creates are initialized too.
        self._train_op = self.get_train_op(loss, learning_rate_ph)
        self._embedder = embedder
        self.verbose = verbose
        self._mask = mask_ph
        self._use_capitalization = use_capitalization
        sess.run(tf.global_variables_initializer())
示例#3
0
 def _build_cnn(self, units, n_hidden_list, cnn_filter_width, use_batch_norm):
     """Apply ``stacked_cnn`` to ``units`` and return its output.

     ``units``, ``n_hidden_list``, ``cnn_filter_width`` and
     ``use_batch_norm`` are forwarded positionally in that order;
     ``training_ph`` is read from ``self.training_ph``.
     """
     positional_args = (units, n_hidden_list, cnn_filter_width, use_batch_norm)
     return stacked_cnn(*positional_args, training_ph=self.training_ph)