def _build_cnn(self, units, n_hidden_list, cnn_filter_width, use_batch_norm):
    """Run ``units`` through ``stacked_cnn`` and return the result.

    Args:
        units: input passed to ``stacked_cnn`` as its first argument.
        n_hidden_list: forwarded positionally to ``stacked_cnn``.
        cnn_filter_width: forwarded positionally to ``stacked_cnn``.
        use_batch_norm: forwarded positionally to ``stacked_cnn``.

    Returns:
        Whatever ``stacked_cnn`` returns for the given arguments.
    """
    # The constructor stores the training-mode placeholder as
    # ``self._training_ph``; read that attribute so this helper does not
    # depend on a differently named ``training_ph`` attribute.
    return stacked_cnn(units,
                       n_hidden_list,
                       cnn_filter_width,
                       use_batch_norm,
                       training_ph=self._training_ph)
def __init__(self,
             word_vocab,
             char_vocab,
             tag_vocab,
             n_filters=(128, 256),
             filter_width=3,
             token_embeddings_dim=128,
             char_embeddings_dim=50,
             use_char_embeddins=True,
             embeddings_dropout=False,
             dense_dropout=False,
             use_batch_norm=False,
             logging=False,
             use_crf=False,
             net_type='cnn',
             char_filter_width=5,
             verbose=False,
             sess=None,
             cell_type='lstm',
             embedder=None,
             use_capitalization=False,
             two_dense_layers=False):
    """Build the tagging graph, its loss and train op, and initialize variables.

    Args:
        word_vocab: token vocabulary; ``len()`` of it sizes the token
            embedding table. Stored as ``self.token_vocab``.
        char_vocab: character vocabulary; ``len()`` of it sizes the
            character embedding network. Stored as ``self.char_vocab``.
        tag_vocab: tag vocabulary; ``len()`` of it is the number of output
            classes. Stored as ``self.tag_vocab``.
        n_filters: per-layer sizes handed to the selected encoder
            (``stacked_cnn``, ``stacked_highway_cnn`` or
            ``stacked_bi_rnn``); its last element is also the width of the
            optional extra dense layer.
        filter_width: filter width passed to the CNN encoders.
        token_embeddings_dim: token embedding size; when ``embedder`` is
            given it is the last dimension of the ``x_word`` placeholder.
        char_embeddings_dim: passed to ``character_embedding_network``.
        use_char_embeddins: (sic) concatenate character-level embeddings to
            token embeddings. Only consulted when ``embedder`` is None.
        embeddings_dropout: apply ``tf.layers.dropout`` to the embeddings.
        dense_dropout: in this constructor it only contributes to the
            ``self._use_dropout`` flag.
        use_batch_norm: forwarded to the CNN encoders.
        logging: when truthy, create a ``tf.summary.FileWriter`` writing to
            the ``'summary'`` directory.
        use_crf: use the CRF log-likelihood loss instead of masked softmax
            cross-entropy. With CRF, ``self._y_pred`` is None.
        net_type: encoder selector, matched case-insensitively by
            substring: ``'cnn_highway'``, ``'cnn'`` or ``'rnn'``.
        char_filter_width: passed to ``character_embedding_network``.
        verbose: stored as ``self.verbose``.
        sess: session to use; a new ``tf.Session`` is created when None.
        cell_type: forwarded to ``stacked_bi_rnn``.
        embedder: when not None, ``x_word`` becomes a float placeholder of
            shape ``[None, None, token_embeddings_dim]`` and is used
            directly as the embedding. Stored as ``self._embedder``.
        use_capitalization: append ``x_cap`` as one extra feature per
            token to the embeddings.
        two_dense_layers: insert an extra dense layer of width
            ``n_filters[-1]`` before the output projection.

    Raises:
        KeyError: if ``net_type`` matches none of the known encoders.
    """
    tf.set_random_seed(SEED)
    n_tags = len(tag_vocab)
    n_tokens = len(word_vocab)
    n_chars = len(char_vocab)

    # Create placeholders
    if embedder is not None:
        # Precomputed token embeddings are fed directly.
        x_word = tf.placeholder(dtype=tf.float32,
                                shape=[None, None, token_embeddings_dim],
                                name='x_word')
    else:
        # Token indices, looked up in a trainable embedding table below.
        x_word = tf.placeholder(dtype=tf.int32,
                                shape=[None, None],
                                name='x_word')
    x_char = tf.placeholder(dtype=tf.int32,
                            shape=[None, None, None],
                            name='x_char')
    y_true = tf.placeholder(dtype=tf.int32,
                            shape=[None, None],
                            name='y_tag')
    # NOTE(review): the graph name 'y_cap' looks like a typo for 'x_cap';
    # it is kept so that any lookup of this tensor by name keeps working.
    x_cap = tf.placeholder(dtype=tf.float32,
                           shape=[None, None],
                           name='y_cap')

    # Auxiliary placeholders
    learning_rate_ph = tf.placeholder(dtype=tf.float32,
                                      shape=[],
                                      name='learning_rate')
    dropout_ph = tf.placeholder_with_default(1.0, shape=[])
    training_ph = tf.placeholder_with_default(False, shape=[])
    mask_ph = tf.placeholder(dtype=tf.float32, shape=[None, None])

    # Embeddings
    if embedder is None:
        with tf.variable_scope('Embeddings'):
            w_emb = embedding_layer(x_word,
                                    n_tokens=n_tokens,
                                    token_embedding_dim=token_embeddings_dim)
            if use_char_embeddins:
                c_emb = character_embedding_network(
                    x_char,
                    n_characters=n_chars,
                    char_embedding_dim=char_embeddings_dim,
                    filter_width=char_filter_width)
                emb = tf.concat([w_emb, c_emb], axis=-1)
            else:
                emb = w_emb
    else:
        emb = x_word

    if use_capitalization:
        # Append the capitalization value as one extra feature per token.
        emb = tf.concat([emb, tf.expand_dims(x_cap, 2)], 2)

    # Dropout for embeddings
    if embeddings_dropout:
        # NOTE(review): dropout_ph is passed as the `rate` argument and
        # defaults to 1.0; presumably callers feed an explicit rate while
        # training - confirm against the training code.
        emb = tf.layers.dropout(emb, dropout_ph, training=training_ph)

    # Encoder selection. 'cnn_highway' must be tested before 'cnn': the
    # plain-CNN substring test also matches 'cnn_highway', which previously
    # made the highway branch unreachable.
    net_type_lower = net_type.lower()
    if 'cnn_highway' in net_type_lower:
        units = stacked_highway_cnn(emb,
                                    n_hidden_list=n_filters,
                                    filter_width=filter_width,
                                    use_batch_norm=use_batch_norm,
                                    training_ph=training_ph)
    elif 'cnn' in net_type_lower:
        # Convolutional network
        with tf.variable_scope('ConvNet'):
            units = stacked_cnn(emb,
                                n_hidden_list=n_filters,
                                filter_width=filter_width,
                                use_batch_norm=use_batch_norm,
                                training_ph=training_ph)
    elif 'rnn' in net_type_lower:
        units, _ = stacked_bi_rnn(emb, n_filters, cell_type)
    else:
        raise KeyError(
            'There is no such type of network: {}'.format(net_type))

    # Classifier
    with tf.variable_scope('Classifier'):
        if two_dense_layers:
            units = tf.layers.dense(
                units, n_filters[-1],
                kernel_initializer=xavier_initializer())
        logits = tf.layers.dense(units, n_tags,
                                 kernel_initializer=xavier_initializer())

    # Loss with masking
    if use_crf:
        # Sequence lengths are recovered by summing the mask along axis 1.
        sequence_lengths = tf.reduce_sum(mask_ph, axis=1)
        log_likelihood, transition_params = \
            tf.contrib.crf.crf_log_likelihood(logits, y_true,
                                              sequence_lengths)
        loss_tensor = -log_likelihood
        # No argmax predictions are built in CRF mode.
        predictions = None
    else:
        ground_truth_labels = tf.one_hot(y_true, n_tags)
        loss_tensor = tf.nn.softmax_cross_entropy_with_logits(
            labels=ground_truth_labels, logits=logits)
        # Zero the loss wherever the mask is zero.
        loss_tensor = loss_tensor * mask_ph
        predictions = tf.argmax(logits, axis=-1)

    # NOTE(review): the mean is taken over every element of loss_tensor,
    # masked positions included.
    loss = tf.reduce_mean(loss_tensor)

    # Initialize session
    if sess is None:
        sess = tf.Session()
    if logging:
        self.train_writer = tf.summary.FileWriter('summary', sess.graph)

    self._token_embeddings_dim = token_embeddings_dim
    self.token_vocab = word_vocab
    self.tag_vocab = tag_vocab
    self.char_vocab = char_vocab
    self._use_crf = use_crf
    self.summary = tf.summary.merge_all()
    self._x_w = x_word
    self._x_c = x_char
    self._y_true = y_true
    self._y_pred = predictions
    self._x_cap = x_cap
    if use_crf:
        self._logits = logits
        # Attribute name (including its spelling) is kept as in the
        # original so existing readers of it are unaffected.
        self._trainsition_params = transition_params
        self._sequence_lengths = sequence_lengths
    self._loss = loss
    self._sess = sess
    self._learning_rate_ph = learning_rate_ph
    self._dropout = dropout_ph
    self._loss_tensor = loss_tensor
    # True when any dropout option is enabled, otherwise None (not False).
    self._use_dropout = True if embeddings_dropout or dense_dropout else None
    self._training_ph = training_ph
    self._logging = logging

    # The train op is created only after the attributes above are set.
    self._train_op = self.get_train_op(loss, learning_rate_ph)
    self._embedder = embedder
    self.verbose = verbose
    self._mask = mask_ph
    self._use_capitalization = use_capitalization
    sess.run(tf.global_variables_initializer())