def __init__(self, word_emb: np.ndarray, char_emb: np.ndarray,
             context_limit: int = 450, question_limit: int = 150, char_limit: int = 16,
             train_char_emb: bool = True, char_hidden_size: int = 100,
             encoder_hidden_size: int = 75, attention_hidden_size: int = 75,
             keep_prob: float = 0.7, min_learning_rate: float = 0.001,
             noans_token: bool = False, **kwargs) -> None:
    super().__init__(**kwargs)
    self.init_word_emb = word_emb
    self.init_char_emb = char_emb
    self.context_limit = context_limit
    self.question_limit = question_limit
    self.char_limit = char_limit
    self.train_char_emb = train_char_emb
    self.char_hidden_size = char_hidden_size
    self.hidden_size = encoder_hidden_size
    self.attention_hidden_size = attention_hidden_size
    self.keep_prob = keep_prob
    self.min_learning_rate = min_learning_rate
    self.noans_token = noans_token

    self.word_emb_dim = self.init_word_emb.shape[1]
    self.char_emb_dim = self.init_char_emb.shape[1]

    # Counters for early stopping and learning-rate decay
    self.last_impatience = 0
    self.lr_impatience = 0

    # Use the fast cuDNN GRU on GPU; fall back to the cuDNN-compatible cell on CPU
    if check_gpu_existence():
        self.GRU = CudnnGRU
    else:
        self.GRU = CudnnCompatibleGRU

    self.sess_config = tf.ConfigProto(allow_soft_placement=True)
    self.sess_config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=self.sess_config)
    self._init_graph()
    self._init_optimizer()
    self.sess.run(tf.global_variables_initializer())

    # If model files exist at load_path, restore the model from them
    if self.load_path is not None:
        self.load()
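# A minimal usage sketch for the constructor above. The class name SquadModel and
# the save_path/load_path keyword arguments are assumptions based on the surrounding
# code, and the embedding matrices are random stand-ins, not real pretrained vectors.
import numpy as np

word_emb = np.random.randn(10000, 300).astype(np.float32)  # (vocab size, word emb dim)
char_emb = np.random.randn(100, 64).astype(np.float32)     # (alphabet size, char emb dim)
model = SquadModel(word_emb=word_emb, char_emb=char_emb,
                   save_path='squad_model', load_path='squad_model')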
def cudnn_lstm_wrapper(units, n_hidden, n_layers=1, trainable_initial_states=None,
                       seq_lengths=None, initial_h=None, initial_c=None,
                       name='cudnn_lstm', reuse=False):
    # Prefer the fast cuDNN implementation whenever a GPU is available
    if check_gpu_existence():
        return cudnn_lstm(units, n_hidden, n_layers, trainable_initial_states,
                          seq_lengths, initial_h, initial_c, name, reuse)

    log.info('\nWarning! tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell is used. '
             'It is okay for inference mode, but a model trained with this cell '
             'canNOT be used with tf.contrib.cudnn_rnn.CudnnLSTMCell later.')
    return cudnn_compatible_lstm(units, n_hidden, n_layers, trainable_initial_states,
                                 seq_lengths, initial_h, initial_c, name, reuse)
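# Hedged usage sketch for the wrapper above: it takes a [batch, time, features]
# tensor plus optional per-example lengths. The shapes are illustrative assumptions,
# and the return value is passed through unchanged from cudnn_lstm /
# cudnn_compatible_lstm, so it is left unpacked here.
tokens = tf.placeholder(tf.float32, [None, None, 300], name='token_embeddings')
lengths = tf.placeholder(tf.int32, [None], name='lengths')
encoded = cudnn_lstm_wrapper(tokens, n_hidden=128, seq_lengths=lengths, name='encoder')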
def _build_cudnn_rnn(self, units, n_hidden_list, cell_type, intra_layer_dropout, mask):
    if not check_gpu_existence():
        raise RuntimeError('Usage of cuDNN RNN layers requires a GPU along with the cuDNN library')

    sequence_lengths = tf.to_int32(tf.reduce_sum(mask, axis=1))
    for n, n_hidden in enumerate(n_hidden_list):
        with tf.variable_scope(cell_type.upper() + '_' + str(n)):
            if cell_type.lower() == 'lstm':
                units, _ = cudnn_bi_lstm(units, n_hidden, sequence_lengths)
            elif cell_type.lower() == 'gru':
                units, _ = cudnn_bi_gru(units, n_hidden, sequence_lengths)
            else:
                raise RuntimeError('Wrong cell type "{}"! Only "gru" and "lstm" are supported!'.format(cell_type))

            # Concatenate the forward and backward outputs of the bidirectional RNN
            units = tf.concat(units, -1)
            if intra_layer_dropout and n != len(n_hidden_list) - 1:
                units = variational_dropout(units, self._dropout_ph)
    return units
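# Sketch of how the mask argument is typically produced for the method above
# (an assumption: token ids are zero-padded, so nonzero positions mark real tokens):
token_ids = tf.placeholder(tf.int32, [None, None], name='token_ids')
mask = tf.cast(tf.not_equal(token_ids, 0), tf.float32)
# units = self._build_cudnn_rnn(embeddings, n_hidden_list=(128, 128),
#                               cell_type='gru', intra_layer_dropout=True, mask=mask)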
def __init__(self, **kwargs):
    # check gpu
    if not check_gpu_existence():
        raise RuntimeError('SquadModel requires GPU')

    # Keep a copy of all hyperparameters for saving/loading
    self.opt = deepcopy(kwargs)
    self.init_word_emb = self.opt['word_emb']
    self.init_char_emb = self.opt['char_emb']
    self.context_limit = self.opt['context_limit']
    self.question_limit = self.opt['question_limit']
    self.char_limit = self.opt['char_limit']
    self.char_hidden_size = self.opt['char_hidden_size']
    self.hidden_size = self.opt['encoder_hidden_size']
    self.attention_hidden_size = self.opt['attention_hidden_size']
    self.keep_prob = self.opt['keep_prob']
    self.learning_rate = self.opt['learning_rate']
    self.min_learning_rate = self.opt['min_learning_rate']
    self.learning_rate_patience = self.opt['learning_rate_patience']
    self.grad_clip = self.opt['grad_clip']
    self.weight_decay = self.opt['weight_decay']
    self.word_emb_dim = self.init_word_emb.shape[1]
    self.char_emb_dim = self.init_char_emb.shape[1]

    # Counters for early stopping and learning-rate decay
    self.last_impatience = 0
    self.lr_impatience = 0

    self.sess_config = tf.ConfigProto(allow_soft_placement=True)
    self.sess_config.gpu_options.allow_growth = True
    self.sess = tf.Session(config=self.sess_config)
    self._init_graph()
    self._init_optimizer()
    self.sess.run(tf.global_variables_initializer())

    super().__init__(**kwargs)

    # If model files exist at load_path, restore the model from them
    if self.load_path is not None:
        self.load()

    # weight_decay < 1.0 turns on exponential moving averaging of the weights;
    # assign the averaged values to the graph variables
    if self.weight_decay < 1.0:
        self.sess.run(self.assign_vars)
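# The constructor above pulls every hyperparameter out of a flat kwargs dict, so a
# call looks like the hedged sketch below. The key names come from the self.opt
# lookups above; the values are illustrative assumptions, not canonical defaults.
opt = dict(word_emb=word_emb, char_emb=char_emb,
           context_limit=450, question_limit=150, char_limit=16,
           char_hidden_size=100, encoder_hidden_size=75, attention_hidden_size=75,
           keep_prob=0.7, learning_rate=0.5, min_learning_rate=0.001,
           learning_rate_patience=10, grad_clip=5.0, weight_decay=0.9999)
# model = SquadModel(**opt)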
def __init__(self, n_classes: int = 2, dropout_keep_prob: float = 0.5,
             return_probas: bool = False, **kwargs):
    """
    Args:
        n_classes: number of classes for classification
        dropout_keep_prob: probability of keeping the hidden state, values from 0 to 1;
            0.5 works well in most cases
        return_probas: whether to return confidence scores that the relation is appropriate
        **kwargs: parameters passed to the parent class
    """
    kwargs.setdefault('learning_rate_drop_div', 10.0)
    kwargs.setdefault('learning_rate_drop_patience', 5.0)
    kwargs.setdefault('clip_norm', 5.0)
    super().__init__(**kwargs)
    self.n_classes = n_classes
    self.dropout_keep_prob = dropout_keep_prob
    self.return_probas = return_probas

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Use the fast cuDNN GRU on GPU; fall back to the cuDNN-compatible cell on CPU
    if check_gpu_existence():
        self.GRU = CudnnGRU
    else:
        self.GRU = CudnnCompatibleGRU

    self.question_ph = tf.placeholder(tf.float32, [None, None, 300])
    self.rel_emb_ph = tf.placeholder(tf.float32, [None, None, 300])

    # Relation token mask and lengths derived from zero padding; the relation
    # representation is the mean of its non-padding token embeddings
    r_mask_2 = tf.cast(self.rel_emb_ph, tf.bool)
    r_len_2 = tf.reduce_sum(tf.cast(r_mask_2, tf.int32), axis=2)
    r_mask = tf.cast(r_len_2, tf.bool)
    r_len = tf.reduce_sum(tf.cast(r_mask, tf.int32), axis=1)
    rel_emb = tf.math.divide_no_nan(tf.reduce_sum(self.rel_emb_ph, axis=1),
                                    tf.cast(tf.expand_dims(r_len, axis=1), tf.float32))

    self.y_ph = tf.placeholder(tf.int32, shape=(None,))
    self.one_hot_labels = tf.one_hot(self.y_ph, depth=self.n_classes, dtype=tf.float32)

    self.keep_prob_ph = tf.placeholder_with_default(1.0, shape=[], name='keep_prob_ph')

    # Question token mask and lengths, derived the same way
    q_mask_2 = tf.cast(self.question_ph, tf.bool)
    q_len_2 = tf.reduce_sum(tf.cast(q_mask_2, tf.int32), axis=2)
    q_mask = tf.cast(q_len_2, tf.bool)
    q_len = tf.reduce_sum(tf.cast(q_mask, tf.int32), axis=1)

    question_dr = variational_dropout(self.question_ph, keep_prob=self.keep_prob_ph)
    b_size = tf.shape(self.question_ph)[0]

    with tf.variable_scope("question_encode"):
        rnn = self.GRU(num_layers=2, num_units=75, batch_size=b_size,
                       input_size=300, keep_prob=self.keep_prob_ph)
        q = rnn(question_dr, seq_len=q_len)

    with tf.variable_scope("attention"):
        # Attend over question positions with the averaged relation embedding as the query
        rel_emb_exp = tf.expand_dims(rel_emb, axis=1)
        dot_products = tf.reduce_sum(tf.multiply(q, rel_emb_exp), axis=2)
        s_mask = softmax_mask(dot_products, q_mask)
        att_weights = tf.expand_dims(tf.nn.softmax(s_mask), axis=2)
        self.s_r = tf.reduce_sum(tf.multiply(att_weights, q), axis=1)

        # Use n_classes rather than a hard-coded 2, to stay consistent with one_hot above
        self.logits = tf.layers.dense(tf.multiply(self.s_r, rel_emb), self.n_classes,
                                      activation=None, use_bias=False)
        self.y_pred = tf.argmax(self.logits, axis=-1)

        loss_tensor = tf.nn.sigmoid_cross_entropy_with_logits(labels=self.one_hot_labels,
                                                              logits=self.logits)
        self.loss = tf.reduce_mean(loss_tensor)
        self.train_op = self.get_train_op(self.loss)

    self.sess = tf.Session(config=config)
    self.sess.run(tf.global_variables_initializer())
    self.load()
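# Hedged sketch of one training step against the placeholders defined above.
# The batch arrays are illustrative stand-ins (zero-padded 300-d token vectors);
# the model variable and batch contents are assumptions, not a fixed API.
import numpy as np

questions = np.zeros((32, 20, 300), dtype=np.float32)  # (batch, question len, emb dim)
relations = np.zeros((32, 5, 300), dtype=np.float32)   # (batch, relation len, emb dim)
labels = np.zeros(32, dtype=np.int32)                  # 1 = relation fits the question
feed = {model.question_ph: questions, model.rel_emb_ph: relations,
        model.y_ph: labels, model.keep_prob_ph: model.dropout_keep_prob}
loss_value, _ = model.sess.run([model.loss, model.train_op], feed_dict=feed)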
def __init__(self, embedder, tag_vocab, ner_vocab, pos_vocab, sess=None):
    # check gpu
    if not check_gpu_existence():
        raise RuntimeError('Ontonotes NER model requires GPU with cuDNN!')

    n_hidden = (256, 256, 256)
    token_embeddings_dim = 100
    n_tags = len(tag_vocab)

    # Create placeholders
    x_word = tf.placeholder(dtype=tf.float32, shape=[None, None, token_embeddings_dim], name='x_word')
    x_char = tf.placeholder(dtype=tf.int32, shape=[None, None, None], name='x_char')

    # Features
    x_pos = tf.placeholder(dtype=tf.float32, shape=[None, None, len(pos_vocab)], name='x_pos')  # Senna
    x_ner = tf.placeholder(dtype=tf.float32, shape=[None, None, len(ner_vocab)], name='x_ner')  # Senna
    x_capi = tf.placeholder(dtype=tf.float32, shape=[None, None], name='x_capi')

    y_true = tf.placeholder(dtype=tf.int32, shape=[None, None], name='y_tag')
    mask = tf.placeholder(dtype=tf.float32, shape=[None, None], name='mask')
    sequence_lengths = tf.reduce_sum(mask, axis=1)

    # Concat features to embeddings
    emb = tf.concat([x_word, tf.expand_dims(x_capi, 2), x_pos, x_ner], axis=2)

    # The network: a stack of bidirectional cuDNN LSTM layers
    units = emb
    for n, n_h in enumerate(n_hidden):
        with tf.variable_scope('RNN_' + str(n)):
            units, _ = cudnn_bi_lstm(units, n_h, tf.to_int32(sequence_lengths))

    # Classifier
    with tf.variable_scope('Classifier'):
        units = tf.layers.dense(units, n_hidden[-1], kernel_initializer=xavier_initializer())
        logits = tf.layers.dense(units, n_tags, kernel_initializer=xavier_initializer())

    # CRF; crf_log_likelihood expects int32 sequence lengths
    _, transition_params = tf.contrib.crf.crf_log_likelihood(
        logits, y_true, tf.to_int32(sequence_lengths))

    # Initialize session
    if sess is None:
        sess = tf.Session()

    self._ner_tagger = SennaNERTagger('download/senna/')
    self._pos_tagger = SennaChunkTagger('download/senna/')

    self._x_w = x_word
    self._x_c = x_char
    self._x_capi = x_capi
    self.x_pos = x_pos
    self.x_ner = x_ner
    self._y_true = y_true
    self._mask = mask
    self._sequence_lengths = sequence_lengths
    self._token_embeddings_dim = token_embeddings_dim
    self._pos_dict = pos_vocab
    self._ner_dict = ner_vocab
    self._tag_dict = tag_vocab
    self._logits = logits
    self._transition_params = transition_params
    self._sess = sess
    sess.run(tf.global_variables_initializer())
    self._embedder = embedder
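# Hedged inference sketch: decode the most likely tag sequences with Viterbi using
# the logits and CRF transition matrix built above. The feature arrays (word_feats,
# capi_feats, pos_feats, ner_feats, batch_mask) are hypothetical batch inputs
# matching the placeholder shapes; only tf.contrib.crf.viterbi_decode is TF API.
feed = {self._x_w: word_feats, self._x_capi: capi_feats,
        self.x_pos: pos_feats, self.x_ner: ner_feats, self._mask: batch_mask}
logits_batch, trans = self._sess.run([self._logits, self._transition_params], feed)
lengths = batch_mask.sum(axis=1).astype(int)
for logit, length in zip(logits_batch, lengths):
    tags, _ = tf.contrib.crf.viterbi_decode(logit[:length], trans)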