def model_fn(features, labels, mode, params):
    """model_fn for the BERT / word2id / GloVe embedding variants.

    Defined inside a class method: `self`, `args`, `embedding`, `create_model`,
    `modeling` and `optimization` are captured from the enclosing scope.
    """
    # Read inputs
    dropout = args.dropout
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    input_ids = features["input_ids"]
    mask = features["mask"]
    label_ids = None
    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        label_ids = features["label_ids"]

    if args.embedding == 'plain':
        embeddings = tf.cast(input_ids, tf.float32)
    elif args.embedding == 'word2id':
        # word2id: trainable embeddings over the word vocabulary
        with Path(args.vocab_words).open(encoding='utf-8') as f:
            vocab_length = len(f.readlines())
        embeddings = embedding(input_ids, vocab_length, args)
        embeddings = tf.layers.dropout(embeddings, rate=dropout,
                                       training=training)
    elif args.embedding == 'bert':
        from my_model.embeddings.embedding import get_bert_embedding
        segment_ids = features["segment_ids"]
        embeddings = get_bert_embedding(args.bert_config_file, training,
                                        input_ids, mask, segment_ids,
                                        use_one_hot_embeddings=False)
    else:
        # Default: frozen pre-trained GloVe embeddings
        glove = np.load(args.glove)['embeddings']  # np.array
        variable = np.vstack([glove, [[0.] * args.dim]])
        variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
        embeddings = tf.nn.embedding_lookup(variable, input_ids)
        embeddings = tf.layers.dropout(embeddings, rate=dropout,
                                       training=training)

    (total_loss, logits, predicts) = create_model(
        embeddings, label_ids, mask, mode, training, self.num_labels,
        use_one_hot_embeddings=False)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}  # was None, which broke the lookup below
    if args.init_checkpoint:
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, args.init_checkpoint)
        tf.train.init_from_checkpoint(args.init_checkpoint, assignment_map)
    self.logging.debug("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        self.logging.debug("  name = %s, shape = %s%s", var.name, var.shape,
                           init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        warmup_steps = args.warmup_steps
        step = tf.to_float(tf.train.get_global_step())
        if args.learning_rate_decay == 'sqrt':
            # Linear warmup to the peak, then inverse-sqrt decay
            lr_warmup = args.learning_rate_peak * tf.minimum(
                1.0, step / warmup_steps)
            lr_decay = args.learning_rate_peak * tf.minimum(
                1.0, tf.sqrt(warmup_steps / step))
            lr = tf.where(step < warmup_steps, lr_warmup, lr_decay)
        elif args.learning_rate_decay == 'exp':
            lr = tf.train.exponential_decay(
                args.learning_rate_peak,
                global_step=step,
                decay_steps=args.decay_steps,
                decay_rate=args.decay_rate)
        elif args.learning_rate_decay == 'bert':
            # BERT-style schedule from the bert repo's optimization module;
            # warmup is hard-coded to 10% of the total training steps.
            num_train_steps = int(
                self.len_train_examples / args.batch_size * args.epochs)
            num_warmup_steps = int(num_train_steps * 0.1)
            train_op = optimization.create_optimizer(
                total_loss, args.learning_rate, num_train_steps,
                num_warmup_steps, use_tpu=False)
            return tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, train_op=train_op)
        else:
            self.logging.info('learning rate decay strategy not supported')
            sys.exit()
        tf.print(lr)
        train_op = tf.train.AdamOptimizer(lr).minimize(
            total_loss, global_step=tf.train.get_or_create_global_step())
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, loss=total_loss, train_op=train_op)
    elif mode == tf.estimator.ModeKeys.EVAL:
        # Metrics: per-token accuracy, weighted by the padding mask.
        # (The original 'mse' placeholder was not a valid streaming metric,
        # and the per-class precision/recall/f1 variants were disabled.)
        weights = mask
        pred_ids = tf.argmax(logits, axis=-1, output_type=tf.int32)
        metrics = {
            'acc': tf.metrics.accuracy(label_ids, pred_ids, weights),
        }
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, loss=total_loss, eval_metric_ops=metrics)
    else:
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, predictions=predicts)
    return output_spec
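# --- Usage sketch (illustrative; not part of the original code) ---
# How a model_fn like the one above is typically wired into a TF 1.x
# Estimator. `train_input_fn` / `eval_input_fn` are hypothetical input
# pipelines yielding the feature dicts this model_fn expects
# ({"input_ids", "mask", "segment_ids", "label_ids"}).
def run_estimator_sketch(model_fn, model_dir, train_input_fn, eval_input_fn):
    run_config = tf.estimator.RunConfig(
        model_dir=model_dir,
        save_checkpoints_steps=1000,
        keep_checkpoint_max=3)
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)
    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                        max_steps=10000)
    eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn,
                                      throttle_secs=60)
    # Alternates training and evaluation until max_steps is reached.
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)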
def model_fn(features, labels, mode, params):
    """Baseline bi-LSTM + CRF model_fn (GloVe or word2id embeddings).

    Like the variant above, this closes over `self`, `args`, `embedding`
    and the tf_metrics-style `precision` / `recall` / `f1` helpers.
    """
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = features['words'], features['nwords']

    # Read vocabs and inputs
    dropout = args.dropout
    words, nwords = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        args.vocab_words, num_oov_buckets=args.num_oov_buckets)
    with Path(args.vocab_tags).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    word_ids = vocab_words.lookup(words)

    if args.embedding == 'word2id':
        # word2id: trainable embeddings over the word vocabulary
        with Path(args.vocab_words).open(encoding='utf-8') as f:
            vocab_length = len(f.readlines())
        embeddings = embedding(word_ids, vocab_length, args)
        embeddings = tf.layers.dropout(embeddings, rate=dropout,
                                       training=training)
    else:
        # Default: frozen pre-trained GloVe embeddings
        glove = np.load(args.glove)['embeddings']  # np.array
        variable = np.vstack([glove, [[0.] * args.dim]])
        variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
        embeddings = tf.nn.embedding_lookup(variable, word_ids)
        embeddings = tf.layers.dropout(embeddings, rate=dropout,
                                       training=training)

    # LSTM (fused cells run time-major, hence the transposes)
    t = tf.transpose(embeddings, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(args.lstm_size)
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(args.lstm_size)
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags],
                                 dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            args.vocab_tags)
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(args.vocab_tags)
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices,
                                   weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, eval_metric_ops=metrics)
        elif mode == tf.estimator.ModeKeys.TRAIN:
            warmup_steps = args.warmup_steps
            step = tf.to_float(tf.train.get_global_step())
            if args.learning_rate_decay == 'sqrt':
                # Linear warmup to the peak, then inverse-sqrt decay
                lr_warmup = args.learning_rate_peak * tf.minimum(
                    1.0, step / warmup_steps)
                lr_decay = args.learning_rate_peak * tf.minimum(
                    1.0, tf.sqrt(warmup_steps / step))
                lr = tf.where(step < warmup_steps, lr_warmup, lr_decay)
            elif args.learning_rate_decay == 'exp':
                lr = tf.train.exponential_decay(
                    args.learning_rate_peak,
                    global_step=step,
                    decay_steps=args.decay_steps,
                    decay_rate=args.decay_rate)
            else:
                self.logging.info(
                    'learning rate decay strategy not supported')
                sys.exit()
            tf.print(lr)
            train_op = tf.train.AdamOptimizer(lr).minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode, loss=loss,
                                              train_op=train_op)
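# --- Schedule sketch (illustrative; not part of the original code) ---
# Both model_fn variants above implement the same 'sqrt' learning-rate
# schedule: linear warmup to `learning_rate_peak` over `warmup_steps`,
# then inverse-sqrt decay. The same formula in plain Python, for inspection:
import math

def sqrt_schedule(step, peak=1e-3, warmup_steps=1000):
    """Returns the learning rate at `step` under the 'sqrt' schedule."""
    step = max(float(step), 1.0)  # guard against division by zero at step 0
    if step < warmup_steps:
        return peak * step / warmup_steps           # linear warmup
    return peak * math.sqrt(warmup_steps / step)    # inverse-sqrt decay

# e.g. with the defaults: sqrt_schedule(500) == 5e-4 (mid-warmup) and
# sqrt_schedule(4000) == 5e-4 again after decay past the peak.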