def get_regression_loss(FLAGS, features, is_training):
    """Loss for downstream regression tasks."""

    bsz_per_core = tf.shape(features["input_ids"])[0]

    # XLNet expects [seq_len, batch] inputs, so transpose from [batch, seq_len].
    inp = tf.transpose(features["input_ids"], [1, 0])
    seg_id = tf.transpose(features["segment_ids"], [1, 0])
    inp_mask = tf.transpose(features["input_mask"], [1, 0])
    label = tf.reshape(features["label_ids"], [bsz_per_core])

    xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_config = xlnet.create_run_config(is_training, True, FLAGS)

    xlnet_model = xlnet.XLNetModel(
        xlnet_config=xlnet_config,
        run_config=run_config,
        input_ids=inp,
        seg_ids=seg_id,
        input_mask=inp_mask)

    summary = xlnet_model.get_pooled_out(FLAGS.summary_type, FLAGS.use_summ_proj)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        per_example_loss, logits = modeling.regression_loss(
            hidden=summary,
            labels=label,
            initializer=xlnet_model.get_initializer(),
            scope="regression_{}".format(FLAGS.task_name.lower()),
            return_logits=True)

        total_loss = tf.reduce_mean(per_example_loss)

        return total_loss, per_example_loss, logits
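For orientation, here is a minimal sketch of wiring such a loss builder into a graph; the feature names match the function above, but the placeholder shapes and the `FLAGS` contents (model_config_path, summary_type, use_summ_proj, task_name) are assumptions:

import tensorflow as tf

# Hypothetical feature tensors shaped [batch, seq_len], as the function expects.
batch_size, seq_len = 8, 128
features = {
    "input_ids": tf.placeholder(tf.int32, [batch_size, seq_len]),
    "segment_ids": tf.placeholder(tf.int32, [batch_size, seq_len]),
    "input_mask": tf.placeholder(tf.float32, [batch_size, seq_len]),
    "label_ids": tf.placeholder(tf.float32, [batch_size]),  # regression targets
}

# Build the regression graph from the snippet above.
total_loss, per_example_loss, logits = get_regression_loss(
    FLAGS, features, is_training=True)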
def __init__(self, flags, input_ids, seg_ids, input_mask):
    xlnet_config = xln.XLNetConfig(json_path=flags.model_config_path)
    run_config = xln.create_run_config(is_training=True, is_finetune=True, FLAGS=flags)
    self.model = xln.XLNetModel(
        xlnet_config=xlnet_config,
        run_config=run_config,
        input_ids=input_ids,
        seg_ids=seg_ids,
        input_mask=input_mask)
def load_model(self, model: str, model_path: str):
    # Descend into the single extracted checkpoint directory under model_path.
    model_path = os.path.join(model_path, next(os.walk(model_path))[1][0])
    self.xlnet_config = xlnet.XLNetConfig(
        json_path=os.path.join(model_path, Embeddings.mode_config_path))
    self.run_config = xlnet.create_run_config(is_training=True, is_finetune=True, FLAGS=Flags)
    self.load_tokenizer(model_path)
    self.model = model
    print("Model loaded successfully!")
def create_model(cf, input_ids, input_mask, segment_ids, labels, is_training=True):
    """Build the classification model.

    :param cf: config object with model and task settings
    :param input_ids: int Tensor of token ids, [batch, seq_len]
    :param input_mask: float Tensor of input mask, [batch, seq_len]
    :param segment_ids: int Tensor of segment ids, [batch, seq_len]
    :param labels: int Tensor of label ids, [batch]
    :param is_training: whether the model is built for training
    :return: (total_loss, per_example_loss, logits)
    """
    bsz_per_core = tf.shape(input_ids)[0]

    # XLNet expects [seq_len, batch] inputs.
    inp = tf.transpose(input_ids, [1, 0])
    seg_id = tf.transpose(segment_ids, [1, 0])
    inp_mask = tf.transpose(input_mask, [1, 0])
    label = tf.reshape(labels, [bsz_per_core])

    xlnet_config = xlnet.XLNetConfig(json_path=cf.model_config_path)
    run_config = xlnet.create_run_config(is_training, True, cf)

    xlnet_model = xlnet.XLNetModel(
        xlnet_config=xlnet_config,
        run_config=run_config,
        input_ids=inp,
        seg_ids=seg_id,
        input_mask=inp_mask)

    summary = xlnet_model.get_pooled_out(cf.summary_type, cf.use_summ_proj)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        if cf.cls_scope:
            cls_scope = "classification_{}".format(cf.cls_scope)
        else:
            cls_scope = "classification_{}".format(cf.task_name.lower())

        per_example_loss, logits = modeling.classification_loss(
            hidden=summary,
            labels=label,
            n_class=cf.num_labels,
            initializer=xlnet_model.get_initializer(),
            scope=cls_scope,
            return_logits=True)

        total_loss = tf.reduce_mean(per_example_loss)

        return total_loss, per_example_loss, logits
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    tpu_config = model_utils.configure_tpu(FLAGS)
    model_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_config = xlnet.create_run_config(False, True, FLAGS)

    model_builder = XLNetModelBuilder(
        default_model_config=model_config,
        default_run_config=run_config,
        default_init_checkpoint=FLAGS.init_checkpoint,
        use_tpu=FLAGS.use_tpu)

    model_fn = model_builder.get_model_fn(
        model_config, run_config, FLAGS.init_checkpoint, FLAGS.model_type)

    # If TPU is not available, this falls back to a normal Estimator on CPU or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=tpu_config,
        export_to_tpu=FLAGS.use_tpu,
        train_batch_size=1)

    tokenizer = XLNetTokenizer(
        sp_model_file=FLAGS.spiece_model_file,
        lower_case=FLAGS.lower_case)

    example_converter = XLNetExampleConverter(
        label_list=[],
        max_seq_length=FLAGS.max_seq_length,
        tokenizer=tokenizer)

    # Run one dummy training step on a padding example so the Estimator
    # builds the graph and writes a checkpoint before exporting.
    features = example_converter.convert_examples_to_features([PaddingInputExample()])
    input_fn = XLNetInputBuilder.get_input_builder(features, FLAGS.max_seq_length, True, False)
    estimator.train(input_fn, max_steps=1)

    tf.gfile.MakeDirs(FLAGS.export_dir)
    serving_input_fn = XLNetInputBuilder.get_serving_input_fn(FLAGS.max_seq_length)
    estimator.export_savedmodel(FLAGS.export_dir, serving_input_fn, as_text=False)
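Once `export_savedmodel` has written the model, it can be served without the Estimator. A minimal sketch, assuming TF 1.x; the export path and the feature keys depend on `get_serving_input_fn`, so treat the names below as assumptions:

import tensorflow as tf

# Load the SavedModel exported above; tf.contrib.predictor wraps the
# serving signature into a plain Python callable (TF 1.x only).
predict_fn = tf.contrib.predictor.from_saved_model("/path/to/export_dir/1565000000")

# Feature names must match the serving_input_fn; these keys are assumptions.
outputs = predict_fn({
    "input_ids": [[0] * 128],
    "input_mask": [[1.0] * 128],
    "segment_ids": [[0] * 128],
})
print(outputs.keys())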
def __init__(self, model_config_path, is_training, FLAGS, input_ids,
             segment_ids, input_mask, label, n_class):
    """Build an XLNet classifier with loss, train op, and accuracy.

    :param model_config_path: path to the XLNet config JSON
    :param is_training: whether the model is built for training
    :param FLAGS: command-line flags with run settings
    :param input_ids: int Tensor of token ids, [batch, seq_len]
    :param segment_ids: int Tensor of segment ids, [batch, seq_len]
    :param input_mask: float Tensor of input mask, [batch, seq_len]
    :param label: int Tensor of label ids, [batch]
    :param n_class: number of target classes
    """
    self.xlnet_config = xlnet.XLNetConfig(json_path=model_config_path)
    self.run_config = xlnet.create_run_config(is_training, True, FLAGS)

    # XLNet expects [seq_len, batch] inputs.
    self.input_ids = tf.transpose(input_ids, [1, 0])
    self.segment_ids = tf.transpose(segment_ids, [1, 0])
    self.input_mask = tf.transpose(input_mask, [1, 0])

    self.model = xlnet.XLNetModel(
        xlnet_config=self.xlnet_config,
        run_config=self.run_config,
        input_ids=self.input_ids,
        seg_ids=self.segment_ids,
        input_mask=self.input_mask)

    cls_scope = FLAGS.cls_scope
    summary = self.model.get_pooled_out(FLAGS.summary_type, FLAGS.use_summ_proj)

    self.per_example_loss, self.logits = modeling.classification_loss(
        hidden=summary,
        labels=label,
        n_class=n_class,
        initializer=self.model.get_initializer(),
        scope=cls_scope,
        return_logits=True)

    self.total_loss = tf.reduce_mean(self.per_example_loss)

    with tf.name_scope("train_op"):
        self.train_op, _, _ = model_utils.get_train_op(FLAGS, self.total_loss)

    with tf.name_scope("acc"):
        one_hot_target = tf.one_hot(label, n_class)
        self.acc = self.accuracy(self.logits, one_hot_target)
def __init__(self, config, FLAGS):
    self.config = config
    self.max_segment_len = config['max_segment_len']
    self.max_span_width = config["max_span_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.subtoken_maps = {}
    self.gold = {}
    self.eval_data = None  # Load eval data lazily.
    self.FLAGS = FLAGS

    self.xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.xlnet_config_file)

    tf.logging.info("spiece_model_file: %s", FLAGS.spiece_model_file)
    self.tokenizer = spm.SentencePieceProcessor()
    self.tokenizer.load(FLAGS.spiece_model_file)

    input_props = [
        (tf.int32, [None, None]),    # Input ids.
        (tf.int32, [None, None]),    # Segment ids.
        (tf.float32, [None, None]),  # Input mask.
        (tf.int32, [None]),          # Text lengths.
        (tf.int32, [None, None]),    # Speaker ids.
        (tf.int32, []),              # Genre.
        (tf.bool, []),               # Is training.
        (tf.int32, [None]),          # Gold starts.
        (tf.int32, [None]),          # Gold ends.
        (tf.int32, [None]),          # Cluster ids.
        (tf.int32, [None]),          # Sentence map.
    ]

    self.queue_input_tensors = [
        tf.placeholder(dtype, shape) for dtype, shape in input_props
    ]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)

    # XLNet checkpoint initialization and training op.
    scaffold_fn = model_utils.init_from_checkpoint(FLAGS)
    self.train_op, learning_rate, _ = model_utils.get_train_op(FLAGS, self.loss)
    self.global_step = tf.train.get_or_create_global_step()
def get_classification_loss(options, features, n_class, is_training):
    """Loss for downstream classification tasks."""

    bsz_per_core = tf.shape(features["input_ids"])[0]

    inp = tf.transpose(features["input_ids"], [1, 0])
    seg_id = tf.transpose(features["segment_ids"], [1, 0])
    inp_mask = tf.transpose(features["input_mask"], [1, 0])
    label = tf.reshape(features["label_ids"], [bsz_per_core])

    xlnet_config = xlnet.XLNetConfig(json_path=options['model_config_file'])
    run_config = xlnet.create_run_config(is_training, True, options)

    xlnet_model = xlnet.XLNetModel(
        xlnet_config=xlnet_config,
        run_config=run_config,
        input_ids=inp,
        seg_ids=seg_id,
        input_mask=inp_mask)

    summary = xlnet_model.get_pooled_out(
        options['summary_type'], options['use_summ_proj'])

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        if options['cls_scope']:
            cls_scope = "classification_{}".format(options['cls_scope'])
        else:
            cls_scope = "classification_{}".format(options['task_name'].lower())

        per_example_loss, logits = modeling.classification_loss(
            hidden=summary,
            labels=label,
            n_class=n_class,
            initializer=xlnet_model.get_initializer(),
            scope=cls_scope,
            return_logits=True)

        total_loss = tf.reduce_mean(per_example_loss)

        return total_loss, per_example_loss, logits
def get_race_loss(FLAGS, features, is_training):
    """Loss for downstream multi-choice QA tasks such as RACE."""

    bsz_per_core = tf.shape(features["input_ids"])[0]

    def _transform_features(feature):
        # [bsz, 4 * seq_len] -> [seq_len, bsz * 4]: fold the 4 answer
        # choices into the batch axis, with the sequence dimension leading.
        out = tf.reshape(feature, [bsz_per_core, 4, -1])
        out = tf.transpose(out, [2, 0, 1])
        out = tf.reshape(out, [-1, bsz_per_core * 4])
        return out

    inp = _transform_features(features["input_ids"])
    seg_id = _transform_features(features["segment_ids"])
    inp_mask = _transform_features(features["input_mask"])
    label = tf.reshape(features["label_ids"], [bsz_per_core])

    xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_config = xlnet.create_run_config(is_training, True, FLAGS)

    xlnet_model = xlnet.XLNetModel(
        xlnet_config=xlnet_config,
        run_config=run_config,
        input_ids=inp,
        seg_ids=seg_id,
        input_mask=inp_mask)
    summary = xlnet_model.get_pooled_out(FLAGS.summary_type, FLAGS.use_summ_proj)

    with tf.variable_scope("logits"):
        logits = tf.layers.dense(summary, 1, kernel_initializer=xlnet_model.get_initializer())
        logits = tf.reshape(logits, [bsz_per_core, 4])

        one_hot_target = tf.one_hot(label, 4)
        per_example_loss = -tf.reduce_sum(
            tf.nn.log_softmax(logits) * one_hot_target, -1)
        total_loss = tf.reduce_mean(per_example_loss)

    return total_loss, per_example_loss, logits
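The reshape/transpose in `_transform_features` is easy to misread, so here is a standalone numpy check of the same transform on toy sizes (the dimensions are illustrative only):

import numpy as np

# Batch of 2 questions, 4 choices, sequence length 3; features arrive
# as [bsz, 4 * seq_len], one row per question with choices concatenated.
bsz, n_choice, seq_len = 2, 4, 3
feature = np.arange(bsz * n_choice * seq_len).reshape(bsz, n_choice * seq_len)

out = feature.reshape(bsz, n_choice, seq_len)  # [bsz, 4, seq_len]
out = out.transpose(2, 0, 1)                   # [seq_len, bsz, 4]
out = out.reshape(seq_len, bsz * n_choice)     # [seq_len, bsz * 4]

print(out.shape)  # (3, 8): seq-major, choices folded into the batch axis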
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    np.random.seed(FLAGS.random_seed)

    processor = NerProcessor(
        data_dir=FLAGS.data_dir,
        input_file=FLAGS.input_file,
        task_name=FLAGS.task_name.lower())

    label_list = processor.get_labels()
    tf.logging.info(label_list)

    tpu_config = model_utils.configure_tpu(FLAGS)
    model_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_config = xlnet.create_run_config(False, True, FLAGS)

    model_builder = XLNetModelBuilder(
        default_model_config=model_config,
        default_run_config=run_config,
        default_init_checkpoint=FLAGS.init_checkpoint,
        use_tpu=FLAGS.use_tpu)

    model_fn = model_builder.get_model_fn(
        model_config, run_config, FLAGS.init_checkpoint, label_list)

    # If TPU is not available, this falls back to a normal Estimator on CPU or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=tpu_config,
        export_to_tpu=FLAGS.use_tpu,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    tokenizer = XLNetTokenizer(
        sp_model_file=FLAGS.spiece_model_file,
        lower_case=FLAGS.lower_case)

    example_converter = XLNetExampleConverter(
        label_list=label_list,
        max_seq_length=FLAGS.max_seq_length,
        tokenizer=tokenizer)

    if FLAGS.do_train:
        train_examples = processor.get_chem_examples()
        tf.logging.info("***** Run training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", FLAGS.train_steps)
        train_features = example_converter.convert_examples_to_features(train_examples)
        train_input_fn = XLNetInputBuilder.get_input_builder(
            train_features, FLAGS.max_seq_length, True, True)
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples()
        tf.logging.info("***** Run evaluation *****")
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        eval_features = example_converter.convert_examples_to_features(eval_examples)
        eval_input_fn = XLNetInputBuilder.get_input_builder(
            eval_features, FLAGS.max_seq_length, False, False)
        result = estimator.evaluate(input_fn=eval_input_fn)
        precision = result["precision"]
        recall = result["recall"]
        f1_score = 2.0 * precision * recall / (precision + recall)
        tf.logging.info("***** Evaluation result *****")
        tf.logging.info("  Precision (token-level) = %s", str(precision))
        tf.logging.info("  Recall (token-level) = %s", str(recall))
        tf.logging.info("  F1 score (token-level) = %s", str(f1_score))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples()
        pmids = [e.guid for e in predict_examples]
        tf.logging.info("***** Run prediction *****")
        tf.logging.info("  Num examples = %d", len(predict_examples))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)
        predict_features = example_converter.convert_examples_to_features(predict_examples)
        predict_input_fn = XLNetInputBuilder.get_input_builder(
            predict_features, FLAGS.max_seq_length, False, False)
        result = estimator.predict(input_fn=predict_input_fn)
        predict_recorder = XLNetPredictRecorder(
            output_dir=FLAGS.output_dir,
            label_list=label_list,
            guids=pmids,
            max_seq_length=FLAGS.max_seq_length,
            tokenizer=tokenizer,
            predict_tag=FLAGS.predict_tag)
        predicts = [{
            "input_ids": feature.input_ids,
            "input_masks": feature.input_masks,
            "label_ids": feature.label_ids,
            "predict_ids": predict["predict"].tolist()
        } for feature, predict in zip(predict_features, result)]
        predict_recorder.record(predicts)

    if FLAGS.do_export:
        tf.logging.info("***** Running exporting *****")
        tf.gfile.MakeDirs(FLAGS.export_dir)
        serving_input_fn = XLNetInputBuilder.get_serving_input_fn(FLAGS.max_seq_length)
        estimator.export_savedmodel(FLAGS.export_dir, serving_input_fn, as_text=False)
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    np.random.seed(FLAGS.random_seed)

    processor = ClassificationProcessor(
        data_dir=FLAGS.data_dir,
        task_name=FLAGS.task_name.lower())

    sent_label_list = processor.get_sent_labels()

    model_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)

    model_builder = XLNetModelBuilder(
        model_config=model_config,
        use_tpu=FLAGS.use_tpu)

    model_fn = model_builder.get_model_fn(sent_label_list)

    # If TPU is not available, this falls back to a normal Estimator on CPU or GPU.
    tpu_config = model_utils.configure_tpu(FLAGS)

    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=tpu_config,
        export_to_tpu=FLAGS.use_tpu,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    tokenizer = XLNetTokenizer(
        sp_model_file=FLAGS.spiece_model_file,
        lower_case=FLAGS.lower_case)

    example_converter = XLNetExampleConverter(
        sent_label_list=sent_label_list,
        max_seq_length=FLAGS.max_seq_length,
        tokenizer=tokenizer)

    if FLAGS.do_train:
        train_examples = processor.get_train_examples()
        tf.logging.info("***** Run training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", FLAGS.train_steps)
        train_features = example_converter.convert_examples_to_features(train_examples)
        train_input_fn = XLNetInputBuilder.get_input_builder(
            train_features, FLAGS.max_seq_length, True, True)
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples()
        tf.logging.info("***** Run evaluation *****")
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        eval_features = example_converter.convert_examples_to_features(eval_examples)
        eval_input_fn = XLNetInputBuilder.get_input_builder(
            eval_features, FLAGS.max_seq_length, False, False)
        result = estimator.evaluate(input_fn=eval_input_fn)
        sent_accuracy = result["sent_accuracy"]
        tf.logging.info("***** Evaluation result *****")
        tf.logging.info("  Accuracy (sent-level) = %s", str(sent_accuracy))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples()
        tf.logging.info("***** Run prediction *****")
        tf.logging.info("  Num examples = %d", len(predict_examples))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)
        predict_features = example_converter.convert_examples_to_features(predict_examples)
        predict_input_fn = XLNetInputBuilder.get_input_builder(
            predict_features, FLAGS.max_seq_length, False, False)
        result = estimator.predict(input_fn=predict_input_fn)
        predict_recorder = XLNetPredictRecorder(
            output_dir=FLAGS.output_dir,
            sent_label_list=sent_label_list,
            max_seq_length=FLAGS.max_seq_length,
            tokenizer=tokenizer,
            predict_tag=FLAGS.predict_tag)
        predicts = [{
            "input_ids": feature.input_ids,
            "input_masks": feature.input_masks,
            "sent_label_id": feature.sent_label_id,
            "sent_predict_id": predict["sent_predict_id"],
            "sent_predict_score": predict["sent_predict_score"],
            "sent_predict_prob": predict["sent_predict_prob"].tolist()
        } for feature, predict in zip(predict_features, result)]
        predict_recorder.record(predicts)

    if FLAGS.do_export:
        tf.logging.info("***** Running exporting *****")
        tf.gfile.MakeDirs(FLAGS.export_dir)
        serving_input_fn = XLNetInputBuilder.get_serving_input_fn(FLAGS.max_seq_length)
        estimator.export_savedmodel(FLAGS.export_dir, serving_input_fn, as_text=False)
from xlnet import xlnet
from absl.flags import FLAGS

# some code omitted here...
# initialize FLAGS
# initialize instances of tf.Tensor, including input_ids, seg_ids, and input_mask

# XLNetConfig contains hyperparameters that are specific to a model checkpoint.
xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)

# RunConfig contains hyperparameters that could differ between pretraining and finetuning.
run_config = xlnet.create_run_config(is_training=True, is_finetune=True, FLAGS=FLAGS)

# Construct an XLNet model.
xlnet_model = xlnet.XLNetModel(
    xlnet_config=xlnet_config,
    run_config=run_config,
    input_ids=input_ids,
    seg_ids=seg_ids,
    input_mask=input_mask)

# Get a summary of the sequence using the last hidden state.
summary = xlnet_model.get_pooled_out(summary_type="last")

# Get a sequence output.
seq_out = xlnet_model.get_sequence_output()

# Build your applications based on `summary` or `seq_out`.
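As a concrete illustration of building on `summary`, here is a minimal sketch of a softmax classification head, continuing directly from the snippet above; the scope name, class count, and label placeholder are hypothetical:

import tensorflow as tf

n_class = 3  # hypothetical number of target classes
labels = tf.placeholder(tf.int32, [None])  # hypothetical label input

with tf.variable_scope("demo_classifier"):
    # Project the pooled summary to per-class logits.
    logits = tf.layers.dense(
        summary, n_class,
        kernel_initializer=xlnet_model.get_initializer())
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    total_loss = tf.reduce_mean(per_example_loss)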
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    layer_indexes = [int(x) for x in FLAGS.layers.split(",")]

    bert_config = modeling.XLNetConfig(json_path=FLAGS.bert_config_file)

    tokenizer = tokenization.FullTokenizer(
        spm_model_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    run_config = configure_tpu(FLAGS)

    # Read the jsonlines splits and convert them to model inputs.
    json_examples = []
    for x in ['test', 'train', 'dev']:
        with open(os.path.join(FLAGS.input_file, x + '.english.jsonlines')) as f:
            json_examples.extend(json.loads(jsonline) for jsonline in f.readlines())

    orig_examples = []
    bert_examples = []
    for i, json_e in enumerate(json_examples):
        e = process_example(json_e, i, should_filter_embedded_mentions=True)
        orig_examples.append(e)
        bert_examples.append(e.bertify(tokenizer))

    model_fn = model_fn_builder(
        bert_config=bert_config,
        run_config=run_config,
        init_checkpoint=FLAGS.init_checkpoint,
        layer_indexes=layer_indexes,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_one_hot_embeddings)

    # If TPU is not available, this falls back to a normal Estimator on CPU or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        predict_batch_size=FLAGS.batch_size)

    input_fn = input_fn_builder(
        examples=bert_examples,
        window_size=FLAGS.window_size,
        stride=FLAGS.stride,
        tokenizer=tokenizer)

    # Stream predictions and write per-sentence token embeddings to HDF5.
    writer = h5py.File(FLAGS.output_file, 'w')
    with tqdm(total=sum(len(e.tokens) for e in orig_examples)) as t:
        for result in estimator.predict(input_fn, yield_single_examples=True):
            document_index = int(result["unique_ids"])
            bert_example = bert_examples[document_index]
            orig_example = orig_examples[document_index]
            file_key = bert_example.doc_key.replace('/', ':')
            t.update(n=(result['extract_indices'] >= 0).sum())
            for output_index, bert_token_index in enumerate(result['extract_indices']):
                if bert_token_index < 0:
                    continue
                token_index = bert_example.bert_to_orig_map[bert_token_index]
                sentence_index, token_index = orig_example.unravel_token_index(token_index)
                dataset_key = "{}/{}".format(file_key, sentence_index)
                if dataset_key not in writer:
                    writer.create_dataset(
                        dataset_key,
                        (len(orig_example.sentence_tokens[sentence_index]),
                         bert_config.hidden_size,
                         len(layer_indexes)),
                        dtype=np.float32)
                dset = writer[dataset_key]
                for j, layer_index in enumerate(layer_indexes):
                    layer_output = result["layer_output_%d" % j]
                    dset[token_index, :, j] = layer_output[output_index]
    writer.close()
def two_stream_loss(FLAGS, features, labels, mems, is_training):
    """Pretraining loss with two-stream attention Transformer-XL."""

    #### Unpack input
    mem_name = "mems"
    mems = mems.get(mem_name, None)

    inp_k = tf.transpose(features["input_k"], [1, 0])
    inp_q = tf.transpose(features["input_q"], [1, 0])

    seg_id = tf.transpose(features["seg_id"], [1, 0])

    inp_mask = None
    perm_mask = tf.transpose(features["perm_mask"], [1, 2, 0])

    if FLAGS.num_predict is not None:
        # [num_predict x tgt_len x bsz]
        target_mapping = tf.transpose(features["target_mapping"], [1, 2, 0])
    else:
        target_mapping = None

    # target for LM loss
    tgt = tf.transpose(features["target"], [1, 0])

    # target mask for LM loss
    tgt_mask = tf.transpose(features["target_mask"], [1, 0])

    # construct xlnet config and save to model_dir
    xlnet_config = xlnet.XLNetConfig(FLAGS=FLAGS)
    xlnet_config.to_json(os.path.join(FLAGS.model_dir, "config.json"))

    # construct run config from FLAGS
    run_config = xlnet.create_run_config(is_training, False, FLAGS)

    xlnet_model = xlnet.XLNetModel(
        xlnet_config=xlnet_config,
        run_config=run_config,
        input_ids=inp_k,
        seg_ids=seg_id,
        input_mask=inp_mask,
        mems=mems,
        perm_mask=perm_mask,
        target_mapping=target_mapping,
        inp_q=inp_q)

    output = xlnet_model.get_sequence_output()
    new_mems = {mem_name: xlnet_model.get_new_memory()}
    lookup_table = xlnet_model.get_embedding_table()

    initializer = xlnet_model.get_initializer()

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        # LM loss
        lm_loss = modeling.lm_loss(
            hidden=output,
            target=tgt,
            n_token=xlnet_config.n_token,
            d_model=xlnet_config.d_model,
            initializer=initializer,
            lookup_table=lookup_table,
            tie_weight=True,
            bi_data=run_config.bi_data,
            use_tpu=run_config.use_tpu)

    #### Quantity to monitor
    monitor_dict = {}

    if FLAGS.use_bfloat16:
        tgt_mask = tf.cast(tgt_mask, tf.float32)
        lm_loss = tf.cast(lm_loss, tf.float32)

    total_loss = tf.reduce_sum(lm_loss * tgt_mask) / tf.reduce_sum(tgt_mask)
    monitor_dict["total_loss"] = total_loss

    return total_loss, new_mems, monitor_dict
def get_qa_outputs(FLAGS, features, is_training):
    """Loss for downstream span-extraction QA tasks such as SQuAD."""

    inp = tf.transpose(features["input_ids"], [1, 0])
    seg_id = tf.transpose(features["segment_ids"], [1, 0])
    inp_mask = tf.transpose(features["input_mask"], [1, 0])
    cls_index = tf.reshape(features["cls_index"], [-1])

    seq_len = tf.shape(inp)[0]

    xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_config = xlnet.create_run_config(is_training, True, FLAGS)

    xlnet_model = xlnet.XLNetModel(
        xlnet_config=xlnet_config,
        run_config=run_config,
        input_ids=inp,
        seg_ids=seg_id,
        input_mask=inp_mask)
    output = xlnet_model.get_sequence_output()
    initializer = xlnet_model.get_initializer()

    return_dict = {}

    # invalid position mask such as query and special symbols (PAD, SEP, CLS)
    p_mask = features["p_mask"]

    # logit of the start position
    with tf.variable_scope("start_logits"):
        start_logits = tf.layers.dense(output, 1, kernel_initializer=initializer)
        start_logits = tf.transpose(tf.squeeze(start_logits, -1), [1, 0])
        start_logits_masked = start_logits * (1 - p_mask) - 1e30 * p_mask
        start_log_probs = tf.nn.log_softmax(start_logits_masked, -1)

    # logit of the end position
    with tf.variable_scope("end_logits"):
        if is_training:
            # during training, compute the end logits based on the
            # ground truth of the start position
            start_positions = tf.reshape(features["start_positions"], [-1])
            start_index = tf.one_hot(
                start_positions, depth=seq_len, axis=-1, dtype=tf.float32)
            start_features = tf.einsum("lbh,bl->bh", output, start_index)
            start_features = tf.tile(start_features[None], [seq_len, 1, 1])
            end_logits = tf.layers.dense(
                tf.concat([output, start_features], axis=-1),
                xlnet_config.d_model,
                kernel_initializer=initializer,
                activation=tf.tanh,
                name="dense_0")
            end_logits = tf.contrib.layers.layer_norm(end_logits, begin_norm_axis=-1)
            end_logits = tf.layers.dense(
                end_logits, 1, kernel_initializer=initializer, name="dense_1")
            end_logits = tf.transpose(tf.squeeze(end_logits, -1), [1, 0])
            end_logits_masked = end_logits * (1 - p_mask) - 1e30 * p_mask
            end_log_probs = tf.nn.log_softmax(end_logits_masked, -1)
        else:
            # during inference, compute the end logits based on beam search
            start_top_log_probs, start_top_index = tf.nn.top_k(
                start_log_probs, k=FLAGS.start_n_top)
            start_index = tf.one_hot(
                start_top_index, depth=seq_len, axis=-1, dtype=tf.float32)
            start_features = tf.einsum("lbh,bkl->bkh", output, start_index)
            end_input = tf.tile(output[:, :, None], [1, 1, FLAGS.start_n_top, 1])
            start_features = tf.tile(start_features[None], [seq_len, 1, 1, 1])
            end_input = tf.concat([end_input, start_features], axis=-1)
            end_logits = tf.layers.dense(
                end_input,
                xlnet_config.d_model,
                kernel_initializer=initializer,
                activation=tf.tanh,
                name="dense_0")
            end_logits = tf.contrib.layers.layer_norm(end_logits, begin_norm_axis=-1)
            end_logits = tf.layers.dense(
                end_logits, 1, kernel_initializer=initializer, name="dense_1")
            end_logits = tf.reshape(end_logits, [seq_len, -1, FLAGS.start_n_top])
            end_logits = tf.transpose(end_logits, [1, 2, 0])
            end_logits_masked = end_logits * (
                1 - p_mask[:, None]) - 1e30 * p_mask[:, None]
            end_log_probs = tf.nn.log_softmax(end_logits_masked, -1)
            end_top_log_probs, end_top_index = tf.nn.top_k(
                end_log_probs, k=FLAGS.end_n_top)
            end_top_log_probs = tf.reshape(
                end_top_log_probs, [-1, FLAGS.start_n_top * FLAGS.end_n_top])
            end_top_index = tf.reshape(
                end_top_index, [-1, FLAGS.start_n_top * FLAGS.end_n_top])

    if is_training:
        return_dict["start_log_probs"] = start_log_probs
        return_dict["end_log_probs"] = end_log_probs
    else:
        return_dict["start_top_log_probs"] = start_top_log_probs
        return_dict["start_top_index"] = start_top_index
        return_dict["end_top_log_probs"] = end_top_log_probs
        return_dict["end_top_index"] = end_top_index

    # an additional layer to predict answerability
    with tf.variable_scope("answer_class"):
        # get the representation of CLS
        cls_index = tf.one_hot(cls_index, seq_len, axis=-1, dtype=tf.float32)
        cls_feature = tf.einsum("lbh,bl->bh", output, cls_index)

        # get the representation of START
        start_p = tf.nn.softmax(start_logits_masked, axis=-1, name="softmax_start")
        start_feature = tf.einsum("lbh,bl->bh", output, start_p)

        # note(zhiliny): no dependency on end_feature so that we can obtain
        # one single `cls_logits` for each sample
        ans_feature = tf.concat([start_feature, cls_feature], -1)
        ans_feature = tf.layers.dense(
            ans_feature,
            xlnet_config.d_model,
            activation=tf.tanh,
            kernel_initializer=initializer,
            name="dense_0")
        ans_feature = tf.layers.dropout(ans_feature, FLAGS.dropout, training=is_training)
        cls_logits = tf.layers.dense(
            ans_feature, 1,
            kernel_initializer=initializer,
            name="dense_1",
            use_bias=False)
        cls_logits = tf.squeeze(cls_logits, -1)
        return_dict["cls_logits"] = cls_logits

    return return_dict
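The `einsum("lbh,bl->bh", ...)` pattern above gathers, for each example, the hidden vector at a chosen position. A standalone numpy check with toy shapes (all sizes illustrative):

import numpy as np

# output is [seq_len, bsz, hidden]; start_index is a one-hot [bsz, seq_len].
# The einsum picks each example's hidden vector at its start position.
seq_len, bsz, hidden = 5, 2, 3
output = np.random.rand(seq_len, bsz, hidden)
start_positions = np.array([1, 4])
start_index = np.eye(seq_len)[start_positions]  # [bsz, seq_len], one-hot rows

start_features = np.einsum("lbh,bl->bh", output, start_index)
assert np.allclose(start_features[0], output[1, 0])
assert np.allclose(start_features[1], output[4, 1])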
def get_uda_classification_loss(options, features, n_class, is_training,
                                global_step, input_ids, input_mask,
                                segment_ids, labels):
    """Loss for downstream classification tasks with UDA."""

    tsa = options['tsa']
    unsup_ratio = options['unsup_ratio']
    num_train_steps = options['num_train_steps']
    uda_softmax_temp = options['uda_softmax_temp']
    uda_confidence_thresh = options['uda_confidence_thresh']

    # XLNet expects [seq_len, batch] inputs.
    inp = tf.transpose(input_ids, [1, 0])
    seg_id = tf.transpose(segment_ids, [1, 0])
    inp_mask = tf.transpose(input_mask, [1, 0])

    # During training each batch packs one supervised chunk followed by the
    # original and augmented unsupervised chunks (1 + 2 * unsup_ratio parts).
    num_sample = input_ids.shape[0].value
    if is_training:
        assert num_sample % (1 + 2 * unsup_ratio) == 0
        sup_batch_size = num_sample // (1 + 2 * unsup_ratio)
        unsup_batch_size = sup_batch_size * unsup_ratio
        bsz_per_core = tf.shape(input_ids)[0] // (1 + 2 * unsup_ratio)
    else:
        sup_batch_size = num_sample
        unsup_batch_size = 0
        bsz_per_core = tf.shape(input_ids)[0]

    labels = tf.reshape(labels, [bsz_per_core])

    xlnet_config = xlnet.XLNetConfig(json_path=options['model_config_file'])
    run_config = xlnet.create_run_config(is_training, True, options)

    xlnet_model = xlnet.XLNetModel(
        xlnet_config=xlnet_config,
        run_config=run_config,
        input_ids=inp,
        seg_ids=seg_id,
        input_mask=inp_mask)

    summary = xlnet_model.get_pooled_out(
        options['summary_type'], options['use_summ_proj'])

    if options['cls_scope']:
        cls_scope = "classification_{}".format(options['cls_scope'])
    else:
        cls_scope = "classification_{}".format(options['task_name'].lower())

    clas_logits = modeling.uda_logits(
        hidden=summary,
        labels=labels,
        n_class=n_class,
        initializer=xlnet_model.get_initializer(),
        scope=cls_scope)

    log_probs = tf.nn.log_softmax(clas_logits, axis=-1)
    correct_label_probs = None

    with tf.variable_scope("sup_loss"):
        sup_log_probs = log_probs[:sup_batch_size]
        one_hot_labels = tf.one_hot(labels, depth=n_class, dtype=tf.float32)
        tgt_label_prob = one_hot_labels

        per_example_loss = -tf.reduce_sum(tgt_label_prob * sup_log_probs, axis=-1)
        loss_mask = tf.ones_like(per_example_loss, dtype=per_example_loss.dtype)
        correct_label_probs = tf.reduce_sum(
            one_hot_labels * tf.exp(sup_log_probs), axis=-1)

        if tsa:
            tf.logging.info("Applying TSA")
            # Starting threshold is just the inverse number of labels.
            tsa_start = 1. / n_class
            tsa_threshold = model_utils.get_tsa_threshold(
                tsa, global_step, num_train_steps, tsa_start, end=1)
            # Mask out examples the model already classifies confidently.
            larger_than_threshold = tf.greater(correct_label_probs, tsa_threshold)
            loss_mask = loss_mask * (1 - tf.cast(larger_than_threshold, tf.float32))
        else:
            tsa_threshold = 1

        loss_mask = tf.stop_gradient(loss_mask)
        per_example_loss = per_example_loss * loss_mask
        sup_loss = (tf.reduce_sum(per_example_loss) /
                    tf.maximum(tf.reduce_sum(loss_mask), 1))

    unsup_loss_mask = None
    if is_training and unsup_ratio > 0:
        with tf.variable_scope("unsup_loss"):
            ori_start = sup_batch_size
            ori_end = ori_start + unsup_batch_size
            aug_start = sup_batch_size + unsup_batch_size
            aug_end = aug_start + unsup_batch_size

            ori_log_probs = log_probs[ori_start:ori_end]
            aug_log_probs = log_probs[aug_start:aug_end]
            unsup_loss_mask = 1

            # Optionally sharpen the target distribution with a softmax temperature.
            if uda_softmax_temp != -1:
                tgt_ori_log_probs = tf.nn.log_softmax(
                    clas_logits[ori_start:ori_end] / uda_softmax_temp, axis=-1)
                tgt_ori_log_probs = tf.stop_gradient(tgt_ori_log_probs)
            else:
                tgt_ori_log_probs = tf.stop_gradient(ori_log_probs)

            # Optionally keep only confident unsupervised examples.
            if uda_confidence_thresh != -1:
                largest_prob = tf.reduce_max(tf.exp(ori_log_probs), axis=-1)
                unsup_loss_mask = tf.cast(
                    tf.greater(largest_prob, uda_confidence_thresh), tf.float32)
                unsup_loss_mask = tf.stop_gradient(unsup_loss_mask)

            per_example_kl_loss = model_utils.kl_for_log_probs(
                tgt_ori_log_probs, aug_log_probs) * unsup_loss_mask
            unsup_loss = tf.reduce_mean(per_example_kl_loss)
    else:
        unsup_loss = 0.

    return (sup_loss, unsup_loss, clas_logits[:sup_batch_size],
            per_example_loss, loss_mask, tsa_threshold,
            unsup_loss_mask, correct_label_probs)
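For intuition on the TSA threshold used above, here is a standalone sketch of the annealing schedules described in the UDA paper; this is a reimplementation written for illustration, not the repo's `model_utils.get_tsa_threshold`, and the schedule names and `scale` constant are assumptions:

import numpy as np

def tsa_threshold(schedule, step, total_steps, start, end=1.0, scale=5.0):
    """Sketch of a training-signal-annealing schedule (after the UDA paper)."""
    frac = step / total_steps
    if schedule == "linear_schedule":
        alpha = frac
    elif schedule == "exp_schedule":
        alpha = np.exp((frac - 1.0) * scale)   # release supervision late
    elif schedule == "log_schedule":
        alpha = 1.0 - np.exp(-frac * scale)    # release supervision early
    else:
        raise ValueError("unknown schedule: %s" % schedule)
    return alpha * (end - start) + start

# With n_class = 2 the threshold anneals from 1/2 toward 1.0 over training.
for step in (0, 5000, 10000):
    print(step, tsa_threshold("linear_schedule", step, 10000, start=0.5))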
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    np.random.seed(cf.random_seed)

    processor = NerProcessor(
        data_dir=cf.train_data,
        task_name=cf.task_name.lower())
    label_list = processor.labels

    model_config = xlnet.XLNetConfig(json_path=cf.model_config_path)
    model_builder = XLNetModelBuilder(
        model_config=model_config,
        use_tpu=cf.use_tpu)
    model_fn = model_builder.get_model_fn(label_list)

    # If TPU is not available, this falls back to a normal Estimator on CPU or GPU.
    tpu_config = model_utils.configure_tpu(cf)
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=cf.use_tpu,
        model_fn=model_fn,
        config=tpu_config,
        export_to_tpu=cf.use_tpu,
        train_batch_size=cf.train_batch_size,
        eval_batch_size=cf.eval_batch_size,
        predict_batch_size=cf.predict_batch_size)

    tokenizer = XLNetTokenizer(
        sp_model_file=cf.spiece_model_file,
        lower_case=cf.lower_case)

    example_converter = XLNetExampleConverter(
        label_list=label_list,
        max_seq_length=cf.max_seq_length,
        tokenizer=tokenizer)

    if cf.do_train and cf.do_eval:
        # Start training: serialize examples to a TFRecord file first.
        train_file = os.path.join(cf.output_dir, "train.tf_record")
        train_examples = processor.get_train_examples()
        np.random.shuffle(train_examples)
        example_converter.file_based_convert_examples_to_features(train_examples, train_file)

        train_steps = int(len(train_examples) * cf.num_train_epochs / cf.train_batch_size)
        cf.warmup_steps = int(0.1 * train_steps)

        tf.logging.info("***** Run training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", cf.train_batch_size)
        tf.logging.info("  Num steps = %d", train_steps)

        # Read the TFRecord data back for training.
        train_input_fn = XLNetInputBuilder.get_file_based_input_fn(
            input_file=train_file,
            seq_length=cf.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=train_steps)

        eval_examples = processor.get_dev_examples()
        tf.logging.info("***** Run evaluation *****")
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", cf.eval_batch_size)

        eval_features = example_converter.convert_examples_to_features(eval_examples)
        eval_input_fn = XLNetInputBuilder.get_input_builder(
            eval_features, cf.max_seq_length, False, False)

        result = estimator.evaluate(input_fn=eval_input_fn)
        precision = result["precision"]
        recall = result["recall"]
        f1_score = 2.0 * precision * recall / (precision + recall)
        tf.logging.info("***** Evaluation result *****")
        tf.logging.info("  Precision (token-level) = %s", str(precision))
        tf.logging.info("  Recall (token-level) = %s", str(recall))
        tf.logging.info("  F1 score (token-level) = %s", str(f1_score))

    if cf.do_predict:
        predict_examples = processor.get_test_examples()
        tf.logging.info("***** Run prediction *****")
        tf.logging.info("  Num examples = %d", len(predict_examples))
        tf.logging.info("  Batch size = %d", cf.predict_batch_size)

        predict_features = example_converter.convert_examples_to_features(predict_examples)
        predict_input_fn = XLNetInputBuilder.get_input_builder(
            predict_features, cf.max_seq_length, False, False)
        result = estimator.predict(input_fn=predict_input_fn)

        predict_recorder = XLNetPredictRecorder(
            output_dir=cf.output_dir,
            label_list=label_list,
            max_seq_length=cf.max_seq_length,
            tokenizer=tokenizer,
            predict_tag=cf.predict_tag)

        predicts = [{
            "input_ids": feature.input_ids,
            "input_masks": feature.input_masks,
            "label_ids": feature.label_ids,
            "predict_ids": predict["predict"].tolist()
        } for feature, predict in zip(predict_features, result)]
        predict_recorder.record(predicts)

    if cf.do_export:
        tf.logging.info("***** Running exporting *****")
        tf.io.gfile.makedirs(cf.export_dir)
        serving_input_fn = XLNetInputBuilder.get_serving_input_fn(cf.max_seq_length)
        estimator.export_saved_model(cf.export_dir, serving_input_fn, as_text=False)