Пример #1
0
def get_regression_loss(
        FLAGS, features, is_training):
    """Build an XLNet regression head and return its loss tensors.

    Args:
        FLAGS: Settings object. ``model_config_path``, ``summary_type``,
            ``use_summ_proj`` and ``task_name`` are read from it, and the
            whole object is forwarded to ``xlnet.create_run_config``.
        features: Mapping holding the ``"input_ids"``, ``"segment_ids"``,
            ``"input_mask"`` and ``"label_ids"`` tensors. The first three
            have their two axes swapped before being given to the model.
        is_training: Forwarded to ``xlnet.create_run_config``.

    Returns:
        A ``(total_loss, per_example_loss, logits)`` tuple in which
        ``total_loss`` is the mean of ``per_example_loss``.
    """
    raw_ids = features["input_ids"]
    # Size of the leading axis of the un-transposed inputs.
    batch_size = tf.shape(raw_ids)[0]

    # Swap the two axes of every model input.
    swap_axes = [1, 0]
    token_ids = tf.transpose(raw_ids, swap_axes)
    segments = tf.transpose(features["segment_ids"], swap_axes)
    mask = tf.transpose(features["input_mask"], swap_axes)
    # One target value per example.
    targets = tf.reshape(features["label_ids"], [batch_size])

    model_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    # The second positional argument (True) is fixed for this task.
    finetune_config = xlnet.create_run_config(is_training, True, FLAGS)

    model = xlnet.XLNetModel(
        xlnet_config=model_config,
        run_config=finetune_config,
        input_ids=token_ids,
        seg_ids=segments,
        input_mask=mask)

    pooled = model.get_pooled_out(FLAGS.summary_type, FLAGS.use_summ_proj)

    head_scope = "regression_{}".format(FLAGS.task_name.lower())
    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        per_example_loss, logits = modeling.regression_loss(
            hidden=pooled,
            labels=targets,
            initializer=model.get_initializer(),
            scope=head_scope,
            return_logits=True)

        # Reduced inside the scope so the op keeps its original name scope.
        total_loss = tf.reduce_mean(per_example_loss)

    return total_loss, per_example_loss, logits
Пример #2
0
 def __init__(self, flags, input_ids, seg_ids, input_mask):
     """Build an XLNet model with training and fine-tuning both enabled.

     Args:
         flags: Settings object. ``model_config_path`` is read from it and
             the whole object is passed to ``create_run_config``.
         input_ids: Forwarded unchanged to ``XLNetModel``.
         seg_ids: Forwarded unchanged to ``XLNetModel``.
         input_mask: Forwarded unchanged to ``XLNetModel``.
     """
     model_config = xln.XLNetConfig(json_path=flags.model_config_path)
     # Both switches are hard-coded on for this wrapper.
     finetune_config = xln.create_run_config(
         is_training=True, is_finetune=True, FLAGS=flags)
     self.model = xln.XLNetModel(
         xlnet_config=model_config,
         run_config=finetune_config,
         input_ids=input_ids,
         seg_ids=seg_ids,
         input_mask=input_mask)
Пример #3
0
 def load_model(self, model: str, model_path: str):
     """Load the XLNet configuration and tokenizer for ``model``.

     The files are looked up in the first sub-directory that ``os.walk``
     reports directly under ``model_path``.

     Args:
         model: Stored on ``self.model`` as given.
         model_path: Directory containing the model sub-directory.
     """
     # Top-level listing only; the first reported sub-directory is used.
     _, subdirs, _ = next(os.walk(model_path))
     model_path = os.path.join(model_path, subdirs[0])

     config_file = os.path.join(model_path, Embeddings.mode_config_path)
     self.xlnet_config = xlnet.XLNetConfig(json_path=config_file)
     self.run_config = xlnet.create_run_config(
         is_training=True, is_finetune=True, FLAGS=Flags)

     self.load_tokenizer(model_path)
     self.model = model
     print("Model loaded Successfully !")
def create_model(cf,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 is_training=True):
    """Build the XLNet classification model.

    Args:
        cf: Settings object. ``model_config_path``, ``summary_type``,
            ``use_summ_proj``, ``cls_scope``, ``task_name`` and
            ``num_labels`` are read from it, and the whole object is
            forwarded to ``xlnet.create_run_config``.
        input_ids: Tensor whose two axes are swapped before use.
        input_mask: Tensor whose two axes are swapped before use.
        segment_ids: Tensor whose two axes are swapped before use.
        labels: Tensor reshaped to one value per example.
        is_training: Forwarded to ``xlnet.create_run_config``.

    Returns:
        A ``(total_loss, per_example_loss, logits)`` tuple in which
        ``total_loss`` is the mean of ``per_example_loss``.
    """
    # Size of the leading axis of the un-transposed inputs.
    batch_size = tf.shape(input_ids)[0]
    token_ids = tf.transpose(input_ids, [1, 0])
    segments = tf.transpose(segment_ids, [1, 0])
    mask = tf.transpose(input_mask, [1, 0])
    targets = tf.reshape(labels, [batch_size])

    model_config = xlnet.XLNetConfig(json_path=cf.model_config_path)
    finetune_config = xlnet.create_run_config(is_training, True, cf)

    model = xlnet.XLNetModel(
        xlnet_config=model_config,
        run_config=finetune_config,
        input_ids=token_ids,
        seg_ids=segments,
        input_mask=mask)
    pooled = model.get_pooled_out(cf.summary_type, cf.use_summ_proj)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        # A non-empty cls_scope overrides the lower-cased task name.
        scope_suffix = cf.cls_scope if cf.cls_scope else cf.task_name.lower()
        head_scope = "classification_{}".format(scope_suffix)

        per_example_loss, logits = modeling.classification_loss(
            hidden=pooled,
            labels=targets,
            n_class=cf.num_labels,
            initializer=model.get_initializer(),
            scope=head_scope,
            return_logits=True)

        total_loss = tf.reduce_mean(per_example_loss)

    return total_loss, per_example_loss, logits
Пример #5
0
def main(_):
    """Run one training step on a padding example, then export a SavedModel.

    All settings are read from the module-level ``FLAGS``. The positional
    argument is ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    estimator_config = model_utils.configure_tpu(FLAGS)
    xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    xlnet_run_config = xlnet.create_run_config(False, True, FLAGS)

    builder = XLNetModelBuilder(
        default_model_config=xlnet_config,
        default_run_config=xlnet_run_config,
        default_init_checkpoint=FLAGS.init_checkpoint,
        use_tpu=FLAGS.use_tpu)

    model_fn = builder.get_model_fn(
        xlnet_config, xlnet_run_config, FLAGS.init_checkpoint,
        FLAGS.model_type)

    # If TPU is not available, this will fall back to normal Estimator on CPU or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=estimator_config,
        export_to_tpu=FLAGS.use_tpu,
        train_batch_size=1)

    tokenizer = XLNetTokenizer(
        sp_model_file=FLAGS.spiece_model_file,
        lower_case=FLAGS.lower_case)

    converter = XLNetExampleConverter(
        label_list=[],
        max_seq_length=FLAGS.max_seq_length,
        tokenizer=tokenizer)

    # A single padding example is the only training input.
    padding_features = converter.convert_examples_to_features(
        [PaddingInputExample()])

    train_input_fn = XLNetInputBuilder.get_input_builder(
        padding_features, FLAGS.max_seq_length, True, False)
    estimator.train(train_input_fn, max_steps=1)

    tf.gfile.MakeDirs(FLAGS.export_dir)
    serving_fn = XLNetInputBuilder.get_serving_input_fn(FLAGS.max_seq_length)
    estimator.export_savedmodel(
        FLAGS.export_dir, serving_fn, as_text=False)
Пример #6
0
    def __init__(self, model_config_path, is_training, FLAGS, input_ids,
                 segment_ids, input_mask, label, n_class):
        """Build the XLNet classifier graph, its train op and its accuracy.

        Args:
            model_config_path: JSON file passed to ``xlnet.XLNetConfig``.
            is_training: Forwarded to ``xlnet.create_run_config``.
            FLAGS: Settings object. ``cls_scope``, ``summary_type`` and
                ``use_summ_proj`` are read from it, and the whole object is
                forwarded to ``xlnet.create_run_config`` and
                ``model_utils.get_train_op``.
            input_ids: Tensor whose two axes are swapped before use.
            segment_ids: Tensor whose two axes are swapped before use.
            input_mask: Tensor whose two axes are swapped before use.
            label: Label tensor, used as given for the loss and for the
                one-hot accuracy target.
            n_class: Number of classes.
        """
        self.xlnet_config = xlnet.XLNetConfig(json_path=model_config_path)
        self.run_config = xlnet.create_run_config(is_training, True, FLAGS)

        # Swap the two axes of every model input.
        swap_axes = [1, 0]
        self.input_ids = tf.transpose(input_ids, swap_axes)
        self.segment_ids = tf.transpose(segment_ids, swap_axes)
        self.input_mask = tf.transpose(input_mask, swap_axes)

        self.model = xlnet.XLNetModel(
            xlnet_config=self.xlnet_config,
            run_config=self.run_config,
            input_ids=self.input_ids,
            seg_ids=self.segment_ids,
            input_mask=self.input_mask)

        pooled = self.model.get_pooled_out(
            FLAGS.summary_type, FLAGS.use_summ_proj)
        loss_and_logits = modeling.classification_loss(
            hidden=pooled,
            labels=label,
            n_class=n_class,
            initializer=self.model.get_initializer(),
            scope=FLAGS.cls_scope,
            return_logits=True)
        self.per_example_loss, self.logits = loss_and_logits

        self.total_loss = tf.reduce_mean(self.per_example_loss)

        with tf.name_scope("train_op"):
            # Only the first of the three returned values is kept.
            train_outputs = model_utils.get_train_op(FLAGS, self.total_loss)
            self.train_op = train_outputs[0]

        with tf.name_scope("acc"):
            target_one_hot = tf.one_hot(label, n_class)
            self.acc = self.accuracy(self.logits, target_one_hot)
Пример #7
0
    def __init__(self, config, FLAGS):
        """Set up the tokenizer, the input queue, predictions and train op.

        Args:
            config: Mapping from which ``'max_segment_len'``,
                ``"max_span_width"`` and ``"genres"`` are read.
            FLAGS: Settings object. ``xlnet_config_file`` and
                ``spiece_model_file`` are read from it, and the whole object
                is forwarded to ``model_utils.init_from_checkpoint`` and
                ``model_utils.get_train_op``.
        """
        self.config = config
        self.max_segment_len = config['max_segment_len']
        self.max_span_width = config["max_span_width"]
        # Genre name -> position in the configured genre list.
        self.genres = {
            genre: index for index, genre in enumerate(config["genres"])
        }
        self.subtoken_maps = {}
        self.gold = {}
        self.eval_data = None  # Load eval data lazily.
        self.FLAGS = FLAGS
        self.xlnet_config = xlnet.XLNetConfig(
            json_path=FLAGS.xlnet_config_file)

        print("################ spiece_model_file #############")
        print(FLAGS.spiece_model_file)
        self.tokenizer = spm.SentencePieceProcessor()
        self.tokenizer.load(FLAGS.spiece_model_file)

        # (dtype, shape) of every tensor that travels through the queue.
        input_props = [
            (tf.int32, [None, None]),  # input_ids.
            (tf.int32, [None, None]),  # seg_ids.
            (tf.float32, [None, None]),  # input_mask
            (tf.int32, [None]),  # Text lengths.
            (tf.int32, [None, None]),  # Speaker IDs.
            (tf.int32, []),  # Genre.
            (tf.bool, []),  # Is training.
            (tf.int32, [None]),  # Gold starts.
            (tf.int32, [None]),  # Gold ends.
            (tf.int32, [None]),  # Cluster ids.
            (tf.int32, [None]),  # Sentence Map
        ]

        self.queue_input_tensors = [
            tf.placeholder(dtype, shape) for dtype, shape in input_props
        ]
        queue_dtypes, queue_shapes = zip(*input_props)
        input_queue = tf.PaddingFIFOQueue(
            capacity=10, dtypes=queue_dtypes, shapes=queue_shapes)
        self.enqueue_op = input_queue.enqueue(self.queue_input_tensors)
        self.input_tensors = input_queue.dequeue()

        self.predictions, self.loss = self.get_predictions_and_loss(
            *self.input_tensors)

        ##### xlnet ######
        # The return value is not used here; the call itself is kept.
        model_utils.init_from_checkpoint(FLAGS)
        # Only the train op is kept from the three returned values.
        self.train_op, _, _ = model_utils.get_train_op(FLAGS, self.loss)
        self.global_step = tf.train.get_or_create_global_step()
Пример #8
0
def get_classification_loss(options, features, n_class, is_training):
    """Build an XLNet classification head and return its loss tensors.

    Args:
        options: Mapping from which ``'model_config_file'``,
            ``'summary_type'``, ``'use_summ_proj'``, ``'cls_scope'`` and
            ``'task_name'`` are read; it is also forwarded as-is to
            ``xlnet.create_run_config``.
        features: Mapping holding the ``"input_ids"``, ``"segment_ids"``,
            ``"input_mask"`` and ``"label_ids"`` tensors.
        n_class: Number of target classes.
        is_training: Forwarded to ``xlnet.create_run_config``.

    Returns:
        A ``(total_loss, per_example_loss, logits)`` tuple in which
        ``total_loss`` is the mean of ``per_example_loss``.
    """
    raw_ids = features["input_ids"]
    # Size of the leading axis of the un-transposed inputs.
    batch_size = tf.shape(raw_ids)[0]

    # Swap the two axes of every model input.
    swap_axes = [1, 0]
    token_ids = tf.transpose(raw_ids, swap_axes)
    segments = tf.transpose(features["segment_ids"], swap_axes)
    mask = tf.transpose(features["input_mask"], swap_axes)
    targets = tf.reshape(features["label_ids"], [batch_size])

    model_config = xlnet.XLNetConfig(json_path=options['model_config_file'])
    finetune_config = xlnet.create_run_config(is_training, True, options)

    model = xlnet.XLNetModel(
        xlnet_config=model_config,
        run_config=finetune_config,
        input_ids=token_ids,
        seg_ids=segments,
        input_mask=mask)

    pooled = model.get_pooled_out(
        options['summary_type'], options['use_summ_proj'])

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        # A non-empty 'cls_scope' overrides the lower-cased task name.
        if options['cls_scope']:
            scope_suffix = options['cls_scope']
        else:
            scope_suffix = options['task_name'].lower()
        head_scope = "classification_{}".format(scope_suffix)

        per_example_loss, logits = modeling.classification_loss(
            hidden=pooled,
            labels=targets,
            n_class=n_class,
            initializer=model.get_initializer(),
            scope=head_scope,
            return_logits=True)

        total_loss = tf.reduce_mean(per_example_loss)

    return total_loss, per_example_loss, logits
Пример #9
0
def get_race_loss(FLAGS, features, is_training, n_choices=4):
    """Loss for downstream multi-choice QA tasks such as RACE.

    Each example carries ``n_choices`` candidate sequences. They are
    unrolled into the batch axis, scored with one logit each, and the
    loss is the softmax cross-entropy over the candidates of an example.

    Args:
        FLAGS: Settings object. ``model_config_path``, ``summary_type`` and
            ``use_summ_proj`` are read from it, and the whole object is
            forwarded to ``xlnet.create_run_config``.
        features: Mapping holding the ``"input_ids"``, ``"segment_ids"``,
            ``"input_mask"`` and ``"label_ids"`` tensors. The first three
            are reshaped to ``[batch, n_choices, -1]``, so every row must
            hold the ``n_choices`` candidates back to back.
        is_training: Forwarded to ``xlnet.create_run_config``.
        n_choices: Number of answer candidates per example. Defaults to 4,
            the value that used to be hard-coded.

    Returns:
        A ``(total_loss, per_example_loss, logits)`` tuple; ``logits`` has
        shape ``[batch, n_choices]``.
    """

    bsz_per_core = tf.shape(features["input_ids"])[0]

    def _transform_features(feature):
        # [batch, n_choices * len] -> [len, batch * n_choices]
        out = tf.reshape(feature, [bsz_per_core, n_choices, -1])
        out = tf.transpose(out, [2, 0, 1])
        out = tf.reshape(out, [-1, bsz_per_core * n_choices])
        return out

    inp = _transform_features(features["input_ids"])
    seg_id = _transform_features(features["segment_ids"])
    inp_mask = _transform_features(features["input_mask"])
    label = tf.reshape(features["label_ids"], [bsz_per_core])

    xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_config = xlnet.create_run_config(is_training, True, FLAGS)

    xlnet_model = xlnet.XLNetModel(
        xlnet_config=xlnet_config,
        run_config=run_config,
        input_ids=inp,
        seg_ids=seg_id,
        input_mask=inp_mask)
    summary = xlnet_model.get_pooled_out(
        FLAGS.summary_type, FLAGS.use_summ_proj)

    with tf.variable_scope("logits"):
        # One score per (example, candidate) pair, regrouped per example.
        logits = tf.layers.dense(
            summary, 1, kernel_initializer=xlnet_model.get_initializer())
        logits = tf.reshape(logits, [bsz_per_core, n_choices])

        one_hot_target = tf.one_hot(label, n_choices)
        # Negative log-likelihood of the labelled candidate.
        per_example_loss = -tf.reduce_sum(
            tf.nn.log_softmax(logits) * one_hot_target, -1)
        total_loss = tf.reduce_mean(per_example_loss)

    return total_loss, per_example_loss, logits
Пример #10
0
def main(_):
    """Train, evaluate, predict with and/or export the XLNet NER model.

    The stages that run are selected by ``FLAGS.do_train``,
    ``FLAGS.do_eval``, ``FLAGS.do_predict`` and ``FLAGS.do_export``; all
    other settings are read from the module-level ``FLAGS`` as well. The
    positional argument is ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    np.random.seed(FLAGS.random_seed)

    processor = NerProcessor(data_dir=FLAGS.data_dir,
                             input_file=FLAGS.input_file,
                             task_name=FLAGS.task_name.lower())

    label_list = processor.get_labels()
    tf.logging.info(label_list)

    tpu_config = model_utils.configure_tpu(FLAGS)
    model_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_config = xlnet.create_run_config(False, True, FLAGS)

    model_builder = XLNetModelBuilder(
        default_model_config=model_config,
        default_run_config=run_config,
        default_init_checkpoint=FLAGS.init_checkpoint,
        use_tpu=FLAGS.use_tpu)

    model_fn = model_builder.get_model_fn(model_config, run_config,
                                          FLAGS.init_checkpoint, label_list)

    # If TPU is not available, this will fall back to normal Estimator on CPU or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=tpu_config,
        export_to_tpu=FLAGS.use_tpu,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    tokenizer = XLNetTokenizer(sp_model_file=FLAGS.spiece_model_file,
                               lower_case=FLAGS.lower_case)

    example_converter = XLNetExampleConverter(
        label_list=label_list,
        max_seq_length=FLAGS.max_seq_length,
        tokenizer=tokenizer)

    if FLAGS.do_train:
        train_examples = processor.get_chem_examples()

        tf.logging.info("***** Run training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", FLAGS.train_steps)

        train_features = example_converter.convert_examples_to_features(
            train_examples)
        train_input_fn = XLNetInputBuilder.get_input_builder(
            train_features, FLAGS.max_seq_length, True, True)

        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples()

        tf.logging.info("***** Run evaluation *****")
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        eval_features = example_converter.convert_examples_to_features(
            eval_examples)
        eval_input_fn = XLNetInputBuilder.get_input_builder(
            eval_features, FLAGS.max_seq_length, False, False)

        result = estimator.evaluate(input_fn=eval_input_fn)

        precision = result["precision"]
        recall = result["recall"]
        # F1 is undefined when precision and recall are both zero; report
        # 0.0 instead of dividing by zero.
        pr_sum = precision + recall
        f1_score = 2.0 * precision * recall / pr_sum if pr_sum else 0.0

        tf.logging.info("***** Evaluation result *****")
        tf.logging.info("  Precision (token-level) = %s", str(precision))
        tf.logging.info("  Recall (token-level) = %s", str(recall))
        tf.logging.info("  F1 score (token-level) = %s", str(f1_score))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples()
        pmids = [e.guid for e in predict_examples]

        tf.logging.info("***** Run prediction *****")
        tf.logging.info("  Num examples = %d", len(predict_examples))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_features = example_converter.convert_examples_to_features(
            predict_examples)
        predict_input_fn = XLNetInputBuilder.get_input_builder(
            predict_features, FLAGS.max_seq_length, False, False)

        result = estimator.predict(input_fn=predict_input_fn)

        predict_recorder = XLNetPredictRecorder(
            output_dir=FLAGS.output_dir,
            label_list=label_list,
            guids=pmids,
            max_seq_length=FLAGS.max_seq_length,
            tokenizer=tokenizer,
            predict_tag=FLAGS.predict_tag)

        # Pair every input feature with the prediction produced for it.
        predicts = [{
            "input_ids": feature.input_ids,
            "input_masks": feature.input_masks,
            "label_ids": feature.label_ids,
            "predict_ids": predict["predict"].tolist()
        } for feature, predict in zip(predict_features, result)]

        predict_recorder.record(predicts)

    if FLAGS.do_export:
        tf.logging.info("***** Running exporting *****")
        tf.gfile.MakeDirs(FLAGS.export_dir)
        serving_input_fn = XLNetInputBuilder.get_serving_input_fn(
            FLAGS.max_seq_length)
        estimator.export_savedmodel(FLAGS.export_dir,
                                    serving_input_fn,
                                    as_text=False)
Пример #11
0
def main(_):
    """Train, evaluate, predict with and/or export the XLNet classifier.

    The stages that run are selected by ``FLAGS.do_train``,
    ``FLAGS.do_eval``, ``FLAGS.do_predict`` and ``FLAGS.do_export``; all
    other settings are read from the module-level ``FLAGS`` as well. The
    positional argument is ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    np.random.seed(FLAGS.random_seed)

    processor = ClassificationProcessor(
        data_dir=FLAGS.data_dir,
        task_name=FLAGS.task_name.lower())

    sent_labels = processor.get_sent_labels()

    xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)

    builder = XLNetModelBuilder(
        model_config=xlnet_config, use_tpu=FLAGS.use_tpu)

    model_fn = builder.get_model_fn(sent_labels)

    # If TPU is not available, this will fall back to normal Estimator on CPU or GPU.
    estimator_config = model_utils.configure_tpu(FLAGS)

    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=estimator_config,
        export_to_tpu=FLAGS.use_tpu,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    tokenizer = XLNetTokenizer(
        sp_model_file=FLAGS.spiece_model_file,
        lower_case=FLAGS.lower_case)

    converter = XLNetExampleConverter(
        sent_label_list=sent_labels,
        max_seq_length=FLAGS.max_seq_length,
        tokenizer=tokenizer)

    if FLAGS.do_train:
        examples = processor.get_train_examples()

        tf.logging.info("***** Run training *****")
        tf.logging.info("  Num examples = %d", len(examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", FLAGS.train_steps)

        features = converter.convert_examples_to_features(examples)
        input_fn = XLNetInputBuilder.get_input_builder(
            features, FLAGS.max_seq_length, True, True)

        estimator.train(input_fn=input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_eval:
        examples = processor.get_dev_examples()

        tf.logging.info("***** Run evaluation *****")
        tf.logging.info("  Num examples = %d", len(examples))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        features = converter.convert_examples_to_features(examples)
        input_fn = XLNetInputBuilder.get_input_builder(
            features, FLAGS.max_seq_length, False, False)

        metrics = estimator.evaluate(input_fn=input_fn)
        accuracy = metrics["sent_accuracy"]

        tf.logging.info("***** Evaluation result *****")
        tf.logging.info("  Accuracy (sent-level) = %s", str(accuracy))

    if FLAGS.do_predict:
        examples = processor.get_test_examples()

        tf.logging.info("***** Run prediction *****")
        tf.logging.info("  Num examples = %d", len(examples))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        features = converter.convert_examples_to_features(examples)
        input_fn = XLNetInputBuilder.get_input_builder(
            features, FLAGS.max_seq_length, False, False)

        predictions = estimator.predict(input_fn=input_fn)

        recorder = XLNetPredictRecorder(
            output_dir=FLAGS.output_dir,
            sent_label_list=sent_labels,
            max_seq_length=FLAGS.max_seq_length,
            tokenizer=tokenizer,
            predict_tag=FLAGS.predict_tag)

        # Pair every input feature with the prediction produced for it.
        records = []
        for feature, prediction in zip(features, predictions):
            records.append({
                "input_ids": feature.input_ids,
                "input_masks": feature.input_masks,
                "sent_label_id": feature.sent_label_id,
                "sent_predict_id": prediction["sent_predict_id"],
                "sent_predict_score": prediction["sent_predict_score"],
                "sent_predict_prob": prediction["sent_predict_prob"].tolist(),
            })

        recorder.record(records)

    if FLAGS.do_export:
        tf.logging.info("***** Running exporting *****")
        tf.gfile.MakeDirs(FLAGS.export_dir)
        serving_fn = XLNetInputBuilder.get_serving_input_fn(
            FLAGS.max_seq_length)
        estimator.export_savedmodel(
            FLAGS.export_dir, serving_fn, as_text=False)
Пример #12
0
from xlnet import xlnet
from absl.flags import FLAGS

# some code omitted here...
# initialize FLAGS
# initialize instances of tf.Tensor, including input_ids, seg_ids, and input_mask

# XLNetConfig contains hyperparameters that are specific to a model checkpoint.
# Here it is loaded from the JSON file named by FLAGS.model_config_path.
xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)

# RunConfig contains hyperparameters that could be different between pretraining and finetuning.
# This example requests training mode with finetuning enabled.
run_config = xlnet.create_run_config(is_training=True, is_finetune=True, FLAGS=FLAGS)

# Construct an XLNet model.
# NOTE: input_ids, seg_ids and input_mask are not defined in this snippet;
# they must be created beforehand (see the "code omitted" note above).
xlnet_model = xlnet.XLNetModel(
    xlnet_config=xlnet_config,
    run_config=run_config,
    input_ids=input_ids,
    seg_ids=seg_ids,
    input_mask=input_mask)

# Get a summary of the sequence using the last hidden state
summary = xlnet_model.get_pooled_out(summary_type="last")

# Get a sequence output
seq_out = xlnet_model.get_sequence_output()

# Build your application on top of `summary` or `seq_out`.
Пример #13
0
def main(_):
    """Extract per-token layer outputs and write them to an HDF5 file.

    Reads the ``test``, ``train`` and ``dev`` ``*.english.jsonlines`` files
    from the directory ``FLAGS.input_file``, runs the estimator in predict
    mode and stores the requested layers in ``FLAGS.output_file``. One
    dataset is created per sentence, keyed ``"<doc_key>/<sentence_index>"``
    with ``/`` in the document key replaced by ``:``, and shaped
    ``(tokens in sentence, hidden_size, number of layers)``.

    All settings are read from the module-level ``FLAGS``. The positional
    argument is ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    layer_indexes = [int(x) for x in FLAGS.layers.split(",")]

    bert_config = modeling.XLNetConfig(json_path=FLAGS.bert_config_file)

    tokenizer = tokenization.FullTokenizer(
        spm_model_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    run_config = configure_tpu(FLAGS)

    json_examples = []
    for split in ['test', 'train', 'dev']:
        split_path = os.path.join(FLAGS.input_file,
                                  split + '.english.jsonlines')
        with open(split_path) as f:
            # Iterate the file lazily instead of materialising every line.
            json_examples.extend(json.loads(jsonline) for jsonline in f)

    orig_examples = []
    bert_examples = []
    for i, json_e in enumerate(json_examples):
        e = process_example(json_e, i, should_filter_embedded_mentions=True)
        orig_examples.append(e)
        bert_examples.append(e.bertify(tokenizer))

    model_fn = model_fn_builder(
        bert_config=bert_config,
        run_config=run_config,
        init_checkpoint=FLAGS.init_checkpoint,
        layer_indexes=layer_indexes,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_one_hot_embeddings)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        predict_batch_size=FLAGS.batch_size)

    input_fn = input_fn_builder(
        examples=bert_examples,
        window_size=FLAGS.window_size,
        stride=FLAGS.stride,
        tokenizer=tokenizer)

    total_tokens = sum(len(e.tokens) for e in orig_examples)
    # The context manager closes the HDF5 file even if prediction fails.
    with h5py.File(FLAGS.output_file, 'w') as writer, \
            tqdm(total=total_tokens) as t:
        for result in estimator.predict(input_fn, yield_single_examples=True):
            document_index = int(result["unique_ids"])
            bert_example = bert_examples[document_index]
            orig_example = orig_examples[document_index]
            file_key = bert_example.doc_key.replace('/', ':')

            # Negative extract indices mark positions that are skipped below.
            t.update(n=(result['extract_indices'] >= 0).sum())

            for output_index, bert_token_index in enumerate(
                    result['extract_indices']):
                if bert_token_index < 0:
                    continue

                token_index = bert_example.bert_to_orig_map[bert_token_index]
                sentence_index, token_index = (
                    orig_example.unravel_token_index(token_index))

                dataset_key = "{}/{}".format(file_key, sentence_index)
                if dataset_key not in writer:
                    sentence_len = len(
                        orig_example.sentence_tokens[sentence_index])
                    writer.create_dataset(
                        dataset_key,
                        (sentence_len, bert_config.hidden_size,
                         len(layer_indexes)),
                        dtype=np.float32)

                dset = writer[dataset_key]
                for j in range(len(layer_indexes)):
                    layer_output = result["layer_output_%d" % j]
                    dset[token_index, :, j] = layer_output[output_index]
Пример #14
0
def two_stream_loss(FLAGS, features, labels, mems, is_training):
    """Pretraining loss with two-stream attention Transformer-XL.

    Args:
        FLAGS: Settings object. ``num_predict``, ``model_dir`` and
            ``use_bfloat16`` are read from it, and the whole object is
            forwarded to ``xlnet.XLNetConfig`` and
            ``xlnet.create_run_config``.
        features: Mapping holding ``"input_k"``, ``"input_q"``,
            ``"seg_id"``, ``"perm_mask"``, ``"target"``, ``"target_mask"``
            and, when ``FLAGS.num_predict`` is set, ``"target_mapping"``.
        labels: Not used by this function.
        mems: Mapping from which the ``"mems"`` entry is taken, if present.
        is_training: Forwarded to ``xlnet.create_run_config``.

    Returns:
        A ``(total_loss, new_mems, monitor_dict)`` tuple. ``new_mems`` maps
        ``"mems"`` to the model's new memory and ``monitor_dict`` maps
        ``"total_loss"`` to the returned loss.

    Side effects:
        Writes the XLNet configuration to ``<FLAGS.model_dir>/config.json``.
    """
    # Unpack input
    mem_key = "mems"
    cached_mems = mems.get(mem_key, None)

    swap_axes = [1, 0]
    content_ids = tf.transpose(features["input_k"], swap_axes)
    query_ids = tf.transpose(features["input_q"], swap_axes)

    segments = tf.transpose(features["seg_id"], swap_axes)

    # Move the leading axis of the permutation mask to the end.
    permutation_mask = tf.transpose(features["perm_mask"], [1, 2, 0])

    mapping = None
    if FLAGS.num_predict is not None:
        # [num_predict x tgt_len x bsz]
        mapping = tf.transpose(features["target_mapping"], [1, 2, 0])

    # target for LM loss
    targets = tf.transpose(features["target"], swap_axes)

    # target mask for LM loss
    target_mask = tf.transpose(features["target_mask"], swap_axes)

    # construct xlnet config and save to model_dir
    model_config = xlnet.XLNetConfig(FLAGS=FLAGS)
    model_config.to_json(os.path.join(FLAGS.model_dir, "config.json"))

    # construct run config from FLAGS
    pretrain_config = xlnet.create_run_config(is_training, False, FLAGS)

    model = xlnet.XLNetModel(
        xlnet_config=model_config,
        run_config=pretrain_config,
        input_ids=content_ids,
        seg_ids=segments,
        input_mask=None,  # no input mask is supplied during pretraining
        mems=cached_mems,
        perm_mask=permutation_mask,
        target_mapping=mapping,
        inp_q=query_ids)

    hidden_states = model.get_sequence_output()
    new_mems = {mem_key: model.get_new_memory()}
    embedding_table = model.get_embedding_table()

    weight_init = model.get_initializer()

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        # LM loss
        token_loss = modeling.lm_loss(
            hidden=hidden_states,
            target=targets,
            n_token=model_config.n_token,
            d_model=model_config.d_model,
            initializer=weight_init,
            lookup_table=embedding_table,
            tie_weight=True,
            bi_data=pretrain_config.bi_data,
            use_tpu=pretrain_config.use_tpu)

    if FLAGS.use_bfloat16:
        # Do the final reduction in float32.
        target_mask = tf.cast(target_mask, tf.float32)
        token_loss = tf.cast(token_loss, tf.float32)

    # Mean of the per-token loss over positions selected by the target mask.
    total_loss = (tf.reduce_sum(token_loss * target_mask)
                  / tf.reduce_sum(target_mask))

    # Quantity to monitor
    monitor_dict = {"total_loss": total_loss}

    return total_loss, new_mems, monitor_dict
Пример #15
0
def get_qa_outputs(FLAGS, features, is_training):
    """Build the XLNet output heads for span-extraction QA (e.g. SQuAD).

    Args:
        FLAGS: configuration object. This function reads
            `model_config_path` and `dropout`, plus `start_n_top` and
            `end_n_top` on the inference path, and forwards the whole object
            to `xlnet.create_run_config`.
        features: mapping providing "input_ids", "segment_ids", "input_mask",
            "cls_index" and "p_mask". "start_positions" is additionally read
            when `is_training` is true.
        is_training: True selects the branch that conditions the end logits
            on the ground-truth start position; False selects the top-k
            (beam search) branch.

    Returns:
        A dict that always contains "cls_logits". When training it also
        contains "start_log_probs" and "end_log_probs"; otherwise it contains
        "start_top_log_probs", "start_top_index", "end_top_log_probs" and
        "end_top_index".
    """
    # The 2-D input features are transposed with perm [1, 0] before being
    # handed to the XLNet model.
    ids_t = tf.transpose(features["input_ids"], [1, 0])
    seg_t = tf.transpose(features["segment_ids"], [1, 0])
    mask_t = tf.transpose(features["input_mask"], [1, 0])
    cls_positions = tf.reshape(features["cls_index"], [-1])

    # Leading dimension of the transposed ids.
    seq_len = tf.shape(ids_t)[0]

    model_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_cfg = xlnet.create_run_config(is_training, True, FLAGS)

    model = xlnet.XLNetModel(
        xlnet_config=model_config,
        run_config=run_cfg,
        input_ids=ids_t,
        seg_ids=seg_t,
        input_mask=mask_t)
    hidden = model.get_sequence_output()
    init = model.get_initializer()

    outputs = {}

    # Mask of invalid positions such as query and special symbols
    # (PAD, SEP, CLS); masked positions are pushed to -1e30 before softmax.
    p_mask = features["p_mask"]

    # Start-position head.
    with tf.variable_scope("start_logits"):
        start_raw = tf.layers.dense(hidden, 1, kernel_initializer=init)
        start_logits = tf.transpose(tf.squeeze(start_raw, -1), [1, 0])
        start_keep = 1 - p_mask
        start_scaled = start_logits * start_keep
        start_penalty = 1e30 * p_mask
        start_logits_masked = start_scaled - start_penalty
        start_log_probs = tf.nn.log_softmax(start_logits_masked, -1)

    # End-position head.
    with tf.variable_scope("end_logits"):
        if is_training:
            # Training: the end logits are conditioned on the ground-truth
            # start position taken from the features.
            gold_start = tf.reshape(features["start_positions"], [-1])
            gold_one_hot = tf.one_hot(
                gold_start, depth=seq_len, axis=-1, dtype=tf.float32)
            start_vec = tf.einsum("lbh,bl->bh", hidden, gold_one_hot)
            start_vec = tf.tile(start_vec[None], [seq_len, 1, 1])
            end_in = tf.concat([hidden, start_vec], axis=-1)
            end_hidden = tf.layers.dense(
                end_in,
                model_config.d_model,
                kernel_initializer=init,
                activation=tf.tanh,
                name="dense_0")
            end_hidden = tf.contrib.layers.layer_norm(
                end_hidden, begin_norm_axis=-1)

            end_raw = tf.layers.dense(
                end_hidden,
                1,
                kernel_initializer=init,
                name="dense_1")
            end_logits = tf.transpose(tf.squeeze(end_raw, -1), [1, 0])
            end_keep = 1 - p_mask
            end_scaled = end_logits * end_keep
            end_penalty = 1e30 * p_mask
            end_logits_masked = end_scaled - end_penalty
            end_log_probs = tf.nn.log_softmax(end_logits_masked, -1)

            outputs["start_log_probs"] = start_log_probs
            outputs["end_log_probs"] = end_log_probs
        else:
            # Inference: beam search. Keep the top `start_n_top` start
            # candidates and score end positions for each of them.
            start_top_log_probs, start_top_index = tf.nn.top_k(
                start_log_probs, k=FLAGS.start_n_top)
            top_one_hot = tf.one_hot(
                start_top_index, depth=seq_len, axis=-1, dtype=tf.float32)
            start_vec = tf.einsum("lbh,bkl->bkh", hidden, top_one_hot)
            hidden_tiled = tf.tile(
                hidden[:, :, None], [1, 1, FLAGS.start_n_top, 1])
            start_vec = tf.tile(start_vec[None], [seq_len, 1, 1, 1])
            end_in = tf.concat([hidden_tiled, start_vec], axis=-1)
            end_hidden = tf.layers.dense(
                end_in,
                model_config.d_model,
                kernel_initializer=init,
                activation=tf.tanh,
                name="dense_0")
            end_hidden = tf.contrib.layers.layer_norm(
                end_hidden, begin_norm_axis=-1)
            end_raw = tf.layers.dense(
                end_hidden,
                1,
                kernel_initializer=init,
                name="dense_1")
            end_logits = tf.reshape(
                end_raw, [seq_len, -1, FLAGS.start_n_top])
            end_logits = tf.transpose(end_logits, [1, 2, 0])
            end_keep = 1 - p_mask[:, None]
            end_scaled = end_logits * end_keep
            end_penalty = 1e30 * p_mask[:, None]
            end_logits_masked = end_scaled - end_penalty
            end_log_probs = tf.nn.log_softmax(end_logits_masked, -1)
            end_top_log_probs, end_top_index = tf.nn.top_k(
                end_log_probs, k=FLAGS.end_n_top)

            # Flatten the (start candidate, end candidate) pairs into one
            # trailing axis of size start_n_top * end_n_top.
            flat_shape = [-1, FLAGS.start_n_top * FLAGS.end_n_top]
            end_top_log_probs = tf.reshape(end_top_log_probs, flat_shape)
            end_top_index = tf.reshape(end_top_index, flat_shape)

            outputs["start_top_log_probs"] = start_top_log_probs
            outputs["start_top_index"] = start_top_index
            outputs["end_top_log_probs"] = end_top_log_probs
            outputs["end_top_index"] = end_top_index

    # Additional head that predicts answerability.
    with tf.variable_scope("answer_class"):
        # Representation at the CLS position.
        cls_one_hot = tf.one_hot(
            cls_positions, seq_len, axis=-1, dtype=tf.float32)
        cls_feature = tf.einsum("lbh,bl->bh", hidden, cls_one_hot)

        # Representation of START: hidden states weighted by the softmax of
        # the masked start logits.
        start_p = tf.nn.softmax(
            start_logits_masked, axis=-1, name="softmax_start")
        start_feature = tf.einsum("lbh,bl->bh", hidden, start_p)

        # No dependency on the end features, so that a single `cls_logits`
        # value is produced for each sample.
        ans_feature = tf.concat([start_feature, cls_feature], -1)
        ans_feature = tf.layers.dense(
            ans_feature,
            model_config.d_model,
            activation=tf.tanh,
            kernel_initializer=init,
            name="dense_0")
        ans_feature = tf.layers.dropout(
            ans_feature, FLAGS.dropout, training=is_training)
        cls_raw = tf.layers.dense(
            ans_feature,
            1,
            kernel_initializer=init,
            name="dense_1",
            use_bias=False)

        outputs["cls_logits"] = tf.squeeze(cls_raw, -1)

    return outputs
Пример #16
0
def get_uda_classification_loss(options, features, n_class, is_training,
                                global_step, input_ids, input_mask,
                                segment_ids, labels):
    """Compute supervised and unsupervised (UDA) classification losses.

    When training, the batch is sliced as three consecutive groups along the
    first axis: `sup_batch_size` supervised rows, then `unsup_batch_size`
    original unlabeled rows, then `unsup_batch_size` augmented rows.

    Args:
        options: mapping read for 'tsa', 'unsup_ratio', 'num_train_steps',
            'uda_softmax_temp', 'uda_confidence_thresh', 'model_config_file',
            'summary_type', 'use_summ_proj', 'cls_scope' and 'task_name'. It
            is also passed as-is to `xlnet.create_run_config`.
        features: not used by this function.
        n_class: number of classes; used as the one-hot depth and as the
            inverse of the starting TSA threshold.
        is_training: whether the batch carries the unsupervised groups.
        global_step: forwarded to `model_utils.get_tsa_threshold`.
        input_ids: token ids; its static first dimension must be known.
        input_mask: input mask, transposed before entering the model.
        segment_ids: segment ids, transposed before entering the model.
        labels: labels of the supervised rows.

    Returns:
        A tuple `(sup_loss, unsup_loss, sup_logits, per_example_loss,
        loss_mask, tsa_threshold, unsup_loss_mask, correct_label_probs)`.
        `unsup_loss` is `0.` and `unsup_loss_mask` is `None` when the
        unsupervised branch does not run; `tsa_threshold` is `1` when TSA is
        disabled.

    Raises:
        AssertionError: when training and the static batch size is not a
            multiple of `1 + 2 * unsup_ratio`.
    """
    # Read every option up front, in a fixed order.
    tsa = options['tsa']
    unsup_ratio = options['unsup_ratio']
    num_train_steps = options['num_train_steps']
    uda_softmax_temp = options['uda_softmax_temp']
    uda_confidence_thresh = options['uda_confidence_thresh']

    ids_t = tf.transpose(input_ids, [1, 0])
    seg_t = tf.transpose(segment_ids, [1, 0])
    mask_t = tf.transpose(input_mask, [1, 0])

    # Static batch size.
    num_sample = input_ids.shape[0].value

    if is_training:
        # One supervised share plus an original and an augmented share per
        # unit of unsup_ratio.
        shares = 1 + 2 * unsup_ratio
        assert num_sample % shares == 0
        sup_batch_size = num_sample // shares
        unsup_batch_size = sup_batch_size * unsup_ratio
        bsz_per_core = tf.shape(input_ids)[0] // shares
    else:
        sup_batch_size = num_sample
        unsup_batch_size = 0
        bsz_per_core = tf.shape(input_ids)[0]

    labels = tf.reshape(labels, [bsz_per_core])
    model_config = xlnet.XLNetConfig(json_path=options['model_config_file'])
    run_cfg = xlnet.create_run_config(is_training, True, options)

    model = xlnet.XLNetModel(xlnet_config=model_config,
                             run_config=run_cfg,
                             input_ids=ids_t,
                             seg_ids=seg_t,
                             input_mask=mask_t)

    summary = model.get_pooled_out(options['summary_type'],
                                   options['use_summ_proj'])

    # An explicit, non-empty 'cls_scope' wins over the lower-cased task name.
    scope_suffix = options['cls_scope'] or options['task_name'].lower()
    cls_scope = "classification_{}".format(scope_suffix)

    clas_logits = modeling.uda_logits(
        hidden=summary,
        labels=labels,
        n_class=n_class,
        initializer=model.get_initializer(),
        scope=cls_scope)

    log_probs = tf.nn.log_softmax(clas_logits, axis=-1)

    with tf.variable_scope("sup_loss"):
        sup_log_probs = log_probs[:sup_batch_size]
        one_hot_labels = tf.one_hot(labels, depth=n_class, dtype=tf.float32)

        # Cross entropy against the one-hot targets.
        weighted_log_probs = one_hot_labels * sup_log_probs
        per_example_loss = -tf.reduce_sum(weighted_log_probs, axis=-1)
        loss_mask = tf.ones_like(per_example_loss,
                                 dtype=per_example_loss.dtype)
        # Probability the model assigns to the labeled class.
        sup_probs = tf.exp(sup_log_probs)
        correct_label_probs = tf.reduce_sum(one_hot_labels * sup_probs,
                                            axis=-1)

        tsa_threshold = 1
        if tsa:
            tf.logging.info("Applying TSA")
            # Starting threshold is just the inverse number of labels.
            tsa_start = 1. / n_class
            tsa_threshold = model_utils.get_tsa_threshold(tsa,
                                                          global_step,
                                                          num_train_steps,
                                                          tsa_start,
                                                          end=1)

            # Drop examples whose correct-class probability already exceeds
            # the threshold.
            above = tf.greater(correct_label_probs, tsa_threshold)
            loss_mask = loss_mask * (1 - tf.cast(above, tf.float32))

        loss_mask = tf.stop_gradient(loss_mask)
        per_example_loss = per_example_loss * loss_mask
        masked_total = tf.reduce_sum(per_example_loss)
        # Floor the kept-example count at 1 to avoid dividing by zero.
        kept_count = tf.maximum(tf.reduce_sum(loss_mask), 1)
        sup_loss = masked_total / kept_count

    unsup_loss_mask = None
    if not (is_training and unsup_ratio > 0):
        unsup_loss = 0.
    else:
        with tf.variable_scope("unsup_loss"):
            ori_start = sup_batch_size
            ori_end = ori_start + unsup_batch_size
            aug_start = ori_end
            aug_end = aug_start + unsup_batch_size

            ori_log_probs = log_probs[ori_start:ori_end]
            aug_log_probs = log_probs[aug_start:aug_end]
            unsup_loss_mask = 1

            # -1 disables temperature scaling of the target distribution.
            if uda_softmax_temp != -1:
                target = tf.nn.log_softmax(
                    clas_logits[ori_start:ori_end] / uda_softmax_temp,
                    axis=-1)
            else:
                target = ori_log_probs
            tgt_ori_log_probs = tf.stop_gradient(target)

            # -1 disables confidence-based masking.
            if uda_confidence_thresh != -1:
                largest_prob = tf.reduce_max(tf.exp(ori_log_probs), axis=-1)
                confident = tf.greater(largest_prob, uda_confidence_thresh)
                unsup_loss_mask = tf.stop_gradient(
                    tf.cast(confident, tf.float32))

            per_example_kl_loss = model_utils.kl_for_log_probs(
                tgt_ori_log_probs, aug_log_probs) * unsup_loss_mask
            unsup_loss = tf.reduce_mean(per_example_kl_loss)

    sup_logits = clas_logits[:sup_batch_size]
    return (sup_loss, unsup_loss, sup_logits,
            per_example_loss, loss_mask, tsa_threshold, unsup_loss_mask,
            correct_label_probs)
Пример #17
0
def main():
    """Run the NER pipeline stages that are enabled in the `cf` config.

    Stages, each gated by a flag on `cf`:
      * `do_train` and `do_eval` (both required): convert the training
        examples to a TFRecord file under `cf.output_dir`, train the
        estimator, evaluate on the dev examples and log precision, recall
        and F1.
      * `do_predict`: predict on the test examples and pass the results to
        `XLNetPredictRecorder.record`.
      * `do_export`: export a SavedModel into `cf.export_dir`.

    Side effects: seeds NumPy's global RNG with `cf.random_seed` and, when
    training, overwrites `cf.warmup_steps` with 10% of the computed number of
    training steps.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    np.random.seed(cf.random_seed)

    processor = NerProcessor(
        data_dir=cf.train_data,
        task_name=cf.task_name.lower())

    label_list = processor.labels

    model_config = xlnet.XLNetConfig(json_path=cf.model_config_path)

    model_builder = XLNetModelBuilder(
        model_config=model_config,
        use_tpu=cf.use_tpu)

    model_fn = model_builder.get_model_fn(label_list)

    # If TPU is not available, this will fall back to normal Estimator on
    # CPU or GPU.
    tpu_config = model_utils.configure_tpu(cf)

    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=cf.use_tpu,
        model_fn=model_fn,
        config=tpu_config,
        export_to_tpu=cf.use_tpu,
        train_batch_size=cf.train_batch_size,
        eval_batch_size=cf.eval_batch_size,
        predict_batch_size=cf.predict_batch_size)

    tokenizer = XLNetTokenizer(
        sp_model_file=cf.spiece_model_file,
        lower_case=cf.lower_case)

    example_converter = XLNetExampleConverter(
        label_list=label_list,
        max_seq_length=cf.max_seq_length,
        tokenizer=tokenizer)

    # NOTE(review): training only runs when BOTH do_train and do_eval are
    # set; confirm this coupling is intended.
    if cf.do_train and cf.do_eval:
        train_file = os.path.join(cf.output_dir, "train.tf_record")
        # Log the path itself; the previous code logged len(train_file),
        # i.e. the number of characters in the path.
        tf.logging.info("Use tfrecord file: %s", train_file)

        train_examples = processor.get_train_examples()
        np.random.shuffle(train_examples)

        example_converter.file_based_convert_examples_to_features(
            train_examples, train_file)
        train_steps = int(
            len(train_examples) * cf.num_train_epochs / cf.train_batch_size)
        # Warm up for the first 10% of the training steps.
        cf.warmup_steps = int(0.1 * train_steps)

        tf.logging.info("***** Run training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", cf.train_batch_size)
        # Report the step count that is actually passed to estimator.train.
        tf.logging.info("  Num steps = %d", train_steps)

        # Read the training data back from the TFRecord file.
        train_input_fn = XLNetInputBuilder.get_file_based_input_fn(
            input_file=train_file,
            seq_length=cf.max_seq_length,
            is_training=True,
            drop_remainder=True)

        estimator.train(input_fn=train_input_fn, max_steps=train_steps)

        eval_examples = processor.get_dev_examples()

        tf.logging.info("***** Run evaluation *****")
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", cf.eval_batch_size)

        eval_features = example_converter.convert_examples_to_features(
            eval_examples)
        eval_input_fn = XLNetInputBuilder.get_input_builder(
            eval_features, cf.max_seq_length, False, False)

        result = estimator.evaluate(input_fn=eval_input_fn)

        precision = result["precision"]
        recall = result["recall"]
        # F1 is 0/0 when precision and recall are both zero; report 0.0
        # instead of dividing by zero.
        pr_sum = precision + recall
        if pr_sum > 0:
            f1_score = 2.0 * precision * recall / pr_sum
        else:
            f1_score = 0.0

        tf.logging.info("***** Evaluation result *****")
        tf.logging.info("  Precision (token-level) = %s", str(precision))
        tf.logging.info("  Recall (token-level) = %s", str(recall))
        tf.logging.info("  F1 score (token-level) = %s", str(f1_score))

    if cf.do_predict:
        predict_examples = processor.get_test_examples()

        tf.logging.info("***** Run prediction *****")
        tf.logging.info("  Num examples = %d", len(predict_examples))
        tf.logging.info("  Batch size = %d", cf.predict_batch_size)

        predict_features = example_converter.convert_examples_to_features(
            predict_examples)
        predict_input_fn = XLNetInputBuilder.get_input_builder(
            predict_features, cf.max_seq_length, False, False)

        result = estimator.predict(input_fn=predict_input_fn)

        predict_recorder = XLNetPredictRecorder(
            output_dir=cf.output_dir,
            label_list=label_list,
            max_seq_length=cf.max_seq_length,
            tokenizer=tokenizer,
            predict_tag=cf.predict_tag)

        # Pair each input feature with its prediction, in order.
        predicts = [{
            "input_ids": feature.input_ids,
            "input_masks": feature.input_masks,
            "label_ids": feature.label_ids,
            "predict_ids": predict["predict"].tolist()
        } for feature, predict in zip(predict_features, result)]

        predict_recorder.record(predicts)

    if cf.do_export:
        tf.logging.info("***** Running exporting *****")
        tf.io.gfile.makedirs(cf.export_dir)
        serving_input_fn = XLNetInputBuilder.get_serving_input_fn(
            cf.max_seq_length)
        estimator.export_saved_model(
            cf.export_dir, serving_input_fn, as_text=False)