Example #1
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Tensorflow Version: " + TF_VERSION)

    # load config file
    tf.logging.info("***** loding config *****")
    tf.logging.info(FLAGS.config)
    with open(FLAGS.config, 'r') as f:
        config = json.load(f)
        if FLAGS.config.split('/')[1] == 'biz':
            config[C.BIZ_NAME] = FLAGS.config.split('/')[2]

    if FLAGS.task_type == "train":
        sess_config = tf.ConfigProto(allow_soft_placement=True)

        run_config = tf.estimator.RunConfig(
            model_dir=FLAGS.checkpoint_path,
            save_checkpoints_steps=config.get('save_checkpoints_steps'),
            session_config=sess_config,
            log_step_count_steps=10)

        model_fn = model_fn_builder(config, FLAGS.init_checkpoint_path)

        estimator = tf.estimator.Estimator(
            model_fn=model_fn,
            params={"batch_size": FLAGS.batch_size},
            config=run_config)

        if FLAGS.worker_count > 1:
            FLAGS.worker_count -= 1
        if FLAGS.task_index > 0:
            FLAGS.task_index -= 1

        train_input_fn = input_fn_builder(table=FLAGS.train_table,
                                          config=config)

        tf.logging.info("***** Running training *****")
        tf.logging.info("Batch size = %d", FLAGS.batch_size)
        train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                            max_steps=FLAGS.num_train_steps)

        # do eval
        eval_input_fn = input_eval_fn_builder(table=FLAGS.eval_table,
                                              config=config)
        tf.logging.info("***** Running evaluation *****")
        eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn,
                                          steps=FLAGS.max_eval_steps,
                                          start_delay_secs=30,
                                          throttle_secs=30)

        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    elif FLAGS.task_type == "export":
        export_saved_model(config)

    elif FLAGS.task_type == 'predict':
        predict(config, FLAGS.worker_count, FLAGS.task_index)
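In a distributed run, tf.estimator.train_and_evaluate reads the cluster layout from the TF_CONFIG environment variable. A minimal sketch of that layout, with placeholder host addresses (the real cluster is provisioned by whatever launcher sets FLAGS.worker_count and FLAGS.task_index):

import json
import os

# Placeholder addresses; in practice the launcher writes TF_CONFIG.
os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {
        "chief": ["host0:2222"],
        "worker": ["host1:2222", "host2:2222"],
        "ps": ["host3:2222"],
    },
    "task": {"type": "worker", "index": 0},
})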
Example #2
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    tpu_config = tf.contrib.tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_tpu_cores,
        per_host_input_for_training=is_per_host)

    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=INIT_CHECKPOINT,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=1,
                                num_warmup_steps=0,
                                config=config,
                                use_tpu=FLAGS.use_tpu,
                                create_model_fn=create_model,
                                fine_tune=FLAGS.fine_tune)

    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        log_step_count_steps=1,
        save_summary_steps=2,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=2,
        tpu_config=tpu_config)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        predict_batch_size=FLAGS.predict_batch_size)

    suffix = ''
    if FLAGS.fine_tune:
        suffix = '_fine_tune'

    with tf.gfile.GFile(
            '%s/dev_examples%s.pickle' % (FLAGS.features_dir, suffix),
            'rb') as in_file:
        eval_examples = pickle.load(in_file)
    with tf.gfile.GFile(
            '%s/dev_features%s.pickle' % (FLAGS.features_dir, suffix),
            'rb') as in_file:
        eval_features = pickle.load(in_file)

    tf.logging.info("***** Running predictions *****")
    tf.logging.info("  Num orig examples = %d", len(eval_examples))
    tf.logging.info("  Num split examples = %d", len(eval_features))
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    all_results = []

    predict_input_fn = input_fn_builder(input_file=DEV_FILENAME,
                                        seq_length=FLAGS.max_seq_length,
                                        bert_config=bert_config,
                                        is_training=False,
                                        drop_remainder=False,
                                        fine_tune=FLAGS.fine_tune)

    # If running eval on the TPU, you will need to specify the number of
    # steps.
    for result in estimator.predict(predict_input_fn,
                                    yield_single_examples=False):
        if len(all_results) % 1000 == 0:
            tf.logging.info("Processing example: %d" % (len(all_results)))

        if hasattr(result["unique_ids"], 'shape'):
            for i, unique_id_s in enumerate(result['unique_ids']):
                unique_id = int(unique_id_s)
                start_logits = [
                    float(x) for x in result["start_logits"][i].flat
                ]
                end_logits = [float(x) for x in result["end_logits"][i].flat]
                all_results.append(
                    RawResult(unique_id=unique_id,
                              start_logits=start_logits,
                              end_logits=end_logits))
        else:
            unique_id = int(result["unique_ids"])
            start_logits = [float(x) for x in result["start_logits"].flat]
            end_logits = [float(x) for x in result["end_logits"].flat]
            all_results.append(
                RawResult(unique_id=unique_id,
                          start_logits=start_logits,
                          end_logits=end_logits))

    output_prediction_file = os.path.join(FLAGS.output_dir,
                                          FLAGS.predictions_output_directory,
                                          "predictions.json")
    output_nbest_file = os.path.join(FLAGS.output_dir,
                                     FLAGS.predictions_output_directory,
                                     "nbest_predictions.json")
    output_null_log_odds_file = os.path.join(
        FLAGS.output_dir, FLAGS.predictions_output_directory, "null_odds.json")

    write_predictions(eval_examples, eval_features, all_results,
                      FLAGS.n_best_size, FLAGS.max_answer_length,
                      FLAGS.do_lower_case, output_prediction_file,
                      output_nbest_file, output_null_log_odds_file)
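The prediction loop above assumes a RawResult container; in the BERT reference implementation (run_squad.py), which this snippet appears to follow, it is a plain namedtuple:

import collections

# As defined in BERT's run_squad.py; assumed to match the usage above.
RawResult = collections.namedtuple(
    "RawResult", ["unique_id", "start_logits", "end_logits"])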
Example #3
flags.DEFINE_integer(
    "max_seq_length", 384,
    "The maximum total input sequence length after WordPiece tokenization. "
    "Sequences longer than this will be truncated, and sequences shorter "
    "than this will be padded.")

flags.DEFINE_integer("batch_size", 100,
                     "Batch size used when building the eval input pipeline.")

flags.DEFINE_string("data_bert_directory", 'data/uncased_L-12_H-768_A-12',
                    'directory containing BERT config and checkpoints')

bert_config = modeling.BertConfig.from_json_file("%s/bert_config.json" %
                                                 FLAGS.data_bert_directory)

input_fn = input_fn_builder('out/features/eval.tf_record',
                            FLAGS.max_seq_length, False, False, bert_config)
dataset: tf.data.Dataset = input_fn({'batch_size': FLAGS.batch_size})


def test_embedding_dimensions():
    assert dataset.output_shapes['input_ids'].dims[0].value is None
    assert dataset.output_shapes['input_ids'].dims[
        1].value == FLAGS.max_seq_length
    assert len(dataset.output_shapes['input_ids'].dims) == 2

    assert dataset.output_shapes['input_mask'].dims[0].value is None
    assert dataset.output_shapes['input_mask'].dims[
        1].value == FLAGS.max_seq_length
    assert len(dataset.output_shapes['input_mask'].dims) == 2

    assert dataset.output_shapes['unique_ids'].dims[0].value is None
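These assertions only inspect the dataset's static shapes. A complementary runtime check, sketched here under the assumption that the TF 1.x graph-mode APIs above are in use, pulls one concrete batch:

def test_first_batch_runtime_shapes():
    # Materialize one batch and verify the padded sequence length.
    features = dataset.make_one_shot_iterator().get_next()
    with tf.Session() as sess:
        batch = sess.run(features)
    assert batch['input_ids'].shape[1] == FLAGS.max_seq_length
    assert batch['input_mask'].shape == batch['input_ids'].shape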
Example #4
def train(config, worker_count, task_index, cluster, is_chief, target):
    worker_device = "/job:worker/task:%d/cpu:%d" % (task_index, 0)
    print("worker_deivce = %s" % worker_device)

    # assign io related variables and ops to local worker device
    with tf.device(worker_device):
        train_input_fn = input_fn_builder(
            table=FLAGS.train_table,
            config=config,
            slice_id=FLAGS.task_index,
            slice_count=worker_count
        )
        d = train_input_fn()
        iterator = d.make_one_shot_iterator()
        features = iterator.get_next()

    # assign global variables to ps nodes
    available_worker_device = "/job:worker/task:%d" % (task_index)
    with tf.device(tf.train.replica_device_setter(worker_device=available_worker_device, cluster=cluster)):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        # construct the model structure
        # loss, optimizer = model_fn(features, labels, global_step)
        policy_network_module = utils.load_policy_network_module(config)

        simulator_network_module = utils.load_simulator_network_module(config)

        trainer_module = utils.load_trainer_module(config)

        trainReinforce = trainer_module.TrainReinforce(
            config, features,
            policy_network_module.PGNetwork,
            simulator_network_module.PGNetwork,
            global_step,
            FLAGS.simulator_checkpoint_path)

        if FLAGS.init_checkpoint_path is not None:
            t_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='reinforce')
            (assignment_map, initialized_variable_names
             ) = get_assignment_map_from_checkpoint(t_vars, FLAGS.init_checkpoint_path)

            tf.train.init_from_checkpoint(FLAGS.init_checkpoint_path, assignment_map)

            tf.logging.info("**** Trainable Variables ****")
            for var in t_vars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                                init_string)



    # hooks = [tf.train.StopAtStepHook(last_step=FLAGS.num_train_steps)]
    hooks = []
    step = 0
    with tf.train.MonitoredTrainingSession(
            master=target,
            checkpoint_dir=FLAGS.checkpoint_path,
            save_checkpoint_secs=120,
            is_chief=is_chief,
            hooks=hooks) as mon_sess:
        while True:
            # _, c, g = mon_sess.run([optimizer, loss, global_step])

            trainReinforce.train(mon_sess)

            _global_step = mon_sess.run(global_step)
            step = _global_step

            if task_index == 0:
                print('step:{}'.format(_global_step))

            if _global_step >= FLAGS.num_train_steps:
                break

    print("%d steps finished." % step)
Example #5
def train_and_eval_on_single_worker(config):
    train_input_fn = input_fn_builder(
        table=FLAGS.train_table,
        config=config
    )
    d = train_input_fn()
    iterator = d.make_one_shot_iterator()
    features = iterator.get_next()

    global_step = tf.train.get_or_create_global_step()
    # construct the model structure
    # loss, optimizer = model_fn(features, labels, global_step)
    policy_network_module = utils.load_policy_network_module(config)

    simulator_network_module = utils.load_simulator_network_module(config)

    trainer_module = utils.load_trainer_module(config)

    trainer = trainer_module.Trainer(
        config, features,
        policy_network_module.PGNetwork,
        simulator_network_module.PGNetwork,
        global_step,
        FLAGS.simulator_checkpoint_path)

    eval_graph = tf.Graph()
    with eval_graph.as_default() as g:
        eval_input_fn = input_eval_fn_builder(
            table=FLAGS.eval_table,
            config=config
        )
        eval_d = eval_input_fn()
        eval_iterator = eval_d.make_one_shot_iterator()
        eval_features = eval_iterator.get_next()
        eval_module = utils.load_evaluator_module(config)
        evaluator = eval_module.Evaluator(
            config, eval_features,
            policy_network_module.PGNetwork,
            simulator_network_module.PGNetwork,
            FLAGS.simulator_checkpoint_path)
        eval_saver = tf.train.Saver(max_to_keep=10)

    if FLAGS.init_checkpoint_path is not None:
        t_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='reinforce')
        (assignment_map, initialized_variable_names
         ) = get_assignment_map_from_checkpoint(t_vars, FLAGS.init_checkpoint_path)

        tf.train.init_from_checkpoint(FLAGS.init_checkpoint_path, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in t_vars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

    hooks = []
    step = 0
    previous_ckpt_path = ''
    with tf.train.MonitoredTrainingSession(
            master='',
            checkpoint_dir=FLAGS.checkpoint_path,
            save_checkpoint_secs=60,
            is_chief=True,
            hooks=hooks) as mon_sess:
        while True:
            # _, c, g = mon_sess.run([optimizer, loss, global_step])

            trainer.train(mon_sess)

            _global_step = mon_sess.run(global_step)
            step = _global_step

            print('step:{}'.format(_global_step))

            # eval
            if _global_step > 0 and _global_step % 100 == 0:
                latest_ckpt_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
                if latest_ckpt_path is None or latest_ckpt_path == previous_ckpt_path:
                    continue
                print('latest_ckpt_path', latest_ckpt_path)
                with tf.Session(graph=eval_graph) as eval_sess:
                    eval_sess.run(tf.global_variables_initializer())
                    eval_sess.run(tf.local_variables_initializer())
                    eval_saver.restore(eval_sess, latest_ckpt_path)
                    evaluator.eval(eval_sess)
                    previous_ckpt_path = latest_ckpt_path

            if _global_step >= FLAGS.num_train_steps:
                break

    print("%d steps finished." % step)
Example #6
def main(test_file='test.json'):
    tf.logging.set_verbosity(tf.logging.INFO)
    # 1. Set up the data processors
    processors = {'joint': Joint_Processor}

    task_name = config['task_name'].lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % task_name)
    processor = processors[task_name]()

    # 1.1 Get the label maps
    id2domain, domain2id, id2intent, intent2id, id2slot, slot2id, domain_w, intent_w = \
            processor.get_labels(config["data_dir"],\
                                 "train" if config['do_train'] else "test")

    #print(domain2id)
    #print(intent2id)
    #print(slot2id)
    # Build the tokenizer
    tokenizer = tokenization.FullTokenizer(\
                    vocab_file=config['vocab_file'], do_lower_case=config['do_lower_case'])

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    save_checkpoints_steps = config['save_checkpoints_steps']

    # 1.2 Read the training data and convert it into examples
    if config['do_train']:
        tf.logging.info("***** Loading training examples *****")
        train_examples = processor.get_train_examples(config['data_dir'])
        num_train_steps = int(
            len(train_examples) / config['train_batch_size'] *
            config['num_train_epochs'])
        num_warmup_steps = int(num_train_steps * config['warmup_proportion'])
        save_checkpoints_steps = int(
            len(train_examples) / config['train_batch_size']) + 1

    if config['do_train']:
        train_file = os.path.join(config['data_dir'], 'train.tf_record')
        # Write the examples to a TFRecord file for efficient reading
        file_based_convert_examples_to_features(train_examples, domain2id, intent2id, slot2id,\
            config['max_seq_length'], tokenizer, train_file)

        # Input pipeline that reads the TFRecord file
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=config['max_seq_length'],
            is_training=True,
            drop_remainder=False)
    # 2. Build the model
    # 2.1 Set up the run configuration
    bert_config = modeling.BertConfig.from_json_file(
        config['bert_config_file'])

    tf_cfg = tf.ConfigProto()
    tf_cfg.gpu_options.per_process_gpu_memory_fraction = 0.8

    run_config = tf.estimator.RunConfig(
        model_dir=config['output_dir'],
        save_checkpoints_steps=save_checkpoints_steps,
        keep_checkpoint_max=1,
        session_config=tf_cfg,
        log_step_count_steps=100,
    )
    # 2.2 Build the model_fn
    model_fn = model_fn_builder(bert_config=bert_config,
                                num_domain=len(domain2id),
                                num_intent=len(intent2id),
                                num_slot=len(slot2id),
                                init_checkpoint=config['init_checkpoint'],
                                learning_rate=config['learning_rate'],
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=config['use_tpu'],
                                use_one_hot_embeddings=config['use_tpu'],
                                do_serve=config['do_serve'],
                                domain_w=domain_w,
                                intent_w=intent_w)

    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
    )

    # 3. Training
    if config['do_train']:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", config['train_batch_size'])
        tf.logging.info("  Num steps = %d", num_train_steps)
        if config['do_eval']:
            # Train and evaluate together; note that eval_input_fn and
            # eval_steps are not defined in this snippet and must come
            # from elsewhere.
            train_spec = tf.estimator.TrainSpec(input_fn = train_input_fn,\
                                                max_steps = num_train_steps)
            eval_spec = tf.estimator.EvalSpec(input_fn = eval_input_fn,\
                                              steps = eval_steps, start_delay_secs=60, throttle_secs=0)
            tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
        else:
            estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

        return None

    # 4. Prediction
    # 4.1 Load the prediction data
    if config['do_predict']:
        tf.logging.info("***** Loading training examples *****")
        test_examples = processor.get_test_examples(test_file)
        num_actual_predict_examples = len(test_examples)
        tf.logging.info("the number of test_examples is %d" %
                        len(test_examples))
        test_features = convert_examples_to_features(test_examples, domain2id,\
                intent2id, slot2id, config['max_seq_length'], tokenizer)
        tf.logging.info("the number of test_features is %d" %
                        len(test_features))

    if config['do_predict']:
        predict_input_fn = input_fn_builder(
            features=test_features,
            seq_length=config['max_seq_length'],
            is_training=False,
            drop_remainder=False,
        )
        result = estimator.predict(input_fn=predict_input_fn)
        print(result)
        pred_results = []
        for pred_line, prediction in zip(test_examples, result):
            data = {}
            #print(pred_line.text)
            data['text'] = pred_line.text
            domain_pred = prediction["domain_pred"]
            intent_pred = prediction["intent_pred"]
            slot_pred = prediction["slot_pred"]
            data['domain'] = id2domain[domain_pred]

            data['intent'] = id2intent[
                intent_pred] if id2intent[intent_pred] != 'NaN' else np.nan
            idx = 0
            len_seq = len(pred_line.text)
            slot_labels = []
            for sid in slot_pred:
                if idx >= len_seq:
                    break
                if sid == 0:
                    continue
                cur_slot = id2slot[sid]
                if cur_slot in ['[CLS]', '[SEP]']:
                    continue
                slot_labels.append(cur_slot)
                idx += 1

            data['slots'] = get_slot_name(pred_line.text, slot_labels)

            # Use `m` rather than reusing `result`, which already names the
            # prediction generator above.
            for p in code_pattern:
                m = re.match(p, data['text'])
                if m:
                    #print(m.group(0), m.group(1))
                    data['slots']['code'] = m.group(1)
                    break
            pred_results.append(data)

            #print(domain_pred, intent_pred, slot_pred)
        with open(sys.argv[2], 'w', encoding='utf8') as f_out:
            json.dump(pred_results, f_out, ensure_ascii=False)
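code_pattern is referenced in the prediction loop but never defined in this snippet; presumably it is a list of regexes whose first capture group extracts a code from the utterance text. A hypothetical example:

# Hypothetical patterns; the real list lives elsewhere in the project.
code_pattern = [
    r'.*?验证码\D*(\d{4,6})',                # e.g. "...verification code 1234"
    r'.*?code\s*[::]?\s*([A-Za-z0-9]{4,8})',
]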
Example #7
def main(_):
    tf.gfile.MakeDirs(OUTPUT_DIR)

    tf.logging.set_verbosity(tf.logging.INFO)

    (config, create_model) = load_and_save_config(FLAGS.config)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    tpu_config = tf.contrib.tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_tpu_cores,
        per_host_input_for_training=is_per_host)
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        log_step_count_steps=1,
        save_summary_steps=2,
        model_dir=OUTPUT_DIR,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=2,
        tpu_config=tpu_config)

    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        num_train_examples = N_TRAIN_EXAMPLES
        if num_train_examples is None:
            num_train_examples = math.ceil(N_TOTAL_SQUAD_EXAMPLES *
                                           (1. - FLAGS.eval_percent))
        num_train_steps = int(num_train_examples / FLAGS.train_batch_size *
                              FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    print("Total training steps = %d" % num_train_steps)
    time.sleep(2)

    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=INIT_CHECKPOINT,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                config=config,
                                use_tpu=FLAGS.use_tpu,
                                create_model_fn=create_model,
                                fine_tune=FLAGS.fine_tune)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant tensors
        # in memory.

        train_input_fn = input_fn_builder(input_file=TRAIN_FILE_NAME,
                                          seq_length=FLAGS.max_seq_length,
                                          is_training=True,
                                          bert_config=bert_config,
                                          drop_remainder=True,
                                          fine_tune=FLAGS.fine_tune)
        eval_input_fn = input_fn_builder(
            input_file=EVAL_FILE_NAME,
            seq_length=FLAGS.max_seq_length,
            # No need to shuffle eval set
            is_training=False,
            bert_config=bert_config,
            drop_remainder=True,
            fine_tune=FLAGS.fine_tune)
        # Train and evaluate in a single call; see
        # https://www.tensorflow.org/api_docs/python/tf/estimator/train_and_evaluate
        # and https://towardsdatascience.com/how-to-configure-the-train-and-evaluate-loop-of-the-tensorflow-estimator-api-45c470f6f8d
        train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                            max_steps=num_train_steps)
        eval_spec = tf.estimator.EvalSpec(
            input_fn=eval_input_fn,
            # start_delay_secs=FLAGS.eval_start_delay_secs,  # start evaluating after N seconds
            throttle_secs=FLAGS.eval_throttle_secs,
            steps=FLAGS.eval_steps,
        )

        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
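train_and_evaluate has historically had limited support under TPUEstimator; a common fallback, sketched here with the names defined above, alternates explicit train and evaluate calls:

# Sketch: explicit train/evaluate, often used instead of
# tf.estimator.train_and_evaluate when running on a TPU.
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
metrics = estimator.evaluate(input_fn=eval_input_fn, steps=FLAGS.eval_steps)
tf.logging.info("Eval metrics: %s", metrics)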