Example #1
def simple():
    tf_logging.setLevel(logging.INFO)
    out_path = os.path.join(working_path, "dict_reader3")
    exist_or_mkdir(out_path)
    worker = DGenWorker(out_path)
    worker.gen.f_hide_word = False
    worker.work(1)
Example #2
def main():

    tf_logging2 = logging.getLogger('tensorflow')
    tf_logging.setLevel(logging.INFO)
    ab_logging.info("This is ab logging")
    tf_logging.info("This is TF log")
    tf_logging2.info("TFLog 2")
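Note: logging.getLogger returns one instance per name, so tf_logging2 above is very likely the same object as tf_logging (assuming tf_logging is TensorFlow's own logger, e.g. tf.get_logger() or logging.getLogger('tensorflow')). A minimal self-contained sketch of that behavior:

import logging

logging.basicConfig()  # attach a default handler so records are visible
a = logging.getLogger('tensorflow')
b = logging.getLogger('tensorflow')
assert a is b  # getLogger returns a single instance per name
a.setLevel(logging.INFO)
b.info("both handles emit through the same logger")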
Example #3
def generate_test():
    tf_logging.setLevel(logging.INFO)
    out_path = os.path.join(working_path, "dict_reader2_test_dict")
    worker = DGenWorker(out_path)
    worker.gen.drop_none_dict = True
    worker.work(1)

    out_path = os.path.join(working_path, "dict_reader2_test_no_dict")
    worker = DGenWorker(out_path)
    worker.gen.no_dict_assist = True
    worker.work(1)
Example #4
def dev_fn():
    tf.compat.v1.disable_eager_execution()

    if FLAGS.task_completion_mark and os.path.exists(FLAGS.task_completion_mark):
        # Only warns; execution continues below.
        tf_logging.warning("Task already completed")

    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)
    seq_max = 200
    data_loader = nli.DataLoader(seq_max, "bert_voca.txt", True)

    is_training = FLAGS.do_train
    init_fn = get_checkpoint_init_fn()

    model_name = FLAGS.modeling
    if FLAGS.modeling == NAME_DUMMY_WSSDR:
        tf_logging.info("Using dummy WSSDR")
        model_name = "wssdr"

    model = get_model(seq_max, model_name, is_training)
    model_config = JsonConfig.from_json_file(FLAGS.model_config_file)

    # The config must define these attributes; the thresholds must be
    # non-negative and the train frequency strictly positive.
    assert model_config.lookup_threshold >= 0
    assert model_config.lookup_min_step >= 0
    assert model_config.lookup_train_frequency > 0
    augment_data_loader = DictAugmentedDataLoader(FLAGS.modeling, data_loader,
                                                  FLAGS.use_cache)

    model_path, run_name = get_model_path(FLAGS.output_dir)

    if FLAGS.do_train:
        saved_model = train_nli_w_dict(run_name, model, model_path,
                                       model_config, augment_data_loader,
                                       init_fn)
        if FLAGS.task_completion_mark:
            with open(FLAGS.task_completion_mark, "w") as f:
                f.write("Done")

        tf.compat.v1.reset_default_graph()
        eval_nli_w_dict(run_name, model, saved_model, augment_data_loader)

    elif FLAGS.do_eval:
        eval_nli_w_dict_lookup(run_name, model, model_path,
                               augment_data_loader)

    else:
        demo_nli_w_dict(run_name, model, model_path, augment_data_loader)
Example #5
def main(_):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)

    config = JsonConfig.from_json_file(FLAGS.model_config_file)
    train_config = TrainConfigEx.from_flags(FLAGS)

    is_training = FLAGS.do_train
    input_fn = input_fn_builder(get_input_files_from_flags(FLAGS), FLAGS,
                                is_training)
    model_fn = mask_lm_as_seq2seq(config, train_config)

    run_estimator(model_fn, input_fn)
Example #6
def main(_):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)

    if FLAGS.dbert_config_file:
        FLAGS.model_config_file = FLAGS.dbert_config_file

    tf.io.gfile.makedirs(FLAGS.output_dir)

    input_files = []
    for input_pattern in FLAGS.input_file.split(","):
        input_files.extend(tf.io.gfile.glob(input_pattern))

    lm_pretrain(input_files)

    tf_logging.info("Now terminating process")
Example #7
def run_estimator_loop(model_fn, input_fn_list, output_name_list):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)

    tf.io.gfile.makedirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    config = tf.compat.v1.ConfigProto(allow_soft_placement=False)
    is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.compat.v1.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        session_config=config,
        tpu_config=tf.compat.v1.estimator.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.compat.v1.estimator.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.eval_batch_size,
    )

    if FLAGS.do_predict:
        tf_logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        for input_fn, output_name in zip(input_fn_list, output_name_list):
            tf_logging.info("Predicting for %s", output_name)
            result = estimator.predict(input_fn=input_fn,
                                       yield_single_examples=False)
            with open(output_name, "wb") as f:
                pickle.dump(list(result), f)
    else:
        raise Exception("Only predict expected")
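A hypothetical usage sketch for run_estimator_loop; the shard list, the input_fn_builder call (patterned after Example #5), and the output names are placeholders, not taken from the source:

files = ["data.tfrecord.0", "data.tfrecord.1"]  # hypothetical input shards
# One input_fn and one output pickle per shard.
input_fn_list = [input_fn_builder([f], FLAGS, False) for f in files]
output_name_list = ["pred_{}.pickle".format(i) for i, _ in enumerate(files)]
run_estimator_loop(model_fn, input_fn_list, output_name_list)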
Example #8
def main(_):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)

    # tf_logging.filters[0].excludes.extend(...)  # The original call had no
    # argument (a TypeError at runtime); the exclude patterns were elided from
    # the source, so the call is disabled here rather than guessed at.
    tf.io.gfile.makedirs(FLAGS.output_dir)
    tf_logging.info("Predict Runner")

    file_prefix = FLAGS.input_file
    out_file_template = FLAGS.out_file  # save the template before the loop mutates FLAGS.out_file
    step_size = 100
    for st in range(FLAGS.predict_begin, FLAGS.predict_end, step_size):
        tf_logging.info("Starting {}".format(st))
        input_files = []
        ed = st + step_size
        # Format from the saved template; formatting FLAGS.out_file in place
        # would destroy the placeholders after the first iteration.
        FLAGS.out_file = out_file_template.format(st, ed)
        for i in range(st, ed):
            input_files.append(file_prefix + "{}".format(i))

        lm_pretrain(input_files)
Example #9
def run_estimator(model_fn, input_fn, host_call=None):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)
    #FLAGS.init_checkpoint = auto_resolve_init_checkpoint(FLAGS.init_checkpoint)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    if FLAGS.do_predict:
        tf_logging.addFilter(CounterFilter())

    tpu_cluster_resolver = None
    config = tf.compat.v1.ConfigProto(allow_soft_placement=False)
    run_config = tf.compat.v1.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=config,
        tf_random_seed=FLAGS.random_seed,
    )

    if FLAGS.random_seed is not None:
        tf_logging.info("Using random seed : {}".format(FLAGS.random_seed))
        tf.random.set_seed(FLAGS.random_seed)

    # A plain (non-TPU) Estimator is used here, so no TPU fallback is involved.
    estimator = tf.compat.v1.estimator.Estimator(model_fn=model_fn,
                                                 config=run_config,
                                                 params={'batch_size': 16})

    if FLAGS.do_train:
        tf_logging.info("***** Running training *****")
        tf_logging.info("  Batch size = %d", FLAGS.train_batch_size)

        estimator.train(input_fn=input_fn, max_steps=FLAGS.num_train_steps)
Example #10
def run_estimator(model_fn, input_fn, host_call=None):
    tf_logging.setLevel(logging.INFO)
    if FLAGS.log_debug:
        tf_logging.setLevel(logging.DEBUG)
    #FLAGS.init_checkpoint = auto_resolve_init_checkpoint(FLAGS.init_checkpoint)
    tf.io.gfile.makedirs(FLAGS.output_dir)
    if FLAGS.do_predict:
        tf_logging.addFilter(CounterFilter())

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    tf_logging.info("FLAGS.save_checkpoints_steps = %s", FLAGS.save_checkpoints_steps)
    config = tf.compat.v1.ConfigProto(allow_soft_placement=False)
    is_per_host = tf.compat.v1.estimator.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.compat.v1.estimator.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=config,
        tf_random_seed=FLAGS.random_seed,
        tpu_config=tf.compat.v1.estimator.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    if FLAGS.random_seed is not None:
        tf_logging.info("Using random seed : {}".format(FLAGS.random_seed))
        tf.random.set_seed(FLAGS.random_seed)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.compat.v1.estimator.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.eval_batch_size,
    )

    if FLAGS.do_train:
        tf_logging.info("***** Running training *****")
        tf_logging.info("  Batch size = %d", FLAGS.train_batch_size)

        estimator.train(input_fn=input_fn, max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf_logging.info("***** Running evaluation *****")
        tf_logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        if FLAGS.initialize_to_predict:
            checkpoint = FLAGS.init_checkpoint
        else:
            checkpoint = None
        result = estimator.evaluate(input_fn=input_fn,
                                    steps=FLAGS.max_eval_steps,
                                    checkpoint_path=checkpoint)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.io.gfile.GFile(output_eval_file, "w") as writer:
            tf_logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf_logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
        return result
    if FLAGS.do_predict:
        tf_logging.info("***** Running prediction *****")
        tf_logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        if not FLAGS.initialize_to_predict:
            verify_checkpoint(estimator.model_dir)
            checkpoint = None
            time.sleep(1)
        else:
            checkpoint = FLAGS.init_checkpoint

        result = estimator.predict(input_fn=input_fn,
                                   checkpoint_path=checkpoint,
                                   yield_single_examples=False)
        with open(FLAGS.out_file, "wb") as f:
            pickle.dump(list(result), f)
        tf_logging.info("Prediction saved at {}".format(FLAGS.out_file))
Example #11
                loss1 = vectors["grouped_loss1"][i][t_i][p_i]
                loss2 = vectors["grouped_loss2"][i][t_i][p_i]

                loss1_arr[loc] = loss1
                loss2_arr[loc] = loss2
                assert mask_valid[loc] == 0
                mask_valid[loc] = 1

        features = collections.OrderedDict()
        for key in basic_keys:
            features[key] = create_int_feature(vectors[key][i])

        features["loss_valid"] = create_int_feature(mask_valid)
        features["loss1"] = create_float_feature(loss1_arr)
        features["loss2"] = create_float_feature(loss2_arr)
        features["next_sentence_labels"] = create_int_feature([0])
        writer.write_feature(features)
        #if i < 20:
        #    log_print_feature(features)
    writer.close()
    return "Done"


if __name__ == "__main__":
    tf_logging.setLevel(logging.INFO)
    st = int(sys.argv[1])
    ed = int(sys.argv[2])
    for i in range(st, ed):
        ret = work(i)
        print("Job {} {}".format(i, ret))
Example #12
                    output1.masked_lm_positions[i])
                features = get_segment_and_mask_inner(output1.input_ids[i],
                                                      sep_indice)
            except:
                tokens = tokenzier.convert_ids_to_tokens(output1.input_ids[i])
                print(tokenization.pretty_tokens(tokens))
                print(output1.masked_lm_ids[i])
                print(output1.masked_lm_positions[i])
                raise

        features["next_sentence_labels"] = create_int_feature([0])
        features["masked_lm_positions"] = create_int_feature(
            output1.masked_lm_positions[i])
        features["masked_lm_ids"] = create_int_feature(
            output1.masked_lm_ids[i])
        features["masked_lm_weights"] = create_float_feature(
            output1.masked_lm_weights[i])
        features["loss_base"] = create_float_feature(
            output1.masked_lm_example_loss[i])
        features["loss_target"] = create_float_feature(
            output2.masked_lm_example_loss[i])
        record_writer.write_feature(features)
        ticker.tick()

    record_writer.close()


if __name__ == '__main__':
    tf_logging.setLevel(ab_logging.DEBUG)
    do(sys.argv[1])
Example #13
def simple():
    tf_logging.setLevel(logging.INFO)
    worker = init_worker()
    worker.work(int(sys.argv[1]))