def _loadBertConfig_ja_sp(self):
    """Build a ``modeling.BertConfig`` for the Japanese SentencePiece model.

    Reads the ``[BERT-CONFIG]`` section of ``BERT_JA_DIR/config.ini``,
    converts each value with ``utils.str_to_value``, dumps the result to a
    temporary JSON file (``BertConfig`` can only load from a JSON file), and
    parses it with ``modeling.BertConfig.from_json_file``.

    Returns:
        modeling.BertConfig: the parsed BERT configuration.
    """
    import configparser
    import json
    import tempfile

    from utils import str_to_value

    config = configparser.ConfigParser()
    config.read(str(BERT_JA_DIR / "config.ini"))

    # Context manager closes and deletes the temp file even if parsing
    # raises (the original left the file handle open for the process
    # lifetime).  flush() ensures the JSON is on disk before BertConfig
    # re-opens the file by name.
    with tempfile.NamedTemporaryFile(mode='w+t', encoding='utf-8',
                                     suffix='.json') as bert_config_file:
        bert_config_file.write(
            json.dumps({k: str_to_value(v)
                        for k, v in config['BERT-CONFIG'].items()}))
        bert_config_file.flush()
        return modeling.BertConfig.from_json_file(bert_config_file.name)
def load_estimator(config, FLAGS):
    """Create a ``tf.contrib.tpu.TPUEstimator`` for BERT classification.

    Args:
        config: ``configparser.ConfigParser`` with a ``[BERT-CONFIG]``
            section holding the BERT hyper-parameters as strings.
        FLAGS: parsed flags object; also expected to expose ``task_proc``
            (a processor with ``get_labels()``).

    Returns:
        A ``TPUEstimator`` ready for train/eval/predict.
    """
    # BertConfig can only be loaded from a JSON file, so materialize the
    # [BERT-CONFIG] section as a temporary .json.  The context manager
    # closes/deletes the file afterwards (the original leaked the handle);
    # flush() puts the JSON on disk before it is re-opened by name.
    with tempfile.NamedTemporaryFile(mode='w+t', encoding='utf-8',
                                     suffix='.json') as bert_config_file:
        bert_config_file.write(
            json.dumps({k: str_to_value(v)
                        for k, v in config['BERT-CONFIG'].items()}))
        bert_config_file.flush()
        bert_config = modeling.BertConfig.from_json_file(
            bert_config_file.name)

    # No TPU cluster: run against the configured master (or locally).
    tpu_cluster_resolver = None
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    model_fn = model_fn_builder(
        bert_config=bert_config,
        num_labels=len(FLAGS.task_proc.get_labels()),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.num_train_steps,
        num_warmup_steps=FLAGS.num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        # one-hot embedding lookup is only beneficial on TPU, so it is
        # tied to the same flag
        use_one_hot_embeddings=FLAGS.use_tpu)

    return tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)
# --- Module setup: load BERT hyper-parameters from ../config.ini ----------
# NOTE(review): `configparser` and `json` are used below but are not
# imported in this visible chunk — confirm they are imported elsewhere in
# the file, otherwise this module raises NameError at import time.
import os
import sys
import tempfile

import tensorflow as tf

import utils

# Resolve ../config.ini relative to this file's directory.
CURDIR = os.path.dirname(os.path.abspath(__file__))
CONFIGPATH = os.path.join(CURDIR, os.pardir, 'config.ini')
config = configparser.ConfigParser()
config.read(CONFIGPATH)

# BertConfig can only be loaded from a JSON file, so dump the
# [BERT-CONFIG] section into a temporary .json file.  seek(0) flushes the
# written JSON to disk so it can be re-read by name.
bert_config_file = tempfile.NamedTemporaryFile(mode='w+t',
                                               encoding='utf-8',
                                               suffix='.json')
bert_config_file.write(
    json.dumps(
        {k: utils.str_to_value(v) for k, v in config['BERT-CONFIG'].items()}))
bert_config_file.seek(0)

# Make Google's BERT repository (../bert) importable.
sys.path.append(os.path.join(CURDIR, os.pardir, 'bert'))
import modeling
import optimization

flags = tf.flags
FLAGS = flags.FLAGS

# Required parameters
flags.DEFINE_string(
    "bert_config_file", None,
    "The config json file corresponding to the pre-trained BERT model. "
# --- Module setup: load BERT hyper-parameters from ../config.ini ----------
# NOTE(review): `configparser` is used below but is not imported in this
# visible chunk — confirm it is imported elsewhere in the file, otherwise
# this module raises NameError at import time.
import json
import os
import sys
import tempfile

import tensorflow as tf

import tokenization_sentencepiece as tokenization
import utils

# Resolve ../config.ini relative to this file's directory.
CURDIR = os.path.dirname(os.path.abspath(__file__))
CONFIGPATH = os.path.join(CURDIR, os.pardir, 'config.ini')
config = configparser.ConfigParser()
config.read(CONFIGPATH)

# BertConfig can only be loaded from a JSON file, so dump the
# [BERT-CONFIG] section into a temporary .json file.  seek(0) flushes the
# written JSON to disk so it can be re-read by name.
bert_config_file = tempfile.NamedTemporaryFile(mode='w+t',
                                               encoding='utf-8',
                                               suffix='.json')
bert_config_file.write(json.dumps({k: utils.str_to_value(v)
                                   for k, v in config['BERT-CONFIG'].items()}))
bert_config_file.seek(0)

# Make Google's BERT repository (../bert) importable.
sys.path.append(os.path.join(CURDIR, os.pardir, 'bert'))
import modeling
import optimization

flags = tf.flags
FLAGS = flags.FLAGS

# Required parameters
flags.DEFINE_string(
    "data_dir", None,
    "The input data dir. Should contain the .tsv files (or other data files) "
def main():
    """Run test-set inference with the latest fine-tuned BERT checkpoint
    and report accuracy on the Livedoor corpus.

    Relies on module-level globals: `config`, `FLAGS` (used here as a
    factory taking the fine-tuned model path — confirm), `tempfile`,
    `json`, `modeling`, `tokenization`, and the helper functions called
    below.
    """
    # bert
    # BertConfig can only be loaded from a JSON file, so dump the
    # [BERT-CONFIG] section into a temporary .json file.
    bert_config_file = tempfile.NamedTemporaryFile(mode='w+t',
                                                   encoding='utf-8',
                                                   suffix='.json')
    bert_config_file.write(
        json.dumps(
            {k: str_to_value(v) for k, v in config['BERT-CONFIG'].items()}))
    bert_config_file.seek(0)  # NOTE: rewind/flush so the file is read from the beginning
    bert_config = modeling.BertConfig.from_json_file(bert_config_file.name)

    latest_ckpt = latest_ckpt_model()
    # prefix of the model.ckpt-11052.index / model.ckpt-11052.meta files:
    # strip the data-shard suffix to get the checkpoint prefix
    finetuned_model_path = latest_ckpt.split('.data-00000-of-00001')[0]
    # presumably FLAGS builds a flags namespace pointed at the fine-tuned
    # checkpoint — TODO confirm against its definition
    flags = FLAGS(finetuned_model_path)

    processor = LivedoorProcessor()
    label_list = processor.get_labels()

    # sentencepiece
    tokenizer = tokenization.FullTokenizer(model_file=flags.model_file,
                                           vocab_file=flags.vocab_file,
                                           do_lower_case=flags.do_lower_case)

    # no use TPU
    tpu_cluster_resolver = None
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    # config
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=flags.master,
        model_dir=flags.output_dir,
        save_checkpoints_steps=flags.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=flags.iterations_per_loop,
            num_shards=flags.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=flags.init_checkpoint,
                                learning_rate=flags.learning_rate,
                                num_train_steps=flags.num_train_steps,
                                num_warmup_steps=flags.num_warmup_steps,
                                use_tpu=flags.use_tpu,
                                use_one_hot_embeddings=flags.use_tpu)

    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=flags.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=flags.train_batch_size,
        eval_batch_size=flags.eval_batch_size,
        predict_batch_size=flags.predict_batch_size)

    # fetch the test-data collection
    predict_examples = processor.get_test_examples(flags.data_dir)
    predict_file = tempfile.NamedTemporaryFile(mode='w+t',
                                               encoding='utf-8',
                                               suffix='.tf_record')
    # The two bare strings below are leftover notes: the conversion writes
    # its TFRecord output to predict_file.name.
    """Convert a set of `InputExample`s to a TFRecord file."""
    """出力: predict_file.name """
    # https://github.com/yoheikikuta/bert-japanese/blob/master/src/run_classifier.py#L371-L380
    file_based_convert_examples_to_features(predict_examples, label_list,
                                            flags.max_seq_length, tokenizer,
                                            predict_file.name)

    predict_drop_remainder = True if flags.use_tpu else False
    # build the input_fn closure passed to TPUEstimator
    predict_input_fn = file_based_input_fn_builder(
        input_file=predict_file.name,
        seq_length=flags.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder)

    # inference
    result = estimator.predict(input_fn=predict_input_fn)
    result = list(result)

    # compute accuracy (sic: the helper is named `accracy` elsewhere)
    accracy(result, label_list)