def main(unused_argv):
    """Train and/or evaluate the DenseNet TPUEstimator according to FLAGS.mode.

    Modes:
      * "train": train until `FLAGS.train_steps`.
      * "train_and_eval": alternate between training up to the next
        checkpoint boundary and running a full evaluation pass.
      * anything else: evaluate each new checkpoint found in
        `FLAGS.model_dir` until no checkpoint shows up for
        `FLAGS.eval_timeout` seconds.

    Args:
      unused_argv: Not used.
    """
    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    # Float on purpose: only used to report fractional epochs and as a
    # model parameter.
    batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
    steps_per_checkpoint = FLAGS.steps_per_checkpoint
    iterations_per_loop = FLAGS.iterations_per_loop
    eval_steps = _NUM_EVAL_IMAGES // FLAGS.eval_batch_size

    # Never run more iterations per loop than there are steps between
    # checkpoints.
    if (iterations_per_loop is None or
            steps_per_checkpoint < iterations_per_loop):
        iterations_per_loop = steps_per_checkpoint
    if FLAGS.mode == "eval":
        iterations_per_loop = eval_steps

    params = {
        "batches_per_epoch": batches_per_epoch,
    }

    config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=steps_per_checkpoint,
        log_step_count_steps=iterations_per_loop,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_shards))

    densenet_estimator = contrib_tpu.TPUEstimator(
        model_fn=model_fn,
        config=config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        params=params)

    if FLAGS.mode == "train":
        tf.logging.info(
            "Training for %d steps (%.2f epochs in total)." %
            (FLAGS.train_steps, FLAGS.train_steps / batches_per_epoch))
        densenet_estimator.train(
            input_fn=ImageNetInput(True), max_steps=FLAGS.train_steps)

    elif FLAGS.mode == "train_and_eval":
        current_step = 0
        tf.logging.info(
            "Training for %d steps (%.2f epochs in total). Current "
            "step %d" % (FLAGS.train_steps,
                         FLAGS.train_steps / batches_per_epoch,
                         current_step))
        while current_step < FLAGS.train_steps:
            next_checkpoint = min(current_step + steps_per_checkpoint,
                                  FLAGS.train_steps)
            num_steps = next_checkpoint - current_step
            current_step = next_checkpoint
            densenet_estimator.train(
                input_fn=ImageNetInput(True), steps=num_steps)

            tf.logging.info("Starting to evaluate.")
            eval_results = densenet_estimator.evaluate(
                input_fn=ImageNetInput(False),
                steps=eval_steps)
            tf.logging.info("Eval results: %s" % eval_results)

    else:

        def terminate_eval():
            """Log the timeout and tell the checkpoint iterator to stop."""
            tf.logging.info(
                "Terminating eval after %d seconds of no checkpoints" %
                FLAGS.eval_timeout)
            return True

        # Run evaluation when there's a new checkpoint.
        # If the evaluation worker is delayed in processing a new checkpoint,
        # the checkpoint file may be deleted by the trainer before it can be
        # evaluated. Ignore the error in this case.
        for ckpt in evaluation.checkpoints_iterator(
                FLAGS.model_dir,
                min_interval_secs=FLAGS.min_eval_interval,
                timeout=FLAGS.eval_timeout,
                timeout_fn=terminate_eval):
            tf.logging.info("Starting to evaluate.")
            try:
                eval_results = densenet_estimator.evaluate(
                    input_fn=ImageNetInput(False),
                    steps=eval_steps,
                    checkpoint_path=ckpt)
                tf.logging.info("Eval results: %s" % eval_results)
            except tf.errors.NotFoundError:
                # Pass the checkpoint path so the message names what was
                # skipped instead of printing a literal "%s".
                tf.logging.info(
                    "Checkpoint %s no longer exists, skipping checkpoint",
                    ckpt)
def main(_):
    """Fine-tune, evaluate and/or run prediction for an ALBERT classifier.

    Which phases run is controlled by `FLAGS.do_train`, `FLAGS.do_eval` and
    `FLAGS.do_predict`:

      * Training converts the task's train examples to a TFRecord file (if
        not already cached) and trains until `FLAGS.train_step`.
      * Evaluation polls `FLAGS.output_dir` for checkpoints, evaluates each
        one not yet seen, records progress in "best_trial.txt", prunes
        checkpoints that are neither the best nor among the most recent,
        and finally renames the best checkpoint to "model.ckpt-best".
      * Prediction runs "model.ckpt-best" over the test examples and writes
        "test_results.tsv" and "submit_results.tsv".

    Raises:
      ValueError: If no phase is enabled, neither an ALBERT config file nor
        a hub module handle is given, `FLAGS.max_seq_length` exceeds the
        model's position embeddings, or the task name is unknown.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "cola": classifier_utils.ColaProcessor,
        "mnli": classifier_utils.MnliProcessor,
        "mismnli": classifier_utils.MisMnliProcessor,
        "mrpc": classifier_utils.MrpcProcessor,
        "rte": classifier_utils.RteProcessor,
        "sst-2": classifier_utils.Sst2Processor,
        "sts-b": classifier_utils.StsbProcessor,
        "qqp": classifier_utils.QqpProcessor,
        "qnli": classifier_utils.QnliProcessor,
        "wnli": classifier_utils.WnliProcessor,
    }

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True.")

    if not FLAGS.albert_config_file and not FLAGS.albert_hub_module_handle:
        raise ValueError("At least one of `--albert_config_file` and "
                         "`--albert_hub_module_handle` must be set")

    if FLAGS.albert_config_file:
        albert_config = modeling.AlbertConfig.from_json_file(
            FLAGS.albert_config_file)
        if FLAGS.max_seq_length > albert_config.max_position_embeddings:
            raise ValueError(
                "Cannot use sequence length %d because the ALBERT model "
                "was only trained up to sequence length %d" %
                (FLAGS.max_seq_length, albert_config.max_position_embeddings))
    else:
        albert_config = None  # Get the config from TF-Hub.

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name](
        use_spm=bool(FLAGS.spm_model_file),
        do_lower_case=FLAGS.do_lower_case)

    label_list = processor.get_labels()

    tokenizer = fine_tuning_utils.create_vocab(
        vocab_file=FLAGS.vocab_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file,
        hub_module=FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        # Do not run more iterations per loop than steps between checkpoints.
        iterations_per_loop = int(min(FLAGS.iterations_per_loop,
                                      FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=int(FLAGS.save_checkpoints_steps),
        keep_checkpoint_max=0,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)

    model_fn = classifier_utils.model_fn_builder(
        albert_config=albert_config,
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.train_step,
        num_warmup_steps=FLAGS.warmup_step,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        task_name=task_name,
        hub_module=FLAGS.albert_hub_module_handle,
        optimizer=FLAGS.optimizer)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        cached_dir = FLAGS.cached_dir
        if not cached_dir:
            cached_dir = FLAGS.output_dir
        train_file = os.path.join(cached_dir, task_name + "_train.tf_record")
        # Reuse a previously written feature file when one exists.
        if not tf.gfile.Exists(train_file):
            classifier_utils.file_based_convert_examples_to_features(
                train_examples, label_list, FLAGS.max_seq_length, tokenizer,
                train_file, task_name)
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_examples))
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", FLAGS.train_step)
        train_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size)
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_step)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the
            # number of examples must be a multiple of the batch size, or else
            # examples will get dropped. So we pad with fake examples which
            # are ignored later on. These do NOT count towards the metric (all
            # tf.metrics support a per-instance weight, and these get a weight
            # of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(classifier_utils.PaddingInputExample())

        cached_dir = FLAGS.cached_dir
        if not cached_dir:
            cached_dir = FLAGS.output_dir
        eval_file = os.path.join(cached_dir, task_name + "_eval.tf_record")
        if not tf.gfile.Exists(eval_file):
            classifier_utils.file_based_convert_examples_to_features(
                eval_examples, label_list, FLAGS.max_seq_length, tokenizer,
                eval_file, task_name)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = bool(FLAGS.use_tpu)
        eval_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.eval_batch_size)

        best_trial_info_file = os.path.join(FLAGS.output_dir, "best_trial.txt")

        def _best_trial_info():
            """Returns information about which checkpoints have been evaled so far.

            Reads "last_step:best_step:best_value" from the best-trial file
            when it exists; otherwise every field is -1.
            """
            if tf.gfile.Exists(best_trial_info_file):
                with tf.gfile.GFile(best_trial_info_file, "r") as best_info:
                    global_step, best_metric_global_step, metric_value = (
                        best_info.read().split(":"))
                    global_step = int(global_step)
                    best_metric_global_step = int(best_metric_global_step)
                    metric_value = float(metric_value)
            else:
                metric_value = -1
                best_metric_global_step = -1
                global_step = -1
            tf.logging.info(
                "Best trial info: Step: %s, Best Value Step: %s, "
                "Best Value: %s", global_step, best_metric_global_step,
                metric_value)
            return global_step, best_metric_global_step, metric_value

        def _remove_checkpoint(checkpoint_path):
            """Deletes the meta/data/index files of one checkpoint."""
            for ext in ["meta", "data-00000-of-00001", "index"]:
                src_ckpt = checkpoint_path + ".{}".format(ext)
                tf.logging.info("removing {}".format(src_ckpt))
                tf.gfile.Remove(src_ckpt)

        def _find_valid_cands(curr_step):
            """Lists ".index" files in the output dir newer than `curr_step`.

            The renamed "model.ckpt-best" checkpoint carries no step number
            and is skipped, so a resumed evaluation does not crash on it.
            """
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if not filename.endswith(".index"):
                    continue
                ckpt_name = filename[:-6]
                idx = ckpt_name.split("-")[-1]
                if idx != "best" and int(idx) > curr_step:
                    candidates.append(filename)
            return candidates

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")

        # Metric used to pick the best checkpoint.
        if task_name == "sts-b":
            key_name = "pearson"
        elif task_name == "cola":
            key_name = "matthew_corr"
        else:
            key_name = "eval_accuracy"

        global_step, best_perf_global_step, best_perf = _best_trial_info()
        # The context manager closes the results file even when an
        # evaluation raises.
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            while global_step < FLAGS.train_step:
                # Map of global step -> checkpoint prefix currently on disk.
                steps_and_files = {}
                filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
                for filename in filenames:
                    if not filename.endswith(".index"):
                        continue
                    ckpt_name = filename[:-6]
                    cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
                    if cur_filename.split("-")[-1] == "best":
                        continue
                    gstep = int(cur_filename.split("-")[-1])
                    if gstep not in steps_and_files:
                        tf.logging.info(
                            "Add {} to eval list.".format(cur_filename))
                        steps_and_files[gstep] = cur_filename
                tf.logging.info("found {} files.".format(len(steps_and_files)))
                if not steps_and_files:
                    tf.logging.info("found 0 file, global step: {}. Sleeping."
                                    .format(global_step))
                    time.sleep(60)
                else:
                    for checkpoint in sorted(steps_and_files.items()):
                        step, checkpoint_path = checkpoint
                        if global_step >= step:
                            # Already evaluated: drop it unless it is the
                            # best one or one of the newest checkpoints.
                            if (best_perf_global_step != step and
                                    len(_find_valid_cands(step)) > 1):
                                _remove_checkpoint(checkpoint_path)
                            continue
                        result = estimator.evaluate(
                            input_fn=eval_input_fn,
                            steps=eval_steps,
                            checkpoint_path=checkpoint_path)
                        global_step = result["global_step"]
                        tf.logging.info("***** Eval results *****")
                        for key in sorted(result.keys()):
                            tf.logging.info(" %s = %s", key, str(result[key]))
                            writer.write("%s = %s\n" % (key, str(result[key])))
                        writer.write("best = {}\n".format(best_perf))
                        if result[key_name] > best_perf:
                            best_perf = result[key_name]
                            best_perf_global_step = global_step
                        elif len(_find_valid_cands(global_step)) > 1:
                            _remove_checkpoint(checkpoint_path)
                        writer.write("=" * 50 + "\n")
                        writer.flush()
                        # Persist progress so a restarted job can resume.
                        with tf.gfile.GFile(best_trial_info_file, "w") as best_info:
                            best_info.write("{}:{}:{}".format(
                                global_step, best_perf_global_step, best_perf))

        # Promote the best checkpoint to the fixed "model.ckpt-best" name.
        for ext in ["meta", "data-00000-of-00001", "index"]:
            src_ckpt = "model.ckpt-{}.{}".format(best_perf_global_step, ext)
            tgt_ckpt = "model.ckpt-best.{}".format(ext)
            tf.logging.info("saving {} to {}".format(src_ckpt, tgt_ckpt))
            tf.io.gfile.rename(
                os.path.join(FLAGS.output_dir, src_ckpt),
                os.path.join(FLAGS.output_dir, tgt_ckpt),
                overwrite=True)

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the
            # number of examples must be a multiple of the batch size, or else
            # examples will get dropped. So we pad with fake examples which
            # are ignored later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(classifier_utils.PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        classifier_utils.file_based_convert_examples_to_features(
            predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
            predict_file, task_name)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = bool(FLAGS.use_tpu)
        predict_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size)

        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        result = estimator.predict(
            input_fn=predict_input_fn,
            checkpoint_path=checkpoint_path)

        output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
        output_submit_file = os.path.join(FLAGS.output_dir, "submit_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as pred_writer, \
                tf.gfile.GFile(output_submit_file, "w") as sub_writer:
            sub_writer.write("index" + "\t" + "prediction\n")
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, (example, prediction)) in \
                    enumerate(zip(predict_examples, result)):
                probabilities = prediction["probabilities"]
                # Everything past the real examples is TPU padding.
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                pred_writer.write(output_line)

                if task_name != "sts-b":
                    actual_label = label_list[int(prediction["predictions"])]
                else:
                    # STS-B is a regression task: emit the raw score.
                    actual_label = str(prediction["predictions"])
                sub_writer.write(example.guid + "\t" + actual_label + "\n")
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
def main(_):
    """Fine-tune ALBERT on SQuAD v2.0 and/or evaluate its checkpoints.

    With `FLAGS.do_train`, the training examples are converted to features
    (unless `FLAGS.train_feature_file` already exists) and the model is
    trained. With `FLAGS.do_predict`, every checkpoint appearing in
    `FLAGS.output_dir` is scored, the one with the highest "f1" is copied to
    "model.ckpt-best", older checkpoints are pruned, and the results are
    written to "eval_results.txt".

    The training examples are always read, because the number of training
    steps derived from them also bounds the evaluation polling loop.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    albert_config = modeling.BertConfig.from_json_file(
        FLAGS.albert_config_file)

    validate_flags_or_throw(albert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        # Do not run more iterations per loop than steps between checkpoints.
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        keep_checkpoint_max=0,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    num_warmup_steps = None
    train_examples = squad_utils.read_squad_examples(
        input_file=FLAGS.train_file, is_training=True)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    if FLAGS.do_train:
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

        # Pre-shuffle the input to avoid having to make a very large shuffle
        # buffer in the `input_fn`.
        rng = random.Random(12345)
        rng.shuffle(train_examples)

    model_fn = squad_utils.v2_model_fn_builder(
        albert_config=albert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        max_seq_length=FLAGS.max_seq_length,
        start_n_top=FLAGS.start_n_top,
        end_n_top=FLAGS.end_n_top,
        dropout_prob=FLAGS.dropout_prob,
        hub_module=FLAGS.albert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant
        # tensors in memory.
        if not tf.gfile.Exists(FLAGS.train_feature_file):
            train_writer = squad_utils.FeatureWriter(
                filename=os.path.join(FLAGS.train_feature_file),
                is_training=True)
            squad_utils.convert_examples_to_features(
                examples=train_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=True,
                output_fn=train_writer.process_feature,
                do_lower_case=FLAGS.do_lower_case)
            train_writer.close()

        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num orig examples = %d", len(train_examples))
        # tf.logging.info(" Num split examples = %d", train_writer.num_features)
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        # Free the raw examples; training reads from the feature file.
        del train_examples

        train_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.train_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size,
            is_v2=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_predict:
        with tf.gfile.Open(FLAGS.predict_file) as predict_file:
            prediction_json = json.load(predict_file)["data"]
        eval_examples = squad_utils.read_squad_examples(
            input_file=FLAGS.predict_file, is_training=False)

        if (tf.gfile.Exists(FLAGS.predict_feature_file) and
                tf.gfile.Exists(FLAGS.predict_feature_left_file)):
            tf.logging.info("Loading eval features from {}".format(
                FLAGS.predict_feature_left_file))
            # NOTE: unpickling can execute arbitrary code; the cache file
            # must come from a trusted location.
            with tf.gfile.Open(FLAGS.predict_feature_left_file, "rb") as fin:
                eval_features = pickle.load(fin)
        else:
            eval_writer = squad_utils.FeatureWriter(
                filename=FLAGS.predict_feature_file, is_training=False)
            eval_features = []

            def append_feature(feature):
                """Keeps the feature in memory and writes it to the record file."""
                eval_features.append(feature)
                eval_writer.process_feature(feature)

            squad_utils.convert_examples_to_features(
                examples=eval_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=False,
                output_fn=append_feature,
                do_lower_case=FLAGS.do_lower_case)
            eval_writer.close()

            # Cache the in-memory features for later runs.
            with tf.gfile.Open(FLAGS.predict_feature_left_file, "wb") as fout:
                pickle.dump(eval_features, fout)

        tf.logging.info("***** Running predictions *****")
        tf.logging.info(" Num orig examples = %d", len(eval_examples))
        tf.logging.info(" Num split examples = %d", len(eval_features))
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.predict_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size,
            is_v2=True)

        def get_result(checkpoint):
            """Evaluate the checkpoint on SQuAD v2.0.

            Returns:
              A `(metrics, global_step)` tuple: the value returned by
              `squad_utils.evaluate_v2` and the checkpoint's global step as
              an int.
            """
            # If running eval on the TPU, you will need to specify the number
            # of steps.
            reader = tf.train.NewCheckpointReader(checkpoint)
            global_step = reader.get_tensor(tf.GraphKeys.GLOBAL_STEP)
            all_results = []
            for result in estimator.predict(
                    predict_input_fn, yield_single_examples=True,
                    checkpoint_path=checkpoint):
                if len(all_results) % 1000 == 0:
                    tf.logging.info(
                        "Processing example: %d" % (len(all_results)))
                unique_id = int(result["unique_ids"])
                start_top_log_probs = (
                    [float(x) for x in result["start_top_log_probs"].flat])
                start_top_index = [
                    int(x) for x in result["start_top_index"].flat]
                end_top_log_probs = (
                    [float(x) for x in result["end_top_log_probs"].flat])
                end_top_index = [int(x) for x in result["end_top_index"].flat]

                cls_logits = float(result["cls_logits"].flat[0])
                all_results.append(
                    squad_utils.RawResultV2(
                        unique_id=unique_id,
                        start_top_log_probs=start_top_log_probs,
                        start_top_index=start_top_index,
                        end_top_log_probs=end_top_log_probs,
                        end_top_index=end_top_index,
                        cls_logits=cls_logits))

            output_prediction_file = os.path.join(
                FLAGS.output_dir, "predictions.json")
            output_nbest_file = os.path.join(
                FLAGS.output_dir, "nbest_predictions.json")
            output_null_log_odds_file = os.path.join(
                FLAGS.output_dir, "null_odds.json")

            result_dict = {}
            cls_dict = {}
            squad_utils.accumulate_predictions_v2(
                result_dict, cls_dict, eval_examples, eval_features,
                all_results, FLAGS.n_best_size, FLAGS.max_answer_length,
                FLAGS.start_n_top, FLAGS.end_n_top)

            return squad_utils.evaluate_v2(
                result_dict, cls_dict, prediction_json, eval_examples,
                eval_features, all_results, FLAGS.n_best_size,
                FLAGS.max_answer_length, output_prediction_file,
                output_nbest_file,
                output_null_log_odds_file), int(global_step)

        def _find_valid_cands(curr_step):
            """Lists ".index" files in the output dir newer than `curr_step`."""
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if not filename.endswith(".index"):
                    continue
                ckpt_name = filename[:-6]
                idx = ckpt_name.split("-")[-1]
                if idx != "best" and int(idx) > curr_step:
                    candidates.append(filename)
            return candidates

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        key_name = "f1"
        # The context manager guarantees the results file is flushed and
        # closed; the original handle was never closed.
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            if tf.gfile.Exists(checkpoint_path + ".index"):
                # Resume from a previously selected best checkpoint.
                result = get_result(checkpoint_path)
                best_perf = result[0][key_name]
                global_step = result[1]
            else:
                global_step = -1
                best_perf = -1
                checkpoint_path = None

            while global_step < num_train_steps:
                # Map of global step -> checkpoint prefix currently on disk.
                steps_and_files = {}
                filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
                for filename in filenames:
                    if not filename.endswith(".index"):
                        continue
                    ckpt_name = filename[:-6]
                    cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
                    if cur_filename.split("-")[-1] == "best":
                        continue
                    gstep = int(cur_filename.split("-")[-1])
                    if gstep not in steps_and_files:
                        tf.logging.info(
                            "Add {} to eval list.".format(cur_filename))
                        steps_and_files[gstep] = cur_filename
                tf.logging.info("found {} files.".format(len(steps_and_files)))
                if not steps_and_files:
                    tf.logging.info(
                        "found 0 file, global step: {}. Sleeping.".format(
                            global_step))
                    time.sleep(60)
                else:
                    for ele in sorted(steps_and_files.items()):
                        step, checkpoint_path = ele
                        if global_step >= step:
                            # Already evaluated: remove it once newer
                            # checkpoints exist.
                            if len(_find_valid_cands(step)) > 1:
                                for ext in [
                                        "meta", "data-00000-of-00001", "index"
                                ]:
                                    src_ckpt = checkpoint_path + ".{}".format(ext)
                                    tf.logging.info(
                                        "removing {}".format(src_ckpt))
                                    tf.gfile.Remove(src_ckpt)
                            continue
                        result, global_step = get_result(checkpoint_path)
                        tf.logging.info("***** Eval results *****")
                        for key in sorted(result.keys()):
                            tf.logging.info(" %s = %s", key, str(result[key]))
                            writer.write("%s = %s\n" % (key, str(result[key])))
                        if result[key_name] > best_perf:
                            best_perf = result[key_name]
                            # Keep a copy of the new best checkpoint.
                            for ext in ["meta", "data-00000-of-00001", "index"]:
                                src_ckpt = checkpoint_path + ".{}".format(ext)
                                tgt_ckpt = checkpoint_path.rsplit(
                                    "-", 1)[0] + "-best.{}".format(ext)
                                tf.logging.info("saving {} to {}".format(
                                    src_ckpt, tgt_ckpt))
                                tf.gfile.Copy(src_ckpt, tgt_ckpt, overwrite=True)
                                writer.write("saved {} to {}\n".format(
                                    src_ckpt, tgt_ckpt))
                        writer.write("best {} = {}\n".format(key_name, best_perf))
                        tf.logging.info(" best {} = {}\n".format(
                            key_name, best_perf))

                        if len(_find_valid_cands(global_step)) > 2:
                            for ext in ["meta", "data-00000-of-00001", "index"]:
                                src_ckpt = checkpoint_path + ".{}".format(ext)
                                tf.logging.info("removing {}".format(src_ckpt))
                                tf.gfile.Remove(src_ckpt)
                        writer.write("=" * 50 + "\n")

            # Report the metrics of the overall best checkpoint.
            checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
            result, global_step = get_result(checkpoint_path)
            tf.logging.info("***** Final Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
            writer.write("best perf happened at step: {}".format(global_step))
def main(_):
    """Run ALBERT pretraining and/or evaluation.

    With `FLAGS.do_train`, trains on the files matched by `FLAGS.input_file`
    until `FLAGS.num_train_steps`. With `FLAGS.do_eval`, repeatedly
    evaluates the latest checkpoint, appending each metric to
    "eval_results.txt" in `FLAGS.output_dir`, until the evaluated global
    step reaches `FLAGS.num_train_steps`.

    Raises:
      ValueError: If neither `do_train` nor `do_eval` is set.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    # `input_file` is a comma-separated list of glob patterns.
    input_files = []
    for input_pattern in FLAGS.input_file.split(","):
        input_files.extend(tf.gfile.Glob(input_pattern))

    tf.logging.info("*** Input Files ***")
    for input_file in input_files:
        tf.logging.info(" %s", input_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    model_fn = model_fn_builder(
        albert_config=albert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.num_train_steps,
        num_warmup_steps=FLAGS.num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        optimizer=FLAGS.optimizer,
        poly_power=FLAGS.poly_power,
        start_warmup_step=FLAGS.start_warmup_step)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        train_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=True)
        estimator.train(input_fn=train_input_fn,
                        max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
        global_step = -1
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        tf.gfile.MakeDirs(FLAGS.export_dir)
        eval_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=False)
        # The context manager closes (and thereby flushes) the results file;
        # the original handle was never closed.
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            while global_step < FLAGS.num_train_steps:
                if estimator.latest_checkpoint() is None:
                    tf.logging.info("No checkpoint found yet. Sleeping.")
                    time.sleep(1)
                else:
                    result = estimator.evaluate(
                        input_fn=eval_input_fn, steps=FLAGS.max_eval_steps)
                    global_step = result["global_step"]
                    tf.logging.info("***** Eval results *****")
                    for key in sorted(result.keys()):
                        tf.logging.info(" %s = %s", key, str(result[key]))
                        writer.write("%s = %s\n" % (key, str(result[key])))
def main(_):
    """Train, evaluate and/or run prediction for a BERT sequence classifier.

    The task named by ``FLAGS.task_name`` selects a data processor. Depending
    on ``FLAGS.do_train``, ``FLAGS.do_eval`` and ``FLAGS.do_predict`` the
    function fine-tunes the model, evaluates checkpoints found in
    ``FLAGS.output_dir`` (either continuously or in a single sweep) and writes
    class probabilities for the test set to ``test_results.tsv``.

    Args:
        _: Unused.

    Raises:
        ValueError: If no mode flag is set, if ``FLAGS.max_seq_length`` exceeds
            the model's ``max_position_embeddings``, or if the task name is
            unknown.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # NOTE(review): "rte" is mapped to QnliProcessor in the original table;
    # confirm this is intentional.
    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
        "xnli": XnliProcessor,
        "qqp": QqpProcessor,
        "qnli": QnliProcessor,
        "sst2": Sst2Processor,
        "rte": QnliProcessor,
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict` "
            "must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    # A cluster resolver is only needed when a named TPU is requested.
    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            tpu_job_name=FLAGS.tpu_job_name,
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        bert_config=bert_config,
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_examples))
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the
            # number of examples must be a multiple of the batch size, or else
            # examples will get dropped. So we pad with fake examples which
            # are ignored later on. These do NOT count towards the metric (all
            # tf.metrics support a per-instance weight, and these get a weight
            # of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        # An existing eval record file is reused rather than regenerated.
        if not tf.gfile.Exists(eval_file):
            file_based_convert_examples_to_features(eval_examples, label_list,
                                                    FLAGS.max_seq_length,
                                                    tokenizer, eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        if FLAGS.do_continuous_eval:
            # NOTE(review): `ckpt` is only used in the error message; the
            # evaluate call below does not receive it as checkpoint_path, so
            # it evaluates whatever the estimator resolves by default.
            for ckpt in contrib_training.checkpoints_iterator(
                    estimator.model_dir):
                try:
                    result = estimator.evaluate(input_fn=eval_input_fn,
                                                steps=eval_steps)
                    output_eval_file = os.path.join(FLAGS.output_dir,
                                                    "eval_results.txt")
                    with tf.gfile.GFile(output_eval_file, "w+") as writer:
                        tf.logging.info("***** Eval results *****")
                        for key in sorted(result.keys()):
                            tf.logging.info(" %s = %s", key,
                                            str(result[key]))
                            writer.write("%s = %s\n" %
                                         (key, str(result[key])))
                except tf.errors.NotFoundError:
                    tf.logging.error(
                        "Checkpoint path '%s' no longer exists.", ckpt)
        else:

            def _find_valid_cands(curr_step):
                """Return the .index files whose step is above curr_step."""
                filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
                candidates = []
                for filename in filenames:
                    if filename.endswith(".index"):
                        ckpt_name = filename[:-6]
                        idx = ckpt_name.split("-")[-1]
                        if idx != "best" and int(idx) > curr_step:
                            candidates.append(filename)
                return candidates

            tf.logging.info("Evaling all models in output dir")
            output_eval_file = os.path.join(FLAGS.output_dir,
                                            "eval_results.txt")
            checkpoint_path = os.path.join(FLAGS.output_dir,
                                           "model.ckpt-best")
            key_name = "eval_accuracy"
            tf.logging.info("Checkpoint path " + checkpoint_path)
            if tf.gfile.Exists(checkpoint_path + ".index"):
                tf.logging.info("Found a best model... not good")
                result = estimator.evaluate(
                    input_fn=eval_input_fn,
                    steps=eval_steps,
                    checkpoint_path=checkpoint_path)
                best_perf = result[key_name]
                global_step = result["global_step"]
            else:
                tf.logging.info("Setting global step to -1")
                global_step = -1
                best_perf = -1
                checkpoint_path = None

            tf.logging.info("Openning writer " + output_eval_file)
            # The context manager guarantees the results file is closed even
            # when listing the directory or an evaluation call raises.
            with tf.gfile.GFile(output_eval_file, "w") as writer:
                # Map each global step to its checkpoint prefix, skipping the
                # "-best" copy.
                steps_and_files = {}
                filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
                tf.logging.info("Models found " + "\n".join(filenames))
                for filename in filenames:
                    if filename.endswith(".index"):
                        ckpt_name = filename[:-6]
                        cur_filename = os.path.join(FLAGS.output_dir,
                                                    ckpt_name)
                        if cur_filename.split("-")[-1] == "best":
                            continue
                        gstep = int(cur_filename.split("-")[-1])
                        if gstep not in steps_and_files:
                            tf.logging.info(
                                "Add {} to eval list.".format(cur_filename))
                            steps_and_files[gstep] = cur_filename
                tf.logging.info(
                    "found {} files.".format(len(steps_and_files)))

                if not steps_and_files:
                    tf.logging.info(
                        "found 0 file, global step: {}. Sleeping.".format(
                            global_step))
                else:
                    # Evaluate checkpoints in increasing step order.
                    for ele in sorted(steps_and_files.items()):
                        step, checkpoint_path = ele
                        if global_step >= step:
                            if len(_find_valid_cands(step)) > 1:
                                for ext in ["meta", "data-00000-of-00001",
                                            "index"]:
                                    src_ckpt = checkpoint_path + ".{}".format(
                                        ext)
                                    tf.logging.info(
                                        "removing {}".format(src_ckpt))
                                    # GOOGLE-INTERNAL TODO(daniter):
                                    # Why should we remove checkpoints?
                                    # Removal is disabled; only the log line
                                    # above is emitted.
                            tf.logging.info(
                                "Skipping candidate for some reason")
                            continue
                        result = estimator.evaluate(
                            input_fn=eval_input_fn,
                            steps=eval_steps,
                            checkpoint_path=checkpoint_path)
                        global_step = result["global_step"]
                        tf.logging.info("***** Eval results *****")
                        for key in sorted(result.keys()):
                            tf.logging.info(" %s = %s", key,
                                            str(result[key]))
                            writer.write("%s = %s\n" %
                                         (key, str(result[key])))
                        # best_perf is not updated here: copying the best
                        # checkpoint to "model.ckpt-best" is disabled, so the
                        # value only reflects a pre-existing best checkpoint.
                        writer.write("best = {}\n".format(best_perf))
                        if len(_find_valid_cands(global_step)) > 1:
                            for ext in ["meta", "data-00000-of-00001",
                                        "index"]:
                                src_ckpt = checkpoint_path + ".{}".format(ext)
                                # Removal is disabled; only logged.
                                tf.logging.info(
                                    "removing {}".format(src_ckpt))
                        writer.write("=" * 50 + "\n")

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the
            # number of examples must be a multiple of the batch size, or else
            # examples will get dropped. So we pad with fake examples which
            # are ignored later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                probabilities = prediction["probabilities"]
                # Stop once the real examples are exhausted; the rest is
                # padding added above.
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
def main(_):
    """Train a BERT Yes/No classifier and/or evaluate it on dev and test.

    Depending on ``FLAGS.do_train``, ``FLAGS.do_eval_dev`` and
    ``FLAGS.do_eval_test`` the function fine-tunes the model on the examples
    returned by ``get_train()`` and evaluates it on ``get_dev()`` and/or
    ``get_test()``, writing one ``<name>_eval_results.txt`` per evaluated
    split into ``FLAGS.output_dir``.

    Args:
        _: Unused.

    Raises:
        ValueError: If no mode flag is set, or if ``FLAGS.max_seq_length``
            exceeds the model's ``max_position_embeddings``.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    if (not FLAGS.do_train and not FLAGS.do_eval_dev
            and not FLAGS.do_eval_test):
        raise ValueError("At least one of `do_train`, `do_eval_dev` or "
                         "`do_eval_test` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    # A third "Neutral" label is added when starting from a three-class model.
    label_list = ["Yes", "No"]
    if FLAGS.from_three_class_model:
        label_list.append("Neutral")

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    # A cluster resolver is only needed when a named TPU is requested.
    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = get_train()
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        bert_config=bert_config,
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_examples))
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    # Collect the (examples, split name) pairs that were requested.
    eval_on = []
    if FLAGS.do_eval_dev:
        eval_on.append((get_dev(), "dev"))
    if FLAGS.do_eval_test:
        eval_on.append((get_test(), "test"))

    for eval_examples, name in eval_on:
        eval_file = os.path.join(FLAGS.output_dir, "%s.tf_record" % name)
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.logging.info("***** Running %s *****" % name)
        tf.logging.info(" Num examples = %d", len(eval_examples))
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            # Eval will be slightly WRONG on the TPU because it will truncate
            # the last batch.
            eval_steps = len(eval_examples) // FLAGS.eval_batch_size

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir,
                                        "%s_eval_results.txt" % name)
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** %s eval results *****" % name)
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
def main(_):
    """Train a BERT model on precomputed features and/or write predictions.

    With ``FLAGS.do_train`` the model is trained on the TFRecord files matched
    by ``FLAGS.train_precomputed_file``. With ``FLAGS.do_predict`` the examples
    in ``FLAGS.predict_file`` are converted to features, scored by the
    estimator and the resulting predictions are written as JSON to
    ``FLAGS.output_prediction_file``.

    Args:
        _: Unused.

    Raises:
        ValueError: If ``do_predict`` is set without
            ``output_prediction_file``.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    validate_flags_or_throw(bert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    # A cluster resolver is only needed when a named TPU is requested.
    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        # The number of training features is supplied by flag because the
        # features are precomputed and not counted here.
        num_train_features = FLAGS.train_num_precomputed
        num_train_steps = int(num_train_features / FLAGS.train_batch_size *
                              FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        bert_config=bert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this falls back to normal Estimator on CPU or
    # GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        tf.logging.info(
            "***** Running training on precomputed features *****")
        tf.logging.info(" Num split examples = %d", num_train_features)
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        train_filenames = tf.gfile.Glob(FLAGS.train_precomputed_file)
        train_input_fn = input_fn_builder(
            input_file=train_filenames,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_predict:
        if not FLAGS.output_prediction_file:
            raise ValueError(
                "--output_prediction_file must be defined in predict mode.")

        eval_examples = read_nq_examples(
            input_file=FLAGS.predict_file, is_training=False)
        eval_writer = FeatureWriter(
            filename=os.path.join(FLAGS.output_dir, "eval.tf_record"),
            is_training=False)
        eval_features = []

        def append_feature(feature):
            """Record a feature in memory and write it to the record file."""
            eval_features.append(feature)
            eval_writer.process_feature(feature)

        num_spans_to_ids = convert_examples_to_features(
            examples=eval_examples,
            tokenizer=tokenizer,
            is_training=False,
            output_fn=append_feature)
        eval_writer.close()
        eval_filename = eval_writer.filename

        tf.logging.info("***** Running predictions *****")
        tf.logging.info(" Num orig examples = %d", len(eval_examples))
        tf.logging.info(" Num split examples = %d", len(eval_features))
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)
        # .items() works on both Python 2 and 3; .iteritems() is Python 2
        # only.
        for spans, ids in num_spans_to_ids.items():
            tf.logging.info(" Num split into %d = %d", spans, len(ids))

        predict_input_fn = input_fn_builder(
            input_file=eval_filename,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False)

        # If running eval on the TPU, you will need to specify the number of
        # steps.
        all_results = []
        for result in estimator.predict(
                predict_input_fn, yield_single_examples=True):
            if len(all_results) % 1000 == 0:
                tf.logging.info("Processing example: %d" %
                                (len(all_results)))
            unique_id = int(result["unique_ids"])
            start_logits = [float(x) for x in result["start_logits"].flat]
            end_logits = [float(x) for x in result["end_logits"].flat]
            answer_type_logits = [
                float(x) for x in result["answer_type_logits"].flat
            ]
            all_results.append(
                RawResult(
                    unique_id=unique_id,
                    start_logits=start_logits,
                    end_logits=end_logits,
                    answer_type_logits=answer_type_logits))

        candidates_dict = read_candidates(FLAGS.predict_file)
        # Re-read the features from disk as tf.train.Example protos; this
        # replaces the in-memory list collected above.
        eval_features = [
            tf.train.Example.FromString(r)
            for r in tf.python_io.tf_record_iterator(eval_filename)
        ]
        nq_pred_dict = compute_pred_dict(candidates_dict, eval_features,
                                         [r._asdict() for r in all_results])
        # Materialise the values: a dict view is not JSON serialisable on
        # Python 3.
        predictions_json = {"predictions": list(nq_pred_dict.values())}
        with tf.gfile.Open(FLAGS.output_prediction_file, "w") as f:
            json.dump(predictions_json, f, indent=4)
def reason(
        input_dir,
        output_dir,
        overwrite=False,
        model=gin.REQUIRED,
        num_iterations=gin.REQUIRED,
        training_steps_per_iteration=gin.REQUIRED,
        eval_steps_per_iteration=gin.REQUIRED,
        random_seed=gin.REQUIRED,
        batch_size=gin.REQUIRED,
        name="",
):
    """Trains a model in rounds, scoring validation and test after each round.

    After every training round the estimator is evaluated twice (validation
    and test). The round with the highest validation accuracy is stored under
    the "best" key, and all results are written to `output_dir`/results.

    If the gin binding 'dataset.name' is "auto", the data set name is read
    from the gin config saved under `input_dir`.

    Args:
      input_dir: String with path to the directory of the previous pipeline
        step, or None.
      output_dir: String with the path where the results should be saved.
      overwrite: Boolean indicating whether to overwrite output directory.
      model: Object whose `model_fn` is handed to the estimator.
      num_iterations: Integer with the number of train/evaluate rounds.
      training_steps_per_iteration: Integer with number of training steps per
        round.
      eval_steps_per_iteration: Integer with number of validation and test
        batches per round.
      random_seed: Integer with random seed used for training.
      batch_size: Integer with the batch size.
      name: Optional string with name of the model (can be used to name
        models).

    Raises:
      ValueError: If `output_dir` exists and `overwrite` is False, or if the
        data set should be inferred but `input_dir` is None.
    """
    # 'name' is not used here; it only exists so that it ends up in the saved
    # gin config and can be used to label results.
    del name

    # Refuse to clobber an existing output directory unless asked to.
    if tf.gfile.IsDirectory(output_dir):
        if not overwrite:
            raise ValueError(
                "Directory already exists and overwrite is False.")
        tf.gfile.DeleteRecursively(output_dir)

    # All seeds for training and evaluation are drawn from this state.
    seed_source = np.random.RandomState(random_seed)

    # Resolve an "auto" data set by rebinding the gin parameter, so that the
    # saved gin config names the actual data set.
    if gin.query_parameter("dataset.name") == "auto":
        if input_dir is None:
            raise ValueError(
                "Cannot automatically infer data set for methods with"
                " no prior model directory.")
        # The data set name comes from the gin config of the previous step.
        previous_gin_file = os.path.join(input_dir, "results", "gin",
                                         "postprocess.gin")
        previous_gin = results.gin_dict(previous_gin_file)
        inferred_name = previous_gin["dataset.name"].replace("'", "")
        with gin.unlock_config():
            gin.bind_parameter("dataset.name", inferred_name)
    dataset = pgm_data.get_pgm_dataset()

    # Point the hub embedding at the module stored in the input directory.
    if input_dir is not None:
        hub_path = os.path.join(input_dir, "tfhub")
        with gin.unlock_config():
            gin.bind_parameter("HubEmbedding.hub_path", hub_path)

    # A TPUEstimator is used with use_tpu=False, so that TPU training could be
    # enabled in the future.
    estimator_config = contrib_tpu.RunConfig(
        tf_random_seed=random_seed,
        keep_checkpoint_max=1,
        tpu_config=contrib_tpu.TPUConfig(iterations_per_loop=500))
    checkpoint_dir = os.path.join(output_dir, "tf_checkpoint")
    tpu_estimator = contrib_tpu.TPUEstimator(
        use_tpu=False,
        model_fn=model.model_fn,
        model_dir=checkpoint_dir,
        train_batch_size=batch_size,
        eval_batch_size=batch_size,
        config=estimator_config)

    # Start of the wall-clock measurement reported as "elapsed_time".
    started_at = time.time()

    results_by_step = {}
    validation_accuracies = []
    per_iteration_dicts = []

    for iteration in range(num_iterations):
        steps_so_far = iteration * training_steps_per_iteration
        tf.logging.info("Training to %d steps.", steps_so_far)

        # One round of training with a freshly drawn seed.
        train_seed = seed_source.randint(2**32)
        tpu_estimator.train(
            input_fn=dataset.make_input_fn(train_seed),
            steps=training_steps_per_iteration)

        # Validation scores drive the model selection below.
        validation_seed = seed_source.randint(2**32)
        validation_results = tpu_estimator.evaluate(
            input_fn=dataset.make_input_fn(
                validation_seed, num_batches=eval_steps_per_iteration))
        validation_accuracies.append(validation_results["accuracy"])
        tf.logging.info("Validation results %s", validation_results)

        # Test scores are what gets reported for the selected round.
        test_seed = seed_source.randint(2**32)
        test_results = tpu_estimator.evaluate(
            input_fn=dataset.make_input_fn(
                test_seed, num_batches=eval_steps_per_iteration),
            name="test")

        round_results = results.namespaced_dict(val=validation_results,
                                                test=test_results)
        results_by_step["step{}".format(steps_so_far)] = round_results
        per_iteration_dicts.append(round_results)

    # The round with the highest validation accuracy is stored as "best".
    best_round = np.argmax(validation_accuracies)
    results_by_step["best"] = per_iteration_dicts[best_round]

    # The results directory carries along the results and configs of earlier
    # pipeline steps so that everything is available for analysis at the end.
    original_results_dir = (
        os.path.join(input_dir, "results") if input_dir is not None else None)
    results_dict = results.namespaced_dict(**results_by_step)
    results_dir = os.path.join(output_dir, "results")
    results_dict["elapsed_time"] = time.time() - started_at
    results.update_result_directory(results_dir, "abstract_reasoning",
                                    results_dict, original_results_dir)
def main(_):
    """Train a BERT span model on one cross-validation fold and/or predict.

    For each fold in the hard-coded fold list the function builds an estimator
    whose model directory is ``FLAGS.output_dir + "_<fold>"``. With
    ``FLAGS.do_train`` it trains on ``CV_data/data<fold>/train_data.csv`` while
    an ``EvalHook`` scores ``dev_data.csv``. With ``FLAGS.do_predict`` it
    scores ``filter_data/test_data.csv`` and writes the top three answers per
    feature to ``results2.csv``.

    Args:
        _: Unused.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    # Flag validation is disabled here:
    # validate_flags_or_throw(bert_config)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    # A cluster resolver is only needed when a named TPU is requested.
    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    for fold_i in [2]:
        # Every fold gets its own model/output directory.
        fold_dir = FLAGS.output_dir + "_{}".format(fold_i)
        tf.gfile.MakeDirs(fold_dir)
        run_config = tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            master=FLAGS.master,
            model_dir=fold_dir,
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
            tpu_config=tpu.TPUConfig(
                iterations_per_loop=FLAGS.iterations_per_loop,
                num_shards=FLAGS.num_tpu_cores,
                per_host_input_for_training=is_per_host))
        session_config = tf.ConfigProto(log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        run_config = run_config.replace(session_config=session_config)
        run_config = run_config.replace(keep_checkpoint_max=2)

        train_examples = None
        num_train_steps = None
        num_warmup_steps = None
        if FLAGS.do_train:
            train_examples = read_squad_examples(
                input_file="CV_data/data{}/train_data.csv".format(fold_i),
                is_training=True)
            num_train_steps = int(
                len(train_examples) / FLAGS.train_batch_size *
                FLAGS.num_train_epochs)
            num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

            # Pre-shuffle the input to avoid having to make a very large
            # shuffle buffer in the `input_fn`.
            rng = random.Random(12345)
            rng.shuffle(train_examples)

        model_fn = model_fn_builder(
            bert_config=bert_config,
            init_checkpoint=FLAGS.init_checkpoint,
            learning_rate=FLAGS.learning_rate,
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps,
            use_tpu=FLAGS.use_tpu,
            use_one_hot_embeddings=FLAGS.use_tpu)

        # If TPU is not available, this will fall back to normal Estimator on
        # CPU or GPU.
        estimator = tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

        if FLAGS.do_train:
            # We write to a temporary file to avoid storing very large
            # constant tensors in memory.
            train_writer = FeatureWriter(
                filename=os.path.join(fold_dir, "train.tf_record"),
                is_training=True)
            convert_examples_to_features(
                examples=train_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=True,
                output_fn=train_writer.process_feature)
            train_writer.close()

            tf.logging.info("***** Running training *****")
            tf.logging.info(" Num orig examples = %d", len(train_examples))
            tf.logging.info(" Num split examples = %d",
                            train_writer.num_features)
            tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
            tf.logging.info(" Num steps = %d", num_train_steps)
            del train_examples

            # The dev split of this fold is converted up front so the
            # EvalHook can score it during training.
            eval_examples = read_squad_examples(
                input_file="CV_data/data{}/dev_data.csv".format(fold_i),
                is_training=True)

            eval_writer = FeatureWriter(
                filename=os.path.join(fold_dir, "eval.tf_record"),
                is_training=False)
            eval_features = []

            def append_feature(feature):
                """Keep the dev feature in memory and write it to disk."""
                eval_features.append(feature)
                eval_writer.process_feature(feature)

            convert_examples_to_features(
                examples=eval_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=False,
                output_fn=append_feature)
            eval_writer.close()

            tf.logging.info("***** Running predictions *****")
            tf.logging.info(" Num orig examples = %d", len(eval_examples))
            tf.logging.info(" Num split examples = %d", len(eval_features))
            tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

            train_input_fn = input_fn_builder(
                input_file=train_writer.filename,
                seq_length=FLAGS.max_seq_length,
                is_training=True,
                drop_remainder=True)
            estimator.train(
                input_fn=train_input_fn,
                max_steps=num_train_steps,
                hooks=[
                    EvalHook(estimator,
                             eval_writer.filename,
                             "CV_data/data{}/dev_data.csv".format(fold_i),
                             eval_features,
                             eval_steps=FLAGS.save_checkpoints_steps,
                             max_seq_length=FLAGS.max_seq_length,
                             max_answer_length=FLAGS.max_answer_length,
                             checkpoint_dir="SAVE_MODEL",
                             input_fn_builder=input_fn_builder,
                             th=85.5,
                             model_name="output_model_LSTM_{}".format(fold_i))
                ])

        if FLAGS.do_predict:
            test_examples = read_squad_examples(
                input_file="filter_data/test_data.csv", is_training=False)

            # The test records go to FLAGS.output_dir itself (no fold suffix),
            # which nothing above creates; make sure it exists before the
            # record file is opened.
            tf.gfile.MakeDirs(FLAGS.output_dir)
            test_writer = FeatureWriter(
                filename=os.path.join(FLAGS.output_dir, "test.tf_record"),
                is_training=False)
            test_features = []

            def append_feature(feature):
                """Keep the test feature in memory and write it to disk."""
                test_features.append(feature)
                test_writer.process_feature(feature)

            convert_examples_to_features(
                examples=test_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=False,
                output_fn=append_feature)
            test_writer.close()

            tf.logging.info("***** Running predictions *****")
            tf.logging.info(" Num orig examples = %d", len(test_examples))
            tf.logging.info(" Num split examples = %d", len(test_features))
            tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

            predict_input_fn = input_fn_builder(
                input_file=test_writer.filename,
                seq_length=FLAGS.max_seq_length,
                is_training=False,
                drop_remainder=False)

            # If running eval on the TPU, you will need to specify the number
            # of steps.
            # Each yielded item is a dict with "unique_ids", "start_logits"
            # and "end_logits".
            predictions = estimator.predict(predict_input_fn,
                                            yield_single_examples=True)

            with open("results2.csv", "w", encoding="utf-8") as fw:
                for i, item in enumerate(predictions):
                    unique_ids = item["unique_ids"]
                    qa_id = test_features[i].unique_id
                    # Predictions are expected in the same order as the
                    # features that were written.
                    assert qa_id == unique_ids
                    start_logits = item["start_logits"]
                    end_logits = item["end_logits"]
                    n_best_item = write_prediction(
                        test_features[i],
                        start_logits,
                        end_logits,
                        n_best_size=20,
                        max_answer_length=FLAGS.max_answer_length)
                    best_list = [a["text"] for a in n_best_item[:3]]
                    # The row format needs exactly three answers; pad with
                    # empty strings when fewer candidates were produced.
                    best_list += [""] * (3 - len(best_list))
                    fw.write("\"{}\",\"{}\",\"{}\",\"{}\"\n".format(
                        qa_id, *best_list))
def train(model_dir,
          overwrite=False,
          model=gin.REQUIRED,
          training_steps=gin.REQUIRED,
          random_seed=gin.REQUIRED,
          batch_size=gin.REQUIRED,
          eval_steps=1000,
          name="",
          model_num=None):
    """Train the estimator, export a TFHub module and store the results.

    The data set comes from `named_data.get_named_ground_truth_data()`, so the
    gin binding 'dataset.name' has to be specified before calling this.

    Args:
      model_dir: String with path to directory where model output should be
        saved.
      overwrite: Boolean indicating whether to overwrite output directory.
      model: GaussianEncoderModel that should be trained and exported.
      training_steps: Integer with number of training steps.
      random_seed: Integer with random seed used for training.
      batch_size: Integer with the batch size.
      eval_steps: Optional integer with number of steps used for evaluation.
      name: Optional string with name of the model (can be used to name
        models).
      model_num: Optional integer with model number (can be used to identify
        models).

    Raises:
      ValueError: If `model_dir` already exists and `overwrite` is False.
    """
    # 'name' and 'model_num' are accepted only so that they end up in the
    # saved gin config; the function body never reads them.
    del name, model_num

    # Refuse to reuse an existing output directory unless told to wipe it.
    if tf.gfile.IsDirectory(model_dir):
        if not overwrite:
            raise ValueError(
                "Directory already exists and overwrite is False.")
        tf.gfile.DeleteRecursively(model_dir)

    # Single numpy random state from which the training and evaluation input
    # seeds are drawn (training first, evaluation second).
    rng = np.random.RandomState(random_seed)

    ground_truth_data = named_data.get_named_ground_truth_data()

    # A TPUEstimator is used with use_tpu=False, i.e. it runs locally while
    # keeping the option of TPU training open.
    tpu_config = contrib_tpu.TPUConfig(iterations_per_loop=500)
    run_config = contrib_tpu.RunConfig(
        tf_random_seed=random_seed,
        keep_checkpoint_max=1,
        tpu_config=tpu_config)
    checkpoint_dir = os.path.join(model_dir, "tf_checkpoint")
    tpu_estimator = contrib_tpu.TPUEstimator(
        use_tpu=False,
        model_fn=model.model_fn,
        model_dir=checkpoint_dir,
        train_batch_size=batch_size,
        eval_batch_size=batch_size,
        config=run_config)

    # Wall-clock start; the elapsed time is stored with the results below.
    start_time = time.time()

    train_input_fn = _make_input_fn(ground_truth_data, rng.randint(2**32))
    tpu_estimator.train(input_fn=train_input_fn, steps=training_steps)

    # Export the trained model as a TFHub module from the latest checkpoint.
    output_shape = named_data.get_named_ground_truth_data().observation_shape
    module_export_path = os.path.join(model_dir, "tfhub")
    gaussian_encoder_model.export_as_tf_hub(
        model,
        output_shape,
        tpu_estimator.latest_checkpoint(),
        module_export_path)

    # Evaluate and write everything into the "results" sub-directory so it is
    # available for later analysis.
    eval_input_fn = _make_input_fn(
        ground_truth_data, rng.randint(2**32), num_batches=eval_steps)
    results_dict = tpu_estimator.evaluate(input_fn=eval_input_fn)
    results_dict["elapsed_time"] = time.time() - start_time
    results.update_result_directory(
        os.path.join(model_dir, "results"), "train", results_dict)
def main(_):
    """Train, evaluate and/or run prediction for the IMDB BERT classifier.

    Which phases run is controlled by `FLAGS.do_train`, `FLAGS.do_eval` and
    `FLAGS.do_predict`. Evaluation metrics are written to
    `<output_dir>/eval_results.txt` and predictions (one per line) to
    `<output_dir>/test_results.tsv`.

    Args:
      _: Unused positional argument supplied by the app runner.

    Raises:
      ValueError: If no phase is enabled, if `FLAGS.max_seq_length` exceeds
        the BERT position-embedding limit, or if the task name is unknown.
    """
    # Log level: one of Debug, Info, Warning, Error.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Maps a task name to the class that knows how to read its data.
    processors = {"imdb": IMDBProcessor}

    # Validate flags.
    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    # Load the BERT model configuration.
    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    # The requested sequence length must not exceed the number of position
    # embeddings the BERT model was trained with.
    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    # Create the output directory.
    tf.gfile.MakeDirs(FLAGS.output_dir)

    # Resolve the data processor for the requested task.
    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))
    processor = processors[task_name]()

    # Label list for this task.
    label_list = processor.get_labels(data_dir=FLAGS.data_dir)

    # Tokenizer built from the vocabulary file.
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    # Optional TPU cluster.
    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    # Run configuration.
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        # Load the training data and derive the total number of steps.
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    # Model configuration is read from ./config/bilstm_config.json relative to
    # the current working directory. It is needed for every phase, so it is
    # loaded regardless of `do_train`.
    with open(
            os.path.join(os.path.abspath(os.getcwd()), 'config',
                         'bilstm_config.json'), "r") as fr:
        model_config = json.load(fr)

    # Build the model function.
    model_fn = model_fn_builder(bert_config=bert_config,
                                model_config=model_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu,
                                fine_tune=False)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tpu.TPUEstimator(use_tpu=FLAGS.use_tpu,
                                 model_fn=model_fn,
                                 config=run_config,
                                 train_batch_size=FLAGS.train_batch_size,
                                 eval_batch_size=FLAGS.eval_batch_size,
                                 predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # Convert the training examples to a TFRecord file.
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_examples))
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        # Input function reading the TFRecord file.
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        # Train the model.
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the
            # number of examples must be a multiple of the batch size, or else
            # examples will get dropped. So we pad with fake examples which
            # are ignored later on. These do NOT count towards the metric (all
            # tf.metrics support a per-instance weight, and these get a weight
            # of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the
            # number of examples must be a multiple of the batch size, or else
            # examples will get dropped. So we pad with fake examples which
            # are ignored later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                # The padding examples were appended after the real ones, so
                # everything from this index on is padding and must not be
                # written; otherwise the line-count check below fails on TPU.
                # NOTE(review): relies on the input_fn preserving record order
                # when is_training=False - confirm.
                if i >= num_actual_predict_examples:
                    break
                output_line = str(prediction['predictions']) + '\n'
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
def main(_):
    """Train, evaluate and/or run prediction for the SQuAD-style model.

    Phases are selected with `FLAGS.do_train`, `FLAGS.do_eval` and
    `FLAGS.do_predict`. Feature files are written to `FLAGS.output_dir` with
    `FLAGS.exp_name` as a suffix; evaluation metrics go to
    `<output_dir>/eval_results.txt`.

    Args:
      _: Unused positional argument supplied by the app runner.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    validate_flags_or_throw(bert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    if FLAGS.do_train:
        train_examples = read_squad_examples(input_file=FLAGS.train_file,
                                             is_training=True)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)

        # Pre-shuffle the input to avoid having to make a very large shuffle
        # buffer in the `input_fn`.
        rng = random.Random(12345)
        rng.shuffle(train_examples)

    model_fn = model_fn_builder(
        bert_config=bert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        membership_features_str=FLAGS.membership_features_str)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant
        # tensors in memory.
        train_writer = FeatureWriter(filename=os.path.join(
            FLAGS.output_dir, "train.tf_record." + FLAGS.exp_name),
                                     is_training=True)
        convert_examples_to_features(examples=train_examples,
                                     tokenizer=tokenizer,
                                     max_seq_length=FLAGS.max_seq_length,
                                     doc_stride=FLAGS.doc_stride,
                                     max_query_length=FLAGS.max_query_length,
                                     is_training=True,
                                     output_fn=train_writer.process_feature)
        train_writer.close()

        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num orig examples = %d", len(train_examples))
        tf.logging.info(" Num split examples = %d", train_writer.num_features)
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        # Release the examples; only the feature file is needed from here on.
        del train_examples

        train_input_fn = input_fn_builder(input_file=train_writer.filename,
                                          seq_length=FLAGS.max_seq_length,
                                          is_training=True,
                                          drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        # marking is_training = True to keep the labels
        eval_examples = read_squad_examples(input_file=FLAGS.dev_file,
                                            is_training=True)

        eval_writer = FeatureWriter(filename=os.path.join(
            FLAGS.output_dir, "eval.tf_record." + FLAGS.exp_name),
                                    is_training=True)
        eval_features = []

        def append_feature(feature):
            """Keep the feature in memory and write it to the eval file."""
            eval_features.append(feature)
            eval_writer.process_feature(feature)

        convert_examples_to_features(examples=eval_examples,
                                     tokenizer=tokenizer,
                                     max_seq_length=FLAGS.max_seq_length,
                                     doc_stride=FLAGS.doc_stride,
                                     max_query_length=FLAGS.max_query_length,
                                     is_training=True,
                                     output_fn=append_feature)
        eval_writer.close()

        tf.logging.info(" Num orig examples = %d", len(eval_examples))
        tf.logging.info(" Num split examples = %d", len(eval_features))
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        eval_input_fn = input_fn_builder(input_file=eval_writer.filename,
                                         seq_length=FLAGS.max_seq_length,
                                         is_training=True,
                                         drop_remainder=True)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d", len(eval_examples))
        # The estimator evaluates with eval_batch_size, so that is the value
        # reported here.
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        if FLAGS.use_tpu:
            # On TPU the number of evaluation steps has to be given
            # explicitly.
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)
            tf.logging.info(" Num examples used = %d",
                            FLAGS.eval_batch_size * eval_steps)
        else:
            # None makes the estimator run through the whole input.
            eval_steps = None
            tf.logging.info(" Num examples used = %d", len(eval_examples))

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        eval_examples = read_squad_examples(
            input_file=FLAGS.predict_input_file, is_training=False)

        eval_writer = FeatureWriter(filename=os.path.join(
            FLAGS.output_dir, "predict.tf_record." + FLAGS.exp_name),
                                    is_training=False)
        eval_features = []

        # Defined here as well so that prediction works when `do_eval` is off;
        # previously the callback only existed if the eval branch had run.
        def append_feature(feature):
            """Keep the feature in memory and write it to the predict file."""
            eval_features.append(feature)
            eval_writer.process_feature(feature)

        convert_examples_to_features(examples=eval_examples,
                                     tokenizer=tokenizer,
                                     max_seq_length=FLAGS.max_seq_length,
                                     doc_stride=FLAGS.doc_stride,
                                     max_query_length=FLAGS.max_query_length,
                                     is_training=False,
                                     output_fn=append_feature)
        eval_writer.close()

        tf.logging.info("***** Running predictions *****")
        tf.logging.info(" Num orig examples = %d", len(eval_examples))
        tf.logging.info(" Num split examples = %d", len(eval_features))
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = input_fn_builder(input_file=eval_writer.filename,
                                            seq_length=FLAGS.max_seq_length,
                                            is_training=False,
                                            drop_remainder=False)

        # If running eval on the TPU, you will need to specify the number of
        # steps.
        all_results = []
        # NOTE(review): nothing is ever appended to `all_results`, so the
        # progress line below is logged for every prediction and the
        # predictions themselves are discarded - confirm whether result
        # collection is missing here.
        for result in estimator.predict(predict_input_fn,
                                        yield_single_examples=True):
            if len(all_results) % 1000 == 0:
                tf.logging.info("Processing example: %d", len(all_results))
def main(_):
    """Train in rounds, evaluating on the validation data after each round.

    The total number of training steps is `train_data_size / train_batch_size`
    and is split evenly over `FLAGS.num_train_rounds` rounds. Without
    `do_train` a single evaluation round is run.

    Args:
      _: Unused positional argument supplied by the app runner.

    Raises:
      ValueError: If neither `do_train` nor `do_eval` is set, or if
        `FLAGS.max_seq_length` exceeds the BERT position-embedding limit.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not (FLAGS.do_train or FLAGS.do_eval):
        raise ValueError("At least one of `do_train`, `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    max_positions = bert_config.max_position_embeddings
    if FLAGS.max_seq_length > max_positions:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, max_positions))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    else:
        tpu_cluster_resolver = None

    tpu_config = contrib_tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_tpu_cores,
        per_host_input_for_training=(
            contrib_tpu.InputPipelineConfig.PER_HOST_V2))
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tpu_config)

    # Step counts stay None when training is disabled.
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        num_train_steps = int(FLAGS.train_data_size / FLAGS.train_batch_size)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
        # Steps trained per round.
        num_loop_steps = int(num_train_steps / FLAGS.num_train_rounds)

    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.eval_data_size >= 0:
        eval_data_size = FLAGS.eval_data_size
    else:
        # A negative size means "unknown": count the records in the eval file.
        # This is not done for the training file because it is much larger and
        # iterating over it just for a count would take too long; its size
        # comes from the 'train_data_size' parameter instead.
        eval_data_size = sum(
            1 for _ in tf.io.tf_record_iterator(FLAGS.eval_file))

    # For each "round", train for a fraction of the training data and then
    # evaluate on the validation data. Allows for intermediate progress
    # updates.
    if FLAGS.do_train:
        train_rounds = FLAGS.num_train_rounds
    else:
        train_rounds = 1

    for train_round in range(train_rounds):
        if FLAGS.do_train:
            tf.logging.info("***** Running training *****")
            tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
            tf.logging.info(" Num steps = %d", num_loop_steps)
            # Skip the records consumed by the previous rounds.
            records_to_skip = (
                num_loop_steps * train_round * FLAGS.train_batch_size)
            train_input_fn = file_based_input_fn_builder(
                input_file=FLAGS.train_file,
                seq_length=FLAGS.max_seq_length,
                is_training=True,
                drop_remainder=True,
                skip=records_to_skip)
            estimator.train(input_fn=train_input_fn, steps=num_loop_steps)

        if FLAGS.do_eval:
            if FLAGS.use_tpu:
                # On TPU the number of steps is given explicitly.
                eval_steps = int(eval_data_size / FLAGS.eval_batch_size)
            else:
                # None tells the estimator to run through the entire set.
                eval_steps = None

            eval_input_fn = file_based_input_fn_builder(
                input_file=FLAGS.eval_file,
                seq_length=FLAGS.max_seq_length,
                is_training=False,
                drop_remainder=bool(FLAGS.use_tpu))

            result = estimator.evaluate(input_fn=eval_input_fn,
                                        steps=eval_steps)

            tf.logging.info("********** Eval results: %d *******\n",
                            train_round)
            for metric_name in sorted(result.keys()):
                tf.logging.info(
                    "%s = %s" % (metric_name, str(result[metric_name])))
def main(_):
    """Run test-set prediction for each fold and write the top-3 answers.

    For every fold (currently only fold 4) an estimator is built on the model
    directory `FLAGS.output_dir + "_<fold>"`, the test data in
    "filter_data/test_data.csv" is converted to features, and the three best
    answer texts per feature are written to
    "./results/cv_results_lstm_2th_<fold>.csv".

    Args:
      _: Unused positional argument supplied by the app runner.
    """
    # Local import so this function does not depend on the file's import
    # block.
    import csv

    tf.logging.set_verbosity(tf.logging.INFO)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    # validate_flags_or_throw(bert_config)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    for fold_i in [4]:
        model_dir = FLAGS.output_dir + "_{}".format(fold_i)
        run_config = tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            master=FLAGS.master,
            model_dir=model_dir,
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
            tpu_config=tpu.TPUConfig(
                iterations_per_loop=FLAGS.iterations_per_loop,
                num_shards=FLAGS.num_tpu_cores,
                per_host_input_for_training=is_per_host))

        # Let the session grow GPU memory on demand.
        session_config = tf.ConfigProto(log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        run_config = run_config.replace(session_config=session_config)
        run_config = run_config.replace(keep_checkpoint_max=2)

        # No training happens here, so the step counts are left unset.
        num_train_steps = None
        num_warmup_steps = None

        model_fn = model_fn_builder(bert_config=bert_config,
                                    init_checkpoint=FLAGS.init_checkpoint,
                                    learning_rate=FLAGS.learning_rate,
                                    num_train_steps=num_train_steps,
                                    num_warmup_steps=num_warmup_steps,
                                    use_tpu=FLAGS.use_tpu,
                                    use_one_hot_embeddings=FLAGS.use_tpu)

        # If TPU is not available, this will fall back to normal Estimator on
        # CPU or GPU.
        estimator = tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

        test_examples = read_squad_examples(
            input_file="filter_data/test_data.csv", is_training=False)

        test_writer = FeatureWriter(filename=os.path.join(
            model_dir, "test.tf_record"),
                                    is_training=False)
        test_features = []

        def append_feature(feature):
            """Keep the feature in memory and write it to the test file."""
            test_features.append(feature)
            test_writer.process_feature(feature)

        convert_examples_to_features(examples=test_examples,
                                     tokenizer=tokenizer,
                                     max_seq_length=FLAGS.max_seq_length,
                                     doc_stride=FLAGS.doc_stride,
                                     max_query_length=FLAGS.max_query_length,
                                     is_training=False,
                                     output_fn=append_feature)
        test_writer.close()

        tf.logging.info("***** Running predictions *****")
        tf.logging.info(" Num orig examples = %d", len(test_examples))
        tf.logging.info(" Num split examples = %d", len(test_features))
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = input_fn_builder(input_file=test_writer.filename,
                                            seq_length=FLAGS.max_seq_length,
                                            is_training=False,
                                            drop_remainder=False)

        # If running eval on the TPU, you will need to specify the number of
        # steps.
        # Each yielded item is read below through the keys "unique_ids",
        # "start_logits" and "end_logits".
        predictions = estimator.predict(predict_input_fn,
                                        yield_single_examples=True)

        # newline="" is what the csv module expects for files it writes to.
        with open("./results/cv_results_lstm_2th_{}.csv".format(fold_i),
                  "w",
                  encoding="utf-8",
                  newline="") as fw:
            # Every field is quoted, matching the previous hand-written
            # format; the csv writer additionally escapes any double quote
            # inside an answer text so the row stays parseable.
            csv_writer = csv.writer(fw,
                                    quoting=csv.QUOTE_ALL,
                                    lineterminator="\n")
            for i, item in enumerate(predictions):
                unique_ids = item["unique_ids"]
                qa_id = test_features[i].unique_id
                # Predictions are expected in the same order as the features.
                assert qa_id == unique_ids
                start_logits = item["start_logits"]
                end_logits = item["end_logits"]

                n_best_item = write_prediction(
                    test_features[i],
                    start_logits,
                    end_logits,
                    n_best_size=20,
                    max_answer_length=FLAGS.max_answer_length)
                best_list = [a["text"] for a in n_best_item[:3]]
                if len(best_list) < 3:
                    print(n_best_item)
                    # Pad so that every row has exactly three answer columns.
                    while len(best_list) < 3:
                        best_list.append("empty")
                csv_writer.writerow([qa_id] + best_list)
def main(_):
    """Pre-train and/or evaluate ALBERT with Horovod data parallelism.

    Every Horovod rank other than 0 writes into its own sub-directory of
    `FLAGS.output_dir`, and the train/eval batch sizes are divided by the
    number of ranks. During evaluation the checkpoint with the best
    "masked_lm_accuracy" so far is copied to a "-best" checkpoint.

    Args:
      _: Unused positional argument supplied by the app runner.

    Raises:
      ValueError: If neither `do_train` nor `do_eval` is set.
    """
    hvd.init()

    # Rank 0 keeps the configured output directory, other ranks get a
    # sub-directory named after their rank.
    FLAGS.output_dir = FLAGS.output_dir if hvd.rank() == 0 else\
        os.path.join(FLAGS.output_dir, str(hvd.rank()))
    # The flag values are divided evenly across ranks.
    FLAGS.train_batch_size = FLAGS.train_batch_size // hvd.size()
    FLAGS.eval_batch_size = FLAGS.eval_batch_size // hvd.size()

    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    input_files = []
    for input_pattern in FLAGS.input_file.split(","):
        input_files.extend(tf.gfile.Glob(input_pattern))

    # Shard the input files across ranks by the numeric id in the file name.
    # If no file matches the naming scheme, every rank uses all files.
    # Raw string: same pattern as before, without the invalid "\d" escape of
    # a normal string literal.
    shard_name_pattern = re.compile(r'^.+\d+.tfrecord')
    input_files_local = []
    for input_file in input_files:
        fname = input_file.split('/')[-1]
        if shard_name_pattern.match(fname):
            # The id is read from the 4 characters that sit 13 to 9
            # characters before the end of the file name.
            fid = int(fname[-13:-9])
            if fid % hvd.size() == hvd.rank():
                input_files_local.append(input_file)

    if input_files_local:
        input_files = input_files_local

    tf.logging.info("*** Input Files ***")
    for input_file in input_files:
        tf.logging.info(" %s" % input_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    # Restrict each process to the GPU matching its local rank.
    config = tf.ConfigProto()
    config.gpu_options.visible_device_list = str(hvd.local_rank())

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=config,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    model_fn = model_fn_builder(albert_config=albert_config,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=FLAGS.num_train_steps,
                                num_warmup_steps=FLAGS.num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu,
                                optimizer=FLAGS.optimizer,
                                poly_power=FLAGS.poly_power,
                                start_warmup_step=FLAGS.start_warmup_step)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        # Broadcast the variables of rank 0 to all other ranks.
        hooks = [hvd.BroadcastGlobalVariablesHook(0)]
        train_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=True)
        estimator.train(input_fn=train_input_fn,
                        max_steps=FLAGS.num_train_steps,
                        hooks=hooks)

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)
        global_step = -1
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        eval_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=False)
        best_perf = 0
        key_name = "masked_lm_accuracy"
        # The results file is opened in a `with` block so it is closed (and
        # flushed) even if evaluation raises.
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            # Keep evaluating the latest checkpoint until one at or beyond
            # the final training step has been evaluated.
            while global_step < FLAGS.num_train_steps:
                if estimator.latest_checkpoint() is None:
                    tf.logging.info("No checkpoint found yet. Sleeping.")
                    time.sleep(1)
                    continue

                result = estimator.evaluate(input_fn=eval_input_fn,
                                            steps=FLAGS.max_eval_steps)
                global_step = result["global_step"]
                tf.logging.info("***** Eval results *****")
                checkpoint_path = estimator.latest_checkpoint()
                for key in sorted(result.keys()):
                    tf.logging.info(" %s = %s", key, str(result[key]))
                    writer.write("%s = %s\n" % (key, str(result[key])))

                if result[key_name] > best_perf:
                    # New best score: copy the checkpoint files to "-best".
                    best_perf = result[key_name]
                    for ext in ["meta", "data-00000-of-00001", "index"]:
                        src_ckpt = checkpoint_path + ".{}".format(ext)
                        tgt_ckpt = checkpoint_path.rsplit(
                            "-", 1)[0] + "-best.{}".format(ext)
                        tf.logging.info("saving {} to {}".format(
                            src_ckpt, tgt_ckpt))
                        tf.gfile.Copy(src_ckpt, tgt_ckpt, overwrite=True)
                        writer.write("saved {} to {}\n".format(
                            src_ckpt, tgt_ckpt))
def main(argv):
    """Train, evaluate and run example predictions for the iris classifier.

    Args:
      argv: Unused command-line arguments supplied by the app runner.
    """
    del argv  # Unused

    if FLAGS.use_tpu:
        assert FLAGS.model_dir.startswith("gs://"), (
            "'model_dir' should be a GCS bucket path!")

    # Fetch the data
    (train_x, train_y), (test_x, test_y) = iris_data.load_data()

    # Feature columns describe how to use the input: one numeric column per
    # key of the training features.
    my_feature_columns = [
        tf.feature_column.numeric_column(key=column_name)
        for column_name in train_x.keys()
    ]

    # Resolve TPU cluster and runconfig for this.
    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu)

    session_config = tf.ConfigProto(allow_soft_placement=True,
                                    log_device_placement=True)
    run_config = contrib_tpu.RunConfig(
        model_dir=FLAGS.model_dir,
        cluster=tpu_cluster_resolver,
        session_config=session_config,
        tpu_config=contrib_tpu.TPUConfig(FLAGS.iterations),
    )

    model_params = {
        # Name of the feature columns in the input data.
        "feature_columns": my_feature_columns,
        # Two hidden layers of 10 nodes each.
        "hidden_units": [10, 10],
        # The model must choose between 3 classes.
        "n_classes": 3,
        "use_tpu": FLAGS.use_tpu,
    }

    # Build 2 hidden layer DNN with 10, 10 units respectively.
    classifier = contrib_tpu.TPUEstimator(
        model_fn=my_model,
        use_tpu=FLAGS.use_tpu,
        train_batch_size=FLAGS.batch_size,
        eval_batch_size=FLAGS.batch_size,
        predict_batch_size=FLAGS.batch_size,
        config=run_config,
        params=model_params)

    # The input functions read the batch size from the `params` dict they are
    # called with.
    def train_input_fn(params):
        return iris_data.train_input_fn(train_x, train_y,
                                        params["batch_size"])

    def eval_input_fn(params):
        return iris_data.eval_input_fn(test_x, test_y, params["batch_size"])

    def predict_input_fn(params):
        return iris_data.predict_input_fn(iris_data.PREDICTION_INPUT_DATA,
                                          params["batch_size"])

    # Train the Model.
    classifier.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    # Evaluate the model.
    eval_result = classifier.evaluate(input_fn=eval_input_fn,
                                      steps=FLAGS.eval_steps)

    print("\nTest set accuracy: {accuracy:0.3f}\n".format(**eval_result))

    # Generate predictions from the model
    predictions = classifier.predict(input_fn=predict_input_fn)

    template = '\nPrediction is "{}" ({:.1f}%), expected "{}"'
    expected_labels = iris_data.PREDICTION_OUTPUT_DATA
    for pred_dict, expec in zip(predictions, expected_labels):
        class_id = pred_dict["class_ids"][0]
        probability = pred_dict["probabilities"][class_id]
        species = iris_data.SPECIES[class_id]
        print(template.format(species, 100 * probability, expec))
def main(unused_argv):
    """Train, evaluate and optionally export the Inception classifier.

    `FLAGS.mode` selects the behaviour: 'eval' evaluates every new checkpoint
    found in `FLAGS.model_dir`, 'train_and_eval' alternates training and
    evaluation cycles, and any other value trains only. Afterwards the model
    is exported if `FLAGS.export_dir` is set.

    Args:
      unused_argv: Unused command-line arguments supplied by the app runner.
    """
    del unused_argv  # Unused

    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    assert FLAGS.precision in ('bfloat16', 'float32'), (
        'Invalid value for --precision flag; must be bfloat16 or float32.')
    tf.logging.info('Precision: %s', FLAGS.precision)

    # Input/output permutations and the batch axis depend on whether the
    # transpose option is enabled.
    if FLAGS.transpose_enabled:
        params = {
            'input_perm': [3, 0, 1, 2],
            'output_perm': [1, 2, 3, 0],
        }
        batch_axis = 3
    else:
        params = {
            'input_perm': [0, 1, 2, 3],
            'output_perm': [0, 1, 2, 3],
        }
        batch_axis = 0

    # A positive --eval_total_size overrides the default evaluation set size.
    eval_size = (FLAGS.eval_total_size
                 if FLAGS.eval_total_size > 0 else _NUM_EVAL_IMAGES)
    eval_steps = eval_size // FLAGS.eval_batch_size

    # In pure eval mode one loop covers the whole evaluation.
    if FLAGS.mode == 'eval':
        iterations = eval_steps
    else:
        iterations = FLAGS.iterations

    # No eval batch size is passed to the estimator in pure training mode.
    if FLAGS.mode == 'train':
        eval_batch_size = None
    else:
        eval_batch_size = FLAGS.eval_batch_size

    tpu_config = contrib_tpu.TPUConfig(iterations_per_loop=iterations,
                                       num_shards=FLAGS.num_shards)

    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=FLAGS.log_device_placement)
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_secs=FLAGS.save_checkpoints_secs,
        save_summary_steps=FLAGS.save_summary_steps,
        session_config=session_config,
        tpu_config=tpu_config)

    inception_classifier = contrib_tpu.TPUEstimator(
        model_fn=inception_model_fn,
        use_tpu=FLAGS.use_tpu,
        config=run_config,
        params=params,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=eval_batch_size,
        batch_axis=(batch_axis, 0))

    # Input pipelines are slightly different (with regards to shuffling and
    # preprocessing) between training and evaluation.
    use_bfloat16 = FLAGS.precision == 'bfloat16'
    imagenet_train = InputPipeline(is_training=True,
                                   data_dir=FLAGS.data_dir,
                                   use_bfloat16=use_bfloat16)
    imagenet_eval = InputPipeline(is_training=False,
                                  data_dir=FLAGS.data_dir,
                                  use_bfloat16=use_bfloat16)

    eval_hooks = [LoadEMAHook(FLAGS.model_dir)] if FLAGS.moving_average else []

    if FLAGS.mode == 'eval':
        # Run evaluation when there is a new checkpoint
        new_checkpoints = evaluation.checkpoints_iterator(
            FLAGS.model_dir, timeout=FLAGS.eval_timeout)
        for ckpt_path in new_checkpoints:
            tf.logging.info('Starting to evaluate.')
            try:
                started_at = time.time()  # Includes compilation time
                eval_results = inception_classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    hooks=eval_hooks,
                    checkpoint_path=ckpt_path)
                elapsed_time = int(time.time() - started_at)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                eval_results, elapsed_time)

                # Terminate eval job when final checkpoint is reached. The
                # step is parsed from the checkpoint file name: the text
                # between the first and second '-'.
                ckpt_name = os.path.basename(ckpt_path)
                current_step = int(ckpt_name.split('-')[1])
                if current_step >= FLAGS.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d',
                        current_step)
                    break
            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU
                # worker, sometimes the TPU worker does not finish
                # initializing until long after the CPU job tells it to start
                # evaluating. In this case, the checkpoint file could have
                # been deleted already.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    ckpt_path)

    elif FLAGS.mode == 'train_and_eval':
        num_cycles = FLAGS.train_steps // FLAGS.train_steps_per_eval
        for cycle in range(num_cycles):
            tf.logging.info('Starting training cycle %d.' % cycle)
            inception_classifier.train(input_fn=imagenet_train.input_fn,
                                       steps=FLAGS.train_steps_per_eval)

            tf.logging.info('Starting evaluation cycle %d .' % cycle)
            eval_results = inception_classifier.evaluate(
                input_fn=imagenet_eval.input_fn,
                steps=eval_steps,
                hooks=eval_hooks)
            tf.logging.info('Evaluation results: %s' % eval_results)

    else:
        tf.logging.info('Starting training ...')
        inception_classifier.train(input_fn=imagenet_train.input_fn,
                                   max_steps=FLAGS.train_steps)

    if FLAGS.export_dir is not None:
        tf.logging.info('Starting to export model.')
        inception_classifier.export_saved_model(
            export_dir_base=FLAGS.export_dir,
            serving_input_receiver_fn=image_serving_input_fn)
def main(_):
    """Run BERT SQuAD fine-tuning and/or prediction as selected by FLAGS.

    With ``FLAGS.do_train`` the training file is read, shuffled with a fixed
    seed, converted to features stored in ``train.tf_record`` under
    ``FLAGS.output_dir`` and fed to ``estimator.train``.  With
    ``FLAGS.do_predict`` the prediction file is converted to
    ``eval.tf_record``, the estimator is run over it, and the prediction,
    n-best and null-odds JSON files are written to ``FLAGS.output_dir``.

    Args:
      _: Ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    validate_flags_or_throw(config)
    tf.gfile.MakeDirs(FLAGS.output_dir)

    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    # A cluster resolver is only built when a named TPU was requested.
    resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    tpu_settings = contrib_tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_tpu_cores,
        per_host_input_for_training=(
            contrib_tpu.InputPipelineConfig.PER_HOST_V2))
    run_config = contrib_tpu.RunConfig(
        cluster=resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tpu_settings)

    # These stay None unless training was requested.
    squad_train = None
    total_steps = None
    warmup_steps = None
    if FLAGS.do_train:
        squad_train = read_squad_examples(
            input_file=FLAGS.train_file, is_training=True)
        total_steps = int(
            len(squad_train) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        warmup_steps = int(total_steps * FLAGS.warmup_proportion)

        # Shuffle up front (fixed seed) so the `input_fn` does not need a
        # very large shuffle buffer.
        random.Random(12345).shuffle(squad_train)

    model_fn = model_fn_builder(
        bert_config=config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=total_steps,
        num_warmup_steps=warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # Features go through a file on disk rather than being held as very
        # large constant tensors in memory.
        feature_writer = FeatureWriter(
            filename=os.path.join(FLAGS.output_dir, "train.tf_record"),
            is_training=True)
        convert_examples_to_features(
            examples=squad_train,
            tokenizer=tokenizer,
            max_seq_length=FLAGS.max_seq_length,
            doc_stride=FLAGS.doc_stride,
            max_query_length=FLAGS.max_query_length,
            is_training=True,
            output_fn=feature_writer.process_feature)
        feature_writer.close()

        tf.logging.info("***** Running training *****")
        training_summary = (
            (" Num orig examples = %d", len(squad_train)),
            (" Num split examples = %d", feature_writer.num_features),
            (" Batch size = %d", FLAGS.train_batch_size),
            (" Num steps = %d", total_steps),
        )
        for template, value in training_summary:
            tf.logging.info(template, value)
        del squad_train

        estimator.train(
            input_fn=input_fn_builder(
                input_file=feature_writer.filename,
                seq_length=FLAGS.max_seq_length,
                is_training=True,
                drop_remainder=True),
            max_steps=total_steps)

    if FLAGS.do_predict:
        squad_predict = read_squad_examples(
            input_file=FLAGS.predict_file, is_training=False)
        record_writer = FeatureWriter(
            filename=os.path.join(FLAGS.output_dir, "eval.tf_record"),
            is_training=False)
        predict_features = []

        def keep_and_write(feature):
            # Keep each feature in memory and also write it to the record.
            predict_features.append(feature)
            record_writer.process_feature(feature)

        convert_examples_to_features(
            examples=squad_predict,
            tokenizer=tokenizer,
            max_seq_length=FLAGS.max_seq_length,
            doc_stride=FLAGS.doc_stride,
            max_query_length=FLAGS.max_query_length,
            is_training=False,
            output_fn=keep_and_write)
        record_writer.close()

        tf.logging.info("***** Running predictions *****")
        tf.logging.info(" Num orig examples = %d", len(squad_predict))
        tf.logging.info(" Num split examples = %d", len(predict_features))
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = input_fn_builder(
            input_file=record_writer.filename,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False)

        # If running eval on the TPU, you will need to specify the number of
        # steps.
        raw_results = []
        for prediction in estimator.predict(
                predict_input_fn, yield_single_examples=True):
            seen = len(raw_results)
            if seen % 1000 == 0:
                tf.logging.info("Processing example: %d" % (seen))
            raw_results.append(
                RawResult(
                    unique_id=int(prediction["unique_ids"]),
                    start_logits=[
                        float(v) for v in prediction["start_logits"].flat],
                    end_logits=[
                        float(v) for v in prediction["end_logits"].flat]))

        out_dir = FLAGS.output_dir
        write_predictions(squad_predict, predict_features, raw_results,
                          FLAGS.n_best_size, FLAGS.max_answer_length,
                          FLAGS.do_lower_case,
                          os.path.join(out_dir, "predictions.json"),
                          os.path.join(out_dir, "nbest_predictions.json"),
                          os.path.join(out_dir, "null_odds.json"))
def main():
    """Train and/or evaluate a two-label BERT classifier on pickled CQA data.

    Features are loaded from ``cqa_data.pkl`` in the working directory.  With
    ``FLAGS.do_train`` the estimator is trained with an ``EvalHook``
    attached; with ``FLAGS.do_eval`` predictions are computed for the dev
    features and PRF / MAP / AvgRec / MRR metrics are printed.

    Raises:
      ValueError: If neither ``do_train`` nor ``do_eval`` is set, or if
        ``FLAGS.max_seq_length`` exceeds the model's position embeddings.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    session_config = tf.ConfigProto(log_device_placement=True)
    session_config.gpu_options.allow_growth = True
    # RunConfig.replace() returns a NEW config; the result must be kept or
    # the session options above are silently dropped.
    run_config = run_config.replace(session_config=session_config)

    num_train_steps = None
    num_warmup_steps = None

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use a cqa_data.pkl produced by a trusted pipeline.
    with open('cqa_data.pkl', 'rb') as fr:
        train_features, dev_cid, dev_features = pkl.load(fr)
    dev_label = [feature.label_id for feature in dev_features]

    if FLAGS.do_train:
        num_train_steps = int(
            len(train_features) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=2,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu,
                                dev_cid=dev_cid)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        # params={'batch_size': FLAGS.train_batch_size},
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_features))
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        train_input_fn = input_fn_builder(features=train_features,
                                          seq_length=FLAGS.max_seq_length,
                                          is_training=True,
                                          drop_remainder=True)
        estimator.train(
            input_fn=train_input_fn,
            max_steps=num_train_steps,
            hooks=[
                EvalHook(estimator=estimator,
                         dev_features=dev_features,
                         dev_label=dev_label,
                         dev_cid=dev_cid,
                         max_seq_length=FLAGS.max_seq_length,
                         eval_steps=FLAGS.save_checkpoints_steps,
                         checkpoint_dir=FLAGS.output_dir)
            ])

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d", len(dev_features))
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        # Eval will be slightly WRONG on the TPU because it will truncate
        # the last batch.
        # NOTE(review): when the last batch is dropped, `res` presumably has
        # fewer rows than `dev_label` -- confirm the metric helpers cope.
        eval_drop_remainder = bool(FLAGS.use_tpu)
        eval_input_fn = input_fn_builder(features=dev_features,
                                         seq_length=FLAGS.max_seq_length,
                                         is_training=False,
                                         drop_remainder=eval_drop_remainder)

        predictions = estimator.predict(eval_input_fn,
                                        yield_single_examples=False)
        # Each yielded element is a whole batch; stack them along axis 0.
        res = np.concatenate(list(predictions), axis=0)
        print(res.shape, np.array(dev_label).shape)

        metrics = PRF(np.array(dev_label), res.argmax(axis=-1))
        # print((np.array(dev_label) != res.argmax(axis=-1))[:1000])
        MAP, AvgRec, MRR = eval_reranker(dev_cid, dev_label, res[:, 0])
        metrics['MAP'] = MAP
        metrics['AvgRec'] = AvgRec
        metrics['MRR'] = MRR
        print_metrics(metrics, 'dev')
def main(_):
    """Train a BERT model and/or evaluate every checkpoint in the output dir.

    With ``FLAGS.do_train`` the training TFRecord is created from the raw
    data if it does not exist yet, then the estimator is trained.  With
    ``FLAGS.do_eval`` each ``model.ckpt-<step>`` checkpoint found in
    ``FLAGS.output_dir`` whose step is newer than the last evaluated one is
    evaluated in ascending step order, and the results are appended to
    ``eval_results.txt`` in that directory.

    Args:
      _: Ignored.

    Raises:
      ValueError: If neither ``do_train`` nor ``do_eval`` is set, or if
        ``FLAGS.max_seq_length`` exceeds the model's position embeddings.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError("At least one of `do_train`, `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        num_train_steps = int(
            FLAGS.train_data_size / FLAGS.train_batch_size) * FLAGS.epochs
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        if not tf.gfile.Exists(FLAGS.train_file):
            tf.logging.info(
                "DANITER:File doesn't exist, creating tfrecord data")
            examples = model_builder.load_hellaswag(FLAGS.train_raw_data)
            tf.logging.info("DANITER:Read raw data as json")
            # NOTE(review): 512 is hard-coded here while the input_fn below
            # uses FLAGS.max_seq_length -- confirm the two are meant to agree.
            model_builder.file_based_convert_examples_for_bilinear(
                examples, 512, tokenizer, FLAGS.train_file, do_copa=True)
        train_input_fn = file_based_input_fn_builder(
            input_file=FLAGS.train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, steps=num_train_steps)

    if FLAGS.do_eval:
        # A negative eval_data_size tells the estimator to run through the
        # entire set (steps=None).
        if FLAGS.eval_data_size < 0:
            eval_steps = None
        else:
            eval_steps = int(FLAGS.eval_data_size / FLAGS.eval_batch_size)
        eval_drop_remainder = True if FLAGS.use_tpu else False

        if not tf.gfile.Exists(FLAGS.eval_file):
            examples = model_builder.load_hellaswag(FLAGS.eval_raw_data)
            model_builder.file_based_convert_examples_for_bilinear(
                examples, 512, tokenizer, FLAGS.eval_file, do_copa=True)
        eval_input_fn = file_based_input_fn_builder(
            input_file=FLAGS.eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        def _find_valid_cands(curr_step):
            """Return the ``.index`` files of checkpoints newer than curr_step."""
            candidates = []
            for filename in tf.gfile.ListDirectory(FLAGS.output_dir):
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if idx != "best" and int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        def _log_removal_candidates(ckpt_prefix):
            """Log the files of a checkpoint that would be pruned.

            Actual deletion is intentionally disabled; only the log line is
            emitted.
            """
            for ext in ["meta", "data-00000-of-00001", "index"]:
                src_ckpt = ckpt_prefix + ".{}".format(ext)
                tf.logging.info("removing {}".format(src_ckpt))
                # Why should we remove checkpoints?
                # tf.gfile.Remove(src_ckpt)

        tf.logging.info("Evaling all models in output dir")
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        key_name = "eval_accuracy"
        tf.logging.info("Checkpoint path " + checkpoint_path)

        if tf.gfile.Exists(checkpoint_path + ".index"):
            tf.logging.info("Found a best model... not good")
            result = estimator.evaluate(input_fn=eval_input_fn,
                                        steps=eval_steps,
                                        checkpoint_path=checkpoint_path)
            best_perf = result[key_name]
            global_step = result["global_step"]
        else:
            tf.logging.info("Setting global step to -1")
            global_step = -1
            best_perf = -1
            checkpoint_path = None

        tf.logging.info("Openning writer " + output_eval_file)
        # The context manager guarantees the results file is flushed and
        # closed even if an evaluation below raises.
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            steps_and_files = {}
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            tf.logging.info("Models found " + "\n".join(filenames))
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
                    if cur_filename.split("-")[-1] == "best":
                        continue
                    gstep = int(cur_filename.split("-")[-1])
                    if gstep not in steps_and_files:
                        tf.logging.info(
                            "Add {} to eval list.".format(cur_filename))
                        steps_and_files[gstep] = cur_filename
            tf.logging.info("found {} files.".format(len(steps_and_files)))

            if not steps_and_files:
                tf.logging.info(
                    "found 0 file, global step: {}. Sleeping.".format(
                        global_step))
            else:
                # Ascending step order, so global_step only moves forward.
                for step, checkpoint_path in sorted(steps_and_files.items()):
                    if global_step >= step:
                        # Already covered by an earlier evaluation.
                        if len(_find_valid_cands(step)) > 1:
                            _log_removal_candidates(checkpoint_path)
                        tf.logging.info("Skipping candidate for some reason")
                        continue

                    result = estimator.evaluate(
                        input_fn=eval_input_fn,
                        steps=eval_steps,
                        checkpoint_path=checkpoint_path)
                    global_step = result["global_step"]

                    tf.logging.info("***** Eval results *****")
                    for key in sorted(result.keys()):
                        tf.logging.info(" %s = %s", key, str(result[key]))
                        writer.write("%s = %s\n" % (key, str(result[key])))
                    writer.write("best = {}\n".format(best_perf))

                    if len(_find_valid_cands(global_step)) > 1:
                        _log_removal_candidates(checkpoint_path)
                    writer.write("=" * 50 + "\n")
def _store_object(param, obj):
    """Register ``obj`` under the next free object id and return that id.

    The id is also written to ``param.base.object_id`` and the global
    ``object_id`` counter is advanced by one.
    """
    global object_id
    assigned_id = object_id
    object_dict[assigned_id] = obj
    param.base.object_id = assigned_id
    object_id += 1
    return assigned_id


def handler(queue, kvm_fd, mm):
    """Serve forwarded TensorFlow API calls taken from ``queue``.

    Each task names a ``TF_PY_PARAM`` record inside ``mm`` (at
    ``task.data_ptr``).  The record's ``cmd_id`` selects a TensorFlow call;
    its arguments are decoded with ``parse_param``, results are either stored
    in the global ``object_dict`` (the new id is written back to
    ``param.base.object_id``) or serialized with ``writeback_result``.  After
    every task the record is marked done and completion is signalled through
    an ioctl on ``kvm_fd``.

    Args:
      queue: Task queue; items expose ``vm_id``, ``node_id`` and ``data_ptr``.
      kvm_fd: File descriptor used for ``IOCTL_KVM_NOTIFY_TASK_FINISHED``.
      mm: Buffer from which ``TF_PY_PARAM`` records are mapped.

    Returns:
      ``STATUS_CALLBACK_DONE`` / ``STATUS_CALLBACK_ERROR`` when a
      ``TF_PY_NW_CALLBACK_DONE`` command arrives, ``STATUS_CALLBACK_TIMEOUT``
      when a pending callback misses its deadline, and ``None`` after a
      ``STOP_HANDLER`` task.
    """
    global object_dict
    global object_id
    global callback_stack
    global initialized

    if not initialized:
        callback_stack = []
        object_dict = dict()
        object_id = 1
        # TODO: forward logging or disable it in test
        tf.logging.set_verbosity(tf.logging.INFO)
        initialized = True
        print("handler is initialized")

    while True:
        task = queue.get(block=True)
        while task is None:
            try:
                task = queue.get(block=True, timeout=5)
            except Queue.Empty:
                task = None
                # While waiting, give up on a callback whose deadline passed.
                if callback_stack:
                    if time.time() > callback_stack[-1]["deadline"]:
                        print("callback failed deadline")
                        return STATUS_CALLBACK_TIMEOUT

        vm_id = task.vm_id
        if vm_id == STOP_HANDLER:
            break

        param = TF_PY_PARAM.from_buffer(mm, task.data_ptr)
        callback_param = TF_PY_PARAM.from_buffer(
            mm, task.data_ptr + param.base.callback_param_offset)
        print(
            "retrieve [vm#%d] tensorflow task=%d cmd=%d, obj=%d, dstore=%lx, done=%d"
            % (task.vm_id, task.node_id, param.base.cmd_id,
               param.base.object_id, param.base.dstore_size, param.base.done))
        print(
            "retrieve [vm#%d] callback node cmd=%d, obj=%d, dstore=%lx, done=%d"
            % (task.vm_id, callback_param.base.cmd_id,
               callback_param.base.object_id, callback_param.base.dstore_size,
               callback_param.base.done))
        cmd_id = param.base.cmd_id

        def parse_args(count):
            # Decode param1..param<count> of the current record, in order.
            return [
                parse_param(vm_id, mm, param, getattr(param, "param%d" % idx))
                for idx in range(1, count + 1)
            ]

        try:
            if cmd_id == TF_PY_NW_CALLBACK_DONE:
                param.base.done = STATUS_TASK_DONE
                ret = fcntl.ioctl(kvm_fd, IOCTL_KVM_NOTIFY_TASK_FINISHED,
                                  task.node_id)
                if ret < 0:
                    print("notify task completion failed: %d\n" % ret)
                if callback_stack and \
                        callback_stack[-1]["callback_id"] == param.base.object_id:
                    print("callback is finished")
                    return STATUS_CALLBACK_DONE
                else:
                    print("callback is error")
                    return STATUS_CALLBACK_ERROR

            if cmd_id == TF_PY_SESSION_INIT:
                print("SessionInit!!!")
                p1 = parse_param(vm_id, mm, param, param.param1)
                print(p1)
                _store_object(param, tf.Session(p1))

            elif cmd_id == TF_PY_SESSION_ENTER:
                sess = object_dict[param.base.object_id]
                ctx_sess = sess.__enter__()
                if sess is not ctx_sess:
                    # unlikely: __enter__ returned a different object, so
                    # report the id under which that object is registered.
                    print("unlikely to search for sess")
                    param.base.object_id = next(
                        obj_id for obj_id, obj in object_dict.items()
                        if obj is ctx_sess)

            elif cmd_id == TF_PY_SESSION_EXIT:
                p1, p2, p3 = parse_args(3)
                sess = object_dict[param.base.object_id]
                sess.__exit__(p1, p2, p3)

            elif cmd_id == TF_PY_SESSION_DEL:
                sess = object_dict[param.base.object_id]
                sess.__del__()

            # deprecated
            elif cmd_id == TF_PY_SESSION_RUN:
                sess = object_dict[param.base.object_id]
                p1 = parse_param(vm_id, mm, param, param.param1)
                if isinstance(p1, NwObject):
                    print("get NwObject=%d" % p1.object_id())
                    p1 = object_dict[p1.object_id()]
                print(p1)
                ret_val = sess.run(p1)
                print(ret_val)
                writeback_result(vm_id, mm, param, param.ret_val1, ret_val)

            elif cmd_id == TF_PY_TPU_CLUSTER_RESOLVER_INIT:
                print("resloverInit!!!")
                p1, p2, p3 = parse_args(3)
                print("TPUClusterResolver", p1, p2, p3)
                tpu_grpc = tf.contrib.cluster_resolver.TPUClusterResolver(
                    tpu=p1, zone=p2, project=p3)
                new_id = _store_object(param, tpu_grpc)
                print("assign obj_id=%d" % new_id)

            # deprecated
            elif cmd_id == TF_PY_TPU_CLUSTER_RESOLVER_MASTER:
                # FIXED: use __getattr__
                print("master!!")
                tpu_grpc = object_dict[param.base.object_id]
                # FIXED: may have parameters
                tpu_grpc_url = tpu_grpc.master()
                # serialize return value
                writeback_result(vm_id, mm, param, param.ret_val1,
                                 tpu_grpc_url)

            elif cmd_id == TF_PY_TPU_INITIALIZE_SYSTEM:
                # TODO: may have parameters
                _store_object(param, tpu.initialize_system())

            elif cmd_id == TF_PY_TPU_SHUTDOWN_SYSTEM:
                # TODO: may have parameters
                _store_object(param, tpu.shutdown_system())

            elif cmd_id == TF_PY_GLOBAL_VARIABLES_INITIALIZER:
                # TODO: may have parameters
                _store_object(param, tf.global_variables_initializer())

            elif cmd_id == TF_PY_ONES:
                print("param1 size=%ld,offset=%ld" %
                      (param.param1.size, param.param1.offset))
                p1, p2 = parse_args(2)
                if p2 is None:
                    p2 = dtypes.float32
                print(p2)
                _store_object(param, tf.ones(p1, p2))

            elif cmd_id == TF_PY_RANDOM_UNIFORM:
                p1, p2, p3, p4, p5, p6 = parse_args(6)
                if p2 is None:
                    p2 = 0
                if p4 is None:
                    p4 = dtypes.float32
                print(p1, p2, p3, p4)
                _store_object(param,
                              tf.random_uniform(p1, p2, p3, p4, p5, p6))

            elif cmd_id == TF_PY_TRANSPOSE:
                p1, p2, p3, p4 = parse_args(4)
                p1 = object_dict[p1.object_id()]
                if p3 is None:
                    p3 = "transpose"
                if p4 is None:
                    p4 = False
                print("transpose", p1, p2, p3, p4)
                _store_object(param, tf.transpose(p1, p2, p3, p4))

            elif cmd_id == TF_PY_CAST:
                p1, p2, p3 = parse_args(3)
                p1 = object_dict[p1.object_id()]
                print("cast", p1, p2, p3)
                _store_object(param, tf.cast(p1, p2, p3))

            elif cmd_id == TF_PY_EXPAND_DIMS:
                p1, p2, p3, p4 = parse_args(4)
                p1 = object_dict[p1.object_id()]
                print("expand_dims", p1, p2, p3, p4)
                _store_object(param, tf.expand_dims(p1, p2, p3, p4))

            elif cmd_id == TF_PY_CONCAT:
                p1, p2, p3 = parse_args(3)
                p1 = object_dict[p1.object_id()]
                if p3 is None:
                    p3 = "concat"
                print("concat", p1, p2, p3)
                _store_object(param, tf.concat(p1, p2, p3))

            elif cmd_id == TF_PY_EQUAL:
                p1, p2, p3 = parse_args(3)
                p1 = object_dict[p1.object_id()]
                print("equal", p1, p2, p3)
                if isinstance(p2, NwObject):
                    p2 = object_dict[p2.object_id()]
                result = tf.equal(p1, p2, p3)
                print(result)
                _store_object(param, result)

            elif cmd_id == TF_PY_FIXED_LEN_FEATURE:
                p1, p2, p3 = parse_args(3)
                feature = tf.FixedLenFeature(p1, p2, p3)
                print(feature)
                _store_object(param, feature)

            elif cmd_id == TF_PY_VAR_LEN_FEATURE:
                p1 = parse_param(vm_id, mm, param, param.param1)
                feature = tf.VarLenFeature(p1)
                print(feature)
                _store_object(param, feature)

            elif cmd_id == TF_PY_PARSE_SINGLE_EXAMPLE:
                p1, p2, p3, p4 = parse_args(4)
                print(p1, p2)
                # expand embedded NwObject
                if isinstance(p1, NwObject):
                    p1 = object_dict[p1.object_id()]
                dict_walker(p2)
                print("after translation", p1, p2)
                result = tf.parse_single_example(p1, p2, p3, p4)
                print(result)
                dict_mapper(result)
                print(result)
                writeback_result(vm_id, mm, param, param.ret_val1, result)

            elif cmd_id == TF_PY_CONTROL_FLOW_OPS_SWITCH:
                p1, p2, p3, p4 = parse_args(4)
                p1 = object_dict[p1.object_id()]
                p2 = object_dict[p2.object_id()]
                print("switch", p1, p2, p3, p4)
                result = control_flow_ops.switch(p1, p2, p3, p4)
                print(result)
                mapped_tuple = tuple_mapper(result, [0, 1])
                print(mapped_tuple)
                writeback_result(vm_id, mm, param, param.ret_val1,
                                 mapped_tuple)

            elif cmd_id == TF_PY_CONTROL_FLOW_OPS_MERGE:
                p1, p2 = parse_args(2)
                p1 = object_dict[p1.object_id()]
                print("merge", p1, p2)
                list_walker(p1)
                print("merge-new", p1, p2)
                result = control_flow_ops.merge(p1, p2)
                print(result)
                mapped_tuple = tuple_mapper(result, [0])
                print(mapped_tuple)
                writeback_result(vm_id, mm, param, param.ret_val1,
                                 mapped_tuple)

            elif cmd_id == TF_PY_TPU_REWRITE:
                # TODO: may have parameters
                p1, p2 = parse_args(2)
                # expand embedded NwObject
                list_walker(p2)
                new_id = _store_object(param, tpu.rewrite(p1, p2))
                print("rewrite object_id=%d" % new_id)

            elif cmd_id == TF_PY_TPU_RUN_CONFIG:
                p1, p2, p3, p4, p5 = parse_args(5)
                # expand embedded NwObject
                p4 = object_dict[p4.object_id()]
                print(p4, p5)
                _store_object(param, tpu.RunConfig(p1, p2, p3, p4, **p5))

            elif cmd_id == TF_PY_TPU_TPU_ESTIMATOR:
                call_args = parse_args(12)
                # default parameter: param5, param10 and param11 fall back
                # to True; every other slot keeps None.
                for slot in (4, 9, 10):
                    if call_args[slot] is None:
                        call_args[slot] = True
                # expand embedded NwObject
                call_args[2] = object_dict[call_args[2].object_id()]
                print(call_args[2])
                _store_object(param, tpu.TPUEstimator(*call_args))

            elif cmd_id == TF_PY_IMAGE_RESIZE_IMAGES:
                p1, p2, p3, p4, p5 = parse_args(5)
                # default parameter
                if p3 is None:
                    p3 = ResizeMethod.BILINEAR
                if p4 is None:
                    p4 = False
                if p5 is None:
                    p5 = False
                # expand embedded NwObject
                p1 = object_dict[p1.object_id()]
                print(p1)
                img = tf.image.resize_images(p1, p2, p3, p4, p5)
                # TODO: it may return a float
                _store_object(param, img)

            elif cmd_id == TF_PY_SLICE:
                p1, p2, p3, p4 = parse_args(4)
                # expand embedded NwObject
                print(p1, p2, p3)
                p1 = object_dict[p1.object_id()]
                _store_object(param, tf.slice(p1, p2, p3, p4))

            elif cmd_id == TF_PY_SHAPE:
                p1, p2, p3 = parse_args(3)
                if p3 is None:
                    p3 = dtypes.int32
                # expand embedded NwObject
                print(p1, p2, p3)
                p1 = object_dict[p1.object_id()]
                _store_object(param, tf.shape(p1, p2, p3))

            elif cmd_id == TF_PY_IMAGE_SAMPLE_DISTORTED_BOUNDING_BOX:
                call_args = parse_args(10)
                # default parameter (param5)
                if call_args[4] is None:
                    call_args[4] = 0.1
                print("sample_distorted_bounding_box", call_args[0],
                      call_args[1])
                result = tf.image.sample_distorted_bounding_box(*call_args)
                print(result)
                mapped_tuple = tuple_mapper(result, [0, 1, 2])
                print(mapped_tuple)
                writeback_result(vm_id, mm, param, param.ret_val1,
                                 mapped_tuple)

            elif cmd_id == TF_PY_IMAGE_DRAW_BOUNDING_BOXES:
                p1, p2, p3 = parse_args(3)
                # expand embedded NwObject
                print(p1, p2, p3)
                p1 = object_dict[p1.object_id()]
                p2 = object_dict[p2.object_id()]
                _store_object(param,
                              tf.image.draw_bounding_boxes(p1, p2, p3))

            elif cmd_id == TF_PY_IMAGE_DECODE_JPEG:
                call_args = parse_args(8)
                # Defaults for param2..param7; param8 keeps None.
                for slot, default in ((1, 0), (2, 1), (3, True), (4, False),
                                      (5, 1), (6, "")):
                    if call_args[slot] is None:
                        call_args[slot] = default
                call_args[0] = object_dict[call_args[0].object_id()]
                _store_object(param, tf.image.decode_jpeg(*call_args))

            elif cmd_id == TF_PY_IMAGE_CONVERT_IMAGE_DTYPE:
                p1, p2, p3, p4 = parse_args(4)
                # expand embedded NwObject
                print(p1, p2, p3)
                p1 = object_dict[p1.object_id()]
                if p3 is None:
                    p3 = False
                _store_object(param,
                              tf.image.convert_image_dtype(p1, p2, p3, p4))

            elif cmd_id == TF_PY_DATA_DATASET_LIST_FILES:
                p1, p2, p3 = parse_args(3)
                print(p1, p2, p3)
                if isinstance(p1, NwObject):
                    # Was `oaram1` (NameError) -- look up the forwarded object.
                    p1 = object_dict[p1.object_id()]
                _store_object(param, tf.data.Dataset.list_files(p1, p2, p3))

            elif cmd_id == TF_PY_NW_OBJECT:
                print("nw_object!! id = %d" % param.base.object_id)
                obj = object_dict[param.base.object_id]
                name, args, kwargs = parse_args(3)
                print("NwObject", obj, name, args, kwargs)

                # expand embedded NwObject
                args = list(args)
                list_walker(args)
                args = tuple(args)
                dict_walker(kwargs)
                print("after translation", obj, name, args, kwargs)

                # run
                result = getattr(obj, name)(*(args or []), **(kwargs or {}))
                param.base.object_id = -1
                param.ret_val1.size = 0
                print("analyze type", type(result), result)

                # TODO: go through tuple, dict or list
                if isinstance(result, tuple):
                    result = tuple_mapper(result, range(len(result)))
                if isinstance(result, dict):
                    dict_mapper(result)
                if isinstance(result, list):
                    list_mapper(result)

                # serialize return value; results that cannot be pickled are
                # kept here and only their id is returned.
                if is_unpickleable_type(result) or \
                        pickle.pickles(result) is False:
                    _store_object(param, result)
                elif result is not None:
                    writeback_result(vm_id, mm, param, param.ret_val1,
                                     result)

            elif cmd_id == TF_PY_NW_METHOD:
                # Reuse as callback
                #ins = parse_param(vm_id, mm, param, param.param1)
                #name = parse_param(vm_id, mm, param, param.param2)
                #print(ins, name)
                #method = getattr(ins, name)
                #print(method)
                #object_dict[object_id] = method
                cw = callback_constructor(object_id, callback_param, param,
                                          mm, vm_id, queue, kvm_fd)
                _store_object(param, cw)

            elif cmd_id == TF_PY_NW_CALLBACK_TEST:
                nw_func = parse_param(vm_id, mm, param, param.param1)
                print(nw_func, nw_func.object_id())
                func = object_dict[nw_func.object_id()]
                print("callback func", func)
                x = parse_param(vm_id, mm, param, param.param2)
                y = parse_param(vm_id, mm, param, param.param3)
                result = func(x, y)
                print(result)
                writeback_result(vm_id, mm, param, param.ret_val1, result)

            else:
                print("unsupported Tensorflow API")

        except Exception as error:
            param.base.done = STATUS_TASK_ERROR
            #mm.flush(task.data_ptr, sizeof(PARAM_BASE))
            # Two spaces kept: matches the output of the former print
            # statement `print "fault: ", str(error)`.
            print("fault:  %s" % str(error))
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print(exc_type, fname, exc_tb.tb_lineno)
            # Print the traceback of the exception being handled (the
            # previous print_stack() only showed the handler's own callers).
            traceback.print_exc()

        print("finished [vm#%d] TF task %d cmd %d" %
              (task.vm_id, task.node_id, param.base.cmd_id))
        # NOTE(review): this also runs after a failed command, so the
        # STATUS_TASK_ERROR set in the except block above is overwritten --
        # confirm that is what the guest side expects.
        param.base.done = STATUS_TASK_DONE
        #mm.flush(task.data_ptr, sizeof(PARAM_BASE))
        #mm.flush(INVOKER_FIFO_SIZE + VGPU_DSTORE_SIZE * (vm_id - 1) +
        #         param.base.dstore_offset + param.ret_val1.offset,
        #         param.ret_val1.size)

        # notify hypervisor
        ret = fcntl.ioctl(kvm_fd, IOCTL_KVM_NOTIFY_TASK_FINISHED,
                          task.node_id)
        if ret < 0:
            print("notify task completion failed: %d\n" % ret)
def main(unused_argv):
    """Train SqueezeNet with a TPUEstimator, evaluating between train cycles.

    The parameter set is assembled from the default SqueezeNet config, then
    overridden (in order) by ``FLAGS.config_file``, ``FLAGS.params_override``
    and the individual input flags. From it the total number of training
    steps and the number of steps between evaluations are derived, and the
    model is trained/evaluated ``params.eval.num_evals`` times.

    Args:
        unused_argv: Positional command-line arguments; ignored.
    """
    params = params_dict.ParamsDict(
        squeezenet_config.SQUEEZENET_CFG,
        squeezenet_config.SQUEEZENET_RESTRICTIONS)
    params = params_dict.override_params_dict(
        params, FLAGS.config_file, is_strict=True)
    params = params_dict.override_params_dict(
        params, FLAGS.params_override, is_strict=True)
    params = flags_to_params.override_params_from_input_flags(params, FLAGS)

    total_steps = (
        (params.train.num_epochs * params.train.num_examples_per_epoch) //
        params.train.train_batch_size)
    params.override(
        {
            "train": {
                "total_steps": total_steps
            },
            "eval": {
                "num_steps_per_eval": (total_steps // params.eval.num_evals)
            },
        },
        is_strict=False)

    params.validate()
    params.lock()

    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    # Bind the name unconditionally: previously it was only assigned when
    # async checkpointing was off, so enabling async checkpointing raised a
    # NameError below. None leaves step-based checkpointing disabled.
    save_checkpoints_steps = None
    if not params.use_async_checkpointing:
        save_checkpoints_steps = max(5000, params.train.iterations_per_loop)

    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=params.model_dir,
        save_checkpoints_steps=save_checkpoints_steps,
        session_config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False),
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=params.train.iterations_per_loop,
            num_shards=params.train.num_cores_per_replica,
        ),
    )

    estimator = contrib_tpu.TPUEstimator(
        model_fn=squeezenet_model.model_fn,
        use_tpu=params.use_tpu,
        config=run_config,
        train_batch_size=params.train.train_batch_size,
        eval_batch_size=params.eval.eval_batch_size,
        params=params.as_dict(),
    )

    eval_steps = (params.eval.num_eval_examples //
                  params.eval.eval_batch_size)
    for eval_cycle in range(params.eval.num_evals):
        current_cycle_last_train_step = (
            (eval_cycle + 1) * params.eval.num_steps_per_eval)
        # The value is a cumulative global step, so it must be passed as
        # `max_steps`. With `steps=` each call would train that many
        # *additional* steps and overshoot `total_steps`.
        estimator.train(
            input_fn=data_pipeline.InputReader(FLAGS.data_dir,
                                               is_training=True),
            max_steps=current_cycle_last_train_step)

        tf.logging.info("Running evaluation")
        tf.logging.info(
            "%s",
            estimator.evaluate(
                input_fn=data_pipeline.InputReader(FLAGS.data_dir,
                                                   is_training=False),
                steps=eval_steps))
def main(_):
    """Fine-tune, evaluate and/or run prediction with an ALBERT classifier.

    Which phases run is controlled by ``FLAGS.do_train``, ``FLAGS.do_eval``
    and ``FLAGS.do_predict``; at least one of them must be set. Examples are
    converted to TFRecord files (under ``FLAGS.data_examples`` when set,
    otherwise under ``FLAGS.output_dir``) and fed to a ``TPUEstimator``.

    Raises:
        ValueError: If no phase is enabled, if ``FLAGS.max_seq_length``
            exceeds the model's position embeddings, or if the task name
            is unknown.
    """

    def _record_path(basename):
        # Resolve where a TFRecord file lives, creating the dedicated
        # examples directory when one was requested.
        if not FLAGS.data_examples:
            return os.path.join(FLAGS.output_dir, basename)
        tf.gfile.MakeDirs(FLAGS.data_examples)
        return os.path.join(FLAGS.data_examples, basename)

    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
        "xnli": XnliProcessor,
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not (FLAGS.do_train or FLAGS.do_eval or FLAGS.do_predict):
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    if FLAGS.max_seq_length > albert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the ALBERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, albert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file,
        sp_cdc_file=FLAGS.cdc_spm_model_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    tpu_config = contrib_tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_tpu_cores,
        per_host_input_for_training=(
            contrib_tpu.InputPipelineConfig.PER_HOST_V2))
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=8,
        tpu_config=tpu_config)

    # Step counts stay None unless training is requested.
    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        albert_config=albert_config,
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu)

    # Without a TPU this falls back to a regular Estimator on CPU or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_file = _record_path("train.tf_record")
        file_based_convert_examples_to_features(
            train_examples, label_list, FLAGS.max_seq_length, tokenizer,
            train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_examples))
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        tf.logging.set_verbosity(tf.logging.INFO)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU batches have a fixed size, so pad the example list up to a
            # multiple of the batch size. The padding examples are ignored
            # later on and do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())

        eval_file = _record_path("eval.tf_record")
        file_based_convert_examples_to_features(
            eval_examples, label_list, FLAGS.max_seq_length, tokenizer,
            eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        # None makes the estimator run through the entire set; on TPU the
        # number of steps has to be given explicitly.
        eval_steps = None
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=bool(FLAGS.use_tpu))

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for metric_name in sorted(result):
                metric_text = str(result[metric_name])
                tf.logging.info(" %s = %s", metric_name, metric_text)
                writer.write("%s = %s\n" % (metric_name, metric_text))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # Same fixed-batch-size padding as for evaluation; the padding
            # examples are skipped when results are written out.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        predict_file = _record_path("predict.tf_record")
        file_based_convert_examples_to_features(
            predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
            predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=bool(FLAGS.use_tpu))

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        output_submit_file = os.path.join(FLAGS.output_dir,
                                          "submit_results.tsv")
        pred_writer = tf.gfile.GFile(output_predict_file, "w")
        sub_writer = tf.gfile.GFile(output_submit_file, "w")
        with pred_writer, sub_writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            paired = zip(predict_examples, result)
            for index, (example, prediction) in enumerate(paired):
                probabilities = prediction["probabilities"]
                # Everything past the real examples is TPU padding.
                if index >= num_actual_predict_examples:
                    break
                columns = [str(class_probability)
                           for class_probability in probabilities]
                pred_writer.write("\t".join(columns) + "\n")
                actual_label = label_list[int(prediction["predictions"])]
                sub_writer.write(
                    six.ensure_str(example.guid) + "\t" + actual_label +
                    "\n")
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
def experiment(model_config):
    """Run one U-Net source-separation experiment on a TPUEstimator.

    Depending on ``model_config['mode']`` this either trains and then
    evaluates the model (``'train_and_eval'``) or runs prediction on the
    test split and saves/uploads the estimated sources (``'predict'``).

    Args:
        model_config: Mapping of experiment settings. Keys read here:
            ``use_tpu``, ``model_base_dir``, ``experiment_id``,
            ``data_path``, ``use_bfloat16``, ``batch_size``, ``load_model``,
            ``mode``, ``training_steps``, ``evaluation_steps``,
            ``estimates_path`` and ``expected_sr``. Every entry except
            ``batch_size`` is also forwarded to the model function as
            ``params``.
    """

    def _model_dir():
        # Built lazily: only some code paths read these two keys.
        return (model_config['model_base_dir'] + os.path.sep +
                str(model_config["experiment_id"]))

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("SCRIPT START")

    on_tpu = model_config["use_tpu"]
    if on_tpu:
        assert 'COLAB_TPU_ADDR' in os.environ, 'Missing TPU; did you request a TPU in Notebook Settings?'
        auth.authenticate_user()

    tf.logging.info("TPU resolver started")

    if 'COLAB_TPU_ADDR' not in os.environ:
        tf_master = ''
    else:
        tf_master = 'grpc://{}'.format(os.environ['COLAB_TPU_ADDR'])
        # Upload credentials to the TPU; they then apply to all future
        # sessions on this TPU.
        with tf.Session(tf_master) as sess:
            with open('/content/adc.json', 'r') as f:
                auth_info = json.load(f)
            tf.contrib.cloud.configure_gcs(sess, credentials=auth_info)

    # NOTE: no explicit TPUClusterResolver is used; the TPU is addressed
    # through `master` instead.
    if on_tpu:
        tpu_settings = tpu.TPUConfig(
            iterations_per_loop=500,
            num_shards=8,
            per_host_input_for_training=tpu.InputPipelineConfig.PER_HOST_V1)
        config = tpu.RunConfig(
            tf_random_seed=RANDOM_SEED,
            master=tf_master,
            model_dir=_model_dir(),
            save_checkpoints_steps=500,
            save_summary_steps=250,
            tpu_config=tpu_settings)
    else:
        config = tpu.RunConfig(
            save_checkpoints_steps=500,
            save_summary_steps=250)

    tf.logging.info("Creating datasets")

    def _make_input(split):
        # One input pipeline per dataset split.
        return urmp_input.URMPInput(
            mode=split,
            data_dir=model_config['data_path'],
            transpose_input=False,
            use_bfloat16=model_config['use_bfloat16'])

    urmp_train = _make_input('train')
    urmp_eval = _make_input('eval')
    urmp_test = _make_input('test')

    tf.logging.info("Assigning TPUEstimator")
    # Optimize in a +supervised fashion until validation loss worsens
    batch_size = model_config['batch_size']
    model_params = {
        key: value
        for key, value in model_config.items()
        if key != 'batch_size'
    }
    separator = tpu.TPUEstimator(
        use_tpu=on_tpu,
        model_fn=unet_separator,
        config=config,
        train_batch_size=batch_size,
        eval_batch_size=batch_size,
        predict_batch_size=batch_size,
        params=model_params)

    if model_config['load_model']:
        tf.logging.info("Load the model")
        # The returned step is not used further in this function.
        current_step = estimator._load_global_step_from_checkpoint_dir(
            _model_dir())

    mode = model_config['mode']
    if mode == 'train_and_eval':
        tf.logging.info("Train the model")
        # Should be an early stopping here, but it will come with tf 1.10
        separator.train(
            input_fn=urmp_train.input_fn,
            steps=model_config['training_steps'])

        tf.logging.info("Supervised training finished!")
        tf.logging.info("Evaluate model")
        eval_result = separator.evaluate(
            input_fn=urmp_eval.input_fn,
            steps=model_config['evaluation_steps'])
        tf.logging.info('Evaluation results: %s' % eval_result)
    elif mode == 'predict':
        tf.logging.info("Test results and save predicted sources:")
        predictions = separator.predict(input_fn=urmp_test.input_fn)
        for prediction in predictions:
            Test.save_prediction(
                prediction,
                estimates_path=model_config["estimates_path"],
                sample_rate=model_config["expected_sr"])
        Utils.concat_and_upload(model_config["estimates_path"],
                                _model_dir())
def main(_):
    """Fine-tune, evaluate and/or predict with an ALBERT TF-Hub classifier.

    The phases are selected with ``FLAGS.do_train``, ``FLAGS.do_eval`` and
    ``FLAGS.do_predict``; at least one of them must be set.

    Raises:
        ValueError: If no phase is enabled or the task name is unknown.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "cola": classifier_utils.ColaProcessor,
        "mnli": classifier_utils.MnliProcessor,
        "mrpc": classifier_utils.MrpcProcessor,
    }

    # This function has a prediction branch too, so a predict-only run is a
    # valid invocation and must not be rejected here.
    if not (FLAGS.do_train or FLAGS.do_eval or FLAGS.do_predict):
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict` must be "
            "True.")

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels()

    tokenizer = create_tokenizer_from_hub_module(
        FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    # Step counts stay None unless training is requested.
    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        num_labels=len(label_list),
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        albert_hub_module_handle=FLAGS.albert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on
    # CPU or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_features = classifier_utils.convert_examples_to_features(
            train_examples, label_list, FLAGS.max_seq_length, tokenizer,
            task_name)
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_examples))
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        train_input_fn = classifier_utils.input_fn_builder(
            features=train_features,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        eval_features = classifier_utils.convert_examples_to_features(
            eval_examples, label_list, FLAGS.max_seq_length, tokenizer,
            task_name)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d", len(eval_examples))
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        # None tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            # Eval will be slightly WRONG on the TPU because it will
            # truncate the last batch. Integer division avoids a float
            # round-trip when counting whole batches.
            eval_steps = len(eval_examples) // FLAGS.eval_batch_size

        eval_drop_remainder = bool(FLAGS.use_tpu)
        eval_input_fn = classifier_utils.input_fn_builder(
            features=eval_features,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        if FLAGS.use_tpu:
            # Discard batch remainder if running on TPU
            n = len(predict_examples)
            predict_examples = predict_examples[:(
                n - n % FLAGS.predict_batch_size)]

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        classifier_utils.file_based_convert_examples_to_features(
            predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
            predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info(" Num examples = %d", len(predict_examples))
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=FLAGS.use_tpu)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            tf.logging.info("***** Predict results *****")
            for prediction in result:
                probabilities = prediction["probabilities"]
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
def main(_):
    """Fine-tune, evaluate and/or test an ALBERT model on the RACE task.

    Phases are selected with ``FLAGS.do_train``, ``FLAGS.do_eval`` and
    ``FLAGS.do_predict``. The evaluation phase polls ``FLAGS.output_dir``
    for checkpoints until the evaluated global step reaches
    ``FLAGS.train_step``, copies the best-scoring checkpoint to
    ``model.ckpt-best`` and removes already-evaluated checkpoints. The
    prediction phase evaluates ``model.ckpt-best`` on the test examples.

    Raises:
        ValueError: If no phase is enabled, if ``FLAGS.max_seq_length``
            exceeds the model's position embeddings, or if the task name
            is unknown.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {"race": race_utils.RaceProcessor}

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    if FLAGS.max_seq_length > albert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the ALBERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, albert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name](
        use_spm=True if FLAGS.spm_model_file else False,
        do_lower_case=FLAGS.do_lower_case,
        high_only=FLAGS.high_only,
        middle_only=FLAGS.middle_only)

    label_list = processor.get_labels()

    tokenizer = fine_tuning_utils.create_vocab(
        vocab_file=FLAGS.vocab_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file,
        hub_module=FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        # Never run more iterations per loop than the checkpoint interval.
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=int(FLAGS.save_checkpoints_steps),
        keep_checkpoint_max=0,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)

    model_fn = race_utils.model_fn_builder(
        albert_config=albert_config,
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.train_step,
        num_warmup_steps=FLAGS.warmup_step,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        max_seq_length=FLAGS.max_seq_length,
        dropout_prob=FLAGS.dropout_prob,
        hub_module=FLAGS.albert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on
    # CPU or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    # Best dev accuracy seen by the eval phase. Bound here so the predict
    # phase can run without do_eval (it used to raise NameError).
    best_perf = None

    if FLAGS.do_train:
        # Reuse an existing feature file instead of re-converting.
        if not tf.gfile.Exists(FLAGS.train_file):
            race_utils.file_based_convert_examples_to_features(
                train_examples, label_list, FLAGS.max_seq_length, tokenizer,
                FLAGS.train_file, FLAGS.max_qa_length)
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num examples = %d", len(train_examples))
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", FLAGS.train_step)
        train_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=FLAGS.train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size,
            multiple=len(label_list))
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_step)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the
            # number of examples must be a multiple of the batch size, or
            # else examples will get dropped. So we pad with fake examples
            # which are ignored later on. These do NOT count towards the
            # metric (all tf.metrics support a per-instance weight, and
            # these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(classifier_utils.PaddingInputExample())

        if not tf.gfile.Exists(FLAGS.eval_file):
            race_utils.file_based_convert_examples_to_features(
                eval_examples, label_list, FLAGS.max_seq_length, tokenizer,
                FLAGS.eval_file, FLAGS.max_qa_length)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=FLAGS.eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.eval_batch_size,
            multiple=len(label_list))

        def _find_valid_cands(curr_step):
            """Return index files of checkpoints newer than `curr_step`."""
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if idx != "best" and int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        key_name = "eval_accuracy"
        if tf.gfile.Exists(checkpoint_path + ".index"):
            # Resume from a previously saved best checkpoint.
            result = estimator.evaluate(input_fn=eval_input_fn,
                                        steps=eval_steps,
                                        checkpoint_path=checkpoint_path)
            best_perf = result[key_name]
            global_step = result["global_step"]
        else:
            global_step = -1
            best_perf = -1
            checkpoint_path = None

        # The context manager closes the results file even if evaluation or
        # a file operation raises part-way through the polling loop.
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            while global_step < FLAGS.train_step:
                # Collect every numbered checkpoint currently on disk.
                steps_and_files = {}
                filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
                for filename in filenames:
                    if filename.endswith(".index"):
                        ckpt_name = filename[:-6]
                        cur_filename = os.path.join(FLAGS.output_dir,
                                                    ckpt_name)
                        if cur_filename.split("-")[-1] == "best":
                            continue
                        gstep = int(cur_filename.split("-")[-1])
                        if gstep not in steps_and_files:
                            tf.logging.info(
                                "Add {} to eval list.".format(cur_filename))
                            steps_and_files[gstep] = cur_filename
                tf.logging.info(
                    "found {} files.".format(len(steps_and_files)))
                if not steps_and_files:
                    tf.logging.info(
                        "found 0 file, global step: {}. Sleeping.".format(
                            global_step))
                    time.sleep(1)
                else:
                    for ele in sorted(steps_and_files.items()):
                        step, checkpoint_path = ele
                        if global_step >= step:
                            # Already evaluated: delete it, but only while
                            # more than one newer checkpoint still exists.
                            if len(_find_valid_cands(step)) > 1:
                                for ext in [
                                        "meta", "data-00000-of-00001",
                                        "index"
                                ]:
                                    src_ckpt = checkpoint_path + ".{}".format(
                                        ext)
                                    tf.logging.info(
                                        "removing {}".format(src_ckpt))
                                    tf.gfile.Remove(src_ckpt)
                            continue
                        result = estimator.evaluate(
                            input_fn=eval_input_fn,
                            steps=eval_steps,
                            checkpoint_path=checkpoint_path)
                        global_step = result["global_step"]
                        tf.logging.info("***** Eval results *****")
                        for key in sorted(result.keys()):
                            tf.logging.info(" %s = %s", key,
                                            str(result[key]))
                            writer.write("%s = %s\n" %
                                         (key, str(result[key])))
                        writer.write("best = {}\n".format(best_perf))
                        if result[key_name] > best_perf:
                            # New best: copy its files to "-best".
                            best_perf = result[key_name]
                            for ext in [
                                    "meta", "data-00000-of-00001", "index"
                            ]:
                                src_ckpt = checkpoint_path + ".{}".format(ext)
                                tgt_ckpt = checkpoint_path.rsplit(
                                    "-", 1)[0] + "-best.{}".format(ext)
                                tf.logging.info("saving {} to {}".format(
                                    src_ckpt, tgt_ckpt))
                                tf.gfile.Copy(src_ckpt, tgt_ckpt,
                                              overwrite=True)
                                writer.write("saved {} to {}\n".format(
                                    src_ckpt, tgt_ckpt))

                        if len(_find_valid_cands(global_step)) > 1:
                            for ext in [
                                    "meta", "data-00000-of-00001", "index"
                            ]:
                                src_ckpt = checkpoint_path + ".{}".format(ext)
                                tf.logging.info(
                                    "removing {}".format(src_ckpt))
                                tf.gfile.Remove(src_ckpt)
                        writer.write("=" * 50 + "\n")

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        # None makes `evaluate` run until the input is exhausted. This is
        # the off-TPU case, where the name used to be unbound (NameError).
        predict_steps = None
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the
            # number of examples must be a multiple of the batch size, or
            # else examples will get dropped. So we pad with fake examples
            # which are ignored later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(
                    classifier_utils.PaddingInputExample())
            assert len(predict_examples) % FLAGS.predict_batch_size == 0
            predict_steps = int(
                len(predict_examples) // FLAGS.predict_batch_size)

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        race_utils.file_based_convert_examples_to_features(
            predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
            predict_file, FLAGS.max_qa_length)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size,
            multiple=len(label_list))

        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        result = estimator.evaluate(input_fn=predict_input_fn,
                                    steps=predict_steps,
                                    checkpoint_path=checkpoint_path)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "predict_results.txt")
        with tf.gfile.GFile(output_predict_file, "w") as pred_writer:
            tf.logging.info("***** Predict results *****")
            pred_writer.write("***** Predict results *****\n")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                pred_writer.write("%s = %s\n" % (key, str(result[key])))
            # The best dev score is only known when the eval phase ran in
            # this same invocation.
            if best_perf is not None:
                pred_writer.write("best = {}\n".format(best_perf))
def main(_):
    """Fine-tune and/or continuously evaluate a BERT multiple-choice model.

    With ``FLAGS.do_train`` the model is trained on ``FLAGS.train_file``
    (created from ``FLAGS.train_raw_data`` when missing). With
    ``FLAGS.do_eval`` every checkpoint that appears in the model directory
    is evaluated on ``FLAGS.eval_file`` and its metrics are logged.

    Raises:
        ValueError: If neither phase is enabled or ``FLAGS.max_seq_length``
            exceeds the model's position embeddings.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)
    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train`, `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.finetune_output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.finetune_output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    # Step counts stay None unless training is requested.
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        num_train_steps = int(FLAGS.train_data_size / FLAGS.train_batch_size)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        bert_config=bert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        num_choices=FLAGS.num_choices)

    # If TPU is not available, this will fall back to normal Estimator on
    # CPU or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)

        # Reuse an existing feature file instead of re-converting.
        # NOTE(review): the literal 512 is passed here while the input_fn
        # below uses FLAGS.max_seq_length -- confirm the two are meant to
        # agree.
        if not tf.gfile.Exists(FLAGS.train_file):
            file_based_convert_examples_to_features(
                FLAGS.train_raw_data, 512, tokenizer, FLAGS.train_file)
        train_input_fn = file_based_input_fn_builder(
            input_file=FLAGS.train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            num_choices=FLAGS.num_choices)
        estimator.train(input_fn=train_input_fn, steps=num_train_steps)

    if FLAGS.do_eval:
        # A negative size tells the estimator to run through the entire set.
        if FLAGS.eval_data_size < 0:
            eval_steps = None
        else:
            eval_steps = int(FLAGS.eval_data_size / FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        if not tf.gfile.Exists(FLAGS.eval_file):
            file_based_convert_examples_to_features(
                FLAGS.eval_raw_data, 512, tokenizer, FLAGS.eval_file)
        eval_input_fn = file_based_input_fn_builder(
            input_file=FLAGS.eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder,
            num_choices=FLAGS.num_choices)

        # checkpoints_iterator blocks until a new checkpoint appears.
        for ckpt in contrib_training.checkpoints_iterator(
                estimator.model_dir):
            try:
                # Evaluate the checkpoint the iterator yielded. Without
                # `checkpoint_path` the estimator evaluates whatever is
                # latest at call time, so results and the error message
                # below could refer to different checkpoints.
                result = estimator.evaluate(input_fn=eval_input_fn,
                                            steps=eval_steps,
                                            checkpoint_path=ckpt)
                tf.logging.info("********** Eval results:*******\n")
                for key in sorted(result.keys()):
                    tf.logging.info("%s = %s", key, str(result[key]))
            except tf.errors.NotFoundError:
                tf.logging.error("Checkpoint path '%s' no longer exists.",
                                 ckpt)
def main(_):
    """Run ALBERT SQuAD v2.0 answerability training and/or prediction.

    Configuration is read from ``FLAGS``.  In the prediction branch the
    checkpoint at ``FLAGS.init_checkpoint`` is run over the prediction file,
    each example is scored by the minimum ``cls_logits`` value accumulated for
    it, and precision/recall/F1 of the "is impossible" decision (sigmoid of
    that score compared with a 0.5 threshold) are logged.  The per-question
    scores are written to ``predictions.json`` in ``FLAGS.output_dir``.

    NOTE(review): the code that loads the training examples and derives
    ``num_train_steps`` is commented out below, so the ``do_train`` branch runs
    with ``train_examples``/``num_train_steps`` set to ``None``.  It looks like
    this script is only used for prediction -- confirm before relying on
    ``do_train``.

    Args:
        _: Unused.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)
    validate_flags_or_throw(albert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tokenizer = fine_tuning_utils.create_vocab(
        vocab_file=FLAGS.vocab_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file,
        hub_module=FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        # Never run more iterations per loop than the checkpoint interval.
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        keep_checkpoint_max=0,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    # Disabled training-data loading (kept for reference):
    # if FLAGS.do_train:
    #     train_examples = squad_utils.read_squad_examples(
    #         input_file=FLAGS.train_file, is_training=True)
    #     num_train_steps = int(
    #         len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    #     num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    #
    #     # Pre-shuffle the input to avoid having to make a very large shuffle
    #     # buffer in the `input_fn`.
    #     rng = random.Random(12345)
    #     rng.shuffle(train_examples)

    model_fn = squad_utils.v2_model_fn_builder(
        albert_config=albert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        max_seq_length=FLAGS.max_seq_length,
        start_n_top=FLAGS.start_n_top,
        end_n_top=FLAGS.end_n_top,
        dropout_prob=FLAGS.dropout_prob,
        hub_module=FLAGS.albert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant
        # tensors in memory.
        if not tf.gfile.Exists(FLAGS.train_feature_file):
            # NOTE(review): train_examples is None here because the loading
            # code above is disabled; this conversion presumably cannot work
            # until it is restored.
            train_writer = squad_utils.FeatureWriter(
                filename=FLAGS.train_feature_file, is_training=True)
            squad_utils.convert_examples_to_features(
                examples=train_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=True,
                output_fn=train_writer.process_feature,
                do_lower_case=FLAGS.do_lower_case)
            train_writer.close()

        tf.logging.info("***** Running training *****")
        # Guarded: len(None) would raise TypeError while the example loading
        # above stays disabled.
        if train_examples is not None:
            tf.logging.info(" Num orig examples = %d", len(train_examples))
        # tf.logging.info(" Num split examples = %d", train_writer.num_features)
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        # %s rather than %d: num_train_steps may be None (see above).
        tf.logging.info(" Num steps = %s", num_train_steps)
        del train_examples

        train_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.train_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size,
            is_v2=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_predict:
        with tf.gfile.Open(FLAGS.predict_file) as predict_file:
            prediction_json = json.load(predict_file)["data"]
        eval_examples = squad_utils.read_squad_examples(
            input_file=FLAGS.predict_file, is_training=False)

        if (tf.gfile.Exists(FLAGS.predict_feature_file) and
                tf.gfile.Exists(FLAGS.predict_feature_left_file)):
            tf.logging.info("Loading eval features from {}".format(
                FLAGS.predict_feature_left_file))
            # SECURITY NOTE: pickle.load executes arbitrary code from the
            # file; only point predict_feature_left_file at trusted caches
            # produced by this script.
            with tf.gfile.Open(FLAGS.predict_feature_left_file, "rb") as fin:
                eval_features = pickle.load(fin)
        else:
            eval_writer = squad_utils.FeatureWriter(
                filename=FLAGS.predict_feature_file, is_training=False)
            eval_features = []

            def append_feature(feature):
                """Keep the feature in memory and write it to the record file."""
                eval_features.append(feature)
                eval_writer.process_feature(feature)

            squad_utils.convert_examples_to_features(
                examples=eval_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=False,
                output_fn=append_feature,
                do_lower_case=FLAGS.do_lower_case)
            eval_writer.close()

            # Cache the in-memory features so later runs can skip conversion.
            with tf.gfile.Open(FLAGS.predict_feature_left_file, "wb") as fout:
                pickle.dump(eval_features, fout)

        tf.logging.info("***** Running predictions *****")
        tf.logging.info(" Num orig examples = %d", len(eval_examples))
        tf.logging.info(" Num split examples = %d", len(eval_features))
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.predict_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size,
            is_v2=True)

        def get_result(checkpoint):
            """Evaluate the checkpoint on SQuAD v2.0.

            Returns:
                A ``(metrics, global_step)`` tuple where ``metrics`` holds
                "precision", "recall", "f1" and "total".
            """
            # If running eval on the TPU, you will need to specify the number
            # of steps.
            reader = tf.train.NewCheckpointReader(checkpoint)
            global_step = reader.get_tensor(tf.GraphKeys.GLOBAL_STEP)

            all_results = []
            for result in estimator.predict(predict_input_fn,
                                            yield_single_examples=True,
                                            checkpoint_path=checkpoint):
                if len(all_results) % 1000 == 0:
                    tf.logging.info("Processing example: %d", len(all_results))
                unique_id = int(result["unique_ids"])
                cls_logits = float(result["cls_logits"].flat[0])
                all_results.append(
                    squad_utils.RawResultV2(unique_id=unique_id,
                                            cls_logits=cls_logits))

            output_prediction_file = os.path.join(FLAGS.output_dir,
                                                  "predictions.json")

            result_dict = {}
            cls_dict = {}
            squad_utils.accumulate_predictions_v2(
                result_dict, cls_dict, eval_examples, eval_features,
                all_results, FLAGS.n_best_size, FLAGS.max_answer_length,
                FLAGS.start_n_top, FLAGS.end_n_top)

            from squad_utils import make_qid_to_has_ans
            import numpy as np

            qid_to_has_ans = make_qid_to_has_ans(
                prediction_json)  # maps qid to True/False
            has_ans_qids = [k for k, v in qid_to_has_ans.items() if v]
            no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
            print("has_ans", len(has_ans_qids))
            print("no_ans", len(no_ans_qids))

            def _ratio(numerator, denominator):
                """Return numerator/denominator, or 0.0 for an empty denominator."""
                return numerator / denominator if denominator else 0.0

            def compute_metrics_with_threshold(threshold):
                """Score "is impossible" predictions against the gold labels.

                Replaces ``result_dict`` with a qas_id -> score mapping as a
                side effect and returns ``(precision, recall, f1)``.
                """
                nonlocal result_dict
                result_dict = {}
                tp = 0
                tn = 0
                fp = 0
                fn = 0
                for example_index, example in enumerate(eval_examples):
                    m = np.min(cls_dict[example_index])
                    # Sigmoid of the score, compared with the threshold.
                    predict_is_impossible = 1 / (1 + np.exp(-m)) > threshold
                    # predict_is_impossible = m > threshold
                    # Stored as a plain float: json.dumps rejects NumPy
                    # scalars other than float64.
                    result_dict[example.qas_id] = float(m)
                    if example.is_impossible:
                        if predict_is_impossible:
                            tp += 1
                        else:
                            fn += 1
                    else:
                        if predict_is_impossible:
                            fp += 1
                        else:
                            tn += 1
                # Guard the degenerate cases (no positive predictions and/or
                # no impossible examples) instead of raising ZeroDivisionError.
                precision = _ratio(tp, tp + fp)
                recall = _ratio(tp, fn + tp)
                f1 = _ratio(2 * tp, 2 * tp + fp + fn)
                tf.logging.info("precision: %s, recall: %s, f1: %s",
                                precision, recall, f1)
                return precision, recall, f1

            # precision, recall, f1 = compute_metrics_with_threshold(0.4)
            precision, recall, f1 = compute_metrics_with_threshold(0.5)
            # precision, recall, f1 = compute_metrics_with_threshold(0.6)

            with tf.gfile.GFile(output_prediction_file, "w") as writer:
                writer.write(json.dumps(result_dict, indent=4) + "\n")
            return {
                "precision": precision,
                "recall": recall,
                "f1": f1,
                "total": len(eval_examples)
            }, int(global_step)

        def _find_valid_cands(curr_step):
            """List ``.index`` files in output_dir whose step exceeds curr_step.

            Only referenced by the disabled checkpoint-sweep loop below.
            """
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if idx != "best" and int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        # Disabled checkpoint-sweep evaluation loop (kept for reference; the
        # indentation was reconstructed):
        # output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        # checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        # key_name = "f1"
        # writer = tf.gfile.GFile(output_eval_file, "w")
        # if tf.gfile.Exists(checkpoint_path + ".index"):
        #     result = get_result(checkpoint_path)
        #     best_perf = result[0][key_name]
        #     global_step = result[1]
        # else:
        #     global_step = -1
        #     best_perf = -1
        #     checkpoint_path = None
        # while global_step < num_train_steps:
        #     steps_and_files = {}
        #     filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
        #     for filename in filenames:
        #         if filename.endswith(".index"):
        #             ckpt_name = filename[:-6]
        #             cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
        #             if cur_filename.split("-")[-1] == "best":
        #                 continue
        #             gstep = int(cur_filename.split("-")[-1])
        #             if gstep not in steps_and_files:
        #                 tf.logging.info("Add {} to eval list.".format(cur_filename))
        #                 steps_and_files[gstep] = cur_filename
        #     tf.logging.info("found {} files.".format(len(steps_and_files)))
        #     if not steps_and_files:
        #         tf.logging.info("found 0 file, global step: {}. Sleeping."
        #                         .format(global_step))
        #         time.sleep(60)
        #     else:
        #         for ele in sorted(steps_and_files.items()):
        #             step, checkpoint_path = ele
        #             if global_step >= step:
        #                 if len(_find_valid_cands(step)) > 1:
        #                     for ext in ["meta", "data-00000-of-00001", "index"]:
        #                         src_ckpt = checkpoint_path + ".{}".format(ext)
        #                         tf.logging.info("removing {}".format(src_ckpt))
        #                         tf.gfile.Remove(src_ckpt)
        #                 continue
        #             result, global_step = get_result(checkpoint_path)
        #             tf.logging.info("***** Eval results *****")
        #             for key in sorted(result.keys()):
        #                 tf.logging.info(" %s = %s", key, str(result[key]))
        #                 writer.write("%s = %s\n" % (key, str(result[key])))
        #             if result[key_name] > best_perf:
        #                 best_perf = result[key_name]
        #                 for ext in ["meta", "data-00000-of-00001", "index"]:
        #                     src_ckpt = checkpoint_path + ".{}".format(ext)
        #                     tgt_ckpt = checkpoint_path.rsplit(
        #                         "-", 1)[0] + "-best.{}".format(ext)
        #                     tf.logging.info("saving {} to {}".format(src_ckpt, tgt_ckpt))
        #                     tf.gfile.Copy(src_ckpt, tgt_ckpt, overwrite=True)
        #                     writer.write("saved {} to {}\n".format(src_ckpt, tgt_ckpt))
        #             writer.write("best {} = {}\n".format(key_name, best_perf))
        #             tf.logging.info(" best {} = {}\n".format(key_name, best_perf))
        #
        #             if len(_find_valid_cands(global_step)) > 2:
        #                 for ext in ["meta", "data-00000-of-00001", "index"]:
        #                     src_ckpt = checkpoint_path + ".{}".format(ext)
        #                     tf.logging.info("removing {}".format(src_ckpt))
        #                     tf.gfile.Remove(src_ckpt)
        #             writer.write("=" * 50 + "\n")

        # Only the initial checkpoint is evaluated; the metrics are logged and
        # the scores written to disk inside get_result.
        get_result(FLAGS.init_checkpoint)
def main(_):
    """Train and/or run prediction with BERT on precomputed feature files.

    All settings come from ``FLAGS``.  Training reads
    ``FLAGS.train_precomputed_file``; prediction reads
    ``FLAGS.predict_precomputed_file`` and writes one ``tf.train.Example`` per
    prediction (ids, label and output logits) to a TFRecord file under
    ``FLAGS.output_dir``.

    Args:
        _: Unused.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    validate_flags_or_throw(bert_config)
    tf.gfile.MakeDirs(FLAGS.output_dir)

    # A cluster resolver is only needed when a named TPU is requested.
    if FLAGS.use_tpu and FLAGS.tpu_name:
        cluster = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    else:
        cluster = None

    tpu_config = contrib_tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop,
        num_shards=FLAGS.num_tpu_cores,
        per_host_input_for_training=(
            contrib_tpu.InputPipelineConfig.PER_HOST_V2))
    run_config = contrib_tpu.RunConfig(
        cluster=cluster,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tpu_config)

    # Step counts stay None unless we are training.
    num_train_steps = num_warmup_steps = None
    if FLAGS.do_train:
        num_train_features = FLAGS.train_num_precomputed
        num_train_steps = int(num_train_features / FLAGS.train_batch_size *
                              FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn_builder(
            bert_config=bert_config,
            init_checkpoint=FLAGS.init_checkpoint,
            learning_rate=FLAGS.learning_rate,
            num_train_steps=num_train_steps,
            num_warmup_steps=num_warmup_steps,
            use_tpu=FLAGS.use_tpu,
            use_one_hot_embeddings=FLAGS.use_tpu),
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training on precomputed features *****")
        tf.logging.info(" Num split examples = %d", num_train_features)
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        estimator.train(
            input_fn=input_fn_builder(
                input_file=FLAGS.train_precomputed_file,
                seq_length=FLAGS.max_seq_length,
                is_training=True,
                drop_remainder=True),
            max_steps=num_train_steps)

    if not FLAGS.do_predict:
        return

    tf.logging.info(
        "***** Running predictions on precomputed features *****")
    tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)
    predict_input_fn = input_fn_builder(
        input_file=FLAGS.predict_precomputed_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)

    def _int64_feature(values):
        """Wrap an iterable of ints in an int64 ``tf.train.Feature``."""
        return tf.train.Feature(
            int64_list=tf.train.Int64List(value=list(values)))

    def _float_feature(values):
        """Wrap an iterable of floats in a float ``tf.train.Feature``."""
        return tf.train.Feature(
            float_list=tf.train.FloatList(value=list(values)))

    # Scalar fields copied into every output record, in output order.
    scalar_keys = ("img_id", "annot_id", "choice_id", "label")

    # If running eval on the TPU, you will need to specify the number of
    # steps.
    output_file = os.path.join(FLAGS.output_dir, FLAGS.output_pred_file)
    tf.logging.info("Writing results to: %s", output_file)
    with tf.python_io.TFRecordWriter(output_file) as writer:
        predictions = estimator.predict(predict_input_fn,
                                        yield_single_examples=True)
        for processed_examples, result in enumerate(predictions):
            if processed_examples % 1000 == 0:
                tf.logging.info("Processing example: %d" % processed_examples)

            features = collections.OrderedDict(
                (key, _int64_feature([result[key]])) for key in scalar_keys)
            features["output_logits"] = _float_feature(
                result["output_logits"])

            example = tf.train.Example(
                features=tf.train.Features(feature=features))
            writer.write(example.SerializeToString())
def main(_):
    """Compare the predictions of two BERT classifiers on the same inputs.

    Both models (``init_checkpoint1`` / ``init_checkpoint2`` with their
    respective configs) predict on ``FLAGS.diff_input_file``.  The function
    logs how often their argmax predictions agree, summary statistics of the
    distance between their probability vectors, optionally KL-divergence
    statistics (``FLAGS.diff_type`` of "kld1" or "kld2"), and optionally
    writes a TSV of inputs and per-class probabilities to
    ``FLAGS.diff_output_file``.

    Args:
        _: Unused.

    Raises:
        TFCheckpointNotFoundError: If checkpoint1 is missing, or checkpoint2
            is missing while ``FLAGS.use_random`` is not set.
        ValueError: If ``max_seq_length`` exceeds the first model's
            ``max_position_embeddings`` or the task name is unknown.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "sst-2": run_classifier.SST2Processor,
        "mnli": run_classifier.MnliProcessor
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint1)
    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint2)

    if not tf.train.checkpoint_exists(FLAGS.init_checkpoint1):
        raise TFCheckpointNotFoundError("checkpoint1 does not exist!")
    if (not tf.train.checkpoint_exists(FLAGS.init_checkpoint2) and
            not FLAGS.use_random):
        raise TFCheckpointNotFoundError("checkpoint2 does not exist!")

    bert_config1 = modeling.BertConfig.from_json_file(FLAGS.bert_config_file1)
    bert_config2 = modeling.BertConfig.from_json_file(FLAGS.bert_config_file2)

    if FLAGS.max_seq_length > bert_config1.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config1.max_position_embeddings))

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name, ))

    processor = processors[task_name]()
    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    all_results = []

    predict_examples = processor.get_test_examples(FLAGS.diff_input_file)
    num_actual_predict_examples = len(predict_examples)

    # For single sentence tasks (like SST2) eg.text_b is None
    original_data = [(eg.text_a, eg.text_b) for eg in predict_examples]
    if FLAGS.use_tpu:
        # TPU requires a fixed batch size for all batches, therefore the
        # number of examples must be a multiple of the batch size, or else
        # examples will get dropped. So we pad with fake examples which are
        # ignored later on.
        while len(predict_examples) % FLAGS.predict_batch_size != 0:
            predict_examples.append(run_classifier.PaddingInputExample())

    # Both models read the same feature file, written next to checkpoint1.
    predict_file = os.path.join(FLAGS.init_checkpoint1,
                                FLAGS.exp_name + ".predict.tf_record")
    run_classifier.file_based_convert_examples_to_features(
        predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
        predict_file)

    for bert_config_type, output_dir in [
            (bert_config1, FLAGS.init_checkpoint1),
            (bert_config2, FLAGS.init_checkpoint2)
    ]:
        tpu_cluster_resolver = None
        if FLAGS.use_tpu and FLAGS.tpu_name:
            tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
                FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

        is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
        run_config = contrib_tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            master=FLAGS.master,
            model_dir=output_dir,
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
            tpu_config=contrib_tpu.TPUConfig(
                iterations_per_loop=FLAGS.iterations_per_loop,
                num_shards=FLAGS.num_tpu_cores,
                per_host_input_for_training=is_per_host))

        model_fn = run_classifier.model_fn_builder(
            bert_config=bert_config_type,
            num_labels=len(label_list),
            # This init checkpoint is eventually overridden by the estimator
            init_checkpoint=FLAGS.output_dir,
            learning_rate=FLAGS.learning_rate,
            num_train_steps=None,
            num_warmup_steps=None,
            use_tpu=FLAGS.use_tpu,
            use_one_hot_embeddings=FLAGS.use_tpu)

        # If TPU is not available, this will fall back to normal Estimator on
        # CPU or GPU.
        estimator = contrib_tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info(" Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = bool(FLAGS.use_tpu)
        predict_input_fn = run_classifier.file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        all_results.append(list(estimator.predict(input_fn=predict_input_fn)))

    # Drop the predictions made for the TPU padding examples.
    all_results = [
        results[:num_actual_predict_examples] for results in all_results
    ]
    assert len(all_results[0]) == len(all_results[1])

    def _log_stats(label, values):
        """Log the mean, max, min and standard deviation of ``values``."""
        tf.logging.info("Average %s = %.8f", label, np.mean(values))
        tf.logging.info("Max %s = %.8f", label, np.max(values))
        tf.logging.info("Min %s = %.8f", label, np.min(values))
        tf.logging.info("Std %s = %.8f", label, np.std(values))

    # Assuming model1's predictions are gold labels, calculate model2's
    # accuracy
    score = 0
    for prob1, prob2 in zip(all_results[0], all_results[1]):
        if np.argmax(prob1["probabilities"]) == np.argmax(
                prob2["probabilities"]):
            score += 1
    tf.logging.info("Agreement score = %.6f",
                    float(score) / num_actual_predict_examples)

    # Calculate the average value of |v1 - v2|, the distance on the simplex
    # Unlike KL divergence, this is a bounded metric
    # However, these results are not comparable across tasks
    # with different number classes
    distances = [
        np.linalg.norm(prob1["probabilities"] - prob2["probabilities"])
        for prob1, prob2 in zip(all_results[0], all_results[1])
    ]
    _log_stats("length |p1 - p2|", distances)

    if FLAGS.diff_type == "kld1":
        all_kld = [
            stats.entropy(prob1["probabilities"], prob2["probabilities"])
            for prob1, prob2 in zip(all_results[0], all_results[1])
        ]
        _log_stats("kl-divergence (p1, p2)", all_kld)
    elif FLAGS.diff_type == "kld2":
        all_kld = [
            stats.entropy(prob2["probabilities"], prob1["probabilities"])
            for prob1, prob2 in zip(all_results[0], all_results[1])
        ]
        _log_stats("kl-divergence (p2, p1)", all_kld)

    if FLAGS.diff_output_file:
        # Padded examples were already removed above, so the results line up
        # one-to-one with original_data.
        with tf.gfile.GFile(FLAGS.diff_output_file, "w") as f:
            for i, (eg, prob1, prob2) in enumerate(
                    zip(original_data, all_results[0], all_results[1])):
                if i % 1000 == 0:
                    tf.logging.info("Writing instance %d", i + 1)
                # Format however many classes the task has.  Indexing three
                # fixed positions raised IndexError for two-label tasks such
                # as "sst-2"; for three classes the output is unchanged.
                prob1_str = "\t".join(
                    "%.6f" % p1.item() for p1 in prob1["probabilities"])
                prob2_str = "\t".join(
                    "%.6f" % p2.item() for p2 in prob2["probabilities"])
                # eg[1] is None for single-sentence tasks and is written
                # as the literal text "None".
                f.write("%s\t%s\t%s\t%s\n" %
                        (eg[0], eg[1], prob1_str, prob2_str))