def get_assignment_map_as_is(tvars, checkpoint):
    current_vars = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        current_vars[name] = var
        tf_logging.debug("Init from lm_checkpoint : %s" % name)

    assignment_map = {}
    initialized_variable_names = {}
    if checkpoint:
        for x in tf.train.list_variables(checkpoint):
            (name, var) = (x[0], x[1])
            if name not in current_vars:
                continue
            assignment_map[name] = current_vars[name]
            tf_logging.debug("Mapped : %s" % name)
            initialized_variable_names[name] = 1
            initialized_variable_names[name + ":0"] = 1
    return assignment_map, initialized_variable_names
def get_assignment_map_remap_from_v1(tvars, remap_prefix, lm_checkpoint):
    tf_logging.debug("get_assignment_map_remap_from_v1")
    assignment_candidate = {}
    real_name_map = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        tokens = name.split("/")
        top_scope = tokens[0]
        if remap_prefix == top_scope:
            inner_name = "/".join(tokens[1:])
            targ_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", inner_name)
            targ_name = re.sub(r"dense[_]?\d*", "dense", targ_name)
            assignment_candidate[targ_name] = var
            tf_logging.info("Init from v1 : %s" % name)
            real_name_map[targ_name] = name

    assignment_map = {}
    initialized_variable_names = {}
    if lm_checkpoint:
        for x in tf.train.list_variables(lm_checkpoint):
            (name, var) = (x[0], x[1])
            if name not in assignment_candidate:
                continue
            assignment_map[name] = assignment_candidate[name]
            tvar_name = real_name_map[name]
            initialized_variable_names[tvar_name] = 1
            initialized_variable_names[tvar_name + ":0"] = 1
    return assignment_map, initialized_variable_names
def generate_alt_runs(batch):
    logits = self.forward_run(batch)
    x0, x1, x2, y = batch

    instance_infos = []
    new_insts = []
    deleted_mask_list = []
    tag_size_list = []
    for i in range(len(logits)):
        info = {}
        info['init_logit'] = logits[i]
        info['orig_input'] = (x0[i], x1[i], x2[i], y[i])
        indice_delete_random = []
        for _ in range(self.compare_deletion_num):
            indice_delete_random.append(len(new_insts))
            x_list, delete_mask = self.sample_deleter(
                sample_size(), x0[i], x1[i], x2[i])
            new_insts.append(x_list)
            deleted_mask_list.append(delete_mask)

        info['indice_delete_random'] = indice_delete_random
        instance_infos.append(info)

    if tag_size_list:
        avg_tag_size = average(tag_size_list)
        tf_logging.debug("avg Tagged token#={}".format(avg_tag_size))
    return new_insts, instance_infos, deleted_mask_list
def get_bert_assignment_map(tvars, lm_checkpoint):
    lm_assignment_candidate = {}
    real_name_map = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        tokens = name.split("/")
        top_scope = tokens[0]
        targ_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", name)
        targ_name = re.sub(r"dense[_]?\d*", "dense", targ_name)
        lm_assignment_candidate[targ_name] = var
        tf_logging.debug("Init from lm_checkpoint : %s" % name)
        real_name_map[targ_name] = name

    assignment_map = {}
    initialized_variable_names = {}
    if lm_checkpoint:
        for x in tf.train.list_variables(lm_checkpoint):
            (name, var) = (x[0], x[1])
            if name not in lm_assignment_candidate:
                continue
            assignment_map[name] = lm_assignment_candidate[name]
            tvar_name = real_name_map[name]
            initialized_variable_names[tvar_name] = 1
            initialized_variable_names[tvar_name + ":0"] = 1
    return assignment_map, initialized_variable_names
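# Hedged usage sketch (not part of the original repo): the assignment map
# returned above is meant to be consumed by the TF1-style checkpoint
# initializer, along the lines of
#
#   assignment_map, initialized_variable_names = get_bert_assignment_map(
#       tf.compat.v1.trainable_variables(), init_checkpoint)
#   tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)
#
# `init_checkpoint` is an illustrative name here; align_checkpoint_for_lm
# below wraps exactly this call pattern.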
def align_checkpoint_for_lm(tvars,
                            checkpoint_type,
                            init_checkpoint,
                            second_init_checkpoint=None,
                            ):
    tf_logging.debug("align_checkpoint_for_lm")
    use_multiple_checkpoint = checkpoint_type in [
        "v2_and_bert", "nli_and_bert"
    ]
    initialized_variable_names2 = {}
    if init_checkpoint:
        if not use_multiple_checkpoint:
            if checkpoint_type == "" or checkpoint_type == "bert":
                assignment_fn = get_bert_assignment_map
            elif checkpoint_type == "v2":
                assignment_fn = assignment_map_v2_to_v2
            else:
                raise Exception("Undefined checkpoint type: %s" % checkpoint_type)

            assignment_map, initialized_variable_names = assignment_fn(
                tvars, init_checkpoint)

            def init_fn():
                tf.compat.v1.train.init_from_checkpoint(
                    init_checkpoint, assignment_map)
        else:
            if checkpoint_type == "nli_and_bert":
                assignment_map, initialized_variable_names = get_bert_assignment_map(
                    tvars, init_checkpoint)
                assignment_map2, initialized_variable_names2 = get_cls_assignment(
                    tvars, second_init_checkpoint)
            elif checkpoint_type == "v2_and_bert":
                assignment_map, initialized_variable_names = assignment_map_v2_to_v2(
                    tvars, init_checkpoint)
                assignment_map2, initialized_variable_names2 = get_cls_assignment(
                    tvars, second_init_checkpoint)
            else:
                raise Exception("Undefined checkpoint type: %s" % checkpoint_type)

            def init_fn():
                tf.compat.v1.train.init_from_checkpoint(
                    init_checkpoint, assignment_map)
                tf.compat.v1.train.init_from_checkpoint(
                    second_init_checkpoint, assignment_map2)
    else:
        initialized_variable_names = {}

        def init_fn():
            pass
    return initialized_variable_names, initialized_variable_names2, init_fn
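# Hedged usage sketch (illustrative only; checkpoint paths are placeholders):
#
#   tvars = tf.compat.v1.trainable_variables()
#   names1, names2, init_fn = align_checkpoint_for_lm(
#       tvars, "v2_and_bert", init_checkpoint, second_init_checkpoint)
#   init_fn()  # registers checkpoint values as initializers for the mapped vars
#   log_var_assignments_one_by_one(tvars, names1, names2)
#
# log_var_assignments_one_by_one (defined later in this collection) then shows
# which variables were covered by which checkpoint.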
def train_debug_factory(sess, loss_tensor, acc_tensor, gradient, batch2feed_dict,
                        batch, step_i):
    loss_val, acc, gradient = sess.run([
        loss_tensor,
        acc_tensor,
        gradient,
    ],
        feed_dict=batch2feed_dict(batch))
    tf_logging.debug("Step {0} train loss={1:.04f} acc={2:.04f}".format(
        step_i, loss_val, acc))
    return loss_val, acc
def train_classification_factory(sess, loss_tensor, acc_tensor, train_op,
                                 batch2feed_dict, batch, step_i):
    loss_val, acc, _ = sess.run([
        loss_tensor,
        acc_tensor,
        train_op,
    ],
        feed_dict=batch2feed_dict(batch))
    tf_logging.debug("Step {0} train loss={1:.04f} acc={2:.04f}".format(
        step_i, loss_val, acc))
    return loss_val, acc
def sero_from_v2(tvars, lm_checkpoint):
    tf_logging.debug("sero_from_v2")

    def get_target_name(var_name):
        targ_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", var_name)
        targ_name = re.sub(r"dense[_]?\d*", "dense", targ_name)
        tokens = targ_name.split("/")
        if tokens[0] == "sero":
            tokens[0] = "bert"
        if len(tokens) > 2:
            if tokens[1] == "lower":
                tokens[1] = "encoder"
            elif tokens[1] == "upper":
                str_layer, str_no = tokens[2].split("_")
                str_no = str(int(str_no) + 6)
                tokens[1] = "encoder"
                tokens[2] = str_layer + "_" + str_no
        targ_name = "/".join(tokens)
        return targ_name

    assignment_candidate = {}
    real_name_map = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        targ_name = get_target_name(name)
        assignment_candidate[targ_name] = var
        tf_logging.info("Init from v2 : %s" % name)
        real_name_map[targ_name] = name

    assignment_map = {}
    initialized_variable_names = {}
    if lm_checkpoint:
        for x in tf.train.list_variables(lm_checkpoint):
            (name, var) = (x[0], x[1])
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
            tf_logging.info("Checkpoint Var : %s" % name)
            if simple_name not in assignment_candidate:
                continue
            assignment_map[name] = assignment_candidate[simple_name]
            tvar_name = real_name_map[simple_name]
            initialized_variable_names[tvar_name] = 1
            initialized_variable_names[tvar_name + ":0"] = 1
    return assignment_map, initialized_variable_names
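# Worked example of get_target_name above (variable paths are illustrative):
#   "sero/lower/layer_3/attention/dense_7/kernel"
#       -> "bert/encoder/layer_3/attention/dense/kernel"
#   "sero/upper/layer_3/output/layer_normalization_12/gamma"
#       -> "bert/encoder/layer_9/output/LayerNorm/gamma"
# i.e. the "upper" stack is shifted up by 6 layers, and the Keras-style
# dense_* / layer_normalization_* names are collapsed to dense / LayerNorm.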
def work(job_id):
    outfile = os.path.join(working_dir, "BLC_data", "{}".format(job_id))
    if os.path.exists(outfile):
        return "Skip"

    tf_logging.debug("Loading data")
    data = load(job_id)
    tf_logging.debug("Done")
    if data is None:
        return "No Input"

    writer = RecordWriterWrap(outfile)
    batch_size, seq_length = data[0]['input_ids'].shape
    keys = list(data[0].keys())
    vectors = flatten_batches(data)
    basic_keys = "input_ids", "input_mask", "segment_ids"
    any_key = keys[0]
    data_len = len(vectors[any_key])
    num_predictions = len(vectors["grouped_positions"][0][0])

    for i in range(data_len):
        mask_valid = [0] * seq_length
        loss1_arr = [0] * seq_length
        loss2_arr = [0] * seq_length
        positions = vectors["grouped_positions"][i]
        num_trials = len(positions)
        for t_i in range(num_trials):
            for p_i in range(num_predictions):
                loc = vectors["grouped_positions"][i][t_i][p_i]
                loss1 = vectors["grouped_loss1"][i][t_i][p_i]
                loss2 = vectors["grouped_loss2"][i][t_i][p_i]
                loss1_arr[loc] = loss1
                loss2_arr[loc] = loss2
                assert mask_valid[loc] == 0
                mask_valid[loc] = 1

        features = collections.OrderedDict()
        for key in basic_keys:
            features[key] = create_int_feature(vectors[key][i])
        features["loss_valid"] = create_int_feature(mask_valid)
        features["loss1"] = create_float_feature(loss1_arr)
        features["loss2"] = create_float_feature(loss2_arr)
        features["next_sentence_labels"] = create_int_feature([0])
        writer.write_feature(features)
        # if i < 20:
        #     log_print_feature(features)

    writer.close()
    return "Done"
def train_fn_factory(sess, loss_tensor, all_losses, train_op, batch2feed_dict,
                     batch, step_i):
    loss_val, all_losses_val, _ = sess.run([
        loss_tensor,
        all_losses,
        train_op,
    ],
        feed_dict=batch2feed_dict(batch))
    n_layer = len(all_losses_val)
    verbose_loss_str = " ".join(
        ["{0}: {1:.2f}".format(i, all_losses_val[i]) for i in range(n_layer)])
    tf_logging.debug("Step {0} train loss={1:.04f} {2}".format(
        step_i, loss_val, verbose_loss_str))
    return loss_val, 0
def __init__(self, num_classes, ssdr_config, core_model, seq_length, is_training):
    super(WSSDRWrapperInterface, self).__init__()
    placeholder = tf.compat.v1.placeholder
    bert_config = BertConfig.from_json_file(
        os.path.join(data_path, "bert_config.json"))
    def_max_length = FLAGS.max_def_length
    loc_max_length = FLAGS.max_loc_length
    tf_logging.debug("WSSDRWrapper init()")
    tf_logging.debug("seq_length %d" % seq_length)
    tf_logging.debug("def_max_length %d" % def_max_length)
    tf_logging.debug("loc_max_length %d" % loc_max_length)

    self.input_ids = placeholder(tf.int64, [None, seq_length], name="input_ids")
    self.input_mask_ = placeholder(tf.int64, [None, seq_length], name="input_mask")
    self.segment_ids = placeholder(tf.int64, [None, seq_length], name="segment_ids")

    self.d_location_ids = placeholder(tf.int64, [None, loc_max_length],
                                      name="d_location_ids")
    self.d_input_ids = placeholder(tf.int64, [None, def_max_length],
                                   name="d_input_ids")
    self.d_input_mask = placeholder(tf.int64, [None, def_max_length],
                                    name="d_input_mask")
    self.d_segment_ids = placeholder(tf.int64, [None, def_max_length],
                                     name="d_segment_ids")
    self.ab_mapping = placeholder(tf.int64, [None, 1], name="ab_mapping")
    if ssdr_config.use_ab_mapping_mask:
        self.ab_mapping_mask = placeholder(tf.int64, [None, FLAGS.def_per_batch],
                                           name="ab_mapping_mask")
    else:
        self.ab_mapping_mask = None

    # [batch, seq_len], 1 at positions that appear in d_location_ids
    y_lookup = get_y_lookup_from_location_ids(self.d_location_ids, seq_length)
    self.y_cls = placeholder(tf.int64, [None])

    self.network = core_model(
        config=bert_config,
        ssdr_config=ssdr_config,
        is_training=is_training,
        input_ids=self.input_ids,
        input_mask=self.input_mask_,
        token_type_ids=self.segment_ids,
        d_input_ids=self.d_input_ids,
        d_input_mask=self.d_input_mask,
        d_segment_ids=self.d_segment_ids,
        d_location_ids=self.d_location_ids,
        ab_mapping=self.ab_mapping,
        ab_mapping_mask=self.ab_mapping_mask,
        use_one_hot_embeddings=False,
    )
    self.cls_logits = keras.layers.Dense(num_classes)(self.network.get_pooled_output())
    self.cls_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.cls_logits,
        labels=self.y_cls)
    self.cls_loss = tf.reduce_mean(self.cls_loss_arr)
    self.lookup_logits = keras.layers.Dense(2)(self.network.get_sequence_output())
    self.lookup_p_at_1 = tf_module.p_at_1(self.lookup_logits[:, :, 1], y_lookup)
    self.lookup_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.lookup_logits,
        labels=y_lookup)
    self.y_lookup = y_lookup
    self.lookup_loss_per_example = tf.reduce_sum(self.lookup_loss_arr, axis=-1)
    self.lookup_loss = tf.reduce_mean(self.lookup_loss_per_example)
    self.acc = tf_module.accuracy(self.cls_logits, self.y_cls)
def eval_nli_w_dict_lookup(run_name, model: DictReaderInterface, model_path,
                           data_feeder_loader):
    print("eval_nli_w_dict_lookup :", run_name)
    tf_logging.debug("Building graph")
    batch_size = FLAGS.train_batch_size
    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info)
    runner.load_last_saved_model(model_path)
    dev_batches = dev_data_feeder.get_all_batches(batch_size, True)[:100]
    n_batches = len(dev_batches)
    print('{} batches, about {} data'.format(n_batches, n_batches * batch_size))
    loss, acc = runner.run_batches_w_lookup(dev_batches)
    print("Dev total loss={0:.04f} acc={1:.03f}".format(loss, acc))
    return acc
def log_var_assignments_one_by_one(tvars, initialized_variable_names,
                                   initialized_variable_names2=None):
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        if initialized_variable_names2 is not None:
            if var.name in initialized_variable_names2:
                init_string = ", *INIT_FROM_CKPT2*"

        if init_string:
            tf_logging.debug("  name = %s, shape = %s%s", var.name, var.shape,
                             init_string)
        else:
            tf_logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            " - Not Initialized")
def eval_nli_w_dict(run_name, model: DictReaderInterface, model_path,
                    data_feeder_loader):
    print("Eval nli :", run_name)
    tf_logging.debug("Building graph")
    batch_size = FLAGS.train_batch_size
    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    dev_batches = dev_data_feeder.get_all_batches(batch_size)
    runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info)
    runner.load_last_saved_model(model_path)

    def valid_fn(step_i):
        loss, acc = runner.run_batches_wo_lookup(dev_batches)
        print("Step {0} Dev loss={1:.04f} acc={2:.03f}".format(step_i, loss, acc))
        return acc

    return valid_fn(0)
def train_fn(batch, step_i):
    step_before_cls = fetch_global_step()
    loss_val, acc = train_classification(batch, step_i)
    summary = tf.compat.v1.Summary()
    summary.value.add(tag='acc', simple_value=acc)
    summary.value.add(tag='loss', simple_value=loss_val)
    train_writer.add_summary(summary, fetch_global_step())
    train_writer.flush()
    tf_logging.debug("{}".format(step_i))
    step_after_cls = fetch_global_step()
    assert step_after_cls == step_before_cls + 1

    train_explain(batch, step_i)
    step_after_ex = fetch_global_step()
    assert step_after_cls == step_after_ex
    return loss_val, acc
def __init__(self, data, dictionary, ssdr_config, def_per_batch, max_def_length,
             data_info=None):
    self.max_def_length = max_def_length
    self.max_d_loc = ssdr_config.max_loc_length
    self.def_per_batch = def_per_batch
    self.raw_dictionary = dictionary
    self.use_ab_mapping_mask = ssdr_config.use_ab_mapping_mask
    tf_logging.debug("SSDRAugment init")
    tf_logging.debug("max_def_length: %d" % max_def_length)
    tf_logging.debug("max_d_loc: %d" % self.max_d_loc)
    tf_logging.debug("def_per_batch: %d" % self.def_per_batch)
    tf_logging.debug("use_ab_mapping_mask: {}".format(
        ssdr_config.use_ab_mapping_mask))
    super(SSDRAugment, self).__init__(data, data_info)
def phase1_only_load(tvars, src_checkpoint):
    tf_logging.debug("phase1_only_load")
    initialized_variable_names = {}
    assignment_candidate = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        assignment_candidate[name] = var

    assignment_map = collections.OrderedDict()
    if src_checkpoint:
        for x in tf.train.list_variables(src_checkpoint):
            (name, var) = (x[0], x[1])
            if name.startswith(dual_model_prefix1):
                include = True
            elif name.startswith("cls_dense/"):
                include = True
            else:
                include = False

            if include and name in assignment_candidate:
                tf_logging.debug("Vars in checkpoint : %s" % name)
                tf_logging.debug("map to -> : %s" % name)
                assignment_map[name] = assignment_candidate[name]
                initialized_variable_names[name] = 1
                initialized_variable_names[name + ":0"] = 1
    return assignment_map, initialized_variable_names
def __init__(self, num_classes, seq_length, is_training):
    super(DictReaderWrapper, self).__init__()
    placeholder = tf.compat.v1.placeholder
    bert_config = BertConfig.from_json_file(
        os.path.join(data_path, "bert_config.json"))
    def_max_length = FLAGS.max_def_length
    loc_max_length = FLAGS.max_loc_length
    tf_logging.debug("DictReaderWrapper init()")
    tf_logging.debug("seq_length %d" % seq_length)
    tf_logging.debug("def_max_length %d" % def_max_length)
    tf_logging.debug("loc_max_length %d" % loc_max_length)

    self.input_ids = placeholder(tf.int64, [None, seq_length])
    self.input_mask_ = placeholder(tf.int64, [None, seq_length])
    self.segment_ids = placeholder(tf.int64, [None, seq_length])
    self.d_input_ids = placeholder(tf.int64, [None, def_max_length])
    self.d_input_mask = placeholder(tf.int64, [None, def_max_length])
    self.d_location_ids = placeholder(tf.int64, [None, loc_max_length])
    self.y_cls = placeholder(tf.int64, [None])
    self.y_lookup = placeholder(tf.int64, [None, seq_length])

    self.network = DictReaderModel(
        config=bert_config,
        d_config=bert_config,
        is_training=is_training,
        input_ids=self.input_ids,
        input_mask=self.input_mask_,
        d_input_ids=self.d_input_ids,
        d_input_mask=self.d_input_mask,
        d_location_ids=self.d_location_ids,
        use_target_pos_emb=True,
        token_type_ids=self.segment_ids,
        use_one_hot_embeddings=False,
    )
    self.cls_logits = keras.layers.Dense(num_classes)(self.network.pooled_output)
    self.cls_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.cls_logits,
        labels=self.y_cls)
    self.cls_loss = tf.reduce_mean(self.cls_loss_arr)
    self.lookup_logits = keras.layers.Dense(2)(self.network.sequence_output)
    self.lookup_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.lookup_logits,
        labels=self.y_lookup)
    self.lookup_loss_per_example = tf.reduce_mean(self.lookup_loss_arr, axis=-1)
    self.lookup_loss = tf.reduce_mean(self.lookup_loss_per_example)
    self.acc = tf_module.accuracy(self.cls_logits, self.y_cls)
def debug_names(is_training):
    tf.compat.v1.disable_eager_execution()
    seq_max = 200
    lr = 1e-5
    batch_size = FLAGS.train_batch_size

    tf_logging.debug("Building graph")
    model = DictReaderWrapper(3, seq_max, is_training)

    with tf.compat.v1.variable_scope("optimizer"):
        train_cls, global_step = train_module.get_train_op(model.cls_loss, lr)
        train_lookup, global_step = train_module.get_train_op(
            model.lookup_loss, lr, global_step)

    sess = train_module.init_session()
    sess.run(tf.compat.v1.global_variables_initializer())

    tvars = tf.compat.v1.trainable_variables()
    for var in tvars:
        name = var.name
        print(name)
def cppnc_assignment_remap2(tvars, lm_checkpoint):
    """Compute the union of the current variables and checkpoint variables."""
    tf_logging.debug("get_assignment_map_remap_from_v2")
    initialized_variable_names = {}
    real_name_map = {}
    assignment_candidate = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        tokens = name.split("/")
        top_scope = tokens[0]
        if triple_model_prefix2 == top_scope:
            targ_name = get_name_key(dual_model_prefix1, tokens)
            assignment_candidate[targ_name] = var
            tf_logging.info("Init from v2 : %s" % name)
            real_name_map[targ_name] = name
        elif triple_model_prefix3 == top_scope:
            targ_name = get_name_key(dual_model_prefix2, tokens)
            assignment_candidate[targ_name] = var
            tf_logging.info("Init from v2 : %s" % name)
            real_name_map[targ_name] = name

    assignment_map = collections.OrderedDict()
    if lm_checkpoint:
        for x in tf.train.list_variables(lm_checkpoint):
            (name, var) = (x[0], x[1])
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
            tf_logging.debug("Vars in TT : %s" % name)
            tf_logging.debug("map to -> : %s" % simple_name)
            if simple_name not in assignment_candidate:
                continue
            assignment_map[name] = assignment_candidate[simple_name]
            tf_logging.debug("Matched variables : %s" % name)
            real_name = real_name_map[simple_name]
            initialized_variable_names[real_name] = 1
            initialized_variable_names[real_name + ":0"] = 1
    return assignment_map, initialized_variable_names
def do(data_id):
    working_dir = os.environ["TF_WORKING_DIR"]
    tokenizer = get_tokenizer()
    name1 = os.path.join(working_dir, "bert_loss", "{}.pickle".format(data_id))
    name2 = os.path.join(working_dir, "bfn_loss", "{}.pickle".format(data_id))
    tf_logging.debug("Loading " + name1)
    output1 = PredictionOutput(name1)
    tf_logging.debug("Loading " + name2)
    output2 = PredictionOutput(name2)
    assert len(output1.input_ids) == len(output2.input_ids)

    out_path = os.path.join(working_dir, "loss_pred_train_data/{}".format(data_id))
    record_writer = RecordWriterWrap(out_path)
    n_inst = len(output1.input_ids)
    sep_id = tokenizer.vocab["[SEP]"]
    tf_logging.debug("Iterating")
    ticker = TimeEstimator(n_inst, "", 1000)
    for i in range(n_inst):
        if i % 1000 == 0:
            assert_input_equal(output1.input_ids[i], output2.input_ids[i])
        try:
            features = get_segment_and_mask(output1.input_ids[i], sep_id)
        except:
            try:
                sep_indice = get_sep_considering_masking(
                    output1.input_ids[i], sep_id, output1.masked_lm_ids[i],
                    output1.masked_lm_positions[i])
                features = get_segment_and_mask_inner(output1.input_ids[i],
                                                      sep_indice)
            except:
                tokens = tokenizer.convert_ids_to_tokens(output1.input_ids[i])
                print(tokenization.pretty_tokens(tokens))
                print(output1.masked_lm_ids[i])
                print(output1.masked_lm_positions[i])
                raise

        features["next_sentence_labels"] = create_int_feature([0])
        features["masked_lm_positions"] = create_int_feature(
            output1.masked_lm_positions[i])
        features["masked_lm_ids"] = create_int_feature(output1.masked_lm_ids[i])
        features["masked_lm_weights"] = create_float_feature(
            output1.masked_lm_weights[i])
        features["loss_base"] = create_float_feature(
            output1.masked_lm_example_loss[i])
        features["loss_target"] = create_float_feature(
            output2.masked_lm_example_loss[i])
        record_writer.write_feature(features)
        ticker.tick()
    record_writer.close()
def generate_alt_runs(batch):
    logits, ex_logit = self.sess.run(
        [self.sout, self.ex_logits],
        feed_dict=self.batch2feed_dict(batch))
    x0, x1, x2, y = batch
    pred = np.argmax(logits, axis=1)

    instance_infos = []
    new_batches = []
    deleted_mask_list = []
    tag_size_list = []
    for i in range(len(logits)):
        if pred[i] in self.target_class_set:
            info = {}
            info['init_logit'] = logits[i]
            info['orig_input'] = (x0[i], x1[i], x2[i], y[i])
            ex_tags = self.logit2tag(ex_logit[i])
            tf_logging.debug("EX_Score : {}".format(numpy_print(ex_logit[i])))
            tag_size = np.count_nonzero(ex_tags)
            tag_size_list.append(tag_size)
            if tag_size > 10:
                tf_logging.debug("#Tagged token={}".format(tag_size))

            info['idx_delete_tagged'] = len(new_batches)
            new_batches.append(token_delete(ex_tags, x0[i], x1[i], x2[i]))
            deleted_mask_list.append(ex_tags)

            indice_delete_random = []
            for _ in range(self.compare_deletion_num):
                indice_delete_random.append(len(new_batches))
                x_list, delete_mask = seq_delete_inner(
                    sample_size(), x0[i], x1[i], x2[i])
                new_batches.append(x_list)
                deleted_mask_list.append(delete_mask)

            info['indice_delete_random'] = indice_delete_random
            instance_infos.append(info)

    if tag_size_list:
        avg_tag_size = average(tag_size_list)
        tf_logging.debug("avg Tagged token#={}".format(avg_tag_size))
    return new_batches, instance_infos, deleted_mask_list
def get_tlm_assignment_map_v2(tvars, tlm_prefix, lm_checkpoint,
                              target_task_checkpoint_tf2):
    """Compute the union of the current variables and checkpoint variables."""
    assignment_map = {}
    initialized_variable_names = {}
    real_name_map = {}

    target_task_name_to_var = collections.OrderedDict()
    lm_assignment_candidate = {}
    tt_assignment_candidate = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        tokens = name.split("/")
        top_scope = tokens[0]
        if tlm_prefix == top_scope:
            inner_name = "/".join(tokens[1:])
            target_task_name_to_var[inner_name] = var
            simple_name = inner_name
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm",
                                 simple_name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
            tt_assignment_candidate[simple_name] = var
            tf_logging.debug(
                "Variable to be loaded from target_task_checkpoint : %s" % name)
            real_name_map[simple_name] = name
        else:
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
            lm_assignment_candidate[simple_name] = var
            tf_logging.debug("Variable to be loaded from lm_checkpoint : %s" % name)
            real_name_map[simple_name] = name

    assignment_map_tt = collections.OrderedDict()
    if target_task_checkpoint_tf2:
        for x in tf.train.list_variables(target_task_checkpoint_tf2):
            (name, var) = (x[0], x[1])
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
            tf_logging.debug("Vars in TT : %s" % name)
            tf_logging.debug("map to -> : %s" % simple_name)
            if simple_name not in tt_assignment_candidate:
                continue
            assignment_map_tt[name] = tt_assignment_candidate[simple_name]
            real_name = real_name_map[simple_name]
            initialized_variable_names[real_name] = 1

    if lm_checkpoint:
        for x in tf.train.list_variables(lm_checkpoint):
            (name, var) = (x[0], x[1])
            if name not in lm_assignment_candidate:
                continue
            assignment_map[name] = lm_assignment_candidate[name]
            real_name = real_name_map[name]
            initialized_variable_names[real_name] = 1
            initialized_variable_names[real_name + ":0"] = 1

    return assignment_map, assignment_map_tt, initialized_variable_names
def train_nli_w_dict(run_name, model: DictReaderInterface, model_path,
                     model_config, data_feeder_loader, model_init_fn):
    print("Train nli :", run_name)
    batch_size = FLAGS.train_batch_size
    f_train_lookup = "lookup" in FLAGS.train_op
    tf_logging.debug("Building graph")

    with tf.compat.v1.variable_scope("optimizer"):
        lr = FLAGS.learning_rate
        lr2 = lr * 0.1
        if model_config.compare_attrib_value_safe("use_two_lr", True):
            tf_logging.info("Using two lr for each parts")
            train_cls, global_step = get_train_op_sep_lr(
                model.get_cls_loss(), lr, 5, "dict")
        else:
            train_cls, global_step = train_module.get_train_op(
                model.get_cls_loss(), lr)
        train_lookup_op, global_step = train_module.get_train_op(
            model.get_lookup_loss(), lr2, global_step)

    sess = train_module.init_session()
    sess.run(tf.compat.v1.global_variables_initializer())
    train_writer, test_writer = setup_summary_writer(run_name)

    last_saved = get_latest_model_path_from_dir_path(model_path)
    if last_saved:
        tf_logging.info("Loading previous model from {}".format(last_saved))
        load_model(sess, last_saved)
    elif model_init_fn is not None:
        model_init_fn(sess)

    log = log_module.train_logger()
    train_data_feeder = data_feeder_loader.get_train_feeder()
    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    lookup_train_feeder = train_data_feeder
    valid_runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info, sess)

    dev_batches = []
    n_dev_batch = 100
    dev_batches_w_dict = dev_data_feeder.get_all_batches(batch_size, True)[:n_dev_batch]
    for _ in range(n_dev_batch):
        dev_batches.append(dev_data_feeder.get_random_batch(batch_size))
        dev_batches_w_dict.append(dev_data_feeder.get_lookup_batch(batch_size))

    def get_summary_obj(loss, acc):
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='loss', simple_value=loss)
        summary.value.add(tag='accuracy', simple_value=acc)
        return summary

    def get_summary_obj_lookup(loss, p_at_1):
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='lookup_loss', simple_value=loss)
        summary.value.add(tag='P@1', simple_value=p_at_1)
        return summary

    def train_lookup(step_i):
        batches, info = lookup_train_feeder.get_lookup_train_batches(batch_size)
        if not batches:
            raise NoLookupException()

        def get_cls_loss(batch):
            return sess.run([model.get_cls_loss_arr()],
                            feed_dict=model.batch2feed_dict(batch))

        loss_array = get_loss_from_batches(batches, get_cls_loss)
        supervision_for_lookup = train_data_feeder.get_lookup_training_batch(
            loss_array, batch_size, info)

        def lookup_train(batch):
            return sess.run(
                [model.get_lookup_loss(), model.get_p_at_1(), train_lookup_op],
                feed_dict=model.batch2feed_dict(batch))

        avg_loss, p_at_1, _ = lookup_train(supervision_for_lookup)
        train_writer.add_summary(get_summary_obj_lookup(avg_loss, p_at_1), step_i)
        log.info("Step {0} lookup loss={1:.04f}".format(step_i, avg_loss))
        return avg_loss

    def train_classification(step_i):
        batch = train_data_feeder.get_random_batch(batch_size)
        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(), model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return loss_val, acc

    lookup_loss_window = MovingWindow(20)

    def train_classification_w_lookup(step_i):
        data_indices, batch = train_data_feeder.get_lookup_batch(batch_size)
        logits, = sess.run([model.get_lookup_logits()],
                           feed_dict=model.batch2feed_dict(batch))
        term_ranks = np.flip(np.argsort(logits[:, :, 1], axis=1))
        batch = train_data_feeder.augment_dict_info(data_indices, term_ranks)

        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(), model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("ClsW]Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return loss_val, acc

    def lookup_enabled(lookup_loss_window, step_i):
        return step_i > model_config.lookup_min_step \
            and lookup_loss_window.get_average() < model_config.lookup_threshold

    def train_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            loss, acc = train_classification_w_lookup(step_i)
        else:
            loss, acc = train_classification(step_i)

        if f_train_lookup and step_i % model_config.lookup_train_frequency == 0:
            try:
                lookup_loss = train_lookup(step_i)
                lookup_loss_window.append(lookup_loss, 1)
            except NoLookupException:
                log.warning("No possible lookup found")
        return loss, acc

    def debug_fn(batch):
        y_lookup, = sess.run([model.y_lookup],
                             feed_dict=model.batch2feed_dict(batch))
        print(y_lookup)
        return 0, 0

    def valid_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            valid_fn_w_lookup(step_i)
        else:
            valid_fn_wo_lookup(step_i)

    def valid_fn_wo_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_wo_lookup(dev_batches)
        log.info("Step {0} Dev loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def valid_fn_w_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_w_lookup(dev_batches_w_dict)
        log.info("Step {0} DevW loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def save_fn():
        op = tf.compat.v1.assign(global_step, step_i)
        sess.run([op])
        return save_model_to_dir_path(sess, model_path, global_step)

    n_data = train_data_feeder.get_data_len()
    step_per_epoch = int((n_data + batch_size - 1) / batch_size)
    tf_logging.debug("{} data point -> {} batches / epoch".format(
        n_data, step_per_epoch))
    train_steps = step_per_epoch * FLAGS.num_train_epochs
    tf_logging.debug("Max train step : {}".format(train_steps))
    valid_freq = 100
    save_interval = 60 * 20
    last_save = time.time()

    init_step, = sess.run([global_step])
    print("Initial step : ", init_step)
    for step_i in range(init_step, train_steps):
        if valid_fn is not None:
            if (step_i + 1) % valid_freq == 0:
                valid_fn(step_i)

        if save_fn is not None:
            if time.time() - last_save > save_interval:
                save_fn()
                last_save = time.time()

        loss, acc = train_fn(step_i)

    return save_fn()
def demo_nli_w_dict(run_name, model: WSSDRWrapper, model_path, data_feeder_loader):
    print("Demonstrate nli_w_dict :", run_name)
    tf_logging.debug("Building graph")
    batch_size = FLAGS.train_batch_size
    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info)
    runner.load_last_saved_model(model_path)
    dev_batches = dev_data_feeder.get_all_batches(batch_size, True)
    n_batches = len(dev_batches)
    tokenizer = get_tokenizer()
    html = HtmlVisualizer("nli_w_dict_demo.html")

    def fetch_fn(step_i):
        for indice, batch in dev_batches:
            print(indice)
            cache_name = "term_ranks_logits_cache"
            # logits = load_cache(cache_name)
            logits, = runner.sess.run(
                [runner.model.get_lookup_logits()],
                feed_dict=runner.model.batch2feed_dict(batch))
            raw_scores = logits[:, :, 1]
            term_ranks = np.argsort(logits[:, :, 1], axis=1)
            term_ranks = np.flip(term_ranks)
            save_to_pickle(logits, cache_name)

            x0, x1, x2, x3, y, x4, x5, x6, ab_map, ab_mapping_mask = batch
            for idx in range(len(indice)):
                ranks = term_ranks[idx]
                data_idx = indice[idx]
                input_ids = x0[idx]
                tokens = tokenizer.convert_ids_to_tokens(input_ids)
                words = dev_data_feeder.data_info[data_idx]
                location_to_word = dev_data_feeder.invert_index_word_locations(words)

                row = []
                for rank in term_ranks[idx]:
                    row.append(Cell(rank))

                for rank in ranks:
                    if rank in location_to_word and rank != 0:
                        highest_rank = rank
                        break
                for rank in ranks[::-1]:
                    if rank in location_to_word and rank != 0:
                        lowest_rank = rank
                        break

                html.write_table([row])

                t1 = []
                s1 = []
                t2 = []
                s2 = []
                text = [t1, t2]
                score_row = [s1, s2]
                sent_idx = 0
                for i, t in enumerate(tokens):
                    score = raw_scores[idx, i]
                    if i in location_to_word:
                        if i == highest_rank:
                            c = Cell(tokens[i], 150)
                        elif i == lowest_rank:
                            c = Cell(tokens[i], 150, target_color="R")
                        else:
                            c = Cell(tokens[i], 70)
                        s = Cell(score, score * 100)
                    else:
                        c = Cell(tokens[i])
                        s = Cell(score, score * 70)

                    text[sent_idx].append(c)
                    score_row[sent_idx].append(s)
                    if tokens[i] == "[unused3]":
                        sent_idx += 1
                        if sent_idx == 2:
                            break

                html.write_table([t1, s1])
                html.write_table([t2, s2])

                rows = []
                for word in words:
                    row = [Cell(word.word), Cell(word.location)]
                    rows.append(row)
                html.write_table(rows)

    fetch_fn(0)
def assignment_map_v2_to_v2(tvars, lm_checkpoint_v2):
    """Compute the union of the current variables and checkpoint variables."""
    initialized_variable_names = {}
    real_name_map = {}
    tf_logging.debug("assignment_map_v2_to_v2")

    lm_assignment_candidate = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", name)
        simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
        lm_assignment_candidate[simple_name] = var
        tf_logging.debug("Variable to be loaded from lm_checkpoint : %s" % name)
        tf_logging.debug(" simple_name : %s" % simple_name)
        real_name_map[simple_name] = name

    assignment_map = collections.OrderedDict()
    if lm_checkpoint_v2:
        for x in tf.train.list_variables(lm_checkpoint_v2):
            (name, var) = (x[0], x[1])
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
            tf_logging.debug("Vars in TT : %s" % name)
            tf_logging.debug("map to -> : %s" % simple_name)
            if simple_name not in lm_assignment_candidate:
                continue
            assignment_map[name] = lm_assignment_candidate[simple_name]
            tf_logging.debug("Matched variables : %s" % name)
            real_name = real_name_map[simple_name]
            initialized_variable_names[real_name] = 1
            initialized_variable_names[real_name + ":0"] = 1
    return assignment_map, initialized_variable_names
def phase2_to_phase1_assignment_remap(tvars, src_checkpoint):
    """Compute the union of the current variables and checkpoint variables."""
    tf_logging.debug("get_assignment_map_remap_from_v2")
    initialized_variable_names = {}
    assignment_candidate = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        assignment_candidate[name] = var

    def parse_shift_name(name, key, shift_idx):
        tokens = name.split("/")
        new_tokens = []
        for token in tokens:
            if token.startswith(key):
                if token == key:
                    idx = 0
                else:
                    idx_str = token[len(key) + 1:]
                    idx = int(idx_str)
                new_idx = idx + shift_idx
                assert new_idx >= 0
                if new_idx == 0:
                    new_token = key
                else:
                    new_token = "_".join([key, str(new_idx)])
            else:
                new_token = token
            new_tokens.append(new_token)
        return "/".join(new_tokens)

    assignment_map = collections.OrderedDict()
    if src_checkpoint:
        for x in tf.train.list_variables(src_checkpoint):
            (old_name, var) = (x[0], x[1])
            if old_name.startswith(dual_model_prefix2):
                new_name = old_name.replace(dual_model_prefix2, dual_model_prefix1)
                if "/dense" in new_name:
                    new_name = parse_shift_name(new_name, "dense", -37)
                if "/layer_normalization" in new_name:
                    new_name = parse_shift_name(new_name, "layer_normalization", -25)
            elif old_name.startswith("cls_dense_1/"):
                new_name = old_name.replace("cls_dense_1/",
                                            dual_model_prefix1 + "/cls_dense/")
            else:
                new_name = None

            if new_name is not None and new_name in assignment_candidate:
                tf_logging.debug("Vars in checkpoint : %s" % old_name)
                tf_logging.debug("map to -> : %s" % new_name)
                assignment_map[old_name] = assignment_candidate[new_name]
                initialized_variable_names[new_name] = 1
                initialized_variable_names[new_name + ":0"] = 1
    return assignment_map, initialized_variable_names
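# Worked example for parse_shift_name above (paths are illustrative only):
#   parse_shift_name("encoder/dense_40/kernel", "dense", -37)
#       -> "encoder/dense_3/kernel"
#   parse_shift_name("encoder/layer_normalization_25/beta", "layer_normalization", -25)
#       -> "encoder/layer_normalization/beta"   # index 0 drops the numeric suffix
# The -37 / -25 offsets appear to realign the auto-numbered dense and
# layer_normalization layers of the phase-2 scope with their phase-1 counterparts.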