Example #1
def get_assignment_map_as_is(tvars, checkpoint):
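    """Map checkpoint variables to graph variables whose names match exactly
    (only the trailing ":0"-style suffix is stripped from the graph names)."""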
    current_vars = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)

        current_vars[name] = var
        tf_logging.debug("Init from lm_checkpoint : %s" % name)

    assignment_map = {}
    initialized_variable_names = {}
    if checkpoint:
        for x in tf.train.list_variables(checkpoint):
            (name, var) = (x[0], x[1])
            if name not in current_vars:
                continue
            assignment_map[name] = current_vars[name]
            tf_logging.debug("Mapped : %s" % name)

            initialized_variable_names[name] = 1
            initialized_variable_names[name + ":0"] = 1

    return assignment_map, initialized_variable_names
Example #2
def get_assignment_map_remap_from_v1(tvars, remap_prefix, lm_checkpoint):
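    """Map variables under the `remap_prefix` scope to their TF1-style checkpoint
    names (layer_normalization* -> LayerNorm, dense* -> dense)."""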
    tf_logging.debug("get_assignment_map_remap_from_v1")
    assignment_candidate = {}
    real_name_map = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)

        tokens = name.split("/")
        top_scope = tokens[0]
        if remap_prefix == top_scope:
            inner_name = "/".join(tokens[1:])
            targ_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm",
                               inner_name)
            targ_name = re.sub(r"dense[_]?\d*", "dense", targ_name)
            assignment_candidate[targ_name] = var
            tf_logging.info("Init from v1 : %s" % name)
            real_name_map[targ_name] = name

    assignment_map = {}
    initialized_variable_names = {}
    if lm_checkpoint:
        for x in tf.train.list_variables(lm_checkpoint):
            (name, var) = (x[0], x[1])
            if name not in assignment_candidate:
                continue
            assignment_map[name] = assignment_candidate[name]
            tvar_name = real_name_map[name]
            initialized_variable_names[tvar_name] = 1
            initialized_variable_names[tvar_name + ":0"] = 1

    return assignment_map, initialized_variable_names
Example #3
        def generate_alt_runs(batch):
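            # For each instance in the batch, build `compare_deletion_num`
            # randomly-deleted variants and collect their deletion masks.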
            logits = self.forward_run(batch)
            x0, x1, x2, y = batch
            instance_infos = []
            new_insts = []
            deleted_mask_list = []
            tag_size_list = []
            for i in range(len(logits)):
                info = {}
                info['init_logit'] = logits[i]
                info['orig_input'] = (x0[i], x1[i], x2[i], y[i])
                indice_delete_random = []
                for _ in range(self.compare_deletion_num):
                    indice_delete_random.append(len(new_insts))
                    x_list, delete_mask = self.sample_deleter(
                        sample_size(), x0[i], x1[i], x2[i])
                    new_insts.append(x_list)
                    deleted_mask_list.append(delete_mask)

                info['indice_delete_random'] = indice_delete_random
                instance_infos.append(info)
            if tag_size_list:
                avg_tag_size = average(tag_size_list)
                tf_logging.debug("avg Tagged token#={}".format(avg_tag_size))
            return new_insts, instance_infos, deleted_mask_list
Example #4
def get_bert_assignment_map(tvars, lm_checkpoint):
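    """Match trainable variables against a BERT checkpoint by rewriting Keras-style
    layer_normalization*/dense* names back to LayerNorm/dense."""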
    lm_assignment_candidate = {}
    real_name_map = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)

        tokens = name.split("/")
        top_scope = tokens[0]
        targ_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", name)
        targ_name = re.sub(r"dense[_]?\d*", "dense", targ_name)
        lm_assignment_candidate[targ_name] = var
        tf_logging.debug("Init from lm_checkpoint : %s" % name)
        real_name_map[targ_name] = name

    assignment_map = {}
    initialized_variable_names = {}
    if lm_checkpoint:
        for x in tf.train.list_variables(lm_checkpoint):
            (name, var) = (x[0], x[1])
            if name not in lm_assignment_candidate:
                continue
            assignment_map[name] = lm_assignment_candidate[name]

            tvar_name = real_name_map[name]

            initialized_variable_names[tvar_name] = 1
            initialized_variable_names[tvar_name + ":0"] = 1

    return (assignment_map, initialized_variable_names)
Example #5
def align_checkpoint_for_lm(
    tvars,
    checkpoint_type,
    init_checkpoint,
    second_init_checkpoint=None,
):
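    """Select the assignment function(s) for `checkpoint_type` and return the
    initialized-variable name maps plus an init_fn that restores the checkpoint(s)."""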
    tf_logging.debug("align_checkpoint_for_lm")

    use_multiple_checkpoint = checkpoint_type in [
        "v2_and_bert", "nli_and_bert"
    ]

    initialized_variable_names2 = {}
    if init_checkpoint:
        if not use_multiple_checkpoint:
            if checkpoint_type == "" or checkpoint_type == "bert":
                assignment_fn = get_bert_assignment_map
            elif checkpoint_type == "v2":
                assignment_fn = assignment_map_v2_to_v2
            else:
                raise Exception("Undefined checkpoint exists")

            assignment_map, initialized_variable_names = assignment_fn(
                tvars, init_checkpoint)

            def init_fn():
                tf.compat.v1.train.init_from_checkpoint(
                    init_checkpoint, assignment_map)

        else:
            if checkpoint_type == "nli_and_bert":
                assignment_map, initialized_variable_names = get_bert_assignment_map(
                    tvars, init_checkpoint)
                assignment_map2, initialized_variable_names2 = get_cls_assignment(
                    tvars, second_init_checkpoint)
            elif checkpoint_type == "v2_and_bert":
                assignment_map, initialized_variable_names = assignment_map_v2_to_v2(
                    tvars, init_checkpoint)
                assignment_map2, initialized_variable_names2 = get_cls_assignment(
                    tvars, second_init_checkpoint)

            else:
                raise Exception("Undefined checkpoint exists")

            def init_fn():
                tf.compat.v1.train.init_from_checkpoint(
                    init_checkpoint, assignment_map)

                tf.compat.v1.train.init_from_checkpoint(
                    second_init_checkpoint, assignment_map2)

    else:
        initialized_variable_names = {}

        def init_fn():
            pass

    return initialized_variable_names, initialized_variable_names2, init_fn
Example #6
def train_debug_factory(sess, loss_tensor, acc_tensor, gradient,
                        batch2feed_dict, batch, step_i):
    loss_val, acc, gradient = sess.run([
        loss_tensor,
        acc_tensor,
        gradient,
    ],
                                       feed_dict=batch2feed_dict(batch))
    tf_logging.debug("Step {0} train loss={1:.04f} acc={2:.04f}".format(
        step_i, loss_val, acc))
    return loss_val, acc
Example #7
def train_classification_factory(sess, loss_tensor, acc_tensor, train_op,
                                 batch2feed_dict, batch, step_i):
    loss_val, acc, _ = sess.run([
        loss_tensor,
        acc_tensor,
        train_op,
    ],
                                feed_dict=batch2feed_dict(batch))
    tf_logging.debug("Step {0} train loss={1:.04f} acc={2:.04f}".format(
        step_i, loss_val, acc))
    return loss_val, acc
Example #8
def sero_from_v2(tvars, lm_checkpoint):
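    """Remap "sero" variables onto a BERT v2 checkpoint: "sero/lower" maps to
    "bert/encoder" as-is, while "sero/upper" layer indices are shifted by +6."""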
    tf_logging.debug("sero_from_v2")

    def get_target_name(var_name):
        targ_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", var_name)
        targ_name = re.sub(r"dense[_]?\d*", "dense", targ_name)
        tokens = targ_name.split("/")
        if tokens[0] == "sero":
            tokens[0] = "bert"

        if len(tokens) > 2:
            if tokens[1] == "lower":
                tokens[1] = "encoder"
            elif tokens[1] == "upper":
                str_layer, str_no = tokens[2].split("_")
                str_no = str(int(str_no) + 6)
                tokens[1] = "encoder"
                tokens[2] = str_layer + "_" + str_no
        targ_name = "/".join(tokens)
        return targ_name

    assignment_candidate = {}
    real_name_map = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        targ_name = get_target_name(name)
        assignment_candidate[targ_name] = var
        tf_logging.info("Init from v2 : %s" % name)
        real_name_map[targ_name] = name

    assignment_map = {}
    initialized_variable_names = {}
    if lm_checkpoint:
        for x in tf.train.list_variables(lm_checkpoint):
            (name, var) = (x[0], x[1])
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm",
                                 name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)

            tf_logging.info("Checkpoint Var : %s" % name)
            if simple_name not in assignment_candidate:
                continue
            assignment_map[name] = assignment_candidate[simple_name]
            tvar_name = real_name_map[simple_name]
            initialized_variable_names[tvar_name] = 1
            initialized_variable_names[tvar_name + ":0"] = 1

    return assignment_map, initialized_variable_names
Example #9
def work(job_id):
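    """Write one TFRecord file per job: per-token loss1/loss2 values are scattered
    back to their original positions, with loss_valid marking the predicted tokens."""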
    outfile = os.path.join(working_dir, "BLC_data", "{}".format(job_id))
    if os.path.exists(outfile):
        return "Skip"
    tf_logging.debug("Loading data")
    data = load(job_id)
    tf_logging.debug("Done")
    if data is None:
        return "No Input"

    writer = RecordWriterWrap(outfile)

    batch_size, seq_length = data[0]['input_ids'].shape
    keys = list(data[0].keys())

    vectors = flatten_batches(data)
    basic_keys = "input_ids", "input_mask", "segment_ids"
    any_key = keys[0]
    data_len = len(vectors[any_key])
    num_predictions = len(vectors["grouped_positions"][0][0])

    for i in range(data_len):
        mask_valid = [0] * seq_length
        loss1_arr = [0] * seq_length
        loss2_arr = [0] * seq_length
        positions = vectors["grouped_positions"][i]
        num_trials = len(positions)
        for t_i in range(num_trials):
            for p_i in range(num_predictions):
                loc = vectors["grouped_positions"][i][t_i][p_i]
                loss1 = vectors["grouped_loss1"][i][t_i][p_i]
                loss2 = vectors["grouped_loss2"][i][t_i][p_i]

                loss1_arr[loc] = loss1
                loss2_arr[loc] = loss2
                assert mask_valid[loc] == 0
                mask_valid[loc] = 1

        features = collections.OrderedDict()
        for key in basic_keys:
            features[key] = create_int_feature(vectors[key][i])

        features["loss_valid"] = create_int_feature(mask_valid)
        features["loss1"] = create_float_feature(loss1_arr)
        features["loss2"] = create_float_feature(loss2_arr)
        features["next_sentence_labels"] = create_int_feature([0])
        writer.write_feature(features)
        #if i < 20:
        #    log_print_feature(features)
    writer.close()
    return "Done"
Example #10
def train_fn_factory(sess, loss_tensor, all_losses, train_op, batch2feed_dict,
                     batch, step_i):
    loss_val, all_losses_val, _ = sess.run([
        loss_tensor,
        all_losses,
        train_op,
    ],
                                           feed_dict=batch2feed_dict(batch))
    n_layer = len(all_losses_val)
    verbose_loss_str = " ".join(
        ["{0}: {1:.2f}".format(i, all_losses_val[i]) for i in range(n_layer)])
    tf_logging.debug("Step {0} train loss={1:.04f} {2}".format(
        step_i, loss_val, verbose_loss_str))
    return loss_val, 0
Example #11
    def __init__(self, num_classes, ssdr_config, core_model, seq_length, is_training):
        super(WSSDRWrapperInterface, self).__init__()
        placeholder = tf.compat.v1.placeholder
        bert_config = BertConfig.from_json_file(os.path.join(data_path, "bert_config.json"))
        def_max_length = FLAGS.max_def_length
        loc_max_length = FLAGS.max_loc_length
        tf_logging.debug("WSSDRWrapper init()")
        tf_logging.debug("seq_length %d" % seq_length)
        tf_logging.debug("def_max_length %d" % def_max_length)
        tf_logging.debug("loc_max_length %d" % loc_max_length)

        self.input_ids = placeholder(tf.int64, [None, seq_length], name="input_ids")
        self.input_mask_ = placeholder(tf.int64, [None, seq_length], name="input_mask")
        self.segment_ids = placeholder(tf.int64, [None, seq_length], name="segment_ids")
        self.d_location_ids = placeholder(tf.int64, [None, loc_max_length], name="d_location_ids")

        self.d_input_ids = placeholder(tf.int64, [None, def_max_length], name="d_input_ids")
        self.d_input_mask = placeholder(tf.int64, [None, def_max_length], name="d_input_mask")
        self.d_segment_ids = placeholder(tf.int64, [None, def_max_length], name="d_segment_ids")
        self.ab_mapping = placeholder(tf.int64, [None, 1], name="ab_mapping")
        if ssdr_config.use_ab_mapping_mask:
            self.ab_mapping_mask = placeholder(tf.int64, [None, FLAGS.def_per_batch], name="ab_mapping_mask")
        else:
            self.ab_mapping_mask = None

        # [batch,seq_len], 1 if the indices in d_locations_id
        y_lookup = get_y_lookup_from_location_ids(self.d_location_ids, seq_length)

        self.y_cls = placeholder(tf.int64, [None])

        self.network = core_model(
                config=bert_config,
                ssdr_config=ssdr_config,
                is_training=is_training,
                input_ids=self.input_ids,
                input_mask=self.input_mask_,
                token_type_ids=self.segment_ids,
                d_input_ids=self.d_input_ids,
                d_input_mask=self.d_input_mask,
                d_segment_ids=self.d_segment_ids,
                d_location_ids=self.d_location_ids,
                ab_mapping=self.ab_mapping,
                ab_mapping_mask=self.ab_mapping_mask,
                use_one_hot_embeddings=False,
            )
        self.cls_logits = keras.layers.Dense(num_classes)(self.network.get_pooled_output())
        self.cls_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.cls_logits,
            labels=self.y_cls)
        self.cls_loss = tf.reduce_mean(self.cls_loss_arr)

        self.lookup_logits = keras.layers.Dense(2)(self.network.get_sequence_output())
        self.lookup_p_at_1 = tf_module.p_at_1(self.lookup_logits[:,:, 1], y_lookup)
        self.lookup_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.lookup_logits,
            labels=y_lookup)
        self.y_lookup = y_lookup
        self.lookup_loss_per_example = tf.reduce_sum(self.lookup_loss_arr, axis=-1)
        self.lookup_loss = tf.reduce_mean(self.lookup_loss_per_example)
        self.acc = tf_module.accuracy(self.cls_logits, self.y_cls)
Example #12
def eval_nli_w_dict_lookup(run_name, model: DictReaderInterface, model_path,
                           data_feeder_loader):
    print("eval_nli_w_dict_lookup :", run_name)
    tf_logging.debug("Building graph")
    batch_size = FLAGS.train_batch_size
    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info)
    runner.load_last_saved_model(model_path)

    dev_batches = dev_data_feeder.get_all_batches(batch_size, True)[:100]
    n_batches = len(dev_batches)
    print('{} batches, about {} data'.format(n_batches,
                                             n_batches * batch_size))
    loss, acc = runner.run_batches_w_lookup(dev_batches)
    print("Dev total loss={0:.04f} acc={1:.03f}".format(loss, acc))
    return acc
Example #13
def log_var_assignments_one_by_one(tvars,
                                   initialized_variable_names,
                                   initialized_variable_names2=None):
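    """Log, for each trainable variable, whether it is initialized from the first
    checkpoint, the second checkpoint, or not initialized at all."""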
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        if initialized_variable_names2 is not None:
            if var.name in initialized_variable_names2:
                init_string = ", *INIT_FROM_CKPT2*"
        if init_string:
            tf_logging.debug("    name = %s, shape = %s%s", var.name,
                             var.shape, init_string)
        else:
            tf_logging.info("    name = %s, shape = %s%s", var.name, var.shape,
                            " - Not Initialized")
Example #14
def eval_nli_w_dict(run_name, model: DictReaderInterface, model_path,
                    data_feeder_loader):
    print("Eval nli :", run_name)
    tf_logging.debug("Building graph")
    batch_size = FLAGS.train_batch_size
    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    dev_batches = dev_data_feeder.get_all_batches(batch_size)

    runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info)
    runner.load_last_saved_model(model_path)

    def valid_fn(step_i):
        loss, acc = runner.run_batches_wo_lookup(dev_batches)
        print("Step {0} Dev loss={1:.04f} acc={2:.03f}".format(step_i, loss, acc))
        return acc

    return valid_fn(0)
Example #15
    def train_fn(batch, step_i):
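        # A classification step must advance global_step by exactly one, while the
        # explain step should leave it unchanged (checked by the asserts below).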
        step_before_cls = fetch_global_step()
        loss_val, acc = train_classification(batch, step_i)
        summary = tf.Summary()
        summary.value.add(tag='acc', simple_value=acc)
        summary.value.add(tag='loss', simple_value=loss_val)
        train_writer.add_summary(summary, fetch_global_step())
        train_writer.flush()
        tf_logging.debug("{}".format(step_i))

        step_after_cls = fetch_global_step()

        assert step_after_cls == step_before_cls + 1
        train_explain(batch, step_i)
        step_after_ex = fetch_global_step()
        assert step_after_cls == step_after_ex
        return loss_val, acc
Example #16
    def __init__(self,
                 data,
                 dictionary,
                 ssdr_config,
                 def_per_batch,
                 max_def_length,
                 data_info=None):
        self.max_def_length = max_def_length
        self.max_d_loc = ssdr_config.max_loc_length
        self.def_per_batch = def_per_batch
        self.raw_dictionary = dictionary
        self.use_ab_mapping_mask = ssdr_config.use_ab_mapping_mask
        tf_logging.debug("SSDRAugment init")
        tf_logging.debug("max_def_length: %d" % max_def_length)
        tf_logging.debug("max_d_loc: %d" % self.max_d_loc)
        tf_logging.debug("def_per_batch: %d" % self.def_per_batch)
        tf_logging.debug("use_ab_mapping_mask: {}".format(
            ssdr_config.use_ab_mapping_mask))
        super(SSDRAugment, self).__init__(data, data_info)
Example #17
def phase1_only_load(tvars, src_checkpoint):
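    """Build an assignment map containing only the phase-1 (dual_model_prefix1)
    variables and the cls_dense head, keeping names unchanged."""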
    tf_logging.debug("phase1_only_load")
    initialized_variable_names = {}
    assignment_candidate = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        assignment_candidate[name] = var

    assignment_map = collections.OrderedDict()
    if src_checkpoint:
        for x in tf.train.list_variables(src_checkpoint):
            (name, var) = (x[0], x[1])

            if name.startswith(dual_model_prefix1):
                include = True
            elif name.startswith("cls_dense/"):
                include = True
            else:
                include = False

            if include and name in assignment_candidate:
                tf_logging.debug("Vars in checkpoint : %s" % name)
                tf_logging.debug("map to -> : %s" % name)
                assignment_map[name] = assignment_candidate[name]
                initialized_variable_names[name] = 1
                initialized_variable_names[name + ":0"] = 1

    return assignment_map, initialized_variable_names
Example #18
    def __init__(self, num_classes, seq_length, is_training):
        super(DictReaderWrapper, self).__init__()
        placeholder = tf.compat.v1.placeholder
        bert_config = BertConfig.from_json_file(os.path.join(data_path, "bert_config.json"))
        def_max_length = FLAGS.max_def_length
        loc_max_length = FLAGS.max_loc_length
        tf_logging.debug("DictReaderWrapper init()")
        tf_logging.debug("seq_length %d" % seq_length)
        tf_logging.debug("def_max_length %d" % def_max_length)
        tf_logging.debug("loc_max_length %d" % loc_max_length)

        self.input_ids = placeholder(tf.int64, [None, seq_length])
        self.input_mask_ = placeholder(tf.int64, [None, seq_length])
        self.segment_ids = placeholder(tf.int64, [None, seq_length])

        self.d_input_ids = placeholder(tf.int64, [None, def_max_length])
        self.d_input_mask = placeholder(tf.int64, [None, def_max_length])
        self.d_location_ids = placeholder(tf.int64, [None, loc_max_length])

        self.y_cls = placeholder(tf.int64, [None])
        self.y_lookup = placeholder(tf.int64, [None, seq_length])

        self.network = DictReaderModel(
                config=bert_config,
                d_config=bert_config,
                is_training=is_training,
                input_ids=self.input_ids,
                input_mask=self.input_mask_,
                d_input_ids=self.d_input_ids,
                d_input_mask=self.d_input_mask,
                d_location_ids=self.d_location_ids,
                use_target_pos_emb=True,
                token_type_ids=self.segment_ids,
                use_one_hot_embeddings=False,
            )

        self.cls_logits = keras.layers.Dense(num_classes)(self.network.pooled_output)
        self.cls_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.cls_logits,
            labels=self.y_cls)
        self.cls_loss = tf.reduce_mean(self.cls_loss_arr)

        self.lookup_logits = keras.layers.Dense(2)(self.network.sequence_output)
        self.lookup_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.lookup_logits,
            labels=self.y_lookup)
        self.lookup_loss_per_example = tf.reduce_mean(self.lookup_loss_arr, axis=-1)
        self.lookup_loss = tf.reduce_mean(self.lookup_loss_per_example)
        self.acc = tf_module.accuracy(self.cls_logits, self.y_cls)
Example #19
def debug_names(is_training):
    tf.compat.v1.disable_eager_execution()

    seq_max = 200
    lr = 1e-5
    batch_size = FLAGS.train_batch_size

    tf_logging.debug("Building graph")
    model = DictReaderWrapper(3, seq_max, is_training)

    with tf.compat.v1.variable_scope("optimizer"):
        train_cls, global_step = train_module.get_train_op(model.cls_loss, lr)
        train_lookup, global_step = train_module.get_train_op(
            model.lookup_loss, lr, global_step)

    sess = train_module.init_session()
    sess.run(tf.compat.v1.global_variables_initializer())
    tvars = tf.compat.v1.trainable_variables()

    for var in tvars:
        name = var.name
        print(name)
Example #20
def cppnc_assignment_remap2(tvars, lm_checkpoint):
    """Compute the union of the current variables and checkpoint variables."""
    tf_logging.debug("get_assignment_map_remap_from_v2")
    initialized_variable_names = {}
    real_name_map = {}

    assignment_candidate = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)

        tokens = name.split("/")
        top_scope = tokens[0]
        if triple_model_prefix2 == top_scope:
            targ_name = get_name_key(dual_model_prefix1, tokens)
            assignment_candidate[targ_name] = var
            tf_logging.info("Init from v2 : %s" % name)
            real_name_map[targ_name] = name
        elif triple_model_prefix3 == top_scope:
            targ_name = get_name_key(dual_model_prefix2, tokens)
            assignment_candidate[targ_name] = var
            tf_logging.info("Init from v2 : %s" % name)
            real_name_map[targ_name] = name

    assignment_map = collections.OrderedDict()
    if lm_checkpoint:
        for x in tf.train.list_variables(lm_checkpoint):
            (name, var) = (x[0], x[1])
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm",
                                 name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
            tf_logging.debug("Vars in TT : %s" % name)
            tf_logging.debug("map to -> : %s" % simple_name)

            if simple_name not in assignment_candidate:
                continue
            assignment_map[name] = assignment_candidate[simple_name]
            tf_logging.debug("Matched variables : %s" % name)

            real_name = real_name_map[simple_name]
            initialized_variable_names[real_name] = 1
            initialized_variable_names[real_name + ":0"] = 1

    return assignment_map, initialized_variable_names
Example #21
def do(data_id):
    working_dir = os.environ["TF_WORKING_DIR"]
    tokenizer = get_tokenizer()
    name1 = os.path.join(working_dir, "bert_loss", "{}.pickle".format(data_id))
    name2 = os.path.join(working_dir, "bfn_loss", "{}.pickle".format(data_id))

    tf_logging.debug("Loading " + name1)
    output1 = PredictionOutput(name1)
    tf_logging.debug("Loading " + name2)
    output2 = PredictionOutput(name2)

    assert len(output1.input_ids) == len(output2.input_ids)

    out_path = os.path.join(working_dir,
                            "loss_pred_train_data/{}".format(data_id))
    record_writer = RecordWriterWrap(out_path)
    n_inst = len(output1.input_ids)
    sep_id = tokenizer.vocab["[SEP]"]
    tf_logging.debug("Iterating")
    ticker = TimeEstimator(n_inst, "", 1000)
    for i in range(n_inst):
        if i % 1000 == 0:
            assert_input_equal(output1.input_ids[i], output2.input_ids[i])
        try:
            features = get_segment_and_mask(output1.input_ids[i], sep_id)
        except:
            try:
                sep_indice = get_sep_considering_masking(
                    output1.input_ids[i], sep_id, output1.masked_lm_ids[i],
                    output1.masked_lm_positions[i])
                features = get_segment_and_mask_inner(output1.input_ids[i],
                                                      sep_indice)
            except:
                tokens = tokenizer.convert_ids_to_tokens(output1.input_ids[i])
                print(tokenization.pretty_tokens(tokens))
                print(output1.masked_lm_ids[i])
                print(output1.masked_lm_positions[i])
                raise

        features["next_sentence_labels"] = create_int_feature([0])
        features["masked_lm_positions"] = create_int_feature(
            output1.masked_lm_positions[i])
        features["masked_lm_ids"] = create_int_feature(
            output1.masked_lm_ids[i])
        features["masked_lm_weights"] = create_float_feature(
            output1.masked_lm_weights[i])
        features["loss_base"] = create_float_feature(
            output1.masked_lm_example_loss[i])
        features["loss_target"] = create_float_feature(
            output2.masked_lm_example_loss[i])
        record_writer.write_feature(features)
        ticker.tick()

    record_writer.close()
Example #22
        def generate_alt_runs(batch):
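            # Run the model once, then for each instance predicted as a target class
            # build one deletion guided by the explanation tags plus several random ones.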
            logits, ex_logit = self.sess.run(
                [self.sout, self.ex_logits],
                feed_dict=self.batch2feed_dict(batch))
            x0, x1, x2, y = batch

            pred = np.argmax(logits, axis=1)
            instance_infos = []
            new_batches = []
            deleted_mask_list = []
            tag_size_list = []
            for i in range(len(logits)):
                if pred[i] in self.target_class_set:
                    info = {}
                    info['init_logit'] = logits[i]
                    info['orig_input'] = (x0[i], x1[i], x2[i], y[i])
                    ex_tags = self.logit2tag(ex_logit[i])
                    tf_logging.debug("EX_Score : {}".format(
                        numpy_print(ex_logit[i])))
                    tag_size = np.count_nonzero(ex_tags)
                    tag_size_list.append(tag_size)
                    if tag_size > 10:
                        tf_logging.debug("#Tagged token={}".format(tag_size))

                    info['idx_delete_tagged'] = len(new_batches)
                    new_batches.append(
                        token_delete(ex_tags, x0[i], x1[i], x2[i]))
                    deleted_mask_list.append(ex_tags)

                    indice_delete_random = []

                    for _ in range(self.compare_deletion_num):
                        indice_delete_random.append(len(new_batches))
                        x_list, delete_mask = seq_delete_inner(
                            sample_size(), x0[i], x1[i], x2[i])
                        new_batches.append(x_list)
                        deleted_mask_list.append(delete_mask)

                    info['indice_delete_random'] = indice_delete_random
                    instance_infos.append(info)
            if tag_size_list:
                avg_tag_size = average(tag_size_list)
                tf_logging.debug("avg Tagged token#={}".format(avg_tag_size))
            return new_batches, instance_infos, deleted_mask_list
Example #23
def get_tlm_assignment_map_v2(tvars, tlm_prefix, lm_checkpoint,
                              target_task_checkpoint_tf2):
    """Compute the union of the current variables and checkpoint variables."""
    assignment_map = {}
    initialized_variable_names = {}
    real_name_map = {}

    target_task_name_to_var = collections.OrderedDict()
    lm_assignment_candidate = {}
    tt_assignment_candidate = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)

        tokens = name.split("/")
        top_scope = tokens[0]
        if tlm_prefix == top_scope:
            inner_name = "/".join(tokens[1:])
            target_task_name_to_var[inner_name] = var
            simple_name = inner_name
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm",
                                 simple_name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
            tt_assignment_candidate[simple_name] = var
            tf_logging.debug(
                "Variable to be loaded from target_task_checkpoint : %s" %
                name)
            real_name_map[simple_name] = name
        else:
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm",
                                 name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
            lm_assignment_candidate[simple_name] = var
            tf_logging.debug("Variable to be loaded from lm_checkpoint : %s" %
                             name)
            real_name_map[simple_name] = name

    assignment_map_tt = collections.OrderedDict()
    if target_task_checkpoint_tf2:
        for x in tf.train.list_variables(target_task_checkpoint_tf2):
            (name, var) = (x[0], x[1])
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm",
                                 name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
            tf_logging.debug("Vars in TT : %s" % name)
            tf_logging.debug("map to -> : %s" % simple_name)

            if simple_name not in tt_assignment_candidate:
                continue
            assignment_map_tt[name] = tt_assignment_candidate[simple_name]

            real_name = real_name_map[simple_name]
            initialized_variable_names[real_name] = 1

    if lm_checkpoint:
        for x in tf.train.list_variables(lm_checkpoint):
            (name, var) = (x[0], x[1])
            if name not in lm_assignment_candidate:
                continue
            assignment_map[name] = lm_assignment_candidate[name]
            real_name = real_name_map[name]
            initialized_variable_names[real_name] = 1
            initialized_variable_names[real_name + ":0"] = 1

    return assignment_map, assignment_map_tt, initialized_variable_names
Example #24
def train_nli_w_dict(run_name, model: DictReaderInterface, model_path,
                     model_config, data_feeder_loader, model_init_fn):
    print("Train nli :", run_name)
    batch_size = FLAGS.train_batch_size
    f_train_lookup = "lookup" in FLAGS.train_op
    tf_logging.debug("Building graph")

    with tf.compat.v1.variable_scope("optimizer"):
        lr = FLAGS.learning_rate
        lr2 = lr * 0.1
        if model_config.compare_attrib_value_safe("use_two_lr", True):
            tf_logging.info("Using two lr for each parts")
            train_cls, global_step = get_train_op_sep_lr(
                model.get_cls_loss(), lr, 5, "dict")
        else:
            train_cls, global_step = train_module.get_train_op(
                model.get_cls_loss(), lr)
        train_lookup_op, global_step = train_module.get_train_op(
            model.get_lookup_loss(), lr2, global_step)

    sess = train_module.init_session()
    sess.run(tf.compat.v1.global_variables_initializer())

    train_writer, test_writer = setup_summary_writer(run_name)

    last_saved = get_latest_model_path_from_dir_path(model_path)
    if last_saved:
        tf_logging.info("Loading previous model from {}".format(last_saved))
        load_model(sess, last_saved)
    elif model_init_fn is not None:
        model_init_fn(sess)

    log = log_module.train_logger()
    train_data_feeder = data_feeder_loader.get_train_feeder()
    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    lookup_train_feeder = train_data_feeder
    valid_runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info, sess)

    dev_batches = []
    n_dev_batch = 100
    dev_batches_w_dict = dev_data_feeder.get_all_batches(batch_size,
                                                         True)[:n_dev_batch]
    for _ in range(n_dev_batch):
        dev_batches.append(dev_data_feeder.get_random_batch(batch_size))
        dev_batches_w_dict.append(dev_data_feeder.get_lookup_batch(batch_size))

    def get_summary_obj(loss, acc):
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='loss', simple_value=loss)
        summary.value.add(tag='accuracy', simple_value=acc)
        return summary

    def get_summary_obj_lookup(loss, p_at_1):
        summary = tf.compat.v1.Summary()
        summary.value.add(tag='lookup_loss', simple_value=loss)
        summary.value.add(tag='P@1', simple_value=p_at_1)
        return summary

    def train_lookup(step_i):
        batches, info = lookup_train_feeder.get_lookup_train_batches(
            batch_size)
        if not batches:
            raise NoLookupException()

        def get_cls_loss(batch):
            return sess.run([model.get_cls_loss_arr()],
                            feed_dict=model.batch2feed_dict(batch))

        loss_array = get_loss_from_batches(batches, get_cls_loss)

        supervision_for_lookup = train_data_feeder.get_lookup_training_batch(
            loss_array, batch_size, info)

        def lookup_train(batch):
            return sess.run(
                [model.get_lookup_loss(),
                 model.get_p_at_1(), train_lookup_op],
                feed_dict=model.batch2feed_dict(batch))

        avg_loss, p_at_1, _ = lookup_train(supervision_for_lookup)
        train_writer.add_summary(get_summary_obj_lookup(avg_loss, p_at_1),
                                 step_i)
        log.info("Step {0} lookup loss={1:.04f}".format(step_i, avg_loss))
        return avg_loss

    def train_classification(step_i):
        batch = train_data_feeder.get_random_batch(batch_size)
        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(),
             model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)

        return loss_val, acc

    lookup_loss_window = MovingWindow(20)

    def train_classification_w_lookup(step_i):
        data_indices, batch = train_data_feeder.get_lookup_batch(batch_size)
        logits, = sess.run([model.get_lookup_logits()],
                           feed_dict=model.batch2feed_dict(batch))
        term_ranks = np.flip(np.argsort(logits[:, :, 1], axis=1))
        batch = train_data_feeder.augment_dict_info(data_indices, term_ranks)

        loss_val, acc, _ = sess.run(
            [model.get_cls_loss(),
             model.get_acc(), train_cls],
            feed_dict=model.batch2feed_dict(batch))
        log.info("ClsW]Step {0} train loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        train_writer.add_summary(get_summary_obj(loss_val, acc), step_i)

        return loss_val, acc

    def lookup_enabled(lookup_loss_window, step_i):
        return step_i > model_config.lookup_min_step\
               and lookup_loss_window.get_average() < model_config.lookup_threshold

    def train_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            loss, acc = train_classification_w_lookup((step_i))
        else:
            loss, acc = train_classification(step_i)
        if f_train_lookup and step_i % model_config.lookup_train_frequency == 0:
            try:
                lookup_loss = train_lookup(step_i)
                lookup_loss_window.append(lookup_loss, 1)
            except NoLookupException:
                log.warning("No possible lookup found")

        return loss, acc

    def debug_fn(batch):
        y_lookup, = sess.run([
            model.y_lookup,
        ],
                             feed_dict=model.batch2feed_dict(batch))
        print(y_lookup)
        return 0, 0

    def valid_fn(step_i):
        if lookup_enabled(lookup_loss_window, step_i):
            valid_fn_w_lookup(step_i)
        else:
            valid_fn_wo_lookup(step_i)

    def valid_fn_wo_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_wo_lookup(dev_batches)
        log.info("Step {0} Dev loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def valid_fn_w_lookup(step_i):
        loss_val, acc = valid_runner.run_batches_w_lookup(dev_batches_w_dict)
        log.info("Step {0} DevW loss={1:.04f} acc={2:.03f}".format(
            step_i, loss_val, acc))
        test_writer.add_summary(get_summary_obj(loss_val, acc), step_i)
        return acc

    def save_fn():
        op = tf.compat.v1.assign(global_step, step_i)
        sess.run([op])
        return save_model_to_dir_path(sess, model_path, global_step)

    n_data = train_data_feeder.get_data_len()
    step_per_epoch = int((n_data + batch_size - 1) / batch_size)
    tf_logging.debug("{} data point -> {} batches / epoch".format(
        n_data, step_per_epoch))
    train_steps = step_per_epoch * FLAGS.num_train_epochs
    tf_logging.debug("Max train step : {}".format(train_steps))
    valid_freq = 100
    save_interval = 60 * 20
    last_save = time.time()

    init_step, = sess.run([global_step])
    print("Initial step : ", init_step)
    for step_i in range(init_step, train_steps):
        if valid_fn is not None:
            if (step_i + 1) % valid_freq == 0:
                valid_fn(step_i)

        if save_fn is not None:
            if time.time() - last_save > save_interval:
                save_fn()
                last_save = time.time()

        loss, acc = train_fn(step_i)

    return save_fn()
Example #25
def demo_nli_w_dict(run_name, model: WSSDRWrapper, model_path,
                    data_feeder_loader):
    print("Demonstrate nli_w_dict :", run_name)
    tf_logging.debug("Building graph")
    batch_size = FLAGS.train_batch_size

    dev_data_feeder = data_feeder_loader.get_dev_feeder()
    runner = WSSDRRunner(model, dev_data_feeder.augment_dict_info)
    runner.load_last_saved_model(model_path)
    dev_batches = dev_data_feeder.get_all_batches(batch_size, True)
    n_batches = len(dev_batches)
    tokenizer = get_tokenizer()
    html = HtmlVisualizer("nli_w_dict_demo.html")

    def fetch_fn(step_i):
        for indice, batch in dev_batches:
            print(indice)
            cache_name = "term_ranks_logits_cache"
            #logits = load_cache(cache_name)
            logits, = runner.sess.run(
                [runner.model.get_lookup_logits()],
                feed_dict=runner.model.batch2feed_dict(batch))
            raw_scores = logits[:, :, 1]
            term_ranks = np.argsort(logits[:, :, 1], axis=1)
            term_ranks = np.flip(term_ranks)
            save_to_pickle(logits, cache_name)

            x0, x1, x2, x3, y, x4, x5, x6, ab_map, ab_mapping_mask = batch

            for idx in range(len(indice)):
                ranks = term_ranks[idx]
                data_idx = indice[idx]
                input_ids = x0[idx]
                tokens = tokenizer.convert_ids_to_tokens(input_ids)

                words = dev_data_feeder.data_info[data_idx]
                location_to_word = dev_data_feeder.invert_index_word_locations(
                    words)

                row = []
                for rank in term_ranks[idx]:
                    row.append(Cell(rank))

                for rank in ranks:
                    if rank in location_to_word and rank != 0:
                        highest_rank = rank
                        break
                for rank in ranks[::-1]:
                    if rank in location_to_word and rank != 0:
                        lowest_rank = rank
                        break

                html.write_table([row])
                t1 = []
                s1 = []
                t2 = []
                s2 = []
                text = [t1, t2]
                score_row = [s1, s2]

                sent_idx = 0
                for i, t in enumerate(tokens):
                    score = raw_scores[idx, i]
                    if i in location_to_word:
                        if i == highest_rank:
                            c = Cell(tokens[i], 150)
                        elif i == lowest_rank:
                            c = Cell(tokens[i], 150, target_color="R")
                        else:
                            c = Cell(tokens[i], 70)
                        s = Cell(score, score * 100)
                    else:
                        c = Cell(tokens[i])
                        s = Cell(score, score * 70)

                    text[sent_idx].append(c)
                    score_row[sent_idx].append(s)

                    if tokens[i] == "[unused3]":
                        sent_idx += 1
                        if sent_idx == 2:
                            break

                html.write_table([t1, s1])
                html.write_table([t2, s2])

                rows = []
                for word in words:
                    row = [Cell(word.word), Cell(word.location)]
                    rows.append(row)
                html.write_table(rows)

    fetch_fn(0)
Example #26
def assignment_map_v2_to_v2(tvars, lm_checkpoint_v2):
    """Compute the union of the current variables and checkpoint variables."""
    initialized_variable_names = {}
    real_name_map = {}
    tf_logging.debug("assignment_map_v2_to_v2")

    lm_assignment_candidate = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)

        simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm", name)
        simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
        lm_assignment_candidate[simple_name] = var
        tf_logging.debug("Variable to be loaded from lm_checkpoint : %s" %
                         name)
        tf_logging.debug("                            simple_name  : %s" %
                         simple_name)
        real_name_map[simple_name] = name

    assignment_map = collections.OrderedDict()
    if lm_checkpoint_v2:
        for x in tf.train.list_variables(lm_checkpoint_v2):
            (name, var) = (x[0], x[1])
            simple_name = re.sub(r"layer_normalization[_]?\d*", "LayerNorm",
                                 name)
            simple_name = re.sub(r"dense[_]?\d*", "dense", simple_name)
            tf_logging.debug("Vars in TT : %s" % name)
            tf_logging.debug("map to -> : %s" % simple_name)

            if simple_name not in lm_assignment_candidate:
                continue
            assignment_map[name] = lm_assignment_candidate[simple_name]
            tf_logging.debug("Matched variables : %s" % name)

            real_name = real_name_map[simple_name]
            initialized_variable_names[real_name] = 1
            initialized_variable_names[real_name + ":0"] = 1

    return assignment_map, initialized_variable_names
Example #27
def phase2_to_phase1_assignment_remap(tvars, src_checkpoint):
    """Compute the union of the current variables and checkpoint variables."""
    tf_logging.debug("get_assignment_map_remap_from_v2")
    initialized_variable_names = {}
    assignment_candidate = {}
    for var in tvars:
        name = var.name
        m = re.match("^(.*):\\d+$", name)
        if m is not None:
            name = m.group(1)
        assignment_candidate[name] = var

    def parse_shift_name(name, key, shift_idx):
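        """Shift the numeric suffix of every path component that starts with `key`,
        e.g. "dense_40" -> "dense_3" when shift_idx is -37 (a bare `key` is index 0)."""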
        tokens = name.split("/")
        new_tokens = []
        for token in tokens:
            if token.startswith(key):
                if token == key:
                    idx = 0
                else:
                    idx_str = token[len(key) + 1:]
                    idx = int(idx_str)
                new_idx = idx + shift_idx

                assert new_idx >= 0
                if new_idx == 0:
                    new_token = key
                else:
                    new_token = "_".join([key, str(new_idx)])
            else:
                new_token = token

            new_tokens.append(new_token)
        return "/".join(new_tokens)

    assignment_map = collections.OrderedDict()
    if src_checkpoint:
        for x in tf.train.list_variables(src_checkpoint):
            (old_name, var) = (x[0], x[1])
            if old_name.startswith(dual_model_prefix2):
                new_name = old_name.replace(dual_model_prefix2,
                                            dual_model_prefix1)
                if "/dense" in new_name:
                    new_name = parse_shift_name(new_name, "dense", -37)
                if "/layer_normalization" in new_name:
                    new_name = parse_shift_name(new_name,
                                                "layer_normalization", -25)
            elif old_name.startswith("cls_dense_1/"):
                new_name = old_name.replace("cls_dense_1/",
                                            dual_model_prefix1 + "/cls_dense/")
            else:
                new_name = None

            if new_name is not None and new_name in assignment_candidate:
                tf_logging.debug("Vars in checkpoint : %s" % old_name)
                tf_logging.debug("map to -> : %s" % new_name)

                assignment_map[old_name] = assignment_candidate[new_name]
                initialized_variable_names[new_name] = 1
                initialized_variable_names[new_name + ":0"] = 1

    return assignment_map, initialized_variable_names