Example #1
def _evaluate(eval_fn, input_fn, decode_fn, path, config, device_list):
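    # Builds a per-device evaluation graph, shards the input features across
    # the devices, decodes both model outputs and references, and returns the
    # corpus BLEU score.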
    graph = tf.Graph()
    with graph.as_default():
        features = input_fn()
        refs = features["references"]
        placeholders = []
        for i in range(len(device_list)):
            placeholders.append({
                "source":
                tf.placeholder(tf.int32, [None, None], "source_%d" % i),
                "source_length":
                tf.placeholder(tf.int32, [None], "source_length_%d" % i)
            })
            for j in range(100):
                if ("mt_%d" % j) in features:
                    placeholders[-1]["mt_%d" % j] = tf.placeholder(
                        tf.int32, [None, None], "mt_%d_%d" % (j, i))
                    placeholders[-1]["mt_length_%d" % j] = tf.placeholder(
                        tf.int32, [None], "mt_length_%d_%d" % (j, i))

        predictions = parallel.data_parallelism(device_list, eval_fn,
                                                placeholders)
        predictions = [pred[0][:, 0, :] for pred in predictions]

        all_refs = [[] for _ in range(len(refs))]
        all_outputs = []

        sess_creator = tf.train.ChiefSessionCreator(checkpoint_dir=path,
                                                    config=config)

        with tf.train.MonitoredSession(session_creator=sess_creator) as sess:
            while not sess.should_stop():
                feats = sess.run(features)
                inp_feats = {key: feats[key] for key in placeholders[0].keys()}
                op, feed_dict = _shard_features(inp_feats, placeholders,
                                                predictions)
                # A list of numpy arrays with shape [batch, len]
                outputs = sess.run(op, feed_dict=feed_dict)

                for shard in outputs:
                    all_outputs.extend(shard.tolist())

                # shape: ([batch, len], ..., [batch, len])
                references = [item.tolist() for item in feats["references"]]

                for i in range(len(refs)):
                    all_refs[i].extend(references[i])

        decoded_symbols = decode_fn(all_outputs)

        for i, l in enumerate(decoded_symbols):
            decoded_symbols[i] = " ".join(l).replace("@@ ", "").split()

        decoded_refs = [decode_fn(refs) for refs in all_refs]
        decoded_refs = [list(x) for x in zip(*decoded_refs)]

        return bleu.bleu(decoded_symbols, decoded_refs)
Example #2
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Load configs
    model_cls_list = [models.get_model(model) for model in args.models]
    params_list = [default_parameters() for _ in range(len(model_cls_list))]
    params_list = [
        merge_parameters(params, model_cls.get_parameters())
        for params, model_cls in zip(params_list, model_cls_list)
    ]
    params_list = [
        import_params(args.checkpoints[i], args.models[i], params_list[i])
        for i in range(len(args.checkpoints))
    ]
    params_list = [
        override_parameters(params_list[i], args)
        for i in range(len(model_cls_list))
    ]

    # Build Graph
    with tf.Graph().as_default():
        model_var_lists = []

        # Load checkpoints
        for i, checkpoint in enumerate(args.checkpoints):
            tf.logging.info("Loading %s" % checkpoint)
            var_list = tf.train.list_variables(checkpoint)
            values = {}
            reader = tf.train.load_checkpoint(checkpoint)

            for (name, shape) in var_list:
                if not name.startswith(model_cls_list[i].get_name()):
                    continue

                if name.find("losses_avg") >= 0:
                    continue

                tensor = reader.get_tensor(name)
                values[name] = tensor

            model_var_lists.append(values)

        # Build models
        model_list = []

        for i in range(len(args.checkpoints)):
            name = model_cls_list[i].get_name()
            model = model_cls_list[i](params_list[i], name + "_%d" % i)
            model_list.append(model)

        params = params_list[0]
        # Read input file
        sorted_keys, sorted_inputs = dataset.sort_input_file(args.input)
        # Build input queue
        features = dataset.get_inference_input(sorted_inputs, params)
        # Create placeholders
        placeholders = []

        for i in range(len(params.device_list)):
            placeholders.append({
                "source":
                tf.placeholder(tf.int32, [None, None], "source_%d" % i),
                "source_length":
                tf.placeholder(tf.int32, [None], "source_length_%d" % i)
            })

        # A list of outputs
        if params.generate_samples:
            inference_fn = sampling.create_sampling_graph
        else:
            inference_fn = inference.create_inference_graph

        predictions = parallel.data_parallelism(
            params.device_list, lambda f: inference_fn(model_list, f, params),
            placeholders)

        # Create assign ops
        assign_ops = []
        feed_dict = {}

        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            un_init_var_list = []
            name = model_cls_list[i].get_name()

            for v in all_var_list:
                if v.name.startswith(name + "_%d" % i):
                    un_init_var_list.append(v)

            ops = set_variables(un_init_var_list, model_var_lists[i],
                                name + "_%d" % i, feed_dict)
            assign_ops.extend(ops)

        assign_op = tf.group(*assign_ops)
        init_op = tf.tables_initializer()
        results = []

        tf.get_default_graph().finalize()

        # Create session
        with tf.Session(config=session_config(params)) as sess:
            # Restore variables
            sess.run(assign_op, feed_dict=feed_dict)
            sess.run(init_op)

            while True:
                try:
                    feats = sess.run(features)
                    op, feed_dict = shard_features(feats, placeholders,
                                                   predictions)
                    results.append(sess.run(op, feed_dict=feed_dict))
                    message = "Finished batch %d" % len(results)
                    tf.logging.log(tf.logging.INFO, message)
                except tf.errors.OutOfRangeError:
                    break

        # Convert to plain text
        vocab = params.vocabulary["target"]
        outputs = []
        scores = []

        for result in results:
            for item in result[0]:
                outputs.append(item.tolist())
            for item in result[1]:
                scores.append(item.tolist())

        outputs = list(itertools.chain(*outputs))
        scores = list(itertools.chain(*scores))

        restored_inputs = []
        restored_outputs = []
        restored_scores = []

        for index in range(len(sorted_inputs)):
            restored_inputs.append(sorted_inputs[sorted_keys[index]])
            restored_outputs.append(outputs[sorted_keys[index]])
            restored_scores.append(scores[sorted_keys[index]])

        # Write to file
        with open(args.output, "w") as outfile:
            count = 0
            for outputs, scores in zip(restored_outputs, restored_scores):
                for output, score in zip(outputs, scores):
                    decoded = []
                    for idx in output:
                        if idx == params.mapping["target"][params.eos]:
                            break
                        decoded.append(vocab[idx])

                    decoded = " ".join(decoded)

                    if not args.verbose:
                        outfile.write("%s\n" % decoded)
                    else:
                        pattern = "%d ||| %s ||| %s ||| %f\n"
                        source = restored_inputs[count]
                        values = (count, source, decoded, score)
                        outfile.write(pattern % values)

                count += 1
Example #3
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Load configs
    model_cls_list = [transformer.Transformer for model in args.models]
    params_list = [default_parameters() for _ in range(len(model_cls_list))]
    params_list = [
        merge_parameters(params, model_cls.get_parameters())
        for params, model_cls in zip(params_list, model_cls_list)
    ]
    params_list = [
        import_params(args.checkpoints[i], args.models[i], params_list[i])
        for i in range(len(args.checkpoints))
    ]
    params_list = [
        override_parameters(params_list[i], args)
        for i in range(len(model_cls_list))
    ]

    # Build Graph
    with tf.Graph().as_default():
        model_var_lists = []

        # Load checkpoints
        for i, checkpoint in enumerate(args.checkpoints):
            tf.logging.info("Loading %s" % checkpoint)
            var_list = tf.train.list_variables(checkpoint)
            values = {}
            reader = tf.train.load_checkpoint(checkpoint)

            for (name, shape) in var_list:
                if not name.startswith(model_cls_list[i].get_name()):
                    continue

                if name.find("losses_avg") >= 0:
                    continue

                tensor = reader.get_tensor(name)
                values[name] = tensor
            model_var_lists.append(values)

        # Build models
        model_fns = []

        for i in range(len(args.checkpoints)):
            name = model_cls_list[i].get_name()
            model = model_cls_list[i](params_list[i], name + "_%d" % i)
            model_fn = model.get_rerank_inference_func()
            model_fns.append(model_fn)

        params = params_list[0]
        # Read input file
        with open(args.input, "rb") as encoded_file:
            sorted_keys = cPickle.load(encoded_file)
            decoder_input_list = cPickle.load(encoded_file)
            encoder_output_list = cPickle.load(encoded_file)

        state_placeholders = []
        for i in range(len(params.device_list)):
            decode_state = {
                "encoder":
                tf.placeholder(tf.float32, [None, None, params.hidden_size],
                               "encoder_%d" % i),
                #"encoder_weight": we doesn't need encoder weight
                "source":
                tf.placeholder(tf.int32, [None, None], "source_%d" % i),
                "source_length":
                tf.placeholder(tf.int32, [None], "source_length_%d" % i),
                # [bos_id, ...] => [..., 0]
                "target":
                tf.placeholder(tf.int32, [None, None], "target_%d" % i),
                #"target_length": tf.placeholder(tf.int32, [None, ], "target_length_%d" % i)
            }
            # these values are needed for incremental decoding
            for j in range(params.num_decoder_layers):
                decode_state["decoder_layer_%d_key" % j] = tf.placeholder(
                    tf.float32, [None, None, params.hidden_size],
                    "decoder_layer_%d_key_%d" % (j, i))
                decode_state["decoder_layer_%d_value" % j] = tf.placeholder(
                    tf.float32, [None, None, params.hidden_size],
                    "decoder_layer_%d_value_%d" % (j, i))  # layer and GPU
                # we only need the return value of this
                # decode_state["decoder_layer_%d_att_weight" % j] = tf.placeholder(tf.float32, [None, None, None, None],
                #                              # N Head T S; during inference T is always 1 (one step)
                #                              "decoder_layer_%d_att_weight" % j),
            state_placeholders.append(decode_state)

        def decoding_fn(s):
            _decoding_fn = model_fns[0][1]
            # split s into state and feature, and convert to the nested structure expected by the transformer model
            state = {
                "encoder": s["encoder"],
                "decoder": {
                    "layer_%d" % j: {
                        "key": s["decoder_layer_%d_key" % j],
                        "value": s["decoder_layer_%d_value" % j],
                    }
                    for j in range(params.num_decoder_layers)
                }
            }
            inputs = s["target"]
            #inputs = tf.Print(inputs, [inputs], "before target", 100, 10000)
            feature = {
                "source":
                s["source"],
                "source_length":
                s["source_length"],
                # [bos_id, ...] => [..., 0]
                # "target": tf.pad(inputs[:,1:], [[0, 0], [0, 1]])
                #"target": tf.pad(inputs, [[0, 0], [0, 1]]),  # 前面没有bos_id,因此直接补上0,这是为了和decode_graph中的补bos相配合
                "target":
                inputs,
                "target_length":
                tf.fill([tf.shape(inputs)[0]],
                        tf.shape(inputs)[1])
            }
            #feature["target"] = tf.Print(feature["target"], [feature["target"]], "target", 100,10000)
            ret = _decoding_fn(feature, state, params)
            return ret

        decoder_op = parallel.data_parallelism(params.device_list,
                                               lambda s: decoding_fn(s),
                                               state_placeholders)

        #batch = tf.shape(encoder_output)[0]

        # Create assign ops
        assign_ops = []

        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            un_init_var_list = []
            name = model_cls_list[i].get_name()

            for v in all_var_list:
                if v.name.startswith(name + "_%d" % i):
                    un_init_var_list.append(v)

            ops = set_variables(un_init_var_list, model_var_lists[i],
                                name + "_%d" % i)
            assign_ops.extend(ops)

        assign_op = tf.group(*assign_ops)

        results = []
        sen_decode_time = []
        grid_hyps = []  # hyps in each grid of every sentence, kept for later analysis and statistics
        # Create session
        with tf.Session(config=session_config(params)) as sess:
            # from tensorflow.python import debug as tf_debug
            # sess = tf_debug.LocalCLIDebugWrapperSession(sess,ui_type='curses')#readline

            # Restore variables
            sess.run(assign_op)
            #startpoint=320
            for i, (decode_input, encoder_output) in enumerate(
                    zip(decoder_input_list, encoder_output_list)):
                # if i < startpoint:
                #     continue

                # if i == startpoint:
                #     break
                # print(input["source"])
                # print(input["constraints"])
                #################
                # create constraint translation related model
                # build ensembled TM
                thumt_tm = ThumtTranslationModel(sess, decoder_op,
                                                 encoder_output,
                                                 state_placeholders,
                                                 decode_input, params)

                # Build GBS search
                cons_decoder = create_constrained_decoder(thumt_tm)
                ##################
                max_length = decode_input["source_length"][
                    0] + params.decode_length
                beam_size = params.beam_size
                # top_beams = params.top_beams
                top_beams = 1
                start_time = time.time()
                best_output, search_grid = decode(encoder_output,
                                                  sess,
                                                  decoder_op,
                                                  state_placeholders,
                                                  params,
                                                  cons_decoder,
                                                  thumt_tm,
                                                  decode_input,
                                                  top_beams,
                                                  max_hyp_len=max_length,
                                                  beam_size=beam_size,
                                                  return_alignments=True,
                                                  length_norm=False)
                sen_decode_time.append(time.time() - start_time)
                hyps_num = {k: len(search_grid[k]) for k in search_grid.keys()}
                grid_hyps.append(hyps_num)

                # output_beams = [search_grid[k] for k in search_grid.keys() if k[1] == top_row]
                # output_hyps = [h for beam in output_beams for h in beam]

                # constraints=input_constraints,
                # return_alignments=return_alignments,
                # length_norm=length_norm)
                results.append(best_output)

                message = "Finished decoding sentences index: %d" % (i)
                tf.logging.log(tf.logging.INFO, message)

        # Convert to plain text
        vocab = params.vocabulary["target"]
        outputs = []
        scores = []
        mask_ratio = []
        best_alignment = []

        for result in results:
            sub_result = list(zip(*result[0]))
            outputs.extend(sub_result[0])
            scores.extend(sub_result[1])
            best_alignment.extend(result[1])

            # for sub_result in result:  # each decoding result may contain multiple best scores
            #     outputs.append(sub_result[0][0][1:])  # seqs
            #     scores.append(sub_result[0][1])  # score
            #     mask_ratio.append(0)
            #     best_alignment.extend(sub_result[1])
        new_outputs = []
        for s in outputs:
            new_outputs.append(s[1:])
        outputs = new_outputs

        for s, score in zip(outputs, scores):
            s1 = []
            for idx in s:
                if idx == params.mapping["target"][params.eos]:
                    break
                s1.append(vocab[idx])
            s1 = " ".join(s1)
            #print("%s" % s1)
            print("%f   %s" % (score, s1))

        restored_inputs = []
        restored_outputs = []
        restored_scores = []
        restored_constraints = []
        restored_alignment = []
        restored_sen_decode_time = []
        restored_grid_hyps = []
        for index in range(len(sorted_keys)):
            restored_outputs.append(outputs[sorted_keys[index]])
            restored_scores.append(scores[sorted_keys[index]])
            #restored_constraints.append(sorted_constraints[sorted_keys[index]])
            restored_alignment.append(best_alignment[sorted_keys[index]])
            restored_sen_decode_time.append(
                sen_decode_time[sorted_keys[index]])
            restored_grid_hyps.append(grid_hyps[sorted_keys[index]])

        # restored_outputs = outputs
        # restored_scores = scores
        # restored_alignment = best_alignment
        # restored_sen_decode_time = sen_decode_time
        # restored_grid_hyps = grid_hyps

        # Write to file
        with open(args.output, "w") as outfile:
            count = 0
            for output, score, de_time in zip(restored_outputs,
                                              restored_scores,
                                              restored_sen_decode_time):
                decoded = []
                for idx in output:
                    if idx == params.mapping["target"][params.eos]:
                        break
                    decoded.append(vocab[idx])
                decoded = " ".join(decoded)

                if not args.verbose:
                    outfile.write("%s\n" % decoded)
                else:
                    pattern = "%d |%s |%f |%f \n"
                    # cons = restored_constraints[count]
                    # cons_token_num = 0
                    # for cons_item in cons:
                    #     cons_token_num += cons_item["tgt_len"]
                    values = (count, decoded, score, de_time)
                    outfile.write(pattern % values)
                count += 1

        with open(args.output + ".alignment", "w") as outfile:
            count = 0
            for alignment in restored_alignment:
                outfile.write("%d\n" % count)
                cPickle.dump(alignment, outfile)
                count += 1
        # save the decoding time and the grid hyps for later analysis
        with open(args.output + ".time_hyps", "w") as outfile:
            cPickle.dump(restored_sen_decode_time, outfile)
            cPickle.dump(restored_grid_hyps, outfile)
        with open(args.output + ".time", "w") as outfile:
            time_sen = np.asarray(restored_sen_decode_time)
            ave = np.average(time_sen)
            outfile.write("average time:%f\n" % ave)
            cPickle.dump(restored_sen_decode_time, outfile)
Example #4
def build_graph(params, args, model_list, model_cls_list, model_var_lists, problem=None):
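    # Runs inference for one sub-problem ("parsing" or "amr"): builds the
    # per-device graph, restores checkpoint variables, decodes the input file
    # and writes the results to the problem-specific output file.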
    if problem == "parsing":
        fo = args.parsing_output
        fi = args.parsing_input
    elif problem == "amr":
        fo = args.amr_output
        fi = args.amr_input
    else:
        print("problem only in parsing or amr")

    # Read input file
    sorted_keys, sorted_inputs = dataset.sort_input_file(fi)
    # Build input queue
    features = dataset.get_inference_input(sorted_inputs, params)  # only source data
    # Create placeholders
    placeholders = []

    for i in range(len(params.device_list)):
        placeholders.append({
            "source": tf.placeholder(tf.int32, [None, None],
                                     "source_%d" % i),
            "source_length": tf.placeholder(tf.int32, [None],
                                            "source_length_%d" % i)
        })

    # A list of outputs
    if params.generate_samples:
        inference_fn = sampling.create_sampling_graph
    else:
        inference_fn = inference.create_inference_graph

    predictions = parallel.data_parallelism(
        params.device_list, lambda f: inference_fn(model_list, f, params, problem=problem),
        placeholders)

    # Create assign ops
    assign_ops = []
    feed_dict = {}

    all_var_list = tf.trainable_variables()

    for i in range(len(args.checkpoints)):
        un_init_var_list = []
        name = model_cls_list[i].get_name()

        for v in all_var_list:
            if v.name.startswith(name + "_%d" % i):
                un_init_var_list.append(v)

        ops = set_variables(un_init_var_list, model_var_lists[i],
                            name + "_%d" % i, feed_dict)
        assign_ops.extend(ops)

    assign_op = tf.group(*assign_ops)
    init_op = tf.tables_initializer()

    results = []

    tf.get_default_graph().finalize()

    # Create session
    with tf.Session(config=session_config(params)) as sess:
        # Restore variables
        sess.run(assign_op, feed_dict=feed_dict)
        sess.run(init_op)

        while True:
            try:
                feats = sess.run(features)
                op, feed_dict = shard_features(feats, placeholders,
                                               predictions)
                results.append(sess.run(op, feed_dict=feed_dict))
                message = "Finished %s batch %d" % (len(results), problem)
                tf.logging.log(tf.logging.INFO, message)
            except tf.errors.OutOfRangeError:
                break

    # Convert to plain text
    vocab = params.vocabulary[problem+"_target"]
    outputs = []
    scores = []

    for result in results:
        for item in result[0]:
            outputs.append(item.tolist())
        for item in result[1]:
            scores.append(item.tolist())

    outputs = list(itertools.chain(*outputs))
    scores = list(itertools.chain(*scores))

    restored_inputs = []
    restored_outputs = []
    restored_scores = []

    for index in range(len(sorted_inputs)):
        restored_inputs.append(sorted_inputs[sorted_keys[index]])
        restored_outputs.append(outputs[sorted_keys[index]])
        restored_scores.append(scores[sorted_keys[index]])

    # Write to file

    with open(fo, "w") as outfile:
        count = 0
        for outputs, scores in zip(restored_outputs, restored_scores):
            for output, score in zip(outputs, scores):
                decoded = []
                for idx in output:
                    if idx == params.mapping["target"][params.eos]:
                        break
                    decoded.append(vocab[idx])

                decoded = " ".join(decoded)

                if not args.verbose:
                    outfile.write("%s\n" % decoded)
                    break
                else:
                    pattern = "%d ||| %s ||| %s ||| %f\n"
                    source = restored_inputs[count]
                    values = (count, source, decoded, score)
                    outfile.write(pattern % values)
            count += 1
Example #5
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Load configs
    model_cls_list = [transformer.Transformer for model in args.models]
    params_list = [default_parameters() for _ in range(len(model_cls_list))]
    params_list = [
        merge_parameters(params, model_cls.get_parameters())
        for params, model_cls in zip(params_list, model_cls_list)
    ]
    params_list = [
        import_params(args.checkpoints[i], args.models[i], params_list[i])
        for i in range(len(args.checkpoints))
    ]
    params_list = [
        override_parameters(params_list[i], args)
        for i in range(len(model_cls_list))
    ]

    # Build Graph
    with tf.Graph().as_default():
        model_var_lists = []

        # Load checkpoints
        for i, checkpoint in enumerate(args.checkpoints):
            tf.logging.info("Loading %s" % checkpoint)
            var_list = tf.train.list_variables(checkpoint)
            values = {}
            reader = tf.train.load_checkpoint(checkpoint)

            for (name, shape) in var_list:
                if not name.startswith(model_cls_list[i].get_name()):
                    continue

                if name.find("losses_avg") >= 0:
                    continue

                tensor = reader.get_tensor(name)
                values[name] = tensor
            model_var_lists.append(values)

        # Build models
        model_fns = []

        for i in range(len(args.checkpoints)):
            name = model_cls_list[i].get_name()
            model = model_cls_list[i](params_list[i], name + "_%d" % i)
            model_fn = model.get_rerank_inference_func()
            model_fns.append(model_fn)

        params = params_list[0]
        # Read input file
        sorted_keys, sorted_inputs, sorted_constraints = \
            src_cons_dataset.sort_input_src_cons(args.input, args.constraints)

        # Build input queue
        features = src_cons_dataset.get_input_with_src_constraints(
            sorted_inputs, sorted_constraints, params)

        print(sorted_keys)

        # Create placeholders
        placeholders = []
        for i in range(len(params.device_list)):
            placeholders.append({
                "source":
                tf.placeholder(tf.int32, [None, None], "source_%d" % i),
                "source_length":
                tf.placeholder(tf.int32, [None], "source_length_%d" % i),
                "constraints_src_pos":
                tf.placeholder(tf.int32, [None, None, None],
                               "constraints_src_pos_%d" % i),
                "constraints":
                tf.placeholder(tf.int32, [None, None, None],
                               "constraints_%d" % i),
                "constraints_len":
                tf.placeholder(tf.int32, [None, None],
                               "constraints_len_%d" % i)
            })
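        # model_fns[i] appears to be an (encoding_fn, decoding_fn) pair
        # returned by get_rerank_inference_func(); only the encoder is run here.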
        encoding_fn = model_fns[0][0]

        encoder_op = parallel.data_parallelism(
            params.device_list, lambda f: encoding_fn(f, params), placeholders)

        # Create assign ops
        assign_ops = []

        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            un_init_var_list = []
            name = model_cls_list[i].get_name()

            for v in all_var_list:
                if v.name.startswith(name + "_%d" % i):
                    un_init_var_list.append(v)

            ops = set_variables(un_init_var_list, model_var_lists[i],
                                name + "_%d" % i)
            assign_ops.extend(ops)

        assign_op = tf.group(*assign_ops)
        results = []

        # Create session
        with tf.Session(config=session_config(params)) as sess:

            # Restore variables
            sess.run(assign_op)
            sess.run(tf.tables_initializer())
            decoder_input_list = []
            encoder_output_list = []
            while True:
                try:
                    feats = sess.run(features)
                    # keep encoder_op intact; later batches must shard against
                    # the original per-device op, not an already-sharded one
                    shard_op, feed_dict = shard_features(
                        feats, placeholders, encoder_op)
                    encoder_state = sess.run(shard_op, feed_dict=feed_dict)

                    for j in range(len(feats["source"])):
                        decoder_input_item = {
                            "source": [feats["source"][j]],
                            "source_length": [feats["source_length"][j]],
                            "constraints_src_pos":
                            feats["constraints_src_pos"][j],
                            "constraints": feats["constraints"][j],
                            "constraints_len": feats["constraints_len"][j],
                        }
                        decoder_input_list.append(decoder_input_item)
                    # We cannot simply loop over the number of GPUs; we must loop over the actual outputs, because a GPU may be left idle (e.g. the last few sentences may not fill one GPU).
                    for i in range(len(encoder_state[0])):
                        state_len = len(encoder_state[0][i])
                        for j in range(state_len):
                            encoder_output_item = {
                                "encoder": encoder_state[0][i][j:j + 1],
                                "encoder_weight": encoder_state[1][i][j:j + 1]
                            }
                            encoder_output_list.append(encoder_output_item)
                            # if  np.shape(encoder_output_item['encoder'])[1] != decoder_input_list[i]["source_length"]
                    #for input, encoder_output in zip(decoder_input_list, encoder_output_list):

                    message = "Finish encoding sentences: %d" % len(
                        decoder_input_list)
                    tf.logging.log(tf.logging.INFO, message)
                except tf.errors.OutOfRangeError:
                    break

        # vocab = params.vocabulary["source"]
        # for decoder_input, encoder_output in zip(decoder_input_list, encoder_output_list):
        #     #print(decoder_input["source_length"][0], np.shape(encoder_output['encoder'])[1])
        #     sen = []
        #     for idx in decoder_input["source"][0]:
        #         if idx == params.mapping["source"][params.eos]:
        #             break
        #         sen.append(vocab[idx])
        #     s1 = " ".join(sen)
        #     print(s1)

        # print(encoder_result.shape)
        # for i in range(encoder_result.shape[0]):
        #     print('[')
        #     for j in range(encoder_result.shape[1]):
        #         print('[')
        #         for k in range(encoder_result.shape[2]):
        #             print("%f" % encoder_result[i][j][k])
        #         print(']')
        #     print(']')

        with open(args.output, "w") as outfile:
            cPickle.dump(sorted_keys, outfile)
            cPickle.dump(decoder_input_list, outfile)
            cPickle.dump(encoder_output_list, outfile)
Example #6
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    model_cls_list = [models.get_model(model) for model in args.models]
    params_list = [default_parameters() for _ in range(len(model_cls_list))]
    params_list = [
        merge_parameters(params, model_cls.get_parameters())
        for params, model_cls in zip(params_list, model_cls_list)
    ]
    params_list = [
        import_params(args.checkpoints[i], args.models[i], params_list[i])
        for i in range(len(args.checkpoints))
    ]
    params_list = [
        override_parameters(params_list[i], args)
        for i in range(len(model_cls_list))
    ]

    with tf.Graph().as_default():
        model_var_lists = []

        for i, checkpoint in enumerate(args.checkpoints):
            tf.logging.info("Loading %s" % checkpoint)
            var_list = tf.train.list_variables(checkpoint)
            values = {}
            reader = tf.train.load_checkpoint(checkpoint)

            for (name, shape) in var_list:
                if not name.startswith(model_cls_list[i].get_name()):
                    continue

                if name.find("losses_avg") >= 0:
                    continue

                tensor = reader.get_tensor(name)
                values[name] = tensor

            model_var_lists.append(values)

        model_list = []

        for i in range(len(args.checkpoints)):
            name = model_cls_list[i].get_name()
            model = model_cls_list[i](params_list[i], name + "_%d" % i)
            model_list.append(model)

        params = params_list[0]
        params.initializer_gain = 1.0

        sorted_keys, sorted_inputs = dataset.read_eval_input_file(args.input)

        features = dataset.get_predict_input(sorted_inputs, params)

        placeholders = []

        for i in range(len(params.device_list)):
            placeholders.append({
                "text":
                tf.placeholder(tf.int32, [None, None], "text_%d" % i),
                "text_length":
                tf.placeholder(tf.int32, [None], "text_length_%d" % i),
                "aspect":
                tf.placeholder(tf.int32, [None, None], "aspect_%d" % i),
                "aspect_length":
                tf.placeholder(tf.int32, [None], "aspect_length_%d" % i),
                "polarity":
                tf.placeholder(tf.int32, [None, None], "polarity_%d" % i)
            })

        predict_fn = inference.create_predict_graph

        predictions = parallel.data_parallelism(
            params.device_list, lambda f: predict_fn(model_list, f, params),
            placeholders)

        assign_ops = []
        feed_dict = {}

        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            un_init_var_list = []
            name = model_cls_list[i].get_name()

            for v in all_var_list:
                if v.name.startswith(name + "_%d" % i):
                    un_init_var_list.append(v)

            ops = set_variables(un_init_var_list, model_var_lists[i],
                                name + "_%d" % i, feed_dict)
            assign_ops.extend(ops)

        assign_op = tf.group(*assign_ops)
        init_op = tf.tables_initializer()
        results = []

        with tf.Session(config=session_config(params)) as sess:
            sess.run(assign_op, feed_dict=feed_dict)
            sess.run(init_op)

            while True:
                try:
                    feats = sess.run(features)
                    op, feed_dict = shard_features(feats, placeholders,
                                                   predictions)
                    results.append(sess.run(op, feed_dict=feed_dict))
                    message = "Finished batch %d" % len(results)
                    tf.logging.log(tf.logging.INFO, message)
                except tf.errors.OutOfRangeError:
                    break

        input_features = []
        scores1 = []
        scores2 = []
        output_alphas = []
        for result in results:
            for item in result[0]:
                input_features.append(item.tolist())
            for item in result[1]:
                scores1.append(item.tolist())
            for item in result[2]:
                scores2.append(item.tolist())
            for item in result[3]:
                output_alphas.append(item.tolist())

        scores1 = list(itertools.chain(*scores1))
        scores2 = list(itertools.chain(*scores2))
        output_alphas = list(itertools.chain(*output_alphas))

        restored_scores1 = []
        restored_scores2 = []
        restored_output_alphas = []
        restored_inputs_text = []
        restored_inputs_aspect = []
        restored_inputs_score = []

        for index in range(len(sorted_inputs[0])):
            restored_scores1.append(scores1[sorted_keys[index]][0])
            restored_scores2.append(scores2[sorted_keys[index]])
            restored_output_alphas.append(output_alphas[sorted_keys[index]])

            restored_inputs_text.append(sorted_inputs[0][sorted_keys[index]])
            restored_inputs_aspect.append(sorted_inputs[1][sorted_keys[index]])
            restored_inputs_score.append(sorted_inputs[2][sorted_keys[index]])

        class3_bad_TP = 0.0
        class3_bad_FP = 0.0
        class3_bad_FN = 0.0

        class3_mid_TP = 0.0
        class3_mid_FP = 0.0
        class3_mid_FN = 0.0

        class3_good_TP = 0.0
        class3_good_FP = 0.0
        class3_good_FN = 0.0

        with open(args.output, "w") as outfile:

            for score1, score2, score3, alphas, text, aspect in zip(
                    restored_scores1, restored_scores2, restored_inputs_score,
                    restored_output_alphas, restored_inputs_text,
                    restored_inputs_aspect):
                score1 = str(score1)
                outfile.write("###########################\n")
                pattern = "%s|||%f,%f,%f|||%s\n"
                values = (score1, score2[0], score2[1], score2[2], score3)
                outfile.write(pattern % values)
                outfile.write(aspect + "\n")
                for (word, alpha) in zip(text.split(), alphas):
                    outfile.write(word + " " + str(alpha) + "\t")
                outfile.write("\n")

                if score1 == '0' and score3 == '0':
                    class3_bad_TP += 1.0
                if score1 == '1' and score3 == '1':
                    class3_mid_TP += 1.0
                if score1 == '2' and score3 == '2':
                    class3_good_TP += 1.0

                if score1 == '0' and score3 != '0':
                    class3_bad_FP += 1.0
                if score1 == '1' and score3 != '1':
                    class3_mid_FP += 1.0
                if score1 == '2' and score3 != '2':
                    class3_good_FP += 1.0

                if score1 != '0' and score3 == '0':
                    class3_bad_FN += 1.0
                if score1 != '1' and score3 == '1':
                    class3_mid_FN += 1.0
                if score1 != '2' and score3 == '2':
                    class3_good_FN += 1.0

            outfile.write("\n")
            outfile.write("Class 3:\n")
            outfile.write("Confusion Matrix:\n")
            outfile.write("\t" + "{name: >10s}".format(name="positive") +
                          "\t" + "{name: >10s}".format(name="neural") + "\t" +
                          "{name: >10s}".format(name="negative") + "\n")
            outfile.write("TP\t" + int2int(class3_bad_TP) + "\t" +
                          int2int(class3_mid_TP) + "\t" +
                          int2int(class3_good_TP) + "\n")
            outfile.write("FP\t" + int2int(class3_bad_FP) + "\t" +
                          int2int(class3_mid_FP) + "\t" +
                          int2int(class3_good_FP) + "\n")
            outfile.write("FN\t" + int2int(class3_bad_FN) + "\t" +
                          int2int(class3_mid_FN) + "\t" +
                          int2int(class3_good_FN) + "\n")
            outfile.write(
                "P\t" + float2int(class3_bad_TP /
                                  (class3_bad_TP + class3_bad_FP + 0.000001)) +
                "\t" + float2int(class3_mid_TP /
                                 (class3_mid_TP + class3_mid_FP + 0.000001)) +
                "\t" +
                float2int(class3_good_TP /
                          (class3_good_TP + class3_good_FP + 0.000001)) + "\n")
            outfile.write(
                "R\t" + float2int(class3_bad_TP /
                                  (class3_bad_TP + class3_bad_FN + 0.000001)) +
                "\t" + float2int(class3_mid_TP /
                                 (class3_mid_TP + class3_mid_FN + 0.000001)) +
                "\t" +
                float2int(class3_good_TP /
                          (class3_good_TP + class3_good_FN + 0.000001)) + "\n")
            outfile.write("F1\t" +
                          float2int(class3_bad_TP * 2 /
                                    (class3_bad_TP * 2 + class3_bad_FP +
                                     class3_bad_FN + 0.000001)) + "\t" +
                          float2int(class3_mid_TP * 2 /
                                    (class3_mid_TP * 2 + class3_mid_FP +
                                     class3_mid_FN + 0.000001)) + "\t" +
                          float2int(class3_good_TP * 2 /
                                    (class3_good_TP * 2 + class3_good_FP +
                                     class3_good_FN + 0.000001)) + "\n")
            outfile.write("F1-Micro:\t" + float2int(
                (class3_bad_TP + class3_mid_TP + class3_good_TP) * 2 /
                ((class3_bad_TP + class3_mid_TP + class3_good_TP) * 2 +
                 (class3_bad_FP + class3_mid_FP + class3_good_FP) +
                 (class3_bad_FN + class3_mid_FN + class3_good_FN) +
                 0.000001)) + "\n")
            outfile.write("F1-Macro:\t" + float2int(
                (class3_bad_TP * 2 /
                 (class3_bad_TP * 2 + class3_bad_FP + class3_bad_FN +
                  0.000001) + class3_mid_TP * 2 /
                 (class3_mid_TP * 2 + class3_mid_FP + class3_mid_FN +
                  0.000001) + class3_good_TP * 2 /
                 (class3_good_TP * 2 + class3_good_FP + class3_good_FN +
                  0.000001)) / 3.0) + "\n")
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Load configs
    model_cls_list = [models.get_model(model) for model in args.models]
    params_list = [default_parameters() for _ in range(len(model_cls_list))]
    params_list = [
        merge_parameters(params, model_cls.get_parameters())
        for params, model_cls in zip(params_list, model_cls_list)
    ]
    params_list = [
        import_params(args.checkpoints[i], args.models[i], params_list[i])
        for i in range(len(args.checkpoints))
    ]
    params_list = [
        override_parameters(params_list[i], args)
        for i in range(len(model_cls_list))
    ]

    # Build Graph
    with tf.Graph().as_default():
        model_var_lists = []

        # Load checkpoints
        for i, checkpoint in enumerate(args.checkpoints):
            tf.logging.info("Loading %s" % checkpoint)
            var_list = tf.train.list_variables(checkpoint)
            values = {}
            reader = tf.train.load_checkpoint(checkpoint)

            for (name, shape) in var_list:
                if not name.startswith(model_cls_list[i].get_name()):
                    continue

                if name.find("losses_avg") >= 0:
                    continue

                tensor = reader.get_tensor(name)
                values[name] = tensor

            model_var_lists.append(values)

        # Build models
        model_list = []

        for i in range(len(args.checkpoints)):
            name = model_cls_list[i].get_name()
            model = model_cls_list[i](params_list[i], name + "_%d" % i)
            model_list.append(model)

        params = params_list[0]
        # Read input file
        sorted_keys, sorted_inputs = dataset.sort_input_file(args.input)
        # Build input queue
        features = dataset.get_inference_input(sorted_inputs, params)
        # Create placeholders
        placeholders = []

        for i in range(len(params.device_list)):
            placeholders.append({
                "source":
                tf.placeholder(tf.int32, [None, None], "source_%d" % i),
                "source_length":
                tf.placeholder(tf.int32, [None], "source_length_%d" % i)
            })

        # A list of outputs
        if params.generate_samples:
            inference_fn = sampling.create_sampling_graph
        else:
            inference_fn = inference.create_inference_graph

        predictions = parallel.data_parallelism(
            params.device_list, lambda f: inference_fn(model_list, f, params),
            placeholders)

        # Create assign ops
        assign_ops = []
        feed_dict = {}

        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            un_init_var_list = []
            name = model_cls_list[i].get_name()

            for v in all_var_list:
                if v.name.startswith(name + "_%d" % i):
                    un_init_var_list.append(v)

            ops = set_variables(un_init_var_list, model_var_lists[i],
                                name + "_%d" % i, feed_dict)
            assign_ops.extend(ops)

        assign_op = tf.group(*assign_ops)
        init_op = tf.tables_initializer()
        results = []

        tf.get_default_graph().finalize()

        tf.logging.info(args.models[0])
        if args.models[0] == 'transformer_raw_t5':
            t5_list = []
            for var in tf.trainable_variables():
                if 'en_t5_bias_mat' in var.name or 'de_self_relative_attention_bias' in var.name:
                    t5_list.append(var)
                    tf.logging.info(var)

            for op in tf.get_default_graph().get_operations():
                if 'encoder_t5_bias' in op.name or 'decoder_t5_bias' in op.name:
                    if 'random' in op.name or 'read' in op.name or 'Assign' in op.name or 'placeholder' in op.name:
                        continue
                    t5_list.append(op.values()[0])
                    tf.logging.info(op.values()[0].name)
        elif args.models[0] == 'transformer_raw_soft_t5':
            soft_t5_bias_list = []
            for op in tf.get_default_graph().get_operations():
                if 'soft_t5_bias' in op.name or 'soft_t5_encoder' in op.name or 'soft_t5_decoder' in op.name:
                    if 'random' in op.name or 'read' in op.name or 'Assign' in op.name or 'placeholder' in op.name or 'decoder' in op.name:
                        continue
                    soft_t5_bias_list.append(op.values()[0])
                    tf.logging.info(op.values()[0].name)

        # Create session
        with tf.Session(config=session_config(params)) as sess:
            # Restore variables
            sess.run(assign_op, feed_dict=feed_dict)
            sess.run(init_op)

            while True:
                try:
                    feats = sess.run(features)
                    op, feed_dict = shard_features(feats, placeholders,
                                                   predictions)
                    results.append(sess.run(op, feed_dict=feed_dict))
                    '''
                    if args.models[0] == 'transformer_raw_t5':
                        var_en_bucket=tf.get_default_graph().get_tensor_by_name(t5_list[0].name)
                        var_de_bucket=tf.get_default_graph().get_tensor_by_name(t5_list[1].name)
                        
                        var_en_bias=tf.get_default_graph().get_tensor_by_name(t5_list[2].name)
                        
                        en_bucket,de_bucket,en_t5_bias = sess.run([var_en_bucket,
                                                                   var_de_bucket,
                                                                   var_en_bias],
                                              feed_dict=feed_dict)
                        
                        ret_param = {'en_bucket':en_bucket,'de_bucket':en_bucket,
                                     'en_t5_bias':en_t5_bias}
                        pickle.dump(ret_param,open(args.checkpoints[0]+'/'+'t5_bias.pkl','wb'))
                        tf.logging.info('store the t5 bias')
                    elif args.models[0] == 'transformer_raw_soft_t5':
                        var_en_alpha=tf.get_default_graph().get_tensor_by_name(soft_t5_bias_list[0].name)
                        var_en_beta=tf.get_default_graph().get_tensor_by_name(soft_t5_bias_list[1].name)
                        var_en_t5_bias=tf.get_default_graph().get_tensor_by_name(soft_t5_bias_list[2].name)
                        en_alpha,en_beta,en_t5_bias = sess.run([var_en_alpha,var_en_beta,var_en_t5_bias], feed_dict=feed_dict)
                    
                        ret_param = {'en_t5_bias':en_t5_bias,'en_alpha':en_alpha,
                              'en_beta':en_beta}
                        pickle.dump(ret_param,open(args.checkpoints[0]+'/'+'soft_t5_bias.pkl','wb'))
                        tf.logging.info('store the soft-t5 bias')
                        '''
                    message = "Finished batch %d" % len(results)
                    tf.logging.log(tf.logging.INFO, message)
                except tf.errors.OutOfRangeError:
                    break

        # Convert to plain text
        vocab = params.vocabulary["target"]
        outputs = []
        scores = []

        for result in results:
            for shard in result:
                for item in shard[0]:
                    outputs.append(item.tolist())
                for item in shard[1]:
                    scores.append(item.tolist())

        restored_inputs = []
        restored_outputs = []
        restored_scores = []

        for index in range(len(sorted_inputs)):
            restored_inputs.append(sorted_inputs[sorted_keys[index]])
            restored_outputs.append(outputs[sorted_keys[index]])
            restored_scores.append(scores[sorted_keys[index]])

        # Write to file
        if sys.version_info.major == 2:
            outfile = open(args.output, "w")
        elif sys.version_info.major == 3:
            outfile = open(args.output, "w", encoding="utf-8")
        else:
            raise ValueError("Unkown python running environment!")

        count = 0
        for outputs, scores in zip(restored_outputs, restored_scores):
            for output, score in zip(outputs, scores):
                decoded = []
                for idx in output:
                    if idx == params.mapping["target"][params.eos]:
                        break
                    decoded.append(vocab[idx])

                decoded = " ".join(decoded)

                if not args.verbose:
                    outfile.write("%s\n" % decoded)
                else:
                    pattern = "%d ||| %s ||| %s ||| %f\n"
                    source = restored_inputs[count]
                    values = (count, source, decoded, score)
                    outfile.write(pattern % values)

            count += 1
        outfile.close()
Example #8
        params = params_list[0]

        # Create placeholders
        placeholders = []
        for i in range(len(params.device_list)):
            placeholders.append({
                "source": tf.placeholder(tf.int32, [None, None],
                                         "source_%d" % i),
                "source_length": tf.placeholder(tf.int32, [None],
                                                "source_length_%d" % i)
            })

        # Create parallel predictions
        inference_fn = inference.create_inference_graph
        predictions = parallel.data_parallelism(
            params.device_list, lambda f: inference_fn(model_list, f, params),
            placeholders)

        # Create assign ops
        assign_ops = []

        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            un_init_var_list = []
            name = model_cls_list[i].get_name()
            print("1")
            for v in all_var_list:
                if v.name.startswith(name + "_%d" % i):
                    un_init_var_list.append(v)
            print("2")
Example #9
def main(args):
    eval_steps = args.eval_steps
    tf.logging.set_verbosity(tf.logging.DEBUG)
    # Load configs
    model_cls_list = [models.get_model(model) for model in args.models]
    params_list = [default_parameters() for _ in range(len(model_cls_list))]
    params_list = [
        merge_parameters(params, model_cls.get_parameters())
        for params, model_cls in zip(params_list, model_cls_list)
    ]
    params_list = [
        import_params(args.checkpoints[i], args.models[i], params_list[i])
        for i in range(len(args.checkpoints))
    ]
    params_list = [
        override_parameters(params_list[i], args)
        for i in range(len(model_cls_list))
    ]

    # Build Graph
    with tf.Graph().as_default():
        model_var_lists = []

        # Load checkpoints
        for i, checkpoint in enumerate(args.checkpoints):
            tf.logging.info("Loading %s" % checkpoint)
            var_list = tf.train.list_variables(checkpoint)
            values = {}
            reader = tf.train.load_checkpoint(checkpoint)

            for (name, shape) in var_list:
                if not name.startswith(model_cls_list[i].get_name()):
                    continue

                if name.find("losses_avg") >= 0:
                    continue

                tensor = reader.get_tensor(name)
                values[name] = tensor

            model_var_lists.append(values)

        # Build models
        model_fns = []

        for i in range(len(args.checkpoints)):
            name = model_cls_list[i].get_name()
            model = model_cls_list[i](params_list[i], name + "_%d" % i)
            model_fn = model.get_inference_func()
            model_fns.append(model_fn)

        params = params_list[0]
        # Read input file
        #features = dataset.get_inference_input(args.input, params)
        #features_eval = dataset.get_inference_input(args.eval, params)
        #features_test = dataset.get_inference_input(args.test, params)

        features_train = dataset.get_inference_input(args.input, params, False,
                                                     True)
        features_eval = dataset.get_inference_input(args.eval, params, True,
                                                    False)
        features_test = dataset.get_inference_input(args.test, params, True,
                                                    False)

        # Create placeholders
        placeholders = []

        for i in range(len(params.device_list)):
            placeholders.append({
                "source":
                tf.placeholder(tf.int32, [None, None], "source_%d" % i),
                "source_length":
                tf.placeholder(tf.int32, [None], "source_length_%d" % i),
                "target":
                tf.placeholder(tf.int32, [None, 2], "target_%d" % i)
            })

        # A list of outputs
        predictions = parallel.data_parallelism(
            params.device_list,
            lambda f: inference.create_inference_graph(model_fns, f, params),
            placeholders)

        # Create assign ops
        assign_ops = []

        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            un_init_var_list = []
            name = model_cls_list[i].get_name()

            for v in all_var_list:
                if v.name.startswith(name + "_%d" % i):
                    un_init_var_list.append(v)

            ops = set_variables(un_init_var_list, model_var_lists[i],
                                name + "_%d" % i)
            assign_ops.extend(ops)

        assign_op = tf.group(*assign_ops)
        results = []

        tf_x = tf.placeholder(tf.float32, [None, None, 512])
        tf_y = tf.placeholder(tf.int32, [None, 2])
        tf_x_len = tf.placeholder(tf.int32, [None])

        src_mask = -1e9 * (1.0 - tf.sequence_mask(
            tf_x_len, maxlen=tf.shape(predictions[0])[1], dtype=tf.float32))
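        # The "my_metric" head appears to predict two source positions from
        # the model outputs: per-position logits (masked by src_mask) are
        # trained with softmax cross-entropy against the two labels in tf_y.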
        with tf.variable_scope("my_metric"):
            #q,k,v = tf.split(linear(tf_x, 3*512, True, True, scope="logit_transform"), [512, 512,512],axis=-1)
            q, k, v = tf.split(nn.linear(predictions[0],
                                         3 * 512,
                                         True,
                                         True,
                                         scope="logit_transform"),
                               [512, 512, 512],
                               axis=-1)
            q = nn.linear(
                tf.nn.tanh(q), 1, True, True,
                scope="logit_transform2")[:, :, 0] + src_mask
            # label smoothing
            ce1 = nn.smoothed_softmax_cross_entropy_with_logits(
                logits=q,
                labels=tf_y[:, :1],
                #smoothing=params.label_smoothing,
                smoothing=False,
                normalize=True)
            w1 = tf.nn.softmax(q)[:, None, :]
            #k = nn.linear(tf.nn.tanh(tf.matmul(w1,v)+k),1,True,True,scope="logit_transform3")[:,:,0]+src_mask
            k = tf.matmul(k,
                          tf.matmul(w1, v) *
                          (512**-0.5), False, True)[:, :, 0] + src_mask
            # label smoothing
            ce2 = nn.smoothed_softmax_cross_entropy_with_logits(
                logits=k,
                labels=tf_y[:, 1:],
                #smoothing=params.label_smoothing,
                smoothing=False,
                normalize=True)
            w2 = tf.nn.softmax(k)[:, None, :]
            weights = tf.concat([w1, w2], axis=1)
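            # w1 and w2 are two softmax distributions over source positions;
            # tf_y[:, 0] and tf_y[:, 1] are the corresponding gold positions,
            # so ce1/ce2 act as pointer-style cross-entropy losses.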
        loss = tf.reduce_mean(ce1 + ce2)

        #tf_x = tf.placeholder(tf.float32, [None, 512])
        #tf_y = tf.placeholder(tf.int32, [None])

        #l1 = tf.layers.dense(tf.squeeze(predictions[0], axis=-2), 64, tf.nn.sigmoid)
        #output = tf.layers.dense(l1, int(args.softmax_size))

        #loss = tf.losses.sparse_softmax_cross_entropy(labels=tf_y, logits=output)
        o1 = tf.argmax(w1, axis=-1)
        o2 = tf.argmax(w2, axis=-1)
        a1, a1_update = tf.metrics.accuracy(labels=tf.squeeze(tf_y[:, 0]),
                                            predictions=tf.argmax(w1, axis=-1),
                                            name='a1')
        a2, a2_update = tf.metrics.accuracy(labels=tf.squeeze(tf_y[:, 1]),
                                            predictions=tf.argmax(w2, axis=-1),
                                            name='a2')
        accuracy, accuracy_update = tf.metrics.accuracy(
            labels=tf.squeeze(tf_y),
            predictions=tf.argmax(weights, axis=-1),
            name='a_all')

        running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                         scope="my_metric")
        #running_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="my_metric")
        running_vars_initializer = tf.variables_initializer(
            var_list=running_vars)

        #variables_to_train = tf.trainable_variables()
        #print (len(variables_to_train), (variables_to_train[0]), variables_to_train[1])
        #variables_to_train.remove(variables_to_train[0])
        #variables_to_train.remove(variables_to_train[0])
        #print (len(variables_to_train))
        variables_to_train = [
            v for v in tf.trainable_variables()
            if v.name.startswith("my_metric")
        ]

        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss, var_list=variables_to_train)
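        # Only the newly added "my_metric" head is optimized; the restored
        # translation model variables stay frozen.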
        #train_op = optimizer.minimize(loss, var_list=running_vars)

        # Create session
        with tf.Session(config=session_config(params)) as sess:
            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())
            sess.run(init_op)
            # Restore variables
            sess.run(assign_op)
            sess.run(tf.tables_initializer())

            current_step = 0

            best_validate_acc = 0
            last_test_acc = 0

            train_x_set = []
            train_y_set = []
            valid_x_set = []
            valid_y_set = []
            test_x_set = []
            test_y_set = []
            train_x_len_set = []
            valid_x_len_set = []
            test_x_len_set = []

            while current_step < eval_steps:
                print('=======current step ' + str(current_step))
                batch_num = 0
                while True:
                    try:
                        feats = sess.run(features_train)
                        op, feed_dict = shard_features(feats, placeholders,
                                                       predictions)
                        #x = (np.squeeze(sess.run(predictions, feed_dict=feed_dict), axis = -2))
                        # Python 2 only: relies on the fixed ordering of the
                        # placeholder dict to pull out the "target" labels and
                        # the "source_length" values.
                        y = feed_dict.values()[2]
                        x_len = feed_dict.values()[1]

                        feed_dict.update({tf_y: y})
                        feed_dict.update({tf_x_len: x_len})

                        los, __, pred = sess.run([loss, train_op, weights],
                                                 feed_dict=feed_dict)
                        print("current_step", current_step, "batch_num",
                              batch_num, "loss", los)

                        batch_num += 1
                        if batch_num % 100 == 0:

                            # eval
                            b_total = 0
                            a_total = 0
                            a1_total = 0
                            a2_total = 0
                            validate_acc = 0
                            batch_num_eval = 0

                            while True:
                                try:
                                    feats_eval = sess.run(features_eval)
                                    op, feed_dict_eval = shard_features(
                                        feats_eval, placeholders, predictions)
                                    #x = (np.squeeze(sess.run(predictions, feed_dict=feed_dict), axis = -2))
                                    y = feed_dict_eval.values()[2]
                                    x_len = feed_dict_eval.values()[1]
                                    feed_dict_eval.update({tf_y: y})
                                    feed_dict_eval.update({tf_x_len: x_len})

                                    sess.run(running_vars_initializer)
                                    acc = 0
                                    #acc, pred = sess.run([accuracy, output], feed_dict = {tf_x : x, tf_y : y})
                                    sess.run([
                                        a1_update, a2_update, accuracy_update,
                                        weights
                                    ],
                                             feed_dict=feed_dict_eval)
                                    acc1, acc2, acc = sess.run(
                                        [a1, a2, accuracy])
                                    batch_size = len(y)
                                    #print(acc)
                                    a1_total += round(batch_size * acc1)
                                    a2_total += round(batch_size * acc2)
                                    a_total += round(batch_size * acc)
                                    b_total += batch_size
                                    batch_num_eval += 1

                                    if batch_num_eval == 20:
                                        break

                                except tf.errors.OutOfRangeError:
                                    print("eval out of range")
                                    break
                            if b_total:
                                validate_acc = a_total / b_total
                                print("eval acc : " + str(validate_acc) +
                                      "( " + str(a1_total / b_total) + ", " +
                                      str(a2_total / b_total) + " )")
                            print("last test acc : " + str(last_test_acc))

                            if validate_acc > best_validate_acc:
                                best_validate_acc = validate_acc

                            # test
                            b_total = 0
                            a1_total = 0
                            a2_total = 0
                            a_total = 0
                            batch_num_test = 0
                            with open(args.output, "w") as outfile:
                                while True:
                                    try:
                                        feats_test = sess.run(features_test)
                                        op, feed_dict_test = shard_features(
                                            feats_test, placeholders,
                                            predictions)

                                        #x = (np.squeeze(sess.run(predictions, feed_dict=feed_dict), axis = -2))
                                        y = feed_dict_test.values()[2]
                                        x_len = feed_dict_test.values()[1]
                                        feed_dict_test.update({tf_y: y})
                                        feed_dict_test.update(
                                            {tf_x_len: x_len})

                                        sess.run(running_vars_initializer)
                                        acc = 0
                                        #acc, pred = sess.run([accuracy, output], feed_dict = {tf_x : x, tf_y : y})
                                        __, __, __, out1, out2 = sess.run(
                                            [
                                                a1_update, a2_update,
                                                accuracy_update, o1, o2
                                            ],
                                            feed_dict=feed_dict_test)
                                        acc1, acc2, acc = sess.run(
                                            [a1, a2, accuracy])

                                        batch_size = len(y)
                                        a_total += round(batch_size * acc)
                                        a1_total += round(batch_size * acc1)
                                        a2_total += round(batch_size * acc2)
                                        b_total += batch_size
                                        batch_num_test += 1
                                        for pred1, pred2 in zip(out1, out2):
                                            outfile.write("%s " % pred1[0])
                                            outfile.write("%s\n" % pred2[0])
                                        if batch_num_test == 20:
                                            break
                                    except tf.errors.OutOfRangeError:
                                        print("test out of range")
                                        break
                                if b_total:
                                    last_test_acc = a_total / b_total
                                    print("new test acc : " +
                                          str(last_test_acc) + "( " +
                                          str(a1_total / b_total) + ", " +
                                          str(a2_total / b_total) + " )")

                        if batch_num == 25000:
                            break
                    except tf.errors.OutOfRangeError:
                        print("train out of range")
                        break


                current_step += 1
                print("")
        print("Final test acc " + str(last_test_acc))

        return
Example #10
0
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Load configs
    model_cls_list = [transformer.Transformer for model in args.models]
    params_list = [default_parameters() for _ in range(len(model_cls_list))]
    params_list = [
        merge_parameters(params, model_cls.get_parameters())
        for params, model_cls in zip(params_list, model_cls_list)
    ]
    params_list = [
        import_params(args.checkpoints[i], args.models[i], params_list[i])
        for i in range(len(args.checkpoints))
    ]
    params_list = [
        override_parameters(params_list[i], args)
        for i in range(len(model_cls_list))
    ]

    # Build Graph
    with tf.Graph().as_default():
        model_var_lists = []

        # Load checkpoints
        for i, checkpoint in enumerate(args.checkpoints):
            tf.logging.info("Loading %s" % checkpoint)
            var_list = tf.train.list_variables(checkpoint)
            values = {}
            reader = tf.train.load_checkpoint(checkpoint)

            for (name, shape) in var_list:
                if not name.startswith(model_cls_list[i].get_name()):
                    continue

                if name.find("losses_avg") >= 0:
                    continue

                tensor = reader.get_tensor(name)
                values[name] = tensor
            model_var_lists.append(values)

        # Build models
        model_fns = []

        for i in range(len(args.checkpoints)):
            name = model_cls_list[i].get_name()
            model = model_cls_list[i](params_list[i], name + "_%d" % i)
            model_fn = model.get_rerank_inference_func()
            model_fns.append(model_fn)

        params = params_list[0]
        # Read input file
        sorted_keys, sorted_inputs, sorted_constraints = \
            src_cons_dataset.sort_input_src_cons(args.input, args.constraints)

        # Build input queue
        features = src_cons_dataset.get_input_with_src_constraints(
            sorted_inputs, sorted_constraints, params)

        print(sorted_keys)

        # Create placeholders
        placeholders = []
        for i in range(len(params.device_list)):
            placeholders.append({
                "source":
                tf.placeholder(tf.int32, [None, None], "source_%d" % i),
                "source_length":
                tf.placeholder(tf.int32, [None], "source_length_%d" % i),
                "constraints_src_pos":
                tf.placeholder(tf.int32, [None, None, None],
                               "constraints_src_pos_%d" % i),
                "constraints":
                tf.placeholder(tf.int32, [None, None, None],
                               "constraints_%d" % i),
                "constraints_len":
                tf.placeholder(tf.int32, [None, None],
                               "constraints_len_%d" % i)
            })
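        # The constraint placeholders are 3-D, presumably
        # [batch, num_constraints, constraint_length] for the token ids and
        # source positions, and [batch, num_constraints] for the lengths.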
        encoding_fn = model_fns[0][0]

        encoder_op = parallel.data_parallelism(
            params.device_list, lambda f: encoding_fn(f, params), placeholders)

        state_placeholders = []
        for i in range(len(params.device_list)):
            decode_state = {
                "encoder":
                tf.placeholder(tf.float32, [None, None, params.hidden_size],
                               "encoder_%d" % i),
                #"encoder_weight": we doesn't need encoder weight
                "source":
                tf.placeholder(tf.int32, [None, None], "source_%d" % i),
                "source_length":
                tf.placeholder(tf.int32, [None], "source_length_%d" % i),
                # [bos_id, ...] => [..., 0]
                "target":
                tf.placeholder(tf.int32, [None, None], "target_%d" % i),
                #"target_length": tf.placeholder(tf.int32, [None, ], "target_length_%d" % i)
            }
            # These values are needed for incremental decoding
            for j in range(params.num_decoder_layers):
                decode_state["decoder_layer_%d_key" % j] = tf.placeholder(
                    tf.float32, [None, None, params.hidden_size],
                    "decoder_layer_%d_key_%d" % (j, i))
                decode_state["decoder_layer_%d_value" % j] = tf.placeholder(
                    tf.float32, [None, None, params.hidden_size],
                    "decoder_layer_%d_value_%d" % (j, i))  # layer and GPU
                # we only need the return value of this
                # decode_state["decoder_layer_%d_att_weight" % j] = tf.placeholder(tf.float32, [None, None, None, None],
                #                              # N Head T S; during inference T is always 1 (a single step)
                #                              "decoder_layer_%d_att_weight" % j),
            state_placeholders.append(decode_state)
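        # state_placeholders carries everything needed for one step of
        # incremental decoding: the encoder output, the source, the partial
        # target, and each decoder layer's cached attention keys and values.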

        def decoding_fn(s):
            _decoding_fn = model_fns[0][1]
            # Split s into state and feature, and convert to the nested structure expected by the transformer model
            state = {
                "encoder": s["encoder"],
                "decoder": {
                    "layer_%d" % j: {
                        "key": s["decoder_layer_%d_key" % j],
                        "value": s["decoder_layer_%d_value" % j],
                    }
                    for j in range(params.num_decoder_layers)
                }
            }
            inputs = s["target"]
            #inputs = tf.Print(inputs, [inputs], "before target", 100, 10000)
            feature = {
                "source":
                s["source"],
                "source_length":
                s["source_length"],
                # [bos_id, ...] => [..., 0]
                # "target": tf.pad(inputs[:,1:], [[0, 0], [0, 1]])
                #"target": tf.pad(inputs, [[0, 0], [0, 1]]),  # 前面没有bos_id,因此直接补上0,这是为了和decode_graph中的补bos相配合
                "target":
                inputs,
                "target_length":
                tf.fill([tf.shape(inputs)[0]],
                        tf.shape(inputs)[1])
            }
            #feature["target"] = tf.Print(feature["target"], [feature["target"]], "target", 100,10000)
            ret = _decoding_fn(feature, state, params)
            return ret

        decoder_op = parallel.data_parallelism(params.device_list,
                                               lambda s: decoding_fn(s),
                                               state_placeholders)

        #batch = tf.shape(encoder_output)[0]

        # Create assign ops
        assign_ops = []

        all_var_list = tf.trainable_variables()

        for i in range(len(args.checkpoints)):
            un_init_var_list = []
            name = model_cls_list[i].get_name()

            for v in all_var_list:
                if v.name.startswith(name + "_%d" % i):
                    un_init_var_list.append(v)

            ops = set_variables(un_init_var_list, model_var_lists[i],
                                name + "_%d" % i)
            assign_ops.extend(ops)

        assign_op = tf.group(*assign_ops)
        results = []

        # Create session
        with tf.Session(config=session_config(params)) as sess:
            # from tensorflow.python import debug as tf_debug
            # sess = tf_debug.LocalCLIDebugWrapperSession(sess,ui_type='curses')#readline

            # Restore variables
            sess.run(assign_op)
            sess.run(tf.tables_initializer())
            # pad_id = params.mapping["target"][params.pad]
            # bos_id = params.mapping["target"][params.bos]
            # eos_id = params.mapping["target"][params.eos]
            while True:
                try:
                    feats = sess.run(features)
                    # shard_features returns the ops to run for this batch;
                    # keep encoder_op itself unchanged so it can be re-sharded
                    # for the next batch.
                    op, feed_dict = shard_features(
                        feats, placeholders, encoder_op)
                    #print("encoding %d" % i)
                    encoder_state = sess.run(op, feed_dict=feed_dict)
                    decoder_input_list = []
                    encoder_output_list = []
                    for j in range(len(feats["source"])):
                        decoder_input_item = {
                            "source": [feats["source"][j]],
                            "source_length": [feats["source_length"][j]],
                            "constraints_src_pos":
                            feats["constraints_src_pos"][j],
                            "constraints": feats["constraints"][j],
                            "constraints_len": feats["constraints_len"][j],
                        }
                        decoder_input_list.append(decoder_input_item)
                    # Do not simply loop over the number of GPUs; loop over the
                    # actual outputs, because a GPU can be left idle (e.g. the
                    # last sentence or two may not fill a full GPU shard).
                    for i in range(len(encoder_state[0])):  # gpu
                        state_len = len(encoder_state[0][i])  # sentences on this GPU
                        for j in range(state_len):
                            encoder_output_item = {
                                "encoder": encoder_state[0][i][j:j + 1],
                                "encoder_weight": encoder_state[1][i][j:j + 1]
                            }
                            encoder_output_list.append(encoder_output_item)
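                    # decoder_input_list and encoder_output_list now hold one
                    # entry per sentence, so constrained decoding below runs
                    # sentence by sentence.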

                    for input, encoder_output in zip(decoder_input_list,
                                                     encoder_output_list):
                        # print(input["source"])
                        # print(input["constraints"])
                        #################
                        # create constraint translation related model
                        # build ensembled TM
                        thumt_tm = ThumtTranslationModel(
                            sess, decoder_op, encoder_output,
                            state_placeholders, input, params)

                        # Build GBS search
                        cons_decoder = create_constrained_decoder(thumt_tm)
                        ##################
                        max_length = input["source_length"][
                            0] + params.decode_length
                        beam_size = params.beam_size
                        # top_beams = params.top_beams
                        top_beams = 1
                        best_output = decode(encoder_output,
                                             sess,
                                             decoder_op,
                                             state_placeholders,
                                             params,
                                             cons_decoder,
                                             thumt_tm,
                                             input,
                                             top_beams,
                                             max_hyp_len=max_length,
                                             beam_size=beam_size,
                                             return_alignments=True,
                                             length_norm=False)
                        # constraints=input_constraints,
                        # return_alignments=return_alignments,
                        # length_norm=length_norm)
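                        # best_output is expected to hold the top hypotheses
                        # (sequences with scores) plus their alignments; see
                        # the unpacking of result[0] / result[1] below.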
                        results.append(best_output)
                    message = "Finished sentences: %d" % len(results)
                    tf.logging.log(tf.logging.INFO, message)
                except tf.errors.OutOfRangeError:
                    break
        # Convert to plain text
        vocab = params.vocabulary["target"]
        outputs = []
        scores = []
        mask_ratio = []
        best_alignment = []
        # for result in results:
        # outputs.append(result)
        # scores.append(0)
        # mask_ratio.append(0)
        for result in results:
            # print(result[0])
            # #outputs.append(result[0][0][1:])
            sub_result = zip(*result[0])
            outputs.extend(sub_result[0])
            scores.extend(sub_result[1])
            mask_ratio.extend([0] * len(sub_result[1]))  # fill in placeholder ratios
            best_alignment.extend(result[1])

            # for sub_result in result:  # each decoding result may contain multiple best-scoring hypotheses
            #     outputs.append(sub_result[0][0][1:])  # seqs
            #     scores.append(sub_result[0][1])  # score
            #     mask_ratio.append(0)
            #     best_alignment.extend(sub_result[1])
        new_outputs = []
        for s in outputs:
            new_outputs.append(s[1:])
        outputs = new_outputs
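        # Drop the first token of every hypothesis, presumably the bos symbol
        # prepended during decoding.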

        for s, score in zip(outputs, scores):
            s1 = []
            for idx in s:
                if idx == params.mapping["target"][params.eos]:
                    break
                s1.append(vocab[idx])
            s1 = " ".join(s1)
            #print("%s" % s1)
            print("%f   %s" % (score, s1))

        restored_inputs = []
        restored_outputs = []
        restored_scores = []
        restored_ratio = []
        restored_constraints = []
        restored_alignment = []
        for index in range(len(sorted_inputs)):
            restored_inputs.append(sorted_inputs[sorted_keys[index]])
            restored_outputs.append(outputs[sorted_keys[index]])
            restored_scores.append(scores[sorted_keys[index]])
            restored_ratio.append(mask_ratio[sorted_keys[index]])
            restored_constraints.append(sorted_constraints[sorted_keys[index]])
            restored_alignment.append(best_alignment[sorted_keys[index]])

        # Write to file
        with open(args.output, "w") as outfile:
            count = 0
            for output, score, ratio in zip(restored_outputs, restored_scores,
                                            restored_ratio):
                decoded = []
                for idx in output:
                    if idx == params.mapping["target"][params.eos]:
                        break
                    decoded.append(vocab[idx])
                decoded = " ".join(decoded)

                if not args.verbose:
                    outfile.write("%s\n" % decoded)
                else:
                    pattern = "%d ||| %s ||| %s ||| %f ||| %f ||| %d\n"
                    source = restored_inputs[count]
                    cons = restored_constraints[count]
                    cons_token_num = 0
                    for cons_item in cons:
                        cons_token_num += cons_item["tgt_len"]
                    values = (count, source, decoded, score, ratio,
                              cons_token_num)
                    outfile.write(pattern % values)
                count += 1

        with open(args.output + ".alignment", "w") as outfile:
            count = 0
            for alignment in restored_alignment:
                outfile.write("%d\n" % count)
                cPickle.dump(alignment, outfile)
                count += 1