def decode():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Load test data.
        print("Reading data in %s" % FLAGS.data_dir)
        split = 'train' if FLAGS.decode_train else 'test'
        test_set = data_utils.read_data(FLAGS.data_dir, split)

        # Create model and load parameters.
        model = create_model(sess, test_set, True)
        model.batch_size = 1  # We decode one rank list at a time.

        test_set.pad(test_set.rank_list_size, model.hparams.reverse_input)

        rerank_scores = []

        # Decode from test data.
        for i in range(len(test_set.initial_list)):
            encoder_inputs, embeddings, decoder_targets, target_weights, target_initial_scores = model.get_data_by_index(
                test_set.initial_list, test_set.gold_list,
                test_set.gold_weights, test_set.initial_scores,
                test_set.features, i)
            _, test_loss, output_logits, summary = model.step(
                sess, encoder_inputs, embeddings, decoder_targets,
                target_weights, target_initial_scores, True)

            # The output is a list of rerank scores for decoder_inputs
            # (which represents the gold rank list).
            rerank_scores.append(output_logits[0][0])
            if i % FLAGS.steps_per_checkpoint == 0:
                print("Decoding %.2f \r" %
                      (float(i) / len(test_set.initial_list)), end='')

        # Get rerank indices from the new scores.
        rerank_lists = []
        for scores in rerank_scores:
            rerank_lists.append(
                sorted(range(len(scores)),
                       key=lambda k: scores[k],
                       reverse=True))

        data_utils.output_ranklist(test_set, rerank_lists, FLAGS.test_dir,
                                   model.hparams.reverse_input, split)

    return
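Each rerank list above is an argsort of the per-item scores in descending order. A minimal, self-contained sketch of that pattern; the variable names and values here are illustrative, not from the project, and NumPy is used only to cross-check:

import numpy as np

scores = [0.2, 1.5, -0.3, 0.9]  # one predicted score per item in the initial list

# Indices sorted by descending score, exactly the pattern used in decode().
rerank = sorted(range(len(scores)), key=lambda k: scores[k], reverse=True)
print(rerank)  # [1, 3, 0, 2]

# Equivalent NumPy form (for distinct scores).
assert rerank == list(np.argsort(-np.asarray(scores)))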
Example 2 (File: main.py, Project: TvanZ/IR2)
def decode():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Load test data.
        print("Reading data in %s" % FLAGS.data_dir)
        split = 'train' if FLAGS.decode_train else 'test'
        test_set = data_utils.read_data(FLAGS.data_dir, split)

        # Create model and load parameters.
        model = create_model(sess, test_set, True)
        model.batch_size = 1  # We decode one rank list at a time.

        test_set.pad(test_set.rank_list_size)

        rerank_scores = []
        output_logits_list = []

        # Decode from test data.
        for i in range(len(test_set.initial_list)):
            input_feed, others_map = model.get_data_by_index(test_set, i)
            test_loss, output_logits, summary, propensity_logits = model.step(
                sess, input_feed, True)
            output_logits_list.extend(output_logits.tolist())
            # The output is a list of rerank scores for decoder_inputs
            # (which represents the gold rank list).
            rerank_scores.append(output_logits[0])
            if i % FLAGS.steps_per_checkpoint == 0:
                print("Decoding %.2f \r" %
                      (float(i) / len(test_set.initial_list)), end='')

        # Get rerank indices from the new scores.
        rerank_lists = []
        for scores in rerank_scores:
            rerank_lists.append(
                sorted(range(len(scores)),
                       key=lambda k: scores[k],
                       reverse=True))

        data_utils.output_ranklist(test_set, rerank_lists, FLAGS.test_dir,
                                   split)

        # Note: propensity_logits holds only the last decoded example's
        # propensity scores; all per-example output logits are appended after it.
        total_list = propensity_logits.tolist()
        total_list.extend(output_logits_list)
        # pickle.dump(total_list, open("DLA_logits.p", "wb"))
        # print("Saved a pickle!")

        # Write one logit row per line.
        with open('DLA_logits.txt', 'w') as f_out:
            for row in total_list:
                f_out.write(str(row) + '\n')

    return
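Example 2 writes each row of total_list with str(), one row per line, so every line of DLA_logits.txt is a Python list literal. A hedged sketch of reading the file back, assuming it was produced by the loop above:

import ast

# Parse the str()-formatted rows written by Example 2.
with open('DLA_logits.txt') as f_in:
    rows = [ast.literal_eval(line) for line in f_in if line.strip()]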
Example 3
def decode():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Load test data.
        print("Reading data in %s" % FLAGS.data_dir)
        split = 'train' if FLAGS.decode_train else 'test'
        test_set = data_utils.read_data(FLAGS.data_dir, split, with_dummy=True)

        # Create model and load parameters.
        model = create_model(sess, test_set, True, FLAGS.ckpt_step)
        model.batch_size = 1  # We decode one rank list at a time.

        test_set.pad(test_set.rank_list_size, model.hparams.reverse_input)

        rerank_scores = []

        perm_pair_cnt = FLAGS.perm_pair_cnt

        # Decode from test data.
        for i in tqdm(range(len(test_set.initial_list))):
            encoder_inputs, embeddings, decoder_targets, target_weights, target_initial_scores = model.get_data_by_index(
                test_set.initial_list, test_set.gold_list,
                test_set.gold_weights, test_set.initial_scores,
                test_set.features, i)
            #target_initial_scores_fake = [np.ones_like(x) for x in target_initial_scores]

            # Permutation noise: pick perm_pair_cnt random index pairs among
            # the non-padding encoder inputs.
            enc_len = int(np.sum(np.array(encoder_inputs) >= 0))
            rule = [random.sample(range(enc_len), 2) for _ in range(perm_pair_cnt)]

            for r1, r2 in rule:
                # Add position noise:
                # encoder_inputs[r1], encoder_inputs[r2] = encoder_inputs[r2], encoder_inputs[r1]
                # Add score noise by swapping the pair's initial scores.
                target_initial_scores[r1], target_initial_scores[r2] = \
                    target_initial_scores[r2], target_initial_scores[r1]

            _, test_loss, output_logits, summary = model.step(
                sess, encoder_inputs, embeddings, decoder_targets,
                target_weights, target_initial_scores, True)

            output_logit = output_logits[0][0]

            # for r1, r2 in rule[::-1]:
            #     # Recover the original order.
            #     output_logit[r1], output_logit[r2] = output_logit[r2], output_logit[r1]
            '''
            #print(test_loss)
            print(output_logits)
            #reorder = sorted(range(len(target_initial_scores)), key=lambda x: target_initial_scores[x], reverse=True)
            reorder = list(range(len(target_initial_scores)))#[::-1]
            reorder_sh = reorder[1:]
            #reorder[2], reorder[3] = reorder[3], reorder[2]
            #random.shuffle(reorder_sh)
            reorder_sh = reorder_sh[::-1]
            reorder[1:] = reorder_sh
            reorder[-1], reorder[0] = reorder[0], reorder[-1]
            embeddings = [embeddings[int(encoder_inputs[x])] for x in reorder[:-1]]
            target_initial_scores = [target_initial_scores[x] for x in reorder[::-1]]
            output_logits_reorder = [[[output_logits[0][0][x] for x in reorder[::-1]]]]
            _, test_loss, output_logits, summary = model.step(sess, encoder_inputs, embeddings, decoder_targets,
                                            target_weights, target_initial_scores, True)
            print(output_logits_reorder)
            print(output_logits)
            print(np.array(output_logits) - np.array(output_logits_reorder))
            #exit()
            #The output is a list of rerank index for decoder_inputs (which represents the gold rank list)
            from IPython import embed
            embed()
            if i == 0:
                exit()
            '''
            rerank_scores.append(output_logit)
            # if i % FLAGS.steps_per_checkpoint == 0:
            #     print("Decoding %.2f \r" % (float(i) / len(test_set.initial_list)), end='')

        # Get rerank indices from the new scores; add tiny uniform noise so
        # tied scores are ordered randomly rather than by position.
        rerank_lists = []
        for raw_scores in rerank_scores:
            scores = np.array(raw_scores)
            # random.shuffle(scores)
            scores += np.random.uniform(low=-1e-5,
                                        high=1e-5,
                                        size=scores.shape)
            rerank_lists.append(
                sorted(range(len(scores)),
                       key=lambda k: scores[k],
                       reverse=True))

        data_utils.output_ranklist(test_set, rerank_lists, FLAGS.test_dir,
                                   model.hparams.reverse_input, split)

    return
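Example 3 injects two kinds of noise: it swaps perm_pair_cnt random pairs of initial scores before decoding, then adds a tiny uniform jitter before sorting so ties break randomly. A standalone sketch of both steps; the list length, pair count, and noise scale are illustrative:

import random
import numpy as np

scores = np.linspace(1.0, 0.0, num=10)  # stand-in for target_initial_scores
perm_pair_cnt = 3

# Permutation noise: swap perm_pair_cnt random pairs of scores.
for _ in range(perm_pair_cnt):
    r1, r2 = random.sample(range(len(scores)), 2)
    scores[r1], scores[r2] = scores[r2], scores[r1]

# Tie-breaking noise: jitter far smaller than any real score gap, so only
# exact ties can change relative order.
scores += np.random.uniform(low=-1e-5, high=1e-5, size=scores.shape)
rerank = sorted(range(len(scores)), key=lambda k: scores[k], reverse=True)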