Exemplo n.º 1
0
def generate_gan(sess, model, loss_type="pair", negative_size=3):
    """Sample negative answers for every QA pair from the model's score distribution.

    For each entry of the module-level ``raw`` sequence, a pool of
    ``FLAGS.pools_size`` answers is drawn without replacement from ``alist``
    (skipping position ``_index``), every (q, a, candidate) triple is scored
    with ``model.gan_score``, and ``negative_size`` negatives are then drawn
    without replacement from a temperature-scaled softmax over those scores.

    Args:
        sess: Object whose ``run(fetch, feed_dict)`` evaluates ``model.gan_score``.
        model: Object exposing ``input_x_1``, ``input_x_2``, ``input_x_3`` and
            ``gan_score``.
        loss_type: Accepted for backward compatibility; not used here.
        negative_size: Number of negatives sampled for each pair.

    Returns:
        A list of ``[q, a, neg]`` triples, each element encoded with
        ``encode_sent(vocab, item, FLAGS.max_sequence_length)``.
    """
    samples = []

    # Loop-invariant: build the answer array and index range once instead of
    # once per pair.
    answers = np.array(alist)
    all_indices = np.arange(len(alist))

    for _index, pair in enumerate(raw):
        if _index % 100 == 0:
            print("have sampled %d pairs" % _index)
        q = pair[2]
        a = pair[3]

        # Drop the positive's position from the candidates.
        # NOTE(review): this assumes raw[_index] and alist[_index] refer to
        # the same answer -- confirm against how alist is built. If _index is
        # beyond alist, nothing is excluded.
        neg_alist_index = all_indices[all_indices != _index]
        sampled_index = np.random.choice(
            neg_alist_index, size=[FLAGS.pools_size], replace=False)
        pools = answers[sampled_index]

        canditates = insurance_qa_data_helpers.loadCandidateSamples(
            q, a, pools, vocab)
        predicteds = []
        for batch in insurance_qa_data_helpers.batch_iter(
                canditates, batch_size=FLAGS.batch_size):
            feed_dict = {
                model.input_x_1: batch[:, 0],
                model.input_x_2: batch[:, 1],
                model.input_x_3: batch[:, 2],
            }
            predicted = sess.run(model.gan_score, feed_dict)
            predicteds.extend(predicted)

        # Temperature-scaled softmax. Subtracting the maximum logit leaves the
        # resulting probabilities unchanged but keeps np.exp from overflowing
        # to inf (which would turn the probabilities into NaN). float64 keeps
        # the normalisation accurate enough for np.random.choice's sum check.
        logits = (np.asarray(predicteds, dtype=np.float64)
                  * FLAGS.sampled_temperature * 1.5)
        logits -= np.max(logits)
        exp_rating = np.exp(logits)
        prob = exp_rating / np.sum(exp_rating)

        neg_samples = np.random.choice(
            pools, size=negative_size, p=prob, replace=False)
        for neg in neg_samples:
            samples.append([
                encode_sent(vocab, item, FLAGS.max_sequence_length)
                for item in [q, a, neg]
            ])
    return samples
Exemplo n.º 2
0
def generate_dns_pair(sess, model):
    """Build one (q, a, neg) training triple per QA pair by picking the top-scored candidate.

    For every entry of the module-level ``raw`` sequence, ``FLAGS.pools_size``
    answers are drawn from ``alist`` with ``np.random.choice``, each
    (q, a, candidate) triple is scored with ``model.score13``, and the
    candidate with the highest score is kept as the negative.

    Args:
        sess: Object whose ``run(fetch, feed_dict)`` evaluates ``model.score13``.
        model: Object exposing ``input_x_1``, ``input_x_2``, ``input_x_3`` and
            ``score13``.

    Returns:
        A list with one ``[q, a, neg]`` triple per pair, each element encoded
        with ``encode_sent(vocab, item, FLAGS.max_sequence_length)``.
    """
    triples = []
    for pair_no, record in enumerate(raw):
        # Progress report every 100 pairs.
        if pair_no % 100 == 0:
            print("have sampled %d pairs" % pair_no)
        question, answer = record[2], record[3]

        candidate_pool = np.random.choice(alist, size=[FLAGS.pools_size])
        candidate_rows = insurance_qa_data_helpers.loadCandidateSamples(
            question, answer, candidate_pool, vocab)

        # Score the candidates batch by batch, keeping the scores in pool order.
        scores = []
        batches = insurance_qa_data_helpers.batch_iter(
            candidate_rows, batch_size=FLAGS.batch_size)
        for chunk in batches:
            feed = {
                model.input_x_1: chunk[:, 0],
                model.input_x_2: chunk[:, 1],
                model.input_x_3: chunk[:, 2],
            }
            scores.extend(sess.run(model.score13, feed))

        # The highest-scoring candidate becomes the negative for this pair.
        hardest = candidate_pool[np.argmax(scores)]
        triples.append([
            encode_sent(vocab, text, FLAGS.max_sequence_length)
            for text in (question, answer, hardest)
        ])

    return triples
Exemplo n.º 3
0
def generate_dns_pair(sess, model):
    """Pick, for each QA pair, the pooled candidate with the highest ``score13``.

    Iterates over the module-level ``raw`` sequence; for each pair it draws
    ``FLAGS.pools_size`` answers from ``alist``, scores every
    (q, a, candidate) triple with ``model.score13`` and keeps the arg-max
    candidate as the negative.

    Args:
        sess: Object whose ``run(fetch, feed_dict)`` evaluates ``model.score13``.
        model: Object exposing ``input_x_1``, ``input_x_2``, ``input_x_3`` and
            ``score13``.

    Returns:
        A list with one ``[q, a, neg]`` triple per pair, each element encoded
        with ``encode_sent(vocab, item, FLAGS.max_sequence_length)``.
    """
    result = []
    for pair_idx, row in enumerate(raw):
        # Each pass through this loop handles a single QA pair.
        if pair_idx % 100 == 0:
            print("have sampled %d pairs" % pair_idx)
        query, positive = row[2], row[3]

        # One QA pair gets FLAGS.pools_size candidate negatives
        # (the original author's note gives the pool size as 100).
        negatives = np.random.choice(alist, size=[FLAGS.pools_size])
        rows = insurance_qa_data_helpers.loadCandidateSamples(
            query, positive, negatives, vocab)

        scored = []
        for batch in insurance_qa_data_helpers.batch_iter(
                rows, batch_size=FLAGS.batch_size):
            # One batch (original note: batch size 100) holds the rows
            # (q, a, neg_1), ..., (q, a, neg_batch_size): column 0 carries the
            # q entries, column 1 the a entries and column 2 the neg entries.
            inputs = {
                model.input_x_1: batch[:, 0],
                model.input_x_2: batch[:, 1],
                model.input_x_3: batch[:, 2],
            }
            # Per the original author's note, score13 is cos(q, neg);
            # that cannot be confirmed from this function alone.
            batch_scores = sess.run(model.score13, inputs)
            scored.extend(batch_scores)

        best = np.argmax(scored)
        chosen = [query, positive, negatives[best]]
        result.append([
            encode_sent(vocab, sentence, FLAGS.max_sequence_length)
            for sentence in chosen
        ])

    return result
Exemplo n.º 4
0
def generate_uniform_pair():
    """Build one (q, a, neg) triple per QA pair with a uniformly random negative.

    For every entry of the module-level ``raw`` sequence, an index into
    ``alist`` is drawn with ``random.randint`` and the answer at that index
    is used as the negative.

    Returns:
        A list with one ``[q, a, neg]`` triple per pair, each element encoded
        with ``encode_sent(vocab, item, FLAGS.max_sequence_length)``.
    """
    triples = []
    for record in raw:
        question, answer = record[2], record[3]
        # randint is inclusive on both ends, hence the "- 1".
        negative = alist[random.randint(0, len(alist) - 1)]
        encoded = [
            encode_sent(vocab, text, FLAGS.max_sequence_length)
            for text in (question, answer, negative)
        ]
        triples.append(encoded)
    return triples