Example #1
def generateNegSamples(sess, model, size, neg=True):
    x1_set, x2_set, s3_set = [], [], []
    for epoch in range(size):
        if neg:
            i = random.randint(0, len(raw) - 1)
            # (alternative, commented out in the original: slice a random
            # contiguous window of FLAGS.pools_size answers out of alist)
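            # draw a pool of FLAGS.pools_size candidate answers, sampled
            # uniformly (with replacement) from the full answer list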
            pools = np.random.choice(alist, size=[FLAGS.pools_size])

            # x1,x2,x3=insurance_qa_data_helpers.load_data_pair1(vocab,pools,raw[i])   # single question
            x1, x2, x3 = insurance_qa_data_helpers.load_data_pair(
                vocab, pools, raw, FLAGS.pools_size)  # diversity question

            predicteds = []
            for j in range(FLAGS.pools_size // FLAGS.batch_size):
                index_start = FLAGS.batch_size * j
                index_end = FLAGS.batch_size * (j + 1)

                feed_dict = {
                    model.input_x_1: x1[index_start:index_end],
                    model.input_x_2: x2[index_start:index_end],
                    model.input_x_3: x3[index_start:index_end],
                    model.dropout_keep_prob: 1.0
                }
                predicted = sess.run(model.pred_score, feed_dict)

                predicteds.extend(predicted)
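            # softmax over the pooled scores: higher-scoring answers get
            # proportionally higher probability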
            exp_rating = np.exp(predicteds)
            prob = exp_rating / np.sum(exp_rating)

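            # keep the 500 candidates the model rates highest, i.e. the hardest negatives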
            neg_index = prob.argsort()[-500:][::-1]
            # print prob[neg_index]
            # neg_index = np.random.choice(np.arange(len(prob)), size=[500], p=prob)

            samples_x1, samples_x2, samples_x3 = x1[neg_index], x2[
                neg_index], x3[neg_index]
        else:
            # (alternative, commented out in the original: draw 500 negatives
            # uniformly at random from a random candidate pool, mirroring the
            # scored sampling in the branch above)
            samples_x1, samples_x2, samples_x3 = insurance_qa_data_helpers.load_data_6(
                vocab, alist, raw, FLAGS.batch_size)
        x1_set.append(samples_x1)
        x2_set.append(samples_x2)
        s3_set.append(samples_x3)
    print "have sampled %d samples" % epoch
    return x1_set, x2_set, s3_set
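The commented-out `np.random.choice` line above is the stochastic alternative to the top-500 cut: draw negatives in proportion to their softmax probability instead of deterministically keeping the highest-scored ones. A minimal self-contained sketch of both variants (the name `pick_negatives` and the `scores`/`k` parameters are placeholders, not part of the original code):

import numpy as np

def pick_negatives(scores, k=500, stochastic=False):
    """Select k negative-sample indices from raw discriminator scores."""
    exp_rating = np.exp(scores - np.max(scores))  # shift by max for numerical stability
    prob = exp_rating / np.sum(exp_rating)
    if stochastic:
        # draw k indices with probability proportional to the softmax weight
        return np.random.choice(np.arange(len(prob)), size=k, p=prob)
    # deterministic variant used above: keep the k highest-probability candidates
    return prob.argsort()[-k:][::-1]

# e.g. pick_negatives(np.random.randn(1000), k=500, stochastic=True)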
Example #2
def main():
    with tf.Graph().as_default():
        with tf.device("/gpu:1"):
            session_conf = tf.ConfigProto(
                allow_soft_placement=FLAGS.allow_soft_placement,
                log_device_placement=FLAGS.log_device_placement)
            sess = tf.Session(config=session_conf)
            with sess.as_default(), open(precision, "w") as log:

                discriminator = Discriminator.Discriminator(
                    sequence_length=x_train_1.shape[1],
                    batch_size=FLAGS.batch_size,
                    vocab_size=len(vocab),
                    embedding_size=FLAGS.embedding_dim,
                    filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                    num_filters=FLAGS.num_filters,
                    l2_reg_lambda=FLAGS.l2_reg_lambda)

                sess.run(tf.global_variables_initializer())
                # Generate batches
                # Training loop. For each batch...
                for i in range(FLAGS.num_epochs):
                    x_batch_1, x_batch_2, x_batch_3 = insurance_qa_data_helpers.load_data_6(
                        vocab, alist, raw, FLAGS.batch_size)
                    train_step(sess, discriminator, x_batch_1, x_batch_2,
                               x_batch_3)
                    current_step = tf.train.global_step(
                        sess, discriminator.global_step)
                    if current_step % FLAGS.evaluate_every == 0:
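                        # quick check on 100 test questions most of the time;
                        # every 20th evaluation runs the full 1800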
                        if current_step % (FLAGS.evaluate_every * 20) != 0:
                            precision_current = dev_step(
                                sess, discriminator, 100)
                            line = " %d epoch: precision %f" % (
                                current_step, precision_current)
                        else:
                            precision_current = dev_step(
                                sess, discriminator, 1800)
                            line = "__________________\n%d epoch: precision %f" % (
                                current_step, precision_current)
                        log.write(line + "\n")
                        print(line)
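`train_step` is called here but not defined in this excerpt. Judging from the placeholder names used in Example #1, it presumably feeds a (question, positive answer, negative answer) triple to the discriminator and runs one optimizer step. A hedged sketch, assuming the `Discriminator` exposes `train_op`, `loss` and `global_step` (none of which are confirmed by this excerpt):

def train_step(sess, model, x_batch_1, x_batch_2, x_batch_3, dropout_keep_prob=0.5):
    # hypothetical: assumes train_op/loss/global_step exist on the model and
    # the same input placeholders as in Example #1
    feed_dict = {
        model.input_x_1: x_batch_1,  # question
        model.input_x_2: x_batch_2,  # positive answer
        model.input_x_3: x_batch_3,  # negative answer
        model.dropout_keep_prob: dropout_keep_prob,
    }
    _, step, loss = sess.run(
        [model.train_op, model.global_step, model.loss], feed_dict)
    return loss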
Example #3
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Data Preparation
# ==================================================

# Load data
print("Loading data...")

vocab = insurance_qa_data_helpers.build_vocab()
alist = insurance_qa_data_helpers.read_alist()
raw = insurance_qa_data_helpers.read_raw()
x_train_1, x_train_2, x_train_3 = insurance_qa_data_helpers.load_data_6(vocab, alist, raw, FLAGS.batch_size)
testList, vectors = insurance_qa_data_helpers.load_test_and_vectors()
vectors = ''  # the pretrained vectors are not used below; drop the reference
print('x_train_1', np.shape(x_train_1))
print("Load done...")

val_file = '../../insuranceQA/test1'
precision = '../../insuranceQA/test1.gan'+timeStamp
#x_val, y_val = data_deepqa.load_data_val()

# Training
# ==================================================
def train_step(sess, cnn, x_batch_1, x_batch_2, x_batch_3):
    """
    A single training step
    """
    # ... (the body of train_step, and the first part of dev_step below,
    # were truncated in the original snippet)

def dev_step():
    # sessdict (built in the truncated part above) maps each question to a
    # list of (score, flag) pairs, where flag '1' marks a correct answer
    lev1 = 0.0
    lev0 = 0.0
    for k, v in sessdict.items():
        v.sort(key=operator.itemgetter(0), reverse=True)
        score, flag = v[0]  # top-ranked candidate for this question
        if flag == '1':
            lev1 += 1
        if flag == '0':
            lev0 += 1
    # counts of correctly and incorrectly answered questions
    print('correct answers ' + str(lev1))
    print('incorrect answers ' + str(lev0))
    print('accuracy ' + str(float(lev1) / (lev1 + lev0)))

# evaluate every 5000 steps
evaluate_every = 5000
# start training and evaluation
sess.run(tf.global_variables_initializer())
for i in range(config.num_epochs):
    # 18,540 training samples
    # 20,000+ pretrained word vectors are available; unused here, but they could be added
    x_batch_1, x_batch_2, x_batch_3 = insurance_qa_data_helpers.load_data_6(
        vocab, alist, raw, config.batch_size)
    # sess and cnn come from the truncated enclosing context
    train_step(sess, cnn, x_batch_1, x_batch_2, x_batch_3)
    if (i + 1) % evaluate_every == 0:
        # 20 test questions with 500 candidates each: 1-2 correct answers and
        # 498-499 wrong ones per question, i.e. the model must pick the
        # correct answer out of a pool of size 500
        print("\nTest {}:".format((i + 1) // evaluate_every))
        dev_step()
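The evaluation in `dev_step` is precision-at-1 over answer pools: for each question, the top-scored candidate is taken as the model's answer, and it counts as correct when its flag is '1'. The same metric in isolation (a sketch; `precision_at_1` and `pools` are illustrative names mirroring sessdict above):

def precision_at_1(pools):
    """pools: list of answer pools, each a list of (score, flag) pairs,
    where flag == '1' marks a correct answer."""
    correct = 0
    for pool in pools:
        best = max(pool, key=lambda pair: pair[0])  # highest-scored candidate
        if best[1] == '1':
            correct += 1
    return correct / len(pools)

# e.g. precision_at_1([[(0.9, '1'), (0.2, '0')], [(0.4, '0'), (0.8, '1')]]) == 1.0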
Example #5
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Data Preparation
# ==================================================

# Load data
print("Loading data...")

vocab = insurance_qa_data_helpers.build_vocab()
alist = insurance_qa_data_helpers.read_alist()
raw = insurance_qa_data_helpers.read_raw()
x_train_1, x_train_2, x_train_3 = insurance_qa_data_helpers.load_data_6(vocab, alist, raw, FLAGS.batch_size)
testList, vectors = insurance_qa_data_helpers.load_test_and_vectors()
vectors = ''  # the pretrained vectors are not used below; drop the reference
print('x_train_1', np.shape(x_train_1))
print("Load done...")

val_file = '/export/jw/cnn/insuranceQA/test1'
precision = '/export/jw/cnn/insuranceQA/test1.acc'
#x_val, y_val = data_deepqa.load_data_val()

# Training
# ==================================================

with tf.Graph().as_default():
  with tf.device("/gpu:1"):
    session_conf = tf.ConfigProto(