示例#1
0
def test_classifier(sess, classifier, batch_size=640):
    """Score the classifier on one random batch of un-simplified inputs.

    Args:
        sess: Not used by this function; kept so existing callers that pass
            a session keep working.
        classifier: Object exposing ``predict_batch(inputs)`` and
            ``batch_loss(inputs, labels)``.
        batch_size: Number of examples requested from ``rand_batch_gen``.
            Defaults to 640, the value that used to be hard-coded.

    Returns:
        Tuple ``(acc, ave_loss)``: fraction of examples whose arg-max
        prediction equals the arg-max label, and the summed loss divided by
        the number of examples in the batch.
    """
    # sentence_int / label / length are module-level data loaded elsewhere
    # in this file.
    batch_iter = rand_batch_gen(sentence_int, label, length, batch_size)
    input_vec, onehot_label, _ = next(batch_iter)
    batch_label = np.argmax(onehot_label, axis=1)

    out = classifier.predict_batch(input_vec)
    loss = classifier.batch_loss(input_vec, batch_label)

    # Normalise by the number of rows actually returned instead of a literal,
    # so the metrics stay correct if the generator yields a different count.
    n_samples = len(batch_label)
    correct = np.argmax(out, axis=1) == batch_label
    acc = np.sum(correct) / n_samples
    ave_loss = np.sum(loss) / n_samples

    return acc, ave_loss
示例#2
0
def get_simplify(sess, actor):
    """Dump original vs. simplified sentences for one random batch to disk.

    Draws 640 examples, runs the actor through ``sampling_RL`` with
    ``Random=False``, and writes every example followed by its simplified
    form to ``./data/example.txt``.

    Args:
        sess: Session object forwarded to ``sampling_RL``.
        actor: Actor network forwarded to ``sampling_RL``.

    Returns:
        None. The only effect is (over)writing ``./data/example.txt``.
    """
    batch_iter = rand_batch_gen(sentence_int, label, length, 640)
    input_vec, _, lenth = next(batch_iter)
    _, _, Rinput, _ = sampling_RL(sess,
                                  actor,
                                  input_vec,
                                  lenth,
                                  epsilon,
                                  Random=False)

    def _decode(tokens):
        # NOTE(review): assumes the module-level ``int_to_word`` maps each
        # token id in a batch row to its string form -- confirm against
        # ``get_data``. Padding ids, if any, are decoded as-is.
        return ''.join(int_to_word[token] for token in tokens)

    # The previous loop never used its index: it joined the whole ``Rinput``
    # batch on every iteration and wrote nothing after "example:". Emit one
    # original/simplified pair per row instead.
    chunks = []
    for original, simplified in zip(input_vec, Rinput):
        chunks.append('example: \n')
        chunks.append(_decode(original) + '\n')
        chunks.append('after simplifying: \n')
        chunks.append(_decode(simplified) + '\n\n\n')

    # Explicit encoding: the decoded text may be non-ASCII, so do not depend
    # on the platform default.
    with open('./data/example.txt', 'w', encoding='utf-8') as f:
        f.write(''.join(chunks))
示例#3
0
def test(sess, actor, classifier, RL=True, batch_size=640):
    """Evaluate the classifier on one random batch, optionally after simplification.

    Args:
        sess: Session object forwarded to ``sampling_RL`` (only used when
            ``RL`` is true).
        actor: Actor network forwarded to ``sampling_RL``.
        classifier: Object exposing ``predict_batch`` and ``batch_loss``.
        RL: When true, the batch is first simplified by ``sampling_RL`` with
            ``Random=False``, a few before/after examples and the mean keep
            ratio are printed, and the classifier is scored on the
            simplified inputs. When false, the classifier is scored on the
            raw inputs.
        batch_size: Number of examples requested from ``rand_batch_gen``.
            Defaults to 640, the value that used to be hard-coded.

    Returns:
        Tuple ``(acc, ave_loss)``: fraction of correct arg-max predictions
        and the summed loss divided by the number of examples.
    """
    batch_iter = rand_batch_gen(sentence_int, label, length, batch_size)
    input_vec, onehot_label, lenth = next(batch_iter)

    batch_label = np.argmax(onehot_label, axis=1)
    n_samples = len(batch_label)

    if RL:
        _, _, Rinput, Rlenth = sampling_RL(sess,
                                           actor,
                                           input_vec,
                                           lenth,
                                           epsilon,
                                           Random=False)
        out = classifier.predict_batch(Rinput)
        loss = classifier.batch_loss(Rinput, batch_label)

        # Preview at most five examples; guard against batches smaller
        # than five rows.
        for idx in range(min(5, n_samples)):
            print('\nexample:')
            print(input_vec[idx])
            # Show the example's class index. The previous code printed
            # np.min() of the input tokens here, which is not a label.
            print('label: ', batch_label[idx])
            print('\nafter Simplifying:')
            print(Rinput[idx])
            print()

        keep_ratio = np.mean(Rlenth / lenth)
        print('keep ratio: {:.2f}%'.format(keep_ratio * 100))
    else:
        # NOTE(review): every other call site in this file calls
        # predict_batch(inputs) / batch_loss(inputs, labels) without a
        # length argument -- confirm these signatures against the
        # classifier class before relying on this branch.
        out = classifier.predict_batch(input_vec, lenth)
        loss = classifier.batch_loss(input_vec, lenth, batch_label)

    # Reuse batch_label rather than recomputing the arg-max, and normalise
    # by the number of rows actually returned instead of a literal 640.
    correct = np.argmax(out, axis=1) == batch_label
    acc = np.sum(correct) / n_samples
    ave_loss = np.sum(loss) / n_samples

    return acc, ave_loss
示例#4
0
def train(sess, actor, classifier, batchsize, classifier_trainable=True):
    """Train the actor, and optionally the classifier, for 1000 batches.

    Each outer iteration:
      1. Calls ``test`` and prints the returned accuracy and loss.
      2. Draws a batch of ``batchsize`` examples and samples it
         ``samplecnt`` times with ``sampling_RL(..., Random=True)``,
         recording actions, states and the per-sample loss (classifier loss
         plus a length penalty).
      3. On every 20th sample prints diagnostics and, if
         ``classifier_trainable``, trains the classifier on a
         ``Random=False`` sampling of the same batch.
      4. Uses the mean loss over all samples as a baseline and trains the
         actor at every time step with reward = -(loss - mean loss).

    Args:
        sess: Session object forwarded to ``sampling_RL`` and ``test``.
        actor: Actor network; must expose ``train(state_a, state_b, reward)``.
        classifier: Object exposing ``batch_loss`` and ``train_batch``.
        batchsize: Number of examples requested per training batch.
        classifier_trainable: Whether the classifier is updated as well.

    Returns:
        None.
    """
    for b in range(1000):
        acc_test, loss = test(sess, actor, classifier)
        print("batch", b, " --batch accuracy: {:.2f}%".format(acc_test * 100))
        print("\t", "--batch loss: {:.2f}".format(loss))

        rand_batch = rand_batch_gen(sentence_int, label, length, batchsize)
        input_vec, onehot_label, lenth = next(rand_batch)

        batch_label = np.argmax(onehot_label, axis=1)

        actionlist, statelist, losslist = [], [], []
        for sample_idx in range(samplecnt):
            actions, states, Rinput, Rlenth = sampling_RL(sess,
                                                          actor,
                                                          input_vec,
                                                          lenth,
                                                          epsilon,
                                                          Random=True)
            actionlist.append(actions)
            statelist.append(states)

            loss = classifier.batch_loss(Rinput, batch_label)
            # Length penalty: zero while the kept length is <= 10, then
            # quadratic in the excess, scaled by 0.2 / 50**2.
            length_loss = np.maximum(Rlenth - 10, 0)**2 / (50)**2 * 0.2
            loss += length_loss

            losslist.append(loss)

            # Diagnostics and classifier updates only on every 20th sample.
            if sample_idx % 20 != 0:
                continue

            print('lenth loss:{:.2f}'.format(np.mean(length_loss)))
            keep_ratio = np.mean(Rlenth / lenth)
            print('random keep ratio: {:.2f}%'.format(keep_ratio * 100))

            if classifier_trainable:
                # Separate names so the Random=True results recorded above
                # are not overwritten by this second sampling.
                _, _, greedy_input, greedy_lenth = sampling_RL(sess,
                                                               actor,
                                                               input_vec,
                                                               lenth,
                                                               epsilon,
                                                               Random=False)
                classifier.train_batch(greedy_input, batch_label)

                keep_ratio = np.mean(greedy_lenth / lenth)
                print('argmax keep ratio: {:.2f}%'.format(keep_ratio * 100))

        # Baseline: element-wise mean of the loss over all samples.
        aveloss = np.mean(np.array(losslist), axis=0)

        for sample_idx in range(samplecnt):
            reward = -(losslist[sample_idx] - aveloss)

            actions = actionlist[sample_idx]
            states = statelist[sample_idx]

            # Local row count. The previous code assigned this to the
            # ``batchsize`` parameter, silently changing the size requested
            # for the next batch.
            n_rows = len(reward)
            for step in range(len(states)):
                # One reward row per example, two columns indexed by the
                # action taken at this step; the other column stays zero.
                rr = np.zeros((n_rows, 2))
                action = actions[step]

                for row in range(n_rows):
                    rr[row, action[row]] = reward[row]

                actor.train(states[step][0], states[step][1], rr)
示例#5
0
import json

from actor import ActorNetwork
from self_att_rnn import SelfAttRNN
from model_api import ModelAPI
from integration_func import generate_embedding_mat
from data_helper import get_data, rand_batch_gen

# Restrict CUDA-based libraries to the device with index "2".
# NOTE(review): `os` is not imported in the import block directly above --
# confirm it is imported elsewhere in this file.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

# Dataset location and the length limit handed to the loader.
max_length = 200
metadata_pkl = './data/post_metadata.pkl'

# get_data returns five values: the two vocabulary mappings, the encoded
# sentences, their labels and their lengths.
(word_to_int, int_to_word, sentence_int, label,
 length) = get_data(metadata_pkl, max_length)

# Model dimensions derived from, or chosen alongside, the vocabulary.
embed_size = 128
vocab_size = len(word_to_int)

# Module-level batch source requesting 64 examples per batch.
rand_batch = rand_batch_gen(sentence_int, label, length, 64)

options = {
    "context_lstm_dims": 100,
    "highway_layer_num": 1,
}

# Names of the nine target classes.
class_list = [
    u"投递质量",
    u"延误",
    u"信息质量",
    u"破损",
    u"丢失",
    u"服务态度",
    u"收寄质量",
    u"需求建议",
    u"售后服务",
]

config = {
    "options": options,
    "vocab_size": vocab_size,
    "max_length": max_length,
    "emb_size": embed_size,
    "extra_symbol": ["<PAD>", "<UNK>"],