示例#1
0
 def __init__(self, time_step, split, lr):
     """Set up the dataset, both DQN networks, the replay memory and optimizer.

     Args:
         time_step: passed to ``Dataset`` as ``T`` and to every encoder and
             decoder as ``time_step``.
         split: passed to ``Dataset`` as ``split_ratio``.
         lr: learning rate of the RMSprop optimizer over the policy network.
     """

     def make_encoder():
         # Encoder input width comes from the dataset's feature count.
         return AttnEncoder(input_size=self.dataset.get_num_features(),
                            hidden_size=config.ENCODER_HIDDEN_SIZE,
                            time_step=time_step)

     def make_decoder():
         return AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                            hidden_size=config.DECODER_HIDDEN_SIZE,
                            time_step=time_step)

     self.dataset = Dataset(T=time_step,
                            split_ratio=split,
                            binary_file=config.BINARY_DATASET)

     # Build order: policy encoder, policy decoder, policy DQN, then the
     # identically configured target trio.
     self.policy_net_encoder = make_encoder()
     self.policy_net_decoder = make_decoder()
     self.policy_net = DQN(self.policy_net_encoder, self.policy_net_decoder)
     self.target_net_encoder = make_encoder()
     self.target_net_decoder = make_decoder()
     self.target_net = DQN(self.target_net_encoder, self.target_net_decoder)

     if torch.cuda.is_available():
         # Rebind every module attribute to the result of its .cuda() call.
         for attr in ('policy_net_encoder', 'policy_net_decoder',
                      'target_net_encoder', 'target_net_decoder',
                      'policy_net', 'target_net'):
             setattr(self, attr, getattr(self, attr).cuda())

     self.memory = ReplayMemory(config.MEMORY_CAPACITY)
     # Only the policy network's parameters are handed to the optimizer.
     self.optimizer = optim.RMSprop(self.policy_net.parameters(), lr=lr)
示例#2
0
 def __init__(self, driving, target, time_step, split, lr):
     """Create the dataset, the encoder/decoder pair, their optimizers and loss.

     Args:
         driving: forwarded positionally to ``Dataset``.
         target: forwarded positionally to ``Dataset``.
         time_step: forwarded to ``Dataset`` and to both networks.
         split: forwarded positionally to ``Dataset``.
         lr: learning rate shared by the two Adam optimizers.
     """
     self.dataset = Dataset(driving, target, time_step, split)

     encoder = AttnEncoder(
         input_size=self.dataset.get_num_features(),
         hidden_size=config.ENCODER_HIDDEN_SIZE,
         time_step=time_step,
     )
     decoder = AttnDecoder(
         code_hidden_size=config.ENCODER_HIDDEN_SIZE,
         hidden_size=config.DECODER_HIDDEN_SIZE,
         time_step=time_step,
     )
     if torch.cuda.is_available():
         encoder, decoder = encoder.cuda(), decoder.cuda()
     self.encoder, self.decoder = encoder, decoder

     # Encoder and decoder are optimised by two separate Adam instances.
     self.encoder_optim = optim.Adam(self.encoder.parameters(), lr)
     self.decoder_optim = optim.Adam(self.decoder.parameters(), lr)
     self.loss_func = nn.MSELoss()

     sizes = self.dataset.get_size()
     self.train_size, self.test_size = sizes
示例#3
0
def get_model(config):
    """Return an ``(encoder, decoder)`` pair built from ``config``.

    The decoder is a plain ``Decoder`` when ``config.attn_method`` equals
    ``"disabled"`` and an ``AttnDecoder`` otherwise. Both modules are passed
    through ``maybe_cuda`` with ``cuda=config.cuda``.
    """
    encoder = maybe_cuda(Encoder(config), cuda=config.cuda)
    decoder_cls = Decoder if config.attn_method == "disabled" else AttnDecoder
    decoder = maybe_cuda(decoder_cls(config), cuda=config.cuda)
    return encoder, decoder
示例#4
0
    def __init__(self, time_step, split, lr):
        """Create the dataset, the wrapped encoder/decoder model and its optimizer.

        Args:
            time_step: passed to ``Dataset`` as ``T`` and to both networks.
            split: passed to ``Dataset`` as ``split_ratio``.
            lr: learning rate of the single Adam optimizer over the model.
        """
        self.dataset = Dataset(
            T=time_step,
            split_ratio=split,
            binary_file=config.BINARY_DATASET,
        )

        num_features = self.dataset.get_num_features()
        self.encoder = AttnEncoder(
            input_size=num_features,
            hidden_size=config.ENCODER_HIDDEN_SIZE,
            time_step=time_step,
        )
        self.decoder = AttnDecoder(
            code_hidden_size=config.ENCODER_HIDDEN_SIZE,
            hidden_size=config.DECODER_HIDDEN_SIZE,
            time_step=time_step,
        )
        self.model = Model(self.encoder, self.decoder)

        if torch.cuda.is_available():
            # Rebind each module attribute to the result of its .cuda() call.
            for attr in ('encoder', 'decoder', 'model'):
                setattr(self, attr, getattr(self, attr).cuda())

        # One optimizer over the combined model's parameters.
        self.model_optim = optim.Adam(self.model.parameters(), lr)
        self.loss_func = nn.MSELoss()

        sizes = self.dataset.get_size()
        self.train_size, self.test_size = sizes
示例#5
0
    def __init__(self, driving, target, time_step, split, lr, regression=True):
        """Create the dataset, the encoder/decoder model, optimizer and loss.

        Args:
            driving: not used by this constructor; kept so existing callers
                keep working.
            target: not used by this constructor; kept so existing callers
                keep working.
            time_step: passed to ``Dataset`` as ``T`` and to both networks.
            split: passed to ``Dataset`` as ``split_ratio``.
            lr: learning rate of the Adam optimizer over the whole model.
            regression: when truthy use ``nn.MSELoss``; otherwise use an
                unreduced, class-weighted ``nn.CrossEntropyLoss``.
        """
        self.dataset = Dataset(T=time_step,
                               split_ratio=split,
                               binary_file=config.BINARY_DATASET_HEADER)
        self.encoder = AttnEncoder(input_size=self.dataset.get_num_features(),
                                   hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   time_step=time_step)
        self.decoder = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   hidden_size=config.DECODER_HIDDEN_SIZE,
                                   time_step=time_step)
        self.model = Model(self.encoder, self.decoder)

        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
            self.model = self.model.cuda()

        self.model_optim = optim.Adam(self.model.parameters(), lr)

        if regression:
            # Regression model.
            self.loss_func = nn.MSELoss()
        else:
            # Classification model with two equally weighted classes.
            weight = torch.Tensor([1, 1])
            if use_cuda:
                # The weight must live on the same device as the model's
                # outputs, otherwise the loss raises a device mismatch.
                weight = weight.cuda()
            # reduction='none' is the non-deprecated spelling of
            # reduce=False (size_average is ignored in that case).
            self.loss_func = nn.CrossEntropyLoss(weight=weight,
                                                 reduction='none')

        (self.train_size, self.test_size,
         self.total_size) = self.dataset.get_size()
        print("train_size = %d (in terms of number of binary files)" %
              self.train_size)
        print("test_size = %d (in terms of number of binary files)" %
              self.test_size)
示例#6
0
def run(do_train, do_eval, do_predict, ckpt, get_rouge, max_epochs=100):
    """Build the pointer-generator model and run the requested stages.

    Args:
        do_train: when truthy, call ``train`` for ``max_epochs - epoch``
            epochs, where ``epoch`` comes from the checkpoint (0 without one).
        do_eval: when truthy and a checkpoint was loaded, evaluate the model
            on the test loader.
        do_predict: when truthy, a checkpoint was loaded and ``do_eval`` is
            falsy, run the model on the first training batch and pass the
            result to ``get_batch_prediction``.
        ckpt: checkpoint path handed to ``load_ckp``; falsy to start fresh.
        get_rouge: when truthy, call ``get_rouge_files`` and
            ``get_rouge_score``.
        max_epochs: upper bound on the total number of epochs.
    """
    train_set = Articles(test=False)
    test_set = Articles(test=True)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=False, num_workers=1)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=1)

    encoder = Encoder()
    attention_decoder = AttnDecoder()
    model = PointerGenerator(encoder, attention_decoder)
    model.to(device)
    optimizer = torch.optim.Adagrad(model.parameters(), lr=lr)
    loss_function = torch.nn.NLLLoss()

    # Default starting epoch; only a loaded checkpoint overrides it. (This
    # used to live in an `else` attached to `if get_rouge`, which left
    # `epoch` undefined without a checkpoint and discarded the loaded epoch.)
    epoch = 0

    if ckpt:
        model, optimizer, epoch = load_ckp(checkpoint_path=ckpt, model=model, optimizer=optimizer)
        if do_eval:
            # NOTE(review): presumably a project-level `eval` that shadows
            # the builtin -- confirm against the rest of the file.
            eval(test_loader, model, loss_function)
        elif do_predict:
            vocab = Vocab('data/vocab', voc_size)
            # next(iterator) works on every Python 3 iterator, unlike the
            # Python 2 style `.next()` method.
            story, highlight = next(iter(train_loader))
            batcher = Batcher(story, highlight, vocab)
            stories, highlights, extra_zeros, story_extended, highlight_extended, vocab_extended = batcher.get_batch(
                get_vocab_extended=True)

            stories = stories.to(device)
            highlights = highlights.to(device)
            story_extended = story_extended.to(device)
            extra_zeros = extra_zeros.to(device)

            with torch.no_grad():
                output = model(stories, highlights, story_extended, extra_zeros)

            get_batch_prediction(stories, output, highlights)

    if get_rouge:
        get_rouge_files(model, test_loader)
        get_rouge_score()

    if do_train:
        train(train_loader, test_loader, loss_function, model, optimizer, epoch, num_epochs=max_epochs - epoch)
示例#7
0
class Trainer:
    """Train, evaluate and plot an attention encoder/decoder pair.

    The encoder and decoder are kept as two separate modules, each with its
    own Adam optimizer, and are trained against an MSE loss.
    """

    def __init__(self, driving, target, time_step, split, lr):
        """Create the dataset, the two networks, their optimizers and loss.

        Args:
            driving: forwarded positionally to ``Dataset``.
            target: forwarded positionally to ``Dataset``.
            time_step: forwarded to ``Dataset`` and to both networks.
            split: forwarded positionally to ``Dataset``.
            lr: learning rate shared by the two Adam optimizers.
        """
        self.dataset = Dataset(driving, target, time_step, split)
        self.encoder = AttnEncoder(input_size=self.dataset.get_num_features(),
                                   hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   time_step=time_step)
        self.decoder = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   hidden_size=config.DECODER_HIDDEN_SIZE,
                                   time_step=time_step)
        if torch.cuda.is_available():
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
        self.encoder_optim = optim.Adam(self.encoder.parameters(), lr)
        self.decoder_optim = optim.Adam(self.decoder.parameters(), lr)
        self.loss_func = nn.MSELoss()
        self.train_size, self.test_size = self.dataset.get_size()

    def train_minibatch(self, num_epochs, batch_size, interval):
        """Run mini-batch training and periodically save both state dicts.

        Args:
            num_epochs: number of passes over the training set.
            batch_size: number of samples per step; the final batch of an
                epoch may be smaller.
            interval: state dicts are saved every ``interval`` epochs and
                after the last epoch, under ``models/``.
        """
        x_train, y_train, y_seq_train = self.dataset.get_train_set()
        for epoch in range(num_epochs):
            i = 0
            loss_sum = 0
            num_batches = 0
            while i < self.train_size:
                self.encoder_optim.zero_grad()
                self.decoder_optim.zero_grad()
                # Clamp so the last batch stops at the end of the train set.
                batch_end = min(i + batch_size, self.train_size)
                var_x = self.to_variable(x_train[i:batch_end])
                var_y = self.to_variable(y_train[i:batch_end])
                var_y_seq = self.to_variable(y_seq_train[i:batch_end])
                if var_x.dim() == 2:
                    # Add a trailing axis when the input is 2-D.
                    var_x = var_x.unsqueeze(2)
                code = self.encoder(var_x)
                y_res = self.decoder(code, var_y_seq)
                loss = self.loss_func(y_res, var_y)
                loss.backward()
                self.encoder_optim.step()
                self.decoder_optim.step()
                loss_sum += loss.item()
                num_batches += 1
                i = batch_end
            # Report the mean per-batch loss, as the message promises (the
            # raw sum used to be printed here).
            print('epoch [%d] finished, the average loss is %f' %
                  (epoch, loss_sum / max(num_batches, 1)))
            if (epoch + 1) % (interval) == 0 or epoch + 1 == num_epochs:
                torch.save(
                    self.encoder.state_dict(),
                    'models/encoder' + str(epoch + 1) + '-norm' + '.model')
                torch.save(
                    self.decoder.state_dict(),
                    'models/decoder' + str(epoch + 1) + '-norm' + '.model')

    def test(self, num_epochs, batch_size):
        """Plot truth against predictions and save the figure to ``results/``.

        Training curves are drawn from index 2000 onwards; test curves are
        drawn in full, offset by ``self.train_size`` on the x axis.

        Args:
            num_epochs: only used in the output file name.
            batch_size: batch size for prediction; also part of the file name.
        """
        x_train, y_train, y_seq_train = self.dataset.get_train_set()
        x_test, y_test, y_seq_test = self.dataset.get_test_set()
        y_pred_train = self.predict(x_train, y_train, y_seq_train, batch_size)
        y_pred_test = self.predict(x_test, y_test, y_seq_test, batch_size)
        plt.figure(figsize=(8, 6), dpi=100)
        plt.plot(range(2000, self.train_size),
                 y_train[2000:],
                 label='train truth',
                 color='black')
        plt.plot(range(self.train_size, self.train_size + self.test_size),
                 y_test,
                 label='ground truth',
                 color='black')
        plt.plot(range(2000, self.train_size),
                 y_pred_train[2000:],
                 label='predicted train',
                 color='red')
        plt.plot(range(self.train_size, self.train_size + self.test_size),
                 y_pred_test,
                 label='predicted test',
                 color='blue')
        plt.xlabel('Days')
        plt.ylabel('Stock price of AAPL.US(USD)')
        plt.savefig('results/res-' + str(num_epochs) + '-' + str(batch_size) +
                    '.png')

    def predict(self, x, y, y_seq, batch_size):
        """Return one prediction per row of ``x`` as a 1-D NumPy array.

        Args:
            x: NumPy array of inputs; rows are processed in batches.
            y: not used; kept so existing callers keep working.
            y_seq: NumPy array sliced in lockstep with ``x`` and fed to the
                decoder.
            batch_size: number of rows per forward pass.

        Returns:
            ``numpy.ndarray`` of length ``x.shape[0]`` holding the last
            column of the decoder output for every row.
        """
        y_pred = np.zeros(x.shape[0])
        i = 0
        # Inference only: skip autograd bookkeeping for the forward passes.
        with torch.no_grad():
            while i < x.shape[0]:
                batch_end = min(i + batch_size, x.shape[0])
                var_x_input = self.to_variable(x[i:batch_end])
                var_y_input = self.to_variable(y_seq[i:batch_end])
                if var_x_input.dim() == 2:
                    var_x_input = var_x_input.unsqueeze(2)
                code = self.encoder(var_x_input)
                y_res = self.decoder(code, var_y_input)
                for j in range(i, batch_end):
                    y_pred[j] = y_res[j - i, -1].item()
                i = batch_end
        return y_pred

    def load_model(self, encoder_path, decoder_path):
        """Load encoder and decoder state dicts from the given paths.

        Storages are mapped through an identity ``map_location`` before
        being copied into the existing modules.
        """
        self.encoder.load_state_dict(
            torch.load(encoder_path,
                       map_location=lambda storage, loc: storage))
        self.decoder.load_state_dict(
            torch.load(decoder_path,
                       map_location=lambda storage, loc: storage))

    def to_variable(self, x):
        """Convert a NumPy array to a float tensor, on CUDA when available."""
        tensor = Variable(torch.from_numpy(x).float())
        if torch.cuda.is_available():
            return tensor.cuda()
        return tensor
示例#8
0
def main(config):
    """Train the attention decoder on ``config.data`` and keep the best model.

    Reads ``config.data``, ``config.use_saved``, ``config.glove`` and
    ``config.print_every_n_batches``. Each outer iteration trains one pass
    with a fresh SGD optimizer, evaluates on the dev loader, saves the model
    to ``attn.<data>.pt`` when the dev loss improves and reloads that file
    when the dev loss got worse than in the previous iteration.

    Args:
        config: object exposing the attributes listed above.
    """
    print(config)

    with open('./{}.lex2.dictionary.json'.format(config.data)) as dict_file:
        dictionary = json.load(dict_file)
    # Ids of all 'const' entries whose key starts with 'NN'; passed to
    # hred.loss below.
    noun_id = [v for k, v in dictionary['const'].items() if k[:2] == 'NN']
    vocab_size = len(dictionary['word']) + 1
    word_embedding_dim = 300
    print("Vocabulary size:", vocab_size)

    word_vectors = np.random.uniform(low=-0.1,
                                     high=0.1,
                                     size=(vocab_size, word_embedding_dim))

    batch_size = 40 if config.data == 'persona' else 10
    if config.data in ['persona', 'movie']:
        train_loader = get_loader(
            './data/{}.train.src'.format(config.data),
            './data/{}.train.lex2.dat'.format(config.data),
            './data/{}.train.psn'.format(config.data), dictionary, batch_size)
        dev_loader = get_loader('./data/{}.valid.src'.format(config.data),
                                './data/{}.valid.lex2.dat'.format(config.data),
                                './data/{}.valid.psn'.format(config.data),
                                dictionary, 10)
    else:
        train_loader = get_loader('./data/{}.train.src'.format(config.data),
                                  './data/{}.train.trg'.format(config.data),
                                  None, dictionary, 20)
        dev_loader = get_loader('./data/{}.valid.src'.format(config.data),
                                './data/{}.valid.trg'.format(config.data),
                                None, dictionary, 200)

    hidden_size = 512

    if not config.use_saved:
        hred = AttnDecoder(word_embedding_dim, hidden_size, hidden_size,
                           vocab_size, word_vectors, dictionary['word'],
                           config.data, 0.5).cuda()
        for p in hred.parameters():
            torch.nn.init.uniform(p.data, a=-0.1, b=0.1)
        if config.glove:
            print("Loading word vecotrs.")
            # NOTE(review): these vectors are written into `word_vectors`
            # after the model was built from it -- confirm the model still
            # sees them.
            with open('./glove.42B.300d.txt') as word2vec_file:
                # The first line of the file is discarded.
                next(word2vec_file)
                found = 0
                for line in word2vec_file:
                    word, vec = line.split(' ', 1)
                    if word in dictionary['word']:
                        word_vectors[dictionary['word'][word]] = np.fromstring(
                            vec, dtype=np.float32, sep=' ')
                        found += 1
            print(found)
    else:
        hred = torch.load('attn.{}.pt'.format(config.data)).cuda()
        hred.flatten_parameters()
    hred.data = config.data

    best_loss = np.inf
    last_dev_loss = np.inf
    # NOTE(review): `power` is incremented below but never read here.
    power = 2
    for it in range(18, 30):
        ave_loss = 0
        last_time = time.time()
        # Keep the trainable parameters in a list: a bare filter() object is
        # a one-shot iterator on Python 3, so the optimizer would exhaust it
        # and the clipping call below would silently see no parameters.
        params = [p for p in hred.parameters() if p.requires_grad]
        # Fresh SGD optimizer per iteration with an exponentially decayed lr.
        optimizer = torch.optim.SGD(params, lr=.1 * 0.95**it, momentum=0.9)
        hred.train()
        for batch_idx, (src_seqs, src_lengths, trg_seqs, trg_lengths,
                        psn_seqs, psn_lengths, indices,
                        pos_seqs) in enumerate(train_loader):
            if batch_idx % config.print_every_n_batches == 1:
                print(ave_loss / min(batch_idx, config.print_every_n_batches),
                      time.time() - last_time)
                ave_loss = 0
            loss, _, _ = hred.loss(src_seqs, src_lengths, indices, trg_seqs,
                                   trg_lengths, psn_seqs, psn_lengths,
                                   pos_seqs, noun_id, 1)
            ave_loss += loss.data[0]
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm(params, .1)
            optimizer.step()

        # Evaluate on the dev set.
        dev_loss = 0
        dev_nn_loss = 0
        count = 0
        nn_total_count = 0
        hred.eval()
        for (src_seqs, src_lengths, trg_seqs, trg_lengths, psn_seqs,
             psn_lengths, indices, pos_seqs) in dev_loader:
            loss, noun_loss, nn_count = hred.loss(src_seqs, src_lengths,
                                                  indices, trg_seqs,
                                                  trg_lengths, psn_seqs,
                                                  psn_lengths, pos_seqs,
                                                  noun_id, 1)
            dev_loss += loss.data[0]
            # Weight each batch's noun loss by its noun count so the final
            # ratio is a per-noun average.
            dev_nn_loss += noun_loss.data[0] * nn_count.data[0]
            nn_total_count += nn_count.data[0]
            count += 1
        if dev_loss < best_loss:
            best_loss = dev_loss
            torch.save(hred, 'attn.{}.pt'.format(config.data))
        if dev_loss > last_dev_loss:
            # Dev loss got worse than last iteration: reload the saved model.
            power += 1
            hred = torch.load('attn.{}.pt'.format(config.data))
        last_dev_loss = dev_loss
        print('dev loss: {} {}'.format(dev_loss / count,
                                       dev_nn_loss / nn_total_count))
示例#9
0
文件: train.py 项目: wenchaodudu/hred
def main(config):
    """Train an HRED-family dialogue model and save the best dev-loss model.

    The model is chosen from ``config``: ``config.vhred`` selects ``VHRED``,
    otherwise ``config.attn`` selects an attention decoder
    (``PersonaAttnDecoder`` when ``config.data == 'persona'``, else
    ``AttnDecoder``), otherwise ``HRED``. With ``config.use_saved`` the model
    is loaded from its checkpoint file instead of being constructed.

    Args:
        config: object exposing ``data``, ``glove``, ``vhred``, ``attn``,
            ``use_saved``, ``kl_weight``, ``start_kl_weight`` and
            ``print_every_n_batches``.
    """
    print(config)

    with open('./{}.parse.dictionary.json'.format(config.data)) as dict_file:
        dictionary = json.load(dict_file)
    vocab_size = len(dictionary) + 1
    word_embedding_dim = 300
    print("Vocabulary size:", len(dictionary))

    # Random initial embeddings; rows of known words are overwritten below.
    word_vectors = np.random.uniform(low=-0.5,
                                     high=0.5,
                                     size=(vocab_size, word_embedding_dim))
    found = 0
    print("Loading word vecotrs.")
    if config.glove:
        with open('./glove.6B.300d.txt') as word2vec_file:
            # NOTE(review): the first line is discarded -- confirm the file
            # really starts with a header line.
            next(word2vec_file)
            for line in word2vec_file:
                word, vec = line.split(' ', 1)
                if word in dictionary:
                    word_vectors[dictionary[word]] = np.fromstring(
                        vec, dtype=np.float32, sep=' ')
                    found += 1
    else:
        word2vec = Word2Vec.load('./word2vec.vector')
        for word in word2vec.wv.vocab:
            if word in dictionary:
                word_vectors[dictionary[word]] = word2vec.wv[word]
                found += 1
    print(found)

    hidden_size = 512

    # The KL weight is annealed over the first `start_batch` training steps.
    start_batch = 50000
    start_kl_weight = config.start_kl_weight

    # Checkpoint file for the attention models. Resuming reads the same file
    # the training loop below writes (it used to read 'attn.pt').
    attn_path = 'attn.{}.pt'.format(config.data)

    if config.vhred:
        train_loader = get_hr_loader('./data/{}.train.src'.format(config.data),
                                     './data/{}.train.trg'.format(config.data),
                                     dictionary, 40)
        dev_loader = get_hr_loader('./data/{}.valid.src'.format(config.data),
                                   './data/{}.valid.trg'.format(config.data),
                                   dictionary, 200)
        if not config.use_saved:
            hred = VHRED(dictionary, vocab_size, word_embedding_dim,
                         word_vectors, hidden_size)
            # Warm-start the VHRED from the saved HRED model.
            print('load hred param')
            _hred = torch.load('hred.pt')
            hred.u_encoder = _hred.u_encoder
            hred.c_encoder = _hred.c_encoder
            hred.decoder.rnn = _hred.decoder.rnn
            hred.decoder.output_transform = _hred.decoder.output_transform
            # Only the first `hidden_size` input columns are copied from the
            # HRED transform.
            hred.decoder.context_hidden_transform.weight.data[:,0:hidden_size] = \
                _hred.decoder.context_hidden_transform.weight.data
            hred.flatten_parameters()
        else:
            hred = torch.load('vhred.pt')
            hred.flatten_parameters()
    elif config.attn:
        if config.data == 'persona':
            train_loader = get_ctc_loader(
                './data/{}.train.src'.format(config.data),
                './data/{}.train.trg'.format(config.data),
                './data/{}.train.psn'.format(config.data), dictionary, 40)
            dev_loader = get_ctc_loader(
                './data/{}.valid.src'.format(config.data),
                './data/{}.valid.trg'.format(config.data),
                './data/{}.valid.psn'.format(config.data), dictionary, 200)
            if not config.use_saved:
                hred = PersonaAttnDecoder(word_embedding_dim, hidden_size,
                                          vocab_size, word_vectors,
                                          dictionary).cuda()
            else:
                hred = torch.load(attn_path)
                hred.flatten_parameters()
        else:
            train_loader = get_loader(
                './data/{}.train.src'.format(config.data),
                './data/{}.train.trg'.format(config.data), dictionary, 40)
            dev_loader = get_loader('./data/{}.valid.src'.format(config.data),
                                    './data/{}.valid.trg'.format(config.data),
                                    dictionary, 200)
            if not config.use_saved:
                hred = AttnDecoder(word_embedding_dim, hidden_size, vocab_size,
                                   word_vectors, dictionary).cuda()
            else:
                hred = torch.load(attn_path)
                hred.flatten_parameters()
    else:
        train_loader = get_hr_loader('./data/{}.train.src'.format(config.data),
                                     './data/{}.train.trg'.format(config.data),
                                     dictionary, 40)
        dev_loader = get_hr_loader('./data/{}.valid.src'.format(config.data),
                                   './data/{}.valid.trg'.format(config.data),
                                   dictionary, 200)
        if not config.use_saved:
            disc = torch.load('discriminator.pt')
            hred = HRED(dictionary, vocab_size, word_embedding_dim,
                        word_vectors, hidden_size, disc)
        else:
            hred = torch.load('hred.pt')
            hred.flatten_parameters()
    if hred.discriminator is not None:
        hred.discriminator.u_encoder.rnn.flatten_parameters()
    # Keep the trainable parameters in a list: a bare filter() object is a
    # one-shot iterator on Python 3, so the optimizer would exhaust it and
    # the clipping call in the loop would silently see no parameters.
    params = [p for p in hred.parameters() if p.requires_grad]
    #optimizer = torch.optim.SGD(params, lr=config.lr, momentum=0.99)
    #q_optimizer = torch.optim.SGD(hred.q_network.parameters(), lr=0.01)
    optimizer = torch.optim.Adam(params, lr=0.001)

    best_loss = np.inf
    for it in range(0, 20):
        ave_loss = 0
        last_time = time.time()
        for batch_idx, batch in enumerate(train_loader):
            # The batch layout depends on which loader produced it.
            if config.attn:
                if config.data == 'persona':
                    src_seqs, src_lengths, trg_seqs, trg_lengths, ctc_seqs, ctc_lengths, indices = batch
                else:
                    src_seqs, src_lengths, trg_seqs, trg_lengths, indices = batch
            else:
                src_seqs, src_lengths, indices, ctc_seqs, ctc_lengths, ctc_indices, trg_seqs, trg_lengths, trg_indices, turn_len = batch
            if batch_idx % config.print_every_n_batches == 1:
                print(ave_loss / min(batch_idx, config.print_every_n_batches),
                      time.time() - last_time)
                ave_loss = 0
            if config.vhred and config.kl_weight and it * len(
                    train_loader) + batch_idx <= start_batch:
                # Linear ramp from start_kl_weight to 1 over start_batch steps.
                kl_weight = start_kl_weight + (1 - start_kl_weight) * float(
                    it * len(train_loader) + batch_idx) / start_batch
                loss = hred.loss(src_seqs, src_lengths, indices, trg_seqs,
                                 trg_lengths, ctc_lengths, kl_weight)
            elif config.attn:
                if config.data == 'persona':
                    loss = hred.loss(src_seqs, src_lengths, indices, trg_seqs,
                                     trg_lengths, ctc_seqs, ctc_lengths, 1.0)
                else:
                    loss = hred.loss(src_seqs, src_lengths, indices, trg_seqs,
                                     trg_lengths, 1.0)
            else:
                loss = hred.loss(src_seqs, src_lengths, indices, ctc_seqs,
                                 ctc_lengths, ctc_indices, trg_seqs,
                                 trg_lengths, trg_indices, turn_len, 0.2)
            ave_loss += loss.data[0]
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm(params, 0.1)
            optimizer.step()

        # Evaluate on the dev set.
        dev_loss = 0
        count = 0
        for batch in dev_loader:
            if config.attn:
                if config.data == 'persona':
                    src_seqs, src_lengths, trg_seqs, trg_lengths, ctc_seqs, ctc_lengths, indices = batch
                else:
                    src_seqs, src_lengths, trg_seqs, trg_lengths, indices = batch
            else:
                src_seqs, src_lengths, indices, ctc_seqs, ctc_lengths, ctc_indices, trg_seqs, trg_lengths, trg_indices, turn_len = batch
            if config.attn:
                if config.data == 'persona':
                    dev_loss += hred.evaluate(src_seqs, src_lengths, indices,
                                              trg_seqs, trg_lengths, ctc_seqs,
                                              ctc_lengths).data[0]
                else:
                    dev_loss += hred.evaluate(src_seqs, src_lengths, indices,
                                              trg_seqs, trg_lengths).data[0]
            else:
                dev_loss += hred.semantic_loss(src_seqs, src_lengths, indices,
                                               ctc_seqs, ctc_lengths,
                                               ctc_indices, trg_seqs,
                                               trg_lengths, trg_indices,
                                               turn_len).data[0]
            count += 1
        print('dev loss: {}'.format(dev_loss / count))
        if dev_loss < best_loss:
            # New best summed dev loss: overwrite this model's checkpoint.
            if config.vhred:
                torch.save(hred, 'vhred.pt')
            elif config.attn:
                torch.save(hred, attn_path)
            else:
                torch.save(hred, 'hred.pt')
            best_loss = dev_loss

    # NOTE(review): disabled stage -- range(0, 0) never iterates. Before
    # enabling it, restore the commented-out `q_optimizer` definition above;
    # the name is otherwise undefined.
    for it in range(0, 0):
        ave_loss = 0
        last_time = time.time()
        for _, (src_seqs, src_lengths, indices, ctc_seqs, ctc_lengths,
                ctc_indices, trg_seqs, trg_lengths, trg_indices,
                turn_len) in enumerate(train_loader):
            loss = hred.train_decoder(src_seqs, src_lengths, indices, turn_len,
                                      30, 5, 5)
            ave_loss += loss.data[0]
            q_optimizer.zero_grad()
            loss.backward()
            q_optimizer.step()
示例#10
0
# Attention variant handed to AttnDecoder below; the commented-out lines are
# the alternative settings.
attn_model = 'dot'
#attn_model = 'general'
#attn_model = 'concat'
# Shared width: used as the embedding dimension and passed to both the
# encoder and the decoder.
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
# Not used in this excerpt -- presumably consumed by the training loop.
batch_size = 64

# Load the vocabulary and the conversation pairs from the CSV dataset.
voc, pairs = loadData('./datasets/conversations.csv')
n_tokens = voc.size

# A single embedding instance is passed to both the encoder and the decoder.
embedding = nn.Embedding(n_tokens, hidden_size)
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
#attn = Attn(attn_model, hidden_size)
decoder = AttnDecoder(attn_model, embedding, hidden_size, n_tokens,
                      decoder_n_layers, dropout)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
encoder = encoder.to(device)
decoder = decoder.to(device)

# Configure training/optimization
# NOTE(review): none of the settings below are read in this excerpt; the
# names suggest a gradient-clipping threshold, teacher-forcing probability,
# base learning rate, decoder learning-rate multiplier, iteration count and
# logging/checkpoint cadence -- confirm against the training code.
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 4000
print_every = 1
save_every = 500
save_dir = "./checkpoints"
corpus_name = "movie conversations"
示例#11
0
class Trainer:
    """Train and evaluate an encoder/decoder pair wrapped in one ``Model``.

    The wrapped model is optimised by a single Adam optimizer against an MSE
    loss. Training stops early when the dev-set direction accuracy falls
    below 90% of the best value seen so far.
    """

    def __init__(self, time_step, split, lr):
        """Create the dataset, the wrapped model, its optimizer and loss.

        Args:
            time_step: passed to ``Dataset`` as ``T`` and to both networks.
            split: passed to ``Dataset`` as ``split_ratio``.
            lr: learning rate of the Adam optimizer over the whole model.
        """
        self.dataset = Dataset(T=time_step,
                               split_ratio=split,
                               binary_file=config.BINARY_DATASET)
        self.encoder = AttnEncoder(input_size=self.dataset.get_num_features(),
                                   hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   time_step=time_step)
        self.decoder = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   hidden_size=config.DECODER_HIDDEN_SIZE,
                                   time_step=time_step)
        self.model = Model(self.encoder, self.decoder)
        if torch.cuda.is_available():
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
            self.model = self.model.cuda()

        self.model_optim = optim.Adam(self.model.parameters(), lr)
        self.loss_func = nn.MSELoss()
        self.train_size, self.test_size = self.dataset.get_size()

    def train_minibatch(self, num_epochs, batch_size, interval):
        """Run mini-batch training with dev-set early stopping.

        Args:
            num_epochs: maximum number of passes over the training set.
            batch_size: samples per step; a batch that would reach or pass
                the end of the training set is skipped.
            interval: encoder and decoder state dicts are saved every
                ``interval`` epochs and after the last epoch, under
                ``models/``.
        """
        x_train, y_train, y_seq_train = self.dataset.get_train_set()
        # Best dev accuracy seen so far. It must persist across epochs; when
        # it was reset every epoch the early-stopping test could never fire.
        max_acc = 0
        for epoch in range(num_epochs):
            i = 0
            loss_sum = 0
            num_batches = 0
            while i < self.train_size:
                self.model_optim.zero_grad()
                batch_end = i + batch_size
                if batch_end >= self.train_size:
                    # The trailing batch is skipped rather than clamped.
                    break
                var_x = self.to_variable(x_train[i:batch_end])
                var_y = self.to_variable(y_train[i:batch_end])
                var_y_seq = self.to_variable(y_seq_train[i:batch_end])
                if var_x.dim() == 2:
                    # Add a trailing axis when the input is 2-D.
                    var_x = var_x.unsqueeze(2)
                y_res, _ = self.model(var_x, var_y_seq)
                loss = self.loss_func(y_res, var_y)
                loss.backward()
                self.model_optim.step()
                # .item() replaces the `loss.data[0]` indexing, matching the
                # accumulation below.
                batch_loss = loss.item()
                print('[%d], loss is %f' % (epoch, 10000 * batch_loss))
                loss_sum += batch_loss
                num_batches += 1
                i = batch_end
            # Report the mean per-batch loss, as the message promises (the
            # raw sum used to be printed here).
            print('epoch [%d] finished, the average loss is %f' %
                  (epoch, loss_sum / max(num_batches, 1)))

            x_dev, y_dev, y_seq_dev = self.dataset.get_dev_set()
            # predict() takes (x, y_seq, batch_size); passing y_dev as well
            # raised a TypeError.
            y_pred_dev = self.predict(x_dev, y_seq_dev, batch_size)
            acc = direction_correctness(y_pred_test=y_pred_dev, y_test=y_dev)
            if acc > max_acc:
                max_acc = acc
            elif acc < max_acc * 0.9:
                # Prevent overfitting: stop once accuracy drops below 90% of
                # the best.
                break
            if (epoch + 1) % (interval) == 0 or epoch + 1 == num_epochs:
                torch.save(self.encoder.state_dict(),
                           'models/encoder' + str(epoch + 1) + '.model')
                torch.save(self.decoder.state_dict(),
                           'models/decoder' + str(epoch + 1) + '.model')

    def test(self, num_epochs, batch_size):
        """Predict on the test set and pickle truth and predictions.

        Writes ``y_test`` and ``y_pred_test`` into the current directory.

        Args:
            num_epochs: not used; kept so existing callers keep working.
            batch_size: batch size used for prediction.
        """
        x_test, y_test, y_seq_test = self.dataset.get_test_set()
        y_pred_test = self.predict(x_test, y_seq_test, batch_size)
        with open('y_test', 'wb') as f:
            pickle.dump(y_test, f)
        with open('y_pred_test', 'wb') as f:
            pickle.dump(y_pred_test, f)

    def predict(self, x, y_seq, batch_size):
        """Return one prediction per row of ``x`` as a 1-D NumPy array.

        Only full batches are evaluated: rows belonging to a trailing
        partial batch keep their initial value of 0.

        Args:
            x: NumPy array of inputs; rows are processed in batches.
            y_seq: NumPy array sliced in lockstep with ``x``.
            batch_size: number of rows per forward pass.

        Returns:
            ``numpy.ndarray`` of length ``x.shape[0]``.
        """
        y_pred = np.zeros(x.shape[0])
        i = 0
        # Inference only: skip autograd bookkeeping for the forward passes.
        with torch.no_grad():
            while i < x.shape[0]:
                batch_end = i + batch_size
                if batch_end > x.shape[0]:
                    # A trailing partial batch is skipped, not clamped.
                    break
                var_x_input = self.to_variable(x[i:batch_end])
                var_y_input = self.to_variable(y_seq[i:batch_end])
                if var_x_input.dim() == 2:
                    var_x_input = var_x_input.unsqueeze(2)
                y_res, _ = self.model(var_x_input, var_y_input)
                for j in range(i, batch_end):
                    y_pred[j] = y_res[j - i].item()
                i = batch_end
        return y_pred

    def single_predict(self, x, y_seq):
        """Run the model once on ``x``/``y_seq`` and return its first output."""
        var_x_input = self.to_variable(x)
        var_y_input = self.to_variable(y_seq)
        if var_x_input.dim() == 2:
            var_x_input = var_x_input.unsqueeze(2)
        y_res, _ = self.model(var_x_input, var_y_input)
        return y_res

    def load_model(self, encoder_path, decoder_path):
        """Load encoder and decoder state dicts and rebuild the wrapper.

        Storages are mapped through an identity ``map_location`` before
        being copied into the existing modules.
        """
        self.encoder.load_state_dict(
            torch.load(encoder_path,
                       map_location=lambda storage, loc: storage))
        self.decoder.load_state_dict(
            torch.load(decoder_path,
                       map_location=lambda storage, loc: storage))
        self.model = Model(self.encoder, self.decoder)

    def to_variable(self, x):
        """Convert a NumPy array to a float tensor, on CUDA when available."""
        tensor = Variable(torch.from_numpy(x).float())
        if torch.cuda.is_available():
            return tensor.cuda()
        return tensor
示例#12
0
class Agent:
    """DQN agent whose policy and target networks wrap an attention encoder/decoder.

    A state is a 3-tuple of tensors ``(x, y_seq, position)`` as assembled in
    ``train``; transitions are stored in a replay memory and the policy
    network is optimised with RMSprop.
    """

    # Number of discrete actions sampled from during exploration.
    NUM_ACTIONS = 3
    # Number of tensors that make up one state tuple.
    _STATE_PARTS = 3

    def __init__(self, time_step, split, lr):
        """Build the dataset, both networks, the replay memory and the optimiser.

        Args:
            time_step: Window length handed to the dataset, encoder and decoder.
            split: Split ratio forwarded to ``Dataset``.
            lr: Learning rate for the RMSprop optimiser.
        """
        self.dataset = Dataset(T=time_step,
                               split_ratio=split,
                               binary_file=config.BINARY_DATASET)
        # The policy network is built first, then the target network.
        (self.policy_net_encoder, self.policy_net_decoder,
         self.policy_net) = self._build_network(time_step)
        (self.target_net_encoder, self.target_net_decoder,
         self.target_net) = self._build_network(time_step)
        if torch.cuda.is_available():
            self.policy_net_encoder = self.policy_net_encoder.cuda()
            self.policy_net_decoder = self.policy_net_decoder.cuda()
            self.target_net_encoder = self.target_net_encoder.cuda()
            self.target_net_decoder = self.target_net_decoder.cuda()
            self.policy_net = self.policy_net.cuda()
            self.target_net = self.target_net.cuda()
        self.memory = ReplayMemory(config.MEMORY_CAPACITY)
        self.optimizer = optim.RMSprop(self.policy_net.parameters(), lr=lr)

    def _build_network(self, time_step):
        """Create one (encoder, decoder, DQN) triple sized for the dataset."""
        encoder = AttnEncoder(input_size=self.dataset.get_num_features(),
                              hidden_size=config.ENCODER_HIDDEN_SIZE,
                              time_step=time_step)
        decoder = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                              hidden_size=config.DECODER_HIDDEN_SIZE,
                              time_step=time_step)
        return encoder, decoder, DQN(encoder, decoder)

    @staticmethod
    def _stack_states(states):
        """Concatenate a sequence of state tuples component-wise into one batch."""
        return tuple(
            torch.cat([state[part] for state in states])
            for part in range(Agent._STATE_PARTS))

    def select_action(self, state, test=False):
        """Choose an action epsilon-greedily (always greedily when ``test``).

        The exploration threshold decays exponentially from
        ``config.EPS_START`` to ``config.EPS_END`` with the module-level step
        counter ``steps_done``, which is incremented on every call.

        Args:
            state: State tuple fed to the policy network.
            test: When truthy, skip exploration.

        Returns:
            A ``1 x 1`` long tensor holding the chosen action index.
        """
        global steps_done
        sample = random.random()
        eps_threshold = config.EPS_END + (
            config.EPS_START - config.EPS_END) * math.exp(
                -1. * steps_done / config.EPS_DECAY)
        steps_done += 1
        if test or sample > eps_threshold:
            with torch.no_grad():
                # Index of the highest value along dim 1 of the network output.
                return self.policy_net(state).max(1)[1].view(1, 1)
        # random.random() exists in both the stdlib and numpy.random, so this
        # uniform draw over {0, ..., NUM_ACTIONS - 1} works with either import
        # (stdlib random.randint(3) would raise TypeError).
        action_index = int(random.random() * self.NUM_ACTIONS)
        action = torch.tensor([[action_index]], dtype=torch.long)
        if torch.cuda.is_available():
            action = action.cuda()
        return action

    def optimize_model(self):
        """Run one optimisation step on a batch sampled from replay memory.

        Does nothing until the memory holds at least ``config.BATCH_SIZE``
        transitions.
        """
        if len(self.memory) < config.BATCH_SIZE:
            return
        transitions = self.memory.sample(config.BATCH_SIZE)
        # Turn a list of transitions into one Transition of tuples.
        batch = Transition(*zip(*transitions))
        state_batch = self._stack_states(batch.state)
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward)
        next_state_batch = self._stack_states(batch.next_state)

        # Value predicted by the policy network for the action actually taken.
        state_action_values = self.policy_net(state_batch).gather(
            1, action_batch)
        # Best next-state value according to the target network; detached so
        # no gradient flows into it.
        next_state_values = self.target_net(next_state_batch).max(
            1)[0].detach()
        expected_state_action_values = (next_state_values *
                                        config.GAMMA) + reward_batch
        loss = F.smooth_l1_loss(state_action_values,
                                expected_state_action_values.unsqueeze(1))
        self.optimizer.zero_grad()
        loss.backward()
        # Clip every gradient element to [-1, 1] before the update.
        for param in self.policy_net.parameters():
            if param.grad is not None:
                param.grad.data.clamp_(-1, 1)
        self.optimizer.step()

    def load_model(self, encoder_path=None, decoder_path=None, DQN_path=None):
        """Load weights and synchronise the target network with the policy network.

        Args:
            encoder_path: Encoder ``state_dict`` file; used when ``DQN_path``
                is not given.
            decoder_path: Decoder ``state_dict`` file; used when ``DQN_path``
                is not given.
            DQN_path: ``state_dict`` file of a complete policy network.  Takes
                precedence over the two other paths.

        Raises:
            ValueError: If ``DQN_path`` is missing and either of the other
                two paths is missing as well.
        """

        def keep_storage(storage, loc):
            # Do not remap storages to the device they were saved from.
            return storage

        if DQN_path is not None:
            self.policy_net.load_state_dict(
                torch.load(DQN_path, map_location=keep_storage))
        else:
            if encoder_path is None or decoder_path is None:
                raise ValueError(
                    'load_model needs either DQN_path or both encoder_path '
                    'and decoder_path')
            self.policy_net_encoder.load_state_dict(
                torch.load(encoder_path, map_location=keep_storage))
            self.policy_net_decoder.load_state_dict(
                torch.load(decoder_path, map_location=keep_storage))
            self.policy_net = DQN(self.policy_net_encoder,
                                  self.policy_net_decoder)
            if torch.cuda.is_available():
                # Same placement as in __init__ for the rebuilt wrapper.
                self.policy_net = self.policy_net.cuda()
            # NOTE(review): self.optimizer was created from the previous
            # policy_net; if DQN owns parameters beyond the encoder/decoder
            # they are not tracked by it after this rebuild - confirm.
        self.target_net.load_state_dict(self.policy_net.state_dict())

    def train(self, num_epochs, interval):
        """Train the policy network by interacting with the environment.

        Args:
            num_epochs: Number of passes; the environment is reset at the
                start of each one.
            interval: Save the policy weights every ``interval`` epochs (and
                always after the last epoch) under ``models/``.
        """
        env = Environment(np.array([0.5, 0.5]))
        episode = 0  # counts environment steps across all epochs
        for epoch in range(num_epochs):
            env.reset()
            state = (env.x[env.current_step].unsqueeze(0),
                     env.y_seq[env.current_step].unsqueeze(0),
                     env.position.unsqueeze(0))
            while True:
                action = self.select_action(state)
                _, next_state, reward = env.step(action.item())
                if next_state is None:
                    # The environment signals the end of the data with None.
                    break
                self.memory.push(state, action, next_state, reward)
                state = next_state
                self.optimize_model()
                episode += 1
                if episode % config.TARGET_UPDATE == 0:
                    self.target_net.load_state_dict(
                        self.policy_net.state_dict())
                print(env.wealth, action, env.position)
            if (epoch + 1) % interval == 0 or epoch + 1 == num_epochs:
                torch.save(self.policy_net.state_dict(),
                           'models/DQN' + str(epoch + 1) + '.model')

    def test(self, num_epochs):
        """Run the greedy policy once through the test environment, printing wealth.

        Args:
            num_epochs: Unused; kept for interface compatibility.
        """
        env = Environment(test=True)
        # NOTE(review): unlike train(), the initial state is not unsqueezed
        # and env.reset() is not called - confirm this is intended.
        state = (env.x[env.current_step], env.y_seq[env.current_step],
                 env.position)
        while True:
            action = self.select_action(state, test=True)
            _, next_state, _ = env.step(action.item())
            if next_state is None:
                break
            state = next_state
            print(env.wealth)
示例#13
0
文件: seq.py 项目: victai/SDML
def main():
    """Build the seq2seq model and, when ``args.train`` is set, train it.

    Training runs for at most ``epoch`` epochs starting at the resumed epoch,
    stops early once more than two epochs pass without a new best hit
    percentage, checkpoints to ``args.model_path`` and finally logs sample
    predictions.  Without ``args.train`` the two networks are only printed.
    """
    epoch = 1000
    batch_size = 256
    hidden_dim = 128

    encoder = Encoder(num_words,
                      hidden_dim,
                      n_layers=args.n_layers,
                      bidirectional=args.bidirectional).to(device)
    if args.attn:
        decoder = AttnDecoder(hidden_dim,
                              num_words,
                              max_seqlen,
                              n_layers=args.n_layers).to(device)
    else:
        decoder = Decoder(hidden_dim, num_words,
                          n_layers=args.n_layers).to(device)

    if not args.train:
        print(encoder)
        print(decoder)
        return

    encoder_optimizer = Adam(encoder.parameters(), lr=0.001)
    decoder_optimizer = Adam(decoder.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss(ignore_index=word2idx[PAD_TOKEN])

    def to_batch(rows):
        """Turn a slice of index rows into a transposed LongTensor on ``device``."""
        return torch.LongTensor(rows.tolist()).t().to(device)

    def save_checkpoint(epoch_index, loss, **extra):
        """Write model/optimizer state plus ``extra`` entries to args.model_path."""
        state = {
            'encoder_state_dict': encoder.state_dict(),
            'encoder_optimizer_state_dict': encoder_optimizer.state_dict(),
            'decoder_state_dict': decoder.state_dict(),
            'decoder_optimizer_state_dict': decoder_optimizer.state_dict(),
            'epoch': epoch_index,
            'loss': loss,
        }
        state.update(extra)
        torch.save(state, args.model_path)

    np.random.seed(1124)
    order = np.arange(len(train_X))

    best_loss = 1e10
    best_percentage = 0
    best_percentage_epoch = 0
    best_epoch = 0
    start_epoch = 0
    if args.resume:
        start_epoch, best_loss = load_checkpoint(args.model_path, encoder,
                                                 encoder_optimizer,
                                                 decoder,
                                                 decoder_optimizer)
        # Count early-stopping patience from the resume point; otherwise the
        # check below would abort a resumed run before its first epoch.
        best_percentage_epoch = start_epoch

    n_train_batches = len(order) // batch_size
    n_valid_batches = len(valid_X) // batch_size

    for e in range(start_epoch, start_epoch + epoch):
        if e - best_percentage_epoch > 2:
            break

        np.random.shuffle(order)
        shuffled_train_X = train_X[order]
        shuffled_train_Y = train_Y[order]
        train_loss = 0
        valid_loss = 0

        for b in tqdm(range(n_train_batches)):
            rows = slice(b * batch_size, (b + 1) * batch_size)
            batch_x = to_batch(shuffled_train_X[rows])
            batch_y = to_batch(shuffled_train_Y[rows])
            train_loss += train(batch_x, batch_y, encoder, decoder,
                                encoder_optimizer, decoder_optimizer,
                                criterion)
        # Average over the number of batches (not the last loop index), and
        # stay well-defined when there are no full batches.
        train_loss /= max(n_train_batches, 1)

        all_control_cnt, all_hit_cnt = [], []
        for b in range(n_valid_batches):
            rows = slice(b * batch_size, (b + 1) * batch_size)
            batch_x = to_batch(valid_X[rows])
            batch_y = to_batch(valid_Y[rows])
            val_loss, control_cnt, hit_cnt = valid(batch_x, batch_y,
                                                   encoder, decoder,
                                                   encoder_optimizer,
                                                   decoder_optimizer,
                                                   criterion)
            valid_loss += val_loss
            all_control_cnt.extend(control_cnt)
            all_hit_cnt.extend(hit_cnt)
        valid_loss /= max(n_valid_batches, 1)

        all_control_cnt = np.array(all_control_cnt)
        all_hit_cnt = np.array(all_hit_cnt)
        # Ignore samples with a zero control count to avoid dividing by zero.
        nonzero = all_control_cnt != 0
        all_control_cnt = all_control_cnt[nonzero]
        all_hit_cnt = all_hit_cnt[nonzero]
        percentage = np.mean(all_hit_cnt / all_control_cnt)
        logger.info(
            "epoch {}, train_loss {:.4f}, valid_loss {:.4f}, best_epoch {}, best_loss {:.4f}, control_cnt {}, hit_cnt {}, percentage {:.4f}"
            .format(e, train_loss, valid_loss, best_epoch, best_loss,
                    np.sum(all_control_cnt), np.sum(all_hit_cnt),
                    percentage))

        # NOTE(review): both checkpoints below go to the same path, so the
        # file holds whichever condition fired last - confirm this is wanted.
        if percentage > best_percentage:
            best_percentage = percentage
            best_percentage_epoch = e
            save_checkpoint(e, valid_loss, percentage=best_percentage)

        if valid_loss < best_loss:
            best_loss = valid_loss
            best_epoch = e
            save_checkpoint(e, valid_loss)

    # Log a handful of sample predictions from the first validation batch.
    batch_x = to_batch(valid_X[:batch_size])
    batch_y = to_batch(valid_Y[:batch_size])
    input_chinese, output_chinese = predict(batch_x, batch_y, encoder,
                                            decoder, encoder_optimizer,
                                            decoder_optimizer, criterion,
                                            20)

    logger.info('*** Results ***')
    logger.info('Best Hit Accuracy: {}'.format(best_percentage))
    logger.info(
        'Best Hit Accuracy Epoch: {}'.format(best_percentage_epoch))
    for inp, out in zip(input_chinese, output_chinese):
        logger.info('{}\t||\t{}'.format(inp, out))
    logger.info(encoder)
    logger.info(decoder)
    logger.info('\n\n' + '=' * 100 + '\n\n')
示例#14
0
class Trainer:
    """Train, validate and test the attention encoder/decoder model.

    Training data is read one file at a time through ``Dataset``; sizes
    reported by the dataset are counted in files.
    """

    def __init__(self, driving, target, time_step, split, lr, regression=True):
        """Create the dataset, the networks, the optimiser and the loss.

        Args:
            driving: Not used by this constructor; kept for call compatibility.
            target: Not used by this constructor; kept for call compatibility.
            time_step: Window length for the dataset, encoder and decoder.
            split: Split ratio forwarded to ``Dataset``.
            lr: Learning rate for the Adam optimiser.
            regression: Use ``MSELoss`` when true, otherwise a two-class
                weighted ``CrossEntropyLoss``.
        """
        self.dataset = Dataset(T=time_step,
                               split_ratio=split,
                               binary_file=config.BINARY_DATASET_HEADER)
        self.encoder = AttnEncoder(input_size=self.dataset.get_num_features(),
                                   hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   time_step=time_step)
        self.decoder = AttnDecoder(code_hidden_size=config.ENCODER_HIDDEN_SIZE,
                                   hidden_size=config.DECODER_HIDDEN_SIZE,
                                   time_step=time_step)
        self.model = Model(self.encoder, self.decoder)
        if torch.cuda.is_available():
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
            self.model = self.model.cuda()

        self.model_optim = optim.Adam(self.model.parameters(), lr)

        if regression:
            self.loss_func = nn.MSELoss()
        else:
            weight = torch.Tensor([1, 1])
            if torch.cuda.is_available():
                # The class weights must live on the same device as the logits.
                weight = weight.cuda()
            # Legacy arguments: reduce=False yields one loss value per sample.
            self.loss_func = nn.CrossEntropyLoss(reduce=False,
                                                 size_average=False,
                                                 weight=weight)

        (self.train_size, self.test_size,
         self.total_size) = self.dataset.get_size()
        print("train_size = %d (in terms of number of binary files)" %
              self.train_size)
        print("test_size = %d (in terms of number of binary files)" %
              self.test_size)

    @staticmethod
    def _print_tensor_info(name, tensor):
        """Print grad flag, type and shape of ``tensor`` for debugging."""
        print(name + ".requires_grad: ")
        print(tensor.requires_grad)
        print(name + ".type()")
        print(tensor.type())
        print(name + ".shape")
        print(tensor.shape)

    def train_minibatch(self,
                        num_epochs,
                        batch_size,
                        interval,
                        cout,
                        regression=True):
        """Train for ``num_epochs`` epochs and validate after each one.

        Args:
            num_epochs: Number of epochs to run.
            batch_size: Samples per optimisation step; only full batches are
                used.
            interval: Save encoder/decoder weights every ``interval`` epochs
                and after the final epoch.
            cout: When truthy, treat the run as a continuation: epoch numbers
                in logs and file names are offset by 100 and weights are
                written under ``models/30min/`` instead of ``models/EURUSD/``.
            regression: Must match the mode chosen in ``__init__``.
        """
        already_trained = 100
        best_model = -1
        best_correctness = 0
        # The parameter is spelled `cout` for callers; the logic below reads
        # more naturally as "continue".
        cont = cout
        if cont:
            epoch_offset = already_trained
            encoder_prefix = 'models/30min/encoder_EURUSD_30min_multifile_with_vali'
            decoder_prefix = 'models/30min/decoder_EURUSD_30min_multifile_with_vali'
        else:
            epoch_offset = 0
            encoder_prefix = 'models/EURUSD/encoder_EURUSD_30min_multifile_with_vali_without_normalization_final_test_new_'
            decoder_prefix = 'models/EURUSD/decoder_EURUSD_30min_multifile_with_vali_without_normalization_final_test_new_'
        max_lines = config.MAX_SINGLE_FILE_LINE_NUM

        for epoch in range(num_epochs):
            shown_epoch = epoch_offset + epoch + 1
            # Accumulated over the whole epoch so the reported value really
            # is the mean batch loss.
            loss_sum = 0.0
            batch_count = 0
            for file_num in range(self.train_size):
                x_train, y_train, y_seq_train = self.dataset.get_train_set(
                    file_num)
                # The last training file keeps its final lines for validation
                # unless everything is used for training.
                reserve_validation = (config.SPLIT_RATIO != 1.0
                                      and file_num == self.train_size - 1)
                i = 0
                while i < max_lines:
                    batch_end = i + batch_size
                    if (reserve_validation and batch_end >
                            max_lines - config.VALIDATION_LINE_NUM):
                        break
                    if batch_end > max_lines:
                        break
                    self.model_optim.zero_grad()
                    var_x = self.to_variable(x_train[i:batch_end])
                    # Use to_variable so the target sits on the same device
                    # as the model output.
                    var_y = self.to_variable(y_train[i:batch_end])
                    var_y_seq = self.to_variable(y_seq_train[i:batch_end])
                    # making sure the driving series has 3 dimensions
                    if var_x.dim() == 2:
                        var_x = var_x.unsqueeze(2)
                    y_res, y_var = self.model(var_x, var_y_seq)
                    if regression:
                        loss = self.loss_func(y_res, var_y)
                    else:
                        var_y = var_y.long()
                        self._print_tensor_info("y_res", y_res)
                        print()
                        self._print_tensor_info("var_y", var_y)
                        # The classification loss is unreduced; sum it so it
                        # can be back-propagated and reported as one number.
                        loss = self.loss_func(y_res, var_y).sum()

                    loss.backward()
                    self.model_optim.step()
                    batch_loss = loss.item()
                    print('epoch[%d], file[%d], batch[%d], loss is %f' %
                          (shown_epoch, file_num, batch_end / batch_size,
                           10000 * batch_loss))
                    loss_sum += batch_loss
                    batch_count += 1
                    i = batch_end

            mean_loss = loss_sum / batch_count if batch_count else 0.0
            print('epoch [%d] finished, the average loss is %f' %
                  (shown_epoch, mean_loss))
            if (epoch + 1) % interval == 0 or epoch + 1 == num_epochs:
                torch.save(self.encoder.state_dict(),
                           encoder_prefix + str(shown_epoch) + '.model')
                torch.save(self.decoder.state_dict(),
                           decoder_prefix + str(shown_epoch) + '.model')

            x_vali, y_vali, y_seq_vali = self.dataset.get_validation_set()
            y_pred_validation = self.predict(x_vali, y_vali, y_seq_vali,
                                             batch_size)
            seq_len = len(y_vali)
            # Direction of the move relative to the previous true value.
            # NOTE(review): predict() leaves a trailing partial batch at 0,
            # and those entries are still scored here - confirm.
            gt_direction = (y_vali[1:] - y_vali[:seq_len - 1]) > 0
            pred_direction = (y_pred_validation[1:] - y_vali[:seq_len - 1]) > 0
            correct = np.sum(gt_direction == pred_direction)
            print('number of correct in validation set = %d' % correct)
            print('length of validation set = %d' % seq_len)
            correct = correct / max(seq_len - 1, 1)
            if correct > best_correctness:
                best_model = epoch + 1
                best_correctness = correct
            print(
                'epoch[%d] finished, current correctness is %f, best model so far is model %d with correctness %f'
                % (epoch + 1, correct, best_model, best_correctness))

    def test(self, num_epochs, batch_size):
        """Predict every test file, pickle truth/prediction and save a plot.

        Args:
            num_epochs: Only used to label the output file names.
            batch_size: Batch size forwarded to ``predict``.
        """
        start = self.train_size
        end = self.total_size
        for index in range(start, end, 1):
            part = str(index - start + 1)
            x_test, y_test, y_seq_test = self.dataset.get_test_set(index)
            y_pred_test = self.predict(x_test, y_test, y_seq_test, batch_size)

            with open(
                    'y_test_attention_weight_observation_epoch_' +
                    str(num_epochs) + '_part' + part, 'wb') as f:
                pickle.dump(y_test, f)
            with open(
                    'y_pred_test_attention_weight_observation_epoch_' +
                    str(num_epochs) + '_part' + part, 'wb') as f:
                pickle.dump(y_pred_test, f)

            # Plot the first half of the file, positioned by global line number.
            half = len(y_test) // 2
            first = 1 + index * config.MAX_SINGLE_FILE_LINE_NUM
            x_axis = range(first, first + half)
            plt.figure()
            plt.plot(x_axis, y_test[:half], label='ground truth')
            plt.plot(x_axis, y_pred_test[:half], label='predicted test')
            plt.legend()
            plt.savefig('res-attention_weight_observation_epoch' +
                        str(num_epochs) + '_part_' + part + '.png')
            # Release the figure so they do not pile up across files.
            plt.close()

    def predict(self, x, y, y_seq, batch_size):
        """Run the model over ``x`` in whole batches and collect the outputs.

        Args:
            x: NumPy array of model inputs, indexed by sample along axis 0.
            y: Unused; kept for interface compatibility.
            y_seq: NumPy array aligned with ``x``, passed as second input.
            batch_size: Number of samples per forward pass (must be positive).

        Returns:
            A float NumPy array of length ``x.shape[0]``.  Entries belonging
            to a trailing partial batch are not predicted and stay ``0``.
        """
        num_samples = x.shape[0]
        y_pred = np.zeros(num_samples)
        # Inference only: no_grad avoids building an autograd graph per batch.
        with torch.no_grad():
            for start in range(0, num_samples - batch_size + 1, batch_size):
                stop = start + batch_size
                var_x_input = self.to_variable(x[start:stop])
                var_y_input = self.to_variable(y_seq[start:stop])
                if var_x_input.dim() == 2:
                    var_x_input = var_x_input.unsqueeze(2)
                y_res, _ = self.model(var_x_input, var_y_input)
                # Convert explicitly so the copy also works for GPU tensors;
                # one value per sample is expected.
                y_pred[start:stop] = y_res.detach().cpu().numpy().reshape(-1)
        return y_pred

    def load_model(self, encoder_path, decoder_path):
        """Restore encoder/decoder weights from disk and rebuild ``self.model``.

        Args:
            encoder_path: File holding the encoder ``state_dict``.
            decoder_path: File holding the decoder ``state_dict``.
        """
        self.encoder.load_state_dict(
            torch.load(encoder_path,
                       map_location=lambda storage, loc: storage))
        self.decoder.load_state_dict(
            torch.load(decoder_path,
                       map_location=lambda storage, loc: storage))
        self.model = Model(self.encoder, self.decoder)
        if torch.cuda.is_available():
            # Same placement as in __init__ for the rebuilt wrapper.
            self.model = self.model.cuda()
        # NOTE(review): self.model_optim still refers to the parameters of
        # the previous Model instance; if Model owns parameters beyond the
        # encoder/decoder they are no longer optimised - confirm.

    def to_variable(self, x):
        """Wrap NumPy array ``x`` as a float tensor, on the GPU if CUDA is available."""
        tensor = Variable(torch.from_numpy(x).float())
        if torch.cuda.is_available():
            tensor = tensor.cuda()
        return tensor
示例#15
0
# Model hyper-parameters used to rebuild the networks before loading weights.
# NOTE(review): presumably these must match the values used when the
# checkpoint was trained - confirm.
hidden_size = 500
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64
loadFilename = None
checkpoint_iter = 4000

#load model

print("Load checkpint...")
# The modules below are built on the CPU, so keep the checkpoint tensors
# there as well; this also lets a GPU-saved checkpoint load without CUDA.
checkpoint = torch.load(model_path,
                        map_location=lambda storage, loc: storage)
encoder_sd = checkpoint['en']
decoder_sd = checkpoint['de']
embedding_sd = checkpoint['embedding']
voc = Voc()
# Restore the vocabulary by replacing the instance attributes wholesale.
voc.__dict__ = checkpoint['voc_dict']

size = voc.size

embedding = nn.Embedding(size, hidden_size)
embedding.load_state_dict(embedding_sd)
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
encoder.load_state_dict(encoder_sd)
decoder = AttnDecoder(attn_model, embedding, hidden_size, size,
                      decoder_n_layers, dropout)
decoder.load_state_dict(decoder_sd)

# Switch to evaluation mode so dropout is disabled while decoding.
encoder.eval()
decoder.eval()

searcher = GreedySearchDecoder(encoder, decoder, voc)
evaluateInput(searcher)