Example #1
    def infer(self, model):
        # Run one evaluation step on the next mini-batch from the held-out file list.

        model.eval()
        if self.count >= len(self.file_list):
            self.count = 0
        if self.iter % (32 / self.batch_size) == 0:
            self.file = self.file_list[self.count]
            path = os.path.join(self.dir, self.file)
            self.input, self.target, self.idx2oov = util.get_id(
                path, input_len=self.max_len, target_len=self.summ_len)
            self.ind = 0
            self.count += 1
            with open('stats/e_count.txt', 'a') as handle:
                handle.write(str(self.count) + '\n')
        with torch.no_grad():
            start = self.ind * self.batch_size
            src = num_to_var(
                self.input[start:start + self.batch_size, :]).to(
                    self.device)  # shape: [batch_size, max_len], e.g. [8, 400]
            decoder_inp = num_to_var(
                self.target[start:start + self.batch_size, :]).to(self.device)
        self.ind += 1

        prob, loss = model.evaluate(src,
                                    decoder_inp,
                                    trg_len=self.summ_len + 1)
        #        loss=calc_loss(prob,decoder_inp[:,1:],self.optim)
        pred = prob.argmax(-1)

        real_summ = util.list_to_summ(
            decoder_inp[-1, 1:].cpu().numpy().tolist(), self.idx2oov)
        summ = util.list_to_summ(pred[-1, :].cpu().numpy().tolist(),
                                 self.idx2oov)

        with open('stats/e_loss.txt', 'a') as handle:
            handle.write(str(loss.item()) + '\n')

        self.iter += 1

        return real_summ, summ, loss.item(), self.file
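All of the snippets on this page rely on a helper num_to_var that wraps a numpy batch slice into a torch tensor. The helper itself is not shown here; the following is a minimal sketch of what it presumably does (the dtype handling is an assumption):

import numpy as np
import torch

def num_to_var(arr):
    # Hypothetical reconstruction: wrap a numpy matrix of token ids in a
    # LongTensor so it can be fed to an nn.Embedding. In the older PyTorch
    # these snippets target, the result would also be wrapped in a Variable.
    return torch.from_numpy(np.asarray(arr)).long()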
Example #2
for iter in range(iterations):
    cov_loss, loss = 0, 0
    if count >= len(file_list):
        count = 0
    if iter % (8 / args['batch']) == 0:
        file = file_list[count]
        path = os.path.join(dir, file)
        input, target, idx2oov = util.get_id(path, input_len, target_len)
        with open('count.txt', 'a') as output:
            output.write(str(count))
            output.write('\n')
        count += 1
        ind = 0

    l = [j for j in range(int(8 / args['batch']))]  # mini-batch indices within the loaded file (unused in this excerpt)
    inp = num_to_var(input[ind * args['batch']:ind * args['batch'] +
                           args['batch'], :]).to(device)  # shape: [batch, input_len], e.g. [8, 400]
    decoder_inp = num_to_var(target[ind * args['batch']:ind * args['batch'] +
                                    args['batch'], :]).to(device)

    preds_summ, preds, p_final, attn = model.genrate(enc, dec, inp,
                                                     decoder_inp, args,
                                                     input_len, target_len)

    # train discriminator (currently disabled)
    #    preds_for_discrim = preds.detach()
    #    d_error, d_pred_real, d_pred_fake = \
    #            train_discrim(discrim_optimizer, decoder_inp, preds_for_discrim)  # d_pred_fake is the discriminator's prediction on the generator's output
    #    print(d_error.cpu().numpy(), ' ', 'd_error')
    #    d_error = d_error.detach()
    #    del d_pred_real, d_pred_fake
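The disabled block above alternates a discriminator update into the training loop. train_discrim is not shown in this excerpt; the following is a hypothetical sketch consistent with its call signature and return values (the discrim module, its input size, and the loss choice are all assumptions, not the author's code):

import torch
import torch.nn as nn
import torch.nn.functional as F

# Hypothetical stand-in; the real discriminator is not part of this excerpt.
discrim = nn.Sequential(nn.Linear(25, 1), nn.Sigmoid())

def train_discrim(optimizer, real, fake):
    # One discriminator step: push real targets toward 1 and generator
    # outputs (already detached by the caller) toward 0.
    optimizer.zero_grad()
    pred_real = discrim(real)
    pred_fake = discrim(fake)
    error = F.binary_cross_entropy(pred_real, torch.ones_like(pred_real)) \
          + F.binary_cross_entropy(pred_fake, torch.zeros_like(pred_fake))
    error.backward()
    optimizer.step()
    return error, pred_real, pred_fake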
Example #3
eval = eval1.Infer()  # evaluation helper (see the infer method in Example #1); note it shadows the built-in eval
for iter in range(1800003):
    if count >= len(names):
        count = 0
    if iter % (32 / batch_size) == 0:
        file = names[count]
        input, target, idx2oov = util.get_id(file,
                                             input_len=100,
                                             target_len=25)
        ind = 0
        with open('stats/count.txt', 'a') as handle:
            handle.write(str(count) + '\n')
        count += 1
        l = [j for j in range(int(32 / batch_size))]
    inp = num_to_var(input[ind * batch_size:ind * batch_size +
                           batch_size, :]).to(device)  # shape: [batch_size, input_len], here input_len=100
    decoder_inp = num_to_var(target[ind * batch_size:ind * batch_size +
                                    batch_size, :]).to(device)

    prob, loss = model(inp, decoder_inp, device)
    loss = update(prob, decoder_inp[:, 1:], get_token(decoder_inp[:, 1:]),
                  loss)

    if iter % 50 == 0:
        name = file.split('/')[2].split('.')[0]
        pred = prob.argmax(-1)
        real_summ = util.list_to_summ(
            decoder_inp[-1, 1:].cpu().numpy().tolist(), idx2oov)
        summ = util.list_to_summ(pred[-1, :].cpu().numpy().tolist(), idx2oov)
        e_real_summ, e_summ, e_loss, e_name = eval.infer(model)
        score = r.get_scores(summ, real_summ)[0]['rouge-2']['f']
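The scorer r is constructed outside this excerpt. The call matches the API of the Python rouge package, where get_scores(hypothesis, reference) returns a list of dicts keyed by 'rouge-1', 'rouge-2', and 'rouge-l', each holding 'f', 'p', and 'r' fields. A minimal usage sketch under that assumption:

from rouge import Rouge

r = Rouge()
scores = r.get_scores('the cat sat on the mat', 'a cat sat on a mat')
print(scores[0]['rouge-2']['f'])  # ROUGE-2 F1 of the hypothesis against the reference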
Example #4
    def forward(self, input, target, batch, vocab, train=True):
        # input: input sequence of token ids (numpy array)
        # target: target sequence of token ids (numpy array)
        # batch: a Batch object (maps out-of-vocabulary ids to <UNK>)
        # vocab: a Vocab object (word-to-id lookup, e.g. for <SOS> and <UNK>)
        # train: whether to run in train or test mode (bool)

        # 0. get hyperparameters
        b = input.shape[0]  # batch size
        in_seq = input.shape[1]  # max input sequence length
        tar_seq = target.shape[1]  # max target sequence length
        h = self.hidden_size  # hidden size
        e = self.embed_size  # embedding size

        # 1. obtain encoder output
        unked_input = batch.unk_minibatch(input, vocab)
        encoder_input = num_to_var(unked_input)
        if train:  # optional: we need a decoder input for training
            unked_output = batch.unk_minibatch(target, vocab)
            decoder_input = num_to_var(unked_output)

        encoder_out, _ = self.encoder(
            self.embed(encoder_input))  # encoder_out: [b x in_seq x hid*2]

        # 2. obtain initial hidden state value
        # c0 = Variable(torch.Tensor(b,h).zero_()) # [b x hid]
        # h0 = self.W_init(encoder_out[:,0,:].squeeze()) # [b x hid]
        # C = (h0.unsqueeze(0), c0.unsqueeze(0)) # ([1 x b x hid], [1 x b x hid])

        coverage = Variable(torch.Tensor(
            b, in_seq).zero_())  # coverage vector [b x in_seq]
        coverage = self.to_cuda(coverage)
        cov_loss = 0
        start = time.time()
        if train:
            next_input = decoder_input[:, 0]  # which is already <SOS>
        else:
            ones = np.ones([b]) * vocab.w2i['<SOS>']
            next_input = num_to_var(ones)
        out_list = []  # list to concatenate all outputs later

        # 3. for each item in target
        for i in range(tar_seq - 1):
            print(i)
            # 3.1. get embedding vectors for the decoder inputs
            embedded = self.embed(
                next_input)  # [b x emb], next_input: Variable
            # timing probes left in from profiling (each sleep pauses 5 s per step)
            elapsed = time.time()
            diff = elapsed - start
            print("3-1: ", diff)
            time.sleep(5)
            start = time.time()
            # 3.2. get hidden state from previous hidden state and current decoder input
            if i == 0:
                state, C = self.decoder(embedded.unsqueeze(1))
            else:
                state, C = self.decoder(embedded.unsqueeze(1), C)
            # state: [b x 1 x hid]

            # 3.3. get current attention distribution from encoder output(1), hidden state(3.2), coverage(3.7)
            # attn: [b x in_seq]
            # if self.max_enc>in_seq:
            # 	att1 = self.Wh(encoder_out.contiguous().view(-1,self.hidden_size*2)) + self.Ws(state.squeeze()).repeat(in_seq,1) + self.Wc(torch.cat([coverage,Variable(torch.Tensor(b,self.max_enc-in_seq).zero_())],1)).repeat(in_seq,1)
            # else:
            attn1 = self.Wh(encoder_out.contiguous().view(
                -1, encoder_out.size(2))) + self.Ws(state.squeeze()).repeat(
                    in_seq, 1) + self.Wc(coverage).repeat(in_seq, 1)
            # attn1: [b*in_seq x hidden]
            attn2 = self.v(attn1)  # [b*in_seq x 1]
            attn = F.softmax(attn2.view(b, in_seq), dim=1)  # [b x in_seq]

            elapsed = time.time()
            diff = elapsed - start
            print("3-2,3: ", diff)
            time.sleep(5)
            start = time.time()

            # 3.4. get context vector from encoder_out and attention
            context = torch.bmm(
                attn.unsqueeze(1),
                encoder_out)  # [b x 1 x in_seq] * [b x in_seq x hidden*2]
            context = context.squeeze()  # [b x hidden*2]

            # 3.5. get p_gen using the context vector(3.4), the hidden state(3.2), encoder input(3.1)
            p_gen = torch.sigmoid(
                self.wh(context) + self.ws(state.squeeze()) +
                self.wx(embedded))  # [b x 1]

            # 3.6. get coverage loss by comparing the attention with current coverage
            cov_loss += torch.sum(torch.min(attn, coverage))
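            # (min(attn, coverage) is large only where the model re-attends to
            # already-covered positions, so summing it discourages repetition)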

            # 3.7. update coverage vector by adding attention(3.3)
            coverage += attn

            elapsed = time.time()
            diff = elapsed - start
            print("3-4,5,6,7: ", diff)
            time.sleep(5)
            start = time.time()

            # 3.8. get output vector by adding the two vectors
            # 3.8.1. get p_vocab from state (3.2) and context (3.4)
            p_vocab = F.softmax(
                self.V2(self.V1(torch.cat([state.squeeze(), context], 1))),
                dim=1)  # [b x vocab]
            oovs = Variable(torch.Tensor(
                b, self.max_oovs).zero_()) + 1.0 / self.vocab_size
            oovs = self.to_cuda(oovs)
            p_vocab = torch.cat([p_vocab, oovs], 1)  # extend to [b x vocab+max_oovs]

            # 3.8.2. get p_copy from attn (3.3) and input (np array)
            numbers = input.reshape(-1).tolist()
            set_numbers = list(set(numbers))  # all unique token ids in the batch
            if 0 in set_numbers:  # drop the padding id
                set_numbers.remove(0)
            c = Counter(numbers)
            dup_list = [k for k in set_numbers if (c[k] > 1)]  # ids that occur more than once
            masked_idx_sum = np.zeros([b, in_seq])

            dup_attn_sum = Variable(
                torch.FloatTensor(np.zeros([b, in_seq], dtype=float)))
            dup_attn_sum = self.to_cuda(dup_attn_sum)

            elapsed = time.time()
            diff = elapsed - start
            print("3-8.2: ", diff)
            time.sleep(5)
            start = time.time()
            # 3.8.3. add all duplicate attns to a distinct matrix
            for dup in dup_list:
                mask = np.array(input == dup, dtype=float)
                masked_idx_sum += mask
                attn_mask = torch.mul(
                    self.to_cuda(Variable(torch.Tensor(mask))), attn)
                attn_sum = attn_mask.sum(1).unsqueeze(1)  # [b x 1]
                # print(attn_mask)
                # print(attn_sum)
                # print(dup_attn_sum)
                dup_attn_sum += torch.mul(attn_mask, attn_sum)
            masked_idx_sum = self.to_cuda(Variable(torch.Tensor(masked_idx_sum)))

            elapsed = time.time()
            diff = elapsed - start
            print("3-8.3: ", diff)
            time.sleep(5)
            start = time.time()
            # 3.8.4. replace duplicated positions by their summed attention, then
            # scatter the attention mass into the extended-vocabulary copy distribution
            attn = torch.mul(attn, (1 - masked_idx_sum)) + dup_attn_sum
            batch_indices = torch.arange(start=0, end=b).long()
            batch_indices = batch_indices.expand(in_seq, b).transpose(
                1, 0).contiguous().view(-1)
            idx_repeat = torch.arange(start=0, end=in_seq).repeat(b).long()
            p_copy = torch.zeros(b, self.vocab_size + self.max_oovs)
            p_copy = self.to_cuda(Variable(p_copy))
            word_indices = input.reshape(-1)
            p_copy[batch_indices, word_indices] += attn[batch_indices,
                                                        idx_repeat]
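            # (advanced-indexing += keeps only one write per repeated index, which
            # is why duplicate tokens had their attention pre-summed in 3.8.3)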

            elapsed = time.time()
            diff = elapsed - start
            print("3-8.4: ", diff)
            time.sleep(5)
            start = time.time()

            # en = torch.LongTensor(input) # [b x in_seq]
            # en.unsqueeze_(2) # [b x in_seq x 1]
            # one_hot = torch.FloatTensor(en.size(0),en.size(1),p_vocab.size(1)).zero_() # [b x in_seq x vocab+oov]
            # one_hot.scatter_(2,en,1) # one hot tensor: [b x in_seq x vocab+oov]
            # one_hot = self.to_cuda(one_hot)
            # p_copy = torch.bmm(attn.unsqueeze(1),Variable(one_hot, requires_grad=False)) # [b x 1 x vocab+oov]
            # p_copy = p_copy.squeeze() # [b x vocab+oov]
            # p_gen = p_gen.repeat(1,p_vocab.size(1))
            # p_gen = p_gen.unsqueeze(1) # [b x 1]
            # print(p_gen.size())
            # print(p_vocab.size())
            # print(p_copy.size())
            # print((p_vocab*p_gen).size())
            p_out = torch.mul(p_vocab, p_gen) + torch.mul(
                p_copy, (1 - p_gen))  # [b x extended_vocab]
            # extended_vocab : vocab + max_oov

            # 3.9. append to out_list
            out_list.append(p_out)
            elapsed = time.time()
            diff = elapsed - start
            print("3-9: ", diff)
            time.sleep(5)
            start = time.time()

            # 3.10. get next input
            if train:
                next_input = decoder_input[:, i + 1]  # teacher forcing: gold token for the next step
            else:
                next_input = p_out.max(
                    1)[1].squeeze()  # at test time, feed back the previous prediction
        # 4. concatenate all into a 3-dim tensor
        out_list = torch.stack(out_list, 1)  # [b x seq x ext_vocab]
        print(out_list.size())
        # 5. delete unnecessary tensor Variables (??)

        # 6. return answer
        return out_list, cov_loss
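Step 3.8 mixes the generation and copy distributions in the pointer-generator fashion: p_out = p_gen * p_vocab + (1 - p_gen) * p_copy, a convex combination that remains a valid distribution over the extended vocabulary. A minimal, self-contained sketch of just that mixing step (all sizes here are toy values, not the model's):

import torch

b, ext_vocab = 2, 10  # toy batch size and extended vocabulary size
p_vocab = torch.softmax(torch.randn(b, ext_vocab), dim=1)  # generation distribution
p_copy = torch.softmax(torch.randn(b, ext_vocab), dim=1)   # copy distribution
p_gen = torch.sigmoid(torch.randn(b, 1))                   # per-example mixing weight

p_out = p_gen * p_vocab + (1 - p_gen) * p_copy             # final distribution
assert torch.allclose(p_out.sum(1), torch.ones(b))         # each row still sums to 1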