Example No. 1
import torch


def retrieval_ref(model, batch_query, extra_memory_loader, evaluation, device):
    """
    Given a batch of queries and an extra memory, search for the best-matching reference image.
    Params:
        model(torch.nn.Module): feature extractor
        batch_query(torch.Tensor): a batch of query images
        extra_memory_loader(torch.utils.data.DataLoader): loader over the extra memory set
        evaluation(torch.nn.Module or other callable): distance function,
            typically cosine distance or L2 distance
        device(torch.device): device to move the model and data to (GPU or CPU)
    Return: a batch of images, the best-matching memory image for each query
    """
    batch_query = batch_query.to(device)
    model = model.to(device)
    batch_qf = model(batch_query)
    # start from +inf so the first memory batch always updates the running best
    best_min_val = torch.full((batch_qf.size(0),), float('inf'), device=device)
    best_min_img = torch.zeros_like(batch_query)
    for i, data in enumerate(extra_memory_loader):
        imgs = data[0]
        imgs = imgs.to(device)
        ref_f = model(imgs)
        # a (num_queries, num_refs) matrix: row i holds the distances between
        # the ith query and every image in the current memory batch
        dist = evaluation(batch_qf, ref_f)

        min_val, min_ind = torch.min(dist, 1)
        # Update the best (smallest) distance and the corresponding image
        min_cmp = min_val < best_min_val
        best_min_val[min_cmp] = min_val[min_cmp]
        best_min_img[min_cmp] = imgs[min_ind[min_cmp]]

    return best_min_img
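
A minimal usage sketch for the function above; the dummy tensors, the tiny CNN feature extractor and the cosine_distance helper are illustrative assumptions, not part of the original snippet:

import torch
from torch.utils.data import DataLoader, TensorDataset

# Dummy query images and a small dummy memory set, standing in for real data
query_batch = torch.randn(4, 3, 32, 32)
memory_loader = DataLoader(TensorDataset(torch.randn(64, 3, 32, 32)), batch_size=32)

# Any feature extractor works; here a tiny CNN that maps an image to an 8-d feature
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 8, kernel_size=3, stride=2, padding=1),
    torch.nn.ReLU(),
    torch.nn.AdaptiveAvgPool2d(1),
    torch.nn.Flatten(),
)

def cosine_distance(qf, rf):
    # (num_queries, num_refs) matrix of 1 - cosine similarity
    qf = torch.nn.functional.normalize(qf, dim=1)
    rf = torch.nn.functional.normalize(rf, dim=1)
    return 1 - qf @ rf.t()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
with torch.no_grad():
    best_refs = retrieval_ref(model, query_batch, memory_loader, cosine_distance, device)
print(best_refs.shape)  # torch.Size([4, 3, 32, 32]): one best-matching memory image per query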
Example No. 2
    def generate_caption_w_target(self, h_0, target_caption):
        """
        h_0 (sentence embedding), target caption -LSTM-> list of (word probability distributions)

        Used for training (teacher forcing).

        @h_0 : (batch, hidden_size)
        @target_caption : (batch, num_of_words, len_of_vocab) (batch * (sequence of one-hot vectors))
            * starting from <START>??
        """
        batch_size = h_0.size(0)
        num_of_words = target_caption.size(1)

        h = torch.zeros((batch_size, self.hidden_size))  # hidden state
        c = torch.zeros((batch_size, self.hidden_size))  # cell state
        hat_y_s = torch.empty((batch_size, num_of_words, self.vocab_size))  # output states (i.e. prob distributions)

        target_caption_embedding = self.pretrained_embedding(target_caption)
        # (batch, len(target_caption), embedding_size)

        for t in range(num_of_words):
            if t == 0:
                h, c = self.lstm_cell(h_0, (h, c))  # if the caption starts from <START>, shouldn't this branch also feed the embedding, like the else branch?
                                                    # there seem to be two architectures: feed the feature vector 1) as h_0, or 2) as x_1 (with h_0 a zero vector)
            else:
                h, c = self.lstm_cell(target_caption_embedding[:, t, :], (h, c))

            word_pb_distribution = nn.functional.log_softmax(self.linear_words(h), dim=1)
            hat_y_s[:, t, :] = word_pb_distribution

        return hat_y_s
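
Since hat_y_s holds per-step log-probabilities, a teacher-forced loss can be computed directly with nn.NLLLoss. A minimal sketch with random stand-in tensors; the sizes and the data are arbitrary assumptions, not taken from the original model:

import torch
import torch.nn as nn

batch, num_of_words, vocab_size = 8, 12, 1000
# Stand-ins for the method's output and for the ground-truth caption indices
hat_y_s = torch.log_softmax(torch.randn(batch, num_of_words, vocab_size), dim=2)
target_idx = torch.randint(0, vocab_size, (batch, num_of_words))

# Flatten the time dimension and apply NLLLoss to the log-probabilities
criterion = nn.NLLLoss()
loss = criterion(hat_y_s.reshape(-1, vocab_size), target_idx.reshape(-1))
print(loss.item())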
Example No. 3
    def generate_caption_wo_target(self, h_0):
        """
        h_0 (sentence embedding) -LSTM-> list of (word probability distributions)

        Used for testing. Greedy decoding (argmax).
        """

        batch_size = h_0.size(0)
        num_of_words = self.max_caption_length  # assumed attribute giving the maximum caption length (no target caption is available here)

        h = torch.zeros((batch_size, self.hidden_size))  # hidden state
        c = torch.zeros((batch_size, self.hidden_size))  # cell state
        hat_y_s = torch.empty((batch_size, num_of_words, self.vocab_size))  # output states (i.e. prob distributions)

        nextword_embedding = torch.zeros((batch_size, self.embed_size))

        for t in range(num_of_words):  # shouldn't this stop once the <END> token is produced?
            if t == 0:
                h, c = self.lstm_cell(h_0, (h, c))
            else:
                h, c = self.lstm_cell(nextword_embedding, (h, c))

            word_pb_distribution = nn.functional.log_softmax(self.linear_words(h), dim=1)  # (batch, vocab_size)
            hat_y_s[:, t, :] = word_pb_distribution

            max_pb_idx = torch.argmax(word_pb_distribution, dim=1)  # (batch)
            nextword_embedding = self.pretrained_embedding(max_pb_idx)  # (batch, embed_size)

        return hat_y_s
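
After greedy decoding, the per-step log-probabilities can be turned into word indices, cutting each caption at its first <END> token, which is the stopping question raised in the loop comment above. A small sketch where the tensor and END_IDX are illustrative stand-ins, not values from the original model:

import torch

batch, num_of_words, vocab_size = 2, 5, 1000
# Stand-in for the method's output (random log-probabilities, for illustration only)
hat_y_s = torch.log_softmax(torch.randn(batch, num_of_words, vocab_size), dim=2)

END_IDX = 2  # assumed index of the <END> token in the vocabulary
predicted_idx = hat_y_s.argmax(dim=2)  # (batch, num_of_words)
captions = []
for row in predicted_idx.tolist():
    words = []
    for idx in row:
        if idx == END_IDX:
            break  # stop the caption at the first <END>
        words.append(idx)
    captions.append(words)
print(captions)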
Example No. 4
    def inference(self, input, target):
        # Beam search (note: as written this decodes greedily with argmax)
        # batch_size is 1 in this function
        # input  = [batch_size, input_len, vocab_size]
        # target = [batch_size, target_len, vocab_size]
        batch_size = input.shape[0]
        input_len = input.shape[1]  # maximum output length (number of tokens)
        vocab_size = self.decoder.cn_vocab_size

        # Allocate a tensor to store the decoder outputs
        outputs = torch.zeros(batch_size, input_len,
                              vocab_size).to(self.device)
        # Run the input through the encoder
        encoder_outputs, hidden = self.encoder(input)
        # The encoder's final hidden state initialises the decoder;
        # encoder_outputs is mainly used by the attention mechanism.
        # The encoder is a bidirectional RNN, so the hidden states of the two
        # directions of each layer have to be concatenated:
        # hidden = [num_layers * directions, batch_size, hid_dim] -> [num_layers, directions, batch_size, hid_dim]
        hidden = hidden.view(self.encoder.n_layers, 2, batch_size, -1)
        hidden = torch.cat((hidden[:, -2, :, :], hidden[:, -1, :, :]), dim=2)
        # Take the <BOS> token
        input = target[:, 0]
        preds = []
        for t in range(1, input_len):
            output, hidden = self.decoder(input, hidden, encoder_outputs)
            # Store the prediction
            outputs[:, t] = output
            # Take the most probable word
            top1 = output.argmax(1)
            input = top1
            preds.append(top1.unsqueeze(1))
        preds = torch.cat(preds, 1)
        return outputs, preds
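
The two lines that reshape and concatenate hidden merge the forward and backward states of the bidirectional encoder. A shape-only sketch with arbitrary example sizes:

import torch

n_layers, directions, batch_size, hid_dim = 3, 2, 1, 512
hidden = torch.randn(n_layers * directions, batch_size, hid_dim)  # as returned by a bidirectional RNN

hidden = hidden.view(n_layers, directions, batch_size, hid_dim)
hidden = torch.cat((hidden[:, -2, :, :], hidden[:, -1, :, :]), dim=2)
print(hidden.shape)  # torch.Size([3, 1, 1024]): forward and backward states concatenated per layer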
Example No. 5
import torch
import matplotlib.pyplot as plt


def plot_durations():
    plt.figure(2)
    plt.clf()
    durations_t = torch.tensor(episode_durations, dtype=torch.float)
    plt.title('Training...')
    plt.xlabel('Episode')
    plt.ylabel('Duration')
    plt.plot(durations_t.numpy())
    # Take 100-episode averages and plot them too
    if len(durations_t) >= 100:
        means = durations_t.unfold(0, 100, 1).mean(1).view(-1)
        means = torch.cat((torch.zeros(99), means))
        plt.plot(means.numpy())

    plt.pause(0.001)
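
A sketch of how plot_durations is typically driven from a training loop; the random episode lengths are a stand-in for real results:

import torch
import matplotlib.pyplot as plt

episode_durations = []
plt.ion()  # non-blocking figure so it refreshes every episode
for episode in range(150):
    duration = int(torch.randint(10, 200, (1,)))  # stand-in for the real episode length
    episode_durations.append(duration)
    plot_durations()
plt.ioff()
plt.show()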
Example No. 6
    def init_hidden(self):
        num_directions = 2 if self.bidirectional else 1
        return torch.zeros(num_directions * self.num_layer, 1, self.hidden_dim, device=device)
Example No. 7
    def __init_hidden(self, batch_size):
        n, hs = self.num_layers, self.hidden_size

        return (torch.zeros(n * 1, batch_size, hs),
                torch.zeros(n * 1, batch_size, hs))
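
For context, a zero-initialised (h_0, c_0) pair with the same shape as the tuple returned above can be passed directly to nn.LSTM. A small self-contained sketch with arbitrary sizes:

import torch
import torch.nn as nn

num_layers, batch_size, hidden_size, input_size, seq_len = 2, 4, 64, 32, 10
lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)

# Zero-initialised hidden and cell states, matching the shape __init_hidden returns
h0 = torch.zeros(num_layers * 1, batch_size, hidden_size)
c0 = torch.zeros(num_layers * 1, batch_size, hidden_size)

x = torch.randn(batch_size, seq_len, input_size)
output, (h_n, c_n) = lstm(x, (h0, c0))
print(output.shape)  # torch.Size([4, 10, 64])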