def l2l_validate(model, cluster_center, n_epoch=100):
    val_accuracy = []
    for epoch in range(n_epoch):
        data_l = generate_data_l(cluster_center)
        data_n = generate_data_n(cluster_center, model.n_class_n)
        x_l, y_l = Variable(torch.from_numpy(data_l[0])).float(), Variable(
            torch.from_numpy(data_l[1]))
        x_n, y_n = Variable(torch.from_numpy(data_n[0])).float(), Variable(
            torch.from_numpy(data_n[1]))
        pred_ll, pred_nl, w, b = model(x_l, x_n)
        M = Variable(torch.zeros(model.n_class_n, model.n_dim))
        B = Variable(torch.zeros(model.n_class_n))
        for k in range(model.n_class_n):
            M[k] = torch.cat((w[:, 0][y_n == model.n_class_l + k].view(-1, 1),
                              w[:, 1][y_n == model.n_class_l + k].view(-1, 1)), 1).mean(0)
            B[k] = b[y_n == model.n_class_l + k].mean()
        pred_ln = torch.mm(x_l, M.t()) + B.view(1, -1).expand_as(torch.mm(x_l, M.t()))
        pred_nn = torch.mm(x_n, M.t()) + B.view(1, -1).expand_as(torch.mm(x_n, M.t()))
        pred = torch.cat((torch.cat((pred_ll, pred_nl)), torch.cat((pred_ln, pred_nn))), 1)
        pred = pred.data.max(1)[1]
        y = torch.cat((y_l, y_n))
        accuracy = pred.eq(y.data).cpu().sum() * 1.0 / y.size()[0]
        # print('accuracy: %.2f' % accuracy)
        val_accuracy.append(accuracy)
        acc_l = pred.eq(y.data).cpu()[0:100].sum() * 1.0 / 100
        acc_n = pred.eq(y.data).cpu()[100:150].sum() * 1.0 / 50
        print('accuracy: %.2f, lifelong accuracy: %.2f, new accuracy: %.2f' % (accuracy, acc_l, acc_n))

    return numpy.mean(numpy.asarray(val_accuracy))
Example No. 2
    def backward(ctx, grad_output):
        input1, input2, weight, bias = ctx.saved_variables
        grad_input1 = grad_input2 = grad_weight = grad_bias = None

        buff = Variable(input1.data.new())

        if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
            grad_input1 = torch.mm(input2, weight[0].t())
            grad_input1 = grad_input1.mul(grad_output.narrow(1, 0, 1).expand(grad_input1.size()))
            grad_input2 = torch.mm(input1, weight[0])
            grad_input2 = grad_input2.mul(grad_output.narrow(1, 0, 1).expand(grad_input2.size()))

            for k in range(1, weight.size(0)):
                buff = input2.mm(weight[k].t())
                buff = buff.mul(grad_output.narrow(1, k, 1).expand(grad_input1.size()))
                grad_input1.add_(buff)

                buff = input1.mm(weight[k])
                buff = buff.mul(grad_output.narrow(1, k, 1).expand(grad_input2.size()))
                grad_input2.add_(buff)

        grad_weight = Variable(weight.data.new(weight.size()))
        if ctx.needs_input_grad[2]:
            # accumulate parameter gradients:
            for k in range(weight.size(0)):
                buff = input1.mul(grad_output.narrow(1, k, 1).expand_as(input1))
                grad_weight[k] = torch.mm(buff.t(), input2)

        if bias is not None and ctx.needs_input_grad[3]:
            grad_bias = grad_output.sum(0, keepdim=False)

        return grad_input1, grad_input2, grad_weight, grad_bias
Example No. 3
    def updateGradInput(self, input, gradOutput):
        if self.gradInput is None:
            return

        self._assertInputGradOutput(input, gradOutput)
        # compute d output / d input:
        self.gradInput[0].resize_as_(input[0]).fill_(0)
        self.gradInput[1].resize_as_(input[1]).fill_(0)

        #: first slice of weight tensor (k = 1)
        self.gradInput[0].addmm_(input[1], self.weight[0].t())
        self.gradInput[0].mul_(gradOutput.narrow(1, 0, 1).expand(self.gradInput[0].size(0),
                                                                 self.gradInput[0].size(1)))
        self.gradInput[1].addmm_(input[0], self.weight[0])
        self.gradInput[1].mul_(gradOutput.narrow(1, 0, 1).expand(self.gradInput[1].size(0),
                                                                 self.gradInput[1].size(1)))

        #: remaining slices of weight tensor
        if self.weight.size(0) > 1:
            if self.buff1 is None:
                self.buff1 = input[0].new()
            self.buff1.resize_as_(input[0])

            for k in range(1, self.weight.size(0)):
                torch.mm(input[1], self.weight[k].t(), out=self.buff1)
                self.buff1.mul_(gradOutput.narrow(1, k, 1).expand(self.gradInput[0].size(0),
                                                                  self.gradInput[0].size(1)))
                self.gradInput[0].add_(self.buff1)

                torch.mm(input[0], self.weight[k], out=self.buff2)
                self.buff2.mul_(gradOutput.narrow(1, k, 1).expand(self.gradInput[1].size(0),
                                                                  self.gradInput[1].size(1)))
                self.gradInput[1].add_(self.buff2)

        return self.gradInput
Example No. 4
    def forward(self, input_, hx):
        """
        Args:
            input_: A (batch, input_size) tensor containing input
                features.
            hx: A tuple (h_0, c_0), which contains the initial hidden
                and cell state, where the size of both states is
                (batch, hidden_size).

        Returns:
            h_1, c_1: Tensors containing the next hidden and cell state.
        """

        h_0, c_0 = hx
        batch_size = h_0.size(0)
        bias_batch = (self.bias.unsqueeze(0)
                      .expand(batch_size, *self.bias.size()))
        wh = torch.mm(h_0, self.weight_hh)
        wi = torch.mm(input_, self.weight_ih)
        bn_wh = self.bn_hh(wh)
        bn_wi = self.bn_ih(wi)
        f, i, o, g = torch.split(bn_wh + bn_wi + bias_batch,
                                 split_size=self.hidden_size, dim=1)
        c_1 = torch.sigmoid(f)*c_0 + torch.sigmoid(i)*torch.tanh(g)
        h_1 = torch.sigmoid(o) * torch.tanh(self.bn_c(c_1))
        return h_1, c_1
Example No. 5
    def backward(ctx, grad_output):
        matrix1, matrix2 = ctx.saved_variables
        grad_add_matrix = grad_matrix1 = grad_matrix2 = None

        if ctx.needs_input_grad[0]:
            grad_add_matrix = maybe_unexpand(grad_output, ctx.add_matrix_size)
            if ctx.alpha != 1:
                grad_add_matrix = grad_add_matrix.mul(ctx.alpha)

        if ctx.needs_input_grad[1]:
            if matrix1.stride() == (1, matrix1.size(0)):
                # column major gradient if input is column major
                grad_matrix1 = torch.mm(matrix2, grad_output.t()).t()
            else:
                grad_matrix1 = torch.mm(grad_output, matrix2.t())
            if ctx.beta != 1:
                grad_matrix1 *= ctx.beta

        if ctx.needs_input_grad[2]:
            if matrix2.stride() == (1, matrix2.size(0)):
                # column major gradient if input is column major
                grad_matrix2 = torch.mm(grad_output.t(), matrix1).t()
            else:
                grad_matrix2 = torch.mm(matrix1.t(), grad_output)
            if ctx.beta != 1:
                grad_matrix2 *= ctx.beta

        return grad_add_matrix, grad_matrix1, grad_matrix2, None, None, None
Example No. 6
 def forward(self, attn_mem, n_step):
     """atten_mem: Tensor of size [num_sents, input_dim]"""
     attn_feat = torch.mm(attn_mem, self._attn_wm)
     hop_feat = torch.mm(attn_mem, self._hop_wm)
     outputs = []
     lstm_in = self._init_i.unsqueeze(0)
     lstm_states = (self._init_h.unsqueeze(1), self._init_c.unsqueeze(1))
     for _ in range(n_step):
         h, c = self._lstm_cell(lstm_in, lstm_states)
         query = h[:, -1, :]
         for _ in range(self._n_hop):
             query = PtrExtractorRL.attention(hop_feat, query,
                                             self._hop_v, self._hop_wq)
         score = PtrExtractorRL.attention_score(
             attn_feat, query, self._attn_v, self._attn_wq)
         if self.training:
             prob = F.softmax(score, dim=-1)
             # sample an index so `out` can be indexed like the greedy branch below
             out = torch.distributions.Categorical(prob).sample().unsqueeze(1)
         else:
             for o in outputs:
                 score[0, o[0, 0].item()][0] = -1e18
             out = score.max(dim=1, keepdim=True)[1]
         outputs.append(out)
         lstm_in = attn_mem[out[0, 0].item()].unsqueeze(0)
         lstm_states = (h, c)
     return outputs
def l2l_train(model, cluster_center, n_epoch=10000, trunc_step=10):
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    M_all = Variable(torch.zeros(model.n_class, model.n_dim))
    B_all = Variable(torch.zeros(model.n_class))
    for epoch in range(n_epoch):
        loss = 0
        M_step, B_step = [], []
        for step in range(trunc_step):
            data = generate_data(cluster_center)
            optimizer.zero_grad()
            x, y = Variable(torch.from_numpy(data[0])).float(), Variable(torch.from_numpy(data[1]))
            w, b = model(x)
            M = Variable(torch.zeros(model.n_class_n, model.n_dim))
            B = Variable(torch.zeros(model.n_class_n))
            for k in range(model.n_class_n):
                M[k] = torch.cat((w[:, 0][y == model.n_class_l + k].view(-1, 1),
                                  w[:, 1][y == model.n_class_l + k].view(-1, 1)), 1).mean(0)
                B[k] = b[y == model.n_class_l + k].mean()
            if step == 0:
                M_ = M
                B_ = B
            else:
                M_ = step / (step + 1) * M_step[-1] + 1 / (step + 1) * M
                B_ = step / (step + 1) * B_step[-1] + 1 / (step + 1) * B
            M_step.append(M_)
            B_step.append(B_)
            pred = torch.mm(x, M_.t()) + B_.view(1, -1).expand_as(torch.mm(x, M_.t()))
            loss += F.cross_entropy(pred, y)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {}\tLoss: {:.6f}'.format(epoch, loss.data[0]))
    return M_all, B_all, cluster_center
Example No. 8
    def forward(self, embbedings, label):
        if self.device_id is None:
            kernel_norm = l2_norm(self.kernel, axis = 0)
            cos_theta = torch.mm(embbedings, kernel_norm)
        else:
            x = embbedings
            sub_kernels = torch.chunk(self.kernel, len(self.device_id), dim=1)
            temp_x = x.cuda(self.device_id[0])
            kernel_norm = l2_norm(sub_kernels[0], axis = 0).cuda(self.device_id[0])
            cos_theta = torch.mm(temp_x, kernel_norm)
            for i in range(1, len(self.device_id)):
                temp_x = x.cuda(self.device_id[i])
                kernel_norm = l2_norm(sub_kernels[i], axis = 0).cuda(self.device_id[i])
                cos_theta = torch.cat((cos_theta, torch.mm(temp_x, kernel_norm).cuda(self.device_id[0])), dim=1)

        cos_theta = cos_theta.clamp(-1, 1)  # for numerical stability
        phi = cos_theta - self.m
        label = label.view(-1, 1)  # size=(B,1)
        index = cos_theta.data * 0.0  # size=(B,Classnum)
        index.scatter_(1, label.data.view(-1, 1), 1)
        index = index.byte()
        output = cos_theta * 1.0
        output[index] = phi[index]  # only change the correct predicted output
        output *= self.s  # scale up in order to make softmax work, first introduced in normface

        return output
Example No. 9
    def forward(self, input, hidden, encoder_outputs, enc_padding_mask,
                context, extra_zeros, enc_batch_extend_vocab, coverage):
        """
        :param input: (B)
        :param hidden: (1, B, H), (1, B, H)
        :param encoder_outputs: (B, L, 2*H)
        :param enc_padding_mask: (B, L)
        :param context: (B, 2*H); since beam search will use context, we need to send it out.
        :param extra_zeros: (B, n)
        :param enc_batch_extend_vocab: (B, L)
        :param coverage: (B, L)
        :return: (B, V), ((1, B, H), (1, B, H)), (B, 2*H), (B, L), (B, 1), (B, L)
        """

        input = self.embed(input)  # B -> (B, D)
        x = self.x_context(torch.cat((context, input), 1))  # (B, 2*H), (B, D) -> (B, 2*H + D) -> (B, D)
        output, hidden = self.lstm(x.unsqueeze(1), hidden)  # (B, 1, D), ((1, B, H), (1, B, H)) -> (B, 1, H), hidden

        h_decoder, c_decoder = hidden  # (1, B, H), (1, B, H)
        hidden_hat = torch.cat((h_decoder.view(-1, self.args.hidden_dim),
                                c_decoder.view(-1, self.args.hidden_dim)), 1)  # (B, H), (B, H) -> (B, 2*H)
        context, attn_dist, coverage = self.attention(hidden_hat, encoder_outputs, enc_padding_mask, coverage)
        # (B, 2*H), (B, L), (B, L) <- (B, 2*H), (B, L, 2*H), (B, L), (B, L)

        p_gen = None
        if self.args.pointer_gen:
            p_gen_input = torch.cat((context, hidden_hat, x), 1)  # (B, 2*H), (B, 2*H), (B, D) -> (B, 2*2*H + D)
            p_gen = self.p_gen_linear(p_gen_input)  # (B, 2*2*H + D) -> (B, 1)
            p_gen = torch.sigmoid(p_gen)  # (B, 1)

        output = torch.cat((output.view(-1, self.args.hidden_dim), context), 1)  # (B, H), (B, 2*H) -> (B, 3*H)
        output = self.out_linear(output)  # (B, 3*H) -> (B, H)
        # output = F.relu(output)

        ## map (B, H) -> (B, V)
        # output = self.out2(output)  # (B, H) -> (B, V); change to below matrix multiply
        output_pos = self.hidden2dim_pos(output)  # (B, H) -> (B, D)
        output_neg = self.hidden2dim_neg(output)  # (B, H) -> (B, D)
        output_pos = F.relu(torch.mm(output_pos, self.embed.weight.t()))  # (B, D) * (D, V) -> (B, V)
        output_neg = F.relu(torch.mm(output_neg, self.embed.weight.t()))  # (B, D) * (D, V) -> (B, V)
        output = output_pos - output_neg  # (B, V)

        ## change output to vocab_dist
        vocab_dist = F.softmax(output, dim=1)  # (B, V)

        if self.args.pointer_gen:
            vocab_dist_ = p_gen * vocab_dist  # (B, 1) * (B, V) -> (B, V)
            attn_dist_ = (1 - p_gen) * attn_dist  # (B, 1) * (B, L) -> (B, L)

            if extra_zeros is not None:
                vocab_dist_ = torch.cat([vocab_dist_, extra_zeros], 1)  # (B, V), (B, n) -> (B, V + n)

            final_dist = vocab_dist_.scatter_add(1, enc_batch_extend_vocab, attn_dist_)  # (B, V) -> (B, V + n)
        else:
            final_dist = vocab_dist  # (B, V)

        return final_dist, hidden, context, attn_dist, p_gen, coverage  # (B, V), ((1, B, H), (1, B, H)), (B, 2*H), (B, L), (B, 1), (B, L)
Example No. 10
def addDecovRegularizer(loss, regParam, activations):
    for i in range(len(activations)):
        x = activations[i]
        batch_size = x.shape[0]  # x.shape is e.g. (2048, 100)
        h_centered = x - torch.mean(x, dim=0, keepdim=True)  # mean center activations
        covariance = torch.mm(h_centered.t(), h_centered)  # get small x small covariance matrix
        n = covariance.shape[0]
        # zero out the diagonals of the covariance matrix (as we don't want to penalize
        # the neurons against themselves); alternative: t[torch.eye(n).byte()] = 5
        covariance[np.diag_indices(n)] = 0
        covariance /= batch_size  # normalize by the length of the minibatch
        cost = (0.5 * regParam) * torch.sum(torch.mm(covariance, covariance))
        loss += cost
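
# Hedged usage sketch (added, not from the original source). addDecovRegularizer relies on
# `loss += cost` updating the loss tensor in place and returns nothing, so a caller would
# typically pass the already-computed task loss together with the saved layer activations.
# The names `criterion`, `output`, `target`, `h1` and `h2` below are illustrative assumptions:
#
#     loss = criterion(output, target)
#     addDecovRegularizer(loss, regParam=1e-4, activations=[h1, h2])
#     loss.backward()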
Example No. 11
 def merge(tbl):
     inp = scn.InputBatch(2, spatial_size)
     center = spatial_size.float().view(1, 2) / 2
     p = torch.LongTensor(2)
     v = torch.FloatTensor([1, 0, 0])
     for char in tbl['input']:
         inp.addSample()
         m = torch.eye(2)
         r = random.randint(1, 3)
         alpha = random.uniform(-0.2, 0.2)
         if r == 1:  # r picks shear-x, shear-y, or rotation; alpha stays in (-0.2, 0.2)
             m[0][1] = alpha
         elif r == 2:
             m[1][0] = alpha
         else:
             m = torch.mm(m, torch.FloatTensor(
                 [[math.cos(alpha), math.sin(alpha)],
                  [-math.sin(alpha), math.cos(alpha)]]))
         c = center + torch.FloatTensor(1, 2).uniform_(-8, 8)
         for stroke in char:
             stroke = stroke.float() / 255 - 0.5
             stroke = c.expand_as(stroke) + \
                 torch.mm(stroke, m * (Scale - 0.01))
             ###############################################################
             # To avoid GIL problems use a helper function:
             scn.dim_fn(
                 2,
                 'drawCurve')(
                 inp.metadata.ffi,
                 inp.features,
                 stroke)
             ###############################################################
             # Above is equivalent to :
             # x1,x2,y1,y2,l=0,stroke[0][0],0,stroke[0][1],0
             # for i in range(1,stroke.size(0)):
             #     x1=x2
             #     y1=y2
             #     x2=stroke[i][0]
             #     y2=stroke[i][1]
             #     l=1e-10+((x2-x1)**2+(y2-y1)**2)**0.5
             #     v[1]=(x2-x1)/l
             #     v[2]=(y2-y1)/l
             #     l=max(x2-x1,y2-y1,x1-x2,y1-y2,0.9)
             #     for j in numpy.arange(0,1,1/l):
             #         p[0]=math.floor(x1*j+x2*(1-j))
             #         p[1]=math.floor(y1*j+y2*(1-j))
             #         inp.setLocation(p,v,False)
             ###############################################################
     inp.precomputeMetadata(precomputeStride)
     return {'input': inp, 'target': torch.LongTensor(tbl['target']) - 1}
Example No. 12
    def train(self, x):
        self.model.train()
        
        o = self.model(x)
        loss = torch.mean(torch.pow(torch.mm(o, self.B.t()) - x, 2))

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        U, _, V = torch.svd(torch.mm(x.t().data, o.data))
        self.B = torch.autograd.Variable(torch.mm(U, V.t()))
        
        return loss.data.cpu().numpy()
def anomalyScore(args, model, dataset, mean, cov, channel_idx=0, score_predictor=None):
    predictions = []
    rearranged = []
    errors = []
    hiddens = []
    predicted_scores = []
    with torch.no_grad():
        # Turn on evaluation mode which disables dropout.
        model.eval()
        pasthidden = model.init_hidden(1)
        for t in range(len(dataset)):
            out, hidden = model.forward(dataset[t].unsqueeze(0), pasthidden)
            predictions.append([])
            rearranged.append([])
            errors.append([])
            hiddens.append(model.extract_hidden(hidden))
            if score_predictor is not None:
                predicted_scores.append(score_predictor.predict(model.extract_hidden(hidden).numpy()))

            predictions[t].append(out.data.cpu()[0][0][channel_idx])
            pasthidden = model.repackage_hidden(hidden)
            for prediction_step in range(1, args.prediction_window_size):
                out, hidden = model.forward(out, hidden)
                predictions[t].append(out.data.cpu()[0][0][channel_idx])

            if t >= args.prediction_window_size:
                for step in range(args.prediction_window_size):
                    rearranged[t].append(
                        predictions[step + t - args.prediction_window_size][args.prediction_window_size - 1 - step])
                rearranged[t] = torch.FloatTensor(rearranged[t]).to(args.device).unsqueeze(0)
                errors[t] = rearranged[t] - dataset[t][0][channel_idx]
            else:
                rearranged[t] = torch.zeros(1,args.prediction_window_size).to(args.device)
                errors[t] = torch.zeros(1, args.prediction_window_size).to(args.device)

    predicted_scores = np.array(predicted_scores)
    scores = []
    for error in errors:
        mult1 = error-mean.unsqueeze(0) # [ 1 * prediction_window_size ]
        mult2 = torch.inverse(cov) # [ prediction_window_size * prediction_window_size ]
        mult3 = mult1.t() # [ prediction_window_size * 1 ]
        score = torch.mm(mult1,torch.mm(mult2,mult3))
        scores.append(score[0][0])

    scores = torch.stack(scores)
    rearranged = torch.cat(rearranged,dim=0)
    errors = torch.cat(errors,dim=0)

    return scores, rearranged, errors, hiddens, predicted_scores
Example No. 14
 def forward(self, input_n, hidden, phi, nh):
     self.batch_size = input_n.size()[0]
     hidden = torch.cat((hidden, input_n), 2)
     # Aggregate representations
     h_conv = torch.div(torch.bmm(phi, hidden), nh)
     hidden = hidden.view(-1, self.hidden_size + self.input_size)
     h_conv = h_conv.view(-1, self.hidden_size + self.input_size)
     # h_conv has shape (batch_size, n, hidden_size + input_size)
     m1 = (torch.mm(hidden, self.W1)
           .view(self.batch_size, -1, self.hidden_size))
     m2 = (torch.mm(h_conv, self.W2)
           .view(self.batch_size, -1, self.hidden_size))
     m3 = self.b.unsqueeze(0).unsqueeze(1).expand_as(m2)
     hidden = torch.sigmoid(m1 + m2 + m3)
     return hidden
Example No. 15
def count_accuracy(X, true_counts, air, batch_size):
    assert X.size(0) == true_counts.size(0), 'Size mismatch.'
    assert X.size(0) % batch_size == 0, 'Input size must be multiple of batch_size.'
    counts = torch.LongTensor(3, 4).zero_()
    error_latents = []
    error_indicators = []

    def count_vec_to_mat(vec, max_index):
        out = torch.LongTensor(vec.size(0), max_index + 1).zero_()
        out.scatter_(1, vec.type(torch.LongTensor).view(vec.size(0), 1), 1)
        return out

    for i in range(X.size(0) // batch_size):
        X_batch = X[i * batch_size:(i + 1) * batch_size]
        true_counts_batch = true_counts[i * batch_size:(i + 1) * batch_size]
        z_where, z_pres = air.guide(X_batch, batch_size)
        inferred_counts = sum(z.cpu() for z in z_pres).squeeze().data
        true_counts_m = count_vec_to_mat(true_counts_batch, 2)
        inferred_counts_m = count_vec_to_mat(inferred_counts, 3)
        counts += torch.mm(true_counts_m.t(), inferred_counts_m)
        error_ind = 1 - (true_counts_batch == inferred_counts)
        error_ix = error_ind.nonzero().squeeze()
        error_latents.append(latents_to_tensor((z_where, z_pres)).index_select(0, error_ix))
        error_indicators.append(error_ind)

    acc = counts.diag().sum().float() / X.size(0)
    error_indices = torch.cat(error_indicators).nonzero().squeeze()
    if X.is_cuda:
        error_indices = error_indices.cuda()
    return acc, counts, torch.cat(error_latents), error_indices
Example No. 16
    def _step(self, tok, states, attention):
        prev_states, prev_out = states
        lstm_in = torch.cat(
            [self._embedding(tok).squeeze(1), prev_out],
            dim=1
        )
        states = self._lstm(lstm_in, prev_states)
        lstm_out = states[0][-1]
        query = torch.mm(lstm_out, self._attn_w)
        attention, attn_mask, extend_src, extend_vsize = attention
        context, score = step_attention(
            query, attention, attention, attn_mask)
        dec_out = self._projection(torch.cat([lstm_out, context], dim=1))

        # extend generation prob to extended vocabulary
        gen_prob = self._compute_gen_prob(dec_out, extend_vsize)
        # compute the probability of copying
        copy_prob = torch.sigmoid(self._copy(context, states[0][-1], lstm_in))
        # add the copy prob to existing vocab distribution
        lp = torch.log(
            ((-copy_prob + 1) * gen_prob
            ).scatter_add(
                dim=1,
                index=extend_src.expand_as(score),
                source=score * copy_prob
        ) + 1e-8)  # numerical stability for log
        return lp, (states, dec_out), score
Example No. 17
 def _step(self, tok, states, attention):
     prev_states, prev_out = states
     lstm_in = torch.cat(
         [self._embedding(tok).squeeze(1), prev_out],
         dim=1
     )
     states = self._lstm(lstm_in, prev_states)
     lstm_out = states[0][-1]
     query = torch.mm(lstm_out, self._attn_w)
     attention, attn_mask = attention
     context, score = step_attention(
         query, attention, attention, attn_mask)
     dec_out = self._projection(torch.cat([lstm_out, context], dim=1))
     states = (states, dec_out)
     logit = torch.mm(dec_out, self._embedding.weight.t())
     return logit, states, score
Example No. 18
        def test_shape(di, dj, dk):
            x = self._gen_sparse(2, 20, [di, dj])[0]
            y = self.randn(dj, dk)

            res = torch.hsmm(x, y)
            expected = torch.mm(x.to_dense(), y)
            self.assertEqual(res.to_dense(), expected)
Example No. 19
        def test_shape(di, dj, dk):
            x = self._gen_sparse(2, 20, [di, dj])[0]
            y = self.randn(dj, dk)

            res = torch.dsmm(x, y)
            expected = torch.mm(self.safeToDense(x), y)
            self.assertEqual(res, expected)
Example No. 20
    def forward(self, X, posterior_mean = False):
        """
        Function call to generate the output; every time we call it, the dynamic graph is created.
        There can be differences between forward in training and test:
            - In dropout we do not zero neurons in test
            - In Variational Inference we don't randomly sample from the posterior
        
        We create the forward pass by performing operations between the input X (Nsam_batch, Ndim)
        and the parameters of the model that we should have initialized in the __init__
        """
        
        ## We need to sample from the posterior !! 
        self.sample_posterior(posterior_mean)
        
        o1 = self.linear1(X)
#        o1 = torch.mm(X, self.W1) + self.b1
#        print ("x shape: ", X.shape, "W1 shape: ", self.W1.shape, "b1 shape: ", self.b1.shape)
#        print ("o1 shape: ", o1.shape)
#        print ("W2 shape: ", self.W2.shape, "b2 shape: ", self.b2.shape)
        
        ## Apply non-linearity
        o1 = self.cf_a.activation_func(o1)
        o1 = F.dropout(o1,p = self.cf_a.dop, training = self.training)
        o2 = torch.mm(o1, self.W2) + self.b2
#        print ("o2 shape: ", o2.shape)
        return o2
Example No. 21
def NN(epoch, net, lemniscate, trainloader, testloader, recompute_memory=0):
    net.eval()
    net_time = AverageMeter()
    cls_time = AverageMeter()
    losses = AverageMeter()
    correct = 0.
    total = 0
    testsize = testloader.dataset.__len__()

    trainFeatures = lemniscate.memory.t()
    if hasattr(trainloader.dataset, 'imgs'):
        trainLabels = torch.LongTensor([y for (p, y) in trainloader.dataset.imgs]).cuda()
    else:
        trainLabels = torch.LongTensor(trainloader.dataset.train_labels).cuda()

    if recompute_memory:
        transform_bak = trainloader.dataset.transform
        trainloader.dataset.transform = testloader.dataset.transform
        temploader = torch.utils.data.DataLoader(trainloader.dataset, batch_size=100, shuffle=False, num_workers=1)
        for batch_idx, (inputs, targets, indexes) in enumerate(temploader):
            inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = Variable(inputs, volatile=True), Variable(targets)
            batchSize = inputs.size(0)
            features = net(inputs)
            trainFeatures[:, batch_idx*batchSize:batch_idx*batchSize+batchSize] = features.data.t()
        trainLabels = torch.LongTensor(temploader.dataset.train_labels).cuda()
        trainloader.dataset.transform = transform_bak
    
    end = time.time()
    for batch_idx, (inputs, targets, indexes) in enumerate(testloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        batchSize = inputs.size(0)
        features = net(inputs)
        net_time.update(time.time() - end)
        end = time.time()

        dist = torch.mm(features.data, trainFeatures)

        yd, yi = dist.topk(1, dim=1, largest=True, sorted=True)
        candidates = trainLabels.view(1,-1).expand(batchSize, -1)
        retrieval = torch.gather(candidates, 1, yi)

        retrieval = retrieval.narrow(1, 0, 1).clone().view(-1)
        yd = yd.narrow(1, 0, 1)

        total += targets.size(0)
        correct += retrieval.eq(targets.data).cpu().sum()
        
        cls_time.update(time.time() - end)
        end = time.time()

        print('Test [{}/{}]\t'
              'Net Time {net_time.val:.3f} ({net_time.avg:.3f})\t'
              'Cls Time {cls_time.val:.3f} ({cls_time.avg:.3f})\t'
              'Top1: {:.2f}'.format(
              total, testsize, correct*100./total, net_time=net_time, cls_time=cls_time))

    return correct/total
Example No. 22
def l2l_validate(model, cluster_center, n_epoch=100):
    val_accuracy = []
    for epoch in range(n_epoch):
        batch = generate_data(cluster_center)
        x, y = Variable(torch.from_numpy(batch[0])).float(), Variable(torch.from_numpy(batch[1]))
        w, b = model(x)
        M = Variable(torch.zeros(model.n_class, model.n_dim))
        B = Variable(torch.zeros(model.n_class))
        for k in range(model.n_class):
            M[k] = torch.cat((w[:, 0][y == k].view(-1, 1), w[:, 1][y == k].view(-1, 1)), 1).mean(0)
            B[k] = b[y == k].mean()
        pred = torch.mm(x, M.t()) + B.view(1, -1).expand_as(torch.mm(x, M.t()))
        pred = pred.data.max(1)[1]
        accuracy = pred.eq(y.data).cpu().sum() * 1.0 / y.size()[0]
        print('accuracy: %.2f' % accuracy)
        val_accuracy.append(accuracy)
    return numpy.mean(numpy.asarray(val_accuracy))
Example No. 23
def memModel(contxtWords, aspectWords, position, sentLength):
    vaspect = aspectWords
    for i in range(hopNumber):
        Vi = 1.0 - position / sentLength - (i / vectorLength) * (1.0 - 2.0 * (position / sentLength))
        Mi = Vi.expand_as(contxtWords) * contxtWords

        attentionInputs = torch.cat([Mi, vaspect.expand(vectorLength, sentLength)])
        attentionA = torch.mm(attention_W, attentionInputs)

        gi = torch.tanh(attentionA + attention_b.expand_as(attentionA))
        alpha = softmax(gi)

        linearLayerOut = torch.mm(linearLayer_W, vaspect) + linearLayer_b
        vaspect = torch.sum(alpha.expand_as(Mi) * Mi, 1) + linearLayerOut

    finallinearLayerOut = torch.mm(softmaxLayer_W, vaspect) + softmaxLayer_b
    return finallinearLayerOut
Example No. 24
 def nn(self, word, k):
     embedding = self.mu.weight.data.cpu() # [dict, embed_size]
     vector = embedding[self.dset.stoi[word], :].view(-1, 1) # [embed_size, 1]
     distance = torch.mm(embedding, vector).squeeze() / torch.norm(embedding, 2, 1)
     distance = distance / torch.norm(vector, 2, 0)[0]
     distance = distance.numpy()
     index = np.argsort(distance)[::-1][:k]  # indices of the k most similar words (largest cosine similarity)
     return [self.dset.itos[x] for x in index]
Example No. 25
    def _run_attention(self, h_all, return_weights=False):
        if not self.has_batch_dim:
            att_raw = torch.mm(h_all, self.attention_map[:, None])
            att = F.softmax(att_raw.squeeze(), dim=0)

            if return_weights:
                return att
            else:
                return torch.mm(att[None, :], h_all).squeeze()
        else:
            att_raw = torch.bmm(h_all, self.attention_map[:, :, None])
            att = F.softmax(att_raw.squeeze(), dim=0)

            if return_weights:
                return att
            else:
                return torch.bmm(att[:, None, :], h_all).squeeze()
Example No. 26
def l2l_train(model, cluster_center, n_epoch=10000):
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    for epoch in range(n_epoch):
        batch = generate_data(cluster_center)
        x, y = Variable(torch.from_numpy(batch[0])).float(), Variable(torch.from_numpy(batch[1]))
        optimizer.zero_grad()
        w, b = model(x)
        M = Variable(torch.zeros(model.n_class, model.n_dim))
        B = Variable(torch.zeros(model.n_class))
        for k in range(model.n_class):
            M[k] = torch.cat((w[:, 0][y == k].view(-1, 1), w[:, 1][y == k].view(-1, 1)), 1).mean(0)
            B[k] = b[y == k].mean()
        pred = torch.mm(x, M.t()) + B.view(1, -1).expand_as(torch.mm(x, M.t()))
        loss = F.cross_entropy(pred, y)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {}\tLoss: {:.6f}'.format(epoch, loss.data[0]))
Example No. 27
    def forward(self, input_, c_input, hx):
        """
        Args:
            batch = 1
            input_: A (batch, input_size) tensor containing input
                features.
            c_input: A list with size c_num; each element is the input ct from a skip word, of size (batch, hidden_size).
            hx: A tuple (h_0, c_0), which contains the initial hidden
                and cell state, where the size of both states is
                (batch, hidden_size).
        Returns:
            h_1, c_1: Tensors containing the next hidden and cell state.
        """

        h_0, c_0 = hx
        batch_size = h_0.size(0)
        #assert(batch_size == 1)
        bias_batch = (self.bias.unsqueeze(0).expand(batch_size, *self.bias.size()))
        wh_b = torch.addmm(bias_batch, h_0, self.weight_hh)
        wi = torch.mm(input_, self.weight_ih)
        i, o, g = torch.split(wh_b + wi, split_size_or_sections=self.hidden_size, dim=1)
        i = torch.sigmoid(i)
        g = torch.tanh(g)
        o = torch.sigmoid(o)
        c_num = len(c_input)
        if c_num == 0:
            f = 1 - i
            c_1 = f*c_0 + i*g
            h_1 = o * torch.tanh(c_1)
        else:
            c_input_var = torch.cat(c_input, 0)
            alpha_bias_batch = (self.alpha_bias.unsqueeze(0).expand(batch_size, *self.alpha_bias.size()))
            c_input_var = c_input_var.squeeze(1) ## (c_num, hidden_dim)
            alpha_wi = torch.addmm(self.alpha_bias, input_, self.alpha_weight_ih).expand(c_num, self.hidden_size)
            alpha_wh = torch.mm(c_input_var, self.alpha_weight_hh)
            alpha = torch.sigmoid(alpha_wi + alpha_wh)
            ## alpha  = i concat alpha
            alpha = torch.exp(torch.cat([i, alpha],0))
            alpha_sum = alpha.sum(0)
            ## alpha = softmax for each hidden element
            alpha = torch.div(alpha, alpha_sum)
            merge_i_c = torch.cat([g, c_input_var],0)
            c_1 = merge_i_c * alpha
            c_1 = c_1.sum(0).unsqueeze(0)
            h_1 = o * torch.tanh(c_1)
        return h_1, c_1
Example No. 28
    def forward(ctx, input1, input2, weight, bias=None):
        ctx.save_for_backward(input1, input2, weight, bias)

        output = input1.new(input1.size(0), weight.size(0))

        buff = input1.new()

        # compute output scores:
        for k, w in enumerate(weight):
            torch.mm(input1, w, out=buff)
            buff.mul_(input2)
            torch.sum(buff, 1, keepdim=True, out=output.narrow(1, k, 1))

        if bias is not None:
            output.add_(bias.expand_as(output))

        return output
Example No. 29
        def test_shape(di, dj, dk):
            x, _, _ = self._gen_sparse(2, 20, [di, dj])
            t = torch.randn(di, dk)
            y = torch.randn(dj, dk)
            alpha = random.random()
            beta = random.random()

            res = torch.addmm(alpha, t, beta, x, y)
            expected = torch.addmm(alpha, t, beta, self.safeToDense(x), y)
            self.assertEqual(res, expected)

            res = torch.addmm(t, x, y)
            expected = torch.addmm(t, self.safeToDense(x), y)
            self.assertEqual(res, expected)

            res = torch.mm(x, y)
            expected = torch.mm(self.safeToDense(x), y)
            self.assertEqual(res, expected)
Example No. 30
        def test_shape(di, dj, dk):
            x = self._gen_sparse(2, 20, [di, dj])[0]
            y = self.randn(dj, dk)

            res = torch.hsmm(x, y)
            # TODO: use self.safeToDense(), but this triggers
            # https://github.com/pytorch/pytorch/issues/3170
            expected = torch.mm(x.to_dense(), y)
            self.assertEqual(res.to_dense(), expected)
Example No. 31
    def forward(self, user_X, item_X):
        # ----------------------------------------GCN layer----------------------------------------

        user_X = self.sparse_dropout(user_X)
        item_X = self.sparse_dropout(item_X)

        embeddings = []
        if self.accum == 'sum':
            wu = 0.
            wv = 0.
            for i in range(self.num_support):
                # weight sharing
                wu = self.weights_u[i] + wu
                wv = self.weights_v[i] + wv

                # multiply feature matrices with weights
                if self.sparse_feature:
                    temp_u = torch.sparse.mm(user_X, wu)
                    temp_v = torch.sparse.mm(item_X, wv)
                else:
                    temp_u = torch.mm(user_X, wu)
                    temp_v = torch.mm(item_X, wv)
                all_embedding = torch.cat([temp_u, temp_v])

                # then multiply with adj matrices
                graph_A = self.support[i]
                all_emb = torch.sparse.mm(graph_A, all_embedding)
                embeddings.append(all_emb)

            embeddings = torch.stack(embeddings, dim=1)
            embeddings = torch.sum(embeddings, dim=1)
        else:
            for i in range(self.num_support):
                # multiply feature matrices with weights
                if self.sparse_feature:
                    temp_u = torch.sparse.mm(user_X, self.weights_u[i])
                    temp_v = torch.sparse.mm(item_X, self.weights_v[i])
                else:
                    temp_u = torch.mm(user_X, self.weights_u[i])
                    temp_v = torch.mm(item_X, self.weights_v[i])
                all_embedding = torch.cat([temp_u, temp_v])

                # then multiply with adj matrices
                graph_A = self.support[i]
                all_emb = torch.sparse.mm(graph_A, all_embedding)
                embeddings.append(all_emb)

            embeddings = torch.cat(embeddings, dim=1)

        users, items = torch.split(embeddings,
                                   [self.num_users, self.num_items])

        u_hidden = self.activate(users)
        v_hidden = self.activate(items)

        # ----------------------------------------Dense Layer----------------------------------------

        u_hidden = self.dropout(u_hidden)
        v_hidden = self.dropout(v_hidden)

        u_hidden = self.dense_layer_u(u_hidden)
        v_hidden = self.dense_layer_u(v_hidden)

        u_outputs = self.dense_activate(u_hidden)
        v_outputs = self.dense_activate(v_hidden)

        return u_outputs, v_outputs
Example No. 32
epoch = 5000  # Setting training iterations
lr = 0.1  # Setting learning rate
inputlayer_neurons = X.shape[1]  # number of features in data set
hiddenlayer_neurons = 3  # number of hidden layer neurons
output_neurons = 1  # number of neurons at output layer

# weight and bias initialization
wh = torch.randn(inputlayer_neurons,
                 hiddenlayer_neurons).type(torch.FloatTensor)
bh = torch.randn(1, hiddenlayer_neurons).type(torch.FloatTensor)
wout = torch.randn(hiddenlayer_neurons, output_neurons)
bout = torch.randn(1, output_neurons)

for i in range(epoch):
    # Forward Propagation
    hidden_layer_input1 = torch.mm(X, wh)
    hidden_layer_input = hidden_layer_input1 + bh
    hidden_layer_activations = sigmoid(hidden_layer_input)

    output_layer_input1 = torch.mm(hidden_layer_activations, wout)
    output_layer_input = output_layer_input1 + bout
    output = sigmoid(output_layer_input)

    # Backpropagation
    E = y - output
    slope_output_layer = derivatives_sigmoid(output)
    slope_hidden_layer = derivatives_sigmoid(hidden_layer_activations)
    d_output = E * slope_output_layer
    Error_at_hidden_layer = torch.mm(d_output, wout.t())
    d_hiddenlayer = Error_at_hidden_layer * slope_hidden_layer
    wout += torch.mm(hidden_layer_activations.t(), d_output) * lr
Example No. 33
 def sample_v(self, y):  # y stands for hidden nodes
     wy = torch.mm(y, self.W)  # as the weight matrix is oriented for p_v_given_h, we do not need to take the transpose here
     activation = wy + self.b.expand_as(wy)
     p_v_given_h = torch.sigmoid(activation)
     return p_v_given_h, torch.bernoulli(p_v_given_h)
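
 # A complementary sample_h sketch (added, not part of the original example). It assumes the
 # conventional layout implied by sample_v above, i.e. self.W of shape (n_hidden, n_visible)
 # and a hidden bias named self.a -- both the shape and the name self.a are assumptions.
 def sample_h(self, x):  # x stands for visible nodes
     wx = torch.mm(x, self.W.t())
     activation = wx + self.a.expand_as(wx)
     p_h_given_v = torch.sigmoid(activation)
     return p_h_given_v, torch.bernoulli(p_h_given_v)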
Example No. 34
# mean
print(
    '\nmean',
    '\nnumpy: ',
    np.mean(data),  # 0.0
    '\ntorch: ',
    torch.mean(tensor)  # 0.0
)

# matrix multiplication
data = [[1, 2], [3, 4]]
tensor = torch.FloatTensor(data)  # 32-bit floating point
# correct method
print(
    '\nmatrix multiplication (matmul)',
    '\nnumpy: ',
    np.matmul(data, data),  # [[7, 10], [15, 22]]
    '\ntorch: ',
    torch.mm(tensor, tensor)  # [[7, 10], [15, 22]]
)
# incorrect method
data = np.array(data)
print(
    '\nmatrix multiplication (dot)',
    '\nnumpy: ',
    data.dot(data),  # [[7, 10], [15, 22]]
    '\ntorch: ',
    torch.dot(tensor.dot(
        tensor))  # this will convert tensor to [1,2,3,4], you'll get 30.0
)
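
# Added note (not in the original tutorial): torch.matmul is the modern 2-D analogue of
# np.matmul and avoids the flattening pitfall shown above.
print(
    '\nmatrix multiplication (torch.matmul)',
    '\ntorch: ',
    torch.matmul(tensor, tensor)  # [[7, 10], [15, 22]]
)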
Example No. 35
    def train(self):
        """Train generator and discriminator."""
        fixed_noise = self.to_variable(torch.randn(self.batch_size,
                                                   self.z_dim))
        total_step = len(self.data_loader)

        for epoch in range(self.num_epochs):
            for i, images in enumerate(self.data_loader):

                #===================== Train D =====================#
                images = self.to_variable(images)
                batch_size = images.size(0)
                noise = self.to_variable(torch.randn(batch_size, self.z_dim))

                # Train D to recognize real images as real.
                outputs = self.discriminator(images)
                real_loss = torch.mean(torch.sum((outputs - images)**2, 1))

                # Train D to recognize fake images as fake.
                fake_images = self.generator(noise)
                outputs = self.discriminator(fake_images)
                fake_loss = torch.mean(torch.sum((outputs - fake_images)**2,
                                                 1))

                # Backprop + optimize
                d_loss = real_loss + torch.nn.functional.relu(
                    1 - fake_loss)  # 1 is margin
                self.discriminator.zero_grad()
                d_loss.backward()
                self.d_optimizer.step()

                #===================== Train G =====================#
                noise = self.to_variable(torch.randn(batch_size, self.z_dim))

                # Train G so that D recognizes G(z) as real.
                fake_images = self.generator(noise)
                outputs = self.discriminator(fake_images)
                g_loss = torch.mean(torch.sum((outputs - fake_images)**2, 1))

                # Generator PT Regularizer Term
                # PT Reg. Term
                sample = fake_images.view(-1, batch_size)  # 12288 x 32
                nom = torch.mm(torch.transpose(sample, 0, 1), sample)  # 32x32

                denoms = torch.zeros((64 * 64 * 3, batch_size))
                denom_column = torch.sqrt(torch.sum(torch.pow(sample, 2),
                                                    0))  # Should be 32x32

                denoms[0, :] = denom_column.data
                denom = torch.mm(torch.transpose(denoms, 0, 1), denoms)

                denom = denom.cuda()
                pt = torch.pow(torch.div(nom.data, denom), 2)  # 32x32

                # Remove Diagonal Term
                pt -= torch.diag(torch.diag(pt, 0))

                # Final PT Value
                pt = torch.sum(pt) / (batch_size * (batch_size - 1))
                g_loss = g_loss + 0.1 * pt

                # Backprop + optimize
                self.generator.zero_grad()
                g_loss.backward()
                self.g_optimizer.step()

                # print the log info
                if (i + 1) % self.log_step == 0:
                    print(
                        'Epoch [%d/%d], Step[%d/%d], d_real_loss: %.4f, '
                        'd_fake_loss: %.4f, g_loss: %.4f' %
                        (epoch + 1, self.num_epochs, i + 1, total_step,
                         real_loss.data[0], fake_loss.data[0], g_loss.data[0]))

                # save the sampled images
                if (i + 1) % self.sample_step == 0:
                    fake_images = self.generator(fixed_noise)
                    torchvision.utils.save_image(
                        self.denorm(fake_images.data),
                        os.path.join(
                            self.sample_path,
                            'fake_samples-%d-%d.png' % (epoch + 1, i + 1)))

            # save the model parameters for each epoch
            g_path = os.path.join(self.model_path,
                                  'generator-%d.pkl' % (epoch + 1))
            d_path = os.path.join(self.model_path,
                                  'discriminator-%d.pkl' % (epoch + 1))
            torch.save(self.generator.state_dict(), g_path)
            torch.save(self.discriminator.state_dict(), d_path)
Example No. 36
def non_max_suppression(prediction,
                        conf_thres=0.25,
                        iou_thres=0.45,
                        classes=None,
                        agnostic=False,
                        multi_label=False,
                        labels=()):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Returns:
         list of detections, one (n,6) tensor per image [xyxy, conf, cls]
    """

    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Settings
    # (pixels) minimum and maximum box width and height
    min_wh, max_wh = 2, 4096
    max_det = 300  # maximum number of detections per image
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros(
        (0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = torch.zeros((len(l), nc + 5), device=x.device)
            v[:, :4] = l[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()),
                          1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Apply finite constraint
        # if not torch.isfinite(x).all():
        #     x = x[torch.isfinite(x).all(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            # sort by confidence
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        # boxes (offset by class), scores
        boxes, scores = x[:, :4] + c, x[:, 4]
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n <
                      3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(
                1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output
Example No. 37
 def forward(self, input):
   x = input.view(-1, visible_size)
   x = torch.sigmoid(torch.mm(x, self.encoder))
   x = torch.sigmoid(torch.mm(x, torch.transpose(self.encoder, 0, 1)))
   return x.view_as(input)
 def model(self, x):
     x = F.relu(torch.add(torch.mm(x, self.weights[0]),self.weights[1]))
     x = F.relu(torch.add(torch.mm(x, self.weights[2]), self.weights[3]))
     x = F.softmax(torch.add(torch.mm(x, self.weights[4]), self.weights[5]))
     return x
def net(x):
    h = relu(torch.mm(x.view((-1, num_inputs)), w1) + b1)
    # h = torch.tensor(h, dtype=torch.float, requires_grad=True)
    return softmax(torch.mm(h, w2) + b2)
Example No. 40
    def forward(self,
                src_inputs,
                src_mask,
                src_langs,
                tgt_inputs,
                tgt_mask,
                tgt_langs,
                src_neg_inputs=None,
                src_neg_mask=None,
                src_neg_langs=None,
                tgt_neg_inputs=None,
                tgt_neg_mask=None,
                tgt_neg_langs=None,
                normalize: bool = False):
        "Take in and process masked src and target sequences."
        device = self.encoder.embeddings.word_embeddings.weight.device
        src_langs = src_langs.unsqueeze(-1).expand(-1, src_inputs.size(-1))
        src_inputs = src_inputs.to(device)
        src_langs = src_langs.to(device)

        if src_mask.device != device:
            src_mask = src_mask.to(device)
        src_embed = self.encode(src_inputs, src_mask, src_langs)

        tgt_langs = tgt_langs.unsqueeze(-1).expand(
            -1, tgt_inputs.size(-1)).to(device)
        if tgt_inputs.device != device:
            tgt_inputs = tgt_inputs.to(device)
            tgt_mask = tgt_mask.to(device)
        tgt_embed = self.encode(tgt_inputs, tgt_mask, tgt_langs)

        src_norm = torch.norm(src_embed, dim=-1, p=2).unsqueeze(-1) + 1e-4
        src_embed = torch.div(src_embed, src_norm)
        tgt_norm = torch.norm(tgt_embed, dim=-1, p=2).unsqueeze(-1) + 1e-4
        tgt_embed = torch.div(tgt_embed, tgt_norm)
        if normalize:
            if src_neg_langs is not None:
                src_neg_langs = src_neg_langs.unsqueeze(-1).expand(
                    -1, src_neg_inputs.size(-1))
                src_neg_inputs = src_neg_inputs.to(device)
                src_neg_langs = src_neg_langs.to(device)

                if src_neg_mask.device != device:
                    src_neg_mask = src_neg_mask.to(device)
                src_neg_embed = self.encode(src_neg_inputs, src_neg_mask,
                                            src_neg_langs)
                src_neg_norm = torch.norm(src_neg_embed, dim=-1,
                                          p=2).unsqueeze(-1) + 1e-4
                src_neg_embed = torch.div(src_neg_embed, src_neg_norm)

                tgt_neg_langs = tgt_neg_langs.unsqueeze(-1).expand(
                    -1, tgt_neg_inputs.size(-1))
                tgt_neg_inputs = tgt_neg_inputs.to(device)
                tgt_neg_langs = tgt_neg_langs.to(device)

                if tgt_neg_mask.device != device:
                    tgt_neg_mask = tgt_neg_mask.to(device)
                tgt_neg_embed = self.encode(tgt_neg_inputs, tgt_neg_mask,
                                            tgt_neg_langs)
                tgt_neg_norm = torch.norm(tgt_neg_embed, dim=-1,
                                          p=2).unsqueeze(-1) + 1e-4
                tgt_neg_embed = torch.div(tgt_neg_embed, tgt_neg_norm)

                tgt_neg_embd = torch.cat([tgt_neg_embed, tgt_embed])
                src_neg_embd = torch.cat([src_neg_embed, src_embed])

                nominator = torch.sum(src_embed * tgt_embed, dim=-1) + 1e-4

                cross_dot = torch.mm(src_embed, tgt_neg_embd.T)
                cross_dot_rev = torch.mm(tgt_embed, src_neg_embd.T)
                cross_dot_all = torch.cat([cross_dot, cross_dot_rev], dim=1)
                denom = torch.log(
                    torch.sum(torch.exp(cross_dot_all), dim=-1) + 1e-4)
                log_neg = torch.sum(denom - nominator) / len(cross_dot)
            else:
                cross_dot = torch.mm(src_embed, tgt_embed.T)
                denom = torch.log(
                    torch.sum(torch.exp(cross_dot), dim=-1) + 1e-4)
                nominator = torch.diagonal(cross_dot[:, :], 0) + 1e-4
                log_neg = torch.sum(denom - nominator) / len(cross_dot)

            return log_neg
        else:
            dot_prod = torch.sum(src_embed * tgt_embed, dim=-1)
            return dot_prod
Example No. 41
def P(z):
    h = nn.relu(torch.mm(z, Wzh) + bzh.repeat(z.size(0), 1))
    X = nn.sigmoid(torch.mm(h, Whx) + bhx.repeat(h.size(0), 1))
    return X
Example No. 42
#
# > **Exercise:** Flatten the batch of images `images`. Then build a multi-layer network with 784 input units, 256 hidden units, and 10 output units using random tensors for the weights and biases. For now, use a sigmoid activation for the hidden layer. Leave the output layer without an activation, we'll add one that gives us a probability distribution next.

# In[5]:


def activation(x):
    return 1 / (1 + torch.exp(-x))


inputs = images.view(images.shape[0], -1)
w1 = torch.randn(784, 256)
b1 = torch.randn(256)
w2 = torch.randn(256, 10)
b2 = torch.randn(10)
h = activation(torch.mm(inputs, w1) + b1)
out = torch.mm(h, w2) + b2
print(out)

# Now we have 10 outputs for our network. We want to pass in an image to our network and get out a probability distribution over the classes that tells us the likely class(es) the image belongs to. Something that looks like this:
# <img src='assets/image_distribution.png' width=500px>
#
# Here we see that the probability for each class is roughly the same. This represents an untrained network: it hasn't seen any data yet, so it just returns a uniform distribution with equal probabilities for each class.
#
# To calculate this probability distribution, we often use the [**softmax** function](https://en.wikipedia.org/wiki/Softmax_function). Mathematically this looks like
#
# $$
# \Large \sigma(x_i) = \cfrac{e^{x_i}}{\sum_k^K{e^{x_k}}}
# $$
#
# What this does is squish each input $x_i$ between 0 and 1 and normalize the values to give you a proper probability distribution where the probabilities sum up to one.
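
# Below is a small added sketch (not part of the original notebook text) of such a
# softmax applied to the `out` scores computed above; `dim=1` sums over the 10 class
# scores of each image, so every row becomes a proper probability distribution.


def softmax(x):
    return torch.exp(x) / torch.sum(torch.exp(x), dim=1).view(-1, 1)


probabilities = softmax(out)
print(probabilities.shape)       # (batch_size, 10)
print(probabilities.sum(dim=1))  # each row sums to 1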
Example No. 43
    def initialize_weights_(self,
                            Dict=None,
                            L1_weight=None,
                            init_type='ista',
                            mu=None):
        """
    Fully initializes the encoder using given weight matrices
      (or randomly, if none are given).
    """
        self.init_type = init_type
        # fix-up L1 weights and mu's.
        if self.L1_weight is None:
            if (L1_weight is None):
                print('Using default L1 weight (0.1).')
                self.L1_weight = 0.1
            else:
                self.L1_weight = L1_weight
        if self.mu is None:
            if mu is None:
                self.mu = 1
                if init_type == 'salsa':
                    print('Using default mu value (1).')
            else:
                self.mu = mu
        # If a dictionary is not provided for initialization, initialize randomly.
        if Dict is None:
            Dict = dictionary(self.data_size, self.code_size, use_cuda=False)

        #-------------------------------------
        # Initialize the loss function.
        self.initialize_cvx_lossFcn_(Dict)

        #-------------------------------------
        # Initialize ISTA-style (first order).
        Wd = Dict.getDecWeights().cpu()
        if init_type == 'ista':
            # Get the maximum eigenvalue.
            Dict.getMaxEigVal()
            self.L = Dict.maxEig
            # Initialize.
            self.We.weight.data = (1 / self.L) * (Wd.detach()).t()
            self.S.weight.data = torch.eye(
                Dict.n) - (1 / self.L) * (torch.mm(Wd.t(), Wd)).detach()
            self.thresh = (self.L1_weight / self.L)
            # Set up the nonlinearity, aka soft-thresholding function.

        #-------------------------------------
        # Initialize FISTA-style (first order).
        elif init_type == 'fista':
            # Get the maximum eigenvalue.
            Dict.getMaxEigVal()
            self.L = Dict.maxEig
            # Initialize.
            self.We.weight.data = Wd.detach().t()
            self.thresh = (self.L1_weight / self.L)
            # Set up the nonlinearity, aka soft-thresholding function.

        #---------------------------------------
        # Initialize SALSA-style (second order).
        elif init_type == 'salsa':
            # Initialize matrices.
            self.We.weight.data = Wd.detach().t()
            AA = torch.mm(Wd.t(), Wd).cpu()
            S_weights = (self.mu * torch.eye(Dict.n) + AA).inverse()
            self.S.weight.data = S_weights.detach()
            self.thresh = (self.L1_weight / self.mu)
            # Set up the nonlinearity, aka soft-thresholding function.

        else:
            raise ValueError(
                'Encoders can only be initialized for "ista"-, "fista"-, or "salsa"-style families.'
            )

        #-------------------------------------
        # Print status of the newly created encoder.
        # print('Encoder, threshold, and loss functions are initialized for {}-type algorithms.'.format(init_type))

        #-------------------------------------
        # Finally, put to device if requested.
        self.We = self.We.to(self.device)
        self.S = self.S.to(self.device)
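# For reference, a minimal sketch of the ISTA-style iteration that the weights
# initialized above (We, S, and thresh) are typically plugged into. The encoder
# class itself is not shown in this snippet, so soft_threshold and the calling
# convention below are illustrative assumptions only.
import torch


def soft_threshold(x, thresh):
    # elementwise shrinkage: sign(x) * max(|x| - thresh, 0)
    return torch.sign(x) * torch.clamp(torch.abs(x) - thresh, min=0.0)


def ista_encode(x, We, S, thresh, n_iters=10):
    # z_{k+1} = soft_threshold(We x + S z_k, thresh), with We and S taken as
    # weight matrices shaped like the Linear layers initialized above
    z = soft_threshold(x @ We.t(), thresh)
    for _ in range(n_iters):
        z = soft_threshold(x @ We.t() + z @ S.t(), thresh)
    return z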
Exemplo n.º 44
    def forward(self, query, context, context_mask):
        batch_size, item_len, dimensions = context.size()

        # mask context
        # In (batch size, item_length)
        # Out (batch size, item_length, 1)
        context_masked = context * context_mask.unsqueeze(2)

        # element-wise matrix product
        # In (batch_size, 1, dimensions) * (batch_size, item_len, dimensions) ->
        # Out (batch_size, item_len, dimensions)
        pq = query.unsqueeze(
            1) * context_masked  #(batch_size, item_len, dimensions)
        pq_ = pq.view(-1, dimensions)  #(batch * item_len, dimensions)

        # a linear layer
        # In (batch_size * item_len, dimensions)
        # Out (batch_size * item_len, dimensions)
        linear_output = self.linear_layer(pq_)
        if self.activation == 0:
            linear_output_ = torch.relu(linear_output)
        elif self.activation == 1:
            linear_output_ = torch.sigmoid(linear_output)
        elif self.activation == 2:
            linear_output_ = torch.tanh(linear_output)
        else:
            raise ValueError('activation must be 0 (relu), 1 (sigmoid), or 2 (tanh)')

        # attention score
        # In (batch_size * item_len, dimensions), (dimensions, 1)
        # Out (batch_size * item_len, 1)
        # reshape tensor
        # Out (batch_size, item_len)
        A_1 = torch.mm(linear_output_, self.h).view(batch_size, item_len)

        # use a mask to filter data
        # keep mask(1) and clear mask(0)
        # In (batch_size, item_len) and (batch_size, item_len)
        # Out (batch_size, item_len)
        A = A_1 * context_mask

        # through softmax for normalization
        # In (batch_size, item_len)
        # Out (batch_size, item_len)
        if self.beta == 1:
            attention_weight = self.softmax(A)  #(5, 7)
            attention_weight_ = attention_weight.unsqueeze(2)
        else:
            #compute softmax on non-zero rows
            # do mask
            A_without_zero = A.sum(1) != 0
            if A_without_zero.sum() != A.shape[0]:  #exist zero row
                A_rest = A[A_without_zero.to(torch.device('cpu'))]
                context_mask_rest = context_mask[A_without_zero.to(
                    torch.device('cpu'))]
                # compute the numerator of \frac{exp(f(i,j))}{(\sum_k exp(f(i,k)))^\beta}
                A_ = torch.exp(A_rest) * context_mask_rest
                # compute lower part
                smoothing_softmax_denominator = A_.sum(1).pow(-self.beta)
                smoothing_softmax_denominator_ = smoothing_softmax_denominator.unsqueeze(
                    1)
                # compute \frac{exp(f(i,j))}{(\sum_k exp(f(i,k)))^\beta}
                attention_weight = A_ * smoothing_softmax_denominator_
                # restore results
                attention_weight_ = torch.zeros(A.shape).to(self.device)
                attention_weight_[A_without_zero.to(
                    torch.device('cpu'))] = attention_weight
                # In (batch_size, item_len)
                # Out (batch_size, item_len, 1)
                attention_weight_ = attention_weight_.unsqueeze(2)
            else:
                # a_ij = \frac{exp(f(p_i, q_j))}{(\sum_j exp(f(p_i, q_j)))^beta}
                # multiplying by the mask here is important so padded positions do not contribute to the sum
                A_ = torch.exp(A) * context_mask
                # get sum on denominator
                smoothing_softmax_denominator = A_.sum(1).pow(-self.beta)
                # In (batch)
                # Out (batch, 1)
                smoothing_softmax_denominator_ = smoothing_softmax_denominator.unsqueeze(
                    1)
                # if A[i, :] is all zero, the weight would blow up to inf, so such rows are filtered again.
                attention_weight = A_ * smoothing_softmax_denominator_
                # In (batch_size, item_len)
                # Out (batch_size, item_len, 1)
                attention_weight_ = attention_weight.unsqueeze(2)  #(5, 7, 1)

        # set up final attended score by element-wise matrix production
        # In (batch_size, item_len, 1) * (batch_size, item_len, dimensions)
        # Out (batch_size, item_len, dimensions)
        ret = attention_weight_ * context_masked  #(5, 7, 10)

        # sum all items
        # In (batch_size, item_len, dimensions)
        # Out (batch_size, dimensions)
        output = ret.sum(1)  #(5, 10), a final result

        return output, attention_weight
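# A standalone sketch of the smoothed, masked softmax used in the beta != 1
# branch above: exponentiate the masked scores and divide by the row sum raised
# to the power beta. The function name and shapes are illustrative, not part of
# the class above; all-zero rows still need the special handling shown there.
import torch


def smoothed_masked_softmax(scores, mask, beta):
    # scores, mask: (batch_size, item_len); beta in (0, 1]
    exp_scores = torch.exp(scores) * mask                  # zero out padded items
    denom = exp_scores.sum(dim=1, keepdim=True).pow(beta)  # smoothed normalizer
    return exp_scores / denom                              # rows are no longer strictly normalized when beta < 1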
Exemplo n.º 45
 def payload(self):
     x = torch.randn(10, 10).cuda()
     y = torch.randn(10, 10).cuda()
     z = torch.mm(x, y)
     z = z + y
     z = z.cpu()
 def train(self, v0, vk, ph0, phk):
     self.W += torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)
     self.b += torch.sum((v0 - vk), 0)
     self.a += torch.sum((ph0 - phk), 0)
Exemplo n.º 47
 def forward(self, X, A_hat):  ### 1-layer GCN architecture
     X = torch.mm(X, self.weight)
     if self.bias is not None:
         X = (X + self.bias)
     X = F.relu(torch.mm(A_hat, X))
     return X
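# The layer above expects a pre-normalized adjacency A_hat. A common (assumed)
# construction is the symmetric normalization from Kipf & Welling,
# A_hat = D^{-1/2} (A + I) D^{-1/2}, sketched here for a dense adjacency:
import torch


def normalize_adjacency(A):
    # A: (N, N) adjacency matrix without self-loops
    A_tilde = A + torch.eye(A.size(0))      # add self-loops
    deg = A_tilde.sum(dim=1)                # node degrees
    D_inv_sqrt = torch.diag(deg.pow(-0.5))  # D^{-1/2}
    return D_inv_sqrt @ A_tilde @ D_inv_sqrt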
Exemplo n.º 48
def logistic_regression(x):
    return torch.sigmoid(torch.mm(x, w) + b)
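# A minimal, self-contained sketch of how the w and b used above might be
# trained with binary cross-entropy and plain gradient descent; the data and
# hyperparameters below are random and purely illustrative:
import torch
import torch.nn.functional as F

x = torch.randn(100, 5)                       # 100 samples, 5 features
y = (x.sum(dim=1, keepdim=True) > 0).float()  # toy binary labels
w = torch.zeros(5, 1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)

for _ in range(200):
    loss = F.binary_cross_entropy(torch.sigmoid(torch.mm(x, w) + b), y)
    loss.backward()
    with torch.no_grad():
        w -= 0.1 * w.grad
        b -= 0.1 * b.grad
        w.grad.zero_()
        b.grad.zero_()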
Exemplo n.º 49
 def train(self, v0, vk, ph0, phk): 
     self.W += torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)
     self.b += torch.sum((v0 - vk), 0) # sum over the batch dimension (dim 0) so the bias update keeps its shape
     self.a += torch.sum((ph0 - phk), 0)
Exemplo n.º 50
 def sample_h(self, x):
     wx = torch.mm(x, self.W.t())
     activation = wx + self.a.expand_as(wx)
     p_h_given_v = torch.sigmoid(activation)
     return p_h_given_v, torch.bernoulli(p_h_given_v)
Exemplo n.º 51
 def sample_h(self, x): # x stands for visible nodes
     wx = torch.mm(x, self.W.t()) # torch.mm multiplies the two tensors; the weights are transposed because W is laid out for p_v_given_h
     activation = wx + self.a.expand_as(wx) # the bias self.a must be added to every row of the batch, hence expand_as(wx)
     p_h_given_v = torch.sigmoid(activation) # probability that each hidden node is activated given the visible nodes
     return p_h_given_v, torch.bernoulli(p_h_given_v) # return the probabilities and a Bernoulli sample of the hidden nodes
Exemplo n.º 52
 def sample_v(self, y):
     wy = torch.mm(y, self.W)
     activation = wy + self.b.expand_as(wy)
     p_v_given_h = torch.sigmoid(activation)
     return p_v_given_h, torch.bernoulli(p_v_given_h)
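# A sketch of a single contrastive-divergence (CD-1) step that ties together
# the sample_h, sample_v, and train methods shown above. `rbm` stands for any
# instance of the RBM class these methods belong to (an assumption, since the
# full class is not shown here):
def cd1_step(rbm, v0):
    ph0, _ = rbm.sample_h(v0)      # positive-phase hidden probabilities
    _, hk = rbm.sample_h(v0)       # sample hidden units from the data
    _, vk = rbm.sample_v(hk)       # reconstruct the visible units
    phk, _ = rbm.sample_h(vk)      # negative-phase hidden probabilities
    rbm.train(v0, vk, ph0, phk)    # update W, a, and b
    return vk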
Exemplo n.º 53
    def forward(self, sentence, seq_lengths, mask, label_sent, label_mask):
        '''
        sentence: (batch, len)
        '''
        '''label descriptions'''
        label_embeds = self.word_embeddings_bow(
            label_sent)  #(12, len, emb_size)
        label_reps = torch.sum(label_embeds * label_mask.unsqueeze(2),
                               dim=1)  #(12, emb_size)
        label_hidden_reps = (
            self.emb2hidden(label_reps)).tanh()  #(12, hidden_size)
        '''neural BOW'''
        embeds_bow = self.word_embeddings_bow(sentence)
        bow = torch.sum(embeds_bow * mask.unsqueeze(2),
                        dim=1)  #(batch, emb_size)
        '''LSTM'''
        embeds_lstm = self.word_embeddings_bow(sentence)
        lstm_output = LSTM(embeds_lstm, seq_lengths, self.lstm, False)
        '''multi-channel CNN'''
        embeds_cnn = self.word_embeddings_bow(sentence)
        conv_output = multi_channel_conv_and_pool(embeds_cnn, mask,
                                                  self.conv_1, self.conv_2)

        dot_cnn_dataless = (torch.mm(
            conv_output.reshape(2 * self.batch_size, self.hidden_dim),
            label_hidden_reps.t()).reshape(self.batch_size,
                                           2 * self.tagset_size)).tanh()
        '''attentive convolution'''
        embeds_acnn = self.word_embeddings_bow(sentence)
        aconv_output = attentive_convolution(embeds_acnn, embeds_acnn, mask,
                                             mask, self.conv_self,
                                             self.conv_context)
        aconv_output2 = attentive_convolution(embeds_acnn, embeds_acnn, mask,
                                              mask, self.conv_self2,
                                              self.conv_context2)
        '''dataless'''
        dataless_cos = (cosine_two_matrices(
            bow, label_reps)).sigmoid()  #(batch, 12)
        '''dataless top-30 fine grained cosine'''
        sent_side = embeds_bow * mask.unsqueeze(
            2)  #(batch, sent_len, emb_size)
        label_side = label_embeds * label_mask.unsqueeze(
            2)  #(12, label_len, emb_size)

        cosine_matrix = cosine_two_matrices(
            label_side.view(-1, self.emb_size),
            sent_side.view(-1, self.emb_size))  #(12*label_len, batch*sent_len)
        # print('cosine_matrix:', cosine_matrix)
        dot_prod_tensor4 = cosine_matrix.reshape(
            self.batch_size, sent_side.size(1), 12,
            label_side.size(1)).permute(0, 2, 3,
                                        1)  #(batch, 12, label_len, sent_len)
        dot_prod_tensor3_new = dot_prod_tensor4.reshape(
            self.batch_size, 12,
            label_side.size(1) *
            sent_side.size(1))  #(batch, 12, label_len*sent_len)
        sorted, indices = torch.sort(dot_prod_tensor3_new, descending=True)
        top_k_sorted = sorted[:, :, :50]  # note: keeps the top 50 scores even though the variable below is named "top_30"
        dataless_top_30 = top_k_sorted.mean(dim=-1)  #(batch, 12)
        # print('dataless_top_30:',top_k_sorted.var(dim=-1))
        '''combine all output representations'''
        '''len = self.emb_size+3*self.hidden_dim+4*self.tagset_size'''
        # combine_rep_batch = torch.cat([bow, lstm_output, conv_output, dataless_cos, dataless_top_30, dot_cnn_dataless, aconv_output,aconv_output2], 1)
        combine_rep_batch = torch.cat(
            [bow, aconv_output, aconv_output2, conv_output], 1)
        tag_space = self.hidden2tag(combine_rep_batch)
        tag_prob = tag_space.sigmoid()

        return tag_prob
Exemplo n.º 54
def Q(X):
    # encoder: maps an input X to the mean and (log-)variance of q(z | X)
    # (`nn` is assumed to alias torch.nn.functional, as in common VAE examples)
    h = nn.relu(torch.mm(X, Wxh) + bxh.repeat(X.size(0), 1))
    z_mu = torch.mm(h, Whz_mu) + bhz_mu.repeat(h.size(0), 1)
    z_var = torch.mm(h, Whz_var) + bhz_var.repeat(h.size(0), 1)
    return z_mu, z_var
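# A sketch of the reparameterization trick that connects Q and P above,
# assuming (as in common VAE examples) that z_var holds the log-variance of
# q(z | X):
import torch


def sample_z(z_mu, z_var):
    # z = mu + sigma * eps, with eps ~ N(0, I)
    eps = torch.randn_like(z_mu)
    return z_mu + torch.exp(z_var / 2) * eps

# typical use: z_mu, z_var = Q(X); X_reconstructed = P(sample_z(z_mu, z_var))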
Exemplo n.º 55
start_time = time.time()
# loss_ = PMF()
# loss = loss_(rating_mat, user_features, movie_features)
optimizer = torch.optim.SGD([user_features, movie_features],
                            lr=0.01,
                            weight_decay=0.5)
pmferr = PMF(u_lambda=rating_var, v_lambda=rating_var)
for step, epoch in enumerate(range(10)):
    optimizer.zero_grad()
    loss = pmferr(rating_mat, user_features, movie_features)
    loss.backward()
    optimizer.step()
    if step % 50 == 0:
        print(f'Step {step}, {loss:.3f}')

dev_csv_path = 'data/dev.csv'
dev_df = pd.read_csv(dev_csv_path, names=['movie', 'user'])

file = open('eval/PMF_%d.txt' % latent_vectors, 'w')
for i in range(len(dev_df.movie)):
    dev_movie = dev_df.iloc[i].movie
    dev_user = dev_df.iloc[i].user

    pred = torch.mm(user_features[dev_user, :].view(1, -1), movie_features.t())
    # pred_rate = (pred*(max_rate-min_rate)+min_rate)
    pred_result = pred[0, dev_movie].data.tolist()
    file.writelines('%s\n' % (str(pred_result)))
    print('---------predicting for instance number %d' % i)
file.close()
print('%f secs spent' % (time.time() - start_time))
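# The PMF loss module used above is not shown in this snippet. A common
# formulation, sketched here as an assumption rather than the author's exact
# code, is a masked squared error over observed ratings plus L2 penalties on
# both factor matrices:
import torch


def pmf_loss(rating_mat, user_features, movie_features, u_lambda, v_lambda):
    mask = (rating_mat > 0).float()                      # observed entries only
    pred = torch.mm(user_features, movie_features.t())  # predicted ratings
    sq_err = (mask * (rating_mat - pred) ** 2).sum()
    reg = u_lambda * user_features.pow(2).sum() + v_lambda * movie_features.pow(2).sum()
    return sq_err + reg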
Exemplo n.º 56
def gram_matrix(input):
    N, C, H, W = input.size()
    features = input.view(N * C, H * W)
    G = torch.mm(features, features.t())  # XX^t
    return G.div(N * C * H * W)  # Normalize
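# A typical use of gram_matrix in neural style transfer is a style loss that
# compares the Gram matrices of generated and style feature maps (a sketch; the
# feature tensors are assumed to come from some CNN layer such as VGG):
import torch.nn.functional as F


def style_loss(generated_features, style_features):
    return F.mse_loss(gram_matrix(generated_features), gram_matrix(style_features))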
Exemplo n.º 57
 def forward(self, graph, x):
     output = torch.mm(x, self.weight)
     output = output + self.bias if self.bias is not None else output
     output = self.bn(output)
     return self.sigma(output)
    def forward(self, features, all_phrase_ids, targets, precomp_boxes,
                precomp_score, precomp_det_label, image_scale, all_sent_sgs,
                all_sentences, image_unique_id, det_label_embedding):
        """
        :param obj_proposals: proposal from each images
        :param features: features maps from the backbone
        :param target: gt relation labels
        :param object_vocab, object_vocab_len [[xxx,xxx],[xxx],[xxx]], [2,1,1]
        :param sent_sg: sentence scene graph
        :return: prediction, loss

        note that first dimension is images
        """
        img_num_per_gpu = len(features)

        batch_decode_logits = []
        batch_topk_decoder_logits = []
        batch_pred_similarity = []
        batch_precomp_boxes = []
        batch_topk_precomp_boxes = []
        batch_pred_boxes = []
        batch_topk_pred_boxes = []
        batch_topk_fusion_pred_boxes = []
        batch_topk_pred_similarity = []
        batch_topk_fusion_similarity = []
        batch_boxes_targets = []
        batch_ctx_embed = []
        batch_ctx_s1_embed = []

        batch_rel_reconst_s0 = []
        batch_rel_reconst_s1 = []
        batch_rel_cls_s0 = []
        batch_rel_cls_s1 = []
        batch_rel_cls_gt = []

        batch_pred_targets = []
        batch_topk_pred_targets = []
        """ Language Embedding"""
        batch_phrase_ids, batch_phrase_types, batch_phrase_embed, batch_phrase_len, \
        batch_phrase_dec_ids, batch_phrase_mask, batch_decoder_word_embed, batch_phrase_glove_embed, batch_relation_conn, batch_sent_embed,\
        batch_decoder_rel_word_embed, batch_rel_mask, batch_rel_dec_idx = self.phrase_embed(all_sentences, all_phrase_ids, all_sent_sgs)

        h, w = features.shape[-2:]

        # self.iter = 100000
        self.storage = get_event_storage()

        for bid in range(img_num_per_gpu):
            """ Visual Embedding """
            precomp_boxes_bid = precomp_boxes[bid].to(self.device)  ## 100*4

            order = []
            for phr_ids in batch_phrase_ids[bid]:
                order.append(all_phrase_ids[bid].index(phr_ids))
            target_filter = targets[bid][np.array(order)]
            batch_boxes_targets.append(target_filter.to(self.device))
            batch_precomp_boxes.append(precomp_boxes_bid)

            img_feat_bid = features[[bid]]

            visual_features_bid = self.rcnn_top(
                self.det_roi_pooler(
                    [img_feat_bid],
                    [precomp_boxes_bid])).mean(dim=[2, 3]).contiguous()
            if cfg.MODEL.VG.SPATIAL_FEAT:
                spa_feat = meshgrid_generation(h, w)
                spa_feat = self.det_roi_pooler(
                    [spa_feat],
                    [precomp_boxes_bid]).view(visual_features_bid.shape[0], -1)
                spa_feat = self.spatial_trans(spa_feat)
                visual_features_bid = torch.cat(
                    (visual_features_bid, spa_feat), dim=1)

            visual_features_bid = self.visual_embedding(visual_features_bid)
            visual_features_bid = self.vis_batchnorm(visual_features_bid)
            """ Noun Phrase embedding """
            phrase_embed_bid = batch_phrase_embed[bid]
            if phrase_embed_bid.shape[0] == 1 and self.training:
                phrase_embed_bid = self.phr_batchnorm(
                    phrase_embed_bid.repeat(2, 1))[[0]]
            else:
                phrase_embed_bid = self.phr_batchnorm(phrase_embed_bid)
            """ Similarity and attention prediction """
            num_box = precomp_boxes_bid.tensor.size(0)
            num_phrase = phrase_embed_bid.size(0)
            phr_inds, obj_inds = self.make_pair(num_phrase, num_box)
            pred_similarity_bid, pred_targets_bid = self.similarity(
                visual_features_bid, phrase_embed_bid, obj_inds, phr_inds)
            pred_similarity_bid = pred_similarity_bid.reshape(
                num_phrase, num_box)
            pred_targets_bid = pred_targets_bid.reshape(num_phrase, num_box, 4)
            batch_pred_targets.append(pred_targets_bid)

            if cfg.MODEL.VG.USING_DET_KNOWLEDGE:
                det_label_embedding_bid = det_label_embedding[bid].to(
                    self.device)
                sim = self.cal_det_label_sim_max(det_label_embedding_bid,
                                                 batch_phrase_glove_embed[bid])
                pred_similarity_bid = pred_similarity_bid * sim
                sim_mask = (sim > 0).float()
                atten_bid = numerical_stability_masked_softmax(
                    pred_similarity_bid, sim_mask, dim=1)
            else:
                atten_bid = F.softmax(pred_similarity_bid, dim=1)

            ## reconstruction visual features
            visual_reconst_bid = torch.mm(atten_bid, visual_features_bid)
            decode_phr_logits = self.phrase_decoder(
                visual_reconst_bid, batch_decoder_word_embed[bid])
            batch_decode_logits.append(decode_phr_logits)

            atten_score_topk, atten_ranking_topk = torch.topk(
                atten_bid, dim=1, k=self.s2_topk)  ## (N, 10)
            ind_phr_topk = np.arange(num_phrase).repeat(self.s2_topk)

            ## -----------------------------------------------------##
            ## crop stage-2 features
            ## -----------------------------------------------------##

            if self.storage.iter <= cfg.SOLVER.REG_START_ITER:
                # if self.iter <= cfg.SOLVER.REG_START_ITER:
                visual_features_topk_bid = visual_features_bid[
                    atten_ranking_topk.reshape(-1)]
                precomp_boxes_topk_bid = precomp_boxes_bid[
                    atten_ranking_topk.reshape(-1)]
                batch_topk_precomp_boxes.append(precomp_boxes_topk_bid)
            else:
                topk_box_ids = atten_ranking_topk.reshape(
                    -1) + torch.as_tensor(ind_phr_topk, dtype=torch.long).to(
                        self.device) * num_box
                precomp_boxes_tensor, box_size = precomp_boxes_bid.tensor, precomp_boxes_bid.size
                precomp_boxes_topk_tensor = precomp_boxes_tensor[
                    atten_ranking_topk.reshape(-1)]  ## (N*10, 4)
                pred_targets_s0 = pred_targets_bid.view(-1, 4)[topk_box_ids]
                precomp_boxes_topk_bid = self.box2box_translation.apply_deltas(
                    pred_targets_s0, precomp_boxes_topk_tensor)
                precomp_boxes_topk_bid = Boxes(precomp_boxes_topk_bid,
                                               box_size)
                precomp_boxes_topk_bid.clip()
                batch_topk_precomp_boxes.append(precomp_boxes_topk_bid)
                visual_features_topk_bid = self.rcnn_top(
                    self.det_roi_pooler([img_feat_bid],
                                        [precomp_boxes_topk_bid])).mean(
                                            dim=[2, 3]).contiguous()

                if cfg.MODEL.VG.SPATIAL_FEAT:
                    spa_feat = meshgrid_generation(h, w)
                    spa_feat = self.det_roi_pooler(
                        [spa_feat], [precomp_boxes_topk_bid]).view(
                            visual_features_topk_bid.shape[0], -1)
                    spa_feat = self.spatial_trans(spa_feat)
                    visual_features_topk_bid = torch.cat(
                        (visual_features_topk_bid, spa_feat), dim=1)

                visual_features_topk_bid = self.visual_embedding(
                    visual_features_topk_bid)  ## (N*10, 1024)
                visual_features_topk_bid = self.vis_batchnorm(
                    visual_features_topk_bid)

            if cfg.MODEL.RELATION.IS_ON:
                relation_conn_bid = batch_relation_conn[bid]
                if len(relation_conn_bid) > 0:
                    relation_conn_bids = [rel[:2] for rel in relation_conn_bid]
                    phr_sub_idx, phr_obj_idx = torch.as_tensor(
                        relation_conn_bids).to(self.device).long().transpose(
                            0, 1)
                    visual_ctx_topk = (atten_score_topk.unsqueeze(2) *
                                       visual_features_topk_bid.reshape(
                                           num_phrase, self.s2_topk, -1)).sum(
                                               1)  ## N*1024
                    visual_trans = self.rel_trans(visual_ctx_topk)
                    ent_gate = (visual_trans[phr_sub_idx] *
                                visual_trans[phr_obj_idx]).sum(1) / 512**0.5
                    gate_mat = torch.zeros([num_phrase,
                                            num_phrase]).to(self.device)
                    gate_mat[phr_sub_idx, phr_obj_idx] = F.relu(ent_gate)
                    gate_mat[phr_obj_idx, phr_sub_idx] = F.relu(ent_gate)
                    # gate_mat[phr_sub_idx, phr_obj_idx] = 1
                    # gate_mat[phr_obj_idx, phr_sub_idx] = 1
                    gate_mask = (gate_mat != 0).float()
                    gate_mat = numerical_stability_masked_softmax(gate_mat,
                                                                  gate_mask,
                                                                  dim=1)
                    # cxt_feat = torch.mm(gate_mat, visual_ctx_topk)
                    cxt_feat = self.agg_trans(
                        torch.mm(gate_mat, visual_ctx_topk))
                    visual_features_topk_bid = visual_features_topk_bid + torch.bmm(
                        atten_score_topk.unsqueeze(2),
                        cxt_feat.unsqueeze(1)).view(-1, self.visual_embed_dim)

            pred_similarity_topk_bid, pred_targets_topk_bid = self.similarity_topk(
                visual_features_topk_bid, phrase_embed_bid, ind_phr_topk)
            pred_similarity_topk_bid = pred_similarity_topk_bid.reshape(
                num_phrase, self.s2_topk)
            pred_targets_topk_bid = pred_targets_topk_bid.reshape(
                num_phrase, self.s2_topk, 4)
            batch_topk_pred_targets.append(pred_targets_topk_bid)

            if cfg.MODEL.VG.USING_DET_KNOWLEDGE:
                sim_topk = torch.gather(sim,
                                        dim=1,
                                        index=atten_ranking_topk.long())
                sim_mask = (sim_topk > 0).float()
                pred_similarity_topk_bid = pred_similarity_topk_bid * sim_topk
                atten_topk_bid = numerical_stability_masked_softmax(
                    pred_similarity_topk_bid, sim_mask, dim=1)
            else:
                atten_topk_bid = F.softmax(pred_similarity_topk_bid, dim=1)

            atten_fusion = atten_topk_bid * atten_score_topk  ## N*10
            visual_features_topk_bid = visual_features_topk_bid.view(
                num_phrase, self.s2_topk, -1)
            visual_reconst_topk_bid = (atten_fusion.unsqueeze(2) *
                                       visual_features_topk_bid).sum(
                                           1)  ## N*1024
            decoder_phr_topk_logits = self.phrase_decoder(
                visual_reconst_topk_bid, batch_decoder_word_embed[bid])
            batch_topk_decoder_logits.append(decoder_phr_topk_logits)

            if cfg.MODEL.RELATION.IS_ON:
                relation_conn_bid = batch_relation_conn[bid]
                if len(relation_conn_bid) > 0:
                    ent_idx = []
                    rel_cates = torch.zeros([
                        len(relation_conn_bid),
                        self.phrase_embed.rel_cater_size
                    ]).to(self.device).float()
                    for rel_i, conn in enumerate(relation_conn_bid):
                        phr_sub_id, phr_obj_id, rel_cate, rel_id = conn
                        rel_cates[rel_i][
                            rel_cate] = 1  ## indicate the target relation
                        ent_idx.append([phr_sub_id, phr_obj_id])

                    phr_sub_ids, phr_obj_ids = torch.as_tensor(
                        ent_idx).long().to(self.device).transpose(0, 1)
                    visual_rel_feats_s0 = torch.cat(
                        (visual_reconst_bid[phr_sub_ids],
                         visual_reconst_bid[phr_obj_ids]),
                        dim=1)
                    visual_rel_feats_s0 = self.relation_merge(
                        visual_rel_feats_s0)
                    rel_logits_s0 = self.vis2rel(visual_rel_feats_s0)
                    batch_rel_cls_s0.append(rel_logits_s0)
                    # batch_rel_cls_s0.append(None)

                    visual_rel_feats_s1 = torch.cat(
                        (visual_reconst_topk_bid[phr_sub_ids],
                         visual_reconst_topk_bid[phr_obj_ids]),
                        dim=1)
                    visual_rel_feats_s1 = self.relation_merge(
                        visual_rel_feats_s1)
                    rel_logits_s1 = self.vis2rel(visual_rel_feats_s1)
                    batch_rel_cls_s1.append(rel_logits_s1)
                    batch_rel_cls_gt.append(
                        rel_cates)  ## change the ground-truth into cuda tensor

                    batch_rel_reconst_s0.append(None)
                    batch_rel_reconst_s1.append(None)

                else:

                    batch_rel_cls_s0.append(None)
                    batch_rel_cls_s1.append(None)
                    batch_rel_reconst_s0.append(None)
                    batch_rel_reconst_s1.append(None)
                    batch_rel_cls_gt.append(None)

            else:
                batch_rel_cls_s0.append(None)
                batch_rel_cls_s1.append(None)
                batch_rel_reconst_s0.append(None)
                batch_rel_reconst_s1.append(None)
                batch_rel_cls_gt.append(None)

            ## construct the discriminative loss
            batch_ctx_s1_embed.append(
                self.visual_mlp(visual_reconst_bid.mean(0, keepdim=True)))
            batch_ctx_embed.append(
                self.visual_mlp(visual_reconst_topk_bid.mean(0, keepdim=True)))

            batch_pred_similarity.append(atten_bid)
            batch_topk_pred_similarity.append(atten_topk_bid)
            batch_topk_fusion_similarity.append(atten_fusion)

            ### transform boxes for stage-1
            num_phrase_indices = torch.arange(num_phrase).long().to(
                self.device)
            max_box_ind = atten_bid.detach().cpu().numpy().argmax(1)
            precomp_boxes_delta_max = pred_targets_bid[
                num_phrase_indices, max_box_ind]  ## numPhrase*4

            max_topk_id = torch.topk(atten_topk_bid, dim=1,
                                     k=1)[1].long().squeeze(1)
            precomp_boxes_delta_max_topk = pred_targets_topk_bid[
                num_phrase_indices, max_topk_id]  ## num_phrase*4
            precomp_boxes_topk_bid_tensor = precomp_boxes_topk_bid.tensor.reshape(
                -1, self.s2_topk, 4)

            max_fusion_topk_id = torch.topk(atten_fusion, dim=1,
                                            k=1)[1].long().squeeze()
            precomp_boxes_delta_max_topk_fusion = pred_targets_topk_bid[
                num_phrase_indices, max_fusion_topk_id]  ## num_phrase*4

            phr_index = torch.arange(num_phrase).to(self.device) * self.s2_topk

            if self.storage.iter <= cfg.SOLVER.REG_START_ITER:
                max_select_boxes = precomp_boxes_bid[max_box_ind]
                max_precomp_boxes = precomp_boxes_topk_bid[max_topk_id +
                                                           phr_index]
                max_fusion_precomp_boxes = precomp_boxes_topk_bid[
                    max_fusion_topk_id + phr_index]
            else:
                max_select_boxes = Boxes(
                    self.box2box_translation.apply_deltas(
                        precomp_boxes_delta_max,
                        precomp_boxes_bid[max_box_ind].tensor),
                    precomp_boxes_bid.size)
                max_precomp_boxes = Boxes(
                    self.box2box_translation.apply_deltas(
                        precomp_boxes_delta_max_topk,
                        precomp_boxes_topk_bid_tensor[num_phrase_indices,
                                                      max_topk_id]),
                    precomp_boxes_bid.size)
                max_fusion_precomp_boxes = Boxes(
                    self.box2box_translation.apply_deltas(
                        precomp_boxes_delta_max_topk_fusion,
                        precomp_boxes_topk_bid_tensor[num_phrase_indices,
                                                      max_fusion_topk_id]),
                    precomp_boxes_bid.size)

            batch_pred_boxes.append(max_select_boxes)
            batch_topk_pred_boxes.append(max_precomp_boxes)
            batch_topk_fusion_pred_boxes.append(max_fusion_precomp_boxes)

        batch_ctx_sim, batch_ctx_sim_s1 = self.generate_image_sent_discriminative(
            batch_sent_embed, batch_ctx_embed, batch_ctx_s1_embed)

        noun_reconst_loss, noun_topk_reconst_loss, disc_img_sent_loss_s1, disc_img_sent_loss_s2,  reg_loss, \
        reg_loss_s1, rel_cls_loss, rel_cls_loss_s1, rel_const_loss, rel_const_loss_s1 = self.VGLoss(batch_phrase_mask, batch_decode_logits, batch_topk_decoder_logits, batch_phrase_dec_ids,
                                  batch_ctx_sim, batch_ctx_sim_s1, batch_pred_similarity, batch_topk_pred_similarity, batch_boxes_targets, batch_precomp_boxes,
                                  batch_pred_targets, batch_topk_pred_targets,
                                  batch_topk_precomp_boxes, batch_rel_cls_s0, batch_rel_cls_s1, batch_rel_cls_gt, batch_rel_reconst_s0, batch_rel_reconst_s1, batch_rel_mask, batch_rel_dec_idx)

        all_loss = dict(noun_reconst_loss=noun_reconst_loss,
                        noun_topk_reconst_loss=noun_topk_reconst_loss,
                        disc_img_sent_loss_s1=disc_img_sent_loss_s1,
                        disc_img_sent_loss_s2=disc_img_sent_loss_s2,
                        reg_loss_s1=reg_loss,
                        reg_loss_s2=reg_loss_s1,
                        rel_cls_loss=rel_cls_loss,
                        rel_cls_loss_s1=rel_cls_loss_s1,
                        rel_const_loss=rel_const_loss,
                        rel_const_loss_s1=rel_const_loss_s1)

        if self.training:
            return all_loss, None
        else:
            return all_loss, (batch_phrase_ids, batch_phrase_types,
                              move2cpu(batch_pred_boxes),
                              move2cpu(batch_pred_similarity),
                              move2cpu(batch_boxes_targets),
                              move2cpu(batch_precomp_boxes), image_unique_id,
                              move2cpu(batch_topk_pred_similarity),
                              move2cpu(batch_topk_fusion_similarity),
                              move2cpu(batch_topk_pred_boxes),
                              move2cpu(batch_topk_fusion_pred_boxes),
                              move2cpu(batch_topk_precomp_boxes),
                              move2cpu(batch_topk_pred_targets),
                              move2cpu(batch_pred_targets))
Exemplo n.º 59
    def forward(self,
                sentence,
                p_sentence,
                pos_tags,
                lengths,
                target_idx_in,
                region_marks,
                local_roles_voc,
                frames,
                local_roles_mask,
                sent_pred_lemmas_idx,
                dep_tags,
                dep_heads,
                targets,
                predicate_identification,
                all_l_ids,
                Predicate_link,
                Predicate_Labels_nd,
                Predicate_Labels,
                unlabeled_sentence_in=False,
                p_unlabeled_sentence_in=False,
                unlabeled_sen_lengths=False,
                test=False,
                cvt_train=False):
        """
        elmo_embedding_0 = self.elmo_embeddings_0(sentence).view(self.batch_size, len(sentence[0]), 1024)
        elmo_embedding_1 = self.elmo_embeddings_1(sentence).view(self.batch_size, len(sentence[0]), 1024)
        w = F.softmax(self.elmo_word, dim=0)
        elmo_emb = self.elmo_gamma_word * (w[0] * elmo_embedding_0 + w[1] * elmo_embedding_1)
        elmo_emb_word = self.elmo_mlp_word(elmo_emb)
        """

        region_marks = self.region_embeddings(region_marks).view(
            self.batch_size, len(sentence[0]), 16)
        fixed_embeds = self.word_fixed_embeddings(p_sentence)
        fixed_embeds = fixed_embeds.view(self.batch_size, len(sentence[0]),
                                         self.word_emb_dim)
        sent_pred_lemmas_embeds = self.p_lemma_embeddings(sent_pred_lemmas_idx)
        embeds_SRL = self.word_embeddings_SRL(sentence)
        embeds_SRL = embeds_SRL.view(self.batch_size, len(sentence[0]),
                                     self.word_emb_dim)
        pos_embeds = self.pos_embeddings(pos_tags)
        SRL_hidden_states = torch.cat((embeds_SRL, fixed_embeds, region_marks),
                                      2)
        SRL_hidden_states = self.SRL_input_dropout(SRL_hidden_states)

        # SRL layer
        # first layer
        embeds_sort, lengths_sort, unsort_idx = self.sort_batch(
            SRL_hidden_states, lengths)
        embeds_sort = rnn.pack_padded_sequence(embeds_sort,
                                               lengths_sort,
                                               batch_first=True)
        # hidden states [time_steps * batch_size * hidden_units]
        self.hidden = self.init_hidden_spe()
        hidden_states, self.hidden = self.BiLSTM_0(embeds_sort, self.hidden)
        # the hidden states are already batch-first, so there is no need to swap the dims
        # hidden_states = hidden_states.permute(1, 2, 0).contiguous().view(self.batch_size, -1, )
        hidden_states, lens = rnn.pad_packed_sequence(hidden_states,
                                                      batch_first=True)
        # hidden_states = hidden_states.transpose(0, 1)
        hidden_states_0 = hidden_states[unsort_idx]

        # second_layer
        embeds_sort, lengths_sort, unsort_idx = self.sort_batch(
            hidden_states_0, lengths)
        embeds_sort = rnn.pack_padded_sequence(embeds_sort,
                                               lengths_sort,
                                               batch_first=True)
        # hidden states [time_steps * batch_size * hidden_units]
        self.hidden_1 = self.init_hidden_spe()
        hidden_states, self.hidden_1 = self.BiLSTM_1(embeds_sort,
                                                     self.hidden_1)
        # the hidden states are already batch-first, so there is no need to swap the dims
        # hidden_states = hidden_states.permute(1, 2, 0).contiguous().view(self.batch_size, -1, )
        hidden_states, lens = rnn.pad_packed_sequence(hidden_states,
                                                      batch_first=True)
        # hidden_states = hidden_states.transpose(0, 1)
        hidden_states_1 = hidden_states[unsort_idx]

        hidden_states_0 = self.hidden_state_dropout_0(hidden_states_0)
        hidden_states_1 = self.hidden_state_dropout_1(hidden_states_1)

        hidden_states = torch.cat((hidden_states_0, hidden_states_1), 2)

        # B * H
        hidden_states_word = self.dropout_1(
            F.relu(self.Non_Predicate_Proj(hidden_states)))
        predicate_embeds = hidden_states[np.arange(0,
                                                   hidden_states.size()[0]),
                                         target_idx_in]
        hidden_states_predicate = self.dropout_2(
            F.relu(self.Predicate_Proj(predicate_embeds)))

        bias_one = torch.ones(
            (self.batch_size, len(sentence[0]), 1)).to(device)
        hidden_states_word = torch.cat(
            (hidden_states_word, Variable(bias_one)), 2)

        left_part = torch.mm(
            hidden_states_word.view(self.batch_size * len(sentence[0]), -1),
            self.W_R)
        left_part = left_part.view(self.batch_size,
                                   len(sentence[0]) * self.dep_size, -1)
        hidden_states_predicate = hidden_states_predicate.view(
            self.batch_size, -1, 1)
        tag_space = torch.bmm(left_part, hidden_states_predicate).view(
            len(sentence[0]) * self.batch_size, -1)
        SRLprobs = F.softmax(tag_space, dim=1)

        # +++++++++++++++++++++++
        wrong_l_nums = 0.0
        all_l_nums = 0.0

        right_noNull_predict = 10.0
        noNull_predict = 10.0
        noNUll_truth = 10.0

        loss_function = nn.CrossEntropyLoss(ignore_index=0)

        SRLloss = loss_function(tag_space, Predicate_Labels_nd.view(-1))

        return SRLloss, SRLloss, SRLloss, SRLprobs, wrong_l_nums, all_l_nums, wrong_l_nums, all_l_nums,  \
               right_noNull_predict, noNull_predict, noNUll_truth,\
               right_noNull_predict, noNull_predict, noNUll_truth
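# The scoring block above (left_part / bmm) is a bilinear, biaffine-style score
# between every word representation (with a bias feature appended) and the
# predicate representation, one score per role label. A compact einsum sketch
# of the same idea, with purely illustrative shapes and names:
import torch

batch, seq_len, d_word, d_pred, n_labels = 2, 5, 8, 8, 4
words = torch.randn(batch, seq_len, d_word)  # word states (bias feature included)
pred = torch.randn(batch, d_pred)            # one predicate state per sentence
W = torch.randn(d_word, n_labels, d_pred)    # one scoring matrix per role label

# score[b, t, r] = words[b, t] @ W[:, r, :] @ pred[b]
scores = torch.einsum('btd,drp,bp->btr', words, W, pred)
print(scores.shape)  # torch.Size([2, 5, 4])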
Exemplo n.º 60
if __name__ == '__main__':
    plt.close('all')
    mlp.rcParams['font.family'] = ['times new roman']  # default is sans-serif
    rc('text', usetex=True)
    f, ax = plt.subplots(1, 1, figsize=(8, 4))
    f.suptitle('Figure 3.17, pg. 172', fontsize=14)

    m = 10  # Number of basis functions
    alpha = 2.0
    beta = 11.1
    m0 = th.DoubleTensor(m, 1).zero_()
    X_train, T_train = generateData(1, 30, np.sqrt(1 / beta))

    blr = BayesianLinearReg(m, m0, np.exp(-5), beta, basis='guassian')
    phi = blr.getBasis(X_train)
    e, v = th.eig(beta * th.mm(th.transpose(phi, 0, 1), phi))

    X0 = np.linspace(-10, 10, 100)
    gamma = np.zeros(len(X0))
    W0 = np.zeros((m, len(X0)))

    for idx, val in enumerate(X0):
        blr = BayesianLinearReg(m, m0, np.exp(val), beta, basis='guassian')
        blr.posterUpdate(X_train, T_train)
        #Eq. 3.91
        gamma[idx] = Variable(th.sum(th.div(e,
                                            np.exp(val) + e),
                                     0)).data.numpy()[0, 0]
        W0[:, idx] = Variable(blr.getWeightsMAP().squeeze()).data.numpy()

    cmap = plt.cm.get_cmap("gnuplot")