Example No. 1
 def predict(self, x, attn_type = "hard"):
     #predict with greedy decoding
     emb = self.embedding(x)
     h = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
     c = Variable(torch.zeros(1, x.size(0), self.hidden_dim))
     enc_h, _ = self.encoder(emb, (h, c))
     y = [Variable(torch.zeros(x.size(0)).long())]
     self.attn = []        
     for t in range(x.size(1)):
         emb_t = self.embedding(y[-1])
         dec_h, (h, c) = self.decoder(emb_t.unsqueeze(1), (h, c))
         scores = torch.bmm(enc_h, dec_h.transpose(1,2)).squeeze(2)
         attn_dist = F.softmax(scores, dim = 1)
         self.attn.append(attn_dist.data)
         if attn_type == "hard":
             _, argmax = attn_dist.max(1)
             one_hot = Variable(torch.zeros_like(attn_dist.data).scatter_(-1, argmax.data.unsqueeze(1), 1))
             context = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)                    
         else:                
             context = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)
         pred = self.vocab_layer(torch.cat([dec_h.squeeze(1), context], 1))
         _, next_token = pred.max(1)
         y.append(next_token)
     self.attn = torch.stack(self.attn, 0).transpose(0, 1)
     return torch.stack(y, 0).transpose(0, 1)
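The hard branch above swaps the softmax expectation for the single argmax encoder state. A minimal, self-contained sketch of the two context computations (illustrative shapes; written against the current tensor API rather than the legacy Variable API used above):

import torch
import torch.nn.functional as F

batch, src_len, hidden = 2, 5, 8
enc_h = torch.randn(batch, src_len, hidden)   # encoder states
scores = torch.randn(batch, src_len)          # attention scores
attn_dist = F.softmax(scores, dim=1)

# soft attention: expectation over encoder states
soft_ctx = torch.bmm(attn_dist.unsqueeze(1), enc_h).squeeze(1)  # [batch, hidden]

# hard attention: one-hot distribution at the argmax
argmax = attn_dist.max(1)[1]
one_hot = torch.zeros_like(attn_dist).scatter_(1, argmax.unsqueeze(1), 1)
hard_ctx = torch.bmm(one_hot.unsqueeze(1), enc_h).squeeze(1)    # [batch, hidden]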
Example No. 2
def evaluate(attention_model,x_test,y_test):
    """
        cv results
 
        Args:
            attention_model : {object} model
            x_test          : {nplist} x_test
            y_test          : {nplist} y_test
       
        Returns:
            cv-accuracy
 
      
    """
   
    attention_model.batch_size = x_test.shape[0]
    attention_model.hidden_state = attention_model.init_hidden()
    x_test_var = Variable(torch.from_numpy(x_test).type(torch.LongTensor))
    y_test_pred,_ = attention_model(x_test_var)
    if bool(attention_model.type):
        y_preds = torch.max(y_test_pred,1)[1]
        y_test_var = Variable(torch.from_numpy(y_test).type(torch.LongTensor))
       
    else:
        y_preds = torch.round(y_test_pred.type(torch.DoubleTensor).squeeze(1))
        y_test_var = Variable(torch.from_numpy(y_test).type(torch.DoubleTensor))
       
    # float() guards against integer division under Python 2
    return float(torch.eq(y_preds, y_test_var).data.sum()) / x_test_var.size(0)
Example No. 3
    def predict_proba(self, dataset):
        """Predict predict probability for dataset.
        This method will only work with method logistic/multiclass

        Parameters:
        ----------
        dataset (dict): dictionary with the testing dataset -
        X_wide_test, X_deep_test, target

        Returns:
        --------
        array-like with the probability for dataset.
        """

        X_w = Variable(torch.from_numpy(dataset.wide)).float()
        X_d = Variable(torch.from_numpy(dataset.deep))

        if use_cuda:
            X_w, X_d = X_w.cuda(), X_d.cuda()

        # set the model in evaluation mode so dropout is not applied
        net = self.eval()
        pred = net(X_w,X_d).cpu()
        if self.method == "logistic":
            pred = pred.squeeze(1).data.numpy()
            probs = np.zeros([pred.shape[0],2])
            probs[:,0] = 1-pred
            probs[:,1] = pred
            return probs
        if self.method == "multiclass":
            return pred.data.numpy()
Example No. 4
def train(ep):
    model.train()
    total_loss = 0
    count = 0
    train_idx_list = np.arange(len(X_train), dtype="int32")
    np.random.shuffle(train_idx_list)
    for idx in train_idx_list:
        data_line = X_train[idx]
        x, y = Variable(data_line[:-1]), Variable(data_line[1:])
        if args.cuda:
            x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        output = model(x.unsqueeze(0)).squeeze(0)
        loss = -torch.trace(torch.matmul(y, torch.log(output).float().t()) +
                            torch.matmul((1 - y), torch.log(1 - output).float().t()))
        total_loss += loss.data[0]
        count += output.size(0)

        loss.backward()
        # clip only after backward(), once the gradients exist
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        if idx > 0 and idx % args.log_interval == 0:
            cur_loss = total_loss / count
            print("Epoch {:2d} | lr {:.5f} | loss {:.5f}".format(ep, lr, cur_loss))
            total_loss = 0.0
            count = 0
Example No. 5
    def forward(self, k, x, logposterior):
        '''
        k: number of samples
        x: [B,X]
        logposterior(z) -> [P,B]
        '''

        self.B = x.size()[0]
        self.P = k

        #Encode
        out = x
        for i in range(len(self.encoder_weights)-1):
            out = self.act_func(self.encoder_weights[i](out))
        out = self.encoder_weights[-1](out)
        mean = out[:,:self.z_size]
        logvar = out[:,self.z_size:]

        #Sample
        eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_().type(self.dtype)) #[P,B,Z]
        z = eps.mul(torch.exp(.5*logvar)) + mean  #[P,B,Z]
        logqz = lognormal(z, mean, logvar) #[P,B]

        logdetsum = 0.
        for i in range(self.n_flows):

            z, logdet = self.norm_flow(self.params[i],z)
            logdetsum += logdet


        return z, logqz-logdetsum
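lognormal is not defined in these snippets. A plausible sketch, assuming it returns the log-density of a diagonal Gaussian summed over the latent dimension, matching the shape comments above:

import math
import torch

def lognormal(x, mean, logvar):
    # log N(x; mean, diag(exp(logvar))); x: [P,B,Z], mean/logvar: [B,Z] -> [P,B]
    return -0.5 * (logvar.sum(-1)
                   + ((x - mean) ** 2 / torch.exp(logvar)).sum(-1)
                   + x.size(-1) * math.log(2 * math.pi))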
Example No. 6
 def test_broadcast_subspace(self):
     a = zeros((100, 100))
     v = Variable(torch.arange(0, 100))[:, None]
     b = Variable(torch.arange(99, -1, -1).long())
     a[b] = v
     expected = b.double().unsqueeze(1).expand(100, 100)
     self.assertEqual(a, expected)
Example No. 7
    def predict(self, dataset):
        """Predict target for dataset.

        Parameters:
        ----------
        dataset (dict): dictionary with the testing dataset -
        X_wide_test, X_deep_test, target

        Returns:
        --------
        array-like with the target for dataset
        """

        X_w = Variable(torch.from_numpy(dataset.wide)).float()
        X_d = Variable(torch.from_numpy(dataset.deep))

        if use_cuda:
            X_w, X_d = X_w.cuda(), X_d.cuda()

        # set the model in evaluation mode so dropout is not applied
        net = self.eval()
        pred = net(X_w,X_d).cpu()
        if self.method == "regression":
            return pred.squeeze(1).data.numpy()
        if self.method == "logistic":
            return (pred > 0.5).squeeze(1).data.numpy()
        if self.method == "multiclass":
            _, pred_cat = torch.max(pred, 1)
            return pred_cat.data.numpy()
Example No. 8
 def sample(self, mu, logvar, k):
     eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_()) #[P,B,Z]
     z = eps.mul(torch.exp(.5*logvar)) + mu  #[P,B,Z]
     logpz = lognormal(z, Variable(torch.zeros(self.B, self.z_size)), 
                         Variable(torch.zeros(self.B, self.z_size)))  #[P,B]
     logqz = lognormal(z, mu, logvar)
     return z, logpz, logqz
Example No. 9
def generate(model, start_words, ix2word, word2ix, prefix_words=None):
    """
    给定几个词,根据这几个词接着生成一首完整的诗歌
    start_words:u'春江潮水连海平'
    比如start_words 为 春江潮水连海平,可以生成:

    """
    results = list(start_words)
    start_word_len = len(start_words)
    # manually set the first token to <START>
    input = Variable(t.Tensor([word2ix['<START>']]).view(1, 1).long())
    if opt.use_gpu: input = input.cuda()
    hidden = None

    if prefix_words:
        for word in prefix_words:
            output, hidden = model(input, hidden)
            input = Variable(input.data.new([word2ix[word]])).view(1, 1)

    for i in range(opt.max_gen_len):
        output, hidden = model(input, hidden)

        if i < start_word_len:
            w = results[i]
            input = Variable(input.data.new([word2ix[w]])).view(1, 1)
        else:
            top_index = output.data[0].topk(1)[1][0]
            w = ix2word[top_index]
            results.append(w)
            input = Variable(input.data.new([top_index])).view(1, 1)
        if w == '<EOP>':
            del results[-1]
            break
    return results
Example No. 10
def l2l_validate(model, cluster_center, n_epoch=100):
    val_accuracy = []
    for epoch in range(n_epoch):
        data_l = generate_data_l(cluster_center)
        data_n = generate_data_n(cluster_center, model.n_class_n)
        x_l, y_l = Variable(torch.from_numpy(data_l[0])).float(), Variable(
            torch.from_numpy(data_l[1]))
        x_n, y_n = Variable(torch.from_numpy(data_n[0])).float(), Variable(
            torch.from_numpy(data_n[1]))
        pred_ll, pred_nl, w, b = model(x_l, x_n)
        M = Variable(torch.zeros(model.n_class_n, model.n_dim))
        B = Variable(torch.zeros(model.n_class_n))
        for k in range(model.n_class_n):
            M[k] = torch.cat((w[:, 0][y_n == model.n_class_l + k].view(-1, 1),
                              w[:, 1][y_n == model.n_class_l + k].view(-1, 1)), 1).mean(0)
            B[k] = b[y_n == model.n_class_l + k].mean()
        pred_ln = torch.mm(x_l, M.t()) + B.view(1, -1).expand_as(torch.mm(x_l, M.t()))
        pred_nn = torch.mm(x_n, M.t()) + B.view(1, -1).expand_as(torch.mm(x_n, M.t()))
        pred = torch.cat((torch.cat((pred_ll, pred_nl)), torch.cat((pred_ln, pred_nn))), 1)
        pred = pred.data.max(1)[1]
        y = torch.cat((y_l, y_n))
        accuracy = pred.eq(y.data).cpu().sum() * 1.0 / y.size()[0]
        # print('accuracy: %.2f' % accuracy)
        val_accuracy.append(accuracy)
        acc_l = pred.eq(y.data).cpu()[0:100].sum() * 1.0 / 100
        acc_n = pred.eq(y.data).cpu()[100:150].sum() * 1.0 / 50
        print('accuracy: %.2f, lifelong accuracy: %.2f, new accuracy: %.2f' % (accuracy, acc_l, acc_n))

    return numpy.mean(numpy.asarray(val_accuracy))
Example No. 11
    def forward_single_image_tensor(self, img_tensor):
        """
        Simple forward pass on the network.

        Normalize the image if we are in TEST mode
        If we are in TRAIN mode then assume the dataset object has already normalized
        the image

        :param img_tensor: torch.FloatTensor with shape [3,H,W]
        :type img_tensor:
        :return: torch.FloatTensor with shape  [H, W, D]
        :rtype:
        """

        assert len(img_tensor.shape) == 3


        # transform to shape [1,3,H,W]
        img_tensor = img_tensor.unsqueeze(0)

        # The fcn throws an error if we don't use a Variable here . . .
        # Maybe it's because it is in train mode?
        img_tensor = Variable(img_tensor.cuda(), requires_grad=False)
        res = self.forward(img_tensor) # shape [1,D,H,W]
        # print "res.shape 1", res.shape


        res = res.squeeze(0) # shape [D,H,W]
        # print "res.shape 2", res.shape

        res = res.permute(1,2,0) # shape [H,W,D]
        # print "res.shape 3", res.shape

        return res
Example No. 12
def train(epoch):
    epoch_loss = 0
    for iteration, batch in enumerate(training_data_loader, 1):
        randH = random.randint(0, opt.remsize)
        randW = random.randint(0, opt.remsize)
        input = Variable(batch[0][:, :, randH:randH + opt.size, randW:randW + opt.size])
        target = Variable(batch[1][:, :,
                         randH + target_gap:randH + target_gap + target_size,
                         randW + target_gap:randW + target_gap + target_size])
        #target =target.squeeze(1)
        #print(target.data.size())
        if cuda:
            input = input.cuda()
            target = target.cuda()
        optimizer.zero_grad()
        output = unet(input)
        #print(output.data.size())
        loss = criterion(output, target)
        epoch_loss += loss.data[0]
        loss.backward()
        optimizer.step()
        if iteration % 10 == 0:
            print("===> Epoch[{}]({}/{}): Loss: {:.4f}".format(epoch, iteration, len(training_data_loader), loss.data[0]))
    imgout = output.data/2 + 1
    torchvision.utils.save_image(imgout,"/home/wcd/PytorchProject/Unet/unetdata/checkpoint/epch_"+str(epoch)+'.jpg')
    print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, epoch_loss / len(training_data_loader)))
Example No. 13
    def probs(self, generator, outputs, vocab_pointer_switches, context_question_switches, 
        context_attention, question_attention, 
        context_indices, question_indices, 
        oov_to_limited_idx):

        size = list(outputs.size())

        size[-1] = self.generative_vocab_size
        scores = generator(outputs.view(-1, outputs.size(-1))).view(size)
        p_vocab = F.softmax(scores, dim=scores.dim()-1)
        scaled_p_vocab = vocab_pointer_switches.expand_as(p_vocab) * p_vocab

        effective_vocab_size = self.generative_vocab_size + len(oov_to_limited_idx)
        if self.generative_vocab_size < effective_vocab_size:
            size[-1] = effective_vocab_size - self.generative_vocab_size
            buff = Variable(scaled_p_vocab.data.new(*size).fill_(EPSILON))
            scaled_p_vocab = torch.cat([scaled_p_vocab, buff], dim=buff.dim()-1)

        p_context_ptr = Variable(scaled_p_vocab.data.new(*scaled_p_vocab.size()).fill_(EPSILON))
        p_context_ptr.scatter_add_(p_context_ptr.dim()-1, context_indices.unsqueeze(1).expand_as(context_attention), context_attention)
        scaled_p_context_ptr = (context_question_switches * (1 - vocab_pointer_switches)).expand_as(p_context_ptr) * p_context_ptr

        p_question_ptr = Variable(scaled_p_vocab.data.new(*scaled_p_vocab.size()).fill_(EPSILON))
        p_question_ptr.scatter_add_(p_question_ptr.dim()-1, question_indices.unsqueeze(1).expand_as(question_attention), question_attention)
        scaled_p_question_ptr = ((1 - context_question_switches) * (1 - vocab_pointer_switches)).expand_as(p_question_ptr) * p_question_ptr

        probs = scaled_p_vocab + scaled_p_context_ptr + scaled_p_question_ptr
        return probs
Example No. 14
def visualizeModel(model, numImages=6):
    wasTraining = model.training
    model.eval()
    imagesSoFar = 0
    fig = plt.figure()

    for i, (inputs, labels) in enumerate(dataloaders['val']):
        if use_gpu:
            inputs, labels = inputs.cuda(), labels.cuda()
        inputs, labels = Variable(inputs), Variable(labels)

        outputs = model(inputs)
        _, preds = torch.max(outputs.data, 1)

        for j in range(inputs.size(0)):
            imagesSoFar += 1
            nCols = 2
            ax = plt.subplot(numImages // nCols, nCols, imagesSoFar)
            ax.axis('off')
            ax.set_title('predicted: {}'.format(class_names[preds[j]]))
            imshow(inputs.cpu().data[j])
            
            if imagesSoFar == numImages:
                model.train(mode=wasTraining)
                return
    model.train(mode=wasTraining)
Example No. 15
def _pad_packed_sequence(sequence, batch_first=False, padding_value=0):
    var_data, batch_sizes = sequence
    max_batch_size = int(batch_sizes[0])
    output = var_data.data.new(len(batch_sizes), max_batch_size, *var_data.size()[1:]).fill_(padding_value)
    output = Variable(output)

    lengths = []
    data_offset = 0
    prev_batch_size = int(batch_sizes[0])
    prev_i = 0
    for i, batch_size in enumerate(batch_sizes.tolist() + [0]):
        if batch_size != prev_batch_size:
            l = prev_batch_size * (i - prev_i)
            tmp = var_data[data_offset:data_offset + l]
            output[prev_i:i, :prev_batch_size] = tmp.view(i - prev_i, prev_batch_size, *tmp.size()[1:])
            data_offset += l
            prev_i = i
        dec = prev_batch_size - batch_size
        if dec > 0:
            lengths.extend((i,) * dec)
        prev_batch_size = batch_size

    lengths.reverse()

    if batch_first:
        output = output.transpose(0, 1)
    # This Variable doesn't actually have any history (well,
    # technically it does; it's just untracked), it is purely here to
    # make ONNX export easier. That is to say, from an autodiff
    # standpoint this doesn't make any sense.
    return output, Variable(torch.LongTensor(lengths))
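For comparison, the stock helpers in torch.nn.utils.rnn perform the same round trip; a minimal usage sketch:

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

padded = torch.tensor([[1, 2, 3], [4, 5, 0]])  # [batch, seq], sorted by length
packed = pack_padded_sequence(padded, lengths=[3, 2], batch_first=True)
out, out_lengths = pad_packed_sequence(packed, batch_first=True)
# out equals `padded`, out_lengths equals tensor([3, 2])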
Example No. 16
def main():
    dtype = torch.FloatTensor
    N, d_in, H, d_out = 64, 1000, 100, 10  # d_in: input dim, d_out: output dim, H: hidden dim

    x = Variable(torch.randn(N, d_in).type(dtype), requires_grad=False)
    y = Variable(torch.randn(N, d_out).type(dtype), requires_grad=False)

    w1 = Variable(torch.randn(d_in, H).type(dtype), requires_grad=True)
    w2 = Variable(torch.randn(H, d_out).type(dtype), requires_grad=True)

    learning_rate = 1e-6
    for t in range(500):

        relu = MyRelu()

        y_pred = relu(x.mm(w1)).mm(w2)

        loss = (y_pred - y).pow(2).sum()

        loss.backward()

        w1.data -= learning_rate * w1.grad.data
        w2.data -= learning_rate * w2.grad.data

        w1.grad.data.zero_()
        w2.grad.data.zero_()
    print(loss.data[0])
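MyRelu is not shown. A sketch of what it plausibly looks like as a legacy-style torch.autograd.Function (matching the per-iteration instantiation above; the modern API would use static forward/backward methods and a ctx argument):

import torch

class MyRelu(torch.autograd.Function):
    def forward(self, x):
        # save the input so backward can mask the gradient
        self.save_for_backward(x)
        return x.clamp(min=0)

    def backward(self, grad_output):
        x, = self.saved_tensors
        grad_input = grad_output.clone()
        grad_input[x < 0] = 0
        return grad_input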
Example No. 17
def show_result(num_epoch, show = False, save = False, path = 'result.png', isFix=False):
    z_ = torch.randn((5*5, 100)).view(-1, 100, 1, 1)
    z_ = Variable(z_.cuda(), volatile=True)

    G.eval()
    if isFix:
        test_images = G(fixed_z_)
    else:
        test_images = G(z_)
    G.train()

    size_figure_grid = 5
    fig, ax = plt.subplots(size_figure_grid, size_figure_grid, figsize=(5, 5))
    for i, j in itertools.product(range(size_figure_grid), range(size_figure_grid)):
        ax[i, j].get_xaxis().set_visible(False)
        ax[i, j].get_yaxis().set_visible(False)

    for k in range(5*5):
        i = k // 5
        j = k % 5
        ax[i, j].cla()
        ax[i, j].imshow((test_images[k].cpu().data.numpy().transpose(1, 2, 0) + 1) / 2)

    label = 'Epoch {0}'.format(num_epoch)
    fig.text(0.5, 0.04, label, ha='center')
    plt.savefig(path)

    if show:
        plt.show()
    else:
        plt.close()
Example No. 18
    def forward_pass(self):

        ## wrap inputs and ground truth as CUDA Variables

        X=self.initialize_input()
        self.batch_y=self.sample['groundtruth']
        Y = Variable(self.batch_y.float())
        Y=Y.cuda()
                
        ## fwd
        if self.dist_net=='v2':
            self.batch_y_dist=distance_map_batch_v2(self.batch_y,self.threshold,self.bins)
            Y_dist = Variable(self.batch_y_dist.float())
            Y_dist=Y_dist.cuda()
            probs_dist,probs_seg=self.predict(X)
            loss_seg=self.criterion(Y,probs_seg,self.loss_fn)
            loss_dist=self.criterion(Y_dist,probs_dist,'cross-entropy')
            loss=loss_seg+loss_dist

            
        else:
            self.batch_y_dist=None
            probs_seg=self.predict(X)
            probs_dist=None
            loss=self.criterion(Y,probs_seg,self.loss_fn)

    
        return loss,probs_dist,probs_seg
Example No. 19
def validate(val_loader, model, criterion, location,num_epochs, print_freq):
    batch_time = AverageMeter()
    losses = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for i, (image, target, _) in enumerate(val_loader):
        image_var = Variable(image, volatile=True)
        target_var = Variable(target, volatile=True)

        if 'cuda' in location:
            image_var = image_var.cuda()
            target_var = target_var.cuda()

        # compute output
        output = model(image_var)
        loss = criterion(output, target_var)
        losses.update(loss.data[0], image.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                   i, len(val_loader), batch_time=batch_time, loss=losses))

    print(' * Val Loss {loss.avg:.3f}'
          .format(loss=losses))

    # return the running average rather than the last mini-batch loss
    return losses.avg
Example No. 20
def to_variable(numpy_data, volatile=False, is_cuda=True):
    numpy_data = numpy_data.astype(np.float32)
    torch_data = torch.from_numpy(numpy_data).float()
    variable = Variable(torch_data, volatile=volatile)
    if is_cuda:
        variable = variable.cuda()
    return variable
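A usage sketch (guarding the CUDA flag so it also runs on CPU-only machines):

import numpy as np
import torch

batch = np.random.rand(4, 3)
v = to_variable(batch, is_cuda=torch.cuda.is_available())
print(v.size())  # torch.Size([4, 3])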
Example No. 21
def stylize(**kwargs):
    opt = Config()

    for k_, v_ in kwargs.items():
        setattr(opt, k_, v_)

    # image preprocessing
    content_image = tv.datasets.folder.default_loader(opt.content_path)
    content_transform = tv.transforms.Compose([
        tv.transforms.ToTensor(),
        tv.transforms.Lambda(lambda x: x.mul(255))
    ])
    content_image = content_transform(content_image)
    content_image = content_image.unsqueeze(0)
    content_image = Variable(content_image, volatile=True)

    # model
    style_model = TransformerNet().eval()
    style_model.load_state_dict(t.load(opt.model_path, map_location=lambda _s, _: _s))

    if opt.use_gpu:
        content_image = content_image.cuda()
        style_model.cuda()

    # style transfer and save result
    output = style_model(content_image)
    output_data = output.cpu().data[0]
    tv.utils.save_image(((output_data / 255)).clamp(min=0, max=1), opt.result_path)
Example No. 22
def train(dataloader):
    uf.train()
    total_loss = 0
    total_items = 0
    start_time = time.time()
    for i_batch, batch in enumerate(dataloader):
        output_seq = Variable(batch['output_seq'])
        del (batch['output_seq'])
        for k in batch:
            batch[k] = Variable(batch[k])
        if DEVICE_NO != -1:
            output_seq = output_seq.cuda(DEVICE_NO)
            for k in batch:
                batch[k] = batch[k].cuda(DEVICE_NO)
        uf.zero_grad()
        pred = uf.forward(**batch)
        pred = pred.view(-1, pred.size(-1))
        output_seq = output_seq.view(-1)
        loss = criteria(pred, output_seq)
        loss.backward()
        num_items = len([x for x in output_seq if int(x) != criteria.ignore_index])
        total_loss += num_items * loss.data
        total_items += num_items
        optimizer.step()

        if i_batch % log_interval == 0 and i_batch > 0:
            cur_loss = total_loss[0] / total_items
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:04.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, i_batch, len(dataloader.dataset) // dataloader.batch_size, optimizer.param_groups[0]['lr'],
                                elapsed * 1000 / log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            total_items = 0
            start_time = time.time()
Example No. 23
    def sample(self, mu, logvar, k):
        if torch.cuda.is_available():
            eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_()).cuda()  #[P,B,Z]
            z = eps.mul(torch.exp(.5*logvar)) + mu  #[P,B,Z]
            logpz = lognormal(z, Variable(torch.zeros(self.B, self.z_size).cuda()),
                                Variable(torch.zeros(self.B, self.z_size)).cuda())  #[P,B]
            logqz = lognormal(z, Variable(mu.data), Variable(logvar.data))
        else:
            eps = Variable(torch.FloatTensor(k, self.B, self.z_size).normal_())  #[P,B,Z]
            z = eps.mul(torch.exp(.5*logvar)) + mu  #[P,B,Z]
            logpz = lognormal(z, Variable(torch.zeros(self.B, self.z_size)),
                                Variable(torch.zeros(self.B, self.z_size)))  #[P,B]
            logqz = lognormal(z, mu, logvar)
        return z, logpz, logqz
Example No. 24
def random_batch(batch_size=3):
    input_seqs = []
    target_seqs = []

    # Choose random pairs
    for i in range(batch_size):
        pair = random.choice(pairs)
        input_seqs.append(indexes_from_sentence(input_lang, pair[0]))
        target_seqs.append(indexes_from_sentence(output_lang, pair[1]))

    # Zip into pairs, sort by length (descending), unzip
    seq_pairs = sorted(zip(input_seqs, target_seqs), key=lambda p: len(p[0]), reverse=True)
    input_seqs, target_seqs = zip(*seq_pairs)

    # For input and target sequences, get array of lengths and pad with 0s to max length
    input_lengths = [len(s) for s in input_seqs]
    input_padded = [pad_seq(s, max(input_lengths)) for s in input_seqs]
    target_lengths = [len(s) for s in target_seqs]
    target_padded = [pad_seq(s, max(target_lengths)) for s in target_seqs]

    # Turn padded arrays into (batch x seq) tensors, transpose into (seq x batch)
    input_var = Variable(torch.LongTensor(input_padded)).transpose(0, 1)
    target_var = Variable(torch.LongTensor(target_padded)).transpose(0, 1)

    if USE_CUDA:
        input_var = input_var.cuda()
        target_var = target_var.cuda()

    return input_var, input_lengths, target_var, target_lengths
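pad_seq is not defined here. A minimal sketch, assuming it right-pads a list of token ids with zeros up to max_length:

def pad_seq(seq, max_length, pad_token=0):
    # right-pad a list of token ids to max_length
    return seq + [pad_token] * (max_length - len(seq))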
Example No. 25
    def update(self):
        

        next_value = self.actor_critic(Variable(self.rollouts.states[-1], volatile=True))[0].data

        self.rollouts.compute_returns(next_value, self.use_gae, self.gamma, self.tau)

        # values, action_log_probs, dist_entropy = self.actor_critic.evaluate_actions(
        #                                             Variable(self.rollouts.states[:-1].view(-1, *self.obs_shape)), 
        #                                             Variable(self.rollouts.actions.view(-1, self.action_shape)))


        values = torch.cat(self.rollouts.value_preds, 0).view(self.num_steps, self.num_processes, 1) 
        action_log_probs = torch.cat(self.rollouts.action_log_probs).view(self.num_steps, self.num_processes, 1)
        dist_entropy = torch.cat(self.rollouts.dist_entropy).view(self.num_steps, self.num_processes, 1)


        self.rollouts.value_preds = []
        self.rollouts.action_log_probs = []
        self.rollouts.dist_entropy = []

        advantages = Variable(self.rollouts.returns[:-1]) - values
        value_loss = advantages.pow(2).mean()

        action_loss = -(Variable(advantages.data) * action_log_probs).mean()

        self.optimizer.zero_grad()
        cost = action_loss + value_loss*self.value_loss_coef - dist_entropy.mean()*self.entropy_coef
        cost.backward()

        nn.utils.clip_grad_norm(self.actor_critic.parameters(), self.grad_clip)

        self.optimizer.step()
Example No. 26
 def forward(self, inputs): # inputs (bs,words/sentence) 10,7
     bsz = inputs.size(0) # batch size might change
     if inputs.size(1) < 3: # padding issues on really short sentences
         pads = Variable(torch.zeros(bsz,3-inputs.size(1))).type(torch.LongTensor)
         inputs = torch.cat([inputs,pads.cuda()],dim=1)
     embeds = self.embeddings(inputs) # 10,h,300
     embeds = embeds.unsqueeze(3)
     embeds = embeds.permute(0,2,1,3)
     s_embeds = self.s_embeddings(inputs)
     s_embeds = s_embeds.unsqueeze(3)
     s_embeds = s_embeds.permute(0,2,1,3)
     out = torch.cat([embeds,s_embeds],dim=3)
     #print(out.size())
     fw3 = self.conv3(out) # 10,100,h,1
     fw5 = self.conv5(out) # 10,100,h,1
     fw7 = self.conv7(out) # 10,100,h,1
     out = torch.cat([fw3,fw5,fw7],dim=1)
     out = F.relu(out) # 10,300,h/3,1
     #out = self.avgpool(out)
     #out = F.relu(self.conv(out))
     #print(out.size())
     #out = out.view(bsz,n_featmaps*3,-1,2) # 10,300,7
     #print(out.size())
     out = self.maxpool(out) # 10,300,1,1
     out = out.view(bsz,-1) # 10,600
     out = self.dropout(out) # 10,2
     out = self.linear(out) # 10,2
     return out
Example No. 27
def F_affine2d(x, matrix, center=True):
    """
    2D Affine image transform on torch.autograd.Variable
    """
    if matrix.dim() == 2:
        matrix = matrix.view(-1,2,3)

    A_batch = matrix[:,:,:2]
    if A_batch.size(0) != x.size(0):
        A_batch = A_batch.repeat(x.size(0),1,1)
    b_batch = matrix[:,:,2].unsqueeze(1)

    # make a meshgrid of normal coordinates
    _coords = th_iterproduct(x.size(1),x.size(2))
    coords = Variable(_coords.unsqueeze(0).repeat(x.size(0),1,1).float(),
                    requires_grad=False)
    if center:
        # shift the coordinates so center is the origin
        coords[:,:,0] = coords[:,:,0] - (x.size(1) / 2. + 0.5)
        coords[:,:,1] = coords[:,:,1] - (x.size(2) / 2. + 0.5)

    # apply the coordinate transformation
    new_coords = coords.bmm(A_batch.transpose(1,2)) + b_batch.expand_as(coords)

    if center:
        # shift the coordinates back so origin is origin
        new_coords[:,:,0] = new_coords[:,:,0] + (x.size(1) / 2. + 0.5)
        new_coords[:,:,1] = new_coords[:,:,1] + (x.size(2) / 2. + 0.5)

    # map new coordinates using bilinear interpolation
    x_transformed = F_bilinear_interp2d(x, new_coords)

    return x_transformed
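A usage sketch: building a pure-rotation affine matrix for F_affine2d (the call is commented out because F_bilinear_interp2d and th_iterproduct are not shown):

import math
import torch

theta = math.radians(15.0)
matrix = torch.FloatTensor([[math.cos(theta), -math.sin(theta), 0.0],
                            [math.sin(theta),  math.cos(theta), 0.0]])
# x_rot = F_affine2d(x, Variable(matrix), center=True)   # x: [C,H,W] Variable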
Example No. 28
    def __val(self):
        """
          Validation function during the train phase.
        """
        self.seg_net.eval()
        start_time = time.time()

        for j, data_tuple in enumerate(self.val_loader):
            # Change the data type.
            inputs = Variable(data_tuple[0].cuda(async=True), volatile=True)
            targets = Variable(data_tuple[1].cuda(async=True), volatile=True)
            # Forward pass.
            outputs = self.seg_net(inputs)
            # Compute the loss of the val batch.
            loss_pixel = self.pixel_loss(outputs, targets)
            loss = loss_pixel

            self.val_losses.update(loss.data[0], inputs.size(0))

            # Update the vars of the val phase.
            self.batch_time.update(time.time() - start_time)
            start_time = time.time()

        self.module_utilizer.save_net(self.seg_net, self.iters)
        # Print the log info & reset the states.
        Log.info(
            'Test Time {batch_time.sum:.3f}s, ({batch_time.avg:.3f})\t'
            'Loss {loss.avg:.8f}\n'.format(
            batch_time=self.batch_time, loss=self.val_losses))
        self.batch_time.reset()
        self.val_losses.reset()
        self.seg_net.train()
Example No. 29
    def update_parameters(self, batch):
        state_batch = Variable(torch.cat(batch.state))
        action_batch = Variable(torch.cat(batch.action))
        reward_batch = Variable(torch.cat(batch.reward))
        mask_batch = Variable(torch.cat(batch.mask))
        next_state_batch = Variable(torch.cat(batch.next_state))
        
        next_action_batch = self.actor_target(next_state_batch)
        next_state_action_values = self.critic_target(next_state_batch, next_action_batch)

        reward_batch = reward_batch.unsqueeze(1)
        mask_batch = mask_batch.unsqueeze(1)
        expected_state_action_batch = reward_batch + (self.gamma * mask_batch * next_state_action_values)

        self.critic_optim.zero_grad()

        state_action_batch = self.critic((state_batch), (action_batch))

        value_loss = F.mse_loss(state_action_batch, expected_state_action_batch)
        value_loss.backward()
        self.critic_optim.step()

        self.actor_optim.zero_grad()

        policy_loss = -self.critic((state_batch),self.actor((state_batch)))

        policy_loss = policy_loss.mean()
        policy_loss.backward()
        self.actor_optim.step()

        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)

        return value_loss.item(), policy_loss.item()
Example No. 30
def vector_grad():
    x = Variable(torch.ones(2)*3, requires_grad=True)
    y = Variable(torch.ones(2)*4, requires_grad=True)
    z = x.pow(2) + 3*y.pow(2)
    z.backward(torch.ones(2))
    print(x.grad)
    print(y.grad)
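For reference, with z = x^2 + 3*y^2 and an all-ones upstream gradient, this prints x.grad = 2x = [6, 6] and y.grad = 6y = [24, 24].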
Example No. 31
    def forward(self, sent_tuple):
        # sent_len: [max_len, ..., min_len] (batch)
        # sent: Variable(seqlen x batch x worddim)

        sent, sent_len = sent_tuple
        bsize = sent.size(1)

        self.init_lstm = self.init_lstm if bsize == self.init_lstm.size(1) else \
                Variable(torch.FloatTensor(2, bsize, self.enc_lstm_dim).zero_()).cuda()

        # Sort by length (keep idx)
        sent_len, idx_sort = np.sort(sent_len)[::-1], np.argsort(-sent_len)
        sent = sent.index_select(1, Variable(torch.cuda.LongTensor(idx_sort)))
        # Handling padding in Recurrent Networks
        sent_packed = nn.utils.rnn.pack_padded_sequence(sent, sent_len)
        sent_output = self.enc_lstm(sent_packed,
                                    (self.init_lstm, self.init_lstm))[0]
        # seqlen x batch x 2*nhid
        sent_output = nn.utils.rnn.pad_packed_sequence(sent_output)[0]
        # Un-sort by length
        idx_unsort = np.argsort(idx_sort)
        sent_output = sent_output.index_select(1,
            Variable(torch.cuda.LongTensor(idx_unsort)))

        sent_output = sent_output.transpose(0,1).contiguous()
        sent_output_proj = self.proj_lstm(sent_output.view(-1,
            2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)
        sent_key_proj = self.proj_key(sent_output.view(-1,
            2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)
        sent_key_proj = torch.tanh(sent_key_proj)
        # NAACL : u_it=tanh(W_w.h_it + b_w) like in NAACL paper

        # Temperature
        Temp = 3

        sent_w1 = self.query_embedding(Variable(torch.LongTensor(bsize*[0]).cuda())).unsqueeze(2) #(bsize, nhid, 1)
        keys1 = sent_key_proj.bmm(sent_w1).squeeze(2) / Temp
        keys1 = keys1 + ((keys1 == 0).float()*-1000)
        alphas1 = self.softmax(keys1).unsqueeze(2).expand_as(sent_key_proj)
        emb1 = torch.sum(alphas1 * sent_output_proj, 1).squeeze(1)


        sent_w2 = self.query_embedding(Variable(torch.LongTensor(bsize*[1]).cuda())).unsqueeze(2) #(bsize, nhid, 1)
        keys2 = sent_key_proj.bmm(sent_w2).squeeze(2) / Temp
        keys2 = keys2 + ((keys2 == 0).float()*-1000)
        alphas2 = self.softmax(keys2).unsqueeze(2).expand_as(sent_key_proj)
        emb2 = torch.sum(alphas2 * sent_output_proj, 1).squeeze(1)

        sent_w3 = self.query_embedding(Variable(torch.LongTensor(bsize*[1]).cuda())).unsqueeze(2) #(bsize, nhid, 1)
        keys3 = sent_key_proj.bmm(sent_w3).squeeze(2) / Temp
        keys3 = keys3 + ((keys3 == 0).float()*-1000)
        alphas3 = self.softmax(keys3).unsqueeze(2).expand_as(sent_key_proj)
        emb3 = torch.sum(alphas3 * sent_output_proj, 1).squeeze(1)

        sent_w4 = self.query_embedding(Variable(torch.LongTensor(bsize*[1]).cuda())).unsqueeze(2) #(bsize, nhid, 1)
        keys4 = sent_key_proj.bmm(sent_w4).squeeze(2) / Temp
        keys4 = keys4 + ((keys4 == 0).float()*-1000)
        alphas4 = self.softmax(keys4).unsqueeze(2).expand_as(sent_key_proj)
        emb4 = torch.sum(alphas4 * sent_output_proj, 1).squeeze(1)


        if int(time.time()) % 100 == 0:
            # the slices are 1-D, so concatenate along dim 0
            print('alphas', torch.cat((alphas1.data[0, :, 0],
                                       alphas2.data[0, :, 0],
                                       torch.abs(alphas1.data[0, :, 0] -
                                                 alphas2.data[0, :, 0])), 0))

        emb = torch.cat((emb1, emb2, emb3, emb4), 1)
        return emb
Example No. 32
    def forward(self, predictions, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
                conf shape: torch.size(batch_size,num_priors,num_classes)
                loc shape: torch.size(batch_size,num_priors,4)
                priors shape: torch.size(num_priors,4)

            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """
        loc_data, conf_data, priors = predictions
        # num is the batch size
        num = loc_data.size(0)
        priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        #         print('loc_t.size',loc_t.size())
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            defaults = priors.data
            match(self.threshold, truths, defaults, self.variance, labels,
                  loc_t, conf_t, idx)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)
        pos = conf_t > 0  # pos is a 0/1 mask with the same size as conf_t
        num_pos = pos.sum(dim=1, keepdim=True)  # size: (15,1)

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]

        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)  # expand copies the mask values
        #         print('2',loc_data.size(),pos_idx.size(),loc_data[pos_idx].size())
        loc_p = loc_data[pos_idx].view(-1, 4)
        #         print('1',loc_p.size(),loc_data[pos_idx].size())
        loc_t = loc_t[pos_idx].view(-1, 4)
        # loss_l is computed only over boxes with conf_t > 0
        loss_l = F.smooth_l1_loss(loc_p, loc_t, size_average=False)

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        #         print('1',conf_t.size())
        '''
        batch_conf.size: (15*8732, 10); conf_t.size: (15, 8732). conf_t holds the true labels, background included.
        E.g. with 15 images, 8732 boxes per image and 10 classes, batch_conf holds the predicted scores and conf_t
        the true labels; batch_conf.gather returns each box's predicted score for its true class. For a box whose
        true label is 7 it returns that box's score for class 7, so the second term has size (15*8732, 1).
        '''
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))

        # Hard Negative Mining
        #         loss_c=loss_c.view(pos.size()[0],pos.size()[1])
        #         loss_c[pos]=0
        loss_c = loss_c.view(num, -1)  # size: (15, 8732)
        loss_c[pos] = 0  # filter out pos boxes for now: zero loss_c for all positives (pos.size: (15, 8732))
        #         loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        #         num_pos = pos.long().sum(1, keepdim=True)
        #         print('num_pose',num_pos,pos.size(1))
        # size (15,1): 3 * num_pos, capped at pos.size(1) - 1
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        # idx_rank.size: (15, 8732), num_neg.size: (15, 1); idx_rank ranks each box by loss,
        # rank 0 being the largest loss, so this keeps the hardest negatives
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        #         print(pos.size(),pos.unsqueeze(2).size())
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        # gather positives (conf_t > 0) and negatives (3x as many, those with the largest loss_c)
        conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(
            -1, self.num_classes)
        targets_weighted = conf_t[(pos + neg).gt(0)]
        #         print(conf_p.size(),targets_weighted.size())
        loss_c = F.cross_entropy(conf_p, targets_weighted, size_average=False)

        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N

        N = num_pos.data.sum()
        loss_l /= N
        loss_c /= N
        return loss_l, loss_c
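log_sum_exp is not defined in this snippet; in the common ssd.pytorch implementation it is a max-stabilized reduction over the class dimension, roughly:

import torch

def log_sum_exp(x):
    # numerically stable log(sum(exp(x))) over dim 1; x: [num*num_priors, num_classes]
    x_max = x.data.max()
    return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max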
Example No. 33
def trainNet(net, batch_size, n_epochs, learning_rate):
    
    #Print all of the hyperparameters of the training iteration:
    print("===== HYPERPARAMETERS =====")
    print("batch_size=", batch_size)
    print("epochs=", n_epochs)
    print("learning_rate=", learning_rate)
    print("=" * 30)
    
    print("Number of train samples: ", len(train_data))
    print("Number of test samples: ", len(validation_data))
    print("Detected Classes are: ", train_data.class_to_idx)
    
    #Get training data
    train_loader = train_data_loader #get_train_loader(batch_size)
    n_batches = len(train_loader)
    
    #Create our loss and optimizer functions
    loss, optimizer = createLossAndOptimizer(net, learning_rate)
    
    #Time for printing
    training_start_time = time.time()
    
    #Loop for n_epochs
    for epoch in range(n_epochs):
        
        running_loss = 0.0
        print_every = n_batches // 10
        start_time = time.time()
        total_train_loss = 0
        
        for i, data in enumerate(train_loader, 0):
            
            #Get inputs
            inputs, labels = data
            
            #Wrap them in a Variable object
            inputs, labels = Variable(inputs), Variable(labels)
            
            #Set the parameter gradients to zero
            optimizer.zero_grad()
            
            #Forward pass, backward pass, optimize
            outputs = net(inputs)
            
            loss_size = loss(outputs, labels)
            loss_size.backward()
            optimizer.step()
            
            #Print statistics
            #running_loss += loss_size.data[0]
            #total_train_loss += loss_size.data[0]
            running_loss += loss_size.item()
            total_train_loss += loss_size.item()
            
            #Print every 10th batch of an epoch
            if (i + 1) % (print_every + 1) == 0:
                print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                        epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
                #Reset running loss and time
                print("this is the {} th running".format(i))
                running_loss = 0.0
                start_time = time.time()
        
        print("epoch finished, took {:.2f}s".format(time.time() - training_start_time))
        #At the end of the epoch, do a pass on the validation set
        computeAccuracy(net, loss, validation_data_loader, 'validate epoch')
        print("Training finished, took {:.2f}s".format(time.time() - training_start_time))

        test(net)
Example No. 34
def computeAccuracy(net, loss, accuracy_data_loader, title):
  total_val_loss = 0
  total = 0
  correct = 0
  
  total_a = 0
  correct_a = 0
  total_a_b = 0
  total_a_c = 0
  total_a_d = 0
  total_b = 0
  correct_b = 0
  total_b_a = 0
  total_b_c = 0
  total_b_d = 0
  total_c = 0
  correct_c = 0
  total_c_a = 0
  total_c_b = 0
  total_c_d = 0
  total_d = 0
  correct_d = 0
  total_d_a = 0
  total_d_b = 0
  total_d_c = 0
  
  for inputs, labels in accuracy_data_loader:

      #Wrap tensors in Variables
      inputs, labels = Variable(inputs), Variable(labels)

      #Forward pass
      val_outputs = net(inputs)
      val_loss_size = loss(val_outputs, labels)

      _, predicted = torch.max(val_outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()
      
      for l, p in zip(labels, predicted):
          if l.item() == 0:
             total_a +=1
             if l == p:
                 correct_a +=1
             elif p == 1:
                total_a_b +=1
             elif p == 2:
                total_a_c +=1
             elif p == 3:
                total_a_d +=1
          elif l.item() == 1:
             total_b +=1
             if l == p:
                correct_b +=1
             elif p == 0:
                total_b_a +=1
             elif p == 2:
                total_b_c +=1
             elif p == 3:
                total_b_d +=1
          elif l.item() == 2:
             total_c +=1
             if l == p:
                correct_c +=1
             elif p == 0:
                total_c_a +=1
             elif p == 1:
                total_c_b +=1
             elif p == 3:
                total_c_d +=1
          elif l.item() == 3:
             total_d +=1
             if l == p:
                correct_d +=1
             elif p == 0:
                total_d_a +=1
             elif p == 1:
                total_d_b +=1
             elif p ==2:
                total_d_c +=1
      #total_val_loss += val_loss_size.data[0]
      total_val_loss += val_loss_size.item()

  print("Validation loss = {:.2f}".format(total_val_loss / len(accuracy_data_loader)))

  print("{} total images {}".format(title, total))
  print("{} correct images {}".format(title, correct))
  
  print('Accuracy of the network on the {} images: {} %'.format(title, 100 * correct / total))
  
  print("total a images {}".format(total_a))
  print("correct a images {}".format(correct_a))
  print('Accuracy of the a images: {} %'.format(100 * correct_a / total_a))
  print("incorrect a images predicted b {}".format(total_a_b))
  print("incorrect a images predicted c {}".format(total_a_c))
  print("incorrect a images predicted d {}".format(total_a_d))
  
  print("total b images {}".format(total_b))
  print("correct b images {}".format(correct_b))
  print('Accuracy of the b images: {} %'.format(100 * correct_b / total_b))
  print("incorrect b images predicted a {}".format(total_b_a))
  print("incorrect b images predicted c {}".format(total_b_c))
  print("incorrect b images predicted d {}".format(total_b_d))
  
  print("total c images {}".format(total_c))
  print("correct c images {}".format(correct_c))
  print('Accuracy of the c images: {} %'.format(100 * correct_c / total_c))
  print("incorrect c images predicted a {}".format(total_c_a))
  print("incorrect c images predicted b {}".format(total_c_b))
  print("incorrect c images predicted d {}".format(total_c_d))
  
  print("total d images {}".format(total_d))
  print("correct d images {}".format(correct_d))
  print('Accuracy of the d images: {} %'.format(100 * correct_d / total_d))
  print("incorrect d images predicted a {}".format(total_d_a))
  print("incorrect d images predicted b {}".format(total_d_b))
  print("incorrect d images predicted c {}".format(total_d_c))
Example No. 35
import torch
from torch.autograd import Variable

LR = 1e-3
x = torch.tensor([1., 2., 3., 4., 5., 6., 7., 8., 9., 11.])
y = torch.tensor([3., 5., 7., 9., 11., 14., 15., 18., 20., 23.])

x = Variable(x, requires_grad=True)
y = Variable(y, requires_grad=True)

t0 = torch.rand(1)  # intercept
t1 = torch.rand(1)  # slope


def hypothesis(x):
    y_pred = t0 + t1 * x
    return y_pred


def costfunc(y_pred, y):
    loss = (y_pred - y).pow(2).sum() / 10
    return loss


for i in range(200):
    y_pred = hypothesis(x)
    loss = costfunc(y_pred, y)
    t0_grad = (y_pred - y).sum() / 5
    t1_grad = ((y_pred - y) * x.t()).sum() / 5
    t0 -= LR * t0_grad
    t1 -= LR * t1_grad
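The loop above updates the parameters with hand-derived gradients, so autograd is never used. A sketch of the same fit letting autograd differentiate (uses the x, y, and LR defined above; requires_grad belongs on the parameters rather than the data):

import torch

t0 = torch.rand(1, requires_grad=True)
t1 = torch.rand(1, requires_grad=True)
for i in range(200):
    loss = ((t0 + t1 * x) - y).pow(2).mean()
    loss.backward()
    with torch.no_grad():
        t0 -= LR * t0.grad
        t1 -= LR * t1.grad
    t0.grad.zero_()
    t1.grad.zero_()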
Example No. 36
def train_step_2(trainloader, net_s, net_z, net_d, optimizer_zc, optimizer_d, criterion_rec, criterion_zc, criterion_d, epoch, use_cuda, _sigma1, _sigma2, _lambda):

    losses = AverageMeter()
    losses1 = AverageMeter()
    losses2 = AverageMeter()
    losses_d_rec = AverageMeter()
    losses_d = AverageMeter()

    print('\n Epoch: %d' % epoch)

    net_z.train()
    net_d.train()


    decoder_loss = 0.0
    adversarial_loss = 0.0

    for i, (inputs, pairweights, sampweights, pairs, index) in enumerate(trainloader):

        inputs = torch.squeeze(inputs,0)
        pairweights = torch.squeeze(pairweights)
        sampweights = torch.squeeze(sampweights)
        index = torch.squeeze(index)
        pairs = pairs.view(-1, 2)

        if use_cuda:
            inputs = inputs.cuda()
            pairweights = pairweights.cuda()
            sampweights = sampweights.cuda()
            index = index.cuda()
            pairs = pairs.cuda()

        inputs, sampweights, pairweights = Variable(inputs), Variable(sampweights, requires_grad=False), \
            Variable(pairweights, requires_grad=False)


        # train z encoder and decoder
        if i % 3 == 0:
            # zero the parameter gradients
            optimizer_d.zero_grad()
            optimizer_zc.zero_grad()
            # forward + backward + optimize

            outputs_s, _ = net_s(inputs)
            outputs_z, dec_z = net_z(inputs)

            loss1 = criterion_rec(inputs, dec_z, sampweights)
            loss2 = criterion_zc(outputs_z, sampweights, pairweights, pairs, index, _sigma1, _sigma2, _lambda)
            loss_zc = loss1 + loss2

            # record loss
            losses1.update(loss1.data[0], inputs.size(0))
            losses2.update(loss2.data[0], inputs.size(0))
            losses.update(loss_zc.data[0], inputs.size(0))

            decoder_input = torch.cat((outputs_s, outputs_z),1)

            outputs_d = net_d(decoder_input)
            #beta = 1.985 # change?
            beta = 1.99 # change?
            loss_d_rec = criterion_d(outputs_d, inputs)
            loss_d =  loss_d_rec - beta * loss_zc

            #record loss
            losses_d_rec.update(loss_d_rec.data[0], inputs.size(0))
            losses_d.update(loss_d.data[0], inputs.size(0))

            loss_d.backward()
            #loss_zc.backward()
            optimizer_d.step()
            optimizer_zc.step()
            decoder_loss += loss_d.data[0]

            print('dcc_reconstruction_loss', losses1.avg, epoch)
            print('dcc_clustering_loss', losses2.avg, epoch)
            print('dcc_loss', losses.avg, epoch)
            print('total_reconstruction_loss', losses_d_rec.avg, epoch)
            print('total_loss', losses_d.avg, epoch)
            # log to TensorBoard
            if args.tensorboard:
                log_value('dcc_reconstruction_loss', losses1.avg, epoch)
                log_value('dcc_clustering_loss', losses2.avg, epoch)
                log_value('dcc_loss', losses.avg, epoch)
                log_value('total_reconstruction_loss', losses_d_rec.avg, epoch)
                log_value('total_loss', losses_d.avg, epoch)

        # train adversarial clustering
        else:
            # zero the parameter gradients
            optimizer_zc.zero_grad()
            # forward + backward + optimize
            outputs_z, dec_z = net_z(inputs)

            loss1 = criterion_rec(inputs, dec_z, sampweights)
            loss2 = criterion_zc(outputs_z, sampweights, pairweights, pairs, index, _sigma1, _sigma2, _lambda)
            loss_zc = loss1 + loss2

            # record loss
            losses1.update(loss1.data[0], inputs.size(0))
            losses2.update(loss2.data[0], inputs.size(0))
            losses.update(loss_zc.data[0], inputs.size(0))

            loss_zc.backward()
            optimizer_zc.step()
            adversarial_loss += loss_zc.data[0]


        # print statistics
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] decoder loss: %.3f, adversarial loss: %.3f' %(epoch + 1, i + 1, decoder_loss / 500, adversarial_loss / 1500))
            decoder_loss = 0.0
            adversarial_loss = 0.0
Example No. 37
class InnerAttentionYANGEncoder(nn.Module):
    def __init__(self, config):
        super(InnerAttentionYANGEncoder, self).__init__()
        self.bsize = config['bsize']
        self.word_emb_dim = config['word_emb_dim']
        self.enc_lstm_dim = config['enc_lstm_dim']
        self.pool_type = config['pool_type']

        self.enc_lstm = nn.LSTM(self.word_emb_dim, self.enc_lstm_dim, 1,
                                bidirectional=True)
        self.init_lstm = Variable(torch.FloatTensor(2, self.bsize,
            self.enc_lstm_dim).zero_()).cuda()

        self.proj_lstm = nn.Linear(2*self.enc_lstm_dim, 2*self.enc_lstm_dim,
                                   bias=True)
        self.proj_query = nn.Linear(2*self.enc_lstm_dim, 2*self.enc_lstm_dim,
                                    bias=True)
        self.proj_enc = nn.Linear(2*self.enc_lstm_dim, 2*self.enc_lstm_dim,
                                  bias=True)

        self.query_embedding = nn.Embedding(1, 2*self.enc_lstm_dim)
        self.softmax = nn.Softmax()

    def forward(self, sent_tuple):
        # sent_len: [max_len, ..., min_len] (batch)
        # sent: Variable(seqlen x batch x worddim)

        sent, sent_len = sent_tuple
        bsize = sent.size(1)

        self.init_lstm = self.init_lstm if bsize == self.init_lstm.size(1) else \
                Variable(torch.FloatTensor(2, bsize, self.enc_lstm_dim).zero_()).cuda()

        # Sort by length (keep idx)
        sent_len, idx_sort = np.sort(sent_len)[::-1], np.argsort(-sent_len)
        sent = sent.index_select(1, Variable(torch.cuda.LongTensor(idx_sort)))
        # Handling padding in Recurrent Networks
        sent_packed = nn.utils.rnn.pack_padded_sequence(sent, sent_len)
        sent_output = self.enc_lstm(sent_packed,
                                    (self.init_lstm, self.init_lstm))[0]
        # seqlen x batch x 2*nhid
        sent_output = nn.utils.rnn.pad_packed_sequence(sent_output)[0]
        # Un-sort by length
        idx_unsort = np.argsort(idx_sort)
        sent_output = sent_output.index_select(1,
            Variable(torch.cuda.LongTensor(idx_unsort)))

        sent_output = sent_output.transpose(0,1).contiguous()

        sent_output_proj = self.proj_lstm(sent_output.view(-1,
            2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)

        sent_keys = self.proj_enc(sent_output.view(-1,
            2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)

        sent_max = torch.max(sent_output, 1)[0].squeeze(1)  # (bsize, 2*nhid)
        sent_summary = self.proj_query(
                       sent_max).unsqueeze(1).expand_as(sent_keys)
        # (bsize, seqlen, 2*nhid)

        sent_M = torch.tanh(sent_keys + sent_summary)
        # (bsize, seqlen, 2*nhid) YANG : M = tanh(Wh_i + Wh_avg
        sent_w = self.query_embedding(Variable(torch.LongTensor(
            bsize*[0]).cuda())).unsqueeze(2)  # (bsize, 2*nhid, 1)

        sent_alphas = self.softmax(sent_M.bmm(sent_w).squeeze(2)).unsqueeze(1)
        # (bsize, 1, seqlen)

        if int(time.time()) % 200 == 0:
            print('w', torch.max(sent_w[0]), torch.min(sent_w[0]))
            print('alphas', sent_alphas[0][0][0:sent_len[0]])
        # Get attention vector
        emb = sent_alphas.bmm(sent_output_proj).squeeze(1)

        return emb
Example No. 38
    def train(self, X_train, x_train_2, y_train, windwos_size, predict_move,
              ex_data):

        # define the RNN model
        class RNN(nn.Module):
            def __init__(self, i_size, h_size, n_layers, o_size):
                super(RNN, self).__init__()

                self.rnn = nn.LSTM(
                    # need to change this value to take more inputs
                    input_size=i_size * 2,
                    hidden_size=h_size,
                    num_layers=n_layers)
                self.out = nn.Linear(h_size, o_size)

            def forward(self, x, h_state):
                r_out, hidden_state = self.rnn(x, h_state)

                hidden_size = hidden_state[-1].size(-1)
                r_out = r_out.view(-1, hidden_size)
                outs = self.out(r_out)

                return outs, hidden_state

        print(torch.cuda.is_available())

        #torch.backends.cudnn.enabled = False

        #torch.backends.cudnn.benchmark = True

        #print("torch = ",torch.cuda.device_count())
        self.rnn = RNN(self.INPUT_SIZE, self.HIDDEN_SIZE, self.NUM_LAYERS,
                       self.OUTPUT_SIZE)
        #self.rnn.cuda()
        self.rnn.cuda()
        optimiser = torch.optim.Adam(self.rnn.parameters(),
                                     lr=self.learning_rate)
        criterion = nn.MSELoss()

        for epoch in range(self.num_epochs):

            hidden_state = None
            for stage in range(0,
                               len(X_train) - windwos_size - self.INPUT_SIZE,
                               windwos_size - predict_move):
                X_train_data = []
                Y_train_Data = []
                X_train_data_r = None
                Y_train_data_r = None
                for i in range(self.INPUT_SIZE + stage,
                               self.INPUT_SIZE + stage + windwos_size):
                    tempdata = []
                    tempdata = np.append(X_train[i - self.INPUT_SIZE:i, 0],
                                         x_train_2[i - self.INPUT_SIZE:i, 0])
                    #tempdata = np.append(tempdata, ex_data[i - self.INPUT_SIZE:i, 0])
                    X_train_data.append(tempdata)
                    Y_train_Data.append(y_train[i + predict_move, 0])

                X_train_data_r, Y_train_data_r = np.array(
                    X_train_data), np.array(Y_train_Data)
                X_train_data_r = np.reshape(
                    X_train_data_r,
                    (X_train_data_r.shape[0], 1, X_train_data_r.shape[1]))

                inputs = Variable(
                    torch.from_numpy(X_train_data_r).float()).cuda()
                labels = Variable(
                    torch.from_numpy(Y_train_data_r).float()).cuda()

                output, hidden_state = self.rnn(inputs, hidden_state)

                loss = criterion(output.view(-1), labels)
                optimiser.zero_grad()
                # back propagation
                loss.backward(retain_graph=True)
                # update
                optimiser.step()

                print('epoch {}, loss {}'.format(epoch, loss.item()))
        return self.rnn
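
A hedged usage sketch (the enclosing class, its hyperparameter attributes, and the `predictor` instance below are assumptions; the slicing inside train() expects 2-D arrays of shape (n, 1)):

import numpy as np

# `predictor` is a hypothetical instance of the enclosing class with
# INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE, learning_rate and
# num_epochs already configured.
n = 500
X = np.random.rand(n, 1)
X2 = np.random.rand(n, 1)
y = np.random.rand(n, 1)
rnn = predictor.train(X, X2, y, window_size=64, predict_move=1, ex_data=None)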
Exemplo n.º 39
0
    def forward(self, predictions, wrapper, wrapper_mask):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
            mask preds, and prior boxes from SSD net.
                loc shape: torch.size(batch_size,num_priors,4)
                conf shape: torch.size(batch_size,num_priors,num_classes)
                masks shape: torch.size(batch_size,num_priors,mask_dim)
                priors shape: torch.size(num_priors,4)
                proto* shape: torch.size(batch_size,mask_h,mask_w,mask_dim)

            targets (list<tensor>): Ground truth boxes and labels for a batch,
                shape: [batch_size][num_objs,5] (last idx is the label).

            masks (list<tensor>): Ground truth masks for each object in each image,
                shape: [batch_size][num_objs,im_height,im_width]

            num_crowds (list<int>): Number of crowd annotations per batch. The crowd
                annotations should be the last num_crowds elements of targets and masks.

            Note: targets, masks and num_crowds are not passed in directly; they
                are unpacked from ``wrapper`` via ``wrapper.get_args(wrapper_mask)``.

            * Only if mask_type == lincomb
        """

        loc_data = predictions['loc']
        conf_data = predictions['conf']
        mask_data = predictions['mask']
        priors = predictions['priors']

        if cfg.mask_type == mask_type.lincomb:
            proto_data = predictions['proto']

        if cfg.use_instance_coeff:
            inst_data = predictions['inst']
        else:
            inst_data = None

        targets, masks, num_crowds = wrapper.get_args(wrapper_mask)
        labels = [None] * len(targets)  # Used in sem segm loss

        batch_size = loc_data.size(0)
        # This is necessary for training on multiple GPUs because
        # DataParallel will cat the priors from each GPU together
        priors = priors[:loc_data.size(1), :]
        num_priors = priors.size(0)
        num_classes = self.num_classes

        # Match priors (default boxes) and ground truth boxes
        # These tensors will be created with the same device as loc_data
        loc_t = loc_data.new(batch_size, num_priors, 4)
        gt_box_t = loc_data.new(batch_size, num_priors, 4)
        conf_t = loc_data.new(batch_size, num_priors).long()
        idx_t = loc_data.new(batch_size, num_priors).long()

        defaults = priors.data

        if cfg.use_class_existence_loss:
            class_existence_t = loc_data.new(batch_size, num_classes - 1)

        for idx in range(batch_size):
            truths = targets[idx][:, :-1].data
            labels[idx] = targets[idx][:, -1].data.long()

            if cfg.use_class_existence_loss:
                # Construct a one-hot vector for each object and collapse it into an existence vector with max
                # Also it's fine to include the crowd annotations here
                class_existence_t[idx, :] = torch.eye(
                    num_classes - 1,
                    device=conf_t.get_device())[labels[idx]].max(dim=0)[0]

            # Split the crowd annotations because they come bundled in
            cur_crowds = num_crowds[idx]
            if cur_crowds > 0:
                split = lambda x: (x[-cur_crowds:], x[:-cur_crowds])
                crowd_boxes, truths = split(truths)

                # We don't use the crowd labels or masks
                _, labels[idx] = split(labels[idx])
                _, masks[idx] = split(masks[idx])
            else:
                crowd_boxes = None

            match(self.pos_threshold, self.neg_threshold, truths, defaults,
                  labels[idx], crowd_boxes, loc_t, conf_t, idx_t, idx,
                  loc_data[idx])

            gt_box_t[idx, :, :] = truths[idx_t[idx]]

        # wrap targets
        loc_t = Variable(loc_t, requires_grad=False)
        conf_t = Variable(conf_t, requires_grad=False)
        idx_t = Variable(idx_t, requires_grad=False)

        pos = conf_t > 0
        num_pos = pos.sum(dim=1, keepdim=True)

        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)

        losses = {}

        # Localization Loss (Smooth L1)
        if cfg.train_boxes:
            loc_p = loc_data[pos_idx].view(-1, 4)
            loc_t = loc_t[pos_idx].view(-1, 4)
            losses['B'] = F.smooth_l1_loss(loc_p, loc_t,
                                           reduction='sum') * cfg.bbox_alpha

        if cfg.train_masks:
            if cfg.mask_type == mask_type.direct:
                if cfg.use_gt_bboxes:
                    pos_masks = []
                    for idx in range(batch_size):
                        pos_masks.append(masks[idx][idx_t[idx, pos[idx]]])
                    masks_t = torch.cat(pos_masks, 0)
                    masks_p = mask_data[pos, :].view(-1, cfg.mask_dim)
                    losses['M'] = F.binary_cross_entropy(
                        torch.clamp(masks_p, 0, 1), masks_t,
                        reduction='sum') * cfg.mask_alpha
                else:
                    losses['M'] = self.direct_mask_loss(
                        pos_idx, idx_t, loc_data, mask_data, priors, masks)
            elif cfg.mask_type == mask_type.lincomb:
                losses.update(
                    self.lincomb_mask_loss(pos, idx_t, loc_data, mask_data,
                                           priors, proto_data, masks, gt_box_t,
                                           inst_data))

                if cfg.mask_proto_loss is not None:
                    if cfg.mask_proto_loss == 'l1':
                        losses['P'] = torch.mean(
                            torch.abs(proto_data)
                        ) / self.l1_expected_area * self.l1_alpha
                    elif cfg.mask_proto_loss == 'disj':
                        losses['P'] = -torch.mean(
                            torch.max(F.log_softmax(proto_data, dim=-1),
                                      dim=-1)[0])

        # Confidence loss
        if cfg.use_focal_loss:
            if cfg.use_sigmoid_focal_loss:
                losses['C'] = self.focal_conf_sigmoid_loss(conf_data, conf_t)
            elif cfg.use_objectness_score:
                losses['C'] = self.focal_conf_objectness_loss(
                    conf_data, conf_t)
            else:
                losses['C'] = self.focal_conf_loss(conf_data, conf_t)
        else:
            losses['C'] = self.ohem_conf_loss(conf_data, conf_t, pos,
                                              batch_size)

        # These losses also don't depend on anchors
        if cfg.use_class_existence_loss:
            losses['E'] = self.class_existence_loss(predictions['classes'],
                                                    class_existence_t)
        if cfg.use_semantic_segmentation_loss:
            losses['S'] = self.semantic_segmentation_loss(
                predictions['segm'], masks, labels)

        # Divide all losses by the number of positives.
        # Don't do it for loss[P] because that doesn't depend on the anchors.
        total_num_pos = num_pos.data.sum().float()
        for k in losses:
            if k not in ('P', 'E', 'S'):
                losses[k] /= total_num_pos
            else:
                losses[k] /= batch_size

        # Loss Key:
        #  - B: Box Localization Loss
        #  - C: Class Confidence Loss
        #  - M: Mask Loss
        #  - P: Prototype Loss
        #  - D: Coefficient Diversity Loss
        #  - E: Class Existence Loss
        #  - S: Semantic Segmentation Loss
        return losses
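
A hedged sketch of consuming the returned dict: training code typically reduces it to a single scalar before backward (the stand-in values below are synthetic; an unweighted sum is used on the assumption that the per-component alpha weights were already applied inside forward()):

import torch

losses = {'B': torch.tensor(1.2, requires_grad=True),
          'C': torch.tensor(0.8, requires_grad=True),
          'M': torch.tensor(0.5, requires_grad=True)}
total_loss = sum(losses.values())
total_loss.backward()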
Exemplo n.º 40
0
class InnerAttentionNAACLEncoder(nn.Module):
    def __init__(self, config):
        super(InnerAttentionNAACLEncoder, self).__init__()
        self.bsize = config['bsize']
        self.word_emb_dim = config['word_emb_dim']
        self.enc_lstm_dim = config['enc_lstm_dim']
        self.pool_type = config['pool_type']


        self.enc_lstm = nn.LSTM(self.word_emb_dim, self.enc_lstm_dim, 1,
                                bidirectional=True)
        self.init_lstm = Variable(torch.FloatTensor(2, self.bsize,
                                  self.enc_lstm_dim).zero_()).cuda()

        self.proj_key = nn.Linear(2*self.enc_lstm_dim, 2*self.enc_lstm_dim,
                                  bias=False)
        self.proj_lstm = nn.Linear(2*self.enc_lstm_dim, 2*self.enc_lstm_dim,
                                   bias=False)
        self.query_embedding = nn.Embedding(1, 2*self.enc_lstm_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, sent_tuple):
        # sent_len: [max_len, ..., min_len] (batch)
        # sent: Variable(seqlen x batch x worddim)

        sent, sent_len = sent_tuple
        bsize = sent.size(1)

        self.init_lstm = self.init_lstm if bsize == self.init_lstm.size(1) else \
                Variable(torch.FloatTensor(2, bsize, self.enc_lstm_dim).zero_()).cuda()

        # Sort by length (keep idx)
        sent_len, idx_sort = np.sort(sent_len)[::-1], np.argsort(-sent_len)
        sent = sent.index_select(1, Variable(torch.cuda.LongTensor(idx_sort)))
        # Handling padding in Recurrent Networks
        sent_packed = nn.utils.rnn.pack_padded_sequence(sent, sent_len)
        sent_output = self.enc_lstm(sent_packed,
                                    (self.init_lstm, self.init_lstm))[0]
        # seqlen x batch x 2*nhid
        sent_output = nn.utils.rnn.pad_packed_sequence(sent_output)[0]
        # Un-sort by length
        idx_unsort = np.argsort(idx_sort)
        sent_output = sent_output.index_select(1, Variable(torch.cuda.LongTensor(idx_unsort)))

        sent_output = sent_output.transpose(0,1).contiguous()

        sent_output_proj = self.proj_lstm(sent_output.view(-1,
            2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)

        sent_key_proj = self.proj_key(sent_output.view(-1,
            2*self.enc_lstm_dim)).view(bsize, -1, 2*self.enc_lstm_dim)

        sent_key_proj = torch.tanh(sent_key_proj)
        # NAACL paper: u_it=tanh(W_w.h_it + b_w)  (bsize, seqlen, 2nhid)

        sent_w = self.query_embedding(Variable(torch.LongTensor(bsize*[0]).cuda())).unsqueeze(2) #(bsize, 2*nhid, 1)

        Temp = 2
        keys = sent_key_proj.bmm(sent_w).squeeze(2) / Temp

        # Set probas of padding to zero in softmax
        keys = keys + ((keys == 0).float()*-10000)

        # note: keys were already divided by Temp above, so the effective
        # softmax temperature here is Temp**2
        alphas = self.softmax(keys/Temp).unsqueeze(2).expand_as(sent_output)
        # occasional debug logging (fires when the wall-clock second is a multiple of 100)
        if int(time.time()) % 100 == 0:
            print('w', torch.max(sent_w), torch.min(sent_w))
            print('alphas', alphas[0, :, 0])
        emb = torch.sum(alphas * sent_output_proj, 1).squeeze(1)

        return emb
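
A hedged usage sketch (this implementation hard-codes .cuda(), so a GPU is required; the config values and tensor sizes below are illustrative assumptions):

import numpy as np
import torch
from torch.autograd import Variable

config = {'bsize': 2, 'word_emb_dim': 300, 'enc_lstm_dim': 64, 'pool_type': 'max'}
if torch.cuda.is_available():
    enc = InnerAttentionNAACLEncoder(config).cuda()
    sent = Variable(torch.randn(5, 2, 300).cuda())  # seqlen x batch x worddim
    sent_len = np.array([5, 3])                     # lengths sorted descending
    emb = enc((sent, sent_len))                     # (batch, 2*enc_lstm_dim)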
Exemplo n.º 41
0
def train_optimizer_attack(args):
    assert "Attack" in args.train_task
    task = train_task_list.tasks[args.train_task]

    print("Training ZO optimizer...\nOptimizer: {}. Optimizee: {}".format(
        task["nn_optimizer"].__name__, task["optimizee"].__name__))

    attack_model = task["attack_model"]()  # targeted model to attack
    if args.cuda:
        attack_model.cuda(args.gpu_num)
    ckpt_dict = torch.load(task["attack_model_ckpt"], map_location='cpu')
    attack_model.load_state_dict(ckpt_dict)
    attack_model.eval()
    attack_model.reset()  # not include parameters

    meta_model = task["optimizee"](optimizee.AttackModel(attack_model),
                                   task['batch_size'])  # meta optimizer
    if args.cuda:
        meta_model.cuda(args.gpu_num)
    train_loader, test_loader = meta_model.dataset_loader(
        args.data_dir, task['batch_size'], task['test_batch_size'])
    train_loader = iter(cycle(train_loader))

    if args.warm_start_ckpt != "None":
        meta_optimizer = task["nn_optimizer"](optimizee.MetaModel(meta_model),
                                              args,
                                              ckpt_path=args.warm_start_ckpt)
    else:
        meta_optimizer = task["nn_optimizer"](optimizee.MetaModel(meta_model),
                                              args)

    if args.cuda:
        meta_optimizer.cuda(args.gpu_num)
    optimizer = optim.Adam(meta_optimizer.parameters(), lr=task['lr'])

    min_test_loss = float("inf")

    for epoch in range(1, task["max_epoch"] + 1):
        decrease_in_loss = 0.0
        final_loss = 0.0
        meta_optimizer.train()
        for i in range(args.updates_per_epoch):
            # The `optimizee` for attack task
            model = task["optimizee"](optimizee.AttackModel(attack_model),
                                      task['batch_size'])
            if args.cuda:
                model.cuda(args.gpu_num)

            # In the attack task, each attacked image corresponds to a particular optimizee model
            data, target = next(train_loader)
            data, target = Variable(data.double()), Variable(target)
            if args.cuda:
                data, target = data.cuda(args.gpu_num), target.cuda(
                    args.gpu_num)

            # Compute initial loss of the model
            f_x = model(data.double())
            initial_loss = model.loss(f_x, target)

            for k in range(task['optimizer_steps'] //
                           args.truncated_bptt_step):
                # Keep states for truncated BPTT
                meta_optimizer.reset_state(keep_states=k > 0,
                                           model=model,
                                           use_cuda=args.cuda,
                                           gpu_num=args.gpu_num)

                loss_sum = 0
                prev_loss = torch.zeros(1)
                if args.cuda:
                    prev_loss = prev_loss.cuda(args.gpu_num)
                for j in range(args.truncated_bptt_step):
                    # Perform a meta update using gradients from the model
                    # and return the current meta model saved in the nn_optimizer
                    meta_model, *_ = meta_optimizer.meta_update(
                        model, data, target)

                    # Compute a loss for a step of the meta nn_optimizer
                    if not args.use_finite_diff:
                        # Use first-order method to train the zeroth-order optimizer
                        # (assume the gradient is available in training time)
                        f_x = meta_model(data)
                        loss = meta_model.loss(f_x, target)
                    else:
                        # Use zeroth-order method to train the zeroth-order optimizer
                        # Approximate the gradient
                        loss = optimizee.custom_loss(meta_model.weight, data,
                                                     target,
                                                     meta_model.nondiff_loss)

                    # weight later unroll steps more heavily: the coefficient
                    # is the global step index within the truncated BPTT window
                    loss_sum += (k * args.truncated_bptt_step +
                                 j) * (loss - Variable(prev_loss))
                    prev_loss = loss.data

                    if hasattr(meta_optimizer, "reg_loss"):
                        loss_sum += meta_optimizer.reg_loss
                    if hasattr(meta_optimizer, "grad_reg_loss"):
                        loss_sum += meta_optimizer.grad_reg_loss

                # Update the parameters of the meta nn_optimizer
                meta_optimizer.zero_grad()
                loss_sum.backward()
                for name, param in meta_optimizer.named_parameters():
                    if param.requires_grad:
                        param.grad.data.clamp_(-1, 1)
                optimizer.step()

            # Compute the relative decrease in the loss function w.r.t. its
            # initial value
            decrease_in_loss += loss.item() / initial_loss.item()
            final_loss += loss.item()

        # test
        meta_optimizer.eval()
        test_loss_sum = 0.0
        test_loss_ratio = 0.0
        num = 0
        for (test_data, test_target) in test_loader:
            test_data, test_target = Variable(
                test_data.double()), Variable(test_target)
            if args.cuda:
                test_data, test_target = test_data.cuda(
                    args.gpu_num), test_target.cuda(args.gpu_num)
            model = task["optimizee"](optimizee.AttackModel(attack_model),
                                      task['test_batch_size'])
            if args.cuda:
                model.cuda(args.gpu_num)
            # Compute initial loss of the model
            f_x = model(test_data.double())
            test_initial_loss = model.loss(f_x, test_target)
            test_loss = 0.0

            meta_optimizer.reset_state(keep_states=False,
                                       model=model,
                                       use_cuda=args.cuda,
                                       gpu_num=args.gpu_num)

            for _ in range(task["test_optimizer_steps"]):
                _, test_loss, _ = meta_optimizer.meta_update(
                    model, test_data, test_target)

            test_loss_sum += test_loss
            test_loss_ratio += test_loss / test_initial_loss
            num += 1

        msg = "Epoch: {}, final loss {}, average final/initial loss ratio: {}, test loss {}, test loss ratio {}".format(
            epoch, final_loss / args.updates_per_epoch,
            decrease_in_loss / args.updates_per_epoch, test_loss_sum / num,
            test_loss_ratio / num)
        print(msg)
        with open(os.path.join(args.output_dir, "train_log.txt"), 'a+') as f:
            f.write(msg + '\n')

        if epoch % args.epochs_per_ckpt == 0:
            meta_optimizer.save(epoch, args.output_dir)

        if test_loss_sum < min_test_loss:
            min_test_loss = test_loss_sum
            meta_optimizer.save(epoch, args.output_dir, best=True)
Exemplo n.º 42
0
def train(args, Xgmodel, AEmodel):
    pos_count, neg_count = 0, 0
    training_samples, training_labels = [], []
    training_samples_encoded = []
    iteration = 1
    train_seen_bidids = set()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(AEmodel.parameters(),
                                 lr=args.ae_lr,
                                 weight_decay=args.weight_decay)
    while iteration < args.iterations:
        print('iteration number:', iteration)
        for date in dates:
            filepath = '../../Data/training3rd/imp.' + date + '.txt.bz2'
            with bz2.BZ2File(filepath) as f:
                for line in f:
                    # BZ2File yields bytes in Python 3, so decode before splitting
                    line = line.decode().split('\n')[0].split('\t')
                    if line[dicts[1]['bidid']] in dicts[0][1]\
                        or line[dicts[1]['bidid']] in dicts[0][2]:
                        continue
                    true_label = 1 if line[dicts[1]['bidid']] in dicts[0][0] else 0
                    if (pos_count == 0 \
                            or float(neg_count) / pos_count > args.imbalance_factor) \
                            and true_label == 0:
                        continue
                    elif true_label == 0:
                        neg_count += 1
                    else:
                        pos_count += 1
                    train_seen_bidids.add(line[dicts[1]['bidid']])
                    training_sample = Xgmodel(line, dicts)
                    training_sample = Variable(
                        torch.FloatTensor(training_sample)).view(1, -1)
                    training_samples.append(training_sample)
                    encoded_output = AEmodel.encode(training_sample)
                    output = AEmodel.decode(encoded_output)
                    loss = criterion(output, training_sample)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    training_labels.append(true_label)
                    if iteration == args.iterations:
                        break  # exits only the innermost file loop; the outer while re-checks
                    iteration += 1
    for training_sample in training_samples:
        training_sample = AEmodel.encode(training_sample).data[0].numpy()
        training_samples_encoded.append(training_sample)
    dtrain = xgb.DMatrix(training_samples_encoded, training_labels)
    param = {
        'max_depth': args.max_depth,
        'eta': args.lr,
        'silent': 1,
        'objective': 'binary:logistic'
    }
    bst = xgb.train(param, dtrain, args.num_rounds)
    print('pos_count:', pos_count, 'neg_count:', neg_count)
    if not args.cv:
        if not os.path.isdir(args.save_dir):
            os.makedirs(args.save_dir)
        save_path = os.path.join(args.save_dir, 'xgboost.model')
        bst.save_model(save_path)
    return bst, train_seen_bidids
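
A hedged scoring sketch: at inference time, new samples would be encoded with AEmodel and scored with the returned booster (the shapes below are synthetic; the 16-dim code size is an assumption):

import numpy as np
import xgboost as xgb

encoded = np.random.rand(8, 16)  # stand-in for AEmodel.encode(...) outputs
dtest = xgb.DMatrix(encoded)
probs = bst.predict(dtest)       # positive-class probability (binary:logistic)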
Exemplo n.º 43
0
## (this example is truncated at the top; the class head below is a hedged
## reconstruction -- a single-feature linear model is assumed)
class SingleLinearRegression(nn.Module):
    def __init__(self):
        super(SingleLinearRegression, self).__init__()
        self.regression = nn.Linear(1, 1)  # in/out dimensions are assumptions

    def forward(self, x):
        out = self.regression(x)

        return out

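## (hedged addition: x_train / y_train are not defined in this excerpt; a
## minimal synthetic setup for single-feature regression might look like this)
x_train = torch.unsqueeze(torch.linspace(0, 1, 100), dim=1)  # shape (100, 1)
y_train = 2 * x_train + 0.3 * torch.rand(x_train.size())
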
## 3. Create a model instance
model = SingleLinearRegression()

## 4. Define the loss criterion
criterion = nn.MSELoss()

## 5. Choose an optimization method
optimizer = optim.SGD(model.parameters(), lr=1e-3)

epoch = 2000
for i in range(epoch):
    x_train = Variable(x_train)
    y_train = Variable(y_train)

    ## 6. Get the model output
    out = model(x_train)

    ## 7. Compute the loss value (criterion takes (input, target))
    loss = criterion(out, y_train)

    ## 8. Zero all parameter gradients
    optimizer.zero_grad()

    ## 9. Compute the gradients
    loss.backward()

    ## 10. Update the parameters
    optimizer.step()
Exemplo n.º 44
0
def train(args):
    with open(args.input_path, 'r') as f:
        data, statement_target, drop_target, operator_target = load_data(f, args.max_len)

    model = PCCoder()

    if use_cuda:
        model.cuda()

    model = nn.DataParallel(model)

    # The cuda types are not used here on purpose - most GPUs can't handle so much memory
    data, statement_target, drop_target, operator_target = torch.LongTensor(data), torch.LongTensor(statement_target), \
                                                    torch.FloatTensor(drop_target), torch.LongTensor(operator_target)

    optimizer = torch.optim.Adam(model.parameters(), lr=learn_rate)

    statement_criterion = nn.CrossEntropyLoss()
    drop_criterion = nn.BCELoss()
    operator_criterion = nn.CrossEntropyLoss()

    lr_sched = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4)

    dataset_size = data.shape[0]
    indices = list(range(dataset_size))
    random.shuffle(indices)

    train_size = int(0.9 * dataset_size)
    train_data = data[indices[:train_size]]
    train_statement_target = statement_target[indices[:train_size]]
    train_drop_target = drop_target[indices[:train_size]]
    train_operator_target = operator_target[indices[:train_size]]

    test_data = Variable(data[indices[train_size:]].type(LongTensor))
    test_statement_target = Variable(statement_target[indices[train_size:]].type(LongTensor))
    test_drop_target = Variable(drop_target[indices[train_size:]].type(FloatTensor))
    test_operator_target = Variable(operator_target[indices[train_size:]].type(LongTensor))

    train_dataset = TensorDataset(train_data, train_statement_target, train_drop_target, train_operator_target)
    data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(num_epochs):
        model.train()
        print("Epoch %d" % epoch)
        lr_sched.step()  # note: PyTorch >= 1.1 expects scheduler.step() after optimizer.step(), at epoch end

        statement_losses = []
        drop_losses = []
        operator_losses = []

        for batch in tqdm(data_loader):
            x = Variable(batch[0].type(LongTensor))
            y = Variable(batch[1].type(LongTensor))
            z = Variable(batch[2].type(FloatTensor))
            w = Variable(batch[3].type(LongTensor))

            optimizer.zero_grad()

            pred_act, pred_drop, pred_operator = model(x)

            statement_loss = statement_criterion(pred_act, y)
            drop_loss = drop_criterion(pred_drop, z)
            operator_loss = operator_criterion(pred_operator, w)
            loss = statement_loss + operator_loss + drop_loss

            statement_losses.append(statement_loss.item())
            drop_losses.append(drop_loss.item())
            operator_losses.append(operator_loss.item())

            loss.backward()
            optimizer.step()

        avg_statement_train_loss = np.array(statement_losses).mean()
        avg_drop_train_loss = np.array(drop_losses).mean()
        avg_operator_train_loss = np.array(operator_losses).mean()

        model.eval()

        with torch.no_grad():
            # Iterate through test set to avoid out of memory issues
            statement_pred, drop_pred, operator_pred = [], [], []
            for i in range(0, len(test_data), test_iterator_size):
                output = model(test_data[i: i + test_iterator_size])
                statement_pred.append(output[0])
                drop_pred.append(output[1])
                operator_pred.append(output[2])

            statement_pred = torch.cat(statement_pred, dim=0)
            drop_pred = torch.cat(drop_pred, dim=0)
            operator_pred = torch.cat(operator_pred, dim=0)

            test_statement_loss = statement_criterion(statement_pred, test_statement_target)
            test_drop_loss = drop_criterion(drop_pred, test_drop_target)
            test_operator_loss = operator_criterion(operator_pred, test_operator_target)

            print("Train loss: S %f" % avg_statement_train_loss, "D %f" % avg_drop_train_loss,
                  "F %f" % avg_operator_train_loss)
            print("Test loss: S %f" % test_statement_loss.item(), "D %f" % test_drop_loss.item(),
                  "F %f" % test_operator_loss.item())

            predict = statement_pred.data.max(1)[1]
            test_error = (predict != test_statement_target.data).sum().item() / float(test_data.shape[0])
            print("Test classification error: %f" % test_error)

        model.module.save(args.output_path + ".%d" % epoch)
Exemplo n.º 45
0
    def __call__(self, model, inputs, targets, to_numpy=True):
        """
        Produce adversarial examples for ``inputs``.

        :param model: the model to attack
        :type model: nn.Module
        :param inputs: the original images tensor, of dimension [B x C x H x W].
               ``inputs`` can be on either CPU or GPU, but it will eventually be
               moved to the same device as the one the parameters of ``model``
               reside
        :type inputs: torch.FloatTensor
        :param targets: the original image labels, or the attack targets, of
               dimension [B]. If ``self.targeted`` is ``True``, then ``targets``
               is treated as the attack targets, otherwise the labels.
               ``targets`` can be on either CPU or GPU, but it will eventually
               be moved to the same device as the one the parameters of
               ``model`` reside
        :type targets: torch.LongTensor
        :param to_numpy: True to return an `np.ndarray`, otherwise,
               `torch.FloatTensor`
        :type to_numpy: bool
        :return: the adversarial examples on CPU, of dimension [B x C x H x W]
        """
        # sanity check
        assert isinstance(model, nn.Module)
        assert len(inputs.size()) == 4
        assert len(targets.size()) == 1

        # get a copy of targets in numpy before moving to GPU, used when doing
        # the binary search on `scale_const`
        targets_np = targets.clone().cpu().numpy()  # type: np.ndarray

        # the type annotations here are used only for type hinting and do
        # not indicate the actual type (cuda or cpu); the same applies to all
        # code below
        inputs = runutils.make_cuda_consistent(
            model, inputs)[0]  # type: torch.FloatTensor
        targets = runutils.make_cuda_consistent(
            model, targets)[0]  # type: torch.LongTensor

        # run the model a little bit to get the `num_classes`
        num_classes = model(Variable(inputs[0][None, :],
                                     requires_grad=False)).size(1)  # type: int
        batch_size = inputs.size(0)  # type: int

        # `lower_bounds_np`, `upper_bounds_np` and `scale_consts_np` are used
        # for the binary search of each `scale_const` in the batch. The element-wise
        # inequality holds: lower_bounds_np < scale_consts_np <= upper_bounds_np
        lower_bounds_np = np.zeros(batch_size)
        upper_bounds_np = np.ones(batch_size) * self.c_range[1]
        scale_consts_np = np.ones(batch_size) * self.c_range[0]

        # Optimal attack to be found.
        # The three "placeholders" are defined as:
        # - `o_best_l2`: the least L2 norms
        # - `o_best_l2_ppred`: the perturbed predictions made by the adversarial
        #    perturbations with the least L2 norms
        # - `o_best_advx`: the underlying adversarial example of
        #   `o_best_l2_ppred`
        o_best_l2 = np.ones(batch_size) * np.inf
        o_best_l2_ppred = -np.ones(batch_size)
        o_best_advx = inputs.clone().cpu().numpy()  # type: np.ndarray

        # convert `inputs` to tanh-space
        inputs_tanh = self._to_tanh_space(inputs)  # type: torch.FloatTensor
        inputs_tanh_var = Variable(inputs_tanh, requires_grad=False)

        # the one-hot encoding of `targets`
        targets_oh = torch.zeros(targets.size() +
                                 (num_classes, ))  # type: torch.FloatTensor
        targets_oh = runutils.make_cuda_consistent(model, targets_oh)[0]
        targets_oh.scatter_(1, targets.unsqueeze(1), 1.0)
        targets_oh_var = Variable(targets_oh, requires_grad=False)

        # the perturbation variable to optimize.
        # `pert_tanh` is essentially the adversarial perturbation in tanh-space.
        # In Carlini's code it's denoted as `modifier`
        pert_tanh = torch.zeros(inputs.size())  # type: torch.FloatTensor
        if self.init_rand:
            nn.init.normal_(pert_tanh, mean=0, std=1e-3)
        pert_tanh = runutils.make_cuda_consistent(model, pert_tanh)[0]
        pert_tanh_var = Variable(pert_tanh, requires_grad=True)

        optimizer = optim.Adam([pert_tanh_var], lr=self.optimizer_lr)
        for sstep in range(self.binary_search_steps):
            if self.repeat and sstep == self.binary_search_steps - 1:
                scale_consts_np = upper_bounds_np
            scale_consts = torch.from_numpy(
                np.copy(scale_consts_np)).float()  # type: torch.FloatTensor
            scale_consts = runutils.make_cuda_consistent(model,
                                                         scale_consts)[0]
            scale_consts_var = Variable(scale_consts, requires_grad=False)
            # print('Using scale consts:', list(scale_consts_np))  # FIXME

            # the minimum L2 norms of perturbations found during optimization
            best_l2 = np.ones(batch_size) * np.inf
            # the perturbed predictions corresponding to `best_l2`, to be used
            # in binary search of `scale_const`
            best_l2_ppred = -np.ones(batch_size)
            # previous (summed) batch loss, to be used in early stopping policy
            prev_batch_loss = np.inf  # type: float
            for optim_step in range(self.max_steps):
                batch_loss, pert_norms_np, pert_outputs_np, advxs_np = \
                    self._optimize(model, optimizer, inputs_tanh_var,
                                   pert_tanh_var, targets_oh_var,
                                   scale_consts_var)
                # if optim_step % 10 == 0: print('batch [{}] loss: {}'.format(optim_step, batch_loss))  # FIXME

                if self.abort_early and not optim_step % (self.max_steps //
                                                          10):
                    if batch_loss > prev_batch_loss * (1 - self.ae_tol):
                        break
                    prev_batch_loss = batch_loss

                # update best attack found during optimization
                pert_predictions_np = np.argmax(pert_outputs_np, axis=1)
                comp_pert_predictions_np = np.argmax(
                    self._compensate_confidence(pert_outputs_np, targets_np),
                    axis=1)
                for i in range(batch_size):
                    l2 = pert_norms_np[i]
                    cppred = comp_pert_predictions_np[i]
                    ppred = pert_predictions_np[i]
                    tlabel = targets_np[i]
                    ax = advxs_np[i]
                    if self._attack_successful(cppred, tlabel):
                        assert cppred == ppred
                        if l2 < best_l2[i]:
                            best_l2[i] = l2
                            best_l2_ppred[i] = ppred
                        if l2 < o_best_l2[i]:
                            o_best_l2[i] = l2
                            o_best_l2_ppred[i] = ppred
                            o_best_advx[i] = ax

            # binary search of `scale_const`
            for i in range(batch_size):
                tlabel = targets_np[i]
                assert best_l2_ppred[i] == -1 or \
                       self._attack_successful(best_l2_ppred[i], tlabel)
                assert o_best_l2_ppred[i] == -1 or \
                       self._attack_successful(o_best_l2_ppred[i], tlabel)
                if best_l2_ppred[i] != -1:
                    # successful; attempt to lower `scale_const` by bisection
                    if scale_consts_np[i] < upper_bounds_np[i]:
                        upper_bounds_np[i] = scale_consts_np[i]
                    # only bisect once `upper_bounds_np[i]` has actually been
                    # lowered by a successful attack; while it still sits near
                    # `c_range[1]`, no upper solution has been found yet
                    if upper_bounds_np[i] < self.c_range[1] * 0.1:
                        scale_consts_np[i] = (lower_bounds_np[i] +
                                              upper_bounds_np[i]) / 2
                else:
                    # failure; multiply `scale_const` by ten if no solution
                    # found; otherwise do binary search
                    if scale_consts_np[i] > lower_bounds_np[i]:
                        lower_bounds_np[i] = scale_consts_np[i]
                    if upper_bounds_np[i] < self.c_range[1] * 0.1:
                        scale_consts_np[i] = (lower_bounds_np[i] +
                                              upper_bounds_np[i]) / 2
                    else:
                        scale_consts_np[i] *= 10

        if not to_numpy:
            o_best_advx = torch.from_numpy(o_best_advx).float().to(device)
        return o_best_advx
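
A hedged usage sketch (`attack` below stands for an instance of this class; its constructor arguments are not shown in this excerpt, and the toy model and tensors are assumptions):

import torch
import torch.nn as nn

model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10))  # toy classifier
inputs = torch.rand(4, 3, 32, 32)      # B x C x H x W, values in [0, 1]
targets = torch.randint(0, 10, (4,))   # labels, or attack targets if targeted
advx = attack(model, inputs, targets, to_numpy=True)  # np.ndarray, B x C x H x W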
Exemplo n.º 46
0
def optimizer_train_optimizee_attack(args):
    assert "Attack" in args.train_task
    task = train_task_list.tasks[args.train_task]

    attack_model = task["attack_model"]()
    if args.cuda:
        attack_model.cuda(args.gpu_num)
    ckpt_dict = torch.load(task["attack_model_ckpt"], map_location='cpu')
    attack_model.load_state_dict(ckpt_dict)
    attack_model.eval()
    attack_model.reset()  # not include parameters

    for test_idx in task['tests']['test_indexes']:
        _, test_loader = task["tests"]["optimizee"].dataset_loader(
            args.data_dir, task['batch_size'],
            task['tests']['test_batch_size'])
        test_loader = iter(test_loader)

        for _ in range(test_idx):  # advance to the attacked image (assumes test_idx >= 1)
            data, target = next(test_loader)

        data, target = Variable(data.double()), Variable(target)
        if args.cuda:
            data, target = data.cuda(args.gpu_num), target.cuda(args.gpu_num)

        meta_model = task["tests"]["optimizee"](
            optimizee.AttackModel(attack_model),
            task['tests']['test_batch_size'])
        if args.cuda:
            meta_model.cuda(args.gpu_num)

        ckpt_path = os.path.join(args.output_dir, args.ckpt_path)

        # ZO-LSTM (learned ZO optimizer)
        if "nn_opt" in task["tests"]:
            meta_optimizer = task["nn_optimizer"](
                optimizee.MetaModel(meta_model), args)
            if args.cuda:
                meta_optimizer.cuda(args.gpu_num)
            meta_optimizer.load(ckpt_path)
            meta_optimizer.eval()
            nn_opt_loss_array = []

        # ZO-SGD
        if "base_opt" in task["tests"]:
            base_optimizer = task["tests"]["base_opt"](
                None, args, task["tests"]["base_lr"])
            base_optimizer.eval()
            base_opt_loss_array = []

        # ZO-signSGD
        if "sign_opt" in task["tests"]:
            sign_optimizer = task["tests"]["sign_opt"](
                None, args, task["tests"]["sign_lr"])
            sign_optimizer.eval()
            sign_opt_loss_array = []

        # ZO-ADAM
        if "adam_opt" in task["tests"]:
            adam_optimizer = task["tests"]["adam_opt"](
                None, args, task["tests"]["adam_lr"],
                task["tests"]["adam_beta_1"], task["tests"]["adam_beta_2"])
            adam_optimizer.eval()
            adam_opt_loss_array = []

        # ZO-LSTM-no-query (without QueryRNN)
        if "nn_opt_no_query" in task["tests"]:
            meta_model_2 = task["tests"]["optimizee"](
                optimizee.AttackModel(attack_model),
                task['tests']['test_batch_size'])
            if args.cuda:
                meta_model_2.cuda(args.gpu_num)

            nn_optimizer_no_query = task["tests"]["nn_opt_no_query"](
                optimizee.MetaModel(meta_model_2), args)
            if args.cuda:
                nn_optimizer_no_query.cuda(args.gpu_num)
            nn_optimizer_no_query.load(ckpt_path)
            nn_optimizer_no_query.eval()
            nn_opt_no_query_loss_array = []

        # ZO-LSTM-no-update (without UpdateRNN)
        if "nn_opt_no_update" in task["tests"]:
            meta_model_3 = task["tests"]["optimizee"](
                optimizee.AttackModel(attack_model),
                task['tests']['test_batch_size'])
            if args.cuda:
                meta_model_3.cuda(args.gpu_num)

            nn_optimizer_no_update = task["tests"]["nn_opt_no_update"](
                optimizee.MetaModel(meta_model_3), args)
            if args.cuda:
                nn_optimizer_no_update.cuda(args.gpu_num)
            nn_optimizer_no_update.load(ckpt_path)
            nn_optimizer_no_update.eval()
            nn_opt_no_update_loss_array = []

        # ZO-LSTM-guided (use Guided-ES to modify search distribution)
        if "nn_opt_guided" in task["tests"]:
            meta_model_4 = task["tests"]["optimizee"](
                optimizee.AttackModel(attack_model),
                task['tests']['test_batch_size'])
            if args.cuda:
                meta_model_4.cuda(args.gpu_num)

            nn_optimizer_guided = task["tests"]["nn_opt_guided"](
                optimizee.MetaModel(meta_model_4), args)
            if args.cuda:
                nn_optimizer_guided.cuda(args.gpu_num)
            nn_optimizer_guided.load(ckpt_path)
            nn_optimizer_guided.eval()
            nn_opt_guided_loss_array = []

        for num in range(1, task["tests"]["test_num"] + 1):
            model = task["tests"]["optimizee"](
                optimizee.AttackModel(attack_model),
                task['tests']['test_batch_size'])
            if args.cuda:
                model.cuda(args.gpu_num)

            if "nn_opt" in task["tests"]:
                meta_optimizer.reset_state(keep_states=False,
                                           model=model,
                                           use_cuda=args.cuda,
                                           gpu_num=args.gpu_num)
                nn_opt_state = copy.deepcopy(model.state_dict())

            if "base_opt" in task["tests"]:
                base_opt_state = copy.deepcopy(model.state_dict())

            if "sign_opt" in task["tests"]:
                sign_opt_state = copy.deepcopy(model.state_dict())

            if "adam_opt" in task["tests"]:
                adam_optimizer.reset_state(keep_states=False,
                                           model=model,
                                           use_cuda=args.cuda,
                                           gpu_num=args.gpu_num)
                adam_opt_state = copy.deepcopy(model.state_dict())

            if "nn_opt_no_query" in task["tests"]:
                nn_optimizer_no_query.reset_state(keep_states=False,
                                                  model=model,
                                                  use_cuda=args.cuda,
                                                  gpu_num=args.gpu_num)
                nn_opt_no_query_state = copy.deepcopy(model.state_dict())

            if "nn_opt_no_update" in task["tests"]:
                nn_optimizer_no_update.reset_state(keep_states=False,
                                                   model=model,
                                                   use_cuda=args.cuda,
                                                   gpu_num=args.gpu_num)
                nn_opt_no_update_state = copy.deepcopy(model.state_dict())

            if "nn_opt_guided" in task["tests"]:
                nn_optimizer_guided.reset_state(keep_states=False,
                                                model=model,
                                                use_cuda=args.cuda,
                                                gpu_num=args.gpu_num)
                nn_opt_guided_state = copy.deepcopy(model.state_dict())

            for step in range(1, task["tests"]["n_steps"] + 1):
                msg = "iteration {}".format(step)

                # nn_opt
                if "nn_opt" in task["tests"]:
                    model.load_state_dict(nn_opt_state)
                    with torch.no_grad():
                        _, nn_opt_loss, nn_f_x = meta_optimizer.meta_update(
                            model, data, target)
                    nn_opt_state = copy.deepcopy(model.state_dict())

                    msg += ", nn_opt_loss {:.6f}".format(
                        nn_opt_loss.data.item())
                    nn_opt_loss_array.append(nn_opt_loss.data.item())

                # base_opt
                if "base_opt" in task["tests"]:
                    model.load_state_dict(base_opt_state)
                    with torch.no_grad():
                        _, base_opt_loss, base_f_x = base_optimizer.meta_update(
                            model, data, target)
                    base_opt_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", base_opt_loss {:.6f}".format(
                        base_opt_loss.data.item())
                    base_opt_loss_array.append(base_opt_loss.data.item())

                # sign_opt
                if "sign_opt" in task["tests"]:
                    model.load_state_dict(sign_opt_state)
                    with torch.no_grad():
                        _, sign_opt_loss, sign_f_x = sign_optimizer.meta_update(
                            model, data, target)
                    sign_opt_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", sign_opt_loss {:.6f}".format(
                        sign_opt_loss.data.item())
                    sign_opt_loss_array.append(sign_opt_loss.data.item())

                if "adam_opt" in task["tests"]:
                    model.load_state_dict(adam_opt_state)
                    with torch.no_grad():
                        _, adam_opt_loss, adam_f_x = adam_optimizer.meta_update(
                            model, data, target)
                    adam_opt_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", adam_opt_loss {:.6f}".format(
                        adam_opt_loss.data.item())
                    adam_opt_loss_array.append(adam_opt_loss.data.item())

                if "nn_opt_no_query" in task["tests"]:
                    model.load_state_dict(nn_opt_no_query_state)
                    with torch.no_grad():
                        _, nn_opt_no_query_loss, nn_no_query_f_x = nn_optimizer_no_query.meta_update(
                            model, data, target, pred_query=False)
                    nn_opt_no_query_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", nn_opt_no_query_loss {:.6f}".format(
                        nn_opt_no_query_loss.data.item())
                    nn_opt_no_query_loss_array.append(
                        nn_opt_no_query_loss.data.item())

                if "nn_opt_no_update" in task["tests"]:
                    model.load_state_dict(nn_opt_no_update_state)
                    with torch.no_grad():
                        _, nn_opt_no_update_loss, nn_no_update_f_x = nn_optimizer_no_update.meta_update(
                            model,
                            data,
                            target,
                            pred_update=False,
                            base_lr=task["tests"]["base_lr"])
                    nn_opt_no_update_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", nn_opt_no_update_loss {:.6f}".format(
                        nn_opt_no_update_loss.data.item())
                    nn_opt_no_update_loss_array.append(
                        nn_opt_no_update_loss.data.item())

                if "nn_opt_guided" in task["tests"]:
                    model.load_state_dict(nn_opt_guided_state)
                    with torch.no_grad():
                        _, nn_opt_guided_loss, nn_guided_f_x = nn_optimizer_guided.meta_update(
                            model,
                            data,
                            target,
                            guided=True,
                            base_lr=task["tests"]["base_lr"])
                    nn_opt_guided_state = copy.deepcopy(model.state_dict())
                    msg = msg + ", nn_opt_guided_loss {:.6f}".format(
                        nn_opt_guided_loss.data.item())
                    nn_opt_guided_loss_array.append(
                        nn_opt_guided_loss.data.item())
                print(msg)

            if args.save_loss:
                if "nn_opt" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "nn_opt_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(nn_opt_loss_array))
                if "base_opt" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "base_opt_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(base_opt_loss_array))
                if "sign_opt" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "sign_opt_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(sign_opt_loss_array))
                if "adam_opt" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "adam_opt_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(adam_opt_loss_array))
                if "nn_opt_no_query" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "nn_opt_no_query_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(nn_opt_no_query_loss_array))
                if "nn_opt_no_update" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "nn_opt_no_update_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(nn_opt_no_update_loss_array))
                if "nn_opt_guided" in task["tests"]:
                    np.save(
                        os.path.join(
                            args.output_dir,
                            "nn_opt_guided_loss_array_{}_q_{}.npy".format(
                                test_idx, args.grad_est_q)),
                        np.array(nn_opt_guided_loss_array))
            print("Test num {}, test idx {}, done!".format(num, test_idx))

        if args.save_fig:
            assert args.save_loss
            fig = plt.figure(figsize=(8, 6))
            iteration = np.arange(1, task["tests"]["n_steps"] + 1)
            if "base_opt" in task["tests"]:
                base_opt_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "base_opt_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                                (task["tests"]["test_num"],
                                 task["tests"]["n_steps"]))
                base_opt_mean = np.mean(base_opt_loss_array, axis=0)
                base_opt_std = np.std(base_opt_loss_array, axis=0)
                plt.plot(iteration, base_opt_mean, 'c', label='ZO-SGD')
                plt.fill_between(iteration,
                                 base_opt_mean - base_opt_std,
                                 base_opt_mean + base_opt_std,
                                 color='c',
                                 alpha=0.2)

            if "sign_opt" in task["tests"]:
                sign_opt_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "sign_opt_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                                (task["tests"]["test_num"],
                                 task["tests"]["n_steps"]))
                sign_opt_mean = np.mean(sign_opt_loss_array, axis=0)
                sign_opt_std = np.std(sign_opt_loss_array, axis=0)
                plt.plot(iteration, sign_opt_mean, 'g', label='ZO-signSGD')
                plt.fill_between(iteration,
                                 sign_opt_mean - sign_opt_std,
                                 sign_opt_mean + sign_opt_std,
                                 color='g',
                                 alpha=0.2)

            if "adam_opt" in task["tests"]:
                adam_opt_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "adam_opt_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                                (task["tests"]["test_num"],
                                 task["tests"]["n_steps"]))
                adam_opt_mean = np.mean(adam_opt_loss_array, axis=0)
                adam_opt_std = np.std(adam_opt_loss_array, axis=0)
                plt.plot(iteration,
                         adam_opt_mean,
                         'darkorange',
                         label='ZO-ADAM')
                plt.fill_between(iteration,
                                 adam_opt_mean - adam_opt_std,
                                 adam_opt_mean + adam_opt_std,
                                 color='darkorange',
                                 alpha=0.2)

            if "nn_opt" in task["tests"]:
                nn_opt_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "nn_opt_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                                (task["tests"]["test_num"],
                                 task["tests"]["n_steps"]))
                nn_opt_mean = np.mean(nn_opt_loss_array, axis=0)
                nn_opt_std = np.std(nn_opt_loss_array, axis=0)
                plt.plot(iteration, nn_opt_mean, 'b', label='ZO-LSTM')
                plt.fill_between(iteration,
                                 nn_opt_mean - nn_opt_std,
                                 nn_opt_mean + nn_opt_std,
                                 color='b',
                                 alpha=0.2)

            if "nn_opt_no_query" in task["tests"]:
                nn_opt_no_query_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "nn_opt_no_query_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                                (task["tests"]["test_num"],
                                 task["tests"]["n_steps"]))
                nn_opt_no_query_mean = np.mean(nn_opt_no_query_loss_array,
                                               axis=0)
                nn_opt_no_query_std = np.std(nn_opt_no_query_loss_array,
                                             axis=0)
                plt.plot(iteration,
                         nn_opt_no_query_mean,
                         'r',
                         label='ZO-LSTM-no-query')
                plt.fill_between(iteration,
                                 nn_opt_no_query_mean - nn_opt_no_query_std,
                                 nn_opt_no_query_mean + nn_opt_no_query_std,
                                 color='r',
                                 alpha=0.2)

            if "nn_opt_no_update" in task["tests"]:
                nn_opt_no_update_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "nn_opt_no_update_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                                (task["tests"]["test_num"],
                                 task["tests"]["n_steps"]))
                nn_opt_no_update_mean = np.mean(nn_opt_no_update_loss_array,
                                                axis=0)
                nn_opt_no_update_std = np.std(nn_opt_no_update_loss_array,
                                              axis=0)
                plt.plot(iteration,
                         nn_opt_no_update_mean,
                         'm',
                         label='ZO-LSTM-no-update')
                plt.fill_between(iteration,
                                 nn_opt_no_update_mean - nn_opt_no_update_std,
                                 nn_opt_no_update_mean + nn_opt_no_update_std,
                                 color='m',
                                 alpha=0.2)

            if "nn_opt_guided" in task["tests"]:
                nn_opt_guided_loss_array = np.load(
                    os.path.join(
                        args.output_dir,
                        "nn_opt_guided_loss_array_{}_q_{}.npy".format(
                            test_idx, args.grad_est_q))).reshape(
                                (task["tests"]["test_num"],
                                 task["tests"]["n_steps"]))
                nn_opt_guided_mean = np.mean(nn_opt_guided_loss_array, axis=0)
                nn_opt_guided_std = np.std(nn_opt_guided_loss_array, axis=0)
                plt.plot(iteration,
                         nn_opt_guided_mean,
                         'saddlebrown',
                         label='ZO-LSTM-GuidedES')
                plt.fill_between(iteration,
                                 nn_opt_guided_mean - nn_opt_guided_std,
                                 nn_opt_guided_mean + nn_opt_guided_std,
                                 color='saddlebrown',
                                 alpha=0.2)
            plt.xlabel('iteration', fontsize=15)
            plt.ylabel('loss', fontsize=15)
            plt.legend(prop={'size': 15})
            fig.savefig(
                os.path.join(
                    args.output_dir, args.fig_preffix +
                    '_{}_q_{}.png'.format(test_idx, args.grad_est_q)))
Exemplo n.º 47
0
    def train(self):
        # noise for test.
        self.z_test = torch.Tensor(self.loader.batch_size, self.nz).normal_(0.0, 1.0)
        if self.use_cuda:
            self.z_test = self.z_test.cuda()
        self.z_test = Variable(self.z_test)

        for step in range(2,self.max_resl+1+5):
            for iter in tqdm(range(0,(self.trns_tick*2+self.stab_tick*2)*self.TICK,self.loader.batch_size)):
                self.globalIter=self.globalIter+1
                self.stack=self.stack+self.loader.batch_size
                if self.stack>ceil(len(self.loader.dataset)):
                    self.epoch=self.epoch+1
                    self.stack=int(self.stack%(ceil(len(self.loader.dataset))))
                # resolution scheduler.
                self.resl_scheduler()
                # update discriminator.
                for i in range(1):
                    self.D.zero_grad()  # zero gradients.
                    self.require_grad(self.D, True)
                    self.x.data=self.feed_interpolated_input(self.loader.get_batch())
                    if self.flag_add_noise:
                        self.x=self.add_noise(self.x)
                    self.z=torch.randn(self.loader.batch_size,self.nz,1,1)
                    if self.use_cuda:
                        self.z=self.z.cuda()
                    self.x_tilde=self.G(self.z)
                    self.fx=self.D(self.x)
                    self.fx_tilde = self.D(self.x_tilde.detach())
                    # loss_d=F.mse_loss(self.fx.squeeze(), torch.ones_like(self.fx.squeeze()))+F.mse_loss(self.fx_tilde.squeeze(), torch.zeros_like(self.fx_tilde.squeeze()))+self.calc_gradient_penalty()
                    loss_d = self.fx.squeeze().mean() - self.fx_tilde.squeeze().mean() + self.calc_gradient_penalty()
                    loss_d.backward(retain_graph=False)
                    self.opt_d.step()
                # update generator.
                for i in range(1):
                    self.G.zero_grad()  # zero gradients.
                    self.require_grad(self.D, False)
                    fx_tilde=self.D(self.x_tilde)
                    # loss_g = F.mse_loss(fx_tilde.squeeze(), torch.ones_like(self.fx_tilde.squeeze()))
                    loss_g = fx_tilde.squeeze().mean()
                    loss_g.backward(retain_graph=False)
                    self.opt_g.step()
                # logging.
                log_msg = ' [E:{0}][T:{1}][{2:6}/{3:6}]  errD: {4:.4f} | errG: {5:.4f} | ' \
                          '[lr:{11:.5f}][cur:{6:.3f}][resl:{7:4}][{8}][{9:.1f}%][{10:.1f}%]'.format(
                    self.epoch, self.globalTick, self.stack, len(self.loader.dataset),
                    loss_d.item(),loss_g.item(), self.resl, int(pow(2, floor(self.resl))),
                    self.phase, self.complete['gen'],self.complete['dis'],self.lr)
                tqdm.write(log_msg)

                # save model.
                self.snapshot('repo/model')

                # save image grid.
                if self.globalIter % self.config.save_img_every == 0:
                    with torch.no_grad():
                        x_test = self.G(self.z_test)
                    utils.mkdir('repo/save/grid')
                    utils.save_image_grid(x_test.data, 'repo/save/grid/{}_{}_G{}_D{}.jpg'.format(
                        int(self.globalIter / self.config.save_img_every), self.phase, self.complete['gen'],self.complete['dis']))
                    utils.mkdir('repo/save/resl_{}'.format(int(floor(self.resl))))
                    utils.save_image_single(x_test.data,'repo/save/resl_{}/{}_{}_G{}_D{}.jpg'.format(
                        int(floor(self.resl)), int(self.globalIter / self.config.save_img_every),self.phase,self.complete['gen'],self.complete['dis']))
                    # tensorboard visualization.
                    if self.use_tb:
                        with torch.no_grad():
                            x_test = self.D(self.z_test)
                        self.tb.add_scalar('data/loss_g', loss_g[0].item(), self.globalIter)
                        self.tb.add_scalar('data/loss_d', loss_d[0].item(), self.globalIter)
                        self.tb.add_scalar('tick/lr', self.lr, self.globalIter)
                        self.tb.add_scalar('tick/cur_resl', int(pow(2, floor(self.resl))), self.globalIter)
Exemplo n.º 48
0
        else:
            print("Validing...")
            # 设置为False,不会进行Dropout并使用running mean和running var
            model.train(False)

        running_loss = 0.0
        running_corrects = 0

        # enumerate(dataloader[phase], 1) returns (index, batch); start=1 makes the index begin at 1
        for batch, data in enumerate(dataloader[phase], 1):
            # X: images, 16*3*64*64; y: labels, 16
            X, y = data

            # wrap in Variable
            if Use_gpu:
                X, y = Variable(X.cuda()), Variable(y.cuda())
            else:
                X, y = Variable(X), Variable(y)

            # y_pred: predicted score matrix, 16*2
            y_pred = model(X)

            # pred: index of the larger score per row, i.e. the predicted class
            _, pred = torch.max(y_pred.data, 1)

            # zero the gradients
            optimizer.zero_grad()

            # compute the loss
            loss = loss_f(y_pred, y)
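
            # The excerpt is cut off here; the usual continuation of this
            # train/validate pattern (an assumption, not from the source) would be:
            #
            #     if phase == 'train':
            #         loss.backward()   # backprop only in the training phase
            #         optimizer.step()
            #     running_loss += loss.item()
            #     running_corrects += torch.sum(pred == y.data).item()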
Exemplo n.º 49
0
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


model = autoencoder().cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=1e-5)

for epoch in range(num_epochs):
    for data in dataloader:
        img, _ = data
        img = img.view(img.size(0), -1)
        img = Variable(img).cuda()
        output = model(img)
        loss = criterion(output, img)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs,
                                              loss.item()))
    if epoch % 10 == 0:
        pic = to_img(output.cpu().data)
        save_image(pic, 'image_{}.png'.format(epoch))

torch.save(model.state_dict(), './sim_autoencoder.pth')
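
to_img is used above but not defined in this excerpt; a common minimal version (an assumption) maps the flat decoder output back to 1x28x28 MNIST images for save_image:

def to_img(x):
    # undo the (-1, 1) normalization and restore the image shape
    x = 0.5 * (x + 1)
    x = x.clamp(0, 1)
    x = x.view(x.size(0), 1, 28, 28)
    return x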
Exemplo n.º 50
0
def active_learning_taylor(func_name,start_rand_idxs=None, bud=None, valid=True,fac_loc_idx=None):
    
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    np.random.seed(42)
    random.seed(42)
    torch.backends.cudnn.deterministic = True
    #model = ThreeLayerNet(M, num_cls, 5, 5)
    #model = LogisticRegNet(M, num_cls)
    model = TwoLayerNet(M, num_cls, 100)
    # if data_name == 'mnist':
    #     model = MnistNet()
    if torch.cuda.device_count() > 1:
        print("Using:", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
        cudnn.benchmark = True

    model = model.to(device)

    idxs = start_rand_idxs

    if func_name == 'Facloc Regularized':
        x_val1 = torch.cat([x_val, x_trn[fac_loc_idx]], dim=0)
        y_val1 = torch.cat([y_val, y_trn[fac_loc_idx]], dim=0)

    criterion = nn.CrossEntropyLoss()
    criterion_nored = nn.CrossEntropyLoss(reduction='none')
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    if func_name == 'Full OneStep':
        setf_model = SetFunctionBatch(x_val, y_val, model, criterion, criterion_nored, learning_rate, device)

    elif func_name == 'Facility Location':
        if data_name != 'covertype':
            setf_model = SetFunctionFacLoc(device, train_batch_size_for_greedy)
            idxs = setf_model.lazy_greedy_max(bud, x_trn,model)
        else:
            idxs = run_stochastic_Facloc(x_trn, y_trn, bud)

        facility_location_warm_start = copy.deepcopy(idxs)


    elif func_name == 'Facloc Regularized':
        setf_model = SetFunctionTaylor(x_val1, y_val1, model, criterion, criterion_nored, learning_rate, device,num_cls)

    else:
        #setf_model = SetFunctionTaylorDeep(train_loader_greedy, valid_loader, valid, model, 
        #        criterion, criterion_nored, learning_rate, device, N)
        setf_model = SetFunctionTaylor(x_val, y_val, model, criterion, criterion_nored, learning_rate, device,num_cls)

        #setf_model = SetFunctionTaylorDeep_ReLoss_Mean(x_trn, y_trn, train_batch_size_for_greedy, x_val, y_val, valid, model, 
        #        criterion, criterion_nored, learning_rate, device, N) 

    remainList = set(list(range(N)))
    idxs = list(idxs)
    remainList = remainList.difference(idxs)

    if func_name == 'Taylor Online':
        print("Starting Online OneStep Run with taylor on loss!")
    elif func_name == 'Full OneStep':
        print("Starting Online OneStep Run without taylor!")
    elif func_name == 'Facloc Regularized':
        print("Starting Facility Location Regularized Online OneStep Run with taylor!")
    elif func_name == 'Random Greedy':
        print("Starting Randomized Greedy Online OneStep Run with taylor!")
    elif func_name == 'Facility Location':
         print("Starting Facility Location!")
    elif func_name == 'Random':
        print("Starting Random Run!")
    elif func_name == 'Random Perturbation':
        print("Starting Online OneStep Run with taylor with random perturbation!")
    elif func_name == "FASS":
        print("Filtered Active Submodular Selection(FASS)!")
    #elif func_name == 'Proximal':
        #print("Starting Online Proximal OneStep Run with taylor!")
    #elif func_name == 'Taylor on Logit':
    #    print("Starting Online OneStep Run with taylor on logit!")
    
    
    # if valid:
    #     print("Online OneStep Run with Taylor approximation and with Validation Set",file=logfile)
    # else:
    #     print("Online OneStep Run with Taylor approximation and without Validation Set",file=logfile)

    val_accies = np.zeros(no_select)
    test_accies = np.zeros(no_select)
    unlab_accies = np.zeros(no_select)
    # idxs = start_rand_idxs

    def weight_reset(m):
        torch.manual_seed(42)
        torch.cuda.manual_seed(42)
        np.random.seed(42)
        random.seed(42)
        torch.backends.cudnn.deterministic = True
        if isinstance(m, nn.Linear):
            #m.reset_parameters()
            m.weight.data.normal_(0.0, 0.02)
            m.bias.data.fill_(0)

    model =  model.apply(weight_reset).cuda()
    #print(model.linear2.weight)
    for n in range(no_select):
        loader_tr = DataLoader(CustomDataset_act(x_trn[idxs], y_trn[idxs], transform=None),batch_size=no_points)
        model.train()
        for i in range(num_epochs):
            # inputs, targets = x_trn[idxs].to(device), y_trn[idxs].to(device)
            '''inputs, targets = x_trn[idxs], y_trn[idxs]
            optimizer.zero_grad()
            scores = model(inputs)
            loss = criterion(scores, targets)
            loss.backward()
            optimizer.step()'''
            #model =  model.apply(weight_reset).cuda()

            accFinal = 0. 
            for batch_idx in list(loader_tr.batch_sampler):
                x, y, _ = loader_tr.dataset[batch_idx]  # drop per-batch indices so the outer selection list `idxs` is not overwritten

                x, y = Variable(x.cuda()), Variable(y.cuda())
                optimizer.zero_grad()
                out = model(x)
                loss = F.cross_entropy(out, y)
                accFinal += torch.sum((torch.max(out,1)[1] == y).float()).data.item()
                loss.backward()

                if (i % 50 == 0) and (accFinal < 0.2): # reset if not converging
                    model =  model.apply(weight_reset).cuda()
                    optimizer = optim.SGD(model.parameters(), lr = learning_rate)

                # clamp gradients, just in case
                for p in filter(lambda p: p.grad is not None, model.parameters()): p.grad.data.clamp_(min=-.1, max=.1)

                optimizer.step()

            #if accFinal/len(loader_tr.dataset.X) >= 0.99:
            #    break

            '''with torch.no_grad():
                # val_in, val_t = x_val.to(device), y_val.to(device)
                val_outputs = model(x_val)
                val_loss = criterion(val_outputs, y_val)
                full_trn_outputs = model(x_trn)
                full_trn_loss = criterion(full_trn_outputs, y_trn)'''

            #accFinal = torch.sum((torch.max(scores,1)[1] == targets).float()).data.item()
            #print(accFinal / len(loader_tr.dataset.X))

            #if i % print_every == 0:  # Print Training and Validation Loss
        print(n + 1, 'SubsetTrn', loss.item())

        curr_X_trn = x_trn[list(remainList)]
        curr_Y_trn = y_trn[list(remainList)]

        model.eval()
        with torch.no_grad():
            '''full_trn_out = model(x_trn)
            full_trn_loss = criterion(full_trn_out, y_trn).mean()
            sub_trn_out = model(x_trn[idxs])
            sub_trn_loss = criterion(sub_trn_out, y_trn[idxs]).mean()'''
            val_out = model(x_val)
            val_loss = criterion(val_out, y_val)
            _, val_predict = val_out.max(1)
            val_correct = val_predict.eq(y_val).sum().item()
            val_total = y_val.size(0)
            val_acc = 100 * val_correct / val_total

            correct = 0
            total = 0
            
            inputs, targets = x_tst.to(device), y_tst.to(device)
            outputs = model(inputs)
            test_loss = criterion(outputs, targets)
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            tst_acc = 100.0 * correct / total

            rem_out = model(curr_X_trn)
            rem_loss = criterion(rem_out, curr_Y_trn)
            _, rem_predict = rem_out.max(1)
            rem_correct = rem_predict.eq(curr_Y_trn).sum().item()
            rem_total = curr_Y_trn.size(0)
            rem_acc = 100 * rem_correct / rem_total

        val_accies[n] = val_acc
        test_accies[n] = tst_acc
        unlab_accies[n] = rem_acc

        #if ((i + 1) % select_every == 0) and func_name not in ['Facility Location','Random']:
            # val_in, val_t = x_val.to(device), y_val.to(device)  # Transfer them to device
        cached_state_dict = copy.deepcopy(model.state_dict())
        clone_dict = copy.deepcopy(model.state_dict())
        # Dont put the logs for Selection on logfile!!
        # print("With Taylor approximation",file=logfile)
        # print("selEpoch: %d, Starting Selection:" % i, str(datetime.datetime.now()),file=logfile)
        #t_ng_start = time.time()

        if func_name == 'Random Greedy':
            new_idxs = setf_model.naive_greedy_max(curr_X_trn,rem_predict,int(0.9 * no_points), clone_dict)
            new_idxs = list(np.array(list(remainList))[new_idxs])
            
            remainList = remainList.difference(new_idxs)
            new_idxs.extend(list(np.random.choice(list(remainList), size=int(0.1 * no_points), replace=False)))
            remainList = remainList.difference(new_idxs)
            idxs.extend(new_idxs)

        elif func_name == "FASS":

            fn = nn.Softmax(dim=1)
            soft = fn(rem_out)

            entropy2 = Categorical(probs = soft).entropy()

            #print(entropy2.shape)
            if 5*no_points < entropy2.shape[0]:
                values,indices = entropy2.topk(5*no_points)
                #indices = list(np.array(list(remainList))[indices.cpu()])
            else:
                indices = torch.arange(entropy2.shape[0])  # use every remaining point

            knn_idxs_flag_val = perform_knnsb_selection(datadir, data_name, curr_X_trn[indices],rem_predict[indices], 
                fraction, selUsing='val') 
            #print(knn_idxs_flag_val)
            #print(len(knn_idxs_flag_val))

            ##print(len(knn_idxs_flag_val),len(indices))
            knn_idxs_flag_val = list(np.array(list(remainList))[indices.cpu()][knn_idxs_flag_val])

            remainList = remainList.difference(knn_idxs_flag_val)
            idxs.extend(knn_idxs_flag_val)

        elif func_name == 'Random':
            state = np.random.get_state()
            np.random.seed(n*n)
            #new_idxs = gen_rand_prior_indices(list(remainList), size=no_points)
            new_idxs = np.random.choice(list(remainList), size=no_points, replace=False)
            np.random.set_state(state)
            remainList = remainList.difference(new_idxs)
            idxs.extend(new_idxs)


        elif func_name == 'Random Perturbation':
            new_idxs = setf_model.naive_greedy_max(curr_X_trn,rem_predict,no_points, clone_dict,None,True)  # , grads_idxs
            new_idxs = np.array(list(remainList))[new_idxs]

            remainList = remainList.difference(new_idxs)
            idxs.extend(new_idxs) 

        elif func_name == 'Facility Location':

            if data_name == 'covertype':
                new_idxs = run_stochastic_Facloc(curr_X_trn, rem_predict, bud)
            else:
                new_idxs = setf_model.lazy_greedy_max(bud, curr_X_trn ,model)
            new_idxs = np.array(list(remainList))[new_idxs]

            remainList = remainList.difference(new_idxs)
            idxs.extend(new_idxs)

        else: 
            new_idxs = setf_model.naive_greedy_max(curr_X_trn,rem_predict,no_points, clone_dict)  # , grads_idxs
            new_idxs = np.array(list(remainList))[new_idxs]

            remainList = remainList.difference(new_idxs)
            idxs.extend(new_idxs) 

        '''elif func_name == 'Proximal':
            previous = torch.zeros(N,device=device)
            previous[idxs] = 1.0 
            new_idxs = setf_model.naive_greedy_max(bud, clone_dict,None,previous)
            idxs = new_idxs'''

        # print("selEpoch: %d, Selection Ended at:" % (i), str(datetime.datetime.now()),file=logfile)
        # print("Naive greedy total time with taylor:", time.time()-t_ng_start,file=logfile)
        model.load_state_dict(cached_state_dict)

    # Calculate Final SubsetTrn, FullTrn, Val and Test Loss
    # Calculate Val and Test Accuracy
    
    if func_name == 'Facility Location':
        return val_accies, test_accies, unlab_accies, idxs, facility_location_warm_start
    else:
        return val_accies, test_accies, unlab_accies, idxs
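
CustomDataset_act is not shown in this excerpt; the batch loop above unpacks (x, y, index) triples from it, so a minimal sketch (an assumption) would be:

from torch.utils.data import Dataset

class CustomDataset_act(Dataset):
    def __init__(self, X, Y, transform=None):
        self.X, self.Y, self.transform = X, Y, transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # idx may be a single index or a list of indices (tensor indexing handles both)
        x = self.X[idx]
        if self.transform is not None:
            x = self.transform(x)
        return x, self.Y[idx], idx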
Exemplo n.º 51
0
def train(**kwargs):
    opt.parse(kwargs)
    vis = Visualizer(opt.env)

    # step1: configure model
    model = getattr(models, opt.model)()
    if opt.load_model_path:
        model.load(opt.load_model_path)
    if opt.use_gpu: model.cuda()

    # step2: data
    train_data = DogCat(opt.train_data_root,train=True)
    val_data = DogCat(opt.train_data_root,train=False)
    train_dataloader = DataLoader(train_data,opt.batch_size,
                        shuffle=True,num_workers=opt.num_workers)
    val_dataloader = DataLoader(val_data,opt.batch_size,
                        shuffle=False,num_workers=opt.num_workers)
    
    # step3: criterion and optimizer
    criterion = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(model.parameters(),lr = lr,weight_decay = opt.weight_decay)
        
    # step4: meters
    loss_meter = meter.AverageValueMeter()
    confusion_matrix = meter.ConfusionMeter(2)
    previous_loss = 1e100

    # train
    for epoch in range(opt.max_epoch):
        
        loss_meter.reset()
        confusion_matrix.reset()

        for ii,(data,label) in tqdm(enumerate(train_dataloader)):

            # train model 
            input = Variable(data)
            target = Variable(label)
            if opt.use_gpu:
                input = input.cuda()
                target = target.cuda()

            optimizer.zero_grad()
            score = model(input)
            loss = criterion(score,target)
            loss.backward()
            optimizer.step()
            
            
            # meters update and visualize
            loss_meter.add(loss.item())
            confusion_matrix.add(score.data, target.data)

            if ii%opt.print_freq==opt.print_freq-1:
                vis.plot('loss', loss_meter.value()[0])
                
                # enter debug mode
                if os.path.exists(opt.debug_file):
                    import ipdb;
                    ipdb.set_trace()


        model.save()

        # validate and visualize
        val_cm,val_accuracy = val(model,val_dataloader)

        vis.plot('val_accuracy',val_accuracy)
        vis.log("epoch:{epoch},lr:{lr},loss:{loss},train_cm:{train_cm},val_cm:{val_cm}".format(
                    epoch = epoch,loss = loss_meter.value()[0],val_cm = str(val_cm.value()),train_cm=str(confusion_matrix.value()),lr=lr))
        
        # update learning rate
        if loss_meter.value()[0] > previous_loss:          
            lr = lr * opt.lr_decay
            # second way to lower the learning rate: no loss of optimizer state such as momentum
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        

        previous_loss = loss_meter.value()[0]
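
val(model, dataloader) is referenced above but not shown; a minimal sketch consistent with the meter and Variable usage here (an assumption, not the project's actual implementation):

def val(model, dataloader):
    model.eval()
    confusion_matrix = meter.ConfusionMeter(2)
    with t.no_grad():
        for ii, (input, label) in enumerate(dataloader):
            val_input = Variable(input)
            if opt.use_gpu:
                val_input = val_input.cuda()
            score = model(val_input)
            confusion_matrix.add(score.data.squeeze(), label.long())
    model.train()
    cm_value = confusion_matrix.value()
    accuracy = 100.0 * (cm_value[0][0] + cm_value[1][1]) / cm_value.sum()
    return confusion_matrix, accuracy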
class trainer:
    def __init__(self,config):
        self.config=config
        if torch.cuda.is_available():
            self.use_cuda=True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            self.use_cuda=False
            torch.set_default_tensor_type('torch.FloatTensor')

        self.nz = config.nz
        self.optimizer = config.optimizer
        self.resl = 2  # we start from 2^2 = 4
        self.lr = config.lr
        self.eps_drift = config.eps_drift
        self.smoothing = config.smoothing
        self.max_resl = config.max_resl
        self.trns_tick = config.trns_tick
        self.stab_tick = config.stab_tick
        self.TICK = config.TICK
        self.globalIter = 0
        self.globalTick = 0
        self.kimgs = 0
        self.stack = 0
        self.epoch = 0
        self.fadein = {'gen': None, 'dis': None}
        self.complete = {'gen': 0, 'dis': 0}
        self.phase = 'init'
        self.flag_flush_gen = False
        self.flag_flush_dis = False
        self.flag_add_noise = self.config.flag_add_noise
        self.flag_add_drift = self.config.flag_add_drift
        self.loader=DL.dataloader(config)
        self.LAMBDA=2

        # network
        self.G = network.Generator(config)
        self.D = network.Discriminator(config)
        print('Generator structure: ')
        print(self.G.model)
        print('Discriminator structure: ')
        print(self.D.model)
        if self.use_cuda:
            torch.cuda.manual_seed(config.random_seed)
            #self.G = self.G.cuda()
            #self.D = self.D.cuda()
            if config.n_gpu==1:
                self.G=torch.nn.DataParallel(self.G).cuda(device=0)
                self.D=torch.nn.DataParallel(self.D).cuda(device=0)
            else:
                gpus=[]
                for i in range(config.n_gpu):
                    gpus.append(i)
                self.G=torch.nn.DataParallel(self.G,device_ids=gpus).cuda()
                self.D=torch.nn.DataParallel(self.D,device_ids=gpus).cuda()
        self.renew_everything()
        self.use_tb=config.use_tb
        if self.use_tb:
            self.tb=tensorboard.tf_recorder()

    def resl_scheduler(self):
        '''
                this function will schedule image resolution(self.resl) progressively.
                it should be called every iteration to ensure resl value is updated properly.
                step 1. (trns_tick) --> transition in generator.
                step 2. (stab_tick) --> stabilize.
                step 3. (trns_tick) --> transition in discriminator.
                step 4. (stab_tick) --> stabilize.
        '''
        self.batchsize=self.loader.batch_size
        delta=1.0/(2*self.trns_tick+2*self.stab_tick)
        d_alpha=1.0*self.batchsize/self.trns_tick/self.TICK   # alpha increment per batch (chained division)

        if self.fadein['gen'] is not None:
            if self.resl%1.0<(self.trns_tick)*delta:
                self.fadein['gen'].update_alpha(d_alpha)
                self.complete['gen']=self.fadein['gen'].alpha*100
                self.phase='gtrns'
            elif self.resl%1.0>=(self.trns_tick)*delta and self.resl%1.0<(self.trns_tick+self.stab_tick)*delta:
                self.phase='gstab'
        if self.fadein['dis'] is not None:
            if self.resl%1.0>=(self.trns_tick+self.stab_tick)*delta and self.resl%1.0<(self.stab_tick+self.trns_tick*2)*delta:
                self.fadein['dis'].update_alpha(d_alpha)
                self.complete['dis']=self.fadein['dis'].alpha*100
                self.phase='dtrns'
            elif self.resl%1.0>=(self.stab_tick+self.trns_tick*2)*delta and self.phase !='final':
                self.phase='dstab'

        prev_kimgs=self.kimgs
        self.kimgs=self.kimgs+self.batchsize
        if (self.kimgs%self.TICK)<(prev_kimgs%self.TICK):   # i.e. once another TICK images have been seen
            self.globalTick=self.globalTick+1  # increase linearly every tick, and grow network structure.
            prev_resl=floor(self.resl)
            self.resl=self.resl+delta
            self.resl=max(2,min(10.5,self.resl))    # clamping, range: 4 ~ 1024

            # flush network.
            if self.flag_flush_gen and self.resl%1.0 >= (self.trns_tick+self.stab_tick)*delta and prev_resl !=2:
                if self.fadein['gen'] is not None:
                    self.fadein['gen'].update_alpha(d_alpha)
                    self.complete['gen']=self.fadein['gen'].alpha*100
                self.flag_flush_gen=False
                self.G.module.flush_network()   # flush G
                print(self.G.module.model)
                self.fadein['gen']=None
                self.complete['gen']=0.0
                self.phase='dtrns'
            elif self.flag_flush_dis and floor(self.resl) != prev_resl and prev_resl != 2:  # the region not covered by the branch above, plus edge cases
                if self.fadein['dis'] is not None:
                    self.fadein['dis'].update_alpha(d_alpha)
                    self.complete['dis']=self.fadein['dis'].alpha*100
                self.flag_flush_dis=False
                self.D.module.flush_network()
                print(self.D.module.model)
                self.fadein['dis']=None
                self.complete['dis']=0.0
                if floor(self.resl)<self.max_resl and self.phase !='final':
                    self.phase='gtrns'

            # grow network.
            if floor(self.resl)!=prev_resl and floor(self.resl)<self.max_resl+1:
                self.lr=self.lr*float(self.config.lr_decay)
                self.G.module.grow_network(floor(self.resl))
                self.D.module.grow_network(floor(self.resl))
                self.renew_everything()
                self.fadein['gen']=dict(self.G.module.model.named_children())['fadein_block']
                self.fadein['dis']=dict(self.D.module.model.named_children())['fadein_block']
                self.flag_flush_gen=True
                self.flag_flush_dis=True    # do not omit this, otherwise the network is never flushed
                print('renew_everything: ')
            if floor(self.resl)>=self.max_resl and self.resl%1.0>=(self.stab_tick+self.trns_tick*2)*delta:
                self.phase='final'
                self.resl=self.max_resl+(self.stab_tick+self.trns_tick*2)*delta
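
    # A quick sanity check of the schedule above (illustrative assumption):
    # with trns_tick = stab_tick = 1, delta = 1/(2*1 + 2*1) = 0.25, so each
    # resolution level spends one tick per phase and resl reaches the next
    # integer after 2*trns_tick + 2*stab_tick = 4 ticks:
    #
    #     resl = 2.0
    #     for tick in range(8):
    #         resl += 0.25   # one delta per tick
    #     # resl == 4.0 -> two full 4-tick cycles, two resolution doublings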

    def renew_everything(self):
        # renew dataloader.
        self.loader.renew(min(floor(self.resl),self.max_resl))
        # define tensors
        self.z=torch.Tensor(self.loader.batch_size, self.nz)
        self.x=torch.Tensor(self.loader.batch_size, 3, self.loader.imsize, self.loader.imsize)
        self.x_tilde=torch.Tensor(self.loader.batch_size, 3, self.loader.imsize, self.loader.imsize)
        self.real_label=torch.Tensor(self.loader.batch_size).fill_(1)
        self.fake_label=torch.Tensor(self.loader.batch_size).fill_(0)
        # enable cuda
        if self.use_cuda:
            self.z=self.z.cuda()
            self.x=self.x.cuda()
            self.x_tilde=self.x_tilde.cuda()
            self.real_label=self.real_label.cuda()
            self.fake_label=self.fake_label.cuda()
            torch.cuda.manual_seed(self.config.random_seed)
        # wrapping autograd Variable
        self.x=Variable(self.x)
        self.x_tilde=Variable(self.x_tilde)
        self.z=Variable(self.z)
        self.real_label=Variable(self.real_label)
        self.fake_label=Variable(self.fake_label)
        # ship new model to cuda
        if self.use_cuda:
            self.G=self.G.cuda()
            self.D=self.D.cuda()
        # optimizer
        betas=(self.config.beta1,self.config.beta2)
        #print(list(filter(lambda p: p.requires_grad, self.G.parameters())))
        #print(list(filter(lambda p: p.requires_grad, self.D.parameters())))
        if self.optimizer=='adam':
            self.opt_g=Adam(filter(lambda p: p.requires_grad,self.G.module.parameters()),lr=self.lr,betas=betas,weight_decay=0.0)
            self.opt_d=Adam(filter(lambda p: p.requires_grad,self.D.module.parameters()),lr=self.lr,betas=betas,weight_decay=0.0)
        elif self.optimizer=='rmsprop':
            self.opt_g = torch.optim.RMSprop(filter(lambda p: p.requires_grad, self.G.module.parameters()), lr=self.lr, alpha=0.9,weight_decay=0.0)
            self.opt_d = torch.optim.RMSprop(filter(lambda p: p.requires_grad, self.D.module.parameters()), lr=self.lr, alpha=0.9,weight_decay=0.0)
        else:
            self.opt_g = torch.optim.SGD(filter(lambda p: p.requires_grad, self.G.module.parameters()), lr=self.lr,weight_decay=0.0)
            self.opt_d = torch.optim.SGD(filter(lambda p: p.requires_grad, self.D.module.parameters()), lr=self.lr,weight_decay=0.0)


    def feed_interpolated_input(self,x):
        if self.phase=='gtrns' and floor(self.resl)>2 and floor(self.resl)<=self.max_resl:
            alpha=self.complete['gen']/100.0
            transform=transforms.Compose([transforms.ToPILImage(),
                                          transforms.Resize(size=int(pow(2,floor(self.resl)-1)),interpolation=0),
                                          transforms.Resize(size=int(pow(2,floor(self.resl))),interpolation=0),
                                          transforms.ToTensor(),
                                          ])
            x_low=x.clone().add(1).mul(0.5)
            for i in range(x_low.size(0)):
                x_low[i]=transform(x_low[i]).mul(2).add(-1)
            x=torch.add(x.mul(alpha),x_low.mul(1-alpha))  #interpolated_x

        if self.use_cuda:
            return x.cuda()
        else:
            return x

    def add_noise(self,x):
        if not self.flag_add_noise:
            return x
        if hasattr(self,'_d_'):
            self._d_=self._d_*0.9+torch.mean(self.fx_tilde).item()*0.1
        else:
            self._d_=0.0
        strength=0.2*max(0,self._d_-0.5)**2
        z=np.random.randn(*x.size()).astype(np.float32)*strength
        z = Variable(torch.from_numpy(z)).cuda() if self.use_cuda else Variable(torch.from_numpy(z))
        return x+z

    def require_grad(self, model, feature_extracting):
        if feature_extracting:
            for param in model.parameters():
                param.requires_grad = True
        else:
            for param in model.parameters():
                param.requires_grad = False

    def calc_gradient_penalty(self):
        alpha = torch.rand(1)*torch.ones_like(self.x)
        interpolates = alpha * self.x.detach() + ((torch.ones_like(self.x.detach()) - alpha) * self.x_tilde.detach())
        if self.use_cuda:
            interpolates = interpolates.cuda()
        interpolates.requires_grad_(True)
        disc_interpolates = self.D(interpolates)
        if self.use_cuda:
            grad_outputs=torch.ones(disc_interpolates.size()).cuda()
        else:
            grad_outputs=torch.ones(disc_interpolates.size())
        gradients = autograd.grad(outputs=disc_interpolates,
                                  inputs=interpolates,
                                  grad_outputs=grad_outputs,
                                  create_graph=True, retain_graph=True, only_inputs=True)[0]
        gradients = gradients.view(gradients.size(0), -1)
        gradient_penalty = ((gradients.norm(2, dim=1)) ** 6).mean() * self.LAMBDA
        return gradient_penalty
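
    # For comparison (not from the source): the standard WGAN-GP penalty of
    # Gulrajani et al. (2017) is one-centered and squared, rather than the
    # zero-centered norm**6 variant used above:
    #
    #     gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * self.LAMBDA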

    def train(self):
        # noise for test.
        self.z_test=torch.Tensor(self.loader.batch_size, self.nz).normal_(0.0,1.0)
        # self.z_test.data.resize_(self.loader.batch_size, self.nz).normal_(0.0,1.0)
        if self.use_cuda:
            self.z_test=self.z_test.cuda()
        self.z_test=Variable(self.z_test) #,volatile=True

        for step in range(2,self.max_resl+1+5):
            for iter in tqdm(range(0,(self.trns_tick*2+self.stab_tick*2)*self.TICK,self.loader.batch_size)):
                self.globalIter=self.globalIter+1
                self.stack=self.stack+self.loader.batch_size
                if self.stack>ceil(len(self.loader.dataset)):
                    self.epoch=self.epoch+1
                    self.stack=int(self.stack%(ceil(len(self.loader.dataset))))
                # resolution scheduler.
                self.resl_scheduler()
                # update discriminator.
                for i in range(1):
                    self.D.zero_grad()  # zero gradients.
                    self.require_grad(self.D, True)
                    self.x.data=self.feed_interpolated_input(self.loader.get_batch())
                    if self.flag_add_noise:
                        self.x=self.add_noise(self.x)
                    self.z=torch.randn(self.loader.batch_size,self.nz,1,1)
                    if self.use_cuda:
                        self.z=self.z.cuda()
                    self.x_tilde=self.G(self.z)
                    self.fx=self.D(self.x)
                    self.fx_tilde = self.D(self.x_tilde.detach())
                    # loss_d=F.mse_loss(self.fx.squeeze(), torch.ones_like(self.fx.squeeze()))+F.mse_loss(self.fx_tilde.squeeze(), torch.zeros_like(self.fx_tilde.squeeze()))+self.calc_gradient_penalty()
                    loss_d = self.fx.squeeze().mean() - self.fx_tilde.squeeze().mean() + self.calc_gradient_penalty()
                    loss_d.backward(retain_graph=False)
                    self.opt_d.step()
                # update generator.
                for i in range(1):
                    self.G.zero_grad()  # zero gradients.
                    self.require_grad(self.D, False)
                    fx_tilde=self.D(self.x_tilde)
                    # loss_g = F.mse_loss(fx_tilde.squeeze(), torch.ones_like(self.fx_tilde.squeeze()))
                    loss_g = fx_tilde.squeeze().mean()
                    loss_g.backward(retain_graph=False)
                    self.opt_g.step()
                # logging.
                log_msg = ' [E:{0}][T:{1}][{2:6}/{3:6}]  errD: {4:.4f} | errG: {5:.4f} | ' \
                          '[lr:{11:.5f}][cur:{6:.3f}][resl:{7:4}][{8}][{9:.1f}%][{10:.1f}%]'.format(
                    self.epoch, self.globalTick, self.stack, len(self.loader.dataset),
                    loss_d.item(),loss_g.item(), self.resl, int(pow(2, floor(self.resl))),
                    self.phase, self.complete['gen'],self.complete['dis'],self.lr)
                tqdm.write(log_msg)

                # save model.
                self.snapshot('repo/model')

                # save image grid.
                if self.globalIter % self.config.save_img_every == 0:
                    with torch.no_grad():
                        x_test = self.G(self.z_test)
                    utils.mkdir('repo/save/grid')
                    utils.save_image_grid(x_test.data, 'repo/save/grid/{}_{}_G{}_D{}.jpg'.format(
                        int(self.globalIter / self.config.save_img_every), self.phase, self.complete['gen'],self.complete['dis']))
                    utils.mkdir('repo/save/resl_{}'.format(int(floor(self.resl))))
                    utils.save_image_single(x_test.data,'repo/save/resl_{}/{}_{}_G{}_D{}.jpg'.format(
                        int(floor(self.resl)), int(self.globalIter / self.config.save_img_every),self.phase,self.complete['gen'],self.complete['dis']))
                    # tensorboard visualization.
                    if self.use_tb:
                        with torch.no_grad():
                            x_test = self.G(self.z_test)
                        self.tb.add_scalar('data/loss_g', loss_g.item(), self.globalIter)
                        self.tb.add_scalar('data/loss_d', loss_d.item(), self.globalIter)
                        self.tb.add_scalar('tick/lr', self.lr, self.globalIter)
                        self.tb.add_scalar('tick/cur_resl', int(pow(2, floor(self.resl))), self.globalIter)

    def get_state(self,target):
        if target=='gen':
            state={'resl':self.resl,
                   'state_dict':self.G.module.state_dict(),
                   'optimizer':self.opt_g.state_dict()}
            return state
        elif target=='dis':
            state={'resl':self.resl,
                   'state_dict':self.D.module.state_dict(),
                   'optimizer':self.opt_d.state_dict()}
            return state

    def snapshot(self,path):
        if not os.path.exists(path):
            if os.name=='nt':
                os.system('mkdir {}'.format(path.replace('/','\\')))  # keep the space after mkdir, or the path will not be created
            else:
                os.system('mkdir -p {}'.format(path))
        # save every 100 tick if the network is in stab phase.
        ndis='dis_R{}_T{}.pth.tar'.format(int(floor(self.resl)),self.globalTick)
        ngen='gen_R{}_T{}.pth.tar'.format(int(floor(self.resl)),self.globalTick)
        if self.globalTick%50==0:
            if self.phase=='gstab'or self.phase=='dstab'or self.phase=='final':
                save_path=os.path.join(path,ndis)
                if not os.path.exists(save_path):
                    torch.save(self.get_state('dis'),save_path)
                    save_path=os.path.join(path,ngen)
                    torch.save(self.get_state('gen'),save_path)
                    print('[snapshot] model saved @ {}'.format(path))
Exemplo n.º 53
0
class Decoder(nn.Module):
    def __init__(
        self,
        n_mel_channels,
        n_frames_per_step,
        encoder_embedding_dim,
        attention_dim,
        attention_rnn_dim,
        attention_location_n_filters,
        attention_location_kernel_size,
        decoder_rnn_dim,
        prenet_dim,
        max_decoder_steps,
        gate_threshold,
        p_attention_dropout,
        p_decoder_dropout,
    ):
        super(Decoder, self).__init__()
        self.n_mel_channels = n_mel_channels
        self.n_frames_per_step = n_frames_per_step
        self.encoder_embedding_dim = encoder_embedding_dim
        self.attention_rnn_dim = attention_rnn_dim
        self.decoder_rnn_dim = decoder_rnn_dim
        self.prenet_dim = prenet_dim
        self.max_decoder_steps = max_decoder_steps
        self.gate_threshold = gate_threshold
        self.p_attention_dropout = p_attention_dropout
        self.p_decoder_dropout = p_decoder_dropout

        self.prenet = Prenet(n_mel_channels * n_frames_per_step, [prenet_dim, prenet_dim])

        self.attention_rnn = nn.LSTMCell(prenet_dim + encoder_embedding_dim, attention_rnn_dim)

        self.attention_layer = Attention(
            attention_rnn_dim,
            encoder_embedding_dim,
            attention_dim,
            attention_location_n_filters,
            attention_location_kernel_size,
        )

        self.decoder_rnn = nn.LSTMCell(attention_rnn_dim + encoder_embedding_dim, decoder_rnn_dim, 1)

        self.linear_projection = LinearNorm(decoder_rnn_dim + encoder_embedding_dim, n_mel_channels * n_frames_per_step)

        self.gate_layer = LinearNorm(decoder_rnn_dim + encoder_embedding_dim, 1, bias=True, w_init_gain="sigmoid")

    def get_go_frame(self, memory):
        """Gets all zeros frames to use as first decoder input
        PARAMS
        ------
        memory: decoder outputs

        RETURNS
        -------
        decoder_input: all zeros frames
        """
        B = memory.size(0)
        decoder_input = Variable(memory.data.new(B, self.n_mel_channels * self.n_frames_per_step).zero_())
        return decoder_input

    def initialize_decoder_states(self, memory, mask):
        """Initializes attention rnn states, decoder rnn states, attention
        weights, attention cumulative weights, attention context, stores memory
        and stores processed memory
        PARAMS
        ------
        memory: Encoder outputs
        mask: Mask for padded data if training, expects None for inference
        """
        B = memory.size(0)
        MAX_TIME = memory.size(1)

        self.attention_hidden = Variable(memory.data.new(B, self.attention_rnn_dim).zero_())
        self.attention_cell = Variable(memory.data.new(B, self.attention_rnn_dim).zero_())

        self.decoder_hidden = Variable(memory.data.new(B, self.decoder_rnn_dim).zero_())
        self.decoder_cell = Variable(memory.data.new(B, self.decoder_rnn_dim).zero_())

        self.attention_weights = Variable(memory.data.new(B, MAX_TIME).zero_())
        self.attention_weights_cum = Variable(memory.data.new(B, MAX_TIME).zero_())
        self.attention_context = Variable(memory.data.new(B, self.encoder_embedding_dim).zero_())

        self.memory = memory
        self.processed_memory = self.attention_layer.memory_layer(memory)
        self.mask = mask

    def parse_decoder_inputs(self, decoder_inputs):
        """Prepares decoder inputs, i.e. mel outputs
        PARAMS
        ------
        decoder_inputs: inputs used for teacher-forced training, i.e. mel-specs

        RETURNS
        -------
        inputs: processed decoder inputs

        """
        # (B, n_mel_channels, T_out) -> (B, T_out, n_mel_channels)
        decoder_inputs = decoder_inputs.transpose(1, 2)
        decoder_inputs = decoder_inputs.view(
            decoder_inputs.size(0), int(decoder_inputs.size(1) / self.n_frames_per_step), -1
        )
        # (B, T_out, n_mel_channels) -> (T_out, B, n_mel_channels)
        decoder_inputs = decoder_inputs.transpose(0, 1)
        return decoder_inputs

    def parse_decoder_outputs(self, mel_outputs, gate_outputs, alignments):
        """Prepares decoder outputs for output
        PARAMS
        ------
        mel_outputs:
        gate_outputs: gate output energies
        alignments:

        RETURNS
        -------
        mel_outputs:
        gate_outputs: gate output energies
        alignments:
        """
        # (T_out, B) -> (B, T_out)
        alignments = torch.stack(alignments).transpose(0, 1)
        # (T_out, B) -> (B, T_out)
        gate_outputs = torch.stack(gate_outputs).transpose(0, 1)
        gate_outputs = gate_outputs.contiguous()
        # (T_out, B, n_mel_channels) -> (B, T_out, n_mel_channels)
        mel_outputs = torch.stack(mel_outputs).transpose(0, 1).contiguous()
        # decouple frames per step
        mel_outputs = mel_outputs.view(mel_outputs.size(0), -1, self.n_mel_channels)
        # (B, T_out, n_mel_channels) -> (B, n_mel_channels, T_out)
        mel_outputs = mel_outputs.transpose(1, 2)

        return mel_outputs, gate_outputs, alignments

    def decode(self, decoder_input):
        """Decoder step using stored states, attention and memory
        PARAMS
        ------
        decoder_input: previous mel output

        RETURNS
        -------
        mel_output:
        gate_output: gate output energies
        attention_weights:
        """
        cell_input = torch.cat((decoder_input, self.attention_context), -1)
        self.attention_hidden, self.attention_cell = self.attention_rnn(
            cell_input, (self.attention_hidden, self.attention_cell)
        )
        self.attention_hidden = F.dropout(self.attention_hidden, self.p_attention_dropout, self.training)

        attention_weights_cat = torch.cat(
            (self.attention_weights.unsqueeze(1), self.attention_weights_cum.unsqueeze(1)), dim=1
        )
        self.attention_context, self.attention_weights = self.attention_layer(
            self.attention_hidden, self.memory, self.processed_memory, attention_weights_cat, self.mask
        )

        self.attention_weights_cum += self.attention_weights
        decoder_input = torch.cat((self.attention_hidden, self.attention_context), -1)
        self.decoder_hidden, self.decoder_cell = self.decoder_rnn(
            decoder_input, (self.decoder_hidden, self.decoder_cell)
        )
        self.decoder_hidden = F.dropout(self.decoder_hidden, self.p_decoder_dropout, self.training)

        decoder_hidden_attention_context = torch.cat((self.decoder_hidden, self.attention_context), dim=1)
        decoder_output = self.linear_projection(decoder_hidden_attention_context)

        gate_prediction = self.gate_layer(decoder_hidden_attention_context)
        return decoder_output, gate_prediction, self.attention_weights

    def forward(self, memory, decoder_inputs, memory_lengths, device):
        """Decoder forward pass for training
        PARAMS
        ------
        memory: Encoder outputs
        decoder_inputs: Decoder inputs for teacher forcing. i.e. mel-specs
        memory_lengths: Encoder output lengths for attention masking.

        RETURNS
        -------
        mel_outputs: mel outputs from the decoder
        gate_outputs: gate outputs from the decoder
        alignments: sequence of attention weights from the decoder
        """

        decoder_input = self.get_go_frame(memory).unsqueeze(0)
        decoder_inputs = self.parse_decoder_inputs(decoder_inputs)
        decoder_inputs = torch.cat((decoder_input, decoder_inputs), dim=0)
        decoder_inputs = self.prenet(decoder_inputs)

        self.initialize_decoder_states(memory, mask=~get_mask_from_lengths(memory_lengths, device))

        mel_outputs, gate_outputs, alignments = [], [], []
        while len(mel_outputs) < decoder_inputs.size(0) - 1:
            decoder_input = decoder_inputs[len(mel_outputs)]
            mel_output, gate_output, attention_weights = self.decode(decoder_input)
            mel_outputs += [mel_output.squeeze(1)]
            gate_outputs += [gate_output.squeeze(1)]
            alignments += [attention_weights]

        mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(mel_outputs, gate_outputs, alignments)

        return mel_outputs, gate_outputs, alignments

    def inference(self, memory, max_decoder_steps=None):
        """Decoder inference
        PARAMS
        ------
        memory: Encoder outputs

        RETURNS
        -------
        mel_outputs: mel outputs from the decoder
        gate_outputs: gate outputs from the decoder
        alignments: sequence of attention weights from the decoder
        """
        if not max_decoder_steps:
            # Use default max decoder steps if not given
            max_decoder_steps = self.max_decoder_steps

        decoder_input = self.get_go_frame(memory)

        self.initialize_decoder_states(memory, mask=None)

        mel_outputs, gate_outputs, alignments = [], [], []
        while True:
            decoder_input = self.prenet(decoder_input)
            mel_output, gate_output, alignment = self.decode(decoder_input)

            mel_outputs += [mel_output.squeeze(1)]
            gate_outputs += [gate_output]
            alignments += [alignment]

            if torch.sigmoid(gate_output.data) > self.gate_threshold:
                break
            elif len(mel_outputs) == max_decoder_steps:
                raise Exception(
                    "Warning! Reached max decoder steps. Either the model is low quality or the given sentence is too short/long"
                )

            decoder_input = mel_output

        mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(mel_outputs, gate_outputs, alignments)

        return mel_outputs, gate_outputs, alignments
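
get_mask_from_lengths is called in forward() above but not defined in this excerpt; a minimal sketch of its usual Tacotron 2 form (an assumption: True marks valid positions, which forward() negates with ~):

def get_mask_from_lengths(lengths, device):
    # one boolean row per sequence: position index < sequence length
    max_len = int(torch.max(lengths).item())
    ids = torch.arange(0, max_len, device=device)
    return ids < lengths.unsqueeze(1)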
Exemplo n.º 54
0
def train_model(model,
                criterion,
                optimizer,
                lr_scheduler,
                lr,
                dset_loaders,
                dset_sizes,
                use_gpu,
                num_epochs,
                exp_dir='./',
                resume=''):
    print('dictionary length: ' + str(len(dset_loaders)))
    #reg_params=model.reg_params
    since = time.time()

    best_model = model
    best_acc = 0.0
    if os.path.isfile(resume):
        print("=> loading checkpoint '{}'".format(resume))
        checkpoint = torch.load(resume)
        start_epoch = checkpoint['epoch']
        #best_prec1 = checkpoint['best_prec1']
        #model = checkpoint['model']
        model.load_state_dict(checkpoint['state_dict'])
        #modelx = checkpoint['model']
        #model.reg_params=modelx.reg_params
        print('load')
        optimizer.load_state_dict(checkpoint['optimizer'])
        #pdb.
        #model.reg_params=reg_params
        #del model.reg_params
        print("=> loaded checkpoint '{}' (epoch {})".format(
            resume, checkpoint['epoch']))
    else:
        start_epoch = 0
        print("=> no checkpoint found at '{}'".format(resume))

    print(str(start_epoch))
    #pdb.set_trace()
    for epoch in range(start_epoch, num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                optimizer = lr_scheduler(optimizer, epoch, lr)
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for data in dset_loaders[phase]:
                # get the inputs
                inputs, labels = data
                inputs = inputs.squeeze()
                # wrap them in Variable
                if use_gpu:
                    inputs, labels = Variable(inputs.cuda()), \
                        Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()
                model.zero_grad()
                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    #print('step')
                    optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data).item()

            epoch_loss = running_loss / dset_sizes[phase]
            epoch_acc = running_corrects / dset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                del outputs
                del labels
                del inputs
                del loss
                del preds
                best_acc = epoch_acc
                #best_model = copy.deepcopy(model)
                torch.save(model, os.path.join(exp_dir, 'best_model.pth.tar'))

        #epoch_file_name=exp_dir+'/'+'epoch-'+str(epoch)+'.pth.tar'
        epoch_file_name = exp_dir + '/' + 'epoch' + '.pth.tar'
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'epoch_acc': epoch_acc,
                'arch': 'alexnet',
                'model': model,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, epoch_file_name)
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    return model
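
save_checkpoint is used above but not shown; it is assumed to be the usual thin wrapper over torch.save:

def save_checkpoint(state, filename):
    # persist the assembled training state (epoch, weights, optimizer, ...)
    torch.save(state, filename)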
Exemplo n.º 55
0
                              num_workers=0)

# Training loop
for epoch in range(epochTimes):

    #test the result on test set
    model.eval()
    test_loss = 0
    correct = 0
    for i, data in enumerate(test_loader, 0):
        # get the inputs
        inputs, labels = data 
        labels = labels.type(torch.FloatTensor)

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(inputs)
        if(i<10):
            print(labels.data[0])
            print(y_pred.data[0])

        # Compute and print loss
        loss = criterion(y_pred, labels)
        test_loss += loss.item()

    test_loss /= len(test_loader.dataset)
    print('Average loss after ' + str(epoch) + ': ' + str(test_loss))

   
Exemplo n.º 56
0
train_start_time = time.time()
early_counter = 0
decay_counter = 0
best_per = 0
for e_ in range(config.epoch):
    print("Epoch: ", e_ + 1)
    batch_counter = 0
    for ie, example in enumerate(train_examples):
        token_ids, char_ids, entities, class_samples = example

        # skip for initial experiments
        # if len(token_ids) > 20:
        #    continue

        token_var = Variable(torch.LongTensor(np.array(token_ids)))
        sample_vars = sample2tensor(class_samples, config.if_gpu)
        if config.if_gpu: token_var = token_var.cuda()

        char_vars = []
        for char_id_l in char_ids:
            char_var = Variable(torch.LongTensor(np.array(char_id_l)))
            if config.if_gpu: char_var = char_var.cuda()
            char_vars.append(char_var)

        ner_model.train()
        optimizer.zero_grad()
        loss = ner_model.forward(token_var, char_vars, entities, sample_vars)
        loss.backward()
        clip_model_grad(ner_model, config.clip_norm)
        print("{2}: sentece length {0} : loss {1}".format(
Exemplo n.º 57
0
def training_loop(args, model, criterion, optimizer, dataset, f, device, experiment):

    start = time.time()
    best_weights = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(args.num_epochs):
        print(f'Epoch {epoch} began')
        running_loss = 0.0
        running_corrects = 0
        # training phase
        for idx, data in enumerate(Bar(dataset['train_dataloader'])):
            inputs = Variable(data.get('image')).to(device)
            target = Variable(data.get('target')).to(device)
            # forward pass
            output = model(inputs)
            _, preds = torch.max(output, 1)
            loss = criterion(output, target)
            loss = loss / args.accumulation_steps           # Normalize accumulated loss (averaged)
            loss = loss.mean()
            # backward pass
            loss.backward()                                 # Backward pass (mean of parallel loss)
            if (idx+1) % args.accumulation_steps == 0:      # Wait for several backward steps
                optimizer.step()                            # Now we can do an optimizer step
                model.zero_grad()                           # Reset gradient tensors

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == target.data)
        # log training stats
        train_epoch_loss = running_loss / len(dataset['train_data'])
        train_epoch_acc = running_corrects.double() / len(dataset['train_data'])
        print('Epoch [{}/{}], training loss:{:.4f}'.format(epoch+1, args.num_epochs, train_epoch_loss))
        print('Epoch [{}/{}], training accuracy:{:.4f}'.format(epoch+1, args.num_epochs, train_epoch_acc))
        # validation phase
        running_loss = 0.0
        running_corrects = 0
        with torch.no_grad():
            for idx, data in enumerate(Bar(dataset['val_dataloader'])):
                inputs = Variable(data.get('image')).to(device)
                target = Variable(data.get('target')).to(device)
                output = model(inputs)
                _, preds = torch.max(output, 1)
                loss = criterion(output, target).mean()
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == target.data)
        # log validation stats
        valid_epoch_loss = running_loss / len(dataset['val_data'])
        valid_epoch_acc = running_corrects.double() / len(dataset['val_data'])
        print('Epoch [{}/{}], validation loss:{:.4f}'.format(epoch+1, args.num_epochs, valid_epoch_loss))
        print('Epoch [{}/{}], validation accuracy:{:.4f}'.format(epoch+1, args.num_epochs, valid_epoch_acc))
        # append to experiment report
        print(f'{epoch+1}\t{train_epoch_loss}\t{train_epoch_acc}\t{valid_epoch_loss}\t{valid_epoch_acc}',
              file=open(f, "a"))
        # save best weights
        if valid_epoch_acc > best_acc:
            best_acc = valid_epoch_acc
            best_weights = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), f'models/{args.dataset}/{experiment}.pth')

    time_elapsed = time.time() - start
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60), file=open(f, "a"))
    print('Best val Acc: {:4f}'.format(best_acc), file=open(f, "a"))

    # load best weights
    model.load_state_dict(torch.load(f'models/{args.dataset}/{experiment}.pth'))

    return model
    def routing(self, x):
        """
        Routing algorithm for capsule.

        :input: tensor x of shape [128, 8, 1152]

        :return: vector output of capsule j
        """
        batch_size = x.size(0)

        x = x.transpose(
            1,
            2)  # dim 1 and dim 2 are swapped. out tensor shape: [128, 1152, 8]

        # Stacking and adding a dimension to a tensor.
        # stack ops output shape: [128, 1152, 10, 8]
        # unsqueeze ops output shape: [128, 1152, 10, 8, 1]
        x = torch.stack([x] * self.num_unit, dim=2).unsqueeze(4)

        # Convert single weight to batch weight.
        # [1 x 1152 x 10 x 16 x 8] to: [128, 1152, 10, 16, 8]
        batch_weight = torch.cat([self.weight] * batch_size, dim=0)

        # u_hat is "prediction vectors" from the capsules in the layer below.
        # Transform inputs by weight matrix.
        # Matrix product of 2 tensors with shape: [128, 1152, 10, 16, 8] x [128, 1152, 10, 8, 1]
        # u_hat shape: [128, 1152, 10, 16, 1]
        u_hat = torch.matmul(batch_weight, x)

        # All the routing logits (b_ij in the paper) are initialized to zero.
        # self.in_channel = primary_unit_size = 32 * 6 * 6 = 1152
        # self.num_unit = num_classes = 10
        # b_ij shape: [1, 1152, 10, 1]
        b_ij = Variable(torch.zeros(1, self.in_channel, self.num_unit, 1))
        if self.cuda_enabled:
            b_ij = b_ij.cuda()

        # The paper's "Capsules on MNIST" section reports results for a
        # CapsNet trained with 3 routing iterations.
        num_iterations = self.num_routing

        for iteration in range(num_iterations):
            # Routing algorithm

            # Calculate routing or also known as coupling coefficients (c_ij).
            # c_ij shape: [1, 1152, 10, 1]
            c_ij = F.softmax(
                b_ij, dim=2)  # Convert routing logits (b_ij) to softmax.
            # c_ij shape from: [1, 1152, 10, 1] to: [128, 1152, 10, 1, 1]
            c_ij = torch.cat([c_ij] * batch_size, dim=0).unsqueeze(4)

            # Implement equation 2 in the paper.
            # s_j is the total input to a capsule: a weighted sum over all "prediction vectors".
            # u_hat holds the predictions û_{j|i} made by capsule i for capsule j.
            # c_ij * u_hat shape: [128, 1152, 10, 16, 1]
            # s_j output shape: [batch_size=128, 1, 10, 16, 1]
            # Sum of Primary Capsules outputs, 1152D becomes 1D.
            s_j = (c_ij * u_hat).sum(dim=1, keepdim=True)

            # Squash the vector output of capsule j.
            # v_j shape: [batch_size, weighted sum of PrimaryCaps output,
            #             num_classes, output_unit_size from u_hat, 1]
            # == [128, 1, 10, 16, 1]
            # So, the length of the output vector of a capsule is 16, which is in dim 3.
            v_j = utils.squash(s_j, dim=3)

            # in_channel is 1152.
            # v_j1 shape: [128, 1152, 10, 16, 1]
            v_j1 = torch.cat([v_j] * self.in_channel, dim=1)

            # The agreement.
            # Transpose u_hat with shape [128, 1152, 10, 16, 1] to [128, 1152, 10, 1, 16],
            # so we can do matrix product u_hat and v_j1.
            # u_vj1 shape: [1, 1152, 10, 1]
            u_vj1 = torch.matmul(u_hat.transpose(3, 4),
                                 v_j1).squeeze(4).mean(dim=0, keepdim=True)

            # Update routing (b_ij) by adding the agreement to the initial logit.
            b_ij = b_ij + u_vj1


        return v_j.squeeze(1)  # shape: [128, 10, 16, 1]
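# `utils.squash` is not shown in this snippet; a plausible implementation,
# following Eq. 1 of Sabour et al., "Dynamic Routing Between Capsules"
# (this sketch is an assumption, not code from the original repo):
def squash(s, dim=-1):
    # ||s||^2 along the capsule dimension
    squared_norm = (s ** 2).sum(dim=dim, keepdim=True)
    scale = squared_norm / (1.0 + squared_norm)
    # Unit direction scaled to a length in (0, 1); the epsilon guards
    # against division by zero for all-zero vectors.
    return scale * s / torch.sqrt(squared_norm + 1e-9)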
Exemplo n.º 59
0
def calc_sl_loss(probs, update=True):
    """Supervised loss of `probs` against the current batch labels.

    Note: `update` is accepted but unused in this snippet.
    """
    y_true = conf.batch_label
    y_true = Variable(torch.from_numpy(y_true)).cuda().long()
    loss = criterion(probs, y_true)
    return loss
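# Hypothetical usage, assuming `criterion` is e.g. nn.CrossEntropyLoss() and
# `conf.batch_label` holds the integer class labels of the current batch:
#     probs = model(batch_inputs)   # raw logits, shape [batch, num_classes]
#     loss = calc_sl_loss(probs)
#     loss.backward()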

# Extract no-flow boundary data, part 2
N_noflow2 = 10000
X_noflow2_col = x[0] + (x[50] - x[0]) * lhs(1, N_noflow2)
Y_noflow2_col = y[50] * np.ones((N_noflow2, 1))
T_noflow2_col = 0 + (t[49] - 0) * lhs(1, N_noflow2)
TXY_noflow2 = np.hstack((T_noflow2_col, X_noflow2_col, Y_noflow2_col))
kesi_noflow_col = np.random.randn(N_noflow1 + N_noflow2, n_eigen)
TXY_noflow = np.vstack((TXY_noflow1, TXY_noflow2))
TXY_kesi_noflow = np.hstack((TXY_noflow, kesi_noflow_col))
TXY_kesi_noflow = torch.from_numpy(TXY_kesi_noflow)
TXY_kesi_noflow = TXY_kesi_noflow.type(torch.FloatTensor)
TXY_kesi_noflow = TXY_kesi_noflow.to(device)

TXY_kesi_noflow = Variable(TXY_kesi_noflow, requires_grad=True)
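# The from_numpy / .type(torch.FloatTensor) / .to(device) sequence above is
# repeated for each remaining array; a small helper (hypothetical, not part
# of the original script) would condense the pattern:
def to_float_tensor(arr, requires_grad=False):
    t = torch.from_numpy(arr).type(torch.FloatTensor).to(device)  # `device` as defined above
    if requires_grad:
        t = Variable(t, requires_grad=True)
    return t

# e.g. TXY_kesi_noflow = to_float_tensor(TXY_kesi_noflow_np, requires_grad=True),
# where TXY_kesi_noflow_np is the original numpy array.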

TXY_kesi_f = torch.from_numpy(TXY_kesi_f)
TXY_kesi_f = TXY_kesi_f.type(torch.FloatTensor)

TXY_kesi_train = torch.from_numpy(TXY_kesi_train)
TXY_kesi_train = TXY_kesi_train.type(torch.FloatTensor)

H_train = torch.from_numpy(H_train)
H_train = H_train.type(torch.FloatTensor)


w_x_tf = torch.from_numpy(w_x)
w_x_tf = w_x_tf.type(torch.FloatTensor)

w_y_tf = torch.from_numpy(w_y)