Example #1
    def train(self, train_loader):
        self.model.train()
        for batch_idx, (data, target) in enumerate(train_loader):

            if type(data) is dict:
                data = data['image']
            if self.use_cuda:
                data, target = to_cuda(data), to_cuda(target)

            data, target = to_var(data), to_var(target)                
            self.optimizer.zero_grad()
            if self.twoImage:
                loss = self.train_step2(data, target)
            else:
                loss = self.train_step(data, target)                
            self.optimizer.step()
            
            if batch_idx % 500 == 0:
                if type(data) is list:
                    len_data = len(data[0])
                else:
                    len_data = len(data)
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    self.epoch, batch_idx * len_data, len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss))
        self.epoch += 1
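train_step and train_step2 are not shown in this example; since optimizer.step() is called right after them, each presumably runs the forward pass, computes the loss, and calls backward(). A hypothetical single-input sketch, consistent with the F.nll_loss used in the test loop of Example #11 (F is torch.nn.functional):

    def train_step(self, data, target):
        # hypothetical sketch: forward pass, NLL loss, backward pass;
        # the caller (train above) is responsible for optimizer.step()
        output = self.model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        return loss.item()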
Example #2
    def evaluate_subpopulation(self, val_theta, val_data,
                               theta_constraint=lambda x: True):

        ''' cosine similarity between val_theta and val_explanation
        theta_constraint: keep only explanations whose theta satisfies the constraint
        '''
        i = 0
        sim = 0
        num = 0 # number of comparisons
        for x, y in val_data:
            m = x.size(0)
            x, y = to_var(x), to_var(y)
            f = to_np(self.explain(x))
            w = val_theta[i:i+m]

            valid = map(theta_constraint, f)
            valid = np.nonzero(list(valid))
            f = f[valid]
            w = w[valid]

            i += m
            num += f.shape[0]

            if f.shape[0] == 0:
                continue
            
            f_norm = np.sqrt((f * f).sum(1)) + 1e-10
            w_norm = np.sqrt((w * w).sum(1)) + 1e-10
            angle = (w * f).sum(1) / f_norm / w_norm
            sim += angle.sum()
            
        return sim / num
Example #3
    def fitData(self, data, batch_size=100, n_epochs=10, print_every=10,
                valdata=None):
        '''
        fit a model to (x, y) data in batches
        set print_every to 0 to disable progress printing
        '''
        time_start = time.time()
        losses = []
        best_valloss, best_valindex = np.inf, 0
        vallosses = []
        n = len(data.dataset)
        cost = 0 
        
        for epoch in range(n_epochs):

            for k, (x_batch, y_batch) in enumerate(data):
                x_batch, y_batch = to_var(x_batch), to_var(y_batch)
                y_hat, regret = self.step(x_batch, y_batch)
                m = x_batch.size(0)                
                cost += 1 / (k+1) * (regret/m - cost)

                if print_every != 0 and k % print_every == 0:
                    
                    losses.append(cost)
                    # progress, time, avg loss, auc
                    to_print = ('%.2f%% (%s) %.4f %.4f' % ((epoch * n + (k+1) * m) /
                                                           (n_epochs * n) * 100,
                                                           timeSince(time_start),
                                                           cost,
                                                           model_auc(self.model,
                                                                     data)))
                    if valdata is not None:
                        valloss = calc_loss(self.model, valdata, self.loss)
                        vallosses.append(valloss)
                        np.save('models/%s.valloss' % self.name, vallosses)   
                        to_print += " %.4f" % model_auc(self.model, valdata)

                        if valloss <= best_valloss:
                            best_valloss = valloss
                            best_valindex = len(vallosses) - 1
                            torch.save(self.model, 'models/%s.pt' % self.name)   
                    else:
                        torch.save(self.model, 'models/%s.pt' % self.name)
                        
                    print(to_print)
                    np.save('models/%s.loss' % self.name, losses)

                    cost = 0
                    
        return losses, vallosses
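timeSince is an unstated helper. The fit loops in later examples parse its return value with int(duration.split('m')[0]), so it presumably returns an elapsed-time string that begins with whole minutes. A minimal sketch under that assumption:

import math
import time


def timeSince(since):
    # elapsed wall-clock time since `since`, formatted as e.g. '12m 34s';
    # the minutes prefix is what the early-exit checks in later examples parse off
    s = time.time() - since
    m = math.floor(s / 60)
    return '%dm %ds' % (m, s - m * 60)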
Example #4
 def explain(self, x):
     x = to_var(x.data, volatile=True).float()
     
     # form an explanation
     z = self.sampleZ(x)
     f = self.weightNet(z)
     return f
Example #5
    def plotMTL(self):
        import seaborn as sns
        import matplotlib.pyplot as plt
        if not self.mtl:
            return

        T = self.switchNet.input_size
        K = self.switch_size
        # probability assignment matrix
        A = np.zeros((T, K))

        for i in range(T):
            t = to_var(torch.FloatTensor([i]))
            A[i] = np.exp(to_np(self.switchNet(t)))

        # similarity matrix
        S = A.dot(A.T)
        np.fill_diagonal(S, 1)        

        sns.heatmap(S, vmin=0, vmax=1)
        im = ToTensor()(fig2img(plt.gcf()))
        self.writer.add_image('task_similarity', im,
                              self.count)
        plt.close()


        sns.heatmap(A, vmin=0, vmax=1)
        im = ToTensor()(fig2img(plt.gcf()))
        self.writer.add_image('task_assignment', im,
                              self.count)
        plt.close()
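fig2img and ToTensor (presumably torchvision.transforms.ToTensor) are assumed here. A plausible fig2img that rasterizes a matplotlib figure into a PIL image, so that ToTensor() can turn it into the CHW tensor add_image expects:

import io

from PIL import Image


def fig2img(fig):
    # render the figure to PNG in memory and reload it as an RGB PIL image
    buf = io.BytesIO()
    fig.savefig(buf, format='png', bbox_inches='tight')
    buf.seek(0)
    return Image.open(buf).convert('RGB')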
Example #6
 def transform(self, x):
     '''
     x is a PyTorch Variable; for a combined trainer, x is the output
     of the previous trainer's transform
     '''
     x = to_np(x)
     clusters = self.clf.predict(x)
     clusters = onehotize(to_var(torch.from_numpy(clusters)).view(-1, 1), self.k)
     return clusters
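onehotize is assumed to expand a column of integer indices into an n x k one-hot matrix (the forward example below uses it the same way). A minimal sketch:

import torch


def onehotize(indices, k):
    # indices: (n, 1) column of class ids; returns an (n, k) one-hot Variable
    out = to_var(torch.zeros(indices.size(0), k))
    return out.scatter_(1, indices.long(), 1)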
Example #7
 def explain(self):
     explanations = []
     for i in range(self.switch_size):
         x = np.zeros(self.switch_size)
         x[i] = 1
         x = to_var(torch.from_numpy(x)).float()
         explanations.append(list(to_np(self.forward(x))))
     # print(explanations)
     return explanations
Example #8
    def fit(self, data, **kwargs):
        x, y = data.dataset[:] # x is the original input, not necessarily the k-means input
        x = to_var(x)
        x = self.transform_function(x)

        x = to_np(x)
        self.clf.fit(x)

        savedir = os.path.dirname('nonlinear_models/%s' % self.name)
        os.system('mkdir -p %s' % savedir)                      
        joblib.dump(self.clf, 'nonlinear_models/%s.pkl' % self.name)
Example #9
    def forward(self, x):
        if self.mtl:  # the last one is task number
            if len(x.size()) == 1:
                t = np.zeros(self.input_size)
                t[int(to_np(x[-1])[0])] = 1
                x = to_var(torch.from_numpy(t).float()).view(1, -1)
            else:
                x = x[:, -1:]
                x = onehotize(x, self.input_size)

        o = self.i2o(x)
        return self.logsoftmax(o)
Example #10
    def sampleZ(self, x):
        n = x.size(0) # minibatch size        
        # determine which line to use
        probs = torch.exp(self.switchNet(x))

        m = Categorical(probs)
        one_hot = to_var(m.probs.data.new(m.probs.size()).zero_())
        indices = m.sample()
        if indices.dim() < one_hot.dim():
            indices = indices.unsqueeze(-1)
        z = one_hot.scatter_(-1, indices, 1)
        self.z = z
        return z
Example #11
    def test(self, test_loader):
        self.model.eval()
        test_loss = 0
        correct = 0
        for data, target in test_loader:
            if type(data) is dict:            
                data = data['image']
            if self.use_cuda:
                data, target = to_cuda(data), to_cuda(target)
            data, target = to_var(data, volatile=True), to_var(target)
            output = self.model(data)

            # sum up batch loss
            test_loss += F.nll_loss(output, target, size_average=False).item()
            # get the index of the max log-probability
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()

        test_loss /= len(test_loader.dataset)
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
              .format(test_loss, correct, len(test_loader.dataset),
                      100. * correct / len(test_loader.dataset)))
        return 100. * correct / len(test_loader.dataset)
Example #12
def open_box(model, x):
    # forward pass to determine configuration
    # assume x is flat with no batch dimension
    assert len(x.shape) == 1, "assume no batch dimension in input"
    d = x.shape[0]
    C = []

    # get W and b
    W = to_var(torch.eye(d))
    b = to_var(torch.zeros(d))
    z = x

    for i, c in enumerate(model.classifier):
        if type(c) == torch.nn.modules.linear.Linear:
            W = torch.mm(c.weight, W)
            b = c.bias + torch.mv(c.weight, b)
        elif type(c) == torch.nn.modules.ReLU:
            C.extend(list((z > 0).int().data.numpy()))  # configuration
            r = (z > 0).float()  # the slope
            t = torch.zeros_like(z)  # the bias
            W = torch.mm(torch.diag(r), W)
            b = t + torch.mv(torch.diag(r), b)
        elif type(c) == torch.nn.modules.LeakyReLU:
            C.extend(list((z > 0).int().data.numpy()))  # configuration
            r = (z > 0).float()  # the slope
            r[r == 0] = c.negative_slope
            t = torch.zeros_like(z)  # the bias
            W = torch.mm(torch.diag(r), W)
            b = t + torch.mv(torch.diag(r), b)
        else:
            raise Exception('unknown layer')

        z = c(z)  # forward pass

    C = ''.join(map(str, C))
    return W, b, C
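Because every supported layer is affine or piecewise linear, the returned W and b reproduce the network's output at x exactly, and C records which units were active. A small sanity check, assuming a hypothetical model whose classifier uses only the supported layer types:

import torch
import torch.nn as nn


class ToyNet(nn.Module):
    # hypothetical stand-in: a classifier built only from Linear/ReLU layers
    def __init__(self):
        super(ToyNet, self).__init__()
        self.classifier = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 3))


model = ToyNet()
x = to_var(torch.randn(4))
W, b, C = open_box(model, x)

# W x + b should match the plain forward pass on this particular input
diff = (torch.mv(W, x) + b - model.classifier(x)).abs().max()
print('max reconstruction error:', to_np(diff))  # ~0
print('ReLU configuration:', C)  # one '0'/'1' per hidden unit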
Example #13
    return accuracy_score(y, yhat)


n, d = 1000, 2


def gendata():
    x = np.random.randn(n, d)
    y = (x.sum(1) > 0).astype(int)
    return x, y


xtr, ytr = gendata()
xte, yte = gendata()

r = to_var(torch.FloatTensor([0, 1]))
train_data = TensorDataset(*map(lambda x: x.data, prepareData(xtr, ytr)))
data = DataLoader(train_data, batch_size=100, shuffle=True)

n_output = 2  # binary classification task
model = LR(d, n_output)
learning_rate = 0.01
alpha = 0.08  # regularization strength

reg_parameters = model.i2o.weight
t = Trainer(model,
            lr=learning_rate,
            risk_factors=r,
            alpha=alpha,
            regularization=eye_loss,
            reg_parameters=reg_parameters)
Example #14
 def initHidden(self):
     return to_var(torch.zeros(1, self.hidden_size))
Example #15
    def fit(self, data, batch_size=100, n_epochs=10,
            valdata=None, val_theta=None, use_auc=False):
        '''
        fit a model to (x, y) data in batches
        val_theta: used to evaluate recovery of the heterogeneous subpopulations
        '''
        savedir = os.path.dirname('nonlinear_models/%s' % self.name)
        os.system('mkdir -p %s' % savedir)
        self.writer = SummaryWriter(log_dir=self.log_dir)        
        
        time_start = time.time()
        losses = []
        vallosses = [1000]
        best_valloss, best_valindex = 1000, 0 # for early stopping
        n = len(data.dataset)
        cost = 0
        self.count = 0
        
        for epoch in range(n_epochs):

            for k, (x_batch, y_batch) in enumerate(data):

                x_batch, y_batch = to_var(x_batch).float(), to_var(y_batch).float()
                y_hat, regret = self.step(x_batch, y_batch)
                m = x_batch.size(0)                
                cost += 1 / (k+1) * (regret - cost)

                if self.print_every != 0 and self.count % self.print_every == 0:

                    losses.append(cost)
                    
                    # progress, time, avg loss
                    duration = timeSince(time_start)
                    if int(duration.split('m')[0]) >= self.max_time:
                        return losses
                    
                    to_print = ('%.2f%% (%s) %.4f' % ((epoch * n + (k+1) * m) /
                                                      (n_epochs * n) * 100,
                                                      duration,
                                                      cost))
                    
                    print(to_print)
                    # if self.draw_plot:
                    #     self.plotMTL()
                    #     self.plot(x_batch, y_batch, silence=self.silence, inrange=True)

                    if valdata is not None:
                        if use_auc:
                            acc = reportAuc(self, valdata)
                        else:
                            acc = reportAcc(self, valdata)

                        valloss = -acc
                        vallosses.append(valloss)
                        if valloss <= best_valloss:
                            best_valloss = valloss
                            best_valindex = len(vallosses) - 1

                            torch.save(self.weightNet,
                                       'nonlinear_models/%s.pt' % self.name)
                            np.save('nonlinear_models/%s.loss' % self.name, losses)
                            
                        if len(vallosses) - best_valindex > self.n_early_stopping:
                            print('early stop at iteration', self.count)
                            return losses                            

                        if use_auc:
                            # note acc here is auc
                            self.writer.add_scalar('data/val_auc', acc,
                                                   self.count)
                        else:
                            self.writer.add_scalar('data/val_acc', acc,
                                                   self.count)
                            
                        if val_theta is not None:
                            sim = self.evaluate_subpopulation(val_theta, valdata)
                            self.writer.add_scalar('data/subpopulation_cosine',
                                                   sim, self.count)

                    self.writer.add_scalar('weight/grad_norm', gradNorm(self.weightNet),
                                           self.count)
                    self.writer.add_scalar('data/train_loss', cost, self.count)
                    
                    # for tag, value in self.weightNet.named_parameters():
                    #     tag = tag.replace('.', '/')
                    #     self.writer.add_histogram(tag, to_np(value), self.count)
                    #     if value.grad is not None:
                    #         self.writer.add_histogram(tag+'/grad', to_np(value.grad),
                    #                                   self.count)
                    cost = 0
                    
                self.count += 1

        # if self.draw_plot:
        #     self.plot(x_batch, y_batch, inrange=True, silence=self.silence)

        return losses
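gradNorm, logged to TensorBoard here and in the autoencoder variant below, is assumed to be the overall L2 norm of the gradients currently stored on the network's parameters. A minimal sketch:

def gradNorm(net):
    # overall L2 norm across all parameter gradients (0 if none have been computed yet)
    total = 0.0
    for p in net.parameters():
        if p.grad is not None:
            total += float((p.grad.data ** 2).sum())
    return total ** 0.5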
Example #16
 def explain(self, x):
     x = to_var(x.data).float()        
     z = self.transform_function(x)   # this is for combined trainer
     f = self.weightNet(z)
     return f
Example #17
def prepareData(x, y):
    '''
    convert numpy arrays x, y into float/long tensor Variables
    '''
    return to_var(torch.from_numpy(x).float()), to_var(torch.from_numpy(y).long())
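to_var, to_np, and to_cuda themselves are assumed throughout these examples. A minimal sketch of what they might look like, assuming the pre-0.4 Variable-based PyTorch API (consistent with the volatile=True flags used above):

import torch
from torch.autograd import Variable


def to_var(x, volatile=False):
    # wrap a tensor in a Variable (no-op if it already is one)
    if isinstance(x, Variable):
        return x
    return Variable(x, volatile=volatile)


def to_np(x):
    # detach and copy back to a numpy array
    return x.data.cpu().numpy()


def to_cuda(x):
    # move a tensor to the GPU when one is available
    return x.cuda() if torch.cuda.is_available() else x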
Example #18
    def backward(self, x, y, sample=False, n_samples=30):

        n = x.size(0)

        log_p_z = torch.log(torch.clamp(self.p_z(x, const=True), 1e-10, 1))
        log_p_z = log_p_z.expand(n, log_p_z.size(0))
        log_p_z_x = self.switchNet(x)

        if self.z is not None:
            samplez = self.z
        else:
            samplez = self.sampleZ(x)
        
        # for y_entropy_loss            
        # p_y_z = to_var(torch.ones((2, self.switch_size)))
        # _, zs = torch.max(samplez, 1)
        # for z in range(self.switch_size):
        #     y_given_z = y[zs==z]
        #     for i, label in enumerate([-1, 1]):
        #         p_y_z[i, z] = (y_given_z == label).sum().float().data
        #         if len(y_given_z) > 0:
        #             p_y_z[i, z] /= len(y_given_z)
        p_y_z = torch.ones((2, self.switch_size))        
        zs = to_np(self.sampleZ(x)).argmax(1)
        for z in range(self.switch_size):
            y_given_z = to_np(y)[zs == z]
            for i, label in enumerate([-1, 1]):
                p_y_z[i, z] = float((y_given_z == label).sum())
                if y_given_z.shape[0] > 0:
                    p_y_z[i, z] /= y_given_z.shape[0]

        switch_cost = 0
        weight_cost = 0

        if sample:
            raise NotImplementedError  # the sampled estimator below is kept for reference but not used
            for i in range(n_samples):
                z = samplez

                # switch net: E_z|x (L(x, y, z)
                # - a * log p(z) - a
                # + b * sum_y p(y|z) log p(y|z))
                # * d log p(z|x) / d theta
                data_loss = self.L(x, y, z)
                z_entropy_loss = - (log_p_z*z).sum(1) - 1

                # assume binary problem
                y_entropy_loss = 0
                for y_query in [0, 1]:
                    pyz = p_y_z[y_query].expand(n, self.switch_size)
                    pyz = (to_var(pyz) * z).sum(1)
                    y_entropy_loss += pyz * torch.log(torch.clamp(pyz, 1e-10, 1))
                # y_entropy_loss = 0
                # for y_query in [0, 1]:
                #     pyz = p_y_z[y_query].expand(n, self.switch_size)
                #     pyz = (pyz * z).sum(1)
                #     y_entropy_loss += pyz * torch.log(torch.clamp(pyz, 1e-10, 1))
                    
                c =  var2constvar(data_loss) + \
                     self.alpha * z_entropy_loss + \
                     self.beta * y_entropy_loss
                derivative = (log_p_z_x * z).sum(1)
                switch_cost += c * derivative

                # weight net: E_z|x d L(x, y, z) / d theta
                weight_cost += data_loss

            switch_cost /= n_samples
            switch_cost.mean().backward()

            weight_cost /= n_samples
            weight_cost.mean().backward()
        else:
            _z_entropy_loss = 0
            _y_entropy_loss = 0
            
            p_z_x = to_var(torch.exp(log_p_z_x).data)
            for i in range(self.switch_size):
                z = np.zeros(self.switch_size)
                z[i] = 1
                z = to_var(torch.from_numpy(z).float()).expand(n, self.switch_size)

                # switch net: E_z|x (L(x, y, z)
                # - a * log p(z) - a
                # + b * sum_y p(y|z) log p(y|z))
                # * d log p(z|x) / d theta
                data_loss = self.L(x, y, z)
                z_entropy_loss = - (log_p_z*z).sum(1) - 1
                # assume binary problem
                y_entropy_loss = 0
                for y_query in [0, 1]:
                    pyz = p_y_z[y_query].expand(n, self.switch_size)
                    pyz = (to_var(pyz) * z).sum(1)
                    y_entropy_loss -= pyz * torch.log(torch.clamp(pyz, 1e-10, 1))
                # y_entropy_loss = 0
                # for y_query in [0, 1]:
                #     pyz = p_y_z[y_query].expand(n, self.switch_size)
                #     pyz = (pyz * z).sum(1)
                #     y_entropy_loss += pyz * torch.log(torch.clamp(pyz, 1e-10, 1))
                    

                c =  var2constvar(data_loss) + \
                     self.alpha * z_entropy_loss - \
                     self.beta * y_entropy_loss
                derivative = (log_p_z_x * z).sum(1)
                switch_cost += p_z_x[:, i] * c * derivative

                # weight net: E_z|x d L(x, y, z) / d theta
                weight_cost += p_z_x[:, i] * data_loss

                # collect statistics: the +1 converts the derivative term back into the entropy
                _z_entropy_loss += p_z_x[:, i] * (z_entropy_loss + 1)
                _y_entropy_loss += p_z_x[:, i] * y_entropy_loss 

            if self.count % self.switch_update_every == 0:
                switch_cost.mean().backward()
            if self.count % self.weight_update_every == 0:
                weight_cost.mean().backward()

            if self.print_every != 0 and self.count % self.print_every == 0:
                hz = _z_entropy_loss.mean().data.item()
                hyz = _y_entropy_loss.mean().data.item()
                
                self.writer.add_scalar('loss/z_entropy',
                                       hz,
                                       self.count)
                self.writer.add_scalar('loss/y_given_z_entropy',
                                       hyz,
                                       self.count)
                self.writer.add_scalar('loss/y_z_entropy',
                                       hz + hyz,
                                       self.count)
Example #19
    def fit(self, data, batch_size=100, n_epochs=10,
            valdata=None, val_theta=None):
        '''
        fit a model to (x, y) data in batches
        val_theta: used to evaluate recovery of the heterogeneous subpopulations
        '''
        savedir = os.path.dirname('nonlinear_models/%s' % self.name)
        os.system('mkdir -p %s' % savedir)
        self.writer = SummaryWriter(log_dir=self.log_dir)        
        
        time_start = time.time()
        losses = []
        vallosses = [1000]
        best_valloss, best_valindex = 1000, 0 # for early stopping
        n = len(data.dataset)
        cost = 0
        self.count = 0
        
        for epoch in range(n_epochs):

            for k, (x_batch, y_batch) in enumerate(data):

                x_batch, y_batch = to_var(x_batch).float(), to_var(y_batch).float()
                y_hat, regret = self.step(x_batch, y_batch)
                m = x_batch.size(0)                
                cost += 1 / (k+1) * (regret - cost)

                if self.print_every != 0 and self.count % self.print_every == 0:

                    losses.append(cost)
                    
                    # progress, time, avg loss
                    duration = timeSince(time_start)
                    if int(duration.split('m')[0]) >= self.max_time:
                        return losses
                    
                    to_print = ('%.2f%% (%s) %.4f' % ((epoch * n + (k+1) * m) /
                                                      (n_epochs * n) * 100,
                                                      duration,
                                                      cost))
                    
                    print(to_print)

                    if valdata is not None:
                        _mse = reportMSE(self, valdata, is_autoencoder=True)
                        valloss = _mse
                        vallosses.append(valloss)
                        if valloss <= best_valloss:
                            best_valloss = valloss
                            best_valindex = len(vallosses) - 1

                            torch.save(self.autoencoder,
                                       'nonlinear_models/%s.pt' % self.name)
                            np.save('nonlinear_models/%s.loss' % self.name, losses)
                            
                        if len(vallosses) - best_valindex > self.n_early_stopping:
                            print('early stop at iteration', self.count)
                            return losses                            

                        self.writer.add_scalar('data/val_mse', _mse, self.count)

                        
                    self.writer.add_scalar('model/grad_norm', gradNorm(self.autoencoder),
                                           self.count)
                    # self.writer.add_scalar('data/train_loss', cost, self.count)
                    
                    cost = 0
                    
                self.count += 1

        return losses