def train(self, train_loader):
    self.model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if isinstance(data, dict):
            data = data['image']
        if self.use_cuda:
            data, target = to_cuda(data), to_cuda(target)
        data, target = to_var(data), to_var(target)
        self.optimizer.zero_grad()
        if self.twoImage:
            loss = self.train_step2(data, target)
        else:
            loss = self.train_step(data, target)
        self.optimizer.step()
        if batch_idx % 500 == 0:
            if isinstance(data, list):
                len_data = len(data[0])
            else:
                len_data = len(data)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                self.epoch, batch_idx * len_data, len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss))
    self.epoch += 1
def evaluate_subpopulation(self, val_theta, val_data, theta_constraint=lambda x: True):
    '''
    cosine similarity between val_theta and val_explanation
    theta_constraint: keep only explanations theta that satisfy the constraint
    '''
    i = 0
    sim = 0
    num = 0  # number of comparisons
    for x, y in val_data:
        m = x.size(0)
        x, y = to_var(x), to_var(y)
        f = to_np(self.explain(x))
        w = val_theta[i:i+m]
        valid = np.nonzero(list(map(theta_constraint, f)))
        f = f[valid]
        w = w[valid]
        i += m
        num += f.shape[0]
        if f.shape[0] == 0:
            continue
        f_norm = np.sqrt((f * f).sum(1)) + 1e-10
        w_norm = np.sqrt((w * w).sum(1)) + 1e-10
        angle = (w * f).sum(1) / f_norm / w_norm
        sim += angle.sum()
    return sim / num
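# A small standalone check of the row-wise cosine similarity used above.
# `_cosine_rows` is illustrative only, not part of this codebase; it mirrors
# the f_norm / w_norm computation in evaluate_subpopulation.
import numpy as np

def _cosine_rows(w, f):
    f_norm = np.sqrt((f * f).sum(1)) + 1e-10
    w_norm = np.sqrt((w * w).sum(1)) + 1e-10
    return (w * f).sum(1) / f_norm / w_norm

_w = np.array([[1.0, 0.0], [1.0, 1.0]])
# cosine similarity is scale invariant: each row vs. 2x itself gives ~1
assert np.allclose(_cosine_rows(_w, 2 * _w), 1, atol=1e-5)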
def fitData(self, data, batch_size=100, n_epochs=10, print_every=10, valdata=None):
    '''
    fit a model to x, y data by batch
    set print_every to 0 to disable printing
    '''
    time_start = time.time()
    losses = []
    best_valloss, best_valindex = np.inf, 0
    vallosses = []
    n = len(data.dataset)
    cost = 0
    for epoch in range(n_epochs):
        for k, (x_batch, y_batch) in enumerate(data):
            x_batch, y_batch = to_var(x_batch), to_var(y_batch)
            y_hat, regret = self.step(x_batch, y_batch)
            m = x_batch.size(0)
            cost += 1 / (k+1) * (regret/m - cost)  # running average of per-example loss
            if print_every != 0 and k % print_every == 0:
                losses.append(cost)
                # progress, time, avg loss, auc
                to_print = ('%.2f%% (%s) %.4f %.4f' %
                            ((epoch * n + (k+1) * m) / (n_epochs * n) * 100,
                             timeSince(time_start), cost,
                             model_auc(self.model, data)))
                if valdata is not None:
                    valloss = calc_loss(self.model, valdata, self.loss)
                    vallosses.append(valloss)
                    np.save('models/%s.valloss' % self.name, vallosses)
                    to_print += " %.4f" % model_auc(self.model, valdata)
                    if valloss <= best_valloss:
                        best_valloss = valloss
                        best_valindex = len(vallosses) - 1
                        torch.save(self.model, 'models/%s.pt' % self.name)
                else:
                    torch.save(self.model, 'models/%s.pt' % self.name)
                print(to_print)
                np.save('models/%s.loss' % self.name, losses)
                cost = 0
    return losses, vallosses
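# A quick standalone check (illustrative only) that the update
# cost += 1/(k+1) * (v_k - cost) used above maintains the running mean
# of v_0, ..., v_k:
_vals = [3.0, 1.0, 2.0]
_cost = 0.0
for _k, _v in enumerate(_vals):
    _cost += 1 / (_k + 1) * (_v - _cost)
assert abs(_cost - sum(_vals) / len(_vals)) < 1e-12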
def explain(self, x):
    x = to_var(x.data, volatile=True).float()
    # form an explanation
    z = self.sampleZ(x)
    f = self.weightNet(z)
    return f
def plotMTL(self):
    import seaborn as sns
    import matplotlib.pyplot as plt
    if not self.mtl:
        return
    T = self.switchNet.input_size
    K = self.switch_size
    # probability assignment matrix
    A = np.zeros((T, K))
    for i in range(T):
        t = to_var(torch.FloatTensor([i]))
        A[i] = np.exp(to_np(self.switchNet(t)))
    # similarity matrix
    S = A.dot(A.T)
    np.fill_diagonal(S, 1)
    sns.heatmap(S, vmin=0, vmax=1)
    im = ToTensor()(fig2img(plt.gcf()))
    self.writer.add_image('task_similarity', im, self.count)
    plt.close()
    sns.heatmap(A, vmin=0, vmax=1)
    im = ToTensor()(fig2img(plt.gcf()))
    self.writer.add_image('task_assignment', im, self.count)
    plt.close()
def transform(self, x):
    '''
    x is a pytorch Variable tensor; for a combined trainer,
    x is the output of the previous trainer's transform
    '''
    x = to_np(x)
    clusters = self.clf.predict(x)
    clusters = onehotize(to_var(torch.from_numpy(clusters)).view(-1, 1), self.k)
    return clusters
def explain(self):
    explanations = []
    for i in range(self.switch_size):
        x = np.zeros(self.switch_size)
        x[i] = 1
        x = to_var(torch.from_numpy(x)).float()
        explanations.append(list(to_np(self.forward(x))))
    return explanations
def fit(self, data, **kwargs):
    x, y = data.dataset[:]
    # x is the original input, not necessarily the kmeans input
    x = to_var(x)
    x = self.transform_function(x)
    x = to_np(x)
    self.clf.fit(x)
    savedir = os.path.dirname('nonlinear_models/%s' % self.name)
    os.makedirs(savedir, exist_ok=True)
    joblib.dump(self.clf, 'nonlinear_models/%s.pkl' % self.name)
def forward(self, x):
    if self.mtl:
        # the last entry of x is the task number
        if len(x.size()) == 1:
            t = np.zeros(self.input_size)
            t[int(to_np(x[-1])[0])] = 1
            x = to_var(torch.from_numpy(t).float()).view(1, -1)
        else:
            x = x[:, -1:]
            x = onehotize(x, self.input_size)
    o = self.i2o(x)
    return self.logsoftmax(o)
def sampleZ(self, x):
    n = x.size(0)  # minibatch size
    # determine which component (line) to use
    probs = torch.exp(self.switchNet(x))
    m = Categorical(probs)
    one_hot = to_var(m.probs.data.new(m.probs.size()).zero_())
    indices = m.sample()
    if indices.dim() < one_hot.dim():
        indices = indices.unsqueeze(-1)
    z = one_hot.scatter_(-1, indices, 1)
    self.z = z
    return z
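# A minimal sketch (standard torch.distributions usage, independent of this
# codebase) of the one-hot sampling pattern used in sampleZ:
import torch
from torch.distributions import Categorical

_probs = torch.softmax(torch.randn(5, 3), dim=1)  # 5 inputs, 3 switch states
_m = Categorical(_probs)
_z = torch.zeros_like(_probs).scatter_(-1, _m.sample().unsqueeze(-1), 1)
assert (_z.sum(1) == 1).all()  # exactly one component selected per row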
def test(self, test_loader):
    self.model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        if isinstance(data, dict):
            data = data['image']
        if self.use_cuda:
            data, target = to_cuda(data), to_cuda(target)
        data, target = to_var(data, volatile=True), to_var(target)
        output = self.model(data)
        # sum up batch loss
        test_loss += F.nll_loss(output, target, size_average=False).item()
        # get the index of the max log-probability
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return 100. * correct / len(test_loader.dataset)
def open_box(model, x):
    # forward pass to determine the (Leaky)ReLU configuration;
    # assume x is flat with no batch dimension
    assert len(x.shape) == 1, "assume no batch dimension in input"
    d = x.shape[0]
    C = []
    # accumulate the locally linear map W, b
    W = to_var(torch.eye(d))
    b = to_var(torch.zeros(d))
    z = x
    for i, c in enumerate(model.classifier):
        if isinstance(c, torch.nn.Linear):
            W = torch.mm(c.weight, W)
            b = c.bias + torch.mv(c.weight, b)
        elif isinstance(c, torch.nn.ReLU):
            C.extend(list((z > 0).int().data.numpy()))  # configuration
            r = (z > 0).float()      # the slope
            t = torch.zeros_like(z)  # the bias
            W = torch.mm(torch.diag(r), W)
            b = t + torch.mv(torch.diag(r), b)
        elif isinstance(c, torch.nn.LeakyReLU):
            C.extend(list((z > 0).int().data.numpy()))  # configuration
            r = (z > 0).float()      # the slope
            r[r == 0] = c.negative_slope
            t = torch.zeros_like(z)  # the bias
            W = torch.mm(torch.diag(r), W)
            b = t + torch.mv(torch.diag(r), b)
        else:
            raise Exception('unknown layer')
        z = c(z)  # forward pass
    C = ''.join(map(str, C))
    return W, b, C
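# A minimal check of open_box (illustrative; `_open_box_example` is not part
# of this codebase, and `to_var` is assumed to return a plain tensor as in
# recent PyTorch). Locally, the network is exactly the affine map x -> Wx + b,
# and C records which (Leaky)ReLU units were active.
def _open_box_example():
    from types import SimpleNamespace
    net = torch.nn.Sequential(torch.nn.Linear(2, 3), torch.nn.ReLU(),
                              torch.nn.Linear(3, 2))
    model = SimpleNamespace(classifier=net)  # any object with a .classifier
    x = to_var(torch.randn(2))
    W, b, C = open_box(model, x)
    assert torch.allclose(net(x), torch.mv(W, x) + b, atol=1e-5)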
    return accuracy_score(y, yhat)


n, d = 1000, 2

def gendata():
    x = np.random.randn(n, d)
    y = (x.sum(1) > 0).astype(int)
    return x, y

xtr, ytr = gendata()
xte, yte = gendata()
r = to_var(torch.FloatTensor([0, 1]))
train_data = TensorDataset(*map(lambda x: x.data, prepareData(xtr, ytr)))
data = DataLoader(train_data, batch_size=100, shuffle=True)
n_output = 2  # binary classification task
model = LR(d, n_output)
learning_rate = 0.01
alpha = 0.08  # regularization strength
reg_parameters = model.i2o.weight
t = Trainer(model, lr=learning_rate, risk_factors=r, alpha=alpha,
            regularization=eye_loss, reg_parameters=reg_parameters)
def initHidden(self):
    return to_var(torch.zeros(1, self.hidden_size))
def fit(self, data, batch_size=100, n_epochs=10, valdata=None, val_theta=None,
        use_auc=False):
    '''
    fit a model to x, y data by batch
    val_theta: for recovering the heterogeneous subpopulation
    '''
    savedir = os.path.dirname('nonlinear_models/%s' % self.name)
    os.makedirs(savedir, exist_ok=True)
    self.writer = SummaryWriter(log_dir=self.log_dir)
    time_start = time.time()
    losses = []
    vallosses = [1000]
    best_valloss, best_valindex = 1000, 0  # for early stopping
    n = len(data.dataset)
    cost = 0
    self.count = 0
    for epoch in range(n_epochs):
        for k, (x_batch, y_batch) in enumerate(data):
            x_batch, y_batch = to_var(x_batch).float(), to_var(y_batch).float()
            y_hat, regret = self.step(x_batch, y_batch)
            m = x_batch.size(0)
            cost += 1 / (k+1) * (regret - cost)  # running average loss
            if self.print_every != 0 and self.count % self.print_every == 0:
                losses.append(cost)
                # progress, time, avg loss
                duration = timeSince(time_start)
                if int(duration.split('m')[0]) >= self.max_time:
                    return losses
                to_print = ('%.2f%% (%s) %.4f' %
                            ((epoch * n + (k+1) * m) / (n_epochs * n) * 100,
                             duration, cost))
                print(to_print)
                if valdata is not None:
                    if use_auc:
                        acc = reportAuc(self, valdata)
                    else:
                        acc = reportAcc(self, valdata)
                    valloss = -acc
                    vallosses.append(valloss)
                    if valloss <= best_valloss:
                        best_valloss = valloss
                        best_valindex = len(vallosses) - 1
                        torch.save(self.weightNet,
                                   'nonlinear_models/%s.pt' % self.name)
                        np.save('nonlinear_models/%s.loss' % self.name, losses)
                    if len(vallosses) - best_valindex > self.n_early_stopping:
                        print('early stop at iteration', self.count)
                        return losses
                    if use_auc:  # note: acc here is auc
                        self.writer.add_scalar('data/val_auc', acc, self.count)
                    else:
                        self.writer.add_scalar('data/val_acc', acc, self.count)
                    if val_theta is not None:
                        sim = self.evaluate_subpopulation(val_theta, valdata)
                        self.writer.add_scalar('data/subpopulation_cosine',
                                               sim, self.count)
                self.writer.add_scalar('weight/grad_norm',
                                       gradNorm(self.weightNet), self.count)
                self.writer.add_scalar('data/train_loss', cost, self.count)
                cost = 0
            self.count += 1
    return losses
def explain(self, x):
    x = to_var(x.data).float()
    z = self.transform_function(x)  # this is for the combined trainer
    f = self.weightNet(z)
    return f
def prepareData(x, y):
    '''
    convert x, y from numpy arrays to torch Variables
    (float features, long labels)
    '''
    return to_var(torch.from_numpy(x).float()), to_var(torch.from_numpy(y).long())
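# A hedged usage sketch for prepareData with synthetic data, matching the
# gendata example above (float features, integer labels):
_x = np.random.randn(10, 2)
_y = (_x.sum(1) > 0).astype(int)
_x_var, _y_var = prepareData(_x, _y)  # FloatTensor features, LongTensor labels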
def backward(self, x, y, sample=False, n_samples=30):
    n = x.size(0)
    log_p_z = torch.log(torch.clamp(self.p_z(x, const=True), 1e-10, 1))
    log_p_z = log_p_z.expand(n, log_p_z.size(0))
    log_p_z_x = self.switchNet(x)
    if self.z is not None:
        samplez = self.z
    else:
        samplez = self.sampleZ(x)

    # empirical p(y|z) for the y-entropy loss, estimated from sampled z
    p_y_z = torch.ones((2, self.switch_size))
    zs = to_np(self.sampleZ(x)).argmax(1)
    for z in range(self.switch_size):
        y_given_z = to_np(y)[zs == z]
        for i, label in enumerate([-1, 1]):
            p_y_z[i, z] = float((y_given_z == label).sum())
            if y_given_z.shape[0] > 0:
                p_y_z[i, z] /= y_given_z.shape[0]

    switch_cost = 0
    weight_cost = 0
    if sample:
        raise NotImplementedError
        for i in range(n_samples):
            z = samplez
            # switch net: E_z|x (L(x, y, z)
            #                    - a * log p(z) - a
            #                    + b * sum_y p(y|z) log p(y|z))
            #             * d log p(z|x) / d theta
            data_loss = self.L(x, y, z)
            z_entropy_loss = -(log_p_z * z).sum(1) - 1
            # assume binary problem
            y_entropy_loss = 0
            for y_query in [0, 1]:
                pyz = p_y_z[y_query].expand(n, self.switch_size)
                pyz = (to_var(pyz) * z).sum(1)
                y_entropy_loss += pyz * torch.log(torch.clamp(pyz, 1e-10, 1))
            c = var2constvar(data_loss) + \
                self.alpha * z_entropy_loss + \
                self.beta * y_entropy_loss
            derivative = (log_p_z_x * z).sum(1)
            switch_cost += c * derivative
            # weight net: E_z|x d L(x, y, z) / d theta
            weight_cost += data_loss
        switch_cost /= n_samples
        switch_cost.mean().backward()
        weight_cost /= n_samples
        weight_cost.mean().backward()
    else:
        # enumerate all switch states instead of sampling
        _z_entropy_loss = 0
        _y_entropy_loss = 0
        p_z_x = to_var(torch.exp(log_p_z_x).data)
        for i in range(self.switch_size):
            z = np.zeros(self.switch_size)
            z[i] = 1
            z = to_var(torch.from_numpy(z).float()).expand(n, self.switch_size)
            # switch net: E_z|x (L(x, y, z)
            #                    - a * log p(z) - a
            #                    + b * sum_y p(y|z) log p(y|z))
            #             * d log p(z|x) / d theta
            data_loss = self.L(x, y, z)
            z_entropy_loss = -(log_p_z * z).sum(1) - 1
            # assume binary problem
            y_entropy_loss = 0
            for y_query in [0, 1]:
                pyz = p_y_z[y_query].expand(n, self.switch_size)
                pyz = (to_var(pyz) * z).sum(1)
                y_entropy_loss -= pyz * torch.log(torch.clamp(pyz, 1e-10, 1))
            c = var2constvar(data_loss) + \
                self.alpha * z_entropy_loss - \
                self.beta * y_entropy_loss
            derivative = (log_p_z_x * z).sum(1)
            switch_cost += p_z_x[:, i] * c * derivative
            # weight net: E_z|x d L(x, y, z) / d theta
            weight_cost += p_z_x[:, i] * data_loss
            # collect statistics: the +1 transforms the derivative back to entropy
            _z_entropy_loss += p_z_x[:, i] * (z_entropy_loss + 1)
            _y_entropy_loss += p_z_x[:, i] * y_entropy_loss
        if self.count % self.switch_update_every == 0:
            switch_cost.mean().backward()
        if self.count % self.weight_update_every == 0:
            weight_cost.mean().backward()
        if self.print_every != 0 and self.count % self.print_every == 0:
            hz = _z_entropy_loss.mean().data.item()
            hyz = _y_entropy_loss.mean().data.item()
            self.writer.add_scalar('loss/z_entropy', hz, self.count)
            self.writer.add_scalar('loss/y_given_z_entropy', hyz, self.count)
            self.writer.add_scalar('loss/y_z_entropy', hz + hyz, self.count)
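# A minimal, self-contained sketch of the score-function (REINFORCE) identity
# that `backward` relies on: for discrete z ~ p(z|x), the gradient of
# E_z[c(z)] equals sum_z p(z|x) * c(z) * d log p(z|x) / d theta, which is the
# enumerated form used in the non-sample branch above. All names below are
# illustrative and not part of this codebase.
import torch

_logits = torch.randn(4, 3, requires_grad=True)  # stand-in for switchNet output
_c = torch.randn(4, 3)                           # fixed cost per (input, z) pair

# exact gradient of the expected cost
_log_p = torch.log_softmax(_logits, dim=1)
(_log_p.exp() * _c).sum(1).mean().backward()
_exact_grad = _logits.grad.clone()
_logits.grad = None

# score-function surrogate: p is detached, so the surrogate's gradient is
# sum_z p(z|x) * c(z) * d log p(z|x) / d theta
_log_p = torch.log_softmax(_logits, dim=1)
(_log_p.exp().detach() * _c * _log_p).sum(1).mean().backward()
assert torch.allclose(_exact_grad, _logits.grad, atol=1e-5)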
def fit(self, data, batch_size=100, n_epochs=10, valdata=None, val_theta=None):
    '''
    fit a model to x, y data by batch
    val_theta: for recovering the heterogeneous subpopulation
    '''
    savedir = os.path.dirname('nonlinear_models/%s' % self.name)
    os.makedirs(savedir, exist_ok=True)
    self.writer = SummaryWriter(log_dir=self.log_dir)
    time_start = time.time()
    losses = []
    vallosses = [1000]
    best_valloss, best_valindex = 1000, 0  # for early stopping
    n = len(data.dataset)
    cost = 0
    self.count = 0
    for epoch in range(n_epochs):
        for k, (x_batch, y_batch) in enumerate(data):
            x_batch, y_batch = to_var(x_batch).float(), to_var(y_batch).float()
            y_hat, regret = self.step(x_batch, y_batch)
            m = x_batch.size(0)
            cost += 1 / (k+1) * (regret - cost)  # running average loss
            if self.print_every != 0 and self.count % self.print_every == 0:
                losses.append(cost)
                # progress, time, avg loss
                duration = timeSince(time_start)
                if int(duration.split('m')[0]) >= self.max_time:
                    return losses
                to_print = ('%.2f%% (%s) %.4f' %
                            ((epoch * n + (k+1) * m) / (n_epochs * n) * 100,
                             duration, cost))
                print(to_print)
                if valdata is not None:
                    _mse = reportMSE(self, valdata, is_autoencoder=True)
                    valloss = _mse
                    vallosses.append(valloss)
                    if valloss <= best_valloss:
                        best_valloss = valloss
                        best_valindex = len(vallosses) - 1
                        torch.save(self.autoencoder,
                                   'nonlinear_models/%s.pt' % self.name)
                        np.save('nonlinear_models/%s.loss' % self.name, losses)
                    if len(vallosses) - best_valindex > self.n_early_stopping:
                        print('early stop at iteration', self.count)
                        return losses
                    self.writer.add_scalar('data/val_mse', _mse, self.count)
                self.writer.add_scalar('model/grad_norm',
                                       gradNorm(self.autoencoder), self.count)
                cost = 0
            self.count += 1
    return losses