def test_submodular(net, epoch, sample_instance, dataset, device='cpu', evaluate=True):
    """Evaluate ``net`` on ``dataset`` and report the mean loss and objective.

    For every batch the MSE between the predictions and the labels is
    recorded.  When ``evaluate`` is true, each prediction is additionally
    handed to ``getOptimalDecision`` and the resulting decision is scored
    against the true label with ``getObjective``; otherwise the objective of
    every sample is recorded as 0.

    Args:
        net: Model invoked as ``net(features)``; switched to eval mode.
        epoch: When negative, the labels themselves are used as predictions
            and ``net`` is not called.
        sample_instance: Object providing ``n``, ``m``, ``d``, ``f`` and
            ``budget``.
        dataset: Iterable yielding ``(features, labels)`` batches.
        device: Device the batches are moved to.
        evaluate: Whether to solve the optimisation problem for each sample.

    Returns:
        ``(average_loss, average_obj)``: the mean over batches of the batch
        loss and of the per-batch mean objective.
    """
    net.eval()
    loss_fn = torch.nn.MSELoss()
    test_losses, test_objs = [], []
    n, m, budget = sample_instance.n, sample_instance.m, sample_instance.budget
    d = torch.Tensor(sample_instance.d)
    f = torch.Tensor(sample_instance.f)

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, labels in tqdm_loader:
            features, labels = features.to(device), labels.to(device)

            # Nothing is back-propagated during evaluation, so the forward
            # pass and the loss are computed without recording a graph.
            with torch.no_grad():
                outputs = net(features) if epoch >= 0 else labels
                # two-stage loss
                loss = loss_fn(outputs, labels)

            # decision-focused objective, one optimisation problem per sample
            objective_value_list = []
            batch_size = len(labels)
            for label, output in zip(labels, outputs):
                if evaluate:
                    optimize_result = getOptimalDecision(n, m, output, d, f, budget=budget)
                    optimal_x = torch.Tensor(optimize_result.x)
                    obj = getObjective(optimal_x, n, m, label, d, f)
                else:
                    obj = torch.Tensor([0])
                objective_value_list.append(obj)
            objective = sum(objective_value_list) / batch_size

            test_losses.append(loss.item())
            test_objs.append(objective.item())

            # Running averages shown in the progress bar.
            average_loss = np.mean(test_losses)
            average_obj = np.mean(test_objs)
            tqdm_loader.set_postfix(loss=f'{average_loss:.3f}', obj=f'{average_obj:.3f}')

    average_loss = np.mean(test_losses)
    average_obj = np.mean(test_objs)
    return average_loss, average_obj
optimizer = torch.optim.SGD(net.parameters(), lr=lr) # surrogate setup if training_method == 'surrogate': # A, b, G, h = LPCreateSurrogateConstraintMatrix(m, n) variable_size = n T_size = 8 # init_T = normalize_matrix(torch.rand(variable_size, T_size)) init_T = normalize_matrix_positive(torch.rand(variable_size, T_size)) T = torch.tensor(init_T, requires_grad=True) T_lr = lr T_optimizer = torch.optim.Adam([T], lr=T_lr) optimize_result = getOptimalDecision(n, m, torch.Tensor(sample_instance.c), sample_instance.d, sample_instance.f, budget=budget) optimal_x = torch.Tensor(optimize_result.x) xx = torch.autograd.Variable(optimal_x, requires_grad=True) d, f = sample_instance.d, sample_instance.f c = torch.Tensor( sample_instance.c ) # torch.autograd.Variable(torch.Tensor(sample_instance.c), requires_grad=True) obj = getObjective(xx, n, m, c, d, f) jac_torch = torch.autograd.grad(obj, xx) jac_manual = getManualDerivative(xx.detach(), n, m, c, d, f) print('torch grad:', jac_torch) print('hand grad:', jac_manual) hessian = getHessian(optimal_x, n, m, torch.Tensor(c), d, f)
def train_submodular(net, optimizer, epoch, sample_instance, dataset, lr=0.1, training_method='two-stage', device='cpu', evaluate=True):
    """Run one training pass of ``net`` over ``dataset``.

    Two training methods are supported:

    * ``'two-stage'``: back-propagates the MSE between predictions and
      labels.  The decision objective is computed for reporting only, and
      only when ``evaluate`` is true (otherwise it is recorded as 0).
    * ``'decision-focused'``: for every sample, solves the optimisation
      problem with ``getOptimalDecision``, re-solves a quadratic program
      built from ``getHessian``/``getDerivative`` at that solution through
      ``qpth`` so the decision carries gradients, and back-propagates
      ``-0.5 * objective + 0.5 * loss``.  Gradients are then clamped
      element-wise to ``[-MAX_NORM, MAX_NORM]``.

    Args:
        net: Model invoked as ``net(features)``; switched to train mode.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        epoch: When negative, the labels themselves are used as predictions
            and ``net`` is not called.
        sample_instance: Object providing ``n``, ``m``, ``d``, ``f`` and
            ``budget``.
        dataset: Iterable yielding ``(features, labels)`` batches.
        lr: Not read by this function; kept so existing callers keep working.
        training_method: ``'two-stage'`` or ``'decision-focused'``.
        device: Device the batches are moved to.
        evaluate: For ``'two-stage'`` only, whether to compute the decision
            objective for each sample.

    Returns:
        ``(average_loss, average_obj, timings)`` where ``timings`` is the
        tuple ``(forward_time, inference_time, qp_time, backward_time)`` of
        accumulated wall-clock seconds.

    Raises:
        ValueError: If ``training_method`` is not one of the supported values.
    """
    net.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs = [], []
    n, m, budget = sample_instance.n, sample_instance.m, sample_instance.budget
    d = torch.Tensor(sample_instance.d)
    f = torch.Tensor(sample_instance.f)
    A, b, G, h = createConstraintMatrix(m, n, budget)
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    REG = 0.0

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, labels in tqdm_loader:
            net_start_time = time.time()
            features, labels = features.to(device), labels.to(device)
            outputs = net(features) if epoch >= 0 else labels
            # two-stage loss
            loss = loss_fn(outputs, labels)
            forward_time += time.time() - net_start_time

            # decision-focused loss, one optimisation problem per sample
            objective_value_list = []
            batch_size = len(labels)
            for label, output in zip(labels, outputs):
                if training_method == 'decision-focused':
                    inference_start_time = time.time()
                    optimize_result = getOptimalDecision(n, m, output, d, f, budget=budget, REG=REG)
                    inference_time += time.time() - inference_start_time
                    optimal_x = torch.Tensor(optimize_result.x)

                    # The timer starts before the success check so that the
                    # failure path is timed as well instead of reading an
                    # unset (or stale) start time.
                    qp_start_time = time.time()
                    if optimize_result.success:
                        # Quadratic model around optimal_x; 10 * I is added to
                        # the Hessian before it is handed to the QP solver.
                        Q = getHessian(optimal_x, n, m, output, d, f, REG=REG) + torch.eye(n) * 10
                        jac = -getDerivative(optimal_x, n, m, output, d, f, create_graph=True, REG=REG)
                        p = jac - Q @ optimal_x
                        qp_solver = qpth.qp.QPFunction()
                        x = qp_solver(Q, p, G, h, A, b)[0]
                    else:
                        print('Optimization failed...')
                        x = optimal_x
                    obj = getObjective(x, n, m, label, d, f, REG=0)
                    qp_time += time.time() - qp_start_time
                elif training_method == 'two-stage':
                    if evaluate:
                        inference_start_time = time.time()
                        optimize_result = getOptimalDecision(n, m, output, d, f, budget=budget, REG=REG)
                        x = torch.Tensor(optimize_result.x)
                        obj = getObjective(x, n, m, label, d, f, REG=0)
                        inference_time += time.time() - inference_start_time
                    else:
                        obj = torch.Tensor([0])
                else:
                    raise ValueError('Not implemented method!')
                objective_value_list.append(obj)
            objective = sum(objective_value_list) / batch_size

            optimizer.zero_grad()
            backward_start_time = time.time()
            try:
                if training_method == 'two-stage':
                    loss.backward()
                elif training_method == 'decision-focused':
                    (-objective * 0.5 + loss * 0.5).backward()
                    # Element-wise clamp to [-MAX_NORM, MAX_NORM]; parameters
                    # that received no gradient are skipped.
                    torch.nn.utils.clip_grad_value_(net.parameters(), MAX_NORM)
                # Any other method has already been rejected in the sample
                # loop above.
            except Exception as err:
                # Best effort: backward fails e.g. when the labels stand in
                # for the predictions and nothing requires grad.  Report the
                # reason and carry on; interrupts are no longer swallowed.
                print("no grad is backpropagated...", repr(err))
            optimizer.step()
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(objective.item())

            # Running averages shown in the progress bar.
            average_loss = np.mean(train_losses)
            average_obj = np.mean(train_objs)
            tqdm_loader.set_postfix(loss=f'{average_loss:.6f}', obj=f'{average_obj:.6f}')

    average_loss = np.mean(train_losses)
    average_obj = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time, backward_time)