示例#1
0
def test_submodular(net,
                    epoch,
                    sample_instance,
                    dataset,
                    device='cpu',
                    evaluate=True):
    """Evaluate ``net`` on ``dataset``; return the mean loss and mean objective.

    For every batch the MSE between the predictions and the labels is
    recorded.  When ``evaluate`` is true, each prediction is additionally
    passed to ``getOptimalDecision`` and the resulting decision is scored
    with ``getObjective`` against the true label; otherwise the objective of
    every sample is reported as 0.

    Args:
        net: Model; switched to eval mode and called on each feature batch.
        epoch: When negative, the network is bypassed and the labels
            themselves are used as predictions.
        sample_instance: Object providing ``n``, ``m``, ``d``, ``f`` and
            ``budget``.
        dataset: Iterable of ``(features, labels)`` batches.
        device: Device the features and labels are moved to.
        evaluate: Whether to solve for a decision and score it per sample.

    Returns:
        Tuple ``(average_loss, average_obj)``: the means over all batches of
        the per-batch loss and the per-batch mean objective.
    """
    net.eval()
    mse = torch.nn.MSELoss()
    batch_losses = []
    batch_objs = []

    n = sample_instance.n
    m = sample_instance.m
    d = torch.Tensor(sample_instance.d)
    f = torch.Tensor(sample_instance.f)
    budget = sample_instance.budget
    # NOTE(review): the constraint matrices are built here but not read
    # anywhere below in this function.
    A, b, G, h = createConstraintMatrix(m, n, budget)

    def decision_objective(label, output):
        # Decide using the prediction, then score that decision with the
        # ground-truth label.
        result = getOptimalDecision(n, m, output, d, f, budget=budget)
        decision = torch.Tensor(result.x)
        return getObjective(decision, n, m, label, d, f)

    with tqdm.tqdm(dataset) as progress:
        for features, labels in progress:
            features = features.to(device)
            labels = labels.to(device)
            outputs = net(features) if epoch >= 0 else labels

            # two-stage loss
            loss = mse(outputs, labels)

            # decision-focused objective, averaged over the batch
            batch_size = len(labels)
            per_sample = [
                decision_objective(label, output)
                if evaluate else torch.Tensor([0])
                for label, output in zip(labels, outputs)
            ]
            objective = sum(per_sample) / batch_size

            batch_losses.append(loss.item())
            batch_objs.append(objective.item())

            # Running means shown in the progress bar.
            running_loss = np.mean(batch_losses)
            running_obj = np.mean(batch_objs)
            progress.set_postfix(loss=f'{running_loss:.3f}',
                                 obj=f'{running_obj:.3f}')

    return np.mean(batch_losses), np.mean(batch_objs)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)

    # surrogate setup
    if training_method == 'surrogate':
        # A, b, G, h = LPCreateSurrogateConstraintMatrix(m, n)
        variable_size = n
        T_size = 8
        # init_T = normalize_matrix(torch.rand(variable_size, T_size))
        init_T = normalize_matrix_positive(torch.rand(variable_size, T_size))
        T = torch.tensor(init_T, requires_grad=True)
        T_lr = lr
        T_optimizer = torch.optim.Adam([T], lr=T_lr)

    optimize_result = getOptimalDecision(n,
                                         m,
                                         torch.Tensor(sample_instance.c),
                                         sample_instance.d,
                                         sample_instance.f,
                                         budget=budget)
    optimal_x = torch.Tensor(optimize_result.x)

    xx = torch.autograd.Variable(optimal_x, requires_grad=True)
    d, f = sample_instance.d, sample_instance.f
    c = torch.Tensor(
        sample_instance.c
    )  # torch.autograd.Variable(torch.Tensor(sample_instance.c), requires_grad=True)
    obj = getObjective(xx, n, m, c, d, f)
    jac_torch = torch.autograd.grad(obj, xx)
    jac_manual = getManualDerivative(xx.detach(), n, m, c, d, f)
    print('torch grad:', jac_torch)
    print('hand grad:', jac_manual)
    hessian = getHessian(optimal_x, n, m, torch.Tensor(c), d, f)
示例#3
0
def train_submodular(net,
                     optimizer,
                     epoch,
                     sample_instance,
                     dataset,
                     lr=0.1,
                     training_method='two-stage',
                     device='cpu',
                     evaluate=True):
    """Train ``net`` for one pass over ``dataset``.

    For every batch the MSE between the predictions and the labels is
    computed.  What is back-propagated depends on ``training_method``:

    * ``'two-stage'``: the MSE loss only.  If ``evaluate`` is true the
      decision obtained from each prediction is also scored (for reporting
      only); otherwise the objective is reported as 0.
    * ``'decision-focused'``: ``-0.5 * objective + 0.5 * loss``.  For each
      sample the decision returned by ``getOptimalDecision`` is used to
      build a quadratic program (Hessian plus ``10 * I`` as the quadratic
      term) that is solved with ``qpth.qp.QPFunction``; the QP solution is
      scored against the true label.  After the backward pass every
      gradient is clamped element-wise to ``[-MAX_NORM, MAX_NORM]``.

    Args:
        net: Model being trained; switched to train mode.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called once
            per batch.
        epoch: When negative, the network is bypassed and the labels
            themselves are used as predictions.
        sample_instance: Object providing ``n``, ``m``, ``d``, ``f`` and
            ``budget``.
        dataset: Iterable of ``(features, labels)`` batches.
        lr: Unused in this function; kept for interface compatibility.
        training_method: ``'two-stage'`` or ``'decision-focused'``.
        device: Device the features and labels are moved to.
        evaluate: Only consulted in ``'two-stage'`` mode (see above).

    Returns:
        Tuple ``(average_loss, average_obj, timings)`` where ``timings`` is
        ``(forward_time, inference_time, qp_time, backward_time)`` in
        seconds accumulated over the whole pass.

    Raises:
        ValueError: If ``training_method`` is not one of the two supported
            values.
    """
    net.train()
    # Validate once, up front, instead of inside the per-sample loop (and
    # inside a try block that used to swallow the error).
    if training_method not in ('two-stage', 'decision-focused'):
        raise ValueError('Not implemented method!')

    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs = [], []
    n, m, budget = sample_instance.n, sample_instance.m, sample_instance.budget
    d = torch.Tensor(sample_instance.d)
    f = torch.Tensor(sample_instance.f)
    A, b, G, h = createConstraintMatrix(m, n, budget)
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    REG = 0.0

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, labels in tqdm_loader:
            net_start_time = time.time()
            features, labels = features.to(device), labels.to(device)
            outputs = net(features) if epoch >= 0 else labels
            # two-stage loss
            loss = loss_fn(outputs, labels)
            forward_time += time.time() - net_start_time

            # decision-focused loss
            objective_value_list = []
            batch_size = len(labels)
            for label, output in zip(labels, outputs):
                if training_method == 'decision-focused':
                    inference_start_time = time.time()
                    optimize_result = getOptimalDecision(n,
                                                         m,
                                                         output,
                                                         d,
                                                         f,
                                                         budget=budget,
                                                         REG=REG)
                    inference_time += time.time() - inference_start_time
                    optimal_x = torch.Tensor(optimize_result.x)

                    # Start the QP timer before branching so that it is
                    # always defined (and fresh) when it is read below,
                    # including when the optimization failed.
                    qp_start_time = time.time()
                    if optimize_result.success:
                        # The 10 * I term shifts the diagonal of the
                        # Hessian; presumably to keep Q positive definite
                        # for the QP solver -- TODO confirm.
                        Q = getHessian(optimal_x, n, m, output, d, f,
                                       REG=REG) + torch.eye(n) * 10
                        jac = -getDerivative(optimal_x,
                                             n,
                                             m,
                                             output,
                                             d,
                                             f,
                                             create_graph=True,
                                             REG=REG)
                        # For symmetric Q,
                        #   0.5 (x - x*)^T Q (x - x*) + jac^T (x - x*)
                        # expands to 0.5 x^T Q x + (jac - Q x*)^T x + const,
                        # so this is the linear term of the quadratic model
                        # centred at optimal_x.
                        p = jac - Q @ optimal_x
                        qp_solver = qpth.qp.QPFunction()
                        x = qp_solver(Q, p, G, h, A, b)[0]
                    else:
                        print('Optimization failed...')
                        # Fall back to the solver's decision; it carries no
                        # gradient with respect to the network output.
                        x = optimal_x
                    obj = getObjective(x, n, m, label, d, f, REG=0)
                    qp_time += time.time() - qp_start_time

                elif evaluate:
                    # two-stage: the decision is only scored for reporting.
                    inference_start_time = time.time()
                    optimize_result = getOptimalDecision(n,
                                                         m,
                                                         output,
                                                         d,
                                                         f,
                                                         budget=budget,
                                                         REG=REG)
                    x = torch.Tensor(optimize_result.x)
                    obj = getObjective(x, n, m, label, d, f, REG=0)
                    inference_time += time.time() - inference_start_time
                else:
                    obj = torch.Tensor([0])

                objective_value_list.append(obj)
            objective = sum(objective_value_list) / batch_size

            optimizer.zero_grad()
            backward_start_time = time.time()
            try:
                if training_method == 'two-stage':
                    loss.backward()
                else:
                    # (-objective).backward()
                    (-objective * 0.5 + loss * 0.5).backward()  # TODO
                    for parameter in net.parameters():
                        # Parameters that received no gradient are skipped
                        # so the remaining ones are still clamped.
                        if parameter.grad is None:
                            continue
                        parameter.grad = torch.clamp(parameter.grad,
                                                     min=-MAX_NORM,
                                                     max=MAX_NORM)
            except Exception as exc:
                # Best effort: e.g. when epoch < 0 nothing requires grad and
                # backward() raises.  KeyboardInterrupt/SystemExit are no
                # longer swallowed, and the cause is reported.
                print("no grad is backpropagated...", exc)
            optimizer.step()
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(objective.item())

            average_loss = np.mean(train_losses)
            average_obj = np.mean(train_objs)
            # Print status
            tqdm_loader.set_postfix(loss=f'{average_loss:.6f}',
                                    obj=f'{average_obj:.6f}')

    average_loss = np.mean(train_losses)
    average_obj = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time,
                                       backward_time)