def __init__(self, D_in, D_out, a_bound):
        super(OptLayer, self).__init__()
        self.W = torch.nn.Parameter(1e-3 * torch.randn(D_out, D_in))
        self.b = torch.nn.Parameter(1e-3 * torch.randn(D_out))

        u = torch.as_tensor(a_bound)

        y = cp.Variable(D_out)
        Wtilde = cp.Variable((D_out, D_in))
        W = cp.Parameter((D_out, D_in))
        b = cp.Parameter(D_out)
        x = cp.Parameter(D_in)
        obj = cp.Minimize(cp.sum_squares(Wtilde @ x - b - y))
        cons = [cp.sum(y) == env.nbikes, 0 <= y, y <= u, Wtilde == W]
        prob = cp.Problem(obj, cons)
        self.layer = CvxpyLayer(prob, [W, b, x], [y])
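# Usage sketch (not from the original module): one forward/backward pass
# through the layer built above, with stand-in values for env.nbikes, a_bound
# and the layer sizes. The Wtilde == W trick keeps the problem DPP: the
# variable-parameter product Wtilde @ x is allowed where W @ x would not be.
import cvxpy as cp
import torch
from cvxpylayers.torch import CvxpyLayer

D_in, D_out, nbikes, a_bound = 4, 3, 1.0, 0.6
y = cp.Variable(D_out)
Wtilde = cp.Variable((D_out, D_in))
W = cp.Parameter((D_out, D_in))
b = cp.Parameter(D_out)
x = cp.Parameter(D_in)
prob = cp.Problem(cp.Minimize(cp.sum_squares(Wtilde @ x - b - y)),
                  [cp.sum(y) == nbikes, 0 <= y, y <= a_bound, Wtilde == W])
layer = CvxpyLayer(prob, [W, b, x], [y])

W_t = 1e-3 * torch.randn(D_out, D_in, requires_grad=True)
b_t = 1e-3 * torch.randn(D_out, requires_grad=True)
x_t = torch.randn(D_in)
y_star, = layer(W_t, b_t, x_t)
y_star[0].backward()  # gradients land in W_t.grad and b_t.grad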
Example #2
    def projection_eu(self, o, d):
        r = self.r
        k = self.od_serial[o, d]
        if len(self.f[int(k)]) == 1:
            return self.f[int(k)]

        flow = cp.Variable(len(self.f[int(k)]))
        b = cp.Parameter(len(self.f[int(k)]))
        constraints = [flow >= 0, cp.sum(flow) == self.q[k]]
        objective = cp.Minimize(cp.pnorm(flow - b, p=2))
        problem = cp.Problem(objective, constraints)
        assert problem.is_dpp()

        b_tch = self.f[int(k)] - r * self.cost[int(k)]
        cvxpylayer = CvxpyLayer(problem, parameters=[b], variables=[flow])
        solution, = cvxpylayer(b_tch)
        solution = solution.clamp(0)
        return solution
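# Standalone sketch of the same projection with assumed sizes: the Euclidean
# projection of a point b onto the scaled simplex {flow >= 0, sum(flow) == q},
# differentiable with respect to b.
import cvxpy as cp
import torch
from cvxpylayers.torch import CvxpyLayer

n_paths, q = 5, 2.0
flow = cp.Variable(n_paths)
b = cp.Parameter(n_paths)
problem = cp.Problem(cp.Minimize(cp.pnorm(flow - b, p=2)),
                     [flow >= 0, cp.sum(flow) == q])
layer = CvxpyLayer(problem, parameters=[b], variables=[flow])

b_tch = torch.randn(n_paths, requires_grad=True)
projection, = layer(b_tch)
projection[0].backward()  # d(projection_0)/db ends up in b_tch.grad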
Example #3
    def __init__(self, n_assets, temperature=1, max_weight=1):
        super().__init__()

        if n_assets * max_weight < 1:
            raise ValueError('One cannot create a fully invested portfolio with the given max_weight')

        self.n_assets = n_assets
        self.temperature = temperature

        # Construct convex optimization problem
        x = cp.Parameter(n_assets)
        w = cp.Variable(n_assets)
        obj = cp.sum_squares(x - w)
        cons = [cp.sum(w) == 1,
                0. <= w,
                w <= max_weight]
        prob = cp.Problem(cp.Minimize(obj), cons)

        self.layer = CvxpyLayer(prob, parameters=[x], variables=[w])
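# Usage sketch with assumed sizes (not part of the original module): the layer
# projects raw scores onto {w : sum(w) = 1, 0 <= w <= max_weight}, and
# CvxpyLayer broadcasts over a leading batch dimension.
import cvxpy as cp
import torch
from cvxpylayers.torch import CvxpyLayer

n_assets, max_weight = 5, 0.4
x = cp.Parameter(n_assets)
w = cp.Variable(n_assets)
prob = cp.Problem(cp.Minimize(cp.sum_squares(x - w)),
                  [cp.sum(w) == 1, 0. <= w, w <= max_weight])
layer = CvxpyLayer(prob, parameters=[x], variables=[w])

scores = torch.randn(3, n_assets, requires_grad=True)  # batch of 3
weights, = layer(scores)        # shape (3, n_assets); rows sum to 1
weights[:, 0].sum().backward()  # gradients w.r.t. the raw scores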
Example #4
    def __init__(self, n_assets, max_weight=1):
        """Construct."""
        super().__init__()
        covmat_sqrt = cp.Parameter((n_assets, n_assets))
        b = cp.Parameter(n_assets, nonneg=True)

        w = cp.Variable(n_assets)

        term_1 = 0.5 * cp.sum_squares(covmat_sqrt @ w)
        term_2 = b @ cp.log(w)

        objective = cp.Minimize(term_1 - term_2)  # refer [2]
        constraint = [cp.sum(w) == 1, w >= 0, w <= max_weight]  # refer [2]

        prob = cp.Problem(objective, constraint)

        assert prob.is_dpp()

        self.cvxpylayer = CvxpyLayer(prob, parameters=[covmat_sqrt, b], variables=[w])
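# Usage sketch (sizes assumed): the risk-budgeting layer above, built
# standalone. With an identity covariance square root and equal budgets
# b = 1/n, symmetry makes the optimal weights (approximately) equal.
import cvxpy as cp
import torch
from cvxpylayers.torch import CvxpyLayer

n_assets = 4
covmat_sqrt = cp.Parameter((n_assets, n_assets))
b = cp.Parameter(n_assets, nonneg=True)
w = cp.Variable(n_assets)
prob = cp.Problem(
    cp.Minimize(0.5 * cp.sum_squares(covmat_sqrt @ w) - b @ cp.log(w)),
    [cp.sum(w) == 1, w >= 0, w <= 1])
layer = CvxpyLayer(prob, parameters=[covmat_sqrt, b], variables=[w])

sqrt_tch = torch.eye(n_assets, requires_grad=True)
b_tch = torch.full((n_assets,), 1.0 / n_assets)
w_star, = layer(sqrt_tch, b_tch)  # roughly 0.25 per asset
w_star[0].backward()              # sensitivity w.r.t. the covariance factor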
Example #5
    def test_example(self):
        n, m = 2, 3
        x = cp.Variable(n)
        A = cp.Parameter((m, n))
        b = cp.Parameter(m)
        constraints = [x >= 0]
        objective = cp.Minimize(0.5 * cp.pnorm(A @ x - b, p=1))
        problem = cp.Problem(objective, constraints)
        assert problem.is_dpp()

        cvxpylayer = CvxpyLayer(problem, parameters=[A, b], variables=[x])
        A_tch = torch.randn(m, n, requires_grad=True)
        b_tch = torch.randn(m, requires_grad=True)

        # solve the problem
        solution, = cvxpylayer(A_tch, b_tch)

        # compute the gradient of the sum of the solution with respect to A, b
        solution.sum().backward()
Example #6
def ssvr_cvxpy(X, y, hyperparam, idx_train, idx_val):
    Xtrain, Xtest, ytrain, ytest = map(
        torch.from_numpy,
        [X[idx_train, :], X[idx_val], y[idx_train], y[idx_val]])

    n_samples_train, n_features = Xtrain.shape

    # set up variables and parameters
    beta_cp = cp.Variable(n_features)
    xi_cp = cp.Variable(n_samples_train)
    xi_star_cp = cp.Variable(n_samples_train)
    C_cp = cp.Parameter(nonneg=True)
    epsilon_cp = cp.Parameter(nonneg=True)

    # set up objective
    loss = cp.sum_squares(beta_cp) / 2
    reg = C_cp * cp.sum(xi_cp + xi_star_cp)
    objective = loss + reg
    # define constraints
    constraints = [
        ytrain - Xtrain @ beta_cp <= epsilon_cp + xi_cp,
        Xtrain @ beta_cp - ytrain <= epsilon_cp + xi_star_cp, xi_cp >= 0.0,
        xi_star_cp >= 0.0,
        cp.sum(beta_cp) == 1, beta_cp >= 0.0
    ]
    # define problem
    problem = cp.Problem(cp.Minimize(objective), constraints)
    assert problem.is_dpp()

    # solve problem
    layer = CvxpyLayer(problem,
                       parameters=[C_cp, epsilon_cp],
                       variables=[beta_cp])
    hyperparam_th = torch.tensor(hyperparam, requires_grad=True)
    beta_, = layer(hyperparam_th[0], hyperparam_th[1])
    # get test loss and its gradient
    test_loss = (Xtest @ beta_ - ytest).pow(2).mean()
    test_loss.backward()

    val = test_loss.detach().numpy()
    grad = np.array(hyperparam_th.grad)
    return val, grad
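# Hypothetical call of ssvr_cvxpy on synthetic data. hyperparam holds
# [C, epsilon]; passing it as float64 keeps the layer output dtype in line
# with the torch.from_numpy data inside the function.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(30, 5)
y = rng.randn(30)
idx_train, idx_val = np.arange(20), np.arange(20, 30)
val, grad = ssvr_cvxpy(X, y, np.array([1.0, 0.1]), idx_train, idx_val)
print(val, grad)  # validation MSE and its gradient w.r.t. [C, epsilon]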
Example #7
    def init_QP(self):
        """
        Setting up the matrices Q, G, h for the QP
        """
        if self.QP == "qpth":
            # Using the qpth library for the QP
            self.Q = 2.0*torch.eye(self.size, self.size, device=self.device)
            self.e = torch.Tensor().to(device=self.device)  # empty placeholder tensor

            # Create the finite-difference matrix
            T = self.grid.item()
            D = torch.zeros(self.size-1, self.size, device=self.device)
            for i in range(self.size-1):
                D[i, i] = -1.0/T
                D[i, i+1] = 1.0/T

            self.G = torch.cat([D, -D], dim=0)
            self.h = torch.ones(2*(self.size-1), device=self.device)

        elif self.QP == "cvxpy":
            # Using the cvxpylayers library for the QP
            Q = 2.0*np.eye(self.size)
            p = cp.Parameter(self.size)

            # Create the finite-difference matrix
            D = np.zeros([self.size-1, self.size])
            size = self.grid.item()
            for i in range(self.size-1):
                D[i, i] = -1.0/size
                D[i, i+1] = 1.0/size

            G = np.concatenate((D, -D), axis=0)
            h = np.ones(2*(self.size-1))

            # Create the QP
            x = cp.Variable(self.size)
            objective = cp.Minimize((1/2)*cp.quad_form(x, Q) + p.T @ x)
            constraints = [G @ x <= h]
            problem = cp.Problem(objective, constraints)

            self.qp = CvxpyLayer(problem, parameters=[p], variables=[x])
Example #8
    def __init__(self, n_assets, max_weight=1):
        """Construct."""
        super().__init__()
        covmat_sqrt = cp.Parameter((n_assets, n_assets))
        rets = cp.Parameter(n_assets)
        alpha = cp.Parameter(nonneg=True)

        w = cp.Variable(n_assets)
        ret = rets @ w
        risk = cp.sum_squares(covmat_sqrt @ w)
        reg = alpha * (cp.norm(w)**2)

        prob = cp.Problem(cp.Maximize(ret - risk - reg),
                          [cp.sum(w) == 1, w >= 0, w <= max_weight])

        assert prob.is_dpp()

        self.cvxpylayer = CvxpyLayer(prob,
                                     parameters=[rets, covmat_sqrt, alpha],
                                     variables=[w])
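# Usage sketch (sizes assumed): the same mean-variance problem built
# standalone; the solution is differentiated through rets, covmat_sqrt
# and the regularization strength alpha.
import cvxpy as cp
import torch
from cvxpylayers.torch import CvxpyLayer

n_assets = 4
covmat_sqrt = cp.Parameter((n_assets, n_assets))
rets = cp.Parameter(n_assets)
alpha = cp.Parameter(nonneg=True)
w = cp.Variable(n_assets)
prob = cp.Problem(cp.Maximize(rets @ w - cp.sum_squares(covmat_sqrt @ w)
                              - alpha * cp.norm(w) ** 2),
                  [cp.sum(w) == 1, w >= 0, w <= 1])
layer = CvxpyLayer(prob, parameters=[rets, covmat_sqrt, alpha], variables=[w])

rets_t = torch.randn(n_assets, requires_grad=True)
sqrt_t = torch.eye(n_assets, requires_grad=True)
alpha_t = torch.tensor(0.01, requires_grad=True)
w_star, = layer(rets_t, sqrt_t, alpha_t)
w_star[0].backward()  # fills rets_t.grad, sqrt_t.grad, alpha_t.grad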
Example #9
    def set_cvx_layer(self, batch_size, device):
        x = cp.Variable((batch_size, 7))
        theta_max = cp.Parameter((batch_size, 7))
        theta_min = cp.Parameter((batch_size, 7))
        theta = cp.Parameter((batch_size, 7))
        constraints = [theta - x <= theta_max, theta + x >= theta_min]
        objective = cp.Minimize(cp.pnorm(x))
        problem = cp.Problem(objective, constraints)
        assert problem.is_dpp()
        self.cvxpylayer = CvxpyLayer(problem,
                                     parameters=[theta_max, theta_min, theta],
                                     variables=[x])
        eps = 1e-10
        self.theta_max_torch = util.deg2rad(
            torch.tensor([180., 140., 140., 140., 48., 48., 48.],
                         requires_grad=True)).to(device) - eps
        self.theta_max_torch = self.theta_max_torch.unsqueeze(0).repeat(
            batch_size, 1) + eps
        self.theta_min_torch = torch.zeros((batch_size, 7),
                                           requires_grad=True).to(device)
Example #10
def enet_cvxpy(X, y, lambda_alpha, idx_train, idx_val):
    Xtrain, Xtest, ytrain, ytest = map(
        torch.from_numpy,
        [X[idx_train, :], X[idx_val], y[idx_train], y[idx_val]])

    n_samples_train, n_features = Xtrain.shape

    # set up variables and parameters
    beta_cp = cp.Variable(n_features)
    lambda_cp = cp.Parameter(nonneg=True)
    alpha_cp = cp.Parameter(nonneg=True)

    # set up objective
    loss = ((1 / (2 * n_samples_train)) *
            cp.sum(cp.square(Xtrain @ beta_cp - ytrain)))
    reg = (lambda_cp * cp.norm1(beta_cp) +
           alpha_cp * cp.sum_squares(beta_cp) / 2)
    objective = loss + reg

    # define problem
    problem = cp.Problem(cp.Minimize(objective))
    assert problem.is_dpp()

    # solve problem
    layer = CvxpyLayer(problem, [lambda_cp, alpha_cp], [beta_cp])
    lambda_alpha_th = torch.tensor(lambda_alpha, requires_grad=True)
    beta_, = layer(lambda_alpha_th[0],
                   lambda_alpha_th[1],
                   solver_args={
                       'eps': 1e-6,
                       'max_iters': 2000
                   })

    # get test loss and its gradient
    test_loss = (Xtest @ beta_ - ytest).pow(2).mean()
    test_loss.backward()

    val = test_loss.detach().numpy()
    grad = np.array(lambda_alpha_th.grad)
    return val, grad
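# Hypothetical call of enet_cvxpy on synthetic data; lambda_alpha holds the
# l1 and squared-l2 strengths and is passed as float64 to match the data.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(30, 10)
y = rng.randn(30)
idx_train, idx_val = np.arange(20), np.arange(20, 30)
val, grad = enet_cvxpy(X, y, np.array([0.05, 0.05]), idx_train, idx_val)
print(val, grad)  # validation MSE and its gradient w.r.t. (lambda, alpha)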
Example #11
    def forward(self, A_vec):

        if A_vec.dim() < 2:
            A_vec = A_vec.unsqueeze(dim=0)

        if A_vec.shape[1] == 16:
            A = A_from_16_vec(A_vec)
        else:
            A = convert_Avec_to_A(A_vec)

        # build the differentiable SDP layer from the prestored problem and
        # solve it for the (batched) cost matrix A
        sdp_solver = CvxpyLayer(self.prob, parameters=[self.A], variables=[self.X])
        X, = sdp_solver(A)
        del sdp_solver
        x = x_from_xxT(X)

        if x.dim() < 2:
            x = x.unsqueeze(dim=0)

        r_vec = x[:, :9]
        rotmat = r_vec.view(-1, 3, 3).transpose(1, 2)
        return rotmat.squeeze()
Example #12
    def test_broadcasting(self):
        set_seed(243)
        n_batch, m, n = 2, 100, 20

        A = cp.Parameter((m, n))
        b = cp.Parameter(m)
        x = cp.Variable(n)
        obj = cp.sum_squares(A@x - b) + cp.sum_squares(x)
        prob = cp.Problem(cp.Minimize(obj))
        prob_th = CvxpyLayer(prob, [A, b], [x])

        A_th = torch.randn(m, n).double().requires_grad_()
        b_th = torch.randn(m).double().unsqueeze(0).repeat(n_batch, 1) \
            .requires_grad_()
        b_th_0 = b_th[0]

        x = prob_th(A_th, b_th, solver_args={"eps": 1e-10})[0]

        def lstsq(A, b):
            # regularized least squares: solve (A^T A + I) x = A^T b
            return torch.linalg.solve(
                A.t() @ A + torch.eye(n).double(),
                (A.t() @ b).unsqueeze(1))
        x_lstsq = lstsq(A_th, b_th_0)

        grad_A_cvxpy, grad_b_cvxpy = grad(x.sum(), [A_th, b_th])
        grad_A_lstsq, grad_b_lstsq = grad(x_lstsq.sum(), [A_th, b_th_0])

        self.assertAlmostEqual(
            torch.norm(
                grad_A_cvxpy / n_batch -
                grad_A_lstsq).item(),
            0.0)
        self.assertAlmostEqual(
            torch.norm(
                grad_b_cvxpy[0] -
                grad_b_lstsq).item(),
            0.0)
Example #13
    def __init__(self,
                 temperature=1,
                 formulation="analytical",
                 n_assets=None,
                 max_weight=1):
        super().__init__()

        self.temperature = temperature

        if formulation not in {"analytical", "variational"}:
            raise ValueError("Unrecognized formulation {}".format(formulation))

        if formulation == "variational" and n_assets is None:
            raise ValueError(
                "One needs to provide n_assets for the variational formulation."
            )

        if formulation == "analytical" and max_weight != 1:
            raise ValueError(
                "Cannot constraint weights via max_weight for analytical formulation"
            )

        if formulation == "variational" and n_assets * max_weight < 1:
            raise ValueError(
                "One cannot create fully invested portfolio with the given max_weight"
            )

        self.formulation = formulation

        if formulation == "analytical":
            self.layer = torch.nn.Softmax(dim=1)
        else:
            x = cp.Parameter(n_assets)
            w = cp.Variable(n_assets)
            obj = -x @ w - cp.sum(cp.entr(w))
            cons = [cp.sum(w) == 1.0, w <= max_weight]
            prob = cp.Problem(cp.Minimize(obj), cons)
            self.layer = CvxpyLayer(prob, [x], [w])
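# Usage sketch (sizes assumed): the variational branch solves an
# entropy-regularized problem; with max_weight = 1 its solution reduces to
# softmax(x), which is exactly what the analytical branch computes.
import cvxpy as cp
import torch
from cvxpylayers.torch import CvxpyLayer

n_assets, max_weight = 4, 0.5
x = cp.Parameter(n_assets)
w = cp.Variable(n_assets)
prob = cp.Problem(cp.Minimize(-x @ w - cp.sum(cp.entr(w))),
                  [cp.sum(w) == 1.0, w <= max_weight])
layer = CvxpyLayer(prob, [x], [w])

logits = torch.randn(2, n_assets, requires_grad=True)
w_star, = layer(logits)         # a "capped softmax" over each row
w_star[:, 0].sum().backward()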
Example #14
    def test_least_squares(self):
        set_seed(243)
        m, n = 100, 20

        A = cp.Parameter((m, n))
        b = cp.Parameter(m)
        x = cp.Variable(n)
        obj = cp.sum_squares(A@x - b) + cp.sum_squares(x)
        prob = cp.Problem(cp.Minimize(obj))
        prob_th = CvxpyLayer(prob, [A, b], [x])

        A_th = torch.randn(m, n).double().requires_grad_()
        b_th = torch.randn(m).double().requires_grad_()

        x = prob_th(A_th, b_th, solver_args={"eps": 1e-10})[0]

        def lstsq(A, b):
            # regularized least squares: solve (A^T A + I) x = A^T b
            return torch.linalg.solve(
                A.t() @ A + torch.eye(n).double(),
                (A.t() @ b).unsqueeze(1))
        x_lstsq = lstsq(A_th, b_th)

        grad_A_cvxpy, grad_b_cvxpy = grad(x.sum(), [A_th, b_th])
        grad_A_lstsq, grad_b_lstsq = grad(x_lstsq.sum(), [A_th, b_th])

        self.assertAlmostEqual(
            torch.norm(
                grad_A_cvxpy -
                grad_A_lstsq).item(),
            0.0)
        self.assertAlmostEqual(
            torch.norm(
                grad_b_cvxpy -
                grad_b_lstsq).item(),
            0.0)
Example #15
    def __init__(self, board_size, g_dim, a_dim, q_penalty=1e-3):
        super(OptNetLayer, self).__init__()

        flat_board_size = board_size**3

        # random normal initializations:
        # self.Q_sqrt = nn.Parameter(q_penalty*torch.randn(flat_board_size, flat_board_size, dtype=torch.double))
        # self.G = nn.Parameter(torch.randn(g_dim, flat_board_size, dtype=torch.double))
        # self.h = nn.Parameter(torch.randn(g_dim, dtype=torch.double))
        # self.A = nn.Parameter(torch.randn(a_dim, flat_board_size, dtype=torch.double))
        # self.b = nn.Parameter(torch.randn(a_dim, dtype=torch.double))

        # these definitions are lifted from the example cited above:
        self.Q_sqrt = nn.Parameter(
            q_penalty * torch.eye(flat_board_size, dtype=torch.double))
        self.G = nn.Parameter(
            -torch.eye(g_dim, flat_board_size, dtype=torch.double))
        self.h = nn.Parameter(torch.zeros(g_dim, dtype=torch.double))
        self.A = nn.Parameter(
            torch.rand((a_dim, flat_board_size), dtype=torch.double))
        self.b = nn.Parameter(torch.ones(a_dim, dtype=torch.double))

        z = cp.Variable(flat_board_size)
        Q_sqrt = cp.Parameter((flat_board_size, flat_board_size))
        G = cp.Parameter((g_dim, flat_board_size))
        h = cp.Parameter(g_dim)
        A = cp.Parameter((a_dim, flat_board_size))
        b = cp.Parameter(a_dim)
        q = cp.Parameter(flat_board_size)

        objective = cp.Minimize(0.5 * cp.sum_squares(Q_sqrt @ z) + q.T @ z)
        constraints = [A @ z == b, G @ z <= h]
        prob = cp.Problem(objective, constraints)
        self.layer = CvxpyLayer(prob,
                                parameters=[Q_sqrt, q, A, b, G, h],
                                variables=[z])
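    # Sketch of the forward pass this module presumably pairs with (not part
    # of the original snippet): the upstream activation plays the role of the
    # linear cost q, while the remaining QP data are learned parameters.
    def forward(self, q):
        # q: (batch, flat_board_size); cast to double to match the QP data
        z, = self.layer(self.Q_sqrt, q.double(),
                        self.A, self.b, self.G, self.h)
        return z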
Example #16
def test_portfolio(model, covariance_model, epoch, dataset, device='cpu', evaluate=False):
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    test_losses, test_objs = [], []
    test_opts = []

    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, covariance_mat, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            features, covariance_mat, labels = features[0].to(device), covariance_mat[0].to(device), labels[0,:,0].to(device).float()  # batch size is 1: take the single sample
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG

            if epoch == -1:
                predictions = labels
                Q = Q_real
            else:
                predictions = model(features.float())[:,0]
                Q = covariance_model() * (1 - REG) + torch.eye(n) * REG 

            loss = loss_fn(predictions, labels)

            if evaluate:
                forward_time += time.time() - forward_start_time
                inference_start_time = time.time()

                p = predictions
                L = sqrtm(Q) # torch.cholesky(Q)
                # =============== solving QP using qpth ================
                if solver == 'qpth':
                    G = -torch.eye(n)
                    h = torch.zeros(n)
                    A = torch.ones(1,n)
                    b = torch.ones(1)
                    qp_solver = qpth.qp.QPFunction()
                    x = qp_solver(alpha * Q, -p, G, h, A, b)[0]
                    # x_opt = qp_solver(alpha * Q_real, -labels, G, h, A, b)[0]
                # =============== solving QP using CVXPY ===============
                elif solver == 'cvxpy':
                    x_var = cp.Variable(n)
                    L_para = cp.Parameter((n,n))
                    p_para = cp.Parameter(n)
                    constraints = [x_var >= 0, x_var <= 1, cp.sum(x_var) == 1]
                    objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ x_var) + p_para.T @ x_var)
                    problem = cp.Problem(objective, constraints)

                    cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para], variables=[x_var])
                    x, = cvxpylayer(L, -p)

                obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x
                # opt = labels @ x_opt - 0.5 * alpha * x_opt.t() @ Q_real @ x_opt
                # print('obj:', obj, 'opt:', opt)

                inference_time += time.time() - inference_start_time
                # ======= opt ===
                # p_opt = labels
                # L_opt = torch.cholesky(Q_real)
                # x_opt, = cvxpylayer(L_opt, p_opt)
                # opt = labels @ x_opt - 0.5 * alpha * x_opt.t() @ Q_real @ x_opt
                # test_opts.append(opt.item())
            else:
                obj = torch.Tensor([0])

            test_losses.append(loss.item())
            test_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%') 

    # print('opts:', test_opts)
    average_loss    = np.mean(test_losses)
    average_obj     = np.mean(test_objs)
    return average_loss, average_obj # , (forward_time, inference_time, qp_time, backward_time)
Example #17
def getDefUtility(single_data,
                  T,
                  s,
                  unbiased_probs_pred,
                  path_model,
                  cut_size,
                  omega=4,
                  verbose=False,
                  initial_coverage_prob=None,
                  training_mode=True,
                  training_method='surrogate-decision-focused',
                  block_selection='coverage'):
    G, Fv, coverage_prob, phi_true, path_list, min_cut, log_prob, unbiased_probs_true, previous_gradient = single_data

    n, m, variable_size = G.number_of_nodes(), G.number_of_edges(), T.shape[1]
    budget = G.graph['budget']
    U = torch.Tensor(G.graph['U'])
    initial_distribution = torch.Tensor(G.graph['initial_distribution'])
    options = {"maxiter": 100, "disp": verbose}
    tol = None
    method = "SLSQP"

    edges = G.edges()

    # full forward path, the decision variables are the entire set of variables
    # initial_coverage_prob = np.zeros(variable_size)
    # initial_coverage_prob = np.random.rand(m) # somehow this is very influential...
    initial_coverage_prob = np.ones(
        variable_size)  # somehow this is very influential...
    initial_coverage_prob = initial_coverage_prob / np.sum(
        initial_coverage_prob) * budget

    forward_start_time = time.time()
    pred_optimal_res = surrogate_get_optimal_coverage_prob(
        T.detach(),
        s,
        G,
        unbiased_probs_pred.detach(),
        U,
        initial_distribution,
        budget,
        omega=omega,
        options=options,
        method=method,
        initial_coverage_prob=initial_coverage_prob,
        tol=tol)  # scipy version
    pred_optimal_coverage = torch.Tensor(pred_optimal_res['x'])
    if not pred_optimal_res['success']:
        print('optimization fails...')
        print(pred_optimal_res)
    forward_time = time.time() - forward_start_time

    # ========================== QP part ===========================
    qp_start_time = time.time()
    scale_constant = 1  # cut_size
    A_original, b_original = torch.ones(
        1, cut_size) / scale_constant, torch.Tensor([budget])
    A_matrix, b_matrix = A_original @ T, b_original  # - A_original @ s
    G_original = torch.cat((-torch.eye(cut_size), torch.eye(cut_size)))
    h_original = torch.cat((torch.zeros(cut_size), torch.ones(cut_size)))
    # G_matrix = torch.cat((G_original, A_original)) @ T
    # h_matrix = torch.cat((torch.zeros(cut_size), torch.ones(cut_size), b_original)) # - G_original @ s
    G_matrix = torch.cat((G_original @ T, -torch.eye(variable_size)))
    h_matrix = torch.cat(
        (h_original, torch.zeros(variable_size)))  # - G_original @ s

    if training_mode and pred_optimal_res['success']:
        solver_option = 'default'
        # I seriously don't know whether to use 'default' or 'gurobi' now...
        # Gurobi performs well when there is no noise but default performs well when there is noise
        # But theoretically they should perform roughly the same...

        # cut_size = 10
        # edge_set = np.array(sorted(np.random.choice(range(m), size=cut_size, replace=False)))
        edge_set = list(range(m))

        hessian_start_time = time.time()
        # Q = torch.eye(len(pred_optimal_coverage))
        Q = numerical_surrogate_obj_hessian_matrix_form(pred_optimal_coverage,
                                                        T.detach(),
                                                        s.detach(),
                                                        G,
                                                        unbiased_probs_pred,
                                                        U,
                                                        initial_distribution,
                                                        omega=omega,
                                                        edge_set=edge_set)
        # Q = surrogate_obj_hessian_matrix_form(pred_optimal_coverage, T.detach(), G, unbiased_probs_pred, U, initial_distribution, omega=omega, edge_set=edge_set)
        # Q = np_surrogate_obj_hessian_matrix_form(pred_optimal_coverage, T.detach(), G, unbiased_probs_pred, U, initial_distribution, omega=omega)
        jac = torch_surrogate_dobj_dx_matrix_form(pred_optimal_coverage,
                                                  T,
                                                  s,
                                                  G,
                                                  unbiased_probs_pred,
                                                  U,
                                                  initial_distribution,
                                                  omega=omega,
                                                  lib=torch,
                                                  edge_set=edge_set)
        # jac = surrogate_dobj_dx_matrix_form(pred_optimal_coverage, T, s, G, unbiased_probs_pred, U, initial_distribution, omega=omega, lib=torch, edge_set=edge_set)
        Q_sym = (Q + Q.t()) / 2
        hessian_time = time.time() - hessian_start_time
        # print('Hessian time:', hessian_time)

        # ------------------ regularization -----------------------
        Q_regularized = Q_sym.clone()
        reg_const = 0.1
        # eigenvalues, _ = torch.eig(Q_sym)
        # eigenvalues = eigenvalues[:,0]
        # Q_regularized = Q_sym + torch.eye(variable_size) * max(0, -min(eigenvalues) + reg_const)
        while True:
            # ------------------ eigen regularization -----------------------
            # Q_regularized = Q_sym + torch.eye(len(edge_set)) * max(0, -min(eigenvalues) + reg_const)
            # ----------------- diagonal regularization ---------------------
            Q_regularized[range(variable_size),
                          range(variable_size)] = torch.clamp(
                              torch.diag(Q_sym), min=reg_const)
            try:
                L = torch.linalg.cholesky(Q_regularized)
                break
            except RuntimeError:
                # not positive definite yet; increase the regularization
                reg_const *= 2

        p = jac.view(1, -1) - Q_regularized @ pred_optimal_coverage
        # L = torch.cholesky(Q_regularized)

        try:
            x = cp.Variable(variable_size)
            A_default, b_default = cp.Parameter(
                (1, variable_size)), cp.Parameter(1)
            G_default, h_default = cp.Parameter(
                (cut_size * 2 + variable_size,
                 variable_size)), cp.Parameter(cut_size * 2 + variable_size)
            L_default = cp.Parameter((variable_size, variable_size))
            p_default = cp.Parameter(variable_size)
            constraints = [
                A_default @ x == b_default, G_default @ x <= h_default
            ]
            objective = cp.Minimize(0.5 * cp.sum_squares(L_default @ x) +
                                    p_default.T @ x)
            problem = cp.Problem(objective, constraints)

            cvxpylayer = CvxpyLayer(problem,
                                    parameters=[
                                        A_default, b_default, G_default,
                                        h_default, L_default, p_default
                                    ],
                                    variables=[x])
            coverage_qp_solution, = cvxpylayer(A_matrix, b_matrix, G_matrix,
                                               h_matrix, L, p)
            full_coverage_qp_solution = coverage_qp_solution[0]
        except Exception:
            print("CVXPY solver fails... Usually because Q is not PSD")
            full_coverage_qp_solution = pred_optimal_coverage.clone()

        pred_defender_utility = -(surrogate_objective_function_matrix_form(
            full_coverage_qp_solution,
            T,
            s,
            G,
            unbiased_probs_true,
            torch.Tensor(U),
            torch.Tensor(initial_distribution),
            omega=omega))

    else:
        full_coverage_qp_solution = pred_optimal_coverage.clone()
        pred_defender_utility = -(surrogate_objective_function_matrix_form(
            full_coverage_qp_solution,
            T,
            s,
            G,
            unbiased_probs_true,
            torch.Tensor(U),
            torch.Tensor(initial_distribution),
            omega=omega))
    qp_time = time.time() - qp_start_time

    # ========================= Error message =========================
    if (torch.norm(T.detach() @ pred_optimal_coverage -
                   T.detach() @ full_coverage_qp_solution) > 0.1):
        print(
            'QP solution and scipy solution differ {} too much..., not backpropagating this instance'
            .format(
                torch.norm(pred_optimal_coverage - full_coverage_qp_solution)))
        print("objective value (SLSQP): {}".format(
            surrogate_objective_function_matrix_form(
                pred_optimal_coverage,
                T,
                s,
                G,
                unbiased_probs_pred,
                torch.Tensor(U),
                torch.Tensor(initial_distribution),
                omega=omega)))
        print(pred_optimal_coverage)
        print("objective value (QP): {}".format(
            surrogate_objective_function_matrix_form(
                full_coverage_qp_solution,
                T,
                s,
                G,
                unbiased_probs_pred,
                torch.Tensor(U),
                torch.Tensor(initial_distribution),
                omega=omega)))
        print(full_coverage_qp_solution)
        full_coverage_qp_solution = pred_optimal_coverage.clone()
        pred_defender_utility = -(surrogate_objective_function_matrix_form(
            full_coverage_qp_solution,
            T,
            s,
            G,
            unbiased_probs_true,
            torch.Tensor(U),
            torch.Tensor(initial_distribution),
            omega=omega))

    return pred_defender_utility, full_coverage_qp_solution, (forward_time,
                                                              qp_time)
Example #18
import cvxpy as cp
import torch
from cvxpylayers.torch import CvxpyLayer

n, m = 2, 3
x = cp.Variable(n)
A = cp.Parameter((m, n))
b = cp.Parameter(m)
constraints = [x >= 0]
objective = cp.Minimize(0.5 * cp.pnorm(A @ x - b, p=1))
problem = cp.Problem(objective, constraints)
assert problem.is_dpp()

cvxpylayer = CvxpyLayer(problem, parameters=[A, b], variables=[x])
A_tch = torch.randn(m, n, requires_grad=True)
b_tch = torch.randn(m, requires_grad=True)

# solve the problem
solution, = cvxpylayer(A_tch, b_tch)

# compute the gradient of the sum of the solution with respect to A, b
solution.sum().backward()

print(A_tch.grad, b_tch.grad)  # backward() returns None; gradients sit on the leaf tensors
Example #19
# Construct CVXPY problem and layer
x_cvxpy = cp.Parameter((n, 1))
P_sqrt_cvxpy = cp.Parameter((m, m))
P_21_cvxpy = cp.Parameter((n, m))
q_cvxpy = cp.Parameter((m, 1))

u_cvxpy = cp.Variable((m, 1))
y_cvxpy = cp.Variable((n, 1))

objective = .5 * cp.sum_squares(
    P_sqrt_cvxpy @ u_cvxpy) + x_cvxpy.T @ y_cvxpy + q_cvxpy.T @ u_cvxpy
problem = cp.Problem(cp.Minimize(objective),
                     [cp.norm(u_cvxpy) <= 1, y_cvxpy == P_21_cvxpy @ u_cvxpy])
assert problem.is_dpp()
policy = CvxpyLayer(problem, [x_cvxpy, P_sqrt_cvxpy, P_21_cvxpy, q_cvxpy],
                    [u_cvxpy])
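# Hedged evaluation sketch: n, m, A, B, P_lqr and P_sqrt_lqr come from the
# surrounding script and are assumed here; `policy` maps a state to the
# norm-constrained control input.
import torch

x0 = torch.randn(n, 1, dtype=torch.float64)
P_sqrt_t = torch.from_numpy(P_sqrt_lqr)
P_21_t = torch.from_numpy(A.T @ P_lqr @ B)
q_t = torch.zeros((m, 1), dtype=torch.float64)
u0, = policy(x0, P_sqrt_t, P_21_t, q_t)  # control action with ||u0|| <= 1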
'''
        -----------------------------------------------------------------------------------
'''


def train(iters):
    # Initialize with LQR control lyapunov function
    P_sqrt = torch.from_numpy(P_sqrt_lqr).requires_grad_(True)
    P_21 = torch.from_numpy(A.T @ P_lqr @ B).requires_grad_(True)
    q = torch.zeros((m, 1), requires_grad=True, dtype=torch.float64)
    variables = [P_sqrt, P_21, q]
    A_tch, B_tch, Q_tch, R_tch = map(torch.from_numpy, [A, B, Q, R])

    def g(x, u):
        return (x.t() @ Q_tch @ x + u.t() @ R_tch @ u).squeeze()
Example #20
def lasso_sure_cvxpy(X, y, alpha, sigma, random_state=42):
    # lambda_alpha = [alpha, alpha]
    n_samples, n_features = X.shape
    epsilon = 2 * sigma / n_samples**0.3
    rng = check_random_state(random_state)
    delta = rng.randn(n_samples)

    y2 = y + epsilon * delta
    Xth, yth, y2th, deltath = map(torch.from_numpy, [X, y, y2, delta])

    # set up variables and parameters
    beta_cp = cp.Variable(n_features)
    lambda_cp = cp.Parameter(nonneg=True)

    # set up objective
    loss = ((1 / (2 * n_samples)) * cp.sum(cp.square(Xth @ beta_cp - yth)))
    reg = lambda_cp * cp.norm1(beta_cp)
    objective = loss + reg

    # define problem
    problem1 = cp.Problem(cp.Minimize(objective))
    assert problem1.is_dpp()

    # solve problem1
    layer = CvxpyLayer(problem1, [lambda_cp], [beta_cp])
    alpha_th1 = torch.tensor(alpha, requires_grad=True)
    beta1, = layer(alpha_th1)

    # get test loss and its gradient
    test_loss1 = (Xth @ beta1 - yth).pow(2).sum()
    test_loss1 -= 2 * sigma**2 / epsilon * (Xth @ beta1) @ deltath
    test_loss1.backward()
    val1 = test_loss1.detach().numpy()
    grad1 = np.array(alpha_th1.grad)

    # set up variables and parameters
    beta_cp = cp.Variable(n_features)
    lambda_cp = cp.Parameter(nonneg=True)

    # set up objective
    loss = ((1 / (2 * n_samples)) * cp.sum(cp.square(Xth @ beta_cp - y2th)))
    reg = lambda_cp * cp.norm1(beta_cp)
    objective = loss + reg

    # define problem
    problem2 = cp.Problem(cp.Minimize(objective))
    assert problem2.is_dpp()

    # solve problem2
    layer = CvxpyLayer(problem2, [lambda_cp], [beta_cp])
    alpha_th2 = torch.tensor(alpha, requires_grad=True)
    beta2, = layer(alpha_th2)

    # get test loss and its gradient
    test_loss2 = 2 * sigma**2 / epsilon * (Xth @ beta2) @ deltath
    test_loss2.backward()
    val2 = test_loss2.detach().numpy()
    grad2 = np.array(alpha_th2.grad)

    val = val1 + val2 - len(y) * sigma**2
    grad = grad1 + grad2
    return val, grad
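# Hypothetical call on synthetic sparse data; alpha is passed as a float64
# scalar so the layer output dtype matches the torch.from_numpy data.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 20)
beta_true = rng.randn(20) * (rng.rand(20) < 0.2)
sigma = 0.5
y = X @ beta_true + sigma * rng.randn(50)
val, grad = lasso_sure_cvxpy(X, y, np.float64(0.05), sigma)
print(val, grad)  # SURE value and its gradient w.r.t. alpha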
Example #21
def bregman_map_cvxtorch(s_mat, Wk_plus_value, Wk_minus_value,
                         gamma, l1_pen, dagness_pen, dagness_exp):
    """ Solves argmin g(W) + <grad f (Wk), W-Wk> + 1/gamma * Dh(W, Wk)
        with new CVXPY layers and PyTorch
        this is only implemented for a specific penalty and kernel

        Args:
            s_mat (np.array): data matrix
            Wk_plus_value (np.array): current iterate value for W+
            Wk_minus_value (np.array): current iterate value for W-
            gamma (float): Bregman iteration map param
            l1_pen (float): lambda in paper
            dagness_pen (float): mu in paper
            dagness_exp (float): alpha in paper
    """


    n = s_mat.shape[1]

    W_plus = cp.Variable((n, n), nonneg=True)
    W_plus.value = Wk_plus_value
    W_minus = cp.Variable((n, n), nonneg=True)
    inv_gamma_param = cp.Parameter(nonneg=True)
    l1_pen_param = cp.Parameter(nonneg=True)
    Wk_plus_param = cp.Parameter((n, n), nonneg=True)
    Wk_minus_param = cp.Parameter((n, n), nonneg=True)
    W_minus.value = Wk_minus_value
    sum_W = W_plus + W_minus  # sum variable

    obj_ll = cp.norm(s_mat @ (np.eye(n) - W_plus + W_minus), "fro") ** 2
    obj_spars = l1_pen_param * cp.sum(W_plus + W_minus)

    # Compute C
    sum_Wk = Wk_plus_value + Wk_minus_value
    C = compute_C(n, sum_Wk, dagness_pen, dagness_exp, inv_gamma_param)

    obj_trace = cp.trace(C @ sum_W)
    obj_kernel = inv_gamma_param * (dagness_pen * (n - 1) * (1 + dagness_exp * cp.norm(sum_W, "fro"))**n)

    obj = obj_ll + obj_spars + obj_trace + obj_kernel
    prob = cp.Problem(cp.Minimize(obj), [cp.sum(W_plus) + cp.sum(W_minus) >= n/((n-2)*dagness_exp)])
    assert prob.is_dpp(), "{}{}{}{}".format((dagness_pen * (n - 1) * (1 + dagness_exp * cp.norm(sum_W, "fro"))**n).is_dpp())

    #set_trace()

    layer = CvxpyLayer(prob, parameters = [inv_gamma_param, l1_pen_param], variables = [W_plus, W_minus])

    #TODO allow GPU
    torch_gamma = torch.tensor(1 / gamma)
    torch_l1_pen = torch.tensor(l1_pen)

    x_star = layer(torch_gamma, torch_l1_pen) #W_plus.value, W_minus.value
    #set_trace()
    next_W_plus, next_W_minus = x_star[0].numpy(), x_star[1].numpy()

    tilde_W_plus = np.maximum(next_W_plus - next_W_minus, 0.0)
    tilde_W_minus = np.maximum(next_W_minus - next_W_plus, 0.0)
    tilde_sum = tilde_W_plus + tilde_W_minus
    #
    if np.sum(tilde_sum) >= n / ((n - 2) * dagness_exp):
        return tilde_W_plus, tilde_W_minus
    else:
        return np.maximum(next_W_plus, 0), np.maximum(next_W_minus, 0)
Example #22
def getDefUtility(single_data, unbiased_probs_pred, path_model, cut_size, omega=4, verbose=False, initial_coverage_prob=None, training_mode=True, training_method='two-stage', block_selection='coverage'):
    G, Fv, coverage_prob, phi_true, path_list, min_cut, log_prob, unbiased_probs_true, previous_gradient = single_data
    
    n, m = G.number_of_nodes(), G.number_of_edges()
    budget = G.graph['budget']
    U = torch.Tensor(G.graph['U'])
    initial_distribution = torch.Tensor(G.graph['initial_distribution'])
    options = {"maxiter": 100, "disp": verbose}
    tol = None
    method = "SLSQP"

    edges = G.edges()

    # full forward path, the decision variables are the entire set of variables
    # initial_coverage_prob = np.zeros(m)
    # initial_coverage_prob = np.random.rand(m) # somehow this is very influential...
    initial_coverage_prob = np.ones(m) # somehow this is very influential...
    initial_coverage_prob = initial_coverage_prob / np.sum(initial_coverage_prob) * budget

    forward_start_time = time.time()
    pred_optimal_res = get_optimal_coverage_prob(G, unbiased_probs_pred.detach(), U, initial_distribution, budget, omega=omega, options=options, method=method, initial_coverage_prob=initial_coverage_prob, tol=tol) # scipy version
    pred_optimal_coverage = torch.Tensor(pred_optimal_res['x'])
    if not pred_optimal_res['success']:
        print(pred_optimal_res)
        print('optimization fails...')
    forward_time = time.time() - forward_start_time

    # ======================== edge set choice =====================
    qp_start_time = time.time()
    first_order_derivative = dobj_dx_matrix_form(pred_optimal_coverage, G, unbiased_probs_pred, U, initial_distribution, np.arange(m), omega=omega, lib=torch)

    if block_selection == 'derivative':
        sample_distribution = np.abs(first_order_derivative.detach().numpy()) + 1e-3
    elif block_selection == 'coverage':
        sample_distribution = pred_optimal_coverage.detach().numpy() + 1e-3
    elif block_selection == 'uniform':
        sample_distribution = np.ones(m)
    elif block_selection == 'slack':
        sample_distribution = np.exp(-np.abs(pred_optimal_coverage.detach().numpy() - 0.5) * 5)
    else:
        raise ValueError('Not Implemented Block Selection')
    sample_distribution /= sum(sample_distribution)
    if training_method == 'block-decision-focused' or training_method == 'hybrid':
        # min_sum = 1e-2
        while True:
            edge_set = np.array(sorted(np.random.choice(range(m), size=cut_size, replace=False, p=sample_distribution)))
            # if sum(pred_optimal_coverage[edge_set]) > min_sum:
            break
    elif training_method == 'corrected-block-decision-focused':
        # min_sum = 1e-2
        while True:
            edge_set = np.array(sorted(np.random.choice(range(m), size=cut_size, replace=False, p=sample_distribution)))
            indices = np.arange(cut_size)
            np.random.shuffle(indices)
            indices1, indices2 = np.array_split(indices, 2)
            indices1, indices2 = sorted(indices1), sorted(indices2)
            edge_set1, edge_set2 = edge_set[indices1], edge_set[indices2]
            # if sum(pred_optimal_coverage[edge_set1]) > min_sum / 10 and sum(pred_optimal_coverage[edge_set2]) > min_sum / 10:
            break
    else:
        edge_set = list(range(m))
    off_edge_set = sorted(list(set(range(m)) - set(edge_set)))
    # ========================== QP part ===========================

    # A_matrix, b_matrix = torch.Tensor(), torch.Tensor()
    # G_matrix = torch.cat((-torch.eye(cut_size), torch.eye(cut_size), torch.ones(1, cut_size)))
    # h_matrix = torch.cat((torch.zeros(cut_size), torch.ones(cut_size), torch.Tensor([sum(pred_optimal_coverage[edge_set])])))
    scale_constant = 1 # cut_size
    A_matrix, b_matrix = torch.ones(1, cut_size)/scale_constant, torch.Tensor([sum(pred_optimal_coverage[edge_set])])/scale_constant
    G_matrix = torch.cat((-torch.eye(cut_size), torch.eye(cut_size)))
    h_matrix = torch.cat((torch.zeros(cut_size), torch.ones(cut_size)))

    if training_mode and pred_optimal_res['success']: # and sum(pred_optimal_coverage[edge_set]) > 0.1:
        
        solver_option = 'default'
        # I seriously don't know whether to use 'default' or 'gurobi' now...
        # Gurobi performs well when there is no noise but default performs well when there is noise
        # But theoretically they should perform roughly the same...

        hessian_start_time = time.time()
        Q = obj_hessian_matrix_form(pred_optimal_coverage, G, unbiased_probs_pred, U, initial_distribution, edge_set, omega=omega)
        jac = dobj_dx_matrix_form(pred_optimal_coverage, G, unbiased_probs_pred, U, initial_distribution, edge_set, omega=omega, lib=torch)
        Q_sym = (Q + Q.t()) / 2
        hessian_time = time.time() - hessian_start_time
        # print("Hessian time:", hessian_time)
    
        # ------------------ regularization -----------------------
        Q_regularized = Q_sym.clone()
        reg_const = 0.1
        while True:
            # ------------------ eigen regularization -----------------------
            # Q_regularized = Q_sym + torch.eye(len(edge_set)) * max(0, -min(eigenvalues) + reg_const)
            # ----------------- diagonal regularization ---------------------
            Q_regularized[range(cut_size), range(cut_size)] = torch.clamp(torch.diag(Q_sym), min=reg_const)
            try:
                L = torch.linalg.cholesky(Q_regularized)
                break
            except RuntimeError:
                # not positive definite yet; increase the regularization
                reg_const *= 2

        p = jac.view(1, -1) - Q_regularized @ pred_optimal_coverage[edge_set]
 
        if True: # try:
            x = cp.Variable(cut_size)
            A_default, b_default = cp.Parameter((1, cut_size)), cp.Parameter(1)
            G_default, h_default = cp.Parameter((cut_size * 2, cut_size)), cp.Parameter(cut_size * 2)
            L_default = cp.Parameter((cut_size, cut_size))
            p_default = cp.Parameter(cut_size)
            constraints = [A_default @ x == b_default, G_default @ x <= h_default]
            objective = cp.Minimize(0.5 * cp.sum_squares(L_default @ x) + p_default.T @ x)
            problem = cp.Problem(objective, constraints)
    
            cvxpylayer = CvxpyLayer(problem, parameters=[A_default, b_default, G_default, h_default, L_default, p_default], variables=[x])
            coverage_qp_solution, = cvxpylayer(A_matrix, b_matrix, G_matrix, h_matrix, L, p)
            full_coverage_qp_solution = pred_optimal_coverage.clone()
            full_coverage_qp_solution[edge_set] = coverage_qp_solution[0]
        # except:
        #     print("QP solver fails... Usually because Q is not PSD")
        #     full_coverage_qp_solution = pred_optimal_coverage.clone()

        pred_defender_utility  = -(objective_function_matrix_form(full_coverage_qp_solution, G, unbiased_probs_true, torch.Tensor(U), torch.Tensor(initial_distribution), edge_set, omega=omega))

    else:
        full_coverage_qp_solution = pred_optimal_coverage.clone()
        pred_defender_utility  = -(objective_function_matrix_form(full_coverage_qp_solution, G, unbiased_probs_true, torch.Tensor(U), torch.Tensor(initial_distribution), edge_set, omega=omega))
    qp_time = time.time() - qp_start_time

    # ========================= Error message =========================
    if (torch.norm(pred_optimal_coverage - full_coverage_qp_solution) > 0.1):
        print('QP solution and scipy solution differ {} too much..., not backpropagating this instance'.format(torch.norm(pred_optimal_coverage - full_coverage_qp_solution)))
        print("objective value (SLSQP): {}".format(objective_function_matrix_form(pred_optimal_coverage, G, unbiased_probs_pred, torch.Tensor(U), torch.Tensor(initial_distribution), edge_set, omega=omega)))
        print(pred_optimal_coverage)
        print("objective value (QP): {}".format(objective_function_matrix_form(full_coverage_qp_solution, G, unbiased_probs_pred, torch.Tensor(U), torch.Tensor(initial_distribution), edge_set, omega=omega)))
        print(full_coverage_qp_solution)
        full_coverage_qp_solution = pred_optimal_coverage.clone()
        pred_defender_utility  = -(objective_function_matrix_form(full_coverage_qp_solution, G, unbiased_probs_true, torch.Tensor(U), torch.Tensor(initial_distribution), edge_set, omega=omega))

    return pred_defender_utility, full_coverage_qp_solution, (forward_time, qp_time)
Example #23
def train_portfolio(model, covariance_model, optimizer, epoch, dataset, training_method='two-stage', device='cpu', evaluate=False):
    model.train()
    covariance_model.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs = [], []

    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, covariance_mat, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            features, covariance_mat, labels = features[0].to(device), covariance_mat[0].to(device), labels[0,:,0].to(device).float()  # batch size is 1: take the single sample
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:,0]
            loss = loss_fn(predictions, labels)
            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG  # TODO

            if evaluate:
                forward_time += time.time() - forward_start_time
                inference_start_time = time.time()

                p = predictions
                L = sqrtm(Q) # torch.cholesky(Q)
                # =============== solving QP using qpth ================
                if solver == 'qpth':
                    G = -torch.eye(n)
                    h = torch.zeros(n)
                    A = torch.ones(1,n)
                    b = torch.ones(1)
                    qp_solver = qpth.qp.QPFunction()
                    x = qp_solver(alpha * Q, -p, G, h, A, b)[0]
                # =============== solving QP using CVXPY ===============
                elif solver == 'cvxpy':
                    x_var = cp.Variable(n)
                    L_para = cp.Parameter((n,n))
                    p_para = cp.Parameter(n)
                    constraints = [x_var >= 0, x_var <= 1, cp.sum(x_var) == 1]
                    objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ x_var) + p_para.T @ x_var)
                    problem = cp.Problem(objective, constraints)

                    cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para], variables=[x_var])
                    x, = cvxpylayer(L, -p)

                obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x

                inference_time += time.time() - inference_start_time
                # ======= opt ===
                # p_opt = labels
                # L_opt = torch.cholesky(Q_real)
                # x_opt, = cvxpylayer(L_opt, p_opt)
                # opt = labels @ x_opt - 0.5 * alpha * x.t() @ Q_real @ x
                # print('obj:', obj, 'opt:', opt)
            else:
                obj = torch.Tensor([0])

            # ====================== back-prop =====================
            optimizer.zero_grad()
            backward_start_time = time.time()
            try:
                if training_method == 'two-stage':
                    Q_loss = torch.norm(Q - Q_real)
                    (loss + Q_loss).backward()
                elif training_method == 'decision-focused':
                    (-obj).backward()
                    # (-obj + loss).backward() # TODO
                    for parameter in model.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                    for parameter in covariance_model.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                else:
                    raise ValueError('Not implemented method')
            except RuntimeError:
                print("no grad is backpropagated...")
            optimizer.step()
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%') 

    average_loss    = np.mean(train_losses)
    average_obj     = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time, backward_time)
Example #24
def surrogate_train_portfolio(model, covariance_model, T_init, optimizer, T_optimizer, epoch, dataset, training_method='surrogate', device='cpu', evaluate=False):
    model.train()
    covariance_model.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs = [], []

    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    T_size = T_init.shape[1]

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, covariance_mat, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            features, covariance_mat, labels = features[0].to(device), covariance_mat[0].to(device), labels[0,:,0].to(device).float()  # batch size is 1: take the single sample
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:,0]
            loss = loss_fn(predictions, labels)


            # randomly select one column to update: T is detached everywhere
            # except that column, so gradients reach T_init only through it
            T = T_init.detach().clone()
            random_column = torch.randint(T_init.shape[1], [1])
            T[:, random_column] = T_init[:, random_column]

            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG 

            forward_time += time.time() - forward_start_time
            inference_start_time = time.time()

            p = predictions @ T
            L = sqrtm(T.t() @ Q @ T) # torch.cholesky(T.t() @ Q @ T)
            # =============== solving QP using qpth ================
            if solver == 'qpth':
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1,n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
                x = T @ y
            # =============== solving QP using CVXPY ===============
            elif solver == 'cvxpy':
                y_var = cp.Variable(T_size)
                L_para = cp.Parameter((T_size,T_size))
                p_para = cp.Parameter(T_size)
                T_para = cp.Parameter((n,T_size))
                constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                problem = cp.Problem(objective, constraints)

                cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                y, = cvxpylayer(L, -p, T)
                x = T @ y
            # print("predicted objective value:", predictions.t() @ x - 0.5 * alpha * x.t() @ Q @ x)

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x
            # print("real objective value:", obj)

            inference_time += time.time() - inference_start_time

            # ====================== back-prop =====================
            optimizer.zero_grad()
            T_optimizer.zero_grad()
            backward_start_time = time.time()
            try:
                if training_method == 'surrogate':
                    covariance = computeCovariance(T.t())
                    T_weight = 0.0
                    TS_weight = 0.0
                    T_loss     = torch.sum(covariance) - torch.sum(torch.diag(covariance))

                    (-obj + T_weight * T_loss).backward()
                    for parameter in model.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                    for parameter in covariance_model.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                    T_init.grad = torch.clamp(T_init.grad, min=-T_MAX_NORM, max=T_MAX_NORM)
                else:
                    raise ValueError('Not implemented method')
            except RuntimeError:
                print("no grad is backpropagated...")
            optimizer.step()
            T_optimizer.step()
            T_init.data = normalize_matrix_positive(T_init.data)
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%', T_loss=f'{T_loss:.3f}')

    average_loss    = np.mean(train_losses)
    average_obj     = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time, backward_time)
Example #25
def surrogate_validate_portfolio(model, covariance_model, T, scheduler, T_scheduler, epoch, dataset, training_method='surrogate', device='cpu', evaluate=False):
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    validate_losses, validate_objs = [], []

    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    T_size = T.shape[1]

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, covariance_mat, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            features, covariance_mat, labels = features[0].to(device), covariance_mat[0].to(device), labels[0,:,0].to(device).float()  # batch size is 1: take the single sample
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:,0]
            loss = loss_fn(predictions, labels)

            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG 

            forward_time += time.time() - forward_start_time
            inference_start_time = time.time()

            p = predictions @ T
            L = sqrtm(T.t() @ Q @ T) # torch.cholesky(T.t() @ Q @ T)
            # =============== solving QP using qpth ================
            if solver == 'qpth':
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1,n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
                x = T @ y
            # =============== solving QP using CVXPY ===============
            elif solver == 'cvxpy':
                y_var = cp.Variable(T_size)
                L_para = cp.Parameter((T_size,T_size))
                p_para = cp.Parameter(T_size)
                T_para = cp.Parameter((n,T_size))
                constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                problem = cp.Problem(objective, constraints)
    
                cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                y, = cvxpylayer(L, -p, T)
                x = T @ y

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x

            validate_losses.append(loss.item())
            validate_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss    = np.mean(validate_losses)
    average_obj     = np.mean(validate_objs)

    if (epoch > 0):
        if training_method == "two-stage":
            scheduler.step(average_loss)
        elif training_method == "decision-focused":
            scheduler.step(-average_obj)
        elif training_method == "surrogate":
            # covariance = computeCovariance(T.t())
            # T_loss     = torch.sum(covariance) - torch.sum(torch.diag(covariance))
            scheduler.step(-average_obj)
            T_scheduler.step(-average_obj)
        else:
            raise TypeError("Not Implemented Method")

    return average_loss, average_obj
Example #26
def surrogate_test_portfolio(model, covariance_model, T, epoch, dataset, device='cpu', evaluate=False):
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    test_losses, test_objs = [], []
    test_opts = []

    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    T_size = T.shape[1]

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, covariance_mat, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            features, covariance_mat, labels = features[0].to(device), covariance_mat[0].to(device), labels[0,:,0].to(device).float()  # batch size is 1: take the single sample
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:,0]
            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG 
            loss = loss_fn(predictions, labels)

            forward_time += time.time() - forward_start_time
            inference_start_time = time.time()

            p = predictions @ T
            L = sqrtm(T.t() @ Q @ T) # torch.cholesky(T.t() @ Q @ T)
            # =============== solving QP using qpth ================
            if solver == 'qpth':
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1,n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
                x = T @ y
            # =============== solving QP using CVXPY ===============
            elif solver == 'cvxpy':
                y_var = cp.Variable(T_size)
                L_para = cp.Parameter((T_size,T_size))
                p_para = cp.Parameter(T_size)
                T_para = cp.Parameter((n,T_size))
                constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                problem = cp.Problem(objective, constraints)

                cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                y, = cvxpylayer(L, -p, T)
                x = T @ y

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x

            # ======= opt ===
            # p_opt = labels @ T
            # L_opt = torch.cholesky(T.t() @ Q_real @ T)
            # y_opt, = cvxpylayer(L_opt, p_opt, T)
            # x_opt = T @ y_opt
            # opt = labels @ x_opt - 0.5 * alpha * x_opt.t() @ Q_real @ x_opt
            # test_opts.append(opt.item())

            test_losses.append(loss.item())
            test_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss    = np.mean(test_losses)
    average_obj     = np.mean(test_objs)
    return average_loss, average_obj
Example #27
x = cp.Variable(D)
c = cp.Parameter(D)
A = cp.Parameter((M, D))
b = cp.Parameter(M)
G = cp.Parameter((N, D))
h = cp.Parameter(N)
constraints = [A@x == b, G@x <= h]
objective = cp.Minimize(c @ x)
problem = cp.Problem(objective, constraints)
assert problem.is_dpp()


cvxpylayer = CvxpyLayer(problem, parameters=[c,A,b,G,h], variables=[x])
c_t = torch.Tensor(_c)
A_t = torch.Tensor(_A)
b_t = torch.Tensor(_b)
G_t = torch.Tensor(_G)
h_t = torch.Tensor(_h)

c_t.requires_grad = True
A_t.requires_grad = True
b_t.requires_grad = True
G_t.requires_grad = True
h_t.requires_grad = True

# solve the problem
solution, = cvxpylayer(c_t, A_t, b_t, G_t, h_t)
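# Backpropagate through the LP solution (sketch): backward() populates .grad
# on each requires_grad input above. For an LP the solution map is piecewise
# constant in some parameters, so parts of these gradients can be zero.
solution.sum().backward()
print(c_t.grad, b_t.grad)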