def forward(self):
    """Return ``computeCovariance`` applied to all ``self.n`` embedding rows.

    Every index in ``range(self.n)`` is looked up in ``self.embedding`` and
    the resulting embeddings are handed to ``computeCovariance``.
    """
    all_indices = torch.LongTensor(range(self.n))
    return computeCovariance(self.embedding(all_indices))
Exemplo n.º 2
0
def surrogate_train_submodular(net,
                               init_T,
                               optimizer,
                               T_optimizer,
                               epoch,
                               sample_instance,
                               dataset,
                               lr=0.1,
                               training_method='two-stage',
                               device='cpu'):
    """Train ``net`` and the reparameterization matrix ``init_T`` for one epoch.

    For every ``(features, labels)`` batch the network output is scored with
    an MSE loss.  Each sample is then solved with
    ``getSurrogateOptimalDecision``; a qpth QP built around that solution
    yields a decision ``x = T @ y`` which is evaluated with ``getObjective``
    against the true label.  The batch-mean objective is maximized by
    back-propagating its negation.

    Args:
        net: Model mapping features to predictions; switched to train mode.
        init_T: 2-D tensor whose second dimension is the size of the
            surrogate variable.  Stepped by ``T_optimizer`` and re-normalized
            with ``normalize_matrix_positive`` after each surrogate step.
        optimizer: Optimizer stepped after the gradients of ``net`` have been
            clamped to ``[-MAX_NORM, MAX_NORM]``.
        T_optimizer: Optimizer stepped after ``init_T.grad`` has been clamped.
        epoch: Epoch index; for negative values the labels are used in place
            of the network outputs.
        sample_instance: Object providing ``n``, ``m``, ``d``, ``f`` and
            ``budget``.
        dataset: Iterable of ``(features, labels)`` batches.
        lr: Unused; kept for interface compatibility.
        training_method: Only ``'surrogate'`` is implemented by the
            per-sample decision step; any other value raises ``ValueError``
            on the first sample.
        device: Device the batch tensors are moved to.

    Returns:
        ``(average_loss, average_obj, timings)`` where ``timings`` is the
        tuple ``(forward_time, inference_time, qp_time, backward_time)`` of
        accumulated wall-clock seconds.

    Raises:
        ValueError: If ``training_method`` is not implemented.
    """
    net.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs, train_T_losses = [], [], []
    # Unpacking two values keeps the implicit check that init_T is 2-D.
    _, variable_size = init_T.shape
    n, m, budget = sample_instance.n, sample_instance.m, sample_instance.budget
    d = torch.Tensor(sample_instance.d)
    f = torch.Tensor(sample_instance.f)
    A, b, G, h = createSurrogateConstraintMatrix(m, n, budget)
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, labels in tqdm_loader:
            forward_start_time = time.time()
            features, labels = features.to(device), labels.to(device)
            if epoch >= 0:
                outputs = net(features)
            else:
                outputs = labels
            # two-stage loss
            loss = loss_fn(outputs, labels)
            forward_time += time.time() - forward_start_time

            # decision-focused loss
            objective_value_list = []
            batch_size = len(labels)

            # The full matrix is used (no random column selection).
            T = init_T

            for label, output in zip(labels, outputs):
                if training_method != 'surrogate':
                    raise ValueError('Not implemented method!')

                inference_start_time = time.time()
                optimize_result = getSurrogateOptimalDecision(
                    T, n, m, output, d, f,
                    budget=budget)  # end-to-end for both T and net
                inference_time += time.time() - inference_start_time
                optimal_y = torch.Tensor(optimize_result.x)

                qp_start_time = time.time()
                if optimize_result.success:
                    # No equality constraints: empty tensors are passed.
                    newA, newb = torch.Tensor(), torch.Tensor()
                    # Constraints mapped through T, plus y >= 0.
                    newG = torch.cat(
                        (A @ T, G @ T, -torch.eye(variable_size)))
                    newh = torch.cat((b, h, torch.zeros(variable_size)))

                    # 10 * I is added to the detached Hessian
                    # (presumably to keep Q positive definite -- TODO confirm).
                    Q = getSurrogateHessian(
                        T, optimal_y, n, m, output, d,
                        f).detach() + torch.eye(len(optimal_y)) * 10
                    jac = -getSurrogateDerivative(T,
                                                  optimal_y,
                                                  n,
                                                  m,
                                                  output,
                                                  d,
                                                  f,
                                                  create_graph=True)
                    p = jac - Q @ optimal_y
                    qp_solver = qpth.qp.QPFunction()  # TODO unknown bug

                    try:
                        y = qp_solver(Q, p, newG, newh, newA, newb)[0]
                        x = T @ y
                    except Exception:
                        # Best effort: fall back to the detached solution so
                        # the sample still contributes an objective value.
                        x = T.detach() @ optimal_y
                        print('qp error! no gradient!')
                else:
                    print('Optimization failed...')
                    x = T.detach() @ optimal_y

                qp_time += time.time() - qp_start_time

                objective_value_list.append(
                    getObjective(x, n, m, label, d, f))

            objective = sum(objective_value_list) / batch_size
            T_loss = torch.Tensor([0])

            optimizer.zero_grad()
            backward_start_time = time.time()
            # Checked outside the try so an unsupported method is never
            # silently swallowed by the best-effort handler below.
            if training_method not in ('two-stage', 'decision-focused',
                                       'surrogate'):
                raise ValueError('Not implemented method')
            try:
                if training_method == 'two-stage':
                    loss.backward()
                    optimizer.step()
                elif training_method == 'decision-focused':
                    (-objective).backward()
                    for parameter in net.parameters():
                        parameter.grad = torch.clamp(parameter.grad,
                                                     min=-MAX_NORM,
                                                     max=MAX_NORM)
                    optimizer.step()
                else:  # 'surrogate'
                    covariance = computeCovariance(T.t())
                    # Sum of the off-diagonal covariance entries; only
                    # reported, not back-propagated.
                    T_loss = torch.sum(covariance) - torch.sum(
                        torch.diag(covariance))
                    T_optimizer.zero_grad()
                    (-objective).backward()
                    # TODO: minimizing reparameterization loss (T_loss)

                    for parameter in net.parameters():
                        parameter.grad = torch.clamp(parameter.grad,
                                                     min=-MAX_NORM,
                                                     max=MAX_NORM)
                    init_T.grad = torch.clamp(init_T.grad,
                                              min=-MAX_NORM,
                                              max=MAX_NORM)
                    optimizer.step()
                    T_optimizer.step()
                    init_T.data = normalize_matrix_positive(init_T.data)
            except Exception:
                # Best effort: skip this batch's update but keep training.
                print("Error! No grad is backpropagated...")
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(objective.item())
            train_T_losses.append(T_loss.item())

            average_loss = np.mean(train_losses)
            average_obj = np.mean(train_objs)
            average_T_loss = np.mean(train_T_losses)
            # Print status
            tqdm_loader.set_postfix(loss=f'{average_loss:.3f}',
                                    obj=f'{average_obj:.3f}',
                                    T_loss=f'{average_T_loss:.3f}')

    average_loss = np.mean(train_losses)
    average_obj = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time,
                                       backward_time)
Exemplo n.º 3
0
def surrogate_test_portfolio(model, covariance_model, T, epoch, dataset, device='cpu', evaluate=False):
    """Evaluate the surrogate portfolio pipeline on ``dataset``.

    For each sample the predicted returns and the learned covariance are
    projected through ``T``, the reduced QP is solved with the module-level
    ``solver`` ('qpth' or 'cvxpy'), and the decision ``x = T @ y`` is scored
    against the true labels and the regularized real covariance.

    Args:
        model: Return predictor; switched to eval mode.
        covariance_model: Module called without arguments to obtain the
            learned covariance; switched to eval mode.
        T: Reparameterization matrix; ``T.shape[1]`` is the surrogate size.
        epoch: Unused; kept for interface compatibility.
        dataset: Iterable of ``(features, covariance_mat, labels)`` batches.
        device: Device the batch tensors are moved to.
        evaluate: Unused; kept for interface compatibility.

    Returns:
        ``(average_loss, average_obj)``: mean MSE loss and mean objective.

    Raises:
        ValueError: If the module-level ``solver`` is neither ``'qpth'`` nor
            ``'cvxpy'``.
    """
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    test_losses, test_objs = [], []
    T_size = T.shape[1]

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, covariance_mat, labels in tqdm_loader:
            # Only the first sample of each batch is used.
            features = features[0].to(device)
            covariance_mat = covariance_mat[0].to(device)
            labels = labels[0, :, 0].to(device).float()
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG
            loss = loss_fn(predictions, labels)

            p = predictions @ T
            # =============== solving QP using qpth ================
            if solver == 'qpth':
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1, n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
                x = T @ y
            # =============== solving QP using CVXPY ===============
            elif solver == 'cvxpy':
                # The matrix square root is only needed by the CVXPY form.
                L = sqrtm(T.t() @ Q @ T)
                y_var = cp.Variable(T_size)
                L_para = cp.Parameter((T_size, T_size))
                p_para = cp.Parameter(T_size)
                T_para = cp.Parameter((n, T_size))
                constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                problem = cp.Problem(objective, constraints)

                cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                y, = cvxpylayer(L, -p, T)
                x = T @ y
            else:
                # Fail clearly instead of hitting an unbound ``x`` below.
                raise ValueError(f'Unknown solver: {solver!r}')

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x

            test_losses.append(loss.item())
            test_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(test_losses)
    average_obj = np.mean(test_objs)
    return average_loss, average_obj
Exemplo n.º 4
0
def test_portfolio(model, covariance_model, epoch, dataset, device='cpu', evaluate=False):
    """Evaluate the (non-surrogate) portfolio pipeline on ``dataset``.

    The MSE loss between predictions and labels is always recorded.  When
    ``evaluate`` is true the portfolio QP is additionally solved with the
    module-level ``solver`` ('qpth' or 'cvxpy') and the decision is scored
    against the true labels and the regularized real covariance; otherwise
    the objective is recorded as 0.

    Args:
        model: Return predictor; switched to eval mode.
        covariance_model: Module called without arguments to obtain the
            learned covariance; switched to eval mode.
        epoch: When equal to ``-1`` the true labels and real covariance are
            used in place of the model outputs.
        dataset: Iterable of ``(features, covariance_mat, labels)`` batches.
        device: Device the batch tensors are moved to.
        evaluate: Whether to solve the optimization problem per sample.

    Returns:
        ``(average_loss, average_obj)``: mean MSE loss and mean objective.

    Raises:
        ValueError: If ``evaluate`` is true and the module-level ``solver``
            is neither ``'qpth'`` nor ``'cvxpy'``.
    """
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    test_losses, test_objs = [], []

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, covariance_mat, labels in tqdm_loader:
            # Only the first sample of each batch is used.
            features = features[0].to(device)
            covariance_mat = covariance_mat[0].to(device)
            labels = labels[0, :, 0].to(device).float()
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG

            if epoch == -1:
                predictions = labels
                Q = Q_real
            else:
                predictions = model(features.float())[:, 0]
                Q = covariance_model() * (1 - REG) + torch.eye(n) * REG

            loss = loss_fn(predictions, labels)

            if evaluate:
                p = predictions
                # =============== solving QP using qpth ================
                if solver == 'qpth':
                    G = -torch.eye(n)
                    h = torch.zeros(n)
                    A = torch.ones(1, n)
                    b = torch.ones(1)
                    qp_solver = qpth.qp.QPFunction()
                    x = qp_solver(alpha * Q, -p, G, h, A, b)[0]
                # =============== solving QP using CVXPY ===============
                elif solver == 'cvxpy':
                    # The matrix square root is only needed by the CVXPY form.
                    L = sqrtm(Q)
                    x_var = cp.Variable(n)
                    L_para = cp.Parameter((n, n))
                    p_para = cp.Parameter(n)
                    constraints = [x_var >= 0, x_var <= 1, cp.sum(x_var) == 1]
                    objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ x_var) + p_para.T @ x_var)
                    problem = cp.Problem(objective, constraints)

                    cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para], variables=[x_var])
                    x, = cvxpylayer(L, -p)
                else:
                    # Fail clearly instead of hitting an unbound ``x`` below.
                    raise ValueError(f'Unknown solver: {solver!r}')

                obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x
            else:
                obj = torch.Tensor([0])

            test_losses.append(loss.item())
            test_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(test_losses)
    average_obj = np.mean(test_objs)
    return average_loss, average_obj
Exemplo n.º 5
0
def surrogate_validate_portfolio(model, covariance_model, T, scheduler, T_scheduler, epoch, dataset, training_method='surrogate', device='cpu', evaluate=False):
    """Validate the surrogate portfolio pipeline and step the LR schedulers.

    Each sample is solved through the reduced QP (module-level ``solver``:
    'qpth' or 'cvxpy') with decision ``x = T @ y`` and scored against the
    true labels and the regularized real covariance.  After the pass, for
    ``epoch > 0``, the schedulers are stepped with the metric matching
    ``training_method``.

    Args:
        model: Return predictor; switched to eval mode.
        covariance_model: Module called without arguments to obtain the
            learned covariance; switched to eval mode.
        T: Reparameterization matrix; ``T.shape[1]`` is the surrogate size.
        scheduler: Scheduler stepped with the average loss ('two-stage') or
            the negated average objective (other methods).
        T_scheduler: Scheduler stepped with the negated average objective
            when ``training_method == 'surrogate'``.
        epoch: Schedulers are only stepped when this is greater than 0.
        dataset: Iterable of ``(features, covariance_mat, labels)`` batches.
        training_method: 'two-stage', 'decision-focused' or 'surrogate'.
        device: Device the batch tensors are moved to.
        evaluate: Unused; kept for interface compatibility.

    Returns:
        ``(average_loss, average_obj)``: mean MSE loss and mean objective.

    Raises:
        ValueError: If the module-level ``solver`` is neither ``'qpth'`` nor
            ``'cvxpy'``.
        TypeError: If ``epoch > 0`` and ``training_method`` is unknown.
    """
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    validate_losses, validate_objs = [], []
    T_size = T.shape[1]

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, covariance_mat, labels in tqdm_loader:
            # Only the first sample of each batch is used.
            features = features[0].to(device)
            covariance_mat = covariance_mat[0].to(device)
            labels = labels[0, :, 0].to(device).float()
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            loss = loss_fn(predictions, labels)

            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG

            p = predictions @ T
            # =============== solving QP using qpth ================
            if solver == 'qpth':
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1, n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
                x = T @ y
            # =============== solving QP using CVXPY ===============
            elif solver == 'cvxpy':
                # The matrix square root is only needed by the CVXPY form.
                L = sqrtm(T.t() @ Q @ T)
                y_var = cp.Variable(T_size)
                L_para = cp.Parameter((T_size, T_size))
                p_para = cp.Parameter(T_size)
                T_para = cp.Parameter((n, T_size))
                constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                problem = cp.Problem(objective, constraints)

                cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                y, = cvxpylayer(L, -p, T)
                x = T @ y
            else:
                # Fail clearly instead of hitting an unbound ``x`` below.
                raise ValueError(f'Unknown solver: {solver!r}')

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x

            validate_losses.append(loss.item())
            validate_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(validate_losses)
    average_obj = np.mean(validate_objs)

    if (epoch > 0):
        if training_method == "two-stage":
            scheduler.step(average_loss)
        elif training_method == "decision-focused":
            scheduler.step(-average_obj)
        elif training_method == "surrogate":
            # Both schedulers track the (negated) decision quality.
            scheduler.step(-average_obj)
            T_scheduler.step(-average_obj)
        else:
            raise TypeError("Not Implemented Method")

    return average_loss, average_obj
Exemplo n.º 6
0
def surrogate_train_portfolio(model, covariance_model, T_init, optimizer, T_optimizer, epoch, dataset, training_method='surrogate', device='cpu', evaluate=False):
    """Train the surrogate portfolio pipeline for one pass over ``dataset``.

    For each sample a copy of ``T_init`` is built in which only one randomly
    chosen column stays attached to ``T_init`` (so only that column receives
    gradient), the reduced QP is solved with the module-level ``solver``
    ('qpth' or 'cvxpy'), and the negated realized objective is
    back-propagated.  Gradients are clamped before both optimizers step and
    ``T_init`` is re-normalized with ``normalize_matrix_positive``.

    Args:
        model: Return predictor; switched to train mode.
        covariance_model: Module called without arguments to obtain the
            learned covariance; switched to train mode.
        T_init: Reparameterization matrix being learned;
            ``T_init.shape[1]`` is the surrogate size.
        optimizer: Optimizer stepped once per sample.
        T_optimizer: Optimizer for ``T_init``, stepped once per sample.
        epoch: Unused; kept for interface compatibility.
        dataset: Iterable of ``(features, covariance_mat, labels)`` batches.
        training_method: Must be ``'surrogate'``.
        device: Device the batch tensors are moved to.
        evaluate: Unused; kept for interface compatibility.

    Returns:
        ``(average_loss, average_obj, timings)`` where ``timings`` is
        ``(forward_time, inference_time, qp_time, backward_time)`` in
        seconds (``qp_time`` is never incremented here and stays 0).

    Raises:
        ValueError: If ``training_method`` is not ``'surrogate'`` or the
            module-level ``solver`` is neither ``'qpth'`` nor ``'cvxpy'``.
    """
    # Validate up front: previously this error was raised inside the
    # best-effort try below, swallowed, and followed by a NameError.
    if training_method != 'surrogate':
        raise ValueError('Not implemented method')

    model.train()
    covariance_model.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs = [], []

    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    T_size = T_init.shape[1]

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, covariance_mat, labels in tqdm_loader:
            forward_start_time = time.time()
            # Only the first sample of each batch is used.
            features = features[0].to(device)
            covariance_mat = covariance_mat[0].to(device)
            labels = labels[0, :, 0].to(device).float()
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            loss = loss_fn(predictions, labels)

            # randomly select column to update: every other column of T is a
            # detached copy, so gradient reaches T_init through one column.
            T = T_init.detach().clone()
            random_column = torch.randint(T_init.shape[1], [1])
            T[:, random_column] = T_init[:, random_column]

            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG

            forward_time += time.time() - forward_start_time
            inference_start_time = time.time()

            p = predictions @ T
            # =============== solving QP using qpth ================
            if solver == 'qpth':
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1, n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
                x = T @ y
            # =============== solving QP using CVXPY ===============
            elif solver == 'cvxpy':
                # The matrix square root is only needed by the CVXPY form.
                L = sqrtm(T.t() @ Q @ T)
                y_var = cp.Variable(T_size)
                L_para = cp.Parameter((T_size, T_size))
                p_para = cp.Parameter(T_size)
                T_para = cp.Parameter((n, T_size))
                constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                problem = cp.Problem(objective, constraints)

                cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                y, = cvxpylayer(L, -p, T)
                x = T @ y
            else:
                # Fail clearly instead of hitting an unbound ``x`` below.
                raise ValueError(f'Unknown solver: {solver!r}')

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x

            inference_time += time.time() - inference_start_time

            # ====================== back-prop =====================
            optimizer.zero_grad()
            T_optimizer.zero_grad()
            backward_start_time = time.time()
            # Default so the progress bar can always report a value, even if
            # the best-effort block below fails before computing it.
            T_loss = 0.0
            try:
                covariance = computeCovariance(T.t())
                # Weight 0.0 keeps the term in the graph without affecting
                # the gradient values.
                T_weight = 0.0
                # Sum of the off-diagonal covariance entries of T's columns.
                T_loss = torch.sum(covariance) - torch.sum(torch.diag(covariance))

                (-obj + T_weight * T_loss).backward()
                for parameter in model.parameters():
                    parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                for parameter in covariance_model.parameters():
                    parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                T_init.grad = torch.clamp(T_init.grad, min=-T_MAX_NORM, max=T_MAX_NORM)
            except Exception:
                # Best effort: keep training on failure, but let
                # KeyboardInterrupt / SystemExit propagate.
                print("no grad is backpropagated...")
            # The optimizers step even after a failure, as before.
            optimizer.step()
            T_optimizer.step()
            T_init.data = normalize_matrix_positive(T_init.data)
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%', T_loss=f'{float(T_loss):.3f}')

    average_loss = np.mean(train_losses)
    average_obj = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time, backward_time)
Exemplo n.º 7
0
def train_portfolio(model, covariance_model, optimizer, epoch, dataset, training_method='two-stage', device='cpu', evaluate=False):
    """Train the (non-surrogate) portfolio pipeline for one pass over ``dataset``.

    With ``'two-stage'`` the MSE loss plus the norm of the covariance error
    is back-propagated.  With ``'decision-focused'`` the negated realized
    objective is back-propagated and gradients are clamped to
    ``[-MAX_NORM, MAX_NORM]``.  The portfolio QP is only solved when
    ``evaluate`` is true; otherwise the objective is a constant 0 tensor.

    Args:
        model: Return predictor; switched to train mode.
        covariance_model: Module called without arguments to obtain the
            learned covariance; switched to train mode.
        optimizer: Optimizer stepped once per sample.
        epoch: Unused; kept for interface compatibility.
        dataset: Iterable of ``(features, covariance_mat, labels)`` batches.
        training_method: ``'two-stage'`` or ``'decision-focused'``.
        device: Device the batch tensors are moved to.
        evaluate: Whether to solve the optimization problem per sample.

    Returns:
        ``(average_loss, average_obj, timings)`` where ``timings`` is
        ``(forward_time, inference_time, qp_time, backward_time)`` in
        seconds (``qp_time`` is never incremented here and stays 0).

    Raises:
        ValueError: If ``training_method`` is unsupported, or ``evaluate`` is
            true and the module-level ``solver`` is neither ``'qpth'`` nor
            ``'cvxpy'``.
    """
    # Validate up front: previously this error was raised inside the
    # best-effort try below and silently swallowed on every sample.
    if training_method not in ('two-stage', 'decision-focused'):
        raise ValueError('Not implemented method')

    model.train()
    covariance_model.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs = [], []

    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, covariance_mat, labels in tqdm_loader:
            forward_start_time = time.time()
            # Only the first sample of each batch is used.
            features = features[0].to(device)
            covariance_mat = covariance_mat[0].to(device)
            labels = labels[0, :, 0].to(device).float()
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            loss = loss_fn(predictions, labels)
            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG  # TODO

            if evaluate:
                forward_time += time.time() - forward_start_time
                inference_start_time = time.time()

                p = predictions
                # =============== solving QP using qpth ================
                if solver == 'qpth':
                    G = -torch.eye(n)
                    h = torch.zeros(n)
                    A = torch.ones(1, n)
                    b = torch.ones(1)
                    qp_solver = qpth.qp.QPFunction()
                    x = qp_solver(alpha * Q, -p, G, h, A, b)[0]
                # =============== solving QP using CVXPY ===============
                elif solver == 'cvxpy':
                    # The matrix square root is only needed by the CVXPY form.
                    L = sqrtm(Q)
                    x_var = cp.Variable(n)
                    L_para = cp.Parameter((n, n))
                    p_para = cp.Parameter(n)
                    constraints = [x_var >= 0, x_var <= 1, cp.sum(x_var) == 1]
                    objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ x_var) + p_para.T @ x_var)
                    problem = cp.Problem(objective, constraints)

                    cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para], variables=[x_var])
                    x, = cvxpylayer(L, -p)
                else:
                    # Fail clearly instead of hitting an unbound ``x`` below.
                    raise ValueError(f'Unknown solver: {solver!r}')

                obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x

                inference_time += time.time() - inference_start_time
            else:
                obj = torch.Tensor([0])

            # ====================== back-prop =====================
            optimizer.zero_grad()
            backward_start_time = time.time()
            try:
                if training_method == 'two-stage':
                    Q_loss = torch.norm(Q - Q_real)
                    (loss + Q_loss).backward()
                else:  # 'decision-focused'
                    (-obj).backward()
                    for parameter in model.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                    for parameter in covariance_model.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
            except Exception:
                # Best effort: keep training on failure, but let
                # KeyboardInterrupt / SystemExit propagate.
                print("no grad is backpropagated...")
            # The optimizer steps even after a failure, as before.
            optimizer.step()
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(train_losses)
    average_obj = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time, backward_time)