def _check_optimality_conditions(self,
                                     model_params,
                                     lambdas,
                                     opt_thres=1e-2):
        # sanity check function to see that cvxpy is solving to a good enough accuracy
        # check that the gradient is close to zero
        # can use this to check that our implicit derivative assumptions hold
        # lambdas must be an exploded lambda matrix
        #
        # model_params: dict holding the fitted "alpha", "beta", "gamma"
        # lambdas: exploded penalty parameters; indices 0..4 are used below
        #   (0: nuclear norm on gamma, 1/2: l1/l2 on alpha, 3/4: l1/l2 on beta)
        # opt_thres: tolerance on each gradient norm; asserts fire if exceeded
        print "check_optimality_conditions!"

        alpha = model_params["alpha"]
        beta = model_params["beta"]
        gamma = model_params["gamma"]

        # SVD of gamma — needed for the subgradient of the nuclear norm
        u_hat, sigma_hat, v_hat = self._get_svd_mini(gamma)

        # first derivative of the (averaged) square loss wrt the fitted matrix,
        # flattened column-major and masked to the training entries via train_vec
        d_square_loss = -1.0 / self.num_train * np.multiply(
            self.train_vec,
            make_column_major_flat(self.data.observed_matrix -
                                   get_matrix_completion_fitted_values(
                                       self.data.row_features, self.data.
                                       col_features, alpha, beta, gamma)))

        # stationarity wrt gamma, checked from both sides of the SVD:
        # U^T dL + lambda0 * sign(Sigma) V^T and dL V + lambda0 * U sign(Sigma)
        # should both vanish at an optimum (sign(Sigma) is the nuclear-norm
        # subgradient on the observed singular subspace)
        left_grad_at_opt_gamma = (u_hat.T * make_column_major_reshape(
            d_square_loss, (self.data.num_rows, self.data.num_cols)) +
                                  lambdas[0] * np.sign(sigma_hat) * v_hat.T)
        right_grad_at_opt_gamma = (make_column_major_reshape(
            d_square_loss, (self.data.num_rows, self.data.num_cols)) * v_hat +
                                   lambdas[0] * u_hat * np.sign(sigma_hat))
        left_grad_norm = np.linalg.norm(left_grad_at_opt_gamma)
        right_grad_norm = np.linalg.norm(right_grad_at_opt_gamma)
        print "grad_at_opt wrt gamma (should be zero)", left_grad_norm, right_grad_norm
        assert (left_grad_norm < opt_thres)
        assert (right_grad_norm < opt_thres)

        print "alpha", alpha
        # stationarity wrt alpha: only coordinates away from zero have a
        # well-defined gradient (l1 subgradient is sign(alpha_i) there)
        grad_at_opt_alpha = []
        for i in range(alpha.size):
            if np.abs(alpha[i]) > self.zero_thres:
                alpha_sign = np.sign(alpha[i])
                grad_at_opt_alpha.append(
                    (d_square_loss.T * make_column_major_flat(
                        self.data.row_features[:, i] * self.onesT_row) +
                     lambdas[1] * alpha_sign + lambdas[2] * alpha[i])[0, 0])
        print "grad_at_opt wrt alpha (should be zero)", grad_at_opt_alpha
        assert (np.all(np.abs(grad_at_opt_alpha) < opt_thres))

        print "beta", beta
        # same check for beta; note the transpose since beta acts on columns
        grad_at_opt_beta = []
        for i in range(beta.size):
            if np.abs(beta[i]) > self.zero_thres:
                beta_sign = np.sign(beta[i])
                grad_at_opt_beta.append(
                    (d_square_loss.T * make_column_major_flat(
                        (self.data.col_features[:, i] * self.onesT_col).T) +
                     lambdas[3] * beta_sign + lambdas[4] * beta[i])[0, 0])
        print "grad_at_opt wrt beta (should be zero)", grad_at_opt_beta
        assert (np.all(np.abs(grad_at_opt_beta) < opt_thres))
# Exemplo n.º 2
# 0
 def _get_dsquare_loss(self):
     """Return the first derivative of the square loss wrt the fitted matrix.

     The derivative is evaluated at the current model parameters
     (alpha_curr, beta_curr, gamma_curr), flattened column-major, with
     entries outside the training set zeroed out.

     Note: the original definition was missing the `self` parameter even
     though the body reads instance state, so calling it as a method
     raised a TypeError.
     """
     d_square_loss = -1.0 / self.num_train * make_column_major_flat(
         self.observed_matrix - get_matrix_completion_fitted_values(
             self.row_features,
             self.col_features,
             self.alpha_curr,
             self.beta_curr,
             self.gamma_curr,
         ))
     # only training entries contribute to the loss gradient
     d_square_loss[self.non_train_mask_vec] = 0
     return d_square_loss
 def _get_d_square_loss(self, alpha, beta, gamma, row_features,
                        col_features):
     """Return d(square loss)/dX at the given model parameters.

     The result is flattened column-major and masked (via train_vec) so
     that only training entries are non-zero.
     """
     fitted = get_matrix_completion_fitted_values(
         row_features,
         col_features,
         alpha,
         beta,
         gamma,
     )
     residual_vec = make_column_major_flat(self.data.observed_matrix - fitted)
     # scale by -1/n_train and zero out non-training entries with the mask
     return -1.0 / self.num_train * np.multiply(self.train_vec, residual_vec)
    def _print_model_details(self):
        # overriding the function in Gradient_Descent_Algo
        params = self.fmodel.current_model_params
        alpha = params["alpha"]
        beta = params["beta"]
        gamma = params["gamma"]
        _, sigma, _ = self._get_svd_mini(gamma)
        self.log("model_deet alpha %s" % alpha)
        self.log("model_deet beta %s" % beta)
        self.log("model_deet sigma %s" % np.diag(sigma))

        # sanity check: the fitted matrix should resemble the real one
        self.log("data.real_matrix row 1 %s" % self.data.real_matrix[1, :])
        fitted_m = get_matrix_completion_fitted_values(
            self.data.row_features,
            self.data.col_features,
            alpha,
            beta,
            gamma,
        )
        self.log("fitted_m row 1 %s" % fitted_m[1, :])
    def _get_val_gradient(self, grad_dict, alpha, beta, gamma, row_features,
                          col_features):
        # get gradient of the validation loss wrt lambda given the gradient of the
        # model parameters wrt lambda
        #
        # grad_dict: holds "dgamma_dlambda", "dalpha_dlambda", "dbeta_dlambda"
        # returns: d(validation loss)/d(lambda), a scalar-like matrix product
        #
        # NOTE(fix): use out-of-place addition below. The original did
        # `model_grad += ...`, which mutates grad_dict["dgamma_dlambda"]
        # in place (model_grad aliases the array stored in the dict),
        # silently corrupting the caller's gradient dictionary.
        model_grad = grad_dict["dgamma_dlambda"]
        if alpha.size > 0:
            model_grad = model_grad + (
                row_features * grad_dict["dalpha_dlambda"] * self.onesT_row)
        if beta.size > 0:
            model_grad = model_grad + (
                col_features * grad_dict["dbeta_dlambda"] *
                self.onesT_col).T

        # chain rule: d(val loss)/dlambda = dL/dX (on validation entries)
        # dotted with dX/dlambda, both flattened column-major
        residual_vec = make_column_major_flat(
            self.data.observed_matrix - get_matrix_completion_fitted_values(
                row_features,
                col_features,
                alpha,
                beta,
                gamma,
            ))
        dval_dlambda = (-1.0 / self.num_val *
                        residual_vec[self.data.validate_idx].T *
                        make_column_major_flat(model_grad)[
                            self.data.validate_idx])
        return dval_dlambda
# Exemplo n.º 6
# 0
 def get_value(self, alpha, beta, gamma, given_nuc_norm=None):
     """Evaluate the penalized objective at (alpha, beta, gamma).

     Returns the training square loss plus a nuclear-norm penalty on
     gamma and elastic-net (l1 + l2) penalties on alpha and beta.
     `given_nuc_norm` lets the caller supply a precomputed nuclear norm
     of gamma instead of recomputing it.
     """
     residual_vec = make_column_major_flat(
         self.observed_matrix - get_matrix_completion_fitted_values(
             self.row_features,
             self.col_features,
             alpha,
             beta,
             gamma,
         ))
     # average squared residual over the training entries only
     square_loss = 0.5 / self.num_train * get_norm2(
         residual_vec[self.train_idx],
         power=2,
     )
     if given_nuc_norm is None:
         nuc_norm = self.lambdas[0] * np.linalg.norm(gamma, ord="nuc")
     else:
         nuc_norm = self.lambdas[0] * given_nuc_norm
     alpha_l1 = self.lambdas[1] * np.linalg.norm(alpha, ord=1)
     alpha_l2 = 0.5 * self.lambdas[2] * get_norm2(alpha, power=2)
     beta_l1 = self.lambdas[3] * np.linalg.norm(beta, ord=1)
     beta_l2 = 0.5 * self.lambdas[4] * get_norm2(beta, power=2)
     return square_loss + nuc_norm + alpha_l1 + alpha_l2 + beta_l1 + beta_l2