def train(self, matrix_a, matrix_b):
    """
    Train a ridge regression mapping matrix_a to matrix_b.

    If cross validation is set to True, it performs generalized cross
    validation. (Hastie, Tibshirani and Friedman, Second edition,
    page 244).

    Args:
        matrix_a: input (source) matrix.
        matrix_b: target matrix.

    Returns:
        The learned regression matrix (first element of the tuple
        returned by Linalg.ridge_regression).
    """
    if not self._crossvalidation:
        # Single fit with the preset regularization parameter.
        return Linalg.ridge_regression(matrix_a, matrix_b, self._param,
                                       self._intercept)[0]
    else:
        min_err_param = 0
        min_err = np.Inf
        # Initialized to Inf so a zero denominator on the first
        # candidate cannot produce a spurious winner below.
        gcv_err = np.Inf
        # Number of training samples.
        N = matrix_a.shape[0]
        for param in self._param_range:
            mat_x, S_trace, err1 = Linalg.ridge_regression(
                matrix_a, matrix_b, param, self._intercept)
            # GCV denominator: N * (1 - trace(S)/N)^2, where S is the
            # smoother ("hat") matrix of the ridge fit.
            nom = pow(1 - S_trace / N, 2) * N
            if nom != 0:
                # GCV score; skipped (stale value kept) when the
                # denominator degenerates to zero.
                gcv_err = (err1 * err1) / nom
            if gcv_err < min_err:
                min_err = gcv_err
                min_err_param = param
        #print "lambda:", min_err_param
        # Refit with the parameter that minimized the GCV score.
        return Linalg.ridge_regression(matrix_a, matrix_b, min_err_param,
                                       self._intercept)[0]
def train(self, matrix_a, matrix_b):
    """
    Fit a ridge regression from matrix_a to matrix_b.

    When cross-validation is enabled, the regularization strength is
    selected by generalized cross validation (Hastie, Tibshirani and
    Friedman, Second edition, page 244); otherwise the preset
    parameter self._param is used directly.

    Args:
        matrix_a: input (source) matrix.
        matrix_b: target matrix.

    Returns:
        The learned regression matrix.
    """
    if not self._crossvalidation:
        return Linalg.ridge_regression(
            matrix_a, matrix_b, self._param, self._intercept)[0]

    # Generalized cross validation over the configured parameter range.
    best_param = 0
    best_score = float("inf")
    score = float("inf")
    sample_count = matrix_a.shape[0]

    for candidate in self._param_range:
        _, smoother_trace, residual = Linalg.ridge_regression(
            matrix_a, matrix_b, candidate, self._intercept)
        # GCV denominator: N * (1 - trace(S)/N)^2.
        denominator = pow(1 - smoother_trace / sample_count, 2) * sample_count
        if denominator != 0:
            score = (residual * residual) / denominator
        if score < best_score:
            best_score = score
            best_param = candidate

    # Refit with the GCV-optimal regularization parameter.
    return Linalg.ridge_regression(
        matrix_a, matrix_b, best_param, self._intercept)[0]
def train(self, matrix_a, matrix_b=None):
    """
    Train a trace-norm regression mapping matrix_a to matrix_b.

    If cross validation is set to True, it performs generalized cross
    validation. (Hastie, Tibshirani and Friedman, Second edition,
    page 244). TODO: not yet!

    Args:
        matrix_a: input (source) matrix.
        matrix_b: target matrix; may be omitted only when
            self._projector is set (then matrix_a is its own target).

    Returns:
        The learned regression matrix W.

    Raises:
        ValueError: if matrix_b is required but not provided.
    """
    # Per-process convergence log directory.
    pid = str(getpid())
    logdir = 'convergence/' + pid
    call(['mkdir', '-p', logdir])  # mkdir -p convergence/pid
    self._trainId += 1  # For logging purposes
    if not self._crossvalidation:
        if self._projector:
            # Auto-associative setting: project matrix_a onto itself.
            matrix_b = matrix_a
        elif matrix_b is None:
            # Fix: previously a missing matrix_b slipped straight into
            # tracenorm_regression; fail fast with a clear message.
            raise ValueError("Unable to perform training without a phrase space")
        W, costs = Linalg.tracenorm_regression(matrix_a, matrix_b,
                                               self._param,
                                               self._iterations,
                                               self._intercept)
        print_tuple_list(costs, logdir + '/' + str(self._trainId) +
                         '-lmbd-' + str(self._param))
        return W
    elif matrix_b is None:  # fix: identity test, not "== None"
        raise ValueError("Unable to perform cross-validation without a phrase space")
    else:
        min_err_param = 0
        min_err = np.Inf
        # Initialized to Inf so a zero denominator on the first
        # candidate cannot produce a spurious winner.
        gcv_err = np.Inf
        # Number of training samples.
        N = matrix_a.shape[0]
        for param in self._param_range:
            mat_x, S_trace, err1 = Linalg.ridge_regression(
                matrix_a, matrix_b, param, self._intercept)
            # GCV denominator: N * (1 - trace(S)/N)^2.
            nom = pow(1 - S_trace / N, 2) * N
            if nom != 0:
                gcv_err = (err1 * err1) / nom
            if gcv_err < min_err:
                min_err = gcv_err
                min_err_param = param
        #print "lambda:", min_err_param
        # Refit with the GCV-optimal regularization parameter.
        return Linalg.ridge_regression(matrix_a, matrix_b, min_err_param,
                                       self._intercept)[0]
def test_sparse_ridge_regression(self):
    """
    Ridge regression (lambda=1) on sparse pseudo-inverse fixtures.

    Checks that unregularized least squares recovers the known
    pseudo-inverse, and that the ridge solution's combined objective
    (residual norm + solution norm) does not exceed that of the
    unregularized solution.
    """
    test_cases = self.pinv_test_cases
    for m, m_inv in test_cases:
        m1 = SparseMatrix(m)
        id_ = SparseMatrix.identity(m1.shape[0])
        res1 = Linalg.lstsq_regression(m1, id_)
        # Least-squares solution should match the known pseudo-inverse
        # to 7 decimal places.
        np.testing.assert_array_almost_equal(res1.mat.todense(), m_inv, 7)
        res2 = Linalg.ridge_regression(m1, id_, 1)[0]
        # Residuals of each solution against the reference inverse.
        error1 = (m1 * res1 - SparseMatrix(m_inv)).norm()
        error2 = (m1 * res2 - SparseMatrix(m_inv)).norm()
        #print "err", error1, error2
        # Regularized objective: residual plus norm of the solution.
        norm1 = error1 + res1.norm()
        norm2 = error2 + res2.norm()
        #print "norm", norm1, norm2
        #THIS SHOULD HOLD, MAYBE ROUNDIGN ERROR?
        #self.assertGreaterEqual(error2, error1)
        self.assertGreaterEqual(norm1, norm2)
def test_sparse_ridge_regression(self):
    """
    Verify ridge regression (lambda=1) against plain least squares on
    the sparse pseudo-inverse fixtures: least squares must recover the
    known pseudo-inverse, and the ridge solution must achieve a
    regularized objective no worse than the unregularized one.
    """
    for matrix, pinv in self.pinv_test_cases:
        sparse_m = SparseMatrix(matrix)
        identity = SparseMatrix.identity(sparse_m.shape[0])

        lstsq_sol = Linalg.lstsq_regression(sparse_m, identity)
        np.testing.assert_array_almost_equal(
            lstsq_sol.mat.todense(), pinv, 7)

        ridge_sol = Linalg.ridge_regression(sparse_m, identity, 1)[0]

        lstsq_residual = (sparse_m * lstsq_sol - SparseMatrix(pinv)).norm()
        ridge_residual = (sparse_m * ridge_sol - SparseMatrix(pinv)).norm()

        # Regularized objective: residual plus norm of the solution.
        # (The stronger claim ridge_residual >= lstsq_residual should
        # also hold, but may fail on rounding error.)
        lstsq_objective = lstsq_residual + lstsq_sol.norm()
        ridge_objective = ridge_residual + ridge_sol.norm()
        self.assertGreaterEqual(lstsq_objective, ridge_objective)