def test_linear_fit(): from statsmodels.regression.linear_model import GLS, OLS earth = Earth(**default_params) earth.fit(X, y) earth.linear_fit(X, y) soln = OLS(y, earth.transform(X)).fit().params assert_almost_equal(numpy.mean((earth.coef_ - soln)**2), 0.0) sample_weight = 1.0 / (numpy.random.normal(size=y.shape)**2) earth.fit(X, y) earth.linear_fit(X, y, sample_weight) soln = GLS(y, earth.transform(X), 1.0 / sample_weight).fit().params assert_almost_equal(numpy.mean((earth.coef_ - soln)**2), 0.0)
def test_linear_fit(): from statsmodels.regression.linear_model import GLS, OLS earth = Earth(**default_params) earth.fit(X, y) earth._Earth__linear_fit(X, y) soln = OLS(y, earth.transform(X)).fit().params assert_almost_equal(numpy.mean((earth.coef_ - soln) ** 2), 0.0) sample_weight = 1.0 / (numpy.random.normal(size=y.shape) ** 2) earth.fit(X, y) earth._Earth__linear_fit(X, y, sample_weight) soln = GLS(y, earth.transform( X), 1.0 / sample_weight).fit().params assert_almost_equal(numpy.mean((earth.coef_ - soln) ** 2), 0.0)
def test_exhaustive_search(): model = Earth(max_terms=13, enable_pruning=False, check_every=1, thresh=0, minspan=1, endspan=1) model.fit(X, y) assert_equal(model.basis_.plen(), model.coef_.shape[1]) assert_equal(model.transform(X).shape[1], len(model.basis_))
class TestEarth(object): def __init__(self): numpy.random.seed(0) self.basis = Basis(10) constant = ConstantBasisFunction() self.basis.append(constant) bf1 = HingeBasisFunction(constant, 0.1, 10, 1, False, 'x1') bf2 = HingeBasisFunction(constant, 0.1, 10, 1, True, 'x1') bf3 = LinearBasisFunction(bf1, 2, 'x2') self.basis.append(bf1) self.basis.append(bf2) self.basis.append(bf3) self.X = numpy.random.normal(size=(100, 10)) self.B = numpy.empty(shape=(100, 4), dtype=numpy.float64) self.basis.transform(self.X, self.B) self.beta = numpy.random.normal(size=4) self.y = numpy.empty(shape=100, dtype=numpy.float64) self.y[:] = numpy.dot( self.B, self.beta) + numpy.random.normal(size=100) self.earth = Earth(penalty=1) def test_get_params(self): assert_equal( Earth().get_params(), {'penalty': None, 'min_search_points': None, 'endspan_alpha': None, 'check_every': None, 'max_terms': None, 'max_degree': None, 'minspan_alpha': None, 'thresh': None, 'minspan': None, 'endspan': None, 'allow_linear': None, 'smooth': None}) assert_equal( Earth( max_degree=3).get_params(), {'penalty': None, 'min_search_points': None, 'endspan_alpha': None, 'check_every': None, 'max_terms': None, 'max_degree': 3, 'minspan_alpha': None, 'thresh': None, 'minspan': None, 'endspan': None, 'allow_linear': None, 'smooth': None}) @if_statsmodels def test_linear_fit(self): from statsmodels.regression.linear_model import GLS, OLS self.earth.fit(self.X, self.y) self.earth._Earth__linear_fit(self.X, self.y) soln = OLS(self.y, self.earth.transform(self.X)).fit().params assert_almost_equal(numpy.mean((self.earth.coef_ - soln) ** 2), 0.0) sample_weight = 1.0 / (numpy.random.normal(size=self.y.shape) ** 2) self.earth.fit(self.X, self.y) self.earth._Earth__linear_fit(self.X, self.y, sample_weight) soln = GLS(self.y, self.earth.transform( self.X), 1.0 / sample_weight).fit().params assert_almost_equal(numpy.mean((self.earth.coef_ - soln) ** 2), 0.0) def test_sample_weight(self): group = numpy.random.binomial(1, .5, size=1000) == 1 sample_weight = 1 / (group * 100 + 1.0) x = numpy.random.uniform(-10, 10, size=1000) y = numpy.abs(x) y[group] = numpy.abs(x[group] - 5) y += numpy.random.normal(0, 1, size=1000) model = Earth().fit(x, y, sample_weight=sample_weight) # Check that the model fits better for the more heavily weighted group assert_true(model.score(x[group], y[group]) < model.score( x[numpy.logical_not(group)], y[numpy.logical_not(group)])) # Make sure that the score function gives the same answer as the trace pruning_trace = model.pruning_trace() rsq_trace = pruning_trace.rsq(model.pruning_trace().get_selected()) assert_almost_equal(model.score(x, y, sample_weight=sample_weight), rsq_trace) # Uncomment below to see what this test situation looks like # from matplotlib import pyplot # print model.summary() # print model.score(x,y,sample_weight = sample_weight) # pyplot.figure() # pyplot.plot(x,y,'b.') # pyplot.plot(x,model.predict(x),'r.') # pyplot.show() def test_fit(self): self.earth.fit(self.X, self.y) res = str(self.earth.trace()) + '\n' + self.earth.summary() # fl.write(res) filename = os.path.join(os.path.dirname(__file__), 'earth_regress.txt') with open(filename, 'r') as fl: prev = fl.read() assert_equal(res, prev) def test_smooth(self): model = Earth(penalty=1, smooth=True) model.fit(self.X, self.y) res = str(model.trace()) + '\n' + model.summary() filename = os.path.join(os.path.dirname(__file__), 'earth_regress_smooth.txt') with open(filename, 'r') as fl: prev = fl.read() assert_equal(res, prev) def test_linvars(self): self.earth.fit(self.X, self.y, linvars=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) res = str(self.earth.trace()) + '\n' + self.earth.summary() filename = os.path.join(os.path.dirname(__file__), 'earth_linvars_regress.txt') with open(filename, 'r') as fl: prev = fl.read() assert_equal(res, prev) def test_score(self): model = self.earth.fit(self.X, self.y) record = model.pruning_trace() rsq = record.rsq(record.get_selected()) assert_almost_equal(rsq, model.score(self.X, self.y)) @if_pandas @if_environ_has('test_pathological_cases') def test_pathological_cases(self): import pandas directory = os.path.join( os.path.dirname(os.path.abspath(__file__)), 'pathological_data') cases = {'issue_44': {}, 'issue_50': {'penalty': 0.5, 'minspan': 1, 'allow_linear': False, 'endspan': 1, 'check_every': 1, 'sample_weight': 'issue_50_weight.csv'}} for case, settings in cases.iteritems(): data = pandas.read_csv(os.path.join(directory, case + '.csv')) y = data['y'] del data['y'] X = data if 'sample_weight' in settings: filename = os.path.join(directory, settings['sample_weight']) sample_weight = pandas.read_csv(filename)['sample_weight'] del settings['sample_weight'] else: sample_weight = None model = Earth(**settings) model.fit(X, y, sample_weight=sample_weight) with open(os.path.join(directory, case + '.txt'), 'r') as infile: correct = infile.read() assert_equal(model.summary(), correct) @if_pandas def test_pandas_compatibility(self): import pandas X = pandas.DataFrame(self.X) y = pandas.DataFrame(self.y) colnames = ['xx' + str(i) for i in range(X.shape[1])] X.columns = colnames model = self.earth.fit(X, y) assert_list_equal( colnames, model.forward_trace()._getstate()['xlabels']) @if_patsy @if_pandas def test_patsy_compatibility(self): import pandas import patsy X = pandas.DataFrame(self.X) y = pandas.DataFrame(self.y) colnames = ['xx' + str(i) for i in range(X.shape[1])] X.columns = colnames X['y'] = y y, X = patsy.dmatrices( 'y ~ xx0 + xx1 + xx2 + xx3 + xx4 + xx5 + xx6 + xx7 + xx8 + xx9 - 1', data=X) model = self.earth.fit(X, y) assert_list_equal( colnames, model.forward_trace()._getstate()['xlabels']) def test_pickle_compatibility(self): model = self.earth.fit(self.X, self.y) model_copy = pickle.loads(pickle.dumps(model)) assert_true(model_copy == model) assert_true( numpy.all(model.predict(self.X) == model_copy.predict(self.X))) assert_true(model.basis_[0] is model.basis_[1]._get_root()) assert_true(model_copy.basis_[0] is model_copy.basis_[1]._get_root()) def test_copy_compatibility(self): model = self.earth.fit(self.X, self.y) model_copy = copy.copy(model) assert_true(model_copy == model) assert_true( numpy.all(model.predict(self.X) == model_copy.predict(self.X))) assert_true(model.basis_[0] is model.basis_[1]._get_root()) assert_true(model_copy.basis_[0] is model_copy.basis_[1]._get_root())