Exemplo n.º 1
0
    def transform(self, X_dict):
        """Turn per-sample light-curve records into a numeric feature matrix.

        Each output row starts with ten scalar entries read from the
        record (``period`` is divided by ``div_period`` first).  Then, for
        the ``'r'`` band followed by the ``'b'`` band, the binned curve is
        fitted with an Earth model and the row is extended with:

        * the grid position at which the fitted curve is largest,
        * the peak-to-peak amplitude of the fitted curve,
        * the fitted curve itself, rotated to start at its maximum and
          divided by the amplitude.

        Parameters
        ----------
        X_dict : iterable of mapping
            Records providing the keys read below.  ``unfold_sample`` is
            called on every record before its ``phase_*`` and
            ``light_points_*`` entries are read.

        Returns
        -------
        numpy.ndarray
            One row per record, in input order.
        """
        num_bins = 64
        # Evaluation grid covering the three replicated copies of the curve.
        # It is only read below, so it is built once instead of once per
        # sample and band.
        t = np.arange(-1., 2., 0.01)

        X = []
        for x in X_dict:
            real_period = x['period'] / x['div_period']
            x_new = [x['magnitude_b'], x['magnitude_r'], real_period,
                     x['asym_b'], x['asym_r'], x['log_p_not_variable'],
                     x['sigma_flux_b'], x['sigma_flux_r'],
                     x['quality'], x['div_period']]

            for color in ['r', 'b']:
                unfold_sample(x, color=color)
                x_train = x['phase_' + color]
                y_train = x['light_points_' + color]

                # Rebuilt for every call because binify is defined elsewhere
                # and might modify its arguments -- TODO confirm and hoist.
                bins = np.linspace(0, 1, num_bins + 1)
                x_train, y_train = binify(bins, x_train, y_train)

                # Repeat the binned curve shifted by -1, 0 and +1
                # (presumably so the fit treats the curve as periodic).
                time_points_ = np.concatenate((x_train - 1.,
                                               x_train,
                                               x_train + 1.), axis=0)
                light_points_ = np.concatenate((y_train,
                                                y_train,
                                                y_train), axis=0)

                model = Earth(penalty=0.3,
                              max_terms=10,
                              thresh=0,
                              smooth=True,
                              check_every=5,
                              max_degree=10)
                model.fit(time_points_[:, np.newaxis], light_points_)

                y = model.predict(t)
                i_max = y.argmax()

                # Rotate the fitted curve so that it starts at its maximum.
                y_ = np.concatenate((y[i_max:], y[:i_max]), axis=0)

                x_new.append(t[i_max])
                amplitude = y_.max() - y_.min()
                x_new.append(amplitude)
                # NOTE(review): a constant fitted curve gives amplitude 0 and
                # therefore non-finite values in the normalised curve.
                y_ /= amplitude

                x_new.extend(y_)

            X.append(x_new)
        return np.array(X)
Exemplo n.º 2
0
def test_gradient_boosting_estimator_with_smooth_quantile_loss():
    """Check a Booster trained with the smooth quantile loss.

    Log-normal responses are simulated from a random linear predictor.
    A Booster over bagged Earth models is compared against
    ``GradientBoostingRegressor(loss='quantile')`` on held-out data: it
    must have lower quantile loss and higher r2, its empirical coverage
    must be within 0.05 of the target quantile, and ``score_`` must be
    positive and match ``score`` on the training data.
    """
    np.random.seed(0)
    n_samples, n_features = 15000, 10
    quantile = .8

    # Draw order matters: it fixes the data produced from the seed above.
    X = np.random.normal(size=(n_samples, n_features))
    coefficients = np.random.normal(size=n_features)
    linear_predictor = np.dot(X, coefficients)
    y = np.random.lognormal(linear_predictor)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=.33333333333333)

    base_learner = BaggingRegressor(
        Earth(max_degree=2, verbose=False, use_fast=True, max_terms=10))
    smooth_loss = SmoothQuantileLossFunction(1, quantile, .0001)
    exact_loss = QuantileLossFunction(1, quantile)
    early_stop = stop_after_n_iterations_without_percent_improvement_over_threshold(3, .01)
    booster = Booster(base_learner,
                      smooth_loss,
                      n_estimators=150,
                      stopper=early_stop,
                      verbose=True)

    # Predicting before fitting must fail.
    assert_raises(NotFittedError, lambda: booster.predict(X_train))

    booster.fit(X_train, y_train)
    booster_pred = booster.predict(X_test)

    reference = GradientBoostingRegressor(loss='quantile', alpha=quantile)
    reference.fit(X_train, y_train)
    reference_pred = reference.predict(X_test)

    assert_less(exact_loss(y_test, booster_pred),
                exact_loss(y_test, reference_pred))
    assert_greater(r2_score(y_test, booster_pred),
                   r2_score(y_test, reference_pred))

    coverage = np.mean(y_test <= booster_pred)
    assert_less(np.abs(coverage - quantile), .05)

    assert_greater(booster.score_, 0.)
    assert_approx_equal(booster.score(X_train, y_train), booster.score_)
Exemplo n.º 3
0
 def fit(self, X, y):
     """Fit an isotonic regression and smooth its plateaus with Earth.

     The fitted isotonic values are collapsed into runs of equal
     consecutive values.  Each run contributes one training point for
     the Earth model: the mean ``X`` of the run, the run's fitted value,
     and the run length as sample weight.

     Parameters
     ----------
     X, y
         Training data, passed unchanged to ``IsotonicRegression.fit``.

     Returns
     -------
     self
         With ``iso_``, ``X_``, ``y_``, ``w_`` and ``spline_`` set.

     Notes
     -----
     Relies on the fitted isotonic model exposing ``X_`` and ``y_``.
     """
     self.iso_ = IsotonicRegression(y_min=self.y_min, y_max=self.y_max).fit(X, y)
     X_ = []
     y_ = []
     w_ = []
     last = self.iso_.y_[0]
     current_sum = 0.0
     current_count = 0
     for x_value, current in zip(self.iso_.X_, self.iso_.y_):
         if current != last:
             # The fitted value changed: close the run collected so far.
             X_.append(current_sum / float(current_count))
             y_.append(last)
             w_.append(float(current_count))
             current_sum = 0.0
             current_count = 0
             last = current
         current_sum += x_value
         current_count += 1
     # Close the final run as well.  It used to be dropped, which lost the
     # last plateau and left nothing to fit when the isotonic fit was
     # constant.
     X_.append(current_sum / float(current_count))
     y_.append(last)
     w_.append(float(current_count))
     self.X_ = numpy.array(X_)
     self.y_ = numpy.array(y_)
     self.w_ = numpy.array(w_)
     self.spline_ = Earth(**self.kwargs).fit(self.X_, self.y_, sample_weight=self.w_)
     return self
Exemplo n.º 4
0
def mars_tune(max_degree, penalty):
    """Score an Earth + logistic-regression pipeline for the given settings.

    The pipeline is fitted on the module-level ``x0``/``y0`` and evaluated
    on ``x1``/``y1`` (all defined outside this function).

    Parameters
    ----------
    max_degree
        Passed to ``Earth`` after conversion with ``int``.
    penalty
        Passed to ``Earth`` unchanged.

    Returns
    -------
    The value of ``auc(y1, p)``, where ``p`` is column 1 of the
    pipeline's ``predict_proba(x1)``.
    """
    # Earth supplies the features, LogisticRegression does the classification.
    steps = [
        ('earth', Earth(max_degree=int(max_degree), penalty=penalty)),
        ('logistic', LogisticRegression()),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit(x0, y0)

    second_column_proba = pipeline.predict_proba(x1)[:, 1]
    return auc(y1, second_column_proba)
Exemplo n.º 5
0
def test_gradient_boosting_estimator_with_binomial_deviance_loss():
    """Check a Booster of Earth models trained with binomial deviance.

    On a seeded two-class problem the training accuracy must exceed 0.90
    and every predicted probability must lie in [0, 1].
    """
    np.random.seed(0)
    X, y = make_classification(n_classes=2)

    deviance = BinomialDeviance(2)
    base_learner = Earth(max_degree=2, use_fast=True, max_terms=10)
    booster = Booster(base_learner, deviance)
    booster.fit(X, y)

    n_correct = np.sum(booster.predict(X) == y)
    accuracy = n_correct / float(y.shape[0])
    assert_greater(accuracy, .90)

    assert_true(np.all(0 <= booster.predict_proba(X)))
    assert_true(np.all(1 >= booster.predict_proba(X)))
Exemplo n.º 6
0
def test_sklearn2code_export():
    """Export a fitted Booster with sklearn2code and validate the result.

    A Booster of Earth models is fitted on a seeded two-class problem held
    in a DataFrame with columns ``x0``, ``x1``, ...  Its ``predict``,
    ``predict_proba`` and ``transform`` methods are exported with the
    ``numpy_flat`` language, loaded as a module, and checked with
    ``assert_correct_exported_module``.
    """
    exported_methods = ('predict', 'predict_proba', 'transform')

    np.random.seed(0)
    features, labels = make_classification(n_classes=2)
    column_names = ['x%d' % idx for idx in range(features.shape[1])]
    frame = DataFrame(features, columns=column_names)

    deviance = BinomialDeviance(2)
    booster = Booster(Earth(max_degree=2, use_fast=True, max_terms=10),
                      deviance)
    booster.fit(frame, labels)

    # Each call receives its own list, as in the original call sites.
    code = sklearn2code(booster, list(exported_methods), numpy_flat)
    module = exec_module('test_module', code)
    assert_correct_exported_module(booster, module, list(exported_methods),
                                   dict(X=frame), frame)
Exemplo n.º 7
0
 def fit(self, X, y):
     """Fit an Earth spline to a moving average of ``y`` ordered by ``X``.

     Parameters
     ----------
     X, y
         Training data.  Both are indexed with ``numpy.argsort(X)``, so
         they are assumed to be one-dimensional arrays -- TODO confirm.

     Returns
     -------
     self
         With ``spline_`` set to the fitted Earth model.

     Notes
     -----
     When ``self.window_size`` is None the window is ``len(X) // 100``.
     ``moving_average`` is defined elsewhere; the slice below assumes it
     returns one value per full window -- TODO confirm.
     """
     if self.window_size is None:
         # Floor division keeps the window an integer on Python 3.
         window_size = len(X) // 100
     else:
         window_size = self.window_size

     order = numpy.argsort(X)
     y_ = moving_average(y[order], window_size)
     # Slice bounds must be integers.  With "/" they became floats on
     # Python 3 and the slice raised TypeError.  "//" gives the same bounds
     # Python 2 integer division did.
     width = int(window_size)
     x_ = X[order][width // 2 - 1:(-width) // 2]
     self.spline_ = Earth(**self.kwargs).fit(x_, y_)
     return self
Exemplo n.º 8
0
def test_with_response_transformation():
    """Check that ResponseTransformingEstimator predicts on the log scale.

    An Earth model wrapped with a ``Log`` transformation of ``y`` is fitted
    to the Boston housing data.  Its predictions must explain log(y) well
    (r2 > 0.8) and the untransformed y poorly (r2 < 0.1).
    """
    features, target = load_boston(return_X_y=True)
    log_target = np.log(target)

    column_names = ['x%d' % idx for idx in range(features.shape[1])]
    feature_frame = pandas.DataFrame(features, columns=column_names)
    target_frame = pandas.DataFrame(target, columns=['y'])

    log_of_y = Log(Identity('y'))
    transformer = VariableTransformer(dict(y=log_of_y))
    model = ResponseTransformingEstimator(Earth(), transformer)
    model.fit(feature_frame, target_frame)

    predicted = model.predict(feature_frame)
    assert r2_score(log_target, predicted) > .8
    assert r2_score(target_frame, predicted) < .1
Exemplo n.º 9
0
class SmoothIso(BaseEstimator, RegressorMixin):
    """Isotonic regression whose step function is smoothed by an Earth spline.

    Parameters
    ----------
    y_min, y_max
        Passed to ``IsotonicRegression``.
    **kwargs
        Passed to the ``Earth`` constructor when fitting.
    """

    def __init__(self, y_min=None, y_max=None, **kwargs):
        self.y_min = y_min
        self.y_max = y_max
        self.kwargs = kwargs

    def fit(self, X, y):
        """Fit the isotonic regression, then the Earth spline on its plateaus.

        The fitted isotonic values are collapsed into runs of equal
        consecutive values.  Each run contributes one training point for
        the Earth model: the mean ``X`` of the run, the run's fitted
        value, and the run length as sample weight.

        Parameters
        ----------
        X, y
            Training data, passed unchanged to ``IsotonicRegression.fit``.

        Returns
        -------
        self
            With ``iso_``, ``X_``, ``y_``, ``w_`` and ``spline_`` set.

        Notes
        -----
        Relies on the fitted isotonic model exposing ``X_`` and ``y_``.
        """
        self.iso_ = IsotonicRegression(y_min=self.y_min, y_max=self.y_max).fit(X, y)
        X_ = []
        y_ = []
        w_ = []
        last = self.iso_.y_[0]
        current_sum = 0.0
        current_count = 0
        for x_value, current in zip(self.iso_.X_, self.iso_.y_):
            if current != last:
                # The fitted value changed: close the run collected so far.
                X_.append(current_sum / float(current_count))
                y_.append(last)
                w_.append(float(current_count))
                current_sum = 0.0
                current_count = 0
                last = current
            current_sum += x_value
            current_count += 1
        # Close the final run as well.  It used to be dropped, which lost
        # the last plateau and left nothing to fit when the isotonic fit
        # was constant.
        X_.append(current_sum / float(current_count))
        y_.append(last)
        w_.append(float(current_count))
        self.X_ = numpy.array(X_)
        self.y_ = numpy.array(y_)
        self.w_ = numpy.array(w_)
        self.spline_ = Earth(**self.kwargs).fit(self.X_, self.y_, sample_weight=self.w_)
        return self

    def predict(self, X):
        """Return the fitted Earth spline's predictions for ``X``."""
        return self.spline_.predict(X)

    def transform(self, X):
        """Return the same values as ``predict``."""
        return self.predict(X)
Exemplo n.º 10
0
class SmoothMovingAverage(BaseEstimator, RegressorMixin):
    """Earth spline fitted to a moving average of the response.

    Parameters
    ----------
    window_size
        Moving-average window.  When None, ``len(X) // 100`` is used at
        fit time.
    **kwargs
        Passed to the ``Earth`` constructor when fitting.
    """

    def __init__(self, window_size=None, **kwargs):
        self.window_size = window_size
        self.kwargs = kwargs

    def fit(self, X, y):
        """Fit an Earth spline to a moving average of ``y`` ordered by ``X``.

        Parameters
        ----------
        X, y
            Training data.  Both are indexed with ``numpy.argsort(X)``, so
            they are assumed to be one-dimensional arrays -- TODO confirm.

        Returns
        -------
        self
            With ``spline_`` set to the fitted Earth model.

        Notes
        -----
        ``moving_average`` is defined elsewhere; the slice below assumes
        it returns one value per full window -- TODO confirm.
        """
        if self.window_size is None:
            # Floor division keeps the window an integer on Python 3.
            window_size = len(X) // 100
        else:
            window_size = self.window_size

        order = numpy.argsort(X)
        y_ = moving_average(y[order], window_size)
        # Slice bounds must be integers.  With "/" they became floats on
        # Python 3 and the slice raised TypeError.  "//" gives the same
        # bounds Python 2 integer division did.
        width = int(window_size)
        x_ = X[order][width // 2 - 1:(-width) // 2]
        self.spline_ = Earth(**self.kwargs).fit(x_, y_)
        return self

    def predict(self, X):
        """Return the fitted Earth spline's predictions for ``X``."""
        return self.spline_.predict(X)

    def transform(self, X):
        """Return the same values as ``predict``."""
        return self.predict(X)
Exemplo n.º 11
0
        train.drop('activity_id', axis=1, inplace=True)
        train.drop('outcome', axis=1, inplace=True)

        test = pd.read_csv(projPath + 'input/xtest_ds_' + dataset_version +
                           '.csv')
        id_test = test.activity_id
        test.drop('activity_id', axis=1, inplace=True)

        # folds
        xfolds = pd.read_csv(projPath + 'input/5-fold.csv')

        ## model
        # setup model instances
        # Combine Earth with LogisticRegression in a pipeline to do classification
        earth_classifier1 = Pipeline([('earth',
                                       Earth(max_degree=1, penalty=.005)),
                                      ('logistic', LogisticRegression())])
        # Combine Earth with LogisticRegression in a pipeline to do classification
        earth_classifier2 = Pipeline([('earth', Earth(max_degree=4,
                                                      penalty=7)),
                                      ('logistic', LogisticRegression())])

        stacker = BinaryStackingClassifier(
            [earth_classifier1, earth_classifier2],
            xfolds=xfolds,
            evaluation=auc)
        stacker.fit(train, y_train)

        meta = stacker.meta_train
        meta['activity_id'] = id_train
        meta['outcome'] = y_train
Exemplo n.º 12
0
from sklearn.naive_bayes import GaussianNB
from sklearn.lda import LDA
from sklearn.qda import QDA

from sklearn.linear_model.logistic import LogisticRegression
from sklearn.pipeline import Pipeline
from pyearth.earth import Earth

# Print this module's docstring.
print(__doc__)

h = .02  # step size in the mesh

# Seed NumPy's global random generator with a fixed value.
np.random.seed(1)

# Combine Earth with LogisticRegression in a pipeline to do classification:
# the 'earth' step feeds the 'logistic' step.
earth_classifier = Pipeline([('earth', Earth(max_degree=3, penalty=1.5)),
                             ('logistic', LogisticRegression())])

# Display names; presumably matched by position with the `classifiers`
# list defined next -- TODO confirm.
names = [
    "Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree",
    "Random Forest", "Naive Bayes", "LDA", "QDA", "Earth"
]
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025, probability=True),
    SVC(gamma=2, C=1, probability=True),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    GaussianNB(),
    LDA(),
    QDA(), earth_classifier
Exemplo n.º 13
0
    missing = np.random.binomial(1, .1, size=X.shape)
    X[missing] = np.nan
    X = DataFrame(X, columns=['x%d' % i for i in range(n)])
    return (dict(X=X, y=y), dict(X=X), dict(X=X))


def create_boston_housing():
    """Build the Boston housing test problem.

    Returns
    -------
    tuple of dict
        ``(dict(X=X, y=y), dict(X=X), dict(X=X))``, where ``X`` is a
        DataFrame with columns ``x0``, ``x1``, ... and ``y`` is the target
        returned by ``load_boston``.
    """
    features, target = load_boston(return_X_y=True)
    column_names = ['x%d' % idx for idx in range(features.shape[1])]
    frame = DataFrame(features, columns=column_names)

    with_target = dict(X=frame, y=target)
    return (with_target, dict(X=frame), dict(X=frame))


test_cases = [
    (VotingClassifier([('logistic', LogisticRegression()),
                       ('earth',
                        Pipeline([('earth', Earth()),
                                  ('logistic', LogisticRegression())]))],
                      'hard',
                      weights=[1.01, 1.01]), ['predict'],
     create_weird_classification_problem_1()),
    (GradientBoostingClassifier(max_depth=10,
                                n_estimators=10), ['predict_proba', 'predict'],
     create_weird_classification_problem_1()),
    (LogisticRegression(), ['predict_proba', 'predict'],
     create_weird_classification_problem_1()),
    (IsotonicRegression(out_of_bounds='clip'), ['predict'],
     create_isotonic_regression_problem_1()),
    (Earth(), ['predict', 'transform'], create_regression_problem_1()),
    (Earth(allow_missing=True), ['predict', 'transform'],
     create_regression_problem_with_missingness_1()),
    (ElasticNet(), ['predict'], create_regression_problem_1()),
Exemplo n.º 14
0
from sklearn.datasets.base import load_boston
from pyearth.earth import Earth
from pandas import DataFrame
from sklearn2code.sklearn2code import sklearn2code
from sklearn2code.languages import numpy_flat
from sklearn2code.utility import exec_module
from numpy.testing.utils import assert_array_almost_equal
from yapf.yapflib.yapf_api import FormatCode

# Load the Boston housing data: a labelled feature frame and its target.
boston = load_boston()
y = boston['target']
X = DataFrame(boston['data'], columns=boston['feature_names'])

# Fit a py-earth model with interactions up to degree 2.
model = Earth(max_degree=2)
model.fit(X, y)

# Turn the fitted model's predict method into source code.
code = sklearn2code(model, ['predict'], numpy_flat)

# Load that source code as a module of its own.
boston_housing_module = exec_module('boston_housing_module', code)

# The generated predict takes one keyword argument per column; its output
# must agree with the fitted model's predict method.
assert_array_almost_equal(
    model.predict(X),
    boston_housing_module.predict(**X),
)

# Print the generated code (yapf returns the formatted text first).
formatted = FormatCode(code, style_config='pep8')
print(formatted[0])