def test_online_batch_consistent():

    # Batch

    batch = (
        preprocessing.StandardScaler() |
        multiclass.OneVsRestClassifier(
            linear_model.LogisticRegression()
        )
    )

    dataset = datasets.ImageSegments()

    batch_metric = metrics.MacroF1()

    for i, x in enumerate(pd.read_csv(dataset.path, chunksize=1)):
        y = x.pop('category')
        y_pred = batch.predict_many(x)
        batch.fit_many(x, y)

        for yt, yp in zip(y, y_pred):
            if yp is not None:
                batch_metric.update(yt, yp)

        if i == 30:
            break

    # Online

    online = (
        preprocessing.StandardScaler() |
        multiclass.OneVsRestClassifier(
            linear_model.LogisticRegression()
        )
    )

    online_metric = metrics.MacroF1()

    X = pd.read_csv(dataset.path)
    Y = X.pop('category')

    for i, (x, y) in enumerate(stream.iter_pandas(X, Y)):
        y_pred = online.predict_one(x)
        online.fit_one(x, y)

        if y_pred is not None:
            online_metric.update(y, y_pred)

        if i == 30:
            break

    assert online_metric.get() == batch_metric.get()
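# The online half above is a hand-rolled progressive-validation loop. As a
# point of comparison, here is a minimal sketch using creme's
# model_selection.progressive_val_score helper, assuming the installed version
# exposes it with this (X_y, model, metric) signature; it runs the same
# predict-then-fit loop and updates the metric internally.
from creme import datasets, linear_model, metrics, model_selection, preprocessing

model = preprocessing.StandardScaler() | linear_model.LogisticRegression()
metric = metrics.Accuracy()
model_selection.progressive_val_score(datasets.Phishing(), model, metric)
print(metric)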
def build_oracle(self) -> compose.Pipeline:
    model = compose.Pipeline(
        ('scale', preprocessing.StandardScaler()),
        ('learn', multiclass.OneVsRestClassifier(
            binary_classifier=linear_model.LogisticRegression()
        ))
    )
    return model
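# For reference, the same pipeline can be written with creme's | shorthand,
# which the other snippets in this section use; the step names are then
# derived from the class names automatically.
from creme import linear_model, multiclass, preprocessing

model = (
    preprocessing.StandardScaler() |
    multiclass.OneVsRestClassifier(
        binary_classifier=linear_model.LogisticRegression()
    )
)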
def test_phishing(client, app):

    r = client.post('/api/init', json={'flavor': 'binary'})

    model = preprocessing.StandardScaler() | linear_model.LogisticRegression()
    client.post('/api/model', data=pickle.dumps(model))

    for i, (x, y) in enumerate(datasets.Phishing().take(30)):

        # Predict/learn via chantilly
        r = client.post('/api/predict', data=json.dumps({
            'id': i,
            'features': x
        }), content_type='application/json')
        client.post('/api/learn', data=json.dumps({
            'id': i,
            'ground_truth': y
        }), content_type='application/json')

        # Predict/learn directly via creme
        y_pred = model.predict_proba_one(x)
        model.fit_one(x, y)

        # Compare the predictions from both sides
        assert math.isclose(y_pred[True], r.json['prediction']['true'])
def test_phishing_without_id(client, app):

    r = client.post('/api/init', json={'flavor': 'binary'})

    model = preprocessing.StandardScaler() | linear_model.LogisticRegression()
    client.post('/api/model', data=pickle.dumps(model))

    for x, y in datasets.Phishing().take(30):

        # Predict/learn via chantilly
        r = client.post('/api/predict', data=json.dumps({'features': x}),
                        content_type='application/json')
        client.post('/api/learn', data=json.dumps({
            'features': x,
            'ground_truth': y
        }), content_type='application/json')

        # Predict/learn directly via creme
        y_pred = model.predict_proba_one(x)
        # Because no ID is provided, chantilly will ask the model to make a
        # prediction a second time in order to update the metric
        model.predict_proba_one(x)
        model.fit_one(x, y)

        # Compare the predictions from both sides
        assert math.isclose(y_pred[True], r.json['prediction']['true'])
def test_log_reg_sklearn_coherence():
    """Checks that the sklearn and creme implementations produce the same results."""

    ss = preprocessing.StandardScaler()
    cr = lm.LogisticRegression(optimizer=optim.SGD(.01))
    sk = sklm.SGDClassifier(learning_rate='constant', eta0=.01, alpha=.0, loss='log')

    for x, y in datasets.Bananas():
        x = ss.fit_one(x).transform_one(x)
        cr.fit_one(x, y)
        sk.partial_fit([list(x.values())], [y], classes=[False, True])

    for i, w in enumerate(cr.weights.values()):
        assert math.isclose(w, sk.coef_[0][i])

    assert math.isclose(cr.intercept, sk.intercept_[0])
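# The weight-by-weight equality holds because, with a constant learning rate,
# no regularization (alpha=.0 on the sklearn side), and log loss, both
# libraries apply the same per-example SGD update. A sketch of that update,
# with illustrative names that belong to neither library:
import math

def sgd_logistic_update(w, b, x, y, lr=0.01):
    score = sum(w.get(i, 0.) * v for i, v in x.items()) + b
    p = 1 / (1 + math.exp(-score))  # predicted probability
    g = p - y                       # log-loss gradient w.r.t. the raw score
    for i, v in x.items():
        w[i] = w.get(i, 0.) - lr * g * v
    return w, b - lr * g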
def _default_params(cls):
    return {
        'classifier': linear_model.LogisticRegression(),
        'code_size': 6
    }
def get_all_estimators():

    ignored = (
        CremeBaseWrapper,
        SKLBaseWrapper,
        base.Wrapper,
        compose.FuncTransformer,
        ensemble.StackingBinaryClassifier,
        feature_extraction.Agg,
        feature_extraction.TargetAgg,
        feature_extraction.Differ,
        linear_model.FMRegressor,
        linear_model.SoftmaxRegression,
        multioutput.ClassifierChain,
        multioutput.RegressorChain,
        naive_bayes.BernoulliNB,
        naive_bayes.ComplementNB,
        preprocessing.OneHotEncoder,
        tree.DecisionTreeClassifier
    )

    def is_estimator(obj):
        return inspect.isclass(obj) and issubclass(obj, base.Estimator)

    for submodule in importlib.import_module('creme').__all__:

        if submodule == 'base':
            continue

        for name, obj in inspect.getmembers(
                importlib.import_module(f'creme.{submodule}'), is_estimator):

            if issubclass(obj, ignored):
                continue

            if issubclass(obj, dummy.StatisticRegressor):
                inst = obj(statistic=stats.Mean())

            elif issubclass(obj, ensemble.BaggingClassifier):
                inst = obj(linear_model.LogisticRegression())

            elif issubclass(obj, ensemble.BaggingRegressor):
                inst = obj(linear_model.LinearRegression())

            elif issubclass(obj, ensemble.HedgeRegressor):
                inst = obj([
                    preprocessing.StandardScaler() | linear_model.LinearRegression(intercept_lr=0.1),
                    preprocessing.StandardScaler() | linear_model.PARegressor(),
                ])

            elif issubclass(obj, feature_selection.RandomDiscarder):
                inst = obj(n_to_keep=5)

            elif issubclass(obj, feature_selection.SelectKBest):
                inst = obj(similarity=stats.PearsonCorrelation())

            elif issubclass(obj, linear_model.LinearRegression):
                inst = preprocessing.StandardScaler() | obj(intercept_lr=0.1)

            elif issubclass(obj, linear_model.PARegressor):
                inst = preprocessing.StandardScaler() | obj()

            elif issubclass(obj, multiclass.OneVsRestClassifier):
                inst = obj(binary_classifier=linear_model.LogisticRegression())

            else:
                inst = obj()

            yield inst
@pytest.mark.parametrize('estimator', [
    pytest.param(copy.deepcopy(estimator), id=str(estimator))
    for estimator in list(get_all_estimators()) + [
        feature_extraction.TFIDF(),
        linear_model.LogisticRegression(),
        preprocessing.StandardScaler() | linear_model.LinearRegression(),
        preprocessing.StandardScaler() | linear_model.PAClassifier(),
        preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(linear_model.LogisticRegression()),
        preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(linear_model.PAClassifier()),
        naive_bayes.GaussianNB(),
        preprocessing.StandardScaler(),
        cluster.KMeans(n_clusters=5, seed=42),
        preprocessing.MinMaxScaler(),
        preprocessing.MinMaxScaler() + preprocessing.StandardScaler(),
        preprocessing.PolynomialExtender(),
        feature_selection.VarianceThreshold(),
        feature_selection.SelectKBest(similarity=stats.PearsonCorrelation())
    ]
])
def test_check_estimator(estimator):
def _default_params(cls):
    return {'model': linear_model.LogisticRegression()}
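# _default_params appears to be the hook that lets the check utilities
# instantiate estimators whose constructors require arguments. A hypothetical
# sketch of how such a utility might consume it; the instantiate helper is an
# assumption, not creme's actual code.
def instantiate(cls):
    params = cls._default_params() if hasattr(cls, '_default_params') else {}
    return cls(**params)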
]


@pytest.mark.parametrize('estimator', [
    pytest.param(copy.deepcopy(estimator), id=str(estimator))
    for estimator in ESTIMATORS
])
def test_sklearn_check_estimator(estimator):
    estimator_checks.check_estimator(compat.convert_creme_to_sklearn(estimator))


@pytest.mark.parametrize('estimator', [
    pytest.param(copy.deepcopy(estimator), id=str(estimator))
    for estimator in ESTIMATORS + [
        # sklearn's check_estimator doesn't support binary classifiers yet
        linear_model.LogisticRegression(),
        # sklearn's check_estimator doesn't support pipelines yet
        preprocessing.StandardScaler() | linear_model.LinearRegression(),
        preprocessing.StandardScaler() | linear_model.PAClassifier(),
        preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(
            linear_model.LogisticRegression()),
        preprocessing.StandardScaler() | multiclass.OneVsRestClassifier(
            linear_model.PAClassifier()),
    ]
])
def test_creme_check_estimator(estimator):
    utils.check_estimator(estimator)
def main():
    benchmark.benchmark(
        get_X_y=datasets.fetch_electricity,
        n=45312,
        get_pp=preprocessing.StandardScaler,
        models=[
            # ('No-change', 'No-change', dummy.NoChangeClassifier()),
            ('creme', 'Logistic regression', linear_model.LogisticRegression(
                optimizer=optim.VanillaSGD(0.05),
                l2=0,
                intercept_lr=0.05
            )),
            # ('creme', 'PA-I', linear_model.PAClassifier(C=1, mode=1)),
            # ('creme', 'PA-II', linear_model.PAClassifier(C=1, mode=2)),
            ('sklearn', 'Logistic regression', compat.CremeClassifierWrapper(
                sklearn_estimator=sk_linear_model.SGDClassifier(
                    loss='log',
                    learning_rate='constant',
                    eta0=0.05,
                    penalty='none'
                ),
                classes=[False, True]
            )),
            # ('sklearn', 'PA-I', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='hinge'
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-II', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='squared_hinge'
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'Logistic regression NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.SGDClassifier(
            #         loss='log',
            #         learning_rate='constant',
            #         eta0=0.01,
            #         fit_intercept=True,
            #         penalty='none'
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-I NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='hinge',
            #         fit_intercept=False
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-II NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='squared_hinge',
            #         fit_intercept=False
            #     ),
            #     classes=[False, True]
            # )),
        ],
        get_metric=metrics.Accuracy
    )
    preprocessing.StandardScaler(),
    compat.CremeClassifierWrapper(
        sklearn_estimator=PassiveAggressiveClassifier(),
        classes=[False, True]
    )
]),
'No-change classifier': dummy.NoChangeClassifier(),
'Passive-aggressive II': compose.Pipeline([
    preprocessing.StandardScaler(),
    linear_model.PAClassifier(C=1, mode=2)
]),
'Logistic regression w/ VanillaSGD': compose.Pipeline([
    preprocessing.StandardScaler(),
    linear_model.LogisticRegression(
        optimizer=optim.VanillaSGD(
            lr=optim.OptimalLR()
        )
    )
]),
'Logistic regression w/ Adam': compose.Pipeline([
    preprocessing.StandardScaler(),
    linear_model.LogisticRegression(optim.Adam(optim.OptimalLR()))
]),
'Logistic regression w/ AdaGrad': compose.Pipeline([
    preprocessing.StandardScaler(),
    linear_model.LogisticRegression(optim.AdaGrad(optim.OptimalLR()))
]),
'Logistic regression w/ RMSProp': compose.Pipeline([
    preprocessing.StandardScaler(),
    linear_model.LogisticRegression(optim.RMSProp(optim.OptimalLR()))
])
elif opt == "Adam":
    optimizer = optim.Adam(lr, beta_1, beta_2, eps)
elif opt == "FTRLProximal":
    optimizer = optim.FTRLProximal(alpha, beta, l1, l2)
elif opt == "Momentum":
    optimizer = optim.Momentum(lr, rho)
elif opt == "RMSProp":
    optimizer = optim.RMSProp(lr, rho, eps)
elif opt == "VanillaSGD":
    optimizer = optim.VanillaSGD(lr)
elif opt == "NesterovMomentum":
    optimizer = optim.NesterovMomentum(lr, rho)
else:
    optimizer = None

log_reg = linear_model.LogisticRegression(optimizer, l2=l2)
OVRClassifier = multiclass.OneVsRestClassifier(log_reg)

output = {}

while True:  # wait for the next request on stdin
    data = input()
    Xi = json.loads(data)
    y = float(Xi.pop(target))
    output["Predict"] = OVRClassifier.predict_one(Xi)
    output["Truth"] = y
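# For context, a hedged sketch of the kind of line the loop above expects on
# stdin; the feature names and the 'category' target key are illustrative
# assumptions, not taken from the original script.
import json

line = json.dumps({'x1': 0.5, 'x2': 1.2, 'category': 3})
print(line)  # e.g. piped into the service's stdin; the loop pops the target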
def main():
    benchmark.benchmark(
        get_X_y=functools.partial(stream.iter_sklearn_dataset, datasets.load_breast_cancer()),
        n=569,
        get_pp=preprocessing.StandardScaler,
        models=[
            ('creme', 'Log reg', linear_model.LogisticRegression(
                optimizer=optim.VanillaSGD(0.01),
                l2=0,
                intercept_lr=0.01
            )),
            ('sklearn', 'SGD', compat.CremeClassifierWrapper(
                sklearn_estimator=sk_linear_model.SGDClassifier(
                    loss='log',
                    learning_rate='constant',
                    eta0=0.01,
                    penalty='none'
                ),
                classes=[False, True]
            )),
            ('creme', 'PA-I', linear_model.PAClassifier(
                C=0.01,
                mode=1,
                fit_intercept=True
            )),
            ('sklearn', 'PA-I', compat.CremeClassifierWrapper(
                sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
                    C=0.01,
                    loss='hinge',
                    fit_intercept=True
                ),
                classes=[False, True]
            )),
            # ('creme', 'PA-I', linear_model.PAClassifier(C=1, mode=1)),
            # ('creme', 'PA-II', linear_model.PAClassifier(C=1, mode=2)),
            # ('sklearn', 'PA-I', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='hinge'
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-II', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='squared_hinge'
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'Logistic regression NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.SGDClassifier(
            #         loss='log',
            #         learning_rate='constant',
            #         eta0=0.01,
            #         fit_intercept=True,
            #         penalty='none'
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-I NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='hinge',
            #         fit_intercept=False
            #     ),
            #     classes=[False, True]
            # )),
            # ('sklearn', 'PA-II NI', compat.CremeClassifierWrapper(
            #     sklearn_estimator=sk_linear_model.PassiveAggressiveClassifier(
            #         C=1,
            #         loss='squared_hinge',
            #         fit_intercept=False
            #     ),
            #     classes=[False, True]
            # )),
        ],
        get_metric=metrics.Accuracy
    )