from numpy.testing import assert_array_almost_equal
from sklearn.preprocessing import MaxAbsScaler


def test_maxabs_scaler_large_negative_value():
    """Check MaxAbsScaler on toy data with a large negative value."""
    X = [[0., 1.,   +0.5, -1.0],
         [0., 1.,   -0.3, -0.5],
         [0., 1., -100.0,  0.0],
         [0., 0.,   +0.0, -2.0]]
    scaler = MaxAbsScaler()
    X_trans = scaler.fit_transform(X)
    # Each column is divided by its maximum absolute value (100.0 and 2.0
    # for the last two columns); all-zero columns are left unchanged.
    X_expected = [[0., 1.,  0.005, -0.5],
                  [0., 1., -0.003, -0.25],
                  [0., 1., -1.0,    0.0],
                  [0., 0.,  0.0,   -1.0]]
    assert_array_almost_equal(X_trans, X_expected)
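# For reference, a minimal sketch (not part of the test above) of where the
# expected values come from: MaxAbsScaler divides each column by its maximum
# absolute value, and an all-zero column keeps a scale of 1.
import numpy as np
from sklearn.preprocessing import MaxAbsScaler

X_demo = np.array([[0., 1.,    0.5, -1.0],
                   [0., 1.,   -0.3, -0.5],
                   [0., 1., -100.0,  0.0],
                   [0., 0.,    0.0, -2.0]])
demo_scaler = MaxAbsScaler().fit(X_demo)
print(demo_scaler.max_abs_)  # [  0.   1. 100.   2.]
print(demo_scaler.scale_)    # [  1.   1. 100.   2.] -- a zero max-abs becomes 1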
from sklearn.preprocessing import MaxAbsScaler as SKLModel  # assumed alias


class MaxAbsScalerImpl:
    def __init__(self, copy=True):
        self._hyperparams = {'copy': copy}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        # MaxAbsScaler ignores y, but forward it when given so the wrapper
        # keeps the standard scikit-learn fit(X, y) signature.
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
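# Hypothetical usage of the wrapper above (the data here is illustrative,
# assuming SKLModel aliases sklearn.preprocessing.MaxAbsScaler):
import numpy as np

impl = MaxAbsScalerImpl(copy=True)
impl.fit(np.array([[1., -2.], [0.5, 4.]]))
print(impl.transform(np.array([[2., 2.]])))  # [[2.  0.5]] -- columns scaled by 1 and 4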
def fit(self, X, y=None):
    # Unlike the fit above, re-create the wrapped model here so that
    # repeated fit() calls always start from a fresh estimator.
    self._sklearn_model = SKLModel(**self._hyperparams)
    if y is not None:
        self._sklearn_model.fit(X, y)
    else:
        self._sklearn_model.fit(X)
    return self
from numpy.testing import assert_array_almost_equal
from scipy import sparse
from sklearn.preprocessing import MaxAbsScaler


def test_maxabs_scaler_zero_variance_features():
    """Check MaxAbsScaler on toy data with zero variance features."""
    X = [[0., 1., +0.5],
         [0., 1., -0.3],
         [0., 1., +1.5],
         [0., 0., +0.0]]

    scaler = MaxAbsScaler()
    X_trans = scaler.fit_transform(X)
    X_expected = [[0., 1., 1.0 / 3.0],
                  [0., 1., -0.2],
                  [0., 1., 1.0],
                  [0., 0., 0.0]]
    assert_array_almost_equal(X_trans, X_expected)
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv)

    # make sure new data gets transformed correctly
    X_new = [[+0., 2., 0.5],
             [-1., 1., 0.0],
             [+0., 1., 1.5]]
    X_trans_new = scaler.transform(X_new)
    X_expected_new = [[+0., 2.0, 1.0 / 3.0],
                      [-1., 1.0, 0.0],
                      [+0., 1.0, 1.0]]
    assert_array_almost_equal(X_trans_new, X_expected_new, decimal=2)

    # sparse data
    X_csr = sparse.csr_matrix(X)
    X_trans = scaler.fit_transform(X_csr)
    X_expected = [[0., 1., 1.0 / 3.0],
                  [0., 1., -0.2],
                  [0., 1., 1.0],
                  [0., 0., 0.0]]
    assert_array_almost_equal(X_trans.A, X_expected)
    X_trans_inv = scaler.inverse_transform(X_trans)
    assert_array_almost_equal(X, X_trans_inv.A)
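# A sketch (not from the test) making the sparse point explicit: MaxAbsScaler
# only scales, so zeros stay zero and sparse input is supported, whereas
# StandardScaler's default centering (with_mean=True) rejects sparse matrices.
import numpy as np
from scipy import sparse
from sklearn.preprocessing import MaxAbsScaler, StandardScaler

X_sp = sparse.csr_matrix(np.array([[0., 1., 0.5], [0., 1., -0.3]]))
MaxAbsScaler().fit_transform(X_sp)         # fine: sparsity is preserved
try:
    StandardScaler().fit(X_sp)             # cannot center sparse data
except (TypeError, ValueError) as err:
    print(err)
StandardScaler(with_mean=False).fit(X_sp)  # works: scale only, like MaxAbsScaler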
import numpy as np
from sklearn.exceptions import DataConversionWarning
from sklearn.preprocessing import (MaxAbsScaler, MinMaxScaler,
                                   StandardScaler, scale)
# Test helpers from scikit-learn's testing utilities (sklearn.utils.testing
# in the older releases this test targets).
from sklearn.utils.testing import assert_warns_message, clean_warning_registry


def test_warning_scaling_integers():
    # Check warning when scaling integer data
    X = np.array([[1, 2, 0], [0, 0, 0]], dtype=np.uint8)
    w = "Data with input dtype uint8 was converted to float64"

    clean_warning_registry()
    assert_warns_message(DataConversionWarning, w, scale, X)
    assert_warns_message(DataConversionWarning, w, StandardScaler().fit, X)
    assert_warns_message(DataConversionWarning, w, MinMaxScaler().fit, X)
    assert_warns_message(DataConversionWarning, w, MaxAbsScaler().fit, X)
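# If the warning matters in practice, casting to float up front avoids it
# (a trivial sketch, not part of the test):
import numpy as np
from sklearn.preprocessing import MaxAbsScaler

X_float = np.array([[1, 2, 0], [0, 0, 0]], dtype=np.uint8).astype(np.float64)
X_scaled = MaxAbsScaler().fit_transform(X_float)  # no DataConversionWarning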
class CreateMaxAbsScaler(CreateModel):
    def fit(self, data, args):
        self.model = MaxAbsScaler()
        # Time only the call to fit; the interval is the benchmark result.
        with Timer() as t:
            self.model.fit(data.X_train, data.y_train)
        return t.interval

    def test(self, data):
        assert self.model is not None
        return self.model.transform(data.X_test)

    def predict(self, data):
        # "Prediction" for a scaler is transforming the test split.
        with Timer() as t:
            self.predictions = self.test(data)
        data.learning_task = LearningTask.REGRESSION
        return t.interval
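# The Timer context manager and CreateModel base class are not shown in this
# snippet; a minimal Timer consistent with the `t.interval` usage above might
# look like this (an assumption, not the benchmark's actual implementation):
import time

class Timer:
    """Hypothetical: stores the wall-clock duration of the `with` block
    (in seconds) in `self.interval`."""

    def __enter__(self):
        self._start = time.perf_counter()
        return self

    def __exit__(self, *exc):
        self.interval = time.perf_counter() - self._start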
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.ensemble import (AdaBoostClassifier, GradientBoostingClassifier,
                              GradientBoostingRegressor,
                              RandomForestClassifier, RandomForestRegressor)
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.preprocessing import (Binarizer, MaxAbsScaler, MinMaxScaler,
                                   Normalizer, StandardScaler)
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor


def make_models(X, y, y_bin):
    return dict(
        ols=LinearRegression().fit(X, y),
        lr_bin=LogisticRegression().fit(X, y_bin),
        lr_ovr=LogisticRegression(multi_class='ovr').fit(X, y),
        lr_mn=LogisticRegression(solver='lbfgs',
                                 multi_class='multinomial').fit(X, y),
        svc=SVC(kernel='linear').fit(X, y_bin),
        svr=SVR(kernel='linear').fit(X, y),
        dtc=DecisionTreeClassifier(max_depth=4).fit(X, y),
        dtr=DecisionTreeRegressor(max_depth=4).fit(X, y),
        rfc=RandomForestClassifier(n_estimators=3, max_depth=3,
                                   random_state=1).fit(X, y),
        rfr=RandomForestRegressor(n_estimators=3, max_depth=3,
                                  random_state=1).fit(X, y),
        gbc=GradientBoostingClassifier(n_estimators=3, max_depth=3,
                                       random_state=1).fit(X, y),
        gbr=GradientBoostingRegressor(n_estimators=3, max_depth=3,
                                      random_state=1).fit(X, y),
        abc=AdaBoostClassifier(algorithm='SAMME', n_estimators=3,
                               random_state=1).fit(X, y),
        abc2=AdaBoostClassifier(algorithm='SAMME.R', n_estimators=3,
                                random_state=1).fit(X, y),
        abc3=AdaBoostClassifier(algorithm='SAMME', n_estimators=3,
                                random_state=1).fit(X, y_bin),
        abc4=AdaBoostClassifier(algorithm='SAMME.R', n_estimators=3,
                                random_state=1).fit(X, y_bin),
        km=KMeans(n_clusters=1).fit(X),
        km2=KMeans(n_clusters=5).fit(X),
        pc1=PCA(n_components=1).fit(X),
        pc2=PCA(n_components=2).fit(X),
        pc3=PCA(n_components=2, whiten=True).fit(X),
        mlr1=MLPRegressor(hidden_layer_sizes=[2], activation='relu').fit(X, y),
        mlr2=MLPRegressor(hidden_layer_sizes=[2, 1], activation='tanh').fit(X, y),
        mlr3=MLPRegressor(hidden_layer_sizes=[2, 2, 2], activation='identity').fit(X, y),
        mlc=MLPClassifier(hidden_layer_sizes=[2, 2], activation='tanh').fit(X, y),
        mlc_bin=MLPClassifier(hidden_layer_sizes=[2, 2], activation='identity').fit(X, y_bin),
        # Binarizer and Normalizer are stateless, so they are not fitted here.
        bin=Binarizer(threshold=0.5),
        mms=MinMaxScaler().fit(X),
        mas=MaxAbsScaler().fit(X),
        ss1=StandardScaler().fit(X),
        ss2=StandardScaler(with_mean=False).fit(X),
        ss3=StandardScaler(with_std=False).fit(X),
        n1=Normalizer(norm='l1'),
        n2=Normalizer(norm='l2'),
        n3=Normalizer(norm='max'),
    )
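# Hypothetical driver for make_models, with toy inputs of the shapes the
# estimators above expect (sizes and seeds are illustrative only):
import numpy as np

rng = np.random.RandomState(0)
X_toy = rng.rand(30, 3)            # 30 samples, 3 features
y_toy = rng.randint(0, 3, 30)      # 3-class target for the multiclass models
y_bin_toy = rng.randint(0, 2, 30)  # binary target for lr_bin, svc, abc3/abc4, mlc_bin
models = make_models(X_toy, y_toy, y_bin_toy)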
        ('w2v_title_content', Word2VecTitleContent()),
        ('sw', StopWordsCount()),
        ('sw_title', StopWordsTitle()),
        ('pmi', PMI()),
        # ('lda', LDAVectorContent()),
        ('CountingWords', CountingWords()),
        # ('readability', ReadabilityFeatures()),
        ('typos', TyposCount()),
        ('english', EnglishInTitle()),
        ('pos', POSFeatures())
        # ('fastext_sup', FastTextSupervised()),
        # ('fast_text', FastTextAverageContentVector()),
        # ('dicts', Dicts()),
        # ('wmd', WMDDistance()),
    ])),
    ('scaler', MaxAbsScaler()),
    ('clf', LinearSVC(random_state=42))
])

# grid_search = GridSearchCV(pipe, tuned_parameters, cv=5,
#                            scoring='accuracy', verbose=1, n_jobs=-1)
# grid_search.fit(train, train['click_bait_score'])
# print(grid_search.best_params_)

print("training...")
# fit (not fit_transform): the pipeline ends in a classifier, which has no
# transform step, and the transformed output was discarded anyway.
pipe.fit(train, train['click_bait_score'])
print("testing...")
score = pipe.score(test, test['click_bait_score'])
print(score)
    'LassoLars': LassoLars(),
    'LassoLarsCV': LassoLarsCV(),
    'LassoLarsIC': LassoLarsIC(),
    'LatentDirichletAllocation': LatentDirichletAllocation(),
    'LedoitWolf': LedoitWolf(),
    'LinearDiscriminantAnalysis': LinearDiscriminantAnalysis(),
    'LinearRegression': LinearRegression(),
    'LinearSVC': LinearSVC(),
    'LinearSVR': LinearSVR(),
    'LocallyLinearEmbedding': LocallyLinearEmbedding(),
    'LogisticRegression': LogisticRegression(),
    'LogisticRegressionCV': LogisticRegressionCV(),
    'MDS': MDS(),
    'MLPClassifier': MLPClassifier(),
    'MLPRegressor': MLPRegressor(),
    'MaxAbsScaler': MaxAbsScaler(),
    'MeanShift': MeanShift(),
    'MinCovDet': MinCovDet(),
    'MinMaxScaler': MinMaxScaler(),
    'MiniBatchDictionaryLearning': MiniBatchDictionaryLearning(),
    'MiniBatchKMeans': MiniBatchKMeans(),
    'MiniBatchSparsePCA': MiniBatchSparsePCA(),
    'MultiTaskElasticNet': MultiTaskElasticNet(),
    'MultiTaskElasticNetCV': MultiTaskElasticNetCV(),
    'MultiTaskLasso': MultiTaskLasso(),
    'MultiTaskLassoCV': MultiTaskLassoCV(),
    'MultinomialNB': MultinomialNB(),
    'NMF': NMF(),
    'NearestCentroid': NearestCentroid(),
    'NearestNeighbors': NearestNeighbors(),
    'Normalizer': Normalizer(),