def test_score_samples(self):
    """Smoke test: fit a ``GridSearchCV`` over a one-step classifier pipeline.

    The pipeline holds a single ``RandomForestClassifier`` step, is put
    through ``.train()`` and is then searched over two ``n_estimators``
    values using accuracy scoring on ``self.iris``.

    NOTE(review): the method name mentions ``score_samples`` but the body
    never calls it -- it only runs a grid search. Confirm the intended name.
    """
    features = self.iris['data']
    labels = self.iris['target']

    # Single-step pipeline; the step name is what the grid key is prefixed with.
    pipeline = Pipeline(
        [('classifier', RandomForestClassifier(random_state=42))]
    ).train()

    search = GridSearchCV(
        estimator=pipeline,
        param_grid={'classifier__n_estimators': [100, 50]},
        scoring='accuracy',
        verbose=1,
    )
    search.fit(features, labels)
def test_score_samples(self):
    """Apply transforms, then ``score_samples`` of the final estimator.

    Builds a pipeline from ``self.kde_pipe``, fits it on the device data
    and prints whatever ``score_samples`` returns for the same data.
    """
    pipeline = Pipeline(self.kde_pipe)
    fitted = pipeline.fit(self.data.df_devices)
    sample_scores = fitted.score_samples(self.data.df_devices)
    print(sample_scores)
def test_score(self):
    """Apply transforms, then score with the final estimator.

    Builds a pipeline from ``self.rf_pipe``, fits it on the device and
    activity data, and prints the value returned by ``score`` on that
    same data.
    """
    pipeline = Pipeline(self.rf_pipe)
    fitted = pipeline.fit(self.data.df_devices, self.data.df_activities)
    result = fitted.score(self.data.df_devices, self.data.df_activities)
    print(result)
def test_predict_proba(self):
    """Apply transforms, then ``predict_proba`` of the final estimator.

    Builds a pipeline from ``self.rf_pipe``, fits it on the device and
    activity data, and prints the ``predict_proba`` output for the
    device data.
    """
    pipeline = Pipeline(self.rf_pipe)
    fitted = pipeline.fit(self.data.df_devices, self.data.df_activities)
    probabilities = fitted.predict_proba(self.data.df_devices)
    print(probabilities)
def test_get_params(self):
    """Get the parameters of a pipeline built from ``self.rf_pipe``.

    Smoke test only: the pipeline is not fitted and the returned
    parameters are not inspected.
    """
    pipeline = Pipeline(self.rf_pipe)
    params = pipeline.get_params()
def test_fit_transform(self):
    """Fit the model and transform with the final estimator.

    Smoke test only: calls ``fit_transform`` on a pipeline built from
    ``self.rf_pipe`` with the device and activity data; the result is
    not inspected.
    """
    pipeline = Pipeline(self.rf_pipe)
    transformed = pipeline.fit_transform(
        self.data.df_devices,
        self.data.df_activities,
    )
def test_fit_predict(self):
    """Apply ``fit_predict`` of the last pipeline step after the transforms.

    Builds a pipeline from ``self.rf_pipe``, calls ``fit_predict`` with
    the device and activity data, and prints the result.
    """
    pipeline = Pipeline(self.rf_pipe)
    predictions = pipeline.fit_predict(
        self.data.df_devices,
        self.data.df_activities,
    )
    print(predictions)
def test_fit(self):
    """Fit a pipeline built from ``self.rf_pipe`` on the fixture data.

    Smoke test only: the value returned by ``fit`` is kept in a local
    but not inspected.
    """
    pipeline = Pipeline(self.rf_pipe)
    fitted = pipeline.fit(self.data.df_devices, self.data.df_activities)
split='leave_one_day_out') """ Example: Cross Validation """ ts = TimeSeriesSplit(n_splits=5) scores = [] # cross validation on train set for train_int, val_int in ts.split(X_train): steps = [('enc', BinaryEncoder(encode='raw')), ('lbl', TrainOrEvalOnlyWrapper(LabelEncoder(idle=True))), ('drop_val', TrainOnlyWrapper(CVSubset(train_int))), ('drop_train', EvalOnlyWrapper(CVSubset(val_int))), ('drop_time_idx', DropTimeIndex()), ('classifier', RandomForestClassifier(random_state=42))] pipe = Pipeline(steps).train() pipe.fit(X_train, y_train) # evaluate pipe = pipe.eval() scores.append(pipe.score(X_train, y_train)) print('scores of the pipeline: {}'.format(str(scores))) print('mean score: {:.3f}'.format(np.array(scores).mean())) """ Simple Example Gridsearch """ from pyadlml.model_selection import GridSearchCV param_grid = { 'encode_devices__encode': ['changepoint', 'raw', 'lastfired'],
('time_bin', TimeBinExtractor(one_hot_encoding=True)), ('pass_through', IdentityTransformer()) ]) #('time_diff', TimeDifferenceExtractor())] steps = [('encode_devices', BinaryEncoder()), ('fit_labels', TrainOrEvalOnlyWrapper(LabelEncoder())), ('feature_extraction', feature_extraction), ('select_train_set', TrainOnlyWrapper(CVSubset())), ('select_val_set', EvalOnlyWrapper(CVSubset())), ('drop_time_idx', DropTimeIndex()), ('drop_duplicates', TrainOnlyWrapper(DropDuplicates())), ('classifier', RandomForestClassifier(random_state=42))] cv = KFold(n_splits=5) pipe = Pipeline(steps).train() with open('my_estimator.html', 'w') as f: f.write(estimator_html_repr(pipe)) param_grid = { #'encode_devices__encode': ['changepoint', 'raw', 'lastfired'], 'encode_devices__encode': ['raw'], #'fit_labels__wr__idle': [True, False], #'feature_extraction__time_bin__resolution': ['2h', '1h'], #'feature_extraction__skip_day_of_week': [True, False] } tmp = steps[2][1] pipe = Pipeline(steps).train()