def get_score_for_clf(self, clf, df_test, feature_name, pca=None):
    """Score ``clf`` on ``df_test``, optionally PCA-projecting the 'gdf' columns.

    The columns registered under ``feature_name`` in
    ``self.feature_columns_dict`` are split in two groups: those whose name
    contains ``'gdf'`` and the rest. Only the non-'gdf' columns are passed
    through unchanged. When ``pca`` is given, the 'gdf' columns are
    transformed with ``pca.transform`` and each output component is appended
    as a ``pca_<n>`` column; when ``pca`` is falsy the 'gdf' columns are
    simply dropped.

    Args:
        clf: Classifier handed to ``model.test_model``.
        df_test: DataFrame holding the feature columns and the
            ``'mid_price_indicator'`` target column.
        feature_name: Key into ``self.feature_columns_dict``.
        pca: Optional fitted transformer exposing ``transform``; its output
            must support 2-D ``[:, n]`` indexing.

    Returns:
        Whatever ``model.test_model(clf, x_test, y_test)`` returns.
    """
    features = df_test[self.feature_columns_dict[feature_name]]
    gdf_columns = [col for col in features.columns if 'gdf' in col]
    other_columns = [col for col in features.columns if 'gdf' not in col]

    # Explicit copy: new columns are added below, and writing into a frame
    # derived from ``df_test`` would raise pandas' SettingWithCopyWarning.
    x_test = features[other_columns].copy()

    if pca:
        projected = pca.transform(features[gdf_columns])
        # Use the actual width of the projection rather than the constructor
        # argument ``pca.n_components``, which is not guaranteed to be an int
        # (e.g. scikit-learn's PCA accepts None, a float or 'mle').
        for n in range(projected.shape[1]):
            x_test['pca_{}'.format(n)] = projected[:, n]

    y_test = df_test['mid_price_indicator'].values
    return model.test_model(clf, x_test, y_test)
def get_score_for_clf(self, clf, df_test, feature_name, pca=None):
    """Evaluate ``clf`` on the test frame, optionally after a PCA projection.

    Args:
        clf: Classifier handed to ``model.test_model``.
        df_test: DataFrame holding the feature columns and the
            ``'mid_price_indicator'`` target column.
        feature_name: Key into ``self.feature_columns_dict`` selecting which
            columns of ``df_test`` are used as features.
        pca: Optional transformer; when truthy, the whole selected feature
            matrix is replaced by ``pca.transform(...)`` of itself.

    Returns:
        Whatever ``model.test_model`` returns for the prepared inputs.
    """
    selected_columns = self.feature_columns_dict[feature_name]
    features = df_test[selected_columns]
    features = pca.transform(features) if pca else features
    targets = df_test['mid_price_indicator'].values
    return model.test_model(clf, features, targets)
def train_lstm(self, clf, feature_name='', should_validate=True, method=None,
               fit_kwargs=None, compile_kwargs=None, n_steps=None,
               plot_name=None, class_weight=None, should_return_model=False):
    """Train (or validate) an LSTM-style model and score it on the test set.

    Features are the columns registered under ``feature_name`` in
    ``self.feature_columns_dict``, read from ``self.df`` (training) and
    ``self.df_test`` (testing); the target is ``'mid_price_indicator'``.
    If ``self.get_pca(feature_name)`` returns a truthy transformer, both
    feature matrices are projected with it. The inputs are then made 3-D:
    via ``self.split_sequences`` when ``n_steps`` is truthy, otherwise by
    inserting a length-1 middle axis.

    Args:
        clf: When ``should_validate`` is true it is passed as-is to
            ``model.validate_model_lstm``; otherwise it is called with no
            arguments to build the model.
        feature_name: Key into ``self.feature_columns_dict``.
        should_validate: Choose ``model.validate_model_lstm`` (scores are
            then reduced with ``self.get_mean_scores``) over a plain
            ``model.train_model`` call.
        method: Label stored under ``'kernel'``; falls back to ``'lstm'``
            when falsy.
        fit_kwargs: Forwarded to the training/validation helper.
        compile_kwargs: Forwarded to the training/validation helper.
        n_steps: Forwarded to ``self.split_sequences`` when truthy.
        plot_name: Forwarded to ``model.validate_model_lstm`` only.
        class_weight: Forwarded to the training/validation helper.
        should_return_model: Also return the trained model.

    Returns:
        A dict merging the training scores, the ``'stock'``, ``'kernel'``,
        ``'features'`` and ``'pca_components'`` entries, the test scores and
        the model's ``to_json()`` output under ``'arch'``. When
        ``should_return_model`` is true, a ``(dict, model)`` tuple instead.
    """

    def _with_single_step(matrix):
        # (rows, features) -> (rows, 1, features)
        return np.reshape(matrix, (matrix.shape[0], 1, matrix.shape[1]))

    logger.info('Training %s r=%s s=%s: clf=%s', self.stock, self.r, self.s, clf)

    x_train = self.df[self.feature_columns_dict[feature_name]].values
    x_test = self.df_test[self.feature_columns_dict[feature_name]].values
    y_train = self.df['mid_price_indicator'].values
    y_test = self.df_test['mid_price_indicator'].values

    pca = self.get_pca(feature_name)
    if pca:
        x_train = pca.transform(x_train)
        x_test = pca.transform(x_test)

    if n_steps:
        x_train, y_train = self.split_sequences(x_train, y_train, n_steps=n_steps)
        x_test, y_test = self.split_sequences(x_test, y_test, n_steps=n_steps)
    else:
        x_train = _with_single_step(x_train)
        x_test = _with_single_step(x_test)

    if not should_validate:
        # Plain fit: build the network ourselves and train it once.
        network = clf()
        scores = model.train_model(
            network, x_train, y_train,
            compile_kwargs=compile_kwargs, fit_kwargs=fit_kwargs,
            is_lstm=True, class_weight=class_weight)
    else:
        per_fold_scores, network = model.validate_model_lstm(
            clf, x_train, y_train,
            fit_kwargs=fit_kwargs, compile_kwargs=compile_kwargs,
            plot_name=plot_name, class_weight=class_weight,
            print_debug=False)
        scores = self.get_mean_scores(per_fold_scores)

    method = method or 'lstm'
    components_num = pca.n_components_ if pca else None
    summary = {
        **scores,
        'stock': self.stock,
        'kernel': method,
        'features': feature_name,
        'pca_components': components_num,
    }

    test_scores = model.test_model(network, x_test, y_test, is_lstm=True)
    logger.info('Finished training %s %s', self.stock, {**summary, **test_scores})

    # Built after logging so the architecture dump never reaches the log.
    result = {**summary, **test_scores, 'arch': network.to_json()}
    if should_return_model:
        return result, network
    return result
def get_score_for_clf(clf, df_test):
    """Score ``clf`` on ``df_test`` using only the queue-imbalance feature.

    Args:
        clf: Classifier handed to ``model.test_model``.
        df_test: DataFrame with a ``'queue_imbalance'`` feature column and a
            ``'mid_price_indicator'`` target column.

    Returns:
        Whatever ``model.test_model`` returns.
    """
    # Double brackets keep the single feature as a one-column DataFrame.
    return model.test_model(
        clf,
        df_test[['queue_imbalance']],
        df_test['mid_price_indicator'].values,
    )
def get_score_for_clf(self, clf, df_test: pd.DataFrame, feature_name: str) -> dict:
    """Score ``clf`` on ``df_test`` with the columns of a named feature set.

    Args:
        clf: Classifier handed to ``model.test_model``.
        df_test: Frame holding the feature columns and the
            ``'mid_price_indicator'`` target column.
        feature_name: Key into ``self.feature_columns_dict`` selecting the
            feature columns.

    Returns:
        The result of ``model.test_model`` for the selected features and
        target values.
    """
    selected_columns = self.feature_columns_dict[feature_name]
    features = df_test[selected_columns]
    targets = df_test['mid_price_indicator'].values
    return model.test_model(clf, features, targets)