def _hp_search(self, input_df, validation_df, metric, recipe, mc, resources_per_trial, remote_dir):
    """Run a recipe-driven hyper-parameter search and build the best pipeline.

    :param input_df: training dataframe, or a list of dataframes sharing one schema
    :param validation_df: dataframe used for validation during search
    :param metric: name of the metric to optimize
    :param recipe: search recipe supplying the search space and runtime parameters
    :param mc: mc flag forwarded to the search engine (presumably Monte-Carlo
        dropout — TODO confirm against RayTuneSearchEngine)
    :param resources_per_trial: per-trial resource spec for Ray Tune
    :param remote_dir: remote directory used by the search engine for checkpoints
    :return: the pipeline built from the single best trial
    """
    transformer = TimeSequenceFeatureTransformer(self.future_seq_len,
                                                 self.dt_col,
                                                 self.target_col,
                                                 self.extra_features_col,
                                                 self.drop_missing)

    # When input_df is a list of dataframes, the candidate feature names are
    # derived from the first one (all are assumed to share a schema).
    sample_df = input_df[0] if isinstance(input_df, list) else input_df
    feature_list = transformer.get_feature_list(sample_df)

    model = TimeSequenceModel(check_optional_config=False,
                              future_seq_len=self.future_seq_len)

    # Pull the search configuration out of the recipe.
    search_space = recipe.search_space(feature_list)
    runtime_params = recipe.runtime_params()
    num_samples = runtime_params['num_samples']
    search_algorithm_params = recipe.search_algorithm_params()
    search_algorithm = recipe.search_algorithm()
    fixed_params = recipe.fixed_params()

    # Every runtime parameter except 'num_samples' acts as a stop criterion.
    stop = {key: value for key, value in runtime_params.items()
            if key != 'num_samples'}

    searcher = RayTuneSearchEngine(logs_dir=self.logs_dir,
                                   resources_per_trial=resources_per_trial,
                                   name=self.name,
                                   remote_dir=remote_dir)
    searcher.compile(input_df,
                     search_space=search_space,
                     stop=stop,
                     search_algorithm_params=search_algorithm_params,
                     search_algorithm=search_algorithm,
                     fixed_params=fixed_params,
                     feature_transformers=transformer,
                     future_seq_len=self.future_seq_len,
                     validation_df=validation_df,
                     metric=metric,
                     mc=mc,
                     num_samples=num_samples)
    searcher.run()

    # Only the single best trial is used for now; could become top-n later.
    best = searcher.get_best_trials(k=1)[0]
    return self._make_pipeline(best,
                               feature_transformers=transformer,
                               model=model,
                               remote_dir=remote_dir)
def _hp_search(self, input_df, validation_df, metric):
    """Run a fixed (hard-coded) hyper-parameter search over a VanillaLSTM.

    :param input_df: training dataframe
    :param validation_df: dataframe used for validation during search
    :param metric: name of the metric to optimize
    :return: the pipeline built from the single best trial
    """
    ft = TimeSequenceFeatureTransformer(self.future_seq_len,
                                        self.dt_col,
                                        self.target_col,
                                        self.extra_features_col,
                                        self.drop_missing)
    feature_list = ft.get_feature_list(input_df)

    model = VanillaLSTM(check_optional_config=False,
                        future_seq_len=self.future_seq_len)

    search_space = {
        # -------- feature related parameters:
        # pick a random subset (at least 3) of the generated features.
        "selected_features": RandomSample(
            lambda spec: np.random.choice(
                feature_list,
                size=np.random.randint(low=3, high=len(feature_list), size=1),
                replace=False)),
        # --------- model related parameters
        'out_units': self.future_seq_len,
        "lr": 0.001,
        "lstm_1_units": GridSearch([16, 32]),
        "dropout_1": 0.2,
        "lstm_2_units": 10,
        "dropout_2": RandomSample(lambda spec: np.random.uniform(0.2, 0.5)),
        "batch_size": 1024,
    }

    # Trials stop once the reward metric is good enough or after 10 iterations.
    stop = {
        "reward_metric": -0.05,
        "training_iteration": 10
    }

    searcher = RayTuneSearchEngine(logs_dir=self.logs_dir,
                                   ray_num_cpus=6,
                                   resources_per_trial={"cpu": 2})
    searcher.compile(input_df,
                     search_space=search_space,
                     stop=stop,
                     feature_transformers=ft,
                     model=model,
                     validation_df=validation_df,
                     metric=metric)
    searcher.run()

    # get the best one trial, later could be n
    best = searcher.get_best_trials(k=1)[0]

    # FIX: reuse the configured `model` (which carries future_seq_len) rather
    # than constructing a fresh VanillaLSTM(check_optional_config=False)
    # without it — consistent with the sibling _hp_search overload.
    pipeline = self._make_pipeline(best,
                                   feature_transformers=ft,
                                   model=model)
    return pipeline