def _hp_search(self, input_df, validation_df, metric, recipe, mc,
               resources_per_trial, remote_dir):
        """Search hyper-parameters with RayTuneSearchEngine and build a pipeline.

        The search space, stop criteria, number of samples, search algorithm
        (and its parameters) and fixed parameters are all read from ``recipe``.

        :param input_df: training data; when a list is given, its first
            element is used to derive the feature list.
        :param validation_df: validation data forwarded to the search engine.
        :param metric: metric forwarded to the search engine; also used to
            look up the metric mode.
        :param recipe: object providing ``search_space``, ``runtime_params``,
            ``search_algorithm_params``, ``search_algorithm`` and
            ``fixed_params``.
        :param mc: forwarded unchanged to the search engine.
        :param resources_per_trial: forwarded to ``RayTuneSearchEngine``.
        :param remote_dir: forwarded to the search engine and to
            ``_make_pipeline``.
        :return: the result of ``self._make_pipeline``.
        """
        ft = TimeSequenceFeatureTransformer(
            self.future_seq_len,
            self.dt_col,
            self.target_col,
            self.extra_features_col,
            self.drop_missing,
        )
        # A list input contributes only its first element to feature discovery.
        sample_df = input_df[0] if isinstance(input_df, list) else input_df
        feature_list = ft.get_feature_list(sample_df)

        model = TimeSequenceModel(check_optional_config=False,
                                  future_seq_len=self.future_seq_len)

        # Unpack the recipe. Every runtime parameter except 'num_samples'
        # is handed to the engine as a stop criterion.
        search_space = recipe.search_space(feature_list)
        stop = dict(recipe.runtime_params())
        num_samples = stop.pop('num_samples')
        search_algorithm_params = recipe.search_algorithm_params()
        search_algorithm = recipe.search_algorithm()
        fixed_params = recipe.fixed_params()

        metric_mode = TimeSequencePredictor._get_metric_mode(metric)
        searcher = RayTuneSearchEngine(logs_dir=self.logs_dir,
                                       resources_per_trial=resources_per_trial,
                                       name=self.name,
                                       remote_dir=remote_dir)
        compile_options = {
            "search_space": search_space,
            "stop": stop,
            "search_algorithm_params": search_algorithm_params,
            "search_algorithm": search_algorithm,
            "fixed_params": fixed_params,
            "feature_transformers": ft,
            "future_seq_len": self.future_seq_len,
            "validation_df": validation_df,
            "metric": metric,
            "metric_mode": metric_mode,
            "mc": mc,
            "num_samples": num_samples,
        }
        searcher.compile(input_df, **compile_options)
        analysis = searcher.run()

        return self._make_pipeline(analysis,
                                   metric_mode,
                                   feature_transformers=ft,
                                   model=model,
                                   remote_dir=remote_dir)
# Example no. 2
# 0
    def _hp_search(
        self,
        input_df,
        validation_df,
        config,
        metric,
        recipe,
        mc,
        resources_per_trial,
        remote_dir,
    ):
        """Search hyper-parameters for an XGBoost model and build a pipeline.

        :param input_df: training data forwarded to the search engine.
        :param validation_df: validation data forwarded to the search engine.
        :param config: configuration handed to the ``XGBoost`` constructor.
        :param metric: metric forwarded to the search engine; also used to
            look up the metric mode.
        :param recipe: provides the search space and is itself forwarded to
            the search engine.
        :param mc: forwarded unchanged to the search engine.
        :param resources_per_trial: forwarded to ``RayTuneSearchEngine``; when
            it contains a ``"cpu"`` entry, that value becomes the model's
            ``n_jobs``.
        :param remote_dir: forwarded to the search engine and to
            ``_make_pipeline``.
        :return: the result of ``self._make_pipeline``.
        """
        def model_create_func():
            xgb_model = XGBoost(model_type=self.model_type, config=config)
            if "cpu" not in resources_per_trial:
                return xgb_model
            xgb_model.set_params(n_jobs=resources_per_trial.get("cpu"))
            return xgb_model

        # This instance ends up in the returned pipeline.
        model = model_create_func()
        ft = IdentityTransformer(self.feature_cols, self.target_col)

        # No feature list is supplied to the recipe here.
        search_space = recipe.search_space(None)

        from zoo.automl.regression.time_sequence_predictor import TimeSequencePredictor
        metric_mode = TimeSequencePredictor._get_metric_mode(metric)

        searcher = RayTuneSearchEngine(logs_dir=self.logs_dir,
                                       resources_per_trial=resources_per_trial,
                                       name=self.name,
                                       remote_dir=remote_dir)
        # NOTE(review): the keyword is called ``model_create_func`` but it
        # receives a freshly built model instance (the factory's result), not
        # the factory itself -- confirm this matches what
        # RayTuneSearchEngine.compile expects.
        search_model = model_create_func()
        searcher.compile(input_df,
                         model_create_func=search_model,
                         search_space=search_space,
                         recipe=recipe,
                         feature_transformers=ft,
                         validation_df=validation_df,
                         metric=metric,
                         metric_mode=metric_mode,
                         mc=mc)
        analysis = searcher.run()

        return self._make_pipeline(analysis,
                                   metric_mode,
                                   feature_transformers=ft,
                                   model=model,
                                   remote_dir=remote_dir)
    def _hp_search(self, input_df, validation_df, config, metric, recipe, mc,
                   resources_per_trial, remote_dir):
        """Search hyper-parameters for an XGBoostRegressor and build a pipeline.

        The search space, stop criteria, number of samples, search algorithm
        (and its parameters) and fixed parameters are all read from ``recipe``.

        :param input_df: training data forwarded to the search engine.
        :param validation_df: validation data forwarded to the search engine.
        :param config: configuration handed to ``XGBoostRegressor``.
        :param metric: metric forwarded to the search engine; also used to
            look up the metric mode.
        :param recipe: object providing ``search_space``, ``runtime_params``,
            ``search_algorithm_params``, ``search_algorithm`` and
            ``fixed_params``.
        :param mc: forwarded unchanged to the search engine.
        :param resources_per_trial: forwarded to ``RayTuneSearchEngine``. When
            it is a dict, its ``"cpu"`` entry (if present) becomes the model's
            ``n_jobs``; any other value is passed as ``n_jobs`` unchanged.
        :param remote_dir: forwarded to the search engine and to
            ``_make_pipeline``.
        :return: the result of ``self._make_pipeline``.
        """
        def model_create_func():
            model = XGBoostRegressor(config)
            if isinstance(resources_per_trial, dict):
                # Passing the whole resources dict as ``n_jobs`` is not a
                # usable worker count; use only its "cpu" entry, and leave
                # the model's default untouched when there is none.
                if "cpu" in resources_per_trial:
                    model.set_params(n_jobs=resources_per_trial.get("cpu"))
            else:
                # Non-dict values keep the previous pass-through behaviour.
                model.set_params(n_jobs=resources_per_trial)
            return model

        model = model_create_func()
        ft = IdentityTransformer(self.feature_cols, self.target_col)

        # Unpack the recipe. Every runtime parameter except 'num_samples'
        # is handed to the engine as a stop criterion.
        search_space = recipe.search_space(None)
        stop = dict(recipe.runtime_params())
        num_samples = stop.pop('num_samples')
        search_algorithm_params = recipe.search_algorithm_params()
        search_algorithm = recipe.search_algorithm()
        fixed_params = recipe.fixed_params()

        from zoo.automl.regression.time_sequence_predictor import TimeSequencePredictor
        metric_mode = TimeSequencePredictor._get_metric_mode(metric)
        searcher = RayTuneSearchEngine(
            logs_dir=self.logs_dir,
            resources_per_trial=resources_per_trial,
            name=self.name,
            remote_dir=remote_dir,
        )
        # NOTE(review): the keyword is called ``model_create_func`` but it
        # receives a freshly built model instance, not the factory itself --
        # confirm this matches what RayTuneSearchEngine.compile expects.
        searcher.compile(input_df,
                         model_create_func=model_create_func(),
                         search_space=search_space,
                         stop=stop,
                         search_algorithm_params=search_algorithm_params,
                         search_algorithm=search_algorithm,
                         fixed_params=fixed_params,
                         feature_transformers=ft,
                         validation_df=validation_df,
                         metric=metric,
                         metric_mode=metric_mode,
                         mc=mc,
                         num_samples=num_samples)
        analysis = searcher.run()

        pipeline = self._make_pipeline(analysis,
                                       metric_mode,
                                       feature_transformers=ft,
                                       model=model,
                                       remote_dir=remote_dir)
        return pipeline
    def _hp_search(self, input_df, validation_df, metric, recipe, mc,
                   resources_per_trial, remote_dir):
        """Search hyper-parameters for a TimeSequenceModel and build a pipeline.

        :param input_df: training data; when a list is given, its first
            element is used to derive the feature list.
        :param validation_df: validation data forwarded to the search engine.
        :param metric: metric forwarded to the search engine; also used to
            look up the metric mode.
        :param recipe: provides the search space and is itself forwarded to
            the search engine.
        :param mc: forwarded unchanged to the search engine.
        :param resources_per_trial: forwarded to ``RayTuneSearchEngine``.
        :param remote_dir: forwarded to the search engine and to
            ``_make_pipeline``.
        :return: the result of ``self._make_pipeline``.
        """
        ft = TimeSequenceFeatureTransformer(
            self.future_seq_len,
            self.dt_col,
            self.target_col,
            self.extra_features_col,
            self.drop_missing,
        )
        # A list input contributes only its first element to feature discovery.
        sample_df = input_df[0] if isinstance(input_df, list) else input_df
        feature_list = ft.get_feature_list(sample_df)

        def model_create_func():
            return TimeSequenceModel(check_optional_config=False,
                                     future_seq_len=self.future_seq_len)

        # This instance ends up in the returned pipeline.
        model = model_create_func()

        search_space = recipe.search_space(feature_list)
        metric_mode = TimeSequencePredictor._get_metric_mode(metric)

        searcher = RayTuneSearchEngine(logs_dir=self.logs_dir,
                                       resources_per_trial=resources_per_trial,
                                       name=self.name,
                                       remote_dir=remote_dir)
        # NOTE(review): the keyword is called ``model_create_func`` but it
        # receives a freshly built model instance (the factory's result), not
        # the factory itself -- confirm this matches what
        # RayTuneSearchEngine.compile expects.
        search_model = model_create_func()
        searcher.compile(input_df,
                         model_create_func=search_model,
                         search_space=search_space,
                         recipe=recipe,
                         feature_transformers=ft,
                         future_seq_len=self.future_seq_len,
                         validation_df=validation_df,
                         metric=metric,
                         metric_mode=metric_mode,
                         mc=mc)
        analysis = searcher.run()

        return self._make_pipeline(analysis,
                                   metric_mode,
                                   feature_transformers=ft,
                                   model=model,
                                   remote_dir=remote_dir)
# Example no. 5
# 0
    def _hp_search(self, input_df, validation_df, metric):
        """Search hyper-parameters for a VanillaLSTM and build a pipeline.

        The search space and stop criteria are defined inline, and the search
        engine is created with hard-coded resources (``ray_num_cpus=6``,
        two CPUs per trial).

        :param input_df: training data; used to derive the candidate feature
            list and forwarded to the search engine.
        :param validation_df: validation data forwarded to the search engine.
        :param metric: metric forwarded to the search engine.
        :return: the result of ``self._make_pipeline`` for the best trial.
        """
        ft = TimeSequenceFeatureTransformer(self.future_seq_len, self.dt_col,
                                            self.target_col,
                                            self.extra_features_col,
                                            self.drop_missing)

        feature_list = ft.get_feature_list(input_df)
        model = VanillaLSTM(check_optional_config=False,
                            future_seq_len=self.future_seq_len)

        def sample_features(spec):
            """Pick a random subset of ``feature_list`` without replacement."""
            n_features = len(feature_list)
            if n_features <= 3:
                # randint(low=3, high=n) raises ValueError when n <= 3, so
                # with this few candidates every feature is selected.
                return np.random.choice(feature_list,
                                        size=n_features,
                                        replace=False)
            # ``high`` is exclusive: between 3 and n_features - 1 features.
            return np.random.choice(
                feature_list,
                size=np.random.randint(low=3, high=n_features, size=1),
                replace=False)

        search_space = {
            # -------- feature related parameters
            "selected_features": RandomSample(sample_features),

            # --------- model related parameters
            'out_units': self.future_seq_len,
            "lr": 0.001,
            "lstm_1_units": GridSearch([16, 32]),
            "dropout_1": 0.2,
            "lstm_2_units": 10,
            "dropout_2": RandomSample(lambda spec: np.random.uniform(0.2, 0.5)),
            "batch_size": 1024,
        }

        stop = {"reward_metric": -0.05, "training_iteration": 10}

        searcher = RayTuneSearchEngine(logs_dir=self.logs_dir,
                                       ray_num_cpus=6,
                                       resources_per_trial={"cpu": 2})
        searcher.compile(
            input_df,
            search_space=search_space,
            stop=stop,
            feature_transformers=ft,
            model=model,
            validation_df=validation_df,
            metric=metric)

        # The return value of run() is not needed; the best trial is queried
        # from the engine afterwards.
        searcher.run()
        # Only the single best trial is used for now.
        best = searcher.get_best_trials(k=1)[0]
        # NOTE(review): the pipeline receives a fresh VanillaLSTM built
        # without ``future_seq_len``, unlike the model used for the search --
        # confirm this is intended.
        pipeline = self._make_pipeline(
            best,
            feature_transformers=ft,
            model=VanillaLSTM(check_optional_config=False))
        return pipeline