Example #1
import os

import pandas as pd

# DeepARLearner and the _multi_index_prep / _time_col_to_seconds /
# _create_ts_object / _create_ts_test_object helpers are assumed to be
# defined or imported elsewhere in this script.


def train(working_dir, dataset='56_sunspots', epochs=100, stopping_patience=3):
    # read in training df
    df = pd.read_csv(
        f'../../datasets/seed_datasets_current/{dataset}/TRAIN/dataset_TRAIN/tables/learningData.csv'
    )
    df = _multi_index_prep(df, dataset)
    df = _time_col_to_seconds(df, dataset)

    # create TimeSeries and Learner objects
    ds = _create_ts_object(df, dataset)
    learner = DeepARLearner(ds, verbose=1)

    # fit
    learner.fit(epochs=epochs,
                stopping_patience=stopping_patience,
                early_stopping=True,
                checkpoint_dir=os.path.join("./checkpoints", working_dir))

    # predict on test data
    test_df = pd.read_csv(
        f'../../datasets/seed_datasets_current/{dataset}/TEST/dataset_TEST/tables/learningData.csv'
    )
    test_df = _multi_index_prep(test_df, dataset)
    test_df = _time_col_to_seconds(test_df, dataset)
    test_ds = _create_ts_test_object(test_df, ds, dataset)

    preds = learner.predict(test_ds,
                            horizon=None,
                            samples=1,
                            include_all_training=True).reshape(-1)
    return preds
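
# Illustrative invocation of train() above (not from the source); the relative
# dataset paths inside train() assume the script is run from the repo's
# scripts directory, and "sunspots_run" is an assumed checkpoint sub-directory.
if __name__ == "__main__":
    train(working_dir="sunspots_run", dataset="56_sunspots", epochs=100)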
Example #2
    def _create_data_object_and_learner(self, val_split):
        """ private util function:
            creates (or updates) train ds object and learner 

            Arguments:
                val_split {float} -- proportion of training data to withhold for validation

        """

        # Create TimeSeries dataset objects
        #logger.info(self._ts_frame.head())
        self._ts_object = TimeSeriesTrain(
            self._ts_frame,
            target_idx=self._target_column,
            timestamp_idx=self._timestamp_column,
            grouping_idx=self._grouping_column,
            index_col=self._index_column,
            count_data=self._count_data,
            negative_obs=self.hyperparams["negative_obs"],
            val_split=val_split,
            integer_timestamps=self._integer_timestamps,
            freq=self.freq,
        )
        #logger.info(self._ts_object.data.head())

        # Create learner
        self._learner = DeepARLearner(
            self._ts_object,
            emb_dim=self.hyperparams["emb_dim"],
            lstm_dim=self.hyperparams["lstm_dim"],
            dropout=self.hyperparams["dropout_rate"],
            lr=self.hyperparams["learning_rate"],
            batch_size=self.hyperparams["batch_size"],
            train_window=self.hyperparams["window_size"],
            verbose=0,
        )

        # save weights so we can restart fitting from scratch (if desired by caller)
        self._learner.save_weights("model_initial_weights.h5")
Example #3
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """ Produce primitive's predictions for specific time series at specific future time instances
            * these specific timesteps / series are specified implicitly by input dataset

            Arguments:
                inputs {Inputs} -- full D3M dataframe, containing attributes, key, and target
            
            Keyword Arguments:
                timeout {float} -- timeout, not considered (default: {None})
                iterations {int} -- iterations, not considered (default: {None})

            Raises:
                PrimitiveNotFittedError: if primitive not fit
            
            Returns:
                CallResult[Outputs] -- (N, 2) dataframe with d3m_index and value for each prediction slice requested.
                    prediction slice = specific horizon idx for specific series in specific regression 
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        # drop duplicated special columns if they are still present in the inputs
        if len(self._drop_cols_no_tgt) > 0 and inputs.shape[1] != self._cols_after_drop:
            test_frame = inputs.remove_columns(self._drop_cols_no_tgt)
        else:
            test_frame = inputs.copy()

        # Create TimeSeriesTest object
        if self._train_data.equals(inputs):
            # produce() called on the training data: predict in-sample
            ts_test_object = TimeSeriesTest(self._ts_object)
        else:
            # produce() called on new test data
            ts_test_object = TimeSeriesTest(self._ts_object, test_frame)

        # get prediction slices
        pred_intervals = self._get_pred_intervals(test_frame)

        # make predictions with learner
        learner = DeepARLearner(
            self._ts_object,
            emb_dim=self.hyperparams["emb_dim"],
            lstm_dim=self.hyperparams["lstm_dim"],
            dropout=self.hyperparams["dropout_rate"],
            lr=self.hyperparams["learning_rate"],
            batch_size=self.hyperparams["batch_size"],
            train_window=self.hyperparams["window_size"],
            verbose=0,
        )
        learner.load_weights(self.hyperparams['weights_filepath'])
        start_time = time.time()
        logger.info(f"Making predictions...")
        preds = learner.predict(ts_test_object, include_all_training=True)
        logger.info(
            f"Prediction took {time.time() - start_time}s. Predictions array shape: {preds.shape}"
        )

        # slice predictions with learned intervals
        all_preds = []
        for p, idxs in zip(preds, pred_intervals.values):
            # take predictions at the requested indices (not simply the first n)
            all_preds.extend([p[i] for i in idxs])
        flat_list = np.array([p for pred_list in all_preds for p in pred_list])

        # if np.isinf(all_preds).any():
        #     logger.debug(f'There are {np.isinf(all_preds).sum()} inf preds')
        # if np.isnan(all_preds).any():
        #     logger.debug(f'There are {np.isnan(all_preds).sum()} nan preds')
        # logger.debug(f'Max: {preds.max()}, Min: {preds.min()}')

        # fill NaNs with 0s in case the model predicted some (it shouldn't - preventing an edge case)
        flat_list = np.nan_to_num(flat_list)

        # create output frame
        result_df = container.DataFrame(
            {self._ts_frame.columns[self._target_column]: flat_list},
            generate_metadata=True,
        )
        result_df.metadata = result_df.metadata.add_semantic_type(
            (metadata_base.ALL_ELEMENTS, 0),
            ("https://metadata.datadrivendiscovery.org/types/PredictedTarget"),
        )

        return CallResult(result_df, has_finished=self._is_fit)
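
# Standalone sketch (array shapes are assumptions, not from the source) of the
# slicing and flattening performed at the end of produce(): preds holds one
# row of per-step prediction arrays per series, and pred_intervals holds the
# requested step indices for each series.
import numpy as np
import pandas as pd

preds = np.arange(12).reshape(2, 6, 1)           # 2 series, 6 steps, 1 sample each
pred_intervals = pd.Series([[0, 2], [1, 3, 5]])  # requested step indices per series

all_preds = []
for p, idxs in zip(preds, pred_intervals.values):
    all_preds.extend([p[i] for i in idxs])       # keep only the requested steps
flat_list = np.array([v for step in all_preds for v in step])
print(flat_list)  # -> [ 0  2  7  9 11]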
Example #4
class DeepArPrimitive(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params,
                                                     Hyperparams]):
    """
        Primitive that applies a deep autoregressive forecasting algorithm (DeepAR) for time series
        prediction. The implementation is based on this paper: https://arxiv.org/pdf/1704.04110.pdf;
        the same algorithm is also available through AWS's SageMaker interface.

        Training inputs: 1) Feature dataframe, 2) Target dataframe
        Outputs: Dataframe with predictions for specific time series at specific future time instances 
    
        Arguments:
            hyperparams {Hyperparams} -- D3M Hyperparameter object
        
        Keyword Arguments:
            random_seed {int} -- random seed (default: {0})
    """

    metadata = metadata_base.PrimitiveMetadata({
        # Simply a UUID generated once and fixed forever. Generated using "uuid.uuid4()".
        "id":
        "3410d709-0a13-4187-a1cb-159dd24b584b",
        "version":
        __version__,
        "name":
        "DeepAR",
        # Keywords do not have a controlled vocabulary. Authors can put here whatever they find suitable.
        "keywords": [
            "time series",
            "forecasting",
            "recurrent neural network",
            "autoregressive",
        ],
        "source": {
            "name":
            __author__,
            "contact":
            __contact__,
            "uris": [
                # Unstructured URIs.
                "https://github.com/Yonder-OSS/D3M-Primitives",
            ],
        },
        # A list of dependencies in order. These can be Python packages, system packages, or Docker images.
        # Of course Python packages can also have their own dependencies, but sometimes it is necessary to
        # install a Python package first to be even able to run setup.py of another package. Or you have
        # a dependency which is not on PyPi.
        "installation": [
            {
                "type": "PIP",
                "package": "cython",
                "version": "0.29.14"
            },
            {
                "type":
                metadata_base.PrimitiveInstallationType.PIP,
                "package_uri":
                "git+https://github.com/Yonder-OSS/D3M-Primitives.git@{git_commit}#egg=yonder-primitives"
                .format(git_commit=utils.current_git_commit(
                    os.path.dirname(__file__)), ),
            },
        ],
        # The same path the primitive is registered with entry points in setup.py.
        "python_path":
        "d3m.primitives.time_series_forecasting.lstm.DeepAR",
        # Choose these from a controlled vocabulary in the schema. If anything is missing which would
        # best describe the primitive, make a merge request.
        "algorithm_types": [
            metadata_base.PrimitiveAlgorithmType.RECURRENT_NEURAL_NETWORK,
        ],
        "primitive_family":
        metadata_base.PrimitiveFamily.TIME_SERIES_FORECASTING,
    })

    def __init__(self,
                 *,
                 hyperparams: Hyperparams,
                 random_seed: int = 0) -> None:
        super().__init__(hyperparams=hyperparams, random_seed=random_seed)

        # set seed for reproducibility
        tf.random.set_seed(random_seed)

        self._cols_after_drop = 0
        self._is_fit = False

    def get_params(self) -> Params:
        if not self._is_fit:
            return Params(drop_cols_no_tgt=None,
                          cols_after_drop=None,
                          train_data=None,
                          ts_frame=None,
                          target_column=None,
                          timestamp_column=None,
                          ts_object=None,
                          grouping_column=None,
                          output_columns=None,
                          min_train=None,
                          freq=None,
                          integer_timestamps=None,
                          is_fit=None)
        return Params(drop_cols_no_tgt=self._drop_cols_no_tgt,
                      cols_after_drop=self._cols_after_drop,
                      train_data=self._train_data,
                      ts_frame=self._ts_frame,
                      target_column=self._target_column,
                      timestamp_column=self._timestamp_column,
                      ts_object=self._ts_object,
                      grouping_column=self._grouping_column,
                      output_columns=self._output_columns,
                      min_train=self._min_train,
                      freq=self.freq,
                      integer_timestamps=self._integer_timestamps,
                      is_fit=self._is_fit)

    def set_params(self, *, params: Params) -> None:
        self._drop_cols_no_tgt = params['drop_cols_no_tgt']
        self._cols_after_drop = params['cols_after_drop']
        self._train_data = params['train_data']
        self._ts_frame = params['ts_frame']
        self._target_column = params['target_column']
        self._timestamp_column = params['timestamp_column']
        self._ts_object = params['ts_object']
        self._grouping_column = params['grouping_column']
        self._output_columns = params['output_columns']
        self._min_train = params['min_train']
        self.freq = params['freq']
        self._integer_timestamps = params['integer_timestamps']
        self._is_fit = params['is_fit']

    def _drop_multiple_special_cols(self, col_list, col_type):
        """
            private util function that creates list of duplicated special columns (for deletion)

            Arguments:
                col_list {List[int]} -- list of column indices 
                col_type {str} -- D3M semantic type

            Returns:
                int or None -- first column idx in col_list if any column idxs are marked (else None)
        """

        if len(col_list) == 0:
            return None
        elif len(col_list) > 1:
            logger.warning(
                f"More than one {col_type} is marked. This primitive will "
                f"use the first and drop the other {col_type}s.")
            self._drop_cols += col_list[1:]
            if col_type != "target column":
                self._drop_cols_no_tgt += col_list[1:]
        return col_list[0]

    def _get_cols(self, input_metadata):
        """ private util function: get indices of important columns from metadata 

            Arguments:
                input_metadata {D3M Metadata object} -- D3M Metadata object for input frame

            Raises:
                ValueError: If Target column is not of type 'Integer' or 'Float'
        """

        self._drop_cols = []
        self._drop_cols_no_tgt = []

        # get target idx (first column by default)
        target_columns = input_metadata.list_columns_with_semantic_types((
            "https://metadata.datadrivendiscovery.org/types/SuggestedTarget",
            "https://metadata.datadrivendiscovery.org/types/TrueTarget",
            "https://metadata.datadrivendiscovery.org/types/Target",
        ))
        if len(target_columns) == 0:
            raise ValueError("At least one column must be marked as a target")
        self._target_column = self._drop_multiple_special_cols(
            target_columns, "target column")

        # get timestamp idx (first column by default)
        timestamp_columns = input_metadata.list_columns_with_semantic_types((
            "https://metadata.datadrivendiscovery.org/types/Time",
            "http://schema.org/DateTime",
        ))
        self._timestamp_column = self._drop_multiple_special_cols(
            timestamp_columns, "timestamp column")

        # get grouping idx and add suggested grouping keys to drop_cols list
        grouping_columns = input_metadata.list_columns_with_semantic_types(
            ("https://metadata.datadrivendiscovery.org/types/GroupingKey", ))
        self._grouping_column = self._drop_multiple_special_cols(
            grouping_columns, "grouping column")
        suggested_grouping_columns = input_metadata.list_columns_with_semantic_types(
            ("https://metadata.datadrivendiscovery.org/types/SuggestedGroupingKey",
             ))
        self._drop_cols += suggested_grouping_columns
        self._drop_cols_no_tgt += suggested_grouping_columns

        # get index_col (first index column by default)
        index_columns = input_metadata.list_columns_with_semantic_types(
            ("https://metadata.datadrivendiscovery.org/types/PrimaryKey", ))
        self._index_column = self._drop_multiple_special_cols(
            index_columns, "index column")

        # determine whether targets are count data
        target_semantic_types = input_metadata.query_column_field(
            self._target_column, "semantic_types")
        if self.hyperparams["count_data"] is not None:
            self._count_data = self.hyperparams["count_data"]
        elif "http://schema.org/Integer" in target_semantic_types:
            if np.min(self._ts_frame.iloc[:, self._target_column]) > 0:
                self._count_data = True
            else:
                self._count_data = False
        elif "http://schema.org/Float" in target_semantic_types:
            self._count_data = False
        else:
            raise ValueError(
                "Target column is not of type 'Integer' or 'Float'")

    def _update_indices(self):
        """ private util function: 
            subtract length of drop cols from each marked idx to account for smaller df 
        """

        length = len(self._drop_cols)
        if self._target_column is not None:
            self._target_column -= length
        if self._timestamp_column is not None:
            self._timestamp_column -= length
        if self._grouping_column is not None:
            self._grouping_column -= length
        if self._index_column is not None:
            self._index_column -= length
        # number of columns remaining after dropping duplicated special columns
        self._cols_after_drop = self._ts_frame.shape[1]

    def _create_data_object_and_learner(self, val_split):
        """ private util function:
            creates (or updates) train ds object and learner 

            Arguments:
                val_split {float} -- proportion of training data to withhold for validation

        """

        # Create TimeSeries dataset objects
        self._ts_object = TimeSeriesTrain(
            self._ts_frame,
            target_idx=self._target_column,
            timestamp_idx=self._timestamp_column,
            grouping_idx=self._grouping_column,
            index_col=self._index_column,
            count_data=self._count_data,
            negative_obs=self.hyperparams["negative_obs"],
            val_split=val_split,
            integer_timestamps=self._integer_timestamps,
            freq=self.freq,
        )

        # Create learner
        self._learner = DeepARLearner(
            self._ts_object,
            emb_dim=self.hyperparams["emb_dim"],
            lstm_dim=self.hyperparams["lstm_dim"],
            dropout=self.hyperparams["dropout_rate"],
            lr=self.hyperparams["learning_rate"],
            batch_size=self.hyperparams["batch_size"],
            train_window=self.hyperparams["window_size"],
            verbose=0,
        )

        # save weights so we can restart fitting from scratch (if desired by caller)
        self._learner.save_weights(self.hyperparams['weights_filepath'])

    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
        """ Sets primitive's training data
        
            Arguments:
                inputs {Inputs} -- D3M dataframe containing attributes
                outputs {Outputs} -- D3M dataframe containing targets
            
            Raises:
                ValueError: If multiple columns are annotated with 'Time' or 'DateTime' metadata
        """

        # save copy of train data so we don't predict for each row in training
        self._output_columns = outputs.columns
        self._train_data = inputs.copy()

        # combine inputs and outputs for internal TimeSeries object
        self._ts_frame = inputs.append_columns(outputs)

        # Parse cols needed for ts object
        self._get_cols(self._ts_frame.metadata)

        # drop cols if multiple special type columns
        if len(self._drop_cols) > 0:
            self._ts_frame = self._ts_frame.remove_columns(self._drop_cols)
            self._update_indices()

        # assumption is that integer timestamps are days (treated this way by DeepAR objects)
        self._integer_timestamps = (
            "http://schema.org/Integer" in self._ts_frame.metadata.query_column_field(
                self._timestamp_column, "semantic_types"))

        # calculate frequency of time series
        t_col = self._ts_frame.columns[self._timestamp_column]
        if self._grouping_column is None:
            time_col_sorted = np.sort(self._ts_frame[t_col])
            self._min_train = time_col_sorted[0]
            self.freq = calculate_time_frequency(time_col_sorted[1] -
                                                 self._min_train)
        else:
            # assume frequency is the same across all time series
            g_col = self._ts_frame.columns[self._grouping_column]
            self.freq = calculate_time_frequency(
                int(
                    self._ts_frame.groupby(g_col)[t_col].apply(
                        lambda x: np.diff(np.sort(x))).iloc[0][0]))
            self._min_train = self._ts_frame.groupby(g_col)[t_col].agg(
                "min").min()

        # Create TimeSeries dataset object and learner
        self._create_data_object_and_learner(self.hyperparams["val_split"])

    def fit(self,
            *,
            timeout: float = None,
            iterations: int = None) -> CallResult[None]:
        """ Fits DeepAR model using training data from set_training_data and hyperparameters
            
            Keyword Arguments:
                timeout {float} -- timeout, considered (default: {None})
                iterations {int} -- iterations, considered (default: {None})
            
            Returns:
                CallResult[None]
        """

        if iterations is None:
            iterations_set = False
            iterations = self.hyperparams["epochs"]
            validation = self.hyperparams["val_split"] > 0
        else:
            # caller fixed the number of iterations: recreate the data object
            # and learner without a validation split
            iterations_set = True
            validation = False
            self._create_data_object_and_learner(0)

        # time training for 1 epoch so we can consider timeout argument thoughtfully
        if timeout:
            logger.info("""Timing the fitting procedure for one epoch so we
                can consider timeout thoughtfully""")
            start_time = time.time()
            _, iterations_completed = self._learner.fit(
                validation=validation,
                steps_per_epoch=self.hyperparams["steps_per_epoch"],
                epochs=1,
                stopping_patience=self.hyperparams["early_stopping_patience"],
                stopping_delta=self.hyperparams["early_stopping_delta"],
                tensorboard=False,
            )
            epoch_time_estimate = time.time() - start_time
            # subtract 1 for the epoch that already happened and 1 more to be safe
            timeout_epochs = timeout // epoch_time_estimate - 2
            iters = min(timeout_epochs, iterations)
        else:
            iters = iterations

        # normal fitting
        logger.info(f"Fitting for {iters} iterations")
        start_time = time.time()

        _, iterations_completed = self._learner.fit(
            validation=validation,
            steps_per_epoch=self.hyperparams["steps_per_epoch"],
            epochs=iters,
            stopping_patience=self.hyperparams["early_stopping_patience"],
            stopping_delta=self.hyperparams["early_stopping_delta"],
            tensorboard=False,
        )
        logger.info(
            f"Fit for {iterations_completed} epochs, took {time.time() - start_time}s"
        )

        # maintain primitive state (mark that training data has been used)
        self._new_train_data = False
        self._is_fit = True

        # use fitting history to set CallResult return values
        if iterations_set:
            has_finished = False
        elif iters < iterations:
            has_finished = False
        else:
            has_finished = self._is_fit

        return CallResult(None,
                          has_finished=has_finished,
                          iterations_done=iterations_completed)

    def _get_pred_intervals(self, df, keep_all=False):
        """ private util function that retrieves unevenly spaced prediction intervals from data frame 

            Arguments:
                df {pandas df} -- df of predictions from which to extract prediction intervals

            Keyword Arguments:
                keep_all {bool} -- if True, take every interval slice, otherwise only take
                    those given by the df

            Returns:
                pd Series -- series of intervals, indexed by group, granularity of 1 interval 

        """

        # no grouping column
        if self._grouping_column is None:
            interval = discretize_time_difference(
                df.iloc[:, self._timestamp_column],
                self._min_train,
                self.freq,
                self._integer_timestamps,
            )
            if keep_all:
                interval = np.arange(min(interval), max(interval) + 1)
            return pd.Series([interval])

        # grouping column
        else:
            g_col, t_col = (
                df.columns[self._grouping_column],
                df.columns[self._timestamp_column],
            )
            all_intervals, groups = [], []
            for (group, vals) in df.groupby(g_col)[t_col]:
                interval = discretize_time_difference(vals, self._min_train,
                                                      self.freq,
                                                      self._integer_timestamps)
                if keep_all:
                    interval = np.arange(min(interval), max(interval) + 1)
                all_intervals.append(interval)
                groups.append(group)
            return pd.Series(all_intervals, index=groups)

    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """ Produce primitive's predictions for specific time series at specific future time instances
            * these specific timesteps / series are specified implicitly by input dataset

            Arguments:
                inputs {Inputs} -- full D3M dataframe, containing attributes, key, and target
            
            Keyword Arguments:
                timeout {float} -- timeout, not considered (default: {None})
                iterations {int} -- iterations, not considered (default: {None})

            Raises:
                PrimitiveNotFittedError: if primitive not fit
            
            Returns:
                CallResult[Outputs] -- (N, 2) dataframe with d3m_index and value for each prediction slice requested.
                    prediction slice = specific horizon idx for specific series in specific regression 
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        # drop duplicated special columns if they are still present in the inputs
        if len(self._drop_cols_no_tgt) > 0 and inputs.shape[1] != self._cols_after_drop:
            test_frame = inputs.remove_columns(self._drop_cols_no_tgt)
        else:
            test_frame = inputs.copy()

        # Create TimeSeriesTest object
        if self._train_data.equals(inputs):
            # produce() called on the training data: predict in-sample
            ts_test_object = TimeSeriesTest(self._ts_object)
        else:
            # produce() called on new test data
            ts_test_object = TimeSeriesTest(self._ts_object, test_frame)

        # get prediction slices
        pred_intervals = self._get_pred_intervals(test_frame)

        # make predictions with learner
        learner = DeepARLearner(
            self._ts_object,
            emb_dim=self.hyperparams["emb_dim"],
            lstm_dim=self.hyperparams["lstm_dim"],
            dropout=self.hyperparams["dropout_rate"],
            lr=self.hyperparams["learning_rate"],
            batch_size=self.hyperparams["batch_size"],
            train_window=self.hyperparams["window_size"],
            verbose=0,
        )
        learner.load_weights(self.hyperparams['weights_filepath'])
        start_time = time.time()
        logger.info(f"Making predictions...")
        preds = learner.predict(ts_test_object, include_all_training=True)
        logger.info(
            f"Prediction took {time.time() - start_time}s. Predictions array shape: {preds.shape}"
        )

        # slice predictions with learned intervals
        all_preds = []
        for p, idxs in zip(preds, pred_intervals.values):
            # take predictions at the requested indices (not simply the first n)
            all_preds.extend([p[i] for i in idxs])
        flat_list = np.array([p for pred_list in all_preds for p in pred_list])

        # if np.isinf(all_preds).any():
        #     logger.debug(f'There are {np.isinf(all_preds).sum()} inf preds')
        # if np.isnan(all_preds).any():
        #     logger.debug(f'There are {np.isnan(all_preds).sum()} nan preds')
        # logger.debug(f'Max: {preds.max()}, Min: {preds.min()}')

        # fill NaNs with 0s in case the model predicted some (it shouldn't - preventing an edge case)
        flat_list = np.nan_to_num(flat_list)

        # create output frame
        result_df = container.DataFrame(
            {self._ts_frame.columns[self._target_column]: flat_list},
            generate_metadata=True,
        )
        result_df.metadata = result_df.metadata.add_semantic_type(
            (metadata_base.ALL_ELEMENTS, 0),
            ("https://metadata.datadrivendiscovery.org/types/PredictedTarget"),
        )

        return CallResult(result_df, has_finished=self._is_fit)

    def produce_confidence_intervals(self,
                                     *,
                                     inputs: Inputs,
                                     timeout: float = None,
                                     iterations: int = None
                                     ) -> CallResult[Outputs]:
        """ produce confidence intervals for each series 'confidence_interval_horizon' periods into
                the future
        
        Arguments:
            inputs {Inputs} -- full D3M dataframe, containing attributes, key, and target
        
        Keyword Arguments:
            timeout {float} -- timeout, not considered (default: {None})
            iterations {int} -- iterations, not considered (default: {None})
        
        Raises:
            PrimitiveNotFittedError: if primitive not fit

        Returns:
            CallResult[Outputs] -- dataframe of confidence intervals indexed by series,
                one row per horizon step

            Ex.
                series | timestep | mean | 0.05 | 0.95
                --------------------------------------
                a      |    0     |  5   |   3  |   7
                a      |    1     |  6   |   4  |   8
                b      |    0     |  5   |   3  |   7
                b      |    1     |  6   |   4  |   8
        """

        if not self._is_fit:
            raise PrimitiveNotFittedError("Primitive not fitted.")

        alpha = self.hyperparams["confidence_interval_alpha"]

        # drop duplicated special columns if they are still present in the inputs
        if len(self._drop_cols_no_tgt) > 0 and inputs.shape[1] != self._cols_after_drop:
            test_frame = inputs.remove_columns(self._drop_cols_no_tgt)
        else:
            test_frame = inputs.copy()

        # Create TimeSeriesTest object
        if self._train_data.equals(inputs):
            # called on the training data: predict in-sample over the training range
            ts_test_object = TimeSeriesTest(self._ts_object)
            include_all_training = True
            horizon = 0
        else:
            # called on new test data
            ts_test_object = TimeSeriesTest(self._ts_object, test_frame)
            include_all_training = self.hyperparams[
                'seed_predictions_with_all_data']
            horizon = self.hyperparams["confidence_interval_horizon"]

        # make predictions with learner
        start_time = time.time()
        logger.info(f"Making predictions...")
        preds = self._learner.predict(
            ts_test_object,
            horizon=horizon,
            samples=self.hyperparams["confidence_interval_samples"],
            include_all_training=include_all_training,
            point_estimate=False)
        logger.info(
            f"Prediction took {time.time() - start_time}s. Predictions array shape: {preds.shape}"
        )

        # convert samples to percentiles
        means = np.percentile(preds, 50, axis=2).reshape(-1, 1)
        lowers = np.percentile(preds, alpha / 2 * 100, axis=2).reshape(-1, 1)
        uppers = np.percentile(preds, (1 - alpha / 2) * 100,
                               axis=2).reshape(-1, 1)

        assert (lowers < means).all()
        assert (means < uppers).all()

        # convert to df
        if self._grouping_column is None:
            indices = np.repeat(self._output_columns[0], preds.shape[1])
        else:
            indices = np.repeat(
                test_frame[test_frame.columns[self._grouping_column]].unique(),
                preds.shape[1])
        interval_df = pd.DataFrame(
            np.concatenate((means, lowers, uppers), axis=1),
            columns=["mean", str(alpha / 2),
                     str(1 - alpha / 2)],
            index=indices,
        )

        # add index column
        interval_df["horizon_index"] = np.tile(np.arange(preds.shape[1]),
                                               len(interval_df.index.unique()))

        logger.debug(interval_df.head())

        # structure return df
        return CallResult(
            container.DataFrame(interval_df, generate_metadata=True),
            has_finished=self._is_fit,
        )
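
# Standalone sketch (shapes and values are assumptions, not from the source)
# of the percentile conversion in produce_confidence_intervals(): preds is
# (series, horizon, samples) and each band is reduced over the sample axis.
import numpy as np

alpha = 0.05
rng = np.random.default_rng(0)
preds = rng.normal(loc=5.0, scale=1.0, size=(2, 3, 100))  # 2 series, 3 steps, 100 samples

means = np.percentile(preds, 50, axis=2).reshape(-1, 1)
lowers = np.percentile(preds, alpha / 2 * 100, axis=2).reshape(-1, 1)
uppers = np.percentile(preds, (1 - alpha / 2) * 100, axis=2).reshape(-1, 1)
print(means.shape, lowers.shape, uppers.shape)  # (6, 1) (6, 1) (6, 1)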
Example #5
import os
import time

import pandas as pd
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp

# DeepARLearner, the _multi_index_prep / _time_col_to_seconds /
# _create_ts_object helpers, and the module-level logger are assumed to be
# defined or imported elsewhere in this script.


def hp_search(working_dir,
              dataset='56_sunspots',
              epochs=100,
              metric='eval_mae_result',
              stopping_patience=5,
              stopping_delta=1):

    working_dir = os.path.join("./checkpoints", working_dir)

    # define domains for HP search
    HP_EMB_DIM = hp.HParam('emb_dim', hp.Discrete([32, 64, 128]))
    HP_LSTM_DIM = hp.HParam('lstm_dim', hp.Discrete([32, 64, 128]))
    HP_DROPOUT = hp.HParam('lstm_dropout', hp.Discrete([0.1, 0.2, 0.3]))
    HP_LR = hp.HParam('learning_rate', hp.Discrete([.0001, .001, .01]))
    HP_BS = hp.HParam('batch_size', hp.Discrete([32, 64, 128]))
    HP_WINDOW = hp.HParam('window_size', hp.Discrete([20, 40, 60]))

    # set up config
    with tf.summary.create_file_writer(working_dir).as_default():
        hp.hparams_config(hparams=[
            HP_EMB_DIM, HP_LSTM_DIM, HP_DROPOUT, HP_LR, HP_BS, HP_WINDOW
        ],
                          metrics=[hp.Metric(metric, display_name=metric)])

    # read in training df
    df = pd.read_csv(
        f'../../datasets/seed_datasets_current/{dataset}/TRAIN/dataset_TRAIN/tables/learningData.csv'
    )
    df = _multi_index_prep(df, dataset)
    df = _time_col_to_seconds(df, dataset)

    # create TimeSeries and Learner objects
    ds = _create_ts_object(df, dataset)

    # grid search over parameters
    run_num = 0
    total_run_count = len(HP_EMB_DIM.domain.values) * \
        len(HP_LSTM_DIM.domain.values) * \
        len(HP_DROPOUT.domain.values) * \
        len(HP_LR.domain.values) * \
        len(HP_BS.domain.values) * \
        len(HP_WINDOW.domain.values)

    # outfile for saving hp config and runtimes
    outfile = open(os.path.join(working_dir, "metrics.txt"), "w+", buffering=1)

    for emb_dim in HP_EMB_DIM.domain.values:
        for lstm_dim in HP_LSTM_DIM.domain.values:
            for dropout in HP_DROPOUT.domain.values:
                for lr in HP_LR.domain.values:
                    for bs in HP_BS.domain.values:
                        for window_size in HP_WINDOW.domain.values:

                            # create dict of parameters
                            hp_dict = {
                                'emb_dim': emb_dim,
                                'lstm_dim': lstm_dim,
                                'lstm_dropout': dropout,
                                'learning_rate': lr,
                                'batch_size': bs,
                                'window_size': window_size,
                            }
                            run_name = f'run-{run_num}'
                            logger.info(
                                f'--- Starting Run: {run_name} of {total_run_count} ---'
                            )
                            # print_dict = {
                            #     h.name: hp_dict[h] for h in hp_dict
                            # }
                            logger.info(f'HP Dict: {hp_dict}')

                            # create learner and fit with these HPs
                            start_time = time.time()
                            learner = DeepARLearner(ds,
                                                    verbose=1,
                                                    hparams=hp_dict)
                            final_metric = learner.fit(
                                epochs=epochs,
                                stopping_patience=stopping_patience,
                                stopping_delta=stopping_delta,
                                checkpoint_dir=os.path.join(
                                    working_dir, run_name))
                            outfile.write(
                                f'HPs: {hp_dict} ---- Metric: {final_metric} ---- '
                                f'Time: {round(time.time() - start_time, 2)}\n')

                            # log hparams and the final metric under a per-run summary
                            # writer so they are actually written for TensorBoard
                            with tf.summary.create_file_writer(
                                    os.path.join(working_dir,
                                                 run_name)).as_default():
                                hp.hparams(hp_dict)
                                tf.summary.scalar(metric, final_metric, step=1)

                            run_num += 1
    outfile.close()
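
# Hypothetical invocation of hp_search() above (not from the source): with
# three candidate values per hyperparameter the grid enumerates 3 ** 6 = 729
# runs, so a small epoch budget keeps the search tractable.
if __name__ == "__main__":
    hp_search(working_dir="sunspots_hp_search", epochs=10, stopping_patience=3)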