示例#1
0
    def test_historical_regular_holidays_fall_into_precomputed_holidays(self):
        """Check rule-generated weekday holidays against the ad-hoc list.

        Holidays are generated from ``self.calendar.regular_holidays`` over
        the span covered by ``self.calendar.adhoc_holidays``. Every generated
        holiday that lands on a weekday must also appear in the ad-hoc list;
        any that do not are reported in the failure message.
        """
        from pandas import DatetimeIndex

        def on_weekday(day):
            # weekday() is 0..4 for Monday..Friday, 5/6 for Saturday/Sunday.
            return day.weekday() < 5

        adhoc = DatetimeIndex(self.calendar.adhoc_holidays)

        # The precomputed list must contain no Saturdays or Sundays.
        self.assertTrue(all(on_weekday(day) for day in adhoc))

        # Generate rule-based holidays over the same date span.
        first_day, last_day = adhoc.min(), adhoc.max()
        generated = self.calendar.regular_holidays.holidays(
            first_day, last_day, return_name=True)

        # The generated set must include at least one weekend date.
        self.assertFalse(all(on_weekday(day) for day in generated.index))

        # Select (by label) only the generated holidays that fall on weekdays.
        weekday_labels = DatetimeIndex(
            [day for day in generated.index if on_weekday(day)])
        weekday_generated = generated[weekday_labels]

        # Each remaining generated holiday should be in the precomputed list.
        found = weekday_generated.index.isin(adhoc)
        missing = weekday_generated[~found]

        self.assertTrue(all(found), "missing holidays = \n%s" % missing)
示例#2
0
def read_eng_file(private_file: str,
                  date_index: bool = True,
                  compute_date: bool = True,
                  allowed_flags: Sequence[int] = (0, ),
                  dates: pd.DatetimeIndex = None) -> pd.DataFrame:
    """Read an engineering output file and filter it by quality flag and date.

    Parameters
    ----------
    private_file:
        the path to the private netCDF file (``.nc``/``.nc4``) or the
        .eof.csv (engineering output file, comma-separated value format) file

    date_index:
        if `True` then the returned dataframe is indexed by date.

    compute_date:
        if `True` and `date_index` is `False`, then a 'date' column is added
        to the dataframe containing the observation datetimes. Has no effect
        if reading a netCDF file.

    allowed_flags:
        which quality flags are kept in the dataframe. If `None` or the
        string `'all'`, rows are kept whenever their flag is greater than -99.

    dates:
        a date array indicating the date range of data to retain. Data between
        the min and max of this array (inclusive) will be kept. If this is
        `None`, no date limiting is done. If this is given, `compute_date` is
        considered `True` regardless of its actual value.

    Returns
    -------
    pd.DataFrame:
        the rows of the file that pass the flag filter and, when `dates` is
        given, the date-range filter
    """
    if private_file.endswith(('.nc', '.nc4')):
        df = _read_private_nc(private_file, date_index=date_index)
    else:
        # The date filter below reads a 'date' column when the frame is not
        # date-indexed, so force the column to be computed in that case (this
        # is the documented "dates implies compute_date" behavior).
        need_date_column = compute_date or (dates is not None
                                            and not date_index)
        df = _read_eof_csv(private_file,
                           date_index=date_index,
                           compute_date=need_date_column)

    # Only compare against 'all' for real strings: comparing an array-like of
    # flags with a string is elementwise and has no single truth value.
    keep_all_flags = allowed_flags is None or (
        isinstance(allowed_flags, str) and allowed_flags == 'all')
    if keep_all_flags:
        # NOTE(review): -99 looks like a fill/missing sentinel, so such rows
        # are dropped even in "all" mode - confirm against the file format.
        keep = df['flag'] > -99
    else:
        keep = df['flag'].isin(allowed_flags)

    if dates is not None:
        # NOTE(review): for netCDF input with date_index=False this assumes
        # _read_private_nc provides a 'date' column - confirm.
        df_dates = df.index if date_index else df['date']
        in_range = (df_dates >= dates.min()) & (df_dates <= dates.max())
        keep = keep & in_range

    return df[keep]
示例#3
0
    def to_tree(cls, node: pd.DatetimeIndex, ctx):
        """Serialize a DatetimeIndex into a plain dict.

        When a frequency can be inferred from ``node`` only that frequency is
        stored under ``"freq"``; otherwise the raw timestamps are stored as
        int64 under ``"values"``. The first/last elements and the min/max are
        always added under ``"start"``, ``"end"``, ``"min"`` and ``"max"``.
        ``ctx`` is not used here.
        """
        freq = node.inferred_freq
        if freq is None:
            # No regular spacing detected: keep every timestamp explicitly.
            tree = {"values": node.values.astype(np.int64)}
        else:
            tree = {"freq": freq}

        # Positional endpoints and value extremes, in this key order.
        tree.update(
            start=node[0],
            end=node[-1],
            min=node.min(),
            max=node.max(),
        )
        return tree
示例#4
0
    def to_yaml_tree(self, obj: pd.DatetimeIndex, tag: str, ctx) -> dict:
        """Convert a DatetimeIndex into a plain Python dict.

        When a frequency can be inferred from ``obj`` only that frequency is
        stored under ``"freq"``; otherwise the raw timestamps are stored as
        int64 under ``"values"``. The first/last elements and the min/max are
        always added under ``"start"``, ``"end"``, ``"min"`` and ``"max"``.
        ``tag`` and ``ctx`` are not used here.
        """
        freq = obj.inferred_freq
        if freq is None:
            # No regular spacing detected: keep every timestamp explicitly.
            tree = {"values": obj.values.astype(np.int64)}
        else:
            tree = {"freq": freq}

        # Positional endpoints and value extremes, in this key order.
        tree.update(
            start=obj[0],
            end=obj[-1],
            min=obj.min(),
            max=obj.max(),
        )
        return tree
示例#5
0
def create_cal(trade_dates: pd.DatetimeIndex) -> pd.DataFrame:
    """Build a trading calendar table.

    Args:
        trade_dates (pd.DatetimeIndex): the trading days.

    Returns:
        pd.DataFrame: one row per calendar day between the earliest and the
            latest trading day (inclusive), with a boolean ``is_tradeday``
            column that is True on the days listed in ``trade_dates``.
    """
    # Daily index spanning the whole period covered by the trading days.
    all_days = pd.date_range(trade_dates.min(), trade_dates.max())

    # Start with every day marked as non-trading, then flag the trading days.
    calendar = pd.DataFrame({'is_tradeday': False}, index=all_days)
    calendar.loc[trade_dates, 'is_tradeday'] = True

    return calendar
示例#6
0
    def _get_features(
        self,
        train_index: pd.DatetimeIndex,
        prediction_length: int,
        custom_features: np.ndarray = None,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Build the feature matrices for the training and prediction ranges.

        The training index is extended by `prediction_length` steps and the
        features are computed over that full range: a rescaled time-position
        row (exponential kernel only), then, for the seasonal model, the
        scaled `custom_features` (if given) and/or the scaled default
        seasonal time features. The result is split column-wise at the end
        of the training range.

        Parameters
        ----------
        train_index
            Pandas DatetimeIndex of the training range
        prediction_length
            number of time steps to predict
        custom_features
            shape: (num_custom_features, train_length + pred_length)

        Returns
        -------
        a tuple of (training, prediction) feature tensors
            shape: (num_features, train_length/pred_length)
        """
        train_length = len(train_index)
        total_length = train_length + prediction_length

        # Extend the training index so it also covers the prediction range.
        full_time_index = pd.date_range(
            train_index.min(),
            periods=total_length,
            freq=train_index.freq,
        )
        num_steps = len(full_time_index)

        if not self._is_exp_kernel():
            # No time-position row in this case: start from zero feature rows.
            features = np.empty((0, num_steps))
        else:
            # Position of each time point, 0 .. num_steps - 1, as one row,
            # rescaled into [-0.5, 0.5].
            positions = np.expand_dims(np.arange(num_steps), axis=0)
            features = positions / (total_length - 1) - 0.5

        # Extra rows are only added for the seasonal variant.
        if self.use_seasonal_model:
            has_custom = custom_features is not None

            if has_custom:
                assert len(custom_features.shape) == 2, (
                    "Custom features should be 2D-array where the rows "
                    "represent features and columns the time points.")

                assert custom_features.shape[1] == total_length, (
                    f"For a seasonal model, feat_dynamic_real must be defined "
                    f"for both training and prediction ranges. They are only "
                    f"provided for {custom_features.shape[1]} time steps "
                    f"instead of {train_length + prediction_length} steps.")

                scaled_custom = self.feature_scale * custom_features
                features = np.vstack([features, scaled_custom])

            # Default seasonal features are used when explicitly requested or
            # when no custom features were supplied.
            if self.use_default_time_features or not has_custom:
                generators = time_features_from_frequency_str(
                    full_time_index.freqstr)
                default_rows = []
                for make_feature in generators[:self.num_default_time_features]:
                    default_rows.append(
                        self.feature_scale * make_feature(full_time_index))

                features = np.vstack([features, *default_rows])

        # Columns before train_length are training, the rest are prediction.
        return features[:, :train_length], features[:, train_length:]