Exemplo n.º 1
0
    def gen_dt_feature(self):
        '''
        | Derive datetime features from the datetime column and attach them to every row.
        | Each generated feature is registered in the feature columns under the name
        | "<FEATURE>(<datetime column name>)". The features currently generated are:
        | "MINUTE": The minute of the time stamp.
        | "DAY": The day of the time stamp.
        | "DAYOFYEAR": The ordinal day of the year of the time stamp.
        | "HOUR": The hour of the time stamp.
        | "WEEKDAY": The day of the week of the time stamp, Monday=0, Sunday=6.
        | "WEEKOFYEAR": The ordinal week of the year of the time stamp.
        | "MONTH": The month of the time stamp.
        | "IS_AWAKE": Bool value, True when the time stamp falls in awake hours
        | (hours between 6A.M. and 1A.M.).
        | "IS_BUSY_HOURS": Bool value, True when the time stamp falls in busy hours
        | (hours between 7A.M. and 10A.M. and hours between 4P.M. and 8P.M.).
        | "IS_WEEKEND": Bool value, True when the time stamp falls on a weekend
        | (Saturdays and Sundays).

        :return: the tsdataset instance.
        '''
        # Generate the features one id at a time, then stitch the pieces back together.
        per_id_frames = []
        for series_id in self._id_list:
            rows_of_id = self.df[self.df[self.id_col] == series_id]
            per_id_frames.append(
                generate_dt_features(input_df=rows_of_id, dt_col=self.dt_col))
        self.df = pd.concat(per_id_frames)

        from zoo.chronos.data.utils.feature import TIME_FEATURE, \
            ADDITIONAL_TIME_FEATURE_HOUR, ADDITIONAL_TIME_FEATURE_WEEKDAY
        # Collect every feature name, keeping the three groups in their original order.
        new_attributes = list(TIME_FEATURE)
        new_attributes.extend(ADDITIONAL_TIME_FEATURE_HOUR)
        new_attributes.extend(ADDITIONAL_TIME_FEATURE_WEEKDAY)

        # Every registered column name carries the datetime column name as a suffix.
        name_suffix = "({})".format(self.dt_col)
        self.feature_col += [attr + name_suffix for attr in new_attributes]
        return self
Exemplo n.º 2
0
    def gen_dt_feature(self):
        '''
        Attach datetime features to each row. The features currently generated are:
        "MINUTE", "DAY", "DAYOFYEAR", "HOUR", "WEEKDAY", "WEEKOFYEAR", "MONTH", "IS_AWAKE",
        "IS_BUSY_HOURS", "IS_WEEKEND". Each one is registered in the feature columns under
        the name "<FEATURE>(<datetime column name>)".

        :return: the tsdataset instance.

        Note: it should be called before scale if needed.
        '''
        # Process the rows of each id separately and concatenate the results afterwards.
        frames = []
        for current_id in self._id_list:
            id_mask = self.df[self.id_col] == current_id
            frames.append(
                generate_dt_features(input_df=self.df[id_mask], dt_col=self.dt_col))
        self.df = pd.concat(frames)

        from zoo.chronos.data.utils.feature import TIME_FEATURE, \
            ADDITIONAL_TIME_FEATURE_HOUR, ADDITIONAL_TIME_FEATURE_WEEKDAY
        feature_groups = (
            TIME_FEATURE,
            ADDITIONAL_TIME_FEATURE_HOUR,
            ADDITIONAL_TIME_FEATURE_WEEKDAY,
        )
        # Flatten the groups in order, then tag each name with the datetime column name.
        added_attributes = []
        for group in feature_groups:
            added_attributes += list(group)
        tag = "({})".format(self.dt_col)
        self.feature_col += [attribute + tag for attribute in added_attributes]
        return self
Exemplo n.º 3
0
 def test_generate_dt_features(self):
     # Fixture: eight consecutive daily time stamps plus three random-valued columns.
     dates = pd.date_range('1/1/2019', periods=8)
     data = np.random.randn(8, 3)
     columns = {"datetime": dates}
     for position, column_name in enumerate(("values", "A", "B")):
         columns[column_name] = data[:, position]
     df = pd.DataFrame(columns)

     df = generate_dt_features(df, dt_col="datetime")

     # The result must hold exactly the input columns plus one column per datetime feature.
     input_columns = {'datetime', 'values', 'A', 'B'}
     feature_columns = {
         'MINUTE(datetime)', 'HOUR(datetime)', 'DAY(datetime)',
         'DAYOFYEAR(datetime)', 'WEEKDAY(datetime)', 'WEEKOFYEAR(datetime)',
         'MONTH(datetime)', 'IS_AWAKE(datetime)', 'IS_BUSY_HOURS(datetime)',
         'IS_WEEKEND(datetime)'
     }
     assert set(df.columns) == input_columns | feature_columns
Exemplo n.º 4
0
    def gen_dt_feature(self, features="auto", one_hot_features=None):
        '''
        Generate datetime feature(s) for each record.

        :param features: str or list, states which feature(s) will be generated. If the value
               is set to be a str, it should be one of "auto" or "all". For "auto", a subset
               of datetime features will be generated under the consideration of the sampling
               frequency of your data. For "all", the whole set of datetime features will be
               generated. If the value is set to be a list, the list should contain the features
               you want to generate. A table of all datetime features and their description is
               listed below. The value defaults to "auto".
        :param one_hot_features: list, states which feature(s) will be generated as one-hot-encoded
               feature. The value defaults to None, which means no features will be generated
               as one-hot-encoded.

        | "MINUTE": The minute of the time stamp.
        | "DAY": The day of the time stamp.
        | "DAYOFYEAR": The ordinal day of the year of the time stamp.
        | "HOUR": The hour of the time stamp.
        | "WEEKDAY": The day of the week of the time stamp, Monday=0, Sunday=6.
        | "WEEKOFYEAR": The ordinal week of the year of the time stamp.
        | "MONTH": The month of the time stamp.
        | "IS_AWAKE": Bool value indicating whether it belongs to awake hours for the time stamp,
        | True for hours between 6A.M. and 1A.M.
        | "IS_BUSY_HOURS": Bool value indicating whether it belongs to busy hours for the time
        | stamp, True for hours between 7A.M. and 10A.M. and hours between 4P.M. and 8P.M.
        | "IS_WEEKEND": Bool value indicating whether it belongs to weekends for the time stamp,
        | True for Saturdays and Sundays.

        :return: the tsdataset instance. Names of the generated features are added to the
                 feature columns, each name at most once.
        '''
        # Accumulator handed to generate_dt_features, which reports the generated names in it.
        features_generated = []
        # Features are generated on the rows of one id at a time and concatenated afterwards.
        df_list = [
            generate_dt_features(
                input_df=self.df[self.df[self.id_col] == id_name],
                dt_col=self.dt_col,
                features=features,
                one_hot_features=one_hot_features,
                freq=self._freq,
                features_generated=features_generated)
            for id_name in self._id_list
        ]
        self.df = pd.concat(df_list)

        # The same accumulator is shared by every per-id call above, so a name may have been
        # recorded once per id (NOTE(review): depends on how generate_dt_features fills the
        # list - confirm). Register each name once, in first-seen order, and skip names that
        # are already feature columns so that repeated calls do not add duplicates either.
        known_names = set(self.feature_col)
        new_names = []
        for name in features_generated:
            if name not in known_names:
                known_names.add(name)
                new_names.append(name)
        self.feature_col += new_names
        return self
Exemplo n.º 5
0
 def test_generate_dt_features(self):
     # Fixture: eight consecutive daily time stamps plus three random-valued columns.
     dates = pd.date_range('1/1/2019', periods=8)
     data = np.random.randn(8, 3)
     columns = {"datetime": dates}
     for position, column_name in enumerate(("values", "A", "B")):
         columns[column_name] = data[:, position]
     df = pd.DataFrame(columns)

     # "auto" feature selection on data sampled once per day, without one-hot encoding.
     generation_options = dict(
         dt_col="datetime",
         features="auto",
         one_hot_features=None,
         freq=pd.Timedelta("1D"),
         features_generated=[],
     )
     df = generate_dt_features(df, **generation_options)

     # The result must hold exactly the input columns plus the day-level-and-coarser features.
     input_columns = {'datetime', 'values', 'A', 'B'}
     feature_columns = {
         'DAY', 'DAYOFYEAR', 'WEEKDAY', 'WEEKOFYEAR', 'MONTH', 'YEAR',
         'IS_WEEKEND'
     }
     assert set(df.columns) == input_columns | feature_columns