def test_df_utils_func():
    log.info("testing: df_utils Test")
    df = pd.read_csv(PEYTON_FILE, nrows=95)
    df = df_utils.check_dataframe(df, check_y=False)

    # test find_time_threshold
    df_dict, _ = df_utils.prep_copy_df_dict(df)
    time_threshold = df_utils.find_time_threshold(df_dict,
                                                  n_lags=2,
                                                  valid_p=0.2,
                                                  inputs_overbleed=True)
    df_train, df_val = df_utils.split_considering_timestamp(
        df_dict,
        n_lags=2,
        n_forecasts=2,
        inputs_overbleed=True,
        threshold_time_stamp=time_threshold)

    # init data params with a dict of dataframes, trying several normalization modes
    global_data_params = df_utils.init_data_params(df_dict, normalize="soft")
    global_data_params = df_utils.init_data_params(df_dict, normalize="soft1")
    global_data_params = df_utils.init_data_params(df_dict,
                                                   normalize="standardize")

    log.debug("Time Threshold: \n {}".format(time_threshold))
    log.debug("Df_train: \n {}".format(type(df_train)))
    log.debug("Df_val: \n {}".format(type(df_val)))
Example #2
    def test_time_dataset(self):
        # manually load any file that stores a time series, for example:
        df_in = pd.read_csv(AIR_FILE, index_col=False)
        log.debug("Infile shape: {}".format(df_in.shape))

        n_lags = 3
        n_forecasts = 1
        valid_p = 0.2
        df_train, df_val = df_utils.split_df(df_in,
                                             n_lags,
                                             n_forecasts,
                                             valid_p,
                                             inputs_overbleed=True)

        # create a tabularized dataset from time series
        df = df_utils.check_dataframe(df_train)
        data_params = df_utils.init_data_params(df, normalize="minmax")
        df = df_utils.normalize(df, data_params)
        inputs, targets = time_dataset.tabularize_univariate_datetime(
            df,
            n_lags=n_lags,
            n_forecasts=n_forecasts,
        )
        log.debug("tabularized inputs: {}".format("; ".join([
            "{}: {}".format(inp, values.shape)
            for inp, values in inputs.items()
        ])))
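
        # A rough shape check one could append -- a sketch: the sample-count
        # formula and the "lags" key are assumptions about how
        # tabularize_univariate_datetime windows the series, not guarantees
        # taken from the snippet above.
        n_samples = len(df) - n_lags - n_forecasts + 1
        assert targets.shape == (n_samples, n_forecasts)
        assert inputs["lags"].shape == (n_samples, n_lags)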
Example #3
    def test_normalize(self):
        for add in [0, -1, 0.00000001, -0.99999999]:
            length = 1000
            days = pd.date_range(start="2017-01-01", periods=length)
            y = np.zeros(length)
            y[1] = 1
            y = y + add
            df = pd.DataFrame({"ds": days, "y": y})
            m = NeuralProphet(normalize="soft")
            data_params = df_utils.init_data_params(
                df,
                normalize=m.normalize,
                covariates_config=m.config_covar,
                regressor_config=m.regressors_config,
                events_config=m.events_config,
            )
            df_norm = df_utils.normalize(df, data_params)
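
A hand-rolled sketch of what "soft" scaling amounts to, assuming (per NeuralProphet's documented behavior) that it maps the series minimum to 0.0 and the 95th quantile to 1.0; y_demo is invented for illustration, and this is not the library's actual code path:

# Assumed "soft" formula: shift = min(y), scale = quantile(y, 0.95) - min(y)
y_demo = np.array([0.0, 1.0, 2.0, 10.0])
shift = y_demo.min()
scale = np.quantile(y_demo, 0.95) - shift  # 95th percentile minus min
y_soft = (y_demo - shift) / scale          # min -> 0.0, q95 -> 1.0, max may exceed 1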
Example #4
    def init_data_params(self,
                         df_dict,
                         covariates_config=None,
                         regressor_config=None,
                         events_config=None):
        if len(df_dict) == 1:
            if not self.global_normalization:
                log.info(
                    "Setting normalization to global as only one dataframe provided for training."
                )
                self.global_normalization = True
        self.local_data_params, self.global_data_params = df_utils.init_data_params(
            df_dict=df_dict,
            normalize=self.normalize,
            covariates_config=covariates_config,
            regressor_config=regressor_config,
            events_config=events_config,
            global_normalization=self.global_normalization,
            global_time_normalization=self.global_time_normalization,
        )
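
Only one dataframe forces global normalization in the branch above; with two or more dataframes the configured flag is respected and per-dataframe (local) data params stay available. A hypothetical follow-on showing that case (df_small/df_large and their values are invented for illustration):

days = pd.date_range(start="2017-01-01", periods=50)
df_small = pd.DataFrame({"ds": days, "y": np.linspace(0, 1, 50)})
df_large = pd.DataFrame({"ds": days, "y": np.linspace(0, 1000, 50)})
m = NeuralProphet(normalize="soft")
# Two dataframes: the len(df_dict) == 1 branch is skipped, so
# global_normalization keeps whatever value it was constructed with.
m.config_normalization.init_data_params(
    {"small": df_small, "large": df_large},
    m.config_covar, m.regressors_config, m.events_config,
)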
Example #5
def test_normalize():
    length = 100
    days = pd.date_range(start="2017-01-01", periods=length)
    y = np.ones(length)
    y[1] = 0
    y[2] = 2
    y[3] = 3.3
    df = pd.DataFrame({"ds": days, "y": y})
    m = NeuralProphet(normalize="soft")
    # with config
    m.config_normalization.init_data_params(
        df_utils.prep_copy_df_dict(df)[0], m.config_covar, m.regressors_config,
        m.events_config)
    df_norm = m._normalize(df_utils.prep_copy_df_dict(df)[0])
    m.config_normalization.unknown_data_normalization = True
    df_norm = m._normalize(df_utils.prep_copy_df_dict(df)[0])
    m.config_normalization.unknown_data_normalization = False
    # using config for utils
    df_norm = df_utils.normalize(df.copy(deep=True),
                                 m.config_normalization.global_data_params)
    df_norm = df_utils.normalize(
        df_utils.prep_copy_df_dict(df)[0]["__df__"],
        m.config_normalization.local_data_params["__df__"])

    # with utils
    local_data_params, global_data_params = df_utils.init_data_params(
        df_dict=df_utils.prep_copy_df_dict(df)[0],
        normalize=m.config_normalization.normalize,
        covariates_config=m.config_covar,
        regressor_config=m.regressors_config,
        events_config=m.events_config,
        global_normalization=m.config_normalization.global_normalization,
        global_time_normalization=m.config_normalization.global_time_normalization,
    )
    df_norm = df_utils.normalize(df.copy(deep=True), global_data_params)
    df_norm = df_utils.normalize(
        df_utils.prep_copy_df_dict(df)[0]["__df__"],
        local_data_params["__df__"])
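
A post-condition these calls rely on, as a sketch under the assumption that df_utils.normalize writes normalized time to a new t column and the scaled target to y_scaled while keeping ds and y intact:

    # Assumed mapping inside df_utils.normalize: "ds" -> adds "t",
    # "y" -> adds "y_scaled"; original columns are preserved.
    assert {"ds", "y", "t", "y_scaled"}.issubset(df_norm.columns)
    log.debug("normalized head:\n{}".format(df_norm.head()))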