Example #1
def inverse_transform(X, scaler, trend=None):
    # De-standardize the data and, if detrending was used, add the trend back.
    X = X.astype(np.float32)
    X = scaler.inverse_transform(X)
    try:
        X += trend  # trend=None raises a TypeError and leaves X unchanged
    except TypeError as e:
        logger.warning(str(e))
    except Exception as e:
        logger.warning(
            'General error (not a TypeError) while adding back the time series trend. \n {}'
            .format(str(e)))
    return X
Example #2
def inverse_transform(X, scaler, trend=None):
    """
    :param X: the data
    :param scaler: the scaler that has been used for transforming X
    :param trend: the trend values that have been removed from X; None if no detrending has been used.
        It must have the same dimensions as X.
    :return:
        X with the trend added back and de-standardized
    """
    X = X.astype(np.float32)
    X = scaler.inverse_transform(X)
    try:
        X += trend  # trend=None raises a TypeError and leaves X unchanged
    except TypeError as e:
        logger.warning(str(e))
    except Exception as e:
        logger.warning(
            'General error (not a TypeError) while adding back the time series trend. \n {}'
            .format(str(e)))
    return X
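
A minimal usage sketch for inverse_transform, assuming a fitted sklearn StandardScaler and a hypothetical trend array with the same shape as the predictions (neither is defined in the snippets above):

import logging

import numpy as np
from sklearn.preprocessing import StandardScaler

logger = logging.getLogger(__name__)

# standardize some synthetic data, then undo the transform and re-add a trend
X_raw = np.random.rand(100, 1).astype(np.float32)
trend = np.linspace(0.0, 1.0, 100).reshape(-1, 1)  # hypothetical removed trend
scaler = StandardScaler().fit(X_raw)
X_std = scaler.transform(X_raw)

X_restored = inverse_transform(X_std, scaler, trend=trend)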
Example #3
def load_data(fill_nan=None,
              preprocessing=True,
              detrend=False,
              exogenous_vars=False,
              train_len=365 * 3 * SAMPLES_PER_DAY,
              test_len=365 * SAMPLES_PER_DAY,
              valid_len=0,
              split_type='simple',
              is_train=False,
              use_prebuilt=True):
    """
    Create a split of the data according to the given dimensions for each set.
    :param fill_nan: string that identifies how NaN values should be filled. Options are:
        -bfill: fill the NaN value at index i with the value at index i+1 (backward fill)
        -ffill: fill the NaN value at index i with the value at index i-1 (forward fill)
        -mean: fill the NaN value at index i with the mean value over the whole dataset at the same hour and minute
        -median: fill the NaN value at index i with the median value over the whole dataset at the same hour and minute
        -drop: drop all rows with missing values
    :param preprocessing: if True, standardize features using standard scaler
    :param detrend: if True, use train weekly statistics to detrend the time series.
        (WORKS ONLY FOR split_type=simple or split_type=default when is_train=False)
    :param exogenous_vars: if True, add exogenous features to the input data
        (temperatures + date/time feature + holiday feature)
    :param train_len: length of the train dataset
    :param test_len: length of the test set
    :param valid_len: length of the validation set
    :param split_type: 'simple', 'multi' or 'default'.
        - 'simple': See dts.utils.split.simple_split
        - 'multi':  See dts.utils.split.multiple_splits
        - 'default': Uses 'simple' split for train-test, then divides the training set using the 'multi' approach.
    :param is_train: if True, return the train/validation split instead of the train/test split
    :param use_prebuilt: if True, load the already-split data files from disk
    :return: a dict having the following (key, value) pairs:
        - train = training dataset, np.array of shape()
        - test = test dataset, np.array of shape()
        - scaler = the scaler used to preprocess the data
        - trend  = None, or the values that have to be added back after prediction if detrending has been used.
    """
    dataset = dict(
        train=None,
        test=None,
        scaler=None,
        trend=[None, None],
    )
    if valid_len == 0:
        valid_len = int(0.1 * train_len)

    if split_type == 'simple':
        train_test_split = lambda x: simple_split(
            x, train_len=None, valid_len=0, test_len=test_len)
        train_valid_split = lambda x: simple_split(
            train_test_split(x)[0],
            train_len=len(train_test_split(x)[0]) - valid_len,
            valid_len=0,
            test_len=valid_len)
    elif split_type == 'multi':
        train_test_split = lambda x: multiple_splits(
            x, train_len=train_len + valid_len, valid_len=0, test_len=test_len)
        train_valid_split = lambda x: [
            x[0][:, :train_len, :], None, x[0][:, train_len:, :]
        ]
    elif split_type == 'default':
        train_test_split = lambda x: simple_split(
            x, train_len=None, valid_len=0, test_len=int(0.1 * df.shape[0]))
        train_valid_split = lambda x: multiple_splits(
            train_test_split(x)[0],
            train_len=5 * 31 * SAMPLES_PER_DAY,
            valid_len=0,
            test_len=31 * SAMPLES_PER_DAY)
    else:
        raise ValueError('{} is not a valid split type.'.format(split_type))

    if not use_prebuilt:
        logger.info(
            'Fetching and preprocessing data. This will take a while...')
        try:
            df = load_dataset()
        except FileNotFoundError:
            logger.info(
                'The dataset seems to be unavailable on your disk at {}. \n'
                'Downloading...'.format(
                    os.path.join(config['data'], 'gefcom2014.csv')))
            download()
            df = load_dataset()

        if detrend:
            if split_type == 'default' and not is_train:
                df, trend_values = apply_detrend(
                    df, df.shape[0] - 365 * SAMPLES_PER_DAY)
                trend_values = train_test_split(
                    np.expand_dims(trend_values, -1))[::2]
            elif split_type == 'simple' and is_train:
                df, trend_values = apply_detrend(df, train_len)
                trend_values = train_valid_split(
                    np.expand_dims(trend_values, -1))[::2]
            elif split_type == 'simple':
                df, trend_values = apply_detrend(df, train_len + valid_len)
                trend_values = train_test_split(
                    np.expand_dims(trend_values, -1))[::2]
            else:
                raise ValueError(
                    'Detrend cannot be applied with this type of split.')
            dataset['trend'] = trend_values

        X = df[TARGET].values[:-1]  # load values
        X = np.expand_dims(X, axis=-1)
        if preprocessing:
            # init scaler using only information for training
            scaler, _ = transform(X[:train_len])
            # actual preprocess
            _, X = transform(X, scaler)
        else:
            scaler = None
        if exogenous_vars:
            # init scaler using only temperature information for training
            X_temp, X_ex = add_exogenous_variables(df, one_hot=True)
            scaler_temp, _ = transform(X_temp[:train_len],
                                       scaler_type='minmax')
            _, X_temp = transform(X_temp, scaler_temp)
            X = np.concatenate([X, X_temp, X_ex],
                               axis=1)  # Load @ t-1, Datetime @ t, Temp @ t

        if is_train:
            data = train_valid_split(X)
        else:
            data = train_test_split(X)

        dataset['scaler'] = scaler
        dataset['train'] = np.array(data[0], dtype=np.float32)
        dataset['test'] = np.array(data[2], dtype=np.float32)
        return dataset

    else:
        logger.info('Fetching preprocessed data from disk...')
        try:
            return load_prebuilt_data(split_type=split_type,
                                      exogenous_vars=exogenous_vars,
                                      detrend=detrend,
                                      is_train=is_train,
                                      dataset_name=NAME)
        except FileNotFoundError:
            logger.warning(
                'A preprocessed version of the data does not exist on disk. '
                'The train/test data will be created now.')
            return load_data(fill_nan,
                             preprocessing,
                             detrend,
                             exogenous_vars,
                             train_len,
                             test_len,
                             valid_len,
                             split_type,
                             is_train,
                             use_prebuilt=False)
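
A hedged usage sketch for load_data, mirroring the call made in the training scripts below; the parameter values are illustrative:

data = load_data(fill_nan='median',
                 preprocessing=True,
                 detrend=True,
                 exogenous_vars=False,
                 split_type='simple',
                 is_train=False,
                 use_prebuilt=True)
train, test = data['train'], data['test']
scaler, trend = data['scaler'], data['trend']
# trend is [train_trend, test_trend]; trend[1] is added back after prediction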
Example #4
def main(_run):
    ################################
    # Load Experiment's parameters #
    ################################
    params = vars(args)
    logger.info(params)

    ################################
    #         Load Dataset         #
    ################################
    dataset_name = params['dataset']
    if dataset_name == 'gefcom':
        dataset = gefcom2014
    else:
        dataset = uci_single_households

    data = dataset.load_data(fill_nan='median',
                             preprocessing=True,
                             split_type='simple',
                             is_train=params['train'],
                             detrend=params['detrend'],
                             exogenous_vars=params['exogenous'],
                             use_prebuilt=True)
    scaler, train, test, trend = data['scaler'], data['train'], data[
        'test'], data['trend']
    if not params['detrend']:
        trend = None

    if params['recursive_forecast']:
        horizon = 1
    else:
        horizon = params['output_sequence_length']
    X_train, y_train = get_rnn_inputs(
        train,
        window_size=params['input_sequence_length'],
        horizon=horizon,
        shuffle=True,
        multivariate_output=params['exogenous'])

    ################################
    #     Build & Train Model      #
    ################################
    if params['ffnn_type'] == 'simple':
        ffnn = SimpleNet
    else:
        ffnn = ResNet
    ffnn = ffnn(layers=params['layers'],
                kernel_initializer='glorot_normal',
                kernel_regularizer=l2(params['l2_reg']),
                bias_regularizer=l2(params['l2_reg']),
                use_bias=False,
                recursive_forecast=params['recursive_forecast'])

    if params['exogenous']:
        exog_var_train = y_train[:, :, 1:]  # [n_samples, 1, n_features]
        y_train = y_train[:, :, 0]  # [n_samples, 1]
        conditions_shape = (exog_var_train.shape[1], exog_var_train.shape[-1])

        X_test, y_test = get_rnn_inputs(
            test,
            window_size=params['input_sequence_length'],
            horizon=params['output_sequence_length'],
            shuffle=False,
            multivariate_output=True)
        exog_var_test = y_test[:, :, 1:]  # [n_samples, 1, n_features]
        y_test = y_test[:, :, 0]  # [n_samples, 1]
    else:
        X_test, y_test = get_rnn_inputs(
            test,
            window_size=params['input_sequence_length'],
            horizon=params['output_sequence_length'],
            shuffle=False)
        exog_var_train = None
        exog_var_test = None
        conditions_shape = None

    # IMPORTANT: remember to pass the trend values through the same ops as the input values
    if params['detrend']:
        X_trend_test, y_trend_test = get_rnn_inputs(
            trend[1],
            window_size=params['input_sequence_length'],
            horizon=params['output_sequence_length'],
            shuffle=False)
        trend = y_trend_test

    model = ffnn.build_model(input_shape=(X_train.shape[1], X_train.shape[-1]),
                             horizon=params['output_sequence_length'],
                             conditions_shape=conditions_shape)

    if params['load']:
        logger.info("Loading model's weights from disk using {}".format(
            params['load']))
        model.load_weights(params['load'])

    optimizer = Adam(params['learning_rate'])
    model.compile(optimizer=optimizer, loss=['mse'], metrics=metrics)
    callbacks = [EarlyStopping(patience=50, monitor='val_loss')]

    if params['exogenous']:
        history = model.fit([X_train, exog_var_train],
                            y_train,
                            validation_split=0.1,
                            batch_size=params['batch_size'],
                            epochs=params['epochs'],
                            callbacks=callbacks,
                            verbose=2)
    else:
        history = model.fit(X_train,
                            y_train,
                            validation_split=0.1,
                            batch_size=params['batch_size'],
                            epochs=params['epochs'],
                            callbacks=callbacks,
                            verbose=2)

    ################################
    #          Save weights        #
    ################################
    model_filepath = os.path.join(
        config['weights'], '{}_{}_{}'.format(params['ffnn_type'],
                                             params['dataset'], time.time()))
    model.save_weights(model_filepath)
    logger.info("Model's weights saved at {}".format(model_filepath))

    #################################
    # Evaluate on Validation & Test #
    #################################
    fn_inverse_val = lambda x: dataset.inverse_transform(
        x, scaler=scaler, trend=None)
    fn_inverse_test = lambda x: dataset.inverse_transform(
        x, scaler=scaler, trend=trend)
    fn_plot = lambda x: plot(x, dataset.SAMPLES_PER_DAY, save_at=None)

    if params['recursive_forecast']:
        val_scores = []
        txt = "When FFNN is trained in Recursive mode training and inference are different. Specifically, training is "\
              "a 1 step forecasting problem and inference is multi step forecasting problem. Thus, "\
              "validation results will not be provided as they are not comparable with test results"
        logger.warn(txt)
        _run.info['extra'] = txt
    else:
        # This filter is needed because of the unexpected behaviour of history.validation_data when using ResNet.
        validation_data = list(
            filter(lambda x: isinstance(x, np.ndarray),
                   history.validation_data))
        val_scores = ffnn.evaluate(validation_data[:-1],
                                   fn_inverse=fn_inverse_val)

    if params['exogenous']:
        test_scores = ffnn.evaluate([[X_test, exog_var_test], y_test],
                                    fn_inverse=fn_inverse_test,
                                    fn_plot=fn_plot)
    else:
        test_scores = ffnn.evaluate([X_test, y_test],
                                    fn_inverse=fn_inverse_test,
                                    fn_plot=fn_plot)

    metrics_names = [
        m.__name__ if not isinstance(m, str) else m for m in model.metrics
    ]
    return dict(zip(metrics_names, val_scores)), \
           dict(zip(metrics_names, test_scores)), \
           model_filepath
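
The windowing helper get_rnn_inputs is called throughout these scripts but its source is not shown here. Below is a minimal sketch of the sliding-window construction it presumably performs, under the assumption that it pairs window_size past steps with the next horizon values of the target (feature 0); the name and exact behaviour of the real helper may differ:

import numpy as np

def sliding_window_inputs(series, window_size, horizon):
    # series: [n_steps, n_features]; the target is assumed to be feature 0
    X, y = [], []
    for i in range(len(series) - window_size - horizon + 1):
        X.append(series[i:i + window_size])
        y.append(series[i + window_size:i + window_size + horizon, 0])
    # X: [n_samples, window_size, n_features], y: [n_samples, horizon]
    return np.asarray(X, dtype=np.float32), np.asarray(y, dtype=np.float32)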
Example #5
def main(_run):
    ################################
    # Load Experiment's parameters #
    ################################
    params = vars(args)
    print(params)

    ################################
    #         Load Dataset         #
    ################################
    dataset_name = params['dataset']
    if dataset_name == 'gefcom':
        dataset = gefcom2014
    else:
        dataset = uci_single_households

    data = dataset.load_data(fill_nan='median',
                             preprocessing=True,
                             split_type='simple',
                             is_train=params['train'],
                             detrend=params['detrend'],
                             exogenous_vars=params['exogenous'],
                             use_prebuilt=True)
    scaler, train, test, trend = data['scaler'], data['train'], data['test'], data['trend']
    if not params['detrend']:
        trend = None

    if params['MIMO']:
        X_train, y_train = get_rnn_inputs(train,
                                          window_size=params['input_sequence_length'],
                                          horizon=params['output_sequence_length'],
                                          shuffle=True,
                                          multivariate_output=True)
    else:
        X_train, y_train = get_rnn_inputs(train,
                                          window_size=params['input_sequence_length'],
                                          horizon=1,
                                          shuffle=True,
                                          multivariate_output=True)

    if params['exogenous']:
        exog_var_train = y_train[:, :, 1:]
        y_train = y_train[:, :, 0]
        exogenous_shape = (exog_var_train.shape[1], exog_var_train.shape[-1])
        if params['MIMO']:
            X_train = [X_train, exog_var_train]

        X_test, y_test = get_rnn_inputs(test,
                                        window_size=params['input_sequence_length'],
                                        horizon=params['output_sequence_length'],
                                        shuffle=False,
                                        multivariate_output=True)
        exog_var_test = y_test[:, :, 1:]  # [n_samples, 1, n_features]
        y_test = y_test[:, :, 0]  # [n_samples, 1]
    else:
        y_train = y_train[:, :, 0]
        X_test, y_test = get_rnn_inputs(test,
                                        window_size=params['input_sequence_length'],
                                        horizon=params['output_sequence_length'],
                                        shuffle=False)
        exog_var_train = None
        exog_var_test = None
        exogenous_shape = None

    # IMPORTANT: remember to pass the trend values through the same ops as the input values
    if params['detrend']:
        X_trend_test, y_trend_test = get_rnn_inputs(trend[1],
                                                    window_size=params['input_sequence_length'],
                                                    horizon=params['output_sequence_length'],
                                                    shuffle=False)
        trend = y_trend_test

    ################################
    #     Build & Train Model      #
    ################################
    cell_params = dict(units=params['units'],
                       activation='tanh',
                       dropout=params['dropout'],
                       kernel_regularizer=l2(params['l2']),
                       recurrent_regularizer=l2(params['l2']),
                       kernel_initializer='lecun_uniform',
                       recurrent_initializer='lecun_uniform')

    if params['MIMO']:
        rnn = RecurrentNN_MIMO(cell_type=params['cell'],
                               layers=params['layers'],
                               cell_params=cell_params)
        if params['exogenous']:
            model = rnn.build_model(input_shape=(params['input_sequence_length'], X_train[0].shape[-1]),
                                    horizon=params['output_sequence_length'],
                                    exogenous_shape=exogenous_shape)
        else:
            model = rnn.build_model(input_shape=(params['input_sequence_length'], X_train.shape[-1]),
                                    horizon=params['output_sequence_length'])
    else:
        rnn = RecurrentNN_Rec(cell_type=params['cell'],
                              layers=params['layers'],
                              cell_params=cell_params)
        model = rnn.build_model(input_shape=(params['input_sequence_length'], X_train.shape[-1]),
                                horizon=params['output_sequence_length'])

    model.compile(optimizer=Adam(params['learning_rate']), loss='mse', metrics=metrics)
    callbacks = [EarlyStopping(patience=100, monitor='val_loss'),
                 # LambdaCallback(on_epoch_end=lambda _, logs: f_log_metrics(logs=logs))
                 ]
    if params['load']:
        logger.info("Loading model's weights from disk using {}".format(params['load']))
        model.load_weights(params['load'])

    history = model.fit(X_train, y_train,
                        validation_split=0.1,
                        batch_size=params['batch_size'],
                        # steps_per_epoch=train.shape[0] // params['batch_size'],
                        epochs=params['epochs'],
                        callbacks=callbacks,
                        verbose=2)

    ################################
    #          Save weights        #
    ################################
    model_filepath = os.path.join(config['weights'], '{}_{}_{}'.format(
        params['cell'], params['dataset'], time.time()))
    model.save_weights(model_filepath)
    logger.info("Model's weights saved at {}".format(model_filepath))

    #################################
    # Evaluate on Validation & Test #
    #################################
    fn_inverse_val = lambda x: dataset.inverse_transform(x, scaler=scaler, trend=None)
    fn_inverse_test = lambda x: dataset.inverse_transform(x, scaler=scaler, trend=trend)
    fn_plot = lambda x: plot(x, dataset.SAMPLES_PER_DAY, save_at=None)

    if params['MIMO']:
        val_scores = rnn.evaluate(history.validation_data[:-1], fn_inverse=fn_inverse_val)
    else:
        val_scores = []
        txt = "When RNN is trained in Recursive mode training and inference are different. Specifically, training is "\
              "a 1 step forecasting problem and inference is multi step forecasting problem. Thus, "\
              "validation results will not be provided as they are not comparable with test results"
        logger.warn(txt)
        _run.info['extra'] = txt
    if params['exogenous']:
        test_scores = rnn.evaluate([[X_test, exog_var_test], y_test], fn_inverse=fn_inverse_test, fn_plot=fn_plot)
    else:
        test_scores = rnn.evaluate([X_test, y_test], fn_inverse=fn_inverse_test, fn_plot=fn_plot)

    metrics_names = [m.__name__ if not isinstance(m, str) else m for m in model.metrics]
    return dict(zip(metrics_names, val_scores)), \
           dict(zip(metrics_names, test_scores)), \
           model_filepath
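
For reference, a hedged sketch of the recursive multi-step inference that the warning above refers to: a model trained on 1-step targets is applied repeatedly, feeding each prediction back into the input window. Here model stands for any Keras-style 1-step forecaster; this is an illustration, not the dts implementation:

import numpy as np

def recursive_forecast(model, window, n_steps):
    # window: [window_size, n_features]; returns [n_steps] predictions
    window = window.copy()
    preds = []
    for _ in range(n_steps):
        y_hat = model.predict(window[np.newaxis, ...], verbose=0)[0, 0]
        preds.append(y_hat)
        window = np.roll(window, -1, axis=0)  # drop the oldest step
        window[-1, 0] = y_hat                 # append the prediction as the newest value
    return np.asarray(preds)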