Пример #1
0
def print_point_statistics(data,
                           models,
                           externalmodels=None,
                           externalforecasts=None,
                           indexers=None):
    """
    Print a LaTeX-like table with the point forecasting accuracy of the models.

    :param data: test data passed on to the accuracy measures
    :param models: iterable of models exposing ``shortname`` and ``order``
    :param externalmodels: optional sequence with the names of external methods
    :param externalforecasts: forecasts of the external methods, stored at the
        same positions as their names in externalmodels
    :param indexers: forwarded unchanged to Measures.get_point_statistics
    :return: None, the table is only written to the standard output
    """
    cell_end = "		& "
    row_end = "	\\\\ \n"
    table = ["Model		& Order     & RMSE		& SMAPE      & Theil's U		\\\\ \n"]

    for fts_model in models:
        rmse, smape, theil = Measures.get_point_statistics(
            data, fts_model, indexers)
        cells = [fts_model.shortname, str(fts_model.order), str(rmse),
                 str(smape), str(theil)]
        table.append(cell_end.join(cells) + row_end)

    if externalmodels is not None:
        for position, name in enumerate(externalmodels):
            # The last forecast is dropped before it is compared with data.
            forecast = externalforecasts[position][:-1]
            cells = [
                name,
                " 1",  # external methods are always reported with order 1
                str(round(Measures.rmse(data, forecast), 2)),
                str(round(Measures.smape(data, forecast), 2)),
                str(round(Measures.UStatistic(data, forecast), 2)),
            ]
            table.append(cell_end.join(cells) + row_end)

    print("".join(table))
Пример #2
0
def forecast_params(data, train_split, method, params, plot=False):
    """
    Run a forecasting method on a train/test split and report its accuracy.

    :param data: data set handed to sampling.train_test_split
    :param train_split: split argument handed to sampling.train_test_split
    :param method: callable invoked as ``method(train, test, params)``; its
        result is used as the forecast
    :param params: dict with the keys 'output' (target column) and 'order';
        the optional key 'step' defaults to 1
    :param plot: when True, plot the observed values and the forecast
    :return: the tuple (rmse, nrmse, smape, u)
    """
    train, test = sampling.train_test_split(data, train_split)
    forecast = method(train, test, params)

    target_column = params['output']
    horizon = params.get('step', 1)
    # The first order + step - 1 test observations are not compared.
    skipped = params['order'] + horizon - 1
    observed = test[target_column].iloc[skipped:].values

    if plot:
        plt.figure(figsize=(20, 10))
        for series in (observed, forecast):
            plt.plot(series)
        plt.show()

    rmse = Measures.rmse(observed, forecast)
    print("RMSE: ", rmse)

    nrmse = metrics.normalized_rmse(observed, forecast)
    print("nRMSE: ", nrmse)

    smape = Measures.smape(observed, forecast)
    print("SMAPE: ", smape)

    u = Measures.UStatistic(observed, forecast)
    print("U Statistic: ", u)

    return rmse, nrmse, smape, u
Пример #3
0
def print_point_statistics(data, models, externalmodels=None, externalforecasts=None, indexers=None):
    """
    Run point benchmarks on given models and data and print the results

    :param data: test data
    :param models: a list of FTS models to benchmark
    :param externalmodels: a list with the names of benchmark models (façades for other methods)
    :param externalforecasts: the forecasts of the external models, in the same order as externalmodels
    :param indexers: forwarded to Measures.get_point_statistics
    :return: None, the table is printed
    """
    separator = "		& "

    def table_row(cells):
        # One table line: cells joined by the column separator plus the line end.
        return separator.join(cells) + "	\\\\ \n"

    ret = "Model		& Order     & RMSE		& SMAPE      & Theil's U		\\\\ \n"

    for model in models:
        _rmse, _smape, _u = Measures.get_point_statistics(data, model, indexers)
        ret += table_row([model.shortname, str(model.order), str(_rmse), str(_smape), str(_u)])

    if externalmodels is not None:
        for k, label in enumerate(externalmodels):
            # the last forecasted value is not compared against the data
            trimmed = externalforecasts[k][:-1]
            scores = [round(measure(data, trimmed), 2)
                      for measure in (Measures.rmse, Measures.smape, Measures.UStatistic)]
            ret += table_row([label, " 1"] + [str(score) for score in scores])

    print(ret)
Пример #4
0
def compareModelsTable(original, models_fo, models_ho):
    """
    Draw a comparison table of the models and return it as a LaTeX tabular.

    Every entry of models_fo and models_ho is a dict with the keys "name",
    "model" (an object with ``order`` and ``sets``) and "forecasted".

    :param original: the original data the forecasts are compared with
    :param models_fo: models whose forecasts are compared over the whole series
    :param models_ho: models whose first ``order`` values are skipped in both
        the forecasts and the original data before the comparison
    :return: a string with the LaTeX code of the table
    """

    def table_row(entry, forecasted, reference):
        # RMSE as returned, MAPE as a percentage rounded to two decimals.
        fts = entry["model"]
        error_r = Measures.rmse(forecasted, reference)
        error_m = round(Measures.mape(forecasted, reference) * 100, 2)
        return [entry["name"], fts.order, len(fts.sets), error_r, error_m]

    fig = plt.figure(figsize=[12, 4])
    fig.suptitle("Comparação de modelos ")
    columns = ['Modelo', 'Ordem', 'Partições', 'RMSE', 'MAPE (%)']

    rows = [table_row(entry, entry["forecasted"], original)
            for entry in models_fo]
    for entry in models_ho:
        skip = entry["model"].order
        rows.append(
            table_row(entry, entry["forecasted"][skip:], original[skip:]))

    # A single axes covering the whole figure holds the rendered table.
    ax1 = fig.add_axes([0, 0, 1, 1])  # left, bottom, width, height
    ax1.set_xticks([])
    ax1.set_yticks([])
    ax1.table(cellText=rows,
              colLabels=columns,
              cellLoc='center',
              bbox=[0, 0, 1, 1])

    sup = "\\begin{tabular}{" + "|c" * len(columns) + "|} \\hline\n"
    header = " & ".join("\\textbf{" + c + "} " for c in columns)
    header += "\\\\ \\hline \n"

    body = ""
    for cells in rows:
        lin = ""
        for cell in cells:
            text = str(cell)
            # The separator is only emitted once the line has some content.
            lin = lin + " & " + text if lin else text
        body += lin + "\\\\ \\hline \n"

    return sup + header + body + "\\end{tabular}"
Пример #5
0
def cluster_method(individual, train, test):
    """
    Build, fit and measure a WeightedHighOrderFTS model for one individual.

    :param individual: dict with the keys 'mf', 'partitioner', 'npart',
        'lags', 'alpha' and 'order'
    :param train: data used to build the partitioner and fit the model
    :param test: data used by Measures.get_point_statistics
    :return: the tuple (individual, rmse, size, mape, u), where size is
        ``len(model)``
    """
    from pyFTS.common import Util, Membership
    from pyFTS.models import hofts
    from pyFTS.partitioners import Grid, Entropy
    from pyFTS.benchmarks import Measures

    # Membership function: 2 -> trapezoidal, 3 -> gaussian (unless the
    # partitioner code is 2), anything else -> triangular.
    mf_code = individual['mf']
    if mf_code == 2:
        mf = Membership.trapmf
    elif mf_code == 3 and individual['partitioner'] != 2:
        mf = Membership.gaussmf
    else:
        mf = Membership.trimf

    partitioner_code = individual['partitioner']
    if partitioner_code == 1:
        partitioner = Grid.GridPartitioner(data=train,
                                           npart=individual['npart'],
                                           func=mf)
    elif partitioner_code == 2:
        # The entropy partitioner is never built with fewer than 10 partitions.
        npart = individual['npart'] if individual['npart'] > 10 else 10
        partitioner = Entropy.EntropyPartitioner(data=train,
                                                 npart=npart,
                                                 func=mf)

    model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
                                       lags=individual['lags'],
                                       alpha_cut=individual['alpha'],
                                       order=individual['order'])
    model.fit(train)

    rmse, mape, u = Measures.get_point_statistics(test, model)

    return individual, rmse, len(model), mape, u
Пример #6
0
def cluster_method(individual, dataset, **kwargs):
    """
    Evaluate one hyperparameter combination with sliding window cross validation.

    For every window a partitioner and a WeightedHighOrderFTS model are built
    from the training part, and the RMSE of the forecasts over the test part is
    collected together with the model size (``len(model)``).

    :param individual: dict with the keys 'mf', 'partitioner', 'npart',
        'lags', 'alpha' and 'order'
    :param dataset: data the sliding window runs over
    :param window_size: length of the sliding window (default 800)
    :param train_rate: train/test split of each window (default .8)
    :param increment_rate: increment of the window (default .2)
    :return: a dict with the keys 'parameters' (the individual), 'rmse'
        (NaN-ignoring mean RMSE over the windows) and 'size' (NaN-ignoring
        mean model size over the windows)
    """
    from pyFTS.common import Util, Membership
    from pyFTS.models import hofts
    from pyFTS.partitioners import Grid, Entropy
    from pyFTS.benchmarks import Measures
    import numpy as np

    # Membership function: 1 -> triangular, 2 -> trapezoidal, 3 -> gaussian
    # (unless the partitioner code is 2); every other case is triangular.
    if individual['mf'] == 1:
        mf = Membership.trimf
    elif individual['mf'] == 2:
        mf = Membership.trapmf
    elif individual['mf'] == 3 and individual['partitioner'] != 2:
        mf = Membership.gaussmf
    else:
        mf = Membership.trimf

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)

    errors = []
    sizes = []

    for count, train, test in Util.sliding_window(dataset,
                                                  window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):

        if individual['partitioner'] == 1:
            partitioner = Grid.GridPartitioner(data=train,
                                               npart=individual['npart'],
                                               func=mf)
        elif individual['partitioner'] == 2:
            # The entropy partitioner is never built with fewer than 10 partitions.
            npart = individual['npart'] if individual['npart'] > 10 else 10
            partitioner = Entropy.EntropyPartitioner(data=train,
                                                     npart=npart,
                                                     func=mf)

        model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
                                           lags=individual['lags'],
                                           alpha_cut=individual['alpha'],
                                           order=individual['order'])
        model.fit(train)

        forecasts = model.predict(test)

        rmse = Measures.rmse(test[model.max_lag:], forecasts)

        errors.append(rmse)
        sizes.append(len(model))

    return {
        'parameters': individual,
        'rmse': np.nanmean(errors),
        # Average over all windows; the previous code averaged only the size
        # of the last window and ignored the collected list.
        'size': np.nanmean(sizes)
    }
Пример #7
0
def evaluation1(dataset, individual):
    """
    Compute the two fitness values of an individual over sliding windows.

    The windows have length 800, a .8 train rate and a .25 increment. For each
    window a model is built with ``phenotype`` and its RMSE and size
    (``len(model)``) are collected.

    :param dataset: data the sliding window runs over
    :param individual: genotype handed to ``phenotype``
    :return: the tuple (len_lags, rmse), where rmse is .6 * mean + .4 * std of
        the window RMSEs and len_lags is .4 * mean model size +
        .6 * (sum of the last model's lags * 100); None as soon as
        ``phenotype`` returns no model; np.inf when any exception is raised
    """
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures

    try:
        results = []
        lengths = []

        for count, train, test in Util.sliding_window(dataset,
                                                      800,
                                                      train=.8,
                                                      inc=.25):
            model = phenotype(individual, train)

            if model is None:
                return None

            window_rmse, _, _ = Measures.get_point_statistics(test, model)
            lengths.append(len(model))
            results.append(window_rmse)

        # Aggregate once after the last window: only the final values were
        # ever returned, so recomputing them on every iteration was wasted
        # work. With no window at all ``model`` is unbound and the resulting
        # error is reported by the handler below, as before.
        _lags = sum(model.lags) * 100

        rmse = np.nansum(
            [.6 * np.nanmean(results), .4 * np.nanstd(results)])
        len_lags = np.nansum([.4 * np.nanmean(lengths), .6 * _lags])

        return len_lags, rmse

    except Exception as ex:
        # Best effort: a failing individual gets the worst possible fitness.
        print("EXCEPTION!", str(ex), str(individual))
        return np.inf
Пример #8
0
def forecast_best_params(data,
                         train_split,
                         method_id,
                         method,
                         space,
                         plot=False,
                         save=False):
    """
    Re-run a forecasting method with its stored best hyperparameters.

    The best point is read from ``best_<method_id>.pkl`` and mapped to actual
    parameters with ``space_eval(space, best)``.

    :param data: data set handed to sampling.train_test_split
    :param train_split: split argument handed to sampling.train_test_split
    :param method_id: identifier used in the printed message and in the names
        of the pickle files
    :param method: callable invoked as ``method(train, test, best_params)``
    :param space: search space handed to space_eval
    :param plot: when True, plot the observed values and the forecast
    :param save: when True, write the forecast and the RMSE, SMAPE and U
        values to ``results_<method_id>.pkl``
    :return: the tuple (rmse, nrmse, smape, u)
    """
    print("Running experiment ", method_id)

    # NOTE: unpickling can execute arbitrary code, so only load files that
    # were produced by a trusted run.
    with open("best_" + method_id + ".pkl", "rb") as best_file:
        best = pickle.load(best_file)

    train, test = sampling.train_test_split(data, train_split)
    best_params = space_eval(space, best)
    fcst = method(train, test, best_params)
    _order = best_params['order']
    _output = best_params['output']
    # The first ``order`` test observations are not compared.
    yobs = test[_output].iloc[_order:].values

    if plot:
        plt.figure(figsize=(20, 10))
        plt.plot(yobs)
        plt.plot(fcst)
        plt.show()

    rmse = Measures.rmse(yobs, fcst)
    print("RMSE: ", rmse)

    nrmse = metrics.normalized_rmse(yobs, fcst)
    print("nRMSE: ", nrmse)

    smape = Measures.smape(yobs, fcst)
    print("SMAPE: ", smape)

    u = Measures.UStatistic(yobs, fcst)
    print("U Statistic: ", u)

    if save:
        results = {
            "method_id": method_id,
            "forecast": fcst,
            "RMSE": rmse,
            "SMAPE": smape,
            "U": u
        }
        # The context manager closes the file, so the results are flushed to
        # disk before the function returns.
        with open("results_" + method_id + ".pkl", "wb") as results_file:
            pickle.dump(results, results_file)

    return rmse, nrmse, smape, u
Пример #9
0
def run_interval(mfts, partitioner, train_data, test_data, window_key=None, **kwargs):
    """
    Fit a model and run the interval forecasting benchmarks on it

    :param mfts: FTS model
    :param partitioner: Universe of Discourse partitioner, attached to the model unless it is benchmark_only
    :param train_data: data used to train the model
    :param test_data: data used to test the model
    :param window_key: id of the sliding window
    :param kwargs: forwarded to the fit method and to Measures.get_interval_statistics; 'steps_ahead' (default 1) and 'method' (default None) are also read here
    :return: a dictionary with the benchmark results
    """
    import time
    from pyFTS.models import hofts,ifts,pwfts
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, arima, quantreg

    # These lists are never read in this function.
    # NOTE(review): presumably they only exist to reference the classes for remote execution - confirm
    tmp = [hofts.HighOrderFTS, ifts.IntervalFTS,  pwfts.ProbabilisticWeightedFTS]
    tmp2 = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
    tmp4 = [arima.ARIMA, quantreg.QuantileRegression]
    tmp3 = [Measures.get_interval_statistics]

    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)

    if mfts.benchmark_only:
        order_label = str(mfts.order) if mfts.order is not None else ""
        _key = mfts.shortname + order_label + str(mfts.alpha)
    else:
        # the partitioner is identified by the last part of its module name
        pttr = str(partitioner.__module__).split('.')[-1]
        _key = "".join([mfts.shortname, " n = ", str(mfts.order), " ", pttr,
                        " q = ", str(partitioner.partitions)])
        mfts.partitioner = partitioner
        mfts.append_transformation(partitioner.transformation)

    _key += str(steps_ahead)
    if method is not None:
        _key += str(method)

    # the reported time is the training time plus the measurement time
    started = time.time()
    mfts.fit(train_data, **kwargs)
    times = time.time() - started

    started = time.time()
    stats = Measures.get_interval_statistics(test_data, mfts, **kwargs)
    times += time.time() - started

    return {
        'key': _key,
        'obj': mfts,
        'sharpness': stats[0],
        'resolution': stats[1],
        'coverage': stats[2],
        'time': times,
        'Q05': stats[3],
        'Q25': stats[4],
        'Q75': stats[5],
        'Q95': stats[6],
        'winkler05': stats[7],
        'winkler25': stats[8],
        'window': window_key,
        'steps': steps_ahead,
        'method': method,
    }
Пример #10
0
def evaluate(dataset, individual, **kwargs):
    """
    Score an individual with a sliding window cross validation over the dataset.

    An individual whose 'f1' and 'f2' entries are both set is not evaluated
    again: the values it stores for the keys in __measures are returned.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset (default 800)
    :param train_rate: The train/test split ([0,1]) (default .8)
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1]) (default .2)
    :param fts_method: model class handed to phenotype (default hofts.WeightedHighOrderFTS)
    :param parameters: dict with model specific arguments, handed to phenotype (default {})
    :return: a dict with the keys 'f1' (.6 * mean + .4 * std of the window RMSEs),
             'f2' (.4 * mean model size + .6 * sum of the last model's lags * 100),
             'rmse' (mean RMSE) and 'size' (mean model size)
    """
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import phenotype, __measures
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
    parameters = kwargs.get('parameters', {})

    already_scored = not (individual['f1'] is None or individual['f2'] is None)
    if already_scored:
        return {key: individual[key] for key in __measures}

    errors, lengths = [], []

    windows = Util.sliding_window(dataset,
                                  window_size,
                                  train=train_rate,
                                  inc=increment_rate)
    for _, train, test in windows:
        model = phenotype(individual,
                          train,
                          fts_method=fts_method,
                          parameters=parameters)

        forecasts = model.predict(test)

        # the first max_lag observations and the last forecast are left out
        errors.append(Measures.rmse(test[model.max_lag:], forecasts[:-1]))
        lengths.append(len(model))

    # the lags of the model built on the last window are the ones that count
    lag_penalty = sum(model.lags) * 100

    mean_rmse = np.nanmean(errors)
    mean_size = np.nanmean(lengths)

    accuracy = np.nansum([.6 * mean_rmse, .4 * np.nanstd(errors)])
    parsimony = np.nansum([.4 * mean_size, .6 * lag_penalty])

    return {'f1': accuracy, 'f2': parsimony, 'rmse': mean_rmse, 'size': mean_size}
Пример #11
0
def evaluate_individual_model(model, partitioner, train, test, window_size, time_displacement):
    """
    Train a model and measure its forecasts on the test data.

    Any exception raised while training, forecasting or measuring is printed
    and all three measures are reported as NaN.

    :param model: model to train; its ``order`` is passed to ``train``
    :param partitioner: partitioner providing ``sets`` and ``partitions``
    :param train: training data
    :param test: test data
    :param window_size: passed to ``train`` as parameters and to ``forecast``
    :param time_displacement: passed to ``forecast``
    :return: a dict with the keys 'model', 'partitions', 'order', 'rmse',
        'mape' and 'u'
    """
    import numpy as np
    from pyFTS.partitioners import Grid
    from pyFTS.benchmarks import Measures

    try:
        model.train(train, sets=partitioner.sets, order=model.order, parameters=window_size)
        forecasts = model.forecast(test, time_displacement=time_displacement, window_size=window_size)
        # The first ``order`` observations and the last forecast are left out.
        observed = test[model.order:]
        predicted = forecasts[:-1]
        scores = (Measures.rmse(observed, predicted),
                  Measures.mape(observed, predicted),
                  Measures.UStatistic(observed, predicted))
    except Exception as e:
        print(e)
        scores = (np.nan, np.nan, np.nan)

    _rmse, _mape, _u = scores

    return {
        'model': model.shortname,
        'partitions': partitioner.partitions,
        'order': model.order,
        'rmse': _rmse,
        'mape': _mape,
        'u': _u,
    }
Пример #12
0
def print_distribution_statistics(original, models, steps, resolution):
    """
    Print a LaTeX-like table with the distribution statistics of the models.

    :param original: data handed to Measures.get_distribution_statistics
    :param models: iterable of models exposing ``shortname`` and ``order``
    :param steps: forwarded to Measures.get_distribution_statistics
    :param resolution: forwarded to Measures.get_distribution_statistics
    :return: None, the table is only written to the standard output
    """
    column_gap = "		& "
    lines = ["Model	& Order     &  Interval & Distribution	\\\\ \n"]
    for model in models:
        # Only the first two of the four returned values are reported.
        crps_interval, crps_distribution, _, _ = \
            Measures.get_distribution_statistics(original, model, steps,
                                                 resolution)
        cells = [model.shortname, str(model.order), str(crps_interval),
                 str(crps_distribution)]
        lines.append(column_gap.join(cells) + "	\\\\ \n")
    print("".join(lines))
Пример #13
0
def rolling_window_forecast_params(data, train_percent, window_size, method,
                                   params):
    """
    Evaluate a forecasting method with a rolling window inside each day.

    The data is processed day by day. Within a day, ``get_data_index`` yields
    successive train/test index ranges, the method is run on each of them and
    all forecasts and observed values are accumulated before the accuracy
    measures are computed once.

    :param data: data set; ``data.index.date`` is used to split it into days
        (assumes a pandas object with a DatetimeIndex - TODO confirm)
    :param train_percent: fraction of the samples of a day used as train size
    :param window_size: step between consecutive windows, also handed to
        get_data_index
    :param method: callable invoked as ``method(train, test, params)``
    :param params: dict with the keys 'output' (target column) and 'order';
        the optional key 'step' defaults to 1
    :return: the tuple (rmse, nrmse, smape, u)
    """
    # get training days
    training_days = pd.unique(data.index.date)
    fcst = []
    yobs = []

    for day in training_days:
        print("Processing :", day)
        daily_data = data[data.index.date == day]
        nsamples = len(daily_data.index)
        train_size = round(nsamples * train_percent)
        test_end = 0
        index = 0

        while test_end < nsamples:
            train_start, train_end, test_start, test_end = get_data_index(
                index, train_size, window_size, nsamples)
            # The indexes are relative to the current day, so they must slice
            # the daily data. Slicing the full data set made every day re-use
            # the same leading rows.
            train = daily_data[train_start:train_end]
            test = daily_data[test_start:test_end]
            index += window_size
            f = method(train, test, params)
            fcst.extend(f)
            _step = params.get('step', 1)
            _output = params['output']
            # The first order + step - 1 test observations are not compared.
            _offset = params['order'] + _step - 1
            yobs.extend(test[_output].iloc[_offset:].values)

    rmse = Measures.rmse(yobs, fcst)
    print("RMSE: ", rmse)

    nrmse = metrics.normalized_rmse(yobs, fcst)
    print("nRMSE: ", nrmse)

    smape = Measures.smape(yobs, fcst)
    print("SMAPE: ", smape)

    u = Measures.UStatistic(yobs, fcst)
    print("U Statistic: ", u)

    return rmse, nrmse, smape, u
Пример #14
0
def evaluate(dataset, individual, **kwargs):
    """
    Score an individual with a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset (default 800)
    :param train_rate: The train/test split ([0,1]) (default .8)
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1]) (default .2)
    :return: a dict with the single key 'rmse', holding .6 * mean + .4 * std of the window RMSEs
    :raises Exception: when phenotype returns None for a window
    """
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.fcm.GA import phenotype
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)

    window_errors = []

    windows = Util.sliding_window(dataset,
                                  window_size,
                                  train=train_rate,
                                  inc=increment_rate)
    for _, train, test in windows:
        model = phenotype(individual, train)

        if model is None:
            raise Exception("Phenotype returned None")

        model.uod_clip = False

        predicted = model.predict(test)

        # the first max_lag observations and the last forecast are left out
        observed = test[model.max_lag:]
        window_errors.append(Measures.rmse(observed, predicted[:-1]))

    mean_error = np.nanmean(window_errors)
    spread = np.nanstd(window_errors)

    return {'rmse': .6 * mean_error + .4 * spread}
Пример #15
0
def print_interval_statistics(original, models):
    """
    Print a LaTeX-like table with the interval statistics of the models.

    :param original: data handed to Measures.get_interval_statistics
    :param models: iterable of models exposing ``shortname`` and ``order``
    :return: None, the table is only written to the standard output
    """
    wide_gap = "		& "
    narrow_gap = "        &"
    lines = ["Model	& Order     & Sharpness		& Resolution		& Coverage & .05  & .25 & .75 & .95	\\\\ \n"]
    for model in models:
        sharpness, resolution, coverage, q05, q25, q75, q95 = \
            Measures.get_interval_statistics(original, model)
        # Coverage and the four quantile values share the narrower separator.
        tail = narrow_gap.join(
            [str(coverage), str(q05), str(q25), str(q75), str(q95)])
        head = [model.shortname, str(model.order), str(sharpness),
                str(resolution)]
        lines.append(wide_gap.join(head + [tail]) + "\\\\ \n")
    print("".join(lines))
Пример #16
0
def compare_residuals(data, models):
    """
    Build a LaTeX-like table with the residual statistics of several models

    For every model the table reports the mean and the standard deviation of
    the residuals, the Box-Pierce and Box-Ljung statistics (both computed with
    the argument 10) and ``chi_squared`` of the Box-Ljung statistic.

    :param data: test data
    :param models: iterable of models exposing ``forecast``, ``order`` and ``shortname``
    :return: a string with the table
    """
    gap = "		& "
    lines = ["Model		& Order     & Mean      & STD       & Box-Pierce    & Box-Ljung & P-value \\\\ \n"]
    for mfts in models:
        res = residuals(data, mfts.forecast(data), mfts.order)
        mu = np.mean(res)
        sig = np.std(res)
        box_pierce = Measures.BoxPierceStatistic(res, 10)
        box_ljung = Measures.BoxLjungStatistic(res, 10)
        cells = [mfts.shortname, str(mfts.order)]
        cells.extend(
            str(round(value, 2)) for value in (mu, sig, box_pierce, box_ljung))
        # the last column is reported as returned, without rounding
        cells.append(str(chi_squared(box_ljung, 10)))
        lines.append(gap.join(cells) + "	\\\\ \n")
    return "".join(lines)
Пример #17
0
def print_distribution_statistics(original, models, steps, resolution):
    """
    Run probabilistic benchmarks on given models and data and print the results

    :param original: test data
    :param models: a list of FTS models to benchmark
    :param steps: forwarded to Measures.get_distribution_statistics
    :param resolution: forwarded to Measures.get_distribution_statistics
    :return: None, the table is printed
    """

    def table_row(fts):
        # the two trailing values returned by the measure are not reported
        _crps1, _crps2, _t1, _t2 = Measures.get_distribution_statistics(original, fts, steps, resolution)
        return "		& ".join([fts.shortname, str(fts.order), str(_crps1), str(_crps2)]) + "	\\\\ \n"

    ret = "Model	& Order     &  Interval & Distribution	\\\\ \n"
    for fts in models:
        ret += table_row(fts)
    print(ret)
Пример #18
0
def print_interval_statistics(original, models):
    """
    Run interval benchmarks on given models and data and print the results

    :param original: test data
    :param models: a list of FTS models to benchmark
    :return: None, the table is printed
    """

    def table_row(fts):
        _sharp, _res, _cov, _q5, _q25, _q75, _q95 = Measures.get_interval_statistics(original, fts)
        # the first columns use the tab separator, the remaining ones the space separator
        left = "		& ".join([fts.shortname, str(fts.order), str(_sharp), str(_res), ""])
        right = "        &".join(str(value) for value in (_cov, _q5, _q25, _q75, _q95))
        return left + right + "\\\\ \n"

    ret = "Model	& Order     & Sharpness		& Resolution		& Coverage & .05  & .25 & .75 & .95	\\\\ \n"
    for fts in models:
        ret += table_row(fts)
    print(ret)
Пример #19
0
def rolling_window_benchmark(data, train=0.8, **kwargs):
    """
    Fit and measure every combination of method, order and forecasting step.

    The options 'resample', 'output', 'methods', 'orders', 'steps_ahead',
    'partitioners' and 'partitions' are read from kwargs with ``__pop``; the
    kwargs are then passed on to the ``fit`` method of each model.
    NOTE(review): presumably ``__pop`` removes the key from kwargs and returns
    its value or the given default - confirm against its definition.

    :param data: data set to benchmark on
    :param train: split argument handed to sampling.train_test_split
    :param kwargs: the options listed above plus arguments for ``fit``
    :return: None, the RMSE of every combination is printed
    """
    resample = __pop('resample', None, kwargs)
    output = __pop('output', None, kwargs)

    # Resampling is optional and happens before the train/test split.
    if resample:
        data = sampling.resample_data(data, resample)

    train_data, test_data = sampling.train_test_split(data, train)

    # NOTE(review): 'methods' has no usable default; if it is not supplied the
    # loop below iterates over None - confirm callers always pass it.
    methods = __pop('methods', None, kwargs)
    orders = __pop("orders", [1, 2, 3], kwargs)
    steps_ahead = __pop('steps_ahead', [1], kwargs)

    for method in methods:
        for order in orders:
            for step in steps_ahead:
                # A fresh model is instantiated for every combination.
                m = method()

                if isinstance(m, fts.FTS):
                    partitioners = __pop("partitioners",
                                         [Grid.GridPartitioner], kwargs)
                    partitions = __pop("partitions", [10], kwargs)
                    # Every partitioner/partition pair overwrites
                    # m.partitioner, so only the last pair is in effect when
                    # the model is fitted below.
                    for partitioner in partitioners:
                        for partition in partitions:
                            data_train_fs = partitioner(data=train_data,
                                                        npart=partition)
                            m.partitioner = data_train_fs

                # TODO: measure the training time
                m.fit(train_data, **kwargs)

                # TODO: measure the forecasting time
                yhat = m.predict()
                #_start = time.time()

                # TODO: implement the remaining evaluation metrics
                # The first ``order`` observations of the output column and
                # the last ``step`` forecasts are left out of the comparison.
                _rmse = Measures.rmse(test_data[output].iloc[order:],
                                      yhat[:-step])
                print("RMSE: ", _rmse)
Пример #20
0
from pyFTS.benchmarks import Measures
from pyFTS.partitioners import Grid, Entropy
from pyFTS.models import hofts
from pyFTS.common import Membership

# Sample one sine wave on [-2*pi, 2*pi) with a step of 0.1.
x = [k for k in np.arange(-2 * np.pi, 2 * np.pi, 0.1)]
y = [np.sin(k) for k in x]

# One row of accuracy measures is collected per partition count.
rows = []

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])

ax.plot(y, label='Original', color='black')

# Fit a first order model for 5, 10, ..., 30 partitions on the same series
# and compare its in-sample forecasts with the original data.
for npart in np.arange(5, 35, 5):
    part = Grid.GridPartitioner(data=y, npart=npart)
    model = hofts.HighOrderFTS(order=1, partitioner=part)
    model.fit(y)
    forecasts = model.predict(y)

    # The last forecast is left out of the plot.
    ax.plot(forecasts[:-1], label=str(npart) + " partitions")

    rmse, mape, u = Measures.get_point_statistics(y, model)

    rows.append([npart, rmse, mape, u])

# Place the legend outside the axes, anchored at their upper right corner.
handles, labels = ax.get_legend_handles_labels()
lgd = ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))

# Tabulate the measures collected for every partition count.
df = pd.DataFrame(rows, columns=['Partitions', 'RMSE', 'MAPE', 'U'])
Пример #21
0
def normalized_rmse(targets, forecasts):
    """
    Return the RMSE of the forecasts divided by the mean of the targets.

    :param targets: observed values; a list is converted to a numpy array
    :param forecasts: forecasted values
    :return: Measures.rmse(targets, forecasts) / NaN-ignoring mean of targets
    """
    observed = np.array(targets) if isinstance(targets, list) else targets
    error = Measures.rmse(observed, forecasts)
    return error / np.nanmean(observed)
Пример #22
0
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    An individual whose 'f1' and 'f2' entries are both set is not evaluated
    again: the values it stores for the keys in __measures are returned.
    A window that fails is logged and contributes NaN to both the errors and
    the model sizes.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset (default 800)
    :param train_rate: The train/test split ([0,1]) (default .8)
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1]) (default .2)
    :param fts_method: model class handed to phenotype (default wmvfts.WeightedMVFTS)
    :param parameters: dict with model specific arguments for fit method.
    :param target_variable: object indexed with 'data_label' to select the target column of the test data
    :return: a dict with the keys 'f1' (.6 * mean + .4 * std of the window RMSEs),
             'f2' (.9 * mean + .1 * std of the model sizes), 'rmse' and 'size';
             every value is np.inf when the aggregation itself fails
    """
    import logging
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import __measures
    from pyFTS.hyperparam.mvfts import phenotype
    from pyFTS.models.multivariate import mvfts, wmvfts, partitioner, variable, cmvfts, grid, granular, common
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', wmvfts.WeightedMVFTS)
    parameters = kwargs.get('parameters', {})
    tvar = kwargs.get('target_variable', None)

    if individual['f1'] is not None and individual['f2'] is not None:
        return {key: individual[key] for key in __measures}

    errors = []
    lengths = []

    # 'fts_method' and 'parameters' are passed to phenotype explicitly, so
    # they are removed from the forwarded kwargs. Both are optional: popping
    # with a default keeps callers that rely on the default fts_method from
    # failing with a KeyError.
    kwargs2 = kwargs.copy()
    kwargs2.pop('fts_method', None)
    kwargs2.pop('parameters', None)

    for count, train, test in Util.sliding_window(dataset,
                                                  window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):

        try:

            model = phenotype(individual,
                              train,
                              fts_method=fts_method,
                              parameters=parameters,
                              **kwargs2)

            forecasts = model.predict(test)

            # the first max_lag observations and the last forecast are left out
            rmse = Measures.rmse(
                test[tvar['data_label']].values[model.max_lag:],
                forecasts[:-1])
            lengths.append(len(model))

            errors.append(rmse)

        except Exception:
            # Best effort: a failing window is logged and ignored by the
            # NaN-aware aggregation below.
            logging.exception("Error")

            lengths.append(np.nan)
            errors.append(np.nan)

    try:
        _rmse = np.nanmean(errors)
        _len = np.nanmean(lengths)

        f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
        f2 = np.nansum([.9 * _len, .1 * np.nanstd(lengths)])

        return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len}
    except Exception:
        logging.exception("Error")
        return {'f1': np.inf, 'f2': np.inf, 'rmse': np.inf, 'size': np.inf}
Пример #23
0
from pyFTS.data import TAIEX, SP500, NASDAQ, Malaysia

# Use the first 1000 values of the Malaysia 'temperature' series.
dataset = Malaysia.get_data('temperature')[:1000]

# Grid partitioner with 20 partitions built from the data set itself.
p = Grid.GridPartitioner(data=dataset, npart=20)

print(p)

# Second order weighted high order model on top of that partitioner.
model = hofts.WeightedHighOrderFTS(partitioner=p, order=2)

model.fit(dataset)  #[22, 22, 23, 23, 24])

print(model)

# The model is measured on the same data it was fitted on; the returned
# statistics are not stored.
Measures.get_point_statistics(dataset, model)
'''
#dataset = SP500.get_data()[11500:16000]
#dataset = NASDAQ.get_data()
#print(len(dataset))


bchmk.sliding_window_benchmarks(dataset, 1000, train=0.8, inc=0.2,
                                methods=[chen.ConventionalFTS], #[pwfts.ProbabilisticWeightedFTS],
                                benchmark_models=False,
                                transformations=[None],
                                #orders=[1, 2, 3],
                                partitions=np.arange(10, 100, 2),
                                progress=False, type="point",
                                #steps_ahead=[1,2,4,6,8,10],
                                distributed=False, nodes=['192.168.0.110', '192.168.0.107', '192.168.0.106'],
Пример #24
0
def run_probabilistic(mfts,
                      partitioner,
                      train_data,
                      test_data,
                      window_key=None,
                      **kwargs):
    """
    Probabilistic forecast benchmark function to be executed on cluster nodes
    :param mfts: FTS model
    :param partitioner: Universe of Discourse partitioner, attached to the
        model unless it is benchmark_only
    :param train_data: data used to train the model
    :param test_data: data used to test the model
    :param window_key: id of the sliding window
    :param kwargs: forwarded to the fit method and to
        Measures.get_distribution_statistics; 'indexer' (seasonal indexer,
        default None), 'steps_ahead' (default 1) and 'method' (default None)
        are also read here
    :return: a dictionary with the benchmark results
    """
    import time
    import numpy as np
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.models.ensemble import ensemble
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, arima, quantreg, knn
    from pyFTS.models.seasonal import SeasonalIndexer

    # These lists are never read in this function.
    # NOTE(review): presumably they only exist to reference the classes for
    # the cluster nodes - confirm
    tmp = [
        hofts.HighOrderFTS, ifts.IntervalFTS, pwfts.ProbabilisticWeightedFTS,
        arima.ARIMA, ensemble.AllMethodEnsembleFTS, knn.KNearestNeighbors
    ]
    tmp2 = [
        Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner
    ]
    tmp3 = [
        Measures.get_distribution_statistics, SeasonalIndexer.SeasonalIndexer,
        SeasonalIndexer.LinearSeasonalIndexer
    ]

    indexer = kwargs.get('indexer', None)
    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)

    if mfts.benchmark_only:
        order_label = str(mfts.order) if mfts.order is not None else ""
        _key = mfts.shortname + order_label + str(mfts.alpha)
    else:
        # The partitioner is identified by the last part of its module name.
        pttr = str(partitioner.__module__).split('.')[-1]
        _key = "".join([mfts.shortname, " n = ", str(mfts.order), " ", pttr,
                        " q = ", str(partitioner.partitions)])
        mfts.partitioner = partitioner
        mfts.append_transformation(partitioner.transformation)

    _key += str(steps_ahead)
    if method is not None:
        _key += str(method)

    if mfts.has_seasonality:
        mfts.indexer = indexer

    started = time.time()
    mfts.fit(train_data, **kwargs)
    fit_time = time.time() - started

    crps, elapsed, brier = Measures.get_distribution_statistics(
        test_data, mfts, **kwargs)
    # The reported time is the measured time plus the training time.
    elapsed += fit_time

    return {
        'key': _key,
        'obj': mfts,
        'CRPS': crps,
        'time': elapsed,
        'brier': brier,
        'window': window_key,
        'steps': steps_ahead,
        'method': method,
    }
Пример #25
0
def run_point(mfts,
              partitioner,
              train_data,
              test_data,
              window_key=None,
              **kwargs):
    """
    Fit a point-forecasting model and score it on a test window.

    The original documentation describes this as a benchmark function meant
    to be executed on cluster nodes; all imports are done at function scope.

    :param mfts: model to benchmark; ``benchmark_only``, ``shortname`` and
        ``order`` are read from it, and ``fit`` is called on it
    :param partitioner: Universe of Discourse partitioner; attached to the
        model (together with its transformation) unless the model is
        ``benchmark_only``
    :param train_data: data used to train the model
    :param test_data: data used to test the model
    :param window_key: id of the sliding window, copied into the result
    :param kwargs: forwarded unchanged to ``mfts.fit`` and to
        ``Measures.get_point_statistics``; ``steps_ahead`` (default 1) and
        ``method`` (default None) are also read here to build the key
    :return: a dictionary with the benchmark results (keys ``key``, ``obj``,
        ``rmse``, ``smape``, ``u``, ``time``, ``window``, ``steps``,
        ``method``)
    """
    import time
    from pyFTS.models import yu, chen, hofts, pwfts, ismailefendi, sadaei, song, cheng, hwang
    from pyFTS.partitioners import Grid, Entropy, FCM
    from pyFTS.benchmarks import Measures, naive, arima, quantreg
    from pyFTS.common import Transformations

    # The lists below are never read again. They only touch the imported
    # classes -- presumably so the remote job resolves them up front
    # (TODO confirm with the cluster setup).
    fts_models = [
        song.ConventionalFTS, chen.ConventionalFTS, yu.WeightedFTS,
        ismailefendi.ImprovedWeightedFTS, cheng.TrendWeightedFTS,
        sadaei.ExponentialyWeightedFTS, hofts.HighOrderFTS, hwang.HighOrderFTS,
        pwfts.ProbabilisticWeightedFTS
    ]
    partitioner_classes = [
        Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner
    ]
    benchmark_models = [naive.Naive, arima.ARIMA, quantreg.QuantileRegression]
    measure_functions = [Measures.get_point_statistics]
    transformation_classes = [Transformations.Differential]

    steps_ahead = kwargs.get('steps_ahead', 1)
    method = kwargs.get('method', None)

    # Build the identifier under which this run is reported.
    if mfts.benchmark_only:
        _key = mfts.shortname + str("" if mfts.order is None else mfts.order)
    else:
        # Last component of the partitioner's module path, e.g. "Grid".
        partitioner_label = str(partitioner.__module__).rsplit('.', 1)[-1]
        _key = mfts.shortname + " n = {} {} q = {}".format(
            mfts.order, partitioner_label, partitioner.partitions)
        mfts.partitioner = partitioner
        mfts.append_transformation(partitioner.transformation)

    _key += str(steps_ahead)
    if method is not None:
        _key += str(method)

    # Reported time is training time plus evaluation time.
    fit_started = time.time()
    mfts.fit(train_data, **kwargs)
    elapsed = time.time() - fit_started

    eval_started = time.time()
    _rmse, _smape, _u = Measures.get_point_statistics(test_data, mfts,
                                                      **kwargs)
    elapsed += time.time() - eval_started

    return {
        'key': _key,
        'obj': mfts,
        'rmse': _rmse,
        'smape': _smape,
        'u': _u,
        'time': elapsed,
        'window': window_key,
        'steps': steps_ahead,
        'method': method
    }
Пример #26
0
def simpleSearch_RMSE(train,
                      test,
                      model,
                      partitions,
                      orders,
                      save=False,
                      file=None,
                      tam=[10, 15],
                      plotforecasts=False,
                      elev=30,
                      azim=144,
                      intervals=False,
                      parameters=None,
                      partitioner=Grid.GridPartitioner,
                      transformation=None,
                      indexer=None):
    """
    Grid search over partition counts and model orders, keeping the model
    with the lowest error on ``test``.

    One model is trained per (partition count, order) pair. The errors are
    then drawn as a surface (several orders) or as a curve (single order).

    :param train: data used to build the partitions and train each model
    :param test: data the forecasts are compared against
    :param model: callable that receives a name string and returns a model
    :param partitions: iterable with the numbers of partitions to try
    :param orders: sequence with the model orders to try
    :param save: forwarded to ``cUtil.show_and_save_image``
    :param file: forwarded to ``cUtil.show_and_save_image``
    :param tam: figure size handed to ``plt.figure`` (never mutated here)
    :param plotforecasts: when True, also plot every forecast and ``test``
    :param elev: elevation of the 3D error surface view
    :param azim: azimuth of the 3D error surface view
    :param intervals: when True, use ``forecast_interval`` and score with
        ``1.0 - Measures.rmse_interval`` instead of point RMSE
    :param parameters: forwarded to ``fts.train``
    :param partitioner: partitioner class called with ``data``, ``npart``
        and ``transformation``; its ``sets`` attribute is used
    :param transformation: transformation given to the partitioner and
        appended to every model
    :param indexer: accepted for interface compatibility; not used here
    :return: list ``[best model, its forecasts, its error]``
    """
    _3d = len(orders) > 1
    if _3d:
        # Float grid: an integer array would silently truncate every RMSE
        # written into it.
        errors = np.zeros((len(orders), len(partitions)))
    else:
        errors = []
    forecasted_best = []
    fig = plt.figure(figsize=tam)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(train)])
        ax0.set_ylim([min(train) * 0.9, max(train) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = 1000000.0
    best = None

    for pc, p in enumerate(partitions):
        # The fuzzy sets depend only on the partition count, so they are
        # built once and shared by every order.
        sets = partitioner(data=train, npart=p,
                           transformation=transformation).sets
        for oc, o in enumerate(orders):
            fts = model("q = " + str(p) + " n = " + str(o))
            fts.append_transformation(transformation)
            fts.train(train, sets=sets, order=o, parameters=parameters)
            if not intervals:
                forecasted = fts.forecast(test)
                if not fts.has_seasonality:
                    # Drop the last forecast so the lengths match test[o:].
                    error = Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted[:-1]))
                else:
                    error = Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted))
                # Left-pad with None so the curve lines up with `test`.
                for _ in range(o):
                    forecasted.insert(0, None)
                if plotforecasts:
                    ax0.plot(forecasted, label=fts.name)
            else:
                forecasted = fts.forecast_interval(test)
                error = 1.0 - Measures.rmse_interval(np.array(test[o:]),
                                                     np.array(forecasted[:-1]))
            if _3d:
                errors[oc, pc] = error
            else:
                errors.append(error)
            if error < min_rmse:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    if plotforecasts:
        ax0.plot(test, label="Original", linewidth=3.0, color="black")

    # The kind of error plot depends only on how many orders were searched.
    # Previously the surface was skipped whenever plotforecasts was True and
    # the 2-D grid was handed to the curve plot instead.
    if _3d:
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        ax1.plot_surface(X,
                         Y,
                         errors,
                         rstride=1,
                         cstride=1,
                         antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        ax1.plot(partitions, errors)

    cUtil.show_and_save_image(fig, file, save)

    return [best, forecasted_best, min_rmse]
Пример #27
0
    # Fragment of a per-signal benchmark; `signals`, `key`, `_order`, `ax`,
    # `row` and `rows` are defined outside the visible excerpt.
    # The first `_order` columns of the frame are used as model inputs and
    # the last column as the target.
    df = loader.series_to_supervised(signals[key], n_in=_order, n_out=1)
    data_input = df.iloc[:, :_order].values
    data_output = df.iloc[:, -1].values

    # First half of the rows for training, second half for testing.
    l = len(df.index)
    limit = l // 2
    train = data_input[:limit]
    test = data_input[limit:]

    ax[row].plot(data_output[limit + _order:], label="Original")
    ax[row].set_title(key)

    # Persistence baseline: the same target slice shifted back by one
    # position, i.e. the previous value is used as the forecast.
    persistence_forecast = data_output[limit + _order - 1:-1]
    ax[row].plot(persistence_forecast, label="Persistence")
    _rmse = Measures.rmse(data_output[limit + _order:], persistence_forecast)
    # NOTE(review): this row is overwritten below before any rows.append(),
    # so the Persistence result never reaches `rows` -- confirm whether a
    # rows.append(data) is missing here.
    data = [key, "Persistence", _rmse]

    evolving_model = evolvingclusterfts.EvolvingClusterFTS(
        defuzzy='weighted', membership_threshold=0.6, variance_limit=0.001)
    evolving_model.fit(train, order=_order)
    # Only the last column of the prediction frame is kept as the forecast.
    y_hat_df = pd.DataFrame(evolving_model.predict(test))
    forecasts = y_hat_df.iloc[:, -1].values
    ax[row].plot(forecasts, label="EvolvingFTS")
    # The final forecast is dropped before scoring against the target slice.
    _rmse = Measures.rmse(data_output[limit + _order:], forecasts[:-1])
    data = [key, "EvolvingFTS", _rmse]
    rows.append(data)

    fbem_model = FBeM.FBeM()
    fbem_model.n = _order
    fbem_model.fit(train, order=_order)
Пример #28
0
def sliding_window_simple_search(data, windowsize, model, partitions, orders,
                                 **kwargs):
    """
    Grid search over partition counts and model orders, scoring each pair by
    its mean error across sliding windows of ``data``.

    :param data: full series; used to build each Grid partitioner and fed to
        ``Util.sliding_window``
    :param windowsize: window size handed to ``Util.sliding_window``
    :param model: callable receiving a name and ``partitioner=`` that
        returns a model
    :param partitions: sequence with the numbers of partitions to try
    :param orders: sequence with the model orders to try
    :param kwargs: optional settings read here -- ``figsize`` ([10, 15]),
        ``plotforecasts`` (False), ``intervals`` (False), ``threshold``
        (0.5), ``progressbar`` (None), ``elev`` (30), ``azim`` (144),
        ``file`` (None), ``save`` (False). The whole dictionary is also
        forwarded to ``Util.sliding_window``.
    :return: list ``[best model, its forecasts]``
    """
    _3d = len(orders) > 1
    # Float grid: an integer array would silently truncate every mean error
    # written into it.
    errors = np.zeros((len(orders), len(partitions)))
    forecasted_best = []

    figsize = kwargs.get('figsize', [10, 15])
    fig = plt.figure(figsize=figsize)

    plotforecasts = kwargs.get('plotforecasts', False)
    intervals = kwargs.get('intervals', False)
    threshold = kwargs.get('threshold', 0.5)
    progressbar = kwargs.get('progressbar', None)
    # Read unconditionally: the 3D view needs these whether or not the
    # forecasts are plotted (they used to be set only when plotforecasts
    # was True, raising NameError on the default path).
    elev = kwargs.get('elev', 30)
    azim = kwargs.get('azim', 144)

    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(data)])
        ax0.set_ylim([min(data) * 0.9, max(data) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = 1000000.0
    best = None

    if progressbar:
        from tqdm import tqdm
        rng1 = enumerate(tqdm(partitions))
    else:
        rng1 = enumerate(partitions)

    for pc, p in rng1:
        fs = Grid.GridPartitioner(data=data, npart=p)

        rng2 = enumerate(tqdm(orders)) if progressbar else enumerate(orders)

        for oc, o in rng2:
            _error = []
            # 0.8 is passed positionally to Util.sliding_window --
            # presumably the train fraction of each window (TODO confirm).
            for ct, train, test in Util.sliding_window(data, windowsize, 0.8,
                                                       **kwargs):
                fts = model("q = " + str(p) + " n = " + str(o), partitioner=fs)
                fts.fit(train, order=o)
                if not intervals:
                    forecasted = fts.forecast(test)
                    if not fts.has_seasonality:
                        # Drop the last forecast so lengths match test[o:].
                        _error.append(
                            Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted[:-1])))
                    else:
                        _error.append(
                            Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted)))
                    # Left-pad with None so the curve lines up with `test`.
                    for _ in range(o):
                        forecasted.insert(0, None)
                    if plotforecasts:
                        ax0.plot(forecasted, label=fts.name)
                else:
                    forecasted = fts.forecast_interval(test)
                    _error.append(1.0 - Measures.rmse_interval(
                        np.array(test[o:]), np.array(forecasted[:-1])))
            error = np.nanmean(_error)
            errors[oc, pc] = error
            # A candidate only replaces the current best when it improves
            # the error by more than `threshold`.
            if (min_rmse - error) > threshold:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    if plotforecasts:
        # `test` is the test split of the last window produced above.
        ax0.plot(test, label="Original", linewidth=3.0, color="black")

    if _3d:
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        ax1.plot_surface(X,
                         Y,
                         errors,
                         rstride=1,
                         cstride=1,
                         antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_ylabel('Number of partitions')
        ax1.set_xlabel('RMSE')
        # Single order: the grid has exactly one row. Draw it on the error
        # axes (it used to be drawn on ax0, which only exists when
        # plotforecasts is True).
        ax1.plot(errors[0], partitions)

    file = kwargs.get('file', None)
    save = kwargs.get('save', False)

    Util.show_and_save_image(fig, file, save)

    return [best, forecasted_best]
Пример #29
0
def _search_partitions_min_rmse(fig, series, parameters, modelo, series_rect,
                                errors_rect):
    """
    Fit one model per partition count on ``series``, plot the in-sample
    forecasts and the error curve, and return the lowest-RMSE model.

    :param fig: figure that receives the two axes
    :param series: data used for partitioning, training and forecasting
    :param parameters: iterable with the numbers of partitions to try
    :param modelo: callable receiving a name string and returning a model
    :param series_rect: [left, bottom, width, height] of the series axes
    :param errors_rect: [left, bottom, width, height] of the error axes
    :return: tuple ``(best model, its forecasts)``
    """
    ax_series = fig.add_axes(series_rect)
    ax_series.set_xlim([0, len(series)])
    ax_series.set_ylim([min(series), max(series)])
    ax_series.set_title('Série Temporal')
    ax_series.set_ylabel('F(T)')
    ax_series.set_xlabel('T')
    ax_series.plot(series, label="Original")

    errors = []
    forecasted_best = []
    min_rmse = 100000.0
    best = None
    for p in parameters:
        sets = Grid.GridPartitioner(data=series, npart=p).sets
        fts = modelo(str(p) + " particoes")
        fts.train(series, sets=sets)
        forecasted = fts.forecast(series)
        # Prepend the first observation before comparing with the series.
        forecasted.insert(0, series[0])
        ax_series.plot(forecasted, label=fts.name)
        error = Measures.rmse(np.array(forecasted), np.array(series))
        print(p, error)
        errors.append(error)
        if error < min_rmse:
            min_rmse = error
            best = fts
            forecasted_best = forecasted
    handles, labels = ax_series.get_legend_handles_labels()
    ax_series.legend(handles, labels)

    ax_errors = fig.add_axes(errors_rect)
    ax_errors.set_title('Comparação dos Erros Quadráticos Médios')
    ax_errors.set_ylabel('RMSE')
    ax_errors.set_xlabel('Quantidade de Partições')
    ax_errors.set_xlim([min(parameters), max(parameters)])
    ax_errors.plot(parameters, errors)
    return best, forecasted_best


def SelecaoSimples_MenorRMSE(original, parameters, modelo):
    """
    Select the lowest-RMSE model over a set of partition counts, once for
    the original series and once for its differenced version.

    Both searches are drawn on one figure: the original series in the top
    row and the differenced series in the bottom row.

    :param original: original time series
    :param parameters: sequence with the numbers of partitions to try
    :param modelo: callable receiving a name string and returning a model
    :return: list ``[best model, its forecasts, best model on the
        differenced series, its forecasts]``
    """
    print("Série Original")
    fig = plt.figure(figsize=[20, 12])
    fig.suptitle("Comparação de modelos ")
    best, forecasted_best = _search_partitions_min_rmse(
        fig, original, parameters, modelo,
        [0, 0.5, 0.65, 0.45], [0.7, 0.5, 0.3, 0.45])

    # Differenced model. The search now passes the partitioner's `.sets` for
    # this series too; previously the partitioner object itself was handed
    # to `train`, unlike the search on the original series.
    print("\nSérie Diferencial")
    difffts = Transformations.differential(original)
    bestd, forecastedd_best = _search_partitions_min_rmse(
        fig, difffts, parameters, modelo,
        [0, 0, 0.65, 0.45], [0.7, 0, 0.3, 0.45])

    return [best, forecasted_best, bestd, forecastedd_best]
Пример #30
0
        # Fragment of a loop body; `method`, `parameters`, `ct`, `train`,
        # `test` and `horizon` are defined outside the visible excerpt.
        model = method(**parameters[ct])
        model.fit(train)
        # Slice of `test` the multi-step forecasts are scored against.
        start = model.order + 1
        end = start + horizon
        # Both forecasts are produced from the first 10 test samples.
        # NOTE(review): they are then compared with test[start:end] --
        # confirm that this alignment is intended.
        intervals = model.predict(test[:10],
                                  type='interval',
                                  alpha=.25,
                                  steps_ahead=horizon)
        distributions = model.predict(test[:10],
                                      type='distribution',
                                      smooth='histogram',
                                      steps_ahead=horizon,
                                      num_bins=100)
        print(
            model.name,
            Measures.get_interval_ahead_statistics(test[start:end], intervals))
        print(
            model.name,
            Measures.get_distribution_ahead_statistics(test[start:end],
                                                       distributions))

print('end')

#f, ax = plt.subplots(1, 1, figsize=[20, 5])

#ax.plot(data)
#bchmk.plot_interval(ax, forecasts, 3, "")
#print(forecasts)
'''
mu_local = 5
sigma_local = 0.25