Exemplo n.º 1
0
def single_plot_residuals(targets,
                          forecasts,
                          order,
                          tam=[8, 8],
                          save=False,
                          file=None):
    """
    Draw a one-row figure with the residual series, its autocorrelation and
    its histogram.

    :param targets: target values forwarded to ``residuals``
    :param forecasts: forecast values forwarded to ``residuals``
    :param order: order argument forwarded to ``residuals``
    :param tam: figure size, forwarded to ``plt.subplots`` as ``figsize``
    :param save: forwarded to ``Util.show_and_save_image``
    :param file: forwarded to ``Util.show_and_save_image``
    """
    fig, panels = plt.subplots(nrows=1, ncols=3, figsize=tam)
    series_ax, acorr_ax, hist_ax = panels

    errors = residuals(targets, forecasts, order)

    # Left panel: the raw residual series.
    series_ax.set_title("Residuals", size='large')
    series_ax.set_ylabel("Model", size='large')
    series_ax.set_xlabel(' ')
    series_ax.plot(errors)

    # Middle panel: autocorrelation of the residuals.
    acorr_ax.set_title("Residuals Autocorrelation", size='large')
    acorr_ax.set_ylabel('ACS')
    acorr_ax.set_xlabel('Lag')
    acorr_ax.acorr(errors)

    # Right panel: distribution of the residuals.
    hist_ax.set_title("Residuals Histogram", size='large')
    hist_ax.set_ylabel('Freq')
    hist_ax.set_xlabel('Bins')
    hist_ax.hist(errors)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Exemplo n.º 2
0
def pftsExploreOrderAndPartitions(data, save=False, file=None):
    """
    Plot PWFTS point and interval forecasts for several model orders and
    several numbers of partitions, each against the original series.

    Rows 0 and 2 vary the order (1..5) with 10 grid partitions; rows 1 and 3
    vary the number of grid partitions (5..10) with order 1.

    :param data: the series used both to train and to forecast
    :param save: forwarded to ``cUtil.show_and_save_image``
    :param file: forwarded to ``cUtil.show_and_save_image``
    """
    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8])
    mi = []
    ma = []

    axes[0].set_title('Point Forecasts by Order')
    axes[2].set_title('Interval Forecasts by Order')

    # The fuzzy sets are built once and reused for every order. The value is
    # already the partitioner's ``.sets``; it must not be dereferenced again.
    sets_by_order = Grid.GridPartitioner(data=data, npart=10).sets
    for order in np.arange(1, 6):
        low, high = _plot_pwfts_forecasts(data, sets_by_order, order,
                                          "n = " + str(order),
                                          axes[0], axes[2])
        mi.append(low * 0.95)
        ma.append(high * 1.05)

    axes[1].set_title('Point Forecasts by Number of Partitions')
    axes[3].set_title('Interval Forecasts by Number of Partitions')

    for partitions in np.arange(5, 11):
        sets_by_npart = Grid.GridPartitioner(data=data, npart=partitions).sets
        low, high = _plot_pwfts_forecasts(data, sets_by_npart, 1,
                                          "q = " + str(partitions),
                                          axes[1], axes[3])
        mi.append(low * 0.95)
        ma.append(high * 1.05)

    for ax in axes:
        ax.set_ylabel('F(T)')
        ax.set_xlabel('T')
        ax.plot(data, label="Original", color="black", linewidth=1.5)
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
        # Common y-range across all rows, padded by 5% on each side.
        ax.set_ylim([min(mi), max(ma)])
        ax.set_xlim([0, len(data)])

    plt.tight_layout()

    cUtil.show_and_save_image(fig, file, save)


def _plot_pwfts_forecasts(data, sets, order, shortname, point_ax, interval_ax):
    """
    Train one PWFTS model and plot its point and interval forecasts.

    :return: ``(min(lower), max(upper))`` of the interval forecasts
    """
    fts = pwfts.ProbabilisticWeightedFTS("")
    fts.shortname = shortname
    fts.train(data, sets=sets, order=order)
    point_forecasts = fts.forecast(data)
    interval_forecasts = fts.forecast_interval(data)
    lower = [kk[0] for kk in interval_forecasts]
    upper = [kk[1] for kk in interval_forecasts]
    bounds = (min(lower), max(upper))

    # Left-pad with ``order`` empty slots so the forecasts are shifted by
    # ``order`` positions relative to the original series.
    pad = [None] * int(order)
    point_ax.plot(pad + list(point_forecasts), label=fts.shortname)
    interval_ax.plot(pad + lower, label=fts.shortname)
    interval_ax.plot(pad + upper)
    return bounds
Exemplo n.º 3
0
def plot_residuals(targets, models, tam=[8, 8], save=False, file=None):
    """
    Plot, one row per model, the residual series, its autocorrelation and
    its histogram.

    :param targets: series passed to each model's ``forecast`` and to ``residuals``
    :param models: iterable of models exposing ``forecast``, ``order`` and ``shortname``
    :param tam: figure size, forwarded to ``plt.subplots`` as ``figsize``
    :param save: forwarded to ``Util.show_and_save_image``
    :param file: forwarded to ``Util.show_and_save_image``
    """
    # squeeze=False keeps ``axes`` two-dimensional even for a single model,
    # so every row can be addressed as ``axes[c]``.
    fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam,
                             squeeze=False)

    for c, mfts in enumerate(models):
        ax = axes[c]
        forecasts = mfts.forecast(targets)
        res = residuals(targets, forecasts, mfts.order)

        # Column titles are only drawn on the first row.
        if c == 0:
            ax[0].set_title("Residuals", size='large')
        ax[0].set_ylabel(mfts.shortname, size='large')
        ax[0].set_xlabel(' ')
        ax[0].plot(res)

        if c == 0:
            ax[1].set_title("Residuals Autocorrelation", size='large')
        ax[1].set_ylabel('ACS')
        ax[1].set_xlabel('Lag')
        ax[1].acorr(res)

        if c == 0:
            ax[2].set_title("Residuals Histogram", size='large')
        ax[2].set_ylabel('Freq')
        ax[2].set_xlabel('Bins')
        ax[2].hist(res)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Exemplo n.º 4
0
def plot_sets(data, sets, titles, tam=[12, 10], save=False, file=None):
    """
    Plot each collection of fuzzy sets on its own row of a single figure.

    :param data: kept for interface compatibility; it is not used for plotting
    :param sets: sequence of mapping-like collections (``.keys()`` plus item
                 access) of fuzzy sets, one collection per subplot row
    :param titles: subplot titles, indexed like ``sets``
    :param tam: figure size, forwarded to ``plt.subplots`` as ``figsize``
    :param save: forwarded to ``Util.show_and_save_image``
    :param file: forwarded to ``Util.show_and_save_image``
    """
    num = len(sets)
    # squeeze=False: ``subplots`` returns a 2-D array even when num == 1, so
    # row indexing also works for a single collection.
    fig, axes = plt.subplots(nrows=num, ncols=1, figsize=tam, squeeze=False)
    for k in range(num):
        ticks = []
        x = []
        ax = axes[k][0]
        ax.set_title(titles[k])
        ax.set_ylim([0, 1.1])
        for key in sets[k].keys():
            s = sets[k][key]
            if s.mf == Membership.trimf:
                ax.plot(s.parameters, [0, 1, 0])
            elif s.mf == Membership.gaussmf:
                # Sample the membership function over [lower, upper).
                tmpx = np.arange(s.lower, s.upper)
                tmpy = [s.membership(kk) for kk in tmpx]
                ax.plot(tmpx, tmpy)
            elif s.mf == Membership.trapmf:
                ax.plot(s.parameters, [0, 1, 1, 0])
            ticks.append(str(round(s.centroid, 0)) + '\n' + s.name)
            x.append(s.centroid)
        # Fix the tick positions first, then attach one label per position.
        ax.xaxis.set_ticks(x)
        ax.xaxis.set_ticklabels(ticks)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Exemplo n.º 5
0
def plot_sets(partitioner,
              start=0,
              end=10,
              step=1,
              tam=[5, 5],
              colors=None,
              save=False,
              file=None,
              axes=None,
              data=None,
              window_size=1,
              only_lines=False,
              legend=True):
    """
    Plot the time-varying parameters of every fuzzy set of a partitioner.

    :param partitioner: object exposing ``ordered_sets`` and ``sets``
    :param start: first time index (inclusive)
    :param end: last time index (exclusive)
    :param step: increment between time indexes
    :param tam: figure size used when a new figure is created
    :param colors: unused; kept for interface compatibility
    :param save: forwarded to ``Util.show_and_save_image``
    :param file: when not None the figure is laid out and handed to
                 ``Util.show_and_save_image``
    :param axes: existing axes to draw on; a new figure is created when None
    :param data: optional series drawn in black starting at ``start``
    :param window_size: time indexes are floored to multiples of this value
    :param only_lines: when True draw one dashed line per set instead of
                       one shape per time index
    :param legend: when True add a legend outside the plot area
    """
    steps = np.arange(start, end, step)
    if axes is None:
        fig, axes = plt.subplots(nrows=1, ncols=1, figsize=tam)
    else:
        # Needed later by show_and_save_image when the caller supplies axes.
        fig = axes.get_figure()

    for key in partitioner.ordered_sets:
        fset = partitioner.sets[key]
        if not only_lines:
            for t in steps:
                tdisp = t - (t % window_size)
                # Return value unused; presumably this call populates
                # perturbated_parameters[str(tdisp)] - TODO confirm.
                fset.membership(0, tdisp)
                param = fset.perturbated_parameters[str(tdisp)]

                if fset.mf == Membership.trimf:
                    if t == start:
                        # First shape of the set: let matplotlib pick the
                        # colour and remember it for the following ones.
                        line = axes.plot([t, t + 1, t], param, label=fset.name)
                        fset.metadata['color'] = line[0].get_color()
                    else:
                        axes.plot([t, t + 1, t],
                                  param,
                                  c=fset.metadata['color'])
        else:
            tmp = []
            for t in steps:
                tdisp = t - (t % window_size)
                fset.membership(0, tdisp)
                param = fset.perturbated_parameters[str(tdisp)]
                tmp.append(np.polyval(param, tdisp))
            axes.plot(steps, tmp, ls="--", c="blue")

    axes.set_ylabel("Universe of Discourse")
    axes.set_xlabel("Time")
    # One label per tick position; the only_lines mode draws unlabeled ticks.
    ticks = [] if only_lines else ["t+" + str(t) for t in steps]
    axes.set_xticks(steps)
    axes.set_xticklabels(ticks, rotation='vertical')

    if legend:
        handles0, labels0 = axes.get_legend_handles_labels()
        axes.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))

    if data is not None:
        axes.plot(np.arange(start, start + len(data), 1), data, c="black")

    if file is not None:
        fig.tight_layout()
        Util.show_and_save_image(fig, file, save)
Exemplo n.º 6
0
def distributed_train(model, train_method, nodes, fts_method, data, num_batches=10,
                      train_parameters={}, **kwargs):
    """
    Train ``model`` in batches on a dispy cluster and merge the partial models.

    The data is cut into consecutive batches (each one prefixed with the
    ``model.order`` previous samples); for every batch a fresh ``fts_method()``
    instance receives the parameters of ``model`` and is submitted to the
    cluster. Finished partial models are merged into ``model``.

    :param model: the model that receives the merged result
    :param train_method: function handed to ``start_dispy_cluster``
    :param nodes: cluster nodes handed to ``start_dispy_cluster``
    :param fts_method: zero-argument callable creating the per-batch model
    :param data: training data; sliced with ``.iloc`` when ``model.is_multivariate``
    :param num_batches: number of batches the data is divided into
    :param train_parameters: submitted to the cluster together with each batch
    :param kwargs: ``batch_save`` (persist the model between batches),
                   ``batch_save_interval`` (persist every N-th batch),
                   ``file_path`` (destination for ``Util.persist_obj``)
    :return: ``model`` after merging all finished batches
    """
    import dispy, dispy.httpd, datetime

    batch_save = kwargs.get('batch_save', False)  # save model between batches

    batch_save_interval = kwargs.get('batch_save_interval', 1)

    file_path = kwargs.get('file_path', None)

    cluster, http_server = start_dispy_cluster(train_method, nodes)

    # Always release the cluster, even if submitting or merging fails.
    try:
        print("[{0: %H:%M:%S}] Distrituted Train Started with {1} CPU's"
              .format(datetime.datetime.now(), get_number_of_cpus(cluster)))

        jobs = []
        n = len(data)
        # At least one sample per batch: a zero step would make range() raise
        # when there are fewer samples than batches.
        batch_size = max(1, int(n / num_batches))
        for bcount, ct in enumerate(range(model.order, n, batch_size), start=1):
            if model.is_multivariate:
                ndata = data.iloc[ct - model.order:ct + batch_size]
            else:
                ndata = data[ct - model.order: ct + batch_size]

            tmp_model = fts_method()

            tmp_model.clone_parameters(model)

            job = cluster.submit(tmp_model, ndata, train_parameters)
            job.id = bcount  # associate an ID to identify jobs (if needed later)
            jobs.append(job)

        for job in jobs:
            print("[{0: %H:%M:%S}] Processing batch ".format(datetime.datetime.now()) + str(job.id))
            tmp = job()  # blocks until the job result is available
            if job.status == dispy.DispyJob.Finished and tmp is not None:
                model.merge(tmp)

                if batch_save and (job.id % batch_save_interval) == 0:
                    Util.persist_obj(model, file_path)

            else:
                print(job.exception)
                print(job.stdout)

            print("[{0: %H:%M:%S}] Finished batch ".format(datetime.datetime.now()) + str(job.id))

        print("[{0: %H:%M:%S}] Distrituted Train Finished".format(datetime.datetime.now()))
    finally:
        stop_dispy_cluster(cluster, http_server)

    return model
Exemplo n.º 7
0
def train_individual_model(partitioner, train_data, indexer):
    """
    Train one ContextualMultiSeasonalFTS for a partitioner/indexer pair and
    persist it under ``models/<key>.pkl``.

    The key is built from the last component of the partitioner's module
    name, its number of partitions, a ``_diff`` marker when the partitioner
    has a transformation, and the indexer name.

    :param partitioner: partitioner providing ``sets``, ``partitions`` and ``transformation``
    :param train_data: data passed to ``model.train``
    :param indexer: indexer given to the model; its ``name`` is part of the key
    :return: the trained model
    """
    module_name = str(partitioner.__module__).split('.')[-1]
    if partitioner.transformation is not None:
        suffix = "_diff"
    else:
        suffix = ""
    _key = "".join([
        "msfts_",
        module_name,
        str(partitioner.partitions),
        suffix,
        "_",
        indexer.name,
    ])

    print(_key)

    trained = cmsfts.ContextualMultiSeasonalFTS(_key, indexer=indexer)
    trained.append_transformation(partitioner.transformation)
    trained.train(train_data, partitioner.sets, order=1)

    cUtil.persist_obj(trained, "models/" + _key + ".pkl")

    return trained
Exemplo n.º 8
0
def evaluation1(dataset, individual):
    """
    Evaluate an individual with a sliding window (size 800, 80% train,
    25% increment) over the dataset.

    :param dataset: evaluation dataset
    :param individual: genotype handed to ``phenotype``
    :return: ``(len_lags, rmse)`` on success, where ``rmse`` is
             0.6 * mean + 0.4 * std of the per-window RMSE and ``len_lags``
             is 0.4 * mean model length + 0.6 * (sum of lags * 100);
             ``None`` when ``phenotype`` yields no model;
             ``np.inf`` when any exception is raised
    """
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures

    try:
        results = []
        lengths = []

        for count, train, test in Util.sliding_window(dataset,
                                                      800,
                                                      train=.8,
                                                      inc=.25):
            model = phenotype(individual, train)

            if model is None:
                return (None)

            rmse, _, _ = Measures.get_point_statistics(test, model)
            lengths.append(len(model))
            results.append(rmse)

        # The aggregates only depend on the full lists and on the model of
        # the last window, so they are computed once after the loop.
        _lags = sum(model.lags) * 100

        rmse = np.nansum(
            [.6 * np.nanmean(results), .4 * np.nanstd(results)])
        len_lags = np.nansum([.4 * np.nanmean(lengths), .6 * _lags])

        return len_lags, rmse

    except Exception as ex:
        # Best effort: any failure marks the individual as maximally unfit.
        print("EXCEPTION!", str(ex), str(individual))
        return np.inf
Exemplo n.º 9
0
def cluster_method(individual, dataset, **kwargs):
    """
    Evaluate one hyperparameter combination with a sliding window cross
    validation, training a WeightedHighOrderFTS on every window.

    :param individual: dict with the keys ``mf``, ``partitioner``, ``npart``,
                       ``lags``, ``alpha`` and ``order``
    :param dataset: evaluation dataset
    :param kwargs: ``window_size`` (default 800), ``train_rate`` (default .8),
                   ``increment_rate`` (default .2), ``parameters`` (default {})
    :return: dict with ``parameters`` (the individual), ``rmse`` (mean RMSE
             over the windows) and ``size`` (mean model length over the windows)
    """
    from pyFTS.common import Util, Membership
    from pyFTS.models import hofts
    from pyFTS.partitioners import Grid, Entropy
    from pyFTS.benchmarks import Measures
    import numpy as np

    if individual['mf'] == 1:
        mf = Membership.trimf
    elif individual['mf'] == 2:
        mf = Membership.trapmf
    elif individual['mf'] == 3 and individual['partitioner'] != 2:
        # gaussmf is not selected together with partitioner 2 (entropy)
        mf = Membership.gaussmf
    else:
        mf = Membership.trimf

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    parameters = kwargs.get('parameters', {})

    errors = []
    sizes = []

    for count, train, test in Util.sliding_window(dataset,
                                                  window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):

        if individual['partitioner'] == 1:
            partitioner = Grid.GridPartitioner(data=train,
                                               npart=individual['npart'],
                                               func=mf)
        elif individual['partitioner'] == 2:
            # the entropy partitioner is always given at least 10 partitions
            npart = individual['npart'] if individual['npart'] > 10 else 10
            partitioner = Entropy.EntropyPartitioner(data=train,
                                                     npart=npart,
                                                     func=mf)

        model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
                                           lags=individual['lags'],
                                           alpha_cut=individual['alpha'],
                                           order=individual['order'])
        model.fit(train)

        forecasts = model.predict(test)

        rmse = Measures.rmse(test[model.max_lag:], forecasts)

        errors.append(rmse)
        sizes.append(len(model))

    return {
        'parameters': individual,
        'rmse': np.nanmean(errors),
        # average over every window, not just the last model's size
        'size': np.nanmean(sizes)
    }
Exemplo n.º 10
0
def plot_sets_conditional(model,
                          data,
                          start=0,
                          end=10,
                          step=1,
                          tam=[5, 5],
                          colors=None,
                          save=False,
                          file=None,
                          axes=None):
    """
    Plot the fuzzy sets of a model's partitioner, perturbed at every time
    index by the factors the model derives from the data.

    :param model: object exposing ``partitioner`` and ``perturbation_factors``
    :param data: series indexed by time; ``data[t]`` feeds
                 ``model.perturbation_factors`` and the series is drawn in black
    :param start: first time index (inclusive)
    :param end: last time index (exclusive)
    :param step: increment between time indexes
    :param tam: figure size used when a new figure is created
    :param colors: unused; kept for interface compatibility
    :param save: forwarded to ``Util.show_and_save_image``
    :param file: forwarded to ``Util.show_and_save_image``
    :param axes: existing axes to draw on; a new figure is created when None
    """
    steps = np.arange(start, end, step)
    if axes is None:
        fig, axes = plt.subplots(nrows=1, ncols=1, figsize=tam)
    else:
        # Needed by show_and_save_image when the caller supplies the axes.
        fig = axes.get_figure()

    for ct, key in enumerate(model.partitioner.ordered_sets):
        fset = model.partitioner.sets[key]
        for t in steps:
            # One factor per fuzzy set, indexed by the set's position.
            tdisp = model.perturbation_factors(data[t])
            fset.perturbate_parameters(tdisp[ct])
            param = fset.perturbated_parameters[str(tdisp[ct])]

            if fset.mf == Membership.trimf:
                if t == start:
                    # First shape of the set: let matplotlib pick the colour
                    # and remember it for the following ones.
                    line = axes.plot([t, t + 1, t], param, label=fset.name)
                    fset.metadata['color'] = line[0].get_color()
                else:
                    axes.plot([t, t + 1, t], param, c=fset.metadata['color'])

    axes.set_ylabel("Universe of Discourse")
    axes.set_xlabel("Time")
    # One label per tick position.
    axes.set_xticks(steps)
    axes.set_xticklabels(["t+" + str(t) for t in steps], rotation='vertical')

    handles0, labels0 = axes.get_legend_handles_labels()
    axes.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))

    if data is not None:
        axes.plot(np.arange(start, start + len(data), 1), data, c="black")

    fig.tight_layout()

    Util.show_and_save_image(fig, file, save)
Exemplo n.º 11
0
def evaluate(dataset, individual, **kwargs):
    """
    Score an individual with a sliding window cross validation over the dataset.

    An individual whose ``f1`` and ``f2`` are both already set is not
    re-evaluated; its stored measures are returned instead.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param fts_method: model class handed to ``phenotype`` (default ``hofts.WeightedHighOrderFTS``)
    :param parameters: dict with model specific arguments for fit method.
    :return: a dict with the keys ``f1`` (accuracy fitness: 0.6 * mean RMSE +
             0.4 * RMSE std), ``f2`` (parsimony fitness: 0.4 * mean model
             length + 0.6 * sum of lags * 100), ``rmse`` and ``size``
    """
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import phenotype, __measures
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
    parameters = kwargs.get('parameters', {})

    needs_scoring = individual['f1'] is None or individual['f2'] is None
    if not needs_scoring:
        return {key: individual[key] for key in __measures}

    window_rmse = []
    model_sizes = []

    windows = Util.sliding_window(dataset,
                                  window_size,
                                  train=train_rate,
                                  inc=increment_rate)
    for count, train, test in windows:
        model = phenotype(individual,
                          train,
                          fts_method=fts_method,
                          parameters=parameters)

        forecasts = model.predict(test)

        # the last forecast has no matching target, hence [:-1]
        window_rmse.append(
            Measures.rmse(test[model.max_lag:], forecasts[:-1]))
        model_sizes.append(len(model))

    # lags of the model trained on the last window
    lag_penalty = sum(model.lags) * 100

    mean_rmse = np.nanmean(window_rmse)
    mean_size = np.nanmean(model_sizes)

    accuracy = np.nansum([.6 * mean_rmse, .4 * np.nanstd(window_rmse)])
    parsimony = np.nansum([.4 * mean_size, .6 * lag_penalty])

    return {'f1': accuracy, 'f2': parsimony, 'rmse': mean_rmse, 'size': mean_size}
Exemplo n.º 12
0
def plotResiduals(targets, models, tam=[8, 8], save=False, file=None):
    """
    Plot residuals and statistics, one row of three panels per model.

    :param targets: series passed to each model's ``forecast`` and to ``residuals``
    :param models: iterable of models exposing ``forecast`` and ``order``
    :param tam: figure size, forwarded to ``plt.subplots`` as ``figsize``
    :param save: forwarded to ``Util.show_and_save_image``
    :param file: forwarded to ``Util.show_and_save_image``
    :return: None
    """
    # squeeze=False keeps ``axes`` two-dimensional even for a single model,
    # so every row can be addressed as ``axes[c]``.
    fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam,
                             squeeze=False)
    for c, mfts in enumerate(models):
        ax = axes[c]
        forecasts = mfts.forecast(targets)
        res = residuals(targets, forecasts, mfts.order)
        mu = np.mean(res)
        sig = np.std(res)

        ax[0].set_title("Residuals Mean=" + str(mu) + " STD = " + str(sig))
        ax[0].set_ylabel('E')
        ax[0].set_xlabel('T')
        ax[0].plot(res)

        ax[1].set_title("Residuals Autocorrelation")
        ax[1].set_ylabel('ACS')
        ax[1].set_xlabel('Lag')
        ax[1].acorr(res)

        ax[2].set_title("Residuals Histogram")
        ax[2].set_ylabel('Freq')
        ax[2].set_xlabel('Bins')
        ax[2].hist(res)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Exemplo n.º 13
0
def plot_residuals_by_model(targets,
                            models,
                            tam=[8, 8],
                            save=False,
                            file=None):
    """
    Plot, one row per model, the residual series, its autocorrelation, its
    histogram and a probability (QQ) plot.

    :param targets: series passed to each model's ``predict`` and to ``residuals``
    :param models: iterable of models exposing ``predict``, ``order`` and ``shortname``
    :param tam: figure size, forwarded to ``plt.subplots`` as ``figsize``
    :param save: forwarded to ``Util.show_and_save_image``
    :param file: forwarded to ``Util.show_and_save_image``
    """
    # Import the submodule explicitly: ``import scipy`` alone is not
    # guaranteed to expose ``scipy.stats`` as an attribute.
    from scipy import stats

    # squeeze=False keeps ``axes`` two-dimensional even for a single model.
    fig, axes = plt.subplots(nrows=len(models), ncols=4, figsize=tam,
                             squeeze=False)

    for c, mfts in enumerate(models):
        ax = axes[c]
        forecasts = mfts.predict(targets)
        res = residuals(targets, forecasts, mfts.order + 1)

        # Column titles are only drawn on the first row.
        if c == 0:
            ax[0].set_title("Residuals", size='large')
        ax[0].set_ylabel(mfts.shortname, size='large')
        ax[0].set_xlabel(' ')
        ax[0].plot(res)

        if c == 0:
            ax[1].set_title("Autocorrelation", size='large')
        ax[1].set_ylabel('ACS')
        ax[1].set_xlabel('Lag')
        ax[1].acorr(res)

        if c == 0:
            ax[2].set_title("Histogram", size='large')
        ax[2].set_ylabel('Freq')
        ax[2].set_xlabel('Bins')
        ax[2].hist(res)

        stats.probplot(res, plot=ax[3], fit=True)
        # probplot writes its own title on the axes, so the column title is
        # applied afterwards (and cleared on the remaining rows).
        if c == 0:
            ax[3].set_title("QQ Plot", size='large')
        else:
            ax[3].set_title('')

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Exemplo n.º 14
0
    def train(self, data, **kwargs):
        """
        Train one sub-model per (indexer, partitioner) combination in
        parallel, append them all to this ensemble and persist the ensemble
        under ``models/<name>.pkl``.

        :param data: training data, handed unchanged to every sub-model
        :param kwargs: accepted for interface compatibility; not used here
        """
        self.original_max = max(self.indexer.get_data(data))
        self.original_min = min(self.indexer.get_data(data))

        workers = multiprocessing.cpu_count()

        # Every combination, with the indexers varying slowest.
        combos = [(ix, pt) for ix in self.indexers for pt in self.partitioners]

        # Each job works on its own copies of the partitioner and indexer.
        trained = Parallel(n_jobs=workers)(
            delayed(train_individual_model)(deepcopy(pt), data, deepcopy(ix))
            for ix, pt in combos)

        for sub_model in trained:
            self.append_model(sub_model)

        cUtil.persist_obj(self, "models/" + self.name + ".pkl")
0
def explore_partitioners(data, npart, methods=None, mf=None, tam=[12, 10], save=False, file=None):
    """
    Build a partitioner for every (method, membership function) pair and
    plot them all with ``Util.plot_partitioners``.

    :param data: data to partition; every job receives its own deep copy
    :param npart: number of partitions handed to each partitioner
    :param methods: partitioner classes; defaults to Grid, Entropy and FCM
    :param mf: membership functions; defaults to trimf, gaussmf and trapmf
    :param tam: figure size forwarded to ``Util.plot_partitioners``
    :param save: forwarded to ``Util.plot_partitioners``
    :param file: forwarded to ``Util.plot_partitioners``
    """
    all_methods = [Grid.GridPartitioner, Entropy.EntropyPartitioner, FCM.FCMPartitioner]
    mfs = [Membership.trimf, Membership.gaussmf, Membership.trapmf]

    methods = all_methods if methods is None else methods
    mf = mfs if mf is None else mf

    workers = multiprocessing.cpu_count()

    built = []
    for partitioner_cls in methods:
        print(str(partitioner_cls))
        # One parallel job per membership function for this partitioner class.
        jobs = (delayed(partitioner_cls)(deepcopy(data), npart, func)
                for func in mf)
        built.append(Parallel(n_jobs=workers)(jobs))

    # Flatten the per-method lists into a single list of partitioners.
    flattened = np.ravel(built).tolist()

    Util.plot_partitioners(data, flattened, tam, save, file)
Exemplo n.º 16
0
def execute(datasetname, dataset, **kwargs):
    """
    Run the experiments on a dispy cluster and hand the jobs to
    ``process_jobs`` together with the ``hyperparam.db`` connection.

    :param datasetname: dataset identifier forwarded to ``process_jobs``
    :param dataset: data submitted to the cluster with every job
    :param kwargs: ``nodes`` (default ['127.0.0.1']), ``ngen`` (70),
                   ``npop`` (20), ``pcruz`` (.8), ``pmut`` (.2),
                   ``option`` (1), ``experiments`` (30)
    """
    nodes = kwargs.get('nodes', ['127.0.0.1'])

    cluster, http_server = Util.start_dispy_cluster(cluster_method,
                                                    nodes=nodes)
    # Always release the cluster, even if submitting or processing fails.
    try:
        conn = hUtil.open_hyperparam_db('hyperparam.db')

        ngen = kwargs.get('ngen', 70)
        npop = kwargs.get('npop', 20)
        pcruz = kwargs.get('pcruz', .8)
        pmut = kwargs.get('pmut', .2)
        option = kwargs.get('option', 1)

        jobs = []

        for i in range(kwargs.get('experiments', 30)):
            print("Experiment {}".format(i))
            job = cluster.submit(dataset, ngen, npop, pcruz, pmut, option)
            jobs.append(job)

        process_jobs(jobs, datasetname, conn)
    finally:
        Util.stop_dispy_cluster(cluster, http_server)
Exemplo n.º 17
0
def single_plot_residuals(res, order, tam=[10, 7], save=False, file=None):
    """
    Draw a 2x2 figure for a residual series: the series itself, its
    autocorrelation, its histogram and a probability plot.

    :param res: residual values to plot
    :param order: unused; kept for interface compatibility
    :param tam: figure size, forwarded to ``plt.subplots`` as ``figsize``
    :param save: forwarded to ``Util.show_and_save_image``
    :param file: forwarded to ``Util.show_and_save_image``
    """
    # Import the submodule explicitly: ``import scipy`` alone is not
    # guaranteed to expose ``scipy.stats`` as an attribute.
    from scipy import stats

    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=tam)

    ax[0][0].set_title("Residuals", size='large')
    ax[0][0].plot(res)

    ax[0][1].set_title("Autocorrelation", size='large')
    ax[0][1].set_ylabel('ACF')
    ax[0][1].set_xlabel('Lag')
    ax[0][1].acorr(res)

    ax[1][0].set_title("Histogram", size='large')
    ax[1][0].set_ylabel('Freq')
    ax[1][0].set_xlabel('Bins')
    ax[1][0].hist(res)

    # Draws the quantiles and the least-squares fit on the last panel.
    stats.probplot(res, plot=ax[1][1], fit=True)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Exemplo n.º 18
0
def evaluate(dataset, individual, **kwargs):
    """
    Score an individual with a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :return: a dict with the single key ``rmse`` holding
             0.6 * mean + 0.4 * standard deviation of the per-window RMSE
    :raises Exception: when ``phenotype`` returns None for a window
    """
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.fcm.GA import phenotype
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)

    window_rmse = []

    windows = Util.sliding_window(dataset,
                                  window_size,
                                  train=train_rate,
                                  inc=increment_rate)
    for count, train, test in windows:
        model = phenotype(individual, train)

        if model is None:
            raise Exception("Phenotype returned None")

        model.uod_clip = False

        forecasts = model.predict(test)

        # the last forecast has no matching target, hence [:-1]
        targets = test[model.max_lag:]
        window_rmse.append(Measures.rmse(targets, forecasts[:-1]))

    mean_rmse = np.nanmean(window_rmse)
    std_rmse = np.nanstd(window_rmse)

    fitness = .6 * mean_rmse + .4 * std_rmse
    return {'rmse': fitness}
Exemplo n.º 19
0
# Re-index the frame with consecutive integers starting at 0.
sonda.index = np.arange(0, len(sonda.index))

# Train/test split.
# NOTE(review): row 1051200 falls in neither slice - confirm this is intended.
sonda_treino = sonda[:1051200]
sonda_teste = sonda[1051201:]

partitions = ['grid', 'entropy']

indexers = ['m15', 'Mh', 'Mhm15']

models = []
ixs = []

# Only the first 4300 test rows are used for the statistics.
sample = sonda_teste[0:4300]

# Load every persisted model together with its matching indexer, keeping
# both lists aligned position by position.
for max_part in [10, 20, 30, 40, 50]:
    for part in partitions:
        for ind in indexers:
            label = "_".join([part, str(max_part), ind])

            ix = Util.load_obj("models/sonda_ix_" + ind + ".pkl")
            model = Util.load_obj("models/sonda_msfts_" + label + ".pkl")
            model.shortname = label

            models.append(model)
            ixs.append(ix)

print(bchmk.print_point_statistics(sample, models, indexers=ixs))
Exemplo n.º 20
0
def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    """
    Sliding window benchmarks for FTS forecasters.

    For each data window, a train and test datasets will be splitted. For each train split, number of
    partitions and partitioning method will be created a partitioner model. And for each partitioner, order,
    steps ahead and FTS method a foreasting model will be trained.

    Then all trained models are benchmarked on the test data and the metrics are stored on a sqlite3 database
    (identified by the 'file' parameter) for posterior analysis.

    All these process can be distributed on a dispy cluster, setting the atributed 'distributed' to true and
    informing the list of dispy nodes on 'nodes' parameter.

    The number of experiments is determined by 'windowsize' and 'inc' parameters.

    :param data: test data
    :param windowsize: size of sliding window
    :param train: percentual of sliding window data used to train the models
    :param kwargs: dict, optional arguments

    :keyword
        benchmark_methods:  a list with Non FTS models to benchmark. The default is None.
        benchmark_methods_parameters:  a list with Non FTS models parameters. The default is None.
        benchmark_models: A boolean value indicating if external FTS methods will be used on benchmark. The default is False.
        build_methods: A boolean value indicating if the default FTS methods will be used on benchmark. The default is True.
        dataset: the dataset name to identify the current set of benchmarks results on database.
        distributed: A boolean value indicating if the forecasting procedure will be distributed in a dispy cluster. . The default is False
        file: file path to save the results. The default is benchmarks.db.
        inc: a float on interval [0,1] indicating the percentage of the windowsize to move the window
        methods: a list with FTS class names. The default depends on the forecasting type and contains the list of all FTS methods.
        models: a list with prebuilt FTS objects. The default is None.
        nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
        orders: a list with orders of the models (for high order models). The default is [1,2,3].
        partitions: a list with the numbers of partitions on the Universe of Discourse. The default is [10].
        partitioners_models: a list with prebuilt Universe of Discourse partitioners objects. The default is None.
        partitioners_methods: a list with Universe of Discourse partitioners class names. The default is [partitioners.Grid.GridPartitioner].
        progress: If true a progress bar will be displayed during the benchmarks. The default is False.
        start: in the multi step forecasting, the index of the data where to start forecasting. The default is 0.
        steps_ahead: a list with  the forecasting horizons, i. e., the number of steps ahead to forecast. The default is 1.
        tag: a name to identify the current set of benchmarks results on database.
        type: the forecasting type, one of these values: point(default), interval or distribution. The default is point.
        transformations: a list with data transformations do apply . The default is [None].
    """

    tag = __pop('tag', None, kwargs)
    dataset = __pop('dataset', None, kwargs)

    distributed = __pop('distributed', False, kwargs)

    transformations = kwargs.get('transformations', [None])
    progress = kwargs.get('progress', None)
    type = kwargs.get("type", 'point')

    orders = __pop("orders", [1, 2, 3], kwargs)

    partitioners_models = __pop("partitioners_models", None, kwargs)
    partitioners_methods = __pop("partitioners_methods",
                                 [Grid.GridPartitioner], kwargs)
    partitions = __pop("partitions", [10], kwargs)

    steps_ahead = __pop('steps_ahead', [1], kwargs)

    methods = __pop('methods', None, kwargs)

    models = __pop('models', None, kwargs)

    pool = [] if models is None else models

    if methods is None:
        if type == 'point':
            methods = get_point_methods()
        elif type == 'interval':
            methods = get_interval_methods()
        elif type == 'distribution':
            methods = get_probabilistic_methods()

    build_methods = __pop("build_methods", True, kwargs)

    if build_methods:
        for method in methods:
            mfts = method()

            if mfts.is_high_order:
                for order in orders:
                    if order >= mfts.min_order:
                        mfts = method()
                        mfts.order = order
                        pool.append(mfts)
            else:
                mfts.order = 1
                pool.append(mfts)

    benchmark_models = __pop("benchmark_models", False, kwargs)

    if benchmark_models != False:

        benchmark_methods = __pop("benchmark_methods", None, kwargs)
        benchmark_methods_parameters = __pop("benchmark_methods_parameters",
                                             None, kwargs)

        benchmark_pool = [] if ( benchmark_models is None or not isinstance(benchmark_models, list)) \
            else benchmark_models

        if benchmark_models is None and benchmark_methods is None:
            if type == 'point' or type == 'partition':
                benchmark_methods = get_benchmark_point_methods()
            elif type == 'interval':
                benchmark_methods = get_benchmark_interval_methods()
            elif type == 'distribution':
                benchmark_methods = get_benchmark_probabilistic_methods()

        if benchmark_methods is not None:
            for transformation in transformations:
                for count, model in enumerate(benchmark_methods, start=0):
                    par = benchmark_methods_parameters[count]
                    mfts = model(**par)
                    mfts.append_transformation(transformation)
                    benchmark_pool.append(mfts)

    if type == 'point':
        experiment_method = run_point
        synthesis_method = process_point_jobs
    elif type == 'interval':
        experiment_method = run_interval
        synthesis_method = process_interval_jobs
    elif type == 'distribution':
        experiment_method = run_probabilistic
        synthesis_method = process_probabilistic_jobs
    else:
        raise ValueError("Type parameter has a unkown value!")

    if distributed:
        import dispy, dispy.httpd

        nodes = kwargs.get("nodes", ['127.0.0.1'])
        cluster, http_server = cUtil.start_dispy_cluster(
            experiment_method, nodes)

    jobs = []

    inc = __pop("inc", 0.1, kwargs)

    if progress:
        from tqdm import tqdm
        _tdata = len(data) / (windowsize * inc)
        _tasks = (len(partitioners_models) * len(orders) * len(partitions) *
                  len(transformations) * len(steps_ahead))
        _tbcmk = len(benchmark_pool) * len(steps_ahead)
        progressbar = tqdm(total=_tdata * _tasks + _tdata * _tbcmk,
                           desc="Benchmarks:")

    file = kwargs.get('file', "benchmarks.db")

    conn = bUtil.open_benchmark_db(file)

    for ct, train, test in cUtil.sliding_window(data,
                                                windowsize,
                                                train,
                                                inc=inc,
                                                **kwargs):
        if benchmark_models != False:
            for model in benchmark_pool:
                for step in steps_ahead:

                    kwargs['steps_ahead'] = step

                    if not distributed:
                        if progress:
                            progressbar.update(1)
                        try:
                            job = experiment_method(deepcopy(model), None,
                                                    train, test, **kwargs)
                            synthesis_method(dataset, tag, job, conn)
                        except Exception as ex:
                            print('EXCEPTION! ', model.shortname, model.order)
                            traceback.print_exc()
                    else:
                        job = cluster.submit(deepcopy(model), None, train,
                                             test, **kwargs)
                        jobs.append(job)

        partitioners_pool = []

        if partitioners_models is None:

            for transformation in transformations:

                for partition in partitions:

                    for partitioner in partitioners_methods:

                        data_train_fs = partitioner(
                            data=train,
                            npart=partition,
                            transformation=transformation)

                        partitioners_pool.append(data_train_fs)
        else:
            partitioners_pool = partitioners_models

        for step in steps_ahead:

            for partitioner in partitioners_pool:

                for _id, model in enumerate(pool, start=0):

                    kwargs['steps_ahead'] = step

                    if not distributed:
                        if progress:
                            progressbar.update(1)
                        try:
                            job = experiment_method(deepcopy(model),
                                                    deepcopy(partitioner),
                                                    train, test, **kwargs)
                            synthesis_method(dataset, tag, job, conn)
                        except Exception as ex:
                            print('EXCEPTION! ', model.shortname, model.order,
                                  partitioner.name, partitioner.partitions,
                                  str(partitioner.transformation))
                            traceback.print_exc()
                    else:
                        job = cluster.submit(deepcopy(model),
                                             deepcopy(partitioner), train,
                                             test, **kwargs)
                        job.id = id  # associate an ID to identify jobs (if needed later)
                        jobs.append(job)

    if progress:
        progressbar.close()

    if distributed:

        for job in jobs:
            if progress:
                progressbar.update(1)
            job()
            if job.status == dispy.DispyJob.Finished and job is not None:
                tmp = job.result
                synthesis_method(dataset, tag, tmp, conn)
            else:
                print("status", job.status)
                print("result", job.result)
                print("stdout", job.stdout)
                print("stderr", job.exception)

        cluster.wait()  # wait for all jobs to finish

        cUtil.stop_dispy_cluster(cluster, http_server)

    conn.close()
Exemplo n.º 21
0
                          data=train,
                          partitioner_specific=sp)

# Draw the fuzzy sets of the ``vhour`` variable's partitioner on the second axes.
vhour.partitioner.plot(axes[1])

# Variable named "Radiance", read from the "glo_avg" column of ``train`` and
# partitioned by a GridPartitioner (npart=30).
vavg = variable.Variable("Radiance",
                         data_label="glo_avg",
                         partitioner=Grid.GridPartitioner,
                         npart=30,
                         data=train)

# Multivariate FTS model built from the three variables defined above.
model1 = mvfts.MVFTS("")

model1.append_variable(vmonth)

model1.append_variable(vhour)

model1.append_variable(vavg)

# The radiance variable is the one the model is set to forecast.
model1.target_variable = vavg

# Disabled training calls, kept for reference (the model is loaded from disk below).
#model1.fit(train, num_batches=60, save=True, batch_save=True, file_path='mvfts_sonda')

#model.fit(train, num_batches=60, save=True, batch_save=True, file_path='mvfts_sonda')

#model1.fit(train, num_batches=200, save=True, batch_save=True, file_path='mvfts_sonda', distributed=False,
#          nodes=['192.168.0.110'], batch_save_interval=10)

# Load a previously persisted model instead of training ``model1``.
# NOTE(review): ``model1`` built above is not used after this point in the visible script.
model = Util.load_obj('mvfts_sonda')

forecasts = model.predict(test)
Exemplo n.º 22
0
def plot_compared_intervals_ahead(original,
                                  models,
                                  colors,
                                  distributions,
                                  time_from,
                                  time_to,
                                  intervals=True,
                                  save=False,
                                  file=None,
                                  tam=[20, 5],
                                  resolution=None,
                                  cmap='Blues',
                                  linewidth=1.5):
    """
    Plot the ahead forecasts (intervals and/or probability distributions) of
    several models over the original time series.

    :param original: Original time series data (list)
    :param models: List of models to compare
    :param colors: List of colors, one per model (indexed like ``models``)
    :param distributions: List of booleans, one per model; True to plot the
        forecasted distribution of that model
    :param time_from: index of the data point where the ahead forecasting starts
    :param time_to: number of steps ahead to forecast
    :param intervals: True to plot the interval forecasts of the models that
        support them
    :param save: Save the picture on file
    :param file: Filename to save the picture
    :param tam: Size of the picture
    :param resolution: resolution passed to the distribution forecasts;
        defaults to 1/100 of the range of ``original``
    :param cmap: Color map to be used on distribution plot
    :param linewidth: base line width; upper bounds and the original series
        are drawn 1.5 times thicker
    :return: None
    """
    fig = plt.figure(figsize=tam)
    ax = fig.add_subplot(111)

    cm = plt.get_cmap(cmap)

    if resolution is None:
        resolution = (max(original) - min(original)) / 100

    # Extremes of the plotted intervals, used to set the y axis limits
    mi = []
    ma = []

    for count, fts in enumerate(models, start=0):
        # The lags immediately before time_from feed every ahead forecast
        lags = original[time_from - fts.order:time_from]

        if fts.has_probability_forecasting and distributions[count]:
            density = fts.forecast_ahead_distribution(lags,
                                                      time_to,
                                                      resolution=resolution)

            plot_density_rectange(ax, cm, density, fig, resolution, time_from,
                                  time_to)

        if fts.has_interval_forecasting and intervals:
            forecasts = fts.forecast_ahead_interval(lags, time_to)
            lower = [kk[0] for kk in forecasts]
            upper = [kk[1] for kk in forecasts]
            mi.append(min(lower))
            ma.append(max(upper))
            # Left-pad with None so the forecasts line up with the original
            # series on the x axis (built in one step instead of repeated
            # O(n) inserts at the head of the list)
            padding = [None] * max(time_from - fts.order, 0)
            lower = padding + lower
            upper = padding + upper
            ax.plot(lower,
                    color=colors[count],
                    label=fts.shortname,
                    linewidth=linewidth)
            ax.plot(upper, color=colors[count], linewidth=linewidth * 1.5)

    ax.plot(original,
            color='black',
            label="Original",
            linewidth=linewidth * 1.5)
    handles0, labels0 = ax.get_legend_handles_labels()
    if True in distributions:
        lgd = ax.legend(handles0, labels0, loc=2)
    else:
        lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))

    # When no interval was plotted (only distributions, or intervals=False)
    # fall back to the range of the original data instead of failing on
    # min()/max() of an empty list
    _mi = min(mi) if mi else min(original)
    _ma = max(ma) if ma else max(original)

    # Add a 10% margin, always moving away from the data whatever the sign
    _mi *= 1.1 if _mi < 0 else 0.9
    _ma *= 0.9 if _ma < 0 else 1.1

    ax.set_ylim([_mi, _ma])
    ax.set_ylabel('F(T)')
    ax.set_xlabel('T')
    ax.set_xlim([0, len(original)])

    cUtil.show_and_save_image(fig, file, save, lgd=lgd)
Exemplo n.º 23
0
# Train the model with the 'GD' method.
# NOTE(review): 'iteractions' is spelled as in the original call; confirm the
# keyword expected by the model before renaming it.
model.fit(train, method='GD', alpha=0.5, momentum=None, iteractions=1)
# The triple-quoted block below is a bare string expression used to disable an
# alternative training call (method='GA'); it is not executed as code.
'''
model.fit(train, method='GA', ngen=15, #number of generations
    mgen=7, # stop after mgen generations without improvement
    npop=15, # number of individuals on population
    pcruz=.5, # crossover percentual of population
    pmut=.3, # mutation percentual of population
    window_size = 7000,
    train_rate = .8,
    increment_rate =.2,
    experiments=1
    )
'''

# Persist the trained model under the name 'fcm_fts10c'.
Util.persist_obj(model, 'fcm_fts10c')
# Disabled alternative: load a previously saved model instead of training.
'''
model = Util.load_obj('fcm_fts05c')
'''
#forecasts = model.predict(test)

#print(model)

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[15, 5])

ax.plot(test, label='Original')

forecasts = model.predict(test)

# Left-pad the forecasts with one None per model lag.
# NOTE(review): presumably this aligns them with ``test`` when plotted; the
# plotting call is outside the visible snippet.
for w in np.arange(model.order):
    forecasts.insert(0, None)
Exemplo n.º 24
0
def point_sliding_window(data,
                         windowsize,
                         train=0.8,
                         inc=0.1,
                         models=None,
                         partitioners=[Grid.GridPartitioner],
                         partitions=[10],
                         max_order=3,
                         transformation=None,
                         indexer=None,
                         dump=False,
                         benchmark_models=None,
                         benchmark_models_parameters=None,
                         save=False,
                         file=None,
                         sintetic=False,
                         nodes=None,
                         depends=None):
    """
    Distributed sliding window benchmarks for FTS point forecasters.

    One dispy job is submitted per (window, number of partitions, partitioner,
    model) combination and the per-model metrics of every finished job are
    collected.

    :param data: time series to be split in sliding windows
    :param windowsize: size of sliding window
    :param train: percentage of the sliding window data used to train the models
    :param inc: percentage of the window used as increment between windows
    :param models: FTS point forecasters
    :param partitioners: Universe of Discourse partitioners
    :param partitions: list with the numbers of partitions of the Universe of Discourse
    :param max_order: the max order of the models (for high order models)
    :param transformation: data transformation
    :param indexer: seasonal indexer (not used by this function)
    :param dump: if True, print the index of each window
    :param benchmark_models: Non FTS models to benchmark
    :param benchmark_models_parameters: Non FTS models parameters
    :param save: save results
    :param file: file path to save the results
    :param sintetic: if true only the average and standard deviation of the results
    :param nodes: list of cluster nodes to distribute tasks
    :param depends: list of module dependencies (not used by this function)
    :return: DataFrame with the results
    """

    cluster = dispy.JobCluster(benchmarks.run_point, nodes=nodes)

    http_server = dispy.httpd.DispyHTTPServer(cluster)

    _process_start = time.time()

    print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))

    jobs = []
    # Results grouped by the 'key' entry returned by each job
    objs = {}
    rmse = {}
    smape = {}
    u = {}
    times = {}

    pool = build_model_pool_point(models, max_order, benchmark_models,
                                  benchmark_models_parameters)

    experiments = 0
    # The ``train`` rate is consumed once when the generator is created; the
    # yielded slices get their own names so the parameter is not shadowed
    for ct, train_data, test_data in Util.sliding_window(
            data, windowsize, train, inc):
        experiments += 1

        # Benchmark-only models are submitted once per window, whatever the
        # partitioning, so remember the ones already scheduled
        benchmarks_only = {}

        if dump:
            print('\nWindow: {0}\n'.format(ct))

        for partition in partitions:

            for partitioner in partitioners:

                data_train_fs = partitioner(train_data,
                                            partition,
                                            transformation=transformation)

                for _id, m in enumerate(pool, start=0):
                    if m.benchmark_only and m.shortname in benchmarks_only:
                        continue
                    else:
                        benchmarks_only[m.shortname] = m
                    job = cluster.submit(m, data_train_fs, train_data,
                                         test_data, ct, transformation)
                    job.id = _id  # associate an ID to identify jobs (if needed later)
                    jobs.append(job)

    for job in jobs:
        tmp = job()  # blocks until the job finishes and returns its result
        if job.status == dispy.DispyJob.Finished and tmp is not None:
            key = tmp['key']
            if key not in objs:
                objs[key] = tmp['obj']
                rmse[key] = []
                smape[key] = []
                u[key] = []
                times[key] = []
            # The accumulators are plain lists: use list.append
            rmse[key].append(tmp['rmse'])
            smape[key].append(tmp['smape'])
            u[key].append(tmp['u'])
            times[key].append(tmp['time'])
            print(key, tmp['window'])
        else:
            print(job.exception)
            print(job.stdout)

    _process_end = time.time()

    print("Process End: {0: %H:%M:%S}".format(datetime.datetime.now()))

    print("Process Duration: {0}".format(_process_end - _process_start))

    cluster.wait()  # wait for all jobs to finish

    cluster.print_status()

    http_server.shutdown()  # this waits until browser gets all updates
    cluster.close()

    return bUtil.save_dataframe_point(experiments, file, objs, rmse, save,
                                      sintetic, smape, times, u)
Exemplo n.º 25
0
def sliding_window_simple_search(data, windowsize, model, partitions, orders,
                                 **kwargs):
    """
    Grid search over number of partitions and model order, scoring every
    combination by its mean error over the sliding windows of ``data``, and
    plot the resulting error surface (several orders) or error curve (one order).

    :param data: time series data
    :param windowsize: size of the sliding window
    :param model: FTS model class, instantiated as ``model(name, partitioner=...)``
    :param partitions: list with the numbers of partitions to test
    :param orders: list with the model orders to test
    :keyword figsize: size of the figure (default [10, 15])
    :keyword plotforecasts: if True, also plot the forecasts of each model
    :keyword intervals: if True, evaluate interval forecasts instead of point forecasts
    :keyword threshold: minimum error improvement required to replace the best model
    :keyword progressbar: if truthy, show tqdm progress bars
    :keyword elev: elevation of the 3D error surface view (default 30)
    :keyword azim: azimuth of the 3D error surface view (default 144)
    :keyword file: file name to save the picture
    :keyword save: if True, save the picture on ``file``
    :return: list ``[best model, forecasts of the best model]``

    All keyword arguments are also forwarded to ``Util.sliding_window``.
    """

    _3d = len(orders) > 1
    ret = []
    # Float matrix: an integer array would truncate (or reject NaN) errors
    errors = np.zeros((len(orders), len(partitions)))
    forecasted_best = []

    figsize = kwargs.get('figsize', [10, 15])
    fig = plt.figure(figsize=figsize)

    plotforecasts = kwargs.get('plotforecasts', False)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(data)])
        ax0.set_ylim([min(data) * 0.9, max(data) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = 1000000.0
    best = None

    intervals = kwargs.get('intervals', False)
    threshold = kwargs.get('threshold', 0.5)
    # Read unconditionally: the 3D axes needs them whether or not the
    # forecasts are plotted
    elev = kwargs.get('elev', 30)
    azim = kwargs.get('azim', 144)

    progressbar = kwargs.get('progressbar', None)

    rng1 = enumerate(partitions, start=0)

    if progressbar:
        from tqdm import tqdm
        rng1 = enumerate(tqdm(partitions), start=0)

    test = None  # last test window, plotted as the "Original" series

    for pc, p in rng1:
        fs = Grid.GridPartitioner(data=data, npart=p)

        rng2 = enumerate(orders, start=0)

        if progressbar:
            rng2 = enumerate(tqdm(orders), start=0)

        for oc, o in rng2:
            _error = []
            for ct, train, test in Util.sliding_window(data, windowsize, 0.8,
                                                       **kwargs):
                fts = model("q = " + str(p) + " n = " + str(o), partitioner=fs)
                fts.fit(train, order=o)
                if not intervals:
                    forecasted = fts.forecast(test)
                    if not fts.has_seasonality:
                        # The last forecast has no observed value to compare with
                        _error.append(
                            Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted[:-1])))
                    else:
                        _error.append(
                            Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted)))
                    # Left-pad so the forecasts line up with the test data
                    for kk in range(o):
                        forecasted.insert(0, None)
                    if plotforecasts:
                        ax0.plot(forecasted, label=fts.name)
                else:
                    forecasted = fts.forecast_interval(test)
                    _error.append(1.0 - Measures.rmse_interval(
                        np.array(test[o:]), np.array(forecasted[:-1])))
            error = np.nanmean(_error)
            errors[oc, pc] = error
            # Only replace the best model on an improvement above the threshold
            if (min_rmse - error) > threshold:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    if plotforecasts and test is not None:
        ax0.plot(test, label="Original", linewidth=3.0, color="black")

    if _3d:
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        ax1.plot_surface(X,
                         Y,
                         errors,
                         rstride=1,
                         cstride=1,
                         antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_ylabel('Number of partitions')
        ax1.set_xlabel('RMSE')
        # Single order: flatten the one-row matrix and draw it on the error
        # axes (ax0 only exists when the forecasts are plotted)
        if errors.size:
            ax1.plot(errors.ravel(), partitions)
    ret.append(best)
    ret.append(forecasted_best)

    file = kwargs.get('file', None)
    save = kwargs.get('save', False)

    Util.show_and_save_image(fig, file, save)

    return ret
Exemplo n.º 26
0
from pyFTS.partitioners import Grid, Util as pUtil
# Grid partitioner built over the first 800 points of ``dataset``, using the
# ``tdiff`` transformation (both defined outside this snippet).
partitioner = Grid.GridPartitioner(data=dataset[:800], npart=10, transformation=tdiff)


from pyFTS.common import Util as cUtil
from pyFTS.benchmarks import benchmarks as bchmk, Util as bUtil, Measures, knn, quantreg, arima, naive

from pyFTS.models import pwfts, song, chen, ifts, hofts
from pyFTS.models.ensemble import ensemble

# Fit a conventional (Chen) FTS on the same 800 points, with the same
# transformation appended to the model.
model = chen.ConventionalFTS(partitioner=partitioner)
#model = hofts.HighOrderFTS(partitioner=partitioner,order=2)
model.append_transformation(tdiff)
model.fit(dataset[:800])

# Plot the rules of the fitted model.
cUtil.plot_rules(model, size=[20,20], rules_by_axis=5, columns=1)

print(model)

# "fim" is Portuguese for "end": marks that the script reached this point.
print("fim")



'''
model = knn.KNearestNeighbors(order=3)
#model = ensemble.AllMethodEnsembleFTS("", partitioner=partitioner)
#model = arima.ARIMA("", order=(2,0,2))
#model = quantreg.QuantileRegression("", order=2, dist=True)
#model.append_transformation(tdiff)
model.fit(dataset[:800])
print(Measures.get_distribution_statistics(dataset[800:1000], model))
Exemplo n.º 27
0
def ahead_sliding_window(data,
                         windowsize,
                         steps,
                         resolution,
                         train=0.8,
                         inc=0.1,
                         models=None,
                         partitioners=[Grid.GridPartitioner],
                         partitions=[10],
                         max_order=3,
                         transformation=None,
                         indexer=None,
                         dump=False,
                         benchmark_models=None,
                         benchmark_models_parameters=None,
                         save=False,
                         file=None,
                         synthetic=False,
                         nodes=None):
    """
    Distributed sliding window benchmarks for FTS probabilistic forecasters.

    One dispy job is submitted per (window, number of partitions, partitioner,
    model) combination and the CRPS and time metrics of every finished job
    are collected.

    :param data: time series to be split in sliding windows
    :param windowsize: size of sliding window
    :param steps: forwarded to every job (presumably the number of steps ahead)
    :param resolution: forwarded to every job
    :param train: percentage of the sliding window data used to train the models
    :param inc: percentage of the window used as increment between windows
    :param models: FTS probabilistic forecasters; defaults to
        ``benchmarks.get_probabilistic_methods()``
    :param partitioners: Universe of Discourse partitioners
    :param partitions: list with the numbers of partitions of the Universe of Discourse
    :param max_order: the max order of the models (for high order models)
    :param transformation: data transformation
    :param indexer: seasonal indexer
    :param dump: if True, print the index of each window
    :param benchmark_models: Non FTS models to benchmark; defaults to five
        ARIMA models when neither ``models`` nor ``benchmark_models`` is given
    :param benchmark_models_parameters: Non FTS models parameters, indexed
        like ``benchmark_models``
    :param save: save results
    :param file: file path to save the results
    :param synthetic: if true only the average and standard deviation of the results
    :param nodes: list of cluster nodes to distribute tasks
    :return: DataFrame with the results
    """

    alphas = [0.05, 0.25]

    if benchmark_models is None and models is None:
        benchmark_models = [
            arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA, arima.ARIMA
        ]

    if benchmark_models_parameters is None:
        benchmark_models_parameters = [(1, 0, 0), (1, 0, 1), (2, 0, 0),
                                       (2, 0, 1), (2, 0, 2)]

    cluster = dispy.JobCluster(benchmarks.run_ahead, nodes=nodes)

    http_server = dispy.httpd.DispyHTTPServer(cluster)

    _process_start = time.time()

    print("Process Start: {0: %H:%M:%S}".format(datetime.datetime.now()))

    pool = []
    jobs = []
    # Results grouped by the 'key' entry returned by each job
    objs = {}
    crps_interval = {}
    crps_distr = {}
    times1 = {}
    times2 = {}

    if models is None:
        models = benchmarks.get_probabilistic_methods()

    for model in models:
        mfts = model("")

        if mfts.is_high_order:
            # One fresh instance per admissible order
            for order in np.arange(1, max_order + 1):
                if order >= mfts.min_order:
                    mfts = model("")
                    mfts.order = order
                    pool.append(mfts)
        else:
            pool.append(mfts)

    if benchmark_models is not None:
        # Every benchmark model is instantiated once per alpha
        for count, model in enumerate(benchmark_models, start=0):
            for a in alphas:
                par = benchmark_models_parameters[count]
                mfts = model(str(par if par is not None else ""),
                             alpha=a,
                             dist=True)
                mfts.order = par
                pool.append(mfts)

    experiments = 0
    # The ``train`` rate is consumed once when the generator is created; the
    # yielded slices get their own names so the parameter is not shadowed
    for ct, train_data, test_data in Util.sliding_window(data,
                                                         windowsize,
                                                         train,
                                                         inc=inc):
        experiments += 1

        # Benchmark-only models are submitted once per window, whatever the
        # partitioning, so remember the ones already scheduled
        benchmarks_only = {}

        if dump:
            print('\nWindow: {0}\n'.format(ct))

        for partition in partitions:

            for partitioner in partitioners:

                data_train_fs = partitioner(train_data,
                                            partition,
                                            transformation=transformation)

                for _id, m in enumerate(pool, start=0):
                    if m.benchmark_only and m.shortname in benchmarks_only:
                        continue
                    else:
                        benchmarks_only[m.shortname] = m
                    job = cluster.submit(m, data_train_fs, train_data,
                                         test_data, steps, resolution, ct,
                                         transformation, indexer)
                    job.id = _id  # associate an ID to identify jobs (if needed later)
                    jobs.append(job)

    for job in jobs:
        tmp = job()  # blocks until the job finishes and returns its result
        if job.status == dispy.DispyJob.Finished and tmp is not None:
            key = tmp['key']
            if key not in objs:
                objs[key] = tmp['obj']
                crps_interval[key] = []
                crps_distr[key] = []
                times1[key] = []
                times2[key] = []
            # The accumulators are plain lists: use list.append
            crps_interval[key].append(tmp['CRPS_Interval'])
            crps_distr[key].append(tmp['CRPS_Distribution'])
            times1[key].append(tmp['TIME_Interval'])
            times2[key].append(tmp['TIME_Distribution'])

        else:
            print(job.exception)
            print(job.stdout)

    _process_end = time.time()

    print("Process End: {0: %H:%M:%S}".format(datetime.datetime.now()))

    print("Process Duration: {0}".format(_process_end - _process_start))

    cluster.wait()  # wait for all jobs to finish

    cluster.print_status()

    http_server.shutdown()  # this waits until browser gets all updates
    cluster.close()

    return bUtil.save_dataframe_ahead(experiments, file, objs, crps_interval,
                                      crps_distr, times1, times2, save,
                                      synthetic)
Exemplo n.º 28
0
def simpleSearch_RMSE(train,
                      test,
                      model,
                      partitions,
                      orders,
                      save=False,
                      file=None,
                      tam=[10, 15],
                      plotforecasts=False,
                      elev=30,
                      azim=144,
                      intervals=False,
                      parameters=None,
                      partitioner=Grid.GridPartitioner,
                      transformation=None,
                      indexer=None):
    """
    Grid search over number of partitions and model order: train one model
    per combination on ``train``, score it on ``test`` and plot the error
    surface (several orders) or error curve (single order).

    :param train: training data
    :param test: test data
    :param model: FTS model class, instantiated as ``model(name)``
    :param partitions: list with the numbers of partitions to test
    :param orders: list with the model orders to test
    :param save: save the picture on file
    :param file: file name to save the picture
    :param tam: size of the picture
    :param plotforecasts: if True, also plot the forecasts of every model
    :param elev: elevation of the 3D error surface view
    :param azim: azimuth of the 3D error surface view
    :param intervals: if True, evaluate interval forecasts instead of point forecasts
    :param parameters: model specific parameters forwarded to ``train``
    :param partitioner: Universe of Discourse partitioner class
    :param transformation: data transformation
    :param indexer: seasonal indexer (not used by this function)
    :return: list ``[best model, forecasts of the best model, lowest error]``
    """
    _3d = len(orders) > 1
    ret = []
    if _3d:
        # Float matrix: an integer array would truncate the error values
        errors = np.zeros((len(orders), len(partitions)))
    else:
        errors = []
    forecasted_best = []
    fig = plt.figure(figsize=tam)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(train)])
        ax0.set_ylim([min(train) * 0.9, max(train) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = 1000000.0
    best = None

    for pc, p in enumerate(partitions, start=0):

        sets = partitioner(data=train, npart=p,
                           transformation=transformation).sets
        for oc, o in enumerate(orders, start=0):
            fts = model("q = " + str(p) + " n = " + str(o))
            fts.append_transformation(transformation)
            fts.train(train, sets=sets, order=o, parameters=parameters)
            if not intervals:
                forecasted = fts.forecast(test)
                if not fts.has_seasonality:
                    # The last forecast has no observed value to compare with
                    error = Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted[:-1]))
                else:
                    error = Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted))
                # Left-pad so the forecasts line up with the test data
                for kk in range(o):
                    forecasted.insert(0, None)
                if plotforecasts:
                    ax0.plot(forecasted, label=fts.name)
            else:
                forecasted = fts.forecast_interval(test)
                error = 1.0 - Measures.rmse_interval(np.array(test[o:]),
                                                     np.array(forecasted[:-1]))
            if _3d:
                errors[oc, pc] = error
            else:
                errors.append(error)
            if error < min_rmse:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    if plotforecasts:
        ax0.plot(test, label="Original", linewidth=3.0, color="black")

    # The kind of error plot depends only on the number of orders; the
    # surface must also be drawn when the forecasts are plotted
    if _3d:
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        ax1.plot_surface(X,
                         Y,
                         errors,
                         rstride=1,
                         cstride=1,
                         antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        ax1.plot(partitions, errors)
    ret.append(best)
    ret.append(forecasted_best)
    ret.append(min_rmse)

    cUtil.show_and_save_image(fig, file, save)

    return ret
Exemplo n.º 29
0
    def fit(self, ndata, **kwargs):
        """
        Fit the model's parameters based on the training data.

        :param ndata: training time series data
        :param kwargs:

        :keyword partitioner: partitioner to be assigned to the model before training
        :keyword order: order to be assigned to the model before training
        :keyword dump: 'time' to print timestamps of the training steps, 'tqdm' to show a progress bar
        :keyword num_batches: split the training data in num_batches to save memory during the training process
        :keyword save_model: save final model on disk
        :keyword batch_save: save the model between each batch
        :keyword file_path: path to save the model
        :keyword distributed: False (default) for local training, 'dispy' or 'spark' for distributed training
        :keyword nodes: a list with the dispy cluster nodes addresses
        :keyword train_method: training function used by the dispy cluster
        :keyword batch_save_interval: forwarded to the dispy distributed training
        :keyword url: address of the spark master
        :keyword app: name of the spark application

        :raises Exception: if a model that needs fuzzy sets has no partitioner

        """

        import datetime

        if self.is_multivariate:
            data = ndata
        else:
            data = self.apply_transformations(ndata)

            self.original_min = np.nanmin(data)
            self.original_max = np.nanmax(data)

        # 'partitioner' and 'order' are consumed here (popped) so they are
        # not forwarded to train() with the remaining kwargs
        if 'partitioner' in kwargs:
            self.partitioner = kwargs.pop('partitioner')

        if not self.is_multivariate and not self.is_wrapper and not self.benchmark_only:
            if self.partitioner is None:
                raise Exception(
                    "Fuzzy sets were not provided for the model. Use 'partitioner' parameter. "
                )

        if 'order' in kwargs:
            self.order = kwargs.pop('order')

        dump = kwargs.get('dump', None)

        num_batches = kwargs.get('num_batches', 10)

        save = kwargs.get('save_model', False)  # save model on disk

        batch_save = kwargs.get('batch_save',
                                False)  # save model between batches

        file_path = kwargs.get('file_path', None)

        distributed = kwargs.get('distributed', False)

        batch_save_interval = kwargs.get('batch_save_interval', 10)

        if distributed:
            # NOTE: only 'dispy' and 'spark' are handled; any other truthy
            # value leaves the model untrained

            if distributed == 'dispy':
                from pyFTS.distributed import dispy
                nodes = kwargs.get('nodes', False)
                train_method = kwargs.get('train_method',
                                          dispy.simple_model_train)
                dispy.distributed_train(
                    self,
                    train_method,
                    nodes,
                    type(self),
                    data,
                    num_batches, {},
                    batch_save=batch_save,
                    file_path=file_path,
                    batch_save_interval=batch_save_interval)
            elif distributed == 'spark':
                from pyFTS.distributed import spark
                url = kwargs.get('url', 'spark://192.168.0.110:7077')
                app = kwargs.get('app', 'pyFTS')

                spark.distributed_train(self, data, url=url, app=app)
        else:

            if dump == 'time':
                print("[{0: %H:%M:%S}] Start training".format(
                    datetime.datetime.now()))

            if num_batches is not None and not self.is_wrapper:
                n = len(data)
                # At least one point per batch: with fewer points than
                # batches the step would be 0 and range() would raise
                batch_size = max(1, int(n / num_batches))
                bcount = 1

                rng = range(self.order, n, batch_size)

                if dump == 'tqdm':
                    from tqdm import tqdm

                    rng = tqdm(rng)

                for ct in rng:
                    if dump == 'time':
                        print("[{0: %H:%M:%S}] Starting batch ".format(
                            datetime.datetime.now()) + str(bcount))
                    # Each batch also carries the ``order`` points that
                    # precede it, needed as lags by the first samples
                    if self.is_multivariate:
                        mdata = data.iloc[ct - self.order:ct + batch_size]
                    else:
                        mdata = data[ct - self.order:ct + batch_size]

                    self.train(mdata, **kwargs)

                    if batch_save:
                        Util.persist_obj(self, file_path)

                    if dump == 'time':
                        print("[{0: %H:%M:%S}] Finish batch ".format(
                            datetime.datetime.now()) + str(bcount))

                    bcount += 1

            else:
                self.train(data, **kwargs)

            if dump == 'time':
                print("[{0: %H:%M:%S}] Finish training".format(
                    datetime.datetime.now()))

        if save:
            Util.persist_obj(self, file_path)
Exemplo n.º 30
0
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    If the individual already carries both fitness values ('f1' and 'f2'),
    the stored measures are returned without re-evaluating.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param fts_method: model class handed to phenotype (default: wmvfts.WeightedMVFTS)
    :param parameters: dict with model specific arguments for fit method.
    :param target_variable: object indexed with ['data_label'] to select the
        target column of each test window (presumably the target Variable's
        descriptor -- confirm against the caller)
    :return: a dict with the keys 'f1' (accuracy fitness), 'f2' (parsimony fitness),
        'rmse' (mean RMSE over the windows) and 'size' (mean model length).
        All four values are np.inf when no window could be evaluated.
    """
    import logging
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import __measures
    from pyFTS.hyperparam.mvfts import phenotype
    from pyFTS.models.multivariate import wmvfts
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', wmvfts.WeightedMVFTS)
    parameters = kwargs.get('parameters', {})
    tvar = kwargs.get('target_variable', None)

    # Already evaluated: reuse the cached measures.
    if individual['f1'] is not None and individual['f2'] is not None:
        return {key: individual[key] for key in __measures}

    worst = {'f1': np.inf, 'f2': np.inf, 'rmse': np.inf, 'size': np.inf}

    errors = []
    lengths = []

    # 'fts_method' and 'parameters' are passed to phenotype explicitly, so they
    # must not also travel inside **kwargs2. Both are optional for the caller,
    # hence the tolerant pop (a plain pop raised KeyError on the default path).
    kwargs2 = kwargs.copy()
    kwargs2.pop('fts_method', None)
    kwargs2.pop('parameters', None)

    for count, train, test in Util.sliding_window(dataset,
                                                  window_size,
                                                  train=train_rate,
                                                  inc=increment_rate):

        try:

            model = phenotype(individual,
                              train,
                              fts_method=fts_method,
                              parameters=parameters,
                              **kwargs2)

            forecasts = model.predict(test)

            # Align targets and forecasts: skip the first max_lag targets and
            # drop the last forecast.
            rmse = Measures.rmse(
                test[tvar['data_label']].values[model.max_lag:],
                forecasts[:-1])
            lengths.append(len(model))

            errors.append(rmse)

        except Exception:
            # Best effort: a failing window is logged and recorded as NaN so
            # the remaining windows can still contribute to the fitness.
            logging.exception("Error")

            lengths.append(np.nan)
            errors.append(np.nan)

    try:
        # np.nansum of an all-NaN list is 0.0, which would hand the best
        # possible fitness to an individual that never produced a forecast.
        # Report the worst fitness instead.
        if np.all(np.isnan(errors)):
            return worst

        _rmse = np.nanmean(errors)
        _len = np.nanmean(lengths)

        f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
        f2 = np.nansum([.9 * _len, .1 * np.nanstd(lengths)])

        return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len}
    except Exception:
        logging.exception("Error")
        return worst