def compareModelsTable(original, models_fo, models_ho):
    fig = plt.figure(figsize=[12, 4])
    fig.suptitle("Comparação de modelos ")
    columns = ['Modelo', 'Ordem', 'Partições', 'RMSE', 'MAPE (%)']
    rows = []
    for model in models_fo:
        fts = model["model"]
        error_r = Measures.rmse(model["forecasted"], original)
        error_m = round(Measures.mape(model["forecasted"], original) * 100, 2)
        rows.append([model["name"], fts.order, len(fts.sets), error_r, error_m])
    for model in models_ho:
        fts = model["model"]
        error_r = Measures.rmse(model["forecasted"][fts.order:], original[fts.order:])
        error_m = round(Measures.mape(model["forecasted"][fts.order:], original[fts.order:]) * 100, 2)
        rows.append([model["name"], fts.order, len(fts.sets), error_r, error_m])
    ax1 = fig.add_axes([0, 0, 1, 1])  # left, bottom, width, height
    ax1.set_xticks([])
    ax1.set_yticks([])
    ax1.table(cellText=rows, colLabels=columns, cellLoc='center', bbox=[0, 0, 1, 1])
    sup = "\\begin{tabular}{"
    header = ""
    body = ""
    footer = ""
    for c in columns:
        sup = sup + "|c"
        if len(header) > 0:
            header = header + " & "
        header = header + "\\textbf{" + c + "} "
    sup = sup + "|} \\hline\n"
    header = header + "\\\\ \\hline \n"
    for r in rows:
        lin = ""
        for c in r:
            if len(lin) > 0:
                lin = lin + " & "
            lin = lin + str(c)
        body = body + lin + "\\\\ \\hline \n"
    return sup + header + body + "\\end{tabular}"
def print_point_statistics(data, models, externalmodels=None, externalforecasts=None, indexers=None):
    ret = "Model & Order & RMSE & SMAPE & Theil's U \\\\ \n"
    for count, model in enumerate(models, start=0):
        _rmse, _smape, _u = Measures.get_point_statistics(data, model, indexers)
        ret += model.shortname + " & "
        ret += str(model.order) + " & "
        ret += str(_rmse) + " & "
        ret += str(_smape) + " & "
        ret += str(_u)
        #ret += str(round(Measures.TheilsInequality(np.array(data[fts.order:]), np.array(forecasts[:-1])), 4))
        ret += " \\\\ \n"
    if externalmodels is not None:
        l = len(externalmodels)
        for k in np.arange(0, l):
            ret += externalmodels[k] + " & "
            ret += " 1 & "
            ret += str(round(Measures.rmse(data, externalforecasts[k][:-1]), 2)) + " & "
            ret += str(round(Measures.smape(data, externalforecasts[k][:-1]), 2)) + " & "
            ret += str(round(Measures.UStatistic(data, externalforecasts[k][:-1]), 2))
            ret += " \\\\ \n"
    print(ret)
def forecast_params(data, train_split, method, params, plot=False):
    train, test = sampling.train_test_split(data, train_split)
    fcst = method(train, test, params)
    _output = params['output']
    _step = params.get('step', 1)
    _offset = params['order'] + _step - 1
    yobs = test[_output].iloc[_offset:].values
    if plot:
        plt.figure(figsize=(20, 10))
        plt.plot(yobs)
        plt.plot(fcst)
        plt.show()
    rmse = Measures.rmse(yobs, fcst)
    print("RMSE: ", rmse)
    nrmse = metrics.normalized_rmse(yobs, fcst)
    print("nRMSE: ", nrmse)
    smape = Measures.smape(yobs, fcst)
    print("SMAPE: ", smape)
    u = Measures.UStatistic(yobs, fcst)
    print("U Statistic: ", u)
    return rmse, nrmse, smape, u
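A minimal usage sketch for forecast_params. The DataFrame df, the forecasting callable my_method, and the column name 'glo_avg' are hypothetical placeholders, not part of the original code; the dictionary keys follow what the function reads above.

# Hypothetical example: 'df' and 'my_method' are placeholders.
params = {
    'order': 2,           # number of lags consumed by the forecasting method
    'step': 1,            # steps ahead to forecast (defaults to 1 if omitted)
    'output': 'glo_avg'   # name of the target column in the test DataFrame
}
rmse, nrmse, smape, u = forecast_params(df, 0.75, my_method, params, plot=True)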
def print_point_statistics(data, models, externalmodels=None, externalforecasts=None, indexers=None):
    """
    Run point benchmarks on given models and data and print the results

    :param data: test data
    :param models: a list of FTS models to benchmark
    :param externalmodels: a list with benchmark models (façades for other methods)
    :param externalforecasts: point forecasts produced by the external models
    :param indexers: data indexers, passed to Measures.get_point_statistics
    :return: None; the formatted results are printed to stdout
    """
    ret = "Model & Order & RMSE & SMAPE & Theil's U \\\\ \n"
    for count, model in enumerate(models, start=0):
        _rmse, _smape, _u = Measures.get_point_statistics(data, model, indexers)
        ret += model.shortname + " & "
        ret += str(model.order) + " & "
        ret += str(_rmse) + " & "
        ret += str(_smape) + " & "
        ret += str(_u)
        #ret += str(round(Measures.TheilsInequality(np.array(data[fts.order:]), np.array(forecasts[:-1])), 4))
        ret += " \\\\ \n"
    if externalmodels is not None:
        l = len(externalmodels)
        for k in np.arange(0, l):
            ret += externalmodels[k] + " & "
            ret += " 1 & "
            ret += str(round(Measures.rmse(data, externalforecasts[k][:-1]), 2)) + " & "
            ret += str(round(Measures.smape(data, externalforecasts[k][:-1]), 2)) + " & "
            ret += str(round(Measures.UStatistic(data, externalforecasts[k][:-1]), 2))
            ret += " \\\\ \n"
    print(ret)
def cluster_method(individual, dataset, **kwargs):
    from pyFTS.common import Util, Membership
    from pyFTS.models import hofts
    from pyFTS.partitioners import Grid, Entropy
    from pyFTS.benchmarks import Measures
    import numpy as np

    if individual['mf'] == 1:
        mf = Membership.trimf
    elif individual['mf'] == 2:
        mf = Membership.trapmf
    elif individual['mf'] == 3 and individual['partitioner'] != 2:
        mf = Membership.gaussmf
    else:
        mf = Membership.trimf

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    parameters = kwargs.get('parameters', {})

    errors = []
    sizes = []

    for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate):
        if individual['partitioner'] == 1:
            partitioner = Grid.GridPartitioner(data=train, npart=individual['npart'], func=mf)
        elif individual['partitioner'] == 2:
            npart = individual['npart'] if individual['npart'] > 10 else 10
            partitioner = Entropy.EntropyPartitioner(data=train, npart=npart, func=mf)

        model = hofts.WeightedHighOrderFTS(partitioner=partitioner,
                                           lags=individual['lags'],
                                           alpha_cut=individual['alpha'],
                                           order=individual['order'])
        model.fit(train)

        forecasts = model.predict(test)

        #rmse, mape, u = Measures.get_point_statistics(test, model)
        rmse = Measures.rmse(test[model.max_lag:], forecasts)
        size = len(model)

        errors.append(rmse)
        sizes.append(size)

    # report the mean error and mean model size across all sliding windows
    return {'parameters': individual, 'rmse': np.nanmean(errors), 'size': np.nanmean(sizes)}
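A sketch of the genotype expected by cluster_method, inferred from the keys it reads above; the concrete values and the dataset variable are illustrative assumptions only.

# Illustrative genotype; the meanings of the keys follow the branches in cluster_method.
individual = {
    'mf': 1,            # 1 = trimf, 2 = trapmf, 3 = gaussmf (only with a non-Entropy partitioner)
    'partitioner': 1,   # 1 = Grid.GridPartitioner, 2 = Entropy.EntropyPartitioner
    'npart': 35,        # number of fuzzy sets
    'lags': [1, 2, 3],  # lag indexes of the high-order model
    'alpha': 0.0,       # alpha-cut
    'order': 3          # model order
}
result = cluster_method(individual, dataset, window_size=800, train_rate=.8, increment_rate=.2)
print(result['rmse'], result['size'])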
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for fit method.
    :return: a dict with the accuracy fitness value (f1), the parsimony fitness value (f2),
             and the mean rmse and size over the sliding windows
    """
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import phenotype, __measures
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
    parameters = kwargs.get('parameters', {})

    if individual['f1'] is not None and individual['f2'] is not None:
        return {key: individual[key] for key in __measures}

    errors = []
    lengths = []

    for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate):
        model = phenotype(individual, train, fts_method=fts_method, parameters=parameters)

        forecasts = model.predict(test)

        rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1])
        lengths.append(len(model))
        errors.append(rmse)

    _lags = sum(model.lags) * 100
    _rmse = np.nanmean(errors)
    _len = np.nanmean(lengths)

    f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
    f2 = np.nansum([.4 * _len, .6 * _lags])

    return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len}
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for fit method.
    :return: a dict with the fitness value (rmse), combining the mean and the standard
             deviation of the errors over the sliding windows
    """
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.fcm.GA import phenotype
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    #parameters = kwargs.get('parameters', {})

    errors = []

    for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate):
        model = phenotype(individual, train)

        if model is None:
            raise Exception("Phenotype returned None")

        model.uod_clip = False

        forecasts = model.predict(test)

        rmse = Measures.rmse(test[model.max_lag:], forecasts[:-1])  #.get_point_statistics(test, model)
        errors.append(rmse)

    _rmse = np.nanmean(errors)
    _std = np.nanstd(errors)

    #print("EVALUATION {}".format(individual))
    return {'rmse': .6 * _rmse + .4 * _std}
def forecast_best_params(data, train_split, method_id, method, space, plot=False, save=False):
    print("Running experiment ", method_id)
    best = pickle.load(open("best_" + method_id + ".pkl", "rb"))
    train, test = sampling.train_test_split(data, train_split)
    best_params = space_eval(space, best)
    fcst = method(train, test, best_params)
    _order = best_params['order']
    _output = best_params['output']
    yobs = test[_output].iloc[_order:].values
    if plot:
        plt.figure(figsize=(20, 10))
        plt.plot(yobs)
        plt.plot(fcst)
        plt.show()
    rmse = Measures.rmse(yobs, fcst)
    print("RMSE: ", rmse)
    nrmse = metrics.normalized_rmse(yobs, fcst)
    print("nRMSE: ", nrmse)
    smape = Measures.smape(yobs, fcst)
    print("SMAPE: ", smape)
    u = Measures.UStatistic(yobs, fcst)
    print("U Statistic: ", u)
    if save:
        results = {"method_id": method_id, "forecast": fcst, "RMSE": rmse, "SMAPE": smape, "U": u}
        pickle.dump(results, open("results_" + method_id + ".pkl", "wb"))
    return rmse, nrmse, smape, u
def evaluate_individual_model(model, partitioner, train, test, window_size, time_displacement):
    import numpy as np
    from pyFTS.partitioners import Grid
    from pyFTS.benchmarks import Measures

    try:
        model.train(train, sets=partitioner.sets, order=model.order, parameters=window_size)
        forecasts = model.forecast(test, time_displacement=time_displacement, window_size=window_size)
        _rmse = Measures.rmse(test[model.order:], forecasts[:-1])
        _mape = Measures.mape(test[model.order:], forecasts[:-1])
        _u = Measures.UStatistic(test[model.order:], forecasts[:-1])
    except Exception as e:
        print(e)
        _rmse = np.nan
        _mape = np.nan
        _u = np.nan

    return {'model': model.shortname, 'partitions': partitioner.partitions, 'order': model.order,
            'rmse': _rmse, 'mape': _mape, 'u': _u}
def rolling_window_forecast_params(data, train_percent, window_size, method, params):
    # get training days
    training_days = pd.unique(data.index.date)

    fcst = []
    yobs = []

    for day in training_days:
        print("Processing :", day)
        daily_data = data[data.index.date == day]
        nsamples = len(daily_data.index)
        train_size = round(nsamples * train_percent)

        test_end = 0
        index = 0

        while test_end < nsamples:
            train_start, train_end, test_start, test_end = get_data_index(index, train_size, window_size, nsamples)
            train = data[train_start:train_end]
            test = data[test_start:test_end]
            index += window_size

            f = method(train, test, params)
            fcst.extend(f)

            _step = params.get('step', 1)
            _output = params['output']
            _offset = params['order'] + _step - 1
            yobs.extend(test[_output].iloc[_offset:].values)

    rmse = Measures.rmse(yobs, fcst)
    print("RMSE: ", rmse)
    nrmse = metrics.normalized_rmse(yobs, fcst)
    print("nRMSE: ", nrmse)
    smape = Measures.smape(yobs, fcst)
    print("SMAPE: ", smape)
    u = Measures.UStatistic(yobs, fcst)
    print("U Statistic: ", u)

    return rmse, nrmse, smape, u
def rolling_window_benchmark(data, train=0.8, **kwargs):
    resample = __pop('resample', None, kwargs)
    output = __pop('output', None, kwargs)

    if resample:
        data = sampling.resample_data(data, resample)

    train_data, test_data = sampling.train_test_split(data, train)

    methods = __pop('methods', None, kwargs)
    orders = __pop("orders", [1, 2, 3], kwargs)
    steps_ahead = __pop('steps_ahead', [1], kwargs)

    for method in methods:
        for order in orders:
            for step in steps_ahead:
                m = method()
                if isinstance(m, fts.FTS):
                    partitioners = __pop("partitioners", [Grid.GridPartitioner], kwargs)
                    partitions = __pop("partitions", [10], kwargs)
                    for partitioner in partitioners:
                        for partition in partitions:
                            data_train_fs = partitioner(data=train_data, npart=partition)
                            m.partitioner = data_train_fs
                            # measure training time
                            m.fit(train_data, **kwargs)
                            # measure forecasting time
                            yhat = m.predict(test_data)  # forecast over the test data
                            #_start = time.time()
                            # implement the evaluation metrics
                            _rmse = Measures.rmse(test_data[output].iloc[order:], yhat[:-step])
                            print("RMSE: ", _rmse)
def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=None, tam=[10, 15],
                      plotforecasts=False, elev=30, azim=144, intervals=False, parameters=None,
                      partitioner=Grid.GridPartitioner, transformation=None, indexer=None):
    _3d = len(orders) > 1
    ret = []
    if _3d:
        # float array, so assigning RMSE values does not truncate them to integers
        errors = np.zeros((len(orders), len(partitions)))
    else:
        errors = []
    forecasted_best = []
    fig = plt.figure(figsize=tam)
    # fig.suptitle("Comparação de modelos ")
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(train)])
        ax0.set_ylim([min(train) * 0.9, max(train) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = 1000000.0
    best = None

    for pc, p in enumerate(partitions, start=0):
        sets = partitioner(data=train, npart=p, transformation=transformation).sets
        for oc, o in enumerate(orders, start=0):
            fts = model("q = " + str(p) + " n = " + str(o))
            fts.append_transformation(transformation)
            fts.train(train, sets=sets, order=o, parameters=parameters)
            if not intervals:
                forecasted = fts.forecast(test)
                if not fts.has_seasonality:
                    error = Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1]))
                else:
                    error = Measures.rmse(np.array(test[o:]), np.array(forecasted))
                for kk in range(o):
                    forecasted.insert(0, None)
                if plotforecasts:
                    ax0.plot(forecasted, label=fts.name)
            else:
                forecasted = fts.forecast_interval(test)
                error = 1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1]))
            if _3d:
                errors[oc, pc] = error
            else:
                errors.append(error)
            if error < min_rmse:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    # print(min_rmse)
    if plotforecasts:
        # handles0, labels0 = ax0.get_legend_handles_labels()
        # ax0.legend(handles0, labels0)
        ax0.plot(test, label="Original", linewidth=3.0, color="black")
        if _3d:
            ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
    if _3d and not plotforecasts:
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        ax1.plot(partitions, errors)
    ret.append(best)
    ret.append(forecasted_best)
    ret.append(min_rmse)

    # plt.tight_layout()
    cUtil.show_and_save_image(fig, file, save)

    return ret
def SelecaoSimples_MenorRMSE(original, parameters, modelo):
    ret = []
    errors = []
    forecasted_best = []
    print("Série Original")
    fig = plt.figure(figsize=[20, 12])
    fig.suptitle("Comparação de modelos ")
    ax0 = fig.add_axes([0, 0.5, 0.65, 0.45])  # left, bottom, width, height
    ax0.set_xlim([0, len(original)])
    ax0.set_ylim([min(original), max(original)])
    ax0.set_title('Série Temporal')
    ax0.set_ylabel('F(T)')
    ax0.set_xlabel('T')
    ax0.plot(original, label="Original")
    min_rmse = 100000.0
    best = None
    for p in parameters:
        sets = Grid.GridPartitioner(data=original, npart=p).sets
        fts = modelo(str(p) + " particoes")
        fts.train(original, sets=sets)
        # print(original)
        forecasted = fts.forecast(original)
        forecasted.insert(0, original[0])
        # print(forecasted)
        ax0.plot(forecasted, label=fts.name)
        error = Measures.rmse(np.array(forecasted), np.array(original))
        print(p, error)
        errors.append(error)
        if error < min_rmse:
            min_rmse = error
            best = fts
            forecasted_best = forecasted
    handles0, labels0 = ax0.get_legend_handles_labels()
    ax0.legend(handles0, labels0)
    ax1 = fig.add_axes([0.7, 0.5, 0.3, 0.45])  # left, bottom, width, height
    ax1.set_title('Comparação dos Erros Quadráticos Médios')
    ax1.set_ylabel('RMSE')
    ax1.set_xlabel('Quantidade de Partições')
    ax1.set_xlim([min(parameters), max(parameters)])
    ax1.plot(parameters, errors)
    ret.append(best)
    ret.append(forecasted_best)

    # Differential model
    print("\nSérie Diferencial")
    difffts = Transformations.differential(original)
    errors = []
    forecastedd_best = []
    ax2 = fig.add_axes([0, 0, 0.65, 0.45])  # left, bottom, width, height
    ax2.set_xlim([0, len(difffts)])
    ax2.set_ylim([min(difffts), max(difffts)])
    ax2.set_title('Série Temporal')
    ax2.set_ylabel('F(T)')
    ax2.set_xlabel('T')
    ax2.plot(difffts, label="Original")
    min_rmse = 100000.0
    bestd = None
    for p in parameters:
        sets = Grid.GridPartitioner(data=difffts, npart=p).sets
        fts = modelo(str(p) + " particoes")
        fts.train(difffts, sets=sets)
        forecasted = fts.forecast(difffts)
        forecasted.insert(0, difffts[0])
        ax2.plot(forecasted, label=fts.name)
        error = Measures.rmse(np.array(forecasted), np.array(difffts))
        print(p, error)
        errors.append(error)
        if error < min_rmse:
            min_rmse = error
            bestd = fts
            forecastedd_best = forecasted
    handles0, labels0 = ax2.get_legend_handles_labels()
    ax2.legend(handles0, labels0)
    ax3 = fig.add_axes([0.7, 0, 0.3, 0.45])  # left, bottom, width, height
    ax3.set_title('Comparação dos Erros Quadráticos Médios')
    ax3.set_ylabel('RMSE')
    ax3.set_xlabel('Quantidade de Partições')
    ax3.set_xlim([min(parameters), max(parameters)])
    ax3.plot(parameters, errors)
    ret.append(bestd)
    ret.append(forecastedd_best)
    return ret
def normalized_rmse(targets, forecasts):
    if isinstance(targets, list):
        targets = np.array(targets)
    return Measures.rmse(targets, forecasts) / np.nanmean(targets)
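For illustration only, a toy call with made-up numbers; normalized_rmse simply divides the RMSE by the mean of the target series.

targets = [10.0, 12.0, 11.5, 13.0]    # arbitrary observed values
forecasts = [10.5, 11.8, 12.0, 12.7]  # arbitrary predicted values
print(normalized_rmse(targets, forecasts))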
df = loader.series_to_supervised(signals[key], n_in=_order, n_out=1)
data_input = df.iloc[:, :_order].values
data_output = df.iloc[:, -1].values

l = len(df.index)
limit = l // 2
train = data_input[:limit]
test = data_input[limit:]

ax[row].plot(data_output[limit + _order:], label="Original")
ax[row].set_title(key)

persistence_forecast = data_output[limit + _order - 1:-1]
ax[row].plot(persistence_forecast, label="Persistence")
_rmse = Measures.rmse(data_output[limit + _order:], persistence_forecast)
data = [key, "Persistence", _rmse]

evolving_model = evolvingclusterfts.EvolvingClusterFTS(defuzzy='weighted', membership_threshold=0.6, variance_limit=0.001)
evolving_model.fit(train, order=_order)
y_hat_df = pd.DataFrame(evolving_model.predict(test))
forecasts = y_hat_df.iloc[:, -1].values
ax[row].plot(forecasts, label="EvolvingFTS")
_rmse = Measures.rmse(data_output[limit + _order:], forecasts[:-1])
data = [key, "EvolvingFTS", _rmse]
rows.append(data)

fbem_model = FBeM.FBeM()
fbem_model.n = _order
fbem_model.fit(train, order=_order)
def sliding_window_simple_search(data, windowsize, model, partitions, orders, **kwargs):
    _3d = len(orders) > 1
    ret = []
    # float array, so assigning RMSE values does not truncate them to integers
    errors = np.zeros((len(orders), len(partitions)))
    forecasted_best = []

    figsize = kwargs.get('figsize', [10, 15])
    fig = plt.figure(figsize=figsize)

    plotforecasts = kwargs.get('plotforecasts', False)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(data)])
        ax0.set_ylim([min(data) * 0.9, max(data) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = 1000000.0
    best = None

    intervals = kwargs.get('intervals', False)
    threshold = kwargs.get('threshold', 0.5)

    progressbar = kwargs.get('progressbar', None)

    rng1 = enumerate(partitions, start=0)

    if progressbar:
        from tqdm import tqdm
        rng1 = enumerate(tqdm(partitions), start=0)

    for pc, p in rng1:

        fs = Grid.GridPartitioner(data=data, npart=p)

        rng2 = enumerate(orders, start=0)

        if progressbar:
            rng2 = enumerate(tqdm(orders), start=0)

        for oc, o in rng2:
            _error = []
            for ct, train, test in Util.sliding_window(data, windowsize, 0.8, **kwargs):
                fts = model("q = " + str(p) + " n = " + str(o), partitioner=fs)
                fts.fit(train, order=o)
                if not intervals:
                    forecasted = fts.forecast(test)
                    if not fts.has_seasonality:
                        _error.append(Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1])))
                    else:
                        _error.append(Measures.rmse(np.array(test[o:]), np.array(forecasted)))
                    for kk in range(o):
                        forecasted.insert(0, None)
                    if plotforecasts:
                        ax0.plot(forecasted, label=fts.name)
                else:
                    forecasted = fts.forecast_interval(test)
                    _error.append(1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1])))
            error = np.nanmean(_error)
            errors[oc, pc] = error
            if (min_rmse - error) > threshold:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    # print(min_rmse)
    # retrieve the view angles before the branches below, so both branches can use them
    elev = kwargs.get('elev', 30)
    azim = kwargs.get('azim', 144)
    if plotforecasts:
        # handles0, labels0 = ax0.get_legend_handles_labels()
        # ax0.legend(handles0, labels0)
        ax0.plot(test, label="Original", linewidth=3.0, color="black")
        if _3d:
            ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
    if not plotforecasts:
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
    # ax1 = fig.add_axes([0.6, 0.5, 0.45, 0.45], projection='3d')

    if _3d:
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        # single-order case: plot the RMSE values against the number of partitions
        ax1.plot(partitions, errors[0, :])

    ret.append(best)
    ret.append(forecasted_best)

    # plt.tight_layout()
    file = kwargs.get('file', None)
    save = kwargs.get('save', False)
    Util.show_and_save_image(fig, file, save)

    return ret
def evaluate(dataset, individual, **kwargs):
    """
    Evaluate an individual using a sliding window cross validation over the dataset.

    :param dataset: Evaluation dataset
    :param individual: genotype to be tested
    :param window_size: The length of scrolling window for train/test on dataset
    :param train_rate: The train/test split ([0,1])
    :param increment_rate: The increment of the scrolling window, relative to the window_size ([0,1])
    :param parameters: dict with model specific arguments for fit method.
    :return: a dict with the accuracy fitness value (f1), the parsimony fitness value (f2),
             and the mean rmse and size over the sliding windows
    """
    import logging
    from pyFTS.models import hofts, ifts, pwfts
    from pyFTS.common import Util
    from pyFTS.benchmarks import Measures
    from pyFTS.hyperparam.Evolutionary import __measures
    from pyFTS.hyperparam.mvfts import phenotype
    from pyFTS.models.multivariate import mvfts, wmvfts, partitioner, variable, cmvfts, grid, granular, common
    import numpy as np

    window_size = kwargs.get('window_size', 800)
    train_rate = kwargs.get('train_rate', .8)
    increment_rate = kwargs.get('increment_rate', .2)
    fts_method = kwargs.get('fts_method', wmvfts.WeightedMVFTS)
    parameters = kwargs.get('parameters', {})
    tvar = kwargs.get('target_variable', None)

    if individual['f1'] is not None and individual['f2'] is not None:
        return {key: individual[key] for key in __measures}

    errors = []
    lengths = []

    kwargs2 = kwargs.copy()
    kwargs2.pop('fts_method', None)
    if 'parameters' in kwargs2:
        kwargs2.pop('parameters')

    for count, train, test in Util.sliding_window(dataset, window_size, train=train_rate, inc=increment_rate):
        try:
            model = phenotype(individual, train, fts_method=fts_method, parameters=parameters, **kwargs2)

            forecasts = model.predict(test)

            rmse = Measures.rmse(test[tvar['data_label']].values[model.max_lag:], forecasts[:-1])
            lengths.append(len(model))
            errors.append(rmse)
        except Exception as ex:
            logging.exception("Error")
            lengths.append(np.nan)
            errors.append(np.nan)

    try:
        _rmse = np.nanmean(errors)
        _len = np.nanmean(lengths)

        f1 = np.nansum([.6 * _rmse, .4 * np.nanstd(errors)])
        f2 = np.nansum([.9 * _len, .1 * np.nanstd(lengths)])

        return {'f1': f1, 'f2': f2, 'rmse': _rmse, 'size': _len}
    except Exception as ex:
        logging.exception("Error")
        return {'f1': np.inf, 'f2': np.inf, 'rmse': np.inf, 'size': np.inf}
order = 3
nparts = 20

fuzzysets = []
fuzzysets.append(Grid.GridPartitioner(fln_train.glo_avg, nparts))
fuzzysets.append(Grid.GridPartitioner(joi_train.glo_avg, nparts))
fuzzysets.append(Grid.GridPartitioner(sbr_train.glo_avg, nparts))

d = {'fln_glo_avg': fln_train.glo_avg, 'sbr_glo_avg': sbr_train.glo_avg, 'joi_glo_avg': joi_train.glo_avg}
data_train = pd.DataFrame(d)
data_train = data_train.dropna(axis=0, how='any')

model_file = "models/fts/multivariate/mvhofts-" + str(order) + "-" + str(nparts) + ".pkl"

model = mvhofts.MultivariateHighOrderFTS("")
model.train(data_train, fuzzysets, order)

cUtil.persist_obj(model, model_file)

obj = cUtil.load_obj(model_file)

dt = {'fln_glo_avg': fln_test.glo_avg, 'sbr_glo_avg': sbr_test.glo_avg, 'joi_glo_avg': joi_test.glo_avg}
data_test = pd.DataFrame(dt)
data_test = data_test.dropna(axis=0, how='any')

ret = obj.forecast(data_test)

print("RMSE: " + str(Measures.rmse(list(data_test.fln_glo_avg[order:]), ret[:-1])))
#print(model)