def execute(dataset, **kwargs):
    """
    Run a batch of Genetic Algorithm hyperparameter optimization experiments.

    Each experiment calls ``GeneticAlgorithm(dataset, **kwargs)``, stores the
    elapsed wall-clock time of that call in the returned individual under the
    ``'time'`` key, and hands the individual together with its statistics to
    ``process_experiment``.

    All keyword arguments (including the ones listed below) are forwarded
    unchanged to ``GeneticAlgorithm``.

    :param dataset: the data forwarded to GeneticAlgorithm
    :keyword file: path of the hyperparameter database, default 'hyperparam.db'
    :keyword experiments: number of experiments to run, default 30
    :keyword distributed: when equal to 'dispy' a dispy cluster is started
        before the experiments and stopped afterwards; the started cluster is
        stored in ``kwargs['cluster']``. Default False
    :keyword nodes: list of dispy node addresses, only read when
        distributed == 'dispy'. Default ['127.0.0.1']
    :return: the value returned by process_experiment for the last
        experiment, or an empty list when no experiment was executed
    """
    file = kwargs.get('file', 'hyperparam.db')
    # NOTE(review): the connection is never closed here - confirm whether
    # process_experiment (or the caller) is expected to own it.
    conn = hUtil.open_hyperparam_db(file)

    experiments = kwargs.get('experiments', 30)
    distributed = kwargs.get('distributed', False)
    use_dispy = distributed == 'dispy'

    if use_dispy:
        # Imported lazily so that dispy is only required for distributed runs.
        from pyFTS.distributed import dispy as dUtil
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []
    try:
        for i in np.arange(experiments):
            print("Experiment {}".format(i))

            start = time.time()
            individual, statistics = GeneticAlgorithm(dataset, **kwargs)
            end = time.time()
            individual['time'] = end - start

            experiment = {'individual': individual, 'statistics': statistics}
            ret = process_experiment(experiment, '', conn)
    finally:
        # Stop the cluster even when an experiment raises, otherwise the
        # dispy cluster and its HTTP server would be left running.
        if use_dispy:
            dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
def execute(datasetname, dataset, **kwargs):
    """
    Batch execution of Distributed Evolutionary Hyperparameter Optimization (DEHO) for monovariate methods

    Each experiment calls ``GeneticAlgorithm(dataset, **kwargs)``, stores the
    elapsed wall-clock time of that call in the returned individual under the
    ``'time'`` key, and hands the result to ``process_experiment``. All keyword
    arguments are forwarded unchanged to ``GeneticAlgorithm``.

    :param datasetname: name of the dataset, passed to process_experiment together with each result
    :param dataset: The time series to optimize the FTS
    :keyword database_file: path of the hyperparameter database, default value: 'hyperparam.db'
    :keyword experiments: number of times the Genetic Algorithm is executed, default value: 30
    :keyword distributed: A value indicating if the execution will be local and sequential (distributed=False),
        or parallel and distributed (distributed='dispy' or distributed='spark'). This function only starts and
        stops a cluster itself when distributed == 'dispy'. Default value: False
    :keyword nodes: list of dispy node addresses, only read when distributed == 'dispy', default value: ['127.0.0.1']
    :keyword ngen: An integer value with the maximum number of generations, default value: 30
    :keyword mgen: An integer value with the maximum number of generations without improvement to stop, default value 7
    :keyword npop: An integer value with the population size, default value: 20
    :keyword pcross: A float value between 0 and 1 with the probability of crossover, default: .5
    :keyword psel: A float value between 0 and 1 with the probability of selection, default: .5
    :keyword pmut: A float value between 0 and 1 with the probability of mutation, default: .3
    :keyword fts_method: The FTS method to optimize, default value: hofts.WeightedHighOrderFTS
    :keyword parameters: dict with model specific arguments for fts_method
    :keyword elitism: A boolean value indicating if the best individual must always survive to next population
    :keyword initial_operator: a function that receives npop and returns a random population with size npop
    :keyword random_individual: create a random genotype
    :keyword evalutation_operator: a function that receives a dataset and an individual and returns its fitness
    :keyword selection_operator: a function that receives the whole population and returns a selected individual
    :keyword crossover_operator: a function that receives the whole population and returns a descendant individual
    :keyword mutation_operator: a function that receives one individual and returns a changed individual
    :keyword window_size: An integer value with the length of the scrolling window for train/test on dataset
    :keyword train_rate: A float value between 0 and 1 with the train/test split ([0,1])
    :keyword increment_rate: A float value between 0 and 1 with the increment of the scrolling window,
        relative to the window_size ([0,1])
    :keyword collect_statistics: A boolean value indicating to collect statistics for each generation
    :keyword cluster: when distributed == 'dispy' this entry is overwritten with the cluster started by this
        function; otherwise it is forwarded as given (for distributed='spark', the master node)
    :return: the value returned by process_experiment for the last experiment (presumably the best genotype -
        confirm against process_experiment), or an empty list when no experiment was executed
    """
    file = kwargs.get('database_file', 'hyperparam.db')
    # NOTE(review): the connection is never closed here - confirm whether
    # process_experiment (or the caller) is expected to own it.
    conn = hUtil.open_hyperparam_db(file)

    experiments = kwargs.get('experiments', 30)
    distributed = kwargs.get('distributed', False)

    fts_method = kwargs.get('fts_method', hofts.WeightedHighOrderFTS)
    # Last component of the dotted module name of the FTS class; it is
    # passed to process_experiment together with each result.
    shortname = str(fts_method.__module__).split('.')[-1]

    use_dispy = distributed == 'dispy'
    if use_dispy:
        nodes = kwargs.get('nodes', ['127.0.0.1'])
        cluster, http_server = dUtil.start_dispy_cluster(evaluate, nodes=nodes)
        kwargs['cluster'] = cluster

    ret = []
    try:
        for i in np.arange(experiments):
            print("Experiment {}".format(i))

            start = time.time()
            individual, statistics = GeneticAlgorithm(dataset, **kwargs)
            end = time.time()
            individual['time'] = end - start

            experiment = {'individual': individual, 'statistics': statistics}
            ret = process_experiment(shortname, experiment, datasetname, conn)
    finally:
        # Stop the cluster even when an experiment raises, otherwise the
        # dispy cluster and its HTTP server would be left running.
        if use_dispy:
            dUtil.stop_dispy_cluster(cluster, http_server)

    return ret
def execute(hyperparams, datasetname, dataset, **kwargs):
    """
    Exhaustive (grid) search over hyperparameter combinations on a dispy cluster.

    Every combination of the values in ``hyperparams`` (all keys except
    ``'lags'``) is expanded with the lag combinations allowed by its order,
    turned into an individual with ``dict_individual`` and submitted to the
    cluster in batches. Each batch of jobs is handed to ``process_jobs``.

    :param hyperparams: dict mapping a hyperparameter name to an iterable of
        candidate values. The keys 'partitions', 'partitioner', 'mf', 'alpha'
        and 'order' are required (a KeyError is raised otherwise). The
        optional key 'lags' holds the candidate lag increments; when absent
        the integers 0..49 are used. The dict is not modified.
    :param datasetname: name of the dataset, passed to process_jobs
    :param dataset: the data submitted to the cluster with every individual
    :keyword nodes: list of dispy node addresses, default ['127.0.0.1']
    :return: None

    All keyword arguments are also forwarded to ``cluster.submit``.
    """
    nodes = kwargs.get('nodes', ['127.0.0.1'])

    # Read the lags without popping them, so the caller's dict stays intact
    # and a second call with the same dict sees the same lags.
    if 'lags' in hyperparams:
        lags = hyperparams['lags']
    else:
        lags = [k for k in np.arange(50)]

    keys_sorted = sorted(k for k in hyperparams if k != 'lags')

    # Position of every hyperparameter inside the tuples yielded by product().
    index = {key: position for position, key in enumerate(keys_sorted)}

    print("Evaluation order: \n {}".format(index))

    hp_values = [list(hyperparams[hp]) for hp in keys_sorted]

    print("Evaluation values: \n {}".format(hp_values))

    cluster, http_server = dUtil.start_dispy_cluster(cluster_method, nodes=nodes)

    try:
        conn = hUtil.open_hyperparam_db('hyperparam.db')

        def submit_batch(batch):
            # Submit every individual of the batch and process the results.
            jobs = []
            for ind in batch:
                print("Testing individual {}".format(ind))
                job = cluster.submit(ind, dataset, **kwargs)
                jobs.append(job)
            process_jobs(jobs, datasetname, conn)

        individuals = []

        for instance in product(*hp_values):
            partitions = instance[index['partitions']]
            partitioner = instance[index['partitioner']]
            mf = instance[index['mf']]
            alpha_cut = instance[index['alpha']]
            order = instance[index['order']]

            count = 0
            for lag1 in lags:  # first lag
                _lags = [lag1]
                count += 1
                if order > 1:
                    for lag2 in lags:  # increment from the first to the second lag
                        _lags2 = [lag1, lag1 + lag2]
                        count += 1
                        if order > 2:
                            for lag3 in lags:  # increment from the second to the third lag
                                count += 1
                                _lags3 = [lag1, lag1 + lag2, lag1 + lag2 + lag3]
                                individuals.append(
                                    dict_individual(mf, partitioner, partitions, order, _lags3, alpha_cut))
                        else:
                            individuals.append(
                                dict_individual(mf, partitioner, partitions, order, _lags2, alpha_cut))
                else:
                    individuals.append(
                        dict_individual(mf, partitioner, partitions, order, _lags, alpha_cut))

                # Flush the queued individuals once enough of them piled up.
                if count > 10:
                    submit_batch(individuals)
                    count = 0
                    individuals = []

        # Individuals queued after the last flush (or all of them, when the
        # threshold above was never reached) must still be evaluated.
        if individuals:
            submit_batch(individuals)
    finally:
        # Always release the cluster, even when a batch fails.
        dUtil.stop_dispy_cluster(cluster, http_server)
def sliding_window_benchmarks(data, windowsize, train=0.8, **kwargs):
    """
    Sliding window benchmarks for FTS forecasters.

    For each data window, train and test datasets are split. For each train split, number of
    partitions and partitioning method, a partitioner model is created. For each partitioner, order,
    steps ahead and FTS method, a forecasting model is trained.

    All trained models are then benchmarked on the test data and the metrics are stored in a sqlite3
    database (identified by the 'file' parameter) for later analysis.

    The whole process can be distributed on a dispy cluster by setting the 'distributed' keyword to
    true and giving the list of dispy nodes in the 'nodes' keyword.

    The number of experiments is determined by the 'windowsize' and 'inc' parameters.

    :param data: test data
    :param windowsize: size of sliding window
    :param train: fraction of the sliding window data used to train the models. The default is 0.8.
    :param kwargs: dict, optional arguments

    :keyword benchmark_methods: a list with Non FTS models to benchmark. The default is None.
    :keyword benchmark_methods_parameters: a list with Non FTS models parameters, one dict per entry of
        benchmark_methods. The default is None, in which case every method is built without arguments.
    :keyword benchmark_models: False (the default) disables the Non FTS benchmark models. A list is used as
        the initial pool of prebuilt benchmark models; the list itself is not modified.
    :keyword build_methods: A boolean value indicating if the default FTS methods will be used on benchmark.
        The default is True.
    :keyword dataset: the dataset name to identify the current set of benchmarks results on database.
    :keyword distributed: A boolean value indicating if the forecasting procedure will be distributed in a
        dispy cluster. The default is False.
    :keyword file: file path to save the results. The default is benchmarks.db.
    :keyword inc: a float on interval [0,1] indicating the percentage of the windowsize to move the window.
        The default is 0.1.
    :keyword methods: a list with FTS class names. The default depends on the forecasting type and contains
        the list of all FTS methods.
    :keyword models: a list with prebuilt FTS objects. The default is None. The list itself is not modified.
    :keyword nodes: a list with the dispy cluster nodes addresses. The default is [127.0.0.1].
    :keyword orders: a list with orders of the models (for high order models). The default is [1,2,3].
    :keyword partitions: a list with the numbers of partitions on the Universe of Discourse.
        The default is [10].
    :keyword partitioners_models: a list with prebuilt Universe of Discourse partitioners objects.
        The default is None.
    :keyword partitioners_methods: a list with Universe of Discourse partitioners class names.
        The default is [partitioners.Grid.GridPartitioner].
    :keyword progress: If true a progress bar will be displayed during the benchmarks. The default is None.
    :keyword start: in the multi step forecasting, the index of the data where to start forecasting.
        The default is 0.
    :keyword steps_ahead: a list with the forecasting horizons, i. e., the number of steps ahead to forecast.
        The default is [1].
    :keyword tag: a name to identify the current set of benchmarks results on database.
    :keyword type: the forecasting type, one of these values: point(default), interval or distribution.
    :keyword transformations: a list with data transformations to apply. The default is [None].
    :raises ValueError: if 'type' is not one of point, interval or distribution
    """
    tag = __pop('tag', None, kwargs)
    dataset = __pop('dataset', None, kwargs)

    distributed = __pop('distributed', False, kwargs)

    transformations = kwargs.get('transformations', [None])
    progress = kwargs.get('progress', None)
    forecast_type = kwargs.get("type", 'point')

    orders = __pop("orders", [1, 2, 3], kwargs)

    partitioners_models = __pop("partitioners_models", None, kwargs)
    partitioners_methods = __pop("partitioners_methods", [Grid.GridPartitioner], kwargs)
    partitions = __pop("partitions", [10], kwargs)

    steps_ahead = __pop('steps_ahead', [1], kwargs)

    methods = __pop('methods', None, kwargs)
    models = __pop('models', None, kwargs)

    # Validate the forecasting type up front, so an unknown value fails with
    # the intended ValueError before any model is built.
    if forecast_type == 'point':
        experiment_method = run_point
        synthesis_method = process_point_jobs
    elif forecast_type == 'interval':
        experiment_method = run_interval
        synthesis_method = process_interval_jobs
    elif forecast_type == 'distribution':
        experiment_method = run_probabilistic
        synthesis_method = process_probabilistic_jobs
    else:
        raise ValueError("Type parameter has a unkown value!")

    # Work on a copy: the pool is extended below and the caller's list of
    # prebuilt models must not grow as a side effect.
    pool = [] if models is None else list(models)

    if methods is None:
        if forecast_type == 'point':
            methods = get_point_methods()
        elif forecast_type == 'interval':
            methods = get_interval_methods()
        elif forecast_type == 'distribution':
            methods = get_probabilistic_methods()

    build_methods = __pop("build_methods", True, kwargs)

    if build_methods:
        for method in methods:
            mfts = method()

            if mfts.is_high_order:
                # One fresh model per admissible order.
                for order in orders:
                    if order >= mfts.min_order:
                        mfts = method()
                        mfts.order = order
                        pool.append(mfts)
            else:
                mfts.order = 1
                pool.append(mfts)

    benchmark_models = __pop("benchmark_models", False, kwargs)

    # Always defined, so the progress estimate below works even when the
    # benchmark models are disabled.
    benchmark_pool = []

    if benchmark_models != False:
        benchmark_methods = __pop("benchmark_methods", None, kwargs)
        benchmark_methods_parameters = __pop("benchmark_methods_parameters", None, kwargs)

        if isinstance(benchmark_models, list):
            # Copy, so the caller's list is not extended.
            benchmark_pool = list(benchmark_models)

        if benchmark_models is None and benchmark_methods is None:
            if forecast_type == 'point' or forecast_type == 'partition':
                benchmark_methods = get_benchmark_point_methods()
            elif forecast_type == 'interval':
                benchmark_methods = get_benchmark_interval_methods()
            elif forecast_type == 'distribution':
                benchmark_methods = get_benchmark_probabilistic_methods()

        if benchmark_methods is not None:
            for transformation in transformations:
                for count, model in enumerate(benchmark_methods, start=0):
                    # Without explicit parameters the method is built with
                    # its own defaults instead of failing on None[count].
                    if benchmark_methods_parameters is None:
                        par = {}
                    else:
                        par = benchmark_methods_parameters[count]
                    mfts = model(**par)
                    mfts.append_transformation(transformation)
                    benchmark_pool.append(mfts)

    inc = __pop("inc", 0.1, kwargs)

    jobs = []
    progressbar = None
    cluster = None
    http_server = None
    conn = None

    try:
        if distributed:
            import pyFTS.distributed.dispy as dispy

            nodes = kwargs.get("nodes", ['127.0.0.1'])
            cluster, http_server = dispy.start_dispy_cluster(experiment_method, nodes)

        if progress:
            from tqdm import tqdm
            _tdata = len(data) / (windowsize * inc)
            # With no prebuilt partitioners the pool is built from the
            # partitioner classes, so estimate with their count instead.
            if partitioners_models is None:
                _npartitioners = len(partitioners_methods)
            else:
                _npartitioners = len(partitioners_models)
            _tasks = (_npartitioners * len(orders) * len(partitions)
                      * len(transformations) * len(steps_ahead))
            _tbcmk = len(benchmark_pool) * len(steps_ahead)
            progressbar = tqdm(total=_tdata * _tasks + _tdata * _tbcmk, desc="Benchmarks:")

        file = kwargs.get('file', "benchmarks.db")

        conn = bUtil.open_benchmark_db(file)

        for ct, train_data, test_data in cUtil.sliding_window(data, windowsize, train, inc=inc, **kwargs):
            if benchmark_models != False:
                for model in benchmark_pool:
                    for step in steps_ahead:

                        kwargs['steps_ahead'] = step

                        if not distributed:
                            if progressbar is not None:
                                progressbar.update(1)
                            try:
                                job = experiment_method(deepcopy(model), None, train_data, test_data, **kwargs)
                                synthesis_method(dataset, tag, job, conn)
                            except Exception:
                                # Best effort: report the failing model and
                                # carry on with the remaining experiments.
                                print('EXCEPTION! ', model.shortname, model.order)
                                traceback.print_exc()
                        else:
                            job = cluster.submit(deepcopy(model), None, train_data, test_data, **kwargs)
                            jobs.append(job)

            if partitioners_models is None:
                partitioners_pool = []
                for transformation in transformations:
                    for partition in partitions:
                        for partitioner in partitioners_methods:
                            data_train_fs = partitioner(data=train_data, npart=partition,
                                                        transformation=transformation)
                            partitioners_pool.append(data_train_fs)
            else:
                partitioners_pool = partitioners_models

            for step in steps_ahead:
                for partitioner in partitioners_pool:
                    for _id, model in enumerate(pool, start=0):

                        kwargs['steps_ahead'] = step

                        if not distributed:
                            if progressbar is not None:
                                progressbar.update(1)
                            try:
                                job = experiment_method(deepcopy(model), deepcopy(partitioner),
                                                        train_data, test_data, **kwargs)
                                synthesis_method(dataset, tag, job, conn)
                            except Exception:
                                # Best effort: report the failing model and
                                # carry on with the remaining experiments.
                                print('EXCEPTION! ', model.shortname, model.order, partitioner.name,
                                      partitioner.partitions, str(partitioner.transformation))
                                traceback.print_exc()
                        else:
                            job = cluster.submit(deepcopy(model), deepcopy(partitioner),
                                                 train_data, test_data, **kwargs)
                            # Associate the position of the model in the pool
                            # with the job, to identify it if needed later.
                            job.id = _id
                            jobs.append(job)

        if distributed:
            for job in jobs:
                if progressbar is not None:
                    progressbar.update(1)
                job()  # wait for this job
                if job.status == dispy.dispy.DispyJob.Finished:
                    synthesis_method(dataset, tag, job.result, conn)
                else:
                    print("status", job.status)
                    print("result", job.result)
                    print("stdout", job.stdout)
                    print("stderr", job.exception)

            cluster.wait()  # wait for all jobs to finish
    finally:
        # Release every resource that was acquired, also on failure. The
        # progress bar is closed only here, after the last update.
        if progressbar is not None:
            progressbar.close()
        try:
            if cluster is not None:
                dispy.stop_dispy_cluster(cluster, http_server)
        finally:
            if conn is not None:
                conn.close()