import time import timeit import traceback from hyperopt import base, fmin, Trials from hyperopt.base import validate_timeout, validate_loss_threshold from hyperopt.utils import coarse_utcnow, _get_logger, _get_random_id try: from pyspark.sql import SparkSession _have_spark = True except ImportError as e: _have_spark = False logger = _get_logger("hyperopt-spark") class SparkTrials(Trials): """ Implementation of hyperopt.Trials supporting distributed execution using Apache Spark clusters. This requires fmin to be run on a Spark cluster. Plugging SparkTrials into hyperopt.fmin() allows hyperopt to send model training and evaluation tasks to Spark workers, parallelizing hyperparameter search. Each trial (set of hyperparameter values) is handled within a single Spark task; i.e., each model will be fit and evaluated on a single worker machine. Trials are run asynchronously.
import copy import numbers import threading import time import timeit from hyperopt import base, fmin, Trials from hyperopt.utils import coarse_utcnow, _get_logger, _get_random_id try: from pyspark.sql import SparkSession _have_spark = True except ModuleNotFoundError as e: _have_spark = False logger = _get_logger('hyperopt-spark') class SparkTrials(Trials): """ Implementation of hyperopt.Trials supporting distributed execution using Apache Spark clusters. This requires fmin to be run on a Spark cluster. Plugging SparkTrials into hyperopt.fmin() allows hyperopt to send model training and evaluation tasks to Spark workers, parallelizing hyperparameter search. Each trial (set of hyperparameter values) is handled within a single Spark task; i.e., each model will be fit and evaluated on a single worker machine. Trials are run asynchronously.