class TCMF(BaseModel):
    """
    MF regularized TCN + TCN. This version is not for automated searching yet.

    Thin wrapper around a ``DeepGLO`` model: hyper parameters are read from a
    config dict, the model is built lazily on the first ``fit_eval`` call, and
    prediction/evaluation are delegated to the wrapped model.
    """

    def __init__(self):
        """
        Create an un-built wrapper.

        The wrapped model is created by ``_build`` (triggered by the first
        ``fit_eval`` call) or loaded from disk by ``restore``.
        """
        # models
        self.model = None
        self.model_init = False

    def set_params(self, **config):
        """
        Copy hyper parameters from ``config`` onto the instance.

        Every key is optional; a missing key falls back to the default
        written next to it below.

        :param config: hyper parameters given as keyword arguments
        """
        # Hyper parameters forwarded to the DeepGLO constructor in _build.
        self.vbsize = config.get("vbsize", 128)
        self.hbsize = config.get("hbsize", 256)
        self.num_channels_X = config.get("num_channels_X", [32, 32, 32, 32, 32, 1])
        self.num_channels_Y = config.get("num_channels_Y", [16, 16, 16, 16, 16, 1])
        self.kernel_size = config.get("kernel_size", 7)
        self.dropout = config.get("dropout", 0.1)
        self.rank = config.get("rank", 64)
        self.kernel_size_Y = config.get("kernel_size_Y", 7)
        # NOTE: the config key is "learning_rate", the attribute is "lr".
        self.lr = config.get("learning_rate", 0.0005)
        self.val_len = config.get("val_len", 24)
        self.normalize = config.get("normalize", False)
        self.start_date = config.get("start_date", "2020-4-1")
        self.freq = config.get("freq", "1H")
        self.covariates = config.get('covariates', None)
        self.use_time = config.get("use_time", True)
        self.dti = config.get("dti", None)
        self.svd = config.get("svd", True)
        self.period = config.get("period", 24)

        # Hyper parameters forwarded to train_all_models in fit_eval.
        self.alt_iters = config.get("alt_iters", 10)
        self.y_iters = config.get("y_iters", 10)
        # NOTE: the config key is "init_FX_epoch", the attribute is "init_epoch".
        self.init_epoch = config.get("init_FX_epoch", 100)
        self.max_FX_epoch = config.get("max_FX_epoch", 300)
        self.max_TCN_epoch = config.get("max_TCN_epoch", 300)

    def _build(self, **config):
        """
        build the models and initialize.

        :param config: hyper parameters for building the model
        :return:
        """
        self.set_params(**config)
        self.model = DeepGLO(
            vbsize=self.vbsize,
            hbsize=self.hbsize,
            num_channels_X=self.num_channels_X,
            num_channels_Y=self.num_channels_Y,
            kernel_size=self.kernel_size,
            dropout=self.dropout,
            rank=self.rank,
            kernel_size_Y=self.kernel_size_Y,
            lr=self.lr,
            val_len=self.val_len,
            normalize=self.normalize,
            start_date=self.start_date,
            freq=self.freq,
            covariates=self.covariates,
            use_time=self.use_time,
            dti=self.dti,
            svd=self.svd,
            period=self.period,
            forward_cov=False,
        )
        self.model_init = True

    def fit_eval(self, x, y=None, verbose=0, num_workers=None, **config):
        """
        Fit on the training data from scratch.
        Since the rolling process is very customized in this model,
        we enclose the rolling process inside this method.

        :param x: training data, an array in shape (nd, Td),
            nd is the number of series, Td is the time dimension
        :param y: None. target is extracted from x directly
        :param verbose: unused by this method
        :param num_workers: number of workers to use. If None, the value from
            ``get_default_num_workers`` is used.
        :param config: hyper parameters, see ``set_params`` for the keys
        :return: the evaluation metric value
        """
        if not self.model_init:
            self._build(**config)
        elif not hasattr(self, "alt_iters"):
            # The model came from restore(), which sets model_init without
            # populating the training hyper parameters read below. Load them
            # from config instead of failing with AttributeError.
            self.set_params(**config)
        if num_workers is None:
            num_workers = TCMF.get_default_num_workers()
        val_loss = self.model.train_all_models(
            x,
            alt_iters=self.alt_iters,
            y_iters=self.y_iters,
            init_epochs=self.init_epoch,
            max_FX_epoch=self.max_FX_epoch,
            max_TCN_epoch=self.max_TCN_epoch,
            num_workers=num_workers,
        )
        return val_loss

    def fit_incremental(self, x):
        """
        Incremental fitting given a pre-trained model. Not implemented yet:
        the call is currently a no-op.

        :param x: incremental data
        :return:
        """
        # TODO incrementally train models
        pass

    @staticmethod
    def get_default_num_workers():
        """
        Return the default number of workers.

        :return: ``num_ray_nodes`` of the current RayContext, or 1 when the
            context (or that attribute) cannot be obtained.
        """
        from zoo.ray import RayContext
        try:
            ray_ctx = RayContext.get(initialize=False)
            num_workers = ray_ctx.num_ray_nodes
        except Exception:
            # Best effort: without a usable RayContext fall back to one worker.
            # (Exception rather than a bare except so that KeyboardInterrupt
            # and SystemExit still propagate.)
            num_workers = 1
        return num_workers

    def predict(self, x=None, horizon=24, mc=False, num_workers=None):
        """
        Predict horizon time-points ahead the input x in fit_eval

        :param x: We don't support input x currently.
        :param horizon: horizon length to predict
        :param mc: unused by this method
        :param num_workers: the number of workers to use. Note that there has
            to be an active RayContext (or a started Ray) if num_workers > 1.
            If None, the value from ``get_default_num_workers`` is used.
        :return: the last ``horizon`` columns of the model's prediction
        :raises ValueError: if x is not None
        :raises Exception: if no model has been fitted or restored
        :raises RuntimeError: if num_workers > 1 and Ray is not available
        """
        if x is not None:
            raise ValueError("We don't support input x directly.")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )
        if num_workers is None:
            num_workers = TCMF.get_default_num_workers()
        if num_workers > 1:
            import ray
            from zoo.ray import RayContext
            try:
                RayContext.get(initialize=False)
            except Exception:
                try:
                    # detect whether ray has been started.
                    ray.put(None)
                except Exception as err:
                    raise RuntimeError(
                        f"There must be an activate ray context while running with "
                        f"{num_workers} workers. You can either start and init a "
                        f"RayContext by init_orca_context(..., init_ray_on_spark="
                        f"True) or start Ray with ray.init()") from err

        out = self.model.predict_horizon(
            future=horizon,
            bsize=90,
            normalize=False,
            num_workers=num_workers,
        )
        return out[:, -horizon::]

    def evaluate(self, x=None, y=None, metrics=None, num_workers=None):
        """
        Evaluate on the prediction results and y. We predict horizon time-points
        ahead the input x in fit_eval before evaluation, where the horizon length
        equals the second dimension size of y.

        :param x: We don't support input x currently.
        :param y: target. We interpret the second dimension of y as the horizon
            length for evaluation. A 1-D y is treated as a horizon of 1.
        :param metrics: a list of metrics in string format
        :param num_workers: the number of workers to use in evaluate. It is
            passed on to ``predict``, which resolves None to the default.
        :return: a list of metric evaluation results
        :raises ValueError: if x is not None or y is None
        :raises Exception: if no model has been fitted or restored
        """
        if x is not None:
            raise ValueError("We don't support input x directly.")
        if y is None:
            raise ValueError("Input invalid y of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )
        if len(y.shape) == 1:
            # Promote a 1-D target to a single-column 2-D array.
            y = np.expand_dims(y, axis=1)
            horizon = 1
        else:
            horizon = y.shape[1]
        result = self.predict(x=None, horizon=horizon, num_workers=num_workers)

        if y.shape[1] == 1:
            multioutput = 'uniform_average'
        else:
            multioutput = 'raw_values'
        return [
            Evaluator.evaluate(m, y, result, multioutput=multioutput)
            for m in metrics
        ]

    def save(self, model_file):
        """
        Pickle the wrapped model to ``model_file``.

        :param model_file: path of the file to write
        """
        # Use a context manager so the file is flushed and closed even when
        # pickling fails part-way through.
        with open(model_file, "wb") as f:
            pickle.dump(self.model, f)

    def restore(self, model_file):
        """
        Load a pickled model from ``model_file`` and mark the wrapper as built.

        :param model_file: path of the file to read
        """
        # NOTE(security): pickle.load can execute arbitrary code; only restore
        # files that come from a trusted source.
        with open(model_file, 'rb') as f:
            self.model = pickle.load(f)
        self.model_init = True

    def _get_optional_parameters(self):
        """Return the optional parameters; currently an empty dict."""
        return {}

    def _get_required_parameters(self):
        """Return the required parameters; currently an empty dict."""
        return {}
class TCMF(BaseModel):
    """
    MF regularized TCN + TCN. This version is not for automated searching yet.

    Thin wrapper around a ``DeepGLO`` model: model hyper parameters are read
    from a config dict when the model is built, training parameters are read
    from the config passed to ``fit_eval``, and prediction/evaluation are
    delegated to the wrapped model.
    """

    def __init__(self):
        """
        Create an un-built wrapper.

        The wrapped model is created by ``build`` (triggered by the first
        ``fit_eval`` call) or loaded from disk by ``restore``.
        """
        # models
        self.model = None
        self.model_init = False

    def build(self, config):
        """
        build the models and initialize.

        :param config: dict of hyper parameters for building the model. Every
            key is optional and falls back to the default written below.
        :return:
        """
        self.model = DeepGLO(
            vbsize=config.get("vbsize", 128),
            hbsize=config.get("hbsize", 256),
            num_channels_X=config.get("num_channels_X", [32, 32, 32, 32, 32, 1]),
            num_channels_Y=config.get("num_channels_Y", [16, 16, 16, 16, 16, 1]),
            kernel_size=config.get("kernel_size", 7),
            dropout=config.get("dropout", 0.1),
            rank=config.get("rank", 64),
            kernel_size_Y=config.get("kernel_size_Y", 7),
            # NOTE: the config key is "learning_rate", the argument is "lr".
            lr=config.get("learning_rate", 0.0005),
            normalize=config.get("normalize", False),
            use_time=config.get("use_time", True),
            svd=config.get("svd", True),
            forward_cov=False,
        )
        self.model_init = True

    def fit_eval(self, x, y=None, verbose=0, num_workers=None, **config):
        """
        Fit on the training data from scratch.
        Since the rolling process is very customized in this model,
        we enclose the rolling process inside this method.

        :param x: training data, an array in shape (nd, Td),
            nd is the number of series, Td is the time dimension
        :param y: None. target is extracted from x directly
        :param verbose: unused by this method
        :param num_workers: number of workers to use. If None, the value from
            ``get_default_num_workers`` is used.
        :param config: hyper parameters. Besides the keys read by ``build``,
            the keys val_len, start_date, freq, covariates, dti, period,
            init_FX_epoch, alt_iters, y_iters, max_FX_epoch and max_TCN_epoch
            are read here, each with a default.
        :return: the evaluation metric value
        """
        if not self.model_init:
            self.build(config)
        if num_workers is None:
            num_workers = TCMF.get_default_num_workers()

        covariates = config.get('covariates', None)
        dti = config.get("dti", None)
        # Covariates/dti must span the whole time dimension of x.
        self._check_covariates_dti(covariates=covariates, dti=dti,
                                   ts_len=x.shape[1])
        val_loss = self.model.train_all_models(
            x,
            val_len=config.get("val_len", 24),
            start_date=config.get("start_date", "2020-4-1"),
            freq=config.get("freq", "1H"),
            covariates=covariates,
            dti=dti,
            period=config.get("period", 24),
            init_epochs=config.get("init_FX_epoch", 100),
            alt_iters=config.get("alt_iters", 10),
            y_iters=config.get("y_iters", 10),
            max_FX_epoch=config.get("max_FX_epoch", 300),
            max_TCN_epoch=config.get("max_TCN_epoch", 300),
            num_workers=num_workers,
        )
        return val_loss

    def fit_incremental(self, x, covariates_new=None, dti_new=None):
        """
        Incremental fitting given a pre-trained model.

        :param x: incremental data
        :param covariates_new: covariates corresponding to the incremental x
        :param dti_new: dti corresponding to the incremental x
        :return:
        :raises ValueError: if x is None, or covariates_new/dti_new are invalid
            or inconsistent with what was used in fit
        :raises Exception: if no model has been fitted or restored
        """
        if x is None:
            raise ValueError("Input invalid x of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling "
                "fit_incremental")
        self._check_covariates_dti(covariates=covariates_new, dti=dti_new,
                                   ts_len=x.shape[1],
                                   method_name='fit_incremental')
        self.model.inject_new(x, covariates_new=covariates_new, dti_new=dti_new)

    @staticmethod
    def get_default_num_workers():
        """
        Return the default number of workers.

        :return: ``num_ray_nodes`` of the current RayContext, or 1 when the
            context (or that attribute) cannot be obtained.
        """
        from zoo.ray import RayContext
        try:
            ray_ctx = RayContext.get(initialize=False)
            num_workers = ray_ctx.num_ray_nodes
        except Exception:
            # Best effort: without a usable RayContext fall back to one worker.
            # (Exception rather than a bare except so that KeyboardInterrupt
            # and SystemExit still propagate.)
            num_workers = 1
        return num_workers

    def predict(self, x=None, horizon=24, mc=False,
                future_covariates=None, future_dti=None, num_workers=None):
        """
        Predict horizon time-points ahead the input x in fit_eval

        :param x: We don't support input x currently.
        :param horizon: horizon length to predict
        :param mc: unused by this method
        :param future_covariates: covariates corresponding to future horizon
            steps data to predict.
        :param future_dti: dti corresponding to future horizon steps data to
            predict.
        :param num_workers: the number of workers to use. Note that there has
            to be an active RayContext (or a started Ray) if num_workers > 1.
            If None, the value from ``get_default_num_workers`` is used.
        :return: the last ``horizon`` columns of the model's prediction
        :raises ValueError: if x is not None, or future_covariates/future_dti
            are invalid or inconsistent with what was used in fit
        :raises Exception: if no model has been fitted or restored
        :raises RuntimeError: if num_workers > 1 and Ray is not available
        """
        if x is not None:
            raise ValueError("We don't support input x directly.")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )
        self._check_covariates_dti(covariates=future_covariates, dti=future_dti,
                                   ts_len=horizon, method_name="predict")
        if num_workers is None:
            num_workers = TCMF.get_default_num_workers()
        if num_workers > 1:
            import ray
            from zoo.ray import RayContext
            try:
                RayContext.get(initialize=False)
            except Exception:
                try:
                    # detect whether ray has been started.
                    ray.put(None)
                except Exception as err:
                    raise RuntimeError(
                        f"There must be an activate ray context while running with "
                        f"{num_workers} workers. You can either start and init a "
                        f"RayContext by init_orca_context(..., init_ray_on_spark="
                        f"True) or start Ray with ray.init()") from err

        out = self.model.predict_horizon(
            future=horizon,
            bsize=90,
            num_workers=num_workers,
            future_covariates=future_covariates,
            future_dti=future_dti,
        )
        return out[:, -horizon::]

    def evaluate(self, x=None, y=None, metrics=None,
                 target_covariates=None, target_dti=None, num_workers=None):
        """
        Evaluate on the prediction results and y. We predict horizon time-points
        ahead the input x in fit_eval before evaluation, where the horizon length
        equals the second dimension size of y.

        :param x: We don't support input x currently.
        :param y: target. We interpret the second dimension of y as the horizon
            length for evaluation. A 1-D y is treated as a horizon of 1.
        :param metrics: a list of metrics in string format
        :param target_covariates: covariates corresponding to target_value.
            2-D ndarray or None.
            The shape of ndarray should be (r, horizon), where r is the number
            of covariates. Global covariates for all time series.
            If None, only default time covariates will be used while use_time
            is True. If not, the time covariates used is the stack of input
            covariates and default time covariates.
        :param target_dti: dti corresponding to target_value.
            DatetimeIndex or None.
            If None, use default fixed frequency DatetimeIndex generated with
            the last date of x in fit and freq.
        :param num_workers: the number of workers to use in evaluate. It is
            passed on to ``predict``, which resolves None to the default.
        :return: a list of metric evaluation results
        :raises ValueError: if x is not None or y is None
        :raises Exception: if no model has been fitted or restored
        """
        if x is not None:
            raise ValueError("We don't support input x directly.")
        if y is None:
            raise ValueError("Input invalid y of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )
        if len(y.shape) == 1:
            # Promote a 1-D target to a single-column 2-D array.
            y = np.expand_dims(y, axis=1)
            horizon = 1
        else:
            horizon = y.shape[1]
        result = self.predict(x=None, horizon=horizon,
                              future_covariates=target_covariates,
                              future_dti=target_dti,
                              num_workers=num_workers)

        if y.shape[1] == 1:
            multioutput = 'uniform_average'
        else:
            multioutput = 'raw_values'
        return [
            Evaluator.evaluate(m, y, result, multioutput=multioutput)
            for m in metrics
        ]

    def save(self, model_file):
        """
        Pickle the wrapped model to ``model_file``.

        :param model_file: path of the file to write
        """
        # Use a context manager so the file is flushed and closed even when
        # pickling fails part-way through.
        with open(model_file, "wb") as f:
            pickle.dump(self.model, f)

    def restore(self, model_file):
        """
        Load a pickled model from ``model_file`` and mark the wrapper as built.

        :param model_file: path of the file to read
        """
        # NOTE(security): pickle.load can execute arbitrary code; only restore
        # files that come from a trusted source.
        with open(model_file, 'rb') as f:
            self.model = pickle.load(f)
        self.model_init = True

    def _get_optional_parameters(self):
        """Return the optional parameters; currently an empty dict."""
        return {}

    def _get_required_parameters(self):
        """Return the required parameters; currently an empty dict."""
        return {}

    def _check_covariates_dti(self, covariates=None, dti=None, ts_len=24,
                              method_name='fit'):
        """
        Validate covariates and dti for ``method_name``.

        The type, rank and length of the inputs are always checked. For any
        method other than 'fit' the inputs are additionally compared with the
        ``covariates`` and ``dti`` attributes of the wrapped model, so callers
        must make sure ``self.model`` is set before using such a method name.

        :param covariates: 2-D ndarray whose second dimension is ts_len, or None
        :param dti: pandas DatetimeIndex of length ts_len, or None
        :param ts_len: expected number of time steps
        :param method_name: name of the calling method, used in error messages
        :raises ValueError: if any check fails
        """
        if covariates is not None and not isinstance(covariates, np.ndarray):
            raise ValueError(
                f"Input covariates must be a ndarray. Got {type(covariates)}")
        if covariates is not None and not covariates.ndim == 2:
            raise ValueError(
                f"You should input a 2-D ndarray of covariates. "
                f"But Got dimension of {covariates.ndim}")
        if covariates is not None and not covariates.shape[1] == ts_len:
            raise ValueError(
                f"The second dimension shape of covariates should be {ts_len}, "
                f"but got {covariates.shape[1]} instead.")
        if dti is not None and not isinstance(dti, pd.DatetimeIndex):
            raise ValueError(
                f"Input dti must be a pandas DatetimeIndex. Got {type(dti)}")
        if dti is not None and len(dti) != ts_len:
            raise ValueError(f"Input dti length should be equal to {ts_len}, "
                             f"but got {len(dti)} instead.")

        if method_name != 'fit':
            # covariates and dti should be consistent with that in fit
            if self.model.covariates is None and covariates is not None:
                raise ValueError(
                    f"Find valid covariates in {method_name} but invalid covariates "
                    f"in fit. Please keep them in consistence!")
            if self.model.covariates is not None and covariates is None:
                raise ValueError(
                    f"Find valid covariates in fit but invalid covariates in "
                    f"{method_name}. Please keep them in consistence!")
            # covariates cannot be None here: that case was rejected above.
            if self.model.covariates is not None \
                    and self.model.covariates.shape[0] != covariates.shape[0]:
                raise ValueError(
                    f"The input covariates number in {method_name} should be the same "
                    f"as the input covariates number in fit. Got {covariates.shape[0]} "
                    f"and {self.model.covariates.shape[0]} respectively.")
            if self.model.dti is None and dti is not None:
                raise ValueError(
                    f"Find valid dti in {method_name} but invalid dti in fit. "
                    f"Please keep them in consistence!")
            if self.model.dti is not None and dti is None:
                raise ValueError(
                    f"Find valid dti in fit but invalid dti in {method_name}. "
                    f"Please keep them in consistence!")