def __call__(self, data: DataEntry) -> DataEntry:
    try:
        value = ProcessStartField.process(data[self.name], self.freq)
    except (TypeError, ValueError) as e:
        raise GluonTSDataError(
            f'Error "{e}" occurred when reading field "{self.name}"'
        )
    if value.tz is not None:
        raise GluonTSDataError(
            f"Timezone information is not supported, "
            f'but provided in the "{self.name}" field'
        )
    data[self.name] = value
    return data
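# A minimal sketch of the timezone check above, assuming only that
# `ProcessStartField.process` parses the raw value into a `pd.Timestamp`.
# The two literals below are illustrative, not taken from the library.
import pandas as pd

naive = pd.Timestamp("2021-01-01 00:00")        # no timezone: .tz is None
aware = pd.Timestamp("2021-01-01 00:00+01:00")  # offset makes it tz-aware

assert naive.tz is None      # accepted by the processor above
assert aware.tz is not None  # would trigger the GluonTSDataError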
def __iter__(self):
    # Basic idea is to split the dataset into roughly equally sized
    # segments with a lower and an upper bound, where each worker is
    # assigned one segment. The bounds are constant per worker, so
    # compute them once before the loop.
    segment_size = int(len(self) / MPWorkerInfo.num_workers)
    lower_bound = MPWorkerInfo.worker_id * segment_size
    upper_bound = (
        (MPWorkerInfo.worker_id + 1) * segment_size
        if MPWorkerInfo.worker_id + 1 != MPWorkerInfo.num_workers
        else len(self)
    )

    if not self.cache or (self.cache and not self._data_cache):
        with open(self.path) as jsonl_file:
            for line_number, raw in enumerate(jsonl_file):
                if not lower_bound <= line_number < upper_bound:
                    continue
                span = Span(path=self.path, line=line_number)
                try:
                    parsed_line = Line(json.loads(raw), span=span)
                    if self.cache:
                        self._data_cache.append(parsed_line)
                    yield parsed_line
                except ValueError:
                    raise GluonTSDataError(
                        f"Could not read json line {line_number}, {raw}"
                    )
    else:
        yield from self._data_cache
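# Hedged sketch of the segment arithmetic above: for `n` lines and `w`
# workers, worker `i` reads [i * s, (i + 1) * s) with s = n // w, and the
# last worker absorbs the remainder. The function name is illustrative.
def segment_bounds(n: int, num_workers: int, worker_id: int):
    segment_size = n // num_workers
    lower = worker_id * segment_size
    upper = n if worker_id + 1 == num_workers else lower + segment_size
    return lower, upper

# 10 lines over 3 workers: (0, 3), (3, 6), (6, 10) - the last worker
# picks up the extra line.
assert [segment_bounds(10, 3, i) for i in range(3)] == [(0, 3), (3, 6), (6, 10)]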
def __call__(self, data: DataEntry) -> DataEntry:
    value = data.get(self.name, None)
    if value is not None:
        value = np.asarray(value, dtype=self.dtype)
        if self.req_ndim != value.ndim:
            raise GluonTSDataError(
                f"Array '{self.name}' has bad shape - expected "
                f"{self.req_ndim} dimensions, got {value.ndim}."
            )
        data[self.name] = value
        return data
    elif not self.is_required:
        return data
    else:
        raise GluonTSDataError(
            f"Object is missing a required field `{self.name}`"
        )
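# A plain-numpy demonstration of the shape check above: a 1D target
# passes a `req_ndim == 1` check unchanged, while a 2D value would raise.
import numpy as np

target = np.asarray([1.0, 2.0, 3.0], dtype=np.float32)
assert target.ndim == 1   # matches req_ndim == 1: accepted as-is
matrix = np.asarray([[1.0], [2.0]])
assert matrix.ndim == 2   # mismatch: the processor above would raise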
def __iter__(self):
    with open(self.path) as jsonl_file:
        for line_number, raw in enumerate(jsonl_file, start=1):
            span = Span(path=self.path, line=line_number)
            try:
                yield Line(json.loads(raw), span=span)
            except ValueError:
                raise GluonTSDataError(
                    f"Could not read json line {line_number}, {raw}"
                )
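# Hedged usage sketch: the iterator above pairs every parsed JSON object
# with a (path, line) `Span` so parse errors point at the offending line.
# This stand-alone snippet mimics that loop on a temporary file.
import json
import tempfile

with tempfile.NamedTemporaryFile("w+", suffix=".json") as f:
    f.write('{"target": [1, 2, 3]}\n{"target": [4, 5]}\n')
    f.seek(0)
    for line_number, raw in enumerate(f, start=1):
        print(line_number, json.loads(raw))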
def predict_time_series(
    self,
    ts: pd.Series,
    num_samples: int,
    custom_features: np.ndarray = None,
) -> SampleForecast:
    """
    Given a training time series, this method generates a `SampleForecast`
    object containing prediction samples for `prediction_length` time
    points.

    The predictions are generated via weighted sampling where the weights
    are determined by the `NPTSPredictor` kernel type and feature map.

    Parameters
    ----------
    ts
        training time series object
    num_samples
        number of samples to draw
    custom_features
        custom features (covariates) to use

    Returns
    -------
    SampleForecast
        A prediction for the supplied `ts` and `custom_features`.
    """
    if np.all(np.isnan(ts.values[-self.context_length :])):
        raise GluonTSDataError(
            f"The last {self.context_length} positions of the target time "
            f"series are all NaN. Please increase the `context_length` "
            f"parameter of your NPTS model so the last "
            f"{self.context_length} positions of each target contain at "
            f"least one non-NaN value."
        )

    # Get the features for both training and prediction ranges
    train_features, predict_features = self._get_features(
        ts.index, self.prediction_length, custom_features
    )

    # Compute weights for sampling for each time step `t` in the
    # prediction range
    sampling_weights_iterator = NPTS.compute_weights(
        train_features=train_features,
        pred_features=predict_features,
        target_isnan_positions=np.argwhere(np.isnan(ts.values)),
        kernel=self.kernel,
        do_exp=self._is_exp_kernel(),
    )

    # Generate forecasts
    forecast = NPTS.predict(
        ts, self.prediction_length, sampling_weights_iterator, num_samples
    )

    return forecast
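# Hedged usage sketch for `predict_time_series`: build a daily series and
# request 100 sample paths. How `predictor` is constructed depends on the
# NPTSPredictor signature of your GluonTS version, so this stays commented.
#
# import numpy as np
# import pandas as pd
#
# ts = pd.Series(
#     np.random.rand(60),
#     index=pd.date_range("2021-01-01", periods=60, freq="D"),
# )
# forecast = predictor.predict_time_series(ts, num_samples=100)
# forecast.samples  # array of shape (num_samples, prediction_length)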
def __call__(self, data: DataEntry) -> DataEntry:
    value = data.get(self.name, None)
    if value is not None:
        value = np.asarray(value, dtype=self.dtype)
        ddiff = self.req_ndim - value.ndim
        if ddiff == 1:
            # Promote the array by one dimension with a leading axis.
            value = np.expand_dims(a=value, axis=0)
        elif ddiff != 0:
            raise GluonTSDataError(
                f"JSON array has bad shape - expected {self.req_ndim} "
                f"dimensions, got {value.ndim}"
            )
        data[self.name] = value
        return data
    elif not self.is_required:
        return data
    else:
        raise GluonTSDataError(
            f"JSON object is missing a required field `{self.name}`"
        )
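# Demonstration of the dimension promotion above: a value exactly one
# dimension short of `req_ndim` gains a leading axis; a larger mismatch
# is an error.
import numpy as np

value = np.asarray([1.0, 2.0, 3.0])
promoted = np.expand_dims(value, axis=0)
assert promoted.shape == (1, 3)   # ddiff == 1: promoted from 1D to 2D
assert np.asarray(5.0).ndim == 0  # against req_ndim == 2, ddiff == 2: error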
def __call__(self, data: DataEntry) -> DataEntry:
    try:
        timestamp = ProcessStartField.process(data[self.name], self.freq)
    except (TypeError, ValueError) as e:
        raise GluonTSDataError(
            f'Error "{e}" occurred when reading field "{self.name}"'
        )

    if timestamp.tz is not None:
        if self.tz_strategy == TimeZoneStrategy.error:
            raise GluonTSDataError(
                "Timezone information is not supported, "
                f'but provided in the "{self.name}" field.'
            )
        elif self.tz_strategy == TimeZoneStrategy.utc:
            # align timestamp to utc timezone
            timestamp = timestamp.tz_convert("UTC")

        # removes timezone information
        timestamp = timestamp.tz_localize(None)

    data[self.name] = timestamp
    return data
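# A minimal pandas sketch of the `utc` strategy above: convert the aware
# timestamp to UTC, then drop the timezone, leaving a naive timestamp on
# the UTC clock. Literals are illustrative.
import pandas as pd

aware = pd.Timestamp("2021-06-01 12:00", tz="Europe/Berlin")  # UTC+2 in June
utc_naive = aware.tz_convert("UTC").tz_localize(None)
assert utc_naive == pd.Timestamp("2021-06-01 10:00")
assert utc_naive.tz is None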
def __iter__(self):
    with open(self.path) as jsonl_file:
        for line_number, raw in enumerate(jsonl_file):
            # The dataset is distributed round-robin among the workers:
            # each worker keeps only the lines whose index is congruent
            # to its worker id.
            if line_number % MPWorkerInfo.num_workers != MPWorkerInfo.worker_id:
                continue
            span = Span(path=self.path, line=line_number)
            try:
                yield Line(json.loads(raw), span=span)
            except ValueError:
                raise GluonTSDataError(
                    f"Could not read json line {line_number}, {raw}"
                )
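# Round-robin sharding in isolation: with 3 workers, line indices 0..8
# split as below. Worker counts and indices are illustrative.
num_workers = 3
shards = {
    worker_id: [i for i in range(9) if i % num_workers == worker_id]
    for worker_id in range(num_workers)
}
assert shards == {0: [0, 3, 6], 1: [1, 4, 7], 2: [2, 5, 8]}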
def __iter__(self):
    # Basic idea is to split the dataset into roughly equally sized
    # segments with a lower and an upper bound, where each worker is
    # assigned one segment.
    bounds = get_bounds_for_mp_data_loading(len(self))
    if not self.cache or (self.cache and not self._data_cache):
        with self.open(self.path) as jsonl_file:
            for line_number, raw in enumerate(jsonl_file):
                if not bounds.lower <= line_number < bounds.upper:
                    continue
                span = Span(path=self.path, line=line_number)
                try:
                    parsed_line = Line(json.loads(raw), span=span)
                    if self.cache:
                        self._data_cache.append(parsed_line)
                    yield parsed_line
                except ValueError:
                    raise GluonTSDataError(
                        f"Could not read json line {line_number}, {raw}"
                    )
    else:
        yield from self._data_cache
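# Sketch of the caching behavior above: the first pass streams the file
# while filling `_data_cache`; later passes are served from the cache.
# `CachedLines` is an illustrative stand-in, not the real class.
class CachedLines:
    def __init__(self, lines, cache=True):
        self._lines = lines
        self.cache = cache
        self._data_cache = []

    def __iter__(self):
        if not self.cache or not self._data_cache:
            for line in self._lines:
                if self.cache:
                    self._data_cache.append(line)
                yield line
        else:
            yield from self._data_cache

reader = CachedLines(["a", "b"])
assert list(reader) == ["a", "b"]  # first pass: reads and caches
assert list(reader) == ["a", "b"]  # second pass: served from cache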
def number_of_workers(app: flask.Flask) -> int:
    logger = app.logger
    cpu_count = multiprocessing.cpu_count()

    if MODEL_SERVER_WORKERS > 0:
        try:
            logger.info(
                "Using {} workers (set by MODEL_SERVER_WORKERS environment "
                "variable).".format(MODEL_SERVER_WORKERS)
            )
            return MODEL_SERVER_WORKERS
        except ValueError as ex:
            raise GluonTSDataError(
                'Cannot parse "inference worker count" '
                "parameter `{}` to int.".format(ex)
            )
    elif SAGEMAKER_BATCH and SAGEMAKER_MAX_CONCURRENT_TRANSFORMS < cpu_count:
        logger.info(
            "Using {} workers (set by MaxConcurrentTransforms parameter in "
            "batch mode).".format(SAGEMAKER_MAX_CONCURRENT_TRANSFORMS)
        )
        return SAGEMAKER_MAX_CONCURRENT_TRANSFORMS
    else:
        logger.info("Using {} workers".format(cpu_count))
        return cpu_count
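# Hedged sketch of where MODEL_SERVER_WORKERS plausibly comes from: the
# env-var name matches the log message above, but this parsing is an
# assumption, not the library's code. A non-numeric value raises
# ValueError at parse time, presumably the failure the except clause
# above is meant to surface.
import os

MODEL_SERVER_WORKERS = int(os.environ.get("MODEL_SERVER_WORKERS", "0"))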
def check_loss_finite(val: float) -> None:
    if not np.isfinite(val):
        raise GluonTSDataError(
            "Encountered invalid loss value! Try reducing the learning rate "
            "or try a different likelihood."
        )
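# Usage sketch: guard each training step with the check above so that a
# diverged loss fails fast instead of propagating NaNs into the weights.
import numpy as np

check_loss_finite(0.42)  # finite: passes silently
try:
    check_loss_finite(float("nan"))
except GluonTSDataError:
    print("loss diverged - reduce the learning rate")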