Example #1
    def __call__(self, data: DataEntry) -> DataEntry:
        try:
            value = ProcessStartField.process(data[self.name], self.freq)
        except (TypeError, ValueError) as e:
            raise GluonTSDataError(
                f'Error "{e}" occurred when reading field "{self.name}"'
            ) from e

        if value.tz is not None:
            raise GluonTSDataError(
                f'Timezone information is not supported, but provided in the "{self.name}" field'
            )

        data[self.name] = value

        return data
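The `tz` check above relies on `ProcessStartField.process` returning a `pandas.Timestamp`: a naive timestamp has `tz is None`, while a zone-aware one does not. A minimal standalone sketch of that distinction (not part of the example above):

import pandas as pd

naive = pd.Timestamp("2021-01-01")
aware = pd.Timestamp("2021-01-01", tz="UTC")
print(naive.tz)  # None -> passes the check above
print(aware.tz)  # UTC  -> would trigger the GluonTSDataError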
Example #2
    def __iter__(self):
        # The basic idea is to split the dataset into roughly equally sized
        # segments with lower and upper bounds, where each worker is
        # assigned one segment
        segment_size = int(len(self) / MPWorkerInfo.num_workers)

        if not self.cache or not self._data_cache:
            # The worker's bounds are constant, so compute them once
            # outside the loop.
            lower_bound = MPWorkerInfo.worker_id * segment_size
            upper_bound = (
                (MPWorkerInfo.worker_id + 1) * segment_size
                if MPWorkerInfo.worker_id + 1 != MPWorkerInfo.num_workers
                else len(self)
            )

            with open(self.path) as jsonl_file:
                for line_number, raw in enumerate(jsonl_file):
                    if not lower_bound <= line_number < upper_bound:
                        continue

                    span = Span(path=self.path, line=line_number)
                    try:
                        parsed_line = Line(json.loads(raw), span=span)
                        if self.cache:
                            self._data_cache.append(parsed_line)
                        yield parsed_line
                    except ValueError:
                        raise GluonTSDataError(
                            f"Could not read json line {line_number}, {raw}"
                        )
        else:
            yield from self._data_cache
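The bound arithmetic in the loop above is easy to verify in isolation. A minimal sketch, with `segment_bounds` as a hypothetical stand-in for that computation (10 lines split across 3 workers; the last worker absorbs the remainder):

def segment_bounds(total: int, num_workers: int, worker_id: int) -> range:
    # Same arithmetic as above: equal segments, last worker takes the rest.
    segment_size = total // num_workers
    lower = worker_id * segment_size
    upper = (
        (worker_id + 1) * segment_size
        if worker_id + 1 != num_workers
        else total
    )
    return range(lower, upper)

# 10 lines over 3 workers -> [0, 3), [3, 6), [6, 10)
assert [list(segment_bounds(10, 3, w)) for w in range(3)] == [
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8, 9],
]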
Example #3
    def __call__(self, data: DataEntry) -> DataEntry:
        value = data.get(self.name, None)
        if value is not None:
            value = np.asarray(value, dtype=self.dtype)

            if self.req_ndim != value.ndim:
                raise GluonTSDataError(
                    f"Array '{self.name}' has bad shape - expected "
                    f"{self.req_ndim} dimensions, got {value.ndim}.")

            data[self.name] = value

            return data
        elif not self.is_required:
            return data
        else:
            raise GluonTSDataError(
                f"Object is missing a required field `{self.name}`")
Example #4
    def __iter__(self):
        with open(self.path) as jsonl_file:
            for line_number, raw in enumerate(jsonl_file, start=1):
                span = Span(path=self.path, line=line_number)
                try:
                    yield Line(json.loads(raw), span=span)
                except ValueError:
                    raise GluonTSDataError(
                        f"Could not read json line {line_number}, {raw}")
Example #5
    def predict_time_series(
        self,
        ts: pd.Series,
        num_samples: int,
        custom_features: Optional[np.ndarray] = None,
    ) -> SampleForecast:
        """
        Given a training time series, this method generates `Forecast` object
        containing prediction samples for `prediction_length` time points.

        The predictions are generated via weighted sampling where the weights
        are determined by the `NPTSPredictor` kernel type and feature map.

        Parameters
        ----------
        ts
            training time series object
        custom_features
            custom features (covariates) to use
        num_samples
            number of samples to draw
        Returns
        -------
        Forecast
          A prediction for the supplied `ts` and `custom_features`.
        """

        if np.all(np.isnan(ts.values[-self.context_length :])):
            raise GluonTSDataError(
                f"The last {self.context_length} positions of the target time "
                f"series are all NaN. Please increase the `context_length` "
                f"parameter of your NPTS model so the last "
                f"{self.context_length} positions of each target contain at "
                f"least one non-NaN value."
            )

        # Get the features for both training and prediction ranges
        train_features, predict_features = self._get_features(
            ts.index, self.prediction_length, custom_features
        )

        # Compute weights for sampling for each time step `t` in the
        # prediction range
        sampling_weights_iterator = NPTS.compute_weights(
            train_features=train_features,
            pred_features=predict_features,
            target_isnan_positions=np.argwhere(np.isnan(ts.values)),
            kernel=self.kernel,
            do_exp=self._is_exp_kernel(),
        )

        # Generate forecasts
        forecast = NPTS.predict(
            ts, self.prediction_length, sampling_weights_iterator, num_samples
        )

        return forecast
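The docstring's "weighted sampling" is the heart of NPTS: prediction samples are drawn from past observations, with weights given by a kernel over feature distance. A toy, dependency-light sketch of the idea (the kernel and names here are illustrative, not the library's API):

import numpy as np

rng = np.random.default_rng(0)
target = np.array([10.0, 12.0, 11.0, 13.0, 12.5])

# Exponential kernel over time distance: recent points weigh more.
distances = np.arange(len(target))[::-1]  # 4, 3, 2, 1, 0
weights = np.exp(-0.5 * distances)
weights /= weights.sum()

# Draw prediction samples by resampling past values with these weights.
samples = rng.choice(target, size=100, p=weights)
print(samples.mean())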
Example #6
    def __call__(self, data: DataEntry) -> DataEntry:
        value = data.get(self.name, None)
        if value is not None:
            value = np.asarray(value, dtype=self.dtype)
            ddiff = self.req_ndim - value.ndim

            if ddiff == 1:
                value = np.expand_dims(a=value, axis=0)
            elif ddiff != 0:
                raise GluonTSDataError(
                    f"JSON array has bad shape - expected {self.req_ndim} "
                    f"dimensions, got {ddiff}")

            data[self.name] = value

            return data
        elif not self.is_required:
            return data
        else:
            raise GluonTSDataError(
                f"JSON object is missing a required field `{self.name}`")
Example #7
    def __call__(self, data: DataEntry) -> DataEntry:
        try:
            timestamp = ProcessStartField.process(data[self.name], self.freq)
        except (TypeError, ValueError) as e:
            raise GluonTSDataError(
                f'Error "{e}" occurred when reading field "{self.name}"'
            ) from e

        if timestamp.tz is not None:
            if self.tz_strategy == TimeZoneStrategy.error:
                raise GluonTSDataError(
                    "Timezone information is not supported, "
                    f'but provided in the "{self.name}" field.')
            elif self.tz_strategy == TimeZoneStrategy.utc:
                # align timestamp to utc timezone
                timestamp = timestamp.tz_convert("UTC")

            # removes timezone information
            timestamp = timestamp.tz_localize(None)

        data[self.name] = timestamp

        return data
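The two-step timezone handling above, convert to UTC and then drop the zone, can be reproduced with plain pandas:

import pandas as pd

ts = pd.Timestamp("2021-06-01 12:00", tz="US/Eastern")
utc = ts.tz_convert("UTC")      # 2021-06-01 16:00:00+00:00
naive = utc.tz_localize(None)   # 2021-06-01 16:00:00, tz dropped
print(naive)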
Example #8
    def __iter__(self):
        with open(self.path) as jsonl_file:
            for line_number, raw in enumerate(jsonl_file):
                # The dataset is equally distributed among the workers
                if (line_number % MPWorkerInfo.num_workers
                        != MPWorkerInfo.worker_id):
                    continue

                span = Span(path=self.path, line=line_number)
                try:
                    yield Line(json.loads(raw), span=span)
                except ValueError:
                    raise GluonTSDataError(
                        f"Could not read json line {line_number}, {raw}")
Example #9
File: jsonl.py  Project: Schmedu/gluon-ts
    def __iter__(self):
        # The basic idea is to split the dataset into roughly equally sized
        # segments with lower and upper bounds, where each worker is
        # assigned one segment
        bounds = get_bounds_for_mp_data_loading(len(self))
        if not self.cache or not self._data_cache:
            with self.open(self.path) as jsonl_file:
                for line_number, raw in enumerate(jsonl_file):
                    if not bounds.lower <= line_number < bounds.upper:
                        continue

                    span = Span(path=self.path, line=line_number)
                    try:
                        parsed_line = Line(json.loads(raw), span=span)
                        if self.cache:
                            self._data_cache.append(parsed_line)
                        yield parsed_line
                    except ValueError:
                        raise GluonTSDataError(
                            f"Could not read json line {line_number}, {raw}")
        else:
            yield from self._data_cache
Example #10
def number_of_workers(app: flask.Flask) -> int:
    logger = app.logger

    cpu_count = multiprocessing.cpu_count()

    if MODEL_SERVER_WORKERS:
        try:
            # Parse the override to int here, so a malformed value actually
            # triggers the ValueError handler below.
            worker_count = int(MODEL_SERVER_WORKERS)
        except ValueError:
            raise GluonTSDataError(
                'Cannot parse "inference worker count" parameter '
                f'`{MODEL_SERVER_WORKERS}` to int.')
        logger.info(
            f'Using {worker_count} workers (set by MODEL_SERVER_WORKERS '
            'environment variable).')
        return worker_count

    elif SAGEMAKER_BATCH and SAGEMAKER_MAX_CONCURRENT_TRANSFORMS < cpu_count:
        logger.info(
            f'Using {SAGEMAKER_MAX_CONCURRENT_TRANSFORMS} workers (set by '
            'MaxConcurrentTransforms parameter in batch mode).')
        return SAGEMAKER_MAX_CONCURRENT_TRANSFORMS

    else:
        logger.info(f'Using {cpu_count} workers.')
        return cpu_count
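The three-way fallback above is: explicit override, then the batch concurrency cap, then CPU count. A dependency-free sketch of the same ladder (the constant names mirror the example; their values here are illustrative):

import multiprocessing

MODEL_SERVER_WORKERS = 0  # 0 stands for "not set" in this sketch
SAGEMAKER_BATCH = True
SAGEMAKER_MAX_CONCURRENT_TRANSFORMS = 2

cpu_count = multiprocessing.cpu_count()

if MODEL_SERVER_WORKERS:
    workers = int(MODEL_SERVER_WORKERS)
elif SAGEMAKER_BATCH and SAGEMAKER_MAX_CONCURRENT_TRANSFORMS < cpu_count:
    workers = SAGEMAKER_MAX_CONCURRENT_TRANSFORMS
else:
    workers = cpu_count

print(workers)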
Example #11
def check_loss_finite(val: float) -> None:
    if not np.isfinite(val):
        raise GluonTSDataError(
            "Encountered invalid loss value! Try reducing the learning rate "
            "or try a different likelihood.")