Пример #1
0
    def flatmap_transform(self, data: DataEntry,
                          is_train: bool) -> Iterator[DataEntry]:
        """Cut one entry into fixed-length "past" windows, one per split point.

        For every split index a shallow copy of ``data`` is produced in which
        each field listed in ``dynamic_feature_fields`` plus the target field
        is replaced by a ``_past(field)`` entry holding the
        ``instance_length`` steps (last axis) that precede the split. When
        fewer steps exist, the window is left-padded with ``pad_value`` and
        the padded positions are marked with 1 in the ``is_pad_field`` array.

        Parameters
        ----------
        data
            Entry to split; must contain the start field, the target field
            and every dynamic feature field.
        is_train
            When true, split indices come from ``instance_sampler`` (or, for
            a target shorter than ``instance_length``, a single split at the
            end if ``allow_target_padding`` is set, otherwise none). When
            false, the only split index is the end of the target.

        Yields
        ------
        DataEntry
            One modified copy of ``data`` per split index.
        """
        window = self.instance_length
        target_series = data[self.target_field]
        n_steps = target_series.shape[-1]
        field_names = self.dynamic_feature_fields + [self.target_field]

        if not is_train:
            split_points = [n_steps]
        elif n_steps >= window:
            split_points = self.instance_sampler(target_series, window,
                                                 n_steps)
        elif self.allow_target_padding:
            split_points = [n_steps]
        else:
            # Original author's warning: returning [] for all time series
            # will cause the consumer to loop forever.
            split_points = []

        # Future feature slices are only emitted outside of training.
        emit_future = self.use_prediction_features and not is_train

        for split in split_points:
            entry = data.copy()
            n_pad = max(window - split, 0)

            # The window begins `window` steps before the split point.
            entry[self.start_field] = shift_timestamp(data[self.start_field],
                                                      split - window)

            # 1 marks padded positions; an empty slice leaves all zeros.
            pad_mask = np.zeros(window)
            pad_mask[:n_pad] = 1
            entry[self.is_pad_field] = pad_mask

            for name in field_names:
                series = data[name]
                if n_pad > 0:
                    filler = self.pad_value * np.ones(
                        shape=series.shape[:-1] + (n_pad, ))
                    history = np.concatenate([filler, series[..., :split]],
                                             axis=-1)
                else:
                    history = series[..., (split - window):split]

                if self.time_first:
                    history = history.transpose()
                entry[self._past(name)] = history

                # The target never gets a future slice here.
                if emit_future and name != self.target_field:
                    upcoming = series[...,
                                      split:split + self.prediction_length]
                    if self.time_first:
                        upcoming = upcoming.transpose()
                    entry[self._future(name)] = upcoming

                # The raw series is replaced by its windowed versions.
                del entry[name]

            entry[self.forecast_start_field] = shift_timestamp(
                entry[self.start_field], window)

            yield entry
Пример #2
0
    def flatmap_transform(self, data: DataEntry,
                          is_train: bool) -> Iterator[DataEntry]:
        """Split a marked event sequence around sampled forecast start times.

        Row 0 of the target holds inter-arrival times and the remaining rows
        hold marks. Times are measured in units of the start timestamp's
        ``freq.delta``, relative to the start timestamp. For each forecast
        start time the events in the preceding ``past_interval_length`` are
        emitted under ``past_<target>``, re-expressed as inter-arrival times
        relative to the window start, together with ``past_valid_length``.
        In training mode the same is done for the following
        ``future_interval_length`` under ``future_<target>`` and
        ``future_valid_length``.

        Parameters
        ----------
        data
            Entry containing the start, end and target fields. All other
            fields are passed through unchanged.
        is_train
            When true, forecast start times come from ``train_sampler`` (or
            there are none if the interval is shorter than past + future).
            When false, the single forecast start is the end of the interval
            and no future window is produced.

        Yields
        ------
        DataEntry
            A fresh dict per forecast start time.
        """
        start = data[self.start_field]
        end = data[self.end_field]
        assert start.freq == end.freq

        unit = start.freq.delta
        horizon = (end - start) / unit

        past_len = self.past_interval_length
        future_len = self.future_interval_length

        # Forecast start times are sampled in continuous time.
        if not is_train:
            forecast_points = np.array([horizon])
        elif horizon < (future_len + past_len):
            forecast_points = np.array([])
        else:
            forecast_points = self.train_sampler(past_len,
                                                 horizon - future_len)

        target = data[self.target_field]
        marks = target[1:, :]
        # Absolute arrival times, relative to the start timestamp.
        arrivals = np.cumsum(target[0, :])
        # NOTE(review): arrivals[-1] raises IndexError for a target with no
        # events -- confirm whether empty sequences can reach this point.
        assert arrivals[-1] < horizon, (
            "Target interarrival times provided are inconsistent with "
            "start and end timestamps.")

        # Everything except target/start/end is copied to each output.
        excluded = (self.target_field, self.start_field, self.end_field)
        passthrough = {
            key: value
            for key, value in data.items() if key not in excluded
        }

        def cut(lo, hi):
            # Events selected by _mask_sorted for (lo, hi), as
            # (events x features) plus the number of selected events.
            idx = self._mask_sorted(arrivals, lo, hi)
            gaps = np.diff(np.r_[0, arrivals[idx] - lo])[np.newaxis]
            events = np.concatenate([gaps, marks[:, idx]],
                                    axis=0).transpose()
            return events, np.array([len(idx)])

        for t0 in forecast_points:
            lo = t0 - past_len
            assert lo >= 0

            out = {}

            past_events, past_count = cut(lo, t0)
            out[f"past_{self.target_field}"] = past_events
            out["past_valid_length"] = past_count

            out[self.forecast_start_field] = start + unit * t0

            if is_train:  # the future window is only built for training
                hi = t0 + future_len
                assert hi <= horizon

                future_events, future_count = cut(t0, hi)
                out[f"future_{self.target_field}"] = future_events
                out["future_valid_length"] = future_count

            out.update(passthrough)

            yield out
Пример #3
0
    def flatmap_transform(self, data: DataEntry,
                          is_train: bool) -> Iterator[DataEntry]:
        """Split an entry into past/future pieces at each sampled index.

        For every split index ``i`` a shallow copy of ``data`` is produced in
        which each field in ``ts_fields`` plus the target field is replaced
        by ``_past(field)`` (the ``past_length`` steps before ``i`` on the
        last axis, zero-padded on the left when fewer exist) and
        ``_future(field)`` (up to ``future_length`` steps from ``i`` on).
        ``_past(is_pad_field)`` marks padded positions with 1 and
        ``forecast_start_field`` is the start timestamp shifted by ``i``.

        Parameters
        ----------
        data
            Entry to split; must contain the start field, the target field
            and every field in ``ts_fields``.
        is_train
            When true, split indices are every admissible index if
            ``is_full_batch`` is set, otherwise the output of
            ``train_sampler`` (none if the target is too short). When false,
            the single split index is the end of the target, so the target's
            future piece is empty along the last axis.

        Yields
        ------
        DataEntry
            One modified copy of ``data`` per split index.
        """
        n_past = self.past_length
        n_future = self.future_length
        allow_short = self.pick_incomplete
        fields = self.ts_fields + [self.target_field]
        target_series = data[self.target_field]
        n_obs = target_series.shape[-1]

        if not is_train:
            assert allow_short or n_obs >= n_past
            split_points = np.array([n_obs], dtype=int)
        elif self.is_full_batch:
            split_points = tuple(range(n_past, n_obs - n_future + 1))
        else:
            # Series that are too short cannot be handled during training
            # and are skipped; including them would require padding and
            # masking the loss.
            required = n_future if allow_short else n_past + n_future
            if n_obs < required:
                split_points = np.array([], dtype=int)
            else:
                lower = 0 if allow_short else n_past
                split_points = self.train_sampler(target_series, lower,
                                                  n_obs - n_future)

        for split in split_points:
            pad_length = max(n_past - split, 0)
            assert allow_short or pad_length == 0, (
                f"pad_length should be zero, got {pad_length}")

            entry = data.copy()
            for name in fields:
                series = entry[name]
                if pad_length > 0:
                    # Not enough history: left-pad with zeros.
                    zeros = np.zeros(
                        series.shape[:-1] + (pad_length, ),
                        dtype=series.dtype,
                    )
                    history = np.concatenate([zeros, series[..., :split]],
                                             axis=-1)
                else:
                    # Keep exactly the last n_past steps before the split.
                    history = series[..., split - n_past:split]
                upcoming = series[..., split:split + n_future]

                if self.time_first:
                    history = history.transpose()
                    upcoming = upcoming.transpose()

                entry[self._past(name)] = history
                entry[self._future(name)] = upcoming
                del entry[name]

            # 1 marks padded positions; an empty slice leaves all zeros.
            pad_flags = np.zeros(n_past)
            pad_flags[:pad_length] = 1

            entry[self._past(self.is_pad_field)] = pad_flags
            entry[self.forecast_start_field] = shift_timestamp(
                entry[self.start_field], split)
            yield entry
Пример #4
0
 def transform(self, data: DataEntry) -> DataEntry:
     """Apply ``self.func`` to a shallow copy of ``data`` and return its result.

     The caller's ``data`` mapping itself is not passed to ``func``.
     """
     snapshot = data.copy()
     return self.func(snapshot)
Пример #5
0
 def transform(self, data: DataEntry) -> DataEntry:
     """Store ``self.value`` under ``self.output_field`` unless already set.

     ``data`` is modified in place and returned; an existing entry under
     ``self.output_field`` is left untouched.
     """
     if self.output_field in data:
         return data
     data[self.output_field] = self.value
     return data
Пример #6
0
 def transform(self, data: DataEntry) -> DataEntry:
     """Remove every field named in ``self.field_names`` from ``data``.

     Names that are not present are ignored. ``data`` is modified in place
     and returned.
     """
     for name in self.field_names:
         # A default makes pop a no-op for missing keys.
         data.pop(name, None)
     return data