Example #1
def load_parsed_dataset(path: Path, freq: str) -> Iterator[Any]:
    yield from FileDataset(path, freq)
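These snippets show none of their imports. A plausible shared preamble, assuming the GluonTS 0.x MXNet-based API (the exact module paths, in particular for TimeSeriesItem and batchify, shifted across releases, so treat this as a best guess):

import json
import tempfile
from collections import defaultdict
from functools import partial
from pathlib import Path
from typing import Any, Iterator

import numpy as np
import pandas as pd
import pytest

from gluonts.dataset.common import FileDataset, ListDataset, TimeSeriesItem
from gluonts.dataset.field_names import FieldName
from gluonts.dataset.loader import TrainDataLoader
from gluonts.mx.batchify import batchify
from gluonts.transform import InstanceSampler, InstanceSplitter
from mxnet.context import current_context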
Example #2
def load_file_dataset(path: Path, freq: str) -> Iterator[Any]:
    return iter(FileDataset(path, freq))
Example #3
def load_file_dataset_cached(path: Path, freq: str) -> Iterator[Any]:
    return iter(FileDataset(path, freq, cache=True))
Example #4
def load_file_dataset_numpy(path: Path, freq: str) -> Iterator[Any]:
    for item in FileDataset(path, freq):
        item["start"] = pd.Period(item["start"])
        item["target"] = np.array(item["target"])
        yield item
Example #5
def _get_datasets(self):
    freq = self.hyperparameters["time_freq"]
    return {
        name: FileDataset(self.channels[name], freq)
        for name in DATASETS if name in self.channels
    }
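Example #5 references self.channels, self.hyperparameters, and a module-level DATASETS that are not shown. A minimal hypothetical host, just so the method has something to hang off (every name here except FileDataset is an assumption):

DATASETS = ("train", "validation", "test")  # assumed channel names

class TrainEnv:
    # Hypothetical container: any object exposing `channels` (name -> Path)
    # and `hyperparameters` (with a "time_freq" entry) would work.
    def __init__(self, channels: dict, hyperparameters: dict) -> None:
        self.channels = channels
        self.hyperparameters = hyperparameters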
Example #6
def load_parsed_dataset(path: Path, freq: str) -> Iterator[Any]:
    for item in FileDataset(path, freq):
        yield TimeSeriesItem(
            start=item["start"], target=item["target"], item="ABC"
        )
Example #7
@pytest.mark.parametrize("num_workers", [None, 1, 2])  # assumed worker counts
def test_train_loader_goes_over_all_data(num_workers) -> None:
    batch_size = 4
    num_batches_per_epoch = 4

    X = 3  # the dataset holds X times one epoch's worth of items

    simple_data = [{
        "start": "2012-01-01",
        "target": np.random.uniform(size=40).astype(float).tolist(),
        "item_id": i,
    } for i in range(batch_size * num_batches_per_epoch * X)]

    num_passes = 5
    num_epochs = X * num_passes

    def test_dataset(dataset):
        class ExactlyOneSampler(InstanceSampler):
            def __call__(self, ts: np.ndarray, a: int, b: int) -> np.ndarray:
                window_size = b - a + 1
                assert window_size > 0
                return np.array([a])

        transformation = InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            train_sampler=ExactlyOneSampler(),
            past_length=10,
            future_length=5,
            dummy_value=1.0,
        )

        dl = TrainDataLoader(
            dataset=dataset,
            transform=transformation,
            batch_size=batch_size,
            stack_fn=partial(batchify, ctx=current_context()),
            num_workers=num_workers,
            num_batches_per_epoch=num_batches_per_epoch,
        )

        item_ids = defaultdict(int)

        for epoch in range(num_epochs):
            for batch in dl:
                for item_id in batch["item_id"]:
                    item_ids[item_id] += 1

        for i in range(len(dataset)):
            assert num_passes - 1 <= item_ids[i] <= num_passes + 1

    test_dataset(ListDataset(simple_data, freq="1H"))

    with tempfile.TemporaryDirectory() as tmpdir:
        with open(tmpdir + "/data.json", "w") as f:
            for data in simple_data:
                json.dump(data, f)
                f.write("\n")

        test_dataset(FileDataset(Path(tmpdir), freq="1H"))
        test_dataset(FileDataset(Path(tmpdir), freq="1H", cache=True))
Example #8
def load_parsed_dataset(path: Path, freq: str) -> Iterator[Any]:
    for item in FileDataset(path, freq):
        yield TimeSeriesItem(start=item['start'],
                             target=item['target'],
                             item='ABC')
Example #9
def load_file_dataset_numpy(path: Path, freq: str) -> Iterator[Any]:
    for item in FileDataset(path, freq):
        item['start'] = pd.Timestamp(item['start'])
        item['target'] = np.array(item['target'])
        yield item
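A short usage sketch for the loaders above: write a JSON-lines file into a directory and stream it back with load_file_dataset_numpy. The data.json file name mirrors Example #7; the values are illustrative:

with tempfile.TemporaryDirectory() as tmpdir:
    with open(tmpdir + "/data.json", "w") as f:
        json.dump({"start": "2012-01-01", "target": [1.0, 2.0, 3.0]}, f)
        f.write("\n")

    for entry in load_file_dataset_numpy(Path(tmpdir), freq="1H"):
        print(entry["start"], entry["target"].shape)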