Example #1
def build_validation_spec(
        self, hparams: Dict[str, Any]) -> tf.estimator.EvalSpec:
    download_data_dir = get_download_data_dir()
    val_files = self._get_filenames(
        os.path.join(download_data_dir, "validation"))
    return tf.estimator.EvalSpec(
        self._input_fn(hparams, val_files, shuffle_and_repeat=False))
Example #2
def build_train_spec(
        self, hparams: Dict[str, Any]) -> tf.estimator.TrainSpec:
    download_data_dir = get_download_data_dir()
    train_files = self._get_filenames(
        os.path.join(download_data_dir, "train"))
    return tf.estimator.TrainSpec(
        self._input_fn(hparams, train_files, shuffle_and_repeat=True))
Example #3
def make_data_loaders(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[DataLoader, DataLoader]:
    download_data_dir = get_download_data_dir()
    batch_size = hparams["batch_size"]
    return (
        DataLoader(get_data(True, download_data_dir), batch_size=batch_size),
        DataLoader(get_data(False, download_data_dir), batch_size=batch_size),
    )
Example #4
def make_data_loaders(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[Optional[tp.DataFlow], Optional[tp.DataFlow]]:
    """Provides training and validation data for model training."""
    download_dir = get_download_data_dir()
    training_dataflow = tp.BatchData(
        tp.dataset.Mnist("train", dir=download_dir), hparams["batch_size"])
    validation_dataflow = tp.BatchData(
        tp.dataset.Mnist("test", dir=download_dir), hparams["batch_size"])

    return training_dataflow, validation_dataflow
Example #5
def make_data_loaders(
        experiment_config: Dict[str, Any],
        hparams: Dict[str, Any]) -> Tuple[DataLoader, DataLoader]:
    download_data_dir = get_download_data_dir()
    train_data = get_dataset(download_data_dir, True)
    validation_data = get_dataset(download_data_dir, False)
    batch_size = hparams["batch_size"]
    return (
        DataLoader(train_data, batch_size=batch_size),
        DataLoader(validation_data, batch_size=batch_size),
    )
Example #6
def create_cifar10_sequence(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[Sequence, Sequence]:
    """
    In this example we added some fields of note under the `data` field in the YAML experiment
    configuration: the `acceleration` field. Under this field, you can configure multithreading by
    setting `use_multiprocessing` to `False`, or set it to `True` for multiprocessing. You can also
    configure the number of workers (processes or threads depending on `use_multiprocessing`).

    Another thing of note are the data augmentation fields in hyperparameters. The fields here get
    passed through to Keras' `ImageDataGenerator` for real-time data augmentation.
    """
    acceleration = experiment_config["data"].get("acceleration")
    width_shift_range = hparams.get("width_shift_range", 0.0)
    height_shift_range = hparams.get("height_shift_range", 0.0)
    horizontal_flip = hparams.get("horizontal_flip", False)
    batch_size = hparams["batch_size"]

    download_dir = get_download_data_dir()
    (train_data, train_labels), (test_data, test_labels) = get_data(download_dir)

    # Setup training data loader.
    data_augmentation = {
        "width_shift_range": width_shift_range,
        "height_shift_range": height_shift_range,
        "horizontal_flip": horizontal_flip,
    }
    train = augment_data(train_data, train_labels, batch_size, data_augmentation)

    if acceleration:
        workers = acceleration.get("workers", 1)
        use_multiprocessing = acceleration.get("use_multiprocessing", False)
        train = KerasDataAdapter(train, workers=workers, use_multiprocessing=use_multiprocessing)

    # Setup validation data loader.
    test = keras.data.InMemorySequence(
        data=preprocess_data(test_data),
        labels=preprocess_labels(test_labels),
        batch_size=batch_size,
    )

    return train, test
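
For reference, below is a minimal sketch of the inputs this function expects, assuming the YAML experiment configuration described in the docstring has already been parsed into plain dicts; every concrete value is an illustrative assumption, not taken from a real configuration.

# Hypothetical inputs for create_cifar10_sequence (all values are assumptions).
experiment_config = {
    "data": {
        "acceleration": {
            "use_multiprocessing": True,  # True -> worker processes, False -> threads
            "workers": 4,                 # worker count for the KerasDataAdapter
        },
    },
}
hparams = {
    "batch_size": 128,
    # Augmentation fields passed through to Keras' ImageDataGenerator.
    "width_shift_range": 0.1,
    "height_shift_range": 0.1,
    "horizontal_flip": True,
}

train_seq, test_seq = create_cifar10_sequence(experiment_config, hparams)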
Example #7
def create_cifar10_tf_dataset(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[tf.data.Dataset, tf.data.Dataset]:

    download_dir = get_download_data_dir()

    cifar10 = tfds.image.Cifar10(data_dir=download_dir)
    cifar10.download_and_prepare(download_dir=download_dir)
    datasets = cifar10.as_dataset()
    ds_train, ds_test = datasets["train"], datasets["test"]

    ds_train = wrap_dataset(ds_train)
    ds_test = wrap_dataset(ds_test)

    batch_size = hparams["batch_size"]
    ds_train = ds_train.map(
        lambda x: (tf.divide(x["image"], 255), tf.one_hot(x["label"], NUM_CLASSES))
    )
    ds_test = ds_test.map(
        lambda x: (tf.divide(x["image"], 255), tf.one_hot(x["label"], NUM_CLASSES))
    )

    return ds_train.batch(batch_size), ds_test.batch(batch_size)