Example #1
# -*- coding:utf-8 -*-
# @version: 1.0
# @author: wuxikun
# @date: '2020/4/3'

import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
tfds.disable_progress_bar()
import numpy as np
print(tf.__version__)

(train_data, test_data), info = tfds.load(
    # Use the version pre-encoded with an ~8k vocabulary.
    'imdb_reviews/subwords8k',
    # Return the train/test datasets as a tuple.
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    # Return (example, label) pairs from the dataset (instead of a dictionary).
    as_supervised=True,
    # Also return the `info` structure.
    with_info=True)

encoder = info.features['text'].encoder

print('Vocabulary size: {}'.format(encoder.vocab_size))
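With as_supervised=True the dataset yields (encoded_text, label) pairs, so the encoder can be used to inspect examples directly. A minimal sketch, assuming only the train_data and encoder variables defined above:

for example, label in train_data.take(1):
    ids = example.numpy()
    print('First 10 subword ids:', ids[:10])
    print('Decoded text:', encoder.decode(list(ids[:10])))
    print('Label:', label.numpy())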
Example #2
def main(_):
    if FLAGS.module_import:
        import_modules(FLAGS.module_import)

    if FLAGS.debug_start:
        pdb.set_trace()
    if FLAGS.sleep_start:
        time.sleep(60 * 60 * 3)

    if FLAGS.disable_tqdm:
        logging.info("Disabling tqdm.")
        tfds.disable_progress_bar()

    if FLAGS.checksums_dir:
        tfds.download.add_checksums_dir(FLAGS.checksums_dir)

    datasets_to_build = set(FLAGS.datasets and FLAGS.datasets.split(",")
                            or tfds.list_builders())
    datasets_to_build -= set(FLAGS.exclude_datasets.split(","))
    version = "experimental_latest" if FLAGS.experimental_latest_version else None
    logging.info("Running download_and_prepare for datasets:\n%s",
                 "\n".join(datasets_to_build))
    logging.info('Version: "%s"', version)
    builders = {
        name: tfds.builder(name, data_dir=FLAGS.data_dir, version=version)
        for name in datasets_to_build
    }

    if FLAGS.builder_config_id is not None:
        # Requesting a single config of a single dataset
        if len(builders) > 1:
            raise ValueError(
                "--builder_config_id can only be used when building a single dataset"
            )
        builder = builders[list(builders.keys())[0]]
        if not builder.BUILDER_CONFIGS:
            raise ValueError(
                "--builder_config_id can only be used with datasets with configs"
            )
        config = builder.BUILDER_CONFIGS[FLAGS.builder_config_id]
        logging.info("Running download_and_prepare for config: %s",
                     config.name)
        builder_for_config = tfds.builder(builder.name,
                                          data_dir=FLAGS.data_dir,
                                          config=config,
                                          version=version)
        download_and_prepare(builder_for_config)
    else:
        for name, builder in builders.items():
            if builder.BUILDER_CONFIGS and "/" not in name:
                # If builder has multiple configs, and no particular config was
                # requested, then compute all.
                for config in builder.BUILDER_CONFIGS:
                    builder_for_config = tfds.builder(builder.name,
                                                      data_dir=FLAGS.data_dir,
                                                      config=config,
                                                      version=version)
                    download_and_prepare(builder_for_config)
            else:
                # If there is a slash in the name, then user requested a specific
                # dataset configuration.
                download_and_prepare(builder)
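The main() above drives tfds.builder() and download_and_prepare() from command-line flags; stripped of the flag handling, the core workflow looks roughly like this (the dataset name and data_dir below are placeholders, not values from the script):

import tensorflow_datasets as tfds

builder = tfds.builder("mnist", data_dir="/tmp/tfds")  # placeholder dataset/path
builder.download_and_prepare()                         # download and write TFRecords
ds_train = builder.as_dataset(split="train")           # read the prepared split
print(builder.info.splits["train"].num_examples)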
Example #3
def main(_):
    tfds.disable_progress_bar()

    if _DATASET == "mnist" or _DATASET == "fashion_mnist":
        input_shape = (28, 28, 1)
        output_size = 10
    elif _DATASET == "cifar10" or _DATASET == "svhn_cropped":
        input_shape = (32, 32, 3)
        output_size = 10
    elif _DATASET == "cifar100":
        input_shape = (32, 32, 3)
        output_size = 100
    else:
        raise ValueError("Unsupported dataset: {}".format(_DATASET))

    if _HPARAMS:
        ds_train, ds_test = tfds.load(_DATASET,
                                      split=["train", "test"],
                                      shuffle_files=True,
                                      as_supervised=True)
    else:
        ds_train, ds_test = tfds.load(
            _DATASET,
            split=["train[0%:90%]", "train[90%:100%]"],
            shuffle_files=True,
            as_supervised=True)
    ds_train = ds_train.map(normalize_img,
                            num_parallel_calls=tf.data.experimental.AUTOTUNE)
    ds_train = ds_train.cache()
    ds_train = ds_train.batch(128)
    ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)
    ds_test = ds_test.map(normalize_img,
                          num_parallel_calls=tf.data.experimental.AUTOTUNE)
    ds_test = ds_test.batch(128)
    ds_test = ds_test.cache()
    ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE)

    if _HPARAMS:
        hparams = _HPARAMS.split(";")
        conv_units1 = int(hparams[0])
        conv_units2 = int(hparams[1])
        conv_units3 = int(hparams[2])
        dense_units1 = int(hparams[3])
        dense_units2 = int(hparams[4])
        kernel_width = int(hparams[5])
        pool_width = int(hparams[6])
        epochs = int(hparams[7])
    else:
        conv_units1 = int(np.round(random.uniform(8, 512)))
        conv_units2 = int(np.round(random.uniform(8, 512)))
        conv_units3 = int(np.round(random.uniform(8, 512)))
        dense_units1 = int(np.round(random.uniform(8, 512)))
        dense_units2 = int(np.round(random.uniform(8, 512)))
        kernel_width = int(np.round(random.uniform(2, 6)))
        pool_width = int(np.round(random.uniform(2, 6)))
        epochs = int(np.round(random.uniform(1, 25)))

    model = tf.keras.models.Sequential()
    model.add(
        tf.keras.layers.Conv2D(conv_units1, (kernel_width, kernel_width),
                               activation="relu",
                               padding="same",
                               input_shape=input_shape))
    model.add(
        tf.keras.layers.MaxPooling2D((pool_width, pool_width), padding="same"))
    model.add(
        tf.keras.layers.Conv2D(conv_units2, (kernel_width, kernel_width),
                               padding="same",
                               activation="relu"))
    model.add(
        tf.keras.layers.MaxPooling2D((pool_width, pool_width), padding="same"))
    model.add(
        tf.keras.layers.Conv2D(conv_units3, (kernel_width, kernel_width),
                               padding="same",
                               activation="relu"))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(dense_units1, activation="relu"))
    model.add(tf.keras.layers.Dense(dense_units2, activation="relu"))
    model.add(tf.keras.layers.Dense(output_size, activation="softmax"))

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(),
        metrics=["accuracy"],
    )
    history = model.fit(ds_train,
                        epochs=epochs,
                        validation_data=ds_test,
                        verbose=0)

    print("[metric] conv_units1=" + str(conv_units1))
    print("[metric] conv_units2=" + str(conv_units2))
    print("[metric] conv_units3=" + str(conv_units3))
    print("[metric] dense_units1=" + str(dense_units1))
    print("[metric] dense_units2=" + str(dense_units2))
    print("[metric] kernel_width=" + str(kernel_width))
    print("[metric] pool_width=" + str(pool_width))
    print("[metric] epochs=" + str(epochs))
    print("[metric] val_accuracy=" + str(history.history["val_accuracy"][-1]))
    print(history.history)
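normalize_img is used in the input pipelines above but not shown in this snippet; a typical definition, mirroring the scale() helpers in the later examples (an assumption, not the original code):

def normalize_img(image, label):
    # Cast uint8 pixel values to float32 in [0, 1].
    return tf.cast(image, tf.float32) / 255.0, label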
Example #4
def main_fun(args, ctx):
    import numpy as np
    import tensorflow as tf
    import tensorflow_datasets as tfds
    from tensorflowonspark import TFNode

    tfds.disable_progress_bar()

    class StopFeedHook(tf.estimator.SessionRunHook):
        """SessionRunHook to terminate InputMode.SPARK RDD feeding if the training loop exits before the entire RDD is consumed."""
        def __init__(self, feed):
            self.feed = feed

        def end(self, session):
            self.feed.terminate()
            self.feed.next_batch(1)

    BUFFER_SIZE = args.buffer_size
    BATCH_SIZE = args.batch_size
    LEARNING_RATE = args.learning_rate

    tf_feed = TFNode.DataFeed(ctx.mgr)

    def rdd_generator():
        while not tf_feed.should_stop():
            batch = tf_feed.next_batch(1)
            if len(batch) > 0:
                example = batch[0]
                image = np.array(example[0]).astype(np.float32) / 255.0
                image = np.reshape(image, (28, 28, 1))
                label = np.array(example[1]).astype(np.float32)
                label = np.reshape(label, (1, ))
                yield (image, label)
            else:
                return

    def input_fn(mode, input_context=None):
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Note: Spark is responsible for sharding/repeating/shuffling the data via RDD
            ds = tf.data.Dataset.from_generator(
                rdd_generator, (tf.float32, tf.float32),
                (tf.TensorShape([28, 28, 1]), tf.TensorShape([1])))
            return ds.batch(BATCH_SIZE)
        else:
            # Evaluation data is read directly from TFDS (Spark only feeds the
            # training loop), sharded across input pipelines when requested.
            def scale(image, label):
                image = tf.cast(image, tf.float32) / 255.0
                return image, label

            datasets, info = tfds.load(name='mnist', with_info=True, as_supervised=True)
            ds = datasets['test']
            if input_context:
                ds = ds.shard(input_context.num_input_pipelines,
                              input_context.input_pipeline_id)
            return ds.map(scale).batch(BATCH_SIZE)

    def serving_input_receiver_fn():
        features = tf.compat.v1.placeholder(dtype=tf.float32,
                                            shape=[None, 28, 28, 1],
                                            name='features')
        receiver_tensors = {'features': features}
        return tf.estimator.export.ServingInputReceiver(
            receiver_tensors, receiver_tensors)

    def model_fn(features, labels, mode):
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(32,
                                   3,
                                   activation='relu',
                                   input_shape=(28, 28, 1)),
            tf.keras.layers.MaxPooling2D(),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(10)  # output raw logits (loss uses from_logits=True)
        ])
        logits = model(features, training=False)

        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {'logits': logits}
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

        optimizer = tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=LEARNING_RATE)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True, reduction=tf.keras.losses.Reduction.NONE)(labels,
                                                                        logits)
        loss = tf.reduce_sum(input_tensor=loss) * (1. / BATCH_SIZE)
        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss)

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=optimizer.minimize(
                loss, tf.compat.v1.train.get_or_create_global_step()))

    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
    config = tf.estimator.RunConfig(train_distribute=strategy,
                                    save_checkpoints_steps=100)

    classifier = tf.estimator.Estimator(model_fn=model_fn,
                                        model_dir=args.model_dir,
                                        config=config)

    # exporter = tf.estimator.FinalExporter("serving", serving_input_receiver_fn=serving_input_receiver_fn)

    # Note: MultiWorkerMirroredStrategy (CollectiveAllReduceStrategy) is synchronous,
    # so we need to ensure that all workers complete training before any of them run out of data from the RDD.
    # And given that Spark RDD partitions (and partition sizes) can be non-evenly divisible by num_workers,
    # we'll just stop training at 90% of the total expected number of steps.
    steps = 60000 * args.epochs / args.batch_size
    steps_per_worker = steps / ctx.num_workers
    max_steps_per_worker = steps_per_worker * 0.9
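    # For illustration only (numbers not from the source): with epochs=1,
    # batch_size=64 and 2 workers, steps = 60000 / 64 = 937.5,
    # steps_per_worker = 468.75 and max_steps_per_worker ≈ 422.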

    tf.estimator.train_and_evaluate(
        classifier,
        train_spec=tf.estimator.TrainSpec(input_fn=input_fn,
                                          max_steps=max_steps_per_worker,
                                          hooks=[StopFeedHook(tf_feed)]),
        eval_spec=tf.estimator.EvalSpec(input_fn=input_fn)
        # eval_spec=tf.estimator.EvalSpec(input_fn=input_fn, exporters=exporter)
    )

    if ctx.job_name == 'chief':
        print("Exporting saved_model to {}".format(args.export_dir))
        classifier.export_saved_model(args.export_dir,
                                      serving_input_receiver_fn)
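The Dataset.from_generator() pattern used for the Spark feed above works with any Python generator; a self-contained sketch with synthetic data standing in for the RDD (purely illustrative, not part of the original job):

import numpy as np
import tensorflow as tf

def toy_generator():
    # Stand-in for rdd_generator: yield (image, label) pairs one at a time.
    for _ in range(256):
        yield (np.random.rand(28, 28, 1).astype(np.float32),
               np.array([float(np.random.randint(10))], dtype=np.float32))

ds = tf.data.Dataset.from_generator(
    toy_generator, (tf.float32, tf.float32),
    (tf.TensorShape([28, 28, 1]), tf.TensorShape([1])))
ds = ds.batch(32)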
Example #5
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt

tfds.disable_progress_bar()  # suppress the TFDS download/preparation progress bar
import numpy as np

print(tf.__version__)

(train_data, test_data), info = tfds.load(
    'imdb_reviews/subwords8k',  # pre-encoded with ~8k vocabulary
    split=(tfds.Split.TRAIN,
           tfds.Split.TEST),  # returns the train and test datasets as a tuple
    as_supervised=True,  # returns the data as (examples, labels)
    with_info=True)  # returns the 'info' structure with the data set

encoder = info.features[
    'text'].encoder  # setting up the encoder 'SubwordTextEncoder'
print('Vocabulary size: ',
      encoder.vocab_size)  # prints out the size of the vocabulary

sample_string = 'Hello TensorFlow.'  # a sample string to encode

encoded_string = encoder.encode(
    sample_string)  # reversibly encodes any string passed into the encoder
print('Encoded string is ', encoded_string)
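The subword encoding is reversible; a short continuation sketch that decodes the ids back (assuming the encoder and encoded_string defined above):

original_string = encoder.decode(encoded_string)
print('Decoded string is ', original_string)
assert original_string == sample_string

for ts in encoded_string:
    print('{} ----> {}'.format(ts, encoder.decode([ts])))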
Example #6
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to allocate only 8.5 GB of memory on the first GPU
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0], [
                tf.config.experimental.VirtualDeviceConfiguration(
                    memory_limit=8500)
            ])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)

tfds.disable_progress_bar()  # disable tqdm progress bar
AUTOTUNE = tf.data.experimental.AUTOTUNE

print("TensorFlow Version: ", tf.__version__)
print("Number of GPU available: ",
      len(tf.config.experimental.list_physical_devices("GPU")))


def read_and_label(file_path):
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    label = get_label(file_path)
    return img, label


def decode_img(img):
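    # The function body is cut off in the original snippet; a minimal completion
    # sketch follows (the 224x224 target size is an assumption, not from the source).
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    return tf.image.resize(img, [224, 224])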
Example #7
def main_fun(args, ctx):
  import tensorflow_datasets as tfds
  import tensorflow as tf
  from tensorflowonspark import compat

  tfds.disable_progress_bar()

  strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()

  BUFFER_SIZE = args.buffer_size
  BATCH_SIZE = args.batch_size
  NUM_WORKERS = args.cluster_size

  # Scale MNIST pixel values from [0, 255] to [0., 1.]
  def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255
    return image, label

  datasets, info = tfds.load(name='mnist',
                             with_info=True,
                             as_supervised=True)

  train_datasets_unbatched = datasets['train'].repeat().map(scale).shuffle(BUFFER_SIZE)

  def build_and_compile_cnn_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
        metrics=['accuracy'])
    return model

  # single node
  # single_worker_model = build_and_compile_cnn_model()
  # single_worker_model.fit(x=train_datasets, epochs=3)

  # Here the batch size scales up by the number of workers, since
  # `tf.data.Dataset.batch` expects the global batch size: e.g. a per-worker
  # batch of 64 with 2 workers gives a global batch of 128.
  GLOBAL_BATCH_SIZE = BATCH_SIZE * NUM_WORKERS
  train_datasets = train_datasets_unbatched.batch(GLOBAL_BATCH_SIZE)

  # This fails, so checkpoint weights only with the filepath pattern below:
  # callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath=args.model_dir)]
  tf.io.gfile.makedirs(args.model_dir)
  filepath = args.model_dir + "/weights-{epoch:04d}"
  callbacks = [
    tf.keras.callbacks.ModelCheckpoint(filepath=filepath, verbose=1, save_weights_only=True),
    tf.keras.callbacks.TensorBoard(log_dir=args.model_dir)
  ]

  with strategy.scope():
    multi_worker_model = build_and_compile_cnn_model()
  multi_worker_model.fit(x=train_datasets, epochs=args.epochs, steps_per_epoch=args.steps_per_epoch, callbacks=callbacks)

  from tensorflow_estimator.python.estimator.export import export_lib
  export_dir = export_lib.get_timestamped_export_dir(args.export_dir)
  compat.export_saved_model(multi_worker_model, export_dir, ctx.job_name == 'chief')
Example #8
import os
import tensorflow
import tensorflow_datasets

tensorflow_datasets.disable_progress_bar()

print(tensorflow.__version__)

datasets, info = tensorflow_datasets.load(name='mnist',
                                          with_info=True,
                                          as_supervised=True)

mnist_train, mnist_test = datasets['train'], datasets['test']

strategy = tensorflow.distribute.MirroredStrategy()

print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

# You can also do info.splits.total_num_examples to get the total number of examples in the dataset.

num_train_examples = info.splits['train'].num_examples
num_test_examples = info.splits['test'].num_examples

BUFFER_SIZE = 10000

BATCH_SIZE_PER_REPLICA = 64
BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync


def scale(image, label):
    image = tensorflow.cast(image, tensorflow.float32)
    image /= 255
    return image, label
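The original snippet ends here; a sketch of how this MirroredStrategy pattern typically continues, reusing the variables defined above (the model architecture is borrowed from Examples #4 and #7, not from this snippet):

train_dataset = mnist_train.map(scale).cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
eval_dataset = mnist_test.map(scale).batch(BATCH_SIZE)

with strategy.scope():
    model = tensorflow.keras.Sequential([
        tensorflow.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
        tensorflow.keras.layers.MaxPooling2D(),
        tensorflow.keras.layers.Flatten(),
        tensorflow.keras.layers.Dense(64, activation='relu'),
        tensorflow.keras.layers.Dense(10)
    ])
    model.compile(loss=tensorflow.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  optimizer=tensorflow.keras.optimizers.Adam(),
                  metrics=['accuracy'])

model.fit(train_dataset, epochs=3, validation_data=eval_dataset)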
Example #9
def main():
    parser = CycleGANArgParser(is_demo=False)
    args = parser.parse_args()
    if not args.no_hpu:
        from habana_frameworks.tensorflow import load_habana_module
        load_habana_module()
        if args.habana_instance_norm:
            tfa.layers.InstanceNormalization = HabanaInstanceNormalization
        if args.data_type == 'bf16':
            tf.keras.mixed_precision.set_global_policy('mixed_bfloat16')
    if args.run_deterministic:
        tf.random.set_seed(12345)
    input_image_shape = (args.crop, args.crop, 3)
    input_transformation = TrasformInputs(orig_img_size=(
        args.resize, args.resize), input_img_size=(args.crop, args.crop))

    horovod = None
    if args.use_horovod:
        from TensorFlow.common.horovod_helpers import hvd as horovod
        horovod.init()
        if args.log_all_workers:
            args.logdir = os.path.join(args.logdir, f"worker_{horovod.rank()}")

    tfds.disable_progress_bar()
    # Load the horse-zebra dataset using tensorflow-datasets.
    if is_local_master(args.use_horovod, horovod):
        dataset, _ = tfds.load("cycle_gan/horse2zebra", data_dir=args.dataset_dir,
                               with_info=True, as_supervised=True, download=True)
        if args.use_horovod:
            horovod.broadcast(0, 0)  # nodes synchronization
    else:
        if args.use_horovod:
            horovod.broadcast(0, 0)
        dataset, _ = tfds.load(
            "cycle_gan/horse2zebra", data_dir=args.dataset_dir, with_info=True, as_supervised=True)

    train_horses, train_zebras = dataset["trainA"], dataset["trainB"]
    test_horses, test_zebras = dataset["testA"], dataset["testB"]

    # Apply the preprocessing operations to the training data
    train_horses = (
        train_horses.map(
            input_transformation.preprocess_train_image, num_parallel_calls=1 if args.run_deterministic else autotune)
        .cache()
        .shuffle(args.buffer)
        .batch(args.batch_size, drop_remainder=True)
    )
    train_zebras = (
        train_zebras.map(
            input_transformation.preprocess_train_image, num_parallel_calls=1 if args.run_deterministic else autotune)
        .cache()
        .shuffle(args.buffer)
        .batch(args.batch_size, drop_remainder=True)
    )
    train_ds = tf.data.Dataset.zip((train_horses, train_zebras))
    test_ds = test_horses, test_zebras

    disc_X = get_discriminator(input_image_shape, name="discriminator_X")
    disc_Y = get_discriminator(input_image_shape, name="discriminator_Y")
    gen_X = get_resnet_generator(input_image_shape, name="generator_X")
    gen_Y = get_resnet_generator(input_image_shape, name="generator_Y")

    # Create cycle gan model
    cycle_gan_model = CycleGan(
        generator_X=gen_X, generator_Y=gen_Y, discriminator_X=disc_X, discriminator_Y=disc_Y
    )

    latest = None
    if args.restore:
        print(f"Trying to restore checkpoint from {args.logdir}")
        latest = tf.train.latest_checkpoint(args.logdir)

    if args.train:
        train(args, cycle_gan_model, train_ds, test_ds, latest, horovod)

    if args.test and is_master(args.use_horovod, horovod):
        eval(args, cycle_gan_model, test_ds, input_transformation, latest)
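is_local_master and is_master are called above but not defined in this snippet; typical Horovod-style helpers look roughly like this (an assumption, not the repository's actual code):

def is_master(use_horovod, hvd):
    # Global rank 0 is the single master across all nodes.
    return (not use_horovod) or hvd.rank() == 0


def is_local_master(use_horovod, hvd):
    # One master per node, used e.g. to avoid concurrent dataset downloads.
    return (not use_horovod) or hvd.local_rank() == 0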