# -*- coding:utf-8 -*-
# @version: 1.0
# @author: wuxikun
# @date: '2020/4/3'
#
# Loads the sub-word encoded IMDB reviews dataset and prints the size of the
# vocabulary used by its text encoder.
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds

tfds.disable_progress_bar()

print(tf.__version__)

# 'imdb_reviews/subwords8k' is the variant pre-encoded with an ~8k vocabulary.
# The call below asks for:
#   * the train/test datasets as a tuple,
#   * (example, label) pairs instead of feature dictionaries,
#   * the dataset `info` structure alongside the data.
_splits, info = tfds.load(
    'imdb_reviews/subwords8k',
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    as_supervised=True,
    with_info=True,
)
train_data, test_data = _splits

# The text feature carries the encoder used to produce the sub-word ids.
encoder = info.features['text'].encoder
print('Vocabulary size: {}'.format(encoder.vocab_size))
def main(_):
    """Run ``download_and_prepare`` for the datasets selected by the flags.

    The datasets come from ``FLAGS.datasets`` (comma separated) or, when that
    flag is empty, from ``tfds.list_builders()``; names listed in
    ``FLAGS.exclude_datasets`` are removed. When ``FLAGS.builder_config_id`` is
    set, exactly one dataset must be selected and only that config is built;
    otherwise every config of each dataset is built unless the dataset name
    already contains a ``/``.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``--builder_config_id`` is combined with more than one
            dataset, or with a dataset that has no builder configs.
    """
    # Optional start-up behaviour driven by flags.
    if FLAGS.module_import:
        import_modules(FLAGS.module_import)
    if FLAGS.debug_start:
        pdb.set_trace()
    if FLAGS.sleep_start:
        time.sleep(60 * 60 * 3)
    if FLAGS.disable_tqdm:
        logging.info("Disabling tqdm.")
        tfds.disable_progress_bar()
    if FLAGS.checksums_dir:
        tfds.download.add_checksums_dir(FLAGS.checksums_dir)

    # Work out which datasets to build.
    if FLAGS.datasets:
        requested = FLAGS.datasets.split(",")
    else:
        requested = tfds.list_builders()
    excluded = FLAGS.exclude_datasets.split(",")
    datasets_to_build = set(requested).difference(excluded)

    version = None
    if FLAGS.experimental_latest_version:
        version = "experimental_latest"

    logging.info("Running download_and_prepare for datasets:\n%s",
                 "\n".join(datasets_to_build))
    logging.info('Version: "%s"', version)

    builders = {}
    for dataset_name in datasets_to_build:
        builders[dataset_name] = tfds.builder(dataset_name,
                                              data_dir=FLAGS.data_dir,
                                              version=version)

    def _prepare_config(dataset_builder, config):
        # Re-create the builder bound to one specific config, then build it.
        builder_for_config = tfds.builder(dataset_builder.name,
                                          data_dir=FLAGS.data_dir,
                                          config=config,
                                          version=version)
        download_and_prepare(builder_for_config)

    if FLAGS.builder_config_id is None:
        for dataset_name, dataset_builder in builders.items():
            build_every_config = (dataset_builder.BUILDER_CONFIGS
                                  and "/" not in dataset_name)
            if not build_every_config:
                # A slash in the name means the user requested a specific
                # dataset configuration (or the builder has no configs).
                download_and_prepare(dataset_builder)
                continue
            # Builder has multiple configs and none was requested: build all.
            for config in dataset_builder.BUILDER_CONFIGS:
                _prepare_config(dataset_builder, config)
        return

    # Requesting a single config of a single dataset.
    if len(builders) > 1:
        raise ValueError(
            "--builder_config_id can only be used when building a single dataset"
        )
    sole_builder = list(builders.values())[0]
    if not sole_builder.BUILDER_CONFIGS:
        raise ValueError(
            "--builder_config_id can only be used with datasets with configs"
        )
    config = sole_builder.BUILDER_CONFIGS[FLAGS.builder_config_id]
    logging.info("Running download_and_prepare for config: %s", config.name)
    _prepare_config(sole_builder, config)
def main(_):
    """Train a small CNN on ``_DATASET`` and print hyperparameters and metrics.

    When ``_HPARAMS`` is set it is parsed as eight ``;``-separated integers
    (three conv widths, two dense widths, kernel width, pool width, epochs) and
    the model is trained on ``train`` and validated on ``test``. Otherwise the
    hyperparameters are drawn at random and the last 10% of ``train`` is used
    for validation.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``_DATASET`` is not one of the supported datasets. This
            is raised up front; previously the function only failed later,
            with a ``NameError``, after the data had already been loaded.
    """
    tfds.disable_progress_bar()

    # (input_shape, number_of_classes) for every dataset this script supports.
    dataset_geometry = {
        "mnist": ((28, 28, 1), 10),
        "fashion_mnist": ((28, 28, 1), 10),
        "cifar10": ((32, 32, 3), 10),
        "svhn_cropped": ((32, 32, 3), 10),
        "cifar100": ((32, 32, 3), 100),
    }
    if _DATASET not in dataset_geometry:
        raise ValueError(
            "Unsupported dataset {!r}; expected one of: {}".format(
                _DATASET, ", ".join(sorted(dataset_geometry))))
    input_shape, output_size = dataset_geometry[_DATASET]

    if _HPARAMS:
        # Explicit hyperparameters: evaluate on the real test split.
        splits = ["train", "test"]
    else:
        # Random search: hold out the last 10% of train for validation.
        splits = ["train[0%:90%]", "train[90%:100%]"]
    ds_train, ds_test = tfds.load(_DATASET,
                                  split=splits,
                                  shuffle_files=True,
                                  as_supervised=True)

    autotune = tf.data.experimental.AUTOTUNE

    ds_train = ds_train.map(normalize_img, num_parallel_calls=autotune)
    ds_train = ds_train.cache()
    ds_train = ds_train.batch(128)
    ds_train = ds_train.prefetch(autotune)

    ds_test = ds_test.map(normalize_img, num_parallel_calls=autotune)
    ds_test = ds_test.batch(128)
    ds_test = ds_test.cache()
    ds_test = ds_test.prefetch(autotune)

    if _HPARAMS:
        fields = _HPARAMS.split(";")
        # Only the first eight fields are used; extra fields are ignored.
        values = [int(fields[index]) for index in range(8)]
    else:

        def _draw(low, high):
            # Uniform draw rounded to the nearest integer.
            return int(np.round(random.uniform(low, high)))

        # The draw order is kept fixed so seeded runs stay reproducible.
        values = [
            _draw(8, 512),  # conv_units1
            _draw(8, 512),  # conv_units2
            _draw(8, 512),  # conv_units3
            _draw(8, 512),  # dense_units1
            _draw(8, 512),  # dense_units2
            _draw(2, 6),  # kernel_width
            _draw(2, 6),  # pool_width
            _draw(1, 25),  # epochs
        ]
    (conv_units1, conv_units2, conv_units3, dense_units1, dense_units2,
     kernel_width, pool_width, epochs) = values

    kernel = (kernel_width, kernel_width)
    pool = (pool_width, pool_width)

    model = tf.keras.models.Sequential()
    model.add(
        tf.keras.layers.Conv2D(conv_units1,
                               kernel,
                               activation="relu",
                               padding="same",
                               input_shape=input_shape))
    model.add(tf.keras.layers.MaxPooling2D(pool, padding="same"))
    model.add(
        tf.keras.layers.Conv2D(conv_units2,
                               kernel,
                               padding="same",
                               activation="relu"))
    model.add(tf.keras.layers.MaxPooling2D(pool, padding="same"))
    model.add(
        tf.keras.layers.Conv2D(conv_units3,
                               kernel,
                               padding="same",
                               activation="relu"))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(dense_units1, activation="relu"))
    model.add(tf.keras.layers.Dense(dense_units2, activation="relu"))
    model.add(tf.keras.layers.Dense(output_size, activation="softmax"))

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=tf.keras.optimizers.Adam(),
        metrics=["accuracy"],
    )

    history = model.fit(ds_train,
                        epochs=epochs,
                        validation_data=ds_test,
                        verbose=0)

    # Emit one "[metric] name=value" line per hyperparameter, then the result.
    reported = (
        ("conv_units1", conv_units1),
        ("conv_units2", conv_units2),
        ("conv_units3", conv_units3),
        ("dense_units1", dense_units1),
        ("dense_units2", dense_units2),
        ("kernel_width", kernel_width),
        ("pool_width", pool_width),
        ("epochs", epochs),
        ("val_accuracy", history.history["val_accuracy"][-1]),
    )
    for metric_name, metric_value in reported:
        print("[metric] " + metric_name + "=" + str(metric_value))
    print(history.history)
def main_fun(args, ctx):
    """Train an MNIST estimator from a Spark-fed RDD and export it on the chief.

    Reads ``args.buffer_size``, ``args.batch_size``, ``args.learning_rate``,
    ``args.model_dir``, ``args.epochs`` and ``args.export_dir``; reads
    ``ctx.mgr``, ``ctx.num_workers`` and ``ctx.job_name``.
    """
    import numpy as np
    import tensorflow as tf
    import tensorflow_datasets as tfds
    from tensorflowonspark import TFNode

    tfds.disable_progress_bar()

    class StopFeedHook(tf.estimator.SessionRunHook):
        """SessionRunHook to terminate InputMode.SPARK RDD feeding if the
        training loop exits before the entire RDD is consumed."""

        def __init__(self, feed):
            self.feed = feed

        def end(self, session):
            # Stop the feed, then request one more batch after terminating.
            self.feed.terminate()
            self.feed.next_batch(1)

    # NOTE(review): BUFFER_SIZE is assigned but not used anywhere in this function.
    BUFFER_SIZE = args.buffer_size
    BATCH_SIZE = args.batch_size
    LEARNING_RATE = args.learning_rate

    tf_feed = TFNode.DataFeed(ctx.mgr)

    def rdd_generator():
        # Yields one (image, label) pair per fed record until the feed says to
        # stop or an empty batch is returned.
        while not tf_feed.should_stop():
            batch = tf_feed.next_batch(1)
            if len(batch) > 0:
                example = batch[0]
                # Scale pixel values by 1/255 and reshape to (28, 28, 1).
                image = np.array(example[0]).astype(np.float32) / 255.0
                image = np.reshape(image, (28, 28, 1))
                label = np.array(example[1]).astype(np.float32)
                label = np.reshape(label, (1, ))
                yield (image, label)
            else:
                return

    def input_fn(mode, input_context=None):
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Note: Spark is responsible for sharding/repeating/shuffling the data via RDD
            ds = tf.data.Dataset.from_generator(
                rdd_generator, (tf.float32, tf.float32),
                (tf.TensorShape([28, 28, 1]), tf.TensorShape([1])))
            return ds.batch(BATCH_SIZE)
        else:
            raise Exception("I'm evaluating: mode={}, input_context={}".format(
                mode, input_context))

            # NOTE(review): everything below is unreachable -- the branches
            # above either return or raise. It also indexes the result of
            # tfds.load(..., with_info=True) with a string key; confirm the
            # intended evaluation path before re-enabling it.
            def scale(image, label):
                image = tf.cast(image, tf.float32) / 255.0
                return image, label

            mnist = tfds.load(name='mnist', with_info=True, as_supervised=True)
            ds = mnist['test']
            if input_context:
                ds = ds.shard(input_context.num_input_pipelines,
                              input_context.input_pipeline_id)
            return ds.map(scale).batch(BATCH_SIZE)

    def serving_input_receiver_fn():
        # Serving signature: a single float32 'features' placeholder of shape
        # [None, 28, 28, 1], used both as receiver tensor and as model feature.
        features = tf.compat.v1.placeholder(dtype=tf.float32,
                                            shape=[None, 28, 28, 1],
                                            name='features')
        receiver_tensors = {'features': features}
        return tf.estimator.export.ServingInputReceiver(
            receiver_tensors, receiver_tensors)

    def model_fn(features, labels, mode):
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(32,
                                   3,
                                   activation='relu',
                                   input_shape=(28, 28, 1)),
            tf.keras.layers.MaxPooling2D(),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax')
        ])
        # NOTE(review): the model is called with training=False in every mode,
        # and its last layer applies softmax while the loss below is built with
        # from_logits=True -- confirm both are intended.
        logits = model(features, training=False)

        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {'logits': logits}
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

        optimizer = tf.compat.v1.train.GradientDescentOptimizer(
            learning_rate=LEARNING_RATE)
        # Per-example loss (no reduction), summed and divided by BATCH_SIZE.
        loss = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True,
            reduction=tf.keras.losses.Reduction.NONE)(labels, logits)
        loss = tf.reduce_sum(input_tensor=loss) * (1. / BATCH_SIZE)
        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode, loss=loss)

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=optimizer.minimize(
                loss, tf.compat.v1.train.get_or_create_global_step()))

    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
    config = tf.estimator.RunConfig(train_distribute=strategy,
                                    save_checkpoints_steps=100)

    classifier = tf.estimator.Estimator(model_fn=model_fn,
                                        model_dir=args.model_dir,
                                        config=config)

    # exporter = tf.estimator.FinalExporter("serving", serving_input_receiver_fn=serving_input_receiver_fn)

    # Note: MultiWorkerMirroredStrategy (CollectiveAllReduceStrategy) is synchronous,
    # so we need to ensure that all workers complete training before any of them run out of data from the RDD.
    # And given that Spark RDD partitions (and partition sizes) can be non-evenly divisible by num_workers,
    # we'll just stop training at 90% of the total expected number of steps.
    # NOTE(review): 60000 is a hard-coded example count (presumably the MNIST
    # training set size -- confirm), and the divisions/multiplication below
    # produce a float max_steps under Python 3.
    steps = 60000 * args.epochs / args.batch_size
    steps_per_worker = steps / ctx.num_workers
    max_steps_per_worker = steps_per_worker * 0.9

    tf.estimator.train_and_evaluate(
        classifier,
        train_spec=tf.estimator.TrainSpec(input_fn=input_fn,
                                          max_steps=max_steps_per_worker,
                                          hooks=[StopFeedHook(tf_feed)]),
        eval_spec=tf.estimator.EvalSpec(input_fn=input_fn)
        # eval_spec=tf.estimator.EvalSpec(input_fn=input_fn, exporters=exporter)
    )

    # Only the node whose job name is 'chief' exports the saved model.
    if ctx.job_name == 'chief':
        print("Exporting saved_model to {}".format(args.export_dir))
        classifier.export_saved_model(args.export_dir,
                                      serving_input_receiver_fn)
import os

# Must be set before TensorFlow is imported so the setting is seen at load time.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds

# The training progress bar will not be shown.
tfds.disable_progress_bar()

print(tf.__version__)

# Load the IMDB reviews pre-encoded with an ~8k vocabulary:
#   * split      -> the train and test datasets, returned as a tuple
#   * supervised -> data comes back as (examples, labels)
#   * with_info  -> the 'info' structure is returned with the datasets
_splits, info = tfds.load(
    'imdb_reviews/subwords8k',
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    as_supervised=True,
    with_info=True,
)
train_data, test_data = _splits

# Set up the encoder ('SubwordTextEncoder') attached to the text feature.
encoder = info.features['text'].encoder

# Print the size of the vocabulary.
print('Vocabulary size: ', encoder.vocab_size)

# The encoder reversibly encodes any string passed into it.
sample_string = 'Hello TensorFlow.'
encoded_string = encoder.encode(sample_string)
print('Encoded string is ', encoded_string)
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Cap how much memory TensorFlow may allocate on the first GPU by creating
    # a single virtual device with memory_limit=8500 (TensorFlow documents this
    # value in MB, i.e. roughly 8.5 GB -- confirm against the target hardware).
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0], [
                tf.config.experimental.VirtualDeviceConfiguration(
                    memory_limit=8500)
            ])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)

tfds.disable_progress_bar()  # disable tqdm progress bar
AUTOTUNE = tf.data.experimental.AUTOTUNE

print("TensorFlow Version: ", tf.__version__)
print("Number of GPU available: ",
      len(tf.config.experimental.list_physical_devices("GPU")))


def read_and_label(file_path):
    """Read the file at ``file_path`` and return an ``(img, label)`` pair.

    The raw file contents are passed through ``decode_img`` and the label is
    obtained from ``get_label(file_path)``; both helpers are defined elsewhere.
    """
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    label = get_label(file_path)
    return img, label


# NOTE(review): the body of decode_img lies outside the visible source.
def decode_img(img):
def main_fun(args, ctx):
    """Train a Keras MNIST CNN under MultiWorkerMirroredStrategy and export it.

    Reads ``args.buffer_size``, ``args.batch_size``, ``args.cluster_size``,
    ``args.model_dir``, ``args.epochs``, ``args.steps_per_epoch`` and
    ``args.export_dir``; reads ``ctx.job_name`` to tell the export helper
    whether this node is the chief.
    """
    import tensorflow_datasets as tfds
    import tensorflow as tf
    from tensorflowonspark import compat

    tfds.disable_progress_bar()

    # The strategy is created first, before any dataset or model is built.
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()

    shuffle_buffer = args.buffer_size
    per_worker_batch = args.batch_size
    worker_count = args.cluster_size

    def _to_unit_range(image, label):
        # Cast pixels to float32 and divide by 255.
        return tf.cast(image, tf.float32) / 255, label

    datasets, _ = tfds.load(name='mnist', with_info=True, as_supervised=True)
    train_split = datasets['train']
    train_unbatched = train_split.repeat()
    train_unbatched = train_unbatched.map(_to_unit_range)
    train_unbatched = train_unbatched.shuffle(shuffle_buffer)

    def _make_compiled_cnn():
        # Conv -> pool -> flatten -> dense -> 10-way softmax classifier.
        layers = [
            tf.keras.layers.Conv2D(32, 3, activation='relu',
                                   input_shape=(28, 28, 1)),
            tf.keras.layers.MaxPooling2D(),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(10, activation='softmax'),
        ]
        cnn = tf.keras.Sequential(layers)
        cnn.compile(
            loss=tf.keras.losses.sparse_categorical_crossentropy,
            optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
            metrics=['accuracy'])
        return cnn

    # `tf.data.Dataset.batch` expects the global batch size, so the per-worker
    # batch size is scaled up by the number of workers.
    global_batch = per_worker_batch * worker_count
    train_batches = train_unbatched.batch(global_batch)

    # Checkpoint weights once per epoch under model_dir, and log to TensorBoard
    # in the same directory.
    tf.io.gfile.makedirs(args.model_dir)
    weights_path = args.model_dir + "/weights-{epoch:04d}"
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(filepath=weights_path,
                                                       verbose=1,
                                                       save_weights_only=True)
    tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=args.model_dir)
    callbacks = [checkpoint_cb, tensorboard_cb]

    # The model must be built and compiled inside the strategy scope.
    with strategy.scope():
        multi_worker_model = _make_compiled_cnn()

    multi_worker_model.fit(x=train_batches,
                           epochs=args.epochs,
                           steps_per_epoch=args.steps_per_epoch,
                           callbacks=callbacks)

    from tensorflow_estimator.python.estimator.export import export_lib
    export_dir = export_lib.get_timestamped_export_dir(args.export_dir)
    is_chief = ctx.job_name == 'chief'
    compat.export_saved_model(multi_worker_model, export_dir, is_chief)
import os
import tensorflow
import tensorflow_datasets

tensorflow_datasets.disable_progress_bar()
print(tensorflow.__version__)

# Load MNIST as (image, label) pairs together with its `info` structure.
datasets, info = tensorflow_datasets.load(name='mnist',
                                          with_info=True,
                                          as_supervised=True)
mnist_train, mnist_test = datasets['train'], datasets['test']

strategy = tensorflow.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

# You can also do info.splits.total_num_examples to get the total number of examples in the dataset.
num_train_examples = info.splits['train'].num_examples
num_test_examples = info.splits['test'].num_examples

BUFFER_SIZE = 10000
BATCH_SIZE_PER_REPLICA = 64
# Global batch size: the per-replica batch times the number of replicas.
BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync


# NOTE(review): this definition appears truncated in the visible source -- only
# the float32 cast is shown and nothing is returned; confirm the remaining body.
def scale(image, label):
    image = tensorflow.cast(image, tensorflow.float32)
def main():
    """Parse the CycleGAN arguments, build the data and model, then train/eval.

    Optionally loads the Habana module, swaps in the Habana instance norm,
    enables bfloat16 mixed precision, seeds TensorFlow and initialises
    Horovod, all according to the parsed arguments. Training runs when
    ``args.train`` is set; evaluation runs when ``args.test`` is set and
    ``is_master`` reports this process as the master.
    """
    args = CycleGANArgParser(is_demo=False).parse_args()

    if not args.no_hpu:
        from habana_frameworks.tensorflow import load_habana_module
        load_habana_module()
        if args.habana_instance_norm:
            tfa.layers.InstanceNormalization = HabanaInstanceNormalization
    if args.data_type == 'bf16':
        tf.keras.mixed_precision.set_global_policy('mixed_bfloat16')
    if args.run_deterministic:
        tf.random.set_seed(12345)

    crop_size = (args.crop, args.crop)
    input_image_shape = crop_size + (3,)
    input_transformation = TrasformInputs(
        orig_img_size=(args.resize, args.resize),
        input_img_size=crop_size)

    horovod = None
    if args.use_horovod:
        from TensorFlow.common.horovod_helpers import hvd as horovod
        horovod.init()
        if args.log_all_workers:
            # Give every worker its own log directory.
            args.logdir = os.path.join(args.logdir,
                                       f"worker_{horovod.rank()}")

    tfds.disable_progress_bar()

    # Load the horse-zebra dataset using tensorflow-datasets. The local master
    # loads (and downloads) first and then signals the others through a
    # broadcast; the other processes wait on the broadcast before loading.
    dataset_name = "cycle_gan/horse2zebra"
    shared_load_args = dict(data_dir=args.dataset_dir,
                            with_info=True,
                            as_supervised=True)
    if is_local_master(args.use_horovod, horovod):
        dataset, _ = tfds.load(dataset_name, download=True, **shared_load_args)
        if args.use_horovod:
            horovod.broadcast(0, 0)  # nodes synchronization
    else:
        if args.use_horovod:
            horovod.broadcast(0, 0)
        dataset, _ = tfds.load(dataset_name, **shared_load_args)

    train_horses = dataset["trainA"]
    train_zebras = dataset["trainB"]
    test_horses = dataset["testA"]
    test_zebras = dataset["testB"]

    def _training_pipeline(images):
        # Preprocess, cache, shuffle and batch one training domain.
        parallel_calls = 1 if args.run_deterministic else autotune
        images = images.map(input_transformation.preprocess_train_image,
                            num_parallel_calls=parallel_calls)
        images = images.cache()
        images = images.shuffle(args.buffer)
        return images.batch(args.batch_size, drop_remainder=True)

    # Apply the preprocessing operations to the training data.
    train_horses = _training_pipeline(train_horses)
    train_zebras = _training_pipeline(train_zebras)

    train_ds = tf.data.Dataset.zip((train_horses, train_zebras))
    test_ds = (test_horses, test_zebras)

    disc_X = get_discriminator(input_image_shape, name="discriminator_X")
    disc_Y = get_discriminator(input_image_shape, name="discriminator_Y")
    gen_X = get_resnet_generator(input_image_shape, name="generator_X")
    gen_Y = get_resnet_generator(input_image_shape, name="generator_Y")

    # Create cycle gan model
    cycle_gan_model = CycleGan(generator_X=gen_X,
                               generator_Y=gen_Y,
                               discriminator_X=disc_X,
                               discriminator_Y=disc_Y)

    latest = None
    if args.restore:
        print(f"Trying to restore checkpoint from {args.logdir}")
        latest = tf.train.latest_checkpoint(args.logdir)

    if args.train:
        train(args, cycle_gan_model, train_ds, test_ds, latest, horovod)

    if args.test and is_master(args.use_horovod, horovod):
        eval(args, cycle_gan_model, test_ds, input_transformation, latest)