def test_path_to_string(self): class PathLikeDummy(object): def __fspath__(self): return 'dummypath' dummy = object() # conversion of PathLike self.assertEqual(io_utils.path_to_string(Path('path')), 'path') self.assertEqual(io_utils.path_to_string(PathLikeDummy()), 'dummypath') # pass-through, works for all versions of python self.assertEqual(io_utils.path_to_string('path'), 'path') self.assertIs(io_utils.path_to_string(dummy), dummy)
def plot_model(model, to_file=None, display_image=True, **kwargs): """ Extend tf.keras.utils.plot_model Parameters: model: A Keras model instance to_file: File name of the plot image display_image: """ dot = model_to_dot(model, **kwargs) if to_file is not None: from tensorflow.python.keras.utils.io_utils import path_to_string import os to_file = path_to_string(to_file) _, extension = os.path.splitext(to_file) extension = extension[1:] dot.write(to_file, format=extension) if display_image: try: from IPython import display if to_file is None or extension == 'svg': return display.SVG(dot.create(prog='dot', format='svg')) else: return display.Image(filename=to_file) except ImportError: pass
def __init__(self, filepath, verbose=0, save_freq='epoch', **kwargs): super(OptimizerCheckpoint, self).__init__() self.verbose = verbose self.filepath = path_to_string(filepath) self.save_freq = save_freq self.epochs_since_last_save = 0 self._batches_seen_since_last_saving = 0 self._last_batch_seen = 0 self._current_epoch = 0 if 'load_weights_on_restart' in kwargs: self.load_weights_on_restart = kwargs['load_weights_on_restart'] logging.warning('`load_weights_on_restart` argument is deprecated. ' 'Please use `model.load_weights()` for loading weights ' 'before the start of `model.fit()`.') else: self.load_weights_on_restart = False if 'period' in kwargs: self.period = kwargs['period'] logging.warning('`period` argument is deprecated. Please use `save_freq` ' 'to specify the frequency in number of batches seen.') else: self.period = 1 if self.save_freq != 'epoch' and not isinstance(self.save_freq, int): raise ValueError('Unrecognized save_freq: {}'.format(self.save_freq)) # Only the chief worker writes model checkpoints, but all workers # restore checkpoint at on_train_begin(). self._chief_worker_only = False
def _extract_archive(file_path, path='.', archive_format='auto'): """Extracts an archive if it matches tar, tar.gz, tar.bz, or zip formats. Arguments: file_path: path to the archive file path: path to extract the archive file archive_format: Archive format to try for extracting the file. Options are 'auto', 'tar', 'zip', and None. 'tar' includes tar, tar.gz, and tar.bz files. The default 'auto' is ['tar', 'zip']. None or an empty list will return no matches found. Returns: True if a match was found and an archive extraction was completed, False otherwise. """ if archive_format is None: return False if archive_format == 'auto': archive_format = ['tar', 'zip'] if isinstance(archive_format, six.string_types): archive_format = [archive_format] file_path = path_to_string(file_path) path = path_to_string(path) for archive_type in archive_format: if archive_type == 'tar': open_fn = tarfile.open is_match_fn = tarfile.is_tarfile if archive_type == 'zip': open_fn = zipfile.ZipFile is_match_fn = zipfile.is_zipfile if is_match_fn(file_path): with open_fn(file_path) as archive: try: archive.extractall(path) except (tarfile.TarError, RuntimeError, KeyboardInterrupt): if os.path.exists(path): if os.path.isfile(path): os.remove(path) else: shutil.rmtree(path) raise return True return False
def test_path_to_string(self): class PathLikeDummy(object): def __fspath__(self): return 'dummypath' dummy = object() if sys.version_info >= (3, 4): from pathlib import Path # pylint:disable=g-import-not-at-top # conversion of PathLike self.assertEqual(io_utils.path_to_string(Path('path')), 'path') if sys.version_info >= (3, 6): self.assertEqual(io_utils.path_to_string(PathLikeDummy()), 'dummypath') # pass-through, works for all versions of python self.assertEqual(io_utils.path_to_string('path'), 'path') self.assertIs(io_utils.path_to_string(dummy), dummy)
def load_model(filepath, custom_objects=None, compile=True, options=None): # pylint: disable=redefined-builtin """Loads a model saved via `model.save()`. Usage: >>> model = tf.keras.Sequential([ ... tf.keras.layers.Dense(5, input_shape=(3,)), ... tf.keras.layers.Softmax()]) >>> model.save('/tmp/model') >>> loaded_model = tf.keras.models.load_model('/tmp/model') >>> x = tf.random.uniform((10, 3)) >>> assert np.allclose(model.predict(x), loaded_model.predict(x)) Note that the model weights may have different scoped names after being loaded. Scoped names include the model/layer names, such as `"dense_1/kernel:0"`. It is recommended that you use the layer properties to access specific variables, e.g. `model.get_layer("dense_1").kernel`. Args: filepath: One of the following: - String or `pathlib.Path` object, path to the saved model - `h5py.File` object from which to load the model custom_objects: Optional dictionary mapping names (strings) to custom classes or functions to be considered during deserialization. compile: Boolean, whether to compile the model after loading. options: Optional `tf.saved_model.LoadOptions` object that specifies options for loading from SavedModel. Returns: A Keras model instance. If the original model was compiled, and saved with the optimizer, then the returned model will be compiled. Otherwise, the model will be left uncompiled. In the case that an uncompiled model is returned, a warning is displayed if the `compile` argument is set to `True`. Raises: ImportError: if loading from an hdf5 file and h5py is not available. IOError: In case of an invalid savefile. """ with generic_utils.SharedObjectLoadingScope(): with generic_utils.CustomObjectScope(custom_objects or {}): with load_context.load_context(options): if (h5py is not None and (isinstance(filepath, h5py.File) or h5py.is_hdf5(filepath))): return hdf5_format.load_model_from_hdf5( filepath, custom_objects, compile) filepath = path_to_string(filepath) if isinstance(filepath, six.string_types): loader_impl.parse_saved_model(filepath) return saved_model_load.load(filepath, compile, options) raise IOError( 'Unable to load model. Filepath is not an hdf5 file (or h5py is not ' 'available) or SavedModel.')
def get_file(fname, origin, untar=False, md5_hash=None, file_hash=None, cache_subdir='datasets', hash_algorithm='auto', extract=False, archive_format='auto', cache_dir=None): """Downloads a file from a URL if it not already in the cache. By default the file at the url `origin` is downloaded to the cache_dir `~/.keras`, placed in the cache_subdir `datasets`, and given the filename `fname`. The final location of a file `example.txt` would therefore be `~/.keras/datasets/example.txt`. Files in tar, tar.gz, tar.bz, and zip formats can also be extracted. Passing a hash will verify the file after download. The command line programs `shasum` and `sha256sum` can compute the hash. Example: ```python path_to_downloaded_file = tf.keras.utils.get_file( "flower_photos", "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz", untar=True) ``` Arguments: fname: Name of the file. If an absolute path `/path/to/file.txt` is specified the file will be saved at that location. origin: Original URL of the file. untar: Deprecated in favor of `extract` argument. boolean, whether the file should be decompressed md5_hash: Deprecated in favor of `file_hash` argument. md5 hash of the file for verification file_hash: The expected hash string of the file after download. The sha256 and md5 hash algorithms are both supported. cache_subdir: Subdirectory under the Keras cache dir where the file is saved. If an absolute path `/path/to/folder` is specified the file will be saved at that location. hash_algorithm: Select the hash algorithm to verify the file. options are `'md5'`, `'sha256'`, and `'auto'`. The default 'auto' detects the hash algorithm in use. extract: True tries extracting the file as an Archive, like tar or zip. archive_format: Archive format to try for extracting the file. Options are `'auto'`, `'tar'`, `'zip'`, and `None`. `'tar'` includes tar, tar.gz, and tar.bz files. The default `'auto'` corresponds to `['tar', 'zip']`. None or an empty list will return no matches found. cache_dir: Location to store cached files, when None it defaults to the default directory `~/.keras/`. Returns: Path to the downloaded file """ if cache_dir is None: cache_dir = os.path.join(os.path.expanduser('~'), '.keras') if md5_hash is not None and file_hash is None: file_hash = md5_hash hash_algorithm = 'md5' datadir_base = os.path.expanduser(cache_dir) if not os.access(datadir_base, os.W_OK): datadir_base = os.path.join('/tmp', '.keras') datadir = os.path.join(datadir_base, cache_subdir) _makedirs_exist_ok(datadir) fname = path_to_string(fname) if untar: untar_fpath = os.path.join(datadir, fname) fpath = untar_fpath + '.tar.gz' else: fpath = os.path.join(datadir, fname) download = False if os.path.exists(fpath): # File found; verify integrity if a hash was provided. if file_hash is not None: if not validate_file(fpath, file_hash, algorithm=hash_algorithm): print('A local file was found, but it seems to be ' 'incomplete or outdated because the ' + hash_algorithm + ' file hash does not match the original value of ' + file_hash + ' so we will re-download the data.') download = True else: download = True if download: print('Downloading data from', origin) class ProgressTracker(object): # Maintain progbar for the lifetime of download. # This design was chosen for Python 2.7 compatibility. progbar = None def dl_progress(count, block_size, total_size): if ProgressTracker.progbar is None: if total_size == -1: total_size = None ProgressTracker.progbar = Progbar(total_size) else: ProgressTracker.progbar.update(count * block_size) error_msg = 'URL fetch failure on {}: {} -- {}' try: try: urlretrieve(origin, fpath, dl_progress) except HTTPError as e: raise Exception(error_msg.format(origin, e.code, e.msg)) except URLError as e: raise Exception(error_msg.format(origin, e.errno, e.reason)) except (Exception, KeyboardInterrupt) as e: if os.path.exists(fpath): os.remove(fpath) raise ProgressTracker.progbar = None if untar: if not os.path.exists(untar_fpath): _extract_archive(fpath, datadir, archive_format='tar') return untar_fpath if extract: _extract_archive(fpath, datadir, archive_format) return fpath
def plot_model( model, to_file="model.png", show_shapes=False, show_dtype=False, show_classes=False, show_layer_names=True, rankdir="TB", expand_nested=False, dpi=96, ): """Converts a Keras model to dot format and save to a file. Example: ```python input = tf.keras.Input(shape=(100,), dtype='int32', name='input') x = tf.keras.layers.Embedding( output_dim=512, input_dim=10000, input_length=100)(input) x = tf.keras.layers.LSTM(32)(x) x = tf.keras.layers.Dense(64, activation='relu')(x) x = tf.keras.layers.Dense(64, activation='relu')(x) x = tf.keras.layers.Dense(64, activation='relu')(x) output = tf.keras.layers.Dense(1, activation='sigmoid', name='output')(x) model = tf.keras.Model(inputs=[input], outputs=[output]) dot_img_file = '/tmp/model_1.png' tf.keras.utils.plot_model(model, to_file=dot_img_file, show_shapes=True) ``` Arguments: model: A Keras model instance to_file: File name of the plot image. show_shapes: whether to display shape information. show_dtype: whether to display layer dtypes. show_layer_names: whether to display layer names. rankdir: `rankdir` argument passed to PyDot, a string specifying the format of the plot: 'TB' creates a vertical plot; 'LR' creates a horizontal plot. expand_nested: Whether to expand nested models into clusters. dpi: Dots per inch. Returns: A Jupyter notebook Image object if Jupyter is installed. This enables in-line display of the model plots in notebooks. """ dot = model_to_dot( model, show_shapes=show_shapes, show_dtype=show_dtype, show_classes=show_classes, show_layer_names=show_layer_names, rankdir=rankdir, expand_nested=expand_nested, dpi=dpi, ) to_file = path_to_string(to_file) if dot is None: return _, extension = os.path.splitext(to_file) if not extension: extension = "png" else: extension = extension[1:] # Save image to disk. dot.write(to_file, format=extension) # Return the image as a Jupyter Image object, to be displayed in-line. # Note that we cannot easily detect whether the code is running in a # notebook, and thus we always return the Image if Jupyter is available. if extension != "pdf": try: from IPython import display return display.Image(filename=to_file) except ImportError: pass
def save_model(model, filepath, overwrite=True, include_optimizer=True, save_format=None, signatures=None, options=None): """Saves a model as a TensorFlow SavedModel or HDF5 file. Usage: >>> model = tf.keras.Sequential([ ... tf.keras.layers.Dense(5, input_shape=(3,)), ... tf.keras.layers.Softmax()]) >>> model.save('/tmp/model') >>> loaded_model = tf.keras.models.load_model('/tmp/model') >>> x = tf.random.uniform((10, 3)) >>> assert np.allclose(model.predict(x), loaded_model.predict(x)) The saved model contains: - the model's configuration (topology) - the model's weights - the model's optimizer's state (if any) Thus the saved model can be reinstantiated in the exact same state, without any of the code used for model definition or training. Note that the model weights may have different scoped names after being loaded. Scoped names include the model/layer names, such as `"dense_1/kernel:0"`. It is recommended that you use the layer properties to access specific variables, e.g. `model.get_layer("dense_1").kernel`. _SavedModel serialization_ The SavedModel serialization path uses `tf.saved_model.save` to save the model and all trackable objects attached to the model (e.g. layers and variables). `@tf.function`-decorated methods are also saved. Additional trackable objects and functions are added to the SavedModel to allow the model to be loaded back as a Keras Model object. Arguments: model: Keras model instance to be saved. filepath: One of the following: - String or `pathlib.Path` object, path where to save the model - `h5py.File` object where to save the model overwrite: Whether we should overwrite any existing model at the target location, or instead ask the user with a manual prompt. include_optimizer: If True, save optimizer's state together. save_format: Either 'tf' or 'h5', indicating whether to save the model to Tensorflow SavedModel or HDF5. Defaults to 'tf' in TF 2.X, and 'h5' in TF 1.X. signatures: Signatures to save with the SavedModel. Applicable to the 'tf' format only. Please see the `signatures` argument in `tf.saved_model.save` for details. options: Optional `tf.saved_model.SaveOptions` object that specifies options for saving to SavedModel. Raises: ImportError: If save format is hdf5, and h5py is not available. """ from tensorflow.python.keras.engine import sequential # pylint: disable=g-import-not-at-top default_format = 'tf' if tf2.enabled() else 'h5' save_format = save_format or default_format filepath = path_to_string(filepath) if (save_format == 'h5' or (h5py is not None and isinstance(filepath, h5py.File)) or os.path.splitext(filepath)[1] in _HDF5_EXTENSIONS): # TODO(b/130258301): add utility method for detecting model type. if (not model._is_graph_network and # pylint:disable=protected-access not isinstance(model, sequential.Sequential)): raise NotImplementedError( 'Saving the model to HDF5 format requires the model to be a ' 'Functional model or a Sequential model. It does not work for ' 'subclassed models, because such models are defined via the body of ' 'a Python method, which isn\'t safely serializable. Consider saving ' 'to the Tensorflow SavedModel format (by setting save_format="tf") ' 'or using `save_weights`.') hdf5_format.save_model_to_hdf5(model, filepath, overwrite, include_optimizer) else: saved_model_save.save(model, filepath, overwrite, include_optimizer, signatures, options)
def save_model(model, filepath, overwrite=True, include_optimizer=True, save_format=None, signatures=None, options=None, save_traces=True): # pylint: disable=line-too-long """Saves a model as a TensorFlow SavedModel or HDF5 file. See the [Serialization and Saving guide](https://keras.io/guides/serialization_and_saving/) for details. Usage: >>> model = tf.keras.Sequential([ ... tf.keras.layers.Dense(5, input_shape=(3,)), ... tf.keras.layers.Softmax()]) >>> model.save('/tmp/model') >>> loaded_model = tf.keras.models.load_model('/tmp/model') >>> x = tf.random.uniform((10, 3)) >>> assert np.allclose(model.predict(x), loaded_model.predict(x)) The SavedModel and HDF5 file contains: - the model's configuration (topology) - the model's weights - the model's optimizer's state (if any) Thus models can be reinstantiated in the exact same state, without any of the code used for model definition or training. Note that the model weights may have different scoped names after being loaded. Scoped names include the model/layer names, such as `"dense_1/kernel:0"`. It is recommended that you use the layer properties to access specific variables, e.g. `model.get_layer("dense_1").kernel`. __SavedModel serialization format__ Keras SavedModel uses `tf.saved_model.save` to save the model and all trackable objects attached to the model (e.g. layers and variables). The model config, weights, and optimizer are saved in the SavedModel. Additionally, for every Keras layer attached to the model, the SavedModel stores: * the config and metadata -- e.g. name, dtype, trainable status * traced call and loss functions, which are stored as TensorFlow subgraphs. The traced functions allow the SavedModel format to save and load custom layers without the original class definition. You can choose to not save the traced functions by disabling the `save_traces` option. This will decrease the time it takes to save the model and the amount of disk space occupied by the output SavedModel. If you enable this option, then you _must_ provide all custom class definitions when loading the model. See the `custom_objects` argument in `tf.keras.models.load_model`. Args: model: Keras model instance to be saved. filepath: One of the following: - String or `pathlib.Path` object, path where to save the model - `h5py.File` object where to save the model overwrite: Whether we should overwrite any existing model at the target location, or instead ask the user with a manual prompt. include_optimizer: If True, save optimizer's state together. save_format: Either 'tf' or 'h5', indicating whether to save the model to Tensorflow SavedModel or HDF5. Defaults to 'tf' in TF 2.X, and 'h5' in TF 1.X. signatures: Signatures to save with the SavedModel. Applicable to the 'tf' format only. Please see the `signatures` argument in `tf.saved_model.save` for details. options: (only applies to SavedModel format) `tf.saved_model.SaveOptions` object that specifies options for saving to SavedModel. save_traces: (only applies to SavedModel format) When enabled, the SavedModel will store the function traces for each layer. This can be disabled, so that only the configs of each layer are stored. Defaults to `True`. Disabling this will decrease serialization time and reduce file size, but it requires that all custom layers/models implement a `get_config()` method. Raises: ImportError: If save format is hdf5, and h5py is not available. """ # pylint: enable=line-too-long from tensorflow.python.keras.engine import sequential # pylint: disable=g-import-not-at-top default_format = 'tf' if tf2.enabled() else 'h5' save_format = save_format or default_format filepath = path_to_string(filepath) if (save_format == 'h5' or (h5py is not None and isinstance(filepath, h5py.File)) or saving_utils.is_hdf5_filepath(filepath)): # TODO(b/130258301): add utility method for detecting model type. if (not model._is_graph_network and # pylint:disable=protected-access not isinstance(model, sequential.Sequential)): raise NotImplementedError( 'Saving the model to HDF5 format requires the model to be a ' 'Functional model or a Sequential model. It does not work for ' 'subclassed models, because such models are defined via the body of ' 'a Python method, which isn\'t safely serializable. Consider saving ' 'to the Tensorflow SavedModel format (by setting save_format="tf") ' 'or using `save_weights`.') hdf5_format.save_model_to_hdf5(model, filepath, overwrite, include_optimizer) else: saved_model_save.save(model, filepath, overwrite, include_optimizer, signatures, options, save_traces)
def __init__(self, filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch', options=None, **kwargs): super(ModelCheckpoint, self).__init__() self._supports_tf_logs = True self.monitor = monitor self.verbose = verbose self.filepath = path_to_string(filepath) self.save_best_only = save_best_only self.save_weights_only = save_weights_only self.save_freq = save_freq self.epochs_since_last_save = 0 self._batches_seen_since_last_saving = 0 self._last_batch_seen = 0 if save_weights_only: if options is None or isinstance( options, checkpoint_options_lib.CheckpointOptions): self._options = options or checkpoint_options_lib.CheckpointOptions() else: raise TypeError('If save_weights_only is True, then `options` must be' 'either None or a tf.train.CheckpointOptions') else: if options is None or isinstance(options, save_options_lib.SaveOptions): self._options = options or save_options_lib.SaveOptions() else: raise TypeError('If save_weights_only is False, then `options` must be' 'either None or a tf.saved_model.SaveOptions') # Deprecated field `load_weights_on_restart` is for loading the checkpoint # file from `filepath` at the start of `model.fit()` # TODO(rchao): Remove the arg during next breaking release. if 'load_weights_on_restart' in kwargs: self.load_weights_on_restart = kwargs['load_weights_on_restart'] logging.warning('`load_weights_on_restart` argument is deprecated. ' 'Please use `model.load_weights()` for loading weights ' 'before the start of `model.fit()`.') else: self.load_weights_on_restart = False # Deprecated field `period` is for the number of epochs between which # the model is saved. if 'period' in kwargs: self.period = kwargs['period'] logging.warning('`period` argument is deprecated. Please use `save_freq` ' 'to specify the frequency in number of batches seen.') else: self.period = 1 if mode not in ['auto', 'min', 'max']: logging.warning('ModelCheckpoint mode %s is unknown, ' 'fallback to auto mode.', mode) mode = 'auto' if mode == 'min': self.monitor_op = np.less self.best = np.Inf elif mode == 'max': self.monitor_op = np.greater self.best = -np.Inf else: if 'acc' in self.monitor or self.monitor.startswith('fmeasure'): self.monitor_op = np.greater self.best = -np.Inf else: self.monitor_op = np.less self.best = np.Inf if self.save_freq != 'epoch' and not isinstance(self.save_freq, int): raise ValueError('Unrecognized save_freq: {}'.format(self.save_freq)) # Only the chief worker writes model checkpoints, but all workers # restore checkpoint at on_train_begin(). self._chief_worker_only = False
def get_file_from_google(fname, file_id, untar=False, md5_hash=None, file_hash=None, cache_subdir="datasets", hash_algorithm="auto", extract=False, archive_format="auto", cache_dir=None): if cache_dir is None: cache_dir = os.path.join(os.path.expanduser("~"), ".keras") if md5_hash is not None and file_hash is None: file_hash = md5_hash hash_algorithm = "md5" datadir_base = os.path.expanduser(cache_dir) if not os.access(datadir_base, os.W_OK): datadir_base = os.path.join("/tmp", ".keras") datadir = os.path.join(datadir_base, cache_subdir) os.makedirs(datadir, exist_ok=True) fname = path_to_string(fname) if untar: untar_fpath = os.path.join(datadir, fname) fpath = untar_fpath + ".tar.gz" else: fpath = os.path.join(datadir, fname) download = False if os.path.exists(fpath): # File found; verify integrity if a hash was provided. if file_hash is not None: if not _validate_file(fpath, file_hash, algorithm=hash_algorithm): print("A local file was found, but it seems to be " "incomplete or outdated because the " + hash_algorithm + " file hash does not match the original value of " + file_hash + " so we will re-download the data.") download = True else: download = True if download: print("Downloading data from Google Drive...") error_msg = "Failed on https://drive.google.com/file/d/{}: {} -- {}" try: try: url = "https://docs.google.com/uc?export=download" session = requests.Session() response = session.get(url, params={"id": file_id}, stream=True) token = _get_confirm_token(response) if token: params = {"id": file_id, "confirm": token} response = session.get(url, params=params, stream=True) _save_response_content(response, fpath) except HTTPError as e: raise Exception(error_msg.format(file_id, e.code, e.msg)) except URLError as e: raise Exception(error_msg.format(file_id, e.errno, e.reason)) except (Exception, KeyboardInterrupt) as e: if os.path.exists(fpath): os.remove(fpath) raise e if untar: if not os.path.exists(untar_fpath): _extract_archive(fpath, datadir, archive_format="tar") return untar_fpath if extract: _extract_archive(fpath, datadir, archive_format) return fpath