Пример #1
0
    def test_training_with_ndarry_distributed(self):
        """Distributed fit on in-memory ndarrays should run end to end."""
        model = KerasModel(self.create_model())
        features, labels = self.create_training_data()
        model.fit(features, labels, batch_size=4, distributed=True)
Пример #2
0
    def test_training_and_validation_with_dataset(self):
        """Fit should accept a TFDataset that carries its own validation split."""
        model = KerasModel(self.create_model())
        train_ds = self.create_training_dataset()
        model.fit(train_ds)
Пример #3
0
    def test_tfdataset_with_tf_data_dataset_which_requires_table(self):
        """Fit should work when the tf.data pipeline needs a lookup-table init."""
        keys = [1, 0, -1]
        # Static hash table mapping each key to its mirror; unknown keys -> 100.
        table = tf.contrib.lookup.HashTable(
            initializer=tf.contrib.lookup.KeyValueTensorInitializer(
                keys=keys, values=list(reversed(keys))),
            default_value=100)

        raw = tf.data.Dataset.from_tensor_slices([1, 2, -1, 5] * 40)
        looked_up = raw.map(table.lookup)

        def to_feature_label(value):
            # Scalar float feature paired with a constant label of 1.
            return tf.to_float(value), 1

        dataset = TFDataset.from_tf_data_dataset(
            looked_up.map(to_feature_label), batch_size=16)

        net = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=()),
            tf.keras.layers.Dense(10, activation="softmax")
        ])
        net.compile(optimizer=tf.keras.optimizers.RMSprop(),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])
        model = KerasModel(net)
        model.fit(dataset)
Пример #4
0
    def test_training_with_ndarray(self):
        """Local (non-distributed) fit on ndarrays should run end to end."""
        model = KerasModel(self.create_model())
        features, labels = self.create_training_data()
        model.fit(features, labels, batch_size=2)
Пример #5
0
    def test_training_with_validation_data_distributed(self):
        """Distributed fit should accept an explicit (x, y) validation tuple."""
        model = KerasModel(self.create_model())
        train_x, train_y = self.create_training_data()
        val_x, val_y = self.create_training_data()
        model.fit(train_x, train_y,
                  validation_data=(val_x, val_y),
                  batch_size=4,
                  distributed=True)
Пример #6
0
    def test_training_with_validation_data_distributed_multi_heads(self):
        """Distributed fit should handle multi-input / multi-output models."""
        model = KerasModel(self.create_multi_input_output_model())
        x, y = self.create_training_data()
        val_x, val_y = self.create_training_data()
        # The same arrays feed both heads of the model.
        model.fit([x, x], [y, y],
                  validation_data=([val_x, val_x], [val_y, val_y]),
                  batch_size=4,
                  distributed=True)
Пример #7
0
    def test_predict_with_ndarray_distributed(self):
        """Distributed predict should match the accuracy reported by evaluate."""
        model = KerasModel(self.create_model())
        x, y = self.create_training_data()
        baseline = model.evaluate(x, y)

        predicted = np.argmax(model.predict(x, distributed=True), axis=1)
        accuracy = np.average(predicted == y)
        print(baseline)
        assert np.square(accuracy - baseline["acc"]) < 0.000001
Пример #8
0
    def test_evaluate_and_distributed_evaluate(self):
        """Local and distributed evaluate should agree on loss and accuracy."""
        model = KerasModel(self.create_model())
        x, y = self.create_training_data()

        local_metrics = model.evaluate(x, y)
        dist_metrics = model.evaluate(x, y, distributed=True)

        assert np.square(local_metrics["acc"] - dist_metrics["acc Top1Accuracy"]) < 0.000001
        assert np.square(local_metrics["loss"] - dist_metrics["loss"]) < 0.000001
Пример #9
0
    def test_predict_with_dataset(self):
        """Predicting on a TFDataset should reproduce the evaluate accuracy."""
        model = KerasModel(self.create_model())
        x, y = self.create_training_data()
        baseline = model.evaluate(x, y)

        collected = model.predict(self.create_predict_dataset()).collect()
        predicted = np.argmax(np.array(collected), axis=1)
        accuracy = np.average(predicted == y)

        assert np.square(accuracy - baseline["acc"]) < 0.000001
Пример #10
0
    def test_evaluate_with_dataset(self):
        """Evaluating a TFDataset should agree with evaluating raw ndarrays."""
        model = KerasModel(self.create_model())
        x, y = self.create_training_data()
        baseline = model.evaluate(x, y)

        dataset_metrics = model.evaluate(self.create_evaluation_dataset())

        assert np.square(baseline["acc"] - dataset_metrics["acc Top1Accuracy"]) < 0.000001
        assert np.square(baseline["loss"] - dataset_metrics["loss"]) < 0.000001
Пример #11
0
 def __init__(self, keras_model, metrics, model_dir, optimizer):
     """
     Set up estimator state around a tf.keras model.

     :param keras_model: compiled tf.keras model to wrap in a KerasModel.
     :param metrics: validation metrics to use during fit.
     :param model_dir: directory handed to KerasModel (checkpoints/summaries).
     :param optimizer: optional optimizer override; a
            bigdl.orca.learn.optimizers.Optimizer wrapper is unwrapped to
            its underlying optimizer object.
     """
     self.model = KerasModel(keras_model, model_dir)
     # Flipped externally when training should resume from a checkpoint.
     self.load_checkpoint = False
     self.metrics = metrics
     self.tf_optimizer = None
     self.optimizer = optimizer
     from bigdl.orca.learn.optimizers import Optimizer
     if self.optimizer is not None and isinstance(self.optimizer,
                                                  Optimizer):
         self.optimizer = self.optimizer.get_optimizer()
     # Tensorboard and gradient-clipping settings; configured later via setters.
     self.log_dir = None
     self.app_name = None
     self.clip_norm = None
     self.clip_min = None
     self.clip_max = None
Пример #12
0
    def load(self, model_path):
        """
        Load an existing keras model from disk, replacing the current model.

        :param model_path: Path to the existing keras model.
        :return: None; ``self.model`` is replaced by the loaded KerasModel.
        """
        self.model = KerasModel.load_model(model_path)
Пример #13
0
    def test_tensorflow_optimizer(self):
        """Distributed fit should accept a native tf.train optimizer."""
        inputs = tf.keras.layers.Input(shape=[10])
        hidden = tf.keras.layers.Flatten()(inputs)
        hidden = tf.keras.layers.Dense(10, activation='relu')(hidden)
        outputs = tf.keras.layers.Dense(2, activation='softmax')(hidden)

        net = tf.keras.models.Model(inputs=inputs, outputs=outputs)
        net.compile(optimizer=tf.train.AdamOptimizer(),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])

        keras_model = KerasModel(net)
        x, y = self.create_training_data()
        keras_model.fit(x, y, batch_size=4, distributed=True)
Пример #14
0
    def test_dataset_without_batch(self):
        """fit/evaluate/predict must reject TFDatasets built without a batch size."""
        features = np.random.rand(20, 10)
        labels = np.random.randint(0, 2, (20))

        feature_rdd = self.sc.parallelize(features)
        label_rdd = self.sc.parallelize(labels)
        zipped = feature_rdd.zip(label_rdd)

        unbatched_train = TFDataset.from_rdd(zipped,
                                             features=(tf.float32, [10]),
                                             labels=(tf.int32, []),
                                             names=["features", "labels"],
                                             val_rdd=zipped)

        model = KerasModel(self.create_model())
        self.intercept(
            lambda: model.fit(unbatched_train),
            "The batch_size of TFDataset must be"
            " specified when used in KerasModel fit.")

        unbatched_eval = TFDataset.from_rdd(zipped,
                                            features=(tf.float32, [10]),
                                            labels=(tf.int32, []),
                                            names=["features", "labels"])
        self.intercept(
            lambda: model.evaluate(unbatched_eval),
            "The batch_per_thread of TFDataset must be "
            "specified when used in KerasModel evaluate.")

        unbatched_pred = TFDataset.from_rdd(feature_rdd,
                                            features=(tf.float32, [10]),
                                            names=["features", "labels"])
        self.intercept(
            lambda: model.predict(unbatched_pred),
            "The batch_per_thread of TFDataset must be"
            " specified when used in KerasModel predict.")
Пример #15
0
    def create_image_model(self):
        """Build a trivial 224x224x3 -> 10-class softmax classifier, wrapped."""
        inputs = tf.keras.layers.Input(shape=[224, 224, 3])
        flat = tf.keras.layers.Flatten()(inputs)
        outputs = tf.keras.layers.Dense(10, activation='softmax')(flat)

        net = tf.keras.models.Model(inputs=inputs, outputs=outputs)
        net.compile(optimizer='rmsprop',
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])
        return KerasModel(net)
Пример #16
0
    def check_dataset(self, create_ds):
        """Exercise fit/predict/evaluate with datasets produced by *create_ds*."""
        net = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(20, )),
            tf.keras.layers.Dense(10, activation="softmax")
        ])
        net.compile(optimizer=tf.keras.optimizers.RMSprop(),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])

        model = KerasModel(net)
        model.fit(create_ds("train"))
        model.predict(create_ds("predict")).collect()
        model.evaluate(create_ds("evaluate"))
Пример #17
0
    def test_evaluate_with_ndarray(self):
        """Training for a few epochs should reduce the evaluated loss."""
        model = KerasModel(self.create_model())
        x, y = self.create_training_data()

        before = model.evaluate(x, y)
        model.fit(x, y, batch_size=4, epochs=10)
        after = model.evaluate(x, y)

        assert before["loss"] > after["loss"]
Пример #18
0
    def test_evaluate_with_ndarray_distributed(self):
        """Distributed evaluate after training should show a reduced loss."""
        model = KerasModel(self.create_model())
        x, y = self.create_training_data()

        before = model.evaluate(x, y, batch_per_thread=1)
        model.fit(x, y, batch_size=4, epochs=10)
        after = model.evaluate(x, y, distributed=True, batch_per_thread=1)

        assert before["loss"] > after["loss"]
Пример #19
0
    def test_invalid_data_handling(self):
        """fit should raise clear assertions for mismatched or misnamed inputs."""
        model = KerasModel(self.create_multi_input_output_model())
        x, y = self.create_training_data()
        # Kept even though unused below: consumes the same RNG state as before.
        val_x, val_y = self.create_training_data()

        # Three targets supplied for a two-output model.
        with pytest.raises(AssertionError) as excinfo:
            model.fit([x, x], [y, y, y], batch_size=4, distributed=True)
        assert "model_target number does not match data number" in str(
            excinfo.value)

        # Dict input that does not cover all of the model's input names.
        with pytest.raises(AssertionError) as excinfo:
            model.fit({"input_1": x}, [y, y], batch_size=4, distributed=True)
        assert "all model_input names should exist in data" in str(
            excinfo.value)
Пример #20
0
    def test_tfdataset_with_tf_data_dataset(self):
        """Fit and evaluate should both accept TFDatasets built from tf.data.

        The original duplicated the tf.data pipeline construction verbatim for
        the fit and evaluate paths; it is extracted into a local builder.
        """

        def make_tf_dataset():
            # Fresh random (image, label) pairs; float-cast to match the model.
            ds = tf.data.Dataset.from_tensor_slices(
                (np.random.randn(102, 28, 28, 1),
                 np.random.randint(0, 10, size=(102, ))))
            return ds.map(lambda feature, label:
                          (tf.to_float(feature), label))

        seq = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
            tf.keras.layers.Dense(10, activation="softmax")
        ])
        seq.compile(optimizer=tf.keras.optimizers.RMSprop(),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])
        model = KerasModel(seq)

        # Training uses batch_size; evaluation uses batch_per_thread.
        model.fit(TFDataset.from_tf_data_dataset(make_tf_dataset(),
                                                 batch_size=16))
        model.evaluate(TFDataset.from_tf_data_dataset(make_tf_dataset(),
                                                      batch_per_thread=16))
Пример #21
0
    def test_gradient_clipping(self):
        """With clipvalue=1e-8 and lr=1, the per-step update is at most 1e-8
        per weight, so after 5 iterations weights must move less than 1e-7."""
        data = tf.keras.layers.Input(shape=[10])

        x = tf.keras.layers.Flatten()(data)
        x = tf.keras.layers.Dense(10, activation='relu')(x)
        predictions = tf.keras.layers.Dense(2, activation='softmax')(x)

        model = tf.keras.models.Model(inputs=data, outputs=predictions)
        model.compile(optimizer=tf.keras.optimizers.SGD(lr=1, clipvalue=1e-8),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        model = KerasModel(model)

        pre_weights = model.get_weights()

        dataset = self.create_training_dataset()

        # 5 iterations
        model.fit(dataset)

        current_weight = model.get_weights()

        # Bug fix: the original computed np.all(...) but discarded the result,
        # so the test could never fail. Assert the clipping bound actually held.
        assert np.all(np.abs(current_weight[0] - pre_weights[0]) < 1e-7)
Пример #22
0
 def _load_model(labor, path):
     """
     Restore *labor* from *path* and wrap its underlying model.

     :param labor: object exposing ``load(path)`` and a ``.model`` attribute.
     :param path: location to load the model from.
     :return: a KerasModel wrapping the reloaded model, with the *labor*
              object attached as ``model.labor`` for later use.
     """
     labor.load(path)
     model = KerasModel(labor.model)
     model.labor = labor
     return model
Пример #23
0
def main(max_epoch):
    """
    Train a small MNIST classifier on an RDD-backed TFDataset and verify
    >95% top-1 accuracy on the test split.

    :param max_epoch: number of training epochs.
    """
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    if cluster_mode.startswith("yarn"):
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \
                "set the environment variable HADOOP_CONF_DIR"
        spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \
            .set("spark.executor.cores", 2) \
            .set("spark.executor.instances", 2) \
            .set("spark.executorEnv.HTTP_PROXY", "http://child-prc.intel.com:913") \
            .set("spark.executorEnv.HTTPS_PROXY", "http://child-prc.intel.com:913") \
            .set("spark.driver.memory", "2g")
        if cluster_mode == "yarn-client":
            sc = init_nncontext(spark_conf,
                                cluster_mode="yarn-client",
                                hadoop_conf=hadoop_conf)
        else:
            sc = init_nncontext(spark_conf,
                                cluster_mode="yarn-cluster",
                                hadoop_conf=hadoop_conf)
    else:
        sc = init_nncontext()

    training_rdd = get_data_rdd("train", sc)
    testing_rdd = get_data_rdd("test", sc)

    dataset = TFDataset.from_rdd(training_rdd,
                                 features=(tf.float32, [28, 28, 1]),
                                 labels=(tf.int32, []),
                                 batch_size=320,
                                 val_rdd=testing_rdd)

    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

    model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    keras_model = KerasModel(model)

    keras_model.fit(dataset, epochs=max_epoch, distributed=True)

    eval_dataset = TFDataset.from_rdd(testing_rdd,
                                      features=(tf.float32, [28, 28, 1]),
                                      labels=(tf.int32, []),
                                      batch_per_thread=80)
    result = keras_model.evaluate(eval_dataset)

    print(result)
    # >> [0.08865142822265625, 0.9722]

    # the following assert is used for internal testing
    assert result['acc Top1Accuracy'] > 0.95

    # Consistency fix: save through the KerasModel wrapper, as the companion
    # ndarray example does (presumably the wrapper holds the weights updated
    # by the distributed fit — the raw tf.keras model may not).
    keras_model.save_weights("/tmp/mnist_keras.h5")
Пример #24
0
class KerasEstimator(Estimator):
    """Orca Estimator backed by a compiled tf.keras model (TF1 graph mode)."""

    def __init__(self, keras_model, metrics, model_dir, optimizer):
        """
        :param keras_model: compiled tf.keras model to train/evaluate.
        :param metrics: validation metrics used during fit.
        :param model_dir: directory handed to KerasModel (checkpoints/summaries).
        :param optimizer: optional optimizer override; a
               bigdl.orca.learn.optimizers.Optimizer wrapper is unwrapped to
               its underlying optimizer object.
        """
        self.model = KerasModel(keras_model, model_dir)
        # Flipped externally when training should resume from a checkpoint.
        self.load_checkpoint = False
        self.metrics = metrics
        self.tf_optimizer = None
        self.optimizer = optimizer
        from bigdl.orca.learn.optimizers import Optimizer
        if self.optimizer is not None and isinstance(self.optimizer,
                                                     Optimizer):
            self.optimizer = self.optimizer.get_optimizer()
        # Tensorboard / gradient-clipping settings, configured via setters.
        self.log_dir = None
        self.app_name = None
        self.clip_norm = None
        self.clip_min = None
        self.clip_max = None

    def fit(self,
            data,
            epochs=1,
            batch_size=32,
            feature_cols=None,
            label_cols=None,
            validation_data=None,
            session_config=None,
            checkpoint_trigger=None,
            auto_shard_files=True):
        """
        Train this keras model with train data.

        :param data: train data. It can be XShards, Spark DataFrame, tf.data.Dataset.
               If data is XShards, each partition can be a Pandas DataFrame or a dictionary of
               {'x': feature, 'y': label}, where feature(label) is a numpy array or a tuple of
               numpy arrays.
               If data is tf.data.Dataset, each element is [feature tensor tuple, label tensor
               tuple]
        :param epochs: number of epochs to train.
        :param batch_size: total batch size for each iteration.
        :param feature_cols: feature column names if train data is Spark DataFrame or XShards
               of Pandas DataFrame.
        :param label_cols: label column names if train data is Spark DataFrame or XShards of
               Pandas DataFrame.
        :param validation_data: validation data. Validation data type should be the same
               as train data.
        :param session_config: tensorflow session configuration for training.
               Should be object of tf.ConfigProto
        :param checkpoint_trigger: when to trigger checkpoint during training.
               Should be a bigdl.orca.learn.trigger, like EveryEpoch(), SeveralIteration(
               num_iterations),etc.
        :param auto_shard_files: whether to automatically detect if the dataset is file-based
               and apply sharding on files, otherwise sharding on records. Default is True.
        """

        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in training"
            assert label_cols is not None, \
                "label columns is None; it should not be None in training"

        if isinstance(data, tf.data.Dataset):
            assert isinstance(data.element_spec, tuple), \
                "If data is tf.data.Dataset, each element should be " \
                "(feature tensors, label tensor), where each feature/label tensor can be " \
                "either a single tensor or a tuple of tensors"
            if validation_data is not None:
                assert isinstance(validation_data, tf.data.Dataset), \
                    "train data and validation data should be both tf.data.Dataset"
                assert isinstance(validation_data.element_spec, tuple), \
                    "If validation_data is tf.data.Dataset, each element should be " \
                    "(feature tensors, label tensor), where each feature/label tensor can be " \
                    "either a single tensor or a tuple of tensors"

        if isinstance(data, SparkXShards):
            if data._get_class_name() == 'pandas.core.frame.DataFrame':
                assert feature_cols is not None, \
                    "feature columns is None; it should not be None in training"
                assert label_cols is not None, \
                    "label columns is None; it should not be None in training"
                data, validation_data = process_xshards_of_pandas_dataframe(
                    data, feature_cols, label_cols, validation_data, "fit")

        if checkpoint_trigger is not None:
            checkpoint_trigger = Trigger.convert_trigger(checkpoint_trigger)

        if is_tf_data_dataset(data):
            data = data.map(_standardize_keras_target_data)
            # Bug fix: guard against validation_data being None; the original
            # called .map unconditionally and crashed when training a tf.data
            # pipeline without a validation set.
            if validation_data is not None:
                validation_data = validation_data.map(
                    _standardize_keras_target_data)

        memory_type = OrcaContext.train_data_store
        dataset = to_dataset(data,
                             batch_size=batch_size,
                             batch_per_thread=-1,
                             validation_data=validation_data,
                             feature_cols=feature_cols,
                             label_cols=label_cols,
                             hard_code_batch_size=False,
                             sequential_order=False,
                             shuffle=True,
                             auto_shard_files=auto_shard_files,
                             memory_type=memory_type)

        self.tf_optimizer = TFOptimizer.from_keras(
            self.model.model,
            dataset,
            model_dir=self.model.model_dir,
            session_config=session_config,
            metrics=self.metrics,
            optimizer=self.optimizer)

        # Apply any clipping configured before fit was called.
        if self.clip_norm:
            self.tf_optimizer.set_gradient_clipping_by_l2_norm(
                clip_norm=self.clip_norm)
        if self.clip_min and self.clip_max:
            self.tf_optimizer.set_constant_gradient_clipping(
                self.clip_min, self.clip_max)

        if self.load_checkpoint:
            self.tf_optimizer.load_checkpoint(self.checkpoint_path,
                                              self.checkpoint_version)

        if self.log_dir and self.app_name:
            self.tf_optimizer.estimator.set_tensorboard(
                self.log_dir, self.app_name)

        self.tf_optimizer.optimize(MaxEpoch(epochs),
                                   checkpoint_trigger=checkpoint_trigger)

        return self

    def predict(
        self,
        data,
        batch_size=4,
        feature_cols=None,
        auto_shard_files=False,
    ):
        """
        Predict input data

        :param data: data to be predicted.
               It can be XShards, Spark DataFrame, or tf.data.Dataset.
               If data is XShards, each partition can be a Pandas DataFrame or a dictionary of
               {'x': feature}, where feature is a numpy array or a tuple of numpy arrays.
               If data is tf.data.Dataset, each element is feature tensor tuple
        :param batch_size: batch size per thread
        :param feature_cols: list of feature column names if input data is Spark DataFrame or
               XShards of Pandas DataFrame.
        :param auto_shard_files: whether to automatically detect if the dataset is file-based
               and apply sharding on files, otherwise sharding on records. Default is False.
        :return: predicted result.
                 If input data is XShards or tf.data.Dataset, the predict result is also a XShards,
                 and the schema for each result is: {'prediction': predicted numpy array or
                 list of predicted numpy arrays}.
                 If input data is Spark DataFrame, the predict result is a DataFrame which includes
                 original columns plus 'prediction' column. The 'prediction' column can be
                 FloatType, VectorUDT or Array of VectorUDT depending on model outputs shape.
        """

        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in prediction"

        if isinstance(data, SparkXShards):
            if data._get_class_name() == 'pandas.core.frame.DataFrame':
                assert feature_cols is not None, \
                    "feature columns is None; it should not be None in prediction"
                data = process_xshards_of_pandas_dataframe(data, feature_cols)

        assert not is_tf_data_dataset(data), "tf.data.Dataset currently cannot be used for" \
                                             "estimator prediction"

        dataset = to_dataset(
            data,
            batch_size=-1,
            batch_per_thread=batch_size,
            validation_data=None,
            feature_cols=feature_cols,
            label_cols=None,
            hard_code_batch_size=False,
            sequential_order=True,
            shuffle=False,
            auto_shard_files=auto_shard_files,
        )

        predicted_rdd = self.model.predict(dataset, batch_size)
        # Convert the prediction RDD back to the caller's input container type.
        if isinstance(data, DataFrame):
            return convert_predict_rdd_to_dataframe(data, predicted_rdd)
        elif isinstance(data, SparkXShards):
            return convert_predict_rdd_to_xshard(data, predicted_rdd)
        else:
            return predicted_rdd

    def evaluate(self,
                 data,
                 batch_size=32,
                 feature_cols=None,
                 label_cols=None,
                 auto_shard_files=False):
        """
        Evaluate model.

        :param data: evaluation data. It can be XShards, Spark DataFrame, tf.data.Dataset.
               If data is XShards, each partition can be a Pandas DataFrame or a dictionary of
               {'x': feature, 'y': label}, where feature(label) is a numpy array or a tuple of
               numpy arrays.
               If data is tf.data.Dataset, each element is [feature tensor tuple, label tensor
               tuple]
        :param batch_size: batch size per thread.
        :param feature_cols: feature column names if train data is Spark DataFrame or
               XShards of Pandas DataFrame.
        :param label_cols: label column names if train data is Spark DataFrame or XShards
               of Pandas DataFrame.
        :param auto_shard_files: whether to automatically detect if the dataset is file-based
               and apply sharding on files, otherwise sharding on records. Default is False.
        :return: evaluation result as a dictionary of {'metric name': metric value}
        """

        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in evaluation"
            assert label_cols is not None, \
                "label columns is None; it should not be None in evaluation"

        if isinstance(data, SparkXShards):
            if data._get_class_name() == 'pandas.core.frame.DataFrame':
                assert feature_cols is not None, \
                    "feature columns is None; it should not be None in evaluation"
                assert label_cols is not None, \
                    "label columns is None; it should not be None in evaluation"
                data = process_xshards_of_pandas_dataframe(
                    data, feature_cols, label_cols)

        dataset = to_dataset(data,
                             batch_size=-1,
                             batch_per_thread=batch_size,
                             validation_data=None,
                             feature_cols=feature_cols,
                             label_cols=label_cols,
                             hard_code_batch_size=False,
                             sequential_order=True,
                             shuffle=False,
                             auto_shard_files=auto_shard_files)

        return self.model.evaluate(dataset, batch_per_thread=batch_size)

    @enable_multi_fs_save
    def save_keras_model(self, path, overwrite=True):
        """
        Save tensorflow keras model in this estimator.

        :param path: keras model save path.
        :param overwrite: Whether to silently overwrite any existing file at the target location.
        """
        self.model.save_model(path, overwrite=overwrite)

    def get_model(self):
        """
        Get the trained Keras model

        :return: The trained Keras model
        """
        return self.model.model

    @enable_multi_fs_save
    def save(self, model_path, overwrite=True):
        """
        Save model to model_path

        :param model_path: path to save the trained model.
        :param overwrite: Whether to silently overwrite any existing file at the target location.

        :return:
        """
        self.save_keras_model(model_path, overwrite=overwrite)

    @enable_multi_fs_load
    def load(self, model_path):
        """
        Load existing keras model

        :param model_path: Path to the existing keras model.
        :return:
        """
        self.model = KerasModel.load_model(model_path)

    def clear_gradient_clipping(self):
        """
        Clear gradient clipping parameters. In this case, gradient clipping will not be applied.
        In order to take effect, it needs to be called before fit.

        :return:
        """
        self.clip_norm = None
        self.clip_min = None
        self.clip_max = None

    def set_constant_gradient_clipping(self, min, max):
        """
        Set constant gradient clipping during the training process.
        In order to take effect, it needs to be called before fit.

        :param min: The minimum value to clip by.
        :param max: The maximum value to clip by.
        :return:
        """
        # NOTE(review): this rejects negative minima (e.g. clip to [-5, 5]),
        # which is a common clipping range — confirm this restriction is
        # intentional before relaxing it.
        assert min > 0, "clip value should be larger than 0"
        assert min < max, "clip max should be larger than clip min"
        self.clip_min = min
        self.clip_max = max

    def set_l2_norm_gradient_clipping(self, clip_norm):
        """
        Clip gradient to a maximum L2-Norm during the training process.
        In order to take effect, it needs to be called before fit.

        :param clip_norm: Gradient L2-Norm threshold.
        :return:
        """
        self.clip_norm = clip_norm

    @enable_multi_fs_save
    def save_keras_weights(self, filepath, overwrite=True, save_format=None):
        """
        Save tensorflow keras model weights in this estimator.

        :param filepath: keras model weights save path.
        :param overwrite: Whether to silently overwrite any existing file at the target location.
        :param save_format: Either 'tf' or 'h5'. A `filepath` ending in '.h5' or
               '.keras' will default to HDF5 if `save_format` is `None`. Otherwise
               `None` defaults to 'tf'.
        """
        self.model.save_weights(filepath, overwrite, save_format)

    @enable_multi_fs_load
    def load_keras_weights(self, filepath, by_name=False):
        """
        Load tensorflow keras model weights into this estimator.

        :param filepath: keras model weights save path.
        :param by_name: Boolean, whether to load weights by name or by topological
               order. Only topological loading is supported for weight files in
               TensorFlow format.
        """
        self.model.load_weights(filepath, by_name)
Пример #25
0
def main(max_epoch):
    """Train MNIST on locally-loaded ndarrays and verify >95% test accuracy."""
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    if cluster_mode.startswith("yarn"):
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \
                "set the environment variable HADOOP_CONF_DIR"
        spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \
            .set("spark.executor.cores", 2) \
            .set("spark.executor.instances", 2) \
            .set("spark.driver.memory", "2g")
        # Any other yarn-* mode falls back to yarn-cluster, as before.
        mode = "yarn-client" if cluster_mode == "yarn-client" else "yarn-cluster"
        _ = init_nncontext(spark_conf,
                           cluster_mode=mode,
                           hadoop_conf=hadoop_conf)
    else:
        _ = init_nncontext()

    (train_images, train_labels) = mnist.read_data_sets("/tmp/mnist", "train")
    (test_images, test_labels) = mnist.read_data_sets("/tmp/mnist", "test")

    # Normalize both splits with the training-set statistics.
    train_images = (train_images - mnist.TRAIN_MEAN) / mnist.TRAIN_STD
    test_images = (test_images - mnist.TRAIN_MEAN) / mnist.TRAIN_STD

    net = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])
    net.compile(optimizer='rmsprop',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])

    keras_model = KerasModel(net)
    keras_model.fit(train_images,
                    train_labels,
                    validation_data=(test_images, test_labels),
                    epochs=max_epoch,
                    batch_size=320,
                    distributed=True)

    result = keras_model.evaluate(test_images,
                                  test_labels,
                                  distributed=True,
                                  batch_per_thread=80)

    print(result)
    # >> [0.08865142822265625, 0.9722]

    # the following assert is used for internal testing
    assert result['acc Top1Accuracy'] > 0.95

    keras_model.save_weights("/tmp/mnist_keras.h5")