def test_estimator_graph_checkpoint(self):
        import zoo.orca.data.pandas
        tf.reset_default_graph()

        model = SimpleModel()
        file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy(), df['item'].to_numpy()),
                "y": df['label'].to_numpy()
            }
            return result

        data_shard = data_shard.transform_shard(transform)

        temp = tempfile.mkdtemp()
        model_dir = os.path.join(temp, "test_model")

        est = Estimator.from_graph(
            inputs=[model.user, model.item],
            labels=[model.label],
            loss=model.loss,
            optimizer=tf.train.AdamOptimizer(),
            metrics={"loss": model.loss},
            model_dir=model_dir
        )
        est.fit(data=data_shard,
                batch_size=8,
                epochs=6,
                validation_data=data_shard,
                checkpoint_trigger=SeveralIteration(4))

        est.sess.close()

        tf.reset_default_graph()

        model = SimpleModel()

        est = Estimator.from_graph(
            inputs=[model.user, model.item],
            labels=[model.label],
            loss=model.loss,
            optimizer=tf.train.AdamOptimizer(),
            metrics={"loss": model.loss},
            model_dir=model_dir
        )

        est.load_orca_checkpoint(model_dir)

        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                validation_data=data_shard)

        result = est.evaluate(data_shard)
        assert "loss" in result
        print(result)
        shutil.rmtree(temp)
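
# NOTE: SimpleModel is used throughout the graph-mode snippets below but its
# definition is not included here (it would sit at module level in the original
# test file). A minimal sketch, assuming a TF1 NCF-style model whose attributes
# (user, item, label, logits, loss) match how they are used in these tests;
# vocabulary sizes and layer widths are illustrative only.
class SimpleModel(object):
    def __init__(self, user_count=200, item_count=100, embed_dim=8):
        self.user = tf.placeholder(dtype=tf.int32, shape=(None,))
        self.item = tf.placeholder(dtype=tf.int32, shape=(None,))
        self.label = tf.placeholder(dtype=tf.int32, shape=(None,))

        user_table = tf.get_variable("user_embedding", shape=[user_count, embed_dim])
        item_table = tf.get_variable("item_embedding", shape=[item_count, embed_dim])
        feature = tf.concat([tf.nn.embedding_lookup(user_table, self.user),
                             tf.nn.embedding_lookup(item_table, self.item)], axis=1)
        hidden = tf.layers.dense(feature, 10, activation=tf.nn.relu)
        # two-class logits, consistent with prediction.shape[1] == 2 asserted below
        self.logits = tf.layers.dense(hidden, 2)
        self.loss = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(labels=self.label,
                                                   logits=self.logits))
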
    def test_estimator_graph_pandas_dataframe(self):
        import zoo.orca.data.pandas
        tf.reset_default_graph()

        model = SimpleModel()
        file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        est = Estimator.from_graph(
            inputs=[model.user, model.item],
            labels=[model.label],
            loss=model.loss,
            optimizer=tf.train.AdamOptimizer(),
            metrics={"loss": model.loss})
        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                feature_cols=['user', 'item'],
                label_cols=['label'],
                validation_data=data_shard)
        result = est.evaluate(data_shard, feature_cols=['user', 'item'], label_cols=['label'])
        assert "loss" in result
        print(result)

        est = Estimator.from_graph(
            inputs=[model.user, model.item],
            outputs=[model.logits])
        predictions = est.predict(data_shard, feature_cols=['user', 'item']).collect()
        print(predictions)
    def test_estimator_keras_tensorboard(self):
        import zoo.orca.data.pandas

        tf.reset_default_graph()

        model = self.create_model()
        file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy().reshape([-1, 1]),
                      df['item'].to_numpy().reshape([-1, 1])),
                "y":
                df['label'].to_numpy()
            }
            return result

        data_shard = data_shard.transform_shard(transform)

        temp = tempfile.mkdtemp()
        model_dir = os.path.join(temp, "test_model")

        est = Estimator.from_keras(keras_model=model, model_dir=model_dir)

        assert est.get_train_summary("Loss") is None
        assert est.get_validation_summary("Top1Accuracy") is None

        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                validation_data=data_shard)

        train_loss = est.get_train_summary("Loss")
        assert len(train_loss) > 0
        val_scores = est.get_validation_summary("Top1Accuracy")
        assert len(val_scores) > 0

        tf.reset_default_graph()
        # no model dir
        model = self.create_model()
        est = Estimator.from_keras(keras_model=model)
        log_dir = os.path.join(temp, "log")
        est.set_tensorboard(log_dir, "test")

        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                validation_data=data_shard)

        assert os.path.exists(os.path.join(log_dir, "test/train"))
        assert os.path.exists(os.path.join(log_dir, "test/validation"))

        train_loss = est.get_train_summary("Loss")
        val_scores = est.get_validation_summary("Loss")
        assert len(train_loss) > 0
        assert len(val_scores) > 0
        shutil.rmtree(temp)
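
    # NOTE: create_model() is called by the Keras tests in this file but is not
    # shown in these snippets. A minimal sketch, assuming a two-input tf.keras
    # model over (user, item) with a 2-class softmax head, consistent with the
    # shapes and metrics asserted in these tests; sizes are illustrative only.
    def create_model(self):
        user_input = tf.keras.layers.Input(shape=(1,), dtype='int32')
        item_input = tf.keras.layers.Input(shape=(1,), dtype='int32')
        user_embed = tf.keras.layers.Flatten()(
            tf.keras.layers.Embedding(200, 8)(user_input))
        item_embed = tf.keras.layers.Flatten()(
            tf.keras.layers.Embedding(100, 8)(item_input))
        feature = tf.keras.layers.Concatenate()([user_embed, item_embed])
        hidden = tf.keras.layers.Dense(10, activation='relu')(feature)
        output = tf.keras.layers.Dense(2, activation='softmax')(hidden)
        model = tf.keras.Model(inputs=[user_input, item_input], outputs=output)
        model.compile(optimizer='rmsprop',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        return model
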
    def test_estimator_keras_save_load(self):
        import zoo.orca.data.pandas

        tf.reset_default_graph()

        model = self.create_model()
        file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy().reshape([-1, 1]),
                      df['item'].to_numpy().reshape([-1, 1])),
                "y": df['label'].to_numpy()
            }
            return result

        data_shard = data_shard.transform_shard(transform)

        est = Estimator.from_keras(keras_model=model)
        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                validation_data=data_shard)

        eval_result = est.evaluate(data_shard)
        print(eval_result)

        temp = tempfile.mkdtemp()
        model_path = os.path.join(temp, 'test.h5')
        est.save_keras_model(model_path)

        tf.reset_default_graph()

        from tensorflow.python.keras import models
        from zoo.common.utils import load_from_file

        def load_func(file_path):
            return models.load_model(file_path)

        model = load_from_file(load_func, model_path)
        est = Estimator.from_keras(keras_model=model)

        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy().reshape([-1, 1]),
                      df['item'].to_numpy().reshape([-1, 1])),
            }
            return result

        data_shard = data_shard.transform_shard(transform)
        predictions = est.predict(data_shard).collect()
        assert predictions[0]['prediction'].shape[1] == 2
        shutil.rmtree(temp)
    def test_estimator_keras_xshards_options(self):
        import zoo.orca.data.pandas

        tf.reset_default_graph()

        model = self.create_model()
        file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy().reshape([-1, 1]),
                      df['item'].to_numpy().reshape([-1, 1])),
                "y": df['label'].to_numpy()
            }
            return result

        data_shard = data_shard.transform_shard(transform)

        est = Estimator.from_keras(keras_model=model)
        # train with no validation
        est.fit(data=data_shard,
                batch_size=8,
                epochs=10)
        # train again with a fresh estimator instance
        est = Estimator.from_keras(keras_model=model)
        est.fit(data=data_shard,
                batch_size=8,
                epochs=10
                )
        # train with session config
        tf_session_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                                           intra_op_parallelism_threads=1)
        est = Estimator.from_keras(keras_model=model)
        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                session_config=tf_session_config
                )
        # train with model dir
        temp = tempfile.mkdtemp()
        model_dir = os.path.join(temp, "model")
        est = Estimator.from_keras(keras_model=model, model_dir=model_dir)
        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                validation_data=data_shard)
        assert len(os.listdir(model_dir)) > 0
        shutil.rmtree(temp)
    def test_estimator_keras_xshards_checkpoint(self):
        import zoo.orca.data.pandas

        tf.reset_default_graph()

        model = self.create_model()
        file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy().reshape([-1, 1]),
                      df['item'].to_numpy().reshape([-1, 1])),
                "y":
                df['label'].to_numpy()
            }
            return result

        data_shard = data_shard.transform_shard(transform)

        temp = tempfile.mkdtemp()
        model_dir = os.path.join(temp, "test_model")

        est = Estimator.from_keras(keras_model=model, model_dir=model_dir)
        est.fit(data=data_shard,
                batch_size=8,
                epochs=6,
                validation_data=data_shard,
                checkpoint_trigger=SeveralIteration(4))

        eval_result = est.evaluate(data_shard)
        print(eval_result)

        tf.reset_default_graph()

        model = self.create_model()

        est = Estimator.from_keras(keras_model=model, model_dir=model_dir)
        est.load_orca_checkpoint(model_dir)
        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                validation_data=data_shard,
                checkpoint_trigger=SeveralIteration(4))

        eval_result = est.evaluate(data_shard)
        print(eval_result)
        shutil.rmtree(temp)
    def test_estimator_keras_dataframe_mem_type(self):
        tf.reset_default_graph()

        model = self.create_model()
        sc = init_nncontext()
        sqlcontext = SQLContext(sc)
        file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
        df = sqlcontext.read.csv(file_path, header=True, inferSchema=True)
        from pyspark.sql.functions import array
        df = df.withColumn('user', array('user')) \
            .withColumn('item', array('item'))

        est = Estimator.from_keras(keras_model=model)
        OrcaContext.train_data_store = "DISK_2"
        est.fit(data=df,
                batch_size=4,
                epochs=4,
                feature_cols=['user', 'item'],
                label_cols=['label'],
                validation_data=df)

        eval_result = est.evaluate(df,
                                   feature_cols=['user', 'item'],
                                   label_cols=['label'])
        assert 'acc Top1Accuracy' in eval_result

        prediction_df = est.predict(df,
                                    batch_size=4,
                                    feature_cols=['user', 'item'])
        assert 'prediction' in prediction_df.columns
        predictions = prediction_df.collect()
        assert len(predictions) == 48
        OrcaContext.train_data_store = "DRAM"
    def test_estimator_graph_dataframe(self):
        tf.reset_default_graph()

        model = SimpleModel()
        file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
        sc = init_nncontext()
        sqlcontext = SQLContext(sc)
        df = sqlcontext.read.csv(file_path, header=True, inferSchema=True)

        est = Estimator.from_graph(
            inputs=[model.user, model.item],
            labels=[model.label],
            outputs=[model.logits],
            loss=model.loss,
            optimizer=tf.train.AdamOptimizer(),
            metrics={"loss": model.loss})

        est.fit(data=df,
                batch_size=8,
                epochs=10,
                feature_cols=['user', 'item'],
                label_cols=['label'],
                validation_data=df)

        result = est.evaluate(df, batch_size=4, feature_cols=['user', 'item'],
                              label_cols=['label'])
        print(result)

        prediction_df = est.predict(df, batch_size=4, feature_cols=['user', 'item'])
        assert 'prediction' in prediction_df.columns
        predictions = prediction_df.collect()
        assert len(predictions) == 48
    def test_estimator_graph_tf_dataset(self):
        tf.reset_default_graph()

        model = SimpleModel()

        dataset = tf.data.Dataset.from_tensor_slices(
            (np.random.randint(0, 200, size=(100, )),
             np.random.randint(0, 50, size=(100, )),
             np.ones(shape=(100, ), dtype=np.int32)))

        est = Estimator.from_graph(inputs=[model.user, model.item],
                                   labels=[model.label],
                                   outputs=[model.logits],
                                   loss=model.loss,
                                   optimizer=tf.train.AdamOptimizer(),
                                   metrics={"loss": model.loss})
        est.fit(data=dataset, batch_size=8, epochs=10, validation_data=dataset)

        result = est.evaluate(dataset, batch_size=4)
        assert 'loss' in result

        predict_dataset = tf.data.Dataset.from_tensor_slices(
            (np.random.randint(0, 200, size=(20, )),
             np.random.randint(0, 50, size=(20, ))))
        predictions = est.predict(predict_dataset).collect()
        assert predictions[0]['prediction'].shape[1] == 2
def main(max_epoch, dataset_dir):

    mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
    mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)

    mnist_train = mnist_train.map(preprocess)
    mnist_test = mnist_test.map(preprocess)

    # tensorflow inputs
    images = tf.placeholder(dtype=tf.float32, shape=(None, 28, 28, 1))
    # tensorflow labels
    labels = tf.placeholder(dtype=tf.int32, shape=(None,))

    logits = lenet(images)

    loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    acc = accuracy(logits, labels)

    # create an estimator
    est = Estimator.from_graph(inputs=images,
                               outputs=logits,
                               labels=labels,
                               loss=loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"acc": acc})
    est.fit(data=mnist_train,
            batch_size=320,
            epochs=max_epoch,
            validation_data=mnist_test)

    result = est.evaluate(mnist_test)
    print(result)

    est.save_tf_checkpoint("/tmp/lenet/model")
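
# NOTE: preprocess, lenet and accuracy used by main() above are not defined in
# these snippets. Minimal sketches, assuming tfds elements are dicts with
# 'image' (uint8, 28x28x1) and 'label' keys and a plain TF1 LeNet-style
# network; layer sizes follow the Keras variants shown elsewhere in this file.
def preprocess(data):
    # scale images to [0, 1] and return an (image, label) tuple for Estimator.fit
    return tf.cast(data['image'], tf.float32) / 255., data['label']


def lenet(images):
    x = tf.layers.conv2d(images, 20, (5, 5), activation=tf.nn.tanh)
    x = tf.layers.max_pooling2d(x, (2, 2), strides=(2, 2))
    x = tf.layers.conv2d(x, 50, (5, 5), activation=tf.nn.tanh)
    x = tf.layers.max_pooling2d(x, (2, 2), strides=(2, 2))
    x = tf.layers.flatten(x)
    x = tf.layers.dense(x, 500, activation=tf.nn.tanh)
    return tf.layers.dense(x, 10)


def accuracy(logits, labels):
    # fraction of samples whose arg-max prediction matches the integer label
    predictions = tf.argmax(logits, axis=1, output_type=labels.dtype)
    return tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32))
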
    def test_estimator_graph_with_bigdl_optim_method(self):
        import zoo.orca.data.pandas

        tf.reset_default_graph()

        model = SimpleModel()
        file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy(), df['item'].to_numpy()),
                "y": df['label'].to_numpy()
            }
            return result

        data_shard = data_shard.transform_shard(transform)
        from zoo.orca.learn.optimizers import SGD
        from zoo.orca.learn.optimizers.schedule import Plateau
        sgd = SGD(learningrate=0.1,
                  learningrate_schedule=Plateau("score",
                                                factor=0.1,
                                                patience=10,
                                                mode="min", ))
        est = Estimator.from_graph(
            inputs=[model.user, model.item],
            labels=[model.label],
            outputs=[model.logits],
            loss=model.loss,
            optimizer=sgd,
            metrics={"loss": model.loss})
        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                validation_data=data_shard)
def test_estimator_graph_fit_dataset(estimator_for_spark_fixture):
    import zoo.orca.data.pandas
    tf.reset_default_graph()
    model = SimpleModel()
    sc = estimator_for_spark_fixture
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = zoo.orca.data.pandas.read_csv(file_path, sc)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)
    dataset = Dataset.from_tensor_slices(data_shard)

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss})
    est.fit(data=dataset, batch_size=8, steps=10, validation_data=dataset)

    result = est.evaluate(dataset, batch_size=4)
    assert 'loss' in result
def test_estimator_graph_evaluate(estimator_for_spark_fixture):
    import zoo.orca.data.pandas
    tf.reset_default_graph()

    model = SimpleModel()
    sc = estimator_for_spark_fixture
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = zoo.orca.data.pandas.read_csv(file_path, sc)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss})
    result = est.evaluate(data_shard)
    assert "loss" in result
    print(result)
    def test_estimator_graph_fit(self):
        import zoo.orca.data.pandas
        tf.reset_default_graph()

        model = SimpleModel()
        file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy(), df['item'].to_numpy()),
                "y": df['label'].to_numpy()
            }
            return result

        data_shard = data_shard.transform_shard(transform)

        est = Estimator.from_graph(
            inputs=[model.user, model.item],
            labels=[model.label],
            loss=model.loss,
            optimizer=tf.train.AdamOptimizer(),
            metrics={"loss": model.loss})
        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                validation_data=data_shard)
    def test_estimator_keras_xshards_with_mem_type(self):
        import zoo.orca.data.pandas

        tf.reset_default_graph()

        model = self.create_model()
        file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy().reshape([-1, 1]),
                      df['item'].to_numpy().reshape([-1, 1])),
                "y":
                df['label'].to_numpy()
            }
            return result

        data_shard = data_shard.transform_shard(transform)

        est = Estimator.from_keras(keras_model=model)
        OrcaContext.train_data_store = "DISK_2"
        est.fit(data=data_shard,
                batch_size=4,
                epochs=10,
                validation_data=data_shard)

        eval_result = est.evaluate(data_shard)
        print(eval_result)
        OrcaContext.train_data_store = "DRAM"
    def test_estimator_keras_xshards_clip(self):
        import zoo.orca.data.pandas

        tf.reset_default_graph()

        model = self.create_model_with_clip()
        file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy().reshape([-1, 1]),
                      df['item'].to_numpy().reshape([-1, 1])),
                "y":
                df['label'].to_numpy()
            }
            return result

        data_shard = data_shard.transform_shard(transform)

        est = Estimator.from_keras(keras_model=model)
        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                validation_data=data_shard)
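
    # NOTE: create_model_with_clip() is not shown in these snippets. A minimal
    # sketch, assuming it builds the same two-input model as create_model() but
    # compiles it with gradient clipping on the optimizer; the clip values are
    # illustrative only.
    def create_model_with_clip(self):
        model = self.create_model()
        model.compile(optimizer=tf.keras.optimizers.RMSprop(clipnorm=1.2,
                                                            clipvalue=0.2),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        return model
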
def main(max_epoch):
    sc = init_orca_context(cores=4, memory="2g")

    # get DataSet
    # as_supervised returns tuple (img, label) instead of dict {'image': img, 'label':label}
    mnist_train = tfds.load(name="mnist", split="train", as_supervised=True)
    mnist_test = tfds.load(name="mnist", split="test", as_supervised=True)

    # Normalizes images, uint8 -> float32
    def normalize_img(image, label):
        return tf.cast(image, tf.float32) / 255., label

    mnist_train = mnist_train.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    mnist_test = mnist_test.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(20,
                               kernel_size=(5, 5),
                               strides=(1, 1),
                               activation='tanh',
                               input_shape=(28, 28, 1),
                               padding='valid'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding='valid'),
        tf.keras.layers.Conv2D(50,
                               kernel_size=(5, 5),
                               strides=(1, 1),
                               activation='tanh',
                               padding='valid'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding='valid'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(500, activation='tanh'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

    model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    est = Estimator.from_keras(keras_model=model)
    est.fit(data=mnist_train,
            batch_size=320,
            epochs=max_epoch,
            validation_data=mnist_test)

    result = est.evaluate(mnist_test)
    print(result)

    est.save_keras_model("/tmp/mnist_keras.h5")
    stop_orca_context()
def main(max_epoch):

    # get DataSet
    (train_feature, train_label), (val_feature, val_label) = \
        tf.keras.datasets.mnist.load_data()

    # tf.data.Dataset.from_tensor_slices is for demo only. For production use, please use
    # file-based approach (e.g. tfrecord).
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (train_feature, train_label))
    train_dataset = train_dataset.map(preprocess)
    val_dataset = tf.data.Dataset.from_tensor_slices((val_feature, val_label))
    val_dataset = val_dataset.map(preprocess)

    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(20,
                               kernel_size=(5, 5),
                               strides=(1, 1),
                               activation='tanh',
                               input_shape=(28, 28, 1),
                               padding='valid'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding='valid'),
        tf.keras.layers.Conv2D(50,
                               kernel_size=(5, 5),
                               strides=(1, 1),
                               activation='tanh',
                               padding='valid'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding='valid'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(500, activation='tanh'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

    model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    est = Estimator.from_keras(keras_model=model)
    est.fit(data=train_dataset,
            batch_size=320,
            epochs=max_epoch,
            validation_data=val_dataset)

    result = est.evaluate(val_dataset)
    print(result)

    est.save_keras_model("/tmp/mnist_keras.h5")
def test_train_simple(orca_context_fixture):
    sc = orca_context_fixture
    temp_dir = tempfile.mkdtemp()

    try:
        _write_ndarrays(images=np.random.randn(500, 28, 28, 1).astype(np.float32),
                        labels=np.random.randint(0, 10, (500,)).astype(np.int32),
                        output_path=temp_dir)
        dataset = ParquetDataset.read_as_tf(temp_dir)

        def preprocess(data):
            return data['image'], data["label"]

        dataset = dataset.map(preprocess)

        import tensorflow as tf
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(20,
                                   kernel_size=(5, 5),
                                   strides=(1, 1),
                                   activation='tanh',
                                   input_shape=(28, 28, 1),
                                   padding='valid'),
            tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
                                         strides=(2, 2),
                                         padding='valid'),
            tf.keras.layers.Conv2D(50,
                                   kernel_size=(5, 5),
                                   strides=(1, 1),
                                   activation='tanh',
                                   padding='valid'),
            tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
                                         strides=(2, 2),
                                         padding='valid'),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(500, activation='tanh'),
            tf.keras.layers.Dense(10, activation='softmax'),
        ])

        model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        est = Estimator.from_keras(keras_model=model)
        est.fit(data=dataset, batch_size=100, epochs=1)

    finally:
        shutil.rmtree(temp_dir)
def main(max_epoch):
    sc = init_orca_context(cores=4, memory="2g")

    # get DataSet
    mnist_train = tfds.load(name="mnist", split="train")
    mnist_test = tfds.load(name="mnist", split="test")

    # Normalizes images
    def normalize_img(data):
        data['image'] = tf.cast(data["image"], tf.float32) / 255.
        return data

    mnist_train = mnist_train.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    mnist_test = mnist_test.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    # tensorflow inputs
    images = tf.placeholder(dtype=tf.float32, shape=(None, 28, 28, 1))
    # tensorflow labels
    labels = tf.placeholder(dtype=tf.int32, shape=(None, ))

    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images,
                                         num_classes=10,
                                         is_training=True)

    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    acc = accuracy(logits, labels)

    # create an estimator
    est = Estimator.from_graph(inputs=images,
                               outputs=logits,
                               labels=labels,
                               loss=loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"acc": acc})
    est.fit(data=mnist_train,
            batch_size=320,
            epochs=max_epoch,
            validation_data=mnist_test)

    result = est.evaluate(mnist_test)
    print(result)

    est.save_tf_checkpoint("/tmp/lenet/model")
    stop_orca_context()
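
# NOTE: in the graph-mode main() above, `slim` and `lenet` are assumed to come
# from TF1's contrib slim package and the TF-Slim model zoo, e.g.
# `import tensorflow.contrib.slim as slim` and `from nets import lenet`
# (tensorflow/models research/slim); those imports are not shown in the snippet.
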
    def test_estimator_keras_tf_dataset(self):
        tf.reset_default_graph()

        model = self.create_model()

        dataset = tf.data.Dataset.from_tensor_slices(
            (np.random.randint(0, 200, size=(100, 1)),
             np.random.randint(0, 50, size=(100, 1)),
             np.ones(shape=(100, ), dtype=np.int32)))
        dataset = dataset.map(lambda user, item, label: [(user, item), label])
        est = Estimator.from_keras(keras_model=model)
        est.fit(data=dataset, batch_size=8, epochs=10, validation_data=dataset)

        eval_result = est.evaluate(dataset)
        assert 'acc Top1Accuracy' in eval_result
    def test_estimator_graph_dataframe_exception(self):

        tf.reset_default_graph()

        model = SimpleModel()
        file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
        sc = init_nncontext()
        sqlcontext = SQLContext(sc)
        df = sqlcontext.read.csv(file_path, header=True, inferSchema=True)

        est = Estimator.from_graph(inputs=[model.user, model.item],
                                   labels=[model.label],
                                   outputs=[model.logits],
                                   loss=model.loss,
                                   optimizer=tf.train.AdamOptimizer(),
                                   metrics={"loss": model.loss})

        with self.assertRaises(Exception) as context:
            est.fit(data=df,
                    batch_size=8,
                    epochs=10,
                    feature_cols=['user', 'item'],
                    validation_data=df)
        self.assertTrue(
            'label columns is None; it should not be None in training' in str(
                context.exception))

        est.fit(data=df,
                batch_size=8,
                epochs=10,
                feature_cols=['user', 'item'],
                labels_cols=['label'])
        with self.assertRaises(Exception) as context:
            predictions = est.predict(df, batch_size=4).collect()
        self.assertTrue(
            'feature columns is None; it should not be None in prediction' in
            str(context.exception))

        with self.assertRaises(Exception) as context:
            est.fit(data=df,
                    batch_size=8,
                    epochs=10,
                    feature_cols=['user', 'item'],
                    labels_cols=['label'],
                    validation_data=[1, 2, 3])
        self.assertTrue(
            'train data and validation data should be both Spark DataFrame' in
            str(context.exception))
    def _test_estimator_graph_tf_dataset(self, dataset_creator):
        tf.reset_default_graph()

        model = SimpleModel()

        dataset = dataset_creator()

        est = Estimator.from_graph(inputs=[model.user, model.item],
                                   labels=[model.label],
                                   outputs=[model.logits],
                                   loss=model.loss,
                                   optimizer=tf.train.AdamOptimizer(),
                                   metrics={"loss": model.loss})
        est.fit(data=dataset, batch_size=8, epochs=10, validation_data=dataset)

        result = est.evaluate(dataset, batch_size=4)
        assert 'loss' in result
def main(max_epoch, dataset_dir):

    mnist_train = tfds.load(name="mnist", split="train", data_dir=dataset_dir)
    mnist_test = tfds.load(name="mnist", split="test", data_dir=dataset_dir)

    mnist_train = mnist_train.map(preprocess)
    mnist_test = mnist_test.map(preprocess)

    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(20,
                               kernel_size=(5, 5),
                               strides=(1, 1),
                               activation='tanh',
                               input_shape=(28, 28, 1),
                               padding='valid'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding='valid'),
        tf.keras.layers.Conv2D(50,
                               kernel_size=(5, 5),
                               strides=(1, 1),
                               activation='tanh',
                               padding='valid'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding='valid'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(500, activation='tanh'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

    model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    est = Estimator.from_keras(keras_model=model)
    est.fit(data=mnist_train,
            batch_size=320,
            epochs=max_epoch,
            validation_data=mnist_test,
            auto_shard_files=False)

    result = est.evaluate(mnist_test, auto_shard_files=False)
    print(result)

    est.save_keras_model("/tmp/mnist_keras.h5")
def train(train_data, test_data, user_size, item_size):
    model = NCF(opt.embedding_size, user_size, item_size)

    estimator = Estimator.from_graph(inputs=[model.user, model.item],
                                     outputs=[model.class_number],
                                     labels=[model.label],
                                     loss=model.loss,
                                     optimizer=model.optim,
                                     model_dir=opt.model_dir,
                                     metrics={"loss": model.loss})

    estimator.fit(data=train_data,
                  batch_size=opt.batch_size,
                  epochs=opt.epochs,
                  validation_data=test_data)

    checkpoint_path = os.path.join(opt.model_dir, "NCF.ckpt")
    estimator.save_tf_checkpoint(checkpoint_path)
    estimator.sess.close()
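
# NOTE: in train() above, `NCF` and `opt` are defined elsewhere in the original
# example; `opt` is assumed to be a parsed argparse namespace carrying
# embedding_size, batch_size, epochs and model_dir, and NCF a graph-mode model
# exposing user, item, label, class_number, loss and optim attributes.
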
def test_estimator_graph(estimator_for_spark_fixture):
    import zoo.orca.data.pandas

    sc = estimator_for_spark_fixture

    tf.reset_default_graph()

    model = SimpleModel()
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = zoo.orca.data.pandas.read_csv(file_path, sc)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               outputs=[model.logits],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss})
    est.fit(data=data_shard,
            batch_size=8,
            steps=10,
            validation_data=data_shard)

    data_shard = zoo.orca.data.pandas.read_csv(file_path, sc)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
        }
        return result

    data_shard = data_shard.transform_shard(transform)
    predictions = est.predict(data_shard).collect()
    print(predictions)
    def test_estimator_graph_predict_dataset(self):
        import zoo.orca.data.pandas

        tf.reset_default_graph()

        model = SimpleModel()
        file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        est = Estimator.from_graph(inputs=[model.user, model.item],
                                   outputs=[model.logits])

        def transform(df):
            result = {
                "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            }
            return result

        data_shard = data_shard.transform_shard(transform)
        dataset = Dataset.from_tensor_slices(data_shard)
        predictions = est.predict(dataset).collect()
        assert len(predictions) == 10
    def test_estimator_keras_get_model(self):
        tf.reset_default_graph()

        model = self.create_model()
        sc = init_nncontext()
        sqlcontext = SQLContext(sc)
        file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
        df = sqlcontext.read.csv(file_path, header=True, inferSchema=True)
        from pyspark.sql.functions import array
        df = df.withColumn('user', array('user')) \
            .withColumn('item', array('item'))

        est = Estimator.from_keras(keras_model=model)
        est.fit(data=df,
                batch_size=4,
                epochs=4,
                feature_cols=['user', 'item'],
                label_cols=['label'],
                validation_data=df)
        assert est.get_model() is model
    def test_submodel_in_keras_sequential(self):
        mnet = tf.keras.applications.MobileNetV2(input_shape=(160, 160, 3),
                                                 include_top=False,
                                                 weights='imagenet')

        model = tf.keras.Sequential([
            mnet,
            tf.keras.layers.GlobalAveragePooling2D(),
            tf.keras.layers.Dense(1, activation='sigmoid')
        ])

        model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001),
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        dataset = tf.data.Dataset.from_tensor_slices(
            (np.random.randn(16, 160, 160, 3),
             # binary labels in {0, 1} to match the sigmoid/binary_crossentropy head
             np.random.randint(0, 2, (16, 1))))
        est = Estimator.from_keras(keras_model=model)
        est.fit(data=dataset, batch_size=4, epochs=1, validation_data=dataset)
    def test_estimator_keras_xshards(self):
        import zoo.orca.data.pandas

        tf.reset_default_graph()

        model = self.create_model()
        file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy().reshape([-1, 1]),
                      df['item'].to_numpy().reshape([-1, 1])),
                "y":
                df['label'].to_numpy()
            }
            return result

        data_shard = data_shard.transform_shard(transform)

        est = Estimator.from_keras(keras_model=model)
        est.fit(data=data_shard,
                batch_size=8,
                epochs=10,
                validation_data=data_shard)

        eval_result = est.evaluate(data_shard)
        print(eval_result)

        data_shard = zoo.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy().reshape([-1, 1]),
                      df['item'].to_numpy().reshape([-1, 1])),
            }
            return result

        data_shard = data_shard.transform_shard(transform)
        predictions = est.predict(data_shard).collect()
        assert predictions[0]['prediction'].shape[1] == 2