Example #1
    def test_tf_optimizer_metrics(self):

        features = np.random.randn(20, 10)
        labels = np.random.randint(0, 10, size=[20])
        with tf.Graph().as_default():
            dataset = TFDataset.from_ndarrays((features, labels),
                                              batch_size=4,
                                              val_tensors=(features, labels))
            feature_tensor, label_tensor = dataset.tensors
            features = tf.layers.dense(feature_tensor, 8)
            output = tf.layers.dense(features, 10)
            loss = tf.reduce_mean(tf.losses.
                                  sparse_softmax_cross_entropy(logits=output,
                                                               labels=label_tensor))
            optimizer = TFOptimizer.from_loss(loss, {"dense/": Adam(1e-3), "dense_1/": SGD(0.0)},
                                              val_outputs=[output],
                                              val_labels=[label_tensor],
                                              val_method=Accuracy(), metrics={"loss": loss})
            initial_weights = optimizer.tf_model.training_helper_layer.get_weights()
            optimizer.optimize(end_trigger=MaxEpoch(1))
            updated_weights = optimizer.tf_model.training_helper_layer.get_weights()
            for i in [0, 1]:  # weights and bias under the "dense/" prefix should be updated
                assert not np.allclose(initial_weights[i], updated_weights[i])
            for i in [2, 3]:  # weights and bias under the "dense_1/" prefix should stay unchanged
                assert np.allclose(initial_weights[i], updated_weights[i])
            optimizer.sess.close()
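
The dictionary passed to TFOptimizer.from_loss above is keyed by variable-name prefix, so "dense/" and "dense_1/" select the two tf.layers.dense layers. A minimal standalone sketch (assuming TensorFlow 1.x, as in these examples) showing where those prefixes come from and why weight indices [0, 1] and [2, 3] map to them:

import tensorflow as tf

with tf.Graph().as_default():
    x = tf.placeholder(tf.float32, shape=[None, 10])
    h = tf.layers.dense(x, 8)    # variables created under the "dense/" prefix
    _ = tf.layers.dense(h, 10)   # second call is auto-named "dense_1/"
    # creation order matches the weight indices checked in the test above
    names = [v.name for v in tf.trainable_variables()]
    assert names == ["dense/kernel:0", "dense/bias:0",
                     "dense_1/kernel:0", "dense_1/bias:0"]
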
Example #2
    def test_tf_optimizer_with_sparse_gradient_using_keras(self):
        import tensorflow as tf

        ids = np.random.randint(0, 10, size=[40])
        labels = np.random.randint(0, 5, size=[40])
        id_rdd = self.sc.parallelize(ids)
        label_rdd = self.sc.parallelize(labels)
        training_rdd = id_rdd.zip(label_rdd).map(lambda x: [x[0], x[1]])

        dataset = TFDataset.from_rdd(training_rdd,
                                     features=(tf.int32, []),
                                     labels=(tf.int32, []),
                                     batch_size=8)
        words_input = tf.keras.layers.Input(shape=(), name='words_input')
        embedding_layer = tf.keras.layers.Embedding(input_dim=10,
                                                    output_dim=5,
                                                    name='word_embedding')
        word_embeddings = embedding_layer(words_input)
        embedding = tf.keras.layers.Flatten()(word_embeddings)
        output = tf.keras.layers.Dense(5, activation="softmax")(embedding)
        model = tf.keras.models.Model(inputs=[words_input], outputs=[output])
        model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy")

        optimizer = TFOptimizer.from_keras(model, dataset)
        optimizer.optimize()
Example #3
    def test_control_inputs(self):

        features = np.random.randn(20, 10)
        labels = np.random.randint(0, 10, size=[20])
        with tf.Graph().as_default():
            dataset = TFDataset.from_ndarrays((features, labels),
                                              batch_size=4,
                                              val_tensors=(features, labels))
            is_training = tf.placeholder(dtype=tf.bool, shape=())
            feature_tensor, label_tensor = dataset.tensors
            features = tf.layers.dense(feature_tensor, 8)
            features = tf.layers.dropout(features, training=is_training)
            output = tf.layers.dense(features, 10)
            loss = tf.reduce_mean(
                tf.losses.sparse_softmax_cross_entropy(logits=output,
                                                       labels=label_tensor))
            optimizer = TFOptimizer.from_loss(
                loss,
                Adam(),
                val_outputs=[output],
                val_labels=[label_tensor],
                val_method=Accuracy(),
                tensor_with_value={is_training: (True, False)},
                metrics={"loss": loss})
            optimizer.optimize(end_trigger=MaxEpoch(1))
            optimizer.sess.close()
Example #4
def main(max_epoch, data_num):
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    def get_data_rdd(dataset):
        (images_data,
         labels_data) = mnist.read_data_sets("/tmp/mnist", dataset)
        image_rdd = sc.parallelize(images_data[:data_num])
        labels_rdd = sc.parallelize(labels_data[:data_num])
        rdd = image_rdd.zip(labels_rdd) \
            .map(lambda rec_tuple: [normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                                    np.array(rec_tuple[1])])
        return rdd

    training_rdd = get_data_rdd("train")
    testing_rdd = get_data_rdd("test")
    dataset = TFDataset.from_rdd(training_rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], []],
                                 types=[tf.float32, tf.int32],
                                 batch_size=280,
                                 val_rdd=testing_rdd)

    # construct the model from TFDataset
    images, labels = dataset.tensors

    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images,
                                         num_classes=10,
                                         is_training=True)

    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    # create an optimizer
    optimizer = TFOptimizer(loss,
                            Adam(1e-3),
                            val_outputs=[logits],
                            val_labels=[labels],
                            val_method=Top1Accuracy(),
                            model_dir="/tmp/lenet/")
    # kick off training
    optimizer.optimize(end_trigger=MaxEpoch(max_epoch))

    saver = tf.train.Saver()
    saver.save(optimizer.sess, "/tmp/lenet/model")
Example #5
def main():
    sc = init_nncontext()

    global_batch_size = 256

    loss = create_model(creat_dataset(global_batch_size))

    optimizer = TFOptimizer.from_loss(loss, SGD(1e-3), model_dir="/tmp/lenet/")
    optimizer.optimize(end_trigger=MaxIteration(20))
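
create_model and creat_dataset (spelled as called above) are defined elsewhere in the script. A hypothetical sketch of what they might look like, reusing the TFDataset.from_ndarrays and tf.layers patterns from the other examples; the real helpers presumably build the actual input pipeline and model:

import numpy as np
import tensorflow as tf
from zoo.tfpark import TFDataset  # import path assumed; adjust to your setup


def creat_dataset(batch_size):
    # Hypothetical stand-in: random MNIST-shaped arrays instead of the real input pipeline.
    images = np.random.randn(1000, 28, 28, 1).astype(np.float32)
    labels = np.random.randint(0, 10, size=(1000,))
    return TFDataset.from_ndarrays((images, labels), batch_size=batch_size)


def create_model(dataset):
    # Build a small classifier on the dataset's tensors and return the scalar loss.
    images, labels = dataset.tensors
    flat = tf.layers.flatten(images)
    logits = tf.layers.dense(flat, 10)
    return tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))
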
Example #6
    def fit(self,
            data,
            epochs=1,
            batch_size=32,
            feature_cols=None,
            labels_cols=None,
            validation_data=None,
            hard_code_batch_size=False,
            session_config=None,
            feed_dict=None):

        assert self.labels is not None, \
            "labels is None; it should not be None in training"
        assert self.loss is not None, \
            "loss is None; it should not be None in training"
        assert self.optimizer is not None, \
            "optimizer is None; it should not be None in training"

        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in training"
            assert labels_cols is not None, \
                "label columns is None; it should not be None in training"

        dataset = to_dataset(data,
                             batch_size=batch_size,
                             batch_per_thread=-1,
                             validation_data=validation_data,
                             feature_cols=feature_cols,
                             labels_cols=labels_cols,
                             hard_code_batch_size=hard_code_batch_size,
                             sequential_order=False,
                             shuffle=True)

        if feed_dict is not None:
            tensor_with_value = {
                key: (value, value)
                for key, value in feed_dict.items()
            }
        else:
            tensor_with_value = None

        optimizer = TFOptimizer.from_train_op(
            train_op=self.train_op,
            loss=self.loss,
            inputs=self.inputs,
            labels=self.labels,
            dataset=dataset,
            metrics=self.metrics,
            updates=self.updates,
            sess=self.sess,
            tensor_with_value=tensor_with_value,
            session_config=session_config,
            model_dir=self.model_dir)

        optimizer.optimize(end_trigger=MaxEpoch(epochs))
        return self
Example #7
    def fit(self, data,
            epochs=1,
            batch_size=32,
            feature_cols=None,
            labels_cols=None,
            validation_data=None,
            hard_code_batch_size=False,
            session_config=None,
            checkpoint_trigger=None
            ):
        """
        Train this keras model with train data.
        :param data: train data. It can be XShards, Spark DataFrame, tf.data.Dataset.
        If data is XShards, each element needs to be {'x': a feature numpy array
         or a tuple of feature numpy arrays, 'y': a label numpy array or a tuple of
         label numpy arrays}
        If data is tf.data.Dataset, each element is [feature tensor tuple, label tensor tuple]
        :param epochs: number of epochs to train.
        :param batch_size: total batch size for each iteration.
        :param feature_cols: feature column names if train data is Spark DataFrame.
        :param labels_cols: label column names if train data is Spark DataFrame.
        :param validation_data: validation data. Validation data type should be the same
        as train data.
        :param hard_code_batch_size: whether to hard-code the batch size for training. Default is False.
        :param session_config: tensorflow session configuration for training.
        Should be an object of tf.ConfigProto
        :param checkpoint_trigger: when to trigger checkpoint during training.
        Should be a bigdl optimizer trigger, like EveryEpoch(), SeveralIteration(num_iterations), etc.
        """

        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in training"
            assert labels_cols is not None, \
                "label columns is None; it should not be None in training"

        dataset = to_dataset(data, batch_size=batch_size, batch_per_thread=-1,
                             validation_data=validation_data,
                             feature_cols=feature_cols, labels_cols=labels_cols,
                             hard_code_batch_size=hard_code_batch_size,
                             sequential_order=False, shuffle=True
                             )

        self.tf_optimizer = TFOptimizer.from_keras(self.model.model, dataset,
                                                   model_dir=self.model.model_dir,
                                                   session_config=session_config,
                                                   metrics=self.metrics)

        if self.load_checkpoint:
            self.tf_optimizer.load_checkpoint(self.checkpoint_path, self.checkpoint_version)

        if self.log_dir and self.app_name:
            self.tf_optimizer.estimator.set_tensorboard(self.log_dir, self.app_name)

        self.tf_optimizer.optimize(MaxEpoch(epochs), checkpoint_trigger=checkpoint_trigger)

        return self
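
A minimal sketch of the XShards element format the docstring above describes; the shapes and names are illustrative and the surrounding XShards/Estimator construction is omitted:

import numpy as np

# One XShards element: 'x' holds the features, 'y' the labels; either may also be
# a tuple of numpy arrays for multi-input or multi-output models.
element = {
    "x": np.random.randn(256, 28, 28, 1).astype(np.float32),
    "y": np.random.randint(0, 10, size=(256,)),
}

# With a Spark DataFrame instead, feature_cols and labels_cols must be given, e.g.
# est.fit(df, epochs=5, batch_size=64, feature_cols=["features"], labels_cols=["label"]),
# where est is an instance of the class defining this fit().
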
Example #8
    def fit(self,
            data,
            steps,
            batch_size=32,
            validation_data=None,
            feed_dict=None,
            session_config=None):

        assert self.labels is not None, \
            "labels is None; it should not be None in training"
        assert self.loss is not None, \
            "loss is None; it should not be None in training"
        assert self.optimizer is not None, \
            "optimizer is None; it not None in training"

        if isinstance(data, SparkXShards):
            dataset = _xshards_to_tf_dataset(
                data,
                batch_size=batch_size,
                validation_data_shard=validation_data)
        elif isinstance(data, Dataset):
            dataset = TFDataDataset2(data,
                                     batch_size=batch_size,
                                     batch_per_thread=-1,
                                     validation_dataset=validation_data)
        else:
            raise ValueError("data type {} is not supported; "
                             "it must be created by zoo.orca.data.package")

        if feed_dict is not None:
            tensor_with_value = {
                key: (value, value)
                for key, value in feed_dict.items()
            }
        else:
            tensor_with_value = None

        optimizer = TFOptimizer.from_train_op(
            train_op=self.train_op,
            loss=self.loss,
            inputs=self.inputs,
            labels=self.labels,
            dataset=dataset,
            metrics=self.metrics,
            updates=self.updates,
            sess=self.sess,
            tensor_with_value=tensor_with_value,
            session_config=session_config,
            model_dir=self.model_dir)

        optimizer.optimize(end_trigger=MaxIteration(steps))
        return self
Example #9
    def test_tfdataset_with_tfrecord(self):
        train_path = os.path.join(resource_path,
                                  "tfrecord/mnist_train.tfrecord")
        test_path = os.path.join(resource_path, "tfrecord/mnist_test.tfrecord")
        dataset = TFDataset.from_tfrecord_file(self.sc,
                                               train_path,
                                               batch_size=16,
                                               validation_file_path=test_path)
        raw_bytes = dataset.tensors[0]
        images, labels = parse_fn(raw_bytes)
        flat = tf.layers.flatten(images)
        logits = tf.layers.dense(flat, 10)
        loss = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                   labels=labels))
        opt = TFOptimizer.from_loss(loss, Adam())
        opt.optimize()
Example #10
    def test_checkpoint(self):

        features = np.random.randn(20, 10)
        labels = np.random.randint(0, 10, size=[20])
        with tf.Graph().as_default():
            dataset = TFDataset.from_ndarrays((features, labels),
                                              batch_size=4,
                                              val_tensors=(features, labels))
            feature_tensor, label_tensor = dataset.tensors
            features = tf.layers.dense(feature_tensor, 8)
            output = tf.layers.dense(features, 10)
            loss = tf.reduce_mean(tf.losses.
                                  sparse_softmax_cross_entropy(logits=output,
                                                               labels=label_tensor))
            model_dir = tempfile.mkdtemp()
            try:
                optimizer = TFOptimizer.from_loss(loss, Adam(),
                                                  val_outputs=[output],
                                                  val_labels=[label_tensor],
                                                  val_method=Accuracy(),
                                                  metrics={"loss": loss}, model_dir=model_dir)
                optimizer.optimize(end_trigger=MaxEpoch(1))

                import re
                ckpt_path = None
                versions = []
                for (root, dirs, files) in os.walk(model_dir, topdown=True):
                    temp_versions = []
                    for file_name in files:
                        if re.match(r"^optimMethod-TFParkTraining\.[0-9]+$", file_name) is not None:
                            version = int(file_name.split(".")[1])
                            temp_versions.append(version)
                    if temp_versions:
                        ckpt_path = root
                        versions = temp_versions
                        break

                assert ckpt_path is not None, "Cannot find checkpoint file"

                optimizer.load_checkpoint(ckpt_path, max(versions))
                optimizer.optimize(end_trigger=MaxEpoch(1))
                optimizer.sess.close()
            finally:
                import shutil
                shutil.rmtree(model_dir)
Example #11
    def test_tfdataset_with_tf_data_dataset_which_contains_bool(self):
        dataset = tf.data.Dataset.from_tensor_slices(
            (np.random.randn(102, 28, 28,
                             1), np.random.randint(0, 10, size=(102, )),
             np.ones(shape=(102, 28, 28, 1), dtype=np.bool)))
        dataset = TFDataset.from_tf_data_dataset(dataset, batch_size=16)

        feature, labels, mask = dataset.tensors

        float_mask = tf.to_float(mask)
        masked_feature = tf.to_float(feature) * float_mask
        flatten = tf.layers.flatten(masked_feature)
        logits = tf.layers.dense(flatten, 10)
        loss = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                   labels=labels))
        opt = TFOptimizer.from_loss(loss, Adam())
        opt.optimize()
Example #12
def main(max_epoch, data_num):
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    def get_data_rdd(dataset):
        (images_data,
         labels_data) = mnist.read_data_sets("/tmp/mnist", dataset)
        image_rdd = sc.parallelize(images_data[:data_num])
        labels_rdd = sc.parallelize(labels_data[:data_num])
        rdd = image_rdd.zip(labels_rdd) \
            .map(lambda rec_tuple: [normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                                    np.array(rec_tuple[1])])
        return rdd

    training_rdd = get_data_rdd("train")
    testing_rdd = get_data_rdd("test")
    dataset = TFDataset.from_rdd(training_rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], []],
                                 types=[tf.float32, tf.int32],
                                 batch_size=280,
                                 val_rdd=testing_rdd)

    data = Input(shape=[28, 28, 1])

    x = Flatten()(data)
    x = Dense(64, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    predictions = Dense(10, activation='softmax')(x)

    model = Model(inputs=data, outputs=predictions)

    model.compile(optimizer='rmsprop',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    optimizer = TFOptimizer.from_keras(model,
                                       dataset,
                                       model_dir="/tmp/mnist_keras")

    # kick off training
    optimizer.optimize(end_trigger=MaxEpoch(max_epoch))

    model.save_weights("/tmp/mnist_keras/mnist_keras.h5")
Example #13
    def fit(self,
            data,
            epochs=1,
            batch_size=32,
            validation_data=None,
            session_config=None,
            feed_dict=None):

        assert self.labels is not None, \
            "labels is None; it should not be None in training"
        assert self.loss is not None, \
            "loss is None; it should not be None in training"
        assert self.optimizer is not None, \
            "optimizer is None; it not None in training"

        dataset = _to_dataset(data,
                              batch_size=batch_size,
                              batch_per_thread=-1,
                              validation_data=validation_data)

        if feed_dict is not None:
            tensor_with_value = {
                key: (value, value)
                for key, value in feed_dict.items()
            }
        else:
            tensor_with_value = None

        optimizer = TFOptimizer.from_train_op(
            train_op=self.train_op,
            loss=self.loss,
            inputs=self.inputs,
            labels=self.labels,
            dataset=dataset,
            metrics=self.metrics,
            updates=self.updates,
            sess=self.sess,
            tensor_with_value=tensor_with_value,
            session_config=session_config,
            model_dir=self.model_dir)

        optimizer.optimize(end_trigger=MaxEpoch(epochs))
        return self
Example #14
def main(max_epoch, data_num):
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    (train_images_data,
     train_labels_data) = mnist.read_data_sets("/tmp/mnist", "train")
    (test_images_data,
     test_labels_data) = mnist.read_data_sets("/tmp/mnist", "test")

    train_images_data = (train_images_data[:data_num] -
                         mnist.TRAIN_MEAN) / mnist.TRAIN_STD
    train_labels_data = train_labels_data[:data_num].astype(np.int)
    test_images_data = (test_images_data[:data_num] -
                        mnist.TRAIN_MEAN) / mnist.TRAIN_STD
    test_labels_data = (test_labels_data[:data_num]).astype(np.int)
    dataset = TFDataset.from_ndarrays(
        (train_images_data, train_labels_data),
        batch_size=360,
        val_tensors=(test_images_data, test_labels_data))

    # construct the model from TFDataset
    images, labels = dataset.tensors

    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images,
                                         num_classes=10,
                                         is_training=True)

    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    acc = accuracy(logits, labels)

    # create an optimizer
    optimizer = TFOptimizer.from_loss(loss,
                                      Adam(1e-3),
                                      metrics={"acc": acc},
                                      model_dir="/tmp/lenet/")
    # kick off training
    optimizer.optimize(end_trigger=MaxEpoch(max_epoch))

    saver = tf.train.Saver()
    saver.save(optimizer.sess, "/tmp/lenet/model")
Example #15
    def test_tf_optimizer_with_sparse_gradient(self):
        ids = np.random.randint(0, 10, size=[40])
        labels = np.random.randint(0, 5, size=[40])
        id_rdd = self.sc.parallelize(ids)
        label_rdd = self.sc.parallelize(labels)
        training_rdd = id_rdd.zip(label_rdd).map(lambda x: [x[0], x[1]])
        with tf.Graph().as_default():
            dataset = TFDataset.from_rdd(training_rdd,
                                         names=["ids", "labels"],
                                         shapes=[[], []],
                                         types=[tf.int32, tf.int32],
                                         batch_size=8)
            id_tensor, label_tensor = dataset.tensors
            embedding_table = tf.get_variable(name="word_embedding",
                                              shape=[10, 5])

            embedding = tf.nn.embedding_lookup(embedding_table, id_tensor)
            loss = tf.reduce_mean(
                tf.losses.sparse_softmax_cross_entropy(logits=embedding,
                                                       labels=label_tensor))
            optimizer = TFOptimizer.from_loss(loss, Adam(1e-3))
            optimizer.optimize(end_trigger=MaxEpoch(1))
            optimizer.sess.close()
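
The "sparse gradient" in the test name refers to the fact that differentiating through tf.nn.embedding_lookup yields a tf.IndexedSlices gradient rather than a dense tensor. A small standalone check (TensorFlow 1.x assumed):

import tensorflow as tf

with tf.Graph().as_default():
    table = tf.get_variable("emb", shape=[10, 5])
    ids = tf.constant([1, 2, 3])
    emb = tf.nn.embedding_lookup(table, ids)
    loss = tf.reduce_sum(emb)
    grad = tf.gradients(loss, [table])[0]
    # the gradient w.r.t. the embedding table is sparse
    assert isinstance(grad, tf.IndexedSlices)
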
Example #16
    def fit(self,
            data,
            epochs=1,
            batch_size=32,
            feature_cols=None,
            label_cols=None,
            validation_data=None,
            session_config=None,
            checkpoint_trigger=None,
            auto_shard_files=True):
        """
        Train this keras model with train data.

        :param data: train data. It can be XShards, Spark DataFrame, tf.data.Dataset.
               If data is XShards, each partition can be Pandas Dataframe or a dictionary of
               {'x': feature, 'y': label}, where feature(label) is a numpy array or a tuple of
               numpy arrays.
               If data is tf.data.Dataset, each element is [feature tensor tuple, label tensor
               tuple]
        :param epochs: number of epochs to train.
        :param batch_size: total batch size for each iteration.
        :param feature_cols: feature column names if train data is Spark DataFrame or XShards
         of Pandas DataFrame.
        :param label_cols: label column names if train data is Spark DataFrame or XShards of
        Pandas DataFrame.
        :param validation_data: validation data. Validation data type should be the same
               as train data.
        :param session_config: tensorflow session configuration for training.
               Should be an object of tf.ConfigProto
        :param checkpoint_trigger: when to trigger checkpoint during training.
               Should be a zoo.orca.learn.trigger, like EveryEpoch(), SeveralIteration(
               num_iterations), etc.
        :param auto_shard_files: whether to automatically detect if the dataset is file-based and
               apply sharding on files, otherwise shard on records. Default is True.
        """

        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in training"
            assert label_cols is not None, \
                "label columns is None; it should not be None in training"

        if isinstance(data, tf.data.Dataset):
            assert isinstance(data.element_spec, tuple), \
                "If data is tf.data.Dataset, each element should be " \
                "(feature tensors, label tensor), where each feature/label tensor can be " \
                "either a single tensor or a tuple of tensors"
            if validation_data is not None:
                assert isinstance(validation_data, tf.data.Dataset), \
                    "train data and validation data should be both tf.data.Dataset"
                assert isinstance(validation_data.element_spec, tuple), \
                    "If validation_data is tf.data.Dataset, each element should be " \
                    "(feature tensors, label tensor), where each feature/label tensor can be " \
                    "either a single tensor or a tuple of tensors"

        if isinstance(data, SparkXShards):
            if data._get_class_name() == 'pandas.core.frame.DataFrame':
                assert feature_cols is not None, \
                    "feature columns is None; it should not be None in training"
                assert label_cols is not None, \
                    "label columns is None; it should not be None in training"
                data, validation_data = process_xshards_of_pandas_dataframe(
                    data, feature_cols, label_cols, validation_data, "fit")

        if checkpoint_trigger is not None:
            checkpoint_trigger = Trigger.convert_trigger(checkpoint_trigger)

        if is_tf_data_dataset(data):
            data = data.map(_standardize_keras_target_data)
            if validation_data is not None:
                validation_data = validation_data.map(
                    _standardize_keras_target_data)

        memory_type = OrcaContext.train_data_store
        dataset = to_dataset(data,
                             batch_size=batch_size,
                             batch_per_thread=-1,
                             validation_data=validation_data,
                             feature_cols=feature_cols,
                             label_cols=label_cols,
                             hard_code_batch_size=False,
                             sequential_order=False,
                             shuffle=True,
                             auto_shard_files=auto_shard_files,
                             memory_type=memory_type)

        self.tf_optimizer = TFOptimizer.from_keras(
            self.model.model,
            dataset,
            model_dir=self.model.model_dir,
            session_config=session_config,
            metrics=self.metrics,
            optimizer=self.optimizer)

        if self.clip_norm:
            self.tf_optimizer.set_gradient_clipping_by_l2_norm(
                clip_norm=self.clip_norm)
        if self.clip_min and self.clip_max:
            self.tf_optimizer.set_constant_gradient_clipping(
                self.clip_min, self.clip_max)

        if self.load_checkpoint:
            self.tf_optimizer.load_checkpoint(self.checkpoint_path,
                                              self.checkpoint_version)

        if self.log_dir and self.app_name:
            self.tf_optimizer.estimator.set_tensorboard(
                self.log_dir, self.app_name)

        self.tf_optimizer.optimize(MaxEpoch(epochs),
                                   checkpoint_trigger=checkpoint_trigger)

        return self
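
A minimal sketch of a tf.data.Dataset that satisfies the element_spec assertion in this fit(): each element is a (features, labels) tuple. The array contents are placeholders, and a TensorFlow version exposing Dataset.element_spec is assumed, as the assertion itself requires:

import numpy as np
import tensorflow as tf

features = np.random.randn(64, 10).astype(np.float32)
labels = np.random.randint(0, 2, size=(64,)).astype(np.int32)

# Each element is a (feature tensor, label tensor) tuple, so element_spec is a tuple.
train_ds = tf.data.Dataset.from_tensor_slices((features, labels))
assert isinstance(train_ds.element_spec, tuple)
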
Example #17
    def fit(self,
            data,
            epochs=1,
            batch_size=32,
            feature_cols=None,
            label_cols=None,
            validation_data=None,
            session_config=None,
            checkpoint_trigger=None,
            auto_shard_files=False,
            feed_dict=None):
        """
        Train this graph model with train data.

        :param data: train data. It can be XShards, Spark DataFrame, tf.data.Dataset.
               If data is XShards, each partition can be Pandas Dataframe or a dictionary of
               {'x': feature, 'y': label}, where feature(label) is a numpy array or a tuple of
               numpy arrays.
               If data is tf.data.Dataset, each element is a tuple of input tensors.
        :param epochs: number of epochs to train.
        :param batch_size: total batch size for each iteration.
        :param feature_cols: feature column names if train data is Spark DataFrame or XShards
         of Pandas Dataframe.
        :param label_cols: label column names if train data is Spark DataFrame or XShards of
        Pandas Dataframe.
        :param validation_data: validation data. Validation data type should be the same
               as train data.
        :param auto_shard_files: whether to automatically detect if the dataset is file-based and
               apply sharding on files, otherwise shard on records. Default is False.
        :param session_config: tensorflow session configuration for training.
               Should be an object of tf.ConfigProto
        :param feed_dict: a dictionary. The key is a TensorFlow tensor, usually a
               placeholder; the value is a tuple of two elements. The first element is the
               value to feed to the tensor in the training phase and the second is the
               value to feed in the validation phase.
        :param checkpoint_trigger: when to trigger checkpoint during training.
               Should be a zoo.orca.learn.trigger, like EveryEpoch(), SeveralIteration(
               num_iterations), etc.
        """

        assert self.labels is not None, \
            "labels is None; it should not be None in training"
        assert self.loss is not None, \
            "loss is None; it should not be None in training"
        assert self.optimizer is not None, \
            "optimizer is None; it should not be None in training"

        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in training"
            assert label_cols is not None, \
                "label columns is None; it should not be None in training"

        if isinstance(data, SparkXShards):
            if data._get_class_name() == 'pandas.core.frame.DataFrame':
                assert feature_cols is not None, \
                    "feature columns is None; it should not be None in training"
                assert label_cols is not None, \
                    "label columns is None; it should not be None in training"
                data, validation_data = process_xshards_of_pandas_dataframe(
                    data, feature_cols, label_cols, validation_data, "fit")

        if checkpoint_trigger is not None:
            checkpoint_trigger = Trigger.convert_trigger(checkpoint_trigger)

        memory_type = OrcaContext.train_data_store
        dataset = to_dataset(data,
                             batch_size=batch_size,
                             batch_per_thread=-1,
                             validation_data=validation_data,
                             feature_cols=feature_cols,
                             label_cols=label_cols,
                             hard_code_batch_size=False,
                             sequential_order=False,
                             shuffle=True,
                             auto_shard_files=auto_shard_files,
                             memory_type=memory_type)

        if feed_dict is not None:
            tensor_with_value = {
                key: (value[0], value[1])
                for key, value in feed_dict.items()
            }
        else:
            tensor_with_value = None

        if self.use_bigdl_optim:
            self.tf_optimizer = TFOptimizer.from_loss(
                self.loss,
                self.optimizer,
                session=self.sess,
                inputs=(self.inputs, self.labels),
                dataset=dataset,
                clip_norm=self.clip_norm,
                clip_value=self.clip_value,
                metrics=self.metrics,
                tensor_with_value=tensor_with_value,
                session_config=session_config,
                model_dir=self.model_dir,
                updates=self.updates)
        else:

            self.tf_optimizer = TFOptimizer.from_train_op(
                train_op=self.train_op,
                loss=self.loss,
                inputs=self.inputs,
                labels=self.labels,
                dataset=dataset,
                metrics=self.metrics,
                updates=self.updates,
                sess=self.sess,
                tensor_with_value=tensor_with_value,
                session_config=session_config,
                model_dir=self.model_dir)

        if self.load_checkpoint:
            self.tf_optimizer.load_checkpoint(self.checkpoint_path,
                                              self.checkpoint_version)

        if self.log_dir and self.app_name:
            self.tf_optimizer.estimator.set_tensorboard(
                self.log_dir, self.app_name)

        self.tf_optimizer.optimize(end_trigger=MaxEpoch(epochs),
                                   checkpoint_trigger=checkpoint_trigger)
        return self
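
A small sketch of the feed_dict format described in the docstring above: each value is a (training_value, validation_value) pair, the same pattern Example #3 passes directly as tensor_with_value. This differs from the fit() methods in Examples #6, #8 and #13, which duplicate a single value for both phases. The placeholder name is illustrative:

import tensorflow as tf

# Placeholder controlling dropout / batch-norm behaviour during the run.
is_training = tf.placeholder(dtype=tf.bool, shape=())

# Fed as True while training and as False while validating.
feed_dict = {is_training: (True, False)}
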
Example #18
    def fit(self,
            data,
            epochs=1,
            batch_size=32,
            feature_cols=None,
            labels_cols=None,
            validation_data=None,
            hard_code_batch_size=False,
            session_config=None,
            feed_dict=None,
            checkpoint_trigger=None):
        """
        Train this graph model with train data.
        :param data: train data. It can be XShards, Spark DataFrame, tf.data.Dataset.
        If data is XShards, each element needs to be {'x': a feature numpy array
         or a tuple of feature numpy arrays, 'y': a label numpy array or a tuple of
         label numpy arrays}
        If data is tf.data.Dataset, each element is a tuple of input tensors.
        :param epochs: number of epochs to train.
        :param batch_size: total batch size for each iteration.
        :param feature_cols: feature column names if train data is Spark DataFrame.
        :param labels_cols: label column names if train data is Spark DataFrame.
        :param validation_data: validation data. Validation data type should be the same
        as train data.
        :param hard_code_batch_size: whether to hard-code the batch size for training. Default is False.
        :param session_config: tensorflow session configuration for training.
        Should be an object of tf.ConfigProto
        :param feed_dict: a dictionary. The key is a TensorFlow tensor, usually a
        placeholder; the value is a tuple of two elements. The first element is the
        value to feed to the tensor in the training phase and the second is the value
        to feed in the validation phase.
        :param checkpoint_trigger: when to trigger checkpoint during training.
        Should be a bigdl optimizer trigger, like EveryEpoch(), SeveralIteration(num_iterations), etc.
        """

        assert self.labels is not None, \
            "labels is None; it should not be None in training"
        assert self.loss is not None, \
            "loss is None; it should not be None in training"
        assert self.optimizer is not None, \
            "optimizer is None; it should not be None in training"

        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in training"
            assert labels_cols is not None, \
                "label columns is None; it should not be None in training"

        dataset = to_dataset(data,
                             batch_size=batch_size,
                             batch_per_thread=-1,
                             validation_data=validation_data,
                             feature_cols=feature_cols,
                             labels_cols=labels_cols,
                             hard_code_batch_size=hard_code_batch_size,
                             sequential_order=False,
                             shuffle=True)

        if feed_dict is not None:
            tensor_with_value = {
                key: (value[0], value[1])
                for key, value in feed_dict.items()
            }
        else:
            tensor_with_value = None

        self.tf_optimizer = TFOptimizer.from_train_op(
            train_op=self.train_op,
            loss=self.loss,
            inputs=self.inputs,
            labels=self.labels,
            dataset=dataset,
            metrics=self.metrics,
            updates=self.updates,
            sess=self.sess,
            tensor_with_value=tensor_with_value,
            session_config=session_config,
            model_dir=self.model_dir)

        if self.load_checkpoint:
            self.tf_optimizer.load_checkpoint(self.checkpoint_path,
                                              self.checkpoint_version)

        if self.log_dir and self.app_name:
            self.tf_optimizer.estimator.set_tensorboard(self.log_dir,
                                                        self.app_name)

        self.tf_optimizer.optimize(end_trigger=MaxEpoch(epochs),
                                   checkpoint_trigger=checkpoint_trigger)
        return self
Example #19
    lrSchedule.add(Poly(0.5, maxIteration), polyIteration)
    optim = SGD(learningrate=options.learningRate, learningrate_decay=0.0,
                weightdecay=options.weightDecay, momentum=0.9, dampening=0.0,
                nesterov=False,
                leaningrate_schedule=lrSchedule)

    if options.maxEpoch:
        checkpoint_trigger = EveryEpoch()
        end_trigger = MaxEpoch(options.maxEpoch)
    else:
        checkpoint_trigger = SeveralIteration(options.checkpointIteration)
        end_trigger = MaxIteration(options.maxIteration)

    optimizer = TFOptimizer.from_loss(loss, optim,
                                      val_outputs=[logits],
                                      val_labels=[zero_based_label],
                                      val_method=[Accuracy(), Top5Accuracy(), Loss()],
                                      tensor_with_value={is_training: [True, False]},
                                      model_dir="/tmp/logs")

    if options.resumeTrainingCheckpoint is not None:
        assert options.resumeTrainingVersion is not None,\
            "--resumeTrainingVersion must be specified when --resumeTrainingCheckpoint is."
        optimizer.load_checkpoint(options.resumeTrainingCheckpoint,
                                  options.resumeTrainingVersion)

    optimizer.optimize(end_trigger=end_trigger, checkpoint_trigger=checkpoint_trigger)

    if options.checkpoint:
        saver = tf.train.Saver()
        saver.save(optimizer.sess, options.checkpoint)
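
The excerpt starts with lrSchedule already defined. A plausible construction, assuming BigDL's SequentialSchedule, Warmup and Poly schedules (the concrete numbers are illustrative; the real script derives them from its command-line options):

from bigdl.optim.optimizer import SequentialSchedule, Warmup, Poly

iteration_per_epoch = 5000                        # illustrative value
warmup_iteration = 2 * iteration_per_epoch        # warm up for two epochs
maxIteration = 62000
polyIteration = maxIteration - warmup_iteration
warmup_delta = (0.4 - 0.065) / warmup_iteration   # (peak LR - base LR) / warm-up iterations

lrSchedule = SequentialSchedule(iteration_per_epoch)
lrSchedule.add(Warmup(warmup_delta), warmup_iteration)
# the excerpt above then appends Poly(0.5, maxIteration) for the remaining iterations
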
Example #20
    def train(self, input_fn, end_trigger):

        with tf.Graph().as_default() as g:

            dataset = input_fn()

            generator_inputs = dataset.tensors[0]
            real_data = dataset.tensors[1]

            counter = tf.train.get_or_create_global_step()

            period = self._discriminator_steps + self._generator_steps

            is_discriminator_phase = tf.less(tf.mod(counter, period), self._discriminator_steps)

            with tf.variable_scope("Generator"):
                gen_data = self._call_fn_maybe_with_counter(self._generator_fn, counter,
                                                            generator_inputs)

            with tf.variable_scope("Discriminator"):
                fake_d_outputs = self._call_fn_maybe_with_counter(self._discriminator_fn,
                                                                  counter,
                                                                  gen_data, generator_inputs)

            with tf.variable_scope("Discriminator", reuse=True):
                real_d_outputs = self._call_fn_maybe_with_counter(self._discriminator_fn,
                                                                  counter,
                                                                  real_data, generator_inputs)

            with tf.name_scope("Generator_loss"):
                generator_loss = self._call_fn_maybe_with_counter(self._generator_loss_fn,
                                                                  counter,
                                                                  fake_d_outputs)
                gen_reg_loss = tf.losses.get_regularization_loss("Generator")

                generator_loss = generator_loss + gen_reg_loss

            with tf.name_scope("Discriminator_loss"):
                discriminator_loss = self._call_fn_maybe_with_counter(self._discriminator_loss_fn,
                                                                      counter,
                                                                      real_d_outputs,
                                                                      fake_d_outputs)
                dis_reg_loss = tf.losses.get_regularization_loss("Discriminator")
                discriminator_loss = discriminator_loss + dis_reg_loss

            generator_variables = tf.trainable_variables("Generator")
            discriminator_variables = tf.trainable_variables("Discriminator")

            def run_gen_compute():
                gen_grads_vars = self._gen_opt.compute_gradients(generator_loss,
                                                                 var_list=generator_variables)
                gen_grads = [grad for grad, var in gen_grads_vars]
                dis_grads = [tf.zeros_like(var) for var in discriminator_variables]

                return gen_grads + dis_grads

            def run_dis_compute():
                dis_grads_vars = self._dis_opt.compute_gradients(discriminator_loss,
                                                                 var_list=discriminator_variables)
                dis_grads = [grad for grad, var in dis_grads_vars]
                gen_grads = [tf.zeros_like(var) for var in generator_variables]
                return gen_grads + dis_grads

            grads = tf.cond(is_discriminator_phase, run_dis_compute, run_gen_compute)

            grads_vars = list(zip(grads, generator_variables + discriminator_variables))

            gen_grads_vars = grads_vars[:len(generator_variables)]
            dis_grads_vars = grads_vars[len(generator_variables):]

            grads = [grad for grad, var in grads_vars]

            _train_op = tf.cond(is_discriminator_phase,
                                lambda: self._dis_opt.apply_gradients(dis_grads_vars),
                                lambda: self._gen_opt.apply_gradients(gen_grads_vars))

            variables = generator_variables + discriminator_variables

            loss = tf.cond(is_discriminator_phase,
                           lambda: discriminator_loss,
                           lambda: generator_loss)

            with tf.control_dependencies([_train_op]):
                increase_counter = tf.assign_add(counter, 1)

            with tf.control_dependencies([increase_counter]):
                train_op = tf.no_op()

            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                saver = tf.train.Saver()
                kpt = tf.train.latest_checkpoint(self.model_dir)
                if kpt is not None:
                    saver.restore(sess, kpt)
                opt = TFOptimizer._from_grads(loss, sess,
                                              inputs=nest.flatten(dataset._original_tensors),
                                              labels=[],
                                              grads=grads, variables=variables, dataset=dataset,
                                              optim_method=FakeOptimMethod(),
                                              session_config=self._session_config,
                                              model_dir=os.path.join(self.model_dir, "tmp"),
                                              train_op=train_op)
                opt.optimize(end_trigger)
                saver = tf.train.Saver()
                saver.save(sess, self.checkpoint_path, global_step=counter)
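
A tiny pure-Python illustration of the phase schedule computed from the global step above: with, say, two discriminator steps and one generator step per period, training alternates D, D, G, D, D, G, ...:

# Pure-Python version of: is_discriminator_phase = (counter % period) < discriminator_steps
discriminator_steps, generator_steps = 2, 1
period = discriminator_steps + generator_steps
phases = ["D" if step % period < discriminator_steps else "G" for step in range(9)]
assert phases == ["D", "D", "G"] * 3
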
Example #21
    def fit(
        self,
        data,
        epochs=1,
        batch_size=32,
        feature_cols=None,
        labels_cols=None,
        validation_data=None,
        hard_code_batch_size=False,
        session_config=None,
        checkpoint_trigger=None,
        auto_shard_files=True,
    ):
        """
        Train this keras model with train data.
        :param data: train data. It can be XShards, Spark DataFrame, tf.data.Dataset.
        If data is XShards, each element needs to be {'x': a feature numpy array
         or a tuple of feature numpy arrays, 'y': a label numpy array or a tuple of
         label numpy arrays}
        If data is tf.data.Dataset, each element is [feature tensor tuple, label tensor tuple]
        :param epochs: number of epochs to train.
        :param batch_size: total batch size for each iteration.
        :param feature_cols: feature column names if train data is Spark DataFrame.
        :param labels_cols: label column names if train data is Spark DataFrame.
        :param validation_data: validation data. Validation data type should be the same
        as train data.
        :param hard_code_batch_size: whether to hard-code the batch size for training. Default is False.
        :param session_config: tensorflow session configuration for training.
        Should be an object of tf.ConfigProto
        :param checkpoint_trigger: when to trigger checkpoint during training.
        Should be a zoo.orca.learn.trigger, like EveryEpoch(), SeveralIteration(num_iterations), etc.
        :param auto_shard_files: whether to automatically detect if the dataset is file-based and
        apply sharding on files, otherwise shard on records. Default is True.
        """

        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in training"
            assert labels_cols is not None, \
                "label columns is None; it should not be None in training"

        if isinstance(data, tf.data.Dataset):
            assert isinstance(data.element_spec, tuple), \
                "If data is tf.data.Dataset, each element should be " \
                "(feature tensors, label tensor), where each feature/label tensor can be " \
                "either a single tensor or a tuple of tensors"
            if validation_data is not None:
                assert isinstance(validation_data, tf.data.Dataset), \
                    "train data and validation data should be both tf.data.Dataset"
                assert isinstance(validation_data.element_spec, tuple), \
                    "If validation_data is tf.data.Dataset, each element should be " \
                    "(feature tensors, label tensor), where each feature/label tensor can be " \
                    "either a single tensor or a tuple of tensors"

        if checkpoint_trigger is not None:
            checkpoint_trigger = Trigger.convert_trigger(checkpoint_trigger)

        if is_tf_data_dataset(data):
            data = data.map(_standardize_keras_target_data)
            if validation_data is not None:
                validation_data = validation_data.map(
                    _standardize_keras_target_data)

        dataset = to_dataset(data,
                             batch_size=batch_size,
                             batch_per_thread=-1,
                             validation_data=validation_data,
                             feature_cols=feature_cols,
                             labels_cols=labels_cols,
                             hard_code_batch_size=hard_code_batch_size,
                             sequential_order=False,
                             shuffle=True,
                             auto_shard_files=auto_shard_files)

        if isinstance(dataset, TFNdarrayDataset):
            dataset = _standarize_feature_label_dataset(
                dataset, self.model.model)

        self.tf_optimizer = TFOptimizer.from_keras(
            self.model.model,
            dataset,
            model_dir=self.model.model_dir,
            session_config=session_config,
            metrics=self.metrics,
            optimizer=self.optimizer)

        if self.clip_norm:
            self.tf_optimizer.set_gradient_clipping_by_l2_norm(
                clip_norm=self.clip_norm)
        if self.clip_min and self.clip_max:
            self.tf_optimizer.set_constant_gradient_clipping(
                self.clip_min, self.clip_max)

        if self.load_checkpoint:
            self.tf_optimizer.load_checkpoint(self.checkpoint_path,
                                              self.checkpoint_version)

        if self.log_dir and self.app_name:
            self.tf_optimizer.estimator.set_tensorboard(
                self.log_dir, self.app_name)

        self.tf_optimizer.optimize(MaxEpoch(epochs),
                                   checkpoint_trigger=checkpoint_trigger)

        return self
Example #22
    def train(self, dataset, end_trigger):

        with tf.Graph().as_default() as g:

            generator_inputs = dataset.tensors[0]
            real_data = dataset.tensors[1]

            counter = tf.Variable(0, dtype=tf.int32)

            period = self._discriminator_steps + self._generator_steps

            is_discriminator_phase = tf.less(tf.mod(counter, period),
                                             self._discriminator_steps)

            with tf.variable_scope("generator"):
                gen_data = self._call_fn_maybe_with_counter(
                    self._generator_fn, counter, generator_inputs)

            with tf.variable_scope("discriminator"):
                fake_d_outputs = self._call_fn_maybe_with_counter(
                    self._discriminator_fn, counter, gen_data,
                    generator_inputs)

            with tf.variable_scope("discriminator", reuse=True):
                real_d_outputs = self._call_fn_maybe_with_counter(
                    self._discriminator_fn, counter, real_data,
                    generator_inputs)

            with tf.name_scope("generator_loss"):
                generator_loss = self._call_fn_maybe_with_counter(
                    self._generator_loss_fn, counter, fake_d_outputs)

            with tf.name_scope("discriminator_loss"):
                discriminator_loss = self._call_fn_maybe_with_counter(
                    self._discriminator_loss_fn, counter, real_d_outputs,
                    fake_d_outputs)

            generator_variables = tf.trainable_variables("generator")
            generator_grads = tf.gradients(generator_loss, generator_variables)
            discriminator_variables = tf.trainable_variables("discriminator")
            discriminator_grads = tf.gradients(discriminator_loss,
                                               discriminator_variables)

            variables = generator_variables + discriminator_variables

            def true_fn():
                return [tf.zeros_like(grad) for grad in generator_grads]

            def false_fn():
                return generator_grads

            g_grads = tf.cond(is_discriminator_phase,
                              true_fn=true_fn,
                              false_fn=false_fn)
            d_grads = tf.cond(
                is_discriminator_phase, lambda: discriminator_grads,
                lambda: [tf.zeros_like(grad) for grad in discriminator_grads])
            loss = tf.cond(is_discriminator_phase, lambda: discriminator_loss,
                           lambda: generator_loss)

            grads = g_grads + d_grads

            with tf.control_dependencies(grads):
                increase_counter = tf.assign_add(counter, 1)

            g_param_size = sum([np.product(g.shape) for g in g_grads])
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                tf_model = TFModel.create_for_unfreeze(
                    loss,
                    sess,
                    inputs=dataset._original_tensors,
                    grads=grads,
                    variables=variables,
                    graph=g,
                    tensors_with_value=None,
                    session_config=None,
                    metrics=None,
                    updates=[increase_counter],
                    model_dir=self.checkpoint_path)

                optimizer = TFOptimizer(tf_model,
                                        GanOptimMethod(
                                            self._discriminator_optim_method,
                                            self._generator_optim_method,
                                            g_param_size.value,
                                            self._discriminator_steps,
                                            self._generator_steps),
                                        sess=sess,
                                        dataset=dataset,
                                        model_dir=self.checkpoint_path)
                optimizer.optimize(end_trigger)
                steps = sess.run(counter)
                saver = tf.train.Saver()
                saver.save(optimizer.sess,
                           self.checkpoint_path,
                           global_step=steps)
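
The tf.cond branches above keep the gradient list the same length in every phase by substituting zeros for the network that is not being trained, so a single fused update step always receives one gradient per variable. A plain-numpy illustration of that masking, with made-up shapes:

import numpy as np

gen_grads = [np.ones((2, 2)), np.ones(3)]   # stand-ins for the generator gradients
dis_grads = [np.full((4,), 2.0)]            # stand-in for the discriminator gradients

is_discriminator_phase = True
g_grads = [np.zeros_like(g) for g in gen_grads] if is_discriminator_phase else gen_grads
d_grads = dis_grads if is_discriminator_phase else [np.zeros_like(g) for g in dis_grads]

# Same layout as `grads = g_grads + d_grads` above: one entry per variable in
# generator_variables + discriminator_variables, with zeros for the frozen half.
grads = g_grads + d_grads
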