Пример #1
0
    def __init__(self, loss, optim_method, sess=None, dataset=None, inputs=None,
                 grads=None, variables=None, graph=None,
                 val_outputs=None, val_labels=None, val_method=None, val_split=0.0,
                 tensors_with_value=None, session_config=None):
        '''
        TFOptimizer is used for distributed training of TensorFlow
        on Spark/BigDL.

        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current TensorFlow Session, if you want to used a pre-trained model, you
        should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        '''

        import tensorflow as tf
        from tensorflow.python.util import nest
        from zoo.util.tf import export_tf

        if dataset is None:
            args = TFOptimizer._get_arguments_from_loss(loss, optim_method, sess,
                                                        val_outputs, val_labels, val_method)
            loss, optim_method, sess, dataset, inputs = args[:5]
            grads, variables, graph, val_outputs, val_labels, val_method = args[5:]

        additional_inputs = []
        additional_values = []
        all_required_inputs = _find_placeholders([loss])
        all_required_inputs_names = [v.name for v in all_required_inputs]
        if tensors_with_value:
            for t, v in tensors_with_value.items():
                if t.name in all_required_inputs_names:
                    additional_inputs.append(t)
                    additional_values.append(v)

        if not isinstance(inputs, list):
            inputs = nest.flatten(inputs)

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset
        self.inputs = inputs + additional_inputs
        self.graph = graph
        self.session_config = session_config

        from zoo.util.tf import process_grad
        grads = [process_grad(grad) for grad in grads]

        if self.dataset.batch_size <= 0:
            raise ValueError("You should set batch_size instead of batch_per_thread for training")

        if val_outputs is not None and val_labels is not None:
            with self.graph.as_default():
                val_labels = [tf.identity(v) for v in val_labels]
            outputs = val_outputs + val_labels + [loss]
        else:
            outputs = [loss]

        self.grads = grads
        self.outputs = outputs

        self.export_dir = tempfile.mkdtemp()
        export_tf(self.sess, self.export_dir,
                  inputs=self.inputs,
                  outputs=self.grads + self.outputs)

        variable_names = [v.name for v in variables]
        grad_names = [g.name for g in grads]
        output_names = [o.name for o in outputs]

        def to_floats(vs):
            return [float(v) for v in vs]

        meta = {
            "input_names": [i.name for i in self.inputs],
            "output_names": output_names,
            "variables": variable_names,
            "grad_variables": grad_names,
            "default_tensor_values": [to_floats(v) for v in additional_values]
        }

        with open(os.path.join(self.export_dir, "training_meta.json"), "w") as f:
            f.write(json.dumps(meta))

        self.variable_placeholders = []
        with self.graph.as_default():
            assigns = []
            for v in variables:
                p = tf.placeholder(dtype=tf.float32, shape=v.shape)
                a = tf.assign(v, p)
                self.variable_placeholders.append(p)
                assigns.append(a)
            assign = tf.group(*assigns)
        self.assign = assign
        try:
            self.training_helper_layer = TFTrainingHelper(self.export_dir, session_config)
        except Py4JJavaError as e:
            if "expects to be colocated with unknown node" in str(e):
                raise Exception("""
If you are using the embedding layer in tf.keras, then this is a
known issue of TensorFlow, see https://github.com/tensorflow/tensorflow/issues/21889.
Please add zoo.util.tf.variable_creator_scope before model construction.
For example:
from zoo.util.tf import variable_creator_scope
with variable_creator_scope():
    model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(1, 1, input_length=1)])
                """)
            else:
                raise e

        data = self.dataset.rdd
        batch_size = self.dataset.batch_size

        def to_sample(t):
            if isinstance(t, list):
                t = tuple(t)
            return Sample.from_ndarray(nest.flatten(t), [np.array([0.0])])

        sample_rdd = data.map(to_sample)
        if val_outputs is not None and val_labels is not None:
            if self.dataset.val_rdd is not None:
                val_rdd = self.dataset.val_rdd.map(to_sample)
                val_method = [TFValidationMethod(m, len(val_outputs), len(val_labels))
                              for m in to_list(val_method)]
                training_rdd = sample_rdd

            elif val_split != 0.0:
                training_rdd, val_rdd = sample_rdd.randomSplit([1 - val_split, val_split])
                val_method = [TFValidationMethod(m, len(val_outputs), len(val_labels))
                              for m in to_list(val_method)]
            else:
                raise ValueError("Validation data is not specified. Please set " +
                                 "val rdd in TFDataset, or set val_split larger than zero")

            self.optimizer = Optimizer.create(self.training_helper_layer,
                                              training_rdd,
                                              IdentityCriterion(),
                                              batch_size=batch_size,
                                              optim_method=self.optim_method)
            self.optimizer.set_validation(self.dataset.batch_size,
                                          val_rdd,
                                          EveryEpoch(),
                                          val_method)
        else:
            training_rdd = sample_rdd
            self.optimizer = Optimizer.create(self.training_helper_layer,
                                              training_rdd,
                                              IdentityCriterion(),
                                              batch_size=batch_size,
                                              optim_method=self.optim_method)
Пример #2
0
    def __init__(self,
                 loss,
                 optim_method,
                 sess=None,
                 val_outputs=None,
                 val_labels=None,
                 val_method=None):
        import tensorflow as tf
        from zoo.util.tf import export_tf
        '''
        TFOptimizer is used for distributed training of tensorflow
        on Spark/BigDL.

        :param loss: The loss tensor of the tensorflow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current tensorflow Session, if you want to used a pre-trained model, you
        should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        '''
        self.optim_method = optim_method
        if sess is None:
            self.sess = tf.Session()
            self.sess.run(tf.global_variables_initializer())
        else:
            self.sess = sess
        grads_vars = tf.train.GradientDescentOptimizer(0).compute_gradients(
            loss)
        variables = []
        grads = []
        for (grad, var) in grads_vars:
            variables.append(var)
            grads.append(grad)
        self.export_dir = tempfile.mkdtemp()
        all_required_inputs = _find_placeholders([loss])
        self.dataset = tf.get_collection(all_required_inputs[0].name)[0]
        if self.dataset.batch_size <= 0:
            raise ValueError(
                "You should set batch_size instead of batch_per_thread for training"
            )
        self.inputs = self.dataset.tensors

        _check_the_same(all_required_inputs, self.inputs)

        if val_outputs is not None and val_labels is not None:
            outputs = val_outputs + val_labels + [loss]
        else:
            outputs = [loss]

        export_tf(self.sess,
                  self.export_dir,
                  inputs=self.inputs,
                  outputs=grads + outputs)

        variable_names = [v.name for v in variables]
        grad_names = [g.name for g in grads]
        output_names = [o.name for o in outputs]

        meta = {
            "input_names": [i.name for i in self.inputs],
            "output_names": output_names,
            "variables": variable_names,
            "grad_variables": grad_names
        }

        with open(os.path.join(self.export_dir, "training_meta.json"),
                  "w") as f:
            f.write(json.dumps(meta))

        self.training_helper_layer = TFTrainingHelper(self.export_dir)

        self.variable_placeholders = []
        assigns = []
        for v in variables:
            p = tf.placeholder(dtype=tf.float32, shape=v.shape)
            a = tf.assign(v, p)
            self.variable_placeholders.append(p)
            assigns.append(a)
        self.assign = tf.group(*assigns)

        data = self.dataset.rdd
        batch_size = self.dataset.batch_size
        sample_rdd = data.map(
            lambda t: Sample.from_ndarray(t, [np.array([0.0])]))

        self.optimizer = Optimizer.create(self.training_helper_layer,
                                          sample_rdd,
                                          IdentityCriterion(),
                                          batch_size=batch_size,
                                          optim_method=self.optim_method)

        if val_outputs is not None and val_labels is not None:
            val_sample_rdd = self.dataset.val_rdd\
                .map(lambda t: Sample.from_ndarray(t, [np.array([0.0])]))
            val_method = TFValidationMethod(val_method, len(val_outputs),
                                            len(val_labels))
            self.optimizer.set_validation(self.dataset.batch_size,
                                          val_sample_rdd, EveryEpoch(),
                                          val_method)
Пример #3
0
    def __init__(self,
                 loss,
                 optim_method,
                 sess=None,
                 dataset=None,
                 inputs=None,
                 grads=None,
                 variables=None,
                 graph=None,
                 val_outputs=None,
                 val_labels=None,
                 val_method=None,
                 val_split=0.0,
                 tensors_with_value=None,
                 session_config=None,
                 clip_norm=None,
                 clip_value=None,
                 metrics=None):
        '''
        TFOptimizer is used for distributed training of TensorFlow
        on Spark/BigDL.

        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current TensorFlow Session, if you want to used a pre-trained model, you
        should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        '''

        if dataset is None:
            args = TFOptimizer._get_arguments_from_loss(
                loss, optim_method, sess, val_outputs, val_labels, val_method)
            loss, optim_method, sess, dataset, inputs = args[:5]
            grads, variables, graph, val_outputs, val_labels, val_method = args[
                5:]

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset
        self.graph = graph

        self.clip_norm = clip_norm
        if clip_value is not None and not isinstance(clip_value, tuple):
            raise ValueError(
                "The clip_value argument should be a tuple (min_value, max_value)"
            )
        self.clip_constant = clip_value

        if self.dataset.batch_size <= 0:
            raise ValueError(
                "You should set batch_size instead of batch_per_thread for training"
            )

        if val_method is not None:
            val_methods = to_list(val_method)
            if metrics is None:
                metrics = {}

            for i, method in enumerate(val_methods):
                metrics['bigdl_metirc_' + str(i)] = BigDLMetric(
                    method, val_outputs, val_labels)

        self.tf_model = TFModel.create(loss, sess, inputs, grads, variables,
                                       graph, tensors_with_value,
                                       session_config, metrics)

        batch_size = self.dataset.batch_size

        sample_rdd = self.dataset.get_training_data()

        if val_split != 0.0:
            training_rdd, val_rdd = sample_rdd.randomSplit(
                [1 - val_split, val_split])
        else:
            training_rdd = sample_rdd
            val_rdd = self.dataset.get_validation_data()

        if self.tf_model.val_methods is not None and val_rdd is not None:

            self.optimizer = Optimizer.create(
                self.tf_model.training_helper_layer,
                training_rdd,
                IdentityCriterion(),
                batch_size=batch_size,
                optim_method=self.optim_method)
            self.optimizer.set_validation(self.dataset.batch_size, val_rdd,
                                          EveryEpoch(),
                                          self.tf_model.val_methods)
        else:
            self.optimizer = Optimizer.create(
                self.tf_model.training_helper_layer,
                training_rdd,
                IdentityCriterion(),
                batch_size=batch_size,
                optim_method=self.optim_method)

        if self.clip_norm:
            self.optimizer.set_gradclip_l2norm(self.clip_norm)
        if self.clip_constant:
            min_value, max_value = self.clip_constant
            self.optimizer.set_gradclip_const(min_value, max_value)
Пример #4
0
    def __init__(self, loss, optim_method, sess=None):
        self.optim_method = optim_method
        if sess is None:
            self.sess = tf.Session()
            self.sess.run(tf.global_variables_initializer())
        else:
            self.sess = sess
        grads_vars = tf.train.GradientDescentOptimizer(0).compute_gradients(
            loss)
        variables = []
        grads = []
        for (grad, var) in grads_vars:
            variables.append(var)
            grads.append(grad)
        self.export_dir = tempfile.mkdtemp()
        all_required_inputs = _find_placeholders([loss])
        self.dataset = tf.get_collection(all_required_inputs[0].name)[0]
        if self.dataset.batch_size <= 0:
            raise ValueError(
                "You should set batch_size instead of batch_per_core for training"
            )
        self.inputs = self.dataset.tensors

        _check_the_same(all_required_inputs, self.inputs)

        export_tf(self.sess,
                  self.export_dir,
                  inputs=self.inputs,
                  outputs=grads + [loss])

        variable_names = [v.name for v in variables]
        grad_names = [g.name for g in grads]

        meta = {
            "input_names": [i.name for i in self.inputs],
            "output_names": [loss.name],
            "variables": variable_names,
            "grad_variables": grad_names
        }

        with open(os.path.join(self.export_dir, "training_meta.json"),
                  "w") as f:
            f.write(json.dumps(meta))

        self.training_helper_layer = TFTrainingHelper(self.export_dir)

        self.variable_placeholders = []
        assigns = []
        for v in variables:
            p = tf.placeholder(dtype=tf.float32, shape=v.shape)
            a = tf.assign(v, p)
            self.variable_placeholders.append(p)
            assigns.append(a)
        self.assign = tf.group(*assigns)

        data = self.dataset.rdd
        batch_size = self.dataset.batch_size
        sample_rdd = data.map(
            lambda t: Sample.from_ndarray(t, [np.array([0.0])]))

        self.optimizer = Optimizer.create(self.training_helper_layer,
                                          sample_rdd,
                                          IdentityCriterion(),
                                          batch_size=batch_size,
                                          optim_method=self.optim_method)
Пример #5
0
    def __init__(self,
                 loss,
                 optim_method,
                 sess=None,
                 dataset=None,
                 inputs=None,
                 grads=None,
                 variables=None,
                 graph=None,
                 val_outputs=None,
                 val_labels=None,
                 val_method=None,
                 add_sample_weights_num=0):
        import tensorflow as tf
        from zoo.util.tf import export_tf
        '''
        TFOptimizer is used for distributed training of tensorflow
        on Spark/BigDL.

        :param loss: The loss tensor of the tensorflow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current tensorflow Session, if you want to used a pre-trained model, you
        should use the Session to load the pre-trained variables and pass it to TFOptimizer.
        '''

        if dataset is None:
            args = TFOptimizer._get_arguments_from_loss(
                loss, optim_method, sess, val_outputs, val_labels, val_method)
            loss, optim_method, sess, dataset, inputs = args[:5]
            grads, variables, graph, val_outputs, val_labels, val_method = args[
                5:]

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset
        self.inputs = inputs
        self.graph = graph

        if self.dataset.batch_size <= 0:
            raise ValueError(
                "You should set batch_size instead of batch_per_thread for training"
            )

        if val_outputs is not None and val_labels is not None:
            with self.graph.as_default():
                val_labels = [tf.identity(v) for v in val_labels]
            outputs = val_outputs + val_labels + [loss]
        else:
            outputs = [loss]

        self.export_dir = tempfile.mkdtemp()
        export_tf(self.sess,
                  self.export_dir,
                  inputs=self.inputs,
                  outputs=grads + outputs)

        variable_names = [v.name for v in variables]
        grad_names = [g.name for g in grads]
        output_names = [o.name for o in outputs]

        meta = {
            "input_names": [i.name for i in self.inputs],
            "output_names": output_names,
            "variables": variable_names,
            "grad_variables": grad_names
        }

        with open(os.path.join(self.export_dir, "training_meta.json"),
                  "w") as f:
            f.write(json.dumps(meta))

        self.variable_placeholders = []
        with self.graph.as_default():
            assigns = []
            for v in variables:
                p = tf.placeholder(dtype=tf.float32, shape=v.shape)
                a = tf.assign(v, p)
                self.variable_placeholders.append(p)
                assigns.append(a)
            assign = tf.group(*assigns)
        self.assign = assign

        self.training_helper_layer = TFTrainingHelper(self.export_dir)

        data = self.dataset.rdd
        batch_size = self.dataset.batch_size
        sample_rdd = data.map(lambda t: Sample.from_ndarray(
            t + [np.array(1.0)] * add_sample_weights_num, [np.array([0.0])]))

        self.optimizer = Optimizer.create(self.training_helper_layer,
                                          sample_rdd,
                                          IdentityCriterion(),
                                          batch_size=batch_size,
                                          optim_method=self.optim_method)

        if val_outputs is not None and val_labels is not None:
            val_sample_rdd = self.dataset.val_rdd\
                .map(lambda t: Sample.from_ndarray(t + [np.array(1.0)] * add_sample_weights_num,
                                                   [np.array([0.0])]))
            val_method = [
                TFValidationMethod(m, len(val_outputs), len(val_labels))
                for m in to_list(val_method)
            ]
            self.optimizer.set_validation(self.dataset.batch_size,
                                          val_sample_rdd, EveryEpoch(),
                                          val_method)