Example #1
    def _get_arguments_from_loss(loss, optim_method, session, val_outputs,
                                 val_labels, val_method):
        import tensorflow as tf
        if session is None:
            sess = tf.Session()
            sess.run(tf.global_variables_initializer())
        else:
            sess = session
        grads_vars = tf.train.GradientDescentOptimizer(0).compute_gradients(
            loss)
        variables = []
        grads = []
        from zoo.util.tf import process_grad
        for (grad, var) in grads_vars:
            variables.append(var)
            grad = process_grad(grad)
            grads.append(grad)

        all_required_inputs = _find_placeholders([loss])
        dataset = tf.get_collection(all_required_inputs[0].name)[0]

        inputs = dataset.tensors

        _check_the_same(all_required_inputs, inputs)

        return (loss, optim_method, sess, dataset, inputs, grads, variables,
                loss.graph, val_outputs, val_labels, val_method)
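The helper above builds a GradientDescentOptimizer with learning rate 0 purely to enumerate a (gradient, variable) pair for every trainable variable reachable from the loss; no update is ever applied. A minimal, self-contained sketch of that trick in plain TF 1.x (the variable and loss are invented for illustration):

import tensorflow as tf

x = tf.Variable([1.0, 2.0])
loss = tf.reduce_sum(tf.square(x))
# Zero learning rate: we only want the (gradient, variable) pairs.
grads_vars = tf.train.GradientDescentOptimizer(0).compute_gradients(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(grads_vars[0][0]))  # d/dx sum(x^2) = 2x -> [2. 4.]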
Example #2
    def compute_gradients(self, *args, **kwargs):
        """Compute gradients of all trainable variables.

        See Optimizer.compute_gradients() for more info. In
        DistributedOptimizer, compute_gradients() is overridden to also
        allreduce the gradients before returning them.
        """
        import tensorflow as tf
        from zoo.util.tf import process_grad
        gradients = self._optimizer.compute_gradients(*args, **kwargs)
        results = []
        for grad, var in gradients:
            grad = process_grad(grad)
            # Re-emit each gradient under a stable name so it can later be
            # retrieved from the graph by name.
            with tf.control_dependencies([var]):
                grad_i = tf.identity(grad, name="zoo_identity_op_for_grad")
            results.append((grad_i, var))
        return results
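The override above can be mimicked with a plain TF 1.x optimizer. The sketch below is illustrative only: the model is a toy, and process_grad is omitted since its effect depends on the gradient type (dense vs. IndexedSlices):

import tensorflow as tf

x = tf.Variable(1.0)
loss = tf.square(x)
base_opt = tf.train.GradientDescentOptimizer(0.1)
grads_vars = base_opt.compute_gradients(loss)
# Mirror the override: tag each gradient with a stable name so it can be
# looked up in the graph later.
tagged = [(tf.identity(g, name="zoo_identity_op_for_grad"), v)
          for g, v in grads_vars if g is not None]
train_op = base_opt.apply_gradients(tagged)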
Example #3
def get_gradients_for_keras(optimizer, loss, params):
    import tensorflow as tf
    from tensorflow.python.util import nest
    from tensorflow.python.keras import backend
    from tensorflow.python.ops import gradients
    from tensorflow.python.ops import clip_ops
    from tensorflow.python.keras.optimizers import TFOptimizer
    from zoo.util.tf import process_grad

    params = nest.flatten(params)
    if isinstance(optimizer, TFOptimizer):
        scope_name = optimizer.optimizer._name
    else:
        scope_name = optimizer._name

    with backend.get_graph().as_default(), backend.name_scope(scope_name +
                                                              "/gradients"):
        grads = gradients.gradients(loss, params)

        all_reduced_grads = []
        for grad, param in zip(grads, params):
            if grad is None:
                raise ValueError(
                    "Variable {} has `None` for gradient. "
                    "Please make sure that all of your ops have a "
                    "gradient defined (i.e. are differentiable). "
                    "Common ops without gradient: "
                    "K.argmax, K.round, K.eval.".format(param))
            grad = process_grad(grad)

            with tf.control_dependencies([param]):
                grad_i = tf.identity(grad, name="zoo_identity_op_for_grad")

            all_reduced_grads.append(grad_i)

        grads = all_reduced_grads

        if hasattr(optimizer, "clipnorm"):
            grads = [
                clip_ops.clip_by_norm(g, optimizer.clipnorm) for g in grads
            ]
        if hasattr(optimizer, "clipvalue"):
            grads = [
                clip_ops.clip_by_value(g, -optimizer.clipvalue,
                                       optimizer.clipvalue) for g in grads
            ]
    return grads
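The clipnorm/clipvalue handling at the end mirrors how Keras itself applies gradient clipping. The standalone sketch below reproduces just that clipping step with the public tf.clip_by_* equivalents of the clip_ops calls above (the gradient values and limits are made up):

import tensorflow as tf

grads = [tf.constant([3.0, 4.0])]
clipnorm, clipvalue = 1.0, 0.5
grads = [tf.clip_by_norm(g, clipnorm) for g in grads]
grads = [tf.clip_by_value(g, -clipvalue, clipvalue) for g in grads]
with tf.Session() as sess:
    # [3, 4] has norm 5, so clip_by_norm scales it to [0.6, 0.8];
    # clip_by_value then caps it at [0.5, 0.5].
    print(sess.run(grads[0]))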
Example #4
    def _process_grads(graph, grads):
        with graph.as_default():
            from zoo.util.tf import process_grad
            grads = [process_grad(grad) for grad in grads]
        return grads
Example #5
    def __init__(self, loss, optim_method, sess=None, dataset=None, inputs=None,
                 grads=None, variables=None, graph=None,
                 val_outputs=None, val_labels=None, val_method=None, val_split=0.0,
                 tensors_with_value=None, session_config=None):
        '''
        TFOptimizer is used for distributed training of TensorFlow
        on Spark/BigDL.

        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current TensorFlow Session; if you want to use a pre-trained model, you
        should use this Session to load the pre-trained variables and pass it to TFOptimizer.
        '''

        import tensorflow as tf
        from tensorflow.python.util import nest
        from zoo.util.tf import export_tf

        if dataset is None:
            args = TFOptimizer._get_arguments_from_loss(loss, optim_method, sess,
                                                        val_outputs, val_labels, val_method)
            loss, optim_method, sess, dataset, inputs = args[:5]
            grads, variables, graph, val_outputs, val_labels, val_method = args[5:]

        additional_inputs = []
        additional_values = []
        all_required_inputs = _find_placeholders([loss])
        all_required_inputs_names = [v.name for v in all_required_inputs]
        if tensors_with_value:
            for t, v in tensors_with_value.items():
                if t.name in all_required_inputs_names:
                    additional_inputs.append(t)
                    additional_values.append(v)

        if not isinstance(inputs, list):
            inputs = nest.flatten(inputs)

        self.optim_method = optim_method
        self.sess = sess
        self.dataset = dataset
        self.inputs = inputs + additional_inputs
        self.graph = graph
        self.session_config = session_config

        from zoo.util.tf import process_grad
        grads = [process_grad(grad) for grad in grads]

        if self.dataset.batch_size <= 0:
            raise ValueError("You should set batch_size instead of batch_per_thread for training")

        if val_outputs is not None and val_labels is not None:
            with self.graph.as_default():
                val_labels = [tf.identity(v) for v in val_labels]
            outputs = val_outputs + val_labels + [loss]
        else:
            outputs = [loss]

        self.grads = grads
        self.outputs = outputs

        self.export_dir = tempfile.mkdtemp()
        export_tf(self.sess, self.export_dir,
                  inputs=self.inputs,
                  outputs=self.grads + self.outputs)

        variable_names = [v.name for v in variables]
        grad_names = [g.name for g in grads]
        output_names = [o.name for o in outputs]

        def to_floats(vs):
            return [float(v) for v in vs]

        meta = {
            "input_names": [i.name for i in self.inputs],
            "output_names": output_names,
            "variables": variable_names,
            "grad_variables": grad_names,
            "default_tensor_values": [to_floats(v) for v in additional_values]
        }

        with open(os.path.join(self.export_dir, "training_meta.json"), "w") as f:
            f.write(json.dumps(meta))

        self.variable_placeholders = []
        with self.graph.as_default():
            assigns = []
            for v in variables:
                p = tf.placeholder(dtype=tf.float32, shape=v.shape)
                a = tf.assign(v, p)
                self.variable_placeholders.append(p)
                assigns.append(a)
            assign = tf.group(*assigns)
        self.assign = assign
        try:
            self.training_helper_layer = TFTrainingHelper(self.export_dir, session_config)
        except Py4JJavaError as e:
            if "expects to be colocated with unknown node" in str(e):
                raise Exception("""
If you are using the embedding layer in tf.keras, then this is a
known issue of TensorFlow, see https://github.com/tensorflow/tensorflow/issues/21889.
Please add zoo.util.tf.variable_creator_scope before model construction.
For example:
from zoo.util.tf import variable_creator_scope
with variable_creator_scope():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Embedding(1, 1, input_length=1)])
                """)
            else:
                raise e

        data = self.dataset.rdd
        batch_size = self.dataset.batch_size

        def to_sample(t):
            if isinstance(t, list):
                t = tuple(t)
            return Sample.from_ndarray(nest.flatten(t), [np.array([0.0])])

        sample_rdd = data.map(to_sample)
        if val_outputs is not None and val_labels is not None:
            if self.dataset.val_rdd is not None:
                val_rdd = self.dataset.val_rdd.map(to_sample)
                val_method = [TFValidationMethod(m, len(val_outputs), len(val_labels))
                              for m in to_list(val_method)]
                training_rdd = sample_rdd

            elif val_split != 0.0:
                training_rdd, val_rdd = sample_rdd.randomSplit([1 - val_split, val_split])
                val_method = [TFValidationMethod(m, len(val_outputs), len(val_labels))
                              for m in to_list(val_method)]
            else:
                raise ValueError("Validation data is not specified. Please set " +
                                 "val rdd in TFDataset, or set val_split larger than zero")

            self.optimizer = Optimizer.create(self.training_helper_layer,
                                              training_rdd,
                                              IdentityCriterion(),
                                              batch_size=batch_size,
                                              optim_method=self.optim_method)
            self.optimizer.set_validation(self.dataset.batch_size,
                                          val_rdd,
                                          EveryEpoch(),
                                          val_method)
        else:
            training_rdd = sample_rdd
            self.optimizer = Optimizer.create(self.training_helper_layer,
                                              training_rdd,
                                              IdentityCriterion(),
                                              batch_size=batch_size,
                                              optim_method=self.optim_method)
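A typical end-to-end use of this constructor looks like the hedged sketch below. The TFDataset/TFOptimizer import path, the from_rdd arguments, and the optimize/MaxEpoch call follow the Analytics Zoo examples of this era and may differ between versions; the RDD contents are invented:

import numpy as np
import tensorflow as tf
from bigdl.optim.optimizer import Adam, MaxEpoch
from zoo.common.nncontext import init_nncontext
from zoo.pipeline.api.net import TFDataset, TFOptimizer

sc = init_nncontext()
# Each record is a [features, label] pair of numpy arrays.
train_rdd = sc.parallelize(range(1000)).map(
    lambda i: [np.random.randn(10).astype(np.float32),
               np.array(i % 2, dtype=np.int32)])

dataset = TFDataset.from_rdd(train_rdd,
                             names=["features", "labels"],
                             shapes=[[10], []],
                             types=[tf.float32, tf.int32],
                             batch_size=32)
feature, label = dataset.tensors
logits = tf.layers.dense(feature, 2)
loss = tf.reduce_mean(
    tf.losses.sparse_softmax_cross_entropy(labels=label, logits=logits))

optimizer = TFOptimizer(loss, Adam(1e-3))
optimizer.optimize(end_trigger=MaxEpoch(5))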
Example #6
    def __init__(self,
                 loss,
                 optim_method,
                 sess=None,
                 val_outputs=None,
                 val_labels=None,
                 val_method=None):
        '''
        TFOptimizer is used for distributed training of TensorFlow
        on Spark/BigDL.

        :param loss: The loss tensor of the TensorFlow model, should be a scalar
        :param optim_method: the optimization method to be used, such as bigdl.optim.optimizer.Adam
        :param sess: the current TensorFlow Session; if you want to use a pre-trained model, you
        should use this Session to load the pre-trained variables and pass it to TFOptimizer.
        '''
        import tensorflow as tf
        from zoo.util.tf import export_tf
        self.optim_method = optim_method
        if sess is None:
            self.sess = tf.Session()
            self.sess.run(tf.global_variables_initializer())
        else:
            self.sess = sess
        grads_vars = tf.train.GradientDescentOptimizer(0).compute_gradients(
            loss)
        variables = []
        grads = []
        from zoo.util.tf import process_grad
        for (grad, var) in grads_vars:
            variables.append(var)
            grad = process_grad(grad)
            grads.append(grad)
        self.export_dir = tempfile.mkdtemp()
        all_required_inputs = _find_placeholders([loss])
        self.dataset = tf.get_collection(all_required_inputs[0].name)[0]
        if self.dataset.batch_size <= 0:
            raise ValueError(
                "You should set batch_size instead of batch_per_thread for training"
            )
        self.inputs = self.dataset.tensors

        _check_the_same(all_required_inputs, self.inputs)

        if val_outputs is not None and val_labels is not None:
            outputs = val_outputs + val_labels + [loss]
        else:
            outputs = [loss]

        export_tf(self.sess,
                  self.export_dir,
                  inputs=self.inputs,
                  outputs=grads + outputs)

        variable_names = [v.name for v in variables]
        grad_names = [g.name for g in grads]
        output_names = [o.name for o in outputs]

        meta = {
            "input_names": [i.name for i in self.inputs],
            "output_names": output_names,
            "variables": variable_names,
            "grad_variables": grad_names
        }

        with open(os.path.join(self.export_dir, "training_meta.json"),
                  "w") as f:
            f.write(json.dumps(meta))

        self.training_helper_layer = TFTrainingHelper(self.export_dir)

        self.variable_placeholders = []
        assigns = []
        for v in variables:
            p = tf.placeholder(dtype=tf.float32, shape=v.shape)
            a = tf.assign(v, p)
            self.variable_placeholders.append(p)
            assigns.append(a)
        self.assign = tf.group(*assigns)

        data = self.dataset.rdd
        batch_size = self.dataset.batch_size
        sample_rdd = data.map(
            lambda t: Sample.from_ndarray(t, [np.array([0.0])]))

        self.optimizer = Optimizer.create(self.training_helper_layer,
                                          sample_rdd,
                                          IdentityCriterion(),
                                          batch_size=batch_size,
                                          optim_method=self.optim_method)

        if val_outputs is not None and val_labels is not None:
            val_sample_rdd = self.dataset.val_rdd\
                .map(lambda t: Sample.from_ndarray(t, [np.array([0.0])]))
            val_method = TFValidationMethod(val_method, len(val_outputs),
                                            len(val_labels))
            self.optimizer.set_validation(self.dataset.batch_size,
                                          val_sample_rdd, EveryEpoch(),
                                          val_method)
Example #7
    def create(loss, sess, inputs, grads, variables, graph, tensors_with_value,
               session_config, metrics):

        import tensorflow as tf
        from tensorflow.python.util import nest
        from zoo.util.tf import export_tf
        additional_inputs = []
        additional_values = []
        all_required_inputs = _find_placeholders([loss])
        all_required_inputs_names = [v.name for v in all_required_inputs]
        if tensors_with_value:
            for t, v in tensors_with_value.items():
                if t.name in all_required_inputs_names:
                    additional_inputs.append(t)
                    additional_values.append(v)

        if not isinstance(inputs, list):
            inputs = nest.flatten(inputs)

        inputs = inputs + additional_inputs

        if session_config is not None:
            assert isinstance(session_config, tf.ConfigProto), \
                "session_config should be a tf.ConfigProto"
            session_config.use_per_session_threads = True

        from zoo.util.tf import process_grad
        grads = [process_grad(grad) for grad in grads]

        outputs = []
        val_methods = None
        if metrics is not None:
            idx = 0
            val_methods = []
            for metric_name in metrics:
                metric = metrics[metric_name]
                if tf.is_numeric_tensor(metric):
                    outputs.append(metric)
                    val_methods.append(StatelessMetric(metric_name, idx))
                    idx += 1
                else:
                    outputs += metric.outputs
                    with graph.as_default():
                        val_labels = [tf.identity(v) for v in metric.labels]
                    outputs += val_labels
                    method = TFValidationMethod(
                        metric.val_method, metric_name,
                        list(range(idx, idx + len(metric.outputs))),
                        list(
                            range(idx + len(metric.outputs), idx +
                                  len(metric.outputs) + len(val_labels))))
                    val_methods.append(method)
                    idx += len(metric.outputs) + len(val_labels)
            with graph.as_default():
                real_batch_size = tf.shape(inputs[0])[0]
            outputs.append(real_batch_size)

        outputs.append(loss)

        export_dir = tempfile.mkdtemp()
        export_tf(sess, export_dir, inputs=inputs, outputs=grads + outputs)

        variable_names = [v.name for v in variables]
        grad_names = [g.name for g in grads]
        output_names = [o.name for o in outputs]

        def to_floats(vs):
            return [float(v) for v in vs]

        meta = {
            "input_names": [i.name for i in inputs],
            "output_names": output_names,
            "variables": variable_names,
            "grad_variables": grad_names,
            "default_tensor_values": [to_floats(v) for v in additional_values]
        }

        with open(os.path.join(export_dir, "training_meta.json"), "w") as f:
            f.write(json.dumps(meta))
        variable_placeholders = []
        with graph.as_default():
            assigns = []
            for v in variables:
                p = tf.placeholder(dtype=tf.float32, shape=v.shape)
                a = tf.assign(v, p)
                variable_placeholders.append(p)
                assigns.append(a)
            assign = tf.group(*assigns)
        training_helper_layer = TFTrainingHelper(export_dir, session_config,
                                                 assign, variable_placeholders)

        criterion = IdentityCriterion()

        return TFModel(training_helper_layer, criterion, val_methods)
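The metrics branch above distinguishes plain numeric tensors, which become a StatelessMetric, from richer metric objects carrying outputs/labels/val_method. A minimal check of the numeric-tensor path, with placeholder logits and labels invented for the example:

import tensorflow as tf

logits = tf.placeholder(tf.float32, shape=[None, 2])
labels = tf.placeholder(tf.int64, shape=[None])
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))
# True, so {"accuracy": accuracy} would take the StatelessMetric path above.
print(tf.is_numeric_tensor(accuracy))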