def ReportGradient(self, request, _):
    model_version_valid = self._validate_model_version(
        request.model_version
    )

    res = elasticdl_pb2.ReportGradientResponse()
    if not model_version_valid:
        logger.warning(
            "Task result for outdated version %d dropped",
            request.model_version,
        )
        res.accepted = False
        res.model_version = self._version
        return res

    # TODO: Update task queue with task_id
    with self._lock:
        tmp = {}
        indexed_grads = {}
        edl_embedding_gradients = {}
        # Sanity-check gradients before accumulating them.
        for k, v in request.gradient.items():
            if k not in self._model:
                if v.indices:
                    # grads of ElasticDL Embedding layer
                    # TODO: check arr.shape[1] = embedding_dim of this
                    # EdlEmbedding layer
                    arr = tensor_to_ndarray(v)
                    edl_embedding_gradients[k] = arr
                    continue
                else:
                    raise ValueError(
                        "Gradient key: %s is not part of model" % k
                    )
            arr = tensor_to_ndarray(v)
            if isinstance(arr, tf.IndexedSlices):
                if arr.values.shape[1] != self._model[k].numpy().shape[1]:
                    raise ValueError(
                        "Gradient key: %s has incompatible "
                        "indexed slice dimension %d, expected %d"
                        % (
                            k,
                            arr.values.shape[1],
                            self._model[k].numpy().shape[1],
                        )
                    )
                max_index = tf.math.reduce_max(arr.indices).numpy()
                if max_index >= self._model[k].numpy().shape[0]:
                    raise ValueError(
                        "Gradient key: %s has out-of-range index %d, "
                        "maximum allowed is %d"
                        % (
                            k,
                            max_index,
                            self._model[k].numpy().shape[0] - 1,
                        )
                    )
                indexed_grads[k] = arr
            else:
                if arr.shape != self._model[k].numpy().shape:
                    raise ValueError(
                        "Gradient key: %s has incompatible dimension" % k
                    )
                tmp[k] = arr

        # Accumulate grads of the ElasticDL Embedding layer.
        for k, v in edl_embedding_gradients.items():
            if k in self._edl_embedding_gradients:
                self._edl_embedding_gradients[k] = merge_indexed_slices(
                    self._edl_embedding_gradients[k], v
                )
            else:
                self._edl_embedding_gradients[k] = v

        # Accumulate grads of the Keras Embedding layer.
        for k, v in indexed_grads.items():
            if k not in self._gradient_sum_indexed:
                self._gradient_sum_indexed[k] = v
            else:
                grads_s = self._gradient_sum_indexed[k]
                self._gradient_sum_indexed[k] = merge_indexed_slices(
                    grads_s, v
                )

        # Accumulate all other (dense) grads.
        for k, v in tmp.items():
            if not self._use_async and k in self._gradient_sum:
                self._gradient_sum[k] = self._gradient_sum[k] + v
            else:
                self._gradient_sum[k] = v

        self._grad_n += 1
        if self._use_async or self._grad_n >= self._grad_to_wait:
            self._update_model()
            self._update_evaluation()
            self._update_checkpoint()

    res.accepted = True
    res.model_version = self._version
    return res
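# `merge_indexed_slices` is used above but not defined in this section.
# A minimal sketch of what such a helper could look like, assuming it
# simply concatenates two tf.IndexedSlices; duplicate indices are left
# for the later apply/aggregation step to sum.
import tensorflow as tf


def merge_indexed_slices(a, b):
    """Concatenate two tf.IndexedSlices into one."""
    return tf.IndexedSlices(
        values=tf.concat([a.values, b.values], axis=0),
        indices=tf.concat([a.indices, b.indices], axis=0),
    )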
def ReportGradient(self, request, _):
    model_version_valid = self._use_async or self._validate_model_version(
        request.model_version
    )

    res = elasticdl_pb2.ReportGradientResponse()
    if not model_version_valid:
        logger.warning(
            "Task result for outdated version %d dropped",
            request.model_version,
        )
        res.accepted = False
        res.model_version = self._version
        return res

    tmp = {}
    indexed_grads = {}
    edl_embedding_gradients = {}
    # Sanity-check gradients before accumulating them.
    for k, v in request.gradient.items():
        if k not in self._model:
            if v.indices:
                # grads of ElasticDL Embedding layer
                # TODO: check arr.shape[1] = embedding_dim of this
                # EdlEmbedding layer
                arr = tensor_to_ndarray(v)
                edl_embedding_gradients[k] = arr
                continue
            else:
                raise ValueError(
                    "Gradient key: %s is not part of model" % k
                )
        arr = tensor_to_ndarray(v)
        if isinstance(arr, tf.IndexedSlices):
            if arr.values.shape[1] != self._model[k].numpy().shape[1]:
                raise ValueError(
                    "Gradient key: %s has incompatible "
                    "indexed slice dimension %d, expected %d"
                    % (
                        k,
                        arr.values.shape[1],
                        self._model[k].numpy().shape[1],
                    )
                )
            max_index = tf.math.reduce_max(arr.indices).numpy()
            if max_index >= self._model[k].numpy().shape[0]:
                raise ValueError(
                    "Gradient key: %s has out-of-range index %d, "
                    "maximum allowed is %d"
                    % (k, max_index, self._model[k].numpy().shape[0] - 1)
                )
            indexed_grads[k] = arr
        else:
            if arr.shape != self._model[k].numpy().shape:
                raise ValueError(
                    "Gradient key: %s has incompatible dimension" % k
                )
            tmp[k] = arr

    # Only synchronous training needs the lock to accumulate gradients
    # across workers; async mode applies gradients immediately. Use
    # try/finally so an exception cannot leave the lock held.
    if not self._use_async:
        self._lock.acquire()
    try:
        self._process_gradients(
            edl_embedding_gradients, indexed_grads, tmp, request.model_version
        )
    finally:
        if not self._use_async:
            self._lock.release()

    res.accepted = True
    res.model_version = self._version
    return res
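# `_process_gradients` is the accumulation-and-update logic factored out
# of the first version above. A hedged reconstruction from that version's
# body; the real method may differ, e.g. in how it uses `model_version`
# for staleness handling in async mode.
def _process_gradients(
    self, edl_embedding_gradients, indexed_grads, grads, model_version
):
    # grads of ElasticDL Embedding layer
    for k, v in edl_embedding_gradients.items():
        if k in self._edl_embedding_gradients:
            self._edl_embedding_gradients[k] = merge_indexed_slices(
                self._edl_embedding_gradients[k], v
            )
        else:
            self._edl_embedding_gradients[k] = v

    # grads of Keras Embedding layer
    for k, v in indexed_grads.items():
        if k not in self._gradient_sum_indexed:
            self._gradient_sum_indexed[k] = v
        else:
            self._gradient_sum_indexed[k] = merge_indexed_slices(
                self._gradient_sum_indexed[k], v
            )

    # other grads
    for k, v in grads.items():
        if not self._use_async and k in self._gradient_sum:
            self._gradient_sum[k] = self._gradient_sum[k] + v
        else:
            self._gradient_sum[k] = v

    self._grad_n += 1
    if self._use_async or self._grad_n >= self._grad_to_wait:
        self._update_model()
        self._update_evaluation()
        self._update_checkpoint()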
def ReportGradient(self, request, _):
    model_version_valid = self._use_async or self._validate_model_version(
        request.model_version
    )

    res = elasticdl_pb2.ReportGradientResponse()
    if not model_version_valid:
        logger.warning(
            "Task result for outdated version %d dropped",
            request.model_version,
        )
        res.accepted = False
        res.model_version = self._version
        return res

    non_embedding_gradients = {}
    indexed_grads = {}
    edl_embedding_gradients = {}
    # Sanity-check gradients before accumulating them.
    for v in request.gradient:
        tensor = Tensor.from_tensor_pb(v)
        name = tensor.name
        if name not in self._model:
            if tensor.is_indexed_slices():
                # grads of ElasticDL Embedding layer
                # TODO: check arr.shape[1] = embedding_dim of this
                # EdlEmbedding layer
                edl_embedding_gradients[name] = tensor.to_tf_tensor()
                continue
            else:
                raise ValueError(
                    "Gradient key: %s is not part of model" % name
                )
        if tensor.is_indexed_slices():
            if (
                tensor.values.shape[1]
                != self._model[name].numpy().shape[1]
            ):
                raise ValueError(
                    "Gradient key: %s has incompatible "
                    "indexed slice dimension %d, expected %d"
                    % (
                        name,
                        tensor.values.shape[1],
                        self._model[name].numpy().shape[1],
                    )
                )
            max_index = tf.math.reduce_max(tensor.indices).numpy()
            if max_index >= self._model[name].numpy().shape[0]:
                raise ValueError(
                    "Gradient key: %s has out-of-range index %d, "
                    "maximum allowed is %d"
                    % (
                        name,
                        max_index,
                        self._model[name].numpy().shape[0] - 1,
                    )
                )
            indexed_grads[name] = tensor.to_tf_tensor()
        else:
            if tensor.values.shape != self._model[name].numpy().shape:
                raise ValueError(
                    "Gradient key: %s has incompatible dimension" % name
                )
            non_embedding_gradients[name] = tensor.to_tf_tensor()

    if not self._use_async:
        self._lock.acquire()
    try:
        self._process_gradients(
            edl_embedding_gradients,
            indexed_grads,
            non_embedding_gradients,
            request.model_version,
        )
    finally:
        if not self._use_async:
            self._lock.release()

    res.accepted = True
    res.model_version = self._version
    return res
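# The `Tensor` wrapper used in the last version is not defined in this
# section. A minimal sketch of the interface that version relies on
# (`from_tensor_pb`, `is_indexed_slices`, `to_tf_tensor`, plus `name`,
# `values`, and `indices` attributes). The class body and the protobuf
# field names below are illustrative assumptions, not the actual
# ElasticDL implementation.
import numpy as np
import tensorflow as tf


class Tensor(object):
    def __init__(self, name, values, indices=None):
        self.name = name
        self.values = values  # numpy ndarray of gradient values
        self.indices = indices  # row indices for sparse grads, or None

    @classmethod
    def from_tensor_pb(cls, tensor_pb):
        # Placeholder deserialization; the real protobuf layout is not
        # shown in this section.
        values = np.frombuffer(
            tensor_pb.content, dtype=np.float32
        ).reshape(tensor_pb.dim)
        indices = (
            np.array(tensor_pb.indices) if tensor_pb.indices else None
        )
        return cls(tensor_pb.name, values, indices)

    def is_indexed_slices(self):
        return self.indices is not None

    def to_tf_tensor(self):
        if self.is_indexed_slices():
            return tf.IndexedSlices(self.values, self.indices)
        return tf.constant(self.values)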