def testtensor_to_ndarray(self):
    """Exercise tensor_to_ndarray over valid, empty, and malformed protos."""
    pb = elasticdl_pb2.Tensor()

    # A tensor proto with no dim at all must be rejected.
    self.assertRaises(ValueError, tensor_to_ndarray, pb)

    # A zero-length array converts cleanly.
    pb.dim.append(0)
    pb.content = b""
    np.testing.assert_array_equal(
        np.array([], dtype=np.float32), tensor_to_ndarray(pb)
    )

    # Pathological shape: one of the dimensions is 0.
    del pb.dim[:]
    pb.dim.extend([2, 0, 1, 9])
    pb.content = b""
    np.testing.assert_array_equal(
        np.ndarray(shape=[2, 0, 1, 9], dtype=np.float32),
        tensor_to_ndarray(pb),
    )

    # Content holds 12 float32s (48 bytes); a dim of [11] cannot match.
    pb.content = b"\0" * (4 * 12)
    del pb.dim[:]
    pb.dim.extend([11])
    self.assertRaises(ValueError, tensor_to_ndarray, pb)

    # Every 2-D factorization of 12 elements is a compatible shape.
    for rows in (1, 2, 3, 4, 6, 12):
        del pb.dim[:]
        pb.content = b"\0" * (4 * 12)
        pb.dim.extend([rows, 12 // rows])
        tensor_to_ndarray(pb)
def report_evaluation_metrics(self, evaluation_version, evaluation_metrics):
    """Accumulate one minibatch worth of evaluation metric tensors.

    Results computed against a stale model version are dropped (logged
    and False returned).  Otherwise each metric is summed into the
    running totals and the completed-minibatch counter is bumped.
    """
    version_mismatch = (
        self.model_version >= 0
        and evaluation_version != self.model_version
    )
    if version_mismatch:
        logger.error(
            "Drop a wrong version evaluation: request %d, receive %d"
            % (self.model_version, evaluation_version)
        )
        return False

    for name, tensor_pb in evaluation_metrics.items():
        value = tensor_to_ndarray(tensor_pb)
        if name in self._evaluation_metrics:
            self._evaluation_metrics[name] += value
        else:
            # Copy on first sight so later in-place accumulation
            # cannot alias the decoded input array.
            self._evaluation_metrics[name] = np.copy(value)

    self._completed_minibatches += 1
    return True
def report_evaluation_metrics(self, evaluation_version, model_outputs, labels):
    """Feed one minibatch of model outputs and labels into the metrics.

    Results computed against a stale model version are dropped (logged
    and False returned).  Otherwise every metric registered for each
    model output is updated with (labels, outputs).
    """
    version_mismatch = (
        self.model_version >= 0
        and evaluation_version != self.model_version
    )
    if version_mismatch:
        logger.error(
            "Drop a wrong version evaluation: request %d, receive %d"
            % (self.model_version, evaluation_version)
        )
        return False

    label_array = tensor_to_ndarray(labels)
    for output_name, output_tensor in model_outputs.items():
        metrics_for_output = self._metrics_dict.get(output_name, {})
        if not metrics_for_output:
            # No metrics registered for this output; skip decoding it.
            continue
        output_array = tensor_to_ndarray(output_tensor)
        for metric in metrics_for_output.values():
            metric.update_state(label_array, output_array)
    return True
def get_model(self, version, method):
    """Fetch model parameters from the master and refresh local state.

    Assigns every trainable variable from the returned params and
    records the returned model version in self._model_version.
    """
    request = elasticdl_pb2.GetModelRequest()
    request.version = version
    request.method = method
    model = self._stub.GetModel(request)

    for variable in self._model.trainable_variables:
        # Assumes every trainable variable has an entry in model.param.
        variable.assign(tensor_to_ndarray(model.param[variable.name]))
    self._model_version = model.version
def _init_model_from_tensor_dict(self, tensor_dict):
    """Initialize model variables from a non-empty {name: tensor proto} dict."""
    assert tensor_dict
    for var_name, tensor_pb in tensor_dict.items():
        self.set_model_var(var_name, tensor_to_ndarray(tensor_pb))
def ReportGradient(self, request, _):
    """gRPC handler: validate, sanity-check, and apply reported gradients.

    In async mode any model version is accepted; in sync mode the request
    must match the current version or it is dropped.  Each gradient is
    checked against the model's shapes before being applied.

    Returns a ReportGradientResponse with `accepted` and the current
    model version.  Raises ValueError on malformed gradients.
    """
    model_version_valid = self._use_async or self._validate_model_version(
        request.model_version
    )

    res = elasticdl_pb2.ReportGradientResponse()
    if not model_version_valid:
        logger.warning(
            "Task result for outdated version %d dropped",
            request.model_version,
        )
        res.accepted = False
        res.model_version = self._version
        return res

    tmp = {}
    indexed_grads = {}
    edl_embedding_gradients = {}
    # Do sanity check before accumulating gradients.
    for k, v in request.gradient.items():
        if k not in self._model:
            if v.indices:
                # grads of ElasticDL Embedding layer
                # TODO: check arr.shape[1] = embedding_dim of this
                # EdlEmbedding layer
                arr = tensor_to_ndarray(v)
                edl_embedding_gradients[k] = arr
                continue
            else:
                # BUG FIX: format the key into the message; previously the
                # key was passed as a second ValueError argument, leaving a
                # literal "%s" in the message.
                raise ValueError(
                    "Gradient key: %s is not part of model" % k
                )
        arr = tensor_to_ndarray(v)
        if isinstance(arr, tf.IndexedSlices):
            if arr.values.shape[1] != self._model[k].numpy().shape[1]:
                raise ValueError(
                    "Gradient key: %s has incompatible "
                    "indexed slice dimension %d, expected %d"
                    % (
                        k,
                        arr.values.shape[1],
                        self._model[k].numpy().shape[1],
                    )
                )
            max_index = tf.math.reduce_max(arr.indices).numpy()
            if max_index >= self._model[k].numpy().shape[0]:
                raise ValueError(
                    "Gradient key: %s has wrong indices %d, "
                    "out of range %d"
                    % (k, max_index, self._model[k].numpy().shape[0] - 1)
                )
            indexed_grads[k] = arr
        else:
            if arr.shape != self._model[k].numpy().shape:
                # BUG FIX: same unformatted-message defect as above.
                raise ValueError(
                    "Gradient key: %s has incompatible dimension" % k
                )
            tmp[k] = arr

    # BUG FIX: the lock was taken with bare acquire()/release() calls, so
    # an exception inside _process_gradients would leak the lock and
    # deadlock every subsequent sync-mode request.  Use the context
    # manager so the lock is always released.
    if self._use_async:
        self._process_gradients(
            edl_embedding_gradients, indexed_grads, tmp, request.model_version
        )
    else:
        with self._lock:
            self._process_gradients(
                edl_embedding_gradients,
                indexed_grads,
                tmp,
                request.model_version,
            )

    res.accepted = True
    res.model_version = self._version
    return res
def testGetModel(self):
    """End-to-end checks of MasterServicer.GetModel for both methods."""
    master = MasterServicer(
        2,
        3,
        None,
        None,
        init_var=[],
        checkpoint_filename_for_init="",
        checkpoint_service=CheckpointService("", 0, 0, False),
        evaluation_service=None,
    )

    def expect_param(model, name, values):
        # Compare one serialized parameter against the expected values.
        np.testing.assert_array_equal(
            np.array(values), tensor_to_ndarray(model.param[name])
        )

    master.set_model_var("x", np.array([1.0, 1.0], dtype=np.float32))
    # A freshly built master is at model version 0.
    self.assertEqual(0, master._version)

    # MINIMUM method, requesting version 0, returns version 0.
    req = elasticdl_pb2.GetModelRequest()
    req.version = 0
    req.method = elasticdl_pb2.MINIMUM
    model = master.GetModel(req, None)
    self.assertEqual(0, model.version)
    self.assertEqual(["x"], list(model.param.keys()))
    expect_param(model, "x", [1.0, 1.0])

    # After bumping the master to version 1, a MINIMUM request for
    # version 0 should hand back version 1.
    master._version = 1
    master.set_model_var("x", np.array([2.0, 2.0], dtype=np.float32))
    master.set_model_var("y", np.array([12.0, 13.0], dtype=np.float32))
    model = master.GetModel(req, None)
    self.assertEqual(1, model.version)
    self.assertEqual(["x", "y"], sorted(model.param.keys()))
    expect_param(model, "x", [2.0, 2.0])
    expect_param(model, "y", [12.0, 13.0])

    # Requesting a version past the current one raises.
    req.version = 2
    self.assertRaises(ValueError, master.GetModel, req, None)

    # FIXED method returns exactly the requested version.
    req.method = elasticdl_pb2.FIXED
    req.version = 1
    model = master.GetModel(req, None)
    self.assertEqual(1, model.version)
    self.assertEqual(["x", "y"], sorted(model.param.keys()))
    expect_param(model, "x", [2.0, 2.0])
    expect_param(model, "y", [12.0, 13.0])

    # An older version is unavailable when there is no checkpoint.
    req.version = 0
    model = master.GetModel(req, None)
    self.assertFalse(model.param)

    # With a checkpoint saved, an older version becomes available again.
    with tempfile.TemporaryDirectory() as tempdir:
        chk_dir = os.path.join(tempdir, "testGetModel")
        os.makedirs(chk_dir)
        req.version = master._version
        req.method = elasticdl_pb2.MINIMUM
        model = master.GetModel(req, None)
        master._checkpoint_service = CheckpointService(
            chk_dir, 2, 5, False
        )
        master._checkpoint_service.save(master._version, model, False)
        master._version = 2
        master.set_model_var("z", np.array([2.0, 2.0], dtype=np.float32))
        req.version = 1
        req.method = elasticdl_pb2.FIXED
        model = master.GetModel(req, None)
        self.assertEqual(1, model.version)
        self.assertEqual(["x", "y"], sorted(model.param.keys()))
        expect_param(model, "x", [2.0, 2.0])
        expect_param(model, "y", [12.0, 13.0])
def verify(a):
    """Assert that a survives an ndarray -> tensor -> ndarray round trip."""
    round_tripped = tensor_to_ndarray(ndarray_to_tensor(a))
    np.testing.assert_array_equal(a, round_tripped)
def ReportGradient(self, request, _):
    """gRPC handler: validate, sanity-check, and accumulate gradients.

    Requests for a stale model version are dropped.  Accepted gradients
    are merged (under the lock) into the pending sums; once enough
    minibatches have arrived — or immediately in async mode — the model,
    evaluation state, and checkpoint are updated.

    Returns a ReportGradientResponse with `accepted` and the current
    model version.  Raises ValueError on malformed gradients.
    """
    model_version_valid = self._validate_model_version(
        request.model_version
    )

    res = elasticdl_pb2.ReportGradientResponse()
    if not model_version_valid:
        logger.warning(
            "Task result for outdated version %d dropped",
            request.model_version,
        )
        res.accepted = False
        res.model_version = self._version
        return res

    # TODO: Update task queue with task_id
    with self._lock:
        tmp = {}
        indexed_grads = {}
        edl_embedding_gradients = {}
        # Do sanity check before accumulating gradients.
        for k, v in request.gradient.items():
            if k not in self._model:
                if v.indices:
                    # grads of ElasticDL Embedding layer
                    # TODO: check arr.shape[1] = embedding_dim of this
                    # EdlEmbedding layer
                    arr = tensor_to_ndarray(v)
                    edl_embedding_gradients[k] = arr
                    continue
                else:
                    # BUG FIX: format the key into the message; previously
                    # it was passed as a second ValueError argument,
                    # leaving a literal "%s" in the message.
                    raise ValueError(
                        "Gradient key: %s is not part of model" % k
                    )
            arr = tensor_to_ndarray(v)
            if isinstance(arr, tf.IndexedSlices):
                if arr.values.shape[1] != self._model[k].numpy().shape[1]:
                    raise ValueError(
                        "Gradient key: %s has incompatible "
                        "indexed slice dimension %d, expected %d"
                        % (
                            k,
                            arr.values.shape[1],
                            self._model[k].numpy().shape[1],
                        )
                    )
                max_index = tf.math.reduce_max(arr.indices).numpy()
                if max_index >= self._model[k].numpy().shape[0]:
                    raise ValueError(
                        "Gradient key: %s has wrong indices %d, "
                        "out of range %d"
                        % (
                            k,
                            max_index,
                            self._model[k].numpy().shape[0] - 1,
                        )
                    )
                indexed_grads[k] = arr
            else:
                if arr.shape != self._model[k].numpy().shape:
                    # BUG FIX: same unformatted-message defect as above.
                    raise ValueError(
                        "Gradient key: %s has incompatible dimension" % k
                    )
                tmp[k] = arr

        # grads of ElasticDL Embedding layer
        for k, v in edl_embedding_gradients.items():
            if k in self._edl_embedding_gradients:
                self._edl_embedding_gradients[k] = merge_indexed_slices(
                    self._edl_embedding_gradients[k], v
                )
            else:
                self._edl_embedding_gradients[k] = v

        # grads of Keras Embedding layer
        for k, v in indexed_grads.items():
            if k not in self._gradient_sum_indexed:
                self._gradient_sum_indexed[k] = v
            else:
                grads_s = self._gradient_sum_indexed[k]
                self._gradient_sum_indexed[k] = merge_indexed_slices(
                    grads_s, v
                )

        # other grads
        # NOTE(review): self._use_async is consulted here although the
        # version check above runs unconditionally — confirm this handler
        # is only registered in sync mode, or that overwriting the sum in
        # async mode is intended.
        for k, v in tmp.items():
            if not self._use_async and k in self._gradient_sum:
                self._gradient_sum[k] = self._gradient_sum[k] + v
            else:
                self._gradient_sum[k] = v

        self._grad_n += 1
        if self._use_async or self._grad_n >= self._grad_to_wait:
            self._update_model()
            self._update_evaluation()
            self._update_checkpoint()

    res.accepted = True
    res.model_version = self._version
    return res