def test_dictionarize(self):
    """dictionarize() must expand Resource objects nested in dicts/lists."""
    raw = {
        "gpu_capacity": make_resource("gpu", {"r1": "8"}),
        "user_status": {
            "username": "******",
            "user_gpu": make_resource("gpu", {"r1": "4"}),
        },
        "node_status": [{
            "gpu_used": make_resource("gpu", {"r1": "6"}),
        }],
        "none_field": None,
    }
    want = {
        "gpu_capacity": {"r1": 8.0},
        "user_status": {
            "username": "******",
            "user_gpu": {"r1": 4.0},
        },
        "node_status": [{"gpu_used": {"r1": 6.0}}],
        "none_field": None,
    }
    self.assertEqual(want, dictionarize(raw))
def test_ne(self):
    """Inequality: a zero entry equals an absent entry; differing values differ."""
    # {"r2": "0"} vs missing "r2" — treated as equal.
    lhs = make_resource(self.r_type, {"r1": "1", "r2": "0"})
    rhs = make_resource(self.r_type, {"r1": "1"})
    self.assertFalse(lhs != rhs)
    # Same key with different values — unequal.
    lhs = make_resource(self.r_type, {"r1": "1", "r2": "0"})
    rhs = make_resource(self.r_type, {"r1": "1", "r2": "1"})
    self.assertTrue(lhs != rhs)
def normalize(self, r_type, request, limit):
    """Fill a missing request/limit from its counterpart and cap request at limit.

    Args:
        r_type: resource type name passed through to make_resource.
        request: requested amount (or None).
        limit: limit amount (or None).

    Returns:
        Tuple (request, limit) after reconciliation.
    """
    if request is None:
        # Only the limit is given — mirror it into the request.
        if limit is not None:
            request = limit
    elif limit is None:
        # Only the request is given — mirror it into the limit.
        limit = request
    else:
        # Both present: if request meets or exceeds limit, clamp it down.
        req_res = make_resource(r_type, {self.sku: request})
        lim_res = make_resource(r_type, {self.sku: limit})
        if req_res >= lim_res:
            request = limit
    return request, limit
def test_ge(self):
    """>= against a scalar and against another resource of the same type."""
    # Scalar comparison must hold for every entry.
    vec = make_resource(self.r_type, {"r1": "1", "r2": "0"})
    self.assertTrue(vec >= 0)
    self.assertFalse(vec >= 1)
    # A missing key on the right counts as satisfied.
    bigger = make_resource(self.r_type, {"r1": "1", "r2": "2"})
    smaller = make_resource(self.r_type, {"r1": "1"})
    self.assertTrue(bigger >= smaller)
    # One coordinate smaller → not >=.
    lhs = make_resource(self.r_type, {"r1": "1", "r2": "2"})
    rhs = make_resource(self.r_type, {"r1": "2", "r2": "1"})
    self.assertFalse(lhs >= rhs)
def test_div_by_zero(self):
    """Division by zero (scalar or element-wise) must yield 0, not raise."""
    # Scalar divisor of zero.
    numerator = make_resource(self.r_type, {"r1": "1", "r2": "2"})
    self.assertEqual(
        make_resource(self.r_type, {"r1": "0", "r2": "0"}),
        numerator / 0)
    # Element-wise: only the zero-valued entry collapses to 0.
    numerator = make_resource(self.r_type, {"r1": "1", "r2": "2"})
    denominator = make_resource(self.r_type, {"r1": "4", "r2": "0"})
    self.assertEqual(
        make_resource(self.r_type, {"r1": "0.25", "r2": "0"}),
        numerator / denominator)
def test_imul(self):
    """In-place multiplication by a scalar and by another resource."""
    # Scalar factor.
    acc = make_resource(self.r_type, {"r1": "1", "r2": "2"})
    acc *= 2
    self.assertEqual(make_resource(self.r_type, {"r1": "2", "r2": "4"}), acc)
    # Element-wise factor.
    acc = make_resource(self.r_type, {"r1": "1", "r2": "2"})
    factor = make_resource(self.r_type, {"r1": "4", "r2": "0.5"})
    acc *= factor
    self.assertEqual(make_resource(self.r_type, {"r1": "4", "r2": "1"}), acc)
def test_truediv(self):
    """True division by a scalar and element-wise by another resource."""
    # Scalar divisor.
    numerator = make_resource(self.r_type, {"r1": "1", "r2": "2"})
    self.assertEqual(
        make_resource(self.r_type, {"r1": "0.5", "r2": "1"}),
        numerator / 2)
    # Element-wise divisor.
    numerator = make_resource(self.r_type, {"r1": "1", "r2": "2"})
    denominator = make_resource(self.r_type, {"r1": "4", "r2": "0.5"})
    self.assertEqual(
        make_resource(self.r_type, {"r1": "0.25", "r2": "4"}),
        numerator / denominator)
def __init__(self, params=None):
    """Class for job resource requirement.

    Args:
        params: A dictionary containing "cpu", "memory", "gpu",
            "gpu_memory", i.e.
            params = {
                "cpu": { "r1": ... },
                "memory": { "r1": ... },
                "gpu": { "r1": ... },
                "gpu_memory": { "r1": ... },
            }
    """
    if params is None:
        params = {}
    # Use an explicit name list instead of iterating self.__dict__:
    # the original relied on exactly these four attributes having just
    # been seeded and on no other attribute existing yet, which is
    # fragile under subclassing or reordering.
    for r_type in ("cpu", "memory", "gpu", "gpu_memory"):
        setattr(self, r_type, make_resource(r_type, params.get(r_type)))
def test_incompatible_type(self):
    """Arithmetic/comparison with an incompatible resource type raises ValueError.

    Uses assertRaises instead of the try/self.fail/except pattern: it is
    the idiomatic unittest construct and removes the no-op
    assertTrue(True) calls.
    """
    v1 = make_resource(self.r_type, {"r1": "1", "r2": "0"})
    v2 = DummyResource(params={"r1": "1"})
    with self.assertRaises(ValueError):
        v1 + v2
    with self.assertRaises(ValueError):
        v1 - v2
    with self.assertRaises(ValueError):
        v1 += v2
    with self.assertRaises(ValueError):
        v1 -= v2
    with self.assertRaises(ValueError):
        _ = v1 >= v2
def get_sku_resource_info(self, r_type):
    """Look up per-node capacity and schedulable ratio for this SKU.

    Args:
        r_type: resource type key into self.metadata.

    Returns:
        Tuple (per_node resource, schedulable_ratio float). Missing
        metadata defaults to 0 capacity and ratio 1.
    """
    sku_info = self.metadata.get(r_type, {}).get(self.sku, {})
    capacity = make_resource(r_type, {self.sku: sku_info.get("per_node", 0)})
    ratio = float(sku_info.get("schedulable_ratio", 1))
    return capacity, ratio
def test_isub(self):
    """In-place subtraction; untouched keys keep their value."""
    acc = make_resource(self.r_type, {"r1": "1", "r2": "2"})
    acc -= make_resource(self.r_type, {"r1": "2"})
    # r1 clamps/settles at 0 per the class's subtraction semantics;
    # r2 is untouched.
    self.assertEqual(make_resource(self.r_type, {"r1": "0", "r2": "2"}), acc)
def test_sub(self):
    """Binary subtraction; untouched keys keep their value."""
    minuend = make_resource(self.r_type, {"r1": "1", "r2": "2"})
    subtrahend = make_resource(self.r_type, {"r1": "2"})
    diff = minuend - subtrahend
    self.assertEqual(
        make_resource(self.r_type, {"r1": "0", "r2": "2"}), diff)
def test_scalar(self):
    """scalar() returns the numeric value for a present key, None otherwise."""
    res = make_resource(self.r_type, {"r1": "1"})
    self.assertEqual(1, res.scalar("r1"))
    self.assertIsNone(res.scalar("r2"))
def test_floor(self):
    """floor rounds every entry down."""
    res = make_resource(self.r_type, {"r1": "1.5"})
    floored = make_resource(self.r_type, {"r1": "1"})
    self.assertEqual(floored, res.floor)
def test_repr(self):
    """repr() shows the underlying dict with float values."""
    res = make_resource(self.r_type, {"r1": "1"})
    want = "{'r1': %s}" % float(1)
    self.assertEqual(want, repr(res))
def test_scalar(self):
    """scalar() renders byte counts in binary units; absent keys give None."""
    res = make_resource(self.r_type, {"r1": 1048576})
    self.assertEqual("1Mi", res.scalar("r1"))
    self.assertIsNone(res.scalar("r2"))
def test_convert(self):
    """Binary-suffixed values are expanded to plain byte counts (16Gi)."""
    res = make_resource(self.r_type, {"r1": "16Gi"})
    in_bytes = make_resource(self.r_type, {"r1": "17179869184"})
    self.assertEqual(in_bytes, res)
def test_convert(self):
    """Binary-suffixed values are expanded to plain byte counts (1Gi)."""
    res = make_resource(self.r_type, {"r1": "1Gi"})
    in_bytes = make_resource(self.r_type, {"r1": "1073741824"})
    self.assertEqual(in_bytes, res)
def get_resource_params_from_job_params(params):
    """Compute per-SKU cpu/memory/gpu/gpu_memory requirements for a job.

    Args:
        params: job parameter dict. Reads "jobtrainingtype", "sku",
            "cpurequest", "memoryrequest", and (per job type) "numps" /
            "numpsworker" plus the GPU count via get_gpu_limit().

    Returns:
        Dict mapping "cpu", "memory", "gpu", "gpu_memory" to the
        corresponding resource's to_dict() representation.
    """
    cpu = make_resource("cpu")
    memory = make_resource("memory")
    gpu = make_resource("gpu")
    gpu_memory = make_resource("gpu_memory")

    job_type = params.get("jobtrainingtype", "RegularJob")
    sku = params.get("sku", "")

    # Default to 1 CPU, 0 memory if not specified
    # Consistent with pod.yaml.template
    cpu_request = params.get("cpurequest", 1)
    mem_request = params.get("memoryrequest", 0)
    try:
        resource_gpu = get_gpu_limit(params)
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
    # are no longer swallowed.
    except Exception:
        logger.warning("Parsing resourcegpu in %s failed. Set to 0.", params)
        resource_gpu = 0

    if job_type == "RegularJob":
        cpu = make_resource("cpu", {sku: cpu_request})
        memory = make_resource("memory", {sku: mem_request})
        gpu = make_resource("gpu", {sku: resource_gpu})
    elif job_type == "PSDistJob":
        # Each ps reserves 1 CPU and 0 memory
        num_ps = int(params.get("numps", 0))
        cpu += make_resource("cpu", {sku: num_ps})
        memory += make_resource("memory", {sku: 0})

        # Add worker CPU requirement
        num_worker = int(params.get("numpsworker", 0))
        for _ in range(num_worker):
            cpu += make_resource("cpu", {sku: cpu_request})
            memory += make_resource("memory", {sku: mem_request})
            gpu += make_resource("gpu", {sku: resource_gpu})
    elif job_type == "InferenceJob":
        # Only support 1 GPU per worker now
        # Master
        cpu += make_resource("cpu", {sku: cpu_request})
        memory += make_resource("memory", {sku: mem_request})

        # Inference workers
        for _ in range(resource_gpu):
            cpu += make_resource("cpu", {sku: cpu_request})
            memory += make_resource("memory", {sku: mem_request})
            gpu += make_resource("gpu", {sku: 1})
    else:
        logger.warning("Unrecognized job type %s", job_type)

    return {
        "cpu": cpu.to_dict(),
        "memory": memory.to_dict(),
        "gpu": gpu.to_dict(),
        "gpu_memory": gpu_memory.to_dict(),
    }
def test_ceil(self):
    """ceil rounds every entry up."""
    res = make_resource(self.r_type, {"r1": "1.5"})
    rounded_up = make_resource(self.r_type, {"r1": "2"})
    self.assertEqual(rounded_up, res.ceil)
def test_normalize(self):
    """normalize() clamps negative entries to 0 in place."""
    res = make_resource(self.r_type, None)
    # Inject a negative value directly into the backing dict.
    res.res = {"r1": 1.0, "r2": -1.0}
    res.normalize()
    self.assertEqual({"r1": 1.0, "r2": 0}, res.res)
def test_convert(self):
    """Milli-suffixed CPU quantities are converted to fractional cores."""
    res = make_resource(self.r_type, {"r1": "100m"})
    as_cores = make_resource(self.r_type, {"r1": "0.1"})
    self.assertEqual(as_cores, res)