示例#1
0
 def test_create_container_with_resources(self):
     """Chained ``require`` calls register one entry per scheduler name."""
     default_res = Resources(cpu=1, gpu=2, memMB=128)
     scheduler_res = Resources(cpu=1, gpu=2, memMB=256)

     # Build the container step by step instead of in one fluent chain.
     container = Container("torch")
     container = container.require(default_res, "default")
     container = container.require(scheduler_res, "test_scheduler")

     registered = container.resources
     self.assertEqual(2, len(registered))
     self.assertEqual(default_res, container.resources["default"])
     self.assertEqual(scheduler_res, container.resources["test_scheduler"])
示例#2
0
 def test_get_resource_incorrect_input(self):
     """``require`` rejects a scheduler->Resources mapping combined with a
     scheduler name by raising ValueError."""
     default_res = Resources(cpu=1, gpu=2, memMB=128)
     scheduler_res = Resources(cpu=1, gpu=2, memMB=256)
     per_scheduler = {
         "default": default_res,
         "test_scheduler": scheduler_res,
     }

     with self.assertRaises(ValueError):
         Container("torch").require(per_scheduler, "new_scheduler")
示例#3
0
 def test_get_resource_none(self):
     """An unregistered scheduler name resolves to ``NULL_RESOURCE``."""
     per_scheduler = {
         "default": Resources(cpu=1, gpu=2, memMB=128),
         "test_scheduler": Resources(cpu=1, gpu=2, memMB=256),
     }
     container = Container("torch").require(per_scheduler)

     looked_up = container.get_resources("non-existent")
     self.assertEqual(NULL_RESOURCE, looked_up)
示例#4
0
 def test_get_resource(self):
     """``get_resources`` returns the scheduler's own entry when present and
     the ``Container._ALL`` entry for a scheduler that has none."""
     default_res = Resources(cpu=1, gpu=2, memMB=128)
     catch_all_res = Resources(cpu=1, gpu=2, memMB=256)
     per_scheduler = {"default": default_res, Container._ALL: catch_all_res}

     container = Container("torch").require(per_scheduler)

     self.assertEqual(2, len(container.resources))
     # Exact-name lookups return the matching entry.
     self.assertEqual(default_res, container.get_resources("default"))
     self.assertEqual(catch_all_res, container.get_resources(Container._ALL))
     # A name with no entry is served by the _ALL entry.
     self.assertEqual(
         catch_all_res, container.get_resources("unknown_scheduler")
     )
示例#5
0
 def test_copy_resources(self):
     """``Resources.copy`` keeps cpu/gpu/memMB and overlays the keyword
     arguments onto the capabilities, leaving the source untouched."""
     original_caps = {"test_key": "test_value", "old_key": "old_value"}
     source = Resources(1, 2, 3, original_caps)

     copied = Resources.copy(
         source,
         test_key="test_value_new",
         new_key="new_value",
     )

     # Scalar fields carry over unchanged.
     self.assertEqual(copied.cpu, 1)
     self.assertEqual(copied.gpu, 2)
     self.assertEqual(copied.memMB, 3)

     # Untouched key kept, overridden key replaced, new key added.
     copied_caps = copied.capabilities
     self.assertEqual(len(copied_caps), 3)
     self.assertEqual(copied_caps["old_key"], "old_value")
     self.assertEqual(copied_caps["test_key"], "test_value_new")
     self.assertEqual(copied_caps["new_key"], "new_value")

     # The source object's capabilities still hold the old value.
     self.assertEqual(source.capabilities["test_key"], "test_value")
示例#6
0
 def test_validate_invalid_replicas(self):
     """Building and running an app whose role requests zero replicas is
     expected to raise ValueError."""
     session = self.MockSession()
     # NOTE: the whole build chain sits inside assertRaises, so a ValueError
     # raised by any step (not only ``session.run``) satisfies this test.
     with self.assertRaises(ValueError):
         resources = Resources(cpu=1, gpu=0, memMB=500)
         container = Container("torch").require(resources)
         role = Role("no container").runs("echo", "hello_world")
         role = role.on(container).replicas(0)
         app = Application("no container").of(role)
         session.run(app)
示例#7
0
    def test_json_serialization(self):
        """
        Round-trips an ElasticRole through its dict ("JSON") form and
        rebuilds it as a plain Role, then checks the two are equivalent.
        ElasticRole is a builder convenience for creating a Role whose
        entrypoint is ``torchelastic.distributed.launch``.
        """
        container = Container(
            image="user_image",
            resources={"default": Resources(cpu=1, gpu=0, memMB=512)},
        ).ports(tensorboard=8080)

        builder = ElasticRole(
            "test_role",
            nnodes="2:4",
            rdzv_backend="etcd",
            rdzv_id="foobar",
        )
        elastic_role = (
            builder.runs("user_script.py", "--script_arg", "foo")
            .on(container)
            .replicas(3)
        )

        # asdict yields nested plain dicts, i.e. effectively JSON
        elastic_json = dataclasses.asdict(elastic_role)
        container_json = elastic_json.pop("container")
        # each per-scheduler entry is now a plain mapping; turn it back
        # into a Resources object before rebuilding the Container
        container_json["resources"] = {
            sched: Resources(**fields)
            for sched, fields in container_json.pop("resources").items()
        }

        rebuilt_container = Container(**container_json)
        role = Role(container=rebuilt_container, **elastic_json)

        self.assertEqual(container, role.container)
        self.assertEqual(elastic_role.name, role.name)
        self.assertEqual(elastic_role.entrypoint, role.entrypoint)
        self.assertEqual(elastic_role.args, role.args)

        expected_dict = dataclasses.asdict(elastic_role)
        self.assertEqual(expected_dict, dataclasses.asdict(role))
示例#8
0
class Resource:
    """Named ``Resources`` presets; every preset here sets ``gpu=0``."""

    # memMB values are 1, 4 and 16 multiples of 1024 respectively.
    SMALL = Resources(cpu=1, gpu=0, memMB=1024)
    MEDIUM = Resources(cpu=4, gpu=0, memMB=4096)
    LARGE = Resources(cpu=16, gpu=0, memMB=16384)
示例#9
0
 def test_create_container_no_backend(self):
     """``require`` without a scheduler name stores the Resources under
     ``Container._ALL``."""
     only_res = Resources(cpu=1, gpu=2, memMB=128)

     container = Container("torch").require(only_res)

     registered = container.resources
     self.assertEqual(1, len(registered))
     self.assertEqual(only_res, container.resources[Container._ALL])