示例#1
0
    def test_invalid_cache_size(self):
        with self.assertRaises(ValueError):
            LocalScheduler(session_name="test_session", cache_size=0)

        with self.assertRaises(ValueError):
            LocalScheduler(session_name="test_session", cache_size=-1)
示例#2
0
class LocalSchedulerTest(unittest.TestCase):
    def setUp(self):
        self.test_dir = tempfile.mkdtemp("LocalSchedulerTest")
        write_shell_script(self.test_dir, "touch.sh", ["touch $1"])
        write_shell_script(self.test_dir, "fail.sh", ["exit 1"])
        write_shell_script(self.test_dir, "sleep.sh", ["sleep $1"])
        write_shell_script(self.test_dir, "echo.sh", ["echo $1"])
        write_shell_script(self.test_dir, "echo_stderr.sh", ["echo $1 1>&2"])
        write_shell_script(
            self.test_dir,
            "echo_range.sh",
            ["for i in $(seq 0 $1); do echo $i 1>&2; sleep $2; done"],
        )

        self.scheduler = LocalScheduler(session_name="test_session")
        self.test_container = Container(image=self.test_dir)

    def wait(
        self,
        app_id: str,
        scheduler: Optional[LocalScheduler] = None,
        timeout: float = 30,
    ) -> Optional[DescribeAppResponse]:
        """
        Waits for the app to finish or raise TimeoutError upon timeout (in seconds).
        If no timeout is specified waits indefinitely.

        Returns:
            The last return value from ``describe()``
        """
        scheduler_ = scheduler or self.scheduler

        interval = timeout / 100
        expiry = time.time() + timeout
        while expiry > time.time():
            desc = scheduler_.describe(app_id)

            if desc is None:
                return None
            elif is_terminal(desc.state):
                return desc

            time.sleep(interval)
        raise TimeoutError(f"timed out waiting for app: {app_id}")

    def tearDown(self):
        shutil.rmtree(self.test_dir)

    def test_submit(self):
        # make sure the macro substitution works
        # touch a file called {app_id}_{replica_id} in the img_root directory (self.test_dir)
        test_file_name = f"{macros.app_id}_{macros.replica_id}"
        num_replicas = 2
        role = (Role("role1").runs(
            "touch.sh", os.path.join(f"{macros.img_root}", test_file_name)).on(
                self.test_container).replicas(num_replicas))
        app = Application(name="test_app").of(role)
        expected_app_id = make_unique(app.name)
        with patch(LOCAL_SCHEDULER_MAKE_UNIQUE, return_value=expected_app_id):
            cfg = RunConfig()
            app_id = self.scheduler.submit(app, cfg)

        self.assertEqual(f"{expected_app_id}", app_id)
        self.assertEqual(AppState.SUCCEEDED, self.wait(app_id).state)

        for i in range(num_replicas):
            self.assertTrue(
                os.path.isfile(
                    os.path.join(self.test_dir, f"{expected_app_id}_{i}")))

        role = Role("role1").runs("fail.sh").on(
            self.test_container).replicas(2)
        app = Application(name="test_app").of(role)
        expected_app_id = make_unique(app.name)
        with patch(LOCAL_SCHEDULER_MAKE_UNIQUE, return_value=expected_app_id):
            app_id = self.scheduler.submit(app, cfg)

        self.assertEqual(f"{expected_app_id}", app_id)
        self.assertEqual(AppState.FAILED, self.wait(app_id).state)

    def _assert_file_content(self, filename: str, expected: str):
        with open(filename, "r") as f:
            self.assertEqual(expected, f.read())

    def test_submit_with_log_dir_stdout(self):
        num_replicas = 2
        role = (Role("role1").runs("echo.sh", "hello_world").on(
            self.test_container).replicas(num_replicas))

        log_dir = os.path.join(self.test_dir, "log")
        cfg = RunConfig({"log_dir": log_dir})
        app = Application(name="test_app").of(role)
        app_id = self.scheduler.submit(app, cfg)
        self.wait(app_id)

        success_file = os.path.join(log_dir, self.scheduler.session_name,
                                    app_id, "SUCCESS")
        with open(success_file, "r") as f:
            sf_json = json.load(f)
            self.assertEqual(app_id, sf_json["app_id"])
            self.assertEqual("test_app", sf_json["app_name"])

            for replica_id in range(num_replicas):
                replica_info = sf_json["roles"]["role1"][replica_id]
                self._assert_file_content(replica_info["stdout"],
                                          "hello_world\n")
                self._assert_file_content(replica_info["stderr"], "")

    def test_submit_with_log_dir_stderr(self):
        num_replicas = 2
        role = (Role("role1").runs("echo_stderr.sh", "hello_world").on(
            self.test_container).replicas(num_replicas))

        log_dir = os.path.join(self.test_dir, "log")
        cfg = RunConfig({"log_dir": log_dir})
        app = Application(name="test_app").of(role)
        app_id = self.scheduler.submit(app, cfg)
        self.wait(app_id)

        success_file = os.path.join(log_dir, self.scheduler.session_name,
                                    app_id, "SUCCESS")

        with open(success_file, "r") as f:
            sf_json = json.load(f)
            self.assertEqual(app_id, sf_json["app_id"])
            self.assertEqual("test_app", sf_json["app_name"])

            for replica_id in range(num_replicas):
                replica_info = sf_json["roles"]["role1"][replica_id]
                self._assert_file_content(replica_info["stdout"], "")
                self._assert_file_content(replica_info["stderr"],
                                          "hello_world\n")

    @patch(
        LOCAL_DIR_IMAGE_FETCHER_FETCH,
        return_value="",
    )
    def test_submit_dryrun(self, img_fetcher_fetch_mock):
        master = (Role("master").runs(
            "master.par", "arg1", ENV_VAR_1="VAL1").on(self.test_container))
        trainer = (Role("trainer").runs("trainer.par").on(
            self.test_container).replicas(2))

        app = Application(name="test_app").of(master, trainer)
        cfg = RunConfig()
        info = self.scheduler.submit_dryrun(app, cfg)
        print(info)
        self.assertEqual(2, len(info.request))
        master_info = info.request[0]["master"]
        trainer_info = info.request[1]["trainer"]
        self.assertEqual(1, len(master_info))
        self.assertEqual(2, len(trainer_info))
        self.assertEqual(
            {
                "args": ["master.par", "arg1"],
                "env": {
                    "ENV_VAR_1": "VAL1",
                },
            },
            master_info[0],
        )
        self.assertEqual({"args": ["trainer.par"], "env": {}}, trainer_info[0])
        self.assertEqual({"args": ["trainer.par"], "env": {}}, trainer_info[1])

    @patch(
        LOCAL_DIR_IMAGE_FETCHER_FETCH,
        return_value="",
    )
    def test_submit_dryrun_with_log_dir(self, img_fetcher_fetch_mock):
        trainer = (Role("trainer").runs("trainer.par").on(
            self.test_container).replicas(2))

        app = Application(name="test_app").of(trainer)
        cfg = RunConfig({"log_dir": "/tmp"})
        info = self.scheduler.submit_dryrun(app, cfg)
        print(info)
        trainer_info = info.request[0]["trainer"]
        self.assertEqual(2, len(trainer_info))

        self.assertEqual(
            {
                "args": ["trainer.par"],
                "env": {},
                "stdout":
                f"/tmp/{self.scheduler.session_name}/test_app_##/trainer/0/stdout.log",
                "stderr":
                f"/tmp/{self.scheduler.session_name}/test_app_##/trainer/0/stderr.log",
            },
            trainer_info[0],
        )
        self.assertEqual(
            {
                "args": ["trainer.par"],
                "env": {},
                "stdout":
                f"/tmp/{self.scheduler.session_name}/test_app_##/trainer/1/stdout.log",
                "stderr":
                f"/tmp/{self.scheduler.session_name}/test_app_##/trainer/1/stderr.log",
            },
            trainer_info[1],
        )

    def test_log_iterator(self):
        role = (Role("role1").runs("echo_range.sh", "10",
                                   "0.5").on(self.test_container).replicas(1))

        log_dir = os.path.join(self.test_dir, "log")
        cfg = RunConfig({"log_dir": log_dir})
        app = Application(name="test_app").of(role)
        app_id = self.scheduler.submit(app, cfg)

        for i, line in enumerate(self.scheduler.log_iter(app_id, "role1",
                                                         k=0)):
            self.assertEqual(str(i), line)

        # since and until ignored
        for i, line in enumerate(
                self.scheduler.log_iter(app_id,
                                        "role1",
                                        k=0,
                                        since=datetime.now(),
                                        until=datetime.now())):
            self.assertEqual(str(i), line)

        for i, line in enumerate(
                self.scheduler.log_iter(app_id, "role1", k=0,
                                        regex=r"[02468]")):
            self.assertEqual(str(i * 2), line)

    def test_log_iterator_no_log_dir(self):
        role = (Role("role1").runs("echo_range.sh", "10",
                                   "0.5").on(self.test_container).replicas(1))

        app = Application(name="test_app").of(role)

        with self.assertRaises(RuntimeError,
                               msg="log_dir must be set to iterate logs"):
            app_id = self.scheduler.submit(app, RunConfig())
            self.scheduler.log_iter(app_id, "role1", k=0)

    def test_submit_multiple_roles(self):
        test_file1 = os.path.join(self.test_dir, "test_file_1")
        test_file2 = os.path.join(self.test_dir, "test_file_2")
        role1 = (Role("role1").runs("touch.sh", test_file1).on(
            self.test_container).replicas(1))
        role2 = (Role("role2").runs("touch.sh", test_file2).on(
            self.test_container).replicas(1))
        app = Application(name="test_app").of(role1, role2)
        cfg = RunConfig()
        app_id = self.scheduler.submit(app, cfg)

        self.assertEqual(AppState.SUCCEEDED, self.wait(app_id).state)
        self.assertTrue(os.path.isfile(test_file1))
        self.assertTrue(os.path.isfile(test_file2))

    def test_describe(self):
        role = Role("role1").runs("sleep.sh",
                                  "2").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        cfg = RunConfig()
        self.assertIsNone(self.scheduler.describe("test_app_0"))
        app_id = self.scheduler.submit(app, cfg)
        desc = self.scheduler.describe(app_id)
        self.assertEqual(AppState.RUNNING, desc.state)
        self.assertEqual(AppState.SUCCEEDED, self.wait(app_id).state)

    def test_cancel(self):
        role = Role("role1").runs("sleep.sh",
                                  "10").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        cfg = RunConfig()
        app_id = self.scheduler.submit(app, cfg)
        desc = self.scheduler.describe(app_id)
        self.assertEqual(AppState.RUNNING, desc.state)
        self.scheduler.cancel(app_id)
        self.assertEqual(AppState.CANCELLED,
                         self.scheduler.describe(app_id).state)

    def test_exists(self):
        role = Role("role1").runs("sleep.sh",
                                  "10").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        cfg = RunConfig()
        app_id = self.scheduler.submit(app, cfg)

        self.assertTrue(self.scheduler.exists(app_id))
        self.scheduler.cancel(app_id)
        self.assertTrue(self.scheduler.exists(app_id))

    def test_invalid_cache_size(self):
        with self.assertRaises(ValueError):
            LocalScheduler(session_name="test_session", cache_size=0)

        with self.assertRaises(ValueError):
            LocalScheduler(session_name="test_session", cache_size=-1)

    def test_cache_full(self):
        scheduler = LocalScheduler(session_name="test_session", cache_size=1)

        role = Role("role1").runs("sleep.sh",
                                  "10").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        cfg = RunConfig()
        scheduler.submit(app, cfg)
        with self.assertRaises(IndexError):
            scheduler.submit(app, cfg)

    def test_cache_evict(self):
        scheduler = LocalScheduler(session_name="test_session", cache_size=1)
        test_file1 = os.path.join(self.test_dir, "test_file_1")
        test_file2 = os.path.join(self.test_dir, "test_file_2")
        role1 = Role("role1").runs("touch.sh",
                                   test_file1).on(self.test_container)
        role2 = Role("role2").runs("touch.sh",
                                   test_file2).on(self.test_container)
        app1 = Application(name="touch_test_file1").of(role1)
        app2 = Application(name="touch_test_file2").of(role2)
        cfg = RunConfig()

        app_id1 = scheduler.submit(app1, cfg)
        self.assertEqual(AppState.SUCCEEDED,
                         self.wait(app_id1, scheduler).state)

        app_id2 = scheduler.submit(app2, cfg)
        self.assertEqual(AppState.SUCCEEDED,
                         self.wait(app_id2, scheduler).state)

        # app1 should've been evicted
        self.assertIsNone(scheduler.describe(app_id1))
        self.assertIsNone(self.wait(app_id1, scheduler))

        self.assertIsNotNone(scheduler.describe(app_id2))
        self.assertIsNotNone(self.wait(app_id2, scheduler))
class LocalSchedulerTest(unittest.TestCase):
    def setUp(self):
        self.test_dir = tempfile.mkdtemp("LocalSchedulerTest")
        write_shell_script(self.test_dir, "touch.sh", ["touch $1"])
        write_shell_script(self.test_dir, "fail.sh", ["exit 1"])
        write_shell_script(self.test_dir, "sleep.sh", ["sleep $1"])

        self.image_fetcher = LocalDirectoryImageFetcher()
        self.scheduler = LocalScheduler(self.image_fetcher)

        self.test_container = Container(image=self.test_dir)

    def tearDown(self):
        shutil.rmtree(self.test_dir)

    def test_submit(self):
        test_file = os.path.join(self.test_dir, "test_file")
        role = (Role("role1").runs("touch.sh", test_file).on(
            self.test_container).replicas(2))
        app = Application(name="test_app").of(role)

        app_id = self.scheduler.submit(app, RunMode.HEADLESS)

        self.assertEqual("test_app_0", app_id)
        self.assertEqual(AppState.SUCCEEDED, self.scheduler.wait(app_id).state)
        self.assertTrue(os.path.isfile(test_file))

        role = Role("role1").runs("fail.sh").on(
            self.test_container).replicas(2)
        app = Application(name="test_app").of(role)
        app_id = self.scheduler.submit(app, RunMode.HEADLESS)

        self.assertEqual("test_app_1", app_id)
        self.assertEqual(AppState.FAILED, self.scheduler.wait(app_id).state)

    def test_submit_multiple_roles(self):
        test_file1 = os.path.join(self.test_dir, "test_file_1")
        test_file2 = os.path.join(self.test_dir, "test_file_2")
        role1 = (Role("role1").runs("touch.sh", test_file1).on(
            self.test_container).replicas(1))
        role2 = (Role("role2").runs("touch.sh", test_file2).on(
            self.test_container).replicas(1))
        app = Application(name="test_app").of(role1, role2)
        app_id = self.scheduler.submit(app, RunMode.HEADLESS)

        self.assertEqual(AppState.SUCCEEDED, self.scheduler.wait(app_id).state)
        self.assertTrue(os.path.isfile(test_file1))
        self.assertTrue(os.path.isfile(test_file2))

    def test_describe(self):
        role = Role("role1").runs("sleep.sh",
                                  "2").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        self.assertIsNone(self.scheduler.describe("test_app_0"))
        app_id = self.scheduler.submit(app, RunMode.HEADLESS)
        desc = self.scheduler.describe(app_id)
        self.assertEqual(AppState.RUNNING, desc.state)
        self.assertEqual(AppState.SUCCEEDED, self.scheduler.wait(app_id).state)

    def test_cancel(self):
        role = Role("role1").runs("sleep.sh",
                                  "10").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        app_id = self.scheduler.submit(app, RunMode.HEADLESS)
        desc = self.scheduler.describe(app_id)
        self.assertEqual(AppState.RUNNING, desc.state)
        self.scheduler.cancel(app_id)
        self.assertEqual(AppState.CANCELLED,
                         self.scheduler.describe(app_id).state)

    def test_exists(self):
        role = Role("role1").runs("sleep.sh",
                                  "10").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        app_id = self.scheduler.submit(app, RunMode.HEADLESS)

        self.assertTrue(self.scheduler.exists(app_id))
        self.scheduler.cancel(app_id)
        self.assertTrue(self.scheduler.exists(app_id))

    def test_wait_timeout(self):
        role = Role("role1").runs("sleep.sh",
                                  "10").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        app_id = self.scheduler.submit(app, RunMode.MANAGED)

        with self.assertRaises(TimeoutError):
            self.scheduler.wait(app_id, timeout=1)

    def test_invalid_cache_size(self):
        with self.assertRaises(ValueError):
            LocalScheduler(self.image_fetcher, cache_size=0)

        with self.assertRaises(ValueError):
            LocalScheduler(self.image_fetcher, cache_size=-1)

    def test_cache_full(self):
        scheduler = LocalScheduler(self.image_fetcher, cache_size=1)

        role = Role("role1").runs("sleep.sh",
                                  "10").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        scheduler.submit(app, RunMode.MANAGED)
        with self.assertRaises(IndexError):
            scheduler.submit(app, RunMode.MANAGED)

    def test_cache_evict(self):
        scheduler = LocalScheduler(self.image_fetcher, cache_size=1)
        test_file1 = os.path.join(self.test_dir, "test_file_1")
        test_file2 = os.path.join(self.test_dir, "test_file_2")
        role1 = Role("role1").runs("touch.sh",
                                   test_file1).on(self.test_container)
        role2 = Role("role2").runs("touch.sh",
                                   test_file2).on(self.test_container)
        app1 = Application(name="touch_test_file1").of(role1)
        app2 = Application(name="touch_test_file2").of(role2)

        app_id1 = scheduler.submit(app1, RunMode.HEADLESS)
        self.assertEqual(AppState.SUCCEEDED, scheduler.wait(app_id1).state)

        app_id2 = scheduler.submit(app2, RunMode.HEADLESS)
        self.assertEqual(AppState.SUCCEEDED, scheduler.wait(app_id2).state)

        # app1 should've been evicted
        self.assertIsNone(scheduler.describe(app_id1))
        self.assertIsNone(scheduler.wait(app_id1))

        self.assertIsNotNone(scheduler.describe(app_id2))
        self.assertIsNotNone(scheduler.wait(app_id2))
示例#4
0
class LocalSchedulerTest(unittest.TestCase):
    def setUp(self):
        self.test_dir = tempfile.mkdtemp(prefix=f"{self.__class__.__name__}_")
        write_shell_script(self.test_dir, "touch.sh", ["touch $1"])
        write_shell_script(self.test_dir, "fail.sh", ["exit 1"])
        write_shell_script(self.test_dir, "sleep.sh", ["sleep $1"])
        write_shell_script(self.test_dir, "echo_stdout.sh", ["echo $1"])
        write_shell_script(self.test_dir, "echo_stderr.sh", ["echo $1 1>&2"])
        write_shell_script(
            self.test_dir,
            "echo_range.sh",
            ["for i in $(seq 0 $1); do echo $i 1>&2; sleep $2; done"],
        )
        write_shell_script(self.test_dir, "echo_env_foo.sh",
                           ["echo $FOO 1>&2"])
        self.scheduler = LocalScheduler(session_name="test_session")
        self.test_container = Container(image=self.test_dir)

    def wait(
        self,
        app_id: str,
        scheduler: Optional[LocalScheduler] = None,
        timeout: float = 30,
    ) -> Optional[DescribeAppResponse]:
        """
        Waits for the app to finish or raise TimeoutError upon timeout (in seconds).
        If no timeout is specified waits indefinitely.

        Returns:
            The last return value from ``describe()``
        """
        scheduler_ = scheduler or self.scheduler

        interval = timeout / 100
        expiry = time.time() + timeout
        while expiry > time.time():
            desc = scheduler_.describe(app_id)

            if desc is None:
                return None
            elif is_terminal(desc.state):
                return desc

            time.sleep(interval)
        raise TimeoutError(f"timed out waiting for app: {app_id}")

    def tearDown(self):
        shutil.rmtree(self.test_dir)

    def test_submit(self):
        # make sure the macro substitution works
        # touch a file called {app_id}_{replica_id} in the img_root directory (self.test_dir)
        test_file_name = f"{macros.app_id}_{macros.replica_id}"
        num_replicas = 2
        role = (Role("role1").runs(
            "touch.sh", join(f"{macros.img_root}", test_file_name)).on(
                self.test_container).replicas(num_replicas))
        app = Application(name="test_app").of(role)
        expected_app_id = make_unique(app.name)
        with patch(LOCAL_SCHEDULER_MAKE_UNIQUE, return_value=expected_app_id):
            cfg = RunConfig({"log_dir": self.test_dir})
            app_id = self.scheduler.submit(app, cfg)

        self.assertEqual(f"{expected_app_id}", app_id)
        self.assertEqual(AppState.SUCCEEDED, self.wait(app_id).state)

        for i in range(num_replicas):
            self.assertTrue(
                os.path.isfile(join(self.test_dir, f"{expected_app_id}_{i}")))

        role = Role("role1").runs("fail.sh").on(
            self.test_container).replicas(2)
        app = Application(name="test_app").of(role)
        expected_app_id = make_unique(app.name)
        with patch(LOCAL_SCHEDULER_MAKE_UNIQUE, return_value=expected_app_id):
            app_id = self.scheduler.submit(app, cfg)

        self.assertEqual(f"{expected_app_id}", app_id)
        self.assertEqual(AppState.FAILED, self.wait(app_id).state)

    @mock.patch.dict(os.environ, {"FOO": "bar"})
    def test_submit_inherit_parent_envs(self):
        role = Role("echo_foo").runs("echo_env_foo.sh").on(self.test_container)
        app = Application(name="check_foo_env_var").of(role)
        app_id = self.scheduler.submit(app,
                                       RunConfig({"log_dir": self.test_dir}))
        for line in self.scheduler.log_iter(app_id, "echo_foo"):
            self.assertEqual("bar", line)

        desc = self.wait(app_id, self.scheduler)
        self.assertEqual(AppState.SUCCEEDED, desc.state)

    @mock.patch.dict(os.environ, {"FOO": "bar"})
    def test_submit_override_parent_env(self):
        role = (Role("echo_foo").runs("echo_env_foo.sh",
                                      FOO="new_bar").on(self.test_container))
        app = Application(name="check_foo_env_var").of(role)
        app_id = self.scheduler.submit(app,
                                       RunConfig({"log_dir": self.test_dir}))
        for line in self.scheduler.log_iter(app_id, "echo_foo"):
            self.assertEqual("new_bar", line)

        desc = self.wait(app_id, self.scheduler)
        self.assertEqual(AppState.SUCCEEDED, desc.state)

    def _assert_file_content(self, filename: str, expected: str):
        with open(filename, "r") as f:
            self.assertEqual(expected, f.read())

    def test_submit_with_log_dir_stdout(self):
        num_replicas = 2

        for std_stream in ["stdout", "stderr"]:
            with self.subTest(std_stream=std_stream):
                log_dir = join(self.test_dir, f"test_{std_stream}_log")
                cfg = RunConfig({"log_dir": log_dir})

                role = (Role("role1").runs(
                    f"echo_{std_stream}.sh", "hello_world").on(
                        self.test_container).replicas(num_replicas))
                app = Application(name="test_app").of(role)

                app_id = self.scheduler.submit(app, cfg)
                self.wait(app_id)

                success_file = join(log_dir, self.scheduler.session_name,
                                    app_id, "SUCCESS")
                with open(success_file, "r") as f:
                    sf_json = json.load(f)
                    self.assertEqual(app_id, sf_json["app_id"])
                    self.assertEqual(
                        join(log_dir, self.scheduler.session_name, app_id),
                        sf_json["log_dir"],
                    )
                    self.assertEqual(AppState.SUCCEEDED.name,
                                     sf_json["final_state"])

                    for replica_id in range(num_replicas):
                        replica_info = sf_json["roles"]["role1"][replica_id]
                        self._assert_file_content(replica_info[std_stream],
                                                  "hello_world\n")

    @patch(LOCAL_DIR_IMAGE_FETCHER_FETCH, return_value="")
    def test_submit_dryrun_without_log_dir_cfg(self, img_fetcher_fetch_mock):
        master = (Role("master").runs(
            "master.par", "arg1", ENV_VAR_1="VAL1").on(self.test_container))
        trainer = (Role("trainer").runs("trainer.par").on(
            self.test_container).replicas(2))

        app = Application(name="test_app").of(master, trainer)
        cfg = RunConfig()
        info = self.scheduler.submit_dryrun(app, cfg)
        # intentional print (to make sure it actually prints with no errors)
        print(info)

        request = info.request
        role_params = request.role_params
        role_log_dirs = request.role_log_dirs
        self.assertEqual(2, len(role_params))
        self.assertEqual(2, len(role_log_dirs))

        master_params = role_params["master"]
        trainer_params = role_params["trainer"]

        app_log_dir = request.log_dir

        self.assertEqual(1, len(master_params))
        self.assertEqual(2, len(trainer_params))

        for role in app.roles:
            replica_params = role_params[role.name]
            replica_log_dirs = role_log_dirs[role.name]

            for j in range(role.num_replicas):
                replica_param = replica_params[j]
                replica_log_dir = replica_log_dirs[j]

                # dryrun should NOT create any directories
                self.assertFalse(os.path.isdir(replica_log_dir))
                self.assertTrue(replica_log_dir.startswith(app_log_dir))
                self.assertEqual([role.entrypoint, *role.args],
                                 replica_param.args)
                self.assertEqual(
                    {
                        ERR_FILE_ENV: join(replica_log_dir, "error.json"),
                        **role.env,
                    },
                    replica_param.env,
                )
                self.assertIsNone(replica_param.stdout)
                self.assertIsNone(replica_param.stderr)

    @patch(LOCAL_DIR_IMAGE_FETCHER_FETCH, return_value="")
    def test_submit_dryrun_with_log_dir_cfg(self, img_fetcher_fetch_mock):
        trainer = (Role("trainer").runs("trainer.par").on(
            self.test_container).replicas(2))

        app = Application(name="test_app").of(trainer)
        cfg = RunConfig({"log_dir": self.test_dir})
        info = self.scheduler.submit_dryrun(app, cfg)
        # intentional print (to make sure it actually prints with no errors)
        print(info)

        request = info.request
        role_params = request.role_params
        role_log_dirs = request.role_log_dirs
        self.assertEqual(1, len(role_params))
        self.assertEqual(1, len(role_log_dirs))

        trainer_params = role_params["trainer"]

        app_log_dir = request.log_dir

        self.assertEqual(2, len(trainer_params))

        for role in app.roles:
            replica_params = role_params[role.name]
            replica_log_dirs = role_log_dirs[role.name]

            for j in range(role.num_replicas):
                replica_param = replica_params[j]
                replica_log_dir = replica_log_dirs[j]
                # dryrun should NOT create any directories
                self.assertFalse(os.path.isdir(replica_log_dir))
                self.assertTrue(replica_log_dir.startswith(app_log_dir))
                self.assertEqual([role.entrypoint, *role.args],
                                 replica_param.args)
                self.assertEqual(
                    {
                        ERR_FILE_ENV: join(replica_log_dir, "error.json"),
                        **role.env,
                    },
                    replica_param.env,
                )
                stdout_path = join(replica_log_dir, "stdout.log")
                stderr_path = join(replica_log_dir, "stderr.log")
                self.assertEqual(stdout_path, replica_param.stdout)
                self.assertEqual(stderr_path, replica_param.stderr)

    def test_log_iterator(self):
        role = (Role("role1").runs("echo_range.sh", "10",
                                   "0.5").on(self.test_container).replicas(1))

        log_dir = join(self.test_dir, "log")
        cfg = RunConfig({"log_dir": log_dir})
        app = Application(name="test_app").of(role)
        app_id = self.scheduler.submit(app, cfg)

        for i, line in enumerate(self.scheduler.log_iter(app_id, "role1",
                                                         k=0)):
            self.assertEqual(str(i), line)

        # since and until ignored
        for i, line in enumerate(
                self.scheduler.log_iter(app_id,
                                        "role1",
                                        k=0,
                                        since=datetime.now(),
                                        until=datetime.now())):
            self.assertEqual(str(i), line)

        for i, line in enumerate(
                self.scheduler.log_iter(app_id, "role1", k=0,
                                        regex=r"[02468]")):
            self.assertEqual(str(i * 2), line)

    def test_log_iterator_no_log_dir(self):
        role = (Role("role1").runs("echo_range.sh", "10",
                                   "0.5").on(self.test_container).replicas(1))

        app = Application(name="test_app").of(role)

        with self.assertRaises(RuntimeError,
                               msg="log_dir must be set to iterate logs"):
            app_id = self.scheduler.submit(app, RunConfig())
            self.scheduler.log_iter(app_id, "role1", k=0)

    def test_submit_multiple_roles(self):
        test_file1 = join(self.test_dir, "test_file_1")
        test_file2 = join(self.test_dir, "test_file_2")
        role1 = (Role("role1").runs("touch.sh", test_file1).on(
            self.test_container).replicas(1))
        role2 = (Role("role2").runs("touch.sh", test_file2).on(
            self.test_container).replicas(1))
        app = Application(name="test_app").of(role1, role2)
        cfg = RunConfig({"log_dir": self.test_dir})
        app_id = self.scheduler.submit(app, cfg)

        self.assertEqual(AppState.SUCCEEDED, self.wait(app_id).state)
        self.assertTrue(os.path.isfile(test_file1))
        self.assertTrue(os.path.isfile(test_file2))

    def test_describe(self):
        role = Role("role1").runs("sleep.sh",
                                  "2").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        cfg = RunConfig({"log_dir": self.test_dir})
        self.assertIsNone(self.scheduler.describe("test_app_0"))
        app_id = self.scheduler.submit(app, cfg)
        desc = self.scheduler.describe(app_id)
        self.assertEqual(AppState.RUNNING, desc.state)
        self.assertEqual(AppState.SUCCEEDED, self.wait(app_id).state)

    def test_cancel(self):
        role = Role("role1").runs("sleep.sh",
                                  "10").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        cfg = RunConfig({"log_dir": self.test_dir})
        app_id = self.scheduler.submit(app, cfg)
        desc = self.scheduler.describe(app_id)
        self.assertEqual(AppState.RUNNING, desc.state)
        self.scheduler.cancel(app_id)
        self.assertEqual(AppState.CANCELLED,
                         self.scheduler.describe(app_id).state)

    def test_exists(self):
        role = Role("role1").runs("sleep.sh",
                                  "10").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        cfg = RunConfig({"log_dir": self.test_dir})
        app_id = self.scheduler.submit(app, cfg)

        self.assertTrue(self.scheduler.exists(app_id))
        self.scheduler.cancel(app_id)
        self.assertTrue(self.scheduler.exists(app_id))

    def test_invalid_cache_size(self):
        with self.assertRaises(ValueError):
            LocalScheduler(session_name="test_session", cache_size=0)

        with self.assertRaises(ValueError):
            LocalScheduler(session_name="test_session", cache_size=-1)

    def test_cache_full(self):
        scheduler = LocalScheduler(session_name="test_session", cache_size=1)

        role = Role("role1").runs("sleep.sh",
                                  "10").on(self.test_container).replicas(1)
        app = Application(name="test_app").of(role)
        cfg = RunConfig({"log_dir": self.test_dir})
        scheduler.submit(app, cfg)
        with self.assertRaises(IndexError):
            scheduler.submit(app, cfg)

    def test_cache_evict(self):
        scheduler = LocalScheduler(session_name="test_session", cache_size=1)
        test_file1 = join(self.test_dir, "test_file_1")
        test_file2 = join(self.test_dir, "test_file_2")
        role1 = Role("role1").runs("touch.sh",
                                   test_file1).on(self.test_container)
        role2 = Role("role2").runs("touch.sh",
                                   test_file2).on(self.test_container)
        app1 = Application(name="touch_test_file1").of(role1)
        app2 = Application(name="touch_test_file2").of(role2)
        cfg = RunConfig({"log_dir": self.test_dir})

        app_id1 = scheduler.submit(app1, cfg)
        self.assertEqual(AppState.SUCCEEDED,
                         self.wait(app_id1, scheduler).state)

        app_id2 = scheduler.submit(app2, cfg)
        self.assertEqual(AppState.SUCCEEDED,
                         self.wait(app_id2, scheduler).state)

        # app1 should've been evicted
        self.assertIsNone(scheduler.describe(app_id1))
        self.assertIsNone(self.wait(app_id1, scheduler))

        self.assertIsNotNone(scheduler.describe(app_id2))
        self.assertIsNotNone(self.wait(app_id2, scheduler))
    def test_invalid_cache_size(self):
        with self.assertRaises(ValueError):
            LocalScheduler(self.image_fetcher, cache_size=0)

        with self.assertRaises(ValueError):
            LocalScheduler(self.image_fetcher, cache_size=-1)