Пример #1
0
def test_load_remote_batch():
    """Load ``remote_batch.json`` and check the resulting application.

    Asserts the batch name and data directory, the fields of the first job,
    the type and machines of the executor manager, the scheduler settings
    and the server host/port.
    """
    remote_batch_file = os.path.join(HERE, "remote_batch.json")
    batch_config_dict = load_json(remote_batch_file)

    batch_app = BatchApplication.load(batch_config_dict, "/tmp/test_remote_batch")
    print(batch_app)

    # check batch
    batch = batch_app.batch
    assert batch.name == "remote-batch-example"
    # Plain literal: the former f-string had no placeholders.
    assert batch.data_dir_path.as_posix() == "/tmp/test_remote_batch"

    # check jobs
    assert len(batch.jobs) == 2
    job1: ShellJob = batch.jobs[0]
    assert job1.name == "job1"
    assert job1.params == {"learning_rate": 0.1}
    assert job1.command == "sleep 3;echo \"finished\""
    assert job1.data_dir == "/tmp/test_remote_batch/job1"
    assert job1.working_dir == job1.data_dir

    # check backend
    executor_manager = batch_app.job_scheduler.executor_manager
    assert isinstance(executor_manager, RemoteSSHExecutorManager)

    assert len(executor_manager.machines) == 2
    machine: SSHRemoteMachine = executor_manager.machines[0]
    assert machine.connection == {"hostname": "host1", "username": "******", "password": "******"}
    assert machine.environments == {"JAVA_HOME": "/usr/local/jdk"}

    # check scheduler
    job_scheduler = batch_app.job_scheduler
    assert job_scheduler.interval == 5000
    assert not job_scheduler.exit_on_finish

    # check server
    assert batch_app.server_host == "localhost"
    assert batch_app.server_port == 8061
Пример #2
0
 def setup_class(cls):
     """Run the parent ``setup_class`` and store a local-batch app on ``cls.app``.

     The application is built with server port 8082, a scheduler interval
     of 1000 and ``scheduler_exit_on_finish`` enabled.
     """
     super(TestLocaBatch, cls).setup_class()
     local_batch = create_local_batch()
     cls.app = BatchApplication(
         local_batch,
         server_port=8082,
         scheduler_exit_on_finish=True,
         scheduler_interval=1000,
     )
Пример #3
0
 def create_app(batches_data_dir, port):
     """Build a ``BatchApplication`` around a freshly created local batch.

     :param batches_data_dir: forwarded to ``batch_factory.create_local_batch``.
     :param port: value passed as ``server_port``.
     :return: the application, configured with a ``{'type': 'local'}``
         backend, a scheduler interval of 1000 and exit-on-finish enabled.
     """
     local_batch = batch_factory.create_local_batch(
         batches_data_dir=batches_data_dir,
     )
     return BatchApplication(
         local_batch,
         server_port=port,
         scheduler_exit_on_finish=True,
         backend_conf={'type': 'local'},
         scheduler_interval=1000,
     )
Пример #4
0
 def setup_class(cls):
     """Run the parent ``setup_class`` and store a minimum-batch app on ``cls.app``.

     The application uses server port 8061, a scheduler interval of 1000,
     exit-on-finish, and a single ``ConsoleCallback`` as scheduler callback.
     """
     super(TestMinimumLocalBatch, cls).setup_class()
     minimum_batch = create_minimum_batch()
     cls.app = BatchApplication(
         minimum_batch,
         server_port=8061,
         scheduler_exit_on_finish=True,
         scheduler_interval=1000,
         scheduler_callbacks=[ConsoleCallback()],
     )
Пример #5
0
def _load_batch_data_dir(batches_data_dir: str,
                         batch_name) -> BatchApplication:
    """Load the application of batch *batch_name* from its data directory.

    The batch directory is ``batches_data_dir / batch_name``; its config
    file is named by ``Batch.FILE_CONFIG``.

    :raises RuntimeError: if the config file does not exist.
    """
    data_dir = Path(batches_data_dir) / batch_name
    config_file = data_dir / Batch.FILE_CONFIG

    if config_file.exists():
        return BatchApplication.load(load_json(config_file), data_dir)

    raise RuntimeError(f"batch {batch_name} not exists")
Пример #6
0
def create_batch_app(batches_data_dir):
    """Return a ``BatchApplication`` wrapping a minimum batch.

    :param batches_data_dir: forwarded to ``create_minimum_batch``.
    :return: application with server port 8086, a scheduler interval of
        1000 and exit-on-finish enabled.
    """
    minimum_batch = create_minimum_batch(batches_data_dir=batches_data_dir)
    return BatchApplication(
        minimum_batch,
        server_port=8086,
        scheduler_exit_on_finish=True,
        scheduler_interval=1000,
    )
Пример #7
0
def test_batch_to_config():
    """Check that ``BatchApplication.to_config`` exports the expected sections.

    Builds an application around a minimum batch, exports it with
    ``to_config()`` and asserts the ``jobs``, ``server``, ``scheduler`` and
    ``version`` entries of the resulting dict.
    """
    server_port = 8061
    scheduler_interval = 1
    # 1. create a batch
    batch = create_minimum_batch()
    app = BatchApplication(batch,
                           server_port=server_port,
                           scheduler_exit_on_finish=True,
                           scheduler_interval=scheduler_interval)

    # 2. to_config
    batch_config_dict = app.to_config()

    # 3. assert config content
    # 3.1. check jobs
    jobs_config = batch_config_dict['jobs']
    assert len(jobs_config) == 1
    job_config = jobs_config[0]

    assert job_config['name'] == 'job1'
    assert job_config['params']["learning_rate"] == 0.1

    assert job_config['command'] == 'pwd'
    assert job_config['data_dir']
    assert job_config['working_dir']

    # 3.2. TODO check backend
    # backend_config = batch_config_dict['backend']
    # assert backend_config['type'] == 'local'

    # 3.3. check server config
    server_config = batch_config_dict['server']
    assert server_config['host'] == 'localhost'
    assert server_config['port'] == server_port

    # 3.4. check scheduler
    scheduler_config = batch_config_dict['scheduler']
    assert scheduler_config['exit_on_finish'] is True
    # Compare against the value given to the constructor, not a repeated literal.
    assert scheduler_config['interval'] == scheduler_interval

    # 3.5. check version
    assert batch_config_dict['version']
Пример #8
0
 def setup_class(cls):
     """Run the parent ``setup_class``, then start a runner and keep it on ``cls.runner``.

     The application wraps a minimum batch on server port 8086 with a
     scheduler interval of 1000 and ``scheduler_exit_on_finish`` disabled.
     """
     super(TestStopScheduler, cls).setup_class()
     application = BatchApplication(
         create_minimum_batch(),
         server_port=8086,
         scheduler_exit_on_finish=False,
         scheduler_interval=1000,
     )
     batch_runner = BatchRunner(application)
     batch_runner.start()
     cls.runner = batch_runner
     # Pause for two seconds to give the runner time to start.
     time.sleep(2)
Пример #9
0
    def setup_class(cls):
        """Run the parent ``setup_class`` and store a remote-batch app on ``cls.app``.

        The backend configuration comes from ``create_remote_backend_conf()``;
        the application uses server port 8088, a scheduler interval of 1000
        and exit-on-finish.
        """
        super(TestRemoteBatch, cls).setup_class()
        remote_batch = create_remote_batch()
        remote_conf = create_remote_backend_conf()
        cls.app = BatchApplication(
            remote_batch,
            server_port=8088,
            backend_conf=remote_conf,
            scheduler_exit_on_finish=True,
            scheduler_interval=1000,
        )
Пример #10
0
    def test_run_batch(self):
        """Reload ``self.app1`` from its own config and check no job is re-run.

        A second application is loaded from ``app1.to_config()`` with the
        same batch data directory and started; its scheduler must report
        zero allocated jobs and as many skipped jobs as the batch contains.
        """
        previous_app = self.app1

        # Build the second application on top of the previous batch.
        reloaded_app = BatchApplication.load(
            previous_app.to_config(),
            batch_data_dir=previous_app.batch.data_dir_path,
        )
        reloaded_app.start()
        reloaded_app._http_server.stop()

        # Jobs that already ran must be skipped, not allocated again.
        reloaded_scheduler = reloaded_app.job_scheduler
        assert reloaded_scheduler.n_allocated == 0
        assert reloaded_scheduler.n_skipped == len(previous_app.batch.jobs)
Пример #11
0
    def setup_class(cls):
        """Run the parent ``setup_class`` and store an assert-env app on ``cls.app``.

        The application uses the ``'local'`` backend type with one entry in
        the backend ``environments`` mapping, server port 8088, a scheduler
        interval of 1000 and exit-on-finish.
        """
        super(TestLocalHostEnv, cls).setup_class()

        env_batch = batch_factory.create_assert_env_batch()
        local_conf = {
            "environments": {
                "hyn_test_conda_home": "/home/hyperctl/miniconda3",
            },
        }
        cls.app = BatchApplication(env_batch,
                                   server_port=8088,
                                   scheduler_exit_on_finish=True,
                                   backend_type='local',
                                   backend_conf=local_conf,
                                   scheduler_interval=1000)
Пример #12
0
    def setup_class(cls):
        """Run the parent ``setup_class`` and start a runner for a ~6 second job.

        Stores ``server_port`` (8063), ``app`` and ``runner`` on the class
        and then starts the runner.
        """
        super(TestKillLocalJob, cls).setup_class()
        sleeping_batch = create_minimum_batch(command='sleep 6; echo "finished"')

        cls.server_port = 8063

        application = BatchApplication(
            sleeping_batch,
            server_port=cls.server_port,
            scheduler_exit_on_finish=True,
            scheduler_interval=1000,
        )
        batch_runner = BatchRunner(application)

        # Publish both objects on the class before the runner is started.
        cls.app = application
        cls.runner = batch_runner
        batch_runner.start()
Пример #13
0
    def setup_class(cls):
        """Run the parent ``setup_class`` and store a remote assets-batch app on ``cls.app``.

        A new asyncio event loop is installed as the current loop first. The
        batch is created from ``cls.data_dir``; the application uses the
        ``'remote'`` backend type, server port 8089, a scheduler interval of
        1000 and exit-on-finish.
        """
        super(TestRunRemoteWithAssets, cls).setup_class()

        # Replace the current event loop with a brand-new one.
        fresh_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(fresh_loop)

        # Batch that carries assets, plus the remote backend settings.
        assets_batch = batch_factory.create_assets_batch(cls.data_dir)
        remote_conf = create_remote_backend_conf()
        cls.app = BatchApplication(
            assets_batch,
            server_port=8089,
            backend_type='remote',
            backend_conf=remote_conf,
            scheduler_exit_on_finish=True,
            scheduler_interval=1000,
        )
Пример #14
0
def run_batch_config(config_dict, batches_data_dir):
    """Fill in missing names in a batch config, then load and start the batch.

    :param config_dict: batch configuration dict; must contain a ``'jobs'``
        list of job dicts. Missing batch and job names are generated and
        written into this dict in place.
    :param batches_data_dir: passed through to ``BatchApplication.load``.
    """
    # add batch name
    if config_dict.get('name') is None:
        batch_name = common_util.generate_short_id()
        logger.debug(f"generated batch name {batch_name}")
        # Store the generated name in the config, as is done for job names
        # below; it used to be logged and then discarded.
        config_dict['name'] = batch_name

    # add job name
    for job_dict in config_dict['jobs']:
        if job_dict.get('name') is None:
            job_name = common_util.generate_short_id()
            logger.debug(f"generated job name {job_name}")
            job_dict['name'] = job_name

    app = BatchApplication.load(config_dict, batches_data_dir)

    app.start()
Пример #15
0
 def setup_class(cls):
     """Run the parent ``setup_class`` and store a remote assert-env app on ``cls.app``.

     The backend configuration declares one machine whose connection comes
     from ``load_ssh_psw_config()`` and which defines one environment
     entry. The application uses server port 8089, a scheduler interval of
     1000 and exit-on-finish.
     """
     super(TestRemoteHostEnv, cls).setup_class()

     machine_conf = {
         'connection': load_ssh_psw_config(),
         'environments': {
             "hyn_test_conda_home": "/home/hyperctl/miniconda3",
         },
     }
     remote_conf = {"type": 'remote', "machines": [machine_conf]}

     env_batch = batch_factory.create_assert_env_batch()
     cls.app = BatchApplication(
         env_batch,
         server_port=8089,
         scheduler_exit_on_finish=True,
         backend_conf=remote_conf,
         scheduler_interval=1000,
     )
Пример #16
0
def test_load_local_batch_config():
    """Load a local batch from an in-memory config dict and check every section.

    Asserts the batch name, both jobs (job1 relies on directories derived
    from the batch working directory, job2 sets them explicitly), the
    executor manager type, the server host/port and the scheduler settings.
    """
    # 1. load batch from config
    job2_data_dir = "/tmp/hyperctl-batch-data/job2"
    local_batch = {
        "name": "local_batch_test",
        "jobs": [
            {
                "name": "job1",
                "params": {
                    "learning_rate": 0.1
                },
                "command": "pwd"
            }, {
                "name": "job2",
                "params": {
                    "learning_rate": 0.2
                },
                "command": "sleep 3",
                "working_dir": job2_data_dir,
                "data_dir": job2_data_dir,
            }
        ],
        "backend": {
            "type": "local",
            "conf": {}
        },
        "scheduler": {
            "exit_on_finish": False,
            "interval": 1
        },
        "server": {
            "host": "local_machine",
            "port": 18060
        }
    }

    # A self-cleaning temporary directory: mkdtemp() left a new
    # "hyperctl-test-batches*" directory behind on every run.
    with tempfile.TemporaryDirectory(prefix="hyperctl-test-batches") as batch_working_dir:
        batch_app = BatchApplication.load(local_batch, batch_working_dir)

        # 2. assert batch
        assert batch_app.batch.name == "local_batch_test"
        jobs = batch_app.batch.jobs

        assert len(jobs) == 2
        job1: ShellJob = jobs[0]
        assert isinstance(job1, ShellJob)
        assert job1.name == "job1"
        assert job1.params['learning_rate'] == 0.1
        assert job1.command == "pwd"
        # job1 sets no directories, so both are expected under the batch dir.
        expected_job1_dir = (Path(batch_working_dir) / "job1").absolute().as_posix()
        assert job1.data_dir == expected_job1_dir
        assert job1.working_dir == expected_job1_dir

        job2: ShellJob = jobs[1]
        assert isinstance(job2, ShellJob)
        assert job2.name == "job2"
        assert job2.params['learning_rate'] == 0.2
        assert job2.command == "sleep 3"
        assert job2.data_dir == job2_data_dir
        assert job2.working_dir == job2_data_dir

        # check backend
        assert isinstance(batch_app.job_scheduler.executor_manager, LocalExecutorManager)

        # check server
        assert batch_app.server_host == 'local_machine'
        assert batch_app.server_port == 18060

        # check scheduler
        assert batch_app.job_scheduler.exit_on_finish is False
        assert batch_app.job_scheduler.interval == 1