def initialize(self, trainer):
    """Set up command handling for ``trainer`` before the run starts.

    Remembers the trainer's output directory, removes any commands file
    left in it, marks the job state as running, and replaces the
    trainer's stop trigger with a command-aware wrapper.

    Args:
        trainer: Object exposing ``out`` (output directory) and a
            writable ``stop_trigger`` attribute.
    """
    out = trainer.out
    self._out = out

    CommandItem.remove_commands_file(out)
    CommandsState.run(out)

    # Interval triggers get a dedicated wrapper; anything else uses the
    # generic one.
    current_trigger = trainer.stop_trigger
    wrapper_cls = (
        _CommandIntervalTrigger
        if isinstance(current_trigger, IntervalTrigger)
        else _CommandTrigger
    )
    trainer.stop_trigger = wrapper_cls(current_trigger)
def test_call_no_command(self):
    """initialize()/finalize() life cycle when no command is issued."""
    result_dir = os.path.join(self._dir, 'results')
    os.makedirs(result_dir)

    # Leave an empty commands file behind so we can check that
    # initialize() removes it.
    stale_commands = os.path.join(result_dir, 'commands')
    with open(stale_commands, 'w'):
        pass
    assert os.path.isfile(stale_commands)

    extension = CommandsExtension()
    mock_trainer = _MockTrainer(result_dir)
    extension.initialize(mock_trainer)

    # The stop trigger is wrapped, the stale file is gone, job is running.
    assert type(mock_trainer.stop_trigger) is _CommandIntervalTrigger
    assert not os.path.isfile(stale_commands)
    assert CommandsState.job_status(result_dir) == JobStatus.RUNNING

    extension.finalize()
    assert CommandsState.job_status(result_dir) == JobStatus.STOPPED
def post(self, result_id, project_id):
    """Handle POST on ``/api/v1/projects/<project_id>/results/<result_id>/commands``.

    Appends a new command to the commands file of a running training
    job and returns the refreshed command list of the result.

    Args:
        result_id: Primary key of the target ``Result`` row.
        project_id: Project identifier taken from the URL.
            NOTE(review): it is not used in the lookup below, so a result
            is found by ``result_id`` alone -- confirm this is intended.

    Returns:
        A ``(response, status)`` tuple for error cases (404 when the
        result does not exist, 400 for a non-running job or an invalid
        request), otherwise a JSON response ``{'commands': [...]}``.
    """
    result = db.session.query(Result).filter_by(id=result_id).first()
    if result is None:
        return jsonify({
            'result': None,
            'message': 'No interface defined for URL.'
        }), 404

    # Commands can only be queued while the training job is running.
    job_status = CommandsState.job_status(result.path_name)
    if job_status != JobStatus.RUNNING:
        if job_status == JobStatus.NO_EXTENSION_ERROR:
            message = '\'CommandsExtension\' is not set or disabled.'
        elif job_status == JobStatus.INITIALIZED:
            message = 'The target training job has not run, yet'
        elif job_status == JobStatus.STOPPED:
            message = 'The target training job has already stopped'
        else:
            message = 'Cannot get the target training job status'
        return jsonify({'message': message}), 400

    request_json = request.get_json()
    if request_json is None:
        return jsonify({'message': 'Empty request.'}), 400
    # A valid JSON document need not be an object (e.g. ``[]`` or ``1``);
    # reject those explicitly instead of failing on ``.get`` below.
    if not isinstance(request_json, dict):
        return jsonify(
            {'message': 'Request body must be a JSON object.'}), 400

    command_name = request_json.get('name', None)
    if command_name is None:
        return jsonify({'message': 'Name is required.'}), 400

    schedule = request_json.get('schedule', None)
    if not CommandItem.is_valid_schedule(schedule):
        return jsonify({'message': 'Schedule is invalid.'}), 400

    command = CommandItem(name=command_name)
    # Reuse the schedule that was just validated.
    command.set_request(
        CommandItem.REQUEST_OPEN,
        request_json.get('body', None),
        schedule)

    commands = CommandItem.load_commands(result.path_name)
    commands.append(command)
    CommandItem.dump_commands(commands, result.path_name)

    # Re-crawl so the response reflects the command that was just written.
    new_result = crawl_result(result, force=True)
    new_result_dict = new_result.serialize

    return jsonify({'commands': new_result_dict['commands']})
def finalize(self):
    """Mark the job as stopped, unless no output directory was recorded."""
    if self._out == '':
        # Nothing was recorded as the output directory; nothing to update.
        return
    CommandsState.stop(self._out)
def test_post_result_command(func_dir, project, app):
    """Exercise the result-commands POST endpoint across job states."""
    commands_url = '/api/v1/projects/2/results/4/commands'

    def post_json(payload):
        # Send ``payload`` as a JSON body to the commands endpoint.
        return app.post(commands_url,
                        data=json.dumps(payload),
                        content_type='application/json')

    def bad_request_message(resp):
        # A 400 response carries exactly one key: a string ``message``.
        data = assert_json_api(resp, 400)
        assert len(data) == 1
        assert isinstance(data['message'], string_types)
        return data['message']

    project2_path = os.path.join(func_dir, 'test_project2')
    result_path = os.path.join(project2_path, '10003')
    os.makedirs(result_path)
    with open(os.path.join(result_path, 'log'), 'w') as f:
        json.dump([], f)
    Project.create(project2_path, 'command-test-project')

    valid_payloads = [
        {
            'name': 'adjust_hyperparams',
            'body': {
                'alpha': 0.0007,
                'beta1': 0.8,
                'beat2': 1.0,
            },
            'schedule': {
                'value': 4,
                'key': 'epoch',
            },
            'resultId': 1,
        },
        {
            'name': 'adjust_hyperparams',
            'body': None,
            'schedule': None,
            'resultId': 2,
        },
        {
            'name': 'take_snapshot',
            'schedule': {
                'value': 4800,
                'key': 'iteration',
            },
            'resultId': 3,
        },
    ]

    # not set extension
    message = bad_request_message(post_json(valid_payloads[0]))
    assert 'not set' in message

    # job run on v0.1.0 so .chainerui_commands is not created
    with open(os.path.join(result_path, 'commands'), 'w') as f:
        json.dump([], f)
    message = bad_request_message(post_json(valid_payloads[0]))
    assert 'stopped' in message

    # extension is set up but not run
    os.remove(os.path.join(result_path, '.chainerui_commands'))
    initial_state = CommandsState._load(result_path, initialize=True)
    CommandsState._dump(result_path, initial_state)
    message = bad_request_message(post_json(valid_payloads[0]))
    assert 'not run' in message

    # job has already started
    CommandsState.run(result_path)
    for payload in valid_payloads:
        data = assert_json_api(post_json(payload))
        assert len(data) == 1
        assert len(data['commands']) > 0

        command = data['commands'][0]
        assert isinstance(command['id'], int)
        assert isinstance(command['name'], string_types)

        req = command['request']
        assert len(req) == 4
        assert req['schedule'] is None or isinstance(req['schedule'], dict)
        assert req['body'] is None or isinstance(req['body'], dict)
        assert isinstance(req['created_at'], string_types)
        assert isinstance(req['status'], string_types)
        assert 'response' in command

    # job has stopped
    CommandsState.stop(result_path)
    message = bad_request_message(post_json(valid_payloads[0]))
    assert 'stopped' in message

    invalid_payloads = [
        {
            'name': 'invalid_schedule',
            'schedule': {
                'value': None,
                'key': 'epoch',
            },
            'resultId': 1,
        },
        {
            'name': None,
            'resultId': 2,
        },
    ]
    for payload in invalid_payloads:
        bad_request_message(post_json(payload))

    # no request body at all
    bad_request_message(app.post(commands_url))

    # unknown result id
    resp = app.post('/api/v1/projects/2/results/12345/commands')
    data = assert_json_api(resp, 404)
    assert isinstance(data['message'], string_types)
    assert data['result'] is None
def test_call(self):
    """Run queued commands through the extension at their scheduled points."""
    # NOTE(review): this reads ``self.dir`` while test_call_no_command reads
    # ``self._dir`` -- confirm which attribute the fixture actually sets.
    result_dir = os.path.join(self.dir, 'results')
    os.makedirs(result_dir)
    stale_commands = os.path.join(result_dir, 'commands')
    with open(stale_commands, 'w'):
        pass
    assert os.path.isfile(stale_commands)

    # initialize
    extension = CommandsExtension(trigger=(1, 'iteration'))
    trainer = _MockTrainer(result_dir)
    extension.initialize(trainer)
    assert not trainer.stop_trigger._loop_stop
    assert not os.path.isfile(stale_commands)
    assert CommandsState.job_status(result_dir) == JobStatus.RUNNING

    # setup valid command
    queued = CommandItem.load_commands(result_dir)

    def enqueue(name, body, schedule):
        # Create an OPEN request for ``name`` and add it to the queue.
        item = CommandItem(name=name)
        item.set_request(CommandItem.REQUEST_OPEN, body, schedule)
        queued.append(item)

    enqueue('take_snapshot', None, None)
    enqueue('stop', None, {'key': 'epoch', 'value': 10})
    enqueue(
        'adjust_hyperparams',
        {
            'optimizer': 'MomentumSGD',
            'hyperparam': {
                'lr': 0.01,
                'beta': None,
                'gamma': 1.0
            }
        },
        {'key': 'iteration', 'value': 10})
    CommandItem.dump_commands(queued, result_dir)

    # call but skip by interval trigger
    extension(trainer)
    loaded = CommandItem.load_commands(result_dir)
    assert len(loaded) == 3
    assert loaded[0].response is None
    assert loaded[1].response is None
    assert loaded[2].response is None

    # call 'take_snapshot'
    trainer.updater.iteration = 1
    extension(trainer)
    loaded = CommandItem.load_commands(result_dir)
    assert len(loaded) == 3
    snapshot_res = loaded[0].response
    assert snapshot_res['epoch'] == 0
    assert snapshot_res['iteration'] == 1
    assert snapshot_res['status'] == CommandItem.RESPONSE_SUCCESS
    assert loaded[1].response is None
    assert loaded[2].response is None

    # call 'adjust_hyperparams'
    trainer.updater.iteration = 10
    extension(trainer)
    loaded = CommandItem.load_commands(result_dir)
    assert len(loaded) == 3
    adjust_res = loaded[2].response
    assert adjust_res['epoch'] == 0
    assert adjust_res['iteration'] == 10
    assert adjust_res['status'] == CommandItem.RESPONSE_SUCCESS
    assert adjust_res['body'] is not None
    assert adjust_res['body']['optimizer'] == 'MomentumSGD'
    assert adjust_res['body']['hyperparam'] == {'lr': 0.01}
    assert loaded[1].response is None

    # call 'stop'
    trainer.updater.iteration = 100
    trainer.updater.epoch = 10
    extension(trainer)
    loaded = CommandItem.load_commands(result_dir)
    assert len(loaded) == 3
    stop_res = loaded[1].response
    assert stop_res['epoch'] == 10
    assert stop_res['iteration'] == 100
    assert stop_res['status'] == CommandItem.RESPONSE_SUCCESS
    assert stop_res['body'] is None
    assert trainer.stop_trigger._loop_stop

    extension.finalize()
    assert CommandsState.job_status(result_dir) == JobStatus.STOPPED
def setup_test_project(root_path):
    """Create six fixture result directories under ``root_path``.

    Directories ``10000`` to ``10005`` are filled with progressively more
    files: a log only; log and args; log, args, commands and a snapshot
    file; and finally three variants that also record a commands state
    (running, stopped, and freshly initialized).

    Args:
        root_path: Existing directory in which the result directories
            are created.
    """
    log = [{
        "main/loss": 0.1933198869228363,
        "validation/main/loss": 0.09147150814533234,
        "iteration": 600,
        "elapsed_time": 16.052587032318115,
        "epoch": 1,
        "main/accuracy": 0.9421835541725159,
        "validation/main/accuracy": 0.9703000783920288
    }, {
        "main/loss": 0.07222291827201843,
        "validation/main/loss": 0.08141259849071503,
        "iteration": 1200,
        "elapsed_time": 19.54666304588318,
        "epoch": 2,
        "main/accuracy": 0.9771820902824402,
        "validation/main/accuracy": 0.975399911403656
    }]
    args = {
        "resume": "",
        "batchsize": 100,
        "epoch": 20,
        "frequency": -1,
        "gpu": 0,
        "unit": 1000,
        "out": "results"
    }
    commands = [{
        "name": "take_snapshot",
        "request": {
            "created_at": "2017-09-26T16:44:33.410023",
            "status": "OPEN",
            "body": None,
            "schedule": {
                "value": 4,
                "key": "epoch"
            }
        },
        "response": {
            "executed_at": "2017-09-26T16:44:35.730431",
            "epoch": 4,
            "iteration": 2400,
            "elapsed_time": 76.96686792373657,
            "status": "SUCCESS",
            "body": None
        }
    }]

    def make_result(name, *json_files):
        # Create ``root_path/name`` and write each (filename, payload)
        # pair into it as JSON, in the order given.
        directory = os.path.join(root_path, name)
        os.makedirs(directory)
        for filename, payload in json_files:
            with open(os.path.join(directory, filename), 'w') as f:
                json.dump(payload, f)
        return directory

    log_file = ('log', log)
    args_file = ('args', args)
    commands_file = ('commands', commands)

    # log only
    make_result('10000', log_file)

    # log, args
    make_result('10001', log_file, args_file)

    # log, args, commands
    path = make_result('10002', log_file, args_file, commands_file)
    open(os.path.join(path, 'snapshot_iter_2400'), 'w').close()

    # log, args, commands, status(run)
    path = make_result('10003', log_file, args_file, commands_file)
    CommandsState.run(path)

    # log, args, commands, status(stop)
    path = make_result('10004', log_file, args_file, commands_file)
    CommandsState.stop(path)

    # log, args, commands, status(not run)
    path = make_result('10005', log_file, args_file, commands_file)
    CommandsState._dump(path, CommandsState._load(path, initialize=True))