class ExperimentClient:
    """Client wrapper around ``ExperimentApi`` for managing experiments."""

    # Statuses at which wait_for_finish() stops polling.
    # NOTE(review): 'Failed' is included so the documented "finished or
    # failed" contract holds instead of polling forever; confirm the exact
    # status string the server reports for a failed experiment.
    _TERMINAL_STATUSES = ('Succeeded', 'Deleted', 'Failed')

    def __init__(self, host):
        """
        Submarine experiment client constructor
        :param host: An HTTP URI like http://submarine-server:8080.
        """
        # TODO(pingsutw): support authentication for talking to the submarine server
        self.host = host
        configuration = Configuration()
        # The REST endpoints are addressed under the '/api' prefix of the host.
        configuration.host = host + '/api'
        api_client = ApiClient(configuration=configuration)
        self.experiment_api = ExperimentApi(api_client=api_client)

    def create_experiment(self, experiment_spec):
        """
        Create an experiment
        :param experiment_spec: submarine experiment spec
        :return: submarine experiment
        """
        response = self.experiment_api.create_experiment(
            experiment_spec=experiment_spec)
        return response.result

    def wait_for_finish(self, id, polling_interval=10):
        """
        Block until the experiment reaches a terminal status, logging new
        pod log lines on every poll.
        :param id: submarine experiment id
        :param polling_interval: Seconds to sleep between two status polls.
        :return: None. Log lines are emitted through ``logging.info``.
        """
        index = 0
        while True:
            status = self.get_experiment(id)['status']
            if status in self._TERMINAL_STATUSES:
                # Flush whatever was written since the previous poll.
                self._log_pod(id, index)
                break
            index = self._log_pod(id, index)
            time.sleep(polling_interval)

    def _log_pod(self, id, index):
        """
        Log the lines of the first pod's log that have not been logged yet.
        :param id: submarine experiment id
        :param index: number of lines of the first pod's log already logged
        :return: the updated number of logged lines
        """
        response = self.experiment_api.get_log(id)
        log_contents = response.result['logContent']
        if not log_contents:
            # No pod has produced a log yet; nothing to emit.
            return index
        new_lines = log_contents[0]['podLog'][index:]
        for line in new_lines:
            logging.info("%s", line)
        return index + len(new_lines)

    def patch_experiment(self, id, experiment_spec):
        """
        Patch an experiment
        :param id: submarine experiment id
        :param experiment_spec: submarine experiment spec
        :return: submarine experiment
        """
        response = self.experiment_api.patch_experiment(
            id=id, experiment_spec=experiment_spec)
        return response.result

    def get_experiment(self, id):
        """
        Get the experiment's detailed info by id
        :param id: submarine experiment id
        :return: submarine experiment
        """
        response = self.experiment_api.get_experiment(id=id)
        return response.result

    def list_experiments(self, status):
        """
        List all experiments for the user
        :param status: Accepted, Created, Running, Succeeded, Deleted
        :return: List of submarine experiments
        """
        response = self.experiment_api.list_experiments(status=status)
        return response.result

    def delete_experiment(self, id):
        """
        Delete the Submarine experiment
        :param id: Submarine experiment id
        :return: The detailed info about deleted submarine experiment
        """
        response = self.experiment_api.delete_experiment(id)
        return response.result

    def get_log(self, id, master=True):
        """
        Log the training logs of the experiment through ``logging.info``.
        By default only the first pod entry returned by the server is logged
        (presumably the pod labelled 'job-role: master' -- verify against
        the server).
        :param id: Experiment ID
        :param master: If True, log only the first pod entry. Set to False
        to log every pod entry.
        :return: None
        """
        response = self.experiment_api.get_log(id)
        log_contents = response.result['logContent']
        if master is True:
            # Slicing keeps an empty result empty instead of raising IndexError.
            log_contents = log_contents[:1]
        for log_content in log_contents:
            logging.info("The logs of Pod %s:\n", log_content['podName'])
            for log in log_content['podLog']:
                logging.info("%s", log)

    def list_log(self, status):
        """
        List experiment log
        :param status: Accepted, Created, Running, Succeeded, Deleted
        :return: List of submarine log
        """
        response = self.experiment_api.list_log(status=status)
        return response.result
class ExperimentClient:
    """Client wrapper around ``ExperimentApi`` for managing experiments."""

    # Statuses at which wait_for_finish() stops polling.
    # NOTE(review): 'Failed' is included so the documented "finished or
    # failed" contract holds instead of polling forever; confirm the exact
    # status string the server reports for a failed experiment.
    _TERMINAL_STATUSES = ('Succeeded', 'Deleted', 'Failed')

    def __init__(self, host=None):
        """
        Submarine experiment client constructor
        :param host: An HTTP URI like http://submarine-server:8080.
        When omitted (or None), the value of ``generate_host()`` is used.
        """
        # Resolve the default at call time: a call expression in the
        # signature would run only once, when the class is defined.
        if host is None:
            host = generate_host()
        # TODO(pingsutw): support authentication for talking to the submarine server
        self.host = host
        configuration = Configuration()
        # The REST endpoints are addressed under the '/api' prefix of the host.
        configuration.host = host + '/api'
        api_client = ApiClient(configuration=configuration)
        self.experiment_api = ExperimentApi(api_client=api_client)

    def create_experiment(self, experiment_spec):
        """
        Create an experiment
        :param experiment_spec: submarine experiment spec
        :return: submarine experiment
        """
        response = self.experiment_api.create_experiment(experiment_spec=experiment_spec)
        return response.result

    def wait_for_finish(self, id, polling_interval=10):
        """
        Block until the experiment reaches a terminal status, logging new
        pod log lines on every poll.
        :param id: submarine experiment id
        :param polling_interval: Seconds to sleep between two status polls.
        :return: None. Log lines are emitted through ``logging.info``.
        """
        index = 0
        while True:
            status = self.get_experiment(id)['status']
            if status in self._TERMINAL_STATUSES:
                # Flush whatever was written since the previous poll.
                self._log_pod(id, index)
                break
            index = self._log_pod(id, index)
            time.sleep(polling_interval)

    def _log_pod(self, id, index):
        """
        Log the lines of the first pod's log that have not been logged yet.
        :param id: submarine experiment id
        :param index: number of lines of the first pod's log already logged
        :return: the updated number of logged lines
        """
        response = self.experiment_api.get_log(id)
        log_contents = response.result['logContent']
        if not log_contents:
            # No pod has produced a log yet; nothing to emit.
            return index
        new_lines = log_contents[0]['podLog'][index:]
        for line in new_lines:
            logging.info("%s", line)
        return index + len(new_lines)

    def patch_experiment(self, id, experiment_spec):
        """
        Patch an experiment
        :param id: submarine experiment id
        :param experiment_spec: submarine experiment spec
        :return: submarine experiment
        """
        response = self.experiment_api.patch_experiment(id=id, experiment_spec=experiment_spec)
        return response.result

    def get_experiment(self, id):
        """
        Get the experiment's detailed info by id
        :param id: submarine experiment id
        :return: submarine experiment
        """
        response = self.experiment_api.get_experiment(id=id)
        return response.result

    def list_experiments(self, status=None):
        """
        List all experiments for the user
        :param status: Accepted, Created, Running, Succeeded, Deleted.
        Defaults to None, which is passed to the API unchanged.
        :return: List of submarine experiments
        """
        response = self.experiment_api.list_experiments(status=status)
        return response.result

    def delete_experiment(self, id):
        """
        Delete the Submarine experiment
        :param id: Submarine experiment id
        :return: The detailed info about deleted submarine experiment
        """
        response = self.experiment_api.delete_experiment(id)
        return response.result

    def get_log(self, id, onlyMaster=False):
        """
        Log the training logs of the experiment's pods through
        ``logging.info``. By default every pod entry is logged.
        :param id: experiment id
        :param onlyMaster: If True, log only the first pod entry returned by
        the server (presumably the "master", e.g. Tensorflow PS/Chief or
        PyTorch master -- verify against the server).
        :return: None
        """
        response = self.experiment_api.get_log(id)
        log_contents = response.result['logContent']
        if onlyMaster is True:
            # Slicing keeps an empty result empty, so no length check is needed.
            log_contents = log_contents[:1]
        for log_content in log_contents:
            logging.info("The logs of Pod %s:\n", log_content['podName'])
            for log in log_content['podLog']:
                logging.info("%s", log)

    def list_log(self, status):
        """
        List experiment log
        :param status: Accepted, Created, Running, Succeeded, Deleted
        :return: List of submarine log
        """
        response = self.experiment_api.list_log(status=status)
        return response.result