示例#1
0
 def virtual_clusters(self, user_info: dict = None):
     """Return the "virtualCluster" entry of the user information.

     `self.rest_api_user()` is always called to build the fallback handed
     to `na`, even when `user_info` is supplied.

     Args:
         user_info: mapping with a "virtualCluster" key. Presumably `na`
             substitutes the fallback when this is missing - confirm
             against the definition of `na`.

     Returns:
         The "virtualCluster" value; a string is split on "," into a list,
         any other value is returned unchanged.

     Raises:
         AssertionError: if the resolved user information is falsy.
     """
     fetched = self.rest_api_user()
     user_info = na(user_info, fetched)
     assert user_info, f'failed to get user information from {self.alias}'
     clusters = user_info["virtualCluster"]
     return clusters.split(",") if isinstance(clusters, str) else clusters
示例#2
0
文件: job.py 项目: zjuter06060126/pai
 def single_task_logs(status: dict,
                      task_role: str = 'main',
                      index: int = 0,
                      log_type: dict = None,
                      return_urls: bool = False):
     """Return the log URLs, or the fetched log contents, of one container.

     The container is looked up at
     status["taskRoles"][task_role]["taskStatuses"][index] and its
     "containerLog" value is used as the URL prefix.

     Args:
         status: job status mapping.
         task_role: task role name to look up.
         index: position of the container inside "taskStatuses".
         log_type: mapping of log name to URL suffix; the stdout/stderr
             suffixes below are the fallback passed to `na`.
         return_urls: when true, return the URLs without fetching them.

     Returns:
         None when the container or its "containerLog" is missing;
         otherwise a dict keyed by log name whose values are URLs
         (`return_urls`) or the response text, passed through `html2text`
         when that package can be imported.
     """
     suffixes = na(
         log_type, {
             "stdout": "user.pai.stdout/?start=0",
             "stderr": "user.pai.stderr/?start=0"
         })
     role_info = status.get("taskRoles", {}).get(task_role, {})
     containers = role_info.get("taskStatuses", [])
     if index + 1 > len(containers):
         return None
     base_url = containers[index].get("containerLog", None)
     if not base_url:
         return None
     urls = {}
     for name, suffix in suffixes.items():
         urls[name] = "{}{}".format(base_url, suffix)
     if return_urls:
         return urls
     pages = {name: get_response('GET', url).text for name, url in urls.items()}
     try:
         # html2text is optional: fall back to the raw response text
         from html2text import html2text
         return {name: html2text(page) for name, page in pages.items()}
     except ImportError:
         return pages
示例#3
0
文件: job.py 项目: simplesoftMX/pai
 def submit(self, cluster_alias: str = None, virtual_cluster: str = None):
     """Select a cluster, validate the job and submit it.

     Args:
         cluster_alias: target cluster; the job parameter "cluster_alias"
             is the fallback passed to `na`.
         virtual_cluster: forwarded to `self.select_cluster`.

     Returns:
         dict with "job_link" (from `self.client.get_job_link`) and
         "job_name" (`self.name`).
     """
     default_alias = self.param("cluster_alias", None)
     cluster_alias = na(cluster_alias, default_alias)
     self.select_cluster(cluster_alias, virtual_cluster)
     validated = self.validate()
     validated.local_process()
     message = "submit job %s to cluster %s" % (self.name, cluster_alias)
     to_screen(message)
     self.client.rest_api_submit(self.get_config())
     return dict(job_link=self.client.get_job_link(self.name),
                 job_name=self.name)
示例#4
0
文件: job.py 项目: simplesoftMX/pai
 def get_logs_url(self,
                  status: dict = None,
                  task_role: str = 'main',
                  index: int = 0,
                  logs: dict = None):
     """Build the log URLs of one task container from its "containerLog".

     `self.status()` is always called to build the fallback handed to
     `na`, even when `status` is supplied.

     Args:
         status: job status mapping.
         task_role: task role name looked up under "taskRoles".
         index: position of the container inside "taskStatuses".
         logs: mapping of log name to URL suffix; the stdout/stderr
             suffixes below are the fallback passed to `na`.

     Returns:
         dict of log name to URL, or None when the container or its
         "containerLog" value is missing.
     """
     suffixes = na(logs, {
         "stdout": "user.pai.stdout",
         "stderr": "user.pai.stderr"
     })
     status = na(status, self.status())
     task_roles = status.get("taskRoles", {})
     containers = task_roles.get(task_role, {}).get("taskStatuses", [])
     if index + 1 > len(containers):
         return None
     base_url = containers[index].get("containerLog", None)
     if base_url:
         return {
             name: "{}{}".format(base_url, suffix)
             for name, suffix in suffixes.items()
         }
     return None
示例#5
0
文件: job.py 项目: simplesoftMX/pai
 def logs(self,
          status: dict = None,
          task_role: str = 'main',
          index: int = 0,
          logs: dict = None):
     """Fetch the logs of one task container as text.

     Arguments are forwarded to `self.get_logs_url`; every URL it returns
     is fetched with `get_response` and converted with `html2text`.

     Returns:
         dict of log name to converted text, or None when
         `get_logs_url` yields no URLs.
     """
     current = self.status()
     status = na(status, current)
     urls = self.get_logs_url(status, task_role, index, logs)
     if urls:
         texts = {}
         for name, url in urls.items():
             response = get_response(url, method='GET')
             texts[name] = html2text(response.text)
         return texts
     return None
示例#6
0
文件: job.py 项目: simplesoftMX/pai
 def connect_jupyter_batch(self, status: dict = None):
     """Download the rendered notebook once the job state is successful.

     The file "<notebook_file>.html" is downloaded from
     "<work_directory>/output/" on the storage returned by
     `self.client.get_storage()` to the same relative name locally.

     Returns:
         dict with "state" and "notebook"; "notebook" is the file URI of
         the downloaded HTML, or None when the state is not listed in
         __job_states__["successful"].
     """
     status = na(status, self.status())
     state = self.state(status)
     if state not in __job_states__["successful"]:
         return {"state": state, "notebook": None}
     html_file = self.param("notebook_file") + ".html"
     work_dir = self.param("work_directory")
     remote_path = '{}/output/{}'.format(work_dir, html_file)
     storage = self.client.get_storage()
     storage.download(remote_path=remote_path, local_path=html_file)
     absolute = os.path.abspath(html_file)
     return {"state": state, "notebook": pathlib.Path(absolute).as_uri()}
示例#7
0
 def do_action_notebook(self, args):
     """Create a notebook job from the parsed arguments and submit it.

     A new job named `args.job_name` is configured via `from_notebook`
     ("interactive" mode when `args.interactive` is truthy, otherwise
     "silent"), its "python_path" parameter is set to `args.python`, and
     it is handed to `self.submit_it`.

     Returns:
         The result of `self.submit_it(args)`; unless `args.preview` is
         truthy it is first updated with `self.connect_notebook()` (an
         empty dict is the fallback passed to `na`).
     """
     job = self.__job__.new(args.job_name)
     notebook_options = dict(
         nb_file=args.notebook,
         mode="interactive" if args.interactive else "silent",
         token=args.token,
         image=args.image,
         cluster=extract_args(
             args, ["cluster_alias", "virtual_cluster", "workspace"]),
         resources=extract_args(args, ["gpu", "cpu", "memoryMB", "mem"]),
         sources=args.sources,
         pip_installs=args.pip_installs,
     )
     job.from_notebook(**notebook_options)
     self.__job__.protocol["parameters"]["python_path"] = args.python
     result = self.submit_it(args)
     if args.preview:
         return result
     connection = na(self.connect_notebook(), {})
     result.update(connection)
     return result
示例#8
0
文件: job.py 项目: zjuter06060126/pai
 def submit(self, cluster_alias: str = None, virtual_cluster: str = None):
     """Select a cluster, validate the job and submit it.

     On any exception during submission the error and the job config are
     printed with `to_screen` before the exception is raised again.

     Args:
         cluster_alias: target cluster; the job parameter "cluster_alias"
             is the fallback passed to `na`.
         virtual_cluster: forwarded to `self.select_cluster`.

     Returns:
         dict with "job_link" and "job_name".
     """
     default_alias = self.param("cluster_alias", None)
     cluster_alias = na(cluster_alias, default_alias)
     self.select_cluster(cluster_alias, virtual_cluster)
     validated = self.validate()
     validated.local_process()
     to_screen("submit job %s to cluster %s" % (self.name, cluster_alias))
     try:
         self.client.rest_api_submit(self.get_config())
         outcome = dict(job_link=self.client.get_job_link(self.name),
                        job_name=self.name)
     except Exception as err:
         # report the failure together with the config that was submitted
         to_screen(f"submit failed due to {repr(err)}", _type="error")
         to_screen(self.get_config())
         raise err
     return outcome
示例#9
0
 def __init__(self,
              name: str,
              include: list = None,
              exclude: list = None,
              file: str = None,
              values: dict = None,
              allow_unknown: bool = True):
     """Store the name, file, values and filtered variable definitions.

     Args:
         name: stored as `self.name`.
         include: forwarded to `OrganizedList.filter`.
         exclude: forwarded to `OrganizedList.filter`.
         file: when truthy, `self.values` is read from it with `from_file`.
         values: used for `self.values` when no file is given (an empty
             dict is the fallback passed to `na`).
         allow_unknown: accepted but not used inside this method.
     """
     self.name = name
     self.file = file
     if file:
         self.values = from_file(file, {}, silent=True)
     else:
         self.values = na(values, {})
     all_definitions = OrganizedList(__flags__.default_var_definitions(),
                                     _key="name")
     self.definitions = all_definitions.filter(
         None, include, exclude)  # type: OrganizedList
示例#10
0
 def get_storage(self, alias: str = None):
     """Build a `Storage` client for one configured storage.

     Args:
         alias: key under `self.config["storages"]`; "builtin" is the
             fallback passed to `na`.

     Returns:
         A `Storage` with protocol 'webHDFS' for an "hdfs" storage entry.

     Raises:
         AssertionError: if no configuration is found for the storage.
         NotImplementedError: if the storage protocol is not "hdfs".
     """
     # ! every cluster should have a builtin storage
     storages = self.config.get("storages", {})
     storage_cfg = storages.get(na(alias, "builtin"), None)
     assert storage_cfg, alias
     if storage_cfg["protocol"] != "hdfs":
         raise NotImplementedError
     uri = storage_cfg.get("uri", self.pai_uri).strip("/")
     if self.config.get("pylon_enabled", True):
         suffix = "/webhdfs"
     else:
         ports = storage_cfg.get("ports", {})
         suffix = ":%d" % ports.get("webhdfs", 50070)
     return Storage(protocol='webHDFS',
                    url=uri + suffix,
                    user=storage_cfg.get('user', self.user))
示例#11
0
 def check(self):
     """Query the cluster and refresh the cached configuration.

     Stores the cluster info, the storages (each given a default
     "storage_alias" of "<protocol>-<position>") and the virtual clusters
     into `self.config`.

     Returns:
         self

     Raises:
         AssertionError: if the cluster reports "OIDC" as its
             "authnMethod" and `self.config["token"]` is falsy.
     """
     to_screen("try to connect cluster {}".format(self.alias))
     storages = self.rest_api_storages()
     for position, storage in enumerate(storages):
         default_alias = storage["protocol"] + f'-{position}'
         storage.setdefault("storage_alias", default_alias)
     cluster_info = na(self.rest_api_cluster_info(), {})
     uses_oidc = cluster_info.get("authnMethod", "basic") == "OIDC"
     if uses_oidc:
         assert self.config[
             "token"], "must use authentication token (instead of password) in OIDC mode"
     self.config.update(info=cluster_info,
                        storages=storages,
                        virtual_clusters=self.virtual_clusters())
     # ! will check authentication types according to AAD enabled or not
     return self
示例#12
0
文件: job.py 项目: zjuter06060126/pai
 def load(self,
          fname: str = None,
          job_name: str = None,
          cluster_alias: str = None):
     """Load the job protocol from a cluster or from a local file.

     Args:
         fname: local protocol file; when falsy the `protocol_file` of
             `Job(job_name)` is used.
         job_name: job to load; `self.name` is the fallback passed to
             `na` in the cluster branch.
         cluster_alias: when truthy, the config is fetched from that
             cluster via `rest_api_job_info(job_name, 'config')`.

     Returns:
         self. `self.protocol` is left unchanged when the local file does
         not exist, and always gets a default 'protocolVersion' of '1'.
     """
     if not cluster_alias:
         # load from local file
         path = fname if fname else Job(job_name).protocol_file
         if os.path.isfile(path):
             self.protocol = from_file(path, default="==FATAL==")
     else:
         # load job config from cluster by REST api
         job_name = na(job_name, self.name)
         cluster = get_cluster(cluster_alias)
         self.protocol = cluster.rest_api_job_info(job_name, 'config')
     # v1 protocol (json) has no protocolVersion
     self.protocol.setdefault('protocolVersion', '1')
     return self
示例#13
0
文件: job.py 项目: simplesoftMX/pai
 def connect_jupyter_interactive(self, status: dict = None):
     """Return the job state and, once it is ready, the notebook URL.

     For a "RUNNING" job the stderr log is fetched and searched for the
     line announcing that the Jupyter server is up. The URL is then built
     from the first "main" container's "containerIp" and its "jupyter"
     entry under "containerPorts".

     Args:
         status: job status mapping; `self.status()` is the fallback
             passed to `na`.

     Returns:
         dict with "state" and "notebook"; "notebook" is None while the
         job is not running, its log URL is not available yet, or the
         announcement line has not appeared in the log.
     """
     status = na(status, self.status())
     state = self.state(status)
     url = None
     if state == "RUNNING":
         # get_logs_url returns None while the container or its log link
         # is missing; treat that as "not ready" instead of failing
         log_urls = self.get_logs_url(status)
         log_url = log_urls.get("stderr") if log_urls else None
         if log_url:
             job_log = html2text(get_response(log_url,
                                              method='GET').text).split('\n')
             marker = "The Jupyter Notebook is running at:"
             if any(marker in line for line in job_log):
                 container = status["taskRoles"]["main"]["taskStatuses"][0]
                 url = "http://{}:{}/notebooks/{}".format(
                     container["containerIp"],
                     container["containerPorts"]["jupyter"],
                     self.param("notebook_file") + ".ipynb",
                 )
     return dict(state=state, notebook=url)
示例#14
0
 def load(self, fname: str = None):
     """Read the cluster list from a file into `self.clusters`.

     Args:
         fname: file to read; `self.default_config_file` is the fallback
             passed to `na`.

     Returns:
         self, with `self.clusters` set to an `OrganizedList` keyed by
         "cluster_alias" (an empty list is the `from_file` default).
     """
     fname = na(fname, self.default_config_file)
     records = from_file(fname, default=[])
     self.clusters = OrganizedList(records, _key="cluster_alias")
     return self
示例#15
0
文件: job.py 项目: zjuter06060126/pai
 def from_notebook(self,
                   nb_file: str,
                   mode: str = "interactive",
                   token: str = "abcd",
                   image: str = None,
                   cluster: dict = None,
                   resources: dict = None,
                   sources: list = None,
                   pip_installs: list = None):
     """Configure this job to run a Jupyter notebook.

     Args:
         nb_file: path of the notebook; when falsy the mode is forced to
             "interactive" and no notebook file is attached.
         mode: "interactive" (start a notebook server), "silent" (execute
             with nbconvert and upload the HTML) or "script" (convert to a
             script and run it with ipython).
         token: stored as the "token" secret in interactive mode.
             NOTE(review): the default is a fixed string - callers
             presumably should pass their own token.
         image: forwarded to `self.one_liner`.
         cluster: forwarded to `self.one_liner`.
         resources: wrapped in `JobResource`; a "jupyter" port is added in
             interactive mode.
         sources: extra source files. The notebook file is added to a copy,
             so the caller's list is not modified.
         pip_installs: extra pip packages; "jupyter" is always appended.

     Returns:
         self

     Raises:
         AssertionError: if `mode` is unsupported or `nb_file` is given
             but is not an existing file.
     """
     assert mode in ["interactive", "silent",
                     "script"], "unsupported mode %s" % mode
     if not nb_file:
         mode, nb_file = "interactive", ""
     else:
         assert os.path.isfile(
             nb_file), "cannot read the ipython notebook {}".format(nb_file)
         # build a new list instead of appending to the caller's argument
         sources = list(na(sources, [])) + [nb_file]
     self.set_param(
         "notebook_file",
         os.path.splitext(os.path.basename(nb_file))[0] if nb_file else "")
     resources = JobResource(resources)
     if mode == "interactive":
         resources.add_port("jupyter")
         self.set_secret("token", token)
         cmds = [
             " ".join([
                 "jupyter notebook",
                 "--no-browser",
                 "--ip 0.0.0.0",
                 "--port $PAI_CONTAINER_HOST_jupyter_PORT_LIST",
                 "--NotebookApp.token=<% $secrets.token %>",
                 "--allow-root --NotebookApp.file_to_run=<% $parameters.notebook_file %>.ipynb",
             ]),
         ]
     elif mode == "silent":
         cmds = [
             " ".join([
                 "jupyter nbconvert --ExecutePreprocessor.timeout=-1 --ExecutePreprocessor.allow_errors=True",
                 "--to html --execute <% $parameters.notebook_file %>.ipynb",
             ]),
             "opai storage upload <% $parameters.notebook_file %>.html <% $parameters.work_directory %>/output/<% $parameters.notebook_file %>.html",
         ]
     else:
         cmds = [
             "jupyter nbconvert --to script <% $parameters.notebook_file %>.ipynb --output openpai_submitter_entry",
             "echo ======================== Python Script Starts ========================",
             # execute notebook by iPython. To remove color information, we use "--no-term-title" and sed below
             """ipython --no-term-title openpai_submitter_entry.py | sed -r "s/\\x1B\\[([0-9]{1,2}(;[0-9]{1,2})?)?[mGK]//g" | tr -dc '[[:print:]]\\n'""",
         ]
     self.one_liner(cmds, image, cluster, resources.as_dict, sources,
                    na(pip_installs, []) + ["jupyter"])
     mode_to_tag = {
         "interactive": "interactive_nb",
         "silent": "batch_nb",
         "script": "script_nb"
     }
     self.add_tag(__internal_tags__[mode_to_tag[mode]])
     return self
示例#16
0
文件: job.py 项目: zjuter06060126/pai
    def sdk_job_template(self,
                         cluster_alias_lst: list = None,
                         workspace: str = None,
                         sources: list = None,
                         pip_installs: list = None):
        """Generate the job template for a sdk-submitted job.

        Sets the "clusters" secret, the "cluster_alias", "work_directory"
        and "python_path" parameters, the sdk tag and the "sdk.plugins"
        extra.

        Args:
            cluster_alias_lst: cluster aliases; the first one becomes the
                "cluster_alias" parameter. None is treated as an empty list.
            workspace: workspace root; falls back (via `na`) to the
                "workspace" layered setting, then to
                "<storage_root>/<user of the first cluster>" when at least
                one cluster is given.
            sources: local files to upload and unpack in the container.
            pip_installs: extra pip packages installed before the job runs.

        Returns:
            self
        """
        if cluster_alias_lst is None:
            cluster_alias_lst = []
        # secrets
        clusters = [
            get_cluster(alias, get_client=False) for alias in cluster_alias_lst
        ]
        workspace = na(workspace, LayeredSettings.get("workspace"))
        if clusters:
            # the per-user fallback needs a cluster; without one the
            # workspace may stay unset and "work_directory" becomes None
            workspace = na(workspace,
                           f"{__flags__.storage_root}/{clusters[0]['user']}")
        self.set_secret("clusters", json.dumps(clusters))
        self.set_param("cluster_alias",
                       cluster_alias_lst[0] if cluster_alias_lst else None)
        self.set_param(
            "work_directory",
            '{}/jobs/{}'.format(workspace, self.name) if workspace else None)

        # parameters
        self.set_param("python_path", "python")

        # signature
        self.add_tag(__internal_tags__["sdk"])

        # sdk.plugins
        sdk_install_uri = "-U {}".format(get_install_uri())
        c_dir = '~/{}'.format(__flags__.cache)
        c_file = '%s/%s' % (c_dir, __flags__.cluster_cfg_file)

        plugins = []
        if sources:
            plugins.append({
                "plugin": "local.uploadFiles",
                "parameters": {
                    "files": list({os.path.relpath(s) for s in sources}),
                },
            })

        plugins.extend([
            {
                "plugin":
                "container.preCommands",  # commands to install essential pip packages
                "parameters": {
                    "commands": [
                        "<% $parameters.python_path %> -m pip install {}".
                        format(p)
                        for p in [sdk_install_uri] + na(pip_installs, [])
                    ]
                }
            },
            {
                "plugin": "container.preCommands",  # copy cluster information
                "parameters": {
                    "commands": [
                        "mkdir %s" % c_dir,
                        "echo \"write config to {}\"".format(c_file),
                        "echo <% $secrets.clusters %> > {}".format(c_file),
                        "opai cluster select <% $parameters.cluster_alias %>",
                    ]
                }
            }
        ])

        if sources:
            # fetch and unpack the uploaded source archive in the container
            a_file = os.path.basename(self.temp_archive)
            plugins.append({
                "plugin": "container.preCommands",
                "parameters": {
                    "commands": [
                        "opai storage download <% $parameters.work_directory %>/source/{} {}"
                        .format(a_file, a_file), "tar xvfz {}".format(a_file)
                    ]
                }
            })
        self.set_extra("sdk.plugins", plugins)
        return self
示例#17
0
文件: job.py 项目: simplesoftMX/pai
 def state(self, status: dict = None):
     """Return status["jobStatus"]["state"], or None when it is absent.

     `self.status()` is always called to build the fallback handed to
     `na`, even when `status` is supplied.
     """
     status = na(status, self.status())
     job_status = status.get("jobStatus", {})
     return job_status.get("state", None)
示例#18
0
 def virtual_clusters(self, user_info: dict = None):
     """Return the "virtualCluster" entry of the user information.

     Args:
         user_info: mapping with a "virtualCluster" key; the result of
             `self.rest_api_user()` is the fallback passed to `na`.

     Returns:
         The "virtualCluster" value; a string is split on "," into a list,
         any other value is returned unchanged.
     """
     user_info = na(user_info, self.rest_api_user())
     value = user_info["virtualCluster"]
     if not isinstance(value, str):
         return value
     return value.split(",")