Example #1
 def run_cluster_diagnostics(self, cluster_id, output_directory):
     try:
         output = cluster_diagnostic_helper.run(self, cluster_id,
                                                output_directory)
         return output
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))
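
Every example on this page follows the same wrapper pattern: call a helper, and re-raise any BatchErrorException as an AztkError with a formatted message. A minimal caller-side sketch, assuming `client` is an already-constructed aztk Spark client and that the error module resolves to aztk.error (both assumptions; client construction is not shown on this page):

    from aztk.error import AztkError  # assumed module path

    try:
        # Run diagnostics on a cluster and write the results locally.
        client.run_cluster_diagnostics(cluster_id="my-cluster",
                                       output_directory="./diagnostics")
    except AztkError as e:
        # Batch service failures surface here, already formatted by the wrapper.
        print("Diagnostics failed:", e)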
Example #2
 def cluster_copy(self, cluster_id: str, source_path: str,
                  destination_path: str):
     try:
         return self.__cluster_copy(cluster_id, 'spark', source_path,
                                    destination_path)
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))
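
A hedged usage sketch for the copy wrapper above; the paths are placeholders, and `client` is again an assumed, pre-built Spark client:

    # Copy a local file into the cluster's 'spark' container.
    client.cluster_copy(cluster_id="my-cluster",
                        source_path="/local/data.csv",
                        destination_path="/mnt/data.csv")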
Example #3
def delete_cluster(core_cluster_operations,
                   cluster_id: str,
                   keep_logs: bool = False):
    try:
        return core_cluster_operations.delete(cluster_id, keep_logs)
    except BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
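
A hedged sketch for the delete wrapper; `core_cluster_operations` is assumed to come from an authenticated client:

    # Tear down the cluster but keep its logs in storage (keep_logs=True).
    delete_cluster(core_cluster_operations, cluster_id="my-cluster", keep_logs=True)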
Example #4
    def create_cluster(self,
                       cluster_conf: models.ClusterConfiguration,
                       wait: bool = False):
        try:
            zip_resource_files = upload_node_scripts.zip_scripts(
                self.blob_client, cluster_conf.cluster_id,
                cluster_conf.custom_scripts, cluster_conf.spark_configuration,
                cluster_conf.user_configuration)

            start_task = create_cluster_helper.generate_cluster_start_task(
                self, zip_resource_files, cluster_conf.gpu_enabled,
                cluster_conf.docker_repo, cluster_conf.file_shares)

            software_metadata_key = "spark"

            vm_image = models.VmImage(publisher='Canonical',
                                      offer='UbuntuServer',
                                      sku='16.04')

            cluster = self.__create_pool_and_job(cluster_conf,
                                                 software_metadata_key,
                                                 start_task, vm_image)

            # Wait for the master to be ready
            if wait:
                util.wait_for_master_to_be_ready(self, cluster.id)
                cluster = self.get_cluster(cluster.id)

            return cluster

        except batch_error.BatchErrorException as e:
            raise error.AztkError(helpers.format_batch_exception(e))
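
A hedged sketch for create_cluster; `cluster_id` is the only ClusterConfiguration field named at construction time here, so treat the constructor call as an assumption:

    # Create the cluster and block until the Spark master is ready
    # (wait=True triggers the wait_for_master_to_be_ready branch above).
    cluster_conf = models.ClusterConfiguration(cluster_id="my-cluster")  # assumed ctor
    cluster = client.create_cluster(cluster_conf, wait=True)
    print(cluster.id)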
Example #5
def get_application_state(core_cluster_operations, cluster_id: str,
                          app_name: str):
    try:
        return ApplicationState(
            core_cluster_operations.get_task_state(cluster_id, app_name).value)
    except BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
Example #6
 def get_application(self, job_id, application_name):
     try:
         return models.Application(
             job_submit_helper.get_application(self, job_id,
                                               application_name))
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))
Example #7
def get_recent_job(core_job_operations, id):
    try:
        job_schedule = core_job_operations.batch_client.job_schedule.get(id)
        return core_job_operations.batch_client.job.get(
            job_schedule.execution_info.recent_job.id)
    except BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
Example #8
 def wait_until_application_done(self, cluster_id: str, task_id: str):
     try:
         helpers.wait_for_task_to_complete(job_id=cluster_id,
                                           task_id=task_id,
                                           batch_client=self.batch_client)
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))
Example #9
def get_application_status(core_cluster_operations, cluster_id: str,
                           app_name: str):
    try:
        task = core_cluster_operations.batch_client.task.get(
            cluster_id, app_name)
        return task.state._value_
    except batch_error.BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
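
`task.state._value_` reaches into the private attribute that backs an Enum member's value; on standard-library enums the public spelling `task.state.value` returns the same thing:

    from enum import Enum

    class TaskState(Enum):
        active = "active"
        completed = "completed"

    # _value_ is the private storage behind the public .value property.
    assert TaskState.completed.value == TaskState.completed._value_ == "completed"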
Example #10
 def wait_until_cluster_is_ready(self, cluster_id: str):
     try:
         util.wait_for_master_to_be_ready(self, cluster_id)
         pool = self.batch_client.pool.get(cluster_id)
         nodes = self.batch_client.compute_node.list(pool_id=cluster_id)
         return models.Cluster(pool, nodes)
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))
Example #11
 def list_jobs(self):
     try:
         return [
             models.Job(cloud_job_schedule)
             for cloud_job_schedule in job_submit_helper.list_jobs(self)
         ]
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))
Example #12
def list_jobs(core_job_operations):
    try:
        return [
            models.Job(cloud_job_schedule)
            for cloud_job_schedule in _list_jobs(core_job_operations)
        ]
    except BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
Example #13
 def list_clusters(self):
     try:
         return [
             models.Cluster(pool)
             for pool in self.__list_clusters(aztk.models.Software.spark)
         ]
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))
Example #14
def get_job_application_log(core_job_operations, spark_job_operations, job_id,
                            application_name):
    try:
        return models.ApplicationLog(
            _get_application_log(core_job_operations, spark_job_operations,
                                 job_id, application_name))
    except BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
Example #15
    def submit_job(self, job_configuration):
        try:
            cluster_data = self._get_cluster_data(job_configuration.id)
            node_data = NodeData(
                job_configuration.as_cluster_config()).add_core().done()
            zip_resource_files = cluster_data.upload_node_data(
                node_data).to_resource_file()

            start_task = create_cluster_helper.generate_cluster_start_task(
                self,
                zip_resource_files,
                job_configuration.gpu_enabled,
                job_configuration.docker_repo,
                worker_on_master=job_configuration.worker_on_master)

            application_tasks = []
            for application in job_configuration.applications:
                application_tasks.append((application,
                                          cluster_submit_helper.generate_task(
                                              self, job_configuration.id,
                                              application)))

            job_manager_task = job_submit_helper.generate_task(
                self, job_configuration, application_tasks)

            software_metadata_key = "spark"

            vm_image = models.VmImage(publisher='Canonical',
                                      offer='UbuntuServer',
                                      sku='16.04')

            if job_configuration.max_dedicated_nodes and not job_configuration.max_low_pri_nodes:
                autoscale_formula = "maxNumberofVMs = {0}; targetNumberofVMs = {1};" \
                                    " $TargetDedicatedNodes=min(maxNumberofVMs, targetNumberofVMs)".format(
                                        job_configuration.max_dedicated_nodes, job_configuration.max_dedicated_nodes)
            elif job_configuration.max_low_pri_nodes and not job_configuration.max_dedicated_nodes:
                autoscale_formula = "maxNumberofVMs = {0}; targetNumberofVMs = {1};" \
                                    " $TargetLowPriorityNodes=min(maxNumberofVMs, targetNumberofVMs)".format(
                                        job_configuration.max_low_pri_nodes, job_configuration.max_low_pri_nodes)
            else:
                raise error.AztkError("Jobs do not support both dedicated and low priority nodes." \
                                      " JobConfiguration fields max_dedicated_nodes and max_low_pri_nodes are mutually exclusive values.")

            job = self.__submit_job(
                job_configuration=job_configuration,
                start_task=start_task,
                job_manager_task=job_manager_task,
                autoscale_formula=autoscale_formula,
                software_metadata_key=software_metadata_key,
                vm_image_model=vm_image,
                application_metadata='\n'.join(
                    application.name
                    for application in (job_configuration.applications or [])))

            return models.Job(job)

        except batch_error.BatchErrorException as e:
            raise error.AztkError(helpers.format_batch_exception(e))
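
To make the autoscale branch concrete, this is the formula string the dedicated-nodes case builds for max_dedicated_nodes = 4, reproducing the snippet's own format() call:

    formula = ("maxNumberofVMs = {0}; targetNumberofVMs = {1};"
               " $TargetDedicatedNodes=min(maxNumberofVMs, targetNumberofVMs)").format(4, 4)
    print(formula)
    # maxNumberofVMs = 4; targetNumberofVMs = 4; $TargetDedicatedNodes=min(maxNumberofVMs, targetNumberofVMs)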
Example #16
def delete(core_job_operations,
           spark_job_operations,
           job_id: str,
           keep_logs: bool = False):
    try:
        return _delete(core_job_operations, spark_job_operations, job_id,
                       keep_logs)
    except batch_error.BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
Example #17
File: client.py Project: gridl/aztk
 def cluster_run(self, cluster_id: str, command: str, host=False, internal: bool = False, timeout=None):
     try:
         return self.__cluster_run(cluster_id,
                                   command,
                                   internal,
                                   container_name='spark' if not host else None,
                                   timeout=timeout)
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))
Example #18
File: client.py Project: gridl/aztk
 def create_user(self, cluster_id: str, username: str, password: str = None, ssh_key: str = None) -> str:
     try:
         cluster = self.get_cluster(cluster_id)
         master_node_id = cluster.master_node_id
         if not master_node_id:
             raise error.ClusterNotReadyError("The master has not yet been picked, a user cannot be added.")
         self.__create_user(cluster.id, master_node_id, username, password, ssh_key)
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))
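
A hedged sketch for create_user; the SSH public key is a placeholder, and a password-based variant would pass `password=` instead. Note that the snippet raises ClusterNotReadyError before any Batch call if the master has not yet been picked:

    # Add a user to the cluster's master node using an SSH public key.
    client.create_user(cluster_id="my-cluster",
                       username="spark",
                       ssh_key="ssh-rsa AAAA... user@host")  # placeholder key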
Example #19
def list_applications(core_job_operations, job_id):
    try:
        applications = _list_applications(core_job_operations, job_id)
        for item in applications:
            if applications[item]:
                applications[item] = models.Application(applications[item])
        return applications
    except batch_error.BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
Example #20
 def submit(self,
            cluster_id: str,
            application: models.ApplicationConfiguration,
            wait: bool = False):
     try:
         cluster_submit_helper.submit_application(self, cluster_id,
                                                  application, wait)
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))
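
A hedged sketch for submit; ApplicationConfiguration's field names are not shown on this page, so the constructor arguments below are assumptions:

    # Submit an application and block until it finishes (wait=True).
    app = models.ApplicationConfiguration(name="pi-estimate",            # assumed field
                                          application="/path/to/pi.py")  # assumed field
    client.submit(cluster_id="my-cluster", application=app, wait=True)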
Example #21
File: list.py Project: cmatc13/aztk
def list_clusters(core_cluster_operations):
    try:
        software_metadata_key = base_models.Software.spark
        return [
            models.Cluster(cluster)
            for cluster in core_cluster_operations.list(software_metadata_key)
        ]
    except batch_error.BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
Example #22
 def list_applications(self, job_id):
     try:
         applications = job_submit_helper.list_applications(self, job_id)
         for item in applications:
             if applications[item]:
                 applications[item] = models.Application(applications[item])
         return applications
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))
Example #23
    def submit_job(self, job_configuration: models.JobConfiguration):
        try:
            job_configuration = _apply_default_for_job_config(
                job_configuration)
            job_configuration.validate()
            cluster_data = self._get_cluster_data(job_configuration.id)
            node_data = NodeData(
                job_configuration.to_cluster_config()).add_core().done()
            zip_resource_files = cluster_data.upload_node_data(
                node_data).to_resource_file()

            start_task = create_cluster_helper.generate_cluster_start_task(
                self,
                zip_resource_files,
                job_configuration.id,
                job_configuration.gpu_enabled,
                job_configuration.get_docker_repo(),
                mixed_mode=job_configuration.mixed_mode(),
                worker_on_master=job_configuration.worker_on_master)

            application_tasks = []
            for application in job_configuration.applications:
                application_tasks.append((application,
                                          cluster_submit_helper.generate_task(
                                              self, job_configuration.id,
                                              application)))

            job_manager_task = job_submit_helper.generate_task(
                self, job_configuration, application_tasks)

            software_metadata_key = "spark"

            vm_image = models.VmImage(publisher='Canonical',
                                      offer='UbuntuServer',
                                      sku='16.04')

            autoscale_formula = "$TargetDedicatedNodes = {0}; " \
                                "$TargetLowPriorityNodes = {1}".format(
                                    job_configuration.max_dedicated_nodes,
                                    job_configuration.max_low_pri_nodes)

            job = self.__submit_job(
                job_configuration=job_configuration,
                start_task=start_task,
                job_manager_task=job_manager_task,
                autoscale_formula=autoscale_formula,
                software_metadata_key=software_metadata_key,
                vm_image_model=vm_image,
                application_metadata='\n'.join(
                    application.name
                    for application in (job_configuration.applications or [])))

            return models.Job(job)

        except batch_error.BatchErrorException as e:
            raise error.AztkError(helpers.format_batch_exception(e))
Example #24
def create_cluster(core_cluster_operations,
                   spark_cluster_operations,
                   cluster_conf: models.ClusterConfiguration,
                   vm_image: base_models.VmImage,
                   wait: bool = False):
    """
    Create a new aztk spark cluster

    Args:
        cluster_conf(aztk.spark.models.models.ClusterConfiguration): Configuration for the the cluster to be created
        wait(bool): If you should wait for the cluster to be ready before returning
        vm_image: models for cluster vm

    Returns:
        :obj:`aztk.spark.models.Cluster`
    """
    cluster_conf = _apply_default_for_cluster_config(cluster_conf)
    cluster_conf.validate()

    cluster_data = core_cluster_operations.get_cluster_data(
        cluster_conf.cluster_id)
    try:
        zip_resource_files = None
        node_data = NodeData(cluster_conf).add_core().done()
        zip_resource_files = cluster_data.upload_node_data(
            node_data).to_resource_file()

        start_task = spark_cluster_operations._generate_cluster_start_task(
            core_cluster_operations,
            zip_resource_files,
            cluster_conf.cluster_id,
            cluster_conf.gpu_enabled(),
            cluster_conf.get_docker_repo(),
            cluster_conf.get_docker_run_options(),
            cluster_conf.file_shares,
            cluster_conf.mixed_mode(),
            cluster_conf.worker_on_master,
        )

        software_metadata_key = base_models.Software.spark

        cluster = core_cluster_operations.create(cluster_conf,
                                                 software_metadata_key,
                                                 start_task, vm_image)

        # Wait for the master to be ready
        if wait:
            util.wait_for_master_to_be_ready(core_cluster_operations,
                                             spark_cluster_operations,
                                             cluster.id)
            cluster = spark_cluster_operations.get(cluster.id)

        return cluster

    except BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
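
A hedged sketch wiring the standalone create_cluster together; the VmImage values repeat the ones hard-coded elsewhere on this page, and both operations objects are assumed to come from an authenticated client:

    vm_image = base_models.VmImage(publisher="Canonical",
                                   offer="UbuntuServer",
                                   sku="16.04")
    cluster = create_cluster(core_cluster_operations,
                             spark_cluster_operations,
                             cluster_conf,
                             vm_image,
                             wait=True)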
Example #25
def get_application_log(base_operations,
                        cluster_id: str,
                        application_name: str,
                        tail=False,
                        current_bytes: int = 0):
    try:
        return get_log(base_operations, cluster_id, application_name, tail,
                       current_bytes)
    except BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
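
The `tail`/`current_bytes` pair suggests incremental log polling. A hedged sketch of that loop; the ApplicationLog attribute names used below (`log`, `total_bytes`, `application_state`) are assumptions this page does not confirm:

    import time

    current_bytes = 0
    while True:
        app_log = get_application_log(base_operations, "my-cluster", "my-app",
                                      tail=True, current_bytes=current_bytes)
        print(app_log.log, end="")           # assumed: newly fetched chunk
        current_bytes = app_log.total_bytes  # assumed: resume offset for next poll
        if app_log.application_state == "completed":  # assumed state sentinel
            break
        time.sleep(3)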
Example #26
def list_applications(core_operations, cluster_id):
    try:
        scheduling_target = core_operations.get_cluster_configuration(cluster_id).scheduling_target
        if scheduling_target is not SchedulingTarget.Any:
            tasks = core_operations.list_task_table_entries(cluster_id)
        else:
            tasks = core_operations.list_batch_tasks(cluster_id)
        return [Application(task) for task in tasks]
    except BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
Example #27
def run_cluster_diagnostics(spark_cluster_operations,
                            cluster_id,
                            output_directory=None,
                            brief=False):
    try:
        output = _run(spark_cluster_operations, cluster_id, output_directory,
                      brief)
        return output
    except BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
Example #28
def submit(core_cluster_operations,
           spark_cluster_operations,
           cluster_id: str,
           application: models.ApplicationConfiguration,
           remote: bool = False,
           wait: bool = False):
    try:
        submit_application(core_cluster_operations, spark_cluster_operations, cluster_id, application, remote, wait)
    except batch_error.BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
Example #29
def submit_job(core_job_operations,
               spark_job_operations,
               job_configuration: models.JobConfiguration,
               wait: bool = False):
    try:
        job_configuration = _apply_default_for_job_config(job_configuration)
        job_configuration.validate()
        cluster_data = core_job_operations.get_cluster_data(job_configuration.id)
        node_data = NodeData(job_configuration.to_cluster_config()).add_core().done()
        zip_resource_files = cluster_data.upload_node_data(node_data).to_resource_file()

        start_task = spark_job_operations._generate_cluster_start_task(
            core_job_operations,
            zip_resource_files,
            job_configuration.id,
            job_configuration.gpu_enabled,
            job_configuration.get_docker_repo(),
            job_configuration.get_docker_run_options(),
            mixed_mode=job_configuration.mixed_mode(),
            worker_on_master=job_configuration.worker_on_master,
        )

        application_tasks = []
        for application in job_configuration.applications:
            application_tasks.append((
                application,
                spark_job_operations._generate_application_task(core_job_operations, job_configuration.id, application),
            ))

        job_manager_task = generate_job_manager_task(core_job_operations, job_configuration, application_tasks)

        software_metadata_key = base_models.Software.spark

        vm_image = models.VmImage(publisher="Canonical", offer="UbuntuServer", sku="16.04")

        autoscale_formula = "$TargetDedicatedNodes = {0}; " "$TargetLowPriorityNodes = {1}".format(
            job_configuration.max_dedicated_nodes, job_configuration.max_low_pri_nodes)

        job = core_job_operations.submit(
            job_configuration=job_configuration,
            start_task=start_task,
            job_manager_task=job_manager_task,
            autoscale_formula=autoscale_formula,
            software_metadata_key=software_metadata_key,
            vm_image_model=vm_image,
            application_metadata="\n".join(application.name for application in (job_configuration.applications or [])),
        )

        if wait:
            spark_job_operations.wait(id=job_configuration.id)

        return models.Job(job)

    except batch_error.BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
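
A hedged end-to-end sketch for this submit_job variant; JobConfiguration construction is not shown on this page, so its fields are assumptions:

    job_configuration = models.JobConfiguration(id="my-job",         # assumed field
                                                applications=[app])  # assumed field
    # wait=True defers to spark_job_operations.wait() before returning.
    job = submit_job(core_job_operations, spark_job_operations,
                     job_configuration, wait=True)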
Example #30
 def get_application_log(self,
                         cluster_id: str,
                         application_name: str,
                         tail=False,
                         current_bytes: int = 0):
     try:
         return get_log_helper.get_log(self.batch_client, self.blob_client,
                                       cluster_id, application_name, tail,
                                       current_bytes)
     except batch_error.BatchErrorException as e:
         raise error.AztkError(helpers.format_batch_exception(e))