Пример #1
0
def _apply_default_for_cluster_config(
        configuration: models.ClusterConfiguration):
    """
    Build a fresh cluster configuration and merge ``configuration`` into it.

    Args:
        configuration(models.ClusterConfiguration): Settings to merge into the new object

    Returns:
        models.ClusterConfiguration: The newly created configuration after the merge
    """
    merged = models.ClusterConfiguration()
    merged.merge(configuration)
    # NOTE: filling in a default scheduling_target (derived from the cluster
    # size through _default_scheduling_target) is currently disabled here.
    return merged
Пример #2
0
    def create_cluster(self, configuration: models.ClusterConfiguration, wait: bool = False):
        """
        Create a new aztk spark cluster

        Args:
            configuration(aztk.spark.models.models.ClusterConfiguration): Configuration for the cluster to be created
            wait(bool): If you should wait for the cluster to be ready before returning

        Returns:
            aztk.spark.models.Cluster

        Raises:
            error.AztkError: If a batch error occurs while the cluster is being set up
        """
        # Start from an empty configuration, merge the module defaults first
        # and the caller's configuration second, then validate the result.
        cluster_conf = models.ClusterConfiguration()
        cluster_conf.merge(DEFAULT_CLUSTER_CONFIG)
        cluster_conf.merge(configuration)
        cluster_conf.validate()
        cluster_data = self._get_cluster_data(cluster_conf.cluster_id)
        try:
            # Upload the node data and obtain the resource file handed to the start task.
            node_data = NodeData(cluster_conf).add_core().done()
            zip_resource_files = cluster_data.upload_node_data(node_data).to_resource_file()

            start_task = create_cluster_helper.generate_cluster_start_task(self,
                                                                           zip_resource_files,
                                                                           cluster_conf.cluster_id,
                                                                           cluster_conf.gpu_enabled(),
                                                                           cluster_conf.get_docker_repo(),
                                                                           cluster_conf.file_shares,
                                                                           cluster_conf.plugins,
                                                                           cluster_conf.mixed_mode(),
                                                                           cluster_conf.worker_on_master)

            software_metadata_key = "spark"

            vm_image = models.VmImage(
                publisher='Canonical',
                offer='UbuntuServer',
                sku='16.04')

            cluster = self.__create_pool_and_job(
                cluster_conf, software_metadata_key, start_task, vm_image)

            # Wait for the master to be ready, then re-fetch the cluster
            if wait:
                util.wait_for_master_to_be_ready(self, cluster.id)
                cluster = self.get_cluster(cluster.id)

            return cluster

        except batch_error.BatchErrorException as e:
            # Chain explicitly so the original batch error is kept as the cause.
            raise error.AztkError(helpers.format_batch_exception(e)) from e
Пример #3
0
import aztk
from aztk import error
from aztk.client import Client as BaseClient
from aztk.spark import models
from aztk.utils import helpers
from aztk.spark.helpers import create_cluster as create_cluster_helper
from aztk.spark.helpers import submit as cluster_submit_helper
from aztk.spark.helpers import job_submission as job_submit_helper
from aztk.spark.helpers import get_log as get_log_helper
from aztk.spark.helpers import cluster_diagnostic_helper
from aztk.spark.utils import util
from aztk.internal.cluster_data import NodeData


# Baseline cluster configuration with worker_on_master enabled.
DEFAULT_CLUSTER_CONFIG = models.ClusterConfiguration(worker_on_master=True)

class Client(BaseClient):
    """
    Aztk Spark Client
    This is the main entry point for using aztk for spark

    Args:
        secrets_config(aztk.spark.models.models.SecretsConfiguration): Configuration with all the needed credentials
    """
    def __init__(self, secrets_config):
        # No Spark-specific setup here: construction is delegated to the base
        # client, which receives the secrets configuration unchanged.
        super().__init__(secrets_config)

    def create_cluster(self, configuration: models.ClusterConfiguration, wait: bool = False):
        """