Example #1
def wait_until_application_done(self, cluster_id: str, task_id: str):
    # Block until the application task finishes; surface Batch failures as AztkError.
    try:
        helpers.wait_for_task_to_complete(job_id=cluster_id,
                                          task_id=task_id,
                                          batch_client=self.batch_client)
    except batch_error.BatchErrorException as e:
        raise error.AztkError(helpers.format_batch_exception(e))
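Every example on this page ultimately delegates to helpers.wait_for_task_to_complete. For orientation, here is a minimal polling sketch of what such a helper could look like using only the public azure-batch task API; this is an illustrative assumption, not the aztk implementation.

# Illustrative sketch only -- not the aztk helper. Assumes an authenticated
# azure.batch.BatchServiceClient; polls the task until it reaches the completed state.
import time

import azure.batch.models as batch_models


def wait_for_task_to_complete(job_id, task_id, batch_client, poll_seconds=5):
    while True:
        task = batch_client.task.get(job_id=job_id, task_id=task_id)
        if task.state == batch_models.TaskState.completed:
            return task
        time.sleep(poll_seconds)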
Example #2
def submit_application(
    core_cluster_operations,
    spark_cluster_operations,
    cluster_id,
    application,
    remote: bool = False,
    wait: bool = False,
    internal: bool = False,
):
    """
    Submit a spark app
    """
    task = spark_cluster_operations._generate_application_task(
        core_cluster_operations, cluster_id, application, remote)
    task = affinitize_task_to_master(core_cluster_operations,
                                     spark_cluster_operations, cluster_id,
                                     task)

    scheduling_target = get_cluster_scheduling_target(core_cluster_operations,
                                                      cluster_id)
    if scheduling_target is not models.SchedulingTarget.Any:
        schedule_with_target(core_cluster_operations, spark_cluster_operations,
                             cluster_id, scheduling_target, task, wait,
                             internal)
    else:
        # Add task to batch job (which has the same name as cluster_id)
        core_cluster_operations.batch_client.task.add(job_id=cluster_id,
                                                      task=task)

    if wait:
        helpers.wait_for_task_to_complete(
            job_id=cluster_id,
            task_id=task.id,
            batch_client=core_cluster_operations.batch_client)
Example #3
File: submit.py Project: gridl/aztk
def submit_application(spark_client, cluster_id, application, wait: bool = False):
    """
    Submit a spark app
    """
    task = generate_task(spark_client, cluster_id, application)
    task = affinitize_task_to_master(spark_client, cluster_id, task)

    # Add task to batch job (which has the same name as cluster_id)
    job_id = cluster_id
    spark_client.batch_client.task.add(job_id=job_id, task=task)

    if wait:
        helpers.wait_for_task_to_complete(job_id=job_id, task_id=task.id, batch_client=spark_client.batch_client)
Example #4
def submit_application(core_cluster_operations,
                       spark_cluster_operations,
                       cluster_id,
                       application,
                       remote: bool = False,
                       wait: bool = False):
    """
    Submit a spark app
    """
    task = spark_cluster_operations._generate_application_task(core_cluster_operations, cluster_id, application, remote)
    task = affinitize_task_to_master(core_cluster_operations, spark_cluster_operations, cluster_id, task)

    # Add task to batch job (which has the same name as cluster_id)
    job_id = cluster_id
    core_cluster_operations.batch_client.task.add(job_id=job_id, task=task)

    if wait:
        helpers.wait_for_task_to_complete(
            job_id=job_id, task_id=task.id, batch_client=core_cluster_operations.batch_client)
Example #5
def submit_application(spark_client, cluster_id, application, wait: bool = False):
    """
    Submit a spark app
    """

    resource_files = []

    app_resource_file = helpers.upload_file_to_container(container_name=application.name,
                                                         file_path=application.application,
                                                         blob_client=spark_client.blob_client,
                                                         use_full_path=False)

    # Upload application file
    resource_files.append(app_resource_file)

    # Upload dependent JARS
    jar_resource_file_paths = []
    for jar in application.jars:
        current_jar_resource_file_path = helpers.upload_file_to_container(container_name=application.name,
                                                                          file_path=jar,
                                                                          blob_client=spark_client.blob_client,
                                                                          use_full_path=False)
        jar_resource_file_paths.append(current_jar_resource_file_path)
        resource_files.append(current_jar_resource_file_path)

    # Upload dependent python files
    py_files_resource_file_paths = []
    for py_file in application.py_files:
        current_py_files_resource_file_path = helpers.upload_file_to_container(container_name=application.name,
                                                                               file_path=py_file,
                                                                               blob_client=spark_client.blob_client,
                                                                               use_full_path=False)
        py_files_resource_file_paths.append(
            current_py_files_resource_file_path)
        resource_files.append(current_py_files_resource_file_path)

    # Upload other dependent files
    files_resource_file_paths = []
    for file in application.files:
        files_resource_file_path = helpers.upload_file_to_container(container_name=application.name,
                                                                    file_path=file,
                                                                    blob_client=spark_client.blob_client,
                                                                    use_full_path=False)
        files_resource_file_paths.append(files_resource_file_path)
        resource_files.append(files_resource_file_path)

    # create command to submit task
    cmd = __app_submit_cmd(
        spark_client=spark_client,
        cluster_id=cluster_id,
        name=application.name,
        app=app_resource_file.file_path,
        app_args=application.application_args,
        main_class=application.main_class,
        jars=[jar_resource_file_path.file_path for jar_resource_file_path in jar_resource_file_paths],
        py_files=[py_files_resource.file_path for py_files_resource in py_files_resource_file_paths],
        files=[file_resource_file_path.file_path for file_resource_file_path in files_resource_file_paths],
        driver_java_options=application.driver_java_options,
        driver_library_path=application.driver_library_path,
        driver_class_path=application.driver_class_path,
        driver_memory=application.driver_memory,
        executor_memory=application.executor_memory,
        driver_cores=application.driver_cores,
        executor_cores=application.executor_cores)

    # Get cluster size
    cluster = spark_client.get_cluster(cluster_id)

    # Affinitize task to master node
    # master_node_affinity_id = helpers.get_master_node_id(cluster_id, spark_client.batch_client)
    rls = spark_client.get_remote_login_settings(cluster.id, cluster.master_node_id)

    # Create task
    task = batch_models.TaskAddParameter(
        id=application.name,
        affinity_info=batch_models.AffinityInformation(
            affinity_id=cluster.master_node_id),
        command_line=helpers.wrap_commands_in_shell(cmd),
        resource_files=resource_files,
        user_identity=batch_models.UserIdentity(
            auto_user=batch_models.AutoUserSpecification(
                scope=batch_models.AutoUserScope.task,
                elevation_level=batch_models.ElevationLevel.admin))
    )

    # Add task to batch job (which has the same name as cluster_id)
    job_id = cluster_id
    spark_client.batch_client.task.add(job_id=job_id, task=task)

    if wait:
        helpers.wait_for_task_to_complete(job_id=job_id, task_id=task.id, batch_client=spark_client.batch_client)
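A hedged caller sketch for this last example: submit_application only reads the attributes shown above from the application object (name, application, application_args, main_class, jars, py_files, files, and the driver/executor settings), so a plain namespace is enough to illustrate the call. The client and cluster name here are assumptions; in practice aztk's own application model and an initialized Spark client would be used.

# Minimal caller sketch (assumptions: `spark_client` is an already-initialized aztk
# Spark client and a cluster named "my-cluster" exists). The namespace simply carries
# the attributes that submit_application reads.
from types import SimpleNamespace

app = SimpleNamespace(
    name="pi-estimate",
    application="/path/to/pi.py",        # local file uploaded to blob storage
    application_args=["100"],
    main_class=None,
    jars=[], py_files=[], files=[],
    driver_java_options=None, driver_library_path=None, driver_class_path=None,
    driver_memory="1g", executor_memory="1g",
    driver_cores=1, executor_cores=1)

submit_application(spark_client, cluster_id="my-cluster", application=app, wait=True)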