Example #1
def generate_task(spark_client, job, application_tasks):
    resource_files = []
    for application, task in application_tasks:
        task_definition_resource_file = helpers.upload_text_to_container(
            container_name=job.id,
            application_name=application.name + ".yaml",
            file_path=application.name + ".yaml",
            content=yaml.dump(task),
            blob_client=spark_client.blob_client,
        )
        resource_files.append(task_definition_resource_file)

    task_cmd = __app_cmd()

    # Create task
    task = batch_models.JobManagerTask(
        id=job.id,
        command_line=helpers.wrap_commands_in_shell([task_cmd]),
        resource_files=resource_files,
        kill_job_on_completion=False,
        allow_low_priority_node=True,
        user_identity=batch_models.UserIdentity(
            auto_user=batch_models.AutoUserSpecification(
                scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin)),
    )

    return task
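
The JobManagerTask returned here is not submitted by the helper itself; the caller attaches it to an Azure Batch job. A minimal sketch of that wiring, assuming an existing batch_client and a pool whose id matches the job id (both assumptions, not part of the example above):

import azure.batch.models as batch_models

# Hypothetical wiring: attach the generated JobManagerTask to a new Batch job.
# spark_client, job and application_tasks are assumed to have the shapes used above.
job_manager_task = generate_task(spark_client, job, application_tasks)

batch_job = batch_models.JobAddParameter(
    id=job.id,  # the job id also names the storage container used above
    pool_info=batch_models.PoolInformation(pool_id=job.id),  # assumed pool id
    job_manager_task=job_manager_task,
)
batch_client.job.add(batch_job)
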
Example #2
def generate_cluster_start_task(
    core_base_operations,
    zip_resource_file: batch_models.ResourceFile,
    cluster_id: str,
    gpu_enabled: bool,
    docker_repo: str = None,
    docker_run_options: str = None,
    file_shares: List[models.FileShare] = None,
    mixed_mode: bool = False,
    worker_on_master: bool = True,
):
    """
        This will return the start task object for the pool to be created.
        :param cluster_id: Id of the cluster (used for uploading the resource files)
        :param zip_resource_file: Resource file object pointing to the zip file containing scripts to run on the node
    """

    resource_files = [zip_resource_file]
    spark_web_ui_port = constants.DOCKER_SPARK_WEB_UI_PORT
    spark_worker_ui_port = constants.DOCKER_SPARK_WORKER_UI_PORT
    spark_job_ui_port = constants.DOCKER_SPARK_JOB_UI_PORT

    spark_container_name = constants.DOCKER_SPARK_CONTAINER_NAME
    spark_submit_logs_file = constants.SPARK_SUBMIT_LOGS_FILE

    # TODO use certificate
    environment_settings = (
        __get_secrets_env(core_base_operations) + [
            batch_models.EnvironmentSetting(name="SPARK_WEB_UI_PORT",
                                            value=spark_web_ui_port),
            batch_models.EnvironmentSetting(name="SPARK_WORKER_UI_PORT",
                                            value=spark_worker_ui_port),
            batch_models.EnvironmentSetting(name="SPARK_JOB_UI_PORT",
                                            value=spark_job_ui_port),
            batch_models.EnvironmentSetting(name="SPARK_CONTAINER_NAME",
                                            value=spark_container_name),
            batch_models.EnvironmentSetting(name="SPARK_SUBMIT_LOGS_FILE",
                                            value=spark_submit_logs_file),
            batch_models.EnvironmentSetting(
                name="AZTK_GPU_ENABLED", value=helpers.bool_env(gpu_enabled)),
        ] + __get_docker_credentials(core_base_operations) +
        _get_aztk_environment(cluster_id, worker_on_master, mixed_mode))

    # start task command
    command = __cluster_install_cmd(zip_resource_file, gpu_enabled,
                                    docker_repo, docker_run_options,
                                    file_shares)

    return batch_models.StartTask(
        command_line=helpers.wrap_commands_in_shell(command),
        resource_files=resource_files,
        environment_settings=environment_settings,
        user_identity=POOL_ADMIN_USER_IDENTITY,
        wait_for_success=True,
        max_task_retry_count=2,
    )
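
The StartTask built above is meant to be passed to pool creation. A minimal sketch, assuming an existing batch_client and placeholder VM image, size and node count (none of these values come from the example):

import azure.batch.models as batch_models

# Hypothetical pool creation using the start task from generate_cluster_start_task.
start_task = generate_cluster_start_task(
    core_base_operations, zip_resource_file, cluster_id="my-cluster", gpu_enabled=False)

pool = batch_models.PoolAddParameter(
    id="my-cluster",                    # pool id doubles as the cluster id
    vm_size="standard_d2_v2",           # placeholder VM size
    target_dedicated_nodes=3,           # placeholder node count
    virtual_machine_configuration=batch_models.VirtualMachineConfiguration(
        image_reference=batch_models.ImageReference(
            publisher="Canonical", offer="UbuntuServer", sku="16.04-LTS", version="latest"),
        node_agent_sku_id="batch.node.ubuntu 16.04"),
    start_task=start_task,
)
batch_client.pool.add(pool)
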
Example #3
def generate_cluster_start_task(
        spark_client,
        zip_resource_file: batch_models.ResourceFile,
        gpu_enabled: bool,
        docker_repo: str = None,
        file_shares: List[aztk_models.FileShare] = None):
    """
        This will return the start task object for the pool to be created.
        :param zip_resource_file: Resource file object pointing to the zip file containing scripts to run on the node
    """

    resource_files = [zip_resource_file]
    spark_web_ui_port = constants.DOCKER_SPARK_WEB_UI_PORT
    spark_worker_ui_port = constants.DOCKER_SPARK_WORKER_UI_PORT
    spark_jupyter_port = constants.DOCKER_SPARK_JUPYTER_PORT
    spark_job_ui_port = constants.DOCKER_SPARK_JOB_UI_PORT
    spark_rstudio_server_port = constants.DOCKER_SPARK_RSTUDIO_SERVER_PORT

    # TODO use certificate
    environment_settings = [
        batch_models.EnvironmentSetting(
            name="BATCH_ACCOUNT_KEY", value=spark_client.batch_config.account_key),
        batch_models.EnvironmentSetting(
            name="BATCH_ACCOUNT_URL", value=spark_client.batch_config.account_url),
        batch_models.EnvironmentSetting(
            name="STORAGE_ACCOUNT_NAME", value=spark_client.blob_config.account_name),
        batch_models.EnvironmentSetting(
            name="STORAGE_ACCOUNT_KEY", value=spark_client.blob_config.account_key),
        batch_models.EnvironmentSetting(
            name="STORAGE_ACCOUNT_SUFFIX", value=spark_client.blob_config.account_suffix),
        batch_models.EnvironmentSetting(
            name="SPARK_WEB_UI_PORT", value=spark_web_ui_port),
        batch_models.EnvironmentSetting(
            name="SPARK_WORKER_UI_PORT", value=spark_worker_ui_port),
        batch_models.EnvironmentSetting(
            name="SPARK_JUPYTER_PORT", value=spark_jupyter_port),
        batch_models.EnvironmentSetting(
            name="SPARK_JOB_UI_PORT", value=spark_job_ui_port),
        batch_models.EnvironmentSetting(
            name="SPARK_RSTUDIO_SERVER_PORT", value=spark_rstudio_server_port),
    ] + __get_docker_credentials(spark_client)

    # start task command
    command = __cluster_install_cmd(zip_resource_file, gpu_enabled, docker_repo, file_shares)

    return batch_models.StartTask(
        command_line=helpers.wrap_commands_in_shell(command),
        resource_files=resource_files,
        environment_settings=environment_settings,
        user_identity=POOL_ADMIN_USER_IDENTITY,
        wait_for_success=True)
Example #4
def submit_application(spark_client, cluster_id, application, wait: bool = False):
    """
    Submit a Spark application to the given cluster.
    """

    resource_files = []

    app_resource_file = helpers.upload_file_to_container(container_name=application.name,
                                                         file_path=application.application,
                                                         blob_client=spark_client.blob_client,
                                                         use_full_path=False)

    # Upload application file
    resource_files.append(app_resource_file)

    # Upload dependent JARS
    jar_resource_file_paths = []
    for jar in application.jars:
        current_jar_resource_file_path = helpers.upload_file_to_container(container_name=application.name,
                                                                          file_path=jar,
                                                                          blob_client=spark_client.blob_client,
                                                                          use_full_path=False)
        jar_resource_file_paths.append(current_jar_resource_file_path)
        resource_files.append(current_jar_resource_file_path)

    # Upload dependent python files
    py_files_resource_file_paths = []
    for py_file in application.py_files:
        current_py_files_resource_file_path = helpers.upload_file_to_container(container_name=application.name,
                                                                               file_path=py_file,
                                                                               blob_client=spark_client.blob_client,
                                                                               use_full_path=False)
        py_files_resource_file_paths.append(
            current_py_files_resource_file_path)
        resource_files.append(current_py_files_resource_file_path)

    # Upload other dependent files
    files_resource_file_paths = []
    for file in application.files:
        files_resource_file_path = helpers.upload_file_to_container(container_name=application.name,
                                                                    file_path=file,
                                                                    blob_client=spark_client.blob_client,
                                                                    use_full_path=False)
        files_resource_file_paths.append(files_resource_file_path)
        resource_files.append(files_resource_file_path)

    # create command to submit task
    cmd = __app_submit_cmd(
        spark_client=spark_client,
        cluster_id=cluster_id,
        name=application.name,
        app=app_resource_file.file_path,
        app_args=application.application_args,
        main_class=application.main_class,
        jars=[jar_resource_file_path.file_path for jar_resource_file_path in jar_resource_file_paths],
        py_files=[py_files_resource.file_path for py_files_resource in py_files_resource_file_paths],
        files=[file_resource_file_path.file_path for file_resource_file_path in files_resource_file_paths],
        driver_java_options=application.driver_java_options,
        driver_library_path=application.driver_library_path,
        driver_class_path=application.driver_class_path,
        driver_memory=application.driver_memory,
        executor_memory=application.executor_memory,
        driver_cores=application.driver_cores,
        executor_cores=application.executor_cores)

    # Get cluster size
    cluster = spark_client.get_cluster(cluster_id)

    # Affinitize task to master node
    # master_node_affinity_id = helpers.get_master_node_id(cluster_id, spark_client.batch_client)
    rls = spark_client.get_remote_login_settings(cluster.id, cluster.master_node_id)

    # Create task
    task = batch_models.TaskAddParameter(
        id=application.name,
        affinity_info=batch_models.AffinityInformation(
            affinity_id=cluster.master_node_id),
        command_line=helpers.wrap_commands_in_shell(cmd),
        resource_files=resource_files,
        user_identity=batch_models.UserIdentity(
            auto_user=batch_models.AutoUserSpecification(
                scope=batch_models.AutoUserScope.task,
                elevation_level=batch_models.ElevationLevel.admin))
    )

    # Add task to batch job (which has the same name as cluster_id)
    job_id = cluster_id
    spark_client.batch_client.task.add(job_id=job_id, task=task)

    if wait:
        helpers.wait_for_task_to_complete(job_id=job_id, task_id=task.id, batch_client=spark_client.batch_client)
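
A sketch of calling submit_application, assuming spark_client already exists and using a plain namespace that carries exactly the attributes the function reads above (a real client would use its own application model; every value below is a placeholder):

from types import SimpleNamespace

application = SimpleNamespace(
    name="pi-estimate",                   # becomes the task id and container name
    application="/local/path/pi.py",      # local file uploaded by upload_file_to_container
    application_args=["100"],
    main_class=None,
    jars=[], py_files=[], files=[],
    driver_java_options=None, driver_library_path=None, driver_class_path=None,
    driver_memory="1g", executor_memory="1g",
    driver_cores=1, executor_cores=1,
)

submit_application(spark_client, cluster_id="my-cluster", application=application, wait=True)
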
Example #5
def generate_task(spark_client, container_id, application, remote=False):
    resource_files = []

    # The application provided is not hosted remotely and therefore must be uploaded
    if not remote:
        app_resource_file = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=application.application,
            blob_client=spark_client.blob_client,
            use_full_path=False,
        )

        # Upload application file
        resource_files.append(app_resource_file)

        application.application = "$AZ_BATCH_TASK_WORKING_DIR/" + os.path.basename(
            application.application)

    # Upload dependent JARS
    jar_resource_file_paths = []
    for jar in application.jars:
        current_jar_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=jar,
            blob_client=spark_client.blob_client,
            use_full_path=False,
        )
        jar_resource_file_paths.append(current_jar_resource_file_path)
        resource_files.append(current_jar_resource_file_path)

    # Upload dependent python files
    py_files_resource_file_paths = []
    for py_file in application.py_files:
        current_py_files_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=py_file,
            blob_client=spark_client.blob_client,
            use_full_path=False,
        )
        py_files_resource_file_paths.append(
            current_py_files_resource_file_path)
        resource_files.append(current_py_files_resource_file_path)

    # Upload other dependent files
    files_resource_file_paths = []
    for file in application.files:
        files_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=file,
            blob_client=spark_client.blob_client,
            use_full_path=False,
        )
        files_resource_file_paths.append(files_resource_file_path)
        resource_files.append(files_resource_file_path)

    # Upload application definition
    application.jars = [os.path.basename(jar) for jar in application.jars]
    application.py_files = [
        os.path.basename(py_files) for py_files in application.py_files
    ]
    application.files = [
        os.path.basename(files) for files in application.files
    ]
    application_definition_file = helpers.upload_text_to_container(
        container_name=container_id,
        application_name=application.name,
        file_path="application.yaml",
        content=yaml.dump(vars(application)),
        blob_client=spark_client.blob_client,
    )
    resource_files.append(application_definition_file)

    # create command to submit task
    task_cmd = CommandBuilder("sudo docker exec")
    task_cmd.add_argument("-i")
    task_cmd.add_option(
        "-e", "AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR")
    task_cmd.add_option("-e",
                        "STORAGE_LOGS_CONTAINER={0}".format(container_id))
    task_cmd.add_argument("spark /bin/bash >> output.log 2>&1")
    task_cmd.add_argument(
        r'-c "source ~/.bashrc; '
        r"export PYTHONPATH=$PYTHONPATH:\$AZTK_WORKING_DIR; "
        r"cd \$AZ_BATCH_TASK_WORKING_DIR; "
        r'\$AZTK_WORKING_DIR/.aztk-env/.venv/bin/python \$AZTK_WORKING_DIR/aztk/node_scripts/submit.py"'
    )

    # Create task
    task = batch_models.TaskAddParameter(
        id=application.name,
        command_line=helpers.wrap_commands_in_shell([task_cmd.to_str()]),
        resource_files=resource_files,
        constraints=batch_models.TaskConstraints(
            max_task_retry_count=application.max_retry_count),
        user_identity=batch_models.UserIdentity(
            auto_user=batch_models.AutoUserSpecification(
                scope=batch_models.AutoUserScope.task,
                elevation_level=batch_models.ElevationLevel.admin)),
    )

    return task
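
generate_task only builds the TaskAddParameter; scheduling it mirrors Example #4, where the Batch job shares its id with the cluster. A short sketch, with cluster_id and application as placeholders:

# Add the generated task to the cluster's Batch job and optionally wait for it.
task = generate_task(spark_client, container_id=cluster_id, application=application)
spark_client.batch_client.task.add(job_id=cluster_id, task=task)

# Same completion helper used in Example #4.
helpers.wait_for_task_to_complete(
    job_id=cluster_id, task_id=task.id, batch_client=spark_client.batch_client)
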
Example #6
def generate_task(spark_client, container_id, application):
    resource_files = []

    app_resource_file = helpers.upload_file_to_container(
        container_name=container_id,
        application_name=application.name,
        file_path=application.application,
        blob_client=spark_client.blob_client,
        use_full_path=False)

    # Upload application file
    resource_files.append(app_resource_file)

    # Upload dependent JARS
    jar_resource_file_paths = []
    for jar in application.jars:
        current_jar_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=jar,
            blob_client=spark_client.blob_client,
            use_full_path=False)
        jar_resource_file_paths.append(current_jar_resource_file_path)
        resource_files.append(current_jar_resource_file_path)

    # Upload dependent python files
    py_files_resource_file_paths = []
    for py_file in application.py_files:
        current_py_files_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=py_file,
            blob_client=spark_client.blob_client,
            use_full_path=False)
        py_files_resource_file_paths.append(
            current_py_files_resource_file_path)
        resource_files.append(current_py_files_resource_file_path)

    # Upload other dependent files
    files_resource_file_paths = []
    for file in application.files:
        files_resource_file_path = helpers.upload_file_to_container(
            container_name=container_id,
            application_name=application.name,
            file_path=file,
            blob_client=spark_client.blob_client,
            use_full_path=False)
        files_resource_file_paths.append(files_resource_file_path)
        resource_files.append(files_resource_file_path)

    # Upload application definition
    application.application = os.path.basename(application.application)
    application.jars = [os.path.basename(jar) for jar in application.jars]
    application.py_files = [
        os.path.basename(py_files) for py_files in application.py_files
    ]
    application.files = [
        os.path.basename(files) for files in application.files
    ]
    application_definition_file = helpers.upload_text_to_container(
        container_name=container_id,
        application_name=application.name,
        file_path='application.yaml',
        content=yaml.dump(vars(application)),
        blob_client=spark_client.blob_client)
    resource_files.append(application_definition_file)

    # create command to submit task
    task_cmd = CommandBuilder('sudo docker exec')
    task_cmd.add_argument('-i')
    task_cmd.add_option(
        '-e', 'AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR')
    task_cmd.add_option('-e',
                        'STORAGE_LOGS_CONTAINER={0}'.format(container_id))
    task_cmd.add_argument('spark /bin/bash >> output.log 2>&1')
    task_cmd.add_argument('-c "source ~/.bashrc; '\
                          'cd $AZ_BATCH_TASK_WORKING_DIR; ' \
                          '\$(pyenv root)/versions/\$AZTK_PYTHON_VERSION/bin/python ' \
                          '\$DOCKER_WORKING_DIR/aztk/node_scripts/submit.py"')

    # Create task
    task = batch_models.TaskAddParameter(
        id=application.name,
        command_line=helpers.wrap_commands_in_shell([task_cmd.to_str()]),
        resource_files=resource_files,
        constraints=batch_models.TaskConstraints(
            max_task_retry_count=application.max_retry_count),
        user_identity=batch_models.UserIdentity(
            auto_user=batch_models.AutoUserSpecification(
                scope=batch_models.AutoUserScope.task,
                elevation_level=batch_models.ElevationLevel.admin)))

    return task