def test_history_server(tag, role, image_uri, sagemaker_session, region):
    spark = PySparkProcessor(
        base_job_name="sm-spark",
        framework_version=tag,
        image_uri=image_uri,
        role=role,
        instance_count=1,
        instance_type="ml.c5.xlarge",
        max_runtime_in_seconds=1200,
        sagemaker_session=sagemaker_session,
    )
    bucket = sagemaker_session.default_bucket()
    spark_event_logs_key_prefix = "spark/spark-history-fs"
    spark_event_logs_s3_uri = "s3://{}/{}".format(bucket, spark_event_logs_key_prefix)
    spark_event_log_local_path = "test/resources/data/files/sample_spark_event_logs"
    file_name = "sample_spark_event_logs"
    file_size = os.path.getsize(spark_event_log_local_path)

    with open("test/resources/data/files/sample_spark_event_logs") as data:
        body = data.read()
        S3Uploader.upload_string_as_file_body(
            body=body, desired_s3_uri=f"{spark_event_logs_s3_uri}/{file_name}", sagemaker_session=sagemaker_session,
        )

    _wait_for_file_to_be_uploaded(region, bucket, spark_event_logs_key_prefix, file_name, file_size)
    spark.start_history_server(spark_event_logs_s3_uri=spark_event_logs_s3_uri)

    try:
        response = _request_with_retry(HISTORY_SERVER_ENDPOINT)
        assert response.status == 200

        response = _request_with_retry(f"{HISTORY_SERVER_ENDPOINT}{SPARK_APPLICATION_URL_SUFFIX}", max_retries=15)
        print(f"Subpage response status code: {response.status}")
    finally:
        spark.terminate_history_server()
Exemplo n.º 2
0
def test_history_server(tag, role, image_uri, sagemaker_session, region):
    spark = PySparkProcessor(
        base_job_name="sm-spark",
        framework_version=tag,
        image_uri=image_uri,
        role=role,
        instance_count=1,
        instance_type="ml.c5.xlarge",
        max_runtime_in_seconds=1200,
        sagemaker_session=sagemaker_session,
    )
    bucket = sagemaker_session.default_bucket()
    spark_event_logs_key_prefix = "spark/spark-history-fs"
    spark_event_logs_s3_uri = "s3://{}/{}".format(bucket,
                                                  spark_event_logs_key_prefix)
    spark_event_log_local_path = "test/resources/data/files/sample_spark_event_logs"
    file_name = "sample_spark_event_logs"
    file_size = os.path.getsize(spark_event_log_local_path)

    with open("test/resources/data/files/sample_spark_event_logs") as data:
        body = data.read()
        S3Uploader.upload_string_as_file_body(
            body=body,
            desired_s3_uri=f"{spark_event_logs_s3_uri}/{file_name}",
            sagemaker_session=sagemaker_session,
        )

    _wait_for_file_to_be_uploaded(region, bucket, spark_event_logs_key_prefix,
                                  file_name, file_size)
    spark.start_history_server(spark_event_logs_s3_uri=spark_event_logs_s3_uri)

    try:
        response = _request_with_retry(HISTORY_SERVER_ENDPOINT)
        assert response.status == 200

        # spark has redirect behavior, this request verify that page navigation works with redirect
        response = _request_with_retry(
            f"{HISTORY_SERVER_ENDPOINT}{SPARK_APPLICATION_URL_SUFFIX}")
        if response.status != 200:
            print(subprocess.run(["docker", "logs", "history_server"]))

        assert response.status == 200

        html_content = response.data.decode("utf-8")
        assert "Completed Jobs (4)" in html_content
        assert "collect at /opt/ml/processing/input/code/test_long_duration.py:32" in html_content
    finally:
        spark.terminate_history_server()
def test_history_server_with_expected_failure(tag, role, image_uri, sagemaker_session, caplog):
    spark = PySparkProcessor(
        base_job_name="sm-spark",
        framework_version=tag,
        image_uri=image_uri,
        role=role,
        instance_count=1,
        instance_type="ml.c5.xlarge",
        max_runtime_in_seconds=1200,
        sagemaker_session=sagemaker_session,
    )

    caplog.set_level(logging.ERROR)
    spark.start_history_server(spark_event_logs_s3_uri="invalids3uri")
    response = _request_with_retry(HISTORY_SERVER_ENDPOINT, max_retries=5)
    assert response is None
    assert "History server failed to start. Please run 'docker logs history_server' to see logs" in caplog.text