def run_data_service_test(ec2_connection, tensorflow_training, cmd):
    """Prepare the host, launch the data service, then run ``cmd`` as a training test.

    Steps, in order:
      1. Upgrade pip and install ``tensorflow==2.4`` on the host via
         ``ec2_connection.run``.
      2. Start ``start_dataservice.py`` from ``$HOME/container_tests/bin`` in a
         detached ``screen`` session.
      3. Hand ``cmd`` to ``execute_ec2_training_test`` with ``host_network=True``.

    :param ec2_connection: connection object exposing ``run``
    :param tensorflow_training: image passed through to ``execute_ec2_training_test``
    :param cmd: test command passed through to ``execute_ec2_training_test``
    """
    host_setup_commands = (
        "python3 -m pip install --upgrade pip",
        "pip3 install tensorflow==2.4",
    )
    for setup_command in host_setup_commands:
        ec2_connection.run(setup_command)

    # "$HOME" is left unexpanded here; the command string is run on the remote side.
    tests_root = os.path.join("$HOME", "container_tests")
    launch_data_service = f"cd {tests_root}/bin && screen -d -m python3 start_dataservice.py"
    ec2_connection.run(launch_data_service)

    execute_ec2_training_test(
        ec2_connection,
        tensorflow_training,
        cmd,
        host_network=True,
    )
def test_pytorch_linear_regression_cpu(pytorch_training, ec2_connection, cpu_only):
    """Run ``PT_REGRESSION_CMD`` against ``pytorch_training`` via ``execute_ec2_training_test``.

    ``cpu_only`` is not referenced in the body (presumably a pytest fixture that
    gates the test -- confirm in conftest).
    """
    command = PT_REGRESSION_CMD
    execute_ec2_training_test(ec2_connection, pytorch_training, command)
def test_pytorch_telemetry_cpu(
    pytorch_training,
    ec2_connection,
    cpu_only,
    pt15_and_above_only,
):
    """Run ``PT_TELEMETRY_CMD`` against ``pytorch_training`` via ``execute_ec2_training_test``.

    ``cpu_only`` and ``pt15_and_above_only`` are not referenced in the body
    (presumably gating pytest fixtures -- confirm in conftest).
    """
    command = PT_TELEMETRY_CMD
    execute_ec2_training_test(ec2_connection, pytorch_training, command)
def test_tensorflow_standalone_cpu(tensorflow_training, ec2_connection, cpu_only):
    """Run the standalone TensorFlow command that matches the image's major version.

    ``TF1_STANDALONE_CMD`` is used when ``is_tf_version("1", tensorflow_training)``
    is truthy, ``TF2_STANDALONE_CMD`` otherwise. ``cpu_only`` is not referenced
    in the body (presumably a gating pytest fixture -- confirm in conftest).
    """
    if is_tf_version("1", tensorflow_training):
        standalone_cmd = TF1_STANDALONE_CMD
    else:
        standalone_cmd = TF2_STANDALONE_CMD
    execute_ec2_training_test(ec2_connection, tensorflow_training, standalone_cmd)
def test_tensorflow_train_mnist_cpu(tensorflow_training, ec2_connection, cpu_only):
    """Run ``TF_MNIST_CMD`` against ``tensorflow_training`` via ``execute_ec2_training_test``.

    ``cpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    command = TF_MNIST_CMD
    execute_ec2_training_test(ec2_connection, tensorflow_training, command)
def test_tensorflow_tensorboard_cpu(
    tensorflow_training,
    ec2_connection,
    tf2_only,
    cpu_only,
):
    """Run ``TF_TENSORBOARD_CMD`` against ``tensorflow_training`` via ``execute_ec2_training_test``.

    ``tf2_only`` and ``cpu_only`` are not referenced in the body (presumably
    gating pytest fixtures -- confirm in conftest).
    """
    command = TF_TENSORBOARD_CMD
    execute_ec2_training_test(ec2_connection, tensorflow_training, command)
def test_tensorflow_standalone_hpu(
    tensorflow_training_habana,
    ec2_connection,
    upload_habana_test_artifact,
):
    """Run ``TF_HABANA_TEST_SUITE_CMD`` against the Habana TensorFlow image.

    The container is given the fixed name
    ``ec2_training_habana_tensorflow_container``.
    ``upload_habana_test_artifact`` is not referenced in the body (presumably a
    setup fixture -- confirm in conftest).
    """
    habana_container = "ec2_training_habana_tensorflow_container"
    execute_ec2_training_test(
        ec2_connection,
        tensorflow_training_habana,
        TF_HABANA_TEST_SUITE_CMD,
        container_name=habana_container,
    )
def test_tensorflow_with_horovod_cpu(tensorflow_training, ec2_connection, cpu_only):
    """Run the Horovod command that matches the image's TensorFlow major version.

    ``TF1_HVD_CMD`` is used when ``is_tf1(tensorflow_training)`` is truthy,
    ``TF2_HVD_CMD`` otherwise. ``cpu_only`` is not referenced in the body
    (presumably a gating pytest fixture -- confirm in conftest).
    """
    if is_tf1(tensorflow_training):
        horovod_cmd = TF1_HVD_CMD
    else:
        horovod_cmd = TF2_HVD_CMD
    execute_ec2_training_test(ec2_connection, tensorflow_training, horovod_cmd)
def test_mxnet_telemetry_cpu(mxnet_training, ec2_connection, cpu_only):
    """Run ``MX_TELEMETRY_CMD`` against ``mxnet_training`` via ``execute_ec2_training_test``.

    ``cpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    command = MX_TELEMETRY_CMD
    execute_ec2_training_test(ec2_connection, mxnet_training, command)
def test_tensorflow_tensorboard_cpu(tensorflow_training, ec2_connection, cpu_only):
    """Run ``TF_TENSORBOARD_CMD`` against ``tensorflow_training``; skipped for TF1 images.

    The test is skipped when ``is_tf1(tensorflow_training)`` is truthy.
    ``cpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    image_is_tf1 = is_tf1(tensorflow_training)
    if image_is_tf1:
        pytest.skip("This test is for TF2 only")

    execute_ec2_training_test(
        ec2_connection,
        tensorflow_training,
        TF_TENSORBOARD_CMD,
    )
def test_tensorflow_addons_cpu(tensorflow_training, ec2_connection, cpu_only):
    """Run ``TF_ADDONS_CMD`` against ``tensorflow_training``; skipped for TF1 images.

    The test is skipped when ``is_tf1(tensorflow_training)`` is truthy.
    ``cpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    image_is_tf1 = is_tf1(tensorflow_training)
    if image_is_tf1:
        pytest.skip("This test is for TF2 only")

    execute_ec2_training_test(
        ec2_connection,
        tensorflow_training,
        TF_ADDONS_CMD,
    )
def test_tensorflow_keras_horovod_fp32(tensorflow_training, ec2_connection, gpu_only):
    """Run ``TF_KERAS_HVD_CMD_FP32`` against ``tensorflow_training``; skipped for TF1 images.

    The test is skipped when ``is_tf1(tensorflow_training)`` is truthy.
    ``gpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    image_is_tf1 = is_tf1(tensorflow_training)
    if image_is_tf1:
        pytest.skip("This test is for TF2 and later only")

    execute_ec2_training_test(
        ec2_connection,
        tensorflow_training,
        TF_KERAS_HVD_CMD_FP32,
    )
def test_tensorflow_opencv_cpu(tensorflow_training, ec2_connection, cpu_only):
    """Run ``TF_OPENCV_CMD`` against ``tensorflow_training``; skipped for TF1 images.

    The test is skipped when ``is_tf1(tensorflow_training)`` is truthy.
    ``cpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    image_is_tf1 = is_tf1(tensorflow_training)
    if image_is_tf1:
        pytest.skip("This test is for TF2 only")

    execute_ec2_training_test(
        ec2_connection,
        tensorflow_training,
        TF_OPENCV_CMD,
    )
def test_mxnet_train_dgl_gpu(mxnet_training, ec2_connection, gpu_only, py3_only):
    """Run ``MX_DGL_CMD`` against ``mxnet_training`` via ``execute_ec2_training_test``.

    ``gpu_only`` and ``py3_only`` are not referenced in the body (presumably
    gating pytest fixtures -- confirm in conftest).
    """
    command = MX_DGL_CMD
    execute_ec2_training_test(ec2_connection, mxnet_training, command)
def test_tensorflow_telemetry_cpu(tensorflow_training, ec2_connection, cpu_only):
    """Run ``TF_TELEMETRY_CMD`` against ``tensorflow_training`` via ``execute_ec2_training_test``.

    ``cpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    command = TF_TELEMETRY_CMD
    execute_ec2_training_test(ec2_connection, tensorflow_training, command)
def test_mxnet_standalone_cpu(mxnet_training, ec2_connection, cpu_only):
    """Run ``MX_STANDALONE_CMD`` against ``mxnet_training`` via ``execute_ec2_training_test``.

    ``cpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    command = MX_STANDALONE_CMD
    execute_ec2_training_test(ec2_connection, mxnet_training, command)
def test_tensorflow_keras_horovod_fp32(
    tensorflow_training,
    ec2_connection,
    tf2_only,
    gpu_only,
    ec2_instance_type,
):
    """Run ``TF_KERAS_HVD_CMD_FP32`` unless the image is incompatible with the instance type.

    The test is skipped when
    ``test_utils.is_image_incompatible_with_instance_type`` reports the
    image/instance pair as incompatible. ``tf2_only`` and ``gpu_only`` are not
    referenced in the body (presumably gating pytest fixtures -- confirm in
    conftest).
    """
    incompatible = test_utils.is_image_incompatible_with_instance_type(
        tensorflow_training, ec2_instance_type
    )
    if incompatible:
        pytest.skip(
            f"Image {tensorflow_training} is incompatible with instance type {ec2_instance_type}"
        )

    execute_ec2_training_test(
        ec2_connection,
        tensorflow_training,
        TF_KERAS_HVD_CMD_FP32,
    )
def test_mxnet_train_mnist_cpu(mxnet_training, ec2_connection, cpu_only):
    """Run ``MX_MNIST_CMD`` against ``mxnet_training`` via ``execute_ec2_training_test``.

    ``cpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    command = MX_MNIST_CMD
    execute_ec2_training_test(ec2_connection, mxnet_training, command)
def test_tensorflow_addons_cpu(
    tensorflow_training,
    ec2_connection,
    tf2_only,
    cpu_only,
):
    """Run ``TF_ADDONS_CMD`` against ``tensorflow_training`` via ``execute_ec2_training_test``.

    ``tf2_only`` and ``cpu_only`` are not referenced in the body (presumably
    gating pytest fixtures -- confirm in conftest).
    """
    command = TF_ADDONS_CMD
    execute_ec2_training_test(ec2_connection, tensorflow_training, command)
def test_mxnet_keras_cpu(mxnet_training, ec2_connection, cpu_only):
    """Run ``MX_KERAS_CMD`` against ``mxnet_training`` via ``execute_ec2_training_test``.

    ``cpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    command = MX_KERAS_CMD
    execute_ec2_training_test(ec2_connection, mxnet_training, command)
def test_tensorflow_standalone_gpu(
    tensorflow_training,
    ec2_connection,
    gpu_only,
    ec2_instance_type,
):
    """Run the standalone TensorFlow command matching the image's major version.

    The test is skipped when
    ``test_utils.is_image_incompatible_with_instance_type`` reports the
    image/instance pair as incompatible. Otherwise ``TF1_STANDALONE_CMD`` is
    used when ``is_tf_version("1", tensorflow_training)`` is truthy and
    ``TF2_STANDALONE_CMD`` is used in every other case. ``gpu_only`` is not
    referenced in the body (presumably a gating pytest fixture -- confirm in
    conftest).
    """
    incompatible = test_utils.is_image_incompatible_with_instance_type(
        tensorflow_training, ec2_instance_type
    )
    if incompatible:
        pytest.skip(
            f"Image {tensorflow_training} is incompatible with instance type {ec2_instance_type}"
        )

    if is_tf_version("1", tensorflow_training):
        standalone_cmd = TF1_STANDALONE_CMD
    else:
        standalone_cmd = TF2_STANDALONE_CMD
    execute_ec2_training_test(ec2_connection, tensorflow_training, standalone_cmd)
def test_mxnet_train_dgl_gpu(mxnet_training, ec2_connection, gpu_only, py3_only):
    """Run ``MX_DGL_CMD`` against ``mxnet_training``; skipped for ``cu110`` images.

    The test is skipped when the substring ``"cu110"`` occurs in
    ``mxnet_training``. ``gpu_only`` and ``py3_only`` are not referenced in the
    body (presumably gating pytest fixtures -- confirm in conftest).
    """
    is_cuda_110_image = "cu110" in mxnet_training
    if is_cuda_110_image:
        pytest.skip("Skipping dgl tests on cuda 11.0 until available")

    execute_ec2_training_test(
        ec2_connection,
        mxnet_training,
        MX_DGL_CMD,
    )
def test_tensorflow_train_mnist_gpu(
    tensorflow_training,
    ec2_connection,
    gpu_only,
    ec2_instance_type,
):
    """Run ``TF_MNIST_CMD`` unless the image is incompatible with the instance type.

    The test is skipped when
    ``test_utils.is_image_incompatible_with_instance_type`` reports the
    image/instance pair as incompatible. ``gpu_only`` is not referenced in the
    body (presumably a gating pytest fixture -- confirm in conftest).
    """
    incompatible = test_utils.is_image_incompatible_with_instance_type(
        tensorflow_training, ec2_instance_type
    )
    if incompatible:
        pytest.skip(
            f"Image {tensorflow_training} is incompatible with instance type {ec2_instance_type}"
        )

    execute_ec2_training_test(
        ec2_connection,
        tensorflow_training,
        TF_MNIST_CMD,
    )
def test_mxnet_train_nlp_cpu(mxnet_training, ec2_connection, cpu_only, py3_only):
    """Run ``MX_NLP_CMD`` against ``mxnet_training`` via ``execute_ec2_training_test``.

    ``cpu_only`` and ``py3_only`` are not referenced in the body (presumably
    gating pytest fixtures -- confirm in conftest).
    """
    command = MX_NLP_CMD
    execute_ec2_training_test(ec2_connection, mxnet_training, command)
def _run_dependency_check_test(image, ec2_connection):
    """Run the dependency-check script for ``image`` and fail on unexpected HIGH/CRITICAL CVEs.

    The ``testDependencyCheck`` script is executed in a container on the EC2
    host. The HTML report it produces is copied out of the container, uploaded
    to ``s3://dlc-dependency-check`` and scanned for CVE ids. Every id that is
    not in the allow-list is looked up in the NVD REST API to obtain its
    severity.

    :param image: image URI under test
    :param ec2_connection: connection object exposing ``run``
    :raises DependencyCheckFailure: when at least one non-allowed CVE is rated
        ``CRITICAL`` or ``HIGH`` by NVD. CVEs whose severity could not be
        fetched are not reported, so the check passes if nothing was
        classified as HIGH/CRITICAL.
    """
    # Record any whitelisted medium/low severity CVEs; I.E. allowed_vulnerabilities = {CVE-1000-5555, CVE-9999-9999}
    allowed_vulnerabilities = {
        # Those vulnerabilities are fixed. Current openssl version is 1.1.1g. These are false positive
        "CVE-2016-2109",
        "CVE-2016-2177",
        "CVE-2016-6303",
        "CVE-2016-2182",
        # CVE-2020-13936: vulnerability found in apache velocity package which is a dependency for
        # dependency-check package. Hence, ignoring.
        "CVE-2020-13936",
    }

    processor = get_processor_from_image_uri(image)

    # Whitelist CVE #CVE-2021-3711 for DLCs where openssl is installed using apt-get
    framework, _ = get_framework_and_version_from_tag(image)
    # An image string without an "X.Y" version simply gets no extra allowance
    # instead of crashing with AttributeError on a failed match.
    version_match = re.search(r"(\d+\.\d+)", image)
    short_fw_version = version_match.group(1) if version_match else None
    allow_openssl_cve_fw_versions = {
        "tensorflow": {
            "1.15": ["cpu", "gpu", "neuron"],
            "2.3": ["cpu", "gpu"],
            "2.4": ["cpu", "gpu"],
            "2.5": ["cpu", "gpu"],
            "2.6": ["cpu", "gpu"],
        },
        "mxnet": {"1.8": ["neuron"], "1.9": ["cpu", "gpu"]},
        "pytorch": {},
        "huggingface_pytorch": {"1.8": ["cpu", "gpu"], "1.9": ["cpu", "gpu"]},
        "huggingface_tensorflow": {"2.4": ["cpu", "gpu"], "2.5": ["cpu", "gpu"]},
        "autogluon": {"0.3": ["graviton"]},
    }

    if processor in allow_openssl_cve_fw_versions.get(framework, {}).get(short_fw_version, []):
        allowed_vulnerabilities.add("CVE-2021-3711")

    container_name = f"dep_check_{processor}"
    report_addon = get_container_name("depcheck-report", image)
    dependency_check_report = f"{report_addon}.html"
    html_file = f"{container_name}:/build/dependency-check-report.html"
    test_script = os.path.join(CONTAINER_TESTS_PREFIX, "testDependencyCheck")

    # Execute test, copy results to s3
    ec2.execute_ec2_training_test(
        ec2_connection, image, test_script, container_name=container_name, bin_bash_entrypoint=True
    )
    ec2_connection.run(f"docker cp {html_file} ~/{dependency_check_report}")
    ec2_connection.run(f"aws s3 cp ~/{dependency_check_report} s3://dlc-dependency-check")

    # Check for any vulnerabilities not mentioned in allowed_vulnerabilities
    html_output = ec2_connection.run(f"cat ~/{dependency_check_report}", hide=True).stdout
    cves = re.findall(r">(CVE-\d+-\d+)</a>", html_output)
    vulnerabilities = set(cves) - allowed_vulnerabilities
    if not vulnerabilities:
        return

    vulnerability_severity = {}

    # Check NVD for vulnerability severity to provide this useful info in error message.
    # One session (with its retrying adapter) is shared by all lookups and is
    # closed when the block exits.
    with requests.Session() as session:
        session.mount(
            "https://",
            requests.adapters.HTTPAdapter(
                max_retries=Retry(total=5, status_forcelist=[404, 504, 502])
            ),
        )
        # Sorted so the CVE order inside the failure message is deterministic.
        for vulnerability in sorted(vulnerabilities):
            cve_url = f"https://services.nvd.nist.gov/rest/json/cve/1.0/{vulnerability}"
            try:
                response = session.get(cve_url)
            except (ConnectionError, requests.exceptions.RequestException):
                # requests raises its own exception types (e.g. after retries are
                # exhausted); a failed lookup is logged and skipped (best effort).
                LOGGER.exception(f"Failed to load NIST data for CVE {vulnerability}")
                continue

            if response.status_code != 200:
                continue

            # Fall back to a single empty record when CVE_Items is missing or empty.
            cve_items = response.json().get("result", {}).get("CVE_Items") or [{}]
            severity = (
                cve_items[0].get("impact", {}).get("baseMetricV2", {}).get("severity", "UNKNOWN")
            )
            vulnerability_severity.setdefault(severity, []).append(vulnerability)

    # TODO: Remove this once we have whitelisted appropriate LOW/MEDIUM vulnerabilities
    if not (vulnerability_severity.get("CRITICAL") or vulnerability_severity.get("HIGH")):
        return

    raise DependencyCheckFailure(
        f"Unrecognized CVEs have been reported : {vulnerability_severity}. "
        f"Allowed vulnerabilities are {allowed_vulnerabilities or None}. Please see "
        f"{dependency_check_report} for more details."
    )
def test_mxnet_with_horovod_gpu(mxnet_training, ec2_connection, gpu_only):
    """Run ``MX_HVD_CMD`` against ``mxnet_training`` via ``execute_ec2_training_test``.

    ``gpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    command = MX_HVD_CMD
    execute_ec2_training_test(ec2_connection, mxnet_training, command)
def test_pytorch_train_dgl_cpu(pytorch_training, ec2_connection, cpu_only, py3_only):
    """Run ``PT_DGL_CMD`` against ``pytorch_training`` via ``execute_ec2_training_test``.

    ``cpu_only`` and ``py3_only`` are not referenced in the body (presumably
    gating pytest fixtures -- confirm in conftest).
    """
    command = PT_DGL_CMD
    execute_ec2_training_test(ec2_connection, pytorch_training, command)
def test_tensorflow_opencv_cpu(
    tensorflow_training,
    ec2_connection,
    tf2_only,
    cpu_only,
):
    """Run ``TF_OPENCV_CMD`` against ``tensorflow_training`` via ``execute_ec2_training_test``.

    ``tf2_only`` and ``cpu_only`` are not referenced in the body (presumably
    gating pytest fixtures -- confirm in conftest).
    """
    command = TF_OPENCV_CMD
    execute_ec2_training_test(ec2_connection, tensorflow_training, command)
def test_pytorch_standalone_cpu(pytorch_training, ec2_connection, cpu_only):
    """Run ``PT_STANDALONE_CMD`` against ``pytorch_training`` via ``execute_ec2_training_test``.

    ``cpu_only`` is not referenced in the body (presumably a gating pytest
    fixture -- confirm in conftest).
    """
    command = PT_STANDALONE_CMD
    execute_ec2_training_test(ec2_connection, pytorch_training, command)
def test_tensorflow_keras_horovod_fp32(
    tensorflow_training,
    ec2_connection,
    tf2_only,
    gpu_only,
):
    """Run ``TF_KERAS_HVD_CMD_FP32`` against ``tensorflow_training`` via ``execute_ec2_training_test``.

    ``tf2_only`` and ``gpu_only`` are not referenced in the body (presumably
    gating pytest fixtures -- confirm in conftest).
    """
    command = TF_KERAS_HVD_CMD_FP32
    execute_ec2_training_test(ec2_connection, tensorflow_training, command)