def test_package_name(self):
    """Verify the downloadable package name built for several version combos."""
    # Fully-qualified versions are passed straight through.
    self.assertEqual(
        "spark-3.0.0-bin-hadoop3.2",
        checked_package_name("spark-3.0.0", "hadoop3.2", "hive2.3"))

    # Shorthand versions are normalized by checked_versions() first; the
    # Hadoop suffix differs between Spark 3.2 (hadoop2.7/hadoop3.2) and
    # Spark 3.3 (hadoop2/hadoop3) lines.
    cases = [
        ("3.2.0", "2", "spark-3.2.0-bin-hadoop2.7"),
        ("3.3.0", "2", "spark-3.3.0-bin-hadoop2"),
        ("3.2.0", "3", "spark-3.2.0-bin-hadoop3.2"),
        ("3.3.0", "3", "spark-3.3.0-bin-hadoop3"),
    ]
    for spark_in, hadoop_in, expected in cases:
        spark_version, hadoop_version, hive_version = checked_versions(
            spark_in, hadoop_in, "2.3")
        self.assertEqual(
            expected,
            checked_package_name(spark_version, hadoop_version, hive_version))
def test_install_spark(self):
    """Smoke-test install_spark() end to end for one version combination.

    Only a single case is exercised because each run downloads a full
    Spark distribution, which is expensive.
    """
    spark_version, hadoop_version, hive_version = checked_versions(
        "3.0.1", "3.2", "2.3")

    with tempfile.TemporaryDirectory() as tmp_dir:
        install_spark(
            dest=tmp_dir,
            spark_version=spark_version,
            hadoop_version=hadoop_version,
            hive_version=hive_version)

        # Landmarks that every unpacked Spark distribution contains.
        self.assertTrue(os.path.isdir(f"{tmp_dir}/jars"))
        self.assertTrue(os.path.exists(f"{tmp_dir}/bin/spark-submit"))
        self.assertTrue(os.path.exists(f"{tmp_dir}/RELEASE"))
def test_checked_versions(self):
    """Exercise checked_versions() normalization and its error paths."""
    test_version = "3.0.1"  # Arbitrary valid version used by the negative cases.

    # Positive cases: raw (spark, hadoop, hive) -> normalized triple.
    positive_cases = [
        (("spark-3.0.0", "hadoop2", "hive2.3"),
         ("spark-3.0.0", "hadoop2.7", "hive2.3")),
        (("3.0.0", "2", "2.3"),
         ("spark-3.0.0", "hadoop2.7", "hive2.3")),
        (("2.4.1", "without", "2.3"),
         ("spark-2.4.1", "without-hadoop", "hive2.3")),
        (("spark-3.0.1", "without-hadoop", "hive2.3"),
         ("spark-3.0.1", "without-hadoop", "hive2.3")),
        (("spark-3.3.0", "hadoop3", "hive2.3"),
         ("spark-3.3.0", "hadoop3", "hive2.3")),
        (("spark-3.3.0", "hadoop2", "hive2.3"),
         ("spark-3.3.0", "hadoop2", "hive2.3")),
    ]
    for raw, normalized in positive_cases:
        self.assertEqual(normalized, checked_versions(*raw))

    # Every unsupported Hadoop/Hive pairing must be rejected.
    for hadoop_version, hive_version in UNSUPPORTED_COMBINATIONS:
        with self.assertRaisesRegex(RuntimeError, "Hive.*should.*Hadoop"):
            checked_versions(
                spark_version=test_version,
                hadoop_version=hadoop_version,
                hive_version=hive_version)

    # Malformed Spark version string.
    with self.assertRaisesRegex(
            RuntimeError, "Spark version should start with 'spark-'"):
        checked_versions(
            spark_version="malformed",
            hadoop_version=DEFAULT_HADOOP,
            hive_version=DEFAULT_HIVE)

    # Malformed Hadoop version string.
    with self.assertRaisesRegex(RuntimeError, "Spark distribution.*malformed.*"):
        checked_versions(
            spark_version=test_version,
            hadoop_version="malformed",
            hive_version=DEFAULT_HIVE)

    # Malformed Hive version string.
    with self.assertRaisesRegex(RuntimeError, "Spark distribution.*malformed.*"):
        checked_versions(
            spark_version=test_version,
            hadoop_version=DEFAULT_HADOOP,
            hive_version="malformed")

    # hive1.2 distributions are not published for this Spark line.
    with self.assertRaisesRegex(
            RuntimeError, "Spark distribution of hive1.2 is not supported"):
        checked_versions(
            spark_version=test_version,
            hadoop_version="hadoop3",
            hive_version="hive1.2")