def test_range_operation_single_thread(self):
    """
    Check that the 'Range' operation is flagged as unsupported by the
    distributed backend and raises an Exception.
    """
    backend = Backend.SparkBackend()

    with self.assertRaises(Exception):
        backend.check_supported("Range")
def test_set_spark_context_default(self, connection):
    """
    Check that a `SparkContext` object is created with default options for
    the current system.
    """
    backend = Backend.SparkBackend()

    assert isinstance(backend.sc, pyspark.SparkContext)
def test_set_spark_context_with_conf(self, connection):
    """
    Check that the `SparkContext` object passed through the `sparkcontext`
    keyword argument is correctly used by the backend and keeps its
    configuration.
    """
    backend = Backend.SparkBackend(sparkcontext=connection)

    assert isinstance(backend.sc, pyspark.SparkContext)

    appname = backend.sc.getConf().get("spark.app.name")
    assert appname == "roottest-distrdf-spark"
def test_optimize_npartitions(self, connection):
    """
    The optimize_npartitions function returns the value of the
    `defaultParallelism` attribute of the `SparkContext`. This should be
    equal to the number of available cores in case of a context created on
    a single machine.
    """
    backend = Backend.SparkBackend(sparkcontext=connection)

    # The context provided by the `connection` fixture runs on two cores,
    # so two partitions are expected.
    assert backend.optimize_npartitions() == 2
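# The three fixture-based tests above rely on a `connection` pytest fixture
# that is defined outside this section (typically in a conftest.py). Its exact
# definition is not part of this file; the sketch below is only an assumption
# consistent with the assertions above (application name
# "roottest-distrdf-spark", two local cores so that `defaultParallelism`
# equals 2), not the fixture actually shipped with the test suite.
import pytest


@pytest.fixture(scope="session")
def connection():
    """Provide a session-wide SparkContext for the fixture-based tests."""
    sconf = (
        pyspark.SparkConf()
        .setAppName("roottest-distrdf-spark")
        .setMaster("local[2]")
    )
    sc = pyspark.SparkContext(conf=sconf)
    yield sc
    sc.stop()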
def test_optimize_npartitions_with_num_executors(self):
    """
    Check that the number of partitions is correctly set to the number of
    executors given in the `SparkConf` dictionary.
    """
    conf = {"spark.executor.instances": 10}
    sconf = pyspark.SparkConf().setAll(conf.items())
    sc = pyspark.SparkContext(conf=sconf)
    backend = Backend.SparkBackend(sparkcontext=sc)

    self.assertEqual(backend.optimize_npartitions(), 10)
def test_unsupported_operations(self):
    """Check that unsupported operations raise an Exception."""
    backend = Backend.SparkBackend()

    with self.assertRaises(Exception):
        backend.check_supported("Take")

    with self.assertRaises(Exception):
        backend.check_supported("Foreach")

    with self.assertRaises(Exception):
        backend.check_supported("Range")
def test_optimize_npartitions(self):
    """
    The optimize_npartitions function returns the value of the
    `defaultParallelism` attribute of the `SparkContext`. This should be
    equal to the number of available cores in case of a context created on
    a single machine.
    """
    ncores = 4
    sconf = pyspark.SparkConf().setMaster(f"local[{ncores}]")
    sc = pyspark.SparkContext(conf=sconf)
    backend = Backend.SparkBackend(sparkcontext=sc)

    self.assertEqual(backend.optimize_npartitions(), ncores)
def test_set_spark_context_with_conf(self):
    """
    Check that a `SparkContext` object is correctly created for a given
    `SparkConf` object in the config dictionary.
    """
    conf = {"spark.app.name": "my-pyspark-app1"}
    sconf = pyspark.SparkConf().setAll(conf.items())
    sc = pyspark.SparkContext(conf=sconf)
    backend = Backend.SparkBackend(sparkcontext=sc)

    self.assertIsInstance(backend.sc, pyspark.SparkContext)

    appname = backend.sc.getConf().get("spark.app.name")
    self.assertEqual(appname, "my-pyspark-app1")
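# The unittest-style tests above each create their own SparkContext, and only
# one SparkContext can be active per process. Running them back to back
# therefore needs the context to be stopped between tests. A possible cleanup,
# sketched here under the assumption that these methods live in a
# unittest.TestCase subclass, is a tearDown that stops any active context.
# Note that `_active_spark_context` is a private pyspark attribute, used here
# only for illustration.
def tearDown(self):
    """Stop the SparkContext created by the test, if any, so that the next
    test can start from a clean state."""
    sc = pyspark.SparkContext._active_spark_context
    if sc is not None:
        sc.stop()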
def test_none(self):
    """Check that incorrect operations raise an Exception."""
    backend = Backend.SparkBackend()

    with self.assertRaises(Exception):
        backend.check_supported("random")
def test_transformation(self):
    """Check that transformation nodes are classified accurately."""
    backend = Backend.SparkBackend()

    backend.check_supported("Define")
def test_action(self):
    """Check that action nodes are classified accurately."""
    backend = Backend.SparkBackend()

    backend.check_supported("Histo1D")