def test_app_create(self):
    with patch('pyspark.SparkContext.__init__',
               return_value=None) as mock_spark:
        a = App()
        self.assertTrue(a.sc)
        mock_spark.assert_called_with(appName='spark-hep', master='local')

        # reset_mock() (not clear(), which is not part of the Mock API)
        # discards the recorded call before checking the custom arguments
        mock_spark.reset_mock()
        App(appName="foo", master="spark-master")
        mock_spark.assert_called_with(appName='foo', master='spark-master')
def test_provisioned_dataset_manager(self):
    mock_datasource_manager = Mock(DatasetManager)
    mock_executor = MagicMock(Executor)
    mock_datasource_manager.provisioned = True
    a = App(
        Config(executor=mock_executor,
               dataset_manager=mock_datasource_manager))
    self.assertTrue(a.datasets)
def test_read_dataset(self):
    # Create a datasource manager that returns our two files
    mock_datasource_manager = Mock(DatasetManager)
    mock_datasource_manager.provisioned = True
    mock_datasource_manager.get_file_list = Mock(
        return_value=["/tmp/foo.root", "/tmp/bar.root"])

    mock_dataset = Mock()
    mock_executor = Mock(Executor)
    mock_executor.read_files = Mock(return_value=mock_dataset)

    a = App(
        Config(executor=mock_executor, num_partitions=42,
               dataset_manager=mock_datasource_manager))
    rslt = a.read_dataset("mydataset")

    mock_datasource_manager.get_file_list.assert_called_with("mydataset")
    mock_executor.read_files.assert_called_with(
        "mydataset", ["/tmp/foo.root", "/tmp/bar.root"])
    self.assertEqual(rslt, mock_dataset)
def test_unprovisioned_dataset_manager(self):
    mock_datasource_manager = Mock(DatasetManager)
    mock_datasource_manager.provision = Mock()
    mock_executor = MagicMock(Executor)
    mock_datasource_manager.provisioned = False
    a = App(
        Config(executor=mock_executor,
               dataset_manager=mock_datasource_manager))
    self.assertTrue(a.datasets)
    mock_datasource_manager.provision.assert_called_once()
def test_app_create(self):
    # Stub the SparkSession builder chain so no real Spark session is created
    builder = pyspark.sql.session.SparkSession.Builder()
    mock_session = MagicMock(SparkSession)
    builder.master = Mock(return_value=builder)
    builder.appName = Mock(return_value=builder)
    builder.getOrCreate = Mock(return_value=mock_session)

    mock_dataset_manager = MagicMock(DatasetManager)
    mock_executor = MagicMock(Executor)
    a = App(
        Config(executor=mock_executor,
               dataset_manager=mock_dataset_manager))
    assert a
    self.assertEqual(a.dataset_manager, mock_dataset_manager)
    self.assertEqual(a.executor, mock_executor)
from irishep.executors.uproot_executor import UprootExecutor
from irishep.executors.spark_executor import SparkExecutor
from irishep.app import App
from irishep.config import Config
from irishep.datasets.inmemory_files_dataset_manager import \
    InMemoryFilesDatasetManager
from zpeak_analysis import ZpeakAnalysis

# lookup_tools was used below without an import; coffea is the assumed source
from coffea import lookup_tools

executor = UprootExecutor("zpeak")
# executor = SparkExecutor("local", "ZPeak", 20)

config = Config(
    executor=executor,
    dataset_manager=InMemoryFilesDatasetManager(
        database_file="demo_datasets.csv")
)

app = App(config=config)
print(app.datasets.get_names())
print(app.datasets.get_file_list("ZJetsToNuNu_HT-600To800_13TeV-madgraph"))

# Create a broadcast variable for the non-event data
weightsext = lookup_tools.extractor()
correctionDescriptions = open("newCorrectionFiles.txt").readlines()
weightsext.add_weight_sets(correctionDescriptions)
weightsext.finalize()
weights_eval = weightsext.make_evaluator()

dataset = app.read_dataset("DY Jets")
print(dataset.columns)
print(dataset.count())
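# Illustration (not part of the original demo): the evaluator returned by
# make_evaluator() is indexed by the weight-set names declared in
# newCorrectionFiles.txt, and each entry is callable on columnar arrays.
# The weight-set name and argument columns below are hypothetical:
#
#     sf = weights_eval["some_scalefactor"](electron_eta, electron_pt)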