Exemplo n.º 1
0
    def test_app_create(self):
        """Check the arguments App passes to the SparkContext constructor.

        ``pyspark.SparkContext.__init__`` is patched out so no real context
        is started. The test first builds an ``App`` with no arguments and
        expects ``appName='spark-hep'`` and ``master='local'``; it then
        builds one with explicit values and expects those to be forwarded.
        """
        with patch('pyspark.SparkContext.__init__',
                   return_value=None) as mock_spark:
            a = App()
            self.assertTrue(a.sc)
            mock_spark.assert_called_with(appName='spark-hep', master='local')

            # Discard the recorded call before the second scenario.
            # ``reset_mock()`` is the actual Mock API for this; a call such
            # as ``mock_spark.clear()`` only hits an auto-created child mock
            # and resets nothing.
            mock_spark.reset_mock()
            App(appName="foo", master="spark-master")
            mock_spark.assert_called_with(appName='foo', master='spark-master')
Exemplo n.º 2
0
    def test_provisioned_dataset_manager(self):
        """An App built with an already-provisioned manager has datasets."""
        manager = Mock(DatasetManager)
        manager.provisioned = True
        executor = MagicMock(Executor)

        config = Config(executor=executor, dataset_manager=manager)
        app = App(config)

        self.assertTrue(app.datasets)
Exemplo n.º 3
0
    def test_read_dataset(self):
        """read_dataset looks up the file list and hands it to the executor.

        The dataset manager mock reports two files for any dataset name; the
        executor mock returns a sentinel dataset from ``read_files``. The
        test checks both collaborators were called with the expected
        arguments and that the executor's result is returned unchanged.
        """
        expected_files = ["/tmp/foo.root", "/tmp/bar.root"]

        # Dataset manager stub that always reports our two files.
        manager = Mock(DatasetManager)
        manager.provisioned = True
        manager.get_file_list = Mock(return_value=list(expected_files))

        # Executor stub whose read_files yields a recognisable object.
        sentinel_dataset = Mock()
        executor = Mock(Executor)
        executor.read_files = Mock(return_value=sentinel_dataset)

        config = Config(executor=executor,
                        num_partitions=42,
                        dataset_manager=manager)
        app = App(config)

        result = app.read_dataset("mydataset")

        manager.get_file_list.assert_called_with("mydataset")
        executor.read_files.assert_called_with("mydataset", expected_files)
        self.assertEqual(result, sentinel_dataset)
Exemplo n.º 4
0
    def test_unprovisioned_dataset_manager(self):
        """Building an App with an unprovisioned manager calls provision once."""
        manager = Mock(DatasetManager)
        manager.provision = Mock()
        manager.provisioned = False

        executor = MagicMock(Executor)

        config = Config(executor=executor, dataset_manager=manager)
        app = App(config)

        self.assertTrue(app.datasets)
        manager.provision.assert_called_once()
Exemplo n.º 5
0
    def test_app_create(self):
        """An App keeps the executor and dataset manager from its Config."""
        # NOTE(review): the builder and session mocks below are configured
        # but never patched in or passed to App in this test - confirm
        # whether they are still needed.
        spark_builder = pyspark.sql.session.SparkSession.Builder()
        spark_session = MagicMock(SparkSession)

        # Make the fluent builder calls return the builder itself.
        for fluent_method in ("master", "appName"):
            setattr(spark_builder, fluent_method,
                    Mock(return_value=spark_builder))
        spark_builder.getOrCreate = Mock(return_value=spark_session)

        manager = MagicMock(DatasetManager)
        executor = MagicMock(Executor)

        config = Config(executor=executor, dataset_manager=manager)
        a = App(config)

        assert a
        self.assertEqual(a.dataset_manager, manager)
        self.assertEqual(a.executor, executor)
Exemplo n.º 6
0
from irishep.executors.uproot_executor import UprootExecutor
from irishep.executors.spark_executor import SparkExecutor
from irishep.app import App
from irishep.config import Config
from irishep.datasets.inmemory_files_dataset_manager import \
    InMemoryFilesDatasetManager
from zpeak_analysis import ZpeakAnalysis

# Demo script: configure an App with an uproot executor and an in-memory
# dataset catalogue, list the known datasets, load the correction weights,
# then read one dataset and print its columns and row count.
executor = UprootExecutor("zpeak")
# executor = SparkExecutor("local", "ZPeak", 20)

config = Config(
    executor=executor,
    dataset_manager=InMemoryFilesDatasetManager(
        database_file="demo_datasets.csv"),
)
app = App(config=config)
print(app.datasets.get_names())
print(app.datasets.get_file_list("ZJetsToNuNu_HT-600To800_13TeV-madgraph"))


# Load the correction weight sets (non-event data) and build an evaluator.
# NOTE(review): `lookup_tools` is not among the imports visible at the top of
# this script - confirm it is imported, otherwise this line raises NameError.
weightsext = lookup_tools.extractor()
# Use a context manager so the description file is closed once it is read.
with open("newCorrectionFiles.txt") as correction_file:
    correctionDescriptions = correction_file.readlines()
weightsext.add_weight_sets(correctionDescriptions)
weightsext.finalize()
weights_eval = weightsext.make_evaluator()


dataset = app.read_dataset("DY Jets")
print(dataset.columns)
print(dataset.count())