Example #1
    def test_esk604(self):
        """Test Esk-604: Execute Spark-SQL query."""
        # check if running in local mode
        sc = process_manager.service(SparkManager).get_session().sparkContext
        self.assertRegex(
            sc.getConf().get('spark.master', ''), r'local\[.*\]',
            'Spark not running in local mode, required for testing with local files'
        )

        # run Eskapade
        self.eskapade_run(resources.tutorial('esk604_spark_execute_query.py'))
        ds = process_manager.service(DataStore)

        # check data frame
        self.assertIn('spark_df_sql', ds,
                      'no object with key "spark_df_sql" in data store')
        self.assertIsInstance(ds['spark_df_sql'], pyspark.sql.DataFrame,
                              '"spark_df_sql" is not a Spark data frame')
        self.assertEqual(ds['spark_df_sql'].count(), 4,
                         'unexpected number of rows in filtered data frame')
        self.assertListEqual(ds['spark_df_sql'].columns,
                             ['loc', 'sumx', 'sumy'],
                             'unexpected columns in data frame')
        self.assertEqual(
            ds['spark_df_sql'].schema,
            process_manager.get('ApplySQL').get('SparkSQL').schema,
            'schema of data frame does not correspond to schema stored in link'
        )
        self.assertSetEqual(
            set(tuple(r) for r in ds['spark_df_sql'].collect()),
            {('e', 10, 15), ('d', 2, 11), ('b', 6, 16), ('a', 2, 18)},
            'unexpected values in loc/sumx/sumy columns')
Example #2
    def test_esk407(self):
        """Test Esk-407: Classification unbiased fit estimate."""
        # run Eskapade
        macro = resources.tutorial(
            'esk407_classification_unbiased_fit_estimate.py')
        self.eskapade_run(macro)
        ds = process_manager.service(DataStore)
        ws = process_manager.service(RooFitManager).ws

        # roofit objects check in datastore
        self.assertIn('fit_result', ds)
        self.assertIsInstance(ds['fit_result'], ROOT.RooFitResult)

        # roofit objects check in workspace
        mdata = ws.data('data')
        self.assertTrue(mdata)
        self.assertEqual(1000, mdata.numEntries())
        mpdf = ws.pdf('hist_model')
        self.assertTrue(mpdf)

        # successful fit result
        fit_result = ds['fit_result']
        self.assertEqual(0, fit_result.status())
        self.assertEqual(3, fit_result.covQual())

        lo_risk = ws.var('N_low_risk')
        self.assertTrue(lo_risk)
        self.assertTrue(lo_risk.getVal() < 1000)
        self.assertTrue(lo_risk.getError() > 0)
        hi_risk = ws.var('N_high_risk')
        self.assertTrue(hi_risk)
        self.assertTrue(hi_risk.getVal() > 0)
        self.assertTrue(hi_risk.getError() > 0)
Example #3
    def test_esk303(self):
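        """Test Esk-303: Histogrammar filler and plotter."""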
        settings = process_manager.service(ConfigObject)
        settings['batchMode'] = True

        self.eskapade_run(resources.tutorial('esk303_hgr_filler_plotter.py'))

        settings = process_manager.service(ConfigObject)
        ds = process_manager.service(DataStore)

        # data-generation checks
        self.assertIn('n_sum_rc', ds)
        self.assertEqual(650, ds['n_sum_rc'])
        self.assertIn('hist', ds)
        self.assertIsInstance(ds['hist'], dict)
        col_names = ['date', 'isActive', 'age', 'eyeColor', 'gender', 'company', 'latitude', 'longitude',
                     'isActive:age', 'latitude:longitude']
        self.assertListEqual(sorted(ds['hist'].keys()), sorted(col_names))

        # data-summary checks
        f_bases = ['date', 'isActive', 'age', 'eyeColor', 'gender', 'company', 'latitude', 'longitude',
                   'latitude_vs_longitude']
        file_names = ['report.tex'] + ['hist_{}.pdf'.format(col) for col in f_bases]
        for fname in file_names:
            path = '{0:s}/{1:s}/data/v0/report/{2:s}'.format(settings['resultsDir'], settings['analysisName'], fname)
            self.assertTrue(os.path.exists(path))
            statinfo = os.stat(path)
            self.assertTrue(statinfo.st_size > 0)
Example #4
    def test_esk405(self):
        """Test Esk-405: Simulation based on binned data"""

        # run Eskapade
        self.eskapade_run(
            resources.tutorial('esk405_simulation_based_on_binned_data.py'))
        ds = process_manager.service(DataStore)
        ws = process_manager.service(RooFitManager).ws

        # data-generation checks
        self.assertIn('n_rdh_accounts', ds)
        self.assertEqual(650, ds['n_rdh_accounts'])

        # roofit objects check in workspace
        self.assertIn('hpdf_Ndim', ws)
        self.assertIn('rdh_accounts', ws)

        mcats = ws.set('rdh_cats')
        self.assertTrue(mcats)
        self.assertEqual(1, len(mcats))
        mvars = ws.set('rdh_vars')
        self.assertTrue(mvars)
        self.assertEqual(3, len(mvars))
        mdata = ws.data('rdh_accounts')
        self.assertEqual(650, mdata.sumEntries())
Example #5
    def execute(self):
        """Execute the link."""
        # get data store from the process manager
        ds = process_manager.service(DataStore)

        # fetch data from data store
        if self.read_key not in ds:
            err_msg = 'No input data found in data store with key "{}".'.format(
                self.read_key)
            if not self.fail_missing_data:
                self.logger.error(err_msg)
                return StatusCode.Success
            raise KeyError(err_msg)
        data = ds[self.read_key]

        # create data frame
        spark = process_manager.service(SparkManager).get_session()
        self.logger.debug(
            'Converting data of type "{type}" to a Spark data frame.',
            type=type(data))
        ds[self.store_key] = data_conversion.create_spark_df(
            spark,
            data,
            schema=self.schema,
            process_methods=self._process_methods,
            **self.kwargs)

        return StatusCode.Success
Example #6
    def test_esk404(self):
        """Test Esk-404: Workspace create PDF, simulate, fit, plot"""

        # run Eskapade
        self.eskapade_run(
            resources.tutorial(
                'esk404_workspace_createpdf_simulate_fit_plot.py'))
        ds = process_manager.service(DataStore)
        ws = process_manager.service(RooFitManager).ws

        # data-generation checks
        self.assertIn('n_df_simdata', ds)
        self.assertEqual(1000, ds['n_df_simdata'])

        # roofit objects check in datastore
        self.assertIn('fit_result', ds)
        self.assertIsInstance(ds['fit_result'], ROOT.RooFitResult)

        # successful fit result
        fit_result = ds['fit_result']
        self.assertEqual(0, fit_result.status())
        self.assertEqual(3, fit_result.covQual())

        self.assertIn('simdata', ds)
        self.assertIsInstance(ds['simdata'], ROOT.RooDataSet)
        self.assertIn('simdata_plot', ds)
        self.assertIsInstance(ds['simdata_plot'], ROOT.RooPlot)

        # roofit objects check in workspace
        self.assertIn('model', ws)
        self.assertIn('bkg', ws)
        self.assertIn('sig', ws)
Example #7
    def test_esk301(self):
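        """Test Esk-301: Dataframe summary plotter."""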
        settings = process_manager.service(ConfigObject)
        settings['batchMode'] = True

        self.eskapade_run(resources.tutorial('esk301_dfsummary_plotter.py'))

        settings = process_manager.service(ConfigObject)

        ds = process_manager.service(DataStore)

        columns = ['var_a', 'var_b', 'var_c']

        # data-generation checks
        self.assertIn('data', ds)
        self.assertIsInstance(ds['data'], pd.DataFrame)
        self.assertListEqual(list(ds['data'].columns), columns)
        self.assertEqual(10000, len(ds['data']))

        # data-summary checks
        file_names = ['report.tex'] + ['hist_{}.pdf'.format(col) for col in columns]
        for fname in file_names:
            path = '{0:s}/{1:s}/data/v0/report/{2:s}'.format(settings['resultsDir'], settings['analysisName'], fname)
            self.assertTrue(os.path.exists(path))
            statinfo = os.stat(path)
            self.assertTrue(statinfo.st_size > 0)
Example #8
    def test_esk607(self):
        """Test Esk-607: Add column to Spark dataframe."""
        # check if running in local mode
        sc = process_manager.service(SparkManager).get_session().sparkContext
        self.assertRegex(
            sc.getConf().get('spark.master', ''), r'local\[.*\]',
            'Spark not running in local mode, required for testing with local files'
        )

        # run Eskapade
        self.eskapade_run(resources.tutorial('esk607_spark_with_column.py'))
        ds = process_manager.service(DataStore)

        # check data frame
        self.assertIn('new_spark_df', ds,
                      'no object with key "new_spark_df" in data store')
        self.assertIsInstance(ds['new_spark_df'], pyspark.sql.DataFrame,
                              '"new_spark_df" is not a Spark data frame')
        self.assertEqual(ds['new_spark_df'].count(), 5,
                         'unexpected number of rows in filtered data frame')
        self.assertListEqual(
            ds['new_spark_df'].columns,
            ['dummy', 'date', 'loc', 'x', 'y', 'pow_xy1', 'pow_xy2'],
            'unexpected columns in data frame')
        self.assertSetEqual(
            set(tuple(r) for r in ds['new_spark_df'].collect()),
            {('bla', 20090103, 'c', 5, 7, 78125.0, 78125.0),
             ('bal', 20090102, 'b', 3, 8, 6561.0, 6561.0),
             ('flo', 20090104, 'e', 3, 5, 243.0, 243.0),
             ('bar', 20090101, 'a', 1, 9, 1.0, 1.0),
             ('foo', 20090104, 'd', 1, 6, 1.0, 1.0)},
            'unexpected values in columns')
Example #9
    def execute(self):
        """Execute the link."""
        # get data store from the process manager
        ds = process_manager.service(DataStore)

        # check if data frame exists in data store
        if self.read_key not in ds:
            err_msg = 'No input data found in data store with key "{}".'.format(
                self.read_key)
            if not self.fail_missing_data:
                self.logger.error(err_msg)
                return StatusCode.Success
            raise KeyError(err_msg)

        # fetch data from data store
        data = ds[self.read_key]
        if not isinstance(data, pyspark.sql.DataFrame):
            spark = process_manager.service(SparkManager).get_session()
            self.logger.debug(
                'Converting data of type "{type}" to a Spark data frame.',
                type=type(data))
            data = data_conversion.create_spark_df(spark,
                                                   data,
                                                   schema=self.schema)

        # create data-frame writer with requested number of partitions/output files
        df_writer = data.repartition(self.num_files).write

        # call data-frame writer methods
        apply_transform_funcs(df_writer, self._write_methods)

        return StatusCode.Success
Example #10
    def test_esk408(self):
        """Test Esk-408: Classification error propagation after fit."""
        # run Eskapade
        self.eskapade_run(
            resources.tutorial(
                'esk408_classification_error_propagation_after_fit.py'))
        ds = process_manager.service(DataStore)
        ws = process_manager.service(RooFitManager).ws

        # data-generation checks
        self.assertIn('n_df_pvalues', ds)
        self.assertEqual(500, ds['n_df_pvalues'])
        self.assertIn('df_pvalues', ds)
        self.assertIsInstance(ds['df_pvalues'], pd.DataFrame)
        df = ds['df_pvalues']
        self.assertIn('high_risk_pvalue', df.columns)
        self.assertIn('high_risk_perror', df.columns)

        # roofit objects check in workspace
        fit_result = ws.obj('fit_result')
        self.assertTrue(fit_result)
        self.assertIsInstance(fit_result, ROOT.RooFitResult)
        # test for successful fit result
        self.assertEqual(0, fit_result.status())
        self.assertEqual(3, fit_result.covQual())

        frac = ws.var('frac')
        self.assertTrue(frac)
        self.assertTrue(frac.getVal() > 0)
        self.assertTrue(frac.getError() > 0)
Example #11
    def execute(self):
        """Execute the link."""
        ds = process_manager.service(DataStore)
        if self.into_ws:
            ws = process_manager.service(RooFitManager).ws

        for key in self.keys:
            obj = self.in_file.Get(key)
            if not obj:
                self.logger.warning(
                    'Object with key "{key}" not found in "{path}"; skipping.',
                    key=key,
                    path=self.path)
                continue
            # a. put object into the workspace
            if self.into_ws:
                try:
                    ws[key] = obj
                except Exception as exc:
                    raise RuntimeError(
                        'Could not import object "{}" into workspace.'.format(
                            key)) from exc
            # b. put object into datastore
            else:
                ds[key] = obj

        return StatusCode.Success
Example #12
    def test_esk602(self):
        """Test Esk-602: Read CSV files into a Spark data frame."""
        # check if running in local mode
        sc = process_manager.service(SparkManager).get_session().sparkContext
        self.assertRegex(
            sc.getConf().get('spark.master', ''), r'local\[.*\]',
            'Spark not running in local mode, required for testing with local files'
        )

        # run Eskapade
        self.eskapade_run(resources.tutorial('esk602_read_csv_to_spark_df.py'))
        ds = process_manager.service(DataStore)

        # check data frame
        self.assertIn('spark_df', ds,
                      'no object with key "spark_df" in data store')
        self.assertIsInstance(ds['spark_df'], pyspark.sql.DataFrame,
                              '"spark_df" is not a Spark data frame')
        self.assertEqual(ds['spark_df'].rdd.getNumPartitions(), 5,
                         'unexpected number of partitions in data frame')
        self.assertEqual(ds['spark_df'].count(), 12,
                         'unexpected number of rows in data frame')
        self.assertListEqual(ds['spark_df'].columns, ['date', 'loc', 'x', 'y'],
                             'unexpected columns in data frame')
        self.assertSetEqual(
            set((r['date'], r['loc']) for r in ds['spark_df'].collect()),
            {(20090101, 'a'), (20090102, 'b'), (20090103, 'c'),
             (20090104, 'd'), (20090104, 'e'), (20090106, 'a'),
             (20090107, 'b'), (20090107, 'c'), (20090107, 'd'),
             (20090108, 'e'), (20090109, 'e'), (20090109, 'f')},
            'unexpected values in date/loc columns')
Example #13
    def execute(self):
        """Execute the link."""
        self.logger.debug(
            'Applying the following SQL query to object(s) in DataStore: {query:s}.',
            query=self.query)

        ds = process_manager.service(DataStore)

        # register all objects in DataStore as SQL temporary views
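        # (assumes every object currently in the data store is a Spark data frame)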
        for key in ds:
            spark_df = ds[key]
            spark_df.createOrReplaceTempView(key)

        # get existing SparkSession
        spark = process_manager.service(SparkManager).get_session()

        # apply SQL-query to temporary view(s)
        result = spark.sql(self.query)

        # store dataframe schema
        self.schema = result.schema

        # convert to different data format if required
        if self.output_format == 'rdd':
            # convert to RDD of tuples
            result = result.rdd.map(tuple)
        elif self.output_format == 'pd':
            # convert to Pandas dataframe
            result = result.toPandas()

        ds[self.store_key] = result

        return StatusCode.Success
Example #14
    def execute(self):
        """Execute the link."""
        # get process manager and services
        ds = process_manager.service(DataStore)
        rfm = process_manager.service(RooFitManager)

        # get PDF from RooFitManager
        model = rfm.model(self.model_name)

        # check if dataset with upper bounds exists in data store
        if self.max_var_data_key not in ds:
            self.logger.warning(
                'No range upper-bound data in data store; generating {n:d} dummy bounds.',
                n=NUM_DUMMY_EVENTS)
            ds[self.max_var_data_key] = gen_max_var_data(model)

        # get max-var data
        max_var_data = ds.get(self.max_var_data_key)
        if not isinstance(max_var_data, ROOT.RooAbsData):
            raise TypeError('data with key "{}" are not RooFit data'.format(
                self.max_var_data_key))

        # select max-var data
        mv_sel_data = sel_max_var_data(model, max_var_data, self.event_frac)

        # generate data
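        # use the selected data as prototype events, without randomizing or resampling them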
        proto_arg = RooFit.ProtoData(mv_sel_data, False, False)
        data = model.pdf.generate(model.var_set, proto_arg,
                                  *self._gen_cmd_args.values())
        ds[self.store_key] = data

        return StatusCode.Success
Example #15
    def test_esk108reduce(self):
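        """Test Esk-108: Reduce."""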
        settings = process_manager.service(ConfigObject)
        settings['TESTING'] = True
        self.eskapade_run(resources.tutorial('esk108_reduce.py'))

        ds = process_manager.service(DataStore)

        self.assertEqual(20, ds['n_products'])
Example #16
    def test_esk110(self):
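        """Test Esk-110: Code profiling."""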
        self.eskapade_run(resources.tutorial('esk110_code_profiling.py'))

        settings = process_manager.service(ConfigObject)
        ds = process_manager.service(DataStore)

        self.assertEqual(0, len(process_manager))
        self.assertEqual(0, len(ds))
        self.assertIn('doCodeProfiling', settings)
        self.assertEqual('cumulative', settings['doCodeProfiling'])
Example #17
    def setUp(self):
        """Setup test environment"""

        settings = process_manager.service(ConfigObject)
        settings['analysisName'] = 'DataConversionTest'
        # ensure local testing
        spark_settings = [('spark.app.name', settings['analysisName']),
                          ('spark.master', 'local[*]'),
                          ('spark.driver.host', 'localhost')]
        process_manager.service(SparkManager).create_session(
            eskapade_settings=settings, spark_settings=spark_settings)
Example #18
    def setUp(self):
        """Set up test."""
        TutorialMacrosTest.setUp(self)
        settings = process_manager.service(ConfigObject)
        settings['analysisName'] = 'SparkAnalysisTutorialMacrosTest'

        # ensure local testing
        spark_settings = [('spark.app.name', settings['analysisName']),
                          ('spark.master', 'local[*]'),
                          ('spark.driver.host', 'localhost')]
        process_manager.service(SparkManager).create_session(
            eskapade_settings=settings, spark_settings=spark_settings)
Example #19
    def execute(self):
        """Execute the link.

        Reads the input file(s) and puts the dataframe in the datastore.
        """
        ds = process_manager.service(DataStore)
        settings = process_manager.service(ConfigObject)

        # 0. in fork mode, the input paths need to be reconfigured; the lock ensures this is done only once.
        if settings.get('fork', False):
            self.configure_paths(lock=True)

        # 1. first handle the case of no iteration: concatenate the input files into one dataframe.
        if not self._iterate:
            self.logger.debug('Reading datasets from files [{files}]',
                              files=', '.join('"{}"'.format(p)
                                              for p in self._paths))
            df = pd.concat(
                set_reader(p, self.reader, **self.kwargs) for p in self._paths)
            numentries = len(df.index)
        # 2. handle case where iteration has been turned on
        else:
            # try picking up new dataset from iterator
            df = next(self)
            while self.latest_data_length() == 0 and not self.is_finished():
                df = next(self)

            # at end of loop
            if self.latest_data_length() == 0:
                assert self.is_finished(), \
                    'Got empty dataset but not at end of iterator.'
                # at end of loop; skip rest of chain execution (but do perform finalize)
                return StatusCode.BreakChain

            # do we have more datasets to go?
            # pass this information to the (possible) repeater at the end of the chain
            reqstr = 'chainRepeatRequestBy_' + self.name
            settings[reqstr] = not self.is_finished()

            numentries = self.latest_data_length()
            sumentries = self.sum_data_length()
            self.logger.info(
                'Read next <{n:d}> records; summing up to <{sum_n:d}>.',
                n=numentries,
                sum_n=sumentries)
            ds['n_sum_' + self.key] = sumentries

        # store dataframe and number of entries
        ds[self.key] = df
        ds['n_' + self.key] = numentries

        return StatusCode.Success
Example #20
    def execute(self):
        """Execute the link."""
        # create data-frame reader
        spark = process_manager.service(SparkManager).get_session()
        data = spark.read

        # call data-frame reader methods
        data = apply_transform_funcs(data, self._read_methods)

        # store data in data store
        process_manager.service(DataStore)[self.store_key] = data

        return StatusCode.Success
Example #21
    def execute(self):
        """Execute the link.

        :returns: status code of execution
        :rtype: StatusCode
        """
        settings = process_manager.service(ConfigObject)
        ds = process_manager.service(DataStore)

        # --- your algorithm code goes here
        self.logger.debug("Now executing link: {link}.", link=self.name)

        return StatusCode.Success
Example #22
    def test_esk202(self):
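        """Test Esk-202: Write data."""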
        self.eskapade_run(resources.tutorial('esk202_writedata.py'))

        settings = process_manager.service(ConfigObject)

        ds = process_manager.service(DataStore)

        self.assertEqual(36, ds['n_test'])
        path = settings['resultsDir'] + '/' + settings['analysisName'] + '/data/v0/tmp3.csv'
        self.assertTrue(os.path.exists(path))
        # check file is non-empty
        statinfo = os.stat(path)
        self.assertTrue(statinfo.st_size > 0)
Example #23
    def test_esk106(self):
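        """Test Esk-106: Command-line options."""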
        settings = process_manager.service(ConfigObject)
        # fake a command-line setting; it is picked up in the macro
        settings['do_chain0'] = False

        self.eskapade_run(resources.tutorial('esk106_cmdline_options.py'))

        settings = process_manager.service(ConfigObject)

        self.assertEqual(1, len(process_manager))
        self.assertEqual('Chain1', list(process_manager)[0].name)
        self.assertEqual(False, settings.get('do_chain0', True))
        self.assertEqual(True, settings.get('do_chain1', True))
        self.assertEqual('Universe', list(list(process_manager)[0])[0].hello)
Example #24
    def setUp(self):
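        """Set up a Spark session and a test data frame in the data store."""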

        test_df = pd.DataFrame({
            'dt': [
                '2017-01-01 12:00:00', '2017-01-01 13:00:00',
                '2017-01-03 12:45:23'
            ],
            'a': [1, 2, 5],
            'b': [1, 2, 1]
        })
        settings = process_manager.service(ConfigObject)
        spark = process_manager.service(SparkManager).create_session(
            eskapade_settings=settings)
        ds = process_manager.service(DataStore)
        ds['test_input'] = spark.createDataFrame(test_df)
Example #25
    def test_spark_setup(self):
        """Test if Spark setup is working properly"""

        settings = process_manager.service(ConfigObject)
        settings['analysisName'] = 'spark_setup'

        sm = process_manager.service(SparkManager)
        spark = sm.create_session(eskapade_settings=settings)

        df = spark.createDataFrame([(0, 'foo'), (1, 'bar')], ['id', 'value'])

        self.assertSetEqual(set(tuple(r) for r in df.collect()),
                            {(0, 'foo'), (1, 'bar')},
                            'unexpected values in columns')
        sm.finish()
Example #26
    def test_esk411(self):
        """Test Esk-411: Predictive maintenance Weibull fit."""
        # run Eskapade
        macro = resources.tutorial('esk411_weibull_predictive_maintenance.py')
        self.eskapade_run(macro)
        ds = process_manager.service(DataStore)
        ws = process_manager.service(RooFitManager).ws

        # roofit objects check in datastore
        self.assertIn('fit_result', ds)
        self.assertIsInstance(ds['fit_result'], ROOT.RooFitResult)

        # roofit objects check in workspace
        self.assertIn('binnedData', ds)
        self.assertIsInstance(ds['binnedData'], ROOT.RooDataHist)
        mdata = ds['binnedData']
        self.assertTrue(mdata)
        self.assertEqual(300, mdata.numEntries())
        mpdf = ws.pdf('sum3pdf')
        self.assertTrue(mpdf)

        # successful fit result
        fit_result = ds['fit_result']
        self.assertEqual(0, fit_result.status())
        self.assertEqual(3, fit_result.covQual())

        n1 = ws.var('N1')
        self.assertTrue(n1)
        self.assertGreater(n1.getVal(), 2.e5)
        n2 = ws.var('N2')
        self.assertTrue(n2)
        self.assertGreater(n2.getVal(), 4.e5)
        n3 = ws.var('N3')
        self.assertTrue(n3)
        self.assertGreater(n3.getVal(), 5.e4)

        # data-summary checks
        file_names = [
            'weibull_fit_report.tex', 'correlation_matrix_fit_result.pdf',
            'floating_pars_fit_result.tex',
            'fit_of_time_difference_medium_range.pdf'
        ]
        for fname in file_names:
            path = persistence.io_path('results_data',
                                       'report/{}'.format(fname))
            self.assertTrue(os.path.exists(path))
            statinfo = os.stat(path)
            self.assertGreater(statinfo.st_size, 0)
Example #27
    def test_esk406(self):
        """Test Esk-406: Simulation based on unbinned data"""

        # run Eskapade
        macro = resources.tutorial(
            'esk406_simulation_based_on_unbinned_data.py')
        self.eskapade_run(macro)
        ds = process_manager.service(DataStore)

        # data-generation checks
        self.assertIn('n_correlated_data', ds)
        self.assertEqual(500, ds['n_correlated_data'])
        self.assertIn('n_rds_correlated_data', ds)
        self.assertEqual(500, ds['n_rds_correlated_data'])
        self.assertIn('n_df_simdata', ds)
        self.assertEqual(5000, ds['n_df_simdata'])

        self.assertIn('df_simdata', ds)
        self.assertIsInstance(ds['df_simdata'], pd.DataFrame)
        self.assertIn('hist', ds)
        self.assertIsInstance(ds['hist'], dict)

        # roofit objects check
        self.assertIn('keys_varset', ds)
        self.assertIsInstance(ds['keys_varset'], ROOT.RooArgSet)
        self.assertEqual(2, len(ds['keys_varset']))
        self.assertIn('rds_correlated_data', ds)
        self.assertIsInstance(ds['rds_correlated_data'], ROOT.RooDataSet)
        self.assertIn('simdata', ds)
        self.assertIsInstance(ds['simdata'], ROOT.RooDataSet)
Example #28
    def initialize(self):
        """Initialize the link."""
        # check input arguments
        self.check_arg_types(store_key=str,
                             max_var_data_key=str,
                             model_name=str,
                             event_frac=float)
        self.check_arg_vals('store_key', 'max_var_data_key', 'model_name',
                            'event_frac')

        # check if model exists
        rfm = process_manager.service(RooFitManager)
        model = rfm.model(self.model_name)
        if not model:
            self.logger.warning(
                'Model "{model}" does not exist; creating with default values.',
                model=self.model_name)
            model = rfm.model(self.model_name, model_cls=TruncExponential)

        # check if model PDF has been built
        if not model.is_built:
            model.build_model()

        # process command arguments for generate function
        self._gen_cmd_args = create_roofit_opts(create_linked_list=False,
                                                **self.kwargs)

        return StatusCode.Success
Example #29
    def test_esk402(self):
        """Test Esk-402: RooDataHist fill"""

        # run Eskapade
        self.eskapade_run(resources.tutorial('esk402_roodatahist_fill.py'))
        ds = process_manager.service(DataStore)

        # data-generation checks
        self.assertIn('n_accounts', ds)
        self.assertEqual(650, ds['n_accounts'])
        self.assertIn('n_rdh_accounts', ds)
        self.assertEqual(650, ds['n_rdh_accounts'])
        self.assertIn('to_factorized', ds)
        self.assertIsInstance(ds['to_factorized'], dict)
        self.assertIn('to_original', ds)
        self.assertIsInstance(ds['to_original'], dict)
        self.assertIn('map_rdh_accounts_to_original', ds)
        self.assertIsInstance(ds['map_rdh_accounts_to_original'], dict)

        # roofit objects check
        self.assertIn('accounts_catset', ds)
        self.assertIsInstance(ds['accounts_catset'], ROOT.RooArgSet)
        self.assertEqual(2, len(ds['accounts_catset']))
        self.assertIn('accounts_varset', ds)
        self.assertIsInstance(ds['accounts_varset'], ROOT.RooArgSet)
        self.assertEqual(6, len(ds['accounts_varset']))
        self.assertIn('rdh_accounts', ds)
        self.assertIsInstance(ds['rdh_accounts'], ROOT.RooDataHist)
Example #30
    def test_esk409(self):
        """Test Esk-409: Unredeemed vouchers."""
        # run Eskapade
        macro = resources.tutorial('esk409_unredeemed_vouchers.py')
        self.eskapade_run(macro)
        ds = process_manager.service(DataStore)

        # check generated data
        self.assertIn('voucher_redeems', ds)
        self.assertIn('voucher_ages', ds)
        self.assertIsInstance(ds['voucher_redeems'], ROOT.RooDataSet)
        self.assertIsInstance(ds['voucher_ages'], ROOT.RooDataSet)
        self.assertLess(ds['voucher_redeems'].numEntries(), 6000)
        self.assertGreater(ds['voucher_redeems'].numEntries(), 0)
        self.assertEqual(ds['voucher_ages'].numEntries(), 10000)

        # check fit result
        fit_link = process_manager.get('Fitting').get('Fit')
        self.assertEqual(fit_link.fit_result.status(), 0)
        n_ev_pull = (fit_link.results['n_ev'][0] -
                     6000.) / fit_link.results['n_ev'][1]
        self.assertGreater(n_ev_pull, -3.)
        self.assertLess(n_ev_pull, 3.)

        # check plot output
        plot_path = persistence.io_path('results_data', 'voucher_redeem.pdf')
        self.assertTrue(os.path.exists(plot_path))
        statinfo = os.stat(plot_path)
        self.assertGreater(statinfo.st_size, 0)