def test_esk604(self):
    """Test Esk-604: Execute Spark-SQL query."""
    # check if running in local mode
    sc = process_manager.service(SparkManager).get_session().sparkContext
    self.assertRegex(
        sc.getConf().get('spark.master', ''), r'local\[.*\]',
        'Spark not running in local mode, required for testing with local files')

    # run Eskapade
    self.eskapade_run(resources.tutorial('esk604_spark_execute_query.py'))
    ds = process_manager.service(DataStore)

    # check data frame
    self.assertIn('spark_df_sql', ds, 'no object with key "spark_df_sql" in data store')
    self.assertIsInstance(ds['spark_df_sql'], pyspark.sql.DataFrame,
                          '"spark_df_sql" is not a Spark data frame')
    self.assertEqual(ds['spark_df_sql'].count(), 4,
                     'unexpected number of rows in filtered data frame')
    self.assertListEqual(ds['spark_df_sql'].columns, ['loc', 'sumx', 'sumy'],
                         'unexpected columns in data frame')
    self.assertEqual(
        ds['spark_df_sql'].schema,
        process_manager.get('ApplySQL').get('SparkSQL').schema,
        'schema of data frame does not correspond to schema stored in link')
    self.assertSetEqual(
        set(tuple(r) for r in ds['spark_df_sql'].collect()),
        {('e', 10, 15), ('d', 2, 11), ('b', 6, 16), ('a', 2, 18)},
        'unexpected values in loc/sumx/sumy columns')
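# Illustrative sketch, not part of the test suite: the actual query lives in the
# esk604 macro. Given the expected output columns ['loc', 'sumx', 'sumy'], the
# macro applies an aggregation of roughly this shape (the view name, column
# names, and sample data below are assumptions for illustration).
from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[*]').getOrCreate()
df = spark.createDataFrame([('a', 1, 9), ('a', 1, 9), ('b', 3, 8)], ['loc', 'x', 'y'])
df.createOrReplaceTempView('spark_df')
result = spark.sql('SELECT loc, SUM(x) AS sumx, SUM(y) AS sumy FROM spark_df GROUP BY loc')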
def test_esk407(self):
    """Test Esk-407: Classification unbiased fit estimate."""
    # run Eskapade
    macro = resources.tutorial('esk407_classification_unbiased_fit_estimate.py')
    self.eskapade_run(macro)
    ds = process_manager.service(DataStore)
    ws = process_manager.service(RooFitManager).ws

    # roofit objects check in datastore
    self.assertIn('fit_result', ds)
    self.assertIsInstance(ds['fit_result'], ROOT.RooFitResult)

    # roofit objects check in workspace
    mdata = ws.data('data')
    self.assertTrue(mdata)
    self.assertEqual(1000, mdata.numEntries())
    mpdf = ws.pdf('hist_model')
    self.assertTrue(mpdf)

    # successful fit result
    fit_result = ds['fit_result']
    self.assertEqual(0, fit_result.status())
    self.assertEqual(3, fit_result.covQual())

    lo_risk = ws.var('N_low_risk')
    self.assertTrue(lo_risk)
    self.assertLess(lo_risk.getVal(), 1000)
    self.assertGreater(lo_risk.getError(), 0)
    hi_risk = ws.var('N_high_risk')
    self.assertTrue(hi_risk)
    self.assertGreater(hi_risk.getVal(), 0)
    self.assertGreater(hi_risk.getError(), 0)
def test_esk303(self):
    settings = process_manager.service(ConfigObject)
    settings['batchMode'] = True

    self.eskapade_run(resources.tutorial('esk303_hgr_filler_plotter.py'))

    settings = process_manager.service(ConfigObject)
    ds = process_manager.service(DataStore)

    # data-generation checks
    self.assertIn('n_sum_rc', ds)
    self.assertEqual(650, ds['n_sum_rc'])
    self.assertIn('hist', ds)
    self.assertIsInstance(ds['hist'], dict)
    col_names = ['date', 'isActive', 'age', 'eyeColor', 'gender', 'company',
                 'latitude', 'longitude', 'isActive:age', 'latitude:longitude']
    self.assertListEqual(sorted(ds['hist'].keys()), sorted(col_names))

    # data-summary checks
    f_bases = ['date', 'isActive', 'age', 'eyeColor', 'gender', 'company',
               'latitude', 'longitude', 'latitude_vs_longitude']
    file_names = ['report.tex'] + ['hist_{}.pdf'.format(col) for col in f_bases]
    for fname in file_names:
        path = '{0:s}/{1:s}/data/v0/report/{2:s}'.format(
            settings['resultsDir'], settings['analysisName'], fname)
        self.assertTrue(os.path.exists(path))
        statinfo = os.stat(path)
        self.assertGreater(statinfo.st_size, 0)
def test_esk405(self):
    """Test Esk-405: Simulation based on binned data."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk405_simulation_based_on_binned_data.py'))
    ds = process_manager.service(DataStore)
    ws = process_manager.service(RooFitManager).ws

    # data-generation checks
    self.assertIn('n_rdh_accounts', ds)
    self.assertEqual(650, ds['n_rdh_accounts'])

    # roofit objects check in workspace
    self.assertIn('hpdf_Ndim', ws)
    self.assertIn('rdh_accounts', ws)

    mcats = ws.set('rdh_cats')
    self.assertTrue(mcats)
    self.assertEqual(1, len(mcats))
    mvars = ws.set('rdh_vars')
    self.assertTrue(mvars)
    self.assertEqual(3, len(mvars))

    mdata = ws.data('rdh_accounts')
    self.assertEqual(650, mdata.sumEntries())
def execute(self):
    """Execute the link."""
    # get process manager and data store
    ds = process_manager.service(DataStore)

    # fetch data from data store
    if self.read_key not in ds:
        err_msg = 'No input data found in data store with key "{}".'.format(self.read_key)
        if not self.fail_missing_data:
            self.logger.error(err_msg.capitalize())
            return StatusCode.Success
        raise KeyError(err_msg)
    data = ds[self.read_key]

    # create data frame
    spark = process_manager.service(SparkManager).get_session()
    self.logger.debug('Converting data of type "{type}" to a Spark data frame.', type=type(data))
    ds[self.store_key] = data_conversion.create_spark_df(
        spark, data, schema=self.schema,
        process_methods=self._process_methods, **self.kwargs)

    return StatusCode.Success
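# Illustrative sketch, not part of the link: outside the Eskapade wrappers, the
# same pandas-to-Spark conversion can be done with plain PySpark (the sample
# data below is an assumption for illustration).
import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[*]').getOrCreate()
pdf = pd.DataFrame({'x': [1, 2, 3], 'y': [9.0, 8.0, 7.0]})
sdf = spark.createDataFrame(pdf)  # schema is inferred from the pandas dtypes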
def test_esk404(self):
    """Test Esk-404: Workspace create PDF, simulate, fit, plot."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk404_workspace_createpdf_simulate_fit_plot.py'))
    ds = process_manager.service(DataStore)
    ws = process_manager.service(RooFitManager).ws

    # data-generation checks
    self.assertIn('n_df_simdata', ds)
    self.assertEqual(1000, ds['n_df_simdata'])

    # roofit objects check in datastore
    self.assertIn('fit_result', ds)
    self.assertIsInstance(ds['fit_result'], ROOT.RooFitResult)

    # successful fit result
    fit_result = ds['fit_result']
    self.assertEqual(0, fit_result.status())
    self.assertEqual(3, fit_result.covQual())

    self.assertIn('simdata', ds)
    self.assertIsInstance(ds['simdata'], ROOT.RooDataSet)
    self.assertIn('simdata_plot', ds)
    self.assertIsInstance(ds['simdata_plot'], ROOT.RooPlot)

    # roofit objects check in workspace
    self.assertIn('model', ws)
    self.assertIn('bkg', ws)
    self.assertIn('sig', ws)
def test_esk301(self):
    settings = process_manager.service(ConfigObject)
    settings['batchMode'] = True

    self.eskapade_run(resources.tutorial('esk301_dfsummary_plotter.py'))

    settings = process_manager.service(ConfigObject)
    ds = process_manager.service(DataStore)
    columns = ['var_a', 'var_b', 'var_c']

    # data-generation checks
    self.assertIn('data', ds)
    self.assertIsInstance(ds['data'], pd.DataFrame)
    self.assertListEqual(list(ds['data'].columns), columns)
    self.assertEqual(10000, len(ds['data']))

    # data-summary checks
    file_names = ['report.tex'] + ['hist_{}.pdf'.format(col) for col in columns]
    for fname in file_names:
        path = '{0:s}/{1:s}/data/v0/report/{2:s}'.format(
            settings['resultsDir'], settings['analysisName'], fname)
        self.assertTrue(os.path.exists(path))
        statinfo = os.stat(path)
        self.assertGreater(statinfo.st_size, 0)
def test_esk607(self):
    """Test Esk-607: Add column to Spark dataframe."""
    # check if running in local mode
    sc = process_manager.service(SparkManager).get_session().sparkContext
    self.assertRegex(
        sc.getConf().get('spark.master', ''), r'local\[.*\]',
        'Spark not running in local mode, required for testing with local files')

    # run Eskapade
    self.eskapade_run(resources.tutorial('esk607_spark_with_column.py'))
    ds = process_manager.service(DataStore)

    # check data frame
    self.assertIn('new_spark_df', ds, 'no object with key "new_spark_df" in data store')
    self.assertIsInstance(ds['new_spark_df'], pyspark.sql.DataFrame,
                          '"new_spark_df" is not a Spark data frame')
    self.assertEqual(ds['new_spark_df'].count(), 5,
                     'unexpected number of rows in filtered data frame')
    self.assertListEqual(ds['new_spark_df'].columns,
                         ['dummy', 'date', 'loc', 'x', 'y', 'pow_xy1', 'pow_xy2'],
                         'unexpected columns in data frame')
    self.assertSetEqual(
        set(tuple(r) for r in ds['new_spark_df'].collect()),
        {('bla', 20090103, 'c', 5, 7, 78125.0, 78125.0),
         ('bal', 20090102, 'b', 3, 8, 6561.0, 6561.0),
         ('flo', 20090104, 'e', 3, 5, 243.0, 243.0),
         ('bar', 20090101, 'a', 1, 9, 1.0, 1.0),
         ('foo', 20090104, 'd', 1, 6, 1.0, 1.0)},
        'unexpected values in columns')
def execute(self):
    """Execute the link."""
    # get process manager and data store
    ds = process_manager.service(DataStore)

    # check if data frame exists in data store
    if self.read_key not in ds:
        err_msg = 'No input data found in data store with key "{}".'.format(self.read_key)
        if not self.fail_missing_data:
            self.logger.error(err_msg.capitalize())
            return StatusCode.Success
        raise KeyError(err_msg)

    # fetch data from data store; convert to a Spark data frame if necessary
    data = ds[self.read_key]
    if not isinstance(data, pyspark.sql.DataFrame):
        spark = process_manager.service(SparkManager).get_session()
        self.logger.debug('Converting data of type "{type}" to a Spark data frame.', type=type(data))
        data = data_conversion.create_spark_df(spark, data, schema=self.schema)

    # create data-frame writer with requested number of partitions/output files
    df_writer = data.repartition(self.num_files).write

    # call data-frame writer methods
    apply_transform_funcs(df_writer, self._write_methods)

    return StatusCode.Success
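# Illustrative sketch, not part of the link: the _write_methods sequence applied
# by apply_transform_funcs corresponds to a plain PySpark writer chain like the
# one below (the output path, format, and options are assumptions for illustration).
from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[*]').getOrCreate()
df = spark.createDataFrame([(1, 'a'), (2, 'b')], ['x', 'loc'])
# repartition(1) yields a single output file, mirroring the num_files setting
df.repartition(1).write.csv('/tmp/esk_output_csv', header=True, mode='overwrite')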
def test_esk408(self):
    """Test Esk-408: Classification error propagation after fit."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk408_classification_error_propagation_after_fit.py'))
    ds = process_manager.service(DataStore)
    ws = process_manager.service(RooFitManager).ws

    # data-generation checks
    self.assertIn('n_df_pvalues', ds)
    self.assertEqual(500, ds['n_df_pvalues'])
    self.assertIn('df_pvalues', ds)
    self.assertIsInstance(ds['df_pvalues'], pd.DataFrame)
    df = ds['df_pvalues']
    self.assertIn('high_risk_pvalue', df.columns)
    self.assertIn('high_risk_perror', df.columns)

    # roofit objects check in workspace
    fit_result = ws.obj('fit_result')
    self.assertTrue(fit_result)
    self.assertIsInstance(fit_result, ROOT.RooFitResult)

    # test for successful fit result
    self.assertEqual(0, fit_result.status())
    self.assertEqual(3, fit_result.covQual())

    frac = ws.var('frac')
    self.assertTrue(frac)
    self.assertGreater(frac.getVal(), 0)
    self.assertGreater(frac.getError(), 0)
def execute(self):
    """Execute the link."""
    ds = process_manager.service(DataStore)
    if self.into_ws:
        ws = process_manager.service(RooFitManager).ws

    for key in self.keys:
        obj = self.in_file.Get(key)
        if not obj:
            self.logger.warning('Object with key "{key}" not found in "{path}"; skipping.',
                                key=key, path=self.path)
            continue
        # a. put object into the workspace
        if self.into_ws:
            try:
                ws[key] = obj
            except BaseException:
                raise RuntimeError('Could not import object "{}" into workspace.'.format(key))
        # b. put object into datastore
        else:
            ds[key] = obj

    return StatusCode.Success
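# Illustrative sketch, not part of the link: fetching named objects from a ROOT
# file with plain PyROOT (the file path and key below are assumptions for
# illustration). Get returns a null object, which evaluates False, if the key
# is absent; this is the check the link above relies on.
import ROOT

in_file = ROOT.TFile.Open('/tmp/analysis.root')
obj = in_file.Get('my_histogram')
if not obj:
    print('key "my_histogram" not found')
in_file.Close()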
def test_esk602(self):
    """Test Esk-602: Read CSV files into a Spark data frame."""
    # check if running in local mode
    sc = process_manager.service(SparkManager).get_session().sparkContext
    self.assertRegex(
        sc.getConf().get('spark.master', ''), r'local\[.*\]',
        'Spark not running in local mode, required for testing with local files')

    # run Eskapade
    self.eskapade_run(resources.tutorial('esk602_read_csv_to_spark_df.py'))
    ds = process_manager.service(DataStore)

    # check data frame
    self.assertIn('spark_df', ds, 'no object with key "spark_df" in data store')
    self.assertIsInstance(ds['spark_df'], pyspark.sql.DataFrame,
                          '"spark_df" is not a Spark data frame')
    self.assertEqual(ds['spark_df'].rdd.getNumPartitions(), 5,
                     'unexpected number of partitions in data frame')
    self.assertEqual(ds['spark_df'].count(), 12, 'unexpected number of rows in data frame')
    self.assertListEqual(ds['spark_df'].columns, ['date', 'loc', 'x', 'y'],
                         'unexpected columns in data frame')
    self.assertSetEqual(
        set((r['date'], r['loc']) for r in ds['spark_df'].collect()),
        {(20090101, 'a'), (20090102, 'b'), (20090103, 'c'), (20090104, 'd'),
         (20090104, 'e'), (20090106, 'a'), (20090107, 'b'), (20090107, 'c'),
         (20090107, 'd'), (20090108, 'e'), (20090109, 'e'), (20090109, 'f')},
        'unexpected values in date/loc columns')
def execute(self):
    """Execute the link."""
    self.logger.debug('Applying following SQL-query to object(s) in DataStore: {query:s}.',
                      query=self.query)
    ds = process_manager.service(DataStore)

    # register Spark data frames in the data store as SQL temporary views;
    # other objects cannot be registered as views and are skipped
    for key in ds:
        spark_df = ds[key]
        if not isinstance(spark_df, pyspark.sql.DataFrame):
            continue
        spark_df.createOrReplaceTempView(key)

    # get existing SparkSession
    spark = process_manager.service(SparkManager).get_session()

    # apply SQL-query to temporary view(s)
    result = spark.sql(self.query)

    # store dataframe schema
    self.schema = result.schema

    # convert to a different data format if required
    if self.output_format == 'rdd':
        # convert to RDD of tuples
        result = result.rdd.map(tuple)
    elif self.output_format == 'pd':
        # convert to pandas dataframe
        result = result.toPandas()

    ds[self.store_key] = result

    return StatusCode.Success
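# Illustrative sketch, not part of the link: the two optional output conversions
# above, in plain PySpark (the sample data is an assumption for illustration).
from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[*]').getOrCreate()
sdf = spark.createDataFrame([(1, 'a'), (2, 'b')], ['x', 'loc'])
as_rdd = sdf.rdd.map(tuple)  # 'rdd' output format: an RDD of plain tuples
as_pdf = sdf.toPandas()      # 'pd' output format: a pandas DataFrame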
def execute(self):
    """Execute the link."""
    # get process manager and services
    ds = process_manager.service(DataStore)
    rfm = process_manager.service(RooFitManager)

    # get PDF from RooFitManager
    model = rfm.model(self.model_name)

    # check if dataset with upper bounds exists in data store
    if self.max_var_data_key not in ds:
        self.logger.warning('No range upper-bound data in data store; generating {n:d} dummy bounds.',
                            n=NUM_DUMMY_EVENTS)
        ds[self.max_var_data_key] = gen_max_var_data(model)

    # get max-var data
    max_var_data = ds.get(self.max_var_data_key)
    if not isinstance(max_var_data, ROOT.RooAbsData):
        raise TypeError('data with key "{}" are not RooFit data'.format(self.max_var_data_key))

    # select max-var data
    mv_sel_data = sel_max_var_data(model, max_var_data, self.event_frac)

    # generate data
    proto_arg = RooFit.ProtoData(mv_sel_data, False, False)
    data = model.pdf.generate(model.var_set, proto_arg, *self._gen_cmd_args.values())
    ds[self.store_key] = data

    return StatusCode.Success
def test_esk108reduce(self):
    settings = process_manager.service(ConfigObject)
    settings['TESTING'] = True

    self.eskapade_run(resources.tutorial('esk108_reduce.py'))

    ds = process_manager.service(DataStore)
    self.assertEqual(20, ds['n_products'])
def test_esk110(self):
    self.eskapade_run(resources.tutorial('esk110_code_profiling.py'))

    settings = process_manager.service(ConfigObject)
    ds = process_manager.service(DataStore)

    self.assertEqual(0, len(process_manager))
    self.assertEqual(0, len(ds))
    self.assertIn('doCodeProfiling', settings)
    self.assertEqual('cumulative', settings['doCodeProfiling'])
def setUp(self):
    """Set up test environment."""
    settings = process_manager.service(ConfigObject)
    settings['analysisName'] = 'DataConversionTest'

    # ensure local testing
    spark_settings = [('spark.app.name', settings['analysisName']),
                      ('spark.master', 'local[*]'),
                      ('spark.driver.host', 'localhost')]
    process_manager.service(SparkManager).create_session(
        eskapade_settings=settings, spark_settings=spark_settings)
def setUp(self):
    """Set up test."""
    TutorialMacrosTest.setUp(self)
    settings = process_manager.service(ConfigObject)
    settings['analysisName'] = 'SparkAnalysisTutorialMacrosTest'

    # ensure local testing
    spark_settings = [('spark.app.name', settings['analysisName']),
                      ('spark.master', 'local[*]'),
                      ('spark.driver.host', 'localhost')]
    process_manager.service(SparkManager).create_session(
        eskapade_settings=settings, spark_settings=spark_settings)
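# Illustrative sketch, not part of the tests: the spark_settings above map onto
# a plain PySpark builder chain like this, outside the SparkManager wrapper.
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .appName('SparkAnalysisTutorialMacrosTest')
         .master('local[*]')                        # run Spark locally on all cores
         .config('spark.driver.host', 'localhost')
         .getOrCreate())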
def execute(self):
    """Execute the link.

    Reads the input file(s) and puts the dataframe in the datastore.
    """
    ds = process_manager.service(DataStore)
    settings = process_manager.service(ConfigObject)

    # 0. when in fork mode, the paths read out need to be reconfigured; the lock ensures
    #    this is done only once
    if settings.get('fork', False):
        self.configure_paths(lock=True)

    # 1. handle first the case of no iteration: concatenate everything into one dataframe
    if not self._iterate:
        self.logger.debug('Reading datasets from files [{files}]',
                          files=', '.join('"{}"'.format(p) for p in self._paths))
        df = pd.concat(set_reader(p, self.reader, **self.kwargs) for p in self._paths)
        numentries = len(df.index)
    # 2. handle the case where iteration has been turned on
    else:
        # try picking up a new dataset from the iterator
        df = next(self)
        while self.latest_data_length() == 0 and not self.is_finished():
            df = next(self)

        # at end of loop
        if self.latest_data_length() == 0:
            assert self.is_finished(), 'Got empty dataset but not at end of iterator.'
            # skip the rest of the chain execution (but do perform finalize)
            return StatusCode.BreakChain

        # do we have more datasets to go?
        # pass this information to the (possible) repeater at the end of the chain
        reqstr = 'chainRepeatRequestBy_' + self.name
        settings[reqstr] = not self.is_finished()

        numentries = self.latest_data_length()
        sumentries = self.sum_data_length()
        self.logger.info('Read next <{n:d}> records; summing up to <{sum_n:d}>.',
                         n=numentries, sum_n=sumentries)
        ds['n_sum_' + self.key] = sumentries

    # store dataframe and number of entries
    ds[self.key] = df
    ds['n_' + self.key] = numentries

    return StatusCode.Success
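# Illustrative sketch, not part of the link: the non-iterating branch above
# amounts to concatenating per-file pandas reads (the paths and the CSV reader
# below are assumptions for illustration).
import pandas as pd

paths = ['/tmp/part1.csv', '/tmp/part2.csv']
df = pd.concat(pd.read_csv(p) for p in paths)
numentries = len(df.index)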
def execute(self):
    """Execute the link."""
    # create data-frame reader
    spark = process_manager.service(SparkManager).get_session()
    data = spark.read

    # call data-frame reader methods
    data = apply_transform_funcs(data, self._read_methods)

    # store data in data store
    process_manager.service(DataStore)[self.store_key] = data

    return StatusCode.Success
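# Illustrative sketch, not part of the link: the _read_methods applied by
# apply_transform_funcs amount to a plain PySpark reader chain such as this
# (the input path and options are assumptions for illustration).
from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[*]').getOrCreate()
data = spark.read.csv('/tmp/input_csv', header=True, inferSchema=True)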
def execute(self):
    """Execute the link.

    :returns: status code of execution
    :rtype: StatusCode
    """
    settings = process_manager.service(ConfigObject)
    ds = process_manager.service(DataStore)

    # --- your algorithm code goes here
    self.logger.debug("Now executing link: {link}.", link=self.name)

    return StatusCode.Success
def test_esk202(self):
    self.eskapade_run(resources.tutorial('esk202_writedata.py'))

    settings = process_manager.service(ConfigObject)
    ds = process_manager.service(DataStore)

    self.assertEqual(36, ds['n_test'])
    path = settings['resultsDir'] + '/' + settings['analysisName'] + '/data/v0/tmp3.csv'
    self.assertTrue(os.path.exists(path))
    # check file is non-empty
    statinfo = os.stat(path)
    self.assertGreater(statinfo.st_size, 0)
def test_esk106(self):
    settings = process_manager.service(ConfigObject)
    # fake a setting from the cmd-line; picked up in the macro
    settings['do_chain0'] = False

    self.eskapade_run(resources.tutorial('esk106_cmdline_options.py'))

    settings = process_manager.service(ConfigObject)

    self.assertEqual(1, len(process_manager))
    self.assertEqual('Chain1', list(process_manager)[0].name)
    self.assertEqual(False, settings.get('do_chain0', True))
    self.assertEqual(True, settings.get('do_chain1', True))
    self.assertEqual('Universe', list(list(process_manager)[0])[0].hello)
def setUp(self):
    test_df = pd.DataFrame({
        'dt': ['2017-01-01 12:00:00',
               '2017-01-01 13:00:00',
               '2017-01-03 12:45:23'],
        'a': [1, 2, 5],
        'b': [1, 2, 1]
    })

    settings = process_manager.service(ConfigObject)
    spark = process_manager.service(SparkManager).create_session(eskapade_settings=settings)
    ds = process_manager.service(DataStore)
    ds['test_input'] = spark.createDataFrame(test_df)
def test_spark_setup(self):
    """Test if Spark setup is working properly."""
    settings = process_manager.service(ConfigObject)
    settings['analysisName'] = 'spark_setup'

    sm = process_manager.service(SparkManager)
    spark = sm.create_session(eskapade_settings=settings)
    df = spark.createDataFrame([(0, 'foo'), (1, 'bar')], ['id', 'value'])
    self.assertSetEqual(set(tuple(r) for r in df.collect()),
                        {(0, 'foo'), (1, 'bar')},
                        'unexpected values in columns')
    sm.finish()
def test_esk411(self):
    """Test Esk-411: Predictive maintenance Weibull fit."""
    # run Eskapade
    macro = resources.tutorial('esk411_weibull_predictive_maintenance.py')
    self.eskapade_run(macro)
    ds = process_manager.service(DataStore)
    ws = process_manager.service(RooFitManager).ws

    # roofit objects check in datastore
    self.assertIn('fit_result', ds)
    self.assertIsInstance(ds['fit_result'], ROOT.RooFitResult)
    self.assertIn('binnedData', ds)
    self.assertIsInstance(ds['binnedData'], ROOT.RooDataHist)
    mdata = ds['binnedData']
    self.assertTrue(mdata)
    self.assertEqual(300, mdata.numEntries())

    # roofit objects check in workspace
    mpdf = ws.pdf('sum3pdf')
    self.assertTrue(mpdf)

    # successful fit result
    fit_result = ds['fit_result']
    self.assertEqual(0, fit_result.status())
    self.assertEqual(3, fit_result.covQual())

    n1 = ws.var('N1')
    self.assertTrue(n1)
    self.assertGreater(n1.getVal(), 2.e5)
    n2 = ws.var('N2')
    self.assertTrue(n2)
    self.assertGreater(n2.getVal(), 4.e5)
    n3 = ws.var('N3')
    self.assertTrue(n3)
    self.assertGreater(n3.getVal(), 5.e4)

    # data-summary checks
    file_names = ['weibull_fit_report.tex',
                  'correlation_matrix_fit_result.pdf',
                  'floating_pars_fit_result.tex',
                  'fit_of_time_difference_medium_range.pdf']
    for fname in file_names:
        path = persistence.io_path('results_data', 'report/{}'.format(fname))
        self.assertTrue(os.path.exists(path))
        statinfo = os.stat(path)
        self.assertGreater(statinfo.st_size, 0)
def test_esk406(self):
    """Test Esk-406: Simulation based on unbinned data."""
    # run Eskapade
    macro = resources.tutorial('esk406_simulation_based_on_unbinned_data.py')
    self.eskapade_run(macro)
    ds = process_manager.service(DataStore)

    # data-generation checks
    self.assertIn('n_correlated_data', ds)
    self.assertEqual(500, ds['n_correlated_data'])
    self.assertIn('n_rds_correlated_data', ds)
    self.assertEqual(500, ds['n_rds_correlated_data'])
    self.assertIn('n_df_simdata', ds)
    self.assertEqual(5000, ds['n_df_simdata'])

    self.assertIn('df_simdata', ds)
    self.assertIsInstance(ds['df_simdata'], pd.DataFrame)
    self.assertIn('hist', ds)
    self.assertIsInstance(ds['hist'], dict)

    # roofit objects check
    self.assertIn('keys_varset', ds)
    self.assertIsInstance(ds['keys_varset'], ROOT.RooArgSet)
    self.assertEqual(2, len(ds['keys_varset']))
    self.assertIn('rds_correlated_data', ds)
    self.assertIsInstance(ds['rds_correlated_data'], ROOT.RooDataSet)
    self.assertIn('simdata', ds)
    self.assertIsInstance(ds['simdata'], ROOT.RooDataSet)
def initialize(self):
    """Initialize the link."""
    # check input arguments
    self.check_arg_types(store_key=str, max_var_data_key=str, model_name=str, event_frac=float)
    self.check_arg_vals('store_key', 'max_var_data_key', 'model_name', 'event_frac')

    # check if model exists
    rfm = process_manager.service(RooFitManager)
    model = rfm.model(self.model_name)
    if not model:
        self.logger.warning('Model "{model}" does not exist; creating with default values.',
                            model=self.model_name)
        model = rfm.model(self.model_name, model_cls=TruncExponential)

    # check if model PDF has been built
    if not model.is_built:
        model.build_model()

    # process command arguments for generate function
    self._gen_cmd_args = create_roofit_opts(create_linked_list=False, **self.kwargs)

    return StatusCode.Success
def test_esk402(self):
    """Test Esk-402: RooDataHist fill."""
    # run Eskapade
    self.eskapade_run(resources.tutorial('esk402_roodatahist_fill.py'))
    ds = process_manager.service(DataStore)

    # data-generation checks
    self.assertIn('n_accounts', ds)
    self.assertEqual(650, ds['n_accounts'])
    self.assertIn('n_rdh_accounts', ds)
    self.assertEqual(650, ds['n_rdh_accounts'])
    self.assertIn('to_factorized', ds)
    self.assertIsInstance(ds['to_factorized'], dict)
    self.assertIn('to_original', ds)
    self.assertIsInstance(ds['to_original'], dict)
    self.assertIn('map_rdh_accounts_to_original', ds)
    self.assertIsInstance(ds['map_rdh_accounts_to_original'], dict)

    # roofit objects check
    self.assertIn('accounts_catset', ds)
    self.assertIsInstance(ds['accounts_catset'], ROOT.RooArgSet)
    self.assertEqual(2, len(ds['accounts_catset']))
    self.assertIn('accounts_varset', ds)
    self.assertIsInstance(ds['accounts_varset'], ROOT.RooArgSet)
    self.assertEqual(6, len(ds['accounts_varset']))
    self.assertIn('rdh_accounts', ds)
    self.assertIsInstance(ds['rdh_accounts'], ROOT.RooDataHist)
def test_esk409(self):
    """Test Esk-409: Unredeemed vouchers."""
    # run Eskapade
    macro = resources.tutorial('esk409_unredeemed_vouchers.py')
    self.eskapade_run(macro)
    ds = process_manager.service(DataStore)

    # check generated data
    self.assertIn('voucher_redeems', ds)
    self.assertIn('voucher_ages', ds)
    self.assertIsInstance(ds['voucher_redeems'], ROOT.RooDataSet)
    self.assertIsInstance(ds['voucher_ages'], ROOT.RooDataSet)
    self.assertLess(ds['voucher_redeems'].numEntries(), 6000)
    self.assertGreater(ds['voucher_redeems'].numEntries(), 0)
    self.assertEqual(ds['voucher_ages'].numEntries(), 10000)

    # check fit result
    fit_link = process_manager.get('Fitting').get('Fit')
    self.assertEqual(fit_link.fit_result.status(), 0)
    n_ev_pull = (fit_link.results['n_ev'][0] - 6000.) / fit_link.results['n_ev'][1]
    self.assertGreater(n_ev_pull, -3.)
    self.assertLess(n_ev_pull, 3.)

    # check plot output
    plot_path = persistence.io_path('results_data', 'voucher_redeem.pdf')
    self.assertTrue(os.path.exists(plot_path))
    statinfo = os.stat(plot_path)
    self.assertGreater(statinfo.st_size, 0)