示例#1
0
    def test_esk205(self):
        """Test Esk-205: run the concatenate-pandas-dfs tutorial macro and check its output."""
        # run Eskapade
        self.eskapade_run(resources.tutorial('esk205_concatenate_pandas_dfs.py'))
        ds = process_manager.service(DataStore)

        # assertIn names the missing key on failure, unlike assertTrue(... in ...)
        self.assertIn('outgoing', ds)
        self.assertEqual(ds['n_outgoing'], 12)
示例#2
0
    def test_esk101(self):
        """Test Esk-101: run the hello-world tutorial macro and check its settings."""
        macro = resources.tutorial('esk101_helloworld.py')
        self.eskapade_run(macro)

        # the configuration is read back only after the run has finished
        config = process_manager.service(ConfigObject)
        self.assertTrue(config['do_hello'])
        self.assertEqual(2, config['n_repeat'])
示例#3
0
    def test_esk409(self):
        """Test Esk-409: Unredeemed vouchers."""
        # execute the tutorial macro and fetch the resulting data store
        self.eskapade_run(resources.tutorial('esk409_unredeemed_vouchers.py'))
        ds = process_manager.service(DataStore)

        # both generated data sets must be present ...
        for data_key in ('voucher_redeems', 'voucher_ages'):
            self.assertIn(data_key, ds)
        # ... and be RooFit data sets
        for data_key in ('voucher_redeems', 'voucher_ages'):
            self.assertIsInstance(ds[data_key], ROOT.RooDataSet)

        # number of redeems lies strictly between 0 and 6000; ages are complete
        redeems = ds['voucher_redeems']
        self.assertLess(redeems.numEntries(), 6000)
        self.assertGreater(redeems.numEntries(), 0)
        self.assertEqual(ds['voucher_ages'].numEntries(), 10000)

        # the fit must have converged with status 0
        fit = process_manager.get('Fitting').get('Fit')
        self.assertEqual(fit.fit_result.status(), 0)

        # pull of the fitted number of events w.r.t. 6000 must be within 3 sigma
        n_ev_value = fit.results['n_ev'][0]
        n_ev_error = fit.results['n_ev'][1]
        pull = (n_ev_value - 6000.) / n_ev_error
        self.assertGreater(pull, -3.)
        self.assertLess(pull, 3.)

        # the plot file must exist and be non-empty
        pdf_path = persistence.io_path('results_data', 'voucher_redeem.pdf')
        self.assertTrue(os.path.exists(pdf_path))
        self.assertGreater(os.stat(pdf_path).st_size, 0)
示例#4
0
    def test_esk609(self):
        """Test Esk-609: Map data-frame groups."""
        # execute the tutorial macro
        macro = resources.tutorial('esk609_map_df_groups.py')
        self.eskapade_run(macro)
        ds = process_manager.service(DataStore)

        # both mapped data sets must be in the data store as Spark RDDs
        for rdd_key in ('map_rdd', 'flat_map_rdd'):
            self.assertIn(rdd_key, ds,
                          'no data found with key "{}"'.format(rdd_key))
            type_msg = 'object "{0:s}" is not an RDD (type "{1!s}")'.format(
                rdd_key, type(ds[rdd_key]))
            self.assertIsInstance(ds[rdd_key], pyspark.RDD, type_msg)

        # expected per-group sums of the "bar" variable
        expected_sums = [
            (0, 27.5), (1, 77.5), (2, 127.5), (3, 177.5), (4, 227.5),
            (5, 277.5), (6, 327.5), (7, 377.5), (8, 427.5), (9, 477.5),
        ]

        # expected flat-mapped rows: each row carries the sum of its group of ten
        expected_rows = []
        for idx in range(100):
            group_sum = expected_sums[idx // 10][1]
            expected_rows.append(
                (idx, 'foo{:d}'.format(idx), (idx + 1) / 2., group_sum))

        # compare the sorted RDD contents with the expectations
        found_sums = sorted(ds['map_rdd'].collect())
        self.assertListEqual(found_sums, expected_sums,
                             'unexpected values in "map_rdd"')
        found_rows = sorted(ds['flat_map_rdd'].collect())
        self.assertListEqual(found_rows, expected_rows,
                             'unexpected values in "flat_map_rdd"')
示例#5
0
    def test_esk607(self):
        """Test Esk-607: Add column to Spark dataframe."""
        # check if running in local mode
        sc = process_manager.service(SparkManager).get_session().sparkContext
        # Raw string avoids the invalid "\[" escape sequence; ".*" (instead of
        # the single-character class "[.*]") accepts any bracketed spec such
        # as "local[*]" or "local[2]".
        self.assertRegex(
            sc.getConf().get('spark.master', ''), r'local\[.*\]',
            'Spark not running in local mode, required for testing with local files'
        )

        # run Eskapade
        self.eskapade_run(resources.tutorial('esk607_spark_with_column.py'))
        ds = process_manager.service(DataStore)

        # check data frame
        self.assertIn('new_spark_df', ds,
                      'no object with key "new_spark_df" in data store')
        self.assertIsInstance(ds['new_spark_df'], pyspark.sql.DataFrame,
                              '"new_spark_df" is not a Spark data frame')
        self.assertEqual(ds['new_spark_df'].count(), 5,
                         'unexpected number of rows in filtered data frame')
        self.assertListEqual(
            ds['new_spark_df'].columns,
            ['dummy', 'date', 'loc', 'x', 'y', 'pow_xy1', 'pow_xy2'],
            'unexpected columns in data frame')
        # row order is not guaranteed, so compare as sets of tuples
        self.assertSetEqual(
            {tuple(r) for r in ds['new_spark_df'].collect()},
            {('bla', 20090103, 'c', 5, 7, 78125.0, 78125.0),
             ('bal', 20090102, 'b', 3, 8, 6561.0, 6561.0),
             ('flo', 20090104, 'e', 3, 5, 243.0, 243.0),
             ('bar', 20090101, 'a', 1, 9, 1.0, 1.0),
             ('foo', 20090104, 'd', 1, 6, 1.0, 1.0)},
            'unexpected values in columns')
示例#6
0
    def test_esk605(self):
        """Test Esk-605: Create Spark data frame."""
        # execute the tutorial macro
        macro = resources.tutorial('esk605_create_spark_df.py')
        self.eskapade_run(macro)
        ds = process_manager.service(DataStore)

        # schema and content that every created data frame is compared against
        expected_schema = (
            StructField('index', LongType()),
            StructField('foo', StringType()),
            StructField('bar', DoubleType()),
        )
        expected_rows = []
        for idx in range(20, 100):
            expected_rows.append((idx, 'foo{:d}'.format(idx), (idx + 1) / 2.))

        for name in ('rows_df', 'rdd_df', 'df_df', 'pd_df'):
            # presence and type
            self.assertIn(name, ds,
                          'no object with key {} in data store'.format(name))
            frame = ds[name]
            type_msg = 'object with key {0:s} is not a data frame (type {1!s})'.format(
                name, type(frame))
            self.assertIsInstance(frame, pyspark.sql.DataFrame, type_msg)

            # schema and sorted content
            self.assertTupleEqual(
                tuple(frame.schema), expected_schema,
                'unexpected data-frame schema for {}'.format(name))
            found_rows = sorted(tuple(row) for row in frame.collect())
            self.assertListEqual(
                found_rows, expected_rows,
                'unexpected data-frame content for {}'.format(name))

            # caching and partitioning
            self.assertTrue(frame.is_cached,
                            'data frame {} is not cached'.format(name))
            n_partitions = frame.rdd.getNumPartitions()
            self.assertLessEqual(
                n_partitions, 2,
                'unexpected number of data-frame partitions for {}'.format(name))
示例#7
0
    def test_esk604(self):
        """Test Esk-604: Execute Spark-SQL query."""
        # check if running in local mode
        sc = process_manager.service(SparkManager).get_session().sparkContext
        # Raw string avoids the invalid "\[" escape sequence; ".*" (instead of
        # the single-character class "[.*]") accepts any bracketed spec such
        # as "local[*]" or "local[2]".
        self.assertRegex(
            sc.getConf().get('spark.master', ''), r'local\[.*\]',
            'Spark not running in local mode, required for testing with local files'
        )

        # run Eskapade
        self.eskapade_run(resources.tutorial('esk604_spark_execute_query.py'))
        ds = process_manager.service(DataStore)

        # check data frame
        self.assertIn('spark_df_sql', ds,
                      'no object with key "spark_df_sql" in data store')
        self.assertIsInstance(ds['spark_df_sql'], pyspark.sql.DataFrame,
                              '"spark_df_sql" is not a Spark data frame')
        self.assertEqual(ds['spark_df_sql'].count(), 4,
                         'unexpected number of rows in filtered data frame')
        self.assertListEqual(ds['spark_df_sql'].columns,
                             ['loc', 'sumx', 'sumy'],
                             'unexpected columns in data frame')
        # the schema stored in the SQL link must match that of its output
        self.assertEqual(
            ds['spark_df_sql'].schema,
            process_manager.get('ApplySQL').get('SparkSQL').schema,
            'schema of data frame does not correspond to schema stored in link'
        )
        # row order is not guaranteed, so compare as sets of tuples
        self.assertSetEqual(
            {tuple(r) for r in ds['spark_df_sql'].collect()},
            {('e', 10, 15), ('d', 2, 11), ('b', 6, 16), ('a', 2, 18)},
            'unexpected values in loc/sumx/sumy columns')
示例#8
0
    def test_esk602(self):
        """Test Esk-602: Read CSV files into a Spark data frame."""
        # check if running in local mode
        sc = process_manager.service(SparkManager).get_session().sparkContext
        # Raw string avoids the invalid "\[" escape sequence; ".*" (instead of
        # the single-character class "[.*]") accepts any bracketed spec such
        # as "local[*]" or "local[2]".
        self.assertRegex(
            sc.getConf().get('spark.master', ''), r'local\[.*\]',
            'Spark not running in local mode, required for testing with local files'
        )

        # run Eskapade
        self.eskapade_run(resources.tutorial('esk602_read_csv_to_spark_df.py'))
        ds = process_manager.service(DataStore)

        # check data frame
        self.assertIn('spark_df', ds,
                      'no object with key "spark_df" in data store')
        self.assertIsInstance(ds['spark_df'], pyspark.sql.DataFrame,
                              '"spark_df" is not a Spark data frame')
        self.assertEqual(ds['spark_df'].rdd.getNumPartitions(), 5,
                         'unexpected number of partitions in data frame')
        self.assertEqual(ds['spark_df'].count(), 12,
                         'unexpected number of rows in data frame')
        self.assertListEqual(ds['spark_df'].columns, ['date', 'loc', 'x', 'y'],
                             'unexpected columns in data frame')
        # row order is not guaranteed, so compare (date, loc) pairs as sets
        self.assertSetEqual(
            {(r['date'], r['loc']) for r in ds['spark_df'].collect()},
            {(20090101, 'a'), (20090102, 'b'), (20090103, 'c'),
             (20090104, 'd'), (20090104, 'e'), (20090106, 'a'),
             (20090107, 'b'), (20090107, 'c'), (20090107, 'd'),
             (20090108, 'e'), (20090109, 'e'), (20090109, 'f')},
            'unexpected values in date/loc columns')
示例#9
0
    def test_esk402(self):
        """Test Esk-402: RooDataHist fill"""

        # execute the tutorial macro
        macro = resources.tutorial('esk402_roodatahist_fill.py')
        self.eskapade_run(macro)
        ds = process_manager.service(DataStore)

        # record counts stored by the macro
        for count_key in ('n_accounts', 'n_rdh_accounts'):
            self.assertIn(count_key, ds)
            self.assertEqual(650, ds[count_key])

        # mapping objects must be plain dictionaries
        for map_key in ('to_factorized', 'to_original',
                        'map_rdh_accounts_to_original'):
            self.assertIn(map_key, ds)
            self.assertIsInstance(ds[map_key], dict)

        # RooFit argument sets and their expected sizes
        for set_key, n_args in (('accounts_catset', 2), ('accounts_varset', 6)):
            self.assertIn(set_key, ds)
            self.assertIsInstance(ds[set_key], ROOT.RooArgSet)
            self.assertEqual(n_args, len(ds[set_key]))

        # the filled histogram
        self.assertIn('rdh_accounts', ds)
        self.assertIsInstance(ds['rdh_accounts'], ROOT.RooDataHist)
示例#10
0
    def test_esk407(self):
        """Test Esk-407: Classification unbiased fit estimate."""
        # run Eskapade
        macro = resources.tutorial(
            'esk407_classification_unbiased_fit_estimate.py')
        self.eskapade_run(macro)
        ds = process_manager.service(DataStore)
        ws = process_manager.service(RooFitManager).ws

        # roofit objects check in datastore
        self.assertIn('fit_result', ds)
        self.assertIsInstance(ds['fit_result'], ROOT.RooFitResult)

        # roofit objects check in workspace; each lookup must give a truthy object
        mdata = ws.data('data')
        self.assertTrue(mdata, 'no data set "data" in workspace')
        self.assertEqual(1000, mdata.numEntries())
        mpdf = ws.pdf('hist_model')
        self.assertTrue(mpdf, 'no PDF "hist_model" in workspace')

        # successful fit result
        fit_result = ds['fit_result']
        self.assertEqual(0, fit_result.status())
        self.assertEqual(3, fit_result.covQual())

        # fitted yields: dedicated comparison asserts report the offending
        # values on failure, unlike assertTrue(a < b)
        lo_risk = ws.var('N_low_risk')
        self.assertTrue(lo_risk, 'no variable "N_low_risk" in workspace')
        self.assertLess(lo_risk.getVal(), 1000)
        self.assertGreater(lo_risk.getError(), 0)
        hi_risk = ws.var('N_high_risk')
        self.assertTrue(hi_risk, 'no variable "N_high_risk" in workspace')
        self.assertGreater(hi_risk.getVal(), 0)
        self.assertGreater(hi_risk.getError(), 0)
示例#11
0
    def test_esk211(self):
        """Test Esk-211: run the fork-read-data-itr tutorial macro and check its output."""
        # run Eskapade
        self.eskapade_run(resources.tutorial('esk211_fork_read_data_itr.py'))
        ds = process_manager.service(DataStore)

        # assertIn names the missing key on failure, unlike assertTrue(... in ...)
        self.assertIn('reduced_data', ds)
        self.assertEqual(24, len(ds['reduced_data'].index))
示例#12
0
    def test_esk405(self):
        """Test Esk-405: Simulation based on binned data"""

        # execute the tutorial macro
        macro = resources.tutorial('esk405_simulation_based_on_binned_data.py')
        self.eskapade_run(macro)
        ds = process_manager.service(DataStore)
        ws = process_manager.service(RooFitManager).ws

        # record count stored by the macro
        self.assertIn('n_rdh_accounts', ds)
        self.assertEqual(650, ds['n_rdh_accounts'])

        # objects expected in the RooFit workspace
        for ws_key in ('hpdf_Ndim', 'rdh_accounts'):
            self.assertIn(ws_key, ws)

        # named sets in the workspace and their expected sizes
        for set_name, n_args in (('rdh_cats', 1), ('rdh_vars', 3)):
            arg_set = ws.set(set_name)
            self.assertFalse(not arg_set)
            self.assertEqual(n_args, len(arg_set))

        # summed entries of the binned data
        binned_data = ws.data('rdh_accounts')
        self.assertEqual(650, binned_data.sumEntries())
示例#13
0
    def test_esk406(self):
        """Test Esk-406: Simulation based on unbinned data"""

        # execute the tutorial macro
        self.eskapade_run(
            resources.tutorial('esk406_simulation_based_on_unbinned_data.py'))
        ds = process_manager.service(DataStore)

        # record counts stored by the macro
        expected_counts = (
            ('n_correlated_data', 500),
            ('n_rds_correlated_data', 500),
            ('n_df_simdata', 5000),
        )
        for count_key, count in expected_counts:
            self.assertIn(count_key, ds)
            self.assertEqual(count, ds[count_key])

        # simulated data frame and histogram dictionary
        for obj_key, obj_type in (('df_simdata', pd.DataFrame), ('hist', dict)):
            self.assertIn(obj_key, ds)
            self.assertIsInstance(ds[obj_key], obj_type)

        # RooFit variable set with two entries
        self.assertIn('keys_varset', ds)
        varset = ds['keys_varset']
        self.assertIsInstance(varset, ROOT.RooArgSet)
        self.assertEqual(2, len(varset))

        # RooFit data sets
        for rds_key in ('rds_correlated_data', 'simdata'):
            self.assertIn(rds_key, ds)
            self.assertIsInstance(ds[rds_key], ROOT.RooDataSet)
示例#14
0
    def test_esk301(self):
        """Test Esk-301: run the dfsummary-plotter tutorial macro and check its report files."""
        settings = process_manager.service(ConfigObject)
        settings['batchMode'] = True

        # run Eskapade
        self.eskapade_run(resources.tutorial('esk301_dfsummary_plotter.py'))

        # settings are looked up again after the run
        # NOTE(review): presumably the run can replace the ConfigObject
        # service instance -- confirm before merging the two lookups
        settings = process_manager.service(ConfigObject)

        ds = process_manager.service(DataStore)

        columns = ['var_a', 'var_b', 'var_c']

        # data-generation checks
        self.assertIn('data', ds)
        self.assertIsInstance(ds['data'], pd.DataFrame)
        self.assertListEqual(list(ds['data'].columns), columns)
        self.assertEqual(10000, len(ds['data']))

        # data-summary checks: every report file must exist and be non-empty
        file_names = ['report.tex'] + ['hist_{}.pdf'.format(col) for col in columns]
        for fname in file_names:
            path = '{0:s}/{1:s}/data/v0/report/{2:s}'.format(settings['resultsDir'], settings['analysisName'], fname)
            self.assertTrue(os.path.exists(path), 'missing report file "{}"'.format(path))
            statinfo = os.stat(path)
            # assertGreater reports the actual size on failure
            self.assertGreater(statinfo.st_size, 0, 'empty report file "{}"'.format(path))
示例#15
0
    def test_esk404(self):
        """Test Esk-404: Workspace create PDF, simulate, fit, plot"""

        # execute the tutorial macro
        macro = resources.tutorial(
            'esk404_workspace_createpdf_simulate_fit_plot.py')
        self.eskapade_run(macro)
        ds = process_manager.service(DataStore)
        ws = process_manager.service(RooFitManager).ws

        # record count stored by the macro
        self.assertIn('n_df_simdata', ds)
        self.assertEqual(1000, ds['n_df_simdata'])

        # the fit result must be stored in the data store ...
        self.assertIn('fit_result', ds)
        self.assertIsInstance(ds['fit_result'], ROOT.RooFitResult)

        # ... and report fit status 0 with covariance quality 3
        fit = ds['fit_result']
        self.assertEqual(0, fit.status())
        self.assertEqual(3, fit.covQual())

        # simulated data set and its plot
        for obj_key, obj_type in (('simdata', ROOT.RooDataSet),
                                  ('simdata_plot', ROOT.RooPlot)):
            self.assertIn(obj_key, ds)
            self.assertIsInstance(ds[obj_key], obj_type)

        # objects expected in the RooFit workspace
        for ws_key in ('model', 'bkg', 'sig'):
            self.assertIn(ws_key, ws)
示例#16
0
    def test_esk303(self):
        """Test Esk-303: run the hgr-filler-plotter tutorial macro and check its report files."""
        settings = process_manager.service(ConfigObject)
        settings['batchMode'] = True

        # run Eskapade
        self.eskapade_run(resources.tutorial('esk303_hgr_filler_plotter.py'))

        # settings are looked up again after the run
        # NOTE(review): presumably the run can replace the ConfigObject
        # service instance -- confirm before merging the two lookups
        settings = process_manager.service(ConfigObject)
        ds = process_manager.service(DataStore)

        # data-generation checks
        self.assertIn('n_sum_rc', ds)
        self.assertEqual(650, ds['n_sum_rc'])
        self.assertIn('hist', ds)
        self.assertIsInstance(ds['hist'], dict)
        col_names = ['date', 'isActive', 'age', 'eyeColor', 'gender', 'company', 'latitude', 'longitude',
                     'isActive:age', 'latitude:longitude']
        self.assertListEqual(sorted(ds['hist'].keys()), sorted(col_names))

        # data-summary checks: every report file must exist and be non-empty
        f_bases = ['date', 'isActive', 'age', 'eyeColor', 'gender', 'company', 'latitude', 'longitude',
                   'latitude_vs_longitude']
        file_names = ['report.tex'] + ['hist_{}.pdf'.format(col) for col in f_bases]
        for fname in file_names:
            path = '{0:s}/{1:s}/data/v0/report/{2:s}'.format(settings['resultsDir'], settings['analysisName'], fname)
            self.assertTrue(os.path.exists(path), 'missing report file "{}"'.format(path))
            statinfo = os.stat(path)
            # assertGreater reports the actual size on failure
            self.assertGreater(statinfo.st_size, 0, 'empty report file "{}"'.format(path))
示例#17
0
    def test_esk408(self):
        """Test Esk-408: Classification error propagation after fit."""
        # run Eskapade
        self.eskapade_run(
            resources.tutorial(
                'esk408_classification_error_propagation_after_fit.py'))
        ds = process_manager.service(DataStore)
        ws = process_manager.service(RooFitManager).ws

        # data-generation checks
        self.assertIn('n_df_pvalues', ds)
        self.assertEqual(500, ds['n_df_pvalues'])
        self.assertIn('df_pvalues', ds)
        self.assertIsInstance(ds['df_pvalues'], pd.DataFrame)
        df = ds['df_pvalues']
        # assertIn names the missing column on failure
        self.assertIn('high_risk_pvalue', df.columns)
        self.assertIn('high_risk_perror', df.columns)

        # roofit objects check in workspace; the lookup must give a truthy object
        fit_result = ws.obj('fit_result')
        self.assertTrue(fit_result, 'no object "fit_result" in workspace')
        self.assertIsInstance(fit_result, ROOT.RooFitResult)
        # test for successful fit result
        self.assertEqual(0, fit_result.status())
        self.assertEqual(3, fit_result.covQual())

        # fitted fraction: assertGreater reports the offending value on failure
        frac = ws.var('frac')
        self.assertTrue(frac, 'no variable "frac" in workspace')
        self.assertGreater(frac.getVal(), 0)
        self.assertGreater(frac.getError(), 0)
示例#18
0
    def test_esk105a(self):
        """Test Esk-105 A: the dont-store-results macro must not create a results directory."""
        macro = resources.tutorial('esk105_A_dont_store_results.py')
        self.eskapade_run(macro)

        # <resultsDir>/<analysisName> must not exist after the run
        config = process_manager.service(ConfigObject)
        results_path = '/'.join([config['resultsDir'], config['analysisName']])
        self.assertFalse(os.path.exists(results_path))
示例#19
0
    def test_esk104(self):
        """Test Esk-104: run the basic-datastore-operations macro and check the data store."""
        macro = resources.tutorial('esk104_basic_datastore_operations.py')
        self.eskapade_run(macro)

        # exactly one object is left in the data store: key 'a' with value 1
        store = process_manager.service(DataStore)
        self.assertEqual(1, len(store))
        self.assertEqual(1, store['a'])
示例#20
0
    def test_esk108reduce(self):
        """Test Esk-108: run the reduce macro with the TESTING flag set."""
        # the flag must be set before the macro is run
        process_manager.service(ConfigObject)['TESTING'] = True
        macro = resources.tutorial('esk108_reduce.py')
        self.eskapade_run(macro)

        store = process_manager.service(DataStore)
        self.assertEqual(20, store['n_products'])
示例#21
0
    def test_esk107(self):
        """Test Esk-107: run the chain-looper macro and check the loop count."""
        macro = resources.tutorial('esk107_chain_looper.py')
        self.eskapade_run(macro)

        store = process_manager.service(DataStore)

        # chain is repeated 10 times, with nothing put in datastore
        self.assertEqual(0, len(store))

        # second element of the first item held by the process manager
        first_item = list(process_manager)[0]
        second_element = list(first_item)[1]
        self.assertEqual(10, second_element.maxcount)
示例#22
0
    def test_esk702_only_ordered(self):
        """Test Esk-702: mimic data with only unordered categorical columns.

        NOTE(review): the method name says "only_ordered", but the macro that
        is run and the size checks below (two unordered, zero ordered
        categorical columns) concern the unordered case -- confirm the name.
        """
        # execute the tutorial macro
        macro = resources.tutorial('esk702_mimic_data_only_unordered.py')
        self.eskapade_run(macro)
        ds = process_manager.service(DataStore)

        # every key expected in the data store, with the type of its object
        expected_types = (
            ('df', pd.DataFrame),
            ('ids', np.ndarray),
            ('maps', dict),
            ('new_column_order', list),
            ('qts', list),
            ('data', np.ndarray),
            ('data_smoothed', np.ndarray),
            ('data_no_nans', np.ndarray),
            ('data_normalized', np.ndarray),
            ('unordered_categorical_i', list),
            ('ordered_categorical_i', list),
            ('continuous_i', list),
            ('bw', np.ndarray),
            ('data_resample', np.ndarray),
            ('df_resample', pd.DataFrame),
            ('chi2', np.float64),
            ('p_value', np.float64),
        )

        # -- make sure all was saved to the data store
        for key, _ in expected_types:
            self.assertIn(key, ds)

        # -- make sure they're of the right type
        for key, obj_type in expected_types:
            self.assertIsInstance(ds[key], obj_type)

        # -- two columns in the input and in each intermediate data set
        for key in ('df', 'data', 'data_smoothed', 'data_no_nans'):
            self.assertEqual(ds[key].shape[1], 2)

        # -- lengths of the normalized data and of the column-index lists
        expected_lengths = (
            ('data_normalized', 0),
            ('unordered_categorical_i', 2),
            ('ordered_categorical_i', 0),
            ('continuous_i', 0),
        )
        for key, length in expected_lengths:
            self.assertEqual(len(ds[key]), length)

        # -- bandwidths and resampled output
        self.assertEqual(ds['bw'].shape[0], 2)
        self.assertEqual(ds['data_resample'].shape[1], 2)
        self.assertEqual(ds['df_resample'].shape[1], 3)
示例#23
0
    def test_esk103(self):
        """Test Esk-103: run the print-datastore macro and check the stored objects."""
        macro = resources.tutorial('esk103_printdatastore.py')
        self.eskapade_run(macro)

        store = process_manager.service(DataStore)

        # a plain string and a nested mapping are expected
        self.assertEqual('world', store['hello'])
        for sub_key, value in (('a', 1), ('b', 2), ('c', 3)):
            self.assertEqual(value, store['d'][sub_key])
示例#24
0
    def test_esk206(self):
        """Test Esk-206: run the merge-pandas-dfs tutorial macro and check the merged frame."""
        # run Eskapade
        self.eskapade_run(resources.tutorial('esk206_merge_pandas_dfs.py'))
        ds = process_manager.service(DataStore)

        # assertIn names the missing key on failure, unlike assertTrue(... in ...)
        self.assertIn('outgoing', ds)
        df = ds['outgoing']
        # expected size of the output: 4 rows, 5 columns
        self.assertEqual(len(df.index), 4)
        self.assertEqual(len(df.columns), 5)
示例#25
0
    def test_esk109(self):
        """Test Esk-109: the debugging-tips macro is expected to end with a failure status."""
        settings = process_manager.service(ConfigObject)
        # this flag turns off ipython embed link
        settings['TESTING'] = True

        # the run is expected to return StatusCode.Failure
        self.eskapade_run(resources.tutorial('esk109_debugging_tips.py'),
                          StatusCode.Failure)

        # third element of the first item held by the process manager;
        # assertIsInstance reports the actual type on failure
        self.assertIsInstance(list(list(process_manager)[0])[2], Break)
示例#26
0
    def test_esk106_script(self, mock_argv):
        """Test Eskapade run with esk106 macro from script"""

        # expected settings start from a copy of the current configuration
        config = process_manager.service(ConfigObject)
        config['analysisName'] = 'esk106_cmdline_options'
        base_settings = config.copy()
        macro_path = resources.tutorial('esk106_cmdline_options.py')

        # the mocked argv reads from this list, which every run refills in place
        argv = []
        mock_argv.__getitem__ = lambda s, k: argv.__getitem__(k)

        # arguments and settings shared by all runs
        base_args = [macro_path, '-LDEBUG', '--batch-mode']
        base_settings['macro'] = macro_path
        base_settings['logLevel'] = LogLevel.DEBUG
        base_settings['batchMode'] = True

        def run_case(label, extra_args, extra_settings, expected_chains):
            """Run Eskapade with the extra arguments and compare chains and settings."""
            # refill the argument list in place, so the mock sees the new values
            argv[:] = base_args + extra_args
            expected_settings = base_settings.copy()
            expected_settings.update(extra_settings)

            # run Eskapade from a clean process manager
            process_manager.reset()
            entry_points.eskapade_run()
            actual_settings = process_manager.service(ConfigObject)

            # compare chain names and settings with the expectations
            chain_names = [chain.name for chain in process_manager.chains]
            self.assertListEqual(
                chain_names, expected_chains,
                'unexpected chain names in "{}" test'.format(label))
            self.assertDictEqual(
                actual_settings, expected_settings,
                'unexpected settings in "{}" test'.format(label))

        # run both chains
        run_case(
            'both chains',
            ['--store-all', '-cdo_chain0=True', '-cdo_chain1=True'],
            dict(storeResultsEachChain=True, do_chain0=True, do_chain1=True),
            ['Chain0', 'Chain1'])

        # run only last chain by skipping the first
        run_case(
            'skip first',
            ['-bChain1', '-cdo_chain0=True', '-cdo_chain1=True'],
            dict(beginWithChain='Chain1', do_chain0=True, do_chain1=True),
            ['Chain0', 'Chain1'])

        # run only last chain by not defining the first
        run_case(
            'no first',
            ['-cdo_chain0=False', '-cdo_chain1=True'],
            dict(do_chain0=False, do_chain1=True),
            ['Chain1'])
示例#27
0
    def test_esk203(self):
        """Test Esk-203: run the apply-func-to-pandas-df tutorial macro and check the new columns."""
        # run Eskapade
        self.eskapade_run(resources.tutorial('esk203_apply_func_to_pandas_df.py'))
        ds = process_manager.service(DataStore)

        # assertIn names the missing key or column on failure
        self.assertIn('transformed_data', ds)
        df = ds['transformed_data']
        self.assertIn('xx', df.columns)
        self.assertIn('yy', df.columns)
示例#28
0
    def test_esk201(self):
        """Test Esk-201: run the read-data tutorial macro and check the loaded data sets."""
        # run Eskapade
        self.eskapade_run(resources.tutorial('esk201_readdata.py'))
        ds = process_manager.service(DataStore)

        # assertIn names the missing key on failure, unlike assertTrue(... in ...)
        self.assertIn('test1', ds)
        self.assertIn('test2', ds)
        # record counts stored alongside the data sets
        self.assertEqual(12, ds['n_test1'])
        self.assertEqual(36, ds['n_test2'])
示例#29
0
    def test_esk102(self):
        """Test Esk-102: run the multiple-chains macro and check flags and chain count."""
        macro = resources.tutorial('esk102_multiple_chains.py')
        self.eskapade_run(macro)

        config = process_manager.service(ConfigObject)

        # all three chain flags must be set
        for flag in ('do_chain0', 'do_chain1', 'do_chain2'):
            self.assertTrue(config[flag])
        # the process manager must report a length of three
        self.assertEqual(3, len(process_manager))
示例#30
0
    def test_esk501(self):
        """Test Esk-501: fixing pandas dataframe"""

        # run Eskapade
        self.eskapade_run(resources.tutorial('esk501_fix_pandas_dataframe.py'))
        ds = process_manager.service(DataStore)

        # the original data frame and its three fixed versions
        frame_keys = ('vrh', 'vrh_fix1', 'vrh_fix2', 'vrh_fix3')
        for key in frame_keys:
            self.assertIn(key, ds)
        for key in frame_keys:
            self.assertIsInstance(ds[key], pd.DataFrame)
        for key in frame_keys:
            self.assertEqual(len(ds[key].index), 5)

        # Column dtypes of the unfixed frame.  The former check
        # assertIsInstance(dtype, np.object) was always true (everything is an
        # instance of object) and fails outright on NumPy >= 1.24, where the
        # np.object alias no longer exists; compare against the object dtype.
        self.assertEqual(ds['vrh']['B'].dtype, np.dtype(object))
        self.assertEqual(ds['vrh']['C'].dtype, np.dtype(object))
        self.assertIsInstance(ds['vrh']['D'].dtype.type(), np.float64)

        # values of the unfixed frame; column D is compared only on its first three entries
        self.assertListEqual(ds['vrh']['A'].values.tolist(), [True, False, np.nan, np.nan, np.nan])
        self.assertListEqual(ds['vrh']['B'].values.tolist(), ['foo', 'bar', '3', np.nan, np.nan])
        self.assertListEqual(ds['vrh']['C'].values.tolist(), ['1.0', '2.0', 'bal', np.nan, np.nan])
        self.assertListEqual(ds['vrh']['D'].values.tolist()[:3], [1.0, 2.0, 3.0])
        self.assertListEqual(ds['vrh']['E'].values.tolist(), ['1', '2', 'bla', np.nan, np.nan])
        self.assertListEqual(ds['vrh']['F'].values.tolist(), ['1', '2.5', 'bar', np.nan, np.nan])
        self.assertListEqual(ds['vrh']['G'].values.tolist(), ['a', 'b', 'c', 'd', np.nan])
        self.assertListEqual(ds['vrh']['H'].values.tolist(), ['a', 'b', '1', '2', '3'])

        # values after the first fix
        self.assertListEqual(ds['vrh_fix1']['A'].values.tolist()[:2], [1.0, 0.0])
        self.assertListEqual(ds['vrh_fix1']['B'].values.tolist(), ['foo', 'bar', '3', np.nan, np.nan])
        self.assertListEqual(ds['vrh_fix1']['C'].values.tolist()[:2], [1.0, 2.0])
        self.assertListEqual(ds['vrh_fix1']['D'].values.tolist()[:3], [1.0, 2.0, 3.0])
        self.assertListEqual(ds['vrh_fix1']['E'].values.tolist()[:2], [1, 2])
        self.assertListEqual(ds['vrh_fix1']['F'].values.tolist()[:3], ['1', '2.5', 'bar'])
        self.assertListEqual(ds['vrh_fix1']['G'].values.tolist(), ['a', 'b', 'c', 'd', np.nan])
        self.assertListEqual(ds['vrh_fix1']['H'].values.tolist()[2:5], [1.0, 2.0, 3.0])

        # values after the second fix
        self.assertListEqual(ds['vrh_fix2']['B'].values.tolist()[2:3], [3])
        self.assertListEqual(ds['vrh_fix2']['C'].values.tolist(), ['1.0', '2.0', 'bal', np.nan, np.nan])

        # values after the third fix, where missing entries carry placeholder values
        self.assertListEqual(ds['vrh_fix3']['A'].values.tolist()[:2], [1.0, 0.0])
        self.assertListEqual(ds['vrh_fix3']['B'].values.tolist(), ['foo', 'bar', '3', 'not_a_str', 'not_a_str'])
        self.assertListEqual(ds['vrh_fix3']['C'].values.tolist()[:2], [1.0, 2.0])
        self.assertListEqual(ds['vrh_fix3']['D'].values.tolist()[:3], [1.0, 2.0, 3.0])
        self.assertListEqual(ds['vrh_fix3']['E'].values.tolist(), [1, 2, -999, -999, -999])
        self.assertListEqual(ds['vrh_fix3']['F'].values.tolist(), ['1', '2.5', 'bar', 'not_a_str', 'not_a_str'])
        self.assertListEqual(ds['vrh_fix3']['G'].values.tolist(), ['a', 'b', 'c', 'd', 'GREPME'])
        self.assertListEqual(ds['vrh_fix3']['H'].values.tolist(), [-999, -999, 1, 2, 3])