Exemplo n.º 1
0
    def test_esk306(self):
        """Run the esk306 tutorial macro and check the concatenated report.

        Verifies that the run succeeds, that the data store holds a
        ``report_pages`` list with 19 entries, and that a non-empty
        ``report.tex`` exists in the report directory.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk306_concatenate_reports.py'
        settings['batchMode'] = True

        status = execution.run_eskapade(settings)

        # look the services up again once the run has finished
        settings = ProcessManager().service(ConfigObject)
        ds = ProcessManager().service(DataStore)

        # report checks
        self.assertTrue(status.isSuccess())
        self.assertIn('report_pages', ds)
        self.assertIsInstance(ds['report_pages'], list)
        self.assertEqual(19, len(ds['report_pages']))

        # data-summary checks: each expected file exists and is non-empty
        file_names = ['report.tex']
        for fname in file_names:
            path = '{0:s}/{1:s}/data/v0/report/{2:s}'.format(
                settings['resultsDir'], settings['analysisName'], fname)
            self.assertTrue(os.path.exists(path), 'missing output: ' + path)
            self.assertGreater(os.stat(path).st_size, 0)
Exemplo n.º 2
0
    def test_esk302(self):
        """Run the esk302 tutorial macro and check histograms and report.

        Verifies that the run succeeds, that ``n_sum_rc`` equals 1300, that
        ``hist`` is a dict keyed by exactly the expected column names, and
        that a non-empty ``report.tex`` plus one ``hist_<column>.pdf`` per
        column exist in the report directory.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk302_histogram_filler_plotter.py'
        settings['batchMode'] = True

        status = execution.run_eskapade(settings)

        # look the services up again once the run has finished
        settings = ProcessManager().service(ConfigObject)
        ds = ProcessManager().service(DataStore)
        columns = [
            'date', 'isActive', 'age', 'eyeColor', 'gender', 'company',
            'latitude', 'longitude'
        ]

        # data-generation checks
        self.assertTrue(status.isSuccess())
        self.assertIn('n_sum_rc', ds)
        self.assertEqual(1300, ds['n_sum_rc'])
        self.assertIn('hist', ds)
        self.assertIsInstance(ds['hist'], dict)
        self.assertListEqual(sorted(ds['hist'].keys()), sorted(columns))

        # data-summary checks: each expected file exists and is non-empty
        file_names = ['report.tex'
                      ] + ['hist_{}.pdf'.format(col) for col in columns]
        for fname in file_names:
            path = '{0:s}/{1:s}/data/v0/report/{2:s}'.format(
                settings['resultsDir'], settings['analysisName'], fname)
            self.assertTrue(os.path.exists(path), 'missing output: ' + path)
            self.assertGreater(os.stat(path).st_size, 0)
Exemplo n.º 3
0
    def test_esk208(self):
        """Run the esk208 tutorial macro and check the factorizer round trip.

        Verifies that the run succeeds, that the expected keys are in the
        data store, and that ``test1`` and ``test1_refact`` both have 12
        rows and identical ``dummy`` and ``loc`` column values.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk208_record_factorizer.py'

        status = execution.run_eskapade(settings)

        ds = ProcessManager().service(DataStore)

        self.assertTrue(status.isSuccess())
        for key in ('test1', 'test1_fact', 'test1_refact', 'to_original'):
            self.assertIn(key, ds)

        df1 = ds['test1']
        df2 = ds['test1_refact']
        self.assertEqual(len(df1.index), 12)
        self.assertEqual(len(df2.index), 12)
        for col in ('dummy', 'loc'):
            self.assertIn(col, df1.columns)
        for col in ('dummy', 'loc'):
            self.assertIn(col, df2.columns)

        # the re-factorized frame must reproduce the original column values
        self.assertListEqual(df1['dummy'].values.tolist(),
                             df2['dummy'].values.tolist())
        self.assertListEqual(df1['loc'].values.tolist(),
                             df2['loc'].values.tolist())
Exemplo n.º 4
0
    def test_esk304(self):
        """Run the esk304 tutorial macro and check the box-plot output.

        Verifies that the run succeeds, that ``data`` is a 10000-row
        DataFrame with columns ``var_a``, ``var_b`` and ``var_c``, and that
        the listed report files exist and are non-empty.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings[
            'macro'] = settings['esRoot'] + '/tutorials/esk304_df_boxplot.py'
        settings['batchMode'] = True

        status = execution.run_eskapade(settings)

        # look the services up again once the run has finished
        settings = ProcessManager().service(ConfigObject)
        ds = ProcessManager().service(DataStore)

        # data-generation checks
        self.assertTrue(status.isSuccess())
        self.assertIn('data', ds)
        self.assertIsInstance(ds['data'], pd.DataFrame)
        self.assertEqual(10000, len(ds['data']))
        self.assertListEqual(sorted(ds['data'].columns),
                             ['var_a', 'var_b', 'var_c'])

        # data-summary checks: each expected file exists and is non-empty
        file_names = [
            'report_boxplots.tex', 'boxplot_var_a.pdf', 'boxplot_var_c.pdf'
        ]
        for fname in file_names:
            path = '{0:s}/{1:s}/data/v0/report/{2:s}'.format(
                settings['resultsDir'], settings['analysisName'], fname)
            self.assertTrue(os.path.exists(path), 'missing output: ' + path)
            self.assertGreater(os.stat(path).st_size, 0)
Exemplo n.º 5
0
    def run_eskapade(self,
                     macro,
                     return_status=definitions.StatusCode.Success):
        """Run Eskapade on a macro and assert the resulting status.

        :param macro: macro file name, resolved through
            ``persistence.io_path('macros', ...)``
        :param return_status: status the run is expected to return
        """

        proc_mgr = ProcessManager()
        settings = proc_mgr.service(ConfigObject)
        settings['macro'] = persistence.io_path('macros', settings.io_conf(),
                                                macro)
        status = execution.run_eskapade(settings)
        # assertEqual reports both values on failure, unlike assertTrue(a == b)
        self.assertEqual(status, return_status)
Exemplo n.º 6
0
    def test_esk105bc(self):
        """Run the esk105 B and C tutorial macros back to back.

        First runs ``esk105_B_store_each_chain.py`` and checks that a pickle
        file exists for chains 1 to 3. Then resets Eskapade, runs
        ``esk105_C_begin_at_chain3.py`` and checks that the data store holds
        exactly the objects ``f``, ``g`` and ``h`` with the expected values.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk105_B_store_each_chain.py'

        status = execution.run_eskapade(settings)

        settings = ProcessManager().service(ConfigObject)

        # results of all three chains have been persisted
        self.assertTrue(status.isSuccess())
        # the doubled braces leave a '{:d}' placeholder for the chain number
        path = '{0:s}/{1:s}/proc_service_data/v0/_chain{{:d}}/{2:s}.pkl'.format(
            settings['resultsDir'], settings['analysisName'], str(DataStore))
        for path_it in range(1, 4):
            chain_path = path.format(path_it)
            self.assertTrue(
                os.path.exists(chain_path), 'missing output: ' + chain_path)

        execution.reset_eskapade()

        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk105_C_begin_at_chain3.py'

        status = execution.run_eskapade(settings)

        ds = ProcessManager().service(DataStore)

        # object from all three chains are present
        self.assertTrue(status.isSuccess())
        for key in ('f', 'g', 'h'):
            self.assertIn(key, ds)
        self.assertEqual(3, len(ds.keys()))
        self.assertEqual(7, ds['f']['n_favorite'])
        self.assertEqual(1, ds['g']['a'])
        self.assertEqual(7, ds['h'][1])
Exemplo n.º 7
0
    def test_esk101(self):
        """Run the esk101 hello-world tutorial macro and check its settings.

        Verifies that the run succeeds, that ``do_hello`` is truthy and that
        ``n_repeat`` equals 2.
        """
        config = ProcessManager().service(ConfigObject)
        config['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        macro_path = config['esRoot'] + '/tutorials/esk101_helloworld.py'
        config['macro'] = macro_path

        run_status = execution.run_eskapade(config)

        # look the configuration up again once the run has finished
        config = ProcessManager().service(ConfigObject)

        self.assertTrue(run_status.isSuccess())
        self.assertTrue(config['do_hello'])
        self.assertEqual(2, config['n_repeat'])
Exemplo n.º 8
0
    def test_esk108map(self):
        """Run the esk108 map tutorial macro and check that it succeeds."""
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings['esRoot'] + '/tutorials/esk108_map.py'
        settings['TESTING'] = True

        status = execution.run_eskapade(settings)

        self.assertTrue(status.isSuccess())
Exemplo n.º 9
0
    def test_esk104(self):
        """Run the esk104 tutorial macro and check the data store contents.

        Verifies that the run succeeds and that the data store holds a
        single key, with ``ds['a']`` equal to 1.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk104_basic_datastore_operations.py'

        status = execution.run_eskapade(settings)

        ds = ProcessManager().service(DataStore)

        self.assertTrue(status.isSuccess())
        self.assertEqual(1, len(ds.keys()))
        self.assertEqual(1, ds['a'])
Exemplo n.º 10
0
    def test_esk205(self):
        """Run the esk205 tutorial macro and check the concatenated output.

        Verifies that the run succeeds, that ``outgoing`` is in the data
        store and that ``n_outgoing`` equals 12.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk205_concatenate_pandas_dfs.py'

        status = execution.run_eskapade(settings)

        ds = ProcessManager().service(DataStore)

        self.assertTrue(status.isSuccess())
        self.assertIn('outgoing', ds)
        self.assertEqual(ds['n_outgoing'], 12)
Exemplo n.º 11
0
    def test_esk105a(self):
        """Run the esk105 A tutorial macro and check nothing was stored.

        Verifies that the run succeeds and that the
        ``<resultsDir>/<analysisName>`` path does not exist afterwards.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk105_A_dont_store_results.py'

        status = execution.run_eskapade(settings)

        # look the configuration up again once the run has finished
        settings = ProcessManager().service(ConfigObject)

        self.assertTrue(status.isSuccess())
        path = settings['resultsDir'] + '/' + settings['analysisName']
        self.assertFalse(os.path.exists(path), 'unexpected output: ' + path)
Exemplo n.º 12
0
    def test_esk107(self):
        """Run the esk107 chain-looper tutorial macro and check the result.

        Verifies that the run succeeds, that the data store is empty and
        that the second link of the first chain has ``maxcount`` 10.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings[
            'macro'] = settings['esRoot'] + '/tutorials/esk107_chain_looper.py'

        status = execution.run_eskapade(settings)

        pm = ProcessManager()
        ds = ProcessManager().service(DataStore)

        # chain is repeated 10 times, with nothing put in datastore
        self.assertTrue(status.isSuccess())
        self.assertEqual(0, len(ds.keys()))
        self.assertEqual(10, pm.chains[0].links[1].maxcount)
Exemplo n.º 13
0
    def test_esk102(self):
        """Run the esk102 multiple-chains tutorial macro and check the chains.

        Verifies that the run succeeds, that the ``do_chain0``,
        ``do_chain1`` and ``do_chain2`` settings are truthy and that the
        process manager holds three chains.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk102_multiple_chains.py'

        status = execution.run_eskapade(settings)

        pm = ProcessManager()
        # look the configuration up again once the run has finished
        settings = ProcessManager().service(ConfigObject)

        self.assertTrue(status.isSuccess())
        self.assertTrue(settings['do_chain0'])
        self.assertTrue(settings['do_chain1'])
        self.assertTrue(settings['do_chain2'])
        self.assertEqual(3, len(pm.chains))
Exemplo n.º 14
0
    def test_esk109(self):
        """Run the esk109 debugging-tips tutorial macro and expect a failure.

        Verifies that the third link of the first chain is a ``Break`` and
        that the run status reports failure.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk109_debugging_tips.py'

        # this flag turns off ipython embed link
        settings['TESTING'] = True

        status = execution.run_eskapade(settings)

        pm = ProcessManager()

        self.assertIsInstance(pm.chains[0].links[2], Break)
        self.assertTrue(status.isFailure())
Exemplo n.º 15
0
    def test_esk103(self):
        """Run the esk103 print-datastore tutorial macro and check its data.

        Verifies that the run succeeds, that ``ds['hello']`` is ``'world'``
        and that ``ds['d']`` maps ``a``, ``b``, ``c`` to 1, 2, 3.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk103_printdatastore.py'

        status = execution.run_eskapade(settings)

        ds = ProcessManager().service(DataStore)

        self.assertTrue(status.isSuccess())
        self.assertEqual('world', ds['hello'])
        self.assertEqual(1, ds['d']['a'])
        self.assertEqual(2, ds['d']['b'])
        self.assertEqual(3, ds['d']['c'])
Exemplo n.º 16
0
    def test_esk206(self):
        """Run the esk206 merge tutorial macro and check the merged frame.

        Verifies that the run succeeds and that ``outgoing`` is in the data
        store with 4 rows and 5 columns.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk206_merge_pandas_dfs.py'

        status = execution.run_eskapade(settings)

        ds = ProcessManager().service(DataStore)

        self.assertTrue(status.isSuccess())
        self.assertIn('outgoing', ds)
        df = ds['outgoing']
        self.assertEqual(len(df.index), 4)
        self.assertEqual(len(df.columns), 5)
Exemplo n.º 17
0
    def test_esk201(self):
        """Run the esk201 read-data tutorial macro and check what was read.

        Verifies that the run succeeds, that ``test1`` and ``test2`` are in
        the data store, and that ``n_test1`` is 12 and ``n_test2`` is 36.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings[
            'macro'] = settings['esRoot'] + '/tutorials/esk201_readdata.py'

        status = execution.run_eskapade(settings)

        ds = ProcessManager().service(DataStore)

        self.assertTrue(status.isSuccess())
        self.assertIn('test1', ds)
        self.assertIn('test2', ds)
        self.assertEqual(12, ds['n_test1'])
        self.assertEqual(36, ds['n_test2'])
Exemplo n.º 18
0
    def test_esk203(self):
        """Run the esk203 apply-function tutorial macro and check the output.

        Verifies that the run succeeds and that ``transformed_data`` is in
        the data store with columns ``xx`` and ``yy``.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk203_apply_func_to_pandas_df.py'

        status = execution.run_eskapade(settings)

        ds = ProcessManager().service(DataStore)

        self.assertTrue(status.isSuccess())
        self.assertIn('transformed_data', ds)
        df = ds['transformed_data']
        self.assertIn('xx', df.columns)
        self.assertIn('yy', df.columns)
Exemplo n.º 19
0
    def test_esk110(self):
        """Run the esk110 code-profiling tutorial macro and check its state.

        Verifies that the run succeeds, that there are no chains and no
        data store entries, and that the ``doCodeProfiling`` setting is
        ``'cumulative'``.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk110_code_profiling.py'

        status = execution.run_eskapade(settings)

        # look the services up again once the run has finished
        pm = ProcessManager()
        settings = ProcessManager().service(ConfigObject)
        ds = ProcessManager().service(DataStore)

        self.assertTrue(status.isSuccess())
        self.assertEqual(0, len(pm.chains))
        self.assertEqual(0, len(ds.keys()))
        self.assertIn('doCodeProfiling', settings)
        self.assertEqual('cumulative', settings['doCodeProfiling'])
Exemplo n.º 20
0
    def test_esk305(self):
        """Run the esk305 correlation-summary tutorial macro and check output.

        Verifies that the run succeeds, that ``input_data`` is a DataFrame
        with the expected columns, that ``correlations`` is a list of four
        DataFrames indexed by ``x1`` to ``x5``, and that a non-empty heatmap
        PDF exists for each correlation type.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk305_correlation_summary.py'
        settings['batchMode'] = True

        status = execution.run_eskapade(settings)
        self.assertTrue(status.isSuccess())

        # look the services up again once the run has finished
        settings = ProcessManager().service(ConfigObject)
        ds = ProcessManager().service(DataStore)

        # input data checks
        all_col_names = ['x1', 'x2', 'x3', 'x4', 'x5', 'Unnamed: 5']

        self.assertIn('input_data', ds)
        self.assertIsInstance(ds['input_data'], pd.DataFrame)
        self.assertListEqual(list(ds['input_data'].columns), all_col_names)

        self.assertIn('correlations', ds)
        self.assertIsInstance(ds['correlations'], list)
        corr_list = ds['correlations']
        self.assertEqual(4, len(corr_list))

        # correlation matrix checks
        col_names = ['x1', 'x2', 'x3', 'x4', 'x5']

        for corr in corr_list:
            self.assertIsInstance(corr, pd.DataFrame)
            # NOTE(review): the column check below was already disabled in
            # this test; the reason is not visible here -- confirm before
            # re-enabling.
            #self.assertListEqual(list(corr.columns), col_names)
            self.assertListEqual(list(corr.index), col_names)

        # heatmap pdf checks
        io_conf = settings.io_conf()
        results_path = persistence.io_path('results_data', io_conf, 'report')

        correlations = ['pearson', 'kendall', 'spearman', 'correlation_ratio']
        for corr in correlations:
            path = '{0:s}/correlations_input_data_{1:s}.pdf'.format(
                results_path, corr)
            self.assertTrue(os.path.exists(path), 'missing output: ' + path)
            self.assertGreater(os.stat(path).st_size, 0)
Exemplo n.º 21
0
    def test_esk204(self):
        """Run the esk204 apply-query tutorial macro and check the selection.

        Verifies that the run succeeds, that ``outgoing_records`` is in the
        data store with a positive ``n_outgoing_records``, and that the
        frame has columns ``a`` and ``c`` but not ``b``.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk204_apply_query_to_pandas_df.py'

        status = execution.run_eskapade(settings)

        ds = ProcessManager().service(DataStore)

        self.assertTrue(status.isSuccess())
        self.assertIn('outgoing_records', ds)
        self.assertGreater(ds['n_outgoing_records'], 0)
        df = ds['outgoing_records']
        self.assertIn('a', df.columns)
        self.assertNotIn('b', df.columns)
        self.assertIn('c', df.columns)
Exemplo n.º 22
0
    def test_esk202(self):
        """Run the esk202 write-data tutorial macro and check the written file.

        Verifies that the run succeeds, that ``n_test`` equals 36 and that
        a non-empty ``tmp3.csv`` exists under the results data directory.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings[
            'macro'] = settings['esRoot'] + '/tutorials/esk202_writedata.py'

        status = execution.run_eskapade(settings)

        # look the services up again once the run has finished
        settings = ProcessManager().service(ConfigObject)
        ds = ProcessManager().service(DataStore)

        self.assertTrue(status.isSuccess())
        self.assertEqual(36, ds['n_test'])
        path = settings['resultsDir'] + '/' + settings[
            'analysisName'] + '/data/v0/tmp3.csv'
        self.assertTrue(os.path.exists(path), 'missing output: ' + path)
        # check file is non-empty
        self.assertGreater(os.stat(path).st_size, 0)
Exemplo n.º 23
0
    def test_esk106(self):
        """Run the esk106 command-line-options tutorial macro.

        Sets ``do_chain0`` to False before the run, then verifies that the
        run succeeds, that only ``Chain1`` remains, that the chain flags
        have the expected values and that the first link's ``hello``
        attribute is ``'Universe'``.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk106_cmdline_options.py'

        # fake a setting from the cmd-line. picked up in the macro
        settings['do_chain0'] = False

        status = execution.run_eskapade(settings)

        pm = ProcessManager()
        # look the configuration up again once the run has finished
        settings = ProcessManager().service(ConfigObject)

        self.assertTrue(status.isSuccess())
        self.assertEqual(1, len(pm.chains))
        self.assertEqual('Chain1', pm.chains[0].name)
        self.assertEqual(False, settings.get('do_chain0', True))
        self.assertEqual(True, settings.get('do_chain1', True))
        self.assertEqual('Universe', pm.chains[0].links[0].hello)
Exemplo n.º 24
0
    def test_esk207(self):
        """Run the esk207 record-vectorizer tutorial macro and check output.

        Verifies that the run succeeds and that ``vect_test`` is in the
        data store with 12 rows and exactly the expected set of columns.
        """
        settings = ProcessManager().service(ConfigObject)
        settings['logLevel'] = definitions.LOG_LEVELS['DEBUG']
        settings['macro'] = settings[
            'esRoot'] + '/tutorials/esk207_record_vectorizer.py'

        status = execution.run_eskapade(settings)

        ds = ProcessManager().service(DataStore)

        columns = sorted([
            'x_1', 'x_3', 'x_5', 'x_4', 'y_9', 'y_8', 'y_7', 'y_6', 'y_5',
            'y_4'
        ])

        self.assertTrue(status.isSuccess())
        self.assertIn('vect_test', ds)
        df = ds['vect_test']
        self.assertEqual(len(df.index), 12)
        # compare sorted names so column order does not matter
        self.assertListEqual(sorted(df.columns.tolist()), columns)