def test_correlate_categories_builder(self):
     """Map PCP tax ids to provider names, then run the intent pipeline.

     A ten-row frame (five tax ids, repeated twice) gets a ``pcp_name``
     column from ``correlate_categories``; the frame is then passed to
     ``run_intent_pipeline`` and the result is expected to have ten rows
     and two columns.
     """
     wrangle = Wrangle.from_env('test', has_contract=False)
     persist_contract = ConnectorContract(
         uri="eb://synthetic_members",
         module_name='ds_engines.handlers.event_handlers',
         handler='EventPersistHandler')
     wrangle.set_persist_contract(persist_contract)

     tax_ids = [993406113, 133757370, 260089066, 448512481, 546434723]
     # Position in this list is the action key passed to the correlation.
     provider_names = [
         'LABCORP OF AMERICA',
         'LPCH MEDICAL GROUP',
         'ST JOSEPH HERITAGE MEDICAL',
         'MONARCH HEALTHCARE',
         'PRIVIA MEICAL GROUP',
     ]

     members = pd.DataFrame()
     members['pcp_tax_id'] = tax_ids * 2
     members['pcp_name'] = wrangle.tools.correlate_categories(
         members,
         header='pcp_tax_id',
         correlations=list(tax_ids),
         actions=dict(enumerate(provider_names)),
         column_name='pcp_name')

     pipeline_result = wrangle.tools.run_intent_pipeline(members)
     self.assertEqual((10, 2), pipeline_result.shape)
    def setUp(self):
        """Reset the HADRON environment and persist a three-task chain.

        Removes every ``HADRON*`` environment variable, points the property
        manager and default data paths at ``work/config`` and ``work/data``,
        creates both directories, and then persists a SyntheticBuilder
        ('task1'), a Transition ('task2') and a Wrangle ('task3') where each
        later task sources from the previous task's persist contract URI.

        Raises:
            IOError: if either working directory cannot be created
                (``os.makedirs`` also fails when the directory already
                exists, so a leftover ``work`` tree is reported here).
        """
        # clean out any old environments; snapshot the keys so entries can
        # be deleted safely while looping
        for key in list(os.environ):
            if key.startswith('HADRON'):
                del os.environ[key]

        os.environ['HADRON_PM_PATH'] = os.path.join('work', 'config')
        os.environ['HADRON_DEFAULT_PATH'] = os.path.join('work', 'data')
        try:
            os.makedirs(os.environ['HADRON_PM_PATH'])
            os.makedirs(os.environ['HADRON_DEFAULT_PATH'])
        except OSError as err:
            # Only filesystem errors are translated; keep the original cause
            # attached so the failing path is visible in the traceback.
            raise IOError('Unable to create directories') from err
        PropertyManager._remove_all()
        builder = SyntheticBuilder.from_env('task1', has_contract=False)
        builder.set_persist()
        builder.pm_persist()
        tr = Transition.from_env('task2', has_contract=False)
        tr.set_source_uri(builder.get_persist_contract().raw_uri)
        tr.set_persist()
        tr.pm_persist()
        wr = Wrangle.from_env('task3', has_contract=False)
        wr.set_source_uri(tr.get_persist_contract().raw_uri)
        wr.set_persist()
        wr.pm_persist()
Пример #3
0
 def setUp(self):
     """Reset the HADRON environment and register a two-task controller.

     Removes every ``HADRON*`` environment variable, configures the
     property manager path/type and the default data path under
     ``working/``, makes sure both directories exist, and then sets up a
     Transition ('task1') reading the titanic CSV, a Wrangle ('task2')
     sourcing from the Transition's persist contract URI, and a Controller
     with one transition intent and one wrangle intent.
     """
     # clean out any old environments; snapshot the keys so entries can be
     # deleted safely while looping
     for key in list(os.environ):
         if key.startswith('HADRON'):
             del os.environ[key]
     # Local Domain Contract
     os.environ['HADRON_PM_PATH'] = os.path.join('working', 'contracts')
     os.environ['HADRON_PM_TYPE'] = 'json'
     # Local Connectivity
     os.environ['HADRON_DEFAULT_PATH'] = Path('working/data').as_posix()
     # Specialist Component
     # exist_ok tolerates directories left over from earlier runs while
     # still surfacing genuine filesystem failures instead of hiding them.
     for env_name in ('HADRON_PM_PATH', 'HADRON_DEFAULT_PATH'):
         os.makedirs(os.environ[env_name], exist_ok=True)
     PropertyManager._remove_all()
     tr = Transition.from_env('task1', has_contract=False)
     tr.set_source_uri(
         "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv"
     )
     tr.set_persist()
     wr = Wrangle.from_env('task2', has_contract=False)
     wr.set_source_uri(tr.get_persist_contract().raw_uri)
     wr.set_persist()
     controller = Controller.from_env(has_contract=False)
     controller.intent_model.transition(canonical=pd.DataFrame(),
                                        task_name='task1',
                                        intent_level='transition')
     controller.intent_model.wrangle(canonical=pd.DataFrame(),
                                     task_name='task2',
                                     intent_level='wrangle')
Пример #4
0
 def test_repeat_iterations(self):
     """Run the controller with different repeat settings and count outputs.

     After each controller run the number (or name) of files found in
     ``work/data/`` is checked; the directory is wiped and recreated
     between runs.
     """

     def reset_data_dir():
         # Drop everything written so far and start from an empty folder.
         shutil.rmtree('work/data')
         os.makedirs(os.environ['HADRON_DEFAULT_PATH'])

     wrangle = Wrangle.from_env('task2')
     fixed_name = wrangle.pm.file_pattern(name='tester',
                                          prefix='result1_',
                                          file_type='parquet')
     wrangle.set_persist(fixed_name)
     ctrl = Controller.from_env()

     # Single run with the fixed file name: exactly that one file.
     ctrl.run_controller(repeat=1)
     self.assertEqual(['result1_tester.parquet'], os.listdir('work/data/'))

     # Switch to a pattern built with stamped='ns' and repeat three times.
     reset_data_dir()
     stamped_name = wrangle.pm.file_pattern(name='tester',
                                            prefix='result1_',
                                            file_type='parquet',
                                            stamped='ns')
     wrangle.set_persist(stamped_name)
     ctrl.run_controller(repeat=3)
     self.assertEqual(3, len(os.listdir('work/data/')))

     # Two repeats with sleep=1.
     reset_data_dir()
     ctrl.run_controller(repeat=2, sleep=1)
     self.assertEqual(2, len(os.listdir('work/data/')))

     # Two repeats with sleep=1 and run_time=4: four files expected.
     reset_data_dir()
     ctrl.run_controller(repeat=2, sleep=1, run_time=4)
     self.assertEqual(4, len(os.listdir('work/data/')))
Пример #5
0
    def setUp(self):
        """Reset the HADRON environment and register a two-task controller.

        Removes every ``HADRON*`` environment variable, points the property
        manager and default data paths at ``work/config`` and ``work/data``,
        makes sure both directories exist, and then sets up a Transition
        ('task1') reading the titanic CSV, a Wrangle ('task2') sourcing from
        the Transition's persist contract URI, and a Controller with one
        transition intent ('task1_tr') and one wrangle intent ('task2_wr').
        """
        # clean out any old environments; snapshot the keys so entries can
        # be deleted safely while looping
        for key in list(os.environ):
            if key.startswith('HADRON'):
                del os.environ[key]

        os.environ['HADRON_PM_PATH'] = os.path.join('work', 'config')
        os.environ['HADRON_DEFAULT_PATH'] = os.path.join('work', 'data')
        # Create each directory independently: with a shared try block an
        # already-existing config directory used to skip creating the data
        # directory. exist_ok tolerates leftovers from earlier runs while
        # still surfacing genuine filesystem failures.
        for env_name in ('HADRON_PM_PATH', 'HADRON_DEFAULT_PATH'):
            os.makedirs(os.environ[env_name], exist_ok=True)
        PropertyManager._remove_all()
        tr = Transition.from_env('task1', has_contract=False)
        tr.set_source_uri(
            "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/titanic.csv"
        )
        tr.set_persist()
        wr = Wrangle.from_env('task2', has_contract=False)
        wr.set_source_uri(tr.get_persist_contract().raw_uri)
        wr.set_persist()
        controller = Controller.from_env(has_contract=False)
        controller.intent_model.transition(canonical=pd.DataFrame(),
                                           task_name='task1',
                                           intent_level='task1_tr')
        controller.intent_model.wrangle(canonical=pd.DataFrame(),
                                        task_name='task2',
                                        intent_level='task2_wr')
 def test_runs(self):
     """Basic smoke test: the component exposes a WrangleIntentModel."""
     im = Wrangle.from_env('tester',
                           default_save=False,
                           default_save_intent=False,
                           reset_templates=False,
                           has_contract=False).intent_model
     # assertTrue(a, b) treats b as the failure message and only checks
     # that a is truthy, so the previous check could never fail. Compare
     # the actual type instead.
     self.assertIsInstance(im, WrangleIntentModel)
 def tools(self) -> WrangleIntentModel:
     """Return whatever ``Wrangle.scratch_pad()`` provides."""
     scratch_pad = Wrangle.scratch_pad()
     return scratch_pad
 def test_model_explode(self):
     """Exploding list column ``B`` yields one row per list element."""
     source = pd.DataFrame({"A": [1, 2, 3], "B": [[2, 2], [3], [7, 8, 9]]})
     wrangle = Wrangle.from_memory(default_save_intent=False)
     exploded = wrangle.tools.model_explode(source, header="B")

     # Column "A" repeats once per element of the matching "B" list.
     expected_columns = {
         "A": [1, 1, 2, 3, 3, 3],
         "B": [2, 2, 3, 7, 8, 9],
     }
     for column, expected_values in expected_columns.items():
         self.assertEqual(expected_values, exploded[column].to_list())