def test_source_handler(self):
     """Load 'example01.csv' and 'example01.dat' through PandasSourceHandler and check the canonical shape.

     The '.csv' uri is loaded without an explicit file type; the '.dat' uri is loaded with
     file_type='csv'. Both are expected to give a (1000, 11) canonical.
     """
     expected_shape = (1000, 11)
     # '.csv' uri, no explicit file type
     csv_uri = os.path.join(os.environ['HADRON_DEFAULT_PATH'], 'example01.csv')
     csv_handler = PandasSourceHandler(ConnectorContract(csv_uri, '', ''))
     self.assertIsInstance(csv_handler.supported_types(), list)
     self.assertGreater(len(csv_handler.supported_types()), 0)
     csv_canonical = csv_handler.load_canonical()
     self.assertEqual(expected_shape, csv_canonical.shape)
     self.assertTrue(csv_handler.has_changed())
     # '.dat' uri, file type given explicitly as a contract kwarg
     dat_uri = os.path.join(os.environ['HADRON_DEFAULT_PATH'], 'example01.dat')
     dat_contract = ConnectorContract(dat_uri, '', '', file_type='csv')
     dat_canonical = PandasSourceHandler(dat_contract).load_canonical()
     self.assertEqual(expected_shape, dat_canonical.shape)
    def add_book_contract(self,
                          book_name: str,
                          with_log: bool = None,
                          file_type: str = None,
                          versioned: bool = None,
                          stamped: bool = None,
                          save: bool = None,
                          **kwargs):
        """ adds an event book connector using the book connector template and appending a book pattern to the URI path

        When ``with_log`` is True a second connector, named ``<book_name>_log``, is added. Its URI is built
        from the template path and its own file pattern so that it does not share the book's URI.

        :param book_name: the name of the event book
        :param with_log: (optional) if an events log connector should be created
        :param file_type: (optional) a file type extension. defaults to 'pickle'
        :param versioned: (optional) if the connector uri should be versioned
        :param stamped: (optional) if the connector uri should be timestamped
        :param save: (optional) override of the default save action set at initialisation.
        :param kwargs: extra kwargs to pass to the connector
        :raise ConnectionError: if the book template connector has not been set
        """
        if not self.pm.has_connector(
                connector_name=self.BOOK_TEMPLATE_CONNECTOR):
            raise ConnectionError(
                "The book template connector has not been set")
        template = self.pm.get_connector_contract(self.BOOK_TEMPLATE_CONNECTOR)
        uri_file = self.pm.file_pattern(name=book_name,
                                        file_type=file_type,
                                        versioned=versioned,
                                        stamped=stamped)
        uri = os.path.join(template.path, uri_file)
        # the template kwargs are applied last, so they override any same-named kwargs passed in
        kwargs.update(template.raw_kwargs)
        cc = ConnectorContract(uri=uri,
                               module_name=template.module_name,
                               handler=template.handler,
                               **kwargs)
        self.add_connector_contract(connector_name=book_name,
                                    connector_contract=cc,
                                    template_aligned=True,
                                    save=save)
        # add the log persist
        if isinstance(with_log, bool) and with_log:
            log = f"{book_name}_log"
            log_file = self.pm.file_pattern(name=log, file_type=file_type)
            # the log gets its own uri under the template path; reusing the book uri
            # would point both connectors at the same resource
            uri_log = os.path.join(template.path, log_file)
            lc = ConnectorContract(uri=uri_log,
                                   module_name=template.module_name,
                                   handler=template.handler,
                                   **kwargs)
            # register under the log name, mirroring how the book is registered under book_name
            self.add_connector_contract(connector_name=log,
                                        connector_contract=lc,
                                        template_aligned=True,
                                        save=save)
        return
 def test_persist_backup(self):
     """Persist, reload and remove a json canonical, then back it up to a parquet uri."""
     contract = ConnectorContract('work/data/2_transition/example01.json', '', '')
     handler = PandasPersistHandler(contract)
     builder = SyntheticBuilder.scratch_pad()
     noise = builder.model_noise(pd.DataFrame(index=range(1000)), num_columns=10)
     self.assertTrue(handler.persist_canonical(noise))
     reloaded = pd.DataFrame(data=handler.load_canonical())
     self.assertEqual((1000, 10), reloaded.shape)
     handler.remove_canonical()
     self.assertFalse(handler.exists())
     # Backup: the raw uri string (query included) must not exist as a path beforehand
     backup_uri = 'work/data/2_transition/example01.pq.bak?file_type=parquet'
     self.assertFalse(os.path.exists(backup_uri))
     handler.backup_canonical(canonical=reloaded, uri=backup_uri)
     backup_path = ConnectorContract.parse_address(backup_uri)
     self.assertTrue(os.path.exists(backup_path))
    def set_book_contract_template(self,
                                   uri_path: str = None,
                                   module_name: str = None,
                                   handler: str = None,
                                   save: bool = None,
                                   **kwargs):
        """ registers the book template connector, the base contract for all event book persistence.
        Any of uri_path, module_name or handler that is not passed as a string is taken from the
        persist connector template. An existing book template connector is removed before the new
        one is set.

        :param uri_path: a uri path
        :param module_name: a module package name
        :param handler: a handler
        :param save: override of the default save action set at initialisation.
        :param kwargs: additional kwargs
        """
        persist_template = self.pm.get_connector_contract(self.TEMPLATE_PERSIST)
        # fall back to the persist template's raw values for anything not given as a string
        if not isinstance(uri_path, str):
            uri_path = persist_template.raw_uri
        if not isinstance(module_name, str):
            module_name = persist_template.raw_module_name
        if not isinstance(handler, str):
            handler = persist_template.raw_handler
        contract_kwargs = kwargs if isinstance(kwargs, dict) else {}
        # the template kwargs are applied last, so they override same-named kwargs passed in
        contract_kwargs.update(persist_template.raw_kwargs)
        book_template = ConnectorContract(uri=uri_path,
                                          module_name=module_name,
                                          handler=handler,
                                          **contract_kwargs)
        template_name = self.BOOK_TEMPLATE_CONNECTOR
        if self.pm.has_connector(template_name):
            self.remove_connector_contract(connector_name=template_name)
        self.pm.set_connector_contract(connector_name=template_name,
                                       connector_contract=book_template)
        self.pm_persist(save=save)
 def test_persist(self):
     """Check when the state and events-log files appear as events are added to a PandasEventBook.

     Ten events are added first with nothing expected on disk. The count distance is then set to 3
     and the events-log distance to 2, and four more events are added one at a time, checking the
     files and the size of the in-memory events log after each.
     """
     pm_path = os.environ['HADRON_PM_PATH']
     state_uri = os.path.join(pm_path, 'state.pickle')
     events_uri = os.path.join(pm_path, 'events_log.pickle')
     state_connector = ConnectorContract(uri=state_uri,
                                         module_name=self.MODULE,
                                         handler=self.HANDLER)
     events_connector = ConnectorContract(uri=events_uri,
                                          module_name=self.MODULE,
                                          handler=self.HANDLER)
     engine = PandasEventBook('test',
                              state_connector=state_connector,
                              events_log_connector=events_connector)

     def pending_count():
         # number of entries currently held in the in-memory events log
         return len(engine._current_events_log.keys())

     def add_ones_event():
         engine.increment_event(event=pd.DataFrame(data={'A': [1, 1, 1]}))

     self.assertFalse(os.path.exists(state_uri))
     self.assertFalse(os.path.exists(events_uri))
     for step in range(10):
         column = [step * factor for factor in (1, 2, 3)]
         engine.increment_event(event=pd.DataFrame(data={'A': column}))
     self.assertEqual(0, pending_count(), "loop run")
     self.assertFalse(os.path.exists(state_uri))
     self.assertFalse(os.path.exists(events_uri))
     engine.set_count_distance(3)
     engine.set_events_log_distance(2)
     # add one
     add_ones_event()
     self.assertFalse(os.path.exists(state_uri))
     self.assertFalse(os.path.exists(events_uri))
     self.assertEqual(1, pending_count(), "loop One")
     # add two
     add_ones_event()
     self.assertFalse(os.path.exists(state_uri))
     self.assertTrue(os.path.exists(events_uri))
     self.assertEqual(0, pending_count(), "loop Two")
     # add three
     add_ones_event()
     self.assertTrue(os.path.exists(state_uri))
     self.assertTrue(os.path.exists(events_uri))
     self.assertEqual(0, pending_count(), "loop Three")
     # add four
     add_ones_event()
     self.assertEqual(1, pending_count(), "loop Four")
 def test_json(self):
     """Round-trip a 1000-row, 10-column noise frame through a json persist handler, then remove it."""
     builder = SyntheticBuilder.scratch_pad()
     source = builder.model_noise(pd.DataFrame(index=range(1000)), num_columns=10)
     contract = ConnectorContract('work/data/2_transition/handler_test.json',
                                  '', '', file_type='json')
     handler = PandasPersistHandler(contract)
     handler.persist_canonical(source)
     self.assertTrue(handler.exists())
     # the loaded canonical is wrapped in a DataFrame before it is compared
     loaded = pd.DataFrame(data=handler.load_canonical())
     self.assertEqual(source.shape, loaded.shape)
     self.assertCountEqual(source.columns, loaded.columns)
     handler.remove_canonical()
     self.assertFalse(handler.exists())
 def test_aws_connector_init(self):
     """Persist a dict through S3PersistHandler and load it back as a dict and as a DataFrame."""
     contract = ConnectorContract(
         uri='s3://project-hadron-cs-repo/factory/healthcare/members',
         module_name='',
         handler='')
     handler = S3PersistHandler(connector_contract=contract)
     handler.persist_canonical({'a': [1, 2, 3, 4, 5]})
     # default load is expected to give a dict
     as_dict = handler.load_canonical()
     self.assertIsInstance(as_dict, dict)
     # 'as_dataframe' read param is expected to give a DataFrame
     as_frame = handler.load_canonical(read_params={'as_dataframe': True})
     self.assertIsInstance(as_frame, pd.DataFrame)
Пример #8
0
 def test_handler(self):
     """Smoke test: persist a small frame to an event book uri and load it back."""
     contract = ConnectorContract(uri='eb://test_book',
                                  module_name='',
                                  handler='')
     handler = EventPersistHandler(connector_contract=contract)
     # persist then load, checking column names and shape survive
     frame = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [7, 2, 1, 4]})
     handler.persist_canonical(frame)
     loaded = handler.load_canonical()
     self.assertEqual(['A', 'B'], list(loaded.columns))
     self.assertEqual((4, 2), loaded.shape)
 def test_file_column(self):
     """Write a two-column csv, then read 3 values of its 'cat' column through a connector contract."""
     tools = DataBuilderTools
     csv_name = "test_df.csv"
     frame = pd.DataFrame()
     frame['cat'] = tools.get_category(list('MFU'), size=10, seed=31)
     frame['values'] = tools.get_number(10, size=10, seed=31)
     frame.to_csv(csv_name, sep=',', index=False)
     contract = ConnectorContract(
         uri=csv_name,
         module_name='aistac.handlers.python_handlers',
         handler='PythonSourceHandler')
     column = tools.get_column('cat', contract, size=3, seed=31)
     self.assertEqual((3, 1), column.shape)
Пример #10
0
 def test_transition_summary_report(self):
     """Check the top-level keys of the quality summary report returned as a dict."""
     tr: Transition = Transition.from_env('test',
                                          default_save=False,
                                          default_save_intent=False,
                                          has_contract=False)
     sample_uri = os.path.join(os.environ['HOME'], 'code', 'projects',
                               'data', 'sample', 'synthetic_customer.csv')
     source_contract = ConnectorContract(uri=sample_uri,
                                         module_name=tr.DEFAULT_MODULE,
                                         handler=tr.DEFAULT_SOURCE_HANDLER)
     tr.set_source_contract(connector_contract=source_contract)
     summary = tr.report_quality_summary(as_dict=True)
     expected_keys = ['score', 'data_shape', 'data_type', 'usability', 'cost']
     self.assertEqual(expected_keys, list(summary.keys()))
 def test_change_flags(self):
     """Smoke test of the source handler's changed flag across load, reset and file re-creation."""
     file = os.path.join(os.environ['HADRON_DEFAULT_PATH'], 'example01.csv')
     # make sure the file exists without altering any existing content
     with open(file, 'a'):
         pass
     contract = ConnectorContract(uri=file, module_name='', handler='')
     source = PandasSourceHandler(contract)
     self.assertTrue(source.has_changed())
     source.load_canonical()
     self.assertFalse(source.has_changed())
     # the flag can be forced on, then cleared again
     source.reset_changed(True)
     self.assertTrue(source.has_changed())
     source.reset_changed()
     self.assertFalse(source.has_changed())
     # touch the file: remove it and re-create it with fresh timestamps
     os.remove(file)
     with open(file, 'a'):
         os.utime(file, None)
     self.assertTrue(source.has_changed())
Пример #12
0
 def test_has_changed(self):
     """Follow the event handler's changed flag across persist, load and reset."""
     contract = ConnectorContract(uri='eb://test_portfolio/test_book',
                                  module_name='',
                                  handler='')
     handler = EventPersistHandler(connector_contract=contract)
     # nothing has been persisted yet
     self.assertFalse(handler.has_changed())
     first = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [7, 2, 1, 4]})
     handler.persist_canonical(first)
     self.assertTrue(handler.has_changed())
     loaded = handler.load_canonical()
     self.assertDictEqual(first.to_dict(), loaded.to_dict())
     # the flag is still expected to be set after loading, until an explicit reset
     self.assertTrue(handler.has_changed())
     handler.reset_changed()
     self.assertFalse(handler.has_changed())
     # persist a second frame without resetting state; the result is only printed
     second = pd.DataFrame({'C': [9, 8, 7, 3], 'B': [4, 2, 1, 0]})
     handler.persist_canonical(second, reset_state=False)
     print(handler.load_canonical())
 def test_persist_handler(self):
     """Exercise a pickle persist handler: persist, reload, and re-persist with a changed column set."""
     contract = ConnectorContract('work/data/2_transition/example01.pkl', '', '', file_type='pickle')
     handler = PandasPersistHandler(contract)
     self.assertIsInstance(handler.supported_types(), list)
     self.assertGreater(len(handler.supported_types()), 0)
     self.assertFalse(handler.has_changed())
     self.assertFalse(handler.exists())
     # create the file and persist
     builder = SyntheticBuilder.scratch_pad()
     canonical = builder.model_noise(pd.DataFrame(index=range(1000)), num_columns=10)
     self.assertTrue(handler.persist_canonical(canonical))
     self.assertTrue(handler.exists())
     self.assertTrue(handler.has_changed())
     canonical = handler.load_canonical()
     self.assertEqual((1000, 10), canonical.shape)
     # write again to check modified
     canonical['value'] = [0] * canonical.shape[0]
     self.assertTrue(handler.persist_canonical(canonical))
     canonical = handler.load_canonical()
     self.assertEqual((1000, 11), canonical.shape)
     self.assertTrue(handler.has_changed())
     # drop the extra column and persist once more; the shape check is on the in-memory frame
     canonical = canonical.drop('value', axis='columns')
     self.assertTrue(handler.persist_canonical(canonical))
     self.assertEqual((1000, 10), canonical.shape)
     self.assertTrue(handler.has_changed())
    def backup_canonical(self,
                         canonical: pd.DataFrame,
                         uri: str,
                         reset_state: bool = None,
                         **kwargs) -> bool:
        """ backs up the canonical into the event book named by the given uri. The backup event book is
        added with reset=True and the canonical is then added to that book as an event.

        :param canonical: the canonical to back up
        :param uri: the uri of the backup event book, as a minimum in the format 'eb://<book_name>'
        :param reset_state: currently not used; the backup event book is always added with reset=True
        :param kwargs: currently not used
        :return: True once the canonical has been added to the backup event book
        :raise ValueError: if the uri schema is not 'eb' or the uri has no book name
        """
        _schema, _book_name, _ = ConnectorContract.parse_address_elements(
            uri=uri)
        if _schema != 'eb' or len(_book_name) == 0:
            raise ValueError(
                "The connector contract uri must be in  the format 'eb://<book_name>' as a minimum"
            )
        self._controller.add_event_book(book_name=_book_name, reset=True)
        # write to the backup book parsed from the uri, not to this handler's own book
        self._controller.add_event(book_name=_book_name,
                                   event=canonical,
                                   fix_index=False)
        return True
 def test_runs(self):
     """Smoke test: both pandas handlers can be constructed from a connector contract."""
     source_contract = ConnectorContract('work/data/0_raw/example01.csv', '', '', file_type='csv')
     PandasSourceHandler(source_contract)
     persist_contract = ConnectorContract('work/data/2_transition/example01.pkl', '', '', file_type='pickle')
     PandasPersistHandler(persist_contract)