def sync_data(self):
    """Sync ListSubscriber records (and optionally full Subscriber details)
    in date-windowed pages, advancing the bookmark in ``self.state`` as it goes.

    Window size is controlled by the ``pagination__list_subscriber_interval_unit``
    (default ``'days'``) and ``pagination__list_subscriber_interval_quantity``
    (default ``1``) config keys. State is saved after every completed window.
    """
    table = self.__class__.TABLE
    subscriber_dao = SubscriberDataAccessObject(self.config, self.state, self.auth_stub, self.subscriber_catalog)
    # pass config to return start date if no bookmark is found
    start = get_last_record_value_for_table(self.state, table, self.config)
    pagination_unit = self.config.get(
        'pagination__list_subscriber_interval_unit', 'days')
    pagination_quantity = self.config.get(
        'pagination__list_subscriber_interval_quantity', 1)
    # e.g. {'days': 1} — the size of each request window, fed to increment_date
    unit = {pagination_unit: int(pagination_quantity)}
    end = increment_date(start, unit)
    all_subscribers_list = self._get_all_subscribers_list()
    # walk window-by-window from the bookmark up to "now"
    while before_now(start):
        stream = request('ListSubscriber',
                         FuelSDK.ET_List_Subscriber,
                         self.auth_stub,
                         _get_list_subscriber_filter(
                             all_subscribers_list, start, unit),
                         batch_size=self.batch_size)
        batch_size = 100
        if self.replicate_subscriber:
            subscriber_dao.write_schema()
        # deep-copy so per-window transforms can't mutate the shared catalog
        catalog_copy = copy.deepcopy(self.catalog)
        for list_subscribers_batch in partition_all(stream, batch_size):
            for list_subscriber in list_subscribers_batch:
                list_subscriber = self.filter_keys_and_parse(
                    list_subscriber)
                # advance the ModifiedDate bookmark as records stream through
                if list_subscriber.get('ModifiedDate'):
                    self.state = incorporate(
                        self.state,
                        table,
                        'ModifiedDate',
                        list_subscriber.get('ModifiedDate'))
                self.write_records_with_transform(list_subscriber, catalog_copy, table)
            if self.replicate_subscriber:
                # make the list of subscriber keys
                subscriber_keys = list(
                    map(_get_subscriber_key, list_subscribers_batch))
                # pass the list of 'subscriber_keys' to fetch subscriber details
                subscriber_dao.pull_subscribers_batch(subscriber_keys)
        # persist state only after the whole window completed successfully
        save_state(self.state)
        start = end
        end = increment_date(start, unit)
def sync_data(self):
    """Sync ListSubscriber records (and optionally full Subscriber details)
    in date-windowed pages, advancing the bookmark in ``self.state`` as it goes.

    Window size is controlled by the ``pagination__list_subscriber_interval_unit``
    (default ``'days'``) and ``pagination__list_subscriber_interval_quantity``
    (default ``1``) config keys. State is saved after every completed window.
    """
    table = self.__class__.TABLE
    subscriber_dao = SubscriberDataAccessObject(self.config,
                                                self.state,
                                                self.auth_stub,
                                                self.subscriber_catalog)
    start = get_last_record_value_for_table(self.state, table,
                                            self.config.get('start_date'))
    if start is None:
        start = self.config.get('start_date')
    pagination_unit = self.config.get(
        'pagination__list_subscriber_interval_unit', 'days')
    # BUG FIX: key was misspelled 'pagination__list_subsctiber_interval_quantity',
    # so a user-configured quantity was silently ignored and the default of 1 used
    pagination_quantity = self.config.get(
        'pagination__list_subscriber_interval_quantity', 1)
    # e.g. {'days': 1} — the size of each request window, fed to increment_date
    unit = {pagination_unit: int(pagination_quantity)}
    end = increment_date(start, unit)
    all_subscribers_list = self._get_all_subscribers_list()
    # walk window-by-window from the bookmark up to "now"
    while before_now(start):
        stream = request(
            'ListSubscriber',
            FuelSDK.ET_List_Subscriber,
            self.auth_stub,
            _get_list_subscriber_filter(all_subscribers_list,
                                        start, unit))
        batch_size = 100
        if self.replicate_subscriber:
            subscriber_dao.write_schema()
        for list_subscribers_batch in partition_all(stream, batch_size):
            for list_subscriber in list_subscribers_batch:
                list_subscriber = self.filter_keys_and_parse(
                    list_subscriber)
                # advance the ModifiedDate bookmark as records stream through
                if list_subscriber.get('ModifiedDate'):
                    self.state = incorporate(
                        self.state,
                        table,
                        'ModifiedDate',
                        list_subscriber.get('ModifiedDate'))
                # strip sensitive fields before emitting the record downstream
                list_subscriber = self.remove_sensitive_data(
                    list_subscriber)
                singer.write_records(table, [list_subscriber])
            if self.replicate_subscriber:
                # fetch full subscriber details for every key in this batch
                subscriber_keys = list(
                    map(_get_subscriber_key, list_subscribers_batch))
                subscriber_dao.pull_subscribers_batch(subscriber_keys)
        # persist state only after the whole window completed successfully
        save_state(self.state)
        start = end
        end = increment_date(start, unit)
def test__partition_all(self):
    # verify that 'partition_all' correctly divides the records into the specified chunk size,
    # with the final chunk holding the remainder
    self.assertEqual(list(partition_all([1, 2, 3, 4, 5, 6, 7], 3)), [[1, 2, 3], [4, 5, 6], [7]])
def test__partition_all(self):
    """partition_all should split a sequence into fixed-size chunks,
    with the last chunk holding any remainder."""
    records = [1, 2, 3, 4, 5, 6, 7]
    expected = [[1, 2, 3], [4, 5, 6], [7]]
    actual = list(partition_all(records, 3))
    self.assertEqual(actual, expected)