def run(self, records):
    """Upload queued records batch by batch.

    The records are split into groups of at most ``BATCH_SIZE`` entries.
    Each group is passed through ``process_records`` and the resulting
    mapping is uploaded in chunks of at most ``BATCH_SIZE`` items via
    ``upload_records(..., from_queue=True)``.  Once every chunk of a group
    has been uploaded, the group is handed to ``processed_records``.

    If anything raises while a group is being processed or uploaded, the
    exception is logged and the run stops immediately; that group is not
    passed to ``processed_records`` and later groups are not touched.

    :param records: an iterable containing queue entries
    """
    uploader_name = type(self).__name__
    batches = grouper(records, self.BATCH_SIZE, skip_missing=True)
    for batch_no, batch in enumerate(batches, start=1):
        self.logger.info('%s processing batch %d', uploader_name, batch_no)
        try:
            processed_items = process_records(batch).items()
            chunks = grouper(processed_items, self.BATCH_SIZE, skip_missing=True)
            for chunk_no, chunk in enumerate(chunks, start=1):
                self.logger.info('%s uploading chunk #%d (batch %d)',
                                 uploader_name, chunk_no, batch_no)
                # Each chunk is a tuple of (key, value) pairs; rebuild a dict
                # from it before handing it to the uploader.
                self.upload_records(dict(chunk), from_queue=True)
        except Exception:
            # Abort the whole run on the first failing batch.
            self.logger.exception('%s could not upload batch', uploader_name)
            return
        else:
            self.logger.info('%s finished batch %d', uploader_name, batch_no)
            self.processed_records(batch)
    self.logger.info('%s finished', uploader_name)
def test_grouper():
    """Check ``grouper`` chunking, padding and ``skip_missing`` handling."""
    # (iterable, extra keyword arguments, expected list of chunks)
    cases = (
        # empty input yields no chunks
        ([], {}, []),
        # length is an exact multiple of the chunk size
        (range(6), {}, [(0, 1, 2), (3, 4, 5)]),
        # the last chunk is padded with None by default
        (range(7), {}, [(0, 1, 2), (3, 4, 5), (6, None, None)]),
        # a custom fill value is used for the padding
        (range(7), {'fillvalue': 'x'}, [(0, 1, 2), (3, 4, 5), (6, 'x', 'x')]),
        # skip_missing drops the padding instead of filling it
        (range(7), {'skip_missing': True}, [(0, 1, 2), (3, 4, 5), (6, )]),
    )
    for iterable, kwargs, expected in cases:
        assert list(grouper(iterable, 3, **kwargs)) == expected
def run_initial(self, events):
    """Upload events for the initial export.

    The events are first split into groups of at most
    ``INITIAL_BATCH_SIZE`` items; each group is then split into chunks of
    at most ``BATCH_SIZE`` items, and every chunk is passed as-is to
    ``upload_records(..., from_queue=False)``.

    :param events: an iterable containing events
    """
    uploader_name = type(self).__name__
    initial_batches = grouper(events, self.INITIAL_BATCH_SIZE, skip_missing=True)
    for batch_no, initial_batch in enumerate(initial_batches, start=1):
        self.logger.debug('%s processing initial batch %d', uploader_name, batch_no)
        chunks = grouper(initial_batch, self.BATCH_SIZE, skip_missing=True)
        for chunk_no, chunk in enumerate(chunks, start=1):
            self.logger.info('%s uploading initial chunk #%d (batch %d)',
                             uploader_name, chunk_no, batch_no)
            self.upload_records(chunk, from_queue=False)
def run_initial_export(self, events):
    """Run the initial export with a ``DebugUploader`` and print the events.

    First feeds ``events`` to ``DebugUploader.run_initial``; afterwards the
    events are grouped in tens and each group is printed to stdout as the
    output of ``MARCXMLGenerator.objects_to_xml``, preceded by a
    ``Batch <n>:`` heading and surrounded by blank lines.

    :param events: an iterable containing events
    """
    DebugUploader(self).run_initial(events)
    # NOTE(review): ``events`` is iterated a second time below; if a caller
    # ever passes a one-shot iterator, nothing would be printed -- confirm
    # that callers always pass a re-iterable object.
    batches = grouper(events, 10, skip_missing=True)
    for batch_no, batch in enumerate(batches, start=1):
        print()
        heading = cformat('%{white!}Batch {}:%{reset}').format(batch_no)
        print(heading)
        # Skip any None entries inside the batch before generating XML.
        present_events = (evt for evt in batch if evt is not None)
        print(MARCXMLGenerator.objects_to_xml(present_events))
        print()