def __iter__(self):
    """Yield a ``Batch`` for each chunk of the input not yet processed.

    The input is opened through ``self.csv_input_file_reader()`` and read
    with ``FastReader`` when ``self.fast_mode`` is truthy, ``SlowReader``
    otherwise. Rows are grouped by ``iter_chunks`` using ``self.chunksize``.
    A chunk is skipped when its ``(starting row offset, row count)`` pair is
    found in ``self.already_processed_batches``.

    Because this is a generator, the input stays open while iteration is
    suspended between batches.

    Raises:
        ValueError: if ``iter_chunks`` produced no chunks at all.
    """
    make_reader = FastReader if self.fast_mode else SlowReader

    with self.csv_input_file_reader() as source:
        rows = make_reader(source, self.encoding, self._ui)
        header = rows.fieldnames

        saw_chunk = False
        started = time()
        offset = 0  # number of rows consumed before the current chunk

        for block in iter_chunks(rows, self.chunksize):
            saw_chunk = True
            size = len(block)
            batch_key = (offset, size)
            if batch_key not in self.already_processed_batches:
                yield Batch(offset, size, header, block, self.rty_cnt)
            # Skipped chunks still advance the offset so that later batch
            # keys keep matching the positions in the file.
            offset += size

        if not saw_chunk:
            raise ValueError(
                "Input file '{}' is empty.".format(self.dataset))

        # Only reached when the consumer exhausts the generator.
        message = 'chunking {} rows took {}'.format(offset, time() - started)
        self._ui.info(message)
def test_iter_chunks():
    """Check that ``iter_chunks`` splits three rows into chunks of two.

    Expects a full first chunk, a shorter trailing chunk holding the
    remaining row, and then exhaustion of the iterator.
    """
    rows = [[1, 'a'], [2, 'b'], [3, 'c']]
    chunks = iter_chunks(rows, 2)

    first = next(chunks)
    assert first == [[1, 'a'], [2, 'b']]

    second = next(chunks)
    assert second == [[3, 'c']]

    # Nothing is left once the partial chunk has been consumed.
    with pytest.raises(StopIteration):
        next(chunks)