def test_unicoded(self):
    """Check that csv_reader turns utf-8 input into unicode rows.
    """
    expected_rows = [
        [u"WĄŻ", u"utf-8 sample"],
        [u"dróżką", u"utf-8 sample"],
    ]
    # Drain the reader completely so every row can be compared at once.
    self.assertEqual(expected_rows, list(csv_reader(self.data)))
def test_iterative(self):
    """Check that csv_reader pulls from its input one item at a time.

    The reader is handed an iterator over ``self.data``; after a single
    row has been requested, the next item must still be available on
    that same iterator.
    """
    source = iter(self.data)
    reader = csv_reader(source)

    # Ask for exactly one parsed row.
    self.assertEqual([u"WĄŻ", u"utf-8 sample"], next(reader))

    # The second input line has to be left unread in the shared iterator.
    self.assertEqual("dróżką,utf-8 sample", next(source))
def i_get_csv_data(file_name, *args, **kwargs):
    """Generator yielding the rows of a csv file one at a time.

    The file is obtained through ``i_read_buffered_file`` and parsed by
    ``csv_reader``; ``*args`` and ``**kwargs`` are forwarded to
    ``csv_reader`` unchanged.

    :param file_name: handed to ``i_read_buffered_file``.
    :param buffering: optional keyword, defaults to ``FILE_BUFFER_SIZE``;
        passed on to ``i_read_buffered_file``.
    """
    # NOTE(review): 'buffering' is looked up but not removed from kwargs,
    # so it is forwarded to csv_reader below as well -- confirm that
    # csv_reader accepts it.
    read_file_kwargs = {
        'buffering': kwargs.get('buffering', FILE_BUFFER_SIZE),
    }
    if is_py3():
        # Extra reader options that are only supplied under Python 3.
        read_file_kwargs.update(binary=False, py3_csv_read=True)

    data = i_read_buffered_file(file_name, **read_file_kwargs)
    for record in csv_reader(data, *args, **kwargs):
        yield record