def test_iterates_csv_without_header(self):
    """Headerless CSV iteration yields each row as a list of column strings."""
    data = easyfile.CsvFile(self.fp.name, header=False)
    data._prepare_reading()
    # Every fixture line should come back split on commas, in order.
    expected_rows = [row.split(',') for row in self.lines]
    self.assertSequenceEqual(data, expected_rows)
    for got, want in zip(data, expected_rows):
        self.assertEqual(got, want)
def test_iterates_csv_with_header(self):
    """With a header row, iteration yields OrderedDicts keyed by field name."""
    from collections import OrderedDict

    data = easyfile.CsvFile(self.fp.name, header=True)
    data._prepare_reading()
    # NOTE(review): `_filednames` mirrors the library's attribute spelling —
    # presumably a typo for "fieldnames" upstream; verify against easyfile.
    expected_rows = [
        OrderedDict(zip(data._filednames, row.split(',')))
        for row in self.lines[1:]  # skip the header line itself
    ]
    self.assertSequenceEqual(data, expected_rows)
    for got, want in zip(data, expected_rows):
        self.assertEqual(got, want)
def __init__(self, path: str, encoding: str = 'utf-8', delimiter: str = ',', header: bool = False) -> None:
    """Build an ``easyfile.CsvFile`` for *path* and hand it to the base class.

    Args:
        path: Location of the CSV file on disk.
        encoding: Text encoding used to read the file.
        delimiter: Column separator character.
        header: Whether the first row names the columns.
    """
    csv_file = easyfile.CsvFile(
        path=path,
        encoding=encoding,
        delimiter=delimiter,
        header=header,
    )
    super().__init__(csv_file)
def test_dunder_init(self):
    """A freshly constructed CsvFile records its settings and is unprepared."""
    data = easyfile.CsvFile(self.fp.name)
    # Constructor arguments (and their defaults) are stored verbatim.
    self.assertEqual(data._path, self.fp.name)
    self.assertEqual(data._encoding, 'utf-8')
    self.assertEqual(data._delimiter, ',')
    # Nothing has been read yet, so the lazy state is all unset.
    self.assertFalse(data._ready)
    self.assertFalse(data._header)
    for attr in ('_length', '_offsets', '_mm', '_filednames'):
        self.assertIsNone(getattr(data, attr))
def easyfile_creator(path):
    """Download and extract the dataset archive, wrap its CSV splits, and cache them.

    Downloads the archive from the enclosing-scope ``url`` (via gdown's cache),
    extracts it under ``root``, wraps the ``{key}_csv/{split}.csv`` files as
    ``easyfile.CsvFile`` objects, and pickles the resulting mapping to *path*.

    Args:
        path: File path where the pickled split mapping is written.

    Returns:
        dict mapping split name ('train', 'test') to an ``easyfile.CsvFile``.

    NOTE(review): ``url``, ``root`` and ``key`` are free variables from the
    enclosing scope — presumably this is defined inside a per-dataset factory;
    confirm against the caller.
    """
    archive_path = gdown.cached_download(url)
    with tarfile.open(archive_path, 'r') as archive:
        print(f'Extracting to {root}...')
        # NOTE(review): extractall on a downloaded archive is path-traversal
        # prone; consider passing filter='data' once Python >= 3.12 is required.
        archive.extractall(root)
    # Fixed the original's duplicate `dataset = {}` (the first assignment was
    # dead code) and replaced the legacy `io.open` alias with built-in `open`.
    dataset = {}
    for split in ('train', 'test'):
        filename = f'{key}_csv/{split}.csv'
        dataset[split] = easyfile.CsvFile(os.path.join(root, filename))
    with open(path, 'wb') as f:
        pickle.dump(dataset, f)
    return dataset
def test_slices_items(self):
    """Slicing the whole file reproduces the file's own sequence of rows."""
    data = easyfile.CsvFile(self.fp.name)
    full_slice = data[:len(self.lines)]
    self.assertSequenceEqual(full_slice, data)
def test_loads_csv_with_header(self):
    """When header=True, preparing the file captures the first row as field names."""
    data = easyfile.CsvFile(self.fp.name, header=True)
    self.assertTrue(data._header)
    data._prepare_reading()
    # NOTE(review): `_filednames` matches the library's attribute spelling —
    # presumably a typo for "fieldnames" upstream; verify against easyfile.
    header_columns = self.lines[0].split(',')
    self.assertListEqual(data._filednames, header_columns)