def test_csv_only(self):
    """
    The csv path walker must hand back csv file paths and nothing else.

    Walks the ``test_data/things_kinds`` fixture directory that sits next
    to this test module, expects exactly two paths to come back, and
    checks that each of them contains ``.csv``.
    """
    here = os.path.dirname(__file__)
    fixture_dir = os.path.join(here, "test_data", "things_kinds")

    # Materialise the walker's output so it can be counted and re-iterated.
    found = list(i_walk_csv_paths(fixture_dir))

    self.assertEqual(2, len(found))
    for csv_path in found:
        self.assertIn('.csv', csv_path)
def main():
    """
    Treat every csv file found under a directory as one stream of items.

    Walks ``test_data/things_kinds`` (relative to the current working
    directory) for csv paths, opens a data iterable per path with
    ``karld.io.i_get_csv_data``, chains those iterables end to end and
    prints the first two fields of each item.
    """
    import pathlib

    source_dir = pathlib.Path('test_data/things_kinds')
    csv_paths = i_walk_csv_paths(str(source_dir))

    # One data iterable per csv path, built by mapping the reader over
    # the paths.  The same thing written as a generator expression:
    #
    #   per_file_items = (karld.io.i_get_csv_data(data_path)
    #                     for data_path in csv_paths)
    per_file_items = imap(karld.io.i_get_csv_data, csv_paths)

    # Flatten the per-file iterables into a single sequence of items.
    for entry in chain.from_iterable(per_file_items):
        print(entry[0], entry[1])
def main():
    """
    Merge several small, homogeneous csv files on a shared key.

    "Small" means that all of the files together fit in this machine's
    memory.  The csv files under ``test_data/things_kinds`` (relative to
    the current working directory) are read, merged and grouped on the
    field at index 1, and each group is printed as its key followed by
    the tab-indented first field of every item in the group.
    """
    import pathlib

    source_dir = pathlib.Path('test_data/things_kinds')

    # One data iterable per csv file found under the source directory.
    per_file_items = (
        karld.io.i_get_csv_data(csv_path)
        for csv_path in i_walk_csv_paths(str(source_dir))
    )

    # Index of the field that the items are grouped on.
    kind_column = 1
    grouped = sort_merge_group(per_file_items, itemgetter(kind_column))

    for entry in grouped:
        # Each entry is indexed as (group key, items in that group).
        group_key = entry[0]
        members = entry[1]

        print(group_key)
        for member in members:
            print('\t' + member[0])
        # Blank line to separate this group from the next one.
        print()