def test_threadsafe_resumable(self, tmpdir):
    """Interrupt a threadsafe enrichment after two rows, then resume it.

    The first pass breaks out after two rows have been written. The second
    pass reuses the same resumer over the same input file and runs to the
    end. The output file must then hold all three rows, and the listener
    must have been called with the two first-pass rows under both the
    'output.row' and 'filter.row' event names.
    """
    events = defaultdict(list)

    def listener(name, row):
        # Copy the row so that later mutation cannot alter what was recorded.
        events[name].append(list(row))

    def job(payload):
        index, row = payload
        # Sleep for a duration derived from the third column of the row.
        time.sleep(int(row[2]) * .1)
        return index, row

    def sort_output(rows):
        # Rows are compared order-insensitively throughout this test.
        return sorted(tuple(cells) for cells in rows)

    output_path = str(tmpdir.join('./enriched_resumable_threadsafe.csv'))
    resumer = ThreadSafeResumer(output_path, listener=listener)

    # First pass: stop once two rows have been written.
    with open('./test/resources/people_unordered.csv') as f, resumer:
        enricher = casanova.threadsafe_enricher(
            f,
            resumer,
            add=('x2',),
            keep=('name',)
        )

        for count, (index, row) in enumerate(imap_unordered(enricher, job, 3)):
            enricher.writerow(index, row, [(index + 1) * 2])

            if count == 1:
                break

    partial_expected = [
        ['name', 'index', 'x2'],
        ['Mary', '1', '4'],
        ['Julia', '2', '6']
    ]
    assert sort_output(collect_csv(output_path)) == sort_output(partial_expected)

    # Second pass: same resumer, same input, run to completion.
    with open('./test/resources/people_unordered.csv') as f, resumer:
        enricher = casanova.threadsafe_enricher(
            f,
            resumer,
            add=('x2',),
            keep=('name',)
        )

        for _, (index, row) in enumerate(imap_unordered(enricher, job, 3)):
            enricher.writerow(index, row, [(index + 1) * 2])

    complete_expected = [
        ['name', 'index', 'x2'],
        ['Mary', '1', '4'],
        ['Julia', '2', '6'],
        ['John', '0', '2']
    ]
    assert sort_output(collect_csv(output_path)) == sort_output(complete_expected)

    expected_output_rows = [['Mary', '1', '4'], ['Julia', '2', '6']]
    assert sort_output(events['output.row']) == sort_output(expected_output_rows)

    expected_filter_rows = [
        [1, ['Mary', 'Sue', '1']],
        [2, ['Julia', 'Stone', '2']]
    ]
    assert sort_output(events['filter.row']) == sort_output(expected_filter_rows)
def test_resumable(self, tmpdir):
    """Write part of a stream, then resume from the last written cell.

    The first pass writes indices 0..2 and stops. The second pass checks
    that the resumer exposes '2' as its state, that popping the state
    returns it once and then None, and that writing the remaining indices
    yields a file containing 0..5.
    """
    output_path = str(tmpdir.join('./written_resumable.csv'))

    def stream(offset=0):
        return range(offset, 6)

    # First pass: stop before writing index 3.
    with LastCellResumer(output_path, 'index') as resumer:
        writer = Writer(resumer, ['index'])
        start = resumer.get_state() or 0

        for value in stream(start):
            if value == 3:
                break

            writer.writerow([value])

    written_so_far = [['index'], ['0'], ['1'], ['2']]
    assert collect_csv(output_path) == written_so_far

    # Second pass: pick up after the last recorded index.
    with LastCellResumer(output_path, 'index') as resumer:
        writer = Writer(resumer, ['index'])

        assert resumer.get_state() == '2'

        last_seen = resumer.pop_state()

        assert last_seen == '2'
        # A second pop must return None.
        assert resumer.pop_state() is None

        for value in stream(int(last_seen) + 1):
            writer.writerow([value])

    everything = [['index'], ['0'], ['1'], ['2'], ['3'], ['4'], ['5']]
    assert collect_csv(output_path) == everything
def test_resumable(self, tmpdir):
    """Enrich a single row, then resume with a RowCountResumer.

    The first pass writes only the first input row. The second pass reuses
    the same resumer and writes whatever rows the enricher still yields.
    The final file must hold all three people, and the listener must have
    been called exactly once per event name, with John's row.
    """
    events = defaultdict(list)

    def listener(name, row):
        # Copy the row so that later mutation cannot alter what was recorded.
        events[name].append(list(row))

    output_path = str(tmpdir.join('./enriched_resumable.csv'))
    resumer = RowCountResumer(output_path, listener=listener)

    # First pass: consume and write exactly one row.
    with open('./test/resources/people.csv') as f, resumer:
        enricher = casanova.enricher(
            f,
            resumer,
            add=('x2',),
            keep=('name',)
        )

        first_row = next(iter(enricher))
        enricher.writerow(first_row, [2])

    after_first_pass = [
        ['name', 'x2'],
        ['John', '2']
    ]
    assert collect_csv(output_path) == after_first_pass

    # Second pass: same resumer, iterate until the enricher is exhausted.
    with open('./test/resources/people.csv') as f, resumer:
        enricher = casanova.enricher(
            f,
            resumer,
            add=('x2',),
            keep=('name',)
        )

        for position, row in enumerate(enricher):
            enricher.writerow(row, [(position + 2) * 2])

    after_second_pass = [
        ['name', 'x2'],
        ['John', '2'],
        ['Mary', '4'],
        ['Julia', '6']
    ]
    assert collect_csv(output_path) == after_second_pass

    expected_events = {
        'output.row': [['John', '2']],
        'input.row': [['John', 'Matthews']]
    }
    assert events == expected_events
def test_batch_enricher(self, tmpdir):
    """Write two batches per input row and check the resulting cursor column.

    For each row, the first batch is written with cursor='next' and the
    second with no cursor argument. The expected file has an extra 'cursor'
    column whose value is set only on the last line of each batch: 'next'
    for the first batch and 'end' for the second.
    """
    output_path = str(tmpdir.join('./enriched.csv'))

    with open('./test/resources/people.csv') as source, \
            open(output_path, 'w', newline='') as target:
        enricher = casanova.batch_enricher(
            source,
            target,
            add=('color',),
            keep=('surname',)
        )

        for row in enricher:
            enricher.writebatch(row, [['blue'], ['red']], cursor='next')
            enricher.writebatch(row, [['purple'], ['cyan']])

    expected = [
        ['surname', 'cursor', 'color'],
        ['Matthews', '', 'blue'],
        ['Matthews', 'next', 'red'],
        ['Matthews', '', 'purple'],
        ['Matthews', 'end', 'cyan'],
        ['Sue', '', 'blue'],
        ['Sue', 'next', 'red'],
        ['Sue', '', 'purple'],
        ['Sue', 'end', 'cyan'],
        ['Stone', '', 'blue'],
        ['Stone', 'next', 'red'],
        ['Stone', '', 'purple'],
        ['Stone', 'end', 'cyan']
    ]
    assert collect_csv(output_path) == expected
def test_threadsafe_resuming_soundness(self, tmpdir):
    """Resume a threadsafe enrichment that was first run without a resumer.

    The first pass writes to a plain file handle and stops once index 2 has
    been written. The second pass wraps the same output path in a
    ThreadSafeResumer and runs to the end. The output must then list every
    person exactly once, in input order.
    """
    output_path = str(tmpdir.join('./threadsafe_resuming_soundness.csv'))

    # First pass: plain output file, stop after index 2 has been written.
    with open('./test/resources/more_people.csv') as source, \
            open(output_path, 'w', newline='') as target:
        enricher = casanova.threadsafe_enricher(source, target)

        for position, row in enricher:
            enricher.writerow(position, row)

            if position >= 2:
                break

    resumer = ThreadSafeResumer(output_path)

    # Second pass: the enricher is given the input path directly and is
    # itself used as a context manager, alongside the resumer.
    with casanova.threadsafe_enricher('./test/resources/more_people.csv', resumer) as enricher, resumer:
        for position, row in enricher:
            enricher.writerow(position, row)

    expected = [
        ['name', 'index'],
        ['John', '0'],
        ['Lisa', '1'],
        ['Mary', '2'],
        ['Alexander', '3'],
        ['Gary', '4']
    ]
    assert collect_csv(output_path) == expected
def test_threadsafe(self, tmpdir):
    """Enrich rows through imap_unordered and check the indexed output.

    Each job sleeps for a duration derived from the third column of its
    row before handing the (index, row) pair back. The output is compared
    order-insensitively.
    """
    def job(payload):
        index, row = payload
        time.sleep(int(row[2]) * .01)
        return index, row

    def sort_output(rows):
        return sorted(tuple(cells) for cells in rows)

    output_path = str(tmpdir.join('./enriched_resumable_threadsafe.csv'))

    with open('./test/resources/people_unordered.csv') as source, \
            open(output_path, 'w', newline='') as target:
        enricher = casanova.threadsafe_enricher(
            source,
            target,
            add=('x2',),
            keep=('name',)
        )

        for index, row in imap_unordered(enricher, job, 3):
            enricher.writerow(index, row, [(index + 1) * 2])

    expected = [
        ['name', 'index', 'x2'],
        ['Mary', '1', '4'],
        ['Julia', '2', '6'],
        ['John', '0', '2']
    ]
    assert sort_output(collect_csv(output_path)) == sort_output(expected)
def test_basics(self):
    """Write two rows to an in-memory buffer and read them back with the header."""
    buffer = StringIO()
    writer = Writer(buffer, ['name', 'surname'])

    for record in (['John', 'Cage'], ['Julia', 'Andrews']):
        writer.writerow(record)

    expected = [
        ['name', 'surname'],
        ['John', 'Cage'],
        ['Julia', 'Andrews']
    ]
    assert collect_csv(buffer) == expected
def test_dialect(self, tmpdir):
    """Enrich a semicolon-delimited file, appending each row's position."""
    output_path = str(tmpdir.join('./enriched.csv'))

    with open('./test/resources/semicolons.csv') as source, \
            open(output_path, 'w', newline='') as target:
        enricher = casanova.enricher(source, target, add=('line',), delimiter=';')

        for position, row in enumerate(enricher):
            enricher.writerow(row, [position])

    expected = [
        ['name', 'surname', 'line'],
        ['Rose', 'Philips', '0'],
        ['Luke', 'Atman', '1']
    ]
    assert collect_csv(output_path) == expected
def test_keep(self, tmpdir):
    """Enrich with keep=('name',) and check only that column plus 'line' is written."""
    output_path = str(tmpdir.join('./enriched_keep.csv'))

    with open('./test/resources/people.csv') as source, \
            open(output_path, 'w', newline='') as target:
        enricher = casanova.enricher(source, target, keep=('name',), add=('line',))

        for position, row in enumerate(enricher):
            enricher.writerow(row, [position])

    expected = [
        ['name', 'line'],
        ['John', '0'],
        ['Mary', '1'],
        ['Julia', '2']
    ]
    assert collect_csv(output_path) == expected