def test_execute_skipping(tmp_path: Path, caplog: LogCaptureFixture) -> None:
    """Check that executing an already executed batch skips its entry.

    The batch is executed once, reloaded from its dumped file and executed a
    second time. The second run must log exactly one "Skipping" record and
    must leave the modification times of the meta and data files untouched.
    """
    batch_file = tmp_path / "batch.jsonl"
    results_dir = tmp_path / "out"

    # First execution: produces the result files.
    first_run = Batch()
    first_run.append(Search("trump", max_tweets=50))
    first_run.dump(batch_file)
    assert first_run.execute(results_dir)
    _assert_results_dir_structure(results_dir, list(first_run))

    entry = first_run[0]
    meta_file = results_dir / entry.meta_file_name
    data_file = results_dir / entry.data_file_name
    meta_before = meta_file.stat()
    data_before = data_file.stat()

    # Second execution of the same request (should be skipped). The batch is
    # recreated from the dumped batch file so that batch entry IDs match.
    second_run = Batch()
    second_run.load(batch_file)
    caplog.clear()
    assert second_run.execute(results_dir)
    skip_records = [
        record for record in caplog.records if "Skipping" in record.msg
    ]
    assert len(skip_records) == 1
    _assert_results_dir_structure(results_dir, list(second_run))

    meta_after = meta_file.stat()
    data_after = data_file.stat()

    # Neither file may have been rewritten by the second execution.
    assert meta_after.st_mtime_ns == meta_before.st_mtime_ns
    assert data_after.st_mtime_ns == data_before.st_mtime_ns
def run(self) -> None:
    """Build the request and either print its tweets or queue it in a batch.

    When ``self.to_batch`` is falsy, the request is performed right away and
    every resulting tweet is written to stdout as one JSON document per line.
    Otherwise the request is submitted to a batch that is loaded from
    ``self.to_batch`` if that file already exists, and the batch is then
    dumped back to ``self.to_batch``.
    """
    built_request = self._build_request()

    if not self.to_batch:
        # Direct mode: stream the results as JSON lines.
        for item in built_request.request():
            sys.stdout.write(json.dumps(item.to_json()) + "\n")
        return

    # Batch mode: extend an existing batch file or start a fresh one.
    pending = Batch()
    if self.to_batch.exists():
        pending.load(self.to_batch)
    self._batch_submit(pending, built_request)
    pending.dump(self.to_batch)
def test_dump_load_single(request_: Request, tmp_path: Path) -> None:
    """Check that a batch with one request survives a dump/load round trip.

    The dumped file must contain exactly one non-empty line, and a batch
    loaded from it must hold the same entries as the original batch.
    """
    batch_file = tmp_path / "batch.jsonl"

    original = Batch()
    original.append(request_)
    original.dump(batch_file)

    dumped_lines = list(read_lines_file(batch_file))
    assert len(dumped_lines) == 1
    assert len(dumped_lines[0]) != 0

    restored = Batch()
    restored.load(batch_file)
    assert list(original) == list(restored)
def test_dump_load_multiple(num_batch_entries: int, tmp_path: Path) -> None:
    """Check the dump/load round trip for a batch with several requests.

    ``num_batch_entries`` searches are appended, each parameterized by its
    1-based position. The dumped file must contain one non-empty line per
    entry, and a batch loaded from it must hold the same entries as the
    original batch.
    """
    batch_file = tmp_path / "batch.jsonl"

    original = Batch()
    for position in range(1, num_batch_entries + 1):
        original.append(
            Search(str(position), max_tweets=position, batch_size=position)
        )
    original.dump(batch_file)

    dumped_lines = list(read_lines_file(batch_file))
    assert len(dumped_lines) == num_batch_entries
    assert all(len(dumped_line) != 0 for dumped_line in dumped_lines)

    restored = Batch()
    restored.load(batch_file)
    assert list(original) == list(restored)
def test_correct_call_to_batch_exists(
    old_request: Request,
    new_request: Request,
    capsys: CaptureFixture,
    tmp_path: Path,
) -> None:
    """Check that ``main`` adds a request to an already existing batch file.

    A batch file holding ``old_request`` is written first. ``main`` is then
    called with ``new_request`` and ``to_batch`` pointing at that file. The
    call must print nothing to stdout, and the reloaded batch must contain
    both requests in order, each as an entry with an ID and with neither a
    completion time nor an exception.
    """
    batch_file = tmp_path / "batch.jsonl"

    existing = Batch()
    existing.append(old_request)
    existing.dump(batch_file)

    cli_args = _make_args(new_request, to_batch=batch_file)
    main(*cli_args)
    assert capsys.readouterr().out == ""

    reloaded = Batch()
    reloaded.load(batch_file)
    assert len(reloaded) == 2

    expected_requests = [old_request, new_request]
    for entry, expected in zip(reloaded, expected_requests):
        assert entry.request == expected
        assert entry.id
        assert entry.completed_at is None
        assert entry.exception is None