def fetch(fmgr):
    """Drain pending FJUD records from *fmgr* and fetch each verdict.

    Parameters:
        fmgr: record manager exposing has_fjud_record(), pop_fjud_record()
              and remove_fjud_record(); each record carries a court, a
              search needle and a start year.  # assumed from usage below
    """
    crawler = FJUDCrawler()
    try:
        while fmgr.has_fjud_record():
            rec = fmgr.pop_fjud_record()
            crawler.fetch(court=rec.court,
                          needle=rec.needle,
                          year=rec.get_start_year())
            # Only drop the record once the fetch has succeeded.
            fmgr.remove_fjud_record(rec)
            print("Sleep 60s for polite interaction with the server.")
            time.sleep(1*60)
    finally:
        # Always release the webdriver, even if a fetch raises mid-run
        # (the original leaked the driver on any exception).
        crawler.quit()
class FJUDCrawlerTest(unittest.TestCase): def setUp(self): self.crawler = FJUDCrawler(enable_unittest=True) self.crawler.driver = WebdriverStub() def tearDown(self): self.crawler.quit() def test_is_already_written(self): test_dir = os.path.abspath(os.path.join(__file__, "..")) self.assertFalse(fjud.is_already_written(ur"89,台上,2581", test_dir)) self.assertTrue(fjud.is_already_written(ur"query_result.html", test_dir)) self.assertFalse(fjud.is_already_written(ur"query_result.html")) def test_save_as(self): output_path = fjud.get_default_output_path() serial = ur"89,台上,2581____test_only" fjud.save_as(serial, "test only") output_file = os.path.join(output_path, fjud.normalize_serial(serial)) self.assertTrue(os.path.isfile(output_file)) os.unlink(output_file) self.assertFalse(os.path.isfile(output_file)) def test_get_courts(self): all_courts = fjud.get_courts() self.assertTrue(len(all_courts) > 0) def test_normalize(self): serial = ur"89,台上,2581" self.assertEqual( serial, fjud.denormalize_serial(fjud.normalize_serial(serial))) @unittest.skip("disable temporarily") def disable_test_query(self): crawler = FJUDCrawler() ret = crawler.query() with file("ret.txt", "w") as fp: fp.write(ret) print ret
def main():
    """ Search for bad verdicts and download them again. """
    # If <pre> exists, the file has verdict text in it.
    serials = files_without_word(u"<pre>")
    for serial in serials:
        serial = serial.decode("utf-8")
        crawler = FJUDCrawler()
        try:
            # Serials containing u"台上" belong to the Supreme Court;
            # everything else is re-fetched from the Taiwan High Court.
            if u"台上" in serial:
                crawler.fetch(court=u"最高法院", serial=serial, overwrite=True)
            else:
                crawler.fetch(court=u"臺灣高等法院", serial=serial,
                              overwrite=True)
        finally:
            # Always release the webdriver, even when a fetch raises
            # (the original leaked the driver on any exception).
            crawler.quit()
        time.sleep(3)
def setUp(self):
    """Build a unit-test crawler whose webdriver is a local stub."""
    stub = WebdriverStub()
    crawler = FJUDCrawler(enable_unittest=True)
    crawler.driver = stub
    self.crawler = crawler
def disable_test_query(self):
    """Manual smoke test: run a raw crawler query and dump it to ret.txt."""
    crawler = FJUDCrawler()
    ret = crawler.query()
    # open() replaces the deprecated file() builtin (removed in Python 3);
    # print() as a function matches the call style used elsewhere in this
    # module and is valid on both Python 2 and 3 for a single argument.
    with open("ret.txt", "w") as fp:
        fp.write(ret)
    print(ret)