def test_get_by_place_id(sites_csv_text):
    """A site row can be fetched by its place ID.

    The CSV loader on ``SitesMeta`` is patched to return the fixture text,
    so the lookup runs against the test data.
    """
    target = "court_scraper.sites_meta.SitesMeta._get_sites_csv_text"
    with patch(target, return_value=sites_csv_text):
        site = SitesMeta().get("ca_san_mateo")
        assert site["site_type"] == "odyssey"
def test_get_by_place_id(sites_csv_text):
    """Fetching ``ca_san_mateo`` by place ID yields an ``odyssey`` site.

    ``SitesMeta._get_sites_csv_text`` is mocked so the fixture text is used
    as the sites CSV.
    """
    csv_loader = 'court_scraper.sites_meta.SitesMeta._get_sites_csv_text'
    with patch(csv_loader, return_value=sites_csv_text):
        sites = SitesMeta()
        assert sites.get('ca_san_mateo')['site_type'] == 'odyssey'
def test_url_lookup_by_state_county(sites_csv_text):
    """``get_url`` resolves a site URL from a state and county pair.

    The CSV loader on ``SitesMeta`` is patched to return the fixture text.
    """
    target = "court_scraper.sites_meta.SitesMeta._get_sites_csv_text"
    expected = "https://ody.dekalbcountyga.gov/portal/Home/Dashboard/29"
    with patch(target, return_value=sites_csv_text):
        sites = SitesMeta()
        actual = sites.get_url(state="ga", county="dekalb")
        assert actual == expected
def test_url_lookup_by_state_county(sites_csv_text):
    """Looking up Georgia / DeKalb returns the expected portal URL.

    ``SitesMeta._get_sites_csv_text`` is mocked so the fixture text is used
    as the sites CSV.
    """
    csv_loader = 'court_scraper.sites_meta.SitesMeta._get_sites_csv_text'
    with patch(csv_loader, return_value=sites_csv_text):
        sites = SitesMeta()
        actual = sites.get_url(state='ga', county='dekalb')
        expected = 'https://ody.dekalbcountyga.gov/portal/Home/Dashboard/29'
        assert actual == expected
def test_url_lookup_multiname_county(sites_csv_text):
    """``get_url`` handles a county whose name contains a space.

    The CSV loader on ``SitesMeta`` is patched to return the fixture text.
    """
    target = 'court_scraper.sites_meta.SitesMeta._get_sites_csv_text'
    expected = 'https://odyportal-ext.sanmateocourt.org/Portal-External/Home/Dashboard/29'
    with patch(target, return_value=sites_csv_text):
        sites = SitesMeta()
        actual = sites.get_url(state='ca', county='san mateo')
        assert actual == expected
def _get_runner(place_id):
    """Return the ``Runner`` class for the site identified by ``place_id``.

    The ``site_type`` field of the site's metadata (sites_meta.csv) names,
    as a snake_case value, the module that holds the runner:

    * Site types for one-off scrapers live in the ``scrapers`` namespace,
      in a module named by state and county (e.g. ``ny_westchester``).
    * Platform site classes live in the ``platforms`` namespace, in a
      snake_case module (e.g. ``odyssey``).
    """
    site_type = SitesMeta().get(place_id)["site_type"]
    # A site type equal to the place ID selects the one-off scrapers
    # namespace; anything else is looked up under platforms.
    namespace = "scrapers" if place_id == site_type else "platforms"
    module_path = "court_scraper.{}.{}.runner".format(namespace, site_type)
    return importlib.import_module(module_path).Runner
def get_site_meta(cls, place_id):
    """Look up the site metadata for ``place_id`` and store it on ``cls``.

    The first two characters of ``place_id`` are taken as the state and
    everything after the third character as the county, with underscores
    turned into spaces. The ``(state, county)`` pair is the key into
    ``SitesMeta().data``.

    Returns the value stored as ``cls._site_meta``.
    """
    sites = SitesMeta()
    state_part, county_part = place_id[0:2], place_id[3:]
    lookup_key = (state_part, county_part.replace('_', ' ').strip())
    cls._site_meta = sites.data[lookup_key]
    return cls._site_meta
def info():
    """Print the list of available scrapers and their scraper IDs.

    Each entry shows the upper-cased state, the title-cased county and, in
    parentheses, the scraper ID to pass to the search command's
    ``--place-id`` argument. The text is emitted with ``click.echo``.
    """
    meta = SitesMeta()
    entries = []
    for state, county in meta.data.keys():
        # Scraper IDs are snake_case: a multi-word county such as
        # "san mateo" must be shown as "ca_san_mateo", not "ca_san mateo",
        # otherwise the printed ID cannot be passed as a single argument.
        place_id = '_'.join((state, county.replace(' ', '_')))
        entries.append(
            " * {} - {} ({})\n".format(state.upper(), county.title(), place_id)
        )
    end_note = (
        "\nNOTE: Scraper IDs (in parentheses) should be "
        "used with the search command's --place-id argument."
    )
    msg = "\nAvailable scrapers:\n\n" + "".join(entries) + end_note
    click.echo(msg)
def site_meta(self):
    """Return the site metadata for ``self.place_id``, caching it on first use.

    If ``self._site_meta`` already exists it is returned as is. Otherwise
    the first two characters of ``self.place_id`` are taken as the state and
    everything after the third character as the county (underscores become
    spaces); the ``(state, county)`` pair is looked up in
    ``SitesMeta().data`` and the result is stored on ``self._site_meta``.
    """
    try:
        cached = self._site_meta
    except AttributeError:
        # First access: resolve the metadata and remember it on the instance.
        sites = SitesMeta()
        state_part = self.place_id[0:2]
        county_part = self.place_id[3:].replace("_", " ").strip()
        self._site_meta = sites.data[(state_part, county_part)]
        cached = self._site_meta
    return cached