def test_update(): data = {"foo": "bar", "case_num": 1} data2 = {"baz": "bang"} case_info = CaseInfo(data) case_info.update(data2) assert case_info.data["baz"] == "bang" assert case_info.baz == "bang"
def test_merge(): data = {"foo": "bar", "case_num": 1} data2 = {"baz": "bang", "case_num": 1} case_info = CaseInfo(data) case_info2 = CaseInfo(data2) case_info.merge(case_info2) assert case_info.case_num == 1 assert case_info.foo == "bar" assert case_info.baz == "bang"
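# A minimal sketch (not the library's actual implementation) of the CaseInfo
# behavior the two tests above assume: the constructor stores the raw dict
# and exposes its keys as attributes, update() layers in new key/value
# pairs, and merge() folds in another instance's data.
class CaseInfo:

    def __init__(self, data):
        self.data = data
        for key, value in data.items():
            setattr(self, key, value)

    def update(self, data):
        self.data.update(data)
        for key, value in data.items():
            setattr(self, key, value)

    def merge(self, other):
        self.update(other.data)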
def results(self):
    """Data from Search Results page

    Returns:
        List of CaseInfo instances
    """
    # Search results contain an entry for every party
    # to a case, so we need to deduplicate
    results = {}
    # Only grab result rows (i.e. skip header)
    for row in self.soup.table.find_all("tr", class_="resultTableRow"):
        case_id_cell, filing_date, case_name, found_party = row.find_all("td")
        case_id = case_id_cell.a.text.strip()
        try:
            case_info = results[case_id]
        except KeyError:
            data = {
                "place_id": self.place_id,
                "number": case_id,
                "filing_date": filing_date.text.strip(),
                "name": case_name.text.strip(),
                "parties": [],
            }
            case_info = CaseInfo(data)
            results[case_id] = case_info
        case_info.parties.append(found_party.text.strip())
    return list(results.values())
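# Illustrative (assumed) shape of the search-results markup that results()
# parses: each row carries four cells -- case link, filing date, case name,
# and a single party -- so one case can span multiple rows.
from bs4 import BeautifulSoup

html = """
<table>
  <tr class="resultTableRow">
    <td><a href="#">20A123</a></td><td>01/02/2020</td>
    <td>Smith v. Jones</td><td>Smith, John</td>
  </tr>
  <tr class="resultTableRow">
    <td><a href="#">20A123</a></td><td>01/02/2020</td>
    <td>Smith v. Jones</td><td>Jones, Jane</td>
  </tr>
</table>
"""
soup = BeautifulSoup(html, "html.parser")
rows = soup.table.find_all("tr", class_="resultTableRow")
# Both rows reference case 20A123, so results() would collapse them into a
# single CaseInfo whose parties list holds both names
assert len(rows) == 2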
def test_attribute_mapping(): mapping = {"case_num": "number"} data = {"foo": "bar", "case_num": "1"} CaseInfo._map = mapping ci = CaseInfo(data) assert hasattr(ci, "case_num") is False assert ci.number == "1" assert ci.foo == "bar"
def search(self, case_numbers=None):
    # Avoid a mutable default argument
    case_numbers = case_numbers or []
    results = []
    for case_number in case_numbers:
        page = CaseDetailPage(self.place_id, case_number)
        # Prepare CaseInfo class instances
        # for any valid case detail pages
        data = {"place_id": self.place_id}
        data.update(page.data)
        case = CaseInfo(data)
        results.append(case)
    return results
def _get_by_case_number(self, case_number) -> CaseInfo:
    """
    Scrapes the data for the provided case number.

    Returns a CaseInfo object ready to be archived.
    """
    # Open the homepage
    home_page = HomePage(self.driver)
    home_page.open()
    # Open the search selection page
    home_page.start_search()
    search_selection_page = SearchSelectionPage(self.driver)
    # Open the trial court search page
    search_selection_page.open_trial_court_search()
    # Switch to the case number search tab
    search_trial_court_page = SearchTrialCourtPage(self.driver)
    search_trial_court_page.open_case_number_search_tab()
    # Parse the id and type out of the case number
    case_dict = parsers.case_numbers.parse(case_number)
    # Convert our place_id into the county id that the form expects
    county_dict = parsers.counties.parse(self.place_id)
    # Search for the case
    search_trial_court_page.search_for_case_by_number(
        county_dict['id'],
        case_dict['type_id'],
        case_dict['id']
    )
    # Open the case detail page
    search_results_page = SearchResultsPage(self.driver)
    search_results_page.open_case_detail_page(case_number)
    # Parse the case detail page
    case_detail_page = CaseDetailPage(self.driver)
    case_detail_page.open()
    # Package the scraped data
    obj = CaseInfo({
        'place_id': self.place_id,
        'number': case_number,
        'page_source': self.driver.page_source,
        'url': case_detail_page.url
    })
    # Return the result
    return obj
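# Assumed (not confirmed from the source) return shapes for the parser
# helpers used above, inferred only from the keys that
# search_for_case_by_number consumes:
#
#   parsers.case_numbers.parse("20A123") -> {"id": ..., "type_id": ...}
#   parsers.counties.parse("ga_dekalb")  -> {"id": ...}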
def _extract_case_data(self, table):
    data = []
    # Get section header: headers precede the table tag and
    # contain generic case types, e.g. 'Civil Misc. (CV)'
    case_type = table.find_previous("font").text.strip()
    for row in table.find_all("tr"):
        # Get case data
        cell1, cell2 = row.find_all("td")
        row_data = {
            "place_id": self.place_id,
            "type_short": case_type,
            "number": cell1.a.text.strip(),
            "parties_short": cell2.text.strip(),
        }
        case_info = CaseInfo(row_data)
        data.append(case_info)
    return data
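# Illustrative (assumed) markup shape that _extract_case_data handles: a
# <font> section header preceding a table whose rows hold two cells, with
# the case number wrapped in a link in the first cell.
from bs4 import BeautifulSoup

html = """
<font>Civil Misc. (CV)</font>
<table>
  <tr>
    <td><a href="#">20CV123</a></td>
    <td>Smith v. Jones</td>
  </tr>
</table>
"""
soup = BeautifulSoup(html, "html.parser")
table = soup.find("table")
# find_previous walks backward through the parse tree to the section header
assert table.find_previous("font").text.strip() == "Civil Misc. (CV)"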
def test_standardized_data(): mapping = { "case_num": "number", } data = { "place_id": "ga_dekalb", "case_num": "1", "status": "Open", "foo": "bar", } # Number should be standardized, # and foo should not appear expected = { "place_id": "ga_dekalb", "number": "1", "status": "Open", "filing_date": None, } CaseInfo._map = mapping ci = CaseInfo(data) assert ci.standard_data == expected
def test_scraper_caching(court_scraper_dir, monkeypatch):
    data = [
        CaseInfo({
            'number': '20A123',
            'status': 'Open',
            'page_source': '<html>foo</html>'
        })
    ]
    # Need to monkeypatch because Configs class is instantiated
    # in global scope of cli.py, and the import at top of this
    # test file executes cli.py before this test runs (therefore
    # standard patching doesn't work b/c it occurs too late)
    monkeypatch.setattr(cli.configs, 'cache_dir', court_scraper_dir)
    with patch('court_scraper.runner.Runner.search') as mock_method:
        mock_method.return_value = data
        runner = CliRunner()
        runner.invoke(cli.cli, ['search', '-p', 'ga_dekalb', '-s', '20A123'])
        cache_file = Path(court_scraper_dir)\
            .joinpath('cache/ga_dekalb/20A123.html')
        expected = data[0].page_source
        actual = file_contents(cache_file)
        assert expected == actual
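# A minimal sketch of the file_contents helper these caching tests rely on;
# the real helper is defined elsewhere in the test suite, so this is an
# assumption about its behavior.
def file_contents(path):
    with open(path) as f:
        return f.read()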
"download_dir": court_scraper_dir, "headless": True, } # Get the args and kwargs (2nd and 3rd items) from the # first call which is Site instantiation args, kwargs = site_class.mock_calls[0][1:] assert args == expected_args assert kwargs == expected_kwargs login_call, search_call = site_class.mock_calls[1:3] assert login_call == call().login(username, password) assert search_call == call().search(case_numbers=["foo"]) @pytest.mark.usefixtures("create_scraper_dir", "create_config") def test_page_source_caching(court_scraper_dir, config_path): case = CaseInfo({"number": "20A123", "page_source": "<html>foo</html>"}) r = Runner(court_scraper_dir, config_path, "ga_dekalb") # Supply CaseInfo instances in a list r.cache_detail_pages([case]) cache_file = Path(court_scraper_dir).joinpath( "cache/ga_dekalb/20A123.html") actual = file_contents(cache_file) assert case.page_source == actual @pytest.mark.usefixtures("create_scraper_dir", "create_config") def test_multiword_county(court_scraper_dir, config_path): "Multiword counties should not raise errors" site_class = Mock(name="OdysseySite") to_patch = "court_scraper.platforms.odyssey.runner.Runner._get_site_class" with patch(to_patch) as mock_method:
        mock_method.return_value = site_class
        r = Runner(court_scraper_dir, config_path, "ga_dekalb")
        r.search(search_terms=["foo"])
    username = "******"
    password = "******"
    expected_args = (
        "https://ody.dekalbcountyga.gov/portal/Home/Dashboard/29",
        court_scraper_dir,
    )
    expected_kwargs = {"headless": True}
    # Get the args and kwargs (2nd and 3rd items) from the
    # first call which is Site instantiation
    args, kwargs = site_class.mock_calls[0][1:]
    assert args == expected_args
    assert kwargs == expected_kwargs
    login_call, search_call = site_class.mock_calls[1:3]
    assert login_call == call().login(username, password)
    assert search_call == call().search(search_terms=["foo"])
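# A minimal sketch (not the actual Runner implementation) of what
# cache_detail_pages must do for the caching tests above to pass: write each
# case's page_source to cache/<place_id>/<number>.html under the scraper
# directory.
from pathlib import Path


def cache_detail_pages(cache_dir, place_id, cases):
    out_dir = Path(cache_dir).joinpath("cache", place_id)
    out_dir.mkdir(parents=True, exist_ok=True)
    for case in cases:
        out_dir.joinpath(f"{case.number}.html").write_text(case.page_source)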