Пример #1
0
def test_update():
    """update() should merge new keys into .data and expose them as attributes."""
    initial = {"foo": "bar", "case_num": 1}
    extra = {"baz": "bang"}
    info = CaseInfo(initial)
    info.update(extra)
    # New key is visible both in the backing dict and as an attribute
    assert info.data["baz"] == "bang"
    assert info.baz == "bang"
Пример #2
0
def test_update():
    """Updating a CaseInfo exposes the merged keys via .data and attribute access."""
    base_payload = {'foo': 'bar', 'case_num': 1}
    update_payload = {'baz': 'bang'}
    case = CaseInfo(base_payload)
    case.update(update_payload)
    assert case.data['baz'] == 'bang'
    assert case.baz == 'bang'
Пример #3
0
def test_merge():
    """merge() should combine attributes from another CaseInfo instance."""
    first = {"foo": "bar", "case_num": 1}
    second = {"baz": "bang", "case_num": 1}
    primary = CaseInfo(first)
    secondary = CaseInfo(second)
    primary.merge(secondary)
    # Shared key survives; keys from both sources are attributes
    assert primary.case_num == 1
    assert primary.foo == "bar"
    assert primary.baz == "bang"
Пример #4
0
def test_merge():
    """Merging two CaseInfo objects yields the union of their attributes."""
    left_data = {'foo': 'bar', 'case_num': 1}
    right_data = {'baz': 'bang', 'case_num': 1}
    left = CaseInfo(left_data)
    right = CaseInfo(right_data)
    left.merge(right)
    assert left.case_num == 1
    assert left.foo == 'bar'
    assert left.baz == 'bang'
Пример #5
0
    def results(self):
        """Data from Search Results page

        Returns:

            List of CaseInfo instances
        """
        # Search results contain one row per party to a case,
        # so rows sharing a case number are collapsed into a
        # single CaseInfo with an aggregated parties list.
        deduped = {}
        # Only grab result rows (i.e. skip header)
        result_rows = self.soup.table.find_all("tr", class_="resultTableRow")
        for row in result_rows:
            id_cell, filed_cell, name_cell, party_cell = row.find_all("td")
            case_id = id_cell.a.text.strip()
            if case_id not in deduped:
                # First time we've seen this case: build its record
                deduped[case_id] = CaseInfo({
                    "place_id": self.place_id,
                    "number": case_id,
                    "filing_date": filed_cell.text.strip(),
                    "name": name_cell.text.strip(),
                    "parties": [],
                })
            # Every row contributes one party name
            deduped[case_id].parties.append(party_cell.text.strip())
        return list(deduped.values())
Пример #6
0
def test_attribute_mapping():
    """Keys listed in CaseInfo._map should be renamed on the instance.

    Unmapped keys pass through unchanged.
    """
    mapping = {"case_num": "number"}
    data = {"foo": "bar", "case_num": "1"}
    # CaseInfo._map is class-level shared state; save and restore it
    # so this test doesn't leak the custom mapping into other tests.
    original_map = CaseInfo._map
    CaseInfo._map = mapping
    try:
        ci = CaseInfo(data)
        # The raw key must be replaced by its mapped name
        assert hasattr(ci, "case_num") is False
        assert ci.number == "1"
        # Keys absent from the mapping are untouched
        assert ci.foo == "bar"
    finally:
        CaseInfo._map = original_map
Пример #7
0
def test_attribute_mapping():
    """CaseInfo should rename data keys according to its _map.

    Keys not in the mapping are exposed under their original names.
    """
    mapping = {'case_num': 'number'}
    data = {'foo': 'bar', 'case_num': '1'}
    # _map is shared by the whole class; restore it afterwards so the
    # custom mapping cannot pollute tests that run later.
    saved_map = CaseInfo._map
    CaseInfo._map = mapping
    try:
        ci = CaseInfo(data)
        assert hasattr(ci, 'case_num') is False
        assert ci.number == '1'
        assert ci.foo == 'bar'
    finally:
        CaseInfo._map = saved_map
Пример #8
0
 def search(self, case_numbers=None):
     """Scrape case detail pages for the supplied case numbers.

     Args:
         case_numbers: iterable of case number strings; defaults to
             no cases (an empty search).

     Returns:
         List of CaseInfo instances, one per case number.
     """
     # NOTE: the default was previously a mutable [] — a shared object
     # across calls. None is used as the sentinel instead; callers that
     # passed a list explicitly are unaffected.
     results = []
     for case_number in case_numbers or []:
         page = CaseDetailPage(self.place_id, case_number)
         # Prepare CaseInfo class instances
         # for any valid case detail pages
         data = {"place_id": self.place_id}
         data.update(page.data)
         case = CaseInfo(data)
         results.append(case)
     return results
Пример #9
0
    def _get_by_case_number(self, case_number) -> CaseInfo:
        """
        Scrapes the data for the provided case number.

        Walks the site's page flow (home -> search selection -> trial
        court search -> results -> case detail) using the shared
        Selenium driver, then captures the detail page's source.

        Args:
            case_number: case number string; must be parseable by
                parsers.case_numbers.parse into an id and type.

        Returns a CaseInfo object ready to be archived.
        """
        # Open the homepage
        home_page = HomePage(self.driver)
        home_page.open()

        # Open the search selection page
        home_page.start_search()
        search_selection_page = SearchSelectionPage(self.driver)

        # Open the trial court search page
        search_selection_page.open_trial_court_search()

        # Switch to the case number search tab
        search_trial_court_page = SearchTrialCourtPage(self.driver)
        search_trial_court_page.open_case_number_search_tab()

        # Parse the id and type out of the case number
        case_dict = parsers.case_numbers.parse(case_number)

        # Convert our place_id into the county id that the form expects
        county_dict = parsers.counties.parse(self.place_id)

        # Search for the case
        search_trial_court_page.search_for_case_by_number(
            county_dict['id'], case_dict['type_id'], case_dict['id'])

        # Open the case detail page from the search results
        search_results_page = SearchResultsPage(self.driver)
        search_results_page.open_case_detail_page(case_number)

        # Load the case detail page
        case_detail_page = CaseDetailPage(self.driver)
        case_detail_page.open()

        # Package the raw page source and URL into a CaseInfo;
        # downstream code parses/archives it from there
        obj = CaseInfo({
            'place_id': self.place_id,
            'number': case_number,
            'page_source': self.driver.page_source,
            'url': case_detail_page.url
        })

        # Return the result
        return obj
Пример #10
0
 def _extract_case_data(self, table):
     """Build a CaseInfo for each row of the given results table."""
     # Section headers precede table tag and
     # contain generic case types, e.g. 'Civil Misc. (CV)'
     case_type = table.find_previous('font').text.strip()
     cases = []
     for row in table.find_all('tr'):
         # Each row holds a case number link and a parties summary
         number_cell, parties_cell = row.find_all('td')
         cases.append(CaseInfo({
             'place_id': self.place_id,
             'type_short': case_type,
             'number': number_cell.a.text.strip(),
             'parties_short': parties_cell.text.strip(),
         }))
     return cases
 def _extract_case_data(self, table):
     """Return a list of CaseInfo instances, one per table row."""
     # The <font> tag preceding the table supplies the section's
     # generic case type, e.g. 'Civil Misc. (CV)'
     case_type = table.find_previous("font").text.strip()

     def row_to_case(row):
         # First cell: case-number link; second cell: parties text
         cell1, cell2 = row.find_all("td")
         return CaseInfo({
             "place_id": self.place_id,
             "type_short": case_type,
             "number": cell1.a.text.strip(),
             "parties_short": cell2.text.strip(),
         })

     return [row_to_case(row) for row in table.find_all("tr")]
Пример #12
0
def test_standardized_data():
    """standard_data should rename mapped keys and drop unmapped extras."""
    mapping = {
        'case_num': 'number',
    }
    data = {
        'place_id': 'ga_dekalb',
        'case_num': '1',
        'status': 'Open',
        'foo': 'bar',
    }
    # Number should be standardized,
    # and foo should not appear
    expected = {
        'place_id': 'ga_dekalb',
        'number': '1',
        'status': 'Open',
    }
    # _map is class-level shared state; save/restore so the custom
    # mapping cannot leak into other tests in the suite.
    original_map = CaseInfo._map
    CaseInfo._map = mapping
    try:
        ci = CaseInfo(data)
        assert ci.standard_data == expected
    finally:
        CaseInfo._map = original_map
Пример #13
0
def test_standardized_data():
    """standard_data renames mapped keys, keeps standard fields, drops extras."""
    mapping = {
        "case_num": "number",
    }
    data = {
        "place_id": "ga_dekalb",
        "case_num": "1",
        "status": "Open",
        "foo": "bar",
    }
    # Number should be standardized,
    # and foo should not appear
    expected = {
        "place_id": "ga_dekalb",
        "number": "1",
        "status": "Open",
        "filing_date": None,
    }
    # Restore the class-level mapping afterwards so this test does
    # not pollute others that depend on the default CaseInfo._map.
    saved_map = CaseInfo._map
    CaseInfo._map = mapping
    try:
        ci = CaseInfo(data)
        assert ci.standard_data == expected
    finally:
        CaseInfo._map = saved_map
Пример #14
0
def test_scraper_caching(court_scraper_dir, monkeypatch):
    """Search results returned through the CLI should be cached to disk."""
    fake_results = [
        CaseInfo({
            'number': '20A123',
            'status': 'Open',
            'page_source': '<html>foo</html>'
        })
    ]
    # Monkeypatch is required because the Configs class is instantiated
    # in the global scope of cli.py; the import at the top of this test
    # file executes cli.py before this test runs, so standard patching
    # would occur too late.
    monkeypatch.setattr(cli.configs, 'cache_dir', court_scraper_dir)
    with patch('court_scraper.runner.Runner.search') as mock_search:
        mock_search.return_value = fake_results
        cli_runner = CliRunner()
        cli_runner.invoke(cli.cli, ['search', '-p', 'ga_dekalb', '-s', '20A123'])
        cache_file = Path(court_scraper_dir).joinpath(
            'cache/ga_dekalb/20A123.html')
        # The cached file must contain the case's raw page source
        assert fake_results[0].page_source == file_contents(cache_file)
            "download_dir": court_scraper_dir,
            "headless": True,
        }
        # Get the args and kwargs (2nd and 3rd items) from the
        # first call which is Site instantiation
        args, kwargs = site_class.mock_calls[0][1:]
        assert args == expected_args
        assert kwargs == expected_kwargs
        login_call, search_call = site_class.mock_calls[1:3]
        assert login_call == call().login(username, password)
        assert search_call == call().search(case_numbers=["foo"])


@pytest.mark.usefixtures("create_scraper_dir", "create_config")
def test_page_source_caching(court_scraper_dir, config_path):
    """cache_detail_pages should write each case's page source to the cache dir."""
    case = CaseInfo({"number": "20A123", "page_source": "<html>foo</html>"})
    runner = Runner(court_scraper_dir, config_path, "ga_dekalb")
    # The method takes a list of CaseInfo instances
    runner.cache_detail_pages([case])
    cached = Path(court_scraper_dir).joinpath("cache/ga_dekalb/20A123.html")
    assert file_contents(cached) == case.page_source


@pytest.mark.usefixtures("create_scraper_dir", "create_config")
def test_multiword_county(court_scraper_dir, config_path):
    "Multiword counties should not raise errors"
    site_class = Mock(name="OdysseySite")
    to_patch = "court_scraper.platforms.odyssey.runner.Runner._get_site_class"
    with patch(to_patch) as mock_method:
Пример #16
0
        mock_method.return_value = site_class
        r = Runner(court_scraper_dir, config_path, 'ga_dekalb')
        r.search(search_terms=['foo'])
        username = '******'
        password = '******'
        expected_args = (
            'https://ody.dekalbcountyga.gov/portal/Home/Dashboard/29',
            court_scraper_dir,
        )
        expected_kwargs = {'headless': True}
        # Get the args and kwargs (2nd and 3rd items) from the
        # first call which is Site instantiation
        args, kwargs = site_class.mock_calls[0][1:]
        assert args == expected_args
        assert kwargs == expected_kwargs
        login_call, search_call = site_class.mock_calls[1:3]
        assert login_call == call().login(username, password)
        assert search_call == call().search(search_terms=['foo'])


@pytest.mark.usefixtures('create_scraper_dir', 'create_config')
def test_page_source_caching(court_scraper_dir, config_path):
    """Caching detail pages writes the page source under cache/<place>/<number>.html."""
    case_info = CaseInfo({'number': '20A123', 'page_source': '<html>foo</html>'})
    scraper_runner = Runner(court_scraper_dir, config_path, 'ga_dekalb')
    # cache_detail_pages expects a list of CaseInfo instances
    scraper_runner.cache_detail_pages([case_info])
    expected_path = Path(court_scraper_dir).joinpath(
        'cache/ga_dekalb/20A123.html')
    assert case_info.page_source == file_contents(expected_path)