Exemplo n.º 1
0
    def search_by_date(
        self,
        start_date=None,
        end_date=None,
        case_details=False,
        case_types=None,
        download_dir=None,
        headless=True,
    ):
        """
        Scrape case metadata and/or details by date ranges.

        Defaults to the current day when no start date is provided (an
        end date passed without a start date is replaced by the current
        day as well). Results can be narrowed by supplying one or more
        case type codes.

        Args:

            start_date (str): start date in YYYY-MM-DD format (optional)
            end_date (str): end date in YYYY-MM-DD format (optional)
            case_details (boolean): Whether to scrape detailed case data. (optional; defaults to False)
            case_types (list<str>): One or more case type codes (optional; no case type filter when omitted)
            download_dir (str): Override Selenium download directory (defaults to standard court-scraper)
            headless (boolean): Run Selenium in headless mode for case detail searches (defaults to True)

        Returns:
            List of CaseInfo instances

        """
        # Normalize here instead of declaring a mutable ``[]`` default, which
        # would be one list object shared by every call of this method.
        if case_types is None:
            case_types = []
        if not start_date:
            start_date, end_date = self.current_day, self.current_day
        # NOTE(review): when start_date is given without end_date, end_date is
        # forwarded as None -- confirm the downstream search code handles that.
        county = self.place_id[3:]  # Clip the state prefix from place_id
        if case_details:
            # Detailed searches are delegated to self.search, which receives
            # the Selenium options (download directory and headless flag).
            return self.search(
                start_date=start_date,
                end_date=end_date,
                case_types=case_types,
                download_dir=download_dir or self.get_download_dir(),
                headless=headless,
            )
        # Case metadata can be gathered using just Requests
        results = []
        date_format = "%m-%d-%Y"
        dates = dates_for_range(start_date, end_date, output_format=date_format)
        for date_str in dates:
            api = SearchApi(county)
            extra_params = {}
            if case_types:
                # Multiple case types are sent as one comma-separated value
                extra_params["caseType"] = ",".join(case_types)
            # Query a single filing date per request (same start and end date)
            cases = api.search_by_filing_date(date_str, date_str, extra_params)
            results.extend(cases)
        return results
Exemplo n.º 2
0
 def search(self, start_date, end_date, extra_params=None, case_details=False):
     """Search for cases one filing date at a time over a date range.

     Each date produced by ``dates_for_range`` is queried separately, with
     both the low and high filing-date parameters set to that date.

     Args:
         start_date (str): first date of the range, passed to ``dates_for_range``
         end_date (str): last date of the range, passed to ``dates_for_range``
         extra_params (dict): additional query parameters merged into each
             day's search; keys given here override the filing-date
             parameters (optional)
         case_details (boolean): when True, results come from
             ``self._scrape_case_details``; otherwise the basic search
             results are returned with a ``filing_date`` added
             (defaults to False)

     Returns:
         list: the results accumulated across all dates that had results
     """
     # Avoid a mutable ``{}`` default shared across calls
     if extra_params is None:
         extra_params = {}
     date_format = "%m/%d/%Y"
     dates = dates_for_range(start_date, end_date, output_format=date_format)
     search_results = []
     for date_str in dates:
         # Convert date_str to standard YYYY-MM-DD for upstream usage
         date_key = self._standardize_date(date_str, date_format, "%Y-%m-%d")
         # Always limit query to a single filing date, to minimize
         # chances of truncated results
         search_params = {
             "FiledDateL": date_str,  # start filing date - MM/DD/YYYY
             "FiledDateH": date_str,  # end filing date - MM/DD/YYYY
         }
         # Merge any additional query parameters
         search_params.update(extra_params)
         html, basic_case_data = self._run_search(search_params)
         # Skip if there were no results for date
         if not basic_case_data:
             continue
         # Warn if results were truncated
         if "results are limited to 500" in html:
             msg = (
                 "WARNING: Results were truncated for your search."
                 " Try using a more targeted query, e.g. with a case type, "
                 " to avoid losing records.")
             logger.warning(msg)
         if case_details:
             results = self._scrape_case_details(date_key, basic_case_data)
             search_results.extend(results)
         else:
             # Add the filing date to CaseInfo instances if it's only a metadata search
             # since it's not listed on results page
             for case in basic_case_data:
                 case.update({"filing_date": date_key})
                 search_results.append(case)
     # Hand the accumulated results back to the caller; without this the
     # function would implicitly return None and discard everything gathered.
     return search_results
Exemplo n.º 3
0
 def search_by_date(self, county, start_date, end_date, case_types=None):
     """Run one browser search per day in a date range and collect the results.

     Args:
         county: county identifier forwarded to the date search and to the
             case detail / results page helpers
         start_date: first date of the range, passed to ``dates_for_range``
         end_date: last date of the range, passed to ``dates_for_range``
         case_types (list): case types forwarded to the date search
             (optional; an empty list is used when omitted)

     Returns:
         list: results gathered from every day whose search had results
     """
     # Avoid a mutable ``[]`` default shared across calls
     if case_types is None:
         case_types = []
     date_format = "%m-%d-%Y"
     dates = dates_for_range(start_date, end_date, output_format=date_format)
     payload = []
     # The captcha is solved once, on the first day that actually has results.
     # Keying this on the loop index instead would leave captcha_solution
     # unbound whenever the first day is skipped for having no results.
     captcha_solution = None
     captcha_solved = False
     for day in dates:
         self.go_to()  # advanced search page
         self._execute_date_search(county, day, day, case_types)
         if not self.search_has_results(self.driver.current_url):
             continue
         # Solve the captcha on the first search with results,
         # save the solution for re-use, and apply the solution
         # on the first case of that day's search results
         # (using it on subsequent case detail API calls causes errors)
         result_kwargs = {"use_captcha_solution": False}
         if not captcha_solved:
             captcha_solution = self.solve_captcha()
             captcha_solved = True
             result_kwargs["use_captcha_solution"] = True
         # Searches that yield a single result redirect automatically
         # to case detail page rather than search results listing page.
         # For these cases, immediately execute the case detail query
         if "caseDetail" in self.driver.current_url:
             case_info = self._get_case_details(
                 county,
                 self.driver.current_url,
                 captcha_solution,
                 result_kwargs["use_captcha_solution"],
             )
             results = [case_info]
         else:
             results_page = SearchResultsPage(self.driver, county,
                                              self.captcha_api_key,
                                              captcha_solution)
             results = results_page.results.get(**result_kwargs)
         # TODO: if results_page.results_found():
         #    results_page.display_max_results()
         payload.extend(results)
     return payload
Exemplo n.º 4
0
 def search(self, start_date, end_date, case_details=False):
     """Search for cases day by day over a date range.

     Args:
         start_date: first date of the range, passed to ``dates_for_range``
         end_date: last date of the range, passed to ``dates_for_range``
         case_details (boolean): when True, results come from
             ``self._scrape_case_details``; otherwise the basic search
             results are returned with a ``filing_date`` added
             (defaults to False)

     Returns:
         list: the results accumulated across all dates that had results
     """
     date_format = "%m-%d-%y"
     dates = dates_for_range(start_date, end_date, output_format=date_format)
     search_results = []
     for date_str in dates:
         # Convert date_str to standard YYYY-MM-DD for upstream usage
         date_key = self._standardize_date(date_str, date_format, "%Y-%m-%d")
         basic_case_data = self._run_search_for_day(date_str)
         # Skip if there were no results for date
         if not basic_case_data:
             continue
         if case_details:
             results = self._scrape_case_details(date_key, basic_case_data)
             search_results.extend(results)
         else:
             # Add the filing date to CaseInfo instances if it's only a metadata search
             # since it's not listed on results page
             for case in basic_case_data:
                 case.update({'filing_date': date_key})
                 search_results.append(case)
     # Hand the accumulated results back to the caller; without this the
     # function would implicitly return None and discard everything gathered.
     return search_results
Exemplo n.º 5
0
def test_dates_for_range(start, end, kwargs, expected):
    """Check that dates_for_range returns ``expected`` for the given range and options."""
    # kwargs is a dict of keyword options expanded into the call
    assert dates_for_range(start, end, **kwargs) == expected