def update_docket_info_iquery(self, d_pk: int) -> None:
    """Update the docket info from iquery

    Looks up the docket, runs a PACER case query for it, merges the result
    into the docket, saves it, and sends it to Solr.

    :param self: The Celery task
    :param d_pk: The ID of the docket
    :return: None
    """
    cookies = get_or_cache_pacer_cookies(
        "pacer_scraper",
        settings.PACER_USERNAME,
        password=settings.PACER_PASSWORD,
    )
    s = PacerSession(
        cookies=cookies,
        username=settings.PACER_USERNAME,
        password=settings.PACER_PASSWORD,
    )
    d = Docket.objects.get(pk=d_pk)
    report = CaseQuery(map_cl_to_pacer_id(d.court_id), s)
    try:
        report.query(d.pacer_case_id)
    except (requests.Timeout, requests.RequestException) as exc:
        logger.warning(
            "Timeout or unknown RequestException on iquery crawl. "
            "Trying again if retries not exceeded."
        )
        if self.request.retries == self.max_retries:
            # Out of retries: give up quietly rather than raising.
            return
        raise self.retry(exc=exc)

    if not report.data:
        # Nothing was parsed from the response, so there is nothing to
        # merge into the docket or to re-index.
        return

    d = update_docket_metadata(d, report.data)
    d.save()
    add_bankruptcy_data_to_docket(d, report.data)
    add_items_to_solr([d.pk], "search.Docket")
class PacerCaseQueryTest(unittest.TestCase):
    """A test of basic info for the Case Query"""

    def setUp(self):
        """Log in to PACER and prepare a CaseQuery report for "cand"."""
        self.session = get_pacer_session()
        self.session.login()
        self.report = CaseQuery("cand", self.session)
        self.pacer_case_id = "186730"  # 4:06-cv-07294 Foley v. Bates

    @SKIP_IF_NO_PACER_LOGIN
    def test_query(self):
        """Can we get the basic info?"""
        self.report.query(self.pacer_case_id)
        self.assertIn(
            "Foley v. Bates",
            self.report.response.text,
            msg="Super basic query failed",
        )

        # Read the parsed metadata once and check each field against it.
        metadata = self.report.metadata
        self.assertIn(
            "Foley v. Bates et al",
            metadata["case_name_raw"],
            msg="case_name_raw query failed",
        )
        self.assertEqual(
            date(2007, 11, 29),
            metadata["date_last_filing"],
            msg="date_last_filing query failed",
        )
        self.assertEqual(
            date(2007, 5, 7),
            metadata["date_terminated"],
            msg="date_terminated query failed",
        )
        self.assertEqual(
            date(2006, 11, 27),
            metadata["date_filed"],
            msg="date_filed query failed",
        )
def update_docket_info_iquery(self, d_pk: int, court_id: str) -> None:
    """Refresh a docket with the data from its iquery page

    :param self: The Celery task
    :param d_pk: The ID of the docket
    :param court_id: The court of the docket. Needed for throttling by court.
    :return: None
    """
    session = PacerSession(
        cookies=get_or_cache_pacer_cookies(
            "pacer_scraper",
            settings.PACER_USERNAME,
            password=settings.PACER_PASSWORD,
        ),
        username=settings.PACER_USERNAME,
        password=settings.PACER_PASSWORD,
    )
    docket = Docket.objects.get(pk=d_pk, court_id=court_id)
    pacer_court_id = map_cl_to_pacer_id(docket.court_id)
    case_query = CaseQuery(pacer_court_id, session)
    try:
        case_query.query(docket.pacer_case_id)
    except (requests.Timeout, requests.RequestException) as exc:
        logger.warning(
            "Timeout or unknown RequestException on iquery crawl. "
            "Trying again if retries not exceeded."
        )
        # Retry while attempts remain; once exhausted, stop without raising.
        if self.request.retries != self.max_retries:
            raise self.retry(exc=exc)
        return

    # Only save when the query produced some data.
    if case_query.data:
        save_iquery_to_docket(
            self,
            case_query.data,
            docket,
            tag_names=None,
            add_to_solr=True,
        )
def do_case_query_by_pacer_case_id(
    self, data, court_id, cookies, tag_names=None
):
    """Run a case query (iquery.pl) query on a case and save the data

    :param self: The Celery task. Its request callbacks are cleared when
    there is nothing to save, so the rest of the chain does not run.
    :param data: A dict containing at least the following: {
        'pacer_case_id': The internal pacer case ID for the item.
    }
    :param court_id: A courtlistener court ID
    :param cookies: A requests.cookies.RequestsCookieJar with the cookies of a
    logged-in PACER user.
    :param tag_names: A list of tag names to associate with the docket when
    saving it in the DB.
    :return: A dict with the pacer_case_id and docket_pk values, or None if
    `data` is None or the query yields no docket data.
    """
    if data is None:
        logger.info("Empty data argument. Terminating "
                    "chains and exiting.")
        self.request.callbacks = None
        return

    # Only build the session once we know there is work to do.
    s = PacerSession(cookies=cookies)
    pacer_case_id = data.get('pacer_case_id')
    report = CaseQuery(map_cl_to_pacer_id(court_id), s)
    logger.info("Querying docket report %s.%s", court_id, pacer_case_id)
    try:
        d = Docket.objects.get(
            pacer_case_id=pacer_case_id,
            court_id=court_id,
        )
    except (Docket.DoesNotExist, Docket.MultipleObjectsReturned):
        # No unique match; resolve the docket after the query instead, once
        # the docket number is available.
        d = None

    report.query(pacer_case_id)
    docket_data = report.data
    logger.info("Querying and parsing complete for %s.%s",
                court_id, pacer_case_id)

    if not docket_data:
        logger.info("No valid docket data for %s.%s", court_id, pacer_case_id)
        self.request.callbacks = None
        return

    # Merge the contents into CL.
    if d is None:
        d, count = find_docket_object(court_id, pacer_case_id,
                                      docket_data['docket_number'])
        if count > 1:
            d = d.earliest('date_created')

    add_recap_source(d)
    update_docket_metadata(d, docket_data)
    d.save()

    if tag_names is not None:
        for tag_name in tag_names:
            tag, _ = Tag.objects.get_or_create(name=tag_name)
            tag.tag_object(d)

    # Add the HTML to the docket in case we need it someday.
    pacer_file = PacerHtmlFiles(content_object=d,
                                upload_type=UPLOAD_TYPE.CASE_REPORT_PAGE)
    pacer_file.filepath.save(
        'case_report.html',  # We only care about the ext w/UUIDFileSystemStorage
        ContentFile(report.response.text),
    )

    logger.info("Created/updated docket: %s", d)
    return {
        'pacer_case_id': pacer_case_id,
        'docket_pk': d.pk,
    }