def process_docket_data(d, filepath, report_type):
    """Process docket data file.

    Parses the saved report at ``filepath`` with the parser that matches
    ``report_type`` and merges whatever was parsed into the docket ``d``.

    :param d: A docket object to work on.
    :param filepath: The path to a saved file containing the report data. It
    is read as UTF-8.
    :param report_type: The UPLOAD_TYPE value saying which kind of report the
    file holds (docket, docket history report, appellate docket, IA XML file,
    case report page, or claims register).
    :return: The primary key of the docket, or None if the parser produced no
    data.
    :raises NotImplementedError: If ``report_type`` is not one of the
    supported types.
    """
    # NOTE(review): imported at call time rather than module level —
    # presumably to dodge a circular import; confirm before hoisting.
    from cl.recap.mergers import (
        add_docket_entries,
        add_parties_and_attorneys,
        update_docket_appellate_metadata,
        update_docket_metadata,
        add_bankruptcy_data_to_docket,
        add_claims_to_docket,
    )

    court_id = map_cl_to_pacer_id(d.court_id)
    if report_type == UPLOAD_TYPE.DOCKET:
        report = DocketReport(court_id)
    elif report_type == UPLOAD_TYPE.DOCKET_HISTORY_REPORT:
        report = DocketHistoryReport(court_id)
    elif report_type == UPLOAD_TYPE.APPELLATE_DOCKET:
        report = AppellateDocketReport(court_id)
    elif report_type == UPLOAD_TYPE.IA_XML_FILE:
        report = InternetArchive(court_id)
    elif report_type == UPLOAD_TYPE.CASE_REPORT_PAGE:
        report = CaseQuery(court_id)
    elif report_type == UPLOAD_TYPE.CLAIMS_REGISTER:
        report = ClaimsRegister(court_id)
    else:
        raise NotImplementedError(
            "The report type with id '%s' is not yet "
            "supported. Perhaps you need to add it?" % report_type
        )

    # Read raw bytes and decode them explicitly. Opening in text mode yields
    # str, which has no .decode() on Python 3, so the old
    # open(..., "r") + .decode("utf-8") combination raised AttributeError.
    with open(filepath, "rb") as f:
        text = f.read().decode("utf-8")
    report._parse_text(text)
    data = report.data
    if data == {}:
        # Nothing was parsed; leave the docket untouched.
        return None

    if report_type == UPLOAD_TYPE.CLAIMS_REGISTER:
        # Claims registers only contribute bankruptcy data and claims.
        add_bankruptcy_data_to_docket(d, data)
        add_claims_to_docket(d, data["claims"])
    else:
        update_docket_metadata(d, data)
        d, og_info = update_docket_appellate_metadata(d, data)
        if og_info is not None:
            # Save the originating court info before attaching it so that it
            # has been persisted by the time the docket itself is saved.
            og_info.save()
            d.originating_court_information = og_info
        d.save()
        if data.get("docket_entries"):
            add_docket_entries(d, data["docket_entries"])
    if report_type in (
        UPLOAD_TYPE.DOCKET,
        UPLOAD_TYPE.APPELLATE_DOCKET,
        UPLOAD_TYPE.IA_XML_FILE,
    ):
        # Only these report types are given party data to merge.
        add_parties_and_attorneys(d, data["parties"])
    return d.pk
def process_recap_claims_register(self, pk):
    """Merge bankruptcy claims registry HTML into RECAP

    Loads the processing queue item, parses its stored file as a claims
    register, merges the parsed metadata and claims into the matching docket,
    and keeps a copy of the HTML attached to that docket.

    ``self`` is presumably the bound task instance: the code relies on its
    ``request``, ``retry`` and ``max_retries`` attributes.

    :param pk: The primary key of the processing queue item you want to work on
    :type pk: int
    :return: A dict with the docket's primary key under ``docket_pk`` on
    success. None when the item is a debug upload, when the file cannot be
    read or the docket cannot be saved on the final retry, or when the content
    does not parse as a claims register.
    :rtype: dict or None
    """
    queue_item = ProcessingQueue.objects.get(pk=pk)
    if queue_item.debug:
        # Proper debugging not supported on this endpoint. Just abort.
        mark_pq_successful(queue_item)
        self.request.chain = None
        return None

    mark_pq_status(queue_item, "", PROCESSING_STATUS.IN_PROGRESS)
    logger.info(
        "Processing RECAP item (debug is: %s): %s"
        % (queue_item.debug, queue_item)
    )

    try:
        html = queue_item.filepath_local.read().decode("utf-8")
    except IOError as exc:
        read_error = "Internal processing error (%s: %s)." % (
            exc.errno,
            exc.strerror,
        )
        retries_exhausted = self.request.retries == self.max_retries
        if retries_exhausted or queue_item.debug:
            mark_pq_status(queue_item, read_error, PROCESSING_STATUS.FAILED)
            return None
        # Retries remain: flag the item and hand control back via retry.
        mark_pq_status(
            queue_item, read_error, PROCESSING_STATUS.QUEUED_FOR_RETRY
        )
        raise self.retry(exc=exc)

    pacer_court_id = map_cl_to_pacer_id(queue_item.court_id)
    parser = ClaimsRegister(pacer_court_id)
    parser._parse_text(html)
    parsed = parser.data
    logger.info("Parsing completed for item %s" % queue_item)

    if not parsed:
        # Bad HTML
        invalid_note = (
            "Not a valid claims registry page or other parsing failure"
        )
        mark_pq_status(
            queue_item, invalid_note, PROCESSING_STATUS.INVALID_CONTENT
        )
        self.request.chain = None
        return None

    # Merge the contents of the docket into CL.
    docket, match_count = find_docket_object(
        queue_item.court_id,
        queue_item.pacer_case_id,
        parsed["docket_number"],
    )
    if match_count > 1:
        logger.info(
            "Found %s dockets during lookup. Choosing oldest." % match_count
        )
        docket = docket.earliest("date_created")

    # Merge the contents into CL
    docket.add_recap_source()
    update_docket_metadata(docket, parsed)

    try:
        docket.save()
    except IntegrityError as exc:
        logger.warning(
            "Race condition experienced while attempting docket save."
        )
        save_error = "Unable to save docket due to IntegrityError."
        if self.request.retries == self.max_retries:
            mark_pq_status(queue_item, save_error, PROCESSING_STATUS.FAILED)
            self.request.chain = None
            return None
        mark_pq_status(
            queue_item, save_error, PROCESSING_STATUS.QUEUED_FOR_RETRY
        )
        raise self.retry(exc=exc)

    add_bankruptcy_data_to_docket(docket, parsed)
    add_claims_to_docket(docket, parsed["claims"])
    logger.info("Created/updated claims data for %s", queue_item)

    # Add the HTML to the docket in case we need it someday.
    html_record = PacerHtmlFiles(
        content_object=docket, upload_type=UPLOAD_TYPE.CLAIMS_REGISTER
    )
    # We only care about the ext w/UUIDFileSystemStorage
    html_record.filepath.save("claims_registry.html", ContentFile(html))

    docket_pk = docket.pk
    mark_pq_successful(queue_item, d_id=docket_pk)
    return {"docket_pk": docket.pk}
def process_docket_data(
    d: Docket,
    report_type: int,
    filepath: Optional[str] = None,
) -> Optional[int]:
    """Process docket data file.

    Parses a saved report with the parser that matches ``report_type`` and
    merges whatever was parsed into the docket ``d``.

    :param d: A docket object to work on.
    :param report_type: The UPLOAD_TYPE value saying which kind of report is
    being processed (docket, docket history report, appellate docket, IA XML
    file, case report page, or claims register).
    :param filepath: A local path where the item can be found. If not provided,
    the filepath_local field of the docket object will be attempted.
    :return: The primary key of the docket, or None if the parser produced no
    data.
    :raises NotImplementedError: If ``report_type`` is not one of the
    supported types.
    """
    # NOTE(review): imported at call time rather than module level —
    # presumably to dodge a circular import; confirm before hoisting.
    from cl.recap.mergers import (
        add_bankruptcy_data_to_docket,
        add_claims_to_docket,
        add_docket_entries,
        add_parties_and_attorneys,
        update_docket_appellate_metadata,
        update_docket_metadata,
    )

    court_id = map_cl_to_pacer_id(d.court_id)
    if report_type == UPLOAD_TYPE.DOCKET:
        report = DocketReport(court_id)
    elif report_type == UPLOAD_TYPE.DOCKET_HISTORY_REPORT:
        report = DocketHistoryReport(court_id)
    elif report_type == UPLOAD_TYPE.APPELLATE_DOCKET:
        report = AppellateDocketReport(court_id)
    elif report_type == UPLOAD_TYPE.IA_XML_FILE:
        report = InternetArchive(court_id)
    elif report_type == UPLOAD_TYPE.CASE_REPORT_PAGE:
        report = CaseQuery(court_id)
    elif report_type == UPLOAD_TYPE.CLAIMS_REGISTER:
        report = ClaimsRegister(court_id)
    else:
        raise NotImplementedError(
            "The report type with id '%s' is not yet "
            "supported. Perhaps you need to add it?" % report_type
        )

    if filepath:
        # Pin the encoding so a local read matches the remote branch below,
        # which decodes as UTF-8. Without it, open() falls back to the
        # locale's preferred encoding and results vary by machine.
        with open(filepath, "r", encoding="utf-8") as f:
            text = f.read()
    else:
        # This is an S3 path, so get it remotely.
        # bytes.decode() with no argument decodes as UTF-8.
        text = d.filepath_local.read().decode()

    report._parse_text(text)
    data = report.data
    if data == {}:
        # Nothing was parsed; leave the docket untouched.
        return None

    if report_type == UPLOAD_TYPE.CLAIMS_REGISTER:
        # Claims registers only contribute bankruptcy data and claims.
        add_bankruptcy_data_to_docket(d, data)
        add_claims_to_docket(d, data["claims"])
    else:
        update_docket_metadata(d, data)
        d, og_info = update_docket_appellate_metadata(d, data)
        if og_info is not None:
            # Save the originating court info before attaching it so that it
            # has been persisted by the time the docket itself is saved.
            og_info.save()
            d.originating_court_information = og_info
        d.save()
        if data.get("docket_entries"):
            add_docket_entries(d, data["docket_entries"])
    if report_type in (
        UPLOAD_TYPE.DOCKET,
        UPLOAD_TYPE.APPELLATE_DOCKET,
        UPLOAD_TYPE.IA_XML_FILE,
    ):
        # Only these report types are given party data to merge.
        add_parties_and_attorneys(d, data["parties"])
    return d.pk