def test_init():
    """Parsing the sample court summary from an open binary file must not raise.

    Any exception raised while opening or parsing the file is reported as a
    test failure rather than an error.
    """
    try:
        # Use a context manager so the PDF handle is closed even if parsing fails.
        with open("tests/data/CourtSummaryReport.pdf", "rb") as pdf_file:
            parse_pdf(pdf=pdf_file, tempdir="tests/data/tmp")
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
        pytest.fail("Creating Summary object failed.")
def test_get_cases():
    """The sample summary yields a non-empty collection whose first item is a Case."""
    summary = parse_pdf(
        pdf="tests/data/CourtSummaryReport.pdf",
        tempdir="tests/data/tmp",
    )
    # Fetch the cases once; the original repeated the same length check twice.
    cases = summary.get_cases()
    assert len(cases) > 0
    assert isinstance(cases[0], Case)
def test_get_defendant():
    """Check the defendant parsed from the sample summary.

    Both name fields must be non-empty and the date of birth must fall after
    1 January 1900.
    """
    report = parse_pdf(
        pdf="tests/data/CourtSummaryReport.pdf",
        tempdir="tests/data/tmp",
    )
    for name_field in ("first_name", "last_name"):
        assert len(getattr(report.get_defendant(), name_field)) > 0
    assert report.get_defendant().date_of_birth > date(1900, 1, 1)
def test_add_summary_to_crecord():
    """Add the sample summary to a CRecord with ``override_person=True``.

    The record starts with a placeholder person named "John"; afterwards the
    record's first name must be non-empty and no longer "John".
    """
    parsed_summary = parse_pdf(
        pdf="tests/data/CourtSummaryReport.pdf",
        tempdir="tests/data/tmp",
    )
    placeholder = Person("John", "Smith", date(1998, 1, 1))
    record = CRecord(placeholder)
    record.add_summary(parsed_summary, override_person=True)
    assert len(record.person.first_name) > 0
    assert record.person.first_name != "John"
def test_get_arrest_date():
    """At least one case in the sample summary must carry an arrest date."""
    # Context manager closes the PDF handle once the summary has been read.
    with open("tests/data/CourtSummaryReport.pdf", "rb") as pdf_file:
        summary = parse_pdf(pdf=pdf_file, tempdir="tests/data/tmp")
        cases = summary.get_cases()
    # There's not a standard example summary pdf to run tests on, so can't
    # assume much about the contents of the summary being parsed here.
    # In the summary being parsed, an arrest date might be missing from a case,
    # but it's unlikely there's _no_ case with an arrest date.
    # If you're testing this on a summary that has no arrest dates ...
    # find a different summary to use for testing.
    arrest_dates = [
        case.arrest_date for case in cases if case.arrest_date is not None
    ]
    # Without this assertion the test could never fail on the condition the
    # comments above describe.
    assert len(arrest_dates) > 0
def test_get_sentences():
    """Every sentence's maximum length is either a ``timedelta`` or ``None``.

    Walks every case -> charge -> sentence in the sample summary. Failing to
    read the sentence length is reported with an explicit failure message; a
    wrong type is reported by the assertion itself.
    """
    summary = parse_pdf(
        pdf="tests/data/CourtSummaryReport.pdf",
        tempdir="tests/data/tmp",
    )
    for case in summary.get_cases():
        for charge in case.charges:
            for sentence in charge.sentences:
                # Keep the try body to the attribute access only, so a type
                # mismatch surfaces as a real assertion error below instead of
                # being masked as "could not get sentence".
                try:
                    max_time = sentence.sentence_length.max_time
                except Exception:
                    pytest.fail("Could not get sentence from charge.")
                assert isinstance(max_time, timedelta) or max_time is None
def test_bulk_parse_pdf_from_path(caplog):
    """Parse every file in ``tests/data/summaries`` and fail if any cannot be parsed.

    Successfully parsed file names are logged at INFO level; failures are
    collected, logged at ERROR level, and then the test is failed. The test
    also fails when the directory contains no files at all.

    Args:
        caplog: pytest's log-capturing fixture, set here to INFO level.
    """
    caplog.set_level(logging.INFO)
    summaries_dir = "tests/data/summaries"
    paths = os.listdir(summaries_dir)
    if not paths:
        pytest.fail("No summaries to parse in /tests/data/summaries.")

    fails = []
    logging.info("Successful parses:")
    for path in paths:
        try:
            parse_pdf(
                pdf=os.path.join(summaries_dir, path),
                tempdir="tests/data/tmp",
            )
        except Exception:
            # `Exception` rather than a bare except, so an interrupted run
            # is not silently recorded as a parse failure.
            print(path)
            fails.append(os.path.basename(path))
        else:
            logging.info(path)

    if fails:
        logging.error(f"{len(fails)} / {len(paths)} summaries failed to parse:")
        for fail in fails:
            logging.error(f" - {fail}")
        pytest.fail("Summaries failed to parse.")
def put(self, request, *args, **kwargs):
    """
    Accept a CRecord and a set of SourceRecords.

    Incorporate the information that the SourceRecords contain into the CRecord.

    Responses:
        200: ``{"crecord": <serialized CRecord>}`` after every source record
            has been processed. Source records whose type is neither a summary
            PDF nor a docket PDF are logged and skipped.
        400: ``{"errors": serializer.errors}`` when the request data does not
            validate.
        500: ``{"errors": [str(err)]}`` when any exception is raised while
            handling the request.

    TODO this should replace FileUpload view.
    """
    try:
        serializer = IntegrateSourcesSerializer(data=request.data)
        if not serializer.is_valid():
            return Response(
                {"errors": serializer.errors},
                status=status.HTTP_400_BAD_REQUEST,
            )

        crecord = CRecord.from_dict(serializer.validated_data["crecord"])
        for source_record_data in serializer.validated_data["source_records"]:
            source_record = SourceRecord.objects.get(id=source_record_data["id"])
            if source_record.record_type == SourceRecord.RecTypes.SUMMARY_PDF:
                summary = parse_pdf(source_record.file.path)
                crecord.add_summary(
                    summary,
                    case_merge_strategy="overwrite_old",
                    override_person=True,
                )
            elif source_record.record_type == SourceRecord.RecTypes.DOCKET_PDF:
                # The second element (parse errors) is not used by this view.
                docket, _errs = Docket.from_pdf(source_record.file.path)
                crecord.add_docket(docket)
            else:
                logger.error(
                    f"Cannot parse a source record with type {source_record.record_type}"
                )
        return Response(
            {"crecord": CRecordSerializer(crecord).data},
            status=status.HTTP_200_OK,
        )
    except Exception as err:
        # Record the traceback server-side; previously the only trace of the
        # failure was the message string returned to the client.
        logger.exception("Integrating source records into the CRecord failed.")
        return Response(
            {"errors": [str(err)]},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
def dir(directory, archive, expungement_template, sealing_template, atty_name,
        atty_org, atty_org_addr, atty_org_phone, atty_bar_id, tempdir):
    """Analyze every record file in a directory and package the resulting petitions.

    Each regular file in ``directory`` is first tried as a docket, then as a
    summary; files that are neither are reported and skipped. The parsed
    records are combined into one CRecord, run through the expungement and
    sealing rules, and the generated petitions are rendered and saved through
    a ``Compressor``.

    Args:
        directory: Directory containing the files to process. If it does not
            exist, a message is printed and the function returns.
        archive: Passed to ``Compressor`` as its first argument.
        expungement_template: Path of the template opened (binary) for
            petitions whose type is "Expungement".
        sealing_template: Path of the template opened (binary) for petitions
            whose type is "Sealing".
        atty_name: Attorney name.
        atty_org: Attorney's organization.
        atty_org_addr: Organization address.
        atty_org_phone: Organization phone.
        atty_bar_id: Attorney bar id.
        tempdir: Temporary directory handed to the parsers and ``Compressor``.
    """
    if not os.path.exists(directory):
        print(f"The directory {directory} does not exist.")
        return

    files = [
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    ]
    summaries = []
    dockets = []
    atty = Attorney(
        name=atty_name,
        organization=atty_org,
        organization_address=atty_org_addr,
        organization_phone=atty_org_phone,
        bar_id=atty_bar_id,
    )

    for path in files:
        print(f" Processing {path}")
        try:
            # NOTE(review): another caller unpacks Docket.from_pdf as
            # (docket, errs); confirm the value appended here is the Docket
            # itself and not a tuple.
            dk = Docket.from_pdf(path, tempdir=tempdir)
        except Exception:
            # Not a docket; fall back to treating the file as a summary.
            # `Exception` (not a bare except) lets Ctrl-C abort the run.
            try:
                sm = parse_pdf(path, tempdir=tempdir)
            except Exception:
                print(f" It seems {path} is neither a summary nor a docket.")
            else:
                print(f" It looks like {path} is a summary.")
                summaries.append(sm)
        else:
            print(f" It looks like {path} is a docket.")
            dockets.append(dk)

    crec = CRecord()
    # Plain loops: these calls are made for their side effects only.
    for summary in summaries:
        crec.add_summary(summary)
    for docket in dockets:
        crec.add_docket(docket)

    analysis = (
        Analysis(crec)
        .rule(expunge_deceased)
        .rule(expunge_over_70)
        .rule(expunge_nonconvictions)
        .rule(expunge_summary_convictions)
        .rule(seal_convictions)
    )
    petitions = [
        petition
        for decision in analysis.decisions
        for petition in decision.value
    ]
    for petition in petitions:
        petition.attorney = atty

    with open(sealing_template, "rb") as doc:
        for petition in petitions:
            if petition.petition_type == "Sealing":
                petition.set_template(doc)
    with open(expungement_template, "rb") as doc:
        for petition in petitions:
            if petition.petition_type == "Expungement":
                petition.set_template(doc)

    petition_tuples = [(pt.file_name(), pt.render()) for pt in petitions]
    pkg = Compressor(archive, petition_tuples, tempdir=tempdir)
    pkg.save()
    print("*********************************")
    print("****** COMPLETE *****************")
    print("*********************************")
def test_parse_pdf_from_path():
    """Parsing the sample summary given as a path string yields at least one case."""
    sample_path = "tests/data/CourtSummaryReport.pdf"
    scratch_dir = "tests/data/tmp"
    parsed = parse_pdf(pdf=sample_path, tempdir=scratch_dir)
    case_count = len(parsed.get_cases())
    assert case_count > 0