Пример #1
0
def test_add_summary_merge_strategies(example_summary):
    """Exercise the merge options of ``CRecord.add_summary``.

    A deep copy of the example summary, with the first case's otn changed,
    is added after the original summary under three different settings.
    """
    altered_summary = copy.deepcopy(example_summary)
    altered_summary.get_cases()[0].otn = "a_different_otn"

    def new_record():
        # Every scenario starts from a record that holds only a placeholder Person.
        return CRecord(Person("Dummy", "Name", None))

    # No options: the duplicate case from the second summary is ignored and
    # the record keeps its own Person.
    record = new_record()
    record.add_summary(example_summary)
    record.add_summary(altered_summary)
    assert record.cases[0].otn == example_summary.get_cases()[0].otn
    assert record.person.first_name == "Dummy"

    # "overwrite_old": the duplicate case replaces the one already stored,
    # while the Person is still left alone.
    record = new_record()
    record.add_summary(example_summary)
    record.add_summary(altered_summary, case_merge_strategy="overwrite_old")
    assert record.cases[0].otn == altered_summary.get_cases()[0].otn
    assert record.person.first_name == "Dummy"

    # override_person=True: the Person is replaced by the new summary's
    # defendant, but the stored case is not overwritten.
    record = new_record()
    record.add_summary(example_summary)
    record.add_summary(altered_summary, override_person=True)
    assert record.cases[0].otn != altered_summary.get_cases()[0].otn
    assert record.person.first_name == altered_summary.get_defendant().first_name
Пример #2
0
def test_init():
    """A CRecord constructed with a Person exposes it as ``.person``."""
    birth_date = date(2010, 1, 1)

    joe = Person(first_name="Joe", last_name="Smith", date_of_birth=birth_date)
    record = CRecord(person=joe)
    assert record.person.first_name == "Joe"

    joan = Person(first_name="Joan", last_name="Smythe", date_of_birth=birth_date)
    record = CRecord(person=joan)
    assert record.person.last_name == "Smythe"
Пример #3
0
 def from_dict(dct: dict) -> Expungement:
     """
     Build an Expungement from a dict.

     The "attorney", "client" and "cases" entries are converted with
     ``Attorney.from_dict``, ``Person.from_dict`` and ``Case.from_dict``
     (one call per item of "cases"); every other entry is passed to the
     ``Expungement`` constructor unchanged.

     Args:
         dct: mapping with at least the keys "attorney", "client" and "cases".
             It is not modified.

     Returns:
         the Expungement built from ``dct``.

     Raises:
         KeyError: if "attorney", "client" or "cases" is missing.
     """
     # Build a fresh mapping instead of calling dct.update(), so the caller's
     # dict keeps its original (serialized) values after this call.
     return Expungement(**{
         **dct,
         "attorney": Attorney.from_dict(dct["attorney"]),
         "client": Person.from_dict(dct["client"]),
         "cases": [Case.from_dict(c) for c in dct["cases"]],
     })
Пример #4
0
def get_person(stree: etree) -> Person:
    """
    Extract a Person from the xml of a docket, parsed into sections.

    The name is read from the first ``docket/header/caption/defendant_line``
    element. If that element is missing or has no text, the returned Person
    has empty strings for its first and last names.

    Args:
        stree: xml tree of a docket, parsed into a header and some number of sections

    Returns:
        a Person object
    """
    first_name = ""
    last_name = ""
    try:
        name = stree.xpath("docket/header/caption/defendant_line")[0].text
        # An element without text content has ``text`` set to None; treat it
        # like a missing name instead of failing on ``None.strip()``.
        if name is not None:
            first_name, last_name = split_first_name(name.strip())
    except IndexError:
        # No defendant_line element was found (or splitting the name raised
        # IndexError); keep the empty names.
        pass

    aliases = xpath_or_empty_list(stree, "//alias")
    date_of_birth = xpath_date_or_blank(stree, "//birth_date")
    return Person(
        first_name=first_name,
        last_name=last_name,
        date_of_birth=date_of_birth,
        aliases=aliases,
    )
Пример #5
0
def example_person(example_address):
    """Return a sample Person with every field used by the tests filled in.

    Args:
        example_address: the address assigned to the sample Person.
    """
    fields = {
        "first_name": "Jane",
        "last_name": "Smorp",
        "aliases": ["JSmo", "SmorpyJJ"],
        "address": example_address,
        "date_of_birth": date(2010, 1, 1),
        "ssn": "999-99-9999",
    }
    return Person(**fields)
Пример #6
0
def parse_person(txt: str) -> Tuple[Person, List[str]]:
    """
    Extract a Person from the text of a CP docket.

    Each piece of information (name, date of birth, aliases, address) is
    looked up with ``find_pattern``. When a lookup returns no match, the
    errors it reported are collected instead of raising.

    Args:
        txt: the text of the docket.

    Returns:
        A tuple ``(person, errs)``: the Person with whatever fields could be
        found, and the accumulated errors from the lookups that did not match.
    """
    person = Person(first_name=None, last_name=None, date_of_birth=None)
    errs = []
    defendant_name, d_errs = find_pattern(
        "defendant_name",
        r"^Defendant\s+(?P<last_name>.*), (?P<first_name>.*)",
        txt,
        re.M,
    )
    if defendant_name is not None:
        person.first_name = defendant_name.group("first_name")
        person.last_name = defendant_name.group("last_name")
    else:
        errs.extend(d_errs)

    defendant_dob, dob_errs = find_pattern(
        "date_of_birth",
        r"Date Of Birth:?\s+(?P<date_of_birth>\d{1,2}\/\d{1,2}\/\d{4})",
        txt,
    )
    if defendant_dob is not None:
        person.date_of_birth = date_or_none(
            defendant_dob.group("date_of_birth"))
    else:
        errs.extend(dob_errs)

    # Aliases and address are only searched for inside the section between
    # the "DEFENDANT INFORMATION" and "CASE PARTICIPANTS" headings.
    defendant_info_section, d_section_errs = find_pattern(
        "defendant_info",
        r"DEFENDANT INFORMATION(?P<defendant_info>.*)\s+CASE PARTICIPANTS",
        txt,
        re.DOTALL,
    )
    if defendant_info_section is not None:
        defendant_info_text = defendant_info_section.group("defendant_info")
        alias_search, a_errs = find_pattern(
            "aliases", r"Alias Name\s*\n+(?P<aliases>(.+\s*\n*)*)",
            defendant_info_text)
        if alias_search is not None:
            # Filter on the stripped text: a line holding only whitespace
            # would otherwise end up as an empty-string alias.
            person.aliases = [
                alias.strip()
                for alias in alias_search.group("aliases").split("\n")
                if alias.strip()
            ]
        else:
            errs.extend(a_errs)

        addr_search, addr_errs = find_pattern(
            "address", r"City/State/Zip:\s*(?P<addr>.*)\s*",
            defendant_info_text)
        if addr_search is not None:
            person.address = Address(addr_search.group("addr"), "")
        else:
            errs.extend(addr_errs)
    else:
        errs.extend(d_section_errs)

    return person, errs
Пример #7
0
def get_defendant(summary_xml: etree.Element) -> Person:
    """
    Build the defendant's Person from the xml of a summary.

    The name comes from ``caption/defendant_name`` and is split on commas:
    the first piece is used as the last name and the second as the first
    name. The date of birth comes from ``caption/def_dob``; if it is not in
    ``%m/%d/%Y`` format the Person gets None instead. Aliases are the
    stripped text of every ``alias`` element.

    Args:
        summary_xml: the xml of a summary.

    Returns:
        a Person object
    """
    raw_name = summary_xml.find("caption/defendant_name").text
    name_parts = [part.strip() for part in raw_name.split(",")]
    raw_dob = summary_xml.find("caption/def_dob").text.strip()
    alias_list = [node.text.strip() for node in summary_xml.xpath("//alias")]

    try:
        birth_date = datetime.strptime(raw_dob, "%m/%d/%Y").date()
    except ValueError:
        # Date text that does not parse is treated as an unknown birth date.
        birth_date = None

    return Person(name_parts[1], name_parts[0], birth_date, aliases=alias_list)
Пример #8
0
def test_serializing_person(example_person):
    """Round-trip a Person through ``to_serializable`` and ``PersonSerializer``."""
    ser = to_serializable(example_person)
    assert ser["first_name"] == example_person.first_name
    assert ser["aliases"] == example_person.aliases

    serializer = PersonSerializer(data=ser)
    # Report the validation failures themselves: ``errors`` holds what went
    # wrong with this data, whereas ``error_messages`` only holds the generic
    # message templates.
    # NOTE(review): assumes PersonSerializer is a Django REST framework
    # serializer — confirm.
    assert serializer.is_valid(), serializer.errors
    restored = Person.from_dict(serializer.validated_data)
    assert isinstance(restored, Person)
    assert restored == example_person
Пример #9
0
def test_person():
    """Every value given to the Person constructor is readable as an attribute."""
    home = Address(line_one="1234 Main St.",
                   city_state_zip="Philadelphia, PA 19103")
    john = Person(
        "John",
        "Smeth",
        date(2010, 1, 1),
        date_of_death=date(2020, 1, 1),
        aliases=["SmithGuy"],
        ssn="999-99-9999",
        address=home,
    )

    assert john.first_name == "John"
    assert john.last_name == "Smeth"
    assert john.date_of_birth.year == 2010
    assert john.date_of_death.year == 2020
    assert john.aliases == ["SmithGuy"]
    assert john.ssn == "999-99-9999"
    assert john.address.line_one == "1234 Main St."
Пример #10
0
def test_person_todict():
    """``to_serializable`` turns a Person into a plain dict with an ISO-format birth date."""
    home = Address(line_one="1234 Main St.",
                   city_state_zip="Philadelphia, PA 19103")
    john = Person(
        "John",
        "Smeth",
        date(2010, 1, 1),
        aliases=["JJ", "Smelly"],
        ssn="999-99-9999",
        address=home,
    )

    expected = {
        "first_name": "John",
        "last_name": "Smeth",
        "date_of_birth": date(2010, 1, 1).isoformat(),
        "aliases": ["JJ", "Smelly"],
        "ssn": "999-99-9999",
        "address": {
            "line_one": "1234 Main St.",
            "city_state_zip": "Philadelphia, PA 19103",
        },
    }
    assert to_serializable(john) == expected
Пример #11
0
def test_person_from_dict(example_person):
    """``Person.from_dict`` rebuilds a Person from its serialized form."""
    rebuilt = Person.from_dict(to_serializable(example_person))
    assert example_person.last_name == rebuilt.last_name
    # The birth date must come back as a date object.
    assert isinstance(rebuilt.date_of_birth, date)
Пример #12
0
                )
                if resp.status_code != 200:
                    continue
                filename = os.path.join(td, case["docket_number"])
                with open(filename, "wb") as fp:
                    fp.write(resp.content)
                case[f"{source_type}_text"] = get_text_from_pdf(filename)
            if output_dir is not None:
                for doc in os.listdir(td):
                    shutil.copy(os.path.join(td, doc),
                                os.path.join(output_dir, doc))

    # Read the source records and integrate them into a CRecord
    # representing the person's full criminal record.
    sourcerecords = list()
    crecord = CRecord(person=Person(
        first_name=first_name, last_name=last_name, date_of_birth=dob))
    for case in search_results:
        parser = pick_pdf_parser(case["docket_number"])
        if parser is None:
            continue
        sr = SourceRecord(case["docket_sheet_text"], parser)
        sourcerecords.append(sr)
        crecord.add_sourcerecord(sr, case_merge_strategy="overwrite_old")

    logger.info("Built CRecord.")
    logger.info(f"   -time so far:{(datetime.now() - starttime).seconds}")
    # Create an Analysis using the CRecord. This Analysis will explain
    # what charges and cases are expungeable, what will be automatically sealed,
    # and what could be sealed by petition.

    analysis = (Analysis(crecord).rule(rd.expunge_deceased).rule(
Пример #13
0
def test_add_docket(example_docket):
    """Adding a docket adds one case and replaces the placeholder Person."""
    record = CRecord(Person("dummy", "name", None))
    record.add_docket(example_docket)

    assert len(record.cases) == 1
    # The placeholder first name must no longer be on the record.
    assert record.person.first_name != "dummy"
Пример #14
0
def test_add_empty_sourcerecord():
    """A SourceRecord created with ``parser=None`` adds no cases and keeps the Person."""
    record = CRecord(Person("dummy", "name", None))
    empty_source = SourceRecord("anysource", parser=None)

    record.add_sourcerecord(empty_source, override_person=True)

    assert len(record.cases) == 0
    # Even with override_person=True the placeholder Person stays in place.
    assert record.person.first_name == "dummy"
Пример #15
0
def test_add_sourcerecord(example_sourcerecord):
    """Adding a SourceRecord with ``override_person=True`` brings in its cases and Person."""
    record = CRecord(Person("dummy", "name", None))
    record.add_sourcerecord(example_sourcerecord, override_person=True)

    assert len(record.cases) == len(example_sourcerecord.cases)
    # The placeholder first name must have been replaced.
    assert record.person.first_name != "dummy"
Пример #16
0
def test_add_summary_doesnt_add_duplicates(example_summary):
    """Adding the same summary twice leaves the case count unchanged."""
    duplicate_summary = copy.deepcopy(example_summary)
    record = CRecord(Person("Dummy", "Name", None))

    for summary in (example_summary, duplicate_summary):
        record.add_summary(summary)

    assert len(record.cases) == len(example_summary.get_cases())
Пример #17
0
def test_add_summary_to_crecord():
    """A summary parsed from a pdf replaces the record's Person when ``override_person=True``."""
    summary, _ = Summary.from_pdf(pdf="tests/data/CourtSummaryReport.pdf")
    record = CRecord(Person("John", "Smith", date(1998, 1, 1)))
    record.add_summary(summary, override_person=True)

    first_name = record.person.first_name
    assert len(first_name) > 0
    assert first_name != "John"
Пример #18
0
def parse_mdj_pdf_text(txt: str) -> Tuple[Person, List[Case], List[str]]:
    """
    Parse an MDJ docket, given the formatted text of the pdf.

    This function uses the original Expungement Generator's technique: regexes and
    nested loops, iterating over the lines of the docket.

    see https://github.com/NateV/Expungement-Generator/blob/master/Expungement-Generator/Record.php:64

    Args:
        txt: the text of the docket. It is split on newlines and every line is
            checked against each of the ``PATTERNS`` regexes.

    Returns:
        A tuple of the Person built from the docket, a one-element list with
        the Case built from the docket, and a list of errors (always empty).
    """
    already_searched_aliases = False

    case_info = dict()
    case_info["charges"] = []
    person_info = dict()
    person_info["aliases"] = []

    lines = txt.split("\n")
    line_count = len(lines)
    for idx, line in enumerate(lines):
        # Some fields overflow onto the following line. The last line of the
        # docket has no follower, so look-ahead is skipped there.
        next_line = lines[idx + 1] if idx + 1 < line_count else None

        m = PATTERNS.mdj_district_number.search(line)
        if m:
            # what's the mdj district number for?
            case_info["mdj_district_number"] = m.group(1)

        m = PATTERNS.mdj_county_and_disposition.search(line)
        if m:
            case_info["county"] = m.group(1)
            case_info["disposition_date"] = m.group(2)

        m = PATTERNS.docket_number.search(line)
        if m:
            case_info["docket_number"] = m.group(1)

        m = PATTERNS.otn.search(line)
        if m:
            case_info["otn"] = m.group(1)

        m = PATTERNS.dc_number.search(line)
        if m:
            case_info["dc_num"] = m.group(1)

        m = PATTERNS.arrest_agency_and_date.search(line)
        if m:
            case_info["arresting_agency"] = m.group(1)
            try:
                case_info["arrest_date"] = m.group(2)
            except IndexError:
                # The pattern has no second group, so there is no arrest date
                # to record.
                pass

        m = PATTERNS.complaint_date.search(line)
        if m:
            case_info["complaint_date"] = m.group(1)

        m = PATTERNS.affiant.search(line)
        if m:
            # TODO - mdj docket parse should reverse order of names of affiant
            case_info["affiant"] = m.group(1)

        # MHollander said:
        #  the judge name can appear in multiple places.  Start by checking to see if the
        # judge's name appears in the Judge Assigned field.  If it does, then set it.
        # Later on, we'll check in the "Final Issuing Authority" field.  If it appears there
        # and doesn't show up as "migrated," we'll reassign the judge name.
        m = PATTERNS.judge_assigned.search(line)
        if m:
            judge = m.group(1).strip()
            if next_line is not None:
                overflow_match = PATTERNS.judge_assigned_overflow.search(next_line)
                if overflow_match:
                    judge = f"{judge} {overflow_match.group(1).strip()}"

            if "igrated" not in judge:
                case_info["judge"] = judge

        m = PATTERNS.judge.search(line)
        if m:
            if len(m.group(1)) > 0 and "igrated" not in m.group(1):
                case_info["judge"] = m.group(1)

        m = PATTERNS.dob.search(line)
        if m:
            person_info["date_of_birth"] = m.group(1)

        m = PATTERNS.name.search(line)
        if m:
            person_info["first_name"] = m.group(2)
            person_info["last_name"] = m.group(1)
            person_info["aliases"].append(f"{m.group(1)}, {m.group(2)}")

        m = PATTERNS.alias_names_start.search(line)
        if already_searched_aliases is False and m:
            # Collect the lines after the alias heading until the line that
            # ends the alias section, or the end of the docket.
            idx2 = idx + 1
            while idx2 < line_count:
                candidate = lines[idx2]
                # Page-break lines are skipped rather than recorded; other
                # lines count as an alias only if they contain a word character.
                if not PATTERNS.end_of_page.search(candidate) and re.search(
                        r"\w", candidate):
                    person_info["aliases"].append(candidate.strip())
                # Always advance, so a page-break line cannot stall the scan.
                idx2 += 1
                if idx2 < line_count and PATTERNS.alias_names_end.search(
                        lines[idx2]):
                    break
            already_searched_aliases = True

        m = PATTERNS.charges.search(line)  # Arrest.php;595
        if m:
            charge_info = dict()
            charge_info["statute"] = m.group(1)
            charge_info["grade"] = m.group(3)
            charge_info["offense"] = m.group(4)
            charge_info["disposition"] = m.group(6)
            m2 = None
            if next_line is not None:
                m2 = PATTERNS.charges_search_overflow.search(next_line)
            if m2:
                charge_info[
                    "offense"
                ] = f"{charge_info['offense'].strip()} {m2.group(1).strip()}"

            ## disposition date is on the next line
            if "disposition_date" in case_info:
                charge_info["disposition_date"] = case_info["disposition_date"]

            case_info["charges"].append(charge_info)

        m = PATTERNS.bail.search(line)
        if m:
            # TODO charges won't use the detailed bail info yet.
            case_info["bail_charged"] = m.group(1)
            case_info["bail_paid"] = m.group(2)
            case_info["bail_adjusted"] = m.group(3)
            case_info["bail_total"] = m.group(5)

        m = PATTERNS.costs.search(line)
        if m:
            case_info["total_fines"] = m.group(1)
            case_info["fines_paid"] = m.group(2)
            case_info["costs_adjusted"] = m.group(3)
            case_info["costs_total"] = m.group(5)

    # Trim surrounding whitespace from every top-level string value; lists
    # (charges, aliases) and None values are passed through untouched.
    case_info = {
        k: (v.strip() if isinstance(v, str) else v) for k, v in case_info.items()
    }
    person_info = {
        k: (v.strip() if isinstance(v, str) else v) for k, v in person_info.items()
    }
    person = Person.from_dict(person_info)
    case = Case.from_dict(case_info)
    logger.info("Finished parsing MDJ docket")

    return person, [case], []
Пример #19
0
def is_over_age(person: Person, age_limit: int) -> Decision:
    """
    Decide whether a person is older than an age limit.

    Args:
        person: the Person whose ``age()`` is compared with the limit.
        age_limit: the age the person must exceed.

    Returns:
        a Decision whose value is True only when ``person.age()`` is strictly
        greater than ``age_limit``, and whose reasoning states the person's age.
    """
    question = f"Is {person.first_name} over {age_limit}?"
    exceeds_limit = person.age() > age_limit
    explanation = f"{person.first_name} is {person.age()}"
    return Decision(name=question, value=exceeds_limit, reasoning=explanation)
Пример #20
0
def test_person_age():
    """A Person born on 2000-01-01 reports an age above 17."""
    birth_date = date(2000, 1, 1)
    john = Person("John", "Smeth", birth_date)
    assert john.age() > 17