示例#1
0
def get_judge(html, case_path=None):
    """Find the judge for a case, with fix-table and manual fallbacks.

    Text nodes are selected from ``html`` with an XPath query limited to
    ``<p>`` elements at position 60 or lower, skipping text whose parent is a
    ``<span>`` or that sits anywhere inside a ``<center>`` or ``<i>``. Each
    node is run through ``clean_string`` and ``get_judge_from_str`` until one
    yields a judge or reports the reason ``"TOO_LONG"``.

    If no judge was found, ``fixes[case_path]["judge"]`` is tried. When that
    lookup raises ``KeyError`` the ``DEBUG`` flags decide what happens:

    * ``"input_judge"``: open the case in Firefox, prompt for a value on the
      console and record it through ``add_fix``.
    * ``"log_bad_judges"``: append ``case_path`` to ``missing_judges.txt``.

    :param html: object exposing ``xpath()`` (presumably an lxml tree --
        confirm against the caller).
    :param case_path: key into ``fixes``; also used for the browser URL and
        the log entry.
    :return: the identified judge, or a falsy value (typically ``None``) when
        none could be determined.
    """
    query = (
        "//p[position() <= 60]//text()"
        "[not(parent::span)][not(ancestor::center)][not(ancestor::i)]"
    )

    # Original author's note: we want the first paragraph that starts with two
    # uppercase letters once any star pagination has been stripped out (that
    # logic lives in the helpers called below, not in this function).
    judge = None
    for node in html.xpath(query):
        judge, reason = get_judge_from_str(clean_string(node))
        # A TOO_LONG reason is taken to mean the body paragraphs have begun,
        # so scanning stops there even though no judge was found.
        if judge or reason == "TOO_LONG":
            break

    if not judge:
        try:
            judge = fixes[case_path]["judge"]
        except KeyError:
            if "input_judge" in DEBUG:
                # Show the document to the operator; communicate() blocks
                # until the browser process exits.
                viewer = subprocess.Popen(["firefox", "file://%s" % case_path], shell=False)
                viewer.communicate()
                judge = raw_input("No judge identified! What should be here? ")
                add_fix(case_path, {"judge": judge})
            if "log_bad_judges" in DEBUG:
                with open("missing_judges.txt", "a") as log_file:
                    log_file.write("%s\n" % case_path)

    if "judge" in DEBUG:
        log_print("  Judge: %s" % judge)

    return judge
示例#2
0
def get_judge(html, case_path=None):
    """Return the judge for a case, consulting fallbacks when parsing fails.

    The function queries ``html`` for text nodes under ``<p>`` elements at
    position 60 or lower (ignoring text directly inside a ``<span>`` and any
    text within ``<center>`` or ``<i>``), cleans each one with
    ``clean_string`` and hands it to ``get_judge_from_str``. Scanning stops at
    the first node that produces a judge or whose reason is ``'TOO_LONG'``.

    Without a judge, ``fixes[case_path]['judge']`` is looked up. On
    ``KeyError`` the behaviour depends on ``DEBUG``: ``'input_judge'`` opens
    the case in Firefox, asks for the judge on the console and stores the
    answer via ``add_fix``; ``'log_bad_judges'`` appends ``case_path`` to
    ``missing_judges.txt``. With ``'judge'`` in ``DEBUG`` the final value is
    written through ``log_print``.

    :param html: object exposing ``xpath()`` (presumably an lxml tree --
        confirm against the caller).
    :param case_path: key into ``fixes``; also the path shown in the browser
        and written to the log.
    :return: the identified judge, or a falsy value (typically ``None``) when
        none could be determined.
    """
    xpath_expr = '//p[position() <= 60]//text()[not(parent::span)][not(ancestor::center)][not(ancestor::i)]'
    candidates = html.xpath(xpath_expr)

    # Original author's note: the target is the first paragraph starting with
    # two uppercase letters after star pagination has been stripped (handled
    # by the helpers below rather than here).
    judge = None
    for candidate in candidates:
        cleaned = clean_string(candidate)
        judge, reason = get_judge_from_str(cleaned)
        # TOO_LONG signals that ordinary paragraphs have started; give up.
        reached_body = reason == 'TOO_LONG'
        if judge or reached_body:
            break

    if not judge:
        try:
            judge = fixes[case_path]['judge']
        except KeyError:
            if 'input_judge' in DEBUG:
                # Let the operator inspect the document; communicate() waits
                # for the browser process to finish before prompting.
                browser_cmd = ['firefox', 'file://%s' % case_path]
                subprocess.Popen(browser_cmd, shell=False).communicate()
                judge = raw_input("No judge identified! What should be here? ")
                add_fix(case_path, {'judge': judge})
            if 'log_bad_judges' in DEBUG:
                with open('missing_judges.txt', 'a') as missing_log:
                    missing_log.write('%s\n' % case_path)

    if 'judge' in DEBUG:
        message = '  Judge: %s' % judge
        log_print(message)

    return judge
示例#3
0
    def test_extracting_judges_from_strings(self):
        """Check ``get_judge_from_str`` against a table of inputs.

        Every entry in ``pairs`` is ``(input_string, (judge, reason))``, where
        ``judge`` is ``None`` when nothing should be extracted and ``reason``
        is looked up in ``REASONS`` by index. Failures report the offending
        input so a broken case can be found in this long table directly.
        """
        pairs = (
            ("The following is the order of Judge Brailsford", (u"Brailsford", REASONS[12])),
            (
                "Before INGRAHAM, Circuit Judge, and SEALS and COWAN, District Judges.",
                (u"Ingraham, Circuit Judge, and Seals and Cowan, District Judges", REASONS[14]),
            ),
            (
                "J. H. Reddy, Chattanooga, Tenn., James F. Neal, John J. Hooker, Sr., Special Atty., Nashville, Tenn., "
                "Charles W. Shaffer, Jr., Dept. of Justice, Washington, D. C., for the United States.",
                (None, REASONS[10]),
            ),
            ("MR. JUSTICE CLARK delivered the opinion of the Court.", (u"Clark", REASONS[5])),
            ("Justice THEIS delivered the judgment of the court, with opinion.", (u"Theis", REASONS[5])),
            (
                "Kennedy, J., announced the judgment of the Court and delivered the opinion of the Court, except...",
                (u"Kennedy", REASONS[5]),
            ),
            ("Kendy, J., announced the judgment of the Court ", (u"Kendy", REASONS[5])),
            ("U.S.C. 22, JUSTICE Eats Apples", (None, REASONS[3])),  # Has a judiciary word, but not at the end.
            ("PER CURIAM", (u"Per Curiam", REASONS[6])),
            ("Per Curiam", (u"Per Curiam", REASONS[6])),
            ("L. CHANDLER WATSON, Jr., Bankruptcy Judge.", (u"L. Chandler Watson, Jr.", REASONS[7])),
            ("VOLINN, Bankruptcy Judge:", (u"Volinn", REASONS[7])),
            ("McGOVERN, District Judge.", (u"McGovern", REASONS[7])),
            ("JOHN TeSELLE, Bankruptcy Judge.", (u"John Teselle", REASONS[7])),
            ("LEAPHART, Justice", (u"Leaphart", REASONS[7])),
            ("SIMPSON, C.J.", (u"Simpson", REASONS[7])),
            ("LANSING, Judge.", (u"Lansing", REASONS[7])),
            (
                "BRAUN, PLAINTIFF, Kendrick, Finkbeiner, Schafer & Murphy (by Michael J. W. Horn), for defendants.",
                (None, REASONS[4]),
            ),
            ("OPINION BY MR. JUSTICE JONES, May 25953", (u"Mr. Justice Jones", REASONS[8])),
            ("Opinion by Justice ROSS", (u"Ross", REASONS[8])),
            ("SPENCE, J.", (u"Spence", REASONS[7])),
            ("Spencer, J.,", (u"Spencer", REASONS[7])),
            ("SPENCE", (None, REASONS[9])),
            ("Nourse, P. J.", (u"Nourse", REASONS[7])),
            ("A. SPENCE, J.", (u"A. Spence", REASONS[7])),
            ("Van SICKLE, District Judge.", (u"Van Sickle", REASONS[7])),
            ("VanSICKLE, District Judge.", (u"Vansickle", REASONS[7])),
            ("LeGRAND, Justice.", (u"Legrand", REASONS[7])),
            ("DAVID R. STRAWBRIDGE; United States Magistrate Judge.", (u"David R. Strawbridge", REASONS[7])),
            ("CARRICO, J., delivered the opinion of the court.", (u"Carrico", REASONS[5])),
            ("Justice HARTMAN delivered the opinion of the court", (u"Hartman", REASONS[5])),
            ("Justice APPLETON delivered the opinion of the court", (u"Appleton", REASONS[5])),
            ("The opinion of the Court was delivered by HANDLER, J.", (u"Handler", REASONS[11])),
            # Same input as the very first case in this table.
            ("The following is the order of Judge Brailsford", (u"Brailsford", REASONS[12])),
            (
                "Before: NEFF, P.J., and MICHAEL J. KELLY and HOOD, JJ.",
                (u"Neff, P.J., and Michael J. Kelly and Hood", REASONS[14]),
            ),
            ("Chief Judge FULD", (u"Fuld", REASONS[15])),
            ("FOTH, C.", (u"Foth", REASONS[7])),
            ("Robert L. KRECHEVSKY, Bankruptcy Judge.", (u"Robert L. Krechevsky", REASONS[7])),
            (
                "Ernstrom & Dreste, Rochester, NY (J. William Ernstrom, of counsel), for Northland Associates, Inc.",
                (None, REASONS[10]),
            ),
            # memorandum looks like a bad_word, but it's not
            (
                "BREITEL and Judge JASEN, GABRIELLI, JONES, WACHTLER and COOKE Concur in Memorandum",
                (u"Breitel and Judge Jasen, Gabrielli, Jones, Wachtler and Cooke Concur in Memorandum", REASONS[16]),
            ),
            # but if it starts with Memorandum, it's no good.
            ("Memorandum of Decision on R.C. Allen Instruments", (None, REASONS[10])),
            (
                "CONCLUDING That the Aggravating Circumstances Outweighed the Mitigating Circumstances.",
                (None, REASONS[4]),
            ),
            (
                'Considering Factor (A), "The Ultimate and Decisive Test," We Examine Factors (E), (F) and (H)',
                (None, REASONS[10]),
            ),
            ("Decision Denying Application to Retain Rebecca J. Habbert", (None, REASONS[10])),
            ("Accepting Appellant's Pleas of Guilty, the Record Reflects the Following Occurred:", (None, REASONS[10])),
            ("ADDRESSING Ourselves to the Substance of These Questions We Think It Appropriate", (None, REASONS[4])),
            (
                "ADMITTING a Statement as a Dying Declaration, the Trial Court Must Make a Preliminary",
                (None, REASONS[4]),
            ),
            ("AMENDED Findings of Fact", (None, REASONS[4])),
            ("AMICUS Curiae Brief Was Filed by Bruce A. Olsen", (None, REASONS[4])),
            ("LAWRENCE S. Robbins Argued the Cause for Appellants. With Him", (None, REASONS[4])),
            ("DISCUSSING These Cases We Must Separate Them According to The", (None, REASONS[4])),
            ("EXAMINING These and the Other Defenses Which Comdisco Has Raised, However", (None, REASONS[4])),
            ("GOING Into the Question of the Public", (None, REASONS[4])),
            # Going is bad, but foregoing is good
            ("JUDGE BLATCHFORD After Stating the Facts in the Foregoing Language", (u"Judge Blatchford", REASONS[7])),
            (
                "DECISION Granting Judgment to the Trustee in Bankruptcy for Comprehensive Business Systems",
                (None, REASONS[4]),
            ),
            ("THESE Arguments That Both Sides Would Be Allowed Wide Latitude in Arguing", (None, REASONS[4])),
            ("DECISION Denying Application to Retain Rebecca J. Habbert", (None, REASONS[4])),
            ("TRIAL, Appellant Argued That It Was a Third-Party Benefic", (None, REASONS[4])),
            ("FINDINGS of Fact and Conclusion of Law on Eastgroup", (None, REASONS[4])),
            ("PROCEEDING Further a General Description of the Area Will Be Helpful", (None, REASONS[4])),
            ("TURNING Them Over to His Counsel on the Morning of July 24", (None, REASONS[4])),
            # Starting with a number.
            ("1975, SECTION 594 Did Not Describe What Kind judge ", (None, REASONS[3])),
            # Starting with a regex special char
            ('("DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            ('"DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            (':"DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            ('>"DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            ('["DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            ('{"DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            ('}"DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            # Starts with "The", but is a valid form
            ("The Cause Was Argued Before Anderson", (u"Anderson", REASONS[17])),
            # Lowercase 'the' is no good, however
            ("the Water Heater Was Installed, the Slates, j.", (None, REASONS[18])),
            # Starting with "There " is no good, but "Theresa" is
            ("THERE is No Merit in the Claim of Improper Comment of the Commonwealth, J.", (None, REASONS[3])),
            ("THERESA CRAFT, J.", (u"Theresa Craft", REASONS[7])),
            # Nothing with utf-8 as first char is good.
            (u"\xe2\xa7\xe2\xa7 19-1-102(1), JUDGE", (None, REASONS[2])),
            # Argued Before is ok, but Argued is not.
            ("Argued before Lissner", (u"Lissner", REASONS[17])),
            ("ARGUED: amy louise howe, before so-and-so, Justice", (None, REASONS[3])),
        )

        for q, a in pairs:
            actual = tuple(get_judge_from_str(q))
            # The message carries input, actual and expected values because
            # a custom msg may replace (rather than extend) unittest's default
            # failure text, depending on the TestCase's longMessage setting.
            self.assertEqual(
                actual,
                a,
                "get_judge_from_str(%r) returned %r, expected %r" % (q, actual, a),
            )
示例#4
0
    def test_extracting_judges_from_strings(self):
        """Check ``get_judge_from_str`` against a table of inputs.

        Every entry in ``pairs`` is ``(input_string, (judge, reason))``, where
        ``judge`` is ``None`` when nothing should be extracted and ``reason``
        is looked up in ``REASONS`` by index. Failures report the offending
        input so a broken case can be found in this long table directly.
        """
        pairs = (
            ('The following is the order of Judge Brailsford', (u'Brailsford', REASONS[12])),
            ('Before INGRAHAM, Circuit Judge, and SEALS and COWAN, District Judges.',
             (u'Ingraham, Circuit Judge, and Seals and Cowan, District Judges', REASONS[14])),
            ('J. H. Reddy, Chattanooga, Tenn., James F. Neal, John J. Hooker, Sr., Special Atty., Nashville, Tenn., '
             'Charles W. Shaffer, Jr., Dept. of Justice, Washington, D. C., for the United States.', (None, REASONS[10])),
            ('MR. JUSTICE CLARK delivered the opinion of the Court.', (u'Clark', REASONS[5])),
            ('Justice THEIS delivered the judgment of the court, with opinion.',
             (u'Theis', REASONS[5])),
            ('Kennedy, J., announced the judgment of the Court and delivered the opinion of the Court, except...',
             (u'Kennedy', REASONS[5])),
            ('Kendy, J., announced the judgment of the Court ', (u'Kendy', REASONS[5])),
            ('U.S.C. 22, JUSTICE Eats Apples', (None, REASONS[3])),  # Has a judiciary word, but not at the end.
            ('PER CURIAM', (u'Per Curiam', REASONS[6])),
            ('Per Curiam', (u'Per Curiam', REASONS[6])),
            ('L. CHANDLER WATSON, Jr., Bankruptcy Judge.',
             (u'L. Chandler Watson, Jr.', REASONS[7])),
            ('VOLINN, Bankruptcy Judge:', (u'Volinn', REASONS[7])),
            ('McGOVERN, District Judge.', (u'McGovern', REASONS[7])),
            ('JOHN TeSELLE, Bankruptcy Judge.', (u'John Teselle', REASONS[7])),
            ('LEAPHART, Justice', (u'Leaphart', REASONS[7])),
            ('SIMPSON, C.J.', (u'Simpson', REASONS[7])),
            ('LANSING, Judge.', (u'Lansing', REASONS[7])),
            ('BRAUN, PLAINTIFF, Kendrick, Finkbeiner, Schafer & Murphy (by Michael J. W. Horn), for defendants.',
             (None, REASONS[4])),
            ('OPINION BY MR. JUSTICE JONES, May 25953', (u'Mr. Justice Jones', REASONS[8])),
            ('Opinion by Justice ROSS', (u'Ross', REASONS[8])),
            ('SPENCE, J.', (u'Spence', REASONS[7])),
            ('Spencer, J.,', (u'Spencer', REASONS[7])),
            ('SPENCE', (None, REASONS[9])),
            ('Nourse, P. J.', (u'Nourse', REASONS[7])),
            ('A. SPENCE, J.', (u'A. Spence', REASONS[7])),
            ('Van SICKLE, District Judge.', (u'Van Sickle', REASONS[7])),
            ('VanSICKLE, District Judge.', (u'Vansickle', REASONS[7])),
            ('LeGRAND, Justice.', (u'Legrand', REASONS[7])),
            ('DAVID R. STRAWBRIDGE; United States Magistrate Judge.', (u'David R. Strawbridge', REASONS[7])),
            ('CARRICO, J., delivered the opinion of the court.', (u'Carrico', REASONS[5])),
            ('Justice HARTMAN delivered the opinion of the court', (u'Hartman', REASONS[5])),
            ('Justice APPLETON delivered the opinion of the court', (u'Appleton', REASONS[5])),
            ('The opinion of the Court was delivered by HANDLER, J.', (u'Handler', REASONS[11])),
            # Same input as the very first case in this table.
            ('The following is the order of Judge Brailsford', (u'Brailsford', REASONS[12])),
            ('Before: NEFF, P.J., and MICHAEL J. KELLY and HOOD, JJ.',
             (u'Neff, P.J., and Michael J. Kelly and Hood', REASONS[14])),
            ('Chief Judge FULD', (u'Fuld', REASONS[15])),
            ('FOTH, C.', (u'Foth', REASONS[7])),
            ('Robert L. KRECHEVSKY, Bankruptcy Judge.', (u'Robert L. Krechevsky', REASONS[7])),
            ('Ernstrom & Dreste, Rochester, NY (J. William Ernstrom, of counsel), for Northland Associates, Inc.',
             (None, REASONS[10])),

            # memorandum looks like a bad_word, but it's not
            ('BREITEL and Judge JASEN, GABRIELLI, JONES, WACHTLER and COOKE Concur in Memorandum',
             (u'Breitel and Judge Jasen, Gabrielli, Jones, Wachtler and Cooke Concur in Memorandum',
              REASONS[16])),
            # but if it starts with Memorandum, it's no good.
            ('Memorandum of Decision on R.C. Allen Instruments', (None, REASONS[10])),
            ('CONCLUDING That the Aggravating Circumstances Outweighed the Mitigating Circumstances.',
             (None, REASONS[4])),
            ('Considering Factor (A), "The Ultimate and Decisive Test," We Examine Factors (E), (F) and (H)',
             (None, REASONS[10])),
            ('Decision Denying Application to Retain Rebecca J. Habbert', (None, REASONS[10])),
            ("Accepting Appellant's Pleas of Guilty, the Record Reflects the Following Occurred:", (None, REASONS[10])),
            ('ADDRESSING Ourselves to the Substance of These Questions We Think It Appropriate', (None, REASONS[4])),
            ('ADMITTING a Statement as a Dying Declaration, the Trial Court Must Make a Preliminary',
             (None, REASONS[4])),
            ('AMENDED Findings of Fact', (None, REASONS[4])),
            ('AMICUS Curiae Brief Was Filed by Bruce A. Olsen', (None, REASONS[4])),
            ('LAWRENCE S. Robbins Argued the Cause for Appellants. With Him', (None, REASONS[4])),
            ('DISCUSSING These Cases We Must Separate Them According to The', (None, REASONS[4])),
            ('EXAMINING These and the Other Defenses Which Comdisco Has Raised, However', (None, REASONS[4])),
            ('GOING Into the Question of the Public', (None, REASONS[4])),
            # Going is bad, but foregoing is good
            ('JUDGE BLATCHFORD After Stating the Facts in the Foregoing Language',
             (u'Judge Blatchford', REASONS[7])),
            ('DECISION Granting Judgment to the Trustee in Bankruptcy for Comprehensive Business Systems',
             (None, REASONS[4])),
            ('THESE Arguments That Both Sides Would Be Allowed Wide Latitude in Arguing', (None, REASONS[4])),
            ('DECISION Denying Application to Retain Rebecca J. Habbert', (None, REASONS[4])),
            ('TRIAL, Appellant Argued That It Was a Third-Party Benefic', (None, REASONS[4])),
            ('FINDINGS of Fact and Conclusion of Law on Eastgroup', (None, REASONS[4])),
            ('PROCEEDING Further a General Description of the Area Will Be Helpful', (None, REASONS[4])),
            ('TURNING Them Over to His Counsel on the Morning of July 24', (None, REASONS[4])),
            # Starting with a number.
            ('1975, SECTION 594 Did Not Describe What Kind judge ', (None, REASONS[3])),
            # Starting with a regex special char
            ('("DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            ('"DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            (':"DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            ('>"DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            ('["DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            ('{"DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            ('}"DGCL") SEEKING judge Advancement of Reasonable Attorney\'s Fees', (None, REASONS[3])),
            # Starts with "The", but is a valid form
            ('The Cause Was Argued Before Anderson', (u'Anderson', REASONS[17])),
            # Lowercase 'the' is no good, however
            ('the Water Heater Was Installed, the Slates, j.', (None, REASONS[18])),
            # Starting with "There " is no good, but "Theresa" is
            ('THERE is No Merit in the Claim of Improper Comment of the Commonwealth, J.', (None, REASONS[3])),
            ('THERESA CRAFT, J.', (u'Theresa Craft', REASONS[7])),
            # Nothing with utf-8 as first char is good.
            (u'\xe2\xa7\xe2\xa7 19-1-102(1), JUDGE', (None, REASONS[2])),
            # Argued Before is ok, but Argued is not.
            ('Argued before Lissner', (u'Lissner', REASONS[17])),
            ('ARGUED: amy louise howe, before so-and-so, Justice', (None, REASONS[3])),
        )

        for q, a in pairs:
            actual = tuple(get_judge_from_str(q))
            # The message carries input, actual and expected values because
            # a custom msg may replace (rather than extend) unittest's default
            # failure text, depending on the TestCase's longMessage setting.
            self.assertEqual(
                actual, a,
                'get_judge_from_str(%r) returned %r, expected %r' % (q, actual, a))