class PublicLawFinder(FDSYSFinder, FinderBase):
    """Public Law"""
    # Label attached to citations this finder emits.
    CITE_TYPE = 'PUBLIC_LAW'
    # Matches e.g. "Public Law 111-203": two marker words, the congress
    # number, a suppressed dash, then the law number, captured as the named
    # results "congress" and "lawnum".
    GRAMMAR = QuickSearchable(
        Marker("Public") + Marker("Law") +
        Word(string.digits).setResultsName("congress") + Suppress("-") +
        Word(string.digits).setResultsName("lawnum"))
    # NOTE(review): presumably merged into the fdsys URL parameters by the
    # FDSYSFinder mixin (cf. the fdsys_url(collection='plaw',
    # lawtype='public', ...) call in the non-mixin PublicLawFinder) — confirm
    # against FDSYSFinder's implementation.
    CONST_PARAMS = dict(collection='plaw', lawtype='public')
Exemplo n.º 2
0
def parsed_title(text, appendix_letter):
    """Return the first title-line parse found in ``text``, or None.

    A title line starts either with ``appendix_letter`` followed by a dash
    and a ``grammar.a1`` label (plus optional markerless/paren suffixes), or
    with the word "part" followed by a ``grammar.aI`` label.
    """
    by_digits = (Marker(appendix_letter) + Suppress('-') +
                 grammar.a1.copy().leaveWhitespace() +
                 Optional(grammar.markerless_upper) +
                 Optional(grammar.paren_upper | grammar.paren_lower) +
                 Optional(grammar.paren_digit))
    by_roman = Marker("part") + grammar.aI
    title_parser = QuickSearchable(LineStart() +
                                   (by_digits | by_roman))

    # scanString is lazy, so only the first hit is ever computed; falls back
    # to None when the text contains no title line.
    return next((found for found, _, _ in title_parser.scanString(text)),
                None)
class PublicLawFinder(FinderBase):
    """Finds Public Law citations, e.g. "Public Law 111-203"."""
    CITE_TYPE = 'PUBLIC_LAW'
    # Congress number and law number are captured as named results; the
    # separating dash is suppressed.
    GRAMMAR = QuickSearchable(
        Marker("Public") + Marker("Law")
        + Word(string.digits).setResultsName("congress")
        + Suppress("-")
        + Word(string.digits).setResultsName("lawnum"))

    def find(self, node):
        """Yield a Cite (with an fdsys URL) per match in the node's text."""
        for result, begin, finish in self.GRAMMAR.scanString(node.text):
            fields = dict(congress=result.congress, lawnum=result.lawnum)
            url = fdsys_url(collection='plaw', lawtype='public', **fields)
            yield Cite(self.CITE_TYPE, begin, finish, fields, url)
class USCFinder(FinderBase):
    """Finds U.S. Code citations, e.g. "12 U.S.C. 5511"."""
    CITE_TYPE = 'USC'
    # Title and section numbers surround the literal "U.S.C." marker and are
    # captured as named results.
    GRAMMAR = QuickSearchable(
        Word(string.digits).setResultsName("title")
        + Marker("U.S.C.")
        + Word(string.digits).setResultsName("section"))

    def find(self, node):
        """Yield a Cite (with an fdsys URL) per match in the node's text."""
        for result, begin, finish in self.GRAMMAR.scanString(node.text):
            fields = dict(title=result.title, section=result.section)
            url = fdsys_url(collection='uscode', **fields)
            yield Cite(self.CITE_TYPE, begin, finish, fields, url)
Exemplo n.º 5
0
# Atomic grammar tokens: the smallest named pieces of citation text, meant to
# be combined by higher-level parsers elsewhere in the package.

# A part label is purely numeric.
part = Word(string.digits).setResultsName("part")

# Section labels are digits optionally followed by lowercase letters, e.g. "6a".
section = Regex(r"[0-9]+[a-z]*").setResultsName("section")

# Appendix labels are uppercase letters with an optional numeric suffix
# (e.g. "A", "MS1"); \b keeps the match from bleeding into a following word.
appendix = Regex(r"[A-Z]+[0-9]*\b").setResultsName("appendix")
appendix_digit = Word(string.digits).setResultsName("appendix_digit")

# Subparts are labeled with uppercase letters.
subpart = Word(string.ascii_uppercase).setResultsName("subpart")

# Marker words introduce a citation but carry no value of their own, so they
# are Suppress-ed out of the parse results. "§§" / "Sections" marks a list of
# sections rather than a single one.
section_marker = Suppress(Regex(u"§|Section|section"))
sections_marker = Suppress(Regex(u"§§|Sections|sections"))

# Most of these markers could be SuffixMarkers (which arise due to errors in
# the regulation text). We'll wait until we see explicit examples before
# converting them though, to limit false matches
paragraph_marker = Marker("paragraph")
paragraphs_marker = SuffixMarker("paragraphs")

part_marker = Marker("part")
parts_marker = Marker("parts")

subpart_marker = Marker("subpart")

# Several phrasings introduce commentary references ("comment", "commentary",
# "official interpretations", "supplement I"); an optional trailing "of"/"to"
# connective is consumed as part of the marker.
comment_marker = ((Marker("comment") | Marker("commentary") |
                   (Marker("official") + Marker("interpretations")) |
                   (Marker("supplement") + Suppress(WordBoundaries("I")))) +
                  Optional(Marker("of") | Marker("to")))
comments_marker = Marker("comments")

appendix_marker = Marker("appendix")
appendices_marker = Marker("appendices")
Exemplo n.º 6
0
        section = match.appendix_digit
        if pars:
            section += '(' + ')('.join(el for el in pars) + ')'
        return section
    else:
        return None


# Appendix citation with a section, e.g. "A-14(b)": appendix letter, dash,
# then a digit plus any number of paragraph levels, all folded into a single
# "appendix_section" result by the appendix_section parse action.
appendix_with_section = QuickSearchable(atomic.appendix + '-' + (
    atomic.appendix_digit + ZeroOrMore(atomic.lower_p | atomic.roman_p
                                       | atomic.digit_p | atomic.upper_p)
).setParseAction(appendix_section).setResultsName("appendix_section"))

# "Appendix B, part IV" style references; keep_pos records where the marker
# word occurred. Up to three trailing paragraph levels are accepted.
appendix_with_part = QuickSearchable(
    keep_pos(atomic.appendix_marker).setResultsName("marker") +
    atomic.appendix + Suppress(",") + Marker('part') + atomic.upper_roman_a +
    Optional(any_a) + Optional(any_a) + Optional(any_a))

# "Appendix A-14(b)" or bare "Appendix A"; the longer form is tried first.
marker_appendix = QuickSearchable(
    keep_pos(atomic.appendix_marker).setResultsName("marker") +
    (appendix_with_section | atomic.appendix))

# "part 1026" style references.
marker_part = (keep_pos(atomic.part_marker).setResultsName("marker") +
               atomic.part)

# "subpart B" style references.
marker_subpart = (keep_pos(atomic.subpart_marker).setResultsName("marker") +
                  atomic.subpart)

marker_subpart_title = (
    keep_pos(atomic.subpart_marker).setResultsName("marker") + atomic.subpart +
    Optional(Suppress(Literal(u"—"))) +
Exemplo n.º 7
0
import attr
from pyparsing import (CaselessLiteral, FollowedBy, LineEnd, Literal,
                       OneOrMore, Optional, QuotedString, Suppress, Word,
                       ZeroOrMore)
from six.moves import reduce

from regparser.grammar import atomic, tokens, unified
from regparser.grammar.utils import Marker, QuickSearchable, WordBoundaries
from regparser.tree.paragraph import hash_for_paragraph, p_levels
from regparser.tree.reg_text import subjgrp_label

logger = logging.getLogger(__name__)


# Matches "introductory text" or "subject heading"; the latter is normalized
# to the string "text" so downstream code sees a single token either way.
intro_text_marker = (
    (Marker("introductory") + WordBoundaries(CaselessLiteral("text"))) |
    (Marker("subject") + Marker("heading")).setParseAction(lambda _: "text")
)

# Connective words linking a citation to its container, e.g. "paragraph (a)
# of section 5".
of_connective = (Marker("of") | Marker("for") | Marker("to"))

# Passive-voice auxiliaries; a leading "and" is recorded as the boolean
# "and_prefix" result instead of its literal text.
passive_marker = (
    Marker("is") | Marker("are") | Marker("was") | Marker("were") |
    Marker("and").setResultsName("and_prefix").setParseAction(
        lambda _: True))


# The word "and" converted directly into a tokens.AndToken instance.
and_token = Marker("and").setParseAction(lambda _: tokens.AndToken())


# Verbs
Exemplo n.º 8
0
import string

from pyparsing import (CaselessLiteral, FollowedBy, LineEnd, Literal,
                       OneOrMore, Optional, QuotedString, Suppress, Word,
                       ZeroOrMore)
from six.moves import reduce

from regparser.grammar import atomic, tokens, unified
from regparser.grammar.utils import Marker, QuickSearchable, WordBoundaries
from regparser.tree.paragraph import p_levels, hash_for_paragraph
from regparser.tree.reg_text import subjgrp_label

logger = logging.getLogger(__name__)

# Matches "introductory text" or "subject heading"; the latter is normalized
# to the string "text" so downstream code sees a single token either way.
intro_text_marker = (
    (Marker("introductory") + WordBoundaries(CaselessLiteral("text"))) |
    (Marker("subject") + Marker("heading")).setParseAction(lambda _: "text"))

# Connective words linking a citation to its container.
of_connective = (Marker("of") | Marker("for") | Marker("to"))

# Passive-voice auxiliaries; a leading "and" is recorded as the boolean
# "and_prefix" result instead of its literal text.
passive_marker = (
    Marker("is") | Marker("are") | Marker("was") | Marker("were") |
    Marker("and").setResultsName("and_prefix").setParseAction(lambda _: True))

# The word "and" converted directly into a tokens.AndToken instance.
and_token = Marker("and").setParseAction(lambda _: tokens.AndToken())


# Verbs
def generate_verb(word_list, verb, active):
    """Short hand for making tokens.Verb from a list of trigger words"""
    word_list = [CaselessLiteral(w) for w in word_list]
Exemplo n.º 9
0
# A defined term wrapped in smart quotes: everything between “ and ” is
# captured (with its position, via keep_pos) as the "term" result.
smart_quotes = QuickSearchable(
    Suppress(DocLiteral(u'“', "left-smart-quote")) + keep_pos(
        SkipTo(DocLiteral(u'”', "right-smart-quote"))).setResultsName("term"))

# A term inside XML emphasis tags: <E ...>word(s)</E>. The tags themselves
# are suppressed; the words (and position) become the "term" result.
e_tag = (
    Suppress(Regex(r"<E[^>]*>")) +
    keep_pos(OneOrMore(Word(srange("[a-zA-Z-]")))).setResultsName("term") +
    Suppress(Literal("</E>")))

# A definition sentence: optional paragraph marker, a head <E> term, any
# number of conjoined tail terms, then a defining verb phrase ("means",
# "refers [only] to", or "has/have the same meaning as"). Interposed asides
# like ", as used here," and "this/the term" are suppressed.
xml_term_parser = QuickSearchable(
    LineStart() + Optional(Suppress(unified.any_depth_p)) +
    e_tag.setResultsName("head") +
    ZeroOrMore((atomic.conj_phrases +
                e_tag).setResultsName("tail", listAllMatches=True)) +
    Suppress(ZeroOrMore(Regex(r",[a-zA-Z ]+,"))) +
    Suppress(ZeroOrMore((Marker("this") | Marker("the")) + Marker("term"))) +
    ((Marker("mean") | Marker("means"))
     | (Marker("refers") + ZeroOrMore(Marker("only")) + Marker("to")) | (
         (Marker("has") | Marker("have")) + Marker("the") + Marker("same") +
         Marker("meaning") + Marker("as"))))

# A key term at the start of a line: an <E>-wrapped phrase (commas allowed
# in the words here), with an optional trailing period inside the tag.
key_term_parser = QuickSearchable(
    LineStart() + Optional(Suppress(unified.any_depth_p)) +
    Suppress(Regex(r"<E[^>]*>")) +
    keep_pos(OneOrMore(Word(srange("[a-zA-Z-,]")))).setResultsName("term") +
    Optional(Suppress(".")) + Suppress(Literal("</E>")))

scope_term_type_parser = QuickSearchable(
    Marker("purposes") + Marker("of") + Optional(Marker("this")) +
    SkipTo(",").setResultsName("scope") + Literal(",") +
    Optional(Marker("the") + Marker("term")) +
Exemplo n.º 10
0
#vim: set encoding=utf-8
import logging
import string

from pyparsing import CaselessLiteral, FollowedBy, OneOrMore, Optional
from pyparsing import Suppress, Word, LineEnd

from regparser.grammar import atomic, tokens, unified
from regparser.grammar.utils import Marker, WordBoundaries
from regparser.tree.paragraph import p_levels

# Matches "introductory text" or "subject heading"; the latter is normalized
# to the string "text" so downstream code sees a single token either way.
intro_text_marker = (
    (Marker("introductory") + WordBoundaries(CaselessLiteral("text")))
    | (Marker("subject") + Marker("heading")).setParseAction(lambda _: "text"))

# Connective words linking a citation to its container.
of_connective = (Marker("of") | Marker("for") | Marker("to"))

# Passive-voice auxiliaries; a leading "and" is recorded as the boolean
# "and_prefix" result instead of its literal text.
passive_marker = (
    Marker("is") | Marker("are") | Marker("was") | Marker("were")
    |
    Marker("and").setResultsName("and_prefix").setParseAction(lambda _: True))

# The word "and" converted directly into a tokens.AndToken instance.
and_token = Marker("and").setParseAction(lambda _: tokens.AndToken())


#Verbs
def generate_verb(word_list, verb, active):
    """Short hand for making tokens.Verb from a list of trigger words"""
    word_list = [CaselessLiteral(w) for w in word_list]
    if not active:
        word_list = [passive_marker + w for w in word_list]
Exemplo n.º 11
0
     ZeroOrMore(atomic.lower_p | atomic.roman_p | atomic.digit_p
                | atomic.upper_p)
     ).setParseAction(appendix_section).setResultsName("appendix_section"))

# "the" appendix implies there's only one, so it better be appendix A
# "Section IV of the appendix to this part": the literal phrase is captured
# as the "appendix" result but replaced with 'A' by its parse action.
section_of_appendix_to_this_part = (
    atomic.section_marker +
    atomic.upper_roman_a.copy().setResultsName("appendix_section") +
    Literal("of the appendix to this part").setResultsName(
        "appendix").setParseAction(lambda: 'A'))

# "paragraph B(1). of part IV" style references; keep_pos (as a parse
# action here) records where the marker word occurred.
appendix_par_of_part = (atomic.paragraph_marker.copy().setParseAction(
    keep_pos).setResultsName("marker") +
                        (Word(string.ascii_uppercase) | Word(string.digits)) +
                        Optional(any_a) + Optional(any_a) + Suppress(".") +
                        Marker("of") + Marker("part") + atomic.upper_roman_a)

# "Appendix B, part IV" style references with up to three trailing
# paragraph levels.
appendix_with_part = (atomic.appendix_marker.copy().setParseAction(
    keep_pos).setResultsName("marker") + atomic.appendix + Suppress(",") +
                      Marker('part') + atomic.upper_roman_a + Optional(any_a) +
                      Optional(any_a) + Optional(any_a))

# "Appendix A-14(b)" or bare "Appendix A"; the longer form is tried first.
marker_appendix = (atomic.appendix_marker.copy().setParseAction(
    keep_pos).setResultsName("marker") +
                   (appendix_with_section | atomic.appendix))

# "part 1026" style references.
marker_part = (
    atomic.part_marker.copy().setParseAction(keep_pos).setResultsName("marker")
    + atomic.part)

marker_subpart = (atomic.subpart_marker.copy().setParseAction(
Exemplo n.º 12
0
        section = match.appendix_digit
        if pars:
            section += '(' + ')('.join(el for el in pars) + ')'
        return section
    else:
        return None


# Appendix citation with a section, e.g. "A-14(b)": appendix letter, dash,
# then a digit plus any number of paragraph levels, all folded into a single
# "appendix_section" result by the appendix_section parse action.
appendix_with_section = QuickSearchable(atomic.appendix + '-' + (
    atomic.appendix_digit + ZeroOrMore(atomic.lower_p | atomic.roman_p
                                       | atomic.digit_p | atomic.upper_p)
).setParseAction(appendix_section).setResultsName("appendix_section"))

# "Appendix B, part IV" style references; keep_pos (as a parse action here)
# records where the marker word occurred. Up to three trailing paragraph
# levels are accepted.
appendix_with_part = QuickSearchable(atomic.appendix_marker.copy(
).setParseAction(keep_pos).setResultsName("marker") + atomic.appendix +
                                     Suppress(",") + Marker('part') +
                                     atomic.upper_roman_a + Optional(any_a) +
                                     Optional(any_a) + Optional(any_a))

# "Appendix A-14(b)" or bare "Appendix A"; the longer form is tried first.
marker_appendix = QuickSearchable(atomic.appendix_marker.copy().setParseAction(
    keep_pos).setResultsName("marker") +
                                  (appendix_with_section | atomic.appendix))

# "part 1026" style references.
marker_part = (
    atomic.part_marker.copy().setParseAction(keep_pos).setResultsName("marker")
    + atomic.part)

# "subpart B" style references.
marker_subpart = (atomic.subpart_marker.copy().setParseAction(
    keep_pos).setResultsName("marker") + atomic.subpart)

marker_subpart_title = (atomic.subpart_marker.copy().setParseAction(
Exemplo n.º 13
0
            section += '(' + ')('.join(el for el in pars) + ')'
        return section
    else:
        return None


# Appendix citation with a section, e.g. "A-14(b)": appendix letter, dash,
# then a digit plus any number of paragraph levels, all folded into a single
# "appendix_section" result by the appendix_section parse action.
appendix_with_section = (
    atomic.appendix + '-' +
    (atomic.appendix_digit +
     ZeroOrMore(atomic.lower_p | atomic.roman_p | atomic.digit_p
                | atomic.upper_p)
     ).setParseAction(appendix_section).setResultsName("appendix_section"))

# "Appendix B, part IV" style references; keep_pos (as a parse action here)
# records where the marker word occurred. Up to three trailing paragraph
# levels are accepted.
appendix_with_part = (atomic.appendix_marker.copy().setParseAction(
    keep_pos).setResultsName("marker") + atomic.appendix + Suppress(",") +
                      Marker('part') + atomic.upper_roman_a + Optional(any_a) +
                      Optional(any_a) + Optional(any_a))

# "Appendix A-14(b)" or bare "Appendix A"; the longer form is tried first.
marker_appendix = (atomic.appendix_marker.copy().setParseAction(
    keep_pos).setResultsName("marker") +
                   (appendix_with_section | atomic.appendix))

# "part 1026" style references.
marker_part = (
    atomic.part_marker.copy().setParseAction(keep_pos).setResultsName("marker")
    + atomic.part)

# "subpart B" style references.
marker_subpart = (atomic.subpart_marker.copy().setParseAction(
    keep_pos).setResultsName("marker") + atomic.subpart)
marker_subpart_title = (atomic.subpart_marker.copy().setParseAction(
    keep_pos).setResultsName("marker") + atomic.subpart +
                        Suppress(Literal(u"—")) +
Exemplo n.º 14
0
#vim: set encoding=utf-8
from pyparsing import (LineStart, Literal, OneOrMore, Optional, Regex, SkipTo,
                       srange, Suppress, Word, ZeroOrMore)

from regparser.grammar import atomic, unified
from regparser.grammar.utils import DocLiteral, keep_pos, Marker

# A defined term wrapped in smart quotes: everything between “ and ” is
# captured (with its position, via the keep_pos parse action) as "term".
smart_quotes = (Suppress(DocLiteral(u'“', "left-smart-quote")) +
                SkipTo(DocLiteral(u'”', "right-smart-quote")).setParseAction(
                    keep_pos).setResultsName("term"))

# A term inside XML emphasis tags: <E ...>word(s)</E>. The tags themselves
# are suppressed; the words (and position) become the "term" result.
e_tag = (Suppress(Regex(r"<E[^>]*>")) + OneOrMore(Word(
    srange("[a-zA-Z-]"))).setParseAction(keep_pos).setResultsName("term") +
         Suppress(Literal("</E>")))

# A definition sentence: a paragraph marker, a head <E> term, any number of
# conjoined tail terms, then a defining verb phrase ("means" or "has/have
# the same meaning as").
xml_term_parser = (LineStart() + Suppress(unified.any_depth_p) +
                   e_tag.setResultsName("head") + ZeroOrMore(
                       (atomic.conj_phrases + e_tag).setResultsName(
                           "tail", listAllMatches=True)) +
                   ((Marker("mean") | Marker("means"))
                    | ((Marker("has") | Marker("have")) + Marker("the") +
                       Marker("same") + Marker("meaning") + Marker("as"))))

# "For purposes of <scope>, the term <term> means/refers to ...": captures
# the scope up to the comma and the term up to the defining verb.
scope_term_type_parser = (
    Marker("purposes") + Marker("of") + Optional(Marker("this")) +
    SkipTo(",").setResultsName("scope") + Literal(",") +
    Optional(Marker("the") + Marker("term")) +
    SkipTo(Marker("means")
           | (Marker("refers") +
              Marker("to"))).setParseAction(keep_pos).setResultsName("term"))