Пример #1
0
def test1():
    """Print the parse trees of two sample ``$.a.b[1]``-style selectors."""
    selector_grammar = Grammar("""
        selector = root item*
        item = dot identifier ("[" index "]")*
        index = ~'([1-9][0-9]+)|[0-9]'
        identifier = ~'[a-zA-Z0-9_-]+'
        root = "$"
        dot = "."
        """)
    # One tree is printed per sample, in the order listed.
    for sample in ('$.a.b[1][2].c[3]', '$.a.b[10][2]'):
        print(selector_grammar.parse(sample))
Пример #2
0
def test13(grammar: Grammar):
    """Print the parse tree of several compound filter expressions.

    The first expression is parsed on its own, then the listed batch is
    parsed in order (the batch repeats the first expression).
    """
    leading = '{($.user.id = 2 && $.users[0].email = "nonmatch") || $.actions[2] = "GET"}'
    batch = [
        ' { ($.user.id = 1) && ($.users[0].email = "*****@*****.**") } ',
        '{($.user.id = 2 && $.users[0].email = "nonmatch") || $.actions[2] = "GET"}',
        '{ $.user.email = "*****@*****.**" || $.coordinates[0][1] = nonmatch && $.actions[2] = nomatch }',
        '{ ($.user.email = "*****@*****.**" || $.coordinates[0][1] = nonmatch) && $.actions[2] = nomatch }',
    ]

    for expression in [leading] + batch:
        print(grammar.parse(expression))
Пример #3
0
def text_to_pages(txt: str) -> Tuple[str, List[str]]:
    """Convert raw docket text to an xml string of pages and sections.

    The xml produced on success has this general shape:

    .. code-block::

        <docket>
            <page>
                <section>
                </section>
            </page>
            <page>
                <section_continued>
                </section_continued>
            </page>
        </docket>

    Args:
        txt: the raw text of a docket.

    Returns:
        A tuple ``(xml, errors)``. On success ``xml`` is whatever the visitor
        returns and ``errors`` is empty. If parsing or visiting raises,
        ``xml`` is ``"<docket></docket>"`` and ``errors`` holds one message.
    """
    errors = []
    # The grammar is built outside the try on purpose: a broken grammar
    # definition is a programming error and should not be swallowed.
    grammar = Grammar(docket_sections)
    try:
        nodes = grammar.parse(txt)
        visitor = CustomVisitorFactory(
            common_terminals,
            docket_sections_nonterminals,
            docket_sections_custom_nodevisitors,
        ).create_instance()
        return visitor.visit(nodes), errors
    except Exception:
        # Best-effort: report the failure to the caller instead of raising,
        # but keep the traceback in the log so the cause can be diagnosed.
        logger.exception("text_to_pages failed.")
        errors.append("Could not extract pages from docket text.")
        return "<docket></docket>", errors
def parse(section_text):
  """Parse a section's text and return what DefendantInfoVisitor produces.

  A newline is appended to the text before it is handed to the grammar
  built from ``grammars[0]``.
  """
  terminated_text = section_text + "\n"
  section_grammar = Grammar(grammars[0])
  xml_builder = DefendantInfoVisitor()
  tree = section_grammar.parse(terminated_text)
  return xml_builder.visit(tree)
Пример #5
0
def text_to_pages(txt: str) -> str:
    """Convert raw docket text to an xml string of pages and sections.

    The xml produced on success has this general shape::

        <docket>
            <page>
                <section>
                </section>
            </page>
            <page>
                <section_continued>
                </section_continued>
            </page>
        </docket>

    Args:
        txt: the raw text of a docket.

    Returns:
        The xml string returned by the visitor, or ``"<docket></docket>"``
        if parsing or visiting raises.
    """
    # The grammar is built outside the try on purpose: a broken grammar
    # definition is a programming error and should not be swallowed.
    grammar = Grammar(docket_sections)
    try:
        nodes = grammar.parse(txt)
        visitor = CustomVisitorFactory(
            common_terminals, docket_sections_nonterminals,
            docket_sections_custom_nodevisitors).create_instance()
        return visitor.visit(nodes)
    except Exception:
        # Best-effort: fall back to an empty docket instead of raising, but
        # keep the traceback in the log so the cause can be diagnosed.
        logging.exception("text_to_pages failed.")
        return "<docket></docket>"
def test_custom_visitor_factory():
    """A factory-built visitor renders a small parse tree as the expected xml."""
    sample = """Hi there, partner"""
    rules = r"""
  text = greeting punctuation identifier
  greeting = hi_there?
  punctuation = comma?
  identifier = partner?

  hi_there = "Hi there"
  comma = ", "
  partner = "partner"
  """
    parser = Grammar(rules)
    terminal_names = ["hi_there", "comma", "partner"]
    nonterminal_names = ["text", "greeting", "punctuation", "identifier"]
    # No custom node visitors are supplied, hence the empty dict.
    factory = CustomVisitorFactory(terminal_names, nonterminal_names, dict())
    xml_visitor = factory.create_instance()
    tree = parser.parse(sample)
    rendered = xml_visitor.visit(tree)
    assert rendered == "<text> <greeting> Hi there </greeting><punctuation> ,  </punctuation><identifier> partner </identifier> </text>"


#   print(xml)
#   print("Finished.")
Пример #7
0
def test2():
    """Print the parse tree of a quoted string containing escaped quotes."""
    text_grammar = Grammar(r"""
        text = text_quoted / text_simple
        text_quoted = ~r'"([^"\\]|\\.)*"'
        text_simple = ~'[a-zA-Z0-9.*_-]+'
        """)
    quoted_sample = '"z,@! \\" ok \\" "'
    tree = text_grammar.parse(quoted_sample)
    print(tree)
Пример #8
0
def parse_pdf(pdf: Union[BinaryIO, str],
              tempdir=None) -> Tuple[Person, Case, list]:
    """
    Parse a pdf of a criminal record docket.

    The 'see' references are to the DocketParse library, which also parses pdf dockets.

    Args:
        pdf: a binary reader or a string path to a pdf file.
        tempdir: The pdf must be written to txt with pdftotext, so we need a temporary directory for it.

    Returns:
        A 3-tuple: the Person to whom the docket relates, the Case to which
        the docket relates, and a list of error message strings collected
        while parsing individual sections.
    """
    # a list of strings
    errors = []
    # pdf to raw text
    txt = get_text_from_pdf(pdf, tempdir=tempdir)
    # text to xml sections (see DocketParse.sectionize). This handles page breaks.
    # NOTE(review): this assumes text_to_pages returns a plain xml string --
    # confirm against the text_to_pages in scope.
    pages_tree = etree.fromstring(text_to_pages(txt))
    sections_tree = sections_from_pages(pages_tree)
    # parse individual sections with grammars for those sections
    for section_name, grammar_source, terminals, nonterminals, custom_visitors in section_grammars:
        try:
            matches = sections_tree.xpath(
                f"//section[@name='{section_name}']")
            if not matches:
                # not all dockets have all sections, so not being able to find
                # a section is not necessarily an error.
                logging.info(f"    Could not find section {section_name}")
                continue
            section = matches[0]
            # remove blank lines from the section.
            section_text = "\n".join(
                ln for ln in (section.text or "").split("\n") if ln.strip())
            section_grammar = Grammar(grammar_source)
            try:
                nodes = section_grammar.parse(section_text)
            except Exception:
                message = f"    Text for {section_name} failed to parse."
                errors.append(message)
                logging.error(message)
                continue
            visitor = CustomVisitorFactory(terminals, nonterminals,
                                           custom_visitors).create_instance()
            parsed_section_xml = etree.fromstring(visitor.visit(nodes))
            # replace original unparsed section's text w/ the parsed xml.
            section.text = ""
            section.append(parsed_section_xml)
        except Exception:
            # Keep going with the remaining sections, but do not report this
            # as a merely missing section: something actually failed.
            message = f"    Section {section_name} could not be processed."
            errors.append(message)
            logging.exception(message)
    # extract Person and Case information from xml.
    # i.e. defendant_name = section_tree.xpath("//caption/name")[0].text
    defendant = get_person(sections_tree)
    case = get_case(sections_tree)
    return defendant, case, errors
Пример #9
0
def test_parameters_ok(value):
    """`value` must parse under the params/type/symbols/ident/ws rules."""
    rules = (grammar.params + grammar.type_ + grammar.symbols
             + grammar.ident + grammar.ws)
    parser = Grammar(rules)
    assert parser.parse(value) is not None
Пример #10
0
def test_annotationlist_ok(value):
    """`value` must parse when the start rule is ``annotationlist``."""
    rules = ("start = annotationlist\n" + grammar.annotation
             + grammar.symbols + grammar.ident + grammar.ws)
    parser = Grammar(rules)
    assert parser.parse(value) is not None
Пример #11
0
def test_enum_ok(value):
    """`value` must parse when the start rule is ``enum``."""
    rules = ("start=enum\n" + grammar.enum + grammar.ws
             + grammar.symbols + grammar.ident)
    parser = Grammar(rules)
    assert parser.parse(value) is not None
Пример #12
0
def test12(grammar: Grammar):
    """Print the parse tree of each single-condition filter sample."""
    samples = (
        ' { $.eventType = "UpdateTrail" } ',
        '{ $.sourceIPAddress != 123.123.* }',
        '{ $.arrayKey[0] = "value" }',
        '{ $.objectList[1].id = 2 }',
        '{ $.SomeObject IS NULL }',
        '{ $.SomeOtherObject NOT EXISTS }',
        '{ $.ThisFlag IS TRUE } ',
    )

    for sample in samples:
        tree = grammar.parse(sample)
        print(tree)
Пример #13
0
def test_param_list_ok(value):
    """`value` must parse when the start rule is ``param_list``."""
    rules = ("start = param_list\n" + grammar.params + grammar.type_
             + grammar.symbols + grammar.ident + grammar.ws)
    parser = Grammar(rules)
    assert parser.parse(value) is not None
Пример #14
0
def parse(section_text):
  """Parse the section text with ``grammars[0]`` and return the visitor output.

  The visitor is built by CustomVisitorFactory from the module's
  ``terminals`` and ``nonterminals`` with no custom node visitors.
  """
  section_grammar = Grammar(grammars[0])
  factory = CustomVisitorFactory(terminals, nonterminals, dict())
  xml_visitor = factory.create_instance()
  tree = section_grammar.parse(section_text)
  return xml_visitor.visit(tree)
Пример #15
0
def parse(section_text):
    """Parse the section text with ``grammars[0]`` and return the visitor output.

    The visitor is built by CustomVisitorFactory from the module's
    ``terminals`` and ``nonterminals`` with no custom node visitors.
    """
    section_grammar = Grammar(grammars[0])
    factory = CustomVisitorFactory(terminals, nonterminals, dict())
    xml_visitor = factory.create_instance()
    tree = section_grammar.parse(section_text)
    return xml_visitor.visit(tree)
Пример #16
0
class MetaParser:
    """Callable that parses text with a grammar built from the meta rules."""

    def __init__(self, meta_rules=meta_rules):
        #TODO: 99 automatically insert the _ and end rules, or have literals..
        self.meta_grammar = Grammar(meta_rules, 'meta')

    def __call__(self, text):
        """ Parse a text following the meta grammar.
        @return: a cleaned tree without '_' and 'end' rules.
        """
        raw_tree = self.meta_grammar.parse(text)
        return clean_node(raw_tree,
                          to_prune=['_', 'end'],
                          txt_leaf=['_basic_word', 'regex'])
Пример #17
0
class LyricParser(NodeVisitor):
    """Parses .drmw lyric files for association with Doremi tunes"""

    def __init__(self, text):
        """Parse ``text`` with the grammar read from the "lyric-grammar" file.

        The resulting syntax tree is stored on ``self.syntax``; call
        ``convert`` to turn it into a Lyric.
        """
        NodeVisitor.__init__(self)

        # start with a new empty lyric
        self.lyric = Lyric()
        # add an empty voice to it
        self.lyric.voices.append(LyricVoice())

        # build an abstract syntax tree; the grammar file is closed as soon
        # as it has been read instead of being left to the garbage collector
        with open("lyric-grammar", "r") as grammar_file:
            self.grammar = Grammar(grammar_file.read())
        self.syntax = self.grammar.parse(text)

    def convert(self):
        """Convert the syntax tree to our internal representation"""
        self.visit(self.syntax)

        # remove any extra empty voices (those whose name is still "")
        self.lyric.voices = [voice for voice in self.lyric.voices
                             if voice.name != ""]

        # remove any extra empty verses: visit_verse appends a fresh Verse
        # after every completed verse, so the last one is dropped here
        for voice in self.lyric.voices:
            voice.verses = voice.verses[:-1]

        return self.lyric

    def visit_title(self, node, vc):
        """Store the title's string value on the lyric."""
        self.lyric.title = get_string_val(node)

    def visit_author(self, node, vc):
        """Store the author's string value on the lyric."""
        self.lyric.author = get_string_val(node)

    def visit_meter(self, node, vc):
        """Store the meter's string value on the lyric."""
        self.lyric.meter = get_string_val(node)

    def visit_voicespec(self, node, vc):
        """Start a new voice once the current voice spec is complete."""
        # the current voice is complete, so start a new one
        self.lyric.voices.append(LyricVoice())

    def visit_voice(self, node, vc):
        """Name the voice currently being built."""
        self.lyric.voices[-1].name = get_node_val(node, "name")

    def visit_verse(self, node, vc):
        """Start a new verse on the current voice."""
        # the verse is complete, so start a new one
        self.lyric.voices[-1].verses.append(Verse())

    def visit_word(self, node, vc):
        """Append the stripped word text to the current verse."""
        self.lyric.voices[-1].verses[-1].words.append(node.text.strip())

    def generic_visit(self, node, vc):
        """Ignore every node that has no dedicated visit method."""
        pass
Пример #18
0
def parse(section_text):
  """Strip headers from the section text, parse it, and return the visitor output.

  Headers are removed with ``clean_headers``; the grammar comes from
  ``grammars[0]`` and the tree is rendered by a DispositionVisitor.
  """
  headerless_text = clean_headers(section_text)
  section_grammar = Grammar(grammars[0])
  xml_builder = DispositionVisitor()
  tree = section_grammar.parse(headerless_text)
  return xml_builder.visit(tree)
Пример #19
0
def test_use_regex_library():
    """A ``\\p{L}`` pattern parses Cyrillic text when use_regex_library is set."""
    word_grammar = Grammar(r'''
    unicode_word = ~"[\p{L}]*"
    ''',
                           use_regex_library=True)
    sample = 'Тест'
    pattern = Regex(pattern=r'[\p{L}]*', use_regex_library=True)
    # The whole four-character sample is expected to be one regex match.
    wanted = RegexNode(expr=pattern, full_text=sample, start=0, end=4)
    actual = word_grammar.parse(text=sample)
    eq_(actual, wanted)
Пример #20
0
def test_func_body_ok(value):
    """`value` must parse when the start rule is ``func_body``."""
    rules = ("start=func_body\n" + grammar.annotation + grammar.function
             + grammar.params + grammar.type_ + grammar.qualifier
             + grammar.ws + grammar.symbols + grammar.ident)
    parser = Grammar(rules)
    assert parser.parse(value) is not None
Пример #21
0
def test_class_ok(value):
    """`value` must parse under the class rules and everything they use."""
    rules = (grammar.class_ + grammar.annotation + grammar.function
             + grammar.params + grammar.type_ + grammar.qualifier
             + grammar.ws + grammar.symbols + grammar.ident)
    parser = Grammar(rules)
    assert parser.parse(value) is not None
Пример #22
0
class PegParser(TemplateParser):
    """Line parser that uses a PEG grammar to split a line into command and data."""

    class TigrVisitor(NodeVisitor):
        """Visitor that reduces a parsed line to a ``(directive, parameter)`` pair."""

        def visit_line(self, node, visited_children):
            """Return the visited result of the line's first child."""
            first, *_rest = visited_children
            return first

        def visit_statement(self, node, visited_children):
            """Return the directive text and the parameter text ('' if none)."""
            directive, _, parameter = visited_children
            value = parameter[0].text if parameter else ''
            return directive.text, value

        def generic_visit(self, node, visited_children):
            """Return the visited children, or the node itself when there are none."""
            return visited_children or node

    def __init__(self, drawer):
        super().__init__(drawer)
        self.peg_grammar = Grammar(r'''
            line = statement ws? comment? ws
            statement   = directive ws? parameter?
            directive   = ~"P|X|Y|D|W|N|E|S|U"
            parameter   = ~"-?\d{0,}\.{0,1}\d{0,}"
            comment     = ~"#.*"
            ws          = ~"\s*"
        ''')
        self.peg_visitor = self.TigrVisitor()

    def do_parse_line(self, line_number, line):
        """Parse one line and return its ``(command, data)`` pair.

        The line is upper-cased before parsing. Empty lines and lines that
        start with '#' raise SkipParseException; any failure while parsing
        or visiting raises ParseException naming the line.
        """
        upper_line = line.upper()
        # Blank lines and whole-line comments carry no command.
        if not upper_line or upper_line.startswith('#'):
            raise self.SkipParseException()
        try:
            tree = self.peg_grammar.parse(upper_line)
            command, data = self.peg_visitor.visit(tree)
        except Exception:
            raise self.ParseException(
                'you have a syntax error at Line {}: {}'.format(
                    line_number, line))
        return command, data
Пример #23
0
def test5(grammar: Grammar):
    """Print the parse tree of brace-wrapped, spaced ``a || a`` groupings."""
    samples = (
        ' { ( a || ( a )) } ',
        ' { a || ( a ) } ',
        ' { a || a || a } ',
        ' { ( (a) ) || ( a ) } ',
        ' { ( a || ( a || ( a ) ) ) } ',
        ' { (a || (a || ((a || a) || (a || ( a || a)) ))  ) || (a) } ',
        ' { ( a ) || a || ( a || ( a ) ) } ',
        ' { ( a ) || a || ( a || a) } ',
    )
    for sample in samples:
        print(grammar.parse(sample))
Пример #24
0
def test3(grammar: Grammar):
    """Print the parse tree of compact (no whitespace) ``a||a`` groupings."""
    samples = (
        '(a||(a))',
        'a||(a)',
        'a||a||a',
        '(a)||(a)',
        '(a||(a||(a)))',
        '(a||(a||((a||a)||(a||(a||a)))))||(a)',
        '(a)||a||(a||(a))',
        '(a)||a||(a||a)',
    )
    for sample in samples:
        print(grammar.parse(sample))
Пример #25
0
def test_visitor():
    """Assert a tree gets visited correctly."""
    bold_grammar = Grammar(r'''
        bold_text  = bold_open text bold_close
        text       = ~'[a-zA-Z 0-9]*'
        bold_open  = '(('
        bold_close = '))'
    ''')
    source = '((o hai))'
    # Hand-built tree: opening marker, inner text, closing marker.
    parts = [
        Node(bold_grammar['bold_open'], source, 0, 2),
        Node(bold_grammar['text'], source, 2, 7),
        Node(bold_grammar['bold_close'], source, 7, 9),
    ]
    expected_tree = Node(bold_grammar['bold_text'], source, 0, 9, parts)
    eq_(bold_grammar.parse(source), expected_tree)
    eq_(HtmlFormatter().visit(expected_tree), '<b>o hai</b>')
def test_visitor():
    """Assert a tree gets visited correctly."""
    markup = Grammar(r'''
        bold_text  = bold_open text bold_close
        text       = ~'[a-zA-Z 0-9]*'
        bold_open  = '(('
        bold_close = '))'
    ''')
    sample = '((o hai))'
    # (rule name, start, end) for each child of the bold_text node.
    child_spans = (('bold_open', 0, 2), ('text', 2, 7), ('bold_close', 7, 9))
    children = [Node(markup[rule], sample, begin, finish)
                for rule, begin, finish in child_spans]
    hand_built = Node(markup['bold_text'], sample, 0, 9, children)
    eq_(markup.parse(sample), hand_built)
    html = HtmlFormatter().visit(hand_built)
    eq_(html, '<b>o hai</b>')
Пример #27
0
def test7(grammar: Grammar):
    """Print the parse tree of nested comparison / IS / NOT EXISTS samples."""
    samples = (
        ' { ( a = 12 || ( b = 34 )) } ',
        ' { a = 12 || ( b = 34 ) } ',
        ' { a = 12 && b != 34 || c = 56 } ',
        ' { ( ( a = 12) ) || ( b != 34 ) } ',
        ' { ( a = 12 && ( b = 34 && ( c != 56 ) ) ) } ',
        ' { (a = 12 || (b IS TRUE && ((c = 56 && a=78) || (b = 2 && ( a =1  || c = 2)) ))  ) || (b = 4) } ',
        ' { ( a = 12 ) || a != 2 || ( a IS NULL|| ( a NOT EXISTS ) ) } ',
        ' { ( a NOT EXISTS ) || a IS TRUE || ( a IS FALSE || a NOT EXISTS) } ',
    )
    for sample in samples:
        print(grammar.parse(sample))
Пример #28
0
def test():
    """Build an experimental nested-condition grammar and print one parse.

    The ``index`` rule now tries the multi-digit alternative first and uses
    the intended ``[0-9]`` character class (it previously read ``[0+9]`` and
    listed the single-digit alternative first, so an index such as ``10``
    could only match its first digit).

    NOTE(review): ``pattern7`` (the default rule) always requires "(" followed
    by ``pattern7`` again and appears to have no non-recursive alternative, so
    the parse below presumably raises -- confirm the intended rule.
    """
    grammar = Grammar(
        #pattern = "{" ws text ws "=" ws text ws "}"
        """
        #pattern6 = _ "{" pattern5 "}" _
        #pattern6 = "(" pattern4 (logical pattern4)*
        #pattern5 = pattern4 (logical pattern4)*
        
        #pattern6 = pattern5 / pattern4
        
        
        #pattern7 = _ pattern6 _
        
        # (a || (b || c))
        # ((a || b) || c)
        # ((a || b || c))
        
        pattern7 = "(" pattern7 (logical pattern6)* ")"
        
        pattern6 = pattern5 / pattern4
        # (a || b || c)
        pattern5 = "(" pattern4 ")"
        # a || b || c
        pattern4 = pattern3 (logical pattern3)*
        
        pattern3 = _ (pattern2 / pattern1) _
        pattern2 = "(" _ (pattern2/pattern1) _ ")"
        pattern1 = _ (compare_eq / compare_ne) _
        
        compare_eq = text _ "=" _ text
        compare_ne = text _ "!=" _ text
        logical = "&&" / "||"
        _ = ~"[ \t]"*
        text = ~"[a-zA-Z0-9_-]+"
        
        item = dot identifier ("[" index "]")*
        index = ~"[1-9][0-9]+|[0-9]"
        identifier = ~"[a-zA-Z0-9_-]+"
        
        selector = root item*
        
        root = "$"
        dot = "."
        """)
    #data = ' { ( -a_bc= 123 ) || abc = 123 } '
    data = '(((-a_bc=123))||abc=12)'
    print(grammar.parse(data))
Пример #29
0
def parse(path):
  """
  Parse a pdf docket into an xml document.
  This xml document will be of the form:
  <docket>
    <page>
      <caption> </caption>
      <body>
        <section name='a'> </section>
        <section name='b'> </section>
        ...
      </body>
      <footer> </footer>
    </page>
    <page>
      ...
    </page>
  </docket>
  ...

  This xml most closely resembles the original docket. (The caveat
  is that section names are removed from the text and turned into name
  attributes of the section xml elements).

  But some sections extend across pages, and this xml schema leaves these
  sections separated from each other.

  The elapsed time of each step (pdf to text, grammar creation, parsing,
  visiting) is logged at info level, in microseconds.

  TODO: Turn this into a real .xsd schema definition.
  """
  def elapsed_microseconds(since):
    # timedelta.microseconds is only the sub-second component, so it wraps
    # for anything that takes a second or more; use the full duration.
    return int((datetime.now() - since).total_seconds() * 1000000)

  print("Starting parse {}".format(path))
  start = datetime.now()
  docket_text = pdf_to_text(path)
  pdf2text_time = elapsed_microseconds(start)
  start = datetime.now()
  grammar = Grammar(grammar_list[0])
  create_grammar_time = elapsed_microseconds(start)
  visitor = DocketVisitor()
  start = datetime.now()
  root = grammar.parse(docket_text)
  parse_grammar_time = elapsed_microseconds(start)
  start = datetime.now()
  results = visitor.visit(root)
  node_visitor_time = elapsed_microseconds(start)
  logging.info("{}, {}, {}, {}".format(pdf2text_time, create_grammar_time, parse_grammar_time, node_visitor_time))
  return results
Пример #30
0
    def __init__(self, tune_fn):
        """Read the grammar and the tune file, and parse the tune.

        The grammar is read from the "doremi-grammar" file; ``tune_fn`` is
        read as UTF-8. The parse tree is stored on ``self.syntax``.
        """
        NodeVisitor.__init__(self)
        # start with an empty tune, voice, note, and list of modifiers
        self.tune = Tune()
        self.voice = Voice()
        self.note = Note()
        self.note_modifiers = []

        # at the outset, we are not in a voice's content
        self.in_content = False

        # set up the actual parser; both files are closed as soon as they
        # have been read instead of being left to the garbage collector
        with open("doremi-grammar", "r") as grammar_file:
            grammar = Grammar(grammar_file.read())

        # read and parse the tune
        with codecs.open(tune_fn, "r", "utf-8") as tune_file:
            tune_text = tune_file.read()
        self.syntax = grammar.parse(tune_text)
Пример #31
0
def test11(grammar: Grammar):
    """Run test10, then print parse trees for samples with quoted values."""
    test10(grammar)
    samples = (
        ' { ( $.a = "12" || ( $.b = 34 )) } ',
        ' { ( $.a.bc IS NULL || ( $.b.xy = "ab" )) } ',
        ' { ( $.a[0] NOT EXISTS || ( $.b[2][1] = "34" )) } ',
        ' { ( $.a[0]<=12 || ( $.b[2][1].c = "34" )) } ',
        ' { ($.a[0] <="12" || ( $.b[10][1] =34 )) } ',
        ' { $.a.b ="12" && $.b != 3.4.5 || $.c = 56 } ',
        ' { $.a="12" && $.b != 3.4.5 || $.c = 56 } ',
    )
    for sample in samples:
        print(grammar.parse(sample))
Пример #32
0
def test10(grammar: Grammar):
    """Print parse trees for selector-based samples with unquoted values."""
    samples = (
        ' { ( $.a = 12 || ( $.b = 34 )) } ',
        ' { ( $.a.bc IS NULL || ( $.b.xy = ab )) } ',
        ' { ( $.a[0] NOT EXISTS || ( $.b[2][1] = 34 )) } ',
        ' { ( $.a[0]<=12 || ( $.b[2][1].c = 34 )) } ',
        ' { ($.a[0] <= 12 || ( $.b[10][1] =34 )) } ',
        ' { $.a.b = 12 && $.b != 34 || $.c = 56 } ',
    )
    for sample in samples:
        print(grammar.parse(sample))
def test_custom_visitor_factory():
  """A factory-built visitor renders a small parse tree as the expected xml."""
  sample = """Hi there, partner"""
  rules = r"""
  text = greeting punctuation identifier
  greeting = hi_there?
  punctuation = comma?
  identifier = partner?

  hi_there = "Hi there"
  comma = ", "
  partner = "partner"
  """
  parser = Grammar(rules)
  terminal_names = ["hi_there", "comma", "partner"]
  nonterminal_names = ["text", "greeting", "punctuation", "identifier"]
  # No custom node visitors are supplied, hence the empty dict.
  factory = CustomVisitorFactory(terminal_names, nonterminal_names, dict())
  xml_visitor = factory.create_instance()
  tree = parser.parse(sample)
  rendered = xml_visitor.visit(tree)
  assert rendered=="<text> <greeting> Hi there </greeting><punctuation> ,  </punctuation><identifier> partner </identifier> </text>"
#   print(xml)
#   print("Finished.")
Пример #34
0
  def stringify_list(self, list):
    """Concatenate the string elements of ``list`` into a single string.

    An empty iterable gives the empty string. The parameter name shadows
    the builtin but is kept so existing keyword callers continue to work.
    """
    # join builds the result in one pass instead of repeated += copies
    return "".join(list)
# End of Class

# Index into ``grammars`` of the grammar exercised by this script.
test_num = 0

# grammar = Grammar(grammars[test_num])
# root = grammar.parse(texts[0])
# print("Parsed okay.")
# visitor = CaseInfoVisitor()
# results = visitor.visit(root)
# print(results)
# for r in results:
#   print(r)
# print(root.prettily())

#with open("./sample_dockets/CP-51-CR-0000001-2011.txt") as f:
with open("./sample_dockets/CP-51-CR-0005727-2011.txt") as f:
  grammar = Grammar(grammars[test_num])
  root = grammar.parse(f.read())
visitor = DocketVisitor_2()
print("Parse succeeded.")
# Render before opening the output file so that a visitor failure does not
# leave a truncated output2.txt behind.
docket_output = visitor.visit(root)
# The with blocks close both files; no explicit close() calls are needed.
with open("output2.txt", 'w+') as f2:
  f2.write(docket_output)

Пример #35
0
def test_qualifier_notok():
    """A comma-separated qualifier list must raise IncompleteParseError."""
    rules = r"""
        start = qualifierlist
        """ + grammar.qualifier + grammar.ws
    parser = Grammar(rules)
    with pytest.raises(IncompleteParseError):
        parser.parse("public, static, final")
Пример #36
0
def test_block_line_comment_ok(value):
    """`value` must parse when the start rule is ``line_comment``."""
    rules = r"""
    start = line_comment
    """ + grammar.ws
    parser = Grammar(rules)
    assert parser.parse(value) is not None
Пример #37
0
class Parser(object):
    """Turn a script into a nested tuple/dict tree using ``tscript_grammar``.

    ``_eval`` dispatches every parse-tree node to the method named after the
    node's ``expr_name``; nodes with no matching method go to ``anonymous``.
    Most handlers return a ``(Types.<KIND>, payload)`` tuple. The child
    indexes used by the handlers follow the layout of the grammar rules,
    which are defined outside this class.
    """

    def __init__(self):
        super().__init__()
        # offsets of every '\n' in the script being processed; ``line`` uses
        # them to turn a node offset into a line number
        self.line_endings = []
        self.grammar = Grammar(tscript_grammar)

    def _parse_script(self, script):
        """Terminate the script with a newline, index its lines and parse it."""
        script += '\n'  # just incase the end of the script lacks a \n otherwise the *line* will not match
        self.line_endings = [i for i, c in enumerate(script) if c == '\n']
        return self.grammar.parse(script)

    def lint(self, script):
        """Check a script and return an error string, or None if it is fine."""
        try:
            ast = self._parse_script(script)
        except IncompleteParseError as e:
            # handled before ParseError so it gets its own message
            return 'Incomplete Parsing on line: {0} column: {1}'.format(
                e.line(), e.column())
        except ParseError as e:
            return 'Error Parsing on line: {0} column: {1}'.format(
                e.line(), e.column())

        try:
            self._eval(ast)
        except Exception as e:
            return 'Exception Parsing "{0}"'.format(e)

        return None

    def parse(self, script):
        """Parse a script into a ``(Types.SCOPE, {'_children': ...})`` tuple.

        Raises:
            ParserError: carrying line and column, when the grammar rejects
                the script.
        """
        try:
            ast = self._parse_script(script)
        except IncompleteParseError as e:
            raise ParserError(e.line(), e.column(), 'Incomplete Parse')
        except ParseError as e:
            raise ParserError(e.line(), e.column(), 'Error Parsing')

        return (Types.SCOPE, {'_children': self._eval(ast)})

    def _eval(self, node):
        """Dispatch a node to its handler; raise IsEmpty for ignorable nodes."""
        if node.expr_name[0:3] in ('ws_', 'nl_', 'em_'):  # ignore white space
            raise IsEmpty()

        try:
            handler = getattr(self, node.expr_name)
        except AttributeError:
            handler = self.anonymous

        return handler(node)

    def anonymous(self, node):
        """Evaluate the first child of a node that has no dedicated handler."""
        if len(node.children) < 1:
            raise IsEmpty()

        return self._eval(node.children[0])

    def lines(self, node):
        """Evaluate every child, skipping the ones that raise IsEmpty."""
        if not node.children:
            return []

        result = []
        for child in node.children:
            try:
                result.append(self._eval(child))
            except IsEmpty:
                pass

        return result

    def line(self, node):
        """Return ``(Types.LINE, evaluated first child, line number)``.

        The line number is the 1-based position of the first recorded line
        ending located after the node's start offset; when there is no such
        ending the last line is used (line 1 if nothing was recorded).
        """
        line_number = max(len(self.line_endings), 1)
        for index, ending in enumerate(self.line_endings):
            if ending > node.start:
                line_number = index + 1
                break

        return (Types.LINE, self._eval(node.children[0]), line_number)

    def expression(self, node):
        """Evaluate the second child."""
        return self._eval(node.children[1])

    def value_expression(self, node):
        """Evaluate the second child."""
        return self._eval(node.children[1])

    def constant_expression(self, node):
        """Evaluate the second child."""
        return self._eval(node.children[1])

    def block(self, node):
        """Return a SCOPE whose options come from child 1 and body from child 3."""
        options = self._eval(node.children[1])
        options['_children'] = self._eval(node.children[3])

        return (Types.SCOPE, options)

    def paramater_map(self, node):
        """Build a ``{name: evaluated value}`` dict from a parameter list node."""
        children = node.children[0].children
        if len(children) == 0:
            return {}

        children = children[0].children
        # first group is the tail of this node's children; the remaining
        # groups come from the children of each item under children[0]
        groups = [children[1:]]
        for item in children[0]:
            groups.append(item.children)

        result = {}
        for item in groups:
            try:
                result[item[1].text] = self._eval(item[4])
            except IsEmpty:
                raise Exception(
                    'Paramater values are not allowed to be IsEmpty')

        return result

    def const_paramater_map(self, node):
        """Like ``paramater_map`` but require constants and unwrap their values."""
        result = self.paramater_map(node)
        # only values are replaced, so iterating the live items view is safe
        for key, value in result.items():
            if value[0] != Types.CONSTANT:
                raise Exception(
                    'Expected Constant paramater, got type "{0}"'.format(
                        value[0]))

            result[key] = value[1]

        return result

    def jump_point(self, node):
        """Return ``(Types.JUMP_POINT, text of child 1)``."""
        return (Types.JUMP_POINT, node.children[1].text)

    def goto(self, node):
        """Return ``(Types.GOTO, text of child 1)``."""
        return (Types.GOTO, node.children[1].text)

    def time(self, node):  # days:hours:mins:seconds
        """Convert a colon-separated time into a constant timedelta.

        Four parts are days:hours:minutes:seconds, three parts are
        hours:minutes:seconds, anything else is read as minutes:seconds.
        """
        parts = [int(i) for i in node.text.split(':')]

        if len(parts) == 4:
            return (Types.CONSTANT,
                    timedelta(days=parts[0],
                              hours=parts[1],
                              minutes=parts[2],
                              seconds=parts[3]))
        elif len(parts) == 3:
            return (Types.CONSTANT,
                    timedelta(hours=parts[0],
                              minutes=parts[1],
                              seconds=parts[2]))
        else:
            return (Types.CONSTANT,
                    timedelta(minutes=parts[0], seconds=parts[1]))

    def number_float(self, node):
        """Return the node text as a constant float."""
        return (Types.CONSTANT, float(node.text))

    def number_int(self, node):
        """Return the node text as a constant int."""
        return (Types.CONSTANT, int(node.text))

    def boolean(self, node):
        """Return constant True when the text is 'true' (any case), else False."""
        return (Types.CONSTANT, node.text.lower() == 'true')

    def text(self, node):
        """Return the text of the second grandchild under child 0 as a constant."""
        return (Types.CONSTANT, node.children[0].children[1].text)

    def none(self, node):
        """Return a constant None."""
        return (Types.CONSTANT, None)

    def exists(self, node):
        """Return ``(Types.EXISTS, evaluated child 2)``."""
        return (Types.EXISTS, self._eval(node.children[2]))

    def array(self, node):
        """Return ``(Types.ARRAY, [evaluated items])``; empty when child 1 is empty."""
        children = node.children[1].children
        if len(children) == 0:
            return (Types.ARRAY, [])

        children = children[0].children
        values = []
        for item in children[0]:
            values.append(self._eval(item))

        # the final item is stored separately from the leading ones
        values.append(self._eval(children[1]))

        return (Types.ARRAY, values)

    def map(self, node):
        """Return ``(Types.MAP, evaluated child 1)``."""
        values = self._eval(node.children[1])

        return (Types.MAP, values)

    def variable(self, node):
        """Return a VARIABLE with its optional module and its name."""
        if len(node.children[1].children) > 0:
            module = node.children[1].children[0].children[0].text
        else:
            module = None

        return (Types.VARIABLE, {
            'module': module,
            'name': node.children[2].text
        })

    def array_map_item(self, node):
        """Return an ARRAY_MAP_ITEM for an indexed variable.

        Raises:
            Exception: when the indexed expression is not a variable.
        """
        variable = self._eval(node.children[0])
        if variable[0] != Types.VARIABLE:
            raise Exception('Can only index variables')

        index = self._eval(node.children[2])
        return (Types.ARRAY_MAP_ITEM, {
            'module': variable[1]['module'],
            'name': variable[1]['name'],
            'index': index
        })

    def infix(self, node):
        """Return an INFIX with operator (child 2), left (1) and right (3)."""
        return (Types.INFIX, {
            'operator': node.children[2].text,
            'left': self._eval(node.children[1]),
            'right': self._eval(node.children[3])
        })

    def not_(self,
             node):  # we are going to abuse the INFIX function for this one
        """Return a 'not' INFIX whose right operand is a constant None."""
        return (Types.INFIX, {
            'operator': 'not',
            'left': self._eval(node.children[1]),
            'right': (Types.CONSTANT, None)
        })

    def other(self, node):
        """Return ``(Types.OTHER, text of child 0)``."""
        return (Types.OTHER, node.children[0].text)

    def whiledo(self, node):
        """Return a WHILE with condition (child 1) and expression (child 4)."""
        return (Types.WHILE, {
            'condition': self._eval(node.children[1]),
            'expression': self._eval(node.children[4])
        })

    def ifelse(self, node):
        """Return ``(Types.IFELSE, branches)``.

        Branches are listed in source order: the leading condition, every
        item under child 5, and finally a branch with condition None when
        child 6 has children.
        """
        branches = []
        branches.append({
            'condition': self._eval(node.children[1]),
            'expression': self._eval(node.children[4])
        })

        for item in node.children[5].children:
            branches.append({
                'condition': self._eval(item.children[2]),
                'expression': self._eval(item.children[5])
            })

        if len(node.children[6].children) > 0:
            branches.append({
                'condition':
                None,
                'expression':
                self._eval(node.children[6].children[0].children[3])
            })

        return (Types.IFELSE, branches)

    def function(self, node):
        """Return a FUNCTION with optional module, name and evaluated parameters."""
        params = self._eval(node.children[4])

        if len(node.children[1].children) > 0:
            module = node.children[1].children[0].children[0].text
        else:
            module = None

        return (Types.FUNCTION, {
            'module': module,
            'name': node.children[2].text,
            'paramaters': params
        })

    def assignment(self, node):
        """Return an ASSIGNMENT with target (child 0) and value (child 3)."""
        target = self._eval(node.children[0])

        return (Types.ASSIGNMENT, {
            'target': target,
            'value': self._eval(node.children[3])
        })
Пример #38
0
               Defendant eligible for work release.
          Probation                                                                 Max of 3.00 Years                                  12/20/2011
                                                                                    3 years
               All conditions previously imposed to remain.
""",
    """
      Manufacture or Deliver
         Shreeves-Johns, Karen                                                     07/13/2011
            Probation                                                                Max of 3.00 Years                                   07/13/2011
                                                                                     3 years
                  Defendant is to pay imposed mandatory court costs.
                  To submit to random drug screens.
                  To pursue a prescribed secular course of study or vocational training.
                  Case relisted for status of compliance on 9/22/11 courtroom 605.
       Shreeves-Johns, Karen                                                     12/20/2011
          Confinement                                                              Min of 11.00 Months 15.00 Days                      12/20/2011
                                                                                   Max of 23.00 Months
                                                                                   11 1/2 - 23 months
               Defendant eligible for work release.
          Probation                                                                 Max of 3.00 Years                                  12/20/2011
                                                                                    3 years
               All conditions previously imposed to remain.
""",
]

# Script-style smoke run: build the grammar from the first grammar definition
# and parse the first sample text.
grammar = Grammar(grammars[0])
root = grammar.parse(texts[0])
print("parsed.")
# Run the DetailsVisitor over the parse tree and print whatever it returns.
visitor = DetailsVisitor()
print(visitor.visit(root))
Пример #39
0
def test14(grammar: Grammar):
    """Print the parse tree of a sample whose quoted value holds an escaped quote."""
    sample = '{ ($.user.id = 2 && $.user.id = "a \\" b") || $.a="1"}'
    tree = grammar.parse(sample)
    print(tree)