def __init__(self):
    """Parser for instruction.

    Example:
        {{<a>},{<a>},{<a>},{<a>}}
        {{<!>},{<!>},{<!>},{<a>}}
        <{o"i!a,<{i<a>
    """
    debug = False
    # Running total of non-cancelled garbage characters seen so far.
    self.garbo_count = 0

    LBRACK, RBRACK, LBRACE, RBRACE, BANG = map(Suppress, "<>{}!")
    nonspecial = CharsNotIn('<>{}!')
    # '!' cancels exactly the one character that follows it.
    ignored = Word('!', printables, exact=2)
    # Garbage runs to the next unescaped '>'; '!x' pairs are skipped.
    enclosed_garbo = SkipTo(Literal('>'), ignore=ignored)

    val_str = Forward()
    garbo_str = Forward()

    # A parsed item: cancelled pair, garbage, nested group, or plain text.
    # FIX: the original also pre-declared `item = Forward()` and then
    # rebound the name without ever resolving the Forward — that dead
    # declaration is removed here.
    item = (ignored | garbo_str | val_str | nonspecial).setDebug(debug)

    # stuff in {}s
    val_str << nestedExpr('{', '}', content=item, ignoreExpr=None).setDebug(debug)
    # stuff in <>s (suppressed)
    garbo_str << (LBRACK + Optional(enclosed_garbo) + RBRACK).setDebug(debug)

    def cvt_list(toks):
        # Materialize the nested ParseResults as plain nested lists.
        return toks.asList()

    val_str.setParseAction(cvt_list)

    def take_garbo(s, loc, toks):
        # Count garbage length after dropping '!x' cancelled pairs,
        # accumulate it, and replace the garbage with a marker token.
        m = toks[0]
        ig_str = re.sub(r'!.', '', m)
        ln = len(ig_str)
        self.garbo_count += ln
        return f"<GARBO: {ln}>"

    enclosed_garbo.setParseAction(take_garbo)
    ignored.setParseAction(lambda: '!IGNORED')

    # pattern build
    self._pattern = item
def patt(cs_list):
    """Remove the cs with its arguments with recursion on curly brackets.

    Builds a pyparsing expression matching ``\\<cs>{ ... }`` where the
    braced argument may itself contain balanced nested braces.

    :param cs_list: iterable of control-sequence literals accepted by
        ``oneOf`` (NOTE(review): a leading ``\\`` is matched separately
        here, so entries are presumably given without it — confirm
        against callers).
    :return: the composed pyparsing expression.
    """
    cs_lit_list = oneOf(cs_list).suppress()
    bslash = Literal('\\').suppress()
    lbrace = Literal('{').suppress()
    rbrace = Literal('}').suppress()

    # Any run of printable characters that is not a brace.
    allchars = Word(printables, excludeChars="{}")

    # Brace-balanced content: words and/or nested {...} groups, joined
    # back into a single space-separated string.
    # FIX: the original also built unused `parens` and `inside`
    # expressions (the first `inside = SkipTo(rbrace)` was immediately
    # overwritten, and neither appeared in the returned pattern); that
    # dead code is removed here.
    content = Forward()
    content << OneOrMore(allchars | (lbrace + ZeroOrMore(content) + rbrace))
    content.setParseAction(lambda tok: " ".join(tok))

    return bslash + cs_lit_list + lbrace + content + rbrace
# URL extractor
# Copyright 2004, Paul McGuire
from pyparsing import makeHTMLTags, SkipTo, pyparsing_common
import urllib.request
from contextlib import closing
import pprint

# Match an <a ...>body</a> element; the captured body is stripped of any
# embedded markup and collapsed to single-spaced text.
linkOpenTag, linkCloseTag = makeHTMLTags('a')
linkBody = SkipTo(linkCloseTag)
linkBody.setParseAction(pyparsing_common.stripHTMLTags)
linkBody.addParseAction(lambda toks: ' '.join(toks[0].strip().split()))
link = linkOpenTag + linkBody("body") + linkCloseTag.suppress()

# Go get some HTML with some links in it.
with closing(
        urllib.request.urlopen("https://www.yahoo.com/")) as serverListPage:
    htmlText = serverListPage.read().decode("UTF-8")

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we are
# not interested in the start and end values).  A single pass both prints each
# match and accumulates the body -> href mapping.
url_map = {}
for match, _, _ in link.scanString(htmlText):
    print(match.asList())
    url_map[match.body] = match.href

# Dictionary assembled from each pair of tokens returned from a matched URL.
pprint.pprint(url_map)
# URL extractor
# Copyright 2004, Paul McGuire
from pyparsing import makeHTMLTags, SkipTo, pyparsing_common
import urllib.request
from contextlib import closing
import pprint

# Match an <a ...>body</a> element; the captured body is stripped of any
# embedded markup and collapsed to single-spaced text.
linkOpenTag, linkCloseTag = makeHTMLTags('a')
linkBody = SkipTo(linkCloseTag)
linkBody.setParseAction(pyparsing_common.stripHTMLTags)
linkBody.addParseAction(lambda toks: ' '.join(toks[0].strip().split()))
link = linkOpenTag + linkBody("body") + linkCloseTag.suppress()

# Go get some HTML with some links in it.
# FIX: fetch over https (the other copies of this example in the file
# already do; the plain-http URL only works via a server-side redirect).
with closing(
        urllib.request.urlopen("https://www.yahoo.com/")) as serverListPage:
    htmlText = serverListPage.read().decode("UTF-8")

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we are
# not interested in the start and end values).
for toks, strt, end in link.scanString(htmlText):
    print(toks.asList())

# Create dictionary from comprehension, assembled from each pair of tokens
# returned from a matched URL.  (Was a dated dict((k, v) for ...) construction.)
pprint.pprint(
    {toks.body: toks.href for toks, strt, end in link.scanString(htmlText)})
# Load a sample LaTeX source to test the \tikzfig / \xymatrix grammars on.
with open('../tests/tex_files/reinhardt/reinhardt-optimal-control.tex', 'r') as rein_file:
    rein = rein_file.read()

#with open('../tests/tex_files/short_xymatrix_example.tex') as xymatrix_file:
#    short_example = xymatrix_file.read()

#with open('../../stacks-tests/orig/perfect.tex') as xymatrix_file:
#    stacks_example = xymatrix_file.read()

# +
cstikzfig = oneOf(["\\tikzfig", "\\mathcal"]).suppress()
lbrace = Literal('{').suppress()
rbrace = Literal('}').suppress()
allchars = Word(printables, excludeChars="{}")

# Flat (non-nested) argument body, re-joined with single spaces.
# FIX: removed the unused `parens = Word("()%\\")` and the dead first
# assignment `inside = SkipTo(rbrace)` that was immediately overwritten.
inside = ZeroOrMore(allchars)
inside.setParseAction(lambda tok: " ".join(tok))

# Brace-balanced argument body: words and/or nested {...} groups.
content = Forward()
content << OneOrMore(allchars | (lbrace + ZeroOrMore(content) + rbrace))
#content << (allchars + lbrace + ZeroOrMore(content) + rbrace)
content.setParseAction(lambda tok: " ".join(tok))

# \tikzfig{flat}{flat}{nested}
tikzfig = cstikzfig + lbrace + inside + rbrace + lbrace + inside + rbrace + lbrace + content + rbrace

# \xymatrix{nested}
csxymatrix = oneOf(["\\xymatrix", "\\mathcal"]).suppress()
xymatrix = csxymatrix + lbrace + content + rbrace

search_res = tikzfig.searchString(rein)
# FIX: the original immediately clobbered search_res with
# xymatrix.searchString(short_example); short_example is only defined in
# the commented-out load above, so that line raised NameError and threw
# away the tikzfig results.  Re-enable together with its input:
#search_res = xymatrix.searchString(short_example)

#tikzfig.setParseAction(lambda s: ' ')
#clean_str = tikzfig.transformString(rein)
# URL extractor
# Copyright 2004, Paul McGuire
from pyparsing import makeHTMLTags, SkipTo, pyparsing_common as ppc
import urllib.request
from contextlib import closing
import pprint

# Build a pattern for a complete <a ...>...</a> element.  The body text is
# cleaned of nested markup and normalized to single spaces.
linkOpenTag, linkCloseTag = makeHTMLTags('a')
linkBody = SkipTo(linkCloseTag)
linkBody.setParseAction(ppc.stripHTMLTags)
linkBody.addParseAction(lambda toks: ' '.join(toks[0].strip().split()))
link = linkOpenTag + linkBody("body") + linkCloseTag.suppress()

# Go get some HTML with some links in it.
with closing(
        urllib.request.urlopen("https://www.yahoo.com/")) as serverListPage:
    htmlText = serverListPage.read().decode("UTF-8")

# scanString is a generator that loops through the input htmlText, and for each
# match yields the tokens and start and end locations (for this application, we are
# not interested in the start and end values).
for tokens, _, _ in link.scanString(htmlText):
    print(tokens.asList())

# Create dictionary from list comprehension, assembled from each pair of tokens
# returned from a matched URL.
url_by_body = {tokens.body: tokens.href
               for tokens, _, _ in link.scanString(htmlText)}
pprint.pprint(url_by_body)