def parse_depsub(s, defc):
    """Parse a dependency field into a nested clause structure.

    The field is split on ',' into groups and every group on '|' into
    alternatives.  Each non-blank alternative is parsed with
    parse_depversion().  A group with several alternatives becomes
    ['|', alt1, alt2, ...]; a group with exactly one alternative is that
    alternative itself; empty groups are dropped.

    Returns None when nothing was parsed, the single group when there is
    exactly one, and [defc, group1, group2, ...] otherwise.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    blank = regex.compile('^[ \t]*$')
    groups = []
    for group_text in regsub.split(s, '[,]'):
        alternatives = []
        for alt_text in regsub.split(group_text, '[|]'):
            # Skip pieces that consist only of spaces/tabs.
            if blank.search(alt_text) < 0:
                alternatives.append(parse_depversion(alt_text))
        if len(alternatives) == 1:
            groups.append(alternatives[0])
        elif len(alternatives) > 1:
            groups.append(['|'] + alternatives)
    if len(groups) == 0:
        return None
    if len(groups) == 1:
        return groups[0]
    return [defc] + groups
def parse_version(v):
    """Split a version string into (epoch, upstream, debian-revision).

    The epoch is the text before the first ':' and must be all digits.
    The debian revision is the text after the last '-', provided that
    '-' is not the first character of what follows the epoch.  Either
    part is None when absent.

    Raises ValueError when any of the three parts contains characters
    outside the set allowed for it.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    epoch_pat = regex.compile('^[0-9]+$')
    upstream_pat = regex.compile('^[-:.+A-Za-z0-9]+$')
    debian_pat = regex.compile('^[.+A-Za-z0-9]+$')

    rest = v

    epoch = None
    colon = string.find(rest, ':')
    if colon >= 0:
        epoch = rest[:colon]
        if epoch_pat.match(epoch) < 0:
            raise ValueError(
                'epoch ("%s") has invalid format for version string "%s"' % (
                    epoch, v))
        rest = rest[colon + 1:]

    debian = None
    dash = string.rfind(rest, '-')
    if dash > 0:
        debian = rest[dash + 1:]
        if debian_pat.match(debian) < 0:
            raise ValueError(
                'debian-revision ("%s") has invalid format for version string "%s"' % (
                    debian, v))
        rest = rest[:dash]

    upstream = rest
    if upstream_pat.match(upstream) < 0:
        raise ValueError(
            'upstream-version ("%s") has invalid format for version string "%s"' % (
                upstream, v))
    return (epoch, upstream, debian)
def check_deps(clause, pfunc):
    """Evaluate a dependency clause tree, using pfunc on the leaves.

    clause may be None or empty (always satisfied), a list starting
    with '&' (all sub-clauses must be satisfied), a list starting with
    '|' (at least one sub-clause must be satisfied), a plain string, or
    a list whose first element is a relation operator.  The last two
    forms are handed to pfunc and its result is returned.

    Every sub-clause of an '&' or '|' list is evaluated; there is no
    short-circuiting.  Raises ValueError for any other clause shape.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    relation = regex.compile('^<<\\|<=\\|=\\|>=\\|>>\\|<\\|>')
    if clause == None or len(clause) == 0:
        return 1
    head = clause[0]
    if head == '&':
        satisfied = 1
        for sub in clause[1:]:
            if check_deps(sub, pfunc) == 0:
                satisfied = 0
        return satisfied
    if head == '|':
        satisfied = 0
        for sub in clause[1:]:
            if check_deps(sub, pfunc) != 0:
                satisfied = 1
        return satisfied
    if type(clause) == types.StringType:
        return pfunc(clause)
    if relation.search(head) >= 0:
        return pfunc(clause)
    raise ValueError('invalid dependency clause "%s"' % str(clause))
def compile(pattern, syntax):
    """Compile pattern under the given regex syntax and return the result.

    The global regex syntax is switched to `syntax` only for the
    duration of the compilation; the previous syntax is put back
    afterwards, whether or not compilation succeeds.
    """
    previous = regex.set_syntax(syntax)
    try:
        return regex.compile(pattern)
    finally:
        # restore original syntax
        regex.set_syntax(previous)
def _strip_nondigit(s):
    """Split s into (leading non-digit characters, remainder).

    Raises ValueError('internal error') if the splitting pattern does
    not match s.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    splitter = regex.compile('^\\([^0-9]*\\)\\(.*\\)$')
    if splitter.match(s) < 0:
        raise ValueError('internal error')
    # regs[n] holds the (start, end) offsets of group n in s.
    (head_start, head_end) = splitter.regs[1]
    (tail_start, tail_end) = splitter.regs[2]
    return s[head_start:head_end], s[tail_start:tail_end]
def _strip_digit(s):
    """Split s into (leading digits as a long integer, remainder).

    An empty run of leading digits counts as 0.  Raises
    ValueError('internal error') if the splitting pattern does not
    match s.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    splitter = regex.compile('^\\([0-9]*\\)\\(.*\\)$')
    if splitter.match(s) < 0:
        raise ValueError('internal error')
    # regs[n] holds the (start, end) offsets of group n in s.
    (digits_start, digits_end) = splitter.regs[1]
    (tail_start, tail_end) = splitter.regs[2]
    digits = s[digits_start:digits_end]
    tail = s[tail_start:tail_end]
    return string.atol(digits or '0'), tail
def parse_output (f):
    # Parse a stream of per-file result sections, each introduced by a
    # '--------------------------' separator line, into a list of header
    # dictionaries.  Each header comes from parse_output_header() and gets
    # a 'hunks' entry: a list of [status, lineno, offset] for every
    # "Hunk #N ..." line found in that section.
    #
    # Raises ValueError if a "Hunk " line cannot be parsed or if the hunk
    # numbers are not consecutive starting at 1.
    #
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source line -- confirm against the original file.
    diffs = []
    df = backupfile (f)
    while (1):
        # Skip ahead to the next separator line; an empty string from
        # rl() is treated as end of input.
        while (1):
            l = df.rl ()
            if (l == ''): break
            if (l == '--------------------------\n'):
                # Push the separator back -- presumably so that
                # parse_output_header() reads it itself.
                df.backup ()
                break
        if (l == ''): break
        header = parse_output_header (df)
        header['hunks'] = []
        regex.set_syntax (regex_syntax.RE_NO_BK_PARENS | regex_syntax.RE_NO_BK_VBAR)
        hunkr = regex.compile ('^Hunk \#([0-9]+) ([a-zA-Z]+) at ([0-9]+)( \(offset (\-?[0-9]+) lines?\))?\.$')
        # i is the hunk number expected next.
        i = 1
        while (1):
            l = df.rl ()
            if (l == ''): break
            if (l[0:5] == 'Hunk '):
                if (hunkr.match (l) < 0):
                    raise ValueError, ('unable to parse hunk result \"%s\"' % l)
                hunkno = string.atoi (hunkr.group (1))
                status = hunkr.group (2)
                lineno = string.atoi (hunkr.group (3))
                # Group 5 is the optional "(offset N lines)" value.
                if (hunkr.group (5)):
                    offset = string.atoi (hunkr.group (5))
                else:
                    offset = 0
                if (hunkno != i):
                    raise ValueError, ('invalid hunk number %d' % hunkno)
                i = i + 1
                header['hunks'].append ([status, lineno, offset])
            if (l == '--------------------------\n'):
                # Start of the next section: push it back and finish
                # this one.
                df.backup ()
                break
        diffs.append (header)
        if (l == ''): break
    return diffs
def fnmatchcase(name, pat):
    """Test whether FILENAME matches PATTERN, including case.

    This is a version of fnmatch() which doesn't case-normalize
    its arguments.

    The pattern is translated with translate() and compiled once; the
    compiled form is kept in _cache, keyed by the pattern.  Returns
    true only if the whole of `name` is matched.
    """
    if not _cache.has_key(pat):
        res = translate(pat)
        import regex
        # Compile under syntax 0, then put the caller's syntax back.
        save_syntax = regex.set_syntax(0)
        try:
            _cache[pat] = regex.compile(res)
        finally:
            # Restore even when the pattern fails to compile, so an
            # error here cannot leave the global syntax changed.
            regex.set_syntax(save_syntax)
    return _cache[pat].match(name) == len(name)
def check_version(v1, constraint, v2):
    """Return true if version v1 satisfies `constraint` relative to v2.

    The two versions are ordered with compare_versions(v1, v2) and the
    result is checked against the relation operator:

      equal        satisfied by  =  <=  >=  <  >
      v1 greater   satisfied by  >=  >>  >
      v1 smaller   satisfied by  <=  <<  <

    Any other constraint string is never satisfied.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    # The alternatives are wrapped in \( ... \) so that '^' and '$'
    # anchor every alternative.  Without the group the anchors bind only
    # to the first and last alternative, which made e.g. '<<' count as
    # satisfied for equal versions (via the bare '<' alternative).
    eqreg = regex.compile('^\\(=\\|<=\\|>=\\|<\\|>\\)$')
    ltreg = regex.compile('^\\(<=\\|<<\\|<\\)$')
    gtreg = regex.compile('^\\(>=\\|>>\\|>\\)$')
    r = compare_versions(v1, v2)
    # match() returns the length of the match, or -1 on failure.
    if (r == 0):
        return (eqreg.match(constraint) > 0)
    elif (r > 0):
        return (gtreg.match(constraint) > 0)
    else:  # (r < 0)
        return (ltreg.match(constraint) > 0)
def parse_package_name(s):
    """Split 'package[_version[_architecture]]' into a dictionary.

    The result always has a 'package' key; 'version' and
    'architecture' are present only when the corresponding
    underscore-separated part exists.  Raises ValueError when s does
    not have this form.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    name_pat = regex.compile(
        '^\\([-.+A-Za-z0-9]+\\)\\(_\\([^_]+\\)\\)?\\(_\\([^_]+\\)\\)?$')
    if name_pat.search(s) < 0:
        raise ValueError('invalid package name "%s"' % s)
    info = {}
    info['package'] = name_pat.group(1)
    # Groups 3 and 5 are the optional parts without their '_' prefix.
    version = name_pat.group(3)
    if version != None:
        info['version'] = version
    architecture = name_pat.group(5)
    if architecture != None:
        info['architecture'] = architecture
    return info
def parse_provides(s):
    """Split a comma-separated provides field into a list of names.

    Surrounding whitespace is removed from every entry (including the
    first and last, which the comma pattern alone does not trim) and
    empty entries are dropped.
    """
    import string
    # regsub.split() compiles its pattern under the current global
    # syntax, so select the syntax explicitly first.
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    ret = []
    for item in regsub.split(s, '[ \t]*[,][ \t]*'):
        item = string.strip(item)
        if item != '':
            ret.append(item)
    return ret
def package_satisfies(p, clause):
    """Return true if package record p satisfies a dependency clause.

    clause is either a plain package name (string) or a list
    [constraint, package, version] whose first element is a relation
    operator.

    A plain name is satisfied when it equals p['package'] or is listed
    in p['provides'].  A versioned clause is satisfied only when the
    package name matches AND the canonical version of p passes
    dpkg_version.check_version() for the given constraint.

    Raises ValueError for any other clause shape.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    depstr = regex.compile('^<<\\|<=\\|=\\|>=\\|>>\\|<\\|>')
    if (type(clause) == types.StringType):
        if (p['package'] == clause):
            return 1
        if (p.has_key('provides') and (clause in p['provides'])):
            return 1
        return 0
    elif (depstr.search(clause[0]) >= 0):
        constraint = clause[0]
        package = clause[1]
        version = clause[2]
        # A different package can never satisfy a versioned clause.
        # (The test used to be inverted: a name match returned success
        # without looking at the version, and unrelated packages were
        # version-checked instead.)
        if (p['package'] != package):
            return 0
        return dpkg_version.check_version(package_canon_version(p),
                                          constraint, version)
    else:
        raise ValueError('invalid dependency clause "%s"' % str(clause))
def process_client(db, r, w):
    """Serve one client of the line-based certificate protocol.

    Lines are read from r and replies written to w until end of input
    or a QUIT command.  Commands handled:

      LIST         list all keys of db
      CERT <name>  send the certificate stored under <name>
      HELP         list the legal commands
      QUIT         close the connection

    Blank lines are ignored; anything else gets a '500' reply.

    db must provide keys(), has_key() and fetch_key().
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    cregex = regex.compile('^CERT[ \t]+\([-+_:.A-Za-z0-9]+\)$')
    w.write('220 package verification server ready\r\n')
    w.flush()
    while 1:
        l = r.readline()
        if (l == ''):
            break
        l = chop(l)
        if (regex.match('^[ \t]*$', l) >= 0):
            continue
        elif (regex.match('^LIST[ \t]*$', l) >= 0):
            w.write('211 certificate list follows\r\n')
            for key in db.keys():
                w.write(key + '\r\n')
            w.write('.\r\n')
            w.flush()
        elif (cregex.match(l) >= 0):
            key = cregex.group(1)
            if (db.has_key(key)):
                w.write('210 certificate follows\r\n')
                cert = db.fetch_key(key)
                # The wire format uses CRLF line endings.
                cert = regsub.gsub('\n', '\r\n', cert)
                w.write(cert)
                w.write('.\r\n')
            else:
                w.write('501 no such certificate\r\n')
            w.flush()
        elif (regex.match('^HELP[ \t]*$', l) >= 0):
            w.write(
                '100 legal commands\r\n CERT [package-name]\r\n HELP\r\n LIST\r\n QUIT\r\n.\r\n'
            )
            w.flush()
        elif (regex.match('^QUIT[ \t]*$', l) >= 0):
            w.write('221 closing connection\r\n')
            w.flush()
            break
        else:
            # Was w.fwrite(), which no other branch uses; an unknown
            # command would have raised AttributeError on a file
            # object instead of sending the error reply.
            w.write('500 invalid command\r\n')
            w.flush()
def ggrep(syntax, pat, files):
    """Search each file for lines matching pat and report them.

    pat is compiled under the given regex syntax; the previous global
    syntax is restored afterwards.  files is a list of file names, or a
    list containing a single list of file names.  Every matching line
    is passed to showline(filename, lineno, line, prog).

    Sets the global opt_show_filename to true when more (or fewer)
    than one file is searched.
    """
    global opt_show_filename
    if len(files) == 1 and type(files[0]) == type([]):
        files = files[0]
    opt_show_filename = (len(files) != 1)
    syntax = regex.set_syntax(syntax)
    try:
        prog = regex.compile(pat)
    finally:
        syntax = regex.set_syntax(syntax)
    for filename in files:
        fp = open(filename, 'r')
        try:
            lineno = 0
            while 1:
                line = fp.readline()
                if not line:
                    break
                lineno = lineno + 1
                if prog.search(line) >= 0:
                    showline(filename, lineno, line, prog)
        finally:
            # Close the file even if searching or reporting fails.
            fp.close()
def check_dependency(clause, index=index, details=details):
    """Check one dependency clause against the package index.

    clause is a package name (string) or [constraint, package, version].
    Every entry of index[package] is examined; for each one a 'debug'
    tuple is appended to details saying whether it resolved the
    dependency.  Returns 1 if at least one entry resolved it; otherwise
    appends an 'error' tuple to details and returns 0.

    Raises ValueError for any other clause shape.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    relation = regex.compile('^<<\\|<=\\|=\\|>=\\|>>\\|<\\|>')
    if type(clause) == types.StringType:
        constraint, package, version = None, clause, None
    elif relation.search(clause[0]) >= 0:
        constraint, package, version = clause[0], clause[1], clause[2]
    else:
        raise ValueError('invalid dependency clause "%s"' % str(clause))

    candidates = []
    if index.has_key(package):
        candidates = index[package]

    resolved = 0
    for candidate in candidates:
        pver = dpkg_packages.package_canon_version(candidate)
        # An unversioned clause accepts any candidate; the version is
        # compared only when a constraint is present.
        if (constraint != None) and (
                not dpkg_version.check_version(pver, constraint, version)):
            details.append((
                'debug',
                'failed to resolve dependency on "%s" with "%s" version "%s"'
                % (str(clause), candidate['package'], pver)))
            continue
        details.append(
            ('debug', 'resolved dependency on "%s" with "%s" version "%s"' %
             (str(clause), candidate['package'], pver)))
        resolved = 1

    if resolved:
        return 1
    details.append(
        ('error', 'unresolved dependency on "%s"' % str(clause)))
    return 0
def parse_depversion(s):
    """Parse one dependency such as 'name' or 'name (>= 1.0)'.

    Returns the bare package name when there is no (non-blank)
    parenthesised part, otherwise [relation, name, value] taken from
    the parenthesised '(relation value)' text.

    Raises ValueError when either the dependency or its parenthesised
    version part cannot be parsed.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    dep_pat = regex.compile(
        '^[ \t]*\\([-.+A-Za-z0-9]+\\)[ \t]*\\((.*)\\)?[ \t]*$')
    ver_pat = regex.compile('^(\\([<>=]+\\)[ \t]*\\(.*\\))$')
    blank = regex.compile('^[ \t]*$')

    if dep_pat.search(s) < 0:
        raise ValueError('syntax error parsing dependency "%s"' % s)
    dep = dep_pat.group(1)
    ver = dep_pat.group(2)
    if (ver == None) or (blank.search(ver) >= 0):
        return dep
    if ver_pat.search(ver) < 0:
        raise ValueError(
            'syntax error parsing dependency version "%s"' % ver)
    relation = ver_pat.group(1)
    value = ver_pat.group(2)
    return [relation, dep, value]
def load(input):
    """Read a state-transition program from a file-like object.

    Each non-blank line (after stripping) must look like

        <state,'c'>--><new_state,op>

    where state and new_state are decimal numbers, c is a single
    character and op is L, R or a quoted character.  Returns a
    dictionary mapping (state, c) to (new_state, op), with the states
    converted to integers.

    Raises SyntaxError, prefixed with the 1-based line number, for a
    line that does not have this form.
    """
    import regex
    import regex_syntax
    # Compile under AWK syntax, then put the previous global syntax
    # back so that loading a program does not change how other code's
    # patterns are compiled.
    saved_syntax = regex.set_syntax(regex_syntax.RE_SYNTAX_AWK)
    try:
        transition = regex.compile(
            "<([0-9]+),'(.)'>--><([0-9]+),(L|R|'.')>$")
    finally:
        regex.set_syntax(saved_syntax)

    program = {}
    lineno = 0
    for raw in input.readlines():
        lineno = lineno + 1
        line = strip(raw)
        if not line:
            continue
        if transition.match(line) == -1:
            raise SyntaxError(
                repr(lineno) + ': Bad syntax for state transition')
        state, char, new_state, operation = transition.group(1, 2, 3, 4)
        program[(atoi(state), char)] = (atoi(new_state), operation)
    return program
def load(input):
    """Read a state-transition program from a file-like object.

    Each non-blank line (after stripping) must look like

        <state,'c'>--><new_state,op>

    where state and new_state are decimal numbers, c is a single
    character and op is L, R or a quoted character.  Returns a
    dictionary mapping (state, c) to (new_state, op), with the states
    converted to integers.

    Raises SyntaxError, prefixed with the 1-based line number, for a
    line that does not have this form.
    """
    import regex
    import regex_syntax
    # Compile under AWK syntax, then put the previous global syntax
    # back so that loading a program does not change how other code's
    # patterns are compiled.
    saved_syntax = regex.set_syntax(regex_syntax.RE_SYNTAX_AWK)
    try:
        transition = regex.compile(
            "<([0-9]+),'(.)'>--><([0-9]+),(L|R|'.')>$")
    finally:
        regex.set_syntax(saved_syntax)

    program = {}
    lineno = 0
    for raw in input.readlines():
        lineno = lineno + 1
        line = strip(raw)
        if not line:
            continue
        if transition.match(line) == -1:
            raise SyntaxError(
                repr(lineno) + ': Bad syntax for state transition')
        state, char, new_state, operation = transition.group(1, 2, 3, 4)
        program[(atoi(state), char)] = (atoi(new_state), operation)
    return program
# fixed checksum calculations for certain cases # 2/7/99 - v0.02, fixed LH parity pattern for EAN13. It's not the check digit! # 2/7/99 - initial release # ================================================================================ # # barCodeSymbol - the whole printed symbol, including bar code(s) and product code(s). # UPC, UPCA, UPC5, EAN13 - the number itself, with check digit, string representation, # and barcode bits # import sys import regsub from regex_syntax import * import regex regex.set_syntax(RE_SYNTAX_AWK) from types import * BooklandError = "Something wrong" A="A";B="B";C="C";O="O";E="E" UPCABITS = [{O:"0001101",E:"1110010"}, {O:"0011001",E:"1100110"}, {O:"0010011",E:"1101100"}, {O:"0111101",E:"1000010"}, {O:"0100011",E:"1011100"}, {O:"0110001",E:"1001110"}, {O:"0101111",E:"1010000"}, {O:"0111011",E:"1000100"}, {O:"0110111",E:"1001000"},
# examples of how to use these methods # # For bugs you can send mail to me at [email protected] import sys, posix, time, regex, string, os from regex_syntax import * from socket import * from StringIO import * from rfc822 import * from lockfile import * servname = 'telnet' MULTILINE = 1 regex.set_syntax(RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS) isnum = regex.compile('[0-9]*') class MailTransport: def __init__(self, debug=0): self.s = socket(AF_INET, SOCK_STREAM) self.f = self.s.makefile('r') self.multiend = regex.compile('^\.\r\n') self.okay = regex.compile('^\+OK.*') self.error = regex.compile('^\-ERR.*') self.log = Log('mtranlog').log self.port = 7 self.debug = debug self.exception = 'MailTransportError'
#! /usr/bin/env python # 1) Regular Expressions Test # # Read a file of (extended per egrep) regular expressions (one per line), # and apply those to all files whose names are listed on the command line. # Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns # against a five /etc/termcap files. Tests using more elaborate patters # would also be interesting. Your code should not break if given hundreds # of regular expressions or binary files to scan. # This implementation: # - combines all patterns into a single one using ( ... | ... | ... ) # - reads patterns from stdin, scans files given as command line arguments # - produces output in the format <file>:<lineno>:<line> # - is only about 2.5 times as slow as egrep (though I couldn't run # Tom's test -- this system, a vanilla SGI, only has /etc/terminfo) import string import sys import regex from regex_syntax import * regex.set_syntax(RE_SYNTAX_EGREP) def main(): pats = map(chomp, sys.stdin.readlines()) bigpat = '(' + string.joinfields(pats, '|') + ')' prog = regex.compile(bigpat) for file in sys.argv[1:]: try: fp = open(file, 'r') except IOError, msg: print "%s: %s" % (file, msg)
def newsyntax(self):
    """Install the syntax selected by self.vars and recompile.

    The values returned by get() on every entry of self.vars are
    OR-ed together, the result becomes the global regex syntax, and
    self.recompile() is called.
    """
    flags = 0
    for option in self.vars:
        flags = flags | option.get()
    regex.set_syntax(flags)
    self.recompile()
sys.exit(1) def compile_patterns(pattfile): res = [] for pattstr in open(pattfile, 'r').readlines(): try: res.append(regex.compile(pattstr[:-1])) # make regex object except: # strip end-of-line print 'pattern ignored:', pattstr # or use regex.match return res def searcher(pattfile, srchfiles): patts = compile_patterns(pattfile) # compile for speed for file in glob.glob(srchfiles): # all matching files lineno = 1 # glob uses regex too print '\n[%s]' % file for line in open(file, 'r').readlines(): # all lines in file for patt in patts: if patt.search(line) >= 0: # try all patterns print '%04d)' % lineno, line, # report line match break lineno = lineno + 1 if __name__ == '__main__': from regex_syntax import * regex.set_syntax(RE_SYNTAX_EGREP) # emacs is the default apply(searcher, handle_args())
# would also be interesting. Your code should not break if given hundreds # of regular expressions or binary files to scan. # This implementation: # - combines all patterns into a single one using ( ... | ... | ... ) # - reads patterns from stdin, scans files given as command line arguments # - produces output in the format <file>:<lineno>:<line> # - is only about 2.5 times as slow as egrep (though I couldn't run # Tom's test -- this system, a vanilla SGI, only has /etc/terminfo) import string import sys import regex from regex_syntax import * regex.set_syntax(RE_SYNTAX_EGREP) def main(): pats = map(chomp, sys.stdin.readlines()) bigpat = '(' + string.joinfields(pats, '|') + ')' prog = regex.compile(bigpat) for file in sys.argv[1:]: try: fp = open(file, 'r') except IOError, msg: print "%s: %s" % (file, msg) continue lineno = 0 while 1: line = fp.readline()
def parse_unified_diff (df, template):
    # Parse the unified diff for one file from df and return a list of
    # hunk dictionaries.
    #
    # The first two lines read must be the '--- from' and '+++ to'
    # header lines.  Each following '@@ ... @@' block yields a deep copy
    # of template with 'type', 'linespec', 'diff-from', 'diff-to' and
    # 'lines' filled in.
    #
    # Raises ValueError if a header line, a hunk start line or the
    # first character of a hunk line is not as expected.
    #
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source line -- confirm against the original file.
    regex.set_syntax (regex_syntax.RE_NO_BK_PARENS | regex_syntax.RE_NO_BK_VBAR)
    fromr = regex.compile ('^--- ([^\t ]*)[\n\t ]')
    tor = regex.compile ('^\+\+\+ ([^\t ]*)[\n\t ]')
    startr = regex.compile ('^@@ -(([0-9]+),)?([0-9]+) \+(([0-9]+),)?([0-9]+) @@$')
    l = df.checkrl ()
    if (fromr.match (l) < 0):
        raise ValueError, ('unable to determine name of original file from diff line "%s"' % l)
    difffrom = fromr.group (1);
    l = df.checkrl ()
    if (tor.match (l) < 0):
        raise ValueError, ('unable determine to name of patched file from diff line "%s"' % l)
    diffto = tor.group (1);
    diffs = []
    while (1):
        l = df.rl ()
        # An empty string from rl() is treated as end of input.
        if (l == ''): return diffs
        if (startr.match (l) < 0):
            raise ValueError, ("file did not have diff start block in appropriate location: '%s'" % l)
        diff = copy.deepcopy (template)
        diff['type'] = 'unified'
        # The '@@ ... @@' line without its last character (presumably
        # the trailing newline).
        diff['linespec'] = l[0:-1]
        diff['diff-from'] = difffrom
        diff['diff-to'] = diffto
        diff['lines'] = []
        # Collect the body of this hunk.  Any line that starts a new
        # hunk or a new file header is pushed back and ends the hunk.
        while (1):
            l = df.rl ()
            if (l == ''): break
            if (l[0:2] == '@@'):
                df.backup ()
                break
            if (l[0:5] == 'diff '):
                df.backup ()
                break
            if (l[0:4] == '--- '):
                df.backup ()
                break
            if (l[0:4] == '+++ '):
                df.backup ()
                break
            if (l[0:4] == '*** '):
                df.backup ()
                break
            # The "\ No newline ..." marker is stored as-is, bypassing
            # the first-character check below.
            if (l[0:12] == '\ No newline'):
                diff['lines'].append (l)
                continue
            if (l[0] not in [' ', '+', '-', '!']):
                raise ValueError, ("diff line starts with invalid character '%s': '%s'" % (quote_char (l[0]), l))
            diff['lines'].append (l)
            continue
        diffs.append (diff)
        # Look at the next line without consuming it: stop at end of
        # input or at the start of another file's diff.
        l = df.peekrl ()
        if (l == ''): break
        if (l[0:5] == 'diff '): break
        if (l[0:4] == '--- '): break
        if (l[0:4] == '+++ '): break
        if (l[0:4] == '*** '): break
    return diffs
# preformatted section <more than two spaces> # # Heading level is determined by the number of (<number><period>) segments. # Blank lines force a separation of elements; if none of the above four # types is indicated, a new paragraph begins. A line beginning with many # spaces is interpreted as a continuation (instead of preformatted) after # a list element. Headings are anchored; paragraphs starting with "Q." are # emphasized, and those marked with "A." get their first sentence emphasized. # # Hyperlinks are created from references to: # URLs, explicitly marked using <URL:scheme://host...> # other questions, of the form "question <number>(<period><number>)*" # sections, of the form "section <number>". import sys, string, regex, regsub, regex_syntax regex.set_syntax(regex_syntax.RE_SYNTAX_AWK) # --------------------------------------------------------- regular expressions orditemprog = regex.compile(' ?([1-9][0-9]*\.)+ +') itemprog = regex.compile(' ? ?[-*] +') headingprog = regex.compile('([1-9][0-9]*\.)+ +') prefmtprog = regex.compile(' ') blankprog = regex.compile('^[ \t\r\n]$') questionprog = regex.compile(' *Q\. +') answerprog = regex.compile(' *A\. +') sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)') mailhdrprog = regex.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To' '|Approved|Archive-Name|Version|Last-Modified): +', regex.casefold) urlprog = regex.compile('<URL:([^&]+)>') addrprog = regex.compile('<([^>@:]+@[^&@:]+)>')
# ================================================================================ # # barCodeSymbol - the whole printed symbol, including bar code(s) and product code(s). # UPC, UPCA, UPC5, EAN13 - the number itself, with check digit, string representation, # and barcode bits # import re # we should get rid of regsub and regex in favor of re # hope there's no conflict. import string import sys import regsub from regex_syntax import * import regex regex.set_syntax(RE_SYNTAX_AWK) from types import * BooklandError = "Something wrong" A = "A" B = "B" C = "C" O = "O" E = "E" UPCABITS = [{ O: "0001101", E: "1110010" }, { O: "0011001", E: "1100110"
String = '\'\(\\\\.\|[^\\\n\']\)*\'' + '\|' + '"\(\\\\.\|[^\\\n"]\)*"' # Note: this module *recognizes* double quotes, but for backward # compatibility, it doesn't *use* them! Operator = '~\|\+\|-\|\*\|/\|%\|\^\|&\||\|<<\|>>\|==\|<=\|<>\|!=\|>=\|=\|<\|>' Bracket = '[][(){}]' Special = '[:;.,`\n]' Funny = Operator + '\|' + Bracket + '\|' + Special PlainToken = Name + '\|' + Number + '\|' + String + '\|' + Funny Token = Ignore + '\(' + PlainToken + '\)' try: save_syntax = regex.set_syntax(0) # Use default syntax tokenprog = regex.compile(Token) finally: if save_syntax != 0: dummy = regex.set_syntax(save_syntax) # Restore original syntax def test(file): f = open(file, 'r') while 1: line = f.readline() if not line: break i, n = 0, len(line) while i < n: j = tokenprog.match(line, i) if j < 0:
# examples of how to use these methods # # For bugs you can send mail to me at [email protected] import sys, posix, time, regex, string, os from regex_syntax import * from socket import * from StringIO import * from rfc822 import * from lockfile import * servname = 'telnet' MULTILINE = 1 regex.set_syntax(RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS) isnum = regex.compile('[0-9]*') class MailTransport: def __init__(self, debug = 0): self.s = socket(AF_INET, SOCK_STREAM) self.f = self.s.makefile('r') self.multiend = regex.compile('^\.\r\n') self.okay = regex.compile('^\+OK.*') self.error = regex.compile('^\-ERR.*') self.log = Log('mtranlog').log self.port = 7 self.debug = debug self.exception = 'MailTransportError' def open_server(self, host):
# it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. import sys, string, regex, regex_syntax, regsub, dpkg_message, dpkg_packages, outstr, stat regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS) nregex = regex.compile('^[0-9]+$') def parse_ugid(s): if (nregex.match(s) >= 0): return string.atoi(s) else: return s regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS) dvregex = regex.compile( '^\([^ ]+\) d \([0-7]+\) \([-a-zA-Z0-9]+\)/\([-a-zA-Z0-9]+\)') fvregex = regex.compile( '^\([^ ]+\) f \([0-7]+\) \([-a-zA-Z0-9]+\)/\([-a-zA-Z0-9]+\) \([0-9a-f]+\)'
def parse_context_diff (df, template):
    # Validate the context diff for one file read from df and return a
    # dictionary with 'type' ('context'), 'from' and 'to' (the file
    # names taken from the '*** ' and '--- ' header lines).
    #
    # The hunks themselves are only checked for structure, not stored.
    # template is accepted for symmetry with parse_unified_diff() but is
    # not used.
    #
    # Raises ValueError if a header line, a hunk marker or the first
    # character of a hunk line is not as expected.
    #
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source line -- confirm against the original file.
    regex.set_syntax (regex_syntax.RE_NO_BK_PARENS | regex_syntax.RE_NO_BK_VBAR)
    fromr = regex.compile ('^\*\*\* ([^\t ]*)[\n\t ]')
    tor = regex.compile ('^--- ([^\t ]*)[\n\t ]')
    dstartr = regex.compile ('^\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*$')
    fromstartr = regex.compile ('^\*\*\* ([0-9]+),([0-9]+) \*\*\*\*$')
    tostartr = regex.compile ('^--- ([0-9]+),([0-9]+) ----$')
    diff = {}
    diff['type'] = 'context'
    l = df.checkrl ()
    if (fromr.match (l) < 0):
        raise ValueError ('unable to determine name of original file from diff line "%s"' % l)
    diff['from'] = fromr.group (1)
    l = df.checkrl ()
    if (tor.match (l) < 0):
        raise ValueError ('unable determine to name of patched file from diff line "%s"' % l)
    diff['to'] = tor.group (1)
    while (1):
        l = df.rl ()
        # End of input before another hunk: return what was parsed.
        # (This used to return the undefined name 'diffs', raising
        # NameError instead.)
        if (l == ''):
            return diff
        if (dstartr.match (l) < 0):
            raise ValueError ('file did not have diff start block in appropriate location: %s' % l)
        l = df.checkrl ()
        if (fromstartr.match (l) < 0):
            raise ValueError ('file did not have diff from block in appropriate location: %s' % l)
        # The second number of the '*** a,b ****' line is used as the
        # number of lines to read for the "from" part.
        fromskip = string.atoi (fromstartr.group (2))
        for i in range (fromskip):
            l = df.checkrl ()
            if (l[0] not in [' ', '+', '-', '!']):
                raise ValueError ('diff line contains invalid character: %s' % l)
        # Consume an optional "\ No newline ..." marker; push anything
        # else back.
        l = df.checkrl ()
        if (l[0:12] != '\ No newline'):
            df.backup ()
        l = df.checkrl ()
        if (tostartr.match (l) < 0):
            raise ValueError ('file did not have diff to block in appropriate location: %s' % l)
        toskip = string.atoi (tostartr.group (2))
        for i in range (toskip):
            l = df.checkrl ()
            if (l[0] not in [' ', '+', '-', '!']):
                raise ValueError ('diff line contains invalid character: %s' % l)
        l = df.rl ()
        if (l == ''):
            break
        if (l[0:12] != '\ No newline'):
            df.backup ()
        # Look at the next line without consuming it: stop at end of
        # input or at the start of another file's diff.
        l = df.peekrl ()
        if (l == ''):
            break
        if (l[0:5] == 'diff '):
            break
    return diff
DeprecationWarning, __name__) import regex from regex_syntax import * re = 'a+b+c+' print 'no match:', regex.match(re, 'hello aaaabcccc world') print 'successful search:', regex.search(re, 'hello aaaabcccc world') try: cre = regex.compile('\(' + re) except regex.error: print 'caught expected exception' else: print 'expected regex.error not raised' print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb') prev = regex.set_syntax(RE_SYNTAX_AWK) print 'successful awk syntax:', regex.search('(a+)|(b+)', 'cdb') regex.set_syntax(prev) print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb') re = '\(<one>[0-9]+\) *\(<two>[0-9]+\)' print 'matching with group names and compile()' cre = regex.compile(re) print cre.match('801 999') try: print cre.group('one') except regex.error: print 'caught expected exception' else: print 'expected regex.error not raised'
# 'grep'
return sys.argv[1], sys.argv[2] except: print help_string sys.exit(1) def compile_patterns(pattfile): res = [] for pattstr in open(pattfile, 'r').readlines(): try: res.append(regex.compile(pattstr[:-1])) # make regex object except: # strip end-of-line print 'pattern ignored:', pattstr # or use regex.match return res def searcher(pattfile, srchfiles): patts = compile_patterns(pattfile) # compile for speed for file in glob.glob(srchfiles): # all matching files lineno = 1 # glob uses regex too print '\n[%s]' % file for line in open(file, 'r').readlines(): # all lines in file for patt in patts: if patt.search(line) >= 0: # try all patterns print '%04d)' % lineno, line, # report line match break lineno = lineno+1 if __name__ == '__main__': from regex_syntax import * regex.set_syntax(RE_SYNTAX_EGREP) # emacs is the default apply(searcher, handle_args())
def __init__(self, pat):
    """Compile pat under AWK regex syntax and keep it in self.prog.

    The global regex syntax is changed only while compiling; the
    previous syntax is put back even if compilation fails.
    """
    previous = regex.set_syntax(RE_SYNTAX_AWK)
    try:
        self.prog = regex.compile(pat)
    finally:
        regex.set_syntax(previous)
# preformatted section <more than two spaces> # # Heading level is determined by the number of (<number><period>) segments. # Blank lines force a separation of elements; if none of the above four # types is indicated, a new paragraph begins. A line beginning with many # spaces is interpreted as a continuation (instead of preformatted) after # a list element. Headings are anchored; paragraphs starting with "Q." are # emphasized, and those marked with "A." get their first sentence emphasized. # # Hyperlinks are created from references to: # URLs, explicitly marked using <URL:scheme://host...> # other questions, of the form "question <number>(<period><number>)*" # sections, of the form "section <number>". import sys, string, regex, regsub, regex_syntax regex.set_syntax(regex_syntax.RE_SYNTAX_AWK) # --------------------------------------------------------- regular expressions orditemprog = regex.compile(' ?([1-9][0-9]*\.)+ +') itemprog = regex.compile(' ? ?[-*] +') headingprog = regex.compile('([1-9][0-9]*\.)+ +') prefmtprog = regex.compile(' ') blankprog = regex.compile('^[ \t\r\n]$') questionprog = regex.compile(' *Q\. +') answerprog = regex.compile(' *A\. +') sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)') mailhdrprog = regex.compile( '^(Subject|Newsgroups|Followup-To|From|Reply-To' '|Approved|Archive-Name|Version|Last-Modified): +', regex.casefold) urlprog = regex.compile('<URL:([^&]+)>')
# Output-comparison test for the deprecated regex module: everything
# printed below is part of the test result, so the print statements must
# not be altered.
from test_support import verbose, sortdict
import warnings
# Importing regex emits a DeprecationWarning; silence it for this module.
warnings.filterwarnings("ignore", "the regex module is deprecated",
                        DeprecationWarning, __name__)
import regex
from regex_syntax import *

re = 'a+b+c+'
# The same pattern and string are tried with match() and with search().
print 'no match:', regex.match(re, 'hello aaaabcccc world')
print 'successful search:', regex.search(re, 'hello aaaabcccc world')
# Compiling '\(' + re is expected to raise regex.error.
try:
    cre = regex.compile('\(' + re)
except regex.error:
    print 'caught expected exception'
else:
    print 'expected regex.error not raised'

# The same AWK-style pattern is searched before, during and after a
# temporary switch to RE_SYNTAX_AWK.
print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb')
prev = regex.set_syntax(RE_SYNTAX_AWK)
print 'successful awk syntax:', regex.search('(a+)|(b+)', 'cdb')
regex.set_syntax(prev)
print 'failed awk syntax:', regex.search('(a+)|(b+)', 'cdb')

re = '\(<one>[0-9]+\) *\(<two>[0-9]+\)'
print 'matching with group names and compile()'
cre = regex.compile(re)
print cre.match('801 999')
# Looking up a group by name is expected to raise regex.error here.
try:
    print cre.group('one')
except regex.error:
    print 'caught expected exception'
else:
    print 'expected regex.error not raised'