def parse_depsub(s, defc):
    """Parse a dependency string into a nested list structure.

    s is split on commas into groups, and every group is split on '|'
    into alternatives.  Each alternative that is not blank (only spaces
    and tabs) is converted with parse_depversion().

    A group with exactly one alternative contributes that parsed item
    directly; a group with several contributes ['|', alt1, alt2, ...];
    a group with none contributes nothing.

    Returns None when no group produced anything, the single group when
    there is exactly one, and [defc] + groups otherwise.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    blank = regex.compile('^[ \t]*$')
    groups = []
    for group_text in regsub.split(s, '[,]'):
        alternatives = []
        for alt_text in regsub.split(group_text, '[|]'):
            # search() returns -1 when the text is not blank
            if blank.search(alt_text) < 0:
                alternatives.append(parse_depversion(alt_text))
        if len(alternatives) == 1:
            groups.append(alternatives[0])
        elif len(alternatives) > 1:
            groups.append(['|'] + alternatives)
    if not groups:
        return None
    if len(groups) == 1:
        return groups[0]
    return [defc] + groups
def unique_signals(msg):
    """Return one Signal per symbol: the one carrying the latest date.

    msg is iterated line by line.  The last character of every line is
    dropped (presumably the trailing newline -- TODO confirm against the
    caller) and lines that do not match event_string are ignored.  For a
    matching line the symbol and the date text are extracted with
    symbol_regex/symbol_regsub and date_regex/date_regsub; the date text
    is split on "/" and read as month/day/year.

    Returns a list with, for every symbol seen, the Signal whose date is
    greatest (the earliest such line wins a tie).

    Raises ValueError if a date component is not a decimal integer and
    IndexError if the date has fewer than three components.
    """
    signal_table = {}
    for line in msg:
        line = line[:-1]
        if match(event_string, line) == -1:
            continue
        symbol = sub(symbol_regex, symbol_regsub, line)
        date = sub(date_regex, date_regsub, line)
        ymd = split(date, "/")
        # int() instead of eval(): the text comes straight from the input
        # and must never be executed as code.  int() also reads numbers
        # with leading zeros ("08") as decimal, so no zero-stripping is
        # needed and a year such as "08" no longer fails.
        year = int(ymd[2])
        month = int(ymd[0])
        day = int(ymd[1])
        signal = Signal(symbol, Date(year, month, day), line)
        known = signal_table.get(symbol)
        if known is None:
            signal_table[symbol] = [signal]
        else:
            known.append(signal)

    signal_list = []
    for symbol in signal_table.keys():
        signals = signal_table[symbol]
        last_signal = signals[0]
        for candidate in signals[1:]:
            # strict comparison: on equal dates the earlier line is kept
            if candidate.date > last_signal.date:
                last_signal = candidate
        signal_list.append(last_signal)
    return signal_list
def piece_replace( delimiter, source, replace, index_zero_based ):
    """Replace one delimited field of source and return the rejoined text.

    source is stripped of surrounding whitespace and split with
    regsub.split() using delimiter as the pattern.  The field at
    index_zero_based is replaced by replace and the fields are put back
    together by get_record_join_string().

    Returns "" when index_zero_based is at or beyond the number of fields.
    """
    fields = regsub.split( string.strip( source ), delimiter )
    if index_zero_based < len( fields ):
        fields[ index_zero_based ] = replace
        return get_record_join_string( fields, delimiter )
    return ""
def sample1(filename, aft=None, fore=None, top=None, home=None):
    """Render DoI.txt (read from datadir) as a SeriesDocument page.

    aft, fore, top and home are stored on the document as its goprev,
    gonext, gotop and gohome attributes.  The text is split into
    paragraphs at blank lines; a paragraph matching 'Title:...' sets the
    document title, a paragraph that is a line of two or more dashes
    becomes an HR(), and every other paragraph is appended as a
    Paragraph whose leading word character is marked up with font2.

    The finished document is written to filename inside htmldir.
    """
    doc = SeriesDocument('HTMLgen.rc')
    doc.goprev, doc.gonext, doc.gotop, doc.gohome = aft, fore, top, home
    doc.background = '../image/texturec.jpg'
    doc.banner = ('../image/historic.gif', 472, 60)
    doc.author = '1776 Thomas Jefferson'
    doc.email = '*****@*****.**'
    doc.logo = ('../image/eagle21.gif', 64, 54)
    # parse Declaration of Independence
    re_hline = regex.compile('^--+$')
    re_title = regex.compile('^Title:\(.*$\)')
    font2 = Font(size='+2')
    # Close the input file explicitly instead of relying on the garbage
    # collector to release the handle.
    infile = open(os.path.join(datadir, 'DoI.txt'))
    try:
        s = infile.read()
    finally:
        infile.close()
    paragraphs = regsub.split(s, '\n\([\t ]*\n\)+')
    for para in paragraphs:
        if not para:
            continue
        if re_title.search(para) > -1:
            doc.title = re_title.group(1)
        elif re_hline.search(para) > -1:
            doc.append(HR())
        else:
            p = Paragraph(para)
            # using \` to match beginning of paragraph
            # ^ won't work because it'll match all the newlines
            p.markup('\`\(\w\)', font2, reg_type='regex')
            doc.append(p)
    doc.write(os.path.join(htmldir, filename))
def fromstring(self, data):
    """Add the integer ranges described by the string data to self.pairs.

    data is split into parts with the pattern self.sep, and each part is
    split into bounds with the pattern self.rng.  A part holding one
    integer n becomes the pair (n, n); a part holding two integers
    lo <= hi becomes (lo, hi).  The new pairs are appended to self.pairs
    and self.normalize() is called afterwards.

    Raises ValueError('bad data passed to IntSet') for any other part
    shape; string.atoi raises its own error for non-numeric text.  In
    both cases self.pairs is left untouched.
    """
    import string, regsub
    parsed = []
    for part in regsub.split(data, self.sep):
        pieces = map(string.strip, regsub.split(part, self.rng))
        bounds = map(string.atoi, pieces)
        if len(bounds) == 1:
            low = high = bounds[0]
        elif len(bounds) == 2 and bounds[0] <= bounds[1]:
            low, high = bounds
        else:
            raise ValueError('bad data passed to IntSet')
        parsed.append((low, high))
    self.pairs = self.pairs + parsed
    self.normalize()
def unique_signals(msg):
    """Return, for every symbol found in msg, its most recently dated Signal.

    Each element of msg has its final character removed (presumably the
    newline -- TODO confirm against the caller); elements that do not
    match event_string are skipped.  The symbol and date are pulled out
    of a matching line with symbol_regex/symbol_regsub and
    date_regex/date_regsub, and the date is split on "/" into
    month, day and year.

    Returns a list of Signal objects, one per symbol, each being the
    signal with the greatest date (first one seen on a tie).

    Raises ValueError for a non-numeric date component and IndexError
    for a date with fewer than three components.
    """
    signal_table = {}
    for line in msg:
        line = line[:-1]
        if match(event_string, line) == -1:
            continue
        symbol = sub(symbol_regex, symbol_regsub, line)
        date = sub(date_regex, date_regsub, line)
        ymd = split(date, "/")
        # The date parts come from the input text, so convert them with
        # int() rather than executing them through eval().  int() treats
        # "08" as decimal 8, which also removes the need to strip a
        # leading zero and fixes two-digit years that begin with 0.
        year = int(ymd[2])
        month = int(ymd[0])
        day = int(ymd[1])
        signal = Signal(symbol, Date(year, month, day), line)
        existing = signal_table.get(symbol)
        if existing is None:
            signal_table[symbol] = [signal]
        else:
            existing.append(signal)

    signal_list = []
    for symbol in signal_table.keys():
        signals = signal_table[symbol]
        last_signal = signals[0]
        for other in signals[1:]:
            # strictly later dates only, so ties keep the earlier entry
            if other.date > last_signal.date:
                last_signal = other
        signal_list.append(last_signal)
    return signal_list
def sample1(filename, aft=None, fore=None, top=None, home=None):
    """Build a SeriesDocument from DoI.txt and write it to htmldir/filename.

    aft, fore, top and home become the document's goprev, gonext, gotop
    and gohome attributes.  DoI.txt (in datadir) is split into
    paragraphs at blank lines: a 'Title:...' paragraph supplies
    doc.title, a paragraph consisting of two or more dashes is rendered
    as HR(), and any other paragraph is appended as a Paragraph with its
    first word character marked up using font2.
    """
    doc = SeriesDocument('HTMLgen.rc')
    doc.goprev,doc.gonext,doc.gotop,doc.gohome = aft,fore,top,home
    doc.background = '../image/texturec.jpg'
    doc.banner = ('../image/historic.gif', 472, 60)
    doc.author = '1776 Thomas Jefferson'
    doc.email = '*****@*****.**'
    doc.logo = ('../image/eagle21.gif', 64, 54)
    # parse Declaration of Independence
    re_hline = regex.compile('^--+$')
    re_title = regex.compile('^Title:\(.*$\)')
    font2 = Font(size='+2')
    # Read through an explicitly closed handle so the file is released
    # even if read() fails.
    source = open(os.path.join(datadir, 'DoI.txt'))
    try:
        s = source.read()
    finally:
        source.close()
    paragraphs = regsub.split(s, '\n\([\t ]*\n\)+')
    for para in paragraphs:
        if not para:
            continue
        if re_title.search(para) > -1:
            doc.title = re_title.group(1)
        elif re_hline.search(para) > -1:
            doc.append(HR())
        else:
            p = Paragraph( para )
            # using \` to match beginning of paragraph
            # ^ won't work because it'll match all the newlines
            p.markup('\`\(\w\)', font2, reg_type='regex')
            doc.append(p)
    doc.write(os.path.join(htmldir, filename))
def parse_area_coords(self, shape, text):
    """Parses coordinate string into list of numbers.

    Coordinates are stored differently depending on the shape of the
    object:

      'poly'    list of (x, y) tuples; the first point is appended again
                at the end unless the polygon is already closed
      'rect'    two (x, y) tuples: upper left, lower right
      'circle'  an (x, y) tuple for the center followed by an int radius

    Any other shape yields an empty list.

    Raise string.atoi_error when bad numbers occur.
    Raise IndexError when not enough coordinates are specified.
    """
    import regsub

    coords = []
    terms = map(string.atoi, regsub.split(string.strip(text), '[, ]+'))

    if shape == 'poly':
        # list of (x,y) tuples
        while len(terms) > 0:
            coords.append((terms[0], terms[1]))
            del terms[:2]
        # Make sure the polygon is closed.  Compare with the last point
        # itself: the slice coords[-1:] is a one-element list, which never
        # equals a tuple, so the closing point used to be added always.
        if coords[0] != coords[-1]:
            coords.append(coords[0])
    elif shape == 'rect':
        # (x,y) tuples for upper left, lower right
        coords.append((terms[0], terms[1]))
        coords.append((terms[2], terms[3]))
    elif shape == 'circle':
        # (x,y) tuple for center, followed by int for radius
        coords.append((terms[0], terms[1]))
        coords.append(terms[2])
    return coords
def getfields(self):
    """Return the fields of the current line, split on self.separators.

    The current line is self.lines[self.line].  The separator characters
    are wrapped in a character class; when self.merge is true a run of
    consecutive separators counts as a single split point.

    Returns [] when self.line is past the end of self.lines.
    """
    if self.line >= len(self.lines):
        return []
    pattern = '[' + self.separators + ']'
    if self.merge:
        pattern = pattern + '+'
    return regsub.split(self.lines[self.line], pattern)
# Constructor for the LaTeX object (see the docstring in the definition below).
#
# From the statements visible here it: stores `level` on the instance,
# untabifies the text with ST.untabify(), optionally prepends
# line2_re.group(2) when line2_re matches, strips hrefs into self.hrefs via
# _strip_hrefs(), separates bullets via _separate_bullets(), splits the text
# into paragraphs on ST.paragraph_divider, maps ST.indent_level over them and
# stores ST.structure(paragraphs) as self.structure.
#
# NOTE(review): this definition is collapsed onto a single physical line, so
# the extent of the `if isdoc:` body (only the line2_re check, or also the
# _strip_hrefs / _separate_bullets steps) cannot be determined from here.
# Restore the indentation from the upstream source rather than guessing; the
# code is intentionally left untouched.
def __init__(self, aStructuredString, level=1, isdoc=1): """Create a LaTeX object.""" self.level = level aStructuredString = ST.untabify(aStructuredString) if isdoc: if line2_re.match(aStructuredString) != -1: aStructuredString = line2_re.group(2) + aStructuredString aStructuredString, self.hrefs = _strip_hrefs(aStructuredString) aStructuredString = _separate_bullets(aStructuredString) paragraphs = regsub.split(aStructuredString, ST.paragraph_divider) paragraphs = map(ST.indent_level, paragraphs) self.structure = ST.structure(paragraphs)
def parse_provides(s):
    """Split a comma-separated string into its non-blank items.

    s is split on commas together with any spaces or tabs directly
    around them.  Items consisting only of spaces and tabs (including
    the empty string) are dropped; the remaining items are returned in
    order, unchanged.  Whitespace at the very start or end of s is not
    next to a comma and therefore stays on the first/last item.

    Side effect: switches the regex module to RE_SYNTAX_EMACS.
    """
    regex.set_syntax(regex_syntax.RE_SYNTAX_EMACS)
    # The former 'depstr' pattern was compiled on every call but never
    # used, so it has been dropped.
    blank = regex.compile('^[ \t]*$')
    items = []
    for item in regsub.split(s, '[ \t]*[,][ \t]*'):
        # search() returns -1 when the item is not blank
        if blank.search(item) < 0:
            items.append(item)
    return items
def parse(self, lines): import regsub, string, regex active = [] for line in lines: if self.debug: print '>', line, # blank line terminates current record if not line[:-1]: active = [] continue # remove optional comment and strip line line = string.strip(line[:string.find(line, '#')]) if not line: continue line = regsub.split(line, ' *: *') if len(line) == 2: line[0] = string.lower(line[0]) if line[0] == 'user-agent': # this record applies to this user agent if self.debug: print '>> user-agent:', line[1] active.append(line[1]) if not self.rules.has_key(line[1]): self.rules[line[1]] = [] elif line[0] == 'disallow': if line[1]: if self.debug: print '>> disallow:', line[1] for agent in active: self.rules[agent].append(regex.compile(line[1])) else: pass for agent in active: if self.debug: print '>> allow', agent self.rules[agent] = [] else: if self.debug: print '>> unknown:', line self.modified()
#print conf.vars set_default(conf, 'defaultCutoff', 90) #set_default(conf, 'cutoff', {}) set_default(conf, 'exclude', "/dev/fd*") set_default(conf, 'ignore', "") set_default(conf, 'mailTo', "root@localhost") set_default(conf, 'mailFrom', "Disk Usage Monitor <root>") set_default(conf, 'mailProg', "/usr/sbin/sendmail") cutoff = {} for key in conf.keys(): if key[:7] == 'cutoff[': seps = '[\'"]+' var = regsub.split(key, seps) if var and len(var) == 3: cutoff[var[1]] = conf[key] #print "cutoff[%s] = %s" % (var[1], conf[key]) hostname = gethostname() list = os.popen("df -hP -x none -x tmpfs -x iso9660 %s" % conf['ignore']).readlines() message = [] message.append("To: %s\n" % conf['mailTo']) message.append("From: %s\n" % conf['mailFrom']) message.append("Subject: Low disk space warning\n\n") message.append("Disk usage for %s:\n\n" % hostname)
def batchInput (
    filename    # string; name of the tab-delimited file to input
    ):
    # Purpose: read filename, parse, and make needed changes to TRs
    # Returns: integer number of errors found
    #    NOTE(review): no return statement is present in this function
    #    as seen here, so it currently returns None -- confirm intent.
    # Assumes: 1. current user has permission to read "filename";
    #    2. db's SQL routines have been initialized
    # Effects: reads the file and updates the tracking record tables as
    #    needed in the WTS database.  Rows whose TR number is missing,
    #    unparsable or unloadable are logged with log_error and skipped.
    #    Empty cells are ignored.  Single-valued controlled vocabulary
    #    fields have any '+'/'-' characters stripped before being set.
    #    Multi-valued fields are either replaced outright (no '+'/'-'
    #    in the cell) or edited item by item: '+x' adds x, '-x' removes
    #    x, anything else is logged.  All other fields are set as given.
    # Throws: 1. propagates wtslib.sqlError if there are problems updating
    #    the database; 2. propagates IOError if there are problems
    #    reading "filename"

    tdf = TabFile.TabFile (filename)

    # same pattern for every row, so compile it once
    plusMinus = regex.compile ('[+-]')

    for row in tdf.getList ():
        try:
            tr_nr = row ['TR Nr']
            tr = TrackRec.TrackRec (string.atoi (tr_nr))
        except (ValueError, KeyError):
            # The tuple is required: "except ValueError, KeyError" caught
            # only ValueError and bound the exception to the name KeyError.
            log_error (tdf.getLine (row), \
                'Cannot parse value for TR number, ' + \
                'or cannot load the specified TR')
            continue

        rowKeys = Set.Set ()
        for k in row.keys ():
            rowKeys.add (k)
        rowKeys.remove ('TR Nr')               # already handled this one
        rowKeys.remove ('Directory')           # managed by system
        rowKeys.remove ('Project Definition')  # excluded by spec
        rowKeys.remove ('Progress Notes')      # excluded by spec

        for k in rowKeys.values ():
            if row[k] == '':
                pass
            elif k in TrackRec.SINGLE_VALUED_CV:
                value = regsub.gsub ('[+-]', '', row[k])
                tr.set_Values ({k : value})
            elif k in TrackRec.MULTI_VALUED:
                old_value = tr.dict ()[k]
                if plusMinus.search (row[k]) == -1:
                    tr.set_Values ({k : row[k]})
                else:
                    changes = regsub.split (row[k], ' *, *')
                    for c in changes:
                        # c[:1] rather than c[0]: an empty item (for
                        # example after a trailing comma) is reported
                        # instead of raising IndexError
                        if c[:1] == '+':
                            tr.addToCV (k, c[1:])
                        elif c[:1] == '-':
                            tr.removeFromCV (k, c[1:])
                        else:
                            log_error (tdf.getLine (row),
                                'Missing +/- ' ' in %s' % k)
            else:
                tr.set_Values ({k : row[k]})

        try:
            vals = TrackRec.validate_TrackRec_Entry (tr.dict ())
            tr.set_Values (vals)
            tr.lock ()
            tr.save ()
        except TrackRec.alreadyLocked:
            # The old clause "except TrackRec.alreadyLocked,
            # wtslib.sqlError" caught only alreadyLocked and overwrote
            # wtslib.sqlError with the caught value.  sqlError is left to
            # propagate, as the Throws section above states.
            log_error (tdf.getLine (row), sys.exc_value)
def read(f):
    """Read everything from the file object f and parse it.

    The data is split at every single carriage return or newline
    character and the resulting list of lines is passed to
    parse_lines(), whose result is returned.
    """
    return parse_lines(regsub.split(f.read(), '[\r\n]'))
def piece( delimiter, source, index_zero_based ):
    """Return one delimited field of source with whitespace stripped.

    source is stripped and split with regsub.split() using delimiter as
    the pattern; the field at index_zero_based is returned after
    stripping.  Returns "" when index_zero_based is at or beyond the
    number of fields.
    """
    fields = regsub.split( string.strip( source ), delimiter )
    if index_zero_based < len( fields ):
        return string.strip( fields[ index_zero_based ] )
    return ""
def list2array( delimiter, list_string ):
    """Split list_string on delimiter and return the items as a myarray.

    list_string is stripped of surrounding whitespace, split with
    regsub.split() using delimiter as the pattern, and every resulting
    item is appended, in order, to a new myarray.myarray instance.
    """
    result = myarray.myarray()
    for item in regsub.split( string.strip( list_string ), delimiter ):
        result.append( item )
    return result
def process(msg):
    """Scan a probable bounce message and return the addresses it names.

    Returns None when no sender address can be determined, 0 when the
    local part of the sender is not one of the known mail-daemon names,
    and otherwise a list of unique address strings found in the text.
    The BOUNCE/REMOVE action recorded with each candidate is collected
    but is not part of the returned list.
    """
    candidates = []
    # See Mailman.Message.GetSender :(
    sender = msg.get('sender')
    if sender:
        name, addr = msg.getaddr('sender')
    else:
        name, addr = msg.getaddr('from')
    if addr and type(addr) == StringType:
        who_info = string.lower(addr)
    elif msg.unixfrom:
        # fall back to the second whitespace-separated word of unixfrom
        who_info = string.lower(string.split(msg.unixfrom)[1])
    else:
        return None
    # Split the sender into local part and host; without an '@' the host
    # defaults to mm_cfg.DEFAULT_HOST_NAME.
    at_index = string.find(who_info, '@')
    if at_index != -1:
        who_from = who_info[:at_index]
        remote_host = who_info[at_index+1:]
    else:
        who_from = who_info
        remote_host = mm_cfg.DEFAULT_HOST_NAME
    # Only messages sent by one of these daemon-style names are examined.
    if not who_from in ['mailer-daemon', 'postmaster', 'orphanage',
                        'postoffice', 'ucx_smtp', 'a2']:
        return 0
    # Look for a quoted MIME boundary in the content-type header.
    # regsub.splitx keeps the matched delimiters in its result, so element
    # [1] is the first match; [1:-1] drops the surrounding quotes.
    mime_info = msg.getheader('content-type')
    boundry = None
    if mime_info:
        mime_info_parts = regsub.splitx(
            mime_info, '[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]="[^"]+"')
        if len(mime_info_parts) > 1:
            boundry = regsub.splitx(mime_info_parts[1],
                                    '"[^"]+"')[1][1:-1]

    # snag out the message body
    msg.rewindbody()
    msgbody = msg.fp.read()
    if boundry:
        # keep the text after the first boundary marker when there is one
        relevant_text = string.split(msgbody, '--%s' % boundry)
        # Invalid MIME messages shouldn't cause exceptions
        if len(relevant_text) >= 2:
            relevant_text = relevant_text[1]
        else:
            relevant_text = relevant_text[0]
    else:
        # Keep only the text before the first of several marker lines.
        # This looks strange, but at least 2 are going to be no-ops.
        relevant_text = regsub.split(msgbody,
                                     '^.*Message header follows.*$')[0]
        relevant_text = regsub.split(relevant_text,
                                     '^The text you sent follows:.*$')[0]
        relevant_text = regsub.split(
            relevant_text, '^Additional Message Information:.*$')[0]
        relevant_text = regsub.split(relevant_text,
                                     '^-+Your original message-+.*$')[0]

    # Action codes stored alongside each candidate address.
    BOUNCE = 1
    REMOVE = 2

    # Bounce patterns where it's simple to figure out the email addr.
    email_regexp = '<?\([^ \t@|<>]+@[^ \t@<>]+\.[^ \t<>.]+\)>?'
    simple_bounce_pats = (
        (regex.compile('.*451 %s.*' % email_regexp), BOUNCE),
        (regex.compile('.*554 %s.*' % email_regexp), BOUNCE),
        (regex.compile('.*552 %s.*' % email_regexp), BOUNCE),
        (regex.compile('.*501 %s.*' % email_regexp), BOUNCE),
        (regex.compile('.*553 %s.*' % email_regexp), BOUNCE),
        (regex.compile('.*550 %s.*' % email_regexp), BOUNCE),
        (regex.compile('%s .bounced.*' % email_regexp), BOUNCE),
        (regex.compile('.*%s\.\.\. Deferred.*' % email_regexp), BOUNCE),
        (regex.compile('.*User %s not known.*' % email_regexp), REMOVE),
        (regex.compile('.*%s: User unknown.*' % email_regexp), REMOVE),
        (regex.compile('.*%s\.\.\. User unknown' % email_regexp), REMOVE))
    # patterns we can't directly extract the email (special case these)
    messy_pattern_1 = regex.compile('^Recipient .*$')
    messy_pattern_2 = regex.compile('^Addressee: .*$')
    messy_pattern_3 = regex.compile('^User .* not listed.*$')
    messy_pattern_4 = regex.compile('^550 [^ ]+\.\.\. User unknown.*$')
    messy_pattern_5 = regex.compile('^User [^ ]+ is not defined.*$')
    messy_pattern_6 = regex.compile('^[ \t]*[^ ]+: User unknown.*$')
    messy_pattern_7 = regex.compile('^[^ ]+ - User currently disabled.*$')
    # Patterns for cases where email addr is separate from error cue.
    # NOTE: this one pattern is compiled with `re`, not `regex`, so its
    # match() result is tested for truth below instead of against -1.
    separate_cue_1 = re.compile(
        '^554 .+\.\.\. unknown mailer error.*$', re.I)
    separate_addr_1 = regex.compile('expanded from: %s' % email_regexp)

    # message_grokked is maintained throughout but no longer returned
    # (see the commented-out return at the end).
    message_grokked = 0
    use_prospects = 0
    prospects = []                      # If bad but no candidates found.
    for line in string.split(relevant_text, '\n'):
        for pattern, action in simple_bounce_pats:
            if pattern.match(line) <> -1:
                # extract() is defined outside this function
                email = extract(line)
                candidates.append((string.split(email,',')[0], action))
                message_grokked = 1

        # Now for the special case messages that are harder to parse...
        # In each case a user name is taken from a fixed word position
        # and combined with the sender's host.
        if (messy_pattern_1.match(line) <> -1
            or messy_pattern_2.match(line) <> -1):
            username = string.split(line)[1]
            candidates.append(('%s@%s' % (username, remote_host),
                               BOUNCE))
            message_grokked = 1
            continue
        if (messy_pattern_3.match(line) <> -1
            or messy_pattern_4.match(line) <> -1
            or messy_pattern_5.match(line) <> -1):
            username = string.split(line)[1]
            candidates.append(('%s@%s' % (username, remote_host),
                               REMOVE))
            message_grokked = 1
            continue
        if messy_pattern_6.match(line) <> -1:
            # first word minus its last character (the ':' in the pattern)
            username = string.split(string.strip(line))[0][:-1]
            candidates.append(('%s@%s' % (username, remote_host),
                               REMOVE))
            message_grokked = 1
            continue
        if messy_pattern_7.match(line) <> -1:
            username = string.split(string.strip(line))[0]
            candidates.append(('%s@%s' % (username, remote_host),
                               REMOVE))
            message_grokked = 1
            continue

        if separate_cue_1.match(line):
            # Here's an error message that doesn't contain the addr.
            # Set a flag to use prospects found on separate lines.
            use_prospects = 1
        if separate_addr_1.search(line) != -1:
            # Found an addr that *might* be part of an error message.
            # Register it on prospects, where it will only be used if a
            # separate check identifies this message as an error message.
            prospects.append((separate_addr_1.group(1), BOUNCE))

    if use_prospects and prospects:
        candidates = candidates + prospects

    # Build the de-duplicated result, preserving first-seen order.
    did = []
    for who, action in candidates:
        # First clean up some cruft around the addrs.
        el = string.find(who, "...")
        if el != -1:
            who = who[:el]
        if len(who) > 1 and who[0] == '<':
            # Use stuff after open angle and before (optional) close:
            who = regsub.splitx(who[1:], ">")[0]
        if who not in did:
            did.append(who)
##    return message_grokked
    return did