def FilterWMSColnum(fout, text, sdate):
    """Copy *text* to *fout*, turning column and anchor markers into stamps.

    The text is split with ``recomb`` and every fragment is handled in turn:

    * a column marker (``recolumnumvals``) is validated and replaced by a
      space followed by ``<stamp coldate="..." colnum="<n>WS"/>``;
    * a column-continuation marker (``recolnumcontvals``) is validated and
      dropped from the output;
    * an anchor (``reanamevals``) is replaced by ``<stamp aname="..."/>``;
    * anything else is written through unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Text to filter.
        sdate: Expected date.  It is compared for equality with the ``.date``
            attribute of each parsed marker date, so it is presumably an ISO
            ``YYYY-MM-DD`` string -- confirm against callers.

    Raises:
        ContextException: If a marker's date differs from *sdate*, a column
            number is lower than the previous one, a continuation marker
            names a column other than the current one, or a fragment still
            matches ``recomb`` after none of the specific patterns did.
    """
    stamp = StampUrl(sdate)  # handed to ContextException for error messages
    colnum = -1  # -1 until the first column marker has been seen

    for fss in recomb.split(text):
        # column number marker
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    fragment=fss, stamp=stamp)

            lcolnum = int(columng.group(2))
            # Repeats and forward jumps are accepted; only a decrease after
            # the first marker is an error.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException(
                    "Colnum not incrementing %d -- %s" % (lcolnum, fss),
                    fragment=fss, stamp=stamp)

            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sWS"/>' % (sdate, lcolnum)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        # column continuation marker: checked, never written out
        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            ldate = mx.DateTime.DateTimeFrom(columncontg.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Cont column date disagrees %s -- %s" % (sdate, fss),
                    fragment=fss, stamp=stamp)

            lcolnum = int(columncontg.group(2))
            if colnum != lcolnum:
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, fss),
                    fragment=fss, stamp=stamp)
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            stamp.aname = '<stamp aname="%s"/>' % anameg.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched.  If the fragment still looks like one of
        # the separators, the detailed patterns have missed a case.
        if recomb.match(fss):
            raise ContextException('regexpvals not general enough',
                                   fragment=fss, stamp=stamp)

        # (The remarginal detection that used to follow is disabled.)
        fout.write(fss)
def FilterDebateColTime(fout, text, sdate, typ):
    """Copy *text* to *fout*, replacing column, time and anchor markers.

    The text is split with ``recomb`` and every fragment is handled in turn:

    * a column marker (``recolumnumvals``) is validated and replaced by
      ``<stamp coldate="..." colnum="..."/>``;
    * a column-continuation marker (``recolnumcontvals``) is validated and
      dropped from the output;
    * a time marker (``retimevals``) is run through ``TimeProcessing`` and
      replaced by ``<stamp time="..."/>``;
    * an anchor (``reanamevals``) is replaced by ``<stamp aname="..."/>``;
    * anything else is written through unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Text to filter.  When it starts with a ``<pagex ...
            type="today"`` tag, ``<stamp colnum="000"/>`` is written first
            and column markers are asserted not to occur.
        sdate: Expected date.  It is compared with the ``.date`` attribute of
            each parsed marker date and, as a string, with ``'2006-05-08'``;
            presumably an ISO ``YYYY-MM-DD`` string -- confirm against
            callers.
        typ: When equal to ``"debate"`` the legacy ``fixsubs`` substitutions
            are applied to *text* before filtering.

    Raises:
        ContextException: On a date mismatch, a column number more than one
            below the current one, a disagreeing continuation column (only
            for dates before 2006-05-08), an unparseable time, a fragment
            that still matches ``recomb``, or a ``remarginal`` hit.
        AssertionError: If a column marker appears in a "today" page.
    """
    # old style fixing (before patches existed)
    if typ == "debate":
        text = ApplyFixSubstitutions(text, sdate, fixsubs)

    stamp = StampUrl(sdate)  # handed to ContextException for error messages

    btodaytype = re.match('<pagex [^>]*type="today"', text)
    if btodaytype:
        fout.write('<stamp colnum="000"/>\n')

    colnum = -1  # -1 until the first column marker has been seen
    previoustime = []  # every time emitted so far, passed to TimeProcessing

    for fss in recomb.split(text):
        # column number marker
        columng = recolumnumvals.match(fss)
        if columng:
            assert not btodaytype  # no columns in today

            # check date
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    stamp=stamp, fragment=fss)

            # check number: a stamp exactly one below the current column is
            # tolerated (spurious decrement) and forward skips happen during
            # division listings; anything lower is an error.
            lcolnum = int(columng.group(2))
            if colnum != -1 and lcolnum < colnum - 1:
                raise ContextException(
                    "Colnum not incrementing %d smaller than %d -- %s"
                    % (lcolnum, colnum, fss),
                    stamp=stamp, fragment=fss)

            # write a column number stamp (has to increase no matter what)
            if lcolnum > colnum:
                colnum = lcolnum
            # NOTE(review): the source had lost its indentation; this
            # assignment is assumed to sit outside the ``if`` above -- confirm.
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
            fout.write('<stamp coldate="%s" colnum="%s"/>' % (sdate, colnum))
            continue

        # column continuation marker: checked, never written out
        columncg = recolnumcontvals.match(fss)
        if columncg:
            ldate = mx.DateTime.DateTimeFrom(columncg.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    stamp=stamp, fragment=fss)

            lcolnum = int(columncg.group(2))
            # the column number is only enforced for dates before 2006-05-08
            if colnum != lcolnum and sdate < '2006-05-08':
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, fss),
                    stamp=stamp, fragment=fss)
            continue

        # time marker
        timeg = retimevals.match(fss)
        if timeg:
            bracketed = (timeg.group(0)[0] == '[')
            stamptime = TimeProcessing(timeg.group(1), previoustime, bracketed, stamp)
            if not stamptime:
                raise ContextException(
                    "Time not matched: " + timeg.group(1),
                    stamp=stamp, fragment=fss)
            fout.write('<stamp time="%s"/>' % stamptime)
            previoustime.append(stamptime)
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            aname = anameg.group(1)
            stamp.aname = '<stamp aname="%s"/>' % aname
            fout.write('<stamp aname="%s"/>' % aname)
            continue

        # Nothing specific matched.  If the fragment still looks like one of
        # the separators, the detailed patterns have missed a case; print
        # some diagnostics before failing.
        if recomb.match(fss):
            print("$$$ %s $$$" % (fss,))
            print(regcolnumcont)
            print(re.match(regcolnumcont, fss, re.I))
            raise ContextException('regexpvals not general enough',
                                   stamp=stamp, fragment=fss)

        marginalg = remarginal.search(fss)
        if marginalg:
            print(fss)
            print('--------------------------------\n')
            print("marginal found:  %s" % (marginalg.groups(),))
            print("zeroth:  %s" % (marginalg.group(0),))
            print('--------------------------------\n')
            raise ContextException('marginal coltime/a detection case',
                                   stamp=stamp, fragment=fss)

        fout.write(fss)
def FilterWransColnum(fout, text, sdate):
    """Copy *text* to *fout*, turning column and anchor markers into stamps.

    After the legacy ``fixsubs`` substitutions and removal of empty
    ``{**con**}{**/con**}`` pairs, the text is split with ``recomb`` and
    every fragment is handled in turn:

    * a column marker (``recolumnumvals``) is validated and replaced by a
      space followed by ``<stamp coldate="..." colnum="<n>W"/>``;
    * a column-continuation marker (``recolnumcontvals``) is validated and
      replaced by a single space, except for the group-6 form, which moves
      on to the next column and writes a new column stamp;
    * an anchor (``reanamevals``) is replaced by ``<stamp aname="..."/>``;
    * anything else is written through unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Text to filter.
        sdate: Expected date.  It is compared with the ``.date`` attribute of
            each parsed marker date and, as a string, with ``'2006-05-08'``;
            presumably an ISO ``YYYY-MM-DD`` string -- confirm against
            callers.

    Raises:
        ContextException: On a date mismatch, a decreasing column number, a
            disagreeing continuation column, or a fragment that still
            matches ``recomb`` after none of the specific patterns did.
    """
    # Legacy individual substitution rules
    text = ApplyFixSubstitutions(text, sdate, fixsubs)

    # Remove junk
    text = text.replace("{**con**}{**/con**}", "")

    stamp = StampUrl(sdate)  # handed to ContextException for error messages
    colnum = -1  # -1 until the first column marker has been seen

    for fss in recomb.split(text):
        # column number marker
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    fragment=fss, stamp=stamp)

            lcolnum = int(columng.group(2))
            # Column numbers do get skipped during division listings, so
            # only a decrease after the first marker is an error.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException(
                    "Colnum not incrementing %d -- %s" % (lcolnum, fss),
                    fragment=fss, stamp=stamp)

            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        # column continuation marker; which regex groups are filled decides
        # how it is checked
        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            # dated form: date in group 1 or 3, column in group 2 or 4
            ldate = columncontg.group(1) or columncontg.group(3) or None
            lcolnum = columncontg.group(2) or columncontg.group(4) or None
            if ldate:
                ldate = mx.DateTime.DateTimeFrom(ldate).date
                if sdate != ldate:
                    raise ContextException(
                        "Cont column date disagrees %s -- %s" % (sdate, fss),
                        fragment=fss, stamp=stamp)
                lcolnum = int(lcolnum)
                # the column number is only enforced before 2006-05-08
                if colnum != lcolnum and sdate < '2006-05-08':
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss),
                        fragment=fss, stamp=stamp)
                # no need to output anything
                fout.write(' ')
                continue

            # group 5: must name the current column or the one before it
            if columncontg.group(5):
                lcolnum = int(columncontg.group(5))
                if colnum != lcolnum and colnum != lcolnum + 1:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss),
                        fragment=fss, stamp=stamp)
                fout.write(' ')
                continue

            # group 6: must name the next column, and starts it
            if columncontg.group(6):
                lcolnum = int(columncontg.group(6))
                if colnum + 1 != lcolnum:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss),
                        fragment=fss, stamp=stamp)
                colnum = lcolnum
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
                fout.write(' ')
                fout.write(stamp.stamp)
                continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            stamp.aname = '<stamp aname="%s"/>' % anameg.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched.  If the fragment still looks like one of
        # the separators, the detailed patterns have missed a case.
        if recomb.match(fss):
            raise ContextException('regexpvals not general enough',
                                   fragment=fss, stamp=stamp)

        # The remarginal detection that used to follow was deliberately
        # removed (FAI 2007-05-25).
        fout.write(fss)
def FilterDebateColTime(fout, text, sdate, typ):
    """Copy *text* to *fout*, replacing column, time and anchor markers.

    The text is split with ``recomb`` and every fragment is handled in turn:

    * a column marker (``recolumnumvals``) is validated and replaced by
      ``<stamp coldate="..." colnum="..."/>``;
    * a column-continuation marker (``recolnumcontvals``) is validated and
      dropped from the output;
    * a time marker (``retimevals``) is run through ``TimeProcessing`` and
      replaced by ``<stamp time="..."/>``;
    * an anchor (``reanamevals``) is replaced by ``<stamp aname="..."/>``;
    * anything else is written through unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Text to filter.  When it starts with a ``<pagex ...
            type="today"`` tag, ``<stamp colnum="000"/>`` is written first
            and column markers are asserted not to occur.
        sdate: Expected date.  It is compared with the ``.date`` attribute of
            each parsed marker date and, as a string, with ``'2006-05-08'``;
            presumably an ISO ``YYYY-MM-DD`` string -- confirm against
            callers.
        typ: When equal to ``"debate"`` the legacy ``fixsubs`` substitutions
            are applied to *text* before filtering.

    Raises:
        ContextException: On a date mismatch, a column number more than one
            below the current one, a disagreeing continuation column (only
            for dates before 2006-05-08), an unparseable time, a fragment
            that still matches ``recomb``, or a ``remarginal`` hit.
        AssertionError: If a column marker appears in a "today" page.
    """
    # old style fixing (before patches existed)
    if typ == "debate":
        text = ApplyFixSubstitutions(text, sdate, fixsubs)

    stamp = StampUrl(sdate)  # handed to ContextException for error messages

    btodaytype = re.match('<pagex [^>]*type="today"', text)
    if btodaytype:
        fout.write('<stamp colnum="000"/>\n')

    colnum = -1  # -1 until the first column marker has been seen
    previoustime = []  # every time emitted so far, passed to TimeProcessing

    for fss in recomb.split(text):
        # column number marker
        columng = recolumnumvals.match(fss)
        if columng:
            assert not btodaytype  # no columns in today

            # check date
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    stamp=stamp, fragment=fss)

            # check number: a stamp exactly one below the current column is
            # tolerated (spurious decrement) and forward skips happen during
            # division listings; anything lower is an error.
            lcolnum = int(columng.group(2))
            if colnum != -1 and lcolnum < colnum - 1:
                raise ContextException(
                    "Colnum not incrementing %d smaller than %d -- %s"
                    % (lcolnum, colnum, fss),
                    stamp=stamp, fragment=fss)

            # write a column number stamp (has to increase no matter what)
            if lcolnum > colnum:
                colnum = lcolnum
            # NOTE(review): the source had lost its indentation; this
            # assignment is assumed to sit outside the ``if`` above -- confirm.
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
            fout.write('<stamp coldate="%s" colnum="%s"/>' % (sdate, colnum))
            continue

        # column continuation marker: checked, never written out
        columncg = recolnumcontvals.match(fss)
        if columncg:
            ldate = mx.DateTime.DateTimeFrom(columncg.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    stamp=stamp, fragment=fss)

            lcolnum = int(columncg.group(2))
            # the column number is only enforced for dates before 2006-05-08
            if colnum != lcolnum and sdate < '2006-05-08':
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, fss),
                    stamp=stamp, fragment=fss)
            continue

        # time marker
        timeg = retimevals.match(fss)
        if timeg:
            bracketed = (timeg.group(0)[0] == '[')
            stamptime = TimeProcessing(timeg.group(1), previoustime, bracketed, stamp)
            if not stamptime:
                raise ContextException(
                    "Time not matched: " + timeg.group(1),
                    stamp=stamp, fragment=fss)
            fout.write('<stamp time="%s"/>' % stamptime)
            previoustime.append(stamptime)
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            aname = anameg.group(1)
            stamp.aname = '<stamp aname="%s"/>' % aname
            fout.write('<stamp aname="%s"/>' % aname)
            continue

        # Nothing specific matched.  If the fragment still looks like one of
        # the separators, the detailed patterns have missed a case; print
        # some diagnostics before failing.
        if recomb.match(fss):
            print("$$$ %s $$$" % (fss,))
            print(regcolnumcont)
            print(re.match(regcolnumcont, fss, re.I))
            raise ContextException('regexpvals not general enough',
                                   stamp=stamp, fragment=fss)

        marginalg = remarginal.search(fss)
        if marginalg:
            print(fss)
            print('--------------------------------\n')
            print("marginal found:  %s" % (marginalg.groups(),))
            print("zeroth:  %s" % (marginalg.group(0),))
            print('--------------------------------\n')
            raise ContextException('marginal coltime/a detection case',
                                   stamp=stamp, fragment=fss)

        fout.write(fss)
def FilterWransColnum(fout, text, sdate):
    """Copy *text* to *fout*, turning column and anchor markers into stamps.

    After the legacy ``fixsubs`` substitutions and removal of empty
    ``{**con**}{**/con**}`` pairs, the text is split with ``recomb`` and
    every fragment is handled in turn:

    * a column marker (``recolumnumvals``) is validated and replaced by a
      space followed by ``<stamp coldate="..." colnum="<n>W"/>``;
    * a column-continuation marker (``recolnumcontvals``) is validated and
      replaced by a single space, except for the group-6 form, which moves
      on to the next column and writes a new column stamp;
    * an anchor (``reanamevals``) is replaced by ``<stamp aname="..."/>``;
    * anything else is written through unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Text to filter.
        sdate: Expected date.  It is compared with the ``.date`` attribute of
            each parsed marker date and, as a string, with ``"2006-05-08"``;
            presumably an ISO ``YYYY-MM-DD`` string -- confirm against
            callers.

    Raises:
        ContextException: On a date mismatch, a decreasing column number, a
            disagreeing continuation column, or a fragment that still
            matches ``recomb`` after none of the specific patterns did.
    """
    # Legacy individual substitution rules
    text = ApplyFixSubstitutions(text, sdate, fixsubs)

    # Remove junk
    text = text.replace("{**con**}{**/con**}", "")

    stamp = StampUrl(sdate)  # handed to ContextException for error messages
    colnum = -1  # -1 until the first column marker has been seen

    for fss in recomb.split(text):
        # column number marker
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    fragment=fss, stamp=stamp
                )

            lcolnum = int(columng.group(2))
            # Column numbers do get skipped during division listings, so
            # only a decrease after the first marker is an error.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException(
                    "Colnum not incrementing %d -- %s" % (lcolnum, fss),
                    fragment=fss, stamp=stamp
                )

            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
            fout.write(" ")
            fout.write(stamp.stamp)
            continue

        # column continuation marker; which regex groups are filled decides
        # how it is checked
        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            # dated form: date in group 1 or 3, column in group 2 or 4
            ldate = columncontg.group(1) or columncontg.group(3) or None
            lcolnum = columncontg.group(2) or columncontg.group(4) or None
            if ldate:
                ldate = mx.DateTime.DateTimeFrom(ldate).date
                if sdate != ldate:
                    raise ContextException(
                        "Cont column date disagrees %s -- %s" % (sdate, fss),
                        fragment=fss, stamp=stamp
                    )
                lcolnum = int(lcolnum)
                # the column number is only enforced before 2006-05-08
                if colnum != lcolnum and sdate < "2006-05-08":
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss),
                        fragment=fss, stamp=stamp
                    )
                # no need to output anything
                fout.write(" ")
                continue

            # group 5: must name the current column or the one before it
            if columncontg.group(5):
                lcolnum = int(columncontg.group(5))
                if colnum != lcolnum and colnum != lcolnum + 1:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss),
                        fragment=fss, stamp=stamp
                    )
                fout.write(" ")
                continue

            # group 6: must name the next column, and starts it
            if columncontg.group(6):
                lcolnum = int(columncontg.group(6))
                if colnum + 1 != lcolnum:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss),
                        fragment=fss, stamp=stamp
                    )
                colnum = lcolnum
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
                fout.write(" ")
                fout.write(stamp.stamp)
                continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            stamp.aname = '<stamp aname="%s"/>' % anameg.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched.  If the fragment still looks like one of
        # the separators, the detailed patterns have missed a case.
        if recomb.match(fss):
            raise ContextException("regexpvals not general enough",
                                   fragment=fss, stamp=stamp)

        # The remarginal detection that used to follow was deliberately
        # removed (FAI 2007-05-25).
        fout.write(fss)
def FilterWMSColnum(fout, text, sdate):
    """Copy *text* to *fout*, turning column and anchor markers into stamps.

    The text is split with ``recomb`` and every fragment is handled in turn:

    * a column marker (``recolumnumvals``) is validated and replaced by a
      space followed by ``<stamp coldate="..." colnum="<n>WS"/>``;
    * a column-continuation marker (``recolnumcontvals``) is validated and
      dropped from the output;
    * an anchor (``reanamevals``) is replaced by ``<stamp aname="..."/>``;
    * anything else is written through unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Text to filter.
        sdate: Expected date.  It is compared for equality with the ``.date``
            attribute of each parsed marker date, so it is presumably an ISO
            ``YYYY-MM-DD`` string -- confirm against callers.

    Raises:
        ContextException: If a marker's date differs from *sdate*, a column
            number is lower than the previous one, a continuation marker
            names a column other than the current one, or a fragment still
            matches ``recomb`` after none of the specific patterns did.
    """
    stamp = StampUrl(sdate)  # handed to ContextException for error messages
    colnum = -1  # -1 until the first column marker has been seen

    for fss in recomb.split(text):
        # column number marker
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    fragment=fss, stamp=stamp)

            lcolnum = int(columng.group(2))
            # Repeats and forward jumps are accepted; only a decrease after
            # the first marker is an error.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException(
                    "Colnum not incrementing %d -- %s" % (lcolnum, fss),
                    fragment=fss, stamp=stamp)

            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sWS"/>' % (sdate, lcolnum)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        # column continuation marker: checked, never written out
        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            ldate = mx.DateTime.DateTimeFrom(columncontg.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Cont column date disagrees %s -- %s" % (sdate, fss),
                    fragment=fss, stamp=stamp)

            lcolnum = int(columncontg.group(2))
            if colnum != lcolnum:
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, fss),
                    fragment=fss, stamp=stamp)
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            stamp.aname = '<stamp aname="%s"/>' % anameg.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched.  If the fragment still looks like one of
        # the separators, the detailed patterns have missed a case.
        if recomb.match(fss):
            raise ContextException('regexpvals not general enough',
                                   fragment=fss, stamp=stamp)

        # (The remarginal detection that used to follow is disabled.)
        fout.write(fss)