Пример #1
0
def FilterWMSColnum(fout, text, sdate):
    """Copy *text* to *fout*, turning column and anchor markers into stamps.

    *text* is cut up with ``recomb.split`` and every fragment is handled in
    turn:

    * a fragment matching ``recolumnumvals`` has its date (group 1) checked
      against *sdate* and its column number (group 2) checked against the
      previous one; a space followed by a
      ``<stamp coldate="..." colnum="...WS"/>`` tag is written in its place;
    * a fragment matching ``recolnumcontvals`` is checked against *sdate*
      and the current column number, and nothing is written for it;
    * a fragment matching ``reanamevals`` is replaced by a
      ``<stamp aname="..."/>`` tag;
    * any other fragment is written out unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: The text to filter.
        sdate: Date the text belongs to. It is compared for equality with
            the ``.date`` of each date parsed out of a marker (presumably a
            yyyy-mm-dd string -- confirm against the caller).

    Raises:
        ContextException: If a marker's date differs from *sdate*, a column
            number is lower than the previous one, a continuation marker
            names a column other than the current one, or a fragment
            matches ``recomb`` without matching any of the specific
            patterns.
    """
    stamp = StampUrl(sdate)  # for error messages

    colnum = -1  # -1 means no column marker has been seen yet
    for fss in recomb.split(text):
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException("Column date disagrees %s -- %s" % (sdate, fss), fragment=fss, stamp=stamp)

            lcolnum = int(columng.group(2))

            # The first column is always accepted and forward jumps are
            # tolerated; only a number going backwards is an error.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException("Colnum not incrementing %d -- %s" % (lcolnum, fss), fragment=fss, stamp=stamp)

            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sWS"/>' % (sdate, lcolnum)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            ldate = mx.DateTime.DateTimeFrom(columncontg.group(1)).date
            if sdate != ldate:
                raise ContextException("Cont column date disagrees %s -- %s" % (sdate, fss), fragment=fss, stamp=stamp)
            lcolnum = int(columncontg.group(2))
            if colnum != lcolnum:
                raise ContextException("Cont column number disagrees %d -- %s" % (colnum, fss), fragment=fss, stamp=stamp)

            # a valid continuation marker produces no output
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            aname = anameg.group(1)
            stamp.aname = '<stamp aname="%s"/>' % aname
            fout.write(stamp.aname)
            continue

        # Nothing detected.  A fragment that recomb itself matches must have
        # been one of the split markers, so one of the patterns above
        # should have recognised it.
        if recomb.match(fss):
            raise ContextException('regexpvals not general enough', fragment=fss, stamp=stamp)

        # NOTE: the remarginal near-miss check is disabled in this filter.
        fout.write(fss)
Пример #2
0
def FilterLordsColtime(fout, text, sdate):
    """Copy *text* to *fout*, turning column, time and anchor markers into stamps.

    *text* is cut up with ``recomb.split`` and every fragment is handled in
    turn:

    * a fragment matching ``recolumnumvals`` has its date (group 1) checked
      against *sdate*; a ``<stamp coldate="..." colnum="..."/>`` tag is
      written only when its number (group 3) is the first one seen or is
      1 to 5 above the current column, otherwise the marker is dropped;
    * a fragment matching ``retimevals`` is passed through
      ``TimeProcessing`` and written as ``<stamp time="..."/>``, unless the
      captured text starts with ``</h5>`` or ``</st>``;
    * a fragment matching ``regtime3vals`` (a heading containing a
      spelled-out time) is written back unchanged, followed by a time stamp
      derived from the spelled-out hour;
    * a fragment matching ``reanamevals`` is replaced by a
      ``<stamp aname="..."/>`` tag;
    * any other fragment is written out unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: The text to filter.
        sdate: Date the text belongs to. It is compared for equality with
            the ``.date`` of each date parsed out of a column marker.

    Raises:
        ContextException: If a column marker's date differs from *sdate*,
            a fragment matches ``recomb`` without matching any specific
            pattern, or ``remarginal`` finds a near-miss marker inside
            ordinary text.
        AssertionError: If a heading time appears after other times were
            already recorded, or its spelled-out hour is not one of the
            known ones.
    """
    # Spelled-out hours met so far in headings, each with the half of the
    # day it falls in; "%s" receives the minutes.
    headingtimes = {
        "two": "2:%s pm",
        "three": "3:%s pm",
        "six": "6:%s pm",
        "nine": "9:%s am",
        "eleven": "11:%s am",
        "ten": "10:%s am",
    }

    colnum = -1  # -1 means no column marker has been accepted yet
    stampurl = StampUrl(sdate)
    previoustime = []
    for fss in recomb.split(text):
        # column number type

        # we need some very elaborate checking to sort out the sections, by
        # titles that are sometimes on the wrong side of the first column,
        # and by colnums that miss the GC code in that section.
        # column numbers are also missed during divisions, and this exception
        # should be detected and noted.

        # That implies that this is the filter which detects the boundaries
        # between the standard four sections.
        columng = recolumnumvals.match(fss)
        if columng:
            # check date
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException("Column date disagrees %s -- %s" % (sdate, fss), stamp=stampurl, fragment=fss)

            # check number (group 2 is matched but not used here)
            lcolnum = int(columng.group(3))
            if lcolnum == colnum - 1 or lcolnum == colnum:
                pass  # spurious decrement or repeat of a column number stamp
            elif colnum == -1 or colnum + 1 <= lcolnum <= colnum + 5:
                # good (we get skipped columns in divisions); the window
                # was 2 but that caused us to miss ones
                colnum = lcolnum
                fout.write('<stamp coldate="%s" colnum="%s"/>' % (sdate, colnum))
            # any other number is silently ignored: column numbers do get
            # skipped during division listings
            continue

        timeg = retimevals.match(fss)
        if timeg:
            time = timeg.group(1)
            if not re.match('(?i)(?:</h5>|</st>)', time):
                time = TimeProcessing(time, previoustime, False, stampurl)
                fout.write('<stamp time="%s"/>' % time)
                if time:
                    previoustime.append(time)
            continue

        # special lift a time out of the heading
        regtime3 = regtime3vals.match(fss)
        if regtime3:
            fout.write(fss)  # put this heading back into the flow of text
            assert not previoustime
            lntimematch = re.match(r"(half[\- ]past )?(\w+)(-thirty)?$", regtime3.group(1))
            lnhour = lntimematch and lntimematch.group(2)
            # strange way to do it, but I'm keeping tab on examples, and the
            # transition between am and pm
            lntimep = headingtimes.get(lnhour)
            if lntimep is None:
                print("-------------'%s'" % regtime3.group(1))
                # raised explicitly so the check survives python -O
                raise AssertionError("unrecognised heading time '%s'" % regtime3.group(1))
            assert not lntimematch.group(1) or not lntimematch.group(3)
            halfpast = lntimematch.group(1) or lntimematch.group(3)
            ntime = lntimep % (halfpast and "30" or "00")
            time = TimeProcessing(ntime, previoustime, False, stampurl)
            fout.write('<stamp time="%s"/>' % time)
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            aname = anameg.group(1)
            fout.write('<stamp aname="%s"/>' % aname)
            stampurl.aname = aname
            continue

        # nothing detected
        # check if we've missed anything obvious
        if recomb.match(fss):
            print("$$$ %s $$-$" % fss)
            # a programming error between splitting and matching
            raise ContextException(' regexpvals not general enough ', stamp=stampurl, fragment=fss)
        marginal = remarginal.search(fss)
        if marginal:
            print(marginal.group(0))
            lregcolumnum6 = r'(?i)<p>\s*</ul>\s*<a name="column_\d+">(?:</a>)?\s*<b>[^:<]*:\s*column\s*\d+\s*</b></p>\s*<ul><font size=3>'
            print(re.findall(lregcolumnum6, fss))
            raise ContextException(' marginal coltime detection case ', stamp=stampurl, fragment=fss)
        fout.write(fss)
Пример #3
0
def FilterDebateColTime(fout, text, sdate, typ):
    """Copy *text* to *fout*, turning column, time and anchor markers into stamps.

    When *typ* is ``"debate"`` the text is first run through
    ``ApplyFixSubstitutions``.  If the text starts with a ``<pagex>`` tag of
    type "today", a ``<stamp colnum="000"/>`` line is written first and no
    column markers are expected at all.

    *text* is then cut up with ``recomb.split`` and every fragment is
    handled in turn:

    * a fragment matching ``recolumnumvals`` has its date (group 1) checked
      against *sdate* and its number (group 2) checked against the current
      column; a ``<stamp coldate="..." colnum="..."/>`` tag carrying the
      highest column number seen so far is written in its place;
    * a fragment matching ``recolnumcontvals`` is checked against *sdate*
      (and, before 2006-05-08, against the current column number), and
      nothing is written for it;
    * a fragment matching ``retimevals`` is passed through
      ``TimeProcessing`` and written as ``<stamp time="..."/>``;
    * a fragment matching ``reanamevals`` is replaced by a
      ``<stamp aname="..."/>`` tag;
    * any other fragment is written out unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: The text to filter.
        sdate: Date the text belongs to.  It is compared for equality with
            the ``.date`` of parsed marker dates and, as a string, against
            ``'2006-05-08'``.
        typ: Kind of text; only the value ``"debate"`` changes behaviour.

    Raises:
        ContextException: If a marker's date differs from *sdate*, a column
            number drops by two or more, a continuation marker names the
            wrong column, a time cannot be processed, a fragment matches
            ``recomb`` without matching any specific pattern, or
            ``remarginal`` finds a near-miss marker inside ordinary text.
        AssertionError: If a column marker appears in a "today" page.
    """
    # old style fixing (before patches existed)
    if typ == "debate":
        text = ApplyFixSubstitutions(text, sdate, fixsubs)

    stamp = StampUrl(sdate)  # for error messages
    btodaytype = re.match('<pagex [^>]*type="today"', text)
    if btodaytype:
        fout.write('<stamp colnum="000"/>\n')

    colnum = -1  # -1 means no column marker has been seen yet
    previoustime = []
    for fss in recomb.split(text):
        # column number type
        columng = recolumnumvals.match(fss)
        if columng:
            assert not btodaytype  # no columns in today

            # check date
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException("Column date disagrees %s -- %s" % (sdate, fss), stamp=stamp, fragment=fss)

            # check number: a stamp exactly one below the current column is
            # a known spurious decrement, and forward jumps happen because
            # column numbers get skipped during division listings, so only
            # a drop of two or more is an error
            lcolnum = int(columng.group(2))
            if colnum != -1 and lcolnum < colnum - 1:
                raise ContextException("Colnum not incrementing %d smaller than %d -- %s" % (lcolnum, colnum, fss), stamp=stamp, fragment=fss)

            # write a column number stamp (has to increase no matter what)
            if lcolnum > colnum:
                colnum = lcolnum
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
            fout.write('<stamp coldate="%s" colnum="%s"/>' % (sdate, colnum))
            continue

        columncg = recolnumcontvals.match(fss)
        if columncg:
            ldate = mx.DateTime.DateTimeFrom(columncg.group(1)).date
            if sdate != ldate:
                raise ContextException("Column date disagrees %s -- %s" % (sdate, fss), stamp=stamp, fragment=fss)

            lcolnum = int(columncg.group(2))
            if colnum != lcolnum and sdate < '2006-05-08':
                raise ContextException("Cont column number disagrees %d -- %s" % (colnum, fss), stamp=stamp, fragment=fss)

            # a valid continuation marker produces no output
            continue

        timeg = retimevals.match(fss)
        if timeg:
            # the third argument tells TimeProcessing whether the matched
            # text opened with a square bracket
            time = TimeProcessing(timeg.group(1), previoustime, (timeg.group(0)[0] == '['), stamp)
            if not time:
                raise ContextException("Time not matched: " + timeg.group(1), stamp=stamp, fragment=fss)

            fout.write('<stamp time="%s"/>' % time)
            previoustime.append(time)
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            aname = anameg.group(1)
            stamp.aname = '<stamp aname="%s"/>' % aname
            fout.write('<stamp aname="%s"/>' % aname)
            continue

        # nothing detected
        # check if we've missed anything obvious
        if recomb.match(fss):
            print("$$$ %s $$$" % fss)
            print(regcolnumcont)
            print(re.match(regcolnumcont, fss, re.IGNORECASE))
            raise ContextException('regexpvals not general enough', stamp=stamp, fragment=fss)
        marginal = remarginal.search(fss)
        if marginal:
            print(fss)
            print('--------------------------------\n')
            print("marginal found:  %s" % (marginal.groups(),))
            print("zeroth:  %s" % marginal.group(0))
            print('--------------------------------\n')
            raise ContextException('marginal coltime/a detection case', stamp=stamp, fragment=fss)
        fout.write(fss)
Пример #4
0
def FilterWransColnum(fout, text, sdate):
    """Copy *text* to *fout*, turning column and anchor markers into stamps.

    The text is first run through ``ApplyFixSubstitutions`` and stripped of
    empty ``{**con**}{**/con**}`` pairs.  It is then cut up with
    ``recomb.split`` and every fragment is handled in turn:

    * a fragment matching ``recolumnumvals`` has its date (group 1) checked
      against *sdate* and its number (group 2) checked against the previous
      column; a space followed by a
      ``<stamp coldate="..." colnum="...W"/>`` tag is written in its place;
    * a fragment matching ``recolnumcontvals`` is validated according to
      which of its groups matched (see the inline comments) and replaced
      by a space, plus a new column stamp when it opens the next column;
    * a fragment matching ``reanamevals`` is replaced by a
      ``<stamp aname="..."/>`` tag;
    * any other fragment is written out unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: The text to filter.
        sdate: Date the text belongs to.  It is compared for equality with
            the ``.date`` of parsed marker dates and, as a string, against
            ``'2006-05-08'``.

    Raises:
        ContextException: If a marker's date differs from *sdate*, a column
            number is lower than the previous one, a continuation marker
            names an unexpected column, or a fragment matches ``recomb``
            without being handled by any specific pattern.
    """
    # Legacy individual substitution rules
    text = ApplyFixSubstitutions(text, sdate, fixsubs)

    # Remove junk
    text = text.replace("{**con**}{**/con**}", "")

    stamp = StampUrl(sdate)  # for error messages

    colnum = -1  # -1 means no column marker has been seen yet
    for fss in recomb.split(text):
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException("Column date disagrees %s -- %s" %
                                       (sdate, fss),
                                       fragment=fss,
                                       stamp=stamp)

            lcolnum = int(columng.group(2))
            # The first column is always accepted, and forward jumps are
            # tolerated because column numbers do get skipped during
            # division listings; only going backwards is an error.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException("Colnum not incrementing %d -- %s" %
                                       (lcolnum, fss),
                                       fragment=fss,
                                       stamp=stamp)

            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate,
                                                                  lcolnum)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            # groups 1/2 and 3/4 are alternative (date, number) pairs
            ldate = columncontg.group(1) or columncontg.group(3) or None
            lcolnum = columncontg.group(2) or columncontg.group(4) or None
            if ldate:
                ldate = mx.DateTime.DateTimeFrom(ldate).date
                if sdate != ldate:
                    raise ContextException(
                        "Cont column date disagrees %s -- %s" % (sdate, fss),
                        fragment=fss,
                        stamp=stamp)
                lcolnum = int(lcolnum)
                if colnum != lcolnum and sdate < '2006-05-08':
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" %
                        (colnum, fss),
                        fragment=fss,
                        stamp=stamp)

                # no stamp is needed; the marker is replaced by a space
                fout.write(' ')
                continue
            if columncontg.group(5):
                # number-only marker: it may name the current column or
                # the one before it
                lcolnum = int(columncontg.group(5))
                if colnum != lcolnum and colnum != lcolnum + 1:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" %
                        (colnum, fss),
                        fragment=fss,
                        stamp=stamp)
                fout.write(' ')
                continue
            if columncontg.group(6):
                # number-only marker that must open the next column, so a
                # fresh column stamp is emitted
                lcolnum = int(columncontg.group(6))
                if colnum + 1 != lcolnum:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" %
                        (colnum, fss),
                        fragment=fss,
                        stamp=stamp)
                colnum = lcolnum
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate,
                                                                      lcolnum)
                fout.write(' ')
                fout.write(stamp.stamp)
                continue
            # a match with none of these groups set falls through to the
            # checks below

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            aname = anameg.group(1)
            stamp.aname = '<stamp aname="%s"/>' % aname
            fout.write(stamp.aname)
            continue

        # nothing detected
        # check if we've missed anything obvious
        if recomb.match(fss):
            raise ContextException('regexpvals not general enough',
                                   fragment=fss,
                                   stamp=stamp)

        # NOTE: the remarginal near-miss check was removed on purpose
        # (FAI 2007-05-25).
        fout.write(fss)
Пример #5
0
def FilterDebateColTime(fout, text, sdate, typ):
    """Replace column, time and anchor markers in *text* with stamp tags.

    For ``typ == "debate"`` the text is first passed through
    ``ApplyFixSubstitutions``.  A text that begins with a ``<pagex>`` tag
    of type "today" gets a leading ``<stamp colnum="000"/>`` line and must
    not contain column markers.

    The text is split with ``recomb.split`` and each fragment is written
    to *fout* according to the first pattern it matches:

    * ``recolumnumvals`` -- date (group 1) and number (group 2) are
      validated and a ``<stamp coldate="..." colnum="..."/>`` tag holding
      the highest column number seen so far is written;
    * ``recolnumcontvals`` -- date and (before 2006-05-08) number are
      validated; nothing is written;
    * ``retimevals`` -- the time is normalised by ``TimeProcessing`` and
      written as ``<stamp time="..."/>``;
    * ``reanamevals`` -- written as ``<stamp aname="..."/>``;
    * otherwise the fragment is copied through unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: The text to filter.
        sdate: Date the text belongs to.  It is compared for equality with
            the ``.date`` of parsed marker dates and, as a string, against
            ``'2006-05-08'``.
        typ: Kind of text; only the value ``"debate"`` changes behaviour.

    Raises:
        ContextException: On a date mismatch, a column number that drops
            by two or more, a continuation marker for the wrong column, a
            time ``TimeProcessing`` rejects, a fragment that matches
            ``recomb`` but no specific pattern, or a near-miss marker
            found by ``remarginal`` inside ordinary text.
        AssertionError: If a column marker appears in a "today" page.
    """
    # old style fixing (before patches existed)
    if typ == "debate":
        text = ApplyFixSubstitutions(text, sdate, fixsubs)

    stamp = StampUrl(sdate)  # for error messages
    btodaytype = re.match('<pagex [^>]*type="today"', text)
    if btodaytype:
        fout.write('<stamp colnum="000"/>\n')

    colnum = -1  # -1 means no column marker has been seen yet
    previoustime = []
    for fss in recomb.split(text):
        # column number type
        columng = recolumnumvals.match(fss)
        if columng:
            assert not btodaytype  # no columns in today

            # check date
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException("Column date disagrees %s -- %s" %
                                       (sdate, fss),
                                       stamp=stamp,
                                       fragment=fss)

            # check number: one below the current column is a known
            # spurious decrement, and numbers get skipped during division
            # listings, so only a drop of two or more is rejected
            lcolnum = int(columng.group(2))
            if colnum != -1 and lcolnum < colnum - 1:
                raise ContextException(
                    "Colnum not incrementing %d smaller than %d -- %s" %
                    (lcolnum, colnum, fss),
                    stamp=stamp,
                    fragment=fss)

            # write a column number stamp (has to increase no matter what)
            if lcolnum > colnum:
                colnum = lcolnum
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate,
                                                                      lcolnum)
            fout.write('<stamp coldate="%s" colnum="%s"/>' % (sdate, colnum))
            continue

        columncg = recolnumcontvals.match(fss)
        if columncg:
            ldate = mx.DateTime.DateTimeFrom(columncg.group(1)).date
            if sdate != ldate:
                raise ContextException("Column date disagrees %s -- %s" %
                                       (sdate, fss),
                                       stamp=stamp,
                                       fragment=fss)

            lcolnum = int(columncg.group(2))
            if colnum != lcolnum and sdate < '2006-05-08':
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, fss),
                    stamp=stamp,
                    fragment=fss)

            # a valid continuation marker produces no output
            continue

        timeg = retimevals.match(fss)
        if timeg:
            # the flag records whether the matched text opened with "["
            bracketed = (timeg.group(0)[0] == '[')
            time = TimeProcessing(timeg.group(1), previoustime, bracketed,
                                  stamp)
            if not time:
                raise ContextException("Time not matched: " + timeg.group(1),
                                       stamp=stamp,
                                       fragment=fss)

            fout.write('<stamp time="%s"/>' % time)
            previoustime.append(time)
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            aname = anameg.group(1)
            stamp.aname = '<stamp aname="%s"/>' % aname
            fout.write('<stamp aname="%s"/>' % aname)
            continue

        # nothing detected
        # check if we've missed anything obvious
        if recomb.match(fss):
            print("$$$ %s $$$" % fss)
            print(regcolnumcont)
            print(re.match(regcolnumcont, fss, re.IGNORECASE))
            raise ContextException('regexpvals not general enough',
                                   stamp=stamp,
                                   fragment=fss)
        marginal = remarginal.search(fss)
        if marginal:
            print(fss)
            print('--------------------------------\n')
            print("marginal found:  %s" % (marginal.groups(),))
            print("zeroth:  %s" % marginal.group(0))
            print('--------------------------------\n')
            raise ContextException('marginal coltime/a detection case',
                                   stamp=stamp,
                                   fragment=fss)
        fout.write(fss)
Пример #6
0
def FilterWransColnum(fout, text, sdate):
    """Replace column and anchor markers in *text* with stamp tags.

    After ``ApplyFixSubstitutions`` has been applied and empty
    ``{**con**}{**/con**}`` pairs removed, the text is split with
    ``recomb.split`` and each fragment is written to *fout* according to
    the first pattern it matches:

    * ``recolumnumvals`` -- date (group 1) and number (group 2) are
      validated and a space plus ``<stamp coldate="..." colnum="...W"/>``
      is written;
    * ``recolnumcontvals`` -- validated according to which groups matched
      and replaced by a space (plus a new column stamp when group 6 opens
      the next column);
    * ``reanamevals`` -- written as ``<stamp aname="..."/>``;
    * otherwise the fragment is copied through unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: The text to filter.
        sdate: Date the text belongs to.  It is compared for equality with
            the ``.date`` of parsed marker dates and, as a string, against
            ``"2006-05-08"``.

    Raises:
        ContextException: On a date mismatch, a column number lower than
            the previous one, a continuation marker naming an unexpected
            column, or a fragment that matches ``recomb`` without being
            handled by any specific pattern.
    """
    # Legacy individual substitution rules
    text = ApplyFixSubstitutions(text, sdate, fixsubs)

    # Remove junk
    text = text.replace("{**con**}{**/con**}", "")

    stamp = StampUrl(sdate)  # for error messages

    colnum = -1  # -1 means no column marker has been seen yet
    for fss in recomb.split(text):
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException("Column date disagrees %s -- %s" % (sdate, fss), fragment=fss, stamp=stamp)

            lcolnum = int(columng.group(2))
            # Column numbers do get skipped during division listings, so
            # any forward move is accepted (as is the very first column);
            # only a number going backwards is an error.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException("Colnum not incrementing %d -- %s" % (lcolnum, fss), fragment=fss, stamp=stamp)

            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
            fout.write(" ")
            fout.write(stamp.stamp)
            continue

        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            # groups 1/2 and 3/4 are alternative (date, number) pairs
            ldate = columncontg.group(1) or columncontg.group(3) or None
            lcolnum = columncontg.group(2) or columncontg.group(4) or None
            if ldate:
                ldate = mx.DateTime.DateTimeFrom(ldate).date
                if sdate != ldate:
                    raise ContextException(
                        "Cont column date disagrees %s -- %s" % (sdate, fss), fragment=fss, stamp=stamp
                    )
                lcolnum = int(lcolnum)
                if colnum != lcolnum and sdate < "2006-05-08":
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss), fragment=fss, stamp=stamp
                    )

                # no stamp is needed; the marker is replaced by a space
                fout.write(" ")
                continue
            if columncontg.group(5):
                # number-only marker: it may name the current column or
                # the one before it
                lcolnum = int(columncontg.group(5))
                if colnum != lcolnum and colnum != lcolnum + 1:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss), fragment=fss, stamp=stamp
                    )
                fout.write(" ")
                continue
            if columncontg.group(6):
                # number-only marker that must open the next column, so a
                # fresh column stamp is emitted
                lcolnum = int(columncontg.group(6))
                if colnum + 1 != lcolnum:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss), fragment=fss, stamp=stamp
                    )
                colnum = lcolnum
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
                fout.write(" ")
                fout.write(stamp.stamp)
                continue
            # a match with none of these groups set falls through to the
            # checks below

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            aname = anameg.group(1)
            stamp.aname = '<stamp aname="%s"/>' % aname
            fout.write(stamp.aname)
            continue

        # nothing detected
        # check if we've missed anything obvious
        if recomb.match(fss):
            raise ContextException("regexpvals not general enough", fragment=fss, stamp=stamp)

        # NOTE: the remarginal near-miss check was removed on purpose
        # (FAI 2007-05-25).
        fout.write(fss)
Пример #7
0
def FilterWMSColnum(fout, text, sdate):
    """Replace column and anchor markers in *text* with stamp tags.

    The text is split with ``recomb.split`` and each fragment is written to
    *fout* according to the first pattern it matches:

    * ``recolumnumvals`` -- date (group 1) and number (group 2) are
      validated and a space plus ``<stamp coldate="..." colnum="...WS"/>``
      is written;
    * ``recolnumcontvals`` -- date and number are validated against
      *sdate* and the current column; nothing is written;
    * ``reanamevals`` -- written as ``<stamp aname="..."/>``;
    * otherwise the fragment is copied through unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: The text to filter.
        sdate: Date the text belongs to.  It is compared for equality with
            the ``.date`` of each date parsed out of a marker (presumably a
            yyyy-mm-dd string -- confirm against the caller).

    Raises:
        ContextException: On a date mismatch, a column number lower than
            the previous one, a continuation marker naming a column other
            than the current one, or a fragment that matches ``recomb``
            but none of the specific patterns.
    """
    stamp = StampUrl(sdate)  # for error messages

    colnum = -1  # -1 means no column marker has been seen yet
    for fss in recomb.split(text):
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException("Column date disagrees %s -- %s" %
                                       (sdate, fss),
                                       fragment=fss,
                                       stamp=stamp)

            lcolnum = int(columng.group(2))

            # The first column is always accepted and forward jumps are
            # tolerated; only a number going backwards is an error.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException("Colnum not incrementing %d -- %s" %
                                       (lcolnum, fss),
                                       fragment=fss,
                                       stamp=stamp)
            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sWS"/>' % (sdate,
                                                                   lcolnum)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            ldate = mx.DateTime.DateTimeFrom(columncontg.group(1)).date
            if sdate != ldate:
                raise ContextException("Cont column date disagrees %s -- %s" %
                                       (sdate, fss),
                                       fragment=fss,
                                       stamp=stamp)
            lcolnum = int(columncontg.group(2))
            if colnum != lcolnum:
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, fss),
                    fragment=fss,
                    stamp=stamp)

            # a valid continuation marker produces no output
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            aname = anameg.group(1)
            stamp.aname = '<stamp aname="%s"/>' % aname
            fout.write(stamp.aname)
            continue

        # Nothing detected.  A fragment that recomb itself matches must have
        # been one of the split markers, so one of the patterns above
        # should have recognised it.
        if recomb.match(fss):
            raise ContextException('regexpvals not general enough',
                                   fragment=fss,
                                   stamp=stamp)

        # NOTE: the remarginal near-miss check is disabled in this filter.
        fout.write(fss)