Пример #1
0
def FilterWMSColnum(fout, text, sdate):
    """Copy ``text`` to ``fout``, replacing column markers with stamp tags.

    ``text`` is split with ``recomb`` and every resulting fragment is
    classified in this order:

    * column-number marker (``recolumnumvals``): its date must equal
      ``sdate`` and its number must not be lower than the last column number
      seen. A single space followed by
      ``<stamp coldate="..." colnum="...WS"/>`` is written.
    * column-continuation marker (``recolnumcontvals``): its date must equal
      ``sdate`` and its number must equal the current column number.
      Nothing is written.
    * anchor marker (``reanamevals``): ``<stamp aname="..."/>`` is written.
    * anything else is written through unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Input text to filter.
        sdate: Date the markers are checked against; also interpolated into
            the emitted column stamps.

    Raises:
        ContextException: If a marker disagrees with ``sdate`` or with the
            running column number, or if a fragment matches ``recomb`` but
            none of the specific marker patterns.
    """
    stamp = StampUrl(sdate)  # handed to every ContextException for error messages

    colnum = -1  # stays -1 until the first column-number marker is seen
    for fss in recomb.split(text):
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    fragment=fss, stamp=stamp)

            lcolnum = int(columng.group(2))
            # Skipped or repeated numbers are accepted; going backwards is not.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException(
                    "Colnum not incrementing %d -- %s" % (lcolnum, fss),
                    fragment=fss, stamp=stamp)

            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sWS"/>' % (sdate, lcolnum)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            ldate = mx.DateTime.DateTimeFrom(columncontg.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Cont column date disagrees %s -- %s" % (sdate, fss),
                    fragment=fss, stamp=stamp)
            lcolnum = int(columncontg.group(2))
            if colnum != lcolnum:
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, fss),
                    fragment=fss, stamp=stamp)
            # A valid continuation marker produces no output.
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            stamp.aname = '<stamp aname="%s"/>' % anameg.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched. If the fragment still matches the splitting
        # pattern, the specific patterns above have missed a case.
        if recomb.match(fss):
            raise ContextException('regexpvals not general enough',
                                   fragment=fss, stamp=stamp)
        # The remarginal near-miss check is deliberately not applied here
        # (it was already disabled in the original code).
        fout.write(fss)
Пример #2
0
def FilterDebateColTime(fout, text, sdate, typ):
    """Copy ``text`` to ``fout``, replacing column/time/anchor markers with stamps.

    When ``typ`` is ``"debate"`` the legacy ``fixsubs`` substitutions are
    applied to ``text`` first. If ``text`` starts with a
    ``<pagex ... type="today"`` tag, ``<stamp colnum="000"/>`` plus a newline
    is written before anything else, and column-number markers are then not
    expected at all.

    ``text`` is split with ``recomb`` and every fragment is classified in
    this order:

    * column-number marker (``recolumnumvals``): its date must equal
      ``sdate``. The running column number only ever increases; a marker
      exactly one lower than it is tolerated, any larger drop is an error.
      ``<stamp coldate="..." colnum="..."/>`` is written with the running
      column number.
    * column-continuation marker (``recolnumcontvals``): its date must equal
      ``sdate``; its number must equal the running column number when
      ``sdate < '2006-05-08'``. Nothing is written.
    * time marker (``retimevals``): converted with ``TimeProcessing`` and
      written as ``<stamp time="..."/>``.
    * anchor marker (``reanamevals``): ``<stamp aname="..."/>`` is written.
    * anything else is written through unchanged, unless it matches
      ``recomb`` or ``remarginal``, in which case diagnostics are printed
      and an error is raised.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Input text to filter.
        sdate: Date the markers are checked against; also interpolated into
            the emitted column stamps.
        typ: Kind of input; only the value ``"debate"`` is treated specially.

    Raises:
        ContextException: On any of the validation failures described above.
        AssertionError: If a column-number marker appears in "today" input.
    """
    # old style fixing (before patches existed)
    if typ == "debate":
        text = ApplyFixSubstitutions(text, sdate, fixsubs)

    stamp = StampUrl(sdate)  # handed to every ContextException for error messages
    btodaytype = re.match('<pagex [^>]*type="today"', text)
    if btodaytype:
        fout.write('<stamp colnum="000"/>\n')

    colnum = -1  # stays -1 until the first column-number marker is seen
    previoustime = []  # every time stamped so far, fed back to TimeProcessing
    for fss in recomb.split(text):
        # column number type
        columng = recolumnumvals.match(fss)
        if columng:
            assert not btodaytype  # no columns in today

            # check date
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    stamp=stamp, fragment=fss)

            # check number: a drop of exactly one is a known spurious stamp,
            # and numbers do get skipped during division listings, so only a
            # drop of two or more is rejected.
            lcolnum = int(columng.group(2))
            if colnum != -1 and lcolnum < colnum - 1:
                raise ContextException(
                    "Colnum not incrementing %d smaller than %d -- %s"
                    % (lcolnum, colnum, fss),
                    stamp=stamp, fragment=fss)

            # write a column number stamp (has to increase no matter what)
            if lcolnum > colnum:
                colnum = lcolnum
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
            fout.write('<stamp coldate="%s" colnum="%s"/>' % (sdate, colnum))
            continue

        columncg = recolnumcontvals.match(fss)
        if columncg:
            ldate = mx.DateTime.DateTimeFrom(columncg.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    stamp=stamp, fragment=fss)

            lcolnum = int(columncg.group(2))
            if colnum != lcolnum and sdate < '2006-05-08':
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, fss),
                    stamp=stamp, fragment=fss)
            # A valid continuation marker produces no output.
            continue

        timeg = retimevals.match(fss)
        if timeg:
            # The third argument tells TimeProcessing whether the matched
            # marker text starts with '['.
            stamptime = TimeProcessing(timeg.group(1), previoustime,
                                       (timeg.group(0)[0] == '['), stamp)
            if not stamptime:
                raise ContextException("Time not matched: " + timeg.group(1),
                                       stamp=stamp, fragment=fss)

            fout.write('<stamp time="%s"/>' % stamptime)
            previoustime.append(stamptime)
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            aname = anameg.group(1)
            stamp.aname = '<stamp aname="%s"/>' % aname
            fout.write('<stamp aname="%s"/>' % aname)
            continue

        # Nothing specific matched. If the fragment still matches the splitting
        # pattern, the specific patterns above have missed a case; dump some
        # diagnostics before failing.
        if recomb.match(fss):
            print("$$$ %s $$$" % fss)
            print(regcolnumcont)
            # NOTE(review): the inline flag is appended after the pattern;
            # recent Python 3 releases reject global flags that are not at
            # the start of the expression.
            print(re.match(regcolnumcont + "(?i)", fss))
            raise ContextException('regexpvals not general enough',
                                   stamp=stamp, fragment=fss)

        # Search once and reuse the match for both the test and the report.
        marginal = remarginal.search(fss)
        if marginal:
            print(fss)
            print('--------------------------------\n')
            print("marginal found:  %s" % (marginal.groups(),))
            print("zeroth:  %s" % marginal.group(0))
            print('--------------------------------\n')
            raise ContextException('marginal coltime/a detection case',
                                   stamp=stamp, fragment=fss)
        fout.write(fss)
Пример #3
0
def FilterWransColnum(fout, text, sdate):
    """Copy ``text`` to ``fout``, replacing column markers with stamp tags.

    The legacy ``fixsubs`` substitutions are applied to ``text`` first and
    every ``{**con**}{**/con**}`` sequence is removed. The result is split
    with ``recomb`` and every fragment is classified in this order:

    * column-number marker (``recolumnumvals``): its date must equal
      ``sdate`` and its number must not be lower than the last column number
      seen. A single space followed by
      ``<stamp coldate="..." colnum="...W"/>`` is written.
    * column-continuation marker (``recolnumcontvals``), in one of three
      forms depending on which groups matched:

      - dated (groups 1/2 or 3/4): the date must equal ``sdate``; the number
        must equal the current column when ``sdate < '2006-05-08'``. A
        single space is written.
      - group 5: the number must equal the current column or be one less
        than it. A single space is written.
      - group 6: the number must be exactly one more than the current
        column; the column advances and a space plus a new column stamp is
        written.

    * anchor marker (``reanamevals``): ``<stamp aname="..."/>`` is written.
    * anything else is written through unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Input text to filter.
        sdate: Date the markers are checked against; also interpolated into
            the emitted column stamps.

    Raises:
        ContextException: If a marker disagrees with ``sdate`` or with the
            running column number, or if a fragment matches ``recomb`` but
            none of the specific marker patterns.
    """
    # Legacy individual substitution rules
    text = ApplyFixSubstitutions(text, sdate, fixsubs)

    # Remove junk
    text = text.replace("{**con**}{**/con**}", "")

    stamp = StampUrl(sdate)  # handed to every ContextException for error messages

    colnum = -1  # stays -1 until the first column-number marker is seen
    for fss in recomb.split(text):
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    fragment=fss, stamp=stamp)

            lcolnum = int(columng.group(2))
            # Column numbers do get skipped during division listings, so
            # gaps and repeats are accepted; only going backwards is an error.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException(
                    "Colnum not incrementing %d -- %s" % (lcolnum, fss),
                    fragment=fss, stamp=stamp)

            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            # The dated form can match through either of two group pairs.
            ldate = columncontg.group(1) or columncontg.group(3) or None
            lcolnum = columncontg.group(2) or columncontg.group(4) or None
            if ldate:
                ldate = mx.DateTime.DateTimeFrom(ldate).date
                if sdate != ldate:
                    raise ContextException(
                        "Cont column date disagrees %s -- %s" % (sdate, fss),
                        fragment=fss, stamp=stamp)
                lcolnum = int(lcolnum)
                if colnum != lcolnum and sdate < '2006-05-08':
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss),
                        fragment=fss, stamp=stamp)

                # Only a separating space is emitted for a continuation.
                fout.write(' ')
                continue

            if columncontg.group(5):
                lcolnum = int(columncontg.group(5))
                if colnum != lcolnum and colnum != lcolnum + 1:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss),
                        fragment=fss, stamp=stamp)
                fout.write(' ')
                continue

            if columncontg.group(6):
                # This form announces the next column, so it advances the
                # running column number and emits a fresh stamp.
                lcolnum = int(columncontg.group(6))
                if colnum + 1 != lcolnum:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss),
                        fragment=fss, stamp=stamp)
                colnum = lcolnum
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
                fout.write(' ')
                fout.write(stamp.stamp)
                continue
            # None of the three forms applied: fall through to the checks below.

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            stamp.aname = '<stamp aname="%s"/>' % anameg.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched. If the fragment still matches the splitting
        # pattern, the specific patterns above have missed a case.
        if recomb.match(fss):
            raise ContextException('regexpvals not general enough',
                                   fragment=fss, stamp=stamp)
        # The remarginal near-miss check was removed on 2007-05-25 and is
        # deliberately not applied here.
        fout.write(fss)
Пример #4
0
def FilterDebateColTime(fout, text, sdate, typ):
    """Filter debate text into ``fout``, emitting column, time and anchor stamps.

    For ``typ == "debate"`` the legacy ``fixsubs`` substitutions are applied
    to ``text`` before anything else. Input that begins with a
    ``<pagex ... type="today"`` tag gets ``<stamp colnum="000"/>`` and a
    newline written up front; such input must not contain column-number
    markers.

    Fragments produced by ``recomb.split(text)`` are handled as follows,
    first match wins:

    * ``recolumnumvals`` (column number): date must equal ``sdate``. The
      running column number never decreases; a marker exactly one below it
      is tolerated, a larger drop raises. The running column number is
      written as ``<stamp coldate="..." colnum="..."/>``.
    * ``recolnumcontvals`` (column continuation): date must equal ``sdate``
      and, when ``sdate < '2006-05-08'``, the number must equal the running
      column number. No output.
    * ``retimevals`` (time): converted by ``TimeProcessing`` and written as
      ``<stamp time="..."/>``.
    * ``reanamevals`` (anchor): written as ``<stamp aname="..."/>``.
    * otherwise the fragment is written unchanged, unless ``recomb`` or
      ``remarginal`` matches it, in which case diagnostics are printed and
      an error is raised.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Input text to filter.
        sdate: Date the markers are checked against; also interpolated into
            the emitted column stamps.
        typ: Kind of input; only the value ``"debate"`` is treated specially.

    Raises:
        ContextException: On any of the validation failures described above.
        AssertionError: If a column-number marker appears in "today" input.
    """
    # old style fixing (before patches existed)
    if typ == "debate":
        text = ApplyFixSubstitutions(text, sdate, fixsubs)

    stamp = StampUrl(sdate)  # handed to every ContextException for error messages
    btodaytype = re.match('<pagex [^>]*type="today"', text)
    if btodaytype:
        fout.write('<stamp colnum="000"/>\n')

    colnum = -1  # stays -1 until the first column-number marker is seen
    previoustime = []  # every time stamped so far, fed back to TimeProcessing
    for fss in recomb.split(text):
        # column number type
        columng = recolumnumvals.match(fss)
        if columng:
            assert not btodaytype  # no columns in today

            # check date
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    stamp=stamp,
                    fragment=fss)

            # check number: stamps spuriously decrement by one now and then,
            # and numbers get skipped during division listings, so the only
            # rejected case is a drop of two or more.
            lcolnum = int(columng.group(2))
            if colnum != -1 and lcolnum < colnum - 1:
                raise ContextException(
                    "Colnum not incrementing %d smaller than %d -- %s" %
                    (lcolnum, colnum, fss),
                    stamp=stamp,
                    fragment=fss)

            # write a column number stamp (has to increase no matter what)
            if lcolnum > colnum:
                colnum = lcolnum
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate,
                                                                      lcolnum)
            fout.write('<stamp coldate="%s" colnum="%s"/>' % (sdate, colnum))
            continue

        columncg = recolnumcontvals.match(fss)
        if columncg:
            ldate = mx.DateTime.DateTimeFrom(columncg.group(1)).date
            if sdate != ldate:
                raise ContextException(
                    "Column date disagrees %s -- %s" % (sdate, fss),
                    stamp=stamp,
                    fragment=fss)

            lcolnum = int(columncg.group(2))
            if colnum != lcolnum and sdate < '2006-05-08':
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, fss),
                    stamp=stamp,
                    fragment=fss)
            # A valid continuation marker produces no output.
            continue

        timeg = retimevals.match(fss)
        if timeg:
            # The third argument tells TimeProcessing whether the matched
            # marker text starts with '['.
            stamptime = TimeProcessing(timeg.group(1), previoustime,
                                       (timeg.group(0)[0] == '['), stamp)
            if not stamptime:
                raise ContextException("Time not matched: " + timeg.group(1),
                                       stamp=stamp,
                                       fragment=fss)

            fout.write('<stamp time="%s"/>' % stamptime)
            previoustime.append(stamptime)
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            aname = anameg.group(1)
            stamp.aname = '<stamp aname="%s"/>' % aname
            fout.write('<stamp aname="%s"/>' % aname)
            continue

        # Nothing specific matched. If the fragment still matches the splitting
        # pattern, the specific patterns above have missed a case; dump some
        # diagnostics before failing.
        if recomb.match(fss):
            print("$$$ %s $$$" % fss)
            print(regcolnumcont)
            # NOTE(review): the inline flag is appended after the pattern;
            # recent Python 3 releases reject global flags that are not at
            # the start of the expression.
            print(re.match(regcolnumcont + "(?i)", fss))
            raise ContextException('regexpvals not general enough',
                                   stamp=stamp,
                                   fragment=fss)

        # Search once and reuse the match for both the test and the report.
        marginal = remarginal.search(fss)
        if marginal:
            print(fss)
            print('--------------------------------\n')
            print("marginal found:  %s" % (marginal.groups(),))
            print("zeroth:  %s" % marginal.group(0))
            print('--------------------------------\n')
            raise ContextException('marginal coltime/a detection case',
                                   stamp=stamp,
                                   fragment=fss)
        fout.write(fss)
Пример #5
0
def FilterWransColnum(fout, text, sdate):
    """Filter written-answer text into ``fout``, emitting column and anchor stamps.

    ``text`` first goes through the legacy ``fixsubs`` substitutions and has
    every ``{**con**}{**/con**}`` sequence stripped. It is then split with
    ``recomb``; each fragment is handled by the first rule that applies:

    * ``recolumnumvals`` (column number): date must equal ``sdate`` and the
      number must not be lower than the last one seen. Writes a space and
      ``<stamp coldate="..." colnum="...W"/>``.
    * ``recolnumcontvals`` (column continuation), by matched group:

      - groups 1/2 or 3/4 (dated form): date must equal ``sdate``; for
        ``sdate < "2006-05-08"`` the number must equal the current column.
        Writes a space.
      - group 5: number must equal the current column or be one less than
        it. Writes a space.
      - group 6: number must be the current column plus one; the column
        advances and a space plus a new column stamp is written.

    * ``reanamevals`` (anchor): writes ``<stamp aname="..."/>``.
    * otherwise the fragment is written unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Input text to filter.
        sdate: Date the markers are checked against; also interpolated into
            the emitted column stamps.

    Raises:
        ContextException: If a marker disagrees with ``sdate`` or with the
            running column number, or if a fragment matches ``recomb`` but
            none of the specific marker patterns.
    """
    # Legacy individual substitution rules
    text = ApplyFixSubstitutions(text, sdate, fixsubs)

    # Remove junk
    text = text.replace("{**con**}{**/con**}", "")

    stamp = StampUrl(sdate)  # handed to every ContextException for error messages

    colnum = -1  # stays -1 until the first column-number marker is seen
    for fss in recomb.split(text):
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException("Column date disagrees %s -- %s" % (sdate, fss), fragment=fss, stamp=stamp)

            lcolnum = int(columng.group(2))
            # Column numbers do get skipped during division listings, so
            # gaps and repeats are accepted; only going backwards is an error.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException("Colnum not incrementing %d -- %s" % (lcolnum, fss), fragment=fss, stamp=stamp)

            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
            fout.write(" ")
            fout.write(stamp.stamp)
            continue

        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            # The dated form can match through either of two group pairs.
            ldate = columncontg.group(1) or columncontg.group(3) or None
            lcolnum = columncontg.group(2) or columncontg.group(4) or None
            if ldate:
                ldate = mx.DateTime.DateTimeFrom(ldate).date
                if sdate != ldate:
                    raise ContextException(
                        "Cont column date disagrees %s -- %s" % (sdate, fss), fragment=fss, stamp=stamp
                    )
                lcolnum = int(lcolnum)
                if colnum != lcolnum and sdate < "2006-05-08":
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss), fragment=fss, stamp=stamp
                    )

                # Only a separating space is emitted for a continuation.
                fout.write(" ")
                continue

            if columncontg.group(5):
                lcolnum = int(columncontg.group(5))
                if colnum != lcolnum and colnum != lcolnum + 1:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss), fragment=fss, stamp=stamp
                    )
                fout.write(" ")
                continue

            if columncontg.group(6):
                # This form announces the next column, so it advances the
                # running column number and emits a fresh stamp.
                lcolnum = int(columncontg.group(6))
                if colnum + 1 != lcolnum:
                    raise ContextException(
                        "Cont column number disagrees %d -- %s" % (colnum, fss), fragment=fss, stamp=stamp
                    )
                colnum = lcolnum
                stamp.stamp = '<stamp coldate="%s" colnum="%sW"/>' % (sdate, lcolnum)
                fout.write(" ")
                fout.write(stamp.stamp)
                continue
            # None of the three forms applied: fall through to the checks below.

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            stamp.aname = '<stamp aname="%s"/>' % anameg.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched. If the fragment still matches the splitting
        # pattern, the specific patterns above have missed a case.
        if recomb.match(fss):
            raise ContextException("regexpvals not general enough", fragment=fss, stamp=stamp)
        # The remarginal near-miss check was removed on 2007-05-25 and is
        # deliberately not applied here.
        fout.write(fss)
Пример #6
0
def FilterWMSColnum(fout, text, sdate):
    """Filter ``text`` into ``fout``, emitting column and anchor stamps.

    ``text`` is split with ``recomb``; each fragment is handled by the first
    rule that applies:

    * ``recolumnumvals`` (column number): date must equal ``sdate`` and the
      number must not be lower than the last one seen. Writes a space and
      ``<stamp coldate="..." colnum="...WS"/>``.
    * ``recolnumcontvals`` (column continuation): date must equal ``sdate``
      and the number must equal the current column. Writes nothing.
    * ``reanamevals`` (anchor): writes ``<stamp aname="..."/>``.
    * otherwise the fragment is written unchanged.

    Args:
        fout: Output object; only its ``write`` method is used.
        text: Input text to filter.
        sdate: Date the markers are checked against; also interpolated into
            the emitted column stamps.

    Raises:
        ContextException: If a marker disagrees with ``sdate`` or with the
            running column number, or if a fragment matches ``recomb`` but
            none of the specific marker patterns.
    """
    stamp = StampUrl(sdate)  # handed to every ContextException for error messages

    colnum = -1  # stays -1 until the first column-number marker is seen
    for fss in recomb.split(text):
        columng = recolumnumvals.match(fss)
        if columng:
            ldate = mx.DateTime.DateTimeFrom(columng.group(1)).date
            if sdate != ldate:
                raise ContextException("Column date disagrees %s -- %s" %
                                       (sdate, fss),
                                       fragment=fss,
                                       stamp=stamp)

            lcolnum = int(columng.group(2))
            # Skipped or repeated numbers are accepted; going backwards is not.
            if colnum != -1 and lcolnum < colnum:
                raise ContextException("Colnum not incrementing %d -- %s" %
                                       (lcolnum, fss),
                                       fragment=fss,
                                       stamp=stamp)

            colnum = lcolnum
            stamp.stamp = '<stamp coldate="%s" colnum="%sWS"/>' % (sdate,
                                                                   lcolnum)
            fout.write(' ')
            fout.write(stamp.stamp)
            continue

        columncontg = recolnumcontvals.match(fss)
        if columncontg:
            ldate = mx.DateTime.DateTimeFrom(columncontg.group(1)).date
            if sdate != ldate:
                raise ContextException("Cont column date disagrees %s -- %s" %
                                       (sdate, fss),
                                       fragment=fss,
                                       stamp=stamp)
            lcolnum = int(columncontg.group(2))
            if colnum != lcolnum:
                raise ContextException(
                    "Cont column number disagrees %d -- %s" % (colnum, fss),
                    fragment=fss,
                    stamp=stamp)
            # A valid continuation marker produces no output.
            continue

        # anchor names from HTML <a name="xxx">
        anameg = reanamevals.match(fss)
        if anameg:
            stamp.aname = '<stamp aname="%s"/>' % anameg.group(1)
            fout.write(stamp.aname)
            continue

        # Nothing specific matched. If the fragment still matches the splitting
        # pattern, the specific patterns above have missed a case.
        if recomb.match(fss):
            raise ContextException('regexpvals not general enough',
                                   fragment=fss,
                                   stamp=stamp)
        # The remarginal near-miss check is deliberately not applied here
        # (it was already disabled in the original code).
        fout.write(fss)