Пример #1
0
def untagged_par():
    """Build the parser sequence for a paragraph that has no opening tag.

    The sequence runs a DEBUG and a TRACE step, matches the paragraph body
    (captured as ``paragraph_text``), and then issues START / ATTRIBUTES /
    OUT / END steps for a ``paragraph`` element whose ``type`` attribute is
    the literal ``"untagged"`` and whose output is the captured text.
    """
    # The body is any run of non-"<" characters plus <i> and </i>.  The
    # negative lookahead rejects text in which "[Adjourned" occurs before the
    # next "<".  A trailing </ul> or </p> is consumed when present.
    # Raw strings: \s and \[ are not valid Python escapes, so a plain literal
    # only worked through the (deprecated) unknown-escape fallback.
    par_regex = (
        r'\s*(?P<paragraph_text>(?![^<]*\[Adjourned)([^<]|<i>|</i>)+)'
        r'\s*(</ul>|</p>)?'
    )
    return SEQ(
        DEBUG('checking for untagged paragraph'),
        fd_parse.TRACE(False),
        pattern(par_regex),
        START('paragraph'),
        ATTRIBUTES(type='"untagged"'),
        fd_parse.TRACE(False, envlength=512),
        OUT('$paragraph_text'),
        END('paragraph'),
    )
Пример #2
0
def untagged_par():
	"""Build the parser sequence for a paragraph that has no opening tag.

	The sequence runs a DEBUG and a TRACE step, matches the paragraph body
	(captured as ``paragraph_text``), and then issues START / ATTRIBUTES /
	OUT / END steps for a ``paragraph`` element whose ``type`` attribute is
	the literal ``"untagged"`` and whose output is the captured text.
	"""
	# The body is any run of non-"<" characters plus <i> and </i>.  The
	# negative lookahead rejects text in which "[Adjourned" occurs before the
	# next "<".  A trailing </ul> or </p> is consumed when present.
	# Raw strings: \s and \[ are not valid Python escapes, so a plain literal
	# only worked through the (deprecated) unknown-escape fallback.
	par_regex = (
		r'\s*(?P<paragraph_text>(?![^<]*\[Adjourned)([^<]|<i>|</i>)+)'
		r'\s*(</ul>|</p>)?'
	)
	return SEQ(
		DEBUG('checking for untagged paragraph'),
		fd_parse.TRACE(False),
		pattern(par_regex),
		START('paragraph'),
		ATTRIBUTES(type='"untagged"'),
		fd_parse.TRACE(False, envlength=512),
		OUT('$paragraph_text'),
		END('paragraph'),
	)
Пример #3
0
def paragraph_text(partype='plain', no=None):
    """Build a parser that matches paragraph text and emits a ``paragraph``.

    partype -- its repr() is passed to ELEMENT as the ``type`` keyword.
    no      -- accepted but not used by the current body; the commented-out
               code this replaces suggests it was once mapped to a ``no``
               attribute.
    """
    # Capture everything up to the next tag, allowing <i> and </i> inside.
    body_matcher = pattern('(?P<paragraph_text>([^<]|<i>|</i>)+)')
    element = ELEMENT('paragraph',
                      body='$paragraph_text',
                      type=repr(partype))
    return SEQ(body_matcher, element)
Пример #4
0
def paragraph_text(partype='plain', no=None):
	"""Build a parser that matches paragraph text and emits a ``paragraph``.

	partype -- its repr() is passed to ELEMENT as the ``type`` keyword.
	no      -- accepted but not used by the current body; the commented-out
	           code this replaces suggests it was once mapped to a ``no``
	           attribute.
	"""
	# Capture everything up to the next tag, allowing <i> and </i> inside.
	body_matcher = pattern('(?P<paragraph_text>([^<]|<i>|</i>)+)')
	element = ELEMENT('paragraph',
		body='$paragraph_text',
		type=repr(partype))
	return SEQ(body_matcher, element)
Пример #5
0
#from fd_core import nextdayinstance 

# Two matchers that both declare a named group "no".
# Raw string for the first: \d is not a valid Python escape sequence.
m2 = Match(r'(?P<no>\d+)')
m3 = Match('(?P<no>one|two)')

# Named-group tables of each matcher's `prog` (presumably the compiled regex).
g2 = m2.prog.groupindex
g3 = m3.prog.groupindex

# Group names that appear in both tables.
overlap = set(g2) & set(g3)

# Join the two matchers: once with Match.join's default behaviour, once with
# an explicit combiner that concatenates its two arguments around "|".
b = Match.join(m2, m3)
c = Match.join(m2, m3, lambda x, y: x + "|" + y)

import fd_parse

# Two single-word patterns that share the group name "no" ...
p1 = fd_parse.pattern('(?P<no>one)')
p2 = fd_parse.pattern('(?P<no>two)')

# ... combined through fd_parse.OR.
d = fd_parse.OR(p1, p2)

from fd_dates import *

# Short alias for futureday, and one instance of the plaindate parser
# (both names presumably come from the fd_dates star import).
f = futureday

p = plaindate()

# p1 followed by p2.
s1 = SEQ(p1, p2)

# Sample environment: today's date, a pre-quoted string value and a number.
tenv = dict(today=datetime.date.today(), test='"testvalue"', no=4)

today = datetime.date.today()
Пример #6
0
# English number words.
#   engnumber12s -- bare alternation one..twelve ("six" carries a lookahead so
#                   that it cannot match the start of "sixty")
#   engnumber12p -- the same alternation wrapped in a group
#   engnumber60p -- one..twelve, the teens, and twenty/thirty/forty/fifty each
#                   optionally followed by -one .. -nine
engnumber12s = 'one|two|three|four|five|six(?!ty)|seven|eight|nine|ten|eleven|twelve'
engnumber12p = '(' + engnumber12s + ')'

engnumber60p = ''.join((
    '(',
    engnumber12s,
    '|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|((twenty|thirty|forty|fifty)(-(one|two|three|four|five|six|seven|eight|nine))?))',
))

# Durations written out in words.  Alternatives, in order:
#   an hour and a half / an hour and a quarter
#   a quarter of an hour / three quarters of an hour / half an hour
#   <engnumber60p> minutes
#   <engnumber12p> and a half hours
#   <engnumber12p> hours [and <engnumber60p> minutes]
timequantump = (
    '(an hour and a (half|quarter)|((a quarter|three quarters) of|half) an hour|%(engnumber60p)s minutes|%(engnumber12p)s (and a half hours|hours( and %(engnumber60p)s minutes)?))'
    % dict(engnumber12p=engnumber12p, engnumber60p=engnumber60p)
)

DEFINE('timequantum', pattern(timequantump))

# English times, eg "three minutes to four o'clock"

# The literals containing \s are raw strings: \s is not a valid Python escape
# and only survived in a plain literal through the deprecated unknown-escape
# fallback.  The resulting regex text is unchanged.
archtimep = (
    '(twelve (noon|midnight)|(a quarter past|half-past|a quarter to|'
    + engnumber60p + r' minutes (to|past)|)\s*'
    + engnumber12p + r"(\s*o'\s*clock))?"
)

# The inline (?i) flag leads the expression instead of trailing it: Python
# 3.11+ rejects global flags that are not at the start, and in the leading
# position the flag still applies to the whole pattern.
archtime = DEFINE(
    'archtime',
    SEQ(pattern(r'(?i)\s*(?P<archtime>' + archtimep + ')'),
        ELEMENT('time', archtime='$archtime')))

# Date handling: regex groups for month names, weekday names and the
# ordinal suffixes that can follow a day number.

monthnamep = '(%s)' % '|'.join((
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
))
daynamep = '(%s)' % '|'.join((
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
))
ordinalp = '(%s)' % '|'.join(('st', 'nd', 'rd', 'th'))
Пример #7
0
import os.path
import re
import fd_parse
from fd_dates import *

from fd_parse import SEQ, OR, ANY, POSSIBLY, IF, START, END, OBJECT, NULL, OUT, DEBUG, STOP, FORCE, CALL, ATTRIBUTES, ELEMENT, DEFINE, pattern, tagged, plaintextpar, plaintext

# NOTE(review): no `import sys` is visible among the imports above, so `sys`
# would only resolve if the fd_dates star import happens to export it.
# Importing it explicitly is harmless if it is already in scope.
import sys

# Extend the module search path with the parent directory before the imports
# that follow (presumably those modules live there -- confirm).
sys.path.append("../")
from xmlfilewrite import WriteXMLHeader
from contextexception import ContextException
from miscfuncs import toppath

# Parser for a run of <p ...> paragraphs: each match emits one `paragraph`
# element whose body is the captured text (ANY presumably repeats the inner
# sequence -- confirm against fd_parse).
splitparagraphs = ANY(
    SEQ(
        # Optional whitespace, an opening <p ...> (optionally followed by
        # <ul>), the text up to the next tag other than <i>/</i>, then
        # optional </ul> and </p>.  Raw string: \s is not a valid Python
        # escape sequence.
        pattern(
            r'\s*(<p([^>]*?)>(<ul>)?)(?P<partext>([^<]|<i>|</i>)*)(</ul>)?(</p>)?'
        ),
        ELEMENT('paragraph', body='$partext'),
    ))


def namepattern(label='name'):
    """Return a regex fragment that captures a name into group *label*.

    The group accepts one or more letters, spaces, hyphens, full stops and
    apostrophes.
    """
    return "".join(("(?P<", label, ">[-A-Za-z .']+)"))


# Patterns specific to Votes and Proceedings that are used frequently.
# Regexes that contain a backslash are raw strings: \s and \d are not valid
# Python escape sequences, so a plain literal relied on the deprecated
# unknown-escape fallback.  The regex text itself is unchanged.

DEFINE('mp', pattern('[-A-Za-z .]+'))
DEFINE('act', pattern(r'[-a-z.,A-Z0-9()\s]*?'), fragment=True)
DEFINE('speaker', pattern('(Deputy )?Speaker'))
DEFINE('number', pattern(r'\d+'))
DEFINE('ordinal', pattern(r'1st|2nd|3rd|\d+th'))
DEFINE('text', pattern('[a-z .?;,()]*?'), fragment=True)
Пример #8
0
import os.path
import re
import fd_parse
from fd_dates import *


from fd_parse import SEQ, OR,  ANY, POSSIBLY, IF, START, END, OBJECT, NULL, OUT, DEBUG, STOP, FORCE, CALL, ATTRIBUTES, ELEMENT,  DEFINE, pattern, tagged, plaintextpar, plaintext

# NOTE(review): no `import sys` is visible among the imports above, so `sys`
# would only resolve if the fd_dates star import happens to export it.
# Importing it explicitly is harmless if it is already in scope.
import sys

# Extend the module search path with the parent directory before the imports
# that follow (presumably those modules live there -- confirm).
sys.path.append("../")
from xmlfilewrite import WriteXMLHeader
from contextexception import ContextException
from miscfuncs import toppath

# Parser for a run of <p ...> paragraphs: each match emits one `paragraph`
# element whose body is the captured text (ANY presumably repeats the inner
# sequence -- confirm against fd_parse).
splitparagraphs = ANY(
	SEQ(
		# Optional whitespace, an opening <p ...> (optionally followed by
		# <ul>), the text up to the next tag other than <i>/</i>, then
		# optional </ul> and </p>.  Raw string: \s is not a valid Python
		# escape sequence.
		pattern(r'\s*(<p([^>]*?)>(<ul>)?)(?P<partext>([^<]|<i>|</i>)*)(</ul>)?(</p>)?'),
		ELEMENT('paragraph', body='$partext')
		)
	)

def namepattern(label='name'):
	"""Return a regex fragment that captures a name into group *label*.

	The group accepts one or more letters, spaces, hyphens, full stops and
	apostrophes.
	"""
	return "".join(("(?P<", label, ">[-A-Za-z .']+)"))


# Patterns specific to Votes and Proceedings that are used frequently.
# Regexes that contain a backslash are raw strings: \s and \d are not valid
# Python escape sequences, so a plain literal relied on the deprecated
# unknown-escape fallback.  The regex text itself is unchanged.

DEFINE('mp', pattern('[-A-Za-z .]+'))
DEFINE('act', pattern(r'[-a-z.,A-Z0-9()\s]*?'), fragment=True)
DEFINE('speaker', pattern('(Deputy )?Speaker'))
DEFINE('number', pattern(r'\d+'))
DEFINE('ordinal', pattern(r'1st|2nd|3rd|\d+th'))
Пример #9
0
# English number words.
#   engnumber12s -- bare alternation one..twelve ("six" carries a lookahead so
#                   that it cannot match the start of "sixty")
#   engnumber12p -- the same alternation wrapped in a group
#   engnumber60p -- one..twelve, the teens, and twenty/thirty/forty/fifty each
#                   optionally followed by -one .. -nine

engnumber12s = 'one|two|three|four|five|six(?!ty)|seven|eight|nine|ten|eleven|twelve'
engnumber12p = '(' + engnumber12s + ')'

engnumber60p = ''.join((
	'(',
	engnumber12s,
	'|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|((twenty|thirty|forty|fifty)(-(one|two|three|four|five|six|seven|eight|nine))?))',
))

# Durations written out in words.  Alternatives, in order:
#   an hour and a half / an hour and a quarter
#   a quarter of an hour / three quarters of an hour / half an hour
#   <engnumber60p> minutes
#   <engnumber12p> and a half hours
#   <engnumber12p> hours [and <engnumber60p> minutes]
timequantump = (
	'(an hour and a (half|quarter)|((a quarter|three quarters) of|half) an hour|%(engnumber60p)s minutes|%(engnumber12p)s (and a half hours|hours( and %(engnumber60p)s minutes)?))'
	% dict(engnumber12p=engnumber12p, engnumber60p=engnumber60p)
)

DEFINE('timequantum', pattern(timequantump))

# English times, eg "three minutes to four o'clock"

# The literals containing \s are raw strings: \s is not a valid Python escape
# and only survived in a plain literal through the deprecated unknown-escape
# fallback.  The resulting regex text is unchanged.
archtimep = (
	'(twelve (noon|midnight)|(a quarter past|half-past|a quarter to|'
	+ engnumber60p + r' minutes (to|past)|)\s*'
	+ engnumber12p + r"(\s*o'\s*clock))?"
)

# The inline (?i) flag leads the expression instead of trailing it: Python
# 3.11+ rejects global flags that are not at the start, and in the leading
# position the flag still applies to the whole pattern.
archtime = DEFINE('archtime',
	SEQ(
		pattern(r'(?i)\s*(?P<archtime>' + archtimep + ')'),
		ELEMENT('time', archtime='$archtime')
		)
	)

# Date handling: regex group matching any English month name.

monthnamep = '(%s)' % '|'.join((
	'January', 'February', 'March', 'April', 'May', 'June',
	'July', 'August', 'September', 'October', 'November', 'December',
))