def prep_para(para):
	"""Annotate a paragraph with its text and a lazy successor lookup.

	Stores the result of ``_para_text_content(para)`` under ``para['text']``
	and a zero-argument callable under ``para['next']``, then returns the
	same ``para`` object.

	Calling ``para['next']()`` returns a prepared deep copy of the following
	entry in ``section['paragraphs']``, skipping every entry for which
	``matchers.empty`` is truthy, or ``None`` once the list is exhausted.
	``section`` is not a parameter; it is resolved from the surrounding
	scope when the callable runs.
	"""
	para['text'] = _para_text_content(para)

	def next_para():
		# Walk forward from the current paragraph until a non-empty one
		# turns up; each step is located via the 'index' of the paragraph
		# just examined, relative to the first paragraph's 'index'.
		cursor = para
		while True:
			siblings = section['paragraphs']
			position = cursor['index'] - siblings[0]['index'] + 1
			if position >= len(siblings):
				return None
			# Work on a copy so the stored paragraph is not annotated.
			candidate = prep_para(copy.deepcopy(siblings[position]))
			if not matchers.empty(candidate):
				return candidate
			cursor = candidate

	para['next'] = next_para
	return para
Пример #2
0
    def prep_para(para):
        """Prepare a paragraph for matching and return it.

        Sets ``para['text']`` from ``_para_text_content(para)`` and installs
        ``para['next']``, a zero-argument callable yielding the following
        non-empty paragraph (as a prepared deep copy) or ``None`` when no
        further paragraph exists. ``section`` is taken from the enclosing
        scope rather than passed in.
        """
        para['text'] = _para_text_content(para)

        def next_para():
            siblings = section['paragraphs']
            # Offset of the following paragraph, derived from the 'index'
            # values relative to the first paragraph of the section.
            position = para['index'] - siblings[0]['index'] + 1
            if position < len(siblings):
                # Copy first so preparing the successor leaves the stored
                # paragraph untouched.
                candidate = prep_para(copy.deepcopy(siblings[position]))
                # An empty successor defers to its own 'next' lookup, which
                # continues the search from that paragraph onward.
                return candidate['next']() if matchers.empty(candidate) else candidate
            return None

        para['next'] = next_para
        return para
def parse_doc_section(section, dom):
	"""Feed every paragraph of a section to a ``Parser`` built from ``dom``.

	Each entry of ``section["paragraphs"]`` gets its text stored under
	``'text'``; entries whose text is falsy are skipped. The parser is
	called with each remaining paragraph, and a truthy result counts as
	handled. Unhandled paragraphs are dumped to stderr as they occur, and
	both tallies are written to stderr at the end. Returns ``None``.
	"""
	parse = Parser(dom)

	missed = 0
	matched = 0
	for para in section["paragraphs"]:
		para['text'] = _para_text_content(para)
		if not para['text']:
			continue
		if parse(para):
			matched += 1
		elif para['text']:
			# Text is re-read after parsing, so a paragraph whose text is
			# no longer truthy at this point is counted in neither tally.
			missed += 1
			print('unhandled para {}:'.format(para['index']), para, '\n', file=sys.stderr)
	print('handled paras: {}'.format(matched), file=sys.stderr)
	print('unhandled paras: {}'.format(missed), file=sys.stderr)
Пример #4
0
def parse_doc_section(section, dom):
    """Parse all paragraphs in ``section`` and report the outcome on stderr.

    A ``Parser`` is created from ``dom`` and invoked once per paragraph of
    ``section["paragraphs"]`` whose text (stored under ``'text'``) is
    truthy. Paragraphs the parser does not handle are printed to stderr
    immediately; the handled and unhandled totals follow once the loop
    completes. Returns ``None``.
    """
    parse = Parser(dom)

    def report(*parts):
        # All diagnostics from this function go to stderr.
        print(*parts, file=sys.stderr)

    missed = 0
    matched = 0
    for para in section["paragraphs"]:
        para['text'] = _para_text_content(para)
        if not para['text']:
            continue
        if parse(para):
            matched += 1
        elif para['text']:
            # The text is checked again after parsing; a paragraph whose
            # text is falsy by now is counted as neither outcome.
            missed += 1
            report('unhandled para {}:'.format(para['index']), para, '\n')
    report('handled paras: {}'.format(matched))
    report('unhandled paras: {}'.format(missed))