Exemplo n.º 1
0
def domorph(tree):
    """Apply the 'add' morphology action to the preterminals of a tree.

    Visits every non-empty subtree of ``tree`` whose first child is not a
    ``Tree``. For each such node that has a truthy ``source`` attribute
    whose ``treebank.MORPH`` field is non-empty and not ``'--'``,
    ``treebank.handlemorphology('add', None, node, source)`` is called.

    Nothing is returned; any change is made on the nodes themselves.
    """
    def ispreterminal(candidate):
        return candidate and not isinstance(candidate[0], Tree)

    for preterm in tree.subtrees(ispreterminal):
        source = getattr(preterm, 'source', None)
        if not source:
            # no source information attached to this node
            continue
        morphtag = source[treebank.MORPH]
        # '--' is treated the same as a missing morphological tag
        if morphtag and morphtag != '--':
            treebank.handlemorphology('add', None, preterm, source)
Exemplo n.º 2
0
def replacemorph(tree):
	"""Apply the 'replace' morphology action to the preterminals of a tree.

	Visits every non-empty subtree of ``tree`` whose first child is not a
	``Tree``. When such a node has a truthy ``source`` attribute whose
	``treebank.MORPH`` field is non-empty and not ``'--'``,
	``treebank.handlemorphology('replace', None, node, source)`` is called.
	Afterwards every occurrence of ``'[]'`` is removed from the label of
	each visited node, whether or not a morphological tag was found.

	Nothing is returned; the nodes are modified in place.
	"""
	def ispreterminal(candidate):
		return candidate and not isinstance(candidate[0], Tree)

	for preterm in tree.subtrees(ispreterminal):
		source = getattr(preterm, 'source', None)
		morphtag = source[treebank.MORPH] if source else None
		# '--' is treated the same as a missing morphological tag
		if morphtag and morphtag != '--':
			treebank.handlemorphology('replace', None, preterm, source)
		# strip empty bracket pairs from the (possibly rewritten) label
		preterm.label = preterm.label.replace('[]', '')
Exemplo n.º 3
0
def parse():
	"""Parse sentence and return a textual representation of a parse tree.

	Output is either in a HTML fragment or in plain text. To be invoked by an
	AJAX call. The following query parameters are read from ``request.args``:

	:sent: the sentence to parse; without it an empty string is returned.
	:est: value assigned to ``estimator`` of the last stage (default 'rfe').
	:marg: 'nbest' (default), 'sample' or 'both'; sets the ``kbest`` and
		``sample`` flags of the last stage.
	:objfun: value assigned to ``objective`` of the last stage
		(default 'mpp').
	:coarse: optional mode for the first stage; only applied while the
		current mode of that stage starts with 'pcfg'.
	:html: if this parameter is present (with any value), the
		``parsetree.html`` template is rendered; otherwise plain text.
	:lang: a key of ``PARSERS``, or 'detect' (default) to guess the
		language from the tokens.

	:returns: the rendered template, a ``text/plain`` ``Response``, or a
		short message string when the input is rejected.

	Results are cached for 5000 seconds under a key made of the tokens and
	every parameter that influences the rendered output, including ``html``.
	"""
	sent = request.args.get('sent', None)
	est = request.args.get('est', 'rfe')
	marg = request.args.get('marg', 'nbest')
	objfun = request.args.get('objfun', 'mpp')
	coarse = request.args.get('coarse', None)
	# only the presence of the parameter matters, not its value
	html = 'html' in request.args
	lang = request.args.get('lang', 'detect')
	if not sent:
		return ''
	frags = nbest = None
	senttok = tokenize(sent)
	# An empty tokenization is reported separately, so that it is neither
	# labelled as "too long" nor passed to len() when it is None.
	if not senttok:
		return 'No words found in sentence'
	if len(senttok) > LIMIT:
		return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
	if lang == 'detect':
		lang = guesslang(senttok)
	elif lang not in PARSERS:
		return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
	# html is part of the key because result, frags and nbest are rendered
	# differently for HTML and plain text; sharing one cache entry would
	# serve markup of the wrong kind.
	key = (senttok, est, marg, objfun, coarse, lang, html)
	resp = CACHE.get(key)
	if resp is None:
		link = 'parse?' + url_encode(dict(sent=sent, est=est, marg=marg,
				objfun=objfun, coarse=coarse, html=html))
		# NOTE(review): these settings are written to the shared
		# PARSERS[lang] object and persist after this request; presumably
		# requests are handled one at a time -- confirm.
		PARSERS[lang].stages[-1].estimator = est
		PARSERS[lang].stages[-1].objective = objfun
		PARSERS[lang].stages[-1].kbest = marg in ('nbest', 'both')
		PARSERS[lang].stages[-1].sample = marg in ('sample', 'both')
		if PARSERS[lang].stages[0].mode.startswith('pcfg') and coarse:
			PARSERS[lang].stages[0].mode = coarse
			PARSERS[lang].stages[1].k = (1e-5
					if coarse == 'pcfg-posterior' else 50)

		results = list(PARSERS[lang].parse(senttok))
		if results[-1].noparse:
			parsetrees = []
			result = 'no parse!'
			frags = nbest = ''
		else:
			if SHOWMORPH:
				for node in results[-1].parsetree.subtrees(
						lambda n: n and not isinstance(n[0], Tree)):
					treebank.handlemorphology(
							'replace', None, node, node.source)
					node.label = node.label.replace('[]', '')
			if SHOWFUNC:
				treebank.handlefunctions('add', results[-1].parsetree, pos=True)
			tree = str(results[-1].parsetree)
			prob = results[-1].prob
			parsetrees = results[-1].parsetrees or []
			# keep the ten entries with the highest second item (the
			# probability, going by the unpacking further below)
			parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
			parsetrees_ = []
			fragments = results[-1].fragments or ()
			APP.logger.info('[%s] %s', probstr(prob), tree)
			# re-read the tree from its string form with integer leaves
			tree = Tree.parse(tree, parse_leaf=int)
			result = Markup(DrawTree(tree, senttok).text(
					unicodelines=True, html=html, funcsep='-'))
			# only fragments containing more than one '(' are drawn
			frags = Markup('Phrasal fragments used in the most probable '
					'derivation of the highest ranked parse tree:\n'
					+ '\n\n'.join(
					DrawTree(frag).text(unicodelines=True, html=html)
					for frag in fragments if frag.count('(') > 1))
			for tree, prob, x in parsetrees:
				tree = PARSERS[lang].postprocess(tree, senttok, -1)[0]
				if SHOWMORPH:
					for node in tree.subtrees(
							lambda n: n and not isinstance(n[0], Tree)):
						treebank.handlemorphology(
								'replace', None, node, node.source)
				if SHOWFUNC:
					treebank.handlefunctions('add', tree, pos=True)
				parsetrees_.append((tree, prob, x))
			nbest = Markup('\n\n'.join('%d. [%s]\n%s' % (n + 1, probstr(prob),
						DrawTree(tree, senttok).text(
							unicodelines=True, html=html, funcsep='-'))
					for n, (tree, prob, _) in enumerate(parsetrees_)))
		msg = '\n'.join(stage.msg for stage in results)
		elapsed = [stage.elapsedtime for stage in results]
		elapsed = 'CPU time elapsed: %s => %gs' % (
				' '.join('%gs' % a for a in elapsed), sum(elapsed))
		# the listing in info uses the trees as returned by the parser
		# (parsetrees), not the postprocessed copies in parsetrees_
		info = '\n'.join(('length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
				len(senttok), lang, est, objfun, marg), msg, elapsed,
				'10 most probable parse trees:',
				'\n'.join('%d. [%s] %s' % (n + 1, probstr(prob),
						writediscbrackettree(tree, senttok))
						for n, (tree, prob, _) in enumerate(parsetrees))
				+ '\n'))
		CACHE.set(key, (sent, result, frags, nbest, info, link), timeout=5000)
	else:
		(sent, result, frags, nbest,  # pylint: disable=unpacking-non-sequence
				info, link) = resp  # pylint: disable=unpacking-non-sequence
	if html:
		return render_template('parsetree.html', sent=sent, result=result,
				frags=frags, nbest=nbest, info=info, link=link,
				randid=randid())
	else:
		return Response('\n'.join((nbest, frags, info, result)),
				mimetype='text/plain')