def test_transforms():
    """Test reversibility of Tiger transformations.

    Reads 'alpinosample.export' with two separate corpus readers, applies
    ``transform`` to the trees of one reader, undoes it again with
    ``reversetransform`` and compares the bracketings of the canonicalized
    result with those of the untouched trees from the other reader.
    At most the first 100 sentences are examined. A report is printed for
    every sentence whose bracketings differ, followed by overall totals.
    """
    from discodop.treebanktransforms import transform, reversetransform, \
        bracketings
    from discodop.treebank import NegraCorpusReader, handlefunctions
    headrules = None  # 'alpino.headrules'
    n = NegraCorpusReader('alpinosample.export', headrules=headrules)
    nn = NegraCorpusReader('alpinosample.export', headrules=headrules)
    transformations = ('S-RC', 'VP-GF', 'NP')
    trees = [
        transform(tree, sent, transformations)
        for tree, sent in zip(nn.trees().values(), nn.sents().values())]
    print('\ntransformed')
    correct = exact = e = 0
    for a, b, c, d in islice(
            zip(n.trees().values(), n.sents().values(), trees, count()),
            100):
        transformc = reversetransform(c.copy(True), b, transformations)
        c1 = bracketings(canonicalize(a))
        c2 = bracketings(canonicalize(transformc))
        z = -1  # 825; set to a sentence index to force a report for it
        if c1 != c2 or e == z:
            # NOTE(review): c1 comes from the original tree and c2 from the
            # round-tripped one; the precision/recall names and the
            # 'gold-transformed' labels below look swapped -- confirm.
            precision = len(set(c1) & set(c2)) / len(set(c1))
            recall = len(set(c1) & set(c2)) / len(set(c2))
            if precision != 1.0 or recall != 1.0 or d == z:
                # encode() yields bytes on Python 3, which str.join rejects;
                # decode the escaped form back to text before joining.
                print(d, ' '.join(
                    ':'.join((
                        str(idx),
                        word.encode('unicode-escape').decode('ascii')))
                    for idx, word in enumerate(b)))
                print('no match', precision, recall)
                print(len(c1), len(c2),
                      'gold-transformed', set(c2) - set(c1),
                      'transformed-gold', set(c1) - set(c2))
                print(a)
                print(transformc)
                # Add function tags to the original tree before showing it
                # a second time.
                handlefunctions('add', a)
                print(a, '\n', b, '\n\n')
            else:
                correct += 1
        else:
            exact += 1
            correct += 1
        e += 1
    # Guard the percentage against a corpus that yielded no sentences.
    print('matches', correct, '/', e, 100 * correct / e if e else 0.0, '%')
    print('exact', exact)
def getparses(sent, require=(), block=(), objfun='mpp', est='rfe',
              coarse='pcfg', root=None):
    """Parse a sentence and collect its distinct post-processed parse trees.

    The sentence is tokenized with ``postokenize`` and parsed with the
    module-level ``PARSER`` after ``est``, ``objfun`` and (if the first stage
    is in a 'pcfg' mode) ``coarse`` have been applied to its stages.
    ``require``, ``block`` and ``root`` are passed through to
    ``PARSER.parse``.

    Returns a 4-tuple ``(senttok, parsetrees, messages, elapsed)`` where
    ``parsetrees`` is a list of ``(prob, tree, treestr, deriv)`` tuples
    sorted by descending probability, and ``messages`` / ``elapsed`` hold the
    ``msg`` / ``elapsedtime`` of every stage. If the sentence has more than
    ``LIMIT`` tokens, ``([], [], 'sentence too long', None)`` is returned
    instead.
    """
    senttok, tags = postokenize(sent)
    if len(senttok) > LIMIT:
        return [], [], 'sentence too long', None

    final_stage = PARSER.stages[-1]
    final_stage.estimator = est
    final_stage.objective = objfun
    first_stage = PARSER.stages[0]
    if first_stage.mode.startswith('pcfg') and coarse:
        if coarse == 'pcfg-posterior':
            first_stage.mode = 'pcfg'
        else:
            first_stage.mode = coarse

    results = list(PARSER.parse(
        senttok, tags=tags, require=require, block=block, root=root))
    candidates = sorted(
        applythreshold(results[-1].parsetrees),
        key=itemgetter(1), reverse=True)

    # Maps the string form of each post-processed tree to its entry; several
    # raw trees can collapse into one entry (spurious ambiguities of state
    # splits), in which case their probabilities are added up and the data
    # of the first occurrence is kept.
    merged = OrderedDict()
    for treestr, prob, deriv in candidates:  # FIXME limit?
        tree = PARSER.postprocess(treestr, senttok, -1)[0]
        if SHOWFUNC:
            treebank.handlefunctions('add', tree, pos=True, root=True)
        if SHOWMORPH:
            domorph(tree)
        rendered = str(tree)
        if rendered not in merged:
            merged[rendered] = (prob, tree, treestr, deriv)
            continue
        oldprob, oldtree, oldtreestr, oldderiv = merged[rendered]
        merged[rendered] = (prob + oldprob, oldtree, oldtreestr, oldderiv)

    ranked = sorted(merged.values(), key=itemgetter(0), reverse=True)
    messages = [stage.msg for stage in results]
    elapsed = [stage.elapsedtime for stage in results]
    return senttok, ranked, messages, elapsed
def testtransforms():
    """Test whether the Tiger transformations are reversible.

    Reads 'alpinosample.export' with two separate corpus readers, applies
    ``transform`` to the trees of one reader, undoes it with
    ``reversetransform`` and compares the bracketings of the canonicalized
    result with those of the untouched trees from the other reader.
    At most the first 100 sentences are examined. A report is printed for
    every sentence whose bracketings differ, followed by overall totals.
    """
    from discodop.treetransforms import canonicalize
    from discodop.treebank import NegraCorpusReader, handlefunctions
    headrules = None  # 'alpino.headrules'
    n = NegraCorpusReader('.', 'alpinosample.export', headrules=headrules)
    nn = NegraCorpusReader('.', 'alpinosample.export', headrules=headrules)
    transformations = ('S-RC', 'VP-GF', 'NP')
    trees = [
        transform(tree, sent, transformations)
        for tree, sent in zip(
            nn.parsed_sents().values(), nn.sents().values())]
    print('\ntransformed')
    correct = exact = d = 0
    for a, b, c in islice(
            zip(n.parsed_sents().values(), trees, n.sents().values()), 100):
        transformb = reversetransform(b.copy(True), transformations)
        b1 = bracketings(canonicalize(a))
        b2 = bracketings(canonicalize(transformb))
        z = -1  # 825; set to a sentence index to force a report for it
        if b1 != b2 or d == z:
            # NOTE(review): b1 comes from the original tree and b2 from the
            # round-tripped one; the precision/recall names and the
            # 'gold-transformed' labels below look swapped -- confirm.
            precision = len(set(b1) & set(b2)) / len(set(b1))
            recall = len(set(b1) & set(b2)) / len(set(b2))
            if precision != 1.0 or recall != 1.0 or d == z:
                # encode() yields bytes on Python 3, which str.join rejects;
                # decode the escaped form back to text before joining.
                print(d, ' '.join(
                    ':'.join((
                        str(idx),
                        word.encode('unicode-escape').decode('ascii')))
                    for idx, word in enumerate(c)))
                print('no match', precision, recall)
                print(len(b1), len(b2),
                      'gold-transformed', set(b2) - set(b1),
                      'transformed-gold', set(b1) - set(b2))
                print(a)
                print(transformb)
                # Add function tags to the original tree before showing it
                # a second time.
                handlefunctions('add', a)
                print(a)
                print(b)
                print()
            else:
                correct += 1
        else:
            exact += 1
            correct += 1
        d += 1
    # Guard the percentage against a corpus that yielded no sentences.
    print('matches', correct, '/', d, 100 * correct / d if d else 0.0, '%')
    print('exact', exact)
def test_transforms():
    """Test reversibility of Tiger transformations.

    Reads 'alpinosample.export' with two separate corpus readers, applies
    ``transform`` to the trees of one reader, undoes it again with
    ``reversetransform`` and compares the bracketings of the canonicalized
    result with those of the untouched trees from the other reader.
    At most the first 100 sentences are examined. A report is printed for
    every sentence whose bracketings differ, followed by overall totals.
    """
    from discodop.treebanktransforms import transform, reversetransform, \
        bracketings
    from discodop.treebank import NegraCorpusReader, handlefunctions
    headrules = None  # 'alpino.headrules'
    n = NegraCorpusReader('alpinosample.export', headrules=headrules)
    nn = NegraCorpusReader('alpinosample.export', headrules=headrules)
    transformations = ('S-RC', 'VP-GF', 'NP')
    trees = [
        transform(tree, sent, transformations)
        for tree, sent in zip(nn.trees().values(), nn.sents().values())]
    print('\ntransformed')
    correct = exact = e = 0
    for a, b, c, d in islice(
            zip(n.trees().values(), n.sents().values(), trees, count()),
            100):
        transformc = reversetransform(c.copy(True), transformations)
        c1 = bracketings(canonicalize(a))
        c2 = bracketings(canonicalize(transformc))
        z = -1  # 825; set to a sentence index to force a report for it
        if c1 != c2 or e == z:
            # NOTE(review): c1 comes from the original tree and c2 from the
            # round-tripped one; the precision/recall names and the
            # 'gold-transformed' labels below look swapped -- confirm.
            precision = len(set(c1) & set(c2)) / len(set(c1))
            recall = len(set(c1) & set(c2)) / len(set(c2))
            if precision != 1.0 or recall != 1.0 or d == z:
                # encode() yields bytes on Python 3, which str.join rejects;
                # decode the escaped form back to text before joining.
                print(d, ' '.join(
                    ':'.join((
                        str(idx),
                        word.encode('unicode-escape').decode('ascii')))
                    for idx, word in enumerate(b)))
                print('no match', precision, recall)
                print(len(c1), len(c2),
                      'gold-transformed', set(c2) - set(c1),
                      'transformed-gold', set(c1) - set(c2))
                print(a)
                print(transformc)
                # Add function tags to the original tree before showing it
                # a second time.
                handlefunctions('add', a)
                print(a, '\n', b, '\n\n')
            else:
                correct += 1
        else:
            exact += 1
            correct += 1
        e += 1
    # Guard the percentage against a corpus that yielded no sentences.
    print('matches', correct, '/', e, 100 * correct / e if e else 0.0, '%')
    print('exact', exact)
def parse():
    """Parse sentence and return a textual representation of a parse tree.

    Output is either in a HTML fragment or in plain text. To be invoked by an
    AJAX call.

    Query parameters read from ``request.args``:
        sent: sentence to parse; when missing or empty, '' is returned.
        est, objfun: assigned to the estimator / objective of the last stage.
        marg: kbest is enabled for 'nbest' or 'both', sampling for 'sample'
            or 'both'.
        coarse: optional mode for the first stage; only applied when that
            stage is already in a 'pcfg' mode.
        html: when present, the 'parsetree.html' template is rendered;
            otherwise a text/plain response is returned.
        lang: a key of PARSERS, or 'detect' to let ``guesslang`` choose.

    Computed results are stored in CACHE (timeout=5000) and reused for
    identical requests.
    """
    sent = request.args.get('sent', None)
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    objfun = request.args.get('objfun', 'mpp')
    coarse = request.args.get('coarse', None)
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    if not sent:
        return ''
    frags = nbest = None
    senttok = tokenize(sent)
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # The cached drawings and link are rendered with ``html``, so it must be
    # part of the key; otherwise a plain-text request could be served markup
    # cached for an HTML request (and vice versa).
    key = (senttok, est, marg, objfun, coarse, lang, html)
    resp = CACHE.get(key)
    if resp is None:
        link = 'parse?' + url_encode(dict(
            sent=sent, est=est, marg=marg, objfun=objfun, coarse=coarse,
            html=html))
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = coarse
            # A single-stage parser has no second stage to configure.
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)
        results = list(parser.parse(senttok))
        if results[-1].noparse:
            parsetrees = []
            result = 'no parse!'
            frags = nbest = ''
        else:
            if SHOWMORPH:
                for node in results[-1].parsetree.subtrees(
                        lambda n: n and not isinstance(n[0], Tree)):
                    treebank.handlemorphology(
                        'replace', None, node, node.source)
                    node.label = node.label.replace('[]', '')
            if SHOWFUNC:
                treebank.handlefunctions(
                    'add', results[-1].parsetree, pos=True)
            tree = str(results[-1].parsetree)
            prob = results[-1].prob
            parsetrees = results[-1].parsetrees or []
            # Keep only the 10 entries with the highest probability.
            parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
            parsetrees_ = []
            fragments = results[-1].fragments or ()
            APP.logger.info('[%s] %s', probstr(prob), tree)
            tree = Tree.parse(tree, parse_leaf=int)
            result = Markup(DrawTree(tree, senttok).text(
                unicodelines=True, html=html, funcsep='-'))
            frags = Markup(
                'Phrasal fragments used in the most probable '
                'derivation of the highest ranked parse tree:\n'
                + '\n\n'.join(
                    DrawTree(frag).text(unicodelines=True, html=html)
                    for frag in fragments if frag.count('(') > 1))
            for tree, prob, x in parsetrees:
                tree = parser.postprocess(tree, senttok, -1)[0]
                if SHOWMORPH:
                    for node in tree.subtrees(
                            lambda n: n and not isinstance(n[0], Tree)):
                        treebank.handlemorphology(
                            'replace', None, node, node.source)
                if SHOWFUNC:
                    treebank.handlefunctions('add', tree, pos=True)
                parsetrees_.append((tree, prob, x))
            nbest = Markup('\n\n'.join(
                '%d. [%s]\n%s' % (
                    n + 1, probstr(prob),
                    DrawTree(tree, senttok).text(
                        unicodelines=True, html=html, funcsep='-'))
                for n, (tree, prob, _) in enumerate(parsetrees_)))
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg,
            elapsed,
            '10 most probable parse trees:',
            '\n'.join(
                '%d. [%s] %s' % (
                    n + 1, probstr(prob),
                    writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, frags, nbest, info, link),
                  timeout=5000)
    else:
        (sent, result, frags, nbest,  # pylint: disable=unpacking-non-sequence
         info, link) = resp  # pylint: disable=unpacking-non-sequence
    if html:
        return render_template(
            'parsetree.html', sent=sent, result=result, frags=frags,
            nbest=nbest, info=info, link=link, randid=randid())
    else:
        return Response('\n'.join((nbest, frags, info, result)),
                        mimetype='text/plain')
def parse():
    """Parse sentence and return a textual representation of a parse tree,
    in a HTML fragment or plain text. To be invoked by an AJAX call.

    Query parameters read from ``request.args``:
        sent: sentence to parse; when missing or empty, '' is returned.
        est, objfun: assigned to the estimator / objective of the last stage.
        marg: kbest is enabled for 'nbest' or 'both', sampling for 'sample'
            or 'both'.
        coarse: optional mode for the first stage; only applied when that
            stage is already in a 'pcfg' mode.
        html: when present, the 'parsetree.html' template is rendered;
            otherwise a text/plain response is returned.
        lang: a key of PARSERS, or 'detect' to let ``guesslang`` choose.

    The finished response is stored in CACHE (timeout=5000) and reused for
    identical requests.
    """
    sent = request.args.get('sent', None)
    est = request.args.get('est', 'dop1')
    marg = request.args.get('marg', 'nbest')
    objfun = request.args.get('objfun', 'mpp')
    coarse = request.args.get('coarse', None)
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    if not sent:
        return ''
    frags = nbest = None
    senttok = tokenize(sent)
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    key = (senttok, est, marg, objfun, coarse, lang, html)
    # Look the entry up once: a second lookup could miss if the entry
    # disappears in between, which would make the view return None.
    cached = CACHE.get(key)
    if cached is not None:
        return cached
    link = url_encode(dict(
        sent=sent, est=est, marg=marg, objfun=objfun, coarse=coarse,
        html=html))
    parser = PARSERS[lang]
    parser.stages[-1].estimator = est
    parser.stages[-1].objective = objfun
    parser.stages[-1].kbest = marg in ('nbest', 'both')
    parser.stages[-1].sample = marg in ('sample', 'both')
    if parser.stages[0].mode.startswith('pcfg') and coarse:
        parser.stages[0].mode = coarse
        # A single-stage parser has no second stage to configure.
        if len(parser.stages) > 1:
            parser.stages[1].k = 1e-5 if coarse == 'pcfg-posterior' else 50
    results = list(parser.parse(senttok))
    if results[-1].noparse:
        parsetrees = {}
        result = 'no parse!'
        frags = nbest = ''
    else:
        if parser.relationalrealizational:
            treebank.handlefunctions('add', results[-1].parsetree, pos=True)
        tree = str(results[-1].parsetree)
        prob = results[-1].prob
        parsetrees = results[-1].parsetrees or {}
        # Keep only the 10 (tree, prob) items with the highest probability.
        parsetrees = heapq.nlargest(
            10, parsetrees.items(), key=itemgetter(1))
        fragments = results[-1].fragments or ()
        APP.logger.info('[%s] %s', probstr(prob), tree)
        tree = Tree.parse(tree, parse_leaf=int)
        result = Markup(DrawTree(tree, senttok, abbr=True).text(
            unicodelines=True, html=html))
        frags = Markup(
            'Phrasal fragments used in the most probable derivation'
            ' of the highest ranked parse tree:\n'
            + '\n\n'.join(
                DrawTree(Tree.parse(frag, parse_leaf=int), terminals).text(
                    unicodelines=True, html=html)
                for frag, terminals in fragments))
        nbest = Markup('\n\n'.join(
            '%d. [%s]\n%s' % (
                n + 1, probstr(prob),
                DrawTree(
                    parser.postprocess(tree)[0], senttok, abbr=True).text(
                        unicodelines=True, html=html))
            for n, (tree, prob) in enumerate(parsetrees)))
    msg = '\n'.join(stage.msg for stage in results)
    elapsed = [stage.elapsedtime for stage in results]
    elapsed = 'CPU time elapsed: %s => %gs' % (
        ' '.join('%gs' % a for a in elapsed), sum(elapsed))
    info = '\n'.join((
        'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
            len(senttok), lang, est, objfun, marg),
        msg,
        elapsed,
        '10 most probable parse trees:',
        '\n'.join(
            '%d. [%s] %s' % (n + 1, probstr(prob), tree)
            for n, (tree, prob) in enumerate(parsetrees)) + '\n'))
    if html:
        resp = render_template(
            'parsetree.html', sent=sent, result=result, frags=frags,
            nbest=nbest, info=info, link=link, randid=randid())
    else:
        resp = Response('\n'.join((nbest, frags, info, result)),
                        mimetype='text/plain')
    CACHE.set(key, resp, timeout=5000)
    # Return the freshly built response rather than reading it back from the
    # cache, so the view works even when the entry was not stored.
    return resp
def parse():
    """Parse sentence and return a textual representation of a parse tree.

    Output is either in a HTML fragment or in plain text. To be invoked by an
    AJAX call.

    Query parameters read from ``request.args``:
        sent: sentence to parse; when missing or empty, '' is returned.
            If it matches POSTAGS, every token is split on its last '/'
            into word and tag; otherwise it is tokenized without tags.
        est, objfun: assigned to the estimator / objective of the last stage.
        marg: kbest is enabled for 'nbest' or 'both', sampling for 'sample'
            or 'both'.
        coarse: mode for the first stage; only applied when that stage is
            already in a 'pcfg' mode.
        html: when present, the 'parsetree.html' template is rendered;
            otherwise a text/plain response is returned.
        lang: a key of PARSERS, or 'detect' to let ``guesslang`` choose.
        require, block: optional JSON lists of (label, indices) pairs that
            are passed on to the parser.

    Computed results are stored in CACHE (timeout=5000) and reused for
    identical requests.
    """
    sent = request.args.get('sent', None)
    objfun = request.args.get('objfun', 'mpp')
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    coarse = request.args.get('coarse', 'pcfg')
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    require = request.args.get('require', None)
    block = request.args.get('block', None)
    if not sent:
        return ''
    nbest = None
    if POSTAGS.match(sent):
        senttok, tags = zip(*(a.rsplit('/', 1) for a in sent.split()))
    else:
        senttok, tags = tuple(tokenize(sent)), None
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # Normalize the constraints to sorted tuples so they can be part of the
    # cache key.
    if require:
        require = tuple((label, tuple(indices))
                        for label, indices in sorted(json.loads(require)))
    if block:
        block = tuple((label, tuple(indices))
                      for label, indices in sorted(json.loads(block)))
    # The cached drawings and link are rendered with ``html``, so it must be
    # part of the key; otherwise a plain-text request could be served markup
    # cached for an HTML request (and vice versa).
    key = (senttok, tags, est, marg, objfun, coarse, lang, require, block,
           html)
    resp = CACHE.get(key)
    if resp is None:
        urlparams = dict(sent=sent, lang=lang, est=est, marg=marg,
                         objfun=objfun, coarse=coarse, html=html)
        if require:
            urlparams['require'] = json.dumps(require)
        if block:
            urlparams['block'] = json.dumps(block)
        link = '?' + url_encode(urlparams)
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = (
                'pcfg' if coarse == 'pcfg-posterior' else coarse)
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)
        results = list(parser.parse(
            senttok, tags=tags, require=require, block=block))
        if SHOWMORPH:
            replacemorph(results[-1].parsetree)
        if SHOWFUNC:
            treebank.handlefunctions('add', results[-1].parsetree, pos=True)
        tree = str(results[-1].parsetree)
        prob = results[-1].prob
        parsetrees = results[-1].parsetrees or []
        # Keep only the 10 entries with the highest probability.
        parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
        parsetrees_ = []
        LOG.info('[%s] %s', probstr(prob), tree)
        tree = Tree.parse(tree, parse_leaf=int)
        result = Markup(DrawTree(tree, senttok).text(
            unicodelines=True, html=html, funcsep='-'))
        for tree, prob, x in parsetrees:
            tree = parser.postprocess(tree, senttok, -1)[0]
            if SHOWMORPH:
                replacemorph(tree)
            if SHOWFUNC:
                treebank.handlefunctions('add', tree, pos=True)
            parsetrees_.append((tree, prob, x))
        # Dependencies are derived from the best n-best entry; skip them
        # when there is none instead of failing on parsetrees[0].
        if parser.headrules and parsetrees:
            xtree = parser.postprocess(parsetrees[0][0], senttok, -1)[0]
            dep = treebank.writedependencies(xtree, senttok, 'conll')
            depsvg = Markup(DrawDependencies.fromconll(dep).svg())
        else:
            dep = depsvg = ''
        rid = randid()
        # One entry per parse tree: rank, probability, a toggle link with a
        # hidden span listing the fragments, and the tree drawing itself.
        entry = (
            '%d. [%s] '
            '<a href=\'javascript: toggle("f%s%d"); \'>'
            'derivation</a>\n'
            '<span id=f%s%d style="display: none; margin-left: 3em; ">'
            'Fragments used in the highest ranked derivation'
            ' of this parse tree:\n%s</span>\n%s')
        nbest = Markup('\n\n'.join(
            entry % (
                n + 1, probstr(prob), rid, n + 1, rid, n + 1,
                '\n\n'.join(
                    '%s\n%s' % (
                        w,
                        DrawTree(frag).text(unicodelines=True, html=html))
                    for frag, w in fragments or ()
                    # if frag.count('(') > 1
                ),
                DrawTree(tree, senttok).text(
                    unicodelines=True, html=html, funcsep='-'))
            for n, (tree, prob, fragments) in enumerate(parsetrees_)))
        deriv = Markup(
            'Fragments used in the highest ranked derivation'
            ' of best parse tree:\n%s' % (
                '\n\n'.join(
                    '%s\n%s' % (
                        w,
                        DrawTree(frag).text(unicodelines=True, html=html))
                    for frag, w in parsetrees_[0][2] or ()
                    # if frag.count('(') > 1
                ))) if parsetrees_ else ''
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg,
            elapsed,
            '10 most probable parse trees:',
            ''.join(
                '%d. [%s] %s' % (
                    n + 1, probstr(prob),
                    writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(
            key, (sent, result, nbest, deriv, info, link, dep, depsvg),
            timeout=5000)
    else:
        (sent, result, nbest, deriv, info, link, dep, depsvg) = resp
    if html:
        return render_template(
            'parsetree.html', sent=sent, result=result, nbest=nbest,
            deriv=deriv, info=info, link=link, dep=dep, depsvg=depsvg,
            randid=randid())
    else:
        return Response('\n'.join((nbest, info, result)),
                        mimetype='text/plain')
def parse():
    """Parse sentence and return a textual representation of a parse tree.

    Output is either in a HTML fragment or in plain text. To be invoked by an
    AJAX call.

    Query parameters read from ``request.args``:
        sent: sentence to parse; when missing or empty, '' is returned.
        est, objfun: assigned to the estimator / objective of the last stage.
        marg: kbest is enabled for 'nbest' or 'both', sampling for 'sample'
            or 'both'.
        coarse: optional mode for the first stage; only applied when that
            stage is already in a 'pcfg' mode.
        html: when present, the 'parsetree.html' template is rendered;
            otherwise a text/plain response is returned.
        lang: a key of PARSERS, or 'detect' to let ``guesslang`` choose.

    Computed results are stored in CACHE (timeout=5000) and reused for
    identical requests.
    """
    sent = request.args.get('sent', None)
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    objfun = request.args.get('objfun', 'mpp')
    coarse = request.args.get('coarse', None)
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    if not sent:
        return ''
    frags = nbest = None
    senttok = tokenize(sent)
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # The cached drawings and link are rendered with ``html``, so it must be
    # part of the key; otherwise a plain-text request could be served markup
    # cached for an HTML request (and vice versa).
    key = (senttok, est, marg, objfun, coarse, lang, html)
    resp = CACHE.get(key)
    if resp is None:
        link = 'parse?' + url_encode(dict(
            sent=sent, est=est, marg=marg, objfun=objfun, coarse=coarse,
            html=html))
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = coarse
            # A single-stage parser has no second stage to configure.
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)
        results = list(parser.parse(senttok))
        if results[-1].noparse:
            parsetrees = []
            result = 'no parse!'
            frags = nbest = ''
        else:
            if SHOWMORPH:
                replacemorph(results[-1].parsetree)
            if SHOWFUNC:
                treebank.handlefunctions(
                    'add', results[-1].parsetree, pos=True)
            tree = str(results[-1].parsetree)
            prob = results[-1].prob
            parsetrees = results[-1].parsetrees or []
            # Keep only the 10 entries with the highest probability.
            parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
            parsetrees_ = []
            fragments = results[-1].fragments or ()
            APP.logger.info('[%s] %s', probstr(prob), tree)
            tree = Tree.parse(tree, parse_leaf=int)
            result = Markup(DrawTree(tree, senttok).text(
                unicodelines=True, html=html, funcsep='-'))
            frags = Markup(
                'Phrasal fragments used in the most probable '
                'derivation of the highest ranked parse tree:\n'
                + '\n\n'.join(
                    DrawTree(frag).text(unicodelines=True, html=html)
                    for frag in fragments if frag.count('(') > 1))
            for tree, prob, x in parsetrees:
                tree = parser.postprocess(tree, senttok, -1)[0]
                if SHOWMORPH:
                    replacemorph(tree)
                if SHOWFUNC:
                    treebank.handlefunctions('add', tree, pos=True)
                parsetrees_.append((tree, prob, x))
            nbest = Markup('\n\n'.join(
                '%d. [%s]\n%s' % (
                    n + 1, probstr(prob),
                    DrawTree(tree, senttok).text(
                        unicodelines=True, html=html, funcsep='-'))
                for n, (tree, prob, _) in enumerate(parsetrees_)))
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg,
            elapsed,
            '10 most probable parse trees:',
            '\n'.join(
                '%d. [%s] %s' % (
                    n + 1, probstr(prob),
                    writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, frags, nbest, info, link),
                  timeout=5000)
    else:
        (sent, result, frags, nbest,  # pylint: disable=unpacking-non-sequence
         info, link) = resp  # pylint: disable=unpacking-non-sequence
    if html:
        return render_template(
            'parsetree.html', sent=sent, result=result, frags=frags,
            nbest=nbest, info=info, link=link, randid=randid())
    else:
        return Response('\n'.join((nbest, frags, info, result)),
                        mimetype='text/plain')
def parse():
    """Parse sentence and return a textual representation of a parse tree.

    Output is either in a HTML fragment or in plain text. To be invoked by an
    AJAX call.

    Query parameters read from ``request.args``:
        sent: sentence to parse; when missing or empty, '' is returned.
            If it matches POSTAGS, every token is split on its last '/'
            into word and tag; otherwise it is tokenized without tags.
        est, objfun: assigned to the estimator / objective of the last stage.
        marg: kbest is enabled for 'nbest' or 'both', sampling for 'sample'
            or 'both'.
        coarse: mode for the first stage; only applied when that stage is
            already in a 'pcfg' mode.
        html: when present, the 'parsetree.html' template is rendered;
            otherwise a text/plain response is returned.
        lang: a key of PARSERS, or 'detect' to let ``guesslang`` choose.
        require, block: optional JSON lists of (label, indices) pairs that
            are passed on to the parser.

    Computed results are stored in CACHE (timeout=5000) and reused for
    identical requests.
    """
    sent = request.args.get('sent', None)
    objfun = request.args.get('objfun', 'mpp')
    est = request.args.get('est', 'rfe')
    marg = request.args.get('marg', 'nbest')
    coarse = request.args.get('coarse', 'pcfg')
    html = 'html' in request.args
    lang = request.args.get('lang', 'detect')
    require = request.args.get('require', None)
    block = request.args.get('block', None)
    if not sent:
        return ''
    nbest = None
    if POSTAGS.match(sent):
        senttok, tags = zip(*(a.rsplit('/', 1) for a in sent.split()))
    else:
        senttok, tags = tuple(tokenize(sent)), None
    if not senttok or not 1 <= len(senttok) <= LIMIT:
        return 'Sentence too long: %d words, max %d' % (len(senttok), LIMIT)
    if lang == 'detect':
        lang = guesslang(senttok)
    elif lang not in PARSERS:
        return 'unknown language %r; languages: %r' % (lang, PARSERS.keys())
    # Normalize the constraints to sorted tuples so they can be part of the
    # cache key.
    if require:
        require = tuple((label, tuple(indices))
                        for label, indices in sorted(json.loads(require)))
    if block:
        block = tuple((label, tuple(indices))
                      for label, indices in sorted(json.loads(block)))
    # The cached drawings and link are rendered with ``html``, so it must be
    # part of the key; otherwise a plain-text request could be served markup
    # cached for an HTML request (and vice versa).
    key = (senttok, tags, est, marg, objfun, coarse, lang, require, block,
           html)
    resp = CACHE.get(key)
    if resp is None:
        urlparams = dict(sent=sent, est=est, marg=marg, objfun=objfun,
                         coarse=coarse, html=html)
        if require:
            urlparams['require'] = json.dumps(require)
        if block:
            urlparams['block'] = json.dumps(block)
        link = 'parse?' + url_encode(urlparams)
        parser = PARSERS[lang]
        parser.stages[-1].estimator = est
        parser.stages[-1].objective = objfun
        parser.stages[-1].kbest = marg in ('nbest', 'both')
        parser.stages[-1].sample = marg in ('sample', 'both')
        if parser.stages[0].mode.startswith('pcfg') and coarse:
            parser.stages[0].mode = (
                'pcfg' if coarse == 'pcfg-posterior' else coarse)
            if len(parser.stages) > 1:
                parser.stages[1].k = (
                    1e-5 if coarse == 'pcfg-posterior' else 50)
        results = list(parser.parse(
            senttok, tags=tags, require=require, block=block))
        if results[-1].noparse:
            parsetrees = []
            result = 'no parse!'
            nbest = dep = depsvg = ''
        else:
            if SHOWMORPH:
                replacemorph(results[-1].parsetree)
            if SHOWFUNC:
                treebank.handlefunctions(
                    'add', results[-1].parsetree, pos=True)
            tree = str(results[-1].parsetree)
            prob = results[-1].prob
            parsetrees = results[-1].parsetrees or []
            # Keep only the 10 entries with the highest probability.
            parsetrees = heapq.nlargest(10, parsetrees, key=itemgetter(1))
            parsetrees_ = []
            APP.logger.info('[%s] %s', probstr(prob), tree)
            tree = Tree.parse(tree, parse_leaf=int)
            result = Markup(DrawTree(tree, senttok).text(
                unicodelines=True, html=html, funcsep='-'))
            for tree, prob, x in parsetrees:
                tree = parser.postprocess(tree, senttok, -1)[0]
                if SHOWMORPH:
                    replacemorph(tree)
                if SHOWFUNC:
                    treebank.handlefunctions('add', tree, pos=True)
                parsetrees_.append((tree, prob, x))
            # Dependencies are derived from the best n-best entry; skip them
            # when there is none instead of failing on parsetrees[0].
            if parser.headrules and parsetrees:
                xtree = parser.postprocess(
                    parsetrees[0][0], senttok, -1)[0]
                dep = treebank.writedependencies(xtree, senttok, 'conll')
                depsvg = Markup(DrawDependencies.fromconll(dep).svg())
            else:
                dep = depsvg = ''
            rid = randid()
            # One entry per parse tree: rank, probability, a toggle link
            # with a hidden span listing the fragments, and the drawing.
            entry = (
                '%d. [%s] '
                '<a href=\'javascript: toggle("f%s%d"); \'>'
                'derivation</a>\n'
                '<span id=f%s%d style="display: none; margin-left: 3em; ">'
                'Fragments used in the highest ranked derivation'
                ' of this parse tree:\n%s</span>\n%s')
            nbest = Markup('\n\n'.join(
                entry % (
                    n + 1, probstr(prob), rid, n + 1, rid, n + 1,
                    '\n\n'.join(
                        '%s\n%s' % (
                            w,
                            DrawTree(frag).text(
                                unicodelines=True, html=html))
                        for frag, w in fragments or ()
                        # if frag.count('(') > 1
                    ),
                    DrawTree(tree, senttok).text(
                        unicodelines=True, html=html, funcsep='-'))
                for n, (tree, prob, fragments) in enumerate(parsetrees_)))
        msg = '\n'.join(stage.msg for stage in results)
        elapsed = [stage.elapsedtime for stage in results]
        elapsed = 'CPU time elapsed: %s => %gs' % (
            ' '.join('%gs' % a for a in elapsed), sum(elapsed))
        info = '\n'.join((
            'length: %d; lang=%s; est=%s; objfun=%s; marg=%s' % (
                len(senttok), lang, est, objfun, marg),
            msg,
            elapsed,
            '10 most probable parse trees:',
            ''.join(
                '%d. [%s] %s' % (
                    n + 1, probstr(prob),
                    writediscbrackettree(tree, senttok))
                for n, (tree, prob, _) in enumerate(parsetrees)) + '\n'))
        CACHE.set(key, (sent, result, nbest, info, link, dep, depsvg),
                  timeout=5000)
    else:
        (sent, result, nbest, info, link, dep, depsvg) = resp
    if html:
        return render_template(
            'parsetree.html', sent=sent, result=result, nbest=nbest,
            info=info, link=link, dep=dep, depsvg=depsvg, randid=randid())
    else:
        return Response('\n'.join((nbest, info, result)),
                        mimetype='text/plain')