def test_incrementaltreereader(self):
    """Check incrementaltreereader on bracketed and export-format input.

    Three fixtures are read:

    - a bracketed tree whose leaves are ``index=word`` pairs, passed as a
      single chunk; ten tokens are expected, in sentence order;
    - one sentence in export format (``#BOS`` ... ``#EOS``), passed line
      by line; four tokens are expected;
    - a plain bracketed tree on a single line; only the number of results
      is checked.
    """
    # Bracketed tree, handed to the reader as one chunk.
    data = (
        ' (top (smain (noun 0=Het) (verb 1=had) (inf (verb 5=kunnen) '
        '(inf (np (det 2=een) (adj 3=prachtige) (noun 4=dag)) '
        '(verb 6=zijn) (pp (prep 7=in) (noun 8=Londen))))) '
        '(punct 9=.)) ')
    result = list(incrementaltreereader([data]))
    assert len(result) == 1
    _tree, sent, _rest = result[0]
    assert sent[0] == u'Het', sent[0]
    assert len(sent) == 10
    assert sent == (
        u'Het had een prachtige dag kunnen zijn in Londen .'.split())

    # Export format is line-oriented: the #BOS / #EOS markers and every
    # token or node record sit on their own line. The fixture is split
    # with splitlines() below, so the line breaks must be explicit.
    data = (
        '\n'
        '#BOS 0\n'
        'is VB -- -- 500\n'
        'John NP -- -- 0\n'
        'rich JJ -- -- 500\n'
        '? ? -- -- 0\n'
        '#500 VP -- -- 0\n'
        '#EOS 0\n')
    result = list(incrementaltreereader(data.splitlines()))
    assert len(result) == 1
    _tree, sent, _rest = result[0]
    assert sent[0] == u'is', sent[0]
    assert len(sent) == 4

    # Plain bracketed tree with the words as leaves.
    data = '''(S (NP Mary) (VP (VB is) (JJ rich)) (. .))'''
    result = list(incrementaltreereader(data.splitlines()))
    assert len(result) == 1
def test_incrementaltreereader(self):
    """Check incrementaltreereader on bracketed and export-format input.

    Three fixtures are read:

    - a bracketed tree with integer leaves followed by the sentence
      itself, passed as a single chunk; ten tokens are expected;
    - one sentence in export format (``#BOS`` ... ``#EOS``), passed line
      by line; four tokens are expected;
    - a plain bracketed tree on a single line; only the number of results
      is checked.
    """
    # Bracketed tree with index leaves, then the tokens of the sentence;
    # handed to the reader as one chunk.
    data = (
        ' (top (smain (noun 0) (verb 1) (inf (verb 5) (inf (np (det 2) '
        '(adj 3) (noun 4)) (verb 6) (pp (prep 7) (noun 8))))) (punct 9)) '
        'Het had een prachtige dag kunnen zijn in Londen . ')
    result = list(incrementaltreereader([data]))
    assert len(result) == 1
    _tree, sent, _rest = result[0]
    assert sent[0] == u'Het', sent[0]
    assert len(sent) == 10

    # Export format is line-oriented: the #BOS / #EOS markers and every
    # token or node record sit on their own line. The fixture is split
    # with splitlines() below, so the line breaks must be explicit.
    data = (
        '\n'
        '#BOS 0\n'
        'is VB -- -- 500\n'
        'John NP -- -- 0\n'
        'rich JJ -- -- 500\n'
        '? ? -- -- 0\n'
        '#500 VP -- -- 0\n'
        '#EOS 0\n')
    result = list(incrementaltreereader(data.splitlines()))
    assert len(result) == 1
    _tree, sent, _rest = result[0]
    assert sent[0] == u'is', sent[0]
    assert len(sent) == 4

    # Plain bracketed tree with the words as leaves.
    data = '''(S (NP Mary) (VP (VB is) (JJ rich)) (. .))'''
    result = list(incrementaltreereader(data.splitlines()))
    assert len(result) == 1
def draw():
    """Wrapper to parse & draw tree(s).

    Reads the ``tree`` request argument, parses it line by line with
    ``incrementaltreereader`` and wraps every parsed tree in a
    ``DrawTree``. The presence of an ``abbr`` request argument is passed
    on as the ``abbr`` flag.

    Returns the "Too much data" message when the input is longer than
    ``LIMIT``; otherwise the result of ``drawtrees``.
    """
    if len(request.args['tree']) > LIMIT:
        return 'Too much data. Limit: %d bytes' % LIMIT
    dts = []
    for item in incrementaltreereader(request.args['tree'].splitlines()):
        # Every other caller in this file unpacks three values
        # (tree, sent, rest) from the reader; only the first two are
        # needed here. Slicing also keeps working for bare pairs.
        tree, sent = item[:2]
        dts.append(DrawTree(tree, sent, abbr='abbr' in request.args))
    return drawtrees(request.args, dts)
def _decodedlines(paths, reader):
    """Yield decoded lines from each file in ``paths`` in turn.

    ``reader`` is a codecs stream-reader factory. Files are opened lazily
    and in binary mode, because the stream reader does the decoding
    itself; each file is closed once it is exhausted or when the
    generator is finalized.
    """
    for path in paths:
        with open(path, 'rb') as inp:
            for line in reader(inp):
                yield line


def main():
    """Text-based tree viewer.

    Command line options (parsed from ``sys.argv``):

    - ``--test``: run ``test()`` and return.
    - ``--help``: print ``USAGE`` and return.
    - ``--fmt``: key into ``READERS`` (default ``export``); ``auto``
      selects format detection through ``incrementaltreereader``.
    - ``--encoding``: input encoding (default ``utf8``).
    - ``--functions``, ``--morphology``: passed through to the reader.
    - ``--numtrees`` / ``-n``: maximum number of trees to show.
    - ``--abbr``, ``--plain``: passed to ``DrawTree`` / its ``text()``
      method as ``abbr=True`` and ``ansi=False`` respectively.

    With file arguments and a format other than ``auto``, every file is
    loaded with the selected corpus reader and the trees of all corpora
    are printed sentence by sentence. Otherwise the files (or standard
    input when no files are given) are read as a stream of lines.

    Exits with status 2 on an invalid command line.
    """
    from getopt import gnu_getopt, GetoptError
    flags = ('test', 'help', 'abbr', 'plain')
    options = ('fmt=', 'encoding=', 'functions=', 'morphology=', 'numtrees=')
    try:
        opts, args = gnu_getopt(sys.argv[1:], 'n:', flags + options)
    except GetoptError as err:
        print('error: %s\n%s' % (err, USAGE))
        sys.exit(2)
    opts = dict(opts)
    if '--test' in opts:
        test()
        return
    elif '--help' in opts:
        print(USAGE)
        return
    limit = opts.get('--numtrees', opts.get('-n'))
    limit = int(limit) if limit else None
    fmt = opts.get('--fmt', 'export')
    encoding = opts.get('--encoding', 'utf8')
    abbr = '--abbr' in opts
    ansi = '--plain' not in opts
    if args and fmt != 'auto':
        reader = READERS[fmt]
        corpora = []
        for path in args:
            corpus = reader(
                    path,
                    encoding=encoding,
                    functions=opts.get('--functions'),
                    morphology=opts.get('--morphology'))
            corpora.append((corpus.trees(), corpus.sents()))
            # After the loop this holds the count of the last corpus.
            numsents = len(corpus.sents())
        print('Viewing:', ' '.join(args))
        # The sentence ids of the first corpus drive the iteration; the
        # same id is then looked up in every corpus.
        for n, sentid in enumerate(islice(corpora[0][0], 0, limit), 1):
            print('%d of %s (sentid=%s; len=%d):' % (
                    n, numsents, sentid, len(corpora[0][1][sentid])))
            for trees, sents in corpora:
                tree, sent = trees[sentid], sents[sentid]
                print(DrawTree(tree, sent, abbr=abbr).text(
                        unicodelines=True, ansi=ansi))
    else:  # read from files or stdin + detect format
        reader = codecs.getreader(encoding)
        if args:
            lines = _decodedlines(args, reader)
        else:
            # The codec reader needs bytes: use the binary buffer behind
            # sys.stdin where it exists, else sys.stdin itself.
            lines = reader(getattr(sys.stdin, 'buffer', sys.stdin))
        trees = islice(incrementaltreereader(lines,
                morphology=opts.get('--morphology'),
                functions=opts.get('--functions')),
                0, limit)
        try:
            for n, (tree, sent, rest) in enumerate(trees, 1):
                print('%d. (len=%d): %s' % (n, len(sent), rest))
                print(DrawTree(tree, sent, abbr=abbr).text(
                        unicodelines=True, ansi=ansi))
        except (IOError, KeyboardInterrupt):
            # Deliberate best-effort: stop quietly on interrupt or I/O
            # failure (presumably a closed output pipe).
            # NOTE(review): input files are opened lazily inside this
            # loop, so an unreadable input file is silenced here too.
            pass
def draw():
    """Wrapper to parse & draw tree(s).

    Parses the ``tree`` request argument with ``incrementaltreereader``;
    the presence of the ``morph`` / ``func`` request arguments selects
    ``'between'`` for the reader's ``morphology`` / ``functions`` options,
    and ``abbr`` is passed on to ``DrawTree``.

    Returns the "Too much data" message when the input is longer than
    ``LIMIT``, a plain-text ``Response`` carrying the error message when
    parsing or drawing raises, a plain-text ``'No trees!'`` response when
    nothing was parsed, and otherwise the result of ``drawtrees``.
    """
    text = request.args['tree']
    if len(text) > LIMIT:
        return 'Too much data. Limit: %d bytes' % LIMIT

    # Read everything up front so that a parse error anywhere in the
    # input is reported before any tree is drawn.
    try:
        parsed = list(incrementaltreereader(
                text.splitlines(),
                morphology='between' if 'morph' in request.args else None,
                functions='between' if 'func' in request.args else None))
    except Exception as err:
        return Response(str(err), mimetype='text/plain')

    drawings = []
    for tree, sent, _rest in parsed:
        try:
            drawing = DrawTree(tree, sent, abbr='abbr' in request.args)
        except Exception as err:
            return Response(str(err), mimetype='text/plain')
        drawings.append(drawing)

    if drawings:
        return drawtrees(request.args, drawings)
    return Response('No trees!', mimetype='text/plain')
def draw():
    """Wrapper to parse & draw tree(s).

    Parses the ``tree`` request argument with ``incrementaltreereader``;
    the presence of the ``morph`` / ``func`` request arguments selects
    ``'add'`` for the reader's ``morphology`` / ``functions`` options, and
    ``abbr`` is passed on to ``DrawTree``.

    Returns the "Too much data" message when the input is longer than
    ``LIMIT``, a plain-text ``Response`` carrying the error message when
    parsing or drawing raises, a plain-text ``'No trees!'`` response when
    nothing was parsed, and otherwise the result of ``drawtrees``.
    """
    def plaintext(message):
        """Wrap a message in a plain-text response."""
        return Response(message, mimetype='text/plain')

    source = request.args['tree']
    if len(source) > LIMIT:
        return 'Too much data. Limit: %d bytes' % LIMIT

    try:
        items = list(incrementaltreereader(
                source.splitlines(),
                morphology='add' if 'morph' in request.args else None,
                functions='add' if 'func' in request.args else None))
    except Exception as err:  # pylint: disable=broad-except
        return plaintext(str(err))

    drawn = []
    for tree, sent, _rest in items:
        try:
            picture = DrawTree(tree, sent, abbr='abbr' in request.args)
        except Exception as err:  # pylint: disable=broad-except
            return plaintext(str(err))
        drawn.append(picture)

    if not drawn:
        return plaintext('No trees!')
    return drawtrees(request.args, drawn)