def test_lemmatize_unicode(): _check_corenlp() lines = parse(u"H\xf6v\xedk hit C\xe9sar", annotators=['tokenize', 'ssplit', 'pos', 'lemma']) saf = stanford_to_saf(lines) assert_equal({t['lemma'] for t in saf['tokens']}, {'Cesar', 'hit', 'Hovik'})
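# _check_corenlp is referenced by the tests but not defined in this
# excerpt. Below is a minimal sketch, assuming CoreNLP's location is
# given by a CORENLP_HOME environment variable (the variable name and
# the skip behaviour are assumptions, not the canonical helper).
from nose import SkipTest


def _check_corenlp():
    # Skip, rather than fail, when no CoreNLP installation is configured.
    if not os.environ.get('CORENLP_HOME'):
        raise SkipTest("CoreNLP not found; set CORENLP_HOME to its "
                       "installation directory")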
def test_ner():
    _check_corenlp()
    annotators = ['tokenize', 'ssplit', 'pos', 'lemma', 'ner']
    saf = stanford_to_saf(parse("John lives in Amsterdam",
                                annotators=annotators))
    lemmata = {t['id']: t['lemma'] for t in saf['tokens']}
    entities = {lemmata[e['tokens'][0]]: e['type'] for e in saf['entities']}
    assert_equal(entities, {'John': 'PERSON', 'Amsterdam': 'LOCATION'})
def test_parse_xml():
    with open(os.path.join(os.path.dirname(__file__),
                           "test_corenlp.xml")) as f:
        xml = f.read()
    saf = stanford_to_saf(xml)
    assert_equal({t['lemma'] for t in saf['tokens']},
                 {"John", "attack", "I", "in", "London", "hit",
                  "he", "back", "."})
    london = [t for t in saf['tokens'] if t['lemma'] == 'London'][0]
    assert_equal(london['pos'], 'NNP')
    assert_in({"type": "LOCATION", "tokens": [london['id']]},
              saf['entities'])
def test_lemmatize():
    _check_corenlp()
    lines = parse("He jumped. \n\n Cool!",
                  annotators=['tokenize', 'ssplit', 'pos', 'lemma'])
    saf = stanford_to_saf(lines)
    assert_equal(set(saf.keys()), {'tokens', 'header'})
    assert_equal({t['lemma'] for t in saf['tokens']},
                 {'he', 'jump', 'cool', '!', '.'})
    assert_equal({t['sentence'] for t in saf['tokens']}, {1, 2})
def test_parse():
    _check_corenlp()
    saf = stanford_to_saf(parse("John loves himself"))
    lemmata = {t['id']: t['lemma'] for t in saf['tokens']}
    assert_equal(saf['trees'],
                 [{"tree": "(ROOT (S (NP (NNP John)) (VP (VBZ loves) "
                           "(NP (PRP himself)))))",
                   "sentence": 1}])
    deps = {(lemmata[d['child']], lemmata[d['parent']], d['relation'])
            for d in saf['dependencies']}
    assert_equal(deps, {('John', 'love', 'nsubj'),
                        ('himself', 'love', 'dobj')})
    corefs = {tuple(sorted([lemmata[c[0][0]], lemmata[c[1][0]]]))
              for c in saf['coreferences']}
    assert_equal(corefs, {tuple(sorted(['John', 'himself']))})
def test_multiple_sentences():
    _check_corenlp()
    p = parse("John lives in Amsterdam. He works in London")
    saf = stanford_to_saf(p)
    tokens = {t['id']: t for t in saf['tokens']}
    # Are token ids unique across sentences?
    assert_equal(len(tokens), len(saf['tokens']))
    # Is the location in the second sentence correct?
    entities = {tokens[e['tokens'][0]]['lemma']: e['type']
                for e in saf['entities']}
    assert_in(('London', 'LOCATION'), entities.items())
    # Are the dependencies in both sentences correct?
    rels = [(tokens[rel['child']]['lemma'], rel['relation'],
             tokens[rel['parent']]['lemma'])
            for rel in saf['dependencies']]
    assert_in(("he", "nsubj", "work"), rels)
    assert_in(("John", "nsubj", "live"), rels)
    # Is coreference across sentences parsed correctly?
    coref = {(tokens[x[0][0]]['lemma'], tokens[x[1][0]]['lemma'])
             for x in saf['coreferences']}
    assert_equal(coref, {("John", "he")})