Exemplo n.º 1
0
def tag(fi_tok, zh_tok, zh_untok, align):
    """Extract both taggings, number their tags, and add supports.

    ``fi_tok`` is passed to the "FinExtractor" extractor; ``zh_untok`` and
    ``zh_tok`` (in that order) are passed to the "CmnExtractor" extractor.
    Every tag yielded by ``iter_tags()`` on the first tagging and then on
    the second receives a consecutive ``id`` starting at 0, after which
    ``add_supports`` is called with both taggings and ``align``.

    Returns the ``(fi_tagging, zh_tagging)`` pair.
    """
    fi_extractor = get_extractor("FinExtractor")
    fi_tagging = fi_extractor.extract(fi_tok)
    zh_extractor = get_extractor("CmnExtractor")
    zh_tagging = zh_extractor.extract(zh_untok, zh_tok)

    # One running counter across both taggings so ids never collide.
    every_tag = chain(fi_tagging.iter_tags(), zh_tagging.iter_tags())
    for tag_id, (_token, tag_obj) in enumerate(every_tag):
        tag_obj.id = tag_id

    add_supports(fi_tagging, zh_tagging, align)
    return fi_tagging, zh_tagging
Exemplo n.º 2
0
def test_heraa_hetkeksi():
    """Check "hetkeksi" alone and inside "Herää hetkeksi".

    Extracting the single word must give exactly one token, and the
    two-word input must give exactly one token selected by
    ``_filter_toks(tagging, "hetki")``.
    """
    fin_extractor = get_extractor("FinExtractor")

    single_word = fin_extractor.extract("hetkeksi")
    assert len(single_word.tokens) == 1

    phrase = fin_extractor.extract("Herää hetkeksi")
    hetki_matches = _filter_toks(phrase, "hetki")
    assert len(hetki_matches) == 1
Exemplo n.º 3
0
def test_extract_zh_friend():
    """Check the "CmnExtractor" tagging of "朋友".

    Exactly one token whose ``token`` equals "朋友" must be present, and it
    must carry 2 anchors and 4 tags.
    """
    zh_untok = "朋友"
    zh_tok = "朋友"
    extractor = get_extractor("CmnExtractor")
    zh_tagging = extractor.extract(zh_untok, zh_tok)

    friend_tokens = [tok for tok in zh_tagging.tokens if tok.token == "朋友"]
    # Neither zero nor several matches are acceptable.
    assert len(friend_tokens) == 1

    friend = friend_tokens[0]
    assert len(friend.anchors) == 2
    assert len(friend.tags) == 4
Exemplo n.º 4
0
def test_hyvaa():
    """Check per-wordnet lemma counts for the token "Hyvää" in "Hyvää !".

    The last token whose ``token`` equals "Hyvää" is inspected. For each of
    its tags, the first element of every ``lemma_objs`` entry is used as a
    key into a counter for "fin", "qf2" and "qwf"; the totals must reach at
    least 27, 25 and 7 respectively.
    """
    extractor = get_extractor("FinExtractor")
    tagging = extractor.extract("Hyvää !")

    hits = [tok for tok in tagging.tokens if tok.token == "Hyvää"]
    assert hits
    # When several tokens match, the final one is the one examined.
    target = hits[-1]

    # A plain dict with fixed keys: an unexpected key raises KeyError.
    per_wordnet = dict.fromkeys(("fin", "qf2", "qwf"), 0)
    for tag in target.tags:
        for wordnet, _lemma in tag.lemma_objs:
            per_wordnet[wordnet] += 1

    for wordnet, minimum in (("fin", 27), ("qf2", 25), ("qwf", 7)):
        assert per_wordnet[wordnet] >= minimum
Exemplo n.º 5
0
def test_extract_zh_sincere_congrats_dave():
    """Run "CmnExtractor" extraction on a two-sentence input.

    Nothing is asserted about the result; the test passes as long as the
    call completes without raising.
    """
    untokenized = "真诚地,大卫。 恭喜你。"
    tokenized = "真诚地 , 大卫 。 恭喜 你 。"
    extractor = get_extractor("CmnExtractor")
    extractor.extract(untokenized, tokenized)
Exemplo n.º 6
0
def test_extract_zh_untok_sincere():
    """Run ``extract_untok`` on "真诚地" and hand the tagging to ``sincere_asserts``."""
    extractor = get_extractor("CmnExtractor")
    sincere_asserts(extractor.extract_untok("真诚地"))
Exemplo n.º 7
0
def test_extract_fin_murhamies_has_murha_and_mies():
    """Check that "murhamies" gives one "murha" token and one "mies" token.

    Tokens are selected with ``_filter_toks`` on the "FinExtractor" tagging.
    """
    extractor = get_extractor("FinExtractor")
    tagging = extractor.extract("murhamies")

    murha_matches = _filter_toks(tagging, "murha")
    assert len(murha_matches) == 1

    mies_matches = _filter_toks(tagging, "mies")
    assert len(mies_matches) == 1
Exemplo n.º 8
0
def test_extract_fin_ei_koskaan():
    """Check that the sentence gives one or two "ei_koskaan" tokens.

    Tokens are selected with ``_filter_toks`` on the "FinExtractor" tagging.
    """
    sentence = "Älä koskaan sano mitään tuollaista hänestä !"
    extractor = get_extractor("FinExtractor")
    tagging = extractor.extract(sentence)

    match_count = len(_filter_toks(tagging, "ei_koskaan"))
    assert match_count in (1, 2)
Exemplo n.º 9
0
def test_extract_fin_saada_aikaan():
    """Check that the sentence gives at least one "saada_aikaan" token.

    Tokens are selected with ``_filter_toks`` on the "FinExtractor" tagging.
    """
    sentence = "Katso , mitä olet saanut aikaan ."
    extractor = get_extractor("FinExtractor")
    tagging = extractor.extract(sentence)

    match_count = len(_filter_toks(tagging, "saada_aikaan"))
    assert match_count > 0
Exemplo n.º 10
0
def test_open_brace():
    """Check that no tag has an empty lemma for input starting with "[".

    Every tag of every token in the "FinExtractor" tagging is inspected.
    """
    extractor = get_extractor("FinExtractor")
    tagging = extractor.extract("[ sillä on tapansa !")
    assert all(
        tag.lemma != ""
        for token in tagging.tokens
        for tag in token.tags
    )
Exemplo n.º 11
0
def test_gordon():
    """Check that the first anchor of the first token has ``char == 7``."""
    extractor = get_extractor("FinExtractor")
    tagging = extractor.extract("Gordon on jossain täällä .")
    first_token = tagging.tokens[0]
    first_anchor = first_token.anchors[0]
    assert first_anchor.char == 7