示例#1
0
def test_ngram_first_word_match():
    """
    Check that matching only the first word of an ngram yields no result.

    The two-word search over the 'long.conll' fixture is expected to be
    exhausted on the very first request.
    """
    corpus = load_from_file(fixture_location('long.conll'))
    ngram = 'un cabinet'.split()
    matches = find_ngrams(corpus, ngram)

    # An exhausted iterator signals that nothing was matched.
    with pytest.raises(StopIteration):
        next(matches)
示例#2
0
def test_ngram_standard():
    """
    Check find_ngrams on a plain lookup in the 'basic.conll' fixture.

    The first match is expected in sentence 'fr-ud-dev_00001' at index 2.
    """
    corpus = load_from_file(fixture_location('basic.conll'))
    ngram = 'un film sur la'.split()

    first_match = next(find_ngrams(corpus, ngram))
    sentence, index = first_match

    assert sentence.id == 'fr-ud-dev_00001'
    assert index == 2
示例#3
0
def test_ngram_none():
    """
    Check that no ngram is reported when none exists.

    The single-word search over the 'long.conll' fixture is expected to be
    exhausted on the very first request.
    """
    corpus = load_from_file(fixture_location('long.conll'))
    ngram = 'cabinet'.split()
    matches = find_ngrams(corpus, ngram)

    # An exhausted iterator signals that nothing was matched.
    with pytest.raises(StopIteration):
        next(matches)
示例#4
0
def test_ngram_multiword_split():
    """
    Check that an ngram search still works when it goes over a multiword token.

    Exactly one match is expected: sentence 'fr-ud-test_00002' at index 8.
    """
    corpus = load_from_file(fixture_location('long.conll'))
    ngram = 'de " décentrement de le Sujet "'.split()

    matches = find_ngrams(corpus, ngram)
    sentence, index = next(matches)

    assert sentence.id == 'fr-ud-test_00002'
    assert index == 8

    # No further occurrence should be reported after the first one.
    with pytest.raises(StopIteration):
        next(matches)
示例#5
0
def test_ngram_case_insensitive():
    """
    Check that searching with case_sensitive=False works.

    Every match for 'Il' in the 'long.conll' fixture is collected, then the
    sentence ids and match indices are compared, in order, with the
    expected values.
    """
    corpus = load_from_file(fixture_location('long.conll'))
    matches = list(find_ngrams(corpus, 'Il'.split(), case_sensitive=False))

    # Each match exposes the sentence at position 0 and the index at 1.
    found_ids = [match[0].id for match in matches]
    found_indices = [match[1] for match in matches]

    wanted_ids = ['fr-ud-test_00003', 'fr-ud-test_00005', 'fr-ud-test_00008']
    wanted_indices = [1, 16, 0]

    assert found_ids == wanted_ids
    assert found_indices == wanted_indices
示例#6
0
def test_ngram_multiple_per_sentence():
    """
    Check that every ngram is found when a sentence contains several.

    Both expected matches belong to sentence 'fr-ud-test_00008', at
    indices 21 and 26.
    """
    corpus = load_from_file(fixture_location('long.conll'))
    matches = list(find_ngrams(corpus, 'telle ou telle'.split()))

    # Each match exposes the sentence at position 0 and the index at 1.
    found_ids = [match[0].id for match in matches]
    found_indices = [match[1] for match in matches]

    wanted_ids = ['fr-ud-test_00008', 'fr-ud-test_00008']
    wanted_indices = [21, 26]

    assert found_ids == wanted_ids
    assert found_indices == wanted_indices
示例#7
0
def test_ngram_case_insensitive_n_token():
    """
    Check case-insensitive search when the case differs on a later token.

    The query carries unusual casing on tokens after the first one. The
    first match is expected in sentence 'fr-ud-test_00004' at index 8,
    covering the tokens with ids '9' through '15'.
    """
    corpus = load_from_file(fixture_location('long.conll'))
    ngram = 'l\' orgaNisaTion pour La sécurité et la'.split()

    first_match = next(find_ngrams(corpus, ngram, case_sensitive=False))
    sentence, index, tokens = first_match

    found_token_ids = [token.id for token in tokens]
    wanted_token_ids = ['9', '10', '11', '12', '13', '14', '15']

    assert sentence.id == 'fr-ud-test_00004'
    assert index == 8
    assert found_token_ids == wanted_token_ids
示例#8
0
def test_ngram_multiword_split():
    """
    Check that an ngram search still works when it goes over a multiword token.

    Exactly one match is expected: sentence 'fr-ud-test_00002' at index 8,
    covering the tokens with ids '9' through '15'.
    """
    corpus = load_from_file(fixture_location('long.conll'))
    ngram = 'de " décentrement de le Sujet "'.split()

    matches = find_ngrams(corpus, ngram)
    sentence, index, tokens = next(matches)

    found_token_ids = [token.id for token in tokens]
    wanted_token_ids = ['9', '10', '11', '12', '13', '14', '15']

    assert sentence.id == 'fr-ud-test_00002'
    assert index == 8
    assert found_token_ids == wanted_token_ids

    # No further occurrence should be reported after the first one.
    with pytest.raises(StopIteration):
        next(matches)
示例#9
0
def test_ngram_multiple_multiword_splits():
    """
    Check that an ngram search works when there is more than one multiword token.

    Exactly one match is expected: sentence 'fr-ud-test_00003' at index 9,
    covering the tokens with ids '10' through '18'.
    """
    corpus = load_from_file(fixture_location('long.conll'))
    ngram = 'civile de le territoire non autonome de le Sahara'.split()

    matches = find_ngrams(corpus, ngram)
    sentence, index, tokens = next(matches)

    found_token_ids = [token.id for token in tokens]
    wanted_token_ids = ['10', '11', '12', '13', '14', '15', '16', '17', '18']

    assert sentence.id == 'fr-ud-test_00003'
    assert index == 9
    assert found_token_ids == wanted_token_ids

    # No further occurrence should be reported after the first one.
    with pytest.raises(StopIteration):
        next(matches)