Пример #1
0
def test_get_normalized_text(spacy_doc):
    """Check ``utils.get_normalized_text`` against a fixed token list.

    Every token of ``spacy_doc`` whose ``is_space`` flag is false is passed
    through ``utils.get_normalized_text``; the collected results must equal
    the hard-coded list below, in order.
    """
    # Expected values, laid out as runs of tokens that each end with '.'.
    expected = [
        'the', 'unit', 'test', 'be', 'not', 'go', 'well', '.',
        '-PRON-', 'love', 'Python', ',', 'but', '-PRON-', 'do', 'not',
        'love', 'backwards', 'incompatibility', '.',
        'no', 'programmer', 'be', 'permanently', 'damage', 'for',
        'textacy', "'s", 'sake', '.',
        'thank', 'God', 'for', 'Stack', 'Overflow', '.',
    ]

    observed = []
    for tok in spacy_doc:
        # Tokens flagged as whitespace are excluded from the comparison.
        if tok.is_space:
            continue
        observed.append(utils.get_normalized_text(tok))

    assert observed == expected
Пример #2
0
def test_get_normalized_text(spacy_doc):
    """Verify the normalized text produced for each non-space token.

    Tokens of ``spacy_doc`` with a false ``is_space`` attribute are fed to
    ``utils.get_normalized_text`` one by one, and the resulting list is
    compared for equality with the literal ``expected`` list.
    """
    # Each group of lines below covers one run of tokens ending in ".".
    expected = [
        "the", "unit", "test", "be", "not", "go", "well", ".",
        "-PRON-", "love", "Python", ",", "but", "-PRON-", "do", "not",
        "love", "backwards", "incompatibility", ".",
        "no", "programmer", "be", "permanently", "damage", "for",
        "textacy", "'s", "sake", ".",
        "thank", "God", "for", "Stack", "Overflow", ".",
    ]

    # Lazy filter: each token is still checked and normalized in turn.
    non_space_tokens = (tok for tok in spacy_doc if not tok.is_space)
    observed = [utils.get_normalized_text(tok) for tok in non_space_tokens]

    assert observed == expected