Example #1
import sys
import logging

import spacy
from spacy_hunspell import spaCyHunSpell

log = logging.getLogger(__name__)


def add_hunspell_pipe(model):
    # Bail out early if the hunspell extensions are already registered on Token.
    # Token.get_extension() returns None rather than raising, so has_extension()
    # is the reliable check here.
    if spacy.tokens.Token.has_extension('hunspell_spell'):
        log.warning('SpaCy Token already has a hunspell Pipe section . . .')
        return model
    if sys.platform.startswith('linux'):
        hunspell = spaCyHunSpell(model, 'linux')
    elif sys.platform == 'darwin':
        hunspell = spaCyHunSpell(model, 'mac')
    else:  # sys.platform == 'win32'
        try:
            # TODO determine paths for en_US.dic and en_US.aff on windows
            hunspell = spaCyHunSpell(model, ('en_US.dic', 'en_US.aff'))
        except Exception:
            log.warning(
                'Failed to locate en_US.dic and en_US.aff files. Substituting with fake . . .'
            )
            # passthroughSpaCyPipe is a no-op stand-in defined elsewhere in this project.
            hunspell = passthroughSpaCyPipe()
    try:
        model.add_pipe(hunspell)
    except ValueError:
        log.warning(
            f'SpaCy parser {model} already has a hunspell Pipe section...')
    return model
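A quick usage sketch for the helper above, assuming spaCy 2.x with the en_core_web_sm model and spacy_hunspell installed (the sample sentence is just an illustration):

# Smoke test for add_hunspell_pipe (assumes en_core_web_sm is installed).
nlp = add_hunspell_pipe(spacy.load('en_core_web_sm'))
doc = nlp('Speling is hard.')
for token in doc:
    if not token._.hunspell_spell:  # flagged as misspelled by hunspell
        print(token.text, token._.hunspell_suggest[:3])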
Example #2
def add_hunspell_pipe(model):
    if sys.platform == 'linux' or sys.platform == 'linux2':
        hunspell = spaCyHunSpell(model, 'linux')
    elif sys.platform == 'darwin':
        hunspell = spaCyHunSpell(model, 'mac')
    else:  # sys.platform == 'win32':
        try:
            # TODO determine paths for en_US.dic and en_US.aff on windows
            hunspell = spaCyHunSpell(model, ('en_US.dic', 'en_US.aff'))
        except Exception:
            log.warning('Failed to locate en_US.dic and en_US.aff files. Substituting with fake . . .')
            hunspell = passthroughSpaCyPipe()
    model.add_pipe(hunspell)
    return model
Example #3
# Excerpt: the __init__ of a class that wires a hunspell component into its spaCy pipeline.
def __init__(self):
    self.nlp = spacy.load('en')
    self.hunspell = spaCyHunSpell(
        self.nlp, ('./src/hunspell/en_US.dic', './src/hunspell/en_US.aff'))
    self.nlp.add_pipe(self.hunspell)
    self.nlp.add_pipe(self.merge_entities, name='merge_entities')
    self.exclude = set(string.punctuation)
    self.date_pos = []
    self.date_neg = []
    self.relation_date = []
Example #4
def spell_check(query, indices_to_ignore):
    # Add the hunspell component only once; re-adding a pipe with the same
    # name raises a ValueError in spaCy 2.x.
    if 'hunspell' not in nlp.pipe_names:
        hunspell = spaCyHunSpell(nlp, 'linux')
        nlp.add_pipe(hunspell, name="hunspell")
    doc = nlp(query)
    original_data = [token.text for token in doc]
    for i, data in enumerate(doc):
        if i in indices_to_ignore:
            continue
        if not data._.hunspell_spell:
            suggestions = data._.hunspell_suggest
            if not suggestions:  # no candidates; leave the token unchanged
                continue
            # Rank candidate corrections by vector similarity to the original token.
            suggestions_vocab = [nlp.vocab[suggestion] for suggestion in suggestions]
            result = [data.similarity(ind) for ind in suggestions_vocab]
            max_word_index = result.index(max(result))
            word = suggestions[max_word_index]
            original_data[i] = word
    response = " ".join(original_data)
    return response
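The pick-the-most-similar-suggestion loop above reappears in the next two examples; as a sketch of the same idea factored out (the helper name best_suggestion is hypothetical, not part of spacy_hunspell):

def best_suggestion(token, vocab):
    # Return the hunspell suggestion whose lexeme vector is most similar to
    # the misspelled token, or None when hunspell offers no candidates.
    suggestions = token._.hunspell_suggest
    if not suggestions:
        return None
    return max(suggestions, key=lambda s: token.similarity(vocab[s]))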
Example #5
# Excerpt from a Flask-style handler: request, json, and larger_nlp are
# assumed to be defined at module level.
def spellCheck():
    query = request.args.get('query')
    # Add the hunspell component only once per process.
    if 'hunspell' not in larger_nlp.pipe_names:
        hunspell = spaCyHunSpell(larger_nlp, 'linux')
        larger_nlp.add_pipe(hunspell, name='hunspell')
    doc = larger_nlp(query)
    original_data = [token.text for token in doc]
    for i, data in enumerate(doc):
        if not data._.hunspell_spell:
            suggestions = data._.hunspell_suggest
            if not suggestions:
                continue
            suggestions_vocab = [
                larger_nlp.vocab[suggestion] for suggestion in suggestions
            ]
            result = [data.similarity(ind) for ind in suggestions_vocab]
            max_word_index = result.index(max(result))
            word = suggestions[max_word_index]
            original_data[i] = word
    response = " ".join(original_data)
    return json.dumps({"data": response})
Example #6
import spacy
from spacy_hunspell import spaCyHunSpell


def convertToWordVectors(suggestions):
    return [nlp.vocab[suggestion] for suggestion in suggestions]


nlp = spacy.load('en_core_web_lg')
hunspell = spaCyHunSpell(nlp, 'linux')
nlp.add_pipe(hunspell)

doc = nlp('hwo to read data')
print(doc)
original_data = [token.text for token in doc]
for i, data in enumerate(doc):
    if not data._.hunspell_spell:
        suggestions = data._.hunspell_suggest
        if not suggestions:  # no candidates for this token
            continue
        suggestions_vocab = convertToWordVectors(suggestions)
        result = [data.similarity(ind) for ind in suggestions_vocab]
        max_word_index = result.index(max(result))
        word = suggestions[max_word_index]
        original_data[i] = word
print(" ".join(original_data))
Example #7
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import os

if __name__ == '__main__':
    # -- spacy pipeline test
    import spacy
    from spacy_hunspell import spaCyHunSpell

    nlp = spacy.load('en_core_web_sm')
    hunspell = spaCyHunSpell(nlp, 'linux')  # or 'mac'; on other platforms pass a (dic, aff) path pair
    nlp.add_pipe(hunspell)

    # cheeseburger test
    doc = nlp('I can haz cheezeburger.')
    haz = doc[2]
    assert haz._.hunspell_spell is False
    assert haz._.hunspell_suggest[:5] == \
        ['ha', 'haze', 'hazy', 'has', 'hat']

    # unicode smoke tests: these should tokenize without raising
    doc = nlp('I have a \u2033 bottle')
    doc = nlp('עקבת אחריו בכל רחבי המדינה.')
    doc = nlp(' ł (for example) ')
    doc = nlp('He said \u201CHello\u201D')