Пример #1
0
    def init(self):
        """Set up spell-checking state from the optional 'spell' config section.

        Reads ``aggro`` and ``lang`` from ``xmpp.config.get('spell')``,
        falling back to ``False`` and ``'en'`` when the section or the key is
        missing or falsy. Also compiles the regex that matches any character
        other than an ASCII letter or an apostrophe, and creates the
        ``Speller`` for the selected language.
        """
        # A missing or empty section behaves exactly like an empty mapping.
        options = xmpp.config.get('spell') or {}
        self.aggro = options.get('aggro') or False
        self.lang = options.get('lang') or 'en'

        self.rgx = re.compile(r"[^a-zA-Z']")
        self.speller = Speller('lang', self.lang)
Пример #2
0
 def __init__(self,
              server_list,
              nick="OrthoNazi",
              langs=["fr_FR", "en_US"],
              channels=["#test"],
              whitelist_path=None,
              delay=300,
              trump_delay=10,
              victim="un débile profond",
              **params):
     """Create the bot and its spell-checking state.

     Args:
         server_list: Forwarded unchanged to the parent constructor.
         nick: Bot nickname; also forwarded to the parent constructor and
             whitelisted (lower-cased) so it is never reported.
         langs: Language codes; one ``Speller`` is created per entry.
         channels: Channels stored in ``self.nazi_channels``.
         whitelist_path: Stored in ``self.whitelist_path``; presumably the
             file ``self.load()`` reads -- confirm against ``load``.
         delay: Argument for the main ``RateLimiter``.
         trump_delay: Argument for the second ``RateLimiter``.
         victim: Stored in ``self.victim``.
         **params: Extra keyword arguments for the parent constructor.
     """
     super().__init__(server_list, nick, "OrthoNazi", **params)
     # Copy the list: the default value is a single list object shared by
     # every call, so storing it directly would let one instance's changes
     # leak into all other instances created with the default.
     self.nazi_channels = list(channels)
     self.spellers = [Speller("lang", lang) for lang in langs]
     self.rl = RateLimiter(delay)
     self.trump_rl = RateLimiter(trump_delay)
     self.whitelist_path = whitelist_path
     # Seed the whitelist with the bot's own nick before load() runs.
     self.whitelist = {nick.lower(): True}
     self.victim = victim
     self.load()
Пример #3
0
def get_speller(lang=settings.LANGUAGE_CODE,
                encoding=settings.DEFAULT_CHARSET,
                home_dir=getattr(settings, 'ASPELL_HOME_DIR', None)):
    """
    Build a Speller object ready for checking words.

    The configuration always carries the ``encoding`` and ``lang`` keys, and
    a ``home-dir`` key when ``home_dir`` is not None. The defaults are read
    from ``settings`` once, when this function is defined.

    Note: aspell and pyaspell seem to expect the config keys in different
    forms. When ``c_aspell`` is true each (key, value) pair is passed as its
    own positional argument; otherwise the whole list of pairs is passed as
    a single argument.

    """
    config = [('encoding', encoding), ('lang', lang)]
    if home_dir is not None:
        config.append(('home-dir', home_dir))

    if c_aspell:
        return Speller(*config)
    return Speller(config)
Пример #4
0
 def __init__(self,
              server_list,
              nick="OrthoNazi",
              langs=["fr_FR", "en_US"],
              channels=["#test"],
              whitelist_path=None,
              delay=300,
              **params):
     """Create the bot, its spellers and its word whitelist.

     Args:
         server_list: Forwarded unchanged to the parent constructor.
         nick: Bot nickname; also forwarded to the parent constructor and
             always whitelisted (lower-cased).
         langs: Language codes; one ``Speller`` is created per entry.
         channels: Channels stored in ``self.nazi_channels``.
         whitelist_path: Path of a pickled dict of whitelisted words, or
             None to start with an empty whitelist.
         delay: Argument for the ``RateLimiter``.
         **params: Extra keyword arguments for the parent constructor.
     """
     super().__init__(server_list, nick, "OrthoNazi", **params)
     # Copy the list: the default value is one list object shared by every
     # call, so storing it directly would alias it across instances.
     self.nazi_channels = list(channels)
     self.spellers = [Speller("lang", lang) for lang in langs]
     self.rl = RateLimiter(delay)
     self.whitelist_path = whitelist_path

     self.whitelist = {}
     if whitelist_path is not None:
         # NOTE(security): pickle can execute arbitrary code while loading;
         # only point whitelist_path at files this bot wrote itself.
         try:
             # Pickle data is binary; text mode makes pickle.load fail.
             with open(whitelist_path, "rb") as f:
                 loaded = pickle.load(f)
         except Exception as exc:
             # Loading stays best-effort (a missing or corrupt file must
             # not prevent start-up), but the failure is no longer silent.
             logging.warning("Could not load whitelist from %s: %s",
                             whitelist_path, exc)
         else:
             if isinstance(loaded, dict):
                 self.whitelist = loaded
                 logging.info("Whitelist loaded with {0} words".format(
                     len(self.whitelist)))
             else:
                 logging.warning("Ignoring whitelist in %s: not a dict",
                                 whitelist_path)
     # Add the bot's own nick instead of replacing the loaded whitelist.
     self.whitelist[nick.lower()] = True
Пример #5
0
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

from multiprocessing import Pool

from logging import getLogger
# Module-level logger named after this module.
logger = getLogger(__name__)

# Shared lemmatizer instance.
# NOTE(review): WordNetLemmatizer is not imported in the visible part of this
# file -- confirm it is imported further up.
wnl = WordNetLemmatizer()
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
# English stop words plus a handful of punctuation tokens.
stops = set(stopwords.words("english")) | set(
    ['?', ',', '.', ';', ':', '"', "'"])

from aspell import Speller
# Shared English spell checker, created once at import time.
asp = Speller('lang', 'en')
import spacy
# Shared spaCy pipeline, loaded once at import time.
nlp = spacy.load('en')


def get(w):
    """Return ``w`` if the speller knows it, else its first suggestion.

    When the speller offers no suggestion at all, ``w`` is returned as is.
    """
    if w not in asp:
        try:
            return asp.suggest(w)[0]
        except IndexError:
            # No suggestions: fall through and keep the original word.
            pass
    return w


def split_into_words(text):
    return [
Пример #6
0
from pattern.en import suggest
from aspell import Speller

# The algorithm manually implemented at spelling_correction_manual.py is available to be
# used out of the box in the pattern library:
# https://www.clips.uantwerpen.be/pattern.
# (Compatible with python 2.7 only)
#
# "Pattern is a web mining module for the Python programming language.
#  It has tools for data mining (Google, Twitter and Wikipedia API, a web crawler, a HTML DOM parser),
#  natural language processing (part-of-speech taggers, n-gram search, sentiment analysis, WordNet),
#  machine learning (vector space model, clustering, SVM), network analysis and <canvas> visualization."

# Other libraries are:
# PyEnchant: http://pythonhosted.org/pyenchant/
# AspellPython, wrapper around GNU Aspell: https://github.com/WojciechMula/aspell-python (requires libaspell-dev)
#   sudo apt install libaspell-dev
#   pip install aspell-python-py3

# Suggestions from pattern's `suggest` for two misspelled words.
for misspelled in ("fianlly", "flaot"):
    print(suggest(misspelled))

# Suggestions from an aspell Speller for the first word.
sp = Speller()
print(sp.suggest("fianlly"))
Пример #7
0
    def score(self, data):
        """Return, for each document group, its best similarity to the text.

        ``self.text`` is tokenized once. For every group yielded by
        ``self.make_docs(data)`` the result holds the maximum of
        ``self.sim(tokenized_text, doc)`` over the docs in that group. An
        empty group raises ``ValueError`` from ``max``.
        """
        reference = self.tokenizer(self.text)
        return [
            max(self.sim(reference, doc) for doc in group)
            for group in self.make_docs(data)
        ]

    def _create(self, data):
        """Wrap the scores for ``data`` in a Series with its index and name."""
        values = self.score(data)
        return Series(values, index=data.index, name=data.name)


# Module-level spell checker; stays None when the aspell bindings cannot be
# imported, which SpellingErrorCount.__init__ reports as a NameError.
chkr = None
try:
    from aspell import Speller
    chkr = Speller()
except ImportError:
    # aspell is an optional dependency: keep chkr as None.
    pass


def count_spell_errors(toks, exemptions):
    """Count the tokens in ``toks`` that the module spell checker rejects.

    Tokens found in ``exemptions`` are skipped without being checked.
    Returns 0 when ``toks`` is empty or otherwise falsy.
    """
    if toks:
        misspelled = 0
        for tok in toks:
            if tok in exemptions:
                continue
            if not chkr.check(tok):
                misspelled += 1
        return misspelled
    return 0


class SpellingErrorCount(Feature):
    def __init__(self, feature, exemptions=None):
        if not chkr:
            raise NameError("You need to install the python aspell library")
        super(SpellingErrorCount, self).__init__(feature)