def checker(self, id, slang_dict, stop_words, emoji_dict, restart=False):
    """Process one input shard with MMST, resuming a previous run if possible.

    Reads ``self.input + '_<id>'`` line by line, feeds each line to
    ``MMST.input_sentence`` and writes the returned text to
    ``self.output + '_<id>'``.

    Args:
        id: Suffix appended to both the input and the output path.
        slang_dict: Passed through to ``MMST``.
        stop_words: Passed through to ``MMST``.
        emoji_dict: Passed through to ``MMST``.
        restart: When true, an existing output file is overwritten and
            processing starts at the first input line.
    """
    # get enchant dict, instantiate MMST
    broker = enchant.Broker()
    en_dict = broker.request_dict("en_US")
    mmst = MMST(en_dict, slang_dict, stop_words, emoji_dict)

    input_path = self.input + '_{}'.format(id)
    output_path = self.output + '_{}'.format(id)

    # check how many lines have already been processed in a previous run
    if restart or not os.path.isfile(output_path):
        start = 0
        open_mode = "w+"
    else:
        with open(output_path) as done:
            start = sum(1 for _ in done)
        open_mode = "a+"

    # NOTE(review): a line that raises IndexError writes nothing, so the
    # output line count can fall behind the input position and a resumed run
    # may then re-process some lines -- confirm whether that is acceptable.
    # The input is opened first so a missing input never touches the output.
    with open(input_path, "r") as source, open(output_path, open_mode) as sink:
        # process remaining lines
        for line in islice(source, start, None):
            try:
                sink.write(mmst.input_sentence(line, self.load, verbose=False))
            except IndexError:
                print("ERROR: " + line)
def __init__(self, exec_by_ibus):
    """Build the IBus component, connect to the bus and create the helpers.

    Args:
        exec_by_ibus: When true, only the bus name is requested; otherwise
            the component is registered and "bogo" is set as global engine.
    """
    # Metadata shared by the component and the engine description.
    engine_name = "bogo"
    long_engine_name = "BoGo"
    author = "BoGo Development Team <*****@*****.**>"
    description = "ibus-bogo for IBus"
    version = "0.4"
    license = "GPLv3"

    self.component = \
        IBus.Component.new("org.freedesktop.IBus.BoGo",
                           description,
                           version,
                           license,
                           author,
                           "https://github.com/BoGoEngine/ibus-bogo",
                           "/usr/bin/exec",
                           "ibus-bogo")

    engine = IBus.EngineDesc(
        name=engine_name,
        longname=long_engine_name,
        description=description,
        language="vi",
        license=license,
        author=author,
        icon=current_path + "/data/ibus-bogo-dev.svg",
        # icon = "ibus-bogo",
        layout="default")

    self.component.add_engine(engine)
    self.mainloop = GObject.MainLoop()
    self.bus = IBus.Bus()
    self.bus.connect("disconnected", self.bus_disconnected_cb)
    self.engine_count = 0

    # The factory calls self.create_engine on every "create-engine" signal.
    self.factory = IBus.Factory.new(self.bus.get_connection())
    self.factory.connect("create-engine", self.create_engine)

    CONFIG_DIR = os.path.expanduser("~/.config/ibus-bogo/")
    self.config = Config()
    self.abbr_expander = AbbreviationExpander(config=self.config)
    # CONFIG_DIR already ends with "/", so the watched path has a double slash.
    self.abbr_expander.watch_file(CONFIG_DIR + "/abbr_rules.json")

    # NOTE(review): the extent of the else branch was reconstructed from
    # flattened text -- confirm set_global_engine_async belongs inside it.
    if exec_by_ibus:
        self.bus.request_name("org.freedesktop.IBus.BoGo", 0)
    else:
        self.bus.register_component(self.component)
        self.bus.set_global_engine_async("bogo", -1, None, None, None)

    # Dedicated broker so the myspell provider looks in DICT_PATH.
    custom_broker = enchant.Broker()
    custom_broker.set_param('enchant.myspell.dictionary.path', DICT_PATH)
    spellchecker = enchant.DictWithPWL('vi_VN_telex',
                                       pwl=PWL_PATH,
                                       broker=custom_broker)

    # FIXME: Catch enchant.errors.DictNotFoundError exception here.
    english_spellchecker = enchant.Dict('en_US')
    self.auto_corrector = AutoCorrector(self.config, spellchecker,
                                        english_spellchecker)
def spell_check(self, sentence, tokens):
    """Interactively replace misspelled words in ``sentence``.

    Every purely alphabetic token that fails the en_US spell check is
    looked up in ``sentence``; its suggestions are printed and the user is
    asked for the 1-based number of the suggestion to use. The first
    occurrence of the token is then replaced by that suggestion.

    Args:
        sentence: The sentence as a string.
        tokens: List of strings, the tokens of ``sentence``.

    Returns:
        The sentence with the chosen corrections applied.

    Raises:
        ValueError: If the user's answer is not an integer.
    """
    # The ordering must be set on the broker that actually serves the
    # dictionary; a separate broker would not affect the lookup at all.
    broker = enchant.Broker()
    broker.set_ordering("en_US", "aspell, myspell, ispell")
    dictionary = broker.request_dict("en_US")
    # Add a word to your personal dictionary if you don't want it to be
    # spell-checked.
    dictionary.add('ambivert')

    self.spellSuggestions = []
    for token in tokens:
        if not token.isalpha() or dictionary.check(token):
            continue
        position = sentence.find(token)
        self.spellSuggestions = dictionary.suggest(token)
        # Two spaces keep the output identical to the former
        # comma-separated Python 2 print statement.
        print("\nSuggestions:  %s" % (self.spellSuggestions,))
        if position == -1 or not self.spellSuggestions:
            # Nothing to replace (token not literally present) or nothing
            # to replace it with.
            continue
        choice = int(raw_input(
            "\nWhich suggestion nos. did you find correct?\n"))
        if not 1 <= choice <= len(self.spellSuggestions):
            # Out-of-range answer: leave the word unchanged.
            continue
        # Splice at the found position so only this occurrence changes.
        sentence = (sentence[:position]
                    + self.spellSuggestions[choice - 1]
                    + sentence[position + len(token):])
    return sentence
def setLanguage(self, theLang, projectDict=None):
    """Switch the spell checker to ``theLang``.

    A fresh enchant broker and dictionary are requested. If anything in
    that step raises, a ``FakeEnchant`` stand-in is installed instead and
    the spell language is cleared. Afterwards the project dictionary is
    read and each of its words is added to the session.
    """
    try:
        import enchant

        hasOldBroker = self._theBroker is not None
        if hasOldBroker:
            logger.debug("Deleting old pyenchant broker")
            del self._theBroker

        self._theBroker = enchant.Broker()
        self._theDict = self._theBroker.request_dict(theLang)
        self._spellLanguage = theLang
        logger.debug(
            "Enchant spell checking for language '%s' loaded", theLang
        )
    except Exception:
        logger.error(
            "Failed to load enchant spell checking for language '%s'", theLang
        )
        self._theDict = FakeEnchant()
        self._spellLanguage = None

    self._readProjectDictionary(projectDict)
    for projectWord in self._projDict:
        self._theDict.add_to_session(projectWord)
def __init__(self, language=config["default_language"],
             foreign_languages=("german", "french"),
             engine_order="aspell,myspell"):
    """Initialize a spellchecker for the target and a number of frequent
    "foreign" languages.

    Attempting to establish relations between word pairs would make
    little sense if one word was foreign - but e.g. Spanish words are
    fairly frequent in American English corpora. LDT provides an option
    to define what foreign languages could be expected to be frequent in
    the input. They would then be disregarded in subsequent analysis.

    Args:
        language (str): the target language, either a full name
            ("english"), 2-letter code ("en"), or a spellchecker resource
            code for a specific sublanguage ("en_US").
        foreign_languages (tuple): other languages that could be expected
            to be relatively frequent in the input. Ditto for the format.
        engine_order (str): pyenchant variable for the order of
            spellchecker engine providers. Available providers vary by
            system.

    Note:
        Aspell worked better than hunspell or myspell in our experiments.
    """
    super(Spellchecker, self).__init__(language=language)

    #: pyenchant engine, to expose its normal pyenchant attributes
    self.engine = enchant.Broker()

    #: the order of spellcheck engine providers
    self.engine_order = engine_order

    #: setting that order for all the languages
    self.engine.set_ordering('*', self.engine_order)

    #: (str): The main language of the spellchecker.
    self.language = check_language(language)

    #: (enchant dict object): The spellchecker for the main language.
    self.target = self._enchant_dict(self.language)

    # the top-priority provider for the main language
    self.provider = self.target.provider

    #: list(str): the language(s) to be considered "foreign".
    self.foreign_languages = [check_language(lang)
                              for lang in foreign_languages]

    #: list(enchant dict objects): the dicts for the foreign language(s).
    self.foreign = [self._enchant_dict(lang)
                    for lang in self.foreign_languages]
def __init__(self, view, language='en', prefix='gtkspellchecker',
             collapse=True, params={}):
    """Attach a spell checker to ``view``.

    Connects the popup/click handlers, creates the "misspelled" text tag,
    configures an enchant broker from ``params``, picks a language
    (requested one, else 'en', else the first available), compiles the
    default filters and initializes the buffer.

    Raises:
        NoDictionariesFound: If the broker offers no language at all.
    """
    self._view = view
    self.collapse = collapse
    self._view.connect('populate-popup',
                       lambda _entry, popup: self._extend_menu(popup))
    self._view.connect('popup-menu', self._click_move_popup)
    self._view.connect('button-press-event', self._click_move_button)

    self._prefix = prefix
    tag_name = '{}-misspelled'.format(self._prefix)
    # PyGObject exposes the constructor as TextTag.new, PyGTK as TextTag.
    if _pygobject:
        self._misspelled = gtk.TextTag.new(tag_name)
    else:
        self._misspelled = gtk.TextTag(tag_name)
    self._misspelled.set_property('underline', 4)

    self._broker = enchant.Broker()
    for key, setting in params.items():
        self._broker.set_param(key, setting)
    self.languages = SpellChecker._LanguageList.from_broker(self._broker)

    # Language fallback chain: requested -> english -> first available.
    if self.languages.exists(language):
        self._language = language
    elif self.languages.exists('en'):
        logger.warning(('no installed dictionary for language "{}", '
                        'fallback to english'.format(language)))
        self._language = 'en'
    elif self.languages:
        self._language = self.languages[0][0]
        logger.warning(
            ('no installed dictionary for language "{}" '
             'and english, fallback to first language in'
             'language list ("{}")').format(language, self._language))
    else:
        logger.critical('no dictionaries found')
        raise NoDictionariesFound()

    self._dictionary = self._broker.request_dict(self._language)
    self._deferred_check = False
    self._filters = dict(SpellChecker.DEFAULT_FILTERS)

    # Only the whole-text filter is compiled with MULTILINE.
    filter_flags = (
        (SpellChecker.FILTER_WORD, 0),
        (SpellChecker.FILTER_LINE, 0),
        (SpellChecker.FILTER_TEXT, re.MULTILINE),
    )
    self._regexes = {}
    for filter_kind, flags in filter_flags:
        pattern = '|'.join(self._filters[filter_kind])
        self._regexes[filter_kind] = re.compile(pattern, flags)

    self._enabled = True
    self.buffer_initialize()
def __init__(self, language, dictionary_dir, word_list_path):
    """Create a dictionary with a personal word list.

    A private broker is pointed at ``dictionary_dir`` for myspell
    dictionaries; the resulting ``DictWithPWL`` is stored in ``self.dict``.
    """
    myspell_path_param = "enchant.myspell.dictionary.path"

    custom_broker = enchant.Broker()
    custom_broker.set_param(myspell_path_param, dictionary_dir)

    # Log the value as the broker reports it, not the value passed in.
    effective_path = custom_broker.get_param(myspell_path_param)
    logging.info(
        'Enchant broker param "enchant.myspell.dictionary.path" = %s',
        effective_path
    )
    logging.info('Personal word list file: "%s"', word_list_path)

    self.dict = enchant.DictWithPWL(
        language, pwl=word_list_path, broker=custom_broker
    )
def print_enchant_backends_and_languages():
    """
    Report whether PyEnchant can be imported and, if so, list the
    backends and languages it offers.
    """
    w('Enchant (spell checker)... ')
    try:
        import enchant
        w(os.linesep)

        provider_names = []
        for provider in enchant.Broker().describe():
            provider_names.append(provider.name)
        print(' available backends: %s' % ', '.join(provider_names))

        language_tags = enchant.list_languages()
        print(' available languages: %s' % ', '.join(language_tags))
    except ImportError:
        w('FAIL' + os.linesep)
def _construct_enchant(provider, lang, envs, encoding, variety, suponly):
    """Prepare an Enchant broker for ``lang``/``variety`` and ``provider``.

    Unless ``suponly`` is true, Enchant is imported, a broker is created,
    the first of ``variety`` and ``lang`` known to the broker is selected
    and the provider ordering is set for it. Every failure in these steps
    is reported as ``PologyError``.

    NOTE(review): in the visible span no checker is created or returned,
    and ``encoding``, ``dictpath`` and ``temporary`` are not used -- the
    function presumably continues beyond this excerpt; confirm. The extent
    of the ``if not suponly:`` body was reconstructed from flattened text.
    """

    # Get Pology's internal personal dictionary for this language.
    dictpath, temporary = _compose_personal_dict(lang, envs)

    if not suponly:
        try:
            import enchant
        except ImportError:
            pkgs = ["python-enchant"]
            raise PologyError(
                _("@info",
                  "Python wrapper for Enchant not found, "
                  "please install it (possible package names: "
                  "%(pkglist)s).",
                  pkglist=format_item_list(pkgs)))

        # Create Enchant broker.
        try:
            broker = enchant.Broker()
        except Exception, e:
            raise PologyError(
                _("@info",
                  "Cannot initialize Enchant:\n%(msg)s",
                  msg=e))

        # Find Enchant language.
        # The variety is tried before the plain language.
        e_langs = filter(broker.dict_exists, [variety, lang])
        if e_langs:
            e_lang = e_langs[0]
        else:
            if variety is not None:
                raise PologyError(
                    _("@info",
                      "Language '%(lang)s' and variety '%(var)s' "
                      "not known to Enchant.",
                      lang=lang, var=variety))
            else:
                raise PologyError(
                    _("@info",
                      "Language '%(lang)s' not known to Enchant.",
                      lang=lang))

        # Choose the provider for the selected language.
        try:
            broker.set_ordering((e_lang or "*"), provider)
        except Exception, e:
            raise PologyError(
                _("@info",
                  "Cannot configure Enchant for provider '%(pvd)s':\n%(msg)s",
                  pvd=provider, msg=e))
def __init__(self, view, cherrytree_instance, on_rt_node, language='en',
             prefix='gtkspellchecker', collapse=True, params={}):
    """Attach a spell checker to ``view``.

    Connects the popup/click handlers, creates the "misspelled" text tag,
    configures an enchant broker from ``params``, picks a language
    (requested one, else 'en', else the first available) and compiles the
    default filters. The buffer is initialized only when ``on_rt_node`` is
    true.

    Raises:
        NoDictionariesFound: If the broker offers no language at all.
    """
    self._view = view
    self._cherrytree_instance = cherrytree_instance
    self.collapse = collapse
    self._view.connect('populate-popup',
                       lambda _entry, popup: self._extend_menu(popup))
    self._view.connect('popup-menu', self._click_move_popup)
    self._view.connect('button-press-event', self._click_move_button)

    self._prefix = prefix
    tag_name = '{}-misspelled'.format(self._prefix)
    self._misspelled = Gtk.TextTag.new(tag_name)
    self._misspelled.set_property('underline', 4)

    self._broker = enchant.Broker()
    for key, setting in params.items():
        self._broker.set_param(key, setting)
    self.languages = SpellChecker._LanguageList.from_broker(self._broker)

    # Language fallback chain: requested -> english -> first available.
    if self.languages.exists(language):
        self._language = language
    elif self.languages.exists('en'):
        self._language = 'en'
    elif self.languages:
        self._language = self.languages[0][0]
    else:
        raise NoDictionariesFound()

    self._dictionary = self._broker.request_dict(self._language)
    self._deferred_check = False
    self._filters = dict(SpellChecker.DEFAULT_FILTERS)

    # Only the whole-text filter is compiled with MULTILINE.
    filter_flags = (
        (SpellChecker.FILTER_WORD, 0),
        (SpellChecker.FILTER_LINE, 0),
        (SpellChecker.FILTER_TEXT, re.MULTILINE),
    )
    self._regexes = {}
    for filter_kind, flags in filter_flags:
        pattern = '|'.join(self._filters[filter_kind])
        self._regexes[filter_kind] = re.compile(pattern, flags)

    self._enabled = True
    if on_rt_node:
        self.buffer_initialize()
def __init__(self, view, language='en', prefix='spellchecker'):
    """Attach a spell checker for ``language`` to ``view``.

    Connects the click/popup handlers, creates the "misspelled" text tag,
    requests the dictionary from a new enchant broker and sets up the
    buffer.
    """
    self._enabled = True
    self._view = view

    handlers = (
        ('button-press-event', self._button_press_event),
        ('populate-popup', self._populate_popup),
        ('popup-menu', self._popup_menu),
    )
    for signal_name, callback in handlers:
        self._view.connect(signal_name, callback)

    self._prefix = prefix
    tag_name = '%s-misspelled' % (self._prefix)
    self._misspelled = gtk.TextTag(name=tag_name)
    self._misspelled.set_property('underline', 4)

    self._language = language
    self._broker = enchant.Broker()
    self._dictionary = self._broker.request_dict(language)

    self._deferred_check = False
    # Starts with an empty pattern and no ignore expressions.
    self._ignore_regex = re.compile('')
    self._ignore_expressions = []

    self.buffer_setup()
def autospel_name(request, inapp, inmodel):
    """Return enchant suggestions for ``term`` among a model's names.

    A personal-word-list dictionary holding the ``name`` of every object
    of model ``inapp.inmodel`` is cached in the module-level ``DICT`` and
    topped up with new names once it is older than ``CACHE_TIMEOUT``
    seconds.

    GET parameters:
        term: Text to find suggestions for. An empty or missing value
            yields an empty text/plain response.
        limit: Maximum number of suggestions (default 15). A non-integer
            value yields a 400 response.

    Returns:
        An ``HttpResponse`` with the JSON-encoded list of suggestions.
    """
    q = request.GET.get('term')
    if not q:
        return HttpResponse(content_type='text/plain')

    try:
        limit = int(request.GET.get('limit', 15))
    except ValueError:
        return HttpResponseBadRequest()

    Foo = apps.get_model(inapp, inmodel)

    # Initialize Dictionary
    dict_key = '%s:%s:name' % (inapp, inmodel)
    CACHE_TIMEOUT = 15

    # ``in``/``get`` instead of dict.has_key, which Python 3 removed.
    entry = DICT.get(dict_key)
    if entry is not None:
        # Check if cached dictionary is sufficiently fresh
        if time.time() - entry['last_refresh'] > CACHE_TIMEOUT:
            for o in Foo.objects.all():
                if not entry['dict'].is_added(o.name):
                    entry['dict'].add(o.name)
            entry['last_refresh'] = time.time()
    else:
        # Create a dict with all possibilities
        dict_broker = enchant.Broker()
        # Start with a blank dict
        dict_dir = tempfile.mkdtemp()
        names = dict_broker.request_pwl_dict(
            dict_dir + "/enchanting_villages")
        # Add all the names from the database
        for o in Foo.objects.all():
            names.add(o.name)
        # Publish the entry only once it is fully built, so a failure above
        # cannot leave a half-initialized entry behind for later requests.
        entry = {'last_refresh': time.time(), 'dict': names}
        DICT[dict_key] = entry

    # Apply the requested limit; a negative limit yields no suggestions.
    foos = entry['dict'].suggest(q)[:max(limit, 0)]
    data = json.dumps(foos)
    return HttpResponse(data, content_type='application/json')
def _create_checker(providers, langtag, words):
    """Create an Enchant checker and preload it with session words.

    Args:
        providers: Provider ordering string for ``langtag``, or None to
            leave the broker's ordering untouched.
        langtag: Language tag to request a dictionary for. When None, an
            empty personal-word-list dictionary is created instead.
        words: Iterable of words to add to the session, or None.

    Returns:
        The checker, or None if no dictionary could be set up for
        ``langtag``.

    Raises:
        PologyError: If the Enchant Python wrapper cannot be imported.
    """
    try:
        import enchant
    except ImportError:
        pkgs = ["python-enchant"]
        raise PologyError(
            _("@info",
              "Python wrapper for Enchant not found, "
              "please install it (possible package names: "
              "%(pkglist)s).",
              pkglist=format_item_list(pkgs)))

    if langtag is not None:
        try:
            broker = enchant.Broker()
            if providers is not None:
                broker.set_ordering(langtag, providers)
            checker = broker.request_dict(langtag)
            # Probe once so a non-functional dictionary is detected here.
            checker.check(".")
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
            checker = None
    else:
        # Obtain an unused file name: closing the temporary file removes
        # it, the PWL dictionary is then created on that path.
        tmpf = tempfile.NamedTemporaryFile()
        tmpf.close()
        checker = enchant.request_pwl_dict(tmpf.name)
        os.unlink(tmpf.name)

    if checker:
        pname = checker.provider.name.split()[0].lower()
        # For these providers the capitalized and upper-case forms are
        # added explicitly as well.
        need_upcasing = (pname in ("personal", "myspell"))
        for word in words or []:
            if not word:
                # An empty word has no first letter to capitalize.
                continue
            checker.add_to_session(word)
            if need_upcasing:
                checker.add_to_session(word[0].upper() + word[1:])
                checker.add_to_session(word.upper())

    return checker
def autospel_name(request, inapp, inmodel):
    """Return enchant suggestions for ``q`` among a model's names.

    A personal-word-list dictionary holding the ``name`` of every object
    of model ``inapp.inmodel`` is cached in the module-level ``DICT`` and
    topped up with new names once it is older than ``CACHE_TIMEOUT``
    seconds.

    GET parameters:
        q: Text to find suggestions for. An empty or missing value yields
            an empty text/plain response.
        limit: Maximum number of suggestions (default 15). A non-integer
            value yields a 400 response.

    Returns:
        A text/plain ``HttpResponse`` with the suggestions, each followed
        by ``"|\\n"``.
    """
    q = request.GET.get('q')
    if not q:
        return HttpResponse(mimetype='text/plain')

    try:
        limit = int(request.GET.get('limit', 15))
    except ValueError:
        return HttpResponseBadRequest()

    Foo = get_model(inapp, inmodel)

    # Initialize Dictionary
    dict_key = '%s:%s:name' % (inapp, inmodel)
    CACHE_TIMEOUT = 15

    # ``in``/``get`` instead of dict.has_key, which Python 3 removed.
    entry = DICT.get(dict_key)
    if entry is not None:
        # Check if cached dictionary is sufficiently fresh
        if time.time() - entry['last_refresh'] > CACHE_TIMEOUT:
            for o in Foo.objects.all():
                if not entry['dict'].is_added(o.name):
                    entry['dict'].add(o.name)
            entry['last_refresh'] = time.time()
    else:
        # Create a dict with all possibilities
        dict_broker = enchant.Broker()
        # Start with a blank dict
        names = dict_broker.request_pwl_dict(None)
        # Add all the names from the database
        for o in Foo.objects.all():
            names.add(o.name)
        # Publish the entry only once it is fully built, so a failure above
        # cannot leave a half-initialized entry behind for later requests.
        entry = {'last_refresh': time.time(), 'dict': names}
        DICT[dict_key] = entry

    # Apply the requested limit; a negative limit yields no suggestions.
    foos = entry['dict'].suggest(q)[:max(limit, 0)]
    return HttpResponse("%s|\n" % ("|\n".join(foos)), mimetype='text/plain')
def __init__(self, config, abbr_expander):
    """Set up engine state, the auto-corrector and both input backends.

    The preedit backend becomes the active backend, then ``reset`` is
    called.
    """
    super().__init__()

    self.caps = 0
    self.vietnameseMode = True
    self.config = config
    self.ui_delegate = UiDelegate(engine=self)

    # Dedicated broker so the myspell provider looks in DICT_PATH.
    vi_broker = enchant.Broker()
    vi_broker.set_param('enchant.myspell.dictionary.path', DICT_PATH)
    vi_checker = enchant.DictWithPWL(
        'vi_VN_telex', pwl=PWL_PATH, broker=vi_broker)

    # FIXME: Catch enchant.errors.DictNotFoundError exception here.
    en_checker = enchant.Dict('en_US')

    corrector = AutoCorrector(config, vi_checker, en_checker)

    # Both backends receive exactly the same collaborators.
    backend_args = dict(
        engine=self,
        config=config,
        abbr_expander=abbr_expander,
        auto_corrector=corrector,
    )
    self.preedit_backend = PreeditBackend(**backend_args)
    self.surrounding_text_backend = SurroundingTextBackend(**backend_args)

    # The preedit backend is the default
    self.backend = self.preedit_backend
    self.reset()
def __init__(self, language="en", providers="aspell,myspell", basename='analysis', threads=4):
    """Set up the spell checker, tuning options and the rule tables.

    Args:
        language: Language tag of the enchant dictionary to load.
        providers: Enchant provider ordering applied to all languages.
        basename: Stored as ``self.basename`` (output option).
        threads: Accepted but not used in this constructor.

    ``self.hashcat_rule`` maps a hashcat rule character to a callable that
    takes the word plus that rule's arguments and returns the transformed
    word. ``self.leet`` maps substitution characters to letters, and
    ``self.preanalysis_rules`` holds ``(rule list, callable)`` pairs.
    """
    self.enchant_broker = enchant.Broker()
    self.enchant_broker.set_ordering("*", providers)

    self.enchant = enchant.Dict(language, self.enchant_broker)

    # Output options
    self.basename = basename

    # Finetuning word generation
    self.max_word_dist = 10
    self.max_words = 10
    self.more_words = False
    self.simple_words = False

    # Finetuning rule generation
    self.max_rule_len = 10
    self.max_rules = 10
    self.more_rules = False
    self.simple_rules = False
    self.brute_rules = False

    # Debugging options
    self.verbose = False
    self.debug = False
    self.word = None  # Custom word to use.
    self.quiet = False

    ########################################################################
    # Word and Rule Statistics
    self.numeric_stats_total = 0
    self.special_stats_total = 0
    self.foreign_stats_total = 0

    ########################################################################
    # Preanalysis Password Patterns
    # Raw strings: "\." and "\d" are not valid escapes in a plain literal.
    self.password_pattern = dict()
    self.password_pattern["insertion"] = re.compile(r'^[^a-z]*(?P<password>.+?)[^a-z]*$', re.IGNORECASE)
    self.password_pattern["email"] = re.compile(r'^(?P<password>.+?)@[A-Z0-9.-]+\.[A-Z]{2,4}', re.IGNORECASE)
    self.password_pattern["alldigits"] = re.compile(r'^(\d+)$', re.IGNORECASE)
    self.password_pattern["allspecial"] = re.compile(r'^([^a-z0-9]+)$', re.IGNORECASE)

    ########################################################################
    # Hashcat Rules Engine
    self.hashcat_rule = dict()

    # Dummy rule
    self.hashcat_rule[':'] = lambda x: x  # Do nothing

    # Case rules
    self.hashcat_rule["l"] = lambda x: x.lower()  # Lowercase all letters
    self.hashcat_rule["u"] = lambda x: x.upper()  # Capitalize all letters
    self.hashcat_rule["c"] = lambda x: x.capitalize()  # Capitalize the first letter
    self.hashcat_rule["C"] = lambda x: x[0].lower() + x[1:].upper()  # Lowercase the first found character, uppercase the rest
    self.hashcat_rule["t"] = lambda x: x.swapcase()  # Toggle the case of all characters in word
    self.hashcat_rule["T"] = lambda x, y: x[:y] + x[y].swapcase() + x[y + 1:]  # Toggle the case of characters at position N
    self.hashcat_rule["E"] = lambda x: " ".join(
        [i[0].upper() + i[1:] for i in x.split(" ")])  # Upper case the first letter and every letter after a space

    # Rotation rules
    self.hashcat_rule["r"] = lambda x: x[::-1]  # Reverse the entire word
    self.hashcat_rule["{"] = lambda x: x[1:] + x[0]  # Rotate the word left
    self.hashcat_rule["}"] = lambda x: x[-1] + x[:-1]  # Rotate the word right

    # Duplication rules
    self.hashcat_rule["d"] = lambda x: x + x  # Duplicate entire word
    self.hashcat_rule["p"] = lambda x, y: x * y  # Duplicate entire word N times
    self.hashcat_rule["f"] = lambda x: x + x[::-1]  # Duplicate word reversed
    self.hashcat_rule["z"] = lambda x, y: x[0] * y + x  # Duplicate first character N times
    self.hashcat_rule["Z"] = lambda x, y: x + x[-1] * y  # Duplicate last character N times
    self.hashcat_rule["q"] = lambda x: "".join([i + i for i in x])  # Duplicate every character
    self.hashcat_rule["y"] = lambda x, y: x[:y] + x  # Duplicate first N characters
    self.hashcat_rule["Y"] = lambda x, y: x + x[-y:]  # Duplicate last N characters

    # Cutting rules
    self.hashcat_rule["["] = lambda x: x[1:]  # Delete first character
    self.hashcat_rule["]"] = lambda x: x[:-1]  # Delete last character
    self.hashcat_rule["D"] = lambda x, y: x[:y] + x[y + 1:]  # Deletes character at position N
    self.hashcat_rule["'"] = lambda x, y: x[:y]  # Truncate word at position N
    self.hashcat_rule["x"] = lambda x, y, z: x[:y] + x[y + z:]  # Delete M characters, starting at position N
    self.hashcat_rule["@"] = lambda x, y: x.replace(y, '')  # Purge all instances of X

    # Insertion rules
    self.hashcat_rule["$"] = lambda x, y: x + y  # Append character to end
    self.hashcat_rule["^"] = lambda x, y: y + x  # Prepend character to front
    self.hashcat_rule["i"] = lambda x, y, z: x[:y] + z + x[y:]  # Insert character X at position N

    # Replacement rules
    self.hashcat_rule["o"] = lambda x, y, z: x[:y] + z + x[y + 1:]  # Overwrite character at position N with X
    self.hashcat_rule["s"] = lambda x, y, z: x.replace(y, z)  # Replace all instances of X with Y
    self.hashcat_rule["L"] = lambda x, y: x[:y] + chr(ord(x[y]) << 1) + x[y + 1:]  # Bitwise shift left character @ N
    self.hashcat_rule["R"] = lambda x, y: x[:y] + chr(ord(x[y]) >> 1) + x[y + 1:]  # Bitwise shift right character @ N
    self.hashcat_rule["+"] = lambda x, y: x[:y] + chr(ord(x[y]) + 1) + x[y + 1:]  # Increment character @ N by 1 ascii value
    self.hashcat_rule["-"] = lambda x, y: x[:y] + chr(ord(x[y]) - 1) + x[y + 1:]  # Decrement character @ N by 1 ascii value
    self.hashcat_rule["."] = lambda x, y: x[:y] + x[y + 1] + x[y + 1:]  # Replace character @ N with value at @ N plus 1
    self.hashcat_rule[","] = lambda x, y: x[:y] + x[y - 1] + x[y + 1:]  # Replace character @ N with value at @ N minus 1

    # Swapping rules
    self.hashcat_rule["k"] = lambda x: x[1] + x[0] + x[2:]  # Swap first two characters
    self.hashcat_rule["K"] = lambda x: x[:-2] + x[-1] + x[-2]  # Swap last two characters
    # Swap character X with Y. Swapping a position with itself is a no-op;
    # without the first branch the character at that position is duplicated.
    self.hashcat_rule["*"] = lambda x, y, z: (
        x if y == z
        else x[:y] + x[z] + x[y + 1:z] + x[y] + x[z + 1:] if z > y
        else x[:z] + x[y] + x[z + 1:y] + x[z] + x[y + 1:])

    ########################################################################
    # Common numeric and special character substitutions (1337 5p34k)
    self.leet = dict()
    self.leet["1"] = "i"
    self.leet["2"] = "z"
    self.leet["3"] = "e"
    self.leet["4"] = "a"
    self.leet["5"] = "s"
    self.leet["6"] = "b"
    self.leet["7"] = "t"
    self.leet["8"] = "b"
    self.leet["9"] = "g"
    self.leet["0"] = "o"
    self.leet["!"] = "i"
    self.leet["|"] = "i"
    self.leet["@"] = "a"
    self.leet["$"] = "s"
    self.leet["+"] = "t"

    ########################################################################
    # Preanalysis rules to bruteforce for each word
    self.preanalysis_rules = []
    self.preanalysis_rules.append(([], self.hashcat_rule[':']))  # Blank rule
    self.preanalysis_rules.append((['r'], self.hashcat_rule['r']))  # Reverse rule
def __init__(self):
    """Print enchant's dictionaries and languages, then create the checker."""
    # A fresh broker is queried for each listing. The parenthesized
    # single-argument print behaves the same as a statement or a function.
    for listing in ("list_dicts", "list_languages"):
        print(getattr(enchant.Broker(), listing)())
    self.__spell_checker__ = SpellChecker(lang='en_US')
database = '*' if host is None: host = 'localhost' def gotDefinition(definitions): if not definitions: raise errors.NoDefinitions(u'No definitions for "%s" in "%s"' % (word, database)) for d in definitions: defLines = (line.strip() for line in d.text if line.strip()) yield d.db, u' '.join(defLines) return _dictDo(host, 'define', database, word).addCallback(gotDefinition) _enchantBroker = enchant.Broker() # XXX: there should probably be some way to specify this _enchantBroker.set_ordering('*', 'aspell,ispell,myspell') def spell(word, language): """ Check the spelling of C{word} in C{language} @type word: C{unicode} @type language: C{unicode} @raise errors.InvalidLanguage: If no dictionary for C{language} could be found @rtype: C{list} or C{None}
# enchant hook test
#
# Lists the enchant backends, languages and dictionaries that can be found
# and, when en_US is available, runs a few sample spell checks. Exits with
# status 1 when no backend is available.
import sys

import enchant

backends = [x.name for x in enchant.Broker().describe()]
langs = enchant.list_languages()
dicts = [x[0] for x in enchant.list_dicts()]

# At least one backend should be available
if not backends:
    print('E: No dictionary backend available')
    # sys.exit rather than the exit() helper, which only exists when the
    # site module has been loaded.
    sys.exit(1)

if not dicts:
    print('W: No dictionary available')

print(80 * '-')
print('PYTHONPATH: %s' % sys.path)
print(80 * '-')
print('Backends: ' + ', '.join(backends))
print('Languages: %s' % ', '.join(langs))
print('Dictionaries: %s' % dicts)
print(80 * '-')

# Try spell checking if English is available
language = 'en_US'
if language in langs:
    d = enchant.Dict(language)
    print('d.check("hallo") %s' % d.check('hallo'))
    print('d.check("halllo") %s' % d.check('halllo'))
    print('d.suggest("halllo") %s' % d.suggest('halllo'))
import enchant except ImportError: enchant = None import six from pylint.interfaces import ITokenChecker, IAstroidChecker from pylint.checkers import BaseTokenChecker from pylint.checkers.utils import check_messages if sys.version_info[0] >= 3: maketrans = str.maketrans else: maketrans = string.maketrans if enchant is not None: br = enchant.Broker() dicts = br.list_dicts() dict_choices = [''] + [d[0] for d in dicts] dicts = ["%s (%s)" % (d[0], d[1].name) for d in dicts] dicts = ", ".join(dicts) instr = "" else: dicts = "none" dict_choices = [''] instr = " To make it working install python-enchant package." table = maketrans("", "") class SpellingChecker(BaseTokenChecker): """Check spelling in comments and docstrings"""
def __init__(self, view, language='en', prefix='gtkspellchecker',
             collapse=True, params={}):
    """Attach a spell checker to ``view`` and load the word frequencies.

    Connects the popup/click handlers, creates the "misspelled" text tag,
    configures an enchant broker from ``params``, picks a language
    (requested one, else 'en', else the first available), compiles the
    default filters and initializes the buffer.

    The word-frequency table is read from the pickle file
    ``pickled_dict`` in the current working directory when it exists;
    otherwise it is parsed from ``wordlists/en_us_wordlist.xml`` and then
    written to that pickle file.

    Raises:
        NoDictionariesFound: If the broker offers no language at all.
    """
    self._view = view
    self.collapse = collapse
    self._view.connect('populate-popup',
                       lambda entry, menu: self._extend_menu(menu))
    self._view.connect('popup-menu', self._click_move_popup)
    self._view.connect('button-press-event', self._click_move_button)
    self._prefix = prefix
    # PyGObject exposes the constructor as TextTag.new, PyGTK as TextTag.
    if _pygobject:
        self._misspelled = gtk.TextTag.new('{}-misspelled'
                                           .format(self._prefix))
    else:
        self._misspelled = gtk.TextTag('{}-misspelled'.format(self._prefix))
    self._misspelled.set_property('underline', 4)
    self._broker = enchant.Broker()
    for param, value in params.items():
        self._broker.set_param(param, value)
    self.languages = SpellChecker._LanguageList.from_broker(self._broker)
    if self.languages.exists(language):
        self._language = language
    elif self.languages.exists('en'):
        logger.warning(('no installed dictionary for language "{}", '
                        'fallback to english'.format(language)))
        self._language = 'en'
    else:
        if self.languages:
            self._language = self.languages[0][0]
            logger.warning(('no installed dictionary for language "{}" '
                            'and english, fallback to first language in'
                            'language list ("{}")').format(language,
                                                           self._language))
        else:
            logger.critical('no dictionaries found')
            raise NoDictionariesFound()
    self._dictionary = self._broker.request_dict(self._language)
    self._deferred_check = False
    self._filters = dict(SpellChecker.DEFAULT_FILTERS)
    self._regexes = {SpellChecker.FILTER_WORD: re.compile('|'.join(
                         self._filters[SpellChecker.FILTER_WORD])),
                     SpellChecker.FILTER_LINE: re.compile('|'.join(
                         self._filters[SpellChecker.FILTER_LINE])),
                     SpellChecker.FILTER_TEXT: re.compile('|'.join(
                         self._filters[SpellChecker.FILTER_TEXT]),
                         re.MULTILINE)}
    self._enabled = True
    self.buffer_initialize()

    self.notify_language_change_functions = []

    self.frequency_dict = {}
    pp_pickled = 'pickled_dict'
    if pp_pickled and os.path.isfile(pp_pickled):
        # NOTE(review): pickle.load executes whatever the file encodes and
        # the path is relative to the working directory -- make sure that
        # location is trusted, or switch to a data-only format.
        with open(pp_pickled, 'rb') as cache_file:
            self.frequency_dict = pickle.load(cache_file)
    else:
        pp = get_media_path('wordlists/en_us_wordlist.xml')
        frequencies = ET.parse(pp)
        root = frequencies.getroot()
        for child in root:
            self.frequency_dict[child.text] = int(child.attrib['f'])
        # The handle is closed even if dumping fails.
        with open(pp_pickled, 'wb+') as cache_file:
            pickle.dump(self.frequency_dict, cache_file)