示例#1
0
    def checker(self, id, slang_dict, stop_words, emoji_dict, restart=False):
        """Run the MMST pipeline over one input shard, resuming if possible.

        The shard files are ``self.input + '_<id>'`` (read) and
        ``self.output + '_<id>'`` (written).

        Args:
            id: Shard identifier appended to the input and output paths.
            slang_dict: Passed through to ``MMST``.
            stop_words: Passed through to ``MMST``.
            emoji_dict: Passed through to ``MMST``.
            restart: When true, ignore any existing output and start over.
        """
        # get enchant dict, instantiate MMST
        broker = enchant.Broker()
        d = broker.request_dict("en_US")
        g = MMST(d, slang_dict, stop_words, emoji_dict)

        input_path = self.input + '_{}'.format(id)
        output_path = self.output + '_{}'.format(id)

        # check how many lines have already been processed in a previous run
        if restart or not os.path.isfile(output_path):
            start = 0
            open_mode = "w+"
        else:
            # NOTE(review): resuming assumes one output line per processed
            # input line; lines that hit the IndexError branch below write
            # nothing, so the count can lag behind -- confirm this is fine.
            with open(output_path) as previous:
                start = sum(1 for _ in previous)
            open_mode = "a+"

        # process remaining lines; both handles are closed on exit, and the
        # input is opened first so a missing input never creates the output
        with open(input_path, "r") as source, \
                open(output_path, open_mode) as f:
            for line in islice(source, start, None):
                try:
                    tmp = g.input_sentence(line, self.load, verbose=False)
                    f.write(tmp)
                except IndexError:
                    print("ERROR: " + line)
示例#2
0
    def __init__(self, exec_by_ibus):
        """Describe the BoGo engine to IBus and build its shared helpers.

        Args:
            exec_by_ibus: When true only the bus name is requested;
                otherwise the component is registered from here and
                "bogo" is made the global engine.
        """
        bus_name = "org.freedesktop.IBus.BoGo"
        summary = "ibus-bogo for IBus"
        license_name = "GPLv3"
        maintainer = "BoGo Development Team <*****@*****.**>"

        self.component = IBus.Component.new(
            bus_name,
            summary,
            "0.4",
            license_name,
            maintainer,
            "https://github.com/BoGoEngine/ibus-bogo",
            "/usr/bin/exec",
            "ibus-bogo")

        self.component.add_engine(IBus.EngineDesc(
            name="bogo",
            longname="BoGo",
            description=summary,
            language="vi",
            license=license_name,
            author=maintainer,
            icon=current_path + "/data/ibus-bogo-dev.svg",
            layout="default"))

        self.mainloop = GObject.MainLoop()
        self.bus = IBus.Bus()
        self.bus.connect("disconnected", self.bus_disconnected_cb)

        self.engine_count = 0
        self.factory = IBus.Factory.new(self.bus.get_connection())
        self.factory.connect("create-engine", self.create_engine)

        # Abbreviation rules live in the per-user configuration directory.
        CONFIG_DIR = os.path.expanduser("~/.config/ibus-bogo/")
        self.config = Config()
        self.abbr_expander = AbbreviationExpander(config=self.config)
        self.abbr_expander.watch_file(CONFIG_DIR + "/abbr_rules.json")

        if not exec_by_ibus:
            self.bus.register_component(self.component)
            self.bus.set_global_engine_async("bogo", -1, None, None, None)
        else:
            self.bus.request_name(bus_name, 0)

        # Vietnamese dictionary comes from a broker pointed at DICT_PATH.
        vi_broker = enchant.Broker()
        vi_broker.set_param('enchant.myspell.dictionary.path', DICT_PATH)
        vietnamese_dict = enchant.DictWithPWL('vi_VN_telex',
                                              pwl=PWL_PATH,
                                              broker=vi_broker)

        # FIXME: Catch enchant.errors.DictNotFoundError exception here.
        english_dict = enchant.Dict('en_US')

        self.auto_corrector = AutoCorrector(self.config, vietnamese_dict,
                                            english_dict)
示例#3
0
    def spell_check(self, sentence, tokens):
        """Interactively replace misspelled words in ``sentence``.

        For every alphabetic token that the en_US dictionary rejects, the
        suggestions are printed, the user is asked for the (1-based) number
        of the suggestion to use, and every occurrence of the token in the
        sentence is replaced by it.

        Args:
            sentence: The text to correct (a string).
            tokens: The words of ``sentence`` (a list of strings).

        Returns:
            The sentence with the chosen replacements applied.

        Raises:
            ValueError: If the answer typed by the user is not an integer.
            IndexError: If the chosen number is beyond the suggestion list.
        """
        # Configure the provider ordering first and build the dictionary
        # from that broker; a dictionary created without the broker would
        # not see the ordering at all.
        b = enchant.Broker()
        b.set_ordering("en_US", "aspell, myspell, ispell")
        d = enchant.Dict("en_US", b)
        # Add a word to the personal dictionary so it is not flagged.
        d.add('ambivert')

        self.spellSuggestions = []
        for token in tokens:
            if not token.isalpha() or d.check(token):
                continue
            self.spellSuggestions = d.suggest(token)
            # Single-argument print: same output under Python 2 and 3.
            print("\nSuggestions:  %s" % (self.spellSuggestions,))
            # raw_input is the Python 2 built-in this snippet relies on.
            y = int(
                raw_input(
                    "\nWhich suggestion nos. did you find correct?\n"))
            # Replace by the token itself: slicing around find() == -1
            # yields an empty string, and replacing "" would insert the
            # suggestion between every character of the sentence.
            sentence = sentence.replace(token, self.spellSuggestions[y - 1])
        return sentence
示例#4
0
    def setLanguage(self, theLang, projectDict=None):
        """Switch the spell checker to ``theLang``.

        A fresh enchant broker and dictionary are requested for the
        language. Should anything in that process raise, a FakeEnchant
        stand-in is installed instead so later lookups keep working.
        Afterwards the project dictionary is re-read and each of its
        words is added to the session of whichever dictionary is active.
        """
        try:
            import enchant

            if self._theBroker is not None:
                logger.debug("Deleting old pyenchant broker")
                del self._theBroker

            self._theBroker = enchant.Broker()
            self._theDict = self._theBroker.request_dict(theLang)
            self._spellLanguage = theLang
            logger.debug("Enchant spell checking for language '%s' loaded", theLang)
        except Exception:
            logger.error("Failed to load enchant spell checking for language '%s'", theLang)
            self._theDict = FakeEnchant()
            self._spellLanguage = None

        # Project words are session-only additions to the active dictionary.
        self._readProjectDictionary(projectDict)
        for projectWord in self._projDict:
            self._theDict.add_to_session(projectWord)
示例#5
0
文件: custom.py 项目: danielbis/ldt
    def __init__(self, language=config["default_language"],
                 foreign_languages=("german", "french"),
                 engine_order="aspell,myspell"):
        """Initializing a spellchecker for the target and a number of
        frequent "foreign" languages.

        Attempting to establish relations between word pairs would make
        little sense if one word was foreign - but e.g. Spanish words are
        fairly frequent in American English corpora. LDT provides an option to
        define what foreign languages could be expected to be frequent in
        the input. They would then be disregarded in subsequent analysis.

        Args:
            language (str): the target language, either a full name
                ("english"), 2-letter code ("en"), or a spellchecker
                resource code for a specific sublanguage ("en_US").
            foreign_languages (tuple): other languages that could be expected
                to be relatively frequent in the input. Ditto for the format.
            engine_order (str): pyenchant variable for the order of
                spellchecker engine providers. Available providers vary by
                system.

        Note:
             Aspell worked better than hunspell or myspell in our experiments.
        """

        super(Spellchecker, self).__init__(language=language)

        #: pyenchant engine, to expose its normal pyenchant attributes
        self.engine = enchant.Broker()

        #: the order of spellcheck engine providers
        self.engine_order = engine_order

        #: setting that order for all the languages
        self.engine.set_ordering('*', self.engine_order)

        #: (str): The main language of the spellchecker.
        self.language = check_language(language)

        #: (enchant dict object): The spellchecker for the main language.
        self.target = self._enchant_dict(self.language)

        # the top-priority provider for the main language
        self.provider = self.target.provider

        # NOTE(review): a nested ``_set_language`` function used to be
        # defined at this point; it was local to __init__ and never called,
        # so it has been dropped. If a language setter is wanted it has to
        # be a real method on the class.

        #: list(str): the language(s) to be considered "foreign".
        self.foreign_languages = [check_language(lang) for
                                  lang in foreign_languages]

        #: list(enchant dict objects): the dicts for the foreign language(s).
        self.foreign = [self._enchant_dict(lang)
                        for lang in self.foreign_languages]
示例#6
0
 def __init__(self,
              view,
              language='en',
              prefix='gtkspellchecker',
              collapse=True,
              params=None):
     """Attach a spell checker to a text view.

     Args:
         view: The widget whose popup and button-press signals are hooked.
         language: Preferred dictionary language. Falls back to 'en' and
             then to the first language the broker reports.
         prefix: Prefix for the name of the "misspelled" text tag.
         collapse: Stored on the instance as ``self.collapse``.
         params: Optional mapping of enchant broker parameters to set.

     Raises:
         NoDictionariesFound: If the broker offers no language at all.
     """
     self._view = view
     self.collapse = collapse
     self._view.connect('populate-popup',
                        lambda entry, menu: self._extend_menu(menu))
     self._view.connect('popup-menu', self._click_move_popup)
     self._view.connect('button-press-event', self._click_move_button)
     self._prefix = prefix
     if _pygobject:
         self._misspelled = gtk.TextTag.new('{}-misspelled'\
                                            .format(self._prefix))
     else:
         self._misspelled = gtk.TextTag('{}-misspelled'.format(
             self._prefix))
     self._misspelled.set_property('underline', 4)
     self._broker = enchant.Broker()
     # ``params`` defaults to None rather than a shared mutable dict.
     for param, value in (params or {}).items():
         self._broker.set_param(param, value)
     self.languages = SpellChecker._LanguageList.from_broker(self._broker)
     if self.languages.exists(language):
         self._language = language
     elif self.languages.exists('en'):
         logger.warning(('no installed dictionary for language "{}", '
                         'fallback to english'.format(language)))
         self._language = 'en'
     else:
         if self.languages:
             self._language = self.languages[0][0]
             # The literal pieces are joined as-is, so "in" needs its own
             # trailing space to avoid logging "inlanguage list".
             logger.warning(
                 ('no installed dictionary for language "{}" '
                  'and english, fallback to first language in '
                  'language list ("{}")').format(language, self._language))
         else:
             logger.critical('no dictionaries found')
             raise NoDictionariesFound()
     self._dictionary = self._broker.request_dict(self._language)
     self._deferred_check = False
     self._filters = dict(SpellChecker.DEFAULT_FILTERS)
     # One alternation regex per filter scope; only the text-wide one is
     # compiled with re.MULTILINE.
     self._regexes = {
         SpellChecker.FILTER_WORD:
         re.compile('|'.join(self._filters[SpellChecker.FILTER_WORD])),
         SpellChecker.FILTER_LINE:
         re.compile('|'.join(self._filters[SpellChecker.FILTER_LINE])),
         SpellChecker.FILTER_TEXT:
         re.compile('|'.join(self._filters[SpellChecker.FILTER_TEXT]),
                    re.MULTILINE)
     }
     self._enabled = True
     self.buffer_initialize()
示例#7
0
 def __init__(self, language, dictionary_dir, word_list_path):
     """Build ``self.dict`` from a custom dictionary directory and a PWL.

     A private broker is pointed at ``dictionary_dir`` for myspell
     dictionaries, the effective setting and the personal word list path
     are logged, and the dictionary for ``language`` is then created with
     ``word_list_path`` as its personal word list.
     """
     path_param = "enchant.myspell.dictionary.path"
     enchant_broker = enchant.Broker()
     enchant_broker.set_param(path_param, dictionary_dir)
     logging.info(
         "Enchant broker param \"enchant.myspell.dictionary.path\" = %s",
         enchant_broker.get_param(path_param))
     logging.info("Personal word list file: \"%s\"", word_list_path)
     self.dict = enchant.DictWithPWL(language,
                                     pwl=word_list_path,
                                     broker=enchant_broker)
示例#8
0
def print_enchant_backends_and_languages():
    """
    Report the Enchant backends and languages, or FAIL without PyEnchant.

    The heading and the FAIL marker go through ``w``; the two detail
    lines are printed.
    """
    w('Enchant (spell checker)... ')
    try:
        import enchant
        w(os.linesep)
        provider_names = [provider.name
                          for provider in enchant.Broker().describe()]
        print('  available backends: %s' % ', '.join(provider_names))
        language_tags = enchant.list_languages()
        print('  available languages: %s' % ', '.join(language_tags))
    except ImportError:
        w('FAIL' + os.linesep)
示例#9
0
def _construct_enchant(provider, lang, envs, encoding, variety, suponly):
    """Prepare an Enchant broker for ``lang`` (or its ``variety``).

    Unless ``suponly`` is true, the visible part of this function imports
    enchant, creates a broker, picks the first of ``variety`` and ``lang``
    for which the broker has a dictionary, and sets ``provider`` as the
    provider ordering for that language.

    Raises:
        PologyError: If the enchant module cannot be imported, the broker
            cannot be created, neither ``variety`` nor ``lang`` is known to
            Enchant, or the provider ordering cannot be set.

    NOTE(review): ``dictpath``, ``temporary`` and ``encoding`` are not used
    in the lines shown here and nothing is returned -- the function
    presumably continues beyond this excerpt; confirm against the full file.
    """

    # Get Pology's internal personal dictionary for this language.
    dictpath, temporary = _compose_personal_dict(lang, envs)

    if not suponly:
        try:
            import enchant
        except ImportError:
            pkgs = ["python-enchant"]
            raise PologyError(
                _("@info", "Python wrapper for Enchant not found, "
                  "please install it (possible package names: "
                  "%(pkglist)s).",
                  pkglist=format_item_list(pkgs)))

        # Create Enchant broker.
        try:
            broker = enchant.Broker()
        except Exception, e:
            raise PologyError(
                _("@info", "Cannot initialize Enchant:\n%(msg)s", msg=e))

        # Find Enchant language.
        # The variety is tried before the plain language. Indexing and
        # truth-testing the result relies on filter() returning a list
        # (Python 2 semantics, matching the except syntax used here).
        e_langs = filter(broker.dict_exists, [variety, lang])
        if e_langs:
            e_lang = e_langs[0]
        else:
            # Report the variety too when one was requested.
            if variety is not None:
                raise PologyError(
                    _("@info", "Language '%(lang)s' and variety '%(var)s' "
                      "not known to Enchant.",
                      lang=lang,
                      var=variety))
            else:
                raise PologyError(
                    _("@info",
                      "Language '%(lang)s' not known to Enchant.",
                      lang=lang))

        # Choose the provider for the selected language.
        try:
            broker.set_ordering((e_lang or "*"), provider)
        except Exception, e:
            raise PologyError(
                _("@info",
                  "Cannot configure Enchant for provider '%(pvd)s':\n%(msg)s",
                  pvd=provider,
                  msg=e))
示例#10
0
 def __init__(self,
              view,
              cherrytree_instance,
              on_rt_node,
              language='en',
              prefix='gtkspellchecker',
              collapse=True,
              params=None):
     """Attach a spell checker to a text view.

     Args:
         view: The widget whose popup and button-press signals are hooked.
         cherrytree_instance: Stored as ``self._cherrytree_instance``.
         on_rt_node: When true the buffer is initialized immediately.
         language: Preferred dictionary language. Falls back to 'en' and
             then to the first language the broker reports.
         prefix: Prefix for the name of the "misspelled" text tag.
         collapse: Stored on the instance as ``self.collapse``.
         params: Optional mapping of enchant broker parameters to set.

     Raises:
         NoDictionariesFound: If the broker offers no language at all.
     """
     self._view = view
     self._cherrytree_instance = cherrytree_instance
     self.collapse = collapse
     self._view.connect('populate-popup',
                        lambda entry, menu: self._extend_menu(menu))
     self._view.connect('popup-menu', self._click_move_popup)
     self._view.connect('button-press-event', self._click_move_button)
     self._prefix = prefix
     self._misspelled = Gtk.TextTag.new('{}-misspelled'.format(
         self._prefix))
     self._misspelled.set_property('underline', 4)
     self._broker = enchant.Broker()
     # ``params`` defaults to None rather than a shared mutable dict.
     for param, value in (params or {}).items():
         self._broker.set_param(param, value)
     self.languages = SpellChecker._LanguageList.from_broker(self._broker)
     if self.languages.exists(language):
         self._language = language
     elif self.languages.exists('en'):
         self._language = 'en'
     else:
         if self.languages:
             self._language = self.languages[0][0]
         else:
             raise NoDictionariesFound()
     self._dictionary = self._broker.request_dict(self._language)
     self._deferred_check = False
     self._filters = dict(SpellChecker.DEFAULT_FILTERS)
     # One alternation regex per filter scope; only the text-wide one is
     # compiled with re.MULTILINE.
     self._regexes = {
         SpellChecker.FILTER_WORD:
         re.compile('|'.join(self._filters[SpellChecker.FILTER_WORD])),
         SpellChecker.FILTER_LINE:
         re.compile('|'.join(self._filters[SpellChecker.FILTER_LINE])),
         SpellChecker.FILTER_TEXT:
         re.compile('|'.join(self._filters[SpellChecker.FILTER_TEXT]),
                    re.MULTILINE)
     }
     self._enabled = True
     if on_rt_node:
         self.buffer_initialize()
示例#11
0
    def __init__(self, view, language='en', prefix='spellchecker'):
        """Hook the checker into ``view`` and load the ``language`` dict.

        The view's button-press and popup signals are connected, an
        underlining "misspelled" tag named after ``prefix`` is created,
        the dictionary is requested from a new broker, and finally the
        buffer is set up.
        """
        self._enabled = True
        self._view = view
        handlers = (
            ('button-press-event', self._button_press_event),
            ('populate-popup', self._populate_popup),
            ('popup-menu', self._popup_menu),
        )
        for signal_name, callback in handlers:
            self._view.connect(signal_name, callback)

        self._prefix = prefix
        tag_name = '%s-misspelled' % (self._prefix)
        self._misspelled = gtk.TextTag(name=tag_name)
        self._misspelled.set_property('underline', 4)

        self._language = language
        self._broker = enchant.Broker()
        self._dictionary = self._broker.request_dict(language)

        self._deferred_check = False
        # Nothing is ignored until expressions are registered.
        self._ignore_regex = re.compile('')
        self._ignore_expressions = []
        self.buffer_setup()
示例#12
0
def autospel_name(request, inapp, inmodel):
    """Return JSON spelling suggestions for ``term`` among a model's names.

    A personal-word-list dictionary holding the ``name`` of every object
    of the model is cached in ``DICT`` under ``'<app>:<model>:name'`` and
    topped up with new names once it is older than CACHE_TIMEOUT seconds.

    Query parameters:
        term: The text to find suggestions for. An empty or missing value
            yields an empty text/plain response.
        limit: Maximum number of suggestions (default 15). A non-integer
            value yields a 400 response; values below 1 leave the list
            uncapped.
    """
    q = request.GET.get('term')
    if not q:
        return HttpResponse(content_type='text/plain')

    limit = request.GET.get('limit', 15)
    try:
        limit = int(limit)
    except ValueError:
        return HttpResponseBadRequest()
    Foo = apps.get_model(inapp, inmodel)
    # Initialize Dictionary
    dict_key = '%s:%s:name' % (inapp, inmodel)
    CACHE_TIMEOUT = 15
    if dict_key in DICT:
        entry = DICT[dict_key]
        # Check if cached dictionary is sufficiently fresh
        if time.time() - entry['last_refresh'] > CACHE_TIMEOUT:
            for o in Foo.objects.all():
                if not entry['dict'].is_added(o.name):
                    entry['dict'].add(o.name)
            entry['last_refresh'] = time.time()
    else:
        # Create a dict with all possibilities
        dict_broker = enchant.Broker()
        # Start with a blank dict
        dict_dir = tempfile.mkdtemp()
        name_dict = dict_broker.request_pwl_dict(
            dict_dir + "/enchanting_villages")
        # Add all the names from the database
        for o in Foo.objects.all():
            name_dict.add(o.name)
        # Publish the entry only once it is complete, so a failure above
        # cannot leave a half-built entry (last_refresh=None) in the cache.
        DICT[dict_key] = {'last_refresh': time.time(), 'dict': name_dict}

    foos = DICT[dict_key]['dict'].suggest(q)
    if limit > 0:
        # Honour the validated ``limit`` parameter.
        foos = foos[:limit]
    data = json.dumps(foos)
    return HttpResponse(data, content_type='application/json')
示例#13
0
def _create_checker(providers, langtag, words):
    """Create an Enchant checker and seed its session with ``words``.

    Args:
        providers: Provider ordering to set for ``langtag``, or None to
            keep the broker's default.
        langtag: Language to request a dictionary for. When None, an
            empty personal-word-list dictionary is created instead.
        words: Iterable of words to add to the session (may be None).

    Returns:
        The checker, or None when the dictionary for ``langtag`` could not
        be set up.

    Raises:
        PologyError: If the enchant module cannot be imported.
    """

    try:
        import enchant
    except ImportError:
        pkgs = ["python-enchant"]
        raise PologyError(
            _("@info", "Python wrapper for Enchant not found, "
              "please install it (possible package names: "
              "%(pkglist)s).",
              pkglist=format_item_list(pkgs)))

    if langtag is not None:
        try:
            broker = enchant.Broker()
            if providers is not None:
                broker.set_ordering(langtag, providers)
            checker = broker.request_dict(langtag)
            # Probe the dictionary once so a broken one is caught here.
            checker.check(".")
        except Exception:
            # Any setup failure means "no checker"; KeyboardInterrupt and
            # SystemExit are deliberately no longer swallowed.
            checker = None
    else:
        # Only the unique name is wanted: the temporary file is closed
        # first and the path is then handed to Enchant for the word list.
        tmpf = tempfile.NamedTemporaryFile()
        tmpf.close()
        checker = enchant.request_pwl_dict(tmpf.name)
        os.unlink(tmpf.name)

    if checker:
        pname = checker.provider.name.split()[0].lower()
        # For these providers the capitalized and all-caps forms are
        # added explicitly as well.
        need_upcasing = (pname in ("personal", "myspell"))
        for word in words or []:
            checker.add_to_session(word)
            if need_upcasing:
                # word[:1] keeps an empty word from raising IndexError.
                checker.add_to_session(word[:1].upper() + word[1:])
                checker.add_to_session(word.upper())
    return checker
示例#14
0
def autospel_name(request, inapp, inmodel):
	"""Return pipe-separated spelling suggestions for ``q`` among a model's names.

	A personal-word-list dictionary holding the ``name`` of every object
	of the model is cached in ``DICT`` under ``'<app>:<model>:name'`` and
	topped up with new names once it is older than CACHE_TIMEOUT seconds.

	Query parameters:
		q: The text to find suggestions for. An empty or missing value
			yields an empty text/plain response.
		limit: Maximum number of suggestions (default 15). A non-integer
			value yields a 400 response; values below 1 leave the list
			uncapped.
	"""
	q = request.GET.get('q')
	if not q:
		return HttpResponse(mimetype='text/plain')

	limit = request.GET.get('limit', 15)
	try:
		limit = int(limit)
	except ValueError:
		return HttpResponseBadRequest()
	Foo = get_model( inapp, inmodel )
	# Initialize Dictionary
	dict_key = '%s:%s:name'%(inapp, inmodel)
	CACHE_TIMEOUT=15
	if dict_key in DICT:
		entry = DICT[dict_key]
		# Check if cached dictionary is sufficiently fresh
		if time.time() - entry['last_refresh'] > CACHE_TIMEOUT:
			for o in Foo.objects.all():
				if not entry['dict'].is_added(o.name):
					entry['dict'].add(o.name)
			entry['last_refresh'] = time.time()
	else:
		# Create a dict with all possibilities
		dict_broker = enchant.Broker()
		# Start with a blank dict
		name_dict = dict_broker.request_pwl_dict(None)
		# Add all the names from the database
		for o in Foo.objects.all():
			name_dict.add(o.name)
		# Publish the entry only once it is complete, so a failure above
		# cannot leave a half-built entry (last_refresh=None) in the cache.
		DICT[dict_key] = { 'last_refresh': time.time(), 'dict': name_dict }

	foos = DICT[dict_key]['dict'].suggest(q)
	if limit > 0:
		# Honour the validated ``limit`` parameter.
		foos = foos[:limit]
	return HttpResponse("%s|\n"%("|\n".join(foos)), mimetype='text/plain')
示例#15
0
    def __init__(self, config, abbr_expander):
        """Create the engine state, its spell checkers and both backends.

        Args:
            config: Configuration object shared with the auto-corrector
                and the backends.
            abbr_expander: Abbreviation expander handed to both backends.
        """
        super().__init__()

        self.caps = 0
        self.vietnameseMode = True

        self.config = config
        self.ui_delegate = UiDelegate(engine=self)

        # Vietnamese dictionary comes from a broker pointed at DICT_PATH.
        vi_broker = enchant.Broker()
        vi_broker.set_param('enchant.myspell.dictionary.path', DICT_PATH)
        vietnamese_dict = enchant.DictWithPWL('vi_VN_telex',
                                              pwl=PWL_PATH,
                                              broker=vi_broker)

        # FIXME: Catch enchant.errors.DictNotFoundError exception here.
        english_dict = enchant.Dict('en_US')

        corrector = AutoCorrector(config, vietnamese_dict, english_dict)

        # Both backends are built from exactly the same collaborators.
        backend_kwargs = dict(engine=self,
                              config=config,
                              abbr_expander=abbr_expander,
                              auto_corrector=corrector)
        self.preedit_backend = PreeditBackend(**backend_kwargs)
        self.surrounding_text_backend = SurroundingTextBackend(
            **backend_kwargs)

        # The preedit backend is the default
        self.backend = self.preedit_backend
        self.reset()
示例#16
0
    def __init__(self, language="en", providers="aspell,myspell", basename='analysis', threads=4):
        """Set up the dictionary, tuning options and rule tables.

        Args:
            language: Enchant language tag of the dictionary to load.
            providers: Enchant provider ordering applied to all languages.
            basename: Stored as ``self.basename`` (output option).
            threads: Accepted for interface compatibility; not used by
                this constructor.
        """
        self.enchant_broker = enchant.Broker()
        self.enchant_broker.set_ordering("*", providers)

        self.enchant = enchant.Dict(language, self.enchant_broker)

        # Output options
        self.basename = basename

        # Finetuning word generation
        self.max_word_dist = 10
        self.max_words = 10
        self.more_words = False
        self.simple_words = False

        # Finetuning rule generation
        self.max_rule_len = 10
        self.max_rules = 10
        self.more_rules = False
        self.simple_rules = False
        self.brute_rules = False

        # Debugging options
        self.verbose = False
        self.debug = False
        self.word = None  # Custom word to use.
        self.quiet = False

        ########################################################################
        # Word and Rule Statistics
        self.numeric_stats_total = 0
        self.special_stats_total = 0
        self.foreign_stats_total = 0

        ########################################################################
        # Preanalysis Password Patterns
        # Raw strings keep the regex escapes (\. and \d) from being parsed
        # as (invalid) string escape sequences; the patterns are unchanged.
        self.password_pattern = dict()
        self.password_pattern["insertion"] = re.compile(r'^[^a-z]*(?P<password>.+?)[^a-z]*$', re.IGNORECASE)
        self.password_pattern["email"] = re.compile(r'^(?P<password>.+?)@[A-Z0-9.-]+\.[A-Z]{2,4}', re.IGNORECASE)
        self.password_pattern["alldigits"] = re.compile(r'^(\d+)$', re.IGNORECASE)
        self.password_pattern["allspecial"] = re.compile(r'^([^a-z0-9]+)$', re.IGNORECASE)

        ########################################################################
        # Hashcat Rules Engine
        # Maps a rule character to a callable taking the word plus the
        # rule's own arguments (positions N/M, characters X/Y).
        self.hashcat_rule = dict()

        # Dummy rule
        self.hashcat_rule[':'] = lambda x: x  # Do nothing

        # Case rules
        self.hashcat_rule["l"] = lambda x: x.lower()  # Lowercase all letters
        self.hashcat_rule["u"] = lambda x: x.upper()  # Capitalize all letters
        self.hashcat_rule["c"] = lambda x: x.capitalize()  # Capitalize the first letter
        # Lowercase the first found character, uppercase the rest
        self.hashcat_rule["C"] = lambda x: x[0].lower() + x[1:].upper()
        self.hashcat_rule["t"] = lambda x: x.swapcase()  # Toggle the case of all characters in word
        # Toggle the case of characters at position N
        self.hashcat_rule["T"] = lambda x, y: x[:y] + x[y].swapcase() + x[y + 1:]
        # Upper case the first letter and every letter after a space
        self.hashcat_rule["E"] = lambda x: " ".join(
            [i[0].upper() + i[1:] for i in x.split(" ")])

        # Rotation rules
        self.hashcat_rule["r"] = lambda x: x[::-1]  # Reverse the entire word
        self.hashcat_rule["{"] = lambda x: x[1:] + x[0]  # Rotate the word left
        self.hashcat_rule["}"] = lambda x: x[-1] + x[:-1]  # Rotate the word right

        # Duplication rules
        self.hashcat_rule["d"] = lambda x: x + x  # Duplicate entire word
        self.hashcat_rule["p"] = lambda x, y: x * y  # Duplicate entire word N times
        self.hashcat_rule["f"] = lambda x: x + x[::-1]  # Duplicate word reversed
        self.hashcat_rule["z"] = lambda x, y: x[0] * y + x  # Duplicate first character N times
        self.hashcat_rule["Z"] = lambda x, y: x + x[-1] * y  # Duplicate last character N times
        self.hashcat_rule["q"] = lambda x: "".join([i + i for i in x])  # Duplicate every character
        self.hashcat_rule["y"] = lambda x, y: x[:y] + x  # Duplicate first N characters
        self.hashcat_rule["Y"] = lambda x, y: x + x[-y:]  # Duplicate last N characters

        # Cutting rules
        self.hashcat_rule["["] = lambda x: x[1:]  # Delete first character
        self.hashcat_rule["]"] = lambda x: x[:-1]  # Delete last character
        self.hashcat_rule["D"] = lambda x, y: x[:y] + x[y + 1:]  # Deletes character at position N
        self.hashcat_rule["'"] = lambda x, y: x[:y]  # Truncate word at position N
        self.hashcat_rule["x"] = lambda x, y, z: x[:y] + x[y + z:]  # Delete M characters, starting at position N
        self.hashcat_rule["@"] = lambda x, y: x.replace(y, '')  # Purge all instances of X

        # Insertion rules
        self.hashcat_rule["$"] = lambda x, y: x + y  # Append character to end
        self.hashcat_rule["^"] = lambda x, y: y + x  # Prepend character to front
        self.hashcat_rule["i"] = lambda x, y, z: x[:y] + z + x[y:]  # Insert character X at position N

        # Replacement rules
        self.hashcat_rule["o"] = lambda x, y, z: x[:y] + z + x[y + 1:]  # Overwrite character at position N with X
        self.hashcat_rule["s"] = lambda x, y, z: x.replace(y, z)  # Replace all instances of X with Y
        # Bitwise shift left character @ N
        self.hashcat_rule["L"] = lambda x, y: x[:y] + chr(ord(x[y]) << 1) + x[y + 1:]
        # Bitwise shift right character @ N
        self.hashcat_rule["R"] = lambda x, y: x[:y] + chr(ord(x[y]) >> 1) + x[y + 1:]
        # Increment character @ N by 1 ascii value
        self.hashcat_rule["+"] = lambda x, y: x[:y] + chr(ord(x[y]) + 1) + x[y + 1:]
        # Decrement character @ N by 1 ascii value
        self.hashcat_rule["-"] = lambda x, y: x[:y] + chr(ord(x[y]) - 1) + x[y + 1:]
        # Replace character @ N with value at @ N plus 1
        self.hashcat_rule["."] = lambda x, y: x[:y] + x[y + 1] + x[y + 1:]
        # Replace character @ N with value at @ N minus 1
        self.hashcat_rule[","] = lambda x, y: x[:y] + x[y - 1] + x[y + 1:]

        # Swapping rules
        self.hashcat_rule["k"] = lambda x: x[1] + x[0] + x[2:]  # Swap first two characters
        self.hashcat_rule["K"] = lambda x: x[:-2] + x[-1] + x[-2]  # Swap last two characters
        # Swap character X with Y
        self.hashcat_rule["*"] = lambda x, y, z: (
            x[:y] + x[z] + x[y + 1:z] + x[y] + x[z + 1:] if z > y
            else x[:z] + x[y] + x[z + 1:y] + x[z] + x[y + 1:])

        ########################################################################
        # Common numeric and special character substitutions (1337 5p34k)
        self.leet = {
            "1": "i",
            "2": "z",
            "3": "e",
            "4": "a",
            "5": "s",
            "6": "b",
            "7": "t",
            "8": "b",
            "9": "g",
            "0": "o",
            "!": "i",
            "|": "i",
            "@": "a",
            "$": "s",
            "+": "t",
        }

        ########################################################################
        # Preanalysis rules to bruteforce for each word
        self.preanalysis_rules = []
        self.preanalysis_rules.append(([], self.hashcat_rule[':']))  # Blank rule
        self.preanalysis_rules.append((['r'], self.hashcat_rule['r']))  # Reverse rule
示例#17
0
文件: spell_helper.py 项目: nvhuy/LM
 def __init__(self):
     """Print the available dictionaries and languages, then build the checker.

     The en_US ``SpellChecker`` is stored as ``self.__spell_checker__``.
     """
     broker = enchant.Broker()
     # Single-argument print(...) behaves the same on Python 2 and 3.
     print(broker.list_dicts())
     print(broker.list_languages())
     self.__spell_checker__ = SpellChecker(lang='en_US')
示例#18
0
        database = '*'
    if host is None:
        host = 'localhost'

    def gotDefinition(definitions):
        if not definitions:
            raise errors.NoDefinitions(u'No definitions for "%s" in "%s"' % (word, database))

        for d in definitions:
            defLines = (line.strip() for line in d.text if line.strip())
            yield d.db, u' '.join(defLines)

    return _dictDo(host, 'define', database, word).addCallback(gotDefinition)


# Module-level Enchant broker, created once at import time.
_enchantBroker = enchant.Broker()
# XXX: there should probably be some way to specify this
# Provider ordering for every language tag ('*'): aspell, ispell, myspell.
_enchantBroker.set_ordering('*', 'aspell,ispell,myspell')

def spell(word, language):
    """
    Check the spelling of C{word} in C{language}

    @type word: C{unicode}

    @type language: C{unicode}

    @raise errors.InvalidLanguage: If no dictionary for C{language} could be
        found

    @rtype: C{list} or C{None}
示例#19
0
# enchant hook test
import sys
import enchant

# Collect what the installed Enchant can see.
backends = [x.name for x in enchant.Broker().describe()]
langs = enchant.list_languages()
dicts = [x[0] for x in enchant.list_dicts()]

# At least one backend should be available
if not backends:
    print('E: No dictionary backend available')
    # sys.exit instead of the ``exit`` helper: the latter is injected by
    # the ``site`` module and is not guaranteed to exist everywhere.
    sys.exit(1)

if not dicts:
    print('W: No dictionary available')

print(80 * '-')
print('PYTHONPATH: %s' % sys.path)
print(80 * '-')
print('Backends: ' + ', '.join(backends))
print('Languages: %s' % ', '.join(langs))
print('Dictionaries: %s' % dicts)
print(80 * '-')

# Try spell checking if English is available
l = 'en_US'
if l in langs:
    d = enchant.Dict(l)
    print('d.check("hallo") %s' % d.check('hallo'))
    print('d.check("halllo") %s' % d.check('halllo'))
    print('d.suggest("halllo") %s' % d.suggest('halllo'))
示例#20
0
    import enchant
except ImportError:
    enchant = None
import six

from pylint.interfaces import ITokenChecker, IAstroidChecker
from pylint.checkers import BaseTokenChecker
from pylint.checkers.utils import check_messages

# Pick the ``maketrans`` that exists on the running major version.
if sys.version_info[0] < 3:
    maketrans = string.maketrans
else:
    maketrans = str.maketrans

# Describe the installed enchant dictionaries, or explain how to get
# them when the enchant module is not available.
if enchant is None:
    dicts = "none"
    dict_choices = ['']
    instr = " To make it working install python-enchant package."
else:
    br = enchant.Broker()
    dicts = br.list_dicts()
    dict_choices = [''] + [d[0] for d in dicts]
    dicts = ", ".join(["%s (%s)" % (d[0], d[1].name) for d in dicts])
    instr = ""

table = maketrans("", "")


class SpellingChecker(BaseTokenChecker):
    """Check spelling in comments and docstrings"""
示例#21
0
    def __init__(self, view, language='en', prefix='gtkspellchecker',
                 collapse=True, params=None):
        """Attach a spell checker to a text view and load word frequencies.

        Args:
            view: The widget whose popup and button-press signals are
                hooked.
            language: Preferred dictionary language. Falls back to 'en'
                and then to the first language the broker reports.
            prefix: Prefix for the name of the "misspelled" text tag.
            collapse: Stored on the instance as ``self.collapse``.
            params: Optional mapping of enchant broker parameters to set.

        Raises:
            NoDictionariesFound: If the broker offers no language at all.
        """

        self._view = view
        self.collapse = collapse
        self._view.connect('populate-popup',
                           lambda entry, menu:self._extend_menu(menu))
        self._view.connect('popup-menu', self._click_move_popup)
        self._view.connect('button-press-event', self._click_move_button)
        self._prefix = prefix
        if _pygobject:
            self._misspelled = gtk.TextTag.new('{}-misspelled'\
                                               .format(self._prefix))
        else:
            self._misspelled = gtk.TextTag('{}-misspelled'.format(self._prefix))
        self._misspelled.set_property('underline', 4)
        self._broker = enchant.Broker()
        # ``params`` defaults to None rather than a shared mutable dict.
        for param, value in (params or {}).items():
            self._broker.set_param(param, value)
        self.languages = SpellChecker._LanguageList.from_broker(self._broker)
        if self.languages.exists(language):
            self._language = language
        elif self.languages.exists('en'):
            logger.warning(('no installed dictionary for language "{}", '
                            'fallback to english'.format(language)))
            self._language = 'en'
        else:
            if self.languages:
                self._language = self.languages[0][0]
                # The literal pieces are joined as-is, so "in" needs its
                # own trailing space to avoid logging "inlanguage list".
                logger.warning(('no installed dictionary for language "{}" '
                                'and english, fallback to first language in '
                                'language list ("{}")').format(language,
                                                                self._language))
            else:
                logger.critical('no dictionaries found')
                raise NoDictionariesFound()
        self._dictionary = self._broker.request_dict(self._language)
        self._deferred_check = False
        self._filters = dict(SpellChecker.DEFAULT_FILTERS)
        # One alternation regex per filter scope; only the text-wide one
        # is compiled with re.MULTILINE.
        self._regexes = {SpellChecker.FILTER_WORD : re.compile('|'.join(
                             self._filters[SpellChecker.FILTER_WORD])),
                         SpellChecker.FILTER_LINE : re.compile('|'.join(
                             self._filters[SpellChecker.FILTER_LINE])),
                         SpellChecker.FILTER_TEXT : re.compile('|'.join(
                             self._filters[SpellChecker.FILTER_TEXT]),
                                                               re.MULTILINE)}
        self._enabled = True
        self.buffer_initialize()

        self.notify_language_change_functions = []

        # Word frequencies: read the pickled cache from the working
        # directory when present, otherwise parse the XML word list and
        # write the cache for next time.
        self.frequency_dict = {}
        pp_pickled = 'pickled_dict'
        if os.path.isfile(pp_pickled):
            # NOTE(security): unpickling runs arbitrary code if the cache
            # file has been tampered with; it is read from the current
            # working directory, so only run this from a trusted location.
            with open(pp_pickled, 'rb') as f:
                self.frequency_dict = pickle.load(f)
        else:
            pp = get_media_path('wordlists/en_us_wordlist.xml')
            root = ET.parse(pp).getroot()
            for child in root:
                self.frequency_dict[child.text] = int(child.attrib['f'])
            with open(pp_pickled, 'wb+') as f:
                pickle.dump(self.frequency_dict, f)