Пример #1
0
    def test_clear_caches_persistance(self):
        temp_dir = tempfile.mkdtemp()
        try:
            h1 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')
            test_suggest = h1.suggest('testing')
            test_stem = h1.stem('testing')

            h1._suggest_cache['made-up'] = test_suggest
            self.assertEqual(h1.suggest('made-up'), test_suggest)
            h1._stem_cache['made-up'] = test_stem
            self.assertEqual(h1.stem('made-up'), test_stem)

            h1.save_cache()
            h1.clear_cache()
            del h1

            cacheman = get_cache_manager('disk_hun')
            cacheman.deregister_all_caches()
            self.assertEqual(len(cacheman.cache_by_name), 0)

            h2 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')

            self.assertEqual(len(h2._suggest_cache), 0)
            self.assertEqual(len(h2._stem_cache), 0)
            self.assertNotEqual(h2.suggest('made-up'), test_suggest)
            self.assertNotEqual(h2.stem('made-up'), test_stem)
        finally:
            shutil.rmtree(temp_dir) # Nuke temp content
def __stem(speller: Hunspell, plural: str) -> [str]:

    stems = list()
    for stem in speller.stem(plural):
        stem = stem.replace("ij", "ij")
        if len(plural) - len(stem) <= 3:
            ps = pluralize(stem)
            if ps == plural:
                stems.append(stem)

    return stems
Пример #3
0
    def test_non_overlapping_caches(self):
        test_suggest = self.h.suggest('testing')
        test_stem = self.h.stem('testing')

        self.h._suggest_cache['made-up'] = test_suggest
        self.assertEqual(self.h.suggest('made-up'), test_suggest)
        self.h._stem_cache['made-up'] = test_stem
        self.assertEqual(self.h.stem('made-up'), test_stem)

        h2 = Hunspell('en_US', hunspell_data_dir=DICT_DIR)
        self.assertNotEqual(h2.suggest('made-up'), test_suggest)
        self.assertNotEqual(h2.stem('made-up'), test_stem)
Пример #4
0
def test_clear_caches_persistance(hunspell):
    temp_dir = tempfile.mkdtemp()
    try:
        h1 = Hunspell('test',
                      hunspell_data_dir=DICT_DIR,
                      disk_cache_dir=temp_dir,
                      cache_manager='disk_hun')
        test_suggest = h1.suggest('testing')
        test_suffix = h1.suffix_suggest('testing')
        test_stem = h1.stem('testing')

        h1._suggest_cache['made-up'] = test_suggest
        assert h1.suggest('made-up') == test_suggest
        h1._suffix_cache['made-up'] = test_suffix
        assert h1.suffix_suggest('made-up') == test_suffix
        h1._stem_cache['made-up'] = test_stem
        assert h1.stem('made-up') == test_stem

        h1.save_cache()
        h1.clear_cache()
        del h1

        cacheman = get_cache_manager('disk_hun')
        cacheman.deregister_all_caches()
        assert len(cacheman.cache_by_name) == 0

        h2 = Hunspell('test',
                      hunspell_data_dir=DICT_DIR,
                      disk_cache_dir=temp_dir,
                      cache_manager='disk_hun')

        assert len(h2._suggest_cache) == 0
        assert len(h2._stem_cache) == 0
        assert h2.suggest('made-up') != test_suggest
        assert h2.suffix_suggest('made-up') != test_suffix
        assert h2.stem('made-up') != test_stem
    finally:
        shutil.rmtree(temp_dir)  # Nuke temp content
Пример #5
0
def test_non_overlapping_caches(hunspell):
    test_suggest = hunspell.suggest('testing')
    test_suffix = hunspell.suffix_suggest('testing')
    test_stem = hunspell.stem('testing')

    hunspell._suggest_cache['made-up'] = test_suggest
    assert hunspell.suggest('made-up') == test_suggest
    hunspell._suffix_cache['made-up'] = test_suffix
    assert hunspell.suffix_suggest('made-up') == test_suffix
    hunspell._stem_cache['made-up'] = test_stem
    assert hunspell.stem('made-up') == test_stem

    h2 = Hunspell('en_US', hunspell_data_dir=DICT_DIR)
    assert h2.suggest('made-up') != test_suggest
    assert h2.stem('made-up') != test_stem
Пример #6
0
def test_clear_caches_non_peristance(hunspell):
    test_suggest = hunspell.suggest('testing')
    test_suffix = hunspell.suffix_suggest('testing')
    test_stem = hunspell.stem('testing')

    hunspell._suggest_cache['made-up'] = test_suggest
    assert hunspell.suggest('made-up') == test_suggest
    hunspell._suffix_cache['made-up'] = test_suffix
    assert hunspell.suffix_suggest('made-up') == test_suffix
    hunspell._stem_cache['made-up'] = test_stem
    assert hunspell.stem('made-up') == test_stem

    hunspell.clear_cache()

    del hunspell
    hunspell = Hunspell('test', hunspell_data_dir=DICT_DIR)
    assert hunspell.suggest('made-up') != test_suggest
    assert hunspell.suffix_suggest('made-up') != test_suffix
    assert hunspell.stem('made-up') != test_stem
Пример #7
0
class HindiLanguage(StopWordsFromFileMixIn):
    """Hindi language support module."""

    __slots__ = [
        # Stop words map
        '__stop_words_map',

        # Hunspell instance
        '__hindi_hunspell',

        # Word tokenizer
        '__treebank_tokenizer',
    ]

    def __init__(self):
        """Constructor."""
        super().__init__()

        self.__treebank_tokenizer = TreebankWordTokenizer()

        hunspell_dict_dir = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'hindi-hunspell',
            'dict-hi_IN',
        )
        if not os.path.isdir(hunspell_dict_dir):
            raise McLanguageException(
                "Hunspell dictionary directory does not exist at path: %s." % hunspell_dict_dir
            )

        if not os.path.isfile(os.path.join(hunspell_dict_dir, 'hi_IN.dic')):
            raise McLanguageException("Hunspell dictionary file does not exist at path: %s" % hunspell_dict_dir)
        if not os.path.isfile(os.path.join(hunspell_dict_dir, 'hi_IN.aff')):
            raise McLanguageException("Hunspell affix file does not exist at path: %s" % hunspell_dict_dir)

        try:
            self.__hindi_hunspell = Hunspell(lang='hi_IN', hunspell_data_dir=hunspell_dict_dir)
        except Exception as ex:
            raise McLanguageException(
                "Unable to initialize Hunspell with data directory '%s': %s" % (hunspell_dict_dir, str(ex),)
            )

        # Quick self-test to make sure that Hunspell is installed and dictionary is available
        hunspell_exc_message = """
            Hunspell self-test failed; make sure that Hunspell is installed and dictionaries are accessible, e.g.
            you might need to fetch Git submodules by running:

                git submodule update --init --recursive
        """
        try:
            test_stems = self.stem_words(['गुरुओं'])
        except Exception as _:
            raise McLanguageException(hunspell_exc_message)
        else:
            if len(test_stems) == 0 or test_stems[0] != 'गुरु':
                raise McLanguageException(hunspell_exc_message)

    @staticmethod
    def language_code() -> str:
        return "hi"

    @staticmethod
    def sample_sentence() -> str:
        return (
            "ऋषियों को सताने वाले दुष्ट राक्षसों के राजा रावण का सर्वनाश करने वाले "
            "विष्णुवतार भगवान श्रीराम, अयोध्या के महाराज दशरथ के बड़े सपुत्र थे।"
        )

    def stem_words(self, words: List[str]) -> List[str]:
        words = decode_object_from_bytes_if_needed(words)
        if words is None:
            raise McLanguageException("Words to stem is None.")

        stems = []

        for word in words:
            if word is None or len(word) == 0:
                log.debug("Word is empty or None.")
                stem = word
            else:
                term_stems = self.__hindi_hunspell.stem(word)
                if len(term_stems) > 0:
                    stem = term_stems[0]

                    if stem is None or len(stem) == 0:
                        log.debug("Stem for word '%s' is empty or None." % word)
                        stem = word

                else:
                    log.debug("Stem for word '%s' was not found." % word)
                    stem = word

            stems.append(stem)

        if len(words) != len(stems):
            log.warning("Stem count is not the same as word count; words: %s; stems: %s" % (str(words), str(stems),))

        return stems

    def split_text_to_sentences(self, text: str) -> List[str]:
        text = decode_object_from_bytes_if_needed(text)
        if text is None:
            log.warning("Text is None.")
            return []

        # Replace Hindi's "।" with line break to make tokenizer split on both "।" and period
        text = text.replace("।", "।\n\n")

        # No non-breaking prefixes in Hausa, so using English file
        en = EnglishLanguage()
        return en.split_text_to_sentences(text)

    def split_sentence_to_words(self, sentence: str) -> List[str]:
        sentence = decode_object_from_bytes_if_needed(sentence)
        if sentence is None:
            log.warning("Sentence is None.")
            return []

        # Normalize apostrophe so that "it’s" and "it's" get treated identically
        sentence = sentence.replace("’", "'")

        # Replace Hindi's "।" with line break to make tokenizer split on both "।" and period
        sentence = sentence.replace("।", ".")

        # TweetTokenizer / sentence_splitter don't work with Hindi for whatever reason, and word_tokenize() would
        # require NLTK data to be installed which is time consuming on Travis
        tokens = self.__treebank_tokenizer.tokenize(sentence)

        def is_word(token_: str) -> bool:
            """Returns True if token looks like a word."""
            if re.match(pattern=r'\w', string=token_, flags=re.UNICODE):
                return True
            else:
                return False

        # TweetTokenizer leaves punctuation in-place
        tokens = [token for token in tokens if is_word(token)]

        return tokens
Пример #8
0
 def test_hunspell_stem(self):
     d = Hunspell('en_US', hunspell_data_dir=DICT_DIR)
     self.assertListEqual(d.stem('dog'), ['dog'])
     self.assertListEqual(d.stem('permanently'), ['permanent'])
     del d
Пример #9
0
 def test_hunspell_stem(self):
     d = Hunspell('en_US', hunspell_data_dir=DICT_DIR)
     self.assertListEqual(d.stem('dog'), ['dog'])
     self.assertListEqual(d.stem('permanently'), ['permanent'])
     del d
Пример #10
0
class HunspellTest(unittest.TestCase):
    def assertRegexpSearch(self, *args, **kwargs):
        if PY3:
            self.assertRegex(*args, **kwargs)
        else:
            self.assertRegexpMatches(*args, **kwargs)

    def setUp(self):
        self.h = Hunspell('test', hunspell_data_dir=DICT_DIR)

    def tearDown(self):
        try:
            del self.h
        except AttributeError:
            pass

    def assertAllIn(self, checked, expected):
        self.assertTrue(all(x in expected for x in checked),
            u"{} not all found in {}".format(checked, expected))

    def test_create_destroy(self):
        del self.h

    def test_missing_dict(self):
        with self.assertRaises(HunspellFilePathError):
            Hunspell('not_avail', hunspell_data_dir=DICT_DIR)

    @patch('os.path.isfile', return_value=True)
    @patch('os.access', return_value=True)
    def test_bad_path_encoding(self, *mocks):
        if PY3:
            with self.assertRaises(HunspellFilePathError):
                Hunspell('not_checked',
                    hunspell_data_dir=u'bad/\udcc3/decoding')
        else:
            # Python 2 just make an illegal string instead of raising
            with captured_c_stderr_file() as caperr:
                Hunspell('not_checked',
                    hunspell_data_dir=u'bad/\udcc3/decoding')
                with open(caperr, 'r') as err:
                    self.assertRegexpSearch(err.read(), r'error:[^\n]*bad/[^\n]*/decoding')

    @patch('hunspell.hunspell.WIN32_LONG_PATH_PREFIX', '/not/valid')
    def test_windows_utf_8_encoding_applies_prefix(self, *mocks):
        with captured_c_stderr_file() as caperr:
            with patch("os.name", 'nt'):
                # If python file existance checks used prefix, this would raise a HunspellFilePathError
                Hunspell('test', system_encoding='UTF-8')
            with open(caperr, 'r') as err:
                # But the Hunspell library lookup had the prefix applied
                self.assertRegexpSearch(err.read(), r'error:[^\n]*/not/valid[^\n]*')

    def test_spell(self):
        self.assertFalse(self.h.spell('dpg'))
        self.assertTrue(self.h.spell('dog'))

    def test_spell_utf8(self):
        self.assertTrue(self.h.spell(u'café'))
        self.assertFalse(self.h.spell(u'uncafé'))

    def test_spell_empty(self):
        self.assertTrue(self.h.spell(''))

    def test_suggest(self):
        required = ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg')
        suggest = self.h.suggest('dpg')
        self.assertIsInstance(suggest, tuple)
        self.assertAllIn(required, suggest)

    def test_suggest_utf8(self):
        required = (u'café', u'Cerf')
        for variant in ('cefé', u'cefé'):
            suggest = self.h.suggest(variant)
            self.assertIsInstance(suggest, tuple)
            self.assertAllIn(required, suggest)

    def test_suggest_empty(self):
        self.assertEqual(self.h.suggest(''), ())

    def test_stem(self):
        self.assertEqual(self.h.stem('dog'), ('dog',))
        self.assertEqual(self.h.stem('permanently'), ('permanent',))

    def test_add(self):
        word = 'outofvocabularyword'
        self.assertEqual(self.h.spell(word), False)
        self.h.add(word)
        self.assertEqual(self.h.spell(word), True)
        typo = word + 'd'
        self.assertAllIn([word], self.h.suggest(typo))

    def test_bulk_suggest(self):
        self.h.set_concurrency(3)
        suggest = self.h.bulk_suggest(['dog', 'dpg'])
        self.assertEqual(sorted(suggest.keys()), ['dog', 'dpg'])
        self.assertIsInstance(suggest['dog'], tuple)
        self.assertAllIn(('dog',), suggest['dog'])

        required = ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg')
        self.assertIsInstance(suggest['dpg'], tuple)
        self.assertAllIn(required, suggest['dpg'])

        checked = ['bjn', 'dog', 'dpg', 'dyg', 'foo', 'frg', 'opg', 'pgg', 'qre', 'twg']
        suggest = self.h.bulk_suggest(checked)
        self.assertEqual(sorted(suggest.keys()), checked)

    def test_bulk_stem(self):
        self.h.set_concurrency(3)
        self.assertDictEqual(self.h.bulk_stem(['dog', 'permanently']), {
            'permanently': ('permanent',),
            'dog': ('dog',)
        })
        self.assertDictEqual(self.h.bulk_stem(['dog', 'twigs', 'permanently', 'unrecorded']), {
            'unrecorded': ('recorded',),
            'permanently': ('permanent',),
            'twigs': ('twig',),
            'dog': ('dog',)
        })

    def test_non_overlapping_caches(self):
        test_suggest = self.h.suggest('testing')
        test_stem = self.h.stem('testing')

        self.h._suggest_cache['made-up'] = test_suggest
        self.assertEqual(self.h.suggest('made-up'), test_suggest)
        self.h._stem_cache['made-up'] = test_stem
        self.assertEqual(self.h.stem('made-up'), test_stem)

        h2 = Hunspell('en_US', hunspell_data_dir=DICT_DIR)
        self.assertNotEqual(h2.suggest('made-up'), test_suggest)
        self.assertNotEqual(h2.stem('made-up'), test_stem)

    def test_overlapping_caches(self):
        test_suggest = self.h.suggest('testing')
        test_stem = self.h.stem('testing')

        self.h._suggest_cache['made-up'] = test_suggest
        self.assertEqual(self.h.suggest('made-up'), test_suggest)
        self.h._stem_cache['made-up'] = test_stem
        self.assertEqual(self.h.stem('made-up'), test_stem)

        del self.h
        self.h = Hunspell('test', hunspell_data_dir=DICT_DIR)
        self.assertEqual(self.h.suggest('made-up'), test_suggest)
        self.assertEqual(self.h.stem('made-up'), test_stem)

    def test_save_caches_persistance(self):
        temp_dir = tempfile.mkdtemp()
        try:
            h1 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')
            test_suggest = h1.suggest('testing')
            test_stem = h1.stem('testing')

            h1._suggest_cache['made-up'] = test_suggest
            self.assertEqual(h1.suggest('made-up'), test_suggest)
            h1._stem_cache['made-up'] = test_stem
            self.assertEqual(h1.stem('made-up'), test_stem)

            h1.save_cache()
            del h1

            cacheman = get_cache_manager('disk_hun')
            cacheman.deregister_all_caches()
            self.assertEqual(len(cacheman.cache_by_name), 0)

            h2 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')

            self.assertNotEqual(len(h2._suggest_cache), 0)
            self.assertNotEqual(len(h2._stem_cache), 0)
            self.assertEqual(h2.suggest('made-up'), test_suggest)
            self.assertEqual(h2.stem('made-up'), test_stem)
        finally:
            shutil.rmtree(temp_dir) # Nuke temp content

    def test_clear_caches_persistance(self):
        temp_dir = tempfile.mkdtemp()
        try:
            h1 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')
            test_suggest = h1.suggest('testing')
            test_stem = h1.stem('testing')

            h1._suggest_cache['made-up'] = test_suggest
            self.assertEqual(h1.suggest('made-up'), test_suggest)
            h1._stem_cache['made-up'] = test_stem
            self.assertEqual(h1.stem('made-up'), test_stem)

            h1.save_cache()
            h1.clear_cache()
            del h1

            cacheman = get_cache_manager('disk_hun')
            cacheman.deregister_all_caches()
            self.assertEqual(len(cacheman.cache_by_name), 0)

            h2 = Hunspell('test',
                hunspell_data_dir=DICT_DIR,
                disk_cache_dir=temp_dir,
                cache_manager='disk_hun')

            self.assertEqual(len(h2._suggest_cache), 0)
            self.assertEqual(len(h2._stem_cache), 0)
            self.assertNotEqual(h2.suggest('made-up'), test_suggest)
            self.assertNotEqual(h2.stem('made-up'), test_stem)
        finally:
            shutil.rmtree(temp_dir) # Nuke temp content

    def test_clear_caches_non_peristance(self):
        test_suggest = self.h.suggest('testing')
        test_stem = self.h.stem('testing')

        self.h._suggest_cache['made-up'] = test_suggest
        self.assertEqual(self.h.suggest('made-up'), test_suggest)
        self.h._stem_cache['made-up'] = test_stem
        self.assertEqual(self.h.stem('made-up'), test_stem)

        self.h.clear_cache()

        del self.h
        self.h = Hunspell('test', hunspell_data_dir=DICT_DIR)
        self.assertNotEqual(self.h.suggest('made-up'), test_suggest)
        self.assertNotEqual(self.h.stem('made-up'), test_stem)
Пример #11
0
class HunspellTest(unittest.TestCase):
    def setUp(self):
        self.h = Hunspell('test', hunspell_data_dir=DICT_DIR)

    def tearDown(self):
        try:
            del self.h
        except AttributeError:
            pass

    def assertAllIn(self, checked, expected):
        self.assertTrue(all(x in expected for x in checked),
                        u"{} not all found in {}".format(checked, expected))

    def test_create_destroy(self):
        del self.h

    def test_missing_dict(self):
        with self.assertRaises(IOError):
            Hunspell('not_avail', hunspell_data_dir=DICT_DIR)

    def test_spell(self):
        self.assertFalse(self.h.spell('dpg'))
        self.assertTrue(self.h.spell('dog'))

    def test_spell_utf8(self):
        self.assertTrue(self.h.spell(u'café'))
        self.assertFalse(self.h.spell(u'uncafé'))

    def test_spell_empty(self):
        self.assertTrue(self.h.spell(''))

    def test_suggest(self):
        required = ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg')
        suggest = self.h.suggest('dpg')
        self.assertIsInstance(suggest, tuple)
        self.assertAllIn(required, suggest)

    def test_suggest_utf8(self):
        required = (u'café', u'Cerf')
        for variant in ('cefé', u'cefé'):
            suggest = self.h.suggest(variant)
            self.assertIsInstance(suggest, tuple)
            self.assertAllIn(required, suggest)

    def test_suggest_empty(self):
        self.assertEqual(self.h.suggest(''), ())

    def test_stem(self):
        self.assertEqual(self.h.stem('dog'), ('dog', ))
        self.assertEqual(self.h.stem('permanently'), ('permanent', ))

    def test_bulk_suggest(self):
        self.h.set_concurrency(3)
        suggest = self.h.bulk_suggest(['dog', 'dpg'])
        self.assertEqual(sorted(suggest.keys()), ['dog', 'dpg'])
        self.assertIsInstance(suggest['dog'], tuple)
        self.assertAllIn(('dog', ), suggest['dog'])

        required = ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg')
        self.assertIsInstance(suggest['dpg'], tuple)
        self.assertAllIn(required, suggest['dpg'])

        checked = [
            'bjn', 'dog', 'dpg', 'dyg', 'foo', 'frg', 'opg', 'pgg', 'qre',
            'twg'
        ]
        suggest = self.h.bulk_suggest(checked)
        self.assertEqual(sorted(suggest.keys()), checked)

    def test_bulk_stem(self):
        self.h.set_concurrency(3)
        self.assertDictEqual(self.h.bulk_stem(['dog', 'permanently']), {
            'permanently': ('permanent', ),
            'dog': ('dog', )
        })
        self.assertDictEqual(
            self.h.bulk_stem(['dog', 'twigs', 'permanently', 'unrecorded']), {
                'unrecorded': ('recorded', ),
                'permanently': ('permanent', ),
                'twigs': ('twig', ),
                'dog': ('dog', )
            })
Пример #12
0
class HunspellTest(unittest.TestCase):
    def setUp(self):
        self.h = Hunspell('test', hunspell_data_dir=DICT_DIR)

    def tearDown(self):
        try:
            del self.h
        except AttributeError:
            pass

    def assertAllIn(self, checked, expected):
        self.assertTrue(all(x in expected for x in checked),
            u"{} not all found in {}".format(checked, expected))

    def test_create_destroy(self):
        del self.h

    def test_missing_dict(self):
        with self.assertRaises(IOError):
            Hunspell('not_avail', hunspell_data_dir=DICT_DIR)

    def test_spell(self):
        self.assertFalse(self.h.spell('dpg'))
        self.assertTrue(self.h.spell('dog'))

    def test_spell_utf8(self):
        self.assertTrue(self.h.spell(u'café'))
        self.assertFalse(self.h.spell(u'uncafé'))

    def test_spell_empty(self):
        self.assertTrue(self.h.spell(''))

    def test_suggest(self):
        required = ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg')
        suggest = self.h.suggest('dpg')
        self.assertIsInstance(suggest, tuple)
        self.assertAllIn(required, suggest)

    def test_suggest_utf8(self):
        required = (u'café', u'Cerf')
        for variant in ('cefé', u'cefé'):
            suggest = self.h.suggest(variant)
            self.assertIsInstance(suggest, tuple)
            self.assertAllIn(required, suggest)

    def test_suggest_empty(self):
        self.assertEqual(self.h.suggest(''), ())

    def test_stem(self):
        self.assertEqual(self.h.stem('dog'), ('dog',))
        self.assertEqual(self.h.stem('permanently'), ('permanent',))

    def test_bulk_suggest(self):
        self.h.set_concurrency(3)
        suggest = self.h.bulk_suggest(['dog', 'dpg'])
        self.assertEqual(sorted(suggest.keys()), ['dog', 'dpg'])
        self.assertIsInstance(suggest['dog'], tuple)
        self.assertAllIn(('dog',), suggest['dog'])

        required = ('dog', 'pg', 'deg', 'dig', 'dpt', 'dug', 'mpg', 'd pg')
        self.assertIsInstance(suggest['dpg'], tuple)
        self.assertAllIn(required, suggest['dpg'])

        checked = ['bjn', 'dog', 'dpg', 'dyg', 'foo', 'frg', 'opg', 'pgg', 'qre', 'twg']
        suggest = self.h.bulk_suggest(checked)
        self.assertEqual(sorted(suggest.keys()), checked)

    def test_bulk_stem(self):
        self.h.set_concurrency(3)
        self.assertDictEqual(self.h.bulk_stem(['dog', 'permanently']), {
            'permanently': ('permanent',),
            'dog': ('dog',)
        })
        self.assertDictEqual(self.h.bulk_stem(['dog', 'twigs', 'permanently', 'unrecorded']), {
            'unrecorded': ('recorded',),
            'permanently': ('permanent',),
            'twigs': ('twig',),
            'dog': ('dog',)
        })