def test_wordcount(self):
        # Exercises passwordmetrics._find_words, which these assertions expect to
        # return a pair: the set of dictionary words found in the password and a
        # string holding the leftover characters.
        # NOTE(review): self.words is presumably the word corpus prepared by the
        # test fixture; it is defined outside this method -- confirm in setUp.
        words, rest = passwordmetrics._find_words('correcthorseb!wdbatterystaplerWd3t', self.words)
        self.assertEqual(words, {'battery', 'horse', 'correct', 'staple'})
        # 'stapler' is expected to yield 'staple', leaving its trailing 'r' in the rest.
        self.assertEqual(rest, 'b!wdrWd3t')

        # Only the one-letter word 'a' is expected to be found in this input.
        words, rest = passwordmetrics._find_words('abcdefgh', self.words)
        self.assertEqual(words, {'a'})
        self.assertEqual(rest, 'bcdefgh')

        # Duplicate words count only as one word:
        words, rest = passwordmetrics._find_words('batterybattery', self.words)
        self.assertEqual(words, {'battery'})
    def test_substitutions(self):
        # Checks how character substitutions (e.g. '4' for 'a', '0' for 'o')
        # affect the reported entropy and the leftovers returned by _find_words.

        # This long password gets a reasonable score on characters alone
        self.assertAlmostEqual(passwordmetrics._character_entropy('Tr0ub4dor&3')[0], 65.54588851677637)
        # But much worse when considering it uses a word, even though it's misspelled and only
        # appears once in the whole corpus (because I put it there).
        self.assertAlmostEqual(passwordmetrics.metrics('Tr0ub4dor&3')['entropy'], 42.11714046349914)
        # Although still better than if there were no substitutions in the word
        self.assertAlmostEqual(passwordmetrics.metrics('Troubador&3')['entropy'], 31.332505617941614)

        # A password made only of dictionary words leaves nothing over.
        _, rest = passwordmetrics._find_words('batterybattery', self.words)
        self.assertEqual(rest, '')

        # NOTE(review): the expected leftovers below look like the substituted
        # characters themselves, with the repeated '4' reported only once --
        # confirm against _find_words.
        _, rest = passwordmetrics._find_words('b4tteryb4ttery', self.words)
        self.assertEqual(rest, '4')

        _, rest = passwordmetrics._find_words('b4ttery8attery', self.words)
        self.assertEqual(rest, '48')