示例#1
0
    def test_porter_snowball(self):
        """Test abydos.stemmer.porter against the Snowball test set.

        These test cases are from
        http://snowball.tartarus.org/algorithms/porter/diffs.txt

        Each data row of ``corpora/snowball_porter.csv`` is read as
        ``word,stem``; the stem is lower-cased before comparison.
        """
        #  Snowball Porter test set
        with open(TESTDIR+'/corpora/snowball_porter.csv') as snowball_testset:
            # The first line is always discarded (presumably a column
            # header -- confirm against the CSV file).
            next(snowball_testset)
            for raw_line in snowball_testset:
                # Lines beginning with '#' are comments.
                if raw_line.startswith('#'):
                    continue
                stripped = raw_line.strip()
                # A blank line (e.g. a trailing newline at end of file)
                # carries no test case; skip it instead of raising
                # IndexError when looking up the second field.
                if not stripped:
                    continue
                fields = stripped.split(',')
                word, stem = fields[0], fields[1]
                self.assertEqual(porter(word), stem.lower())
示例#2
0
    def test_porter(self):
        """Test abydos.stemmer.Porter via ``self.stmr`` and the wrapper."""
        # (input word, expected stem) pairs, checked in order
        expectations = (
            # base case
            ('', ''),
            # simple cases
            ('c', 'c'),
            ('da', 'da'),
            ('ad', 'ad'),
            ('sing', 'sing'),
            ('singing', 'sing'),
            # missed branch test cases
            ('capitalism', 'capit'),
            ('fatalism', 'fatal'),
            ('stional', 'stional'),
            ('palism', 'palism'),
            ('sization', 'sizat'),
            ('licated', 'licat'),
            ('lical', 'lical'),
        )
        for word, expected in expectations:
            self.assertEqual(self.stmr.stem(word), expected)

        # The function-style wrapper is exercised once as well.
        self.assertEqual(porter('singing'), 'sing')
示例#3
0
    def test_porter(self):
        """Test abydos.stemmer.Porter on grouped word/stem expectations."""
        base_case = [('', '')]
        simple_cases = [
            ('c', 'c'),
            ('da', 'da'),
            ('ad', 'ad'),
            ('sing', 'sing'),
            ('singing', 'sing'),
        ]
        missed_branch_cases = [
            ('capitalism', 'capit'),
            ('fatalism', 'fatal'),
            ('stional', 'stional'),
            ('palism', 'palism'),
            ('sization', 'sizat'),
            ('licated', 'licat'),
            ('lical', 'lical'),
        ]

        # Groups are walked in the same order as they are declared above.
        for group in (base_case, simple_cases, missed_branch_cases):
            for source, target in group:
                self.assertEqual(self.stmr.stem(source), target)

        # Test wrapper
        self.assertEqual(porter('singing'), 'sing')
示例#4
0
    def test_porter_early_english(self):
        """Test abydos.stemmer.porter with ``early_english=True``."""
        # (input word, expected stem) pairs, checked in order
        expectations = (
            # base case
            ('', ''),
            # simple cases (no different from regular stemmer)
            ('c', 'c'),
            ('da', 'da'),
            ('ad', 'ad'),
            ('sing', 'sing'),
            ('singing', 'sing'),
            # make
            ('make', 'make'),
            ('makes', 'make'),
            ('maketh', 'make'),
            ('makest', 'make'),
            # say
            ('say', 'sai'),
            ('says', 'sai'),
            ('sayeth', 'sai'),
            ('sayest', 'sai'),
            # missed branch test cases
            ('best', 'best'),
            ('meth', 'meth'),
        )
        for word, expected in expectations:
            self.assertEqual(porter(word, early_english=True), expected)
示例#5
0
    def test_porter_early_english(self):
        """Test abydos.stemmer.porter (early English)."""
        def stem_early(term):
            # Every check in this test runs with the early-English flag on.
            return porter(term, early_english=True)

        base_case = [('', '')]
        # simple cases (no different from regular stemmer)
        simple_cases = [
            ('c', 'c'),
            ('da', 'da'),
            ('ad', 'ad'),
            ('sing', 'sing'),
            ('singing', 'sing'),
        ]
        make_forms = [
            ('make', 'make'),
            ('makes', 'make'),
            ('maketh', 'make'),
            ('makest', 'make'),
        ]
        say_forms = [
            ('say', 'sai'),
            ('says', 'sai'),
            ('sayeth', 'sai'),
            ('sayest', 'sai'),
        ]
        missed_branch_cases = [
            ('best', 'best'),
            ('meth', 'meth'),
        ]

        groups = (base_case, simple_cases, make_forms, say_forms,
                  missed_branch_cases)
        for group in groups:
            for term, target in group:
                self.assertEqual(stem_early(term), target)