Пример #1
0
    def test_transformation(self):
        """Check ``MLM.transform`` on a regular sentence and on edge inputs.

        For a seeded run with ``n=5`` the test expects five samples whose
        first two words, word at index 3 and last five words equal those of
        the original, and whose (word 2 + word 4) combination is unique per
        sample.  It also pins the output for an empty text and for a text
        made of symbols and digits.
        """
        import random

        sent2 = 'The quick brown fox jumps over the lazy dog. '
        data_sample = SASample({'x': sent2, 'y': "negative"})
        # Seed after building the sample and before building the
        # transformation; the exact expected text at the end of this test
        # presumably depends on this order.
        random.seed(100)
        swap_ins = MLM(device='cpu')

        x = swap_ins.transform(data_sample, n=5)
        self.assertEqual(5, len(x))

        origin = data_sample.get_words('x')
        tokens = []
        for sample in x:
            trans = sample.get_words('x')
            # Positions the transformation is expected to leave alone.
            self.assertEqual(trans[:2], origin[:2])
            self.assertEqual(trans[3], origin[3])
            self.assertEqual(trans[-5:], origin[-5:])
            # Remember the two remaining positions to compare across samples.
            tokens.append(trans[2] + trans[4])

        # Every sample must differ from the others in those two positions.
        self.assertEqual(5, len(set(tokens)))

        # Empty text: nothing to transform.
        empty_sample = SASample({'x': '', 'y': "negative"})
        self.assertEqual([], swap_ins.transform(empty_sample))

        # Symbols and digits only: the expected text is a recorded output.
        symbol_sample = SASample({'x': '~!@#$%^7890"\'', 'y': "negative"})
        symbol_trans = swap_ins.transform(symbol_sample)[0]
        self.assertEqual('epilogue "\'', symbol_trans.get_text('x'))
Пример #2
0
    def test_transformation(self):
        """Check ``TwitterType.transform`` on a sentence and on edge inputs.

        With ``n=5`` the test expects five samples, each equal to the
        original words plus exactly one extra token at the front or at the
        back, and all five generated texts distinct.  For an empty text and
        for a symbols-only text it expects one sample whose text differs
        from the input.
        """
        sent1 = "The quick brown fox jumps over the lazy dog."
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = TwitterType()
        x = swap_ins.transform(data_sample, n=5)

        self.assertEqual(5, len(x))
        origin = data_sample.get_words('x')
        change = []
        for sample in x:
            trans = sample.get_words('x')
            change.append(sample.get_text('x'))
            # The original words must survive intact, with a single token
            # either prepended or appended.
            self.assertTrue(origin == trans[1:] or origin == trans[:-1])

        # Uniqueness is checked on the generated texts; a set built from
        # the sample objects would not necessarily compare their content.
        self.assertEqual(5, len(set(change)))

        # test special input
        empty_text = ''
        empty_trans = swap_ins.transform(
            SASample({'x': empty_text, 'y': "negative"}))
        self.assertEqual(1, len(empty_trans))
        self.assertNotEqual(empty_trans[0].get_text('x'), empty_text)

        symbol_text = '~!@#$%^7890"\''
        symbol_trans = swap_ins.transform(
            SASample({'x': symbol_text, 'y': "negative"}))
        self.assertEqual(1, len(symbol_trans))
        self.assertNotEqual(symbol_trans[0].get_text('x'), symbol_text)
Пример #3
0
    def test_transformation(self):
        """Check ``SpellingError.transform`` on a sentence and on edge inputs.

        For a seeded run with ``n=5`` the test expects the words at index 0
        and 2..6 to be unchanged, the words at index 1, 7 and 8 to differ
        from the original, and the combination of those three words to be
        unique per sample.  An empty text and a symbols-only text must yield
        no samples.
        """
        import random

        random.seed(100)
        sent1 = 'The quick brown fox jumps over the lazy dog.'
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = SpellingError()
        x = swap_ins.transform(data_sample, n=5)

        origin = data_sample.get_words('x')
        change = []
        for sample in x:
            trans = sample.get_words('x')
            # Untouched positions.
            self.assertEqual(origin[0], trans[0])
            self.assertEqual(origin[2:7], trans[2:7])
            # Collect the altered positions to compare across samples.
            change.append(trans[1] + trans[7] + trans[8])
            for idx in (1, 7, 8):
                self.assertNotEqual(trans[idx], origin[idx])

        # Five distinct combinations also implies five samples were returned.
        self.assertEqual(5, len(set(change)))

        # Inputs with nothing to misspell must produce no samples.
        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
Пример #4
0
    def test_transformation(self):
        """Check ``SwapNamedEnt`` entity decomposition and transformation.

        First verifies that ``decompose_entities_info`` yields positions,
        entity strings and labels that agree with the sample's words, then
        compares five seeded transformation results against recorded texts
        and checks that the label ``y`` is preserved.  An empty text and a
        symbols-only text must yield no samples.
        """
        import random

        # NOTE: the misspelling "stuendent" is part of the fixture; the
        # recorded expected texts below contain it verbatim.
        sent1 = 'Lionel Messi is a football player from Argentina. ' \
                'Fudan University is located in Shanghai province, ' \
                'Alibaba with 50000 staff. Wang Xiao is a stuendent. ' \
                'Zhangheng road in Pudong area.'
        data_sample = SASample({'x': sent1, 'y': "positive"})
        swap_ins = SwapNamedEnt()

        # test decompose_entities_info
        words = data_sample.get_words('x')
        positions, entities, labels = swap_ins.decompose_entities_info(
            data_sample.get_ner('x'))
        for pos, entity, label in zip(positions, entities, labels):
            self.assertIn(label, ['LOCATION', 'PERSON', 'ORGANIZATION'])
            # pos is used as a [start, end) span into the word list.
            self.assertEqual(words[pos[0]:pos[1]], entity.split(' '))

        # test transformation
        # Seeded right before transform(); the exact texts below presumably
        # depend on this seed.
        random.seed(208)

        trans = swap_ins.transform(data_sample, n=5)
        self.assertEqual(5, len(trans))
        expected = [
            "Mr Ross is a football player from Tashkent. Fudan Unive"
            "rsity is located in South Zone province, "
            "Zagreb with 50000 staff. Jean Chrétien is a stuendent. Zhan"
            "gheng road in  Czech Republic area.",
            "Mr Ross is a football player from Tashkent. Fudan Univer"
            "sity is located in South Zone province, "
            "Zagreb with 50000 staff. Jean Chrétien is a stuendent. "
            "Zhangheng road in the valley area.",
            "Mr Ross is a football player from Tashkent. Fudan University"
            " is located in South Zone province, "
            "Zagreb with 50000 staff. Jean Chrétien is a stuendent. "
            "Zhangheng road in Parvan area.",
            "Mr Ross is a football player from Tashkent. Fudan University"
            " is located in South Zone province, "
            "Zagreb with 50000 staff. "
            "Jean Chrétien is a stuendent. Zhangheng road in East-West "
            "area.",
            "Mr Ross is a football player from Tashkent. Fudan "
            "University is located in east Atlantic province, "
            "Prague with 50000 staff. Mr Mayoral is a stuendent. "
            "Zhangheng road in West Midlands area.",
        ]
        for sample, sent in zip(trans, expected):
            # assertEqual, not assertTrue: assertTrue's second argument is
            # only the failure message, so it would never compare the label.
            self.assertEqual("positive", sample.get_value('y'))
            self.assertEqual(sent, sample.get_text('x'))

        # test special sample
        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
Пример #5
0
    def test_transformation(self):
        """Check ``Prejudice`` argument validation and transformation.

        Invalid ``(change_type, prejudice_tendency)`` pairs must raise
        ``ValueError``.  For a seeded ``Name``/``woman`` run with ``n=5`` the
        test expects words 0..4, 6..7 and the last word to be unchanged and
        the combination of words 5 and 8 to be unique per sample.  An empty
        text and a symbols-only text must yield no samples.
        """
        import random

        # test wrong mode
        invalid_args = (
            ('Loc', 'woman'),
            ('Name', 'Japan'),
            ('Loc', 'Ja'),
            ('Loc', ['Ja']),
        )
        for change_type, tendency in invalid_args:
            self.assertRaises(ValueError, Prejudice, change_type, tendency)

        random.seed(100)

        sent1 = "Interesting and moving performances by Tom Courtenay " \
                "and Peter Finch"
        swap_ins = Prejudice(change_type='Name', prejudice_tendency='woman')

        data_sample = SASample({'x': sent1, 'y': "negative"})
        x = swap_ins.transform(data_sample, n=5)

        # Special inputs are transformed here, after the main call, so the
        # order of calls into the transformation stays as it was.
        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))

        origin = data_sample.get_words('x')
        change = []
        for sa_sample in x:
            trans = sa_sample.get_words('x')
            # Positions expected to be left alone.
            self.assertEqual(origin[:5], trans[:5])
            self.assertEqual(origin[6:8], trans[6:8])
            self.assertEqual(origin[-1], trans[-1])
            # Positions 5 and 8 are collected to compare across samples.
            change.append(trans[5] + trans[8])

        self.assertEqual(5, len(set(change)))
Пример #6
0
    def test_transformation(self):
        """Check ``WordCase`` construction and its lower/upper/title modes.

        An unknown case type must raise ``ValueError``; the default instance
        must expose one of the four known case types; each explicit mode must
        produce the words of the sample converted with the matching ``str``
        method.  Empty and symbols-only texts must come back unchanged in
        ``lower`` mode.
        """
        sent1 = 'The quick brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sent1, 'y': "negative"})

        # An unsupported case type must be rejected by the constructor.
        self.assertRaises(ValueError, WordCase, 'random little')

        # The default instance must carry one of the supported case types.
        test_case = WordCase()
        self.assertIn(
            test_case.case_type, ['upper', 'lower', 'title', 'random'])

        # test lower, upper and title (same order as before)
        conversions = (
            ('lower', str.lower),
            ('upper', str.upper),
            ('title', str.title),
        )
        for case_type, convert in conversions:
            self.assertEqual(
                [convert(word) for word in data_sample.get_words('x')],
                WordCase(case_type).transform(data_sample)[0].get_words('x'))

        # test special case: lowering these texts must not alter them
        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual(
                text,
                WordCase('lower').transform(special_sample)[0].get_text('x'))
Пример #7
0
    def test_transformation(self):
        """Check ``SwapAntWordNet.transform`` on a sentence and edge inputs.

        Although ``n=5`` is requested, the test expects exactly one sample,
        in which only the word at index 1 differs from the original.  An
        empty text and a symbols-only text must yield no samples.
        """
        sent1 = 'The fast brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = SwapAntWordNet()
        x = swap_ins.transform(data_sample, n=5)
        self.assertEqual(1, len(x))

        origin = data_sample.get_words('x')
        for sample in x:
            trans = sample.get_words('x')
            self.assertEqual(origin[0], trans[0])
            # Index 1 is the only position expected to change.
            self.assertNotEqual(origin[1], trans[1])
            self.assertEqual(origin[2:], trans[2:])

        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
Пример #8
0
    def test_transformation(self):
        """Check ``SwapSynWordNet.transform`` on a sentence and edge inputs.

        For a seeded run with ``n=5`` the test expects five samples, each
        differing from the original in exactly one word position (compared
        pairwise over the shorter of the two word lists).  An empty text and
        a symbols-only text must yield no samples.
        """
        import random

        random.seed(1)
        sent1 = 'The quick brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = SwapSynWordNet()
        x = swap_ins.transform(data_sample, n=5)

        self.assertEqual(5, len(x))
        for sample in x:
            # Number of aligned positions where the words differ.
            cnt = sum(
                1
                for new, old in zip(sample.get_words('x'),
                                    data_sample.get_words('x'))
                if new != old)
            self.assertEqual(1, cnt)

        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
Пример #9
0
    def test_transformation(self):
        """Check ``Tense.transform`` on a sentence and on edge inputs.

        With ``n=3`` the test expects three samples in which only the word
        at index 4 differs from the original.  An empty text and a
        symbols-only text must yield no samples.
        """
        sent1 = 'The quick brown fox jumps over the lazy dog .'
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = Tense()
        x = swap_ins.transform(data_sample, n=3)

        self.assertEqual(3, len(x))
        origin = data_sample.get_words('x')
        for sample in x:
            trans = sample.get_words('x')
            # Everything around index 4 must be untouched.
            self.assertEqual(origin[:4], trans[:4])
            self.assertEqual(origin[5:], trans[5:])
            # Index 4 itself must have been rewritten.
            self.assertNotEqual(trans[4], origin[4])

        # test special input
        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))
Пример #10
0
    def test_transformation(self):
        """Check ``SwapSynWordEmbedding.transform`` on a sentence and edge inputs.

        For a seeded run with ``n=5`` the test expects five samples whose
        first five words and last word equal those of the original and whose
        second-to-last word is unique per sample.  An empty text and a
        symbols-only text must yield no samples.
        """
        import random

        random.seed(1)
        sent1 = "There are no water in bottom."
        data_sample = SASample({'x': sent1, 'y': "negative"})
        swap_ins = SwapSynWordEmbedding()
        x = swap_ins.transform(data_sample, n=5)
        self.assertEqual(5, len(x))

        origin = data_sample.get_words('x')
        change = []
        for sample in x:
            trans = sample.get_words('x')
            # The second-to-last word is collected to compare across samples.
            change.append(trans[-2])
            self.assertEqual(origin[:5], trans[:5])
            self.assertEqual(origin[-1], trans[-1])
        self.assertEqual(5, len(set(change)))

        for text in ('', '~!@#$%^7890"\''):
            special_sample = SASample({'x': text, 'y': "negative"})
            self.assertEqual([], swap_ins.transform(special_sample))