Пример #1
0
    def test_generate(self):
        """Exercise ``CWSGenerator.generate`` on edge-case and regular input.

        Three scenarios are checked in sequence:

        1. A dataset holding an empty sentence and a punctuation-only
           sentence: every batch the generator yields must contain no
           original and no transformed samples.
        2. Invalid ``transformation_methods`` values: requesting the first
           batch must raise ``ValueError``.
        3. A dataset of two labelled Chinese sentences: every yielded batch
           must pair each original sample with a transformed one (equal
           lengths).
        """
        # Scenario 1: nothing in these sentences should be transformable.
        test1 = CWSSample({'x': '', 'y': []})
        test2 = CWSSample({'x': '~ ! @ # $ % ^ & * ( ) _ +', 'y': []})
        dataset = Dataset('CWS')
        dataset.load([test1, test2])
        mode = [
            'SwapName', 'CnSwapNum', 'Reduplication', 'CnMLM',
            'SwapContraction', 'SwapVerb', 'SwapSyn'
        ]
        gene = CWSGenerator(transformation_methods=mode,
                            subpopulation_methods=[])
        # assertEqual (rather than assertTrue(a == b)) reports both values
        # when the check fails.
        for original_samples, trans_rst, _ in gene.generate(dataset):
            self.assertEqual(0, len(original_samples))
            self.assertEqual(0, len(trans_rst))

        # Scenario 2: wrong transformation_methods.
        # ``generate`` is called eagerly but the error is only expected once
        # ``next`` pulls the first item from it.
        # An unknown method name.
        gene = CWSGenerator(transformation_methods=["wrong_transform_method"],
                            subpopulation_methods=[])
        self.assertRaises(ValueError, next, gene.generate(dataset))
        # A method name that is presumably valid for another task but not
        # for CWS -- TODO confirm against the transformation registry.
        gene = CWSGenerator(transformation_methods=["AddSubtree"],
                            subpopulation_methods=[])
        self.assertRaises(ValueError, next, gene.generate(dataset))
        # A bare string instead of a list of method names.
        gene = CWSGenerator(transformation_methods="CnMLM",
                            subpopulation_methods=[])
        self.assertRaises(ValueError, next, gene.generate(dataset))

        # Scenario 3: regular sentences, one tag per character in 'y'.
        sent1 = '周小明生产一万'
        sent2 = '央视想朦胧'
        dataset = Dataset(task='CWS')
        dataset.load({
            'x': [sent1, sent2],
            'y': [['B', 'M', 'E', 'B', 'E', 'B', 'E'],
                  ['B', 'E', 'S', 'B', 'E']]
        })

        gene = CWSGenerator(transformation_methods=mode,
                            subpopulation_methods=[])
        for original_samples, trans_rst, _ in gene.generate(dataset):
            self.assertEqual(len(original_samples), len(trans_rst))
Пример #2
0
import unittest

from TextFlint.generation_layer.transformation.CWS.swap_contraction \
    import SwapContraction
from TextFlint.input_layer.component.sample.cws_sample import CWSSample

# Shared fixtures, built once at import time and used by the tests below.
# The sentence is written with spaces between its parts and no labels are
# supplied ('y' is empty); presumably CWSSample treats the spaces as word
# boundaries -- TODO confirm against CWSSample.
sent1 = '来自 央视 报道 。'
data_sample = CWSSample({'x': sent1, 'y': []})
swap_ins = SwapContraction()


class TestSwapContraction(unittest.TestCase):
    """Unit tests for the CWS ``SwapContraction`` transformation."""

    def test_get_transformations(self):
        """Check ``_get_transformations`` on valid, invalid and empty input."""
        # Expected result for the fixture's word list: a triple of lists.
        # Presumably (spans to replace, replacement words, replacement tags)
        # -- TODO confirm against SwapContraction._get_transformations.
        expected = ([[2, 4]], [['中央电视台']], [['B', 'M', 'M', 'M', 'E']])
        # assertEqual (rather than assertTrue(a == b)) shows both values on
        # failure; operand order is kept as in the original comparison.
        self.assertEqual(
            expected, swap_ins._get_transformations(data_sample.get_words()))
        # Passing a string (even an empty one) instead of a word list is
        # expected to trip an assertion inside the method.
        self.assertRaises(AssertionError, swap_ins._get_transformations, sent1)
        self.assertRaises(AssertionError, swap_ins._get_transformations, '')
        # An empty word list yields three empty lists.
        self.assertEqual(swap_ins._get_transformations([]), ([], [], []))

    def test_transformation(self):
        """Check sentence, labels and mask of the single transformed sample."""
        trans_sample = swap_ins.transform(data_sample)
        self.assertEqual(1, len(trans_sample))
        trans_sample = trans_sample[0]
        # The five non-zero mask entries line up with the five characters of
        # the inserted word '中央电视台' in the expected sentence.
        self.assertEqual([0, 0, 2, 2, 2, 2, 2, 0, 0, 0], trans_sample.mask)
        self.assertEqual('来自中央电视台报道。', trans_sample.get_value('x'))
        self.assertEqual(['B', 'E', 'B', 'M', 'M', 'M', 'E', 'B', 'E', 'S'],
                         trans_sample.get_value('y'))


if __name__ == "__main__":
Пример #3
0
import unittest

from TextFlint.generation_layer.transformation.CWS.swap_verb import SwapVerb
from TextFlint.input_layer.component.sample.cws_sample import CWSSample

# Shared fixtures, built once at import time and used by the test below:
# a five-character sentence with one tag per character in 'y', and the
# transformation instance under test.
sent1 = '小明想看书'
sample = CWSSample({'x': sent1, 'y': ['B', 'E', 'S', 'B', 'E']})
swap_ins = SwapVerb()


class TestSwapVerb(unittest.TestCase):
    """Unit tests for the CWS ``SwapVerb`` transformation."""

    def test_transformation(self):
        """Check sentence, labels and mask of the single transformed sample."""
        trans_sample = swap_ins.transform(sample)
        # assertEqual (rather than assertTrue(a == b)) shows both values on
        # failure and matches the other assertions in this test.
        self.assertEqual(1, len(trans_sample))
        trans_sample = trans_sample[0]
        # The verb '看' is expected to be expanded to '看一看'.
        self.assertEqual('小明想看一看书', trans_sample.get_value('x'))
        self.assertEqual(['B', 'E', 'S', 'B', 'M', 'M', 'E'],
                         trans_sample.get_value('y'))
        # The three non-zero mask entries line up with '看一看' in the
        # expected sentence.
        self.assertEqual([0, 0, 0, 2, 2, 2, 0], trans_sample.mask)


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()