示例#1
0
    def test_generate(self):
        """Exercise ``CWSGenerator.generate`` on degenerate, invalid and
        regular input.

        Three scenarios are checked in order:

        1. An empty sentence and a symbols-only sentence: every batch the
           generator yields is expected to be empty.
        2. Invalid ``trans_methods`` values: the first ``next()`` on the
           generator is expected to raise ``ValueError``.
        3. Two labelled sentences: every yielded batch must pair each
           original sample with exactly one transformed sample.
        """
        # Scenario 1: inputs for which every yielded batch must be empty.
        test1 = CWSSample({'x': '', 'y': []})
        test2 = CWSSample({'x': '~ ! @ # $ % ^ & * ( ) _ +', 'y': []})
        dataset = Dataset('CWS')
        dataset.load([test1, test2])
        mode = [
            'SwapName', 'CnSwapNum', 'Reduplication', 'CnMLM',
            'SwapContraction', 'SwapVerb', 'SwapSyn'
        ]
        gene = CWSGenerator(trans_methods=mode, sub_methods=[])
        for original_samples, trans_rst, _trans_type in gene.generate(dataset):
            # assertEqual reports the actual lengths when the check fails.
            self.assertEqual(len(original_samples), 0)
            self.assertEqual(len(trans_rst), 0)

        # Scenario 2: wrong trans_methods. generate() is consumed through
        # next(), so the error is expected on the first step of iteration.
        gene = CWSGenerator(trans_methods=["wrong_transform_method"],
                            sub_methods=[])
        self.assertRaises(ValueError, next, gene.generate(dataset))
        # NOTE(review): "AddSubtree" is presumably a transformation that is
        # not registered for the CWS task -- confirm against the generator.
        gene = CWSGenerator(trans_methods=["AddSubtree"], sub_methods=[])
        self.assertRaises(ValueError, next, gene.generate(dataset))
        # A bare string instead of a list of method names is rejected too.
        gene = CWSGenerator(trans_methods="CnMLM", sub_methods=[])
        self.assertRaises(ValueError, next, gene.generate(dataset))

        # Scenario 3: regular sentences with one tag per character.
        sent1 = '周小明生产一万'
        sent2 = '央视想朦胧'
        dataset = Dataset(task='CWS')
        dataset.load({
            'x': [sent1, sent2],
            'y': [['B', 'M', 'E', 'B', 'E', 'B', 'E'],
                  ['B', 'E', 'S', 'B', 'E']]
        })

        gene = CWSGenerator(trans_methods=mode, sub_methods=[])
        for original_samples, trans_rst, _trans_type in gene.generate(dataset):
            self.assertEqual(len(original_samples), len(trans_rst))
示例#2
0
 def test_transform(self):
     """Check the output of ``swap_ins.transform``.

     For the shared ``data_sample`` the transformation must return exactly
     one sample whose text, tag sequence and mask match the expected
     values. A second, space-delimited sentence must also yield exactly
     one result.
     """
     trans_data = swap_ins.transform(data_sample)
     # assertEqual (rather than assertTrue(a == b)) shows both operands
     # in the failure message.
     self.assertEqual(len(trans_data), 1)
     self.assertEqual('小明喜欢看电影。', trans_data[0].get_value('x'))
     self.assertEqual(['B', 'E', 'B', 'E', 'S', 'B', 'E', 'S'],
                      trans_data[0].get_value('y'))
     # NOTE(review): mask value 2 presumably marks the characters changed
     # by the transformation -- confirm against the sample class.
     self.assertEqual([0, 0, 0, 0, 0, 2, 2, 0], trans_data[0].mask)
     trans_data = swap_ins.transform(
         CWSSample({'x': '玩具厂 大量 生产 玩具 。', 'y': []}))
     self.assertEqual(1, len(trans_data))
示例#3
0
import unittest

from textflint.generation_layer.transformation.CWS.swap_contraction \
    import SwapContraction
from textflint.input_layer.component.sample.cws_sample import CWSSample

# Module-level fixtures shared by the tests below: a space-delimited
# sentence, a CWSSample built from it with an empty 'y' list, and the
# SwapContraction instance under test.
sent1 = '来自 央视 报道 。'
data_sample = CWSSample({'x': sent1, 'y': []})
swap_ins = SwapContraction()


class TestSwapContraction(unittest.TestCase):
    """Unit tests for the CWS ``SwapContraction`` transformation."""

    def test_get_transformations(self):
        """Check ``_get_transformations`` on valid, invalid and empty input.

        A word list must produce the expected triple, a plain string must
        raise ``AssertionError``, and an empty list must produce three
        empty lists.
        """
        # NOTE(review): the triple presumably holds replacement spans,
        # replacement words and their tags -- confirm against the method.
        self.assertEqual(
            ([[2, 4]], [['中央电视台']], [['B', 'M', 'M', 'M', 'E']]),
            swap_ins._get_transformations(data_sample.get_words()))
        # Strings (even the empty one) are rejected.
        self.assertRaises(AssertionError, swap_ins._get_transformations, sent1)
        self.assertRaises(AssertionError, swap_ins._get_transformations, '')
        self.assertEqual(swap_ins._get_transformations([]), ([], [], []))

    def test_transformation(self):
        """Check that ``transform`` replaces '央视' with '中央电视台'.

        Exactly one sample is expected; its text, tag sequence and mask
        are compared against fixed expected values.
        """
        trans_sample = swap_ins.transform(data_sample)
        # assertEqual reports the actual length when the check fails.
        self.assertEqual(len(trans_sample), 1)
        trans_sample = trans_sample[0]
        # The mask holds 2 at the five positions of the inserted word.
        self.assertEqual([0, 0, 2, 2, 2, 2, 2, 0, 0, 0], trans_sample.mask)
        self.assertEqual('来自中央电视台报道。', trans_sample.get_value('x'))
        self.assertEqual(['B', 'E', 'B', 'M', 'M', 'M', 'E', 'B', 'E', 'S'],
                         trans_sample.get_value('y'))


if __name__ == "__main__":
示例#4
0
import unittest

from textflint.input_layer.component.sample.cws_sample import CWSSample
from textflint.generation_layer.transformation.CWS.reduplication \
    import Reduplication

# Module-level fixtures shared by the test below: an unsegmented sentence,
# a CWSSample pairing it with one tag per character, and the Reduplication
# instance under test.
sent1 = '朦胧的月色'
sample = CWSSample({'x': sent1, 'y': ['B', 'E', 'S', 'B', 'E']})
swap_ins = Reduplication()


class TestReduplication(unittest.TestCase):
    """Unit tests for the CWS ``Reduplication`` transformation."""

    def test_transformation(self):
        """Check that ``transform`` turns '朦胧' into '朦朦胧胧'.

        Exactly one sample is expected; its text, tag sequence and mask
        are compared against fixed expected values.
        """
        trans_sample = swap_ins.transform(sample)
        # assertEqual reports the actual length when the check fails.
        self.assertEqual(1, len(trans_sample))
        trans_sample = trans_sample[0]
        self.assertEqual('朦朦胧胧的月色', trans_sample.get_value('x'))
        self.assertEqual(['B', 'M', 'M', 'E', 'S', 'B', 'E'],
                         trans_sample.get_value('y'))
        # The mask holds 2 at the four positions of the reduplicated word.
        self.assertEqual([2, 2, 2, 2, 0, 0, 0], trans_sample.mask)


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()