Пример #1
0
    def test_generate(self):
        test1 = CWSSample({'x': '', 'y': []})
        test2 = CWSSample({'x': '~ ! @ # $ % ^ & * ( ) _ +', 'y': []})
        dataset = Dataset('CWS')
        dataset.load([test1, test2])
        mode = [
            'SwapName', 'CnSwapNum', 'Reduplication', 'CnMLM',
            'SwapContraction', 'SwapVerb', 'SwapSyn'
        ]
        gene = CWSGenerator(transformation_methods=mode,
                            subpopulation_methods=[])
        for original_samples, trans_rst, trans_type in gene.generate(dataset):
            self.assertTrue(len(original_samples) == 0)
            self.assertTrue(len(trans_rst) == 0)

        # test wrong transformation_methods
        gene = CWSGenerator(transformation_methods=["wrong_transform_method"],
                            subpopulation_methods=[])
        self.assertRaises(ValueError, next, gene.generate(dataset))
        gene = CWSGenerator(transformation_methods=["AddSubtree"],
                            subpopulation_methods=[])
        self.assertRaises(ValueError, next, gene.generate(dataset))
        gene = CWSGenerator(transformation_methods="CnMLM",
                            subpopulation_methods=[])
        self.assertRaises(ValueError, next, gene.generate(dataset))

        sent1 = '周小明生产一万'
        sent2 = '央视想朦胧'
        dataset = Dataset(task='CWS')
        dataset.load({
            'x': [sent1, sent2],
            'y': [['B', 'M', 'E', 'B', 'E', 'B', 'E'],
                  ['B', 'E', 'S', 'B', 'E']]
        })

        gene = CWSGenerator(transformation_methods=mode,
                            subpopulation_methods=[])
        for original_samples, trans_rst, trans_type in gene.generate(dataset):
            self.assertTrue(len(original_samples) == len(trans_rst))
Пример #2
0
                                     'AFC at Super Bowl 50?',
        'answers': [{"text": "Denver Broncos", "answer_start": 177},
                    {"text": "Denver Broncos", "answer_start": 177},
                    {"text": "Denver Broncos", "answer_start": 177}],
        'title': "Super_Bowl_50", 'is_impossible': False})
sample2 = MRCSample(
    {'context': " ", 'question': 'Which NFL team represented '
                                 'the AFC at Super Bowl 50?',
        'answers': [], 'title': "Super_Bowl_50", 'is_impossible': True})
sample3 = MRCSample(
    {'context': "! @ # $ % ^ & * ( )",
     'question': 'Which NFL team represented the AFC at Super Bowl 50?',
        'answers': [], 'title': "Super_Bowl_50", 'is_impossible': True})

dataset = Dataset('MRC')
dataset.load(data_sample)
dataset.extend([sample2, sample3])


class TestMRCGenerator(unittest.TestCase):

    def test_generate(self):
        # test task transformation
        # TODO, domain transformation addsentdiverse
        transformation_methods = ["PerturbAnswer", "ModifyPos"]
        gene = MRCGenerator(transformation_methods=transformation_methods,
                            subpopulation_methods=[])
        for original_samples, trans_rst, trans_type in gene.generate(dataset):
            self.assertEqual(1, len(trans_rst))
            for index in range(len(original_samples)):
                ori_sample = original_samples[index]
Пример #3
0
from TextFlint.generation_layer.generator.coref_generator import CorefGenerator
from TextFlint.input_layer.dataset import Dataset
import unittest
from ....data.coref_debug import CorefDebug

sample1 = CorefDebug.coref_sample1()
sample2 = CorefDebug.coref_sample2()
sample3 = CorefDebug.coref_sample3()
sample4 = CorefDebug.coref_sample4()
sample5 = CorefDebug.coref_sample5()
sample6 = CorefDebug.coref_sample6()
samples = [sample1, sample2, sample3, sample4, sample5, sample6]
dataset = Dataset("COREF")
dataset.load(samples)


class TestRndShuffle(unittest.TestCase):
    def test_transform(self):
        gene = CorefGenerator(transformation_methods=["RndShuffle"],
                              subpopulation_methods=[])
        for original_samples, trans_rst, trans_type in gene.generate(dataset):
            self.assertEqual(len(original_samples), len(trans_rst))
            for so, st in zip(original_samples, trans_rst):
                self.assertTrue(so.num_sentences() == st.num_sentences())


if __name__ == "__main__":
    unittest.main()
Пример #4
0
import unittest
from TextFlint.input_layer.dataset import Dataset
from TextFlint.generation_layer.generator.ner_generator import NERGenerator

sample1 = {'x': 'Amy lives in a city , which is called NYK .',
           'y': ['B-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'O']}
sample2 = {'x': 'Jotion lives in Xian 105 kilometers away .',
           'y': ['B-PER', 'O', 'O', 'B-LOC', 'O', 'O', 'O', 'O']}
sample3 = {'x': 'China rejects Syrians call to boycott Chinese lamb .',
           'y': ['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O']}
single_data_sample = [sample1]
data_samples = [sample1, sample2, sample3]
dataset = Dataset('NER')
single_dataset = Dataset('NER')
dataset.load(data_samples)
single_dataset.load(single_data_sample)
gene = NERGenerator()


class TestSpecialEntityTyposSwap(unittest.TestCase):

    def test_generate(self):
        # test task transformation
        transformation_methods = ["SwapEnt", "EntTypos"]
        gene = NERGenerator(transformation_methods=transformation_methods,
                            subpopulation_methods=[])

        for original_samples, trans_rst, trans_type in gene.generate(dataset):
            self.assertEqual(3, len(original_samples))
            for index in range(len(original_samples)):
                for ori_entity, trans_entity in \
Пример #5
0
    'y': ['DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN']
}
sample2 = {
    'x': ['That', 'is', 'a', 'prefixed', 'survey'],
    'y': ['DT', 'VBZ', 'DT', 'JJ', 'NN']
}
sample3 = {'x': ['', '', ''], 'y': ['O', 'O', 'O']}
sample4 = {
    'x': '! @ # $ % ^ & * ( )',
    'y': ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
}
special_data_sample = [sample3, sample4]
data_samples = [sample1, sample2]

dataset = Dataset('POS')
dataset.load(data_samples)
special_dataset = Dataset('POS')
special_dataset.load(special_data_sample)


class TestPOSGenerate(unittest.TestCase):
    def test_generate(self):
        # test MultiPOSSwap transformation
        gene = POSGenerator(
            transformation_methods=["SwapMultiPOS"],
            subpopulation_methods=[],
            transformation_config={"SwapMultiPOS": [{
                "treebank_tag": "NN"
            }]})
        for original_samples, trans_rst, trans_type in gene.generate(dataset):
            self.assertEqual(2, len(original_samples))