def test_generate(self):
    """Exercise CWSGenerator on degenerate input, bad configs and real text.

    Three scenarios are covered in order:

    1. A dataset holding an empty sentence and a punctuation-only
       sentence: every batch yielded by the generator must contain no
       original samples and no transformed samples.
    2. Invalid ``transformation_methods`` values: pulling the first item
       from ``generate`` must raise ``ValueError``.
    3. Two labelled Chinese sentences: every yielded batch must contain
       as many transformed samples as original samples.
    """
    mode = [
        'SwapName', 'CnSwapNum', 'Reduplication', 'CnMLM',
        'SwapContraction', 'SwapVerb', 'SwapSyn'
    ]

    # --- 1. degenerate input: empty text and punctuation only ---------
    test1 = CWSSample({'x': '', 'y': []})
    test2 = CWSSample({'x': '~ ! @ # $ % ^ & * ( ) _ +', 'y': []})
    dataset = Dataset('CWS')
    dataset.load([test1, test2])

    gene = CWSGenerator(transformation_methods=mode,
                        subpopulation_methods=[])
    for original_samples, trans_rst, _trans_type in gene.generate(dataset):
        # assertEqual reports the actual length when the check fails.
        self.assertEqual(len(original_samples), 0)
        self.assertEqual(len(trans_rst), 0)

    # --- 2. test wrong transformation_methods -------------------------
    invalid_configs = (
        ["wrong_transform_method"],  # name that is not a transformation
        ["AddSubtree"],  # presumably belongs to another task -- confirm
        "CnMLM",  # bare string instead of a list of names
    )
    for bad_methods in invalid_configs:
        with self.subTest(transformation_methods=bad_methods):
            gene = CWSGenerator(transformation_methods=bad_methods,
                                subpopulation_methods=[])
            # The error is expected when the first item is requested.
            self.assertRaises(ValueError, next, gene.generate(dataset))

    # --- 3. regular labelled sentences --------------------------------
    sent1 = '周小明生产一万'
    sent2 = '央视想朦胧'
    dataset = Dataset(task='CWS')
    dataset.load({
        'x': [sent1, sent2],
        'y': [['B', 'M', 'E', 'B', 'E', 'B', 'E'],
              ['B', 'E', 'S', 'B', 'E']]
    })

    gene = CWSGenerator(transformation_methods=mode,
                        subpopulation_methods=[])
    for original_samples, trans_rst, _trans_type in gene.generate(dataset):
        self.assertEqual(len(original_samples), len(trans_rst))
'AFC at Super Bowl 50?', 'answers': [{"text": "Denver Broncos", "answer_start": 177}, {"text": "Denver Broncos", "answer_start": 177}, {"text": "Denver Broncos", "answer_start": 177}], 'title': "Super_Bowl_50", 'is_impossible': False}) sample2 = MRCSample( {'context': " ", 'question': 'Which NFL team represented ' 'the AFC at Super Bowl 50?', 'answers': [], 'title': "Super_Bowl_50", 'is_impossible': True}) sample3 = MRCSample( {'context': "! @ # $ % ^ & * ( )", 'question': 'Which NFL team represented the AFC at Super Bowl 50?', 'answers': [], 'title': "Super_Bowl_50", 'is_impossible': True}) dataset = Dataset('MRC') dataset.load(data_sample) dataset.extend([sample2, sample3]) class TestMRCGenerator(unittest.TestCase): def test_generate(self): # test task transformation # TODO, domain transformation addsentdiverse transformation_methods = ["PerturbAnswer", "ModifyPos"] gene = MRCGenerator(transformation_methods=transformation_methods, subpopulation_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): self.assertEqual(1, len(trans_rst)) for index in range(len(original_samples)): ori_sample = original_samples[index]
from TextFlint.generation_layer.generator.coref_generator import CorefGenerator
from TextFlint.input_layer.dataset import Dataset
import unittest
from ....data.coref_debug import CorefDebug

# Module-level fixture: six coreference samples supplied by CorefDebug,
# loaded once into a COREF dataset that the test case below iterates over.
sample1 = CorefDebug.coref_sample1()
sample2 = CorefDebug.coref_sample2()
sample3 = CorefDebug.coref_sample3()
sample4 = CorefDebug.coref_sample4()
sample5 = CorefDebug.coref_sample5()
sample6 = CorefDebug.coref_sample6()
samples = [sample1, sample2, sample3, sample4, sample5, sample6]
dataset = Dataset("COREF")
dataset.load(samples)


class TestRndShuffle(unittest.TestCase):
    """Checks the output of CorefGenerator configured with "RndShuffle"."""

    def test_transform(self):
        """Verify batch sizes and per-sample sentence counts are preserved.

        For every batch yielded by the generator, the number of
        transformed samples must equal the number of original samples,
        and each transformed sample must report the same
        ``num_sentences()`` as the original it is paired with.
        """
        gene = CorefGenerator(transformation_methods=["RndShuffle"],
                              subpopulation_methods=[])
        for original_samples, trans_rst, _trans_type in gene.generate(dataset):
            self.assertEqual(len(original_samples), len(trans_rst))
            for so, st in zip(original_samples, trans_rst):
                # assertEqual shows both counts when they differ.
                self.assertEqual(so.num_sentences(), st.num_sentences())


if __name__ == "__main__":
    unittest.main()
import unittest from TextFlint.input_layer.dataset import Dataset from TextFlint.generation_layer.generator.ner_generator import NERGenerator sample1 = {'x': 'Amy lives in a city , which is called NYK .', 'y': ['B-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'O']} sample2 = {'x': 'Jotion lives in Xian 105 kilometers away .', 'y': ['B-PER', 'O', 'O', 'B-LOC', 'O', 'O', 'O', 'O']} sample3 = {'x': 'China rejects Syrians call to boycott Chinese lamb .', 'y': ['B-ORG', 'O', 'B-MISC', 'O', 'O', 'O', 'B-MISC', 'O', 'O']} single_data_sample = [sample1] data_samples = [sample1, sample2, sample3] dataset = Dataset('NER') single_dataset = Dataset('NER') dataset.load(data_samples) single_dataset.load(single_data_sample) gene = NERGenerator() class TestSpecialEntityTyposSwap(unittest.TestCase): def test_generate(self): # test task transformation transformation_methods = ["SwapEnt", "EntTypos"] gene = NERGenerator(transformation_methods=transformation_methods, subpopulation_methods=[]) for original_samples, trans_rst, trans_type in gene.generate(dataset): self.assertEqual(3, len(original_samples)) for index in range(len(original_samples)): for ori_entity, trans_entity in \
'y': ['DT', 'VBZ', 'DT', 'RB', 'JJ', 'NN'] } sample2 = { 'x': ['That', 'is', 'a', 'prefixed', 'survey'], 'y': ['DT', 'VBZ', 'DT', 'JJ', 'NN'] } sample3 = {'x': ['', '', ''], 'y': ['O', 'O', 'O']} sample4 = { 'x': '! @ # $ % ^ & * ( )', 'y': ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] } special_data_sample = [sample3, sample4] data_samples = [sample1, sample2] dataset = Dataset('POS') dataset.load(data_samples) special_dataset = Dataset('POS') special_dataset.load(special_data_sample) class TestPOSGenerate(unittest.TestCase): def test_generate(self): # test MultiPOSSwap transformation gene = POSGenerator( transformation_methods=["SwapMultiPOS"], subpopulation_methods=[], transformation_config={"SwapMultiPOS": [{ "treebank_tag": "NN" }]}) for original_samples, trans_rst, trans_type in gene.generate(dataset): self.assertEqual(2, len(original_samples))