def test_batch_processing(self):
    """Run analysis over multiple dataset/output pairs; expect one report per pair.

    Fix: the analysis metadata is rebuilt for every system output instead of
    accumulating in a single shared dict across loop iterations, which
    previously leaked keys from one system's metadata into the next
    system's analysis.
    """
    sys_out_dir = os.path.join(self.artifact_path, "CL-mt5base", "xnli")
    datasets = [
        os.path.join(sys_out_dir, "datasets", file)
        for file in os.listdir(os.path.join(sys_out_dir, "datasets"))
    ]
    outputs = [
        os.path.join(sys_out_dir, "outputs", file)
        for file in os.listdir(os.path.join(sys_out_dir, "outputs"))
    ]
    file_type = FileType.json
    task_dummy = TaskType.text_classification
    # First pass: discover each output's real task from its embedded metadata.
    tasks = []
    for dataset, output in zip(datasets, outputs):
        loader = get_custom_dataset_loader(
            task_dummy,
            dataset,
            output,
            dataset_file_type=file_type,
            output_file_type=file_type,
        )
        if not loader.user_defined_metadata_configs:
            raise ValueError(
                f"user_defined_metadata_configs in system output {output} hasn't "
                "been specified or task name should be specified")
        tasks.append(loader.user_defined_metadata_configs['task_name'])

    # Get loaders using real `task` and `file_type`
    loaders = [
        get_custom_dataset_loader(
            task,
            dataset,
            output,
            dataset_file_type=file_type,
            output_file_type=file_type,
        )
        for dataset, output, task in zip(datasets, outputs, tasks)
    ]
    system_outputs = [loader.load() for loader in loaders]

    # Run analysis
    reports = []
    for loader, system_output, task in zip(loaders, system_outputs, tasks):
        # Build the metadata fresh per system so configs don't leak
        # from one system output into the next.
        metadata = dict(loader.user_defined_metadata_configs)
        report = get_processor(task).process(
            metadata=metadata, sys_output=system_output)
        reports.append(report)
    self.assertEqual(len(reports), 2)
def test_generate_system_analysis(self):
    """Word segmentation: CoNLL dataset + output produce a populated analysis."""
    loader = get_custom_dataset_loader(
        TaskType.word_segmentation,
        self.conll_dataset,
        self.conll_output,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.conll,
        FileType.conll,
    )
    samples = loader.load()
    metadata = {
        "task_name": TaskType.word_segmentation.value,
        "metric_names": ["F1Score"],
    }
    sys_info = get_processor(TaskType.word_segmentation).process(
        metadata, samples)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
def test_load_tsv(self):
    """A text-pair TSV dataset and text output load into the expected samples."""
    loader = get_custom_dataset_loader(
        TaskType.text_pair_classification,
        self.tsv_dataset,
        self.txt_output,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.tsv,
        FileType.text,
    )
    samples = loader.load()
    self.assertEqual(len(samples), 5)
    expected_second = {
        'text1': 'This church choir sings to the masses as they sing joyous '
        'songs from the book at a church.',
        'text2': 'The church is filled with song.',
        'true_label': 'entailment',
        'id': '1',
        'predicted_label': 'entailment',
    }
    self.assertEqual(samples[1], expected_second)
def test_e2e(self):
    """End-to-end ABSA run: load, spot-check a sample, then process.

    Fix: pass the task name as its string value (``.value``) in metadata,
    consistent with the other tests in this suite that pass string task
    names rather than raw enum members.
    """
    loader = get_custom_dataset_loader(
        TaskType.aspect_based_sentiment_classification,
        self.tsv_dataset,
        self.txt_output,
        Source.local_filesystem,
        Source.in_memory,
        FileType.tsv,
        FileType.text,
    )
    data = loader.load()
    self.assertEqual(len(data), 100)
    self.assertEqual(
        data[0],
        {
            'aspect': 'Boot time',
            'text': 'Boot time is super fast, around anywhere from 35 seconds to '
            '1 minute.',
            'true_label': 'positive',
            'id': '0',
            'predicted_label': 'positive',
        },
    )
    metadata = {
        "task_name": TaskType.aspect_based_sentiment_classification.value,
        "metric_names": ["Accuracy", "F1Score"],
    }
    processor = get_processor(TaskType.aspect_based_sentiment_classification)
    sys_info = processor.process(metadata, data)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
def test_generate_system_analysis(self):
    """NER analysis runs, and training-set-dependent features stay disabled."""
    loader = get_custom_dataset_loader(
        TaskType.named_entity_recognition,
        self.conll_dataset,
        self.conll_output,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.conll,
        FileType.conll,
    )
    samples = loader.load()
    metadata = {
        "task_name": TaskType.named_entity_recognition.value,
        "metric_names": ["F1Score"],
    }
    processor = get_processor(TaskType.named_entity_recognition)
    sys_info = processor.process(metadata, samples)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)

    # Deep test: with no training dataset provided, the training-set
    # dependent features (span_econ / span_efre) must not be activated.
    active_features = sys_info.results.fine_grained.keys()
    self.assertNotIn("span_econ", active_features)
    self.assertNotIn("span_efre", active_features)
def test_no_user_defined_features(self):
    """KG link prediction without custom features: loader metadata stays empty."""
    loader = get_custom_dataset_loader(
        TaskType.kg_link_tail_prediction,
        self.test_data,
        self.dataset_no_custom_feature,
        dataset_file_type=FileType.json,
        output_file_type=FileType.json,
    )
    data = loader.load()
    self.assertEqual(data.metadata, FileLoaderMetadata())
    metadata = {
        "task_name": TaskType.kg_link_tail_prediction.value,
        "dataset_name": "fb15k-237-subset",
        "metric_configs": [
            HitsConfig(name='Hits4', hits_k=4),  # you can modify k here
            MeanReciprocalRankConfig(name='MRR'),
            MeanRankConfig(name='MR'),
        ],
    }
    processor = get_processor(TaskType.kg_link_tail_prediction.value)
    report = processor.process(metadata, data.samples)
    self.assertIsNotNone(report.results.fine_grained)
    self.assertGreater(len(report.results.overall), 0)
def test_sort_buckets_by_value(self):
    """Buckets come back ordered by descending performance value.

    Fix: adjacent buckets may legitimately tie on performance value, which
    is still a valid descending order — the check uses assertGreaterEqual
    instead of the strict assertGreater that would fail on ties.
    """
    loader = get_custom_dataset_loader(
        TaskType.kg_link_tail_prediction,
        self.test_data,
        self.dataset_no_custom_feature,
    )
    data = loader.load()
    self.assertEqual(data.metadata, FileLoaderMetadata())
    metadata = {
        "task_name": TaskType.kg_link_tail_prediction.value,
        "dataset_name": "fb15k-237",
        "metric_configs": [
            HitsConfig(name='Hits4', hits_k=4),
            MeanReciprocalRankConfig(name='MRR'),
            MeanRankConfig(name='MR'),
        ],
        "sort_by": "performance_value",
        "sort_by_metric": "first",
    }
    processor = get_processor(TaskType.kg_link_tail_prediction.value)
    sys_info = processor.process(metadata, data.samples)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
    symmetry_performances = sys_info.results.fine_grained['symmetry']
    if len(symmetry_performances) <= 1:  # can't sort if only 1 item
        return
    for i in range(len(symmetry_performances) - 1):
        first_item = symmetry_performances[i].performances[0].value
        second_item = symmetry_performances[i + 1].performances[0].value
        self.assertGreaterEqual(first_item, second_item)
def test_multiple_qa_customized_feature(self):
    """List-valued custom features in a multiple-choice QA output survive loading."""
    dataset_path = os.path.join(self.artifact_path, "dataset_fig_qa.json")
    output_path = os.path.join(
        self.artifact_path, "output_fig_qa_customized_features.json")
    loader = get_custom_dataset_loader(
        TaskType.qa_multiple_choice,
        dataset_path,
        output_path,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.json,
        FileType.json,
    )
    data = loader.load()
    first_sample = data.samples[0]
    self.assertIsInstance(first_sample["commonsense_category"], list)
    self.assertEqual(first_sample["commonsense_category"], ["obj", "cul"])
    metadata = {
        "task_name": TaskType.qa_multiple_choice.value,
        "dataset_name": "fig_qa",
        "metric_names": ["Accuracy"],
        # Required — without this the user-defined features are ignored.
        "user_defined_features_configs": data.metadata.custom_features,
    }
    processor = get_processor(TaskType.qa_multiple_choice.value)
    sys_info = processor.process(metadata, data.samples)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
def test_no_user_defined_features(self):
    """A JSON file with no custom features yields empty loader metadata."""
    path = os.path.join(self.artifact_path, "no_custom_feature.json")
    loader = get_custom_dataset_loader(
        TaskType.kg_link_tail_prediction,
        path,
        path,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.json,
        FileType.json,
    )
    data = loader.load()
    self.assertEqual(data.metadata, FileLoaderMetadata())
    metadata = {
        "task_name": TaskType.kg_link_tail_prediction.value,
        "dataset_name": "fb15k-237-subset",
        "metric_configs": [HitsConfig(name='Hits4', hits_k=4)],
    }
    sys_info = get_processor(TaskType.kg_link_tail_prediction.value).process(
        metadata, data.samples)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
def test_custom_features(self):
    """A JSON output carrying custom features exposes them to the processor."""
    loader = get_custom_dataset_loader(
        TaskType.machine_translation,
        self.tsv_dataset,
        self.json_output_with_features,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.tsv,
        FileType.json,
    )
    data = loader.load()
    self.assertEqual(len(data), 4)
    expected_first = {
        'source': 'Ak sa chcete dostať ešte hlbšie, môžete si všimnúť '
        'trhlinky.',
        'reference': 'Now just to get really deep in , you can really get to '
        'the cracks .',
        'id': '0',
        'hypothesis': 'If you want to get a deeper , you can see the forces .',
        'num_capital_letters': 1,
    }
    self.assertEqual(data[0], expected_first)
    processor = get_processor(TaskType.machine_translation.value)
    sys_info = processor.process(
        dataclasses.asdict(data.metadata), data.samples)
    self.assertIn('num_capital_letters', sys_info.results.fine_grained)
def test_generate_system_analysis(self):
    """Machine translation: a full analysis run produces overall results."""
    loader = get_custom_dataset_loader(
        TaskType.machine_translation,
        self.tsv_dataset,
        self.txt_output,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.tsv,
        FileType.text,
    )
    samples = loader.load()
    metadata = {
        "task_name": TaskType.machine_translation.value,
        "dataset_name": "ted_multi",
        "metric_names": ["bleu"],
    }
    sys_info = get_processor(TaskType.machine_translation.value).process(
        metadata, samples)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
def test_extractive_qa_en(self):
    """English extractive QA: spot-check loading, then verify overall metrics."""
    dataset_path = os.path.join(self.artifact_path, "dataset-xquad-en.json")
    output_path = os.path.join(self.artifact_path, "output-xquad-en.json")
    loader = get_custom_dataset_loader(
        TaskType.qa_extractive,
        dataset_path,
        output_path,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.json,
        FileType.json,
    )
    data = loader.load()
    self.assertEqual(len(data), 1190)

    first = data[0]
    self.assertEqual(first["predicted_answers"], {"text": "308"})
    self.assertEqual(first["id"], "0")
    self.assertEqual(first["answers"], {"answer_start": [-1], "text": ["308"]})
    self.assertEqual(
        first["question"],
        "How many points did the Panthers defense surrender ?")
    self.assertTrue(first["context"].startswith("The Panthers"))

    metadata = {
        "task_name": TaskType.qa_extractive,
        "dataset_name": "squad",
        "metric_names": ["F1ScoreQA", "ExactMatchQA"],
    }
    sys_info = get_processor(TaskType.qa_extractive).process(metadata, data)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
    get_logger('test').info(f'OVERALL={sys_info.results.overall}')
    # Expected exact-match score on this artifact.
    self.assertAlmostEqual(
        sys_info.results.overall["ExactMatch"].value,
        0.6974789915966386,
        2,
        "almost equal",
    )
    # Expected F1 score on this artifact.
    self.assertAlmostEqual(
        sys_info.results.overall["F1"].value,
        0.8235975260931867,
        2,
        "almost equal",
    )
def test_simple_example(self):
    """Minimal end-to-end usage example: load, process, (optionally) write out."""
    # Load the data with default loader settings.
    dataset = self.dataset_no_custom_feature
    task = TaskType.kg_link_tail_prediction
    data = get_custom_dataset_loader(task, dataset, dataset).load()
    # Initialize the processor and perform the processing.
    processor = get_processor(TaskType.kg_link_tail_prediction.value)
    sys_info = processor.process(metadata={}, sys_output=data.samples)
    # To write the report to disk: sys_info.write_to_directory('./')
def test_load_json(self):
    """A JSON dataset plus JSON output load into the expected sample count."""
    loader = get_custom_dataset_loader(
        TaskType.qa_multiple_choice,
        self.json_dataset,
        self.json_output,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.json,
        FileType.json,
    )
    samples = loader.load()
    self.assertEqual(len(samples), 4)
def test_load_custom_dataset_tsv(self):
    """Default source/file-type arguments handle a TSV dataset + text output."""
    loader = get_custom_dataset_loader(  # use defaults
        TaskType.text_classification,
        self.tsv_dataset,
        self.txt_output,
    )
    samples = loader.load()
    self.assertEqual(len(samples), 10)
    expected_seventh = {
        "text": "a weird and wonderful comedy .",
        "true_label": "positive",
        "id": "6",
        "predicted_label": "positive",
    }
    self.assertEqual(samples[6], expected_seventh)
def test_extractive_qa_zh(self):
    """Chinese extractive QA: processing reports the expected overall scores."""
    dataset_path = os.path.join(self.artifact_path, "dataset-xquad-zh.json")
    output_path = os.path.join(self.artifact_path, "output-xquad-zh.json")
    loader = get_custom_dataset_loader(
        TaskType.qa_extractive,
        dataset_path,
        output_path,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.json,
        FileType.json,
    )
    data = loader.load()
    metadata = {
        "task_name": TaskType.qa_extractive.value,
        "dataset_name": "squad",
        "metric_names": ["F1Score", "ExactMatch"],
        "source_language": "zh",
        "target_language": "zh",
    }
    sys_info = get_processor(TaskType.qa_extractive).process(metadata, data)
    get_logger('test').info(
        f'--------- sys_info.metric_configs {sys_info.metric_configs}')
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
    # Expected exact-match score on this artifact.
    self.assertAlmostEqual(
        sys_info.results.overall["ExactMatch"].value,
        0.6285714285714286,
        2,
        "almost equal",
    )
    # Expected F1 score on this artifact.
    self.assertAlmostEqual(
        sys_info.results.overall["F1"].value,
        0.7559651817716333,
        2,
        "almost equal",
    )
def test_process_metadata_in_output_file(self):
    """Metadata embedded in the output file should flow through processing.

    Fix: compare against a ``FileLoaderMetadata()`` instance rather than
    the class object itself — comparing to the class is always unequal, so
    the original assertion was vacuous. (The sibling JSON-loading test
    already compares against an instance.)
    """
    loader = get_custom_dataset_loader(
        TaskType.text_classification,
        self.json_dataset,
        self.json_output,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.json,
        FileType.json,
    )
    data = loader.load()
    self.assertNotEqual(data.metadata, FileLoaderMetadata())
    metadata = dataclasses.asdict(data.metadata)
    processor = get_processor(TaskType.text_classification)
    sys_info = processor.process(metadata, data.samples)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
def test_load_custom_dataset_json(self):
    """JSON loading preserves non-empty metadata and sample contents."""
    loader = get_custom_dataset_loader(
        TaskType.text_classification,
        self.json_dataset,
        self.json_output,
        dataset_file_type=FileType.json,
        output_file_type=FileType.json,
    )
    data = loader.load()
    self.assertNotEqual(data.metadata, FileLoaderMetadata())
    self.assertEqual(len(data), 7)
    expected_seventh = {
        'text': 'guaranteed to move anyone who ever , , or rolled .',
        'true_label': 'positive',
        'id': '6',
        'predicted_label': 'positive',
    }
    self.assertEqual(data[6], expected_seventh)
def test_process(self):
    """In-memory TSV/text strings are loadable and processable end to end."""
    metadata = {
        "task_name": TaskType.text_classification,
        "metric_names": ["Accuracy", "F1Score"],
    }
    loader = get_custom_dataset_loader(
        TaskType.text_classification,
        load_file_as_str(self.tsv_dataset),
        load_file_as_str(self.txt_output),
        Source.in_memory,
        Source.in_memory,
        FileType.tsv,
        FileType.text,
    )
    samples = loader.load()
    sys_info = get_processor(TaskType.text_classification).process(
        metadata, samples)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
def test_generate_system_analysis(self):
    """Multiple-choice QA: a full processor run yields populated results."""
    loader = get_custom_dataset_loader(
        TaskType.qa_multiple_choice,
        self.json_dataset,
        self.json_output,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.json,
        FileType.json,
    )
    samples = loader.load()
    metadata = {
        "task_name": TaskType.qa_multiple_choice.value,
        "dataset_name": "fig_qa",
        "metric_names": ["Accuracy"],
    }
    sys_info = get_processor(TaskType.qa_multiple_choice.value).process(
        metadata, samples)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
def test_snli(self):
    """Run text-pair classification over an SNLI-style TSV system output.

    Fix: metadata's ``task_name`` previously said ``text_classification``
    while the loader and processor both used ``text_pair_classification``;
    the metadata now agrees with the processor actually being exercised.
    """
    metadata = {
        "task_name": TaskType.text_pair_classification.value,
        "metric_names": ["Accuracy"],
    }
    loader = get_custom_dataset_loader(
        TaskType.text_pair_classification,
        self.tsv_dataset,
        self.txt_output,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.tsv,
        FileType.text,
    )
    data = loader.load()
    processor = get_processor(TaskType.text_pair_classification)
    sys_info = processor.process(metadata, data)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
def test_generate_system_analysis(self):
    """System details supplied via metadata should appear on the report.

    Fix: the original called ``assertIsNotNone(x, {...})``, where the dict
    is silently treated as the failure *message* (the second parameter of
    assertIsNotNone is ``msg``) and never compared. Use ``assertEqual`` to
    actually check the details against the expected values.
    """
    path_system_details = os.path.join(
        test_artifacts_path, "test_system_details.json"
    )
    dataset_data = os.path.join(
        test_artifacts_path, "text_classification", "dataset.tsv"
    )
    output_data = os.path.join(
        test_artifacts_path, "text_classification", "output.txt"
    )
    with open(path_system_details) as fin:
        system_details = json.load(fin)
    metadata = {
        "task_name": TaskType.text_classification,
        "metric_names": ["Accuracy"],
        "system_details": system_details,
    }
    loader = get_custom_dataset_loader(
        TaskType.text_classification,
        dataset_data,
        output_data,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.tsv,
        FileType.text,
    )
    data = loader.load()
    processor = get_processor(TaskType.text_classification)
    sys_info = processor.process(metadata, data)
    self.assertEqual(
        sys_info.system_details,
        {"learning_rate": 0.0001, "number_of_layers": 10},
    )
def test_load_tsv(self):
    """An MT TSV dataset plus text hypotheses load into dict samples."""
    loader = get_custom_dataset_loader(
        TaskType.machine_translation,
        self.tsv_dataset,
        self.txt_output,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.tsv,
        FileType.text,
    )
    samples = loader.load()
    self.assertEqual(len(samples), 4)
    expected_first = {
        'source': 'Ak sa chcete dostať ešte hlbšie, môžete si všimnúť '
        'trhlinky.',
        'reference': 'Now just to get really deep in , you can really get to '
        'the cracks .',
        'id': '0',
        'hypothesis': 'If you want to get a deeper , you can see the forces .',
    }
    self.assertEqual(samples[0], expected_first)
def test_process_training_set_dependent_features(self):
    """Naming a known dataset in metadata still yields a full analysis."""
    metadata = {
        "task_name": TaskType.text_classification.value,
        "metric_names": ["Accuracy", "F1Score"],
        "dataset_name": "ag_news",
        "reload_stat": False,
    }
    loader = get_custom_dataset_loader(
        TaskType.text_classification,
        self.json_dataset,
        self.json_output,
        Source.local_filesystem,
        Source.local_filesystem,
        FileType.json,
        FileType.json,
    )
    samples = loader.load()
    sys_info = get_processor(TaskType.text_classification).process(
        metadata, samples)
    self.assertIsNotNone(sys_info.results.fine_grained)
    self.assertGreater(len(sys_info.results.overall), 0)
def test_with_user_defined_features(self):
    """Custom features appear in loader metadata and in each sample's keys."""
    loader = get_custom_dataset_loader(  # use defaults
        TaskType.kg_link_tail_prediction,
        self.test_data,
        self.dataset_with_custom_feature,
    )
    data = loader.load()
    self.assertEqual(len(data.metadata.custom_features), 1)
    self.assertEqual(len(data), 10)
    expected_keys = {
        "id",
        "true_head",
        "true_link",
        'true_head_decipher',
        'true_tail_decipher',
        "true_tail",
        "predict",
        "predictions",
        "rel_type",
        "true_rank",
    }
    self.assertEqual(set(data[0].keys()), expected_keys)