def convert_examples_to_features(self, data_sign="train",):

        print("=*="*10)
        print("loading {} data ... ...".format(data_sign))

        if data_sign == "train":
            examples = self.data_processor.get_train_examples(self.data_dir)
            self.num_train_instances = len(examples)
        elif data_sign == "dev":
            examples = self.data_processor.get_dev_examples(self.data_dir)
            self.num_dev_instances = len(examples)
        elif data_sign == "test":
            examples = self.data_processor.get_test_examples(self.data_dir)
            self.num_test_instances = len(examples)
        else:
            raise ValueError("please notice that the data_sign can only be train/dev/test !!")

        cache_path = os.path.join(self.data_dir, "mrc-ner.{}.cache.{}".format(data_sign, str(self.max_seq_len)))
        if os.path.exists(cache_path) and self.data_cache:
            features = torch.load(cache_path)
        else:
            features = convert_examples_to_features(examples, self.tokenizer, self.label_list, self.max_seq_length, allow_impossible=self.allow_impossible)
            if self.data_cache:
                torch.save(features, cache_path)
        return features
    def convert_examples_to_features(self, data_sign="train", num_data_processor=1):

        print("=*="*10)
        print("loading {} data ... ...".format(data_sign))

        if data_sign == "train":
            examples = self.data_processor.get_train_examples(self.data_dir)
            self.num_train_instances = len(examples)
        elif data_sign == "dev":
            examples = self.data_processor.get_dev_examples(self.data_dir)
            self.num_dev_instances = len(examples)
        elif data_sign == "test":
            examples = self.data_processor.get_test_examples(self.data_dir)
            self.num_test_instances = len(examples)
        else:
            raise ValueError("please notice that the data_sign can only be train/dev/test !!")

        if num_data_processor == 1:
            # single-process fast path: read/write one monolithic cache file per split
            cache_path = os.path.join(self.data_dir, "mrc-ner.{}.cache.{}".format(data_sign, str(self.max_seq_length)))
            if os.path.exists(cache_path) and self.data_cache:
                features = torch.load(cache_path)
            else:
                # the module-level helper, not a recursive call to this method
                features = convert_examples_to_features(examples, self.tokenizer, self.label_list, self.max_seq_length,
                                                        allow_impossible=self.allow_impossible)
                if self.data_cache:
                    torch.save(features, cache_path)
            return features

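        # multi-process path: split the examples into num_data_processor shards
        # and cache each shard in its own file; the helper below writes one shard.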
        def export_features_to_cache_file(idx, sliced_features, num_data_processor):
            cache_path = os.path.join(self.data_dir, "mrc-ner.{}.cache.{}.{}-{}".format(
                data_sign, str(self.max_seq_length), str(num_data_processor), str(idx)))
            torch.save(sliced_features, cache_path)
            print(">>> export sliced features to: {}".format(cache_path))

        features_lst = []
        total_examples = len(examples)
        size_of_one_process = math.ceil(total_examples / num_data_processor)
        # look for previously exported shard caches matching this split and config
        path_to_preprocessed_cache = os.path.join(self.data_dir, "mrc-ner.{}.cache.{}.{}-*".format(
            data_sign, str(self.max_seq_length), str(num_data_processor)))
        collection_of_preprocessed_cache = glob(path_to_preprocessed_cache)

        if len(collection_of_preprocessed_cache) == num_data_processor:
            # a complete set of shard caches already exists; reuse it as-is
            print("load saved cache files in {}".format(self.data_dir))
        else:
            # drop any stale or partial shard caches, then regenerate every shard
            for item_of_preprocessed_cache in collection_of_preprocessed_cache:
                os.remove(item_of_preprocessed_cache)
            for idx in range(num_data_processor):
                start = idx * size_of_one_process
                end = min((idx + 1) * size_of_one_process, total_examples)
                sliced_examples = examples[start:end]
                sliced_features = convert_examples_to_features(sliced_examples, self.tokenizer, self.label_list,
                                                               self.max_seq_length, allow_impossible=self.allow_impossible)
                export_features_to_cache_file(idx, sliced_features, num_data_processor)
            del examples

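        # reload the shard caches in parallel: each worker deserializes one
        # slice, and slices are collected in shard order so the final feature
        # list preserves the original example order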
        multi_process_for_data = Pool(num_data_processor)
        for idx in range(num_data_processor):
            features_lst.append(multi_process_for_data.apply_async(
                MRCNERDataLoader.read_features_from_cache_file,
                args=(idx, self.data_dir, data_sign, self.max_seq_length, num_data_processor)))

        multi_process_for_data.close()
        multi_process_for_data.join()
        features = []
        for feature_slice in features_lst:
            features.extend(feature_slice.get())

        print("check number of examples before and after data processing : ")
        print(len(features), total_examples)
        assert len(features) == total_examples

        return features
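
    # The Pool workers above call MRCNERDataLoader.read_features_from_cache_file,
    # which is defined elsewhere in this class. A minimal sketch of what it
    # plausibly looks like, assuming it simply mirrors the path scheme used by
    # export_features_to_cache_file and deserializes one shard (the actual
    # implementation may differ):
    #
    #     @staticmethod
    #     def read_features_from_cache_file(idx, data_dir, data_sign, max_seq_length, num_data_processor):
    #         cache_path = os.path.join(data_dir, "mrc-ner.{}.cache.{}.{}-{}".format(
    #             data_sign, str(max_seq_length), str(num_data_processor), str(idx)))
    #         return torch.load(cache_path)
    #
    # Hypothetical usage (constructor arguments are assumptions, not the
    # documented signature):
    #
    #     loader = MRCNERDataLoader(config, data_processor, label_list, tokenizer)
    #     train_features = loader.convert_examples_to_features(data_sign="train", num_data_processor=4)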