def test_onnx_conversion_and_inference(tmp_path, model_name):
    AdaptiveModel.convert_to_onnx(model_name=model_name,
                                  output_path=tmp_path / "test-onnx",
                                  task_type="question_answering")
    onnx_inferencer = Inferencer.load(tmp_path / "test-onnx",
                                      task_type="question_answering",
                                      num_processes=0)
    qa_input = [{
        "questions": ["What is the population of Berlin?"],
        "text": "Berlin is the capital and largest city of Germany by both area and population. Its 3,769,495 "
                "inhabitants as of December 31, 2019 make it the most populous city of the European Union, "
                "according to population within city limits.The city is also one of Germany's 16 federal states.",
    }]
    result_onnx = onnx_inferencer.inference_from_dicts(qa_input)[0]
    assert result_onnx["predictions"][0]["answers"][0]["answer"] == "3,769,495"

    pytorch_inferencer = Inferencer.load(model_name,
                                         task_type="question_answering",
                                         num_processes=0)
    result_pytorch = pytorch_inferencer.inference_from_dicts(qa_input)[0]

    for onnx, pytorch in zip(result_onnx["predictions"][0]["answers"][0].items(),
                             result_pytorch["predictions"][0]["answers"][0].items()):
        # keys
        assert onnx[0] == pytorch[0]
        # values
        if isinstance(onnx[1], float):
            np.testing.assert_almost_equal(onnx[1], pytorch[1], decimal=4)  # score
        else:
            assert onnx[1] == pytorch[1]
def import_downstream_models():
    #######################
    # Loads a SQuAD fine-tuned model and saves it as a FARM AdaptiveModel
    #######################
    device, n_gpu = initialize_device_settings(use_cuda=True)
    model = "bert-large-uncased-whole-word-masking-finetuned-squad"
    save_dir = "saved_models/FARM-bert-large-uncased-whole-word-masking-finetuned-squad"

    lm = Bert.load(model)
    ph = QuestionAnsweringHead.load(model)
    am = AdaptiveModel(language_model=lm,
                       prediction_heads=[ph],
                       embeds_dropout_prob=0.1,
                       lm_output_types="per_token",
                       device=device)
    am.save(save_dir)

    # Save the processor associated with the model, so it can be used in inference mode
    # TODO load HF's tokenizer_config.json and adjust settings
    tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path=model)
    label_list = ["start_token", "end_token"]
    metric = "squad"
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=256,
        label_list=label_list,
        metric=metric,
        data_dir="../data/squad20",
    )
    processor.save(save_dir)
def onnx_runtime_example():
    """
    This example shows conversion of a transformers model from the Model Hub to ONNX format
    and inference using ONNX Runtime.
    """
    model_name_or_path = "deepset/roberta-base-squad2"
    onnx_model_export_path = Path("./roberta-onnx")

    AdaptiveModel.convert_to_onnx(model_name_or_path, onnx_model_export_path, task_type="question_answering")

    # for ONNX models, the Inferencer uses ONNX Runtime under the hood
    inferencer = Inferencer.load(model_name_or_path=onnx_model_export_path)

    qa_input = [{
        "questions": ["Who counted the game among the best ever made?"],
        "text": "Twilight Princess was released to universal critical acclaim and commercial success. "
                "It received perfect scores from major publications such as 1UP.com, Computer and Video Games, "
                "Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators "
                "GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii "
                "version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called "
                "it one of the greatest games ever created.",
    }]

    results = inferencer.inference_from_dicts(qa_input)
    print(results)

    inferencer.close_multiprocessing_pool()
def convert_to_onnx(cls, model_name: str, output_path: Path, convert_to_float16: bool = False,
                    quantize: bool = False, task_type: str = "question_answering", opset_version: int = 11):
    """
    Convert a PyTorch BERT model to ONNX format and write it to the ./onnx-export dir. The converted ONNX
    model can be loaded with the `FARMReader` by using the export path as the `model_name_or_path` param.

    Usage:

        `from haystack.reader.farm import FARMReader
        from pathlib import Path
        onnx_model_path = Path("roberta-onnx-model")
        FARMReader.convert_to_onnx(model_name="deepset/bert-base-cased-squad2", output_path=onnx_model_path)
        reader = FARMReader(onnx_model_path)`

    :param model_name: transformers model name
    :param output_path: Path to output the converted model
    :param convert_to_float16: Many models use float32 precision by default. With half precision (float16),
                               inference is faster on NVIDIA GPUs with Tensor Cores, such as the T4 or V100.
                               On older GPUs, float32 may still be more performant.
    :param quantize: convert floating point weights to integers
    :param task_type: Type of task for the model. Available options: "question_answering" or "embeddings".
    :param opset_version: ONNX opset version
    """
    AdaptiveModel.convert_to_onnx(model_name=model_name,
                                  output_path=output_path,
                                  task_type=task_type,
                                  convert_to_float16=convert_to_float16,
                                  quantize=quantize,
                                  opset_version=opset_version)
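# A minimal sketch of the usage shown in the docstring above, additionally exercising the `quantize`
# flag described in the parameter docs. The output directory "bert-onnx-quantized" is a hypothetical
# example path, not part of the original snippet.
from pathlib import Path

from haystack.reader.farm import FARMReader

onnx_model_path = Path("bert-onnx-quantized")
FARMReader.convert_to_onnx(model_name="deepset/bert-base-cased-squad2",
                           output_path=onnx_model_path,
                           quantize=True,  # integer weights; convert_to_float16 mainly pays off on Tensor Core GPUs
                           task_type="question_answering")
reader = FARMReader(onnx_model_path)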
def test_qa(caplog):
    caplog.set_level(logging.CRITICAL)
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    batch_size = 2
    n_epochs = 1
    evaluate_every = 4
    base_LM_model = "bert-base-cased"

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=base_LM_model, do_lower_case=False
    )
    label_list = ["start_token", "end_token"]
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=20,
        doc_stride=10,
        max_query_length=6,
        train_filename="train-sample.json",
        dev_filename="dev-sample.json",
        test_filename=None,
        data_dir="samples/qa",
        label_list=label_list,
        metric="squad"
    )
    data_silo = DataSilo(processor=processor, batch_size=batch_size)
    language_model = LanguageModel.load(base_LM_model)
    prediction_head = QuestionAnsweringHead(layer_dims=[768, len(label_list)])
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=2e-5,
        # optimizer_opts={'name': 'AdamW', 'lr': 2E-05},
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device
    )
    trainer = Trainer(
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device
    )
    model = trainer.train(model)

    save_dir = "testsave/qa"
    model.save(save_dir)
    processor.save(save_dir)
def test_s3e_fit():
    # small test data
    language_model = Path("samples/s3e/tiny_fasttext_model")
    corpus_path = Path("samples/s3e/tiny_corpus.txt")
    save_dir = Path("testsave/fitted_s3e/")
    do_lower_case = False
    batch_size = 2
    use_gpu = False

    # Fit S3E on a corpus
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=use_gpu, use_amp=False)

    # Create an InferenceProcessor
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=language_model, do_lower_case=do_lower_case)
    processor = InferenceProcessor(tokenizer=tokenizer, max_seq_len=128)

    # Create an AdaptiveModel
    language_model = LanguageModel.load(language_model)
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[],
        embeds_dropout_prob=0.1,
        lm_output_types=[],
        device=device)

    model, processor, s3e_stats = fit_s3e_on_corpus(processor=processor,
                                                    model=model,
                                                    corpus=corpus_path,
                                                    n_clusters=3,
                                                    pca_n_components=30,
                                                    svd_postprocessing=True,
                                                    min_token_occurrences=1)

    # save everything to allow inference without fitting everything again
    model.save(save_dir)
    processor.save(save_dir)
    with open(save_dir / "s3e_stats.pkl", "wb") as f:
        pickle.dump(s3e_stats, f)

    # Load model, tokenizer and processor directly into Inferencer
    inferencer = Inferencer(model=model, processor=processor, task_type="embeddings", gpu=use_gpu,
                            batch_size=batch_size, extraction_strategy="s3e", extraction_layer=-1,
                            s3e_stats=s3e_stats, num_processes=0)

    # Input
    basic_texts = [
        {"text": "a man is walking on the street."},
        {"text": "a woman is walking on the street."},
    ]

    # Get embeddings for input text (you can vary the strategy and layer)
    result = inferencer.inference_from_dicts(dicts=basic_texts)
    assert result[0]["context"] == basic_texts[0]["text"]
    assert abs(result[0]["vec"][0] - 0.00527727306941057) < 1e-6
    assert abs(result[0]["vec"][-2] - 0.06285100416478565) < 1e-6
def test_lm_finetuning(caplog):
    caplog.set_level(logging.CRITICAL)
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=True)
    n_epochs = 1
    batch_size = 5
    evaluate_every = 2
    lang_model = "bert-base-cased"

    tokenizer = BertTokenizer.from_pretrained(
        pretrained_model_name_or_path=lang_model, do_lower_case=False
    )

    processor = BertStyleLMProcessor(
        data_dir="samples/lm_finetuning",
        train_filename="train-sample.txt",
        test_filename="test-sample.txt",
        dev_filename=None,
        tokenizer=tokenizer,
        max_seq_len=64,
    )
    data_silo = DataSilo(processor=processor, batch_size=batch_size)

    language_model = Bert.load(lang_model)
    lm_prediction_head = BertLMHead.load(lang_model)
    next_sentence_head = NextSentenceHead.load(lang_model)

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[lm_prediction_head, next_sentence_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token", "per_sequence"],
        device=device,
    )

    optimizer, warmup_linear = initialize_optimizer(
        model=model,
        learning_rate=2e-5,
        warmup_proportion=0.1,
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
    )

    trainer = Trainer(
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        warmup_linear=warmup_linear,
        evaluate_every=evaluate_every,
        device=device,
    )

    model = trainer.train(model)

    save_dir = "testsave/lm_finetuning"
    model.save(save_dir)
    processor.save(save_dir)
def convert_to_transformers():
    farm_input_dir = Path("../saved_models/farm-bert-base-german-cased-hatespeech-GermEval18Coarse")
    transformers_output_dir = "../saved_models/bert-base-german-cased-hatespeech-GermEval18Coarse"

    # load from FARM format
    model = AdaptiveModel.load(farm_input_dir, device="cpu")
    processor = Processor.load_from_dir(farm_input_dir)
    model.connect_heads_with_processor(processor.tasks)

    # convert to transformers
    transformer_model = Converter.convert_to_transformers(model)[0]

    # Alternative way to convert to transformers:
    # transformer_model = model.convert_to_transformers()[0]

    # save it (note: transformers uses str instead of Path objects)
    Path(transformers_output_dir).mkdir(parents=True, exist_ok=True)
    transformer_model.save_pretrained(transformers_output_dir)
    processor.tokenizer.save_pretrained(transformers_output_dir)

    # run predictions (using transformers)
    nlp = pipeline('sentiment-analysis', model=str(transformers_output_dir), tokenizer=str(transformers_output_dir))
    res = nlp("Was ein scheiß Nazi!")
    pprint.pprint(res)
def load(cls, load_dir, batch_size=4, gpu=False, embedder_only=False):
    """
    Initializes an Inferencer from a directory containing a saved model.

    :param load_dir: Directory where the saved model is located.
    :type load_dir: str
    :param batch_size: Number of samples computed per batch
    :type batch_size: int
    :param gpu: If the GPU shall be used
    :type gpu: bool
    :param embedder_only: If True, a faster processor (InferenceProcessor) is loaded.
                          This should only be used for extracting embeddings (no downstream predictions).
    :type embedder_only: bool
    :return: An instance of the Inferencer.
    """
    device, n_gpu = initialize_device_settings(
        use_cuda=gpu, local_rank=-1, fp16=False
    )
    model = AdaptiveModel.load(load_dir, device)
    if embedder_only:
        # model.prediction_heads = []
        processor = InferenceProcessor.load_from_dir(load_dir)
    else:
        processor = Processor.load_from_dir(load_dir)

    name = os.path.basename(load_dir)
    return cls(model, processor, batch_size=batch_size, gpu=gpu, name=name)
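# A minimal usage sketch for this (older) load() signature, assuming it is the classmethod of
# farm.infer.Inferencer and that a FARM model was previously saved to the hypothetical directory
# "saved_models/my-model". With embedder_only=True the lighter InferenceProcessor is loaded, which
# the docstring above restricts to embedding extraction without downstream predictions.
from farm.infer import Inferencer

embedder = Inferencer.load("saved_models/my-model", batch_size=8, gpu=False, embedder_only=True)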
def convert_from_transformers():
    # CASE 1: MODEL
    # Load model from transformers model hub (-> continue training / compare models / ...)
    model = AdaptiveModel.convert_from_transformers("deepset/bert-large-uncased-whole-word-masking-squad2",
                                                    device="cpu",
                                                    task_type="question_answering")
    # ... continue as in the other examples, e.g. to fine-tune this QA model on your own data

    # CASE 2: INFERENCER
    # Load Inferencer from transformers, incl. model & tokenizer (-> just get predictions)
    nlp = Inferencer.load("deepset/bert-large-uncased-whole-word-masking-squad2",
                          task_type="question_answering")

    # run predictions
    QA_input = [{
        "questions": ["Why is model conversion important?"],
        "text": "The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks."
    }]
    result = nlp.inference_from_dicts(dicts=QA_input, rest_api_schema=True)
    pprint.pprint(result)

    # save it
    farm_model_dir = Path("../saved_models/bert-english-qa-large")
    nlp.save(farm_model_dir)
def get_adaptive_model(
    lm_output_type,
    prediction_heads,
    layer_dims,
    model,
    device,
    embeds_dropout_prob,
    class_weights=None,
):
    parsed_lm_output_types = lm_output_type.split(",")
    language_model = LanguageModel.load(model)

    initialized_heads = []
    for head_name in prediction_heads.split(","):
        initialized_heads.append(
            PredictionHead.create(
                prediction_head_name=head_name,
                layer_dims=layer_dims,
                class_weights=class_weights,
            )
        )

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=initialized_heads,
        embeds_dropout_prob=embeds_dropout_prob,
        lm_output_types=parsed_lm_output_types,
        device=device,
    )
    return model
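# A hypothetical usage sketch for the helper above (not part of the original snippet): prediction head
# names and LM output types are passed as comma-separated strings, so a single text classification
# head could be requested as follows, assuming a head class named "TextClassificationHead" is known to
# PredictionHead.create and that 768 matches the base model's hidden size.
example_model = get_adaptive_model(
    lm_output_type="per_sequence",
    prediction_heads="TextClassificationHead",
    layer_dims=[768, 2],
    model="bert-base-german-cased",
    device="cpu",
    embeds_dropout_prob=0.1,
)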
def convert_to_transformers():
    farm_model_dir = Path("../saved_models/bert-english-qa-large")

    # load from FARM format
    model = AdaptiveModel.load(farm_model_dir, device="cpu")
    tokenizer = Tokenizer.load(farm_model_dir)

    # convert to transformers
    transformer_model = model.convert_to_transformers()

    # save it (Note: transformers uses strings rather than Path objects)
    model_dir = "../saved_models/bert-large-uncased-whole-word-masking-squad2"
    os.makedirs(model_dir, exist_ok=True)
    transformer_model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)

    # run predictions (using transformers)
    nlp = pipeline('question-answering', model=model_dir, tokenizer=model_dir)
    res = nlp({
        'question': 'Why is model conversion important?',
        'context': 'The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks.'
    })
    pprint.pprint(res)
def train_on_split(silo_to_use, n_fold, save_dir):
    logger.info(f"############ Crossvalidation: Fold {n_fold} ############")
    # Create an AdaptiveModel
    # a) which consists of a pretrained language model as a basis
    language_model = LanguageModel.load(lang_model)
    # b) and a prediction head on top that is suited for our task => Text classification
    prediction_head = TextClassificationHead(
        layer_dims=[768, len(processor.tasks["text_classification"]["label_list"])],
        class_weights=data_silo.calculate_class_weights(task_name="text_classification"))

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.2,
        lm_output_types=["per_sequence"],
        device=device)

    # Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=0.5e-5,
        device=device,
        n_batches=len(silo_to_use.loaders["train"]),
        n_epochs=n_epochs,
        use_amp=use_amp)

    # Feed everything to the Trainer, which takes care of training the model and evaluates it from time to time.
    # Also create an EarlyStopping instance and pass it on to the trainer.
    # An early stopping instance can be used to save the model that performs best on the dev set
    # according to some metric and stop training when no improvement is happening for some iterations.
    # NOTE: Using a different save directory for each fold allows us to use the
    # n_folds best models in an ensemble afterwards!
    save_dir += f"-{n_fold}"
    earlystopping = EarlyStopping(
        metric="f1_offense", mode="max",  # use the metric from our own metrics function instead of loss
        save_dir=save_dir,  # where to save the best model
        patience=5  # number of evaluations to wait for improvement before terminating the training
    )

    trainer = Trainer(
        optimizer=optimizer,
        data_silo=silo_to_use,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
        early_stopping=earlystopping,
        evaluator_test=False)

    # train it
    model = trainer.train(model)
    return model
def onnx_runtime_example():
    """
    This example converts a Question Answering FARM AdaptiveModel to ONNX format and uses
    ONNX Runtime for inference.
    """
    device = "cpu"
    model_name_or_path = "deepset/bert-base-cased-squad2"
    onnx_model_export_path = Path("./onnx-export")

    model = AdaptiveModel.convert_from_transformers(model_name_or_path, device=device, task_type="question_answering")
    model.convert_to_onnx(onnx_model_export_path)

    inferencer = Inferencer.load(model_name_or_path=onnx_model_export_path)

    qa_input = [
        {
            "qas": ["Who counted the game among the best ever made?"],
            "context": "Twilight Princess was released to universal critical acclaim and commercial success. "
                       "It received perfect scores from major publications such as 1UP.com, Computer and Video Games, "
                       "Electronic Gaming Monthly, Game Informer, GamesRadar, and GameSpy. On the review aggregators "
                       "GameRankings and Metacritic, Twilight Princess has average scores of 95% and 95 for the Wii "
                       "version and scores of 95% and 96 for the GameCube version. GameTrailers in their review called "
                       "it one of the greatest games ever created.",
        }
    ]

    results = inferencer.inference_from_dicts(qa_input)
    print(results)
def __init__(self, load_dir, batch_size=4, gpu=False):
    """
    Initializes an inferencer from a directory with a saved model.

    :param load_dir: Directory containing a saved AdaptiveModel
    :type load_dir: str
    :param batch_size: Number of samples computed per batch
    :type batch_size: int
    :param gpu: If the GPU shall be used
    :type gpu: bool
    """
    # Init device and distributed settings
    device, n_gpu = initialize_device_settings(
        use_cuda=gpu, local_rank=-1, fp16=False
    )

    self.processor = Processor.load_from_dir(load_dir)
    self.model = AdaptiveModel.load(load_dir, device)
    self.model.eval()
    self.batch_size = batch_size
    self.device = device
    self.language = self.model.language_model.language
    # TODO adjust for multiple prediction heads
    if len(self.model.prediction_heads) == 1:
        self.prediction_type = self.model.prediction_heads[0].model_type
        self.label_map = self.processor.label_maps[0]
    elif len(self.model.prediction_heads) == 0:
        self.prediction_type = "embedder"
    self.name = os.path.basename(load_dir)
    set_all_seeds(42, n_gpu)
def convert_from_transformers():
    transformers_input_name = "deepset/bert-base-german-cased-hatespeech-GermEval18Coarse"
    farm_output_dir = Path("../saved_models/farm-bert-base-german-cased-hatespeech-GermEval18Coarse")

    # CASE 1: MODEL
    # Load model from transformers model hub (-> continue training / compare models / ...)
    model = AdaptiveModel.convert_from_transformers(transformers_input_name,
                                                    device="cpu",
                                                    task_type="text_classification")
    # ... continue as in the other examples, e.g. to fine-tune this model on your own data

    # CASE 2: INFERENCER
    # Load Inferencer from transformers, incl. model & tokenizer (-> just get predictions)
    nlp = Inferencer.load(transformers_input_name, task_type="text_classification")

    # run predictions
    result = nlp.inference_from_dicts(dicts=[{"text": "Was ein scheiß Nazi!"}], rest_api_schema=True)
    pprint.pprint(result)

    # save it
    nlp.save(farm_output_dir)
def test_prediction_head_load_save_class_weights(tmp_path, caplog=None):
    """This is a regression test for #428 and #422."""
    if caplog:
        caplog.set_level(logging.CRITICAL)

    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=False)
    batch_size = 1
    lang_model = "bert-base-german-cased"
    data_dir_path = "samples/doc_class"

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=lang_model,
        do_lower_case=False)

    tcp_params = dict(tokenizer=tokenizer,
                      max_seq_len=8,
                      data_dir=Path(data_dir_path),
                      train_filename="train-sample.tsv",
                      label_list=["OTHER", "OFFENSE"],
                      metric="f1_macro",
                      dev_filename="test-sample.tsv",
                      test_filename=None,
                      dev_split=0.0,
                      label_column_name="coarse_label")

    processor = TextClassificationProcessor(**tcp_params)

    data_silo = DataSilo(
        processor=processor,
        batch_size=batch_size)

    language_model = LanguageModel.load(lang_model)
    prediction_head = TextClassificationHead(
        num_labels=2,
        class_weights=data_silo.calculate_class_weights(task_name="text_classification"))

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_sequence"],
        device=device)

    model.save(tmp_path)
    model_loaded = AdaptiveModel.load(tmp_path, device='cpu')
    assert model_loaded is not None
def evaluate_question_answering():
    ##########################
    ########## Settings
    ##########################
    device, n_gpu = initialize_device_settings(use_cuda=True)
    lang_model = "deepset/roberta-base-squad2"
    do_lower_case = True

    data_dir = Path("../data/squad20")
    evaluation_filename = "dev-v2.0.json"

    batch_size = 50
    no_ans_boost = 0
    accuracy_at = 3  # accuracy at n is useful for answers inside long documents

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model, do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=256,
        label_list=["start_token", "end_token"],
        metric="squad",
        train_filename=None,
        dev_filename=None,
        dev_split=0,
        test_filename=evaluation_filename,
        data_dir=data_dir,
        doc_stride=128,
    )

    # 3. Create a DataSilo that loads the dataset, provides DataLoaders for it and calculates a few descriptive statistics
    data_silo = DataSilo(processor=processor, batch_size=batch_size)

    # 4. Create an Evaluator
    evaluator = Evaluator(data_loader=data_silo.get_data_loader("test"),
                          tasks=data_silo.processor.tasks,
                          device=device)

    # 5. Load model
    model = AdaptiveModel.convert_from_transformers(lang_model, device=device, task_type="question_answering")
    # use "load" if you want to use a local model that was trained with FARM
    # model = AdaptiveModel.load(lang_model, device=device)
    model.prediction_heads[0].no_ans_boost = no_ans_boost
    model.prediction_heads[0].n_best = accuracy_at
    model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)

    # 6. Run the Evaluator
    results = evaluator.eval(model)
    f1_score = results[0]["f1"]
    em_score = results[0]["EM"]
    tnacc = results[0]["top_n_accuracy"]
    print("F1-Score:", f1_score)
    print("Exact Match Score:", em_score)
    print(f"top_{accuracy_at}_accuracy:", tnacc)
def distilbert_nq(caplog=None):
    if caplog:
        caplog.set_level(logging.CRITICAL)

    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=False)
    batch_size = 2
    n_epochs = 1
    evaluate_every = 4
    base_LM_model = "distilbert-base-uncased"

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=base_LM_model, do_lower_case=True
    )
    processor = NaturalQuestionsProcessor(
        tokenizer=tokenizer,
        max_seq_len=20,
        doc_stride=10,
        max_query_length=6,
        train_filename="train_sample.jsonl",
        dev_filename="dev_sample.jsonl",
        data_dir=Path("samples/nq")
    )

    data_silo = DataSilo(processor=processor, batch_size=batch_size, max_processes=1)
    language_model = LanguageModel.load(base_LM_model)
    qa_head = QuestionAnsweringHead()
    classification_head = TextClassificationHead(num_labels=len(processor.answer_type_list))

    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[qa_head, classification_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token", "per_sequence"],
        device=device,
    )

    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=2e-5,
        # optimizer_opts={'name': 'AdamW', 'lr': 2E-05},
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device
    )
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device
    )
    trainer.train()
    return model, processor
def distilbert_squad(request):
    set_all_seeds(seed=42)
    device, n_gpu = initialize_device_settings(use_cuda=False)
    batch_size = 2
    n_epochs = 1
    evaluate_every = 4
    base_LM_model = "distilbert-base-uncased"

    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=base_LM_model,
        do_lower_case=True,
        use_fast=True  # TODO parametrize this to test slow as well
    )
    label_list = ["start_token", "end_token"]
    processor = SquadProcessor(
        tokenizer=tokenizer,
        max_seq_len=20,
        doc_stride=10,
        max_query_length=6,
        train_filename="train-sample.json",
        dev_filename="dev-sample.json",
        test_filename=None,
        data_dir=Path("samples/qa"),
        label_list=label_list,
        metric="squad"
    )

    data_silo = DataSilo(processor=processor, batch_size=batch_size, max_processes=1)
    language_model = LanguageModel.load(base_LM_model)
    prediction_head = QuestionAnsweringHead()
    model = AdaptiveModel(
        language_model=language_model,
        prediction_heads=[prediction_head],
        embeds_dropout_prob=0.1,
        lm_output_types=["per_token"],
        device=device,
    )

    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=2e-5,
        # optimizer_opts={'name': 'AdamW', 'lr': 2E-05},
        n_batches=len(data_silo.loaders["train"]),
        n_epochs=n_epochs,
        device=device)

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=data_silo,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device)

    trainer.train()
    return model, processor
def evaluate_classification():
    ##########################
    ########## Settings
    ##########################
    device, n_gpu = initialize_device_settings(use_cuda=True)
    lang_model = "deepset/bert-base-german-cased-sentiment-Germeval17"
    do_lower_case = False
    batch_size = 100

    data_dir = Path("../data/germeval17")
    evaluation_filename = "test_TIMESTAMP1.tsv"

    label_list = ["negative", "neutral", "positive"]
    metric = "f1_macro"

    # 1. Create a tokenizer
    tokenizer = Tokenizer.load(
        pretrained_model_name_or_path=lang_model,
        do_lower_case=do_lower_case)

    # 2. Create a DataProcessor that handles all the conversion from raw text into a pytorch Dataset.
    #    Here we download the GermEval 2017 data automatically if it is not available.
    processor = TextClassificationProcessor(
        tokenizer=tokenizer,
        max_seq_len=384,
        label_list=label_list,
        metric=metric,
        train_filename=None,
        dev_filename=None,
        dev_split=0,
        test_filename=evaluation_filename,
        data_dir=data_dir,
    )

    # 3. Create a DataSilo that loads the dataset, provides DataLoaders for it and calculates a few descriptive statistics
    data_silo = DataSilo(
        processor=processor,
        batch_size=batch_size)

    # 4. Create an Evaluator
    evaluator = Evaluator(
        data_loader=data_silo.get_data_loader("test"),
        tasks=data_silo.processor.tasks,
        device=device
    )

    # 5. Load model
    model = AdaptiveModel.convert_from_transformers(lang_model, device=device, task_type="text_classification")
    # use "load" if you want to use a local model that was trained with FARM
    # model = AdaptiveModel.load(lang_model, device=device)
    model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)

    # 6. Run the Evaluator
    results = evaluator.eval(model)
    f1_score = results[0]["f1_macro"]
    print("Macro-averaged F1-Score:", f1_score)
def train_on_split(silo_to_use, n_fold):
    logger.info(f"############ Crossvalidation: Fold {n_fold} ############")

    # fine-tune a pre-trained question-answering model
    model = AdaptiveModel.convert_from_transformers(lang_model, device=device, task_type="question_answering")
    model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)
    # If positive, this will boost "No Answer" as prediction.
    # If negative, this will prevent the model from giving "No Answer" as prediction.
    model.prediction_heads[0].no_ans_boost = no_ans_boost
    # Number of predictions the model will make per question.
    # The multiple predictions are used for evaluating top n recall.
    model.prediction_heads[0].n_best = accuracy_at

    # or train question-answering models from scratch:
    # Create an AdaptiveModel
    # a) which consists of a pretrained language model as a basis
    # language_model = LanguageModel.load(lang_model)
    # b) and a prediction head on top that is suited for our task => Question Answering
    # prediction_head = QuestionAnsweringHead(no_ans_boost=no_ans_boost, n_best=accuracy_at)
    # model = AdaptiveModel(
    #     language_model=language_model,
    #     prediction_heads=[prediction_head],
    #     embeds_dropout_prob=0.1,
    #     lm_output_types=["per_token"],
    #     device=device,)

    # Create an optimizer
    model, optimizer, lr_schedule = initialize_optimizer(
        model=model,
        learning_rate=learning_rate,
        device=device,
        n_batches=len(silo_to_use.loaders["train"]),
        n_epochs=n_epochs,
        use_amp=use_amp)

    # Feed everything to the Trainer, which takes care of training the model and evaluates it from time to time.
    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        data_silo=silo_to_use,
        epochs=n_epochs,
        n_gpu=n_gpu,
        lr_schedule=lr_schedule,
        evaluate_every=evaluate_every,
        device=device,
        evaluator_test=False)

    # train it
    trainer.train()
    return trainer.model
def embedding_extraction():
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO)

    ##########################
    ########## Settings
    ##########################
    set_all_seeds(seed=42)
    # load from a local path:
    # lang_model = Path("../saved_models/glove-german-uncased")
    # or through s3
    lang_model = "glove-german-uncased"  # only glove, word2vec or converted fasttext (fixed vocab) embeddings supported
    do_lower_case = True
    use_amp = None
    device, n_gpu = initialize_device_settings(use_cuda=True, use_amp=use_amp)

    # Create an InferenceProcessor
    tokenizer = Tokenizer.load(pretrained_model_name_or_path=lang_model, do_lower_case=do_lower_case)
    processor = InferenceProcessor(tokenizer=tokenizer, max_seq_len=128)

    # Create an AdaptiveModel
    language_model = LanguageModel.load(lang_model)
    model = AdaptiveModel(language_model=language_model,
                          prediction_heads=[],
                          embeds_dropout_prob=0.1,
                          lm_output_types=["per_sequence"],
                          device=device)

    # Create an Inferencer for embedding extraction
    inferencer = Inferencer(model=model, processor=processor, task_type="embeddings")

    # Extract vectors
    basic_texts = [
        {"text": "Schartau sagte dem Tagesspiegel, dass Fischer ein Idiot sei"},
        {"text": "Martin Müller spielt Handball in Berlin"},
    ]

    result = inferencer.extract_vectors(dicts=basic_texts, extraction_strategy="cls_token", extraction_layer=-1)
    print(result)
    inferencer.close_multiprocessing_pool()
def test_multiple_prediction_heads():
    model = "bert-base-german-cased"
    lm = LanguageModel.load(model)
    ph1 = TextClassificationHead(num_labels=3, label_list=["negative", "neutral", "positive"])
    ph2 = TokenClassificationHead(num_labels=3, label_list=["PER", "LOC", "ORG"])
    adaptive_model = AdaptiveModel(language_model=lm,
                                   prediction_heads=[ph1, ph2],
                                   embeds_dropout_prob=0.1,
                                   lm_output_types="per_token",
                                   device="cpu")
    transformer_models = Converter.convert_to_transformers(adaptive_model)
    assert isinstance(transformer_models[0], BertForSequenceClassification)
    assert isinstance(transformer_models[1], BertForTokenClassification)
    del lm
    del transformer_models
    del adaptive_model
def test_conversion_inferencer_qa():
    # input
    question = "Why is model conversion important?"
    text = "The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks."

    # Load from model hub
    model = "deepset/bert-base-cased-squad2"
    nlp = Inferencer.load(model, task_type="question_answering", num_processes=0)

    assert nlp.processor.tokenizer.do_lower_case == False
    assert nlp.processor.tokenizer.is_fast == True

    QA_input = [{"questions": [question], "text": text}]
    result_farm = nlp.inference_from_dicts(dicts=QA_input)
    answer_farm = result_farm[0]["predictions"][0]["answers"][0]["answer"]
    assert answer_farm == 'gives freedom to the user'

    # save it
    farm_model_dir = Path("testsave/bert-conversion-test")
    nlp.save(farm_model_dir)

    # free RAM
    del nlp

    # load from disk in FARM format
    model = AdaptiveModel.load(farm_model_dir, device="cpu")
    tokenizer = Tokenizer.load(farm_model_dir)

    # convert to transformers
    transformer_model = Converter.convert_to_transformers(model)[0]

    # free RAM
    del model

    # save it (Note: transformers uses strings rather than Path objects)
    model_dir = "testsave/bert-conversion-test-hf"
    os.makedirs(model_dir, exist_ok=True)
    transformer_model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)
    del transformer_model
    del tokenizer

    # run predictions (using transformers)
    nlp = pipeline('question-answering', model=model_dir, tokenizer=model_dir)
    result_transformers = nlp({
        'question': question,
        'context': text
    })
    answer_transformers = result_transformers["answer"]
    assert answer_farm == answer_transformers
    del nlp
def test_conversion_adaptive_model(caplog):
    if caplog:
        caplog.set_level(logging.CRITICAL)

    model = AdaptiveModel.convert_from_transformers(
        "deepset/bert-base-cased-squad2", device="cpu", task_type="question_answering")
    transformer_model = model.convert_to_transformers()
    transformer_model2 = AutoModelForQuestionAnswering.from_pretrained("deepset/bert-base-cased-squad2")
    # compare weights
    for p1, p2 in zip(transformer_model.parameters(), transformer_model2.parameters()):
        assert p1.data.ne(p2.data).sum() == 0
def onnx_adaptive_model_qa(use_gpu, num_processes):
    model_name_or_path = "deepset/bert-base-cased-squad2"
    onnx_model_export_path = Path("benchmarks/onnx-export")

    if not (onnx_model_export_path / "model.onnx").is_file():
        model = AdaptiveModel.convert_from_transformers(
            model_name_or_path, device="cpu", task_type="question_answering")
        model.convert_to_onnx(onnx_model_export_path)

    model = Inferencer.load(onnx_model_export_path,
                            task_type="question_answering",
                            batch_size=1,
                            num_processes=num_processes,
                            gpu=use_gpu)
    return model
def __init__(self, load_dir, batch_size=4, gpu=False):
    # Init device and distributed settings
    device, n_gpu = initialize_device_settings(use_cuda=gpu, local_rank=-1, fp16=False)

    self.processor = Processor.load_from_dir(load_dir)
    self.model = AdaptiveModel.load(load_dir, device)
    self.model.eval()
    self.batch_size = batch_size
    self.device = device
    self.language = self.model.language_model.language
    # TODO adjust for multiple prediction heads
    self.prediction_type = self.model.prediction_heads[0].model_type
    self.name = os.path.basename(load_dir)
    self.label_map = self.processor.label_maps[0]
    set_all_seeds(42, 1)
def test_conversion_inferencer_ner():
    # input
    text = "Paris is a town in France."

    # Load from model hub
    model = "dslim/bert-base-NER"
    nlp = Inferencer.load(model, task_type="ner", num_processes=0)

    assert nlp.processor.tokenizer.do_lower_case == False
    assert nlp.processor.tokenizer.is_fast == True

    input = [{"text": text}]
    result_farm = nlp.inference_from_dicts(dicts=input)
    pred_farm = result_farm[0]["predictions"]
    assert pred_farm[0][0]["label"] == 'LOC'
    assert pred_farm[0][1]["label"] == 'LOC'
    assert len(pred_farm[0]) == 2

    # save it
    farm_model_dir = Path("testsave/bert-conversion-test-hf")
    nlp.save(farm_model_dir)
    del nlp

    # load from disk in FARM format
    model = AdaptiveModel.load(farm_model_dir, device="cpu")
    tokenizer = Tokenizer.load(farm_model_dir)

    # convert to transformers
    transformer_model = Converter.convert_to_transformers(model)[0]
    del model

    # save it (Note: transformers uses strings rather than Path objects)
    model_dir = "testsave/bert-conversion-test-hf"
    os.makedirs(model_dir, exist_ok=True)
    transformer_model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)
    del transformer_model
    del tokenizer

    # run predictions (using transformers)
    nlp = pipeline('ner', model=model_dir, tokenizer=model_dir)
    result_transformers = nlp(text)
    assert result_transformers[0]["entity"] == 'B-LOC'
    assert result_transformers[1]["entity"] == 'B-LOC'
    assert len(result_transformers) == 2
    del nlp
def load(
    cls,
    load_dir,
    batch_size=4,
    gpu=False,
    embedder_only=False,
    return_class_probs=False,
    strict=True
):
    """
    Initializes an Inferencer from a directory containing a saved model.

    :param load_dir: Directory where the saved model is located.
    :type load_dir: str
    :param batch_size: Number of samples computed per batch
    :type batch_size: int
    :param gpu: If the GPU shall be used
    :type gpu: bool
    :param embedder_only: If True, a faster processor (InferenceProcessor) is loaded.
                          This should only be used for extracting embeddings (no downstream predictions).
    :type embedder_only: bool
    :param strict: Whether to strictly enforce that the keys loaded from the saved model match the ones in
                   the PredictionHead (see torch.nn.Module.load_state_dict()).
                   Set to `False` for backwards compatibility with PHs saved with older versions of FARM.
    :type strict: bool
    :return: An instance of the Inferencer.
    """
    device, n_gpu = initialize_device_settings(use_cuda=gpu, local_rank=-1, fp16=False)

    model = AdaptiveModel.load(load_dir, device, strict=strict)
    if embedder_only:
        # model.prediction_heads = []
        processor = InferenceProcessor.load_from_dir(load_dir)
    else:
        processor = Processor.load_from_dir(load_dir)

    name = os.path.basename(load_dir)
    return cls(
        model,
        processor,
        batch_size=batch_size,
        gpu=gpu,
        name=name,
        return_class_probs=return_class_probs,
    )
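# A minimal usage sketch for the classmethod above, assuming it belongs to farm.infer.Inferencer and
# that "saved_models/my-old-farm-model" is a hypothetical directory with a model saved by an older
# FARM version. strict=False relaxes the state-dict check on the prediction heads, as recommended in
# the docstring for backwards compatibility; return_class_probs is simply forwarded to the constructor.
from farm.infer import Inferencer

inferencer = Inferencer.load(
    "saved_models/my-old-farm-model",
    batch_size=16,
    gpu=False,
    strict=False,
    return_class_probs=True,
)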