def __init__(
    self,
    model_name,
    pipeline_name,
    model_path=None,
    device="cpu",
    quantization=False,
    opset=11,
    force=False,
    **convert_kwargs,
):
    if model_path is None:
        model_path = f"onnx/{model_name}.onnx"
    model_path = Path(model_path)

    # Export only if no ONNX file exists yet (or a re-export is forced)
    if not model_path.is_file() or force:
        convert(
            framework="pt",
            model=model_name,
            output=model_path,
            opset=opset,
            pipeline_name=pipeline_name,
            **convert_kwargs,
        )
        if quantization:
            # optimize() and quantize() each write a new file and return its path
            model_path = optimize(model_path)
            model_path = quantize(model_path)

    self.model_path = str(model_path)
    self.provider = "CPUExecutionProvider" if device == "cpu" else "CUDAExecutionProvider"
    self.session = self.create_model_for_provider()
    self.config = AutoConfig.from_pretrained(model_name)
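The constructor above relies on a create_model_for_provider helper that is not shown. A minimal sketch of what it might look like, assuming standard onnxruntime APIs; the method body is an assumption, only its name and the self.model_path / self.provider attributes come from the constructor:

from onnxruntime import GraphOptimizationLevel, InferenceSession, SessionOptions

def create_model_for_provider(self):
    # Hypothetical helper assumed by __init__ above: build an ONNX Runtime
    # session for self.model_path on the selected execution provider.
    options = SessionOptions()
    options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    session = InferenceSession(self.model_path, options, providers=[self.provider])
    session.disable_fallback()  # fail loudly instead of silently falling back to CPU
    return session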
def _test_export(self, model, framework, opset):
    try:
        # Compute the destination path
        path = "onnx" + sep + model + ".onnx"

        # Remove the folder if it already exists
        if exists(dirname(path)):
            rmtree(dirname(path))

        # Export
        convert(framework, model, path, opset)
    except Exception as e:
        self.fail(e)
def _test_export(self, model, framework, opset, tokenizer=None):
    try:
        # Export into a temporary directory
        with TemporaryDirectory() as tempdir:
            path = tempdir + "/model.onnx"

            # Remove the folder if it already exists
            if exists(dirname(path)):
                rmtree(dirname(path))

            # Export
            convert(framework, model, path, opset, tokenizer)
    except Exception as e:
        self.fail(e)
def _test_export(self, model, framework, opset, tokenizer=None):
    try:
        with TemporaryDirectory() as tempdir:
            path = Path(tempdir).joinpath("model.onnx")

            # Remove the folder if it already exists
            if path.parent.exists():
                path.parent.rmdir()

            # Export
            convert(framework, model, path, opset, tokenizer)

            return path
    except Exception as e:
        self.fail(e)
def convert_to_onnx(self, onnx_output_dir=None, set_onnx_arg=True):
    """Convert the model to ONNX format and save to output_dir.

    Args:
        onnx_output_dir (str, optional): If specified, the ONNX model will be saved
            to onnx_output_dir (else args.output_dir will be used). Defaults to None.
        set_onnx_arg (bool, optional): Updates the model args to set onnx=True.
            Defaults to True.
    """  # noqa
    if not onnx_output_dir:
        onnx_output_dir = os.path.join(
            self.options.output_dir, self.options.model_type, self.options.model_name, "onnx"
        )
    os.makedirs(onnx_output_dir, exist_ok=True)

    if not os.listdir(onnx_output_dir):
        onnx_model_name = os.path.join(onnx_output_dir, "onnx_model.onnx")
        with tempfile.TemporaryDirectory() as temp_dir:
            basedir = os.path.basename(temp_dir)
            temp_dir = os.path.join(self.options.output_dir, basedir)
            self.save_model(output_dir=temp_dir, model=self.model)
            convert(
                framework="pt",
                model=temp_dir,
                tokenizer=self.tokenizer,
                output=Path(onnx_model_name),
                pipeline_name="ner",
                opset=11,
            )
        self.tokenizer.save_pretrained(onnx_output_dir)
        self.config.save_pretrained(onnx_output_dir)

    onnx_options = SessionOptions()
    use_cuda = self._device.type != "cpu"
    onnx_execution_provider = "CUDAExecutionProvider" if use_cuda else "CPUExecutionProvider"
    onnx_options.intra_op_num_threads = 1
    onnx_options.execution_mode = ExecutionMode.ORT_SEQUENTIAL

    onnx_model_path = os.path.join(onnx_output_dir, "onnx_model.onnx")
    if self.options.dynamic_quantize:
        # Append "-quantized" to the end of the model's file name
        quantized_model_path = generate_identified_filename(Path(onnx_model_path), "-quantized")
        quantize_dynamic(Path(onnx_model_path), quantized_model_path)
        onnx_model_path = quantized_model_path.as_posix()

    return InferenceSession(onnx_model_path, onnx_options, providers=[onnx_execution_provider])
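A hedged usage sketch for the method above, assuming ner_model is an instance of the surrounding class (a hypothetical name) whose tokenizer matches the exported graph:

import numpy as np

session = ner_model.convert_to_onnx()  # returns an onnxruntime InferenceSession
encoded = ner_model.tokenizer("Satya Nadella works at Microsoft.", return_tensors="np")
token_logits = session.run(None, dict(encoded))[0]  # (1, seq_len, num_labels)
predicted_ids = np.argmax(token_logits, axis=-1)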
def _save_impl(self, path, export_type=None):
    path = Path(path)
    self._save_gpu(path)
    if export_type == "cpu":
        with TemporaryDirectory() as temp_dir:
            temp_model_path = Path(temp_dir) / "temp.onnx"
            convert(
                framework="pt",
                model=str(path),
                output=temp_model_path,
                pipeline_name="sentiment-analysis",
                opset=11,
            )
            # optimize() and quantize() write new files and return their paths
            optimized_path = optimize(temp_model_path)
            quantized_path = quantize(optimized_path)

            # Gzip the quantized model into the target location
            target_path = BertCpuClassifier.onnx_model_path(path)
            with open(quantized_path, "rb") as src, gzip.open(target_path, "wb") as dst:
                shutil.copyfileobj(src, dst)

        # The ONNX export replaces the PyTorch weights for CPU deployment
        os.remove(path / "pytorch_model.bin")
def export():
    shutil.rmtree("onnx", ignore_errors=True)
    model = Classification.from_pretrained("model")
    model.base_model.save_pretrained("./bertBase")
    convert(
        framework="pt",
        model="bertBase",  # CHANGED: refer to custom model
        tokenizer=get_tokenizer(),  # CHANGED: add tokenizer
        output=Path("onnx/bert-base-cased.onnx"),
        opset=12,
    )

    # Mixed-precision conversion for the bert-base-cased model converted from PyTorch
    optimized_model = optimizer.optimize_model(
        "onnx/bert-base-cased.onnx",  # CHANGED: original path didn't point to the right directory
        model_type="bert",
        num_heads=12,
        hidden_size=768,
    )
    optimized_model.convert_model_float32_to_float16()
    optimized_model.save_model_to_file("onnx/bert-base-cased.onnx")
def get_best_model(self, metric, app_key, app_name=None):
    """Pick the best checkpoint by validation metric and export it (e.g. metric='token-f1')."""
    # Get the best model by validation metrics
    eval_history = self.history
    if metric in {"token-f1"}:
        bestscore = np.max([m[f"eval_{metric}"] for m in eval_history])
        best_step = eval_history[np.argmax([m[f"eval_{metric}"] for m in eval_history])]["step"]
    elif metric in {"loss"}:
        bestscore = np.min([m[f"eval_{metric}"] for m in eval_history])
        best_step = eval_history[np.argmin([m[f"eval_{metric}"] for m in eval_history])]["step"]
    else:
        raise ValueError(f"Metric {metric} is not supported; use another metric.")

    best_dir = os.path.join(self.args.output_dir, f"checkpoint-{best_step}")
    print(f"Best model: {metric}={bestscore} at step {best_step}")

    # ONNX export
    train_time = app_key.split("/")[0]
    target_dir = f"{app_name}/{app_key},{metric}={bestscore:.4f}/SentimentExtractor-{train_time}"
    os.makedirs(target_dir, exist_ok=True)
    convert(
        framework="pt",
        model=best_dir,
        output=Path(f"{target_dir}/model.onnx"),  # convert() expects a Path, not a str
        opset=11,
        tokenizer=self.args.tokenizer,
        pipeline_name="ner",
    )
    df = pd.DataFrame(eval_history)
    df.to_csv(f"{target_dir}/history.csv", index=False)

    # Remove the checkpoints at the end
    shutil.rmtree(self.args.output_dir)
    return df
from pathlib import Path
import tempfile

from transformers.convert_graph_to_onnx import convert, quantize

dest = Path(tempfile.mkdtemp(), "sentiment-analysis.onnx")
convert(
    pipeline_name="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    output=dest,
    framework="pt",
    opset=11,
)
quantize(dest)
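A usage sketch for the export above (not part of the original snippet): load the graph with onnxruntime and classify a sentence. For this checkpoint, label index 1 corresponds to POSITIVE.

import numpy as np
from onnxruntime import InferenceSession
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english")
session = InferenceSession(str(dest))  # or pass the "-quantized" path that quantize() returns

encoded = tokenizer("ONNX export worked!", return_tensors="np")
logits = session.run(None, dict(encoded))[0]
print("POSITIVE" if int(np.argmax(logits)) == 1 else "NEGATIVE")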
def convert_to_onnx(cls, model_name, output_path, task_type, convert_to_float16=False, quantize=False, opset_version=11):
    """
    Convert a PyTorch model from the transformers hub to an ONNX model.

    :param model_name: transformers model name
    :type model_name: str
    :param output_path: output Path to write the converted model to
    :type output_path: Path
    :param task_type: Type of task for the model. Available options: "embeddings",
                      "question_answering", "text_classification", "ner".
    :param convert_to_float16: By default, the model uses float32 precision. With half
                               precision of float16, inference should be faster on Nvidia
                               GPUs with Tensor Cores, like T4 or V100. On older GPUs,
                               float32 might be more performant.
    :type convert_to_float16: bool
    :param quantize: convert floating-point weights to integers
    :type quantize: bool
    :param opset_version: ONNX opset version
    :type opset_version: int
    :return:
    """
    language_model_class = LanguageModel.get_language_model_class(model_name)
    if language_model_class not in ["Bert", "Roberta", "XLMRoberta"]:
        raise Exception(
            "The current ONNX conversion only supports 'BERT', 'RoBERTa', and 'XLMRoberta' models."
        )

    task_type_to_pipeline_map = {
        "question_answering": "question-answering",
        "embeddings": "feature-extraction",
        "ner": "ner",
    }

    convert(
        pipeline_name=task_type_to_pipeline_map[task_type],
        framework="pt",
        model=model_name,
        output=output_path / "model.onnx",
        opset=opset_version,
        use_external_format=(language_model_class == "XLMRoberta"),
    )

    # Save processor & model config files that are needed when loading the model
    # with the FARM Inferencer
    processor = Processor.convert_from_transformers(
        tokenizer_name_or_path=model_name,
        task_type=task_type,
        max_seq_len=256,
        doc_stride=128,
        use_fast=True,
    )
    processor.save(output_path)
    model = AdaptiveModel.convert_from_transformers(model_name, device="cpu", task_type=task_type)
    model.save(output_path)
    # Remove the actual PyTorch model (only the configs are required)
    os.remove(output_path / "language_model.bin")

    onnx_model_config = {
        "task_type": task_type,
        "onnx_opset_version": opset_version,
        "language_model_class": language_model_class,
        "language": model.language_model.language,
    }
    with open(output_path / "onnx_model_config.json", "w") as f:
        json.dump(onnx_model_config, f)

    if convert_to_float16:
        from onnxruntime_tools import optimizer

        config = AutoConfig.from_pretrained(model_name)
        optimized_model = optimizer.optimize_model(
            input=str(output_path / "model.onnx"),
            model_type="bert",
            num_heads=config.num_attention_heads,
            hidden_size=config.hidden_size,
        )
        optimized_model.convert_model_float32_to_float16()
        optimized_model.save_model_to_file(str(output_path / "model.onnx"))

    if quantize:
        quantize_model(output_path / "model.onnx")
from pathlib import Path
import tempfile

from transformers.convert_graph_to_onnx import convert, quantize

dest = Path(tempfile.mkdtemp(), "ner.onnx")
convert(
    pipeline_name="ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    output=dest,
    framework="pt",
    opset=11,
)
quantize(dest)
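A hedged follow-on sketch (not in the original snippet): decode the exported NER graph's per-token logits with the checkpoint's id2label mapping. The greedy per-token decoding is an assumption, not something the export dictates.

import numpy as np
from onnxruntime import InferenceSession
from transformers import AutoConfig, AutoTokenizer

model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)
session = InferenceSession(str(dest))

encoded = tokenizer("Hugging Face is based in New York City.", return_tensors="np")
logits = session.run(None, dict(encoded))[0]  # (1, seq_len, num_labels)
tags = [config.id2label[int(i)] for i in logits[0].argmax(axis=-1)]
print(list(zip(tokenizer.convert_ids_to_tokens(encoded["input_ids"][0]), tags)))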
for initializer in model.graph.initializer:
    if initializer.name in name_to_input:
        inputs.remove(name_to_input[initializer.name])

onnx.save(model, output)


if __name__ == "__main__":
    args = get_args_from_command_line()
    # text = "tick tock tick"
    # convert_bert_to_onnx('tick tock', args.model_dir, args.onnx_model_path)
    # remove_initializer_from_input(args.onnx_model_path, args.onnx_model_path)
    convert(
        framework="pt",
        model=args.model_dir,
        tokenizer="DeepPavlov/bert-base-cased-conversational",
        output=args.onnx_model_path,
        opset=11,
    )

    # ONNX optimization
    optimized_model = optimizer.optimize_model(
        args.onnx_model_path, model_type="bert", num_heads=12, hidden_size=768
    )
    optimized_onnx_model_path = os.path.join(
        os.path.dirname(args.onnx_model_path), "bert_optimized.onnx"
    )
    optimized_model.save_model_to_file(optimized_onnx_model_path)
    print("Optimized model saved at :", optimized_onnx_model_path)

    # ONNX quantization (legacy onnxruntime.quantization API;
    # QuantizationMode.IntegerOps is an assumption, the original call is truncated here)
    model = onnx.load(optimized_onnx_model_path)
    quantized_model = quantize(model, quantization_mode=QuantizationMode.IntegerOps)
from pathlib import Path
import tempfile

from transformers.convert_graph_to_onnx import convert, quantize

dest = Path(tempfile.mkdtemp(), "fill-mask.onnx")
convert(
    pipeline_name="fill-mask",
    model="distilroberta-base",
    output=dest,
    framework="pt",
    opset=11,
)
print(dest)
from pathlib import Path
import tempfile

from transformers.convert_graph_to_onnx import convert, quantize

dest = Path(tempfile.mkdtemp(), "feature-extraction.onnx")
convert(
    pipeline_name="feature-extraction",
    model="distilbert-base-cased",
    output=dest,
    framework="pt",
    opset=11,
)
quantize(dest)
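A hedged usage sketch for the feature-extraction export: the first graph output is the last hidden state, which can be mean-pooled into a sentence embedding. The pooling choice is an assumption, not part of the export.

import numpy as np
from onnxruntime import InferenceSession
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
session = InferenceSession(str(dest))

encoded = tokenizer("Sentence embeddings from ONNX.", return_tensors="np")
last_hidden_state = session.run(None, dict(encoded))[0]  # (1, seq_len, hidden)

# Mean-pool over tokens, ignoring padding positions
mask = encoded["attention_mask"][..., None]
embedding = (last_hidden_state * mask).sum(axis=1) / mask.sum(axis=1)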
from pathlib import Path
import tempfile

from transformers.convert_graph_to_onnx import convert, quantize

# requires:
#   transformers==4.0.0
#   torch==1.7.1

dest = Path(tempfile.mkdtemp(), "text-generation.onnx")
convert(
    pipeline_name="text-generation",
    model="gpt2",
    output=dest,
    framework="pt",
    opset=11,
)
print(dest)
def convert_to_onnx(model_path, model_name, output_path):
    output_path = pathlib.Path(output_path)
    convert(framework="pt", model=model_path, tokenizer=model_name, output=output_path, opset=11)

    if model_name in ("gpt2",):  # gpt2-medium not supported yet
        # optimizer.optimize_model expects string paths rather than pathlib.Path objects
        optimized_model = optimizer.optimize_model(str(output_path), model_type=model_name)
        optimized_model.save_model_to_file(str(output_path))
from pathlib import Path
import shutil

from transformers.convert_graph_to_onnx import convert

# checking whether the folder exists or not
# try:
#     shutil.rmtree("onnx")
# except OSError as e:
#     print("Error: %s" % e.strerror)

if __name__ == "__main__":
    name_model = "bert-base-cased"
    name_model = "gpt2"
    name_model = "vinai/phobert-base"  # the last assignment wins
    convert(
        framework="pt",
        model=name_model,
        output=Path("onnx/" + name_model + ".onnx"),
        opset=11,
    )
        args.country_code][f'iter{str(args.iteration_number)}'][label],
    label, 'models', 'best_model')
onnx_path = os.path.join(model_path, 'onnx')
try:
    # delete the onnx folder and its contents if it exists;
    # conversion raises when the output file already exists
    shutil.rmtree(onnx_path)
except OSError:
    logger.info('no existing folder, creating one')
os.makedirs(onnx_path)

logger.info('>> converting..')
convert(framework="pt",
        model=model_path,
        tokenizer=convert_model_path_to_model_name(model_path),
        output=Path(os.path.join(onnx_path, 'converted.onnx')),
        opset=11,
        pipeline_name='sentiment-analysis')

logger.info('>> ONNX optimization')
optimized_output = optimize(Path(os.path.join(onnx_path, 'converted.onnx')))

logger.info('>> Quantization')
quantized_output = quantize(optimized_output)

logger.info('>> Verification')
verify(Path(os.path.join(onnx_path, 'converted.onnx')))
verify(optimized_output)
verify(quantized_output)
    label))
model_path = args.model_path.format(label)
onnx_path = model_path + '/onnx/'
try:
    # delete the onnx folder and its contents if it exists;
    # conversion raises when the output file already exists
    shutil.rmtree(onnx_path)
except OSError:
    print('no existing folder, creating one')
os.makedirs(onnx_path)

print('>> converting..')
convert(framework="pt",
        model=model_path,
        tokenizer=args.model_name,
        output=Path(onnx_path + 'converted.onnx'),  # convert() expects a Path, not a str
        opset=11)

print('>> optimizing..')
# ONNX optimization
optimized_model = optimizer.optimize_model(onnx_path + 'converted.onnx',
                                           model_type=args.model_type,
                                           num_heads=12,
                                           hidden_size=768)
optimized_onnx_model_path = os.path.join(onnx_path, 'bert_optimized.onnx')
optimized_model.save_model_to_file(optimized_onnx_model_path)
print('Optimized model saved at :', optimized_onnx_model_path)

print('>> quantizing..')
from pathlib import Path
import tempfile

from transformers.convert_graph_to_onnx import convert, quantize

dest = Path(tempfile.mkdtemp(), "question-answering.onnx")
convert(
    pipeline_name="question-answering",
    model="distilbert-base-cased-distilled-squad",
    output=dest,
    framework="pt",
    opset=11,
)
quantize(dest)
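A hedged usage sketch for the question-answering export: the graph returns start and end logits, from which an answer span can be decoded. Greedy argmax decoding is an assumption here; real pipelines score candidate spans jointly.

import numpy as np
from onnxruntime import InferenceSession
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased-distilled-squad")
session = InferenceSession(str(dest))

encoded = tokenizer("Who maintains transformers?",
                    "The transformers library is maintained by Hugging Face.",
                    return_tensors="np")
start_logits, end_logits = session.run(None, dict(encoded))
start, end = int(np.argmax(start_logits)), int(np.argmax(end_logits))
print(tokenizer.decode(encoded["input_ids"][0][start:end + 1]))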
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Reference: https://github.com/huggingface/transformers/blob/master/notebooks/04-onnx-export.ipynb
# nnfusion codegen flags: nnfusion /path/to/pt-bert-base-cased.onnx -f onnx -p 'batch:3;sequence:512'

from pathlib import Path

from transformers.convert_graph_to_onnx import convert

convert(
    framework="pt",
    model="bert-base-cased",
    output=Path("onnx/pt-bert-base-cased.onnx"),
    opset=11,
)