def _get_config_with_export_list(
    self,
    task_class: Type[NewTask],
    model_class: Type[Model],
    test_file_metadata: TestFileMetadata,
) -> PyTextConfig:
    """Assemble a minimal PyTextConfig for an export test.

    The single TSV file described by ``test_file_metadata`` is used as the
    train, eval and test file. Batches hold one row, training runs for one
    epoch, and both TensorBoard and CUDA are switched off.

    Args:
        task_class: task type whose ``Config`` wraps data, trainer and model.
        model_class: model type whose ``Config`` receives the dense tensorizer.
        test_file_metadata: provides the file name, field names, dense column
            name and dense feature dimension.

    Returns:
        A PyTextConfig whose export section writes TorchScript to
        ``/tmp/model_torchscript.pt``.
    """
    # NOTE(review): despite the method name, a single ``export`` entry is
    # passed here rather than a list of exports - confirm this is intended.
    meta = test_file_metadata

    tsv_source = TSVDataSource.Config(
        train_filename=meta.filename,
        eval_filename=meta.filename,
        test_filename=meta.filename,
        field_names=meta.field_names,
    )
    one_row_batcher = PoolingBatcher.Config(train_batch_size=1, test_batch_size=1)
    data_config = Data.Config(source=tsv_source, batcher=one_row_batcher)

    trainer_config = TaskTrainer.Config(epochs=1)

    dense_tensorizer = FloatListTensorizer.Config(
        column=meta.dense_col_name,
        error_check=True,
        dim=meta.dense_feat_dim,
    )
    # Build the model inputs with the same type the model config declares.
    model_inputs_type = type(model_class.Config.inputs)
    model_config = model_class.Config(inputs=model_inputs_type(dense=dense_tensorizer))

    task_config = task_class.Config(
        data=data_config,
        trainer=trainer_config,
        model=model_config,
    )
    export_config = ExportConfig(export_torchscript_path="/tmp/model_torchscript.pt")

    return PyTextConfig(
        task=task_config,
        use_tensorboard=False,
        use_cuda_if_available=False,
        export=export_config,
        version=LATEST_VERSION,
    )
def testEmptyBatchPaddingConfigThrowsException(self):
    """An export config that sets only seq_padding_control must be rejected."""
    # batch_padding_control is deliberately left at its default.
    seq_only_config = ExportConfig(seq_padding_control=[0, 10, 20])
    dummy = DummyModel(max_seq_len=22, embedding_dim=10)
    scripted = torch.jit.script(dummy)

    with self.assertRaises(RuntimeError):
        accelerator_transformerLayers_inputs(
            dummy,
            scripted,
            seq_only_config,
            None,
            "",
        )
def torchscript_export(context, export_json, model, output_path, quantize):
    """Convert a pytext model snapshot to a torchscript model."""
    # NOTE(review): the docstring is left verbatim in case a CLI framework
    # renders it as help text - confirm before expanding it.
    #
    # Parameters:
    #   context     - object whose ``obj.load_config()`` yields the task config,
    #                 used to fill in a missing model or output path.
    #   export_json - export config file; may be empty/None.
    #   model       - snapshot to load; defaults to ``config.save_snapshot_path``.
    #   output_path - destination; defaults to "<save_snapshot_path>.torchscript".
    #   quantize    - command-line quantize option.
    export_config = ExportConfig()

    if export_json:
        export_json_config = _load_and_validate_export_json_config(export_json)

        # pop() already removes the key, so one check is sufficient; a second
        # lookup of "read_chunk_size" afterwards could never find anything.
        if export_json_config.pop("read_chunk_size", None) is not None:
            print("Warning: Ignoring read_chunk_size.")

        if "export_list" in export_json_config:
            export_section_config_list = export_json_config["export_list"]
        else:
            export_section_config_list = [export_json_config["export"]]
    else:
        # No export-json was given: still run one export driven purely by the
        # command-line arguments and the default ExportConfig.
        export_section_config_list = [None]

    for export_section_config in export_section_config_list:
        if export_section_config is not None:
            # only populate from export_json if no export option is configured
            # from the command line.
            if not quantize and not output_path:
                export_config.export_caffe2_path = export_section_config.get(
                    "export_caffe2_path", None
                )
                export_config.export_onnx_path = export_section_config.get(
                    "export_onnx_path", "/tmp/model.onnx"
                )
                export_config.torchscript_quantize = export_section_config.get(
                    "torchscript_quantize", False
                )
            else:
                print(
                    "the export-json config is ignored because export options are found the command line"
                )
                export_config.torchscript_quantize = quantize

            export_config.export_torchscript_path = export_section_config.get(
                "export_torchscript_path", None
            )
            # if config has export_torchscript_path, use export_torchscript_path
            # from config, otherwise keep the default from CLI
            if export_config.export_torchscript_path is not None:
                output_path = export_config.export_torchscript_path

            export_config.export_lite_path = export_section_config.get(
                "export_lite_path", None
            )
            export_config.inference_interface = export_section_config.get(
                "inference_interface", None
            )
            export_config.accelerate = export_section_config.get("accelerate", [])
            export_config.seq_padding_control = export_section_config.get(
                "seq_padding_control", None
            )
            export_config.batch_padding_control = export_section_config.get(
                "batch_padding_control", None
            )

        # Fall back to the task config for whatever is still unspecified.
        if not model or not output_path:
            config = context.obj.load_config()
            model = model or config.save_snapshot_path
            output_path = output_path or f"{config.save_snapshot_path}.torchscript"

        print(f"Exporting {model} to torchscript file: {output_path}")
        export_saved_model_to_torchscript(model, output_path, export_config)
def torchscript_export(context, export_json, model, output_path, quantize, target):
    """Convert a pytext model snapshot to a torchscript model."""
    # NOTE(review): the docstring is left verbatim in case a CLI framework
    # renders it as help text - confirm before expanding it.
    #
    # Parameters:
    #   context     - object whose ``obj.load_config()`` yields the task config,
    #                 used to fill in a missing model or output path.
    #   export_json - export config file; may be empty/None.
    #   model       - snapshot to load; defaults to ``config.save_snapshot_path``.
    #   output_path - destination; defaults to "<save_snapshot_path>.torchscript".
    #   quantize    - command-line quantize option.
    #   target      - when set, only export sections with this "target" are run.
    export_cfg = ExportConfig()

    if export_json:
        export_json_config = _load_and_validate_export_json_config(export_json)

        # pop() already removes the key, so one check is sufficient; a second
        # lookup of "read_chunk_size" afterwards could never find anything.
        if export_json_config.pop("read_chunk_size", None) is not None:
            print("Warning: Ignoring read_chunk_size.")

        if "export" in export_json_config:
            export_cfgs = [export_json_config["export"]]
        else:
            export_cfgs = export_json_config["export_list"]

        if target:
            print(
                "A single export was specified in the command line. Filtering out all other export options"
            )
            # .get(): a section without a "target" key simply does not match
            # instead of aborting the whole command with a KeyError.
            export_cfgs = [cfg for cfg in export_cfgs if cfg.get("target") == target]
            if not export_cfgs:
                print(
                    "No ExportConfig matches the target name specified in the command line."
                )
    else:
        # No export-json was given: still run one export driven purely by the
        # command-line arguments and the default ExportConfig.
        export_cfgs = [None]

    for partial_export_cfg in export_cfgs:
        if partial_export_cfg is not None:
            # only populate from export_json if no export option is configured
            # from the command line.
            if not quantize and not output_path:
                export_cfg = config_from_json(ExportConfig, partial_export_cfg)
            else:
                print(
                    "the export-json config is ignored because export options are found the command line"
                )
                export_cfg = config_from_json(
                    ExportConfig,
                    partial_export_cfg,
                    ("export_caffe2_path", "export_onnx_path"),
                )
                export_cfg.torchscript_quantize = quantize

        # if config has export_torchscript_path, use export_torchscript_path
        # from config, otherwise keep the default from CLI
        if export_cfg.export_torchscript_path is not None:
            output_path = export_cfg.export_torchscript_path

        # Fall back to the task config for whatever is still unspecified.
        if not model or not output_path:
            config = context.obj.load_config()
            model = model or config.save_snapshot_path
            output_path = output_path or f"{config.save_snapshot_path}.torchscript"

        print(f"Exporting {model} to torchscript file: {output_path}")
        print(export_cfg)
        export_saved_model_to_torchscript(model, output_path, export_cfg)
def testNonPositiveBatchPaddingIgnored(self):
    """A batch padding list containing only 0 must produce no input examples."""
    dummy = DummyModel(max_seq_len=10, embedding_dim=32)
    scripted = torch.jit.script(dummy)
    padding = ExportConfig(
        seq_padding_control=[22],
        batch_padding_control=[0],
    )

    examples = accelerator_transformerLayers_inputs(dummy, scripted, padding, None, "")

    self.assertEqual(len(examples), 0)
def testSeqPaddingLimitedBymaxSeqLen(self):
    """Sequence paddings above the model's max_seq_len must not add examples."""
    dummy = DummyModel(max_seq_len=10, embedding_dim=32)
    scripted = torch.jit.script(dummy)
    padding = ExportConfig(
        seq_padding_control=[0, 5, 50],
        batch_padding_control=[0, 15],
    )

    examples = accelerator_transformerLayers_inputs(dummy, scripted, padding, None, "")

    # Effective seq paddings are [5, 10]; only batch size 15 is positive.
    self.assertEqual(len(examples), 2)
def testNonPositiveSeqPaddingIgnored(self):
    """Non-positive sequence paddings must be skipped."""
    dummy = DummyModel(max_seq_len=10, embedding_dim=32)
    scripted = torch.jit.script(dummy)
    padding = ExportConfig(
        seq_padding_control=[-2, 0],
        batch_padding_control=[0, 15],
    )

    examples = accelerator_transformerLayers_inputs(dummy, scripted, padding, None, "")

    # Only the default max_seq_len remains as a usable sequence padding.
    self.assertEqual(len(examples), 1)
def testReturnWithCorrectShape(self):
    """Each generated example must carry the expected dims for both inputs."""
    dummy = DummyModel(max_seq_len=10, embedding_dim=32)
    scripted = torch.jit.script(dummy)
    padding = ExportConfig(
        seq_padding_control=[0, 5],
        batch_padding_control=[0, 15],
    )

    examples = accelerator_transformerLayers_inputs(dummy, scripted, padding, None, "")

    self.assertEqual(len(examples), 2)
    # One (first input dims, second input dims) pair per expected example.
    expected_dims = (
        ([5, 15, 32], [15, 5]),
        ([10, 15, 32], [15, 10]),
    )
    for example, (first_dims, second_dims) in zip(examples, expected_dims):
        self.assertEqual(example[0].get_dims(), first_dims)
        self.assertEqual(example[1].get_dims(), second_dims)
def accelerator_transformerLayers_inputs(
    model: nn.Module,
    trace: torch.jit.ScriptFunction,
    export_options: ExportConfig,
    dataset_iterable: Iterable,
    module_path,
):
    """Generate torch_glow input specs for every usable (seq, batch) padding pair.

    Args:
        model: module handed to ``accelerator.get_embedding_module_from_path``.
        trace: scripted model; only ``get_max_seq_len()`` is called on it.
        export_options: source of the padding controls; ``None`` falls back to
            a default ``ExportConfig``.
        dataset_iterable: currently unused, inputs are generated randomly.
        module_path: forwarded to ``accelerator.get_embedding_module_from_path``.

    Returns:
        A list with one ``torch_glow.input_specs_from_tensors`` result per
        positive sequence length / positive batch size combination.

    Raises:
        RuntimeError: if either padding control is ``None``.
    """
    import torch_glow

    # The padding controls come from the export config.
    options = ExportConfig() if export_options is None else export_options
    if options.seq_padding_control is None:
        raise RuntimeError("seq padding control not specified")
    if options.batch_padding_control is None:
        raise RuntimeError("batch padding control not specified")
    batch_sizes = options.batch_padding_control

    # Keep only paddings shorter than the model limit, then add the limit.
    longest = trace.get_max_seq_len()
    seq_lens = [pad for pad in options.seq_padding_control if pad < longest]
    seq_lens.append(longest)

    # NOTE(review): the helper's name suggests it returns a module, yet the
    # value is used as a tensor dimension below - confirm what it returns.
    embedding_dim = accelerator.get_embedding_module_from_path(model, module_path)

    input_examples = []
    for seq_len in seq_lens:
        if seq_len > 0:
            for batch_size in batch_sizes:
                if batch_size > 0:
                    # TODO: inputs are random rather than drawn from
                    # dataset_iterable; enhance later.
                    first_input = torch.randn(
                        [seq_len, batch_size, embedding_dim], dtype=torch.float32
                    )
                    second_input = torch.randn([batch_size, seq_len]).bool()
                    input_examples.append(
                        torch_glow.input_specs_from_tensors([first_input, second_input])
                    )
    return input_examples
def accelerator_lstm_inputs(
    model: nn.Module,
    trace: torch.jit.ScriptFunction,
    export_options: ExportConfig,
    dataset_iterable: Iterable,
    module_path,
):
    """Generate torch_glow input specs for every usable (seq, batch) padding pair.

    Args:
        model: unused here.
        trace: scripted model; ``embedding.word_embedding.embedding_dim``,
            ``lstm_num_layers`` and ``lstm_dim`` are read from it.
        export_options: source of the padding controls; ``None`` falls back to
            a default ``ExportConfig``.
        dataset_iterable: currently unused, inputs are generated randomly.
        module_path: unused here.

    Returns:
        A list with one ``torch_glow.input_specs_from_tensors`` result per
        positive sequence length / positive batch size combination, each built
        from three random float32 tensors.

    Raises:
        RuntimeError: if either padding control is ``None``.
    """
    import torch_glow

    # The padding controls come from the export config.
    options = ExportConfig() if export_options is None else export_options
    if options.seq_padding_control is None:
        raise RuntimeError("seq padding control not specified")
    if options.batch_padding_control is None:
        raise RuntimeError("batch padding control not specified")
    batch_sizes = options.batch_padding_control
    seq_lens = options.seq_padding_control

    # The first input is twice as wide as the word embedding.
    embedding_dim = trace.embedding.word_embedding.embedding_dim * 2
    num_layers = trace.lstm_num_layers
    hidden_dim = trace.lstm_dim

    input_examples = []
    for seq_len in seq_lens:
        if seq_len > 0:
            for batch_size in batch_sizes:
                if batch_size > 0:
                    # TODO: inputs are random rather than drawn from
                    # dataset_iterable; enhance later.
                    state_shape = [batch_size, num_layers, hidden_dim]
                    input_embedding = torch.randn(
                        [batch_size, seq_len, embedding_dim], dtype=torch.float32
                    )
                    input_hidden = torch.randn(state_shape, dtype=torch.float32)
                    input_cell = torch.randn(state_shape, dtype=torch.float32)
                    input_examples.append(
                        torch_glow.input_specs_from_tensors(
                            [input_embedding, input_hidden, input_cell]
                        )
                    )
    return input_examples
def accelerator_transformerLayers_inputs(model: nn.Module,
                                         export_options: ExportConfig,
                                         dataset_iterable: iter, module_path):
    """Generate torch_glow input specs for every usable (seq, batch) padding pair.

    Args:
        model: model providing ``get_max_seq_len()`` and the token embedding
            whose ``embedding_dim`` sizes the first input.
        export_options: source of the padding controls; ``None`` falls back to
            a default ``ExportConfig``.
        dataset_iterable: currently unused, inputs are generated randomly.
        module_path: currently unused, the embedding path is hardcoded.

    Returns:
        A list with one ``torch_glow.input_specs_from_tensors`` result per
        positive sequence length / positive batch size combination.

    Raises:
        RuntimeError: if either padding control is ``None``.
    """
    import torch_glow

    # we use the padding control from the Export Config:
    if export_options is None:
        export_options = ExportConfig()
    seq_padding_control = export_options.seq_padding_control
    batch_padding_control = export_options.batch_padding_control
    if seq_padding_control is None:
        raise RuntimeError("seq padding control not specified")
    if batch_padding_control is None:
        raise RuntimeError("batch padding control not specified")

    # Drop paddings at or above the model limit and add the limit exactly
    # once. Clamping each oversize pad to max_seq_len and then appending
    # max_seq_len again produced duplicate input specs for the same shape.
    max_seq_len = model.get_max_seq_len()
    seq_padding_control = [
        pad for pad in seq_padding_control if pad < max_seq_len
    ] + [max_seq_len]

    # this should use a method, or module_path, instead of being hardcoded
    embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim

    input_examples = []
    for seq_len in seq_padding_control:
        if seq_len <= 0:
            continue
        for batch_size in batch_padding_control:
            if batch_size <= 0:
                continue
            # Todo: We directly generate data input instead of using
            # dataset_iterable, enhance later
            input1 = torch.randn([seq_len, batch_size, embedding_dim],
                                 dtype=torch.float32)
            input2 = torch.randn([batch_size, seq_len]).bool()
            input_specs = torch_glow.input_specs_from_tensors([input1, input2])
            input_examples.append(input_specs)
    return input_examples
def get_seq_and_batch_padding_control(
    trace: torch.jit.ScriptFunction, export_options: ExportConfig
):
    """Return the usable sequence paddings and the batch paddings.

    Args:
        trace: scripted model; only ``get_max_seq_len()`` is called on it.
        export_options: source of the padding controls; ``None`` falls back to
            a default ``ExportConfig``.

    Returns:
        ``(seq_padding_control, batch_padding_control)``. The sequence list
        holds every configured padding strictly below the model's maximum
        sequence length, followed by that maximum. The batch list is returned
        exactly as configured.

    Raises:
        RuntimeError: if either padding control is ``None``.
    """
    # The padding controls come from the export config.
    options = ExportConfig() if export_options is None else export_options
    if options.seq_padding_control is None:
        raise RuntimeError("seq padding control not specified")
    if options.batch_padding_control is None:
        raise RuntimeError("batch padding control not specified")
    batch_paddings = options.batch_padding_control

    # Restrict the sequence paddings to the range the model supports.
    longest = trace.get_max_seq_len()
    seq_paddings = [pad for pad in options.seq_padding_control if pad < longest]
    seq_paddings.append(longest)

    return seq_paddings, batch_paddings
def torchscript_export(
    self,
    model,
    export_path=None,
    sort_input=False,
    sort_key=1,
    export_config=None,
):
    """Trace (and optionally quantize / lower) ``model`` to TorchScript.

    Args:
        model: model to export. It is moved to CPU, put in eval mode and may
            be replaced by a rewritten copy during export.
        export_path: when not ``None``, the trace is saved to this path.
        sort_input: when true, every traced input is reordered by descending
            order of ``inputs[sort_key]`` before the warm-up forward pass.
        sort_key: index of the input used as the sort key.
        export_config: export options; ``None`` means a default ``ExportConfig``.

    Returns:
        The traced (and possibly quantized / lowered) module.
    """
    # TODO(T88310041) Remove These
    torch._C._jit_set_profiling_executor(True)
    torch._C._jit_set_profiling_mode(False)

    # unpack export config
    if export_config is None:
        export_config = ExportConfig()

    quantize = export_config.torchscript_quantize
    seq_padding_control = export_config.seq_padding_control
    batch_padding_control = export_config.batch_padding_control
    accel = AccelerateOptions(export_config.accelerate)
    print(f"Using accelerate options: {accel.__dict__}")

    # what hosts can this model run on
    # by default, pytext works on CPU and CUDA (because it implements set_device)
    model_host = ["cpu", "cuda"]
    if accel.use_cuda:
        # CUDA FP16 models only work on CUDA
        model_host = ["cuda"]
    if accel.use_nnpi:
        model_host = ["nnpi"]
    if hasattr(model, "set_host"):
        model.set_host(model_host)

    # what is the type of this model
    # pytext models are nlp models
    model_type = ["nlp"]

    instance_paths_p = any(
        True for _ in find_module_instances(model, RoBERTaEncoder, [])
    )
    if instance_paths_p:
        model_type.append("transformer")

    instance_paths_p = any(True for _ in find_module_instances(model, BiLSTM, []))
    if instance_paths_p:
        model_type.append("BiLSTM")

    # Guard on the method that is actually called. Checking for "set_model"
    # skipped models that only define set_type and crashed on models that
    # define set_model without set_type.
    if hasattr(model, "set_type"):
        model.set_type(model_type)

    # Make sure to put the model on CPU and disable CUDA before exporting to
    # ONNX to disable any data_parallel pieces
    cuda.CUDA_ENABLED = False
    model.cpu()
    optimizer = self.trainer.optimizer
    optimizer.pre_export(model)

    model = rewrite_nnpi_modules(model, accel)

    # Trace needs eval mode, to disable dropout etc
    model.eval()
    model.prepare_for_onnx_export_()

    unused_raw_batch, batch = next(
        iter(self.data.batches(Stage.TRAIN, load_early=True))
    )
    inputs = model.onnx_trace_input(batch)
    # call model forward to set correct device types
    if sort_input:
        _, sorted_indices = sort(inputs[sort_key], descending=True)
        inputs = [i.index_select(0, sorted_indices) for i in inputs]
    model(*inputs)

    # Default to dynamic
    if accel.use_fx_quantize:
        data_loader = self.data.batches(Stage.TRAIN, load_early=False)
        trace = quantize_fx(
            model,
            inputs,
            data_loader,
            accel.use_nnpi_fx_dynamic_quantize or accel.use_cpu_fx_dynamic_quantize,
            accel.use_nnpi_fx_static_selectively_quantize
            or accel.use_cpu_fx_static_selectively_quantize,
        )
    elif (quantize or accel.use_nnpi_quantize) and hasattr(
        model, "graph_mode_quantize"
    ):
        data_loader = self.data.batches(Stage.TRAIN, load_early=False)
        print("Quantizing the model ...")
        # recognize legacy nnpi_q or $platform:$option syntax
        quantize_linear_only = accel.use_nnpi_quantize
        module_swap = accel.use_nnpi
        trace = quantize_statically(
            model, inputs, data_loader, quantize_linear_only, module_swap
        )
    else:
        if quantize:
            log_feature_usage("quantize.dynamically.CPU")
            model.quantize()
        if accel.use_cuda and (accel.use_cuda_half_ft or accel.use_cuda_dq):
            log_accelerator_feature_usage("build.CUDA.half.faster_transformers")
            # We need a separate path for GPU-only tracing, as we can't just
            # trace a CPU model and invoke .cuda().half(),
            # as we don't have equivalent CPU implementations of these operators.
            precision.FP16_ENABLED = True
            cuda.CUDA_ENABLED = True
            if accel.use_cuda_dq:
                model = swap_modules(model, MODULE_TO_REWRITER["cuda-dq"])
            else:
                model = swap_modules(model, MODULE_TO_REWRITER["cuda"])
            model.eval()
            model.half().cuda()
            # obtain new inputs with cuda/fp16 enabled.
            unused_raw_batch, batch = next(
                iter(self.data.batches(Stage.TRAIN, load_early=True))
            )
            inputs = model.onnx_trace_input(batch)
            trace = model.trace(inputs)
            print("Traced (faster_transformers)!")
            # should be unnecessary.
            trace.cuda().half()
            unused_raw_batch, batch = next(
                iter(self.data.batches(Stage.TRAIN, load_early=True))
            )
            inputs = model.onnx_trace_input(batch)
            results = trace(*inputs)
            assert results
        else:
            trace = model.trace(inputs)
            print("Traced!")
            if accel.use_cuda and accel.use_cuda_half:
                log_accelerator_feature_usage("build.CUDA.half")
                # convert trace to half precision
                trace.cuda().half()

                #### trace test: demonstrate that it is usable
                precision.FP16_ENABLED = True
                cuda.CUDA_ENABLED = True
                unused_raw_batch, batch = next(
                    iter(self.data.batches(Stage.TRAIN, load_early=True))
                )
                inputs = model.onnx_trace_input(batch)
                assert trace(*inputs)
                #### end of trace test

    if hasattr(model, "torchscriptify"):
        trace = model.torchscriptify(self.data.tensorizers, trace)
    if hasattr(trace, "validate"):
        trace.validate(export_config)
    if seq_padding_control is not None:
        if hasattr(trace, "set_padding_control"):
            trace.set_padding_control("sequence_length", seq_padding_control)
        else:
            print(
                "Padding_control not supported by model. Ignoring seq_padding_control"
            )
    if batch_padding_control is not None:
        if hasattr(trace, "set_padding_control"):
            trace.set_padding_control("batch_length", batch_padding_control)
        else:
            print(
                "Padding_control not supported by model. Ignoring batch_padding_control"
            )
    # Call _pack() on every submodule that defines it.
    trace.apply(lambda s: s._pack() if s._c._has_method("_pack") else None)
    if accel.use_nnpi and not accel.use_nnpi_split:
        print("Lowering using to_glow")
        trace = lower_modules_to_accelerator(
            model,
            trace,
            export_config,
            accel.use_nnpi_throughput_optimized,
            accel.use_nnpi_throughput_maximized,
            accel.use_nnpi_gelu_clip,
        )
    if accel.use_nnpi_split:
        print("Lowering split model to Glow")
        trace = lower_split_model_to_accelerator(model, trace, export_config)

    if export_path is not None:
        print(f"Saving torchscript model to: {export_path}")
        with PathManager.open(export_path, "wb") as f:
            torch.jit.save(trace, f)
    return trace
def torchscript_export(
    self,
    model,
    export_path=None,
    sort_input=False,
    sort_key=1,
    export_config=None,
):
    """Trace (and optionally quantize / lower) ``model`` to TorchScript.

    Args:
        model: model to export. It is moved to CPU and put in eval mode; when
            "nnpi" is in the accelerate list it is first replaced by the
            result of ``swap_modules_for_accelerator``.
        export_path: when not ``None``, the trace is saved to this path.
        sort_input: when true, every traced input is reordered by descending
            order of ``inputs[sort_key]`` before the warm-up forward pass.
        sort_key: index of the input used as the sort key.
        export_config: export options; ``None`` means a default ``ExportConfig``.

    Returns:
        The traced (and possibly quantized / lowered) module.
    """
    # unpack export config
    if export_config is None:
        export_config = ExportConfig()

    quantize = export_config.torchscript_quantize
    # NOTE(review): the membership tests below assume ``accelerate`` is a
    # container; a None value would raise TypeError - confirm the default.
    accelerate = export_config.accelerate
    seq_padding_control = export_config.seq_padding_control
    batch_padding_control = export_config.batch_padding_control
    inference_interface = export_config.inference_interface

    # Make sure to put the model on CPU and disable CUDA before exporting to
    # ONNX to disable any data_parallel pieces
    cuda.CUDA_ENABLED = False
    model.cpu()
    optimizer = self.trainer.optimizer
    optimizer.pre_export(model)

    if "nnpi" in accelerate:
        model = swap_modules_for_accelerator(model)

    # Trace needs eval mode, to disable dropout etc
    model.eval()
    model.prepare_for_onnx_export_()

    # Take one training batch as the example input for tracing.
    unused_raw_batch, batch = next(
        iter(self.data.batches(Stage.TRAIN, load_early=True))
    )
    inputs = model.onnx_trace_input(batch)
    # call model forward to set correct device types
    if sort_input:
        _, sorted_indices = sort(inputs[sort_key], descending=True)
        inputs = [i.index_select(0, sorted_indices) for i in inputs]
    model(*inputs)

    use_cuda_half = "cuda:half" in accelerate
    if quantize and hasattr(model, "graph_mode_quantize"):
        # Static quantization path, driven by the training batches.
        data_loader = self.data.batches(Stage.TRAIN, load_early=False)
        print("Quantizing the model ...")
        quantize_linear_only = "nnpi_quantize" in accelerate
        module_swap = "nnpi" in accelerate
        trace = quantize_statically(
            model, inputs, data_loader, quantize_linear_only, module_swap
        )
    else:
        # Plain tracing, preceded by model.quantize() when requested.
        if quantize:
            log_accelerator_feature_usage("quantize.dynamically.CPU")
            model.quantize()
        trace = model.trace(inputs)
        print("traced!")
        if use_cuda_half:
            log_accelerator_feature_usage("build.CUDA.half")
            # convert trace to half precision
            trace.cuda().half()

            #### trace test: demonstrate that it is usable
            # The global FP16/CUDA flags are switched on here and are not
            # restored afterwards.
            precision.FP16_ENABLED = True
            cuda.CUDA_ENABLED = True
            unused_raw_batch, batch = next(
                iter(self.data.batches(Stage.TRAIN, load_early=True))
            )
            inputs = model.onnx_trace_input(batch)
            assert trace(*inputs)
            #### end of trace test

    # Optional hooks: each is applied only if the object provides it.
    if hasattr(model, "torchscriptify"):
        trace = model.torchscriptify(self.data.tensorizers, trace)
    if hasattr(trace, "validate"):
        trace.validate(export_config)
    if seq_padding_control is not None:
        if hasattr(trace, "set_padding_control"):
            trace.set_padding_control("sequence_length", seq_padding_control)
        else:
            print(
                "Padding_control not supported by model. Ignoring seq_padding_control"
            )
    if batch_padding_control is not None:
        if hasattr(trace, "set_padding_control"):
            trace.set_padding_control("batch_length", batch_padding_control)
        else:
            print(
                "Padding_control not supported by model. Ignoring batch_padding_control"
            )
    if inference_interface is not None:
        if hasattr(trace, "inference_interface"):
            trace.inference_interface(inference_interface)
        else:
            print(
                "inference_interface not supported by model. Ignoring inference_interface"
            )
    # Call _pack() on every submodule that defines it.
    trace.apply(lambda s: s._pack() if s._c._has_method("_pack") else None)
    if "nnpi" in accelerate:
        print("lowering using to_glow")
        trace = lower_modules_to_accelerator(model, trace, export_config)
    if export_path is not None:
        print(f"Saving torchscript model to: {export_path}")
        with PathManager.open(export_path, "wb") as f:
            torch.jit.save(trace, f)
    return trace
def torchscript_export(
    self,
    model,
    export_path=None,
    sort_input=False,
    sort_key=1,
    export_config=None,
):
    """Trace (and optionally quantize / lower) ``model`` to TorchScript.

    The behaviour is selected by option strings in ``export_config.accelerate``
    (for example "nnpi", "nnpi:quantize", "cuda:half", "cuda:half:ft" and the
    "*:fx_*_quantize" options).

    Args:
        model: model to export. It is moved to CPU and put in eval mode, and
            may be replaced by a module-swapped copy.
        export_path: when not ``None``, the trace is saved to this path.
        sort_input: when true, every traced input is reordered by descending
            order of ``inputs[sort_key]`` before the warm-up forward pass.
        sort_key: index of the input used as the sort key.
        export_config: export options; ``None`` means a default ``ExportConfig``.

    Returns:
        The traced (and possibly quantized / lowered) module.
    """
    # unpack export config
    if export_config is None:
        export_config = ExportConfig()

    quantize = export_config.torchscript_quantize
    # NOTE(review): the membership tests below assume ``accelerate`` is a
    # container; a None value would raise TypeError - confirm the default.
    accelerate = export_config.accelerate
    seq_padding_control = export_config.seq_padding_control
    batch_padding_control = export_config.batch_padding_control

    # introduce a single nnpi:quantize that obviates need for torchscript quantize on NNPI
    use_nnpi = ("nnpi" in accelerate) or ("nnpi:quantize" in accelerate)
    use_nnpi_throughput_optimized = "nnpi:throughput_optimized" in accelerate
    use_cuda_half = "cuda:half" in accelerate
    use_cuda_half_faster_transformers = "cuda:half:ft" in accelerate
    use_nnpi_quantize = "nnpi:quantize" in accelerate
    use_nnpi_fx_static_quantize = "nnpi:fx_static_quantize" in accelerate
    use_nnpi_fx_dynamic_quantize = "nnpi:fx_dynamic_quantize" in accelerate
    use_cpu_fx_static_quantize = "cpu:fx_static_quantize" in accelerate
    use_cpu_fx_dynamic_quantize = "cpu:fx_dynamic_quantize" in accelerate
    # Any of the four FX options selects the quantize_fx path below.
    use_fx_quantize = (use_nnpi_fx_static_quantize
                       or use_nnpi_fx_dynamic_quantize
                       or use_cpu_fx_static_quantize
                       or use_cpu_fx_dynamic_quantize)

    # Make sure to put the model on CPU and disable CUDA before exporting to
    # ONNX to disable any data_parallel pieces
    cuda.CUDA_ENABLED = False
    model.cpu()
    optimizer = self.trainer.optimizer
    optimizer.pre_export(model)

    if use_nnpi or use_fx_quantize:
        model = swap_modules_for_accelerator(model)

    # Trace needs eval mode, to disable dropout etc
    model.eval()
    model.prepare_for_onnx_export_()

    # Take one training batch as the example input for tracing.
    unused_raw_batch, batch = next(
        iter(self.data.batches(Stage.TRAIN, load_early=True)))
    inputs = model.onnx_trace_input(batch)
    # call model forward to set correct device types
    if sort_input:
        _, sorted_indices = sort(inputs[sort_key], descending=True)
        inputs = [i.index_select(0, sorted_indices) for i in inputs]
    model(*inputs)

    # Default to dynamic
    if use_fx_quantize:
        data_loader = self.data.batches(Stage.TRAIN, load_early=False)
        # The last argument is true when a dynamic FX option was requested.
        trace = quantize_fx(
            model,
            inputs,
            data_loader,
            use_nnpi_fx_dynamic_quantize or use_cpu_fx_dynamic_quantize,
        )
    elif (quantize or use_nnpi_quantize) and hasattr(
            model, "graph_mode_quantize"):
        data_loader = self.data.batches(Stage.TRAIN, load_early=False)
        print("Quantizing the model ...")
        # recognize legacy nnpi_q or $platform:$option syntax
        quantize_linear_only = use_nnpi_quantize or ("nnpi_quantize" in accelerate)
        module_swap = use_nnpi
        trace = quantize_statically(model, inputs, data_loader,
                                    quantize_linear_only, module_swap)
    else:
        if quantize:
            log_feature_usage("quantize.dynamically.CPU")
            model.quantize()
        if use_cuda_half_faster_transformers:
            log_accelerator_feature_usage(
                "build.CUDA.half.faster_transformers")
            # We need a separate path for GPU-only tracing, as we can't just trace a CPU model
            # and invoke .cuda().half(),
            # as we don't have equivalent CPU implementations of these operators.
            # The global FP16/CUDA flags are switched on here and are not
            # restored afterwards.
            precision.FP16_ENABLED = True
            cuda.CUDA_ENABLED = True
            model = swap_modules_for_faster_transformer(model)
            model.eval()
            model.half().cuda()
            # obtain new inputs with cuda/fp16 enabled.
            unused_raw_batch, batch = next(
                iter(self.data.batches(Stage.TRAIN, load_early=True)))
            inputs = model.onnx_trace_input(batch)
            trace = model.trace(inputs)
            print("traced (faster_transformers)!")
            # should be unnecessary.
            trace.cuda().half()
            # Smoke test: run the trace once on a fresh batch.
            unused_raw_batch, batch = next(
                iter(self.data.batches(Stage.TRAIN, load_early=True)))
            inputs = model.onnx_trace_input(batch)
            assert trace(*inputs)
        else:
            trace = model.trace(inputs)
            print("traced!")
            if use_cuda_half:
                log_accelerator_feature_usage("build.CUDA.half")
                # convert trace to half precision
                trace.cuda().half()

                #### trace test: demonstrate that it is usable
                precision.FP16_ENABLED = True
                cuda.CUDA_ENABLED = True
                unused_raw_batch, batch = next(
                    iter(self.data.batches(Stage.TRAIN, load_early=True)))
                inputs = model.onnx_trace_input(batch)
                assert trace(*inputs)
                #### end of trace test

    # Optional hooks: each is applied only if the object provides it.
    if hasattr(model, "torchscriptify"):
        trace = model.torchscriptify(self.data.tensorizers, trace)
    if hasattr(trace, "validate"):
        trace.validate(export_config)
    if seq_padding_control is not None:
        if hasattr(trace, "set_padding_control"):
            trace.set_padding_control("sequence_length", seq_padding_control)
        else:
            print(
                "Padding_control not supported by model. Ignoring seq_padding_control"
            )
    if batch_padding_control is not None:
        if hasattr(trace, "set_padding_control"):
            trace.set_padding_control("batch_length", batch_padding_control)
        else:
            print(
                "Padding_control not supported by model. Ignoring batch_padding_control"
            )
    # Call _pack() on every submodule that defines it.
    trace.apply(lambda s: s._pack() if s._c._has_method("_pack") else None)
    if use_nnpi:
        print("lowering using to_glow")
        trace = lower_modules_to_accelerator(
            model, trace, export_config, use_nnpi_throughput_optimized)
    if export_path is not None:
        print(f"Saving torchscript model to: {export_path}")
        with PathManager.open(export_path, "wb") as f:
            torch.jit.save(trace, f)
    return trace
def torchscript_export(self, model, export_path=None, export_config=None):  # noqa
    """Trace ``model`` (or only its first encoder) to TorchScript.

    Args:
        model: model to export; it is moved to CPU and put in eval mode.
        export_path: when not ``None``, the trace is saved to this path.
        export_config: export options; ``None`` means a default ``ExportConfig``.

    Returns:
        The traced module.

    Raises:
        RuntimeError: if the export config requests any accelerate option.
    """
    # Read every option once from the (possibly defaulted) export config.
    if export_config is None:
        export_config = ExportConfig()
    quantize = export_config.torchscript_quantize
    accelerate = export_config.accelerate
    seq_padding_control = export_config.seq_padding_control
    batch_padding_control = export_config.batch_padding_control

    if accelerate is not None and accelerate != []:
        raise RuntimeError(
            "old-style task.py does not support export for NNPI accelerators"
        )

    # Export from CPU with CUDA disabled.
    cuda.CUDA_ENABLED = False
    model.cpu()
    self.trainer.optimizer.pre_export(model)

    model.eval()
    model.prepare_for_onnx_export_()

    # One training batch provides the example inputs; run a warm-up forward.
    first_batch = next(iter(self.data.batches(Stage.TRAIN, load_early=True)))
    unused_raw_batch, batch = first_batch
    inputs = model.onnx_trace_input(batch)
    model(*inputs)

    if quantize:
        model.quantize()

    # Either the whole model is traced or only encoder1 on the first input.
    if self.trace_both_encoders:
        traced_module, example_inputs = model, inputs
    else:
        traced_module, example_inputs = model.encoder1, (inputs[0], )
    trace = jit.trace(traced_module, example_inputs)

    if hasattr(model, "torchscriptify"):
        trace = model.torchscriptify(
            self.data.tensorizers, trace, self.trace_both_encoders
        )

    # Apply sequence padding first, then batch padding, when configured.
    requested_padding = (
        ("sequence_length", seq_padding_control),
        ("batch_length", batch_padding_control),
    )
    for control_name, control_values in requested_padding:
        if control_values is None:
            continue
        if hasattr(trace, "set_padding_control"):
            trace.set_padding_control(control_name, control_values)
        else:
            print(
                "Padding_control not supported by model. Ignoring padding_control"
            )

    # Call _pack() on every submodule that defines it.
    trace.apply(lambda s: s._pack() if s._c._has_method("_pack") else None)

    if export_path is not None:
        print(f"Saving torchscript model to: {export_path}")
        with PathManager.open(export_path, "wb") as f:
            torch.jit.save(trace, f)
    return trace
def torchscript_export(self, model, export_path=None, export_config=None):  # noqa
    """Trace ``model`` (or only its first encoder) to TorchScript.

    Args:
        model: model to export; it is moved to CPU and put in eval mode.
        export_path: when not ``None``, the trace is saved to this path.
        export_config: export options; ``None`` means a default ``ExportConfig``.

    Returns:
        The traced module, frozen when "nnpi" is among the accelerate options.
    """
    # Read every option once from the (possibly defaulted) export config.
    if export_config is None:
        export_config = ExportConfig()
    quantize = export_config.torchscript_quantize
    accelerate = export_config.accelerate
    seq_padding_control = export_config.seq_padding_control
    batch_padding_control = export_config.batch_padding_control
    inference_interface = export_config.inference_interface

    # Export from CPU with CUDA disabled.
    cuda.CUDA_ENABLED = False
    model.cpu()
    self.trainer.optimizer.pre_export(model)

    model.eval()
    model.prepare_for_onnx_export_()

    # One training batch provides the example inputs; run a warm-up forward.
    first_batch = next(iter(self.data.batches(Stage.TRAIN, load_early=True)))
    unused_raw_batch, batch = first_batch
    inputs = model.onnx_trace_input(batch)
    model(*inputs)

    if quantize:
        model.quantize()
    if accelerate is not None and "half" in accelerate:
        model.half()

    # Either the whole model is traced or only encoder1 on the first input.
    if self.trace_both_encoders:
        traced_module, example_inputs = model, inputs
    else:
        traced_module, example_inputs = model.encoder1, (inputs[0], )
    trace = jit.trace(traced_module, example_inputs)

    if hasattr(model, "torchscriptify"):
        trace = model.torchscriptify(
            self.data.tensorizers, trace, self.trace_both_encoders
        )

    # Apply sequence padding first, then batch padding, when configured.
    requested_padding = (
        ("sequence_length", seq_padding_control),
        ("batch_length", batch_padding_control),
    )
    for control_name, control_values in requested_padding:
        if control_values is None:
            continue
        if hasattr(trace, "set_padding_control"):
            trace.set_padding_control(control_name, control_values)
        else:
            print(
                "Padding_control not supported by model. Ignoring padding_control"
            )

    if inference_interface is not None:
        if hasattr(trace, "inference_interface"):
            trace.inference_interface(inference_interface)
        else:
            print(
                "inference_interface not supported by model. Ignoring inference_interface"
            )

    # Call _pack() on every submodule that defines it.
    trace.apply(lambda s: s._pack() if s._c._has_method("_pack") else None)

    if accelerate is not None and "nnpi" in accelerate:
        # Freeze the module while keeping the listed attributes.
        kept_attrs = ["make_prediction", "make_batch", "set_padding_control"]
        trace._c = torch._C._freeze_module(trace._c, preservedAttrs=kept_attrs)

    if export_path is not None:
        print(f"Saving torchscript model to: {export_path}")
        with PathManager.open(export_path, "wb") as f:
            torch.jit.save(trace, f)
    return trace