def lower_modules_to_accelerator(model: nn.Module, trace, export_options: ExportConfig):
    """Lower the RoBERTa encoder submodule of ``model`` to the NNPI accelerator.

    If the model has a RoBERTa encoder, build a torch_glow compilation spec
    (FP16 conversion plus any backend-specific options declared on the module
    via the accelerator decorator) and selectively lower the registered
    submodule of ``trace`` to Glow. Otherwise the trace is returned unchanged.

    Args:
        model: the eager-mode model whose decorated submodules are inspected.
        trace: the scripted/traced module to lower selectively.
        export_options: export configuration forwarded to the inputs function.

    Returns:
        The (possibly lowered) trace.

    Raises:
        RuntimeError: if the accelerator decorator did not supply an
            ``inputs_function`` — previously this case silently produced a
            compilation group with no input sets, which fails downstream.
    """
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        backend = "NNPI"
        (
            submod_modelpath,
            compilation_spec_dict,
            inputs_function,
        ) = accelerator.get_modules(model, backend)[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)

        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)

        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)

        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        if inputs_function is not None:
            input_sets = inputs_function(
                model, trace, export_options, None, submod_modelpath
            )
            compilation_group.set_input_sets(input_sets)
        else:
            # Fix: fail loudly instead of submitting a spec with no input sets.
            raise RuntimeError(
                "inputs_function needs to be specified in accelerator decorator"
            )

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )
        return trace
    else:
        return trace
def lower_modules_to_accelerator(model: nn.Module, trace, export_options: ExportConfig):
    """Selectively lower a RoBERTa encoder submodule of ``trace`` to NNPI.

    Models without a ``RoBERTaEncoder`` are returned untouched. For eligible
    models, a Glow compilation spec is assembled (FP16 conversion enabled,
    plus backend-specific options from the accelerator decorator) and input
    sets are generated for the transformer layers before lowering.
    """
    import torch_glow

    # Guard: only RoBERTa-encoder models are lowered.
    if not (hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder)):
        return trace

    backend = "NNPI"
    modelpath, backend_opts = accelerator.get_modules(model, backend)[0]
    tracepath = accelerator.model2trace_path(modelpath)

    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend(backend)

    group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(group)
    group.get_settings().set_convert_to_fp16(True)
    for opt_name, opt_value in backend_opts.items():
        group.get_settings().backend_specific_opts_insert(opt_name, opt_value)

    # TODO: the @inputs decorator does not work properly yet; until it is
    # fixed, call the transformer-layer input generator directly.
    input_sets = accelerator_transformerLayers_inputs(
        model, trace, export_options, None, tracepath
    )
    group.set_input_sets(input_sets)

    return torch_glow.to_glow_selective(
        trace,
        {tracepath: spec},
        inplace=False,
    )
def lower_modules_to_accelerator(
    model: nn.Module, trace, export_options: ExportConfig, throughput_optimize=False
):
    """Lower the first accelerator-decorated submodule of ``model`` to NNPI.

    Eligible models are those with a ``RoBERTaEncoder`` encoder, an
    ``AcceleratorBiLSTM`` representation, or the internal CNN LM lower
    module; all others are returned unchanged.

    Args:
        model: eager-mode model whose decorated submodules are inspected.
        trace: scripted/traced module to lower selectively.
        export_options: forwarded to the decorator-supplied inputs function.
        throughput_optimize: when True, override the NNPI options for a
            throughput-optimized deployment (4 ICE cores, 4 parallel
            chunks, replication count 3).

    Raises:
        RuntimeError: if accelerator lowering is unavailable or the
            decorator did not provide an ``inputs_function``.
    """
    # Bail out early when the accelerator stack could not be imported.
    if not accelerator_lowering_supported:
        raise RuntimeError("Accelerator Lowering not supported!")

    import torch_glow

    log_accelerator_feature_usage("build.NNPI")

    has_roberta = hasattr(model, "encoder") and isinstance(
        model.encoder, RoBERTaEncoder
    )
    has_bilstm = hasattr(model, "representation") and isinstance(
        model.representation, AcceleratorBiLSTM
    )
    # Internal CNN LM module to add accelerator support.
    has_cnn_lm = (
        hasattr(model, "lower_module")
        and type(model.lower_module).__qualname__ == "CNNLowerModule"
    )

    if not (has_roberta or has_bilstm or has_cnn_lm):
        return trace

    backend = "NNPI"
    (
        modelpath,
        backend_opts,
        inputs_function,
    ) = accelerator.get_modules(model, backend)[0]
    tracepath = accelerator.model2trace_path(modelpath)

    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend(backend)

    group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(group)
    group_settings = group.get_settings()
    group_settings.set_convert_to_fp16(True)

    # Override the options for the throughput-optimized case.
    if throughput_optimize:
        backend_opts["NNPI_IceCores"] = "4"
        backend_opts["NNPINumParallelChunks"] = "4"
        group_settings.set_replication_count(3)

    for opt_name, opt_value in backend_opts.items():
        group.get_settings().backend_specific_opts_insert(opt_name, opt_value)

    if inputs_function is None:
        raise RuntimeError(
            "inputs_function needs to be specified in accelerator decorator"
        )
    input_sets = inputs_function(model, trace, export_options, None, modelpath)
    group.set_input_sets(input_sets)

    return torch_glow.to_glow_selective(
        trace,
        {tracepath: spec},
        inplace=False,
    )
def lower_modules_to_accelerator(
    model, trace, seq_padding_control, batch_padding_control
):
    """Lower a RoBERTa encoder submodule to NNPI with padded input specs.

    For every positive (seq_len, batch_size) combination from the padding
    controls, one input set is registered: a float32 embedding tensor of
    shape [seq_len, batch_size, embedding_dim] and a boolean mask of shape
    [batch_size, seq_len]. Non-RoBERTa models are returned unchanged.
    """
    import torch_glow

    if not (hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder)):
        return trace

    backend = "NNPI"
    modelpath, backend_opts = accelerator.get_modules(model, backend)[0]
    tracepath = accelerator.model2trace_path(modelpath)
    embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim

    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend(backend)

    group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(group)
    group.get_settings().set_convert_to_fp16(True)
    for opt_name, opt_value in backend_opts.items():
        group.get_settings().backend_specific_opts_insert(opt_name, opt_value)

    # One input set per valid padding combination; non-positive entries in
    # the padding controls are skipped.
    for seq_len in seq_padding_control:
        if seq_len <= 0:
            continue
        for batch_size in batch_padding_control:
            if batch_size <= 0:
                continue
            embeddings = torch.randn(
                [seq_len, batch_size, embedding_dim], dtype=torch.float32
            )
            pad_mask = torch.randn([batch_size, seq_len]).bool()
            group.input_sets_append(
                torch_glow.input_specs_from_tensors([embeddings, pad_mask])
            )

    return torch_glow.to_glow_selective(
        trace,
        {tracepath: spec},
        inplace=False,
    )
def lower_modules_to_accelerator(
    model: nn.Module, trace, export_options: ExportConfig, throughput_optimize=False
):
    """Lower the first accelerator-decorated submodule of ``model`` to NNPI.

    Eligible models carry a ``RoBERTaEncoder`` encoder, an
    ``AcceleratorBiLSTM`` representation, or the internal CNN LM lower
    module; anything else is returned untouched. When
    ``throughput_optimize`` is True the module registry is queried with the
    ``:throughput_optimized`` backend qualifier.

    FP16 conversion and replication count are read out of the decorator's
    spec dict (keys ``glow:ConvertToFP16`` / ``glow:ReplicationCount``)
    rather than passed as backend-specific options.

    Raises:
        RuntimeError: if lowering is unavailable, no module was registered
            for the backend, or no ``inputs_function`` was provided.
    """
    # Raise error if accelerator could not be imported.
    if not accelerator_lowering_supported:
        raise RuntimeError("Accelerator Lowering not supported!")

    import torch_glow

    log_accelerator_feature_usage("build.NNPI")

    has_roberta = hasattr(model, "encoder") and isinstance(
        model.encoder, RoBERTaEncoder
    )
    has_bilstm = hasattr(model, "representation") and isinstance(
        model.representation, AcceleratorBiLSTM
    )
    # Internal CNN LM module to add accelerator support.
    has_cnn_lm = (
        hasattr(model, "lower_module")
        and type(model.lower_module).__qualname__ == "CNNLowerModule"
    )

    if not (has_roberta or has_bilstm or has_cnn_lm):
        return trace

    backend = "NNPI"
    backend_qualifier = ":throughput_optimized" if throughput_optimize else ""

    modules_to_lower = accelerator.get_modules(model, backend + backend_qualifier)
    if not modules_to_lower:
        raise RuntimeError("Need at least one module to lower to accelerator")
    if len(modules_to_lower) > 1:
        print(f"Warning. Received {len(modules_to_lower)} modules to lower.")
        print("Warning. Only lowering first module.")

    (
        modelpath,
        spec_dict,
        inputs_function,
    ) = modules_to_lower[0]
    tracepath = accelerator.model2trace_path(modelpath)

    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend(backend)

    group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(group)
    group_settings = group.get_settings()

    # Set values from dict that are not set via backend-specific opts.
    fp16_flag = spec_dict.pop("glow:ConvertToFP16", "true")
    group_settings.set_convert_to_fp16(fp16_flag in ["true", "True"])
    group_settings.set_replication_count(
        int(spec_dict.pop("glow:ReplicationCount", "1"))
    )

    for opt_name, opt_value in spec_dict.items():
        group.get_settings().backend_specific_opts_insert(opt_name, opt_value)

    if inputs_function is None:
        raise RuntimeError(
            "inputs_function needs to be specified in accelerator decorator"
        )
    input_sets = inputs_function(model, trace, export_options, None, modelpath)
    group.set_input_sets(input_sets)

    return torch_glow.to_glow_selective(
        trace,
        {tracepath: spec},
        inplace=False,
    )