def test_to_glow_selective(self):
    a = torch.zeros(4) + 8
    b = torch.zeros(4) + 7
    torch_res = model(a, b)

    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend("Interpreter")

    compilation_group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(compilation_group)

    a_spec = torch_glow.InputSpec()
    a_spec.set_same_as(a)
    b_spec = torch_glow.InputSpec()
    b_spec.set_same_as(b)
    compilation_group.input_sets_append([a_spec, b_spec])

    glow_mod = torch_glow.to_glow_selective(
        model, {"foo.bar": (spec, (a, b)), "qux": (spec, (a, b))}
    )
    glow_mod = torch.jit.trace(glow_mod, (a, b))
    glow_res = glow_mod(a, b)
    assert torch.allclose(torch_res, glow_res)
def lower_modules_to_accelerator(
    model: nn.Module, trace, export_options: ExportConfig
):
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        backend = "NNPI"
        (
            submod_modelpath,
            compilation_spec_dict,
            inputs_function,
        ) = accelerator.get_modules(model, backend)[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)

        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)

        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)

        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)

        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        if inputs_function is not None:
            input_sets = inputs_function(
                model, trace, export_options, None, submod_modelpath
            )
            compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )
        return trace
    else:
        return trace
def test_to_glow_selective_already_scripted(self):
    a = torch.zeros(4) + 8
    b = torch.zeros(4) + 7
    torch_res = model(a, b)

    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend("Interpreter")

    compilation_group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(compilation_group)

    a_spec = torch_glow.InputSpec()
    a_spec.set_same_as(a)
    b_spec = torch_glow.InputSpec()
    b_spec.set_same_as(b)
    compilation_group.input_sets_append([a_spec, b_spec])

    with torch.no_grad():
        traced_model = torch.jit.trace(model, (a, b))

    glow_mod = torch_glow.to_glow_selective(
        traced_model,
        {"foo.bar": spec, "qux": spec},
        inplace=False,
    )
    glow_res = glow_mod(a, b)
    assert torch.allclose(torch_res, glow_res)
def test_to_glow_selective(self):
    inputs = (torch.zeros(4) + 8, torch.zeros(4) + 7)
    torch_res = model(*inputs)

    bar_inputs = [
        torch.randn(shape)
        for shape in torch_glow.get_submod_input_shapes(model, "foo.bar", inputs)
    ]
    qux_inputs = [
        torch.randn(shape)
        for shape in torch_glow.get_submod_input_shapes(model, "qux", inputs)
    ]

    glow_mod = torch_glow.to_glow_selective(
        model,
        {
            "foo.bar": (get_compilation_spec(bar_inputs), bar_inputs),
            "qux": (get_compilation_spec(qux_inputs), qux_inputs),
        },
        inplace=False,
    )
    glow_mod = torch.jit.trace(glow_mod, inputs)
    glow_res = glow_mod(*inputs)
    assert torch.allclose(torch_res, glow_res)
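# The tests above and below call a get_compilation_spec helper that is not
# defined in these snippets. A minimal sketch, assuming it only wraps the
# CompilationSpec/InputSpec boilerplate already shown in the earlier tests;
# the actual test utility may differ.
def get_compilation_spec(inputs, backend="Interpreter"):
    spec = torch_glow.CompilationSpec()
    spec.get_settings().set_glow_backend(backend)

    compilation_group = torch_glow.CompilationGroup()
    spec.compilation_groups_append(compilation_group)

    # One InputSpec per example input, matching that tensor's shape and dtype.
    input_specs = []
    for tensor in inputs:
        input_spec = torch_glow.InputSpec()
        input_spec.set_same_as(tensor)
        input_specs.append(input_spec)
    compilation_group.input_sets_append(input_specs)
    return spec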
def lower_modules_to_accelerator(
    model: nn.Module, trace, export_options: ExportConfig
):
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        backend = "NNPI"
        submod_modelpath, compilation_spec_dict = accelerator.get_modules(
            model, backend
        )[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)

        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)

        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)

        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)

        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        # TODO: the @input decorator does not work properly; fix it later.
        # input_sets = inputs.input_process(model, export_options, None, submod_tracepath)
        input_sets = accelerator_transformerLayers_inputs(
            model, trace, export_options, None, submod_tracepath
        )
        compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )
        return trace
    else:
        return trace
def test_to_glow_selective_already_scripted(self):
    inputs = (torch.zeros(4) + 8, torch.zeros(4) + 7)
    torch_res = model(*inputs)

    bar_inputs = [
        torch.randn(shape)
        for shape in torch_glow.get_submod_input_shapes(model, "foo.bar", inputs)
    ]
    qux_inputs = [
        torch.randn(shape)
        for shape in torch_glow.get_submod_input_shapes(model, "qux", inputs)
    ]

    with torch.no_grad():
        traced_model = torch.jit.trace(model, inputs)

    glow_mod = torch_glow.to_glow_selective(
        traced_model,
        {
            "foo.bar": get_compilation_spec(bar_inputs),
            "qux": get_compilation_spec(qux_inputs),
        },
        inplace=False,
    )
    glow_res = glow_mod(*inputs)
    assert torch.allclose(torch_res, glow_res)
def test_to_glow_selective_multi_spec(self):
    a = torch.randn(4)
    b = torch.randn(6)
    foo = Foo()
    bar = Bar()
    model = Model(foo, bar)
    torch_resA = model(a, a)
    torch_resB = model(b, b)

    metaA = torch_glow.InputMeta()
    metaA.set_same_as(a)
    inputA = [metaA]
    metaB = torch_glow.InputMeta()
    metaB.set_same_as(b)
    inputB = [metaB]

    options = torch_glow.CompilationOptions()
    options.backend = "Interpreter"

    specA = torch_glow.GlowCompileSpec()
    specA.set(inputA, options)
    specB = torch_glow.GlowCompileSpec()
    specB.set(inputB, options)

    # Note: (a,) and (b,) are one-element tuples of example inputs; the
    # original "(a)" was just a parenthesized tensor, not a tuple.
    lowered_mod = torch_glow.to_glow_selective(
        model, {"foo": [(specA, (a,)), (specB, (b,))]}
    )
    glow_resA = lowered_mod(a, a)
    glow_resB = lowered_mod(b, b)
    assert torch.allclose(torch_resA, glow_resA)
    assert torch.allclose(torch_resB, glow_resB)
def lower_modules_to_accelerator(
    model: nn.Module, trace, export_options: ExportConfig, throughput_optimize=False
):
    # Raise an error if the accelerator could not be imported
    if not accelerator_lowering_supported:
        raise RuntimeError("Accelerator Lowering not supported!")

    import torch_glow

    log_accelerator_feature_usage("build.NNPI")

    if (
        (hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder))
        or (
            hasattr(model, "representation")
            and isinstance(model.representation, AcceleratorBiLSTM)
        )
        or (
            hasattr(model, "lower_module")
            # Internal CNN LM module to add accelerator support.
            and type(model.lower_module).__qualname__ == "CNNLowerModule"
        )
    ):
        backend = "NNPI"
        (
            submod_modelpath,
            compilation_spec_dict,
            inputs_function,
        ) = accelerator.get_modules(model, backend)[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)

        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)

        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)

        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)

        # Override the options for the throughput-optimized case
        if throughput_optimize:
            compilation_spec_dict["NNPI_IceCores"] = "4"
            compilation_spec_dict["NNPINumParallelChunks"] = "4"
            compilation_group_settings.set_replication_count(3)

        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        if inputs_function is not None:
            input_sets = inputs_function(
                model, trace, export_options, None, submod_modelpath
            )
        else:
            raise RuntimeError(
                "inputs_function needs to be specified in accelerator decorator"
            )
        compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )
        return trace
    else:
        return trace
def test_to_glow_selective(self):
    a = torch.zeros(4) + 8
    b = torch.zeros(4) + 7
    torch_res = model(a, b)

    input_meta = InputMeta()
    input_meta.set_same_as(a)
    inputs = [input_meta, input_meta]
    options = CompilationOptions()
    options.backend = "Interpreter"
    spec = GlowCompileSpec()
    spec.set(inputs, options)

    # Test the interface with an implicit "forward" method
    glow_mod = torch_glow.to_glow_selective(
        model, {"foo.bar": (spec, (a, b)), "qux": (spec, (a, b))}
    )
    glow_mod = torch.jit.trace(glow_mod, (a, b))
    glow_res = glow_mod(a, b)
    assert torch.allclose(torch_res, glow_res)

    # Test the interface with an explicit "forward" method
    glow_mod = torch_glow.to_glow_selective(
        model,
        {
            "foo.bar": {"forward": (spec, (a, b))},
            "qux": {"forward": (spec, (a, b))},
        },
    )
    glow_mod = torch.jit.trace(glow_mod, (a, b))
    glow_res = glow_mod(a, b)
    assert torch.allclose(torch_res, glow_res)
def test_to_glow_selective(self):
    a = torch.zeros(4) + 8
    b = torch.zeros(4) + 7
    torch_res = model(a, b)

    spec = torch.classes.glow.GlowCompileSpec()
    spec.setBackend("Interpreter")
    sim = torch.classes.glow.SpecInputMeta()
    sim.setSameAs(a)
    spec.addInputs([sim, sim])

    glow_mod = torch_glow.to_glow_selective(
        model, {"foo.bar": (spec, (a, b)), "qux": (spec, (a, b))}
    )
    glow_mod = torch.jit.trace(glow_mod, (a, b))
    glow_res = glow_mod(a, b)
    assert torch.allclose(torch_res, glow_res)
def lower_modules_to_accelerator(model, trace, seq_padding_control, batch_padding_control): import torch_glow if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder): backend = "NNPI" submod_modelpath, compilation_spec_dict = accelerator.get_modules( model, backend)[0] submod_tracepath = accelerator.model2trace_path(submod_modelpath) embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim spec = torch_glow.CompilationSpec() spec.get_settings().set_glow_backend(backend) compilation_group = torch_glow.CompilationGroup() spec.compilation_groups_append(compilation_group) compilation_group_settings = compilation_group.get_settings() compilation_group_settings.set_convert_to_fp16(True) for k, v in compilation_spec_dict.items(): compilation_group.get_settings().backend_specific_opts_insert(k, v) for seq_len in seq_padding_control: if seq_len <= 0: continue for batch_size in batch_padding_control: if batch_size <= 0: continue input1 = torch.randn([seq_len, batch_size, embedding_dim], dtype=torch.float32) input2 = torch.randn([batch_size, seq_len]).bool() input_specs = torch_glow.input_specs_from_tensors( [input1, input2]) compilation_group.input_sets_append(input_specs) trace = torch_glow.to_glow_selective( trace, {submod_tracepath: spec}, inplace=False, ) return trace else: return trace
def test_input_spec(self):
    """Test setting quantized and non-quantized input specs."""
    with torch.no_grad():
        a = torch.tensor([[0.1]])
        b = torch.tensor([[0.1]])
        mod = TestModule()
        traced_model = torch.jit.trace(mod, (a, b))
        ref_result = traced_model(a, b)

        # Test a non-quantized input
        glow_mod = torch_glow.to_glow(traced_model, get_compilation_spec((a, b)))
        glow_result = glow_mod(a, b)
        self.assertTrue(torch.allclose(ref_result, glow_result))

        # Test a quantized input
        add_inputs = torch_glow.get_submod_inputs(mod, "add", (a, b))
        glow_mod = torch_glow.to_glow_selective(
            traced_model, {"add": get_compilation_spec(add_inputs)}
        )
        glow_result = glow_mod(a, b)
        self.assertTrue(torch.allclose(ref_result, glow_result))
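# The tests use torch_glow.get_submod_inputs / get_submod_input_shapes to
# capture the example inputs that reach a named submodule. A minimal sketch
# of how such a helper can be implemented with a forward pre-hook; this is
# an illustrative assumption, not torch_glow's actual implementation, and
# capture_submod_inputs is a hypothetical name.
def capture_submod_inputs(model, submod_path, example_inputs):
    # Walk the dotted path (e.g. "foo.bar") down to the target submodule.
    submod = model
    for name in submod_path.split("."):
        submod = getattr(submod, name)

    captured = []

    def pre_hook(module, inputs):
        # Record the positional inputs the submodule receives.
        captured.extend(inputs)

    handle = submod.register_forward_pre_hook(pre_hook)
    try:
        with torch.no_grad():
            model(*example_inputs)
    finally:
        handle.remove()
    return captured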
def lower_modules_to_accelerator(model, trace, seq_padding_control, batch_padding_control): import torch_glow if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder): embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim spec = torch_glow.CompilationSpec() spec.get_settings().set_glow_backend("NNPI") compilation_group = torch_glow.CompilationGroup() spec.compilation_groups_append(compilation_group) compilation_group_settings = compilation_group.get_settings() compilation_group_settings.set_convert_to_fp16(True) compilation_group.get_settings().backend_specific_opts_insert( "NNPI_IceCores", "12") compilation_group.get_settings().backend_specific_opts_insert( "NNPINumParallelChunks", "12") for seq_len in seq_padding_control: if seq_len <= 0: continue for batch_size in batch_padding_control: if batch_size <= 0: continue input1 = torch.randn([seq_len, batch_size, embedding_dim], dtype=torch.float32) input2 = torch.randn([batch_size, seq_len]).bool() input_specs = torch_glow.input_specs_from_tensors( [input1, input2]) compilation_group.input_sets_append(input_specs) trace = torch_glow.to_glow_selective( trace, {"model.encoder.encoder.transformer.layers": spec}, inplace=False, ) return trace else: return trace
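# Illustrative call site for the padding-controlled variants above. The
# particular control values are assumptions, not from the original code;
# non-positive entries exist only to show why the "<= 0" guards are needed,
# and each positive (seq_len, batch_size) pair becomes one compiled input set.
seq_padding_control = [0, 64, 256]
batch_padding_control = [0, 1, 32]
trace = lower_modules_to_accelerator(
    model, trace, seq_padding_control, batch_padding_control
)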
def lower_modules_to_accelerator(
    model: nn.Module, trace, export_options: ExportConfig, throughput_optimize=False
):
    # Raise an error if the accelerator could not be imported
    if not accelerator_lowering_supported:
        raise RuntimeError("Accelerator Lowering not supported!")

    import torch_glow

    log_accelerator_feature_usage("build.NNPI")

    if (
        (hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder))
        or (
            hasattr(model, "representation")
            and isinstance(model.representation, AcceleratorBiLSTM)
        )
        or (
            hasattr(model, "lower_module")
            # Internal CNN LM module to add accelerator support.
            and type(model.lower_module).__qualname__ == "CNNLowerModule"
        )
    ):
        backend = "NNPI"
        backend_qualifier = ""

        if throughput_optimize:
            backend_qualifier = ":throughput_optimized"

        modules_to_lower = accelerator.get_modules(model, backend + backend_qualifier)
        if len(modules_to_lower) < 1:
            raise RuntimeError("Need at least one module to lower to accelerator")
        elif len(modules_to_lower) > 1:
            print(f"Warning. Received {len(modules_to_lower)} modules to lower.")
            print("Warning. Only lowering first module.")

        (
            submod_modelpath,
            compilation_spec_dict,
            inputs_function,
        ) = modules_to_lower[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)

        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)

        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)

        compilation_group_settings = compilation_group.get_settings()

        # Set values from the dict that are not passed as backend-specific opts
        compilation_group_settings.set_convert_to_fp16(
            compilation_spec_dict.pop("glow:ConvertToFP16", "true") in ["true", "True"]
        )
        compilation_group_settings.set_replication_count(
            int(compilation_spec_dict.pop("glow:ReplicationCount", "1"))
        )

        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        if inputs_function is not None:
            input_sets = inputs_function(
                model, trace, export_options, None, submod_modelpath
            )
        else:
            raise RuntimeError(
                "inputs_function needs to be specified in accelerator decorator"
            )
        compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )
        return trace
    else:
        return trace
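# Hypothetical call site for the lowering entry point above. The traced
# module and export_options come from the surrounding export flow and are
# assumptions here, shown only to illustrate the expected calling convention.
trace = torch.jit.trace(model, example_inputs)
trace = lower_modules_to_accelerator(
    model, trace, export_options, throughput_optimize=True
)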