Example #1
def lower_modules_to_accelerator(model: nn.Module, trace,
                                 export_options: ExportConfig):
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        backend = "NNPI"
        (
            submod_modelpath,
            compilation_spec_dict,
            inputs_function,
        ) = accelerator.get_modules(model, backend)[0]
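        # Map the submodule's path in the eager model to its path in the trace.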
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)
        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        if inputs_function is not None:
            input_sets = inputs_function(model, trace, export_options, None,
                                         submod_modelpath)
        else:
            # Guard against an unbound input_sets (a NameError in the original).
            raise RuntimeError(
                "inputs_function needs to be specified in accelerator decorator"
            )
        compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )

        return trace
    else:
        return trace
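
A minimal usage sketch for Example #1 (the model, sample inputs, and export config are hypothetical; torch.jit.trace is the usual way to produce the trace argument):

# Sketch only: assumes `model` (a PyText model whose encoder is a
# RoBERTaEncoder), sample input tensors, and an ExportConfig already exist.
import torch

traced = torch.jit.trace(model, (sample_tokens, sample_pad_mask))
lowered = lower_modules_to_accelerator(model, traced, export_config)
torch.jit.save(lowered, "model_nnpi.pt")
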
Example #2
def lower_modules_to_accelerator(model: nn.Module, trace,
                                 export_options: ExportConfig):
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        backend = "NNPI"
        submod_modelpath, compilation_spec_dict = accelerator.get_modules(
            model, backend)[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)
        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        # TODO: @input decorator does not work properly; fix it later.
        # input_sets = inputs.input_process(model, export_options, None, submod_tracepath)
        input_sets = accelerator_transformerLayers_inputs(
            model, trace, export_options, None, submod_tracepath)
        compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )

        return trace
    else:
        return trace
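
Example #2 replaces the inputs_function lookup with a direct call to accelerator_transformerLayers_inputs. That helper is not shown here; below is a plausible sketch of what such a function returns, modeled on the inline loop in Example #4 further down (the function name and the export_options fields are assumptions, not the real PyText helper):

# Hypothetical sketch, not the real accelerator_transformerLayers_inputs:
# builds one torch_glow input-spec set per padded (seq_len, batch_size) shape.
import torch
import torch_glow

def example_transformer_inputs(model, trace, export_options, dataset, module_path):
    embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim
    input_sets = []
    for seq_len in export_options.seq_padding_control:
        if seq_len <= 0:
            continue
        for batch_size in export_options.batch_padding_control:
            if batch_size <= 0:
                continue
            embeddings = torch.randn([seq_len, batch_size, embedding_dim])
            pad_mask = torch.randn([batch_size, seq_len]).bool()
            input_sets.append(
                torch_glow.input_specs_from_tensors([embeddings, pad_mask]))
    return input_sets
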
Example #3
def lower_modules_to_accelerator(model: nn.Module,
                                 trace,
                                 export_options: ExportConfig,
                                 throughput_optimize=False):
    # Raise error if accelerator could not be imported
    if not accelerator_lowering_supported:
        raise RuntimeError("Accelerator Lowering not supported!")

    import torch_glow

    log_accelerator_feature_usage("build.NNPI")
    if ((hasattr(model, "encoder")
         and isinstance(model.encoder, RoBERTaEncoder))
            or (hasattr(model, "representation")
                and isinstance(model.representation, AcceleratorBiLSTM))
            or (hasattr(model, "lower_module")
                # Internal CNN LM module to add accelerator support.
                and type(model.lower_module).__qualname__ == "CNNLowerModule")):
        backend = "NNPI"
        (
            submod_modelpath,
            compilation_spec_dict,
            inputs_function,
        ) = accelerator.get_modules(model, backend)[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)

        # Override the options for throughput-optimized case
        if throughput_optimize:
            compilation_spec_dict["NNPI_IceCores"] = "4"
            compilation_spec_dict["NNPINumParallelChunks"] = "4"
            compilation_group_settings.set_replication_count(3)

        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        if inputs_function is not None:
            input_sets = inputs_function(model, trace, export_options, None,
                                         submod_modelpath)
        else:
            raise RuntimeError(
                "inputs_function needs to be specified in accelerator decorator"
            )
        compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )

        return trace
    else:
        return trace
Example #4
def lower_modules_to_accelerator(model, trace, seq_padding_control,
                                 batch_padding_control):
    import torch
    import torch_glow

    if hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder):
        backend = "NNPI"
        submod_modelpath, compilation_spec_dict = accelerator.get_modules(
            model, backend)[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)
        embedding_dim = model.encoder.encoder.transformer.token_embedding.embedding_dim
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()
        compilation_group_settings.set_convert_to_fp16(True)
        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        for seq_len in seq_padding_control:
            if seq_len <= 0:
                continue
            for batch_size in batch_padding_control:
                if batch_size <= 0:
                    continue
                input1 = torch.randn([seq_len, batch_size, embedding_dim],
                                     dtype=torch.float32)
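                # randn is almost surely nonzero, so .bool() yields an all-True
                # mask; this tensor only pins shape and dtype for compilation.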
                input2 = torch.randn([batch_size, seq_len]).bool()
                input_specs = torch_glow.input_specs_from_tensors(
                    [input1, input2])
                compilation_group.input_sets_append(input_specs)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )

        return trace
    else:
        return trace
Example #5
def lower_modules_to_accelerator(
    model: nn.Module, trace, export_options: ExportConfig, throughput_optimize=False
):
    # Raise error if accelerator could not be imported
    if not accelerator_lowering_supported:
        raise RuntimeError("Accelerator Lowering not supported!")

    import torch_glow

    log_accelerator_feature_usage("build.NNPI")
    if (
        (hasattr(model, "encoder") and isinstance(model.encoder, RoBERTaEncoder))
        or (
            hasattr(model, "representation")
            and isinstance(model.representation, AcceleratorBiLSTM)
        )
        or (
            hasattr(model, "lower_module")
            # Internal CNN LM module to add accelerator support.
            and type(model.lower_module).__qualname__ == "CNNLowerModule"
        )
    ):
        backend = "NNPI"
        backend_qualifier = ""

        if throughput_optimize:
            backend_qualifier = ":throughput_optimized"

        modules_to_lower = accelerator.get_modules(model, backend + backend_qualifier)

        if len(modules_to_lower) < 1:
            raise RuntimeError("Need at least one module to lower to accelerator")
        elif len(modules_to_lower) > 1:
            print(f"Warning. Received {len(modules_to_lower)} modules to lower.")
            print("Warning. Only lowering first module.")

        (
            submod_modelpath,
            compilation_spec_dict,
            inputs_function,
        ) = modules_to_lower[0]
        submod_tracepath = accelerator.model2trace_path(submod_modelpath)
        spec = torch_glow.CompilationSpec()
        spec.get_settings().set_glow_backend(backend)
        compilation_group = torch_glow.CompilationGroup()
        spec.compilation_groups_append(compilation_group)
        compilation_group_settings = compilation_group.get_settings()

        # Set values from dict that are not set via backend-specific opts
        compilation_group_settings.set_convert_to_fp16(
            compilation_spec_dict.pop("glow:ConvertToFP16", "true") in ["true", "True"]
        )
        compilation_group_settings.set_replication_count(
            int(compilation_spec_dict.pop("glow:ReplicationCount", "1"))
        )

        for k, v in compilation_spec_dict.items():
            compilation_group.get_settings().backend_specific_opts_insert(k, v)

        if inputs_function is not None:
            input_sets = inputs_function(
                model, trace, export_options, None, submod_modelpath
            )
        else:
            raise RuntimeError(
                "inputs_function needs to be specified in accelerator decorator"
            )
        compilation_group.set_input_sets(input_sets)

        trace = torch_glow.to_glow_selective(
            trace,
            {submod_tracepath: spec},
            inplace=False,
        )

        return trace
    else:
        return trace
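
Example #5 splits the spec dict into two namespaces: "glow:"-prefixed keys are popped and applied as compilation-group settings, while the remaining keys are forwarded verbatim through backend_specific_opts_insert. A hypothetical dict illustrating that convention (the NNPI keys are borrowed from Example #3; their exact semantics are an assumption):

# Hypothetical compilation_spec_dict for Example #5.
compilation_spec_dict = {
    "glow:ConvertToFP16": "true",  # consumed by set_convert_to_fp16
    "glow:ReplicationCount": "2",  # consumed by set_replication_count
    "NNPI_IceCores": "4",  # forwarded as a backend-specific option
    "NNPINumParallelChunks": "4",  # forwarded as a backend-specific option
}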