Example #1
def insert_acblock(model: nn.Module):
    items = list(model.named_children())
    idx = 0
    while idx < len(items):
        name, module = items[idx]
        if isinstance(module, nn.Conv2d) and module.kernel_size[0] > 1:
            # Replace the standard convolution with an ACBlock
            in_channels = module.in_channels
            out_channels = module.out_channels
            kernel_size = module.kernel_size
            stride = module.stride
            padding = module.padding
            dilation = module.dilation
            groups = module.groups
            padding_mode = module.padding_mode

            acblock = AsymmetricConvolutionBlock(in_channels,
                                                 out_channels,
                                                 kernel_size[0],
                                                 stride[0],
                                                 padding=padding[0],
                                                 padding_mode=padding_mode,
                                                 dilation=dilation,
                                                 groups=groups)
            model.add_module(name, acblock)
            # If a BN layer follows the conv layer, replace that BN layer with Identity
            # See [About BN layer #35](https://github.com/DingXiaoH/ACNet/issues/35)
            if (idx + 1) < len(items) and isinstance(items[idx + 1][1],
                                                     nn.BatchNorm2d):
                new_layer = nn.Identity()
                model.add_module(items[idx + 1][0], new_layer)
        else:
            insert_acblock(module)
        idx += 1
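A minimal usage sketch, assuming AsymmetricConvolutionBlock is importable from the same repository (resnet18 is only an illustrative target):

import torchvision.models as models

model = models.resnet18()
# Every Conv2d with kernel size > 1 becomes an ACBlock; the BatchNorm right after it becomes nn.Identity
insert_acblock(model)
print(model.conv1)  # expected: AsymmetricConvolutionBlock (model.bn1 is now nn.Identity)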
Example #2
def add_input_quantization(mod: nn.Module):
    # Instantiate input observer, for keeping track of the input quantization specs
    if mod.qconfig is not None:
        mod.add_module('input_quantization', mod.qconfig.activation())
        # Only update the observer, but do not apply quantization
        mod.input_quantization.enable_fake_quant(False)
        mod.register_forward_pre_hook(input_observer_hook)
Example #3
    def convertBatchNorm(cls,
                         module: nn.Module,
                         num_micro_batches: int = 1) -> nn.Module:
        """Converts a :class:`nn.BatchNorm` or underlying
        :class:`nn.BatchNorm`s into :class:`DelayedBatchNorm`::
            from torchvision.models.resnet import resnet101
            from pytorch_Gpipe.delayedNorm import DelayedBatchNorm
            model = resnet101()
            model = DelayedBatchNorm.convertBatchNorm(model)
        """
        if (isinstance(module, DelayedBatchNorm)
                and module.num_micro_batches == num_micro_batches):
            return module

        if isinstance(module, _BatchNorm) and module.track_running_stats:
            module_output = DelayedBatchNorm(module.num_features, module.eps,
                                             module.momentum, module.affine,
                                             num_micro_batches)
            if module.affine:
                module_output.register_parameter('weight', module.weight)
                module_output.register_parameter('bias', module.bias)
            module_output.register_buffer('running_mean', module.running_mean)
            module_output.register_buffer('running_var', module.running_var)
            module_output.register_buffer('num_batches_tracked',
                                          module.num_batches_tracked)

            return module_output

        for name, child in module.named_children():
            module.add_module(name,
                              cls.convertBatchNorm(child, num_micro_batches))

        return module
Example #4
def add_modules(target_module: nn.Module,
                module_list: List[Union[nn.Module, WithModule]],
                prefix: str = 'unit_'):
    """
    Adds modules to target modules. WithModule type is supported
    Args:
        target_module: Module to be injected
        module_list: List of modules to be added
        prefix: Prefix to module names
    """
    for i, unit in enumerate(module_list):
        name = f'{prefix}{i}'
        if isinstance(unit, nn.Module):
            target_module.add_module(name, unit)
        elif 'module' in dir(unit) and unit.module is not None:
            target_module.add_module(name, unit.module)
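A self-contained usage sketch (the WithModule branch is skipped; plain nn.Module instances are enough to show the naming scheme):

import torch.nn as nn

backbone = nn.Module()
add_modules(backbone, [nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4)])
print([name for name, _ in backbone.named_children()])  # ['unit_0', 'unit_1', 'unit_2']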
Example #5
def replace_layers_in_module(module: nn.Module, mapping_fn: Callable, *args,
                             **kwargs) -> bool:
    """
    Recursively iterate over the children of a module and replace them according to `mapping_fn`.

    Returns:
        True if a layer has been changed.
    """
    changed = False
    for name, child in module.named_children():
        new_module = mapping_fn(child, *args, **kwargs)

        if new_module is not None:
            changed = True
            module.add_module(name, new_module)

        # recursively apply to child
        changed |= replace_layers_in_module(child, mapping_fn, *args, **kwargs)
    return changed
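A runnable usage sketch: the mapping function below returns a replacement for nn.ReLU and None for everything else, so untouched children stay in place:

import torch.nn as nn

def relu_to_leaky(child: nn.Module, negative_slope: float = 0.1):
    if isinstance(child, nn.ReLU):
        return nn.LeakyReLU(negative_slope)
    return None  # keep this child unchanged

net = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 2))
changed = replace_layers_in_module(net, relu_to_leaky, 0.2)
print(changed, net[1])  # True LeakyReLU(negative_slope=0.2)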
Example #6
def _transform_lstm_rnn(module: Module, debug: bool) -> GraphModule:
    """Transforms multi-layer RNN/LSTM to Sequential of single-layer RNN/LSTM.

    Converts multi-layer RNN/LSTM to Sequential with single-layer RNN/LSTM.
    If dropout probability is nonzero, creates intermediate dropout layers.
    Finally, copies training mode.

    Args:
        module: container module to transform
        debug: whether to print debug messages

    Returns:
        equivalent transformed module

    Raises:
        NotImplementedError: if initial hidden state is used in forward pass
    """
    if debug:
        print("\tBegin transformation: LSTM, RNN")
    graph: Graph = BackpackTracer().trace(module)

    nodes = [
        n
        for n in graph.nodes
        if n.op == "call_module"
        and isinstance(module.get_submodule(n.target), (RNN, LSTM))
        and module.get_submodule(n.target).num_layers > 1
    ]
    for node in nodes:
        if len(node.args) > 1:
            raise NotImplementedError(
                "For conversion, LSTM/RNN input must not have hidden states."
            )
        lstm_module_replace = _make_rnn_backpack(module.get_submodule(node.target))
        module.add_module(node.target, lstm_module_replace)

    graph.lint()
    if debug:
        print(f"\tRNNs, LSTMs transformed: {len(nodes)}")
    return GraphModule(module, graph)
Example #7
def fuse_repvgg_block(model: nn.Module):
    for name, module in model.named_children():
        if isinstance(module, RepVGGBlock):
            # Replace the RepVGGBlock with a standard convolution
            kernel, bias = get_equivalent_kernel_bias(
                module.rbr_dense, module.rbr_1x1, module.rbr_identity,
                module.in_channels, module.groups, module.padding)
            # Create a new standard convolution, assign the fused weight and bias, then insert it back into the model
            fused_conv = nn.Conv2d(module.in_channels,
                                   module.out_channels,
                                   module.kernel_size,
                                   stride=module.stride,
                                   padding=module.padding,
                                   dilation=module.dilation,
                                   groups=module.groups,
                                   padding_mode=module.padding_mode,
                                   bias=True)
            fused_conv.weight = nn.Parameter(kernel.detach().cpu())
            fused_conv.bias = nn.Parameter(bias.detach().cpu())
            model.add_module(name, fused_conv)
        else:
            fuse_repvgg_block(module)
Example #8
def modify_for_grad_receptive_field(module: nn.Module):
    for name, m in module.named_children():
        if mod_isinstance(m, nn.Conv2d):
            init_for_grad_receptive_field(m)
        elif mod_isinstance(m, nn.MaxPool2d):
            # change maxpool to avgpool
            replacement = nn.AvgPool2d(
                kernel_size=cast(int, m.kernel_size),
                stride=cast(int, m.stride),
                padding=cast(int, m.padding),
                ceil_mode=cast(bool, m.ceil_mode),
            )
            module.add_module(name, replacement)
        elif mod_isinstance(m, nn.Dropout):
            # turn off Dropout
            # https://discuss.pytorch.org/t/how-to-freeze-bn-layers-while-training-the-rest-of-network-mean-and-var-wont-freeze/89736/10
            m.eval()
        elif mod_isinstance(m, nn.BatchNorm2d):
            # turn off batchnorm
            # https://discuss.pytorch.org/t/how-to-close-batchnorm-when-using-torchvision-models/21812/2
            if hasattr(m, "reset_parameters"):
                cast(Callable, m.reset_parameters)()
            m.eval()
            init_for_grad_receptive_field(m)
Example #9
class FCDenseNet(Module):
    r"""
    The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for Semantic Segmentation
    https://arxiv.org/abs/1611.09326

    In this paper, we extend DenseNets to deal with the problem of semantic segmentation. We achieve state-of-the-art
    results on urban scene benchmark datasets such as CamVid and Gatech, without any further post-processing module nor
    pretraining. Moreover, due to smart construction of the model, our approach has much less parameters than currently
    published best entries for these datasets.
    """
    def __init__(
        self,
        in_channels: int = 3,
        out_channels: int = 1000,
        initial_num_features: int = 48,
        dropout: float = 0.2,
        down_dense_growth_rates: Union[int, Sequence[int]] = 16,
        down_dense_bottleneck_ratios: Union[Optional[int],
                                            Sequence[Optional[int]]] = None,
        down_dense_num_layers: Union[int, Sequence[int]] = (4, 5, 7, 10, 12),
        down_transition_compression_factors: Union[float,
                                                   Sequence[float]] = 1.0,
        middle_dense_growth_rate: int = 16,
        middle_dense_bottleneck: Optional[int] = None,
        middle_dense_num_layers: int = 15,
        up_dense_growth_rates: Union[int, Sequence[int]] = 16,
        up_dense_bottleneck_ratios: Union[Optional[int],
                                          Sequence[Optional[int]]] = None,
        up_dense_num_layers: Union[int, Sequence[int]] = (12, 10, 7, 5, 4)):
        super(FCDenseNet, self).__init__()

        # region Parameters handling
        self.in_channels = in_channels
        self.out_channels = out_channels

        if type(down_dense_growth_rates) == int:
            down_dense_growth_rates = (down_dense_growth_rates, ) * 5
        if down_dense_bottleneck_ratios is None or type(
                down_dense_bottleneck_ratios) == int:
            down_dense_bottleneck_ratios = (down_dense_bottleneck_ratios, ) * 5
        if type(down_dense_num_layers) == int:
            down_dense_num_layers = (down_dense_num_layers, ) * 5
        if type(down_transition_compression_factors) == float:
            down_transition_compression_factors = (
                down_transition_compression_factors, ) * 5

        if type(up_dense_growth_rates) == int:
            up_dense_growth_rates = (up_dense_growth_rates, ) * 5
        if up_dense_bottleneck_ratios is None or type(
                up_dense_bottleneck_ratios) == int:
            up_dense_bottleneck_ratios = (up_dense_bottleneck_ratios, ) * 5
        if type(up_dense_num_layers) == int:
            up_dense_num_layers = (up_dense_num_layers, ) * 5
        # endregion

        # region First convolution
        # The Lasagne implementation uses convolution with 'same' padding, the PyTorch equivalent is padding=1
        self.features = Conv2d(in_channels,
                               initial_num_features,
                               kernel_size=3,
                               padding=1,
                               bias=False)
        current_channels = self.features.out_channels
        # endregion

        # region Downward path
        # Pairs of Dense Blocks with input concatenation and TransitionDown layers
        down_dense_params = [{
            'concat_input': True,
            'growth_rate': gr,
            'num_layers': nl,
            'dense_layer_params': {
                'dropout': dropout,
                'bottleneck_ratio': br
            }
        } for gr, nl, br in zip(down_dense_growth_rates, down_dense_num_layers,
                                down_dense_bottleneck_ratios)]
        down_transition_params = [{
            'dropout': dropout,
            'compression': c
        } for c in down_transition_compression_factors]
        skip_connections_channels = []

        self.down_dense = Module()
        self.down_trans = Module()
        down_pairs_params = zip(down_dense_params, down_transition_params)
        for i, (dense_params,
                transition_params) in enumerate(down_pairs_params):
            block = DenseBlock(current_channels, **dense_params)
            current_channels = block.out_channels
            self.down_dense.add_module(f'block_{i}', block)

            skip_connections_channels.append(block.out_channels)

            transition = TransitionDown(current_channels, **transition_params)
            current_channels = transition.out_channels
            self.down_trans.add_module(f'trans_{i}', transition)
        # endregion

        # region Middle block
        # Renamed from "bottleneck" in the paper, to avoid confusion with the Bottleneck of DenseLayers
        self.middle = DenseBlock(current_channels,
                                 middle_dense_growth_rate,
                                 middle_dense_num_layers,
                                 concat_input=True,
                                 dense_layer_params={
                                     'dropout': dropout,
                                     'bottleneck_ratio':
                                     middle_dense_bottleneck
                                 })
        current_channels = self.middle.out_channels
        # endregion

        # region Upward path
        # Pairs of TransitionUp layers and Dense Blocks without input concatenation
        up_transition_params = [{
            'skip_channels': sc,
        } for sc in reversed(skip_connections_channels)]
        up_dense_params = [{
            'concat_input': False,
            'growth_rate': gr,
            'num_layers': nl,
            'dense_layer_params': {
                'dropout': dropout,
                'bottleneck_ratio': br
            }
        } for gr, nl, br in zip(up_dense_growth_rates, up_dense_num_layers,
                                up_dense_bottleneck_ratios)]

        self.up_dense = Module()
        self.up_trans = Module()
        up_pairs_params = zip(up_transition_params, up_dense_params)
        for i, (transition_params_up,
                dense_params_up) in enumerate(up_pairs_params):
            transition = TransitionUp(current_channels, **transition_params_up)
            current_channels = transition.out_channels
            self.up_trans.add_module(f'trans_{i}', transition)

            block = DenseBlock(current_channels, **dense_params_up)
            current_channels = block.out_channels
            self.up_dense.add_module(f'block_{i}', block)
        # endregion

        # region Final convolution
        self.final = Conv2d(current_channels,
                            out_channels,
                            kernel_size=1,
                            bias=False)
        # endregion

        # region Weight initialization
        for module in self.modules():
            if isinstance(module, Conv2d):
                init.kaiming_normal_(module.weight)
            elif isinstance(module, BatchNorm2d):
                module.reset_parameters()
            elif isinstance(module, Linear):
                init.xavier_uniform_(module.weight)
                init.constant_(module.bias, 0)
        # endregion

    def forward(self, x):
        res = self.features(x)

        skip_tensors = []
        for dense, trans in zip(self.down_dense.children(),
                                self.down_trans.children()):
            res = dense(res)
            skip_tensors.append(res)
            res = trans(res)

        res = self.middle(res)

        for skip, trans, dense in zip(reversed(skip_tensors),
                                      self.up_trans.children(),
                                      self.up_dense.children()):
            res = trans(res, skip)
            res = dense(res)

        res = self.final(res)

        return res

    def predict(self, x):
        logits = self(x)
        return F.softmax(logits, dim=1)
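A hedged usage sketch, assuming the DenseBlock, TransitionDown and TransitionUp building blocks from the original repository are available; with the default five down/up stages the output is expected to keep the input spatial resolution:

import torch

model = FCDenseNet(in_channels=3, out_channels=12)  # e.g. 12 CamVid classes
x = torch.randn(2, 3, 224, 224)
logits = model(x)         # per-pixel class scores, one channel per class
probs = model.predict(x)  # softmax over the class dimension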
Example #10
def _unwrap_layers(module: nn.Module):
    for name, sub_module in module.named_children():
        if isinstance(sub_module, Wrapper):
            module.add_module(name, sub_module.layer)
        else:
            _unwrap_layers(sub_module)
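A small self-contained sketch; this Wrapper class is only a stand-in for whatever wrapper type the original code targets:

import torch.nn as nn

class Wrapper(nn.Module):  # hypothetical stand-in, not the original class
    def __init__(self, layer: nn.Module):
        super().__init__()
        self.layer = layer

    def forward(self, x):
        return self.layer(x)

net = nn.Sequential(Wrapper(nn.Linear(4, 4)), nn.ReLU())
_unwrap_layers(net)
print(net[0])  # Linear(in_features=4, out_features=4, bias=True)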
Example #11
def fuse_acblock(model: nn.Module, eps=1e-5):
    for name, module in model.named_children():
        if isinstance(module, AsymmetricConvolutionBlock):
            # Replace the ACBlock with a standard convolution
            # Get the Nx1 conv weight and the corresponding BN weight, bias, running mean and running std
            vertical_conv_weight = module.ver_conv.weight
            vertical_bn_weight = module.ver_bn.weight
            vertical_bn_bias = module.ver_bn.bias
            vertical_bn_running_mean = module.ver_bn.running_mean
            vertical_bn_running_std = torch.sqrt(module.ver_bn.running_var +
                                                 eps)
            # Get the 1xN conv weight and the corresponding BN weight, bias, running mean and running std
            horizontal_conv_weight = module.hor_conv.weight
            horizontal_bn_weight = module.hor_bn.weight
            horizontal_bn_bias = module.hor_bn.bias
            horizontal_bn_running_mean = module.hor_bn.running_mean
            horizontal_bn_running_std = torch.sqrt(module.hor_bn.running_var +
                                                   eps)
            if isinstance(module.square_bn, nn.Identity):
                # Get the NxN conv weight and bias
                # (the BN after this conv was replaced with nn.Identity when the block was inserted)
                square_weight = module.square_conv.weight
                square_bias = module.square_conv.bias
                # Compute the fused bias
                fused_bias = square_bias + vertical_bn_bias + horizontal_bn_bias \
                             - vertical_bn_running_mean * vertical_bn_weight / vertical_bn_running_std \
                             - horizontal_bn_running_mean * horizontal_bn_weight / horizontal_bn_running_std
                # Compute the fused kernel
                fused_kernel = square_weight
            else:
                square_conv_weight = module.square_conv.weight
                square_bn_weight = module.square_bn.weight
                square_bn_bias = module.square_bn.bias
                square_bn_running_mean = module.square_bn.running_mean
                square_bn_running_std = torch.sqrt(
                    module.square_bn.running_var + eps)
                # Compute the fused bias
                fused_bias = square_bn_bias + vertical_bn_bias + horizontal_bn_bias \
                             - square_bn_running_mean * square_bn_weight / square_bn_running_std \
                             - vertical_bn_running_mean * vertical_bn_weight / vertical_bn_running_std \
                             - horizontal_bn_running_mean * horizontal_bn_weight / horizontal_bn_running_std
                # Compute the fused kernel
                fused_kernel = _fuse_kernel(square_conv_weight,
                                            square_bn_weight,
                                            square_bn_running_std)
            # Add the fused Nx1 and 1xN kernels into the square kernel
            _add_to_square_kernel(
                fused_kernel,
                _fuse_kernel(vertical_conv_weight, vertical_bn_weight,
                             vertical_bn_running_std))
            _add_to_square_kernel(
                fused_kernel,
                _fuse_kernel(horizontal_conv_weight, horizontal_bn_weight,
                             horizontal_bn_running_std))
            # Create a new standard convolution, assign the fused weight and bias, then insert it back into the model
            fused_conv = nn.Conv2d(module.in_channels,
                                   module.out_channels,
                                   module.kernel_size,
                                   stride=module.stride,
                                   padding=module.padding,
                                   dilation=module.dilation,
                                   groups=module.groups,
                                   padding_mode=module.padding_mode)
            fused_conv.weight = nn.Parameter(fused_kernel.detach().cpu())
            fused_conv.bias = nn.Parameter(fused_bias.detach().cpu())
            model.add_module(name, fused_conv)
        else:
            fuse_acblock(module, eps=eps)
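A hedged sanity check, assuming `model` is a CPU model that went through insert_acblock above and has been trained; in eval mode the fused convolutions should reproduce the unfused outputs up to floating-point tolerance:

import torch

model.eval()  # BN must use its running statistics before fusing
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    y_ref = model(x)
    fuse_acblock(model, eps=1e-5)  # each ACBlock collapses into a single Conv2d
    y_fused = model(x)
print(torch.allclose(y_ref, y_fused, atol=1e-5))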
Example #12
def _make_dense_layers(module: nn.Module, name, input_dim, output_dim):
    module.add_module(name, nn.Linear(input_dim, output_dim))
    module.add_module(name + '_bn', nn.BatchNorm1d(output_dim))
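A quick usage sketch with an empty container module:

import torch.nn as nn

head = nn.Sequential()
_make_dense_layers(head, 'fc1', 128, 64)
print(head)  # Sequential with children 'fc1' (Linear) and 'fc1_bn' (BatchNorm1d)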
Example #13
def build_regularized_relu_block_(module_block: nn.Module,
                                  suffix: str,
                                  reg: Regularization,
                                  dropout: float = 0.5,
                                  num_elem: int = 0):
    if reg == Regularization.DROPOUT:
        module_block.add_module('Dropout_{}'.format(suffix),
                                nn.Dropout(p=dropout))
        module_block.add_module('ReLU_{}'.format(suffix), nn.ReLU())
    elif reg == Regularization.BN_RELU:
        module_block.add_module('BN_{}'.format(suffix),
                                nn.BatchNorm1d(num_elem))
        module_block.add_module('ReLU_{}'.format(suffix), nn.ReLU())
    elif reg == Regularization.RELU_BN:
        module_block.add_module('ReLU_{}'.format(suffix), nn.ReLU())
        module_block.add_module('BN_{}'.format(suffix),
                                nn.BatchNorm1d(num_elem))
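A usage sketch; this Regularization enum is a minimal stand-in, only its member names are taken from the code above:

import enum
import torch.nn as nn

class Regularization(enum.Enum):  # hypothetical stand-in for the original enum
    DROPOUT = 0
    BN_RELU = 1
    RELU_BN = 2

block = nn.Sequential(nn.Linear(64, 32))
build_regularized_relu_block_(block, suffix='1', reg=Regularization.BN_RELU, num_elem=32)
print(block)  # Linear -> BatchNorm1d(32) -> ReLU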