def set_swish(self, memory_efficient=True):
    """Sets swish function as memory efficient (for training) or standard."""
    self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
    for block in self._blocks:
        block.set_swish(memory_efficient)
def __init__(self, block_args, global_params):
    """
    Args:
        block_args (EasyDict): block args, see :class:`EfficientNet`.
        global_params (EasyDict): global args, see :class:`EfficientNet`.
    """
    super().__init__()
    self._block_args = block_args
    self.has_se = (block_args.se_ratio is not None) \
        and (0 < block_args.se_ratio <= 1)
    self.id_skip = block_args.id_skip

    # Expansion phase
    # number of input channels
    inp = block_args.in_channels
    # number of output channels
    oup = block_args.in_channels * block_args.expand_ratio
    if block_args.expand_ratio != 1:
        self._expand_conv = Conv2d(in_channels=inp, out_channels=oup,
                                   kernel_size=1, padding=0, bias=False)
        self._bn0 = get_norm(global_params.norm, out_channels=oup)

    # Depthwise convolution phase
    k = block_args.kernel_size
    s = block_args.stride
    self._depthwise_conv = Conv2d(in_channels=oup, out_channels=oup, groups=oup,
                                  kernel_size=k, stride=s, padding="SAME", bias=False)
    self._bn1 = get_norm(global_params.norm, out_channels=oup)

    # Squeeze and Excitation layer, if desired
    if self.has_se:
        num_squeezed_channels = max(
            1, int(block_args.in_channels * block_args.se_ratio))
        self._se_reduce = Conv2d(in_channels=oup, out_channels=num_squeezed_channels,
                                 kernel_size=1, padding=0)
        self._se_expand = Conv2d(in_channels=num_squeezed_channels, out_channels=oup,
                                 kernel_size=1, padding=0)

    # Output phase
    final_oup = block_args.out_channels
    self._project_conv = Conv2d(in_channels=oup, out_channels=final_oup,
                                kernel_size=1, padding=0, bias=False)
    self._bn2 = get_norm(global_params.norm, final_oup)
    self._swish = MemoryEfficientSwish()
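# A minimal sketch of how the layers built above are conventionally wired
# together in an MBConv forward pass (expansion -> depthwise -> SE -> project
# -> skip). This is an illustration of the standard EfficientNet pattern, not
# necessarily this repo's actual `forward`; `drop_connect` is an assumed
# helper that randomly drops the residual branch during training.
import torch
import torch.nn.functional as F

def forward(self, inputs, drop_connect_rate=None):
    x = inputs
    # Expansion: 1x1 conv widens channels by `expand_ratio`.
    if self._block_args.expand_ratio != 1:
        x = self._swish(self._bn0(self._expand_conv(x)))
    # Depthwise conv: spatial mixing with one filter per channel.
    x = self._swish(self._bn1(self._depthwise_conv(x)))
    # Squeeze-and-Excitation: global pool -> reduce -> expand -> sigmoid gate.
    if self.has_se:
        x_squeezed = F.adaptive_avg_pool2d(x, 1)
        x_squeezed = self._se_expand(self._swish(self._se_reduce(x_squeezed)))
        x = torch.sigmoid(x_squeezed) * x
    # Projection: 1x1 conv back down to `out_channels`, no activation.
    x = self._bn2(self._project_conv(x))
    # Skip connection (with optional drop connect) when shapes allow it.
    if self.id_skip and self._block_args.stride == 1 \
            and self._block_args.in_channels == self._block_args.out_channels:
        if drop_connect_rate:
            x = drop_connect(x, p=drop_connect_rate, training=self.training)
        x = x + inputs
    return x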
def __init__(self, cfg, input_shape: List[ShapeSpec]):
    super().__init__()
    in_channels = input_shape[0].channels
    num_classes = cfg.MODEL.EFFICIENTDET.NUM_CLASSES
    norm = cfg.MODEL.EFFICIENTDET.HEAD.NORM
    bn_momentum = cfg.MODEL.EFFICIENTDET.HEAD.BN_MOMENTUM
    bn_eps = cfg.MODEL.EFFICIENTDET.HEAD.BN_EPS
    prior_prob = cfg.MODEL.EFFICIENTDET.HEAD.PRIOR_PROB
    memory_efficient = cfg.MODEL.EFFICIENTDET.HEAD.MEMORY_EFFICIENT_SWISH
    num_conv_layers = cfg.MODEL.EFFICIENTDET.HEAD.NUM_CONV
    num_anchors = cfg.build_anchor_generator(cfg, input_shape).num_cell_anchors

    self.bn_momentum = bn_momentum
    self.bn_eps = bn_eps
    self.prior_prob = prior_prob

    assert len(set(num_anchors)) == 1, \
        "Using different number of anchors between levels is not currently supported!"
    num_anchors = num_anchors[0]

    self.cls_subnet = nn.ModuleList([])
    self.bbox_subnet = nn.ModuleList([])
    for _ in range(num_conv_layers):
        self.cls_subnet.append(
            SeparableConvBlock(in_channels, in_channels,
                               kernel_size=3, padding="SAME"))
        self.bbox_subnet.append(
            SeparableConvBlock(in_channels, in_channels,
                               kernel_size=3, padding="SAME"))

    num_levels = len(input_shape)
    self.bn_cls_subnet = nn.ModuleList()
    self.bn_bbox_subnet = nn.ModuleList()
    for _ in range(num_levels):
        self.bn_cls_subnet.append(nn.ModuleList(
            [get_norm(norm, in_channels) for _ in range(num_conv_layers)]))
        self.bn_bbox_subnet.append(nn.ModuleList(
            [get_norm(norm, in_channels) for _ in range(num_conv_layers)]))

    self.cls_score = SeparableConvBlock(in_channels, num_anchors * num_classes,
                                        kernel_size=3, padding="SAME")
    self.bbox_pred = SeparableConvBlock(in_channels, num_anchors * 4,
                                        kernel_size=3, padding="SAME")
    self.act = MemoryEfficientSwish() if memory_efficient else Swish()
    self._init_weights()
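# `_init_weights` is invoked above but not shown here. A hedged sketch of the
# RetinaNet-style scheme that `prior_prob` points to: the classification bias
# is set so that sigmoid(bias) == prior_prob, i.e. every anchor starts with a
# small foreground probability, which keeps early focal-loss values stable.
# The `.pointwise_conv` attribute below is an assumption about
# SeparableConvBlock's internals, not a confirmed API of this repo.
import math

def _init_weights(self):
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.normal_(m.weight, mean=0.0, std=0.01)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
    # sigmoid(bias) == prior_prob  =>  bias = -log((1 - p) / p)
    bias_value = -math.log((1 - self.prior_prob) / self.prior_prob)
    nn.init.constant_(self.cls_score.pointwise_conv.bias, bias_value)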
def __init__(self, in_channels=3, blocks_args=None, global_params=None, out_features=None):
    """
    Args:
        in_channels (int): number of input image channels.
        blocks_args (list[EasyDict]): a list of EasyDicts used to construct the
            blocks. Each item in the list contains:

            * num_repeat: int, the number of `MBConvBlock` in the stage.
            * in_channels: int, the number of input tensor channels in the stage.
            * out_channels: int, the number of output tensor channels in the stage.
            * kernel_size: int, the kernel size of the conv layers in the stage.
            * stride: int or list or tuple, the stride of the conv layers in the stage.
            * expand_ratio: int, the channel expansion ratio of the expansion
              phase in `MBConvBlock`.
            * id_skip: bool, if `True`, apply a skip connection in `MBConvBlock`
              when the stride is 1 and the input and output channels are equal.
            * se_ratio: float, the channel reduction ratio of the squeeze layer
              in the SE module, between 0 and 1.
        global_params (EasyDict): an EasyDict containing global params shared
            between blocks, which contains:

            * norm: str, the normalization to use.
            * bn_momentum: float, the `momentum` parameter of the norm module.
            * bn_eps: float, the `eps` parameter of the norm module.
            * dropout_rate: float, dropout rate.
            * num_classes: None or int; if None, classification is not performed.
            * width_coefficient: float, the width coefficient.
            * depth_coefficient: float, the depth coefficient.
            * depth_divisor: int, when scaling and rounding the number of channels
              of each stage according to the width coefficient, the number of
              channels must be an integer multiple of `depth_divisor`.
            * min_depth: int, the lower bound on the number of channels in each stage.
            * drop_connect_rate: float, between 0 and 1, drop connect rate.
            * image_size: int, input image size.
        out_features (list[str]): names of the layers whose outputs should be
            returned in forward. Can be anything in "stage1", "stage2", ...,
            "stage8" or "linear". If None, the output of the last layer is returned.
    """
    super().__init__()
    assert isinstance(blocks_args, list), "blocks_args should be a list"
    assert len(blocks_args) > 0, "blocks_args must be non-empty"

    self._size_divisibility = 0
    self._global_params = global_params
    self._blocks_args = blocks_args
    self._out_features = list()
    self._out_feature_strides = dict()
    self._out_feature_channels = dict()
    self.num_classes = global_params.num_classes

    # Stem
    # number of output channels
    out_channels = round_filters(32, global_params,
                                 skip=global_params.fix_head_stem)
    self._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3,
                             stride=2, padding="SAME", bias=False)
    self._bn0 = get_norm(global_params.norm, out_channels=out_channels)

    # Build blocks
    self._blocks = nn.ModuleList([])
    curr_stride = 2
    curr_block_idx = 0
    self.block_idx_to_name = dict()
    for stage_idx, block_args in enumerate(blocks_args):
        # Update block input/output filters based on the width multiplier,
        # and the number of repeats based on the depth multiplier.
        block_args.update(
            in_channels=round_filters(block_args.in_channels, global_params),
            out_channels=round_filters(block_args.out_channels, global_params),
            num_repeat=round_repeats(block_args.num_repeat, global_params))
        name = "stage{}".format(stage_idx + 2)
        curr_stride *= block_args.stride
        self._out_feature_strides[name] = curr_stride
        self._out_feature_channels[name] = block_args.out_channels
        curr_block_idx += block_args.num_repeat
        self.block_idx_to_name[curr_block_idx - 1] = name

        # The first block needs to take care of the stride and
        # filter size increase.
        self._blocks.append(MBConvBlock(block_args, global_params))
        if block_args.num_repeat > 1:
            next_block_args = deepcopy(block_args)
            next_block_args.update(in_channels=block_args.out_channels, stride=1)
            for _ in range(block_args.num_repeat - 1):
                self._blocks.append(MBConvBlock(next_block_args, global_params))

    # Head
    if self.num_classes is not None:
        in_channels = block_args.out_channels  # output of final block
        out_channels = round_filters(1280, global_params,
                                     skip=global_params.fix_head_stem)
        self._conv_head = Conv2d(in_channels, out_channels,
                                 kernel_size=1, padding=0, bias=False)
        self._bn1 = get_norm(global_params.norm, out_channels=out_channels)

        # Final linear layers
        self._avg_pooling = nn.AdaptiveAvgPool2d(1)
        self._dropout = nn.Dropout(global_params.dropout_rate)
        self._fc = nn.Linear(out_channels, global_params.num_classes)
        name = "linear"

    self._swish = MemoryEfficientSwish()

    if out_features is None:
        out_features = [name]
    self._out_features = out_features
    assert len(self._out_features)

    # init bn params
    bn_mom = global_params.bn_momentum
    bn_eps = global_params.bn_eps
    if bn_mom is not None and bn_eps is not None:
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.momentum = bn_mom
                m.eps = bn_eps
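# A hedged sketch of the channel/repeat rounding helpers used above, following
# the conventional EfficientNet implementation; this repo's versions may
# differ in detail (e.g. the exact handling of the `skip` flag). It illustrates
# the `depth_divisor`/`min_depth` behavior described in the docstring: channels
# are scaled by the width coefficient, snapped to a multiple of the divisor,
# and never rounded down by more than 10%.
import math

def round_filters(filters, global_params, skip=False):
    multiplier = global_params.width_coefficient
    if skip or not multiplier:
        return filters
    divisor = global_params.depth_divisor
    min_depth = global_params.min_depth or divisor
    filters *= multiplier
    # Round to the nearest multiple of `divisor`, but not below `min_depth`.
    new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
    if new_filters < 0.9 * filters:  # prevent rounding down by more than 10%
        new_filters += divisor
    return int(new_filters)

def round_repeats(repeats, global_params):
    multiplier = global_params.depth_coefficient
    if not multiplier:
        return repeats
    return int(math.ceil(multiplier * repeats))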
def set_swish(self, memory_efficient=True):
    """Sets swish function as memory efficient or standard."""
    self._swish = MemoryEfficientSwish() if memory_efficient else Swish()
def __init__(self, input_size, in_channels_list, out_channels,
             fuse_type="fast", norm="BN", memory_efficient=True):
    """
    Args:
        input_size (int): the input image size.
        in_channels_list (list): the number of input tensor channels per level.
        out_channels (int): the number of output tensor channels.
        fuse_type (str): only three weighted fusion approaches are currently
            supported:

            * fast: Output = sum(Input_i * w_i / sum(w_j))
            * softmax: Output = sum(Input_i * e ^ w_i / sum(e ^ w_j))
            * sum: Output = sum(Input_i) / len(Input_i)
        norm (str): the normalization to use.
        memory_efficient (bool): use `MemoryEfficientSwish` or `Swish` as the
            activation function.
    """
    super(BiFPNLayer, self).__init__()
    assert fuse_type in ("fast", "softmax", "sum"), \
        f"Unknown fuse method: {fuse_type}. Please select in [fast, softmax, sum]."

    self.input_size = input_size
    self.in_channels_list = in_channels_list
    self.fuse_type = fuse_type
    self.levels = len(in_channels_list)
    self.nodes_input_offsets = [
        [3, 4],
        [2, 5],
        [1, 6],
        [0, 7],
        [1, 7, 8],
        [2, 6, 9],
        [3, 5, 10],
        [4, 11],
    ]
    self.nodes_strides = [2 ** x for x in [6, 5, 4, 3, 4, 5, 6, 7]]

    # Change input feature maps to have the target number of channels.
    self.resample_convs = nn.ModuleList()
    for node_i_input_offsets in self.nodes_input_offsets:
        resample_convs_i = nn.ModuleList()
        for input_offset in node_i_input_offsets:
            if self.in_channels_list[input_offset] != out_channels:
                resample_conv = Conv2d(
                    self.in_channels_list[input_offset],
                    out_channels,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                    norm=get_norm(norm, out_channels),
                    activation=None,
                )
            else:
                resample_conv = nn.Identity()
            self.in_channels_list.append(out_channels)
            resample_convs_i.append(resample_conv)
        self.resample_convs.append(resample_convs_i)

    # fpn combine weights
    self.edge_weights = nn.ParameterList()
    for node_i_input_offsets in self.nodes_input_offsets:
        # combine weight
        if fuse_type == "fast" or fuse_type == "softmax":
            weights_i = nn.Parameter(
                torch.ones(len(node_i_input_offsets), dtype=torch.float32),
                requires_grad=True,
            )
        elif fuse_type == "sum":
            weights_i = nn.Parameter(
                torch.ones(len(node_i_input_offsets), dtype=torch.float32),
                requires_grad=False,
            )
        else:
            raise ValueError("Unknown fuse method: {}".format(self.fuse_type))
        self.edge_weights.append(weights_i)

    # Convs for combining edge features
    self.combine_convs = nn.ModuleList()
    for node_i_input_offsets in self.nodes_input_offsets:
        combine_conv = SeparableConvBlock(
            out_channels,
            out_channels,
            kernel_size=3,
            padding="SAME",
            norm=get_norm(norm, out_channels),
            activation=None,
        )
        self.combine_convs.append(combine_conv)

    self.act = MemoryEfficientSwish() if memory_efficient else Swish()
    self.down_sampling = MaxPool2d(kernel_size=3, stride=2, padding="SAME")
    self.up_sampling = nn.Upsample(scale_factor=2, mode="nearest")
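# A hedged sketch of how the three fusion modes in the docstring are typically
# applied per node in a BiFPN forward pass (illustration only, not this repo's
# actual `forward`). `inputs` are the already-resampled feature maps feeding
# one node, and `weights` is the matching entry of `self.edge_weights`.
import torch
import torch.nn.functional as F

def fuse_features(inputs, weights, fuse_type, eps=1e-4):
    if fuse_type == "fast":
        # Clamp learned weights to be non-negative, then normalize:
        # w_i / (sum(w_j) + eps), avoiding the softmax exponentials.
        w = F.relu(weights)
        w = w / (w.sum() + eps)
    elif fuse_type == "softmax":
        # e^w_i / sum(e^w_j)
        w = torch.softmax(weights, dim=0)
    else:  # "sum": weights are frozen at 1, so this is a plain average.
        w = weights / len(inputs)
    return sum(w[i] * x for i, x in enumerate(inputs))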