def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, pad_type='zero', activation='lrelu',
             norm='none', sn=False):
    super(Conv2dLayer, self).__init__()
    # Initialize the padding scheme
    if pad_type == 'reflect':
        self.pad = nn.ReflectionPad2d(padding)
    elif pad_type == 'replicate':
        self.pad = nn.ReplicationPad2d(padding)
    elif pad_type == 'zero':
        self.pad = nn.ZeroPad2d(padding)
    else:
        assert 0, "Unsupported padding type: {}".format(pad_type)
    # Initialize the normalization type
    if norm == 'bn':
        self.norm = nn.BatchNorm2d(out_channels)
    elif norm == 'in':
        self.norm = nn.InstanceNorm2d(out_channels)
    elif norm == 'ln':
        self.norm = LayerNorm(out_channels)
    elif norm == 'none':
        self.norm = None
    else:
        assert 0, "Unsupported normalization: {}".format(norm)
    # Initialize the activation function
    if activation == 'relu':
        self.activation = nn.ReLU(inplace=True)
    elif activation == 'lrelu':
        self.activation = nn.LeakyReLU(0.2, inplace=True)
    elif activation == 'prelu':
        self.activation = nn.PReLU()
    elif activation == 'selu':
        self.activation = nn.SELU(inplace=True)
    elif activation == 'tanh':
        self.activation = nn.Tanh()
    elif activation == 'sigmoid':
        self.activation = nn.Sigmoid()
    elif activation == 'none':
        self.activation = None
    else:
        assert 0, "Unsupported activation: {}".format(activation)
    # Initialize the convolution layers (padding is handled by self.pad above)
    if sn:
        self.conv2d = SpectralNorm(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride,
                      padding=0, dilation=dilation))
    else:
        self.conv2d = nn.Conv2d(in_channels, out_channels, kernel_size,
                                stride, padding=0, dilation=dilation)
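# A minimal sketch (not part of the module above, torch only) checking that the
# explicit pad-then-conv ordering used by Conv2dLayer with pad_type='zero'
# matches a plain convolution with built-in padding once the weights agree.
import torch
import torch.nn as nn

x = torch.randn(1, 3, 8, 8)
conv = nn.Conv2d(3, 4, kernel_size=3, padding=0)
y_pad_conv = conv(nn.ZeroPad2d(1)(x))          # explicit zero pad, conv padding=0

conv_same = nn.Conv2d(3, 4, kernel_size=3, padding=1)
conv_same.load_state_dict(conv.state_dict())   # share weights for a fair check
y_builtin = conv_same(x)

assert torch.allclose(y_pad_conv, y_builtin, atol=1e-6)
print(y_pad_conv.shape)                        # torch.Size([1, 4, 8, 8])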
def __init__(self):
    super(MaxPoolPad, self).__init__()
    self.pad = nn.ZeroPad2d((1, 0, 1, 0))
    self.pool = nn.MaxPool2d(3, stride=2, padding=1)
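# Illustrative shape check (torch only) of the asymmetric pad feeding the pool.
# In the NASNet reference implementation the forward also slices off the first
# row/column (x[:, :, 1:, 1:]) to mimic TensorFlow 'SAME' pooling; that slice
# is an assumption here since the forward is not shown above.
import torch
import torch.nn as nn

x = torch.randn(1, 1, 4, 4)
pad = nn.ZeroPad2d((1, 0, 1, 0))      # one extra row/column at the top-left
pool = nn.MaxPool2d(3, stride=2, padding=1)

out = pool(pad(x))                    # -> (1, 1, 3, 3)
out = out[:, :, 1:, 1:]               # -> (1, 1, 2, 2), matching TF 'SAME' at stride 2
print(out.shape)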
def __init__(self, input_dim, output_dim, kernel_size, stride, padding=0,
             norm='none', activation='relu', pad_type='zero'):
    super(Conv2dBlock, self).__init__()
    self.use_bias = True
    # initialize padding
    if pad_type == 'reflect':
        self.pad = nn.ReflectionPad2d(padding)
    elif pad_type == 'replicate':
        self.pad = nn.ReplicationPad2d(padding)
    elif pad_type == 'zero':
        self.pad = nn.ZeroPad2d(padding)
    else:
        assert 0, "Unsupported padding type: {}".format(pad_type)
    # initialize normalization
    norm_dim = output_dim
    if norm == 'bn':
        self.norm = nn.BatchNorm2d(norm_dim)
    elif norm == 'in':
        # self.norm = nn.InstanceNorm2d(norm_dim, track_running_stats=True)
        self.norm = nn.InstanceNorm2d(norm_dim)
    elif norm == 'ln':
        self.norm = LayerNorm(norm_dim)
    elif norm == 'adain':
        self.norm = AdaptiveInstanceNorm2d(norm_dim)
    elif norm == 'none' or norm == 'sn':
        self.norm = None
    else:
        assert 0, "Unsupported normalization: {}".format(norm)
    # initialize activation
    if activation == 'relu':
        self.activation = nn.ReLU(inplace=True)
    elif activation == 'lrelu':
        self.activation = nn.LeakyReLU(0.2, inplace=True)
    elif activation == 'prelu':
        self.activation = nn.PReLU()
    elif activation == 'selu':
        self.activation = nn.SELU(inplace=True)
    elif activation == 'tanh':
        self.activation = nn.Tanh()
    elif activation == 'none':
        self.activation = None
    else:
        assert 0, "Unsupported activation: {}".format(activation)
    # initialize convolution
    if norm == 'sn':
        self.conv = SpectralNorm(
            nn.Conv2d(input_dim, output_dim, kernel_size, stride,
                      bias=self.use_bias))
    else:
        self.conv = nn.Conv2d(input_dim, output_dim, kernel_size, stride,
                              bias=self.use_bias)
def down_shift(x):
    x = x[:, :, :-1, :]
    pad = nn.ZeroPad2d((0, 0, 1, 0))
    return pad(x)
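# Illustrative check of down_shift (assumes torch and the definition above):
# the last row is dropped and one zero row is padded at the top, so each row
# moves down by one while the spatial shape is preserved.
import torch
import torch.nn as nn

x = torch.arange(9, dtype=torch.float32).view(1, 1, 3, 3)
print(down_shift(x)[0, 0])
# tensor([[0., 0., 0.],
#         [0., 1., 2.],
#         [3., 4., 5.]])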
def __init__(self, stem_filters, num_filters):
    super(CellStem1, self).__init__()
    self.num_filters = num_filters
    self.stem_filters = stem_filters
    self.conv_1x1 = nn.Sequential()
    self.conv_1x1.add_module('relu', nn.ReLU())
    self.conv_1x1.add_module(
        'conv',
        nn.Conv2d(2 * self.num_filters, self.num_filters, 1, stride=1,
                  bias=False))
    self.conv_1x1.add_module(
        'bn',
        nn.BatchNorm2d(self.num_filters, eps=0.001, momentum=0.1,
                       affine=True))

    self.relu = nn.ReLU()
    self.path_1 = nn.Sequential()
    self.path_1.add_module(
        'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
    self.path_1.add_module(
        'conv',
        nn.Conv2d(self.stem_filters, self.num_filters // 2, 1, stride=1,
                  bias=False))
    # nn.Sequential, not nn.ModuleList: the submodules are accessed by name,
    # matching path_1 above.
    self.path_2 = nn.Sequential()
    self.path_2.add_module('pad', nn.ZeroPad2d((0, 1, 0, 1)))
    self.path_2.add_module(
        'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
    self.path_2.add_module(
        'conv',
        nn.Conv2d(self.stem_filters, self.num_filters // 2, 1, stride=1,
                  bias=False))

    self.final_path_bn = nn.BatchNorm2d(self.num_filters, eps=0.001,
                                        momentum=0.1, affine=True)

    self.comb_iter_0_left = BranchSeparables(self.num_filters,
                                             self.num_filters, 5, 2, 2,
                                             name='specific', bias=False)
    self.comb_iter_0_right = BranchSeparables(self.num_filters,
                                              self.num_filters, 7, 2, 3,
                                              name='specific', bias=False)

    # self.comb_iter_1_left = nn.MaxPool2d(3, stride=2, padding=1)
    self.comb_iter_1_left = MaxPoolPad()
    self.comb_iter_1_right = BranchSeparables(self.num_filters,
                                              self.num_filters, 7, 2, 3,
                                              name='specific', bias=False)

    # self.comb_iter_2_left = nn.AvgPool2d(3, stride=2, padding=1, count_include_pad=False)
    self.comb_iter_2_left = AvgPoolPad()
    self.comb_iter_2_right = BranchSeparables(self.num_filters,
                                              self.num_filters, 5, 2, 2,
                                              name='specific', bias=False)

    self.comb_iter_3_right = nn.AvgPool2d(3, stride=1, padding=1,
                                          count_include_pad=False)

    self.comb_iter_4_left = BranchSeparables(self.num_filters,
                                             self.num_filters, 3, 1, 1,
                                             name='specific', bias=False)
    # self.comb_iter_4_right = nn.MaxPool2d(3, stride=2, padding=1)
    self.comb_iter_4_right = MaxPoolPad()
def layer_pad(self, net, args, options):
    options = hc.Config(options)
    return nn.ZeroPad2d((args[0], args[1], args[2], args[3]))
def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
             z_padding=1, bias=False):
    BranchSeparables.__init__(self, in_channels, out_channels, kernel_size,
                              stride, padding, bias)
    self.padding = nn.ZeroPad2d((z_padding, 0, z_padding, 0))
def create_modules(module_defs):
    """Constructs module list of layer blocks from module configuration in module_defs"""
    hyperparams = module_defs.pop(0)
    hyperparams.update({
        "batch": int(hyperparams["batch"]),
        "subdivisions": int(hyperparams["subdivisions"]),
        "width": int(hyperparams["width"]),
        "height": int(hyperparams["height"]),
        "channels": int(hyperparams["channels"]),
        "optimizer": hyperparams.get("optimizer"),
        "momentum": float(hyperparams["momentum"]),
        "decay": float(hyperparams["decay"]),
        "learning_rate": float(hyperparams["learning_rate"]),
        "burn_in": int(hyperparams["burn_in"]),
        "max_batches": int(hyperparams["max_batches"]),
        "policy": hyperparams["policy"],
        "lr_steps": list(zip(map(int, hyperparams["steps"].split(",")),
                             map(float, hyperparams["scales"].split(",")))),
    })
    assert hyperparams["height"] == hyperparams["width"], \
        "Height and width should be equal! Non square images are padded with zeros."
    output_filters = [hyperparams["channels"]]
    module_list = nn.ModuleList()
    for module_i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def["type"] == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2
            modules.add_module(
                f"conv_{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module(f"batch_norm_{module_i}",
                                   nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
            if module_def["activation"] == "leaky":
                modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))

        elif module_def["type"] == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                modules.add_module(f"_debug_padding_{module_i}",
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride,
                                   padding=int((kernel_size - 1) // 2))
            modules.add_module(f"maxpool_{module_i}", maxpool)

        elif module_def["type"] == "upsample":
            upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module(f"upsample_{module_i}", upsample)

        elif module_def["type"] == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            filters = sum([output_filters[1:][i] for i in layers])
            modules.add_module(f"route_{module_i}", nn.Sequential())

        elif module_def["type"] == "shortcut":
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module(f"shortcut_{module_i}", nn.Sequential())

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes)
            modules.add_module(f"yolo_{module_i}", yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
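# A minimal sketch (torch only) of why the maxpool branch above inserts
# nn.ZeroPad2d((0, 1, 0, 1)) for kernel_size=2, stride=1: with padding=0 the
# pool would shrink the feature map by one pixel, the extra right/bottom
# padding keeps the size unchanged.
import torch
import torch.nn as nn

x = torch.randn(1, 8, 13, 13)
pool = nn.MaxPool2d(kernel_size=2, stride=1, padding=0)

print(pool(x).shape)                              # torch.Size([1, 8, 12, 12])
print(pool(nn.ZeroPad2d((0, 1, 0, 1))(x)).shape)  # torch.Size([1, 8, 13, 13])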
def __init__(self, padding):
    super(IRevInjectivePad, self).__init__()
    self.padding = padding
    self.pad = nn.ZeroPad2d(padding=(0, 0, 0, padding))
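# Hedged sketch: in the i-RevNet reference implementation this module's
# forward (not shown above, assumed here) permutes the tensor so that the
# (0, 0, 0, padding) zero pad lands on the channel dimension, injectively
# widening C -> C + padding while leaving H and W alone.
import torch
import torch.nn as nn

pad = nn.ZeroPad2d(padding=(0, 0, 0, 3))     # as constructed above with padding=3
x = torch.randn(2, 13, 8, 8)                 # (N, C, H, W)
y = pad(x.permute(0, 2, 1, 3)).permute(0, 2, 1, 3)  # assumed forward logic
print(y.shape)                               # torch.Size([2, 16, 8, 8])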
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=[3, 5, 5],
             layer_strides=[2, 2, 2],
             num_filters=[128, 128, 256],
             upsample_strides=[1, 2, 4],
             num_upsample_filters=[256, 256, 256],
             num_input_features=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             use_rc_net=False,
             name='rpn'):
    super(RPN, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    self._use_bev = use_bev
    self._use_rc_net = use_rc_net
    assert len(layer_nums) == 3
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    factors = []
    for i in range(len(layer_nums)):
        assert int(np.prod(layer_strides[:i + 1])) % upsample_strides[i] == 0
        factors.append(np.prod(layer_strides[:i + 1]) // upsample_strides[i])
    assert all([x == factors[0] for x in factors])
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(
                num_groups=num_groups, eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(nn.ConvTranspose2d)

    # note that when stride > 1, conv2d with same padding isn't
    # equal to pad-conv2d. we should use pad-conv2d.
    block2_input_filters = num_filters[0]
    if use_bev:
        self.bev_extractor = Sequential(
            Conv2d(6, 32, 3, padding=1),
            BatchNorm2d(32),
            nn.ReLU(),
            # nn.MaxPool2d(2, 2),
            Conv2d(32, 64, 3, padding=1),
            BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        block2_input_filters += 64

    self.block1 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(num_input_features, num_filters[0], 3,
               stride=layer_strides[0]),
        BatchNorm2d(num_filters[0]),
        nn.ReLU(),
    )
    for i in range(layer_nums[0]):
        self.block1.add(Conv2d(num_filters[0], num_filters[0], 3, padding=1))
        self.block1.add(BatchNorm2d(num_filters[0]))
        self.block1.add(nn.ReLU())
    self.deconv1 = Sequential(
        ConvTranspose2d(num_filters[0], num_upsample_filters[0],
                        upsample_strides[0], stride=upsample_strides[0]),
        BatchNorm2d(num_upsample_filters[0]),
        nn.ReLU(),
    )
    self.block2 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(block2_input_filters, num_filters[1], 3,
               stride=layer_strides[1]),
        BatchNorm2d(num_filters[1]),
        nn.ReLU(),
    )
    for i in range(layer_nums[1]):
        self.block2.add(Conv2d(num_filters[1], num_filters[1], 3, padding=1))
        self.block2.add(BatchNorm2d(num_filters[1]))
        self.block2.add(nn.ReLU())
    self.deconv2 = Sequential(
        ConvTranspose2d(num_filters[1], num_upsample_filters[1],
                        upsample_strides[1], stride=upsample_strides[1]),
        BatchNorm2d(num_upsample_filters[1]),
        nn.ReLU(),
    )
    self.block3 = Sequential(
        nn.ZeroPad2d(1),
        Conv2d(num_filters[1], num_filters[2], 3, stride=layer_strides[2]),
        BatchNorm2d(num_filters[2]),
        nn.ReLU(),
    )
    for i in range(layer_nums[2]):
        self.block3.add(Conv2d(num_filters[2], num_filters[2], 3, padding=1))
        self.block3.add(BatchNorm2d(num_filters[2]))
        self.block3.add(nn.ReLU())
    self.deconv3 = Sequential(
        ConvTranspose2d(num_filters[2], num_upsample_filters[2],
                        upsample_strides[2], stride=upsample_strides[2]),
        BatchNorm2d(num_upsample_filters[2]),
        nn.ReLU(),
    )
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        num_cls = num_anchor_per_loc * (num_class + 1)
    self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
    self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                              num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                      num_anchor_per_loc * 2, 1)
    if self._use_rc_net:
        self.conv_rc = nn.Conv2d(sum(num_upsample_filters),
                                 num_anchor_per_loc * box_code_size, 1)
def __init__(self,
             use_norm=True,
             num_class=2,
             layer_nums=[3, 5, 5],
             layer_strides=[2, 2, 2],
             num_filters=[128, 128, 256],
             upsample_strides=[1, 2, 4],
             num_upsample_filters=[256, 256, 256],
             num_input_features=128,
             num_anchor_per_loc=2,
             encode_background_as_zeros=True,
             use_direction_classifier=True,
             use_groupnorm=False,
             num_groups=32,
             use_bev=False,
             box_code_size=7,
             use_rc_net=False,
             name='rpn'):
    super(RPNV2, self).__init__()
    self._num_anchor_per_loc = num_anchor_per_loc
    self._use_direction_classifier = use_direction_classifier
    self._use_bev = use_bev
    self._use_rc_net = use_rc_net
    # assert len(layer_nums) == 3
    assert len(layer_strides) == len(layer_nums)
    assert len(num_filters) == len(layer_nums)
    assert len(upsample_strides) == len(layer_nums)
    assert len(num_upsample_filters) == len(layer_nums)
    """
    factors = []
    for i in range(len(layer_nums)):
        assert int(np.prod(layer_strides[:i + 1])) % upsample_strides[i] == 0
        factors.append(np.prod(layer_strides[:i + 1]) // upsample_strides[i])
    assert all([x == factors[0] for x in factors])
    """
    if use_norm:
        if use_groupnorm:
            BatchNorm2d = change_default_args(
                num_groups=num_groups, eps=1e-3)(GroupNorm)
        else:
            BatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
        Conv2d = change_default_args(bias=False)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=False)(nn.ConvTranspose2d)
    else:
        BatchNorm2d = Empty
        Conv2d = change_default_args(bias=True)(nn.Conv2d)
        ConvTranspose2d = change_default_args(bias=True)(nn.ConvTranspose2d)

    in_filters = [num_input_features, *num_filters[:-1]]
    # note that when stride > 1, conv2d with same padding isn't
    # equal to pad-conv2d. we should use pad-conv2d.
    blocks = []
    deblocks = []
    for i, layer_num in enumerate(layer_nums):
        block = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(in_filters[i], num_filters[i], 3, stride=layer_strides[i]),
            BatchNorm2d(num_filters[i]),
            nn.ReLU(),
        )
        for j in range(layer_num):
            block.add(Conv2d(num_filters[i], num_filters[i], 3, padding=1))
            block.add(BatchNorm2d(num_filters[i]))
            block.add(nn.ReLU())
        blocks.append(block)
        deblock = Sequential(
            ConvTranspose2d(num_filters[i], num_upsample_filters[i],
                            upsample_strides[i], stride=upsample_strides[i]),
            BatchNorm2d(num_upsample_filters[i]),
            nn.ReLU(),
        )
        deblocks.append(deblock)
    self.blocks = nn.ModuleList(blocks)
    self.deblocks = nn.ModuleList(deblocks)
    if encode_background_as_zeros:
        num_cls = num_anchor_per_loc * num_class
    else:
        num_cls = num_anchor_per_loc * (num_class + 1)
    self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
    self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                              num_anchor_per_loc * box_code_size, 1)
    if use_direction_classifier:
        self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                      num_anchor_per_loc * 2, 1)
    if self._use_rc_net:
        self.conv_rc = nn.Conv2d(sum(num_upsample_filters),
                                 num_anchor_per_loc * box_code_size, 1)
def create_modules(module_defs, img_size, arc):
    # Constructs module list of layer blocks from module configuration in module_defs
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams['channels'])]
    module_list = nn.ModuleList()
    routs = []  # list of layers which route to deeper layers
    yolo_index = -1

    for i, mdef in enumerate(module_defs):
        modules = nn.Sequential()

        if mdef['type'] == 'convolutional':
            bn = int(mdef['batch_normalize'])
            filters = int(mdef['filters'])
            kernel_size = int(mdef['size'])
            pad = (kernel_size - 1) // 2 if int(mdef['pad']) else 0
            modules.add_module(
                'Conv2d',
                nn.Conv2d(in_channels=output_filters[-1],
                          out_channels=filters,
                          kernel_size=kernel_size,
                          stride=int(mdef['stride']),
                          padding=pad,
                          bias=not bn))
            if bn:
                modules.add_module('BatchNorm2d',
                                   nn.BatchNorm2d(filters, momentum=0.1))
            if mdef['activation'] == 'leaky':
                # TODO: activation study https://github.com/ultralytics/yolov3/issues/441
                modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True))
                # modules.add_module('activation', nn.PReLU(num_parameters=1, init=0.10))
                # modules.add_module('activation', Swish())
            elif mdef['activation'] == 'mish':
                modules.add_module('activation', Mish())

        elif mdef['type'] == 'convolutional_nobias':
            filters = int(mdef['filters'])
            kernel_size = int(mdef['size'])
            modules.add_module(
                'Conv2d',
                nn.Conv2d(in_channels=output_filters[-1],
                          out_channels=filters,
                          kernel_size=kernel_size,
                          stride=int(mdef['stride']),
                          bias=False))

        elif mdef['type'] == 'convolutional_noconv':
            filters = int(mdef['filters'])
            modules.add_module('BatchNorm2d',
                               nn.BatchNorm2d(filters, momentum=0.1))
            modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True))

        elif mdef['type'] == 'maxpool':
            kernel_size = int(mdef['size'])
            stride = int(mdef['stride'])
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride,
                                   padding=int((kernel_size - 1) // 2))
            if kernel_size == 2 and stride == 1:  # yolov3-tiny
                modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1)))
                modules.add_module('MaxPool2d', maxpool)
            else:
                modules = maxpool

        elif mdef['type'] == 'upsample':
            modules = nn.Upsample(scale_factor=int(mdef['stride']), mode='nearest')

        elif mdef['type'] == 'route':  # nn.Sequential() placeholder for 'route' layer
            layers = [int(x) for x in mdef['layers'].split(',')]
            filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers])
            if 'groups' in mdef:
                filters = filters // 2
            routs.extend([l if l > 0 else l + i for l in layers])
            # if mdef[i+1]['type'] == 'reorg3d':
            #     modules = nn.Upsample(scale_factor=1/float(mdef[i+1]['stride']), mode='nearest')  # reorg3d

        elif mdef['type'] == 'shortcut':  # nn.Sequential() placeholder for 'shortcut' layer
            filters = output_filters[int(mdef['from'])]
            layer = int(mdef['from'])
            routs.extend([i + layer if layer < 0 else layer])

        elif mdef['type'] == 'reorg3d':  # yolov3-spp-pan-scale
            # torch.Size([16, 128, 104, 104])
            # torch.Size([16, 64, 208, 208]) <-- # stride 2 interpolate dimensions 2 and 3 to cat with prior layer
            pass

        elif mdef['type'] == 'yolo':
            yolo_index += 1
            mask = [int(x) for x in mdef['mask'].split(',')]  # anchor mask
            modules = YOLOLayer(anchors=mdef['anchors'][mask],  # anchor list
                                nc=int(mdef['classes']),  # number of classes
                                img_size=img_size,  # (416, 416)
                                yolo_index=yolo_index,  # 0, 1 or 2
                                arc=arc)  # yolo architecture

            # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3)
            try:
                if arc == 'defaultpw' or arc == 'Fdefaultpw':  # default with positive weights
                    b = [-4, -3.6]  # obj, cls
                elif arc == 'default':  # default no pw (40 cls, 80 obj)
                    b = [-5.5, -4.0]
                elif arc == 'uBCE':  # unified BCE (80 classes)
                    b = [0, -8.5]
                elif arc == 'uCE':  # unified CE (1 background + 80 classes)
                    b = [10, -0.1]
                elif arc == 'Fdefault':  # Focal default no pw (28 cls, 21 obj, no pw)
                    b = [-2.1, -1.8]
                elif arc == 'uFBCE' or arc == 'uFBCEpw':  # unified FocalBCE (5120 obj, 80 classes)
                    b = [0, -6.5]
                elif arc == 'uFCE':  # unified FocalCE (64 cls, 1 background + 80 classes)
                    b = [7.7, -1.1]

                bias = module_list[-1][0].bias.view(len(mask), -1)  # 255 to 3x85
                bias[:, 4] += b[0] - bias[:, 4].mean()  # obj
                bias[:, 5:] += b[1] - bias[:, 5:].mean()  # cls
                # bias = torch.load('weights/yolov3-spp.bias.pt')[yolo_index]  # list of tensors [3x85, 3x85, 3x85]
                module_list[-1][0].bias = torch.nn.Parameter(bias.view(-1))
                # utils.print_model_biases(model)
            except Exception:
                print('WARNING: smart bias initialization failure.')

        elif mdef['type'] == 'focus':
            filters = int(mdef['filters'])

        else:
            print('Warning: Unrecognized Layer Type: ' + mdef['type'])

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return module_list, routs, hyperparams
def __init__(self, model_cfg, input_channels):
    super().__init__()
    self.model_cfg = model_cfg

    if self.model_cfg.get('LAYER_NUMS', None) is not None:
        assert len(self.model_cfg.LAYER_NUMS) == len(
            self.model_cfg.LAYER_STRIDES) == len(self.model_cfg.NUM_FILTERS)
        layer_nums = self.model_cfg.LAYER_NUMS
        layer_strides = self.model_cfg.LAYER_STRIDES
        num_filters = self.model_cfg.NUM_FILTERS
    else:
        layer_nums = layer_strides = num_filters = []

    if self.model_cfg.get('UPSAMPLE_STRIDES', None) is not None:
        assert len(self.model_cfg.UPSAMPLE_STRIDES) == len(
            self.model_cfg.NUM_UPSAMPLE_FILTERS)
        num_upsample_filters = self.model_cfg.NUM_UPSAMPLE_FILTERS
        upsample_strides = self.model_cfg.UPSAMPLE_STRIDES
    else:
        upsample_strides = num_upsample_filters = []

    num_levels = len(layer_nums)
    c_in_list = [input_channels, *num_filters[:-1]]
    self.blocks = nn.ModuleList()
    self.deblocks = nn.ModuleList()
    for idx in range(num_levels):
        cur_layers = [
            nn.ZeroPad2d(1),
            nn.Conv2d(c_in_list[idx], num_filters[idx], kernel_size=3,
                      stride=layer_strides[idx], padding=0, bias=False),
            nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01),
            nn.ReLU()
        ]
        for k in range(layer_nums[idx]):
            cur_layers.extend([
                nn.Conv2d(num_filters[idx], num_filters[idx], kernel_size=3,
                          padding=1, bias=False),
                nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01),
                nn.ReLU()
            ])
        self.blocks.append(nn.Sequential(*cur_layers))
        if len(upsample_strides) > 0:
            stride = upsample_strides[idx]
            if stride > 1:
                self.deblocks.append(nn.Sequential(
                    nn.ConvTranspose2d(num_filters[idx],
                                       num_upsample_filters[idx],
                                       upsample_strides[idx],
                                       stride=upsample_strides[idx],
                                       bias=False),
                    nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3,
                                   momentum=0.01),
                    nn.ReLU()))
            else:
                # int() instead of the removed np.int alias
                stride = int(np.round(1 / stride))
                self.deblocks.append(nn.Sequential(
                    nn.Conv2d(num_filters[idx], num_upsample_filters[idx],
                              stride, stride=stride, bias=False),
                    nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3,
                                   momentum=0.01),
                    nn.ReLU()))

    c_in = sum(num_upsample_filters)
    if len(upsample_strides) > num_levels:
        self.deblocks.append(nn.Sequential(
            nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1],
                               stride=upsample_strides[-1], bias=False),
            nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01),
            nn.ReLU(),
        ))
    self.num_bev_features = c_in
def create_modules(module_defs):
    """Constructs module list of layer blocks from module configuration in module_defs"""
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams['channels'])]
    module_list = nn.ModuleList()
    yolo_layer_count = 0
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def['type'] == 'convolutional':
            bn = int(module_def['batch_normalize'])
            filters = int(module_def['filters'])
            kernel_size = int(module_def['size'])
            pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
            modules.add_module(
                'conv_%d' % i,
                nn.Conv2d(in_channels=output_filters[-1],
                          out_channels=filters,
                          kernel_size=kernel_size,
                          stride=int(module_def['stride']),
                          padding=pad,
                          bias=not bn))
            if bn:
                modules.add_module('batch_norm_%d' % i, nn.BatchNorm2d(filters))
            if module_def['activation'] == 'leaky':
                modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

        elif module_def['type'] == 'maxpool':
            kernel_size = int(module_def['size'])
            stride = int(module_def['stride'])
            if kernel_size == 2 and stride == 1:
                modules.add_module('_debug_padding_%d' % i,
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride,
                                   padding=int((kernel_size - 1) // 2))
            modules.add_module('maxpool_%d' % i, maxpool)

        elif module_def['type'] == 'upsample':
            # upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')  # WARNING: deprecated
            upsample = Upsample(scale_factor=int(module_def['stride']))
            modules.add_module('upsample_%d' % i, upsample)

        elif module_def['type'] == 'route':
            layers = [int(x) for x in module_def['layers'].split(',')]
            filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers])
            modules.add_module('route_%d' % i, EmptyLayer())

        elif module_def['type'] == 'shortcut':
            filters = output_filters[int(module_def['from'])]
            modules.add_module('shortcut_%d' % i, EmptyLayer())

        elif module_def['type'] == 'yolo':
            anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
            # Extract anchors
            anchors = [float(x) for x in module_def['anchors'].split(',')]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            nC = int(module_def['classes'])  # number of classes
            img_size = int(hyperparams['height'])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, nC, img_size, yolo_layer_count,
                                   cfg=hyperparams['cfg'])
            modules.add_module('yolo_%d' % i, yolo_layer)
            yolo_layer_count += 1

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
def __init__(self):
    super(CAN, self).__init__()
    self.features = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True, dilation=2),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True, dilation=2),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True, dilation=2),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 4096, kernel_size=7, stride=1, padding=3, bias=True, dilation=4),  # fc6 layer
        nn.ReLU(inplace=True),
        nn.Conv2d(4096, 4096, kernel_size=1, stride=1, padding=0, bias=True),  # fc7 layer
        nn.ReLU(inplace=True),
        nn.Conv2d(4096, 2, kernel_size=1, stride=1, padding=0, bias=True),  # final layer
        nn.ReLU(inplace=True),
        nn.ZeroPad2d(1),  # context module
        nn.Conv2d(2, 2, kernel_size=3, stride=1, padding=(0, 1), bias=True, dilation=(1, 2)),  # ctx_conv
        nn.ReLU(inplace=True),
        nn.ZeroPad2d(1),
        nn.Conv2d(2, 2, kernel_size=3, stride=1, padding=(0, 1), bias=True, dilation=(1, 2)),
        nn.ReLU(inplace=True),
        nn.ZeroPad2d(2),
        nn.Conv2d(2, 2, kernel_size=3, stride=1, padding=(0, 2), bias=True, dilation=(2, 4)),
        nn.ReLU(inplace=True),
        nn.ZeroPad2d(4),
        nn.Conv2d(2, 2, kernel_size=3, stride=1, padding=(0, 4), bias=True, dilation=(4, 8)),
        nn.ReLU(inplace=True),
        nn.ZeroPad2d(8),
        nn.Conv2d(2, 2, kernel_size=3, stride=1, padding=(0, 8), bias=True, dilation=(8, 16)),
        nn.ReLU(inplace=True),
        nn.ZeroPad2d(16),
        nn.Conv2d(2, 2, kernel_size=3, stride=1, padding=(0, 16), bias=True, dilation=(16, 32)),
        nn.ReLU(inplace=True),
        nn.ZeroPad2d(1),
        nn.Conv2d(2, 2, kernel_size=3, stride=1, padding=0, bias=True, dilation=(1, 1)),
        nn.ReLU(inplace=True),
        # nn.ZeroPad2d(1),
        nn.Conv2d(2, 2, kernel_size=1, stride=1, padding=0, bias=False, dilation=1),
        # nn.ZeroPad2d(32),
        # nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, bias=True, dilation=32),
        # nn.ReLU(inplace=True),
        # nn.ZeroPad2d(64),
        # nn.Conv2d(19, 19, kernel_size=3, stride=1, padding=1, bias=True, dilation=64),
        # nn.ReLU(inplace=True),
        # nn.ZeroPad2d(1),
        # nn.Conv2d(19, 19, kernel_size=3, stride=1, padding=1, bias=True),
        # nn.LeakyReLU(inplace=True),
        # nn.Conv2d(19, 2, kernel_size=1, stride=1, padding=0, bias=True),
        # nn.Upsample(size=(CONFIG['tusimple']['output_shape'][0], CONFIG['tusimple']['output_shape'][1]), mode='bilinear'),
        # nn.Conv2d(19, 19, kernel_size=16, stride=1, padding=7, bias=False),
        # nn.ReLU(inplace=True),
        # nn.Softmax(dim=1)
    )
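# A small illustrative check (torch only) of the padding arithmetic behind
# context modules like the one above: a 3x3 conv with dilation d has an
# effective kernel of d*(k-1)+1, so pairing nn.ZeroPad2d(d) with padding=0
# keeps the spatial size unchanged. (The module above additionally mixes
# per-axis padding and dilation; this sketch shows only the isotropic case.)
import torch
import torch.nn as nn

x = torch.randn(1, 2, 32, 32)
for d in (1, 2, 4, 8, 16):
    conv = nn.Conv2d(2, 2, kernel_size=3, dilation=d, padding=0)
    y = conv(nn.ZeroPad2d(d)(x))
    assert y.shape == x.shape    # 32 + 2d - (2d + 1) + 1 == 32
print('context-module padding arithmetic holds')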
def train_single_scale(netD, netG, reals, Gs, Zs, in_s, NoiseAmp, opt, centers=None):
    real = reals[len(Gs)]
    opt.nzx = real.shape[2]  # + (opt.ker_size - 1) * (opt.num_layer)
    opt.nzy = real.shape[3]  # + (opt.ker_size - 1) * (opt.num_layer)
    opt.receptive_field = opt.ker_size + ((opt.ker_size - 1) * (opt.num_layer - 1)) * opt.stride
    pad_noise = int(((opt.ker_size - 1) * opt.num_layer) / 2)
    pad_image = int(((opt.ker_size - 1) * opt.num_layer) / 2)
    if opt.mode == 'animation_train':
        opt.nzx = real.shape[2] + (opt.ker_size - 1) * (opt.num_layer)
        opt.nzy = real.shape[3] + (opt.ker_size - 1) * (opt.num_layer)
        pad_noise = 0
    m_noise = nn.ZeroPad2d(int(pad_noise))
    m_image = nn.ZeroPad2d(int(pad_image))

    alpha = opt.alpha

    fixed_noise = functions.generate_noise([opt.nc_z, opt.nzx, opt.nzy])
    z_opt = torch.full(fixed_noise.shape, 0, device=opt.device)
    z_opt = m_noise(z_opt)

    # setup optimizer
    optimizerD = optim.Adam(netD.parameters(), lr=opt.lr_d, betas=(opt.beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(), lr=opt.lr_g, betas=(opt.beta1, 0.999))
    schedulerD = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizerD, milestones=[1600], gamma=opt.gamma)
    schedulerG = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizerG, milestones=[1600], gamma=opt.gamma)

    errD2plot = []
    errG2plot = []
    D_real2plot = []
    D_fake2plot = []
    z_opt2plot = []

    for epoch in range(opt.niter):
        schedulerD.step()
        schedulerG.step()
        if (Gs == []) & (opt.mode != 'SR_train'):
            z_opt = functions.generate_noise([1, opt.nzx, opt.nzy])
            z_opt = m_noise(z_opt.expand(1, 3, opt.nzx, opt.nzy))
            noise_ = functions.generate_noise([1, opt.nzx, opt.nzy])
            noise_ = m_noise(noise_.expand(1, 3, opt.nzx, opt.nzy))
        else:
            noise_ = functions.generate_noise([opt.nc_z, opt.nzx, opt.nzy])
            noise_ = m_noise(noise_)

        ############################
        # (1) Update D network: maximize D(x) + D(G(z))
        ###########################
        for j in range(opt.Dsteps):
            # train with real
            netD.zero_grad()
            output = netD(real).to(opt.device)
            # D_real_map = output.detach()
            errD_real = -output.mean()  # -a
            errD_real.backward(retain_graph=True)
            D_x = -errD_real.item()

            # train with fake
            if (j == 0) & (epoch == 0):
                if (Gs == []) & (opt.mode != 'SR_train'):
                    prev = torch.full([1, opt.nc_z, opt.nzx, opt.nzy], 0, device=opt.device)
                    in_s = prev
                    prev = m_image(prev)
                    z_prev = torch.full([1, opt.nc_z, opt.nzx, opt.nzy], 0, device=opt.device)
                    z_prev = m_noise(z_prev)
                    opt.noise_amp = 1
                elif opt.mode == 'SR_train':
                    z_prev = in_s
                    criterion = nn.MSELoss()
                    RMSE = torch.sqrt(criterion(real, z_prev))
                    opt.noise_amp = opt.noise_amp_init * RMSE
                    z_prev = m_image(z_prev)
                    prev = z_prev
                else:
                    prev = draw_concat(Gs, Zs, reals, NoiseAmp, in_s, 'rand', m_noise, m_image, opt)
                    prev = m_image(prev)
                    z_prev = draw_concat(Gs, Zs, reals, NoiseAmp, in_s, 'rec', m_noise, m_image, opt)
                    criterion = nn.MSELoss()
                    RMSE = torch.sqrt(criterion(real, z_prev))
                    opt.noise_amp = opt.noise_amp_init * RMSE
                    z_prev = m_image(z_prev)
            else:
                prev = draw_concat(Gs, Zs, reals, NoiseAmp, in_s, 'rand', m_noise, m_image, opt)
                prev = m_image(prev)

            if opt.mode == 'paint_train':
                prev = functions.quant2centers(prev, centers)
                plt.imsave('%s/prev.png' % (opt.outf), functions.convert_image_np(prev), vmin=0, vmax=1)

            if (Gs == []) & (opt.mode != 'SR_train'):
                noise = noise_
            else:
                noise = opt.noise_amp * noise_ + prev

            fake = netG(noise.detach(), prev)
            output = netD(fake.detach())
            errD_fake = output.mean()
            errD_fake.backward(retain_graph=True)
            D_G_z = output.mean().item()

            gradient_penalty = functions.calc_gradient_penalty(netD, real, fake, opt.lambda_grad)
            gradient_penalty.backward()

            errD = errD_real + errD_fake + gradient_penalty
            optimizerD.step()

        errD2plot.append(errD.detach())

        ############################
        # (2) Update G network: maximize D(G(z))
        ###########################
        for j in range(opt.Gsteps):
            netG.zero_grad()
            output = netD(fake)
            # D_fake_map = output.detach()
            errG = -output.mean()
            errG.backward(retain_graph=True)
            if alpha != 0:
                loss = nn.MSELoss()
                if opt.mode == 'paint_train':
                    z_prev = functions.quant2centers(z_prev, centers)
                    plt.imsave('%s/z_prev.png' % (opt.outf), functions.convert_image_np(z_prev), vmin=0, vmax=1)
                Z_opt = opt.noise_amp * z_opt + z_prev
                rec_loss = alpha * loss(netG(Z_opt.detach(), z_prev), real)
                rec_loss.backward(retain_graph=True)
                rec_loss = rec_loss.detach()
            else:
                Z_opt = z_opt
                rec_loss = 0
            optimizerG.step()

        errG2plot.append(errG.detach() + rec_loss)
        D_real2plot.append(D_x)
        D_fake2plot.append(D_G_z)
        z_opt2plot.append(rec_loss)

        if epoch % 25 == 0 or epoch == (opt.niter - 1):
            print('[%d/%d]' % (epoch, opt.niter))

        if epoch % 500 == 0 or epoch == (opt.niter - 1):
            plt.imsave('%s/fake_sample.png' % (opt.outf),
                       functions.convert_image_np(fake.detach()), vmin=0, vmax=1)
            plt.imsave('%s/G(z_opt).png' % (opt.outf),
                       functions.convert_image_np(netG(Z_opt.detach(), z_prev).detach()),
                       vmin=0, vmax=1)
            # plt.imsave('%s/D_fake.png' % (opt.outf), functions.convert_image_np(D_fake_map))
            # plt.imsave('%s/D_real.png' % (opt.outf), functions.convert_image_np(D_real_map))
            # plt.imsave('%s/z_opt.png' % (opt.outf), functions.convert_image_np(z_opt.detach()), vmin=0, vmax=1)
            # plt.imsave('%s/prev.png' % (opt.outf), functions.convert_image_np(prev), vmin=0, vmax=1)
            # plt.imsave('%s/noise.png' % (opt.outf), functions.convert_image_np(noise), vmin=0, vmax=1)
            # plt.imsave('%s/z_prev.png' % (opt.outf), functions.convert_image_np(z_prev), vmin=0, vmax=1)
            torch.save(z_opt, '%s/z_opt.pth' % (opt.outf))

    functions.save_networks(netG, netD, z_opt, opt)
    return z_opt, in_s, netG
def create_modules(module_defs, device='cuda'):
    """Constructs module list of layer blocks from module configuration in module_defs"""
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams['channels'])]
    module_list = nn.ModuleList()
    yolo_layer_count = 0
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def['type'] == 'convolutional':
            bn = int(module_def['batch_normalize'])
            filters = int(module_def['filters'])
            kernel_size = int(module_def['size'])
            pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
            modules.add_module(
                'conv_%d' % i,
                nn.Conv2d(in_channels=output_filters[-1],
                          out_channels=filters,
                          kernel_size=kernel_size,
                          stride=int(module_def['stride']),
                          padding=pad,
                          bias=not bn))
            if bn:
                after_bn = batch_norm(filters)
                modules.add_module('batch_norm_%d' % i, after_bn)
                # BN is uniformly initialized by default in pytorch 1.0.1.
                # In pytorch>1.2.0, BN weights are initialized with constant 1,
                # but we find with the uniform initialization the model converges faster.
                nn.init.uniform_(after_bn.weight)
                nn.init.zeros_(after_bn.bias)
            if module_def['activation'] == 'leaky':
                modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

        elif module_def['type'] == 'maxpool':
            kernel_size = int(module_def['size'])
            stride = int(module_def['stride'])
            if kernel_size == 2 and stride == 1:
                modules.add_module('_debug_padding_%d' % i,
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride,
                                   padding=int((kernel_size - 1) // 2))
            modules.add_module('maxpool_%d' % i, maxpool)

        elif module_def['type'] == 'upsample':
            upsample = Upsample(scale_factor=int(module_def['stride']))
            modules.add_module('upsample_%d' % i, upsample)

        elif module_def['type'] == 'route':
            layers = [int(x) for x in module_def['layers'].split(',')]
            filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers])
            modules.add_module('route_%d' % i, EmptyLayer())

        elif module_def['type'] == 'shortcut':
            filters = output_filters[int(module_def['from'])]
            modules.add_module('shortcut_%d' % i, EmptyLayer())

        elif module_def['type'] == 'yolo':
            anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
            # Extract anchors
            anchors = [float(x) for x in module_def['anchors'].split(',')]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            nC = int(module_def['classes'])  # number of classes
            img_size = (int(hyperparams['width']), int(hyperparams['height']))
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, nC, int(hyperparams['nID']),
                                   int(hyperparams['embedding_dim']),
                                   img_size, yolo_layer_count, device)
            modules.add_module('yolo_%d' % i, yolo_layer)
            yolo_layer_count += 1

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
def forward(self, guidance, blur_depth, sparse_depth=None):
    # normalize features
    gate1_wb_cmb = torch.abs(guidance.narrow(1, 0, self.out_feature))
    gate2_wb_cmb = torch.abs(guidance.narrow(1, 1 * self.out_feature, self.out_feature))
    gate3_wb_cmb = torch.abs(guidance.narrow(1, 2 * self.out_feature, self.out_feature))
    gate4_wb_cmb = torch.abs(guidance.narrow(1, 3 * self.out_feature, self.out_feature))
    gate5_wb_cmb = torch.abs(guidance.narrow(1, 4 * self.out_feature, self.out_feature))
    gate6_wb_cmb = torch.abs(guidance.narrow(1, 5 * self.out_feature, self.out_feature))
    gate7_wb_cmb = torch.abs(guidance.narrow(1, 6 * self.out_feature, self.out_feature))
    gate8_wb_cmb = torch.abs(guidance.narrow(1, 7 * self.out_feature, self.out_feature))

    # gate1: left_top,    gate2: center_top,    gate3: right_top
    # gate4: left_center,                       gate5: right_center
    # gate6: left_bottom, gate7: center_bottom, gate8: right_bottom

    # top pad
    left_top_pad = nn.ZeroPad2d((0, 2, 0, 2))
    gate1_wb_cmb = left_top_pad(gate1_wb_cmb).unsqueeze(1)

    center_top_pad = nn.ZeroPad2d((1, 1, 0, 2))
    gate2_wb_cmb = center_top_pad(gate2_wb_cmb).unsqueeze(1)

    right_top_pad = nn.ZeroPad2d((2, 0, 0, 2))
    gate3_wb_cmb = right_top_pad(gate3_wb_cmb).unsqueeze(1)

    # center pad
    left_center_pad = nn.ZeroPad2d((0, 2, 1, 1))
    gate4_wb_cmb = left_center_pad(gate4_wb_cmb).unsqueeze(1)

    right_center_pad = nn.ZeroPad2d((2, 0, 1, 1))
    gate5_wb_cmb = right_center_pad(gate5_wb_cmb).unsqueeze(1)

    # bottom pad
    left_bottom_pad = nn.ZeroPad2d((0, 2, 2, 0))
    gate6_wb_cmb = left_bottom_pad(gate6_wb_cmb).unsqueeze(1)

    center_bottom_pad = nn.ZeroPad2d((1, 1, 2, 0))
    gate7_wb_cmb = center_bottom_pad(gate7_wb_cmb).unsqueeze(1)

    right_bottom_pad = nn.ZeroPad2d((2, 0, 2, 0))
    gate8_wb_cmb = right_bottom_pad(gate8_wb_cmb).unsqueeze(1)

    gate_wb = torch.cat((gate1_wb_cmb, gate2_wb_cmb, gate3_wb_cmb,
                         gate4_wb_cmb, gate5_wb_cmb, gate6_wb_cmb,
                         gate7_wb_cmb, gate8_wb_cmb), 1)

    # pad input and convert to 8 channel 3D features
    raw_depth_input = blur_depth
    # blur_depth_pad = nn.ZeroPad2d((1, 1, 1, 1))
    result_depth = blur_depth

    if sparse_depth is not None:
        sparse_mask = sparse_depth.sign()

    for i in range(self.prop_time):
        # one propagation
        spn_kernel = self.prop_kernel
        result_depth = self.pad_blur_depth(result_depth)
        neighbor_weighted_sum = self.eight_way_propagation(gate_wb, result_depth, spn_kernel)
        neighbor_weighted_sum = neighbor_weighted_sum.squeeze(1)
        neighbor_weighted_sum = neighbor_weighted_sum[:, :, 1:-1, 1:-1]
        result_depth = neighbor_weighted_sum
        if sparse_depth is not None:
            result_depth = (1 - sparse_mask) * result_depth + sparse_mask * raw_depth_input

    return result_depth
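# Illustrative reminder (torch only): nn.ZeroPad2d takes (left, right, top,
# bottom). That argument order is what steers each directional gate above,
# e.g. (0, 2, 0, 2) pushes the content toward the top-left corner.
import torch
import torch.nn as nn

x = torch.ones(1, 1, 2, 2)
print(nn.ZeroPad2d((0, 2, 0, 2))(x)[0, 0])
# tensor([[1., 1., 0., 0.],
#         [1., 1., 0., 0.],
#         [0., 0., 0., 0.],
#         [0., 0., 0., 0.]])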
def __init__(self, stride=2, padding=1):
    super(AvgPoolPad, self).__init__()
    self.pad = nn.ZeroPad2d((1, 0, 1, 0))
    self.pool = nn.AvgPool2d(3, stride=stride, padding=padding,
                             count_include_pad=False)
def __init__(self, n_class=1):
    super(FCN8s, self).__init__()
    padDim = 4
    nFilters = [16, 32, 32]
    filtsize = [5, 5, 5]
    poolsize = [2, 2, 2]
    stepSize = [2, 2, 2]
    ninputChannels = 5

    self.padding_1 = nn.ZeroPad2d(padDim)
    self.cnn_conv1 = nn.Conv2d(ninputChannels, nFilters[0],
                               (filtsize[0], filtsize[0]), (1, 1))
    self.tanh1 = nn.Tanh()
    self.maxpool1 = nn.MaxPool2d((poolsize[0], poolsize[0]),
                                 (stepSize[0], stepSize[0]))

    ninputChannels = nFilters[0]
    self.padding_2 = nn.ZeroPad2d(padDim)
    self.cnn_conv2 = nn.Conv2d(ninputChannels, nFilters[1],
                               (filtsize[1], filtsize[1]), (1, 1))
    self.tanh2 = nn.Tanh()
    self.maxpool2 = nn.MaxPool2d((poolsize[1], poolsize[1]),
                                 (stepSize[1], stepSize[1]))

    ninputChannels = nFilters[1]
    self.padding_3 = nn.ZeroPad2d(padDim)
    self.cnn_conv3 = nn.Conv2d(ninputChannels, nFilters[2],
                               (filtsize[2], filtsize[2]), (1, 1))
    self.tanh3 = nn.Tanh()
    self.maxpool3 = nn.MaxPool2d((poolsize[2], poolsize[2]),
                                 (stepSize[2], stepSize[2]))

    nFullyConnected = nFilters[2] * 10 * 8
    self.cnn_drop = nn.Dropout2d(p=0.6)
    self.linear = nn.Linear(nFullyConnected, 128)

    # conv1
    self.conv1_1 = nn.Conv2d(128, 64, (1, 3), padding=(0, 100))
    self.bnorm1_1 = nn.BatchNorm2d(64)
    self.relu1_1 = nn.Tanh()
    self.conv1_2 = nn.Conv2d(64, 64, (1, 3), padding=(0, 1))
    self.bnorm1_2 = nn.BatchNorm2d(64)
    self.relu1_2 = nn.Tanh()
    self.pool1 = nn.MaxPool2d((1, 2), stride=(1, 2), ceil_mode=True)  # 1/2

    # conv2
    self.conv2_1 = nn.Conv2d(64, 128, (1, 3), padding=(0, 1))
    self.bnorm2_1 = nn.BatchNorm2d(128)
    self.relu2_1 = nn.Tanh()
    self.conv2_2 = nn.Conv2d(128, 128, (1, 3), padding=(0, 1))
    self.bnorm2_2 = nn.BatchNorm2d(128)
    self.relu2_2 = nn.Tanh()
    self.pool2 = nn.MaxPool2d((1, 2), stride=(1, 2), ceil_mode=True)  # 1/4

    # conv3
    self.conv3_1 = nn.Conv2d(128, 256, (1, 3), padding=(0, 1))
    self.bnorm3_1 = nn.BatchNorm2d(256)
    self.relu3_1 = nn.Tanh()
    self.conv3_2 = nn.Conv2d(256, 256, (1, 3), padding=(0, 1))
    self.bnorm3_2 = nn.BatchNorm2d(256)
    self.relu3_2 = nn.Tanh()
    self.conv3_3 = nn.Conv2d(256, 256, (1, 3), padding=(0, 1))
    self.bnorm3_3 = nn.BatchNorm2d(256)
    self.relu3_3 = nn.Tanh()
    self.pool3 = nn.MaxPool2d((1, 2), stride=(1, 2), ceil_mode=True)  # 1/8

    # conv4
    self.conv4_1 = nn.Conv2d(256, 512, (1, 3), padding=(0, 1))
    self.bnorm4_1 = nn.BatchNorm2d(512)
    self.relu4_1 = nn.Tanh()
    self.conv4_2 = nn.Conv2d(512, 512, (1, 3), padding=(0, 1))
    self.bnorm4_2 = nn.BatchNorm2d(512)
    self.relu4_2 = nn.Tanh()
    self.conv4_3 = nn.Conv2d(512, 512, (1, 3), padding=(0, 1))
    self.bnorm4_3 = nn.BatchNorm2d(512)
    self.relu4_3 = nn.Tanh()
    self.pool4 = nn.MaxPool2d((1, 2), stride=(1, 2), ceil_mode=True)  # 1/16

    # conv5
    self.conv5_1 = nn.Conv2d(512, 512, (1, 3), padding=(0, 1))
    self.bnorm5_1 = nn.BatchNorm2d(512)
    self.relu5_1 = nn.Tanh()
    self.conv5_2 = nn.Conv2d(512, 512, (1, 3), padding=(0, 1))
    self.bnorm5_2 = nn.BatchNorm2d(512)
    self.relu5_2 = nn.Tanh()
    self.conv5_3 = nn.Conv2d(512, 512, (1, 3), padding=(0, 1))
    self.bnorm5_3 = nn.BatchNorm2d(512)
    self.relu5_3 = nn.Tanh()
    self.pool5 = nn.MaxPool2d((1, 2), stride=(1, 2), ceil_mode=True)  # 1/32

    # fc6
    self.fc6 = nn.Conv2d(512, 4096, (1, 7))
    self.relu6 = nn.Tanh()
    self.drop6 = nn.Dropout2d()

    # fc7
    self.fc7 = nn.Conv2d(4096, 4096, (1, 1))
    self.relu7 = nn.Tanh()
    self.drop7 = nn.Dropout2d()

    self.score_fr = nn.Conv2d(4096, n_class, (1, 1))
    self.score_pool3 = nn.Conv2d(256, n_class, (1, 1))
    self.score_pool4 = nn.Conv2d(512, n_class, (1, 1))

    self.upscore2 = nn.ConvTranspose2d(n_class, n_class, (1, 4),
                                       stride=(1, 2), bias=False)
    self.upscore8 = nn.ConvTranspose2d(n_class, n_class, (1, 16),
                                       stride=(1, 8), bias=False)
    self.upscore_pool4 = nn.ConvTranspose2d(n_class, n_class, (1, 4),
                                            stride=(1, 2), bias=False)

    self.Sigmoid = nn.Sigmoid()
    self.classifierLayer = nn.Linear(128, 150)
    # explicit dim avoids the deprecated implicit-dim LogSoftmax
    self.logsoftmax = nn.LogSoftmax(dim=1)
def create_modules(module_defs):
    """Constructs module list of layer blocks from module configuration in module_defs"""
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def["type"] == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
            modules.add_module(
                "conv_%d" % i,
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
            if module_def["activation"] == "leaky":
                modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))

        elif module_def["type"] == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                padding = nn.ZeroPad2d((0, 1, 0, 1))
                modules.add_module("_debug_padding_%d" % i, padding)
            maxpool = nn.MaxPool2d(
                kernel_size=int(module_def["size"]),
                stride=int(module_def["stride"]),
                padding=int((kernel_size - 1) // 2),
            )
            modules.add_module("maxpool_%d" % i, maxpool)

        elif module_def["type"] == "upsample":
            upsample = nn.Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module("upsample_%d" % i, upsample)

        elif module_def["type"] == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            filters = sum([output_filters[layer_i] for layer_i in layers])
            modules.add_module("route_%d" % i, EmptyLayer())

        elif module_def["type"] == "shortcut":
            filters = output_filters[int(module_def["from"])]
            modules.add_module("shortcut_%d" % i, EmptyLayer())

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
def __init__(self, padding):
    super(pad, self).__init__()
    self.padding = nn.ZeroPad2d(padding)
def right_shift(x):
    x = x[:, :, :, :-1]
    pad = nn.ZeroPad2d((1, 0, 0, 0))
    return pad(x)
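# Illustrative check (assumes torch and the definition above): right_shift
# drops the last column and zero-pads one column on the left, mirroring
# down_shift along the width axis.
import torch
import torch.nn as nn

x = torch.arange(4, dtype=torch.float32).view(1, 1, 2, 2)
print(right_shift(x)[0, 0])
# tensor([[0., 0.],
#         [0., 2.]])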
def save_layer_feature(style_feat, in_featuremap, out_featuremap, save_path,
                       file_name, mean, std):
    padding = nn.ZeroPad2d(5)
    in_featuremap = padding(in_featuremap).tanh() / 2 + 0.5
    out_featuremap = padding(out_featuremap).tanh() / 2 + 0.5
    style_feat = padding(style_feat).tanh() / 2 + 0.5
    b, c, h, w = in_featuremap.size()
    # features = feature_numpy[0]
    result = Image.new('RGB', (w * c, h * 3 + 60))

    mean = mean.view(3, b, c)
    std = std.view(3, b, c)
    content_mean = mean[0]
    content_std = std[0]
    style_mean = mean[1]
    style_std = std[1]
    adain_mean = mean[2]
    adain_std = std[2]

    fontsize = 10
    font = ImageFont.truetype(
        fm.findfont(fm.FontProperties(family='DejaVu Sans')), fontsize)

    for i in range(c):
        in_feature = style_feat[0:1, i:i + 1, :, :]
        in_feature_numpy = tensor2im(in_feature)
        in_image_pil = Image.fromarray(in_feature_numpy)
        result.paste(in_image_pil, box=(w * i, 0))

        in_feature = in_featuremap[0:1, i:i + 1, :, :]
        in_feature_numpy = tensor2im(in_feature)
        in_image_pil = Image.fromarray(in_feature_numpy)
        result.paste(in_image_pil, box=(w * i, h))

        out_feature = out_featuremap[0:1, i:i + 1, :, :]
        out_max = out_feature[0][0].max()
        out_feature_numpy = tensor2im(out_feature)
        np_max = np.max(out_feature_numpy)
        location = np.where(out_feature_numpy == np_max)
        out_image_pil = Image.fromarray(out_feature_numpy)
        result.paste(out_image_pil, box=(w * i, h * 2))

        draw = ImageDraw.Draw(result)
        color = "#FF0000"
        string = str(round(content_mean[0, i].cpu().item(), 2)) + ', ' + \
            str(round(content_std[0, i].cpu().item(), 2))
        draw.text((w * i, h * 3 + 4), string, font=font, fill=color,
                  spacing=0, align='left')

        color = "#00FF00"
        string = str(round(style_mean[0, i].cpu().item(), 2)) + ', ' + \
            str(round(style_std[0, i].cpu().item(), 2))
        draw.text((w * i, h * 3 + 4 + 20), string, font=font, fill=color,
                  spacing=0, align='left')

        color = "#FFFFFF"
        string = str(round(adain_mean[0, i].cpu().item(), 2)) + ', ' + \
            str(round(adain_std[0, i].cpu().item(), 2))
        draw.text((w * i, h * 3 + 4 + 40), string, font=font, fill=color,
                  spacing=0, align='left')

        save_name = os.path.join(save_path, str(i) + '.jpg')
        result.save(save_name, quality=100)
def __init__(self, in_channels_left, out_channels_left, in_channels_right,
             out_channels_right):
    super(FirstCell, self).__init__()
    self.conv_1x1 = nn.Sequential()
    self.conv_1x1.add_module('relu', nn.ReLU())
    self.conv_1x1.add_module(
        'conv',
        nn.Conv2d(in_channels_right, out_channels_right, 1, stride=1,
                  bias=False))
    self.conv_1x1.add_module(
        'bn',
        nn.BatchNorm2d(out_channels_right, eps=0.001, momentum=0.1,
                       affine=True))

    self.relu = nn.ReLU()
    self.path_1 = nn.Sequential()
    self.path_1.add_module(
        'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
    self.path_1.add_module(
        'conv',
        nn.Conv2d(in_channels_left, out_channels_left, 1, stride=1,
                  bias=False))
    # nn.Sequential, not nn.ModuleList: the submodules are accessed by name,
    # matching path_1 above.
    self.path_2 = nn.Sequential()
    self.path_2.add_module('pad', nn.ZeroPad2d((0, 1, 0, 1)))
    self.path_2.add_module(
        'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
    self.path_2.add_module(
        'conv',
        nn.Conv2d(in_channels_left, out_channels_left, 1, stride=1,
                  bias=False))

    self.final_path_bn = nn.BatchNorm2d(out_channels_left * 2, eps=0.001,
                                        momentum=0.1, affine=True)

    self.comb_iter_0_left = BranchSeparables(out_channels_right,
                                             out_channels_right, 5, 1, 2,
                                             bias=False)
    self.comb_iter_0_right = BranchSeparables(out_channels_right,
                                              out_channels_right, 3, 1, 1,
                                              bias=False)
    self.comb_iter_1_left = BranchSeparables(out_channels_right,
                                             out_channels_right, 5, 1, 2,
                                             bias=False)
    self.comb_iter_1_right = BranchSeparables(out_channels_right,
                                              out_channels_right, 3, 1, 1,
                                              bias=False)
    self.comb_iter_2_left = nn.AvgPool2d(3, stride=1, padding=1,
                                         count_include_pad=False)
    self.comb_iter_3_left = nn.AvgPool2d(3, stride=1, padding=1,
                                         count_include_pad=False)
    self.comb_iter_3_right = nn.AvgPool2d(3, stride=1, padding=1,
                                          count_include_pad=False)
    self.comb_iter_4_left = BranchSeparables(out_channels_right,
                                             out_channels_right, 3, 1, 1,
                                             bias=False)
def __init__(self):
    super(SimpleNet, self).__init__()
    self.conv_pad_1 = nn.ZeroPad2d((1, 1, 1, 1))
    self.conv_1 = nn.Conv2d(3, 32, (3, 3), (1, 1))
    self.pool_pad_1 = nn.ZeroPad2d((0, 0, 0, 0))
    self.pool_1 = nn.MaxPool2d(1)
    self.drop_1 = nn.Dropout(p=0.2)

    self.conv_pad_2 = nn.ZeroPad2d((1, 1, 1, 1))
    self.conv_2 = nn.Conv2d(32, 32, (3, 3), (1, 1))
    self.pool_pad_2 = nn.ZeroPad2d((0, 0, 0, 0))
    self.pool_2 = nn.MaxPool2d(2)
    self.drop_2 = nn.Dropout(p=0.2)

    self.conv_pad_3 = nn.ZeroPad2d((1, 1, 1, 1))
    self.conv_3 = nn.Conv2d(32, 64, (3, 3), (1, 1))
    self.pool_pad_3 = nn.ZeroPad2d((0, 0, 0, 0))
    self.pool_3 = nn.MaxPool2d(1)
    self.drop_3 = nn.Dropout(p=0.2)

    self.conv_pad_4 = nn.ZeroPad2d((1, 1, 1, 1))
    self.conv_4 = nn.Conv2d(64, 64, (3, 3), (1, 1))
    self.pool_pad_4 = nn.ZeroPad2d((0, 0, 0, 0))
    self.pool_4 = nn.MaxPool2d(2)
    self.drop_4 = nn.Dropout(p=0.2)

    self.conv_pad_5 = nn.ZeroPad2d((1, 1, 1, 1))
    self.conv_5 = nn.Conv2d(64, 128, (3, 3), (1, 1))
    self.pool_pad_5 = nn.ZeroPad2d((0, 0, 0, 0))
    self.pool_5 = nn.MaxPool2d(1)
    self.drop_5 = nn.Dropout(p=0.2)

    self.conv_pad_6 = nn.ZeroPad2d((1, 1, 1, 1))
    self.conv_6 = nn.Conv2d(128, 128, (3, 3), (1, 1))
    self.pool_pad_6 = nn.ZeroPad2d((0, 0, 0, 0))
    self.pool_6 = nn.MaxPool2d(2)
    self.drop_6 = nn.Dropout(p=0.2)

    self.fc = nn.Linear(128 * 4 * 4, 10)
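# Hedged shape walk-through for SimpleNet, assuming the class subclasses
# nn.Module (its __init__ suggests so) and a 3x32x32 input such as CIFAR-10,
# which is what makes the 128 * 4 * 4 fc input size work out. Dropout layers
# are skipped since they do not change shapes.
import torch

net = SimpleNet()
x = torch.randn(1, 3, 32, 32)
stages = ((net.conv_pad_1, net.conv_1, net.pool_pad_1, net.pool_1),
          (net.conv_pad_2, net.conv_2, net.pool_pad_2, net.pool_2),
          (net.conv_pad_3, net.conv_3, net.pool_pad_3, net.pool_3),
          (net.conv_pad_4, net.conv_4, net.pool_pad_4, net.pool_4),
          (net.conv_pad_5, net.conv_5, net.pool_pad_5, net.pool_5),
          (net.conv_pad_6, net.conv_6, net.pool_pad_6, net.pool_6))
for stage in stages:
    for layer in stage:
        x = layer(x)
    print(x.shape)  # spatial size halves after pool_2, pool_4, pool_6
# final: torch.Size([1, 128, 4, 4]) -> flatten -> net.fc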
def __init__(self, kernel_size, stride=1, padding=1, zero_pad=False):
    super(MaxPool, self).__init__()
    self.zero_pad = nn.ZeroPad2d((1, 0, 1, 0)) if zero_pad else None
    self.pool = nn.MaxPool2d(kernel_size, stride=stride, padding=padding)
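# Hedged sketch of how this module is typically used (PNASNet-style, the
# forward is not shown above and is assumed here): with zero_pad set, the
# forward pads top-left, pools, then slices off the first row/column to
# reproduce TensorFlow 'SAME' pooling at stride 2.
import torch

m = MaxPool(3, stride=2, padding=1, zero_pad=True)
x = torch.randn(1, 4, 10, 10)
y = m.pool(m.zero_pad(x))[:, :, 1:, 1:]   # assumed forward logic
print(y.shape)                            # torch.Size([1, 4, 5, 5])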
def __init__(self):
    super(CAE, self).__init__()
    self.encoded = None

    # ENCODER

    # 64x64x64
    self.e_conv_1 = nn.Sequential(
        # 1
        nn.ZeroPad2d((1, 2, 1, 2)),
        nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(5, 5), stride=(2, 2)),
        nn.LeakyReLU())

    # 128x32x32
    self.e_conv_2 = nn.Sequential(
        # 2
        nn.ZeroPad2d((1, 2, 1, 2)),
        nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(5, 5), stride=(2, 2)),
        nn.LeakyReLU())

    # 128x32x32
    self.e_block_1 = nn.Sequential(
        # 3
        nn.ZeroPad2d((1, 1, 1, 1)),
        nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), stride=(1, 1)),
        nn.LeakyReLU(),
        # 4
        nn.ZeroPad2d((1, 1, 1, 1)),
        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1)),
    )

    # 128x32x32
    self.e_block_2 = nn.Sequential(
        # 5
        nn.ZeroPad2d((1, 1, 1, 1)),
        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1)),
        nn.LeakyReLU(),
    )

    # 128x32x32
    self.e_block_3 = nn.Sequential(
        # 8
        nn.ZeroPad2d((1, 1, 1, 1)),
        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1)),
    )

    # 32x32x32
    self.e_conv_3 = nn.Sequential(
        # 9
        nn.Conv2d(in_channels=128, out_channels=32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)),
        nn.Tanh())

    # DECODER

    # 128x64x64
    self.d_up_conv_1 = nn.Sequential(
        # 1
        nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), stride=(1, 1)),
        nn.LeakyReLU(),
        # 2
        nn.ZeroPad2d((1, 1, 1, 1)),
        nn.ConvTranspose2d(in_channels=64, out_channels=128, kernel_size=(2, 2), stride=(2, 2)))

    # 128x64x64
    self.d_block_1 = nn.Sequential(
        # 3
        nn.ZeroPad2d((1, 1, 1, 1)),
        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1)),
        nn.LeakyReLU(),
    )

    # 128x64x64
    self.d_block_2 = nn.Sequential(
        # 5
        nn.ZeroPad2d((1, 1, 1, 1)),
        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1)),
        nn.LeakyReLU(),
    )

    # 128x64x64
    self.d_block_3 = nn.Sequential(
        # 8
        nn.ZeroPad2d((1, 1, 1, 1)),
        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1, 1)),
    )

    # 256x128x128
    self.d_up_conv_2 = nn.Sequential(
        # 9
        nn.Conv2d(in_channels=128, out_channels=32, kernel_size=(3, 3), stride=(1, 1)),
        nn.LeakyReLU(),
        # 10
        nn.ZeroPad2d((1, 1, 1, 1)),
        nn.ConvTranspose2d(in_channels=32, out_channels=256, kernel_size=(2, 2), stride=(2, 2)))

    # 3x128x128
    self.d_up_conv_3 = nn.Sequential(
        # 11
        nn.Conv2d(in_channels=256, out_channels=16, kernel_size=(3, 3), stride=(1, 1)),
        nn.LeakyReLU(),
        # 12
        nn.ReflectionPad2d((2, 2, 2, 2)),
        nn.Conv2d(in_channels=16, out_channels=3, kernel_size=(3, 3), stride=(1, 1)),
        nn.Tanh())
def create_modules(module_defs):
    """Constructs module list of layer blocks from module configuration in module_defs"""
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()
    for module_i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def["type"] == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2
            modules.add_module(
                f"conv_{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module(
                    f"batch_norm_{module_i}",
                    nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
            if module_def["activation"] == "leaky":
                modules.add_module(f"leaky_{module_i}",
                                   nn.LeakyReLU(0.1, inplace=True))

        elif module_def["type"] == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                modules.add_module(f"_debug_padding_{module_i}",
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride,
                                   padding=int((kernel_size - 1) // 2))
            modules.add_module(f"maxpool_{module_i}", maxpool)

        elif module_def["type"] == "upsample":
            upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module(f"upsample_{module_i}", upsample)

        elif module_def["type"] == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            filters = sum([output_filters[1:][i] for i in layers])
            modules.add_module(f"route_{module_i}", EmptyLayer())

        elif module_def["type"] == "shortcut":
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module(f"shortcut_{module_i}", EmptyLayer())

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_size = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module(f"yolo_{module_i}", yolo_layer)

        elif module_def["type"] == "graylayer":
            modules.add_module(f"graylayer_{module_i}", grayLayer())
            # cfg values are strings; convert like the other branches
            filters = int(module_def["filters"])

        elif module_def["type"] == "expandlayer":
            modules.add_module(f"expandlayer_{module_i}", expandLayer())
            filters = int(module_def["filters"])

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
def __init__(self, label_num):
    super(Autoencoder, self).__init__()
    self.label_num = label_num
    self.conv1 = nn.Sequential(
        nn.ZeroPad2d((1, 2, 1, 2)),
        nn.Conv2d(3, 3, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.Conv2d(3, 3, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.Conv2d(3, 32, kernel_size=5, stride=2), nn.LeakyReLU(),
        nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.LeakyReLU())
    self.conv2 = nn.Sequential(
        nn.ZeroPad2d((1, 2, 1, 2)),
        nn.Conv2d(32, 64, kernel_size=5, stride=2), nn.LeakyReLU(),
        nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.LeakyReLU())
    self.conv3 = nn.Sequential(
        nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1), nn.LeakyReLU(),
        nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.LeakyReLU())
    self.conv4 = nn.Sequential(
        nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1), nn.LeakyReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.LeakyReLU())
    self.fc1 = nn.Conv2d(256, 10, kernel_size=3, stride=2, padding=1)
    self.fc2 = nn.Linear(640, 2, bias=True)
    self.fc_mu = nn.Sequential(nn.Linear(640, 200, bias=True), nn.LeakyReLU())
    self.fc_var = nn.Sequential(nn.Linear(640, 200, bias=True), nn.ReLU())
    self.emb_label = nn.Sequential(
        nn.Linear(self.label_num, 200, bias=True), nn.LeakyReLU())
    self.fc2dec = nn.Linear(400, 640, bias=True)
    self.fc1dec = nn.Sequential(
        nn.ConvTranspose2d(10, 256, kernel_size=2, stride=2), nn.LeakyReLU())
    self.conv4dec = nn.Sequential(
        nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2), nn.LeakyReLU())
    self.conv3dec = nn.Sequential(
        nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2), nn.LeakyReLU())
    self.conv2dec = nn.Sequential(
        nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2), nn.LeakyReLU())
    self.conv1dec = nn.Sequential(
        nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.ConvTranspose2d(32, 3, kernel_size=2, stride=2), nn.LeakyReLU(),
        nn.Conv2d(3, 3, kernel_size=3, padding=1), nn.LeakyReLU(),
        nn.Conv2d(3, 3, kernel_size=3, padding=1), nn.Sigmoid())