def __init__(self):
    super(PartialGen, self).__init__()
    self.first = PartialConv2d(3, 32, 1, 1, multi_channel=True, return_mask=True)
    self.g1 = GenBlock(32)
    self.c1 = Conv2d(32, 32, 3, 1, 1)
    self.c2 = Conv2d(32, 3, 3, 1, 1)
def test_conv_with_backward_2kernels(self):
    conv = """
    def convolution(float(N,C,H,W) I, float(M,C,KH,KW) W1, float(M) Bias) -> (O) {
        O(n, m, h, w) +=! I(n, r_c, h + r_kh, w + r_kw) * W1(m, r_c, r_kh, r_kw)
        O(n, m, h, w) = O(n, m, h, w) + Bias(m)
    }
    def convolution_igrad(float(M,C,KH,KW) W1, float(N,M,H,W) d_O) -> (d_I) {
        d_I(n, c, h, w) +=! d_O(n, r_m, h - r_kh, w - r_kw) * W1(r_m, c, r_kh, r_kw)
    }
    def convolution_wgrad(float(N,C,H,W) I, float(N,M,H,W) d_O) -> (d_W1) {
        d_W1(m, c, kh, kw) +=! d_O(r_n, m, r_h - kh, r_w - kw) * I(r_n, c, r_h, r_w)
    }
    def convolution_biasgrad(float(M) Bias) -> (d_Bias) {
        # TODO: Bias gradient is incorrect; check
        d_Bias(m) = Bias(m)
    }
    """
    N, C, H, W, O, kH, kW = 32, 4, 56, 56, 16, 1, 1
    T = tc.define(
        conv,
        tc.make_autotuned_options_factory(
            starting_options='naive', tuner_config=tuner_config))
    I = torch.randn(N, C, H, W, device='cuda', requires_grad=True)

    # Reference
    from torch.nn.modules.conv import Conv2d
    Conv = Conv2d(C, O, 1, stride=1).cuda()
    ref = Conv.forward(I)

    W = Conv.weight.clone()
    Bias = Conv.bias.clone()

    def convolution_backward(I, W, Bias, d_O):
        d_I = T.convolution_igrad(W, d_O)
        d_W1 = T.convolution_wgrad(I, d_O)
        d_Bias = T.convolution_biasgrad(Bias)
        return (d_I, d_W1, d_Bias)

    convolution_function = tc.make_autograd(T.convolution, convolution_backward)

    # First occurrence triggers tuning
    out = convolution_function(I, W, Bias)
    out.sum().backward()

    # Subsequent occurrences do not
    out = convolution_function(I, W, Bias)
    out.sum().backward()

    tc.assert_almost_equal(ref, out, I, operations=C * kH * kW)
def __init__(self, img_size=(args.im_s, args.im_s), patch_size=(args.ps, args.ps),
             emb_dim=args.emb_dim, mlp_dim=args.mlp_dim, num_heads=args.num_heads,
             num_layers=args.num_layers, n_classes=2, dropout_rate=0., at_d_r=args.at_d_r):
    super(VIT, self).__init__()
    self.nl = num_layers
    ih, iw = img_size
    ph, pw = patch_size
    num_patches = int((ih * iw) / (ph * pw))
    self.cls_tokens = nn.Parameter(torch.rand(1, 1, emb_dim))
    self.patch_embed = Conv2d(in_channels=3, out_channels=emb_dim,
                              kernel_size=patch_size, stride=patch_size)
    self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, emb_dim))
    self.dropout = nn.Dropout(dropout_rate)
    self.tel = nn.ModuleList()
    for i in range(num_layers):
        layer = transencoder(emb_dim, mlp_dim, num_heads, at_d_r)
        self.tel.append(layer)
    self.mlp_head = nn.Sequential(nn.LayerNorm(emb_dim), nn.Linear(emb_dim, n_classes))
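# Hedged sketch of a forward pass for the VIT module above. The actual forward()
# is not part of this snippet, so the flow below (patch embedding, cls token,
# positional embedding, transencoder stack, MLP head on the cls token) follows
# the standard ViT recipe and the attributes defined in __init__, not necessarily
# the author's exact code.
def forward(self, x):
    B = x.shape[0]
    x = self.patch_embed(x)                      # (B, emb_dim, H/ph, W/pw)
    x = x.flatten(2).transpose(1, 2)             # (B, num_patches, emb_dim)
    cls = self.cls_tokens.expand(B, -1, -1)      # (B, 1, emb_dim)
    x = torch.cat([cls, x], dim=1)               # (B, num_patches + 1, emb_dim)
    x = self.dropout(x + self.pos_embed)
    for layer in self.tel:                       # transencoder blocks
        x = layer(x)
    return self.mlp_head(x[:, 0])                # classify from the cls token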
def _init_layers(self):
    padding = self.filter_size // 2
    self.conv1 = Conv2d(self.input_channels, self.num_filters,
                        kernel_size=self.filter_size, padding=padding)
    self.relu1 = ReLU()
    self.conv2 = Conv2d(self.num_filters, self.num_filters,
                        kernel_size=self.filter_size, padding=padding)
    self.relu2 = ReLU()
    self.conv3 = Conv2d(self.num_filters, 1,
                        kernel_size=self.filter_size, padding=padding)
    self.softmax = Softmax(dim=0)
def __init__(self, batch_norm=True):
    super().__init__()
    self.batch_norm = batch_norm
    self.c1 = Conv2d(3, 64, 5, 2, padding=2)
    if self.batch_norm:
        self.bn1 = BatchNorm2d(64)
    self.c2 = Conv2d(64, 128, 3, 2, padding=1)
    if self.batch_norm:
        self.bn2 = BatchNorm2d(128)
    self.c3 = Conv2d(128, 128, 3, 1, padding=1)
    if self.batch_norm:
        self.bn3 = BatchNorm2d(128)
    self.up4 = Upsample(scale_factor=2)
    self.c4 = Conv2d(128 + 64, 64, 3, 1, padding=1)
    self.up5 = Upsample(scale_factor=2)
    self.c5 = Conv2d(64 + 3, 3, 3, 1, padding=1)
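# Hedged sketch of a forward pass consistent with the channel arithmetic above:
# 128 + 64 and 64 + 3 suggest skip connections via concatenation with the first
# encoder feature map and the raw input. The real forward() is not shown, so the
# activation choice (torch.relu) and the exact wiring are assumptions.
def forward(self, x):
    e1 = torch.relu(self.bn1(self.c1(x)) if self.batch_norm else self.c1(x))    # 1/2 res, 64 ch
    e2 = torch.relu(self.bn2(self.c2(e1)) if self.batch_norm else self.c2(e1))  # 1/4 res, 128 ch
    e3 = torch.relu(self.bn3(self.c3(e2)) if self.batch_norm else self.c3(e2))  # 1/4 res, 128 ch
    d4 = torch.relu(self.c4(torch.cat([self.up4(e3), e1], dim=1)))              # 1/2 res, 64 ch
    out = self.c5(torch.cat([self.up5(d4), x], dim=1))                          # full res, 3 ch
    return out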
def __init__(
    self,
    in_channels: int,
    out_channels: int,
    *args,
    hidden_kernel: Union[int, Tuple[int, int]],
    **kwargs,  # XXX star-args are passed directly to Conv2d for the `input`
):
    # make sure the kernel sizes are odd, and get the padding
    # XXX the only place where the `2d-ness` is hardcoded here!
    n_kernel = _pair(hidden_kernel)
    assert all(k & 1 for k in n_kernel)
    n_pad = [k >> 1 for k in n_kernel]  # stride == 1

    super().__init__()

    # input to reset and update gates, and the candidate state
    self.x_hrz = Conv2d(in_channels, 3 * out_channels, *args, **kwargs)

    # hidden state to reset and update gates
    self.h_rz = Conv2d(out_channels, 2 * out_channels, n_kernel,
                       stride=1, bias=False, padding=n_pad)

    # hidden state to the candidate
    self.h_h = Conv2d(out_channels, out_channels, n_kernel,
                      stride=1, bias=False, padding=n_pad)
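# Hedged sketch of the gated update these three convolutions typically implement
# (a convolutional GRU cell). The actual forward() is not part of this snippet,
# so the chunk ordering and gating below follow the standard GRU equations, not
# necessarily the author's exact implementation.
def forward(self, x, h):
    x_r, x_z, x_n = self.x_hrz(x).chunk(3, dim=1)   # input -> reset, update, candidate
    h_r, h_z = self.h_rz(h).chunk(2, dim=1)         # hidden -> reset, update
    r = torch.sigmoid(x_r + h_r)                    # reset gate
    z = torch.sigmoid(x_z + h_z)                    # update gate
    n = torch.tanh(x_n + r * self.h_h(h))           # candidate state
    return (1 - z) * n + z * h                      # blended new hidden state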
def __init__(self):
    super().__init__()
    self.first = PartialConv2d(3, 32, 3, padding=1, multi_channel=True, return_mask=True)
    self.r1 = PartialResBlock(32)
    self.r2 = PartialResBlock(32)
    self.r3 = PartialResBlock(32)
    self.c1 = Conv2d(32, 32 * 4, 3, 2, 1)
    self.up1 = PixelShuffle(2)
    self.c2 = Conv2d(32, 16, 3, 1, 1)
    self.c3 = Conv2d(16, 16 * 4, 3, 2, 1)
    self.up2 = PixelShuffle(2)
    self.c4 = Conv2d(16, 3, 3, 1, 1)
def make_conv_layers(cfg):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M_2':
            layers += [Downsample(kernel_size=2)]
        elif v == 'M_4':
            layers += [Downsample(kernel_size=4)]
        else:
            conv = Conv2d(in_channels, v, kernel_size=3, padding=1)
            layers += [conv, ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)
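# Hedged usage sketch for make_conv_layers. The cfg below is hypothetical (the
# real configs live elsewhere): integers insert Conv2d+ReLU pairs, 'M_2'/'M_4'
# insert the custom Downsample modules used above. The expected output shape
# assumes each Downsample(kernel_size=k) reduces the spatial size by a factor k.
cfg = [64, 64, 'M_2', 128, 128, 'M_2', 256]   # hypothetical config
features = make_conv_layers(cfg)
x = torch.randn(1, 3, 64, 64)                 # dummy RGB input
print(features(x).shape)                      # expected: (1, 256, 16, 16)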
def acl_vgg(data, stateful):
    dcn = dcn_vgg()
    att_module = nn.Sequential(
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        # MaxPool2d does not change the channel count: the dcn output still has
        # 512 channels here, so the first conv takes 512 input channels.
        Conv2d(512, 64, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), padding=0),
        ReLU(),
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        Conv2d(128, 64, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(128, 1, kernel_size=(1, 1), padding=0),
        Sigmoid(),
        Upsample(scale_factor=4, mode='nearest'),
    )

    outs = TimeDistributed(dcn)(data)
    attention = TimeDistributed(att_module)(outs)

    # Keras-style flatten / repeat-vector / permute / reshape of the attention map;
    # still pseudocode until TimeDistributed wraps a real module here.
    f_attention = TimeDistributed()(attention.view(attention.size()[0], -1))  # flatten
    f_attention = TimeDistributed()(f_attention.expand(512))                  # repeat vector
    f_attention = TimeDistributed()(f_attention.transpose().unsqueeze(0))     # permute
    f_attention = TimeDistributed()(f_attention.reshape((32, 40, 512)))
    m_outs = outs * f_attention  # element-wise multiplication
    outs = outs + m_outs

    ### This needs to change
    clstm = ConvLSTMCell(use_gpu=False, input_size=512, hidden_size=256, kernel_size=(3, 3))
    outs = clstm(outs)
    ###

    produce_smaps = nn.Sequential(
        # InputDimensions will be figured out after changing the ConvLSTM
        Conv2d(InputDimensions, 1, kernel_size=(1, 1), padding=0),
        Sigmoid(),
        Upsample(scale_factor=4, mode='nearest'),
    )

    outs = TimeDistributed(produce_smaps)(outs)
    attention = TimeDistributed(Upsample(scale_factor=2, mode='nearest'))(attention)
    return [outs, outs, outs, attention, attention, attention]
def __init__(self):
    super().__init__()
    self.first = PartialConv2d(3, 32, 1, 1, multi_channel=True, return_mask=True)
    self.c1 = PartialConv2d(32, 32, 3, 1, padding=1, multi_channel=True, return_mask=True)
    self.c2 = PartialConv2d(32, 32, 3, 1, padding=1, multi_channel=True, return_mask=True)
    self.c3 = PartialConv2d(32, 32, 3, 1, padding=1, multi_channel=True, return_mask=True)
    self.c4 = PartialConv2d(32, 32, 3, 1, padding=1, multi_channel=True, return_mask=True)
    self.last = Conv2d(32, 3, 3, 1, 1)
def __init__(self):
    super(Discriminator, self).__init__()
    self.conv1_1 = Conv2d(4, 3, kernel_size=(1, 1), stride=(1, 1), padding=(1, 1))
    self.conv1_2 = Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.conv2_1 = Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.conv2_2 = Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.conv3_1 = Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.conv3_2 = Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.fc4 = Linear(49152, 100)
    self.fc5 = Linear(100, 2)
    self.fc6 = Linear(2, 1)
    self.pool = MaxPool2d(kernel_size=2, stride=2, padding=0)
    self.tanh = Tanh()
    self.sigmoid = Sigmoid()
def __init__(self):
    super(Sal_based_Attention_module, self).__init__()

    # Create the encoder following the VGG16-based SalGAN architecture and apply
    # the changes described below.
    Based_Attention_Module = based_AM
    # Keep only the first 5 conv blocks, preserving the VGG receptive field of 212*212;
    # each neuron on the bottleneck sees just a (244, 244) viewport while sliding.
    # Input (640, 320); bottleneck feature map is 40*20*512 (the last max-pooling of
    # SalGAN is excluded). The attention-module bottleneck is 10*5*128 and its
    # receptive field is enlarged to (676, 676).
    self.encoder = torch.nn.Sequential(*Based_Attention_Module)

    self.attention_module = torch.nn.Sequential(*[
        Downsample(kernel_size=2),
        Conv2d(512, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Downsample(kernel_size=2),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
        Upsample(scale_factor=4, mode='nearest')
    ])
    # self.reshape = Reshape(-1, 512, 40, 20)

    # Define the decoder based on VGG16 (inverse order, Upsampling layers, 'nearest' mode).
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    self.decoder = torch.nn.Sequential(*decoder_list)
    print("Model initialized, Sal_based_Attention_module")
def __init__(self, pretrained=True):
    super(Decoder, self).__init__()
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=4, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=4, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    self.decoder = torch.nn.Sequential(*decoder_list)
    self._initialize_weights()
    print("decoder initialized")
    print("architecture len :", str(len(self.decoder)))
def __init__(self, pretrained=True):
    super(Encoder, self).__init__()
    # Create encoder based on VGG16 architecture.
    # Change just the 4th and 5th max-pooling layers to scale 4 instead of 2.
    # Select only the convolutional layers of the first 5 conv blocks; changing the
    # max-pooling enlarges the receptive field so that each neuron on the bottleneck
    # sees (580, 580), i.e. all viewports. Input (576, 288); bottleneck feature map
    # is (9*4)*512; the last max-pooling is excluded.
    encoder_list = [
        Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=0), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Downsample(kernel_size=3),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Downsample(kernel_size=3),
        Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Downsample(kernel_size=3, stride=4),
        Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Downsample(kernel_size=3, stride=4),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
    ]
    self.encoder = torch.nn.Sequential(*encoder_list)
    print("encoder initialized")
    print("architecture len :", str(len(self.encoder)))
def __init__(self, seed_init, freeze=True, use_gpu=True):
    super(SalCLSTM56, self).__init__()
    self.use_gpu = use_gpu

    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16()
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        # During the Upsampling operation we may end up losing 1 dimension if it
        # was an odd number before.
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    # print(self.salgan)

    # ConvLSTM
    self.input_size = 128
    self.hidden_size = 128
    self.Gates = nn.Conv2d(
        in_channels=self.input_size + self.hidden_size,
        out_channels=4 * self.hidden_size,
        kernel_size=(3, 3),
        padding=1)  # padding 1 to preserve HxW dimensions

    final_convolutions = [
        Conv2d(self.hidden_size, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    self.final_convs = torch.nn.Sequential(*final_convolutions)

    # Initialize weights of ConvLSTM
    torch.manual_seed(seed_init)
    for name, param in self.Gates.named_parameters():
        if "weight" in name:
            nn.init.xavier_normal_(param)
        elif "bias" in name:
            nn.init.constant_(param, 0)
        else:
            print("There is some uninitialized parameter. Check your parameters and try again.")
            exit()
    for name, param in self.final_convs.named_parameters():
        if "weight" in name:
            nn.init.xavier_normal_(param)
        elif "bias" in name:
            nn.init.constant_(param, 0)
        else:
            print("There is some uninitialized parameter. Check your parameters and try again.")
            exit()

    # Freeze SalGAN
    if freeze:
        for child in self.salgan.children():
            for param in child.parameters():
                param.requires_grad = False
def __init__(self):
    super(SalGAN, self).__init__()
    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16()
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
def __init__(self):
    super(Scanpath_based_Attention_module, self).__init__()
    Based_Attention_Module = based_AM
    soft_sam = SpatialSoftArgmax2d(normalized_coordinates=False)
    self.soft_sam = soft_sam
    self.encoder = torch.nn.Sequential(*Based_Attention_Module)

    self.attention_module = torch.nn.Sequential(*[
        Downsample(kernel_size=2),
        Conv2d(512, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Downsample(kernel_size=2),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
        Upsample(scale_factor=4, mode='nearest')
    ])

    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        # Upsample(scale_factor=2, mode='nearest'),
    ]

    decoder_list_hm = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    self.decoder_hm = torch.nn.Sequential(*decoder_list_hm)
    self.decoder = torch.nn.Sequential(*decoder_list)

    self.aux = torch.nn.Sequential(*[
        # Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(100, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU()
    ])

    for name, param in self.aux.named_parameters():
        if "weight" in name:
            nn.init.xavier_normal_(param)
        elif "bias" in name:
            nn.init.constant_(param, 150.0)

    print("Model initialized, Scanpath_based_Attention_module")
def __init__(self, use_gpu=True):
    super(Sal_global_Attention, self).__init__()
    self.use_gpu = use_gpu

    # Create the encoder following the VGG16-based SalGAN architecture, but change
    # the 4th and 5th max-pooling layers to scale 4 instead of 2.
    Global_Attention_Encoder = global_attention
    # Select only the convolutional layers of the first 5 conv blocks; changing the
    # max-pooling enlarges the receptive field so that each neuron on the bottleneck
    # sees (580, 580), i.e. all viewports. Input (576, 288); bottleneck feature map
    # is (9*4)*512; the last max-pooling is excluded.
    encoder = torch.nn.Sequential(*Global_Attention_Encoder)

    # Define the decoder based on VGG16 (inverse order, Upsampling layers, 'nearest' mode).
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # aggregate the full encoder-decoder architecture of Sal_global_Attention
    self.Sal_global_Attention = torch.nn.Sequential(
        *(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, Sal_global_Attention")
    print("architecture len :", str(len(self.Sal_global_Attention)))
def __init__(self, alpha, ema_loc, residual, dropout, use_gpu=True):
    super(SalEMA, self).__init__()
    self.dropout = dropout
    self.residual = residual
    self.use_gpu = use_gpu
    if alpha is None:
        self.alpha = nn.Parameter(torch.Tensor([0.25]))
        print("Initial alpha set to: {}".format(self.alpha))
    else:
        self.alpha = torch.Tensor([alpha])
        assert self.alpha <= 1 and self.alpha >= 0
    self.ema_loc = ema_loc  # 30 = bottleneck

    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16()
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, EMA located at {}".format(self.salgan[self.ema_loc]))
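# Hedged sketch of the exponential moving average that gives SalEMA its name:
# at layer self.ema_loc the current activation is blended with the state kept
# from the previous frame. The real forward() is not shown here, so this is an
# illustration of the recurrence only, not the author's exact implementation
# (for instance, how a learned alpha is kept inside [0, 1] is not shown).
def ema_update(self, current_feat, prev_state):
    if prev_state is None:          # first frame: nothing to blend yet
        return current_feat
    return self.alpha * current_feat + (1 - self.alpha) * prev_state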
def create_model(input_channels):
    # Create encoder based on VGG16 architecture
    # original_vgg16 = vgg16()
    #
    # # select only convolutional layers
    # encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # new encoder
    encoder = [
        Conv2d(input_channels, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(64), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(64), ReLU(),
        MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(128), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(128), ReLU(),
        MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
        Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(256), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(256), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(256), ReLU(),
        MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
        Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU(),
        MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU()
    ]

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(512), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(256), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(256), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(256), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(128), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(128), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(64), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), BatchNorm2d(64), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
        Sigmoid(),
    ]

    encoder = torch.nn.Sequential(*encoder)
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    model = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    return model
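# Hedged usage sketch for create_model. The 192x256 input size is arbitrary: any
# height and width divisible by 16 should be reproduced at the output, since the
# encoder downsamples by 2 four times and the decoder upsamples back by 2 four times.
model = create_model(input_channels=3)
x = torch.randn(1, 3, 192, 256)   # dummy batch of one RGB frame
saliency = model(x)               # expected: (1, 1, 192, 256), values in (0, 1)
print(saliency.shape)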
def __init__(self, alpha, ema_loc_1, ema_loc_2, use_gpu=True):
    super(SalGAN_EMA2, self).__init__()
    self.use_gpu = use_gpu
    self.alpha = alpha
    self.ema_loc_1 = ema_loc_1  # 30 = bottleneck
    self.ema_loc_2 = ema_loc_2  # 30 = bottleneck
    assert self.alpha <= 1 and self.alpha >= 0

    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16()
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, EMAs located at {} and {}".format(
        self.salgan[self.ema_loc_1], self.salgan[self.ema_loc_2]))
def __init__(self):
    super(SalEMA, self).__init__()
    self.dropout = False
    self.residual = False
    self.use_gpu = True
    self.alpha = nn.Parameter(torch.Tensor([0.1]))
    self.ema_loc = 30  # 30 = bottleneck

    original_vgg16 = vgg16()
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, SalEMA")
def dcn_vgg(input_channels):
    model = nn.Sequential(
        Conv2d(input_channels, 64, kernel_size=(3, 3), padding=0), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), padding=0), ReLU(),
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        Conv2d(64, 128, kernel_size=(3, 3), padding=0), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), padding=0), ReLU(),
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        Conv2d(128, 256, kernel_size=(3, 3), padding=0), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), padding=0), ReLU(),
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        Conv2d(256, 512, kernel_size=(3, 3), padding=0), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), padding=0), ReLU(),
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        Conv2d(512, 512, kernel_size=(3, 3), padding=0), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), padding=0), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), padding=0), ReLU(),
    )
    return model
def __init__(self, use_gpu=True):
    super(Salgan360, self).__init__()
    self.use_gpu = use_gpu

    # Create the encoder following the VGG16-based SalGAN architecture.
    original_vgg16 = vgg16()
    # Select only the convolutional layers of the first 5 conv blocks; here we keep
    # the same VGG receptive field of 212*212, so each neuron on the bottleneck sees
    # just a (212, 212) viewport while sliding. Input (576, 288); bottleneck feature
    # map is 36*18*512; the last max-pooling is excluded.
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # Define the decoder based on VGG16 (inverse order, Upsampling layers, 'nearest' mode).
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # aggregate the full encoder-decoder architecture of SalGAN360
    self.Salgan360 = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, SalGAN360")
    print("architecture len :", str(len(self.Salgan360)))
def create_model():
    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16(pretrained=True)
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)), ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    model = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    return model