def __init__(self, alpha=None, use_gpu=True):
    """Build the SalGAN-style encoder-decoder with an EMA blending factor.

    Args:
        alpha: optional blending factor in [0, 1]. When None, alpha becomes
            a learnable ``nn.Parameter`` initialised to 0.25; otherwise it
            is stored as a fixed (non-learnable) tensor.
        use_gpu: when True, move the fixed alpha tensor to CUDA.

    Raises:
        ValueError: if an explicit ``alpha`` lies outside [0, 1].
    """
    super(Poles, self).__init__()
    self.use_gpu = use_gpu
    if alpha is None:  # PEP 8: compare to None with "is", not "=="
        # Learnable alpha, trained together with the rest of the model.
        self.alpha = nn.Parameter(torch.Tensor([0.25]))
        print("Initial alpha set to: {}".format(self.alpha))
    else:
        # Validate the user-supplied value up front; a bare assert would be
        # silently stripped when Python runs with -O.
        if not 0 <= alpha <= 1:
            raise ValueError("alpha must lie in [0, 1], got {}".format(alpha))
        self.alpha = torch.Tensor([alpha])
        if use_gpu:
            self.alpha = self.alpha.cuda()
    self.ema_loc = 30  # 30 = bottleneck

    # Create encoder based on VGG16 architecture:
    # select only the convolutional layers (features[0:30], i.e. everything
    # up to but excluding the last max-pool).
    original_vgg16 = vgg16()
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # Decoder based on VGG16 in inverse order, with nearest-neighbour
    # Upsample layers in place of pooling, ending in a 1-channel Sigmoid map.
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),

        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),

        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),

        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),

        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # Assemble the full encoder-decoder architecture as one flat Sequential
    # so self.ema_loc indexes directly into it.
    self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, EMA located at {}".format(self.salgan[self.ema_loc]))
def __init__(self):
    """Build a standalone VGG16-style decoder.

    The decoder mirrors VGG16's convolutional stack in reverse, replacing
    pooling with nearest-neighbour Upsample layers, and ends in a single
    channel Sigmoid saliency map. Weights are initialised via the class's
    ``_initialize_weights`` helper.

    NOTE(review): the first two Upsample layers use scale_factor=4 (not 2);
    this looks deliberate (matches an encoder with stride-4 downsampling)
    but should be confirmed against the paired encoder.
    """
    # Original code passed an invalid extra `pretainer=True` argument to
    # super(), which raises TypeError; super() takes at most (type, obj).
    super(Decoder, self).__init__()
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=4, mode='nearest'),

        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=4, mode='nearest'),

        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),

        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),

        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    self.decoder = torch.nn.Sequential(*decoder_list)
    self._initialize_weights()
    print("decoder initialized")
    # Fixed: original printed len(self.Autoencoder), an attribute that is
    # never defined on this class (AttributeError at runtime).
    print("architecture len :", str(len(self.decoder)))
def create_model(input_channels):
    """Assemble a VGG16-style encoder-decoder saliency network.

    The encoder follows VGG16's convolutional layout (built from scratch,
    not loaded from torchvision), except that the very first convolution
    uses stride 4 to aggressively downsample the input. The decoder mirrors
    the encoder with nearest-neighbour Upsample layers in place of pooling
    (the last one with scale_factor=8 to undo the stride-4 first conv) and
    ends in a single-channel Sigmoid saliency map.

    Args:
        input_channels: number of channels of the input image tensor.

    Returns:
        A flat ``torch.nn.Sequential`` containing encoder then decoder.
    """
    def conv(cin, cout, stride=(1, 1)):
        # One 3x3 convolution with 'same' padding, followed by ReLU.
        return [Conv2d(cin, cout, kernel_size=(3, 3), stride=stride,
                       padding=(1, 1)), ReLU()]

    def pool():
        # Standard VGG 2x2 max-pool.
        return [MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1,
                          ceil_mode=False)]

    def up(factor):
        # Nearest-neighbour upsampling used by the decoder instead of pooling.
        return [Upsample(scale_factor=factor, mode='nearest')]

    # Encoder: VGG16 conv blocks; first conv downsamples by 4.
    encoder_layers = (
        conv(input_channels, 64, stride=(4, 4)) + conv(64, 64) + pool()
        + conv(64, 128) + conv(128, 128) + pool()
        + conv(128, 256) + conv(256, 256) + conv(256, 256) + pool()
        + conv(256, 512) + conv(512, 512) + conv(512, 512) + pool()
        + conv(512, 512) + conv(512, 512) + conv(512, 512)
    )

    # Decoder: encoder mirrored, Upsample replacing each pool.
    decoder_layers = (
        conv(512, 512) + conv(512, 512) + conv(512, 512) + up(2)
        + conv(512, 512) + conv(512, 512) + conv(512, 512) + up(2)
        + conv(512, 256) + conv(256, 256) + conv(256, 256) + up(2)
        + conv(256, 128) + conv(128, 128) + up(8)
        + conv(128, 64) + conv(64, 64)
        + [Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
           Sigmoid()]
    )

    # Assemble the full encoder-decoder architecture.
    return torch.nn.Sequential(*(encoder_layers + decoder_layers))
def __init__(self):
    """Build a modified VGG16 encoder with an enlarged receptive field.

    Based on the first 5 conv blocks of VGG16, but the 3rd-5th downsampling
    steps are replaced with stride-4 ``Downsample`` layers so that each
    bottleneck neuron sees roughly a (580, 580) region (all viewports).
    For a (576, 288) input the bottleneck holds (9*4)*512 features; the
    final max-pool of VGG16 is excluded.

    NOTE(review): ``Downsample`` is not a torch.nn module — presumably a
    project-local layer imported elsewhere in this file; confirm its
    signature (kernel_size, stride). The first Conv2d uses padding=0 unlike
    every other conv (padding=1) — looks unintentional, left as written.
    """
    # Original code passed an invalid extra `pretainer=True` argument to
    # super(), which raises TypeError; super() takes at most (type, obj).
    super(Encoder, self).__init__()
    encoder_list = [  # original had `encoder_list[` — a SyntaxError
        Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=0),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Downsample(kernel_size=3),  # commas after Downsample entries were missing

        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Downsample(kernel_size=3),

        Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Downsample(kernel_size=3, stride=4),

        Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Downsample(kernel_size=3, stride=4),

        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
    ]
    # Fixed: original unpacked undefined name `Global_Attention_Encoder`.
    self.encoder = torch.nn.Sequential(*encoder_list)
    print("encoder initialized")
    # Fixed: original printed len(self.Autoencoder), which is never defined.
    print("architecture len :", str(len(self.encoder)))
def __init__(self, use_gpu=True):
    """Assemble the SalGAN360 encoder-decoder saliency network.

    Encoder: the convolutional part of torchvision's VGG16 (``features``
    layers 0-29, excluding the final max-pool), keeping VGG's 212x212
    receptive field — each bottleneck neuron sees one (212, 212) viewport
    while sliding over the (576, 288) input, giving a 36*18*512 bottleneck.

    Decoder: the encoder mirrored in reverse order, with nearest-mode
    Upsample layers replacing pooling, ending in a 1-channel Sigmoid map.

    Args:
        use_gpu: stored on the instance; not used during construction.
    """
    super(Salgan360, self).__init__()
    self.use_gpu = use_gpu

    # Encoder based on VGG16 as pointed on the SalGAN architecture:
    # keep only the first 30 feature layers (5 conv blocks).
    vgg_features = list(vgg16().features)
    encoder = torch.nn.Sequential(*vgg_features[:30])

    def conv_block(cin, cout, repeats):
        # `repeats` * (3x3 'same' conv + ReLU); first conv maps cin->cout,
        # the remaining ones are cout->cout.
        layers = []
        for _ in range(repeats):
            layers.append(Conv2d(cin, cout, kernel_size=(3, 3),
                                 stride=(1, 1), padding=(1, 1)))
            layers.append(ReLU())
            cin = cout
        return layers

    def up2():
        # Nearest-neighbour 2x upsampling, chosen over pooling inversion.
        return [Upsample(scale_factor=2, mode='nearest')]

    decoder_layers = (
        conv_block(512, 512, 3) + up2()
        + conv_block(512, 512, 3) + up2()
        + conv_block(512, 256, 3) + up2()
        + conv_block(256, 128, 2) + up2()
        + conv_block(128, 64, 2)
        + [Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
           Sigmoid()]
    )
    decoder = torch.nn.Sequential(*decoder_layers)

    # Aggregate the full encoder-decoder architecture of SalGAN360.
    self.Salgan360 = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, SalGAN360")
    print("architecture len :", str(len(self.Salgan360)))