Example #1
    def __init__(self, alpha=None, use_gpu=True):
        super(Poles, self).__init__()

        self.use_gpu = use_gpu
        if alpha is None:
            # learnable EMA coefficient, initialized to 0.25
            self.alpha = nn.Parameter(torch.Tensor([0.25]))
            print("Initial alpha set to: {}".format(self.alpha))
        else:
            # fixed alpha supplied by the caller (not a learnable Parameter)
            self.alpha = torch.Tensor([alpha])
            if use_gpu:
                self.alpha = self.alpha.cuda()

        assert 0 <= self.alpha <= 1
        self.ema_loc = 30  # layer index 30 = bottleneck

        # Create encoder based on VGG16 architecture
        original_vgg16 = vgg16()

        # select only convolutional layers
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define the decoder based on VGG16 (conv blocks in reverse order, with Upsample layers)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # assemble the full encoder-decoder architecture
        self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))

        print("Model initialized, EMA located at {}".format(self.salgan[self.ema_loc]))
    def __init__(self):
        super(Decoder, self).__init__()

        decoder_list=[
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=4, mode='nearest'),

            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=4, mode='nearest'),

            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        self.decoder = torch.nn.Sequential(*decoder_list)
        self._initialize_weights()
        print("decoder initialized")
        print("architecture len :",str(len(self.Autoencoder))) 
Example #3
def create_model(input_channels):
    # Create encoder based on VGG16 architecture
    # original_vgg16 = vgg16()
    #
    # # select only convolutional layers
    # encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # new encoder (VGG16-style, but the first conv has stride 4)
    encoder = [
        Conv2d(input_channels,
               64,
               kernel_size=(3, 3),
               stride=(4, 4),
               padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU()
    ]

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=8, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
        Sigmoid(),
    ]
    encoder = torch.nn.Sequential(*encoder)
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    model = torch.nn.Sequential(*(list(encoder.children()) +
                                  list(decoder.children())))

    return model
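A possible usage sketch for create_model; the input size here is an assumption, but any height and width divisible by 64 work, since the stride-4 first conv plus four 2x2 max pools downsample by 64 and the 2-2-2-8 Upsample factors restore it:

# Usage sketch (input size assumed): the output saliency map matches the input resolution.
import torch

model = create_model(input_channels=3)
x = torch.randn(1, 3, 256, 512)  # H, W divisible by 64
print(model(x).shape)            # torch.Size([1, 1, 256, 512]), values in (0, 1) from Sigmoid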
    def __init__(self):
        super(Encoder, self).__init__()
        # Create encoder based on the VGG16 architecture:
        # keep only the convolutional layers of the first 5 conv blocks, and
        # change the last two max-pooling stages to scale 4 instead of 2 to
        # enlarge the receptive field. Each neuron at the bottleneck then sees
        # (580, 580), i.e. all viewports. For an input of (576, 288) the
        # bottleneck holds (9*4)*512 features (the last max pooling is excluded).
        encoder_list = [
            Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            MaxPool2d(kernel_size=2, stride=2),  # standard VGG pooling (original used an undefined Downsample module)
            Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            MaxPool2d(kernel_size=2, stride=2),
            Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            MaxPool2d(kernel_size=4, stride=4),  # scale-4 pooling to enlarge the receptive field
            Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            MaxPool2d(kernel_size=4, stride=4),  # scale-4 pooling
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
        ]
        self.encoder = torch.nn.Sequential(*encoder_list)
        print("encoder initialized")
        print("architecture len:", len(self.encoder))
Example #5
    def __init__(self, use_gpu=True):
        super(Salgan360, self).__init__()

        self.use_gpu = use_gpu
        # Create the encoder from the VGG16 architecture, as in SalGAN
        original_vgg16 = vgg16()

        # Keep only the convolutional layers of the first 5 conv blocks; this preserves
        # VGG16's 212x212 receptive field, so each bottleneck neuron sees just a
        # (212, 212) viewport while sliding over the input. For an input of (576, 288),
        # the bottleneck holds 36*18*512 features (the last max pooling is excluded).
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define the decoder based on VGG16 (conv blocks in reverse order, with Upsample layers in 'nearest' mode)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # assemble the full SalGAN360 encoder-decoder architecture
        self.Salgan360 = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))

        print("Model initialized, SalGAN360")
        print("architecture len :",str(len(self.Salgan360)))