Example #1
    def __init__(self):
        super(SalGAN, self).__init__()
        # Create encoder based on VGG16 architecture
        original_vgg16 = vgg16()

        # select only convolutional layers
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # assemble the full architecture: encoder + decoder
        self.salgan = torch.nn.Sequential(*(list(encoder.children()) +
                                            list(decoder.children())))
Example #2
    def __init__(self, requires_grad=False):
        super().__init__()
        vgg_pretrained_features = torchvision.models.vgg19(
            pretrained=True).features
        self.up5 = Upsample(scale_factor=16, mode='bicubic')
        self.up4 = Upsample(scale_factor=8, mode='bicubic')
        self.up3 = Upsample(scale_factor=4, mode='bicubic')
        self.up2 = Upsample(scale_factor=2, mode='bicubic')
        self.up1 = Upsample(scale_factor=1, mode='bicubic')
        self.weights = [1.0 / 32, 1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0]

        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        for x in range(2):
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(2, 7):
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(7, 12):
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(12, 21):
            self.slice4.add_module(str(x), vgg_pretrained_features[x])
        for x in range(21, 30):
            self.slice5.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False
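The five slices end at successive ReLU activations of VGG19, and the weights list hints at a weighted multi-scale feature comparison. A hedged sketch of how such a frozen feature extractor is commonly turned into a perceptual loss (the forward pass and loss combination below are assumptions, not part of the snippet):

import torch.nn.functional as F

def perceptual_loss(slices, weights, x, y):
    # Run both images through the frozen VGG19 slices and accumulate
    # a weighted L1 distance between the intermediate features.
    loss = 0.0
    hx, hy = x, y
    for slice_module, w in zip(slices, weights):
        hx = slice_module(hx)
        hy = slice_module(hy)
        loss = loss + w * F.l1_loss(hx, hy.detach())
    return loss

With an instance m of the module above, this would be called as perceptual_loss([m.slice1, m.slice2, m.slice3, m.slice4, m.slice5], m.weights, prediction, target).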
Example #3
    def __init__(self, use_gpu=True):
        super(Salgan360, self).__init__()

        self.use_gpu = use_gpu
        # Create encoder based on the VGG16 architecture, as in the SalGAN architecture
        original_vgg16 = vgg16()

        # select only the convolutional layers of the first 5 conv blocks, keeping the
        # VGG receptive field of 212x212: each bottleneck neuron sees just a (212, 212)
        # viewport while sliding. Input (576, 288) -> bottleneck features 36*18*512
        # (last max pooling excluded)
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers, nearest mode)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # aggregate the full architecture: encoder + decoder of SalGAN360
        self.Salgan360 = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))

        print("Model initialized, SalGAN360")
        print("architecture len:", str(len(self.Salgan360)))
Example #4
def create_model():
    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16(pretrained=True)

    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
        Sigmoid(),
    ]

    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full architecture: encoder + decoder
    model = torch.nn.Sequential(*(list(encoder.children()) +
                                  list(decoder.children())))

    return model
Example #5
def get_image(filmed_net, original_img):
    """Given FiLMedNet and original image array create new image  to
    visualize prepooled activations. Save this image.
    input:
        filmed_net: FiLMedNet objects
        original_img: np.ndarray
    """
    #Extract the pre-pooled activations and run them through the first classifier layers
    activations = filmed_net.cf_input
    activations = filmed_net.classifier[0](activations)
    activations = filmed_net.classifier[1](activations)
    activations = filmed_net.classifier[2](activations)

    #Create average feature map scaled from 0 to 1
    f_map = (activations**2).mean(0).mean(0).sqrt()
    f_map = f_map - f_map.min().expand_as(f_map)
    f_map = f_map / f_map.max().expand_as(f_map)

    #Upsample the feature map to the size of the original image and add it as an
    #additional channel.
    f_map = (255 * f_map).round()
    upsample = Upsample(size=torch.Size(original_img.shape[:-1]),
                        mode='bilinear')
    channel = upsample(f_map.unsqueeze(0).unsqueeze(0))
    channel = channel.squeeze().unsqueeze(-1).data.numpy()

    filtered_img = np.concatenate((original_img, channel), axis=2)

    #Save image
    filename = args.question.replace(' ', '_').strip(punctuation)
    imsave(filename + '.png', filtered_img)
Example #6
    def __init__(self, batch_norm=True):
        super().__init__()
        self.batch_norm = batch_norm
        self.c1 = PartialConv2d_3k(3,
                                   64,
                                   5,
                                   2,
                                   padding=2,
                                   multi_channel=True,
                                   return_mask=True)
        if self.batch_norm:
            self.bn1 = BatchNorm2d(64)
        self.c2 = PartialConv2d_3k(64,
                                   128,
                                   3,
                                   2,
                                   padding=1,
                                   multi_channel=True,
                                   return_mask=True)
        if self.batch_norm:
            self.bn2 = BatchNorm2d(128)
        self.c3 = PartialConv2d_3k(128,
                                   128,
                                   3,
                                   1,
                                   padding=1,
                                   multi_channel=True,
                                   return_mask=True)
        if self.batch_norm:
            self.bn3 = BatchNorm2d(128)

        self.up4 = Upsample(scale_factor=2)
        self.c4 = PartialConv2d_3k(128 + 64,
                                   64,
                                   3,
                                   1,
                                   padding=1,
                                   multi_channel=True,
                                   return_mask=True)
        self.up5 = Upsample(scale_factor=2)
        self.c5 = PartialConv2d_3k(64 + 3,
                                   3,
                                   3,
                                   1,
                                   padding=1,
                                   multi_channel=True,
                                   return_mask=True)
Example #7
    def __init__(self):
        super(Decoder, self).__init__()

        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=4, mode='nearest'),

            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=4, mode='nearest'),

            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        self.decoder = torch.nn.Sequential(*decoder_list)
        self._initialize_weights()
        print("decoder initialized")
        print("architecture len :",str(len(self.Autoencoder))) 
Example #8
def acl_vgg(data, stateful):
    dcn = dcn_vgg()
    att_module = nn.Sequential(
                    MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
                    # MaxPool2d preserves the channel count, so the 512
                    # channels output by dcn_vgg pass through unchanged
                    Conv2d(512, 64, kernel_size=(3, 3), padding=0),
                    ReLU(),
                    Conv2d(64, 128, kernel_size=(3, 3), padding=0),
                    ReLU(),
                    MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
                    Conv2d(128, 64, kernel_size=(3, 3), padding=0),
                    ReLU(),
                    Conv2d(64, 128, kernel_size=(3, 3), padding=0),
                    ReLU(),
                    Conv2d(128, 1, kernel_size=(1, 1), padding=0),
                    Sigmoid(),
                    Upsample(scale_factor=4, mode='nearest')
                    )


    outs = TimeDistributed(dcn)(data)

    attention = TimeDistributed(att_module)(outs)

    # Keras Flatten/RepeatVector/Permute/Reshape equivalents; plain tensor ops
    # need no TimeDistributed wrapper (shapes assume outs is (batch, 512, 32, 40))
    f_attention = attention.view(attention.size(0), -1)          # flatten -> (batch, 32*40)
    f_attention = f_attention.unsqueeze(1).expand(-1, 512, -1)   # repeat vector -> (batch, 512, 32*40)
    f_attention = f_attention.reshape(-1, 512, 32, 40)           # reshape to match outs
    m_outs = outs * f_attention #elementwise multiplication
    outs = outs + m_outs

    ### This needs to change
    clstm = ConvLSTMCell(use_gpu=False, input_size=512, hidden_size=256, kernel_size=(3,3))
    outs = clstm(outs)
    ###

    produce_smaps = nn.Sequential(
                    # InputDimensions will be figured out after changing the ConvLSTM
                    Conv2d(InputDimensions, 1, kernel_size=(1, 1), padding=0),
                    Sigmoid(),
                    Upsample(scale_factor=4, mode='nearest')
                    )

    outs = TimeDistributed(produce_smaps)(outs)
    attention = TimeDistributed(Upsample(scale_factor=2, mode='nearest'))(attention)
    return [outs, outs, outs, attention, attention, attention]
Example #9
    def __init__(self, batch_norm=True):
        super().__init__()
        self.batch_norm = batch_norm
        self.c1 = Conv2d(3, 64, 5, 2, padding=2)
        if self.batch_norm:
            self.bn1 = BatchNorm2d(64)
        self.c2 = Conv2d(64, 128, 3, 2, padding=1)
        if self.batch_norm:
            self.bn2 = BatchNorm2d(128)
        self.c3 = Conv2d(128, 128, 3, 1, padding=1)
        if self.batch_norm:
            self.bn3 = BatchNorm2d(128)

        self.up4 = Upsample(scale_factor=2)
        self.c4 = Conv2d(128 + 64, 64, 3, 1, padding=1)
        self.up5 = Upsample(scale_factor=2)
        self.c5 = Conv2d(64 + 3, 3, 3, 1, padding=1)
Example #10
    def __init__(self, class_num: int = 80):
        super().__init__()  # needed so the submodules below are registered
        self.class_num = class_num
        self.upsample = Upsample(None, 2, 'nearest')  # size=None, scale_factor=2, mode='nearest'
        self.cat = Concat(1)
        self.initBlock1()
        self.initBlock2()
        self.initBlock3()
        self.initBlock4()
        self.initBlock5()
        self.initDetect()
Example #11
def generate_patches(src_path, files, set_path, crop_size, img_format,
                     upsampling):
    img_path = os.path.join(src_path, files)
    img = Image.open(img_path).convert('RGB')

    if upsampling > 0:
        img = ToTensor()(img).unsqueeze_(0)
        m = Upsample(scale_factor=abs(upsampling), mode='nearest')
        img = m(img)
        img = tensor2img(img)

    name, _ = files.split('.')
    filedir = os.path.join(set_path, 'a')
    filedirb = os.path.join(set_path, 'b')
    if not dir_exists(filedir):
        mkdir(filedir)
        mkdir(filedirb)

    img = np.array(img)
    h, w = img.shape[0], img.shape[1]

    if crop_size is None:
        img = np.copy(img)
        img_patches = np.expand_dims(img, 0)
    else:
        rem_h = (h % crop_size[0])
        rem_w = (w % crop_size[1])
        img = img[:h - rem_h, :w - rem_w]
        img_patches = crop(img, crop_size)

    # print('Cropped')

    for i in range(min(len(img_patches), 3)):
        img = Image.fromarray(img_patches[i])
        # print(np.asarray(compress(torch.Tensor(img_patches[0]), 4) * (2**4 - 1)))
        imgs = tensor2img(compress(ToTensor()(img_patches[i]), 3))

        # print('Compressed')

        img.save(os.path.join(filedir, '{}_{}.{}'.format(name, i, img_format)))
        # print('OK')
        imgs.save(
            os.path.join(filedirb, '{}_{}.{}'.format(name, i, img_format)))
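generate_patches relies on a crop() helper that is not shown. A minimal sketch of what a non-overlapping tiling crop might look like, inferred from how img_patches is indexed above (this implementation is an assumption):

import numpy as np

def crop(img, crop_size):
    # Tile the image into non-overlapping crop_size[0] x crop_size[1] patches.
    # Assumes height and width were already trimmed to exact multiples,
    # as done by the caller above.
    ch, cw = crop_size
    h, w = img.shape[0], img.shape[1]
    patches = [img[i:i + ch, j:j + cw]
               for i in range(0, h, ch)
               for j in range(0, w, cw)]
    return np.stack(patches, axis=0)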
Example #12
    def forward(self, seg_map, dense_map, target, seg_mask, infer=False):
        seg_map = seg_map.float().cuda()
        dense_map = dense_map.float().cuda()
        target = target.float().cuda()

        feat_map_total = []
        for each_class in range(self.opt.label_nc):
            inp_enc = seg_map[:, each_class:each_class + 1, :, :]
            feat_map_each_class = self.netE.forward(inp_enc)  # bs, 10, H, w
            feat_map_total.append(feat_map_each_class)
        feat_map_total = torch.cat(feat_map_total, dim=1)

        # local pooling step and Upscaling
        local_avg_pool_fn = nn.AvgPool2d((64, 64))
        feat_map_each_class_pooled = local_avg_pool_fn(feat_map_total)
        upscale_fn = Upsample(scale_factor=64, mode='nearest')
        feat_map_final = upscale_fn(feat_map_each_class_pooled)

        # Gan Input
        input_concat = torch.cat((dense_map, feat_map_final), dim=1).cuda()
        fake_image = self.netG.forward(input_concat)

        # Fake Detection and Loss
        pred_fake_pool = self.discriminate(seg_map, fake_image, use_pool=True)
        loss_D_fake = self.criterionGAN(pred_fake_pool, False)

        # Real Detection and Loss
        pred_real = self.discriminate(seg_map, target)
        loss_D_real = self.criterionGAN(pred_real, True)

        # GAN loss (Fake Passability Loss)
        pred_fake = self.netD.forward(torch.cat((seg_map, fake_image), dim=1))
        loss_G_GAN = self.criterionGAN(pred_fake, True)

        ###############################
        # Crossentropy loss
        loss_G_CE = self.criterionCE(fake_image, seg_mask)

        return [
            self.loss_filter(loss_G_GAN, loss_G_CE, loss_D_real, loss_D_fake),
            None if not infer else fake_image
        ]
Example #13
    def __init__(self, filters, in_channels, block_i=0, up_conv_z2=False):
        super().__init__()
        print(f'Creating UpBlock with {filters} filters')
        self.conv_part = Sequential()
        conv0 = Conv3d(in_channels=in_channels,
                       out_channels=filters,
                       kernel_size=(3, 3, 3),
                       padding=(1, 1, 1))
        self.conv_part.add_module(f'u{block_i}-0_conv3d', conv0)
        self.conv_part.add_module(f'u{block_i}-0_relu', ReLU())
        uz = int(up_conv_z2)
        conv1 = Conv3d(in_channels=filters,
                       out_channels=filters,
                       kernel_size=(3, 3, 1 + 2 * uz),
                       padding=(1, 1, uz))
        self.conv_part.add_module(f'u{block_i}-1_conv3d', conv1)
        self.conv_part.add_module(f'u{block_i}-1_relu', ReLU())

        self.up_sample = Upsample(scale_factor=(2, 2, 1))
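The kernel_size=(3, 3, 1 + 2 * uz) arithmetic means up_conv_z2 toggles the second convolution between a (3, 3, 1) kernel with no z padding and a (3, 3, 3) kernel with z padding 1; both preserve the input shape, and the Upsample then doubles only x and y. A quick shape check (assuming the block is applied as conv_part followed by up_sample):

import torch

block = UpBlock(filters=32, in_channels=16, up_conv_z2=False)
x = torch.randn(1, 16, 24, 24, 8)          # (N, C, X, Y, Z)
y = block.up_sample(block.conv_part(x))
print(y.shape)                              # -> torch.Size([1, 32, 48, 48, 8])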
Example #14
    def inference_forward_shape(self, query, ref, dense_map):

        query = query.float().cuda()
        dense_map = dense_map.float().cuda()
        ref = ref.float().cuda()

        query_ref_mixed = torch.cat(
            (query[:, 0:5, :, :], ref[:, 5:8, :, :], query[:, 8:, :, :]), axis=1)

        # query_ref_mixed = torch.cat(
        #     (query[:, 0:9, :, :],  ref[:, 5:8, :, :] , query[:, 8:9, :, :], ref[:, 9:10, :, :], query[:, 10:12, :, :], ref[:, 12:13, :, :],
        #     query[:, 13:16, :, :],ref[:, 16:20, :, :]), axis=1)

        # query_ref_mixed = torch.cat((query[:, 0:9, :, :], ref[:, 9:10, :, :], query[:, 10:12, :, :],
        #                              ref[:, 12:13, :, :], query[:, 13:16, :, :], ref[:, 16:20, :, :]), axis=1)

        feat_map_total = []
        for each_class in range(self.opt.label_nc):
            # bs, 1, H, w
            inp_enc = query_ref_mixed[:, each_class:each_class+1, :, :]
            with torch.no_grad():
                feat_map_each_class = self.netE.forward(
                    inp_enc)  # bs, 10, H, w
            feat_map_total.append(feat_map_each_class)

        feat_map_total = torch.cat(feat_map_total, dim=1)

        # local pooling step
        local_avg_pool_fn = nn.AvgPool2d((64, 64))
        feat_map_each_class_pooled = local_avg_pool_fn(feat_map_total)

        # Upscaling
        upscale_fn = Upsample(scale_factor=64, mode='nearest')
        feat_map_final = upscale_fn(feat_map_each_class_pooled)

        input_concat = torch.cat((dense_map, feat_map_final), dim=1)

        with torch.no_grad():
            fake_image = self.netG.forward(input_concat)

        return query_ref_mixed, fake_image
Example #15
    def inference_enc(self, query, dense_map, ref, cloth_part='uppercloth'):
        query = query.float().cuda()
        dense_map = dense_map.float().cuda()
        ref = ref.float().cuda()

        # Cloth part to mix
        if cloth_part == 'uppercloth':
            query_ref_mixed = torch.cat(
                (query[:, 0:5, :, :], ref[:, 5:8, :, :], query[:, 8:, :, :]),
                axis=1)

        elif cloth_part == 'bottomcloth':
            query_ref_mixed = torch.cat(
                (query[:, 0:9, :, :], ref[:, 9:10, :, :],
                 query[:, 10:12, :, :], ref[:, 12:13, :, :],
                 query[:, 13:16, :, :], ref[:, 16:20, :, :]),
                axis=1)
        # Encoder
        feat_map_total = []
        for each_class in range(self.opt.label_nc):
            inp_enc = query_ref_mixed[:, each_class:each_class + 1, :, :]
            with torch.no_grad():
                feat_map_each_class = self.netE.forward(
                    inp_enc)  # bs, 10, H, w
            feat_map_total.append(feat_map_each_class)
        feat_map_total = torch.cat(feat_map_total, dim=1)

        # Local pooling step and Upscaling
        local_avg_pool_fn = nn.AvgPool2d((64, 64))
        feat_map_each_class_pooled = local_avg_pool_fn(feat_map_total)
        upscale_fn = Upsample(scale_factor=64, mode='nearest')
        feat_map_final = upscale_fn(feat_map_each_class_pooled)

        # GAN
        input_concat = torch.cat((dense_map, feat_map_final), dim=1)
        with torch.no_grad():
            fake_image = self.netG.forward(input_concat)

        return query_ref_mixed, fake_image
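The AvgPool2d((64, 64)) followed by Upsample(scale_factor=64, mode='nearest') pairing used in the last few examples averages each feature channel over non-overlapping 64x64 regions, then broadcasts each region's average back over the same 64x64 area, producing a piecewise-constant feature map at full resolution. A small illustration (shapes chosen for the example):

import torch
import torch.nn as nn
from torch.nn.modules.upsampling import Upsample

feat = torch.randn(1, 10, 128, 128)               # stand-in for encoder features
pooled = nn.AvgPool2d((64, 64))(feat)             # -> (1, 10, 2, 2): one value per region
broadcast = Upsample(scale_factor=64, mode='nearest')(pooled)
print(broadcast.shape)                            # -> torch.Size([1, 10, 128, 128])
# Every pixel inside a 64x64 block now carries that block's average feature.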
Example #16
def create_model(input_channels):
    # Create encoder based on VGG16 architecture
    # original_vgg16 = vgg16()
    #
    # # select only convolutional layers
    # encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # new encoder
    encoder = [
        Conv2d(input_channels,
               64,
               kernel_size=(3, 3),
               stride=(1, 1),
               padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU()
    ]

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
        Sigmoid(),
    ]
    encoder = torch.nn.Sequential(*encoder)
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full architecture: encoder + decoder
    model = torch.nn.Sequential(*(list(encoder.children()) +
                                  list(decoder.children())))

    return model
Example #17
File: SalEMA.py Project: snlee81/SalEMA
    def __init__(self, alpha, ema_loc, residual, dropout, use_gpu=True):
        super(SalEMA, self).__init__()

        self.dropout = dropout
        self.residual = residual
        self.use_gpu = use_gpu
        if alpha is None:
            self.alpha = nn.Parameter(torch.Tensor([0.25]))
            print("Initial alpha set to: {}".format(self.alpha))
        else:
            self.alpha = torch.Tensor([alpha])
        assert 0 <= self.alpha <= 1
        self.ema_loc = ema_loc # 30 = bottleneck

        # Create encoder based on VGG16 architecture
        original_vgg16 = vgg16()

        # select only convolutional layers
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # assemble the full architecture: encoder + decoder
        self.salgan = torch.nn.Sequential(*(list(encoder.children())+list(decoder.children())))

        print("Model initialized, EMA located at {}".format(self.salgan[self.ema_loc]))
Example #18
File: SalEMA.py Project: snlee81/SalEMA
    def __init__(self, alpha, ema_loc_1, ema_loc_2, use_gpu=True):
        super(SalGAN_EMA2, self).__init__()

        self.use_gpu = use_gpu
        self.alpha = alpha
        self.ema_loc_1 = ema_loc_1 # 30 = bottleneck
        self.ema_loc_2 = ema_loc_2 # 30 = bottleneck
        assert 0 <= self.alpha <= 1

        # Create encoder based on VGG16 architecture
        original_vgg16 = vgg16()

        # select only convolutional layers
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # assemble the full architecture: encoder + decoder
        self.salgan = torch.nn.Sequential(*(list(encoder.children())+list(decoder.children())))

        print("Model initialized, EMAs located at {} and {}".format(self.salgan[self.ema_loc_1], self.salgan[self.ema_loc_2]))
Example #19
    def __init__(self, cin, cout, k, stride=1, padding=0):
        super(UpscaleConv, self).__init__()
        self.upsample1 = Upsample(scale_factor=2, mode="nearest")
        self.conv2 = nn.Conv2d(cin, cout, k, stride=stride, padding=padding)
Example #20
    def __init__(self):
        super(SalEMA, self).__init__()

        self.dropout = False
        self.residual = False
        self.use_gpu = True

        self.alpha = nn.Parameter(torch.Tensor([0.1]))
        self.ema_loc = 30  # 30 = bottleneck

        original_vgg16 = vgg16()

        # select only convolutional layers
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        self.salgan = torch.nn.Sequential(*(list(encoder.children()) +
                                            list(decoder.children())))

        print("Model initialized, SalEMA")
Example #21
            self.slice5.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False
    
    def forward(self, X):
        h_relu1 = self.slice1(X)
        h_relu2 = self.slice2(h_relu1)
        h_relu3 = self.slice3(h_relu2)
        h_relu4 = self.slice4(h_relu3)
        h_relu5 = self.slice5(h_relu4)
        out = [h_relu1, h_relu2, h_relu3, h_relu4, h_relu5]
        return out


up5 = Upsample(scale_factor=16, mode='bicubic')
up4 = Upsample(scale_factor=8, mode='bicubic')
up3 = Upsample(scale_factor=4, mode='bicubic')
up2 = Upsample(scale_factor=2, mode='bicubic')
up1 = Upsample(scale_factor=1, mode='bicubic')
to_pil = ToPILImage()
to_tensor = ToTensor()


def one_hot_encoding(semantic, num_classes=20):
    one_hot = torch.zeros(num_classes, semantic.size(1), semantic.size(2))
    for class_id in range(num_classes):
        one_hot[class_id, :, :] = (semantic.squeeze(0) == class_id)
    one_hot = one_hot[:num_classes - 1, :, :]
    return one_hot
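one_hot_encoding expects a (1, H, W) integer label map and returns a (num_classes - 1, H, W) tensor, dropping the last class channel. A quick usage example:

import torch

semantic = torch.randint(0, 20, (1, 4, 4))    # (1, H, W) integer class ids
one_hot = one_hot_encoding(semantic, num_classes=20)
print(one_hot.shape)                          # -> torch.Size([19, 4, 4])
print(one_hot.sum(dim=0))                     # 1 everywhere except pixels of the dropped class 19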
Example #22
def main(args):

    # device
    device = torch.device(args.device)

    # tensorboard
    logger_tb = logger.Logger(log_dir=args.experiment_name)

    # load img
    img = plt.imread(args.input_img)

    def norm(x):
        return (x - x.min(axis=(0, 1))) / (x.max(axis=(0, 1)) - x.min(axis=(0, 1)))

    img = norm(img)
    img = np.transpose(img, (2, 0, 1))

    # load pretrained model
    vgg19 = models.vgg19(pretrained=True).features.eval()
    model = utils.build_model(vgg19, optim_layer=args.layer, device=device)
    model = model.to(device)

    # loss function
    loss_fn = utils.L2Loss()

    # Populate oct_imgs with different sized zooms of the original image
    oct_imgs = [img]
    for oct_itr in range(args.num_octave):

        zoom_img = zoom(oct_imgs[-1],
                        (1, 1 / args.octave_ratio, 1 / args.octave_ratio))
        oct_imgs.append(zoom_img)

    oct_imgs = [utils.process_tensor(oct_img, device) for oct_img in oct_imgs]
    ori_oct_imgs = [oct_img.clone() for oct_img in oct_imgs]

    while len(oct_imgs) > 0:
        oct_img = oct_imgs.pop()
        ori_oct_img = ori_oct_imgs.pop()
        idx = len(oct_imgs)

        print(f"Deep dreaming on octave: {idx}")

        for epoch in range(args.epoch):
            model.zero_grad()
            output = model.forward(oct_img)
            loss = loss_fn(output)
            loss.backward()
            grad = oct_img.grad.cpu().numpy()
            lr = args.lr / np.abs(grad).mean()

            # apply gaussian smoothing on gradient
            sigma = (epoch * 4.0) / args.epoch + 0.5
            grad_smooth1 = gaussian_filter(grad, sigma=sigma)
            grad_smooth2 = gaussian_filter(grad, sigma=sigma * 2)
            grad_smooth3 = gaussian_filter(grad, sigma=sigma * 0.5)
            grad = (grad_smooth1 + grad_smooth2 + grad_smooth3)
            grad = torch.Tensor(grad).to(device)

            # gradient ascent update on the octave image
            oct_img.data += lr * grad.data
            oct_img.data.clamp_(0, 1)
            oct_img.grad.data.zero_()

            # display image on tensorboard
            dream_img = oct_img.squeeze().cpu().detach().numpy().copy()
            logger_tb.update_loss('loss ', loss.item(), epoch)
            logger_tb.update_image(f'transformation oct{idx}', dream_img,
                                   epoch)

        if len(oct_imgs) == 0:
            break

        # add the "dreamed" portion of the current octave to the next octave
        h = oct_imgs[-1].shape[2]
        w = oct_imgs[-1].shape[3]
        difference = oct_img.data - ori_oct_img.data
        difference = Upsample(size=(h, w), mode='nearest')(difference)
        oct_imgs[-1].data += difference
Example #23
    def __init__(self, seed_init, freeze=True, use_gpu=True):
        super(SalCLSTM56, self).__init__()

        self.use_gpu = use_gpu

        # Create encoder based on VGG16 architecture
        original_vgg16 = vgg16()

        # select only convolutional layers
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            #During the Upsampling operations we may end up off by one if a spatial dimension was odd before the matching pooling
        ]

        decoder = torch.nn.Sequential(*decoder_list)
        # assemble the full architecture: encoder + decoder
        self.salgan = torch.nn.Sequential(*(list(encoder.children()) +
                                            list(decoder.children())))
        #print(self.salgan)
        # ConvLSTM
        self.input_size = 128
        self.hidden_size = 128
        self.Gates = nn.Conv2d(
            in_channels=self.input_size + self.hidden_size,
            out_channels=4 * self.hidden_size,
            kernel_size=(3, 3),
            padding=1)  #padding 1 to preserve HxW dimensions

        final_convolutions = [
            Conv2d(self.hidden_size,
                   64,
                   kernel_size=(3, 3),
                   stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]
        self.final_convs = torch.nn.Sequential(*final_convolutions)

        # Initialize weights of ConvLSTM

        torch.manual_seed(seed_init)
        for name, param in self.Gates.named_parameters():
            if "weight" in name:
                nn.init.xavier_normal_(param)
            elif "bias" in name:
                nn.init.constant_(param, 0)
            else:
                print(
                    "There is some uninitiallized parameter. Check your parameters and try again."
                )
                exit()
        for name, param in self.final_convs.named_parameters():
            if "weight" in name:
                nn.init.xavier_normal_(param)
            elif "bias" in name:
                nn.init.constant_(param, 0)
            else:
                print(
                    "There is some uninitiallized parameter. Check your parameters and try again."
                )
                exit()

        # Freeze SalGAN
        if freeze:
            for child in self.salgan.children():
                for param in child.parameters():
                    param.requires_grad = False
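self.Gates outputs 4 * hidden_size channels because a ConvLSTM computes four gates (input, forget, output, and cell candidate) from the concatenated current input and previous hidden state. A hedged sketch of the cell step this layout implies (the actual forward method is not shown in the snippet):

import torch

def convlstm_step(gates_conv, x, h_prev, c_prev):
    # x: (N, input_size, H, W); h_prev, c_prev: (N, hidden_size, H, W)
    stacked = torch.cat([x, h_prev], dim=1)
    gates = gates_conv(stacked)                 # (N, 4 * hidden_size, H, W)
    i, f, o, g = gates.chunk(4, dim=1)          # split into the four gates
    i, f, o = i.sigmoid(), f.sigmoid(), o.sigmoid()
    c = f * c_prev + i * g.tanh()               # new cell state
    h = o * c.tanh()                            # new hidden state
    return h, c

Here gates_conv would be the self.Gates module above, with input_size = hidden_size = 128.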
Example #24
import torch.nn as nn
from torch.nn.modules.upsampling import Upsample

from tlkit.utils import load_state_dict_from_path
from .superposition import HashConv2d, ProjectedConv2d
from .basic_models import zero_fn, ScaleLayer

upsampler = Upsample(scale_factor=2, mode='nearest')


def load_submodule(model_class,
                   model_weights_path,
                   model_kwargs,
                   backup_fn=zero_fn):
    # If there is a model, use it! If there is initialization, use it! If neither, use backup_fn
    if model_class is not None:
        model = model_class(**model_kwargs)
        if model_weights_path is not None:
            model, _ = load_state_dict_from_path(model, model_weights_path)
    else:
        model = backup_fn
        assert model_weights_path is None, 'cannot have weights without model'
    return model


def _make_layer(in_channels,
                out_channels,
                num_groups=2,
                kernel_size=3,
                stride=1,
                padding=0,
Example #25
    def __init__(self):
        super(Scanpath_based_Attention_module, self).__init__()
        Based_Attention_Module = based_AM
        soft_sam = SpatialSoftArgmax2d(normalized_coordinates=False)
        self.soft_sam = soft_sam
        self.encoder = torch.nn.Sequential(*Based_Attention_Module)
        self.attention_module = torch.nn.Sequential(*[
            Downsample(kernel_size=2),
            Conv2d(512, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Downsample(kernel_size=2),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
            Upsample(scale_factor=4, mode='nearest')
        ])

        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU()
            #Upsample(scale_factor=2, mode='nearest'),
        ]

        decoder_list_hm = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        self.decoder_hm = torch.nn.Sequential(*decoder_list_hm)

        self.decoder = torch.nn.Sequential(*decoder_list)
        self.aux = torch.nn.Sequential(*[
            #Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(100, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU()
        ])

        for name, param in self.aux.named_parameters():
            if "weight" in name:
                nn.init.xavier_normal_(param)
            elif "bias" in name:
                nn.init.constant_(param, 150.0)

        print("Model initialized, Sal_based_Attention_module")
Example #26
    def __init__(self, use_gpu=True):
        super(Sal_global_Attention, self).__init__()

        self.use_gpu = use_gpu
        # Create encoder based on the VGG16 architecture, as in SalGAN,
        # but with the 4th and 5th max-pooling layers changed to scale 4 instead of 2
        Global_Attention_Encoder = global_attention

        # select only the convolutional layers of the first 5 conv blocks; the changed
        # max pooling enlarges the receptive field so each bottleneck neuron sees
        # (580, 580), covering all viewports. Input (576, 288) -> bottleneck
        # features (9*4)*512 (last max pooling excluded)
        encoder = torch.nn.Sequential(*Global_Attention_Encoder)

        # define decoder based on VGG16 (inverse order and Upsampling layers, nearest mode)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # aggregate the full architecture: encoder + decoder of Sal_global_Attention
        self.Sal_global_Attention = torch.nn.Sequential(
            *(list(encoder.children()) + list(decoder.children())))

        print("Model initialized, Sal_global_Attention")
        print("architecture len :", str(len(self.Sal_global_Attention)))
Example #27
    def __init__(self, in_nc=3, out_nc=3, N=8, S=8, upscale=4):
        super(ORDSRModel, self).__init__()
        self.upscale = upscale
        self.N = N
        self.S = S
        self.upsampling = Upsample(scale_factor=self.upscale, mode='bicubic')

        # ================================ Extract Shallow DCT Features ================================ #
        self.DCTTrans = DCTConv(stride=self.S, padding=0, blocksize=self.N)

        # ================================ Learn the High DCT Features ================================ #
        self.conv0 = nn.Conv2d(in_channels=64,
                               out_channels=64,
                               kernel_size=5,
                               stride=1,
                               padding=2,
                               bias=True)
        self.relu0 = nn.LeakyReLU(0.2, inplace=True)

        # conv1 lifts the 60 high-frequency DCT channels to 64 feature maps;
        # conv2-conv13 are twelve identical 64-channel 3x3 layers; conv14
        # projects back to 60 channels. setattr preserves the original
        # attribute names (self.conv1 ... self.relu14), so the module's
        # state_dict layout is unchanged.
        self.conv1 = nn.Conv2d(in_channels=60, out_channels=64,
                               kernel_size=3, stride=1, padding=1, bias=True)
        self.relu1 = nn.LeakyReLU(0.2, inplace=True)

        for i in range(2, 14):
            setattr(self, 'conv%d' % i,
                    nn.Conv2d(in_channels=64, out_channels=64,
                              kernel_size=3, stride=1, padding=1, bias=True))
            setattr(self, 'relu%d' % i, nn.LeakyReLU(0.2, inplace=True))

        self.conv14 = nn.Conv2d(in_channels=64, out_channels=60,
                                kernel_size=3, stride=1, padding=1, bias=True)
        self.relu14 = nn.LeakyReLU(0.2, inplace=True)
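
The example stops before forward, so the layer wiring is not shown. As a hedged illustration of the repeated trunk only: chaining conv1/relu1 through conv14/relu14 is shape-consistent for a 60-channel DCT feature map (60 -> 64 -> ... -> 64 -> 60). How conv0 and the bicubic upsampling branch feed into this chain is not visible above, so it is left out.

    def run_dct_trunk(self, feats_60ch):
        # Hypothetical helper, not part of the original model: apply the
        # fourteen numbered conv/LeakyReLU pairs in sequence.
        out = feats_60ch
        for i in range(1, 15):
            out = getattr(self, 'relu%d' % i)(getattr(self, 'conv%d' % i)(out))
        return out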
Example #28
    def __init__(self):
        super(Sal_based_Attention_module, self).__init__()

        # Create the encoder from the VGG16 architecture used in SalGAN and
        # apply the changes described above
        Based_Attention_Module = based_AM

        # Keep only the first 5 conv blocks, preserving VGG's 212x212 receptive
        # field: each bottleneck neuron sees a (244, 244) viewport while sliding
        # over the (640, 320) input. The bottleneck holds 40*20*512 features
        # (SalGAN's last max pooling is excluded). The attention module's
        # bottleneck holds 10*5*128 features, and its receptive field is
        # enlarged to (676, 676).
        self.encoder = torch.nn.Sequential(*Based_Attention_Module)
        self.attention_module = torch.nn.Sequential(*[
            Downsample(kernel_size=2),
            Conv2d(512, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Downsample(kernel_size=2),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
            Upsample(scale_factor=4, mode='nearest')
        ])

        #self.reshape = Reshape(-1,512,40,20)

        # define the decoder based on VGG16 (layers in reverse order, with
        # Upsampling layers in 'nearest' mode)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        self.decoder = torch.nn.Sequential(*decoder_list)

        print("Model initialized, Sal_based_Attention_module")
Example #29
def mae_features(config_file_path, gpu_ids, dataroot, data_origin):
    
    soft_fdr = os.path.join(dataroot, 'mae_features_' + data_origin)
    
    if not os.path.exists(soft_fdr):
        os.makedirs(soft_fdr)

    # load experiment setting
    with open(config_file_path, 'r') as stream:
        config = yaml.load(stream, Loader=yaml.FullLoader)
    
    # activate GPUs
    config['gpu_ids'] = gpu_ids
    gpu = int(gpu_ids)
    
    # get data_loaders
    cfg_test_loader = config['test_dataloader']
    cfg_test_loader['dataset_args']['dataroot'] = dataroot
    test_loader = trainer_util.get_dataloader(cfg_test_loader['dataset_args'], cfg_test_loader['dataloader_args'])
    
    class VGG19(torch.nn.Module):
        def __init__(self, requires_grad=False):
            super().__init__()
            vgg_pretrained_features = torchvision.models.vgg19(pretrained=True).features
    
            self.slice1 = torch.nn.Sequential()
            self.slice2 = torch.nn.Sequential()
            self.slice3 = torch.nn.Sequential()
            self.slice4 = torch.nn.Sequential()
            self.slice5 = torch.nn.Sequential()
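            # The five slices end at relu1_1, relu2_1, relu3_1, relu4_1 and
            # relu5_1 of VGG19 -- the standard cut points for perceptual-loss
            # feature comparisons.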
            for x in range(2):
                self.slice1.add_module(str(x), vgg_pretrained_features[x])
            for x in range(2, 7):
                self.slice2.add_module(str(x), vgg_pretrained_features[x])
            for x in range(7, 12):
                self.slice3.add_module(str(x), vgg_pretrained_features[x])
            for x in range(12, 21):
                self.slice4.add_module(str(x), vgg_pretrained_features[x])
            for x in range(21, 30):
                self.slice5.add_module(str(x), vgg_pretrained_features[x])
            if not requires_grad:
                for param in self.parameters():
                    param.requires_grad = False
    
        def forward(self, X):
            h_relu1 = self.slice1(X)
            h_relu2 = self.slice2(h_relu1)
            h_relu3 = self.slice3(h_relu2)
            h_relu4 = self.slice4(h_relu3)
            h_relu5 = self.slice5(h_relu4)
            out = [h_relu1, h_relu2, h_relu3, h_relu4, h_relu5]
            return out
        
    from torch.nn.modules.upsampling import Upsample
    up5 = Upsample(scale_factor=16, mode='bicubic')
    up4 = Upsample(scale_factor=8, mode='bicubic')
    up3 = Upsample(scale_factor=4, mode='bicubic')
    up2 = Upsample(scale_factor=2, mode='bicubic')
    up1 = Upsample(scale_factor=1, mode='bicubic')
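    # The scale factors undo VGG19's pooling: relu1_1 features are at full
    # resolution and relu5_1 at 1/16 of it, so each per-layer distance map is
    # brought back to input resolution before the weighted combination.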
    to_pil = ToPILImage()
    
    # Going through visualization loader
    weights = [1.0/32, 1.0/16, 1.0/8, 1.0/4, 1.0]
    vgg = VGG19().cuda(gpu)
    
    with torch.no_grad():
        for i, data_i in enumerate(test_loader):
            print('Generating image %i out of %i'%(i+1, len(test_loader)))
            img_name = os.path.basename(data_i['original_path'][0])
            original = data_i['original'].cuda(gpu)
            synthesis = data_i['synthesis'].cuda(gpu)
            
            x_vgg, y_vgg = vgg(original), vgg(synthesis)
            feat5 = torch.mean(torch.abs(x_vgg[4] - y_vgg[4]), dim=1).unsqueeze(1)
            feat4 = torch.mean(torch.abs(x_vgg[3] - y_vgg[3]), dim=1).unsqueeze(1)
            feat3 = torch.mean(torch.abs(x_vgg[2] - y_vgg[2]), dim=1).unsqueeze(1)
            feat2 = torch.mean(torch.abs(x_vgg[1] - y_vgg[1]), dim=1).unsqueeze(1)
            feat1 = torch.mean(torch.abs(x_vgg[0] - y_vgg[0]), dim=1).unsqueeze(1)
            
            img_5 = up5(feat5)
            img_4 = up4(feat4)
            img_3 = up3(feat3)
            img_2 = up2(feat2)
            img_1 = up1(feat1)
            
            combined = (weights[0] * img_1 + weights[1] * img_2 +
                        weights[2] * img_3 + weights[3] * img_4 +
                        weights[4] * img_5)
            min_v = torch.min(combined.squeeze())
            max_v = torch.max(combined.squeeze())
            combined = (combined.squeeze() - min_v) / (max_v - min_v)
    
            combined = to_pil(combined.cpu())
            pred_name = 'mae_' + img_name
            combined.save(os.path.join(soft_fdr, pred_name))
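
A hypothetical invocation (all argument values below are placeholders, not taken from the original source): the function expects a YAML config containing a test_dataloader entry, runs each original/synthesis pair through VGG19, and writes the combined per-pixel MAE maps to <dataroot>/mae_features_<data_origin>.

    mae_features(config_file_path='configs/test_config.yaml',  # placeholder path
                 gpu_ids='0',
                 dataroot='/data/my_dataset',                  # placeholder root
                 data_origin='val')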
Example #30
import matplotlib.pylab as plt
import numpy as np
import seaborn as sns
from tqdm import trange

import torch
import torchvision
from torch.nn.modules.upsampling import Upsample

from lucent.optvis import render  #, param, transform, objectives

from rosettastone.utils import show_grid

# TODO: as parameter
SIZE = 227  #224
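# 227 is the classic (Caffe-era) AlexNet input size; 224 is the usual
# VGG/ResNet choice, which may be why it appears as the commented alternative.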

UPSAMPLE = Upsample(size=(SIZE, SIZE), mode='bilinear', align_corners=False)


def crop_by_max_activation(raw_acts,
                           img,
                           quantile_threshold=0.1,
                           shape=(SIZE, SIZE)):
    # upsample the raw activation map to image resolution before thresholding
    upsampled_acts = UPSAMPLE(raw_acts[None, None, :, :]).cpu().numpy()
    mask_threshold = np.quantile(upsampled_acts, 1 - quantile_threshold)
    mask = upsampled_acts > mask_threshold
    if not mask.any():
        return None

    _, _, rows, cols = np.nonzero(mask)