Example no. 1
0
    def __init__(
        self,
        patch_size: int = 32,
        kernel_type: str = 'polar',  # one of: 'cart', 'polar'
        whitening: str = 'pcawt',  # one of: 'lw', 'pca', 'pcaws', 'pcawt'
        training_set: str = 'liberty',  # 'liberty', 'notredame', 'yosemite'
        output_dims: int = 128,
    ) -> None:
        """Assemble the fixed MKD descriptor pipeline.

        The pipeline is: Gaussian smoothing -> patch gradients ->
        gradient embedding -> explicit spatial encoding -> whitening.
        """
        super().__init__()

        # Polar parametrization embeds gradient orientations relative
        # to the patch; cartesian uses absolute orientations.
        use_relative: bool = kernel_type == 'polar'
        # Smoothing sigma scales linearly with patch size (1.4 at 64 px).
        blur_sigma: float = 1.4 * (patch_size / 64)

        blur = GaussianBlur2d((5, 5), (blur_sigma, blur_sigma), 'replicate')
        grads = MKDGradients()
        embedding = EmbedGradients(patch_size=patch_size, relative=use_relative)
        encoding = ExplicitSpacialEncoding(
            kernel_type=kernel_type,
            fmap_size=patch_size,
            in_dims=embedding.kernel.d,
        )
        whitening_layer = Whitening(
            whitening,
            load_whitening_model(kernel_type, training_set),
            in_dims=encoding.odims,
            output_dims=output_dims,
        )

        self.features = nn.Sequential(blur, grads, embedding, encoding, whitening_layer)
Example no. 2
0
 def monochrome_model(gaussian_p, solarize_p):
     """Build the grayscale augmentation pipeline (no color jitter).

     Args:
         gaussian_p: probability of applying the Gaussian blur.
         solarize_p: probability of applying solarization.
     """
     transforms = [
         RandomResizedCrop((image_size, image_size),
                           interpolation="BICUBIC"),
         RandomHorizontalFlip(),
         RandomApply(
             GaussianBlur2d(get_kernel_size(image_size), (0.1, 2.0)),
             p=gaussian_p),
         RandomSolarize(0, 0, p=solarize_p),
     ]
     return nn.Sequential(*transforms)
Example no. 3
0
 def color_model(gaussian_p, solarize_p):
     """Build the full-color augmentation pipeline (jitter + grayscale).

     Args:
         gaussian_p: probability of applying the Gaussian blur.
         solarize_p: probability of applying solarization.
     """
     transforms = [
         RandomResizedCrop((image_size, image_size),
                           interpolation="BICUBIC"),
         RandomHorizontalFlip(),
         ColorJitter(0.4, 0.4, 0.2, 0.1, p=0.8),
         RandomGrayscale(p=0.2),
         RandomApply(
             GaussianBlur2d(get_kernel_size(image_size), (0.1, 2.0)),
             p=gaussian_p),
         RandomSolarize(0, 0, p=solarize_p),
     ]
     return nn.Sequential(*transforms)
Example no. 4
0
    def __init__(
        self,
        patch_size: int = 32,
        kernel_type: str = 'concat',
        whitening: str = 'pcawt',
        training_set: str = 'liberty',
        output_dims: int = 128,
    ) -> None:
        """Build an MKD descriptor supporting single or concatenated parametrizations.

        Args:
            patch_size: side length of the square input patch.
            kernel_type: 'concat', 'polar', or 'cart'; 'concat' stacks the
                polar and cartesian encodings.
            whitening: whitening model key to apply after encoding.
            training_set: dataset whose pretrained whitening model to load.
            output_dims: requested descriptor dimensionality; capped at the
                raw (pre-whitening) encoding size.
        """
        super().__init__()

        self.patch_size: int = patch_size
        self.kernel_type: str = kernel_type
        self.whitening: str = whitening
        self.training_set: str = training_set

        # Smoothing sigma scales linearly with patch size (1.4 at 64 px).
        self.sigma = 1.4 * (patch_size / 64)
        self.smoothing = GaussianBlur2d((5, 5), (self.sigma, self.sigma),
                                        'replicate')
        self.gradients = MKDGradients()
        # Plain-string locals keep TorchScript happy (jit cannot infer the
        # literals inline in the conditional list below).
        polar_s: str = 'polar'
        cart_s: str = 'cart'
        self.parametrizations = [
            polar_s, cart_s
        ] if self.kernel_type == 'concat' else [self.kernel_type]

        # Initialize cartesian/polar embedding with absolute/relative gradients.
        self.odims: int = 0
        relative_orientations = {polar_s: True, cart_s: False}
        # NOTE(review): a plain dict does not register these Sequentials as
        # submodules (they won't show up in parameters()/state_dict());
        # presumably intentional since the embeddings are fixed — verify.
        self.feats = {}
        for parametrization in self.parametrizations:
            gradient_embedding = EmbedGradients(
                patch_size=patch_size,
                relative=relative_orientations[parametrization])
            spatial_encoding = ExplicitSpacialEncoding(
                kernel_type=parametrization,
                fmap_size=patch_size,
                in_dims=gradient_embedding.kernel.d)

            self.feats[parametrization] = nn.Sequential(
                gradient_embedding, spatial_encoding)
            # Accumulate the raw output size over all parametrizations.
            self.odims += spatial_encoding.odims
        # Compute true output_dims: cannot exceed the raw encoding size.
        self.output_dims: int = min(output_dims, self.odims)

        # Load supervised(lw)/unsupervised(pca) model trained on training_set.
        # NOTE(review): whitening defaults to 'pcawt'; this branch is skipped
        # only when a caller explicitly passes whitening=None.
        if self.whitening is not None:
            whitening_models = torch.hub.load_state_dict_from_url(
                urls[self.kernel_type],
                map_location=lambda storage, loc: storage)
            whitening_model = whitening_models[training_set]
            self.whitening_layer = Whitening(whitening,
                                             whitening_model,
                                             in_dims=self.odims,
                                             output_dims=self.output_dims)
            # After whitening, the effective output size is the reduced one.
            self.odims = self.output_dims
        # Descriptor is used for inference only; start in eval mode.
        self.eval()
Example no. 5
0
    def __init__(self, in_features, sigma=1, kernel_size=3):
        """Wrap a square, isotropic Gaussian blur with zero ('constant') padding.

        Args:
            in_features: number of input features (stored, not used here).
            sigma: Gaussian standard deviation, applied on both axes.
            kernel_size: blur kernel side length.
        """
        super().__init__()
        self.in_features = in_features
        self.sigma = sigma
        self.kernel_size = kernel_size
        ksize = (self.kernel_size, self.kernel_size)
        blur_sigma = (self.sigma, self.sigma)
        self.GaussianBlur2d = GaussianBlur2d(sigma=blur_sigma,
                                             kernel_size=ksize,
                                             border_type='constant')
Example no. 6
0
    def __init__(self):
        """Build an edge-extraction pipeline: Gaussian smoothing followed by
        a normalized Sobel filter."""
        super(Edge, self).__init__()

        from kornia.filters import Laplacian, Sobel, GaussianBlur2d

        # Alternatives tried previously: learnable 1x1/3x3 convs, or
        # Laplacian(7, normalized=True) in place of Sobel.
        self.edge = nn.Sequential(
            GaussianBlur2d((5, 5), (1.5, 1.5)),
            Sobel(normalized=True),
        )
Example no. 7
0
    def __init__(self,
                 kernel_size: int or tuple = 3,
                 sigma: float or tuple = 1.,
                 border_type: str = 'reflect'):
        """Gaussian blur wrapper that normalizes scalar arguments to pairs.

        Args:
            kernel_size: blur kernel size; an int is expanded to ``(k, k)``.
            sigma: Gaussian standard deviation; a float is expanded to
                ``(s, s)``.
            border_type: padding mode forwarded to kornia's GaussianBlur2d.
        """
        super(MaskedGaussianBlur, self).__init__()

        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            assert isinstance(kernel_size, tuple)
            self.kernel_size = kernel_size
        if isinstance(sigma, float):
            self.sigma = (sigma, sigma)
        else:
            assert isinstance(sigma, tuple)
            self.sigma = sigma
        from kornia.filters import GaussianBlur2d
        # Bug fix: pass the normalized pair. The original passed
        # sigma=(sigma, sigma), which nests the tuple when a tuple sigma was
        # given and silently ignored the normalization above.
        self.gblur = GaussianBlur2d(kernel_size=self.kernel_size,
                                    sigma=self.sigma,
                                    border_type=border_type)
Example no. 8
0
    def __init__(self, input_shape, s=1.0, apply_transforms=None):
        """Set up SimCLR-style augmentations for images of a given shape.

        Args:
            input_shape: (H, W, C) of the input images.
            s: color-jitter strength multiplier.
            apply_transforms: optional pre-built transform pipeline; when
                None, a default crop/flip/jitter/grayscale/blur pipeline
                is created.
        """
        assert len(input_shape) == 3, "input_shape should be (H, W, C)"

        self.input_shape = input_shape
        self.H, self.W, self.C = input_shape
        self.s = s
        self.apply_transforms = apply_transforms

        if self.apply_transforms is None:
            # Blur kernel is 10% of image height, per the SimCLR recipe.
            kernel_size = int(0.1 * self.H)
            sigma = self._get_sigma()
            jitter = 0.8 * self.s

            self.apply_transforms = KorniaCompose([
                RandomResizedCrop(size=(self.H, self.W), scale=(0.08, 1.0)),
                RandomHorizontalFlip(p=0.5),
                ColorJitter(jitter, jitter, jitter, 0.2 * self.s),
                RandomGrayscale(p=0.2),
                GaussianBlur2d(kernel_size=(kernel_size, kernel_size),
                               sigma=(sigma, sigma)),
            ])
 def gaussian_blur(self, p: float = 1.0) -> TransformType:
     """Return a 3x3 Gaussian blur applied with probability ``p``."""
     blur = GaussianBlur2d((3, 3), (1.5, 1.5))
     return RandomApply(blur, p)
Example no. 10
0
    def __init__(
            self,
            image_shape,
            output_size,
            n_atoms,
            dueling,
            jumps,
            spr,
            augmentation,
            target_augmentation,
            eval_augmentation,
            dynamics_blocks,
            norm_type,
            noisy_nets,
            aug_prob,
            classifier,
            imagesize,
            time_offset,
            local_spr,
            global_spr,
            momentum_encoder,
            shared_encoder,
            distributional,
            dqn_hidden_size,
            momentum_tau,
            renormalize,
            renormalize_type,
            q_l1_type,
            dropout,
            final_classifier,
            model_rl,
            noisy_nets_std,
            residual_tm,
            pred_hidden_ratio,
            encoder_type,
            transition_type,
            conv_proj_channel,
            proj_hidden_size,
            gru_input_size,
            gru_proj_size,
            ln_ratio,
            use_maxpool=False,
            channels=None,  # None uses default.
            kernel_sizes=None,
            strides=None,
            paddings=None,
            framestack=4,
    ):
        """Instantiates the neural network according to arguments; network defaults
        stored within this method.

        Builds, in order: the train/eval augmentation pipelines, the
        convolutional encoder (conv2d or resnet18), an optional projection
        head, the (optionally dueling/distributional) Q-head, the latent
        dynamics model, and the local/global SPR projection classifiers.
        """
        super().__init__()
        self.noisy = noisy_nets
        self.time_offset = time_offset
        self.aug_prob = aug_prob
        self.classifier_type = classifier

        # With distributional RL disabled, a single atom collapses the
        # categorical head to plain Q-values.
        self.distributional = distributional
        n_atoms = 1 if not self.distributional else n_atoms
        self.dqn_hidden_size = dqn_hidden_size

        # Parallel lists: transforms[i] is used in training,
        # eval_transforms[i] is its evaluation-time counterpart.
        self.transforms = []
        self.eval_transforms = []

        self.uses_augmentation = False
        for aug in augmentation:
            if aug == "affine":
                transformation = RandomAffine(5, (.14, .14), (.9, 1.1), (-5, 5))
                eval_transformation = nn.Identity()
                self.uses_augmentation = True
            elif aug == "crop":
                transformation = RandomCrop((84, 84))
                # Crashes if aug-prob not 1: use CenterCrop((84, 84)) or Resize((84, 84)) in that case.
                eval_transformation = CenterCrop((84, 84))
                self.uses_augmentation = True
                imagesize = 84
            elif aug == "rrc":
                transformation = RandomResizedCrop((100, 100), (0.8, 1))
                eval_transformation = nn.Identity()
                self.uses_augmentation = True
            elif aug == "blur":
                transformation = GaussianBlur2d((5, 5), (1.5, 1.5))
                eval_transformation = nn.Identity()
                self.uses_augmentation = True
            elif aug == "shift":
                # Random shift implemented as pad-then-crop.
                transformation = nn.Sequential(nn.ReplicationPad2d(4), RandomCrop((84, 84)))
                eval_transformation = nn.Identity()
            elif aug == "intensity":
                transformation = Intensity(scale=0.05)
                eval_transformation = nn.Identity()
            elif aug == "none":
                transformation = eval_transformation = nn.Identity()
            else:
                raise NotImplementedError()
            self.transforms.append(transformation)
            self.eval_transforms.append(eval_transformation)

        self.dueling = dueling
        # NOTE(review): image_shape[:2] appears to be (framestack, channels);
        # their product is the encoder's input-channel count — confirm against
        # the caller.
        f, c = image_shape[:2]
        in_channels = np.prod(image_shape[:2])

        if encoder_type == 'conv2d':
            # Standard Atari DQN encoder (Mnih et al.).
            self.conv = Conv2dModel(
                in_channels=in_channels,
                channels=[32, 64, 64],
                kernel_sizes=[8, 4, 3],
                strides=[4, 2, 1],
                paddings=[0, 0, 0],
                use_maxpool=False,
                dropout=dropout,
                conv_proj_channel=conv_proj_channel,
            )
        elif encoder_type == 'resnet18':
            self.conv = resnet18()
        else:
            raise NotImplementedError

        # Shape inference: run a dummy forward pass to discover the
        # encoder's output channels and spatial extent.
        fake_input = torch.zeros(1, f*c, imagesize, imagesize)
        fake_output = self.conv(fake_input)


        self.hidden_size = fake_output.shape[1]
        self.pixels = fake_output.shape[-1]*fake_output.shape[-2]
        print("Spatial latent size is {}".format(fake_output.shape[1:]))

        # Optional flatten+linear projection of the conv feature map.
        if proj_hidden_size:
            self.conv_proj = nn.Sequential(
                nn.Flatten(1, -1),
                nn.Linear(self.hidden_size * self.pixels, proj_hidden_size),
                nn.LayerNorm(proj_hidden_size),
                nn.ReLU(),
                nn.Dropout(dropout),
            )
        else:
            self.conv_proj = nn.Identity()

        self.jumps = jumps
        self.model_rl = model_rl
        self.use_spr = spr
        self.target_augmentation = target_augmentation
        self.eval_augmentation = eval_augmentation
        self.num_actions = output_size
        self.transition_type = transition_type

        if dueling:
            self.head = DQNDistributionalDuelingHeadModel(self.hidden_size,
                                                          output_size,
                                                          hidden_size=self.dqn_hidden_size,
                                                          pixels=self.pixels,
                                                          noisy=self.noisy,
                                                          n_atoms=n_atoms,
                                                          std_init=noisy_nets_std,
                                                          proj_hidden_size=proj_hidden_size)
        else:
            self.head = DQNDistributionalHeadModel(self.hidden_size,
                                                   output_size,
                                                   hidden_size=self.dqn_hidden_size,
                                                   pixels=self.pixels,
                                                   noisy=self.noisy,
                                                   n_atoms=n_atoms,
                                                   std_init=noisy_nets_std)

        # Latent dynamics model, only needed for multi-step (jumps > 0)
        # prediction.
        if self.jumps > 0:
            repr_size = proj_hidden_size if proj_hidden_size else (self.pixels * self.hidden_size)

            if transition_type == 'gru':
                self.dynamics_model = GRUModel(
                    input_size = gru_input_size,
                    repr_size = repr_size,
                    proj_size = gru_proj_size,
                    num_layers = 1,
                    num_actions = self.num_actions,
                    renormalize=renormalize,
                    renormalize_type=renormalize_type,
                    dropout=dropout
                )
            else:
                self.dynamics_model = TransitionModel(channels=self.hidden_size,
                                                      num_actions=output_size,
                                                      pixels=self.pixels,
                                                      hidden_size=self.hidden_size,
                                                      limit=1,
                                                      blocks=dynamics_blocks,
                                                      norm_type=norm_type,
                                                      renormalize=renormalize,
                                                      residual=residual_tm)
        else:
            self.dynamics_model = nn.Identity()

        self.renormalize = renormalize
        self.renormalize_type = renormalize_type
        self.ln_ratio = ln_ratio

        # NOTE(review): repr_size is only bound when jumps > 0; using
        # renormalize_type == 'train_ln' with jumps == 0 would raise
        # NameError here — confirm intended configurations.
        if renormalize_type == 'train_ln':
            self.renormalize_ln = nn.LayerNorm(repr_size)
        else:
            self.renormalize_ln = nn.Identity()

        if self.use_spr:
            self.local_spr = local_spr
            self.global_spr = global_spr
            self.momentum_encoder = momentum_encoder
            self.momentum_tau = momentum_tau
            self.shared_encoder = shared_encoder
            # Momentum and shared target encoders are mutually exclusive.
            assert not (self.shared_encoder and self.momentum_encoder)

            # in case someone tries something silly like --local-spr 2
            self.num_sprs = int(bool(self.local_spr)) + \
                            int(bool(self.global_spr))

            # Local SPR: per-location classifier over the spatial map.
            if self.local_spr:
                self.local_final_classifier = nn.Identity()
                if self.classifier_type == "mlp":
                    self.local_classifier = nn.Sequential(nn.Linear(self.hidden_size,
                                                                    self.hidden_size),
                                                          nn.BatchNorm1d(self.hidden_size),
                                                          nn.ReLU(),
                                                          nn.Linear(self.hidden_size,
                                                                    self.hidden_size))
                elif self.classifier_type == "bilinear":
                    self.local_classifier = nn.Linear(self.hidden_size, self.hidden_size)
                elif self.classifier_type == "none":
                    self.local_classifier = nn.Identity()
                if final_classifier == "mlp":
                    self.local_final_classifier = nn.Sequential(nn.Linear(self.hidden_size, 2*self.hidden_size),
                                                                nn.BatchNorm1d(2*self.hidden_size),
                                                                nn.ReLU(),
                                                                nn.Linear(2*self.hidden_size,
                                                                    self.hidden_size))
                elif final_classifier == "linear":
                    self.local_final_classifier = nn.Linear(self.hidden_size, self.hidden_size)
                else:
                    self.local_final_classifier = nn.Identity()

                # Target classifier shares weights with the online one here;
                # it is deep-copied below when a momentum/separate target
                # encoder is used.
                self.local_target_classifier = self.local_classifier
            else:
                self.local_classifier = self.local_target_classifier = nn.Identity()
            # Global SPR: classifier over the flattened feature map.
            if self.global_spr:
                self.global_final_classifier = nn.Identity()
                if self.classifier_type == "mlp":
                    self.global_classifier = nn.Sequential(
                                                nn.Flatten(-3, -1),
                                                nn.Linear(self.pixels*self.hidden_size, 512),
                                                nn.BatchNorm1d(512),
                                                nn.ReLU(),
                                                nn.Linear(512, 256)
                                                )
                    self.global_target_classifier = self.global_classifier
                    global_spr_size = 256
                elif self.classifier_type == "q_l1":
                    self.global_classifier = QL1Head(self.head, dueling=dueling, type=q_l1_type)
                    global_spr_size = self.global_classifier.out_features
                    self.global_target_classifier = self.global_classifier
                elif self.classifier_type == "q_l2":
                    self.global_classifier = nn.Sequential(self.head, nn.Flatten(-2, -1))
                    self.global_target_classifier = self.global_classifier
                    global_spr_size = 256
                elif self.classifier_type == "bilinear":
                    # NOTE(review): this branch leaves global_spr_size unset;
                    # combining classifier "bilinear" with final_classifier
                    # "mlp"/"linear" would raise NameError below — verify.
                    self.global_classifier = nn.Sequential(nn.Flatten(-3, -1),
                                                           nn.Linear(self.hidden_size*self.pixels,
                                                                     self.hidden_size*self.pixels))
                    self.global_target_classifier = nn.Flatten(-3, -1)
                elif self.classifier_type == "none":
                    self.global_classifier = nn.Flatten(-3, -1)
                    self.global_target_classifier = nn.Flatten(-3, -1)

                    global_spr_size = self.hidden_size*self.pixels
                if final_classifier == "mlp":
                    # Hidden width of the predictor MLP is a ratio of the
                    # projection size (BYOL-style predictor).
                    global_final_hidden_size = int(global_spr_size * pred_hidden_ratio)
                    self.global_final_classifier = nn.Sequential(
                        nn.Linear(global_spr_size, global_final_hidden_size),
                        nn.BatchNorm1d(global_final_hidden_size),
                        nn.ReLU(),
                        nn.Linear(global_final_hidden_size, global_spr_size)
                    )
                elif final_classifier == "linear":
                    self.global_final_classifier = nn.Sequential(
                        nn.Linear(global_spr_size, global_spr_size),
                    )
                elif final_classifier == "none":
                    self.global_final_classifier = nn.Identity()
            else:
                self.global_classifier = self.global_target_classifier = nn.Identity()

            if self.momentum_encoder:
                # EMA target branch: deep copies that are updated by Polyak
                # averaging elsewhere, so gradients are disabled here.
                self.target_encoder = copy.deepcopy(self.conv)
                self.target_encoder_proj = copy.deepcopy(self.conv_proj)
                self.target_renormalize_ln = copy.deepcopy(self.renormalize_ln)
                self.global_target_classifier = copy.deepcopy(self.global_target_classifier)
                self.local_target_classifier = copy.deepcopy(self.local_target_classifier)
                for param in (list(self.target_encoder.parameters())
                            + list(self.target_encoder_proj.parameters())
                            + list(self.target_renormalize_ln.parameters())
                            + list(self.global_target_classifier.parameters())
                            + list(self.local_target_classifier.parameters())):
                    param.requires_grad = False

            elif not self.shared_encoder:
                # Use a separate target encoder on the last frame only.
                self.global_target_classifier = copy.deepcopy(self.global_target_classifier)
                self.local_target_classifier = copy.deepcopy(self.local_target_classifier)
                # NOTE(review): self.stack_actions is never assigned in this
                # method — presumably set elsewhere in the class or this
                # branch is unused; verify before relying on it.
                if self.stack_actions:
                    input_size = c - 1
                else:
                    input_size = c
                self.target_encoder = Conv2dModel(in_channels=input_size,
                                                  channels=[32, 64, 64],
                                                  kernel_sizes=[8, 4, 3],
                                                  strides=[4, 2, 1],
                                                  paddings=[0, 0, 0],
                                                  use_maxpool=False,
                                                  )

            elif self.shared_encoder:
                # Online and target branches share the same encoder weights.
                self.target_encoder = self.conv

        print("Initialized model with {} parameters".format(count_parameters(self)))