Example #1
    def __init__(self, in_size, num_classes, **kwargs):
        super().__init__()

        c, h, w = in_size

        self.blocks = nn.Sequential(
            nn.Conv2d(c, 32, kernel_size=1),
            MobBlock(32),
            nn.MaxPool2d(kernel_size=2),  # 16x16
            nn.Conv2d(32, 64, kernel_size=1),
            MobBlock(64),
            nn.MaxPool2d(kernel_size=2),  # 8x8
            nn.Conv2d(64, 128, kernel_size=1),
            MobBlock(128),
            nn.MaxPool2d(kernel_size=2),  # 4x4
            MobBlock(128),
            nn.Conv2d(128, 320, kernel_size=1),
            # nn.MaxPool2d(kernel_size=2),  # 2x2
            # ResBlock(64), nn.Conv2d(64, 112, kernel_size=1),
            # nn.MaxPool2d(kernel_size=2),  # 4x4
            # ResBlock(112, kernel=5), nn.Conv2d(112, 192, kernel_size=1),
            # nn.MaxPool2d(kernel_size=2), # 2x2
            # ResBlock(192), nn.Conv2d(192, 320, kernel_size=1),
            # nn.MaxPool2d(kernel_size=2), # 1x1
            util.Flatten(),
            nn.Linear(320 * 4 * 4, num_classes))
Example #2
    def __init__(self, insize, num_classes, mul=1.0, **kwargs):
        super().__init__()
        c, h, w = insize

        self.prep = nn.Sequential(
            conv((c, h, w),
                 64,
                 kernel_size=3,
                 padding=1,
                 bias=False,
                 sparse=False,
                 **kwargs), nn.BatchNorm2d(64), nn.ReLU())

        self.layer0 = DavidLayer((64, h, w), 128, sparse=False,
                                 **kwargs)  # one maxpool

        self.mid = nn.Sequential(
            conv((128, h // 2, w // 2),
                 256,
                 kernel_size=3,
                 padding=1,
                 bias=False,
                 sparse=True,
                 **kwargs), nn.BatchNorm2d(256), nn.ReLU(),
            nn.MaxPool2d(kernel_size=2))

        self.layer1 = DavidLayer((256, h // 4, w // 4),
                                 512,
                                 sparse=False,
                                 **kwargs)  # one maxpool

        self.head = nn.Sequential(  # h//8, w//8
            nn.MaxPool2d(kernel_size=4), util.Flatten(),
            nn.Linear(512 * (h // 32) * (w // 32), num_classes),
            util.Lambda(lambda x: x * mul))
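A quick check of the head's input size (an illustrative sketch, assuming 32x32 inputs such as CIFAR-10): the two DavidLayer max-pools and the one in self.mid leave an h//8 x w//8 feature map, and the final MaxPool2d(4) reduces it to h//32 x w//32.

h = w = 32
head_in = 512 * (h // 32) * (w // 32)
print(head_in)  # 512: a single spatial position with 512 channels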
Example #3
    def __init__(self,
                 num_classes: int,
                 num_groups: int = 3,
                 N: int = 3,
                 k: int = 6,
                 drop_p: float = 0.0,
                 start_nf: int = 16,
                 n_in_channels: int = 3):
        super().__init__()

        n_channels = [start_nf]
        for i in range(num_groups):
            n_channels.append(start_nf * (2**i) * k)

        layers = [nn.Conv2d(n_in_channels, n_channels[0], 3, 1,
                            padding=1)]  # conv1
        for i in range(num_groups):
            layers += _make_group(N, n_channels[i], n_channels[i + 1],
                                  WideBlock, (1 if i == 0 else 2), drop_p)

        layers += [
            nn.BatchNorm2d(n_channels[num_groups]),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d(1),
            util.Flatten(),
            nn.Linear(n_channels[num_groups], num_classes)
        ]

        self.features = nn.Sequential(*layers)
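For the defaults above (start_nf=16, k=6, num_groups=3) the widths follow the usual wide-ResNet pattern: a 16-channel stem, then one widened width per group, with only the first group keeping stride 1. A standalone check of that bookkeeping:

start_nf, num_groups, k = 16, 3, 6

n_channels = [start_nf]
for i in range(num_groups):
    n_channels.append(start_nf * (2 ** i) * k)

print(n_channels)  # [16, 96, 192, 384]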
Example #4
    def __init__(self, in_size, num_classes, **kwargs):
        super().__init__()

        c, h, w = in_size

        #self.layer0 = nn.Conv2d(c, c, kernel_size=3, padding=1)

        self.sparse = Convolution((c, h, w), (32, h, w), **kwargs)
        # self.nsp = nn.Conv2d(c, 32, kernel_size=3, padding=1)

        self.blocks = nn.Sequential(
            nn.MaxPool2d(kernel_size=2),  # 16x16
            MobBlock(32),
            nn.Conv2d(32, 16, kernel_size=1),
            MobBlock(16),
            MobBlock(16),
            nn.Conv2d(16, 24, kernel_size=1),
            nn.MaxPool2d(kernel_size=2),  # 8x8
            MobBlock(24, kernel=5),
            MobBlock(24, kernel=5),
            nn.Conv2d(24, 40, kernel_size=1),
            nn.MaxPool2d(kernel_size=2),  # 4x4
            MobBlock(40),
            MobBlock(40),
            MobBlock(40),
            nn.Conv2d(40, 80, kernel_size=1),
            # nn.MaxPool2d(kernel_size=2),
            MobBlock(80, kernel=5),
            MobBlock(80, kernel=5),
            MobBlock(80, kernel=5),
            nn.Conv2d(80, 112, kernel_size=1),
            #nn.MaxPool2d(kernel_size=2),
            MobBlock(112, kernel=5),
            MobBlock(112, kernel=5),
            MobBlock(112, kernel=5),
            MobBlock(112, kernel=5),
            nn.Conv2d(112, 192, kernel_size=1),
            # nn.MaxPool2d(kernel_size=2),
            MobBlock(192),
            nn.Conv2d(192, 320, kernel_size=1),
            util.Flatten(),
            nn.Linear(320 * 4 * 4, num_classes))
Example #5
def prep(ci, hi, wi, pool=4, coord='none'):
    """
    Canonical preprocessing model (list of modules). Results in linear layer
    of HIDLIN units

    :return:
    """
    
    CFirst = nn.Conv2d if coord == 'none' else util.CConv2d
    CRest = util.CConv2d if coord == 'all' else nn.Conv2d
    
    activation = nn.ReLU()

    ch1, ch2, ch3 = 32, 64, 128

    #hid = max(1, floor(floor(wi / p1) / p2) * floor(floor(hi / p1) / p2)) * ch3
    # three pools of size `pool` and one of size 2 shrink each spatial dim by pool**3 * 2
    hid = max(1, floor(wi / (pool ** 3 * 2)) * floor(hi / (pool ** 3 * 2)) * ch3)

    return [
        CFirst(ci, ch1, kernel_size=3, padding=1),
        activation,
        nn.MaxPool2d(kernel_size=pool),
        CRest(ch1, ch1, kernel_size=3, padding=1),
        activation,
        nn.MaxPool2d(kernel_size=pool),
        CRest(ch1, ch2, kernel_size=3, padding=1),
        activation,
        nn.MaxPool2d(kernel_size=pool),
        CRest(ch2, ch2, kernel_size=3, padding=1),
        activation,
        nn.MaxPool2d(kernel_size=2),
        CRest(ch2, ch3, kernel_size=3, padding=1),
        activation,
        CRest(ch3, ch3, kernel_size=3, padding=1),
        activation,
        util.Flatten(),
        nn.Linear(hid, HIDLIN),
        activation,
        nn.Linear(HIDLIN, HIDLIN)
    ]
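Example #9 below wraps this module list into a full classifier (the 'conv' branch); a minimal sketch of that usage, assuming HIDLIN is the module-level constant referenced above and util.Flatten is available as in the source:

shape = (1, 28, 28)          # MNIST-sized input, as in Example #9
num_classes = 10

base = prep(*shape, pool=2)  # pool=2 keeps the spatial dims positive for 28x28 inputs
model = nn.Sequential(*(base + [nn.ReLU(),
                                nn.Linear(HIDLIN, num_classes),
                                nn.Softmax(dim=1)]))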
Example #6
    def __init__(self, data_size, latent_size=128, depth=3):
        super().__init__()

        c, h, w = data_size
        cs = [c] + [2**(d+4) for d in range(depth)]

        div = 2 ** depth

        modules = []

        for d in range(depth):
            modules += [
                nn.Conv2d(cs[d], cs[d+1], 3, padding=1), nn.ReLU(),
                nn.Conv2d(cs[d+1], cs[d+1], 3, padding=1), nn.ReLU(),
                nn.MaxPool2d((2, 2))
            ]

        modules += [
            util.Flatten(),
            nn.Linear(cs[-1] * (h//div) * (w//div), 1024), nn.ReLU(),
            nn.Linear(1024, latent_size)  # encoder maps the input to a latent vector of latent_size units
        ]

        self.encoder = nn.Sequential(*modules)
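A worked check of the encoder's size bookkeeping (illustrative, assuming a 3x32x32 input such as CIFAR-10 and the default depth=3):

c, h, w = 3, 32, 32
depth = 3

cs = [c] + [2 ** (d + 4) for d in range(depth)]  # [3, 16, 32, 64]
div = 2 ** depth                                 # each of the three max-pools halves h and w

print(cs[-1] * (h // div) * (w // div))          # 1024: input size of the first linear layer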
Example #7
    def __init__(self, in_size, num_classes, **kwargs):
        super().__init__()

        c, h, w = in_size

        self.sparse = Convolution((c, h, w), (32, h, w), **kwargs)
        self.spars1 = Convolution((32, h // 2, w // 2), (64, h, w), **kwargs)
        self.spars2 = Convolution((64, h // 4, w // 4), (128, h, w), **kwargs)

        self.blocks = nn.Sequential(
            self.sparse,
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # 16x16
            self.spars1,
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # 8x8
            self.spars2,
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # 4x4
            MobBlock(128),
            nn.Conv2d(128, 320, kernel_size=1),
            nn.MaxPool2d(kernel_size=2),  # 2x2
            util.Flatten(),
            nn.Linear(320 * 2 * 2, num_classes))
Example #8
    def __init__(self,
                 data_size,
                 latent_size=(5, 5, 128),
                 depth=3,
                 gadditional=2,
                 radditional=4,
                 region=0.2,
                 method='clamp',
                 sigma_scale=1.0,
                 min_sigma=0.01):
        super().__init__()

        self.method, self.gadditional, self.radditional = method, gadditional, radditional
        self.sigma_scale, self.min_sigma = sigma_scale, min_sigma

        # latent space
        self.latent = nn.Parameter(torch.randn(size=latent_size))
        self.region = [int(r * region) for r in latent_size[:-1]]

        ln = len(latent_size)
        emb_size = latent_size[-1]

        c, h, w = data_size

        cs = [c] + [2**(d + 4) for d in range(depth)]

        div = 2**depth

        modules = []

        for d in range(depth):
            modules += [
                nn.Conv2d(cs[d], cs[d + 1], 3, padding=1),
                nn.ReLU(),
                nn.Conv2d(cs[d + 1], cs[d + 1], 3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d((2, 2))
            ]

        modules += [
            util.Flatten(),
            nn.Linear(cs[-1] * (h // div) * (w // div), 1024),
            nn.ReLU(),
            nn.Linear(
                1024, len(latent_size)
            )  # encoder produces a cont. index tuple (ln -1 for the means, 1 for the sigma)
        ]

        self.encoder = nn.Sequential(*modules)

        upmode = 'bilinear'
        cl = lambda x: int(math.ceil(x))

        modules = [
            nn.Linear(emb_size, cs[-1] * cl(h / div) * cl(w / div)),
            nn.ReLU(),
            util.Reshape((cs[-1], cl(h / div), cl(w / div)))
        ]

        for d in range(depth, 0, -1):
            modules += [
                nn.Upsample(scale_factor=2, mode=upmode),
                nn.ConvTranspose2d(cs[d], cs[d], 3, padding=1),
                nn.ReLU(),
                nn.ConvTranspose2d(cs[d], cs[d - 1], 3, padding=1),
                nn.ReLU()
            ]

        modules += [
            nn.ConvTranspose2d(c, c, (3, 3), padding=1),
            nn.Sigmoid(),
            util.Lambda(lambda x: x[:, :, :h, :w]
                        )  # crop out any extra pixels due to rounding errors
        ]
        self.decoder = nn.Sequential(*modules)

        self.smp = True
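Here the latent space is a learned grid of embeddings addressed by a continuous index rather than a dense latent vector. A small worked example of the constructor's bookkeeping for the default arguments (illustrative only):

latent_size = (5, 5, 128)   # defaults: a 5x5 grid of 128-dimensional embeddings
region = 0.2

emb_size = latent_size[-1]                                   # 128: embedding width fed to the decoder
ln = len(latent_size)                                        # 3: encoder output (ln - 1 index means + 1 sigma)
local_region = [int(r * region) for r in latent_size[:-1]]   # [1, 1], stored as self.region

print(emb_size, ln, local_region)  # 128 3 [1, 1]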
Example #9
def go(arg):

    if arg.seed < 0:
        seed = random.randint(0, 1000000)
        print('random seed: ', seed)
        torch.manual_seed(seed)
    else:
        torch.manual_seed(arg.seed)

    tbw = SummaryWriter(log_dir=arg.tb_dir)

    normalize = transforms.Compose([transforms.ToTensor()])

    if arg.task == 'mnist':
        data = arg.data + os.sep + arg.task

        if arg.final:
            train = torchvision.datasets.MNIST(root=data, train=True, download=True, transform=normalize)
            trainloader = torch.utils.data.DataLoader(train, batch_size=arg.batch, shuffle=True, num_workers=2)

            test = torchvision.datasets.MNIST(root=data, train=False, download=True, transform=normalize)
            testloader = torch.utils.data.DataLoader(test, batch_size=arg.batch, shuffle=False, num_workers=2)

        else:
            NUM_TRAIN = 45000
            NUM_VAL = 5000
            total = NUM_TRAIN + NUM_VAL

            train = torchvision.datasets.MNIST(root=data, train=True, download=True, transform=normalize)

            trainloader = DataLoader(train, batch_size=arg.batch, sampler=util.ChunkSampler(0, NUM_TRAIN, total))
            testloader = DataLoader(train, batch_size=arg.batch, sampler=util.ChunkSampler(NUM_TRAIN, NUM_VAL, total))

        shape = (1, 28, 28)
        num_classes = 10

    elif arg.task == 'image-folder-bw':

        tr = transforms.Compose([transforms.Grayscale(), transforms.ToTensor()])

        if arg.final:
            train = torchvision.datasets.ImageFolder(root=arg.data + '/train/', transform=tr)
            test  = torchvision.datasets.ImageFolder(root=arg.data + '/test/', transform=tr)

            trainloader = DataLoader(train, batch_size=arg.batch, shuffle=True)
            testloader = DataLoader(test, batch_size=arg.batch, shuffle=False)

        else:

            NUM_TRAIN = 45000
            NUM_VAL = 5000
            total = NUM_TRAIN + NUM_VAL

            train = torchvision.datasets.ImageFolder(root=arg.data + '/train/', transform=tr)

            trainloader = DataLoader(train, batch_size=arg.batch, sampler=util.ChunkSampler(0, NUM_TRAIN, total))
            testloader = DataLoader(train, batch_size=arg.batch, sampler=util.ChunkSampler(NUM_TRAIN, NUM_VAL, total))


        for im, labels in trainloader:
            shape = im[0].size()
            break

        num_classes = 10

    else:
        raise Exception('Task name {} not recognized'.format(arg.task))

    activation = nn.ReLU()

    hyperlayer = None

    if arg.modelname == 'conv':

        base = prep(*shape, pool=arg.pool)

        model = nn.Sequential(*(
            base +
            [activation, nn.Linear(HIDLIN, num_classes),
            nn.Softmax()])
        )

        reinforce = False

    elif arg.modelname == 'reinforce':

        hyperlayer = ReinforceLayer(in_shape=shape, glimpses=arg.num_glimpses,
                glimpse_size=(28, 28),
                num_classes=num_classes, pool=arg.pool)

        model = nn.Sequential(
             hyperlayer,
             R(util.Flatten()),
             R(nn.Linear(28 * 28 * shape[0] * arg.num_glimpses, arg.hidden)),
             R(activation),
             R(nn.Linear(arg.hidden, num_classes)),
             R(nn.Softmax())
        )

        reinforce = True

    elif arg.modelname == 'ash':

        hyperlayer = BoxAttentionLayer(
            glimpses=arg.num_glimpses,
            in_size=shape, k=arg.k,
            gadditional=arg.gadditional, radditional=arg.radditional, region=(arg.region, arg.region),
            min_sigma=arg.min_sigma, pool=arg.pool
        )

        model = nn.Sequential(
             hyperlayer,
             util.Flatten(),
             nn.Linear(arg.k * arg.k * shape[0] * arg.num_glimpses, arg.hidden),
             activation,
             nn.Linear(arg.hidden, num_classes),
             nn.Softmax()
        )

        reinforce = False

    elif arg.modelname == 'quad':
        """
        Network with quadrangle attention (instead of bounding box).
        """

        hyperlayer = QuadAttentionLayer(
            glimpses=arg.num_glimpses,
            in_size=shape, k=arg.k,
            gadditional=arg.gadditional, radditional=arg.radditional, region=(arg.region, arg.region),
            min_sigma=arg.min_sigma, pool=arg.pool
        )

        model = nn.Sequential(
             hyperlayer,
             util.Flatten(),
             nn.Linear(arg.k * arg.k * shape[0] * arg.num_glimpses, arg.hidden),
             activation,
             nn.Linear(arg.hidden, num_classes),
             nn.Softmax()
        )

        reinforce = False

    elif arg.modelname == 'aff':
        """
        Network with affine transformation (instead of bounding box).
        """

        hyperlayer = AffineAttentionLayer(
            glimpses=arg.num_glimpses,
            in_size=shape, k=arg.k,
            gadditional=arg.gadditional, radditional=arg.radditional, region=(arg.region, arg.region),
            min_sigma=arg.min_sigma,
            scale=arg.stn_scale, pool=arg.pool
        )

        model = nn.Sequential(
             hyperlayer,
             util.Flatten(),
             nn.Linear(arg.k * arg.k * shape[0] * arg.num_glimpses, arg.hidden),
             activation,
             nn.Linear(arg.hidden, num_classes),
             nn.Softmax()
        )

        reinforce = False


    elif arg.modelname == 'aff-conv':
        """
        Network with affine transformation attention (instead of bounding box).
        """

        hyperlayer = AffineAttentionLayer(
            glimpses=arg.num_glimpses,
            in_size=shape, k=arg.k,
            gadditional=arg.gadditional, radditional=arg.radditional, region=(arg.region, arg.region),
            min_sigma=arg.min_sigma,
            scale=arg.stn_scale, pool=arg.pool
        )

        ch1, ch2, ch3 = 16, 32, 64
        h = (arg.k // 8) ** 2 * 64
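        # h: flattened size after the three 2x2 max-pools below (k // 8 per side, assuming k is a multiple of 8) times ch3 = 64 channels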

        model = nn.Sequential(
            hyperlayer,
            util.Reshape((arg.num_glimpses * shape[0], arg.k, arg.k)),  # Fold glimpses into channels
            nn.Conv2d(arg.num_glimpses * shape[0], ch1, kernel_size=3, padding=1),
            activation,
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(ch1, ch2, kernel_size=3, padding=1),
            activation,
            nn.Conv2d(ch2, ch2, kernel_size=3, padding=1),
            activation,
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(ch2, ch3, kernel_size=3, padding=1),
            activation,
            nn.Conv2d(ch3, ch3, kernel_size=3, padding=1),
            activation,
            nn.MaxPool2d(kernel_size=2),
            util.Flatten(),
            nn.Linear(h, 128),
            activation,
            nn.Linear(128, num_classes),
            nn.Softmax()
        )

        reinforce = False

    elif arg.modelname == 'stn':
        """
        Spatial transformer with an MLP head.
        """

        hyperlayer = STNAttentionLayer(in_size=shape, k=arg.k, glimpses=arg.num_glimpses, scale=arg.stn_scale, pool=arg.pool)

        model = nn.Sequential(
             hyperlayer,
             util.Flatten(),
             nn.Linear(arg.k * arg.k * shape[0] * arg.num_glimpses, arg.hidden),
             activation,
             nn.Linear(arg.hidden, num_classes),
             nn.Softmax()
        )

        reinforce = False

    elif arg.modelname == 'stn-conv':
        """
        Spatial transformer with a convolutional head.
        """

        hyperlayer = STNAttentionLayer(in_size=shape, k=arg.k, glimpses=arg.num_glimpses, scale=arg.stn_scale, pool=arg.pool)

        ch1, ch2, ch3 = 16, 32, 64
        h = (arg.k // 8) ** 2 * 64

        model = nn.Sequential(
            hyperlayer,
            util.Reshape((arg.num_glimpses * shape[0], arg.k, arg.k)), # Fold glimpses into channels
            nn.Conv2d(arg.num_glimpses * shape[0], ch1, kernel_size=3, padding=1),
            activation,
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(ch1, ch2, kernel_size=3, padding=1),
            activation,
            nn.Conv2d(ch2, ch2, kernel_size=3, padding=1),
            activation,
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(ch2, ch3, kernel_size=3, padding=1),
            activation,
            nn.Conv2d(ch3, ch3, kernel_size=3, padding=1),
            activation,
            nn.MaxPool2d(kernel_size=2),
            util.Flatten(),
            nn.Linear(h, 128),
            activation,
            nn.Linear(128, num_classes),
            nn.Softmax()
        )

        reinforce = False

    elif arg.modelname == 'ash-conv':
        """
        Model with a convolutional head. More powerful classification, but more difficult to train on top of a hyperlayer.
        """

        hyperlayer = BoxAttentionLayer(
            glimpses=arg.num_glimpses,
            in_size=shape, k=arg.k,
            gadditional=arg.gadditional, radditional=arg.radditional, region=(arg.region, arg.region),
            min_sigma=arg.min_sigma, pool=arg.pool
        )

        ch1, ch2, ch3 = 16, 32, 64
        h = (arg.k // 8) ** 2 * 64

        model = nn.Sequential(
            hyperlayer,
            util.Reshape((arg.num_glimpses * shape[0], arg.k, arg.k)), # Fold glimpses into channels
            nn.Conv2d(arg.num_glimpses * shape[0], ch1, kernel_size=5, padding=2),
            activation,
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(ch1, ch2, kernel_size=5, padding=2),
            activation,
            nn.Conv2d(ch2, ch2, kernel_size=5, padding=2),
            activation,
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(ch2, ch3, kernel_size=5, padding=2),
            activation,
            nn.Conv2d(ch3, ch3, kernel_size=5, padding=2),
            activation,
            nn.MaxPool2d(kernel_size=2),
            util.Flatten(),
            nn.Linear(h, 128),
            activation,
            nn.Linear(128, num_classes),
            nn.Softmax()
        )

        reinforce = False

    else:
        raise Exception('Model name {} not recognized'.format(arg.modelname))

    if arg.cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters(), lr=arg.lr)

    xent = nn.CrossEntropyLoss()
    mse = nn.MSELoss()

    step = 0

    sigs, vals = [], []

    util.makedirs('./mnist/')

    for epoch in range(arg.epochs):

        model.train(True)

        for i, (inputs, labels) in tqdm(enumerate(trainloader, 0)):

            # if i> 2:
            #     break

            if arg.cuda:
                inputs, labels = inputs.cuda(), labels.cuda()
            inputs, labels = Variable(inputs), Variable(labels)

            optimizer.zero_grad()

            if not reinforce:
                outputs = model(inputs)
            else:
                outputs, stoch_nodes, actions = model(inputs)

            mloss = F.cross_entropy(outputs, labels, reduction='none')  # per-example loss

            if reinforce:

                # REINFORCE: weight the log-probability of the sampled glimpse actions
                # by the negated, detached classification loss
                rloss = stoch_nodes.log_prob(actions) * - mloss.detach()[:, None, None]

                loss = rloss.sum(dim=1) + mloss[:, None]

                tbw.add_scalar('mnist/train-loss', float(loss.mean().item()), step)
                tbw.add_scalar('mnist/model-loss', float(mloss.mean().item()), step)
                tbw.add_scalar('mnist/reinf-loss', float(rloss.sum(dim=1).mean().item()), step)

            else:
                loss = mloss

                tbw.add_scalar('mnist/train-loss', float(loss.data.sum().item()), step)

            loss = loss.sum()
            loss.backward()  # compute the gradients

            optimizer.step()

            step += inputs.size(0)

            if epoch % arg.plot_every == 0 and i == 0 and hyperlayer is not None:

                hyperlayer.plot(inputs[:10, ...])
                plt.savefig('mnist/attention.{:03}.pdf'.format(epoch))

        total = 0.0
        correct = 0.0

        model.train(False)

        for i, (inputs, labels) in enumerate(testloader, 0):

            if arg.cuda:
                inputs, labels = inputs.cuda(), labels.cuda()

            # wrap them in Variables
            inputs, labels = Variable(inputs), Variable(labels)

            if not reinforce:
                outputs = model(inputs)
            else:
                outputs, _, _ = model(inputs)

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        accuracy = correct/total

        tbw.add_scalar('mnist1d/per-epoch-test-acc', accuracy, epoch)
        print('epoch {}: test accuracy {}'.format(epoch, accuracy))

    LOG.info('Finished Training.')