    def __init__(self, output_stride=16, num_classes=21):
        super(DeepLab, self).__init__()
        BatchNorm = nn.BatchNorm2d

        # ResNet-18 backbone -> ASPP context module -> decoder head
        self.ResNet = ResNet18()
        self.aspp = build_aspp(output_stride, BatchNorm)
        self.decoder = build_decoder(num_classes, BatchNorm)

    def __init__(self, backbone='resnet', output_stride=16, num_classes=4,
                 sync_bn=False, freeze_bn=False):
        super(DeepLab, self).__init__()
        # DRN backbones are defined for an output stride of 8
        if backbone == 'drn':
            output_stride = 8

        # optionally use synchronized BatchNorm for multi-GPU training
        if sync_bn:
            BatchNorm = SynchronizedBatchNorm2d
        else:
            BatchNorm = nn.BatchNorm2d

        self.backbone = build_backbone(backbone, output_stride, BatchNorm)
        self.aspp = build_aspp(backbone, output_stride, BatchNorm)
        self.decoder = build_decoder(num_classes, backbone, BatchNorm)

        self.freeze_bn = freeze_bn
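
Both constructors above assemble the same three-stage DeepLab pipeline. A minimal forward-pass sketch for the second variant, assuming the usual backbone/ASPP/decoder interfaces and F = torch.nn.functional (the method body below is an illustration, not taken from the source):

    def forward(self, img):
        # backbone returns deep features plus early low-level features
        x, low_level_feat = self.backbone(img)
        x = self.aspp(x)                         # multi-scale context via atrous convolutions
        x = self.decoder(x, low_level_feat)      # fuse with the low-level features
        # upsample the class logits back to the input resolution
        x = F.interpolate(x, size=img.size()[2:], mode='bilinear', align_corners=True)
        return x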
Example #3
def main():
    global args

    # frozen feature extractors loaded from the given checkpoints
    img_model = resnet.resnet50(args.image_model_path).cuda().eval()
    pla_model = resnet.resnet50(args.place_model_path).cuda().eval()

    # saliency decoder trained from scratch on top of both feature streams
    decoder_model = decoder.build_decoder(None, args.size, args.num_feat, args.num_feat).cuda()

    optimizer = torch.optim.SGD(decoder_model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Data loading code
    train_loader = torch.utils.data.DataLoader(
        SaliconLoader.ImageList(args.data_folder, transforms.Compose([
            transforms.ToTensor(),
        ]),
        train=True,
        ),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    if args.mse:
        args.output_folder = args.output_folder + "_mse"
        criterion = nn.MSELoss().cuda()
    else:
        # the EML loss is trained with a 10x smaller learning rate
        args.lr *= 0.1
        args.output_folder = args.output_folder + "_eml"
        criterion = EMLLoss.Loss().cuda()

    args.output_folder = pl.Path(args.output_folder)

    if not args.output_folder.is_dir():
        args.output_folder.mkdir()

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_loader, img_model, pla_model, decoder_model, criterion, optimizer, epoch)

    state = {
        'state_dict' : decoder_model.state_dict(),
        }

    save_path = args.output_folder / ("model.pth.tar")
    save_model(state, save_path)
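
The train, adjust_learning_rate and save_model helpers are not part of this snippet. A plausible minimal sketch of the latter two, assuming a step-decay schedule (the function bodies and the decay interval are assumptions):

def adjust_learning_rate(optimizer, epoch, decay_every=30):
    # step decay: scale the base learning rate by 0.1 every `decay_every` epochs (assumed schedule)
    lr = args.lr * (0.1 ** (epoch // decay_every))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def save_model(state, save_path):
    # persist the decoder weights to disk
    torch.save(state, save_path)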
Example #4
    def __init__(self,
                 backbone='resnet',
                 output_stride=16,
                 num_classes=21,
                 freeze_bn=False):
        super(DeepLab, self).__init__()
        if backbone == 'drn':
            output_stride = 8

        BatchNorm = nn.BatchNorm2d

        # note: the ResNet-101 backbone is built here with a fixed output stride of 8,
        # regardless of the output_stride argument passed to the ASPP module below
        self.backbone = resnet_deeplab.ResNet101(BatchNorm=BatchNorm,
                                                 pretrained=False,
                                                 output_stride=8)
        self.aspp = build_aspp(backbone, output_stride, BatchNorm)
        self.decoder = build_decoder(num_classes, backbone, BatchNorm)

        self.freeze_bn = freeze_bn
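
The freeze_bn flag stored by these constructors is commonly consumed by overriding train() so that BatchNorm layers keep their running statistics during fine-tuning; a hedged sketch of that pattern (not shown in the source):

    def train(self, mode=True):
        super(DeepLab, self).train(mode)
        if self.freeze_bn:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()   # keep the frozen running mean/var during training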
Example #5
    def __init__(self, config):
        super(Transducer, self).__init__()
        # define encoder
        self.config = config
        self.encoder = build_encoder(config)
        # define decoder
        self.decoder = build_decoder(config)
        # define JointNet
        self.joint = JointNet(
            input_size=config.joint.input_size,
            inner_dim=config.joint.inner_size,
            vocab_size=config.vocab_size
            )

        if config.share_embedding:
            assert self.decoder.embedding.weight.size() == self.joint.project_layer.weight.size(), \
                '%d != %d' % (self.decoder.embedding.weight.size(1),
                              self.joint.project_layer.weight.size(1))
            self.joint.project_layer.weight = self.decoder.embedding.weight

        self.crit = RNNTLoss(blank=28)
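
A hedged sketch of how an RNN-T model of this shape typically combines encoder, prediction network and joint network in its forward pass; the argument names and the prepended start token below are illustrative assumptions, not from the source (note the snippet's loss uses blank=28, so the real padding scheme may differ):

    def forward(self, inputs, input_lengths, targets, target_lengths):
        enc_state, _ = self.encoder(inputs, input_lengths)
        # prepend a start token so the prediction network has an input for the first output step
        zeros = targets.new_zeros((targets.size(0), 1))
        dec_state, _ = self.decoder(torch.cat((zeros, targets), dim=1), target_lengths.add(1))
        logits = self.joint(enc_state, dec_state)        # (batch, T, U + 1, vocab)
        return self.crit(logits, targets.int(), input_lengths.int(), target_lengths.int())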
Example #6
def main():
    global args

    preprocess = transforms.Compose([
        transforms.Resize(args.size),
        transforms.ToTensor(),
    ])

    img_model = resnet.resnet50(args.image_model_path).cuda().eval()
    pla_model = resnet.resnet50(args.place_model_path).cuda().eval()
    decoder_model = decoder.build_decoder(args.decoder_model_path, args.size,
                                          args.num_feat,
                                          args.num_feat).cuda().eval()

    pil_img = Image.open(args.img_path).convert('RGB')
    processed = preprocess(pil_img).unsqueeze(0).cuda()

    with torch.no_grad():

        img_feat = img_model(processed, decode=True)
        pla_feat = pla_model(processed, decode=True)

        pred = decoder_model([img_feat, pla_feat])

    fig, ax = plt.subplots(1, 2)

    pred = pred.squeeze().detach().cpu().numpy()
    pred = post_process(pred)

    pred_path = args.img_path.stem + "_smap.png"
    print("Saving prediction", pred_path)
    sio.imsave(pred_path, pred)

    processed = processed.squeeze().permute(1, 2, 0).cpu()

    ax[0].imshow(processed)
    ax[0].set_title("Input Image")
    ax[1].imshow(pred)
    ax[1].set_title("Prediction")
    plt.show()
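
post_process is referenced but not defined in this snippet; a minimal stand-in that normalizes the raw prediction into an 8-bit saliency map (purely an assumption about its behavior, and it presumes numpy is imported as np):

def post_process(pred):
    pred = (pred - pred.min()) / (pred.max() - pred.min() + 1e-8)   # normalize to [0, 1]
    return (pred * 255).astype(np.uint8)                            # 8-bit grayscale saliency map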
Example #7
    def __init__(self,
                 pretrained_model_name_or_path,
                 num_layers,
                 dim_model,
                 num_heads,
                 dim_ff,
                 dropout,
                 max_utter_num_length,
                 utter_type,
                 max_decode_output_length,
                 vocab_size,
                 embeddings=None):
        super().__init__()
        self.token_encoder = BertModel.from_pretrained(
            pretrained_model_name_or_path)
        self.target_embeddings = nn.Sequential(
            self.token_encoder.embeddings.word_embeddings,
            PositionalEncoding(dropout, dim_model, max_decode_output_length))
        self.utterance_encoder = TransformerEncoder(num_layers, dim_model,
                                                    num_heads, dim_ff, dropout,
                                                    max_utter_num_length,
                                                    utter_type, embeddings)
        self.decoder = build_decoder(num_layer=num_layers,
                                     heads=num_heads,
                                     d_model=dim_model,
                                     d_ff=dim_ff,
                                     drop_rate=dropout)

        # self.token_weight_1 = nn.Linear(Config.dim_model, 1, bias=False)
        # self.token_weight_2 = nn.Linear(Config.dim_model, Config.dim_model, bias=True)
        for param in self.utterance_encoder.parameters():
            if param.dim() > 1:
                nn.init.xavier_uniform_(param)
        for param in self.decoder.parameters():
            if param.dim() > 1:
                nn.init.xavier_uniform_(param)
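
A hedged sketch of how these modules could be chained during encoding and decoding; every argument name, shape and call signature below is an assumption made for illustration, since the source does not show the forward pass:

    def forward(self, input_ids, attention_mask, decoder_input_ids):
        # BERT token-level encoding -> utterance-level Transformer encoding
        token_states = self.token_encoder(input_ids, attention_mask=attention_mask)[0]
        utter_states = self.utterance_encoder(token_states)
        # decode over the shared (BERT word embedding + positional) target embeddings
        tgt = self.target_embeddings(decoder_input_ids)
        return self.decoder(tgt, utter_states)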
Example #8
    def __init__(self, block, layers, num_classes):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64, affine=affine_par)
        for i in self.bn1.parameters():
            i.requires_grad = False
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self.Resblock(block, 64, layers[0], stride=1)
        self.layer2 = self.Resblock(block, 128, layers[1], stride=2)
        self.layer3 = self.Resblock(block,
                                    256,
                                    layers[2],
                                    stride=1,
                                    dilation=2)
        self.channel_compress = nn.Sequential(
            nn.Conv2d(in_channels=1024 + 512,
                      out_channels=256,
                      kernel_size=3,
                      stride=1,
                      padding=2,
                      dilation=2,
                      bias=True), nn.ReLU(inplace=True), nn.Dropout2d(p=0.5))
        self.IEM = build_IEM(256)
        self.layer5 = nn.Sequential(
            nn.Conv2d(in_channels=256 + 256,
                      out_channels=256,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=True), nn.ReLU(inplace=True), nn.Dropout2d(p=0.5))
        self.skip1 = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True))
        self.skip2 = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True))
        self.skip3 = nn.Sequential(
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True))
        self.dilation_conv_0 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5),
        )
        self.dilation_conv_1 = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5),
        )
        self.dilation_conv_6 = nn.Sequential(
            nn.Conv2d(256,
                      256,
                      kernel_size=3,
                      stride=1,
                      padding=6,
                      dilation=6,
                      bias=True), nn.ReLU(inplace=True), nn.Dropout2d(p=0.5))
        self.dilation_conv_12 = nn.Sequential(
            nn.Conv2d(256,
                      256,
                      kernel_size=3,
                      stride=1,
                      padding=12,
                      dilation=12,
                      bias=True), nn.ReLU(inplace=True), nn.Dropout2d(p=0.5))
        self.dilation_conv_18 = nn.Sequential(
            nn.Conv2d(256,
                      256,
                      kernel_size=3,
                      stride=1,
                      padding=18,
                      dilation=18,
                      bias=True), nn.ReLU(inplace=True), nn.Dropout2d(p=0.5))
        self.layer_out1 = nn.Sequential(
            nn.Conv2d(1280, 256, kernel_size=1, stride=1, padding=0,
                      bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5),
        )
        self.layer_out2 = nn.Conv2d(256,
                                    num_classes,
                                    kernel_size=1,
                                    stride=1,
                                    bias=True)
        self.decoder = build_decoder(num_classes, 256, nn.BatchNorm2d)

        # initialize conv weights with a small Gaussian and reset BatchNorm parameters
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                m.weight.data.normal_(0, 0.01)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
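
The five dilation branches each keep 256 channels, which matches the 1280 input channels of layer_out1; a hedged sketch of the ASPP-style fusion this implies for the forward pass, where feat stands for the 256-channel feature map fed to the branches (an assumption based only on the channel counts above):

        out = torch.cat([
            self.dilation_conv_0(feat),
            self.dilation_conv_1(feat),
            self.dilation_conv_6(feat),
            self.dilation_conv_12(feat),
            self.dilation_conv_18(feat),
        ], dim=1)                       # 5 x 256 = 1280 channels
        out = self.layer_out1(out)      # compress back to 256 channels
        logits = self.layer_out2(out)   # per-class score map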
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model

from encoder import build_encoder
from decoder import build_decoder

(X_train, _), (X_test, _) = mnist.load_data()

image_size = X_train.shape[1]
X_train = np.reshape(X_train, [-1, image_size, image_size, 1])
X_test = np.reshape(X_test, [-1, image_size, image_size, 1])
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

latent_dim = 16
batch_size = 128
kernel_size = 3
layer_filters = [32, 64]

inputs, encoder, shape = build_encoder(image_size, latent_dim, layer_filters,
                                       kernel_size)
decoder = build_decoder(shape, latent_dim, layer_filters, kernel_size)

autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
autoencoder.summary()
autoencoder.compile(loss='mse', optimizer='adam')

autoencoder.fit(X_train,
                X_train,
                validation_data=(X_test, X_test),
                epochs=30,
                batch_size=batch_size)
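
After training, a quick sanity check is to reconstruct a few test digits and confirm the output shape matches the input:

decoded = autoencoder.predict(X_test[:8])
print(X_test[:8].shape, '->', decoded.shape)   # (8, 28, 28, 1) -> (8, 28, 28, 1)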