def __init__(self, input_nc, output_nc, ngf=64, n_blocks=6, img_size=256, light=False): assert (n_blocks >= 0) super(ResnetGenerator, self).__init__() self.input_nc = input_nc self.output_nc = output_nc self.ngf = ngf self.n_blocks = n_blocks self.img_size = img_size self.light = light DownBlock = [] DownBlock += [ nn.ReflectionPad2d(3), nn.Conv2d(input_nc, ngf, kernel_size=7, stride=1, padding=0, bias=False), nn.InstanceNorm2d(ngf), nn.ReLU(True) ] # Down-Sampling n_downsampling = 2 for i in range(n_downsampling): mult = 2**i DownBlock += [ nn.ReflectionPad2d(1), nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=0, bias=False), nn.InstanceNorm2d(ngf * mult * 2), nn.ReLU(True) ] # Down-Sampling Bottleneck mult = 2**n_downsampling for i in range(n_blocks): DownBlock += [ResnetBlock(ngf * mult, use_bias=False)] # Class Activation Map self.gap_fc = nn.Linear(ngf * mult, 1, bias=False) self.gmp_fc = nn.Linear(ngf * mult, 1, bias=False) self.conv1x1 = nn.Conv2d(ngf * mult * 2, ngf * mult, kernel_size=1, stride=1, bias=True) self.relu = nn.ReLU(True) # Gamma, Beta block if self.light: FC = [ nn.Linear(ngf * mult, ngf * mult, bias=False), nn.ReLU(True), nn.Linear(ngf * mult, ngf * mult, bias=False), nn.ReLU(True) ] else: FC = [ nn.Linear(img_size // mult * img_size // mult * ngf * mult, ngf * mult, bias=False), nn.ReLU(True), nn.Linear(ngf * mult, ngf * mult, bias=False), nn.ReLU(True) ] self.gamma = nn.Linear(ngf * mult, ngf * mult, bias=False) self.beta = nn.Linear(ngf * mult, ngf * mult, bias=False) # Up-Sampling Bottleneck for i in range(n_blocks): setattr(self, 'UpBlock1_' + str(i + 1), ResnetAdaILNBlock(ngf * mult, use_bias=False)) # Up-Sampling UpBlock2 = [] for i in range(n_downsampling): mult = 2**(n_downsampling - i) UpBlock2 += [ nn.Upsample(scale_factor=2, mode='nearest'), nn.ReflectionPad2d(1), nn.Conv2d(ngf * mult, int(ngf * mult / 2), kernel_size=3, stride=1, padding=0, bias=False), ILN(int(ngf * mult / 2)), nn.ReLU(True) ] UpBlock2 += [ nn.ReflectionPad2d(3), nn.Conv2d(ngf, output_nc, kernel_size=7, stride=1, padding=0, bias=False), nn.Tanh() ] self.DownBlock = nn.Sequential(*DownBlock) self.FC = nn.Sequential(*FC) self.UpBlock2 = nn.Sequential(*UpBlock2)
def __init__(self, in_chs, out_chs, feat_res=(56, 112), up_ratio=2, aspp_sec=(12, 24, 36), norm_act=ABN):
    super(ASPPInPlaceABNBlock, self).__init__()

    self.in_norm = norm_act(in_chs)
    self.gave_pool = nn.Sequential(OrderedDict([
        ("gavg", nn.AdaptiveAvgPool2d((1, 1))),
        ("conv1_0", nn.Conv2d(in_chs, out_chs, kernel_size=1, stride=1, padding=0,
                              groups=1, bias=False, dilation=1)),
        ("up0", nn.Upsample(size=feat_res, mode='bilinear'))]))

    self.conv1x1 = nn.Sequential(OrderedDict([
        ("conv1_1", nn.Conv2d(in_chs, out_chs, kernel_size=1, stride=1, padding=0,
                              bias=False, groups=1, dilation=1))]))

    self.aspp_bra1 = nn.Sequential(OrderedDict([
        ("conv2_1", nn.Conv2d(in_chs, out_chs, kernel_size=3, stride=1, padding=aspp_sec[0],
                              bias=False, groups=1, dilation=aspp_sec[0]))]))

    self.aspp_bra2 = nn.Sequential(OrderedDict([
        ("conv2_2", nn.Conv2d(in_chs, out_chs, kernel_size=3, stride=1, padding=aspp_sec[1],
                              bias=False, groups=1, dilation=aspp_sec[1]))]))

    self.aspp_bra3 = nn.Sequential(OrderedDict([
        ("conv2_3", nn.Conv2d(in_chs, out_chs, kernel_size=3, stride=1, padding=aspp_sec[2],
                              bias=False, groups=1, dilation=aspp_sec[2]))]))

    self.aspp_catdown = nn.Sequential(OrderedDict([
        ("norm_act", norm_act(5 * out_chs)),
        ("conv_down", nn.Conv2d(5 * out_chs, out_chs, kernel_size=1, stride=1, padding=1,
                                bias=False, groups=1, dilation=1)),
        ("dropout", nn.Dropout2d(p=0.2, inplace=True))]))

    self.upsampling = nn.Upsample(size=(int(feat_res[0] * up_ratio), int(feat_res[1] * up_ratio)),
                                  mode='bilinear')
def __init__(self, latent_variable_size):
    super(AE, self).__init__()
    self.latent_variable_size = latent_variable_size

    # ENCODER
    # img: 64 x 64
    self.e1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=4, stride=2, padding=1)
    self.bn1 = nn.BatchNorm2d(8)
    # 32 x 32
    self.e2 = nn.Conv2d(8, 16, 4, 2, 1)
    self.bn2 = nn.BatchNorm2d(16)
    # 16 x 16
    self.e3 = nn.Conv2d(16, 32, 4, 2, 1)
    self.bn3 = nn.BatchNorm2d(32)
    # 8 x 8
    self.e4 = nn.Conv2d(32, 64, 4, 2, 1)
    self.bn4 = nn.BatchNorm2d(64)
    # 4 x 4
    self.e5 = nn.Conv2d(64, 64, 4, 2, 1)
    self.bn5 = nn.BatchNorm2d(64)
    # 2 x 2
    self.fc1 = nn.Linear(64*2*2, latent_variable_size)  # batch_size x latent_variable_size (100 x 128)

    # DECODER
    self.d1 = nn.Linear(latent_variable_size, 64*2*2*2)
    # 2 x 2
    self.up1 = nn.Upsample(scale_factor=2)  # removes the *2*2 from output of d1 b/c scale_factor scales both H and W
    self.pd1 = nn.ReplicationPad2d(1)       # +2 to height/width
    self.d2 = nn.Conv2d(64*2, 64, kernel_size=3, stride=1)  # -2 to height/width
    self.bn6 = nn.BatchNorm2d(64, eps=1.e-3)  # eps is added to denominator for numerical stability
    # 4 x 4
    self.up2 = nn.Upsample(scale_factor=2)
    self.pd2 = nn.ReplicationPad2d(1)
    self.d3 = nn.Conv2d(64, 32, 3, 1)
    self.bn7 = nn.BatchNorm2d(32, 1.e-3)
    # 8 x 8
    self.up3 = nn.Upsample(scale_factor=2)
    self.pd3 = nn.ReplicationPad2d(1)
    self.d4 = nn.Conv2d(32, 16, 3, 1)
    self.bn8 = nn.BatchNorm2d(16, 1.e-3)
    # 16 x 16
    self.up4 = nn.Upsample(scale_factor=2)
    self.pd4 = nn.ReplicationPad2d(1)
    self.d5 = nn.Conv2d(16, 8, 3, 1)
    self.bn9 = nn.BatchNorm2d(8, 1.e-3)
    # 32 x 32
    self.up5 = nn.Upsample(scale_factor=2)
    self.pd5 = nn.ReplicationPad2d(1)
    self.d6 = nn.Conv2d(8, 3, 3, 1)
    # 64 x 64

    self.leakyrelu = nn.LeakyReLU(0.2)
    self.relu = nn.ReLU()
    self.hardtanh = nn.Hardtanh()
    self.sigmoid = nn.Sigmoid()
def __init__(self, in_ch=3, out_ch=1): super(SiamUNetU, self).__init__() n1 = 64 filters = [n1, n1 * 2, n1 * 4, n1 * 8, n1 * 16, n1 * 32] self.pool = nn.MaxPool2d(kernel_size=2, stride=2) self.Up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) self.conv0_0 = conv_block_nested(in_ch, filters[0], filters[0]) self.conv1_0 = conv_block_nested(filters[0], filters[1], filters[1]) self.conv2_0 = conv_block_nested(filters[1], filters[2], filters[2]) self.conv3_0 = conv_block_nested(filters[2], filters[3], filters[3]) self.conv4_0 = conv_block_nested(filters[3], filters[4], filters[4]) self.conv5_0 = conv_block_nested(filters[4], filters[5], filters[5]) self.bn1 = nn.BatchNorm2d(filters[0]) self.bn2 = nn.BatchNorm2d(filters[1]) self.bn3 = nn.BatchNorm2d(filters[2]) self.bn4 = nn.BatchNorm2d(filters[3]) self.bn5 = nn.BatchNorm2d(filters[4]) self.bn6 = nn.BatchNorm2d(filters[5]) self.conv0_1 = conv_block_nested(filters[0] + filters[1], filters[0], filters[0]) self.conv1_1 = conv_block_nested(filters[1] + filters[2], filters[1], filters[1]) self.conv2_1 = conv_block_nested(filters[2] + filters[3], filters[2], filters[2]) self.conv3_1 = conv_block_nested(filters[3] + filters[4], filters[3], filters[3]) self.conv4_1 = conv_block_nested(filters[4] + filters[5], filters[4], filters[4]) self.conv0_2 = conv_block_nested(filters[0] * 2 + filters[1], filters[0], filters[0]) self.conv1_2 = conv_block_nested(filters[1] * 2 + filters[2], filters[1], filters[1]) self.conv2_2 = conv_block_nested(filters[2] * 2 + filters[3], filters[2], filters[2]) self.conv3_2 = conv_block_nested(filters[3] * 2 + filters[4], filters[3], filters[3]) self.conv0_3 = conv_block_nested(filters[0] * 3 + filters[1], filters[0], filters[0]) self.conv1_3 = conv_block_nested(filters[1] * 3 + filters[2], filters[1], filters[1]) self.conv2_3 = conv_block_nested(filters[2] * 3 + filters[3], filters[2], filters[2]) self.conv0_4 = conv_block_nested(filters[0] * 4 + filters[1], filters[0], filters[0]) self.conv1_4 = conv_block_nested(filters[1] * 4 + filters[2], filters[1], filters[1]) self.conv0_5 = conv_block_nested(filters[0] * 5 + filters[1], filters[0], filters[0]) self.final = nn.Conv2d(filters[0], out_ch, kernel_size=1)
def train_advent(model, trainloader, targetloader, cfg): ''' UDA training with advent ''' # Create the model and start the training. input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET device = cfg.GPU_ID num_classes = cfg.NUM_CLASSES viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR) if viz_tensorboard: writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR) # SEGMNETATION NETWORK model.train() model.to(device) cudnn.benchmark = True cudnn.enabled = True # DISCRIMINATOR NETWORK # feature-level d_aux = get_fc_discriminator(num_classes=num_classes) d_aux.train() d_aux.to(device) # seg maps, i.e. output, level d_main = get_fc_discriminator(num_classes=num_classes) d_main.train() d_main.to(device) # OPTIMIZERS # segnet's optimizer optimizer = optim.SGD(model.optim_parameters(cfg.TRAIN.LEARNING_RATE), lr=cfg.TRAIN.LEARNING_RATE, momentum=cfg.TRAIN.MOMENTUM, weight_decay=cfg.TRAIN.WEIGHT_DECAY) # discriminators' optimizers optimizer_d_aux = optim.Adam(d_aux.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D, betas=(0.9, 0.99)) optimizer_d_main = optim.Adam(d_main.parameters(), lr=cfg.TRAIN.LEARNING_RATE_D, betas=(0.9, 0.99)) # interpolate output segmaps interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]), mode='bilinear', align_corners=True) interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear', align_corners=True) # labels for adversarial training source_label = 0 target_label = 1 trainloader_iter = enumerate(trainloader) targetloader_iter = enumerate(targetloader) for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP + 1)): # reset optimizers optimizer.zero_grad() optimizer_d_aux.zero_grad() optimizer_d_main.zero_grad() # adapt LR if needed adjust_learning_rate(optimizer, i_iter, cfg) adjust_learning_rate_discriminator(optimizer_d_aux, i_iter, cfg) adjust_learning_rate_discriminator(optimizer_d_main, i_iter, cfg) # UDA Training # only train segnet. 
Don't accumulate grads in disciminators for param in d_aux.parameters(): param.requires_grad = False for param in d_main.parameters(): param.requires_grad = False # train on source _, batch = trainloader_iter.__next__() images_source, labels, _, _ = batch pred_src_aux, pred_src_main = model(images_source.cuda(device)) if cfg.TRAIN.MULTI_LEVEL: pred_src_aux = interp(pred_src_aux) loss_seg_src_aux = loss_calc(pred_src_aux, labels, device) else: loss_seg_src_aux = 0 pred_src_main = interp(pred_src_main) loss_seg_src_main = loss_calc(pred_src_main, labels, device) loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux) loss.backward() # adversarial training ot fool the discriminator _, batch = targetloader_iter.__next__() images, _, _, _ = batch pred_trg_aux, pred_trg_main = model(images.cuda(device)) if cfg.TRAIN.MULTI_LEVEL: pred_trg_aux = interp_target(pred_trg_aux) d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux))) loss_adv_trg_aux = bce_loss(d_out_aux, source_label) else: loss_adv_trg_aux = 0 pred_trg_main = interp_target(pred_trg_main) d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main))) loss_adv_trg_main = bce_loss(d_out_main, source_label) loss = (cfg.TRAIN.LAMBDA_ADV_MAIN * loss_adv_trg_main + cfg.TRAIN.LAMBDA_ADV_AUX * loss_adv_trg_aux) loss = loss loss.backward() # Train discriminator networks # enable training mode on discriminator networks for param in d_aux.parameters(): param.requires_grad = True for param in d_main.parameters(): param.requires_grad = True # train with source if cfg.TRAIN.MULTI_LEVEL: pred_src_aux = pred_src_aux.detach() d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_src_aux))) loss_d_aux = bce_loss(d_out_aux, source_label) loss_d_aux = loss_d_aux / 2 loss_d_aux.backward() pred_src_main = pred_src_main.detach() d_out_main = d_main(prob_2_entropy(F.softmax(pred_src_main))) loss_d_main = bce_loss(d_out_main, source_label) loss_d_main = loss_d_main / 2 loss_d_main.backward() # train with target if cfg.TRAIN.MULTI_LEVEL: pred_trg_aux = pred_trg_aux.detach() d_out_aux = d_aux(prob_2_entropy(F.softmax(pred_trg_aux))) loss_d_aux = bce_loss(d_out_aux, target_label) loss_d_aux = loss_d_aux / 2 loss_d_aux.backward() else: loss_d_aux = 0 pred_trg_main = pred_trg_main.detach() d_out_main = d_main(prob_2_entropy(F.softmax(pred_trg_main))) loss_d_main = bce_loss(d_out_main, target_label) loss_d_main = loss_d_main / 2 loss_d_main.backward() optimizer.step() if cfg.TRAIN.MULTI_LEVEL: optimizer_d_aux.step() optimizer_d_main.step() current_losses = {'loss_seg_src_aux': loss_seg_src_aux, 'loss_seg_src_main': loss_seg_src_main, 'loss_adv_trg_aux': loss_adv_trg_aux, 'loss_adv_trg_main': loss_adv_trg_main, 'loss_d_aux': loss_d_aux, 'loss_d_main': loss_d_main} print_losses(current_losses, i_iter) if i_iter % cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0: print('taking snapshot ...') print('exp =', cfg.TRAIN.SNAPSHOT_DIR) snapshot_dir = Path(cfg.TRAIN.SNAPSHOT_DIR) torch.save(model.state_dict(), snapshot_dir / f'model_{i_iter}.pth') torch.save(d_aux.state_dict(), snapshot_dir / f'model_{i_iter}_D_aux.pth') torch.save(d_main.state_dict(), snapshot_dir / f'model_{i_iter}_D_main.pth') if i_iter >= cfg.TRAIN.EARLY_STOP - 1: break sys.stdout.flush() # Visualize with tensorboard if viz_tensorboard: log_losses_tensorboard(writer, current_losses, i_iter) if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1: draw_in_tensorboard(writer, images, i_iter, pred_trg_main, num_classes, 'T') 
draw_in_tensorboard(writer, images_source, i_iter, pred_src_main, num_classes, 'S')
def __init__(self, in_channel, out_channel):
    super(up_sample, self).__init__()
    self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
    self.double_conv = double_conv(in_channel, out_channel)
def inception_score(imgs, args, cuda=True, batch_size=32, resize=False, splits=1):
    """Computes the Inception Score of the generated images in imgs.

    imgs -- Torch dataset of (3, H, W) image tensors normalized to the range [-1, 1]
    cuda -- whether or not to run on GPU
    batch_size -- batch size for feeding into Inception v3
    splits -- number of splits
    """
    N = len(imgs)
    assert batch_size > 0
    assert N > batch_size

    # Set up dtype
    if cuda:
        dtype = torch.cuda.FloatTensor
    else:
        if torch.cuda.is_available():
            print("WARNING: You have a CUDA device, so you should probably set cuda=True")
        dtype = torch.FloatTensor

    # Set up dataloader
    dataloader = torch.utils.data.DataLoader(imgs, batch_size=batch_size, num_workers=args.workers)

    # Load inception model
    inception_model = inception_v3(pretrained=True, transform_input=False).type(dtype)
    inception_model.eval()
    up = nn.Upsample(size=(299, 299), mode='bilinear').type(dtype)

    def get_pred(x):
        if resize:
            x = up(x)
        x = inception_model(x)
        return F.softmax(x, dim=1).data.cpu().numpy()

    # Get predictions
    preds = np.zeros((N, 1000))
    for i, batch in enumerate(dataloader, 0):
        batch = batch.type(dtype)  # cast/move to the float type selected above (CPU or CUDA)
        batchv = Variable(batch)
        batch_size_i = batch.size()[0]
        preds[i * batch_size:i * batch_size + batch_size_i] = get_pred(batchv)

    # Now compute the mean KL-divergence
    split_scores = []
    for k in range(splits):
        part = preds[k * (N // splits):(k + 1) * (N // splits), :]
        py = np.mean(part, axis=0)
        scores = []
        for i in range(part.shape[0]):
            pyx = part[i, :]
            scores.append(entropy(pyx, py))
        split_scores.append(np.exp(np.mean(scores)))

    return np.mean(split_scores), np.std(split_scores)
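# --- Usage sketch for inception_score (illustrative only, not from the original source) ---
# `args` is assumed to be a namespace exposing a `workers` attribute, which is all the
# function reads from it; random tensors in [-1, 1] stand in for generated images.
import torch
from argparse import Namespace

if __name__ == "__main__":
    fake_imgs = torch.rand(64, 3, 64, 64) * 2 - 1  # dataset of (3, H, W) tensors in [-1, 1]
    is_mean, is_std = inception_score(fake_imgs, Namespace(workers=0),
                                      cuda=torch.cuda.is_available(),
                                      batch_size=16, resize=True, splits=2)
    print('Inception Score: %.3f +/- %.3f' % (is_mean, is_std))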
class YunNet(nn.Module): def __init__(self, nlabel, mindepth): super(YunNet, self).__init__() self.nlabel = nlabel self.mindepth = mindepth #spp self.feature_extraction = feature_extraction() self.group_convs = nn.Sequential( nn.Conv2d(64, 32, 3, 1, 1, groups=32), nn.LeakyReLU(0.1, inplace=True) ) self.refine0 = nn.Sequential( nn.Conv2d(32, 32, 3, 1 ,1), nn.LeakyReLU(0.1, inplace=True) ) self.refine1 = nn.Sequential( nn.Conv2d(32, 32, 3, 1 ,1), nn.LeakyReLU(0.1, inplace=True) ) self.refine2 = nn.Sequential( nn.Conv2d(32, 1, 3, 1 ,1), ) #3DCNN self.conv3d0 = nn.Sequential( convbn_3d(64, 32, 3, 1, 1), nn.ReLU(inplace = True), convbn_3d(32, 32, 3, 1, 1), nn.ReLU(inplace = True), ) self.conv3d0_2 = nn.Sequential( convbn_3d(32, 32, 1, 1, 0), nn.ReLU(inplace = True), ) self.conv3d1 = nn.Sequential( convbn_3d(32, 64, 3, 2, 1), nn.ReLU(inplace = True), convbn_3d(64, 64, 3, 1, 1), ) self.conv3d1_2 = nn.Sequential( convbn_3d(64, 64, 1, 1, 0), nn.ReLU(inplace = True), ) self.conv3d2 = nn.Sequential( convbn_3d(64, 128, 3, 2, 1), nn.ReLU(inplace = True), convbn_3d(128, 128, 3, 1, 1), ) self.conv3d2_2 = nn.Sequential( convbn_3d(128, 128, 1, 1, 0), nn.ReLU(inplace = True), ) self.conv2_3 = nn.Sequential( nn.ConvTranspose3d(128, 64, kernel_size=3, padding=1, output_padding=1, stride=2,bias=False), nn.BatchNorm3d(64) ) self.conv3_2 = nn.Sequential( nn.ConvTranspose3d(64, 32, kernel_size=3, padding=1, output_padding=1, stride=2,bias=False), nn.BatchNorm3d(32) ) self.cost_disp1 = nn.Sequential( nn.Upsample(scale_factor=2, mode = 'trilinear'), nn.ReLU(inplace = True), convbn_3d(64, 1, 3, 1, 1), ) self.cost_disp2 = nn.Sequential( convbn_3d(32, 32, 3, 1, 1), nn.ReLU(inplace = True), convbn_3d(32, 1, 3, 1, 1), ) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif isinstance(m, nn.Conv3d): n = m.kernel_size[0] * m.kernel_size[1]*m.kernel_size[2] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_() elif isinstance(m, nn.BatchNorm3d): m.weight.data.fill_(1) m.bias.data.zero_()
def upBlock(in_planes, out_planes):
    block = nn.Sequential(
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
        conv3x3(in_planes, out_planes * 2),
        nn.BatchNorm2d(out_planes * 2),
        GLU())
    return block
def __init__(self, img_size=(416, 416), features=256): super(YoloDecoder, self).__init__() nc = 4 # ToDo: Number of classes, Don't hardcode # Anchors, masks, stride is hardcoded in get_yolo function, pick from config # Yolo3 layers have routes, which are concatenation of some layers -- from config file yolo_index = -1 self.module_list = nn.ModuleList() # Picking yolo3 layers after the maxpooling section self.y_84 = self.single_cbl(2048, 512, 1) self.y_85 = self.single_cbl(512, 1024, 3) self.y_86 = self.single_cbl(1024, 512, 1) self.y_87 = self.single_cbl(512, 1024, 3) self.y_88 = self.pre_yolo( 1024, 27, 1 ) # Pre-yolo layer. Calculate channel size of 27, don't hardcode. No batchnorm.. # Yolo Layer yolo_index += 1 self.y_89 = self.get_yolo(nc, yolo_index, img_size) # self.y_90 = FeatureConcat(layers=[-4]) # == 86 Define empty layer, and just return 86 output self.y_91 = self.single_cbl(512, 256, 1) self.y_92 = nn.Upsample(scale_factor=2) # self.y_93 = FeatureConcat(layers=[-1, 61]) # Encoder, Do this during forward pass self.y_94 = self.single_cbl(768, 256, 1) self.y_95 = self.single_cbl(256, 512, 3) self.y_96 = self.single_cbl(512, 256, 1) self.y_97 = self.single_cbl(256, 512, 3) self.y_98 = self.single_cbl(512, 256, 1) self.y_99 = self.single_cbl(256, 512, 3) self.y_100 = self.pre_yolo(512, 27, 1) # Yolo layer yolo_index += 1 self.y_101 = self.get_yolo(nc, yolo_index, img_size) # self.y_102 = FeatureConcat(-4) # == 98 self.y_103 = self.single_cbl(256, 128, 1) self.y_104 = nn.Upsample(scale_factor=2) #, mode=nearest # self.y_105 = FeatureConcat(-1, 36) # Encoder self.y_106 = self.single_cbl(384, 128, 1) self.y_107 = self.single_cbl(128, 256, 3) self.y_108 = self.single_cbl(256, 128, 1) self.y_109 = self.single_cbl(128, 256, 3) self.y_110 = self.single_cbl(256, 128, 1) self.y_111 = self.single_cbl(128, 256, 3) self.y_112 = self.pre_yolo(256, 27, 1) # Yolo layer yolo_index += 1 self.y_113 = self.get_yolo(nc, yolo_index, img_size) self.yolo_layers = get_yolo_layers(self) # ToDo: Populate this with all layers. Verify if this is correct self.module_list.extend([ self.y_84, self.y_85, self.y_86, self.y_87, self.y_88, self.y_89, self.y_91, self.y_92, self.y_94, self.y_95, self.y_96, self.y_97, self.y_98, self.y_99, self.y_100, self.y_101, self.y_103, self.y_104, self.y_106, self.y_107, self.y_108, self.y_109, self.y_110, self.y_111, self.y_112 ]) #self.y_90, self.y_93, self.y_102, self.y_105,
def __init__(self):
    super(YoloUpsample, self).__init__()
    self.upsample = nn.Upsample(scale_factor=2, mode='nearest').to(device)
def evaluate(model, dataset, ignore_label=250, save_output_images=False, save_dir=None, input_size=(512, 1024)): if dataset == 'pascal_voc': num_classes = 21 input_size = (505, 505) data_loader = get_loader(dataset) data_path = get_data_path(dataset) test_dataset = data_loader(data_path, split="val", crop_size=input_size, scale=False, mirror=False) testloader = data.DataLoader(test_dataset, batch_size=1, shuffle=False, pin_memory=True) interp = nn.Upsample(size=input_size, mode='bilinear', align_corners=True) elif dataset == 'cityscapes': num_classes = 19 data_loader = get_loader('cityscapes') data_path = get_data_path('cityscapes') test_dataset = data_loader(data_path, img_size=input_size, is_transform=True, split='val') testloader = data.DataLoader(test_dataset, batch_size=1, shuffle=False, pin_memory=True) interp = nn.Upsample(size=input_size, mode='bilinear', align_corners=True) print('Evaluating, found ' + str(len(testloader)) + ' images.') data_list = [] colorize = VOCColorize() total_loss = [] for index, batch in enumerate(testloader): image, label, size, name, _ = batch size = size[0] with torch.no_grad(): output = model(Variable(image).cuda()) output = interp(output) label_cuda = Variable(label.long()).cuda() criterion = CrossEntropy2d( ignore_label=ignore_label).cuda() # Ignore label ?? loss = criterion(output, label_cuda) total_loss.append(loss.item()) output = output.cpu().data[0].numpy() if dataset == 'pascal_voc': output = output[:, :size[0], :size[1]] gt = np.asarray(label[0].numpy()[:size[0], :size[1]], dtype=np.int) elif dataset == 'cityscapes': gt = np.asarray(label[0].numpy(), dtype=np.int) output = output.transpose(1, 2, 0) output = np.asarray(np.argmax(output, axis=2), dtype=np.int) if save_output_images: if dataset == 'pascal_voc': filename = os.path.join(save_dir, '{}.png'.format(name[0])) color_file = Image.fromarray( colorize(output).transpose(1, 2, 0), 'RGB') color_file.save(filename) if (index + 1) % 100 == 0: print('%d processed' % (index + 1)) if save_dir: filename = os.path.join(save_dir, 'result.txt') else: filename = None mIoU = get_iou(data_list, num_classes, dataset, filename) loss = np.mean(total_loss) return mIoU, loss
def __init__(self,dist=False): super(SIGGRAPHGenerator, self).__init__() self.dist = dist use_bias = True norm_layer=nn.BatchNorm2d # Conv1 model1=[nn.Conv2d(4, 64, kernel_size=3, stride=1, padding=1, bias=use_bias),] model1+=[nn.ReLU(True),] model1+=[nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=use_bias),] model1+=[nn.ReLU(True),] model1+=[norm_layer(64),] # add a subsampling operation # Conv2 model2=[nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=use_bias),] model2+=[nn.ReLU(True),] model2+=[nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=use_bias),] model2+=[nn.ReLU(True),] model2+=[norm_layer(128),] # add a subsampling layer operation # Conv3 model3=[nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=use_bias),] model3+=[nn.ReLU(True),] model3+=[nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=use_bias),] model3+=[nn.ReLU(True),] model3+=[nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=use_bias),] model3+=[nn.ReLU(True),] model3+=[norm_layer(256),] # add a subsampling layer operation # Conv4 model4=[nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=use_bias),] model4+=[nn.ReLU(True),] model4+=[nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=use_bias),] model4+=[nn.ReLU(True),] model4+=[nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=use_bias),] model4+=[nn.ReLU(True),] model4+=[norm_layer(512),] # Conv5 model5=[nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=use_bias),] model5+=[nn.ReLU(True),] model5+=[nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=use_bias),] model5+=[nn.ReLU(True),] model5+=[nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=use_bias),] model5+=[nn.ReLU(True),] model5+=[norm_layer(512),] # Conv6 model6=[nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=use_bias),] model6+=[nn.ReLU(True),] model6+=[nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=use_bias),] model6+=[nn.ReLU(True),] model6+=[nn.Conv2d(512, 512, kernel_size=3, dilation=2, stride=1, padding=2, bias=use_bias),] model6+=[nn.ReLU(True),] model6+=[norm_layer(512),] # Conv7 model7=[nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=use_bias),] model7+=[nn.ReLU(True),] model7+=[nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=use_bias),] model7+=[nn.ReLU(True),] model7+=[nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=use_bias),] model7+=[nn.ReLU(True),] model7+=[norm_layer(512),] # Conv7 model8up=[nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1, bias=use_bias)] model3short8=[nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=use_bias),] model8=[nn.ReLU(True),] model8+=[nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=use_bias),] model8+=[nn.ReLU(True),] model8+=[nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=use_bias),] model8+=[nn.ReLU(True),] model8+=[norm_layer(256),] # Conv9 model9up=[nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=use_bias),] model2short9=[nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=use_bias),] # add the two feature maps above model9=[nn.ReLU(True),] model9+=[nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=use_bias),] model9+=[nn.ReLU(True),] model9+=[norm_layer(128),] # Conv10 model10up=[nn.ConvTranspose2d(128, 128, kernel_size=4, stride=2, padding=1, bias=use_bias),] model1short10=[nn.Conv2d(64, 128, kernel_size=3, 
stride=1, padding=1, bias=use_bias),] # add the two feature maps above model10=[nn.ReLU(True),] model10+=[nn.Conv2d(128, 128, kernel_size=3, dilation=1, stride=1, padding=1, bias=use_bias),] model10+=[nn.LeakyReLU(negative_slope=.2),] # classification output model_class=[nn.Conv2d(256, 529, kernel_size=1, padding=0, dilation=1, stride=1, bias=use_bias),] # regression output model_out=[nn.Conv2d(128, 2, kernel_size=1, padding=0, dilation=1, stride=1, bias=use_bias),] model_out+=[nn.Tanh()] self.model1 = nn.Sequential(*model1) self.model2 = nn.Sequential(*model2) self.model3 = nn.Sequential(*model3) self.model4 = nn.Sequential(*model4) self.model5 = nn.Sequential(*model5) self.model6 = nn.Sequential(*model6) self.model7 = nn.Sequential(*model7) self.model8up = nn.Sequential(*model8up) self.model8 = nn.Sequential(*model8) self.model9up = nn.Sequential(*model9up) self.model9 = nn.Sequential(*model9) self.model10up = nn.Sequential(*model10up) self.model10 = nn.Sequential(*model10) self.model3short8 = nn.Sequential(*model3short8) self.model2short9 = nn.Sequential(*model2short9) self.model1short10 = nn.Sequential(*model1short10) self.model_class = nn.Sequential(*model_class) self.model_out = nn.Sequential(*model_out) self.upsample4 = nn.Sequential(*[nn.Upsample(scale_factor=4, mode='nearest'),]) self.softmax = nn.Sequential(*[nn.Softmax(dim=1),])
def upsample(scale_factor=2):
    return nn.Upsample(scale_factor=scale_factor)
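# --- Usage sketch (illustrative only, not from the original source) ---
# nn.Upsample defaults to mode='nearest', so the factory above simply repeats
# pixels by the given factor along both spatial dimensions.
import torch

x = torch.randn(1, 3, 8, 8)
print(upsample(scale_factor=2)(x).shape)  # torch.Size([1, 3, 16, 16])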
def __init__(self, in_size, out_size, scale):
    super(SkipUp, self).__init__()
    self.unpool = nn.Upsample(scale_factor=scale, mode='bilinear')
    self.conv = nn.Conv2d(in_size, out_size, 3, 1, 1)
    self.conv2 = nn.Conv2d(out_size, out_size, 3, 1, 1)
def __init__(self, input_x=0, output_channels=25, is_training=True, batch_size=1): super(DentlyNet, self).__init__() self.input_x = input_x self.batch_size = batch_size self.output_channels = output_channels self.ks = 3 self.device = torch.cuda.is_available() ### indexing model: # P = ((S-1)*W-S+F)/2, with F = filter size, S = stride self.down0_conv = nn.Conv2d(3, 9, kernel_size=(31, 9), stride=(1, 1), padding=(15, 4)) #320 3X31X9X9 = 7533 self.down0_conv_bn = nn.BatchNorm2d(9, track_running_stats=False, momentum=0.5) self.mp2d = torch.nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0), return_indices=True) self.down1_conv = nn.Conv2d(9, 24, kernel_size=(7, 5), stride=(1, 1), padding=(3, 2)) #160 9X7X5X24 = 7560 self.down1_conv_bn = nn.BatchNorm2d(24, track_running_stats=False, momentum=0.5) self.down2_conv = nn.Conv2d(24, 24, kernel_size=(9, 5), stride=(1, 1), padding=(4, 2)) #24X31X9X24 = 160704 self.down2_conv_bn = nn.BatchNorm2d(24, track_running_stats=False, momentum=0.5) self.down2_1_conv = nn.Conv2d(24, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) #24X3X3X96 = 20736 self.down2_1_conv_bn = nn.BatchNorm2d(48, track_running_stats=False, momentum=0.5) self.down2_2_conv = nn.Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)) #96X1X1X24 = 2304 self.down2_2_conv_bn = nn.BatchNorm2d(24, track_running_stats=False, momentum=0.5) self.down4_conv = nn.Conv2d(24, 9, kernel_size=(7, 5), stride=(1, 1), padding=(3, 2)) #24X7X5X9 = 7560 self.down4_conv_bn = nn.BatchNorm2d(9, track_running_stats=False, momentum=0.5) self.Unmp2d = torch.nn.MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0)) self.down3_conv = nn.Conv2d(9, output_channels, kernel_size=(5, 3), stride=(1, 1), padding=(2, 1)) #320 9*5*3*14 = 1890 self.down3_conv_bn = nn.BatchNorm2d(output_channels, track_running_stats=False, momentum=0.5) self.down5_conv = nn.Conv2d(output_channels, output_channels, kernel_size=(5, 3), stride=(1, 1), padding=(2, 1)) # 14*5*3*14 = 2940 self.down5_conv_bn = nn.BatchNorm2d(output_channels, track_running_stats=False, momentum=0.5) self.upSample = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=False) ### skeleton model: # P = ((S-1)*W-S+F)/2, with F = filter size, S = stride self.down0_convSkel = nn.Conv2d(3, 7, kernel_size=(31, 5), stride=(1, 1), padding=(15, 2)) self.down0_conv_bnSkel = nn.BatchNorm2d(7, track_running_stats=False, momentum=0.5) self.down1_convSkel = nn.Conv2d(7, 7, kernel_size=(31, 5), stride=(1, 1), padding=(15, 2)) self.down1_conv_bnSkel = nn.BatchNorm2d(7, track_running_stats=False, momentum=0.5) self.down2_convSkel = nn.Conv2d(7, 1, kernel_size=(31, 5), stride=(1, 1), padding=(15, 2)) self.mp2dSkel = torch.nn.MaxPool2d(kernel_size=(40, 1), stride=(40, 1), padding=(0, 0), return_indices=True) self.Unmp2dSkel = torch.nn.MaxUnpool2d(kernel_size=(40, 1), stride=(40, 1), padding=(0, 0)) self.mp2dSkel2 = torch.nn.MaxPool2d(kernel_size=(40, 1), stride=(40, 1), padding=(20, 0), return_indices=True) self.Unmp2dSkel2 = torch.nn.MaxUnpool2d(kernel_size=(40, 1), stride=(40, 1), padding=(20, 0)) if self.device: self.dtype = torch.cuda.FloatTensor self.dtype_l = torch.cuda.LongTensor else: self.dtype = torch.FloatTensor self.dtype_l = torch.LongTensor
def __init__(self, block1, block2, batch_size, n_class): super(Model_2b_depgd_GAP_MS, self).__init__() self.batch_size = batch_size self.n_class = n_class # backbone self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=4) self.bn1 = nn.BatchNorm2d(64) self.relu = nn.ReLU(inplace=True) self.max_pool = nn.MaxPool2d(3, stride=2) self.proj_layer1 = self.make_proj_layer(block1, 64, d1=64, d2=256, stride=1) self.skip_layer1_1 = self.make_skip_layer(block1, 256, d1=64, d2=256, stride=1) self.skip_layer1_2 = self.make_skip_layer(block1, 256, d1=64, d2=256, stride=1) self.proj_layer2 = self.make_proj_layer(block1, 256, d1=128, d2=512, stride=2) self.skip_layer2_1 = self.make_skip_layer(block1, 512, d1=128, d2=512) self.skip_layer2_2 = self.make_skip_layer(block1, 512, d1=128, d2=512) self.skip_layer2_3 = self.make_skip_layer(block1, 512, d1=128, d2=512) self.proj_layer3 = self.make_proj_layer(block1, 512, d1=256, d2=1024, stride=2) self.skip_layer3_1 = self.make_skip_layer(block1, 1024, d1=256, d2=1024) self.skip_layer3_2 = self.make_skip_layer(block1, 1024, d1=256, d2=1024) self.skip_layer3_3 = self.make_skip_layer(block1, 1024, d1=256, d2=1024) self.skip_layer3_4 = self.make_skip_layer(block1, 1024, d1=256, d2=1024) self.skip_layer3_5 = self.make_skip_layer(block1, 1024, d1=256, d2=1024) self.proj_layer4 = self.make_proj_layer(block1, 1024, d1=512, d2=2048, stride=2) self.skip_layer4_1 = self.make_skip_layer(block1, 2048, d1=512, d2=2048) self.skip_layer4_2 = self.make_skip_layer(block1, 2048, d1=512, d2=2048) self.conv2 = nn.Conv2d(2048, 1024, 1) self.bn2 = nn.BatchNorm2d(1024) #depth self.dep_up_conv1 = self.make_up_conv_layer(block2, 1024, 512, self.batch_size) self.dep_up_conv2 = self.make_up_conv_layer(block2, 512, 256, self.batch_size) self.dep_up_conv3 = self.make_up_conv_layer(block2, 256, 128, self.batch_size) self.dep_up_conv4 = self.make_up_conv_layer(block2, 128, 64, self.batch_size) self.dep_skip_up1 = SkipUp(512, 64, 8) self.dep_skip_up2 = SkipUp(256, 64, 4) self.dep_skip_up3 = SkipUp(128, 64, 2) self.dep_conv3 = nn.Conv2d(64, 1, 3, padding=1) #64,1 # self.upsample = nn.UpsamplingBilinear2d(size = (480,640)) self.upsample = nn.Upsample(size=(480, 640), mode='bilinear') #sem self.sem_up_conv1 = self.make_up_conv_layer(block2, 1024, 512, self.batch_size) self.sem_up_conv2 = self.make_up_conv_layer(block2, 512, 256, self.batch_size) self.sem_up_conv3 = self.make_up_conv_layer(block2, 256, 128, self.batch_size) self.sem_up_conv4 = self.make_up_conv_layer(block2, 128, 64, self.batch_size) self.sem_skip_up1 = SkipUp(512, 64, 8) self.sem_skip_up2 = SkipUp(256, 64, 4) self.sem_skip_up3 = SkipUp(128, 64, 2) self.sem_conv3_2 = nn.Conv2d(64, self.n_class, 3, padding=1) #64,1 self.midn = 32 self.GAP_convSem = nn.Conv2d(64, self.midn, 1) self.GAP_convDep1 = nn.Conv2d(64, self.midn, 1) self.GAP_convDep2 = nn.Conv2d(64, self.midn, 1) self.GAP_convCom = nn.Conv2d(self.midn, 64, 1) self.GAP_BNSem = nn.BatchNorm2d(self.midn) self.GAP_BNDep1 = nn.BatchNorm2d(self.midn) self.GAP_BNDep2 = nn.BatchNorm2d(self.midn) self.GAP_BNCom = nn.BatchNorm2d(64) self.sideup1 = nn.Upsample((8, 10), mode='bilinear') self.sideup2 = nn.Upsample((15, 19), mode='bilinear') self.sideup3 = nn.Upsample((29, 38), mode='bilinear') self.sideup4 = nn.Upsample((57, 76), mode='bilinear') self.sidedconv1 = nn.Conv2d(2048, 64, 1) self.sidedconv2 = nn.Conv2d(1024, 64, 1) self.sidedconv3 = nn.Conv2d(512, 64, 1) self.sidedconv4 = nn.Conv2d(256, 64, 1) self.sidecconv1 = nn.Conv2d(128, 64, 3, padding=1) self.sidecconv2 = 
nn.Conv2d(128, 64, 3, padding=1) self.sidecconv3 = nn.Conv2d(128, 64, 3, padding=1) self.sidecconv4 = nn.Conv2d(128, 64, 3, padding=1) self.sideoconv1 = nn.Conv2d(64, self.n_class, 3, padding=1) self.sideoconv2 = nn.Conv2d(64, self.n_class, 3, padding=1) self.sideoconv3 = nn.Conv2d(64, self.n_class, 3, padding=1) self.sideoconv4 = nn.Conv2d(64, self.n_class, 3, padding=1)
def make_unpool_layer(dim):
    return nn.Upsample(scale_factor=2)
def create_modules(blocks: List) -> Tuple: """ Create all the nn.Module objects based on the configurations passed from the yolov3 cfg file in a sequential manner. From this, each block in the list we will create the multiple layers required. :param blocks: a list of blocks represented as a dict read from the cfg file. :return: A tuple with the net_info information about input and pre-processing and a nn.ModuleList() """ net_info = blocks[0] # captures the information about the input and pre-processing module_list: ModuleList = nn.ModuleList() prev_filters = 3 output_filters = [] filters = 0 for index, x in enumerate(blocks[1:]): module = nn.Sequential() # check the type of block # create a new module for the block # append to module_list if x["type"] == "convolutional": # get the info about the layer activation = x["activation"] try: batch_normalize = int(x["batch_normalize"]) bias = False except Exception as e: print("[***DEBUG***] Exception thrown in create_modules() \n{}".format(e.__cause__)) batch_normalize = 0 bias = True filters = int(x["filters"]) padding = int(x["pad"]) kernel_size = int(x["size"]) stride = int(x["stride"]) pad = (kernel_size - 1) // 2 if padding else 0 # add the Convolutional Layer conv = nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=bias) module.add_module("conv_{0}".format(index), conv) # add the Batch Norm Layer if batch_normalize: bn = nn.BatchNorm2d(filters) module.add_module("batch_norm_{0}".format(index), bn) # check the activation # it is either Linear or a Leaky ReLU for YOLO if activation == "leaky": activn = nn.LeakyReLU(negative_slope=0.1, inplace=True) module.add_module("leaky_{0}".format(index), activn) # if its an upsampling layer - we use Bilinear2dUpsampling elif x["type"] == "upsample": stride = int(x["stride"]) upsample = nn.Upsample(scale_factor=2, mode="bilinear") module.add_module("upsample_{0}".format(index), upsample) # if it is a route layer elif x["type"] == "route": x["layers"] = x["layers"].split(",") start = int(x["layers"][0]) try: # end if there exists one end = int(x["layers"][1]) except Exception as e: print("[***DEBUG***] Exception thrown in create_modules() \n{}".format(e.__cause__)) end = 0 # positive annotation if start > 0: start = start - index if end > 0: end = end - index route = EmptyLayer() module.add_module("route_{0}".format(index), route) # The convolutional layer just in front of a route layer applies it's kernel to (possibly concatenated) # feature maps from a previous layers. The following code updates the filters variable to hold the number # of filters outputted by a route layer. if end < 0: # if we are concatenating maps filters = output_filters[index + start] + output_filters[index + end] else: filters = output_filters[index + start] # shortcut corresponds to skip connections elif x["type"] == "shortcut": shortcut = EmptyLayer() module.add_module("shortcut_{0}".format(index), shortcut) # YOLO is the detection layer elif x["type"] == "yolo": mask = x["mask"].split(",") mask = [int(x) for x in mask] anchors = x["anchors"].split(",") anchors = [int(a) for a in anchors] # should be a total of 9 anchors = [(anchors[i], anchors[i+1]) for i in range(0, len(anchors), 2)] anchors = [anchors[i] for i in mask] detection = DetectionLayer(anchors) module.add_module("Detection_{0}".format(index), detection) module_list.append(module) prev_filters = filters output_filters.append(filters) return net_info, module_list
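# --- Usage sketch for create_modules (illustrative only; the block dicts below are a
# hypothetical two-layer configuration, not taken from an actual yolov3 cfg file) ---
toy_blocks = [
    {"type": "net", "height": "416", "width": "416", "channels": "3"},
    {"type": "convolutional", "batch_normalize": "1", "filters": "32",
     "size": "3", "stride": "1", "pad": "1", "activation": "leaky"},
    {"type": "upsample", "stride": "2"},
]
net_info, module_list = create_modules(toy_blocks)
print(net_info)      # the leading "net" block is returned unchanged
print(module_list)   # two Sequential modules: conv + batch norm + leaky ReLU, then the upsample layer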
def main(): """Create the model and start the evaluation process.""" args = get_arguments() gpu0 = args.gpu if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) model = Res_Deeplab(num_classes=args.num_classes) model.cuda() if args.restore_from[:4] == 'http': saved_state_dict = model_zoo.load_url(args.restore_from) else: saved_state_dict = torch.load(args.restore_from) model.load_state_dict(saved_state_dict) model.eval() model.cuda(gpu0) if args.dataset == 'pascal_voc': testloader = data.DataLoader(VOCDataSet(args.data_dir, args.data_list, crop_size=(505, 505), mean=IMG_MEAN, scale=False, mirror=False), batch_size=1, shuffle=False, pin_memory=True) interp = nn.Upsample(size=(505, 505), mode='bilinear', align_corners=True) elif args.dataset == 'pascal_context': input_transform = transform.Compose([ transform.ToTensor(), transform.Normalize([.485, .456, .406], [.229, .224, .225]) ]) data_kwargs = { 'transform': input_transform, 'base_size': 512, 'crop_size': 512 } data_loader = get_loader('pascal_context') data_path = get_data_path('pascal_context') test_dataset = data_loader(data_path, split='val', mode='val', **data_kwargs) testloader = data.DataLoader(test_dataset, batch_size=1, drop_last=False, shuffle=False, num_workers=1, pin_memory=True) interp = nn.Upsample(size=(512, 512), mode='bilinear', align_corners=True) elif args.dataset == 'cityscapes': data_loader = get_loader('cityscapes') data_path = get_data_path('cityscapes') test_dataset = data_loader(data_path, img_size=(512, 1024), is_transform=True, split='val') testloader = data.DataLoader(test_dataset, batch_size=1, shuffle=False, pin_memory=True) interp = nn.Upsample(size=(512, 1024), mode='bilinear', align_corners=True) data_list = [] colorize = VOCColorize() if args.with_mlmt: mlmt_preds = np.loadtxt('mlmt_output/output_ema_p_1_0_voc_5.txt', dtype=float) # best mt 0.05 mlmt_preds[mlmt_preds >= 0.2] = 1 mlmt_preds[mlmt_preds < 0.2] = 0 for index, batch in enumerate(testloader): if index % 100 == 0: print('%d processd' % (index)) image, label, size, name, _ = batch size = size[0] output = model(Variable(image, volatile=True).cuda(gpu0)) output = interp(output).cpu().data[0].numpy() if args.dataset == 'pascal_voc': output = output[:, :size[0], :size[1]] gt = np.asarray(label[0].numpy()[:size[0], :size[1]], dtype=np.int) elif args.dataset == 'pascal_context': gt = np.asarray(label[0].numpy(), dtype=np.int) elif args.dataset == 'cityscapes': gt = np.asarray(label[0].numpy(), dtype=np.int) if args.with_mlmt: for i in range(args.num_classes): output[i] = output[i] * mlmt_preds[index][i] output = output.transpose(1, 2, 0) output = np.asarray(np.argmax(output, axis=2), dtype=np.int) if args.save_output_images: if args.dataset == 'pascal_voc': filename = os.path.join(args.save_dir, '{}.png'.format(name[0])) color_file = Image.fromarray( colorize(output).transpose(1, 2, 0), 'RGB') color_file.save(filename) elif args.dataset == 'pascal_context': filename = os.path.join(args.save_dir, filename[0]) scipy.misc.imsave(filename, gt) data_list.append([gt.flatten(), output.flatten()]) filename = os.path.join(args.save_dir, 'result.txt') get_iou(args, data_list, args.num_classes, filename)
def __init__(self, up_channels, ref_channels, out_channels, block_name='residual', upper='interpolation', norm_type=nn.BatchNorm3d, act_type=nn.ReLU, se_type=None, drop_type=None, num_blocks=1): super(UpBlock, self).__init__() assert upper in [ 'interpolation', 'upsample', 'convt' ], "only 'interpolation'|'upsample'|'convt' supported" self.upper = upper inner_channels = up_channels // 2 if self.upper == 'interpolation': self.up_conv = ConvBnAct3d(up_channels, inner_channels, norm_type=norm_type, act_type=act_type) elif self.upper == 'upsample': self.up_conv = nn.Sequential( nn.Upsample(scale_factor=2, mode='nearest'), ConvBnAct3d(up_channels, inner_channels, norm_type=norm_type, act_type=act_type)) elif self.upper == 'convt': self.up_conv = nn.Sequential( nn.ConvTranspose3d(up_channels, inner_channels, kernel_size=2, stride=2, bias=False), ConvBnAct3d(inner_channels, inner_channels, kernel_size=1, padding=0, norm_type=norm_type, act_type=act_type)) self.trans_conv = ConvBnAct3d(inner_channels + ref_channels, out_channels, kernel_size=1, padding=0, norm_type=norm_type, act_type=act_type) self.drop = Drop(drop_type) if block_name == 'residual': block = ResBlock(out_channels, norm_type=norm_type, act_type=act_type, se_type=se_type) elif block_name == 'bottleneck': block = BottleNeck(out_channels, norm_type=norm_type, act_type=act_type, se_type=se_type) elif block_name == 'sk': block = SK_Block(out_channels, out_channels, norm_type=norm_type, act_type=act_type) else: raise NotImplementedError('{} not implemented'.format(block_name)) layers = [] for i in range(num_blocks): layers.append(block) self.res_block = nn.Sequential(*layers)
def __init__(self, img_size=300, num_class=2, nc=64, mode='unet', normrecon=True): super().__init__() assert mode in ['unet', 'ae', 'cnn'] self.all_activations = [] self.mode = mode # Auxilary layers self.maxpool = nn.MaxPool2d(2) self.activation = nn.ReLU(inplace=True) # Architecture. l1_size = self._calc_layer_size(nc * 1, img_size // 1) l2_size = self._calc_layer_size(nc * 2, img_size // 2) l3_size = self._calc_layer_size(nc * 4, img_size // 4) l4_size = self._calc_layer_size(nc * 8, img_size // 8) # Convolve down to bottleneck self.dconv_down1 = DeconvBlock(1, nc * 1, img_size) self.dconv_down2 = DeconvBlock(nc * 1, nc * 2, self.dconv_down1.output_size) self.dconv_down3 = DeconvBlock(nc * 2, nc * 4, self.dconv_down2.output_size) self.dconv_down4 = DeconvBlock(nc * 4, nc * 8, self.dconv_down3.output_size) # Make prediction off of bottleneck self.fc1 = nn.Linear(l4_size, num_class) # Upsample from bottleneck (for reconstruction: ae and unet) if mode != 'cnn': self.upsample3 = nn.Upsample(size=(img_size // 4, img_size // 4), mode='bilinear', align_corners=True) if mode == 'unet': in_ch_size = nc * 4 + nc * 8 elif mode == 'ae': in_ch_size = nc * 8 self.dconv_up3 = DeconvBlock(in_ch_size, nc * 4, self.dconv_down4.output_size) self.upsample2 = nn.Upsample(size=(img_size // 2, img_size // 2), mode='bilinear', align_corners=True) if mode == 'unet': in_ch_size = nc * 2 + nc * 4 elif mode == 'ae': in_ch_size = nc * 4 self.dconv_up2 = DeconvBlock(in_ch_size, nc * 2, self.dconv_down4.output_size) self.upsample1 = nn.Upsample(size=(img_size, img_size), mode='bilinear', align_corners=True) if mode == 'unet': in_ch_size = nc * 1 + nc * 2 elif mode == 'ae': in_ch_size = nc * 2 self.dconv_up1 = DeconvBlock(in_ch_size, nc * 1, self.dconv_down4.output_size) if normrecon: self.conv_last = nn.Sequential(nn.Conv2d(nc, 1, 1), nn.Sigmoid()) else: self.conv_last = nn.Conv2d(nc, 1, 1)
def train_minent(model, trainloader, targetloader, cfg): ''' UDA training with minEnt ''' # Create the model and start the training. input_size_source = cfg.TRAIN.INPUT_SIZE_SOURCE input_size_target = cfg.TRAIN.INPUT_SIZE_TARGET device = cfg.GPU_ID num_classes = cfg.NUM_CLASSES viz_tensorboard = os.path.exists(cfg.TRAIN.TENSORBOARD_LOGDIR) if viz_tensorboard: writer = SummaryWriter(log_dir=cfg.TRAIN.TENSORBOARD_LOGDIR) # SEGMNETATION NETWORK model.train() model.to(device) cudnn.benchmark = True cudnn.enabled = True # OPTIMIZERS # segnet's optimizer optimizer = optim.SGD(model.optim_parameters(cfg.TRAIN.LEARNING_RATE), lr=cfg.TRAIN.LEARNING_RATE, momentum=cfg.TRAIN.MOMENTUM, weight_decay=cfg.TRAIN.WEIGHT_DECAY) # interpolate output segmaps interp = nn.Upsample(size=(input_size_source[1], input_size_source[0]), mode='bilinear', align_corners=True) interp_target = nn.Upsample(size=(input_size_target[1], input_size_target[0]), mode='bilinear', align_corners=True) trainloader_iter = enumerate(trainloader) targetloader_iter = enumerate(targetloader) for i_iter in tqdm(range(cfg.TRAIN.EARLY_STOP)): # reset optimizers optimizer.zero_grad() # adapt LR if needed adjust_learning_rate(optimizer, i_iter, cfg) # UDA Training # train on source _, batch = trainloader_iter.__next__() images_source, labels, _, _ = batch pred_src_aux, pred_src_main = model(images_source.cuda(device)) if cfg.TRAIN.MULTI_LEVEL: pred_src_aux = interp(pred_src_aux) loss_seg_src_aux = loss_calc(pred_src_aux, labels, device) else: loss_seg_src_aux = 0 pred_src_main = interp(pred_src_main) loss_seg_src_main = loss_calc(pred_src_main, labels, device) loss = (cfg.TRAIN.LAMBDA_SEG_MAIN * loss_seg_src_main + cfg.TRAIN.LAMBDA_SEG_AUX * loss_seg_src_aux) loss.backward() # adversarial training with minent _, batch = targetloader_iter.__next__() images, _, _, _ = batch pred_trg_aux, pred_trg_main = model(images.cuda(device)) pred_trg_aux = interp_target(pred_trg_aux) pred_trg_main = interp_target(pred_trg_main) pred_prob_trg_aux = F.softmax(pred_trg_aux) pred_prob_trg_main = F.softmax(pred_trg_main) loss_target_entp_aux = entropy_loss(pred_prob_trg_aux) loss_target_entp_main = entropy_loss(pred_prob_trg_main) loss = (cfg.TRAIN.LAMBDA_ENT_AUX * loss_target_entp_aux + cfg.TRAIN.LAMBDA_ENT_MAIN * loss_target_entp_main) loss.backward() optimizer.step() current_losses = {'loss_seg_src_aux': loss_seg_src_aux, 'loss_seg_src_main': loss_seg_src_main, 'loss_ent_aux': loss_target_entp_aux, 'loss_ent_main': loss_target_entp_main} print_losses(current_losses, i_iter) if i_iter % cfg.TRAIN.SAVE_PRED_EVERY == 0 and i_iter != 0: print('taking snapshot ...') print('exp =', cfg.TRAIN.SNAPSHOT_DIR) torch.save(model.state_dict(), osp.join(cfg.TRAIN.SNAPSHOT_DIR, f'model_{i_iter}.pth')) if i_iter >= cfg.TRAIN.EARLY_STOP - 1: break sys.stdout.flush() # Visualize with tensorboard if viz_tensorboard: log_losses_tensorboard(writer, current_losses, i_iter) if i_iter % cfg.TRAIN.TENSORBOARD_VIZRATE == cfg.TRAIN.TENSORBOARD_VIZRATE - 1: draw_in_tensorboard(writer, images, i_iter, pred_trg_main, num_classes, 'T') draw_in_tensorboard(writer, images_source, i_iter, pred_src_main, num_classes, 'S')
def __init__(self): super(Net, self).__init__() # backbone self.resnet50 = resnet50.resnet50(pretrained=True, strides=[2, 2, 2, 1]) self.stage1 = nn.Sequential(self.resnet50.conv1, self.resnet50.bn1, self.resnet50.relu, self.resnet50.maxpool) self.stage2 = nn.Sequential(self.resnet50.layer1) self.stage3 = nn.Sequential(self.resnet50.layer2) self.stage4 = nn.Sequential(self.resnet50.layer3) self.stage5 = nn.Sequential(self.resnet50.layer4) self.mean_shift = Net.MeanShift(2) # branch: class boundary detection self.fc_edge1 = nn.Sequential( nn.Conv2d(64, 32, 1, bias=False), nn.GroupNorm(4, 32), nn.ReLU(inplace=True), ) self.fc_edge2 = nn.Sequential( nn.Conv2d(256, 32, 1, bias=False), nn.GroupNorm(4, 32), nn.ReLU(inplace=True), ) self.fc_edge3 = nn.Sequential( nn.Conv2d(512, 32, 1, bias=False), nn.GroupNorm(4, 32), nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), nn.ReLU(inplace=True), ) self.fc_edge4 = nn.Sequential( nn.Conv2d(1024, 32, 1, bias=False), nn.GroupNorm(4, 32), nn.Upsample(scale_factor=4, mode='bilinear', align_corners=False), nn.ReLU(inplace=True), ) self.fc_edge5 = nn.Sequential( nn.Conv2d(2048, 32, 1, bias=False), nn.GroupNorm(4, 32), nn.Upsample(scale_factor=4, mode='bilinear', align_corners=False), nn.ReLU(inplace=True), ) self.fc_edge6 = nn.Conv2d(160, 1, 1, bias=True) # branch: displacement field self.fc_dp1 = nn.Sequential( nn.Conv2d(64, 64, 1, bias=False), nn.GroupNorm(8, 64), nn.ReLU(inplace=True), ) self.fc_dp2 = nn.Sequential( nn.Conv2d(256, 128, 1, bias=False), nn.GroupNorm(16, 128), nn.ReLU(inplace=True), ) self.fc_dp3 = nn.Sequential( nn.Conv2d(512, 256, 1, bias=False), nn.GroupNorm(16, 256), nn.ReLU(inplace=True), ) self.fc_dp4 = nn.Sequential( nn.Conv2d(1024, 256, 1, bias=False), nn.GroupNorm(16, 256), nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), nn.ReLU(inplace=True), ) self.fc_dp5 = nn.Sequential( nn.Conv2d(2048, 256, 1, bias=False), nn.GroupNorm(16, 256), nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), nn.ReLU(inplace=True), ) self.fc_dp6 = nn.Sequential( nn.Conv2d(768, 256, 1, bias=False), nn.GroupNorm(16, 256), nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), nn.ReLU(inplace=True), ) self.fc_dp7 = nn.Sequential( nn.Conv2d(448, 256, 1, bias=False), nn.GroupNorm(16, 256), nn.ReLU(inplace=True), nn.Conv2d(256, 2, 1, bias=False), self.mean_shift ) self.backbone = nn.ModuleList([self.stage1, self.stage2, self.stage3, self.stage4, self.stage5]) self.edge_layers = nn.ModuleList([self.fc_edge1, self.fc_edge2, self.fc_edge3, self.fc_edge4, self.fc_edge5, self.fc_edge6]) self.dp_layers = nn.ModuleList([self.fc_dp1, self.fc_dp2, self.fc_dp3, self.fc_dp4, self.fc_dp5, self.fc_dp6, self.fc_dp7])
def __init__(self, n_class=19, in_size=(448, 896), num_layers=128, in_chns=32, squeeze_ratio=1.0 / 32, out_chns=1, dilate_sec=(1, 2, 4, 8, 4, 2), aspp_sec=(24, 48, 72), norm_act=InPlaceABN): """ MixedScaleDenseNet: Mixed Scale Dense Network :param n_class: (int) Number of classes :param in_size: (tuple or int) Size of the input image feed to the network :param num_layers: (int) Number of layers used in the mixed scale dense block/stage :param in_chns: (int) Input channels of the mixed scale dense block/stage :param out_chns: (int) Output channels of each Conv used in the mixed scale dense block/stage :param dilate_sec: (tuple) Dilation rates used in the mixed scale dense block/stage :param aspp_sec: (tuple) Dilation rates used in ASPP :param norm_act: (object) Batch Norm Activation Type """ super(MixedScaleDenseNet, self).__init__() self.n_classes = n_class self.conv_in = nn.Sequential( OrderedDict([("conv", nn.Conv2d(in_channels=3, out_channels=in_chns, kernel_size=7, stride=2, padding=3, bias=False)), ("norm", norm_act(in_chns)), ("pool", nn.MaxPool2d(3, stride=2, padding=1))])) self.dense = DenseModule(in_chns, squeeze_ratio, out_chns, num_layers, dilate_sec=dilate_sec, norm_act=norm_act) self.last_channel = self.dense.out_channels # in_chns + num_layers * out_chns # Pooling and predictor self.feat_out = norm_act(self.last_channel) self.out_se = nn.Sequential( SCSEBlock(channel=self.last_channel, reduction=16)) if self.n_classes != 0: self.aspp = nn.Sequential( ASPPInPlaceABNBlock(self.last_channel, self.last_channel, feat_res=(int(in_size[0] / 4), int(in_size[1] / 4)), aspp_sec=aspp_sec, norm_act=norm_act)) self.score_se = nn.Sequential( SCSEBlock(channel=self.last_channel, reduction=16)) self.score = nn.Sequential( OrderedDict([("norm.1", norm_act(self.last_channel)), ("conv.1", nn.Conv2d(self.last_channel, self.last_channel, kernel_size=3, stride=1, padding=2, dilation=2, bias=False)), ("norm.2", norm_act(self.last_channel)), ("conv.2", nn.Conv2d(self.last_channel, self.n_classes, kernel_size=1, stride=1, padding=0, bias=True)), ("up1", nn.Upsample(size=in_size, mode='bilinear'))]))
def val(): """Create the models and start the evaluation process.""" args = get_arguments() # gpu0 = args.gpu # os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu h, w = args.input_size, args.input_size if args.whole: if args.data_set == 'pascalvoc': input_size = (320, 480) else: input_size = (1024, 2048) else: input_size = (h, w) import libs.models as models model = models.__dict__[args.arch](num_classes=args.num_classes) saved_state_dict = torch.load(args.restore_from) model.load_state_dict(saved_state_dict,strict=False) # print("Model: " + args.arch + " Restoring from: " + args.restore_from) model.eval() model.cuda() if args.rgb == 1: IMG_MEAN = np.array((0.485, 0.456, 0.406), dtype=np.float32) IMG_VARS = np.array((0.229, 0.224, 0.225), dtype=np.float32) else: IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32) IMG_VARS = np.array((1, 1, 1), dtype=np.float32) if args.data_set == 'cityscapes': dataset = Cityscapes(args.data_dir, args.data_list, crop_size=(1024, 2048), mean=IMG_MEAN, vars=IMG_VARS, scale=False, mirror=False, RGB=args.rgb) elif args.data_set == 'pascalvoc': dataset = VOCSegmentation(args.data_dir, image_set = 'train', scale = False, mean=IMG_MEAN, vars = IMG_VARS) testloader = data.DataLoader(dataset, batch_size=1, shuffle=False, pin_memory=True) print("testloader: " + str(len(testloader))) confusion_matrix = np.zeros((args.num_classes, args.num_classes)) palette = get_palette(256) if args.data_set == 'pascalvoc': interp = nn.Upsample(size=(320, 480), mode='bilinear', align_corners=True) else: interp = nn.Upsample(size=(1024, 2048), mode='bilinear', align_corners=True) output_images = os.path.join(args.output_dir, "./images") output_results = os.path.join(args.output_dir, "./result") if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) if not os.path.exists(output_images): os.makedirs(output_images) if not os.path.exists(output_results): os.makedirs(output_results) for index, batch in enumerate(testloader): if index % 100 == 0: print('%d processd' % (index)) image, label = batch if args.data_set == 'pascalvoc': image = image.numpy() label = label.numpy() # print(image.shape) # print(label.shape) # sys.exit() with torch.no_grad(): if args.whole: output = predict_multiscale(model, image, input_size, [1.0], args.num_classes, False) else: output = predict_sliding(model, image.numpy(), input_size, args.num_classes, True) # print(output.shape) seg_pred = np.asarray(np.argmax(output, axis=2), dtype=np.uint8) output_im = PILImage.fromarray(seg_pred) output_im.putpalette(palette) if args.data_set == 'cityscapes': seg_gt = np.asarray(label[0].numpy(), dtype=np.int) else: seg_gt = np.asarray(label[0], dtype=np.int) # print(seg_pred.shape) # (320, 480) # print(seg_gt.shape) # (320, 480) # sys.exit() ignore_index = seg_gt != 255 # print(ignore_index) # sys.exit() seg_gt = seg_gt[ignore_index] seg_pred = seg_pred[ignore_index] confusion_matrix += get_confusion_matrix(seg_gt, seg_pred, args.num_classes) pos = confusion_matrix.sum(1) res = confusion_matrix.sum(0) tp = np.diag(confusion_matrix) IU_array = (tp / np.maximum(1.0, pos + res - tp)) mean_IU = IU_array.mean() print({'meanIU': mean_IU, 'IU_array': IU_array}) with open(os.path.join(args.output_dir, "result", 'result.txt'), 'w') as f: f.write(json.dumps({'meanIU': mean_IU, 'IU_array': IU_array.tolist()}))
def main(): """Create the model and start the evaluation process.""" args = get_arguments() cfg = fromfile(args.config) # gpu0 = args.gpu os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu h, w = map(int, args.input_size.split(',')) if args.whole: input_size = (1024, 2048) else: input_size = (h, w) model = Res_Deeplab(cfg.model, cfg.data_cfg.num_classes) for i in range(59500, 60001, 100): restore_from = 'snapshots/CS_scenes_%s.pth' % i saved_state_dict = torch.load(restore_from) model.load_state_dict(saved_state_dict) model.eval() model.cuda() testloader = data.DataLoader(CSDataSet(args.data_dir, args.data_list, crop_size=(1024, 2048), mean=IMG_MEAN, scale=False, mirror=False, use_zip=args.use_zip), batch_size=1, shuffle=False, pin_memory=True) data_list = [] confusion_matrix = np.zeros((args.num_classes, args.num_classes)) palette = get_palette(256) interp = nn.Upsample(size=(1024, 2048), mode='bilinear', align_corners=True) if not os.path.exists('outputs'): os.makedirs('outputs') for index, batch in enumerate(testloader): if index % 100 == 0: print('Time %s, %d processd' % (time.strftime("%Y-%m-%d %H:%M:%S"), index)) image, label, size, name = batch size = size[0].numpy() with torch.no_grad(): if args.whole: output = predict_multiscale(model, image, input_size, [0.75, 1.0, 1.25], args.num_classes, True, args.recurrence) else: output = predict_sliding(model, image.numpy(), input_size, args.num_classes, True, args.recurrence) # padded_prediction = model(Variable(image, volatile=True).cuda()) # output = interp(padded_prediction).cpu().data[0].numpy().transpose(1,2,0) seg_pred = np.asarray(np.argmax(output, axis=2), dtype=np.uint8) output_im = PILImage.fromarray(seg_pred) output_im.putpalette(palette) output_im.save('outputs/' + name[0] + '.png') seg_gt = np.asarray(label[0].numpy()[:size[0], :size[1]], dtype=np.int) ignore_index = seg_gt != 255 seg_gt = seg_gt[ignore_index] seg_pred = seg_pred[ignore_index] # show_all(gt, output) confusion_matrix += get_confusion_matrix(seg_gt, seg_pred, args.num_classes) pos = confusion_matrix.sum(1) res = confusion_matrix.sum(0) tp = np.diag(confusion_matrix) IU_array = (tp / np.maximum(1.0, pos + res - tp)) mean_IU = IU_array.mean() # getConfusionMatrixPlot(confusion_matrix) print({'meanIU': mean_IU, 'IU_array': IU_array}) with open('result.txt', 'a') as f: f.write( json.dumps({ 'meanIU': mean_IU, 'IU_array': IU_array.tolist() }))
def __init__(self, upscale_factor=2):
    super(PyTorchTestModel, self).__init__()
    self.m = nn.Upsample(
        scale_factor=upscale_factor,
        mode="nearest",
    )
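# --- Shape sketch for the nearest-neighbour upsample configured above (illustrative only) ---
import torch
import torch.nn as nn

m = nn.Upsample(scale_factor=2, mode="nearest")
x = torch.arange(4.0).reshape(1, 1, 2, 2)
print(m(x))  # each input value is repeated into a 2x2 block, giving a (1, 1, 4, 4) map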
def __init__(self, activation='ReLU', pooling='max'): super(UNet, self).__init__() if activation == 'ReLU': activation_function = nn.ReLU() elif activation == 'LeakyReLU': activation_function = nn.LeakyReLU() else: raise NotImplementedError if pooling == 'max': pooling_layer = nn.MaxPool2d(kernel_size=2, stride=2) else: raise NotImplementedError # ----------------- Encoder ------------------- self.conv1 = nn.Sequential(nn.Conv2d(1, 32, kernel_size=3, padding=1), nn.BatchNorm2d(32), activation_function, nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.BatchNorm2d(32), activation_function, pooling_layer) self.conv2 = nn.Sequential(nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), activation_function, nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), activation_function, pooling_layer, nn.Dropout2d(p=0.3)) self.conv3 = nn.Sequential( nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), activation_function, nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), activation_function, pooling_layer, nn.Dropout2d(p=0.3)) self.conv4 = nn.Sequential(nn.Conv2d(128, 1, kernel_size=3, padding=1), ) self.encoder = nn.Sequential(self.conv1, self.conv2, self.conv3, self.conv4) # --------------------------------------------- # ----------------- Decoder ------------------- self.conv5 = nn.Sequential(nn.Conv2d(1, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), activation_function) self.conv6 = nn.Sequential( nn.Upsample(scale_factor=2, mode='bilinear'), nn.Conv2d(128, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), activation_function, nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), activation_function, nn.Dropout2d(p=0.3)) self.conv7 = nn.Sequential( nn.Upsample(scale_factor=2, mode='bilinear'), nn.Conv2d(64, 32, kernel_size=3, padding=1), nn.BatchNorm2d(32), activation_function, nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.BatchNorm2d(32), activation_function, nn.Dropout2d(p=0.3)) self.conv8 = nn.Sequential( nn.Upsample(scale_factor=2, mode='bilinear'), nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.BatchNorm2d(32), activation_function, nn.Conv2d(32, 1, kernel_size=3, padding=1)) self.decoder = nn.Sequential(self.conv5, self.conv6, self.conv7, self.conv8)
def __init__(self, in_ch, out_ch, res_ch=0, scale_factor=2, mode="nearest", batch_norm=True, rep=1, index=0): ''' Constructor ''' super(DarknetUpsampling, self).__init__() # Parameters self.in_channels = in_ch self.out_channels = out_ch self.res_channels = res_ch self.repetitions = rep self.batch_norm = batch_norm self.scale_factor = scale_factor self.mode = mode self.index_in = index # Set Conv in block self.conv_in = DarknetConvBlock(self.in_channels, self.out_channels, kernel_size=1, stride=1, padding=0, batch_norm=batch_norm, index=index) # Upsampling block if self.mode in ["linear", "bilinear", "bicubic", "trilinear"]: up_layer = nn.Upsample(scale_factor=self.scale_factor, mode=self.mode, align_corners=True) else: up_layer = nn.Upsample(scale_factor=self.scale_factor, mode=self.mode) self.upsample = nn.Sequential() self.upsample.add_module("upsample_{0}".format(index), up_layer) # Set output conv block self.conv_loop = nn.Sequential() for idx in range(1, rep + 1): index += 1 # Change input and output according the pair of convs (1x1 and 3x3) if idx == 1: init_vol = self.out_channels + self.res_channels final_vol = self.out_channels filter_sz = 1 pad = 0 elif idx % 2 == 1: init_vol = self.in_channels final_vol = self.out_channels filter_sz = 1 pad = 0 else: init_vol = self.out_channels final_vol = self.in_channels filter_sz = 3 pad = 1 # Conv layer conv_module = DarknetConvBlock(init_vol, final_vol, kernel_size=filter_sz, stride=1, padding=pad, batch_norm=batch_norm, index=index) self.conv_loop.add_module("conv_block_{0}".format(idx), conv_module) self.index_out = index