def __init__(self, d):
    """Create the decoder5 layers and load every convolution from ``d``.

    Each stage registers ``reflecPad<N>`` (one-pixel reflection padding),
    ``conv<N>`` (3x3, stride 1, no padding) and, except for the final stage,
    ``relu<N>``.  Some stages are followed by a 2x nearest-neighbour
    upsampling layer.  The weight and bias of every convolution are replaced
    by parameters built from ``d.modules[index]``.

    Args:
        d: object whose ``modules`` sequence exposes ``weight`` and ``bias``
            at the indices listed in ``layer_plan``.
            NOTE(review): presumably a loaded legacy Torch container -
            confirm against the caller.
    """
    super(decoder5, self).__init__()

    # (stage number, in channels, out channels, index into d.modules,
    #  name of the upsampling layer registered after the stage, or None).
    # Spatial sizes in the trailing comments are quoted from the original
    # inline comments.
    layer_plan = (
        (15, 512, 512, 1, "unpool"),     # 28 x 28 after unpool
        (16, 512, 512, 5, None),         # 28 x 28
        (17, 512, 512, 8, None),         # 28 x 28
        (18, 512, 512, 11, None),        # 28 x 28
        (19, 512, 256, 14, "unpool2"),   # 56 x 56 after unpool2
        (20, 256, 256, 18, None),        # 56 x 56
        (21, 256, 256, 21, None),
        (22, 256, 256, 24, None),
        (23, 256, 128, 27, "unpool3"),   # 112 x 112 after unpool3
        (24, 128, 128, 31, None),
        (25, 128, 64, 34, "unpool4"),
        (26, 64, 64, 38, None),
        (27, 64, 3, 41, None),
    )
    final_stage = layer_plan[-1][0]

    for stage, c_in, c_out, src_index, unpool_name in layer_plan:
        setattr(self, "reflecPad{}".format(stage),
                nn.ReflectionPad2d((1, 1, 1, 1)))

        conv = nn.Conv2d(c_in, c_out, 3, 1, 0)
        source = d.modules[src_index]
        conv.weight = torch.nn.Parameter(torch.Tensor(source.weight))
        conv.bias = torch.nn.Parameter(torch.Tensor(source.bias))
        setattr(self, "conv{}".format(stage), conv)

        # The last convolution produces the output and has no activation.
        if stage != final_stage:
            setattr(self, "relu{}".format(stage), nn.ReLU(inplace=True))
        if unpool_name is not None:
            setattr(self, unpool_name,
                    nn.UpsamplingNearest2d(scale_factor=2))
def build_layers(
    img_sz,
    img_fm,
    init_fm,
    max_fm,
    n_layers,
    n_attr,
    n_skip,
    deconv_method,
    instance_norm,
    enc_dropout,
    dec_dropout,
):
    """
    Build auto-encoder layers.

    One encoder stage and one decoder stage are created per level.  Encoder
    stages are returned input-first; decoder stages are returned in the
    reverse order (the stage built for the last level comes first).

    Args:
        img_sz: image side length; must be a power of two.
        img_fm: number of feature maps of the input image.
        init_fm: feature maps produced by the first encoder stage.
        max_fm: upper bound on the feature maps of any stage.
        n_layers: number of encoder/decoder stages.
        n_attr: extra input channels given to every decoder stage.
        n_skip: number of levels whose decoder input is widened by the
            encoder output of the same level.
        deconv_method: "upsampling", "convtranspose" or "pixelshuffle".
        instance_norm: use ``nn.InstanceNorm2d`` instead of
            ``nn.BatchNorm2d``.
        enc_dropout: dropout probability for encoder stages (0 disables).
        dec_dropout: dropout probability for decoder stages (0 disables).

    Returns:
        ``(enc_layers, dec_layers)``: two lists of ``nn.Sequential``.
    """
    assert init_fm <= max_fm
    assert n_skip <= n_layers - 1
    assert np.log2(img_sz).is_integer()
    assert n_layers <= int(np.log2(img_sz))
    assert type(instance_norm) is bool
    assert 0 <= enc_dropout < 1
    assert 0 <= dec_dropout < 1

    make_norm = nn.InstanceNorm2d if instance_norm else nn.BatchNorm2d
    first_skip_level = n_layers - (n_skip + 1)

    encoder_stages = []
    decoder_stages = []
    fm_in, fm_out = img_fm, init_fm

    for level in range(n_layers):
        outermost = level == 0
        has_skip = first_skip_level <= level < n_layers - 1
        dec_in = fm_out + n_attr + (fm_out if has_skip else 0)
        dec_out = fm_in

        # encoder stage: strided conv, then norm (except on the image
        # level), activation and optional dropout
        down = [nn.Conv2d(fm_in, fm_out, 4, 2, 1)]
        if not outermost:
            down.append(make_norm(fm_out, affine=True))
        down.append(nn.LeakyReLU(0.2, inplace=True))
        if enc_dropout > 0:
            down.append(nn.Dropout(enc_dropout))

        # decoder stage: 2x upscaling by the selected method
        if deconv_method == "upsampling":
            up = [
                nn.UpsamplingNearest2d(scale_factor=2),
                nn.Conv2d(dec_in, dec_out, 3, 1, 1),
            ]
        elif deconv_method == "convtranspose":
            up = [nn.ConvTranspose2d(dec_in, dec_out, 4, 2, 1, bias=False)]
        else:
            assert deconv_method == "pixelshuffle"
            up = [
                nn.Conv2d(dec_in, dec_out * 4, 3, 1, 1),
                nn.PixelShuffle(2),
            ]
        # The stage that emits the image gets no norm and no activation.
        if not outermost:
            up.append(make_norm(dec_out, affine=True))
            if dec_dropout > 0 and level >= n_layers - 3:
                up.append(nn.Dropout(dec_dropout))
            up.append(nn.ReLU(inplace=True))

        encoder_stages.append(nn.Sequential(*down))
        decoder_stages.insert(0, nn.Sequential(*up))

        # widths for the next level
        fm_in, fm_out = fm_out, min(2 * fm_out, max_fm)

    return encoder_stages, decoder_stages
def __init__(self):
    """Create the decoder5 layers with freshly initialised convolutions.

    Each stage registers ``reflecPad<N>`` (one-pixel reflection padding),
    ``conv<N>`` (3x3, stride 1, no padding) and, except for the final stage,
    ``relu<N>``.  Some stages are followed by a 2x nearest-neighbour
    upsampling layer.
    """
    super(decoder5, self).__init__()

    # (stage number, in channels, out channels,
    #  name of the upsampling layer registered after the stage, or None).
    # Spatial sizes in the trailing comments are quoted from the original
    # inline comments.
    layer_plan = (
        (15, 512, 512, "unpool"),     # 28 x 28 after unpool
        (16, 512, 512, None),         # 28 x 28
        (17, 512, 512, None),         # 28 x 28
        (18, 512, 512, None),         # 28 x 28
        (19, 512, 256, "unpool2"),    # 56 x 56 after unpool2
        (20, 256, 256, None),         # 56 x 56
        (21, 256, 256, None),
        (22, 256, 256, None),
        (23, 256, 128, "unpool3"),    # 112 x 112 after unpool3
        (24, 128, 128, None),
        (25, 128, 64, "unpool4"),
        (26, 64, 64, None),
        (27, 64, 3, None),
    )
    final_stage = layer_plan[-1][0]

    for stage, c_in, c_out, unpool_name in layer_plan:
        setattr(self, "reflecPad{}".format(stage),
                nn.ReflectionPad2d((1, 1, 1, 1)))
        setattr(self, "conv{}".format(stage), nn.Conv2d(c_in, c_out, 3, 1, 0))

        # The last convolution produces the output and has no activation.
        if stage != final_stage:
            setattr(self, "relu{}".format(stage), nn.ReLU(inplace=True))
        if unpool_name is not None:
            setattr(self, unpool_name,
                    nn.UpsamplingNearest2d(scale_factor=2))
def deconv_block(in_dim, out_dim):
    """Return a block of two 3x3 conv + ELU pairs followed by 2x upsampling.

    Both convolutions use stride 1 and padding 1, so they keep the spatial
    size; the trailing nearest-neighbour upsampling doubles it.

    Args:
        in_dim: number of input channels.
        out_dim: number of output channels of both convolutions.

    Returns:
        An ``nn.Sequential`` of five layers.
    """
    # The first positional argument of nn.ELU is ``alpha``, not ``inplace``:
    # ``nn.ELU(True)`` silently set alpha=True and left inplace off.  The
    # flag is passed by keyword so the activation really runs in place;
    # alpha keeps its default of 1.0, so the computed values are unchanged.
    return nn.Sequential(
        nn.Conv2d(in_dim, out_dim, kernel_size=3, stride=1, padding=1),
        nn.ELU(inplace=True),
        nn.Conv2d(out_dim, out_dim, kernel_size=3, stride=1, padding=1),
        nn.ELU(inplace=True),
        nn.UpsamplingNearest2d(scale_factor=2),
    )
def __init__(self, d):
    """Create the decoder layers and load every convolution from ``d``.

    Each stage registers ``reflecPad<N>`` (one-pixel reflection padding),
    ``conv<N>`` (3x3, stride 1, no padding) and, except for the final stage,
    ``relu<N>``.  Some stages are followed by a 2x nearest-neighbour
    upsampling layer.  The weight and bias of every convolution are replaced
    by ``d.get(index).weight.float()`` / ``.bias.float()``.

    Args:
        d: object whose ``get(index)`` returns an entry exposing ``weight``
            and ``bias`` tensors.
            NOTE(review): presumably a loaded legacy Torch container -
            confirm against the caller.
    """
    super(decoder, self).__init__()

    # (stage number, in channels, out channels, index passed to d.get,
    #  name of the upsampling layer registered after the stage, or None).
    # Spatial sizes in the trailing comments are quoted from the original
    # inline comments.
    layer_plan = (
        (15, 512, 512, 1, "unpool"),     # 28 x 28 after unpool
        (16, 512, 512, 5, None),         # 28 x 28
        (17, 512, 512, 8, None),         # 28 x 28
        (18, 512, 512, 11, None),        # 28 x 28
        (19, 512, 256, 14, "unpool2"),   # 56 x 56 after unpool2
        (20, 256, 256, 18, None),        # 56 x 56
        (21, 256, 256, 21, None),
        (22, 256, 256, 24, None),
        (23, 256, 128, 27, "unpool3"),   # 112 x 112 after unpool3
        (24, 128, 128, 31, None),
        (25, 128, 64, 34, "unpool4"),
        (26, 64, 64, 38, None),
        (27, 64, 3, 41, None),
    )
    final_stage = layer_plan[-1][0]

    for stage, c_in, c_out, src_index, unpool_name in layer_plan:
        setattr(self, "reflecPad{}".format(stage),
                nn.ReflectionPad2d((1, 1, 1, 1)))

        conv = nn.Conv2d(c_in, c_out, 3, 1, 0)
        conv.weight = torch.nn.Parameter(d.get(src_index).weight.float())
        conv.bias = torch.nn.Parameter(d.get(src_index).bias.float())
        setattr(self, "conv{}".format(stage), conv)

        # The last convolution produces the output and has no activation.
        if stage != final_stage:
            setattr(self, "relu{}".format(stage), nn.ReLU(inplace=True))
        if unpool_name is not None:
            setattr(self, unpool_name,
                    nn.UpsamplingNearest2d(scale_factor=2))
def detect_object_in_image(net_model, pnp_solver, in_img, config,
                           grid_belief_debug=False, norm_belief=True,
                           run_sampling=False, network='dope'):
    '''Detect objects in an image using a specific trained network model.

    Args:
        net_model: callable returning ``(out, seg)`` for a batched image
            tensor; the last entry of each is used.
        pnp_solver: forwarded unchanged to
            ``ObjectDetector.find_object_poses``.
        in_img: input image accepted by ``transform``; ``None`` short-circuits.
        config: forwarded unchanged to ``ObjectDetector.find_object_poses``.
        grid_belief_debug: when true, also build a grid image of the belief
            maps.
        norm_belief: when true, rescale each belief map to start at 0 and
            peak at 1 before it is clamped.
        run_sampling: accepted for interface compatibility; not used here.
        network: accepted for interface compatibility; not used here.

    Returns:
        ``[]`` when ``in_img`` is ``None``; otherwise the detected objects,
        or the tuple ``(detected_objects, im_belief)`` when
        ``grid_belief_debug`` is true.
    '''
    if in_img is None:
        return []

    # Run network inference (requires a CUDA device).
    image_tensor = transform(in_img)
    image_torch = Variable(image_tensor).cuda().unsqueeze(0)
    with torch.cuda.amp.autocast():
        out, seg = net_model(image_torch)
    # Post-processing is done in full precision regardless of what
    # autocast produced.
    vertex2 = out[-1][0].to(torch.float32)
    aff = seg[-1][0].to(torch.float32)

    # Find objects from network output
    detected_objects = ObjectDetector.find_object_poses(
        vertex2, aff, pnp_solver, config
    )

    if not grid_belief_debug:
        return detected_objects

    # Build the belief-map debug grid.  (The previous version also rescaled
    # ``in_img`` here, but the result was never read, so that dead work has
    # been removed.)
    upsampling = nn.UpsamplingNearest2d(scale_factor=1)
    belief_imgs = []
    for j in range(vertex2.size()[0]):
        belief = vertex2[j].clone()
        if norm_belief:
            belief -= float(torch.min(belief).item())
            peak = float(torch.max(belief).item())
            # A constant map has peak 0 after the shift; dividing would
            # fill the image with NaN, so such maps are left at zero.
            if peak > 0:
                belief /= peak

        belief = upsampling(belief.unsqueeze(0).unsqueeze(0)).squeeze().data
        belief = torch.clamp(belief, 0, 1).cpu()
        # Replicate the single-channel map into three identical channels.
        belief = torch.cat([belief.unsqueeze(0)] * 3)
        belief_imgs.append(belief.squeeze().numpy())

    # Create the image grid
    belief_imgs = torch.tensor(np.array(belief_imgs))
    im_belief = ObjectDetector.get_image_grid(belief_imgs, None,
                                              mean=0, std=1)
    return detected_objects, im_belief
# NOTE(review): this return statement belongs to a definition whose header
# precedes this span; it is left exactly as found.
return self.lambda_func(self.forward_prepare(input))


class LambdaMap(LambdaBase):
    """Apply ``lambda_func`` to every item yielded by ``forward_prepare``."""

    def forward(self, input):
        # The map is materialised so the caller receives a list.
        return list(map(self.lambda_func, self.forward_prepare(input)))


class LambdaReduce(LambdaBase):
    """Fold the items yielded by ``forward_prepare`` with ``lambda_func``."""

    def forward(self, input):
        return reduce(self.lambda_func, self.forward_prepare(input))


# Sequential stack taking 256 input channels down to 3 output channels.
# Every 3x3 convolution is preceded by one-pixel reflection padding, and two
# nearest-neighbour layers each upsample by a factor of two.
feature_invertor_conv3_1 = nn.Sequential(  # Sequential,
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(256, 128, (3, 3)),
    nn.ReLU(),
    nn.UpsamplingNearest2d(scale_factor=2),
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(128, 128, (3, 3)),
    nn.ReLU(),
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(128, 64, (3, 3)),
    nn.ReLU(),
    nn.UpsamplingNearest2d(scale_factor=2),
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(64, 64, (3, 3)),
    nn.ReLU(),
    nn.ReflectionPad2d((1, 1, 1, 1)),
    nn.Conv2d(64, 3, (3, 3)),
)
def UpLayer(type, scale_factor=2):
    """Return an upsampling layer of the requested kind.

    Args:
        type: ``'nearest'`` or ``'bilinear'``.  (The name shadows the
            builtin but is kept because callers may pass it by keyword.)
        scale_factor: spatial multiplier applied by the layer.

    Returns:
        ``nn.UpsamplingNearest2d`` or ``nn.UpsamplingBilinear2d`` configured
        with ``scale_factor``, or ``None`` for any other ``type`` (unchanged
        from the previous behaviour).
    """
    if type == 'nearest':
        return nn.UpsamplingNearest2d(scale_factor=scale_factor)
    if type == 'bilinear':
        # Previously hard-coded to 2, which silently ignored the argument.
        return nn.UpsamplingBilinear2d(scale_factor=scale_factor)
    return None
def __init__(self, d):
    """Create the decoder4 layers and load every convolution from ``d``.

    Each stage registers ``reflecPad<N>`` (one-pixel reflection padding),
    ``conv<N>`` (3x3, stride 1, no padding) and, except for the final stage,
    ``relu<N>``.  Some stages are followed by a 2x nearest-neighbour
    upsampling layer.  The weight and bias of every convolution are replaced
    by ``d.get(index).weight.float()`` / ``.bias.float()``.

    Args:
        d: object whose ``get(index)`` returns an entry exposing ``weight``
            and ``bias`` tensors.
            NOTE(review): presumably a loaded legacy Torch container -
            confirm against the caller.
    """
    super(decoder4, self).__init__()

    # (stage number, in channels, out channels, index passed to d.get,
    #  name of the upsampling layer registered after the stage, or None).
    # Spatial sizes in the trailing comments are quoted from the original
    # inline comments.
    layer_plan = (
        (11, 512, 256, 1, "unpool"),     # 28 x 28, then 56 x 56 after unpool
        (12, 256, 256, 5, None),         # 56 x 56
        (13, 256, 256, 8, None),         # 56 x 56
        (14, 256, 256, 11, None),        # 56 x 56
        (15, 256, 128, 14, "unpool2"),   # 56 x 56, then 112 x 112
        (16, 128, 128, 18, None),        # 112 x 112
        (17, 128, 64, 21, "unpool3"),    # 112 x 112, then 224 x 224
        (18, 64, 64, 25, None),          # 224 x 224
        (19, 64, 3, 28, None),
    )
    final_stage = layer_plan[-1][0]

    for stage, c_in, c_out, src_index, unpool_name in layer_plan:
        setattr(self, "reflecPad{}".format(stage),
                nn.ReflectionPad2d((1, 1, 1, 1)))

        conv = nn.Conv2d(c_in, c_out, 3, 1, 0)
        conv.weight = torch.nn.Parameter(d.get(src_index).weight.float())
        conv.bias = torch.nn.Parameter(d.get(src_index).bias.float())
        setattr(self, "conv{}".format(stage), conv)

        # The last convolution produces the output and has no activation.
        if stage != final_stage:
            setattr(self, "relu{}".format(stage), nn.ReLU(inplace=True))
        if unpool_name is not None:
            setattr(self, unpool_name,
                    nn.UpsamplingNearest2d(scale_factor=2))
def Upsample(x):
    """Return ``x`` enlarged 2x with nearest-neighbour upsampling.

    A fresh ``nn.UpsamplingNearest2d(scale_factor=2)`` is built on every
    call and applied to ``x``.
    """
    return nn.UpsamplingNearest2d(scale_factor=2)(x)
def __init__(self, img_size, channels, kernel_size, dropout,
             final_activation=nn.Tanh()):
    """Build the convolutional encoder, the two linear layers and the decoder.

    Args:
        img_size: stored on the instance; not otherwise used here.
        channels: base channel count; the encoder widens to ``channels * 5``.
        kernel_size: kernel size shared by every convolution.
        dropout: probability given to every ``nn.Dropout2d``.
        final_activation: module appended as the last decoder layer.
            NOTE(review): the default ``nn.Tanh()`` instance is created once
            when the function is defined and is shared by every model that
            relies on the default.
    """
    super().__init__()

    # TODO: use nn.ReflectionPad2d to allow even numbered kernel sizes
    # Padding to ensure that input and output dims are the same
    pad = int((kernel_size - 1) / 2)

    self.kernel_size = kernel_size
    self.img_size = img_size
    self.channels = channels
    self.pad = pad

    def conv_elu(c_in, c_out, stride=1):
        """Return one convolution followed by its ELU activation."""
        return [
            nn.Conv2d(c_in, c_out, kernel_size, stride=stride, padding=pad),
            nn.ELU(),
        ]

    def upsample():
        return nn.UpsamplingNearest2d(scale_factor=2)

    # ---- encoder -------------------------------------------------------
    encoder_layers = []
    encoder_layers += conv_elu(3, channels)
    encoder_layers += conv_elu(channels, channels)
    encoder_layers += conv_elu(channels, channels, stride=2)
    encoder_layers.append(nn.Dropout2d(dropout))
    # Each further group widens by one multiple of ``channels`` and then
    # applies a stride-2 convolution.
    for mult in (2, 3, 4):
        encoder_layers += conv_elu(channels * (mult - 1), channels * mult)
        encoder_layers += conv_elu(channels * mult, channels * mult, stride=2)
    encoder_layers += conv_elu(channels * 4, channels * 5)
    encoder_layers += conv_elu(channels * 5, channels * 5)
    encoder_layers.append(nn.Dropout2d(dropout))
    self.encoder = nn.Sequential(*encoder_layers)

    # ---- bottleneck ----------------------------------------------------
    self.fc1 = nn.Linear(8 * 8 * channels * 5, 64)
    self.fc2 = nn.Linear(64, 8 * 8 * channels)

    # ---- decoder -------------------------------------------------------
    decoder_layers = []
    decoder_layers += conv_elu(channels, channels)
    decoder_layers += conv_elu(channels, channels)
    decoder_layers += [nn.Dropout2d(dropout), upsample()]
    for _ in range(3):
        decoder_layers += conv_elu(channels, channels)
        decoder_layers += conv_elu(channels, channels)
        decoder_layers.append(upsample())
    decoder_layers += conv_elu(channels, channels)
    decoder_layers += conv_elu(channels, channels)
    decoder_layers += [
        nn.Dropout2d(dropout),
        nn.Conv2d(channels, 3, kernel_size, stride=1, padding=pad),
        final_activation,
    ]
    self.decoder = nn.Sequential(*decoder_layers)
def __init__(self, base_n_channels, neck_n_channels):
    """Create the fourteen ``PCBlock`` stages, the upsampler and the output conv.

    Args:
        base_n_channels: channel count of the first block; later blocks use
            2x and 4x this value.  Must be at least 8.
        neck_n_channels: extra channels added to the input of ``pc11``.
            Must be at least 32.
    """
    super(RIN, self).__init__()
    assert base_n_channels >= 8, "Base num channels should be at least 8"
    assert neck_n_channels >= 32, "Neck num channels should be at least 32"

    c1 = base_n_channels
    c2 = base_n_channels * 2
    c4 = base_n_channels * 4

    def pc(c_in, c_out, kernel_size=3, stride=1, padding=1, **extra):
        """Build one PCBlock; ``extra`` carries ``dilation`` when needed."""
        return PCBlock(channels_in=c_in, channels_out=c_out,
                       kernel_size=kernel_size, stride=stride,
                       padding=padding, **extra)

    self.pc1 = pc(3, c1, kernel_size=5, padding=2)
    self.pc2 = pc(c1, c2, stride=2)
    self.pc3 = pc(c2, c2)
    self.pc4 = pc(c2, c4, stride=2)
    self.pc5 = pc(c4, c4)
    # dilated blocks: padding equals the dilation rate
    self.pc6 = pc(c4, c4, padding=2, dilation=2)
    self.pc7 = pc(c4, c4, padding=2, dilation=2)
    self.pc8 = pc(c4, c4, padding=4, dilation=4)
    self.pc9 = pc(c4, c4, padding=4, dilation=4)
    self.pc10 = pc(c4, c4)

    self.upsample = nn.UpsamplingNearest2d(scale_factor=2.0)

    self.pc11 = pc(c4 + neck_n_channels, c2)
    self.pc12 = pc(c2, c2)
    self.pc13 = pc(c2, c1)
    self.pc14 = pc(c1, c1)

    self.conv1 = nn.Conv2d(c1, 3, kernel_size=3, stride=1, padding=1)

    self.init_weights(init_type="normal", gain=0.02)
def __init__(self, in_channels, out_channels, post_conv=True,
             use_dropout=False, dropout_prob=0.1, norm=nn.BatchNorm2d,
             upsampling_mode='transpose'):
    '''
    :param in_channels: Number of input channels
    :param out_channels: Number of output channels
    :param post_conv: Whether to have another convolutional layer after the
        upsampling layer.
    :param use_dropout: bool. Whether to use dropout or not.
    :param dropout_prob: Float. The dropout probability (if use_dropout is
        True). Applied to both dropout layers.
    :param norm: Which norm to use. If None, no norm is used. Default is
        Batchnorm with affinity.
    :param upsampling_mode: Which upsampling mode:
        transpose: Upsampling with stride-2, kernel size 4 transpose
            convolutions.
        bilinear: Feature map is upsampled with bilinear upsampling, then a
            conv layer.
        nearest: Feature map is upsampled with nearest neighbor upsampling,
            then a conv layer.
        shuffle: Feature map is upsampled with pixel shuffling, then a conv
            layer.
    :raises ValueError: if upsampling_mode is not one of the four modes.
    '''
    super().__init__()

    # A following norm layer has its own affine shift, so the convolutions
    # only carry a bias when no norm is used.
    use_bias = norm is None

    net = list()

    if upsampling_mode == 'transpose':
        net += [nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4,
                                   stride=2, padding=1, bias=use_bias)]
    elif upsampling_mode == 'bilinear':
        net += [nn.UpsamplingBilinear2d(scale_factor=2)]
        net += [Conv2dSame(in_channels, out_channels, kernel_size=3,
                           bias=use_bias)]
    elif upsampling_mode == 'nearest':
        net += [nn.UpsamplingNearest2d(scale_factor=2)]
        net += [Conv2dSame(in_channels, out_channels, kernel_size=3,
                           bias=use_bias)]
    elif upsampling_mode == 'shuffle':
        # PixelShuffle(2) trades a factor of 4 in channels for 2x resolution.
        net += [nn.PixelShuffle(upscale_factor=2)]
        net += [Conv2dSame(in_channels // 4, out_channels, kernel_size=3,
                           bias=use_bias)]
    else:
        raise ValueError("Unknown upsampling mode!")

    if norm is not None:
        net += [norm(out_channels, affine=True)]

    net += [nn.ReLU(True)]

    if use_dropout:
        net += [nn.Dropout2d(dropout_prob, False)]

    if post_conv:
        net += [Conv2dSame(out_channels, out_channels, kernel_size=3,
                           bias=use_bias)]

        if norm is not None:
            net += [norm(out_channels, affine=True)]

        net += [nn.ReLU(True)]

        if use_dropout:
            # Was hard-coded to 0.1, ignoring the dropout_prob argument.
            net += [nn.Dropout2d(dropout_prob, False)]

    self.net = nn.Sequential(*net)
def __init__(self, inp=10, out=16, kernel_size=3, bias=True):
    """Register a convolution and a 2x nearest-neighbour upsampling layer.

    Args:
        inp: input channels of the convolution.
        out: output channels of the convolution.
        kernel_size: kernel size of the convolution.
        bias: whether the convolution has a bias term.
    """
    super(TestUpsampleNearest2d, self).__init__()
    convolution = nn.Conv2d(inp, out, kernel_size=kernel_size, bias=bias)
    upsampler = nn.UpsamplingNearest2d(scale_factor=2)
    self.conv2d = convolution
    self.up = upsampler
def lua_recursive_model(module, seq):
    """Translate the children of ``module`` into PyTorch modules added to ``seq``.

    Every entry of ``module.modules`` is dispatched on its type name (for a
    ``TorchObject`` wrapper, on its ``_typename`` with the ``cudnn.`` prefix
    removed).  The matching PyTorch module is built, learned parameters are
    copied with ``copy_param`` where the layer has any, and the result is
    attached with ``add_submodule``.  Container types recurse.  Unsupported
    types are reported on stdout and skipped.

    Args:
        module: container exposing a ``modules`` sequence.
            NOTE(review): presumably a loaded legacy (Lua) Torch model -
            confirm against the caller.
        seq: destination module that receives the converted children.
    """
    for m in module.modules:
        name = type(m).__name__
        real = m
        if name == 'TorchObject':
            name = m._typename.replace('cudnn.', '')
            m = m._obj

        if name == 'SpatialConvolution':
            if not hasattr(m, 'groups'):
                m.groups = 1
            n = nn.Conv2d(m.nInputPlane, m.nOutputPlane, (m.kW, m.kH),
                          (m.dW, m.dH), (m.padW, m.padH), 1, m.groups,
                          bias=(m.bias is not None))
            copy_param(m, n)
            add_submodule(seq, n)
        elif name == 'SpatialBatchNormalization':
            n = nn.BatchNorm2d(m.running_mean.size(0), m.eps, m.momentum,
                               m.affine)
            copy_param(m, n)
            add_submodule(seq, n)
        elif name == 'ReLU':
            n = nn.ReLU()
            add_submodule(seq, n)
        elif name == 'SpatialMaxPooling':
            n = nn.MaxPool2d((m.kW, m.kH), (m.dW, m.dH), (m.padW, m.padH),
                             ceil_mode=m.ceil_mode)
            add_submodule(seq, n)
        elif name == 'SpatialAveragePooling':
            n = nn.AvgPool2d((m.kW, m.kH), (m.dW, m.dH), (m.padW, m.padH),
                             ceil_mode=m.ceil_mode)
            add_submodule(seq, n)
        elif name == 'SpatialUpSamplingNearest':
            n = nn.UpsamplingNearest2d(scale_factor=m.scale_factor)
            add_submodule(seq, n)
        elif name == 'View':
            n = Lambda(lambda x: x.view(x.size(0), -1))
            add_submodule(seq, n)
        elif name == 'Linear':
            # Linear in pytorch only accept 2D input
            n1 = Lambda(lambda x: x.view(1, -1) if 1 == len(x.size()) else x)
            n2 = nn.Linear(m.weight.size(1), m.weight.size(0),
                           bias=(m.bias is not None))
            copy_param(m, n2)
            n = nn.Sequential(n1, n2)
            add_submodule(seq, n)
        elif name == 'Dropout':
            m.inplace = False
            n = nn.Dropout(m.p)
            add_submodule(seq, n)
        elif name == 'SoftMax':
            n = nn.Softmax()
            add_submodule(seq, n)
        elif name == 'Identity':
            n = Lambda(lambda x: x)  # do nothing
            add_submodule(seq, n)
        elif name == 'SpatialFullConvolution':
            n = nn.ConvTranspose2d(m.nInputPlane, m.nOutputPlane,
                                   (m.kW, m.kH), (m.dW, m.dH),
                                   (m.padW, m.padH),
                                   bias=(m.bias is not None))
            # Previously the learned weights were never transferred, so the
            # converted layer kept its random initialisation.  Handled like
            # SpatialConvolution above.
            copy_param(m, n)
            add_submodule(seq, n)
        elif name == 'SpatialReplicationPadding':
            n = nn.ReplicationPad2d((m.pad_l, m.pad_r, m.pad_t, m.pad_b))
            add_submodule(seq, n)
        elif name == 'SpatialReflectionPadding':
            n = nn.ReflectionPad2d((m.pad_l, m.pad_r, m.pad_t, m.pad_b))
            add_submodule(seq, n)
        elif name == 'Copy':
            n = Lambda(lambda x: x)  # do nothing
            add_submodule(seq, n)
        elif name == 'Narrow':
            # Arguments are bound as a default so each lambda keeps the
            # values of its own layer rather than the loop's last ones.
            n = Lambda(lambda x, a=(m.dimension, m.index, m.length):
                       x.narrow(*a))
            add_submodule(seq, n)
        elif name == 'SpatialCrossMapLRN':
            # NOTE(review): relies on torch.legacy.nn - confirm it exists in
            # the PyTorch version this file targets.
            lrn = torch.legacy.nn.SpatialCrossMapLRN(m.size, m.alpha, m.beta,
                                                     m.k)
            n = Lambda(lambda x, lrn=lrn: Variable(lrn.forward(x.data)))
            add_submodule(seq, n)
        elif name == 'Sequential':
            n = nn.Sequential()
            lua_recursive_model(m, n)
            add_submodule(seq, n)
        elif name == 'ConcatTable':  # output is list
            n = LambdaMap(lambda x: x)
            lua_recursive_model(m, n)
            add_submodule(seq, n)
        elif name == 'CAddTable':  # input is list
            n = LambdaReduce(lambda x, y: x + y)
            add_submodule(seq, n)
        elif name == 'Concat':
            dim = m.dimension
            n = LambdaReduce(lambda x, y, dim=dim: torch.cat((x, y), dim))
            lua_recursive_model(m, n)
            add_submodule(seq, n)
        elif name == 'TorchObject':
            print('Not Implement', name, real._typename)
        else:
            print('Not Implement', name)
def __init__(self, image_size, num_blocks):
    """Assemble the generator: encoder, CAM heads, gamma/beta MLP and decoder.

    Args:
        image_size: side length used to size the first linear layer of the
            gamma/beta block.
        num_blocks: number of ``ResNetBlock`` modules appended to the
            encoder and of ``ResNetAdaLINBlock`` modules registered as
            ``UpBlock1`` .. ``UpBlock<num_blocks>``.
    """
    super(Generator, self).__init__()
    self.in_channels = 3
    self.dim = 64
    self.out_channels = 3
    self.num_blocks = num_blocks
    self.image_size = image_size

    base = self.dim

    # Down-Sampling #
    encoder = [
        nn.ReflectionPad2d(padding=3),
        nn.Conv2d(self.in_channels, base, kernel_size=7, stride=1,
                  padding=0, bias=False),
        nn.InstanceNorm2d(base),
        nn.ReLU(inplace=True),
    ]

    num_down_sampling = 2
    for level in range(num_down_sampling):
        width = base * 2 ** level
        encoder.extend([
            nn.ReflectionPad2d(padding=1),
            nn.Conv2d(width, width * 2, kernel_size=3, stride=2, padding=0,
                      bias=False),
            nn.InstanceNorm2d(width * 2),
            nn.ReLU(inplace=True),
        ])

    factor = 2 ** num_down_sampling
    bottleneck = base * factor
    for _ in range(num_blocks):
        encoder.append(ResNetBlock(bottleneck))

    self.down_sampling = nn.Sequential(*encoder)

    # Class Activation Map (CAM) #
    self.gap_fc = nn.Linear(bottleneck, 1, bias=False)
    self.gmp_fc = nn.Linear(bottleneck, 1, bias=False)
    self.conv = nn.Sequential(
        nn.Conv2d(bottleneck * 2, bottleneck, kernel_size=1, stride=1,
                  padding=0, bias=True),
        nn.ReLU(inplace=True),
    )

    # Block for Gamma and Beta #
    # The size expression is kept exactly as written (it is evaluated
    # left to right).
    flat_features = (image_size // factor * image_size // factor
                     * self.dim * factor)
    self.fc = nn.Sequential(
        nn.Linear(flat_features, bottleneck, bias=False),
        nn.ReLU(inplace=True),
        nn.Linear(bottleneck, bottleneck, bias=False),
        nn.ReLU(inplace=True),
    )
    self.gamma = nn.Linear(bottleneck, bottleneck, bias=False)
    self.beta = nn.Linear(bottleneck, bottleneck, bias=False)

    # Up-Sampling #
    for index in range(num_blocks):
        setattr(self, "UpBlock{}".format(index + 1),
                ResNetAdaLINBlock(bottleneck))

    decoder = []
    num_up_sampling = 2
    for step in range(num_up_sampling):
        wide = base * 2 ** (num_up_sampling - step)
        narrow = wide // 2
        decoder.extend([
            nn.UpsamplingNearest2d(scale_factor=2),
            nn.ReflectionPad2d(padding=1),
            nn.Conv2d(wide, narrow, kernel_size=3, stride=1, padding=0,
                      bias=False),
            ILN(narrow),
            nn.ReLU(inplace=True),
        ])

    decoder.extend([
        nn.ReflectionPad2d(padding=3),
        nn.Conv2d(base, self.out_channels, kernel_size=7, stride=1,
                  padding=0, bias=False),
        nn.Tanh(),
    ])

    self.up_sampling = nn.Sequential(*decoder)
def __init__(self, h, n, input_dim=(64, 64, 3)):
    """Build the auto-encoder style discriminator.

    Args:
        h: size of the hidden code produced by ``fc_encode``.
        n: base channel count; encoder block ``i`` uses ``(i + 1) * n``
            channels, every decoder block uses ``n``.
        input_dim: three-element sequence unpacked as
            ``(channel, width, height)``.
            NOTE(review): with the default ``(64, 64, 3)`` this makes
            ``channel`` equal to 64, so the last decoder convolution emits
            64 channels while the first encoder convolution always expects
            3 - confirm the intended ordering with callers.
    """
    super(D, self).__init__()
    self.n = n
    self.h = h

    channel, width, _height = input_dim
    self.blocks = int(np.log2(width) - 2)
    print("[!] {} blocks in D ".format(self.blocks))

    def conv3x3(c_in, c_out, stride=1):
        return nn.Conv2d(c_in, c_out, kernel_size=3, stride=stride,
                         padding=1)

    last_block = self.blocks - 1

    # ---- encoder -------------------------------------------------------
    encoder_layers = [conv3x3(3, n)]
    previous = n
    for index in range(self.blocks):
        current = (index + 1) * n
        encoder_layers += [
            conv3x3(previous, current), nn.ELU(),
            conv3x3(current, current), nn.ELU(),
        ]
        if index < last_block:
            # Downsampling
            encoder_layers += [conv3x3(current, current, stride=2), nn.ELU()]
        previous = current
    self.encoder = nn.Sequential(*encoder_layers)

    # ---- bottleneck ----------------------------------------------------
    self.fc_encode = nn.Linear(8 * 8 * self.blocks * n, h)
    self.fc_decode = nn.Linear(h, 8 * 8 * n)

    # ---- decoder -------------------------------------------------------
    decoder_layers = []
    for index in range(self.blocks):
        decoder_layers += [
            conv3x3(n, n), nn.ELU(),
            conv3x3(n, n), nn.ELU(),
        ]
        if index < last_block:
            decoder_layers.append(nn.UpsamplingNearest2d(scale_factor=2))
    decoder_layers.append(conv3x3(n, channel))
    self.decoder = nn.Sequential(*decoder_layers)
def create_up(in_c, out_c, stride=2, dilation=1):
    """Return an upsample -> 3x3 conv -> batch-norm -> PReLU block.

    Args:
        in_c: input channels of the convolution.
        out_c: output channels of the convolution and batch norm.
        stride: factor by which the feature map is upsampled.
        dilation: only used to derive the convolution padding.
            NOTE(review): the value is not passed to ``nn.Conv2d`` as its
            dilation, so for ``dilation > 1`` the output is larger than the
            upsampled input - confirm whether that is intended.

    Returns:
        An ``nn.Sequential`` of four layers.
    """
    return nn.Sequential(
        # The two positional parameters of UpsamplingNearest2d are
        # (size, scale_factor).  Passing ``stride`` for both sets a fixed
        # output size *and* a scale, which is rejected when the layer is
        # applied; only the scale factor is wanted here.
        nn.UpsamplingNearest2d(scale_factor=stride),
        nn.Conv2d(in_c, out_c, 3, 1, (dilation - 1) + 1),
        nn.BatchNorm2d(out_c),
        nn.PReLU(),
    )
def forward(self, x):
    """Run the encoder, the pooling bottleneck and the skip-connected decoder.

    Args:
        x: Input passed to ``self.conv1``.

    Returns:
        The output of ``self.conv_final``.
    """
    # Encoder: six (conv -> residual block) stages; keep every residual
    # output as a skip connection and the very first conv output as well.
    first_conv = None
    skips = []
    feat = x
    for level in range(1, 7):
        feat = getattr(self, "conv%d" % level)(feat)
        if level == 1:
            first_conv = feat
        feat = getattr(self, "rest_block_%d" % level)(feat)
        skips.append(feat)

    # Bottleneck pooling on the deepest residual output.
    feat = self.PSPPooling(skips.pop())

    # Decoder: conv -> 2x nearest upsampling -> combine with the matching
    # encoder skip -> residual block (up5 down to up1).
    upsample = nn.UpsamplingNearest2d(scale_factor=2)
    for stage in range(1, 6):
        feat = getattr(self, "convup%d" % stage)(feat)
        feat = upsample(feat)
        feat = getattr(self, "combine%d" % stage)(feat, skips.pop())
        feat = getattr(self, "rest_block_up_%d" % (6 - stage))(feat)

    # NOTE(review): combine5 is used a second time here (same module as the
    # last decoder stage) - confirm this weight sharing is intended.
    feat = self.combine5(feat, first_conv)
    feat = self.PSPPoolingResult(feat)
    return self.conv_final(feat)
def __init__(self, c3, c4, c5, inner_channels, weight_inputs=True,
             first=False):
    """Create the projection, fusion and resampling modules of one layer.

    Args:
        c3, c4, c5: Channel counts of the three inputs; only used when
            ``first`` is true, to build the 1x1 projection branches.
        inner_channels: Channel count used by every fusion convolution.
        weight_inputs: Forwarded as ``requires_grad`` to each ``ScaleWeight``.
        first: When true, additionally build the input projection branches
            and the ``c5_to_p6`` / ``p6_to_p7`` branches.
    """
    super(BiFPNLayer, self).__init__()
    self.first = first

    def project(in_channels):
        # 1x1 conv to ``inner_channels`` followed by batch norm.
        return nn.Sequential(
            Conv2dDynamicSamePadding(in_channels, inner_channels, 1),
            nn.BatchNorm2d(inner_channels, momentum=0.01, eps=1e-3))

    if self.first:
        self.c3_latent = project(c3)
        self.c4_latent = project(c4)
        self.c5_latent = project(c5)
        self.c5_to_p6 = nn.Sequential(
            Conv2dDynamicSamePadding(c5, inner_channels, 1),
            nn.BatchNorm2d(inner_channels, momentum=0.01, eps=1e-3),
            MaxPool2dDynamicSamePadding(3, 2))
        self.p6_to_p7 = nn.Sequential(MaxPool2dDynamicSamePadding(3, 2))
        # Second, independent projections of c4 and c5.
        self.c4_latent_re = project(c4)
        self.c5_latent_re = project(c5)

    # (attribute name, number of weights in the matching ScaleWeight),
    # listed in registration order.
    fusion_nodes = (
        ("p6_0", 2),
        ("p5_0", 2),
        ("p4_0", 2),
        ("p3_1", 2),
        ("p4_1", 3),
        ("p5_1", 3),
        ("p6_1", 3),
        ("p7_1", 2),
    )
    for node_name, fan_in in fusion_nodes:
        setattr(self, node_name,
                DWSConv2d(inner_channels, inner_channels, act=False))
        setattr(self, node_name + "_scale",
                ScaleWeight(fan_in, requires_grad=weight_inputs))

    self.up_sample = nn.UpsamplingNearest2d(scale_factor=2)
    self.down_sample = MaxPool2dDynamicSamePadding(3, 2)
    self.act = MemoryEfficientSwish()
def upsample_conv(self, x, conv):
    """Upsample ``x`` by 2 (nearest neighbour) and return ``conv`` of it."""
    doubler = nn.UpsamplingNearest2d(scale_factor=2)
    enlarged = doubler(x)
    return conv(enlarged)
def __init__(self, nc, ngf, ndf, latent_variable_size):
    """Create the encoder, the two latent heads and the decoder layers.

    Args:
        nc: Number of image channels (encoder input / decoder output).
        ngf: Base channel width of the decoder.
        ndf: Base channel width of the encoder.
        latent_variable_size: Output size of ``fc1`` / ``fc2`` and input
            size of ``d1``.
    """
    super(VAE, self).__init__()

    self.nc = nc
    self.ngf = ngf
    self.ndf = ndf
    self.latent_variable_size = latent_variable_size

    # encoder: e1..e7 are 4x4 stride-2 convs, each followed by bn1..bn7;
    # channel widths are nc, ndf, 2*ndf, ..., 64*ndf.
    enc_widths = [nc] + [ndf * 2**k for k in range(7)]
    for idx in range(1, 8):
        c_in, c_out = enc_widths[idx - 1], enc_widths[idx]
        setattr(self, "e%d" % idx,
                nn.Conv2d(c_in, c_out, kernel_size=4, stride=2, padding=1))
        setattr(self, "bn%d" % idx, nn.BatchNorm2d(c_out))

    self.fc1 = nn.Linear(ndf * 64 * 4 * 4, latent_variable_size)
    self.fc2 = nn.Linear(ndf * 64 * 4 * 4, latent_variable_size)

    # decoder: d1 is linear; then seven (up, pad, 3x3 conv) stages
    # up1/pd1/d2 .. up7/pd7/d8, with bn8..bn13 after d2..d7 only.
    self.d1 = nn.Linear(latent_variable_size, ngf * 64 * 4 * 4)
    dec_widths = [ngf * 2**k for k in range(6, -1, -1)] + [nc]
    for step in range(1, 8):
        c_in, c_out = dec_widths[step - 1], dec_widths[step]
        setattr(self, "up%d" % step, nn.UpsamplingNearest2d(scale_factor=2))
        setattr(self, "pd%d" % step, nn.ReplicationPad2d(1))
        setattr(self, "d%d" % (step + 1),
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1))
        if step < 7:
            setattr(self, "bn%d" % (step + 7),
                    nn.BatchNorm2d(c_out, eps=1.e-3))

    self.leakyrelu = nn.LeakyReLU(0.2)
    self.relu = nn.ReLU()
    self.maxpool = nn.MaxPool2d((2, 2), (2, 2))
def __init__(self, in_, out, scale):
    """Create a 1x1 convolution and a nearest-neighbour upsampler.

    Args:
        in_: Input channels of the 1x1 convolution.
        out: Output channels of the 1x1 convolution.
        scale: Scale factor of the upsampling layer.
    """
    super().__init__()
    pointwise = nn.Conv2d(in_, out, kernel_size=1)
    self.up_conv = pointwise
    self.upsample = nn.UpsamplingNearest2d(scale_factor=scale)
def __init__(self, previous_in_channels, out_channels, kernel_size):
    """Initialise the base class with a 2x nearest upsampler as ``pre_output``.

    Args:
        previous_in_channels: Forwarded unchanged to the base initialiser.
        out_channels: Forwarded unchanged to the base initialiser.
        kernel_size: Forwarded unchanged to the base initialiser.
    """
    upsampler = nn.UpsamplingNearest2d(scale_factor=2)
    super(Decoder, self).__init__(
        previous_in_channels,
        out_channels,
        kernel_size,
        pre_output=upsampler,
    )
def __init__(self, opt, test=False, input_=None, target=None):
    """Build the ResNet-feature auto-encoder and, optionally, its optimizer.

    Args:
        opt: Options namespace. Fields read here: ``bsz``, ``input_len``,
            ``target_len``, ``nc_in``, ``nc_out``, ``nf``, ``latentDim``,
            ``instanceNorm``, ``middleNL``, ``frame_width``,
            ``frame_height``, ``maskPredictor``, ``lr`` and ``beta1``.
        test: When true, the placeholder buffers use a batch size of 1.
        input_: Optional pre-allocated input buffer used instead of a new one.
        target: Optional pre-allocated target buffer used instead of a new one.
    """
    super(Resnet_ae, self).__init__()
    self.__name__ = "resnet_ae"

    # define variables
    bsz = 1 if test else opt.bsz
    if input_ is not None:
        self.input = input_
    else:
        self.input = torch.FloatTensor(bsz * opt.input_len, opt.nc_in, 64, 64)
        self.input = Variable(self.input)
    if target is not None:
        self.target = target
    else:
        self.target = torch.FloatTensor(bsz * opt.target_len, opt.nc_out, 64,
                                        64)
        self.target = Variable(self.target)

    self.criterion = nn.MSELoss()
    Norm = nn.InstanceNorm2d if opt.instanceNorm else nn.BatchNorm2d

    # define model
    self.nc_out = opt.nc_out
    self.latentDim = opt.latentDim
    self.input_len, self.target_len = opt.input_len, opt.target_len
    # Fix: height and width were previously assigned from the swapped
    # option fields (frame_height <- opt.frame_width and vice versa).
    self.frame_height, self.frame_width = opt.frame_height, opt.frame_width

    # Pretrained ResNet-18, truncated to its first six child modules.
    resnet = torchvision.models.resnet18(True)
    self.resnet_features = nn.Sequential(*list(resnet.children())[:6])

    middleNL = nn.Sigmoid() if opt.middleNL == "sigmoid" else nn.Tanh()
    self.encoder = nn.Sequential(nn.Linear(128 * 8 * 8, opt.latentDim),
                                 middleNL)
    self.decoder = nn.Linear(opt.input_len * opt.latentDim,
                             opt.target_len * 128 * 8 * 8)
    # Three (conv, norm, ReLU, 2x upsample) stages, then a sigmoid-activated
    # conv producing target_len * nc_out channels.
    self.deconv = nn.Sequential(
        nn.Conv2d(128, opt.nf * 4, 3, 1, 1),
        Norm(opt.nf * 4),
        nn.ReLU(),
        nn.UpsamplingNearest2d(scale_factor=2),
        nn.Conv2d(opt.nf * 4, opt.nf * 2, 3, 1, 1),
        Norm(opt.nf * 2),
        nn.ReLU(),
        nn.UpsamplingNearest2d(scale_factor=2),
        nn.Conv2d(opt.nf * 2, opt.nf, 3, 1, 1),
        Norm(opt.nf),
        nn.ReLU(),
        nn.UpsamplingNearest2d(scale_factor=2),
        nn.Conv2d(opt.nf, opt.target_len * opt.nc_out, 3, 1, 1),
        nn.Sigmoid(),
    )

    # define maskPredictor
    if opt.maskPredictor:
        # Fixed single-frame configuration for the nested model; it has no
        # optimizer (lr is None) and no nested mask predictor of its own.
        optmp = {
            "frame_width": opt.frame_width,
            "frame_height": opt.frame_height,
            "input_len": 1,
            "target_len": 1,
            "nc_in": opt.nc_in,
            "nc_out": opt.nc_out,
            "nf": opt.nf,
            "latentDim": 128,
            "instanceNorm": False,
            "middleNL": opt.middleNL,
            "bsz": None,
            "maskPredictor": None,
            "lr": None,
            "beta1": None,
        }
        optmp = utils.to_namespace(optmp)
        self.maskPredictor = Resnet_ae(optmp, False, self.target,
                                       self.target).eval()
        self.maskPredictor.load(opt.maskPredictor)
    else:
        self.maskPredictor = None

    # Created last so that self.parameters() covers every submodule
    # registered above (including the mask predictor, when present).
    if opt.lr is not None:
        self.optimizer = optim.Adam(self.parameters(),
                                    lr=opt.lr,
                                    betas=(opt.beta1, 0.999))
def __init__(self,
             input_nc,
             output_nc,
             ngf=64,
             norm_layer=nn.BatchNorm2d,
             use_dropout=False,
             n_blocks=6,
             gpu_ids=None,
             padding_type='reflect',
             upsample=False):
    """Build the generator as a single ``nn.Sequential`` in ``self.model``.

    Args:
        input_nc: Number of input channels.
        output_nc: Number of output channels.
        ngf: Channel width after the first convolution.
        norm_layer: Normalisation layer class or ``functools.partial`` of one;
            convolutions get a bias only when it is ``nn.InstanceNorm2d``.
        use_dropout: Forwarded to every ``ResnetBlock``.
        n_blocks: Number of ``ResnetBlock`` modules (must be >= 0).
        gpu_ids: Stored on the instance; ``None`` (the default) yields a
            fresh empty list per instance.
        padding_type: Forwarded to every ``ResnetBlock``.
        upsample: When true, each upsampling stage is nearest upsampling +
            reflection pad + 3x3 conv; otherwise a transposed convolution.
    """
    assert (n_blocks >= 0)
    super(ResnetGenerator, self).__init__()
    self.input_nc = input_nc
    self.output_nc = output_nc
    self.ngf = ngf
    # A mutable ``[]`` default would be one list shared by every instance.
    self.gpu_ids = [] if gpu_ids is None else gpu_ids

    if isinstance(norm_layer, functools.partial):
        use_bias = norm_layer.func == nn.InstanceNorm2d
    else:
        use_bias = norm_layer == nn.InstanceNorm2d

    model = [
        nn.ReflectionPad2d(3),
        nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
        norm_layer(ngf),
        nn.ReLU(True)
    ]

    n_downsampling = 2
    # All but the last downsampling stage: conv + norm + ReLU.
    for i in range(n_downsampling - 1):
        mult = 2**i
        model += [
            nn.Conv2d(ngf * mult,
                      ngf * mult * 2,
                      kernel_size=3,
                      stride=2,
                      padding=1,
                      bias=use_bias),
            norm_layer(ngf * mult * 2),
            nn.ReLU(True)
        ]

    # The last downsampling conv has no norm/ReLU of its own; they are
    # applied after the residual blocks instead.
    mult = 2**(n_downsampling - 1)
    model += [
        nn.Conv2d(ngf * mult,
                  ngf * mult * 2,
                  kernel_size=3,
                  stride=2,
                  padding=1,
                  bias=use_bias)
    ]

    mult = 2**n_downsampling
    for _ in range(n_blocks):
        model += [
            ResnetBlock(ngf * mult,
                        padding_type=padding_type,
                        norm_layer=norm_layer,
                        use_dropout=use_dropout,
                        use_bias=use_bias)
        ]
    model += [norm_layer(ngf * mult), nn.ReLU(True)]

    for i in range(n_downsampling):
        mult = 2**(n_downsampling - i)
        if upsample:
            model += [
                nn.UpsamplingNearest2d(scale_factor=2),
                nn.ReflectionPad2d(1),
                nn.Conv2d(ngf * mult,
                          int(ngf * mult / 2),
                          kernel_size=3,
                          bias=use_bias)
            ]
        else:
            model += [
                nn.ConvTranspose2d(ngf * mult,
                                   int(ngf * mult / 2),
                                   kernel_size=3,
                                   stride=2,
                                   padding=1,
                                   output_padding=1,
                                   bias=use_bias)
            ]
        model += [norm_layer(int(ngf * mult / 2)), nn.ReLU(True)]

    model += [nn.ReflectionPad2d(3)]
    model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
    model += [nn.Tanh()]

    self.model = nn.Sequential(*model)
def __init__(self,
             imsize=(1, 28, 28),
             outsize=None,
             s=32,
             mean=None,
             std=None):
    """Build the conv encoder / ``KernModule`` / transposed-conv decoder stack.

    Args:
        imsize: Input size tuple; index 0 is used as the channel count and
            the last two entries as the spatial extent.
        outsize: Output size tuple; defaults to ``imsize``. Index 0 is the
            channel count of the last layer.
        s: Base channel width.
        mean: Optional values for the ``mean`` buffer (zeros when omitted).
        std: Optional values for the ``std`` buffer (ones when omitted).
    """
    super(ConvNet, self).__init__()
    print("Version 0.6")

    def gap_to_pow2(extent):
        # Distance from ``extent`` up to the next power of two.
        return 2**(int(np.ceil(np.log2(extent)))) - extent

    pow_pad = (gap_to_pow2(imsize[-2]), gap_to_pow2(imsize[-1]))
    kern_size = 4 * ((imsize[1] + pow_pad[0]) // 16) * (
        (imsize[2] + pow_pad[1]) // 16) * s
    print("Additional padding to fit 2 exp:", pow_pad)
    print("Kern size:", kern_size)

    self.imsize = imsize
    self.outsize = imsize if outsize is None else outsize

    mean_buf = torch.zeros(imsize) if mean is None else torch.Tensor(mean)
    self.register_buffer('mean', mean_buf)
    std_buf = torch.ones(imsize) if std is None else torch.Tensor(std)
    self.register_buffer('std', std_buf)

    in_ch = imsize[0]

    # Stem: 1x1 conv whose padding carries the extra border pixels.
    stack = [
        nn.Conv2d(in_ch, in_ch, kernel_size=1, padding=pow_pad),
        nn.BatchNorm2d(in_ch),
        nn.ReLU(),
    ]

    # Encoder: four (conv, BN, ReLU, 2x2 max-pool) stages.
    encoder_spec = (
        (in_ch, 1 * s, 5, 2),
        (1 * s, 2 * s, 3, 1),
        (2 * s, 4 * s, 3, 1),
        (4 * s, 4 * s, 3, 1),
    )
    for c_in, c_out, kernel, pad in encoder_spec:
        stack += [
            nn.Conv2d(c_in, c_out, kernel_size=kernel, padding=pad),
            nn.BatchNorm2d(c_out),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]

    stack.append(KernModule(h=kern_size))

    # Decoder: three (2x upsample, transposed conv, BN, ReLU) stages.
    decoder_spec = (
        (4 * s, 4 * s),
        (4 * s, 2 * s),
        (2 * s, 1 * s),
    )
    for c_in, c_out in decoder_spec:
        stack += [
            nn.UpsamplingNearest2d(scale_factor=2),
            nn.ConvTranspose2d(c_in, c_out, kernel_size=3, padding=1),
            nn.BatchNorm2d(c_out),
            nn.ReLU(),
        ]

    # Output head: last upsample, 5x5 transposed conv and sigmoid.
    stack += [
        nn.UpsamplingNearest2d(scale_factor=2),
        nn.ConvTranspose2d(1 * s, self.outsize[0], kernel_size=5, padding=2),
        nn.Sigmoid(),
    ]

    self.layers = nn.Sequential(*stack)
def __init__(self,
             input_nc,
             output_nc,
             ngf=64,
             norm_layer=nn.BatchNorm2d,
             use_dropout=False,
             n_blocks=6,
             gpu_ids=None,
             padding_type='reflect',
             multisa=False,
             upsample=False):
    """Build the decoder layers as an ``nn.ModuleList`` in ``self.model``.

    Args:
        input_nc: Stored on the instance; not used to size any layer here.
        output_nc: Number of output channels of the final convolution.
        ngf: Base channel width; the residual blocks use ``ngf * 4``.
        norm_layer: Normalisation layer class or ``functools.partial`` of one;
            convolutions get a bias only when it is ``nn.InstanceNorm2d``.
        use_dropout: Forwarded to every ``ResnetBlock``.
        n_blocks: Number of ``ResnetBlock`` modules (must be >= 0).
        gpu_ids: Stored on the instance; ``None`` (the default) yields a
            fresh empty list per instance.
        padding_type: Forwarded to every ``ResnetBlock``.
        multisa: When true, every upsampling conv and the final conv accept
            one additional input channel.
        upsample: When true, each upsampling stage is nearest upsampling +
            reflection pad + 3x3 conv; otherwise a transposed convolution.
    """
    assert (n_blocks >= 0)
    super(ResnetDecoder, self).__init__()
    self.input_nc = input_nc
    self.output_nc = output_nc
    self.ngf = ngf
    # A mutable ``[]`` default would be one list shared by every instance.
    self.gpu_ids = [] if gpu_ids is None else gpu_ids
    self.saliency = multisa
    # Number of extra input channels expected by the convolutions.
    usesa = 1 if multisa else 0

    if isinstance(norm_layer, functools.partial):
        use_bias = norm_layer.func == nn.InstanceNorm2d
    else:
        use_bias = norm_layer == nn.InstanceNorm2d

    model = []
    n_downsampling = 2
    mult = 2**n_downsampling
    for _ in range(n_blocks):
        model += [
            ResnetBlock(ngf * mult,
                        padding_type=padding_type,
                        norm_layer=norm_layer,
                        use_dropout=use_dropout,
                        use_bias=use_bias)
        ]

    for i in range(n_downsampling):
        mult = 2**(n_downsampling - i)
        if upsample:
            model += [
                nn.UpsamplingNearest2d(scale_factor=2),
                nn.ReflectionPad2d(1),
                nn.Conv2d(ngf * mult + usesa,
                          int(ngf * mult / 2),
                          kernel_size=3,
                          bias=use_bias)
            ]
        else:
            model += [
                nn.ConvTranspose2d(ngf * mult + usesa,
                                   int(ngf * mult / 2),
                                   kernel_size=3,
                                   stride=2,
                                   padding=1,
                                   output_padding=1,
                                   bias=use_bias)
            ]
        model += [norm_layer(int(ngf * mult / 2)), nn.ReLU(True)]

    model += [nn.ReflectionPad2d(3)]
    model += [nn.Conv2d(ngf + usesa, output_nc, kernel_size=7, padding=0)]
    model += [nn.Tanh()]

    # Kept as a ModuleList (not Sequential) so layers can be applied one by
    # one by the caller.
    self.model = nn.ModuleList(model)
def __init__(self, d):
    """Create the decoder layers and load their weights from ``d``.

    Args:
        d: Object whose ``modules[i].weight`` / ``modules[i].bias`` are
            NumPy arrays (they are passed to ``torch.from_numpy``); the
            indices used are 1, 5, 8, 11, 14, 18, 21, 25 and 28.
    """
    super(decoder4, self).__init__()

    def pad1():
        return nn.ReflectionPad2d((1, 1, 1, 1))

    def loaded_conv(c_in, c_out, src_index):
        # Unpadded 3x3 conv whose parameters are replaced by the arrays
        # stored at ``d.modules[src_index]``.
        conv = nn.Conv2d(c_in, c_out, 3, 1, 0)
        conv.weight = torch.nn.Parameter(
            torch.from_numpy(d.modules[src_index].weight).float())
        conv.bias = torch.nn.Parameter(
            torch.from_numpy(d.modules[src_index].bias).float())
        return conv

    # decoder (the "N x N" notes are the spatial sizes from the original
    # comments)
    self.reflecPad11 = pad1()
    self.conv11 = loaded_conv(512, 256, 1)
    self.relu11 = nn.ReLU(inplace=True)
    # 28 x 28

    self.unpool = nn.UpsamplingNearest2d(scale_factor=2)
    # 56 x 56

    self.reflecPad12 = pad1()
    self.conv12 = loaded_conv(256, 256, 5)
    self.relu12 = nn.ReLU(inplace=True)

    self.reflecPad13 = pad1()
    self.conv13 = loaded_conv(256, 256, 8)
    self.relu13 = nn.ReLU(inplace=True)

    self.reflecPad14 = pad1()
    self.conv14 = loaded_conv(256, 256, 11)
    self.relu14 = nn.ReLU(inplace=True)

    self.reflecPad15 = pad1()
    self.conv15 = loaded_conv(256, 128, 14)
    self.relu15 = nn.ReLU(inplace=True)
    # 56 x 56

    self.unpool2 = nn.UpsamplingNearest2d(scale_factor=2)
    # 112 x 112

    self.reflecPad16 = pad1()
    self.conv16 = loaded_conv(128, 128, 18)
    self.relu16 = nn.ReLU(inplace=True)

    self.reflecPad17 = pad1()
    self.conv17 = loaded_conv(128, 64, 21)
    self.relu17 = nn.ReLU(inplace=True)
    # 112 x 112

    self.unpool3 = nn.UpsamplingNearest2d(scale_factor=2)
    # 224 x 224

    self.reflecPad18 = pad1()
    self.conv18 = loaded_conv(64, 64, 25)
    self.relu18 = nn.ReLU(inplace=True)

    # Final projection to 3 channels; no activation follows it.
    self.reflecPad19 = pad1()
    self.conv19 = loaded_conv(64, 3, 28)
def create_network(self, blocks):
    """Translate a list of config blocks into an ``nn.ModuleList``.

    Args:
        blocks: Iterable of dicts; each has a ``'type'`` key plus the
            type-specific keys read below (values are parsed with ``int`` /
            ``float`` / ``split``).

    Returns:
        ``nn.ModuleList`` with one entry per block, except ``'net'`` blocks
        (which only set the input channel count) and unknown types (which
        are reported with ``print`` and skipped).
    """
    models = nn.ModuleList()
    prev_filters = 3
    # Output channel count of every module appended so far.
    out_filters = []
    conv_id = 0

    for block in blocks:
        kind = block['type']

        if kind == 'net':
            prev_filters = int(block['channels'])
            continue

        elif kind == 'convolutional':
            conv_id += 1
            batch_normalize = int(block['batch_normalize'])
            filters = int(block['filters'])
            kernel_size = int(block['size'])
            stride = int(block['stride'])
            is_pad = int(block['pad'])
            pad = (kernel_size - 1) // 2 if is_pad else 0
            activation = block['activation']

            model = nn.Sequential()
            # The conv carries a bias only when no batch norm follows it.
            model.add_module(
                'conv{0}'.format(conv_id),
                nn.Conv2d(prev_filters, filters, kernel_size, stride, pad,
                          bias=not batch_normalize))
            if batch_normalize:
                model.add_module('bn{0}'.format(conv_id),
                                 nn.BatchNorm2d(filters, eps=1e-4))
            if activation == 'leaky':
                model.add_module('leaky{0}'.format(conv_id),
                                 nn.LeakyReLU(0.1, inplace=True))
            elif activation == 'relu':
                model.add_module('relu{0}'.format(conv_id),
                                 nn.ReLU(inplace=True))

            prev_filters = filters
            out_filters.append(prev_filters)
            models.append(model)

        elif kind == 'maxpool':
            pool_size = int(block['size'])
            stride = int(block['stride'])
            if stride > 1:
                model = nn.MaxPool2d(pool_size, stride)
            else:
                model = MaxPoolStride1()
            out_filters.append(prev_filters)
            models.append(model)

        elif kind == 'avgpool':
            out_filters.append(prev_filters)
            models.append(GlobalAvgPool2d())

        elif kind == 'softmax':
            out_filters.append(prev_filters)
            models.append(nn.Softmax())

        elif kind == 'cost':
            cost_type = block['_type']
            # NOTE(review): an unrecognised '_type' leaves ``model`` at
            # whatever an earlier block assigned (or unbound).
            if cost_type == 'sse':
                model = nn.MSELoss(size_average=True)
            elif cost_type == 'L1':
                model = nn.L1Loss(size_average=True)
            elif cost_type == 'smooth':
                model = nn.SmoothL1Loss(size_average=True)
            out_filters.append(1)
            models.append(model)

        elif kind == 'reorg':
            stride = int(block['stride'])
            prev_filters = stride * stride * prev_filters
            out_filters.append(prev_filters)
            models.append(Reorg(stride))

        elif kind == 'route':
            ind = len(models)
            # Non-positive indices are relative to the current position.
            layers = []
            for token in block['layers'].split(','):
                idx = int(token)
                layers.append(idx if idx > 0 else idx + ind)
            if len(layers) == 1:
                prev_filters = out_filters[layers[0]]
            elif len(layers) == 2:
                assert (layers[0] == ind - 1)
                prev_filters = (out_filters[layers[0]] +
                                out_filters[layers[1]])
            out_filters.append(prev_filters)
            models.append(EmptyModule())

        elif kind == 'shortcut':
            ind = len(models)
            prev_filters = out_filters[ind - 1]
            out_filters.append(prev_filters)
            models.append(EmptyModule())

        elif kind == 'connected':
            filters = int(block['output'])
            fc_activation = block['activation']
            # NOTE(review): any other activation leaves ``model`` at
            # whatever an earlier block assigned (or unbound).
            if fc_activation == 'linear':
                model = nn.Linear(prev_filters, filters)
            elif fc_activation == 'leaky':
                model = nn.Sequential(nn.Linear(prev_filters, filters),
                                      nn.LeakyReLU(0.1, inplace=True))
            elif fc_activation == 'relu':
                model = nn.Sequential(nn.Linear(prev_filters, filters),
                                      nn.ReLU(inplace=True))
            prev_filters = filters
            out_filters.append(prev_filters)
            models.append(model)

        elif kind == 'region':
            loss = DistiledRegionLoss() if self.distiling else RegionLoss()
            anchors = block['anchors'].split(',')
            if anchors == ['']:
                loss.anchors = []
            else:
                loss.anchors = [float(value) for value in anchors]
            loss.num_classes = int(block['classes'])
            loss.num_anchors = int(block['num'])
            loss.anchor_step = len(loss.anchors) // loss.num_anchors
            loss.object_scale = float(block['object_scale'])
            loss.noobject_scale = float(block['noobject_scale'])
            loss.class_scale = float(block['class_scale'])
            loss.coord_scale = float(block['coord_scale'])
            out_filters.append(prev_filters)
            models.append(loss)

        elif kind == 'upsample':
            factor = int(block['stride'])
            out_filters.append(prev_filters)
            models.append(nn.UpsamplingNearest2d(scale_factor=factor))

        else:
            print('unknown type %s' % (block['type']))

    return models