def rotate_tensor2d(inputs, rotate_theta, offset=None, padding_mode='zeros', pre_padding=None):
    """Rotate a 2D tensor counter-clockwise.

    Args:
        inputs: torch tensor, [N, C, H, W]
        rotate_theta: ndarray, [N]
        offset: None or ndarray, [2, N]
        padding_mode: "zeros" or "border"
        pre_padding: None or float. The value used for pre-padding such that width == height.
    Return:
        outputs: rotated tensor
    """
    device = inputs.device
    if pre_padding is not None:
        lr_pad_w = int((np.max(inputs.shape[2:]) - inputs.shape[3]) / 2)
        ud_pad_h = int((np.max(inputs.shape[2:]) - inputs.shape[2]) / 2)
        # pad with the requested fill value so the map becomes square before rotating
        add_pad = nn.ConstantPad2d((lr_pad_w, lr_pad_w, ud_pad_h, ud_pad_h), pre_padding).to(device)
        inputs = add_pad(inputs)
    const_zeros = np.zeros(len(rotate_theta))
    affine = np.asarray([[np.cos(rotate_theta), -np.sin(rotate_theta), const_zeros],
                         [np.sin(rotate_theta), np.cos(rotate_theta), const_zeros]])
    affine = torch.from_numpy(affine).permute(2, 0, 1).float().to(device)
    flow_grid = F.affine_grid(affine, inputs.size(), align_corners=True).to(device)
    outputs = F.grid_sample(inputs, flow_grid, padding_mode=padding_mode, align_corners=True)
    if offset is not None:
        const_ones = np.ones(len(rotate_theta))
        affine = np.asarray([[const_ones, const_zeros, offset[0]],
                             [const_zeros, const_ones, offset[1]]])
        affine = torch.from_numpy(affine).permute(2, 0, 1).float().to(device)
        flow_grid = F.affine_grid(affine, inputs.size(), align_corners=True).to(device)
        outputs = F.grid_sample(outputs, flow_grid, padding_mode=padding_mode, align_corners=True)
    if pre_padding is not None:
        outputs = outputs[:, :, ud_pad_h:(outputs.shape[2] - ud_pad_h),
                          lr_pad_w:(outputs.shape[3] - lr_pad_w)]
    return outputs
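# Usage sketch (not from the original source): it assumes the module-level imports that
# rotate_tensor2d itself relies on (`numpy as np`, `torch`, `torch.nn as nn`,
# `torch.nn.functional as F`); `feats`, `thetas`, and the helper name are hypothetical.
# With pre_padding set, a non-square map is padded to a square before the rotation and
# cropped back afterwards, so the output keeps the input shape.
def _rotate_tensor2d_example():
    feats = torch.randn(4, 3, 32, 48)        # [N, C, H, W]
    thetas = np.full(4, np.pi / 2)           # one rotation angle per sample
    rotated = rotate_tensor2d(feats, thetas, pre_padding=0.0)
    assert rotated.shape == feats.shape
    return rotated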
def __init__(self, conv_output_size: List[int], conv_kernel_size: List[Tuple[int, int]],
             adaptive_pooling_size: List[Tuple[int, int]], **kwargs):
    super().__init__(**kwargs)

    self.cosine_module = CosineMatrixAttention()

    if len(conv_output_size) != len(conv_kernel_size) or len(conv_output_size) != len(adaptive_pooling_size):
        raise Exception("conv_output_size, conv_kernel_size, adaptive_pooling_size must have the same length")

    conv_layer_dict = OrderedDict()
    last_channel_out = 1
    for i in range(len(conv_output_size)):
        conv_layer_dict["pad_" + str(i)] = nn.ConstantPad2d(
            (0, conv_kernel_size[i][0] - 1, 0, conv_kernel_size[i][1] - 1), 0)
        conv_layer_dict["conv_" + str(i)] = nn.Conv2d(
            kernel_size=conv_kernel_size[i], in_channels=last_channel_out,
            out_channels=conv_output_size[i])
        conv_layer_dict["relu_" + str(i)] = nn.ReLU()
        conv_layer_dict["pool_" + str(i)] = nn.AdaptiveMaxPool2d(adaptive_pooling_size[i])
        last_channel_out = conv_output_size[i]

    self.conv_layers = nn.Sequential(conv_layer_dict)

    self.dense = nn.Linear(conv_output_size[-1] * adaptive_pooling_size[-1][0] * adaptive_pooling_size[-1][1],
                           out_features=100, bias=True)
    self.dense2 = nn.Linear(100, out_features=10, bias=True)
    self.dense3 = nn.Linear(10, out_features=1, bias=False)
def create_resnetv2_stem(
        in_chs, out_chs=64, stem_type='', preact=True,
        conv_layer=StdConv2d, norm_layer=partial(GroupNormAct, num_groups=32)):
    stem = OrderedDict()
    assert stem_type in ('', 'fixed', 'same', 'deep', 'deep_fixed', 'deep_same', 'tiered')

    # NOTE conv padding mode can be changed by overriding the conv_layer def
    if is_stem_deep(stem_type):
        # A 3 deep 3x3 conv stack as in ResNet V1D models
        if 'tiered' in stem_type:
            stem_chs = (3 * out_chs // 8, out_chs // 2)  # 'T' resnets in resnet.py
        else:
            stem_chs = (out_chs // 2, out_chs // 2)  # 'D' ResNets
        stem['conv1'] = conv_layer(in_chs, stem_chs[0], kernel_size=3, stride=2)
        stem['norm1'] = norm_layer(stem_chs[0])
        stem['conv2'] = conv_layer(stem_chs[0], stem_chs[1], kernel_size=3, stride=1)
        stem['norm2'] = norm_layer(stem_chs[1])
        stem['conv3'] = conv_layer(stem_chs[1], out_chs, kernel_size=3, stride=1)
        if not preact:
            stem['norm3'] = norm_layer(out_chs)
    else:
        # The usual 7x7 stem conv
        stem['conv'] = conv_layer(in_chs, out_chs, kernel_size=7, stride=2)
        if not preact:
            stem['norm'] = norm_layer(out_chs)

    if 'fixed' in stem_type:
        # 'fixed' SAME padding approximation that is used in BiT models
        stem['pad'] = nn.ConstantPad2d(1, 0.)
        stem['pool'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
    elif 'same' in stem_type:
        # full, input size based 'SAME' padding, used in ViT Hybrid model
        stem['pool'] = create_pool2d('max', kernel_size=3, stride=2, padding='same')
    else:
        # the usual PyTorch symmetric padding
        stem['pool'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    return nn.Sequential(stem)
def __init__(self):
    super(NNPaddingModule, self).__init__()
    self.input1d = torch.randn(1, 4, 50)
    self.module1d = nn.ModuleList([
        nn.ReflectionPad1d(2),
        nn.ReplicationPad1d(2),
        nn.ConstantPad1d(2, 3.5),
    ])
    self.input2d = torch.randn(1, 4, 30, 10)
    self.module2d = nn.ModuleList([
        nn.ReflectionPad2d(2),
        nn.ReplicationPad2d(2),
        nn.ZeroPad2d(2),
        nn.ConstantPad2d(2, 3.5),
    ])
    self.input3d = torch.randn(1, 4, 10, 4, 4)
    self.module3d = nn.ModuleList([
        nn.ReflectionPad3d(1),
        nn.ReplicationPad3d(3),
        nn.ConstantPad3d(3, 3.5),
    ])
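# Hedged sketch (not part of the original class): the __init__ above only stores inputs
# and padding layers, so the helper below is an assumption about how they might be
# exercised, e.g. in tracing/export tests; `_run_padding_modules` is a hypothetical name
# and it assumes a PyTorch version that provides nn.ReflectionPad3d, as the class does.
def _run_padding_modules(m):
    outs = []
    for pad in m.module1d:
        outs.append(pad(m.input1d))   # pads the last dim of a (1, 4, 50) tensor
    for pad in m.module2d:
        outs.append(pad(m.input2d))   # pads the last two dims of a (1, 4, 30, 10) tensor
    for pad in m.module3d:
        outs.append(pad(m.input3d))   # pads the last three dims of a (1, 4, 10, 4, 4) tensor
    return outs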
def __init__(self, C_in, C_out, stride):
    super(FactorizedReduce, self).__init__()
    self.stride = stride
    self.C_in = C_in
    self.C_out = C_out
    self.relu = nn.ReLU(inplace=False)
    if stride == 2:
        # assert C_out % 2 == 0, 'C_out : {:}'.format(C_out)
        C_outs = [C_out // 2, C_out - C_out // 2]
        self.convs = nn.ModuleList()
        for i in range(2):
            self.convs.append(nn.Conv2d(C_in, C_outs[i], 1, stride=stride, padding=0, bias=False))
        self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
    else:
        raise ValueError('Invalid stride : {:}'.format(stride))
    self.bn = nn.BatchNorm2d(C_out)
def dcn_vgg(self):
    self.conv1_1 = nn.Conv2d(3, 64, 3, padding=(1, 1))
    self.conv1_2 = nn.Conv2d(64, 64, 3, padding=(1, 1))
    self.maxpool1 = nn.MaxPool2d((2, 2))

    self.conv2_1 = nn.Conv2d(64, 128, 3, padding=(1, 1))
    self.conv2_2 = nn.Conv2d(128, 128, 3, padding=(1, 1))
    self.maxpool2 = nn.MaxPool2d((2, 2))

    self.conv3_1 = nn.Conv2d(128, 256, 3, padding=(1, 1))
    self.conv3_2 = nn.Conv2d(256, 256, 3, padding=(1, 1))
    self.conv3_3 = nn.Conv2d(256, 256, 3, padding=(1, 1))
    self.maxpool3 = nn.MaxPool2d((2, 2))

    self.conv4_1 = nn.Conv2d(256, 512, 3, padding=(1, 1))
    self.conv4_2 = nn.Conv2d(512, 512, 3, padding=(1, 1))
    self.conv4_3 = nn.Conv2d(512, 512, 3, padding=(1, 1))
    self.padding_one_side = nn.ConstantPad2d(padding=(1, 0, 1, 0), value=0)
    self.maxpool4 = nn.MaxPool2d((2, 2), stride=(1, 1))

    self.conv5_1 = nn.Conv2d(512, 512, 3, padding=(1, 1))
    self.conv5_2 = nn.Conv2d(512, 512, 3, padding=(1, 1))
    self.conv5_3 = nn.Conv2d(512, 512, 3, padding=(1, 1))
def forward(self, query, key, value, mask, adj):
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
    length = query.size(2)
    Pi = F.sigmoid(self.predict(query)) * length
    Pi = Pi.repeat(1, 1, 1, length)
    J = torch.FloatTensor(list(range(length))).unsqueeze(0).unsqueeze(1).unsqueeze(2).repeat(
        query.size(0), query.size(1), length, 1).cuda()
    G = -torch.pow(J - Pi, 2) / math.pow(self.D / 2, 2)
    scores += G
    mask = mask.unsqueeze(1).repeat(1, query.size(1), 1, 1)
    if adj is not None:
        adj = adj.unsqueeze(1).repeat(1, query.size(1), 1, 1)
        padding = nn.ConstantPad2d((0, 1, 0, 1), 1)
        adj = padding(adj)
        adj_mask = adj > 0
        mask = mask * adj_mask
    scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    p_attn = self.dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn
def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(3, 64, 3)
    self.bn1 = nn.BatchNorm2d(64)
    self.conv2 = nn.Conv2d(64, 64, 3)
    self.do1 = nn.Dropout2d(p=0.2)
    self.conv3 = nn.Conv2d(64, 64, 3)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv4 = nn.Conv2d(64, 128, 3)
    self.bn2 = nn.BatchNorm2d(128)
    self.conv5 = nn.Conv2d(128, 128, 3)
    self.conv6 = nn.Conv2d(128, 128, 3)
    self.conv7 = nn.Conv2d(128, 256, 3)
    self.conv8 = nn.Conv2d(256, 256, 3)
    self.conv9 = nn.Conv2d(256, 256, 3)
    # instead of FC layer
    self.conv10 = nn.Conv2d(256, 100, 1)
    self.bn3 = nn.BatchNorm2d(256)
    self.conv11 = nn.Conv2d(100, 10, 4)
    # zero padding
    self.zeropad = nn.ConstantPad2d(1, 0)
def normalize_and_scale(delta_im, mode='train'):
    if opt.foolmodel == 'incv3':
        delta_im = nn.ConstantPad2d((0, -1, -1, 0), 0)(delta_im)  # crop slightly to match inception

    delta_im = delta_im + 1  # now 0..2
    delta_im = delta_im * 0.5  # now 0..1

    # normalize image color channels
    for c in range(3):
        delta_im[:, c, :, :] = (delta_im[:, c, :, :].clone() - mean_arr[c]) / stddev_arr[c]

    # threshold each channel of each image in delta_im according to the inf norm;
    # do on a per image basis as the inf norm of each image could be different
    bs = opt.batchSize if (mode == 'train') else opt.testBatchSize
    for i in range(bs):
        # do per channel l_inf normalization
        for ci in range(3):
            l_inf_channel = delta_im[i, ci, :, :].detach().abs().max()
            mag_in_scaled_c = mag_in / (255.0 * stddev_arr[ci])
            gpu_id = gpulist[1] if n_gpu > 1 else gpulist[0]
            delta_im[i, ci, :, :] = delta_im[i, ci, :, :].clone() * np.minimum(
                1.0, mag_in_scaled_c / l_inf_channel.cpu().numpy())

    return delta_im
def forward(self, sent_tuple):
    """
    :param sent_tuple: (sent, sent_len) in a dialogue
    :return:
    """
    sent_enc = self.encoder(sent_tuple)
    _, sent_len = sent_tuple

    # attention mask
    len_mask = np.ones((self.max_sent_len, 1))
    len_mask[len(sent_len):] = 0
    attn_mask = np.matmul(len_mask, len_mask.transpose())
    attn_mask = torch.from_numpy(attn_mask)
    attn_mask = torch.eq(attn_mask, 0).cuda() if torch.cuda.is_available() else torch.eq(attn_mask, 0)

    # positional enc
    pos = torch.LongTensor(range(len(sent_len)))
    pos = Variable(pos).cuda() if torch.cuda.is_available() else Variable(pos)
    pos_enc = self.position_enc(pos)
    sent_enc += pos_enc

    # padding
    enc_pad = nn.ConstantPad2d((0, 0, 0, self.max_sent_len - len(sent_len)), 0)
    sent_enc = enc_pad(sent_enc)
    # sent_enc = sent_enc.data

    attn_output, attn = self.attention(sent_enc, sent_enc, sent_enc, attn_mask)

    # fully connected layer
    logit = self.decoder(attn_output)
    logit = logit[:len(sent_len)]
    return logit
def __init__(self):
    super(WNet, self).__init__()
    self.feature1 = []
    self.feature2 = []
    bias = True

    # U-Net1
    # module1
    self.module = []
    self.maxpool1 = []
    self.uconv1 = []
    self.module.append(self.add_conv_stage(config.ChNum[0], config.ChNum[1], config.ConvSize,
                                           padding=config.pad, seperable=False))
    # module2-5
    for i in range(2, config.MaxLv + 1):
        self.module.append(self.add_conv_stage(config.ChNum[i - 1], config.ChNum[i], config.ConvSize,
                                               padding=config.pad))
    # module6-8
    for i in range(config.MaxLv - 1, 1, -1):
        self.module.append(self.add_conv_stage(2 * config.ChNum[i], config.ChNum[i], config.ConvSize,
                                               padding=config.pad))
    # module9
    self.module.append(self.add_conv_stage(2 * config.ChNum[1], config.ChNum[1], config.ConvSize,
                                           padding=config.pad, seperable=False))
    # module1-4
    for i in range(config.MaxLv - 1):
        self.maxpool1.append(nn.MaxPool2d(config.ScaleRatio))
    # module5-8
    for i in range(config.MaxLv, 1, -1):
        self.uconv1.append(nn.ConvTranspose2d(config.ChNum[i], config.ChNum[i - 1],
                                              config.ScaleRatio, config.ScaleRatio, bias=True))
    self.predconv = nn.Conv2d(config.ChNum[1], config.K, 1, bias=bias)
    self.pad = nn.ConstantPad2d(config.radius - 1, 0)
    self.softmax = nn.Softmax2d()
    self.module = torch.nn.ModuleList(self.module)
    self.maxpool1 = torch.nn.ModuleList(self.maxpool1)
    self.uconv1 = torch.nn.ModuleList(self.uconv1)
def conv_h_w(h, w, in_planes, out_planes, stride=1, bias=True, padding='same'):
    if padding == 'valid':
        stride = tuple([max(1, i) for i in stride])
        return nn.Conv2d(in_planes, out_planes, kernel_size=(h, w), stride=stride, bias=bias)
    elif padding == 'same':
        padding_h = (h - 1) // 2
        remainder_h = (h - 1) % 2
        padding_w = (w - 1) // 2
        remainder_w = (w - 1) % 2
        if isinstance(stride, int):
            stride = (stride, stride)
        stride_h, stride_w = stride
        if stride_h == 0:
            padding_h = 0
            remainder_h = 0
            stride_h = 1
        if stride_w == 0:
            padding_w = 0
            remainder_w = 0
            stride_w = 1
        return nn.Sequential(
            nn.ConstantPad2d((padding_w, padding_w + remainder_w, padding_h, padding_h + remainder_h), 0),
            nn.Conv2d(in_planes, out_planes, kernel_size=(h, w), stride=(stride_h, stride_w), bias=bias))
    else:
        raise ValueError(f'padding must be either "same" or "valid". Got {padding}')
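# Illustrative shape check for conv_h_w (not from the original source; `x`, `conv`, and
# the helper name are hypothetical): with padding='same' and stride 1, the asymmetric
# nn.ConstantPad2d keeps the spatial size unchanged, even for even-sized kernels.
def _conv_h_w_example():
    x = torch.randn(2, 8, 17, 23)
    conv = conv_h_w(h=2, w=4, in_planes=8, out_planes=16, stride=1, padding='same')
    out = conv(x)
    assert out.shape == (2, 16, 17, 23)
    return out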
def extract_patch(self, x, l, size):
    """
    @param x: img. (batch, channel, height, width)
    @param l: location. (batch, 2)
    @param size: the size of the extracted patch.
    @return Variable (batch, channel, height, width)
    """
    B, C, H, W = x.shape
    if not hasattr(self, 'imgShape'):
        self.imgShape = torch.FloatTensor([H, W]).unsqueeze(0)
        if self.use_gpu:
            self.imgShape = self.imgShape.cuda()

    # coords from [-1, 1] to H, W scale
    coords = (0.5 * ((l.data + 1.0) * self.imgShape)).long()

    # pad the image with enough 0s
    x = nn.ConstantPad2d(size // 2, 0.)(x)

    # calculate coordinates for each batch sample (padding considered)
    from_x, from_y = coords[:, 0], coords[:, 1]
    to_x, to_y = from_x + size, from_y + size
    # The above is the original implementation.
    # It only works if the input image is a square.
    # The following is the correct implementation:
    # from_y, from_x = coords[:, 0], coords[:, 1]
    # to_y, to_x = from_y + size, from_x + size

    # extract the patches
    patch = []
    for i in range(B):
        patch.append(x[i, :, from_y[i]:to_y[i], from_x[i]:to_x[i]].unsqueeze(0))
    return torch.cat(patch)
def __init__(self, C_in, C_out, BatchNorm, eps=1e-5, momentum=0.1, affine=True):
    super(DoubleFactorizedReduce, self).__init__()
    assert C_out % 2 == 0
    self.relu = nn.ReLU(inplace=False)
    self.conv_1 = nn.Conv2d(C_in, C_out // 2, 1, stride=4, padding=0, bias=False)
    self.conv_2 = nn.Conv2d(C_in, C_out // 2, 1, stride=4, padding=0, bias=False)
    self.bn = BatchNorm(C_out, affine=affine)
    self.pad = nn.ConstantPad2d((0, 2, 0, 2), 0)
def __init__(self, pool_size=2, maxpool=True, training=False, grid_size=None, **kwargs):
    super(StochasticPool2DLayer, self).__init__(**kwargs)
    # the seeding API changed in PyTorch; note manual_seed_all returns None
    self.rng = torch.cuda.manual_seed_all(123)
    self.pool_size = pool_size
    self.maxpool_flag = maxpool
    self.training = training
    if grid_size:
        self.grid_size = grid_size
    else:
        self.grid_size = pool_size

    self.Maxpool = torch.nn.MaxPool2d(kernel_size=self.pool_size, stride=1)
    self.Avgpool = torch.nn.AvgPool2d(
        kernel_size=self.pool_size,
        stride=self.pool_size,
        padding=self.pool_size // 2,
    )
    self.padding = nn.ConstantPad2d((0, 1, 0, 1), 0)
def conv_dw(inp, oup, stride):
    inp = int(inp * self.alpha)
    oup = int(oup * self.alpha)
    if stride == 2:
        return nn.Sequential(
            # DepthwiseConv2D
            nn.ConstantPad2d((0, 1, 0, 1), 0),
            nn.Conv2d(inp, inp, 3, stride, 0, groups=inp, bias=False),
            nn.BatchNorm2d(inp, eps=1e-3),
            nn.ReLU6(inplace=True),

            nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup, eps=1e-3),
            nn.ReLU6(inplace=True),
        )
    else:
        return nn.Sequential(
            # DepthwiseConv2D
            nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
            nn.BatchNorm2d(inp, eps=1e-3),
            nn.ReLU6(inplace=True),

            nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup, eps=1e-3),
            nn.ReLU6(inplace=True),
        )
def __init__(self, max_len, embedding, pos_embed_size, pos_embed_num, slide_window,
             class_num, num_filters, keep_prob):
    super(ACNN, self).__init__()
    self.dw = embedding.shape[1]
    self.vac_len = embedding.shape[0]
    self.dp = pos_embed_size
    self.d = self.dw + 2 * self.dp
    self.np = pos_embed_num
    self.nr = class_num
    self.dc = num_filters
    self.keep_prob = keep_prob
    self.k = slide_window
    self.p = (self.k - 1) // 2
    self.n = max_len
    self.kd = self.d * self.k

    self.e1_embedding = nn.Embedding(self.vac_len, self.dw)
    self.e1_embedding.weight = nn.Parameter(torch.from_numpy(embedding))
    self.e2_embedding = nn.Embedding(self.vac_len, self.dw)
    self.e2_embedding.weight = nn.Parameter(torch.from_numpy(embedding))
    self.x_embedding = nn.Embedding(self.vac_len, self.dw)
    self.x_embedding.weight = nn.Parameter(torch.from_numpy(embedding))
    self.dist1_embedding = nn.Embedding(self.np, self.dp)
    self.dist2_embedding = nn.Embedding(self.np, self.dp)
    self.pad = nn.ConstantPad2d((0, 0, self.p, self.p), 0)
    self.y_embedding = nn.Embedding(self.nr, self.dc)
    self.dropout = nn.Dropout(self.keep_prob)
    # self.conv = nn.Conv2d(1, self.dc, (self.k, self.kd), (1, self.kd), (self.p, 0), bias=True)
    self.conv = nn.Conv2d(1, self.dc, (1, self.kd), (1, self.kd), bias=True)  # renewed
    self.tanh = nn.Tanh()
    self.U = nn.Parameter(torch.randn(self.dc, self.nr))
    self.We1 = nn.Parameter(torch.randn(self.dw, self.dw))
    self.We2 = nn.Parameter(torch.randn(self.dw, self.dw))
    self.max_pool = nn.MaxPool2d((1, self.dc), (1, self.dc))
    self.softmax = nn.Softmax()
def __init__(self, C_in, C_out, kernel_size=1, stride=2, padding=0, affine=True):
    super(FactorizedReduce, self).__init__()
    assert C_out % 2 == 0
    self.relu = nn.ReLU(inplace=False)
    self.conv_1 = nn.Conv2d(C_in, C_out // 2, kernel_size, stride=stride, padding=padding, bias=False)
    self.conv_2 = nn.Conv2d(C_in, C_out // 2, kernel_size, stride=stride, padding=padding, bias=False)
    self.bn = nn.BatchNorm2d(C_out, affine=affine)
    self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
def __init__(self, ch_in, ch_out):
    super(Up_Layer0, self).__init__()
    # 1st conv
    self.layer1 = self.define_layer1(ch_in, ch_out)
    # self.layer2 = self.define_layer2(ch_in, ch_out)
    # 2nd conv
    self.layer3 = self.define_layer1(ch_out, ch_out)
    # self.layer4 = self.define_layer2(ch_out, ch_out)
    # 3rd conv
    self.layer5 = self.define_layer1(ch_in, ch_out)
    # self.layer6 = self.define_layer2(ch_in, ch_out)
    # 4th conv
    self.layer7 = self.define_layer1(ch_out, ch_out)
    # self.layer8 = self.define_layer2(ch_out, ch_out)

    self.lamda1 = 0
    self.lamda2 = 0
    self.lamda3 = 0
    self.lamda4 = 0

    self.upsample = nn.UpsamplingBilinear2d(scale_factor=2)
    # add 0 padding on right and bottom to keep shape the same
    self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
    self.degradation = nn.Conv2d(ch_out, ch_out, kernel_size=2)
def forward(self, x):
    # set up the filter and output shapes
    FN, FC, FH, FW = self.weight.shape
    N, C, H, W = x.shape
    OH = (H - FH) // self.stride + 1
    OW = (W - FW) // self.stride + 1

    padding = nn.ConstantPad2d(self.padding, 0.0)
    out = []
    for nx in range(N):
        f_out = []
        for nf in range(FN):
            f_out.append(self.calConv(padding(x[nx]).type(x.dtype), self.weight[nf],
                                      self.bias[nf], self.stride))
        out.append(torch.stack(f_out))
    output = torch.stack(out)
    return output
def forward(self, input_sentence, batch_size):
    # Forward pass through the CNN layer
    e = self.embedding(input_sentence)
    m = nn.ConstantPad2d((0, 0, 1, 0), 0)
    # print(e.size())
    e = torch.squeeze(e, dim=2)
    # print(e.size())
    e = m(e)
    # print(e.size())
    # input = input.permute(1, 0, 2)
    # lstm_out, self.hidden = self.lstm(input.view(len(input), self.batch_size, -1))
    e = e.unsqueeze(1)
    # print(e.size())
    c_out = self.cnn(e)
    c_out = c_out.squeeze(3)
    c_out = F.relu(c_out)
    # print(c_out.size())
    c_out = c_out.contiguous().transpose(1, 2)
    y_pred = self.linear(c_out)
    y_pred = y_pred.view(batch_size, -1, self.output_dim)
    return y_pred
def crop_feat_patch(feat_map, peak, ww, softmax=0):
    # peak: T x 2
    # feat_map: 1 x d x T x h x w
    peak = peak.copy()
    NEG_INF = -1e10
    pad_val = NEG_INF if softmax else 0
    padlen = ww // 2
    feat_map = nn.ConstantPad2d([padlen, padlen, padlen, padlen], pad_val)(feat_map)
    peak += padlen
    if len(peak.shape) == 1:
        # Same map over all time steps
        c_y, c_x = peak
        feat_patch = feat_map[..., max(c_y - ww // 2, 0):c_y + ww // 2 + 1,
                              max(c_x - ww // 2, 0):c_x + ww // 2 + 1]
    else:
        feat_patch = []
        # check that time dims are the same
        assert peak.shape[0] == feat_map.shape[-3], 'Time dims are different'
        for t_i, (c_y, c_x) in enumerate(peak):
            fp = feat_map[..., t_i, max(c_y - ww // 2, 0):c_y + ww // 2 + 1,
                          max(c_x - ww // 2, 0):c_x + ww // 2 + 1]
            feat_patch.append(fp)
        feat_patch = np.stack(feat_patch, 2) if isinstance(feat_map, np.ndarray) else torch.stack(feat_patch, 2)
    if softmax:
        feat_patch = logsoftmax_2d(feat_patch).exp()
    return feat_patch
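# Usage sketch for crop_feat_patch (an assumption, not from the original source; it relies
# on the same `numpy as np` and `torch` imports as the function itself, and the helper and
# variable names are hypothetical): `feat_map` is 1 x d x T x h x w, `peak` holds one
# (y, x) location per time step, and a ww x ww window is cut around each peak.
def _crop_feat_patch_example():
    feat_map = torch.randn(1, 16, 5, 8, 8)
    peak = np.stack([np.full(5, 3), np.full(5, 4)], axis=1)   # T x 2, (y, x) per step
    patch = crop_feat_patch(feat_map, peak, ww=3)
    assert patch.shape == (1, 16, 5, 3, 3)
    return patch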
def __init__(self):
    super(Net, self).__init__()
    self.batchnorm0 = nn.BatchNorm2d(1)
    self.conv1 = nn.Conv2d(1, 4, 5)
    self.padding1 = nn.ConstantPad2d(-2, 0)
    self.batchnorm1 = nn.BatchNorm2d(4)
    self.conv2 = nn.Conv2d(4, 8, 5)
    self.padding2 = nn.ConstantPad2d(-2, 0)
    self.batchnorm2 = nn.BatchNorm2d(8)
    self.conv3 = nn.Conv2d(8, 16, 5)
    self.padding3 = nn.ConstantPad2d(-2, 0)
    self.batchnorm3 = nn.BatchNorm2d(16)
    self.conv4 = nn.Conv2d(16, 32, 5, stride=2)
    self.padding4 = nn.ConstantPad2d(-1, 0)
    self.batchnorm4 = nn.BatchNorm2d(32)
    self.conv5 = nn.Conv2d(32, 64, 5, stride=2)
    self.padding5 = nn.ConstantPad2d(-1, 0)
    self.batchnorm5 = nn.BatchNorm2d(64)
    self.conv6 = nn.Conv2d(64, 128, 5, stride=2)
    self.padding6 = nn.ConstantPad2d(-1, 0)
    self.batchnorm6 = nn.BatchNorm2d(128)
    self.conv7 = nn.Conv2d(128, 128, 5, stride=2)
    self.padding7 = nn.ConstantPad2d(-1, 0)
    self.batchnorm7 = nn.BatchNorm2d(128)
    self.fc1 = nn.Linear(int(96000 / 30), 64)
    self.fc2 = nn.Linear(64, 20)
    self.pool = nn.MaxPool2d(2, 2)
def random_scale_crop(scale, data=None, target=None, ignore_label=255, probs=None):
    """
    Args:
        scale: scale ratio. Float
        data: input data to augment, BxCxWxH
        target: labels to augment, BxWxH
        probs: probability masks to augment, BxCxWxH
        ignore_label: integer value that defines the ignore class in the datasets for the labels

    Returns:
        data, target and probs, after applying a scaling operation.
        The output resolution is preserved as the same as the input resolution WxH.
    """
    if scale != 1:
        init_size_w = data.shape[2]
        init_size_h = data.shape[3]

        # scale data, labels and probs
        data = nn.functional.interpolate(data, scale_factor=scale, mode='bilinear',
                                         align_corners=True, recompute_scale_factor=True)
        if target is not None:
            target = nn.functional.interpolate(target.unsqueeze(1).float(), scale_factor=scale,
                                               mode='nearest',
                                               recompute_scale_factor=True).long().squeeze(1)
        if probs is not None:
            probs = nn.functional.interpolate(probs.unsqueeze(1), scale_factor=scale, mode='bilinear',
                                              align_corners=True,
                                              recompute_scale_factor=True).squeeze(1)

        final_size_w = data.shape[2]
        final_size_h = data.shape[3]
        diff_h = init_size_h - final_size_h
        diff_w = init_size_w - final_size_w

        if scale < 1:  # add padding if needed
            if diff_h % 2 == 1:
                pad = nn.ConstantPad2d((diff_w // 2, diff_w // 2 + 1, diff_h // 2 + 1, diff_h // 2), 0)
            else:
                pad = nn.ConstantPad2d((diff_w // 2, diff_w // 2, diff_h // 2, diff_h // 2), 0)
            data = pad(data)
            if probs is not None:
                probs = pad(probs)

            # padding with ignore label to add to labels
            if diff_h % 2 == 1:
                pad = nn.ConstantPad2d((diff_w // 2, diff_w // 2 + 1, diff_h // 2 + 1, diff_h // 2),
                                       ignore_label)
            else:
                pad = nn.ConstantPad2d((diff_w // 2, diff_w // 2, diff_h // 2, diff_h // 2), ignore_label)
            if target is not None:
                target = pad(target)
        else:  # crop if needed
            w = random.randint(0, data.shape[2] - init_size_w)
            h = random.randint(0, data.shape[3] - init_size_h)
            data = data[:, :, h:h + init_size_h, w:w + init_size_w]
            if probs is not None:
                probs = probs[:, h:h + init_size_h, w:w + init_size_w]
            if target is not None:
                target = target[:, h:h + init_size_h, w:w + init_size_w]

    return data, target, probs
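# Illustrative call (an assumption about the expected layout, not from the original
# source; the helper and variable names are hypothetical, and it relies on the same
# `torch`/`torch.nn as nn` imports the function uses): with a square input and scale < 1
# the data is downsampled and then zero / ignore-label padded back to the input resolution.
def _random_scale_crop_example():
    data = torch.randn(2, 3, 64, 64)
    target = torch.randint(0, 19, (2, 64, 64))
    out_data, out_target, out_probs = random_scale_crop(0.75, data=data, target=target)
    assert out_data.shape == data.shape and out_target.shape == target.shape
    return out_data, out_target, out_probs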
def test_constantpad2d(self):
    model = nn.ConstantPad2d((1, 2, 3, 4), 3.5)
    self.run_model_test(model, train=False, batch_size=BATCH_SIZE)
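# Reference note (not part of the test; the helper name is hypothetical): nn.ConstantPad2d
# takes (left, right, top, bottom) and pads the last two dimensions, so a (1, 1, 5, 7)
# input grows to (1, 1, 5 + 3 + 4, 7 + 1 + 2) under the same padding the test uses.
def _constantpad2d_order_example():
    pad = nn.ConstantPad2d((1, 2, 3, 4), 3.5)
    out = pad(torch.zeros(1, 1, 5, 7))
    assert out.shape == (1, 1, 12, 10)
    return out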
def __init__(self, img_size, latent_size, condition_size=0, aux_size=0, kernel_size=4,
             num_channels=3, num_gen_channels=1024, skip_channels=[], batch_norm=True,
             sequential_noise=False, aux_only_on_top=False):
    super(Generator, self).__init__()
    # If we have a tuple make sure we maintain the aspect ratio
    if isinstance(img_size, tuple):
        self.img_size = img_size
        self.init_size = tuple(int(4 * x / max(img_size)) for x in self.img_size)
    else:
        self.img_size = (img_size, img_size)
        self.init_size = (4, 4)
    self.latent_size = latent_size
    self.condition_size = condition_size
    self.aux_size = aux_size
    self.rnn_noise = None
    if self.aux_size > 0 and sequential_noise:
        self.rnn_noise = nn.GRU(self.aux_size, self.aux_size, batch_first=True)
        self.rnn_noise_squashing = nn.Tanh()
    self.num_layers = int(np.log2(max(self.img_size))) - 1
    self.num_channels = num_channels
    self.num_gen_channels = num_gen_channels
    self.dcl = nn.ModuleList()
    self.aux_only_on_top = aux_only_on_top
    self.total_latent_size = self.latent_size + self.condition_size
    if self.aux_size > 0 and self.aux_only_on_top:
        self.aux_dcl = nn.Sequential(
            nn.ConvTranspose2d(self.aux_size, num_gen_channels,
                               (self.init_size[0] // 2, self.init_size[1]), bias=False),
            nn.BatchNorm2d(num_gen_channels),
            nn.ReLU(True),
            nn.ConstantPad2d((0, 0, 0, self.init_size[0] // 2), 0))
    else:
        self.total_latent_size += self.aux_size
    stride = 2
    if batch_norm:
        self.dcl.append(
            nn.Sequential(
                nn.ConvTranspose2d(self.total_latent_size, num_gen_channels, self.init_size, bias=False),
                nn.BatchNorm2d(num_gen_channels),
                nn.ReLU(True)))
    else:
        self.dcl.append(
            nn.Sequential(
                nn.ConvTranspose2d(self.total_latent_size, num_gen_channels, self.init_size, bias=False),
                nn.ReLU(True)))
    num_input_channels = self.num_gen_channels
    in_size = self.init_size
    for i in range(self.num_layers - 2):
        if not skip_channels:
            self.dcl.append(Deconv(num_input_channels, num_input_channels // 2, in_size,
                                   kernel_size, stride=stride, batch_norm=batch_norm))
        else:
            self.dcl.append(UnetBlock(num_input_channels, num_input_channels // 2, skip_channels[i],
                                      in_size, kernel_size, stride=stride, batch_norm=batch_norm))
        num_input_channels //= 2
        in_size = tuple(2 * x for x in in_size)
    padding = calculate_padding(kernel_size, stride)
    self.dcl.append(nn.ConvTranspose2d(num_input_channels, self.num_channels, kernel_size,
                                       stride=stride, padding=padding // 2, bias=False))
    self.final_activation = nn.Tanh()
def __init__(self, batch_size, causal=True):
    super(TCNN, self).__init__()
    self.batch_size = batch_size
    # self.tcnn = nn.Module()
    self.padding = nn.ConstantPad2d((1, 0, 0, 0), value=0)  # left, right, top, bottom

    self.encoder_conv2d_1 = nn.Conv2d(1, 16, kernel_size=(2, 5), stride=(1, 1), padding=(1, 2))
    # self.padding_2 = nn.ConstantPad2d((2, 1, 1, 0), value=0)  # left, right, top, bottom
    self.encoder_conv2d_2 = nn.Conv2d(16, 16, kernel_size=(2, 5), stride=(1, 2), padding=(1, 2))
    self.encoder_conv2d_3 = nn.Conv2d(16, 16, kernel_size=(2, 5), stride=(1, 2), padding=(1, 2))
    self.encoder_conv2d_4 = nn.Conv2d(16, 32, kernel_size=(2, 5), stride=(1, 2), padding=(1, 2))
    self.encoder_conv2d_5 = nn.Conv2d(32, 32, kernel_size=(2, 5), stride=(1, 2), padding=(1, 2))
    self.encoder_conv2d_6 = nn.Conv2d(32, 64, kernel_size=(2, 5), stride=(1, 2), padding=(1, 2))
    self.encoder_conv2d_7 = nn.Conv2d(64, 64, kernel_size=(2, 5), stride=(1, 2), padding=(1, 2))

    # TCM
    self.tcm = nn.Sequential(
        # first dilation block
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 1, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 2, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 4, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 8, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 16, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 32, 1], causal=causal),
        # second dilation block
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 1, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 2, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 4, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 8, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 16, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 32, 1], causal=causal),
        # third dilation block
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 1, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 2, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 4, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 8, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 16, 1], causal=causal),
        ResidualBlock(in_channels=256, kernel=[1, 3, 1], dilation=[1, 32, 1], causal=causal),
    )
    # self.tcm = TCN(input_channel=1, hidden_channel=1)

    # decoder module
    self.decoder_deconv2d_7 = nn.ConvTranspose2d(128, 64, kernel_size=(2, 5), stride=(1, 1), padding=(0, 0))
    self.decoder_deconv2d_6 = nn.ConvTranspose2d(128, 32, kernel_size=(2, 5), stride=(1, 2), padding=(0, 0))
    self.decoder_deconv2d_5 = nn.ConvTranspose2d(64, 32, kernel_size=(2, 5), stride=(1, 2), padding=(0, 0))
    self.decoder_deconv2d_4 = nn.ConvTranspose2d(64, 16, kernel_size=(2, 5), stride=(1, 2), padding=(0, 0))
    self.decoder_deconv2d_3 = nn.ConvTranspose2d(32, 16, kernel_size=(2, 5), stride=(1, 2), padding=(0, 0))
    self.decoder_deconv2d_2 = nn.ConvTranspose2d(32, 16, kernel_size=(2, 5), stride=(1, 2), padding=(0, 0))
    self.decoder_deconv2d_1 = nn.ConvTranspose2d(16, 1, kernel_size=(2, 5), stride=(1, 1), padding=(0, 0))
def __init__(
    self,
    block_units,
    width_factor,
    # in_channels=3,  # TODO: add later
    num_classes=5,  # just a random number
    # encoder=False,  # TODO: add later
):
    super().__init__()
    wf = width_factor  # shortcut 'cause we'll use it a lot.

    # The following will be unreadable if we split lines.
    # pylint: disable=line-too-long
    # fmt: off
    self.root = nn.Sequential(OrderedDict([
        ('conv', StdConv2d(3, 64 * wf, kernel_size=7, stride=2, padding=3, bias=False)),
        ('pad', nn.ConstantPad2d(1, 0)),
        ('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=0)),
        # The following is subtly not the same!
        # ('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
    ]))

    self.body = nn.Sequential(OrderedDict([
        ('block1', nn.Sequential(OrderedDict(
            [('unit01', PreActBottleneck(cin=64 * wf, cout=256 * wf, cmid=64 * wf))] +
            [(f'unit{i:02d}', PreActBottleneck(cin=256 * wf, cout=256 * wf, cmid=64 * wf))
             for i in range(2, block_units[0] + 1)],
        ))),
        ('block2', nn.Sequential(OrderedDict(
            [('unit01', PreActBottleneck(cin=256 * wf, cout=512 * wf, cmid=128 * wf, stride=2))] +
            [(f'unit{i:02d}', PreActBottleneck(cin=512 * wf, cout=512 * wf, cmid=128 * wf))
             for i in range(2, block_units[1] + 1)],
        ))),
        ('block3', nn.Sequential(OrderedDict(
            [('unit01', PreActBottleneck(cin=512 * wf, cout=1024 * wf, cmid=256 * wf, stride=2))] +
            [(f'unit{i:02d}', PreActBottleneck(cin=1024 * wf, cout=1024 * wf, cmid=256 * wf))
             for i in range(2, block_units[2] + 1)],
        ))),
        ('block4', nn.Sequential(OrderedDict(
            [('unit01', PreActBottleneck(cin=1024 * wf, cout=2048 * wf, cmid=512 * wf, stride=2))] +
            [(f'unit{i:02d}', PreActBottleneck(cin=2048 * wf, cout=2048 * wf, cmid=512 * wf))
             for i in range(2, block_units[3] + 1)],
        ))),
    ]))
    # pylint: enable=line-too-long

    self.head = nn.Sequential(OrderedDict([
        ('gn', nn.GroupNorm(32, 2048 * wf)),
        ('relu', nn.ReLU(inplace=True)),
        ('avg', nn.AdaptiveAvgPool2d(output_size=1)),
        ('conv', nn.Conv2d(2048 * wf, num_classes, kernel_size=1, bias=True)),
    ]))
def forward(self, adv_patch, lab_batch, img_size, Scale=1., do_rotate=True, rand_loc=True):
    # adv_patch = F.conv2d(adv_patch.unsqueeze(0), self.kernel, padding=(2, 2))
    adv_patch = self.medianpooler(adv_patch.unsqueeze(0))
    # adv_contour = self.medianpooler(adv_contour.unsqueeze(0))
    # Determine size of padding
    pad = (img_size - adv_patch.size(-1)) / 2
    # Make a batch of patches
    adv_patch = adv_patch.unsqueeze(0)
    adv_batch = adv_patch.expand(lab_batch.size(0), lab_batch.size(1), -1, -1, -1)
    # adv_contour = adv_contour.unsqueeze(0)
    # adv_contour = adv_contour.expand(lab_batch.size(0), lab_batch.size(1), -1, -1, -1)
    batch_size = torch.Size((lab_batch.size(0), lab_batch.size(1)))

    # Contrast, brightness and noise transforms
    # Create random contrast tensor
    contrast = torch.cuda.FloatTensor(batch_size).uniform_(self.min_contrast, self.max_contrast)
    contrast = contrast.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
    contrast = contrast.expand(-1, -1, adv_batch.size(-3), adv_batch.size(-2), adv_batch.size(-1))
    contrast = contrast.cuda()

    # Create random brightness tensor
    brightness = torch.cuda.FloatTensor(batch_size).uniform_(self.min_brightness, self.max_brightness)
    brightness = brightness.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
    brightness = brightness.expand(-1, -1, adv_batch.size(-3), adv_batch.size(-2), adv_batch.size(-1))
    brightness = brightness.cuda()

    # Create random noise tensor
    noise = torch.cuda.FloatTensor(adv_batch.size()).uniform_(-1, 1) * self.noise_factor

    # Apply contrast/brightness/noise, clamp
    adv_batch = adv_batch * contrast + brightness + noise
    adv_batch = torch.clamp(adv_batch, 0., 0.99999)

    # Where the label class_id is 1 we don't want a patch (padding) --> fill mask with zeros
    cls_ids = torch.narrow(lab_batch, 2, 0, 1)
    cls_mask = cls_ids.expand(-1, -1, 3)
    cls_mask = cls_mask.unsqueeze(-1)
    cls_mask = cls_mask.expand(-1, -1, -1, adv_batch.size(3))
    cls_mask = cls_mask.unsqueeze(-1)
    cls_mask = cls_mask.expand(-1, -1, -1, -1, adv_batch.size(4))
    msk_batch = torch.cuda.FloatTensor(cls_mask.size()).fill_(1) - cls_mask

    # Pad patch and mask to image dimensions
    mypad = nn.ConstantPad2d((int(pad + 0.5), int(pad), int(pad + 0.5), int(pad)), 0)
    adv_batch = mypad(adv_batch)
    # adv_contour = mypad(adv_contour)
    msk_batch = mypad(msk_batch)
    # adv_batch = adv_batch * adv_contour

    # Rotation and rescaling transforms
    anglesize = lab_batch.size(0) * lab_batch.size(1)
    if do_rotate:
        angle = torch.cuda.FloatTensor(anglesize).uniform_(self.minangle, self.maxangle)
    else:
        angle = torch.cuda.FloatTensor(anglesize).fill_(0)

    # Resizes and rotates
    current_patch_size = adv_patch.size(-1)
    lab_batch_scaled = torch.cuda.FloatTensor(lab_batch.size()).fill_(0)
    lab_batch_scaled[:, :, 1] = lab_batch[:, :, 1] * img_size
    lab_batch_scaled[:, :, 2] = lab_batch[:, :, 2] * img_size
    lab_batch_scaled[:, :, 3] = lab_batch[:, :, 3] * img_size
    lab_batch_scaled[:, :, 4] = lab_batch[:, :, 4] * img_size
    target_size = torch.sqrt(((lab_batch_scaled[:, :, 3].mul(0.2)) ** 2) +
                             ((lab_batch_scaled[:, :, 4].mul(0.2)) ** 2))
    target_x = lab_batch[:, :, 1].view(np.prod(batch_size))
    target_y = lab_batch[:, :, 2].view(np.prod(batch_size))
    targetoff_x = lab_batch[:, :, 3].view(np.prod(batch_size))
    targetoff_y = lab_batch[:, :, 4].view(np.prod(batch_size))
    if rand_loc:
        off_x = targetoff_x * (torch.cuda.FloatTensor(targetoff_x.size()).uniform_(-0.4, 0.4))
        target_x = target_x + off_x
        off_y = targetoff_y * (torch.cuda.FloatTensor(targetoff_y.size()).uniform_(-0.4, 0.4))
        target_y = target_y + off_y
    target_y = target_y - 0.05
    scale = target_size * Scale / current_patch_size
    scale = scale.view(anglesize)

    s = adv_batch.size()
    adv_batch = adv_batch.view(s[0] * s[1], s[2], s[3], s[4])
    msk_batch = msk_batch.view(s[0] * s[1], s[2], s[3], s[4])

    tx = (-target_x + 0.5) * 2
    ty = (-target_y + 0.5) * 2
    sin = torch.sin(angle)
    cos = torch.cos(angle)

    # Theta = rotation, rescale matrix
    theta = torch.cuda.FloatTensor(anglesize, 2, 3).fill_(0)
    theta[:, 0, 0] = cos / scale
    theta[:, 0, 1] = sin / scale
    theta[:, 0, 2] = tx * cos / scale + ty * sin / scale
    theta[:, 1, 0] = -sin / scale
    theta[:, 1, 1] = cos / scale
    theta[:, 1, 2] = -tx * sin / scale + ty * cos / scale

    b_sh = adv_batch.shape
    grid = F.affine_grid(theta, adv_batch.shape)
    adv_batch_t = F.grid_sample(adv_batch, grid)
    msk_batch_t = F.grid_sample(msk_batch, grid)

    '''
    # Theta2 = translation matrix
    theta2 = torch.cuda.FloatTensor(anglesize, 2, 3).fill_(0)
    theta2[:, 0, 0] = 1
    theta2[:, 0, 1] = 0
    theta2[:, 0, 2] = (-target_x + 0.5) * 2
    theta2[:, 1, 0] = 0
    theta2[:, 1, 1] = 1
    theta2[:, 1, 2] = (-target_y + 0.5) * 2
    grid2 = F.affine_grid(theta2, adv_batch.shape)
    adv_batch_t = F.grid_sample(adv_batch_t, grid2)
    msk_batch_t = F.grid_sample(msk_batch_t, grid2)
    '''

    adv_batch_t = adv_batch_t.view(s[0], s[1], s[2], s[3], s[4])
    msk_batch_t = msk_batch_t.view(s[0], s[1], s[2], s[3], s[4])
    adv_batch_t = torch.clamp(adv_batch_t, 0., 0.999999)
    # adv_batch_t = torch.where((adv == 0), img_batch, adv)
    return adv_batch_t * msk_batch_t
def __init__(self, input_nc=3, n_base_filters=64, output_nc=3, norm_layer=nn.BatchNorm2d,
             n_blocks=9, gpu_ids=[]):
    assert (n_blocks >= 0)
    super(GenerateB, self).__init__()
    self.input_nc = input_nc
    self.n_base_filters = n_base_filters
    self.output_nc = output_nc
    self.gpu_ids = gpu_ids
    self.norm_layer = norm_layer
    self.n_blocks = n_blocks

    self.reflection1 = nn.ReflectionPad2d(1)  # for resnet
    self.reflection3 = nn.ReflectionPad2d(3)
    self.constantpad1 = nn.ConstantPad2d(1, 0)  # for resnet
    self.constantpad3 = nn.ConstantPad2d(3, 0)

    self.norm_layer1 = self.norm_layer(n_base_filters)      # 64
    self.norm_layer2 = self.norm_layer(2 * n_base_filters)  # 128
    self.norm_layer3 = self.norm_layer(4 * n_base_filters)  # 256
    self.norm_layer4 = self.norm_layer(8 * n_base_filters)  # 512

    self.conv1 = nn.Conv2d(in_channels=input_nc, out_channels=n_base_filters,
                           kernel_size=7, stride=1, padding=0)
    self.conv2 = nn.Conv2d(in_channels=n_base_filters, out_channels=2 * n_base_filters,
                           kernel_size=3, stride=2, padding=1)
    self.conv3 = nn.Conv2d(in_channels=2 * n_base_filters, out_channels=4 * n_base_filters,
                           kernel_size=3, stride=2, padding=1)
    self.conv4 = nn.Conv2d(in_channels=4 * n_base_filters, out_channels=8 * n_base_filters,
                           kernel_size=3, stride=2, padding=1)
    self.resNet_conv1 = nn.Conv2d(in_channels=8 * n_base_filters, out_channels=8 * n_base_filters,
                                  kernel_size=3, stride=1, padding=0)
    self.resNet_conv2 = nn.Conv2d(in_channels=8 * n_base_filters, out_channels=8 * n_base_filters,
                                  kernel_size=3, stride=1, padding=0)
    self.relu = nn.ReLU(True)

    if type(norm_layer) == functools.partial:
        use_bias = norm_layer.func == nn.InstanceNorm2d
    else:
        use_bias = norm_layer == nn.InstanceNorm2d

    self.convTran = nn.Sequential(
        nn.ConvTranspose2d(n_base_filters * 8, n_base_filters * 4, kernel_size=3, stride=2,
                           padding=1, output_padding=1, bias=use_bias),
        norm_layer(n_base_filters * 4),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(n_base_filters * 4, n_base_filters * 2, kernel_size=3, stride=2,
                           padding=1, output_padding=1, bias=use_bias),
        norm_layer(n_base_filters * 2),
        nn.ReLU(inplace=True),
        nn.ConvTranspose2d(n_base_filters * 2, n_base_filters, kernel_size=3, stride=2,
                           padding=1, output_padding=1, bias=use_bias),
        norm_layer(n_base_filters),
        nn.ReLU(inplace=True),
        nn.ReflectionPad2d(3),
        nn.Conv2d(n_base_filters, output_nc, kernel_size=7, stride=1, padding=0),
        # nn.ReLU(inplace=True)
        # nn.Sigmoid()
        nn.Tanh())