def init_params(self):
    """Initialise every Conv2d, BatchNorm2d and Linear submodule in place.

    * ``nn.Conv2d``: Kaiming-normal weight (``mode='fan_out'``), zero bias.
    * ``nn.BatchNorm2d``: weight set to 1, bias set to 0.
    * ``nn.Linear``: normal weight with ``std=0.001``, zero bias.

    Modules of any other type are left untouched.
    """
    for layer in self.modules():
        if isinstance(layer, nn.Conv2d):
            init.kaiming_normal_(layer.weight, mode='fan_out')
        elif isinstance(layer, nn.BatchNorm2d):
            init.constant_(layer.weight, 1)
            init.constant_(layer.bias, 0)
            continue
        elif isinstance(layer, nn.Linear):
            init.normal_(layer.weight, std=0.001)
        else:
            continue

        # Conv2d and Linear share the same optional-bias handling.
        if layer.bias is not None:
            init.constant_(layer.bias, 0)
def init_func(m):
    """Initialise a single module in place, dispatching on its class name.

    ``init_type`` and ``gain`` are not parameters of this function; they are
    free names resolved from the surrounding scope.

    * Modules that have a ``weight`` attribute and whose class name contains
      ``'Conv'`` or ``'Linear'`` get their weight initialised according to
      ``init_type`` (``'normal'``, ``'xavier'``, ``'kaiming'`` or
      ``'orthogonal'``) and, when a bias exists, the bias set to 0.
    * Modules whose class name contains ``'BatchNorm2d'`` get a normal weight
      with mean 1.0 and std ``gain``, and a zero bias.

    Raises:
        NotImplementedError: if ``init_type`` is not one of the four
            supported names (only checked for Conv/Linear modules).
    """
    kind = m.__class__.__name__
    is_conv_or_linear = 'Conv' in kind or 'Linear' in kind

    if hasattr(m, 'weight') and is_conv_or_linear:
        if init_type == 'normal':
            init.normal_(m.weight.data, 0.0, gain)
        elif init_type == 'xavier':
            init.xavier_normal_(m.weight.data, gain=gain)
        elif init_type == 'kaiming':
            init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
        elif init_type == 'orthogonal':
            init.orthogonal_(m.weight.data, gain=gain)
        else:
            raise NotImplementedError('initialization method [%s] is not implemented' % init_type)

        # The bias may be missing entirely or explicitly disabled (None).
        if getattr(m, 'bias', None) is not None:
            init.constant_(m.bias.data, 0.0)
    elif 'BatchNorm2d' in kind:
        init.normal_(m.weight.data, 1.0, gain)
        init.constant_(m.bias.data, 0.0)
def weight_init(m):
    """Initialise one module in place according to its layer type.

    Usage:
        model = Model()
        model.apply(weight_init)

    Scheme applied per type:

    * Conv1d / ConvTranspose1d: standard-normal weight.
    * Conv3d: Kaiming-normal weight (``mode='fan_out'``, ReLU).
    * Conv2d / ConvTranspose2d / ConvTranspose3d / Linear: Xavier-normal weight.
    * All of the above: standard-normal bias when a bias exists.
    * BatchNorm1d / BatchNorm2d: weight ~ N(1, 0.02), bias 0.
    * BatchNorm3d: weight 1, bias 0.
    * LSTM / LSTMCell / GRU / GRUCell: orthogonal init for parameters with
      two or more dimensions, standard-normal for the rest.

    Any other module type is left unchanged.
    """

    def _randomise_bias(layer):
        # Layers built with bias=False expose ``bias`` as None.
        if layer.bias is not None:
            init.normal_(layer.bias.data)

    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        _randomise_bias(m)
    elif isinstance(m, nn.Conv3d):
        init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        _randomise_bias(m)
    elif isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.ConvTranspose3d,
                        nn.Linear)):
        init.xavier_normal_(m.weight.data)
        _randomise_bias(m)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.BatchNorm3d):
        init.constant_(m.weight, 1)
        init.constant_(m.bias, 0)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        for tensor in m.parameters():
            if tensor.dim() >= 2:
                init.orthogonal_(tensor.data)
            else:
                init.normal_(tensor.data)
def __init__(self, batchNorm=True, bias=True, bitW=32, bitA=32, cut_ratio=2):
    """Create the quantised encoder/decoder layers and initialise them.

    Args:
        batchNorm: stored on the instance and passed as the first argument
            to every ``conv_Q`` block.
        bias: passed as ``bias`` to every ``conv_Q`` block.
        bitW: passed as ``bitW`` to ``conv_Q`` / ``deconv_Q`` /
            ``predict_flow_Q`` and as ``bit`` to ``ConvTrans2d_Q``.
        bitA: passed as ``bitA`` to ``conv_Q`` / ``deconv_Q``.
        cut_ratio: every base channel width is floor-divided by this value.
    """
    super(FlowNetSGrayHalfLLSQ, self).__init__()

    def scaled(base_channels):
        # Floor-divide a base width by the requested reduction ratio.
        return base_channels // cut_ratio

    # Encoder output widths.
    c1, c1_1, c1_2 = scaled(64), scaled(64), scaled(64)
    c2, c2_1 = scaled(128), scaled(128)
    c3, c3_0, c3_1 = scaled(256), scaled(256), scaled(256)
    c4, c4_1 = scaled(512), scaled(512)
    c5, c5_1 = scaled(512), scaled(512)
    c6, c6_1 = scaled(1024), scaled(1024)
    # Decoder output widths.
    d5, d4, d3, d2 = scaled(512), scaled(256), scaled(128), scaled(64)

    self.batchNorm = batchNorm
    conv_kw = dict(bias=bias, bitW=bitW, bitA=bitA)
    deconv_kw = dict(bitW=bitW, bitA=bitA)

    # Encoder. The first conv takes 2 input channels.
    self.conv1 = conv_Q(self.batchNorm, 2, c1, **conv_kw)  # 7x7 origin
    self.conv1_1 = conv_Q(self.batchNorm, c1, c1_1, **conv_kw)
    self.conv1_2 = conv_Q(self.batchNorm, c1_1, c1_2, **conv_kw, stride=2)
    self.conv2 = conv_Q(self.batchNorm, c1_2, c2, **conv_kw)  # 5x5 origin
    self.conv2_1 = conv_Q(self.batchNorm, c2, c2_1, **conv_kw, stride=2)
    self.conv3 = conv_Q(self.batchNorm, c2_1, c3, **conv_kw)  # 5x5 origin
    self.conv3_0 = conv_Q(self.batchNorm, c3, c3_0, **conv_kw, stride=2)
    self.conv3_1 = conv_Q(self.batchNorm, c3_0, c3_1, **conv_kw)
    self.conv4 = conv_Q(self.batchNorm, c3_1, c4, **conv_kw, stride=2)
    self.conv4_1 = conv_Q(self.batchNorm, c4, c4_1, **conv_kw)
    self.conv5 = conv_Q(self.batchNorm, c4_1, c5, **conv_kw, stride=2)
    self.conv5_1 = conv_Q(self.batchNorm, c5, c5_1, **conv_kw)
    self.conv6 = conv_Q(self.batchNorm, c5_1, c6, **conv_kw, stride=2)
    self.conv6_1 = conv_Q(self.batchNorm, c6, c6_1, **conv_kw)

    # Decoder. Inputs from deconv4 on are an encoder width plus the previous
    # decoder width plus 2 extra channels.
    self.deconv5 = deconv_Q(c6_1, d5, **deconv_kw)
    self.deconv4 = deconv_Q(c5_1 + d5 + 2, d4, **deconv_kw)
    self.deconv3 = deconv_Q(c4_1 + d4 + 2, d3, **deconv_kw)
    self.deconv2 = deconv_Q(c3_1 + d3 + 2, d2, **deconv_kw)

    # Flow prediction heads, one per decoder scale.
    self.predict_flow6 = predict_flow_Q(c6_1, bitW=bitW)
    self.predict_flow5 = predict_flow_Q(c5_1 + d5 + 2, bitW=bitW)
    self.predict_flow4 = predict_flow_Q(c4_1 + d4 + 2, bitW=bitW)
    self.predict_flow3 = predict_flow_Q(c3_1 + d3 + 2, bitW=bitW)
    self.predict_flow2 = predict_flow_Q(c2_1 + d2 + 2, bitW=bitW)

    # 2-channel -> 2-channel transposed convs (kernel k_up, stride 2,
    # padding 1, no bias).
    k_up = 4
    self.upsampled_flow6_to_5 = ConvTrans2d_Q(2, 2, k_up, 2, 1, bias=False, bit=bitW)
    self.upsampled_flow5_to_4 = ConvTrans2d_Q(2, 2, k_up, 2, 1, bias=False, bit=bitW)
    self.upsampled_flow4_to_3 = ConvTrans2d_Q(2, 2, k_up, 2, 1, bias=False, bit=bitW)
    self.upsampled_flow3_to_2 = ConvTrans2d_Q(2, 2, k_up, 2, 1, bias=False, bit=bitW)

    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            # 0.1 is passed positionally, i.e. as kaiming_normal_'s ``a``.
            kaiming_normal_(m.weight, 0.1)
            if m.bias is not None:
                constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            constant_(m.weight, 1)
            constant_(m.bias, 0)
def train(opt):
    """Run the training loop described by ``opt``.

    Builds the training/validation data pipelines, the model, loss and
    optimizer, then trains until ``opt.num_iter`` iterations have been done,
    at which point ``sys.exit()`` is called. Logs and checkpoints are written
    below ``./saved_models/{opt.exp_name}/``.

    Side effects on ``opt``: ``select_data`` and ``batch_ratio`` are replaced
    by their ``'-'``-split lists, ``num_class`` is set, and ``input_channel``
    is set to 3 when ``opt.rgb`` is truthy.

    Args:
        opt: option namespace; the attributes read are the ones referenced
            in the body (``exp_name``, ``valid_data``, ``Prediction``, ...).
    """
    # ---- dataset preparation ----
    if not opt.data_filtering_off:
        print('Filtering the images containing characters which are not in opt.character')
        print('Filtering the images whose label is longer than opt.batch_max_length')
        # see https://github.com/clovaai/deep-text-recognition-benchmark/blob/6593928855fb7abb999a99f428b3e4477d4ae356/dataset.py#L130

    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    # The context manager guarantees the log file is closed even if building
    # the validation dataset/loader raises.
    with open(f'./saved_models/{opt.exp_name}/log_dataset.txt', 'a') as log:
        AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
        valid_dataset, valid_dataset_log = hierarchical_dataset(root=opt.valid_data, opt=opt)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset, batch_size=opt.batch_size,
            shuffle=True,  # 'True' to check training progress with validation function.
            num_workers=int(opt.workers),
            collate_fn=AlignCollate_valid, pin_memory=True)
        log.write(valid_dataset_log)
        print('-' * 80)
        log.write('-' * 80 + '\n')

    # ---- model configuration ----
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception:  # for batchnorm.
            # Deliberate best-effort fallback: when the initialiser rejects a
            # parameter, weights are filled with 1 and anything else is kept.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).to(device)
    model.train()
    if opt.saved_model != '':
        print(f'loading pretrained model from {opt.saved_model}')
        if opt.FT:
            model.load_state_dict(torch.load(opt.saved_model), strict=False)
        else:
            model.load_state_dict(torch.load(opt.saved_model))
    print("Model:")
    print(model)

    # ---- setup loss ----
    if 'CTC' in opt.Prediction:
        criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # keep only the parameters that require gradients
    filtered_parameters = [p for p in model.parameters() if p.requires_grad]
    params_num = [np.prod(p.size()) for p in filtered_parameters]
    print('Trainable params num : ', sum(params_num))

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters, lr=opt.lr, betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr, rho=opt.rho, eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    # ---- final options ----
    with open(f'./saved_models/{opt.exp_name}/opt.txt', 'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    # ---- start training ----
    start_iter = 0
    if opt.saved_model != '':
        # Resume the counter from names ending in "_<iteration>.<ext>"; any
        # other name (e.g. "best_accuracy.pth") keeps start_iter at 0.
        try:
            start_iter = int(opt.saved_model.split('_')[-1].split('.')[0])
        except ValueError:
            pass
        else:
            print(f'continue to train, start_iter: {start_iter}')

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = -1
    iteration = start_iter

    while True:
        # train part
        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)
        text, length = converter.encode(labels, batch_max_length=opt.batch_max_length)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text)
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            preds = preds.log_softmax(2).permute(1, 0, 2)
            cost = criterion(preds, text, preds_size, length)
        else:
            preds = model(image, text[:, :-1])  # align with Attention.forward
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]), target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        loss_avg.add(cost)

        # validation part
        # To see training progress, we also conduct validation when 'iteration == 0'
        if (iteration + 1) % opt.valInterval == 0 or iteration == 0:
            elapsed_time = time.time() - start_time
            # for log
            with open(f'./saved_models/{opt.exp_name}/log_train.txt', 'a') as log:
                model.eval()
                with torch.no_grad():
                    valid_loss, current_accuracy, current_norm_ED, preds, confidence_score, labels, infer_time, length_of_data = validation(
                        model, criterion, valid_loader, converter, opt)
                model.train()

                # training loss and validation loss
                loss_log = f'[{iteration+1}/{opt.num_iter}] Train loss: {loss_avg.val():0.5f}, Valid loss: {valid_loss:0.5f}, Elapsed_time: {elapsed_time:0.5f}'
                loss_avg.reset()

                current_model_log = f'{"Current_accuracy":17s}: {current_accuracy:0.3f}, {"Current_norm_ED":17s}: {current_norm_ED:0.2f}'

                # keep best accuracy model (on valid dataset)
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(model.state_dict(), f'./saved_models/{opt.exp_name}/best_accuracy.pth')
                if current_norm_ED > best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(model.state_dict(), f'./saved_models/{opt.exp_name}/best_norm_ED.pth')
                best_model_log = f'{"Best_accuracy":17s}: {best_accuracy:0.3f}, {"Best_norm_ED":17s}: {best_norm_ED:0.2f}'

                loss_model_log = f'{loss_log}\n{current_model_log}\n{best_model_log}'
                print(loss_model_log)
                log.write(loss_model_log + '\n')

                # show some predicted results
                dashed_line = '-' * 80
                head = f'{"Ground Truth":25s} | {"Prediction":25s} | Confidence Score & T/F'
                predicted_result_log = f'{dashed_line}\n{head}\n{dashed_line}\n'
                for gt, pred, confidence in zip(labels[:5], preds[:5], confidence_score[:5]):
                    if 'Attn' in opt.Prediction:
                        # cut both strings at the first '[s]' marker
                        gt = gt[:gt.find('[s]')]
                        pred = pred[:pred.find('[s]')]

                    predicted_result_log += f'{gt:25s} | {pred:25s} | {confidence:0.4f}\t{str(pred == gt)}\n'
                predicted_result_log += f'{dashed_line}'
                print(predicted_result_log)
                log.write(predicted_result_log + '\n')

        # save model per 1e+5 iter.
        if (iteration + 1) % 1e+5 == 0:
            torch.save(
                model.state_dict(), f'./saved_models/{opt.exp_name}/iter_{iteration+1}.pth')

        if (iteration + 1) == opt.num_iter:
            print('end the training')
            sys.exit()
        iteration += 1
def __init__(self):
    """Create six 3x3, stride-1 conv layers and three fully connected layers.

    Each conv weight is re-initialised with ``init.kaiming_normal_`` right
    after the layer is created; the Linear layers keep their default
    initialisation.
    """
    super(Net, self).__init__()

    # (in_channels, out_channels) for conv1 .. conv6.
    conv_channels = (
        (1, 64),
        (64, 64),
        (64, 128),
        (128, 128),
        (128, 256),
        (256, 256),
    )
    for idx, (c_in, c_out) in enumerate(conv_channels, start=1):
        # positional args: in_channels, out_channels, kernel_size, stride
        conv = nn.Conv2d(c_in, c_out, 3, 1)
        setattr(self, 'conv%d' % idx, conv)
        init.kaiming_normal_(conv.weight)

    self.fc1 = nn.Linear(3 * 3 * 256, 2048)
    self.fc2 = nn.Linear(2048, 512)
    self.fc3 = nn.Linear(512, 10)
def __init__(
        self,
        n_state: int,
        n_obs: int,
        n_ctrl_state: Optional[int],
        n_ctrl_obs: Optional[int],
        n_switch: int,
        n_base_A: Optional[int],
        n_base_B: Optional[int],
        n_base_C: Optional[int],
        n_base_D: Optional[int],
        n_base_R: int,
        n_base_Q: int,
        switch_link_type: SwitchLinkType,
        switch_link_dims_hidden: tuple = tuple(),
        switch_link_activations: nn.Module = nn.LeakyReLU(0.1, inplace=True),
        make_cov_from_cholesky_avg=False,
        b_fn: Optional[nn.Module] = None,
        d_fn: Optional[nn.Module] = None,
        init_scale_A: (float, None) = None,
        init_scale_B: (float, None) = None,
        init_scale_C: Optional[float] = None,
        init_scale_D: Optional[float] = None,
        init_scale_Q_diag: Optional[Union[float, Sequence[float]]] = None,
        init_scale_R_diag: Optional[Union[float, Sequence[float]]] = None,
        requires_grad_A: bool = True,
        requires_grad_B: bool = True,
        requires_grad_C: bool = True,
        requires_grad_D: bool = True,
        requires_grad_R: bool = True,
        requires_grad_Q: bool = True,
        full_cov_R: bool = True,
        full_cov_Q: bool = True,
        LQinv_logdiag_limiter: Optional[nn.Module] = None,
        LRinv_logdiag_limiter: Optional[nn.Module] = None,
        LRinv_logdiag_scaling: Optional[float] = None,
        LQinv_logdiag_scaling: Optional[float] = None,
        A_scaling: Optional[float] = None,
        B_scaling: Optional[float] = None,
        C_scaling: Optional[float] = None,
        D_scaling: Optional[float] = None,
        eye_init_A: bool = True,  # False -> orthogonal
):
    """Create the switch-link module and the base-matrix parameters.

    Parameters created here (each only when the matching ``n_base_*`` is not
    None; otherwise ``_A`` / ``_B`` / ``_C`` / ``_D`` are registered as None):

    * ``LQinv_tril``: zeros of shape ``(n_base_Q, n_obs, n_obs)`` when
      ``full_cov_Q`` is set, else registered as None.
    * ``_LQinv_logdiag``: from ``self.make_cov_init`` when
      ``init_scale_Q_diag`` is a list/tuple (the default ``[1e-4, 1e0]``),
      otherwise ``-log(init_scale_Q_diag)`` in shape ``(n_base_Q, n_obs)``.
    * ``LRinv_tril`` / ``_LRinv_logdiag``: same scheme with ``n_base_R``
      and ``n_state``.
    * ``_A``: ``(n_base_A, n_state, n_state)``, identity or orthogonal
      (``eye_init_A``), multiplied by ``init_scale_A``.
    * ``_B``: ``(n_base_B, n_state, n_ctrl_state)``.
    * ``_C``: ``(n_base_C, n_obs, n_state)``.
    * ``_D``: ``(n_base_D, n_obs, n_ctrl_obs)``.

    ``_B``, ``_C`` and ``_D`` are ``init_scale_* * randn`` when the scale is
    given, otherwise a stack of per-base ``kaiming_normal_`` matrices with
    ``nonlinearity="linear"``.

    Every created log-diag / A / B / C / D parameter is divided in place by
    its ``*_scaling`` argument.

    NOTE(review): the ``*_scaling`` arguments default to None, and the
    in-place divisions below do not guard against None -- it looks like
    callers must always pass numeric scalings; confirm.

    Raises:
        AssertionError: when the identity/shared link type is used with
            base counts that are not all equal (identity additionally
            requires them to equal ``n_switch``).
        Exception: for an unknown ``switch_link_type``.
    """
    super().__init__()
    self.make_cov_from_cholesky_avg = make_cov_from_cholesky_avg
    # Fall back to a no-op limiter when none is supplied.
    self.LQinv_logdiag_limiter = (LQinv_logdiag_limiter
                                  if LQinv_logdiag_limiter is not None else
                                  torch.nn.Identity())
    self.LRinv_logdiag_limiter = (LRinv_logdiag_limiter
                                  if LRinv_logdiag_limiter is not None else
                                  torch.nn.Identity())
    # scaling factors: trick to roughly correct for wrong assumption in ADAM
    # that all params should receive similar total updates (gradient norm)
    # and thus have similar scale. --> Should fix ADAM though!
    self._LRinv_logdiag_scaling = LRinv_logdiag_scaling
    self._LQinv_logdiag_scaling = LQinv_logdiag_scaling
    self._A_scaling = A_scaling
    self._B_scaling = B_scaling
    self._C_scaling = C_scaling
    self._D_scaling = D_scaling

    self.b_fn = b_fn
    self.d_fn = d_fn

    # ***** Switch Link function *****
    # Base counts of all matrices that are actually used (not None).
    n_bases = [
        n for n in [
            n_base_A,
            n_base_B,
            n_base_C,
            n_base_D,
            n_base_R,
            n_base_Q,
        ] if n is not None
    ]
    if switch_link_type == SwitchLinkType.identity:
        assert len(set(n_bases)) == 1 and n_bases[0] == n_switch, (
            f"n_base: {n_bases} should match switch dim {n_switch} "
            f"when using identity link.")
    elif switch_link_type == SwitchLinkType.shared:
        assert len(set(n_bases)) == 1

    # presumably filter_out_none drops the entries whose value is None;
    # it is defined elsewhere -- verify.
    names_and_dims_out = filter_out_none({
        "A": n_base_A,
        "B": n_base_B,
        "C": n_base_C,
        "D": n_base_D,
        "R": n_base_R,
        "Q": n_base_Q,
    })
    # The link type is compared through ``.value`` here, unlike the enum
    # comparison in the assertions above.
    if switch_link_type.value == SwitchLinkType.individual.value:
        self.link_transformers = IndividualLink(
            dim_in=n_switch,
            names_and_dims_out=names_and_dims_out,
            dims_hidden=switch_link_dims_hidden,
            activations_hidden=switch_link_activations,
        )
    elif switch_link_type.value == SwitchLinkType.identity.value:
        self.link_transformers = IdentityLink(
            names=tuple(names_and_dims_out.keys()))
    elif switch_link_type.value == SwitchLinkType.shared.value:
        dims = [dim for dim in names_and_dims_out.values()]  # strip None
        assert len(set(dims)) == 1
        dim_out = dims[0]
        self.link_transformers = SharedLink(
            dim_in=n_switch,
            dim_out=dim_out,
            names=tuple(names_and_dims_out.keys()),
        )
    else:
        raise Exception(f"unknown switch link type: {switch_link_type}")

    # ***** Initialise GLS Parameters *****
    if n_base_Q is not None:
        if full_cov_Q:  # tril part is always initialised zero
            self.LQinv_tril = nn.Parameter(
                torch.zeros((n_base_Q, n_obs, n_obs)),
                requires_grad=requires_grad_Q,
            )
        else:
            self.register_parameter("LQinv_tril", None)
        init_scale_Q_diag = (init_scale_Q_diag
                             if init_scale_Q_diag is not None else
                             [1e-4, 1e0])
        # A list/tuple is handed to make_cov_init (defined elsewhere);
        # a scalar fills every entry with -log(scale).
        if isinstance(init_scale_Q_diag, (list, tuple)):
            self._LQinv_logdiag = nn.Parameter(
                self.make_cov_init(
                    init_scale_cov_diag=init_scale_Q_diag,
                    n_base=n_base_Q,
                    dim_cov=n_obs,
                ),
                requires_grad=requires_grad_Q,
            )
        else:
            self._LQinv_logdiag = nn.Parameter(
                torch.ones(
                    (n_base_Q, n_obs)) * -math.log(init_scale_Q_diag),
                requires_grad=requires_grad_Q,
            )
        # Cannot use setter with nn.Module and nn.Parameter
        self._LQinv_logdiag.data /= self._LQinv_logdiag_scaling

    if n_base_R is not None:
        if full_cov_R:  # tril part is always initialised zero
            self.LRinv_tril = nn.Parameter(
                torch.zeros((n_base_R, n_state, n_state)),
                requires_grad=requires_grad_R,
            )
        else:
            self.register_parameter("LRinv_tril", None)
        init_scale_R_diag = (init_scale_R_diag
                             if init_scale_R_diag is not None else
                             [1e-4, 1e0])
        if isinstance(init_scale_R_diag, (list, tuple)):
            self._LRinv_logdiag = nn.Parameter(
                self.make_cov_init(
                    init_scale_cov_diag=init_scale_R_diag,
                    n_base=n_base_R,
                    dim_cov=n_state,
                ),
                requires_grad=requires_grad_R,
            )
        else:
            self._LRinv_logdiag = nn.Parameter(
                torch.ones(
                    (n_base_R, n_state)) * -math.log(init_scale_R_diag),
                requires_grad=requires_grad_R,
            )
        # Cannot use setter with nn.Module and nn.Parameter
        self._LRinv_logdiag.data /= self._LRinv_logdiag_scaling

    if n_base_A is not None:
        if init_scale_A is not None:
            init_scale_A = torch.tensor(init_scale_A)
        else:
            # NOTE(review): reads self.LRinv_logdiag (no leading
            # underscore), presumably a property defined elsewhere on the
            # class; it also requires the R block above to have run --
            # confirm.
            init_var_avg_R = torch.mean(
                torch.exp(-2 * self.LRinv_logdiag),
                dim=0,
            )
            # Heuristic: transition var + innovation noise var = 1, where\
            # transition and noise param are averaged from base mats.
            init_var_A = 1 - init_var_avg_R
            init_scale_A = (torch.sqrt(init_var_A))[None, :, None]
        if eye_init_A:
            A_diag = torch.eye(n_state).repeat(n_base_A, 1, 1)
        else:
            A_diag = torch.nn.init.orthogonal_(
                torch.empty(n_base_A, n_state, n_state), )
        # broadcast basemat-dim and columns -> scale columns; or scalar
        self._A = nn.Parameter(
            init_scale_A * A_diag,
            requires_grad=requires_grad_A,
        )
        # Cannot use setter with nn.Module and nn.Parameter
        self._A.data /= self._A_scaling
    else:
        self.register_parameter("_A", None)

    if n_base_B is not None:
        if init_scale_B is not None:
            self._B = nn.Parameter(
                init_scale_B * torch.randn(n_base_B, n_state, n_ctrl_state),
                requires_grad=requires_grad_B,
            )
        else:
            # One independently initialised matrix per base, stacked on dim 0.
            self._B = nn.Parameter(
                torch.stack(
                    [
                        kaiming_normal_(
                            tensor=torch.empty(n_state, n_ctrl_state),
                            nonlinearity="linear",
                        ) for n in range(n_base_B)
                    ],
                    dim=0,
                ),
                requires_grad=requires_grad_B,
            )
        self._B.data /= self._B_scaling
    else:
        self.register_parameter("_B", None)

    if n_base_C is not None:
        if init_scale_C is not None:
            self._C = nn.Parameter(
                init_scale_C * torch.randn(n_base_C, n_obs, n_state),
                requires_grad=requires_grad_C,
            )
        else:
            self._C = nn.Parameter(
                torch.stack(
                    [
                        kaiming_normal_(
                            tensor=torch.empty(n_obs, n_state),
                            nonlinearity="linear",
                        ) for n in range(n_base_C)
                    ],
                    dim=0,
                ),
                requires_grad=requires_grad_C,
            )
        self._C.data /= self._C_scaling
    else:
        self.register_parameter("_C", None)

    if n_base_D is not None:
        if init_scale_D is not None:
            self._D = nn.Parameter(
                init_scale_D * torch.randn(n_base_D, n_obs, n_ctrl_obs),
                requires_grad=requires_grad_D,
            )
        else:
            self._D = nn.Parameter(
                torch.stack(
                    [
                        kaiming_normal_(
                            tensor=torch.empty(n_obs, n_ctrl_obs),
                            nonlinearity="linear",
                        ) for n in range(n_base_D)
                    ],
                    dim=0,
                ),
                requires_grad=requires_grad_D,
            )
        self._D.data /= self._D_scaling
    else:
        self.register_parameter("_D", None)
def build_network(self):
    """Create and initialise the layers for the configured input type.

    For observation-vector input two Linear layers (``fc1``, ``fc2``) are
    built. Otherwise three Conv2d layers, each followed by a LayerNorm over
    its output shape, plus one Linear layer (``fc1``) are built. In both
    cases a final Linear layer ``fc_last`` maps ``fc_layers_dims[-1]`` to
    ``n_actions``.

    Hidden layers get Kaiming-normal weights, ``fc_last`` gets Xavier-normal
    weights, and every bias is zeroed.
    """

    def he_init(layer):
        # Kaiming-normal weight + zero bias, applied in place.
        torch_init.kaiming_normal_(layer.weight.data)
        torch_init.zeros_(layer.bias.data)

    if self.nn.input_type == INPUT_TYPE_OBSERVATION_VECTOR:
        self.fc1 = torch.nn.Linear(*self.nn.input_dims, self.nn.fc_layers_dims[0])
        he_init(self.fc1)

        self.fc2 = torch.nn.Linear(self.nn.fc_layers_dims[0], self.nn.fc_layers_dims[1])
        he_init(self.fc2)
    else:  # self.input_type == INPUT_TYPE_STACKED_FRAMES
        frames_stack_size = ATARI_FRAMES_STACK_SIZE
        self.in_channels = frames_stack_size * self.image_channels

        # One (filters, kernel_size, stride, padding) tuple per conv layer.
        conv_specs = (
            (32, 8, 4, 1),
            (64, 4, 2, 0),
            (128, 3, 1, 0),
        )

        # Propagate the spatial size through all conv layers up front.
        out_h, out_w = self.nn.input_dims[0], self.nn.input_dims[1]
        out_dims = []
        for spec in conv_specs:
            out_h, out_w = calc_conv_layer_output_dims(out_h, out_w, *spec[1:])
            out_dims.append((out_h, out_w))

        self.flat_dims = conv_specs[-1][0] * out_h * out_w

        prev_channels = self.in_channels
        for idx, (spec, (o_h, o_w)) in enumerate(zip(conv_specs, out_dims), start=1):
            conv = torch.nn.Conv2d(prev_channels, *spec)
            setattr(self, 'conv%d' % idx, conv)
            he_init(conv)
            # Despite the "_bn" suffix this is a LayerNorm over (C, H, W).
            setattr(self, 'conv%d_bn' % idx,
                    torch.nn.LayerNorm([spec[0], o_h, o_w]))
            prev_channels = spec[0]

        self.fc1 = torch.nn.Linear(self.flat_dims, self.nn.fc_layers_dims[0])
        he_init(self.fc1)

    self.fc_last = torch.nn.Linear(self.nn.fc_layers_dims[-1], self.nn.n_actions)
    torch_init.xavier_normal_(self.fc_last.weight.data)
    torch_init.zeros_(self.fc_last.bias.data)
def _init_weight_parameters(self): for name, module in self.named_modules(): if isinstance(module, torch.nn.Conv2d): init.kaiming_normal_(module.weight.data, a=0, mode='fan_in')
def __init__(self, cardinality, depth, nlabels, base_width, widen_factor=4):
    """ Constructor
    Args:
        cardinality: number of convolution groups.
        depth: number of layers.
        nlabels: number of classes
        base_width: base number of channels in each group.
        widen_factor: factor to adjust the channel dimensionality
    """
    super(CifarResNeXt_MCN, self).__init__()
    self.cardinality = cardinality
    self.depth = depth
    self.block_depth = (self.depth - 2) // 9
    self.base_width = base_width
    self.widen_factor = widen_factor
    self.nlabels = nlabels
    self.output_size = 64
    # Channel widths at the input of stage 1 and the output of stages 1-3.
    self.stages = [
        64, 64 * self.widen_factor, 128 * self.widen_factor,
        256 * self.widen_factor
    ]

    self.conv_1_3x3 = nn.Conv2d(3, 64, 3, 1, 1, bias=False)
    self.bn_1 = nn.BatchNorm2d(64)
    self.stage_1 = self.block('stage_1', self.stages[0], self.stages[1], 1)
    self.stage_2 = self.block('stage_2', self.stages[1], self.stages[2], 2)
    self.stage_3 = self.block('stage_3', self.stages[2], self.stages[3], 2)
    self.classifier = nn.Linear(2048, nlabels)
    init.kaiming_normal_(self.classifier.weight)

    self.MCN_block1 = models.MCN_block(xs_dim=[256, 512, 1024],
                                       out_dim=[256, 2048 - 256],
                                       x_strides=[4, 2, 1],
                                       bn_pool_flag=True)
    self.MCN_block2 = models.MCN_block(xs_dim=[256, 1024, 2048],
                                       out_dim=[256, 2048 - 256],
                                       x_strides=[8, 2, 2],
                                       bn_pool_flag=True)

    self.relu = nn.LeakyReLU(negative_slope=0.15, inplace=True)
    self.avg = nn.AvgPool2d(kernel_size=4, stride=1)

    # Name-based initialisation. The tensors returned by state_dict() share
    # storage with the live parameters/buffers, so the in-place writes below
    # update the model. The dict is built once instead of once per key.
    state = self.state_dict()
    for key, tensor in state.items():
        suffix = key.split('.')[-1]
        if suffix == 'weight':
            if 'conv' in key:
                init.kaiming_normal_(tensor, mode='fan_out')
            if 'bn' in key:
                tensor[...] = 1
        elif suffix == 'bias':
            tensor[...] = 0
def build(self, input_shape, dtype=torch.float64):
    # Create the layer's parameters for the given input shape: an optional
    # `kernel` (when self.weight_twice), the dendritic weight `dendriticW`,
    # and, when self.use_bias, an optional `bias` plus `dendriticB`.
    # Every created parameter is also appended to self.params, and
    # self.built is set to True at the end.
    #
    # input_shape: indexable shape; only its length, its last entry and
    #   (for version 2) its middle entries are read here.
    # dtype: dtype used for every tensor allocated in this method.
    print("building")
    self.input_shapes = input_shape
    self.len_input = len(self.input_shapes)
    self.connections = self.input_shapes[-1]
    if self.dendrite_mode == self.modes[1]:  # sparse
        self.connections -= self.dendrite_shift
    elif self.dendrite_mode == self.modes[2]:  # overlap:
        self.connections += self.dendrite_shift
    if self.dendrites is None:
        self.segmenter()  # list of dendrites per neuron
    if self.version == 4:
        # NOTE(review): `torch.constant` looks like a TensorFlow leftover and
        # is probably not a valid torch function -- confirm whether version 4
        # is ever used.
        self.dendrites = torch.constant(self.dendrites)
    self.pre_dendrites = self.connections * self.units  # neurons*previous_layer_neurons
    # Version 1 stores the dendritic weight transposed relative to the others.
    if self.version != 1:
        dwshape = [self.units, self.seql]
    else:
        dwshape = [self.seql, self.units]
    # dwshape=[self.units,self.seql,*[1 for _ in range(self.len_input-1)]]
    # self.num_dendrites=self.pre_dendrites/self.dendrite_size
    # if self.bigger_dendrite:
    # self.num_dendrites=math.floor(self.num_dendrites)
    # else:
    # self.num_dendrites=math.ceil(self.num_dendrites)
    # input_shape = tensor_shape.TensorShape(input_shape)
    if self.version == 2:
        # Pre-compute the reshape targets stored for later use.
        if len(self.input_shapes) > 2:
            part_inshape = (*self.input_shapes[1:-1], -1)
        else:
            part_inshape = (-1, )
        self.debuildshape = (self.units * self.connections, *part_inshape)
        self.deseqshape = (self.units * self.connections, )
        self.rebuildshape = (self.units, self.seql, *part_inshape)
    print('line228')
    if self.weight_twice:
        # The string below is disabled legacy code kept as a no-op expression.
        """if self.uniqueW==2:#useless since all input are there once, could also work with sparse print([self.dendrite_size,self.seql, self.units]) self.kernel=self.add_variable('Weight',shape=[*[1 for _ in range(self.len_input-1)],self.dendrite_size,self.seql, self.units], initializer=self.Weight_initializer,regularizer=self.Weight_regularizer, constraint=self.Weight_constraint,dtype=self.dtype, trainable=True)"""
        if self.uniqueW:
            kernel = torch.empty(*[1 for _ in range(self.len_input - 1)],
                                 self.input_shapes[-1],
                                 self.units,
                                 dtype=dtype)
        else:
            kernel = torch.empty(1, self.units, dtype=dtype)
        # NOTE(review): `kaiming_normal` (no trailing underscore) is
        # presumably the deprecated alias of `kaiming_normal_` -- confirm.
        finit.kaiming_normal(kernel)
        self.kernel = nn.Parameter(kernel)
        self.register_parameter('kernel', self.kernel)
        self.params.append(self.kernel)
    print('line246')
    dw = torch.empty(*dwshape, dtype=dtype)
    finit.kaiming_normal(dw)
    self.dendriticW = nn.Parameter(dw)
    self.params.append(self.dendriticW)
    print(self.dendriticW)
    print("added dendw")
    if self.use_bias:
        if self.weight_twice:
            if self.uniqueW:
                b = torch.empty(self.input_shapes[-1], self.units, dtype=dtype)
            else:
                b = torch.empty(1, self.units, dtype=dtype)
            # Fall back to Xavier init if Kaiming init raises for this tensor.
            try:
                finit.kaiming_normal_(b)
            except:
                finit.xavier_normal_(b)
            self.bias = nn.Parameter(b)
            # NOTE(review): the same tensor is registered again under the
            # differently-cased name 'Bias' -- presumably unintended; confirm.
            self.register_parameter('Bias', self.bias)
            self.params.append(self.bias)
        if self.uniqueW:
            db = torch.empty(self.seql, self.units, dtype=dtype)
        else:
            db = torch.empty(1, self.units, dtype=dtype)
        finit.kaiming_normal_(db)
        self.dendriticB = nn.Parameter(db)
        self.params.append(self.dendriticB)
        # Registered under 'dendritic_B' in addition to the attribute name.
        self.register_parameter('dendritic_B', self.dendriticB)
    print("supered")
    #self.register_parameter('dentritic_W', self.dendriticW)
    self.built = True
    print('builded')
def __init__(self,
             input_channels,
             base_num_channels=30,
             num_pool=3,
             num_classes=152):
    """Build the down/up-sampling pathways and initialise the conv weights.

    Args:
        input_channels: channel count fed to the first conv block.
        base_num_channels: output channels of the first level; doubled
            after every downsampling level.
        num_pool: number of downsampling (and upsampling) levels.
        num_classes: output channels of the final 1x1x1 convolution.
    """
    super().__init__()
    self.upsample_mode = 'trilinear'
    self.pool = nn.MaxPool3d

    down_convs = []
    down_pools = []
    up_convs = []
    up_samplers = []

    # Downsampling pathway: a two-conv stack and a 2x2x2 pooling per level.
    in_ch = input_channels
    out_ch = base_num_channels
    for _ in range(num_pool):
        down_convs.append(nnUnetConvBlockStack(in_ch, out_ch, 2))
        down_pools.append(self.pool([2, 2, 2]))
        in_ch, out_ch = out_ch, out_ch * 2

    # Bottleneck: widen, then return to the width of the deepest level.
    final_ch = down_convs[-1].output_channels
    down_convs.append(
        nn.Sequential(
            nnUnetConvBlockStack(in_ch, out_ch, 1),
            nnUnetConvBlockStack(out_ch, final_ch, 1),
        ))

    # Upsampling pathway, walking the skip connections from deep to shallow.
    for level in range(num_pool):
        skip_ch = down_convs[-(2 + level)].output_channels
        concat_ch = skip_ch * 2
        is_last_level = level == num_pool - 1
        if is_last_level:
            final_ch = skip_ch
        else:
            final_ch = down_convs[-(3 + level)].output_channels

        up_samplers.append(
            Upsample(scale_factor=[2, 2, 2], mode=self.upsample_mode))
        up_convs.append(
            nn.Sequential(
                nnUnetConvBlockStack(concat_ch, skip_ch, 1),
                nnUnetConvBlockStack(skip_ch, final_ch, 1),
            ))

    # 1x1x1 conv without bias that maps to the class channels.
    self.segmentation_output = nn.Conv3d(
        up_convs[-1][-1].output_channels,
        num_classes,
        kernel_size=1,
        stride=1,
        padding=0,
        dilation=1,
        groups=1,
        bias=False,
    )

    # Register the collected layers as submodules.
    self.downsample_path_convs = nn.ModuleList(down_convs)
    self.downsample_path_pooling = nn.ModuleList(down_pools)
    self.upsample_path_convs = nn.ModuleList(up_convs)
    self.upsample_path_upsampling = nn.ModuleList(up_samplers)
    self.segmentation_output = nn.ModuleList([self.segmentation_output])

    # Weight initialisation for every (transposed) convolution.
    from torch.nn.init import kaiming_normal_
    conv_types = (nn.Conv3d, nn.Conv2d, nn.ConvTranspose2d,
                  nn.ConvTranspose3d)
    for module in self.modules():
        if not isinstance(module, conv_types):
            continue
        kaiming_normal_(module.weight, a=1e-2, nonlinearity='leaky_relu')
        if module.bias is not None:
            nn.init.constant_(module.bias, 0)
def weights_init(m):
    """Initialise a ``torch.nn.Conv2d`` in place; ignore any other module.

    The weight gets Kaiming-normal values (``mode='fan_out'``) and the bias,
    when the layer has one, is set to 0. Layers created with ``bias=False``
    expose ``bias`` as ``None`` and are handled without error.

    Intended for ``model.apply(weights_init)``.
    """
    if isinstance(m, torch.nn.Conv2d):
        init.kaiming_normal_(m.weight, mode='fan_out')
        if m.bias is not None:
            init.constant_(m.bias, 0)
print(new_model)
#%%
# Extract all convolution layers
conv_model = nn.Sequential()
for name, module in model.named_modules():
    if isinstance(module, nn.Conv2d):
        # named_modules() yields dotted paths for nested layers, but
        # add_module() rejects names containing '.', so flatten the path.
        conv_model.add_module(name.replace('.', '_'), module)
print(conv_model)
#%%
# Extract the parameters of the model
for name, param in model.named_parameters():
    print('{} : {}'.format(name, param.shape))
#%%
# Weight initialisation
from torch.nn import init
for m in model.modules():
    if isinstance(m, nn.Conv2d):
        # Three initialisers are applied one after another; each overwrites
        # the previous result, so the Kaiming values are the ones that stay.
        init.normal_(m.weight.data)
        init.xavier_normal_(m.weight.data)
        init.kaiming_normal_(m.weight.data)
        # Convs created with bias=False have m.bias set to None.
        if m.bias is not None:
            m.bias.data.fill_(0)
    elif isinstance(m, nn.Linear):
        m.weight.data.normal_()
def _weights_init(m): if isinstance(m, nn.Conv2d or nn.Linear): kaiming_normal_(m.weight) elif isinstance(m, nn.BatchNorm2d or nn.BatchNorm1d): m.weight.data.fill_(1) m.bias.data.zero_()
def conv_params(ni, no, k=1):
    """Return a new Kaiming-normal-initialised conv weight tensor.

    Args:
        ni: number of input channels.
        no: number of output channels.
        k: square kernel size.

    Returns:
        A tensor of shape ``(no, ni, k, k)``.
    """
    weight = torch.Tensor(no, ni, k, k)
    # kaiming_normal_ fills the tensor in place and returns that same tensor.
    kaiming_normal_(weight)
    return weight
def reset_parameters(self):
    """Re-initialise this module's parameters in place.

    ``word_linear`` gets a Kaiming-normal weight and a zero bias, the
    tree-LSTM layer resets itself via its own ``reset_parameters``, and
    ``comp_query`` is drawn from N(0, 0.01).
    """
    projection = self.word_linear
    init.kaiming_normal_(projection.weight.data)
    init.constant_(projection.bias.data, val=0)

    self.treelstm_layer.reset_parameters()

    query = self.comp_query
    init.normal_(query.data, mean=0, std=0.01)
def weights_init(m):
    """Initialise an ``nn.Conv2d`` in place; do nothing for other modules.

    The weight gets Kaiming-normal values and the bias, if the layer has
    one, is set to 0.0.
    """
    if not isinstance(m, nn.Conv2d):
        return

    init.kaiming_normal_(m.weight.data)

    bias = m.bias
    if bias is None:  # layer was created without a bias term
        return
    nn.init.constant_(bias, 0.0)
def __init__(
        self,
        in_channels: int = 3,
        out_channels: int = 1000,
        initial_num_features: int = 48,
        dropout: float = 0.2,
        down_dense_growth_rates: Union[int, Sequence[int]] = 16,
        down_dense_bottleneck_ratios: Union[Optional[int], Sequence[Optional[int]]] = None,
        down_dense_num_layers: Union[int, Sequence[int]] = (4, 5, 7, 10, 12),
        down_transition_compression_factors: Union[float, Sequence[float]] = 1.0,
        middle_dense_growth_rate: int = 16,
        middle_dense_bottleneck: Optional[int] = None,
        middle_dense_num_layers: int = 15,
        up_dense_growth_rates: Union[int, Sequence[int]] = 16,
        up_dense_bottleneck_ratios: Union[Optional[int], Sequence[Optional[int]]] = None,
        up_dense_num_layers: Union[int, Sequence[int]] = (12, 10, 7, 5, 4)):
    """Build the network: first conv, down path, middle block, up path, final conv.

    Every ``down_*`` / ``up_*`` argument may be a single value, which is
    repeated for five stages, or a sequence with one entry per stage.
    Sequences are combined with ``zip``, so the shortest one determines
    the number of stages.

    Args:
        in_channels: Channels of the input given to the first convolution.
        out_channels: Channels produced by the final 1x1 convolution.
        initial_num_features: Output channels of the first 3x3 convolution.
        dropout: Dropout value forwarded to every dense layer and
            TransitionDown layer.
        down_dense_growth_rates: Growth rate of each downward DenseBlock.
        down_dense_bottleneck_ratios: Bottleneck ratio of each downward
            DenseBlock (``None`` is forwarded unchanged).
        down_dense_num_layers: Number of layers of each downward DenseBlock.
        down_transition_compression_factors: Compression of each
            TransitionDown layer.
        middle_dense_growth_rate: Growth rate of the middle DenseBlock.
        middle_dense_bottleneck: Bottleneck ratio of the middle DenseBlock.
        middle_dense_num_layers: Number of layers of the middle DenseBlock.
        up_dense_growth_rates: Growth rate of each upward DenseBlock.
        up_dense_bottleneck_ratios: Bottleneck ratio of each upward DenseBlock.
        up_dense_num_layers: Number of layers of each upward DenseBlock.
    """
    super(FCDenseNet, self).__init__()

    # region Parameters handling
    self.in_channels = in_channels
    self.out_channels = out_channels

    num_stages = 5  # scalar arguments are repeated for this many stages

    if isinstance(down_dense_growth_rates, int):
        down_dense_growth_rates = (down_dense_growth_rates, ) * num_stages
    if down_dense_bottleneck_ratios is None or isinstance(down_dense_bottleneck_ratios, int):
        down_dense_bottleneck_ratios = (down_dense_bottleneck_ratios, ) * num_stages
    if isinstance(down_dense_num_layers, int):
        down_dense_num_layers = (down_dense_num_layers, ) * num_stages
    # Accept ints as well as floats: a plain ``1`` used to slip past the
    # float-only check and then fail when iterated below.
    if isinstance(down_transition_compression_factors, (int, float)):
        down_transition_compression_factors = (down_transition_compression_factors, ) * num_stages

    if isinstance(up_dense_growth_rates, int):
        up_dense_growth_rates = (up_dense_growth_rates, ) * num_stages
    if up_dense_bottleneck_ratios is None or isinstance(up_dense_bottleneck_ratios, int):
        up_dense_bottleneck_ratios = (up_dense_bottleneck_ratios, ) * num_stages
    if isinstance(up_dense_num_layers, int):
        up_dense_num_layers = (up_dense_num_layers, ) * num_stages
    # endregion

    # region First convolution
    # The Lasagne implementation uses convolution with 'same' padding, the PyTorch equivalent is padding=1
    self.features = Conv2d(in_channels, initial_num_features, kernel_size=3, padding=1, bias=False)
    current_channels = self.features.out_channels
    # endregion

    # region Downward path
    # Pairs of Dense Blocks with input concatenation and TransitionDown layers
    down_dense_params = [{
        'concat_input': True,
        'growth_rate': gr,
        'num_layers': nl,
        'dense_layer_params': {
            'dropout': dropout,
            'bottleneck_ratio': br
        }
    } for gr, nl, br in zip(down_dense_growth_rates, down_dense_num_layers, down_dense_bottleneck_ratios)]
    down_transition_params = [{
        'dropout': dropout,
        'compression': c
    } for c in down_transition_compression_factors]
    skip_connections_channels = []

    self.down_dense = Module()
    self.down_trans = Module()
    down_pairs_params = zip(down_dense_params, down_transition_params)
    for i, (dense_params, transition_params) in enumerate(down_pairs_params):
        block = DenseBlock(current_channels, **dense_params)
        current_channels = block.out_channels
        self.down_dense.add_module(f'block_{i}', block)

        # Recorded so the matching TransitionUp knows its skip width.
        skip_connections_channels.append(block.out_channels)

        transition = TransitionDown(current_channels, **transition_params)
        current_channels = transition.out_channels
        self.down_trans.add_module(f'trans_{i}', transition)
    # endregion

    # region Middle block
    # Renamed from "bottleneck" in the paper, to avoid confusion with the Bottleneck of DenseLayers
    self.middle = DenseBlock(current_channels,
                             middle_dense_growth_rate,
                             middle_dense_num_layers,
                             concat_input=True,
                             dense_layer_params={
                                 'dropout': dropout,
                                 'bottleneck_ratio': middle_dense_bottleneck
                             })
    current_channels = self.middle.out_channels
    # endregion

    # region Upward path
    # Pairs of TransitionUp layers and Dense Blocks without input concatenation
    up_transition_params = [{
        'skip_channels': sc,
    } for sc in reversed(skip_connections_channels)]
    up_dense_params = [{
        'concat_input': False,
        'growth_rate': gr,
        'num_layers': nl,
        'dense_layer_params': {
            'dropout': dropout,
            'bottleneck_ratio': br
        }
    } for gr, nl, br in zip(up_dense_growth_rates, up_dense_num_layers, up_dense_bottleneck_ratios)]

    self.up_dense = Module()
    self.up_trans = Module()
    up_pairs_params = zip(up_transition_params, up_dense_params)
    for i, (transition_params_up, dense_params_up) in enumerate(up_pairs_params):
        transition = TransitionUp(current_channels, **transition_params_up)
        current_channels = transition.out_channels
        self.up_trans.add_module(f'trans_{i}', transition)

        block = DenseBlock(current_channels, **dense_params_up)
        current_channels = block.out_channels
        self.up_dense.add_module(f'block_{i}', block)
    # endregion

    # region Final convolution
    self.final = Conv2d(current_channels, out_channels, kernel_size=1, bias=False)
    # endregion

    # region Weight initialization
    for module in self.modules():
        if isinstance(module, Conv2d):
            init.kaiming_normal_(module.weight)
        elif isinstance(module, BatchNorm2d):
            module.reset_parameters()
        elif isinstance(module, Linear):
            init.xavier_uniform_(module.weight)
            # Linear(bias=False) has no bias tensor to zero.
            if module.bias is not None:
                init.constant_(module.bias, 0)
    # endregion
def __init__(self, batchNorm=True):
    """Create the encoder, decoder and single-channel shift-prediction heads.

    Args:
        batchNorm: Forwarded as the first argument to every ``conv`` helper call.
    """
    super(_ShiftingNet_my, self).__init__()
    self.batchNorm = batchNorm
    use_bn = self.batchNorm

    # LGQ: the input is changed to grey scale.
    self.conv1 = conv(use_bn, 4, 64, kernel_size=7, stride=2)
    self.conv2 = conv(use_bn, 64, 128, kernel_size=5, stride=2)
    self.conv3 = conv(use_bn, 128, 256, kernel_size=5, stride=2)
    self.conv3_1 = conv(use_bn, 256, 256)
    self.conv4 = conv(use_bn, 256, 512, stride=2)
    self.conv4_1 = conv(use_bn, 512, 512)
    self.conv5 = conv(use_bn, 512, 512, stride=2)
    self.conv5_1 = conv(use_bn, 512, 512)
    self.conv6 = conv(use_bn, 512, 1024, stride=2)
    self.conv6_1 = conv(use_bn, 1024, 1024)

    # LGQ: every concatenated size is the original size minus one, because
    # a single value is predicted here (the earlier two-channel flow
    # variant used 1026 / 770 / 386 / 194 input channels).
    self.deconv5 = deconv(1024, 512)
    self.deconv4 = deconv(1026 - 1, 256)
    self.deconv3 = deconv(770 - 1, 128)
    self.deconv2 = deconv(386 - 1, 64)

    self.predict_shift6 = predict_flow(1024)
    self.predict_shift5 = predict_flow(1026 - 1)
    self.predict_shift4 = predict_flow(770 - 1)
    self.predict_shift3 = predict_flow(386 - 1)
    self.predict_shift2 = predict_flow(194 - 1)

    # LGQ: the upsampled array is one-dimensional (the earlier flow variant
    # used 2 -> 2 channel transposed convolutions).
    self.upsampled_shift6_to_5 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False)
    self.upsampled_shift5_to_4 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False)
    self.upsampled_shift4_to_3 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False)
    self.upsampled_shift3_to_2 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False)

    # Kaiming-normal (a=0.1) for all (transposed) convolutions, identity
    # affine parameters for batch norm.
    for layer in self.modules():
        if isinstance(layer, (nn.Conv2d, nn.ConvTranspose2d)):
            kaiming_normal_(layer.weight, a=0.1)
            if layer.bias is not None:
                constant_(layer.bias, 0)
        elif isinstance(layer, nn.BatchNorm2d):
            constant_(layer.weight, 1)
            constant_(layer.bias, 0)
def initialiser(module):
    """Kaiming-normal initialise ``module.weight`` in place.

    ``a``, ``mode`` and ``nonlinearity`` are not parameters of this function;
    they are taken from the surrounding scope.
    """
    weight = module.weight.data
    init.kaiming_normal_(weight, a=a, mode=mode, nonlinearity=nonlinearity)
def _weights_init(m):
    """Kaiming-normal initialise the weight of ``nn.Linear`` / ``nn.Conv2d`` modules.

    Any other module is left untouched.
    """
    if not isinstance(m, (nn.Linear, nn.Conv2d)):
        return
    init.kaiming_normal_(m.weight)
def weights_init_kaiming(m):
    """Kaiming-normal initialise modules whose class name contains ``'Conv2d'``.

    Adapted from
    https://github.com/lizhengwei1992/ResidualDenseNetwork-Pytorch/blob/master/main.py
    """
    if 'Conv2d' in type(m).__name__:
        init.kaiming_normal_(m.weight.data)
def __init__(self):
    """Build the 4x, 2x and pixel-wise branches and initialise their weights."""
    super(MotionCompensateSubnet, self).__init__()

    def conv(in_ch, out_ch, k, stride=1):
        # Bias-free convolution; padding k // 2 gives 2 for 5x5 and 1 for 3x3.
        return nn.Conv2d(in_ch, out_ch, kernel_size=k, stride=stride,
                         padding=k // 2, bias=False)

    def bn_relu():
        # Normalisation + activation that follows each 24-channel convolution.
        return [nn.BatchNorm2d(24), nn.ReLU(inplace=True)]

    self.downsample_4x = nn.Sequential(
        conv(2, 24, 5, stride=2), *bn_relu(),
        conv(24, 24, 3), *bn_relu(),
        conv(24, 24, 5, stride=2), *bn_relu(),
        conv(24, 24, 3), *bn_relu(),
        conv(24, 32, 3),
    )
    self.ps_4x = nn.PixelShuffle(4)

    self.downsample_2x = nn.Sequential(
        conv(5, 24, 5, stride=2), *bn_relu(),
        conv(24, 24, 3), *bn_relu(),
        conv(24, 24, 5), *bn_relu(),
        conv(24, 24, 3),
        # NOTE(review): two BatchNorm2d layers in a row, unlike the other
        # branches - looks like a copy/paste slip, but dropping one would
        # shift the Sequential indices used in saved checkpoints. Confirm.
        nn.BatchNorm2d(24),
        *bn_relu(),
        conv(24, 8, 3),
    )
    self.ps_2x = nn.PixelShuffle(2)

    self.pixelwise_mc = nn.Sequential(
        conv(5, 24, 3), *bn_relu(),
        conv(24, 24, 3), *bn_relu(),
        conv(24, 24, 5), *bn_relu(),
        conv(24, 24, 3), *bn_relu(),
        conv(24, 2, 3),
    )
    self.ps_1x = nn.PixelShuffle(1)

    for layer in self.modules():
        if isinstance(layer, nn.Conv2d):
            init.kaiming_normal_(layer.weight.data, a=0.1, mode='fan_in', nonlinearity='relu')
            if layer.bias is not None:
                init.constant_(layer.bias.data, 0.0)
        elif isinstance(layer, nn.Linear):
            init.kaiming_normal_(layer.weight.data, a=0.1, mode='fan_in', nonlinearity='relu')
            init.constant_(layer.bias.data, 0.0)
        elif isinstance(layer, nn.BatchNorm2d):
            # Mean 1.0; the standard deviation is init.normal_'s default.
            init.normal_(layer.weight.data, 1.0)
            init.constant_(layer.bias.data, 0.0)
def fc_init_weights(m):
    """Kaiming-normal initialise modules whose type is exactly ``nn.Linear``.

    Subclasses of ``nn.Linear`` are deliberately not matched. Pattern from
    https://stackoverflow.com/questions/49433936/how-to-initialize-weights-in-pytorch/49433937#49433937
    """
    if type(m) is not nn.Linear:
        return
    init.kaiming_normal_(m.weight.data)
def _initialize_weights(self):
    """Initialise the eight convolution weights according to ``self.initWay``.

    ``'kaiming'`` uses Kaiming-normal and ``'ortho'`` uses orthogonal
    initialisation. ``conv4`` and ``conv44`` use the initialiser's default
    settings; the other six use the ReLU-specific settings. Any other
    ``initWay`` prints a message and exits the interpreter.
    """
    # (attribute name, use ReLU-specific settings) in initialisation order.
    schedule = (
        ('conv1', True), ('conv2', True), ('conv3', True), ('conv4', False),
        ('conv11', True), ('conv22', True), ('conv33', True), ('conv44', False),
    )

    if self.initWay == 'kaiming':
        for attr, relu in schedule:
            weight = getattr(self, attr).weight
            if relu:
                init.kaiming_normal_(weight, mode='fan_out', nonlinearity='relu')
            else:
                init.kaiming_normal_(weight)
    elif self.initWay == 'ortho':
        for attr, relu in schedule:
            weight = getattr(self, attr).weight
            if relu:
                init.orthogonal_(weight, init.calculate_gain('relu'))
            else:
                init.orthogonal_(weight)
    else:
        print('Only Kaiming or Orthogonal initializer can be used!')
        exit()
def reset_parameters(self):
    """Re-draw ``self.weight`` in place with Kaiming-normal (fan-out, ReLU) values."""
    kaiming_options = {'mode': 'fan_out', 'nonlinearity': 'relu'}
    init.kaiming_normal_(self.weight, **kaiming_options)
def linear_params(ni, no):
    """Create parameters for a linear layer.

    Args:
        ni: Size of the weight's second dimension.
        no: Size of the weight's first dimension and of the bias.

    Returns:
        Dict with a Kaiming-normal ``'weight'`` of shape ``(no, ni)`` and an
        all-zero ``'bias'`` of shape ``(no,)``.
    """
    weight = kaiming_normal_(torch.Tensor(no, ni))
    bias = torch.zeros(no)
    return dict(weight=weight, bias=bias)
def __init__(self, depth=50, pretrained=True, cut_at_pooling=False, num_features=1024, dropout=0.5, num_classes=0):
    """Assemble the shared backbone plus the regular and STN heads.

    Args:
        depth: Stored on the instance; not otherwise used here.
        pretrained: Forwarded to ``resnet50_ibn_b``; when false,
            ``reset_params`` is called at the end.
        cut_at_pooling: When true, no embedding, dropout or classifier
            layers are created.
        num_features: Embedding width; a value of 0 or less disables the
            embedding layers.
        dropout: Dropout probability; a ``Dropout`` layer is created only
            when it is positive.
        num_classes: Classifier width; classifiers are created only when
            it is positive.
    """
    super(PGSNet, self).__init__()

    self.depth = depth
    self.pretrained = pretrained
    self.cut_at_pooling = cut_at_pooling

    # Construct base (pretrained) resnet; a second copy supplies the
    # separate layer4 of the STN branch.
    base = resnet50_ibn_b(pretrained=pretrained)
    base_stn = resnet50_ibn_b(pretrained=pretrained)

    self.stn = STN()
    self.conv1 = base.conv1
    self.bn1 = base.bn1
    self.relu = base.relu
    self.maxpool = base.maxpool
    self.layer1 = base.layer1
    self.layer2 = base.layer2
    self.layer3 = base.layer3
    self.layer4 = base.layer4
    self.layer4_stn = base_stn.layer4

    # Set stride (1, 1) on every convolution in the first block of both
    # layer4 variants.
    for stage in (self.layer4, self.layer4_stn):
        for mo in stage[0].modules():
            if isinstance(mo, nn.Conv2d):
                mo.stride = (1, 1)

    self.mmaxpool = nn.AdaptiveMaxPool2d((1, 1))

    if not self.cut_at_pooling:
        self.num_features = num_features
        self.dropout = dropout
        self.has_embedding = num_features > 0
        self.num_classes = num_classes

        out_planes = base.fc.in_features

        def make_embedding():
            # Linear + BatchNorm1d pair mapping backbone features to the embedding.
            feat = nn.Linear(out_planes, self.num_features)
            feat_bn = nn.BatchNorm1d(self.num_features)
            init.kaiming_normal_(feat.weight, mode='fan_out')
            init.constant_(feat.bias, 0)
            init.normal_(feat_bn.weight, 1, 0.02)
            init.constant_(feat_bn.bias, 0.0)
            return nn.Sequential(feat, feat_bn)

        def make_classifier():
            # Linear classifier with small-normal weights and zero bias.
            fc = nn.Linear(self.num_features, self.num_classes)
            init.normal_(fc.weight, std=0.001)
            init.constant_(fc.bias, 0.0)
            return fc

        # Append new layers
        if self.has_embedding:
            self.embed_layer = make_embedding()
            self.embed_layer_stn = make_embedding()
        else:
            # Change the num_features to CNN output channels
            self.num_features = out_planes

        if self.dropout > 0:
            self.drop = nn.Dropout(self.dropout)

        if self.num_classes > 0:
            self.last_fc = make_classifier()
            self.last_fc_stn = make_classifier()

    if not self.pretrained:
        self.reset_params()
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, bias=True, kernel_initializer='normal', batch_norm=False, activation=None):
    """Build a depthwise + pointwise convolution stack as ``self.conv``.

    Args:
        in_channels: Input channels; also the depthwise group count.
        out_channels: Output channels of the pointwise convolution.
        kernel_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution.
        padding: Padding of the depthwise convolution.
        dilation: Dilation of the depthwise convolution.
        bias: Whether both convolutions carry a bias.
        kernel_initializer: Accepted but not used by this constructor.
        batch_norm: Append ``BatchNorm2d(out_channels)`` when true.
        activation: Any truthy value appends ``LeakyReLU(0.1)``.
    """
    super(SeparableConv2D, self).__init__()

    depthwise = nn.Conv2d(in_channels=in_channels, out_channels=in_channels,
                          kernel_size=kernel_size, stride=stride,
                          padding=padding, dilation=dilation,
                          groups=in_channels, bias=bias)
    pointwise = nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                          kernel_size=1, stride=1, padding=0, dilation=1,
                          groups=1, bias=bias)

    # Optional stages are appended in order: normalisation, then activation.
    stages = [depthwise, pointwise]
    if batch_norm:
        stages.append(nn.BatchNorm2d(num_features=out_channels))
    if activation:
        stages.append(nn.LeakyReLU(0.1, inplace=True))
    self.conv = nn.Sequential(*stages)

    for layer in self.modules():
        if isinstance(layer, (nn.Conv2d, nn.ConvTranspose2d)):
            kaiming_normal_(layer.weight, a=0.1)
            if layer.bias is not None:
                constant_(layer.bias, 0)
        elif isinstance(layer, nn.BatchNorm2d):
            constant_(layer.weight, 1)
            constant_(layer.bias, 0)
def weight_init(m):
    """Initialise a single module in place, dispatching on its type.

    Intended for ``model.apply(weight_init)``. Based on
    https://gist.github.com/jeasinema/ed9236ce743c8efaf30fa2ff732749f5

    * ``Conv1d`` / ``ConvTranspose1d``: normal weight, normal bias.
    * ``Conv2d`` / ``Conv3d`` / ``ConvTranspose2d`` / ``ConvTranspose3d``:
      Kaiming-normal weight, normal bias.
    * ``BatchNorm1d`` / ``BatchNorm2d`` / ``BatchNorm3d``: weight from
      N(1, 0.02), zero bias.
    * ``Linear``: Kaiming-normal weight, normal bias.
    * ``LSTM`` / ``LSTMCell`` / ``GRU`` / ``GRUCell``: orthogonal for
      parameters with two or more dimensions, normal otherwise.

    Missing parameters (``bias=False``, ``affine=False``) are skipped.
    Modules of any other type are left untouched.

    Args:
        m: The module to initialise.
    """
    normal_weight_convs = (nn.Conv1d, nn.ConvTranspose1d)
    kaiming_weight_convs = (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d, nn.ConvTranspose3d)
    batch_norms = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)
    recurrent = (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)

    if isinstance(m, normal_weight_convs):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, kaiming_weight_convs):
        init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, batch_norms):
        # affine=False layers have neither weight nor bias.
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.kaiming_normal_(m.weight.data)
        # Same guard as the convolution branches: Linear(bias=False) has no bias.
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, recurrent):
        for param in m.parameters():
            # Weight matrices are orthogonalised; bias vectors are 1-D.
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
def _weights_init(m):
    """Apply Kaiming-normal initialisation to ``nn.Linear`` and ``nn.Conv2d`` weights.

    Modules of any other type are ignored.
    """
    kaiming_targets = (nn.Linear, nn.Conv2d)
    if isinstance(m, kaiming_targets):
        init.kaiming_normal_(m.weight)
def linear_params(ni, no):
    """Return ``{'weight', 'bias'}`` tensors for a linear layer.

    The weight has shape ``(no, ni)`` and is Kaiming-normal initialised;
    the bias has shape ``(no,)`` and is all zeros.
    """
    params = {}
    params['weight'] = kaiming_normal_(torch.Tensor(no, ni))
    params['bias'] = torch.zeros(no)
    return params
def __init__(self, depth=256, in_f=2048, cardinality=1, exist_decoder=True, tasks=None, squeeze=False, se_after_relu=True, norm_per_task=False):
    """Build the classifier convolutions and initialise their parameters.

    Args:
        depth: Output channels of ``conv2d`` and of ``conv2d_final``.
        in_f: Input channels of ``conv2d``.
        cardinality: ``groups`` of the first convolution.
        exist_decoder: When true, ``conv2d_final`` is a 3x3 ``ConvCoupledSE``
            taking ``depth`` channels; otherwise it is a plain 1x1
            convolution taking ``depth * 5`` channels.
        tasks: Forwarded to ``ConvCoupledSE``.
        squeeze: Stored on the instance and forwarded to ``ConvCoupledSE``.
        se_after_relu: Forwarded to ``ConvCoupledSE``.
        norm_per_task: Forwarded to ``ConvCoupledSE``.
    """
    super(ConvClassifier, self).__init__()

    self.bnorm = nn.BatchNorm2d
    self.squeeze = squeeze

    # ``affine_par`` is a module-level setting defined outside this method.
    kwargs = {"num_features": depth, "affine": affine_par}

    self.conv2d = ConvCoupledSE(tasks=tasks,
                                process_layers=nn.Conv2d(in_f, depth, kernel_size=3, stride=1, padding=1,
                                                         dilation=1, bias=False, groups=cardinality),
                                norm=self.bnorm,  # was misspelled ``self.bnom`` (AttributeError)
                                norm_kwargs=kwargs,
                                norm_per_task=norm_per_task,
                                squeeze=self.squeeze,
                                se_after_relu=se_after_relu)

    if exist_decoder:
        self.conv2d_final = ConvCoupledSE(tasks=tasks,
                                          process_layers=nn.Conv2d(depth, depth, kernel_size=3, stride=1,
                                                                   padding=1, dilation=1, bias=False),
                                          norm=self.bnorm,
                                          norm_kwargs=kwargs,
                                          norm_per_task=norm_per_task,
                                          squeeze=self.squeeze,
                                          se_after_relu=se_after_relu)
    else:
        self.conv2d_final = nn.Sequential(
            nn.Conv2d(depth * 5, depth, kernel_size=1, stride=1, bias=True))

    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            m.weight.data.normal_(0, 0.01)
        elif isinstance(m, (nn.BatchNorm2d, self.bnorm)):
            # With affine=False the norm layer has no weight or bias.
            if m.weight is not None:
                m.weight.data.fill_(1)
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            init.kaiming_normal_(m.weight)
            if m.bias is not None:
                m.bias.data.zero_()