def _init_weights(self, m):
    if isinstance(m, nn.Conv2d):
        if cfg.KRCNN.CONV_INIT == 'GaussianFill':
            init.normal_(m.weight, std=0.01)
        elif cfg.KRCNN.CONV_INIT == 'MSRAFill':
            mynn.init.MSRAFill(m.weight)
        else:
            raise ValueError('Unexpected cfg.KRCNN.CONV_INIT: {}'.format(cfg.KRCNN.CONV_INIT))
        init.constant_(m.bias, 0)
def _init_weights(self, m):
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        if cfg.MRCNN.CONV_INIT == 'GaussianFill':
            init.normal_(m.weight, std=0.001)
        elif cfg.MRCNN.CONV_INIT == 'MSRAFill':
            mynn.init.MSRAFill(m.weight)
        else:
            raise ValueError('Unexpected cfg.MRCNN.CONV_INIT: {}'.format(cfg.MRCNN.CONV_INIT))
        init.constant_(m.bias, 0)
def __init__(self, version=1.0, num_classes=1000):
    super(SqueezeNet, self).__init__()
    if version not in [1.0, 1.1]:
        raise ValueError("Unsupported SqueezeNet version {version}: "
                         "1.0 or 1.1 expected".format(version=version))
    self.num_classes = num_classes
    if version == 1.0:
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=7, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(96, 16, 64, 64),
            Fire(128, 16, 64, 64),
            Fire(128, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 32, 128, 128),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(512, 64, 256, 256),
        )
    else:
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(64, 16, 64, 64),
            Fire(128, 16, 64, 64),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(128, 32, 128, 128),
            Fire(256, 32, 128, 128),
            nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            Fire(512, 64, 256, 256),
        )
    # Final convolution is initialized differently from the rest
    final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
    self.classifier = nn.Sequential(
        nn.Dropout(p=0.5),
        final_conv,
        nn.ReLU(inplace=True),
        nn.AdaptiveAvgPool2d((1, 1))
    )
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if m is final_conv:
                init.normal_(m.weight, mean=0.0, std=0.01)
            else:
                init.kaiming_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)
def _init_weights(self):
    if cfg.KRCNN.USE_DECONV:
        init.normal_(self.deconv.weight, std=0.01)
        init.constant_(self.deconv.bias, 0)
    if cfg.KRCNN.CONV_INIT == 'GaussianFill':
        init.normal_(self.classify.weight, std=0.001)
    elif cfg.KRCNN.CONV_INIT == 'MSRAFill':
        mynn.init.MSRAFill(self.classify.weight)
    else:
        raise ValueError(cfg.KRCNN.CONV_INIT)
    init.constant_(self.classify.bias, 0)
def init_params(self):
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight, mode='fan_out')
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            init.constant_(m.weight, 1)
            init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=0.001)
            if m.bias is not None:
                init.constant_(m.bias, 0)
def init_func(m):
    classname = m.__class__.__name__
    if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
        if init_type == 'normal':
            init.normal_(m.weight.data, 0.0, gain)
        elif init_type == 'xavier':
            init.xavier_normal_(m.weight.data, gain=gain)
        elif init_type == 'kaiming':
            init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
        elif init_type == 'orthogonal':
            init.orthogonal_(m.weight.data, gain=gain)
        else:
            raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
        if hasattr(m, 'bias') and m.bias is not None:
            init.constant_(m.bias.data, 0.0)
    elif classname.find('BatchNorm2d') != -1:
        init.normal_(m.weight.data, 1.0, gain)
        init.constant_(m.bias.data, 0.0)
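# Hedged usage sketch: init_func above closes over the free variables
# `init_type` and `gain`, so in practice it is nested inside a wrapper and
# applied recursively with nn.Module.apply. The wrapper name `init_weights`
# and the toy model below are assumptions, not part of the original snippet.
import torch.nn as nn
from torch.nn import init

def init_weights(net, init_type='normal', gain=0.02):
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and ('Conv' in classname or 'Linear' in classname):
            if init_type == 'normal':
                init.normal_(m.weight.data, 0.0, gain)
            elif init_type == 'xavier':
                init.xavier_normal_(m.weight.data, gain=gain)
            else:
                raise NotImplementedError(init_type)
            if hasattr(m, 'bias') and m.bias is not None:
                init.constant_(m.bias.data, 0.0)
    net.apply(init_func)  # visits every submodule recursively

net = nn.Sequential(nn.Linear(16, 8), nn.ReLU(), nn.Linear(8, 2))
init_weights(net, init_type='xavier')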
def __init__(self, feat_num, class_num, drop=0):
    super(Classifier, self).__init__()
    self.feat_num = feat_num
    self.class_num = class_num
    self.drop = drop
    # BN layer
    self.classifierBN = nn.BatchNorm1d(self.feat_num)
    # feature classifier
    self.classifierlinear = nn.Linear(self.feat_num, self.class_num)
    # dropout layer
    if self.drop > 0:
        self.droplayer = nn.Dropout(drop)
    init.constant_(self.classifierBN.weight, 1)
    init.constant_(self.classifierBN.bias, 0)
    init.normal_(self.classifierlinear.weight, std=0.001)
    init.constant_(self.classifierlinear.bias, 0)
def _init_weights(self):
    init.normal_(self.FPN_RPN_conv.weight, std=0.01)
    init.constant_(self.FPN_RPN_conv.bias, 0)
    init.normal_(self.FPN_RPN_cls_score.weight, std=0.01)
    init.constant_(self.FPN_RPN_cls_score.bias, 0)
    init.normal_(self.FPN_RPN_bbox_pred.weight, std=0.01)
    init.constant_(self.FPN_RPN_bbox_pred.bias, 0)
def make_a_linear(input_dim, output_dim):
    linear_model = nn.Linear(input_dim, output_dim)
    normal_(linear_model.weight, 0, 0.001)
    constant_(linear_model.bias, 0)
    return linear_model
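# Hedged usage sketch: make_a_linear assumes `normal_` and `constant_` are
# imported at module level from torch.nn.init; the dimensions below are
# made-up examples.
import torch
import torch.nn as nn
from torch.nn.init import normal_, constant_

head = make_a_linear(512, 10)  # 512-d features -> 10 logits
x = torch.randn(4, 512)
print(head(x).shape)           # torch.Size([4, 10])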
def _init_layer(layer):
    init.normal_(layer.weight)  # this way of initializing can reduce overfitting
    init.normal_(layer.bias)
def _init_params(self, module, mean=0.1, std=0.1):
    init.normal_(module.weight, std=std)
    if getattr(module, 'bias', None) is not None:
        init.constant_(module.bias, mean)
class EasyMLP:
    num_inputs, num_outputs, num_hiddens = 784, 10, 256
    net = nn.Sequential(
        d2l.FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens),
        nn.ReLU(),
        nn.Linear(num_hiddens, num_outputs),
    )
    for params in net.parameters():
        init.normal_(params, mean=0, std=0.01)
    batch_size = 256
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
    num_epochs = 5
    params = None  # fallback path for the hand-rolled sgd below; unused since optimizer is set

    @staticmethod
    def sgd(params, lr, batch_size):
        # To stay consistent with the book we divide by batch_size here, but
        # strictly this is unnecessary: PyTorch losses already average over
        # the batch dimension by default.
        for param in params:
            param.data -= lr * param.grad / batch_size  # note: update through param.data

    @staticmethod
    def evaluate_accuracy(data_iter, net, device=None):
        if device is None and isinstance(net, torch.nn.Module):
            # if no device is given, use the net's device
            device = list(net.parameters())[0].device
        acc_sum, n = 0.0, 0
        with torch.no_grad():
            for X, y in data_iter:
                if isinstance(net, torch.nn.Module):
                    net.eval()  # eval mode: this disables dropout
                    acc_sum += (net(X.to(device)).argmax(
                        dim=1) == y.to(device)).float().sum().cpu().item()
                    net.train()  # switch back to training mode
                else:
                    # custom model (not used after section 3.13; no GPU handling)
                    if 'is_training' in net.__code__.co_varnames:
                        # the model takes an is_training argument: set it to False
                        acc_sum += (net(X, is_training=False).argmax(
                            dim=1) == y).float().sum().item()
                    else:
                        acc_sum += (net(X).argmax(
                            dim=1) == y).float().sum().item()
                n += y.shape[0]
        return acc_sum / n

    def train(self):
        # d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)
        for epoch in range(self.num_epochs):
            train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
            for X, y in self.train_iter:
                y_hat = self.net(X)
                l = self.loss(y_hat, y).sum()
                # zero the gradients
                if self.optimizer is not None:
                    self.optimizer.zero_grad()
                elif self.params is not None and self.params[0].grad is not None:
                    for param in self.params:
                        param.grad.data.zero_()
                l.backward()
                if self.optimizer is None:
                    self.sgd(self.params, self.lr, self.batch_size)
                else:
                    self.optimizer.step()  # used again in the "concise softmax regression" section
                train_l_sum += l.item()
                train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
                n += y.shape[0]
            test_acc = self.evaluate_accuracy(self.test_iter, self.net)
            print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
                  % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
def reset_parameters(self):
    init.xavier_uniform_(self.out.weight)
    # init.xavier_uniform_(self.hidden.weight)
    # bias = (3.0 / self.embedding.weight.size(1)) ** 0.5
    # init.uniform_(self.embedding.weight, -bias, bias)
    init.normal_(self.embedding.weight, 0, 1 / self.embedding_dim ** 0.5)
# 1. Import the required packages
#%%
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
# from torch.autograd import Variable

torch.manual_seed(11)  # set the random seed

# 2. Generate data
#%%
num_data = 1000
num_epoch = 1000
noise = init.normal_(torch.FloatTensor(num_data, 1), std=0.2)
x = init.uniform_(torch.Tensor(num_data, 1), -10, 10)
print(x)
y = 2 * x + 3
y_noise = 2 * (x + noise) + 3
# print(x, y, y_noise)

# 3. Model and optimizer
model = nn.Linear(1, 1)
# output = model(Variable(x))
output = model(x)
loss_func = nn.L1Loss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
def weight_init(m):
    '''
    Usage:
        model = Model()
        model.apply(weight_init)
    '''
    if isinstance(m, (nn.Linear, nn.Conv2d, nn.BatchNorm1d, nn.BatchNorm2d)):
        init_pytorch_defaults(m, version='041')
    elif isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.Conv3d, nn.ConvTranspose2d, nn.ConvTranspose3d)):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.BatchNorm3d):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        # orthogonal init for weight matrices, normal init for bias vectors
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
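# Hedged sketch of the external helper assumed above: init_pytorch_defaults
# is not defined in this snippet. A minimal version reproducing the PyTorch
# 0.4.1 default for nn.Linear might look like this (only the Linear case is
# shown; the real helper also covers conv and batch-norm layers).
import math
import torch.nn as nn

def init_pytorch_defaults(m, version='041'):
    if version == '041' and isinstance(m, nn.Linear):
        stdv = 1. / math.sqrt(m.weight.size(1))  # 1/sqrt(fan_in)
        m.weight.data.uniform_(-stdv, stdv)
        if m.bias is not None:
            m.bias.data.uniform_(-stdv, stdv)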
def _init_weights(self):
    init.normal_(self.distillation.weight, std=0.01)
    init.constant_(self.distillation.bias, 0)
def _init_weights(self):
    for i_refine in range(cfg.REFINE_TIMES):
        init.normal_(self.refine_score[i_refine].weight, std=0.01)
        init.constant_(self.refine_score[i_refine].bias, 0)
def _init_weights(self):
    init.normal_(self.mil_score0.weight, std=0.01)
    init.constant_(self.mil_score0.bias, 0)
    init.normal_(self.mil_score1.weight, std=0.01)
    init.constant_(self.mil_score1.bias, 0)
def weight_init(m):
    """
    Usage:
        model = Model()
        model.apply(weight_init)
    """
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.Conv2d):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, nn.Conv3d):
        init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.ConvTranspose2d, nn.ConvTranspose3d)):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.BatchNorm3d):
        init.constant_(m.weight, 1)
        init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        init.xavier_uniform_(m.weight.data)
        if m.bias is not None:
            init.zeros_(m.bias.data)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        # xavier-uniform init for weight matrices, zeros for bias vectors
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.xavier_uniform_(param.data)
            else:
                init.zeros_(param.data)
    elif isinstance(m, nn.Embedding):
        init.uniform_(m.weight.data)
def __init__(self, input_nc, ndf=64, n_layers=4, norm_layer=SynchronizedBatchNorm2d):
    """Construct a PatchGAN discriminator"""
    super(NLayerDiscriminator, self).__init__()
    kw = 4
    padw = 1
    sequence = [
        nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),
        nn.LeakyReLU(0.2, True)
    ]
    nf_mult = 1
    nf_mult_prev = 1
    for n in range(1, n_layers):  # gradually increase the number of filters
        nf_mult_prev = nf_mult
        nf_mult = min(2**n, 16)
        sequence += [
            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw,
                      stride=2, padding=padw, bias=False),
            norm_layer(ndf * nf_mult, affine=True),
            nn.LeakyReLU(0.2, True)
        ]
    kw = 3
    if n_layers == 5:
        nf_mult_prev = nf_mult
        nf_mult = min(2**n_layers, 8)
        sequence += [
            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw,
                      stride=1, padding=padw, bias=False),
            norm_layer(ndf * nf_mult, affine=True),
            nn.LeakyReLU(0.2, True)
        ]
        nf_mult_prev = nf_mult
        nf_mult = min(2**n_layers, 4)
        sequence += [
            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw,
                      stride=1, padding=padw, bias=False),
            norm_layer(ndf * nf_mult, affine=True),
            nn.LeakyReLU(0.2, True)
        ]
    if n_layers == 4:
        nf_mult_prev = nf_mult
        nf_mult = min(2**n_layers, 4)
        sequence += [
            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw,
                      stride=1, padding=padw, bias=False),
            norm_layer(ndf * nf_mult, affine=True),
            nn.LeakyReLU(0.2, True)
        ]
    kw = 4
    sequence += [
        nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)
    ]  # output 1-channel prediction map, hard-coded for now
    self.model = nn.Sequential(*sequence)

    # weight init
    for m in self.modules():
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            init.kaiming_normal_(m.weight.data)
            if m.bias is not None:
                init.normal_(m.bias.data)
        elif classname.find('BatchNorm2d') != -1:
            init.normal_(m.weight.data, mean=1, std=0.02)
            if m.bias is not None:
                init.constant_(m.bias.data, 0)
def __init__(self, lambdas, colorguide, input_nc, output_nc, ngf=64,
             norm_layer=SynchronizedBatchNorm2d, use_dropout=False,
             n_blocks=4, padding_type='reflect'):
    assert (n_blocks >= 0)
    super(Generator, self).__init__()
    self.lambdas = lambdas
    self.colorguide = colorguide
    if padding_type == 'reflect':
        padding = nn.ReflectionPad2d
    elif padding_type == 'replicate':
        padding = nn.ReplicationPad2d
    if colorguide:
        model = [
            padding(3),
            # +3 input channels for the color encoding (RGB)
            nn.Conv2d(input_nc + 3, ngf, kernel_size=7, stride=1, padding=0, bias=False),
            norm_layer(ngf),
            nn.LeakyReLU(0.2, True)
        ]
    else:
        model = [
            padding(3),
            nn.Conv2d(input_nc, ngf, kernel_size=7, stride=1, padding=0, bias=False),
            norm_layer(ngf),
            nn.LeakyReLU(0.2, True)
        ]
    mult = 1
    mult_new = mult * 2
    for i in range(n_blocks):  # add downsampling layers
        model += [
            padding(1),
            nn.Conv2d(ngf * mult, ngf * mult_new, kernel_size=3, stride=2, padding=0, bias=False),
            norm_layer(ngf * mult_new),
            nn.LeakyReLU(0.2, True)
        ]
        mult = 2 * mult
        mult_new = mult * 2
    # a sequence of residual blocks
    for i in range(n_blocks):
        model += [
            ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer,
                        use_dropout=use_dropout, use_bias=False)
        ]
    if colorguide:
        # 3 x 256 color channels for RGB
        colortrans = [
            nn.Linear(3, 3 * 256, bias=False),
            SynchronizedBatchNorm1d(256),
            nn.LeakyReLU(0.2, True)
        ]
        self.enc_color = nn.Sequential(*colortrans)
        del colortrans
    self.enc = nn.Sequential(*model)
    del model
    model_img = []
    model_wf = []
    for i in range(n_blocks):  # add upsampling layers
        mult = 2**(n_blocks - i)
        model_img += [
            padding(1),
            nn.Conv2d(ngf * mult, int(ngf * mult * 2), kernel_size=3, stride=1, padding=0, bias=False),
            nn.PixelShuffle(2),
            norm_layer(int(ngf * mult / 2)),
            nn.ReLU(True)
        ]
        model_wf += [
            padding(1),
            nn.Conv2d(ngf * mult, int(ngf * mult * 2), kernel_size=3, stride=1, padding=0, bias=False),
            nn.PixelShuffle(2),
            norm_layer(int(ngf * mult / 2)),
            nn.ReLU(True)
        ]
    self.dec_base_wf = nn.Sequential(*model_wf)
    self.dec_base_img = nn.Sequential(*model_img)
    model_img = [padding(3)]
    model_img += [
        nn.Conv2d(int(ngf * mult), output_nc * 3, kernel_size=7, padding=0)
    ]
    model_img += [nn.Tanh()]
    model_wf = [padding(3)]
    model_wf += [
        nn.Conv2d(int(ngf * mult / 2), output_nc, kernel_size=7, padding=0)
    ]
    model_wf += [nn.Tanh()]
    self.dec_img = nn.Sequential(*model_img)
    self.dec_wf = nn.Sequential(*model_wf)
    del model_img, model_wf
    if colorguide:
        model_color = [nn.Linear(256 * 3, 3, bias=False), nn.Sigmoid()]
        self.dec_color = nn.Sequential(*model_color)
        del model_color

    # weight init
    for m in self.modules():
        classname = m.__class__.__name__
        if classname.find('Conv2d') != -1:
            init.kaiming_normal_(m.weight.data)
            if m.bias is not None:
                init.normal_(m.bias.data)
        elif classname.find('BatchNorm2d') != -1:
            init.normal_(m.weight.data, mean=1, std=0.02)
            if m.bias is not None:
                init.constant_(m.bias.data, 0)
        elif classname.find('BatchNorm1d') != -1:
            init.normal_(m.weight.data, mean=1, std=0.02)
            if m.bias is not None:
                init.constant_(m.bias.data, 0)
        elif classname.find('Linear') != -1:
            init.kaiming_normal_(m.weight.data)
            if m.bias is not None:
                init.normal_(m.bias.data)
class LinearNet(nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(n_feature, 1)

    def forward(self, x):
        y = self.linear(x)
        return y

net = LinearNet(num_inputs)
print(net)
# print(net[0])

# Initialize the model parameters
# print('weight:', net.linear.weight)
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)
# you can also modify the bias data directly: net[0].bias.data.fill_(0)

# Define the loss function
loss = nn.MSELoss()

# Define the optimization algorithm
optimizer = optim.SGD(net.parameters(), lr=0.03)
'''
for i in net.parameters():
    print(i)
'''

# Adjust the learning rate
for param_group in optimizer.param_groups:
    param_group['lr'] *= 0.1  # scale the learning rate to 0.1x its previous value
# (fragment: the head of this list of nonlinearity names was truncated)
nonlinearity = [
    # ...
    'leaky_relu'
]
for func in nonlinearity:
    gain = init.calculate_gain(nonlinearity=func)
    # print(gain)
gain = init.calculate_gain(nonlinearity='leaky_relu', param=0.2)
# gain = init.calculate_gain('leaky_relu', 0.2)

# uniform
t1 = torch.Tensor(3, 1, 2)
init.uniform_(t1, a=2, b=3)

# normal
t2 = torch.Tensor(3, 1, 2)
init.normal_(t2, mean=0, std=1)

# constant
t3 = torch.tensor([3, 1], dtype=torch.int64)
init.constant_(t3, 2)
t4 = torch.Tensor(3, 1, 2)  # dim
init.constant_(t4, 2)
t5 = torch.tensor(np.ndarray([3, 1], dtype=float), dtype=torch.int64)
init.constant_(t5, 2)

# eye - 2 dim
t6 = torch.Tensor(3, 3)
# t6 = torch.Tensor(3, 3, 3)  # error
init.eye_(t6)

# ones/zeros
def weights_init_classifier(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        init.normal_(m.weight.data, std=0.001)
        init.constant_(m.bias.data, 0.0)
def weights_init_classifier(m):
    classname = m.__class__.__name__
    if "Linear" in classname or "MarginInnerProduct" in classname:
        init.normal_(m.weight.data, std=0.001)
        if hasattr(m, "bias") and (m.bias is not None):
            init.constant_(m.bias.data, 0.0)
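# Hedged usage sketch: like the other module-wise initializers in this
# collection, weights_init_classifier is meant to be applied recursively via
# nn.Module.apply; the classifier head below is a made-up example.
import torch.nn as nn
from torch.nn import init

classifier = nn.Sequential(nn.Linear(2048, 512), nn.ReLU(), nn.Linear(512, 751))
classifier.apply(weights_init_classifier)  # visits every submodule; non-Linear layers are untouched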
from models.fcos import FCOS
from config import config

cfg = config

print('Loading pretrained ResNet50 model..')
d = torch.load('./resnet50.pth')

# print('Loading into FPN50..')
fpn = FPN50()
dd = fpn.state_dict()
for k in d.keys():
    if not k.startswith('fc'):  # skip fc layers
        dd[k] = d[k]

# print('Saving FCOS..')
net = FCOS(cfg)
for m in net.modules():
    if isinstance(m, nn.Conv2d):
        init.normal_(m.weight, mean=0, std=0.01)
        if m.bias is not None:
            init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1)
        m.bias.data.zero_()

net.head.fpn.load_state_dict(dd)
torch.save(net.state_dict(), 'net.pth')
print('Done!')
def _init_weights(self):
    if cfg.MRCNN.CONV_INIT == 'GaussianFill':
        init.normal_(self.upconv5.weight, std=0.001)
    elif cfg.MRCNN.CONV_INIT == 'MSRAFill':
        mynn.init.MSRAFill(self.upconv5.weight)
    init.constant_(self.upconv5.bias, 0)
dtype=torch.float)  # (fragment: continuation of the truncated `features` definition)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)

batch_size = 32
dataset = Data.TensorDataset(features, labels)
data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)

device = torch.device("cuda")
net = nn.Sequential(nn.Linear(num_inputs, 1)).to(device)
print(net)

init.normal_(net[0].weight, mean=0, std=0.01)
init.constant_(net[0].bias, val=0)

loss = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.03)
print(optimizer)

num_epochs = 10
for epoch in range(1, num_epochs + 1):
    for X, Y in data_iter:
        X = X.to(device)
        Y = Y.to(device)
        output = net(X)
        l = loss(output, Y.view(-1, 1))
        optimizer.zero_grad()
def weights_init_classifier(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        init.normal_(m.weight.data, 0, 0.001)
        init.zeros_(m.bias.data)
def _init_weights(self):
    init.normal_(self.cls_score.weight, std=0.01)
    init.constant_(self.cls_score.bias, 0)
    init.normal_(self.bbox_pred.weight, std=0.001)
    init.constant_(self.bbox_pred.bias, 0)
def _set_init(self, layer):
    init.normal_(layer.weight, mean=0., std=.1)
    init.constant_(layer.bias, B_INIT)  # B_INIT is an external constant
def _initialise_layer(self, layer, mean=0, std=0.001):
    normal_(layer.weight, mean, std)
    constant_(layer.bias, mean)
opt = parser.parse_args()
print(opt)
os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu)

print("Random Seed: ", opt.seed)
random.seed(opt.seed)
torch.manual_seed(opt.seed)
torch.cuda.manual_seed_all(opt.seed)

os.makedirs('%s/analogy/%s' % (opt.savedir, opt.signature), exist_ok=True)
os.makedirs('%s/modules/%s' % (opt.savedir, opt.signature), exist_ok=True)

#################################################################################################################
lbnet = network()
for mod in list(lbnet.children())[0].children():
    if isinstance(mod, nn.Conv2d):
        init.normal_(mod.weight, 0.0, 0.01)
        init.constant_(mod.bias, 0.0)

optimizer = optim.Adam(lbnet.parameters(), lr=opt.lr)
lbnet.cuda()
cse_loss = nn.CrossEntropyLoss()
cse_loss.cuda()

#################################################################################################################
def get_training_batch(data_loader):
    while True:
        for sequence in data_loader:
            batch = sequence[0].cuda(), sequence[1].cuda()
            yield batch
# Define the model
# Define the hyperparameters
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Define the network
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)

# Initialize the model parameters (strictly, they are already initialized
# when the network is constructed)
for params in net.parameters():
    init.normal_(params, mean=0, std=0.01)

# Define the loss function
loss = torch.nn.CrossEntropyLoss()

# Define the optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

# Load the data
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Train the model
num_epochs = 5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              None, None, optimizer)  # lr is already carried by the optimizer
def weight_init(m):
    '''
    Usage:
        model = Model()
        model.apply(weight_init)
    '''
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d, nn.ConvTranspose3d)):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
def gaussian_fill_w_gain(tensor, activation, dim_in, min_std=0.0) -> None:
    """Gaussian initialization with gain."""
    gain = math.sqrt(2) if activation == "relu" else 1
    init.normal_(tensor, mean=0, std=max(gain * math.sqrt(1 / dim_in), min_std))
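# Hedged usage sketch: for a ReLU layer with fan-in 512, the call above draws
# weights with std = sqrt(2) * sqrt(1/512) = 0.0625 (He-style scaling). The
# layer below is a made-up example.
import math
import torch.nn as nn
from torch.nn import init

fc = nn.Linear(512, 256)
gaussian_fill_w_gain(fc.weight, activation="relu", dim_in=512)
print(fc.weight.std())  # approximately 0.0625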
def weight_init_helper(m):
    '''
    Usage:
        model = Model()
        model.apply(weight_init)

    https://gist.github.com/jeasinema/ed9236ce743c8efaf30fa2ff732749f5
    '''
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d, nn.ConvTranspose3d)):
        init.xavier_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.xavier_normal_(m.weight.data)
        init.normal_(m.bias.data)
def weight_init(m):
    # https://gist.github.com/jeasinema/ed9236ce743c8efaf30fa2ff732749f5
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d, nn.ConvTranspose3d)):
        init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            init.normal_(m.bias.data)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.kaiming_normal_(m.weight.data)
        init.normal_(m.bias.data)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        # orthogonal init for weight matrices, normal init for bias vectors
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
def _initialize_weights_norm(self):
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            init.normal_(m.weight, std=0.01)
            if m.bias is not None:  # resnet101 conv2d doesn't add bias
                init.constant_(m.bias, 0.0)
def reset_parameters(self):
    init.normal_(self.weight)
    if self.padding_idx is not None:
        with torch.no_grad():
            self.weight[self.padding_idx].fill_(0)
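# Hedged usage sketch: this reset_parameters mirrors nn.Embedding's behavior,
# where the row at padding_idx is zeroed so padding tokens contribute nothing.
# The stock nn.Embedding below is a made-up example demonstrating the same effect.
import torch
import torch.nn as nn

emb = nn.Embedding(100, 16, padding_idx=0)
emb.reset_parameters()
print(emb.weight[0].abs().sum())  # 0 -- the padding row stays zero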