def init_param(model):
    for name, param in model.named_parameters():
        # Skip the embeddings so that the padding-index rows stay 0.
        if 'embed' in name:
            continue
        elif ('rnn' in name or 'lm' in name) and len(param.size()) >= 2:
            init.orthogonal_(param)
        else:
            init.normal_(param, 0, 0.01)
def weights_init_orthogonal(m):
    classname = m.__class__.__name__
    print(classname)
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        init.orthogonal_(m.weight.data, gain=1)
    elif isinstance(m, nn.Linear):
        init.orthogonal_(m.weight.data, gain=1)
    elif isinstance(m, nn.BatchNorm2d):
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)
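Initializers written in this per-module style are meant to be dispatched over every submodule with `Module.apply`. A minimal usage sketch for the function above (the small network below is a made-up example, assuming 32x32 RGB inputs):

import torch.nn as nn
from torch.nn import init

net = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),
    nn.BatchNorm2d(16),
    nn.ReLU(inplace=True),
    nn.Flatten(),
    nn.Linear(16 * 32 * 32, 10),
)
# apply() calls weights_init_orthogonal once per submodule, recursively;
# modules that match no branch (ReLU, Flatten, the Sequential itself) are skipped.
net.apply(weights_init_orthogonal)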
def wt_init(self):
    for name, param in self.named_parameters():
        if 'gru' in name and 'weight' in name:
            init.orthogonal_(param)
        elif 'linear' in name:
            init.normal_(
                param, 0,
                math.sqrt(2. / float(self.hidden_size_1 + self.hidden_size_2 + self.hidden_size_3)))
def __init__(self, input_size, hidden_size, n_layers=1):
    super(Encoder, self).__init__()
    self.n_layers = n_layers
    self.hidden_size = hidden_size
    # self.embedding = nn.Embedding(vocab_size, emb_size)
    self.gru = nn.GRU(input_size, hidden_size, dropout=0.2,
                      batch_first=True, bidirectional=True)
    # Initialize the gate weight matrices orthogonally.
    for w in self.gru.parameters():
        if w.dim() > 1:
            weight_init.orthogonal_(w)
def initWeights(net, scheme='orthogonal'):
    print('Initializing weights. Warning: may overwrite sensitive bias parameters (e.g. batchnorm)')
    for e in net.parameters():
        if scheme == 'orthogonal':
            if len(e.size()) >= 2:
                init.orthogonal_(e)
        elif scheme == 'normal':
            init.normal_(e, std=1e-2)
        elif scheme == 'xavier':
            init.xavier_normal_(e)
def weights_init_orthogonal(m):
    classname = m.__class__.__name__
    # print(classname)
    if 'Conv' in classname:
        init.orthogonal_(m.weight.data, gain=1)
    elif 'Linear' in classname:
        init.orthogonal_(m.weight.data, gain=1)
    elif 'BatchNorm' in classname:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)
def weights_init_orthogonal(m):
    classname = m.__class__.__name__
    # print(classname)
    if classname.find("Conv") != -1:
        init.orthogonal_(m.weight.data, gain=1)
    elif classname.find("Linear") != -1:
        init.orthogonal_(m.weight.data, gain=1)
    elif classname.find("BatchNorm2d") != -1:
        # BatchNorm affine parameters: scale ~ N(1.0, 0.02), shift = 0.
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)
def reset_parameters(self):
    self.reducer.reset_parameters()
    init.kaiming_normal_(self.word_linear.weight.data)
    init.constant_(self.word_linear.bias.data, val=0)
    init.kaiming_normal_(self.tracker_cell.weight_ih.data)
    init.orthogonal_(self.tracker_cell.weight_hh.data)
    init.constant_(self.tracker_cell.bias_ih.data, val=0)
    init.constant_(self.tracker_cell.bias_hh.data, val=0)
    init.kaiming_normal_(self.trans_linear.weight.data)
    init.constant_(self.trans_linear.bias.data, val=0)
def weights_init_orthogonal(m):
    classname = m.__class__.__name__
    print(classname)
    if classname.find('Conv') != -1:
        init.orthogonal_(m.weight.data, gain=1)
    elif classname.find('Linear') != -1:
        init.orthogonal_(m.weight.data, gain=1)
    elif classname.find('BatchNorm2d') != -1:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)
def weights_init_orthogonal(m):
    classname = m.__class__.__name__
    print(classname)
    if classname.find('Conv') != -1:
        init.orthogonal_(m.weight.data, gain=1)
    elif classname.find('Linear') != -1:
        init.orthogonal_(m.weight.data, gain=1)
    elif classname.find('BatchNorm2d') != -1:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)
def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
             num_init_features=64, bn_size=4, drop_rate=0,
             num_classes=p_transform["n_labels"]):
    super(MyDenseNet, self).__init__()

    # First convolution
    self.features = nn.Sequential(OrderedDict([
        ('conv0', nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
        ('norm0', nn.BatchNorm2d(num_init_features)),
        ('relu0', nn.ReLU(inplace=True)),
        ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
    ]))

    # Each denseblock
    num_features = num_init_features
    final_num_features = 0
    for i, num_layers in enumerate(block_config):
        block = torchvision.models.densenet._DenseBlock(
            num_layers=num_layers, num_input_features=num_features,
            bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate)
        self.features.add_module('denseblock%d' % (i + 1), block)
        num_features = num_features + num_layers * growth_rate
        if i != len(block_config) - 1:
            trans = torchvision.models.densenet._Transition(
                num_input_features=num_features,
                num_output_features=num_features // 2)
            self.features.add_module('transition%d' % (i + 1), trans)
            num_features = num_features // 2

    # Final batch norm
    self.features.add_module('norm5', nn.BatchNorm2d(num_features))
    # self.classifier_drop = nn.Dropout(p=0.75)

    # Linear layers; nn.Linear needs integer sizes, hence the floor division.
    self.fc = nn.Linear(num_features, num_features // 4)
    init.orthogonal_(self.fc.weight, gain=np.sqrt(2.0))
    self.classifier = nn.Linear(num_features // 4, num_classes)
    self.classifier.weight.data.zero_()
def _weight_init(self, m):
    if isinstance(m, nn.LSTM):
        for n, p in m.named_parameters():
            if 'weight' in n:
                # init.xavier_normal_(p.data)
                init.orthogonal_(p.data)
            elif 'bias' in n:
                p.data.zero_()
def reset_parameters(self):
    """Initialize parameters following the way proposed in the paper."""
    init.orthogonal_(self.weight_ih.data)
    # The hidden-to-hidden weights start as one identity block per gate.
    weight_hh_data = torch.eye(self.hidden_size)
    weight_hh_data = weight_hh_data.repeat(1, 3)
    self.weight_hh.data.set_(weight_hh_data)
    # The bias is just set to zero vectors.
    if self.use_bias:
        init.constant_(self.bias.data, val=0)
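`repeat(1, 3)` tiles the identity horizontally, giving one hidden_size x hidden_size identity block per gate of a three-gate (GRU-style) cell, so each gate's recurrent map starts as the identity. A quick sketch of that layout (hidden size 4 is arbitrary):

import torch

hidden_size = 4
w_hh = torch.eye(hidden_size).repeat(1, 3)  # shape (4, 12)
# Each horizontal slice is an identity matrix, one per gate.
for g in range(3):
    block = w_hh[:, g * hidden_size:(g + 1) * hidden_size]
    assert torch.equal(block, torch.eye(hidden_size))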
def init_weights(self, init_type, init_scale):
    # Initialize the weight matrices.
    for p in self.parameters():
        if init_type == "orthogonal" and p.dim() >= 2:
            nninit.orthogonal_(p)
        elif init_type == "uniform":
            p.data.uniform_(-init_scale, init_scale)
        elif init_type == "xavier_n" and p.dim() >= 2:
            nninit.xavier_normal_(p)
        elif init_type == "xavier_u" and p.dim() >= 2:
            nninit.xavier_uniform_(p)
def __init__(self, x_dim, h_dim, act_func, W=None):
    super(AutoEncoder, self).__init__()
    self.x_dim = x_dim
    self.h_dim = h_dim
    self.f = act_func
    if W is None:
        self.W = Parameter(torch.FloatTensor(x_dim, h_dim))
        # init.xavier_uniform_(self.W)
        init.orthogonal_(self.W)
    else:
        self.W = Parameter(torch.FloatTensor(W))
def __init__(self, depth, pretrained=True, cut_at_pooling=False,
             num_features=0, norm=False, dropout=0, num_classes=0,
             num_diff_features=0, iden_pretrain=False,
             model_path='/media/hh/disc_d/hh/open-reid-master/pretrained model/resnet50.pth'):
    super(ResNet, self).__init__()
    self.depth = depth
    self.pretrained = pretrained
    self.cut_at_pooling = cut_at_pooling
    self.iden_pretrain = iden_pretrain

    # Construct the base (pretrained) resnet.
    if depth not in ResNet.__factory:
        raise KeyError("Unsupported depth:", depth)
    # self.base = ResNet.__factory[depth](pretrained=pretrained)
    self.base = baseresnet.ResNet(baseresnet.Bottleneck, [3, 4, 6, 3])
    if pretrained is True:
        self.base.load_state_dict(torch.load(model_path))
    self.relu = nn.ReLU(inplace=True)

    if not self.cut_at_pooling:
        self.num_features = num_features
        self.num_diff_features = num_diff_features
        self.norm = norm
        self.dropout = dropout
        self.has_embedding = num_features > 0
        self.num_classes = num_classes
        out_planes = self.base.fc.in_features

        # Append new layers.
        if self.has_embedding:
            self.feat = nn.Linear(out_planes, self.num_features)
            self.feat_bn = nn.BatchNorm1d(self.num_features)
            init.kaiming_normal_(self.feat.weight, mode='fan_out')
            init.constant_(self.feat.bias, 0)
            init.constant_(self.feat_bn.weight, 1)
            init.constant_(self.feat_bn.bias, 0)
        else:
            # Change num_features to the CNN output channel count.
            self.num_features = out_planes
        if self.dropout > 0:
            self.drop = nn.Dropout(self.dropout)
        if self.num_diff_features > 0:
            self.diff_feat = nn.Linear(self.num_features, self.num_diff_features)
            init.orthogonal_(self.diff_feat.weight)
            init.constant_(self.diff_feat.bias, 0)
        if self.num_classes > 0:
            self.classifier = nn.Linear(self.num_features, self.num_classes)
            # init.orthogonal_(self.classifier.weight)
            init.normal_(self.classifier.weight, std=0.001)
            init.constant_(self.classifier.bias, 0)

    if not self.pretrained:
        self.reset_params()
def reset_parameters(self):
    init.orthogonal_(self.alpha_weight_ih.data)
    # The hidden-to-hidden weights start as the identity.
    alpha_weight_hh_data = torch.eye(self.hidden_size)
    alpha_weight_hh_data = alpha_weight_hh_data.repeat(1, 1)
    self.alpha_weight_hh.data.set_(alpha_weight_hh_data)
    if self.use_bias:
        init.constant_(self.alpha_bias.data, val=0)
def init_weights(self):
    init.orthogonal_(self.lstm.weight_ih_l0)
    init.uniform_(self.lstm.weight_hh_l0, a=-0.01, b=0.01)
    # Start from small random embeddings, then overwrite known rows
    # with pretrained vectors.
    embedding_weights = torch.FloatTensor(self.vocab_size, 100)
    init.uniform_(embedding_weights, a=-0.25, b=0.25)
    for idx, vec in id_to_vec.items():
        embedding_weights[idx] = vec
    self.embedding.weight.data.copy_(embedding_weights)
def init_bilstm(self, hidden_dim):
    init.xavier_uniform_(self.encoder.weight_ih_l0)
    init.xavier_uniform_(self.encoder.weight_ih_l0_reverse)
    init.orthogonal_(self.encoder.weight_hh_l0)
    init.orthogonal_(self.encoder.weight_hh_l0_reverse)
    # Share one bias vector across all four bias parameters.
    bias = self.init_lstm_bias(self.encoder.bias_ih_l0, hidden_dim)
    self.encoder.bias_ih_l0 = nn.Parameter(bias.clone())
    self.encoder.bias_hh_l0 = nn.Parameter(bias.clone())
    self.encoder.bias_ih_l0_reverse = nn.Parameter(bias.clone())
    self.encoder.bias_hh_l0_reverse = nn.Parameter(bias.clone())
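`init_lstm_bias` is not shown in this snippet. A common choice for LSTM biases is zeros everywhere except the forget gate, which is set to 1 so the cell initially retains its state; a hypothetical sketch of such a helper, whose name and signature merely mirror the call above:

import torch

def init_lstm_bias(bias, hidden_dim):
    # PyTorch packs LSTM biases as [input, forget, cell, output] gate slices.
    out = torch.zeros_like(bias.data)
    out[hidden_dim:2 * hidden_dim] = 1.0  # forget-gate slice
    return out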
def init_weights(self):
    init.uniform_(self.lstm.weight_ih_l0, a=-0.01, b=0.01)
    init.orthogonal_(self.lstm.weight_hh_l0)
    self.lstm.weight_ih_l0.requires_grad = True
    self.lstm.weight_hh_l0.requires_grad = True
    embedding_weights = torch.FloatTensor(self.vocab_size, self.input_size)
    for idx, vec in id_to_vec.items():
        embedding_weights[idx] = vec
    self.embedding.weight = nn.Parameter(embedding_weights, requires_grad=True)
def init_weights(self):
    init.orthogonal_(self.rnn.weight_ih_l0)
    init.uniform_(self.rnn.weight_hh_l0, a=-0.01, b=0.01)
    # Load GloVe vectors and keep row 0 (padding) at zero.
    glove_embeddings = preprocessing.load_glove_embeddings()
    embedding_weights = torch.FloatTensor(self.vocab_size, self.input_size)
    init.uniform_(embedding_weights, a=-0.25, b=0.25)
    for k, v in glove_embeddings.items():
        embedding_weights[k] = torch.FloatTensor(v)
    embedding_weights[0] = torch.FloatTensor([0] * self.input_size)
    del self.embedding.weight
    self.embedding.weight = nn.Parameter(embedding_weights)
def init_weights(self):
    init_range = self._init_range
    init_std = self._gru_init_std
    self._l1_embedding_layer.weight.data.copy_(
        torch.from_numpy(self._l1_emb_vector))
    self._l2_embedding_layer.weight.data.copy_(
        torch.from_numpy(self._l2_emb_vector))
    unk_n_var_1 = self._l1_embedding_layer.weight.data[1:2 + self._nr_unk + self._var_size, :]
    init.normal_(unk_n_var_1, 0, 1)
    # Normalise the randomly initialised embeddings.
    unk_n_var_1 /= torch.norm(unk_n_var_1, p=2, dim=1).unsqueeze(1)
    self._l1_embedding_layer.weight.data[0, :] = 0
    if not self._emb_trainable:
        self._l1_embedding_layer.weight.requires_grad = False
    # size = entities + ph + non-ent-marker
    unk_2 = self._l2_embedding_layer.weight.data[2:2 + self._nr_unk, :]
    init.normal_(unk_2, 0, 1)
    # Normalise the randomly initialised embeddings (init unk * 100 embeddings).
    unk_2 /= torch.norm(unk_2, p=2, dim=1).unsqueeze(1)
    # Share the @placeholder embedding.
    self._l2_embedding_layer.weight.data[1, :] = \
        self._l1_embedding_layer.weight.data[1, :]
    # Share the @entityX embeddings.
    self._l2_embedding_layer.weight.data[2 + self._nr_unk:2 + self._nr_unk + self._var_size, :] = \
        self._l1_embedding_layer.weight.data[2 + self._nr_unk:2 + self._nr_unk + self._var_size, :]
    self._l2_embedding_layer.weight.data[0, :] = 0
    if not self._emb_trainable:
        self._l2_embedding_layer.weight.requires_grad = False
    # size = entities + ph + non-ent-marker
    # DONE: initialise non-zero locations
    # TODO: randomise in forward step?
    gain = init.calculate_gain('tanh')
    for p in self._recurrent_layer.parameters():
        if p.dim() == 1:
            p.data.normal_(0, init_std)
        else:
            init.orthogonal_(p.data, gain)
    for p in self._question_recurrent_layer.parameters():
        if p.dim() == 1:
            p.data.normal_(0, init_std)
        else:
            init.orthogonal_(p.data, gain)
    # self._embedding_projection_layer.weight.data.uniform_(-init_range, init_range)
    self._output_layer.weight.data.uniform_(-init_range, init_range)
    self._output_layer.bias.data.fill_(0)
    self._mix_matrix.data.uniform_(-init_range, init_range)
def _initialize_weights(self):
    """
    if type == 'Tanh':
        self.nonlinearity = 'tanh'
    elif type == 'ReLU':
        self.nonlinearity = 'relu'
    elif type == 'Leaky':
        self.nonlinearity = 'leaky_relu'
    """
    init.orthogonal_(self.conv1.weight, init.calculate_gain(self.nonlinearity))
    init.orthogonal_(self.conv2.weight, init.calculate_gain(self.nonlinearity))
def weights_init_orthogonal(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        init.orthogonal_(m.weight.data, gain=1)
        if m.bias is not None:
            m.bias.data.zero_()
    elif classname.find('Linear') != -1:
        init.orthogonal_(m.weight.data, gain=1)
        if m.bias is not None:
            m.bias.data.zero_()
    elif classname.find('BatchNorm2d') != -1:
        init.constant_(m.weight.data, 1.0)
        init.constant_(m.bias.data, 0.0)
def weights_init_orthogonal(m):
    classname = m.__class__.__name__
    # print(classname)
    if classname.find('Conv') != -1:
        try:
            init.orthogonal_(m.weight.data, gain=1)
        except AttributeError:
            # Modules matching 'Conv' without a weight of their own
            # fall back to the normal initializer.
            weights_init_normal(m)
    elif classname.find('Linear') != -1:
        init.orthogonal_(m.weight.data, gain=1)
    elif classname.find('BatchNorm2d') != -1:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)
def init_weights(self, init_type, init_scale):
    # Initialize the weight matrices.
    for p in self.parameters():
        if init_type == "orthogonal" and p.dim() >= 2:
            nninit.orthogonal_(p)
        elif init_type == "uniform":
            p.data.uniform_(-init_scale, init_scale)
        elif init_type == "xavier_n" and p.dim() >= 2:
            nninit.xavier_normal_(p)
        elif init_type == "xavier_u" and p.dim() >= 2:
            nninit.xavier_uniform_(p)
    # Initialize the bias for the linear layer.
    self.hidden2tag.bias.data.fill_(0.0)
def torch_weight_init(m):
    """
    Usage:
        model = Model()
        model.apply(torch_weight_init)
    """
    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        init.normal_(m.bias.data)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d, nn.ConvTranspose3d)):
        init.xavier_normal_(m.weight.data)
        init.normal_(m.bias.data)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        init.normal_(m.weight.data, mean=1, std=0.02)
        init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.xavier_normal_(m.weight.data)
        init.normal_(m.bias.data)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        # Orthogonal for weight matrices, standard normal for biases.
        for param in m.parameters():
            if len(param.shape) >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
def reset_parameters(self):
    """Initialize parameters following the way proposed in the paper."""
    # The input-to-hidden weight matrix is initialized orthogonally.
    init.orthogonal_(self.weight_ih.data)
    # The hidden-to-hidden weight matrix is initialized as an identity
    # matrix, one block per gate.
    weight_hh_data = torch.eye(self.hidden_size)
    weight_hh_data = weight_hh_data.repeat(1, 3)
    self.weight_hh.data.set_(weight_hh_data)
    # The bias is just set to zero vectors.
    init.constant_(self.bias.data, val=0)
def __init__(self, input_size, hidden_size, vocab_size, n_layers=1):
    super(Decoder, self).__init__()
    self.n_layers = n_layers
    self.hidden_size = hidden_size
    self.vocab_size = vocab_size
    # self.embedding = nn.Embedding(vocab_size, emb_size)
    self.gru = nn.GRU(input_size, hidden_size, dropout=0.2, batch_first=True)
    # Initialize the gate weight matrices orthogonally.
    for w in self.gru.parameters():
        if w.dim() > 1:
            weight_init.orthogonal_(w)
    self.out = nn.Linear(hidden_size, vocab_size)
    self.softmax = nn.LogSoftmax(dim=-1)  # log-probabilities over the vocabulary
    self.dropout = nn.Dropout(p=0.3)
def __init__(self, in_dim, out_dim, p_dropout=0.0):
    super().__init__()
    self.input_dim = in_dim
    self.output_dim = out_dim
    self.dropout = p_dropout

    # Input gate.
    self.W_i = nn.Linear(in_dim, out_dim)
    init.xavier_uniform_(self.W_i.weight)
    self.W_i.bias = nn.Parameter(torch.FloatTensor(out_dim).zero_())
    self.U_i = nn.Linear(out_dim, out_dim, bias=False)
    init.orthogonal_(self.U_i.weight)

    # Forget gate, biased toward remembering (bias = 1).
    self.W_f = nn.Linear(in_dim, out_dim)
    init.xavier_uniform_(self.W_f.weight)
    self.W_f.bias = nn.Parameter(torch.FloatTensor(out_dim).fill_(1.0))
    self.U_f = nn.Linear(out_dim, out_dim)
    init.orthogonal_(self.U_f.weight)

    # Cell candidate.
    self.W_c = nn.Linear(in_dim, out_dim)
    init.xavier_uniform_(self.W_c.weight)
    self.W_c.bias = nn.Parameter(torch.FloatTensor(out_dim).fill_(0.0))
    self.U_c = nn.Linear(out_dim, out_dim)
    init.orthogonal_(self.U_c.weight)

    # Output gate.
    self.W_o = nn.Linear(in_dim, out_dim)
    init.xavier_uniform_(self.W_o.weight)
    self.W_o.bias = nn.Parameter(torch.FloatTensor(out_dim).fill_(0.0))
    self.U_o = nn.Linear(out_dim, out_dim)
    init.orthogonal_(self.U_o.weight)
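The W_*/U_* naming matches the standard LSTM cell equations, where each gate combines an input projection W with a recurrent projection U. A sketch of one time step under that assumption (the forward pass itself is not part of the snippet above):

import torch

def lstm_step(cell, x, h, c):
    # Standard LSTM gate equations, assuming `cell` holds the W_*/U_* layers above.
    i = torch.sigmoid(cell.W_i(x) + cell.U_i(h))  # input gate
    f = torch.sigmoid(cell.W_f(x) + cell.U_f(h))  # forget gate
    g = torch.tanh(cell.W_c(x) + cell.U_c(h))     # cell candidate
    o = torch.sigmoid(cell.W_o(x) + cell.U_o(h))  # output gate
    c_next = f * c + i * g
    h_next = o * torch.tanh(c_next)
    return h_next, c_next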
def weights_init_orthogonal(m):
    classname = m.__class__.__name__
    # print(classname)
    if classname.find('Conv2d') != -1 or classname.find('ConvTranspose') != -1:
        init.orthogonal_(m.weight.data, gain=1)
        if m.bias is not None:
            m.bias.data.zero_()
    elif classname.find('Linear') != -1:
        init.orthogonal_(m.weight.data, gain=1)
        if m.bias is not None:
            m.bias.data.zero_()
    elif classname.find('BatchNorm2d') != -1:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)
def __init__(self, num_features=0, norm=False, dropout=0, num_diff_features=0):
    super(Trip_embedding, self).__init__()
    self.num_features = num_features
    self.num_diff_features = num_diff_features
    self.norm = norm
    self.dropout = dropout
    if self.dropout > 0:
        self.drop = nn.Dropout(self.dropout)
    if self.num_diff_features > 0:
        self.diff_feat = nn.Linear(self.num_features, self.num_diff_features)
        init.orthogonal_(self.diff_feat.weight)
        init.constant_(self.diff_feat.bias, 0)
def initialize_encoder(self):
    """Manual weight/bias initialization."""
    xavier_normal_(self.gru_enc_f.weight_ih)
    orthogonal_(self.gru_enc_f.weight_hh)
    self.gru_enc_f.bias_ih.data.zero_()
    self.gru_enc_f.bias_hh.data.zero_()
    xavier_normal_(self.gru_enc_b.weight_ih)
    orthogonal_(self.gru_enc_b.weight_hh)
    self.gru_enc_b.bias_ih.data.zero_()
    self.gru_enc_b.bias_hh.data.zero_()
def weights_init_orthogonal(m):
    classname = m.__class__.__name__
    # print('initializing [%s] ...' % classname)
    if classname.find('Conv') != -1:
        init.orthogonal_(m.weight.data, gain=1)
        if m.bias is not None:
            m.bias.data.zero_()
    elif classname.find('Linear') != -1:
        init.orthogonal_(m.weight.data, gain=1)
        if m.bias is not None:
            m.bias.data.zero_()
    elif classname.find('BatchNorm2d') != -1:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)
def init_func(m):
    classname = m.__class__.__name__
    if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
        if init_type == 'normal':
            init.normal_(m.weight.data, 0.0, gain)
        elif init_type == 'xavier':
            init.xavier_normal_(m.weight.data, gain=gain)
        elif init_type == 'kaiming':
            init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
        elif init_type == 'orthogonal':
            init.orthogonal_(m.weight.data, gain=gain)
        else:
            raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
        if hasattr(m, 'bias') and m.bias is not None:
            init.constant_(m.bias.data, 0.0)
    elif classname.find('BatchNorm2d') != -1:
        init.normal_(m.weight.data, 1.0, gain)
        init.constant_(m.bias.data, 0.0)
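`init_func` reads `init_type` and `gain` as free variables, so it is presumably nested inside a wrapper that selects the scheme and then walks the network. A hypothetical sketch of such an enclosing function (the name `init_weights`, its defaults, and the reduced single-branch body are assumptions, not from the snippet):

from torch.nn import init

def init_weights(net, init_type='orthogonal', gain=1.0):
    # The nested dispatcher closes over init_type and gain;
    # only the orthogonal branch is reproduced here for brevity.
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and ('Conv' in classname or 'Linear' in classname):
            init.orthogonal_(m.weight.data, gain=gain)
            if getattr(m, 'bias', None) is not None:
                init.constant_(m.bias.data, 0.0)
    net.apply(init_func)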
def init_fun(m):
    classname = m.__class__.__name__
    if (classname.find('Conv') == 0 or classname.find('Linear') == 0) and hasattr(m, 'weight'):
        # print(m.__class__.__name__)
        if init_type == 'gaussian':
            init.normal_(m.weight.data, 0.0, 0.02)
        elif init_type == 'xavier':
            init.xavier_normal_(m.weight.data, gain=math.sqrt(2))
        elif init_type == 'kaiming':
            init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
        elif init_type == 'orthogonal':
            init.orthogonal_(m.weight.data, gain=math.sqrt(2))
        elif init_type == 'default':
            pass
        else:
            assert 0, "Unsupported initialization: {}".format(init_type)
        if hasattr(m, 'bias') and m.bias is not None:
            init.constant_(m.bias.data, 0.0)
def __init__(self, margin=0, num_feature=128):
    super(AdaptTripletLoss, self).__init__()
    self.margin = margin
    self.ranking_loss = nn.MarginRankingLoss(margin=margin)
    self.softmargin_loss = nn.SoftMarginLoss()
    self.num_classes = num_feature
    self.adp_metric_embedding1 = nn.Linear(3 * self.num_classes, 3 * self.num_classes, bias=False)
    self.adp_metric_embedding1_bn = nn.BatchNorm1d(3 * self.num_classes)
    self.adp_metric_embedding2 = nn.Linear(3 * self.num_classes, 2 * self.num_classes, bias=False)
    self.adp_metric_embedding2_bn = nn.BatchNorm1d(2 * self.num_classes)
    self.adp_metric_embedding3 = nn.Linear(2 * self.num_classes, 2 * self.num_classes, bias=False)
    self.adp_metric_embedding3_bn = nn.BatchNorm1d(2 * self.num_classes)
    self.adp_metric_embedding4 = nn.Linear(2 * self.num_classes, 2 * self.num_classes, bias=False)
    # self.adp_metric_embedding2_bn = nn.BatchNorm1d(self.num_classes)
    # init.constant_(self.adp_metric_embedding1.bias, 0)
    # init.constant_(self.adp_metric_embedding2.bias, 0)
    init.kaiming_normal_(self.adp_metric_embedding1.weight, mode='fan_out')
    init.kaiming_normal_(self.adp_metric_embedding2.weight, mode='fan_out')
    init.kaiming_normal_(self.adp_metric_embedding3.weight, mode='fan_out')
    init.orthogonal_(self.adp_metric_embedding4.weight)
def reset_parameters(self):
    """Initialize parameters following the way proposed in the paper."""
    # The input-to-hidden weight matrix is initialized orthogonally.
    init.orthogonal_(self.weight_ih.data)
    # The hidden-to-hidden weight matrix is initialized as an identity
    # matrix, one block per gate.
    weight_hh_data = torch.eye(self.hidden_size)
    weight_hh_data = weight_hh_data.repeat(4, 1)
    self.weight_hh.data.set_(weight_hh_data)
    # The bias is just set to zero vectors.
    init.constant_(self.bias.data, val=0)
    # Initialization of the BN parameters: zero shift, 0.1 scale.
    self.bn_ih.reset_parameters()
    self.bn_hh.reset_parameters()
    self.bn_c.reset_parameters()
    self.bn_ih.bias.data.fill_(0)
    self.bn_hh.bias.data.fill_(0)
    self.bn_ih.weight.data.fill_(0.1)
    self.bn_hh.weight.data.fill_(0.1)
    self.bn_c.weight.data.fill_(0.1)
def _initialize_weights(self):
    init.orthogonal_(self.conv1.weight, init.calculate_gain('relu'))
    init.orthogonal_(self.conv2.weight, init.calculate_gain('relu'))
    init.orthogonal_(self.conv3.weight, init.calculate_gain('relu'))
    # The final layer keeps the default gain of 1.
    init.orthogonal_(self.conv4.weight)
def _initialize_orthogonal(conv):
    prelu_gain = math.sqrt(2)
    init.orthogonal_(conv.weight, gain=prelu_gain)
    if conv.bias is not None:
        conv.bias.data.zero_()
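The hard-coded sqrt(2) matches what `init.calculate_gain` returns for ReLU; for PReLU/leaky-ReLU-style activations the gain can instead be computed from the negative slope rather than hard-coded. A small check (the 0.25 slope is just PReLU's default initial value):

import math
from torch.nn import init

assert init.calculate_gain('relu') == math.sqrt(2.0)
# leaky_relu gain is sqrt(2 / (1 + negative_slope ** 2)).
slope = 0.25
expected = math.sqrt(2.0 / (1 + slope ** 2))
assert abs(init.calculate_gain('leaky_relu', slope) - expected) < 1e-12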