def __init__(self, input_size, n_head=4, identity=False):
    super(multiSeqAttnMatch, self).__init__()
    self.hidden_size = input_size // n_head
    self.w = nn.Parameter(torch.FloatTensor(n_head, input_size, self.hidden_size))
    init.xavier_normal(self.w)
    self.n_head = n_head
def weights_init_xavier(m):
    classname = m.__class__.__name__
    # print(classname)
    if classname.find('Conv') != -1:
        init.xavier_normal(m.weight.data, gain=0.02)
    elif classname.find('Linear') != -1:
        init.xavier_normal(m.weight.data, gain=0.02)
    elif classname.find('BatchNorm2d') != -1:
        init.normal(m.weight.data, 1.0, 0.02)
        init.constant(m.bias.data, 0.0)
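# A minimal usage sketch (not from the snippet above; `demo_net` is a hypothetical
# stand-in): initializer hooks like weights_init_xavier are normally passed to
# Module.apply(), which calls them on every submodule. In PyTorch >= 0.4 the
# in-place spellings init.xavier_normal_ / init.normal_ / init.constant_ are the
# non-deprecated equivalents of the calls used here.
import torch.nn as nn

demo_net = nn.Sequential(nn.Conv2d(3, 16, kernel_size=3), nn.BatchNorm2d(16), nn.Linear(16, 4))
demo_net.apply(weights_init_xavier)  # init-only demo; the layer stack is arbitrary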
def __init__(self, args):
    super(BiGRU, self).__init__()
    self.hidden_dim = args.hidden_dim
    self.batch_size = args.batch_size
    self.dropout = nn.Dropout(args.dropout)
    self.dropout_embed = nn.Dropout(args.dropout_embed)
    self.word_embeddings = nn.Embedding(args.embed_num, args.embedding_dim)
    self.bigru = nn.GRU(args.embedding_dim, args.hidden_dim, bidirectional=True, dropout=args.dropout_model)
    # self.hidden2label1 = nn.Linear(args.hidden_dim * 2, args.hidden_dim)
    # self.hidden2label2 = nn.Linear(args.hidden_dim, args.class_num)
    self.hidden2label = nn.Linear(args.hidden_dim * 2, args.class_num)
    self.hidden = self.init_hidden(args.batch_size)
    pretrained_weight = np.array(args.pretrained_weight)
    # print(pretrained_weight.shape)
    self.word_embeddings.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # weight init: xavier for the input-hidden and hidden-hidden matrices of both directions.
    # Unlike an LSTM, a GRU has no forget gate, so there is no forget-gate bias trick here.
    init.xavier_normal(self.bigru.all_weights[0][0], gain=np.sqrt(2.0))
    init.xavier_normal(self.bigru.all_weights[0][1], gain=np.sqrt(2.0))
    init.xavier_normal(self.bigru.all_weights[1][0], gain=np.sqrt(2.0))
    init.xavier_normal(self.bigru.all_weights[1][1], gain=np.sqrt(2.0))
    # self.bigru.all_weights[0][3].data.fill_(0.1)
    # self.bigru.all_weights[0][2].data.fill_(0.1)
    # self.bigru.all_weights[1][3].data.fill_(0.1)
    # self.bigru.all_weights[1][2].data.fill_(0.1)
    self.bn1 = nn.BatchNorm1d(600)
def __init__(self, args):
    super(BiLSTM_1, self).__init__()
    self.args = args
    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    self.dropout = nn.Dropout(args.dropout)
    self.dropout_embed = nn.Dropout(args.dropout_embed)
    if args.max_norm is not None:
        print("max_norm = {} ".format(args.max_norm))
        self.embed = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
    else:
        print("max_norm = {} ".format(args.max_norm))
        self.embed = nn.Embedding(V, D, scale_grad_by_freq=True)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    self.bilstm = nn.LSTM(D, self.hidden_dim, num_layers=self.num_layers, bias=True,
                          bidirectional=True, dropout=self.args.dropout)
    print(self.bilstm)
    if args.init_weight:
        print("Initializing weights .......")
        # all_weights[0] / all_weights[1] are the forward / reverse directions of the
        # bidirectional layer; indices 0 and 1 are weight_ih and weight_hh.
        init.xavier_normal(self.bilstm.all_weights[0][0], gain=np.sqrt(args.init_weight_value))
        init.xavier_normal(self.bilstm.all_weights[0][1], gain=np.sqrt(args.init_weight_value))
        init.xavier_normal(self.bilstm.all_weights[1][0], gain=np.sqrt(args.init_weight_value))
        init.xavier_normal(self.bilstm.all_weights[1][1], gain=np.sqrt(args.init_weight_value))
        # fan_in, fan_out = BiLSTM_1.calculate_fan_in_and_fan_out(self.bilstm.all_weights[1][1])
        # print(" in {} out {} ".format(fan_in, fan_out))
        # std = np.sqrt(args.init_weight_value) * np.sqrt(2.0 / (fan_in + fan_out))
        # print("self.bilstm.all_weights {} ".format(self.bilstm.all_weights))
        # Alternative bias schemes that were tried: zero all gate biases, or zero them and
        # then set only the forget-gate slice [20:40] to 1 (assumes hidden_dim == 20).
        # self.bilstm.all_weights[0][3].data.fill_(0)
        # self.bilstm.all_weights[0][2].data.fill_(0)
        # self.bilstm.all_weights[1][3].data.fill_(0)
        # self.bilstm.all_weights[1][2].data.fill_(0)
        # self.bilstm.all_weights[0][3].data[20:40].fill_(1)
        # self.bilstm.all_weights[0][3].data[0:20].fill_(0)
        # self.bilstm.all_weights[0][3].data[40:80].fill_(0)
        # self.bilstm.all_weights[0][2].data[20:40].fill_(1)
        # self.bilstm.all_weights[0][2].data[0:20].fill_(0)
        # self.bilstm.all_weights[0][2].data[40:80].fill_(0)
        # self.bilstm.all_weights[1][3].data[20:40].fill_(1)
        # self.bilstm.all_weights[1][3].data[0:20].fill_(0)
        # self.bilstm.all_weights[1][3].data[40:80].fill_(0)
        # self.bilstm.all_weights[1][2].data[20:40].fill_(1)
        # self.bilstm.all_weights[1][2].data[0:20].fill_(0)
        # self.bilstm.all_weights[1][2].data[40:80].fill_(0)
    # self.hidden2label1 = nn.Linear(self.hidden_dim * 2, self.hidden_dim)
    # self.hidden2label2 = nn.Linear(self.hidden_dim, C)
    self.hidden2label = nn.Linear(self.hidden_dim * 2, C)
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    print("self.hidden", self.hidden)
def __init__(self, args):
    super(BiLSTM, self).__init__()
    self.hidden_dim = args.hidden_dim
    self.batch_size = args.batch_size
    self.dropout = nn.Dropout(args.dropout)
    self.dropout_embed = nn.Dropout(args.dropout_embed)
    self.word_embeddings = nn.Embedding(args.embed_num, args.embedding_dim, max_norm=5.0)
    self.lstm = nn.LSTM(args.embedding_dim, args.hidden_dim, bidirectional=True, dropout=args.dropout_model)
    self.hidden2label1 = nn.Linear(args.hidden_dim * 2, args.hidden_dim)
    self.hidden2label2 = nn.Linear(args.hidden_dim, args.class_num)
    self.hidden = self.init_hidden(args.batch_size)
    pretrained_weight = np.array(args.pretrained_weight)
    # print(pretrained_weight.shape)
    self.word_embeddings.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # weight init: xavier for the input-hidden and hidden-hidden matrices of both directions
    init.xavier_normal(self.lstm.all_weights[0][0], gain=np.sqrt(2.0))
    init.xavier_normal(self.lstm.all_weights[0][1], gain=np.sqrt(2.0))
    init.xavier_normal(self.lstm.all_weights[1][0], gain=np.sqrt(2.0))
    init.xavier_normal(self.lstm.all_weights[1][1], gain=np.sqrt(2.0))
    # bias init: PyTorch packs the LSTM gates as (input, forget, cell, output), so with
    # hidden_dim == 20 the slice [20:40] is the forget gate. Set it to 1 and zero the
    # input, cell, and output slices; these hard-coded indices only cover the full bias
    # (length 4 * hidden_dim = 80) when hidden_dim is exactly 20.
    self.lstm.all_weights[0][3].data[20:40].fill_(1)
    self.lstm.all_weights[0][3].data[0:20].fill_(0)
    self.lstm.all_weights[0][3].data[40:80].fill_(0)
    self.lstm.all_weights[0][2].data[20:40].fill_(1)
    self.lstm.all_weights[0][2].data[0:20].fill_(0)
    self.lstm.all_weights[0][2].data[40:80].fill_(0)
    self.lstm.all_weights[1][3].data[20:40].fill_(1)
    self.lstm.all_weights[1][3].data[0:20].fill_(0)
    self.lstm.all_weights[1][3].data[40:80].fill_(0)
    self.lstm.all_weights[1][2].data[20:40].fill_(1)
    self.lstm.all_weights[1][2].data[0:20].fill_(0)
    self.lstm.all_weights[1][2].data[40:80].fill_(0)
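# A small illustration (assumed, not part of the snippet above) of why the slice
# [20:40] is the forget gate when hidden_dim == 20: PyTorch packs LSTM gate
# parameters in the order (input, forget, cell, output), so each bias vector has
# shape (4 * hidden_size,). Writing the same trick against named_parameters avoids
# hard-coded indices:
import torch.nn as nn

hidden = 20
demo_lstm = nn.LSTM(input_size=128, hidden_size=hidden, bidirectional=True)
for name, p in demo_lstm.named_parameters():
    if 'bias' in name:
        p.data.zero_()
        p.data[hidden:2 * hidden].fill_(1.0)  # forget-gate bias = 1, all other gates = 0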
def __init__(self, args):
    super(DEEP_CNN_MUI, self).__init__()
    self.args = args
    V = args.embed_num
    V_mui = args.embed_num_mui
    D = args.embed_dim
    C = args.class_num
    Ci = 2
    Co = args.kernel_num
    Ks = args.kernel_sizes
    if args.max_norm is not None:
        print("max_norm = {} ".format(args.max_norm))
        self.embed_no_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        self.embed_static = nn.Embedding(V_mui, D, max_norm=args.max_norm, scale_grad_by_freq=True)
    else:
        print("max_norm = {} ".format(args.max_norm))
        self.embed_no_static = nn.Embedding(V, D, scale_grad_by_freq=True)
        self.embed_static = nn.Embedding(V_mui, D, scale_grad_by_freq=True)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed_no_static.weight.data.copy_(torch.from_numpy(pretrained_weight))
        pretrained_weight_static = np.array(args.pretrained_weight_static)
        self.embed_static.weight.data.copy_(torch.from_numpy(pretrained_weight_static))
        # whether the word embedding is fine-tuned or frozen
        self.embed_no_static.weight.requires_grad = True
    # conv layers (note: plain Python lists are not registered as submodules;
    # wrapping them in nn.ModuleList would let .parameters()/.cuda() see them)
    self.convs1 = [nn.Conv2d(Ci, D, (K, D), stride=1, padding=(K // 2, 0), bias=True) for K in Ks]
    self.convs2 = [nn.Conv2d(1, Co, (K, D), stride=1, padding=(K // 2, 0), bias=True) for K in Ks]
    print(self.convs1)
    print(self.convs2)
    if args.init_weight:
        print("Initializing weights .......")
        for (conv1, conv2) in zip(self.convs1, self.convs2):
            init.xavier_normal(conv1.weight.data, gain=np.sqrt(args.init_weight_value))
            init.uniform(conv1.bias, 0, 0)  # uniform(0, 0) simply zeroes the bias
            init.xavier_normal(conv2.weight.data, gain=np.sqrt(args.init_weight_value))
            init.uniform(conv2.bias, 0, 0)
    # dropout
    self.dropout = nn.Dropout(args.dropout)
    # linear
    in_fea = len(Ks) * Co
    self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
    self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)
def init_func(m):
    # `init_type` and `gain` are captured from the enclosing scope
    classname = m.__class__.__name__
    if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
        if init_type == 'normal':
            init.normal(m.weight.data, 0.0, gain)
        elif init_type == 'xavier':
            init.xavier_normal(m.weight.data, gain=gain)
        elif init_type == 'kaiming':
            init.kaiming_normal(m.weight.data, a=0, mode='fan_in')
        elif init_type == 'orthogonal':
            init.orthogonal(m.weight.data, gain=gain)
        else:
            raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
        if hasattr(m, 'bias') and m.bias is not None:
            init.constant(m.bias.data, 0.0)
    elif classname.find('BatchNorm2d') != -1:
        init.normal(m.weight.data, 1.0, gain)
        init.constant(m.bias.data, 0.0)
def init_fun(m):
    # `init_type` comes from the enclosing scope
    classname = m.__class__.__name__
    if (classname.find('Conv') == 0 or classname.find('Linear') == 0) and hasattr(m, 'weight'):
        # print(m.__class__.__name__)
        if init_type == 'gaussian':
            init.normal(m.weight.data, 0.0, 0.02)
        elif init_type == 'xavier':
            init.xavier_normal(m.weight.data, gain=math.sqrt(2))
        elif init_type == 'kaiming':
            init.kaiming_normal(m.weight.data, a=0, mode='fan_in')
        elif init_type == 'orthogonal':
            init.orthogonal(m.weight.data, gain=math.sqrt(2))
        elif init_type == 'default':
            pass
        else:
            assert 0, "Unsupported initialization: {}".format(init_type)
        if hasattr(m, 'bias') and m.bias is not None:
            init.constant(m.bias.data, 0.0)
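# A minimal sketch of the wrapper such nested initializers usually live in (the
# name `init_weights` and its signature are assumptions, not taken from the
# snippets above): the closure captures `init_type`, and Module.apply() walks
# every submodule.
import torch.nn as nn
from torch.nn import init

def init_weights(net, init_type='xavier', gain=0.02):
    def _init(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and ('Conv' in classname or 'Linear' in classname):
            if init_type == 'xavier':
                init.xavier_normal(m.weight.data, gain=gain)
            elif init_type == 'gaussian':
                init.normal(m.weight.data, 0.0, 0.02)
            if hasattr(m, 'bias') and m.bias is not None:
                init.constant(m.bias.data, 0.0)
    net.apply(_init)
    return net

# e.g. init_weights(nn.Sequential(nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 2)), init_type='xavier')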
def __init__(self, args):
    super(CNN, self).__init__()
    self.args = args
    # self.conv1l = nn.Conv2d(3, 20, 5, stride=1, bias=True)
    Ci = 1  # single input channel for text
    self.embed = nn.Embedding(args.embed_num, args.embedding_dim)
    # pretrained_weight is a numpy matrix of shape (num_embeddings, embedding_dim)
    pretrained_weight = np.array(args.pretrained_weight)
    # print(pretrained_weight.shape)
    self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # e.g. Embedding(15453, 128)
    self.convs1 = [nn.Conv2d(Ci, args.kernel_num, (K, args.embedding_dim)) for K in args.kernel_sizes]
    for conv in self.convs1:
        init.xavier_normal(conv.weight, gain=np.sqrt(2.0))
        # init.normal(conv.weight, mean=0, std=0.1)
        # init.constant(conv.bias, 0.1)
    # print(self.convs1)
    # self.conv13 = nn.Conv2d(Ci, Co, (3, D))
    # self.conv14 = nn.Conv2d(Ci, Co, (4, D))
    # self.conv15 = nn.Conv2d(Ci, Co, (5, D))
    # self.conv16 = nn.Conv2d(Ci, Co, (6, D))
    self.dropout = nn.Dropout(args.dropout)
    self.fc1 = nn.Linear(len(args.kernel_sizes) * args.kernel_num, args.class_num)  # len(Ks)*Co -> C
    # self.fc1 = nn.Linear(len(Ks) * Co * 2, C)  # len(Ks)*Co*2 -> C
    # self.bn = nn.BatchNorm1d(1, momentum=0.5)
    self.bn = nn.BatchNorm2d(1)
def __init__(self, args):
    super(LSTM, self).__init__()
    self.args = args
    # print(args)
    self.hidden_dim = args.lstm_hidden_dim
    self.num_layers = args.lstm_num_layers
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    if args.max_norm is not None:
        print("max_norm = {} ".format(args.max_norm))
        self.embed = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
    else:
        print("max_norm = {} ".format(args.max_norm))
        self.embed = nn.Embedding(V, D, scale_grad_by_freq=True)
    # word embedding
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
    # lstm
    self.lstm = nn.LSTM(D, self.hidden_dim, dropout=args.dropout, num_layers=self.num_layers)
    if args.init_weight:
        print("Initializing weights .......")
        init.xavier_normal(self.lstm.all_weights[0][0], gain=np.sqrt(args.init_weight_value))
        init.xavier_normal(self.lstm.all_weights[0][1], gain=np.sqrt(args.init_weight_value))
    # linear
    self.hidden2label = nn.Linear(self.hidden_dim, C)
    # hidden
    self.hidden = self.init_hidden(self.num_layers, args.batch_size)
    # dropout
    self.dropout = nn.Dropout(args.dropout)
    self.dropout_embed = nn.Dropout(args.dropout_embed)
def __init__(self, in_size, out_size, kernel_size=3, stride=1, padding=1,
             activation=nn.ReLU(), space_dropout=False):
    super(UNetUpBlock, self).__init__()
    self.conv0 = nn.Conv2d(in_size, out_size, 3, stride=1, padding=1)
    self.conv = nn.Conv2d(in_size, out_size, kernel_size, stride=1, padding=1)
    self.conv2 = nn.Conv2d(out_size, out_size, kernel_size, stride=1, padding=1)
    init.xavier_normal(self.conv0.weight, gain=np.sqrt(2))
    init.xavier_normal(self.conv.weight, gain=np.sqrt(2))
    init.xavier_normal(self.conv2.weight, gain=np.sqrt(2))
    init.constant(self.conv0.bias, 0.1)
    init.constant(self.conv.bias, 0.1)
    init.constant(self.conv2.bias, 0.1)
    self.activation = activation
    self.upsampler = nn.Upsample(scale_factor=2)
def __init__(self, in_size, out_size, kernel_size=3, stride=1, padding=1,
             activation=nn.ReLU(), downsample=True):
    super(UNetConvBlock, self).__init__()
    self.conv_down = nn.Conv2d(in_size, in_size, kernel_size, stride=2, padding=1)
    self.conv = nn.Conv2d(in_size, out_size, kernel_size, stride=1, padding=padding)
    self.conv2 = nn.Conv2d(out_size, out_size, kernel_size, stride=1, padding=1)
    init.xavier_normal(self.conv_down.weight, gain=np.sqrt(2))
    init.xavier_normal(self.conv.weight, gain=np.sqrt(2))
    init.xavier_normal(self.conv2.weight, gain=np.sqrt(2))
    init.constant(self.conv_down.bias, 0.1)
    init.constant(self.conv.bias, 0.1)
    init.constant(self.conv2.bias, 0.1)
    self.activation = activation
    self.downsample = downsample
def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
    super(MultiHeadAttention, self).__init__()
    self.n_head = n_head
    self.d_k = d_k
    self.d_v = d_v
    self.w_qs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_ks = nn.Parameter(torch.FloatTensor(n_head, d_model, d_k))
    self.w_vs = nn.Parameter(torch.FloatTensor(n_head, d_model, d_v))
    self.attention = ScaledDotProductAttention(d_model)
    self.layer_norm = LayerNormalization(d_model)
    self.proj = Linear(n_head * d_v, d_model)
    self.dropout = nn.Dropout(dropout)
    init.xavier_normal(self.w_qs)
    init.xavier_normal(self.w_ks)
    init.xavier_normal(self.w_vs)
def __init__(self, **kwargs):
    super(TextCNN, self).__init__()
    self.input_size = kwargs['input_size']
    self.hidden_size = kwargs['hidden_size']
    self.output_size = kwargs['output_size']
    self.kernel_num = kwargs.get('kernel_num', 256)
    self.kernel_sizes = kwargs.get('kernel_sizes', [1, 2, 3, 4])
    self.embed_size = kwargs.get('embed_size', kwargs['hidden_size'])
    self.dropout = kwargs.get('dropout', 0.1)
    self.wide_conv = kwargs.get('wide_conv', False)
    self.init_weight = kwargs.get('init_weight', False)
    self.init_weight_value = kwargs.get('init_weight_value', 2.0)
    self.batch_normal = kwargs.get('batch_normal', False)
    self.batch_normal_momentum = kwargs.get('batch_normal_momentum', 0.1)
    self.batch_normal_affine = kwargs.get('batch_normal_affine', False)

    Ci = 1                   # input channels: text is treated as a single channel
    Co = self.kernel_num     # output channels
    Ks = self.kernel_sizes   # list of kernel heights

    if 'max_norm' in kwargs:
        self.embed = nn.Embedding(self.input_size, self.embed_size, max_norm=kwargs['max_norm'])
    else:
        self.embed = nn.Embedding(self.input_size, self.embed_size, scale_grad_by_freq=True)
    if 'word_embedding' in kwargs:
        pretrained_weight = torch.from_numpy(kwargs['word_embedding'])
        self.embed.weight.data.copy_(pretrained_weight)
        self.embed.weight.requires_grad = True

    if self.wide_conv is True:
        self.convs1 = [
            nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, self.embed_size),
                      stride=(1, 1), padding=(K // 2, 0), dilation=1, bias=True)
            for K in Ks
        ]
    else:
        self.convs1 = [
            nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, self.embed_size), bias=True)
            for K in Ks
        ]
    if self.init_weight:
        for conv in self.convs1:
            init.xavier_normal(conv.weight.data, gain=np.sqrt(self.init_weight_value))
            fanin, fanout = self.cal_fanin_fanout(conv.weight.data)
            std = np.sqrt(self.init_weight_value) * np.sqrt(2.0 / (fanin + fanout))  # for reference only
            init.uniform(conv.bias, 0, 0)  # uniform(0, 0) zeroes the bias

    self.dropout = nn.Dropout(self.dropout)
    in_fea = len(Ks) * Co
    self.f1 = nn.Linear(in_fea, in_fea // 2, bias=True)
    self.f2 = nn.Linear(in_fea // 2, self.output_size, bias=True)
    if self.batch_normal:
        self.convs1_bn = nn.BatchNorm2d(num_features=Co,
                                        momentum=self.batch_normal_momentum,
                                        affine=self.batch_normal_affine)
        self.f1_bn = nn.BatchNorm1d(num_features=in_fea // 2,
                                    momentum=self.batch_normal_momentum,
                                    affine=self.batch_normal_affine)
        self.f2_bn = nn.BatchNorm1d(num_features=self.output_size,
                                    momentum=self.batch_normal_momentum,
                                    affine=self.batch_normal_affine)
def __init__(self, d_in, d_out, bias=True):
    super(Linear, self).__init__()
    self.linear = nn.Linear(d_in, d_out, bias=bias)
    init.xavier_normal(self.linear.weight)
def __init__(self, args):
    super(CNN_MUI, self).__init__()
    self.args = args
    V = args.embed_num
    V_mui = args.embed_num_mui
    D = args.embed_dim
    C = args.class_num
    Ci = 2
    Co = args.kernel_num
    Ks = args.kernel_sizes
    if args.max_norm is not None:
        print("max_norm = {} ".format(args.max_norm))
        self.embed_no_static = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        self.embed_static = nn.Embedding(V_mui, D, max_norm=args.max_norm, scale_grad_by_freq=True)
    else:
        print("max_norm = {} ".format(args.max_norm))
        self.embed_no_static = nn.Embedding(V, D, scale_grad_by_freq=True)
        self.embed_static = nn.Embedding(V_mui, D, scale_grad_by_freq=True)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed_no_static.weight.data.copy_(torch.from_numpy(pretrained_weight))
        pretrained_weight_static = np.array(args.pretrained_weight_static)
        self.embed_static.weight.data.copy_(torch.from_numpy(pretrained_weight_static))
        # whether the non-static embedding is fine-tuned or frozen
        self.embed_no_static.weight.requires_grad = True
        # self.embed_static.weight.requires_grad = False
    if args.wide_conv is True:
        print("using wide convolution")
        self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D),
                                 stride=(1, 1), padding=(K // 2, 0), bias=True) for K in Ks]
    else:
        print("using narrow convolution")
        self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), bias=True) for K in Ks]
    print(self.convs1)
    if args.init_weight:
        print("Initializing weights .......")
        for conv in self.convs1:
            init.xavier_normal(conv.weight.data, gain=np.sqrt(args.init_weight_value))
            init.uniform(conv.bias, 0, 0)  # uniform(0, 0) zeroes the bias
    # self.conv13 = nn.Conv2d(Ci, Co, (3, D))
    # self.conv14 = nn.Conv2d(Ci, Co, (4, D))
    # self.conv15 = nn.Conv2d(Ci, Co, (5, D))
    self.dropout = nn.Dropout(args.dropout)
    in_fea = len(Ks) * Co
    self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
    self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)
    if args.batch_normalizations is True:
        print("using batch_normalizations in the model......")
        self.convs1_bn = nn.BatchNorm2d(num_features=Co, momentum=args.bath_norm_momentum,
                                        affine=args.batch_norm_affine)
        self.fc1_bn = nn.BatchNorm1d(num_features=in_fea // 2, momentum=args.bath_norm_momentum,
                                     affine=args.batch_norm_affine)
        self.fc2_bn = nn.BatchNorm1d(num_features=C, momentum=args.bath_norm_momentum,
                                     affine=args.batch_norm_affine)
def weights_init(m):
    for _, mi in m._modules.items():
        if isinstance(mi, nn.Conv2d) or isinstance(mi, nn.Linear):
            xavier_normal(mi.weight.data)
            if mi.bias is not None:
                # xavier init requires a tensor with at least 2 dims, so zero the 1-D bias instead
                mi.bias.data.zero_()
def __init__(self, d_in, d_out, bias=True):
    super(XavierLinear, self).__init__()
    self.linear = nn.Linear(d_in, d_out, bias=bias)
    init.xavier_normal(self.linear.weight)
def __init__(self, vocab_size, hidden_size):
    super(AnswerModule, self).__init__()
    self.z = nn.Linear(2 * hidden_size, vocab_size)
    init.xavier_normal(self.z.state_dict()['weight'])
    self.dropout = nn.Dropout(0.1)
def __init__(self):
    super(Discriminator, self).__init__()
    self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1)      # 256x256
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)     # 128x128
    self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)    # 64x64
    self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)   # 32x32
    self.conv5 = nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1)   # 16x16
    self.conv6 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1)   # 8x8
    self.conv7 = nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1)   # 4x4
    self.conv8 = nn.Conv2d(512, 1024, kernel_size=4, stride=1, padding=0)  # 1x1
    self.bn1 = nn.BatchNorm2d(32)
    self.bn2 = nn.BatchNorm2d(64)
    self.bn3 = nn.BatchNorm2d(128)
    self.bn4 = nn.BatchNorm2d(256)
    self.bn5 = nn.BatchNorm2d(256)
    self.bn6 = nn.BatchNorm2d(512)
    self.bn7 = nn.BatchNorm2d(512)
    self.sigmoid = nn.Sigmoid()
    self.lrelu = nn.LeakyReLU(negative_slope=0.2)
    # same init for every conv layer: xavier weights, constant 0.1 bias
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4,
                 self.conv5, self.conv6, self.conv7, self.conv8):
        init.xavier_normal(conv.weight, gain=np.sqrt(2))
        init.constant(conv.bias, 0.1)
def weight_init(m):
    if isinstance(m, nn.Conv2d):
        init.xavier_normal(m.weight)
        init.constant(m.bias, 0)
def weights_init(m):
    classname = m.__class__.__name__
    if 'Linear' in classname:
        init.xavier_normal(m.weight.data)
        init.constant(m.bias, 0.0)
def init_weight(self):
    init.xavier_normal(self.img_embed.weight)
    init.xavier_normal(self.att_embed.weight)
def init_weights(layer):
    if isinstance(layer, nn.Linear):
        xavier_normal(layer.weight.data)
def xavier_init(model):
    for param in model.parameters():
        if len(param.size()) == 2:  # only 2-D weight matrices; biases keep their default init
            xavier_normal(param)
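# A minimal usage sketch (`demo_model` is a hypothetical stand-in): xavier_init only
# touches 2-D parameters, so biases and other 1-D tensors keep PyTorch's defaults.
import torch.nn as nn

demo_model = nn.Sequential(nn.Linear(16, 8), nn.ReLU(), nn.Linear(8, 2))
xavier_init(demo_model)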
def __init__(self, dimensions, **kwargs):
    super(VAE, self).__init__()
    assert len(dimensions) > 1
    # unpack the VAE dimensions: [input, hidden..., latent]
    self.embedding_dim = dimensions[0]
    self.hidden_dims = dimensions[1:-1]
    self.latent_dim = dimensions[-1]
    self.dec_final_act = kwargs['decoder_final_activation']
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.is_logits = kwargs.get('logits', False)
    if self.is_logits:
        self.resconstruction_loss = nn.modules.loss.MSELoss()
    else:
        self.resconstruction_loss = self.binary_cross_entropy

    # Encoder block
    self.enc_hidden_layers = nn.Sequential()
    self.enc_hidden_layers.add_module('hidden_layer_0',
                                      nn.Linear(self.embedding_dim, self.hidden_dims[0]))
    self.enc_hidden_layers.add_module('h_layer_act_0', nn.ReLU())
    for i, _ in enumerate(self.hidden_dims[:-1]):
        self.enc_hidden_layers.add_module('hidden_layer_{}'.format(i + 1),
                                          nn.Linear(self.hidden_dims[i], self.hidden_dims[i + 1]))
        self.enc_hidden_layers.add_module('h_layer_act_{}'.format(i + 1), nn.ReLU())
    # mean and log-variance heads of the VAE
    self.z_mean = nn.Linear(self.hidden_dims[-1], self.latent_dim)
    self.z_log_var = nn.Linear(self.hidden_dims[-1], self.latent_dim)
    # ~Encoder block

    # Decoder block
    dec_hidden_layers = nn.Sequential()
    dec_hidden_layers.add_module('hidden_layer_0',
                                 nn.Linear(self.latent_dim, self.hidden_dims[-1]))
    dec_hidden_layers.add_module('h_layer_act_0', nn.ReLU())
    reversed_hidden_dims = list(reversed(self.hidden_dims))
    for i, _ in enumerate(reversed_hidden_dims):
        if i == (len(reversed_hidden_dims) - 1):
            dec_hidden_layers.add_module('hidden_layer_{}'.format(i + 1),
                                         nn.Linear(reversed_hidden_dims[i], self.embedding_dim))
        else:
            dec_hidden_layers.add_module('hidden_layer_{}'.format(i + 1),
                                         nn.Linear(reversed_hidden_dims[i], reversed_hidden_dims[i + 1]))
            dec_hidden_layers.add_module('h_layer_act_{}'.format(i + 1), nn.ReLU())
    # the final activation of the decoder depends on the data
    if self.dec_final_act == 'sigmoid':
        dec_hidden_layers.add_module('dec_final_act', nn.Sigmoid())
    elif self.dec_final_act == 'tanh':
        dec_hidden_layers.add_module('dec_final_act', nn.Tanh())
    elif self.dec_final_act == 'relu':
        dec_hidden_layers.add_module('dec_final_act', nn.ReLU())
    self.decoder = dec_hidden_layers
    # ~Decoder block

    # xavier init for every linear layer, zero biases
    for m in self.modules():
        if isinstance(m, nn.Linear):
            init.xavier_normal(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
    self.to(self.device)
def main():
    print("Loading data from '%s'" % opt.data)
    dataset = torch.load(opt.data)
    if opt.model_type == 'nmt':
        if dataset.get("type", "text") not in ["bitext", "text"]:
            print("WARNING: The provided dataset is not bilingual!")
    elif opt.model_type == 'lm':
        if dataset.get("type", "text") != 'monotext':
            print("WARNING: The provided dataset is not monolingual!")
    else:
        raise NotImplementedError('Not a valid model type: %s' % opt.model_type)

    dict_checkpoint = (opt.train_from if opt.train_from else opt.train_from_state_dict)
    if dict_checkpoint:
        print('Loading dicts from checkpoint at %s' % dict_checkpoint)
        checkpoint = torch.load(dict_checkpoint)
        if opt.model_type == 'nmt':
            assert checkpoint.get('type', None) is None or \
                checkpoint['type'] == "nmt", \
                "The loaded model is not neural machine translation!"
        elif opt.model_type == 'lm':
            assert checkpoint['type'] == "lm", \
                "The loaded model is not a language model!"
        dataset['dicts'] = checkpoint['dicts']

    trainData = onmt.Dataset(dataset['train']['src'], dataset['train']['tgt'],
                             opt.batch_size, opt.gpus,
                             data_type=dataset.get("type", "text"))
    validData = onmt.Dataset(dataset['valid']['src'], dataset['valid']['tgt'],
                             opt.batch_size, opt.gpus, volatile=True,
                             data_type=dataset.get("type", "text"))

    dicts = dataset['dicts']
    model_opt = checkpoint['opt'] if dict_checkpoint else opt
    if dicts.get('tgt', None) is None:
        # makes the code compatible with the language model
        dicts['tgt'] = dicts['src']
    if opt.model_type == 'nmt':
        print(' * vocabulary size. source = %d; target = %d' %
              (dicts['src'].size(), dicts['tgt'].size()))
    elif opt.model_type == 'lm':
        print(' * vocabulary size = %d' % (dicts['src'].size()))
    print(' * number of training sentences. %d' % len(dataset['train']['src']))
    print(' * maximum batch size. %d' % opt.batch_size)

    print('Building model...')
    if opt.model_type == 'nmt':
        decoder = onmt.Decoders.getDecoder(model_opt.decoder_type)(model_opt, dicts['tgt'])
        encoder = onmt.Encoders.getEncoder(model_opt.encoder_type)(model_opt, dicts['src'])
        model = onmt.Models.NMTModel(encoder, decoder)
    elif opt.model_type == 'lm':
        model = onmt.LanguageModel.LM(model_opt, dicts['src'])

    generator = nn.Sequential(
        nn.Linear(model_opt.rnn_size, dicts['tgt'].size()),
        nn.LogSoftmax())

    if opt.train_from:
        print('Loading model from checkpoint at %s' % opt.train_from)
        chk_model = checkpoint['model']
        generator_state_dict = chk_model.generator.state_dict()
        model_state_dict = {k: v for k, v in chk_model.state_dict().items()
                            if 'generator' not in k}
        model.load_state_dict(model_state_dict)
        generator.load_state_dict(generator_state_dict)
        opt.start_epoch = checkpoint['epoch'] + 1

    if opt.train_from_state_dict:
        print('Loading model from state_dict at %s' % opt.train_from_state_dict)
        model.load_state_dict(checkpoint['model'])
        generator.load_state_dict(checkpoint['generator'])
        model_opt.start_epoch = opt.start_epoch
        model_opt.epochs = opt.epochs

    if len(opt.gpus) >= 1:
        model.cuda()
        generator.cuda()
    else:
        model.cpu()
        generator.cpu()

    if len(opt.gpus) > 1:
        model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)
        generator = nn.DataParallel(generator, device_ids=opt.gpus, dim=0)
    model_opt.gpus = opt.gpus

    model.generator = generator

    if not opt.train_from_state_dict and not opt.train_from:
        for p in model.parameters():
            # p.data.uniform_(-opt.param_init, opt.param_init)
            if len(p.data.size()) > 1:
                init.xavier_normal(p.data)   # xavier for weight matrices
            else:
                p.data.uniform_(-opt.param_init, opt.param_init)  # uniform for vectors / biases
        model.initialize_parameters(opt.param_init)
        model.load_pretrained_vectors(opt)

    if (not opt.train_from_state_dict and not opt.train_from) or opt.change_optimizer:
        optim = onmt.Optim(opt.optim, opt.learning_rate, opt.max_grad_norm,
                           lr_decay=opt.learning_rate_decay,
                           start_decay_at=opt.start_decay_at)
        optim.set_parameters(model.parameters())
        model_opt.learning_rate = opt.learning_rate
        model_opt.learning_rate_decay = opt.learning_rate_decay
        model_opt.save_each = opt.save_each
    else:
        print('Loading optimizer from checkpoint:')
        optim = checkpoint['optim']
        optim.optimizer.load_state_dict(checkpoint['optim'].optimizer.state_dict())
        optim.set_parameters(model.parameters())

    nParams = sum([p.nelement() for p in model.parameters()])
    print('* number of parameters: %d' % nParams)

    if opt.train_from or opt.train_from_state_dict:
        print(model_opt)
        model_opt.use_learning_rate_decay = opt.use_learning_rate_decay

    trainModel(model, trainData, validData, dataset, optim, model_opt)
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        xavier_normal(m.weight.data)
        # xavier init requires a tensor with at least 2 dims, so it cannot be applied
        # to the 1-D bias; zero it instead
        if m.bias is not None:
            m.bias.data.zero_()
def init_linear(linear):
    init.xavier_normal(linear.weight)
    linear.bias.data.zero_()
def __init__(self, args):
    super(CNN_Text, self).__init__()
    self.args = args
    V = args.embed_num
    D = args.embed_dim
    C = args.class_num
    Ci = 1
    Co = args.kernel_num
    Ks = args.kernel_sizes
    if args.max_norm is not None:
        print("max_norm = {} ".format(args.max_norm))
        self.embed = nn.Embedding(V, D, max_norm=args.max_norm, scale_grad_by_freq=True)
        # self.embed.weight.data.uniform_(-0.1, 0.1)
    else:
        print("max_norm = {} ".format(args.max_norm))
        self.embed = nn.Embedding(V, D, scale_grad_by_freq=True)
    if args.word_Embedding:
        pretrained_weight = np.array(args.pretrained_weight)
        self.embed.weight.data.copy_(torch.from_numpy(pretrained_weight))
        # whether the word embedding is fine-tuned or frozen
        self.embed.weight.requires_grad = True
    print("embedding weight size: {} ".format(self.embed.weight.data.size()))
    if args.wide_conv is True:
        print("using wide convolution")
        self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D),
                                 stride=(1, 1), padding=(K // 2, 0), dilation=1, bias=False) for K in Ks]
    else:
        print("using narrow convolution")
        self.convs1 = [nn.Conv2d(in_channels=Ci, out_channels=Co, kernel_size=(K, D), bias=True) for K in Ks]
    print(self.convs1)
    if args.init_weight:
        print("Initializing weights .......")
        for conv in self.convs1:
            init.xavier_normal(conv.weight.data, gain=np.sqrt(args.init_weight_value))
            fan_in, fan_out = CNN_Text.calculate_fan_in_and_fan_out(conv.weight.data)
            print("fan_in {} fan_out {} ".format(fan_in, fan_out))
            std = np.sqrt(args.init_weight_value) * np.sqrt(2.0 / (fan_in + fan_out))
            print("std {} ".format(std))
            # init.uniform(conv.bias, 0, 0)
    self.dropout = nn.Dropout(args.dropout)
    self.dropout_embed = nn.Dropout(args.dropout_embed)
    in_fea = len(Ks) * Co
    # self.fc1 = nn.Linear(in_features=in_fea, out_features=in_fea // 2, bias=True)
    # self.fc2 = nn.Linear(in_features=in_fea // 2, out_features=C, bias=True)
    self.fc = nn.Linear(in_features=in_fea, out_features=C, bias=True)
    # whether to use batch normalization
    if args.batch_normalizations is True:
        print("using batch_normalizations in the model......")
        self.convs1_bn = nn.BatchNorm2d(num_features=Co, momentum=args.bath_norm_momentum,
                                        affine=args.batch_norm_affine)
        self.fc1_bn = nn.BatchNorm1d(num_features=in_fea // 2, momentum=args.bath_norm_momentum,
                                     affine=args.batch_norm_affine)
        self.fc2_bn = nn.BatchNorm1d(num_features=C, momentum=args.bath_norm_momentum,
                                     affine=args.batch_norm_affine)