def forward(self, x):
    x = self.linear1(x)
    x = torch.tanh(x)  # nn.tanh does not exist; use torch.tanh (or an nn.Tanh() module)
    x = self.linear2(x)
    x = torch.tanh(x)
    x = self.linear3(x)
    return x
def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size, encoder_dim=2048, dropout=0.5):
    super(DecoderWithAttention, self).__init__()
    self.encoder_dim = encoder_dim
    self.attention_dim = attention_dim
    self.embed_dim = embed_dim
    self.decoder_dim = decoder_dim
    self.vocab_size = vocab_size
    self.dropout = dropout
    self.tanh = nn.Tanh()  # nn.Tanh is the module class; nn.tanh does not exist
    self.attention = Attention(encoder_dim, decoder_dim, attention_dim)
    self.embedding = create_word_embedding()
    self.dropout = nn.Dropout(p=self.dropout)
    self.decode_step = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, bias=True)
    self.init_h = nn.Linear(encoder_dim, decoder_dim)
    self.init_c = nn.Linear(encoder_dim, decoder_dim)
    # self.f_beta = nn.Linear(decoder_dim, encoder_dim)
    self.attention_learner_1 = nn.Linear(decoder_dim, 1024)
    self.leaky_relu = nn.LeakyReLU(0.01)
    self.attention_learner_2 = nn.Linear(1024, encoder_dim)
    self.sigmoid = nn.Sigmoid()
    self.fc_1 = nn.Linear(decoder_dim, 1000)
    self.fc_2 = nn.Linear(1000, vocab_size)
    self.init_weights()
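# A minimal, self-contained sketch (an assumption, not from the original code) of how
# the attention_learner_1/2 + sigmoid stack above can act as the usual Show-Attend-Tell
# gate in place of the commented-out f_beta layer. The tensor names `h` (decoder hidden
# state) and `awe` (attention-weighted encoding) and the dimensions are hypothetical.
import torch
import torch.nn as nn

decoder_dim, encoder_dim = 512, 2048
attention_learner_1 = nn.Linear(decoder_dim, 1024)
leaky_relu = nn.LeakyReLU(0.01)
attention_learner_2 = nn.Linear(1024, encoder_dim)
sigmoid = nn.Sigmoid()

h = torch.randn(4, decoder_dim)    # decoder hidden state for a batch of 4
awe = torch.randn(4, encoder_dim)  # attention-weighted encoder output
gate = sigmoid(attention_learner_2(leaky_relu(attention_learner_1(h))))
gated_awe = gate * awe             # element-wise gating before the LSTM step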
def __init__(self, input_size, hidden_dim, output_size):
    '''
    Constructor for the generator.

    Arguments:
    - input_size  : the size of the latent sample vector
    - hidden_dim  : the base number of neurons for the hidden layers (doubled at each layer)
    - output_size : the number of neurons in the output layer
    '''
    super().__init__()  # required so nn.Module registers the submodules below

    # Define the class variables
    self.input_size = input_size
    self.hidden_dim = hidden_dim
    self.output_size = output_size

    # Define the modules required by this class
    self.fc1 = nn.Linear(self.input_size, self.hidden_dim)
    self.fc2 = nn.Linear(self.hidden_dim, self.hidden_dim * 2)
    self.fc3 = nn.Linear(self.hidden_dim * 2, self.hidden_dim * 4)
    self.fc4 = nn.Linear(hidden_dim * 4, output_size)
    self.dropout = nn.Dropout(0.3)
    self.tanh = nn.Tanh()  # nn.Tanh, not nn.tanh
def __init__(self):
    super(EncDec, self).__init__()
    self.encoder = nn.Sequential(
        nn.Conv2d(1, 16, stride=2, padding=1, kernel_size=3),
        nn.BatchNorm2d(16),
        nn.ReLU(True),
        nn.Conv2d(16, 32, stride=2, padding=1, kernel_size=3),
        nn.BatchNorm2d(32),
        nn.ReLU(True),
        nn.Conv2d(32, 64, stride=2, padding=1, kernel_size=3),
        nn.BatchNorm2d(64),
        nn.ReLU(True),
        nn.Conv2d(64, 128, stride=2, padding=1, kernel_size=3),
        nn.BatchNorm2d(128),  # must match the 128 output channels of the preceding conv
        nn.ReLU(True),
    )
    self.decoder = nn.Sequential(
        nn.ConvTranspose2d(128, 64, stride=2, padding=1, kernel_size=3),
        nn.BatchNorm2d(64),
        nn.ReLU(True),
        nn.ConvTranspose2d(64, 32, stride=2, padding=1, kernel_size=3),
        nn.BatchNorm2d(32),
        nn.ReLU(True),
        nn.ConvTranspose2d(32, 16, stride=2, padding=1, kernel_size=3),
        nn.BatchNorm2d(16),
        nn.ReLU(True),
        nn.ConvTranspose2d(16, 1, stride=2, padding=1, kernel_size=3),
        nn.Tanh(),  # nn.Tanh, not nn.tanh
    )
def forward(self, state):
    h = state
    for layer in self.policy:
        h = layer(h)
        h = torch.relu(h)  # nn.ReLU(h) would construct a module, not apply the activation
    mean = torch.tanh(self.mean(h)) * self.action_scale + self.action_bias
    return mean
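# A small sketch (an assumption, not from the original code) of how action_scale and
# action_bias are commonly derived from a Box action space, so that the torch.tanh(...)
# above lands inside [low, high]. The bounds `low` and `high` are hypothetical values.
import torch

low = torch.tensor([-2.0, -1.0])
high = torch.tensor([2.0, 1.0])
action_scale = (high - low) / 2.0  # half-range of each action dimension
action_bias = (high + low) / 2.0   # midpoint of each action dimension
# tanh output in [-1, 1] maps to: tanh(u) * action_scale + action_bias in [low, high]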
def forward(self, x, res):
    x = self.unpool3(x)
    x = self.conv3(torch.cat([x, res[2]], dim=1))
    x = self.unpool2(x)
    x = self.conv2(torch.cat([x, res[1]], dim=1))
    x = self.unpool1(x)
    x = self.conv1(torch.cat([x, res[0]], dim=1))
    if self.use_tanh:
        x = torch.tanh(x)  # nn.tanh does not exist; use torch.tanh
    return x
def forward(self, H):
    size = H.size()  # expected: [batch_size, 19, 3, 512]
    print("Size:", size)
    # Flatten the two middle dimensions, keeping the feature dimension: (batch, 19*3, 512)
    H = H.view(size[0], size[1] * size[2], size[3])
    # Assumes self.W1 is (512, d) and self.W2 is (d, r); torch.matmul broadcasts the
    # 2-D parameter matrices over the batch dimension (torch.bmm requires 3-D inputs).
    x = torch.tanh(torch.matmul(H, self.W1))
    x = torch.matmul(x, self.W2)
    A = torch.softmax(x, dim=1)  # normalize over the 19*3 positions
    E = torch.bmm(torch.transpose(A, 1, 2), H)
    return E
def __init__(self, args):
    super(MLP, self).__init__()
    self.hidden_sizes = args.classifier_hidden_sizes
    self.actv_fun = args.classifier_actv_fun
    self.dropout_rate = args.classifier_dropout_rate
    self.batch_norm = args.classifier_batch_norm
    self.num_classes = args.num_classes
    self.encoder_name = args.encoder
    assert self.encoder_name in ["BoW", "LSTM", "backwardLSTM", "biLSTM", "biLSTM_maxp", "biLSTM_minmax"]
    assert self.actv_fun in ["ReLU", "tanh", "linear"]

    if self.encoder_name == "BoW":
        self.n_dim = 4 * args.emb_dim
        self.encoder = BoW()
    elif self.encoder_name == "LSTM":
        self.n_dim = 4 * args.lstm_hidden_size
        self.encoder = LSTM_encoder(False, args.emb_dim, args.lstm_hidden_size,
                                    args.lstm_num_layers, args.lstm_dropout_rate)
    elif self.encoder_name == "biLSTM":
        self.n_dim = 4 * 2 * args.lstm_hidden_size
        self.encoder = LSTM_encoder(True, args.emb_dim, args.lstm_hidden_size,
                                    args.lstm_num_layers, args.lstm_dropout_rate)
    elif self.encoder_name == "biLSTM_maxp":
        self.n_dim = 4 * 2 * args.lstm_hidden_size
        self.encoder = biLSTM_maxp_encoder(args.lstm_hidden_size, args.batch_size, args.emb_dim,
                                           args.lstm_num_layers, args.lstm_dropout_rate)

    modules = []
    self.hidden_sizes = [self.n_dim] + self.hidden_sizes
    n_layers = len(self.hidden_sizes)
    for i in range(n_layers - 1):
        modules.append(nn.Linear(self.hidden_sizes[i], self.hidden_sizes[i + 1]))
        # Activation layer
        if self.actv_fun == "ReLU":
            modules.append(nn.ReLU())
        elif self.actv_fun == "tanh":
            modules.append(nn.Tanh())  # nn.Tanh, not nn.tanh
        if self.dropout_rate:
            modules.append(nn.Dropout(p=self.dropout_rate))
        if self.batch_norm:
            modules.append(nn.BatchNorm1d(self.hidden_sizes[i + 1]))
    modules.append(nn.Linear(self.hidden_sizes[-1], self.num_classes))
    self.layers = nn.Sequential(*modules)
def forward(self, x):
    # ndocs is the batch size, i.e., number of documents in a batch
    ndocs = x.size(0)
    doc_len = x.size(1)
    sent_len = x.size(2)
    word_len = x.size(3)
    # x has shape (ndocs, doc_len, sent_len, word_len)

    # Get the embeddings of the words; shape becomes (ndocs, doc_len, sent_len, word_len, emb_dim)
    x = self.emb_layer(x)
    char_dim = x.size(-1)
    x = x.reshape((-1, 1, word_len, char_dim))
    print('shape before conv', x.shape)

    # Character-level convolutions; note the reshape sizes below are hard-coded
    # for a fixed batch/document/sentence geometry
    x1 = self.char_conv1(x)
    x1 = self.char_pool1(x1).squeeze()
    x2 = self.char_pool2(self.char_conv2(x)).squeeze()
    x3 = self.char_pool3(self.char_conv3(x)).squeeze().reshape(1200000, 32, 1)
    x4 = self.char_pool4(self.char_conv4(x)).squeeze().reshape(1200000, 32, 1)
    x5 = self.char_pool5(self.char_conv5(x)).squeeze().reshape(1200000, 32, 1)
    x6 = self.char_pool6(self.char_conv6(x)).squeeze().reshape(1200000, 32, 1)
    x = torch.cat((x1, x2, x3, x4, x5, x6), 2)
    # x = x.squeeze()
    x = x.reshape(-1, 1, 1, word_len)

    # Word-level convolutions
    print('shape for word conv', x.shape)
    p1 = self.word_conv1(x)
    print('p1 shape', p1.shape)
    p1 = self.word_pool1(p1)
    p2 = self.word_pool2(self.word_conv2(x))
    print('after word shapes are', p1.shape, p2.shape)
    x = torch.cat((p1, p2), 2)
    x = x.squeeze()  # word_rep
    x = x.reshape(-1, sent_len, 1)

    # Pass through the sentence GRU; it expects input of shape (batch, seq_len, input_size)
    x, _ = self.sent_GRU(x)

    # Average pool to get the document representation
    doc_rep = x.reshape(-1, 1, doc_len, 1)

    # Pass doc_rep through a linear layer and a tanh non-linearity
    doc_rep = torch.tanh(self.doc_linear(doc_rep))  # nn.tanh does not exist; use torch.tanh
    return doc_rep
def __init__(self):
    super(G, self).__init__()
    self.linear1 = nn.Linear(100, 4 * 4 * 512)
    self.deconvs = nn.Sequential(
        nn.ConvTranspose2d(512, 256, 5, padding=2, stride=2),
        nn.LeakyReLU(negative_slope=0.2),
        nn.ConvTranspose2d(256, 128, 5, padding=2, stride=2),
        nn.LeakyReLU(negative_slope=0.2),
        nn.ConvTranspose2d(128, 3, 5, padding=2, stride=2),
        nn.Tanh(),  # nn.Tanh, not nn.tanh
    )
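# A plausible companion forward for the generator above (a sketch, not from the original
# code): the 100-dim latent passes through linear1 and is reshaped to (N, 512, 4, 4),
# since the first ConvTranspose2d expects 512 input channels.
def forward(self, z):
    x = self.linear1(z)        # (N, 100) -> (N, 4*4*512)
    x = x.view(-1, 512, 4, 4)  # channel-first feature map for the deconv stack
    return self.deconvs(x)     # (N, 3, H, W), squashed to [-1, 1] by the final Tanh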
def __init__(self, input_size=784, hidden_size=500, encoding_size=2, activation='relu'):
    super(VAE, self).__init__()
    # ENCODER
    self.fc1 = nn.Linear(input_size, hidden_size)
    self.fc21 = nn.Linear(hidden_size, encoding_size)  # mean of z|x
    self.fc22 = nn.Linear(hidden_size, encoding_size)  # std of z|x
    # DECODER
    self.fc3 = nn.Linear(encoding_size, hidden_size)
    self.fc4 = nn.Linear(hidden_size, input_size)
    self.encoding_size = encoding_size
    self.sigmoid = nn.Sigmoid()
    if activation == 'tanh':
        self.activation = nn.Tanh()  # nn.Tanh, not nn.tanh
    else:
        self.activation = nn.ReLU()
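# A minimal sketch (an assumption, not from the original code) of how the two encoder
# heads above are typically used. fc22 is labelled "std" in the constructor; the common
# convention treats that head as the log-variance, which is what this sketch assumes.
def encode(self, x):
    h = self.activation(self.fc1(x))
    return self.fc21(h), self.fc22(h)  # mu, logvar

def reparameterize(self, mu, logvar):
    std = torch.exp(0.5 * logvar)   # convert log-variance to standard deviation
    eps = torch.randn_like(std)     # sample noise with the same shape as std
    return mu + eps * std           # reparameterization trick: z = mu + eps * std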
def define_PRSNet(input_nc, output_nc, conv_layers, num_plane, num_quat, biasTerms, useBn, activation, init_gain=0.02, gpu_ids=[]):
    if activation == 'relu':
        ac_fun = nn.ReLU()   # nn.ReLU, not nn.relu
    elif activation == 'tanh':
        ac_fun = nn.Tanh()   # nn.Tanh, not nn.tanh
    elif activation == 'lrelu':
        ac_fun = nn.LeakyReLU(0.2, True)
    if useBn:
        print('using batch normalization')
    net = PRSNet(input_nc, output_nc, conv_layers, num_plane, num_quat, biasTerms, useBn, ac_fun)
    return init_net(net, init_gain, gpu_ids)
def __init__(self):
    super(SelectionModel, self).__init__()
    self.layer = nn.Sequential(
        nn.Conv2d(18, 64, 3, stride=1, dilation=2),
        nn.ELU(),
        nn.BatchNorm2d(64),
        nn.Conv2d(64, 128, 3, stride=1, dilation=4),
        nn.ELU(),
        nn.BatchNorm2d(128),  # must match the 128 output channels of the preceding conv
        nn.Conv2d(128, 128, 3, stride=1, dilation=8),
        nn.ELU(),
        nn.BatchNorm2d(128),
        nn.Conv2d(128, 128, 3, stride=1, dilation=16),
        nn.ELU(),
        nn.BatchNorm2d(128),
        nn.Conv2d(128, 64, 3, stride=1),
        nn.ELU(),
        nn.BatchNorm2d(64),
        nn.Conv2d(64, 32, 3, stride=1),
        nn.ELU(),
        nn.BatchNorm2d(32),
        nn.Conv2d(32, 4, 3, stride=1),
        nn.Tanh(),  # nn.Tanh, not nn.tanh
    )
def __init__(self, code_size, img_size, kernel_size=4, num_input_channels=3,
             num_feature_maps=64, batch_norm=True):
    super(Encoder, self).__init__()
    if isinstance(img_size, tuple):
        # use the largest dimension as the reference if it is a power of two, otherwise the smallest
        stable_dim = max(img_size) if is_power2(max(img_size)) else min(img_size)
        self.img_size = img_size
        self.final_size = tuple(int(4 * x // stable_dim) for x in self.img_size)
    else:
        self.img_size = (img_size, img_size)
        self.final_size = (4, 4)
    self.code_size = code_size
    self.num_feature_maps = num_feature_maps
    self.cl = nn.ModuleList()
    self.num_layers = int(np.log2(max(self.img_size))) - 2
    stride = 2
    padding = calculate_padding(kernel_size, stride)

    # The original snippet never initialized self.channels; the first block is
    # reconstructed here (an assumption) to map the input channels to
    # num_feature_maps, which keeps the downsampling arithmetic consistent.
    self.channels = [self.num_feature_maps]
    if batch_norm:
        self.cl.append(nn.Sequential(
            nn.Conv2d(num_input_channels, self.channels[-1], kernel_size,
                      stride=stride, padding=padding // 2, bias=False),
            nn.BatchNorm2d(self.channels[-1]),
            nn.ReLU(True)
        ))
    else:
        self.cl.append(nn.Sequential(
            nn.Conv2d(num_input_channels, self.channels[-1], kernel_size,
                      stride=stride, padding=padding // 2, bias=False),
            nn.ReLU(True)
        ))

    # Each subsequent block halves the resolution and doubles the channel count.
    for i in range(self.num_layers - 1):
        if batch_norm:
            self.cl.append(nn.Sequential(
                nn.Conv2d(self.channels[-1], self.channels[-1] * 2, kernel_size,
                          stride=stride, padding=padding // 2, bias=False),
                nn.BatchNorm2d(self.channels[-1] * 2),
                nn.ReLU(True)
            ))
        else:
            self.cl.append(nn.Sequential(
                nn.Conv2d(self.channels[-1], self.channels[-1] * 2, kernel_size,
                          stride=stride, padding=padding // 2, bias=False),
                nn.ReLU(True)
            ))
        self.channels.append(2 * self.channels[-1])

    # Final convolution collapses the remaining spatial extent into the code vector.
    self.cl.append(nn.Sequential(
        nn.Conv2d(self.channels[-1], code_size, self.final_size, stride=1, padding=0, bias=False),
        nn.Tanh()  # nn.Tanh, not nn.tanh
    ))

def forward(self, x, retain_intermediate=False):
    if retain_intermediate:
        h = [x]
        for conv_layer in self.cl:
            h.append(conv_layer(h[-1]))
        return h[-1].view(-1, self.code_size), h[1:-1]
    else:
        for conv_layer in self.cl:
            x = conv_layer(x)
        return x.view(-1, self.code_size)
def forward(self, x):
    value = self.critic(x)
    mu = torch.tanh(self.actor(x))  # nn.tanh does not exist; use torch.tanh
    std = self.log_std.exp().expand_as(mu)
    dist = Normal(mu, std)
    return dist, value
def __init__(self, n, vocab_size, dim, h):
    super(Net, self).__init__()
    self.embedding = nn.Embedding(vocab_size, dim)
    self.linear = nn.Linear(dim, vocab_size)  # nn.Linear, not nn.linear
    self.tanh = nn.Tanh()                     # nn.Tanh, not nn.tanh
    self.softmax = nn.Softmax(dim=-1)         # nn.Softmax, with an explicit dim over the vocabulary
def forward(self, x):
    # Mish activation: x * tanh(softplus(x)); requires `import torch.nn.functional as F`
    return x * torch.tanh(F.softplus(x))  # nn.tanh/nn.softplus are not callables; use torch/F
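# A quick, self-contained check (illustration only) that the Mish expression above
# matches an explicit softplus, i.e. softplus(x) = log(1 + exp(x)) for moderate x.
import torch
import torch.nn.functional as F

x = torch.linspace(-3, 3, steps=7)
mish_a = x * torch.tanh(F.softplus(x))
mish_b = x * torch.tanh(torch.log1p(torch.exp(x)))
assert torch.allclose(mish_a, mish_b, atol=1e-6)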
def __init__(self, opt): """Initialize model.""" super(Seq2SeqModel, self).__init__() self.vocab_size = opt.vocab_size self.emb_dim = opt.word_vec_size self.num_directions = 2 if opt.bidirectional else 1 self.encoder_size = opt.encoder_size self.decoder_size = opt.decoder_size #self.ctx_hidden_dim = opt.rnn_size self.batch_size = opt.batch_size self.bidirectional = opt.bidirectional self.enc_layers = opt.enc_layers self.dec_layers = opt.dec_layers self.dropout = opt.dropout self.bridge = opt.bridge self.one2many_mode = opt.one2many_mode self.one2many = opt.one2many self.coverage_attn = opt.coverage_attn self.copy_attn = opt.copy_attention self.pad_idx_src = opt.word2idx[pykp.io.PAD_WORD] self.pad_idx_trg = opt.word2idx[pykp.io.PAD_WORD] self.bos_idx = opt.word2idx[pykp.io.BOS_WORD] self.eos_idx = opt.word2idx[pykp.io.EOS_WORD] self.unk_idx = opt.word2idx[pykp.io.UNK_WORD] self.sep_idx = opt.word2idx[pykp.io.SEP_WORD] self.orthogonal_loss = opt.orthogonal_loss self.share_embeddings = opt.share_embeddings self.review_attn = opt.review_attn self.attn_mode = opt.attn_mode self.use_target_encoder = opt.use_target_encoder self.target_encoder_size = opt.target_encoder_size self.device = opt.device self.separate_present_absent = opt.separate_present_absent self.goal_vector_mode = opt.goal_vector_mode self.goal_vector_size = opt.goal_vector_size self.manager_mode = opt.manager_mode self.title_guided = opt.title_guided if self.separate_present_absent: self.peos_idx = opt.word2idx[pykp.io.PEOS_WORD] ''' self.attention_mode = opt.attention_mode # 'dot', 'general', 'concat' self.input_feeding = opt.input_feeding self.copy_attention = opt.copy_attention # bool, enable copy attention or not self.copy_mode = opt.copy_mode # same to `attention_mode` self.copy_input_feeding = opt.copy_input_feeding self.reuse_copy_attn = opt.reuse_copy_attn self.copy_gate = opt.copy_gate self.must_teacher_forcing = opt.must_teacher_forcing self.teacher_forcing_ratio = opt.teacher_forcing_ratio self.scheduled_sampling = opt.scheduled_sampling self.scheduled_sampling_batches = opt.scheduled_sampling_batches self.scheduled_sampling_type = 'inverse_sigmoid' # decay curve type: linear or inverse_sigmoid self.current_batch = 0 # for scheduled sampling self.device = opt.device if self.scheduled_sampling: logging.info("Applying scheduled sampling with %s decay for the first %d batches" % (self.scheduled_sampling_type, self.scheduled_sampling_batches)) if self.must_teacher_forcing or self.teacher_forcing_ratio >= 1: logging.info("Training with All Teacher Forcing") elif self.teacher_forcing_ratio <= 0: logging.info("Training with All Sampling") else: logging.info("Training with Teacher Forcing with static rate=%f" % self.teacher_forcing_ratio) self.get_mask = GetMask(self.pad_idx_src) ''' ''' self.embedding = nn.Embedding( self.vocab_size, self.emb_dim, self.pad_idx_src ) ''' if self.title_guided: self.encoder = RNNEncoderTG( vocab_size=self.vocab_size, embed_size=self.emb_dim, hidden_size=self.encoder_size, num_layers=self.enc_layers, bidirectional=self.bidirectional, pad_token=self.pad_idx_src, dropout=self.dropout ) else: self.encoder = RNNEncoderBasic( vocab_size=self.vocab_size, embed_size=self.emb_dim, hidden_size=self.encoder_size, num_layers=self.enc_layers, bidirectional=self.bidirectional, pad_token=self.pad_idx_src, dropout=self.dropout ) self.decoder = RNNDecoder( vocab_size=self.vocab_size, embed_size=self.emb_dim, hidden_size=self.decoder_size, num_layers=self.dec_layers, memory_bank_size=self.num_directions * 
self.encoder_size, coverage_attn=self.coverage_attn, copy_attn=self.copy_attn, review_attn=self.review_attn, pad_idx=self.pad_idx_trg, attn_mode=self.attn_mode, dropout=self.dropout, use_target_encoder=self.use_target_encoder, target_encoder_size=self.target_encoder_size, goal_vector_mode=self.goal_vector_mode, goal_vector_size=self.goal_vector_size ) if self.use_target_encoder: self.target_encoder = TargetEncoder( embed_size=self.emb_dim, hidden_size=self.target_encoder_size, vocab_size=self.vocab_size, pad_idx=self.pad_idx_trg ) # use the same embedding layer as that in the decoder self.target_encoder.embedding.weight = self.decoder.embedding.weight self.target_encoder_attention = Attention( self.target_encoder_size, memory_bank_size=self.num_directions * self.encoder_size, coverage_attn=False, attn_mode="general" ) if self.bridge == 'dense': self.bridge_layer = nn.Linear(self.encoder_size * self.num_directions, self.decoder_size) elif opt.bridge == 'dense_nonlinear': self.bridge_layer = nn.tanh(nn.Linear(self.encoder_size * self.num_directions, self.decoder_size)) else: self.bridge_layer = None if self.bridge == 'copy': assert self.encoder_size * self.num_directions == self.decoder_size, 'encoder hidden size and decoder hidden size are not match, please use a bridge layer' if self.separate_present_absent and self.goal_vector_mode > 0: if self.manager_mode == 2: # use GRU as a manager self.manager = nn.GRU(input_size=self.decoder_size, hidden_size=self.goal_vector_size, num_layers=1, bidirectional=False, batch_first=False, dropout=self.dropout) self.bridge_manager = opt.bridge_manager if self.bridge_manager: self.manager_bridge_layer = nn.Linear(self.encoder_size * self.num_directions, self.goal_vector_size) else: self.manager_bridge_layer = None elif self.manager_mode == 1: # use two trainable vectors only self.manager = ManagerBasic(self.goal_vector_size) if self.share_embeddings: self.encoder.embedding.weight = self.decoder.embedding.weight self.init_weights()
def __init__(self, fsdh_input_dim, fsdh_hidden_1, fsdh_hidden_2, fsdh_out_dim, layers,
             beta=0.3, gamma=1e-3, alpha=1.0, mu=0.1, nbits=32, batch_size=100):
    super(My_model, self).__init__()
    # FX_matrix is the original data; Y_matrix is the labels
    self.layers = layers
    self.beta = beta
    self.gamma = gamma
    self.alpha = alpha
    self.mu = mu
    self.nbits = nbits
    self.batch_size = batch_size
    self.layer1 = nn.Sequential(nn.Linear(fsdh_input_dim, fsdh_hidden_1), nn.BatchNorm1d(fsdh_hidden_1), nn.ReLU())
    self.layer2 = nn.Sequential(nn.Linear(fsdh_hidden_1, fsdh_hidden_2), nn.BatchNorm1d(fsdh_hidden_2), nn.ReLU())
    self.layer3 = nn.Linear(fsdh_hidden_2, fsdh_out_dim)
    # self.W = nn.ParameterList()
    # self.b = nn.ParameterList()
    # for k in range(self.layers):
    #     self.W = nn.Parameter(torch.eye(self.nbits, fsdh_out_dim, dtype=torch.float32))
    #     self.b = nn.Parameter(torch.eye(self.nbits, self.batch_size, dtype=torch.float32))
    # self.layer_init = nn.Sequential(nn.Linear(fsdh_input_dim, self.nbits), nn.BatchNorm1d(self.nbits), nn.Tanh())
def forward(self, x, squash=False):
    if squash:
        x = torch.tanh(x)  # nn.tanh does not exist; use torch.tanh
    return self.out(x)
def __init__(self, in_dim, hidden, num_action):
    super().__init__()  # needed if this class subclasses nn.Module, so the layers register
    self.model = nn.Sequential(nn.Linear(in_dim, hidden), nn.Tanh(), nn.Linear(hidden, num_action))  # nn.Tanh, not nn.tanh
    self.softmax = nn.Softmax(dim=-1)
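# A minimal usage sketch (an assumption, not from the original code) for the policy head
# above: self.model produces logits and self.softmax turns them into action
# probabilities, from which an action can be sampled.
def act(self, state):
    logits = self.model(state)    # (batch, num_action)
    probs = self.softmax(logits)  # action probabilities along the last dim
    dist = torch.distributions.Categorical(probs=probs)
    action = dist.sample()
    return action, dist.log_prob(action)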
# Code for conditional attention (Bahdanau-style attention).
import torch
from torch import nn
import torch.nn.functional as F

self._weight = nn.Parameter(torch.FloatTensor(100, 1))       # VARIABLE
self._act = nn.Tanh()                                        # VARIABLE (nn.Tanh, not nn.tanh)
self.lin_1 = nn.Linear(var_1_feat_size, 100, bias=False)     # VARIABLE
self.lin_2 = nn.Linear(var_2_feat_size, 100, bias=False)     # VARIABLE

def context(self, var_2, var_1):
    '''
    var_1 - the variable on which the attention is conditioned.
    var_2 - the variable to which the conditional attention is applied.
    '''
    attn_wt = F.softmax((self._act(self.lin_2(var_2) + self.lin_1(var_1))).matmul(self._weight), dim=1)
    return torch.mul(attn_wt, var_2)  # VARIABLE
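# A self-contained shape sketch (assumptions, not from the original snippet): with var_2
# of shape (batch, seq_len, var_2_feat_size) and var_1 of shape (batch, 1, var_1_feat_size),
# the sum inside context() broadcasts over seq_len, the softmax runs over dim=1 (the
# sequence), and the returned weighted tensor keeps var_2's shape.
import torch

batch, seq_len = 8, 12
var_1_feat_size, var_2_feat_size = 64, 32
var_2 = torch.randn(batch, seq_len, var_2_feat_size)
var_1 = torch.randn(batch, 1, var_1_feat_size)
weight = torch.randn(100, 1)
lin_1 = torch.nn.Linear(var_1_feat_size, 100, bias=False)
lin_2 = torch.nn.Linear(var_2_feat_size, 100, bias=False)
scores = torch.tanh(lin_2(var_2) + lin_1(var_1)).matmul(weight)  # (batch, seq_len, 1)
attn_wt = torch.softmax(scores, dim=1)                           # attention over the sequence
weighted = attn_wt * var_2                                       # (batch, seq_len, var_2_feat_size)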
def gelu(features: torch.Tensor, approximate: bool = False):
    if approximate:
        # tanh approximation; 0.7978845608... is sqrt(2/pi)
        return 0.5 * features * (1.0 + torch.tanh(0.7978845608028654 * (features + 0.044715 * (features ** 3))))
    else:
        # exact form; 1.4142135623... is sqrt(2)
        return 0.5 * features * (1.0 + torch.erf(features / 1.4142135623730951))
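# A quick sanity check (illustration only) comparing the function above with
# torch.nn.functional.gelu; the `approximate` keyword of F.gelu requires a reasonably
# recent PyTorch (1.12+), so treat that particular call as an assumption.
import torch
import torch.nn.functional as F

x = torch.linspace(-3, 3, steps=7)
assert torch.allclose(gelu(x), F.gelu(x), atol=1e-6)
assert torch.allclose(gelu(x, approximate=True), F.gelu(x, approximate='tanh'), atol=1e-4)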