def __init__(self, args, vocab, transition_system):
    """Build the embeddings, encoder/decoder LSTMs and readout layers.

    Args:
        args: configuration object. This constructor reads ``embed_size``,
            ``action_embed_size``, ``field_embed_size``, ``type_embed_size``,
            ``hidden_size``, ``lstm``, ``dropout`` and ``cuda``.
        vocab: vocabulary exposing ``source`` and ``primitive``; their
            lengths size the source and primitive-token embeddings.
        transition_system: exposes ``grammar``; ``len(grammar)``,
            ``grammar.fields`` and ``grammar.types`` size the production,
            field and type embeddings.
    """
    super(Parser, self).__init__()

    self.args = args
    self.vocab = vocab
    self.transition_system = transition_system
    self.grammar = self.transition_system.grammar

    # Embedding layers. The production table (and its readout bias below)
    # has one row more than len(grammar).
    self.src_embed = nn.Embedding(len(vocab.source), args.embed_size)
    self.production_embed = nn.Embedding(
        len(transition_system.grammar) + 1, args.action_embed_size)
    self.primitive_embed = nn.Embedding(
        len(vocab.primitive), args.action_embed_size)
    self.field_embed = nn.Embedding(
        len(transition_system.grammar.fields), args.field_embed_size)
    self.type_embed = nn.Embedding(
        len(transition_system.grammar.types), args.type_embed_size)

    # ``xavier_normal_`` is the in-place initializer; the un-suffixed
    # ``xavier_normal`` alias is deprecated. Order is kept so the random
    # draws happen in the same sequence as before.
    for embedding in (self.src_embed, self.production_embed,
                      self.primitive_embed, self.field_embed,
                      self.type_embed):
        nn.init.xavier_normal_(embedding.weight.data)

    # Width of the decoder input; both LSTM flavours use the same sum.
    decoder_input_size = (
        args.action_embed_size  # previous action
        + args.action_embed_size + args.field_embed_size
        + args.type_embed_size  # frontier info
        + args.hidden_size  # parent hidden state
        + args.hidden_size  # input feeding
    )

    # LSTMs: torch's built-in modules, or the package-local variants that
    # additionally take a dropout argument.
    if args.lstm == 'lstm':
        self.encoder_lstm = nn.LSTM(
            args.embed_size, args.hidden_size // 2, bidirectional=True)
        self.decoder_lstm = nn.LSTMCell(decoder_input_size, args.hidden_size)
    else:
        from .lstm import LSTM, LSTMCell
        self.encoder_lstm = LSTM(
            args.embed_size, args.hidden_size // 2,
            bidirectional=True, dropout=args.dropout)
        self.decoder_lstm = LSTMCell(
            decoder_input_size, args.hidden_size, dropout=args.dropout)

    # pointer net
    self.src_pointer_net = PointerNet(args.hidden_size, args.hidden_size)

    self.primitive_predictor = nn.Linear(args.hidden_size, 2)

    # initialize the decoder's state and cells with encoder hidden states
    self.decoder_cell_init = nn.Linear(args.hidden_size, args.hidden_size)

    # attention: dot product attention
    # project source encoding to decoder rnn's h space
    self.att_src_linear = nn.Linear(
        args.hidden_size, args.hidden_size, bias=False)

    # transformation of decoder hidden states and context vectors before
    # reading out target words; this produces the `attentional vector`
    # in (Luong et al., 2015)
    self.att_vec_linear = nn.Linear(
        args.hidden_size + args.hidden_size, args.hidden_size, bias=False)

    # Projection from the hidden size to the embedding size (no bias).
    self.query_vec_to_embed = nn.Linear(
        args.hidden_size, args.embed_size, bias=False)

    # Zero-initialised readout biases, one entry per production row /
    # primitive token.
    self.production_readout_b = nn.Parameter(
        torch.FloatTensor(len(transition_system.grammar) + 1).zero_())
    self.tgt_token_readout_b = nn.Parameter(
        torch.FloatTensor(len(vocab.primitive)).zero_())

    # The readouts are bound methods of this class rather than nn.Linear
    # layers.
    self.production_readout = self.production_readout_func
    self.tgt_token_readout = self.tgt_token_readout_func

    # dropout layer
    self.dropout = nn.Dropout(args.dropout)

    # Tensor constructors matching the configured device.
    if args.cuda:
        self.new_long_tensor = torch.cuda.LongTensor
        self.new_tensor = torch.cuda.FloatTensor
    else:
        self.new_long_tensor = torch.LongTensor
        self.new_tensor = torch.FloatTensor
def __init__(self, input_size, hidden_size, noisin=0):
    """Create the wrapped ``nn.LSTMCell`` and reset its parameters.

    Args:
        input_size: size of the cell input.
        hidden_size: size of the cell hidden state.
        noisin: accepted by the signature but not read in this constructor.
    """
    super().__init__()
    lstm_cell = nn.LSTMCell(input_size=input_size, hidden_size=hidden_size)
    self.cell = lstm_cell
    self.reset_parameters()
def __init__(self):
    """Create two LSTM cells (1 -> 51 and 51 -> 51) and a 51 -> 1 linear layer."""
    super(Sequence, self).__init__()
    n_in, n_hidden, n_out = 1, 51, 1
    self.lstm1 = nn.LSTMCell(n_in, n_hidden)
    self.lstm2 = nn.LSTMCell(n_hidden, n_hidden)
    self.linear = nn.Linear(n_hidden, n_out)
def __init__(self, args):
    """Build the char/bichar embeddings, the two LSTM cells and the input projection.

    Args:
        args: configuration object. This constructor reads the embedding
            sizes (``embed_char_num``, ``embed_char_dim``,
            ``embed_bichar_num``, ``embed_bichar_dim``,
            ``static_embed_char_num``, ``static_embed_bichar_num``), the
            alphabets (``create_alphabet``, ``create_static_alphabet``),
            the pretrained-vector switches and data (``char_Embedding``,
            ``pre_char_word_vecs``, ``bichar_Embedding``,
            ``pre_bichar_word_vecs``), and ``hidden_size``,
            ``rnn_hidden_dim``, ``batch_size``, ``dropout``,
            ``dropout_embed`` and ``use_cuda``.
    """
    print("Encoder model --- LSTMCELL")
    super(Encoder_WordLstm, self).__init__()
    self.args = args

    # Randomly initialised, trainable embeddings.
    self.char_embed = nn.Embedding(
        self.args.embed_char_num,
        self.args.embed_char_dim,
        sparse=False,
        padding_idx=self.args.create_alphabet.char_PaddingID)
    self.char_embed.weight.requires_grad = True

    self.bichar_embed = nn.Embedding(
        self.args.embed_bichar_num,
        self.args.embed_bichar_dim,
        sparse=False,
        padding_idx=self.args.create_alphabet.bichar_PaddingID)
    self.bichar_embed.weight.requires_grad = True

    # "Static" embeddings: uniformly initialised here and, when pretrained
    # vectors are enabled below, overwritten and frozen.
    self.static_char_embed = nn.Embedding(
        self.args.static_embed_char_num,
        self.args.embed_char_dim,
        sparse=False,
        padding_idx=self.args.create_static_alphabet.char_PaddingID)
    char_bound = np.sqrt(3 / self.args.embed_char_dim)
    # ``uniform_`` replaces the deprecated ``init.uniform`` alias.
    init.uniform_(self.static_char_embed.weight, a=-char_bound, b=char_bound)

    self.static_bichar_embed = nn.Embedding(
        self.args.static_embed_bichar_num,
        self.args.embed_bichar_dim,
        sparse=False,
        padding_idx=self.args.create_static_alphabet.bichar_PaddingID)
    bichar_bound = np.sqrt(3 / self.args.embed_bichar_dim)
    init.uniform_(self.static_bichar_embed.weight,
                  a=-bichar_bound, b=bichar_bound)

    # load external word embedding
    if args.char_Embedding is True:
        print("char_Embedding")
        pretrained_char_weight = np.array(args.pre_char_word_vecs)
        self.static_char_embed.weight.data.copy_(
            torch.from_numpy(pretrained_char_weight))
        # Zero the padding row in one tensor op instead of element by element.
        char_pad_id = self.args.create_static_alphabet.char_PaddingID
        self.static_char_embed.weight.data[char_pad_id].fill_(0)
        self.static_char_embed.weight.requires_grad = False

    if args.bichar_Embedding is True:
        print("bichar_Embedding")
        pretrained_bichar_weight = np.array(args.pre_bichar_word_vecs)
        self.static_bichar_embed.weight.data.copy_(
            torch.from_numpy(pretrained_bichar_weight))
        bichar_pad_id = self.args.create_static_alphabet.bichar_PaddingID
        self.static_bichar_embed.weight.data[bichar_pad_id].fill_(0)
        self.static_bichar_embed.weight.requires_grad = False

    self.lstm_left = nn.LSTMCell(input_size=self.args.hidden_size,
                                 hidden_size=self.args.rnn_hidden_dim,
                                 bias=True)
    self.lstm_right = nn.LSTMCell(input_size=self.args.hidden_size,
                                  hidden_size=self.args.rnn_hidden_dim,
                                  bias=True)

    # init lstm weight and bias (``xavier_uniform_`` replaces the
    # deprecated ``init.xavier_uniform`` alias; call order is unchanged).
    init.xavier_uniform_(self.lstm_left.weight_ih)
    init.xavier_uniform_(self.lstm_left.weight_hh)
    init.xavier_uniform_(self.lstm_right.weight_ih)
    init.xavier_uniform_(self.lstm_right.weight_hh)
    value = np.sqrt(6 / (self.args.rnn_hidden_dim + 1))
    self.lstm_left.bias_hh.data.uniform_(-value, value)
    self.lstm_left.bias_ih.data.uniform_(-value, value)
    self.lstm_right.bias_hh.data.uniform_(-value, value)
    self.lstm_right.bias_ih.data.uniform_(-value, value)

    self.hidden_l = self.init_hidden_cell(self.args.batch_size)
    self.hidden_r = self.init_hidden_cell(self.args.batch_size)

    self.dropout = nn.Dropout(self.args.dropout)
    self.dropout_embed = nn.Dropout(self.args.dropout_embed)

    # Projection input width: twice the summed char and bichar dims.
    self.input_dim = (self.args.embed_char_dim
                      + self.args.embed_bichar_dim) * 2
    liner = nn.Linear(in_features=self.input_dim,
                      out_features=self.args.hidden_size,
                      bias=True)
    if self.args.use_cuda is True:
        liner = liner.cuda()
    self.liner = liner

    # init linear
    init.xavier_uniform_(self.liner.weight)
    init_linear_value = np.sqrt(6 / (self.args.hidden_size + 1))
    self.liner.bias.data.uniform_(-init_linear_value, init_linear_value)
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
    """Set up the NMT model's embeddings, encoder, decoder and projections.

    @param embed_size (int): Embedding size (dimensionality)
    @param hidden_size (int): Hidden size (dimensionality)
    @param vocab (Vocab): Vocabulary object containing src and tgt languages.
                          See vocab.py for documentation.
    @param dropout_rate (float): Dropout probability, for attention
    """
    super(NMT, self).__init__()
    self.model_embeddings = ModelEmbeddings(embed_size, vocab)
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab

    # Placeholder defaults; each attribute is replaced by a real layer below.
    for layer_name in ('encoder', 'decoder', 'h_projection', 'c_projection',
                       'att_projection', 'combined_output_projection',
                       'target_vocab_projection', 'dropout'):
        setattr(self, layer_name, None)

    enc_out_size = hidden_size * 2  # width used wherever the encoder output is projected

    # Bidirectional LSTM (with bias) over the source word embeddings.
    self.encoder = nn.LSTM(input_size=embed_size,
                           hidden_size=hidden_size,
                           bias=True,
                           bidirectional=True)

    # LSTM cell (with bias) whose input is embed_size + hidden_size wide.
    self.decoder = nn.LSTMCell(input_size=embed_size + hidden_size,
                               hidden_size=hidden_size,
                               bias=True)

    # Bias-free projections from 2 * hidden_size down to hidden_size
    # (W_{h}, W_{c} and W_{attProj}).
    self.h_projection = nn.Linear(
        in_features=enc_out_size, out_features=hidden_size, bias=False)
    self.c_projection = nn.Linear(
        in_features=enc_out_size, out_features=hidden_size, bias=False)
    self.att_projection = nn.Linear(
        in_features=enc_out_size, out_features=hidden_size, bias=False)

    # W_{u}: transformation of decoder hidden states and context vectors
    # before reading out target words; produces the `attentional vector`
    # in (Luong et al., 2015).
    self.combined_output_projection = nn.Linear(
        in_features=enc_out_size + hidden_size,
        out_features=hidden_size,
        bias=False)

    # W_{vocab}: prediction layer over the target vocabulary.
    self.target_vocab_projection = nn.Linear(
        in_features=hidden_size, out_features=len(vocab.tgt), bias=False)

    self.dropout = nn.Dropout(dropout_rate)
def __init__(self):
    """Create a single ``nn.LSTMCell`` with 256 inputs and 256 hidden units."""
    super(LstmCell, self).__init__()
    width = 256
    self.LstmCell = nn.LSTMCell(input_size=width, hidden_size=width)
def __init__(self, input_dim, process_step=4):
    """Store the settings and create the processing LSTM cell.

    Args:
        input_dim: input width of the cell; its hidden size is twice this.
        process_step: stored on the instance as ``process_step``.
    """
    super(QueryEncoder, self).__init__()
    self.input_dim = input_dim
    self.process_step = process_step
    hidden_dim = 2 * input_dim
    self.process = nn.LSTMCell(input_dim, hidden_dim)
def __init__(self, in_channels, frame_channels, r, attn_type, attn_win,
             attn_norm, prenet_type, prenet_dropout, forward_attn,
             trans_agent, forward_attn_mask, location_attn, attn_K,
             separate_stopnet):
    """Build the prenet, attention RNN, attention, decoder RNN, projection and stopnet.

    Args:
        in_channels: encoder embedding width; also stored as
            ``encoder_embedding_dim``.
        frame_channels: width of one output frame.
        r: stored as both ``r_init`` and ``r``; each projection step emits
            ``frame_channels * r`` values.
        attn_type, attn_win, attn_norm, forward_attn, trans_agent,
        forward_attn_mask, location_attn, attn_K: forwarded to ``init_attn``.
        prenet_type, prenet_dropout: forwarded to ``Prenet``.
        separate_stopnet: stored on the instance.
    """
    super(Decoder, self).__init__()
    self.frame_channels = frame_channels
    self.r_init = r
    self.r = r
    self.encoder_embedding_dim = in_channels
    self.separate_stopnet = separate_stopnet
    self.max_decoder_steps = 1000
    self.stop_threshold = 0.5

    # model dimensions
    self.query_dim = 1024
    self.decoder_rnn_dim = 1024
    self.prenet_dim = 256
    self.attn_dim = 128
    self.p_attention_dropout = 0.1
    self.p_decoder_dropout = 0.1

    # memory -> |Prenet| -> processed_memory
    prenet_in_features = self.frame_channels
    prenet_out_features = [self.prenet_dim, self.prenet_dim]
    self.prenet = Prenet(prenet_in_features,
                         prenet_type,
                         prenet_dropout,
                         out_features=prenet_out_features,
                         bias=False)

    attention_rnn_in = self.prenet_dim + in_channels
    self.attention_rnn = nn.LSTMCell(attention_rnn_in,
                                     self.query_dim,
                                     bias=True)

    attention_settings = dict(
        attn_type=attn_type,
        query_dim=self.query_dim,
        embedding_dim=in_channels,
        attention_dim=128,
        location_attention=location_attn,
        attention_location_n_filters=32,
        attention_location_kernel_size=31,
        windowing=attn_win,
        norm=attn_norm,
        forward_attn=forward_attn,
        trans_agent=trans_agent,
        forward_attn_mask=forward_attn_mask,
        attn_K=attn_K)
    self.attention = init_attn(**attention_settings)

    decoder_rnn_in = self.query_dim + in_channels
    self.decoder_rnn = nn.LSTMCell(decoder_rnn_in,
                                   self.decoder_rnn_dim,
                                   bias=True)

    frames_per_step = self.frame_channels * self.r_init
    self.linear_projection = Linear(self.decoder_rnn_dim + in_channels,
                                    frames_per_step)

    stop_projection = Linear(self.decoder_rnn_dim + frames_per_step,
                             1,
                             bias=True,
                             init_gain='sigmoid')
    self.stopnet = nn.Sequential(nn.Dropout(0.1), stop_projection)
    self.memory_truncated = None
def __init__(self,
             lstm_settings_dict,
             feature_size_dict=None,
             batch_size=32,
             seq_length=200,
             prediction_length=60,
             embedding_info=None,
             seq_wind=10):
    """Build the embeddings, attention layer, LSTMs, dropouts and output layer.

    Args:
        lstm_settings_dict: settings mapping. This constructor reads
            ``no_subnets``, ``active_modalities``, ``hidden_dims``,
            ``layers``, ``is_irregular``, ``freeze_glove``,
            ``visual_as_id`` and ``dropout``. It also WRITES
            ``lstm_settings_dict['is_irregular']['master']``.
        feature_size_dict: per-modality feature sizes. Defaults to
            ``{'acous': 0, 'visual': 0}``. A ``'master'`` entry is written
            into the mapping that is passed in.
        batch_size: stored on the instance.
        seq_length: stored on the instance.
        prediction_length: output width of the final linear layer.
        embedding_info: mapping from modality to a list of embedding
            descriptions (dicts with ``embedding_use_func``, ``use_glove``,
            ``embedding_num``, ``embedding_out_dim``, ``emb_indices`` and,
            for GloVe, ``glove_embed_table``). Defaults to an empty mapping.
        seq_wind: stored on the instance.

    Raises:
        ValueError: if ``no_subnets`` is set and the number of active
            modalities is not exactly one.
    """
    super(LSTMPredictor, self).__init__()

    # Fresh objects per instance. The previous dict default was mutated
    # below (``['master'] = 0``) and therefore shared between instances,
    # and the previous list default failed on ``.keys()``.
    if feature_size_dict is None:
        feature_size_dict = {'acous': 0, 'visual': 0}
    if embedding_info is None:
        embedding_info = {}

    # General model settings
    self.batch_size = batch_size
    self.seq_length = seq_length
    self.feature_size_dict = feature_size_dict
    self.prediction_length = prediction_length

    # lstm_settings_dict
    self.lstm_settings_dict = lstm_settings_dict
    # Master input size: summed raw feature sizes without subnets,
    # otherwise summed subnet hidden sizes.
    self.feature_size_dict['master'] = 0
    if self.lstm_settings_dict['no_subnets']:
        for act_mod in self.lstm_settings_dict['active_modalities']:
            self.feature_size_dict['master'] += self.feature_size_dict[act_mod]
    else:
        for act_mod in self.lstm_settings_dict['active_modalities']:
            self.feature_size_dict['master'] += (
                self.lstm_settings_dict['hidden_dims'][act_mod])
    self.num_layers = lstm_settings_dict['layers']

    # embedding settings
    self.embedding_info = embedding_info
    self.embeddings = {'acous': [], 'visual': []}
    self.embedding_indices = {'acous': [], 'visual': []}
    self.embed_delete_index_list = {'acous': [], 'visual': []}
    self.embed_data_types = {'acous': [], 'visual': []}
    self.len_output_of_embeddings = {'acous': 0, 'visual': 0}
    self.embedding_flags = {}

    # attention
    self.attn = nn.Linear(
        self.feature_size_dict['visual']
        + self.lstm_settings_dict['hidden_dims']['acous'] * 2,
        self.lstm_settings_dict['hidden_dims']['acous']).type(dtype)
    self.seq_wind = seq_wind

    for modality in self.embedding_info:
        modality_embeddings = self.embedding_info[modality]
        self.embedding_flags[modality] = bool(len(modality_embeddings))
        if not self.embedding_flags[modality]:
            continue

        for embedding in modality_embeddings:
            self.len_output_of_embeddings[modality] += (
                2 * embedding['embedding_out_dim'])

        for emb_info in modality_embeddings:
            if emb_info['embedding_use_func']:
                self.embeddings[modality].append(
                    nn.Embedding(emb_info['embedding_num'],
                                 emb_info['embedding_out_dim']).type(dtype))
                self.embedding_func = self.embeddings[modality][-1]
                self.embed_data_types[modality].append(dtype_long)
            elif emb_info['use_glove']:
                embed_tab_path = emb_info['glove_embed_table']
                # NOTE(review): pickle can execute arbitrary code while
                # loading; this path must point to a trusted file.
                with open(embed_tab_path, 'rb') as table_file:
                    glove_embed_table = pickle.load(table_file)
                # need this to deal with BCE error
                glove_embed_table[0] = np.random.normal(0, 1e5, 300)
                self.embeddings[modality].append(
                    nn.Embedding.from_pretrained(
                        torch.FloatTensor(glove_embed_table).type(dtype),
                        freeze=self.lstm_settings_dict['freeze_glove']))
                self.embedding_func = self.embeddings[modality][-1]
                self.embed_data_types[modality].append(dtype_long)
                print('using glove embeddings')
            else:
                self.embeddings[modality].append(
                    nn.Linear(emb_info['embedding_num'],
                              emb_info['embedding_out_dim'],
                              bias=True).type(dtype))
                self.embedding_linear = self.embeddings[modality][-1]
                self.embed_data_types[modality].append(dtype)
            # two tuples for start and end
            self.embedding_indices[modality].append(emb_info['emb_indices'])

        # Collect every index covered by either of the two ranges of
        # each embedding.
        for spans in self.embedding_indices[modality]:
            self.embed_delete_index_list[modality] += list(
                range(spans[0][0], spans[0][1]))
            self.embed_delete_index_list[modality] += list(
                range(spans[1][0], spans[1][1]))

    # Initialize LSTMs
    self.lstm_dict = {}
    master_in = self.feature_size_dict['master']
    master_hidden = self.lstm_settings_dict['hidden_dims']['master']
    if self.lstm_settings_dict['no_subnets']:
        if len(self.lstm_settings_dict['active_modalities']) != 1:
            raise ValueError('Can only have one modality if no subnets')
        only_modality = self.lstm_settings_dict['active_modalities'][0]
        self.lstm_settings_dict['is_irregular']['master'] = (
            self.lstm_settings_dict['is_irregular'][only_modality])
        # An irregular master is stepped with an LSTMCell; otherwise a
        # full nn.LSTM is used.
        if self.lstm_settings_dict['is_irregular']['master']:
            self.lstm_dict['master'] = nn.LSTMCell(
                master_in, master_hidden).type(dtype)
        else:
            self.lstm_dict['master'] = nn.LSTM(
                master_in, master_hidden).type(dtype)
        self.lstm_master = self.lstm_dict['master']
    else:  # Two subnets
        self.lstm_settings_dict['is_irregular']['master'] = False
        self.lstm_dict['master'] = nn.LSTM(
            master_in, master_hidden).type(dtype)
        self.lstm_master = self.lstm_dict['master']
        for lstm in self.lstm_settings_dict['active_modalities']:
            sub_in = self.feature_size_dict[lstm]
            sub_hidden = self.lstm_settings_dict['hidden_dims'][lstm]
            if self.lstm_settings_dict['is_irregular'][lstm]:
                self.lstm_dict[lstm] = nn.LSTMCell(
                    sub_in, sub_hidden).type(dtype)
                if lstm == 'acous':
                    self.lstm_cell_acous = self.lstm_dict[lstm]
                else:
                    self.lstm_cell_visual = self.lstm_dict[lstm]
            else:
                self.lstm_dict[lstm] = nn.LSTM(
                    sub_in, sub_hidden).type(dtype)
                if lstm == 'acous':
                    self.lstm_acous = self.lstm_dict[lstm]
                else:
                    self.lstm_visual = self.lstm_dict[lstm]
        # NOTE(review): the nesting level of this branch was reconstructed
        # from flattened source; confirm it belongs to the two-subnet path.
        if self.lstm_settings_dict['visual_as_id']['visual']:
            # Reuse the acoustic subnet object for the visual stream.
            self.lstm_visual = self.lstm_dict['acous']

    # init dropout layers
    self.dropout_dict = {}
    for drop_key, drop_val in self.lstm_settings_dict['dropout'].items():
        self.dropout_dict[drop_key] = nn.Dropout(drop_val)
        # Also set as an attribute so the module registers the layer.
        setattr(self, 'dropout_' + str(drop_key),
                self.dropout_dict[drop_key])

    self.out = nn.Linear(master_hidden, prediction_length).type(dtype)
    self.init_hidden()
def __init__(self):
    """Create the decoder LSTM cell and the input-concatenation layer.

    All sizes come from ``conf``: the cell maps ``emb-size`` to
    ``dec-hidden-size``; ``y_concat`` maps
    ``2 * enc-hidden-size + emb-size`` to ``emb-size``.
    """
    super(Decoder, self).__init__()

    cell_input = conf('emb-size')
    cell_hidden = conf('dec-hidden-size')
    self.lstm = nn.LSTMCell(cell_input, cell_hidden)

    concat_in = 2 * conf('enc-hidden-size') + conf('emb-size')
    concat_out = conf('emb-size')
    self.y_concat = nn.Linear(concat_in, concat_out)
# Data loaders; both splits are shuffled and read with four workers.
trainloader = DataLoader(dataset=traindataset,
                         batch_size=BATCH_SIZE,
                         shuffle=True,
                         num_workers=4)
testdataset = MyDataset('test.csv')
testloader = DataLoader(dataset=testdataset,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        num_workers=4)

# Model components, each moved to the target device.
mlp1 = MLP(EMB_SIZE).to(device)
mlp2 = MLP(2 * HIDDEN_SIZE).to(device)
mlp3 = MLP(2 * HIDDEN_SIZE).to(device)
r = Pred(HIDDEN_SIZE).to(device)
lstm = nn.LSTMCell(HIDDEN_SIZE, HIDDEN_SIZE).to(device)
embed = torch.nn.functional.one_hot

# One Adam optimizer per component, all with the same hyper-parameters.
optimizer_mlp1 = torch.optim.Adam(
    params=mlp1.parameters(), lr=2e-4, weight_decay=1e-4)
optimizer_mlp2 = torch.optim.Adam(
    params=mlp2.parameters(), lr=2e-4, weight_decay=1e-4)
optimizer_mlp3 = torch.optim.Adam(
    params=mlp3.parameters(), lr=2e-4, weight_decay=1e-4)
optimizer_r = torch.optim.Adam(
    params=r.parameters(), lr=2e-4, weight_decay=1e-4)
optimizer_lstm = torch.optim.Adam(
    params=lstm.parameters(), lr=2e-4, weight_decay=1e-4)
def __init__(self, pc_embedder, address_embedder, cache_line_embedder,
             positional_embedder, lstm_hidden_size, max_attention_history,
             loss_fns=None, cache_pc_embedder=None):
    """Constructs a model to predict evictions from an EvictionEntries history.

    At each timestep t, receives:
      - pc_t: program counter of t-th memory access.
      - a_t: (cache-aligned) address of t-th memory access.
      - [l^0_t, ..., l^N_t]: the cache lines present in the cache set
        accessed by a_t. Each cache line consists of the cache-aligned
        address and the pc of the last access to that address.

    Computes:
      c_0, h_0 = zeros(lstm_hidden_size)
      c_{t + 1}, h_{t + 1} = LSTM([e(pc_t); e(a_t)], c_t, h_t)
      h^i = attention([h_{t - K}, ..., h_t], query=e(l^i_t))
        for i = 1, ..., N
      eviction_score s^i = softmax(f(h^i))

    The line with the highest eviction score is evicted.

    Args:
      pc_embedder (embed.Embedder): embeds the program counter.
      address_embedder (embed.Embedder): embeds the address.
      cache_line_embedder (embed.Embedder): embeds the cache line.
      positional_embedder (embed.Embedder): embeds positions of the access
        history.
      lstm_hidden_size (int): dimension of output of LSTM (h and c).
      max_attention_history (int): maximum number of past hidden states to
        attend over (K in the equation above).
      loss_fns (dict): maps a name (str) to a loss function (LossFunction).
        The name is used in the loss method. When None, a single
        "log_likelihood" entry holding LogProbLoss() is used.
      cache_pc_embedder (embed.Embedder | None): embeds the pc of each cache
        line, if provided. Otherwise cache line pcs are not embedded.
    """
    super(EvictionPolicyModel, self).__init__()
    self._pc_embedder = pc_embedder
    self._address_embedder = address_embedder
    self._cache_line_embedder = cache_line_embedder
    self._cache_pc_embedder = cache_pc_embedder

    # The LSTM input width is the sum of the pc and address embedding widths.
    lstm_input_size = pc_embedder.embed_dim + address_embedder.embed_dim
    self._lstm_cell = nn.LSTMCell(lstm_input_size, lstm_hidden_size)
    self._positional_embedder = positional_embedder

    # The attention query is the cache-line embedding, widened by the
    # cache-line pc embedding when one is supplied.
    if cache_pc_embedder is None:
        query_dim = cache_line_embedder.embed_dim
    else:
        query_dim = (cache_line_embedder.embed_dim
                     + cache_pc_embedder.embed_dim)
    self._history_attention = attention.MultiQueryAttention(
        attention.GeneralAttention(query_dim, lstm_hidden_size))

    # f(h, e(l))
    scorer_input_size = (lstm_hidden_size
                         + self._positional_embedder.embed_dim)
    self._cache_line_scorer = nn.Linear(scorer_input_size, 1)
    self._reuse_distance_estimator = nn.Linear(scorer_input_size, 1)

    # Needs to be capped because of limited GPU memory
    self._max_attention_history = max_attention_history

    self._loss_fns = (
        {"log_likelihood": LogProbLoss()} if loss_fns is None else loss_fns)
def __init__(self,
             num_steps,
             x_size,
             window_size,
             z_what_size,
             rnn_hidden_size,
             encoder_net=[],
             decoder_net=[],
             predict_net=[],
             embed_net=None,
             bl_predict_net=[],
             non_linearity='ReLU',
             decoder_output_bias=None,
             decoder_output_use_sigmoid=False,
             use_masking=True,
             use_baselines=True,
             baseline_scalar=None,
             scale_prior_mean=3.0,
             scale_prior_sd=0.1,
             pos_prior_mean=0.0,
             pos_prior_sd=1.0,
             likelihood_sd=0.3,
             use_cuda=False):
    """Store the settings and build the sub-networks and initial states.

    Args:
        num_steps, x_size, window_size, z_what_size, rnn_hidden_size,
        use_masking, use_baselines, baseline_scalar, likelihood_sd,
        use_cuda: stored on the instance under the same names.
        encoder_net, decoder_net, predict_net: layer-size lists forwarded
            to ``Encoder``, ``Decoder`` and ``Predict``.
        embed_net: layer sizes of the optional embedding MLPs; when None,
            ``Identity`` modules are used and the RNN consumes
            ``x_size ** 2`` features instead of ``embed_net[-1]``.
        bl_predict_net: layer sizes of the baseline predictor; a final
            size of 1 is appended.
        non_linearity: name of the ``torch.nn`` class used as activation.
        decoder_output_bias, decoder_output_use_sigmoid: forwarded to
            ``Decoder``.
        scale_prior_mean, pos_prior_mean: entries of the z_where prior
            location (scale, position, position).
        scale_prior_sd, pos_prior_sd: entries of the z_where prior scale.
    """
    super().__init__()

    self.num_steps = num_steps
    self.x_size = x_size
    self.window_size = window_size
    self.z_what_size = z_what_size
    self.rnn_hidden_size = rnn_hidden_size
    self.use_masking = use_masking
    self.use_baselines = use_baselines
    self.baseline_scalar = baseline_scalar
    self.likelihood_sd = likelihood_sd
    self.use_cuda = use_cuda

    # A scalar tensor whose dtype and device are recorded in ``options``.
    if use_cuda:
        prototype = torch.tensor(0.).cuda()
    else:
        prototype = torch.tensor(0.)
    self.options = dict(dtype=prototype.dtype, device=prototype.device)

    self.z_pres_size = 1
    self.z_where_size = 3

    # By making these parameters they will be moved to the gpu
    # when necessary. (They are not registered with pyro for
    # optimization.)
    loc_prior = torch.FloatTensor(
        [scale_prior_mean, pos_prior_mean, pos_prior_mean])
    self.z_where_loc_prior = nn.Parameter(loc_prior, requires_grad=False)
    scale_prior = torch.FloatTensor(
        [scale_prior_sd, pos_prior_sd, pos_prior_sd])
    self.z_where_scale_prior = nn.Parameter(scale_prior, requires_grad=False)

    # Create nn modules.
    if embed_net is None:
        rnn_input_size = x_size ** 2
    else:
        rnn_input_size = embed_net[-1]
    rnn_input_size += self.z_where_size + z_what_size + self.z_pres_size
    nl = getattr(nn, non_linearity)
    window_pixels = window_size ** 2

    self.rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
    self.encode = Encoder(window_pixels, encoder_net, z_what_size, nl)
    self.decode = Decoder(window_pixels, decoder_net, z_what_size,
                          decoder_output_bias, decoder_output_use_sigmoid,
                          nl)
    self.predict = Predict(rnn_hidden_size, predict_net, self.z_pres_size,
                           self.z_where_size, nl)
    if embed_net is None:
        self.embed = Identity()
    else:
        self.embed = MLP(x_size ** 2, embed_net, nl, True)

    self.bl_rnn = nn.LSTMCell(rnn_input_size, rnn_hidden_size)
    self.bl_predict = MLP(rnn_hidden_size, bl_predict_net + [1], nl)
    if embed_net is None:
        self.bl_embed = Identity()
    else:
        self.bl_embed = MLP(x_size ** 2, embed_net, nl, True)

    # Create parameters.
    self.h_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.c_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.bl_h_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.bl_c_init = nn.Parameter(torch.zeros(1, rnn_hidden_size))
    self.z_where_init = nn.Parameter(torch.zeros(1, self.z_where_size))
    self.z_what_init = nn.Parameter(torch.zeros(1, self.z_what_size))

    if use_cuda:
        self.cuda()
def __init__(self, args):
    """Build the force simulator layer and the force/contact-point predictor.

    Args:
        args: configuration object. This constructor reads ``loss``,
            ``number_of_cp``, ``instance_environment``,
            ``sequence_length``, ``gpu_ids``, ``object_list``,
            ``use_gt_cp``, ``joint_two_losses``, ``loss1_w``, ``loss2_w``,
            ``pretrain``, ``dropout_ratio``, ``batch_size``, ``data``,
            ``vis_grad`` and ``train_res``. It is also passed to the
            parent constructor.

    Raises:
        AssertionError: if ``args.batch_size`` is not 1 or
            ``args.number_of_cp`` is not 5.
    """
    super(JointNS, self).__init__(args)

    def int_sizes(values):
        # Same conversion as before: via a float tensor cast to long.
        return torch.Tensor(values).long().tolist()

    self.image_feature_size = 512
    self.object_feature_size = 512
    self.hidden_size = 512
    self.num_layers = 3
    self.loss_function = args.loss
    self.number_of_cp = args.number_of_cp
    self.environment = args.instance_environment
    self.sequence_length = args.sequence_length
    self.gpu_ids = args.gpu_ids
    self.all_obj_names = args.object_list
    self.use_gt_cp = args.use_gt_cp
    self.clean_force = True

    # configs w.r.t. two losses: None keeps both, False updates loss2
    # only, True updates loss1 only.
    self.joint_two_losses = args.joint_two_losses
    self.loss1_or_loss2 = None
    if args.loss1_w < 0.00001:
        self.loss1_or_loss2 = False  # update loss2 only
    elif args.loss2_w < 0.00001:
        self.loss1_or_loss2 = True  # update loss1 only
    self.loss1_optim = None
    self.loss2_optim = None
    self.joint_optim = None

    # neural force simulator
    self.use_image_feature = True
    if self.use_image_feature:
        self.one_ns_layer = NSWithImageFeature(hidden_size=64,
                                               layer_norm=False,
                                               image_feature_dim=512)
    else:
        self.one_ns_layer = MLPNS(hidden_size=64, layer_norm=False)

    # force predictor networks.
    self.feature_extractor = resnet18(pretrained=args.pretrain)
    del self.feature_extractor.fc  # the classification head is removed
    self.feature_extractor.eval()

    self.input_feature_size = self.object_feature_size
    self.cp_feature_size = self.number_of_cp * 3

    self.image_embed = combine_block_w_do(512, 64, args.dropout_ratio)
    self.contact_point_image_embed = combine_block_w_do(
        512, 64, args.dropout_ratio)

    input_object_embed_size = torch.Tensor(
        [3 + 4, 100, self.object_feature_size])
    self.input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(),
        dropout=args.dropout_ratio)
    self.contact_point_input_object_embed = input_embedding_net(
        input_object_embed_size.long().tolist(),
        dropout=args.dropout_ratio)

    state_dims = int_sizes([
        NoGradEnvState.total_size + self.cp_feature_size, 100,
        self.object_feature_size
    ])
    self.state_embed = input_embedding_net(state_dims,
                                           dropout=args.dropout_ratio)

    # Both sequence encoders share the same configuration.
    encoder_input_size = self.hidden_size + 64 * 7 * 7
    self.lstm_encoder = nn.LSTM(input_size=encoder_input_size,
                                hidden_size=self.hidden_size,
                                batch_first=True,
                                num_layers=self.num_layers)
    self.contact_point_encoder = nn.LSTM(input_size=encoder_input_size,
                                         hidden_size=self.hidden_size,
                                         batch_first=True,
                                         num_layers=self.num_layers)

    contact_point_dims = int_sizes(
        [self.hidden_size, 100, (3) * self.number_of_cp])
    self.contact_point_decoder = input_embedding_net(
        contact_point_dims, dropout=args.dropout_ratio)

    self.lstm_decoder = nn.LSTMCell(input_size=self.hidden_size * 2,
                                    hidden_size=self.hidden_size)

    force_direction_dims = int_sizes(
        [self.hidden_size, 100, (3) * self.number_of_cp])
    self.forces_directions_decoder = input_embedding_net(
        force_direction_dims, dropout=args.dropout_ratio)

    assert args.batch_size == 1, 'have not been implemented yet, because of the environment'
    assert self.number_of_cp == 5  # for five fingers

    self.all_objects_keypoint_tensor = get_all_objects_keypoint_tensors(
        args.data)
    if args.gpu_ids != -1:
        # Values are replaced in place; the set of keys does not change.
        for obj, val in self.all_objects_keypoint_tensor.items():
            self.all_objects_keypoint_tensor[obj] = val.cuda()

    # NOTE(review): ``self.lstm_decoder`` is not part of this list; the
    # visible code does not show whether that is intentional.
    self.force_predictor_modules = [
        self.feature_extractor,
        self.image_embed,
        self.contact_point_image_embed,
        self.input_object_embed,
        self.contact_point_input_object_embed,
        self.state_embed,
        self.lstm_encoder,
        self.contact_point_encoder,
        self.contact_point_decoder,
        self.forces_directions_decoder,
    ]

    # see gradients for debugging
    self.vis_grad = args.vis_grad
    self.grad_vis = None
    self.train_res = args.train_res or self.vis_grad
def __init__(self,
             embed_size,
             hidden_size,
             vocab,
             dropout_rate=0.2,
             input_feed=True,
             label_smoothing=0.):
    """Build embeddings, two encoders, the decoder and the output layers.

    Args:
        embed_size: embedding width for all three vocabularies.
        hidden_size: hidden size of each encoder direction and of the
            decoder cell.
        vocab: exposes ``src_code``, ``src_nl`` and ``tgt``; each supports
            ``len`` and lookup of ``'<pad>'``.
        dropout_rate: probability of the dropout layer.
        input_feed: accepted but not read here; see the note below.
        label_smoothing: when greater than 0, a ``LabelSmoothingLoss`` is
            created with this value.
    """
    super(NMT, self).__init__()

    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab
    # NOTE(review): the ``input_feed`` argument is ignored; the flag is
    # always set to True. Confirm whether that is intended.
    self.input_feed = True

    enc_out_size = hidden_size * 2  # output width of one bidirectional encoder

    self.src_code_embed = nn.Embedding(
        len(vocab.src_code), embed_size,
        padding_idx=vocab.src_code['<pad>'])
    self.src_nl_embed = nn.Embedding(
        len(vocab.src_nl), embed_size, padding_idx=vocab.src_nl['<pad>'])
    self.tgt_embed = nn.Embedding(
        len(vocab.tgt), embed_size, padding_idx=vocab.tgt['<pad>'])

    self.code_encoder_lstm = nn.LSTM(
        embed_size, hidden_size, bidirectional=True)
    self.nl_encoder_lstm = nn.LSTM(
        embed_size, hidden_size, bidirectional=True)

    # With the flag set, the decoder input is widened by 4 * hidden_size.
    if self.input_feed:
        decoder_lstm_input = embed_size + (4 * hidden_size)
    else:
        decoder_lstm_input = embed_size
    self.decoder_lstm = nn.LSTMCell(decoder_lstm_input, hidden_size)

    # attention: dot product attention
    # project each source encoding to the decoder rnn's state space
    self.att_src_code_linear = nn.Linear(
        enc_out_size, hidden_size, bias=False)
    self.att_src_nl_linear = nn.Linear(
        enc_out_size, hidden_size, bias=False)

    # transformation of decoder hidden states and context vectors before
    # reading out target words; this produces the `attentional vector`
    # in (Luong et al., 2015)
    self.att_vec_linear = nn.Linear(
        hidden_size * 2 * 2 + hidden_size, hidden_size, bias=False)

    # prediction layer of the target vocabulary
    self.readout = nn.Linear(hidden_size, len(vocab.tgt), bias=False)

    # dropout layer
    self.dropout = nn.Dropout(self.dropout_rate)

    # initialize the decoder's state and cells with encoder hidden states
    self.decoder_cell_init = nn.Linear(enc_out_size, hidden_size)

    # copy related layers: a three-way output over a
    # (9 * hidden_size + embed_size)-wide input
    self.p_gen_linear = nn.Linear(hidden_size * 9 + embed_size, 3)

    self.label_smoothing = label_smoothing
    if label_smoothing > 0.:
        self.label_smoothing_loss = LabelSmoothingLoss(
            label_smoothing,
            tgt_vocab_size=len(vocab.tgt),
            padding_idx=vocab.tgt['<pad>'])
def __init__(self, agent_params, **kwargs):
    """Build the hidden stack, actor/critic heads and the optimizer.

    Args:
        agent_params: mapping with required keys ``gamma``,
            ``input_dims``, ``action_dims`` and ``eta``. Optional keys are
            ``rfsize``, ``padding``, ``dilation``, ``stride``,
            ``batch_size``, and ``hidden_types`` together with
            ``hidden_dims``. For 'conv' and 'pool' layers the computed
            output shape is written back into
            ``agent_params['hidden_dims']``.
        **kwargs: fallbacks for the optional ``agent_params`` keys, plus
            ``use_SR`` (default True).

    Raises:
        Exception: if ``hidden_dims`` and ``hidden_types`` differ in
            length, or a hidden type is not one of 'linear', 'lstm',
            'gru', 'conv', 'pool'.
    """
    # call the super-class init
    super(ActorCritic, self).__init__()

    def setting(key, default):
        # agent_params takes precedence, then kwargs, then the default.
        if key in agent_params:
            return agent_params[key]
        return kwargs.get(key, default)

    self.gamma = agent_params['gamma']  # discount factor
    self.input_dims = agent_params['input_dims']
    self.action_dims = agent_params['action_dims']

    self.rfsize = setting('rfsize', 4)
    self.padding = setting('padding', 1)
    # Previously an agent_params entry for dilation was ignored and the
    # kwargs value was used only when agent_params had the key; it now
    # follows the same rule as the other settings.
    self.dilation = setting('dilation', 1)
    self.stride = setting('stride', 1)
    self.batch_size = setting('batch_size', 1)
    self.use_SR = kwargs.get('use_SR', True)

    # Lists for tracking hidden layers and their recurrent state; they
    # stay empty when no hidden layers are requested.
    self.hidden = nn.ModuleList()
    self.hx = []
    self.cx = []

    if 'hidden_types' in agent_params:
        if len(agent_params['hidden_dims']) != len(
                agent_params['hidden_types']):
            raise Exception(
                'Incorrect specification of hidden layer dimensions')

        hidden_types = agent_params['hidden_types']
        self.hidden_dims = agent_params['hidden_dims']
        spatial_types = ('conv', 'pool')
        # Width feeding the heads if the loop below never runs.
        output_d = self.input_dims

        # calculate dimensions for each layer
        for ind, htype in enumerate(hidden_types):
            if htype not in ['linear', 'lstm', 'gru', 'conv', 'pool']:
                raise Exception(
                    f'Unrecognized type for hidden layer {ind}')

            if ind == 0:
                input_d = self.input_dims
            elif (hidden_types[ind - 1] in spatial_types
                  and htype not in spatial_types):
                # Flatten the preceding conv/pool output shape.
                input_d = int(np.prod(self.hidden_dims[ind - 1]))
            else:
                input_d = self.hidden_dims[ind - 1]

            if htype in spatial_types:
                output_d = tuple(self.conv_output(input_d))
                self.hidden_dims[ind] = output_d
            else:
                output_d = self.hidden_dims[ind]

            # construct the layer; strings are compared with ``==``
            # because ``is`` tests object identity, not equality.
            if htype == 'linear':
                self.hidden.append(nn.Linear(input_d, output_d))
                self.hx.append(None)
                self.cx.append(None)
            elif htype == 'lstm':
                self.hidden.append(nn.LSTMCell(input_d, output_d))
                self.hx.append(
                    Variable(torch.zeros(self.batch_size, output_d)))
                self.cx.append(
                    Variable(torch.zeros(self.batch_size, output_d)))
            elif htype == 'gru':
                self.hidden.append(nn.GRUCell(input_d, output_d))
                self.hx.append(
                    Variable(torch.zeros(self.batch_size, output_d)))
                self.cx.append(None)
            elif htype == 'conv':
                in_channels = input_d[0]
                out_channels = output_d[0]
                self.hidden.append(
                    nn.Conv2d(in_channels,
                              out_channels,
                              kernel_size=self.rfsize,
                              padding=self.padding,
                              stride=self.stride,
                              dilation=self.dilation))
                self.hx.append(None)
                self.cx.append(None)
            elif htype == 'pool':
                self.hidden.append(
                    nn.MaxPool2d(kernel_size=self.rfsize,
                                 padding=self.padding,
                                 stride=self.stride,
                                 dilation=self.dilation))
                self.hx.append(None)
                self.cx.append(None)

        # create the actor and critic layers
        self.layers = ([self.input_dims] + self.hidden_dims
                       + [self.action_dims])
        self.output = nn.ModuleList([
            nn.Linear(output_d, self.action_dims),  # actor
            nn.Linear(output_d, 1),  # critic
        ])
        if self.use_SR:
            self.SR = nn.Linear(output_d, output_d)  # psi
    else:
        # No hidden layers: the heads read the input directly. This
        # branch previously referenced the undefined names
        # ``input_dimensions`` and ``action_dimensions``.
        self.hidden_dims = []
        self.layers = [self.input_dims, self.action_dims]
        self.output = nn.ModuleList([
            nn.Linear(self.input_dims, self.action_dims),  # ACTOR
            nn.Linear(self.input_dims, 1),  # CRITIC
        ])

    # Fall back to the input width when there is no hidden layer.
    if self.hidden_dims:
        self.output_d = self.hidden_dims[-1]
    else:
        self.output_d = self.input_dims

    self.saved_actions = []
    self.saved_rewards = []
    self.saved_phi = []
    self.saved_psi = []

    self.optimizer = optim.Adam(self.parameters(), lr=agent_params['eta'])
def LSTMCell(input_size, hidden_size, **kwargs):
    """Create an ``nn.LSTMCell`` whose weights and biases are U(-0.1, 0.1).

    Args:
        input_size: Forwarded to ``nn.LSTMCell``.
        hidden_size: Forwarded to ``nn.LSTMCell``.
        **kwargs: Any further keyword arguments for ``nn.LSTMCell``.

    Returns:
        The freshly constructed, re-initialised cell.
    """
    cell = nn.LSTMCell(input_size, hidden_size, **kwargs)
    for param_name, tensor in cell.named_parameters():
        is_weight_or_bias = 'weight' in param_name or 'bias' in param_name
        if not is_weight_or_bias:
            continue
        # overwrite the default initialisation in place
        tensor.data.uniform_(-0.1, 0.1)
    return cell
def test_lstm_cell(self):
    """Run ``run_model_test`` on an ``nn.LSTMCell`` with random input/state.

    The cell is exercised in eval mode on CPU with a random input batch and
    a random ``(h0, c0)`` state tuple.
    """
    cell = nn.LSTMCell(RNN_INPUT_SIZE, RNN_HIDDEN_SIZE)
    state_shape = (BATCH_SIZE, RNN_HIDDEN_SIZE)
    # Draw order (input, h0, c0) is kept so seeded runs see the same values.
    x = torch.randn(BATCH_SIZE, RNN_INPUT_SIZE)
    hidden = torch.randn(*state_shape)
    cell_state = torch.randn(*state_shape)
    self.run_model_test(cell,
                        train=False,
                        batch_size=BATCH_SIZE,
                        input=(x, (hidden, cell_state)),
                        use_gpu=False)
def __init__(self, num_inputs, action_space):
    """Assemble the conv/LSTM actor-critic trunk plus the ICM heads.

    Args:
        num_inputs: Number of input channels for the first conv layer of
            both the policy trunk and the ``icm_*`` conv stack.
        action_space: Object whose ``n`` attribute gives the number of
            outputs for the actor and the inverse-model head.
    """
    super(ActorCritic, self).__init__()

    n_filters = 32
    conv_opts = dict(kernel_size=3, stride=2, padding=1)
    feat_dim = n_filters * 3 * 3  # 288, the LSTM input width
    hidden = 256

    # policy trunk: four strided convs followed by an LSTM cell
    self.conv1 = nn.Conv2d(num_inputs, n_filters, **conv_opts)
    self.conv2 = nn.Conv2d(n_filters, n_filters, **conv_opts)
    self.conv3 = nn.Conv2d(n_filters, n_filters, **conv_opts)
    self.conv4 = nn.Conv2d(n_filters, n_filters, **conv_opts)
    self.lstm = nn.LSTMCell(feat_dim, hidden)

    num_outputs = action_space.n
    self.critic_linear = nn.Linear(hidden, 1)
    self.actor_linear = nn.Linear(hidden, num_outputs)

    # ICM branch: its own conv stack with the same geometry (no LSTM)
    self.icm_conv1 = nn.Conv2d(num_inputs, n_filters, **conv_opts)
    self.icm_conv2 = nn.Conv2d(n_filters, n_filters, **conv_opts)
    self.icm_conv3 = nn.Conv2d(n_filters, n_filters, **conv_opts)
    self.icm_conv4 = nn.Conv2d(n_filters, n_filters, **conv_opts)

    # inverse head: (feature, feature) -> action logits
    self.inverse_linear1 = nn.Linear(2 * feat_dim, hidden)
    self.inverse_linear2 = nn.Linear(hidden, num_outputs)
    # forward head: (feature, action) -> feature
    self.forward_linear1 = nn.Linear(feat_dim + num_outputs, hidden)
    self.forward_linear2 = nn.Linear(hidden, feat_dim)

    self.apply(weights_init)

    # Re-initialise each linear head with column-normalised weights and a
    # zero bias; the call order matches the original statement order.
    head_scales = (
        (self.inverse_linear1, 0.01),
        (self.inverse_linear2, 1.0),
        (self.forward_linear1, 0.01),
        (self.forward_linear2, 1.0),
        (self.actor_linear, 0.01),
        (self.critic_linear, 1.0),
    )
    for head, scale in head_scales:
        head.weight.data = normalized_columns_initializer(
            head.weight.data, scale)
        head.bias.data.fill_(0)

    for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
        lstm_bias.data.fill_(0)

    self.train()
def test_lstm_cell_is_half(self): cell = nn.LSTMCell(self.h, self.h) self.run_cell_test(cell, state_tuple=True)
def __init__(self, args):
    """Create the per-modality LSTMs, the fusion graph and the gating nets.

    Args:
        args: Configuration object. Attributes read here: ``feature_dims``
            (three input sizes, unpacked as l/a/v), ``hidden_dims_l``,
            ``hidden_dims_a``, ``hidden_dims_v``, ``memsize``,
            ``inner_node_dim``, ``num_classes``, ``device``, and the
            ``NNConfig_*``, ``gamma1Config_*``, ``gamma2Config_*`` and
            ``outConfig_*`` ``shapes``/``drop`` pairs.
    """
    super(Graph_MFN, self).__init__()

    # input and hidden sizes for the three modalities (l, a, v)
    self.d_l, self.d_a, self.d_v = args.feature_dims
    self.dh_l = args.hidden_dims_l
    self.dh_a = args.hidden_dims_a
    self.dh_v = args.hidden_dims_v
    hidden_total = self.dh_l + self.dh_a + self.dh_v

    self.mem_dim = args.memsize
    self.inner_node_dim = args.inner_node_dim
    self.singleton_l_size = args.hidden_dims_l
    self.singleton_a_size = args.hidden_dims_a
    self.singleton_v_size = args.hidden_dims_v

    n_classes = args.num_classes
    # the gates see the fusion-graph output concatenated with the memory
    gate_in = self.inner_node_dim + self.mem_dim
    # the read-out sees all hidden states concatenated with the memory
    readout_in = hidden_total + self.mem_dim

    att2_width = args.NNConfig_shapes
    gamma1_width = args.gamma1Config_shapes
    gamma2_width = args.gamma2Config_shapes
    out_width = args.outConfig_shapes

    att2_p = args.NNConfig_drop
    gamma1_p = args.gamma1Config_drop
    gamma2_p = args.gamma2Config_drop
    out_p = args.outConfig_drop

    # one LSTM cell per modality
    self.lstm_l = nn.LSTMCell(self.d_l, self.dh_l)
    self.lstm_a = nn.LSTMCell(self.d_a, self.dh_a)
    self.lstm_v = nn.LSTMCell(self.d_v, self.dh_v)

    # maps a 2 * hidden-size vector per modality to its singleton node
    self.l_transform = nn.Linear(2 * self.dh_l, self.singleton_l_size)
    self.a_transform = nn.Linear(2 * self.dh_a, self.singleton_a_size)
    self.v_transform = nn.Linear(2 * self.dh_v, self.singleton_v_size)

    # Dynamic fusion graph and its two helper networks.
    # NOTE(review): the 100-wide input of both helper nets is hard-coded;
    # its origin is not visible here.
    device = args.device
    pattern_model = nn.Sequential(
        nn.Linear(100, self.inner_node_dim)).to(device)
    efficacy_model = nn.Sequential(
        nn.Linear(100, self.inner_node_dim)).to(device)
    singleton_sizes = [
        self.singleton_l_size, self.singleton_a_size, self.singleton_v_size
    ]
    self.graph_mfn = DynamicFusionGraph(pattern_model, singleton_sizes,
                                        self.inner_node_dim, efficacy_model,
                                        device).to(device)

    # candidate-memory network: inner_node_dim -> mem_dim
    self.att2_fc1 = nn.Linear(self.inner_node_dim, att2_width)
    self.att2_fc2 = nn.Linear(att2_width, self.mem_dim)
    self.att2_dropout = nn.Dropout(att2_p)

    # two gate networks, both (inner_node_dim + mem_dim) -> mem_dim
    self.gamma1_fc1 = nn.Linear(gate_in, gamma1_width)
    self.gamma1_fc2 = nn.Linear(gamma1_width, self.mem_dim)
    self.gamma1_dropout = nn.Dropout(gamma1_p)

    self.gamma2_fc1 = nn.Linear(gate_in, gamma2_width)
    self.gamma2_fc2 = nn.Linear(gamma2_width, self.mem_dim)
    self.gamma2_dropout = nn.Dropout(gamma2_p)

    # output network
    self.out_fc1 = nn.Linear(readout_in, out_width)
    self.out_fc2 = nn.Linear(out_width, n_classes)
    self.out_dropout = nn.Dropout(out_p)
def __init__(self, ins=2, es=8, hs=16):
    """Set up the embedding layer and both recurrent cells.

    Args:
        ins: Input feature size of ``linear1``.
        es: Output size of ``linear1`` and input size of both cells.
        hs: Hidden size of both cells; also stored as ``self.hs``.
    """
    super(EncoderRNN, self).__init__()
    self.hs = hs
    self.linear1 = nn.Linear(in_features=ins, out_features=es)
    # an LSTM cell and a GRU cell with identical input/hidden sizes
    self.lstm1 = nn.LSTMCell(input_size=es, hidden_size=hs)
    self.gru1 = nn.GRUCell(input_size=es, hidden_size=hs)
def __init__(self,
             obs_space,
             action_space,
             use_memory=False,
             use_text=False):
    """Build the image encoder, optional memory/text modules and both heads.

    Args:
        obs_space: Mapping; ``obs_space["image"]`` is indexed at 0, 1 and 2
            (index 2 is used as the channel count) and ``obs_space["text"]``
            is used as the embedding vocabulary size when ``use_text`` is set.
        action_space: Must be a ``gym.spaces.Discrete``; its ``n`` sizes the
            actor output.
        use_memory: If true, add an LSTM cell over the image embedding.
        use_text: If true, add a word embedding and a GRU text encoder.

    Raises:
        ValueError: If ``action_space`` is not ``gym.spaces.Discrete``.
    """
    super().__init__()

    # Decide which components are enabled
    self.use_text = use_text
    self.use_memory = use_memory
    self.recurrent = use_memory

    # Image embedding: two 2x2 convs with a 2x2 max-pool in between
    image_chans = obs_space["image"][2]
    conv_layers = [
        nn.Conv2d(image_chans, 16, (2, 2)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        nn.Conv2d(16, 32, (2, 2)),
        nn.ReLU(),
    ]
    self.image_conv = nn.Sequential(*conv_layers)

    # Grid height/width are still read but unused: the size below is
    # hard-coded rather than derived from them.
    n = obs_space["image"][0]
    m = obs_space["image"][1]
    # Number of elements produced by image_conv per sample:
    # 32 output channels times 6 (h * w).
    self.image_embedding_size = 32 * 6

    # Memory. semi_memory_size is not assigned in this method; presumably it
    # is a property defined elsewhere on the class.
    if self.use_memory:
        self.memory_rnn = nn.LSTMCell(self.image_embedding_size,
                                      self.semi_memory_size)

    # Text embedding
    if self.use_text:
        self.word_embedding_size = 32
        self.word_embedding = nn.Embedding(obs_space["text"],
                                           self.word_embedding_size)
        self.text_embedding_size = 128
        self.text_rnn = nn.GRU(self.word_embedding_size,
                               self.text_embedding_size,
                               batch_first=True)

    # Total width of the vector fed to the actor and critic
    total_width = self.semi_memory_size
    if self.use_text:
        total_width += self.text_embedding_size
    self.embedding_size = total_width

    # Actor head (discrete action spaces only)
    if not isinstance(action_space, gym.spaces.Discrete):
        raise ValueError("Unknown action space: " + str(action_space))
    self.actor = nn.Sequential(
        nn.Linear(self.embedding_size, 16),
        nn.Tanh(),
        nn.Linear(16, action_space.n),
    )

    # Critic head
    self.critic = nn.Sequential(
        nn.Linear(self.embedding_size, 16),
        nn.Tanh(),
        nn.Linear(16, 1),
    )

    # Initialize parameters correctly
    self.apply(initialize_parameters)
def __init__(self, input_size, hidden_size):
    """Create the wrapped LSTM cell and the two linear gate projections.

    Args:
        input_size: Input width of the LSTM cell and of ``x_gate``.
        hidden_size: Hidden width of the LSTM cell; also the output width of
            both gates and the input width of ``h_gate``.
    """
    super(AdaptiveLSTMCell, self).__init__()
    self.lstm_cell = nn.LSTMCell(input_size=input_size,
                                 hidden_size=hidden_size)
    # linear maps from the input and from the hidden state
    self.x_gate = nn.Linear(in_features=input_size,
                            out_features=hidden_size)
    self.h_gate = nn.Linear(in_features=hidden_size,
                            out_features=hidden_size)
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
    """ Init NMT Model.

    @param embed_size (int): Embedding size (dimensionality)
    @param hidden_size (int): Hidden Size (dimensionality)
    @param vocab (Vocab): Vocabulary object containing src and tgt languages
                          See vocab.py for documentation.
    @param dropout_rate (float): Dropout probability, for attention
    """
    super(NMT, self).__init__()
    self.model_embeddings = ModelEmbeddings(embed_size, vocab)
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab

    # Layer references:
    #   LSTM:     https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
    #   LSTMCell: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
    #   Linear:   https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
    #   Dropout:  https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

    # Bidirectional encoder with bias. No `dropout=` argument: nn.LSTM only
    # applies it between stacked layers, so on this single-layer LSTM it had
    # no effect and merely raised a UserWarning.
    self.encoder = nn.LSTM(embed_size,
                           hidden_size,
                           bias=True,
                           bidirectional=True)

    # The decoder cell's input width is embed_size + hidden_size.
    self.decoder = nn.LSTMCell(embed_size + hidden_size,
                               hidden_size,
                               bias=True)

    # W_{h}: projects the encoder's final hidden state from R^{2h} to R^{h}
    self.h_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)
    # W_{c}: the same projection for the final cell state
    self.c_projection = nn.Linear(hidden_size * 2, hidden_size, bias=False)
    # W_{attProj}: projects 2h-wide encoder states for attention scoring
    self.att_projection = nn.Linear(hidden_size * 2,
                                    hidden_size,
                                    bias=False)
    # W_{u}: applied to the 3h-wide combination of attention output and
    # decoder hidden state
    self.combined_output_projection = nn.Linear(hidden_size * 3,
                                                hidden_size,
                                                bias=False)
    # W_{vocab}: produces the target-vocabulary scores for the softmax
    self.target_vocab_projection = nn.Linear(hidden_size,
                                             len(vocab.tgt),
                                             bias=False)
    self.dropout = nn.Dropout(self.dropout_rate)
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
    """ Init NMT Model.

    @param embed_size (int): Embedding size (dimensionality)
    @param hidden_size (int): Hidden Size (dimensionality)
    @param vocab (Vocab): Vocabulary object containing src and tgt languages
                          See vocab.py for documentation.
    @param dropout_rate (float): Dropout probability, for attention
    """
    super(NMT, self).__init__()
    self.model_embeddings = ModelEmbeddings(embed_size, vocab)
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab

    # Layer references:
    #   LSTM:     https://pytorch.org/docs/stable/nn.html#torch.nn.LSTM
    #   LSTMCell: https://pytorch.org/docs/stable/nn.html#torch.nn.LSTMCell
    #   Linear:   https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
    #   Dropout:  https://pytorch.org/docs/stable/nn.html#torch.nn.Dropout

    # Bidirectional encoder with bias.
    self.encoder = nn.LSTM(embed_size,
                           self.hidden_size,
                           num_layers=1,
                           bias=True,
                           bidirectional=True)
    # The decoder cell's input width is embed_size + hidden_size.
    self.decoder = nn.LSTMCell(embed_size + self.hidden_size,
                               self.hidden_size,
                               bias=True)

    # Every projection below operates on encoder/decoder STATES, so it is
    # sized by hidden_size. The previous embed_size-based sizes only lined
    # up when embed_size == hidden_size.
    enc_width = 2 * self.hidden_size  # bidirectional: 2 * hidden_size

    # W_{h}: encoder final hidden state (2h) -> decoder initial hidden (h)
    self.h_projection = nn.Linear(enc_width, self.hidden_size, bias=False)
    # W_{c}: encoder final cell state (2h) -> decoder initial cell (h)
    self.c_projection = nn.Linear(enc_width, self.hidden_size, bias=False)
    # W_{attProj}: encoder hidden states (2h) -> h
    self.att_projection = nn.Linear(enc_width,
                                    self.hidden_size,
                                    bias=False)
    # W_{u}: 3h -> h
    self.combined_output_projection = nn.Linear(3 * self.hidden_size,
                                                self.hidden_size,
                                                bias=False)
    # W_{vocab}: h -> target vocabulary size
    self.target_vocab_projection = nn.Linear(self.hidden_size,
                                             len(self.vocab.tgt),
                                             bias=False)
    self.dropout = nn.Dropout(dropout_rate)
def __init__(self, latents, actions, hiddens, gaussians):
    """Initialise the parent class, then attach a single LSTM cell.

    Args:
        latents: Forwarded to the parent; with ``actions`` it sets the cell's
            input width (``latents + actions``).
        actions: Forwarded to the parent; see ``latents``.
        hiddens: Forwarded to the parent; hidden width of the cell.
        gaussians: Forwarded to the parent only.
    """
    super().__init__(latents, actions, hiddens, gaussians)
    cell_input_width = latents + actions
    self.rnn = nn.LSTMCell(input_size=cell_input_width, hidden_size=hiddens)
def __init__(self,
             cell='gru',
             use_baseline=True,
             n_actions=10,
             n_units=64,
             fusion_dim=128,
             n_input=76,
             n_hidden=128,
             demo_dim=17,
             n_output=1,
             dropout=0.0,
             lamda=0.5,
             device='cpu'):
    """Create the two agent networks, the recurrent cell and the output head.

    Args:
        cell: ``'lstm'`` selects ``nn.LSTMCell``; any other value selects
            ``nn.GRUCell``.
        use_baseline: If truthy, add one scalar value head per agent.
        n_actions: Output size of each agent's second linear layer.
        n_units: Hidden width of each agent network.
        fusion_dim: Output width of the ``fusion`` layer.
        n_input: Input width of the recurrent cell (and, with ``demo_dim``,
            of agent 2's first layer).
        n_hidden: Hidden width of the recurrent cell (and, with ``demo_dim``,
            the input width of agent 1's first layer and of ``fusion``).
        demo_dim: Width of the vector fed to ``init_h`` / ``init_c``.
        n_output: Output width of the final ``output`` layer.
        dropout: Dropout probability; a dropout layer is only created when
            this is greater than 0.
        lamda: Stored as ``self.lamda``; not otherwise used here.
        device: Stored as ``self.device``; not otherwise used here.
    """
    super(Agent, self).__init__()

    self.cell = cell
    self.use_baseline = use_baseline
    self.n_actions = n_actions
    self.n_units = n_units
    self.n_input = n_input
    self.n_hidden = n_hidden
    self.n_output = n_output
    self.dropout = dropout
    self.lamda = lamda
    self.fusion_dim = fusion_dim
    self.demo_dim = demo_dim
    self.device = device

    # per-agent buffers, created empty here
    self.agent1_action = []
    self.agent1_prob = []
    self.agent1_entropy = []
    self.agent1_baseline = []
    self.agent2_action = []
    self.agent2_prob = []
    self.agent2_entropy = []
    self.agent2_baseline = []

    # agent 1's input is sized from the hidden state, agent 2's from the raw
    # input; both also take the demo_dim-wide vector
    self.agent1_fc1 = nn.Linear(self.n_hidden + self.demo_dim, self.n_units)
    self.agent2_fc1 = nn.Linear(self.n_input + self.demo_dim, self.n_units)
    self.agent1_fc2 = nn.Linear(self.n_units, self.n_actions)
    self.agent2_fc2 = nn.Linear(self.n_units, self.n_actions)
    if use_baseline:
        self.agent1_value = nn.Linear(self.n_units, 1)
        self.agent2_value = nn.Linear(self.n_units, 1)

    if self.cell == 'lstm':
        self.rnn = nn.LSTMCell(self.n_input, self.n_hidden)
    else:
        self.rnn = nn.GRUCell(self.n_input, self.n_hidden)

    # Zero biases and orthogonal weights for the recurrent cell. Uses the
    # in-place `constant_` (the un-suffixed `constant` is deprecated),
    # matching the `orthogonal_` call.
    for name, param in self.rnn.named_parameters():
        if 'bias' in name:
            nn.init.constant_(param, 0.0)
        elif 'weight' in name:
            nn.init.orthogonal_(param)

    if dropout > 0.0:
        self.nn_dropout = nn.Dropout(p=dropout)

    # linear maps from the demo_dim-wide vector to n_hidden
    self.init_h = nn.Linear(self.demo_dim, self.n_hidden)
    self.init_c = nn.Linear(self.demo_dim, self.n_hidden)
    self.fusion = nn.Linear(self.n_hidden + self.demo_dim, self.fusion_dim)
    self.output = nn.Linear(self.fusion_dim, self.n_output)

    self.sigmoid = nn.Sigmoid()
    # NOTE(review): Softmax relies on the deprecated implicit `dim`; the
    # right axis depends on how forward() calls it, so it is left unchanged.
    self.softmax = nn.Softmax()
    self.tanh = nn.Tanh()
    self.relu = nn.ReLU()
def _set_cell(self):
    """Return a new ``nn.LSTMCell`` sized from this object's attributes.

    The input width is ``self.in_dims + self.pb_dims`` and the hidden width
    is ``self.unit_nums``.
    """
    cell_input_width = self.in_dims + self.pb_dims
    return nn.LSTMCell(cell_input_width, self.unit_nums)
def __init__(self, embed_size, hidden_size, vocab, dropout_rate=0.2):
    """ Init NMT Model.

    @param embed_size (int): Embedding size (dimensionality)
    @param hidden_size (int): Hidden Size, the size of hidden states (dimensionality)
    @param vocab (Vocab): Vocabulary object containing src and tgt languages
                          See vocab.py for documentation.
    @param dropout_rate (float): Dropout probability, for attention
    """
    super(NMT, self).__init__()
    self.model_embeddings = ModelEmbeddings(embed_size, vocab)
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.vocab = vocab

    # Placeholders, overwritten by the real layers below.
    for layer_name in ('encoder', 'decoder', 'h_projection', 'c_projection',
                       'att_projection', 'combined_output_projection',
                       'target_vocab_projection', 'dropout'):
        setattr(self, layer_name, None)

    # For sanity check only, not relevant to implementation
    self.gen_sanity_check = False
    self.counter = 0

    h = self.hidden_size
    enc_width = 2 * h  # the bidirectional encoder yields 2 * hidden_size

    # Bidirectional LSTM encoder and LSTM-cell decoder, both with bias. The
    # decoder cell's input width is embed_size + hidden_size.
    self.encoder = nn.LSTM(embed_size, h, bias=True, bidirectional=True)
    self.decoder = nn.LSTMCell(embed_size + h, h, bias=True)

    # Bias-free projections; sizes in terms of the hidden size h.
    self.h_projection = nn.Linear(enc_width, h, bias=False)  # W_{h}
    self.c_projection = nn.Linear(enc_width, h, bias=False)  # W_{c}
    self.att_projection = nn.Linear(enc_width, h, bias=False)  # W_{attProj}
    self.combined_output_projection = nn.Linear(3 * h, h,
                                                bias=False)  # W_{u}
    tgt_vocab_size = len(self.vocab.tgt)
    self.target_vocab_projection = nn.Linear(h, tgt_vocab_size,
                                             bias=False)  # W_{vocab}

    self.dropout = nn.Dropout(p=self.dropout_rate)