def validation_step(self, batch, batch_idx):
    """Evaluate one validation batch.

    Args:
        batch: An ``(inputs, targets)`` pair.
        batch_idx: Index of the batch; not used by this step.

    Returns:
        A dict holding the cross-entropy loss under ``"loss"`` and the
        prediction placeholder under ``"pred"``.
    """
    inputs, targets = batch
    logits = self.model(inputs)
    batch_loss = F.cross_entropy(logits, targets)
    # Placeholder from the original snippet: real predictions are not
    # computed here, the literal Ellipsis object is returned.
    placeholder = ...
    return {"loss": batch_loss, "pred": placeholder}
def validation_step(self, batch, batch_idx):
    """Evaluate one validation batch, log its loss and return the predictions.

    Args:
        batch: An ``(inputs, targets)`` pair.
        batch_idx: Index of the batch; not used by this step.

    Returns:
        The prediction placeholder (the literal Ellipsis object in this
        snippet). The loss is not returned; it is only reported through
        ``self.log`` under the key ``"val_loss"``.
    """
    inputs, targets = batch
    logits = self.model(inputs)
    self.log("val_loss", F.cross_entropy(logits, targets))
    # Only the predictions are handed back to the caller.
    placeholder = ...
    return placeholder
def attention(query, key, value, mask=None, dropout=None):
    """Apply scaled dot-product attention.

    Args:
        query: Query matrix, expected shape ``(None, no_query, head_dim)``.
        key: Key matrix, expected shape ``(None, no_keys, head_dim)``.
        value: Value matrix, expected shape
            ``(None, no_keys=no_values, model_dim)``.
        mask: Optional mask broadcastable to ``(None, no_query, no_keys)``.
            Positions where the mask equals 0 are excluded from attention.
        dropout: Optional callable (for example an ``nn.Dropout`` module)
            applied to the attention probabilities. It is called, so it must
            not be a plain float rate.

    Returns:
        A ``(context_vectors, p_attn)`` tuple: the attention-weighted values
        of shape ``(None, no_query, model_dim)`` and the attention
        probabilities of shape ``(None, no_query, no_keys)``, which are handy
        for visualisation.
    """
    d_k = query.size(-1)
    # Scale by sqrt(d_k) so the logits do not grow with the head dimension.
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        # A large negative logit drives the softmax weight to (almost) zero.
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn
def training_step(self, batch, batch_idx):
    """Compute the training loss for one batch and log it.

    Args:
        batch: An ``(inputs, targets)`` pair.
        batch_idx: Index of the batch; not used by this step.

    Returns:
        The cross-entropy loss of the model output against the targets.
    """
    inputs, targets = batch
    batch_loss = F.cross_entropy(self.model(inputs), targets)
    # Report the metric per step and per epoch, to both the logger and the
    # progress bar.
    log_options = {
        "on_step": True,
        "on_epoch": True,
        "logger": True,
        "prog_bar": True,
    }
    self.log("train_loss", batch_loss, **log_options)
    return batch_loss
def forward(self, x):
    """Run the two conv/pool stages followed by the three linear layers.

    Args:
        x: Input tensor accepted by ``self.conv1``.

    Returns:
        The output of ``self.fc3`` (no activation is applied to it).
    """
    # The pooling window belongs to max_pool2d, not to relu: the original
    # parenthesisation passed (2, 2) to F.relu and left max_pool2d without
    # a kernel size.
    x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
    # A single int is shorthand for a square window.
    x = F.max_pool2d(F.relu(self.conv2(x)), 2)
    # Flatten every sample to a vector before the fully connected layers.
    x = x.view(-1, self.num_flat_features(x))
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x
def forward(self, sentence):
    """Score the tags of every word from word- and character-level features.

    Args:
        sentence: Sequence of words. Each word is indexable, with the word
            index at position 0 and a ``LongTensor`` of its character
            indices at position 1.

    Returns:
        Log-probabilities over the tag space, one row per word
        (suitable for ``NLLLoss``).

    NOTE(review): the reshapes below assume sequence-first LSTMs
    (``batch_first=False``) and a batch size of 1 — confirm against the
    module's constructor.
    """
    char_summaries = []
    word_indices = []
    for word in sentence:
        # Refresh the hidden state so nothing leaks from the previous word.
        self.char_hidden_state = self.init_hidden()
        character_indices = word[1]
        character_level_embeddings = self.character_embeddings(character_indices)
        # The character LSTM runs over this word's characters, so the
        # sequence length is the number of characters, not len(sentence).
        output_lstm, self.char_hidden_state = self.character_lstm(
            character_level_embeddings.view(len(character_indices), 1, -1),
            self.char_hidden_state,
        )
        # Keep only the output after the last character: a fixed-size
        # summary that can be concatenated with the word embedding
        # regardless of the word's length.
        char_summaries.append(output_lstm[-1])
        word_indices.append(word[0])

    # torch.unsqueeze cannot take a Python list; stacking turns the
    # per-word summaries into one (len(sentence), 1, hidden) tensor.
    char_features = torch.stack(char_summaries, 0)

    # Look the indices up first, then reshape the embeddings (the index
    # tensor itself has no embedding dimension to view into).
    word_embeddings = self.word_embeddings(
        torch.tensor(word_indices, dtype=torch.long)
    ).view(len(sentence), 1, -1)

    # Concatenate word and character features along the feature axis.
    concatenated_characters_and_words = torch.cat(
        (word_embeddings, char_features), dim=-1
    )
    lstm_output_state, self.hidden = self.word_lstm(
        concatenated_characters_and_words, self.hidden
    )
    tag_space = self.hidden2tag(lstm_output_state.view(len(sentence), -1))
    # Normalise over the tag axis; dim=None relies on a deprecated implicit
    # choice of dimension.
    return F.log_softmax(tag_space, dim=1)
def forward(self, track):
    """Feed a track through the gated cell and score the final state.

    For every note the persistent ``self.state`` is scaled by a forget gate
    and then incremented by gated candidate values. The output layers are
    evaluated after every note, but only the result for the last note is
    returned.

    Args:
        track: Iterable of note tensors.

    Returns:
        Log-softmax (over the last axis) of the output computed after the
        final note.

    Raises:
        ValueError: If ``track`` yields no notes.
    """
    output = None
    for note in track:
        # torch.cat takes a sequence of tensors; the original passed the
        # state as the `dim` argument. Joining on the last axis matches the
        # default for 1-D inputs and also covers batched (N, features) ones.
        gate_input = torch.cat((note, self.state), dim=-1)
        forget = torch.sigmoid(self.forgetgate(gate_input))
        # NOTE(review): these in-place updates mutate a tensor that earlier
        # layers may have saved for backward — confirm training works, or
        # switch to out-of-place updates.
        self.state *= forget
        inp = torch.sigmoid(self.inputgate(gate_input))
        # NOTE(review): `cadidate_gen` looks like a typo for `candidate_gen`,
        # but the attribute is defined outside this block, so the name is kept.
        candidates = torch.tanh(self.cadidate_gen(note))
        self.state += inp * candidates
        output = F.relu(self.outputlayer1(self.state))
        output = F.relu(self.outputlayer2(output))
    if output is None:
        raise ValueError("track must contain at least one note")
    # An explicit dim avoids the deprecated implicit-dimension behaviour.
    return F.log_softmax(output, dim=-1)
def forward(self, idx, targets=None):
    """Compute next-token logits and, optionally, the training loss.

    Args:
        idx: Integer tensor of token indices with shape ``(b, t)``, where
            ``b`` is the batch size and ``t`` the sequence length.
        targets: Optional tensor of target token indices with as many
            elements as ``idx``.

    Returns:
        A ``(logits, loss)`` tuple. ``loss`` is ``None`` when no targets
        are given, otherwise the cross-entropy over all positions.
    """
    _, t = idx.size()
    assert t <= self.block_size, "exhausted the block size"

    token_embeddings = self.tok_emb(idx)
    # Only the first t learned positions are needed for this sequence.
    position_embeddings = self.pos_emb[:, :t, :]
    x = self.drop(token_embeddings + position_embeddings)
    x = self.blocks(x)
    x = self.ln_f(x)
    logits = self.head(x)

    loss = None
    if targets is not None:
        # Flatten batch and time so every position is one classification
        # example: (b * t, vocab) logits against (b * t,) targets.
        loss = F.cross_entropy(
            logits.view(-1, logits.size(-1)), targets.view(-1)
        )
    return logits, loss
def forward(self, x, layer_past=None):
    """Apply masked multi-head self-attention.

    Args:
        x: Input of shape ``(B, T, C)``: batch, time steps (sequence
            length) and embedding dimension.
        layer_past: Accepted for interface compatibility; not used here.

    Returns:
        Tensor of shape ``(B, T, C)`` after attention, output projection
        and residual dropout.
    """
    B, T, C = x.size()
    head_dim = C // self.n_head

    # Split the embedding into heads: (B, T, C) -> (B, nh, T, hs).
    k = self.key(x).view(B, T, self.n_head, head_dim).transpose(1, 2)
    q = self.query(x).view(B, T, self.n_head, head_dim).transpose(1, 2)
    v = self.value(x).view(B, T, self.n_head, head_dim).transpose(1, 2)

    # `@` multiplies the last two dimensions and broadcasts the leading
    # ones: (B, nh, T, hs) @ (B, nh, hs, T) -> (B, nh, T, T).
    att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
    # Positions where the mask is 0 get -inf and so zero attention weight.
    att = att.masked_fill(self.mask[:, :, :T, :T] == 0, float('-inf'))
    # Normalise over the key axis. dim=1 would normalise across heads and
    # produce NaN wherever every head is masked.
    att = F.softmax(att, dim=-1)
    att = self.attn_drop(att)
    y = att @ v  # (B, nh, T, T) x (B, nh, T, hs) => (B, nh, T, hs)
    # Merge the heads back: (B, nh, T, hs) -> (B, T, C).
    y = y.transpose(1, 2).contiguous().view(B, T, C)
    y = self.resid_drop(self.proj(y))
    return y
def forward(self, x):
    """Run five conv/pool stages followed by a three-layer classifier.

    Per the original author's notes the input is expected to be 224x224,
    halved by each pooling stage down to 7x7 (224 -> 112 -> 56 -> 28 ->
    14 -> 7) before being flattened to ``288 * 7 * 7`` features.

    Args:
        x: Input tensor accepted by ``self.conv1``.

    Returns:
        The output of ``self.fc3``; no activation or dropout is applied to
        the last layer.
    """
    conv_stages = (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5)
    for conv in conv_stages:
        # Convolution, ReLU, then pooling.
        x = self.maxpool(F.relu(conv(x)))

    # Flatten the output of the convolutional part.
    flat = x.view(-1, 288 * 7 * 7)
    features = self.dropout(flat)
    for dense in (self.fc1, self.fc2):
        features = self.dropout(F.relu(dense(features)))
    return self.fc3(features)
def training_step(self, batch, batch_idx):
    """Compute the training loss for one batch and return it with predictions.

    Args:
        batch: An ``(inputs, targets)`` pair.
        batch_idx: Index of the batch; not used by this step.

    Returns:
        A dict holding the cross-entropy loss under ``"loss"`` and the
        prediction placeholder under ``"preds"``.
    """
    inputs, targets = batch
    logits = self.model(inputs)
    batch_loss = F.cross_entropy(logits, targets)
    # Placeholder from the original snippet: the literal Ellipsis object.
    placeholder = ...
    return {"loss": batch_loss, "preds": placeholder}
def training_step(self, batch, batch_idx):
    """Compute the training loss for one batch.

    Args:
        batch: An ``(inputs, targets)`` pair.
        batch_idx: Index of the batch; not used by this step.

    Returns:
        The cross-entropy loss of the model output against the targets.
    """
    inputs, targets = batch
    logits = self.model(inputs)
    return F.cross_entropy(logits, targets)
def forward(self, x):
    """Apply three ReLU-activated linear layers and a final linear layer.

    Args:
        x: Input tensor accepted by ``self.linear1``.

    Returns:
        The output of ``self.linear4``; no activation is applied to it.
    """
    hidden = x
    for layer in (self.linear1, self.linear2, self.linear3):
        hidden = F.relu(layer(hidden))
    return self.linear4(hidden)
def forward(self, x):
    """Apply three conv + batch-norm + ReLU stages and the output head.

    Args:
        x: Input tensor accepted by ``self.conv1``; its first dimension is
            kept as the batch dimension when flattening.

    Returns:
        The output of ``self.head`` applied to the flattened features.
    """
    stages = (
        (self.conv1, self.bn1),
        (self.conv2, self.bn2),
        (self.conv3, self.bn3),
    )
    for conv, norm in stages:
        x = F.relu(norm(conv(x)))
    # Collapse everything except the batch dimension.
    batch_size = x.size(0)
    flattened = x.view(batch_size, -1)
    return self.head(flattened)
def forward(self, x):
    """Project the input and return log-probabilities over the last axis.

    Args:
        x: Input tensor accepted by ``self.proj``.

    Returns:
        Log-softmax of the projection, taken over the last dimension.
    """
    projected = self.proj(x)
    return F.log_softmax(projected, dim=-1)
def forward(self, x):
    """Apply the two-layer feed-forward transformation.

    The input goes through ``self.w_1``, a ReLU, ``self.dropout`` and
    finally ``self.w_2``.

    Args:
        x: Input tensor accepted by ``self.w_1``.

    Returns:
        The output of ``self.w_2``.
    """
    activated = F.relu(self.w_1(x))
    regularised = self.dropout(activated)
    return self.w_2(regularised)
def encode(self, x):
    """Encode the input into two vectors computed from a shared hidden layer.

    Args:
        x: Input tensor accepted by ``self.fc1``.

    Returns:
        A ``(self.fc21(h), self.fc22(h))`` tuple, where ``h`` is the
        ReLU-activated output of ``self.fc1``. Presumably these are the
        mean and log-variance of a latent distribution — confirm against
        the caller.
    """
    hidden = F.relu(self.fc1(x))
    first_head = self.fc21(hidden)
    second_head = self.fc22(hidden)
    return first_head, second_head
def forward(self, x):
    """Clamp the input with ``F.hardtanh`` using its default bounds.

    Args:
        x: Input tensor.

    Returns:
        A tensor of the same shape with values limited to ``[-1, 1]``.
    """
    clipped = F.hardtanh(x)
    return clipped
def decode(self, z):
    """Decode an intermediate code back through the two decoder layers.

    Args:
        z: The intermediary code (per the original note, constructed from
            sigma and mu).

    Returns:
        The sigmoid of ``self.fc4`` applied to the ReLU-activated output of
        ``self.fc3``; every element lies in the open interval (0, 1).
    """
    h3 = F.relu(self.fc3(z))
    # `F.sigmod` does not exist and raised AttributeError. The tensor
    # method is used so no extra module name is needed. Sigmoid squashes
    # the output into (0, 1), not [-1, 1].
    return self.fc4(h3).sigmoid()
unflow = vn * -3 # if a tile is adjacent to both flows, nothing will flow there weights = [ [ [flow], [unflow], [zero], [zero], [zero] ], [ [flow], [unflow], [zero], [zero], [zero] ], ] self.conv.init() self.conv.weight = torch.nn.Parameter(weights, required_grad=False) hardtanh = F.hardtanh(min_val=0) def forward(obs): '''obs: 2D image, suppose channel 0 indicates the poweredness of a tile, and channel 1 indicates the presence of a zone.''' powered = obs[0] zone = obs[1] return hardtanh(self.conv(obs))
def validation_step(self, batch, batch_idx):
    """Evaluate one validation batch and log its loss.

    Args:
        batch: An ``(inputs, targets)`` pair.
        batch_idx: Index of the batch; not used by this step.

    Returns:
        None. The cross-entropy loss is only reported through ``self.log``
        under the key ``"val_loss"``.
    """
    inputs, targets = batch
    logits = self.model(inputs)
    self.log("val_loss", F.cross_entropy(logits, targets))