def log_softmax_loss():
    input_size = 784
    hidden_sizes = [128, 64]
    output_size = 10
    # Build a feed-forward network
    model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                          nn.ReLU(),
                          nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                          nn.ReLU(),
                          nn.Linear(hidden_sizes[1], output_size),
                          nn.LogSoftmax(dim=1))
    # Define the loss; NLLLoss expects log-probabilities as input
    criterion = nn.NLLLoss()

    trainloader, testloader, _ = get_mnist_loader()
    images, labels = next(iter(trainloader))
    images = images.view(images.shape[0], -1)  # Flatten images
    print(images.shape)

    # Forward pass, get our log-probabilities
    logps = model(images)
    # Calculate the loss with the logps and the labels
    loss = criterion(logps, labels)
    print(loss)

    print('Before backward pass: \n', model[0].weight.grad)
    loss.backward()
    print('After backward pass: \n', model[0].weight.grad)
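# Sanity check (a minimal sketch, not part of the original file): with the
# default mean reduction, nn.NLLLoss is just the negative log-probability of
# the correct class, averaged over the batch. The tensors below are fake
# stand-ins for the logps/labels computed above.
import torch

logps_demo = torch.log_softmax(torch.randn(4, 10), dim=1)  # fake log-probabilities
labels_demo = torch.tensor([2, 5, 0, 9])                   # fake targets

manual = -logps_demo[torch.arange(4), labels_demo].mean()
builtin = torch.nn.NLLLoss()(logps_demo, labels_demo)
print(torch.allclose(manual, builtin))  # True: same value both ways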
def logit_loss():
    input_size = 784
    hidden_sizes = [128, 64]
    output_size = 10
    # Build a feed-forward network that outputs raw logits (no LogSoftmax)
    model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                          nn.ReLU(),
                          nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                          nn.ReLU(),
                          nn.Linear(hidden_sizes[1], output_size))
    # Define the loss; CrossEntropyLoss applies LogSoftmax + NLLLoss internally
    criterion = nn.CrossEntropyLoss()

    trainloader, testloader, _ = get_mnist_loader()
    images, labels = next(iter(trainloader))
    images = images.view(images.shape[0], -1)  # Flatten images

    # Forward pass, get our logits
    logits = model(images)
    # Calculate the loss with the logits and the labels
    loss = criterion(logits, labels)
    print(loss)
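# The two functions above compute the same quantity two ways. A minimal sketch
# verifying that CrossEntropyLoss on raw logits equals LogSoftmax + NLLLoss;
# the tensors here are illustrative, not taken from the loaders above.
import torch
import torch.nn.functional as F

logits_demo = torch.randn(4, 10)          # fake batch: 4 samples, 10 classes
labels_demo = torch.tensor([3, 0, 7, 1])  # fake targets

ce = F.cross_entropy(logits_demo, labels_demo)
nll = F.nll_loss(F.log_softmax(logits_demo, dim=1), labels_demo)
print(torch.allclose(ce, nll))  # True: CrossEntropyLoss = LogSoftmax + NLLLoss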
    # forward pass of Classifier (fragment; the layer definitions are above)
    def forward(self, x):
        x = self.dropout(F.relu(self.fc1(x)))  # dropout randomly zeroes some activations
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.dropout(F.relu(self.fc3(x)))
        x = F.log_softmax(self.fc4(x), dim=1)
        return x


if __name__ == '__main__':
    epochs = 5
    batch_size = 64
    model = Classifier()
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.003)
    train_loader, test_loader, valid_loader = get_mnist_loader(
        batch_size=batch_size, valid_size=0.2)

    train_losses, valid_losses = [], []
    valid_loss_min = np.inf  # set initial "min" to infinity

    for e in range(epochs):
        tot_train_loss = 0
        # train the model
        model.train()  # prep model for training (enables dropout)
        for data, target in train_loader:
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            tot_train_loss += loss.item()
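        # Validation sketch: valid_loss_min is initialized above but the
        # validation step was cut off. A minimal sketch of how the epoch
        # typically continues, assuming the same model/criterion/valid_loader
        # names; the checkpoint filename is hypothetical.
        model.eval()  # prep model for evaluation (disables dropout)
        tot_valid_loss = 0
        with torch.no_grad():  # no gradients needed for validation
            for data, target in valid_loader:
                output = model(data)
                tot_valid_loss += criterion(output, target).item()

        train_losses.append(tot_train_loss / len(train_loader))
        valid_losses.append(tot_valid_loss / len(valid_loader))

        # save the model whenever the validation loss decreases
        if valid_losses[-1] < valid_loss_min:
            torch.save(model.state_dict(), 'model_mnist.pt')  # hypothetical path
            valid_loss_min = valid_losses[-1]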
        x = self.pool(x)
        # add second hidden layer
        x = F.relu(self.conv2(x))
        x = self.pool(x)  # compressed representation

        # decode
        # add transpose conv layers, with relu activation function
        x = F.relu(self.t_conv1(x))
        # output layer (with sigmoid for scaling from 0 to 1)
        x = torch.sigmoid(self.t_conv2(x))
        return x


if __name__ == '__main__':
    batch_size = 20
    train_loader, test_loader, valid_loader = get_mnist_loader(
        batch_size=batch_size, is_norm=False)
    model = ConvAutoEncoder(encoding_dim=32)
    print(model)

    """Since we are comparing pixel values in input and output images,
    it's best to use a loss meant for a regression task."""
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    n_epochs = 20
    for epoch in range(1, n_epochs + 1):
        train_loss = 0.0
        for data in train_loader:
            images, _ = data  # labels are unused: the autoencoder reconstructs its input
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, images)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * images.size(0)

        train_loss = train_loss / len(train_loader.dataset)
        print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))
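# The ConvAutoEncoder layer definitions are not shown above; a minimal sketch
# of a class whose attribute names match the forward pass (it would sit above
# the __main__ block in the real file). The channel sizes are assumptions, and
# encoding_dim is accepted only to mirror the constructor call above.
import torch.nn as nn


class ConvAutoEncoder(nn.Module):
    def __init__(self, encoding_dim=32):
        super().__init__()
        self.encoding_dim = encoding_dim  # kept for reference; unused in this sketch
        # encoder: 1x28x28 -> 16x14x14 -> 4x7x7 across the two conv + pool stages
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 4, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # decoder: transpose convs double the spatial size back to 28x28
        self.t_conv1 = nn.ConvTranspose2d(4, 16, 2, stride=2)
        self.t_conv2 = nn.ConvTranspose2d(16, 1, 2, stride=2)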
from torch import nn, optim

from book.pytorch.utils.helper import get_mnist_loader

if __name__ == '__main__':
    input_size = 784
    hidden_sizes = [128, 64]
    output_size = 10

    model = nn.Sequential(nn.Linear(input_size, hidden_sizes[0]),
                          nn.ReLU(),
                          nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                          nn.ReLU(),
                          nn.Linear(hidden_sizes[1], output_size),
                          nn.LogSoftmax(dim=1))

    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.003)

    trainloader, testloader, _ = get_mnist_loader()

    epochs = 5
    for e in range(epochs):
        running_loss = 0
        for images, labels in trainloader:
            # Flatten MNIST images into a 784-long vector
            images = images.view(images.shape[0], -1)

            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {e + 1}/{epochs} - training loss: {running_loss / len(trainloader):.4f}")
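    # Evaluation sketch (not in the original file): classification accuracy on
    # the held-out testloader, reusing the trained model above; torch is
    # assumed to be imported at the top of the file.
    correct, total = 0, 0
    with torch.no_grad():  # no gradients needed at evaluation time
        for images, labels in testloader:
            images = images.view(images.shape[0], -1)  # flatten, as in training
            preds = model(images).argmax(dim=1)  # class with highest log-probability
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    print(f"Test accuracy: {correct / total:.2%}")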
def weights_init_uniform_rule(m):
    classname = m.__class__.__name__
    # for every Linear layer in a model..
    if classname.find('Linear') != -1:
        # get the number of inputs
        n = m.in_features
        y = 1.0 / np.sqrt(n)
        m.weight.data.uniform_(-y, y)
        m.bias.data.fill_(0)


if __name__ == '__main__':
    num_workers = 0
    batch_size = 64
    valid_size = 0.2
    train_loader, test_loader, valid_loader = get_mnist_loader(
        batch_size, valid_size, num_workers)

    # model_0 = Net(constant_weight=0)
    # model_1 = Net(constant_weight=1)
    # model_list = [(model_0, 'All Zeros'), (model_1, 'All Ones')]
    # compare_init_weights(model_list, 'All Zeros vs All Ones', train_loader, valid_loader)

    model_uniform = Net()
    model_uniform.apply(weights_init_uniform)
    compare_init_weights([(model_uniform, 'Uniform Weights')],
                         'Uniform', train_loader, valid_loader)

    """
    Good practice is to start your weights in the range of [-y, y],
    where y = 1 / sqrt(n) and n is the number of inputs to a given neuron.
    """
    model_centered = Net()
    model_centered.apply(weights_init_uniform_center)

    model_rule = Net()
    model_rule.apply(weights_init_uniform_rule)
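# weights_init_uniform and weights_init_uniform_center are applied above but
# not defined in this snippet. A minimal sketch of what they plausibly do
# (they would need to sit above the __main__ block in the real file); the
# exact ranges are assumptions.
def weights_init_uniform(m):
    # naive baseline: uniform weights in [0.0, 1.0) (range is an assumption)
    if m.__class__.__name__.find('Linear') != -1:
        m.weight.data.uniform_(0.0, 1.0)
        m.bias.data.fill_(0)


def weights_init_uniform_center(m):
    # centered baseline: uniform weights in [-0.5, 0.5) (range is an assumption)
    if m.__class__.__name__.find('Linear') != -1:
        m.weight.data.uniform_(-0.5, 0.5)
        m.bias.data.fill_(0)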