def test_learning_rate_update(self):
    """Verify a learning rate set through ``param_groups`` reaches the tile."""
    batch_x = Tensor([[0.1, 0.2], [0.2, 0.4]])
    batch_y = Tensor([[0.3], [0.6]])

    first_layer = self.get_layer(2, 3)
    second_layer = self.get_layer(3, 1)
    model = Sequential(first_layer, second_layer)

    if self.use_cuda:
        batch_x = batch_x.cuda()
        batch_y = batch_y.cuda()
        model = model.cuda()

    optimizer = AnalogSGD(model.parameters(), lr=0.5)
    optimizer.regroup_param_groups(model)

    # Override the learning rate the standard PyTorch way.
    updated_lr = 0.07
    for group in optimizer.param_groups:
        group['lr'] = updated_lr

    # One forward/backward/step cycle so the optimizer pushes the rate down.
    loss = mse_loss(model(batch_x), batch_y)
    loss.backward()
    optimizer.step()

    # Only meaningful when the tile (not torch) performs the update.
    if not first_layer.analog_tile.get_analog_ctx().use_torch_update:
        self.assertAlmostEqual(first_layer.analog_tile.get_learning_rate(),
                               updated_lr)
def create_sgd_optimizer(model, learning_rate=0.05):
    """Create the analog-aware optimizer.

    Args:
        model (nn.Module): model to be trained.
        learning_rate (float): learning rate for the optimizer.
            Defaults to ``0.05``, the value previously hard-coded.

    Returns:
        AnalogSGD: optimizer with its parameter groups regrouped so each
        analog layer is handled by the analog update path.
    """
    # Generalized: the learning rate is now a parameter (default keeps the
    # old behavior), consistent with the other create_sgd_optimizer variant.
    optimizer = AnalogSGD(model.parameters(), lr=learning_rate)
    optimizer.regroup_param_groups(model)
    return optimizer
def create_sgd_optimizer(model, learning_rate):
    """Create the analog-aware optimizer.

    Args:
        model (nn.Module): model to be trained.
        learning_rate (float): global parameter to define learning rate.

    Returns:
        AnalogSGD: the configured analog-aware optimizer.
    """
    opt = AnalogSGD(model.parameters(), lr=learning_rate)
    opt.regroup_param_groups(model)
    return opt
def train_model(model, loss_func, x_b, y_b):
    """Train the model for a fixed number of epochs on one batch."""
    optimizer = AnalogSGD(model.parameters(), lr=0.1)
    optimizer.regroup_param_groups(model)

    for _epoch in range(10):
        optimizer.zero_grad()
        loss = loss_func(model(x_b), y_b)
        loss.backward()
        optimizer.step()
def get_optimizer(self, learning_rate: float, model: Module) -> Optimizer:
    """Return the `Optimizer` for the experiment.

    Args:
        learning_rate: the learning rate used by the optimizer.
        model: the neural network to be trained.

    Returns:
        the optimizer to be used in the experiment.
    """
    analog_opt = AnalogSGD(model.parameters(), lr=learning_rate)
    analog_opt.regroup_param_groups(model)
    return analog_opt
def test_learning_rate_update_fn(self):
    """Check that ``set_learning_rate`` propagates to every analog tile."""
    layers = [self.get_layer(2, 3), self.get_layer(3, 1)]
    model = Sequential(*layers)
    if self.use_cuda:
        model = model.cuda()

    optimizer = AnalogSGD(model.parameters(), lr=0.5)
    optimizer.regroup_param_groups(model)

    # Change the rate through the optimizer-level helper instead of
    # touching param_groups directly.
    target_lr = 0.07
    optimizer.set_learning_rate(target_lr)

    for layer in layers:
        self.assertAlmostEqual(layer.analog_tile.get_learning_rate(),
                               target_lr)
def train_once(model, y_in, y_out, analog_if, use_cuda=False):
    """Train the model for two steps and return the final prediction.

    Args:
        model: recurrent network to train.
        y_in: input tensor; dim 1 is assumed to be the batch dimension
            (``y_in.size()[1]`` sizes the hidden states — TODO confirm).
        y_out: target tensor.
        analog_if (bool): if True, build a per-layer list of ``LSTMState``
            hidden states (the analog LSTM format); otherwise build the
            stacked (h, c) tensor pair native PyTorch LSTMs expect.
        use_cuda (bool): move the hidden states to CUDA if True.

    Returns:
        numpy.ndarray: detached prediction after the training steps.
    """
    criterion = MSELoss()
    # Fix: `nesterov` is a boolean flag in torch.optim.SGD; the previous
    # `nesterov=0.0` only worked through float falsiness.
    optimizer = AnalogSGD(model.parameters(), lr=0.5, momentum=0.0,
                          nesterov=False)
    optimizer.regroup_param_groups(model)

    batch_size = y_in.size()[1]

    def to_device(tensor):
        # Hoists the cuda/cpu choice so the state construction below is
        # written once instead of duplicated per device.
        return tensor.cuda() if use_cuda else tensor

    if analog_if:
        # Why is this format so different?
        # TODO: better use same state format as for native Pytorch's LSTM?
        states = [
            LSTMState(to_device(zeros(batch_size, model.hidden_size)),
                      to_device(zeros(batch_size, model.hidden_size)))
            for _ in range(model.num_layers)
        ]
    else:
        states = (
            to_device(zeros(model.num_layers, batch_size, model.hidden_size)),
            to_device(zeros(model.num_layers, batch_size, model.hidden_size)),
        )

    for _ in range(2):
        optimizer.zero_grad()
        pred, _ = model(y_in, states)
        loss = criterion(pred.mean(axis=2, keepdim=True), y_out)
        loss.backward()
        optimizer.step()

    return pred.detach().cpu().numpy()
def get_model_and_x(self):
    """Trains a simple model and returns it together with its input."""
    # Input and expected-output datasets.
    x = Tensor([[0.1, 0.2, 0.4, 0.3], [0.2, 0.1, 0.1, 0.3]])
    y = Tensor([[1.0, 0.5], [0.7, 0.3]])

    # Single-layer network using a constant step device type.
    rpu_config = self.get_rpu_config()
    rpu_config.forward.out_res = -1.  # Turn off (output) ADC discretization.
    rpu_config.forward.w_noise_type = WeightNoiseType.ADDITIVE_CONSTANT
    rpu_config.forward.w_noise = 0.02
    rpu_config.noise_model = PCMLikeNoiseModel(g_max=25.0)

    model = AnalogLinear(4, 2, bias=True, rpu_config=rpu_config)

    # Move the model and tensors to CUDA when available.
    if self.use_cuda:
        x = x.cuda()
        y = y.cuda()
        model.cuda()

    # Analog-aware optimizer, prepared for the analog layers.
    optimizer = AnalogSGD(model.parameters(), lr=0.1)
    optimizer.regroup_param_groups(model)

    # Short training loop: forward, MSE loss, backward, analog update.
    for _ in range(100):
        optimizer.zero_grad()
        loss = mse_loss(model(x), y)
        loss.backward()
        optimizer.step()

    return model, x
def main():
    """Train a PyTorch GAN analog model to generate fake characters alla MNIST dataset."""
    # Make sure the directory where to save the results exist.
    # Results include examples of the fake images generated.
    os.makedirs(RESULTS, exist_ok=True)
    torch.manual_seed(SEED)

    # Load MNIST dataset as tensors.
    dataloader = DataLoader(
        MNIST(PATH_DATASET, download=True, transform=transforms.ToTensor()),
        batch_size=BATCH_SIZE,
        shuffle=True,
    )

    print(f'\n{datetime.now().time().replace(microsecond=0)} --- '
          f'Started GAN Example')

    # Generator and its analog-aware optimizer.
    gen = Generator(Z_DIM).to(DEVICE)
    gen_opt = AnalogSGD(gen.parameters(), lr=LR)
    gen_opt.regroup_param_groups(gen)

    # Discriminator and its analog-aware optimizer.
    disc = Discriminator().to(DEVICE)
    disc_opt = AnalogSGD(disc.parameters(), lr=LR)
    disc_opt.regroup_param_groups(disc)

    print(RPU_CONFIG)
    print(gen)
    print(disc)

    criterion = nn.BCEWithLogitsLoss()
    training_loop(gen, disc, gen_opt, disc_opt, criterion, dataloader,
                  N_EPOCHS, DISPLAY_STEP)
    show_animation_fake_images()

    print(f'{datetime.now().time().replace(microsecond=0)} --- '
          f'Completed GAN Example')
from aihwkit.simulator.rpu_base import cuda

# Prepare the datasets (input and expected output).
x = Tensor([[0.1, 0.2, 0.4, 0.3], [0.2, 0.1, 0.1, 0.3]])
y = Tensor([[1.0, 0.5], [0.7, 0.3]])

# Define a single-layer network, using a constant step device type.
rpu_config = SingleRPUConfig(device=ConstantStepDevice())
model = AnalogLinear(4, 2, bias=True, rpu_config=rpu_config)

# Move the model and tensors to cuda if it is available.
if cuda.is_compiled():
    x = x.cuda()
    y = y.cuda()
    model.cuda()

# Define an analog-aware optimizer, preparing it for using the layers.
opt = AnalogSGD(model.parameters(), lr=0.1)
opt.regroup_param_groups(model)

for epoch in range(100):
    # Fix: clear the gradients accumulated in the previous iteration;
    # without this every backward() sums into .grad and the effective
    # update grows with each epoch.
    opt.zero_grad()

    # Add the training Tensor to the model (input).
    pred = model(x)
    # Add the expected output Tensor.
    loss = mse_loss(pred, y)
    # Run training (backward propagation).
    loss.backward()

    opt.step()

print('Loss error: {:.16f}'.format(loss))
y_out = torch.stack(y_out_2d, dim=0).transpose(0, 1).unsqueeze(2)

# Pick the network variant from the embedding / bidirectional flags.
if WITH_EMBEDDING:
    model = AnalogBidirRNNNetwork() if WITH_BIDIR else AnalogRNNNetwork()
else:
    model = (AnalogBidirRNNNetwork_noEmbedding() if WITH_BIDIR
             else AnalogRNNNetwork_noEmbedding())
model = model.to(DEVICE)

optimizer = AnalogSGD(model.parameters(), lr=LEARNING_RATE)
optimizer.regroup_param_groups(model)
criterion = nn.MSELoss()

# train
losses = []
for i in range(EPOCHS):
    optimizer.zero_grad()
    pred, states = model(y_in, None)
    loss = criterion(pred, y_out)
    print('Epoch = %d: Train Perplexity = %f'
          % (i, np.exp(loss.detach().cpu().numpy())))
    loss.backward()
    optimizer.step()