def init_lgm(gm_type, input_size, encoder_layers, latent_size, decoder_layers=None):
    """Build an ``LGM`` from encoder (and optionally decoder) layer definitions.

    Args:
        gm_type: Accepted for interface compatibility; not used in this function.
        input_size: Passed to ``FeedForward`` as the encoder input size and to
            ``transpose_layer_defs`` when the decoder is derived.
        encoder_layers: Sequence of layer definitions for the encoder. A final
            ``('linear', [2 * latent_size])`` layer is added to a *copy*; the
            caller's sequence is left untouched.
        latent_size: Size of the latent space; also the decoder input size.
        decoder_layers: Optional decoder layer definitions. When ``None`` they
            are derived from the encoder definitions.

    Returns:
        The ``LGM`` built from ``latent_size``, the encoder and the decoder.
    """
    # Copy before extending: the previous in-place `+=` grew the caller's list,
    # so reusing one layer list for several models stacked extra output layers.
    encoder_layers = list(encoder_layers) + [('linear', [2 * latent_size])]
    encoder = FeedForward(input_size, encoder_layers, flatten=False)

    # NOTE(review): the source formatting was collapsed; deriving *and*
    # transposing are assumed to happen only when no decoder is given - confirm.
    if decoder_layers is None:
        # Reuse the encoder stack without its output layer, ending in a
        # latent-sized linear layer, then transpose the definitions.
        decoder_layers = encoder_layers[:-1] + [('linear', [latent_size])]
        decoder_layers = transpose_layer_defs(decoder_layers, input_size)

    decoder = FeedForward(latent_size, decoder_layers, flatten=True)

    return LGM(latent_size, encoder, decoder)
def feedforward(config: BaseConfig, trial: optuna.trial.Trial) -> pl.LightningModule:
    """Build a lightning module whose hyperparameters are drawn from an optuna trial.

    Args:
        config (BaseConfig): the hard-coded configuration; ``NUM_INPUTS``,
            ``NUM_OUTPUTS`` and ``MAX_EPOCHS`` are read from it.
        trial (optuna.Trial): optuna trial used to suggest the tunable values.

    Returns:
        pl.LightningModule: the ``TemporalConvNet`` instance created below.
    """
    # Network hyperparameters (suggested in this order).
    num_hidden = trial.suggest_int('num_hidden', 1, 4)
    num_layers = trial.suggest_int('num_layers', 1, 2)
    dropout = trial.suggest_float('dropout', 0.0, 0.5)
    activation = trial.suggest_categorical('activation', ['relu', 'none'])

    # NOTE(review): `model` is constructed but never handed to anything below -
    # confirm whether it was meant to be passed to the lightning module.
    model = FeedForward(
        num_inputs=config.NUM_INPUTS,
        num_outputs=config.NUM_OUTPUTS,
        num_hidden=num_hidden,
        num_layers=num_layers,
        dropout=dropout,
        activation=activation,
    )

    # Training hyperparameters used for the training configuration.
    config_lr = trial.suggest_loguniform('lr', 1e-3, 1e-0)
    config_weight_decay = trial.suggest_loguniform('weight_decay', 1e-5, 1e-1)
    training_config = get_training_config(
        lr=config_lr,
        weight_decay=config_weight_decay,
        max_epochs=config.MAX_EPOCHS,
    )

    # The same parameter names are requested from the trial a second time for
    # the module itself, exactly as the training configuration did above.
    module_lr = trial.suggest_loguniform('lr', 1e-3, 1e-0)
    module_weight_decay = trial.suggest_loguniform('weight_decay', 1e-5, 1e-1)

    return TemporalConvNet(
        training_config=training_config,
        lr=module_lr,
        weight_decay=module_weight_decay,
        max_epochs=config.MAX_EPOCHS,
    )
def init_decoder(latent_size, decoder_layers, img_size, noisy_latents=True, learn_latent_stats=False):
    """Build the decoder, adapting its last layer to the image channel count.

    The first argument of the final layer definition is rewritten from
    ``img_size[0]`` (the number of channels):

    * ``'tconv'``: the first argument is replaced by the channel count.
    * ``'linear'``: the first argument is multiplied by the channel count.
    * any other layer name: the arguments are left as they are.

    Args:
        latent_size: Input size passed to ``FeedForward``.
        decoder_layers: Non-empty sequence of layer definitions, each indexable
            as ``(name, args, *extras)``. Lists and tuples are both accepted and
            the caller's sequence is not modified.
        img_size: Sequence whose first entry is the number of channels.
        noisy_latents: Currently unused.
        learn_latent_stats: Currently unused.

    Returns:
        The ``FeedForward`` decoder built from the adapted layer definitions.
    """
    output_layer = decoder_layers[-1]
    output_layer_name, output_layer_args = output_layer[:2]
    n_channels = img_size[0]

    # Slices are coerced to lists: layer definitions are commonly written as
    # tuples, and `list + tuple` raises TypeError.
    if output_layer_name == 'tconv':
        output_layer_args = [n_channels] + list(output_layer_args[1:])
    elif output_layer_name == 'linear':
        output_layer_args = ([n_channels * output_layer_args[0]]
                             + list(output_layer_args[1:]))

    output_layer = [output_layer_name, output_layer_args] + list(output_layer[2:])
    decoder_layers = list(decoder_layers[:-1]) + [output_layer]

    decoder = FeedForward(latent_size, decoder_layers)

    # noise_layer = GaussianNoise(0.01)
    # decoder_layers.insert(0, noise_layer)

    return decoder
def __init__(self, n_actions, latent_size):
    """Store the sizes and create the projection network.

    Args:
        n_actions: Number of actions; contributes ``n_actions`` inputs to the
            projection network.
        latent_size: Latent size; the projection takes ``2 * latent_size``
            further inputs and its last linear layer has ``latent_size`` units.
    """
    super().__init__()

    self.n_actions = n_actions
    self.latent_size = latent_size

    # Input width of the projection: 2 * latent_size plus n_actions.
    projection_input_size = 2 * latent_size + n_actions
    projection_layers = [
        ('linear', [128]),
        ('relu', ),
        ('batch_norm', [1]),
        ('linear', [latent_size]),
    ]
    self.projection = FeedForward(projection_input_size, projection_layers)
def init_composer(gm_type, n_actions, input_size, encoder_layers, latent_size, mixing_layer=None, decoder_layers=None):
    """Build a ``Composer`` wrapping an ``LGM`` created from layer definitions.

    Args:
        gm_type: Accepted for interface compatibility; not used in this function.
        n_actions: Number of actions, passed on to ``Composer``.
        input_size: Passed to ``FeedForward`` as the encoder input size and to
            ``transpose_layer_defs`` when the decoder is derived.
        encoder_layers: Sequence of layer definitions for the encoder. A final
            ``('linear', [2 * latent_size])`` layer is added to a *copy*; the
            caller's sequence is left untouched.
        latent_size: Size of the latent space; also the decoder input size.
        mixing_layer: Accepted for interface compatibility; not used here.
        decoder_layers: Optional decoder layer definitions. When ``None`` they
            are derived from the encoder definitions.

    Returns:
        The ``Composer`` built from ``latent_size``, ``n_actions`` and the LGM.
    """
    # Copy before extending: the previous in-place `+=` grew the caller's list,
    # so reusing one layer list for several models stacked extra output layers.
    encoder_layers = list(encoder_layers) + [('linear', [2 * latent_size])]
    encoder = FeedForward(input_size, encoder_layers, flatten=False)

    # NOTE(review): the source formatting was collapsed; deriving *and*
    # transposing are assumed to happen only when no decoder is given - confirm.
    if decoder_layers is None:
        # Reuse the encoder stack without its output layer, ending in a
        # latent-sized linear layer, then transpose the definitions.
        decoder_layers = encoder_layers[:-1] + [('linear', [latent_size])]
        decoder_layers = transpose_layer_defs(decoder_layers, input_size)

    decoder = FeedForward(latent_size, decoder_layers, flatten=True)
    lgm = LGM(latent_size, encoder, decoder)

    return Composer(latent_size, n_actions, lgm)
def main():
    """Parse the command line, build the selected model and train it.

    Raises:
        ValueError: If ``args.model`` is neither ``'feedforward'`` nor ``'cnn'``.
    """
    # Parse Arguments --> Convert from Namespace --> Dict --> Namespace because of weird WandB Bug
    args = Namespace(**ArgumentParser().parse_args().as_dict())

    # Create Logger. The run name is either the user-supplied name or a summary
    # of the main settings, followed by '+' and a timestamp.
    if args.run_name is None:
        run_prefix = "%s-%s-%d-%.1g" % (args.model, args.opt, args.bsz, args.lr)
    else:
        run_prefix = args.run_name
    run_name = run_prefix + '+' + datetime.now().strftime('%m/%d-[%H:%M]')

    wandb = WandbLogger(name=run_name, save_dir=args.save_dir, project=args.project, offline=not args.sync)

    # Create MNIST Module
    if args.model == 'feedforward':
        nn = FeedForward(args)
    elif args.model == 'cnn':
        nn = CNN(args)
    else:
        # Previously an unknown model name left `nn` unbound and surfaced later
        # as an UnboundLocalError; fail with an explicit message instead.
        raise ValueError(
            f"Unknown model {args.model!r}; expected 'feedforward' or 'cnn'.")

    # Prepare Data and Populate Data Loader
    nn.prepare_data()
    nn.train_dataloader()

    # Create Trainer
    trainer = pl.Trainer(default_save_path=args.save_dir, max_epochs=10, logger=wandb, gpus=args.gpus)

    # Watch Histogram of Gradients
    wandb.experiment.watch(nn, log='gradients', log_freq=100)

    # Fit
    trainer.fit(nn)
def __init__(self,
             features: List[str],
             targets: List[str],
             norm: Normalize,
             ds: xr.Dataset,
             q10_init: float = 1.5,
             hidden_dim: int = 128,
             num_layers: int = 2,
             learning_rate: float = 0.01,
             weight_decay: float = 0.1,
             dropout: float = 0.,
             activation: str = 'tanh',
             num_steps: int = 0) -> None:
    """Hybrid Q10 model.

    Note that restoring is not working currently as the model training is only taking
    some minutes.

    Args:
        features: Names of the input variables.
        targets: Names of the target variables.
        norm: Provides the normalization layers via ``get_normalization_layer``.
        ds: Dataset kept on the instance for storing results.
        q10_init: Initial value of the trainable ``q10`` parameter.
        hidden_dim: ``num_hidden`` of the feed-forward network.
        num_layers: ``num_layers`` of the feed-forward network.
        learning_rate: Saved as a hyperparameter.
        weight_decay: Saved as a hyperparameter.
        dropout: ``dropout`` of the feed-forward network.
        activation: ``activation`` of the feed-forward network.
        num_steps: Stored on the instance as ``num_steps``.
    """
    super().__init__()

    self.save_hyperparameters('features', 'targets', 'q10_init', 'hidden_dim', 'num_layers', 'dropout',
                              'activation', 'learning_rate', 'weight_decay')

    self.features = features
    self.targets = targets

    self.q10_init = q10_init

    self.input_norm = norm.get_normalization_layer(variables=self.features, invert=False, stack=True)

    self.nn = FeedForward(
        num_inputs=len(self.features),
        num_outputs=len(self.targets),
        num_hidden=hidden_dim,
        num_layers=num_layers,
        dropout=dropout,
        dropout_last=False,
        activation=activation,
    )

    self.target_norm = norm.get_normalization_layer(variables=self.targets, invert=False, stack=True)
    self.target_denorm = norm.get_normalization_layer(variables=self.targets, invert=True, stack=True)

    self.criterion = torch.nn.MSELoss()

    # Trainable Q10 parameter, initialised to `q10_init`.
    self.q10 = torch.nn.Parameter(torch.ones(1) * self.q10_init)
    self.ta_ref = 15.0

    self.num_steps = num_steps

    # Used for storing results.
    self.ds = ds

    # Error if more than 100000 steps--ok here, but careful if you copy code for other projects!.
    # NaN-filled so that entries never written stay recognisable.
    self.q10_history = np.full(100000, np.nan, dtype=np.float32)
class Q10Model(pl.LightningModule):
    """Hybrid Q10 model combining a feed-forward network with a Q10 temperature term.

    The network output is passed through a softplus to give ``rb``; the
    prediction is ``reco = rb * q10 ** (0.1 * (ta - ta_ref))`` where ``q10`` is
    a trainable parameter.
    """

    def __init__(self,
                 features: List[str],
                 targets: List[str],
                 norm: Normalize,
                 ds: xr.Dataset,
                 q10_init: float = 1.5,
                 hidden_dim: int = 128,
                 num_layers: int = 2,
                 learning_rate: float = 0.01,
                 weight_decay: float = 0.1,
                 dropout: float = 0.,
                 activation: str = 'tanh',
                 num_steps: int = 0) -> None:
        """Hybrid Q10 model.

        Note that restoring is not working currently as the model training is only taking
        some minutes.

        Args:
            features: Names of the input variables.
            targets: Names of the target variables.
            norm: Provides the normalization layers via ``get_normalization_layer``.
            ds: Dataset that receives the validation predictions
                (``reco_pred`` and ``rb_pred``).
            q10_init: Initial value of the trainable ``q10`` parameter.
            hidden_dim: ``num_hidden`` of the feed-forward network.
            num_layers: ``num_layers`` of the feed-forward network.
            learning_rate: Learning rate of the network parameters; ``q10``
                uses ten times this value.
            weight_decay: Weight decay of the network parameters.
            dropout: ``dropout`` of the feed-forward network.
            activation: ``activation`` of the feed-forward network.
            num_steps: Stored on the instance as ``num_steps``.
        """
        super().__init__()

        self.save_hyperparameters('features', 'targets', 'q10_init', 'hidden_dim', 'num_layers', 'dropout',
                                  'activation', 'learning_rate', 'weight_decay')

        self.features = features
        self.targets = targets

        self.q10_init = q10_init

        self.input_norm = norm.get_normalization_layer(variables=self.features, invert=False, stack=True)

        self.nn = FeedForward(
            num_inputs=len(self.features),
            num_outputs=len(self.targets),
            num_hidden=hidden_dim,
            num_layers=num_layers,
            dropout=dropout,
            dropout_last=False,
            activation=activation,
        )

        self.target_norm = norm.get_normalization_layer(variables=self.targets, invert=False, stack=True)
        self.target_denorm = norm.get_normalization_layer(variables=self.targets, invert=True, stack=True)

        self.criterion = torch.nn.MSELoss()

        # Trainable Q10 parameter, initialised to `q10_init`.
        self.q10 = torch.nn.Parameter(torch.ones(1) * self.q10_init)
        self.ta_ref = 15.0

        self.num_steps = num_steps

        # Used for storing results.
        self.ds = ds

        # Error if more than 100000 steps--ok here, but careful if you copy code for other projects!.
        # NaN-filled so that entries never written stay recognisable.
        self.q10_history = np.full(100000, np.nan, dtype=np.float32)

    def forward(self, x: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return ``(reco, rb)`` for a batch.

        Args:
            x: Dict of features and targets; must contain ``'ta'``.

        Returns:
            ``reco`` (``rb`` scaled by the Q10 term) and ``rb`` (softplus of the
            network output).
        """
        # Note that `x` is a dict of features and targets, input_norm extracts *only* features and stacks
        # them along last dimension.
        z = self.input_norm(x)

        # Forward pass through NN.
        z = self.nn(z)

        # No denormalization done currently.
        rb = torchf.softplus(z)

        # Physical part.
        reco = rb * self.q10**(0.1 * (x['ta'] - self.ta_ref))

        return reco, rb

    def criterion_normed(self, y_hat: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        """Calculate criterion on normalized predictions and target."""
        return self.criterion(self.target_norm(y_hat), self.target_norm(y))

    def training_step(self, batch: Tuple[Dict[str, torch.Tensor], torch.Tensor], batch_idx: int) -> torch.Tensor:
        """Compute the training loss and record the current Q10 value."""
        # Split batch (a dict) into actual data and the time-index returned by the dataset.
        batch, _ = batch

        # self(...) calls self.forward(...) with some extras. The `rb` is not needed here.
        reco_hat, _ = self(batch)

        # Calculate loss on normalized data.
        loss = self.criterion_normed(reco_hat, batch['reco'])

        # Save Q10 values, we want to know how they evolve with training,
        self.q10_history[self.global_step] = self.q10.item()

        # Logging.
        self.log('train_loss', loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)
        self.log('q10', self.q10, on_step=True, on_epoch=False, prog_bar=False, logger=True)

        return loss

    def validation_step(self, batch: Tuple[Dict[str, torch.Tensor], torch.Tensor],
                        batch_idx: int) -> Dict[str, torch.Tensor]:
        """Compute the validation loss and return the predictions with their time index."""
        # Predictions are stored in validation step. This is not best practice, but we are more interested
        # in predictions over training than on the final test predictions here.

        # Split batch (a dict) into actual data and the time-index returned by the dataset.
        batch, idx = batch

        # self(...) calls self.forward(...) with some extras.
        reco_hat, rb_hat = self(batch)

        # Calculate loss on normalized data.
        loss = self.criterion_normed(reco_hat, batch['reco'])

        # Logging.
        self.log('valid_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)

        # This dict is available in `validation_epoch_end`.
        return {'reco_hat': reco_hat, 'rb_hat': rb_hat, 'idx': idx}

    def validation_epoch_end(self, validation_step_outputs) -> None:
        """Write the epoch's validation predictions into ``self.ds``."""
        # Iterate results from each validation step.
        for item in validation_step_outputs:
            reco_hat = item['reco_hat'][:, 0].cpu()
            rb_hat = item['rb_hat'][:, 0].cpu()
            idx = item['idx'].cpu()

            # Assign predictions to the right time steps.
            self.ds['reco_pred'].values[self.current_epoch, idx] = reco_hat
            self.ds['rb_pred'].values[self.current_epoch, idx] = rb_hat

    def test_step(self, batch: Tuple[Dict[str, torch.Tensor], torch.Tensor], batch_idx: int) -> None:
        """Log the loss on a test batch; nothing is returned."""
        # Evaluation on test set.
        batch, _ = batch
        reco_hat, _ = self(batch)
        loss = self.criterion_normed(reco_hat, batch['reco'])
        self.log('test_loss', loss)

    def configure_optimizers(self) -> torch.optim.Optimizer:
        """Create the AdamW optimizer with separate settings for the network and ``q10``.

        The network uses the configured learning rate and weight decay; ``q10``
        uses ten times the learning rate and no weight decay.
        """
        # Per-group overrides must use the optimizer's own option name `lr`.
        # An unknown key such as `learning_rate` is kept on the group but never
        # read, which left both groups at AdamW's default learning rate.
        optimizer = torch.optim.AdamW([
            {
                'params': self.nn.parameters(),
                'weight_decay': self.hparams.weight_decay,
                'lr': self.hparams.learning_rate,
            },
            {
                'params': [self.q10],
                'weight_decay': 0.0,
                'lr': self.hparams.learning_rate * 10,
            },
        ])

        return optimizer

    @staticmethod
    def add_model_specific_args(parent_parser: ArgumentParser) -> ArgumentParser:
        """Return a parser extending ``parent_parser`` with the model's command-line options."""
        parser = ArgumentParser(parents=[parent_parser], add_help=False)
        parser.add_argument('--hidden_dim', type=int, default=16)
        parser.add_argument('--num_layers', type=int, default=2)
        parser.add_argument('--c', type=float, default=0.1)
        parser.add_argument('--learning_rate', type=float, default=0.05)
        parser.add_argument('--weight_decay', type=float, default=0.1)
        return parser