generator: Generator = 'default'
encoder_decoder: EncoderDecoder


@option(TransformerConfigs.feed_forward, 'default')
def _feed_forward(c: TransformerConfigs):
    return FeedForward(c.d_model, c.d_ff, c.dropout)


# ### MHA
def _mha(c: TransformerConfigs):
    return MultiHeadAttention(c.n_heads, c.d_model)


calculate(TransformerConfigs.encoder_attn, 'mha', _mha)
calculate(TransformerConfigs.decoder_attn, 'mha', _mha)
calculate(TransformerConfigs.decoder_mem_attn, 'mha', _mha)


# ### Relative MHA
def _relative_mha(c: TransformerConfigs):
    from .relative_mha import RelativeMultiHeadAttention
    return RelativeMultiHeadAttention(c.n_heads, c.d_model)


calculate(TransformerConfigs.encoder_attn, 'relative', _relative_mha)
calculate(TransformerConfigs.decoder_attn, 'relative', _relative_mha)
calculate(TransformerConfigs.decoder_mem_attn, 'relative', _relative_mha)
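# A minimal usage sketch (an assumption, not part of the file above): any
# experiment whose configs nest a `TransformerConfigs` can pick these
# attention variants by their registered names. Here the basic autoregressive
# experiment's `Configs` is borrowed purely for illustration.
from labml import experiment
from labml_nn.transformers.basic.autoregressive_experiment import Configs


def _relative_attention_sketch():
    conf = Configs()
    experiment.create(name='relative_attention_sketch')
    # Hierarchical keys reach into the nested `TransformerConfigs`
    experiment.configs(conf, {'transformer.encoder_attn': 'relative'})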
        x = self.layers(x)
        return x.view(x.shape[0], -1)


def _weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)


# We import the [simple GAN experiment](simple_mnist_experiment.html) and change the
# generator and discriminator networks
calculate(Configs.generator, 'cnn', lambda c: Generator().to(c.device))
calculate(Configs.discriminator, 'cnn', lambda c: Discriminator().to(c.device))


def main():
    conf = Configs()
    experiment.create(name='mnist_dcgan')
    experiment.configs(conf,
                       {'discriminator': 'cnn',
                        'generator': 'cnn',
                        'label_smoothing': 0.01})
    with experiment.start():
        conf.run()


if __name__ == '__main__':
    main()
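# A usage sketch (an assumption; the excerpt does not show where the networks
# call this): `_weights_init` follows the DCGAN initialization scheme and is
# meant to be applied recursively over a network's modules.
def _init_sketch():
    generator = Generator()
    # `nn.Module.apply` walks every sub-module and calls `_weights_init` on it
    generator.apply(_weights_init)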
        tracker.add('accuracy.valid', accuracy(output[idx_valid], labels[idx_valid]))
        # Save logs
        tracker.save()


@option(Configs.dataset)
def cora_dataset(c: Configs):
    """
    Create Cora dataset
    """
    return CoraDataset(c.include_edges)


# Get the number of classes
calculate(Configs.n_classes, lambda c: len(c.dataset.classes))
# Number of features in the input
calculate(Configs.in_features, lambda c: c.dataset.features.shape[1])


@option(Configs.model)
def gat_model(c: Configs):
    """
    Create GAT model
    """
    return GAT(c.in_features, c.n_hidden, c.n_classes, c.n_heads, c.dropout).to(c.device)


@option(Configs.optimizer)
def _optimizer(c: Configs):
    """
        [transforms.ToTensor(),
         transforms.Normalize((0.5,), (0.5,))])


@option(Configs.batch_step)
def gan_batch_step(c: Configs):
    return GANBatchStep(discriminator=c.discriminator,
                        generator=c.generator,
                        discriminator_optimizer=c.discriminator_optimizer,
                        generator_optimizer=c.generator_optimizer,
                        discriminator_loss=c.discriminator_loss,
                        generator_loss=c.generator_loss,
                        discriminator_k=c.discriminator_k)


calculate(Configs.generator, 'mlp', lambda c: Generator().to(c.device))
calculate(Configs.discriminator, 'mlp', lambda c: Discriminator().to(c.device))
calculate(Configs.generator_loss, lambda c: GeneratorLogitsLoss(c.label_smoothing).to(c.device))
calculate(Configs.discriminator_loss, lambda c: DiscriminatorLogitsLoss(c.label_smoothing).to(c.device))


@option(Configs.discriminator_optimizer)
def _discriminator_optimizer(c: Configs):
    opt_conf = OptimizerConfigs()
    opt_conf.optimizer = 'Adam'
    opt_conf.parameters = c.discriminator.parameters()
    opt_conf.learning_rate = 2.5e-4
    # Setting the exponential decay rate for the first moment of the gradient,
    # $\beta_1$, to `0.5` is important.
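    # A sketch of how the function would continue (an assumption; the excerpt
    # cuts off at the comment above): $\beta_1 = 0.5$ as prescribed, with
    # $\beta_2$ left at Adam's common default of `0.999`.
    opt_conf.betas = (0.5, 0.999)
    return opt_conf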
@option(Configs.model)
def cnn_model(c: Configs):
    return CnnModel(price_mean=c.train_dataset.price_mean,
                    price_std=c.train_dataset.price_std,
                    volume_mean=c.train_dataset.volume_mean,
                    volume_std=c.train_dataset.volume_std,
                    y_mean=c.train_dataset.y_mean,
                    y_std=c.train_dataset.y_std,
                    activation=c.activation,
                    conv_sizes=c.conv_sizes,
                    dropout=c.dropout).to(c.device)


calculate(Configs.activation, 'relu', [], lambda: nn.ReLU())
calculate(Configs.activation, 'sigmoid', [], lambda: nn.Sigmoid())


def main():
    experiment.create()
    conf = Configs()
    conf.activation = 'relu'
    conf.dropout = 0.1
    experiment.configs(conf, {'conv_sizes': [(128, 2), (256, 4)],
                              'optimizer.learning_rate': 1e-4,
                              'optimizer.optimizer': 'Adam'})
    with experiment.start():
        with monit.section('Initialize'):
        # Log the loss at the training sequence length
        tracker.add(f'loss.{self.seq_len - 1}.',
                    self.loss_func(output[self.seq_len - 1], target[self.seq_len - 1]))
        # Log the loss at the first token
        tracker.add(f'loss.0.', self.loss_func(output[0], target[0]))
        # Log the loss at the final token
        tracker.add(f'loss.{int(output.shape[0]) - 1}.', self.loss_func(output[-1], target[-1]))


def _alibi_mha(c: TransformerConfigs):
    """
    Create an ALiBi attention module
    """
    return AlibiMultiHeadAttention(c.n_heads, c.d_model, dropout_prob=c.dropout)


# Set all attention mechanisms to ALiBi
calculate(TransformerConfigs.encoder_attn, 'alibi_mha', _alibi_mha)
calculate(TransformerConfigs.decoder_attn, 'alibi_mha', _alibi_mha)
calculate(TransformerConfigs.decoder_mem_attn, 'alibi_mha', _alibi_mha)


@option(Configs.valid_loader)
def shuffled_longer_valid_loader(c: Configs):
    """
    Shuffled validation data loader with `valid_seq_len` sequence length
    """
    return DataLoader(SequentialUnBatchedDataset(text=c.text.valid,
                                                 dataset=c.text,
                                                 seq_len=c.valid_seq_len),
                      batch_size=c.batch_size,
                      collate_fn=transpose_batch,
                      shuffle=True)
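# A usage sketch (an assumption, not part of the excerpt): pick ALiBi
# attention and the longer shuffled validation loader by their registered
# names.
from labml import experiment


def _alibi_sketch():
    conf = Configs()
    experiment.create(name='alibi_sketch')
    experiment.configs(conf, {'transformer.encoder_attn': 'alibi_mha',
                              'valid_loader': 'shuffled_longer_valid_loader'})
    with experiment.start():
        conf.run()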
summary: This experiment generates MNIST images using a convolutional neural network.
---

# WGAN experiment with MNIST
"""

from labml import experiment
from labml.configs import calculate

# Import configurations from the [DCGAN experiment](../dcgan/index.html)
from labml_nn.gan.dcgan import Configs
# Import the [Wasserstein GAN losses](./index.html)
from labml_nn.gan.wasserstein import GeneratorLoss, DiscriminatorLoss

# Set configuration options for the Wasserstein GAN losses
calculate(Configs.generator_loss, 'wasserstein', lambda c: GeneratorLoss())
calculate(Configs.discriminator_loss, 'wasserstein', lambda c: DiscriminatorLoss())


def main():
    # Create configs object
    conf = Configs()
    # Create experiment
    experiment.create(name='mnist_wasserstein_dcgan', comment='test')
    # Override configurations
    experiment.configs(conf, {
        'discriminator': 'cnn',
        'generator': 'cnn',
        'label_smoothing': 0.01,
""" from labml import experiment from labml.configs import option, calculate from labml_nn.transformers import TransformerConfigs from labml_nn.transformers.basic.autoregressive_experiment import AutoregressiveTransformer, Configs # ### Rotary PE attention def _rotary_pe_mha(c: TransformerConfigs): from labml_nn.transformers.rope import RotaryPEMultiHeadAttention return RotaryPEMultiHeadAttention(c.n_heads, c.d_model) # Configuration options calculate(TransformerConfigs.encoder_attn, 'rotary', _rotary_pe_mha) calculate(TransformerConfigs.decoder_attn, 'rotary', _rotary_pe_mha) calculate(TransformerConfigs.decoder_mem_attn, 'rotary', _rotary_pe_mha) @option(Configs.model, 'rotary_pe_transformer') def _model(c: Configs): """ Create an autoregressive model and initialize weights """ m = AutoregressiveTransformer(c.transformer.encoder, c.transformer.src_embed, c.transformer.generator).to(c.device) return m
from labml.configs import BaseConfigs, option, calculate
from labml.internal.configs.processor import ConfigProcessor


class Sample(BaseConfigs):
    total_global_steps: int = 10
    workers_count: int = 12
    input_model: int
    model: int


@option(Sample.input_model)
def input_model(c: Sample):
    return c.total_global_steps * 2


calculate(Sample.model, [Sample.workers_count], lambda x: x * 5)

configs = Sample()

processor = ConfigProcessor(configs)
processor()
processor.print()

print(configs.__dict__)
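# With the defaults above, the processor would resolve
# `input_model` to `total_global_steps * 2 = 20` and
# `model` to `workers_count * 5 = 60`.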