def __init__(self, input_dim=88, z_dim=100, emission_dim=100, transition_dim=200,
             rnn_dim=600, num_layers=1, rnn_dropout_rate=0.0, num_iafs=0,
             iaf_dim=50, use_cuda=False):
    """Instantiate the sub-networks and trainable initial states.

    Builds the emitter/transition/combiner networks, the guide RNN, any
    optional IAF normalizing flows, and the learnable parameters that seed
    p(z_1), q(z_1) and the RNN hidden state.
    """
    super().__init__()
    # Networks used by the model and the guide.
    self.emitter = Emitter(input_dim, z_dim, emission_dim)
    self.trans = GatedTransition(z_dim, transition_dim)
    self.combiner = Combiner(z_dim, rnn_dim)
    # nn.RNN only applies dropout between stacked layers, so force it to
    # zero for a single-layer network.
    if num_layers == 1:
        rnn_dropout_rate = 0.
    self.rnn = nn.RNN(input_size=input_dim,
                      hidden_size=rnn_dim,
                      nonlinearity='relu',
                      batch_first=True,
                      bidirectional=False,
                      num_layers=num_layers,
                      dropout=rnn_dropout_rate)
    # Optional inverse autoregressive flows for a richer variational posterior;
    # the ModuleList registers their parameters with this module.
    self.iafs = [affine_autoregressive(z_dim, hidden_dims=[iaf_dim])
                 for _ in range(num_iafs)]
    self.iafs_modules = nn.ModuleList(self.iafs)
    # Trainable tensors that stand in for the (non-existent) latent at t = 0,
    # defining p(z_1) and q(z_1) respectively.
    self.z_0 = nn.Parameter(torch.zeros(z_dim))
    self.z_q_0 = nn.Parameter(torch.zeros(z_dim))
    # Trainable initial hidden state for the RNN.
    self.h_0 = nn.Parameter(torch.zeros(1, 1, rnn_dim))
    self.use_cuda = use_cuda
    # Move every registered submodule/parameter to the GPU when requested.
    if use_cuda:
        self.cuda()
def __init__(self, _c: "VAEConfig"):
    """Build the VAE from a config object.

    Wires up the optimizer, the decoder/transition/combiner networks, the
    convolutional RNN encoder, optional IAF flows, and the trainable initial
    latent/hidden states.

    Args:
        _c: VAEConfig carrying all architecture hyperparameters
            (z_dim, rnn_dim, image_dim, dropout_rate, use_lstm, ...).
    """
    super().__init__()
    self._c = _c
    # Number of pixels in one (H, W) frame once flattened.
    self.image_flatten_dim = _c.image_dim[0] * _c.image_dim[1]
    # ClippedAdam combines Adam with gradient clipping and lr decay.
    adam_params = {
        "lr": _c.init_lr,
        "betas": (0.96, 0.999),
        "clip_norm": 10.0,
        "lrd": 0.99996,
        "weight_decay": 2.0,
    }
    self.optimizer = ClippedAdam(adam_params)
    # Generative-side networks.
    self.emitter = Decoder(_c.z_dim, _c.emitter_channel, dropout_p=_c.dropout_rate)
    self.trans = GatedTransition(_c.z_dim, _c.transition_dim)
    # Guide-side networks.
    self.combiner = Combiner(_c.z_dim, _c.rnn_dim)
    self.crnn = ConvRNN(_c.image_dim, _c.rnn_dim, _c.rnn_layers, _c.dropout_rate,
                        use_lstm=_c.use_lstm, channels=_c.crnn_channel)
    # Optional IAF normalizing flows; ModuleList registers their parameters.
    self.iafs = [
        affine_autoregressive(_c.z_dim, hidden_dims=[_c.iaf_dim])
        for _ in range(_c.num_iafs)
    ]
    self.iafs_modules = nn.ModuleList(self.iafs)
    # Trainable stand-ins for the latent at t = 0 (model and guide) and the
    # RNN initial hidden state.
    self.z_0 = nn.Parameter(torch.zeros(_c.z_dim))
    self.z_q_0 = nn.Parameter(torch.zeros(_c.z_dim))
    self.h_0 = nn.Parameter(torch.zeros(1, 1, _c.rnn_dim))
    if _c.use_lstm:
        # LSTMs additionally need an initial cell state.
        self.c_0 = nn.Parameter(torch.zeros(1, 1, _c.rnn_dim))
    # FIX: the original called self.cuda() unconditionally, which raises a
    # RuntimeError on CPU-only machines; only move to GPU when one exists.
    if torch.cuda.is_available():
        self.cuda()
def test_affine_autoregressive_shapes(self):
    """The transform must preserve batch/event shapes in both stability modes."""
    batch_shapes = [(3,), (3, 4), (3, 4, 2)]
    for stable in (True, False):
        for shape in batch_shapes:
            # The event dimension is always the trailing axis.
            transform = T.affine_autoregressive(shape[-1], stable=stable)
            self._test_shape(shape, transform)
def test_affine_autoregressive_inverses(self):
    """Forward followed by inverse must round-trip for several input sizes."""
    for stable in (True, False):
        for dim in (2, 5, 10):
            transform = T.affine_autoregressive(dim, stable=stable)
            self._test_inverse(dim, transform)
def __init__(self, input_channels=1, z_channels=16, emission_channels=[32, 16],
             transition_channels=32, flatten_channels=[16, 32], rnn_input_dim=32,
             rnn_channels=32, kernel_size=3, height=100, width=100, num_layers=1,
             rnn_dropout_rate=0.0, num_iafs=0, iaf_dim=50, use_cuda=False):
    """Assemble the convolutional deep Markov model.

    Builds the emitter/transition/combiner networks, a convolutional
    flattener feeding the guide RNN, optional IAF flows, and the trainable
    initial latent/hidden states.
    """
    super().__init__()
    self.input_channels = input_channels
    self.rnn_input_dim = rnn_input_dim
    self.height = height
    self.width = width
    # Sub-networks for the generative model and the guide.
    self.emitter = Emitter(width, height, input_channels, z_channels,
                           emission_channels, kernel_size)
    self.trans = GatedTransition(z_channels, transition_channels)
    self.combiner = Combiner(z_channels, rnn_channels)
    # Conv net that flattens each frame into an RNN input vector.
    self.flatten = Flattener(width, height, input_channels, rnn_input_dim,
                             flatten_channels, kernel_size)
    # Record the target device as a string for later tensor placement.
    self.device = 'cuda' if use_cuda else 'cpu'
    # nn.RNN only applies dropout between stacked layers, so force it to
    # zero for a single-layer network.
    if num_layers == 1:
        rnn_dropout_rate = 0.
    self.rnn = nn.RNN(input_size=rnn_input_dim,
                      hidden_size=rnn_channels,
                      batch_first=True,
                      bidirectional=False,
                      num_layers=num_layers,
                      dropout=rnn_dropout_rate)
    # Optional inverse autoregressive flows; the ModuleList registers
    # their parameters with this module.
    self.iafs = [affine_autoregressive(z_channels, hidden_dims=[iaf_dim])
                 for _ in range(num_iafs)]
    self.iafs_modules = nn.ModuleList(self.iafs)
    # Trainable stand-ins for the latent at t = 0, defining p(z_1) and q(z_1).
    self.z_0 = nn.Parameter(torch.zeros(z_channels))
    self.z_q_0 = nn.Parameter(torch.zeros(z_channels))
    # Trainable initial hidden state of the RNN.
    self.h_0 = nn.Parameter(torch.zeros(1, 1, rnn_channels))
    self.use_cuda = use_cuda
    # Move all registered submodules/parameters onto the GPU when requested.
    if use_cuda:
        self.cuda()
def __init__(
    self,
    latent_dim,
    num_item,
    hidden_dim=16,
    ability_merge='mean',
    conditional_posterior=False,
    generative_model='irt',
    num_iafs=0,
    iaf_dim=32,
):
    """Construct the variational IRT model.

    Sets up the ability/item inference networks, optional IAF flows, and a
    decoder chosen by ``generative_model`` ('link', 'deep' or 'residual').
    """
    super().__init__()
    self.latent_dim = latent_dim
    self.ability_dim = latent_dim
    # Each response is a single scalar observation.
    self.response_dim = 1
    self.hidden_dim = hidden_dim
    self.num_item = num_item
    self.ability_merge = ability_merge
    self.conditional_posterior = conditional_posterior
    self.generative_model = generative_model
    self.num_iafs = num_iafs
    self.iaf_dim = iaf_dim
    # Derive item-feature dimensionality and the IRT variant number
    # (used below as f'{self.irt_num}pl').
    self._set_item_feat_dim()
    self._set_irt_num()
    # Optional IAF flows for a richer variational posterior; ModuleList
    # registers their parameters with this module.
    if self.num_iafs > 0:
        self.iafs = [
            affine_autoregressive(self.latent_dim, hidden_dims=[self.iaf_dim])
            for _ in range(self.num_iafs)
        ]
        self.iafs_modules = nn.ModuleList(self.iafs)
    # Ability posterior network, optionally conditioned on item features.
    if self.conditional_posterior:
        self.ability_encoder = ConditionalAbilityInferenceNetwork(
            self.ability_dim,
            self.response_dim,
            self.item_feat_dim,
            self.hidden_dim,
            ability_merge=self.ability_merge,
        )
    else:
        self.ability_encoder = AbilityInferenceNetwork(
            self.ability_dim,
            self.response_dim,
            self.hidden_dim,
            ability_merge=self.ability_merge,
        )
    self.item_encoder = ItemInferenceNetwork(self.num_item, self.item_feat_dim)
    # NOTE(review): there is no branch for the default generative_model='irt',
    # so self.decoder stays unset on that path — presumably handled elsewhere
    # (e.g. a base class); confirm before relying on self.decoder here.
    if self.generative_model == 'link':
        self.decoder = LinkedIRT(
            irt_model=f'{self.irt_num}pl',
            hidden_dim=self.hidden_dim,
        )
    elif self.generative_model == 'deep':
        self.decoder = DeepIRT(
            self.ability_dim,
            irt_model=f'{self.irt_num}pl',
            hidden_dim=self.hidden_dim,
        )
    elif self.generative_model == 'residual':
        self.decoder = ResidualIRT(
            self.ability_dim,
            irt_model=f'{self.irt_num}pl',
            hidden_dim=self.hidden_dim,
        )
    # Apply the custom weight initializer to every submodule.
    self.apply(self.weights_init)