def __init__(self, rbf_dim, dim):
    super(Interaction, self).__init__()
    self._dim = dim
    self.node_layer1 = nn.Linear(dim, dim, bias=False)
    self.cfconv = CFConv(rbf_dim, dim, Softplus(beta=0.5, threshold=14))
    self.node_layer2 = nn.Sequential(
        nn.Linear(dim, dim),
        Softplus(beta=0.5, threshold=14),
        nn.Linear(dim, dim),
    )
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
             padding=0, dilation=1, bias=True):
    # Init torch module
    super(GNJConv2d, self).__init__()

    # Init conv params
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = _pair(kernel_size)
    self.stride = stride
    self.padding = padding
    self.dilation = dilation

    # Init filter latents
    self.weight_mu = Parameter(Tensor(out_channels, in_channels, *self.kernel_size))
    self.weight_logvar = Parameter(Tensor(out_channels, in_channels, *self.kernel_size))
    self.bias = bias
    self.bias_mu = Parameter(Tensor(out_channels)) if self.bias else None
    self.bias_logvar = Parameter(Tensor(out_channels)) if self.bias else None

    # Init prior latents
    self.z_mu = Parameter(Tensor(out_channels))
    self.z_logvar = Parameter(Tensor(out_channels))

    # Set initial parameters
    self._init_params()

    # For brevity in conv2d calls
    self.convargs = [self.stride, self.padding, self.dilation]

    # Util activations
    self.sigmoid = Sigmoid()
    self.softplus = Softplus()
def __init__(self, vocab_size, embed_dim):
    super(BayesianSG, self).__init__()

    # Sizes
    self.vocab_size = vocab_size
    self.embed_dim = embed_dim

    # Priors
    self.prior_locs = Embedding(vocab_size, embed_dim)
    self.prior_scales = Embedding(vocab_size, embed_dim)

    # Inference
    self.embeddings = Embedding(vocab_size, embed_dim)
    self.encoder = Linear(2 * embed_dim, 2 * embed_dim)
    self.affine_loc = Linear(2 * embed_dim, embed_dim)
    self.affine_scale = Linear(2 * embed_dim, embed_dim)
    self.std_normal = MultivariateNormal(torch.zeros(embed_dim), torch.eye(embed_dim))

    # Generation
    self.affine_vocab = Linear(embed_dim, vocab_size)

    # Functions
    self.softmax = Softmax(dim=1)
    self.softplus = Softplus()
    self.relu = ReLU()
def __init__(self, predictor_layers: Iterable[int], hidden_size: int, **unused: Any):
    """
    Initialize model.

    Parameters
    ----------
    predictor_layers: Iterable[int]
        Layer sizes for the MLP, given as an iterable.
    hidden_size: int
        Dimensionality of hidden activations.
    """
    super().__init__()
    self.predictor_layers = predictor_layers
    self.hidden_size = hidden_size
    self.window_size = 1

    # Init layers
    self.model = nn.Sequential()
    last_layer_size = predictor_layers[0]
    self.model.add_module("input", nn.Linear(hidden_size, last_layer_size))
    self.model.add_module("relu0", ReLU())

    for layer_n, current_layer_size in enumerate(predictor_layers[1:]):
        self.model.add_module(f"hidden{layer_n + 1}", nn.Linear(last_layer_size, current_layer_size))
        self.model.add_module(f"relu{layer_n + 1}", ReLU())
        last_layer_size = current_layer_size

    self.model.add_module("out", nn.Linear(last_layer_size, 1))  # Output scalar alpha_t
    # Note: despite the "relu_out" name, this is a Softplus, which keeps alpha_t positive
    self.model.add_module("relu_out", Softplus())

    # Init buffers
    self.hidden_buffer = []  # Buffer in which to store hidden states
    self._buffer_copy = []   # Copy of the main buffer, used when the model switches between modes
def get_activation(name):
    act_name = name.lower()
    m = re.match(r"(\w+)\((\d+\.\d+)\)", act_name)
    if m is not None:
        act_name, alpha = m.groups()
        alpha = float(alpha)
        print(act_name, alpha)
    else:
        alpha = 1.0

    if act_name == 'softplus':
        return Softplus()
    elif act_name == 'ssp':
        return SSP()
    elif act_name == 'elu':
        return ELU(alpha)
    elif act_name == 'relu':
        return ReLU()
    elif act_name == 'selu':
        return SELU()
    elif act_name == 'celu':
        return CELU(alpha)
    elif act_name == 'sigmoid':
        return Sigmoid()
    elif act_name == 'tanh':
        return Tanh()
    else:
        raise NameError("Not supported activation: {}".format(name))
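# A minimal usage sketch for get_activation() above (not from the original
# source; it assumes the function and its imports, including the SSP module,
# are in scope). Strings of the form "name(alpha)" are split by the regex
# into the activation name and a float alpha.
act = get_activation("softplus")   # -> torch.nn.Softplus()
act = get_activation("elu(0.5)")   # parsed as name "elu", alpha=0.5 -> ELU(0.5)
act = get_activation("Tanh")       # matching is case-insensitive -> Tanh()
# get_activation("swish")          # would raise NameError: Not supported activation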
def __init__(self, dim_z=latent_dim):
    super(ConditionalEncoder, self).__init__()
    self.dim_z = dim_z
    kernel_size = 3
    stride = 2
    padding = self.same_padding(kernel_size)
    self.conv0 = Sequential(
        Conv2d(colors_dim, 16, kernel_size=1, stride=1),
        LeakyReLU(negative_slope=negative_slope),
    )
    self.conv1 = Sequential(
        Conv2d(16, 32, kernel_size=kernel_size, stride=stride, padding=padding),
        BatchNorm2d(32, momentum=momentum),
        LeakyReLU(negative_slope=negative_slope),
    )
    self.conv2 = Sequential(
        Conv2d(32, 64, kernel_size=kernel_size, stride=stride, padding=padding),
        BatchNorm2d(64, momentum=momentum),
        LeakyReLU(negative_slope=negative_slope),
    )
    self.conv3 = Sequential(
        Conv2d(64, 128, kernel_size=kernel_size, stride=stride, padding=padding),
        BatchNorm2d(128, momentum=momentum),
        LeakyReLU(negative_slope=negative_slope),
        Flatten(),  # next layer takes flat input with labels appended
    )
    self.dense1 = Sequential(
        Linear(8192, 2048),
        BatchNorm1d(2048, momentum=momentum),
        LeakyReLU(negative_slope=negative_slope),
    )
    self.dense2 = Sequential(
        Linear(2048, self.dim_z),
        BatchNorm1d(self.dim_z, momentum=momentum),
        LeakyReLU(negative_slope=negative_slope),
    )
    self.embedding = Sequential(
        Linear(labels_dim, self.dim_z),
        BatchNorm1d(self.dim_z, momentum=momentum),
        LeakyReLU(negative_slope=negative_slope),
    )
    ## the following take the same input from dense1
    self.dense_z_mu = Linear(128 * 2, self.dim_z)
    self.dense_z_std = Sequential(
        Linear(self.dim_z * 2, self.dim_z),
        Softplus(),
    )
    self.set_optimizer(optimizer, lr=learning_rate, betas=betas)
def __init__(self, img_dim=4096, label_dim=114, latent_dim=200):
    super(Encoder, self).__init__()
    self.img_dim = img_dim
    self.label_dim = label_dim
    self.latent_dim = latent_dim
    self.fc1 = Linear(img_dim + label_dim, 1000)
    self.fc21 = Linear(1000, latent_dim)
    self.fc22 = Linear(1000, latent_dim)
    self.softplus = Softplus()
def __init__(self, img_dim=4096, label_dim=114, latent_dim=200):
    super(Decoder, self).__init__()
    self.img_dim = img_dim
    self.label_dim = label_dim
    self.latent_dim = latent_dim
    self.fc1 = Linear(latent_dim + label_dim, 1000)
    self.fc2 = Linear(1000, img_dim)
    self.softplus = Softplus()
    self.sigmoid = Sigmoid()
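# Sketch only (not the original forward passes): encoder/decoder modules shaped
# like the two above are typically wired as a conditional VAE, with Softplus
# keeping the predicted scale positive. The function below illustrates that
# pattern under those assumptions; the name cvae_encode_sketch is hypothetical.
import torch


def cvae_encode_sketch(encoder, x, y):
    """Hypothetical forward pass for an Encoder with fc1/fc21/fc22 as above."""
    h = encoder.softplus(encoder.fc1(torch.cat([x, y], dim=-1)))  # [B, 1000]
    z_loc = encoder.fc21(h)                                       # [B, latent_dim]
    z_scale = encoder.softplus(encoder.fc22(h))                   # strictly positive scale
    return z_loc, z_scale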
def __init__(self, p_dim, q_dim, h_dim=128, dropout=0.0):
    super(HamiltonianDerivation, self).__init__()
    # self.gcl = GraphConvolutionLayer(p_dim + q_dim, h_dim, h_dims=[], dropout=dropout)
    self.align_attend = AlignAttendPooling(p_dim + q_dim, h_dim, radius=1, dropout=dropout, use_gru=False)
    self.relu = ELU()
    self.linear = Linear(h_dim, 1)
    self.softplus = Softplus()
def __init__(self):
    super(Encoder1, self).__init__()
    kernel_size = 3
    stride = 2
    padding = self.same_padding(kernel_size)
    self.conv1 = Sequential(
        Conv2d(colors_dim, 8, kernel_size=kernel_size, stride=stride, padding=padding),
        ReLU(),
    )
    self.conv2 = Sequential(
        Conv2d(8, 16, kernel_size=kernel_size, stride=stride, padding=padding),
        BatchNorm2d(16),
        ReLU(),
    )
    self.conv3 = Sequential(
        Conv2d(16, 32, kernel_size=kernel_size, stride=stride, padding=padding),
        ReLU(),
    )
    self.conv4 = Sequential(
        Conv2d(32, 64, kernel_size=kernel_size, stride=stride, padding=padding),
        ReLU(),
    )
    self.dense1 = Sequential(
        Flatten(),
        Linear(1024, 256),
        ReLU(),
    )
    ## the following take the same input
    self.dense_z_mu = Linear(256, parameter.latent_dim)
    if parameter.alpha:
        self.dense_z_std = Sequential(
            Linear(256, parameter.latent_dim),
            Softplus(),
        )
    self.set_optimizer(parameter.optimizer, lr=parameter.learning_rate, betas=parameter.betas)
def transform_to_distribution_params(params, distr_dim=1, eps=1e-6):
    """Apply nonlinearities to unconstrained model outputs so they can be
    represented as parameters of either Normal or Normal-Wishart distributions."""
    if len(params) > 3:
        all_means, all_stds = [], []
        for i in range(len(params) // 2):
            all_means.append(params[i * 2].unsqueeze(0))
            all_stds.append(Softplus()(params[i * 2 + 1].unsqueeze(0)) + eps)
        return torch.cat(all_means, dim=0), torch.cat(all_stds, dim=0)

    mean = params[0]
    std = Softplus()(params[1]) + eps
    if len(params) == 2:
        return [mean, std]
    elif len(params) == 3:
        beta = Softplus()(params[2]) + eps
        min_df = 3
        # min_df = params[0].size(distr_dim) + 2  # !!!
        kappa, nu = beta, beta + min_df
        return [mean.unsqueeze(-1), std.unsqueeze(-1), kappa, nu]
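# Hedged usage example for transform_to_distribution_params() (not from the
# original source; assumes the function, torch, and Softplus are imported).
# A network emits unconstrained tensors; Softplus(...) + eps turns the second
# one into a valid, strictly positive standard deviation.
raw_mean = torch.randn(8, 3)
raw_std = torch.randn(8, 3)                      # may be negative before the transform
mean, std = transform_to_distribution_params([raw_mean, raw_std])
assert (std > 0).all()                           # Softplus(x) + eps > 0 everywhere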
def __init__(self):
    super(Encoder4, self).__init__()
    kernel_size = 3
    stride = 2
    padding = self.same_padding(kernel_size)
    self.conv0 = Sequential(
        Conv2d(colors_dim, 16, kernel_size=1, stride=1),
        LeakyReLU(negative_slope=parameter.negative_slope),
    )
    self.conv1 = Sequential(
        Conv2d(16, 32, kernel_size=kernel_size, stride=stride, padding=padding),
        BatchNorm2d(32, momentum=parameter.momentum),
        LeakyReLU(negative_slope=parameter.negative_slope),
    )
    self.conv2 = Sequential(
        Conv2d(32, 64, kernel_size=kernel_size, stride=stride, padding=padding),
        BatchNorm2d(64, momentum=parameter.momentum),
        LeakyReLU(negative_slope=parameter.negative_slope),
    )
    self.conv3 = Sequential(
        Conv2d(64, 128, kernel_size=kernel_size, stride=stride, padding=padding),
        BatchNorm2d(128, momentum=parameter.momentum),
        LeakyReLU(negative_slope=parameter.negative_slope),
        Flatten(),  # next layer takes flat input with labels appended
    )
    self.dense1 = Sequential(
        Linear(8192 + labels_dim, 1024),
        BatchNorm1d(1024, momentum=parameter.momentum),
        LeakyReLU(negative_slope=parameter.negative_slope),
    )
    ## the following take the same input from dense1
    self.dense_z_mu = Linear(1024, parameter.latent_dim)
    if parameter.alpha:
        self.dense_z_std = Sequential(
            Linear(1024, parameter.latent_dim),
            Softplus(),
        )
    self.set_optimizer(parameter.optimizer, lr=parameter.learning_rate, betas=parameter.betas)
def __init__(self, in_features, out_features=1, support=(-0.1, 1.1), dist_type="hardkuma"):
    super(KumaGate, self).__init__()
    self.dist_type = dist_type
    self.layer_a = Sequential(Linear(in_features, out_features), Softplus())
    self.layer_b = Sequential(Linear(in_features, out_features), Softplus())
    # support must be Tensors
    s_min = torch.Tensor([support[0]]).to(device)
    s_max = torch.Tensor([support[1]]).to(device)
    self.support = [s_min, s_max]
    self.a = None
    self.b = None
def __init__(self, n_f, ndim, hidden=300):
    super(HGN, self).__init__(aggr='add')  # "Add" aggregation.
    self.pair_energy = Seq(
        Lin(2 * n_f, hidden), Softplus(),
        Lin(hidden, hidden), Softplus(),
        Lin(hidden, hidden), Softplus(),
        Lin(hidden, 1),
    )
    self.self_energy = Seq(
        Lin(n_f, hidden), Softplus(),
        Lin(hidden, hidden), Softplus(),
        Lin(hidden, hidden), Softplus(),
        Lin(hidden, 1),
    )
    self.ndim = ndim
def __init__(self, inp_dim_main=None, inp_dim_var=None, hidden_dim=None, lmb=0.1, batch_size=320000):
    set_seed(42)
    super().__init__()
    self.net_main = Sequential(Linear(inp_dim_main, hidden_dim), ReLU(), Linear(hidden_dim, 1))
    self.net_var = Sequential(Linear(inp_dim_var, hidden_dim), ReLU(), Linear(hidden_dim, 1), Softplus())
    self.lmb = lmb
    self.batch_size = batch_size
def __init__(self, beta=1, shift=2, threshold=20):
    super().__init__(beta, threshold)
    self.shift = shift
    self.softplus = Softplus(beta, threshold)
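# The __init__ above only stores the shift; a common way to complete such a
# module (an assumption, not necessarily this codebase's forward) is the
# SchNet-style shifted softplus, ssp(x) = softplus(x) - log(shift). With
# beta=1 and shift=2, softplus(0) = log(2), so the activation passes through
# the origin. ShiftedSoftplusSketch below is an illustrative name, not a class
# from the original source.
import math

import torch
from torch.nn import Softplus


class ShiftedSoftplusSketch(Softplus):
    def __init__(self, beta=1, shift=2, threshold=20):
        super().__init__(beta, threshold)
        self.shift = shift
        self.softplus = Softplus(beta, threshold)

    def forward(self, x):
        # Subtract a constant so the curve is shifted downward by log(shift)
        return self.softplus(x) - math.log(self.shift)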
    - some auxiliary losses
    - compute the integral of Q (to convert it to a policy)

You should use ICNNBN2 instead, which is the most up-to-date class (and is
supposed to match the TF code).
"""
import torch as t
from torch.nn import SELU, ReLU, BatchNorm1d as BN, Softplus, Sigmoid, LeakyReLU
from utils import variable
import numpy as np
from itertools import product
from copy import deepcopy
import math

sigmoid = Sigmoid()
softplus = Softplus()
relu = ReLU()


class ICNN(t.nn.Module):
    """
    CONCAVE Q network

    THE ACTION DIM IS HARDCODED TO 2 HERE
    """

    def __init__(self, n_layers, hidden_dim, input_dim, activation=SELU()):
        super().__init__()
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.activation = activation
def __init__(self, q_dim, h_dim=32, dropout=0.0, use_cuda=False):
    super(PotentialEnergy, self).__init__()
    self.use_cuda = use_cuda
    self.linear1 = Linear(q_dim, h_dim, bias=True)
    self.softplus = Softplus()
def __init__(self, raw_c):
    self.raw_c = raw_c
    self.softplus = Softplus()
def retrieve_BSG_vectors(model_path, task_path, candidates_dict, word2index, threshold):
    model = torch.load(model_path)

    # Retrieve parameters
    embeddings = model['embeddings.weight']
    encoder_W = model['encoder.weight']
    encoder_b = model['encoder.bias']
    affine_loc_W = model['affine_loc.weight']
    affine_scale_W = model['affine_scale.weight']
    affine_loc_b = model['affine_loc.bias']
    affine_scale_b = model['affine_scale.bias']

    softplus = Softplus()
    relu = ReLU()

    with open(task_path, 'r') as f_in:
        lines = f_in.readlines()

    target2locs = defaultdict(list)
    target2scales = defaultdict(list)
    target2strings = defaultdict(list)
    target2sentIDs = defaultdict(list)
    target2alternatives = defaultdict(list)
    skip_count = 0

    for line in lines:
        target, sentID, target_position, context = line.split('\t')
        target_word = target.split('.')[0]
        context_ids = [word2index[w] for w in context.split() if w in word2index]  # might be empty
        try:
            target_id = word2index[target_word]
        except KeyError:
            # target word not in dictionary, skip it
            skip_count += 1
            continue

        alternatives = candidates_dict[target_word]
        alternative_count = 0
        good_alternatives = []
        alternative_ids = []
        for a in alternatives:
            try:
                alternative_ids += [word2index[a]]
                good_alternatives += [a]
                alternative_count += 1
            except KeyError:
                # alternative word not in dictionary
                pass
        if alternative_count < threshold:
            skip_count += 1
            continue

        center_embeds = torch.stack([embeddings[w] for w in [target_id] + alternative_ids])  # [a+1,d]
        center_embeds = center_embeds.unsqueeze(1)  # [a+1,1,d]
        center_embeds = center_embeds.repeat(1, len(context_ids), 1)  # [a+1,c,d]
        context_embeds = torch.stack([embeddings[i] for i in context_ids])  # [c,d]
        context_embeds = context_embeds.unsqueeze(0)  # [1,c,d]
        context_embeds = context_embeds.repeat(len(alternative_ids) + 1, 1, 1)  # [a+1,c,d]
        encoder_input = torch.cat((center_embeds, context_embeds), dim=2)  # [a+1,c,2d]

        # Encode context-aware representations
        h = relu(encoder_input @ torch.t(encoder_W) + encoder_b)  # [a+1,c,2d]
        h = torch.sum(h, dim=1)  # [a+1,2d]

        # Inference step
        loc_vecs = h @ torch.t(affine_loc_W) + affine_loc_b  # [a+1,d]
        scale_vecs = softplus(h @ torch.t(affine_scale_W) + affine_scale_b)  # [a+1,d]

        target2locs[target].append(loc_vecs.numpy())
        target2scales[target].append(scale_vecs.numpy())
        target2strings[target].append(target)
        target2sentIDs[target].append(sentID)
        target2alternatives[target].append(good_alternatives)

    return target2locs, target2scales, target2strings, target2sentIDs, target2alternatives, skip_count
# print("chain 1 ", h.requires_grad) # # print("ffnn3-4", len(h[0].squeeze()), int((len(h[0].squeeze()) + dim_Z) / 2), dim_Z) # print(h.shape, len(h)) for i in range(0, len(h)): # print("***",h[i].squeeze()) # print("FOR ffnn3 i",i) mu_h = ffnn3(h[i].squeeze(), linear_activation=True) # print("Chain 2 ", mu_h.requires_grad) # print(mu_h.shape) # print("FOR ffnn4 i", i) ffnn4.softmax = Softplus() sigma = ffnn4(h[i].squeeze()) # print("Chain 3 ", sigma.requires_grad) epsilon = multivariate_n.sample() z = mu_h + epsilon * sigma z_table[i] = z # print(z_param.shape, z_param,) z_param[i, 0, :], z_param[i, 1, :] = mu_h, sigma # print("Chain 4", z_param[i,0,:].requires_grad) # print("Chain 5", z_param[i, 1, :].requires_grad) # Generative network ------------------------------------------------------------------------------------------- cat_x = torch.zeros(m, len(V1.w2i)) cat_y = torch.zeros(m, len(V2.w2i))
def retrieve_embedalign_vectors(model_path, task_path, candidates_dict, word2index, threshold):
    model = torch.load(model_path)

    # Retrieve parameters
    embeddings = model['embeddings.weight']
    mean_W = model['inference_net.affine1.weight']
    var_W = model['inference_net.affine2.weight']
    mean_b = model['inference_net.affine1.bias']
    var_b = model['inference_net.affine2.bias']

    softplus = Softplus()

    with open(task_path, 'r') as f_in:
        lines = f_in.readlines()

    target2means = defaultdict(list)
    target2vars = defaultdict(list)
    target2strings = defaultdict(list)
    target2sentIDs = defaultdict(list)
    target2alternatives = defaultdict(list)
    skip_count = 0

    for line in lines:
        target, sentID, target_position, context = line.split('\t')
        target_word = target.split('.')[0]
        context_ids = [word2index[w] for w in context.split() if w in word2index]  # might be empty
        try:
            target_id = word2index[target_word]
        except KeyError:
            # target word not in dictionary, skip it
            skip_count += 1
            continue

        alternatives = candidates_dict[target_word]
        alternative_count = 0
        good_alternatives = []
        alternative_ids = []
        for a in alternatives:
            try:
                alternative_ids += [word2index[a]]
                good_alternatives += [a]
                alternative_count += 1
            except KeyError:
                # alternative word not in dictionary
                pass
        if alternative_count < threshold:
            skip_count += 1
            continue

        context_embeds = torch.stack([embeddings[i] for i in context_ids])
        context_avg = torch.mean(context_embeds, dim=0)
        context_avg = context_avg.repeat(alternative_count + 1, 1)
        context_avg = torch.tensor(context_avg)
        embeds = [embeddings[w] for w in [target_id] + alternative_ids]
        embeds = torch.stack(embeds)
        h = torch.cat((embeds, context_avg), dim=1)

        mean_vecs = h @ torch.t(mean_W) + mean_b
        var_vecs = h @ torch.t(var_W) + var_b
        var_vecs = softplus(var_vecs)

        target2means[target].append(mean_vecs.numpy())
        target2vars[target].append(var_vecs.numpy())
        target2strings[target].append(target)
        target2sentIDs[target].append(sentID)
        target2alternatives[target].append(good_alternatives)

    return target2means, target2vars, target2strings, target2sentIDs, target2alternatives, skip_count
def test_scvi(save_path):
    n_latent = 5
    adata = synthetic_iid()
    model = SCVI(adata, n_latent=n_latent)
    model.train(1, check_val_every_n_epoch=1, train_size=0.5)
    model = SCVI(adata, n_latent=n_latent, var_activation=Softplus())
    model.train(1, check_val_every_n_epoch=1, train_size=0.5)

    # tests __repr__
    print(model)

    assert model.is_trained is True
    z = model.get_latent_representation()
    assert z.shape == (adata.shape[0], n_latent)
    assert len(model.history["elbo_train"]) == 1
    model.get_elbo()
    model.get_marginal_ll(n_mc_samples=3)
    model.get_reconstruction_error()
    model.get_normalized_expression(transform_batch="batch_1")

    adata2 = synthetic_iid()
    model.get_elbo(adata2)
    model.get_marginal_ll(adata2, n_mc_samples=3)
    model.get_reconstruction_error(adata2)
    latent = model.get_latent_representation(adata2, indices=[1, 2, 3])
    assert latent.shape == (3, n_latent)
    denoised = model.get_normalized_expression(adata2)
    assert denoised.shape == adata.shape
    denoised = model.get_normalized_expression(
        adata2, indices=[1, 2, 3], transform_batch="batch_1"
    )
    denoised = model.get_normalized_expression(
        adata2, indices=[1, 2, 3], transform_batch=["batch_0", "batch_1"]
    )
    assert denoised.shape == (3, adata2.n_vars)
    sample = model.posterior_predictive_sample(adata2)
    assert sample.shape == adata2.shape
    sample = model.posterior_predictive_sample(
        adata2, indices=[1, 2, 3], gene_list=["1", "2"]
    )
    assert sample.shape == (3, 2)
    sample = model.posterior_predictive_sample(
        adata2, indices=[1, 2, 3], gene_list=["1", "2"], n_samples=3
    )
    assert sample.shape == (3, 2, 3)

    model.get_feature_correlation_matrix(correlation_type="pearson")
    model.get_feature_correlation_matrix(
        adata2,
        indices=[1, 2, 3],
        correlation_type="spearman",
        rna_size_factor=500,
        n_samples=5,
    )
    model.get_feature_correlation_matrix(
        adata2,
        indices=[1, 2, 3],
        correlation_type="spearman",
        rna_size_factor=500,
        n_samples=5,
        transform_batch=["batch_0", "batch_1"],
    )
    params = model.get_likelihood_parameters()
    assert params["mean"].shape == adata.shape
    assert (
        params["mean"].shape == params["dispersions"].shape == params["dropout"].shape
    )
    params = model.get_likelihood_parameters(adata2, indices=[1, 2, 3])
    assert params["mean"].shape == (3, adata.n_vars)
    params = model.get_likelihood_parameters(
        adata2, indices=[1, 2, 3], n_samples=3, give_mean=True
    )
    assert params["mean"].shape == (3, adata.n_vars)
    model.get_latent_library_size()
    model.get_latent_library_size(adata2, indices=[1, 2, 3])

    # test transfer_anndata_setup
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    model.get_elbo(adata2)

    # test automatic transfer_anndata_setup + on a view
    adata = synthetic_iid()
    model = SCVI(adata)
    adata2 = synthetic_iid(run_setup_anndata=False)
    model.get_elbo(adata2[:10])

    # test that we catch incorrect mappings
    adata = synthetic_iid()
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    adata2.uns["_scvi"]["categorical_mappings"]["_scvi_labels"]["mapping"] = np.array(
        ["label_4", "label_0", "label_2"]
    )
    with pytest.raises(ValueError):
        model.get_elbo(adata2)

    # test that same mapping different order doesn't raise error
    adata = synthetic_iid()
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    adata2.uns["_scvi"]["categorical_mappings"]["_scvi_labels"]["mapping"] = np.array(
        ["label_1", "label_0", "label_2"]
    )
    model.get_elbo(adata2)  # should automatically transfer setup

    # test mismatched categories raises ValueError
    adata2 = synthetic_iid(run_setup_anndata=False)
    adata2.obs.labels.cat.rename_categories(["a", "b", "c"], inplace=True)
    with pytest.raises(ValueError):
        model.get_elbo(adata2)

    # test differential expression
    model.differential_expression(groupby="labels", group1="label_1")
    model.differential_expression(
        groupby="labels", group1="label_1", group2="label_2", mode="change"
    )
    model.differential_expression(groupby="labels")
    model.differential_expression(idx1=[0, 1, 2], idx2=[3, 4, 5])
    model.differential_expression(idx1=[0, 1, 2])

    # transform batch works with all different types
    a = synthetic_iid(run_setup_anndata=False)
    batch = np.zeros(a.n_obs)
    batch[:64] += 1
    a.obs["batch"] = batch
    setup_anndata(a, batch_key="batch")
    m = SCVI(a)
    m.train(1, train_size=0.5)
    m.get_normalized_expression(transform_batch=1)
    m.get_normalized_expression(transform_batch=[0, 1])

    # test get_likelihood_parameters() when dispersion=='gene-cell'
    model = SCVI(adata, dispersion="gene-cell")
    model.get_likelihood_parameters()

    # test train callbacks work
    a = synthetic_iid()
    m = scvi.model.SCVI(a)
    lr_monitor = LearningRateMonitor()
    m.train(
        callbacks=[lr_monitor],
        max_epochs=10,
        log_every_n_steps=1,
        plan_kwargs={"reduce_lr_on_plateau": True},
    )
    assert "lr-Adam" in m.history.keys()
def train(self):
    print("-------------Training---------------")
    print("-------------------------------------")
    prev_loss = 0
    for epoch in range(self.epochs):
        print("*****************EPOCH ", epoch, "**************************")
        updates = 0
        training_loss = 0
        start = time.time()
        multivariate_n = MultivariateNormal(torch.zeros(self.dim_Z), torch.eye(self.dim_Z))

        for L_batch in self.minibatch():
            updates += 1
            L1_batch = L_batch[0]
            L2_batch = L_batch[1]
            mask_l1 = torch.Tensor(np.where(L1_batch > 0, 1, 0))
            mask_l2 = torch.Tensor(np.where(L2_batch > 0, 1, 0))

            # This check is required because the LSTM network depends on a fixed batch size
            if L1_batch.shape[0] != self.batch_size:
                continue

            h_1 = self.lstm(L1_batch)
            h = (h_1[:, :, 0:self.hidden_dim] + h_1[:, :, self.hidden_dim:]) / 2
            # h_1 = self.approxbi.getEmbedding(L1_batch)
            # h = h_1
            mu_h = self.ffnn3(h, linear_activation=True)
            self.ffnn4.softmax = Softplus()
            sigma = self.ffnn4(h)
            epsilon = multivariate_n.sample((self.batch_size, self.sentence_length,))
            z = mu_h + epsilon * torch.sqrt(sigma)
            cat_x = self.ffnn1(z)
            cat_y = self.ffnn2(z)

            self.lstm.zero_grad()
            self.ffnn1.zero_grad()
            self.ffnn2.zero_grad()
            self.ffnn3.zero_grad()
            self.ffnn4.zero_grad()

            elbo_c = ELBO(self.sentence_length, self.sentence_length)
            elbo_p1 = elbo_c.elbo_p1(cat_x, L1_batch, mask_l1)
            # print(elbo_p1)
            elbo_p2 = elbo_c.elbo_p2(cat_y, L2_batch, mask_l2)
            # print(elbo_p2)
            elbo_p3 = elbo_c.elbo_p3([mu_h, sigma])
            # print(elbo_p3)
            loss = -(elbo_p1 + elbo_p2 - elbo_p3)
            training_loss += loss.item()  # was loss.data[0], which fails on modern PyTorch
            loss.backward(retain_graph=True)
            self.opt.step()
            print("iter %r: loss=%.4f, time=%.2fs" % (epoch, training_loss / updates, time.time() - start))

        mloss = training_loss / updates
        print("iter %r: loss=%.4f, time=%.2fs" % (epoch, mloss, time.time() - start))
        if not prev_loss or mloss < prev_loss:
            prev_loss = mloss
            # save through self rather than the module-level `training` object
            torch.save(self.ffnn1, 'ffnn1.pt')
            torch.save(self.ffnn2, 'ffnn2.pt')
            torch.save(self.ffnn3, 'ffnn3.pt')
            torch.save(self.ffnn4, 'ffnn4.pt')
            torch.save(self.lstm, 'lstm.pt')
            self.V1.save_word_indexes("L1")
            self.V2.save_word_indexes("L2")