import numpy as np
from scipy import sparse


def load_named_sparse(input_filename, key):
    # Load a named sparse matrix stored as COO components (data, row, col, shape)
    # and return it in CSC format.
    npy = np.load(input_filename)[key]
    coo = sparse.coo_matrix((npy['data'], (npy['row'], npy['col'])), shape=npy['shape'])
    return coo.tocsc()
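# Usage sketch (illustrative values only): the same COO -> CSC conversion that
# load_named_sparse() performs, done on an in-memory triplet so the expected
# ('data', 'row', 'col', 'shape') layout is concrete.
_data = np.array([1.0, 2.0, 3.0])
_row = np.array([0, 1, 2])
_col = np.array([2, 0, 1])
_csc = sparse.coo_matrix((_data, (_row, _col)), shape=(3, 3)).tocsc()
# _csc.toarray() ->
# [[0. 0. 1.]
#  [2. 0. 0.]
#  [0. 3. 0.]]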
def Apply():
    global mod_vps, apply_response
    r = request
    if r.method == 'POST':
        jsondata = r.get_json()
        if isinstance(jsondata, str):
            rcv_data = json.loads(jsondata)
        else:
            rcv_data = jsondata
        # rcv_data must be a dict
        if not isinstance(rcv_data, dict):
            print("Invalid post data type from client")
            apply_response = {"vps_IDandConf": [[0], [0]]}
            response_pickled = jsonpickle.encode(apply_response)
            return Response(response=response_pickled, status=200, mimetype="application/json")
        K = rcv_data['K']
        gps_lat = rcv_data['gps_lat']
        gps_lon = rcv_data['gps_lon']
        gps_accuracy = rcv_data['gps_accuracy']
        image_data = rcv_data['image_data']
        image_size = rcv_data['image_size']
        query = deserialize_image(image_data, image_size)
        timestamp = rcv_data['timestamp']
        streetview_server_ipaddr = rcv_data['streetview_server_ipaddr']
        # print(K, gps_lat, gps_lon, gps_accuracy, streetview_server_ipaddr)
        try:
            # vps_IDandConf = dummy_apply(image, K, gps_lat, gps_lon, gps_accuracy, 0)
            vps_IDandConf = mod_vps.apply(query, K, gps_lat, gps_lon, gps_accuracy,
                                          timestamp, ipaddr=streetview_server_ipaddr)
        except Exception:
            bp()
            vps_IDandConf = [[0], [0]]  # assumed fallback so the response below stays well defined
        apply_response = {"vps_IDandConf": vps_IDandConf, 'timestamp': timestamp}
        response_pickled = jsonpickle.encode(apply_response)
        return Response(response=response_pickled, status=200, mimetype="application/json")
    if r.method == 'GET':
        if 'apply_response' not in globals():
            apply_response = {"vps_IDandConf": np.zeros((2, 3)).tolist(), 'timestamp': 0}
        response_pickled = jsonpickle.encode(apply_response)
        return Response(response=response_pickled, status=200, mimetype="application/json")
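# Client-side sketch for the GET variant of this endpoint. The route path and
# port are assumptions (the Flask route decorator is not shown above); only the
# response keys 'vps_IDandConf' and 'timestamp' come from the handler itself.
import json
import requests

resp = requests.get('http://localhost:7729/Apply')  # host/port/path assumed
result = json.loads(resp.text)                      # the jsonpickle output here is plain JSON
vps_ids, vps_confs = result['vps_IDandConf']
print(vps_ids, vps_confs, result['timestamp'])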
def balancedMiniDataset(trainset, size, limit, fullyBalanced=True):
    # Greedily select sample indices so that each of the 20 classes appears at most
    # `size` times in the selected subset; everything else goes to the held-out split.
    counter = np.zeros(20)
    iterating = True
    step = 0
    subsetToInclude = []
    subsetToNotInclude = []
    while iterating and step < limit:
        try:
            label = np.array(trainset[step][1])
            if np.all(counter + label <= size) and (not fullyBalanced or np.sum(label).item() == 1):
                counter += label
                print(counter, step)
                subsetToInclude.append(step)
            else:
                subsetToNotInclude.append(step)
            if np.min(counter) >= size:
                print("Completely Balanced Dataset")
                iterating = False
        except Exception:
            print(step)
        if step % 1000 == 0:
            print(step)
        step += 1
    np.savetxt('/home/users/alimirz1/SemisupervisedAttention/saved_batches/coco_splits/'
               + str(size) + '_per_top20class.csv',
               np.array(subsetToInclude), delimiter=',')
    np.savetxt('/home/users/alimirz1/SemisupervisedAttention/saved_batches/coco_splits/'
               + str(size) + '_per_top20class_validation.csv',
               np.array(subsetToNotInclude), delimiter=',')
    return torch.utils.data.Subset(trainset, subsetToInclude), torch.utils.data.Subset(trainset, subsetToNotInclude)
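# Usage sketch with a toy dataset (assumption: the real pipeline passes a COCO-style
# dataset whose labels are 20-dim multi-hot vectors, matching the counter above).
# Caveat: balancedMiniDataset() also writes the selected indices to the hard-coded
# CSV paths above, so point those somewhere writable before running this.
import torch
import torch.nn.functional as F

toy_inputs = torch.randn(400, 8)
toy_labels = F.one_hot(torch.randint(0, 20, (400,)), num_classes=20).float()
toy_trainset = torch.utils.data.TensorDataset(toy_inputs, toy_labels)
train_subset, heldout_subset = balancedMiniDataset(toy_trainset, size=5, limit=len(toy_trainset))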
def __init__(self):
    self.ipaddr = 'localhost'
    self.gps_lat = 0.0   # Latitude
    self.gps_lon = 0.0   # Longitude
    self.vps_lat = 0.0   # Latitude from VPS function
    self.vps_long = 0.0  # Longitude from VPS function
    self.angle = -1      # Road direction (radian)
    self.vps_prob = -1   # Reliability of the result. 0: fail ~ 1: success
    self.K = int(3)      # K for Top-K best matching
    if self.init_vps_IDandConf(self.K) < 0:  # init_vps_IDandConf must run after setting self.K
        bp()
    self.ToTensor = transforms.ToTensor()
    self.verbose = False  # True: print internal results
    self.StreetViewServerAvaiable = True
    self.callcounter_gSV = 0  # Number of calls to getStreetView(), for debugging purposes
def __init__(self, structFile, input_transform=None, onlyDB=False):
    super().__init__()
    self.input_transform = input_transform
    self.dbStruct = parse_dbStruct(structFile)
    self.images = [join(root_dir, dbIm) for dbIm in self.dbStruct.dbImage]
    if not onlyDB:
        self.images += [join(queries_dir, qIm) for qIm in self.dbStruct.qImage]
    self.whichSet = self.dbStruct.whichSet
    self.dataset = self.dbStruct.dataset
    self.positives = None
    self.distances = None
def _read(self, file_path):
    with open(file_path, "rb") as f:
        examples = pickle.load(f)
    for ix, example in enumerate(examples):
        padded_batch_size = example["max_entity_per_doc"]
        mat = example["text"].todense()
        _, vocab_size = mat.shape
        all_idx = [i for i in range(example["text"].shape[0])]
        entities_idx = [entity["entity_text_ids"] for entity in example["entities"]]
        all_entities_idx = list(itertools.chain(*entities_idx))
        # Rows that do not belong to any entity form the context.
        context_idx = [i for i in all_idx if i not in all_entities_idx]
        if len(context_idx) == 0:
            continue
        if len(entities_idx) == 0:
            continue
        entities = np.stack([mat[elm].sum(0) for elm in entities_idx])
        try:
            context = np.stack(mat[context_idx])
        except Exception:
            bp()
        # vec = np.zeros((padded_batch_size, vocab_size))
        # vec[:entities.shape[0], :] = entities
        vec = entities
        vec = context  # note: this overrides `entities`, so only context rows are used downstream
        # vec = mat
        if self._use_doc_info:
            d = mat.sum(0).repeat(len(entities_idx), axis=0)
            # vec_d = np.zeros((padded_batch_size, vocab_size))
            # vec_d[d.shape[0], :] = d
            vec_d = d
            vec = np.concatenate([vec, vec_d], axis=1)
        instance = self.text_to_instance(vec)
        if instance is not None:
            yield instance
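# Shape sketch of one pickled example that _read() expects. Field names are taken
# from the accesses above; the values are illustrative only.
from scipy import sparse as _sp

example_doc = {
    "max_entity_per_doc": 4,
    "text": _sp.csr_matrix(np.eye(6, 50)),       # one sparse row per text span
    "entities": [{"entity_text_ids": [0, 1]},    # row indices belonging to entity 1
                 {"entity_text_ids": [3]}],      # row indices belonging to entity 2
}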
def checking_return_value(self):
    K = self.K
    vps_imgID = self.vps_IDandConf[0]
    vps_imgConf = self.vps_IDandConf[1]
    if (len(vps_imgID) != K) or (len(vps_imgConf) != K):
        dsmg("Error : K result")
        bp()
        return -1
    ErrCnt = K
    for i in vps_imgID:
        # if (isinstance(vps_imgID[0], np.uint64) == False):
        if not isinstance(i, int):
            ErrCnt = ErrCnt - 1
    if K != ErrCnt:
        bp()
        return -1
    ErrCnt = K
    for i in vps_imgConf:
        # if (isinstance(vps_imgConf[0], np.double) == False):
        if not isinstance(i, float):
            ErrCnt = ErrCnt - 1
    if K != ErrCnt:
        bp()
        return -1
    return 0
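# Shape sketch of the self.vps_IDandConf value that checking_return_value() expects
# for K = 3 (illustrative numbers): a list of K integer image IDs followed by a list
# of K float confidences in [0, 1].
example_vps_IDandConf = [[1234, 5678, 9012], [0.91, 0.47, 0.12]]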
def forward(self,  # pylint: disable=arguments-differ
            tokens: Union[Dict[str, torch.IntTensor], torch.IntTensor],
            epoch_num: List[int] = None):
    """
    Parameters
    ----------
    tokens: ``Union[Dict[str, torch.IntTensor], torch.IntTensor]``
        A batch of tokens. We expect tokens to be represented in one of two ways:
            1. As token IDs. This representation will be used with downstream models,
               where bag-of-word count embedding must be done on the fly. If token IDs
               are provided, we use the bag-of-word-counts embedder to embed these
               tokens during training.
            2. As pre-computed bag-of-words vectors. This representation will be used
               during pretraining, where we can precompute bag-of-word counts and train
               much faster.
    epoch_num: ``List[int]``
        Output of the epoch tracker.
    """
    # For easy transfer to the GPU.
    self.device = self.vae.get_beta().device  # pylint: disable=W0201

    output_dict = {}

    if not self.training:
        self._kld_weight = 1.0  # pylint: disable=W0201
    else:
        self.update_kld_weight(epoch_num)

    # If the input is supplied as token IDs, embed them into bag-of-word counts
    # with a token embedder.
    if isinstance(tokens, dict):
        embedded_tokens = (self._bag_of_words_embedder(tokens['tokens'])
                           .to(device=self.device))
    else:
        embedded_tokens = tokens

    _, num_p, x_dim = embedded_tokens.shape
    if self._use_doc_info:
        embedded_doc_tokens, embedded_entity_tokens = embedded_tokens.split(x_dim // 2, dim=1)
        weights = torch.softmax(self.interpolation, dim=0)
        embedded_tokens = weights[0] * embedded_doc_tokens + weights[1] * embedded_entity_tokens
    else:
        assert x_dim == self.vocab.get_vocab_size(self.vocab_namespace)

    # Encode the text into a shared representation for the VAE.
    embedded_tokens = embedded_tokens.sum(1)
    encoder_output = self.vae.encode(embedded_tokens)

    # Perform variational inference.
    variational_output = self.vae(encoder_output)

    # Reconstructed bag-of-words from the VAE with background bias.
    reconstructed_bow = variational_output['reconstruction'] + self._background_freq

    # Apply batchnorm to the reconstructed bag of words.
    # Helps with word variety in topic space.
    reconstructed_bow = self.bow_bn(reconstructed_bow) if self._apply_batchnorm_on_recon else reconstructed_bow

    # Reconstruction log likelihood: log P(x | z) = log softmax(z beta + b)
    if self._use_doc_info:
        reconstruction_loss = self.bow_reconstruction_loss(reconstructed_bow, embedded_entity_tokens)
    else:
        reconstruction_loss = self.bow_reconstruction_loss(reconstructed_bow, embedded_tokens)

    # The KL-divergence that is returned is the mean over the batch by default.
    negative_kl_divergence = variational_output['negative_kl_divergence']

    # Compute the ELBO.
    elbo = negative_kl_divergence * self._kld_weight + reconstruction_loss
    loss = -torch.mean(elbo)
    output_dict['loss'] = loss

    theta = variational_output['theta']

    # Keep track of internal states for use downstream.
    activations: List[Tuple[str, torch.FloatTensor]] = []
    # intermediate_input = embedded_tokens
    # for layer_index, layer in enumerate(self.vae.encoder._linear_layers):  # pylint: disable=protected-access
    #     intermediate_input = layer(intermediate_input)
    #     activations.append((f"encoder_layer_{layer_index}", intermediate_input))
    activations.append(('theta', theta))
    output_dict['activations'] = activations

    # Update metrics.
    nkld = -torch.mean(negative_kl_divergence)
    nll = -torch.mean(reconstruction_loss)
    if torch.isnan(nkld):
        bp()
    if torch.isnan(nll):
        bp()
    if torch.isnan(loss):
        bp()
    self.metrics['nkld'](nkld)
    self.metrics['nll'](nll)
    self.metrics['perp'](loss)

    # batch_num is tracked for KL weight annealing.
    self.batch_num += 1
    self.compute_custom_metrics_once_per_epoch(epoch_num)
    self.metrics['npmi'] = self._cur_npmi
    return output_dict
def __init__(self,
             vocab: Vocabulary,
             bow_embedder: TokenEmbedder,
             vae: VAE,
             apply_batchnorm_on_recon: bool = False,
             batchnorm_weight_learnable: bool = False,
             batchnorm_bias_learnable: bool = True,
             kl_weight_annealing: str = "constant",
             linear_scaling: float = 1000.0,
             sigmoid_weight_1: float = 0.25,
             sigmoid_weight_2: float = 15,
             reference_counts: str = None,
             reference_vocabulary: str = None,
             use_background: bool = False,
             background_data_path: str = None,
             update_background_freq: bool = False,
             track_topics: bool = True,
             track_npmi: bool = True,
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super().__init__(vocab, regularizer)

    self.metrics = {'nkld': Average(), 'nll': Average(), 'perp': Average()}
    self.vocab = vocab
    self.vae = vae
    self.track_topics = track_topics
    self.track_npmi = track_npmi
    self.vocab_namespace = "avitm"
    self._update_background_freq = update_background_freq
    avitm_vocab_size = self.vocab.get_vocab_size(self.vocab_namespace)
    self._background_freq = self.initialize_bg_from_file(file_=background_data_path) if use_background else 0
    self._ref_counts = reference_counts

    if reference_vocabulary is not None:
        # Compute the data needed to compute NPMI every epoch.
        logger.info("Loading reference vocabulary.")
        self._ref_vocab = read_json(cached_path(reference_vocabulary))
        self._ref_vocab_index = dict(zip(self._ref_vocab, range(len(self._ref_vocab))))
        logger.info("Loading reference count matrix.")
        self._ref_count_mat = load_sparse(cached_path(self._ref_counts))
        logger.info("Computing word interaction matrix.")
        self._ref_doc_counts = (self._ref_count_mat > 0).astype(float)
        self._ref_interaction = self._ref_doc_counts.T.dot(self._ref_doc_counts)
        self._ref_doc_sum = np.array(self._ref_doc_counts.sum(0).tolist()[0])
        logger.info("Generating npmi matrices.")
        (self._npmi_numerator,
         self._npmi_denominator) = self.generate_npmi_vals(self._ref_interaction, self._ref_doc_sum)
        self.n_docs = self._ref_count_mat.shape[0]

    self._bag_of_words_embedder = bow_embedder

    self._kl_weight_annealing = kl_weight_annealing
    self._linear_scaling = float(linear_scaling)
    self._sigmoid_weight_1 = float(sigmoid_weight_1)
    self._sigmoid_weight_2 = float(sigmoid_weight_2)
    if kl_weight_annealing == "linear":
        self._kld_weight = min(1.0, 1 / self._linear_scaling)
    elif kl_weight_annealing == "sigmoid":
        self._kld_weight = float(
            1 / (1 + np.exp(-self._sigmoid_weight_1 * (1 - self._sigmoid_weight_2))))
    elif kl_weight_annealing == "constant":
        self._kld_weight = 1.0
    else:
        raise ConfigurationError("anneal type {} not found".format(kl_weight_annealing))

    # Set up batchnorm on the reconstructed bag of words.
    self._apply_batchnorm_on_recon = apply_batchnorm_on_recon
    if apply_batchnorm_on_recon:
        self.bow_bn = create_trainable_BatchNorm1d(avitm_vocab_size,
                                                   weight_learnable=batchnorm_weight_learnable,
                                                   bias_learnable=batchnorm_bias_learnable,
                                                   eps=0.001, momentum=0.001, affine=True)

    # Maintain these states for periodically printing topics and updating the KLD weight.
    self._metric_epoch_tracker = 0
    self._kl_epoch_tracker = 0
    self._cur_epoch = 0
    self._cur_npmi = 0.0
    self.batch_num = 0

    initializer(self)
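# Worked check of the sigmoid KL-annealing start value computed above with the
# default hyper-parameters (sigmoid_weight_1=0.25, sigmoid_weight_2=15):
# 1 / (1 + exp(-0.25 * (1 - 15))) = 1 / (1 + exp(3.5)) ~= 0.029,
# i.e. training begins with almost no KL penalty and anneals upward.
import numpy as np
print(1 / (1 + np.exp(-0.25 * (1 - 15))))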
def forward(self,  # pylint: disable=arguments-differ
            tokens: Union[Dict[str, torch.IntTensor], torch.IntTensor],
            entities: Union[Dict[str, torch.IntTensor], torch.IntTensor],
            epoch_num: List[int] = None):
    """
    Parameters
    ----------
    tokens: ``Union[Dict[str, torch.IntTensor], torch.IntTensor]``
        A batch of tokens. We expect tokens to be represented in one of two ways:
            1. As token IDs. This representation will be used with downstream models,
               where bag-of-word count embedding must be done on the fly. If token IDs
               are provided, we use the bag-of-word-counts embedder to embed these
               tokens during training.
            2. As pre-computed bag-of-words vectors. This representation will be used
               during pretraining, where we can precompute bag-of-word counts and train
               much faster.
    epoch_num: ``List[int]``
        Output of the epoch tracker.
    """
    if self.batch_num in []:  # debug hook: add batch indices here to break on them
        bp()

    # For easy transfer to the GPU.
    self.device = self.vae.get_beta().device  # pylint: disable=W0201

    output_dict = {}
    self.update_npmi()
    self.update_topics(epoch_num)

    if not self.training:
        self._kld_weight = 1.0  # pylint: disable=W0201
    else:
        self.update_kld_weight(epoch_num)

    # If the input is supplied as token IDs, embed them into bag-of-word counts
    # with a token embedder.
    if isinstance(tokens, dict):
        embedded_tokens = (self._bag_of_words_embedder(tokens['tokens'])
                           .to(device=self.device))
    else:
        embedded_tokens = tokens
    # embedded_tokens = embedded_tokens.sum(1)

    # Encode the text into a shared representation for both the VAE
    # and downstream classifiers to use.
    encoder_output = self.vae.encoder(embedded_tokens)

    # Perform variational inference.
    variational_output = self.vae(encoder_output)

    # Reconstructed bag-of-words from the VAE with background bias.
    reconstructed_bow = variational_output['reconstruction'] + self._background_freq

    # Apply batchnorm to the reconstructed bag of words.
    # Helps with word variety in topic space.
    reconstructed_bow = self.bow_bn(reconstructed_bow)

    # Reconstruction log likelihood: log P(x | z) = log softmax(z beta + b)
    reconstruction_loss = self.bow_reconstruction_loss(reconstructed_bow, embedded_tokens)

    # The KL-divergence that is returned is the mean over the batch by default.
    negative_kl_divergence = variational_output['negative_kl_divergence']

    # Compute the ELBO.
    elbo = negative_kl_divergence * self._kld_weight + reconstruction_loss
    loss = -torch.mean(elbo)
    with open(f"{self.vae._get_name()}_loss.txt", "a+") as loss_log:
        loss_log.write(f"{loss} \n")
    if torch.isnan(loss):
        bp()
    output_dict['loss'] = loss

    theta = variational_output['theta']

    # Keep track of internal states for use downstream.
    activations: List[Tuple[str, torch.FloatTensor]] = []
    intermediate_input = embedded_tokens
    for layer_index, layer in enumerate(self.vae.encoder._linear_layers):  # pylint: disable=protected-access
        intermediate_input = layer(intermediate_input)
        activations.append((f"encoder_layer_{layer_index}", intermediate_input))
    activations.append(('theta', theta))
    output_dict['activations'] = activations

    # Update metrics.
    self.metrics['nkld'](-torch.mean(negative_kl_divergence))
    self.metrics['nll'](-torch.mean(reconstruction_loss))

    # batch_num is tracked for KL weight annealing.
    self.batch_num += 1
    self.metrics['npmi'] = self._cur_npmi
    return output_dict
if isfile(resume_ckpt):
    print("=> loading checkpoint '{}'".format(resume_ckpt))
    checkpoint = torch.load(resume_ckpt, map_location=lambda storage, loc: storage)
    opt.start_epoch = checkpoint['epoch']
    best_metric = checkpoint['best_score']
    model.load_state_dict(checkpoint['state_dict'])
    model = model.to(device)
    if opt.mode == 'train':
        optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint '{}' (epoch {})".format(resume_ckpt, checkpoint['epoch']))
else:
    print("=> no checkpoint found at '{}'".format(resume_ckpt))
    bp()

if opt.mode.lower() == 'test':
    print('===> Running evaluation step')
    epoch = 1
    recalls = test(whole_test_set, epoch, write_tboard=False)
elif opt.mode.lower() == 'cluster':
    print('===> Calculating descriptors and clusters')
    get_clusters(whole_train_set)
elif opt.mode.lower() == 'train':
    print('===> Training model')
    writer = SummaryWriter(log_dir=join(
        opt.runsPath,
        datetime.now().strftime('%b%d_%H-%M-%S') + '_' + opt.arch + '_' + opt.pooling))
    # write checkpoints in logdir
def forward(self, entity_vector: torch.FloatTensor):  # pylint: disable = W0221
    """
    Given the input representation, produces the reconstruction from theta
    as well as the negative KL-divergence, theta itself, and the parameters
    of the distribution.
    """
    output = {}

    # Get shape dims for later use.
    batch_size, max_num_entity, _ = entity_vector.shape

    # Prior -- N(0, 1).
    p_params = {
        "mean": self.p_mu.repeat(batch_size, 1),
        "sigma": self.p_sigma.repeat(batch_size, 1),
        "log_variance": self.p_log_var.repeat(batch_size, 1)
    }

    # Estimate personas in the bottom-up direction.
    s_tilde = self.encoder_entity(entity_vector)
    e_tilde = gumbel_softmax(s_tilde)
    g_tilde = self.pooling_layer(e_tilde, dim=1)  # g_tilde = (batch_size, P)
    if self.pooling_func == "max":
        g_tilde = g_tilde[0]
    if g_tilde.shape[1] != self.encoder_entity_global.get_input_dim():
        bp()
    g_tilde_hidden = self.encoder_entity_global(g_tilde)
    type_params = self.estimate_params(g_tilde_hidden,
                                       self.mean_projection_type,
                                       self.log_var_projection_type,
                                       self.mean_bn_type,
                                       self.log_var_bn_type)

    # Calculate the distribution for the document representation:
    # estimate the intermediate document representation.
    d = self.reparameterize(type_params)
    theta = self._z_dropout(d)
    theta = torch.softmax(theta, dim=-1)
    output.update({
        "theta": theta,
        "type_params": type_params,
        "type_negative_kl_divergence": self.compute_negative_kld(q_params=type_params,
                                                                 p_params=p_params)
    })

    f = self._decoder_type.weight.t()
    if self._stochastic_weight:
        f = torch.nn.functional.softmax(f, dim=1)
    if self._apply_batchnorm_on_decoder:
        f = self.decoder_bn_topic(f)

    # (batch_size, num_type) -> (batch_size, P) = global persona representation.
    g = theta @ f
    output["global_persona"] = g

    # Decode the type representation into a persona representation:
    # (batch_size, max_num_entity, P) -- equivalent to sampling from
    # multinomial(n=1, p_1, ..., p_P).
    persona_proportion = gumbel_softmax(g.unsqueeze(1).repeat(1, max_num_entity, 1))
    q_persona_params = {"logit": g_tilde}
    p_persona_params = {"logit": g}
    persona_proportion = self._z_dropout(persona_proportion)
    output.update({
        "persona": persona_proportion,
        "persona_params": q_persona_params,
        "persona_negative_kl_divergence": self.compute_negative_kld(q_params=q_persona_params,
                                                                    p_params=p_persona_params,
                                                                    type="multinomial")
    })

    # Decode the persona representation into a topic representation.
    W = self._decoder_persona.weight.t()
    if self._apply_batchnorm_on_decoder:
        W = self.decoder_bn_persona(W)
    if self._stochastic_weight:
        W = torch.nn.functional.softmax(W, dim=1)

    # persona_reconstruction = topic proportion calculated from the persona proportion.
    persona_reconstruction = torch.softmax(persona_proportion @ W, dim=-1)
    output["persona_reconstruction"] = persona_reconstruction

    # Decode the topic representation (proportion) into an unnormalized distribution over words.
    beta = self._decoder_topic.weight.t()
    if self._apply_batchnorm_on_decoder:
        beta = self.decoder_bn_topic(beta)
    if self._stochastic_weight:
        beta = torch.nn.functional.softmax(beta, dim=1)
    bow_reconstruction = persona_reconstruction @ beta
    output["bow_reconstruction"] = bow_reconstruction

    return output
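# Minimal Gumbel-softmax sketch. The repository's own gumbel_softmax helper is not
# shown above, so this is an assumed reference implementation of the standard
# relaxation, not necessarily identical to the one used by the forward pass.
import torch

def gumbel_softmax_sketch(logits: torch.Tensor, tau: float = 1.0) -> torch.Tensor:
    # Add Gumbel(0, 1) noise to the logits, then take a temperature-scaled softmax
    # over the last dimension to obtain a differentiable, almost-one-hot sample.
    gumbel_noise = -torch.log(-torch.log(torch.rand_like(logits) + 1e-20) + 1e-20)
    return torch.softmax((logits + gumbel_noise) / tau, dim=-1)

# Example: relax (batch, max_num_entity, P) scores into soft persona assignments.
soft_assignments = gumbel_softmax_sketch(torch.randn(2, 4, 10), tau=0.5)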
def forward(self, entity_vector: torch.FloatTensor):  # pylint: disable = W0221
    """
    Given the input representation, produces the reconstruction from theta
    as well as the negative KL-divergence, theta itself, and the parameters
    of the distribution.
    """
    output = {}
    batch_size, max_num_entity, _ = entity_vector.shape

    # Prior -- N(0, 1).
    p_params = {
        "mean": self.p_mu,
        "sigma": self.p_sigma,
        "log_variance": self.p_log_var
    }

    # Estimate personas.
    hidden_s = self.encoder_entity(entity_vector)
    # TODO: batchnorm on entities is not used. Open question: should we run a
    # batchnorm over all global entity representations?
    s_params = self.estimate_params(hidden_s,
                                    self.mean_projection_entity,
                                    self.log_variance_projection_entity,
                                    self.mean_bn_entity,
                                    self.log_var_bn_entity)
    s = self.reparameterize(s_params)
    global_s, _ = s.max(1)  # free for other pooling choices, e.g. avg(.)

    hidden_d = self.encoder_topic(global_s)
    d_params = self.estimate_params(hidden_d,
                                    self.mean_projection_topic,
                                    self.log_variance_projection_topic,
                                    self.mean_bn_topic,
                                    self.log_var_bn_topic)
    d = self.reparameterize(d_params)
    output.update({
        "d": d,
        "d_params": d_params,
        "d_negative_kl_divergence": self.compute_negative_kld(q_params=d_params,
                                                              p_params=p_params)
    })

    d = d.unsqueeze(1).repeat(1, max_num_entity, 1)
    p_s_params = {
        "mean": d,
        "sigma": torch.ones_like(d),
        "log_variance": torch.zeros_like(d)
    }
    output.update({
        "s": s,
        "s_params": s_params,
        "s_negative_kl_divergence": self.compute_negative_kld(q_params=s_params,
                                                              p_params=p_s_params)
    })

    e = torch.softmax(s, dim=-1)
    beta = self._decoder_persona.weight.t()
    if self._apply_batchnorm_on_decoder:
        beta = self.decoder_bn_persona(beta)
    if self._stochastic_beta:
        beta = torch.nn.functional.softmax(beta, dim=1)
    e_reconstruction = e @ beta
    output["e_reconstruction"] = e_reconstruction

    return output
def getIDConf(self):
    if self.checking_return_value() < 0:
        print("Error : vps.py's return value")
        bp()
    return self.vps_IDandConf
def forward(self,  # pylint: disable=arguments-differ
            doc: Union[Dict[str, torch.IntTensor], torch.IntTensor],
            entities: Union[Dict[str, torch.IntTensor], torch.IntTensor],
            epoch_num: List[int] = None):
    """
    Parameters
    ----------
    doc: ``Union[Dict[str, torch.IntTensor], torch.IntTensor]``
        A batch of tokens. We expect tokens to be represented in one of two ways:
            1. As token IDs. This representation will be used with downstream models,
               where bag-of-word count embedding must be done on the fly. If token IDs
               are provided, we use the bag-of-word-counts embedder to embed these
               tokens during training.
            2. As pre-computed bag-of-words vectors. This representation will be used
               during pretraining, where we can precompute bag-of-word counts and train
               much faster.
    epoch_num: ``List[int]``
        Output of the epoch tracker.
    """
    if self.batch_num in []:  # debug hook: add batch indices here to break on them
        bp()

    # For easy transfer to the GPU.
    self.device = self.vae.get_beta().device  # pylint: disable=W0201

    output_dict = {}
    self.update_npmi()
    self.update_topics_and_personas(epoch_num)

    if not self.training:
        self._kld_weight = 1.0  # pylint: disable=W0201
    else:
        self.update_kld_weight(epoch_num)

    # If the input is supplied as token IDs, embed them into bag-of-word counts
    # with a token embedder.
    if isinstance(entities, dict):
        embedded_entities = (self._bag_of_words_embedder(entities['tokens'])
                             .to(device=self.device))
    else:
        embedded_entities = entities

    # Encode the text into a shared representation for both the VAE
    # and downstream classifiers to use.
    variational_output = self.vae(embedded_entities)
    entities_mask = (embedded_entities.sum(-1) != 0).float()

    # Reconstructed bag-of-words from the VAE with background bias.
    # doc_reconstructed_bow = variational_output['doc_reconstruction'] + self._background_freq
    entity_reconstructed_bow = variational_output['bow_reconstruction'] + self._background_freq

    # Apply batchnorm to the reconstructed bag of words.
    # Helps with word variety in topic space.
    # doc_reconstructed_bow = self.doc_bow_bn(doc_reconstructed_bow)
    # entity_reconstructed_bow = self.entity_bow_bn(entity_reconstructed_bow) * entities_mask.unsqueeze(-1)

    # Reconstruction log likelihood: log P(x | z) = log softmax(z beta + b)
    # reconstruction_loss = self.bow_reconstruction_loss(doc_reconstructed_bow, embedded_docs)
    reconstruction_loss = (self.bow_reconstruction_loss(entity_reconstructed_bow,
                                                        embedded_entities) * entities_mask).sum(1)

    # The KL-divergence that is returned is the mean over the batch by default.
    doc_negative_kl_divergence = variational_output['type_negative_kl_divergence']

    # Masked sum of the entity KL-divergence, since some entities are padding.
    entity_negative_kl_divergence = variational_output["persona_negative_kl_divergence"] * entities_mask.sum(1)

    # The total KL-divergence is the weighted sum of the doc KL and the entities' KL.
    negative_kl_divergence = doc_negative_kl_divergence * self._doc_kld_weight \
        + entity_negative_kl_divergence * self._entity_kld_weight

    # Compute the ELBO.
    elbo = negative_kl_divergence + reconstruction_loss
    loss = -torch.mean(elbo)
    if torch.isnan(loss):
        bp()
    output_dict['loss'] = loss

    # Update metrics.
    self.metrics['nkld'](-torch.mean(negative_kl_divergence))
    self.metrics['d_nkld'](-torch.mean(doc_negative_kl_divergence))
    self.metrics['e_nkld'](-torch.mean(entity_negative_kl_divergence))
    self.metrics['nll'](-torch.mean(reconstruction_loss))

    # batch_num is tracked for KL weight annealing.
    self.batch_num += 1
    self.metrics['e_npmi'] = self._cur_entity_npmi
    self.metrics['d_npmi'] = self._cur_doc_npmi
    return output_dict