import numpy as np

import utils  # local helper module providing get_centroids


def disambiguate_samples(data, distance):
    """Drop duplicate feature rows and relabel one kept copy via the nearest centroid."""
    n_before = data.shape[0]
    # It would be better if centroids were calculated without the duplicates.
    centroids = utils.get_centroids(data[:, :-1], data[:, -1])
    to_remove = set()
    to_add = []
    for i in range(n_before):
        if i in to_remove:
            continue
        ambiguity = False
        for j in range(i + 1, n_before):
            # Samples are ambiguous when their feature vectors are identical.
            if np.array_equal(data[i, :-1], data[j, :-1]):
                ambiguity = True
                to_remove.add(i)
                to_remove.add(j)
        if ambiguity:
            # Re-assign the label of the kept copy to the nearest centroid's class.
            dist = np.array([[distance(data[i, :-1], centroids[c]), c]
                             for c in centroids.keys()])
            data[i, -1] = dist[np.argmin(dist[:, 0]), 1]
            to_add.append(data[i, :])
    data = np.delete(data, list(to_remove), axis=0)
    if to_add:  # np.vstack fails on an empty list, so guard it
        data = np.vstack([data, np.array(to_add)])
    if n_before - data.shape[0] > 0:
        print('\tRemoved {} ambiguous samples.'.format(n_before - data.shape[0]))
    return data
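# A minimal usage sketch (hypothetical data; assumes utils.get_centroids
# returns a mapping from each label to its class centroid, as the dict-style
# access above implies). Euclidean distance is just one possible metric.
example = np.array([
    [0.0, 0.0, 0.0],
    [0.0, 0.0, 1.0],  # same features as row 0, conflicting label -> ambiguous
    [1.0, 1.0, 1.0],
    [2.0, 2.0, 0.0],
])

def euclidean(a, b):
    return np.linalg.norm(a - b)

cleaned = disambiguate_samples(example, euclidean)  # prints the removal count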
def forward(self, embeddings):
    # torch.clamp is not in-place, so the result must be written back;
    # the original bare call left self.w unchanged.
    self.w.data = torch.clamp(self.w.data, min=1e-6)
    centroids = get_centroids(embeddings)
    cossim = get_cossim(embeddings, centroids)
    sim_matrix = self.w * cossim.to(self.device) + self.b
    loss, _ = calc_loss(sim_matrix)
    return loss
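# This and the following forward variants all assume the GE2E helpers
# get_centroids, get_cossim, and calc_loss (Wan et al., "Generalized
# End-to-End Loss for Speaker Verification"). Below is a minimal sketch of
# plausible implementations, assuming embeddings shaped
# (n_speakers, n_utterances, emb_dim); the full GE2E loss also excludes each
# utterance from its own centroid, which this sketch omits for brevity.
# This is an illustration, not the repo's exact code; note the variants
# above discard the second return value with `_`.
import torch
import torch.nn.functional as F

def get_centroids(embeddings):
    # Mean embedding per speaker: (n_spk, n_utt, d) -> (n_spk, d).
    return embeddings.mean(dim=1)

def get_cossim(embeddings, centroids):
    # Cosine similarity of every utterance against every speaker centroid.
    n_spk, n_utt, d = embeddings.shape
    flat = embeddings.reshape(-1, d)                    # (n_spk * n_utt, d)
    sim = F.cosine_similarity(flat.unsqueeze(1),        # (N, 1, d)
                              centroids.unsqueeze(0),   # (1, n_spk, d)
                              dim=2)
    return sim.reshape(n_spk, n_utt, n_spk)

def calc_loss(sim_matrix):
    # Softmax GE2E loss: each utterance should score highest against
    # its own speaker's centroid.
    n_spk, n_utt, _ = sim_matrix.shape
    targets = torch.arange(n_spk).repeat_interleave(n_utt)
    logits = sim_matrix.reshape(n_spk * n_utt, n_spk)
    loss = F.cross_entropy(logits, targets, reduction='sum')
    return loss, logits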
def forward(self, embeddings, y=None):
    # y is unused; kept for call-site compatibility.
    self.w.data = torch.clamp(self.w.data, min=1e-6)  # keep the scale positive
    centroids = get_centroids(embeddings)
    cossim = get_cossim(embeddings, centroids)
    sim_matrix = self.w * cossim + self.b
    loss, _ = calc_loss(sim_matrix)
    return loss
def forward(self, embeddings):
    # Clamp the learned scale from below at the configured floor hp.re_num
    # (clamp is not in-place, so assign the result back).
    self.w.data = torch.clamp(self.w.data, min=hp.re_num)
    centroids = utils.get_centroids(embeddings)
    cossim = utils.get_cossim(embeddings, centroids)
    sim_matrix = self.w * cossim + self.b
    loss, _ = utils.cal_loss(sim_matrix)
    return loss
def forward(self, embeddings, embedder_net, lamb):
    self.w.data = torch.clamp(self.w.data, min=1e-6)  # keep the scale positive
    centroids = get_centroids(embeddings)
    cossim = get_cossim(embeddings, centroids)
    sim_matrix = self.w * cossim.to(self.device) + self.b
    per_loss, _ = calc_loss(sim_matrix)
    # L2 penalty over the embedder's LSTM weights. Summing the norms directly
    # (instead of collecting detached .data values into a new torch.Tensor)
    # keeps the term differentiable, so the regularizer actually contributes
    # gradients to the weights.
    weights = embedder_net.LSTM_stack.all_weights
    norm_loss = lamb * sum(
        torch.norm(weights[i][j], 2)
        for i in range(hp.model.num_layer)
        for j in range(4))
    loss = per_loss + norm_loss
    return loss, per_loss, norm_loss
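# The norm_loss term hinges on the layout of all_weights, which is easy to
# get wrong. A self-contained check of that layout, with a plain nn.LSTM
# standing in for embedder_net.LSTM_stack (sizes are hypothetical):
import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=40, hidden_size=64, num_layers=3, batch_first=True)

# all_weights holds one list per layer: [w_ih, w_hh, b_ih, b_hh] -- hence
# range(4) in the forward above. Summing torch.norm over these parameters
# keeps grad_fn attached, so the penalty actually regularizes the weights.
lamb = 1e-4
norm_loss = lamb * sum(torch.norm(w, 2)
                       for layer in lstm.all_weights
                       for w in layer)
print(norm_loss.requires_grad)  # True -> gradients flow into the LSTM weights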