def __init__(self, test_cls, dataset):
    """
    Prepare the vocabulary, embedding, classifier and training hyper-parameters.

    :param test_cls: classifier type; only 'TextCNN' is accepted.
    :param dataset: dataset name, used to locate '../data/<dataset>/<dataset>.vocab'.
    :raises ValueError: if `test_cls` is not 'TextCNN'.
    """
    # ----- Vocabulary and trainable (non-frozen) pretrained embedding -----
    vocab_path = '../data/{}/{}.vocab'.format(dataset, dataset)
    self.vocab = Vocabulary(vocab_path)
    self.Emb = gpu_wrapper(
        nn.Embedding.from_pretrained(self.vocab.embedding, freeze=False))

    # ----- Classifier -----
    if test_cls != 'TextCNN':
        raise ValueError()
    self.C = gpu_wrapper(
        Discriminator(kernels=config.textCNN_kernels,
                      conv_dim=config.textCNN_conv_dim,
                      dim_h=100,
                      D=2,
                      dropout=config.textCNN_dropout))

    # ----- Placeholders filled in later -----
    self.train_set = None
    self.test_set = None
    self.val_set = None
    self.logger = None
    self.optim = None
    self.best_acc = 0
    self.iter_num = 0

    # ----- Identification -----
    self.dataset = dataset
    self.model_name = test_cls + '-' + dataset

    # ----- Optimisation hyper-parameters -----
    self.lr = config.textCNN_lr
    self.beta1 = 0.5
    self.beta2 = 0.999
    self.total_iters = 200000
    self.lr_decay_step = 1000
    self.num_iters_decay = 0

    # ----- Data / logging settings -----
    self.noisy = True
    self.batch_size = 64
    self.num_workers = 8
    self.ROUND = 4
    self.sample_step = 4000
    self.max_len = 20
def forward(self, go, sent_len=None, bare=None):
    """
    Decode from the prior in evaluation mode; run the full encode/decode pass in training mode.

    :param go: shape = (n_batch, 16)
    :param sent_len: shape = (n_batch, ) or None (only read in training mode)
    :param bare: shape = (n_batch, 15) or None (only read in training mode)
    :return: the Decoder output in evaluation mode; in training mode the tuple
        (logits, gaussian_dist, latent_vector, logits_couple, init_input, init_input_couple).
    """
    n_batch = go.shape[0]

    if not self.training:
        # ----- Prior Network -----
        prior_sample = self.generate_gaussian(n_batch)  # shape = (n_batch, latent_dim)

        # ----- Initial Decoding States -----
        assert self.enc_bi
        state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
        zero_states = gpu_wrapper(torch.zeros(state_shape)).float()
        return self.Decoder(init_states=zero_states,
                            latent_vector=prior_sample,
                            helper=go)

    # ----- Encoding -----
    # The per-step encoder outputs are not used; only the final states are.
    _, enc_final = self.Encoder(bare, sent_len)
    # enc_final.shape = (layers * n_dir, n_batch, hid_dim)
    enc_summary = enc_final.transpose(0, 1).contiguous().view(n_batch, -1)
    # enc_summary.shape = (n_batch, layers * n_dir * hid_dim)

    # ----- Posterior Networks -----
    gaussian_dist, latent_vector = self.PosteriorGaussian(enc_summary)
    gaussian_dist_couple, _ = self.PosteriorGaussianCouple(enc_summary)

    # ----- Initial Decoding States -----
    assert self.enc_bi
    state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
    zero_states = gpu_wrapper(torch.zeros(state_shape)).float()

    # The main path is fed the sampled latent, the coupled path the posterior mean.
    init_input = self.toInit(latent_vector)  # shape = (n_batch, emb_dim)
    init_input_couple = self.toInitCouple(gaussian_dist_couple.mean)  # shape = (n_batch, emb_dim)

    logits = self.Decoder(init_states=zero_states,
                          init_input=init_input,
                          helper=go)
    logits_couple = self.DecoderCouple(init_states=zero_states,
                                       init_input=init_input_couple,
                                       helper=go)
    return (logits, gaussian_dist, latent_vector, logits_couple,
            init_input, init_input_couple)
def forward(self, go, sent_len=None, bare=None):
    """
    Decode from the prior in evaluation mode; in training mode encode, apply the flows and decode.

    :param go: shape = (n_batch, 16)
    :param sent_len: shape = (n_batch, ) or None (only read in training mode)
    :param bare: shape = (n_batch, 15) or None (only read in training mode)
    :return: the Decoder output in evaluation mode; in training mode the tuple
        (logits, Q0, z0, zk, sum_log_jacobian, BoW_logits, logits_couple,
        init_input, init_input_couple).
    """
    n_batch = go.shape[0]

    if not self.training:
        # ----- Prior Network -----
        prior_sample = self.generate_gaussian(n_batch)  # shape = (n_batch, latent_dim)

        # ----- Initial Decoding States -----
        assert self.enc_bi
        state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
        zero_states = gpu_wrapper(torch.zeros(state_shape)).float()
        return self.Decoder(init_states=zero_states,
                            latent_vector=prior_sample,
                            helper=go)

    # ----- Encoding -----
    # Only the final encoder states are consumed below.
    _, enc_final = self.Encoder(bare, sent_len)
    # enc_final.shape = (layers * n_dir, n_batch, hid_dim)
    enc_summary = enc_final.transpose(0, 1).contiguous().view(n_batch, -1)
    # enc_summary.shape = (n_batch, layers * n_dir * hid_dim)

    # ----- Posterior Networks -----
    Q0, z0 = self.PosteriorGaussian(enc_summary)  # z0.shape = (n_batch, latent_dim)
    Q0_couple, _ = self.PosteriorGaussianCouple(enc_summary)

    # ----- Flows -----
    zk, sum_log_jacobian = self.Flows(z0=z0, cond=enc_summary)
    # zk.shape = (n_batch, latent_dim); sum_log_jacobian.shape = (n_batch, )
    # The coupled path pushes the posterior mean (not a sample) through its flows.
    zk_couple, _ = self.FlowsCouple(z0=Q0_couple.mean, cond=enc_summary)

    # ----- Bag-of-Words logits -----
    BoW_logits = self.BoW(zk)  # shape = (n_batch, voc_size)

    # ----- Initial Decoding States -----
    assert self.enc_bi
    state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
    zero_states = gpu_wrapper(torch.zeros(state_shape)).float()

    init_input = self.toInit(zk)  # shape = (n_batch, emb_dim)
    init_input_couple = self.toInitCouple(zk_couple)  # shape = (n_batch, emb_dim)

    logits = self.Decoder(init_states=zero_states,
                          init_input=init_input,
                          helper=go)
    logits_couple = self.DecoderCouple(init_states=zero_states,
                                       init_input=init_input_couple,
                                       helper=go)
    return (logits, Q0, z0, zk, sum_log_jacobian, BoW_logits,
            logits_couple, init_input, init_input_couple)
def __init__(self, hid_dim, latent_dim, enc_layers, dec_layers, dropout, enc_bi, dec_max_len, beam_size, WEAtt_type, encoder_emb, decoder_emb, pad_id, n_flows, flow_type):
    """
    Build the encoder, Gaussian posterior, flows, bag-of-words head, decoder and sequence loss.

    The two embeddings must agree on vocabulary size and embedding width; both
    values are read from `encoder_emb`. `pad_id` is forwarded to the sequence
    loss as its padding index; the remaining arguments are stored on the
    instance and forwarded to the sub-modules.
    """
    super(VAE_NF, self).__init__()

    # The encoder and decoder embeddings have to be interchangeable in shape.
    assert encoder_emb.num_embeddings == decoder_emb.num_embeddings
    assert encoder_emb.embedding_dim == decoder_emb.embedding_dim

    # ----- Hyper-parameters -----
    self.voc_size = encoder_emb.num_embeddings
    self.emb_dim = encoder_emb.embedding_dim
    self.hid_dim = hid_dim
    self.latent_dim = latent_dim
    self.enc_layers = enc_layers
    self.dec_layers = dec_layers
    self.dropout = dropout
    self.enc_bi = enc_bi
    self.n_dir = 2 if self.enc_bi else 1
    self.dec_max_len = dec_max_len
    self.beam_size = beam_size
    self.WEAtt_type = WEAtt_type
    self.n_flows = n_flows
    self.flow_type = flow_type

    # Width of the flattened final encoder states (all layers and directions).
    enc_summary_dim = self.hid_dim * self.n_dir * self.enc_layers

    # ----- Sub-modules (assignment order kept so registration order is unchanged) -----
    self.Encoder = Encoder(emb_dim=self.emb_dim,
                           hid_dim=self.hid_dim,
                           n_layer=self.enc_layers,
                           dropout=self.dropout,
                           bi=self.enc_bi,
                           embedding=encoder_emb)
    # Standard-normal prior over the latent space.
    prior_loc = gpu_wrapper(torch.zeros(self.latent_dim))
    prior_scale = gpu_wrapper(torch.ones(self.latent_dim))
    self.PriorGaussian = torch.distributions.Normal(prior_loc, prior_scale)
    self.PosteriorGaussian = Gaussian(in_dim=enc_summary_dim,
                                      out_dim=self.latent_dim)
    self.Decoder = Decoder(voc_size=self.voc_size,
                           latent_dim=self.latent_dim,
                           emb_dim=self.emb_dim,
                           hid_dim=self.hid_dim * self.n_dir,
                           n_layer=self.dec_layers,
                           dropout=self.dropout,
                           max_len=self.dec_max_len,
                           beam_size=self.beam_size,
                           WEAtt_type=self.WEAtt_type,
                           embedding=decoder_emb)
    self.BoW = nn.Linear(self.latent_dim, self.voc_size)
    self.Flows = NormalizingFlows(cond_dim=enc_summary_dim,
                                  latent_dim=self.latent_dim,
                                  n_flows=self.n_flows,
                                  flow_type=self.flow_type)
    self.criterionSeq = SeqLoss(voc_size=self.voc_size,
                                pad=pad_id,
                                end=None,
                                unk=None)
def forward(self, go, sent_len=None, bare=None):
    """
    Decode from the prior in evaluation mode; in training mode encode, apply the flows and decode.

    :param go: shape = (n_batch, 16)
    :param sent_len: shape = (n_batch, ) or None (only read in training mode)
    :param bare: shape = (n_batch, 15) or None (only read in training mode)
    :return: the Decoder output in evaluation mode; in training mode the tuple
        (decoder_output, Q0, z0, zk, sum_log_jacobian).
    """
    n_batch = go.shape[0]

    if not self.training:
        # ----- Prior Network -----
        prior_sample = self.generate_gaussian(n_batch)  # shape = (n_batch, latent_dim)

        # ----- Initial Decoding States -----
        assert self.enc_bi
        state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
        zero_states = gpu_wrapper(torch.zeros(state_shape)).float()
        return self.Decoder(init_states=zero_states,
                            latent_vector=prior_sample,
                            helper=go)

    # ----- Encoding -----
    # Only the final encoder states are consumed below.
    _, enc_final = self.Encoder(bare, sent_len)
    # enc_final.shape = (layers * n_dir, n_batch, hid_dim)
    enc_summary = enc_final.transpose(0, 1).contiguous().view(n_batch, -1)
    # enc_summary.shape = (n_batch, layers * n_dir * hid_dim)

    # ----- Posterior Network -----
    Q0, z0 = self.PosteriorGaussian(enc_summary)  # z0.shape = (n_batch, latent_dim)

    # ----- Flows -----
    zk, sum_log_jacobian = self.Flows(z0=z0, cond=enc_summary)
    # zk.shape = (n_batch, latent_dim); sum_log_jacobian.shape = (n_batch, )

    # ----- Initial Decoding States -----
    assert self.enc_bi
    state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
    zero_states = gpu_wrapper(torch.zeros(state_shape)).float()

    decoded = self.Decoder(init_states=zero_states,
                           latent_vector=zk,
                           helper=go)
    return decoded, Q0, z0, zk, sum_log_jacobian
def class_score(self, sents, labels):
    """
    Classify every sentence and report the fraction that matches `labels`.

    Sentences are truncated/padded to `self.max_len` tokens, scored in batches
    of `self.batch_size`, and a score above 0.5 is read as class 1. The
    classifier and embedding are put in evaluation mode for the duration and
    switched to training mode afterwards.

    :param sents: [[str x T] x N]
    :param labels: [int x N]
    :return: float, accuracy of classification.
    """
    self.C.train(mode=False)
    self.Emb.train(mode=False)

    def to_ids(tokens):
        # Clip to max_len, map words to ids, then right-pad with '<pad>'.
        n_tokens = len(tokens)
        if n_tokens > self.max_len:
            tokens = tokens[:self.max_len]
        ids = [self.vocab.word2id[tok] for tok in tokens]
        # A non-positive multiplier yields an empty list, so clipped sentences get no padding.
        ids = ids + [self.vocab.word2id['<pad>']] * (self.max_len - n_tokens)
        return gpu_wrapper(torch.LongTensor(ids))  # shape = (max_len, )

    with torch.no_grad():
        pending = []
        batch_preds = []

        def flush():
            # Score the sentences collected so far and start a new batch.
            batch = torch.stack(pending, dim=0)  # shape = (n_batch, max_len)
            emb = self.Emb(batch)  # shape = (n_batch, max_len, emb_dim)
            scores = self.C(emb).squeeze(1)  # shape = (n_batch, )
            batch_preds.append((scores > 0.5).float())
            del pending[:]

        for sent in sents:
            pending.append(to_ids(sent))
            if len(pending) == self.batch_size:
                flush()
        if len(pending) != 0:
            flush()

        preds = torch.cat(batch_preds, dim=0)  # shape = (N, )
        labels = gpu_wrapper(torch.tensor(np.array(labels, dtype=np.float32)))  # shape = (N, )
        assert preds.shape[0] == labels.shape[0]

        # Predictions and labels are both 0/1, so the absolute difference counts the errors.
        n_wrong = torch.abs(preds - labels).sum().item()
        n_all = preds.shape[0]

    self.C.train(mode=True)
    self.Emb.train(mode=True)
    return (n_all - n_wrong) / n_all
def __sample_w_rej(self, shape):
    """
    Compute the proposal constants and draw `w` through the rejection loop.

    Stores the blended `b` and the loop outputs on the instance and returns `w`.

    :param shape: sample shape, forwarded to the rejection loop.
    :return: the accepted `w` samples.
    """
    dim_m1 = self.__m - 1
    c = torch.sqrt((4 * (self.scale ** 2)) + dim_m1 ** 2)

    # Exact expression for b, and an approximation for large scale.
    b_exact = (-2 * self.scale + c) / dim_m1
    b_approx = dim_m1 / (4 * self.scale)

    # Blend weight: 0 up to scale = 10, rising linearly to 1 at scale = 11.
    # Per the original note this avoids numerical errors for large scale.
    zero = gpu_wrapper(torch.tensor([0.]))
    one = gpu_wrapper(torch.tensor([1.]))
    blend = torch.min(torch.max(zero, self.scale - 10), one)
    b = b_approx * blend + b_exact * (1 - blend)

    a = (dim_m1 + 2 * self.scale + c) / 4
    d = (4 * a * b) / (1 + b) - dim_m1 * np.log(dim_m1)

    e_accepted, w_accepted = self.__while_loop(b, a, d, shape)
    self.__b = b
    self.__e = e_accepted
    self.__w = w_accepted
    return self.__w
def test_lm(self, go, sent_len, bare, eos, n_sample):
    """
    Score sentences with a deterministic latent code taken from the encoder.

    `n_sample` is accepted but not used. The KL term is reported as +inf, so
    the returned NLL is +inf as well.

    :return: (xent, nll, kl, latent_vector); the first three have shape (n_batch, ).
    """
    n_batch = go.shape[0]

    # ----- Encoding -----
    _, enc_final = self.Encoder(bare, sent_len)
    # enc_final.shape = (layers * n_dir, n_batch, hid_dim)
    enc_summary = enc_final.transpose(0, 1).contiguous().view(n_batch, -1)
    latent_vector = self.toLatent(enc_summary)  # shape = (n_batch, latent_dim)

    # ----- Initial Decoding States -----
    assert self.enc_bi
    state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
    zero_states = gpu_wrapper(torch.zeros(state_shape)).float()

    logits = self.Decoder(init_states=zero_states,
                          latent_vector=latent_vector,
                          helper=go,
                          test_lm=True)  # shape = (n_batch, 16, V)

    xent = self.criterionSeq(logits, eos, keep_batch=True)  # shape = (n_batch, )
    inf_kl = torch.zeros_like(xent) + float('inf')  # shape = (n_batch, )
    nll = xent + inf_kl  # shape = (n_batch, )
    return xent, nll, inf_kl, latent_vector
def saliency(self, go, sent_len=None, bare=None):
    """
    Run encoder, posterior and decoder, exposing intermediate tensors.

    :param go: decoder helper input; its first dimension is the batch size.
    :param sent_len: sentence lengths passed to the encoder.
    :param bare: token ids passed to the encoder.
    :return: (logits, gaussian_dist, self.Decoder.toInit(latent_vector), last_states)
        where last_states is the flattened final encoder state.
    """
    n_batch = go.shape[0]

    # ----- Encoding -----
    _, enc_final = self.Encoder(bare, sent_len)
    # enc_final.shape = (layers * n_dir, n_batch, hid_dim)
    enc_summary = enc_final.transpose(0, 1).contiguous().view(n_batch, -1)
    # enc_summary.shape = (n_batch, layers * n_dir * hid_dim)

    # ----- Posterior Network -----
    gaussian_dist, latent_vector = self.PosteriorGaussian(enc_summary)
    # latent_vector.shape = (n_batch, latent_dim)

    # ----- Bag-of-Words logits -----
    # Computed as in the original; the result is not returned.
    _ = self.BoW(latent_vector)  # shape = (n_batch, voc_size)

    # ----- Initial Decoding States -----
    assert self.enc_bi
    state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
    zero_states = gpu_wrapper(torch.zeros(state_shape)).float()

    logits = self.Decoder(init_states=zero_states,
                          latent_vector=latent_vector,
                          helper=go)
    decoder_init = self.Decoder.toInit(latent_vector)
    return logits, gaussian_dist, decoder_init, enc_summary
def test_lm(self, post_bare, post_len, resp_go, resp_len, resp_bare, resp_eos, n_sample):
    """
    Score responses given their posts, with no latent variable.

    `resp_len`, `resp_bare` and `n_sample` are accepted but not used. The NLL
    equals the cross-entropy and the KL term is all zeros.

    :return: (xent, nll, kl), each of shape (n_batch, ).
    """
    n_batch = post_bare.shape[0]

    # ----- Post Encoding -----
    _, post_final = self.PostEncoder(post_bare, post_len)
    # post_final.shape = (layers * n_dir, n_batch, hid_dim)
    post_summary = post_final.transpose(0, 1).contiguous().view(n_batch, -1)
    # post_summary.shape = (n_batch, layers * n_dir * hid_dim)
    post_repr = self.PostRepr(post_summary)  # shape = (n_batch, emb_dim)

    # ----- Initial Decoding States -----
    assert self.enc_bi
    state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
    zero_states = gpu_wrapper(torch.zeros(state_shape)).float()

    logits = self.Decoder(init_states=zero_states,
                          post_repr=post_repr,
                          latent_vector=None,
                          helper=resp_go,
                          test_lm=True)

    xent = self.criterionSeq(logits, resp_eos, keep_batch=True)  # shape = (n_batch, )
    return xent, xent, torch.zeros_like(xent)
def sample_from_prior(self, post_bare, post_len, resp_go):
    """
    Decode a response conditioned on the encoded post only (no latent vector).

    :param post_bare: post token ids, passed to the post encoder.
    :param post_len: post lengths, passed to the post encoder.
    :param resp_go: decoder helper input; shape = (n_batch, 16)
    :return: the Decoder output.
    """
    n_batch = resp_go.shape[0]

    # ----- Post Encoding -----
    _, post_final = self.PostEncoder(post_bare, post_len)
    # post_final.shape = (layers * n_dir, n_batch, hid_dim)
    post_summary = post_final.transpose(0, 1).contiguous().view(n_batch, -1)
    # post_summary.shape = (n_batch, layers * n_dir * hid_dim)
    post_repr = self.PostRepr(post_summary)  # shape = (n_batch, emb_dim)

    # ----- Initial Decoding States -----
    assert self.enc_bi
    state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
    zero_states = gpu_wrapper(torch.zeros(state_shape)).float()

    return self.Decoder(init_states=zero_states,
                        post_repr=post_repr,
                        latent_vector=None,
                        helper=resp_go)
def __init__(self, voc_size, pad, end, unk):
    """
    Build the per-token weights used by the sequence cross-entropy.

    Every token starts with weight 1; `pad` gets weight 0 so padding does not
    contribute to the loss, and `end` / `unk` are set to 1.

    :param voc_size: vocabulary size.
    :param pad: index of the padding token, or None for no padding token.
    :param end: index of the end-of-sequence token, or None.
    :param unk: index of the unknown-word token, or None.
    """
    super(SeqLoss, self).__init__()
    self.voc_size = voc_size
    self.word_weight = gpu_wrapper(torch.ones(voc_size))
    # Indexing a tensor with None selects the WHOLE tensor, so an unguarded
    # `word_weight[None] = 1.0` would silently overwrite the zero padding weight.
    # Missing (None) ids are therefore skipped. The original assignment order is
    # kept, so a later id still wins if two ids coincide.
    for token_id, weight in ((pad, 0.), (end, 1.0), (unk, 1.0)):
        if token_id is not None:
            self.word_weight[token_id] = weight
def forward(self, logits, gts, keep_batch=False):
    """
    Weighted cross-entropy between `logits` and the ground-truth tokens.

    :param logits: (?, T, V)
    :param gts: (?, T)
    :param keep_batch: bool. When False the weighted mean over all tokens is
        returned; when True the token losses are summed over T per sequence.
    :return: Scalar or (?).
    """
    # An empty batch contributes a zero loss.
    if logits.shape[0] == 0:
        assert gts.shape[0] == 0
        return gpu_wrapper(torch.FloatTensor([0])).squeeze(0)

    assert logits.shape[:-1] == gts.shape

    flat_logits = logits.contiguous().view(-1, self.voc_size)
    flat_targets = gts.view(-1)

    if keep_batch:
        per_token = F.cross_entropy(input=flat_logits,
                                    target=flat_targets,
                                    weight=self.word_weight,
                                    reduction='none')
        # Restore the leading dimensions and the time axis, then sum over time.
        seq_shape = list(logits.shape[:-2]) + [logits.shape[-2]]
        return per_token.view(seq_shape).sum(-1)  # shape = (?)

    return F.cross_entropy(input=flat_logits,
                           target=flat_targets,
                           weight=self.word_weight)
def importance_sampling_mi(self, vmf_dist, n_sample):
    """
    Sampling-based estimate of E[log q(z|x)] - E[log q(z)] per batch element.

    log q(z) for each sample is estimated from the other elements of the same
    batch (the element itself is excluded). Samples are drawn in chunks of the
    module-level `_n_sample`, which must divide `n_sample`.

    :param vmf_dist: posterior distribution exposing `mean`, `rsample` and `log_prob`.
    :param n_sample: total number of samples per batch element.
    :return: (mi, z) with mi of shape (n_batch, ) and z of shape
        (n_batch, n_sample, latent_dim).
    """
    assert n_sample % _n_sample == 0
    B = vmf_dist.mean.shape[0]

    log_qzx_chunks, log_qz_chunks, z_chunks = [], [], []
    for _ in range(n_sample // _n_sample):
        # ----- Sampling -----
        z = vmf_dist.rsample(torch.Size([_n_sample]))  # shape = (_n_sample, n_batch, latent_dim)
        assert tuple(z.shape) == (_n_sample, B, self.latent_dim)

        log_qzx = vmf_dist.log_prob(z)  # shape = (_n_sample, n_batch)

        # Score every sample under every posterior of the batch.
        z_pairs = z.unsqueeze(2).expand(-1, -1, B, -1)
        log_q_pairs = vmf_dist.log_prob(z_pairs)  # shape = (_n_sample, n_batch, n_batch)

        # Exclude itself.
        self_mask = gpu_wrapper(torch.eye(B).long()).eq(1)
        self_mask = self_mask.unsqueeze(0).expand(_n_sample, -1, -1)
        log_q_pairs.masked_fill_(self_mask, -float('inf'))
        log_qz = log_sum_exp(log_q_pairs, dim=2) - np.log(B - 1)  # shape = (_n_sample, n_batch)

        log_qzx_chunks.append(log_qzx)
        log_qz_chunks.append(log_qz)
        z_chunks.append(z)

    all_log_qzx = torch.cat(log_qzx_chunks, dim=0)  # shape = (n_sample, n_batch)
    all_log_qz = torch.cat(log_qz_chunks, dim=0)  # shape = (n_sample, n_batch)
    all_z = torch.cat(z_chunks, dim=0)  # shape = (n_sample, n_batch, latent_dim)

    # ----- Importance sampling for MI -----
    mi = all_log_qzx.mean(0) - all_log_qz.mean(0)
    return mi, all_z.transpose(0, 1)
def __init__(self, loc, scale, validate_args=None):
    """
    Store the distribution parameters and helper constants.

    :param loc: location tensor; its last dimension gives the dimensionality m.
    :param scale: scale (concentration) tensor.
    :param validate_args: forwarded to the parent distribution constructor.
    """
    dim = loc.shape[-1]

    self.dtype = loc.dtype
    self.loc = loc
    self.scale = scale
    self.__m = dim
    # First canonical basis vector (1, 0, ..., 0) of length m.
    basis = [1.] + [0] * (dim - 1)
    self.__e1 = gpu_wrapper(torch.Tensor(basis))

    super(VonMisesFisher, self).__init__(self.loc.size(),
                                         validate_args=validate_args)
def forward(self, input, target_is_real):
    """
    Adversarial loss of `input` against an all-real or all-fake target.

    Note that another implementation is available for max-entropy aimed generator.

    :param input: discriminator outputs (logits).
    :param target_is_real: truthy when the target label is "real".
    :return: scalar loss.
    :raises ValueError: if `self.gan_type` is neither 'LSGAN' nor 'vanillaGAN'.
    """
    gan_type = self.gan_type

    if gan_type == 'LSGAN':
        # Squared distance between the squashed score and the target (1 or 0).
        squashed = torch.sigmoid(input)
        if target_is_real:
            return torch.pow(squashed - 1, 2).mean()
        return torch.pow(squashed, 2).mean()

    if gan_type == 'vanillaGAN':
        flat = input.view(-1)
        n_items = flat.shape[0]
        if target_is_real:
            target = gpu_wrapper(Variable(torch.ones(n_items)))
        else:
            target = gpu_wrapper(Variable(torch.zeros(n_items)))
        return F.binary_cross_entropy_with_logits(flat, target)

    raise ValueError()
def preprocess_data(self, data):
    """
    Move a two-style batch to the device and attach class labels.

    :param data: (bare_0, go_0, eos_0, len_0, bare_1, go_1, eos_1, len_1).
    :return: (bare_0, go_0, eos_0, len_0, label_0, bare_1, go_1, eos_1, len_1, label_1)
        where label_0 is all zeros and label_1 all ones, both of shape (n_batch, ).
    """
    raw_0, raw_1 = data[:4], data[4:]
    # Unpack explicitly so a wrongly sized `data` still fails here.
    bare_0, go_0, eos_0, len_0, bare_1, go_1, eos_1, len_1 = data
    # The batch size of style 0 is used for both label vectors.
    n_batch = bare_0.shape[0]

    # Style 0: bare (n_batch, 20), go / eos (n_batch, 21), len (n_batch, ).
    bare_0, go_0, eos_0, len_0 = [gpu_wrapper(t) for t in raw_0]
    label_0 = gpu_wrapper(torch.zeros(n_batch))  # shape = (n_batch, )

    # Style 1: same layout.
    bare_1, go_1, eos_1, len_1 = [gpu_wrapper(t) for t in raw_1]
    label_1 = gpu_wrapper(torch.ones(n_batch))  # shape = (n_batch, )

    return (bare_0, go_0, eos_0, len_0, label_0,
            bare_1, go_1, eos_1, len_1, label_1)
def gen_interps(self, bareA, sent_lenA, bareB, sent_lenB, go, n_interps):
    """
    Decode sentences along the straight line between two posterior means.

    :param bareA: shape = (n_batch, 15)
    :param sent_lenA: shape = (n_batch, )
    :param bareB: shape = (n_batch, 15)
    :param sent_lenB: shape = (n_batch, )
    :param go: shape = (n_batch, 16)
    :param n_interps: int, number of intermediate points between A and B.
    :return: list of n_batch lists, each holding the n_interps + 2 decoder
        outputs for that batch element (both end points included).
    """
    n_batch = go.shape[0]

    def posterior_mean(bare, sent_len):
        # Encode a batch and return the mean of its Gaussian posterior.
        _, enc_final = self.Encoder(bare, sent_len)
        # enc_final.shape = (layers * n_dir, n_batch, hid_dim)
        enc_summary = enc_final.transpose(0, 1).contiguous().view(n_batch, -1)
        gauss, _ = self.PosteriorGaussian(enc_summary)
        return gauss.mean  # shape = (n_batch, latent_dim)

    # ---------- A, then B ----------
    z0A = posterior_mean(bareA, sent_lenA)
    z0B = posterior_mean(bareB, sent_lenB)

    # ----- Initial Decoding States -----
    assert self.enc_bi
    state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
    zero_states = gpu_wrapper(torch.zeros(state_shape)).float()

    interps = [[] for _ in range(n_batch)]
    n_segments = n_interps + 1
    for step in range(n_interps + 2):
        # Weights move linearly from (1, 0) at step 0 to (0, 1) at the last step.
        weight_a = (n_interps - step + 1) / (n_interps + 1)
        weight_b = step / n_segments
        point = z0A * weight_a + z0B * weight_b  # shape = (n_batch, latent_dim)
        point_input = self.toInit(point)  # shape = (n_batch, emb_dim)
        decoded = self.Decoder(init_states=zero_states,
                               init_input=point_input,
                               helper=go)
        for b_id, item in enumerate(decoded):
            interps[b_id].append(item)
    return interps
def forward(self, post_bare, post_len, resp_go, resp_len, resp_bare):
    """
    Training pass: encode post and response, then decode from the posterior latent.

    :param post_bare: shape = (n_batch, 15)
    :param post_len: shape = (n_batch, )
    :param resp_go: shape = (n_batch, 16)
    :param resp_len: response lengths, passed to the response encoder.
    :param resp_bare: shape = (n_batch, 15)
    :return: (decoder_output, prior_dist, posterior_dist)
    :raises NotImplementedError: when the module is not in training mode.
    """
    n_batch = resp_go.shape[0]

    if not self.training:
        raise NotImplementedError()

    def flatten_states(states):
        # (layers * n_dir, n_batch, hid_dim) -> (n_batch, layers * n_dir * hid_dim)
        return states.transpose(0, 1).contiguous().view(n_batch, -1)

    # ----- Post Encoding -----
    _, post_final = self.PostEncoder(post_bare, post_len)
    post_summary = flatten_states(post_final)
    post_repr = self.PostRepr(post_summary)  # shape = (n_batch, emb_dim)

    # ----- Response Encoding -----
    _, resp_final = self.RespEncoder(resp_bare, resp_len)
    resp_summary = flatten_states(resp_final)

    # ----- Prior Network -----
    # Only the distribution is returned; the prior sample is discarded.
    prior_dist, _ = self.PriorGaussian(post_summary)

    # ----- Posterior Network -----
    # The posterior sees the response and the post together.
    posterior_in = torch.cat([resp_summary, post_summary], dim=1)
    posterior_dist, posterior_latent = self.PosteriorGaussian(posterior_in)

    # ----- Initial Decoding States -----
    assert self.enc_bi
    state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
    zero_states = gpu_wrapper(torch.zeros(state_shape)).float()

    decoded = self.Decoder(init_states=zero_states,
                           post_repr=post_repr,
                           latent_vector=posterior_latent,
                           helper=resp_go)
    return decoded, prior_dist, posterior_dist
def forward(self, go, sent_len=None, bare=None):
    """
    Decode from a uniform prior sample in evaluation mode; encode and decode in training mode.

    :param go: shape = (n_batch, 16)
    :param sent_len: shape = (n_batch, ) or None (only read in training mode)
    :param bare: shape = (n_batch, 15) or None (only read in training mode)
    :return: the Decoder output in evaluation mode; in training mode the tuple
        (decoder_output, vmf_dist, prior_unif).
    """
    n_batch = go.shape[0]

    if not self.training:
        # ----- Prior Network -----
        prior_sample = self.generate_uniform(n_batch)  # shape = (n_batch, latent_dim)

        # ----- Initial Decoding States -----
        assert self.enc_bi
        state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
        zero_states = gpu_wrapper(torch.zeros(state_shape)).float()
        return self.Decoder(init_states=zero_states,
                            latent_vector=prior_sample,
                            helper=go)

    # ----- Encoding -----
    _, enc_final = self.Encoder(bare, sent_len)
    # enc_final.shape = (layers * n_dir, n_batch, hid_dim)
    enc_summary = enc_final.transpose(0, 1).contiguous().view(n_batch, -1)
    # enc_summary.shape = (n_batch, layers * n_dir * hid_dim)

    # ----- Posterior Network -----
    vmf_dist, latent_vector = self.PosteriorVMF(enc_summary)
    # latent_vector.shape = (n_batch, latent_dim)
    prior_unif = HypersphericalUniform(dim=self.latent_dim)

    # ----- Initial Decoding States -----
    assert self.enc_bi
    state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
    zero_states = gpu_wrapper(torch.zeros(state_shape)).float()

    decoded = self.Decoder(init_states=zero_states,
                           latent_vector=latent_vector,
                           helper=go)
    return decoded, vmf_dist, prior_unif
def preprocess_data(self, data):
    """
    Sort each style's batch by decreasing length, move it to the device and attach labels.

    :param data: (bare_0, go_0, eos_0, len_0, bare_1, go_1, eos_1, len_1).
    :return: (bare_0, go_0, eos_0, len_0, y_0, res_idx_0,
        bare_1, go_1, eos_1, len_1, y_1, res_idx_1) where res_idx_* is the
        index list that restores the original order of the sorted rows.
    """
    bare_0, go_0, eos_0, len_0, bare_1, go_1, eos_1, len_1 = data
    # The batch size of style 0 is used for both label vectors.
    n_batch = bare_0.shape[0]

    def sort_by_length(bare, go, eos, lens):
        # Stable sort: longest first, ties keep their original order.
        order = sorted(range(len(lens)), key=lambda i: lens[i], reverse=True)
        # Inverse permutation: restore[i] is the sorted position of original row i.
        restore = [0] * len(order)
        for rank, src in enumerate(order):
            restore[src] = rank
        sorted_bare = gpu_wrapper(bare[order, :])
        sorted_go = gpu_wrapper(go[order, :])
        sorted_eos = gpu_wrapper(eos[order, :])
        sorted_len = gpu_wrapper(lens[order])
        return sorted_bare, sorted_go, sorted_eos, sorted_len, restore

    bare_0, go_0, eos_0, len_0, res_idx_0 = sort_by_length(bare_0, go_0, eos_0, len_0)
    y_0 = gpu_wrapper(torch.zeros(n_batch))

    bare_1, go_1, eos_1, len_1, res_idx_1 = sort_by_length(bare_1, go_1, eos_1, len_1)
    y_1 = gpu_wrapper(torch.ones(n_batch))

    return (bare_0, go_0, eos_0, len_0, y_0, res_idx_0,
            bare_1, go_1, eos_1, len_1, y_1, res_idx_1)
def importance_sampling(self, vmf_dist, go, eos, n_sample):
    """
    Importance-sampled NLL and sampled KL under the given posterior.

    Samples are drawn in chunks of the module-level `_n_sample`, which must
    divide `n_sample`.

    :param vmf_dist: posterior distribution exposing `rsample` and `log_prob`.
    :param go: decoder helper input; its first dimension is the batch size.
    :param eos: ground-truth tokens for the sequence loss.
    :param n_sample: total number of samples per batch element.
    :return: (xent, nll, kl, z); the first three have shape (n_batch, ) and z
        has shape (n_batch, n_sample, latent_dim).
    """
    B = go.shape[0]
    assert n_sample % _n_sample == 0

    def tile(batch_tensor):
        # Repeat a (n_batch, L) tensor for every sample: (_n_sample * n_batch, L).
        tiled = batch_tensor.unsqueeze(0).expand(_n_sample, -1, -1)
        return tiled.contiguous().view(_n_sample * B, -1)

    xent_chunks, log_pz_chunks, log_pxz_chunks = [], [], []
    log_qzx_chunks, z_chunks = [], []
    for _ in range(n_sample // _n_sample):
        # ----- Sampling -----
        z = vmf_dist.rsample(torch.Size([_n_sample]))  # shape = (_n_sample, n_batch, latent_dim)
        assert tuple(z.shape) == (_n_sample, B, self.latent_dim)

        # ----- Initial Decoding States -----
        assert self.enc_bi
        state_shape = [self.enc_layers, _n_sample * B, self.n_dir * self.hid_dim]
        zero_states = gpu_wrapper(torch.zeros(state_shape)).float()
        init_input = self.toInit(z)  # shape = (_n_sample, n_batch, emb_dim)

        # ----- Importance sampling for NLL -----
        logits = self.Decoder(init_states=zero_states,
                              init_input=init_input.view(_n_sample * B, self.emb_dim),
                              helper=tile(go),
                              test_lm=True)  # shape = (_n_sample * n_batch, 16, V)
        xent = self.criterionSeq(logits, tile(eos), keep_batch=True)
        xent = xent.view(_n_sample, B)  # shape = (_n_sample, n_batch)

        log_pz = self.PriorUniform.log_prob(z)  # shape = (_n_sample, n_batch)
        log_pxz = -xent  # shape = (_n_sample, n_batch)
        log_qzx = vmf_dist.log_prob(z)  # shape = (_n_sample, n_batch)

        xent_chunks.append(xent)
        log_pz_chunks.append(log_pz)
        log_pxz_chunks.append(log_pxz)
        log_qzx_chunks.append(log_qzx)
        z_chunks.append(z)

    all_xent = torch.cat(xent_chunks, dim=0)  # shape = (n_sample, n_batch)
    all_log_pz = torch.cat(log_pz_chunks, dim=0)
    all_log_pxz = torch.cat(log_pxz_chunks, dim=0)
    all_log_qzx = torch.cat(log_qzx_chunks, dim=0)
    all_z = torch.cat(z_chunks, dim=0)  # shape = (n_sample, n_batch, latent_dim)

    # log p(x) ~= log mean_k [ p(z_k) p(x|z_k) / q(z_k|x) ]
    log_weights = all_log_pz + all_log_pxz - all_log_qzx
    ll = log_sum_exp(log_weights, dim=0) - np.log(n_sample)  # shape = (n_batch, )
    nll = -ll  # shape = (n_batch, )

    # ----- Importance sampling for KL -----
    kl = (all_log_qzx - all_log_pz).mean(0)  # shape = (n_batch, )

    return all_xent.mean(0), nll, kl, all_z.transpose(0, 1)
def forward(self, input, target_is_real):
    """
    Adversarial loss of `input` against an all-real or all-fake target.

    Supported `self.gan_type` values:
      * 'LSGAN'      - squared error between sigmoid(input) and the target (1 or 0).
      * 'vanillaGAN' - binary cross-entropy with logits against ones / zeros.
      * 'WGAN_hinge' - hinge loss: relu(1 - input) for real, relu(1 + input) for fake.

    :param input: discriminator outputs (logits).
    :param target_is_real: truthy when the target label is "real".
    :return: scalar loss.
    :raises ValueError: if `self.gan_type` is not one of the supported values.
    """
    if self.gan_type == 'LSGAN':
        # torch.sigmoid replaces the deprecated F.sigmoid (same values).
        squashed = torch.sigmoid(input)
        if target_is_real:
            return torch.pow(squashed - 1, 2).mean()
        return torch.pow(squashed, 2).mean()

    if self.gan_type == 'vanillaGAN':
        input = input.view(-1)
        if target_is_real:
            target = gpu_wrapper(Variable(torch.ones(input.shape[0])))
        else:
            target = gpu_wrapper(Variable(torch.zeros(input.shape[0])))
        return F.binary_cross_entropy_with_logits(input, target)

    if self.gan_type == 'WGAN_hinge':
        if target_is_real:
            return F.relu(1.0 - input).mean()
        return F.relu(input + 1.0).mean()

    raise ValueError('Unsupported gan_type: {!r}'.format(self.gan_type))
def decode_from(self, latents, go):
    """
    Decode sentences from externally supplied latent vectors.

    :param latents: shape = (n_batch, latent_dim)
    :param go: shape = (n_batch, 16)
    :return: the Decoder output.
    """
    B = latents.shape[0]
    # ----- Initial Decoding States -----
    init_states = gpu_wrapper(torch.zeros([self.enc_layers, B, self.n_dir * self.hid_dim])).float()  # shape = (layers, n_batch, n_dir * hid_dim)
    # The zero states were previously built but never handed to the decoder;
    # pass them explicitly, as every other decoding path does.
    return self.Decoder(init_states=init_states,
                        latent_vector=latents,
                        helper=go)
def rsample(self, shape=torch.Size()):
    """
    Draw samples and cast them to the dtype of `loc`.

    :param shape: sample shape; a non-`torch.Size` value is wrapped as torch.Size([shape]).
    :return: samples of shape `shape + loc.shape`.
    """
    if not isinstance(shape, torch.Size):
        shape = torch.Size([shape])

    # Component along the first axis: dedicated sampler for m == 3,
    # rejection sampling otherwise.
    if self.__m == 3:
        w = self.__sample_w3(shape=shape)
    else:
        w = self.__sample_w_rej(shape=shape)

    # Direction for the remaining m - 1 coordinates: a normalised Gaussian
    # draw with the first coordinate of the last axis dropped.
    noise = gpu_wrapper(
        torch.distributions.Normal(0, 1).sample(shape + torch.Size(self.loc.shape)))
    v = noise[..., 1:]
    v = v / v.norm(dim=-1, keepdim=True)

    # Scale the direction so the concatenated vector has unit norm; the clamp
    # keeps the square root away from zero/negative arguments.
    w_perp = torch.sqrt(torch.clamp(1 - (w ** 2), 1e-10))
    x = torch.cat((w, w_perp * v), -1)

    z = self.__householder_rotation(x)
    return z.type(self.dtype)
def __while_loop(self, b, a, d, shape):
    """
    Rejection-sampling loop: keep proposing until every entry has an accepted sample.

    :param b: proposal constant, tiled to the sample shape inside.
    :param a: proposal constant, tiled to the sample shape inside.
    :param d: acceptance-test constant, tiled to the sample shape inside.
    :param shape: sample shape (torch.Size) prepended to `self.scale.shape`.
    :return: (e, w) - the accepted Beta draws and the corresponding w values.
    """
    # Tile the constants over the requested sample shape.
    b, a, d = [e.repeat(*shape, *([1] * len(self.scale.shape))) for e in (b, a, d)]
    w, e = torch.zeros_like(b), torch.zeros_like(b)
    # True where a sample is still needed.
    bool_mask = (torch.ones_like(b) == 1)

    shape = shape + torch.Size(self.scale.shape)

    while bool_mask.sum() != 0:
        e_ = gpu_wrapper(torch.distributions.Beta((self.__m - 1) / 2, (self.__m - 1) / 2).sample(shape[:-1]).reshape(shape))
        u = gpu_wrapper(torch.distributions.Uniform(0, 1).sample(shape))

        w_ = (1 - (1 + b) * e_) / (1 - (1 - b) * e_)
        t = (2 * a * b) / (1 - (1 - b) * e_)

        accept = ((self.__m - 1) * t.log() - t + d) > torch.log(u)

        # Entries that were still pending and have just been accepted.
        newly_accepted = bool_mask & accept
        w[newly_accepted] = w_[newly_accepted]
        e[newly_accepted] = e_[newly_accepted]
        # The original wrote `(1 - accept)` back at these positions, which is
        # always "false" there; clearing the mask directly avoids subtracting
        # from a bool tensor (an error in PyTorch >= 1.2) and also works for
        # the uint8 masks of older releases.
        bool_mask[newly_accepted] = False

    return e, w
def importance_sampling_mi(self, Q0, last_states, n_sample):
    """
    Sampling-based estimate of E[log q(zk|x)] - E[log q(zk)] for the flow posterior.

    log q(zk) for each sample is estimated from the other elements of the same
    batch (the element itself is excluded). Samples are drawn in chunks of the
    module-level `_n_sample`, which must divide `n_sample`.

    :param Q0: base posterior exposing `mean`, `rsample` and `log_prob`.
    :param last_states: flow conditioning input; shape = (n_batch, layers * n_dir * hid_dim)
    :param n_sample: total number of samples per batch element.
    :return: (mi, zk) with mi of shape (n_batch, ) and zk of shape
        (n_batch, n_sample, latent_dim).
    """
    assert n_sample % _n_sample == 0
    B = Q0.mean.shape[0]

    log_qzx_chunks, log_qz_chunks, zk_chunks = [], [], []
    for _ in range(n_sample // _n_sample):
        # ----- Sampling -----
        z0 = Q0.rsample(torch.Size([_n_sample]))  # shape = (_n_sample, n_batch, out_dim)
        assert tuple(z0.shape) == (_n_sample, B, self.latent_dim)

        # ----- Flows -----
        # Each sample is transformed with its own sentence as the condition.
        flat_z0 = z0.contiguous().view(_n_sample * B, self.latent_dim)
        flat_cond = last_states.unsqueeze(0).expand(_n_sample, -1, -1)
        flat_cond = flat_cond.contiguous().view(_n_sample * B, -1)
        zk, log_jac = self.Flows(z0=flat_z0, cond=flat_cond)
        zk = zk.view(_n_sample, B, self.latent_dim)  # shape = (_n_sample, n_batch, latent_dim)
        log_jac = log_jac.view(_n_sample, B)  # shape = (_n_sample, n_batch)

        # ----- Flows for the aggregate posterior -----
        # Each sample is paired with every sentence of the batch as the condition.
        z0_pairs = z0.unsqueeze(2).expand(-1, -1, B, -1)
        flat_z0_pairs = z0_pairs.contiguous().view(_n_sample * B * B, self.latent_dim)
        cond_pairs = last_states.unsqueeze(0).unsqueeze(1).expand(_n_sample, B, -1, -1)
        flat_cond_pairs = cond_pairs.contiguous().view(_n_sample * B * B, -1)
        _, log_jac_pairs = self.Flows(z0=flat_z0_pairs, cond=flat_cond_pairs)
        log_jac_pairs = log_jac_pairs.view(_n_sample, B, B)  # shape = (_n_sample, n_batch, n_batch)

        # Flow density = base log-density minus the summed log-Jacobian.
        log_qzx = Q0.log_prob(z0).sum(2) - log_jac  # shape = (_n_sample, n_batch)
        log_q_pairs = Q0.log_prob(z0_pairs).sum(3) - log_jac_pairs  # shape = (_n_sample, n_batch, n_batch)

        # Exclude itself.
        self_mask = gpu_wrapper(torch.eye(B).long()).eq(1)
        self_mask = self_mask.unsqueeze(0).expand(_n_sample, -1, -1)
        log_q_pairs.masked_fill_(self_mask, -float('inf'))
        log_qz = log_sum_exp(log_q_pairs, dim=2) - np.log(B - 1)  # shape = (_n_sample, n_batch)

        log_qzx_chunks.append(log_qzx)
        log_qz_chunks.append(log_qz)
        zk_chunks.append(zk)

    all_log_qzx = torch.cat(log_qzx_chunks, dim=0)  # shape = (n_sample, n_batch)
    all_log_qz = torch.cat(log_qz_chunks, dim=0)  # shape = (n_sample, n_batch)
    all_zk = torch.cat(zk_chunks, dim=0)  # shape = (n_sample, n_batch, latent_dim)

    # ----- Importance sampling for MI -----
    mi = all_log_qzx.mean(0) - all_log_qz.mean(0)
    return mi, all_zk.transpose(0, 1)
def build(self):
    """
    Load the three Yelp splits, cache vocabulary constants and create the RNN language model.

    Sets `train_set`, `test_set`, `val_set`, `ntokens`, `go`, `eos`, `pad`,
    `word_weight` (zero at the padding index) and `model` on the instance.
    """
    print('----- Loading language model data -----')

    def load_split(split_name):
        return Yelp(split_name, False, config.sentiment, config.direction)

    self.train_set = load_split('train')
    self.test_set = load_split('test')
    self.val_set = load_split('dev')

    # Vocabulary constants are taken from the training split.
    train = self.train_set
    self.ntokens = train.vocab.size
    self.go = train.go
    self.eos = train.eos
    self.pad = train.pad

    # Per-token loss weights: padding is ignored.
    weights = gpu_wrapper(torch.ones(self.ntokens))
    self.word_weight = weights
    self.word_weight[self.pad] = 0.

    self.model = MODEL.RNNModel(config.model,
                                self.ntokens,
                                config.emsize,
                                config.nhid,
                                config.nlayers,
                                config.dropout,
                                config.dropouth,
                                config.dropouti,
                                config.dropoute,
                                config.wdrop,
                                config.tied)
def forward(self, sample_probs, reward, mask=None):
    """
    Reward-weighted negative log-probability of the sampled tokens.

    :param sample_probs: shape = (n_batch, *), or None for a zero loss.
    :param mask: shape = (n_batch, *) or None; zero entries are ignored.
    :param reward: reward per element; must match `sample_probs` after flattening.
    :return: scalar loss: the sum of -log(p) * reward * mask divided by the sum
        of the mask, or the plain mean when no mask is given.
    """
    if sample_probs is None:
        return gpu_wrapper(torch.zeros([1]).squeeze(0))

    sample_probs = sample_probs.contiguous().view(-1)
    reward = reward.contiguous().view(-1)

    if mask is None:
        return (-torch.log(sample_probs) * reward).mean()

    mask = mask.float().contiguous().view(-1)
    # Masked-out positions may carry probability 0; log(0) * 0 would turn the
    # whole loss (and its gradient) into NaN. Substitute 1 there so they
    # contribute exactly zero; unmasked positions are left untouched.
    safe_probs = torch.where(mask != 0, sample_probs, torch.ones_like(sample_probs))
    output = -torch.log(safe_probs) * reward * mask
    return torch.sum(output) / torch.sum(mask)
def sample_from_prior(self, go):
    """
    Decode sentences from latent vectors drawn by `generate_gaussian`.

    :param go: shape = (n_batch, 16)
    :return: the Decoder output.
    """
    n_batch = go.shape[0]

    # ----- Prior Network -----
    prior_sample = self.generate_gaussian(n_batch)  # shape = (n_batch, latent_dim)

    # ----- Initial Decoding States -----
    assert self.enc_bi
    state_shape = [self.enc_layers, n_batch, self.n_dir * self.hid_dim]
    zero_states = gpu_wrapper(torch.zeros(state_shape)).float()
    # zero_states.shape = (layers, n_batch, n_dir * hid_dim)

    return self.Decoder(init_states=zero_states,
                        latent_vector=prior_sample,
                        helper=go)