Example #1
    def __init__(self, test_cls, dataset):

        self.vocab = Vocabulary('../data/{}/{}.vocab'.format(dataset, dataset))
        self.Emb = nn.Embedding.from_pretrained(self.vocab.embedding,
                                                freeze=False)
        self.Emb = gpu_wrapper(self.Emb)
        if test_cls == 'TextCNN':
            self.C = Discriminator(kernels=config.textCNN_kernels,
                                   conv_dim=config.textCNN_conv_dim,
                                   dim_h=100,
                                   D=2,
                                   dropout=config.textCNN_dropout)
        else:
            raise ValueError()
        self.C = gpu_wrapper(self.C)

        self.train_set, self.test_set, self.val_set = None, None, None
        self.logger, self.optim, self.best_acc = None, None, 0
        self.iter_num = 0
        self.lr = config.textCNN_lr
        self.dataset = dataset
        self.model_name = test_cls + '-' + dataset
        self.noisy = True
        self.total_iters = 200000
        self.beta1 = 0.5
        self.beta2 = 0.999
        self.batch_size = 64
        self.num_workers = 8
        self.ROUND = 4
        self.sample_step = 4000
        self.lr_decay_step = 1000
        self.num_iters_decay = 0
        self.max_len = 20
Example #2
    def forward(self, go, sent_len=None, bare=None):
        """

        :param go: shape = (n_batch, 16)
        :param sent_len: shape = (n_batch, ) or None
        :param bare: shape = (n_batch, 15) or None
        :return:
        """
        B = go.shape[0]

        if not self.training:
            # ----- Prior Network -----
            latent_vector = self.generate_gaussian(
                B)  # shape = (n_batch, latent_dim)

            # ----- Initial Decoding States -----
            assert self.enc_bi
            init_states = gpu_wrapper(
                torch.zeros([
                    self.enc_layers, B, self.n_dir * self.hid_dim
                ])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

            return self.Decoder(init_states=init_states,
                                latent_vector=latent_vector,
                                helper=go)
        else:
            # ----- Encoding -----
            outputs, last_states = self.Encoder(bare, sent_len)
            # outputs.shape = (n_batch, 15, n_dir * hid_dim)
            # last_states.shape = (layers * n_dir, n_batch, hid_dim)
            last_states = last_states.transpose(0, 1).contiguous().view(
                B, -1)  # shape = (n_batch, layers * n_dir * hid_dim)

            # ----- Posterior Network -----
            gaussian_dist, latent_vector = self.PosteriorGaussian(last_states)
            # latent_vector.shape = (n_batch, latent_dim)
            gaussian_dist_couple, _ = self.PosteriorGaussianCouple(last_states)

            # ----- Initial Decoding States -----
            assert self.enc_bi
            init_states = gpu_wrapper(
                torch.zeros([
                    self.enc_layers, B, self.n_dir * self.hid_dim
                ])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

            init_input = self.toInit(
                latent_vector)  # shape = (n_batch, emb_dim)
            init_input_couple = self.toInitCouple(
                gaussian_dist_couple.mean)  # shape = (n_batch, emb_dim)

            logits = self.Decoder(init_states=init_states,
                                  init_input=init_input,
                                  helper=go)

            logits_couple = self.DecoderCouple(init_states=init_states,
                                               init_input=init_input_couple,
                                               helper=go)

            return logits, gaussian_dist, latent_vector, logits_couple, init_input, init_input_couple
Example #3
    def forward(self, go, sent_len=None, bare=None):
        """

        :param go: shape = (n_batch, 16)
        :param sent_len: shape = (n_batch, ) or None
        :param bare: shape = (n_batch, 15) or None
        :return:
        """
        B = go.shape[0]

        if not self.training:
            # ----- Prior Network -----
            latent_vector = self.generate_gaussian(B)  # shape = (n_batch, latent_dim)

            # ----- Initial Decoding States -----
            assert self.enc_bi
            init_states = gpu_wrapper(torch.zeros([self.enc_layers, B, self.n_dir * self.hid_dim])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

            return self.Decoder(init_states=init_states,
                                latent_vector=latent_vector,
                                helper=go)
        else:
            # ----- Encoding -----
            outputs, last_states = self.Encoder(bare, sent_len)
            # outputs.shape = (n_batch, 15, n_dir * hid_dim)
            # last_states.shape = (layers * n_dir, n_batch, hid_dim)
            last_states = last_states.transpose(0, 1).contiguous().view(B, -1)  # shape = (n_batch, layers * n_dir * hid_dim)

            # ----- Posterior Network -----
            Q0, z0 = self.PosteriorGaussian(last_states)
            # z0.shape = (n_batch, latent_dim)
            Q0_couple, _ = self.PosteriorGaussianCouple(last_states)

            # ----- Flows -----
            zk, sum_log_jacobian = self.Flows(z0=z0, cond=last_states)
            # zk.shape = (n_batch, latent_dim)
            # sum_log_jacobian.shape = (n_batch, )
            zk_couple, _ = self.FlowsCouple(z0=Q0_couple.mean, cond=last_states)

            # ----- Bag-of-Words logits -----
            BoW_logits = self.BoW(zk)  # shape = (n_batch, voc_size)

            # ----- Initial Decoding States -----
            assert self.enc_bi
            init_states = gpu_wrapper(torch.zeros([self.enc_layers, B, self.n_dir * self.hid_dim])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

            init_input = self.toInit(zk)  # shape = (n_batch, emb_dim)
            init_input_couple = self.toInitCouple(zk_couple)  # shape = (n_batch, emb_dim)

            logits = self.Decoder(init_states=init_states,
                                  init_input=init_input,
                                  helper=go)

            logits_couple = self.DecoderCouple(init_states=init_states,
                                               init_input=init_input_couple,
                                               helper=go)

            return logits, Q0, z0, zk, sum_log_jacobian, BoW_logits, logits_couple, init_input, init_input_couple
Example #4
    def __init__(self, hid_dim, latent_dim, enc_layers, dec_layers, dropout,
                 enc_bi, dec_max_len, beam_size, WEAtt_type, encoder_emb,
                 decoder_emb, pad_id, n_flows, flow_type):
        super(VAE_NF, self).__init__()
        assert encoder_emb.num_embeddings == decoder_emb.num_embeddings
        assert encoder_emb.embedding_dim == decoder_emb.embedding_dim
        self.voc_size = encoder_emb.num_embeddings
        self.emb_dim = encoder_emb.embedding_dim
        self.hid_dim = hid_dim
        self.enc_layers = enc_layers
        self.dec_layers = dec_layers
        self.dropout = dropout
        self.enc_bi = enc_bi
        self.n_dir = 2 if self.enc_bi else 1
        self.dec_max_len = dec_max_len
        self.beam_size = beam_size
        self.WEAtt_type = WEAtt_type
        self.latent_dim = latent_dim
        self.n_flows = n_flows
        self.flow_type = flow_type

        self.Encoder = Encoder(emb_dim=self.emb_dim,
                               hid_dim=self.hid_dim,
                               n_layer=self.enc_layers,
                               dropout=self.dropout,
                               bi=self.enc_bi,
                               embedding=encoder_emb)
        self.PriorGaussian = torch.distributions.Normal(
            gpu_wrapper(torch.zeros(self.latent_dim)),
            gpu_wrapper(torch.ones(self.latent_dim)))
        self.PosteriorGaussian = Gaussian(in_dim=self.hid_dim * self.n_dir *
                                          self.enc_layers,
                                          out_dim=self.latent_dim)
        self.Decoder = Decoder(voc_size=self.voc_size,
                               latent_dim=self.latent_dim,
                               emb_dim=self.emb_dim,
                               hid_dim=self.hid_dim * self.n_dir,
                               n_layer=self.dec_layers,
                               dropout=self.dropout,
                               max_len=self.dec_max_len,
                               beam_size=self.beam_size,
                               WEAtt_type=self.WEAtt_type,
                               embedding=decoder_emb)
        self.BoW = nn.Linear(self.latent_dim, self.voc_size)
        self.Flows = NormalizingFlows(cond_dim=self.hid_dim * self.n_dir *
                                      self.enc_layers,
                                      latent_dim=self.latent_dim,
                                      n_flows=self.n_flows,
                                      flow_type=self.flow_type)

        self.criterionSeq = SeqLoss(voc_size=self.voc_size,
                                    pad=pad_id,
                                    end=None,
                                    unk=None)
Example #5
    def forward(self, go, sent_len=None, bare=None):
        """

        :param go: shape = (n_batch, 16)
        :param sent_len: shape = (n_batch, ) or None
        :param bare: shape = (n_batch, 15) or None
        :return:
        """
        B = go.shape[0]

        if not self.training:
            # ----- Prior Network -----
            latent_vector = self.generate_gaussian(
                B)  # shape = (n_batch, latent_dim)

            # ----- Initial Decoding States -----
            assert self.enc_bi
            init_states = gpu_wrapper(
                torch.zeros([
                    self.enc_layers, B, self.n_dir * self.hid_dim
                ])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

            return self.Decoder(init_states=init_states,
                                latent_vector=latent_vector,
                                helper=go)
        else:
            # ----- Encoding -----
            outputs, last_states = self.Encoder(bare, sent_len)
            # outputs.shape = (n_batch, 15, n_dir * hid_dim)
            # last_states.shape = (layers * n_dir, n_batch, hid_dim)
            last_states = last_states.transpose(0, 1).contiguous().view(
                B, -1)  # shape = (n_batch, layers * n_dir * hid_dim)

            # ----- Posterior Network -----
            Q0, z0 = self.PosteriorGaussian(last_states)
            # z0.shape = (n_batch, latent_dim)

            # ----- Flows -----
            zk, sum_log_jacobian = self.Flows(z0=z0, cond=last_states)
            # zk.shape = (n_batch, latent_dim)
            # sum_log_jacobian.shape = (n_batch, )

            # ----- Initial Decoding States -----
            assert self.enc_bi
            init_states = gpu_wrapper(
                torch.zeros([
                    self.enc_layers, B, self.n_dir * self.hid_dim
                ])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

            return self.Decoder(init_states=init_states,
                                latent_vector=zk,
                                helper=go), Q0, z0, zk, sum_log_jacobian
Example #6
    def class_score(self, sents, labels):
        """

        :param sents: [[str x T] x N]
        :param labels: [int x N]
        :return: float, accuracy of classification.
        """
        self.C.train(mode=False)
        self.Emb.train(mode=False)
        with torch.no_grad():
            _size = 0
            _batch = []
            preds = []
            for sent in sents:
                _size += 1
                l = len(sent)
                if l > self.max_len:
                    sent = sent[:self.max_len]
                sent_id = [self.vocab.word2id[w] for w in sent]
                padding = [self.vocab.word2id['<pad>']] * (self.max_len - l)
                bare = gpu_wrapper(torch.LongTensor(sent_id +
                                                    padding))  # shape = (20, )
                _batch.append(bare)
                if _size == self.batch_size:
                    _size = 0
                    batch = torch.stack(_batch, dim=0)  # shape = (n_batch, 20)
                    emb = self.Emb(batch)  # shape = (n_batch, 20, emb_dim)
                    cls = self.C(emb).squeeze(1)  # shape = (n_batch, )
                    pred = (cls > 0.5).float()  # shape = (n_batch, )
                    preds.append(pred)
                    _batch = []
            if _size != 0:
                batch = torch.stack(_batch, dim=0)  # shape = (n_batch, 20)
                emb = self.Emb(batch)  # shape = (n_batch, 20, emb_dim)
                cls = self.C(emb).squeeze(1)  # shape = (n_batch, )
                pred = (cls > 0.5).float()  # shape = (n_batch, )
                preds.append(pred)
            preds = torch.cat(preds, dim=0)  # shape = (N, )
            # print(' '.join([str(int(_)) for _ in preds]))
            labels = gpu_wrapper(
                torch.tensor(np.array(labels,
                                      dtype=np.float32)))  # shape = (N, )
            # print(preds)
            # print(labels)
            assert preds.shape[0] == labels.shape[0]
            n_wrong = torch.abs(preds - labels).sum().item()
            n_all = preds.shape[0]

        self.C.train(mode=True)
        self.Emb.train(mode=True)
        return (n_all - n_wrong) / n_all
Example #7
    def __sample_w_rej(self, shape):
        c = torch.sqrt((4 * (self.scale ** 2)) + (self.__m - 1) ** 2)
        b_true = (-2 * self.scale + c) / (self.__m - 1)

        # using Taylor approximation with a smooth shift from 10 < scale < 11
        # to avoid numerical errors for large scale
        b_app = (self.__m - 1) / (4 * self.scale)
        s = torch.min(torch.max(gpu_wrapper(torch.tensor([0.])), self.scale - 10), gpu_wrapper(torch.tensor([1.])))
        b = b_app * s + b_true * (1 - s)

        a = (self.__m - 1 + 2 * self.scale + c) / 4
        d = (4 * a * b) / (1 + b) - (self.__m - 1) * np.log(self.__m - 1)

        self.__b, (self.__e, self.__w) = b, self.__while_loop(b, a, d, shape)
        return self.__w
Example #8
    def test_lm(self, go, sent_len, bare, eos, n_sample):
        B = go.shape[0]

        # ----- Encoding -----
        outputs, last_states = self.Encoder(bare, sent_len)
        # outputs.shape = (n_batch, 15, n_dir * hid_dim)
        # last_states.shape = (layers * n_dir, n_batch, hid_dim)
        latent_vector = self.toLatent(
            last_states.transpose(0, 1).contiguous().view(
                B, -1))  # shape = (n_batch, latent_dim)

        # ----- Initial Decoding States -----
        assert self.enc_bi
        init_states = gpu_wrapper(
            torch.zeros([
                self.enc_layers, B, self.n_dir * self.hid_dim
            ])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

        logits = self.Decoder(init_states=init_states,
                              latent_vector=latent_vector,
                              helper=go,
                              test_lm=True)  # shape = (n_batch, 16, V)
        xent = self.criterionSeq(logits, eos,
                                 keep_batch=True)  # shape = (n_batch, )
        kl = torch.zeros_like(xent) + float('inf')  # shape = (n_batch, )

        nll = xent + kl  # shape = (n_batch, )

        return xent, nll, kl, latent_vector
Example #9
    def saliency(self, go, sent_len=None, bare=None):
        B = go.shape[0]

        # ----- Encoding -----
        outputs, last_states = self.Encoder(bare, sent_len)
        # outputs.shape = (n_batch, 15, n_dir * hid_dim)
        # last_states.shape = (layers * n_dir, n_batch, hid_dim)
        last_states = last_states.transpose(0, 1).contiguous().view(
            B, -1)  # shape = (n_batch, layers * n_dir * hid_dim)

        # ----- Posterior Network -----
        gaussian_dist, latent_vector = self.PosteriorGaussian(last_states)
        # latent_vector.shape = (n_batch, latent_dim)

        # ----- Bag-of-Words logits -----
        BoW_logits = self.BoW(latent_vector)  # shape = (n_batch, voc_size)

        # ----- Initial Decoding States -----
        assert self.enc_bi
        init_states = gpu_wrapper(
            torch.zeros([
                self.enc_layers, B, self.n_dir * self.hid_dim
            ])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

        logits = self.Decoder(init_states=init_states,
                              latent_vector=latent_vector,
                              helper=go)

        return logits, gaussian_dist, self.Decoder.toInit(
            latent_vector), last_states
Example #10
    def test_lm(self, post_bare, post_len, resp_go, resp_len, resp_bare,
                resp_eos, n_sample):
        B = post_bare.shape[0]

        # ----- Post Encoding -----
        post_outputs, post_last_states = self.PostEncoder(post_bare, post_len)
        # post_outputs.shape = (n_batch, 15, n_dir * hid_dim)
        # post_last_states.shape = (layers * n_dir, n_batch, hid_dim)
        post_last_states = post_last_states.transpose(0, 1).contiguous().view(
            B, -1)  # shape = (n_batch, layers * n_dir * hid_dim)
        post_repr = self.PostRepr(
            post_last_states)  # shape = (n_batch, emb_dim)

        # ----- Initial Decoding States -----
        assert self.enc_bi
        init_states = gpu_wrapper(
            torch.zeros([
                self.enc_layers, B, self.n_dir * self.hid_dim
            ])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

        logits = self.Decoder(init_states=init_states,
                              post_repr=post_repr,
                              latent_vector=None,
                              helper=resp_go,
                              test_lm=True)

        # ----- Importance sampling estimation -----
        xent = self.criterionSeq(logits, resp_eos,
                                 keep_batch=True)  # shape = (n_batch, )
        nll = xent

        return xent, nll, torch.zeros_like(xent)
Example #11
    def sample_from_prior(self, post_bare, post_len, resp_go):
        """

        :param post_bare: shape = (n_batch, 15)
        :param post_len: shape = (n_batch, )
        :param resp_go: shape = (n_batch, 16)
        :return:
        """
        B = resp_go.shape[0]

        # ----- Post Encoding -----
        post_outputs, post_last_states = self.PostEncoder(post_bare, post_len)
        # post_outputs.shape = (n_batch, 15, n_dir * hid_dim)
        # post_last_states.shape = (layers * n_dir, n_batch, hid_dim)
        post_last_states = post_last_states.transpose(0, 1).contiguous().view(
            B, -1)  # shape = (n_batch, layers * n_dir * hid_dim)
        post_repr = self.PostRepr(
            post_last_states)  # shape = (n_batch, emb_dim)

        # ----- Initial Decoding States -----
        assert self.enc_bi
        init_states = gpu_wrapper(
            torch.zeros([
                self.enc_layers, B, self.n_dir * self.hid_dim
            ])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

        preds = self.Decoder(init_states=init_states,
                             post_repr=post_repr,
                             latent_vector=None,
                             helper=resp_go)
        return preds
Example #12
    def __init__(self, voc_size, pad, end, unk):
        super(SeqLoss, self).__init__()
        self.voc_size = voc_size
        self.word_weight = gpu_wrapper(torch.ones(voc_size))
        self.word_weight[pad] = 0.
        self.word_weight[end] = 1.0
        self.word_weight[unk] = 1.0
Example #13
    def forward(self, logits, gts, keep_batch=False):
        """
        :param logits: (?, T, V)
        :param gts: (?, T)
        :param keep_batch: bool.
        :return: Scalar or (?).
        """
        if logits.shape[0] == 0:
            assert gts.shape[0] == 0
            return gpu_wrapper(torch.FloatTensor([0])).squeeze(0)

        assert logits.shape[:-1] == gts.shape
        if not keep_batch:
            xent = F.cross_entropy(input=logits.contiguous().view(
                -1, self.voc_size),
                                   target=gts.view(-1),
                                   weight=self.word_weight)
            return xent
        else:
            T = logits.shape[-2]
            struct_shape = list(logits.shape[:-2])
            xent = F.cross_entropy(input=logits.contiguous().view(
                -1, self.voc_size),
                                   target=gts.view(-1),
                                   weight=self.word_weight,
                                   reduction='none')
            xent = xent.view(struct_shape + [T])  # shape = (?, T)
            xent = xent.sum(-1)  # shape = (?)
            return xent
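A minimal usage sketch of the loss above (the constructor arguments follow Example #12; the tensors, vocabulary size, and token ids here are arbitrary stand-ins):

    # Hypothetical smoke test: 3 sequences of length 16 over a 100-token vocabulary.
    criterion = SeqLoss(voc_size=100, pad=0, end=2, unk=3)
    logits = gpu_wrapper(torch.randn(3, 16, 100))        # shape = (n_batch, T, V)
    gts = gpu_wrapper(torch.randint(0, 100, (3, 16)))    # shape = (n_batch, T)
    avg_xent = criterion(logits, gts)                    # scalar, averaged over non-pad tokens
    seq_xent = criterion(logits, gts, keep_batch=True)   # shape = (3, ), cross entropy summed over T per sequence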
Example #14
    def importance_sampling_mi(self, vmf_dist, n_sample):
        assert n_sample % _n_sample == 0

        B = vmf_dist.mean.shape[0]

        samplify = {
            'log_qz': [],
            'log_qzx': [],
            'z': []
        }
        for sample_id in range(n_sample // _n_sample):
            # ----- Sampling -----
            _z = vmf_dist.rsample(torch.Size([_n_sample]))  # shape = (_n_sample, n_batch, latent_dim)
            assert tuple(_z.shape) == (_n_sample, B, self.latent_dim)

            _log_qzx = vmf_dist.log_prob(_z)  # shape = (_n_sample, n_batch)
            _log_qz = vmf_dist.log_prob(_z.unsqueeze(2).expand(-1, -1, B, -1))  # shape = (_n_sample, n_batch, n_batch)
            # Exclude itself.
            _log_qz.masked_fill_(gpu_wrapper(torch.eye(B).long()).eq(1).unsqueeze(0).expand(_n_sample, -1, -1), -float('inf'))  # shape = (_n_sample, n_batch, n_batch)
            _log_qz = (log_sum_exp(_log_qz, dim=2) - np.log(B - 1))  # shape = (_n_sample, n_batch)

            samplify['log_qzx'].append(_log_qzx)  # shape = (_n_sample, n_batch)
            samplify['log_qz'].append(_log_qz)  # shape = (_n_sample, n_batch)
            samplify['z'].append(_z)  # shape = (_n_sample, n_batch, out_dim)

        for key in samplify.keys():
            samplify[key] = torch.cat(samplify[key], dim=0)  # shape = (n_sample, ?)

        # ----- Importance sampling for MI -----
        mi = samplify['log_qzx'].mean(0) - samplify['log_qz'].mean(0)

        return mi, samplify['z'].transpose(0, 1)
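For reference, the value returned above is a Monte-Carlo estimate of the mutual information between inputs and latents, mean(log q(z|x)) - mean(log q(z)), where the aggregate posterior log q(z) is approximated by a log-sum-exp over the other batch elements. A self-contained sketch of that aggregate-posterior step with a diagonal Gaussian (torch.logsumexp standing in for the repository's log_sum_exp helper) could look like:

    # Hypothetical stand-in: B posteriors q(z|x_j), S samples drawn from each.
    B, D, S = 5, 8, 4
    q = torch.distributions.Normal(torch.randn(B, D), torch.rand(B, D) + 0.1)
    z = q.rsample(torch.Size([S]))                                     # shape = (S, B, D)
    log_qz = q.log_prob(z.unsqueeze(2).expand(-1, -1, B, -1)).sum(-1)  # shape = (S, B, B); entry [s, i, j] = log q(z_i^s | x_j)
    log_qz.masked_fill_(torch.eye(B, dtype=torch.bool).unsqueeze(0), -float('inf'))  # exclude each sample's own posterior
    log_qz = torch.logsumexp(log_qz, dim=2) - np.log(B - 1)            # shape = (S, B), estimate of log q(z)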
Example #15
    def __init__(self, loc, scale, validate_args=None):
        self.dtype = loc.dtype
        self.loc = loc
        self.scale = scale
        self.__m = loc.shape[-1]
        self.__e1 = gpu_wrapper(torch.Tensor([1.] + [0] * (loc.shape[-1] - 1)))

        super(VonMisesFisher, self).__init__(self.loc.size(), validate_args=validate_args)
Example #16
    def forward(self, input, target_is_real):
        """Note that another implementation is available for a max-entropy-oriented generator objective."""
        if self.gan_type == 'LSGAN':
            if target_is_real:
                return torch.pow(torch.sigmoid(input) - 1, 2).mean()
            else:
                return torch.pow(torch.sigmoid(input), 2).mean()
        elif self.gan_type == 'vanillaGAN':
            input = input.view(-1)
            if target_is_real:
                return F.binary_cross_entropy_with_logits(
                    input, gpu_wrapper(Variable(torch.ones(input.shape[0]))))
            else:
                return F.binary_cross_entropy_with_logits(
                    input, gpu_wrapper(Variable(torch.zeros(input.shape[0]))))
        else:
            raise ValueError()
Example #17
    def preprocess_data(self, data):
        bare_0, go_0, eos_0, len_0, bare_1, go_1, eos_1, len_1 = data
        n_batch = bare_0.shape[0]

        bare_0 = gpu_wrapper(bare_0)  # shape = (n_batch, 20)
        go_0 = gpu_wrapper(go_0)  # shape = (n_batch, 21)
        eos_0 = gpu_wrapper(eos_0)  # shape = (n_batch, 21)
        len_0 = gpu_wrapper(len_0)  # shape = (n_batch, )
        label_0 = gpu_wrapper(torch.zeros(n_batch))  # shape = (n_batch, )

        bare_1 = gpu_wrapper(bare_1)  # shape = (n_batch, 20)
        go_1 = gpu_wrapper(go_1)  # shape = (n_batch, 21)
        eos_1 = gpu_wrapper(eos_1)  # shape = (n_batch, 21)
        len_1 = gpu_wrapper(len_1)  # shape = (n_batch, )
        label_1 = gpu_wrapper(torch.ones(n_batch))  # shape = (n_batch, )

        return bare_0, go_0, eos_0, len_0, label_0, bare_1, go_1, eos_1, len_1, label_1
Example #18
    def gen_interps(self, bareA, sent_lenA, bareB, sent_lenB, go, n_interps):
        """

        :param bareA: shape = (n_batch, 15)
        :param sent_lenA: shape = (n_batch, )
        :param bareB: shape = (n_batch, 15)
        :param sent_lenB: shape = (n_batch, )
        :param go: shape = (n_batch, 16)
        :param n_interps: int.
        :return:
        """
        B = go.shape[0]

        # ---------- A ----------
        # ----- Encoding -----
        _, last_statesA = self.Encoder(bareA, sent_lenA)
        # _.shape = (n_batch, 15, n_dir * hid_dim)
        # last_statesA.shape = (layers * n_dir, n_batch, hid_dim)
        last_statesA = last_statesA.transpose(0, 1).contiguous().view(
            B, -1)  # shape = (n_batch, layers * n_dir * hid_dim)

        # ----- Posterior Network -----
        gaussA, _ = self.PosteriorGaussian(last_statesA)
        z0A = gaussA.mean
        # z0A.shape = (n_batch, latent_dim)

        # ---------- B ----------
        # ----- Encoding -----
        _, last_statesB = self.Encoder(bareB, sent_lenB)
        # _.shape = (n_batch, 15, n_dir * hid_dim)
        # last_statesB.shape = (layers * n_dir, n_batch, hid_dim)
        last_statesB = last_statesB.transpose(0, 1).contiguous().view(
            B, -1)  # shape = (n_batch, layers * n_dir * hid_dim)

        # ----- Posterior Network -----
        gaussB, _ = self.PosteriorGaussian(last_statesB)
        z0B = gaussB.mean
        # z0B.shape = (n_batch, latent_dim)

        # ----- Initial Decoding States -----
        assert self.enc_bi
        init_states = gpu_wrapper(
            torch.zeros([
                self.enc_layers, B, self.n_dir * self.hid_dim
            ])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

        interps = [[] for _ in range(B)]
        for in_id in range(n_interps + 2):
            _zk = z0A * ((n_interps - in_id + 1) / (n_interps + 1)) + z0B * (
                in_id / (n_interps + 1))  # shape = (n_batch, latent_dim)
            _init_input = self.toInit(_zk)  # shape = (n_batch, emb_dim)
            _interp = self.Decoder(init_states=init_states,
                                   init_input=_init_input,
                                   helper=go)
            for b_id, _b_interp in enumerate(_interp):
                interps[b_id].append(_b_interp)
        return interps
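As a side note on the loop above, the n_interps + 2 decoded points include the two endpoints themselves; a quick hypothetical check of the mixing weights:

    # With n_interps = 3, the coefficients on (z0A, z0B) sweep from (1.0, 0.0) to (0.0, 1.0) in steps of 0.25.
    n_interps = 3
    coeffs = [((n_interps - i + 1) / (n_interps + 1), i / (n_interps + 1)) for i in range(n_interps + 2)]
    assert coeffs[0] == (1.0, 0.0) and coeffs[-1] == (0.0, 1.0)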
Example #19
    def forward(self, post_bare, post_len, resp_go, resp_len, resp_bare):
        """

        :param post_bare: shape = (n_batch, 15)
        :param post_len: shape = (n_batch, )
        :param resp_go: shape = (n_batch, 16)
        :param resp_len: shape = (n_batch, )
        :param resp_bare: shape = (n_batch, 15)
        :return:
        """
        B = resp_go.shape[0]

        if not self.training:
            raise NotImplementedError()
        else:
            # ----- Post Encoding -----
            post_outputs, post_last_states = self.PostEncoder(
                post_bare, post_len)
            # post_outputs.shape = (n_batch, 15, n_dir * hid_dim)
            # post_last_states.shape = (layers * n_dir, n_batch, hid_dim)
            post_last_states = post_last_states.transpose(
                0, 1).contiguous().view(
                    B, -1)  # shape = (n_batch, layers * n_dir * hid_dim)
            post_repr = self.PostRepr(
                post_last_states)  # shape = (n_batch, emb_dim)

            # ----- Response Encoding -----
            _, resp_last_states = self.RespEncoder(resp_bare, resp_len)
            # resp_outputs.shape = (n_batch, 15, n_dir * hid_dim)
            # resp_last_states.shape = (layers * n_dir, n_batch, hid_dim)
            resp_last_states = resp_last_states.transpose(
                0, 1).contiguous().view(
                    B, -1)  # shape = (n_batch, layers * n_dir * hid_dim)

            # ----- Prior Network -----
            prior_dist, prior_latent = self.PriorGaussian(post_last_states)
            # prior_latent.shape = (n_batch, hid_dim)

            # ----- Posterior Network -----
            posterior_dist, posterior_latent = self.PosteriorGaussian(
                torch.cat([resp_last_states, post_last_states], dim=1))
            # posterior_latent.shape = (n_batch, hid_dim)

            # ----- Initial Decoding States -----
            assert self.enc_bi
            init_states = gpu_wrapper(
                torch.zeros([
                    self.enc_layers, B, self.n_dir * self.hid_dim
                ])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

            return self.Decoder(init_states=init_states,
                                post_repr=post_repr,
                                latent_vector=posterior_latent,
                                helper=resp_go), prior_dist, posterior_dist
Example #20
    def forward(self, go, sent_len=None, bare=None):
        """

        :param go: shape = (n_batch, 16)
        :param sent_len: shape = (n_batch, ) or None
        :param bare: shape = (n_batch, 15) or None
        :return:
        """
        B = go.shape[0]

        if not self.training:
            # ----- Prior Network -----
            latent_vector = self.generate_uniform(B)  # shape = (n_batch, latent_dim)

            # ----- Initial Decoding States -----
            assert self.enc_bi
            init_states = gpu_wrapper(torch.zeros([self.enc_layers, B, self.n_dir * self.hid_dim])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

            return self.Decoder(init_states=init_states,
                                latent_vector=latent_vector,
                                helper=go)
        else:
            # ----- Encoding -----
            outputs, last_states = self.Encoder(bare, sent_len)
            # outputs.shape = (n_batch, 15, n_dir * hid_dim)
            # last_states.shape = (layers * n_dir, n_batch, hid_dim)
            last_states = last_states.transpose(0, 1).contiguous().view(B, -1)  # shape = (n_batch, layers * n_dir * hid_dim)

            # ----- Posterior Network -----
            vmf_dist, latent_vector = self.PosteriorVMF(last_states)
            # latent_vector.shape = (n_batch, latent_dim)

            prior_unif = HypersphericalUniform(dim=self.latent_dim)

            # ----- Initial Decoding States -----
            assert self.enc_bi
            init_states = gpu_wrapper(torch.zeros([self.enc_layers, B, self.n_dir * self.hid_dim])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

            return self.Decoder(init_states=init_states,
                                latent_vector=latent_vector,
                                helper=go), vmf_dist, prior_unif
Example #21
    def preprocess_data(self, data):
        bare_0, go_0, eos_0, len_0, bare_1, go_1, eos_1, len_1 = data
        n_batch = bare_0.shape[0]

        s_idx_0 = [ix for ix, l in sorted(enumerate(len_0), key=lambda x: x[1], reverse=True)]
        res_idx_0 = [a for a, b in sorted(enumerate(s_idx_0), key=lambda x: x[1])]
        bare_0 = gpu_wrapper(bare_0[s_idx_0, :])
        go_0 = gpu_wrapper(go_0[s_idx_0, :])
        eos_0 = gpu_wrapper(eos_0[s_idx_0, :])
        len_0 = gpu_wrapper(len_0[s_idx_0])
        y_0 = gpu_wrapper(torch.zeros(n_batch))

        s_idx_1 = [ix for ix, l in sorted(enumerate(len_1), key=lambda x: x[1], reverse=True)]
        res_idx_1 = [a for a, b in sorted(enumerate(s_idx_1), key=lambda x: x[1])]
        bare_1 = gpu_wrapper(bare_1[s_idx_1, :])
        go_1 = gpu_wrapper(go_1[s_idx_1, :])
        eos_1 = gpu_wrapper(eos_1[s_idx_1, :])
        len_1 = gpu_wrapper(len_1[s_idx_1])
        y_1 = gpu_wrapper(torch.ones(n_batch))

        return bare_0, go_0, eos_0, len_0, y_0, res_idx_0, bare_1, go_1, eos_1, len_1, y_1, res_idx_1
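The s_idx_* / res_idx_* pairs above sort each half-batch by descending length (as packed-sequence RNN encoders typically require) and record the permutation that restores the original order; a small hypothetical check:

    # Toy lengths for a batch of 3.
    lengths = torch.tensor([3, 7, 5])
    s_idx = [ix for ix, l in sorted(enumerate(lengths), key=lambda x: x[1], reverse=True)]
    res_idx = [a for a, b in sorted(enumerate(s_idx), key=lambda x: x[1])]
    assert torch.equal(lengths[s_idx][res_idx], lengths)  # res_idx undoes the length sort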
Example #22
    def importance_sampling(self, vmf_dist, go, eos, n_sample):
        B = go.shape[0]
        assert n_sample % _n_sample == 0

        samplify = {
            'xent': [],
            'log_pz': [],
            'log_pxz': [],
            'log_qzx': [],
            'z': []
        }
        for sample_id in range(n_sample // _n_sample):

            # ----- Sampling -----
            _z = vmf_dist.rsample(torch.Size([_n_sample]))  # shape = (_n_sample, n_batch, latent_dim)
            assert tuple(_z.shape) == (_n_sample, B, self.latent_dim)

            # ----- Initial Decoding States -----
            assert self.enc_bi
            _init_states = gpu_wrapper(torch.zeros([self.enc_layers, _n_sample * B, self.n_dir * self.hid_dim])).float()  # shape = (layers, _n_sample * n_batch, n_dir * hid_dim)

            _init_input = self.toInit(_z)  # shape = (_n_sample, n_batch, emb_dim)

            # ----- Importance sampling for NLL -----
            _logits = self.Decoder(init_states=_init_states,  # shape = (layers, _n_sample * n_batch, n_dir * hid_dim)
                                   init_input=_init_input.view(_n_sample * B, self.emb_dim),  # shape = (_n_sample * n_batch, emb_dim)
                                   helper=go.unsqueeze(0).expand(_n_sample, -1, -1).contiguous().view(_n_sample * B, -1),  # shape = (_n_sample * n_batch, 16)
                                   test_lm=True)  # shape = (_n_sample * n_batch, 16, V)
            _xent = self.criterionSeq(_logits,  # shape = (_n_sample * n_batch, 16, V)
                                      eos.unsqueeze(0).expand(_n_sample, -1, -1).contiguous().view(_n_sample * B, -1),  # shape = (_n_sample * n_batch, 16)
                                      keep_batch=True).view(_n_sample, B)  # shape = (_n_sample, n_batch)

            _log_pz = self.PriorUniform.log_prob(_z)  # shape = (_n_sample, n_batch)
            _log_pxz = - _xent  # shape = (_n_sample, n_batch)
            _log_qzx = vmf_dist.log_prob(_z)  # shape = (_n_sample, n_batch)

            samplify['xent'].append(_xent)  # shape = (_n_sample, n_batch)
            samplify['log_pz'].append(_log_pz)  # shape = (_n_sample, n_batch)
            samplify['log_pxz'].append(_log_pxz)  # shape = (_n_sample, n_batch)
            samplify['log_qzx'].append(_log_qzx)  # shape = (_n_sample, n_batch)
            samplify['z'].append(_z)  # shape = (_n_sample, n_batch, out_dim)

        for key in samplify.keys():
            samplify[key] = torch.cat(samplify[key], dim=0)  # shape = (n_sample, ?)

        ll = log_sum_exp(samplify['log_pz'] + samplify['log_pxz'] - samplify['log_qzx'], dim=0) - np.log(n_sample)  # shape = (n_batch, )
        nll = - ll  # shape = (n_batch, )

        # ----- Importance sampling for KL -----
        kl = (samplify['log_qzx'] - samplify['log_pz']).mean(0)  # shape = (n_batch, )

        return samplify['xent'].mean(0), nll, kl, samplify['z'].transpose(0, 1)
Example #23
    def forward(self, input, target_is_real):

        if self.gan_type == 'LSGAN':
            if target_is_real:
                return torch.pow(torch.sigmoid(input) - 1, 2).mean()
            else:
                return torch.pow(torch.sigmoid(input), 2).mean()
        elif self.gan_type == 'vanillaGAN':
            input = input.view(-1)
            if target_is_real:
                return F.binary_cross_entropy_with_logits(input,
                                                          gpu_wrapper(Variable(torch.ones(input.shape[0]))))
            else:
                return F.binary_cross_entropy_with_logits(input,
                                                          gpu_wrapper(Variable(torch.zeros(input.shape[0]))))
        elif self.gan_type == 'WGAN_hinge':
            if target_is_real:
                return F.relu(1.0 - input).mean()
            else:
                return F.relu(input + 1.0).mean()
        else:
            raise ValueError()
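A rough call-pattern sketch for the criterion above (names are hypothetical; which branch runs depends on the gan_type set at construction):

    # d_real / d_fake stand for raw discriminator outputs on real and generated samples.
    d_real = gpu_wrapper(torch.randn(8, 1))
    d_fake = gpu_wrapper(torch.randn(8, 1))
    loss_D = criterion(d_real, target_is_real=True) + criterion(d_fake, target_is_real=False)  # discriminator step
    loss_G = criterion(d_fake, target_is_real=True)  # generator step: fake samples scored as if real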
Example #24
    def decode_from(self, latents, go):
        """

        :param latents: shape = (n_batch, latent_dim)
        :param go: shape = (n_batch, 16)
        :return:
        """
        B = latents.shape[0]

        init_states = gpu_wrapper(torch.zeros([self.enc_layers, B, self.n_dir * self.hid_dim])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

        return self.Decoder(init_states=init_states,
                            latent_vector=latents,
                            helper=go)
Example #25
    def rsample(self, shape=torch.Size()):
        shape = shape if isinstance(shape, torch.Size) else torch.Size([shape])

        w = self.__sample_w3(shape=shape) if self.__m == 3 else self.__sample_w_rej(shape=shape)

        v = (gpu_wrapper(torch.distributions.Normal(0, 1).sample(
            shape + torch.Size(self.loc.shape))).transpose(0, -1)[1:]).transpose(0, -1)
        v = v / v.norm(dim=-1, keepdim=True)

        w_ = torch.sqrt(torch.clamp(1 - (w ** 2), 1e-10))
        x = torch.cat((w, w_ * v), -1)
        z = self.__householder_rotation(x)

        return z.type(self.dtype)
Example #26
    def __while_loop(self, b, a, d, shape):

        b, a, d = [e.repeat(*shape, *([1] * len(self.scale.shape))) for e in (b, a, d)]
        w, e, bool_mask = torch.zeros_like(b), torch.zeros_like(b), (torch.ones_like(b) == 1)

        shape = shape + torch.Size(self.scale.shape)

        while bool_mask.sum() != 0:
            e_ = gpu_wrapper(torch.distributions.Beta((self.__m - 1) / 2, (self.__m - 1) / 2).sample(shape[:-1]).reshape(shape))
            u = gpu_wrapper(torch.distributions.Uniform(0, 1).sample(shape))

            w_ = (1 - (1 + b) * e_) / (1 - (1 - b) * e_)
            t = (2 * a * b) / (1 - (1 - b) * e_)

            accept = ((self.__m - 1) * t.log() - t + d) > torch.log(u)
            reject = 1 - accept

            w[bool_mask * accept] = w_[bool_mask * accept]
            e[bool_mask * accept] = e_[bool_mask * accept]

            bool_mask[bool_mask * accept] = reject[bool_mask * accept]

        return e, w
Example #27
    def importance_sampling_mi(self, Q0, last_states, n_sample):
        assert n_sample % _n_sample == 0

        B = Q0.mean.shape[0]

        samplify = {
            'log_qz': [],
            'log_qzx': [],
            'z': []
        }
        for sample_id in range(n_sample // _n_sample):
            # ----- Sampling -----
            _z0 = Q0.rsample(torch.Size([_n_sample]))  # shape = (_n_sample, n_batch, out_dim)
            assert tuple(_z0.shape) == (_n_sample, B, self.latent_dim)

            # ----- Flows -----
            _zk, _sum_log_jacobian = self.Flows(z0=_z0.contiguous().view(_n_sample * B, self.latent_dim), # shape = (_n_sample * n_batch, out_dim)
                                                cond=last_states.unsqueeze(0).expand(_n_sample, -1, -1).contiguous().view(_n_sample * B, -1)
                                                # shape = (_n_sample * n_batch, layers * n_dir * hid_dim)
                                                )
            # _zk.shape = (_n_sample * n_batch, latent_dim)
            # _sum_log_jacobian.shape = (_n_sample * n_batch, )
            _zk = _zk.view(_n_sample, B, self.latent_dim)  # shape = (_n_sample, n_batch, latent_dim)
            _sum_log_jacobian = _sum_log_jacobian.view(_n_sample, B)  # shape = (_n_sample, n_batch)

            # ----- Flows for the aggregate posterior -----
            _, _sum_log_jacobian_batch = self.Flows(z0=_z0.unsqueeze(2).expand(-1, -1, B, -1).contiguous().view(_n_sample * B * B, self.latent_dim),  # shape = (_n_sample * n_batch * n_batch, out_dim)
                                                    cond=last_states.unsqueeze(0).unsqueeze(1).expand(_n_sample, B, -1, -1).contiguous().view(_n_sample * B * B, -1)  # shape = (_n_sample * n_batch * n_batch, layers * n_dir * hid_dim)
                                                    )
            # _sum_log_jacobian_batch.shape = (_n_sample * n_batch * n_batch, )
            _sum_log_jacobian_batch = _sum_log_jacobian_batch.view(_n_sample, B, B)  # shape = (_n_sample, n_batch, n_batch)

            _log_qzx = Q0.log_prob(_z0).sum(2) - _sum_log_jacobian  # shape = (_n_sample, n_batch)
            _log_qz = Q0.log_prob(_z0.unsqueeze(2).expand(-1, -1, B, -1)).sum(3) - _sum_log_jacobian_batch  # shape = (_n_sample, n_batch, n_batch)
            # Exclude itself.
            _log_qz.masked_fill_(gpu_wrapper(torch.eye(B).long()).eq(1).unsqueeze(0).expand(_n_sample, -1, -1), -float('inf'))  # shape = (_n_sample, n_batch, n_batch)
            _log_qz = (log_sum_exp(_log_qz, dim=2) - np.log(B - 1))  # shape = (_n_sample, n_batch)

            samplify['log_qzx'].append(_log_qzx)  # shape = (_n_sample, n_batch)
            samplify['log_qz'].append(_log_qz)  # shape = (_n_sample, n_batch)
            samplify['z'].append(_zk)  # shape = (_n_sample, n_batch, out_dim)

        for key in samplify.keys():
            samplify[key] = torch.cat(samplify[key], dim=0)  # shape = (n_sample, ?)

        # ----- Importance sampling for MI -----
        mi = samplify['log_qzx'].mean(0) - samplify['log_qz'].mean(0)

        return mi, samplify['z'].transpose(0, 1)
Example #28
    def build(self):
        print('----- Loading language model data -----')
        self.train_set = Yelp('train', False, config.sentiment, config.direction)
        self.test_set = Yelp('test', False, config.sentiment, config.direction)
        self.val_set = Yelp('dev', False, config.sentiment, config.direction)

        self.ntokens = self.train_set.vocab.size
        self.go = self.train_set.go
        self.eos = self.train_set.eos
        self.pad = self.train_set.pad
        self.word_weight = gpu_wrapper(torch.ones(self.ntokens))
        self.word_weight[self.pad] = 0.

        self.model = MODEL.RNNModel(config.model, self.ntokens, config.emsize, config.nhid, config.nlayers,
                                    config.dropout, config.dropouth, config.dropouti, config.dropoute, config.wdrop,
                                    config.tied)
Example #29
    def forward(self, sample_probs, reward, mask=None):
        """

        :param sample_probs: shape = (n_batch, *)
        :param reward: shape = (n_batch, )
        :param mask: shape = (n_batch, *) or None
        :return:
        """
        if sample_probs is None:
            return gpu_wrapper(torch.zeros([1]).squeeze(0))
        sample_probs = sample_probs.contiguous().view(-1)
        sample_logprobs = torch.log(sample_probs)
        reward = reward.contiguous().view(-1)
        if mask is not None:
            mask = mask.float().contiguous().view(-1)
            output = -sample_logprobs * reward * mask
            output = torch.sum(output) / torch.sum(mask)
        else:
            output = -sample_logprobs * reward
            output = output.mean()
        return output
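A hedged usage sketch of the reward-weighted objective above; note that the reward is flattened to match sample_probs element-wise, so a per-sequence reward has to be expanded over the time dimension first (all names below are hypothetical):

    # 4 sequences of 16 sampled tokens, one scalar reward per sequence.
    sample_probs = gpu_wrapper(torch.rand(4, 16))                      # probabilities of the sampled tokens
    reward = gpu_wrapper(torch.randn(4)).unsqueeze(1).expand(4, 16)    # broadcast the reward over timesteps
    mask = gpu_wrapper(torch.ones(4, 16))                              # 1 for real tokens, 0 for padding
    loss = rl_criterion(sample_probs, reward, mask=mask)               # scalar; minimizing it raises expected reward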
Example #30
    def sample_from_prior(self, go):
        """

        :param go: shape = (n_batch, 16)
        :return:
        """
        B = go.shape[0]

        # ----- Prior Network -----
        latent_vector = self.generate_gaussian(
            B)  # shape = (n_batch, latent_dim)

        # ----- Initial Decoding States -----
        assert self.enc_bi
        init_states = gpu_wrapper(
            torch.zeros([
                self.enc_layers, B, self.n_dir * self.hid_dim
            ])).float()  # shape = (layers, n_batch, n_dir * hid_dim)

        return self.Decoder(init_states=init_states,
                            latent_vector=latent_vector,
                            helper=go)