Example #1
    def forward(self, input, hidden, users, return_h=False):
        emb, mask = embedded_dropout(
            self.encoder, input, dropout=self.dropoute if self.training else 0)
        #emb = self.idrop(emb)

        emb = emb.cuda()
        emb = self.lockdrop(emb, self.dropouti)

        if self.use_ind:
            uemb = []
            for ik in range(len(users)):
                tuemb = []
                for qk in range(len(users[ik])):
                    uex, _ = embedded_dropout(
                        self.user_embed,
                        input[ik][qk],
                        dropout=self.dropoute if self.training else 0,
                        mask=mask)
                    tuemb.append(uex)
                uemb.append(torch.stack(tuemb))

            uemb = torch.stack(uemb).cuda()

            if self.induse == 'cat':
                raw_output = torch.cat([emb, uemb], 2)
            elif self.induse == 'sum':
                raw_output = emb + uemb
        else:
            uemb = None
            raw_output = emb

        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        result = output.view(output.size(0) * output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden
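
All of the examples on this page lean on two helpers: embedded_dropout (drops whole rows of the embedding matrix) and a locked/variational dropout module called through self.lockdrop. The sketch below shows what these typically look like in AWD-LSTM-style code, modeled on the Salesforce awd-lstm-lm conventions; the exact signatures vary per example (Example #1 above, for instance, uses a variant that also returns the dropout mask), so treat it as an illustration rather than the implementation any single example imports.

import torch
import torch.nn as nn
import torch.nn.functional as F


def embedded_dropout(embed, words, dropout=0.1):
    """Drop entire embedding rows (whole words) with probability `dropout` during training."""
    if dropout:
        keep = 1.0 - dropout
        mask = embed.weight.data.new_empty((embed.weight.size(0), 1)).bernoulli_(keep) / keep
        masked_weight = mask.expand_as(embed.weight) * embed.weight
    else:
        masked_weight = embed.weight
    return F.embedding(words, masked_weight, embed.padding_idx, embed.max_norm,
                       embed.norm_type, embed.scale_grad_by_freq, embed.sparse)


class LockedDropout(nn.Module):
    """Variational dropout: one mask per (batch, feature), reused at every time step."""
    def forward(self, x, dropout=0.5):
        if not self.training or not dropout:
            return x
        # x is (seq_len, batch, features); sample the mask once and expand along time
        keep = 1.0 - dropout
        mask = x.data.new_empty(1, x.size(1), x.size(2)).bernoulli_(keep) / keep
        return mask.expand_as(x) * x
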
Example #2
    def forward(self, inputWords, inputLang, hidden, return_h=False):
        wordEmb = embedded_dropout(
            self.word_encoder,
            inputWords,
            dropout=self.dropoute if self.training else 0)
        wordEmb = self.lockdrop(wordEmb, self.dropouti)

        langEmb = embedded_dropout(
            self.lang_encoder,
            inputLang,
            dropout=self.dropoute if self.training else 0)
        langEmb = self.lockdrop(langEmb, self.dropouti)

        if self.useLangEncoder:
            raw_output = torch.cat((wordEmb, langEmb), 2)
        else:
            raw_output = wordEmb
        new_hidden = []
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        predBasis = output.view(
            output.size(0) * output.size(1), output.size(2))

        langPred = self.langDecoder(predBasis)

        decoded = self.decoder(predBasis)

        if self.langDecoderBias is not None:
            biasTerm = self.langDecoderBias(langPred)
            decoded += biasTerm

        result = decoded.view(output.size(0), output.size(1), decoded.size(1))
        langResult = langPred.view(output.size(0), output.size(1),
                                   langPred.size(1))

        if return_h:
            return result, langResult, hidden, raw_outputs, outputs
        return result, langResult, hidden
Example #3
    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(
            self.encoder, input,
            dropout=self.dropoute if self.training else 0)  #dropoute=0.1
        #emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti)  #dropouti=0.65

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output,
                                           self.dropouth)  #dropouth=0.3
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(
            raw_output, self.dropout
        )  #output dropout=0.4 (not mentioned in Lookahead paper)
        outputs.append(output)

        result = output.view(output.size(0) * output.size(1), output.size(2))
        if return_h:  #only on training
            return result, hidden, raw_outputs, outputs
        return result, hidden
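
The inline comments in the example above record the dropout rates used for this configuration; gathered in one place for reference (the values are taken verbatim from those comments, nothing else is implied about the rest of the setup):

# Dropout rates quoted in the comments of the example above
dropout_config = dict(
    dropoute=0.1,   # word-level (embedding) dropout
    dropouti=0.65,  # locked dropout on the embedded input
    dropouth=0.3,   # locked dropout between RNN layers
    dropout=0.4,    # locked dropout on the final output
)
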
Example #4
    def forward(self,
                input,
                hidden,
                return_h=False,
                return_prob=False,
                detach=False):
        batch_size = input.size(1)

        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        #emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        # output = self.surrogate_net(output)

        outputs.append(output)
        if detach:
            output = output.detach()

        latent = self.latent(output)
        latent = self.lockdrop(latent, self.dropoutl)
        logit = self.decoder(latent.view(-1, self.ninp))

        prior_logit = self.prior(output).contiguous().view(-1, self.n_experts)
        prior = nn.functional.softmax(prior_logit, dim=-1)

        prob = nn.functional.softmax(logit.view(-1, self.ntoken), dim=-1).view(
            -1, self.n_experts, self.ntoken)
        prob = (prob * prior.unsqueeze(2).expand_as(prob)).sum(1)

        if return_prob:
            model_output = prob
        else:
            log_prob = torch.log(prob.add_(1e-8))
            model_output = log_prob

        model_output = model_output.view(-1, batch_size, self.ntoken)

        if return_h:
            return model_output, hidden, raw_outputs, outputs
        return model_output, hidden
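
Example #4 ends with a mixture-of-softmaxes head: the latent projection produces one context vector per expert, each expert is decoded and softmaxed over the vocabulary, and the prior softmax mixes the experts. The standalone sketch below reproduces just that tensor bookkeeping on dummy data; the sizes and layer definitions (latent_layer, decoder, prior_layer) are illustrative assumptions, not the example's real configuration.

import torch
import torch.nn as nn

seq_len, batch, nhid, ninp, ntoken, n_experts = 5, 2, 6, 4, 10, 3
output = torch.randn(seq_len, batch, nhid)           # stands in for the dropped RNN output

latent_layer = nn.Sequential(nn.Linear(nhid, n_experts * ninp), nn.Tanh())
decoder = nn.Linear(ninp, ntoken)
prior_layer = nn.Linear(nhid, n_experts, bias=False)

latent = latent_layer(output)                        # (seq, batch, n_experts * ninp)
logit = decoder(latent.view(-1, ninp))               # one softmax input per expert
prior = torch.softmax(prior_layer(output).view(-1, n_experts), dim=-1)

prob = torch.softmax(logit.view(-1, ntoken), dim=-1).view(-1, n_experts, ntoken)
prob = (prob * prior.unsqueeze(2)).sum(1)            # mixture over experts
print(prob.sum(-1))                                  # every row sums to 1
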
Example #5
    def forward(self, input, hidden, v=None, s=None, return_h=False):
        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        #emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        new_velocity = []
        new_scale = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            if self.rnn_type == 'MLSTM' or self.rnn_type == 'NLSTM':
                raw_output, new_h, new_v = rnn(raw_output, hidden[l], v[l])
            elif self.rnn_type == 'ALSTM':
                raw_output, new_h, new_v, new_s = rnn(raw_output, hidden[l],
                                                      v[l], s[l])
            else:
                raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            if self.rnn_type == 'MLSTM' or self.rnn_type == 'NLSTM':
                new_velocity.append(new_v)
            if self.rnn_type == 'ALSTM':
                new_velocity.append(new_v)
                new_scale.append(new_s)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden
        if self.rnn_type == 'MLSTM' or self.rnn_type == 'NLSTM':
            v = new_velocity

        if self.rnn_type == 'ALSTM':
            v = new_velocity
            s = new_scale

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        result = output.view(output.size(0) * output.size(1), output.size(2))
        if return_h:
            if self.rnn_type == 'MLSTM' or self.rnn_type == 'NLSTM':
                return result, hidden, v, raw_outputs, outputs
            elif self.rnn_type == 'ALSTM':
                return result, hidden, v, s, raw_outputs, outputs
            else:
                return result, hidden, raw_outputs, outputs
        if self.rnn_type == 'MLSTM' or self.rnn_type == 'NLSTM':
            return result, hidden, v
        elif self.rnn_type == 'ALSTM':
            return result, hidden, v, s
        else:
            return result, hidden
Example #6
    def forward(self, input, hidden, return_h=False, train=False):
        emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training and self.use_dropout else 0)
        emb = self.lockdrop(emb, self.dropouti if self.use_dropout else 0)
        raw_output = emb
        new_hidden = []
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)

            if l != self.nlayers - 1:
                raw_output = self.lockdrop(raw_output, self.dropouth if self.use_dropout else 0)
                outputs.append(raw_output)

        hidden = new_hidden
        output = self.lockdrop(raw_output, self.dropout if self.use_dropout else 0)
        outputs.append(output)
        result = output.view(output.size(0)*output.size(1), output.size(2))
        weight = self.encoder.weight if self.tie_weights or self.joint_emb is not None else self.decoder.weight
        bias = self.decoder.bias if self.tie_weights or self.joint_emb is None else self.bias.weight

        if self.joint_emb is not None:
            result, weight = self.apply_drill(output, weight)

        if return_h:
            return result, weight, bias, hidden, raw_outputs, outputs
        return result, weight, bias, hidden
Example #7
    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(self.encoder, input)

        #emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            #print rnn
            raw_output, new_h = self.run_lstmcell(rnn, raw_output, hidden[l])
            del (rnn)
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        #hidden = new_hidden
        #hidden = raw_output[-1], new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        decoded = self.decoder(
            output.view(output.size(0) * output.size(1), output.size(2)))
        result = decoded.view(output.size(0), output.size(1), decoded.size(1))
        if return_h:
            return result, raw_outputs, outputs
        return result
Example #8
    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        result = output
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden
Example #9
    def forward(self, input, hidden, return_h=False):
        emb, sigma = embedded_dropout(self.encoder, torch.ones_like(self.encoder.weight), input,
                                      dropout=self.dropoute if self.training else 0,
                                      is_training=self.training)
        if self.training:
            m = torch.distributions.normal.Normal(torch.zeros_like(sigma), torch.ones_like(sigma) * 1)
            sigma = m.sample() * 0.2
            emb += sigma 
        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        result = output.view(output.size(0)*output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden
Example #10
    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0)
        #emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        result = output.view(output.size(0)*output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden
Example #11
    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []

        # this is multilayer because the Salesforce version is, and I'll need it later.
        # but for now, all experiments will be with 1-layer versions
        raw_outputs = []
        outputs = []
        for layer, rnn in enumerate(self.rnns):
            raw_output, new_h = rnn(raw_output, hidden[layer])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if layer != self.nlayers - 1:
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)

        decoded = self.decoder(
            output.view(output.size(0) * output.size(1), output.size(2)))
        result = decoded.view(output.size(0), output.size(1), decoded.size(1))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden
Example #12
    def forward(self, input, hidden, return_h=False):

        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output

            rnn.h2h.mask_weights(self.wdrop)
            rnn.i2h.mask_weights(0)

            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropouto)
        outputs.append(output)

        decoded = self.decoder(
            output.view(output.size(0) * output.size(1), output.size(2)))
        result = decoded.view(output.size(0), output.size(1), decoded.size(1))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden
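
Example #12 is the only one that re-masks the recurrent weights on every forward call (rnn.h2h.mask_weights(self.wdrop)), i.e. DropConnect on the hidden-to-hidden matrix in the spirit of AWD-LSTM's weight drop. The custom h2h/i2h modules are not shown on this page, so the sketch below is a hypothetical stand-in for what such a mask_weights method might do, not the example's actual class.

import torch
import torch.nn as nn
import torch.nn.functional as F


class MaskedLinear(nn.Linear):
    """nn.Linear whose weight matrix can be DropConnect-masked between calls."""

    def mask_weights(self, dropout):
        # sample a fresh mask, scaled so the expected weight is unchanged; 0 disables masking
        if dropout:
            keep = 1.0 - dropout
            self._mask = torch.bernoulli(torch.full_like(self.weight, keep)) / keep
        else:
            self._mask = None

    def forward(self, x):
        mask = getattr(self, '_mask', None)
        weight = self.weight if mask is None else self.weight * mask
        return F.linear(x, weight, self.bias)
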
Example #13
    def forward(self, input, hidden, c_hidden, return_h=False):
        emb = embedded_dropout(self.encoder, input, dropout=self.dropoute if self.training else 0)
        emb_drop = self.lockdrop(emb, self.dropouti)
        rnn_h = emb_drop
        new_hidden = []
        rnn_hs, dropped_rnn_hs = [], []
        span_scores = None
        for l, rnn in enumerate(self.rnns):
            rnn_h, new_h = rnn(rnn_h, hidden[l])
            rnn_hs.append(rnn_h)
            raw_rnn_h = rnn_h
            new_hidden.append(new_h)
            if l != self.nlayers - 1:
                rnn_h = self.lockdrop(rnn_h, self.dropouth)
                dropped_rnn_hs.append(rnn_h)
            if l == self.nlayers - 2:
                span_scores, context, ch = self._att_(rnn_h, c_hidden)
                context = self.lockdrop(context, self.dropouth)
                feats = torch.cat([rnn_h, context], dim=2)
                gate = self._hidden_gate_(feats).sigmoid()
                context = self.nonlinearity(self._hidden_layer_(context))
                context = self.lockdrop(context, self.dropouth)
                rnn_h = raw_rnn_h * gate + context * (1. - gate)
                rnn_h = self.lockdrop(rnn_h, self.dropouth)

        output = self.lockdrop(rnn_h, self.dropout)
        dropped_rnn_hs.append(output)
        assert len(dropped_rnn_hs) == len(rnn_hs)
        result = output.view(output.size(0) * output.size(1), output.size(-1))
        if return_h:
            return result, span_scores, new_hidden, ch, rnn_hs, dropped_rnn_hs
        return result, span_scores, new_hidden, ch
Example #14
 def forward(self, input, hidden, return_h=False):
     emb = embedded_dropout(self.encoder,
                            input,
                            dropout=self.dropoute if self.training else 0)
     emb = self.lockdrop(emb, self.dropouti)
     # for rnn in self.rnns:
     #     if self.wdrop:
     #         tmp = rnn.module
     #     else:
     #         tmp = rnn
     #     tmp.flatten_parameters()
     raw_output = emb
     new_hidden = []
     raw_outputs = []
     outputs = []
     for l, rnn in enumerate(self.rnns):
         current_input = raw_output
         raw_output, new_h = rnn(raw_output, hidden[l])
         new_hidden.append(new_h)
         raw_outputs.append(raw_output)
         if l != self.nlayers - 1:
             #self.hdrop(raw_output)
             raw_output = self.lockdrop(raw_output, self.dropouth)
             outputs.append(raw_output)
     hidden = new_hidden
     output = self.lockdrop(raw_output, self.dropout)
     outputs.append(output)
     result = output.view(output.size(0) * output.size(1), output.size(2))
     if return_h:
         return result, hidden, raw_outputs, outputs
     return result, hidden
Example #15
    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        #emb = self.idrop(emb)

        # emb = self.encoder(input)
        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        result = output.view(output.size(0) * output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden
Example #16
    def forward(self, input, hidden, return_h=False):
        nlayers = self.nlayers
        mask = compute_mask(input.transpose(1, 0))

        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        # emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti)

        lengths = mask.eq(1).long().sum(1)  # bs
        lengths_sort, idx_sort = torch.sort(lengths, dim=0,
                                            descending=True)  # bs
        _, idx_unsort = torch.sort(idx_sort, dim=0)  # bs

        emb_sort = emb.index_select(1, idx_sort)  # sl * bs * ninp
        hid_sort = [(h[0].index_select(1, idx_sort),
                     h[1].index_select(1, idx_sort)) for h in hidden]

        #         raw_output = emb_sort
        new_hidden = []
        raw_outputs = []
        raw_outputs_sorted = []
        outputs = []

        for l, rnn in enumerate(self.rnns):
            emb_sort = torch.nn.utils.rnn.pack_padded_sequence(
                emb_sort, lengths_sort)
            current_input = emb_sort
            emb_sort, new_h = rnn(emb_sort, hid_sort[l])
            emb_sort, _ = torch.nn.utils.rnn.pad_packed_sequence(emb_sort)

            new_hidden.append(new_h)
            raw_outputs.append(emb_sort)

            if l != nlayers - 1:
                emb_sort = self.lockdrop(emb_sort, self.dropouth)
                outputs.append(emb_sort)

        raw_outputs = [
            raw_output.index_select(1, idx_unsort)
            for raw_output in raw_outputs
        ]
        new_hidden = [(h_sort[0].index_select(1, idx_unsort),
                       h_sort[1].index_select(1, idx_unsort))
                      for h_sort in new_hidden]

        hidden = new_hidden

        output = self.lockdrop(emb_sort, self.dropout)
        outputs.append(output)
        result = output.view(output.size(0) * output.size(1), output.size(2))

        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden
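
Example #16 is the only one that packs variable-length sequences, which is why it sorts the batch by length before the RNN and unsorts afterwards. The minimal, self-contained snippet below walks through the same sort, pack, run, unpack, unsort pattern with a stock nn.LSTM in the sequence-first layout used by the example.

import torch
import torch.nn as nn

seq_len, batch, ninp, nhid = 7, 3, 5, 4
emb = torch.randn(seq_len, batch, ninp)              # already padded, sequence-first
lengths = torch.tensor([7, 4, 6])                    # true length of each sequence

lengths_sort, idx_sort = torch.sort(lengths, descending=True)
_, idx_unsort = torch.sort(idx_sort)

rnn = nn.LSTM(ninp, nhid)
packed = nn.utils.rnn.pack_padded_sequence(emb.index_select(1, idx_sort), lengths_sort)
out_packed, _ = rnn(packed)
out, _ = nn.utils.rnn.pad_packed_sequence(out_packed)

out = out.index_select(1, idx_unsort)                # restore the original batch order
print(out.shape)                                     # torch.Size([7, 3, 4])
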
Example #17
    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        #emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        #print (output.view(output.size(0), output.size(1), output.size(2)).shape)

        output_fw = output.view(output.size(0), output.size(1),
                                output.size(2))[:, :, :400]
        output_bw = output.view(output.size(0), output.size(1),
                                output.size(2))[:, :, 400:]

        #print (output_fw.view(output_fw.size(0)*output_fw.size(1), output_fw.size(2)).shape)
        #print (output_bw.view(output_bw.size(0)*output_bw.size(1), output_bw.size(2)).shape)

        decoded_fw = self.decoder_fw(
            output_fw.view(
                output_fw.size(0) * output_fw.size(1), output_fw.size(2)))
        decoded_bw = self.decoder_bw(
            output_bw.view(
                output_bw.size(0) * output_bw.size(1), output_bw.size(2)))

        result_fw = decoded_fw.view(
            output_fw.size(0) * output_fw.size(1), decoded_fw.size(1))
        result_bw = decoded_bw.view(
            output_bw.size(0) * output_bw.size(1), decoded_bw.size(1))
        #result = decoded.view(output.size(0)*output.size(1), decoded.size(1))

        #result = output.view(output.size(0)*output.size(1), output.size(2))
        if return_h:
            #return result, hidden, raw_outputs, outputs
            #return result, hidden, decode_fw, decode_bw
            return result_fw, result_bw, hidden, raw_outputs, outputs
        return result_fw, result_bw, hidden
Example #18
    def evaluate(self, data, eos_tokens=None, dump_hiddens=False):

        # get weights and compute WX for all words
        weights_ih, bias_ih = self.rnn.module.weight_ih_l0, self.rnn.module.bias_ih_l0  # only one layer for the moment
        weights_hh, bias_hh = self.rnn.module.weight_hh_l0, self.rnn.module.bias_hh_l0

        all_words = torch.LongTensor([i for i in range(self.ntoken)]).cuda()
        all_words = embedded_dropout(
            self.encoder,
            all_words,
            dropout=self.dropoute if self.training else 0)

        all_words_times_W = torch.nn.functional.linear(all_words, weights_ih,
                                                       bias_ih)

        # iterate over data set and compute loss
        total_loss, hidden = 0, self.init_hidden(1)
        i = 0

        entropy, hiddens, all_hiddens = [], [], []
        while i < data.size(0):

            hidden_times_U = torch.nn.functional.linear(
                hidden[0].repeat(self.ntoken, 1), weights_hh, bias_hh)
            output = self.nonlinearity(all_words_times_W + hidden_times_U)

            if dump_hiddens: hiddens.append(output[data[i]].data.cpu().numpy())

            distance = self.dist_fn(hidden[0], output, self.bias)
            softmaxed = torch.nn.functional.log_softmax(self.temp *
                                                        distance.view(-1),
                                                        dim=0)
            raw_loss = -softmaxed[data[i]].item()

            total_loss += raw_loss / data.size(0)
            entropy.append(raw_loss)

            if eos_tokens is not None and data[i].data.cpu().numpy()[0] in eos_tokens:
                hidden = self.init_hidden(1)
                if dump_hiddens:
                    all_hiddens.append(hiddens)
                    hiddens = []
            else:
                hidden = output[data[i]].view(1, 1, -1)
            hidden = repackage_hidden(hidden)

            i = i + 1

        all_hiddens = all_hiddens if eos_tokens is not None else hiddens

        if dump_hiddens:
            return total_loss, np.array(entropy), all_hiddens
        else:
            return total_loss, np.array(entropy)
Example #19
    def forward(self, input, hidden, return_h=False, reset_experience=True):
        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        #emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        if self.debug:
            debug_mems = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            if 'dnc' in self.rnn_type.lower():
                raw_output = raw_output.transpose(0, 1)
                if self.debug:
                    raw_output, new_h, debug = rnn(
                        raw_output,
                        hidden[l],
                        reset_experience=reset_experience,
                        pass_through_memory=True)
                    debug_mems.append(debug)
                else:
                    raw_output, new_h = rnn(raw_output,
                                            hidden[l],
                                            reset_experience=reset_experience)
                raw_output = raw_output.transpose(0, 1)
            else:
                raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout).contiguous()
        outputs.append(output)

        decoded = self.decoder(
            output.view(output.size(0) * output.size(1), output.size(2)))
        result = decoded.view(output.size(0), output.size(1), decoded.size(1))
        if return_h:
            if self.debug:
                return result, hidden, raw_outputs, outputs, debug_mems
            return result, hidden, raw_outputs, outputs
        if self.debug:
            return result, hidden, debug_mems
        return result, hidden
Example #20
    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        #emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            # Each rnn is a layer!
            # each raw_output has shape seq_len x batch_size x nb_hidden
            # new_h is a tuple of 2 elements, each of size 1 x batch_size x nb_hidden (last h and last c)
            if self.rnn_type not in ('MYLSTM', 'MYFASTLSTM', 'SIMPLEPLASTICLSTM',
                                     'PLASTICLSTM', 'FASTPLASTICLSTM', 'SPLITLSTM'):
                raw_output, new_h = rnn(raw_output, hidden[l])
            else:
                single_h = hidden[l]  # a tuple of (h, c); for plastic LSTMs it also includes Hebb as a third element
                singleouts = []
                for z in range(raw_output.shape[0]):
                    singleout, single_h = rnn(raw_output[z], single_h)
                    #if z==0:
                    #    print("RANDOM NUMBER 1:",float(torch.rand(1)))
                    singleouts.append(singleout)
                new_h = single_h  # the last (h,c[,hebb]) after the sequence is processed
                raw_output = torch.stack(singleouts)
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                # lockdrop will zero out some output units over the whole sequence (separately chosen for each batch, but fixed across sequence)
                #pdb.set_trace()
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
                #pdb.set_trace()
        hidden = new_hidden
        #pdb.set_trace()

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        result = output.view(output.size(0) * output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden
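
For its custom plastic cells, Example #20 falls back to unrolling the sequence one time step at a time and carrying the state tuple forward by hand. The snippet below shows the same pattern with a stock nn.LSTMCell so it runs on its own; the plastic cells in the example additionally thread a Hebbian trace through that tuple.

import torch
import torch.nn as nn

seq_len, batch, ninp, nhid = 6, 2, 8, 8
x = torch.randn(seq_len, batch, ninp)
cell = nn.LSTMCell(ninp, nhid)
h = torch.zeros(batch, nhid)
c = torch.zeros(batch, nhid)

step_outputs = []
for z in range(x.shape[0]):
    h, c = cell(x[z], (h, c))            # one time step, state carried forward by hand
    step_outputs.append(h)
raw_output = torch.stack(step_outputs)   # (seq_len, batch, nhid), like the full-sequence path
print(raw_output.shape)
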
Example #21
    def forward(self, input, hidden):
        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        emb = self.lockdrop(emb, self.dropouti)

        output, h_n = self.rnns(emb, hidden)

        output = self.lockdrop(output, self.dropout)

        output = output.view(output.size(0) * output.size(1), output.size(2))
        return output, h_n
Example #22
    def forward(self, words, stag, mask):
        """
        tokens: Variable of LongTensor, shape (bsize, ntoken,)
        mock_emb: mock embedding for convolution overhead
        """

        bsz, ntoken = words.size()
        emb_words = embedded_dropout(self.encoder, words, dropout=self.dropoute if self.training else 0)
        emb_words = self.drop(emb_words)

        emb_stags = embedded_dropout(self.tag_encoder, stag, dropout=self.dropoute if self.training else 0)
        emb_stags = self.drop(emb_stags)


        def run_rnn(input, rnn, lengths):
            sorted_idx = numpy.argsort(lengths)[::-1].tolist()
            rnn_input = pack_padded_sequence(input[sorted_idx], lengths[sorted_idx], batch_first=True)
            rnn_out, _ = rnn(rnn_input)  # (bsize, ntoken, hidsize*2)
            rnn_out, _ = pad_packed_sequence(rnn_out, batch_first=True)
            rnn_out = rnn_out[numpy.argsort(sorted_idx).tolist()]

            return rnn_out

        sent_lengths = (mask.sum(dim=1)).data.cpu().numpy().astype('int')
        dst_lengths = sent_lengths - 1
        emb_plus_tag = torch.cat([emb_words, emb_stags], dim=-1)

        rnn1_out = run_rnn(emb_plus_tag, self.word_rnn, sent_lengths)

        terminal = self.terminal(rnn1_out.view(-1, self.hid_size*2))
        tag = self.arc(terminal)  # (bsize, ndst, tagsize)

        conv_out = self.conv1(rnn1_out.permute(0, 2, 1)).permute(0, 2, 1)  # (bsize, ndst, hidsize)
        rnn2_out = run_rnn(conv_out, self.arc_rnn, dst_lengths)

        non_terminal = self.non_terminal(rnn2_out.view(-1, self.hid_size*2))
        distance = self.distance(rnn2_out.view(-1, self.hid_size*2)).squeeze(dim=-1)  # (bsize, ndst)
        arc = self.arc(non_terminal)  # (bsize, ndst, arcsize)
        return distance.view(bsz, ntoken - 1), arc.contiguous().view(-1, self.arc_size), tag.view(-1, self.arc_size)
Example #23
 def forward(self, inputs):
     embedded = embedded_dropout(
         self.embedding, inputs, dropout=self.embed_drop_ratio if self.training else 0)
     embedded = self.lockdrop(embedded, self.locked_drope)
     raw_output = embedded
     for l, rnn in enumerate(self.rnns):
         raw_output, _ = rnn(raw_output)
         if l != self.nlayer-1:
             raw_output = self.lockdrop(raw_output, self.locked_droph)
     outputs = self.lockdrop(raw_output, self.locked_dropo)
     dropped_output = outputs
     outputs = self.out(outputs)
     return outputs, raw_output, dropped_output
Example #24
    def forward(self, input, hidden):
        emb = embedded_dropout(self.input_embedding,
                               input,
                               dropout=self.dropoute if self.training else 0)
        emb = self.lockdrop(emb, self.dropouti)
        # emb shape: (S, N, emsize)
        # hidden shape: (nlayers, N, nhid)
        output, h_n = self.rnns(emb, hidden)
        # output shape: (S, N, nhid)
        #  h_n shape: (nlayers, N, nhid)
        output = self.lockdrop(output, self.dropout)

        output = output.view(output.size(0) * output.size(1), output.size(2))
        return output, h_n
Example #25
    def forward(self, input, hidden, return_h=False, return_prob=False):
        batch_size = input.size(1)

        # usedp = False if we are at normal eval
        emb = embedded_dropout(self.encoder, input, dropout=self.dropoute, usedp=(self.training and self.use_dropout))
        # emb = self.idrop(emb)

        emb = self.lockdrop(emb, dropout=self.dropouti if self.use_dropout else 0)

        raw_output = emb
        new_hidden = []
        # raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                # self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, dropout=self.dropouth if self.use_dropout else 0)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, dropout=self.dropout if self.use_dropout else 0)
        outputs.append(output)  # this is G

        latent = self.latent(output)  # this is H (tanh(W1 * G))
        latent = self.lockdrop(latent, dropout=self.dropoutl if self.use_dropout else 0)
        logit = self.decoder(latent.view(-1, self.ninp))  # this is the logit = W2 * H

        prior_logit = self.prior(output).contiguous().view(-1, self.n_experts)  # W3 * G
        prior = nn.functional.softmax(prior_logit, -1)  # softmax ( W3 * G )

        prob = nn.functional.softmax(logit.view(-1, self.ntoken), -1).view(-1, self.n_experts, self.ntoken)  # N x M
        prob = (prob * prior.unsqueeze(2).expand_as(prob)).sum(1)

        if return_prob:
            model_output = prob
        else:
            log_prob = torch.log(prob.add_(1e-8))
            model_output = log_prob

        model_output = model_output.view(-1, batch_size, self.ntoken)

        if return_h:
            return model_output, hidden, raw_outputs, outputs
        return model_output, hidden
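
Restating the inline comments of Example #25 as one formula (the symbols follow those comments: G is the dropped RNN output, K = n_experts; this is a summary of the code above, not an addition to it):

H = \tanh(W_1 G), \qquad \pi = \operatorname{softmax}(W_3 G), \qquad
p(w) = \sum_{k=1}^{K} \pi_k \, \operatorname{softmax}(W_2 H_k)_w
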
Example #26
    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)

        raw_output, hidden = self.rnn(emb, hidden)
        # self.distance = distances
        output = self.lockdrop(raw_output, self.dropout)

        result = output.view(output.size(0) * output.size(1), output.size(2))
        decoded = self.decoder(result)

        # result = output.view(output.size(0)*output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_output, decoded
        return result, hidden, decoded
Example #27
    def forward(self, input, hidden, use_dropout=True, return_h=False):
        #hiddenstack = torch.stack(hidden[1],0)#ADDED
        #hiddenparam = torch.nn.Parameter(hiddenstack)# ADDED
        if not use_dropout:
            if self.rnn_type == 'QRNN': raise NotImplementedError
        use_dropout = use_dropout and self.use_dropout
        emb = embedded_dropout(
            self.encoder,
            input,
            dropout=self.dropoute if self.training and use_dropout else 0)
        if use_dropout:
            emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output

            # if self.use_dropout isn't true, then the rnn doesn't even have a use_dropout param
            if self.use_dropout and self.rnn_type == 'LSTM':
                raw_output, new_h = rnn(raw_output,
                                        hidden[l],
                                        use_dropout=use_dropout)
            else:
                raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)

            if l != self.nlayers - 1:
                if use_dropout:
                    raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        if use_dropout:
            output = self.lockdrop(raw_output, self.dropout)
        else:
            output = raw_output
        outputs.append(output)

        result = output.view(output.size(0) * output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs, emb
        return result, hidden
Example #28
    def forward(self, input, prev_targets, hidden, return_h=False):

        combined_targets = torch.cat((input.unsqueeze(-1), prev_targets.unsqueeze(-1)), -1)

        emb = embedded_dropout(self.encoder, combined_targets, dropout=self.dropoute if self.training else 0)


        emb = emb.view(input.shape[0],input.shape[1], -1)

        emb = self.lockdrop(emb, self.dropouti)

        combined = emb

        raw_output = combined
        new_hidden = []
        raw_outputs = []
        outputs = []
        for l, rnn in enumerate(self.rnns):

            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        out_size_orig0 = output.size(0)
        out_size_orig1 = output.size(1)

        output_c = torch.tanh(self.combiner(output.view(output.size(0)*output.size(1), output.size(2))))

        output_c = output_c.view(output.size(0), output.size(1), -1)


        output_c_dropped = self.lockdrop(output_c, self.dropoutcomb)


        decoded = self.decoder(output_c_dropped)
        result = decoded.view(out_size_orig0, out_size_orig1, decoded.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden
Example #29
    def forward(self, input, hidden, partial_output, return_h=False):
        emb = embedded_dropout(self.encoder,
                               input,
                               dropout=self.dropoute if self.training else 0)
        #emb = self.idrop(emb)

        emb = self.lockdrop(emb, self.dropouti)

        raw_output = emb
        new_hidden = []
        #raw_output, hidden = self.rnn(emb, hidden)
        raw_outputs = []
        outputs = []
        timescale_invgamma = scipy.stats.invgamma.isf(np.linspace(0, 1, 1151),
                                                      a=0.56,
                                                      scale=1)[1:]
        np.save('timescale_invgamma.txt', timescale_invgamma)
        for l, rnn in enumerate(self.rnns):
            current_input = raw_output
            #print('Partial', partial_output)
            if partial_output:
                if l == 2:
                    i = partial_output
                    current_input[:, :, (i - 1) * 50:(i) * 50] = torch.tensor(
                        np.zeros(50), dtype=torch.float)
                    #print(np.mean(timescale_invgamma[(i-1)*50:(i)*50]))
                    #print(torch.sum(raw_output[:,:,(i-1)*50:(i)*50] ))
                    #print(torch.sum(raw_output[:,:,(i)*50:] ))
                    #print(i*50)
            raw_output, new_h = rnn(raw_output, hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            if l != self.nlayers - 1:
                #self.hdrop(raw_output)
                raw_output = self.lockdrop(raw_output, self.dropouth)
                outputs.append(raw_output)
        hidden = new_hidden

        output = self.lockdrop(raw_output, self.dropout)
        outputs.append(output)

        result = output.view(output.size(0) * output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs
        return result, hidden
Example #30
    def forward(self, input, hidden, return_h=False):
        emb = embedded_dropout(
            self.encoder, input,
            dropout=self.dropoute if self.training else 0
        )

        emb = self.lockdrop(emb, self.dropouti)

        raw_output, hidden, raw_outputs, outputs, distances, nm_hs = self.rnn(emb, hidden)
        self.distance = distances

        output = self.lockdrop(raw_output, self.dropout)

        result = output.view(output.size(0)*output.size(1), output.size(2))
        if return_h:
            return result, hidden, raw_outputs, outputs, nm_hs
        else:
            return result, hidden
Example #31
 def forward(self, X):
     emb = embedded_dropout(self.embedding, X, dropout=self.edrop if self.training else 0)
     if self.standard_dropout:
         raw_output = F.dropout(emb, p=self.idrop, training=self.training)
     else:
         raw_output = self.lockdrop(emb, self.idrop)
     new_hidden, new_cell_state = [], []
     for l, rnn in enumerate(self.rnns):
         raw_output, (new_h, new_c) = rnn(raw_output)
         if self.standard_dropout:
             raw_output = F.dropout(raw_output, p=self.odrop, training=self.training)
         else:
             raw_output = self.lockdrop(raw_output, self.odrop)         
         new_hidden.append(new_h)
         new_cell_state.append(new_c)
     hidden = torch.cat(new_hidden, 0)
     cell_state = torch.cat(new_cell_state, 0)
     final_output = self.output_layer(raw_output)
     return final_output[:, -1, 0], hidden, cell_state
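
The last example switches between standard dropout and locked dropout via self.standard_dropout. The difference, in miniature: standard dropout samples an independent mask at every position, while locked dropout samples one mask and reuses it along the time axis. In the toy below the time axis is dim 1 to match this example's batch-first slicing (final_output[:, -1, 0]); the repo's own LockedDropout may mask a different axis.

import torch
import torch.nn.functional as F

x = torch.ones(2, 5, 4)                               # (batch, seq_len, features)
standard = F.dropout(x, p=0.5, training=True)         # independent mask at every position
keep = 0.5
locked_mask = torch.bernoulli(torch.full((2, 1, 4), keep)) / keep
locked = x * locked_mask                              # one mask, broadcast along dim 1
print(standard[0])
print(locked[0])
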