class Seq2Seq(BaseModel):
    def __init__(self, vocab_size, wordvec_size, hidden_size):
        V = vocab_size
        D = wordvec_size
        H = hidden_size
        self.encoder = Encoder(V, D, H)
        self.decoder = Decoder(V, D, H)
        self.softmax = TimeSoftmaxWithLoss()
        self.params = self.encoder.params + self.decoder.params
        self.grads = self.encoder.grads + self.decoder.grads

    def forward(self, xs, ts):
        # Shift the targets by one step: the decoder consumes ts[:, :-1] as
        # input and learns to predict ts[:, 1:] (teacher forcing).
        decoder_xs = ts[:, :-1]
        decoder_ts = ts[:, 1:]
        h = self.encoder.forward(xs)
        score = self.decoder.forward(decoder_xs, h)
        loss = self.softmax.forward(score, decoder_ts)
        return loss

    def backward(self, dout=1):
        dout = self.softmax.backward(dout)
        dh = self.decoder.backward(dout)
        dout = self.encoder.backward(dh)
        return dout

    def generate(self, xs, start_id, sample_size):
        h = self.encoder.forward(xs)
        sampled = self.decoder.generate(h, start_id, sample_size)
        return sampled
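
For reference, a minimal usage sketch (not from the original source): it assumes the Encoder, Decoder, and TimeSoftmaxWithLoss layers defined alongside this class, and the optimizer line is a hypothetical placeholder for whatever update rule the surrounding training code uses.

import numpy as np

# Hypothetical shapes: batch of 32, sequence length 10, vocabulary of 13.
model = Seq2Seq(vocab_size=13, wordvec_size=100, hidden_size=100)
xs = np.random.randint(0, 13, (32, 10))  # source token ids
ts = np.random.randint(0, 13, (32, 10))  # target token ids

loss = model.forward(xs, ts)  # scalar softmax cross-entropy loss
model.backward()              # fills model.grads via the layer chain
# optimizer.update(model.params, model.grads)  # hypothetical optimizer step

# Greedy decoding from a single encoded source sequence.
sampled = model.generate(xs[:1], start_id=0, sample_size=10)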
import unittest

import numpy as np

# Decoder is assumed to be importable from the module under test.


class TestDecoder(unittest.TestCase):
    def setUp(self):
        vocab_size = 13
        wordvec_size = 100
        hidden_size = 100
        self.decoder = Decoder(vocab_size, wordvec_size, hidden_size)
        self.xs = np.random.randint(0, 13, (13, 100))  # (batch, time) token ids
        self.h = np.random.randn(13, 100)  # (batch, hidden) encoder state

    def test_forward(self):
        score = self.decoder.forward(self.xs, self.h)
        self.assertEqual((13, 100, 13), score.shape)

    def test_backward(self):
        score = self.decoder.forward(self.xs, self.h)
        # Reuse the forward output as a dummy upstream gradient; only its shape matters.
        dh = self.decoder.backward(score)
        self.assertEqual((13, 100), dh.shape)

    def test_generate(self):
        h = np.random.randn(1, 100)
        start_id = 0
        sample_size = 10
        sampled = self.decoder.generate(h, start_id, sample_size)
        self.assertEqual(10, len(sampled))
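
An analogous test for the full Seq2Seq model might look like the sketch below; it reuses the shapes from TestDecoder, and treating the returned loss as a scalar is an assumption about TimeSoftmaxWithLoss.

class TestSeq2Seq(unittest.TestCase):
    def setUp(self):
        self.model = Seq2Seq(vocab_size=13, wordvec_size=100, hidden_size=100)
        self.xs = np.random.randint(0, 13, (13, 100))
        self.ts = np.random.randint(0, 13, (13, 100))

    def test_forward_backward(self):
        loss = self.model.forward(self.xs, self.ts)
        self.assertEqual((), np.asarray(loss).shape)  # assumed scalar loss
        self.model.backward()  # should propagate without error

    def test_generate(self):
        sampled = self.model.generate(self.xs[:1], start_id=0, sample_size=10)
        self.assertEqual(10, len(sampled))


if __name__ == '__main__':
    unittest.main()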
Example #3
            -- Set content target
            criterion.content_layers[1]:setTarget(outputLatent)

            -- Compute loss
            output = dec:forward(outputLatent):clone() -- forward through decoder, generate transformed images
            loss = criterion:forward(output) -- forward through loss network, compute loss functions
            contentLoss = criterion.contentLoss
            styleLoss = criterion.styleLoss
            tvLoss = 0
            if opt.tvWeight > 0 then
                tvLoss = criterion.net:get(2).loss
            end

            -- Backpropagate gradients
            decGrad = criterion:backward(output) -- backprop through loss network, compute gradients w.r.t. the transformed images
            dec:backward(outputLatent, decGrad) -- backprop gradients through decoder

            -- Optionally train the decoder to reconstruct style images
            styleReconLoss = 0
            if opt.reconStyle then
                criterion:setContentTarget(styleInput)
                styleRecon = dec:forward(styleLatent):clone()
                styleReconLoss = criterion:forward(styleRecon)
                decGrad = criterion:backward(styleRecon)
                dec:backward(styleLatent, decGrad)
                loss = loss + styleReconLoss
            end

            table.insert(history, {optimState.iterCounter, loss, contentLoss, styleLoss, styleReconLoss})
            maybe_print(loss, contentLoss, styleLoss, tvLoss, timer)
            if opt.reconStyle then