示例#1
0
class MultitaskBD(Layer):
    """Bidirectional model: text to image vector, and image vector back to text.

    Both directions share a single ``Embed`` layer.

    * Text to image: ``Embed`` -> ``Encode`` -> ``last`` -> ``ToVis`` ->
      ``visual_activation``.
    * Image to text: ``FromVis`` -> ``visual_activation`` gives the first
      argument of ``Decode``; the second is the embedded ``out_prev``. The
      decoder output then goes through ``PredictT``, ``Embed.unembed`` and
      ``softmax3d``.
    """

    def __init__(
        self,
        size_vocab,
        size_embed,
        size,
        size_out,
        depth,
        out_depth=1,  # FIXME USE THIS PARAM
        gru_activation=tanh,
        visual_activation=linear,
        dropout_prob=0.0,
    ):
        """Build the embedding, both recurrent stacks and the dense projections.

        ``out_depth`` is accepted but no layer built here uses it.
        """
        # Keep this call first, before any other local is bound: the
        # attributes read below (self.size, self.depth, ...) are expected to
        # come from the constructor arguments captured by locals().
        autoassign(locals())
        recurrent_options = dict(activation=self.gru_activation,
                                 dropout_prob=self.dropout_prob)
        self.Embed = Embedding(self.size_vocab, self.size_embed)
        self.Encode = StackedGRUH0(self.size_embed, self.size, self.depth,
                                   **recurrent_options)
        self.ToVis = Dense(self.size, self.size_out)
        self.FromVis = Dense(self.size_out, self.size)
        self.Decode = StackedGRU(self.size_embed, self.size, self.depth,
                                 **recurrent_options)
        self.PredictT = Dense(size_in=self.size, size_out=self.size_embed)
        self.params = params(
            self.Embed,
            self.Encode,
            self.ToVis,
            self.FromVis,
            self.Decode,
            self.PredictT,
        )

    def __call__(self, inp, out_prev, img):
        """Return the pair ``(img_out, txt_out)``.

        ``img_out`` is predicted from ``inp``; ``txt_out`` is predicted from
        ``img`` together with ``out_prev``.
        """
        # Text -> image pathway.
        sentence_state = last(self.Encode(self.Embed(inp)))
        img_out = self.visual_activation(self.ToVis(sentence_state))
        # Image -> text pathway.
        from_image = self.visual_activation(self.FromVis(img))
        decoded = self.Decode(from_image, self.Embed(out_prev))
        txt_out = softmax3d(self.Embed.unembed(self.PredictT(decoded)))
        return (img_out, txt_out)

    def predictor_v(self):
        """Return function to predict image vector from input."""
        tokens = T.imatrix()
        sentence_state = last(self.Encode(self.Embed(tokens)))
        prediction = self.visual_activation(self.ToVis(sentence_state))
        return theano.function([tokens], prediction)
示例#2
0
class MultitaskMM(Layer):
    """Shared recurrent encoder with visual decoder + textual decoder.

    The input is embedded, run through ``Encode`` and reduced with ``last``
    to a single representation ``rep``. That representation feeds both:

    * ``DecodeV`` followed by ``visual_activation`` (image prediction), and
    * ``DecodeT`` together with the embedded previous outputs, followed by
      ``PredictT``, ``Embed.unembed`` and ``softmax3d`` (text prediction).
    """
    def __init__(self, size_vocab, size_embed, size, size_out, depth, out_depth=1, # FIXME USE THIS PARAM
                 gru_activation=tanh, visual_activation=linear,
                 dropout_prob=0.0):
        """Build the shared encoder and the two decoders.

        ``out_depth`` is accepted but no layer built here uses it.
        """
        # Keep this call first: the attributes read below are expected to come
        # from the constructor arguments captured by locals().
        autoassign(locals())
        self.Embed = Embedding(self.size_vocab, self.size_embed)
        self.Encode = StackedGRUH0(self.size_embed, self.size, self.depth,
                                   activation=self.gru_activation, dropout_prob=self.dropout_prob)
        self.DecodeT = StackedGRU(self.size_embed, self.size, self.depth,
                                  activation=self.gru_activation, dropout_prob=self.dropout_prob)
        self.PredictT   = Dense(size_in=self.size, size_out=self.size_embed)
        self.DecodeV = Dense(self.size, self.size_out)
        # Encode sits on the path to both outputs, so it has to be listed
        # here; it was previously missing from this list.
        # NOTE(review): this lengthens the parameter list, so anything that
        # matched the old list by position (e.g. saved weights) needs checking.
        self.params = params(self.Embed, self.Encode, self.DecodeT, self.PredictT, self.DecodeV)

    def __call__(self, inp, out_prev, _img):
        """Return ``(img, txt)`` predicted from ``inp`` and ``out_prev``.

        ``_img`` is accepted but not used.
        """
        rep = last(self.Encode(self.Embed(inp)))
        img = self.visual_activation(self.DecodeV(rep))
        txt = softmax3d(self.Embed.unembed(self.PredictT(self.DecodeT(rep, self.Embed(out_prev)))))
        return (img, txt)

    def predictor_v(self):
        """Return function to predict image vector from input."""
        # Named `tokens` rather than `input` to avoid shadowing the builtin.
        tokens = T.imatrix()
        rep = last(self.Encode(self.Embed(tokens)))
        return theano.function([tokens], self.visual_activation(self.DecodeV(rep)))
示例#3
0
class MultitaskY(Layer):
    """Joint Encode + separate pathways.

    The embedded input first goes through the shared ``Joint`` recurrent
    stack. Its output is then consumed by two separate pathways: ``Visual``
    (image prediction) and ``Textual`` (text prediction).
    """

    def __init__(self, size_vocab, size_embed, size, size_out, depth, textual,
                 out_depth=1,
                 gru_activation=tanh,
                 visual_activation=linear,
                 dropout_prob=0.0):
        """Build the embedding, the joint encoder and the two pathways.

        ``textual`` is a callable (e.g. a layer class) that builds the textual
        pathway.
        """
        # Keep this call first: the attributes read below are expected to come
        # from the constructor arguments captured by locals().
        autoassign(locals())
        self.Embed   = Embedding(self.size_vocab, self.size_embed)
        self.Joint   = StackedGRUH0(self.size_embed, self.size, self.depth,
                                    activation=self.gru_activation, dropout_prob=self.dropout_prob)
        # Both pathways take the output of Joint, hence input size self.size
        # rather than self.size_embed.
        self.Visual  = Visual(self.size, self.size, self.size_out, self.depth, out_depth=self.out_depth,
                              gru_activation=self.gru_activation, dropout_prob=self.dropout_prob)
        # NOTE(review): `textual` gets three positional sizes here (no
        # size_out) -- confirm this matches the constructor that is passed in.
        self.Textual = textual(self.size, self.size, self.depth,
                               gru_activation=self.gru_activation, dropout_prob=self.dropout_prob)
        # Joint sits on the path to both outputs, so it has to be listed
        # here; it was previously missing from this list.
        # NOTE(review): this lengthens the parameter list, so anything that
        # matched the old list by position (e.g. saved weights) needs checking.
        self.params  = params(self.Embed, self.Joint, self.Visual, self.Textual)

    def __call__(self, inp, output_prev, _img):
        """Return ``(img, txt)`` predictions.

        ``_img`` is not used for the image prediction but is forwarded to
        ``Textual``.
        """
        inp_e = self.Joint(self.Embed(inp))
        output_prev_e  = self.Embed(output_prev)
        img   = self.visual_activation(self.Visual(inp_e))
        txt   = softmax3d(self.Embed.unembed(self.Textual(inp_e, output_prev_e, _img)))
        return (img, txt)

    def predictor_v(self):
        """Return function to predict image vector from input."""
        # Named `tokens` rather than `input` to avoid shadowing the builtin.
        tokens = T.imatrix()
        # Go through Joint exactly as __call__ does: Visual is built to take
        # Joint's output, not the raw embeddings.
        joint = self.Joint(self.Embed(tokens))
        return theano.function([tokens], self.visual_activation(self.Visual(joint)))
示例#4
0
class Multitask(Layer):
    """Visual pathway and textual pathway on top of one shared embedding.

    ``Visual`` predicts an image vector from the embedded input. ``Textual``
    receives the embedded input, the embedded previous outputs and the
    image, and its result is mapped back through ``Embed.unembed`` and
    ``softmax3d``.
    """

    def __init__(self, size_vocab, size_embed, size, size_out, depth, textual, out_depth=1,
                 gru_activation=tanh, visual_activation=linear, dropout_prob=0.0):
        """Build the embedding and the two pathways.

        ``textual`` is a callable (e.g. a layer class) that builds the textual
        pathway.
        """
        # Keep this call first, before any other local is bound: the
        # attributes read below are expected to come from the constructor
        # arguments captured by locals().
        autoassign(locals())
        pathway_options = dict(gru_activation=self.gru_activation,
                               dropout_prob=self.dropout_prob)
        self.Embed = Embedding(self.size_vocab, self.size_embed)
        self.Visual = Visual(self.size_embed, self.size, self.size_out, self.depth,
                             out_depth=self.out_depth, **pathway_options)
        self.Textual = textual(self.size_embed, self.size, self.size_out, self.depth,
                               **pathway_options)
        self.params = params(self.Embed, self.Visual, self.Textual)

    def __call__(self, inp, output_prev, img):
        """Return the pair ``(img_pred, txt_pred)``."""
        embedded_inp = self.Embed(inp)
        embedded_prev = self.Embed(output_prev)
        visual_out = self.Visual(embedded_inp)
        img_pred = self.visual_activation(visual_out)
        textual_out = self.Textual(embedded_inp, embedded_prev, img)
        txt_pred = softmax3d(self.Embed.unembed(textual_out))
        return img_pred, txt_pred

    def predictor_v(self):
        """Return function to predict image vector from input."""
        tokens = T.imatrix()
        visual_out = self.Visual(self.Embed(tokens))
        return theano.function([tokens], self.visual_activation(visual_out))
示例#5
0
class MultitaskLMY(Layer):
    """Visual encoder plus textual decoder on top of a shared recurrent stack.

    The embedded input goes through ``Shared``. ``Visual`` consumes the full
    ``Shared`` output, while the textual decoder ``LM`` receives
    ``last(shared)`` together with the embedded previous outputs, i.e. it is
    driven by the encoder rather than by the image.
    """

    def __init__(self, size_vocab, size_embed, size, size_out, depth, depth_spec=1,
                 visual_encoder=StackedGRUH0, gru_activation=clipped_rectify,
                 visual_activation=linear, dropout_prob=0.0):
        """Build the shared stack and the two specialised pathways.

        ``depth`` is used for ``Shared``; ``depth_spec`` is used for both
        ``Visual`` and ``LM``.
        """
        # Keep this call first, before any other local is bound: the
        # attributes read below are expected to come from the constructor
        # arguments captured by locals().
        autoassign(locals())
        gru_options = dict(activation=self.gru_activation, dropout_prob=self.dropout_prob)
        self.Embed = Embedding(self.size_vocab, self.size_embed)
        self.Shared = StackedGRUH0(self.size_embed, self.size, self.depth, **gru_options)
        self.Visual = Visual(self.size, self.size, self.size_out, self.depth_spec,
                             encoder=self.visual_encoder,
                             gru_activation=self.gru_activation,
                             visual_activation=self.visual_activation,
                             dropout_prob=self.dropout_prob)
        self.LM = StackedGRU(self.size, self.size, self.depth_spec, **gru_options)
        # TODO: try direct softmax
        self.ToTxt = Dense(self.size, self.size_embed)

    def params(self):
        """Return the combined parameters of all sub-layers."""
        # Inside a method the bare name `params` resolves to the module-level
        # helper, not to this method.
        sublayers = (self.Embed, self.Shared, self.Visual, self.LM, self.ToTxt)
        return params(*sublayers)

    def __call__(self, inp, output_prev, _img):
        """Return ``(img_pred, txt_pred)``; ``_img`` is accepted but not used."""
        shared = self.Shared(self.Embed(inp))
        img_pred = self.Visual(shared)
        decoded = self.LM(last(shared), self.Embed(output_prev))
        txt_pred = softmax3d(self.Embed.unembed(self.ToTxt(decoded)))
        return img_pred, txt_pred

    def predictor_v(self):
        """Return function to predict image vector from input."""
        tokens = T.imatrix()
        shared = self.Shared(self.Embed(tokens))
        return theano.function([tokens], self.Visual(shared))

    def predictor_r(self):
        """Return function to predict representation from input."""
        tokens = T.imatrix()
        shared = self.Shared(self.Embed(tokens))
        return theano.function([tokens], last(shared))
示例#6
0
class MultitaskLMD(Layer):
    """Visual encoder plus a textual decoder driven by the encoder output.

    The representation produced by ``Visual.encode`` is used twice: it is
    projected to the image prediction, and it is handed to the textual
    decoder ``LM`` (instead of the image) together with the embedded
    previous outputs.
    """

    def __init__(
        self,
        size_vocab,
        size_embed,
        size,
        size_out,
        depth,
        gru_activation=clipped_rectify,
        visual_activation=linear,
        visual_encoder=StackedGRUH0,
        dropout_prob=0.0,
    ):
        """Build the embedding, the visual pathway and the textual decoder."""
        # Keep this call first, before any other local is bound: the
        # attributes read below are expected to come from the constructor
        # arguments captured by locals().
        autoassign(locals())
        self.Embed = Embedding(self.size_vocab, self.size_embed)
        self.Visual = Visual(
            self.size_embed, self.size, self.size_out, self.depth,
            encoder=self.visual_encoder,
            gru_activation=self.gru_activation,
            visual_activation=self.visual_activation,
            dropout_prob=self.dropout_prob,
        )
        self.LM = StackedGRU(
            self.size_embed, self.size, self.depth,
            activation=self.gru_activation,
            dropout_prob=self.dropout_prob,
        )
        # TODO: try direct softmax
        self.ToTxt = Dense(self.size, self.size_embed)

    def params(self):
        """Return the combined parameters of all sub-layers."""
        # Inside a method the bare name `params` resolves to the module-level
        # helper, not to this method.
        sublayers = (self.Embed, self.Visual, self.LM, self.ToTxt)
        return params(*sublayers)

    def __call__(self, inp, output_prev, _img):
        """Return ``(img_pred, txt_pred)``; ``_img`` is accepted but not used."""
        visual = self.Visual
        rep = visual.encode(self.Embed(inp))
        img_pred = visual.visual_activation(visual.ToImg(rep))
        decoded = self.LM(rep, self.Embed(output_prev))
        txt_pred = softmax3d(self.Embed.unembed(self.ToTxt(decoded)))
        return img_pred, txt_pred

    def predictor_v(self):
        """Return function to predict image vector from input."""
        tokens = T.imatrix()
        prediction = self.Visual(self.Embed(tokens))
        return theano.function([tokens], prediction)

    def predictor_r(self):
        """Return function to predict representation from input."""
        tokens = T.imatrix()
        representation = self.Visual.encode(self.Embed(tokens))
        return theano.function([tokens], representation)
示例#7
0
class MultitaskLMC(Layer):
    """Visual encoder plus a textual decoder driven by the image.

    ``Visual`` predicts an image vector from the embedded input. The textual
    decoder ``LM`` receives ``FromImg(img)`` together with the embedded
    previous outputs; its output goes through ``ToTxt``, ``Embed.unembed``
    and ``softmax3d``.
    """

    def __init__(self, size_vocab, size_embed, size, size_out, depth,
                 gru_activation=clipped_rectify, visual_activation=linear,
                 visual_encoder=StackedGRUH0, dropout_prob=0.0):
        """Build the embedding, the visual pathway and the textual decoder."""
        # Keep this call first, before any other local is bound: the
        # attributes read below are expected to come from the constructor
        # arguments captured by locals().
        autoassign(locals())
        self.Embed = Embedding(self.size_vocab, self.size_embed)
        self.Visual = Visual(self.size_embed, self.size, self.size_out, self.depth,
                             encoder=self.visual_encoder,
                             gru_activation=self.gru_activation,
                             visual_activation=self.visual_activation,
                             dropout_prob=self.dropout_prob)
        self.LM = StackedGRU(self.size_embed, self.size, self.depth,
                             activation=self.gru_activation,
                             dropout_prob=self.dropout_prob)
        self.FromImg = Dense(self.size_out, self.size)
        # TODO: try direct softmax
        self.ToTxt = Dense(self.size, self.size_embed)

    def params(self):
        """Return the combined parameters of all sub-layers."""
        # Inside a method the bare name `params` resolves to the module-level
        # helper, not to this method.
        sublayers = (self.Embed, self.Visual, self.LM, self.FromImg, self.ToTxt)
        return params(*sublayers)

    def __call__(self, inp, output_prev, img):
        """Return the pair ``(img_pred, txt_pred)``."""
        img_pred = self.Visual(self.Embed(inp))
        from_image = self.FromImg(img)
        decoded = self.LM(from_image, self.Embed(output_prev))
        txt_pred = softmax3d(self.Embed.unembed(self.ToTxt(decoded)))
        return img_pred, txt_pred

    def predictor_v(self):
        """Return function to predict image vector from input."""
        tokens = T.imatrix()
        prediction = self.Visual(self.Embed(tokens))
        return theano.function([tokens], prediction)

    def predictor_r(self):
        """Return function to predict representation from input."""
        tokens = T.imatrix()
        representation = self.Visual.encode(self.Embed(tokens))
        return theano.function([tokens], representation)