class Discriminator(object):
    def __init__(self, x_k, n_steps, hidden_dim):
        self.x_k = x_k
        self.hidden_dim = hidden_dim
        constraint = lambda: ClipConstraint(1e-2)
        self.lstm = LSTM(hidden_dim)
        self.lstm.build((None, n_steps, 1))
        for w in self.lstm.trainable_weights:
            # print("Weight: {}".format(w))
            self.lstm.constraints[w] = constraint()
        self.dense = Dense(1, W_constraint=constraint())
        self.dense.build((None, hidden_dim))
        self.weights = self.lstm.trainable_weights + self.dense.trainable_weights
        self.constraints = self.lstm.constraints.copy()
        self.constraints.update(self.dense.constraints)
        # print("Constraints: {}".format(self.constraints))

    def call(self, x):
        return self.dense.call(self.lstm.call(x))
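A minimal NumPy sketch of the clamping that the ClipConstraint(1e-2) instances above are assumed to apply to every LSTM and Dense weight after each update (the clip_weights helper below is hypothetical, for illustration only):

import numpy as np

def clip_weights(weights, c=1e-2):
    # Clamp each weight array to [-c, c], mirroring what ClipConstraint(1e-2)
    # is assumed to do to the discriminator's kernels above.
    return [np.clip(w, -c, c) for w in weights]

clipped = clip_weights([np.random.randn(4, 4), np.random.randn(4)])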
Example #2
def get_lstm_controller(controller_output_dim,
                        controller_input_dim,
                        activation='relu',
                        batch_size=1,
                        max_steps=1):
    controller = LSTM(
        units=controller_output_dim,
        # kernel_initializer='random_normal',
        # bias_initializer='random_normal',
        activation=activation,
        stateful=True,
        return_state=True,
        return_sequences=False,  # should not matter: the controller sequence length is 1
        implementation=2,  # implementation 2 is the GPU-friendly mode; other modes may not work here
        batch_input_shape=(batch_size, max_steps, controller_input_dim),
        name='lstm_controller')
    controller.build(input_shape=(batch_size, max_steps, controller_input_dim))
    return controller
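A minimal usage sketch, assuming the Keras 2.x functional API: with return_state=True and return_sequences=False, calling the controller on a single-step input yields the output together with the hidden and cell state (the dimensions below are hypothetical).

from keras.layers import Input

controller = get_lstm_controller(controller_output_dim=100,
                                 controller_input_dim=8,
                                 batch_size=16)
step_input = Input(batch_shape=(16, 1, 8))
# For an LSTM the first two tensors carry the same values (the last hidden state);
# state_c is the cell state.
output, state_h, state_c = controller(step_input)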
Example #3
class Stack(Recurrent):
    """ Stack and queue network

    output_dim = output dimension
    n_slots = number of memory slots
    m_length = dimension of the memory
    rnn_size = output length of the memory controller
    inner_rnn = "lstm" (only lstm is supported)
    stack = True to create a neural stack, False to create a neural queue

    from Learning to Transduce with Unbounded Memory
    [[http://arxiv.org/pdf/1506.02516.pdf]]
    """
    def __init__(self,
                 output_dim,
                 n_slots,
                 m_length,
                 inner_rnn='lstm',
                 rnn_size=64,
                 stack=True,
                 init='glorot_uniform',
                 inner_init='orthogonal',
                 input_dim=None,
                 input_length=None,
                 **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        if inner_rnn != "lstm":
            print "Only lstm is supported"
            raise
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(activation='relu',
                           input_dim=input_dim + self.m_length,
                           input_length=input_leng,
                           output_dim=self.output_dim,
                           init=self.init,
                           inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(input_dim=input_dim + self.m_length,
                            input_length=input_leng,
                            output_dim=self.rnn_size,
                            init=self.init,
                            forget_bias_init='zero',
                            inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        self.init_h = K.zeros((self.rnn_size))

        self.W_d = self.rnn.init((self.rnn_size, 1))
        self.W_u = self.rnn.init((self.rnn_size, 1))

        self.W_v = self.rnn.init((self.rnn_size, self.m_length))
        self.W_o = self.rnn.init((self.rnn_size, self.output_dim))

        self.b_d = K.zeros((1, ), name="b_d")
        self.b_u = K.zeros((1, ), name="b_u")
        self.b_v = K.zeros((self.m_length, ))
        self.b_o = K.zeros((self.output_dim, ))

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_d, self.b_d, self.W_v, self.b_v, self.W_u, self.b_u,
            self.W_o, self.b_o, self.init_h
        ]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size))
            self.trainable_weights = self.trainable_weights + [
                self.init_c,
            ]
        #self.trainable_weights =[self.W_d]

    def get_initial_states(self, X):

        batch_size = X.shape[0]

        init_r = K.zeros((self.m_length)).dimshuffle('x', 0).repeat(batch_size,
                                                                    axis=0)
        init_V = K.zeros(
            (self.n_slots, self.m_length)).dimshuffle('x', 0,
                                                      1).repeat(batch_size,
                                                                axis=0)
        init_S = K.zeros((self.n_slots)).dimshuffle('x', 0).repeat(batch_size,
                                                                   axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)

        itime = K.zeros((1, ), dtype=np.int32)

        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size,
                                                             axis=0)
            return [init_r, init_V, init_S, itime, init_h, init_c]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def step(self, x, states):

        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1],
                                                           axis=-1), h_tm1)

        d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self,
                                             V_tm1,
                                             s_tm1,
                                             d_t[::, 0],
                                             u_t[::, 0],
                                             v_t,
                                             time[0],
                                             stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t
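_update_controller and _update_neural_stack are module-level helpers not shown in this snippet. As a reference for the update rule _update_neural_stack is assumed to implement (the stack variant from Grefenstette et al. 2015: push strength d, pop strength u, pushed value v), here is a minimal single-example NumPy sketch:

import numpy as np

def neural_stack_step(V, s, d, u, v):
    # V: list of stored vectors, s: list of their strengths,
    # d: push strength, u: pop strength, v: newly pushed vector.
    V = V + [v]
    new_s = []
    for i in range(len(s)):
        # strength removed from slot i by the pop signal, after the slots above it
        consumed = max(0.0, u - sum(s[i + 1:]))
        new_s.append(max(0.0, s[i] - consumed))
    new_s.append(d)
    # read vector: blend of the top-most entries whose strengths sum to at most 1
    r = np.zeros_like(np.asarray(v, dtype=float))
    for i in range(len(V)):
        weight = min(new_s[i], max(0.0, 1.0 - sum(new_s[i + 1:])))
        r = r + weight * np.asarray(V[i], dtype=float)
    return V, new_s, r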
Example #4
class NeuralTuringMachine(Recurrent):
    """ Neural Turing Machines

    Non-obvious parameters:
    -----------------------
    shift_range: int, number of available shifts; e.g. if 3, the available shifts
                 are (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location

    Known issues:
    -------------
    Theano may complain when n_slots == 1.

    """
    def __init__(self,
                 output_dim,
                 n_slots,
                 m_length,
                 shift_range=3,
                 inner_rnn='gru',
                 init='glorot_uniform',
                 inner_init='orthogonal',
                 input_dim=None,
                 input_length=None,
                 **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots
        self.m_length = m_length
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(activation='relu',
                           input_dim=input_dim + self.m_length,
                           input_length=input_leng,
                           output_dim=self.output_dim,
                           init=self.init,
                           inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(input_dim=input_dim + self.m_length,
                            input_length=input_leng,
                            output_dim=self.output_dim,
                            init=self.init,
                            forget_bias_init='zero',
                            inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001 * np.ones((1, )).astype(floatX)))
        self.init_h = K.zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots, ))
        self.init_ww = self.rnn.init((self.n_slots, ))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = K.zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = K.zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length, ))
        self.W_c_read = self.rnn.init(
            (self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_read = K.zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = K.zeros((self.shift_range))  # b_s lol! not intentional

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length, ))
        self.W_c_write = self.rnn.init(
            (self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = K.zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = K.zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_e, self.b_e, self.W_a, self.b_a, self.W_k_read,
            self.b_k_read, self.W_c_read, self.b_c_read, self.W_s_read,
            self.b_s_read, self.W_k_write, self.b_k_write, self.W_s_write,
            self.b_s_write, self.W_c_write, self.b_c_write, self.M,
            self.init_h, self.init_wr, self.init_ww
        ]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.output_dim))
            self.trainable_weights = self.trainable_weights + [
                self.init_c,
            ]

    def _read(self, w, M):
        return (w[:, :, None] * M).sum(axis=1)

    def _write(self, w, e, a, M):
        Mtilda = M * (1 - w[:, :, None] * e[:, None, :])
        Mout = Mtilda + w[:, :, None] * a[:, None, :]
        return Mout

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1):
        wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = _renorm(wtilda**gamma[:, None])
        return wout

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-4
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1.0001
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def get_initial_states(self, X):
        batch_size = X.shape[0]
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
            batch_size, axis=0).repeat(self.n_slots,
                                       axis=1).repeat(self.m_length, axis=2)
        init_M = init_M.flatten(ndim=2)

        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size,
                                                             axis=0)
            return [
                init_M,
                T.nnet.softmax(init_wr),
                T.nnet.softmax(init_ww), init_h, init_c
            ]
        else:
            return [
                init_M,
                T.nnet.softmax(init_wr),
                T.nnet.softmax(init_ww), init_h
            ]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def get_full_output(self, train=False):
        """
        This method is for research and visualization purposes. Use it as
        X = model.get_input()  # full model
        Y = ntm.get_output()    # this layer
        F = theano.function([X], Y, allow_input_downcast=True)
        [memory, read_address, write_address, rnn_state] = F(x)

        if inner_rnn == "lstm" use it as
        [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)

        """
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        X = self.get_input(train)
        assert K.ndim(X) == 3
        if K._BACKEND == 'tensorflow':
            if not self.input_shape[1]:
                raise Exception('When using TensorFlow, you should define ' +
                                'explicitly the number of timesteps of ' +
                                'your sequences. Make sure the first layer ' +
                                'has a "batch_input_shape" argument ' +
                                'including the samples axis.')

        mask = self.get_output_mask(train)
        if mask:
            # apply mask
            X *= K.cast(K.expand_dims(mask), X.dtype)
            masking = True
        else:
            masking = False

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(X)

        states = rnn_states(self.step,
                            X,
                            initial_states,
                            go_backwards=self.go_backwards,
                            masking=masking)
        return states

    def step(self, x, states):
        M_tm1, wr_tm1, ww_tm1 = states[:3]
        # reshape
        M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length))
        # read
        h_tm1 = states[3:]
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read,
            self.b_c_read, self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[0], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1)
        e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1)

        M_t = M_t.flatten(ndim=2)

        return h_t[0], [M_t, wr_t, ww_t] + h_t
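_circulant, _cosine_distance, _renorm, _softmax and _update_controller are module-level helpers not shown in this snippet. A minimal un-batched NumPy sketch of the addressing pipeline that _get_content_w and _get_location_w implement (content focus, interpolation, circular shift, sharpening, in the spirit of Graves et al. 2014):

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

def ntm_address(M, k, beta, g, s, gamma, w_prev):
    # M: (n_slots, m_length) memory, k: (m_length,) key,
    # s: (shift_range,) shift distribution, w_prev: previous weights.
    sim = M @ k / (np.linalg.norm(M, axis=1) * np.linalg.norm(k) + 1e-8)
    wc = softmax(beta * sim)                  # content addressing
    wg = g * wc + (1.0 - g) * w_prev          # interpolate with previous weights
    shifts = np.arange(len(s)) - len(s) // 2  # e.g. (-1, 0, 1) for shift_range=3
    wt = np.zeros_like(wg)
    for j, sh in enumerate(shifts):           # circular convolution over slots
        wt += s[j] * np.roll(wg, sh)
    w = wt ** gamma                           # sharpening
    return w / w.sum()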
Example #5
class DRAW(Recurrent):
    '''DRAW

    Parameters:
    ===========
    output_dim : encoder/decoder dimension
    code_dim : random sample dimension (reparametrization trick output)
    input_shape : (n_channels, rows, cols)
    N_enc : Size of the encoder's filter bank (MNIST default: 2)
    N_dec : Size of the decoder's filter bank (MNIST default: 5)
    n_steps : number of sampling steps (or how long it takes to draw, default 64)
    inner_rnn : str with rnn type ('gru' default)
    truncate_gradient : int (-1 default)
    return_sequences : bool (False default)
    '''
    theano_rng = theano_rng()

    def __init__(self, output_dim, code_dim, N_enc=2, N_dec=5, n_steps=64,
                 inner_rnn='gru', truncate_gradient=-1, return_sequences=False,
                 canvas_activation=T.nnet.sigmoid, init='glorot_uniform',
                 inner_init='orthogonal', input_shape=None, **kwargs):
        self.output_dim = output_dim  # this is 256 for MNIST
        self.code_dim = code_dim  # this is 100 for MNIST
        self.N_enc = N_enc
        self.N_dec = N_dec
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences
        self.n_steps = n_steps
        self.canvas_activation = canvas_activation
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn

        self.height = input_shape[1]
        self.width = input_shape[2]

        self._input_shape = input_shape
        super(DRAW, self).__init__(**kwargs)

    def build(self):
        self.input = T.tensor4()

        if self.inner_rnn == 'gru':
            self.enc = GRU(
                input_length=self.n_steps,
                input_dim=self._input_shape[0]*2*self.N_enc**2 + self.output_dim,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
            self.dec = GRU(
                input_length=self.n_steps,
                input_dim=self.code_dim, output_dim=self.output_dim,
                init=self.init,
                inner_init=self.inner_init)

        elif self.inner_rnn == 'lstm':
            self.enc = LSTM(
                input_length=self.n_steps,
                input_dim=self._input_shape[0]*2*self.N_enc**2 + self.output_dim,
                output_dim=self.output_dim, init=self.init, inner_init=self.inner_init)
            self.dec = LSTM(
                input_length=self.n_steps,
                input_dim=self.code_dim, output_dim=self.output_dim,
                init=self.init, inner_init=self.inner_init)
        else:
            raise ValueError('This type of inner_rnn is not supported')

        self.enc.build()
        self.dec.build()

        self.init_canvas = shared_zeros(self._input_shape)  # canvas and hidden state
        self.init_h_enc = shared_zeros((self.output_dim))  # initial values
        self.init_h_dec = shared_zeros((self.output_dim))  # should be trained
        self.L_enc = self.enc.init((self.output_dim, 5))  # "read" attention parameters (eq. 21)
        self.L_dec = self.enc.init((self.output_dim, 5))  # "write" attention parameters (eq. 28)
        self.b_enc = shared_zeros((5))  # "read" attention parameters (eq. 21)
        self.b_dec = shared_zeros((5))  # "write" attention parameters (eq. 28)
        self.W_patch = self.enc.init((self.output_dim, self.N_dec**2*self._input_shape[0]))
        self.b_patch = shared_zeros((self.N_dec**2*self._input_shape[0]))
        self.W_mean = self.enc.init((self.output_dim, self.code_dim))
        self.W_sigma = self.enc.init((self.output_dim, self.code_dim))
        self.b_mean = shared_zeros((self.code_dim))
        self.b_sigma = shared_zeros((self.code_dim))
        self.trainable_weights = self.enc.trainable_weights + self.dec.trainable_weights + [
            self.L_enc, self.L_dec, self.b_enc, self.b_dec, self.W_patch,
            self.b_patch, self.W_mean, self.W_sigma, self.b_mean, self.b_sigma,
            self.init_canvas, self.init_h_enc, self.init_h_dec]

        if self.inner_rnn == 'lstm':
            self.init_cell_enc = shared_zeros((self.output_dim))     # initial values
            self.init_cell_dec = shared_zeros((self.output_dim))     # should be trained
            self.trainable_weights = self.trainable_weights + [self.init_cell_dec, self.init_cell_enc]

    def set_previous(self, layer, connection_map={}):
        self.previous = layer
        self.build()
        self.init_updates()

    def init_updates(self):
        self.get_output(train=True)  # populate regularizers list

    def _get_attention_trainable_weights(self, h, L, b, N):
        p = T.dot(h, L) + b
        gx = self.width * (p[:, 0]+1) / 2.
        gy = self.height * (p[:, 1]+1) / 2.
        sigma2 = T.exp(p[:, 2])
        delta = T.exp(p[:, 3]) * (max(self.width, self.height) - 1) / (N - 1.)
        gamma = T.exp(p[:, 4])
        return gx, gy, sigma2, delta, gamma
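This snippet is truncated; the filterbank that consumes these attention parameters appears in full in Example #9 below. For reference, a minimal un-batched NumPy sketch of the horizontal filterbank construction used by DRAW's read/write attention (the vertical one is analogous):

import numpy as np

def draw_filterbank_x(gx, sigma2, delta, N, width):
    # N filter centres spread around gx with stride delta, then one
    # row-normalised Gaussian per centre over the image columns.
    i = np.arange(N)
    mu_x = gx + delta * (i - N / 2.0 - 0.5)
    a = np.arange(width)
    Fx = np.exp(-(a[None, :] - mu_x[:, None]) ** 2 / (2.0 * sigma2))
    return Fx / (Fx.sum(axis=1, keepdims=True) + 1e-4)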
Example #6
class Stack(Recurrent):
    """ Stack and queue network

    output_dim = output dimension
    n_slots = number of memory slots
    m_length = dimension of the memory
    rnn_size = output length of the memory controller
    inner_rnn = "lstm" (only lstm is supported)
    stack = True to create a neural stack, False to create a neural queue

    from Learning to Transduce with Unbounded Memory
    [[http://arxiv.org/pdf/1506.02516.pdf]]
    """
    def __init__(self,
                 output_dim,
                 n_slots,
                 m_length,
                 inner_rnn='lstm',
                 rnn_size=64,
                 stack=True,
                 init='glorot_uniform',
                 inner_init='orthogonal',
                 input_dim=None,
                 input_length=None,
                 **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        if inner_rnn != "lstm":
            print "Only lstm is supported"
            raise
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        input_leng, input_dim = input_shape[1:]

        if self.inner_rnn == 'gru':
            self.rnn = GRU(activation='relu',
                           input_dim=input_dim + self.m_length,
                           input_length=input_leng,
                           output_dim=self.output_dim,
                           init=self.init,
                           inner_init=self.inner_init,
                           consume_less='gpu',
                           name="{}_inner_rnn".format(self.name))
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(input_dim=input_dim + self.m_length,
                            input_length=input_leng,
                            output_dim=self.rnn_size,
                            init=self.init,
                            forget_bias_init='zero',
                            inner_init=self.inner_init,
                            consume_less='gpu',
                            name="{}_inner_rnn".format(self.name))
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        inner_shape = list(input_shape)
        inner_shape[-1] = input_dim + self.m_length
        self.rnn.build(inner_shape)

        self.init_h = K.zeros((self.rnn_size),
                              name="{}_init_h".format(self.name))

        self.W_d = self.rnn.init((self.rnn_size, 1),
                                 name="{}_W_d".format(self.name))
        self.W_u = self.rnn.init((self.rnn_size, 1),
                                 name="{}_W_u".format(self.name))

        self.W_v = self.rnn.init((self.rnn_size, self.m_length),
                                 name="{}_W_v".format(self.name))
        self.W_o = self.rnn.init((self.rnn_size, self.output_dim),
                                 name="{}_W_o".format(self.name))

        self.b_d = K.zeros((1, ), name="{}_b_d".format(self.name))
        self.b_u = K.zeros((1, ), name="{}_b_u".format(self.name))
        self.b_v = K.zeros((self.m_length, ), name="{}_b_v".format(self.name))
        self.b_o = K.zeros((self.output_dim, ),
                           name="{}_b_o".format(self.name))

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_d, self.b_d, self.W_v, self.b_v, self.W_u, self.b_u,
            self.W_o, self.b_o, self.init_h
        ]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size),
                                  name="{}_init_c".format(self.name))
            self.trainable_weights = self.trainable_weights + [
                self.init_c,
            ]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

    def get_initial_states(self, X):

        batch_size = X.shape[0]

        init_r = K.zeros((self.m_length)).dimshuffle('x', 0).repeat(batch_size,
                                                                    axis=0)
        init_V = K.zeros(
            (self.n_slots, self.m_length)).dimshuffle('x', 0,
                                                      1).repeat(batch_size,
                                                                axis=0)
        init_S = K.zeros((self.n_slots)).dimshuffle('x', 0).repeat(batch_size,
                                                                   axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)

        itime = K.zeros((1, ), dtype=np.int32)

        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size,
                                                             axis=0)
            return [init_r, init_V, init_S, itime, init_h, init_c]

    def get_output_shape_for(self, input_shape):
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def step(self, x, states):
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1],
                                                           axis=-1), h_tm1)

        d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self,
                                             V_tm1,
                                             s_tm1,
                                             d_t[::, 0],
                                             u_t[::, 0],
                                             v_t,
                                             time[0],
                                             stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t

    def get_config(self):
        config = {
            'output_dim': self.output_dim,
            'n_slots': self.n_slots,
            'm_length': self.m_length,
            'init': self.init,
            'inner_init': self.inner_init,
            'inner_rnn': self.inner_rnn,
            'rnn_size': self.rnn_size,
            'stack': self.stack
        }
        base_config = super(Stack, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
Example #7
class NeuralTuringMachine(Recurrent):
    """ Neural Turing Machines

    Non-obvious parameters:
    -----------------------
    shift_range: int, number of available shifts; e.g. if 3, the available shifts
                 are (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location

    Known issues:
    -------------
    Theano may complain when n_slots == 1.

    """
    def __init__(self, output_dim, n_slots, m_length, shift_range=3,
                 inner_rnn='gru',
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots
        self.m_length = m_length
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001*np.ones((1,)).astype(floatX)))
        self.init_h = K.zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots,))
        self.init_ww = self.rnn.init((self.n_slots,))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = K.zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = K.zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length, ))
        self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_read = K.zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = K.zeros((self.shift_range))  # b_s lol! not intentional

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length, ))
        self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = K.zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = K.zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.output_dim))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]

    def _read(self, w, M):
        return (w[:, :, None]*M).sum(axis=1)

    def _write(self, w, e, a, M):
        Mtilda = M * (1 - w[:, :, None]*e[:, None, :])
        Mout = Mtilda + w[:, :, None]*a[:, None, :]
        return Mout

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1):
        wg = g[:, None] * wc + (1-g[:, None])*w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = _renorm(wtilda ** gamma[:, None])
        return wout

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-4
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1.0001
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def get_initial_states(self, X):
        batch_size = X.shape[0]
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
            batch_size, axis=0).repeat(self.n_slots, axis=1).repeat(
            self.m_length, axis=2)
        init_M = init_M.flatten(ndim=2)

        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                    init_h, init_c]
        else:
            return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                    init_h]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def get_full_output(self, train=False):
        """
        This method is for research and visualization purposes. Use it as
        X = model.get_input()  # full model
        Y = ntm.get_output()    # this layer
        F = theano.function([X], Y, allow_input_downcast=True)
        [memory, read_address, write_address, rnn_state] = F(x)

        if inner_rnn == "lstm" use it as
        [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)

        """
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        X = self.get_input(train)
        assert K.ndim(X) == 3
        if K._BACKEND == 'tensorflow':
            if not self.input_shape[1]:
                raise Exception('When using TensorFlow, you should define ' +
                                'explicitly the number of timesteps of ' +
                                'your sequences. Make sure the first layer ' +
                                'has a "batch_input_shape" argument ' +
                                'including the samples axis.')

        mask = self.get_output_mask(train)
        if mask:
            # apply mask
            X *= K.cast(K.expand_dims(mask), X.dtype)
            masking = True
        else:
            masking = False

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(X)

        states = rnn_states(self.step, X, initial_states,
                            go_backwards=self.go_backwards,
                            masking=masking)
        return states

    def step(self, x, states):
        M_tm1, wr_tm1, ww_tm1 = states[:3]
        # reshape
        M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length))
        # read
        h_tm1 = states[3:]
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[0], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1)
        e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1)

        M_t = M_t.flatten(ndim=2)

        return h_t[0], [M_t, wr_t, ww_t] + h_t
Example #8
class NeuralTuringMachine(Recurrent):
    """ Neural Turing Machines
    
    Parameters:
    -----------
    shift_range: int, number of available shifts; e.g. if 3, the available shifts
                 are (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location
    inner_rnn: str, supported values are 'gru' and 'lstm'
    output_dim: hidden state size (RNN controller output_dim)

    Known issues and TODO:
    ----------------------
    Theano may complain when n_slots == 1.
    Add multiple reading and writing heads.

    """
    def __init__(self,
                 output_dim,
                 n_slots,
                 m_length,
                 shift_range=3,
                 inner_rnn='gru',
                 truncate_gradient=-1,
                 return_sequences=False,
                 init='glorot_uniform',
                 inner_init='orthogonal',
                 input_dim=None,
                 input_length=None,
                 **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots
        self.m_length = m_length
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn
        self.return_sequences = return_sequences
        self.truncate_gradient = truncate_gradient

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(input_dim=input_dim + self.m_length,
                           input_length=input_leng,
                           output_dim=self.output_dim,
                           init=self.init,
                           inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(input_dim=input_dim + self.m_length,
                            input_length=input_leng,
                            output_dim=self.output_dim,
                            init=self.init,
                            inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001 * np.ones((1, )).astype(floatX)))
        self.init_h = shared_zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots, ))
        self.init_ww = self.rnn.init((self.n_slots, ))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = shared_zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = shared_zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length, ))
        self.W_c_read = self.rnn.init(
            (self.output_dim,
             3))  # 3 = beta, g, gamma; see eq. 5, 7, 9 in Graves et al. 2014
        self.b_c_read = shared_zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = shared_zeros((self.shift_range))

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length, ))
        self.W_c_write = self.rnn.init(
            (self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = shared_zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = shared_zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.params = self.rnn.params + [
            self.W_e, self.b_e, self.W_a, self.b_a, self.W_k_read,
            self.b_k_read, self.W_c_read, self.b_c_read, self.W_s_read,
            self.b_s_read, self.W_k_write, self.b_k_write, self.W_s_write,
            self.b_s_write, self.W_c_write, self.b_c_write, self.M,
            self.init_h, self.init_wr, self.init_ww
        ]

        if self.inner_rnn == 'lstm':
            self.init_c = shared_zeros((self.output_dim))
            self.params = self.params + [
                self.init_c,
            ]

    def _read(self, w, M):
        return (w[:, :, None] * M).sum(axis=1)

    def _write(self, w, e, a, M, mask):
        Mtilda = M * (1 - w[:, :, None] * e[:, None, :])
        Mout = Mtilda + w[:, :, None] * a[:, None, :]
        return mask[:, None, None] * Mout + (1 - mask[:, None, None]) * M

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1, mask):
        wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = _renorm(wtilda**gamma[:, None])
        return mask[:, None] * wout + (1 - mask[:, None]) * w_tm1

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-6
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def _get_initial_states(self, batch_size):
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
            batch_size, axis=0).repeat(self.n_slots,
                                       axis=1).repeat(self.m_length, axis=2)

        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size,
                                                             axis=0)
            return init_M, T.nnet.softmax(init_wr), T.nnet.softmax(
                init_ww), init_h, init_c
        else:
            return init_M, T.nnet.softmax(init_wr), T.nnet.softmax(
                init_ww), init_h

    def _step(self, x, mask, M_tm1, wr_tm1, ww_tm1, *args):
        # read
        if self.inner_rnn == 'lstm':
            h_tm1 = args[0:2][::-1]  # (cell_tm1, h_tm1)
        else:
            h_tm1 = args[0:1]  # (h_tm1, )
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[-1], self.W_k_read, self.b_k_read, self.W_c_read,
            self.b_c_read, self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1, mask)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read, mask)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[-1], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1, mask)
        e = T.nnet.sigmoid(T.dot(h_t[-1], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[-1], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1, mask)

        return (M_t, wr_t, ww_t) + h_t

    def get_output(self, train=False):
        outputs = self.get_full_output(train)

        if self.return_sequences:
            return outputs[-1]
        else:
            return outputs[-1][:, -1]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def get_full_output(self, train=False):
        """
        This method is for research and visualization purposes. Use it as:
        X = model.get_input()  # full model
        Y = ntm.get_output()    # this layer
        F = theano.function([X], Y, allow_input_downcast=True)
        [memory, read_address, write_address, rnn_state] = F(x)

        if inner_rnn == "lstm" use it as:
        [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)

        """
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)[:, :, 0]
        X = X.dimshuffle((1, 0, 2))

        init_states = self._get_initial_states(X.shape[1])
        outputs, updates = theano.scan(
            self._step,
            sequences=[X, padded_mask],
            outputs_info=init_states,
            non_sequences=self.params,
            truncate_gradient=self.truncate_gradient)

        out = [
            outputs[0].dimshuffle((1, 0, 2, 3)),
            outputs[1].dimshuffle((1, 0, 2)),
            outputs[2].dimshuffle((1, 0, 2)),
            outputs[3].dimshuffle((1, 0, 2))
        ]
        if self.inner_rnn == 'lstm':
            out = out + [outputs[4].dimshuffle((1, 0, 2))]
        return out
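A minimal un-batched NumPy sketch of the erase/add memory update that _write above performs for a single sample (the mask interpolation is omitted):

import numpy as np

def ntm_write(M, w, e, a):
    # M: (n_slots, m_length) memory, w: (n_slots,) write weights,
    # e: (m_length,) erase vector in [0, 1], a: (m_length,) add vector.
    M_tilde = M * (1.0 - np.outer(w, e))  # erase
    return M_tilde + np.outer(w, a)       # add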
Example #9
class DRAW(Recurrent):
    '''DRAW

    Parameters:
    ===========
    output_dim : encoder/decoder dimension
    code_dim : random sample dimension (reparametrization trick output)
    input_shape : (n_channels, rows, cols)
    N_enc : Size of the encoder's filter bank (MNIST default: 2)
    N_dec : Size of the decoder's filter bank (MNIST default: 5)
    n_steps : number of sampling steps (or how long it takes to draw, default 64)
    inner_rnn : str with rnn type ('gru' default)
    truncate_gradient : int (-1 default)
    return_sequences : bool (False default)
    '''
    theano_rng = theano_rng()

    def __init__(self,
                 output_dim,
                 code_dim,
                 N_enc=2,
                 N_dec=5,
                 n_steps=64,
                 inner_rnn='gru',
                 truncate_gradient=-1,
                 return_sequences=False,
                 canvas_activation=T.nnet.sigmoid,
                 init='glorot_uniform',
                 inner_init='orthogonal',
                 input_shape=None,
                 **kwargs):
        self.output_dim = output_dim  # this is 256 for MNIST
        self.code_dim = code_dim  # this is 100 for MNIST
        self.N_enc = N_enc
        self.N_dec = N_dec
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences
        self.n_steps = n_steps
        self.canvas_activation = canvas_activation
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn

        self.height = input_shape[1]
        self.width = input_shape[2]

        self._input_shape = input_shape
        super(DRAW, self).__init__(**kwargs)

    def build(self):
        self.input = T.tensor4()

        if self.inner_rnn == 'gru':
            self.enc = GRU(input_length=self.n_steps,
                           input_dim=self._input_shape[0] * 2 * self.N_enc**2 +
                           self.output_dim,
                           output_dim=self.output_dim,
                           init=self.init,
                           inner_init=self.inner_init)
            self.dec = GRU(input_length=self.n_steps,
                           input_dim=self.code_dim,
                           output_dim=self.output_dim,
                           init=self.init,
                           inner_init=self.inner_init)

        elif self.inner_rnn == 'lstm':
            self.enc = LSTM(
                input_length=self.n_steps,
                input_dim=self._input_shape[0] * 2 * self.N_enc**2 +
                self.output_dim,
                output_dim=self.output_dim,
                init=self.init,
                inner_init=self.inner_init)
            self.dec = LSTM(input_length=self.n_steps,
                            input_dim=self.code_dim,
                            output_dim=self.output_dim,
                            init=self.init,
                            inner_init=self.inner_init)
        else:
            raise ValueError('This type of inner_rnn is not supported')

        self.enc.build()
        self.dec.build()

        self.init_canvas = shared_zeros(
            self._input_shape)  # canvas and hidden state
        self.init_h_enc = shared_zeros((self.output_dim))  # initial values
        self.init_h_dec = shared_zeros((self.output_dim))  # should be trained
        self.L_enc = self.enc.init(
            (self.output_dim, 5))  # "read" attention parameters (eq. 21)
        self.L_dec = self.enc.init(
            (self.output_dim, 5))  # "write" attention parameters (eq. 28)
        self.b_enc = shared_zeros((5))  # "read" attention parameters (eq. 21)
        self.b_dec = shared_zeros((5))  # "write" attention parameters (eq. 28)
        self.W_patch = self.enc.init(
            (self.output_dim, self.N_dec**2 * self._input_shape[0]))
        self.b_patch = shared_zeros((self.N_dec**2 * self._input_shape[0]))
        self.W_mean = self.enc.init((self.output_dim, self.code_dim))
        self.W_sigma = self.enc.init((self.output_dim, self.code_dim))
        self.b_mean = shared_zeros((self.code_dim))
        self.b_sigma = shared_zeros((self.code_dim))
        self.trainable_weights = self.enc.trainable_weights + self.dec.trainable_weights + [
            self.L_enc, self.L_dec, self.b_enc, self.b_dec, self.W_patch,
            self.b_patch, self.W_mean, self.W_sigma, self.b_mean, self.b_sigma,
            self.init_canvas, self.init_h_enc, self.init_h_dec
        ]

        if self.inner_rnn == 'lstm':
            self.init_cell_enc = shared_zeros(
                (self.output_dim))  # initial values
            self.init_cell_dec = shared_zeros(
                (self.output_dim))  # should be trained
            self.trainable_weights = self.trainable_weights + [
                self.init_cell_dec, self.init_cell_enc
            ]

    def set_previous(self, layer, connection_map={}):
        self.previous = layer
        self.build()
        self.init_updates()

    def init_updates(self):
        self.get_output(train=True)  # populate regularizers list

    def _get_attention_trainable_weights(self, h, L, b, N):
        p = T.dot(h, L) + b
        gx = self.width * (p[:, 0] + 1) / 2.
        gy = self.height * (p[:, 1] + 1) / 2.
        sigma2 = T.exp(p[:, 2])
        delta = T.exp(p[:, 3]) * (max(self.width, self.height) - 1) / (N - 1.)
        gamma = T.exp(p[:, 4])
        return gx, gy, sigma2, delta, gamma

    def _get_filterbank(self, gx, gy, sigma2, delta, N):
        small = 1e-4
        i = T.arange(N)
        a = T.arange(self.width)
        b = T.arange(self.height)

        mx = gx[:, None] + delta[:, None] * (i - N / 2. - .5)
        my = gy[:, None] + delta[:, None] * (i - N / 2. - .5)

        Fx = T.exp(-(a - mx[:, :, None])**2 / 2. / sigma2[:, None, None])
        Fx /= (Fx.sum(axis=-1)[:, :, None] + small)
        Fy = T.exp(-(b - my[:, :, None])**2 / 2. / sigma2[:, None, None])
        Fy /= (Fy.sum(axis=-1)[:, :, None] + small)
        return Fx, Fy

    def _read(self, x, gamma, Fx, Fy):
        Fyx = (Fy[:, None, :, :, None] * x[:, :, None, :, :]).sum(axis=3)
        FxT = Fx.dimshuffle(0, 2, 1)
        FyxFx = (Fyx[:, :, :, :, None] * FxT[:, None, None, :, :]).sum(axis=3)
        return gamma[:, None, None, None] * FyxFx

    def _get_patch(self, h):
        write_patch = T.dot(h, self.W_patch) + self.b_patch
        write_patch = write_patch.reshape(
            (h.shape[0], self._input_shape[0], self.N_dec, self.N_dec))
        return write_patch

    def _write(self, write_patch, gamma, Fx, Fy):
        Fyx = (Fy[:, None, :, :, None] *
               write_patch[:, :, :, None, :]).sum(axis=2)
        FyxFx = (Fyx[:, :, :, :, None] * Fx[:, None, None, :, :]).sum(axis=3)
        return FyxFx / gamma[:, None, None, None]

    def _get_sample(self, h, eps):
        mean = T.dot(h, self.W_mean) + self.b_mean
        # eps = self.theano_rng.normal(avg=0., std=1., size=mean.shape)
        logsigma = T.dot(h, self.W_sigma) + self.b_sigma
        sigma = T.exp(logsigma)
        if self._train_state:
            sample = mean + eps * sigma
        else:
            sample = mean + 0 * eps * sigma
        kl = -.5 - logsigma + .5 * (mean**2 + sigma**2)
        # kl = .5 * (mean**2 + sigma**2 - logsigma - 1)
        return sample, kl.sum(axis=-1)

    def _get_rnn_input(self, x, rnn):
        if self.inner_rnn == 'gru':
            x_z = T.dot(x, rnn.W_z) + rnn.b_z
            x_r = T.dot(x, rnn.W_r) + rnn.b_r
            x_h = T.dot(x, rnn.W_h) + rnn.b_h
            return x_z, x_r, x_h

        elif self.inner_rnn == 'lstm':
            xi = T.dot(x, rnn.W_i) + rnn.b_i
            xf = T.dot(x, rnn.W_f) + rnn.b_f
            xc = T.dot(x, rnn.W_c) + rnn.b_c
            xo = T.dot(x, rnn.W_o) + rnn.b_o
            return xi, xf, xc, xo

    def _get_rnn_state(self, rnn, *args):
        mask = 1.  # no masking
        if self.inner_rnn == 'gru':
            x_z, x_r, x_h, h_tm1 = args
            h = rnn._step(x_z, x_r, x_h, mask, h_tm1, rnn.U_z, rnn.U_r,
                          rnn.U_h)
            return h
        elif self.inner_rnn == 'lstm':
            xi, xf, xc, xo, h_tm1, cell_tm1 = args
            h, cell = rnn._step(xi, xf, xo, xc, mask, h_tm1, cell_tm1, rnn.U_i,
                                rnn.U_f, rnn.U_o, rnn.U_c)
            return h, cell

    def _get_initial_states(self, X):
        batch_size = X.shape[0]
        canvas = self.init_canvas.dimshuffle('x', 0, 1, 2).repeat(batch_size,
                                                                  axis=0)
        init_enc = self.init_h_enc.dimshuffle('x', 0).repeat(batch_size,
                                                             axis=0)
        init_dec = self.init_h_dec.dimshuffle('x', 0).repeat(batch_size,
                                                             axis=0)
        if self.inner_rnn == 'lstm':
            init_cell_enc = self.init_cell_enc.dimshuffle('x',
                                                          0).repeat(batch_size,
                                                                    axis=0)
            init_cell_dec = self.init_cell_dec.dimshuffle('x',
                                                          0).repeat(batch_size,
                                                                    axis=0)
            return canvas, init_enc, init_cell_enc, init_cell_dec
        else:
            return canvas, init_enc, init_dec

    def _step(self, eps, canvas, h_enc, h_dec, x, *args):
        x_hat = x - self.canvas_activation(canvas)
        gx, gy, sigma2, delta, gamma = self._get_attention_trainable_weights(
            h_dec, self.L_enc, self.b_enc, self.N_enc)
        Fx, Fy = self._get_filterbank(gx, gy, sigma2, delta, self.N_enc)
        read_x = self._read(x, gamma, Fx, Fy).flatten(ndim=2)
        read_x_hat = self._read(x_hat, gamma, Fx, Fy).flatten(ndim=2)
        enc_input = T.concatenate([read_x, read_x_hat, h_dec], axis=-1)

        x_enc_z, x_enc_r, x_enc_h = self._get_rnn_input(enc_input, self.enc)
        new_h_enc = self._get_rnn_state(self.enc, x_enc_z, x_enc_r, x_enc_h,
                                        h_enc)
        sample, kl = self._get_sample(new_h_enc, eps)

        x_dec_z, x_dec_r, x_dec_h = self._get_rnn_input(sample, self.dec)
        new_h_dec = self._get_rnn_state(self.dec, x_dec_z, x_dec_r, x_dec_h,
                                        h_dec)

        gx_w, gy_w, sigma2_w, delta_w, gamma_w = self._get_attention_trainable_weights(
            new_h_dec, self.L_dec, self.b_dec, self.N_dec)
        Fx_w, Fy_w = self._get_filterbank(gx_w, gy_w, sigma2_w, delta_w,
                                          self.N_dec)
        write_patch = self._get_patch(new_h_dec)
        new_canvas = canvas + self._write(write_patch, gamma_w, Fx_w, Fy_w)
        return new_canvas, new_h_enc, new_h_dec, kl

    def _step_lstm(self, eps, canvas, h_enc, cell_enc, h_dec, cell_dec, x,
                   *args):
        x_hat = x - self.canvas_activation(canvas)
        gx, gy, sigma2, delta, gamma = self._get_attention_trainable_weights(
            h_dec, self.L_enc, self.b_enc, self.N_enc)
        Fx, Fy = self._get_filterbank(gx, gy, sigma2, delta, self.N_enc)
        read_x = self._read(x, gamma, Fx, Fy).flatten(ndim=2)
        read_x_hat = self._read(x_hat, gamma, Fx, Fy).flatten(ndim=2)
        enc_input = T.concatenate(
            [read_x, read_x_hat, h_dec.flatten(ndim=2)], axis=1)

        x_enc_i, x_enc_f, x_enc_c, x_enc_o = self._get_rnn_input(
            enc_input, self.enc)
        new_h_enc, new_cell_enc = self._get_rnn_state(self.enc, x_enc_i,
                                                      x_enc_f, x_enc_c,
                                                      x_enc_o, h_enc, cell_enc)
        sample, kl = self._get_sample(new_h_enc, eps)

        x_dec_i, x_dec_f, x_dec_c, x_dec_o = self._get_rnn_input(
            sample, self.dec)
        new_h_dec, new_cell_dec = self._get_rnn_state(self.dec, x_dec_i,
                                                      x_dec_f, x_dec_c,
                                                      x_dec_o, h_dec, cell_dec)

        gx_w, gy_w, sigma2_w, delta_w, gamma_w = self._get_attention_trainable_weights(
            new_h_dec, self.L_dec, self.b_dec, self.N_dec)
        Fx_w, Fy_w = self._get_filterbank(gx_w, gy_w, sigma2_w, delta_w,
                                          self.N_dec)
        write_patch = self._get_patch(new_h_dec)
        new_canvas = canvas + self._write(write_patch, gamma_w, Fx_w, Fy_w)
        return new_canvas, new_h_enc, new_cell_enc, new_h_dec, new_cell_dec, kl

    def get_output(self, train=False):
        self._train_state = train
        X, eps = self.get_input(train).values()
        eps = eps.dimshuffle(1, 0, 2)

        if self.inner_rnn == 'gru':
            outputs, updates = scan(
                self._step,
                sequences=eps,
                outputs_info=self._get_initial_states(X) + (None, ),
                non_sequences=[
                    X,
                ] + self.trainable_weights,
                # n_steps=self.n_steps,
                truncate_gradient=self.truncate_gradient)

        elif self.inner_rnn == 'lstm':
            outputs, updates = scan(self._step_lstm,
                                    sequences=eps,
                                    outputs_info=self._get_initial_states(X) +
                                    (None, ),
                                    non_sequences=[
                                        X,
                                    ] + self.trainable_weights,
                                    truncate_gradient=self.truncate_gradient)

        kl = outputs[-1].sum(axis=0).mean()
        if train:
            # self.updates = updates
            self.regularizers = [
                SimpleCost(kl),
            ]
        if self.return_sequences:
            return [outputs[0].dimshuffle(1, 0, 2, 3, 4), kl]
        else:
            return [outputs[0][-1], kl]
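For reference, the per-component expression `kl = -.5 - logsigma + .5 * (mean**2 + sigma**2)` in `_get_sample` is the standard closed-form KL divergence between the approximate posterior N(mu, sigma^2) and the unit Gaussian prior used in the DRAW latent loss:

\mathrm{KL}\left(\mathcal{N}(\mu,\sigma^{2})\,\Vert\,\mathcal{N}(0,1)\right)
  = \frac{1}{2}\sum_j\left(\mu_j^{2}+\sigma_j^{2}-\log\sigma_j^{2}-1\right)
  = \sum_j\left(-\tfrac{1}{2}-\log\sigma_j+\tfrac{1}{2}\left(\mu_j^{2}+\sigma_j^{2}\right)\right),

which is what `kl.sum(axis=-1)` returns per sample at each step.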
Example #10
class Stack(Recurrent):
    """ Stack and queue network
    
    
    output_dim = output dimension
    n_slots = number of memory slots
    m_length = dimension of the memory
    rnn_size = output length of the memory controller
    inner_rnn = "lstm"; only lstm is supported
    stack = True to create neural stack or False to create neural queue
    
    
    from Learning to Transduce with Unbounded Memory
    [[http://arxiv.org/pdf/1506.02516.pdf]]
    """
    def __init__(self, output_dim, n_slots, m_length,
                 inner_rnn='lstm',rnn_size=64, stack=True,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        if inner_rnn != "lstm":
            print "Only lstm is supported"
            raise
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(shape=input_shape)]
        input_leng, input_dim = input_shape[1:]

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name))
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init, consume_less='gpu', name="{}_inner_rnn".format(self.name))
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        inner_shape = list(input_shape)
        inner_shape[-1] = input_dim+self.m_length
        self.rnn.build(inner_shape)


        self.init_h = K.zeros((self.rnn_size), name="{}_init_h".format(self.name))

        self.W_d = self.rnn.init((self.rnn_size,1), name="{}_W_d".format(self.name))
        self.W_u = self.rnn.init((self.rnn_size,1), name="{}_W_u".format(self.name))

        self.W_v = self.rnn.init((self.rnn_size,self.m_length), name="{}_W_v".format(self.name))
        self.W_o = self.rnn.init((self.rnn_size,self.output_dim), name="{}_W_o".format(self.name))

        self.b_d = K.zeros((1,), name="{}_b_d".format(self.name))
        self.b_u = K.zeros((1,), name="{}_b_u".format(self.name))
        self.b_v = K.zeros((self.m_length,), name="{}_b_v".format(self.name))
        self.b_o = K.zeros((self.output_dim,), name="{}_b_o".format(self.name))

        
        self.trainable_weights = self.rnn.trainable_weights + [
           self.W_d, self.b_d,
           self.W_v, self.b_v,
           self.W_u,  self.b_u,
           self.W_o, self.b_o, self.init_h]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size), name="{}_init_c".format(self.name))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]
			
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

    def get_initial_states(self, X):

        batch_size = X.shape[0]
        
        init_r = K.zeros((self.m_length)).dimshuffle('x',0).repeat(batch_size,axis=0)
        init_V = K.zeros((self.n_slots,self.m_length)).dimshuffle('x',0,1).repeat(batch_size,axis=0)
        init_S = K.zeros((self.n_slots)).dimshuffle('x',0).repeat(batch_size,axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)

        itime = K.zeros((1,),dtype=np.int32)
        
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_r , init_V,init_S,itime,init_h,init_c]
      
    def get_output_shape_for(self, input_shape):
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def step(self, x, states):
        r_tm1, V_tm1,s_tm1,time = states[:4]
        h_tm1 = states[4:]

        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                             h_tm1)
              
        d_t = K.sigmoid( K.dot(op_t, self.W_d)  + self.b_d)  
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o) 
        
        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[::,0], 
                                             u_t[::,0], v_t,time[0],stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t

    def get_config(self):
        config = {'output_dim': self.output_dim,
                  'n_slots': self.n_slots,
                  'm_length': self.m_length,
                  'init': self.init,
                  'inner_init': self.inner_init,
                  'inner_rnn': self.inner_rnn,
                  'rnn_size': self.rnn_size,
                  'stack': self.stack}
        base_config = super(Stack, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
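`_update_controller` and `_update_neural_stack` are helpers defined elsewhere in this file and are not shown in this example. As a rough guide to what the stack update computes, here is a minimal NumPy sketch of one push/pop/read step of the neural stack from Grefenstette et al. 2015 for a single sample (no batching; `stack_step` and the toy values below are illustrative only, not the library's API):

import numpy as np

def stack_step(V, s, d, u, v):
    # One neural-stack update for a single example (no batch dimension).
    # V: (t-1, m) stored rows, s: (t-1,) strengths, d/u: push/pop scalars in [0, 1],
    # v: (m,) row to push. Follows eqs. 1-3 of Grefenstette et al. 2015.
    V = np.vstack([V, v[None, :]]) if V.size else v[None, :]
    t = V.shape[0]
    s_new = np.zeros(t)
    for i in range(t - 1):
        # the pop strength u is absorbed by the items above slot i first
        s_new[i] = max(0., s[i] - max(0., u - s[i + 1:].sum()))
    s_new[t - 1] = d  # the freshly pushed row keeps strength d
    r = np.zeros(V.shape[1])
    for i in range(t):
        # read at most a total weight of 1, starting from the top of the stack
        r += min(s_new[i], max(0., 1. - s_new[i + 1:].sum())) * V[i]
    return V, s_new, r

V, s = np.zeros((0, 3)), np.zeros(0)
V, s, r = stack_step(V, s, d=0.9, u=0.0, v=np.array([1., 0., 0.]))
V, s, r = stack_step(V, s, d=0.8, u=0.0, v=np.array([0., 1., 0.]))
print(s, r)  # strengths ~[0.9, 0.8]; the read is dominated by the last pushed row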
Example #11
class DRAW(Recurrent):
    '''DRAW

    Parameters:
    ===========
    output_dim : encoder/decoder dimension
    code_dim : random sample dimension (reparametrization trick output)
    input_shape : (n_channels, rows, cols)
    N_enc : Size of the encoder's filter bank (MNIST default: 2)
    N_dec : Size of the decoder's filter bank (MNIST default: 5)
    n_steps : number of sampling steps (or how long it takes to draw, default 64)
    inner_rnn : str with rnn type ('gru' default)
    truncate_gradient : int (-1 default)
    return_sequences : bool (False default)
    '''
    theano_rng = theano_rng()

    def __init__(self, output_dim, code_dim, N_enc=2, N_dec=5, n_steps=64,
                 inner_rnn='gru', truncate_gradient=-1, return_sequences=False,
                 canvas_activation=T.nnet.sigmoid, init='glorot_uniform',
                 inner_init='orthogonal', input_shape=None, **kwargs):
        self.output_dim = output_dim  # this is 256 for MNIST
        self.code_dim = code_dim  # this is 100 for MNIST
        self.N_enc = N_enc
        self.N_dec = N_dec
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences
        self.n_steps = n_steps
        self.canvas_activation = canvas_activation
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn

        self.height = input_shape[1]
        self.width = input_shape[2]

        self._input_shape = input_shape
        super(DRAW, self).__init__(**kwargs)

    def build(self):
        self.input = T.tensor4()

        if self.inner_rnn == 'gru':
            self.enc = GRU(
                input_length=self.n_steps,
                input_dim=self._input_shape[0]*2*self.N_enc**2 + self.output_dim,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
            self.dec = GRU(
                input_length=self.n_steps,
                input_dim=self.code_dim, output_dim=self.output_dim,
                init=self.init,
                inner_init=self.inner_init)

        elif self.inner_rnn == 'lstm':
            self.enc = LSTM(
                input_length=self.n_steps,
                input_dim=self._input_shape[0]*2*self.N_enc**2 + self.output_dim,
                output_dim=self.output_dim, init=self.init, inner_init=self.inner_init)
            self.dec = LSTM(
                input_length=self.n_steps,
                input_dim=self.code_dim, output_dim=self.output_dim,
                init=self.init, inner_init=self.inner_init)
        else:
            raise ValueError('This type of inner_rnn is not supported')

        self.enc.build()
        self.dec.build()

        self.init_canvas = shared_zeros(self._input_shape)  # canvas and hidden state
        self.init_h_enc = shared_zeros((self.output_dim))  # initial values
        self.init_h_dec = shared_zeros((self.output_dim))  # should be trained
        self.L_enc = self.enc.init((self.output_dim, 5))  # "read" attention parameters (eq. 21)
        self.L_dec = self.enc.init((self.output_dim, 5))  # "write" attention parameters (eq. 28)
        self.b_enc = shared_zeros((5))  # "read" attention parameters (eq. 21)
        self.b_dec = shared_zeros((5))  # "write" attention parameters (eq. 28)
        self.W_patch = self.enc.init((self.output_dim, self.N_dec**2*self._input_shape[0]))
        self.b_patch = shared_zeros((self.N_dec**2*self._input_shape[0]))
        self.W_mean = self.enc.init((self.output_dim, self.code_dim))
        self.W_sigma = self.enc.init((self.output_dim, self.code_dim))
        self.b_mean = shared_zeros((self.code_dim))
        self.b_sigma = shared_zeros((self.code_dim))
        self.trainable_weights = self.enc.trainable_weights + self.dec.trainable_weights + [
            self.L_enc, self.L_dec, self.b_enc, self.b_dec, self.W_patch,
            self.b_patch, self.W_mean, self.W_sigma, self.b_mean, self.b_sigma,
            self.init_canvas, self.init_h_enc, self.init_h_dec]

        if self.inner_rnn == 'lstm':
            self.init_cell_enc = shared_zeros((self.output_dim))     # initial values
            self.init_cell_dec = shared_zeros((self.output_dim))     # should be trained
            self.trainable_weights = self.trainable_weights + [self.init_cell_dec, self.init_cell_enc]

    def set_previous(self, layer, connection_map={}):
        self.previous = layer
        self.build()
        self.init_updates()

    def init_updates(self):
        self.get_output(train=True)  # populate regularizers list

    def _get_attention_trainable_weights(self, h, L, b, N):
        p = T.dot(h, L) + b
        gx = self.width * (p[:, 0]+1) / 2.
        gy = self.height * (p[:, 1]+1) / 2.
        sigma2 = T.exp(p[:, 2])
        delta = T.exp(p[:, 3]) * (max(self.width, self.height) - 1) / (N - 1.)
        gamma = T.exp(p[:, 4])
        return gx, gy, sigma2, delta, gamma

    def _get_filterbank(self, gx, gy, sigma2, delta, N):
        small = 1e-4
        i = T.arange(N)
        a = T.arange(self.width)
        b = T.arange(self.height)

        mx = gx[:, None] + delta[:, None] * (i - N/2. - .5)
        my = gy[:, None] + delta[:, None] * (i - N/2. - .5)

        Fx = T.exp(-(a - mx[:, :, None])**2 / 2. / sigma2[:, None, None])
        Fx /= (Fx.sum(axis=-1)[:, :, None] + small)
        Fy = T.exp(-(b - my[:, :, None])**2 / 2. / sigma2[:, None, None])
        Fy /= (Fy.sum(axis=-1)[:, :, None] + small)
        return Fx, Fy

    def _read(self, x, gamma, Fx, Fy):
        Fyx = (Fy[:, None, :, :, None] * x[:, :, None, :, :]).sum(axis=3)
        FxT = Fx.dimshuffle(0, 2, 1)
        FyxFx = (Fyx[:, :, :, :, None] * FxT[:, None, None, :, :]).sum(axis=3)
        return gamma[:, None, None, None] * FyxFx

    def _get_patch(self, h):
        write_patch = T.dot(h, self.W_patch) + self.b_patch
        write_patch = write_patch.reshape((h.shape[0], self._input_shape[0],
                                           self.N_dec, self.N_dec))
        return write_patch

    def _write(self, write_patch, gamma, Fx, Fy):
        Fyx = (Fy[:, None, :, :, None] * write_patch[:, :, :, None, :]).sum(axis=2)
        FyxFx = (Fyx[:, :, :, :, None] * Fx[:, None, None, :, :]).sum(axis=3)
        return FyxFx / gamma[:, None, None, None]

    def _get_sample(self, h, eps):
        mean = T.dot(h, self.W_mean) + self.b_mean
        # eps = self.theano_rng.normal(avg=0., std=1., size=mean.shape)
        logsigma = T.dot(h, self.W_sigma) + self.b_sigma
        sigma = T.exp(logsigma)
        if self._train_state:
            sample = mean + eps * sigma
        else:
            sample = mean + 0 * eps * sigma
        kl = -.5 - logsigma + .5 * (mean**2 + sigma**2)
        # kl = .5 * (mean**2 + sigma**2 - logsigma - 1)
        return sample, kl.sum(axis=-1)

    def _get_rnn_input(self, x, rnn):
        if self.inner_rnn == 'gru':
            x_z = T.dot(x, rnn.W_z) + rnn.b_z
            x_r = T.dot(x, rnn.W_r) + rnn.b_r
            x_h = T.dot(x, rnn.W_h) + rnn.b_h
            return x_z, x_r, x_h

        elif self.inner_rnn == 'lstm':
            xi = T.dot(x, rnn.W_i) + rnn.b_i
            xf = T.dot(x, rnn.W_f) + rnn.b_f
            xc = T.dot(x, rnn.W_c) + rnn.b_c
            xo = T.dot(x, rnn.W_o) + rnn.b_o
            return xi, xf, xc, xo

    def _get_rnn_state(self, rnn, *args):
        mask = 1.  # no masking
        if self.inner_rnn == 'gru':
            x_z, x_r, x_h, h_tm1 = args
            h = rnn._step(x_z, x_r, x_h, mask, h_tm1,
                          rnn.U_z, rnn.U_r, rnn.U_h)
            return h
        elif self.inner_rnn == 'lstm':
            xi, xf, xc, xo, h_tm1, cell_tm1 = args
            h, cell = rnn._step(xi, xf, xo, xc, mask,
                                h_tm1, cell_tm1,
                                rnn.U_i, rnn.U_f, rnn.U_o, rnn.U_c)
            return h, cell

    def _get_initial_states(self, X):
        batch_size = X.shape[0]
        canvas = self.init_canvas.dimshuffle('x', 0, 1, 2).repeat(batch_size,
                                                                  axis=0)
        init_enc = self.init_h_enc.dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_dec = self.init_h_dec.dimshuffle('x', 0).repeat(batch_size, axis=0)
        if self.inner_rnn == 'lstm':
            init_cell_enc = self.init_cell_enc.dimshuffle('x', 0).repeat(batch_size, axis=0)
            init_cell_dec = self.init_cell_dec.dimshuffle('x', 0).repeat(batch_size, axis=0)
            return canvas, init_enc, init_cell_enc, init_cell_dec
        else:
            return canvas, init_enc, init_dec

    def _step(self, eps, canvas, h_enc, h_dec, x, *args):
        x_hat = x - self.canvas_activation(canvas)
        gx, gy, sigma2, delta, gamma = self._get_attention_trainable_weights(
            h_dec, self.L_enc, self.b_enc, self.N_enc)
        Fx, Fy = self._get_filterbank(gx, gy, sigma2, delta, self.N_enc)
        read_x = self._read(x, gamma, Fx, Fy).flatten(ndim=2)
        read_x_hat = self._read(x_hat, gamma, Fx, Fy).flatten(ndim=2)
        enc_input = T.concatenate([read_x, read_x_hat, h_dec], axis=-1)

        x_enc_z, x_enc_r, x_enc_h = self._get_rnn_input(enc_input, self.enc)
        new_h_enc = self._get_rnn_state(self.enc, x_enc_z, x_enc_r, x_enc_h,
                                        h_enc)
        sample, kl = self._get_sample(new_h_enc, eps)

        x_dec_z, x_dec_r, x_dec_h = self._get_rnn_input(sample, self.dec)
        new_h_dec = self._get_rnn_state(self.dec, x_dec_z, x_dec_r, x_dec_h,
                                        h_dec)

        gx_w, gy_w, sigma2_w, delta_w, gamma_w = self._get_attention_trainable_weights(
            new_h_dec, self.L_dec, self.b_dec, self.N_dec)
        Fx_w, Fy_w = self._get_filterbank(gx_w, gy_w, sigma2_w, delta_w,
                                          self.N_dec)
        write_patch = self._get_patch(new_h_dec)
        new_canvas = canvas + self._write(write_patch, gamma_w, Fx_w, Fy_w)
        return new_canvas, new_h_enc, new_h_dec, kl

    def _step_lstm(self, eps, canvas, h_enc, cell_enc,
                   h_dec, cell_dec, x, *args):
        x_hat = x - self.canvas_activation(canvas)
        gx, gy, sigma2, delta, gamma = self._get_attention_trainable_weights(
            h_dec, self.L_enc, self.b_enc, self.N_enc)
        Fx, Fy = self._get_filterbank(gx, gy, sigma2, delta, self.N_enc)
        read_x = self._read(x, gamma, Fx, Fy).flatten(ndim=2)
        read_x_hat = self._read(x_hat, gamma, Fx, Fy).flatten(ndim=2)
        enc_input = T.concatenate([read_x, read_x_hat, h_dec.flatten(ndim=2)], axis=1)

        x_enc_i, x_enc_f, x_enc_c, x_enc_o = self._get_rnn_input(enc_input,
                                                                 self.enc)
        new_h_enc, new_cell_enc = self._get_rnn_state(
            self.enc, x_enc_i, x_enc_f, x_enc_c, x_enc_o, h_enc, cell_enc)
        sample, kl = self._get_sample(new_h_enc, eps)

        x_dec_i, x_dec_f, x_dec_c, x_dec_o = self._get_rnn_input(sample,
                                                                 self.dec)
        new_h_dec, new_cell_dec = self._get_rnn_state(
            self.dec, x_dec_i, x_dec_f, x_dec_c, x_dec_o, h_dec, cell_dec)

        gx_w, gy_w, sigma2_w, delta_w, gamma_w = self._get_attention_trainable_weights(
            new_h_dec, self.L_dec, self.b_dec, self.N_dec)
        Fx_w, Fy_w = self._get_filterbank(gx_w, gy_w, sigma2_w, delta_w,
                                          self.N_dec)
        write_patch = self._get_patch(new_h_dec)
        new_canvas = canvas + self._write(write_patch, gamma_w, Fx_w, Fy_w)
        return new_canvas, new_h_enc, new_cell_enc, new_h_dec, new_cell_dec, kl

    def get_output(self, train=False):
        self._train_state = train
        X, eps = self.get_input(train).values()
        eps = eps.dimshuffle(1, 0, 2)

        if self.inner_rnn == 'gru':
            outputs, updates = scan(self._step,
                                    sequences=eps,
                                    outputs_info=self._get_initial_states(X) + (None, ),
                                    non_sequences=[X, ] + self.trainable_weights,
                                    # n_steps=self.n_steps,
                                    truncate_gradient=self.truncate_gradient)

        elif self.inner_rnn == 'lstm':
            outputs, updates = scan(self._step_lstm,
                                    sequences=eps,
                                    outputs_info=self._get_initial_states(X) + (None, ),
                                    non_sequences=[X, ] + self.trainable_weights,
                                    truncate_gradient=self.truncate_gradient)

        kl = outputs[-1].sum(axis=0).mean()
        if train:
            # self.updates = updates
            self.regularizers = [SimpleCost(kl), ]
        if self.return_sequences:
            return [outputs[0].dimshuffle(1, 0, 2, 3, 4), kl]
        else:
            return [outputs[0][-1], kl]
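The `_get_filterbank`, `_read` and `_write` methods above implement the DRAW Gaussian attention grid (the paper's selective attention model). A minimal NumPy sketch of the same filterbank construction, with illustrative toy numbers (the function name and values below are not part of the layer):

import numpy as np

def gaussian_filterbank(gx, gy, sigma2, delta, N, width, height, eps=1e-4):
    # Batch version of the grid in _get_filterbank: N Gaussian slices per image,
    # centred on (gx, gy) with stride delta and variance sigma2.
    i = np.arange(N)
    mu_x = gx[:, None] + delta[:, None] * (i - N / 2. - .5)
    mu_y = gy[:, None] + delta[:, None] * (i - N / 2. - .5)
    a, b = np.arange(width), np.arange(height)
    Fx = np.exp(-(a - mu_x[:, :, None]) ** 2 / (2. * sigma2[:, None, None]))
    Fy = np.exp(-(b - mu_y[:, :, None]) ** 2 / (2. * sigma2[:, None, None]))
    Fx /= Fx.sum(axis=-1, keepdims=True) + eps   # normalise each filter row
    Fy /= Fy.sum(axis=-1, keepdims=True) + eps
    return Fx, Fy

# toy batch of 2, a 5x5 read grid over 28x28 images
gx, gy = np.array([14., 10.]), np.array([14., 20.])
sigma2, delta = np.array([1.5, 2.0]), np.array([3., 2.])
Fx, Fy = gaussian_filterbank(gx, gy, sigma2, delta, N=5, width=28, height=28)
print(Fx.shape, Fy.shape)  # (2, 5, 28) (2, 5, 28)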
Example #12
class NeuralTuringMachine(Recurrent):
    def __init__(self, output_dim, memory_size, shift_range=3,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = memory_size[1]
        self.m_length = memory_size[0]
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init
        self.input_dim = input_dim
        self.input_length = input_length
        self.u = None
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self, input_shape):
        self.u = input_shape
        input_leng, input_dim = input_shape[1:]
        # self.input = T.tensor3()

        self.rnn = LSTM(
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.output_dim, init=self.init,
            forget_bias_init='zero',
            inner_init=self.inner_init)
        self.rnn.build(input_shape)
        self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
        self.init_h = K.zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots,))
        self.init_ww = self.rnn.init((self.n_slots,))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = K.zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = K.zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length,))
        self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_read = K.zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = K.zeros((self.shift_range))  # b_s lol! not intentional

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length,))
        self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = K.zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = K.zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        self.init_c = K.zeros((self.output_dim))
        self.trainable_weights = self.trainable_weights + [self.init_c, ]

    def _read(self, w, M):
        return (w[:, :, None] * M).sum(axis=1)

    def _write(self, w, e, a, M):
        Mtilda = M * (1 - w[:, :, None] * e[:, None, :])
        Mout = Mtilda + w[:, :, None] * a[:, None, :]
        return Mout

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1):
        wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = _renorm(wtilda ** gamma[:, None])
        return wout

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-4
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1.0001
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def get_initial_states(self, X):
        batch_size = X.shape[0]
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
            batch_size, axis=0).repeat(self.n_slots, axis=1).repeat(
            self.m_length, axis=2)
        init_M = init_M.flatten(ndim=2)

        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                init_h, init_c]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def call(self, x, mask=None):

        input_shape = self.u
        # print(input_shape)  # debug
        if K._BACKEND == 'tensorflow':
            if not input_shape[1]:
                raise Exception('When using TensorFlow, you should define '
                                'explicitly the number of timesteps of '
                                'your sequences.\n'
                                'If your first layer is an Embedding, '
                                'make sure to pass it an "input_length" '
                                'argument. Otherwise, make sure '
                                'the first layer has '
                                'an "input_shape" or "batch_input_shape" '
                                'argument, including the time axis. '
                                'Found input shape at layer ' + self.name +
                                ': ' + str(input_shape))
        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(x)
        constants = self.get_constants(x)
        preprocessed_input = self.preprocess_input(x)

        last_output, outputs, states = K.rnn(self.step, preprocessed_input,
                                             initial_states,
                                             go_backwards=self.go_backwards,
                                             mask=mask,
                                             constants=constants,
                                             unroll=self.unroll,
                                             input_length=input_shape[1])
        if self.stateful:
            self.updates = []
            for i in range(len(states)):
                self.updates.append((self.states[i], states[i]))

        if self.return_sequences:
            return outputs
        else:
            return last_output

    def step(self, x, states):
        M_tm1, wr_tm1, ww_tm1 = states[:3]
        # reshape
        M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length))
        # read
        h_tm1 = states[3:]
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[0], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1)
        e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1)

        M_t = M_t.flatten(ndim=2)

        return h_t[0], [M_t, wr_t, ww_t] + h_t
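`_get_content_w` depends on `_cosine_distance` and `_softmax`, which are defined elsewhere in the file. A minimal NumPy sketch of the same content-based addressing step (cosine similarity sharpened by beta and normalised to a distribution over memory slots), with illustrative shapes:

import numpy as np

def content_addressing(M, k, beta, eps=1e-8):
    # Cosine similarity between key k and every memory row, sharpened by beta
    # and turned into a distribution over slots (Graves et al. 2014, eqs. 5-6).
    num = (M * k[:, None, :]).sum(axis=-1)
    denom = np.linalg.norm(M, axis=-1) * np.linalg.norm(k, axis=-1, keepdims=True) + eps
    sim = num / denom                       # (batch, n_slots)
    e = np.exp(beta[:, None] * sim)
    return e / e.sum(axis=-1, keepdims=True)

M = np.random.rand(2, 8, 20)    # batch of 2, n_slots=8, m_length=20
k = np.random.rand(2, 20)       # one read/write key per sample
beta = np.array([1.0, 10.0])    # larger beta -> sharper focus
w = content_addressing(M, k, beta)
print(w.shape, w.sum(axis=-1))  # (2, 8) [1. 1.]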
Example #13
class NeuralTuringMachine(Recurrent):
    def __init__(self, output_dim, memory_size, shift_range=3,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = memory_size[1]
        self.m_length = memory_size[0]
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self, input_shape):
        input_leng, input_dim = input_shape[1:]
       # self.input = T.tensor3()

        self.lstm = LSTM(
            input_dim=input_dim + self.m_length,
            input_length=input_leng,
            output_dim=self.output_dim, init=self.init,
            forget_bias_init='zero',
            inner_init=self.inner_init)

        self.lstm.build(input_shape)

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
        self.init_h = backend.zeros((self.output_dim))
        self.init_wr = self.lstm.init((self.n_slots,))
        self.init_ww = self.lstm.init((self.n_slots,))

        # write
        self.W_e = self.lstm.init((self.output_dim, self.m_length))  # erase
        self.b_e = backend.zeros((self.m_length))
        self.W_a = self.lstm.init((self.output_dim, self.m_length))  # add
        self.b_a = backend.zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.lstm.init((self.output_dim, self.m_length))
        self.b_k_read = self.lstm.init((self.m_length,))
        self.W_c_read = self.lstm.init((self.output_dim, 3))
        self.b_c_read = backend.zeros((3))
        self.W_s_read = self.lstm.init((self.output_dim, self.shift_range))
        self.b_s_read = backend.zeros((self.shift_range))  # b_s lol! not intentional

        # get_w  parameters for writing operation
        self.W_k_write = self.lstm.init((self.output_dim, self.m_length))
        self.b_k_write = self.lstm.init((self.m_length,))
        self.W_c_write = self.lstm.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = backend.zeros((3))
        self.W_s_write = self.lstm.init((self.output_dim, self.shift_range))
        self.b_s_write = backend.zeros((self.shift_range))

        self.C = circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.lstm.trainable_weights + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        self.init_c = backend.zeros((self.output_dim))
        self.trainable_weights = self.trainable_weights + [self.init_c, ]

    def read(self, w, M):
        return (w[:, :, None] * M).sum(axis=1)

    def write(self, w, e, a, M):
        Mtilda = M * (1 - w[:, :, None] * e[:, None, :])
        Mout = Mtilda + w[:, :, None] * a[:, None, :]
        return Mout

    def get_content_w(self, beta, k, M):
        num = beta[:, None] * cosine_similarity(M, k)
        return soft_max(num)

    def get_location_w(self, g, s, C, gamma, wc, w_tm1):
        wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = re_norm(wtilda ** gamma[:, None])
        return wout

    def get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-4
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1.0001
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def get_output_shape_for(self, input_shape):
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def step(self, x, states):
        # per-timestep transition (cf. step() in the preceding example);
        # the incoming states are the memory, read/write weights and controller state
        M_tm1, wr_tm1, ww_tm1 = states[:3]
        # reshape
        M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length))
        # read
        h_tm1 = states[3:]
        k_read, beta_read, g_read, gamma_read, s_read = self.get_controller_output(
            h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read)
        wc_read = self.get_content_w(beta_read, k_read, M_tm1)
        wr_t = self.get_location_w(g_read, s_read, self.C, gamma_read,
                                   wc_read, wr_tm1)
        M_read = self.read(wr_t, M_tm1)

        # update controller
        h_t = update_controller(self, x, h_tm1, M_read)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self.get_controller_output(
            h_t[0], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self.get_content_w(beta_write, k_write, M_tm1)
        ww_t = self.get_location_w(g_write, s_write, self.C, gamma_write,
                                   wc_write, ww_tm1)
        e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a)
        M_t = self.write(ww_t, e, a, M_tm1)

        M_t = M_t.flatten(ndim=2)

        return h_t[0], [M_t, wr_t, ww_t] + h_t
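`get_location_w` combines interpolation, a circular shift convolution via the `circulant` helper (not shown here), and sharpening. The sketch below shows roughly what those three steps do for a single head without batching; the exact shift convention depends on how `circulant` orders its shifts, so treat the direction in the example as illustrative:

import numpy as np

def location_addressing(wc, w_prev, g, s, gamma):
    # Interpolation, circular shift, and sharpening for one head (no batch),
    # roughly eqs. 7-9 of Graves et al. 2014.
    wg = g * wc + (1. - g) * w_prev                 # interpolate with previous weights
    shifts = np.arange(len(s)) - len(s) // 2        # e.g. s of length 3 -> shifts (-1, 0, 1)
    wt = np.zeros_like(wg)
    for sj, offset in zip(s, shifts):
        wt += sj * np.roll(wg, offset)              # convolve with the shift distribution
    w = wt ** gamma                                 # sharpen
    return w / w.sum()

wc = np.array([0.1, 0.7, 0.1, 0.1])
w = location_addressing(wc, w_prev=np.ones(4) / 4, g=1.0, s=np.array([0., 0., 1.]), gamma=2.0)
print(w)  # wc's peak (slot 1) has moved to slot 2 and been sharpened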
Example #14
class Stack(Recurrent):
    """ Neural Turing Machines

    Non obvious parameter:
    ----------------------
    shift_range: int, number of available shifts, ex. if 3, avilable shifts are
                 (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location

    Known issues:
    -------------
    Theano may complain when n_slots == 1.

    """
    def __init__(self, output_dim, n_slots, m_length,
                 inner_rnn='lstm',rnn_size=64, stack=True,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()


        self.init_h = K.zeros((self.rnn_size))

        self.W_d = self.rnn.init((self.rnn_size,1))
        self.W_u = self.rnn.init((self.rnn_size,1))

        self.W_v = self.rnn.init((self.rnn_size,self.m_length))
        self.W_o = self.rnn.init((self.rnn_size,self.output_dim))

        self.b_d = K.zeros((1,),name="b_d")
        self.b_u = K.zeros((1,),name="b_u")
        self.b_v = K.zeros((self.m_length,))
        self.b_o = K.zeros((self.output_dim,))

        
        self.trainable_weights = self.rnn.trainable_weights + [
           self.W_d, self.b_d,
            self.W_v, self.b_v,
            self.W_u,  self.b_u,
            self.W_o, self.b_o, self.init_h]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]
        #self.trainable_weights =[self.W_d]
       

    def get_initial_states(self, X):
        
        
        batch_size = X.shape[0]
        
        init_r = K.zeros((self.m_length)).dimshuffle('x',0).repeat(batch_size,axis=0)
        init_V = K.zeros((self.n_slots,self.m_length)).dimshuffle('x',0,1).repeat(batch_size,axis=0)
        init_S = K.zeros((self.n_slots)).dimshuffle('x',0).repeat(batch_size,axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)

        itime = K.zeros((1,),dtype=np.int32)
        

        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_r , init_V,init_S,itime,init_h,init_c]
      
    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def get_full_output(self, train=False):
        """
        This method is for research and visualization purposes. Use it as
        X = model.get_input()  # full model
        Y = ntm.get_output()    # this layer
        F = theano.function([X], Y, allow_input_downcast=True)
        [memory, read_address, write_address, rnn_state] = F(x)

        if inner_rnn == "lstm" use it as
        [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)

        """
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        X = self.get_input(train)
        assert K.ndim(X) == 3
        if K._BACKEND == 'tensorflow':
            if not self.input_shape[1]:
                raise Exception('When using TensorFlow, you should define ' +
                                'explicitly the number of timesteps of ' +
                                'your sequences. Make sure the first layer ' +
                                'has a "batch_input_shape" argument ' +
                                'including the samples axis.')

        mask = self.get_output_mask(train)
        if mask:
            # apply mask
            X *= K.cast(K.expand_dims(mask), X.dtype)
            masking = True
        else:
            masking = False

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(X)

        states = rnn_states(self.step, X, initial_states,
                            go_backwards=self.go_backwards,
                            masking=masking)
        return states

    def step(self, x, states):
        
        r_tm1, V_tm1,s_tm1,time = states[:4]
        h_tm1 = states[4:]
        
        def print_name_shape(name,x):
            return T.cast( K.sum(theano.printing.Print(name)(x.shape)) * 0,"float32")
        
        
        r_tm1 = r_tm1 +  print_name_shape("out\nr_tm1",r_tm1) + \
                          print_name_shape("V_tm1",V_tm1) + \
                          print_name_shape("s_tm1",s_tm1) + \
                          print_name_shape("x",x) + \
                          print_name_shape("h_tm1_0",h_tm1[0]) + \
                          print_name_shape("h_tm1_1",h_tm1[1]) 
                         
        
        op_t, h_t = self._update_controller( T.concatenate([x, r_tm1], axis=-1),
                                             h_tm1)
              
       # op_t = op_t  + print_name_shape("W_d",self.W_d.get_value()) 
        op_t = op_t + print_name_shape("afterop_t",op_t)
        #op_t = op_t[:,0,:]
        ao = K.dot(op_t, self.W_d)  
        ao = ao +print_name_shape("ao",ao)
        d_t = K.sigmoid( ao + self.b_d)  + print_name_shape("afterop2_t",op_t)
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)+ print_name_shape("d_t",op_t)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v) + print_name_shape("u_t",u_t)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o) + print_name_shape("v_t",v_t)
        
        o_t = o_t + print_name_shape("afterbulk_t",o_t)
        
        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[::,0], 
                                             u_t[::,0], v_t,time[0],stack=self.stack)
        
        #V_t, s_t, r_t = V_tm1,s_tm1,T.sum(V_tm1,axis = 1)
        V_t  = V_t + print_name_shape("o_t",o_t) + \
                          print_name_shape("r_t",r_t) + \
                          print_name_shape("V_t",V_t) +\
                          print_name_shape("s_t",s_t) 
                        # T.cast( theano.printing.Print("time")(time[0]),"float32")
        #time = T.set_subtensor(time[0],time[0] +)
        
        
       
        return o_t, [r_t, V_t, s_t, time] + h_t

    def _update_controller(self, inp, h_tm1):
        """We have to update the inner RNN inside the NTM, this
        is the function to do it. Pretty much copy+pasta from Keras
        """
    
        def print_name_shape(name,x,shape=True):
            if shape:
                return T.cast( K.sum(theano.printing.Print(name)(x.shape)) * 0,"float32")
            else:
                return theano.printing.Print(name)(x)
                
        
        
        #1 is for gru, 2 is for lstm
        if len(h_tm1) in [1,2]:
            if hasattr(self.rnn,"get_constants"):
                BW,BU = self.rnn.get_constants(inp)
                h_tm1 += (BW,BU)
        # update state
                
        op_t, h = self.rnn.step(inp + print_name_shape("inp",inp), h_tm1)
    
        
        return op_t + print_name_shape("opt",op_t) +print_name_shape("h",h[0])  +print_name_shape("h",h[1])\
                , h
Example #15
class Stack(Recurrent):
    """ Neural Turing Machines

    Non obvious parameter:
    ----------------------
    shift_range: int, number of available shifts, ex. if 3, avilable shifts are
                 (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location

    Known issues:
    -------------
    Theano may complain when n_slots == 1.

    """
    def __init__(self,
                 output_dim,
                 n_slots,
                 m_length,
                 inner_rnn='lstm',
                 rnn_size=64,
                 stack=True,
                 init='glorot_uniform',
                 inner_init='orthogonal',
                 input_dim=None,
                 input_length=None,
                 **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(activation='relu',
                           input_dim=input_dim + self.m_length,
                           input_length=input_leng,
                           output_dim=self.output_dim,
                           init=self.init,
                           inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(input_dim=input_dim + self.m_length,
                            input_length=input_leng,
                            output_dim=self.rnn_size,
                            init=self.init,
                            forget_bias_init='zero',
                            inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        self.init_h = K.zeros((self.rnn_size))

        self.W_d = self.rnn.init((self.rnn_size, 1))
        self.W_u = self.rnn.init((self.rnn_size, 1))

        self.W_v = self.rnn.init((self.rnn_size, self.m_length))
        self.W_o = self.rnn.init((self.rnn_size, self.output_dim))

        self.b_d = K.zeros((1, ), name="b_d")
        self.b_u = K.zeros((1, ), name="b_u")
        self.b_v = K.zeros((self.m_length, ))
        self.b_o = K.zeros((self.output_dim, ))

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_d, self.b_d, self.W_v, self.b_v, self.W_u, self.b_u,
            self.W_o, self.b_o, self.init_h
        ]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size))
            self.trainable_weights = self.trainable_weights + [
                self.init_c,
            ]
        #self.trainable_weights =[self.W_d]

    def get_initial_states(self, X):

        batch_size = X.shape[0]

        init_r = K.zeros((self.m_length)).dimshuffle('x', 0).repeat(batch_size,
                                                                    axis=0)
        init_V = K.zeros(
            (self.n_slots, self.m_length)).dimshuffle('x', 0,
                                                      1).repeat(batch_size,
                                                                axis=0)
        init_S = K.zeros((self.n_slots)).dimshuffle('x', 0).repeat(batch_size,
                                                                   axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)

        itime = K.zeros((1, ), dtype=np.int32)

        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size,
                                                             axis=0)
            return [init_r, init_V, init_S, itime, init_h, init_c]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def get_full_output(self, train=False):
        """
        This method is for research and visualization purposes. Use it as
        X = model.get_input()  # full model
        Y = ntm.get_output()    # this layer
        F = theano.function([X], Y, allow_input_downcast=True)
        [memory, read_address, write_address, rnn_state] = F(x)

        if inner_rnn == "lstm" use it as
        [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)

        """
        # input shape: (nb_samples, time (padded with zeros), input_dim)
        X = self.get_input(train)
        assert K.ndim(X) == 3
        if K._BACKEND == 'tensorflow':
            if not self.input_shape[1]:
                raise Exception('When using TensorFlow, you should define ' +
                                'explicitly the number of timesteps of ' +
                                'your sequences. Make sure the first layer ' +
                                'has a "batch_input_shape" argument ' +
                                'including the samples axis.')

        mask = self.get_output_mask(train)
        if mask:
            # apply mask
            X *= K.cast(K.expand_dims(mask), X.dtype)
            masking = True
        else:
            masking = False

        if self.stateful:
            initial_states = self.states
        else:
            initial_states = self.get_initial_states(X)

        states = rnn_states(self.step,
                            X,
                            initial_states,
                            go_backwards=self.go_backwards,
                            masking=masking)
        return states

    def step(self, x, states):

        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        def print_name_shape(name, x):
            return T.cast(
                K.sum(theano.printing.Print(name)(x.shape)) * 0, "float32")


        r_tm1 = r_tm1 +  print_name_shape("out\nr_tm1",r_tm1) + \
                          print_name_shape("V_tm1",V_tm1) + \
                          print_name_shape("s_tm1",s_tm1) + \
                          print_name_shape("x",x) + \
                          print_name_shape("h_tm1_0",h_tm1[0]) + \
                          print_name_shape("h_tm1_1",h_tm1[1])

        op_t, h_t = self._update_controller(T.concatenate([x, r_tm1], axis=-1),
                                            h_tm1)

        # op_t = op_t  + print_name_shape("W_d",self.W_d.get_value())
        op_t = op_t + print_name_shape("afterop_t", op_t)
        #op_t = op_t[:,0,:]
        ao = K.dot(op_t, self.W_d)
        ao = ao + print_name_shape("ao", ao)
        d_t = K.sigmoid(ao + self.b_d) + print_name_shape("afterop2_t", op_t)
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u) + print_name_shape(
            "d_t", op_t)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v) + print_name_shape(
            "u_t", u_t)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o) + print_name_shape(
            "v_t", v_t)

        o_t = o_t + print_name_shape("afterbulk_t", o_t)

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self,
                                             V_tm1,
                                             s_tm1,
                                             d_t[::, 0],
                                             u_t[::, 0],
                                             v_t,
                                             time[0],
                                             stack=self.stack)

        #V_t, s_t, r_t = V_tm1,s_tm1,T.sum(V_tm1,axis = 1)
        V_t  = V_t + print_name_shape("o_t",o_t) + \
                          print_name_shape("r_t",r_t) + \
                          print_name_shape("V_t",V_t) +\
                          print_name_shape("s_t",s_t)
        # T.cast( theano.printing.Print("time")(time[0]),"float32")
        #time = T.set_subtensor(time[0],time[0] +)

        return o_t, [r_t, V_t, s_t, time] + h_t

    def _update_controller(self, inp, h_tm1):
        """We have to update the inner RNN inside the NTM, this
        is the function to do it. Pretty much copy+pasta from Keras
        """
        def print_name_shape(name, x, shape=True):
            if shape:
                return T.cast(
                    K.sum(theano.printing.Print(name)(x.shape)) * 0, "float32")
            else:
                return theano.printing.Print(name)(x)

        #1 is for gru, 2 is for lstm
        if len(h_tm1) in [1, 2]:
            if hasattr(self.rnn, "get_constants"):
                BW, BU = self.rnn.get_constants(inp)
                h_tm1 += (BW, BU)
        # update state

        op_t, h = self.rnn.step(inp + print_name_shape("inp", inp), h_tm1)


        return op_t + print_name_shape("opt",op_t) +print_name_shape("h",h[0])  +print_name_shape("h",h[1])\
                , h
Example #16
class NeuralTuringMachine(Recurrent):
    """ Neural Turing Machines
    
    Parameters:
    -----------
    shift_range: int, number of available shifts, e.g. if 3, available shifts are
                 (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location
    inner_rnn: str, supported values are 'gru' and 'lstm'
    output_dim: hidden state size (RNN controller output_dim)

    Known issues and TODO:
    ----------------------
    Theano may complain when n_slots == 1.
    Add multiple reading and writing heads.

    """
    def __init__(self, output_dim, n_slots, m_length, shift_range=3,
                 inner_rnn='gru', truncate_gradient=-1, return_sequences=False,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots
        self.m_length = m_length
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn
        self.return_sequences = return_sequences
        self.truncate_gradient = truncate_gradient

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001*np.ones((1,)).astype(floatX)))
        self.init_h = shared_zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots,))
        self.init_ww = self.rnn.init((self.n_slots,))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = shared_zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = shared_zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length, ))
        self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma; see eqs. 5, 7, 9 in Graves et al. 2014
        self.b_c_read = shared_zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = shared_zeros((self.shift_range)) 

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length, ))
        self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = shared_zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = shared_zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)
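        # C is presumably a (shift_range, n_slots, n_slots) stack of rotation
        # matrices built by the module-level _circulant helper; it is
        # contracted with the shift distribution s in _get_location_w to
        # realise the circular convolutional shift.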

        self.params = self.rnn.params + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        if self.inner_rnn == 'lstm':
            self.init_c = shared_zeros((self.output_dim))
            self.params = self.params + [self.init_c, ]

    def _read(self, w, M):
        return (w[:, :, None]*M).sum(axis=1)

    def _write(self, w, e, a, M, mask):
        Mtilda = M * (1 - w[:, :, None]*e[:, None, :])
        Mout = Mtilda + w[:, :, None]*a[:, None, :]
        return mask[:, None, None]*Mout + (1-mask[:, None, None])*M
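    # _read and _write follow the NTM memory update of Graves et al. 2014:
    #   read:   r_t = sum_i w_t(i) * M_t(i)
    #   erase:  Mtilda_t(i) = M_{t-1}(i) * (1 - w_t(i) * e_t)
    #   add:    M_t(i) = Mtilda_t(i) + w_t(i) * a_t
    # The mask argument leaves M unchanged on padded timesteps.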

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1, mask):
        wg = g[:, None] * wc + (1-g[:, None])*w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = _renorm(wtilda ** gamma[:, None])
        return mask[:, None] * wout + (1-mask[:, None])*w_tm1

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-6
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s
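    # _get_controller_output, _get_content_w and _get_location_w together
    # implement the NTM addressing pipeline (eqs. 5-9 of Graves et al. 2014):
    #   content:     wc_t = softmax(beta_t * cosine(k_t, M_t))
    #   interpolate: wg_t = g_t * wc_t + (1 - g_t) * w_{t-1}
    #   shift:       wtilda_t = circular_conv(wg_t, s_t)   (via the tensor C)
    #   sharpen:     w_t = wtilda_t^gamma_t / sum_j wtilda_t(j)^gamma_t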

    def _get_initial_states(self, batch_size):
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
           batch_size, axis=0).repeat(self.n_slots, axis=1).repeat(
               self.m_length, axis=2)

        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww), init_h, init_c
        else:
            return init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww), init_h

    def _step(self, x, mask, M_tm1, wr_tm1, ww_tm1, *args):
        # read
        if self.inner_rnn == 'lstm':
            h_tm1 = args[0:2][::-1]  # (cell_tm1, h_tm1)
        else:
            h_tm1 = args[0:1]  # (h_tm1, )
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[-1], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1, mask)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read, mask)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[-1], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1, mask)
        e = T.nnet.sigmoid(T.dot(h_t[-1], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[-1], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1, mask)

        return (M_t, wr_t, ww_t) + h_t

    def get_output(self, train=False):
        outputs = self.get_full_output(train)

        if self.return_sequences:
            return outputs[-1]
        else:
            return outputs[-1][:, -1]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def get_full_output(self, train=False):
        """
        This method is for research and visualization purposes. Use it as:
        X = model.get_input()  # full model
        Y = ntm.get_output()    # this layer
        F = theano.function([X], Y, allow_input_downcast=True)
        [memory, read_address, write_address, rnn_state] = F(x)

        if inner_rnn == "lstm" use it as:
        [memory, read_address, write_address, rnn_cell, rnn_state] = F(x)

        """
        X = self.get_input(train)
        padded_mask = self.get_padded_shuffled_mask(train, X, pad=1)[:, :, 0]
        X = X.dimshuffle((1, 0, 2))

        init_states = self._get_initial_states(X.shape[1])
        outputs, updates = theano.scan(self._step,
                                       sequences=[X, padded_mask],
                                       outputs_info=init_states,
                                       non_sequences=self.params,
                                       truncate_gradient=self.truncate_gradient)

        out = [outputs[0].dimshuffle((1, 0, 2, 3)),
               outputs[1].dimshuffle(1, 0, 2),
               outputs[2].dimshuffle((1, 0, 2)),
               outputs[3].dimshuffle((1, 0, 2))]
        if self.inner_rnn == 'lstm':
            out = out + [outputs[4].dimshuffle((1, 0, 2))]
        return out
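# A minimal usage sketch for the layer above, assuming the old Theano-backed
# Keras API this snippet targets; the readout layer, task dimensions and
# optimizer below are illustrative assumptions rather than part of the
# original example.
from keras.models import Sequential
from keras.layers.core import TimeDistributedDense

model = Sequential()
model.add(NeuralTuringMachine(output_dim=64, n_slots=50, m_length=20,
                              shift_range=3, inner_rnn='lstm',
                              return_sequences=True,
                              input_dim=8, input_length=20))
model.add(TimeDistributedDense(8, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')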
Example #17
class NeuralTuringMachine(Recurrent):
    """ Neural Turing Machines
    Non obvious parameter:
    ----------------------
    shift_range: int, number of available shifts, e.g. if 3, available shifts are
                 (-1, 0, 1)
    n_slots: number of memory locations
    m_length: memory length at each location
    Known issues:
    -------------
    Theano may complain when n_slots == 1.
    """

    def __init__(self, output_dim, n_slots, m_length, shift_range=3,
                 inner_rnn='lstm',
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=4, input_length=5, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots
        self.m_length = m_length
        self.shift_range = shift_range
        self.init = init
        self.inner_init = inner_init
        self.inner_rnn = inner_rnn

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(NeuralTuringMachine, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim + self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001 * np.ones((1,)).astype(floatX)))
        self.init_h = K.zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots,))
        self.init_ww = self.rnn.init((self.n_slots,))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = K.zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = K.zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length,))
        self.W_c_read = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_read = K.zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = K.zeros((self.shift_range))

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length,))
        self.W_c_write = self.rnn.init((self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = K.zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = K.zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_e, self.b_e,
            self.W_a, self.b_a,
            self.W_k_read, self.b_k_read,
            self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read,
            self.W_k_write, self.b_k_write,
            self.W_s_write, self.b_s_write,
            self.W_c_write, self.b_c_write,
            self.M,
            self.init_h, self.init_wr, self.init_ww]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.output_dim))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]

    def _read(self, w, M):
        return (w[:, :, None] * M).sum(axis=1)

    def _write(self, w, e, a, M):
        Mtilda = M * (1 - w[:, :, None] * e[:, None, :])
        Mout = Mtilda + w[:, :, None] * a[:, None, :]
        return Mout

    def _get_content_w(self, beta, k, M):
        num = beta[:, None] * _cosine_distance(M, k)
        return _softmax(num)

    def _get_location_w(self, g, s, C, gamma, wc, w_tm1):
        wg = g[:, None] * wc + (1 - g[:, None]) * w_tm1
        Cs = (C[None, :, :, :] * wg[:, None, None, :]).sum(axis=3)
        wtilda = (Cs * s[:, :, None]).sum(axis=1)
        wout = _renorm(wtilda ** gamma[:, None])
        return wout

    def _get_controller_output(self, h, W_k, b_k, W_c, b_c, W_s, b_s):
        k = T.tanh(T.dot(h, W_k) + b_k)  # + 1e-6
        c = T.dot(h, W_c) + b_c
        beta = T.nnet.relu(c[:, 0]) + 1e-4
        g = T.nnet.sigmoid(c[:, 1])
        gamma = T.nnet.relu(c[:, 2]) + 1.0001
        s = T.nnet.softmax(T.dot(h, W_s) + b_s)
        return k, beta, g, gamma, s

    def get_initial_states(self, X):
        batch_size = X.shape[0]
        init_M = self.M.dimshuffle(0, 'x', 'x').repeat(
            batch_size, axis=0).repeat(self.n_slots, axis=1).repeat(
            self.m_length, axis=2)
        init_M = init_M.flatten(ndim=2)

        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_wr = self.init_wr.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        init_ww = self.init_ww.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                    init_h, init_c]
        else:
            return [init_M, T.nnet.softmax(init_wr), T.nnet.softmax(init_ww),
                    init_h]

    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim


    def step(self, x, states):
        M_tm1, wr_tm1, ww_tm1 = states[:3]
        # reshape
        M_tm1 = M_tm1.reshape((x.shape[0], self.n_slots, self.m_length))
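        # Keras passes recurrent state as 2-D (batch, dim) tensors, so the
        # memory matrix travels flattened as (batch, n_slots * m_length)
        # between steps (see get_initial_states) and is reshaped back to
        # (batch, n_slots, m_length) here before reading and writing.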
        # read
        h_tm1 = states[3:]
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[0], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[0], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1)
        e = T.nnet.sigmoid(T.dot(h_t[0], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[0], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1)

        M_t = M_t.flatten(ndim=2)
        return h_t[0], [M_t, wr_t, ww_t] + h_t
Example #18
class Stack(Recurrent):
    """ Stack and queue network
    
    
    output_dim = output dimension
    n_slots = number of memory slots
    m_length = dimension of the memory
    rnn_size = output length of the memory controller
    inner_rnn = "lstm"; only lstm is supported
    stack = True to create neural stack or False to create neural queue
    
    
    from Learning to Transduce with Unbounded Memory
    [[http://arxiv.org/pdf/1506.02516.pdf]]
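    A usage sketch for this layer follows the class definition.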
    """
    def __init__(self, output_dim, n_slots, m_length,
                 inner_rnn='lstm',rnn_size=64, stack=True,
                 init='glorot_uniform', inner_init='orthogonal',
                 input_dim=None, input_length=None, **kwargs):
        self.output_dim = output_dim
        self.n_slots = n_slots + 1  # because we start at time 1
        self.m_length = m_length
        self.init = init
        self.inner_init = inner_init
        if inner_rnn != "lstm":
            print "Only lstm is supported"
            raise
        self.inner_rnn = inner_rnn
        self.rnn_size = rnn_size
        self.stack = stack

        self.input_dim = input_dim
        self.input_length = input_length
        if self.input_dim:
            kwargs['input_shape'] = (self.input_length, self.input_dim)
        super(Stack, self).__init__(**kwargs)

    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(
                activation='relu',
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(
                input_dim=input_dim+self.m_length,
                input_length=input_leng,
                output_dim=self.rnn_size, init=self.init,
                forget_bias_init='zero',
                inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()


        self.init_h = K.zeros((self.rnn_size))

        self.W_d = self.rnn.init((self.rnn_size,1))
        self.W_u = self.rnn.init((self.rnn_size,1))

        self.W_v = self.rnn.init((self.rnn_size,self.m_length))
        self.W_o = self.rnn.init((self.rnn_size,self.output_dim))

        self.b_d = K.zeros((1,),name="b_d")
        self.b_u = K.zeros((1,),name="b_u")
        self.b_v = K.zeros((self.m_length,))
        self.b_o = K.zeros((self.output_dim,))

        
        self.trainable_weights = self.rnn.trainable_weights + [
            self.W_d, self.b_d,
            self.W_v, self.b_v,
            self.W_u, self.b_u,
            self.W_o, self.b_o, self.init_h]

        if self.inner_rnn == 'lstm':
            self.init_c = K.zeros((self.rnn_size))
            self.trainable_weights = self.trainable_weights + [self.init_c, ]

    def get_initial_states(self, X):
        batch_size = X.shape[0]

        init_r = K.zeros((self.m_length)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_V = K.zeros((self.n_slots, self.m_length)).dimshuffle('x', 0, 1).repeat(batch_size, axis=0)
        init_S = K.zeros((self.n_slots)).dimshuffle('x', 0).repeat(batch_size, axis=0)
        init_h = self.init_h.dimshuffle(('x', 0)).repeat(batch_size, axis=0)

        itime = K.zeros((1,), dtype=np.int32)

        if self.inner_rnn == 'lstm':
            init_c = self.init_c.dimshuffle(('x', 0)).repeat(batch_size, axis=0)
            return [init_r, init_V, init_S, itime, init_h, init_c]
      
    @property
    def output_shape(self):
        input_shape = self.input_shape
        if self.return_sequences:
            return input_shape[0], input_shape[1], self.output_dim
        else:
            return input_shape[0], self.output_dim

    def step(self, x, states):
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        # the controller sees the input concatenated with the previous read vector
        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                       h_tm1)

        # project the controller output into the four stack signals
        d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)  # push strength
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)  # pop strength
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)     # value to push
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)     # layer output

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[:, 0],
                                             u_t[:, 0], v_t, time[0],
                                             stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t
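# A minimal usage sketch for the Stack layer, assuming the Theano-backed Keras
# API this snippet targets; the readout layer, dimensions and optimizer below
# are illustrative assumptions rather than part of the original example.
from keras.models import Sequential
from keras.layers.core import TimeDistributedDense

stack_model = Sequential()
stack_model.add(Stack(output_dim=32, n_slots=64, m_length=20, rnn_size=64,
                      inner_rnn='lstm', stack=True, return_sequences=True,
                      input_dim=8, input_length=16))
stack_model.add(TimeDistributedDense(8, activation='sigmoid'))
stack_model.compile(loss='binary_crossentropy', optimizer='rmsprop')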