Exemplo n.º 1
0
    def output(self, input, n_batch=None):
        """Build the symbolic output of the unpool + convolution layer.

        The input is repeated ``self.poolsize`` times along axes 2 and 3 and
        multiplied by ``self.mask`` (skipped for a 1x1 pool size), convolved
        with ``self.W`` through ``dnn.dnn_conv``, shifted by the bias and
        finally passed through ``self.activation`` when one is set.

        :param input: tensor to unpool and convolve (presumably 4D, since
            axes 2 and 3 are repeated -- confirm against the caller).
        :param n_batch: optional value that overrides the first entry of the
            local copy of ``self.image_shape``.
        :raises Exception: if ``self.border_mode`` is not ``'same'``.
        """
        pool_rows, pool_cols = self.poolsize[0], self.poolsize[1]
        if pool_rows != 1 or pool_cols != 1:
            stretched = Textra.repeat(input, pool_rows, axis=2)
            unpool_out = Textra.repeat(stretched, pool_cols,
                                       axis=3) * self.mask
        else:
            # Nothing to undo for a 1x1 pooling window.
            unpool_out = input

        # Shape bookkeeping only: the dnn convolution below does not take
        # an image shape argument.
        image_shape = list(self.image_shape)
        if n_batch is not None:
            image_shape[0] = n_batch

        if self.border_mode != 'same':
            raise Exception('Unknown conv type')

        conv_out = dnn.dnn_conv(img=unpool_out,
                                kerns=self.W,
                                subsample=(1, 1),
                                border_mode=self.border)

        # The 1D bias is reshaped to (1, n_filters, 1, 1) so that it
        # broadcasts over mini-batches and over feature map width/height.
        lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
        if self.activation is None:
            return lin_output
        return self.activation(lin_output)
Exemplo n.º 2
0
    def test_repeatOp(self):
        """Compare ``repeat`` with ``np.repeat`` for scalar and vector counts.

        For every rank in ``range(3)``, every axis yielded by
        ``self._possible_axis`` and every dtype in ``tensor.discrete_dtypes``
        the repeat count is supplied first as a scalar and then as a vector.
        Dtypes listed in ``self.numpy_unsupported_dtypes`` must make
        ``repeat`` raise ``TypeError``.
        """
        for ndim in range(3):
            x = T.TensorType(config.floatX, [False] * ndim)()
            a = np.random.random((10, ) * ndim).astype(config.floatX)

            for axis in self._possible_axis(ndim):
                for dtype in tensor.discrete_dtypes:
                    r_var = T.scalar(dtype=dtype)
                    r = np.asarray(3, dtype=dtype)
                    if dtype in self.numpy_unsupported_dtypes:
                        self.assertRaises(TypeError,
                                          repeat, x, r_var, axis=axis)
                        continue

                    # Scalar repeat count.
                    f = theano.function([x, r_var],
                                        repeat(x, r_var, axis=axis))
                    assert np.allclose(np.repeat(a, r, axis=axis),
                                       f(a, r))

                    # Vector repeat count: one entry per element of the
                    # flattened input when axis is None, otherwise one per
                    # entry along the (length 10) axis.
                    r_var = T.vector(dtype=dtype)
                    n_counts = a.size if axis is None else 10
                    # randint excludes its upper bound, so (1, 6) draws from
                    # 1..5 like the deprecated random_integers(5) did.
                    r = np.random.randint(
                        1, 6, size=n_counts).astype(dtype)

                    f = theano.function([x, r_var],
                                        repeat(x, r_var, axis=axis))
                    assert np.allclose(np.repeat(a, r, axis=axis),
                                       f(a, r))
Exemplo n.º 3
0
    def reverseConv(self, activations, img_shape, flipped_filter, dim2=1):
        """Build the symbolic reverse pass of this pooling + conv layer.

        ``activations`` is repeated ``self.poolsize`` times along axes 2 and
        3, convolved in 'full' mode with the channel-swapped, spatially
        flipped filters and then cropped. The result is stored in
        ``self.reverseOutput``; nothing is returned. ``self.zp``,
        ``self.lengthen``, ``self.w_shape`` and ``self.changed_W`` are set as
        side effects.

        :param activations: tensor to propagate backwards.
        :param img_shape: passed to ``conv.conv2d`` as ``image_shape``.
        :param flipped_filter: passed to ``conv.conv2d`` as ``filter_shape``.
        :param dim2: when truthy, axis 3 is cropped as well and axis 2 is
            cropped only if its two margins differ; when falsy only axis 2
            is cropped.
        """
        # Reverse max pooling first.
        out_shape = self.output.shape
        self.zp = activations.reshape(
            (out_shape[0] * out_shape[1] * out_shape[2], out_shape[3]))
        lengthen = repeat(activations, self.poolsize[0], axis=2)
        self.lengthen = repeat(lengthen, self.poolsize[1], axis=3)
        self.w_shape = self.W.shape
        self.changed_W = self.W.dimshuffle(1, 0, 2, 3)

        # Reversing the convolutional step.
        rev_conv_out = conv.conv2d(input=self.lengthen,
                                   filters=self.changed_W[:, :, ::-1, ::-1],
                                   filter_shape=flipped_filter,
                                   image_shape=img_shape,
                                   border_mode='full')

        # Margins that convert the 'full' result along axis 2.
        s1 = numpy.floor((self.filter_shape[2] - 1) / 2.0).astype(int)
        e1 = numpy.ceil((self.filter_shape[2] - 1) / 2.0).astype(int)
        # A zero end margin must become an open-ended slice: ``s:-0`` is
        # ``s:0`` and would select nothing.
        stop1 = -e1 if e1 else None

        # Original author's note: time must be the same. Forward = time is
        # same, frequency is valid; backward = time is same, frequency is
        # full.
        if dim2:
            s2 = numpy.floor((self.filter_shape[3] - 1) / 2.0).astype(int)
            e2 = numpy.ceil((self.filter_shape[3] - 1) / 2.0).astype(int)
            stop2 = -e2 if e2 else None
            if s1 == e1:
                rev_conv_out = rev_conv_out[:, :, :, s2:stop2]
            else:
                rev_conv_out = rev_conv_out[:, :, s1:stop1, s2:stop2]
        else:
            rev_conv_out = rev_conv_out[:, :, s1:stop1, :]

        self.reverseOutput = rev_conv_out
Exemplo n.º 4
0
def fawn_recurrent(inpt_mean, inpt_var, weights_mean, weights_var, f,
                   initial_mean, initial_var):
    """Scan a mean/variance propagating layer over a sequence.

    At each step the pre-activation mean is ``dot(mean, weights_mean)`` and
    the pre-activation variance is the sum of three dot products combining
    the input moments with the weight moments; both are then passed through
    the transfer function looked up from ``f``. The previous-step values
    handed over by ``theano.scan`` are accepted but not used by the step.

    1D ``initial_mean`` / ``initial_var`` are first repeated
    ``inpt_mean.shape[1]`` times along a new leading axis.

    Returns the four scanned sequences: pre-activation mean and variance,
    post-activation mean and variance.
    """
    transfer = lookup(f, transfer_)

    def step(x_mean, x_var, him_m1, hiv_m1, hom_m1, hov_m1):
        pres_mean = T.dot(x_mean, weights_mean)
        pres_var = (T.dot(x_mean**2, weights_var) +
                    T.dot(x_var, weights_mean**2) +
                    T.dot(x_var, weights_var))
        post_mean, post_var = transfer(pres_mean, pres_var)
        return pres_mean, pres_var, post_mean, post_var

    if initial_mean.ndim == 1:
        row = initial_mean.dimshuffle('x', 0)
        initial_mean = repeat(row, inpt_mean.shape[1], axis=0)
    if initial_var.ndim == 1:
        row = initial_var.dimshuffle('x', 0)
        initial_var = repeat(row, inpt_mean.shape[1], axis=0)

    start_values = [
        T.zeros_like(inpt_mean[0]),
        T.zeros_like(inpt_mean[0]),
        initial_mean,
        initial_var,
    ]
    scanned, _ = theano.scan(step,
                             sequences=[inpt_mean, inpt_var],
                             outputs_info=start_values)
    (hidden_in_mean_rec, hidden_in_var_rec, hidden_mean_rec,
     hidden_var_rec) = scanned

    return (hidden_in_mean_rec, hidden_in_var_rec, hidden_mean_rec,
            hidden_var_rec)
Exemplo n.º 5
0
    def test_repeatOp(self):
        """Compare ``repeat`` with ``np.repeat`` for scalar and vector counts.

        Iterates over ranks 0..2, the axes from ``self._possible_axis`` and
        all dtypes in ``tensor.discrete_dtypes``. Dtypes found in
        ``self.numpy_unsupported_dtypes`` are expected to make ``repeat``
        raise ``TypeError``; all others are checked with a scalar count and
        then with a vector of counts.
        """
        for ndim in range(3):
            x = T.TensorType(config.floatX, [False] * ndim)()
            a = np.random.random((10, ) * ndim).astype(config.floatX)

            for axis in self._possible_axis(ndim):
                for dtype in tensor.discrete_dtypes:
                    r_var = T.scalar(dtype=dtype)
                    r = np.asarray(3, dtype=dtype)
                    if dtype in self.numpy_unsupported_dtypes:
                        self.assertRaises(TypeError,
                                          repeat,
                                          x,
                                          r_var,
                                          axis=axis)
                        continue

                    # Scalar repeat count.
                    f = theano.function([x, r_var],
                                        repeat(x, r_var, axis=axis))
                    assert np.allclose(np.repeat(a, r, axis=axis), f(a, r))

                    # Vector of counts: a.size entries for the flattened
                    # case, otherwise 10 (the length of every axis of a).
                    r_var = T.vector(dtype=dtype)
                    n_counts = a.size if axis is None else 10
                    # randint's upper bound is exclusive: (1, 6) yields the
                    # same 1..5 range as the deprecated random_integers(5).
                    r = np.random.randint(1, 6, size=n_counts).astype(dtype)

                    f = theano.function([x, r_var],
                                        repeat(x, r_var, axis=axis))
                    assert np.allclose(np.repeat(a, r, axis=axis), f(a, r))
Exemplo n.º 6
0
    def output(self, input, n_batch=None):
        """Return the symbolic result of unpooling then convolving ``input``.

        Steps: repeat ``input`` along axes 2 and 3 by ``self.poolsize`` and
        multiply by ``self.mask`` (unless the pool size is 1x1), run
        ``dnn.dnn_conv`` with ``self.W``, add the broadcast bias and apply
        ``self.activation`` if it is not ``None``.

        :param input: tensor fed to the layer (presumably 4D -- confirm).
        :param n_batch: optional replacement for the first entry of the
            local copy of ``self.image_shape``.
        :raises Exception: when ``self.border_mode`` is anything but
            ``'same'``.
        """
        is_identity_pool = (self.poolsize[0] == 1 and self.poolsize[1] == 1)
        if is_identity_pool:
            unpool_out = input
        else:
            along_rows = Textra.repeat(input, self.poolsize[0], axis=2)
            along_cols = Textra.repeat(along_rows, self.poolsize[1], axis=3)
            unpool_out = along_cols * self.mask

        # Computed for bookkeeping; the dnn convolution does not consume it.
        image_shape = list(self.image_shape)
        if n_batch is not None:
            image_shape[0] = n_batch

        if self.border_mode != 'same':
            raise Exception('Unknown conv type')

        conv_out = dnn.dnn_conv(
            img=unpool_out,
            kerns=self.W,
            subsample=(1, 1),
            border_mode=self.border,
        )

        # Bias vector -> (1, n_filters, 1, 1) so it broadcasts across the
        # mini-batch and the feature map width/height.
        broadcast_bias = self.b.dimshuffle('x', 0, 'x', 'x')
        lin_output = conv_out + broadcast_bias
        if self.activation is None:
            return lin_output
        return self.activation(lin_output)
Exemplo n.º 7
0
    def drop_output(self, input, drop=0, rng=None, p=0.5, n_batch=None):
        """Build the layer output with dropout selectable at run time.

        The input is unpooled (repeated by ``self.poolsize`` along axes 2 and
        3 and multiplied by ``self.mask``, unless the pool size is 1x1),
        convolved with ``self.W``, shifted by the bias and passed through
        ``self.activation`` if set. The result and its dropped-out version
        are combined with ``T.switch`` on ``drop != 0``.

        :param input: tensor fed to the layer (presumably 4D -- confirm).
        :param drop: dropout is selected when this differs from 0.
        :param rng: random generator handed to ``nonlinearity.dropout``.
        :param p: probability handed to ``nonlinearity.dropout``.
        :param n_batch: optional batch size overriding the first entry of
            ``self.image_shape``. Added with a default because the body
            used to reference an undefined ``n_batch`` (NameError).
        :raises Exception: if ``self.border_mode`` is not one of
            ``'valid'``, ``'same'`` or ``'full'``.
        """
        if self.poolsize[0] == 1 and self.poolsize[1] == 1:
            unpool_out = input
        else:
            unpool_out = Textra.repeat(
                Textra.repeat(input, self.poolsize[0], axis=2),
                self.poolsize[1], axis=3) * self.mask

        image_shape = list(self.image_shape)
        if n_batch is not None:
            image_shape[0] = n_batch

        if self.border_mode not in ('valid', 'same', 'full'):
            raise Exception('Unknown conv type')

        # 'same' is obtained from a 'full' convolution cropped afterwards.
        conv_mode = 'valid' if self.border_mode == 'valid' else 'full'
        conv_out = conv.conv2d(
            input=unpool_out,
            filters=self.W,
            filter_shape=self.filter_shape,
            image_shape=image_shape,
            border_mode=conv_mode
        )

        if self.border_mode == 'same':
            # Floor division keeps the paddings integral on Python 3 too;
            # a float cannot be used as a slice bound.
            # NOTE(review): a filter dimension of 1 gives a padding of 0 and
            # the slice 0:-0 would then be empty -- confirm such filters are
            # never combined with 'same'.
            padding_w = theano.shared((self.filter_shape[2] - 1) // 2)
            padding_h = theano.shared((self.filter_shape[3] - 1) // 2)
            conv_out = conv_out[:, :, padding_w:-padding_w,
                                padding_h:-padding_h]

        # The 1D bias is reshaped to (1, n_filters, 1, 1) so that it
        # broadcasts across mini-batches and feature map width & height.
        lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
        output = (
            lin_output if self.activation is None
            else self.activation(lin_output)
        )
        droppedOutput = nonlinearity.dropout(rng, output, p)
        return T.switch(T.neq(drop, 0), droppedOutput, output)
Exemplo n.º 8
0
 def unpool(self, input):
     """Upsample ``input`` and gate it with the pooling gradient.

     ``input`` is repeated ``self.poolsize[0]`` times along axis 2 and
     ``self.poolsize[1]`` times along axis 3; the result is multiplied by
     the gradient of ``sum(self.pool_out)`` with respect to
     ``self.pool_in``.
     """
     pool_grad = T.grad(T.sum(self.pool_out), wrt=self.pool_in)
     stretched_rows = repeat(input, self.poolsize[0], 2)
     upsampled = repeat(stretched_rows, self.poolsize[1], 3)
     return pool_grad * upsampled
Exemplo n.º 9
0
def gen(Z, w, w1, w2, w3):
    """Map ``Z`` through two dense and two upsample + conv stages.

    Dense layers use ``ReLU(batchnorm(dot(...)))``. The second dense output
    is reshaped to ``(rows, nkerns[2], 7, 7)``; each later stage doubles
    axes 2 and 3 with ``repeat`` before a ``'half'`` border convolution.
    The final convolution is followed by ``tanh``.
    """
    def double_spatial(maps):
        # Repeat every entry twice along axis 2, then along axis 3.
        return repeat(repeat(maps, 2, 2), 2, 3)

    dense0 = ReLU(batchnorm(T.dot(Z, w)))
    dense1 = ReLU(batchnorm(T.dot(dense0, w1)))
    maps1 = dense1.reshape((dense1.shape[0], nkerns[2], 7, 7))

    conv2_in = double_spatial(maps1)
    maps2 = ReLU(batchnorm(conv2d(conv2_in, w2, border_mode='half')))

    conv3_in = double_spatial(maps2)
    return T.tanh(conv2d(conv3_in, w3, border_mode='half'))
Exemplo n.º 10
0
    def test_repeatOp(self):
        """Check ``repeat`` against ``np.repeat`` for several count kinds.

        For ranks 1 and 3, each axis from ``self._possible_axis`` and each
        dtype in ``tensor.integer_dtypes`` the count is given as a scalar, a
        vector, a one-element Python list and a broadcastable length-1
        tensor. The last two cases must compile to a graph without any
        ``RepeatOp`` node. ``uint64`` scalar counts and unsupported vector
        dtypes must raise ``TypeError``.
        """
        def graph_has_repeat_op(fn):
            return np.any([isinstance(node.op, RepeatOp)
                           for node in fn.maker.fgraph.toposort()])

        for ndim in [1, 3]:
            x = T.TensorType(config.floatX, [False] * ndim)()
            a = np.random.random((10, ) * ndim).astype(config.floatX)

            for axis in self._possible_axis(ndim):
                for dtype in tensor.integer_dtypes:
                    scalar_var = T.scalar(dtype=dtype)
                    scalar_val = np.asarray(3, dtype=dtype)
                    if (dtype == 'uint64' or
                            (dtype in self.numpy_unsupported_dtypes and
                                scalar_var.ndim == 1)):
                        self.assertRaises(TypeError,
                                          repeat, x, scalar_var, axis=axis)
                        continue

                    # Scalar count.
                    f = theano.function([x, scalar_var],
                                        repeat(x, scalar_var, axis=axis))
                    assert np.allclose(
                        np.repeat(a, scalar_val, axis=axis),
                        f(a, scalar_val))

                    # Vector of counts.
                    vector_var = T.vector(dtype=dtype)
                    count_shape = a.size if axis is None else (10,)
                    vector_val = np.random.randint(
                        1, 6, size=count_shape).astype(dtype)

                    if (dtype in self.numpy_unsupported_dtypes and
                            vector_var.ndim == 1):
                        self.assertRaises(TypeError,
                                          repeat, x, vector_var, axis=axis)
                    else:
                        f = theano.function(
                            [x, vector_var],
                            repeat(x, vector_var, axis=axis))
                        assert np.allclose(
                            np.repeat(a, vector_val, axis=axis),
                            f(a, vector_val))

                    # Count given as a list holding one integer, e.g. [3].
                    single = np.random.randint(
                        1, 11, size=()).astype(dtype) + 2
                    f = theano.function([x],
                                        repeat(x, [single], axis=axis))
                    assert np.allclose(np.repeat(a, single, axis=axis),
                                       f(a))
                    assert not graph_has_repeat_op(f)

                    # Count given as a tensor whose broadcastable pattern
                    # is (True,).
                    bcast_var = theano.tensor.TensorType(
                        broadcastable=(True,), dtype=dtype)()
                    bcast_val = np.random.randint(
                        1, 6, size=(1,)).astype(dtype)
                    f = theano.function([x, bcast_var],
                                        repeat(x, bcast_var, axis=axis))
                    assert np.allclose(
                        np.repeat(a, bcast_val[0], axis=axis),
                        f(a, bcast_val))
                    assert not graph_has_repeat_op(f)
Exemplo n.º 11
0
    def drop_output(self, input, drop=0, rng=None, p=0.5, n_batch=None):
        """Return the layer output, with dropout chosen symbolically.

        Pipeline: unpool ``input`` (repeat by ``self.poolsize`` on axes 2
        and 3, times ``self.mask``; skipped for a 1x1 pool), convolve with
        ``self.W``, add the bias, apply ``self.activation`` when set, then
        pick between the plain and the dropped-out result with
        ``T.switch(T.neq(drop, 0), ...)``.

        :param input: tensor fed to the layer (presumably 4D -- confirm).
        :param drop: a non-zero value selects the dropped-out output.
        :param rng: random generator forwarded to ``nonlinearity.dropout``.
        :param p: probability forwarded to ``nonlinearity.dropout``.
        :param n_batch: optional override for ``self.image_shape[0]``. New
            defaulted parameter: the body previously used an undefined
            ``n_batch`` name and failed with NameError.
        :raises Exception: for a ``self.border_mode`` other than
            ``'valid'``, ``'same'`` or ``'full'``.
        """
        if self.poolsize[0] == 1 and self.poolsize[1] == 1:
            unpool_out = input
        else:
            unpool_out = Textra.repeat(Textra.repeat(
                input, self.poolsize[0], axis=2),
                                       self.poolsize[1],
                                       axis=3) * self.mask

        image_shape = list(self.image_shape)
        if n_batch is not None:
            image_shape[0] = n_batch

        if self.border_mode not in ('valid', 'same', 'full'):
            raise Exception('Unknown conv type')

        # Both 'same' and 'full' start from a 'full' convolution; 'same'
        # crops the result below.
        conv_mode = 'valid' if self.border_mode == 'valid' else 'full'
        conv_out = conv.conv2d(input=unpool_out,
                               filters=self.W,
                               filter_shape=self.filter_shape,
                               image_shape=image_shape,
                               border_mode=conv_mode)

        if self.border_mode == 'same':
            # Use floor division so the paddings stay integers on Python 3.
            # NOTE(review): a padding of 0 (filter dimension of 1) makes the
            # slice 0:-0 empty -- confirm that case cannot occur here.
            padding_w = theano.shared((self.filter_shape[2] - 1) // 2)
            padding_h = theano.shared((self.filter_shape[3] - 1) // 2)
            conv_out = conv_out[:, :, padding_w:-padding_w,
                                padding_h:-padding_h]

        # The 1D bias is reshaped to (1, n_filters, 1, 1) so that it
        # broadcasts across mini-batches and feature map width & height.
        lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
        output = (lin_output
                  if self.activation is None else self.activation(lin_output))
        droppedOutput = nonlinearity.dropout(rng, output, p)
        return T.switch(T.neq(drop, 0), droppedOutput, output)
Exemplo n.º 12
0
 def output(self, dropout_active=False):
     """Scan ``self.op.step`` over the embedded input; return the last step.

     The initial state is ``self.op.id`` repeated ``X.shape[1]`` times along
     axis 0, where ``X`` is ``self.embedded()``. ``dropout_active`` is
     accepted but not used here.
     """
     embedded = self.embedded()
     start_state = repeat(self.op.id, embedded.shape[1], axis=0)
     states, _ = theano.scan(self.op.step,
                             sequences=[embedded],
                             outputs_info=[start_state])
     return states[-1]
Exemplo n.º 13
0
    def step(time_idx, lstm_hidden):
        """Run one attention/read step at position ``time_idx``.

        The memory is ``P.memory_init`` (repeated ``lstm_hidden.shape[1]``
        times along a new axis 1) concatenated with the rows of
        ``lstm_hidden`` before ``time_idx``, with the first two axes
        swapped. Every head in ``heads`` refines the weights through
        ``build_head_curr``; the values from the last head are used.

        Returns ``(output, entropy, key_entropy)``: the controller output,
        the row-wise entropy of the final weights and the row-wise entropy
        of the softmax of the last head's key.
        """
        n_copies = lstm_hidden.shape[1]
        initial_rows = repeat(P.memory_init.dimshuffle((0, 'x', 1)),
                              n_copies, axis=1)
        stacked = T.concatenate(
            [initial_rows, lstm_hidden[:time_idx, :, :]], axis=0)
        memory = stacked.transpose((1, 0, 2))
        current_input = lstm_hidden[time_idx, :, :]

        # Weights start at zero and are threaded through every head.
        weights = T.zeros([current_input.shape[0], time_idx + 1])
        for head in heads:
            weights, attention, key = build_head_curr(
                weights, memory, head, current_input)

        entropy = T.sum(-1 * (weights * T.log(weights)), axis=1)

        key_probs = T.nnet.softmax(key)
        key_entropy = T.sum(-1 * (key_probs * T.log(key_probs)), axis=1)

        attended_memory = attention.dimshuffle(0, 'x', 1) * memory
        read_vector = build_read(attended_memory, weights)
        output = controller(current_input, read_vector)

        return output, entropy, key_entropy
Exemplo n.º 14
0
    def step(time_idx, lstm_hidden):
        """Run one attention/read step and also return zero-padded weights.

        The memory is ``P.memory_init`` (repeated ``lstm_hidden.shape[1]``
        times along a new axis 1) followed by the rows of ``lstm_hidden``
        before ``time_idx``, with the first two axes swapped. Each head in
        ``heads`` updates the weights via ``build_head_curr``.

        Returns ``(output, entropy, weight_pad)``: the controller output,
        the row-wise entropy of the final weights, and those weights padded
        with float32 zeros along axis 1 up to ``lstm_hidden.shape[0]``
        columns.
        """
        n_copies = lstm_hidden.shape[1]
        initial_rows = repeat(P.memory_init.dimshuffle((0, 'x', 1)),
                              n_copies, axis=1)
        memory = T.concatenate(
            [initial_rows, lstm_hidden[:time_idx, :, :]],
            axis=0).transpose((1, 0, 2))
        current_input = lstm_hidden[time_idx, :, :]

        weights = T.zeros([current_input.shape[0], time_idx + 1])
        for head in heads:
            weights, attention = build_head_curr(
                weights, memory, head, current_input)

        # Pad so every step yields the same number of weight columns.
        missing_cols = lstm_hidden.shape[0] - weights.shape[1]
        zero_block = T.zeros((current_input.shape[0], missing_cols),
                             dtype='float32')
        weight_pad = T.concatenate([weights, zero_block], axis=1)

        entropy = T.sum(-1 * (weights * T.log(weights)), axis=1)

        attended_memory = attention.dimshuffle(0, 'x', 1) * memory
        read_vector = build_read(attended_memory, weights)
        output = controller(current_input, read_vector)

        return output, entropy, weight_pad
Exemplo n.º 15
0
 def output(self, dropout_active=False):
     """Return the final state of ``self.op.step`` scanned over the input.

     ``self.embedded()`` provides the sequence; the scan starts from
     ``self.op.id`` repeated ``X.shape[1]`` times along axis 0. The
     ``dropout_active`` flag is not used by this method.
     """
     sequence = self.embedded()
     initial = repeat(self.op.id, sequence.shape[1], axis=0)
     scanned, _ = theano.scan(self.op.step,
                              sequences=[sequence],
                              outputs_info=[initial])
     return scanned[-1]
Exemplo n.º 16
0
    def get_output_for(self, input, **kwargs):
        """Upsample ``data`` and mask it with a randomly mixed unpool mask.

        ``input`` is unpacked into ``(data, mask_max)``. A fixed mask with a
        single 1 at position ``[0, 0]`` of every ``self.factor`` window is
        tiled by ``self.input_shapes[0][1:]``. A binomial mask drawn with
        probability ``self.noise`` selects, per entry, between that fixed
        mask (where it is 1) and ``mask_max`` (where it is 0). ``data`` is
        repeated by ``self.factor`` along axes 2 and 3 and multiplied by the
        mixed mask.

        The random stream is seeded from ``RandomState(1234)`` on every
        call.
        """
        data, mask_max = input

        # One window with only its first entry set.
        cell = np.zeros(self.factor, dtype=np.float32)
        cell[0, 0] = 1
        tiled = np.tile(cell.reshape((1, ) + self.factor),
                        self.input_shapes[0][1:])
        fixed_mask = T.shape_padleft(tiled, n_ones=1)

        seed_source = np.random.RandomState(1234)
        streams = theano.tensor.shared_randomstreams.RandomStreams(
            seed_source.randint(999999))
        draws = streams.binomial(n=1,
                                 p=self.noise,
                                 size=self.input_shapes[1][1:])
        selector = T.shape_padleft(T.cast(draws, dtype='float32'),
                                   n_ones=1)

        mask = selector * fixed_mask + (1 - selector) * mask_max

        stretched_rows = Textra.repeat(data, self.factor[0], axis=2)
        upsampled = Textra.repeat(stretched_rows, self.factor[1], axis=3)
        return upsampled * mask
Exemplo n.º 17
0
    def test_infer_shape(self):
        """Check shape inference of ``RepeatOp`` for scalar and vector counts.

        Runs for ranks 1 and 3 with an input of shape ``(3, 6, ...)``, every
        axis from ``self._possible_axis`` and the dtypes int8, uint8 and
        uint64. Dtypes in ``self.numpy_unsupported_dtypes`` must make
        ``repeat`` raise ``TypeError`` for a vector count; the others go
        through ``self._compile_and_check``.
        """
        for ndim in [1, 3]:
            x = T.TensorType(config.floatX, [False] * ndim)()
            shp = (np.arange(ndim) + 1) * 3
            a = np.random.random(shp).astype(config.floatX)

            def check(axis, count_var, count_val):
                self._compile_and_check(
                    [x, count_var],
                    [RepeatOp(axis=axis)(x, count_var)],
                    [a, count_val],
                    self.op_class,
                )

            for axis in self._possible_axis(ndim):
                for dtype in ["int8", "uint8", "uint64"]:
                    scalar_var = T.scalar(dtype=dtype)
                    scalar_val = np.asarray(3, dtype=dtype)
                    if dtype in self.numpy_unsupported_dtypes:
                        vector_var = T.vector(dtype=dtype)
                        with pytest.raises(TypeError):
                            repeat(x, vector_var)
                        continue

                    # Scalar count.
                    check(axis, scalar_var, scalar_val)

                    # Vector of counts sized to match the repeated axis.
                    vector_var = T.vector(dtype=dtype)
                    if axis is None:
                        n_counts = a.size
                    elif a.size > 0:
                        n_counts = a.shape[axis]
                    else:
                        n_counts = (10, )
                    vector_val = np.random.randint(
                        1, 6, size=n_counts).astype(dtype)

                    check(axis, vector_var, vector_val)
Exemplo n.º 18
0
    def drop_output(self, input, drop=0, rng=None, p=0.5):
        """Build the dnn-convolution layer output with selectable dropout.

        The input is unpooled (repeated by ``self.poolsize`` along axes 2 and
        3 and multiplied by ``self.mask``, unless the pool size is 1x1),
        convolved with ``self.W`` via ``dnn.dnn_conv``, optionally passed
        through ``ContrastCrossChannels`` when ``self.cnorm`` is truthy,
        shifted by the bias and activated. ``T.switch`` then selects the
        dropped-out result when ``drop`` is non-zero.

        The former ``image_shape`` bookkeeping was removed: it read an
        undefined ``n_batch`` name (NameError) and its result was never
        used by the dnn convolution.

        :param input: tensor fed to the layer (presumably 4D -- confirm).
        :param drop: a non-zero value selects the dropped-out output.
        :param rng: random generator forwarded to ``nonlinearity.dropout``.
        :param p: probability forwarded to ``nonlinearity.dropout``.
        :raises Exception: if ``self.border_mode`` is not ``'same'``.
        """
        if self.poolsize[0] == 1 and self.poolsize[1] == 1:
            unpool_out = input
        else:
            unpool_out = Textra.repeat(
                Textra.repeat(input, self.poolsize[0], axis=2),
                self.poolsize[1], axis=3) * self.mask

        if self.border_mode != 'same':
            raise Exception('Unknown conv type')

        conv_out = dnn.dnn_conv(
            img=unpool_out,
            kerns=self.W,
            subsample=(1, 1),
            border_mode=self.border,
        )

        if self.cnorm:
            # Floor division keeps the window size an integer on Python 3.
            norm_size = self.filter_shape[0] // 8 + 1
            # Single pre-formatted argument: prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            print('cnorm size %s' % (norm_size,))
            conv_out = ContrastCrossChannels.ContrastCrossChannels(
                input=conv_out, n=norm_size)

        # The 1D bias is reshaped to (1, n_filters, 1, 1) so that it
        # broadcasts across mini-batches and feature map width & height.
        lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
        output = (
            lin_output if self.activation is None
            else self.activation(lin_output)
        )
        droppedOutput = nonlinearity.dropout(rng, output, p)
        return T.switch(T.neq(drop, 0), droppedOutput, output)
Exemplo n.º 19
0
def fawn_recurrent(
    inpt_mean, inpt_var, weights_mean, weights_var,
    f,
    initial_mean, initial_var):
    """Propagate input means and variances through a scanned layer.

    Each scan step computes the pre-activation mean as
    ``dot(mean, weights_mean)`` and the pre-activation variance as the sum
    of ``dot(mean ** 2, weights_var)``, ``dot(var, weights_mean ** 2)`` and
    ``dot(var, weights_var)``, then applies the transfer function looked up
    from ``f``. The previous-step arguments supplied by ``theano.scan`` are
    not used inside the step.

    ``initial_mean`` and ``initial_var`` are repeated
    ``inpt_mean.shape[1]`` times along a new leading axis when they are 1D.

    Returns a 4-tuple of scanned sequences: pre-activation mean and
    variance followed by post-activation mean and variance.
    """
    moment_transfer = lookup(f, transfer_)

    def step(cur_mean, cur_var, him_m1, hiv_m1, hom_m1, hov_m1):
        pres_mean = T.dot(cur_mean, weights_mean)
        pres_var = (T.dot(cur_mean ** 2, weights_var)
                    + T.dot(cur_var, weights_mean ** 2)
                    + T.dot(cur_var, weights_var))
        post_mean, post_var = moment_transfer(pres_mean, pres_var)
        return pres_mean, pres_var, post_mean, post_var

    if initial_mean.ndim == 1:
        initial_mean = repeat(initial_mean.dimshuffle('x', 0),
                              inpt_mean.shape[1],
                              axis=0)
    if initial_var.ndim == 1:
        initial_var = repeat(initial_var.dimshuffle('x', 0),
                             inpt_mean.shape[1],
                             axis=0)

    outputs, _ = theano.scan(
        step,
        sequences=[inpt_mean, inpt_var],
        outputs_info=[
            T.zeros_like(inpt_mean[0]),
            T.zeros_like(inpt_mean[0]),
            initial_mean,
            initial_var,
        ])
    hidden_in_mean_rec, hidden_in_var_rec = outputs[0], outputs[1]
    hidden_mean_rec, hidden_var_rec = outputs[2], outputs[3]

    return (hidden_in_mean_rec, hidden_in_var_rec,
            hidden_mean_rec, hidden_var_rec)
Exemplo n.º 20
0
 def output(self, dropout_active=False):
     """Scan ``self.step`` over the projected output of the input layer.

     The previous layer's output is optionally passed through ``dropout``
     (when ``self.p_drop > 0`` and ``dropout_active``), projected with
     ``self.w_in`` / ``self.b_in`` and scanned from ``self.h0`` repeated
     ``x_in.shape[1]`` times along axis 0.

     Returns the whole scanned sequence when ``self.seq_output`` is truthy,
     otherwise only its last element.
     """
     layer_in = self.l_in.output(dropout_active=dropout_active)
     if self.p_drop > 0. and dropout_active:
         layer_in = dropout(layer_in, self.p_drop)

     projected = T.dot(layer_in, self.w_in) + self.b_in
     start_state = repeat(self.h0, projected.shape[1], axis=0)
     hidden_seq, _ = theano.scan(self.step,
                                 sequences=[projected],
                                 outputs_info=[start_state],
                                 non_sequences=[self.w_rec],
                                 truncate_gradient=self.truncate_gradient)
     return hidden_seq if self.seq_output else hidden_seq[-1]
Exemplo n.º 21
0
    def step(time_idx, lstm_hidden, input_hidden, weighted_mem):
        """Run one read step using separate key and value memories.

        ``lstm_hidden`` builds the memory the heads attend over, while
        ``weighted_mem`` builds the memory that is actually read. Both are
        prefixed with their initial rows from ``P`` (repeated
        ``lstm_hidden.shape[1]`` times along a new axis 1), truncated to the
        rows before ``time_idx`` and have their first two axes swapped.

        Returns the controller output for ``input_hidden[time_idx]``.
        """
        n_copies = lstm_hidden.shape[1]
        key_init = repeat(P.memory_init.dimshuffle((0, 'x', 1)),
                          n_copies, axis=1)
        value_init = repeat(P.weighted_memory_init.dimshuffle((0, 'x', 1)),
                            n_copies, axis=1)

        key_stack = T.concatenate(
            [key_init, lstm_hidden[:time_idx, :, :]], axis=0)
        value_stack = T.concatenate(
            [value_init, weighted_mem[:time_idx, :, :]], axis=0)

        key_memory = key_stack.transpose((1, 0, 2))
        value_memory = value_stack.transpose((1, 0, 2))
        current_input = input_hidden[time_idx, :, :]

        # Weights start at zero and are refined by each head in turn.
        weights = T.zeros([current_input.shape[0], time_idx + 1])
        for head in heads:
            weights = build_head_curr(
                weights, key_memory, head, current_input)

        read_vector = build_read(value_memory, weights)
        return controller(current_input, read_vector)
Exemplo n.º 22
0
 def output(self, dropout_active=False):
     """Return the recurrent states produced by scanning ``self.step``.

     Input comes from ``self.l_in.output``; dropout is applied when
     ``self.p_drop > 0`` and ``dropout_active`` is set. The projected input
     ``dot(X, self.w_in) + self.b_in`` is the scanned sequence, starting
     from ``self.h0`` repeated ``x_in.shape[1]`` times along axis 0.

     With ``self.seq_output`` truthy every step is returned, otherwise just
     the last one.
     """
     incoming = self.l_in.output(dropout_active=dropout_active)
     if self.p_drop > 0. and dropout_active:
         incoming = dropout(incoming, self.p_drop)

     x_in = T.dot(incoming, self.w_in) + self.b_in
     h_init = repeat(self.h0, x_in.shape[1], axis=0)
     scanned, _ = theano.scan(
         self.step,
         sequences=[x_in],
         outputs_info=[h_init],
         non_sequences=[self.w_rec],
         truncate_gradient=self.truncate_gradient)

     if not self.seq_output:
         return scanned[-1]
     return scanned
Exemplo n.º 23
0
def gen(Z, w, w1, w2, w3, w4):
    """Map ``Z`` through one dense and four convolutional stages.

    The dense output is reshaped to ``(rows, nkerns[3], 4, 4)``. The first
    convolution runs on it directly; each of the following three first
    doubles axes 2 and 3 with ``repeat``. All convolutions use the
    ``'half'`` border mode; the last one is followed by ``tanh`` and the
    others by ``ReLU(batchnorm(...))``.
    """
    def double_spatial(maps):
        # Repeat every entry twice along axis 2, then along axis 3.
        return repeat(repeat(maps, 2, 2), 2, 3)

    def conv_block(maps, weights):
        return ReLU(batchnorm(conv2d(maps, weights, border_mode='half')))

    dense = ReLU(batchnorm(T.dot(Z, w)))
    maps0 = dense.reshape((dense.shape[0], nkerns[3], 4, 4))

    maps1 = conv_block(maps0, w1)
    maps2 = conv_block(double_spatial(maps1), w2)
    maps3 = conv_block(double_spatial(maps2), w3)
    return T.tanh(conv2d(double_spatial(maps3), w4, border_mode='half'))
Exemplo n.º 24
0
 def output(self, dropout_active=False):
     """Scan ``self.step`` over three projections of the input layer output.

     The previous layer's output (with dropout when ``self.p_drop > 0`` and
     ``dropout_active``) is projected with the ``z``, ``r`` and ``h``
     weight/bias pairs. The three projections are the scanned sequences,
     ``self.u_z``, ``self.u_r`` and ``self.u_h`` are passed as
     non-sequences, and the initial state is ``self.h0`` repeated
     ``x_h.shape[1]`` times along axis 0.

     Returns all steps if ``self.seq_output`` is truthy, else the last one.
     """
     layer_in = self.l_in.output(dropout_active=dropout_active)
     if self.p_drop > 0. and dropout_active:
         layer_in = dropout(layer_in, self.p_drop)

     proj_z = T.dot(layer_in, self.w_z) + self.b_z
     proj_r = T.dot(layer_in, self.w_r) + self.b_r
     proj_h = T.dot(layer_in, self.w_h) + self.b_h

     start_state = repeat(self.h0, proj_h.shape[1], axis=0)
     hidden_seq, _ = theano.scan(self.step,
                                 sequences=[proj_z, proj_r, proj_h],
                                 outputs_info=[start_state],
                                 non_sequences=[self.u_z, self.u_r,
                                                self.u_h],
                                 truncate_gradient=self.truncate_gradient)
     return hidden_seq if self.seq_output else hidden_seq[-1]
Exemplo n.º 25
0
 def output(self, dropout_active=False):
     """Return the states from scanning ``self.step`` over gated inputs.

     ``self.l_in.output`` supplies the input, optionally passed through
     ``dropout``. Three affine projections (``z``, ``r``, ``h``) of it are
     scanned together, with ``self.u_z``, ``self.u_r`` and ``self.u_h`` as
     non-sequences and ``self.h0`` repeated ``x_h.shape[1]`` times along
     axis 0 as the initial state.

     The full sequence is returned when ``self.seq_output`` is truthy,
     otherwise only the final step.
     """
     incoming = self.l_in.output(dropout_active=dropout_active)
     if self.p_drop > 0. and dropout_active:
         incoming = dropout(incoming, self.p_drop)

     x_z = T.dot(incoming, self.w_z) + self.b_z
     x_r = T.dot(incoming, self.w_r) + self.b_r
     x_h = T.dot(incoming, self.w_h) + self.b_h

     h_init = repeat(self.h0, x_h.shape[1], axis=0)
     scanned, _ = theano.scan(
         self.step,
         sequences=[x_z, x_r, x_h],
         outputs_info=[h_init],
         non_sequences=[self.u_z, self.u_r, self.u_h],
         truncate_gradient=self.truncate_gradient)

     if not self.seq_output:
         return scanned[-1]
     return scanned
Exemplo n.º 26
0
Arquivo: rnn.py Projeto: makarl/breze
def recurrent_layer(hidden_inpt, hidden_to_hidden, f, initial_hidden):
    """Scan a simple recurrence over ``hidden_inpt``.

    Each step applies ``f`` to the previous pre-activation, multiplies by
    ``hidden_to_hidden`` and adds the current input. The initial state is
    ``initial_hidden`` repeated ``hidden_inpt.shape[1]`` times along axis 0
    and reshaped to ``(hidden_inpt.shape[1], hidden_inpt.shape[2])``.

    Returns ``(hidden_in_rec, hidden_rec)``: the scanned pre-activations
    and ``f`` applied to them.
    """
    def step(x, hi_tm1):
        return T.dot(f(hi_tm1), hidden_to_hidden) + x

    # One copy of the initial hidden state per sample.
    n_copies = hidden_inpt.shape[1]
    tiled = repeat(initial_hidden, n_copies, axis=0)
    initial_hidden_b = tiled.reshape(
        (hidden_inpt.shape[1], hidden_inpt.shape[2]))

    hidden_in_rec, _ = theano.scan(
        step,
        sequences=hidden_inpt,
        outputs_info=[initial_hidden_b])

    return hidden_in_rec, f(hidden_in_rec)
Exemplo n.º 27
0
Arquivo: rnn.py Projeto: Wiebke/breze
def recurrent_layer_stateful(hidden_inpt, hidden_to_hidden, f, initial_hidden):
    """Scan a recurrence whose transfer function carries its own state.

    Each step computes ``dot(previous_hidden, hidden_to_hidden) + x`` and
    calls ``f(previous_state, pre_activation)``, which returns the new state
    and the new hidden value. The previous pre-activation handed over by
    ``theano.scan`` is not used.

    The initial hidden value is ``initial_hidden`` repeated
    ``hidden_inpt.shape[1]`` times along a new leading axis; the initial
    state and pre-activation are zeros.

    Returns ``(states, hidden_in_rec, hidden_rec)``.
    """
    def step(x, s_m1, hi_tm1, h_tm1):
        hi = T.dot(h_tm1, hidden_to_hidden) + x
        s, h = f(s_m1, hi)
        return s, hi, h

    as_row = initial_hidden.dimshuffle('x', 0)
    initial_hidden_b = repeat(as_row, hidden_inpt.shape[1], axis=0)

    start_values = [
        T.zeros_like(initial_hidden_b),
        T.zeros_like(hidden_inpt[0]),
        initial_hidden_b,
    ]
    scanned, _ = theano.scan(step,
                             sequences=hidden_inpt,
                             outputs_info=start_values)
    states, hidden_in_rec, hidden_rec = scanned

    return states, hidden_in_rec, hidden_rec
Exemplo n.º 28
0
Arquivo: rnn.py Projeto: ddofer/breze
def recurrent_layer(hidden_inpt, hidden_to_hidden, f, initial_hidden):
    """Build a scanned recurrent layer over ``hidden_inpt``.

    The step function feeds ``f(previous pre-activation)`` through
    ``hidden_to_hidden`` and adds the current input. The scan starts from
    ``initial_hidden`` repeated ``hidden_inpt.shape[1]`` times along axis
    0, reshaped to ``(hidden_inpt.shape[1], hidden_inpt.shape[2])``.

    Returns the pre-activation sequence and ``f`` applied to it.
    """
    def step(x, hi_tm1):
        previous_hidden = f(hi_tm1)
        return T.dot(previous_hidden, hidden_to_hidden) + x

    # Several copies of the initial hidden state, one per sample.
    target_shape = (hidden_inpt.shape[1], hidden_inpt.shape[2])
    initial_hidden_b = repeat(
        initial_hidden, hidden_inpt.shape[1], axis=0).reshape(target_shape)

    hidden_in_rec, _ = theano.scan(step,
                                   sequences=hidden_inpt,
                                   outputs_info=[initial_hidden_b])

    hidden_rec = f(hidden_in_rec)
    return hidden_in_rec, hidden_rec
Exemplo n.º 29
0
def recurrent_layer_stateful(hidden_inpt, hidden_to_hidden, f, initial_hidden):
    """Scan a recurrent layer with a transfer function that keeps a state.

    For every slice along the first axis of ``hidden_inpt`` the previous
    output is right multiplied with ``hidden_to_hidden``, the slice is added
    and ``f`` is called with the previous state and that sum. ``f`` has to
    return the new state and the new output.

    ``initial_hidden`` must be one-dimensional; it is copied
    ``hidden_inpt.shape[1]`` times to form the first output.

    Returns the tuple ``(states, hidden_in_rec, hidden_rec)`` produced by the
    scan: states, summed inputs to ``f`` and outputs of ``f``.
    """
    row = initial_hidden.dimshuffle('x', 0)
    first_output = repeat(row, hidden_inpt.shape[1], axis=0)

    def recur(x_t, prev_state, prev_pre_activation, prev_output):
        # The previous pre-activation is part of the scan signature only.
        pre_activation = T.dot(prev_output, hidden_to_hidden) + x_t
        new_state, new_output = f(prev_state, pre_activation)
        return new_state, pre_activation, new_output

    # States and pre-activations start from zeros.
    initial_values = [T.zeros_like(first_output),
                      T.zeros_like(hidden_inpt[0]),
                      first_output]

    results, _ = theano.scan(recur,
                             sequences=hidden_inpt,
                             outputs_info=initial_values)

    states, hidden_in_rec, hidden_rec = results
    return states, hidden_in_rec, hidden_rec
Exemplo n.º 30
0
    def step(time_idx,lstm_hidden):
        """Compute the controller output for position ``time_idx``.

        The memory consists of ``P.memory_init`` (copied
        ``lstm_hidden.shape[1]`` times along a new axis 1) followed by the
        slices of ``lstm_hidden`` before ``time_idx``. Every entry of the
        enclosing ``heads`` updates the weighting via ``build_head_curr``;
        the result is read with ``build_read`` and combined with the current
        input by ``controller``.
        """
        initial_memory = repeat(P.memory_init.dimshuffle((0, 'x', 1)),
                                lstm_hidden.shape[1],
                                axis=1)
        history = lstm_hidden[:time_idx, :, :]
        # Swap the first two axes after stacking along axis 0.
        memory = T.concatenate([initial_memory, history],
                               axis=0).transpose((1, 0, 2))
        current_input = lstm_hidden[time_idx, :, :]

        # Weighting starts at zero and is refined by each head in turn.
        weights = T.zeros([current_input.shape[0], time_idx + 1])
        for head in heads:
            weights, attention = build_head_curr(
                weights, memory, head, current_input)

        attended_memory = attention.dimshuffle(0, 'x', 1) * memory
        read_vector = build_read(attended_memory, weights)

        return controller(current_input, read_vector)
Exemplo n.º 31
0
 def output(self, pool=True):
     """Run the recurrence over axis 1 of ``self.input`` and return its output.

     ``pool``: when true, return the mean of the step outputs over axis 1
     (a masked mean when ``self.mask`` is set). Otherwise return every step
     output if ``self.seq_output`` is set (multiplied by the mask when
     there is one), else only the output of the last scanned step.

     NOTE(review): with ``self.backward`` set, ``self.mask`` is replaced by
     its flipped version on every call, so calling this method twice flips
     it back. Kept as in the original because other code may read the
     attribute; confirm whether that is intended.
     """
     X = self.input
     if self.backward:
         # flip along second axis
         X = X[:, ::-1]
         # The mask is optional (see below); only flip it when present.
         if self.mask is not None:
             self.mask = self.mask[:, ::-1]
     # shuffle dimension so scan over axis 1
     X = X.dimshuffle(1, 0, 2)
     if self.mask is not None:
         mask = self.mask.dimshuffle(1, 0)
         seq_input = [mask, X]
         step = self.step_masked
     else:
         seq_input = [X]
         step = self.step
     out, _ = theano.scan(
         step,
         sequences=seq_input,
         outputs_info=[repeat(self.h0, X.shape[1], axis=0)],
         non_sequences=[self.u_z, self.u_r, self.u_h],
         truncate_gradient=self.truncate_gradient
     )
     # shuffle dimension back
     out = out.dimshuffle(1, 0, 2)
     if pool:
         if self.mask is not None:
             # masked mean over the scanned axis
             out = (out * self.mask[:, :, None]).sum(axis=1)
             out = out / self.mask.sum(axis=1)[:, None]
             return out
         return T.mean(out, axis=1)
     elif self.seq_output:
         if self.mask is not None:
             return out * self.mask[:, :, None]
         else:
             return out
     else:
         # After shuffling back, the scanned axis is axis 1, so the last
         # step is out[:, -1]; out[-1] would select the last entry of
         # axis 0 with all of its steps instead.
         return out[:, -1]
Exemplo n.º 32
0
    def __init__(self,
                 rng,
                 input,
                 filter_shape,
                 image_shape,
                 poolsize=(2, 2),
                 border_mode='same',
                 activation=None,
                 mask=None):
        """
        Allocate an unpool + convolution layer with shared variable parameters.

        The input is unpooled by repeating it ``poolsize`` times along axes 2
        and 3 and multiplying with ``mask``, optionally passed through
        ``activation``, convolved with the filters and shifted by the bias.
        The result is stored in ``self.output``; the weights and biases in
        ``self.params``.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width); passed to the
                             convolution as the shape of the tensor that is
                             convolved, i.e. the unpooled input

        :type poolsize: tuple or list of length 2
        :param poolsize: the upsampling (unpooling) factor (#rows, #cols)

        :type border_mode: str
        :param border_mode: 'valid', 'full' or 'same'; 'same' runs a full
                            convolution and crops the border

        :param activation: optional function applied to the unpooled input
                           before the convolution

        :param mask: optional tensor the repeated input is multiplied with;
                     if None, a mask that keeps the top-left entry of every
                     pooling window is created

        :raises Exception: if ``border_mode`` is not one of the three modes
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" *
        #   pooling size (multiplied rather than divided, because this layer
        #   unpools)
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) *
                   numpy.prod(poolsize))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(numpy.asarray(rng.uniform(low=-W_bound,
                                                         high=W_bound,
                                                         size=filter_shape),
                                             dtype=theano.config.floatX),
                               borrow=True)

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0], ), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        ###--- Unpool

        if poolsize[0] == 1 and poolsize[1] == 1:
            self.unpool_out = input
        else:
            if mask is None:
                window = np.zeros((poolsize), dtype=np.float32)
                window[0, 0] = 1
                # list(poolsize) so that tuples (the default) can be
                # concatenated with the leading [1, 1] as well.
                # NOTE(review): `input_shape` is neither a parameter nor a
                # local of this method; unless it is defined at module level
                # this branch raises NameError. Confirm the intended tiling.
                mask = theano.shared(
                    np.tile(window.reshape([1, 1] + list(poolsize)),
                            input_shape))

            self.unpool_out = Textra.repeat(
                Textra.repeat(input, poolsize[0],
                              axis=2), poolsize[1], axis=3) * mask

        relu_output = (self.unpool_out
                       if activation is None else activation(self.unpool_out))

        ###--- Unpool + conv
        # convolve input feature maps with filters
        if border_mode == 'valid':
            conv_out = conv.conv2d(input=relu_output,
                                   filters=self.W,
                                   filter_shape=filter_shape,
                                   image_shape=image_shape,
                                   border_mode='valid')
        elif border_mode == 'same':
            conv_out = conv.conv2d(input=relu_output,
                                   filters=self.W,
                                   filter_shape=filter_shape,
                                   image_shape=image_shape,
                                   border_mode='full')
            # Crop the border added by the full convolution. Integer
            # division keeps the offsets usable as indices, and slicing up
            # to `shape - pad` (instead of `-pad`) also works when the
            # padding is 0, where `0:-0` would select nothing.
            pad_rows = (filter_shape[2] - 1) // 2
            pad_cols = (filter_shape[3] - 1) // 2
            conv_out = conv_out[:, :,
                                pad_rows:conv_out.shape[2] - pad_rows,
                                pad_cols:conv_out.shape[3] - pad_cols]
        elif border_mode == 'full':
            conv_out = conv.conv2d(input=relu_output,
                                   filters=self.W,
                                   filter_shape=filter_shape,
                                   image_shape=image_shape,
                                   border_mode='full')
        else:
            raise Exception('Unknown conv type')

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

        # store parameters of this layer
        self.params = [self.W, self.b]
Exemplo n.º 33
0
def recurrent_layer(in_mean, in_var, weights, f, initial_hidden_mean,
                    initial_hidden_var, p_dropout):
    """Return a theano variable representing a recurrent layer.

    Means and variances of the activations are propagated through the
    recurrence; dropout on the recurrent connection is accounted for
    analytically instead of by sampling.

    Parameters
    ----------

    in_mean : Theano variable
        Sequence tensor of shape ``(t, n ,d)``. Represents the mean of the
        input to the layer.

    in_var : Theano variable
        Sequence tensor. Represents the variance of the input to the layer.
        It is passed to scan as a sequence alongside ``in_mean``.

    weights : Theano variable
        Theano matrix of shape ``(d, d)``. Represents the recurrent weight
        matrix the hiddens are right multiplied with.

    f : function
        Function that takes the mean and the variance of the hidden
        pre-activations and returns the mean and the variance after the
        transfer function.

    initial_hidden_mean : Theano variable
        Mean of the initial hidden state. A vector is repeated
        ``in_mean.shape[1]`` times; anything else is used as is.

    initial_hidden_var : Theano variable
        Variance of the initial hidden state; handled like
        ``initial_hidden_mean``.

    p_dropout : Theano variable
        Scalar representing the probability that unit is dropped out.


    Returns
    -------

    hidden_in_mean_rec : Theano variable
        Theano sequence tensor representing the mean of the hidden activations
        before the application of ``f``.

    hidden_in_var_rec : Theano variable
        Theano sequence tensor representing the variance of the hidden
        activations before the application of ``f``.

    hidden_mean_rec : Theano variable
        Theano sequence tensor representing the mean of the hidden activations
        after the application of ``f``.

    hidden_var_rec : Theano variable
        Theano sequence tensor representing the variance of the hidden
        activations after the application of ``f``.
    """
    def step(inpt_mean, inpt_var, him_m1, hiv_m1, hom_m1, hov_m1):
        # him_m1 / hiv_m1 (previous pre-activation statistics) are handed in
        # by scan because they are recurrent outputs; they are not needed.
        p_keep = 1 - p_dropout
        dropout_var = p_dropout * (1 - p_dropout)

        # A unit survives with probability p_keep, so the expected hidden is
        # scaled by p_keep. (Scaling by p_dropout is inconsistent with the
        # variance below, which is that of a Bernoulli(p_keep) mask times
        # the hidden.)
        hom = T.dot(hom_m1, weights) * p_keep + inpt_mean

        element_var = (hov_m1 * dropout_var + (hom_m1**2) * dropout_var +
                       hov_m1 * p_keep**2)

        hov = T.dot(element_var, weights**2) + inpt_var

        fhom, fhov = f(hom, hov)

        return hom, hov, fhom, fhov

    # Obtain one copy of a vector-valued initial state per sample.
    if initial_hidden_mean.ndim == 1:
        initial_hidden_mean = repeat(initial_hidden_mean.dimshuffle('x', 0),
                                     in_mean.shape[1],
                                     axis=0)
    if initial_hidden_var.ndim == 1:
        initial_hidden_var = repeat(initial_hidden_var.dimshuffle('x', 0),
                                    in_mean.shape[1],
                                    axis=0)

    (hidden_in_mean_rec, hidden_in_var_rec, hidden_mean_rec,
     hidden_var_rec), _ = theano.scan(step,
                                      sequences=[in_mean, in_var],
                                      outputs_info=[
                                          T.zeros_like(in_mean[0]),
                                          T.zeros_like(in_mean[0]),
                                          initial_hidden_mean,
                                          initial_hidden_var
                                      ])

    return (hidden_in_mean_rec, hidden_in_var_rec, hidden_mean_rec,
            hidden_var_rec)
Exemplo n.º 34
0
    def __init__(self,
                 n_state,
                 n_action,
                 scale_action=1.0,
                 mean_learning_rate=0.01,
                 sigma_learning_rate=0.001,
                 gamma=0.99):
        """Build the mean/sigma policy network and compile its functions.

        Stores the hyper-parameters, creates empty history lists, builds a
        network with two hidden tanh layers and two output heads (a scaled
        tanh for the mean, ``exp`` for sigma) and compiles three Theano
        functions: ``action_dist`` (state -> mean, sigma) and one update
        function per head, both driven by ``adam`` on ``self.policy``.
        """
        self.n_state, self.n_action = n_state, n_action
        self.scale_action = scale_action
        self.mean_learning_rate = mean_learning_rate
        self.sigma_learning_rate = sigma_learning_rate
        self.gamma = gamma

        # History containers; presumably filled by other methods.
        self.episode_state_history = []
        self.episode_action_history = []
        self.episode_reward_history = []

        self.all_states = []
        self.all_actions = []
        self.all_rewards = []

        def scaled_tanh(x):
            # tanh output stretched by the action scale
            return self.scale_action * tanh(x)

        def dense(incoming, num_units, nonlinearity):
            # All layers share the same weight and bias initialisers.
            return DenseLayer(incoming,
                              num_units=num_units,
                              nonlinearity=nonlinearity,
                              W=Normal(0.1, 0.0),
                              b=Constant(0.0))

        # Neural Network for the policy

        def policy_network(state):
            layer = InputLayer(input_var=state, shape=(None, n_state))
            for _ in range(2):
                layer = dense(layer, n_state, tanh)

            # Both heads branch off the second hidden layer.
            mean = dense(layer, n_action, scaled_tanh)
            sigma = dense(layer, n_action, T.exp)
            return mean, sigma

        # Defining the system variables (state, action, reward)

        self.X_state = T.fmatrix()
        self.X_action = T.fmatrix()
        self.X_reward = T.fmatrix()

        # Policy and distribution functions

        self.policy_mean_, self.policy_sigma_ = policy_network(self.X_state)
        self.policy_mean = get_output(self.policy_mean_)
        self.policy_sigma = get_output(self.policy_sigma_)

        self.action_dist = theano.function(
            inputs=[self.X_state],
            outputs=[self.policy_mean, self.policy_sigma],
            allow_input_downcast=True)

        # log policy grads

        # d_f / d_u     = (action - mu) / sigma ^2
        # d_f / d_sigma = - 1 / sigma + (action - mu) ^ 2 / sigma ^3

        # E[d_J / d_u]     = (d_f / d_u) * R
        # E[d_J / d_sigma] =  (d_f / d_sigma) * R

        # NOTE(review): the sign of the log-sigma term looks inconsistent
        # with the squared-error term for a negative log-likelihood; left
        # unchanged, confirm against the intended objective.
        log_sigma_term = -2 * T.log(self.policy_sigma)
        squared_error_term = ((self.X_action - self.policy_mean)**2 *
                              self.policy_sigma**-2)
        # One reward column per action dimension.
        reward_per_action = repeat(self.X_reward, n_action, axis=1)
        self.policy = (log_sigma_term + squared_error_term) * reward_per_action
        self.policy = self.policy.mean()

        # Parameters to optimize

        self.mean_params = get_all_params(self.policy_mean_)
        self.sigma_params = get_all_params(self.policy_sigma_)

        # Gradients w.r.t. Parameters

        self.mean_grads = T.grad(self.policy, self.mean_params)
        self.sigma_grads = T.grad(self.policy, self.sigma_params)

        # Update equations

        self.mean_updates = adam(self.mean_grads,
                                 self.mean_params,
                                 learning_rate=self.mean_learning_rate)

        self.sigma_updates = adam(self.sigma_grads,
                                  self.sigma_params,
                                  learning_rate=self.sigma_learning_rate)

        def compile_update(update_rules):
            # Functions that only apply updates and return nothing.
            return theano.function(
                inputs=[self.X_state, self.X_action, self.X_reward],
                outputs=None,
                updates=update_rules,
                allow_input_downcast=True)

        self.update_mean_network = compile_update(self.mean_updates)
        self.update_sigma_network = compile_update(self.sigma_updates)
Exemplo n.º 35
0
def make_train(image_size , word_size , first_hidden_size , proj_size , reg_lambda) :
    """Build the model and compile its train/valid/projection/test functions.

    An image vector is projected and scored against encoded triplets with a
    dot product. The cost is the mean over the negative triplets of
    ``max(0, 1 - correct_score + negative_score)`` plus ``reg_lambda`` times
    the sum of squares of all values in ``P``.

    Returns ``(P, train, valid, image_project_fun, test)`` where ``train``
    additionally takes a learning rate and applies rmsprop updates, ``valid``
    returns the cost only, ``image_project_fun`` returns the projected image
    and ``test`` returns one score per row of the given triplet matrices.
    """
    # initialize model
    P = Parameters()
    project_image = image_project.build(P, image_size, proj_size)
    encode_batch, encode_single = triplet_encoding.build(
        P, word_size, first_hidden_size, proj_size)

    image_vector = T.vector()

    # training inputs: one correct triplet [E,R,E] and a matrix of negatives
    pos_left, pos_rel, pos_right = [T.vector(dtype='float32') for _ in range(3)]
    neg_left, neg_rel, neg_right = [T.matrix(dtype='float32') for _ in range(3)]

    image_projection_vector = project_image(image_vector)
    # one copy of the projection per negative triplet
    tiled_projection = repeat(image_projection_vector.dimshuffle(('x', 0)),
                              neg_left.shape[0],
                              axis=0)
    pos_encoding = encode_single(pos_left, pos_rel, pos_right)
    neg_encoding = encode_batch(neg_left, neg_rel, neg_right)

    pos_score = T.dot(image_projection_vector, pos_encoding)
    neg_scores = T.batched_dot(tiled_projection, neg_encoding)

    # margin cost, clipped at zero
    zeros = T.zeros_like(neg_scores)
    margin = 1 - pos_score + neg_scores
    hinge = T.switch(T.gt(zeros, margin), zeros, margin)

    # regularizer: sum of squares of every parameter
    params = P.values()
    l2 = sum(((p ** 2).sum() for p in params), T.sum(0))
    # assumes the word vectors have been put into P (marked unsolved upstream)
    cost = T.sum(hinge)/T.shape(neg_left)[0] + reg_lambda * l2
    grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params)]

    lr = T.scalar(name='learning rate',dtype='float32')
    cost_inputs = [image_vector,
                   pos_left, pos_rel, pos_right,
                   neg_left, neg_rel, neg_right]

    train = theano.function(
        inputs=cost_inputs + [lr],
        outputs=cost,
        updates=updates.rmsprop(params, grads, learning_rate=lr),
        allow_input_downcast=True
    )

    # valid
    valid = theano.function(
        inputs=cost_inputs,
        outputs=cost,
        allow_input_downcast=True
    )

    # visualize
    image_project_fun = theano.function(
        inputs=[image_vector],
        outputs=image_projection_vector,
        allow_input_downcast=True
    )

    # testing: score the image against every row of the given triplets
    all_left, all_rel, all_right = [T.matrix(dtype='float32') for _ in range(3)]
    tiled_projection_test = repeat(image_projection_vector.dimshuffle(('x', 0)),
                                   all_left.shape[0],
                                   axis=0)
    all_encoding = encode_batch(all_left, all_rel, all_right)
    all_scores = T.batched_dot(tiled_projection_test, all_encoding)

    test = theano.function(
        inputs=[image_vector, all_left, all_rel, all_right],
        outputs=all_scores,
        allow_input_downcast=True
    )

    return P , train , valid , image_project_fun , test
Exemplo n.º 36
0
 def repeat(self, repeats, axis=None):
     """Repeat the elements of this variable; see `theano.tensor.repeat`."""
     # Imported here rather than at module level, as in the original.
     from theano.tensor import extra_ops
     return extra_ops.repeat(self, repeats, axis)
Exemplo n.º 37
0
 def get_output_for(self, input, **kwargs):
     """Upscale `input` by repeating it along axes 2 and 3.

     Axis 2 is repeated ``self.factor[0]`` times, then axis 3
     ``self.factor[1]`` times. Extra keyword arguments are ignored.
     """
     repeated_axis2 = Textra.repeat(input, self.factor[0], axis=2)
     return Textra.repeat(repeated_axis2, self.factor[1], axis=3)
Exemplo n.º 38
0
def get_timit_waveform():
    """Load the TIMIT waveforms and their repeated one-hot phoneme labels.

    For the training and the test split, the file names are read from
    ``train.list`` / ``test.list``, every wave file is loaded with
    ``wavfile.read`` and the phoneme labels are read from
    ``TIMIT_TRAIN.mat`` / ``TIMIT_TEST.mat``. The labels are one-hot encoded
    and every row is repeated 160 times along axis 0. (The comments in the
    earlier version gave the rate as 1600 and 16000; 160 is the factor the
    code has always used.)

    Returns
    -------
    tuple
        ``(train_audio, train_phn, test_audio, test_phn)``; each element is
        a list with one entry per file.
    """
    def load_split(list_path, wav_dir, labels_path):
        # Load the waveforms and encoded labels of one split.
        with open(list_path) as list_file:
            filenames = [line.rstrip('\n') for line in list_file]

        audio = []
        for filename in filenames:
            # wavfile.read returns (rate, data); only the data is kept.
            _, samples = wavfile.read(wav_dir + filename)
            audio.append(samples)

        enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)

        encoded_labels = []
        # Opened read-only and closed again once all labels are in memory.
        with h5py.File(labels_path, 'r') as phn:
            phn_data = phn['data']
            for i in range(len(audio)):
                # Each entry of 'data' holds a reference to the label
                # dataset of one file; row 2 of it is used as the phonemes.
                labels = phn[phn_data[i][0]]
                phonemes = labels[2]
                phonemes = numpy.reshape(phonemes, (len(phonemes), 1))

                # need to encode and repeat for each sample
                encoded_phonemes = enc.fit_transform(phonemes)
                encoded_labels.append(
                    repeat(encoded_phonemes, 160, axis=0).eval())

        return audio, encoded_labels

    train_audio, train_phn = load_split(
        'train.list',
        '/vega/stats/users/sl3368/TIMIT_process/TimitWav/train/',
        'TIMIT_TRAIN.mat')

    # Parenthesised so the line is valid on Python 2 and Python 3.
    print('training done...')

    test_audio, test_phn = load_split(
        'test.list',
        '/vega/stats/users/sl3368/TIMIT_process/TimitWav/test/',
        'TIMIT_TEST.mat')

    return train_audio,train_phn,test_audio,test_phn
Exemplo n.º 39
0
def get_timit_specs_images(window_size):
    """Load windowed TIMIT spectrograms and repeated one-hot phoneme labels.

    The spectrograms (``stimulus_zscore``) are cut into chunks of 5000 rows;
    for every chunk, all ``5000 - window_size`` windows of ``window_size``
    consecutive rows are stored. The phoneme labels of every file listed in
    ``train.list`` / ``test.list`` are one-hot encoded, repeated 10 times
    along axis 0, and the resulting list is trimmed by ``window_size // 2``
    entries at both ends.

    Parameters
    ----------
    window_size : int
        Number of consecutive spectrogram rows per window.

    Returns
    -------
    tuple
        ``(train_stim_windows, train_phn, test_stim_windows, test_phn)``.
        The window arrays have shape
        ``(n_chunks, 5000 - window_size, window_size, 60)``.
    """
    half = window_size // 2

    def load_windows(stim_path):
        # Read one spectrogram file and cut it into windows per chunk.
        with h5py.File(stim_path, 'r') as stim_file:
            stim = numpy.transpose(stim_file['stimulus_zscore'])

        n_chunks = stim.shape[0] // 5000
        windows = numpy.zeros(
            (n_chunks, 5000 - window_size, window_size, 60))
        for j in range(n_chunks):
            chunk_start = j * 5000
            # The window index is relative to its chunk: indexing with the
            # absolute row runs out of bounds for every chunk but the first.
            for k in range(5000 - window_size):
                row = chunk_start + k
                windows[j][k] = stim[row:row + window_size]
        return windows

    def load_labels(list_path, labels_path):
        # Read, encode and repeat the phoneme labels of one split.
        with open(list_path) as list_file:
            filenames = [line.rstrip('\n') for line in list_file]

        enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)

        encoded_labels = []
        with h5py.File(labels_path, 'r') as phn:
            phn_data = phn['data']
            for i in range(len(filenames)):
                labels = phn[phn_data[i][0]]
                phonemes = labels[2]
                phonemes = numpy.reshape(phonemes, (len(phonemes), 1))

                # need to encode and repeat for each sample
                encoded_phonemes = enc.fit_transform(phonemes)
                encoded_labels.append(
                    repeat(encoded_phonemes, 10, axis=0).eval())

        # Trim half a window of entries at both ends of the list.
        return encoded_labels[half:len(encoded_labels) - half]

    train_stim_windows = load_windows(
        '/vega/stats/users/sl3368/Data_LC/timit/train/timit_stim_1.mat')
    train_phn = load_labels('train.list', 'TIMIT_TRAIN.mat')

    test_stim_windows = load_windows(
        '/vega/stats/users/sl3368/Data_LC/timit/test/timit_stim_1.mat')
    test_phn = load_labels('test.list', 'TIMIT_TEST.mat')

    return train_stim_windows,train_phn,test_stim_windows,test_phn
Exemplo n.º 40
0
def get_timit_specs():
    """Load the TIMIT spectrograms and repeated one-hot phoneme labels.

    For the training and the test split, the transposed ``stimulus_zscore``
    dataset is read, and the phoneme labels of every file listed in
    ``train.list`` / ``test.list`` are one-hot encoded and repeated 10 times
    along axis 0.

    Returns
    -------
    tuple
        ``(train_stim, train_phn, test_stim, test_phn)``; the ``*_stim``
        entries are arrays, the ``*_phn`` entries lists with one array per
        listed file.
    """
    def load_stimulus(stim_path):
        # numpy.transpose materialises the data, so the file can be closed.
        with h5py.File(stim_path, 'r') as stim_file:
            return numpy.transpose(stim_file['stimulus_zscore'])

    def load_labels(list_path, labels_path):
        # Read, encode and repeat the phoneme labels of one split.
        with open(list_path) as list_file:
            filenames = [line.rstrip('\n') for line in list_file]

        enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)

        encoded_labels = []
        with h5py.File(labels_path, 'r') as phn:
            phn_data = phn['data']
            for i in range(len(filenames)):
                # 'data' holds one reference per file to its label dataset;
                # row 2 of that dataset is used as the phonemes.
                labels = phn[phn_data[i][0]]
                phonemes = labels[2]
                phonemes = numpy.reshape(phonemes, (len(phonemes), 1))

                # need to encode and repeat for each sample
                encoded_phonemes = enc.fit_transform(phonemes)
                encoded_labels.append(
                    repeat(encoded_phonemes, 10, axis=0).eval())
        return encoded_labels

    train_stim = load_stimulus(
        '/vega/stats/users/sl3368/Data_LC/timit/train/timit_stim_1.mat')
    train_phn = load_labels('train.list', 'TIMIT_TRAIN.mat')

    test_stim = load_stimulus(
        '/vega/stats/users/sl3368/Data_LC/timit/test/timit_stim_1.mat')
    test_phn = load_labels('test.list', 'TIMIT_TEST.mat')

    return train_stim,train_phn,test_stim,test_phn
Exemplo n.º 41
0
def make_train(image_size , word_size , first_hidden_size , proj_size , reg_lambda) :
    #initialize model
    P = Parameters()
    image_projecting = image_project.build(P, image_size, proj_size)
    batched_triplet_encoding , vector_triplet_encoding = triplet_encoding.build(P , word_size , first_hidden_size , proj_size)   

    image_vector = T.vector()

    #training
    correct_triplet =  [T.vector(dtype='float32') , T.vector(dtype='float32') , T.vector(dtype='float32')] #[E,R,E]
    negative_triplet = [T.matrix(dtype='float32') , T.matrix(dtype='float32') , T.matrix(dtype='float32')]

    image_projection_vector = image_projecting(image_vector)
    image_projection_matrix = repeat(image_projection_vector.dimshuffle(('x',0)) , negative_triplet[0].shape[0] , axis=0)
    correct_triplet_encoding_vector = vector_triplet_encoding(correct_triplet[0] , correct_triplet[1] , correct_triplet[2])
    negative_triplet_encoding_matrix = batched_triplet_encoding(negative_triplet[0] , negative_triplet[1] , negative_triplet[2])

    correct_cross_dot_scalar = T.dot(image_projection_vector , correct_triplet_encoding_vector)
    negative_cross_dot_vector = T.batched_dot(image_projection_matrix , negative_triplet_encoding_matrix)

    #margin cost
    zero_cost = T.zeros_like(negative_cross_dot_vector)
    margin_cost = 1 - correct_cross_dot_scalar + negative_cross_dot_vector
    cost_vector = T.switch(T.gt(zero_cost , margin_cost) , zero_cost , margin_cost)

    #regulizar cost
    params = P.values()
    l2 = T.sum(0)
    for p in params:
        l2 = l2 + (p ** 2).sum()        
    cost = T.sum(cost_vector)/T.shape(negative_triplet[0])[0] + reg_lambda * l2 #assume word vector has been put into P #unsolved
    grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params)]

    lr = T.scalar(name='learning rate',dtype='float32')
    train = theano.function(
        inputs=[image_vector, correct_triplet[0], correct_triplet[1], correct_triplet[2], negative_triplet[0], negative_triplet[1], negative_triplet[2], lr],
        outputs=cost,
        updates=updates.rmsprop(params, grads, learning_rate=lr),
        allow_input_downcast=True
    )

    #valid
    valid = theano.function(
        inputs=[image_vector, correct_triplet[0], correct_triplet[1], correct_triplet[2], negative_triplet[0], negative_triplet[1], negative_triplet[2]],
        outputs=cost,
        allow_input_downcast=True

    )
    #testing
    all_triplet = [T.matrix(dtype='float32') , T.matrix(dtype='float32') , T.matrix(dtype='float32')]
    image_projection_matrix_test = repeat(image_projection_vector.dimshuffle(('x',0)) , all_triplet[0].shape[0] , axis=0)
    all_triplet_encoding_matrix = batched_triplet_encoding(all_triplet[0] , all_triplet[1] , all_triplet[2])
    all_cross_dot_vector = T.batched_dot(image_projection_matrix_test , all_triplet_encoding_matrix)

    test = theano.function(
        inputs=[image_vector, all_triplet[0], all_triplet[1], all_triplet[2]],
        outputs=all_cross_dot_vector,
        allow_input_downcast=True

    )

#default
    P_default = Parameters()
    P_default['left']     = 2 * (np.random.rand(word_size) - 0.5)
    P_default['right']    = 2 * (np.random.rand(word_size) - 0.5)
    P_default['relation'] = 2 * (np.random.rand(word_size) - 0.5)

    correct_triplet_d =  [T.vector(dtype='float32') , T.vector(dtype='float32') , T.vector(dtype='float32')] #[E,R,E]
    negative_triplet_d = [T.matrix(dtype='float32') , T.matrix(dtype='float32') , T.matrix(dtype='float32')]    

    correct_triplet_d_train = [correct_triplet_d,correct_triplet_d,correct_triplet_d]
    negative_triplet_d_train = [negative_triplet_d,negative_triplet_d,negative_triplet_d]

    cost = 0
    for i in range(3) :
        if i == 0 :
            correct_triplet_d_train[0]  = [correct_triplet_d[0],P_default['relation'],P_default['right']]
            negative_triplet_d_train[0] = [negative_triplet_d[0],repeat(P_default['relation'].dimshuffle(('x',0)),negative_triplet_d[0].shape[0] , axis=0),repeat(P_default['right'].dimshuffle(('x',0)),negative_triplet_d[0].shape[0] , axis=0)]
        elif i == 1 :
            correct_triplet_d_train[1]  = [P_default['left'],correct_triplet_d[1],P_default['right']]
            negative_triplet_d_train[1] = [repeat(P_default['left'].dimshuffle(('x',0)),negative_triplet_d[1].shape[0] , axis=0),negative_triplet_d[1],repeat(P_default['right'].dimshuffle(('x',0)),negative_triplet_d[1].shape[0] , axis=0)]
        elif i == 2 :
            correct_triplet_d_train[2]  = [P_default['left'],P_default['relation'],correct_triplet_d[2]]
            negative_triplet_d_train[2] = [repeat(P_default['left'].dimshuffle(('x',0)),negative_triplet_d[2].shape[0] , axis=0),repeat(P_default['relation'].dimshuffle(('x',0)),negative_triplet_d[2].shape[0] , axis=0),negative_triplet_d[2]]

        image_projection_matrix_d = repeat(image_projection_vector.dimshuffle(('x',0)) , negative_triplet_d[i].shape[0] , axis=0)
        correct_triplet_encoding_vector_d = vector_triplet_encoding(correct_triplet_d_train[i][0] , correct_triplet_d_train[i][1] , correct_triplet_d_train[i][2])
        negative_triplet_encoding_matrix_d = batched_triplet_encoding(negative_triplet_d_train[i][0] , negative_triplet_d_train[i][1] , negative_triplet_d_train[i][2])

        correct_cross_dot_scalar_d = T.dot(image_projection_vector , correct_triplet_encoding_vector_d)
        negative_cross_dot_vector_d = T.batched_dot(image_projection_matrix_d , negative_triplet_encoding_matrix_d)

        #margin cost
        zero_cost_d = T.zeros_like(negative_cross_dot_vector_d)
        margin_cost_d = 1 - correct_cross_dot_scalar_d + negative_cross_dot_vector_d
        cost_vector_d = T.switch(T.gt(zero_cost_d , margin_cost_d) , zero_cost_d , margin_cost_d)        

        cost = cost + T.sum(cost_vector_d)/T.shape(negative_triplet[i])[0]

    params_d = P_default.values()
    l2 = T.sum(0)
    for p in params_d:
        l2 = l2 + (p ** 2).sum()
    cost = cost + 0.01*l2

    grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params_d)]

    train_default = theano.function(
        inputs=[image_vector, correct_triplet_d[0], correct_triplet_d[1], correct_triplet_d[2], negative_triplet_d[0], negative_triplet_d[1], negative_triplet_d[2], lr],
        outputs=cost,
        updates=updates.rmsprop(params_d, grads, learning_rate=lr),
        allow_input_downcast=True
    )

    all_triplet_d = [T.matrix(dtype='float32') , T.matrix(dtype='float32') , T.matrix(dtype='float32')]
    all_triplet_d_test = [all_triplet_d,all_triplet_d,all_triplet_d]
    result = [[],[],[]]
    for i in range(3) :
        image_projection_matrix_test_d = repeat(image_projection_vector.dimshuffle(('x',0)) , all_triplet[i].shape[0] , axis=0)
        if i == 0 :
            all_triplet_d_test[0] = [all_triplet_d[0],repeat(P_default['relation'].dimshuffle(('x',0)),all_triplet_d[0].shape[0] , axis=0),repeat(P_default['right'].dimshuffle(('x',0)),all_triplet_d[0].shape[0] , axis=0)]
        elif i == 1 :
            all_triplet_d_test[1] = [repeat(P_default['left'].dimshuffle(('x',0)),all_triplet_d[1].shape[0] , axis=0),all_triplet_d[1],repeat(P_default['right'].dimshuffle(('x',0)),all_triplet_d[1].shape[0] , axis=0)]
        elif i == 2 :
            all_triplet_d_test[2] = [repeat(P_default['left'].dimshuffle(('x',0)),all_triplet_d[2].shape[0] , axis=0),repeat(P_default['relation'].dimshuffle(('x',0)),all_triplet_d[2].shape[0] , axis=0),all_triplet_d[2]]

        all_triplet_encoding_matrix_d = batched_triplet_encoding(all_triplet_d_test[i][0] , all_triplet_d_test[i][1] , all_triplet_d_test[i][2])
        result[i] = T.batched_dot(image_projection_matrix_test_d , all_triplet_encoding_matrix_d)

    test_default = theano.function(
        inputs=[image_vector, all_triplet_d[0], all_triplet_d[1], all_triplet_d[2]],
        outputs=result,
        allow_input_downcast=True

    )


    return P , P_default , train , valid , test , train_default , test_default
Exemplo n.º 42
0
 def repeat(self, repeats, axis=None):
     """Repeat the elements of this tensor.

     Delegates to `theano.tensor.extra_ops.repeat` (see
     `theano.tensor.repeat`); `repeats` and `axis` are forwarded unchanged.
     """
     # Imported when the method is called rather than at module import time.
     from theano.tensor import extra_ops
     return extra_ops.repeat(self, repeats, axis)
Exemplo n.º 43
0
def recurrent_layer(in_mean, in_var, weights, f,
                    initial_hidden_mean, initial_hidden_var,
                    p_dropout):
    """Build the symbolic mean/variance recurrence of a recurrent layer.

    Parameters
    ----------

    in_mean : Theano variable
        Sequence tensor of shape ``(t, n, d)``. Mean of the input to the
        layer.

    in_var : Theano variable
        Sequence tensor. Variance of the input to the layer. Either (a) same
        shape as the mean or (b) scalar.

    weights : Theano variable
        Theano matrix of shape ``(d, d)``. Recurrent weight matrix the
        previous hidden state is right multiplied with.

    f : function
        Transfer function of the layer. It is called as ``f(mean, var)`` and
        has to return a ``(mean, var)`` pair.

    initial_hidden_mean : Theano variable
        Mean of the initial hidden state. If it is a vector (``ndim == 1``)
        it is repeated ``in_mean.shape[1]`` times along a new leading axis.

    initial_hidden_var : Theano variable
        Variance of the initial hidden state; handled exactly like
        ``initial_hidden_mean``.

    p_dropout : Theano variable
        Scalar representing the probability that a unit is dropped out.
        NOTE(review): the recurrent mean term is multiplied by ``p_dropout``
        whereas the variance uses ``1 - p_dropout`` as keep probability --
        confirm that this is the intended convention.


    Returns
    -------

    hidden_in_mean_rec : Theano variable
        Sequence of hidden means before the application of ``f``.

    hidden_in_var_rec : Theano variable
        Sequence of hidden variances before the application of ``f``.

    hidden_mean_rec : Theano variable
        Sequence of hidden means after the application of ``f``.

    hidden_var_rec : Theano variable
        Sequence of hidden variances after the application of ``f``.
    """
    def step(mean_t, var_t, _pre_mean_prev, _pre_var_prev,
             post_mean_prev, post_var_prev):
        # scan feeds back all four outputs; the previous pre-activation
        # statistics are accepted only to satisfy that signature.
        keep = 1 - p_dropout
        bernoulli_var = p_dropout * (1 - p_dropout)

        pre_mean = T.dot(post_mean_prev, weights) * p_dropout + mean_t

        unit_var = (post_var_prev * bernoulli_var
                    + (post_mean_prev ** 2) * bernoulli_var
                    + post_var_prev * keep ** 2)
        pre_var = T.dot(unit_var, weights ** 2) + var_t

        post_mean, post_var = f(pre_mean, pre_var)
        return pre_mean, pre_var, post_mean, post_var

    def per_sample(state):
        # A vector state is copied once per entry of in_mean's second axis;
        # anything else is passed through untouched.
        if state.ndim != 1:
            return state
        return repeat(state.dimshuffle('x', 0), in_mean.shape[1], axis=0)

    start_mean = per_sample(initial_hidden_mean)
    start_var = per_sample(initial_hidden_var)

    recurrences, _ = theano.scan(
        step,
        sequences=[in_mean, in_var],
        outputs_info=[T.zeros_like(start_mean),
                      T.zeros_like(start_var),
                      start_mean,
                      start_var])
    (hidden_in_mean_rec, hidden_in_var_rec,
     hidden_mean_rec, hidden_var_rec) = recurrences

    return (hidden_in_mean_rec, hidden_in_var_rec,
            hidden_mean_rec, hidden_var_rec)
def deconv_and_depool(X, w, b=None, activation=rectify):
    """Upsample ``X`` by 2 along axes 2 and 3, deconvolve, then activate.

    Every element of ``X`` is repeated twice along axis 2 and then twice
    along axis 3; the result is passed to ``deconv`` together with ``w`` and
    ``b``, and ``activation`` is applied to the deconvolution output.
    """
    upsampled = X
    for spatial_axis in (2, 3):
        upsampled = repeat(upsampled, repeats=2, axis=spatial_axis)
    return activation(deconv(upsampled, w, b))
Exemplo n.º 45
0
    def output_random_generation(self, input, n_batch=144):
        """Unpool ``input`` with a fixed mask, convolve it and add the bias.

        The unpooling mask is created on the first call and cached in
        ``self.random_mask``: it is a tiling of a ``poolsize`` window whose
        only non-zero entry is the top-left one. Its batch dimension is taken
        from the ``n_batch`` of the call that creates it; later calls reuse
        the cached mask as long as ``self.random_mask`` is not reset.

        Parameters
        ----------
        input : Theano variable
            Tensor to unpool; it is repeated along axes 2 and 3.
        n_batch : int
            Batch size used for the first dimension of the mask.

        Returns
        -------
        Theano variable
            ``conv(unpooled) + bias``, passed through ``self.activation``
            unless that is ``None``.

        Raises
        ------
        Exception
            If ``self.border_mode`` is not ``'same'``.
        """
        ###--- Unpool
        pool = list(self.poolsize)  # accept tuples as well as lists

        if self.random_mask is None:
            tile_reps = list(self.image_shape)
            tile_reps[0] = n_batch
            # Floor division: under Python 3 ``/`` would produce floats,
            # which np.tile does not accept as repetition counts.
            tile_reps[2] //= pool[0]
            tile_reps[3] //= pool[1]
            window = np.zeros(pool, dtype=np.float32)
            window[0, 0] = 1
            self.random_mask = theano.shared(
                np.tile(window.reshape([1, 1] + pool), tile_reps))

        if pool[0] == 1 and pool[1] == 1:
            unpool_out = input
        else:
            repeated = Textra.repeat(input, pool[0], axis=2)
            repeated = Textra.repeat(repeated, pool[1], axis=3)
            unpool_out = repeated * self.random_mask

        ###--- Unpool + conv
        # convolve input feature maps with filters
        if self.border_mode == 'same':
            conv_out = dnn.dnn_conv(
                img=unpool_out,
                kerns=self.W,
                subsample=(1, 1),
                border_mode=self.border,
            )
        else:
            raise Exception('Unknown conv type')

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
        if self.activation is None:
            return lin_output
        return self.activation(lin_output)
Exemplo n.º 46
0
    xtp1 = T.cast(T.argmax(srng.multinomial(n=1, pvals=ot), axis=1), floatX)

    s_updates = OrderedDict()
    for s, st, num_h in zip(ss, sts, args.num_hs):
        if T.lt(xt.shape[0], s.shape[0]):
            pad = T.zeros((s.shape[0] - xt.shape[0], num_h), dtype=floatX)
            st = T.concatenate([st, pad], axis=0)
        s_updates[s] = st
    return [ot, xtp1], s_updates


# Run gru_step over the rows of X.T together with the dropout masks. Of the
# two per-step outputs only the first one is kept; the updates returned by
# scan are stored for the training function.
[o, _], gru_train_updates = theano.scan(gru_step,
                                        outputs_info=[None, None],
                                        sequences=[X.T] + dropout_masks)
# Swap the first two axes of the stacked outputs.
o = o.dimshuffle((1, 0, 2))
# Advanced indexing: for every position (i, j) pick o[i, j, y[i, j]]. The two
# repeat(...) terms are index grids for the first and second axis.
p_hat = o[repeat(T.arange(o.shape[0]).dimshuffle(0, "x"), o.shape[1], axis=1),
          repeat(T.arange(o.shape[1]).dimshuffle("x", 0), o.shape[0], axis=0),
          T.cast(y, "int32")]
# Non-zero where y != -1; entries equal to -1 are excluded from the loss.
y_mask = T.neq(y, -1)  # Evolves into c_fagrigus
# Per row: masked sum of log-probabilities divided by the number of unmasked
# entries; the loss is the negated mean over rows.
cross_entropy = -T.mean(
    T.sum(T.log(p_hat) * y_mask, axis=1) / T.sum(y_mask, axis=1))


def perplexity(y, o):
    """Return the perplexity of the predictions ``o`` for the targets ``y``.

    Parameters
    ----------
    y : array_like
        Target indices, indexed as ``y[i, j]``. Entries equal to ``-1`` are
        masked out and do not contribute.
    o : array_like
        Predicted values, indexed as ``o[i, j, k]``; ``o[i, j, y[i, j]]`` is
        the value assigned to the target at position ``(i, j)``.

    Returns
    -------
    float
        ``exp(cross_entropy)``, where the cross-entropy is the mean over rows
        of the negated, mask-weighted average natural-log value of the
        targets. The natural logarithm is used, hence ``exp``.
    """
    y = np.asarray(y)
    o = np.asarray(o)

    # Index grids broadcast against y, selecting o[i, j, y[i, j]].
    rows = np.arange(o.shape[0]).reshape((-1, 1))
    cols = np.arange(o.shape[1]).reshape((1, -1))
    p_hat = o[rows, cols, y.astype("int32")]

    y_mask = y != -1
    # Masked positions are replaced by 1 before the log so that they add
    # exactly 0 instead of turning ``log(0) * 0`` into NaN.
    log_p = np.log(np.where(y_mask, p_hat, 1.0))
    cross_entropy = -np.mean(
        np.sum(log_p, axis=1) / np.sum(y_mask, axis=1))
    return np.exp(cross_entropy)
Exemplo n.º 47
0
    def cnn_creator(kernel):
        """Compile train / predict / cost functions for a 4-layer CNN.

        ``kernel`` must hold exactly 8 entries laid out as
        ``[w1, b1, w2, b2, w3, b3, w4, b4]``. Every layer is a convolution
        plus bias, a 2x2 max-pooling and a leaky rectifier. The last layer's
        output is repeated 16 times along axes 2 and 3, turned into a softmax
        over axis 1 and clipped to ``[1e-7, 1 - 1e-7]``. The cost is the mean
        categorical cross-entropy against the ground truth, with both tensors
        flattened to rows of 3 values. Parameters are trained with
        Adadelta-style updates (``rho = 0.95``).

        Returns
        -------
        (f_train, f_cnn, f_cost)
            ``f_train(src, grt)`` returns the cost and applies the updates,
            ``f_cnn(src)`` returns the argmax over axis 1 (dims kept),
            ``f_cost(src, grt)`` returns the cost without updating.

        Raises
        ------
        Exception
            If ``kernel.shape[0]`` is not 8.
        """
        if kernel.shape[0] != 8:
            raise Exception('Expected cnn kernel with 8 subkernels.'
                            '\nReceived kernel has {0} '
                            'subkernel(s).'.format(kernel.shape[0]))

        src_data = T.tensor4(name="source_data")
        grt_data = T.tensor4(name="ground_truth_data")

        # ***********************************************************
        # Even slots hold the filters, odd slots the matching biases.
        weights = [kernel[i] for i in (0, 2, 4, 6)]
        biases = [kernel[i] for i in (1, 3, 5, 7)]
        params = weights + biases
        param_shapes = [param.eval().shape for param in params]

        # ***********************************************************

        def leaky_relu(value, alpha=0.05):
            return T.switch(value > 0, value, alpha * value)

        def channel_softmax(value):
            # Softmax over axis 1; the max is subtracted before exp.
            shifted = theano.tensor.exp(
                value - value.max(axis=1, keepdims=True))
            return shifted / shifted.sum(axis=1, keepdims=True)

        def zeros_shared(shape):
            return theano.shared(
                numpy.asarray(numpy.zeros(shape),
                              dtype=theano.config.floatX),
                borrow=True)

        # ***********************************************************
        # conv + bias -> 2x2 max-pool -> leaky rectifier, four times over.
        layer_out = src_data
        for w, b in zip(weights, biases):
            conv = nnet.conv2d(input=layer_out, filters=w) + \
                b.dimshuffle('x', 0, 'x', 'x')
            pooled = downsample.max_pool_2d(conv, (2, 2))
            layer_out = leaky_relu(pooled)

        upsampled = layer_out
        for spatial_axis in (2, 3):
            upsampled = ops.repeat(upsampled, 16, axis=spatial_axis)

        eps = 1e-7
        clipped_softmax = channel_softmax(upsampled).clip(eps, 1 - eps)

        # ***********************************************************

        max_val = clipped_softmax.argmax(axis=1, keepdims=True)

        # ***********************************************************
        # Move axis 1 last and flatten to rows of 3 values.
        flat_softmax = clipped_softmax.dimshuffle(0, 2, 3, 1).reshape((-1, 3))
        flat_grt = grt_data.dimshuffle(0, 2, 3, 1).reshape((-1, 3))

        cost = T.mean(T.nnet.categorical_crossentropy(flat_softmax,
                                                      flat_grt))

        # ***********************************************************

        gparams = [T.grad(cost, param) for param in params]

        # ***********************************************************
        # Running averages of squared gradients and squared steps, one pair
        # per parameter, all starting at zero.
        prev_eg2s = [zeros_shared(shape) for shape in param_shapes]
        prev_edx2s = [zeros_shared(shape) for shape in param_shapes]

        rho = 0.95
        ada_eps = 1e-9
        learning_rate = 1

        cur_eg2s = []
        cur_edx2s = []
        cur_params = []
        for param, gparam, prev_eg2, prev_edx2 in zip(
                params, gparams, prev_eg2s, prev_edx2s):
            eg2 = rho * prev_eg2 + (1.0 - rho) * T.sqr(gparam)
            dx = T.sqrt(prev_edx2 + ada_eps) / T.sqrt(eg2 + ada_eps) * gparam
            edx2 = rho * prev_edx2 + (1.0 - rho) * T.sqr(dx)

            cur_eg2s.append(eg2)
            cur_edx2s.append(edx2)
            cur_params.append(param - learning_rate * dx)

        # Parameters first, then the two accumulator groups.
        updates = (list(zip(params, cur_params))
                   + list(zip(prev_eg2s, cur_eg2s))
                   + list(zip(prev_edx2s, cur_edx2s)))

        # ***********************************************************

        f_cnn = theano.function([src_data],
                                theano.Out(max_val, borrow=True))

        f_cost = theano.function(inputs=[src_data, grt_data],
                                 outputs=cost)

        f_train = theano.function(inputs=[src_data, grt_data],
                                  outputs=cost,
                                  updates=updates)

        return f_train, f_cnn, f_cost
Exemplo n.º 48
0
    def output_random_generation(self, input, n_batch=144):
        """Unpool ``input`` with a fixed mask, convolve it and add the bias.

        The unpooling mask is created on the first call and cached in
        ``self.random_mask``: it is a tiling of a ``poolsize`` window whose
        only non-zero entry is the top-left one. Its batch dimension is taken
        from the ``n_batch`` of the call that creates it; later calls reuse
        the cached mask as long as ``self.random_mask`` is not reset.

        Parameters
        ----------
        input : Theano variable
            Tensor to unpool; it is repeated along axes 2 and 3.
        n_batch : int
            Batch size written into the first entry of the image shape that
            is used for the mask and handed to ``conv.conv2d``.

        Returns
        -------
        Theano variable
            ``conv(unpooled) + bias``, passed through ``self.activation``
            unless that is ``None``.

        Raises
        ------
        Exception
            If ``self.border_mode`` is not ``'valid'``, ``'same'`` or
            ``'full'``.
        """
        ###--- Unpool
        pool = list(self.poolsize)  # accept tuples as well as lists

        image_shape = list(self.image_shape)
        image_shape[0] = n_batch
        if self.random_mask is None:
            # Floor division: under Python 3 ``/`` would produce floats,
            # which np.tile does not accept as repetition counts and which
            # would also leak into the image_shape given to conv2d below.
            image_shape[2] //= pool[0]
            image_shape[3] //= pool[1]
            window = np.zeros(pool, dtype=np.float32)
            window[0, 0] = 1
            self.random_mask = theano.shared(
                np.tile(window.reshape([1, 1] + pool), image_shape))
            # Scale back up to the unpooled size for the convolution.
            image_shape[2] *= pool[0]
            image_shape[3] *= pool[1]

        if pool[0] == 1 and pool[1] == 1:
            unpool_out = input
        else:
            repeated = Textra.repeat(input, pool[0], axis=2)
            repeated = Textra.repeat(repeated, pool[1], axis=3)
            unpool_out = repeated * self.random_mask

        ###--- Unpool + conv
        # convolve input feature maps with filters
        if self.border_mode == 'valid':
            conv_out = conv.conv2d(input=unpool_out,
                                   filters=self.W,
                                   filter_shape=self.filter_shape,
                                   image_shape=image_shape,
                                   border_mode='valid')
        elif self.border_mode == 'same':
            conv_out = conv.conv2d(input=unpool_out,
                                   filters=self.W,
                                   filter_shape=self.filter_shape,
                                   image_shape=image_shape,
                                   border_mode='full')
            # Crop the 'full' result symmetrically. Integer padding keeps the
            # slice bounds integral on Python 3, and the explicit end index
            # avoids the empty ``0:-0`` slice when the padding is zero.
            padding_w = (self.filter_shape[2] - 1) // 2
            padding_h = (self.filter_shape[3] - 1) // 2
            conv_out = conv_out[:, :,
                                padding_w:conv_out.shape[2] - padding_w,
                                padding_h:conv_out.shape[3] - padding_h]
        elif self.border_mode == 'full':
            conv_out = conv.conv2d(input=unpool_out,
                                   filters=self.W,
                                   filter_shape=self.filter_shape,
                                   image_shape=image_shape,
                                   border_mode='full')
        else:
            raise Exception('Unknown conv type')

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        lin_output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')
        if self.activation is None:
            return lin_output
        return self.activation(lin_output)
Exemplo n.º 49
0
def _load_timit_split(list_path, wav_dir, mat_path, repeats=160):
    """Load the waveforms and repeated one-hot phoneme labels of one split.

    Parameters
    ----------
    list_path : str
        Text file with one wave file name per line.
    wav_dir : str
        Prefix prepended to every file name (plain string concatenation).
    mat_path : str
        HDF5 file holding the phoneme labels under the ``'data'`` key.
    repeats : int
        How many times each encoded label row is repeated along axis 0.

    Returns
    -------
    (audio, labels)
        Two lists with one entry per wave file.
    """
    with open(list_path) as f:
        filenames = [line.rstrip('\n') for line in f]

    # wavfile.read returns a pair; only the second element is kept
    # (presumably the sample array of scipy.io.wavfile -- confirm).
    audio = [wavfile.read(wav_dir + filename)[1] for filename in filenames]

    enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)

    labels_per_file = []
    # The context manager closes the HDF5 file once all labels have been
    # copied into NumPy arrays.
    with h5py.File(mat_path) as phn:
        phn_data = phn['data']
        for i in range(len(audio)):
            ref = phn_data[i][0]
            labels = phn[ref]
            phonemes = labels[2]
            phonemes = numpy.reshape(phonemes, (len(phonemes), 1))

            # one-hot encode, then repeat every row `repeats` times
            encoded_phonemes = enc.fit_transform(phonemes)
            labels_per_file.append(
                numpy.repeat(encoded_phonemes, repeats, axis=0))

    return audio, labels_per_file


def get_timit_waveform():
    """Load TIMIT waveforms and per-sample phoneme labels for train and test.

    File names are read from ``train.list`` / ``test.list`` in the current
    directory; wave files and the label files ``TIMIT_TRAIN.mat`` /
    ``TIMIT_TEST.mat`` are read from the hard-coded locations below. Each
    label row is one-hot encoded with 41 values and repeated 160 times.
    NOTE(review): the original comments mention sampling rates of 1600 and
    16000 for that factor -- confirm that 160 is the intended value.

    Returns
    -------
    (train_audio, train_phn, test_audio, test_phn)
        Four lists; the audio and label lists of a split have equal length.
    """
    train_audio, train_phn = _load_timit_split(
        'train.list',
        '/vega/stats/users/sl3368/TIMIT_process/TimitWav/train/',
        'TIMIT_TRAIN.mat')

    print('training done...')

    test_audio, test_phn = _load_timit_split(
        'test.list',
        '/vega/stats/users/sl3368/TIMIT_process/TimitWav/test/',
        'TIMIT_TEST.mat')

    return train_audio, train_phn, test_audio, test_phn
Exemplo n.º 50
0
    def __init__(self,
                 model,
                 learning_rate=0.1,
                 pred_given=None,
                 arg0_given=None,
                 arg1_given=None,
                 arg2_given=None):
        """Set up gradient-based optimization of a right-hand-side event.

        Shared variables are created for the predicate and the three argument
        vectors of the RHS event, the model's prediction graph is rebuilt on
        top of them, and two Theano functions are compiled:
        ``self.optimize`` (one gradient step on ``-log(mean prediction)``)
        and ``self.score_vector`` (the mean prediction only).

        :param model: model providing ``pair_projection_model``,
            ``pred_vocab`` and ``arg_vocab``
        :param learning_rate: default step size of ``self.optimize``
        :param pred_given: fixed predicate, or None to optimize that position
        :param arg0_given: fixed arg0, ``"--"`` to fix it via
            ``set_given(1, None)``, or None to optimize that position
        :param arg1_given: as ``arg0_given``, for position 2
        :param arg2_given: as ``arg0_given``, for position 3
        :raises ValueError: if all four positions are fixed
        """
        self.learning_rate = learning_rate
        self.model = model
        self.network = model.pair_projection_model
        self.event_network = self.network.event_network

        self.pred_given = pred_given
        self.arg0_given = arg0_given
        self.arg1_given = arg1_given
        self.arg2_given = arg2_given

        # Symbolic learning rate, so that it can be changed per call
        self.learning_rate_var = T.scalar("learning_rate",
                                          dtype=theano.config.floatX)

        # Create variables for the unobserved inputs (RHS), which we're sampling
        # Each one is a single zero-initialized row vector
        self.pred_size = (1, self.network.event_network.pred_vector_size)
        self.rhs_pred = theano.shared(
            numpy.zeros(self.pred_size, dtype=theano.config.floatX),
            borrow=True,
        )
        self.arg_size = (1, self.network.event_network.arg_vector_size)
        self.rhs_arg0 = theano.shared(
            numpy.zeros(self.arg_size, dtype=theano.config.floatX),
            borrow=True,
        )
        self.rhs_arg1 = theano.shared(
            numpy.zeros(self.arg_size, dtype=theano.config.floatX),
            borrow=True,
        )
        self.rhs_arg2 = theano.shared(
            numpy.zeros(self.arg_size, dtype=theano.config.floatX),
            borrow=True,
        )
        self.arg_vectors = [self.rhs_arg0, self.rhs_arg1, self.rhs_arg2]
        # Width of the concatenated [pred, arg0, arg1, arg2] row
        self.input_vector_size = self.pred_size[1] + 3 * self.arg_size[1]
        rhs_vector = T.concatenate(
            [self.rhs_pred, self.rhs_arg0, self.rhs_arg1, self.rhs_arg2],
            axis=1)
        # Rebuild the prediction function so that it uses our new vectors on the RHS
        # Repeat them over the first dimension, a single RHS vector is compared to all LHS vectors (context)
        rhs_projection = theano.clone(
            self.event_network.projection_layer,
            replace={
                self.event_network.input_vector:
                extra_ops.repeat(rhs_vector,
                                 self.event_network.predicate_input_a.shape[0],
                                 axis=0)
            })
        prediction = theano.clone(
            self.network.prediction,
            replace={self.network.input_b: rhs_projection},
            share_inputs=True)
        # The prediction value is the coherence output, which we will use as our objective to maximize
        # Average it over the context inputs (comparing each to the single RHS vector)
        chain_coherence = T.mean(prediction)

        # The optimization fn updates the RHS vectors to maximize the mean coherence with the LHS
        self.params = []
        # Only optimize the positions that haven't been fixed
        if pred_given is None:
            self.params.append(self.rhs_pred)
        if arg0_given is None:
            self.params.append(self.rhs_arg0)
        if arg1_given is None:
            self.params.append(self.rhs_arg1)
        if arg2_given is None:
            self.params.append(self.rhs_arg2)
        if len(self.params) == 0:
            raise ValueError(
                "all RHS event components have been fixed, so there's nothing left to sample!"
            )

        # Minimizing -log(coherence) maximizes the mean coherence
        cost = -T.log(chain_coherence)
        # Differentiate cost w.r.t. the RHS vectors to get the updates
        gparams = [T.grad(cost, param) for param in self.params]
        # Plain gradient-descent step on every free RHS vector
        updates = [(param, param - self.learning_rate_var * gparam)
                   for param, gparam in zip(self.params, gparams)]

        # NOTE(review): theano.Param is believed to be deprecated in newer
        # Theano releases in favour of theano.In -- confirm against the
        # version this project pins.
        self.optimize = theano.function(
            inputs=[
                self.event_network.predicate_input_a,
                self.event_network.arg0_input_a,
                self.event_network.arg1_input_a,
                self.event_network.arg2_input_a,
                theano.Param(self.learning_rate_var,
                             default=self.learning_rate)
            ],
            outputs=[cost, chain_coherence],
            updates=updates,
        )
        # Same context inputs, but no updates: only reports the coherence
        self.score_vector = theano.function(
            inputs=[
                self.event_network.predicate_input_a,
                self.event_network.arg0_input_a,
                self.event_network.arg1_input_a,
                self.event_network.arg2_input_a,
            ],
            outputs=chain_coherence,
        )

        # Parallel lists indexed by position: 0 = predicate, 1-3 = arg0-arg2
        self.positions = [
            self.rhs_pred, self.rhs_arg0, self.rhs_arg1, self.rhs_arg2
        ]
        self.givens = [pred_given, arg0_given, arg1_given, arg2_given]
        # Current values of the model's vector tables, one per position
        self.vector_vocabs = [
            self.model.pair_projection_model.event_network.predicate_vectors.
            get_value(),
            self.model.pair_projection_model.event_network.argument0_vectors.
            get_value(),
            self.model.pair_projection_model.event_network.argument1_vectors.
            get_value(),
            self.model.pair_projection_model.event_network.argument2_vectors.
            get_value(),
        ]

        # Set the non-updated input vectors to the right values
        # NOTE(review): in every branch below set_given is still called after
        # the out-of-vocabulary warning; whether set_given tolerates unknown
        # values is not visible here -- confirm.
        if not all(x is None
                   for x in [pred_given, arg0_given, arg1_given, arg2_given]):
            if pred_given is not None:
                if pred_given not in self.model.pred_vocab:
                    warnings.warn(
                        "predicate '%s' not in vocabulary: not constraining sample on predicate"
                        % pred_given)
                self.set_given(0, pred_given)
            if arg0_given is not None:
                if arg0_given == "--":
                    # Special value meaning fix to empty
                    self.set_given(1, None)
                else:
                    if arg0_given not in self.model.arg_vocab:
                        warnings.warn(
                            "arg '%s' not in vocabulary: not constraining sample on arg0"
                            % arg0_given)
                    self.set_given(1, arg0_given)
            if arg1_given is not None:
                if arg1_given == "--":
                    # Special value meaning fix to empty
                    self.set_given(2, None)
                else:
                    if arg1_given not in self.model.arg_vocab:
                        warnings.warn(
                            "arg '%s' not in vocabulary: not constraining sample on arg1"
                            % arg1_given)
                    self.set_given(2, arg1_given)
            if arg2_given is not None:
                if arg2_given == "--":
                    # Special value meaning fix to empty
                    self.set_given(3, None)
                else:
                    if arg2_given not in self.model.arg_vocab:
                        warnings.warn(
                            "arg '%s' not in vocabulary: not constraining sample on arg2"
                            % arg2_given)
                    self.set_given(3, arg2_given)
Exemplo n.º 51
0
    def from_model(model,
                   neighbour_finder=None,
                   learning_rate=0.1,
                   num_samples=1,
                   slimline_model=None):
        """
        Compile the Theano functions needed to sample a next-event projection
        and wrap them in a NextEventProjectionSampler.

        :param model: object exposing ``pair_projection_model``, the network
            whose ``prediction`` output (the coherence) is optimized
        :param neighbour_finder: passed through unchanged to the sampler
        :param learning_rate: default value of the learning-rate input of the
            compiled optimization function
        :param num_samples: number of RHS projection vectors held in the
            shared variable that is being optimized
        :param slimline_model: if given, the sampler keeps a reference to this
            model instead of ``model``
        :return: a NextEventProjectionSampler
        """
        lr_symbol = T.scalar("learning_rate", dtype=theano.config.floatX)
        network = model.pair_projection_model
        event_net = network.event_network

        # Shared storage for the unobserved RHS vectors that get sampled
        projection_size = event_net.projection_size
        initial_rhs = numpy.zeros((num_samples, projection_size),
                                  dtype=theano.config.floatX)
        rhs_projection = theano.shared(initial_rhs, borrow=True)

        # The observed LHS (context) inputs, in the order the compiled
        # functions expect them
        lhs_inputs = [
            event_net.predicate_input_a,
            event_net.arg0_input_a,
            event_net.arg1_input_a,
            event_net.arg2_input_a,
        ]

        # The objective is the coherence output of the network, computed between
        # the observed LHS events and the unobserved RHS vectors. The RHS is
        # repeated along the first dimension so that it is compared to every
        # LHS (context) event, and each LHS input is tiled num_samples times.
        replacements = {
            network.input_b:
            extra_ops.repeat(rhs_projection,
                             event_net.predicate_input_a.shape[0],
                             axis=0),
        }
        for lhs_input in lhs_inputs:
            replacements[lhs_input] = T.tile(lhs_input, (num_samples, ))
        prediction = theano.clone(network.prediction,
                                  replace=replacements,
                                  share_inputs=True)
        # Mean coherence over all the context comparisons
        chain_coherence = T.mean(prediction)

        # Gradient descent on -log(coherence), updating only the RHS vectors
        cost = -T.log(chain_coherence)
        updates = []
        for param in [rhs_projection]:
            gradient = T.grad(cost, param)
            updates.append((param, param - lr_symbol * gradient))

        optimize = theano.function(
            inputs=lhs_inputs +
            [theano.Param(lr_symbol, default=learning_rate)],
            outputs=[cost, chain_coherence],
            updates=updates,
        )
        score_vector = theano.function(
            inputs=list(lhs_inputs),
            outputs=chain_coherence,
        )

        # When a slimline model is supplied, that is the one the sampler keeps
        sampler_model = model if slimline_model is None else slimline_model

        return NextEventProjectionSampler(projection_size, rhs_projection,
                                          optimize, score_vector,
                                          sampler_model, neighbour_finder,
                                          learning_rate, num_samples)
Exemplo n.º 52
0
    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2), border_mode='same', activation=None, mask=None):
        """
        Allocate an unpooling + convolution layer with shared variable
        internal parameters.

        The input is first upsampled by ``poolsize`` (each value is repeated
        and then multiplied by ``mask``), ``activation`` is applied to the
        upsampled tensor, and the result is convolved with the filters and
        shifted by the bias.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width); forwarded to conv2d
                             as the shape of the tensor being convolved.
                             NOTE(review): with poolsize != (1, 1) the
                             convolved tensor is the upsampled one -- confirm
                             callers pass the matching shape.

        :type poolsize: tuple or list of length 2
        :param poolsize: the upsampling (unpooling) factor (#rows, #cols)

        :type border_mode: str
        :param border_mode: 'valid', 'full' or 'same'; 'same' runs a 'full'
                            convolution and crops (filter size - 1) // 2
                            from each side of the last two axes

        :param activation: optional callable applied to the upsampled input
                           before the convolution; None means identity

        :param mask: optional tensor multiplied element-wise with the
                     upsampled input; when None, a mask keeping only the
                     top-left element of every poolsize window is used

        :raises Exception: if border_mode is not one of the three modes above
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # because this layer unpools instead of pooling, the pooling size
        # multiplies (rather than divides) the fan-out:
        # "num output feature maps * filter height * filter width" *
        #   pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) *
                   numpy.prod(poolsize))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        ###--- Unpool

        if poolsize[0] == 1 and poolsize[1] == 1:
            self.unpool_out = input
        else:
            upsampled = Textra.repeat(
                Textra.repeat(input, poolsize[0], axis=2),
                poolsize[1], axis=3)
            if mask is None:
                # Default mask: 1 at the top-left corner of every
                # poolsize[0] x poolsize[1] window, 0 elsewhere. It is built
                # symbolically from the upsampled tensor so it always has the
                # right shape, whatever image_shape describes.
                mask = theano.tensor.set_subtensor(
                    theano.tensor.zeros_like(upsampled)[
                        :, :, ::poolsize[0], ::poolsize[1]],
                    1)
            self.unpool_out = upsampled * mask

        relu_output = (
            self.unpool_out if activation is None
            else activation(self.unpool_out)
        )

        ###--- Unpool + conv
        # convolve input feature maps with filters
        if border_mode == 'valid':
            conv_out = conv.conv2d(
                input=relu_output,
                filters=self.W,
                filter_shape=filter_shape,
                image_shape=image_shape,
                border_mode='valid'
            )
        elif border_mode in ('same', 'full'):
            conv_out = conv.conv2d(
                input=relu_output,
                filters=self.W,
                filter_shape=filter_shape,
                image_shape=image_shape,
                border_mode='full'
            )
            if border_mode == 'same':
                # Crop the 'full' result symmetrically. Integer division
                # keeps the bounds ints on Python 3, and a zero padding must
                # not become the empty slice 0:-0.
                pad_rows = (filter_shape[2] - 1) // 2
                pad_cols = (filter_shape[3] - 1) // 2
                row_slice = slice(pad_rows, -pad_rows) if pad_rows else slice(None)
                col_slice = slice(pad_cols, -pad_cols) if pad_cols else slice(None)
                conv_out = conv_out[:, :, row_slice, col_slice]
        else:
            raise Exception('Unknown conv type')

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

        # store parameters of this layer
        self.params = [self.W, self.b]
Exemplo n.º 53
0
def get_timit_specs():
    """Load the TIMIT stimuli and per-file one-hot phoneme labels.

    The stimulus matrices are read from hard-coded HDF5 ``.mat`` files
    (dataset ``stimulus_zscore``, transposed). For each entry of
    ``train.list`` / ``test.list`` the matching phoneme row is read from
    ``TIMIT_TRAIN.mat`` / ``TIMIT_TEST.mat``, one-hot encoded over 41
    values and repeated 10 times along the first axis.

    Returns:
        tuple: ``(train_stim, train_phn, test_stim, test_phn)`` where the
        ``*_stim`` values are arrays and the ``*_phn`` values are lists with
        one encoded label array per listed file.
    """

    def _load_stimulus(mat_path):
        # Copy the dataset into memory before the file is closed.
        with h5py.File(mat_path) as stim_file:
            return numpy.transpose(numpy.asarray(stim_file['stimulus_zscore']))

    def _load_phoneme_labels(list_path, mat_path):
        with open(list_path) as list_file:
            filenames = [line.rstrip('\n') for line in list_file]

        #initializing encoder
        enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)

        per_file_labels = []
        # The file must stay open while object references are dereferenced.
        with h5py.File(mat_path) as phn:
            phn_data = phn['data']
            for i in range(len(filenames)):
                ref = phn_data[i][0]
                labels = phn[ref]
                phonemes = labels[2]
                phonemes = numpy.reshape(phonemes, (len(phonemes), 1))

                #need to encode and repeat for each sample
                encoded_phonemes = enc.fit_transform(phonemes)

                #repeat for the sampling rate 10 this case!!
                per_file_labels.append(
                    repeat(encoded_phonemes, 10, axis=0).eval())
        return per_file_labels

    #get training spectrograms and phoneme labels
    train_stim = _load_stimulus(
        '/vega/stats/users/sl3368/Data_LC/timit/train/timit_stim_1.mat')
    train_phn = _load_phoneme_labels('train.list', 'TIMIT_TRAIN.mat')

    #get testing spectrograms and phoneme labels
    test_stim = _load_stimulus(
        '/vega/stats/users/sl3368/Data_LC/timit/test/timit_stim_1.mat')
    test_phn = _load_phoneme_labels('test.list', 'TIMIT_TEST.mat')

    return train_stim, train_phn, test_stim, test_phn
Exemplo n.º 54
0
def get_interpolated_hiddens(old_hidden, n_timesteps, n_samples,
                             interpolation_mask, number_cons_hiddens):
    '''
        Interpolate between consecutive hidden states.

        old_hidden: matrix of hidden states to interpolate, laid out as
                    number_of_hiddens * batch_size * hidden_size.
        interpolation_mask: the interpolation weights (alphas), for example
                    [1, 0.8, 0.6, 0.4, 0.2]. Each pair [h_k, h_k+1] yields
                    alpha * h_k + (1 - alpha) * h_k+1 for every alpha.
        number_cons_hiddens: number of consecutive pairs; hiddens
                    h1 .. h_n produce the n-1 pairs [h1, h2], [h2, h3], ...
        n_samples: first dimension used when reshaping the paired hiddens.
        n_timesteps: each per-sample result after the first two is reshaped
                    to [1, n_timesteps - 2, hidden_size].

        Returns the interpolated hiddens with len(interpolation_mask) zero
        rows prepended along axis 1.

        NOTE(review): hidden_size (1024) and batch_size (32) are hard-coded
        below rather than taken from the arguments.
    '''
    hidden_size = 1024
    batch_size = 32

    num_reduced_hiddens = number_cons_hiddens + 1
    number_interp = len(interpolation_mask)

    # Put the batch axis first, duplicate every hidden state, shift by one
    # and regroup, so that each row of the last-but-one axis is a
    # consecutive pair.
    pairs = old_hidden.dimshuffle(1, 0, 2)
    pairs = repeat(pairs, 2, axis=1)
    pairs = tensor.roll(pairs, -1, axis=1)
    pairs = pairs[:, 0:2 * num_reduced_hiddens - 2, :]
    pairs = pairs.reshape(
        [n_samples, number_cons_hiddens, 2, hidden_size])

    def blend_pair(pair, weights):
        # One interpolated vector per weight for a single pair
        return [weights[k] * pair[0] + (1 - weights[k]) * pair[1]
                for k in range(number_interp)]

    def blend_sample(sample_pairs, weights):
        # Scan over all the pairs of one sample
        blended, _ = theano.scan(blend_pair,
                                 sequences=sample_pairs,
                                 non_sequences=[weights])
        return blended

    rval, _ = theano.scan(blend_sample,
                          sequences=pairs,
                          non_sequences=[interpolation_mask])

    out = []
    out_batch = []
    for batch_index in range(batch_size):
        for i in range(number_cons_hiddens):
            something = [rval[j][batch_index][i] for j in range(number_interp)]
            if i == 0:
                out = something
            else:
                out = tensor.concatenate([out, something], axis=0)
        if batch_index == 0:
            out_batch = out
        elif batch_index == 1:
            out_batch = tensor.stacklists([out_batch, out])
        else:
            out = tensor.reshape(out, [1, n_timesteps - 2, hidden_size])
            out_batch = tensor.concatenate([out_batch, out])

    leading_zeros = tensor.zeros(
        [out_batch.shape[0], number_interp, out_batch.shape[2]])
    return tensor.concatenate([leading_zeros, out_batch], axis=1)
    def __init__(self, data, image_shape, filter_shape, poolsize, sparse_coeff, activation='sigmoid',
                 tied_weight=False, is_linear=False, do_max_pool=False):
        """Build a convolutional auto-encoder and compile its Theano functions.

        A 'full'-mode ConvolutionLayer encodes ``self.input`` and a
        'valid'-mode ConvolutionLayer reconstructs it. The compiled
        functions ``train``, ``get_activation`` and ``get_reconstruction``
        all take a mini-batch index and read
        ``data[index * batchsize:(index + 1) * batchsize]``.

        :param data: indexable data set that is sliced per mini-batch and
            substituted for ``self.input`` (presumably a Theano shared
            variable -- TODO confirm)
        :param image_shape: (batch size, input channels, height, width)
        :param filter_shape: 4-element shape; elements 0, 2 and 3 are read as
            the number of filters, the filter height and the filter width
        :param poolsize: ``ds`` argument of the max-pooling applied to the
            hidden output for ``self.activation``
        :param sparse_coeff: weight of the L1 sparsity term in the cost; when
            it is 0 the hidden feature maps are not normalized
        :param activation: ``act`` argument of the hidden layer, and of the
            reconstruction layer unless ``is_linear`` is set
        :param tied_weight: if True, the reconstruction weights are
            initialized with a copy of the hidden weights with their first
            two axes swapped
        :param is_linear: if True, the reconstruction layer uses
            ``act='linear'``
        :param do_max_pool: passed to the hidden layer as ``if_pool``; when
            True the hidden output is also repeated twice along axes 2 and 3
            before reconstruction
        """
        rng = np.random.RandomState(None)
        self.data = data
        self.batchsize = image_shape[0]
        self.in_channels = image_shape[1]
        self.in_height = image_shape[2]
        self.in_width = image_shape[3]
        self.flt_channels = filter_shape[0]
        self.flt_height = filter_shape[2]
        self.flt_width = filter_shape[3]
        self.input = T.ftensor4('input')

        hidden_layer = ConvolutionLayer(rng,
                                        input=self.input,
                                        filter_shape=filter_shape,
                                        act=activation,
                                        border_mode='full',
                                        if_pool=do_max_pool)

        # Spatial size of a 'full' convolution output
        full_height = self.in_height + self.flt_height - 1
        full_width = self.in_width + self.flt_width - 1

        self.hidden_image_shape = (self.batchsize,
                                   self.flt_channels,
                                   full_height,
                                   full_width)

        # Floor division keeps the dimensions integral on Python 3 as well
        self.hidden_pooled_image_shape = (self.batchsize,
                                          self.flt_channels,
                                          full_height // 2,
                                          full_width // 2)

        self.hidden_filter_shape = (self.in_channels,
                                    self.flt_channels,
                                    self.flt_height,
                                    self.flt_width)

        if sparse_coeff == 0:
            encoded = hidden_layer.output
        else:
            feature_map = hidden_layer.output

            # Normalize by the L2 norm first across feature maps (axis 1),
            # then within each feature map (over its flattened last 2 axes)
            examp_sparsity = feature_map.norm(2, axis=1)
            examp_sparsity = examp_sparsity.dimshuffle(0, 'x', 1, 2)
            feature_map1 = np.divide(feature_map, examp_sparsity+1e-9)
            feature_map1_vec = feature_map1.reshape(
                (feature_map1.shape[0],
                 feature_map1.shape[1],
                 feature_map1.shape[2]*feature_map1.shape[3]))
            feat_sparsity = feature_map1_vec.norm(2, axis=2)
            feat_sparsity = feat_sparsity.dimshuffle(0, 1, 'x', 'x')
            encoded = np.divide(feature_map1, feat_sparsity+1e-9)

        if do_max_pool:
            # Upsample by 2 along both spatial axes (presumably undoing the
            # pooling done inside the hidden layer -- TODO confirm)
            hidden_layer_output = repeat(encoded,
                                         repeats=2,
                                         axis=2)
            hidden_layer_output = repeat(hidden_layer_output,
                                         repeats=2,
                                         axis=3)
        else:
            hidden_layer_output = encoded

        recon_layer = ConvolutionLayer(rng,
                                       input=hidden_layer_output,
                                       filter_shape=self.hidden_filter_shape,
                                       act='linear' if is_linear else activation,
                                       border_mode='valid')

        self.tied_weight = tied_weight
        if self.tied_weight:
            # Only the initial values are tied: the reconstruction layer
            # keeps its own shared variable
            weight = hidden_layer.W.get_value()
            recon_layer.W.set_value(weight.transpose(1,0,2,3), borrow=True)

        self.layers = [hidden_layer, recon_layer]
        self.params = sum([layer.params for layer in self.layers], [])

        L1_sparsity = hidden_layer_output.norm(1, axis=(2, 3))
        sparse_filter = T.mean(L1_sparsity, axis=(0, 1))

        # squared reconstruction error, summed over channel, height, width
        L=T.sum(T.pow(T.sub(recon_layer.output, self.input), 2), axis=(1,2,3))

        cost = 0.5*T.mean(L) + sparse_coeff * sparse_filter

        grads = T.grad(cost, self.params)

        updates = adadelta_updates(self.params, grads, rho=0.95, eps=1e-6)

        index = T.lscalar('index')
        batch_begin = index * self.batchsize
        batch_end = batch_begin + self.batchsize

        self.train = theano.function(
                    inputs=[index],
                    outputs=cost,
                    updates=updates,
                    givens={
                        self.input: self.data[batch_begin:batch_end]
                    },
                    name="train cae model")

        self.activation = downsample.max_pool_2d(
                input=hidden_layer.output,
                ds=poolsize,
                ignore_border=True)

        # With do_max_pool the hidden layer output is returned as is;
        # otherwise the max-pooled activation computed above is returned
        self.get_activation = theano.function(
            inputs=[index],
            outputs=hidden_layer.output if do_max_pool else self.activation,
            updates=None,
            givens={
                self.input: self.data[batch_begin:batch_end]
            },
            name='get hidden activation')

        self.get_reconstruction = theano.function(
                            inputs=[index],
                            outputs=recon_layer.output,
                            updates=None,
                            givens={
                                self.input: self.data[batch_begin:batch_end]
                            },
                            name='get reconstruction')
Exemplo n.º 56
0
def get_timit_specs_images(window_size):
    """Load the TIMIT stimuli as sliding windows plus one-hot phoneme labels.

    Each stimulus matrix (dataset ``stimulus_zscore``, transposed) is cut
    into consecutive chunks of 5000 rows; inside every chunk, all windows of
    ``window_size`` consecutive rows that start in the first
    ``5000 - window_size`` rows are collected. Phoneme labels are read per
    listed file, one-hot encoded over 41 values and repeated 10 times along
    the first axis.

    Args:
        window_size: number of consecutive stimulus rows per window.

    Returns:
        tuple: ``(train_stim_windows, train_phn, test_stim_windows,
        test_phn)``. The window arrays have shape
        ``(n_chunks, 5000 - window_size, window_size, 60)``; the label lists
        hold one array per file, with ``window_size // 2`` entries dropped
        from each end of the list.
    """
    chunk_len = 5000
    # hard-coded width of a stimulus row, as in the buffer allocation
    n_features = 60
    # floor division: a float is not a valid slice bound on Python 3
    half = window_size // 2

    def _load_stimulus(mat_path):
        # Copy the dataset into memory before the file is closed.
        with h5py.File(mat_path) as stim_file:
            return numpy.transpose(numpy.asarray(stim_file['stimulus_zscore']))

    def _build_windows(stim):
        n_chunks = len(stim) // chunk_len
        windows_per_chunk = chunk_len - window_size
        windows = numpy.zeros(
            (n_chunks, windows_per_chunk, window_size, n_features))
        for j in range(n_chunks):
            chunk_start = j * chunk_len
            # k is the window position inside chunk j. Indexing the buffer
            # with the absolute row number overflows its second axis for
            # every chunk after the first.
            for k in range(windows_per_chunk):
                row = chunk_start + k
                windows[j][k] = stim[row:row + window_size]
        return windows

    def _load_phoneme_labels(list_path, mat_path):
        with open(list_path) as list_file:
            filenames = [line.rstrip('\n') for line in list_file]

        #initializing encoder
        enc = OneHotEncoder(n_values=41, dtype=numpy.int16, sparse=False)

        per_file_labels = []
        # The file must stay open while object references are dereferenced.
        with h5py.File(mat_path) as phn:
            phn_data = phn['data']
            for i in range(len(filenames)):
                ref = phn_data[i][0]
                labels = phn[ref]
                phonemes = labels[2]
                phonemes = numpy.reshape(phonemes, (len(phonemes), 1))

                #need to encode and repeat for each sample
                encoded_phonemes = enc.fit_transform(phonemes)

                #repeat for the sampling rate 10 this case!!
                per_file_labels.append(
                    repeat(encoded_phonemes, 10, axis=0).eval())

        # NOTE(review): this trims whole files from both ends of the list,
        # not time steps inside each label array -- confirm this is intended.
        return per_file_labels[half:len(per_file_labels) - half]

    #get training spectrograms, windows and phoneme labels
    train_stim_windows = _build_windows(_load_stimulus(
        '/vega/stats/users/sl3368/Data_LC/timit/train/timit_stim_1.mat'))
    train_phn = _load_phoneme_labels('train.list', 'TIMIT_TRAIN.mat')

    #get testing spectrograms, windows and phoneme labels
    test_stim_windows = _build_windows(_load_stimulus(
        '/vega/stats/users/sl3368/Data_LC/timit/test/timit_stim_1.mat'))
    test_phn = _load_phoneme_labels('test.list', 'TIMIT_TEST.mat')

    return train_stim_windows, train_phn, test_stim_windows, test_phn
Exemplo n.º 57
0
def get_interpolated_hiddens(old_hidden,  n_timesteps,
                             n_samples, interpolation_mask,
                             number_cons_hiddens):
    '''
        Build interpolated hidden states between consecutive hiddens.

        old_hidden: hidden-state matrix to interpolate, laid out as
                    number_of_hiddens * batch_size * hidden_size.
        n_timesteps: per-sample results after the first two are reshaped to
                    [1, n_timesteps - 2, hidden_size].
        n_samples: leading dimension of the reshaped pair tensor.
        interpolation_mask: interpolation weights (alphas), for example
                    [1, 0.8, 0.6, 0.4, 0.2]; for a pair [h_a, h_b] every
                    alpha produces alpha * h_a + (1 - alpha) * h_b.
        number_cons_hiddens: number of consecutive pairs, i.e. n-1 pairs
                    [h1, h2], [h2, h3], ... for hiddens h1 .. h_n.

        The result has len(interpolation_mask) zero rows prepended along
        axis 1.

        NOTE(review): hidden_size = 1024 and batch_size = 32 are fixed
        constants in this function.
    '''
    hidden_size = 1024
    batch_size = 32
    number_interp = len(interpolation_mask)

    # Batch-major copy in which every hidden state appears twice, shifted by
    # one position so that consecutive states sit next to each other, then
    # regrouped into pairs.
    batch_major = old_hidden.dimshuffle(1, 0, 2)
    doubled = repeat(batch_major, 2, axis=1)
    shifted = tensor.roll(doubled, -1, axis=1)
    trimmed = shifted[:, 0:2*(number_cons_hiddens + 1)-2, :]
    paired = trimmed.reshape([n_samples, number_cons_hiddens, 2, hidden_size])

    def _interpolate_pair(pair, weights):
        # One output per interpolation weight
        mixes = []
        for k in range(number_interp):
            mixes.append(weights[k] * pair[0] + (1-weights[k]) * pair[1])
        return mixes

    def _interpolate_sample(sample_pairs, weights):
        # Scan over the pairs belonging to one sample
        per_pair, _ = theano.scan(_interpolate_pair,
                                  sequences=sample_pairs,
                                  non_sequences=[weights])
        return per_pair

    rval, _ = theano.scan(_interpolate_sample,
                          sequences=paired,
                          non_sequences=[interpolation_mask])

    out = []
    out_batch = []
    for batch_index in range(batch_size):
        for i in range(number_cons_hiddens):
            something = [rval[j][batch_index][i] for j in range(number_interp)]
            out = something if i == 0 else tensor.concatenate(
                [out, something], axis=0)
        if batch_index > 1:
            out = tensor.reshape(out, [1, n_timesteps-2, hidden_size])
            out_batch = tensor.concatenate([out_batch, out])
        elif batch_index == 1:
            out_batch = tensor.stacklists([out_batch, out])
        else:
            out_batch = out

    zero_pad = tensor.zeros([out_batch.shape[0], number_interp, out_batch.shape[2]])
    out_batch = tensor.concatenate([zero_pad, out_batch], axis=1)
    return out_batch