Example #1
 def input_row_from_variables(ori_ip,dest_ip,ori_lat,ori_long,dest_lat,dest_long,ori_type,dest_type,dist):
     '''Create an input row for the MLP from the inputs'''
     
     input_row = tensor.zeros([input_size])
     
     offset = 0
     
     ips = [ori_ip,dest_ip]
     for ip in ips:
         for _ in range(4):
             input_row = add_one_shot(input_row, offset, tensor.mod(ip,256))
             ip = tensor.int_div(ip,256)
             offset += 256
     
     for lat_,long_ in [(ori_lat,ori_long),(dest_lat,dest_long)]:
         translated_lat = tensor.iround((coordinate_size-1)*(lat_/180 + 0.5))
         input_row = add_thermo(input_row, offset,translated_lat)
         offset += coordinate_size
         
         translated_long = tensor.iround((coordinate_size-1)*(long_/360 + 0.5))
         input_row = add_thermo(input_row, offset,translated_long)
         offset += coordinate_size
     
     for type_ in [ori_type,dest_type]:
         input_row = add_one_shot(input_row, offset, type_ + 1)
         offset += type_size
     
     translated_dist = tensor.iround((dist_size-1)*(tensor.minimum(1,dist/max_earth_distance)))
     input_row = add_thermo(input_row, offset, translated_dist)
     
     #could be useful if we want to add something
     offset += dist_size
     
     return input_row
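Note: the helpers add_one_shot and add_thermo are not shown in the snippet. A minimal sketch of what they would have to do, assuming `tensor` is theano.tensor and the index arguments are symbolic integer scalars (hypothetical reconstructions, not the project's code):

import theano.tensor as tensor

def add_one_shot(input_row, offset, index):
    # hypothetical one-hot writer: input_row[offset + index] = 1
    return tensor.set_subtensor(input_row[offset + index], 1)

def add_thermo(input_row, offset, index):
    # hypothetical thermometer writer: input_row[offset : offset + index + 1] = 1
    return tensor.set_subtensor(input_row[offset:offset + index + 1], 1)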
Example #2
def ShiftConv(w_t_g, s_t, N):
    shift = 2.*s_t-1.
    Z = T.mod(shift+N, N)
    simj = 1 - (Z - T.floor(Z))
    imj = T.mod(T.arange(N) + T.iround(T.floor(Z)),N)
    w_t_g_roll_1 = T.roll(w_t_g, -T.iround(T.floor(Z)))
    w_t_g_roll_2 = T.roll(w_t_g, -(T.iround(T.floor(Z))+1))
    w_t_s = w_t_g_roll_1*simj + w_t_g_roll_2*(1-simj)
    return w_t_s
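The last four lines implement a fractional circular shift: the integer part of the shift picks two neighbouring rotations of w_t_g and the fractional part blends them. A NumPy reference sketch, assuming s_t is a scalar in [0, 1] that encodes a shift in [-1, 1]:

import numpy as np

def shift_conv_ref(w, s, N):
    z = (2.0 * s - 1.0 + N) % N  # non-negative fractional shift, like Z above
    k = int(np.floor(z))         # integer part
    frac = z - k                 # fractional part (equals 1 - simj above)
    # blend the two neighbouring integer rotations
    return np.roll(w, -k) * (1 - frac) + np.roll(w, -(k + 1)) * frac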
Example #3
 def rot_filters(self, theta):
     fsize = self.filter_size[0]
     ind = T.as_tensor_variable(
         np.indices((fsize, fsize)) - (fsize - 1.0) / 2.0)
     rotate = T.stack(T.cos(theta), -T.sin(theta), T.sin(theta),
                      T.cos(theta)).reshape((2, 2))
     ind_rot = T.tensordot(rotate, ind, axes=((0, 0))) + (fsize - 1.0) / 2.0
     transy = T.clip(ind_rot[0], 0, fsize - 1 - .00001)
     transx = T.clip(ind_rot[1], 0, fsize - 1 - .00001)
     vert = T.iround(transy)
     horz = T.iround(transx)
     return self.W[:, :, vert, horz]
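The index rotation itself is plain NumPy geometry; a hypothetical reference sketch (nearest-neighbour only, mirroring the clipping and rounding above):

import numpy as np

def rot_indices_ref(fsize, theta):
    # grid of (y, x) offsets around the filter centre
    ind = np.indices((fsize, fsize)) - (fsize - 1.0) / 2.0
    rotate = np.array([np.cos(theta), -np.sin(theta),
                       np.sin(theta), np.cos(theta)]).reshape(2, 2)
    ind_rot = np.tensordot(rotate, ind, axes=(0, 0)) + (fsize - 1.0) / 2.0
    ind_rot = np.clip(ind_rot, 0, fsize - 1 - 1e-5)
    return np.rint(ind_rot).astype(int)  # source (row, col) for each output tap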
Example #4
    def __init__(self, srng, data, image_shape, train_flag):
        """
        A layer that applies random transformation to the input image

        :type srng: theano.sandbox.rng_mrg.MRG_RandomStreams
        :param srng: symbolic random number generator

        :type data: theano.tensor.dtensor4
        :param data: symbolic image tensor, of shape image_shape

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type train_flag: symbolic boolean
        :param train_flag: whether or not it's training


        """

        if train_flag is False:
            self.output = data
            return

        p = srng.uniform(size=(1, ), ndim=1)[0]

        temp = ifelse(p > .5, data, data[:, :, :, ::-1])

        pad_x = 2
        pad_y = 2

        temp_padded = theano.shared(numpy.zeros(
            shape=(image_shape[0], image_shape[1], image_shape[2] + pad_y * 2,
                   image_shape[3] + pad_x * 2),
            dtype=theano.config.floatX),
                                    borrow=True)

        # TODO: simplify this, only need 1 number
        rand_x = T.iround(
            srng.uniform(size=(1, ), low=-0.49, high=4.49, ndim=1))[0]
        rand_y = T.iround(
            srng.uniform(size=(1, ), low=-0.49, high=4.49, ndim=1))[0]

        temp_padded = T.set_subtensor(
            temp_padded[:, :, rand_y:rand_y + image_shape[2],
                        rand_x:rand_x + image_shape[3]], temp)

        self.output = temp_padded[:, :, pad_y:pad_y + image_shape[2],
                                  pad_x:pad_x + image_shape[3]]

        self.params = []
Example #5
    def __init__(self, srng, data, image_shape, train_flag):
        """
        A layer that applies random transformation to the input image

        :type srng: theano.sandbox.rng_mrg.MRG_RandomStreams
        :param srng: symbolic random number generator

        :type data: theano.tensor.dtensor4
        :param data: symbolic image tensor, of shape image_shape

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type train_flag: symbolic boolean
        :param train_flag: whether or not it's training


        """

        if train_flag is False:
            self.output = data
            return

        p = srng.uniform(size=(1,), ndim=1)[0]

        temp = ifelse(p > .5, data, data[:, :, :, ::-1])

        pad_x = 2
        pad_y = 2

        temp_padded = theano.shared(
            numpy.zeros(
                shape=(image_shape[0], image_shape[1], image_shape[2] + pad_y * 2, image_shape[3] + pad_x * 2),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        # TODO: simplify this, only need 1 number
        rand_x = T.iround(srng.uniform(size=(1,), low=-0.49, high=4.49, ndim=1))[0]
        rand_y = T.iround(srng.uniform(size=(1,), low=-0.49, high=4.49, ndim=1))[0]

        temp_padded = T.set_subtensor(
            temp_padded[:, :, rand_y:rand_y+image_shape[2], rand_x:rand_x+image_shape[3]],
            temp
        )

        self.output = temp_padded[:, :, pad_y:pad_y+image_shape[2], pad_x:pad_x+image_shape[3]]

        self.params = []
Example #6
    def get_pseudo_likelihood_cost(self, updates):
        """Stochastic approximation to the pseudo-likelihood"""

        # index of bit i in expression p(x_i | x_{\i})
        bit_i_idx = theano.shared(value=0, name = 'bit_i_idx')

        # binarize the input image by rounding to nearest integer
        xi = T.iround(self.input)

        # calculate free energy for the given bit configuration
        fe_xi = self.free_energy(xi)

        # flip bit x_i of matrix xi and preserve all other bits x_{\i}
        # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx]
        xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

        # calculate free energy with bit flipped
        fe_xi_flip = self.free_energy(xi_flip)

        # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i}))) 
        cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

        # increment bit_i_idx % number as part of updates
        updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

        return cost
Example #7
def float_max_pooling(roi, target_y, target_x):
    nb_channels, size_y, size_x = roi.shape
    scale_y = 1.0 * size_y / target_y
    start_indices_y = tt.iround(tt.arange(target_y) *
                                scale_y)  # start indices for each block
    start_indices_y = tt.clip(start_indices_y, 0, size_y - 1)
    next_indices_y = tt.concatenate([start_indices_y[1:], [size_y]], axis=0)

    scale_x = 1.0 * size_x / target_x
    start_indices_x = tt.iround(tt.arange(target_x) * scale_x)
    start_indices_x = tt.clip(start_indices_x, 0, size_x - 1)
    next_indices_x = tt.concatenate([start_indices_x[1:], [size_x]], axis=0)
    pool_out, _ = scan(fn=float_for_x,
                       sequences=[start_indices_y, next_indices_y],
                       non_sequences=[roi, start_indices_x, next_indices_x])
    return pool_out.dimshuffle(2, 0, 1)
Example #8
    def get_pseudo_likelihood_cost(self, updates):
        """Stochastic approximation to the pseudo-likelihood"""

        # index of bit i in expression p(x_i | x_{\i})
        bit_i_idx = theano.shared(value=0, name = 'bit_i_idx')

        # binarize the input image by rounding to nearest integer
        xi = T.iround(self.input)

        # calculate free energy for the given bit configuration
        fe_xi = self.free_energy(xi)

        # flip bit x_i of matrix xi and preserve all other bits x_{\i}
        # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx]
        xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

        # calculate free energy with bit flipped
        fe_xi_flip = self.free_energy(xi_flip)

        # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i}))) 
        cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

        # increment bit_i_idx % number as part of updates
        updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

        return cost
Example #9
File: srbm.py Project: Ducz0r/NeuralNets
 def get_pseudo_likelihood_cost(self, updates):
     """Stochastic approximation to the pseudo-likelihood."""
     # index of bit i in expression p(x_i | x_{\i})
     bit_i_idx = theano.shared(value=0, name='bit_i_idx')
 
     # binarize the input image by rounding to nearest integer
     xi = T.iround(self.input)
 
     # calculate free energy for the given bit configuration
     fe_xi = self.free_energy(xi)
 
     # flip bit x_i of matrix xi and preserve all other bits x_{\i}
     # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns
     # the result to xi_flip, instead of working in place on xi.
     xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])
 
     # calculate free energy with bit flipped
     fe_xi_flip = self.free_energy(xi_flip)
 
     # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i})))
     cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))
 
     # increment bit_i_idx % number as part of updates
     updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible
 
     return cost
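The set_subtensor bit flip used in this version can be checked in isolation; a small runnable sketch assuming a standard Theano setup:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
i = T.iscalar('i')
flip = theano.function([x, i], T.set_subtensor(x[:, i], 1 - x[:, i]))

v = np.array([[0., 1.], [1., 1.]], dtype=theano.config.floatX)
print(flip(v, 0))  # column 0 flipped: [[1. 1.] [0. 1.]]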
Example #10
def _theano_cpu_multi_batch_beam(array, start_idxs, batch_lens, beam_width, wrap_mode, pad_left=0, pad_right=0, idx_dim=0, batch_dim=1):
  array = T.as_tensor(array)
  start_idxs = T.as_tensor(start_idxs)
  if start_idxs.dtype.startswith("float"):
    start_idxs = T.iround(start_idxs)
  batch_lens = T.as_tensor(batch_lens)
  if batch_lens.dtype.startswith("float"):
    batch_lens = T.iround(batch_lens)
  beam_width = T.as_tensor(beam_width)
  if beam_width.dtype.startswith("float"):
    beam_width = T.iround(beam_width)
  pad_left = T.as_tensor(pad_left)
  pad_right = T.as_tensor(pad_right)
  assert array.ndim >= 2
  assert start_idxs.ndim == 1
  assert batch_lens.ndim == 1
  assert beam_width.ndim == 0
  assert idx_dim < array.ndim
  assert batch_dim < array.ndim
  assert idx_dim != batch_dim
  n_batch = array.shape[batch_dim]

  if idx_dim != 0: raise NotImplementedError
  if batch_dim != 1: raise NotImplementedError
  if wrap_mode != "wrap_around": raise NotImplementedError

  idxs_0 = start_idxs.dimshuffle('x', 0)  # (beam,batch)
  idxs = idxs_0 + T.arange(beam_width).dimshuffle(0, 'x')  # (beam,batch)
  idxs_wrapped = idxs % batch_lens.dimshuffle('x', 0)  # (beam,batch)
  batches = T.arange(n_batch)  # (batch,)
  beam = array[idxs_wrapped[:, batches], batches]  # (beam,batch,...)
  if wrap_mode == "wrap_around":
    pass  # Done that.
  elif wrap_mode == "pad":
    cond_left = T.lt(idxs, 0)  # (beam,batch)
    cond_right = T.ge(idxs, batch_lens.dimshuffle('x', 0))  # (beam,batch)
    cond_left_bc = cond_left.dimshuffle(0, 1, *(['x'] * (array.ndim - 2)))
    cond_right_bc = cond_right.dimshuffle(0, 1, *(['x'] * (array.ndim - 2)))
    pad_left_bc = pad_left.dimshuffle(*(['x'] * (array.ndim - pad_left.ndim) +
                                        list(range(pad_left.ndim))))
    pad_right_bc = pad_right.dimshuffle(*(['x'] * (array.ndim - pad_right.ndim) +
                                          list(range(pad_right.ndim))))
    # out-of-range positions get the pad values, everything else keeps the beam
    beam = T.switch(cond_left_bc, T.cast(pad_left_bc, dtype=array.dtype), beam)
    beam = T.switch(cond_right_bc, T.cast(pad_right_bc, dtype=array.dtype), beam)
  else:
    raise Exception("MultiBatchBeam: unknown wrap mode: %r" % wrap_mode)
  return beam
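For the implemented "wrap_around" mode, the indexing boils down to a few lines of NumPy; a reference sketch assuming array has shape (time, batch, ...) as the asserts require:

import numpy as np

def multi_batch_beam_ref(array, start_idxs, batch_lens, beam_width):
    n_batch = array.shape[1]
    idxs = start_idxs[None, :] + np.arange(beam_width)[:, None]  # (beam, batch)
    idxs_wrapped = idxs % batch_lens[None, :]                    # wrap per batch entry
    return array[idxs_wrapped, np.arange(n_batch)[None, :]]      # (beam, batch, ...)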
Example #11
def slice_pooling(roi, target_y, target_x):
    nb_channels, size_y, size_x = roi.shape
    # WxH -> axH
    scale_x = 1.0 * size_x / target_x  # scale to shrink/expand
    slice_indices_x = tt.iround(tt.arange(target_x) *
                                scale_x)  # indices from which to take slices
    slice_indices_x = tt.clip(slice_indices_x, 0,
                              size_x - 1)  # min = 0, max= size_x-1
    # slice along x axis
    roi = roi[:, :, slice_indices_x]
    # axH -> axb
    scale_y = 1.0 * size_y / target_y
    slice_indices_y = tt.iround(tt.arange(target_y) * scale_y)
    slice_indices_y = tt.clip(slice_indices_y, 0, size_y -
                              1)  # maximum can not be greater than size_y-1
    roi = roi[:, slice_indices_y, :]
    return roi
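On concrete numbers: shrinking 7 columns to 3 keeps columns 0, 2 and 5. A NumPy check (np.round breaks .5 ties differently from T.iround's default half_away_from_zero, which does not matter for these values):

import numpy as np

target_x, size_x = 3, 7
idx = np.round(np.arange(target_x) * size_x / float(target_x)).astype(int)
idx = np.clip(idx, 0, size_x - 1)
print(idx)  # [0 2 5]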
Example #12
def slice_pooling1(roi, target_y, target_x):
    nb_channels, size_y, size_x = roi.shape
    # WxH -> Wxb
    scale_y = 1.0 * size_y / target_y
    slice_indices_y = tt.iround(tt.arange(target_y) * scale_y)
    slice_indices_y = tt.clip(slice_indices_y, 0, size_y - 1)
    roi = roi[:, slice_indices_y, :]
    # Wxb -> axb
    scale_x = 1.0 * size_x / target_x  # scale to shrink
    slice_indices_x = tt.iround(tt.arange(target_x) *
                                scale_x)  # indices from which to take slices
    slice_indices_x = tt.clip(slice_indices_x, 0,
                              size_x - 1)  # min = 0, max= size_x-1
    # slice along x axis
    roi = roi[:, :, slice_indices_x]

    return roi
Example #13
    def input_row_from_variables(ori_ip, dest_ip, ori_lat, ori_long, dest_lat,
                                 dest_long, ori_type, dest_type, dist,
                                 latency):
        '''Create an input row for the MLP from the inputs'''

        input_row = tensor.zeros([input_size])

        offset = 0

        ips = [ori_ip, dest_ip]
        for ip in ips:
            for _ in range(4):
                input_row = add_one_shot(input_row, offset,
                                         tensor.mod(ip, 256))
                ip = tensor.int_div(ip, 256)
                offset += 256

        for lat_, long_ in [(ori_lat, ori_long), (dest_lat, dest_long)]:
            translated_lat = tensor.iround(
                (coordinate_size - 1) * (lat_ / 180 + 0.5))
            input_row = add_thermo(input_row, offset, translated_lat)
            offset += coordinate_size

            translated_long = tensor.iround(
                (coordinate_size - 1) * (long_ / 360 + 0.5))
            input_row = add_thermo(input_row, offset, translated_long)
            offset += coordinate_size

        for type_ in [ori_type, dest_type]:
            input_row = add_one_shot(input_row, offset, type_ + 1)
            offset += type_size

        translated_dist = tensor.iround(
            (dist_size - 1) * (tensor.minimum(1, dist / max_earth_distance)))
        input_row = add_thermo(input_row, offset, translated_dist)
        offset += dist_size

        translated_dist = tensor.iround(
            (small_dist_size - 1) *
            (tensor.minimum(1, dist / max_earth_distance)))
        input_row = add_thermo(input_row, offset, translated_dist)

        #could be useful if we want to add something
        offset += small_dist_size

        return input_row
Example #14
File: ae.py Project: xiaoyili/xylearn
    def reconstruct(self, testX, rounding=0):
        if rounding:
            z = T.iround(self.propdown(self.propup(testX)))
        else:
            z = self.propdown(self.propup(testX))

        fn = theano.function([], theano.Out(z, borrow=True), name='ae_recon')
        return fn()
Example #15
 def get_output_for(self, input, **kwargs):
     # this is a bit hacky but should allow us to feed
     # input in here that stems from an array containing
     # multiple inputs of different types
     #input = T.cast(input * self.scale_task, 'int32')
     input = T.iround(input * self.scale_task)
     res = self.W[input]
     return res
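Here T.iround recovers an integer task index from a float input so that it can index the embedding matrix W; a toy check, assuming scale_task undoes an earlier division (hypothetical values):

import numpy as np
import theano
import theano.tensor as T

W = theano.shared(np.eye(4, dtype=theano.config.floatX))
x = T.vector('x')   # task ids stored as floats, e.g. id / scale_task
scale_task = 4.0
lookup = theano.function([x], W[T.iround(x * scale_task)])
print(lookup(np.array([0.25, 0.75], dtype=theano.config.floatX)))  # rows 1 and 3 of W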
Example #16
def ShiftConv(w_t_g, s_t, N, num_shifts):
    # pad = (num_shifts//2, (num_shifts-1)//2)
    # w_t_g_pd_ = T.concatenate([w_t_g[(-pad[0]-1):-1], w_t_g, w_t_g[:(pad[1])]])
    # w_t_g_pd = w_t_g_pd_.dimshuffle('x','x','x', 0)
    # filter = s_t.dimshuffle('x', 'x', 'x', 0)
    # convolution = T.nnet.conv2d(w_t_g_pd, filter,
    # input_shape=(1, 1, 1, N + pad[0] + pad[1]),
    # filter_shape=(1, 1, 1, num_shifts),
    # subsample=(1, 1),
    # border_mode='valid')
    # w_t_s = convolution[0, 0, 0, :]
    shift = 2.*s_t-1.
    Z = T.mod(shift+N, N)
    simj = 1 - (Z - T.floor(Z))
    imj = T.mod(T.arange(N) + T.iround(T.floor(Z)),N)
    w_t_g_roll_1 = T.roll(w_t_g, -T.iround(T.floor(Z)))
    w_t_g_roll_2 = T.roll(w_t_g, -(T.iround(T.floor(Z))+1))
    w_t_s = w_t_g_roll_1*simj + w_t_g_roll_2*(1-simj)
    return w_t_s
Example #17
File: ae.py Project: xiaoyili/xylearn
    def project(self, dataX, rounding=0):
        """
        project dataX into hidden space. In other words, when rbm was trained,
        we can get new representation
        """
        if rounding:
            h1_mean = T.iround(self.propup(dataX))
        else:
            h1_mean = self.propup(dataX)
        fn = theano.function([], theano.Out(h1_mean, borrow=True), name='project')

        return fn()
Example #18
File: models.py Project: lamblin/pings
 def input_row_from_variables(ip_, lat_, long_, type_):
     '''Create an input row for the MLP from the inputs'''
     
     input_row = tensor.zeros([input_size])
     offset = 0
     
     for _ in range(4):
         input_row = add_one_shot(input_row, offset, tensor.mod(ip_, 256))
         ip_ = tensor.int_div(ip_, 256)
         offset += 256
     
     translated_lat = tensor.iround((coordinate_size-1) * (lat_/180 + 0.5))
     input_row = add_thermo(input_row, offset, translated_lat)
     offset += coordinate_size
     
     translated_long = tensor.iround((coordinate_size-1) * (long_/360 + 0.5))
     input_row = add_thermo(input_row, offset, translated_long)
     offset += coordinate_size
     
     input_row = add_one_shot(input_row, offset, type_ + 1)
     offset += type_size
     
     return input_row
Example #19
        def TopAccuracy2C(pred=None, truth=None, symmetric=False):

            M1s = T.ones_like(truth, dtype=np.int8)
            LRsel = T.triu(M1s, 24)
            MLRsel = T.triu(M1s, 12)
            SMLRsel = T.triu(M1s, 6)
            MRsel = MLRsel - LRsel
            SRsel = SMLRsel - MLRsel

            dataLen = truth.shape[0]

            pred0 = pred[:, :, 0]

            if symmetric:
                avg_pred = (pred0 + pred0.dimshuffle(1, 0)) / 2.0
            else:
                avg_pred = pred0

            #pred_truth = T.concatenate( (avg_pred, truth.dimshuffle(0, 1, 'x') ), axis=2)
            pred_truth = T.stack([avg_pred, T.cast(truth, 'int32')], axis=2)

            accuracyList = []
            for Rsel in [LRsel, MRsel, MLRsel, SRsel]:
                selected_pred_truth = pred_truth[Rsel.nonzero()]

                ## sort by the predicted value for label 0 from the largest to the smallest
                selected_pred_truth_sorted = selected_pred_truth[(
                    selected_pred_truth[:, 0]).argsort()[::-1]]

                #print 'topRatio =', topRatio
                numTops = T.minimum(T.iround(dataLen * topRatio),
                                    selected_pred_truth_sorted.shape[0])

                selected_sorted_truth = T.cast(
                    selected_pred_truth_sorted[:, -1], 'int32')
                numTruths = T.bincount(selected_sorted_truth, minlength=2)
                numCorrects = T.bincount(selected_sorted_truth[0:numTops],
                                         minlength=2)
                #numTops = T.minimum(numTops, numTruths[0])
                accuracyList.append(
                    T.stack([
                        numCorrects[0] * 1. /
                        (numTops + 0.001), numTops, numTruths[0]
                    ],
                            axis=0))

            return T.stacklists(accuracyList)
Example #20
def _per_roi_pooling(coord, x):
    #x = tt.tensor3() # 512x7x7 float tensor
    #coord = tt.fvector() # [ xmin, ymin, xmax, ymax ] in [0,1] x-width,y-height
    # step 1: float coord to int
    nb_rows = x.shape[1]  # height,y
    nb_cols = x.shape[2]  # width,x
    icoords = tt.iround(
        coord * [nb_cols, nb_rows, nb_cols, nb_rows
                 ])  # xmin,xmax multiply nb_cols, ymin,ymax multiply nb_rows
    # 0 <= xmin < nb_cols
    xmin = tt.clip(icoords[0], 0, nb_cols - 1)
    # 0 <= ymin < nb_rows
    ymin = tt.clip(icoords[1], 0, nb_rows - 1)

    xmax = tt.clip(icoords[2], 1 + xmin,
                   nb_cols)  # min(xmax) = 1+xmin, max(xmax) = nb_cols
    ymax = tt.clip(icoords[3], 1 + ymin,
                   nb_rows)  # min (ymax) = 1+ymin, max(ymax) = nb_rows

    # if xmin == xmax == nb_cols
    xmin = ifelse(tt.eq(xmax, xmin), xmax - 1, xmin)
    # if ymin == ymax == nb_rows
    ymin = ifelse(tt.eq(ymax, ymin), ymax - 1, ymin)

    # step 2: extract raw sub-stensor
    roi = x[:, ymin:ymax, xmin:xmax]
    # step 3: resize raw to target_hx target_w
    '''
    # method1 (slow): upsampling -> downsampling 
    subtensor_h = ymax - ymin
    subtensor_w = xmax - xmin
    # upsample by ( target_h, target_w ) -> ( subtensor_h * target_h, subtensor_w * target_w )
    kernel = tt.ones((target_h, target_w)) # create ones filter
    roi_up,_ =scan(fn=lambda r2d, kernel: kron(r2d,kernel),sequences = roi,non_sequences = kernel)
    # downsample to (target_h, target_w)
    #target = roi_up[:,::subtensor_h,::subtensor_w]
    target = max_pooling(roi_up, subtensor_h, subtensor_w)
    '''
    # method 2
    if cfg.NET.POOL_METHOD == 'slicepool':
        target = slice_pooling(roi, target_h, target_w)
    else:
        target = float_max_pooling(roi, target_h, target_w)
    return K.flatten(target)
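Step 1 above (float coordinates to a clipped integer box that is at least 1x1) on concrete numbers, as a NumPy sketch:

import numpy as np

coord = np.array([0.1, 0.2, 0.5, 0.9])   # xmin, ymin, xmax, ymax in [0, 1]
nb_rows = nb_cols = 7
ic = np.rint(coord * [nb_cols, nb_rows, nb_cols, nb_rows]).astype(int)
xmin = np.clip(ic[0], 0, nb_cols - 1)
ymin = np.clip(ic[1], 0, nb_rows - 1)
xmax = np.clip(ic[2], 1 + xmin, nb_cols)  # at least one column wide
ymax = np.clip(ic[3], 1 + ymin, nb_rows)  # at least one row tall
print(xmin, ymin, xmax, ymax)             # 1 1 4 6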
Example #21
 def get_output_for(self, input, **kwargs):
     # this is a bit hacky but should allow us to feed
     # input in here that stems from an array containing
     # multiple inputs of different types
     #input = T.cast(input * self.scale_task, 'int32')
     input = T.iround(input * self.scale_task)
     if self.L is not None:
         #if self.rank1:
         W = T.dot(self.W, self.W.T)
         #else:
         #    W = self.W
         if self.multiplicative:
             #W += T.eye(self.n_tasks)
             res = (1 + W[input]) * self.L[input]
         else:
             res = W[input] + self.L[input]
     else:
         res = self.W[input]
     return res
Example #22
 def unsup_cost_pseudo_likelihood(self, updates):
     
     index_bit_i = theano.shared(value=0, name='index_bit_i')
     
     #visible inputs rounded to the nearest bit
     vi_bit_i = T.iround(self.vis_input)
     #unsupervised free energy for rounded visible inputs
     vi_free_energy = self.unsup_free_energy(vi_bit_i)
     
     #visible input with bit i flipped (i.e. 0->1, 1->0)
     vi_flipped_bit_i = T.set_subtensor(vi_bit_i[:, index_bit_i], 1 - vi_bit_i[:, index_bit_i])
     #unsupervised free energy for visible inputs with bit i flipped
     vi_flipped_free_energy = self.unsup_free_energy(vi_flipped_bit_i)
     
     cost = T.mean(self.vis_num * T.log(T.nnet.sigmoid(vi_flipped_free_energy - vi_free_energy)))
     
     updates[index_bit_i] = (index_bit_i + 1) % self.vis_num
     
     return cost
Example #23
    def unsup_cost_pseudo_likelihood(self, updates):

        index_bit_i = theano.shared(value=0, name='index_bit_i')

        #visible inputs rounded to the nearest bit
        vi_bit_i = T.iround(self.vis_input)
        #unsupervised free energy for rounded visible inputs
        vi_free_energy = self.unsup_free_energy(vi_bit_i)

        #visible input with bit i flipped (i.e. 0->1, 1->0)
        vi_flipped_bit_i = T.set_subtensor(vi_bit_i[:, index_bit_i],
                                           1 - vi_bit_i[:, index_bit_i])
        #unsupervised free energy for visible inputs with bit i flipped
        vi_flipped_free_energy = self.unsup_free_energy(vi_flipped_bit_i)

        cost = T.mean(
            self.vis_num *
            T.log(T.nnet.sigmoid(vi_flipped_free_energy - vi_free_energy)))

        updates[index_bit_i] = (index_bit_i + 1) % self.vis_num

        return cost
Example #24
    def get_pseudo_likelihood_cost(self, updates):
        """Stochastic approximation to the pseudo-likelihood"""

        # index of bit i in expression p(x_i | x_{\i})
        bit_i_idx = theano.shared(value=0, name='bit_i_idx')

        # binarize the input image by rounding to nearest integer
        xi = T.iround(self.input)

        # calculate free energy for the given bit configuration
        fe_xi = self.free_energy(xi)

        # flip bit x_i of matrix xi and preserve all other bits x_{\i}
        # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx]
        xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

        # calculate free energy with bit flipped
        fe_xi_flip = self.free_energy(xi_flip)

        # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i})))
        cost = T.mean(self.n_visible *
                      T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi)))

        # increment bit_i_idx % number as part of updates
        updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

        return cost
Example #25
        def do_preproc(input,label):
            if not self.need_proc:
                return input,label
            if self.deal_label:
                assert len(self.label_shape)==3
            else:
                outlabel=label
            srs=self.rng
            w = self.image_shape[-1]
            h = self.image_shape[-2]
            target = T.as_tensor_variable(np.indices((self.image_shape[-2], self.image_shape[-1])))
            if self.deal_label:
                tarlab = T.as_tensor_variable(np.indices((self.label_shape[-2], self.label_shape[-1])))
                lw = self.label_shape[-1]
                lh = self.label_shape[-2]

            # Translate
            if self.translation:
                transln = self.translation * srs.uniform((2, 1, 1), -1)
                target += transln
                if self.deal_label:
                    tarlab += transln

            # Apply elastic transform
            if self.magnitude:
                # Build a gaussian filter
                var = self.sigma ** 2
                filt = np.array([[np.exp(-.5 * (i * i + j * j) / var)
                                 for i in range(-self.sigma, self.sigma + 1)]
                                 for j in range(-self.sigma, self.sigma + 1)], dtype=float_x)
                filt /= 2 * np.pi * var

                # Elastic
                elast = self.magnitude * srs.normal((2, h, w))
                elast = sigconv.conv2d(elast, filt, (2, h, w), filt.shape, 'full')
                elast = elast[:, self.sigma:h + self.sigma, self.sigma:w + self.sigma]
                target += elast
                if self.deal_label:
                    raise NotImplementedError()

            # Center at 'about' half way
            if self.zoom-1 or self.angle:
                origin = srs.uniform((2, 1, 1), .25, .75) * \
                         np.array((h, w)).reshape((2, 1, 1))
                if self.deal_label:
                    lorigin = srs.uniform((2, 1, 1), .25, .75) * \
                         np.array((lh, lw)).reshape((2, 1, 1))
                    tarlab -= lorigin
                target -= origin

                # Zoom
                if self.zoom-1:
                    zoomer = T.exp(np.log(self.zoom) * srs.uniform((2, 1, 1), -1))
                    target *= zoomer
                    if self.deal_label:
                        tarlab *= zoomer

                # Rotate
                if self.angle:
                    theta = self.angle * np.pi / 180 * srs.uniform(low=-1)
                    c, s = T.cos(theta), T.sin(theta)
                    rotate = T.stack(c, -s, s, c).reshape((2,2))
                    target = T.tensordot(rotate, target, axes=((0, 0)))
                    if self.deal_label:
                        tarlab = T.tensordot(rotate, tarlab, axes=((0, 0)))

                # Uncenter
                target += origin
                if self.deal_label:
                    tarlab += lorigin

            # Clip the mapping to valid range and linearly interpolate
            transy = T.clip(target[0], 0, h - 1 - .001)
            transx = T.clip(target[1], 0, w - 1 - .001)
            if self.deal_label:
                ltransy = T.clip(tarlab[0], 0, lh - 1 - .001)
                ltransx = T.clip(tarlab[1], 0, lw - 1 - .001)

            if self.nearest:
                vert = T.iround(transy)
                horz = T.iround(transx)
                output = input[:, :, vert, horz]
            else:
                topp = T.cast(transy, 'int32')
                left = T.cast(transx, 'int32')
                fraction_y = T.cast(transy - topp, float_x)
                fraction_x = T.cast(transx - left, float_x)

                output = input[:, :, topp, left] * (1 - fraction_y) * (1 - fraction_x) + \
                         input[:, :, topp, left + 1] * (1 - fraction_y) * fraction_x + \
                         input[:, :, topp + 1, left] * fraction_y * (1 - fraction_x) + \
                         input[:, :, topp + 1, left + 1] * fraction_y * fraction_x
            if self.deal_label:
                vert = T.iround(ltransy)
                horz = T.iround(ltransx)
                outlabel = label[:, vert, horz]


            # Now add some noise
            if self.pflip:
                mask = srs.binomial(n=1, p=self.pflip, size=input.shape, dtype=float_x)
                output = (1 - output) * mask + output * (1 - mask)
            return output,outlabel
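The else branch is standard bilinear interpolation; for a single sample position it reduces to the following NumPy arithmetic:

import numpy as np

img = np.array([[0., 1.], [2., 3.]])
ty, tx = 0.25, 0.5                # fractional sample position
top, left = int(ty), int(tx)
fy, fx = ty - top, tx - left
val = (img[top, left] * (1 - fy) * (1 - fx) +
       img[top, left + 1] * (1 - fy) * fx +
       img[top + 1, left] * fy * (1 - fx) +
       img[top + 1, left + 1] * fy * fx)
print(val)  # 1.0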
Example #26
    def get_output_for(self, grids, **kwargs):
        height = width = depth = self.grid_side

        # np.indices() returns 3 train_grids exactly as big as the original one.
        # The first grid contains the X coordinate of each point at the location of the point.
        # The second grid contains the Y coordinate of each point at the location of the point.
        # The third grid contains the Z coordinate of each point at the location of the point.
        indices_grids = T.as_tensor_variable(np.indices((width, height, depth),
                                                        dtype=floatX),
                                             name="grid_indices")

        # Translate:
        # the translation vector will be broad-casted:
        # t_x will be added to all values in the first indices grid
        # t_y will be added to all values in the second indices grid
        # t_z will be added to all values in the third indices grid
        # resulting in a translation in the direction of translation_vector
        indices_grids = T.add(indices_grids, self._translation_vector())

        # Rotate:
        # the origin is just the center point in the grid
        origin = T.as_tensor_variable(np.array(
            (width // 2, height // 2, depth // 2), dtype=floatX).reshape(
                (3, 1, 1, 1)),
                                      name='origin')
        # We first center all indices, just as in the translation above
        indices_grids = T.sub(indices_grids, origin)

        # T.tensordot is a generalized version of a dot product.
        # The axes parameter is of length 2, and it gives the axis for each of the two tensors
        # passed, over which the summation will occur. Of course, those two axis need to be of the
        # same dimension.
        # Here we have a (3 x 3) matrix <dot product> (3, width, height, depth) grid, and the
        # summation happens over the first axis (index 0). The result is of size
        # (3 x width x height x depth) and contains again 3 train_grids of this time
        # **rotated** indices for each dimension X, Y, Z respectively.
        indices_grids = T.tensordot(self._rotation_matrix(),
                                    indices_grids,
                                    axes=(0, 0))

        # Decenter
        indices_grids = T.add(indices_grids, origin)

        # Since indices_grids was transformed, we now might have indices at certain locations
        # that are out of the range of the original grid. We thus need to clip them to valid values.
        # For the first grid: between 0 and width - 1
        # For the second grid: between 0 and height - 1
        # For the third grid: between 0 and depth - 1
        # Note that now the index train_grids might contain real numbers (not only integers).
        x_indices = T.clip(indices_grids[0], 0, width - 1 - .001)
        y_indices = T.clip(indices_grids[1], 0, height - 1 - .001)
        z_indices = T.clip(indices_grids[2], 0, depth - 1 - .001)

        if self.interpolation == "nearest":
            # Here we just need to round the indices for each spatial dimension to the closest
            # integer, and than index the original input grid with the 3 indices train_grids
            # (numpy style indexing with arrays) to obtain the final result. Note that here,
            # as usual, the multi-dim array that you index with has the
            # same spatial dimensionality as the multi-dim array being index.

            # We intentionally flatten everything before indexing, so that Theano can use
            # ArraySubtensor1 instead of ArraySubtensor, because only the former can be run
            # on the GPU.
            # https://groups.google.com/forum/#!topic/theano-users/XkPJP6on50Y
            flat_grids, flat_indices = grids.reshape(
                (grids.shape[0], grids.shape[1], -1)), \
                                       width * height * T.iround(
                                           x_indices).flatten() + \
                                       height * T.iround(y_indices).flatten() + \
                                       T.iround(z_indices).flatten()
            output = flat_grids[:, :, flat_indices]
            output = output.reshape(grids.shape)
        else:
            flat_grids = grids.reshape((grids.shape[0], grids.shape[1], -1))
            # For linear interpolation, we use the transformed indices x_indices, y_indices and
            # z_indices to linearly calculate the desired values at each of the original indices
            # in each dimension.

            # Again, everything is flattened so that Theano can put it on the GPU, just as in
            # the other part of this if block.
            # https://groups.google.com/forum/#!topic/theano-users/XkPJP6on50Y
            top = T.cast(y_indices, 'int32').flatten()
            left = T.cast(x_indices, 'int32').flatten()
            forward = T.cast(z_indices, 'int32').flatten()

            x_indices = x_indices.flatten()
            y_indices = y_indices.flatten()
            z_indices = z_indices.flatten()

            # this computes the amount of shift into each direction from the original position
            fraction_y = T.cast(y_indices - top,
                                theano.config.floatX).flatten()
            fraction_x = T.cast(x_indices - left,
                                theano.config.floatX).flatten()
            fraction_z = T.cast(z_indices - forward,
                                theano.config.floatX).flatten()

            # then the new value is the linear combination based on the shifts in all
            # of the 8 possible directions in 3D
            output = flat_grids[:, :, self.grid_side ** 2 * top + self.grid_side * left + forward] \
                     * (1 - fraction_y) * (1 - fraction_x) * (1 - fraction_z) + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * top + self.grid_side * left + (forward + 1)] \
                     * (1 - fraction_y) * (1 - fraction_x) * fraction_z + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * top + self.grid_side * (left + 1) + forward] \
                     * (1 - fraction_y) * fraction_x * (1 - fraction_z) + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * top + self.grid_side * (left + 1) + (forward + 1)] \
                     * (1 - fraction_y) * fraction_x * fraction_z + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * (top + 1) + self.grid_side * left + forward] \
                     * fraction_y * (1 - fraction_x) * (1 - fraction_z) + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * (top + 1) + self.grid_side * left + (forward + 1)] \
                     * fraction_y * (1 - fraction_x) * fraction_z + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * (top + 1) + self.grid_side * (left + 1) + forward] \
                     * fraction_y * fraction_x * (1 - fraction_z) + \
                     flat_grids[:, :,
                     self.grid_side ** 2 * (top + 1) + self.grid_side * (left + 1) + (forward + 1)] \
                     * fraction_y * fraction_x * fraction_z
            output = output.reshape(grids.shape)

        return output
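The flattened-index trick in the nearest branch relies on the grid being cubic (height == width == depth == grid_side); a quick NumPy sanity check:

import numpy as np

s = 4                                    # grid_side
g = np.arange(s ** 3).reshape(s, s, s)   # C-ordered grid
x, y, z = 2, 1, 3
assert g.flatten()[s * s * x + s * y + z] == g[x, y, z]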
Example #27
        network, output = load_network(netfile)
    except Exception, e:
        print("Could not load network: %s" % e)

    print("Loading test dataset...")
    # load test data chunk
    dl = DataLoader(image_size=IMAGE_SIZE)
    test_filenames = dl.test_images
    n_predictions = len(test_filenames)
    print("Compiling theano functions...")
    # set up symbolic variables
    X = T.tensor4('X')
    X_batch = T.tensor4('X_batch')
    batch_index = T.iscalar('batch_index')

    pred = T.iround(output.get_output(X_batch, deterministic=True))
    predict = theano.function(
        [theano.Param(X_batch)],
        pred,
        givens={
            X: X_batch
            },
        )

    print("Predicting...")
    predictions = []
    i = 0
    for test_chunk in dl.test_gen():
        n_batches = int(np.ceil(len(test_chunk) * 1. / BATCH_SIZE))
        for b in xrange(n_batches):
            predictions.append(predict(test_chunk[b * BATCH_SIZE: (b + 1) * BATCH_SIZE]))
Example #28
def _theano_cpu_multi_batch_beam(array,
                                 start_idxs,
                                 batch_lens,
                                 beam_width,
                                 wrap_mode,
                                 pad_left=0,
                                 pad_right=0,
                                 idx_dim=0,
                                 batch_dim=1):
    array = T.as_tensor(array)
    start_idxs = T.as_tensor(start_idxs)
    if start_idxs.dtype.startswith("float"):
        start_idxs = T.iround(start_idxs)
    batch_lens = T.as_tensor(batch_lens)
    if batch_lens.dtype.startswith("float"):
        batch_lens = T.iround(batch_lens)
    beam_width = T.as_tensor(beam_width)
    if beam_width.dtype.startswith("float"):
        beam_width = T.iround(beam_width)
    pad_left = T.as_tensor(pad_left)
    pad_right = T.as_tensor(pad_right)
    assert array.ndim >= 2
    assert start_idxs.ndim == 1
    assert batch_lens.ndim == 1
    assert beam_width.ndim == 0
    assert idx_dim < array.ndim
    assert batch_dim < array.ndim
    assert idx_dim != batch_dim
    n_batch = array.shape[batch_dim]

    if idx_dim != 0: raise NotImplementedError
    if batch_dim != 1: raise NotImplementedError
    if wrap_mode != "wrap_around": raise NotImplementedError

    idxs_0 = start_idxs.dimshuffle('x', 0)  # (beam,batch)
    idxs = idxs_0 + T.arange(beam_width).dimshuffle(0, 'x')  # (beam,batch)
    idxs_wrapped = idxs % batch_lens.dimshuffle('x', 0)  # (beam,batch)
    batches = T.arange(n_batch)  # (batch,)
    beam = array[idxs_wrapped[:, batches], batches]  # (beam,batch,...)
    if wrap_mode == "wrap_around":
        pass  # Done that.
    elif wrap_mode == "pad":
        cond_left = T.lt(idxs, 0)  # (beam,batch)
        cond_right = T.ge(idxs, batch_lens.dimshuffle('x', 0))  # (beam,batch)
        cond_left_bc = cond_left.dimshuffle(0, 1,
                                            *(['x'] * (array.ndim - 2)))
        cond_right_bc = cond_right.dimshuffle(0, 1,
                                              *(['x'] * (array.ndim - 2)))
        pad_left_bc = pad_left.dimshuffle(
            *(['x'] * (array.ndim - pad_left.ndim) +
              list(range(pad_left.ndim))))
        pad_right_bc = pad_right.dimshuffle(
            *(['x'] * (array.ndim - pad_right.ndim) +
              list(range(pad_right.ndim))))
        # out-of-range positions get the pad values, everything else keeps the beam
        beam = T.switch(cond_left_bc,
                        T.cast(pad_left_bc, dtype=array.dtype), beam)
        beam = T.switch(cond_right_bc,
                        T.cast(pad_right_bc, dtype=array.dtype), beam)
    else:
        raise Exception("MultiBatchBeam: unknown wrap mode: %r" % wrap_mode)
    return beam
Example #29
 def get_output_for(self, input, **kwargs):
     return T.iround(T.clip(input.pop(0), self.min, self.max),
                     mode="half_away_from_zero")
Example #30
    def __init__(self,
                 n_input=3,
                 n_memblock=100,
                 n_output=2,
                 lr=0.0001,
                 m=0.9,
                 l2rate=0.0001,
                 dense=True):
        self.dense = dense
        input_sequence = T.matrix()
        gold_sequence = T.matrix()  # 1, n_output

        #input_sequence.tag.test_value = [[0,0,1],[0,1,0],[1,0,0]]
        #gold_sequence.tag.test_value = [[1,0],[0,1],[0,0]]
        ''' START WEIGHTS - 0=forward; 1=backward'''
        wiig = shared_normal(n_input, n_memblock,
                             0.01, "wiig0"), shared_normal(
                                 n_input, n_memblock, 0.01,
                                 "wiig1")  # Weights from inputs to gates
        wmig = shared_normal(
            n_memblock, n_memblock, 0.01, "wmig0"), shared_normal(
                n_memblock, n_memblock, 0.01,
                "wmig1")  # Weights from cells to gates - peepholes
        #big = shared_zeros(n_memblock,"big0"),shared_zeros(n_memblock,"big1")
        big = theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX),
            "big0"), theano.shared(
                numpy.zeros(n_memblock, dtype=theano.config.floatX), "big1")

        wifg = shared_normal(n_input, n_memblock, 0.01,
                             "wifg0"), shared_normal(n_input, n_memblock, 0.01,
                                                     "wifg1")
        wmfg = shared_normal(n_memblock, n_memblock, 0.01,
                             "wmfg0"), shared_normal(n_memblock, n_memblock,
                                                     0.01, "wmfg1")
        #bfg = shared_zeros(n_memblock,"bfg0"),shared_zeros(n_memblock,"bfg1")
        bfg = theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX),
            "bfg0"), theano.shared(
                numpy.zeros(n_memblock, dtype=theano.config.floatX), "bfg1")

        wiog = shared_normal(n_input, n_memblock, 0.01,
                             "wiog0"), shared_normal(n_input, n_memblock, 0.01,
                                                     "wiog1")
        wmog = shared_normal(n_memblock, n_memblock, 0.01,
                             "wmog0"), shared_normal(n_memblock, n_memblock,
                                                     0.01, "wmog1")
        #bog = shared_zeros(n_memblock,"bog0"),shared_zeros(n_memblock,"bog1")
        bog = theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX),
            "bog0"), theano.shared(
                numpy.zeros(n_memblock, dtype=theano.config.floatX), "bog1")

        wim = shared_normal(n_input, n_memblock, 0.01, "wim0"), shared_normal(
            n_input, n_memblock, 0.01, "wim1")  # Weight from input to mem
        #bm = shared_zeros(n_memblock,"bm0"),shared_zeros(n_memblock,"bm1") # Bias from input to mem
        bm = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),
                           "bm0"), theano.shared(
                               numpy.zeros(n_memblock,
                                           dtype=theano.config.floatX), "bm1")

        wmo = shared_normal(n_memblock, n_output, 0.01, "wmo0"), shared_normal(
            n_memblock, n_output, 0.01, "wmo1")  # Weight from input to mem

        slo = theano.shared(numpy.random.normal(scale=0.01),
                            name="slo0"), theano.shared(
                                numpy.random.normal(scale=0.01), name="slo1")
        bo = theano.shared(numpy.zeros(n_output, dtype=theano.config.floatX),
                           "bo")  # Bias from input to mem
        ''' END OF WEIGHTS '''

        self.params = wiig[0], wiig[1], big[0], big[1], wifg[0], wifg[1], bfg[
            0], bfg[1], wiog[0], wiog[1], bog[0], bog[1], wmig[0], wmig[
                1], wmfg[0], wmfg[1], wmog[0], wmog[1], wim[0], wim[1], bm[
                    0], bm[1], wmo[0], wmo[1], slo[0], slo[1], bo
        ''' START DELTAS - 0=forward; 1=backward'''
        dwiig = shared_normal(n_input, n_memblock,
                              0.01, "dwiig0"), shared_normal(
                                  n_input, n_memblock, 0.01,
                                  "dwiig1")  # Weights from inputs to gates
        dwmig = shared_normal(
            n_memblock, n_memblock, 0.01, "dwmig0"), shared_normal(
                n_memblock, n_memblock, 0.01,
                "dwmig1")  # Weights from cells to gates - peepholes
        #dbig = shared_zeros(n_memblock,"big0"),shared_zeros(n_memblock,"dbig1")
        dbig = theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX),
            "dbig0"), theano.shared(
                numpy.zeros(n_memblock, dtype=theano.config.floatX), "dbig1")

        dwifg = shared_normal(n_input, n_memblock, 0.01,
                              "dwifg0"), shared_normal(n_input, n_memblock,
                                                       0.01, "dwifg1")
        dwmfg = shared_normal(n_memblock, n_memblock, 0.01,
                              "dwmfg0"), shared_normal(n_memblock, n_memblock,
                                                       0.01, "dwmfg1")
        #dbfg = shared_zeros(n_memblock,"bfg0"),shared_zeros(n_memblock,"dbfg1")
        dbfg = theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX),
            "dbfg0"), theano.shared(
                numpy.zeros(n_memblock, dtype=theano.config.floatX), "dbfg1")

        dwiog = shared_normal(n_input, n_memblock, 0.01,
                              "dwiog0"), shared_normal(n_input, n_memblock,
                                                       0.01, "dwiog1")
        dwmog = shared_normal(n_memblock, n_memblock, 0.01,
                              "dwmog0"), shared_normal(n_memblock, n_memblock,
                                                       0.01, "dwmog1")
        #dbog = shared_zeros(n_memblock,"bog0"),shared_zeros(n_memblock,"dbog1")
        dbog = theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX),
            "dbog0"), theano.shared(
                numpy.zeros(n_memblock, dtype=theano.config.floatX), "dbog1")

        dwim = shared_normal(n_input, n_memblock,
                             0.01, "dwim0"), shared_normal(
                                 n_input, n_memblock, 0.01,
                                 "dwim1")  # Weight from input to mem
        #dbm = shared_zeros(n_memblock,"bm0"),shared_zeros(n_memblock,"dbm1") # Bias from input to mem
        dbm = theano.shared(
            numpy.zeros(n_memblock, dtype=theano.config.floatX),
            "dbm0"), theano.shared(
                numpy.zeros(n_memblock, dtype=theano.config.floatX), "dbm1")

        dwmo = shared_normal(n_memblock, n_output,
                             0.01, "dwmo0"), shared_normal(
                                 n_memblock, n_output, 0.01,
                                 "dwmo1")  # Weight from input to mem

        dslo = theano.shared(numpy.random.normal(scale=0.01),
                             name="dslo0"), theano.shared(
                                 numpy.random.normal(scale=0.01), name="dslo1")

        dbo = theano.shared(numpy.zeros(n_output, dtype=theano.config.floatX),
                            "dbo")  # Bias from input to mem
        ''' END OF DELTAS '''

        self.deltas = dwiig[0], dwiig[1], dbig[0], dbig[1], dwifg[0], dwifg[
            1], dbfg[0], dbfg[1], dwiog[0], dwiog[1], dbog[0], dbog[1], dwmig[
                0], dwmig[1], dwmfg[0], dwmfg[1], dwmog[0], dwmog[1], dwim[
                    0], dwim[1], dbm[0], dbm[1], dwmo[0], dwmo[1], dslo[
                        0], dslo[1], dbo

        init_mem = shared_zeros(n_memblock)

        # EXPRESSIONS - Forward
        def recurrence(input, pmem, i):
            i = i.value
            ingate = sig(T.dot(input, wiig[i]) + T.dot(pmem, wmig[i]) + big[i])
            forgate = sig(
                T.dot(input, wifg[i]) + T.dot(pmem, wmfg[i]) + bfg[i])
            #mem      = forgate * pmem            + ingate * T.tanh(T.dot(input, wim[i]) + bm[i]) # Use sig or tan???
            mem = T.tanh(forgate * pmem +
                         ingate * T.tanh(T.dot(input, wim[i]) + bm[i])
                         )  # instead of identity, use tanh for mem out
            outgate = sig(T.dot(input, wiog[i]) + T.dot(mem, wmog[i]) + bog[i])
            layerout = T.tanh(T.dot(outgate * mem, wmo[i]))
            #print layerout.shape.eval()
            return mem, layerout

        #Forward Pass
        (_,
         output_sequencef), updf = theano.scan(fn=recurrence,
                                               sequences=input_sequence,
                                               non_sequences=0,
                                               outputs_info=[init_mem, None])
        (_,
         output_sequencebp), updb = theano.scan(fn=recurrence,
                                                sequences=input_sequence,
                                                non_sequences=1,
                                                outputs_info=[init_mem, None],
                                                go_backwards=True)
        output_sequenceb = output_sequencebp[::-1]

        presig_output_sequence, train_updates = theano.scan(
            fn=lambda x, y: (x * slo[0] + y * slo[1] + bo),
            sequences=[output_sequencef, output_sequenceb],
            outputs_info=[None])

        # avoid log(0) for log(scan(sigmoid()))
        output_sequence = sig(presig_output_sequence)
        # output_sequence become a batch of output vectors
        train_updates.update(updf)
        train_updates.update(updb)

        l2 = 0
        for p in self.params:
            l2 += T.sum(p * p)

        # Loss Function
        outloss = T.nnet.binary_crossentropy(
            output_sequence, gold_sequence).mean(
            ) + l2 * l2rate  # TODO: check if the dimensions match here
        # consider using multi-category? because binary allows multiple 1's in the vector

        # Backward Pass
        gradient = T.grad(outloss,
                          self.params,
                          consider_constant=[input_sequence, gold_sequence])

        train_updates.update(
            ((p, p + m * d - lr * g)
             for p, g, d in zip(self.params, gradient, self.deltas)))
        train_updates.update(
            ((d, m * d - lr * g)
             for p, g, d in zip(self.params, gradient, self.deltas)))

        target = T.iround(gold_sequence)
        output = T.iround(output_sequence)
        tp = T.sum(T.and_(target, output))
        # precision = tp / predicted positives, recall = tp / gold positives
        p = tp / (T.sum(output))
        r = tp / (T.sum(target))
        f = (2 * p * r) / (p + r)

        ct = T.sum(target)
        co = T.sum(output)

        #self.train_function = theano.function([input_sequence,gold_sequence], [output_sequence], updates=train_updates)
        self.train_function = theano.function([input_sequence, gold_sequence],
                                              [],
                                              updates=train_updates)
        #self.validate_function = theano.function([input_sequence,gold_sequence], [outloss,output_sequence])
        self.test_function = theano.function([input_sequence, gold_sequence],
                                             [outloss, ct, co, tp])
        self.generate_function = theano.function([input_sequence], output)
Example #31
    def __init__(self,
                 inpt,
                 img_sz,
                 num_maps=1,
                 translation=0,
                 zoom=1,
                 magnitude=0,
                 sigma=1,
                 pflip=0,
                 angle=0,
                 rand_gen=None,
                 invert_image=False,
                 nearest=False):
        self.inpt = inpt
        self.img_sz = img_sz
        self.translation = translation
        self.zoom = zoom
        self.magnitude = magnitude
        self.sigma = sigma
        self.invert = invert_image
        self.nearest = nearest

        self.out_sz = img_sz
        self.num_maps = num_maps
        self.n_out = self.num_maps * self.out_sz**2
        self.params = []
        self.representation = ('Elastic Maps:{:d} Size:{:2d} Translation:{:} '
                               'Zoom:{} Mag:{:2d} Sig:{:2d} Noise:{} '
                               'Angle:{} Invert:{} '
                               'Interpolation: {}'.format(
                                   self.num_maps, img_sz, translation, zoom,
                                   magnitude, sigma, pflip, angle,
                                   invert_image,
                                   'Nearest' if nearest else 'Linear'))

        if invert_image:
            inpt = 1 - inpt

        assert zoom > 0
        if not (magnitude or translation or pflip or angle) and zoom == 1:
            self.output = inpt
            self.debugout = [self.output, tt.as_tensor_variable((0, 0))]
            return

        srs = tt.shared_randomstreams.RandomStreams(
            rand_gen.randint(1e6) if rand_gen else None)
        h = w = img_sz

        # Humble as-is beginning
        target = tt.as_tensor_variable(np.indices((h, w)))

        # Translate
        if translation:
            transln = translation * srs.uniform((2, 1, 1), -1)
            target += transln

        # Apply elastic transform
        if magnitude:
            # Build a gaussian filter
            var = sigma**2
            filt = np.array([[
                np.exp(-.5 * (i * i + j * j) / var)
                for i in range(-sigma, sigma + 1)
            ] for j in range(-sigma, sigma + 1)],
                            dtype=float_x)
            filt /= 2 * np.pi * var

            # Elastic
            elast = magnitude * srs.normal((2, h, w))
            elast = sigconv.conv2d(elast, filt, (2, h, w), filt.shape, 'full')
            elast = elast[:, sigma:h + sigma, sigma:w + sigma]
            target += elast

        # Center at 'about' half way
        if zoom - 1 or angle:
            origin = srs.uniform((2, 1, 1), .25, .75) * \
                     np.array((h, w)).reshape((2, 1, 1))
            target -= origin

            # Zoom
            if zoom - 1:
                zoomer = tt.exp(np.log(zoom) * srs.uniform((2, 1, 1), -1))
                target *= zoomer

            # Rotate
            if angle:
                theta = angle * np.pi / 180 * srs.uniform(low=-1)
                c, s = tt.cos(theta), tt.sin(theta)
                rotate = tt.stack(c, -s, s, c).reshape((2, 2))
                target = tt.tensordot(rotate, target, axes=((0, 0)))

            # Uncenter
            target += origin

        # Clip the mapping to valid range and linearly interpolate
        transy = tt.clip(target[0], 0, h - 1 - .001)
        transx = tt.clip(target[1], 0, w - 1 - .001)

        if nearest:
            vert = tt.iround(transy)
            horz = tt.iround(transx)
            output = inpt[:, :, vert, horz]
        else:
            topp = tt.cast(transy, 'int32')
            left = tt.cast(transx, 'int32')
            fraction_y = tt.cast(transy - topp, float_x)
            fraction_x = tt.cast(transx - left, float_x)

            output = inpt[:, :, topp, left] * (1 - fraction_y) * (1 - fraction_x) + \
                     inpt[:, :, topp, left + 1] * (1 - fraction_y) * fraction_x + \
                     inpt[:, :, topp + 1, left] * fraction_y * (1 - fraction_x) + \
                     inpt[:, :, topp + 1, left + 1] * fraction_y * fraction_x

        # Now add some noise
        if pflip:
            mask = srs.binomial(n=1, p=pflip, size=inpt.shape, dtype=float_x)
            output = (1 - output) * mask + output * (1 - mask)

        self.output = output
        self.debugout = [
            self.output,
            target - np.indices((h, w)),
        ]

        if translation:
            self.debugout.append(transln)
        if zoom - 1 or angle:
            self.debugout.append(origin)
        if angle:
            self.debugout.append(theta * 180 / np.pi)
        if zoom - 1:
            self.debugout.append(zoomer)
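For reference, the nearest/linear branch at the end of Example #31 is ordinary image resampling. A minimal NumPy sketch of the same bilinear arithmetic (the helper name `sample_bilinear` is illustrative, not part of the layer):

import numpy as np

def sample_bilinear(img, ys, xs):
    # Clip like the layer does, so top+1 / left+1 stay in bounds
    h, w = img.shape
    ys = np.clip(ys, 0, h - 1 - .001)
    xs = np.clip(xs, 0, w - 1 - .001)
    top = ys.astype(np.int32)    # truncation == floor for non-negative coords
    left = xs.astype(np.int32)
    fy, fx = ys - top, xs - left
    return (img[top, left] * (1 - fy) * (1 - fx) +
            img[top, left + 1] * (1 - fy) * fx +
            img[top + 1, left] * fy * (1 - fx) +
            img[top + 1, left + 1] * fy * fx)

img = np.arange(16, dtype=float).reshape(4, 4)
print(sample_bilinear(img, np.array([1.5]), np.array([2.25])))  # -> [8.25]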
Example #32
	def theano_get_proj(rot, datar, datai):
		# Nearest-neighbour projection: round each rotated coordinate grid
		# and gather the matching voxels from the real/imaginary volumes.
		ret = [datar[T.iround(rot[2]), T.iround(rot[1]), T.iround(rot[0])].T,
			datai[T.iround(rot[2]), T.iround(rot[1]), T.iround(rot[0])].T]
		return ret
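A NumPy rendering of the same nearest-neighbour gather, assuming `rot` holds x/y/z coordinate grids already inside the volume bounds (names here are illustrative):

import numpy as np

def get_proj_nearest(rot, datar, datai):
    # np.rint rounds half-to-even while T.iround rounds half away from
    # zero; they differ only at exact .5 coordinates.
    z = np.rint(rot[2]).astype(int)
    y = np.rint(rot[1]).astype(int)
    x = np.rint(rot[0]).astype(int)
    return [datar[z, y, x].T, datai[z, y, x].T]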
Example #34
 def __init__(self, n_input=3, n_memblock=100, n_output=2, lr=0.0001, m=0.9):
     input_sequence = T.matrix()
     gold_sequence = T.matrix() # 1, n_output
     
     #input_sequence.tag.test_value = [[0,0,1],[0,1,0],[1,0,0]]
     #gold_sequence.tag.test_value = [[1,0],[0,1],[0,0]]
     
     ''' START WEIGHTS - 0=forward; 1=backward'''
     wiig = shared_normal(n_input, n_memblock, 0.01,"wiig0"),shared_normal(n_input, n_memblock, 0.01,"wiig1") # Weights from inputs to gates
     wmig = shared_normal(n_memblock, n_memblock, 0.01,"wmig0"),shared_normal(n_memblock, n_memblock, 0.01,"wmig1") # Weights from cells to gates - peepholes
     #big = shared_zeros(n_memblock,"big0"),shared_zeros(n_memblock,"big1")
     big = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"big0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"big1")
     
     wifg = shared_normal(n_input, n_memblock, 0.01,"wifg0"),shared_normal(n_input, n_memblock, 0.01,"wifg1")
     wmfg = shared_normal(n_memblock, n_memblock, 0.01,"wmfg0"),shared_normal(n_memblock, n_memblock, 0.01,"wmfg1")
     #bfg = shared_zeros(n_memblock,"bfg0"),shared_zeros(n_memblock,"bfg1")
     bfg = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bfg0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bfg1")
     
     wiog = shared_normal(n_input, n_memblock, 0.01,"wiog0"),shared_normal(n_input, n_memblock, 0.01,"wiog1")
     wmog = shared_normal(n_memblock, n_memblock, 0.01,"wmog0"),shared_normal(n_memblock, n_memblock, 0.01,"wmog1")
     #bog = shared_zeros(n_memblock,"bog0"),shared_zeros(n_memblock,"bog1")
     bog = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bog0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bog1")
     
     wim = shared_normal(n_input, n_memblock, 0.01,"wim0"),shared_normal(n_input, n_memblock, 0.01,"wim1") # Weight from input to mem
     #bm = shared_zeros(n_memblock,"bm0"),shared_zeros(n_memblock,"bm1") # Bias from input to mem
     bm = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bm0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"bm1")
     
     wmo = shared_normal(n_memblock, n_output, 0.01,"wmo0"),shared_normal(n_memblock, n_output, 0.01,"wmo1") # Weights from mem to output
     
     bo = theano.shared(numpy.zeros(n_output, dtype=theano.config.floatX),"bo") # Output bias
     ''' END OF WEIGHTS '''
     
     self.params = wiig[0], big[0], wifg[0], bfg[0], wiog[0], bog[0], wmig[0], wmfg[0], wmog[0], wim[0], bm[0], wmo[0], wiig[1], big[1], wifg[1], bfg[1], wiog[1], bog[1], wmig[1], wmfg[1], wmog[1], wim[1], bm[1], wmo[1], bo
     
     ''' START DELTAS - 0=forward; 1=backward'''
     dwiig = shared_normal(n_input, n_memblock, 0.01,"dwiig0"),shared_normal(n_input, n_memblock, 0.01,"dwiig1") # Weights from inputs to gates
     dwmig = shared_normal(n_memblock, n_memblock, 0.01,"dwmig0"),shared_normal(n_memblock, n_memblock, 0.01,"dwmig1") # Weights from cells to gates - peepholes
     #dbig = shared_zeros(n_memblock,"big0"),shared_zeros(n_memblock,"dbig1")
     dbig = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbig0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbig1")
     
     dwifg = shared_normal(n_input, n_memblock, 0.01,"dwifg0"),shared_normal(n_input, n_memblock, 0.01,"dwifg1")
     dwmfg = shared_normal(n_memblock, n_memblock, 0.01,"dwmfg0"),shared_normal(n_memblock, n_memblock, 0.01,"dwmfg1")
     #dbfg = shared_zeros(n_memblock,"bfg0"),shared_zeros(n_memblock,"dbfg1")
     dbfg = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbfg0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbfg1")
     
     dwiog = shared_normal(n_input, n_memblock, 0.01,"dwiog0"),shared_normal(n_input, n_memblock, 0.01,"dwiog1")
     dwmog = shared_normal(n_memblock, n_memblock, 0.01,"dwmog0"),shared_normal(n_memblock, n_memblock, 0.01,"dwmog1")
     #dbog = shared_zeros(n_memblock,"bog0"),shared_zeros(n_memblock,"dbog1")
     dbog = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbog0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbog1")
     
     dwim = shared_normal(n_input, n_memblock, 0.01,"dwim0"),shared_normal(n_input, n_memblock, 0.01,"dwim1") # Weight from input to mem
     #dbm = shared_zeros(n_memblock,"bm0"),shared_zeros(n_memblock,"dbm1") # Bias from input to mem
     dbm = theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbm0"),theano.shared(numpy.zeros(n_memblock, dtype=theano.config.floatX),"dbm1")
     
     dwmo = shared_normal(n_memblock, n_output, 0.01,"dwmo0"),shared_normal(n_memblock, n_output, 0.01,"dwmo1") # Delta for weights from mem to output
     
     dbo = theano.shared(numpy.zeros(n_output, dtype=theano.config.floatX),"dbo") # Delta for output bias
     ''' END OF DELTAS '''
     
     self.deltas = dwiig[0], dbig[0], dwifg[0], dbfg[0], dwiog[0], dbog[0], dwmig[0], dwmfg[0], dwmog[0], dwim[0], dbm[0], dwmo[0], dwiig[1], dbig[1], dwifg[1], dbfg[1], dwiog[1], dbog[1], dwmig[1], dwmfg[1], dwmog[1], dwim[1], dbm[1], dwmo[1], dbo
     
     
     init_mem = shared_zeros(n_memblock)
     
     # EXPRESSIONS - Forward
     def recurrence(input, pmem, i):
         i = i.value  # unwrap the scan constant: 0 = forward pass, 1 = backward pass
         ingate = sig(T.dot(input, wiig[i]) + T.dot(pmem, wmig[i]) + big[i])
         forgate = sig(T.dot(input, wifg[i]) + T.dot(pmem, wmfg[i]) + bfg[i])
         mem = forgate * pmem + ingate * T.tanh(T.dot(input, wim[i]) + bm[i]) # Use sig or tan???
         outgate = sig(T.dot(input, wiog[i]) + T.dot(mem, wmog[i]) + bog[i])
         layerout = T.dot(outgate * mem, wmo[i])
         #output = sig(T.dot(outgate * mem, wmo) + bo)
         return mem, layerout
     
     #Forward Pass
     (mem_sequencef, output_sequencef), updf = theano.scan(fn=recurrence,
                                                        sequences = input_sequence,
                                                        non_sequences = 0,
                                                        outputs_info = [init_mem, None])
     (mem_sequenceb, output_sequenceb), updb = theano.scan(fn=recurrence,
                                                        sequences = input_sequence,
                                                        non_sequences = 1,
                                                        outputs_info = [init_mem, None],
                                                        go_backwards=True)
     output_sequenceb = output_sequenceb[::-1]
     output_sequence, train_updates = theano.scan(fn=lambda x, y: sig(x + y + bo),
                                                   sequences = [output_sequencef, output_sequenceb],
                                                   outputs_info=[None])
     train_updates.update(updf)
     train_updates.update(updb)
     # output_sequence become a batch of output vectors
     
     # Loss Function
     outloss = T.nnet.binary_crossentropy(output_sequence, gold_sequence).mean() # TODO: check if the dimensions match here
     # consider categorical cross-entropy instead, since binary allows multiple 1's per vector
 
     # Backward Pass
     gradient = T.grad(outloss, self.params, consider_constant=[input_sequence, gold_sequence])
     
     train_updates.update(((p, p + m * d - lr * g) for p, g, d in zip(self.params, gradient, self.deltas)))
     train_updates.update(((d, m * d - lr * g) for p, g, d in zip(self.params, gradient, self.deltas)))
     
     target = T.iround(gold_sequence)
     output = T.iround(output_sequence)
     tp = T.sum(T.and_(target, output))
     # Cast to float so the ratios below avoid integer division
     tp_f = T.cast(tp, theano.config.floatX)
     p = tp_f / T.sum(output)  # precision: true positives / predicted positives
     r = tp_f / T.sum(target)  # recall: true positives / gold positives
     f = (2 * p * r) / (p + r)
     
     ct = T.sum(target)
     co = T.sum(output)
 
     #self.train_function = theano.function([input_sequence,gold_sequence], [output_sequence], updates=train_updates)
     self.train_function = theano.function([input_sequence,gold_sequence], [], updates=train_updates)
     #self.validate_function = theano.function([input_sequence,gold_sequence], [outloss,output_sequence])
     self.test_function = theano.function([input_sequence,gold_sequence], [outloss, ct, co, tp])
     self.generate_function = theano.function([input_sequence], output)
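The two update expressions near the end of Example #34 are classical momentum: the velocity d is decayed by m and pushed down the gradient, and the parameter moves by the new velocity. A standalone NumPy sketch of one such step (names are illustrative):

import numpy as np

def momentum_step(p, d, g, lr=0.0001, m=0.9):
    # d' = m*d - lr*g (new velocity), p' = p + d' -- identical to the
    # (p, p + m*d - lr*g) and (d, m*d - lr*g) update pairs above.
    d_new = m * d - lr * g
    return p + d_new, d_new

p, d = np.zeros(3), np.zeros(3)
g = np.array([1.0, -2.0, 0.5])
p, d = momentum_step(p, d, g)
print(p)  # -> [-0.0001, 0.0002, -5e-05]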
Example #35
    def train(self, samples, labels, valid_samples=[], valid_labels=None):
        # Check parameters.
        for i in range(labels.size):
            assert labels[i] in range(self.nb_classes)

        nb_samples = len(samples) if self.nb_samples is None else self.nb_samples
        # Set and compile the theano gradient descent update function.
        # Tensor for latent vectors.
        lat = theano.shared(
            np.empty([self.nb_classes, nb_samples, self.nb_features],
                     dtype=theano.config.floatX),
            name='lat'
        )
        # Tensor for latent constants.
        lat_cst = theano.shared(
            np.empty([self.nb_classes, nb_samples],
                     dtype=theano.config.floatX),
            name='lat_cst'
        )
        # Cost function.
        regularization = (
            0.5 * T.dot(T.flatten(self.beta), T.flatten(self.beta))
        )
        scores = T.batched_dot(lat, self.beta[1:,:].T).T + lat_cst.T + self.beta[0,:]
        losses = T.log(T.nnet.softmax(scores)[T.arange(nb_samples), labels])
        cost_sym = (
            regularization - self.C * T.sum(losses)
        )

        # Optimization of the cost with RPROP.
        weights_shape = self.beta.get_value().shape
        nb_weights = np.prod(weights_shape)
        # Keep step for each weight into a shared theano vector.
        steps = theano.shared(
            np.array(
                [self.learning_rate] * nb_weights,
                theano.config.floatX
            ).reshape(weights_shape),
            name='steps'
        )
        # Gradient of the cost function.
        grad = T.grad(cost_sym, self.beta)
        grad_f = theano.function(
            [],
            grad
        )
        # Perform the first iteration "manually", to initialize prev_grad properly.
        lat_val, cst_val = self.latent_function(
            self.beta.get_value()[1:,:],
            samples,
            labels,
            self.latent_args
        )
        assert lat_val.shape == (self.nb_classes, nb_samples, self.nb_features)
        assert cst_val.shape == (self.nb_classes, nb_samples)
        lat.set_value(lat_val)
        lat_cst.set_value(cst_val)
        init_grad = grad_f()
        self.beta.set_value(self.beta.get_value() - steps.get_value() 
                            * np.sign(init_grad))
        # Keep the gradient from the previous iteration into another shared theano
        # vector.
        prev_grad = theano.shared(
            init_grad,
            name='g(t-1)'
        )
        # Update rules for RPROP, scaling weight-wise step up when gradient signs 
        # agree, down when they disagree.
        # Vector containing 0 if the sign of the gradients disagree, 1 otherwise.
        sign_idx = T.iround((T.sgn(prev_grad * grad) + 1.)/2.).flatten()
        # Using the previously defined indices, we index into a matrix to get the 
        # new step vector.
        new_steps = T.stack(
            self.dec_rate * steps.flatten(), self.inc_rate * steps.flatten()
        )[sign_idx, T.arange(nb_weights)].reshape(weights_shape)
        # Specifies the updates at each iteration. We update the steps, the 
        # gradient from the previous iteration, and the actual descent.
        updates = [
            (steps, T.clip(new_steps, 10E-5, 0.1)),
            (prev_grad, grad),
            (self.beta, self.beta - steps * T.sgn(grad))
        ]
        # Full theano rprop function. Outputs the cost and gradient norm at the 
        # current point, and updates all the variables accordingly.
        rprop_descent = theano.function(
            [],
            [cost_sym, grad.norm(2)],
            updates=updates
        )
        eps = 10E-3
        
        for t_gd in range(self.nb_gd_iter):
            # Compute the best negative latent vectors.
            lat_val, cst_val = self.latent_function(
                self.beta.get_value()[1:,:],
                samples,
                labels,
                self.latent_args
            )
            lat.set_value(lat_val)
            lat_cst.set_value(cst_val)
            cost_val, grad_norm = rprop_descent()
            if self.verbose:
                print "Epoch " + repr(t_gd + 1)
                print "Cost: " + repr(cost_val)
                print "Gradient norm: " + repr(grad_norm)
                print "Mean step size: " + repr(steps.get_value().mean())
            if grad_norm <= eps:
                break
        self.intercept_ = self.beta.get_value()[0,:]
        self.coef_ = self.beta.get_value()[1:,:]
        # Trick for Theano to free GPU memory, hopefully.
        lat.set_value([[[]]])
        lat_cst.set_value([[]])
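The sign_idx/T.stack construction above is RPROP's per-weight step rule written without branches. A compact NumPy sketch of one RPROP iteration, assuming the classic 1.2/0.5 increase/decrease rates and the same clipping bounds (names are illustrative):

import numpy as np

def rprop_step(w, step, g, g_prev, inc=1.2, dec=0.5,
               step_min=10E-5, step_max=0.1):
    # Grow the per-weight step where the gradient sign is stable,
    # shrink it where the sign flipped, then move against sign(g).
    agree = np.sign(g * g_prev) >= 0   # 0 counts as agreement, like iround((sgn+1)/2)
    step = np.clip(np.where(agree, step * inc, step * dec),
                   step_min, step_max)
    return w - step * np.sign(g), step

w = np.array([0.5, -0.3])
step = np.full(2, 0.01)
g_prev = np.array([1.0, -1.0])
g = np.array([0.8, 1.0])   # second component changed sign
w, step = rprop_step(w, step, g, g_prev)
print(step)  # -> [0.012 0.005]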