def _model(self, layer_inp, inp_corruption_type=None, inp_corruption_level=0,
           hid_corruption_type=None, hid_corruption_level=0,
           L1_hiddens=0, L2_weights=0):
    """Build the symbolic autoencoder graph and its training cost.

    The input is corrupted, encoded with ``w_enc``/``b`` and ``self.act``,
    corrupted again at the hidden level, then decoded linearly with ``w_dec``
    (no decoder bias).

    Args:
        layer_inp: symbolic input of the layer; also the reconstruction
            target.
        inp_corruption_type, inp_corruption_level: forwarded to ``corrupt``
            for the input.
        hid_corruption_type, hid_corruption_level: forwarded to ``corrupt``
            for the hidden activations.
        L1_hiddens: weight of the L1 penalty on the hidden code (skipped
            when not > 0).
        L2_weights: weight of the squared-weight penalty on ``w_enc`` and
            ``w_dec`` (skipped when not > 0).

    Returns:
        ``(hid, cost)`` where ``hid`` is the hidden code *after* hidden
        corruption and ``cost`` is the scalar training cost.
    """
    params = self.params
    activation = self.act

    # Encoder
    noisy_inp = corrupt(layer_inp, inp_corruption_type, inp_corruption_level)
    hid = activation(T.dot(noisy_inp, params.w_enc) + params.b)
    hid = corrupt(hid, hid_corruption_type, hid_corruption_level)

    # Linear decoder
    recons = T.dot(hid, params.w_dec)

    # Half squared error, summed over axis 1 and averaged over the rest
    sq_err = 0.5 * (recons - layer_inp) ** 2
    cost = T.mean(sq_err.sum(axis=1))

    # Optional sparsity penalty on the (corrupted) hidden code
    if L1_hiddens > 0:
        cost += L1_hiddens * abs(hid).sum(1).mean()

    # Optional weight decay on encoder and decoder weights
    if L2_weights > 0:
        weight_norm = (params.w_enc ** 2.).sum() + (params.w_dec ** 2.).sum()
        cost += L2_weights * weight_norm

    return hid, cost
def _model(self, layer_inp, inp_corruption_type=None, inp_corruption_level=0,
           hid_corruption_type=None, hid_corruption_level=0,
           cost_dropout_level=0, L1_hiddens=0, L2_weights=0):
    """Build the normalised, PCA-projected autoencoder graph and its cost.

    Pipeline: subtract ``params.dc``, divide by ``params.std``, corrupt,
    project with ``params.pca``, encode, corrupt the code, decode, and
    project back with ``params.pca.T``.  The reconstruction target is the
    *normalised* input, not ``layer_inp`` itself.

    Args:
        layer_inp: symbolic input of the layer.
        inp_corruption_type, inp_corruption_level: forwarded to ``corrupt``
            for the normalised input.
        hid_corruption_type, hid_corruption_level: forwarded to ``corrupt``
            for the hidden code.
        cost_dropout_level: level of the ``'zeromask'`` corruption applied
            to the element-wise reconstruction error.
        L1_hiddens: weight of the L1 penalty on the code (skipped when not
            > 0).  Unlike a per-sample mean, this sums over every element.
        L2_weights: weight of the squared-weight penalty (skipped when not
            > 0).

    Returns:
        ``(enc, cost)``: the corrupted hidden code and the scalar cost.
    """
    params = self.params

    # DC centering followed by contrast normalization
    inp = layer_inp - params.dc
    inp /= params.std

    # Corrupt, project onto the PCA basis, then encode
    code = corrupt(inp, inp_corruption_type, inp_corruption_level)
    code = T.dot(code, params.pca)
    code = self.act(T.dot(code, params.w_enc) + params.b)

    # Corrupt the encoder output
    code = corrupt(code, hid_corruption_type, hid_corruption_level)

    # Decode and undo the PCA projection
    recons = T.dot(T.dot(code, params.w_dec), params.pca.T)

    # Reconstruction error with optional dropout on the error terms
    sq_err = corrupt(0.5 * (recons - inp) ** 2, 'zeromask', cost_dropout_level)
    cost = T.mean(sq_err.sum(axis=1))

    # Optional L1 penalty on the code
    if L1_hiddens > 0:
        cost += L1_hiddens * T.sum(abs(code))

    # Optional weight decay
    if L2_weights > 0:
        weight_norm = (params.w_enc ** 2.).sum() + (params.w_dec ** 2.).sum()
        cost += L2_weights * weight_norm

    # Disabled experiment: orthogonality cost on the encoder weights
    # dot = T.dot(self.params.w_enc.T, self.params.w_enc)
    # cost += 0.000000000000000001 * T.sum(abs(dot - T.zeros_like(dot)))

    return code, cost
def enc(self, x, corruption_type=None, corruption_level=0):
    """Encode ``x`` and gate the activations according to ``hp.variant``.

    After optional corruption, the input goes through
    ``inp_act(batchdot(x, W))``.  The result is then multiplied by a gate
    computed along axis 0, chosen by ``variant``:

    * ``'sigmoid_product'``: ``sigmoid(prod + selection_biases)``
    * ``'tanh_product'``:    ``tanh(prod * selection_factor)``
    * ``'step_product'``:    ``step(prod - 0.01)``
    * ``'step_sum'``:        ``step(sum - 1)``

    Any other variant returns the ungated activations.

    Args:
        x: symbolic input.
        corruption_type, corruption_level: forwarded to ``corrupt``.  For
            ``'zeromask'`` with a positive level the corrupted input is
            divided by ``1 - corruption_level``.

    Returns:
        The (possibly gated) encoded activations.
    """
    # NOTE(review): parameters are read through ``self.params()`` while the
    # hyper-parameters are read through ``self.params.hp`` -- confirm that
    # ``params`` supports both access styles.
    weights = self.params().encoder_weights
    sel_bias = self.params().selection_biases
    sel_factor = self.params().selection_factor
    inp_act = self.params.hp.inp_act
    variant = self.params.hp.variant

    # Corrupt input
    x = corrupt(x, corruption_type, corruption_level)

    # Undo the bias introduced by zero-masking
    if corruption_type == 'zeromask' and corruption_level > 0:
        x = x / T.cast(1 - corruption_level, th.config.floatX)

    # Input activation
    x = inp_act(batchdot(x, weights))

    # Pick the gate for the configured variant
    if variant == 'sigmoid_product':
        gate = sigmoid(x.prod(0) + sel_bias)
    elif variant == 'tanh_product':
        gate = tanh(x.prod(0) * sel_factor)
    elif variant == 'step_product':
        gate = step(x.prod(0) - 0.01)
    elif variant == 'step_sum':
        gate = step(x.sum(0) - 1)
    else:
        # Unknown variant: no gating
        return x

    return gate * x
def __init__(self, params, model_inp, layer_inp, corruption_type=None,
             corruption_level=0):
    """Build a convolutional layer with optional mean and covariance units.

    The corrupted input feeds up to two branches whose outputs are
    concatenated along axis 1:

    * mean branch (when ``params.mean_filter_size != 0``):
      ``m_act(conv(inp, mean_filters) + mean_b)``
    * covariance branch (when ``params.cov_filter_size != 0``):
      ``c_act(conv(conv(inp, cov_filters)**2, cov_mapping) + cov_b)``

    ``self.nb_channels`` accumulates ``mean_filter_size[0]`` and
    ``map_filter_size[0]`` for the branches that are enabled.

    Args:
        params: parameter container holding filters, biases, activations
            and filter sizes.
        model_inp: input of the whole model (stored only).
        layer_inp: symbolic input of this layer.
        corruption_type, corruption_level: forwarded to ``corrupt``.
    """
    # Parameters
    self.params = params
    self.model_inp = model_inp
    self.layer_inp = layer_inp
    self.corruption_type = corruption_type
    self.corruption_level = corruption_level

    # Corrupt input
    noisy_inp = corrupt(layer_inp, corruption_type, corruption_level)

    feature_maps = []
    self.nb_channels = 0

    # Mean units
    if params.mean_filter_size != 0:
        mean_bias = params.mean_b.dimshuffle('x', 0, 'x', 'x')
        mean_resp = conv(noisy_inp, params.mean_filters) + mean_bias
        feature_maps.append(params.m_act(mean_resp))
        self.nb_channels += params.mean_filter_size[0]

    # Covariance units: squared filter responses pooled by a mapping conv
    if params.cov_filter_size != 0:
        squared_resp = conv(noisy_inp, params.cov_filters) ** 2
        cov_bias = params.cov_b.dimshuffle('x', 0, 'x', 'x')
        cov_resp = conv(squared_resp, params.cov_mapping) + cov_bias
        feature_maps.append(params.c_act(cov_resp))
        self.nb_channels += params.map_filter_size[0]

    # Stack the enabled branches along axis 1
    self.out = T.concatenate(feature_maps, axis=1)
def enc(self, x, corruption_type=None, corruption_level=0):
    """Encode ``x`` with the encoder weights.

    Args:
        x: symbolic input.
        corruption_type, corruption_level: forwarded to ``corrupt``.

    Returns:
        ``batchdot(corrupted_x, encoder_weights)``.  No max or other
        non-linearity is applied here.
    """
    weights = self.params().encoder_weights

    # Corrupt input, then project
    noisy_x = corrupt(x, corruption_type, corruption_level)
    return batchdot(noisy_x, weights)
def dec(self, h, corruption_type=None, corruption_level=0):
    """Decode ``h`` keeping only the entries that are maximal along axis 0.

    Args:
        h: symbolic hidden representation.
        corruption_type, corruption_level: forwarded to ``corrupt``.

    Returns:
        The batched product of the masked code with the encoder weights
        (last two axes swapped), summed over axis 0.
    """
    weights = self.params().encoder_weights

    # Corrupt input
    noisy_h = corrupt(h, corruption_type, corruption_level)

    # Maxout: zero every entry that is not equal to the axis-0 maximum
    winners = eq(noisy_h, noisy_h.max(0, keepdims=True))
    selected = noisy_h * winners

    # Decode with the tied (transposed) weights and merge along axis 0
    decoded = batchdot(selected, weights.dimshuffle(0, 2, 1))
    return decoded.sum(0)
def __init__(self, inp, labels, nb_in, nb_out, dropout_level=0):
    """Numeric regression layer.

    Builds a linear map ``inp . W + B`` with a dropout-corrupted ("noisy")
    and an uncorrupted ("clean") variant, each with a half-squared-error
    cost against ``labels``.

    Args:
        inp: symbolic input.
        labels: symbolic regression targets.
        nb_in: number of input units (first dimension of ``W``).
        nb_out: number of output units (second dimension of ``W``, size
            of ``B``).
        dropout_level: level of the ``'zeromask'`` corruption on the
            input; no corruption type is passed when it is not > 0.
    """
    # Storage
    self.nb_in = nb_in
    self.nb_out = nb_out
    self.dropout_level = dropout_level

    # Weight initialization: zero biases, uniform weights in
    # [-1/sqrt(nb_in), 1/sqrt(nb_in)]
    self.B = th.shared(np.zeros(nb_out, dtype=th.config.floatX),
                       borrow=False,
                       name='Numeric regression biases')
    init_bound = 1. / np.sqrt(nb_in)
    init_weights = np.random.uniform(low=-init_bound, high=init_bound,
                                     size=(nb_in, nb_out))
    self.W = th.shared(init_weights.astype(th.config.floatX),
                       borrow=False,
                       name='Numeric regression weights')

    # Corrupted model
    noise_type = 'zeromask' if dropout_level > 0 else None
    noisy_inp = corrupt(inp, noise_type, dropout_level)
    self.noisy_pred = T.dot(noisy_inp, self.W) + self.B
    # NOTE(review): the noisy cost is summed over samples whereas the clean
    # cost below is averaged -- confirm this asymmetry is intended.
    noisy_sq_err = 0.5 * (self.noisy_pred - labels) ** 2
    self.noisy_cost = T.sum(noisy_sq_err.sum(axis=1))

    # Clean model
    # NOTE(review): the whole clean prediction (bias included) is divided
    # by (1 - dropout_level) -- confirm against how ``corrupt`` scales.
    keep_ratio = T.cast(1 - dropout_level, th.config.floatX)
    self.clean_pred = (T.dot(inp, self.W) + self.B) / keep_ratio
    clean_sq_err = 0.5 * (self.clean_pred - labels) ** 2
    self.clean_cost = T.mean(clean_sq_err.sum(axis=1))

    # Prediction model
    self.pred = self.clean_pred

    # Prediction error (for compatibility with supervised learning algorithm)
    self.error = T.mean(abs(self.clean_pred - labels))

    # For interactions with other models
    self.inp = inp
    self.labels = labels
    self.out = self.clean_pred
    self.params = [self.W, self.B]
def dec(self, h, corruption_type=None, corruption_level=0):
    """Decode ``h`` through the transposed selection and encoder weights.

    Args:
        h: symbolic hidden representation.
        corruption_type, corruption_level: forwarded to ``corrupt``.  For
            ``'zeromask'`` with a positive level the corrupted code is
            divided by ``1 - corruption_level``.

    Returns:
        ``(h . S^T) . W^T``.
    """
    # NOTE(review): ``W`` is read via ``self.params()`` but ``S`` via
    # ``self.params`` -- confirm both access styles are supported.
    weights = self.params().encoder_weights
    selection = self.params.encoder_selection

    # Corrupt input
    h = corrupt(h, corruption_type, corruption_level)

    # Undo the bias introduced by zero-masking
    if corruption_type == 'zeromask' and corruption_level > 0:
        h = h / T.cast(1 - corruption_level, th.config.floatX)

    # Decode: undo the selection first, then the filters
    unselected = T.dot(h, selection.T)
    return T.dot(unselected, weights.T)
def _model(self, inp, inp_corruption_type=None, inp_corruption_level=0,
           hid_corruption_type=None, hid_corruption_level=0,
           L1_hiddens=0, L2_weights=0):
    """Build the convolutional autoencoder graph and its training cost.

    Args:
        inp: symbolic input; also the reconstruction target.
        inp_corruption_type, inp_corruption_level: forwarded to ``corrupt``
            for the input.
        hid_corruption_type, hid_corruption_level: forwarded to ``corrupt``
            for the hidden maps before decoding.
        L1_hiddens: weight of the L1 penalty on the *uncorrupted* hidden
            maps (skipped when not > 0).
        L2_weights: weight of the squared-weight penalty on ``w_enc`` and
            ``w_dec`` (skipped when not > 0).

    Returns:
        ``(hid, cost)``: the uncorrupted hidden maps and the scalar cost.
    """
    params = self.params

    # Encoder: 'valid' convolution plus a per-channel bias
    noisy_inp = corrupt(inp, inp_corruption_type, inp_corruption_level)
    enc_bias = params.b_enc.dimshuffle('x', 0, 'x', 'x')
    hid = self.act(conv(noisy_inp, params.w_enc, border_mode='valid') + enc_bias)

    # Decoder: 'valid' convolution of the corrupted hidden maps, no bias
    noisy_hid = corrupt(hid, hid_corruption_type, hid_corruption_level)
    recons = conv(noisy_hid, params.w_dec, border_mode='valid')

    # Half squared error on the flattened (2D) reconstruction and input
    sq_err = 0.5 * (recons.flatten(2) - inp.flatten(2)) ** 2
    cost = T.mean(sq_err.sum(axis=1))

    # Optional sparsity penalty
    if L1_hiddens > 0:
        cost += L1_hiddens * abs(hid).sum(1).mean()

    # Optional weight decay
    if L2_weights > 0:
        weight_norm = (params.w_enc ** 2.).sum() + (params.w_dec ** 2.).sum()
        cost += L2_weights * weight_norm

    return hid, cost
def dec(self, h, corruption_type=None, corruption_level=0):
    """Decode ``h`` with the encoder weights, last two axes swapped.

    Args:
        h: symbolic hidden representation.
        corruption_type, corruption_level: forwarded to ``corrupt``.  For
            ``'zeromask'`` with a positive level the corrupted code is
            divided by ``1 - corruption_level``.

    Returns:
        ``batchdot(h, W.dimshuffle(0, 2, 1))``.
    """
    weights = self.params().encoder_weights

    # Corrupt input
    h = corrupt(h, corruption_type, corruption_level)

    # Undo the bias introduced by zero-masking
    if corruption_type == 'zeromask' and corruption_level > 0:
        h = h / T.cast(1 - corruption_level, th.config.floatX)

    # Decode with the tied weights
    tied_weights = weights.dimshuffle(0, 2, 1)
    return batchdot(h, tied_weights)
def dec(self, x, corruption_type=None, corruption_level=0):
    """Decode ``x`` with the decoder weights.

    Args:
        x: symbolic hidden representation.
        corruption_type, corruption_level: forwarded to ``corrupt``.  For
            ``'zeromask'`` with a positive level the corrupted code is
            divided by ``1 - corruption_level``.

    Returns:
        ``T.dot(x, decoder_weights)``.
    """
    decoder = self.params().decoder_weights

    # Corrupt input
    x = corrupt(x, corruption_type, corruption_level)

    # Undo the bias introduced by zero-masking
    if corruption_type == 'zeromask' and corruption_level > 0:
        x = x / T.cast(1 - corruption_level, th.config.floatX)

    # Linear decoding
    return T.dot(x, decoder)
def enc(self, x, corruption_type=None, corruption_level=0):
    """Encode ``x`` and subtract the bias again after the activation.

    Args:
        x: symbolic input.
        corruption_type, corruption_level: forwarded to ``corrupt``.  For
            ``'zeromask'`` with a positive level the corrupted input is
            divided by ``1 - corruption_level``.

    Returns:
        ``inp_act(x . E + b) - b``.
    """
    encoder = self.params().encoder_weights
    bias = self.params().encoder_biases
    inp_act = self.params.hp.inp_act

    # Corrupt input
    x = corrupt(x, corruption_type, corruption_level)

    # Undo the bias introduced by zero-masking
    if corruption_type == 'zeromask' and corruption_level > 0:
        x = x / T.cast(1 - corruption_level, th.config.floatX)

    # Activate the biased projection, then remove the bias from the output
    activated = inp_act(T.dot(x, encoder) + bias)
    return activated - bias
def __init__(self, params, act, model_inp, layer_inp, corruption_type=None,
             corruption_level=0):
    """Build a convolutional layer: ``act(conv(corrupt(inp), W) + B)``.

    Args:
        params: parameter container exposing ``W`` (filters) and ``B``
            (biases, broadcast along axis 1).
        act: activation applied to the biased convolution output.
        model_inp: input of the whole model (stored only).
        layer_inp: symbolic input of this layer.
        corruption_type, corruption_level: forwarded to ``corrupt``.
    """
    # Parameters
    self.params = params
    self.model_inp = model_inp
    self.layer_inp = layer_inp
    self.corruption_type = corruption_type
    self.corruption_level = corruption_level

    # Model: 'valid' convolution of the corrupted input plus bias
    noisy_inp = corrupt(layer_inp, corruption_type, corruption_level)
    bias = params.B.dimshuffle('x', 0, 'x', 'x')
    self.out = act(conv(noisy_inp, params.W, border_mode='valid') + bias)
def enc(self, x, corruption_type=None, corruption_level=0):
    """Encode ``x`` with rectified filters gated by a thresholded selection.

    Filters are ``relu(x . W)``.  The gate is
    ``step(exp(log(filters) . S) - threshold)`` and multiplies the pooled
    filter responses ``filters . S``.

    Args:
        x: symbolic input.
        corruption_type, corruption_level: forwarded to ``corrupt``.  For
            ``'zeromask'`` with a positive level the corrupted input is
            divided by ``1 - corruption_level``.

    Returns:
        ``gate * (filters . S)``.
    """
    # NOTE(review): ``W`` is read via ``self.params()`` but ``S`` and the
    # hyper-parameters via ``self.params`` -- confirm both are supported.
    weights = self.params().encoder_weights
    selection = self.params.encoder_selection
    threshold = T.cast(self.params.hp.threshold, x.dtype)

    # Corrupt input
    x = corrupt(x, corruption_type, corruption_level)

    # Undo the bias introduced by zero-masking
    if corruption_type == 'zeromask' and corruption_level > 0:
        x = x / T.cast(1 - corruption_level, th.config.floatX)

    # Filters
    filters = relu(T.dot(x, weights))

    # Selection function
    # NOTE(review): log of a relu output can be log(0) -- confirm the
    # resulting -inf/nan values are acceptable downstream.
    log_pooled = T.dot(T.log(filters), selection)
    gate = step(T.exp(log_pooled) - threshold)

    # Encode
    return gate * T.dot(filters, selection)
def _update_model(self):
    """Rebuild the convolutional softmax graph from the current settings.

    Sets ``self.prob`` (class probabilities in convolutional layout),
    ``self.pred`` (argmax class per position), ``self.cost`` (mean negative
    log-probability of ``self.conv_labels``) and ``self.error`` (mean
    mismatch between ``pred`` and ``conv_labels``).
    """
    # Corrupt input
    noisy_inp = corrupt(self.inp, 'zeromask', self.dropout_level)

    # Apply convolution
    bias = self.params.B.dimshuffle('x', 0, 'x', 'x')
    out = conv(noisy_inp, self.params.W, border_mode='valid') + bias

    # Remember the convolution output shape before flattening
    out_shape = out.shape
    dim0, dim1, dim2, dim3 = (out_shape[0], out_shape[1],
                              out_shape[2], out_shape[3])

    # Reshape to softmax format (2D): axis 1 moved last, one row per group
    # of ``nb_classes`` values
    flat_out = out.dimshuffle(0, 2, 3, 1).reshape(
        (out.size // self.nb_classes, self.nb_classes))

    # Compute class probability
    flat_prob = softmax(flat_out)

    # Class prediction, restored to convolutional layout
    flat_pred = T.argmax(flat_prob, axis=1)
    self.pred = flat_pred.reshape(
        (dim0, dim2, dim3, dim1 // self.nb_classes)).dimshuffle(0, 3, 1, 2)

    # Compute softmax cost: probability assigned to the labelled class
    flat_labels = self.conv_labels.dimshuffle(0, 2, 3, 1).flatten()
    label_prob = flat_prob[T.arange(flat_prob.shape[0]), flat_labels]
    self.cost = -T.mean(T.log(label_prob))

    # Reshape class prob to convolutional format
    self.prob = flat_prob.reshape(
        (dim0, dim2, dim3, dim1)).dimshuffle(0, 3, 1, 2)

    # Prediction error
    self.error = T.mean(T.neq(self.pred, self.conv_labels))
def __init__(self, model_inp, layer_inp, act, inp_patch_sz, nb_decoders,
             nb_inp, nb_hid, corruption_type, corruption_level):
    """Build an autoencoder with one encoder and several decoders.

    A single encoder (``W_enc``, ``b_enc``) produces a noisy code (from the
    corrupted input) and a clean code (from the raw input).  Each of the
    ``nb_decoders`` linear decoders reconstructs the input from both codes,
    contributing one entry to ``self.noisy_cost`` and ``self.clean_cost``.

    Args:
        model_inp: input of the whole model, passed to the trainer.
        layer_inp: symbolic input of this layer; also the reconstruction
            target.
        act: activation applied to the encoder output.
        inp_patch_sz: accepted for interface compatibility; unused here.
        nb_decoders: number of independent decoders to create.
        nb_inp: number of input units.
        nb_hid: number of hidden units.
        corruption_type, corruption_level: forwarded to ``corrupt`` for the
            noisy path.
    """
    # Model
    self.act = act
    self.nb_inp = nb_inp
    self.nb_hid = nb_hid

    # Encoder
    enc_bound = 1. / np.sqrt(nb_inp)
    self.W_enc = th.shared(
        uniform(low=-enc_bound, high=enc_bound,
                size=(nb_inp, nb_hid)).astype(th.config.floatX),
        borrow=True,
        name='Encoder weights')
    self.b_enc = th.shared(np.zeros(nb_hid, dtype=th.config.floatX),
                           borrow=True,
                           name='Encoder biases')

    corr_inp = corrupt(layer_inp, corruption_type, corruption_level)
    noisy_hiddens = act(T.dot(corr_inp, self.W_enc) + self.b_enc)
    # The encoder weights are stored as ``W_enc``; ``self.W`` is never
    # assigned in this constructor, so referencing it would fail.
    clean_hiddens = act(T.dot(layer_inp, self.W_enc) + self.b_enc)

    # Using multiple decoders
    self.W_dec = []
    self.b_dec = []
    self.noisy_cost = []
    self.clean_cost = []
    for dec_ind in range(nb_decoders):
        W_dec = th.shared(
            uniform(low=-1. / nb_hid, high=1. / nb_hid,
                    size=(nb_hid, nb_inp)).astype(th.config.floatX),
            borrow=True,
            name='Decoder {} weights'.format(dec_ind))
        b_dec = th.shared(np.zeros(nb_inp, dtype=th.config.floatX),
                          borrow=True,
                          name='Decoder {} biases'.format(dec_ind))
        self.W_dec.append(W_dec)
        self.b_dec.append(b_dec)

        # One cost per decoder.  ``append`` is required: ``list += scalar``
        # would try to iterate over the symbolic scalar.
        noisy_recons = T.dot(noisy_hiddens, W_dec) + b_dec
        self.noisy_cost.append(
            T.mean((0.5 * (noisy_recons - layer_inp) ** 2).sum(axis=1)))

        clean_recons = T.dot(clean_hiddens, W_dec) + b_dec
        self.clean_cost.append(
            T.mean((0.5 * (clean_recons - layer_inp) ** 2).sum(axis=1)))

    # For interactions with other models
    self.inp = layer_inp
    self.out = clean_hiddens
    self.params = [self.W_enc, self.b_enc] + self.W_dec + self.b_dec
    self.encoder_params = [self.W_enc, self.b_enc]

    # Trainer object
    # NOTE(review): ``sgd`` receives per-decoder cost lists -- confirm it
    # accepts lists rather than scalar costs.
    self.trainer = sgd(model_inp, self.noisy_cost, self.clean_cost,
                       self.params)