def copy_coherence_function(self, input_a=None, input_b=None,
                            arg_idx_input=None, salience_input=None):
    """
    Build a new coherence function, setting the weights of its layers from
    this network and replacing the inputs given as kwargs.

    Note that this uses the same shared prediction weights/bias and any
    other non-replaced components as the network's original expression
    graph: bear that in mind if you use it to update weights or combine it
    with other graphs.

    :param input_a: symbolic replacement for ``self.input_a``
        (``None`` keeps the network's own variable)
    :param input_b: symbolic replacement for ``self.input_b``
        (``None`` keeps the network's own variable)
    :param arg_idx_input: symbolic replacement for ``self.arg_idx_input``
        (``None`` keeps the network's own variable); a trailing
        broadcastable axis is appended to it before concatenation
    :param salience_input: symbolic replacement for ``self.salience_input``
        (``None`` keeps the network's own variable)
    :return: tuple ``(prediction, input_vector, layers, layer_outputs,
        activation)``; ``prediction`` is the sigmoid of ``activation`` and
        ``layer_outputs`` holds the hidden representation of every layer
    """
    # Test for "not given" with an explicit None comparison rather than
    # ``x or default``, so the supplied symbolic variable is never
    # truth-tested
    if input_a is None:
        input_a = self.input_a
    if input_b is None:
        input_b = self.input_b
    if arg_idx_input is None:
        arg_idx_input = self.arg_idx_input
    if salience_input is None:
        # NOTE(review): this fallback needs self.salience_input to exist,
        # so presumably the network must have been built with salience
        # enabled -- confirm against the constructor
        salience_input = self.salience_input

    # Build a new coherence function, combining these two projections
    input_vector = T.concatenate([
        input_a,
        input_b,
        arg_idx_input.dimshuffle(0, 'x'),
        salience_input
    ], axis=input_a.ndim - 1)

    # Initialize each layer as an autoencoder.
    # We'll then set its weights and never use it as an autoencoder
    layers = []
    layer_outputs = []
    input_size = \
        self.event_vector_network.layer_sizes[-1] * 2 + \
        1 + self.num_salience_features
    layer_input = input_vector
    for layer_size in self.layer_sizes:
        layers.append(
            DenoisingAutoencoder(x=layer_input,
                                 n_visible=input_size,
                                 n_hidden=layer_size,
                                 non_linearity='tanh'))
        # The hidden layer of this autoencoder feeds the next one
        input_size = layer_size
        layer_input = layers[-1].hidden_layer
        layer_outputs.append(layer_input)
    final_projection = layer_input

    # Set the weights of all layers to the ones trained in the base network
    for layer, layer_weights in zip(layers, self.get_weights()):
        layer.set_weights(layer_weights)

    # Add a final layer
    # This is simply a logistic regression layer to predict
    # a coherence score for the input pair
    activation = \
        T.dot(final_projection, self.prediction_weights) + \
        self.prediction_bias
    # Remove the last dimension, which should now just be of size 1
    activation = activation.reshape(activation.shape[:-1],
                                    ndim=activation.ndim - 1)
    prediction = T.nnet.sigmoid(activation)

    return prediction, input_vector, layers, layer_outputs, activation
def build_projection_layer(pred_input, subj_input, obj_input, pobj_input,
                           vectors, empty_subj_vector, empty_obj_vector,
                           empty_pobj_vector, input_size, layer_sizes):
    """
    Build the stack of autoencoder layers that projects an event
    (predicate plus subject, object and prepositional-object slots).

    The first layer's input is the concatenation of the vectors selected
    from ``vectors`` by the four index inputs. For the three argument
    slots, an index of -1 selects the corresponding ``empty_*_vector``
    instead of a row of ``vectors``.

    :param pred_input: symbolic indices of the predicates
    :param subj_input: symbolic indices of the subjects (-1 for none)
    :param obj_input: symbolic indices of the objects (-1 for none)
    :param pobj_input: symbolic indices of the prepositional objects
        (-1 for none)
    :param vectors: matrix indexed by the inputs above
    :param empty_subj_vector: vector used where the subject index is -1
    :param empty_obj_vector: vector used where the object index is -1
    :param empty_pobj_vector: vector used where the pobj index is -1
    :param input_size: number of visible units of the first layer
    :param layer_sizes: number of hidden units of each successive layer
    :return: tuple ``(input_vector, layers, layer_outputs,
        projection_layer)``; ``projection_layer`` is the hidden layer of
        the last autoencoder, or ``input_vector`` itself when
        ``layer_sizes`` is empty
    """
    # Pattern appending one broadcastable axis so the indices can be tested
    # for -1 against the selected vectors. In the standard case this is
    # dimshuffle((0, "x")), turning a 1D vector into a column vector
    column_pattern = tuple(list(range(subj_input.ndim)) + ['x'])

    def select_or_empty(indices, empty_vector):
        # Rows of ``vectors`` where an index is given, the empty vector
        # where the index is -1
        index_column = indices.dimshuffle(column_pattern)
        return T.switch(T.neq(index_column, -1),
                        vectors[indices],
                        empty_vector)

    # Input to the first autoencoder: predicate and argument vectors
    # side by side
    input_vector = T.concatenate([
        vectors[pred_input],
        select_or_empty(subj_input, empty_subj_vector),
        select_or_empty(obj_input, empty_obj_vector),
        select_or_empty(pobj_input, empty_pobj_vector),
    ], axis=pred_input.ndim)

    # Stack the autoencoders, each one reading the hidden layer of the
    # previous one
    layers = []
    layer_outputs = []
    visible_units = input_size
    current_output = input_vector
    for hidden_units in layer_sizes:
        autoencoder = DenoisingAutoencoder(
            x=current_output,
            n_hidden=hidden_units,
            n_visible=visible_units,
            non_linearity='tanh',
        )
        layers.append(autoencoder)
        current_output = autoencoder.hidden_layer
        layer_outputs.append(current_output)
        visible_units = hidden_units

    return input_vector, layers, layer_outputs, current_output
# =============================================================================
if arg == 'ncomp':
    # Sweep the number of hidden components over the powers of two
    # from 2**2 = 4 up to 2**11 = 2048, training a fresh model for each
    feed_list = [2**x for x in range(2, 12)]
    for i in feed_list:
        # Single-argument call form: prints the same text under the
        # Python 2 print statement and the Python 3 print function
        print("\n Evaluating for ncomp=" + str(i))
        t = 'hlayer=' + str(i)
        dae = DAE(model_name='hidden_layers',
                  pickle_name=arg,
                  test_name=t,
                  n_components=i,
                  main_dir='hidden_layers/',
                  enc_act_func='sigmoid',
                  dec_act_func='sigmoid',
                  loss_func='mean_squared',
                  num_epochs=31,
                  batch_size=12,
                  dataset='cifar10',
                  xavier_init=1,
                  opt='adam',
                  learning_rate=0.001,
                  momentum=0.5,
                  corr_type='gaussian',
                  corr_frac=0.6,
                  verbose=1,
                  seed=-1)
        # Train from scratch, then reset before the next configuration
        dae.fit(trX, val_dict, teX, restore_previous_model=False)
        dae.reset()

# =============================================================================
# Testing learning rates
# =============================================================================
def __init__(self, event_vector_network, layer_sizes, use_salience=True,
             salience_features=None):
    """
    Build the coherence network on top of an event vector network.

    The input is the pair of event projections, the argument index and,
    optionally, a matrix of salience features. It is passed through a stack
    of autoencoder layers, followed by a logistic regression layer that
    predicts a coherence score for the input pair.

    :param event_vector_network: network providing ``get_projection_pair()``,
        ``layer_sizes`` and the ``pred``/``subj``/``obj``/``pobj`` inputs
        for events ``a``, ``b`` and ``c``
    :param layer_sizes: number of hidden units of each autoencoder layer
    :param use_salience: whether salience features are part of the input
    :param salience_features: the salience features; required when
        ``use_salience`` is true. Only their number is used here, to size
        the first layer
    :raises ValueError: if ``use_salience`` is true but
        ``salience_features`` is ``None``
    """
    # Validate up front with a real exception: an assert would be stripped
    # when running with -O
    if use_salience and salience_features is None:
        raise ValueError(
            "salience_features must be given when use_salience is True")

    self.event_vector_network = event_vector_network
    self.layer_sizes = layer_sizes

    self.input_a, self.input_b = \
        self.event_vector_network.get_projection_pair()
    self.arg_idx_input = T.vector('arg_type')
    self.neg_arg_idx_input = T.vector('neg_arg_type')
    self.input_vector = T.concatenate(
        (self.input_a,
         self.input_b,
         self.arg_idx_input.dimshuffle(0, 'x')),
        axis=1)

    self.use_salience = use_salience
    if self.use_salience:
        self.salience_input = T.matrix('salience')
        # variables for negative entity salience
        self.neg_salience_input = T.matrix('neg_salience')
        self.input_vector = T.concatenate(
            (self.input_vector, self.salience_input), axis=1)

    # Initialize each layer as an autoencoder,
    # allowing us to initialize it by pretraining
    self.layers = []
    self.layer_outputs = []
    # Two event projections plus one column for the argument index
    input_size = \
        self.event_vector_network.layer_sizes[-1] * 2 + 1
    self.num_salience_features = 0
    self.salience_features = []
    if self.use_salience:
        self.salience_features = salience_features
        self.num_salience_features = len(self.salience_features)
        input_size += self.num_salience_features

    layer_input = self.input_vector
    for layer_size in layer_sizes:
        self.layers.append(
            DenoisingAutoencoder(input=layer_input,
                                 n_visible=input_size,
                                 n_hidden=layer_size,
                                 non_linearity="tanh"))
        # The hidden layer of this autoencoder feeds the next one
        input_size = layer_size
        layer_input = self.layers[-1].hidden_layer
        self.layer_outputs.append(layer_input)
    self.final_projection = layer_input

    # Add a final layer, which will only ever be trained with
    # a supervised objective
    # This is simply a logistic regression layer to predict
    # a coherence score for the input pair
    # The weights are drawn uniformly at random from
    # [-2 * sqrt(6 / (n + 1)), 2 * sqrt(6 / (n + 1))], where n is the size
    # of the last hidden layer; the bias starts at zero
    weight_bound = 2. * numpy.sqrt(6. / (layer_sizes[-1] + 1))
    self.prediction_weights = theano.shared(
        numpy.asarray(numpy.random.uniform(
            low=-weight_bound,
            high=weight_bound,
            size=(layer_sizes[-1], 1),
        ), dtype=theano.config.floatX),
        name="prediction_w",
        borrow=True)
    self.prediction_bias = theano.shared(
        value=numpy.zeros(1, dtype=theano.config.floatX),
        name="prediction_b",
        borrow=True)
    self.prediction = T.nnet.sigmoid(
        T.dot(self.final_projection, self.prediction_weights) +
        self.prediction_bias)

    # Symbolic inputs of each event, in the order pred, subj, obj, pobj
    evn = self.event_vector_network
    event_a_inputs = [evn.pred_input_a, evn.subj_input_a,
                      evn.obj_input_a, evn.pobj_input_a]
    event_b_inputs = [evn.pred_input_b, evn.subj_input_b,
                      evn.obj_input_b, evn.pobj_input_b]
    event_c_inputs = [evn.pred_input_c, evn.subj_input_c,
                      evn.obj_input_c, evn.pobj_input_c]

    # Inputs for a pair of events (a, b)
    self.pair_inputs = \
        event_a_inputs + event_b_inputs + [self.arg_idx_input]
    if self.use_salience:
        self.pair_inputs.append(self.salience_input)

    # Inputs for a triple of events (a, b, c), with the extra "neg"
    # argument index and salience variables
    self.triple_inputs = \
        event_a_inputs + event_b_inputs + event_c_inputs + \
        [self.arg_idx_input, self.neg_arg_idx_input]
    if self.use_salience:
        self.triple_inputs.append(self.salience_input)
        self.triple_inputs.append(self.neg_salience_input)

    # Not built here; starts out unset
    self._coherence_fn = None
def __init__(self, arg_comp_model, layer_sizes):
    """
    Build the coherence network on top of an argument composition model.

    The pair of event projections and the argument type are concatenated
    and passed through a stack of autoencoder layers, followed by a
    logistic regression layer that predicts a coherence score for the pair.

    :param arg_comp_model: model providing ``get_projection_pair()``,
        ``layer_sizes`` and the ``pred``/``subj``/``obj``/``pobj`` inputs
        for events ``a``, ``b`` and ``c``
    :param layer_sizes: number of hidden units of each autoencoder layer
    """
    self.arg_comp_model = arg_comp_model
    self.input_a, self.input_b = self.arg_comp_model.get_projection_pair()
    self.input_arg_type = T.vector("arg_type", dtype="int32")

    # The argument type becomes one extra column next to the projections
    arg_type_column = self.input_arg_type.dimshuffle(0, 'x')
    self.input_vector = T.concatenate(
        (self.input_a, self.input_b, arg_type_column), axis=1)
    self.layer_sizes = layer_sizes

    # Every layer is created as an autoencoder so that it can be
    # initialized by pretraining; each one reads the hidden layer of the
    # previous one
    self.layers = []
    self.layer_outputs = []
    visible_units = self.arg_comp_model.layer_sizes[-1] * 2 + 1
    current_output = self.input_vector
    for hidden_units in layer_sizes:
        autoencoder = DenoisingAutoencoder(
            input=current_output,
            n_visible=visible_units,
            n_hidden=hidden_units,
            non_linearity="tanh")
        self.layers.append(autoencoder)
        current_output = autoencoder.hidden_layer
        self.layer_outputs.append(current_output)
        visible_units = hidden_units
    self.final_projection = current_output

    # Final logistic regression layer, only ever trained with a supervised
    # objective, predicting a coherence score for the input pair.
    # Its weights are drawn uniformly at random from
    # [-2 * sqrt(6 / (n + 1)), 2 * sqrt(6 / (n + 1))], where n is the size
    # of the last hidden layer; its bias starts at zero
    last_size = layer_sizes[-1]
    weight_bound = 2. * numpy.sqrt(6. / (last_size + 1))
    initial_weights = numpy.asarray(
        numpy.random.uniform(
            low=-weight_bound,
            high=weight_bound,
            size=(last_size, 1),
        ),
        dtype=theano.config.floatX)
    self.prediction_weights = theano.shared(
        initial_weights, name="prediction_w", borrow=True)
    self.prediction_bias = theano.shared(
        value=numpy.zeros(1, dtype=theano.config.floatX),
        name="prediction_b",
        borrow=True)
    self.prediction = T.nnet.sigmoid(
        T.dot(self.final_projection, self.prediction_weights) +
        self.prediction_bias)

    # Symbolic inputs of each event, in the order pred, subj, obj, pobj
    model = self.arg_comp_model
    event_a_inputs = [model.pred_input_a, model.subj_input_a,
                      model.obj_input_a, model.pobj_input_a]
    event_b_inputs = [model.pred_input_b, model.subj_input_b,
                      model.obj_input_b, model.pobj_input_b]
    event_c_inputs = [model.pred_input_c, model.subj_input_c,
                      model.obj_input_c, model.pobj_input_c]

    # Inputs for a pair (a, b) and for a triple (a, b, c) of events,
    # each followed by the argument type
    self.pair_inputs = \
        event_a_inputs + event_b_inputs + [self.input_arg_type]
    self.triple_inputs = \
        event_a_inputs + event_b_inputs + event_c_inputs + \
        [self.input_arg_type]

    # Not built here; starts out unset
    self._coherence_fn = None
[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0]] ) rng = np.random.RandomState(123) # construct dA da = DenoisingAutoencoder(input=data, n_visible=20, n_hidden=5, np_rng=rng) # train for epoch in range(training_epochs): da.train(lr=learning_rate, corruption_level=corruption_level) # cost = da.negative_log_likelihood(corruption_level=corruption_level) # print >> sys.stderr, 'Training epoch %d, cost is ' % epoch, cost # learning_rate *= 0.95 # test x = np.array([[1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0]]) print(da.get_hidden_values(x).shape) print(x.shape) x = da.reconstruct(x)
import sys
import os
# Put ../utils/ first on the module search path before the imports below
# (presumably where the project modules live -- confirm)
sys.path.insert(0, '../utils/')
import numpy as np
import getdata
from LeNet5 import LeNet
from autoencoder import DenoisingAutoencoder as DAE

# The first command-line argument selects which pipeline to run
if sys.argv[1] == 'dae':
    # Denoising autoencoder on CIFAR-10; the train and test sets are
    # transformed with it and saved below
    dae = DAE(
        model_name='dae_svm',
        pickle_name='svm',
        test_name='svm',
        n_components=256,
        main_dir='dae/',
        enc_act_func='sigmoid',
        dec_act_func='none',
        loss_func='mean_squared',
        num_epochs=50,
        batch_size=20,
        dataset='cifar10',
        xavier_init=1,
        opt='adam',
        learning_rate=0.0001,
        momentum=0.5,
        corr_type='gaussian',
        corr_frac=0.5,
        verbose=1,
        seed=1,
    )

    trX, trY, teX, teY = getdata.load_cifar10_dataset(
        '../cifar-10-batches-py/', mode='supervised')
    val_dict = {}

    dae.fit(trX, val_dict, teX, restore_previous_model=True)
    #dae.load_model(256, 'models/dae/dae_svm')

    # Transform both splits with the fitted autoencoder, saving the results
    dae_svm_train = dae.transform(trX, name='dae_svm_train_na', save=True)
    dae_svm_test = dae.transform(teX, name='dae_svm_test_na', save=True)

elif sys.argv[1] == 'cnn':
    cifar_train = getdata.get_train()
    cifar_test = getdata.get_test()