def build_model(input_width, input_height, output_dim,
                batch_size=BATCH_SIZE, dimshuffle=True):
    """Build a conv net: two conv+maxpool stages, one dense+dropout, softmax out.

    Fix: removed the commented-out second hidden layer (dead code).

    :param input_width: width of the single-channel input images.
    :param input_height: height of the single-channel input images.
    :param output_dim: number of softmax output units (classes).
    :param batch_size: fixed batch size baked into the input shape.
    :param dimshuffle: if False, run the conv stack in cuda-convnet's native
        c01b layout, converting from/to bc01 at the boundaries; if True, the
        CC layers shuffle internally per call.
    :returns: the network's output layer.
    """
    l_in = lasagne.layers.InputLayer(
        shape=(batch_size, 1, input_width, input_height),
    )

    if not dimshuffle:
        # Convert once up front instead of per-layer shuffling.
        l_in = cuda_convnet.bc01_to_c01b(l_in)

    l_conv1 = cuda_convnet.Conv2DCCLayer(
        l_in,
        num_filters=32,
        filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        dimshuffle=dimshuffle,
    )
    l_pool1 = cuda_convnet.MaxPool2DCCLayer(
        l_conv1,
        ds=(2, 2),
        dimshuffle=dimshuffle,
    )
    l_conv2 = cuda_convnet.Conv2DCCLayer(
        l_pool1,
        num_filters=32,
        filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        dimshuffle=dimshuffle,
    )
    l_pool2 = cuda_convnet.MaxPool2DCCLayer(
        l_conv2,
        ds=(2, 2),
        dimshuffle=dimshuffle,
    )

    if not dimshuffle:
        # Back to bc01 so DenseLayer sees the conventional layout.
        l_pool2 = cuda_convnet.c01b_to_bc01(l_pool2)

    l_hidden1 = lasagne.layers.DenseLayer(
        l_pool2,
        num_units=256,
        nonlinearity=lasagne.nonlinearities.rectify,
    )
    l_hidden1_dropout = lasagne.layers.DropoutLayer(l_hidden1, p=0.5)

    l_out = lasagne.layers.DenseLayer(
        l_hidden1_dropout,
        num_units=output_dim,
        nonlinearity=lasagne.nonlinearities.softmax,
    )
    return l_out
def build_network(self, input_width, input_height, output_dim, num_frames, batch_size):
    """Build a DQN-style conv net (3 convs + dense) in cuda-convnet c01b layout.

    :param input_width: frame width.
    :param input_height: frame height.
    :param output_dim: number of linear output units.
    :param num_frames: number of stacked frames used as input channels.
    :param batch_size: fixed batch size baked into the input shape.
    :returns: the network's output layer.
    """
    network = lasagne.layers.InputLayer(
        shape=(batch_size, num_frames, input_width, input_height)
    )
    # cuda-convnet kernels run in c01b layout; convert once at each boundary.
    network = cuda_convnet.bc01_to_c01b(network)

    # (num_filters, filter_size, stride) for each conv stage.
    conv_specs = (
        (32, (8, 8), (4, 4)),
        (64, (4, 4), (2, 2)),
        (64, (3, 3), (1, 1)),
    )
    for filters, size, step in conv_specs:
        network = cuda_convnet.Conv2DCCLayer(
            network,
            num_filters=filters,
            filter_size=size,
            stride=step,
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.Uniform(0.01),
            b=lasagne.init.Constant(0.1),
            dimshuffle=False,
        )

    network = cuda_convnet.c01b_to_bc01(network)

    network = lasagne.layers.DenseLayer(
        network,
        num_units=512,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Uniform(0.01),
        b=lasagne.init.Constant(0.1),
    )
    # Linear output (no nonlinearity), e.g. Q-values per action.
    return lasagne.layers.DenseLayer(
        network,
        num_units=output_dim,
        nonlinearity=None,
        W=lasagne.init.Uniform(0.01),
        b=lasagne.init.Constant(0.1),
    )
def build_model(input_width, input_height, output_dim, batch_size=BATCH_SIZE):
    """Build a conv net using Nervana conv kernels: 2 conv+pool, dense+dropout, softmax.

    Fix: removed the commented-out second hidden layer (dead code).

    :param input_width: width of the single-channel input images.
    :param input_height: height of the single-channel input images.
    :param output_dim: number of softmax output units (classes).
    :param batch_size: fixed batch size baked into the input shape.
    :returns: the network's output layer.
    """
    l_in = lasagne.layers.InputLayer(
        shape=(batch_size, 1, input_width, input_height),
    )
    # Nervana conv kernels operate in c01b layout; shuffle in and out once.
    l_in_c01b = bc01_to_c01b(l_in)

    l_conv1 = layers.NervanaConvLayer(
        l_in_c01b,
        num_filters=32,
        filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(),
        dimshuffle=False,
    )
    l_pool1 = MaxPool2DCCLayer(l_conv1, ds=(2, 2), dimshuffle=False)

    l_conv2 = layers.NervanaConvLayer(
        l_pool1,
        num_filters=32,
        filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(),
        dimshuffle=False,
    )
    l_pool2 = MaxPool2DCCLayer(l_conv2, ds=(2, 2), dimshuffle=False)

    l_pool2_bc01 = c01b_to_bc01(l_pool2)

    l_hidden1 = lasagne.layers.DenseLayer(
        l_pool2_bc01,
        num_units=256,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(),
    )
    l_hidden1_dropout = lasagne.layers.DropoutLayer(l_hidden1, p=0.5)

    l_out = lasagne.layers.DenseLayer(
        l_hidden1_dropout,
        num_units=output_dim,
        nonlinearity=lasagne.nonlinearities.softmax,
        W=lasagne.init.GlorotUniform(),
    )
    return l_out
def build_model(input_width, input_height, output_dim, batch_size=BATCH_SIZE):
    """Build a conv net using Nervana conv kernels: 2 conv+pool, dense+dropout, softmax.

    Fix: removed the commented-out second hidden layer (dead code).
    NOTE(review): this function duplicates an earlier `build_model` in this
    file — consider consolidating.

    :param input_width: width of the single-channel input images.
    :param input_height: height of the single-channel input images.
    :param output_dim: number of softmax output units (classes).
    :param batch_size: fixed batch size baked into the input shape.
    :returns: the network's output layer.
    """
    l_in = lasagne.layers.InputLayer(
        shape=(batch_size, 1, input_width, input_height),
    )
    # Nervana conv kernels operate in c01b layout; shuffle in and out once.
    l_in_c01b = bc01_to_c01b(l_in)

    l_conv1 = layers.NervanaConvLayer(
        l_in_c01b,
        num_filters=32,
        filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(),
        dimshuffle=False,
    )
    l_pool1 = MaxPool2DCCLayer(l_conv1, ds=(2, 2), dimshuffle=False)

    l_conv2 = layers.NervanaConvLayer(
        l_pool1,
        num_filters=32,
        filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(),
        dimshuffle=False,
    )
    l_pool2 = MaxPool2DCCLayer(l_conv2, ds=(2, 2), dimshuffle=False)

    l_pool2_bc01 = c01b_to_bc01(l_pool2)

    l_hidden1 = lasagne.layers.DenseLayer(
        l_pool2_bc01,
        num_units=256,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform(),
    )
    l_hidden1_dropout = lasagne.layers.DropoutLayer(l_hidden1, p=0.5)

    l_out = lasagne.layers.DenseLayer(
        l_hidden1_dropout,
        num_units=output_dim,
        nonlinearity=lasagne.nonlinearities.softmax,
        W=lasagne.init.GlorotUniform(),
    )
    return l_out
def build_network(self, input_width, input_height, output_dim, num_frames, batch_size):
    """Build a DQN-style conv net (3 convs + dense) in cuda-convnet c01b layout.

    Fixes:
    - `strides=` -> `stride=`: Conv2DCCLayer's keyword is `stride` (as the
      sibling builders in this file use); `strides` is an unexpected kwarg.
    - third conv was missing `dimshuffle=False`, breaking the c01b chain
      that conv1/conv2 establish and `c01b_to_bc01` expects afterwards.

    :param input_width: frame width.
    :param input_height: frame height.
    :param output_dim: number of linear output units.
    :param num_frames: number of stacked frames used as input channels.
    :param batch_size: fixed batch size baked into the input shape.
    :returns: the network's output layer.
    """
    l_in = lasagne.layers.InputLayer(
        shape=(batch_size, num_frames, input_width, input_height))
    # cuda-convnet kernels run in c01b layout; convert once at each boundary.
    l_in = cuda_convnet.bc01_to_c01b(l_in)

    l_conv1 = cuda_convnet.Conv2DCCLayer(
        l_in,
        num_filters=32,
        filter_size=(8, 8),
        stride=(4, 4),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Uniform(0.01),
        b=lasagne.init.Constant(0.1),
        dimshuffle=False)
    l_conv2 = cuda_convnet.Conv2DCCLayer(
        l_conv1,
        num_filters=64,
        filter_size=(4, 4),
        stride=(2, 2),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Uniform(0.01),
        b=lasagne.init.Constant(0.1),
        dimshuffle=False)
    l_conv3 = cuda_convnet.Conv2DCCLayer(
        l_conv2,
        num_filters=64,
        filter_size=(3, 3),
        stride=(1, 1),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Uniform(0.01),
        b=lasagne.init.Constant(0.1),
        dimshuffle=False)  # bug fix: was missing, breaking the c01b layout chain
    l_conv3 = cuda_convnet.c01b_to_bc01(l_conv3)

    l_hidden1 = lasagne.layers.DenseLayer(
        l_conv3,
        num_units=512,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Uniform(0.01),
        b=lasagne.init.Constant(0.1))
    # Linear output (no nonlinearity), e.g. Q-values per action.
    l_out = lasagne.layers.DenseLayer(
        l_hidden1,
        num_units=output_dim,
        nonlinearity=None,
        W=lasagne.init.Uniform(0.01),
        b=lasagne.init.Constant(0.1))
    return l_out
def __init__(self, data, U, V, num_output, img_w=300, hidden_size=100,
             batch_size=32, lr=0.001, fine_tune_W=True, optimizer='adam',
             use_lstm=True, is_bidirectional=False, learn_init=False,
             use_peepholes=True):
    """Build the image+question LSTM model and compile its theano functions.

    Fixes:
    - `raise 'some string'` raises TypeError at runtime (exceptions must
      derive from BaseException) — replaced with real exception classes.
    - misspelled kwarg `determinstic` in `l_out.get_output(...)` — corrected
      to `deterministic` (spelled correctly two statements earlier), so the
      flag is no longer silently ignored.
    - removed unused `index = T.iscalar()` and a commented-out shared var.

    :param data: dataset object, stored on the instance for later use.
    :param U: word-embedding matrix, shared as `embeddings`.
    :param V: image data array, shared as `imgs` — presumably shaped
        (n_images, 3, 224, 224) to match the conv input; TODO confirm.
    :param num_output: number of answer units in the output layer.
    :param img_w: embedding width (also used for the zero padding vector).
    :param hidden_size: LSTM / transform hidden dimension.
    :param batch_size: fixed batch size baked into all shapes.
    :param lr: learning rate (adam only).
    :param fine_tune_W: if True, the embedding matrix is also trained.
    :param optimizer: 'adam' or 'rmsprop'.
    :param use_lstm: must be True; plain RNN is unsupported.
    :param is_bidirectional: must be False; bidirectional is unsupported.
    :param learn_init: passed to the LSTM layer.
    :param use_peepholes: passed to the LSTM layer as `peepholes`.
    :raises NotImplementedError: for bidirectional or non-LSTM configs.
    :raises ValueError: for an unsupported optimizer name.
    """
    self.data = data
    self.batch_size = batch_size
    img_h = MAX_LEN

    # Symbolic inputs; bound to shared buffers through `givens` below.
    q = T.imatrix('q')
    y = T.imatrix('y')
    img_idxs = T.ivector('img_idxs')
    q_seqlen = T.ivector('q_seqlen')

    imgs = theano.shared(V, name='imgs', borrow=True)
    embeddings = theano.shared(U, name='embeddings', borrow=True)

    # Helper to re-zero the padding embedding (row 0) after updates.
    zero_vec_tensor = T.fvector()
    self.zero_vec = np.zeros(img_w, dtype=theano.config.floatX)
    self.set_zero = theano.function(
        [zero_vec_tensor],
        updates=[(embeddings,
                  T.set_subtensor(embeddings[0, :], zero_vec_tensor))])

    # Look up word embeddings: (batch, seq_len, embedding_dim).
    q_input = embeddings[q.flatten()].reshape(
        (q.shape[0], q.shape[1], embeddings.shape[1]))

    l_in = lasagne.layers.InputLayer(shape=(batch_size, img_h, img_w))

    if is_bidirectional:
        raise NotImplementedError('Bidirectional unsupported')
    else:
        if use_lstm:
            l_dropout = lasagne.layers.DropoutLayer(l_in, p=0.5)
            l_recurrent = LSTMLayer(
                l_dropout,
                hidden_size,
                W_in_to_ingate=lasagne.init.Orthogonal(),
                W_hid_to_ingate=lasagne.init.Orthogonal(),
                W_cell_to_ingate=lasagne.init.Orthogonal(),
                W_in_to_forgetgate=lasagne.init.Orthogonal(),
                W_hid_to_forgetgate=lasagne.init.Orthogonal(),
                W_cell_to_forgetgate=lasagne.init.Orthogonal(),
                W_in_to_cell=lasagne.init.Orthogonal(),
                W_hid_to_cell=lasagne.init.Orthogonal(),
                W_in_to_outgate=lasagne.init.Orthogonal(),
                W_hid_to_outgate=lasagne.init.Orthogonal(),
                W_cell_to_outgate=lasagne.init.Orthogonal(),
                W_in_to_imgingate=lasagne.init.Orthogonal(),
                W_in_to_imgforgetgate=lasagne.init.Orthogonal(),
                W_in_to_imgcell=lasagne.init.Orthogonal(),
                W_in_to_imgoutgate=lasagne.init.Orthogonal(),
                nonlinearity_ingate=lasagne.nonlinearities.sigmoid,
                nonlinearity_forgetgate=lasagne.nonlinearities.sigmoid,
                nonlinearity_outgate=lasagne.nonlinearities.sigmoid,
                nonlinearity_cell=s_tanh,
                nonlinearity_out=s_tanh,
                backwards=False,
                learn_init=learn_init,
                peepholes=use_peepholes)
            l_recurrent = lasagne.layers.DropoutLayer(l_recurrent, p=0.5)
        else:
            raise NotImplementedError('RNN Unsupported')

    # Classifier head over the combined (question) representation.
    l_combined_in = lasagne.layers.InputLayer(shape=(batch_size, hidden_size))
    if USE_SINGLE_ANSWER:
        l_out = lasagne.layers.DenseLayer(
            l_combined_in,
            num_units=num_output,
            W=lasagne.init.Orthogonal(),
            nonlinearity=lasagne.nonlinearities.softmax)
    else:
        l_out = lasagne.layers.DenseLayer(
            l_combined_in,
            num_units=num_output,
            W=lasagne.init.Uniform(0.025),
            nonlinearity=lasagne.nonlinearities.sigmoid)

    # Image branch: 3 convs in cuda-convnet c01b layout, then a dense
    # transform to the LSTM hidden size.
    l_img_in = lasagne.layers.InputLayer(shape=(batch_size, 3, 224, 224))
    l_img_in = cuda_convnet.bc01_to_c01b(l_img_in)
    l_conv1 = cuda_convnet.Conv2DCCLayer(
        l_img_in,
        num_filters=32,
        filter_size=(8, 8),
        stride=(4, 4),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Uniform(0.01),
        b=lasagne.init.Constant(0.1),
        dimshuffle=False)
    l_conv2 = cuda_convnet.Conv2DCCLayer(
        l_conv1,
        num_filters=64,
        filter_size=(4, 4),
        stride=(2, 2),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Uniform(0.01),
        b=lasagne.init.Constant(0.1),
        dimshuffle=False)
    l_conv3 = cuda_convnet.Conv2DCCLayer(
        l_conv2,
        num_filters=64,
        filter_size=(3, 3),
        stride=(1, 1),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Uniform(0.01),
        b=lasagne.init.Constant(0.1),
        dimshuffle=False)
    l_conv3 = cuda_convnet.c01b_to_bc01(l_conv3)
    l_transform = lasagne.layers.DenseLayer(
        l_conv3,
        num_units=hidden_size,
        W=lasagne.init.Orthogonal(),
        nonlinearity=lasagne.nonlinearities.rectify)
    l_transform = lasagne.layers.DropoutLayer(l_transform, p=0.5)

    # Image features for the minibatch's images.
    e_imgfeats = l_transform.get_output(imgs[img_idxs]).reshape(
        (batch_size, hidden_size))
    # LSTM state at each question's last timestep (indexed via q_seqlen).
    e_question = l_recurrent.get_output(
        q_input, e_imgfeats=e_imgfeats, deterministic=False)[
        T.arange(batch_size), q_seqlen].reshape((q.shape[0], hidden_size))
    e_question_det = l_recurrent.get_output(
        q_input, e_imgfeats=e_imgfeats, deterministic=True)[
        T.arange(batch_size), q_seqlen].reshape((q.shape[0], hidden_size))

    # Bug fix: kwarg was misspelled `determinstic` (silently ignored).
    probas = l_out.get_output(e_question, deterministic=False)
    probas = T.clip(probas, 1e-7, 1.0 - 1e-7)  # avoid log(0) in the loss
    probas_det = l_out.get_output(e_question_det, deterministic=True)
    probas_det = T.clip(probas_det, 1e-7, 1.0 - 1e-7)

    cost = T.nnet.binary_crossentropy(probas, y).sum(axis=1).mean()
    cost_det = T.nnet.binary_crossentropy(probas_det, y).sum(axis=1).mean()

    params = (lasagne.layers.get_all_params(l_out)
              + lasagne.layers.get_all_params(l_recurrent))
    params += lasagne.layers.get_all_params(l_transform)
    if fine_tune_W:
        params += [embeddings]

    if 'adam' == optimizer:
        updates = adam(cost, params, learning_rate=lr)
    elif 'rmsprop' == optimizer:
        updates = lasagne.updates.rmsprop(cost, params)
    else:
        raise ValueError('Unsupported optimizer')

    # Pre-allocated shared buffers; minibatches are copied in before each
    # call, and the compiled functions read them via `givens`.
    self.shared_data = {}
    self.shared_data['q'] = theano.shared(
        np.zeros((batch_size, MAX_LEN), dtype=np.int32), borrow=True)
    self.shared_data['y'] = theano.shared(
        np.zeros((batch_size, num_output), dtype=np.int32), borrow=True)
    for key in ['q_seqlen', 'img_idxs']:
        self.shared_data[key] = theano.shared(
            np.zeros((batch_size,), dtype=np.int32), borrow=True)

    givens = {
        q: self.shared_data['q'],
        y: self.shared_data['y'],
        q_seqlen: self.shared_data['q_seqlen'],
        img_idxs: self.shared_data['img_idxs']
    }
    self.train_model = theano.function(
        [], cost, updates=updates, givens=givens, on_unused_input='warn')
    self.get_probas = theano.function(
        [], probas_det, givens=givens, on_unused_input='warn')
    self.get_cost = theano.function(
        [], cost_det, givens=givens, on_unused_input='warn')
def build_model(input_width=96, input_height=96, output_dim=1,
                batch_size=64, dimshuffle=True):
    """Build a 3-conv/3-pool regression net for 3-channel images, linear output.

    Bug fix: the `c01b_to_bc01` result was assigned to a misspelled dead name
    (`l_poo3`), so with dimshuffle=False the layout conversion was silently
    dropped before the dense layers.

    :param input_width: image width (3-channel input).
    :param input_height: image height.
    :param output_dim: number of linear output units.
    :param batch_size: fixed batch size baked into the input shape.
    :param dimshuffle: if False, run the conv stack in cuda-convnet's native
        c01b layout, converting from/to bc01 at the boundaries.
    :returns: the network's output layer.
    """
    l_in = lasagne.layers.InputLayer(
        shape=(batch_size, 3, input_width, input_height),
    )

    if not dimshuffle:
        l_in = cuda_convnet.bc01_to_c01b(l_in)

    l_conv1 = cuda_convnet.Conv2DCCLayer(
        l_in,
        num_filters=32,
        filter_size=(3, 3),
        nonlinearity=lasagne.nonlinearities.rectify,
        dimshuffle=dimshuffle,
    )
    l_pool1 = cuda_convnet.MaxPool2DCCLayer(
        l_conv1,
        pool_size=(2, 2),
        dimshuffle=dimshuffle,
    )
    l_conv2 = cuda_convnet.Conv2DCCLayer(
        l_pool1,
        num_filters=64,
        filter_size=(2, 2),
        nonlinearity=lasagne.nonlinearities.rectify,
        dimshuffle=dimshuffle,
    )
    l_pool2 = cuda_convnet.MaxPool2DCCLayer(
        l_conv2,
        pool_size=(2, 2),
        dimshuffle=dimshuffle,
    )
    l_conv3 = cuda_convnet.Conv2DCCLayer(
        l_pool2,
        num_filters=128,
        filter_size=(2, 2),
        nonlinearity=lasagne.nonlinearities.rectify,
        dimshuffle=dimshuffle,
    )
    l_pool3 = cuda_convnet.MaxPool2DCCLayer(
        l_conv3,
        pool_size=(2, 2),
        dimshuffle=dimshuffle,
    )

    if not dimshuffle:
        # Bug fix: was `l_poo3 = ...`, leaving l_pool3 unconverted.
        l_pool3 = cuda_convnet.c01b_to_bc01(l_pool3)

    l_hidden1 = lasagne.layers.DenseLayer(
        l_pool3,
        num_units=512,
        nonlinearity=lasagne.nonlinearities.rectify,
    )
    l_hidden1_dropout = lasagne.layers.DropoutLayer(l_hidden1, p=0.5)
    l_hidden2 = lasagne.layers.DenseLayer(
        l_hidden1_dropout,
        num_units=512,
        nonlinearity=lasagne.nonlinearities.rectify,
    )
    l_hidden2_dropout = lasagne.layers.DropoutLayer(l_hidden2, p=0.5)

    l_out = lasagne.layers.DenseLayer(
        l_hidden2_dropout,
        num_units=output_dim,
        nonlinearity=lasagne.nonlinearities.linear,
    )
    return l_out
def build_model(input_width, input_height, output_dim, batch_size=BATCH_SIZE,
                dimshuffle=True):
    """Build a 2-conv/2-pool softmax classifier with Uniform weight init.

    Fix: removed the commented-out second hidden layer (dead code).

    :param input_width: width of the single-channel input images.
    :param input_height: height of the single-channel input images.
    :param output_dim: number of softmax output units (classes).
    :param batch_size: fixed batch size baked into the input shape.
    :param dimshuffle: if False, run the conv stack in cuda-convnet's native
        c01b layout, converting from/to bc01 at the boundaries.
    :returns: the network's output layer.
    """
    l_in = lasagne.layers.InputLayer(
        shape=(batch_size, 1, input_width, input_height),
    )

    if not dimshuffle:
        l_in = cuda_convnet.bc01_to_c01b(l_in)

    l_conv1 = cuda_convnet.Conv2DCCLayer(
        l_in,
        num_filters=32,
        filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Uniform(),
        dimshuffle=dimshuffle,
    )
    l_pool1 = cuda_convnet.MaxPool2DCCLayer(
        l_conv1,
        ds=(2, 2),
        dimshuffle=dimshuffle,
    )
    l_conv2 = cuda_convnet.Conv2DCCLayer(
        l_pool1,
        num_filters=32,
        filter_size=(5, 5),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Uniform(),
        dimshuffle=dimshuffle,
    )
    l_pool2 = cuda_convnet.MaxPool2DCCLayer(
        l_conv2,
        ds=(2, 2),
        dimshuffle=dimshuffle,
    )

    if not dimshuffle:
        l_pool2 = cuda_convnet.c01b_to_bc01(l_pool2)

    l_hidden1 = lasagne.layers.DenseLayer(
        l_pool2,
        num_units=256,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.Uniform(),
    )
    l_hidden1_dropout = lasagne.layers.DropoutLayer(l_hidden1, p=0.5)

    l_out = lasagne.layers.DenseLayer(
        l_hidden1_dropout,
        num_units=output_dim,
        nonlinearity=lasagne.nonlinearities.softmax,
        W=lasagne.init.Uniform(),
    )
    return l_out
def build_model(input_width=96, input_height=96, output_dim=1,
                batch_size=64, dimshuffle=True):
    """Build a 3-conv/3-pool regression net for 3-channel images, linear output.

    Bug fix: the `c01b_to_bc01` result was assigned to a misspelled dead name
    (`l_poo3`), so with dimshuffle=False the layout conversion was silently
    dropped before the dense layers.
    NOTE(review): this function duplicates an earlier `build_model` in this
    file — consider consolidating.

    :param input_width: image width (3-channel input).
    :param input_height: image height.
    :param output_dim: number of linear output units.
    :param batch_size: fixed batch size baked into the input shape.
    :param dimshuffle: if False, run the conv stack in cuda-convnet's native
        c01b layout, converting from/to bc01 at the boundaries.
    :returns: the network's output layer.
    """
    l_in = lasagne.layers.InputLayer(
        shape=(batch_size, 3, input_width, input_height),
    )

    if not dimshuffle:
        l_in = cuda_convnet.bc01_to_c01b(l_in)

    l_conv1 = cuda_convnet.Conv2DCCLayer(
        l_in,
        num_filters=32,
        filter_size=(3, 3),
        nonlinearity=lasagne.nonlinearities.rectify,
        dimshuffle=dimshuffle,
    )
    l_pool1 = cuda_convnet.MaxPool2DCCLayer(
        l_conv1,
        pool_size=(2, 2),
        dimshuffle=dimshuffle,
    )
    l_conv2 = cuda_convnet.Conv2DCCLayer(
        l_pool1,
        num_filters=64,
        filter_size=(2, 2),
        nonlinearity=lasagne.nonlinearities.rectify,
        dimshuffle=dimshuffle,
    )
    l_pool2 = cuda_convnet.MaxPool2DCCLayer(
        l_conv2,
        pool_size=(2, 2),
        dimshuffle=dimshuffle,
    )
    l_conv3 = cuda_convnet.Conv2DCCLayer(
        l_pool2,
        num_filters=128,
        filter_size=(2, 2),
        nonlinearity=lasagne.nonlinearities.rectify,
        dimshuffle=dimshuffle,
    )
    l_pool3 = cuda_convnet.MaxPool2DCCLayer(
        l_conv3,
        pool_size=(2, 2),
        dimshuffle=dimshuffle,
    )

    if not dimshuffle:
        # Bug fix: was `l_poo3 = ...`, leaving l_pool3 unconverted.
        l_pool3 = cuda_convnet.c01b_to_bc01(l_pool3)

    l_hidden1 = lasagne.layers.DenseLayer(
        l_pool3,
        num_units=512,
        nonlinearity=lasagne.nonlinearities.rectify,
    )
    l_hidden1_dropout = lasagne.layers.DropoutLayer(l_hidden1, p=0.5)
    l_hidden2 = lasagne.layers.DenseLayer(
        l_hidden1_dropout,
        num_units=512,
        nonlinearity=lasagne.nonlinearities.rectify,
    )
    l_hidden2_dropout = lasagne.layers.DropoutLayer(l_hidden2, p=0.5)

    l_out = lasagne.layers.DenseLayer(
        l_hidden2_dropout,
        num_units=output_dim,
        nonlinearity=lasagne.nonlinearities.linear,
    )
    return l_out