def test_split(self):
    from lasagne.layers import InputLayer, DenseLayer, get_all_layers
    # l1 --> l2 --> l3
    #  \---> l4
    l1 = InputLayer((10, 20))
    l2 = DenseLayer(l1, 30)
    l3 = DenseLayer(l2, 40)
    l4 = DenseLayer(l1, 50)
    assert get_all_layers(l3) == [l1, l2, l3]
    assert get_all_layers(l4) == [l1, l4]
    assert get_all_layers([l3, l4]) == [l1, l2, l3, l4]
    assert get_all_layers([l4, l3]) == [l1, l4, l2, l3]
def test_merge(self):
    from lasagne.layers import (InputLayer, DenseLayer, ElemwiseSumLayer,
                                get_all_layers)
    # l1 --> l2 --> l3 --> l6
    #        l4 --> l5 ----^
    l1 = InputLayer((10, 20))
    l2 = DenseLayer(l1, 30)
    l3 = DenseLayer(l2, 40)
    l4 = InputLayer((10, 30))
    l5 = DenseLayer(l4, 40)
    l6 = ElemwiseSumLayer([l3, l5])
    assert get_all_layers(l6) == [l1, l2, l3, l4, l5, l6]
    assert get_all_layers([l4, l6]) == [l4, l1, l2, l3, l5, l6]
    assert get_all_layers([l5, l6]) == [l4, l5, l1, l2, l3, l6]
    assert get_all_layers([l4, l2, l5, l6]) == [l4, l1, l2, l5, l3, l6]
def test_bridge(self):
    from lasagne.layers import (InputLayer, DenseLayer, ElemwiseSumLayer,
                                get_all_layers)
    # l1 --> l2 --> l3 --> l4 --> l5
    #         \------------^
    l1 = InputLayer((10, 20))
    l2 = DenseLayer(l1, 30)
    l3 = DenseLayer(l2, 30)
    l4 = ElemwiseSumLayer([l2, l3])
    l5 = DenseLayer(l4, 40)
    # check for correct topological order
    assert get_all_layers(l5) == [l1, l2, l3, l4, l5]
    # check that treat_as_input=[l4] blocks the search and =[l3] does not
    assert get_all_layers(l5, treat_as_input=[l4]) == [l4, l5]
    assert get_all_layers(l5, treat_as_input=[l3]) == [l1, l2, l3, l4, l5]
def create_model(self, input_spread, output_spread):
    i_map = InputLayer(shape=(
        None, self.timesteps, self.num_channels, self.width, self.height))
    cnn = self.create_cnn()
    l_pre = net_on_seq(cnn, i_map)
    i_stat = InputLayer(shape=(None, self.timesteps, input_spread))
    net = ConcatLayer([l_pre, i_stat], axis=2)
    for _ in range(2):
        net = self.create_lstm_stack(net)
    net = SliceLayer(net, -1, 1)
    net = DenseLayer(
        net, num_units=output_spread, W=init.Normal(), nonlinearity=softmax)
    self.net = net
    self.layers += get_all_layers(net)
    return [i_map, i_stat]
def build_model(input_shape, input_var, dense=True):
    net = {}
    net['input'] = InputLayer(input_shape, input_var=input_var)
    net['input'].num_filters = input_shape[1]
    net['conv1'] = ConvLayer(net['input'], num_filters=128, filter_size=3,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')
    net['conv2'] = ConvLayer(net['conv1'], num_filters=256, filter_size=3,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')
    net['pool1'] = ConvLayer(net['conv2'], num_filters=256, filter_size=3, stride=2,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')
    net['conv3'] = ConvLayer(net['pool1'], num_filters=512, filter_size=3,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')
    net['pool2'] = ConvLayer(net['conv3'], num_filters=512, filter_size=3, stride=2,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')

    if dense:
        net['dense'] = dropout(
            DenseLayer(net['pool2'], num_units=1024,
                       nonlinearity=nonlinearities.leaky_rectify), 0.5)
        # Deconv
        net['dense/inverse'] = inverse_dense_layer(
            net['dense'], net['dense'], net['pool2'].output_shape)
        net['pool2/inverse'] = inverse_convolution_strided_layer(
            net['dense/inverse'], net['pool2'])
    else:
        net['pool2/inverse'] = inverse_convolution_strided_layer(
            net['pool2'], net['pool2'])

    net['conv3/inverse'] = inverse_convolution_layer(net['pool2/inverse'], net['conv3'])
    net['pool1/inverse'] = inverse_convolution_strided_layer(net['conv3/inverse'], net['pool1'])
    net['conv2/inverse'] = inverse_convolution_layer(net['pool1/inverse'], net['conv2'])
    net['conv1/inverse'] = inverse_convolution_layer(net['conv2/inverse'], net['conv1'])
    net['conv0/inverse'] = ConvLayer(net['conv1/inverse'], num_filters=input_shape[1],
                                     filter_size=1, nonlinearity=nonlinearities.linear,
                                     pad='same')
    net['prob'] = net['conv0/inverse']

    for layer in get_all_layers(net['prob']):
        print layer
        print layer.output_shape

    return net
def summary(self, light=False):
    """ Print a summary of the network architecture """
    layer_list = get_all_layers(self.output_layer)

    def filter_function(layer):
        """ We only display the layers in the list below"""
        return np.any([isinstance(layer, layer_type) for layer_type in
                       [InputLayer, Conv2DLayer, Pool2DLayer, Deconv2DLayer,
                        ConcatLayer]])

    layer_list = filter(filter_function, layer_list)
    output_shape_list = map(get_output_shape, layer_list)
    layer_name_function = lambda s: str(s).split('.')[3].split('Layer')[0]

    if not light:
        print('-' * 75)
        print 'Warning : all the layers are not displayed \n'
        print ' {:<15} {:<20} {:<20}'.format('Layer', 'Output shape', 'W shape')
        for i, (layer, output_shape) in enumerate(zip(layer_list,
                                                      output_shape_list)):
            if hasattr(layer, 'W'):
                input_shape = layer.W.get_value().shape
            else:
                input_shape = ''
            print '{:<3} {:<15} {:<20} {:<20}'.format(
                i + 1, layer_name_function(layer), output_shape, input_shape)
            if isinstance(layer, Pool2DLayer) | isinstance(layer, Deconv2DLayer):
                print('')

    print '\nNumber of Convolutional layers : {}'.format(
        len(filter(lambda x: isinstance(x, Conv2DLayer) |
                   isinstance(x, Deconv2DLayer), layer_list)))
    print 'Number of parameters : {}'.format(
        np.sum(map(np.size, get_all_param_values(self.output_layer))))
    print('-' * 75)
def create_user_pref_encoder(self, l_song_embedding):
    # shape=(num_users, num_songs, embedding)
    l_song_encoder, i_user_songs = self.create_song_encoder(
        l_song_embedding)
    self.layers += get_all_layers(l_song_encoder)
    l_song_encoder = InputLayer(
        shape=(None, None, self.embedding),
        input_var=get_output(l_song_encoder),
        name='l_song_encoder')
    self.i_user_song_embeddings = l_song_encoder
    # shape=(num_users, num_songs, 1 (value is play_count))
    i_user_counts = InputLayer(
        shape=(None, None, 1), name='i_user_counts')
    # shape=(num_users, num_songs, embedding + 1 (value is play_count))
    l_song_vals = ConcatLayer(
        [i_user_counts, l_song_encoder], axis=2, name='l_song_vals')
    # output_shape=(num_users, embedding)
    l_user_prefs = self.create_pref_embedding(l_song_vals)
    return l_user_prefs, i_user_songs, i_user_counts
def load_network_weights(network, filename):
    pas = pickle.load(open(filename, 'rb'))
    ls = layers.get_all_layers(network)
    for i, l in enumerate(ls[1:], 0):
        l.W.set_value(pas[(i*2)].astype(np.float32))
        l.b.set_value(pas[(i*2)+1].astype(np.float32))
    return network
def save_params(self, filename=None):
    """
    Save it to HDF in the following format:
        /epoch<N>/L<I>_<type>/P<I>_<name>
    """
    if filename is None:
        filename = self.experiment_name + ".hdf5"
    mode = 'w' if self.n_iterations() == 0 else 'a'
    f = h5py.File(filename, mode=mode)
    epoch_name = 'epoch{:06d}'.format(self.n_iterations())
    try:
        epoch_group = f.create_group(epoch_name)
    except ValueError:
        self.logger.exception("Cannot save params!")
        f.close()
        return

    layers = get_all_layers(self.layers[-1])
    for layer_i, layer in enumerate(layers):
        params = layer.get_params()
        if not params:
            continue
        layer_name = 'L{:02d}_{}'.format(layer_i, layer.__class__.__name__)
        layer_group = epoch_group.create_group(layer_name)
        for param_i, param in enumerate(params):
            param_name = 'P{:02d}'.format(param_i)
            if param.name:
                param_name += "_" + param.name
            data = param.get_value()
            layer_group.create_dataset(
                param_name, data=data, compression="gzip")

    f.close()
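# Hedged companion sketch for save_params above: read parameters back out of
# the /epoch<N>/L<I>_<type>/P<I>_<name> layout its docstring describes.
# `filename` and `epoch` are hypothetical arguments, not part of the original
# class.
def read_saved_params(filename, epoch=0):
    import h5py
    values = {}
    with h5py.File(filename, mode='r') as f:
        epoch_group = f['epoch{:06d}'.format(epoch)]
        for layer_name, layer_group in epoch_group.items():
            for param_name, dataset in layer_group.items():
                # dataset[...] reads the gzip-compressed array into memory
                values['{}/{}'.format(layer_name, param_name)] = dataset[...]
    return values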
def load_model_predict(PATH_simresult, test_set_X):
    # Load sim results
    print 'loading', PATH_simresult, '\n'
    with open(PATH_simresult, "rb") as f:
        temp = pickle.load(f)
    network = temp[-1]
    best_network_params = get_all_param_values(network)

    # extract input var
    print 'extract input var \n'
    X = get_all_layers(network)[0].input_var

    # build test function
    print 'build test function and reinit network \n'
    test_fn = build_test_func(test_set_X, network, X)
    reinitiate_set_params(network, weights=best_network_params)

    print 'test set shape', test_set_X.shape, 'type:', type(test_set_X), '\n'
    print 'make prediction \n'
    # predictedy = test_fn(test_set_X)

    # batched implementation
    batch_size = 128
    n_test_batches = test_set_X.shape[0] // batch_size + 1
    test_set_x_size = test_set_X.shape[0]
    predictedy = [test_fn(
        test_set_X[index * batch_size:
                   min((index + 1) * batch_size, test_set_x_size)])
        for index in range(n_test_batches)]
    predictedy = np.vstack(predictedy)
    return predictedy
def get_network_str(layer, get_network=True, incomings=False, outgoings=False):
    """ Returns a string representation of the entire network contained under this layer.

    Parameters
    ----------
    layer : Layer or list
        the :class:`Layer` instance for which to gather all layers feeding
        into it, or a list of :class:`Layer` instances.

    get_network : boolean
        if True, calls `get_all_layers` on `layer`;
        if False, assumes `layer` already contains all `Layer` instances
        intended for representation.

    incomings : boolean
        if True, the representation includes a list of all incomings for each
        `Layer` instance.

    outgoings : boolean
        if True, the representation includes a list of all outgoings for each
        `Layer` instance.

    Returns
    -------
    str
        A string representation of `layer`. Each layer is assigned an ID which
        is its corresponding index in the list obtained from `get_all_layers`.
    """
    # `layer` can either be a single `Layer` instance or a list of `Layer`
    # instances. If a list, it can already be the result from `get_all_layers`
    # or not, as indicated by the `get_network` flag.

    # Get network using get_all_layers if required:
    if get_network:
        network = get_all_layers(layer)
    else:
        network = layer

    # Initialize a list of lists to (temporarily) hold the str representation
    # of each component, insert header
    network_str = deque([])
    network_str = _insert_header(
        network_str, incomings=incomings, outgoings=outgoings)

    # The representation can optionally display incoming and outgoing layers
    # for each layer, similar to adjacency lists. If requested (using the
    # incomings and outgoings flags), build the adjacency lists. The
    # numbers/ids in the adjacency lists correspond to the layer's index in
    # `network`.
    if incomings or outgoings:
        ins, outs = _get_adjacency_lists(network)

    # For each layer in the network, build a representation and append to
    # `network_str`
    for i, current_layer in enumerate(network):

        # Initialize list to (temporarily) hold str of layer
        layer_str = deque([])

        # First column for incomings, second for the layer itself, third for
        # outgoings, fourth for layer description
        if incomings:
            layer_str.append(ins[i])
        layer_str.append(i)
        if outgoings:
            layer_str.append(outs[i])
        # default representation can be changed by overriding __str__
        layer_str.append(str(current_layer))
        network_str.append(layer_str)
    return _get_table_str(network_str)
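# A minimal usage sketch for get_network_str above, assuming only the standard
# lasagne.layers API: build a small two-branch graph and print the table, once
# from the output layer and once from a pre-computed layer list.
def example_get_network_str():
    from lasagne.layers import InputLayer, DenseLayer, ConcatLayer, get_all_layers
    l_in = InputLayer((None, 20))
    l_a = DenseLayer(l_in, 10)
    l_b = DenseLayer(l_in, 10)
    l_out = DenseLayer(ConcatLayer([l_a, l_b]), 2)
    # pass the output layer directly; IDs are indices into get_all_layers(l_out)
    print(get_network_str(l_out, incomings=True, outgoings=True))
    # or pass an existing layer list and skip the internal traversal
    print(get_network_str(get_all_layers(l_out), get_network=False))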
def print_layer_shapes(self):
    print '\n', '-'*100
    print 'Net shapes:\n'
    layers = get_all_layers(self.net['l_dist'])
    for l in layers:
        print '%-20s \t%s' % (l.name, get_output_shape(l))
    print '\n', '-'*100
def get_max_norm_params_and_maximums(layer):
    layers = get_all_layers(layer)
    result = []
    for layer in layers:
        if hasattr(layer, 'max_col_norm') and hasattr(layer, 'W'):
            result.append((layer.W, layer.max_col_norm))
    return result
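# A hedged sketch of one way the (W, max_col_norm) pairs collected by
# get_max_norm_params_and_maximums above could be consumed: clipping each
# weight update with lasagne.updates.norm_constraint. `loss`, `output_layer`
# and `learning_rate` are hypothetical placeholders.
def apply_max_col_norms(loss, output_layer, learning_rate=0.01):
    import lasagne
    from lasagne.layers import get_all_params
    params = get_all_params(output_layer, trainable=True)
    updates = lasagne.updates.sgd(loss, params, learning_rate)
    for W, max_norm in get_max_norm_params_and_maximums(output_layer):
        # rescale the updated W so no column norm exceeds max_col_norm
        updates[W] = lasagne.updates.norm_constraint(updates[W], max_norm)
    return updates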
def create_model(self):
    # shape=(num_users, embedding)
    l_song_embedding, i_input_song = self.create_song_embedding()
    self.layers += get_all_layers(l_song_embedding)
    self.i_input_song = i_input_song
    self.song_embedding = l_song_embedding
    i_input_song_embedding = InputLayer(
        (None, self.embedding),
        input_var=get_output(l_song_embedding),
        name='i_input_song_embedding')
    self.i_input_song_embedding = i_input_song_embedding

    # shape=(num_users, embedding)
    l_user_prefs, i_user_songs, i_user_counts = \
        self.create_user_pref_encoder(l_song_embedding)
    self.i_user_songs = i_user_songs
    self.i_user_counts = i_user_counts
    self.layers += get_all_layers(l_user_prefs)
    l_user_prefs = InputLayer(
        (None, self.embedding),
        input_var=get_output(l_user_prefs),
        name='l_user_prefs')
    self.i_prefs = l_user_prefs

    # shape=(num_users, 2*embedding)
    net = ConcatLayer(
        [i_input_song_embedding, l_user_prefs], axis=1, name='concat')
    for _ in range(3):
        net = self.dense_stack(net)
    net = self.dense_stack(net, nonlinearity=None)
    net = self.dense_stack(net, nonlinearity=None, num_units=1)
    net = SliceLayer(net, 0, 1)
    self.net = net
    self.layers += get_all_layers(net)
    return [i_user_songs, i_user_counts, i_input_song]
def desc_func(desc_layer, save_diagram=True):
    # takes a layer and makes a function that returns its output
    # also saves a diagram of the network wrt the descriptor output
    X = T.tensor4()
    if save_diagram:
        all_layers = ll.get_all_layers(desc_layer)
        imwrite_architecture(all_layers, './desc_function.png')
    descriptor = ll.get_output(desc_layer, X, deterministic=True)
    return tfunc([X], descriptor)
def make_net(output):
    """Form dictionary from incoming layers of lasagne output.

    These layers need to have names."""
    net = {}
    for l in get_all_layers(output):
        name = l.name
        if l.name is not None:
            net[name] = l
    return net
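# A minimal sketch of using make_net above: only layers constructed with a
# `name` end up in the returned dictionary, so the unnamed InputLayer here is
# silently skipped.
def example_make_net():
    from lasagne.layers import InputLayer, DenseLayer
    l_in = InputLayer((None, 20))                  # unnamed, not included
    l_hid = DenseLayer(l_in, 30, name='hidden')
    l_out = DenseLayer(l_hid, 2, name='output')
    net = make_net(l_out)
    assert sorted(net.keys()) == ['hidden', 'output']
    return net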
def __init__(self, last_layer, compile_kwargs={}):
    # get all the layers
    self.all_layers = layers.get_all_layers(last_layer)
    # save input and last layer
    # (it is currently assumed that all_layers[0] will be the input layer;
    # this is how get_all_layers orders things, but edge cases might exist)
    self.last_layer = last_layer
    self.input_layer = self.all_layers[0]
    self.compile_kwargs = compile_kwargs
    self.compile(**compile_kwargs)
def test_batchnorm_except_last_layer(batch_norm_no_last_layer_def):
    model = models.NetworkManager(batch_norm_no_last_layer_def)
    assert model is not None
    layer_names = [l.__class__.__name__
                   for l in get_all_layers(model._network)]
    batch_norm_layers = [l for l in layer_names if 'BatchNormLayer' in l]
    layer_count = len(batch_norm_no_last_layer_def['layers'])
    assert len(batch_norm_layers) == (layer_count - 1)
def get_y_mu_sigma(self, x):
    layers = get_all_layers(self)
    # output from sampled weights of all layers but the last.
    z = get_output(layers[-2], x, deterministic=False)
    # sampled output of the final layer.
    y = self.nonlinearity(
        T.dot(z, self.get_W()) + self.get_b().dimshuffle('x', 0))
    # mean output of the final layer.
    y_mu = self.nonlinearity(
        T.dot(z, self.W_mu) + self.b_mu.dimshuffle('x', 0))
    # logsigma output of the final layer.
    y_logsigma = self.nonlinearity(
        T.dot(z, self.W_logsigma) + self.b_logsigma.dimshuffle('x', 0))
    return y, y_mu, y_logsigma
def get_automatic_updates(layer_or_layers, treat_as_input=None, **kwargs):
    """
    Returns automatic updates from all the layers given and all layers they
    depend on.
    :param layer_or_layers: layer(s) to collect updates from
    :param treat_as_input: see the same param in lasagne.layers.get_all_layers
    """
    updates = theano.OrderedUpdates()
    for layer in get_all_layers(layer_or_layers, treat_as_input=treat_as_input):
        if hasattr(layer, 'get_automatic_updates'):
            updates += layer.get_automatic_updates(**kwargs)
    return updates
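# A hedged sketch of how get_automatic_updates above might be combined with
# ordinary training updates before compiling. `output_layer`, `loss` and
# `input_vars` are hypothetical placeholders; only layers that implement
# get_automatic_updates() contribute anything.
def compile_train_fn(output_layer, loss, input_vars):
    import theano
    import lasagne
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    train_updates = lasagne.updates.adam(loss, params)
    # merge in the bookkeeping updates collected from the layers
    train_updates.update(get_automatic_updates(output_layer))
    return theano.function(input_vars, loss, updates=train_updates)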
def get_bottleneck_features(network, X):
    layers = get_all_layers(network)
    bottleneck = [l for l in layers if l.name == 'bottleneck']
    if len(bottleneck) == 0:
        raise ValueError('network has no bottleneck')
    else:
        bottleneck = bottleneck[0]
    bfeatures = get_output(bottleneck, X)
    #print X.shape
    #print bfeatures.eval().shape
    return bfeatures
def test_bridge(self):
    from lasagne.layers import (InputLayer, DenseLayer, ElemwiseSumLayer,
                                get_all_layers)
    # l1 --> l2 --> l3 --> l4 --> l5
    #         \------------^
    l1 = InputLayer((10, 20))
    l2 = DenseLayer(l1, 30)
    l3 = DenseLayer(l2, 30)
    l4 = ElemwiseSumLayer([l2, l3])
    l5 = DenseLayer(l4, 40)
    assert get_all_layers(l5) == [l1, l2, l3, l4, l5]
def print_net(self):
    layers = get_all_layers(self.layers[-1])
    for layer in layers:
        self.logger.info(str(layer))
        try:
            input_shape = layer.input_shape
        except:
            pass
        else:
            self.logger.info(" Input shape: {}".format(input_shape))
        self.logger.info("Output shape: {}".format(layer.output_shape))
def triplet_loss_iter(embedder, update_params={}):
    X_triplets = {
        'anchor': T.tensor4(),
        'positive': T.tensor4(),
        'negative': T.tensor4(),
    }  # each will be a batch of images

    final_emb_layer = embedder[-1]
    all_layers = ll.get_all_layers(embedder)
    imwrite_architecture(all_layers, './layer_rep.png')
    # assume we get a list of predictions (e.g. for jet architecture, but
    # should work w/ just one pred)
    # another assumption (which must hold when the network is being made):
    # the last prediction layer is a) the end of the network and b) what we
    # ultimately care about; however the other prediction layers will be
    # incorporated into the training loss
    predicted_embeds_train = {k: ll.get_output(embedder, X)[-1]
                              for k, X in X_triplets.items()}
    predicted_embeds_valid = {k: ll.get_output(final_emb_layer, X, deterministic=True)
                              for k, X in X_triplets.items()}
    # each output should be batch_size x embed_size

    # should give us a vector of batch_size of distances btw anchor and positive
    alpha = 0.2  # FaceNet alpha
    triplet_pos = lambda pred: (pred['anchor'] - pred['positive']).norm(2, axis=1)
    triplet_neg = lambda pred: (pred['anchor'] - pred['negative']).norm(2, axis=1)
    triplet_distances = lambda pred: (triplet_pos(pred) - triplet_neg(pred) + alpha).clip(0, np.inf)
    triplet_failed = lambda pred: T.mean(triplet_distances(pred) > alpha)
    triplet_loss = lambda pred: T.sum(triplet_distances(pred))

    decay = 0.001
    reg = regularize_network_params(final_emb_layer, l2) * decay
    losses_reg = lambda pred: triplet_loss(pred) + reg
    loss_train = losses_reg(predicted_embeds_train)
    loss_train.name = 'TL'  # for the names
    #all_params = list(chain(*[ll.get_all_params(pred) for pred in embedder]))
    all_params = ll.get_all_params(embedder, trainable=True)  # this should work with multiple 'roots'
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = adam(grads, all_params)
    #updates = nesterov_momentum(grads, all_params, update_params['l_r'], momentum=update_params['momentum'])

    print("Compiling network for training")
    tic = time.time()
    train_iter = theano.function(
        [X_triplets['anchor'], X_triplets['positive'], X_triplets['negative']],
        [loss_train] + grads,
        updates=updates)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)
    #theano.printing.pydotprint(loss, outfile='./loss_graph.png',var_with_name_simple=True)
    print("Compiling network for validation")
    tic = time.time()
    valid_iter = theano.function(
        [X_triplets['anchor'], X_triplets['positive'], X_triplets['negative']],
        [triplet_loss(predicted_embeds_valid),
         losses_reg(predicted_embeds_valid),
         triplet_failed(predicted_embeds_valid)])
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    return {'train': train_iter, 'valid': valid_iter,
            'gradnames': [g.name for g in grads]}
def contrastive_loss_iter(embedder, update_params={}):
    X_pairs = {
        'img1': T.tensor4(),
        'img2': T.tensor4(),
    }
    y = T.ivector()  # basically class labels

    final_emb_layer = embedder[-1]
    all_layers = ll.get_all_layers(embedder)
    imwrite_architecture(all_layers, './layer_rep.png')
    # assume we get a list of predictions (e.g. for jet architecture, but
    # should work w/ just one pred)
    # another assumption (which must hold when the network is being made):
    # the last prediction layer is a) the end of the network and b) what we
    # ultimately care about; however the other prediction layers will be
    # incorporated into the training loss
    predicted_embeds_train = {k: ll.get_output(embedder, X)[-1]
                              for k, X in X_pairs.items()}
    predicted_embeds_valid = {k: ll.get_output(final_emb_layer, X, deterministic=True)
                              for k, X in X_pairs.items()}

    margin = 1

    # if distance is 0 that's bad
    distance = lambda pred: (pred['img1'] - pred['img2'] + 1e-7).norm(2, axis=1)
    contrastive_loss = lambda pred: T.mean(y*(distance(pred)) + (1 - y)*(margin - distance(pred)).clip(0, np.inf))
    failed_matches = lambda pred: T.switch(T.eq(T.sum(y), 0), 0, T.sum((y*distance(pred)) > margin) / T.sum(y))
    failed_nonmatches = lambda pred: T.switch(T.eq(T.sum(1-y), 0), 0, T.sum((1-y*distance(pred)) < margin) / T.sum(1-y))
    failed_pairs = lambda pred: 0.5*failed_matches(pred) + 0.5*failed_nonmatches(pred)

    decay = 0.0001
    reg = regularize_network_params(final_emb_layer, l2) * decay
    losses_reg = lambda pred: contrastive_loss(pred) + reg
    loss_train = losses_reg(predicted_embeds_train)
    loss_train.name = 'CL'  # for the names
    #all_params = list(chain(*[ll.get_all_params(pred) for pred in embedder]))
    all_params = ll.get_all_params(embedder, trainable=True)  # this should work with multiple 'roots'
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = adam(grads, all_params)
    #updates = nesterov_momentum(grads, all_params, update_params['l_r'], momentum=update_params['momentum'])

    print("Compiling network for training")
    tic = time.time()
    train_iter = theano.function([X_pairs['img1'], X_pairs['img2'], y],
                                 [loss_train] + grads,
                                 updates=updates)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)
    #theano.printing.pydotprint(loss, outfile='./loss_graph.png',var_with_name_simple=True)
    print("Compiling network for validation")
    tic = time.time()
    valid_iter = theano.function([X_pairs['img1'], X_pairs['img2'], y],
                                 [contrastive_loss(predicted_embeds_valid),
                                  losses_reg(predicted_embeds_valid),
                                  failed_pairs(predicted_embeds_valid)])
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    return {'train': train_iter, 'valid': valid_iter,
            'gradnames': [g.name for g in grads]}
def test_stack(self):
    from lasagne.layers import InputLayer, DenseLayer, get_all_layers
    from itertools import permutations
    # l1 --> l2 --> l3
    l1 = InputLayer((10, 20))
    l2 = DenseLayer(l1, 30)
    l3 = DenseLayer(l2, 40)
    # try all possible combinations and orders for a query
    for count in (0, 1, 2, 3):
        for query in permutations([l1, l2, l3], count):
            if l3 in query:
                expected = [l1, l2, l3]
            elif l2 in query:
                expected = [l1, l2]
            elif l1 in query:
                expected = [l1]
            else:
                expected = []
            assert get_all_layers(query) == expected
    # treat_as_input=[l2] should block l1 from appearing
    assert get_all_layers(l3, treat_as_input=[l2]) == [l2, l3]
def build_model_small(input_shape, input_var):
    net = {}
    net['input'] = InputLayer(input_shape, input_var=input_var)
    net['input'].num_filters = input_shape[1]
    net['conv1'] = batch_norm(
        ConvLayer(net['input'], num_filters=256, filter_size=11,
                  nonlinearity=nonlinearities.leaky_rectify, pad='same'))
    net['pool1'] = dropout(PoolLayer(net['conv1'], 2, mode='max'), 0.5)
    net['conv2'] = batch_norm(
        ConvLayer(net['pool1'], num_filters=256, filter_size=7,
                  nonlinearity=nonlinearities.leaky_rectify, pad='same'))
    net['pool2'] = dropout(PoolLayer(net['conv2'], 2, mode='max'), 0.5)
    net['conv3'] = batch_norm(
        ConvLayer(net['pool2'], num_filters=396, filter_size=5,
                  nonlinearity=nonlinearities.leaky_rectify, pad='same'))
    net['pool3'] = dropout(PoolLayer(net['conv3'], 2, mode='max'), 0.5)
    net['conv4'] = dropout(
        batch_norm(
            ConvLayer(net['pool3'], num_filters=512, filter_size=3,
                      nonlinearity=nonlinearities.leaky_rectify, pad='same')),
        0.5)
    net['conv5'] = dropout(
        batch_norm(
            ConvLayer(net['conv4'], num_filters=1024, filter_size=1,
                      nonlinearity=nonlinearities.leaky_rectify, pad='same')),
        0.5)
    net['dense1'] = dropout(
        batch_norm(
            DenseLayer(net['conv5'], num_units=1024,
                       nonlinearity=nonlinearities.leaky_rectify)),
        0.5)
    net['dense2'] = DenseLayer(net['dense1'], num_units=11,
                               nonlinearity=nonlinearities.softmax)
    net['prob'] = net['dense2']

    for layer in get_all_layers(net['prob']):
        print layer
        print layer.output_shape

    return net
def loss_iter(segmenter, update_params={}):
    X = T.tensor4()
    y = T.tensor4()
    pixel_weights = T.tensor3()

    final_pred_layer = segmenter[-1]
    all_layers = ll.get_all_layers(segmenter)
    imwrite_architecture(all_layers, './layer_rep.png')
    # assume we get a list of predictions (e.g. for jet architecture, but
    # should work w/ just one pred)
    # another assumption (which must hold when the network is being made):
    # the last prediction layer is a) the end of the network and b) what we
    # ultimately care about; however the other prediction layers will be
    # incorporated into the training loss
    predicted_masks_train = ll.get_output(segmenter, X)
    predicted_mask_valid = ll.get_output(final_pred_layer, X, deterministic=True)

    thresh = 0.5
    accuracy = lambda pred: T.mean(T.eq(T.argmax(pred, axis=1), T.argmax(y, axis=1)))
    true_pos = lambda pred: T.sum((pred[:, 0, :, :] > thresh) * (y[:, 0, :, :] > thresh))
    false_pos = lambda pred: T.sum((pred[:, 0, :, :] > thresh) - (y[:, 0, :, :] > thresh))
    precision = lambda pred: (true_pos(pred) / (true_pos(pred) + false_pos(pred)))

    pixel_weights_1d = pixel_weights.flatten(ndim=1)
    losses = lambda pred: T.mean(crossentropy_flat(pred + 1e-7, y + 1e-7) * pixel_weights_1d)

    decay = 0.0001
    reg = regularize_network_params(final_pred_layer, l2) * decay
    losses_reg = lambda pred: losses(pred) + reg
    loss_train = T.sum([losses_reg(mask) for mask in predicted_masks_train])
    loss_train.name = 'CE'  # for the names
    #all_params = list(chain(*[ll.get_all_params(pred) for pred in segmenter]))
    all_params = ll.get_all_params(segmenter, trainable=True)  # this should work with multiple 'roots'
    grads = T.grad(loss_train, all_params, add_names=True)
    updates = adam(grads, all_params)
    #updates = nesterov_momentum(grads, all_params, update_params['l_r'], momentum=update_params['momentum'])

    acc_train = accuracy(predicted_masks_train[-1])
    acc_valid = accuracy(predicted_mask_valid)
    prec_train = precision(predicted_masks_train[-1])
    prec_valid = precision(predicted_mask_valid)

    print("Compiling network for training")
    tic = time.time()
    train_iter = theano.function([X, y, pixel_weights],
                                 [loss_train] + grads,
                                 updates=updates)
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)
    #theano.printing.pydotprint(loss, outfile='./loss_graph.png',var_with_name_simple=True)
    print("Compiling network for validation")
    tic = time.time()
    valid_iter = theano.function([X, y, pixel_weights],
                                 [losses(predicted_mask_valid),
                                  losses_reg(predicted_mask_valid),
                                  prec_valid])
    toc = time.time() - tic
    print("Took %0.2f seconds" % toc)

    return {'train': train_iter, 'valid': valid_iter,
            'gradnames': [g.name for g in grads]}
def set_weights(model, model_para_values, mapping_dict='auto',
                unwrap_shared=True, **tags):
    """
    Set the model layers' weights either via `mapping_dict` or in natural order.
    When `mapping_dict` is 'auto', a mapping dict is built automatically from
    the layer names that `model` and `model_para_values` have in common.

    :param model:
    :param model_para_values: list of tuples (name, layer_values)
    :param mapping_dict: {None, 'auto', or a dict with the format
        {target_layer_name: source_layer_name}}
    :param unwrap_shared:
    :param tags:
    :return:
    """
    layers = get_all_layers(model)
    # -- if mapping_dict is not given, the model weights are set in natural
    #    order, from the first layer to the last
    # -- if len(model_para_values) != len(layers), the iteration stops at
    #    whichever of the two is shorter
    if mapping_dict is None:
        for layer, layer_values_with_name in zip(layers, model_para_values):
            name, layer_values = layer_values_with_name
            layer_params = layer.get_params(unwrap_shared=unwrap_shared, **tags)
            for p, v in zip(layer_params, layer_values):
                p.set_value(v)
    else:
        # --- build a mapping dict automatically ---#
        if mapping_dict == 'auto':
            target_layer_names = set()
            source_layer_names = set()
            for layer in layers:
                target_layer_names.add(layer.name)
            for name, _ in model_para_values:
                source_layer_names.add(name)
            mapping_dict = dict()
            for name in target_layer_names & source_layer_names:
                mapping_dict[name] = name
        # --- do the mapping ---#
        for target_name in mapping_dict:
            source_name = mapping_dict[target_name]
            for layer in layers:
                if layer.name == target_name:
                    target_layer = layer
                    break
            for name, layer_values in model_para_values:
                if name == source_name:
                    break
            layer_params = target_layer.get_params(unwrap_shared=unwrap_shared,
                                                   **tags)
            for p, v in zip(layer_params, layer_values):
                p.set_value(v)
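# A hedged usage sketch for set_weights above: transfer parameters between two
# models whose layers share names. `source_model` and `target_model` are
# hypothetical output layers; the (name, values) tuples follow the format the
# docstring describes.
def transfer_weights_by_name(source_model, target_model):
    from lasagne.layers import get_all_layers
    model_para_values = [
        (layer.name, [p.get_value() for p in layer.get_params()])
        for layer in get_all_layers(source_model)
    ]
    # 'auto' keeps only the layer names the two models have in common
    set_weights(target_model, model_para_values, mapping_dict='auto')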
def print_layers(l_out):
    all_layers = layers.get_all_layers(l_out)
    print('this network has %d learnable parameters' %
          (layers.count_params(l_out)))
    for layer in all_layers:
        if hasattr(layer, 'W') and hasattr(layer, 'b'):
            num_params = np.prod(layer.W.get_value().shape) + np.prod(
                layer.b.get_value().shape)
            print('layer %s has output shape %r with %d parameters' %
                  (layer.name, layer.output_shape, num_params))
        else:
            print('layer %s has output shape %r' %
                  (layer.name, layer.output_shape))
def description(self):
    def describe_layer(l):
        return '%s\n output shape:%s\n number of params: %s' % (
            l, l.output_shape, get_number_of_params(l))

    summary = '%s -> %s\ntotal number of params: %d' % (
        ' x '.join(
            [str(layers.get_output_shape(input)) for input in self.inputs]),
        ' x '.join(
            [str(layers.get_output_shape(output)) for output in self.outputs]),
        int(np.sum([get_number_of_params(l)
                    for l in layers.get_all_layers(self.outputs)])))
    layer_wise = '\n'.join(
        [describe_layer(l) for l in layers.get_all_layers(self.outputs)])
    return '%s\n===========\n%s\n==========\n%s' % (str(self), summary,
                                                    layer_wise)
def print_lasagne_network(_net, skipnoparam=True):
    layers = L.get_all_layers(_net)
    for l in layers:
        out = l.output_shape
        par = l.get_params()
        if skipnoparam and len(par) == 0 and l.name == None:
            continue
        print "Layer\t: %s\nName\t: %s\nType\t: %s" % (l, l.name, type(l))
        print "Shape\t: %s" % (out,)
        if len(par) > 0:
            print "Params"
            for p in par:
                print " |-- {:<10}: {:}".format(p.name, p.get_value().shape)
        print "\n"
def _construct_layer_maps(self):
    layers = L.get_all_layers(self.output_layer)
    # Store inverse layers to enable merging.
    self.inverse_map = {l: None for l in layers}
    # Store the layers a specific layer feeds.
    self.output_map = {l: [] for l in layers}
    for layer in layers:
        if type(layer) is not L.InputLayer:
            if isinstance(layer, L.MergeLayer):
                for feeder in layer.input_layers:
                    self.output_map[feeder].append(layer)
            else:
                self.output_map[layer.input_layer].append(layer)
def description(self):
    def get_number_of_params(l):
        return np.sum([np.prod(param.get_value().shape)
                       for param in l.get_params()])

    def describe_layer(l):
        return '%s\n output shape:%s\n number of params: %s' % (
            l, l.output_shape, get_number_of_params(l))

    return '%s\n%s' % (
        str(self),
        '\n'.join([describe_layer(l)
                   for l in layers.get_all_layers(self.outputs)]))
def build_functions(self, LEARNING_RATE=1e-5, MOMENTUM=0.9, debug=False):
    target_var = T.ivector('targets')

    # Get the first layer of the network
    l_in = L.get_all_layers(self.network)[0]

    network_output = L.get_output(self.network)
    # Retrieve all trainable parameters from the network
    all_params = L.get_all_params(self.network, trainable=True)

    # loss = T.mean(lasagne.objectives.categorical_crossentropy(network_output, target_var))
    loss = T.sum(lasagne.objectives.categorical_crossentropy(network_output, target_var))

    # use Stochastic Gradient Descent with momentum to update parameters
    updates = lasagne.updates.momentum(loss, all_params,
                                       learning_rate=LEARNING_RATE,
                                       momentum=MOMENTUM)

    # Function to determine the number of correct classifications
    accuracy = T.mean(T.eq(T.argmax(network_output, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Function to get the output of the network
    output_fn = theano.function([l_in.input_var], network_output,
                                name='output_fn')
    if debug:
        l_out_val = output_fn(X)
        print('l_out size:', end='\t')
        print(l_out_val.shape, end='\t')
        print('min/max: [{:.2f},{:.2f}]'.format(l_out_val.min(),
                                                l_out_val.max()))

    argmax_fn = theano.function([l_in.input_var],
                                [T.argmax(network_output, axis=1)],
                                name='argmax_fn')
    if debug:
        print('argmax_fn')
        print(type(argmax_fn(X)[0]))
        print(argmax_fn(X)[0].shape)

    # Function implementing one step of gradient descent
    train_fn = theano.function([l_in.input_var, target_var],
                               [loss, accuracy],
                               updates=updates, name='train_fn')

    # Function calculating the loss and accuracy
    validate_fn = theano.function([l_in.input_var, target_var],
                                  [loss, accuracy], name='validate_fn')
    if debug:
        print(type(train_fn(X, Y)))
        # print('loss: {:.3f}'.format(float(train_fn(X, Y))))
        # print('accuracy: {:.3f}'.format(float(validate_fn(X, Y)[1])))

    self.training_fn = output_fn, argmax_fn, train_fn, validate_fn
def summary(self, light=False):
    """ Print a summary of the network architecture """
    layer_list = get_all_layers(self.output_layer)

    def filter_function(layer):
        """ We only display the layers in the list below"""
        return np.any([
            isinstance(layer, layer_type) for layer_type in
            [InputLayer, Conv2DLayer, Pool2DLayer, Deconv2DLayer, ConcatLayer]
        ])

    # materialize the lazy iterators so they can be reused and zipped safely
    layer_list = list(filter(filter_function, layer_list))
    output_shape_list = list(map(get_output_shape, layer_list))

    def layer_name_function(s):
        return str(s).split('.')[3].split('Layer')[0]

    if not light:
        print('-' * 75)
        print('Warning : all the layers are not displayed \n')
        print(' {:<15} {:<20} {:<20}'.format('Layer', 'Output shape',
                                             'W shape'))
        for i, (layer, output_shape) in enumerate(
                zip(layer_list, output_shape_list)):
            if hasattr(layer, 'W'):
                input_shape = layer.W.get_value().shape
            else:
                input_shape = ''
            print('{:<3} {:<15} {:<20} {:<20}'.format(
                i + 1, layer_name_function(layer), str(output_shape),
                str(input_shape)))
            if isinstance(layer, Pool2DLayer) | isinstance(layer, Deconv2DLayer):
                print('')

    print('\nNumber of Convolutional layers : {}'.format(
        len(list(filter(
            lambda x: isinstance(x, Conv2DLayer) | isinstance(x, Deconv2DLayer),
            layer_list)))))
    print('Number of parameters : {}'.format(
        np.sum(list(map(np.size, get_all_param_values(self.output_layer))))))
    print('-' * 75)
def loadParams(epoch, filename=None):
    print "IMPORTING MODEL PARAMS...",
    if filename == None:
        net_filename = MODEL_PATH + "birdCLEF_" + RUN_NAME + \
            "_model_params_epoch_" + str(epoch) + ".pkl"
    else:
        net_filename = MODEL_PATH + filename
    with open(net_filename, 'rb') as f:
        params = pickle.load(f)
    if LOAD_OUTPUT_LAYER:
        l.set_all_param_values(NET, params)
    else:
        l.set_all_param_values(l.get_all_layers(NET)[:-1], params[:-2])
    print "DONE!"
def get_data(layer_or_layers, seed=4576546):
    """
    Computes the output for a simulated data network.

    Parameters
    ----------
    layer_or_layers : Layer or list
        the :class:`TransformLayer` instance for which to compute the output
        data, or a list of :class:`TransformLayer` instances.

    Returns
    -------
    output : nd-array, nd-array
        Topo output and y output
    """
    all_layers = get_all_layers(layer_or_layers)
    # initialize layer-to-output mapping from all input layers with zeros
    all_outputs = dict((layer, np.zeros(layer.shape, dtype=np.float32))
                       for layer in all_layers
                       if isinstance(layer, InputLayer))
    rng = RandomState(seed)
    n_trials = all_layers[0].shape[0]
    y = np.round(rng.rand(n_trials)).astype(np.int32)
    # update layer-to-output mapping by propagating the inputs
    for layer in all_layers:
        if layer not in all_outputs:
            try:
                try:
                    layer_inputs = [
                        all_outputs[input_layer]
                        for input_layer in layer.input_layers
                    ]
                except AttributeError:
                    layer_inputs = all_outputs[layer.input_layer]
            except KeyError:
                # one of the input_layer attributes must have been `None`
                raise ValueError("get_output() was called without giving an "
                                 "input expression for the free-floating "
                                 "layer %r. Please call it with a dictionary "
                                 "mapping this layer to an input expression."
                                 % layer)
            outputs = layer.transform(topo=layer_inputs, y=y)
            all_outputs[layer] = outputs
    # return the output(s) of the requested layer(s) only
    try:
        return [all_outputs[layer].astype(np.float32)
                for layer in layer_or_layers], y
    except TypeError:
        return all_outputs[layer_or_layers].astype(np.float32), y
def one_sample(_x, _t):
    y, y_mu, y_logsigma = self.get_y_mu_sigma(_x)
    # logP(D|w)
    _log_pd_given_w = normal2(_t, y, T.log(self.prior_sd**2)).sum()
    # logq(w) logp(w)
    _log_qw, _log_pw = 0., 0.
    layers = get_all_layers(self)[1:]
    for layer in layers:
        W = layer.W
        b = layer.b
        _log_qw += normal2(W, layer.W_mu, layer.W_logsigma * 2).sum()
        _log_qw += normal2(b, layer.b_mu, layer.b_logsigma * 2).sum()
        _log_pw += normal(W, 0., self.prior_sd).sum()
        _log_pw += normal(b, 0., self.prior_sd).sum()
    return _log_qw, _log_pw, _log_pd_given_w
def loadPretrained(net):
    if cfg.MODEL_NAME:
        # Load saved model
        n, c = io.loadModel(cfg.MODEL_NAME)
        # Set params
        params = l.get_all_param_values(n)
        if cfg.LOAD_OUTPUT_LAYER:
            l.set_all_param_values(net, params)
        else:
            l.set_all_param_values(l.get_all_layers(net)[:-1], params[:-2])
    return net
def loadModel(filename):
    print "IMPORTING MODEL PARAMS...",
    net_filename = MODEL_PATH + filename
    with open(net_filename, 'rb') as f:
        data = pickle.load(f)
    # for training, we only want to load the model params
    net = data['net']
    params = l.get_all_param_values(net)
    if LOAD_OUTPUT_LAYER:
        l.set_all_param_values(NET, params)
    else:
        l.set_all_param_values(l.get_all_layers(NET)[:-1], params[:-2])
    print "DONE!"
def build(layer_heads, params):
    """"""
    fns = {}  # model methods
    x = T.tensor4('input')
    for target in params['targets']:
        fns[target['name']] = {}
        out_layer = layer_heads[target['name']]
        y = T.matrix('target')
        o = L.get_output(out_layer, inputs=x)
        o_vl = L.get_output(out_layer, inputs=x, deterministic=True)

        if 'class_weight' in params and params['class_weight']:
            loss_fn = partial(weighted_cce, weights=params['class_weight'])
        else:
            loss_fn = obj.categorical_crossentropy

        loss = loss_fn(o, y).mean()
        loss_vl = loss_fn(o_vl, y).mean()
        wd_l2 = reg.regularize_network_params(out_layer, reg.l2)
        wd_l2 *= params['beta']
        acc_vl = obj.categorical_accuracy(o_vl, y).mean()

        updates_ = updates.adam(loss + wd_l2,
                                L.get_all_params(out_layer, trainable=True),
                                learning_rate=params['learning_rate'],
                                epsilon=params['epsilon'])

        fns[target['name']]['train'] = theano.function(
            [x, y], updates=updates_, allow_input_downcast=True)
        fns[target['name']]['predict'] = theano.function(
            [x], o_vl, allow_input_downcast=True)
        fns[target['name']]['cost'] = theano.function(
            [x, y], loss_vl, allow_input_downcast=True)
        fns[target['name']]['acc'] = theano.function(
            [x, y], acc_vl, allow_input_downcast=True)
        fns[target['name']]['transform'] = theano.function(
            [x],
            L.get_output(L.get_all_layers(layer_heads[target['name']])[-2],
                         inputs=x, deterministic=True),
            allow_input_downcast=True)

    return fns, layer_heads
def modify_inception3(input_var, in_channels, channel_transformers_shape,
                      n_classes, freeze_weights):
    """
    Load the inception_v3 net, pretrained on ImageNet.

    However, this expects three channels, but we might have a different number
    N of channels; hence, put in a first layer that goes from N->3 channels.
    Just a linear combo of the N channels, i.e. a 1x1 convolution with maybe a
    nonlinearity.

    channel_transformers_shape: tuple, each entry giving #neurons of a layer,
        taking the N channels to 3 eventually, e.g. (3,) is a single nonlinear
        transform, (64, 3) will be Nc -> 64c -> 3c.
        The last tuple entry must be 3, as this is the size the inception can
        handle.
    """
    incept_channels = 3  # what the pretrained net can take
    assert channel_transformers_shape[-1] == incept_channels, \
        "last transformer layer must have 3 filters to RGB"
    incept_size = (299, 299)

    net = build_pretrained_inception_v3()

    if freeze_weights:
        print("fixing all pretrained weights")
        for l in get_all_layers(net['prob']):
            freeze(l)

    # replace the old input layer with one that takes the N channels
    net['input'] = InputLayer((None, in_channels) + incept_size,
                              input_var=input_var)

    # add a couple of transformations of the N channels -> 3 channels
    transformer_layers = create_channel_transformer_layers(
        net['input'], channel_transformers_shape, name_prefix='lincomb')
    for tl in transformer_layers:  # add them to the dict
        net[tl.name] = tl

    # link it into the other layers: conv_1 was previously linked to the
    # input, now we put the linCombo in between
    net['conv_1'].input_layer = transformer_layers[-1]

    # replace the top softmax (1000 classes) by a softmax with the appropriate
    # number of classes; the last layer before the softmax:
    hidden_layer = net['pool3']
    net['softmax'] = DenseLayer(hidden_layer, num_units=n_classes,
                                nonlinearity=softmax, name='softmax')
    return net
def modify_vgg19(input_var, in_channels, channel_transformers_shape,
                 n_classes, freeze_weights):
    """
    Load the VGG19 net, pretrained on ImageNet.

    However, this expects three channels, but we have N channels; hence, put
    in a first layer that goes from N->3 channels. Just a linear combo of the
    N channels, i.e. a 1x1 convolution with maybe a nonlinearity.

    channel_transformers_shape: tuple, each entry giving #neurons of a layer,
        taking the N channels to 3 eventually, e.g. (3,) is a single nonlinear
        transform, (64, 3) will be N channels -> 64c -> 3c.
        The last tuple entry must be 3, as this is the size the vgg can handle.
    """
    vgg_channels = 3  # what the pretrained net can take
    assert channel_transformers_shape[-1] == vgg_channels, \
        "last transformer layer must have 3 filters to RGB"
    vgg_size = (224, 224)

    net = build_pretrained_vgg19()

    if freeze_weights:
        print('freezing the pretrained layers')
        for l in get_all_layers(net['prob']):
            freeze(l)

    # replace the old input layer with one that takes the 34 channels
    net['input'] = InputLayer((None, in_channels) + vgg_size,
                              input_var=input_var)

    # add a couple of transformations of the 34 channels -> 3 channels
    transformer_layers = create_channel_transformer_layers(
        net['input'], channel_transformers_shape, name_prefix='lincomb')
    for tl in transformer_layers:  # add them to the dict
        net[tl.name] = tl

    # link it into the other layers: conv1_1 was previously linked to the
    # input, now we put the linCombo in between
    net['conv1_1'].input_layer = transformer_layers[-1]

    # replace the top softmax (1000 classes) by a softmax with the appropriate
    # number of classes
    net['fc8'] = DenseLayer(
        net['fc7_dropout'], num_units=n_classes, nonlinearity=None, name='fc8'
    )  # funny in vgg code: why not put it into a single layer with nonlin
    net['prob'] = NonlinearityLayer(net['fc8'], softmax, name='prob')
    return net
def _set_inverse_parameters(self, patterns=None):
    self.trainable_layers = [
        self.inverse_map[l] for l in L.get_all_layers(self.output_layer)
        if type(l) in [L.Conv2DLayer, L.DenseLayer]
    ]
    if patterns is not None:
        if type(patterns) is list:
            patterns = patterns[0]
        for i, layer in enumerate(self.trainable_layers):
            pattern = patterns['A'][i]
            if pattern.ndim == 4:
                pattern = pattern.transpose(1, 0, 2, 3)
            elif pattern.ndim == 2:
                pattern = pattern.T
            layer.W.set_value(pattern)
    else:
        print("Patterns not given, explanation is random.")
def build_dist_feat_fnc(net, target,
                        conv_feat_locs=[5, 10, 12, 17, 19],
                        fc_feat_locs=[24, 28]):
    """"""
    layers = L.get_all_layers(net[target])
    assert len(layers) == 30  # only works for standard deep conv2d

    feat = [L.GlobalPoolLayer(layers[l]) for l in conv_feat_locs]
    feat += [layers[l] for l in fc_feat_locs]
    feat = L.ConcatLayer(feat, axis=1)
    f = L.get_output(feat, deterministic=True)

    f_feat = {target: {}}
    f_feat[target]['transform'] = theano.function(
        [layers[0].input_var], f, allow_input_downcast=True)

    return f_feat
def test_stack(self):
    from lasagne.layers import InputLayer, DenseLayer, get_all_layers
    from itertools import permutations
    # l1 --> l2 --> l3
    l1 = InputLayer((10, 20))
    l2 = DenseLayer(l1, 30)
    l3 = DenseLayer(l2, 40)
    for count in (0, 1, 2, 3):
        for query in permutations([l1, l2, l3], count):
            if l3 in query:
                expected = [l1, l2, l3]
            elif l2 in query:
                expected = [l1, l2]
            elif l1 in query:
                expected = [l1]
            else:
                expected = []
            assert get_all_layers(query) == expected
def build_model_dense(input_shape, input_var):
    net = {}
    net['input'] = InputLayer(input_shape, input_var=input_var)
    net['input'].num_filters = input_shape[1]
    net['conv1'] = ConvLayer(net['input'], num_filters=256, filter_size=3,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')
    net['conv2'] = ConvLayer(net['conv1'], num_filters=256, filter_size=3,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')
    net['conv2/reshape'] = ReshapeLayer(
        net['conv2'],
        (-1, net['conv2'].output_shape[1] * net['conv2'].output_shape[2]))
    net['dense'] = dropout(
        DenseLayer(net['conv2/reshape'], num_units=1024,
                   nonlinearity=nonlinearities.leaky_rectify), 0.5)
    net['dense/inverse'] = inverse_dense_layer(
        net['dense'], net['dense'], net['conv2'].output_shape)
    net['conv2/inverse'] = inverse_convolution_layer(net['dense/inverse'], net['conv2'])
    net['conv1/inverse'] = inverse_convolution_layer(net['conv2/inverse'], net['conv1'])
    net['conv0/inverse'] = ConvLayer(net['conv1/inverse'], num_filters=input_shape[1],
                                     filter_size=1, nonlinearity=nonlinearities.linear,
                                     pad='same')
    net['prob'] = net['conv0/inverse']

    for layer in get_all_layers(net['prob']):
        print layer
        print layer.output_shape

    return net
def summary(output):
    """Form dataframe from lasagne output."""
    layer_info = []
    for l in get_all_layers(output):
        layer_type = l.__class__.__name__
        name = l.name
        shape = l.output_shape
        params = [p.get_value().shape for p in l.get_params()]
        params = params if len(params) else None
        params_total = count_layer_params(l)
        layer_info.append((layer_type, name, shape, params, params_total))
    return pd.DataFrame(
        layer_info,
        columns=['layer_type', 'name', 'shape', 'params', 'params_total'])
def example2():
    """ Two branches"""
    # Input
    l_in = lasagne.layers.InputLayer((100, 1, 20, 20))

    # Branch one
    l_conv1 = lasagne.layers.Conv2DLayer(l_in, num_filters=32, filter_size=(5, 5))
    l_pool1 = lasagne.layers.MaxPool2DLayer(l_conv1, pool_size=(2, 2))
    l_dense1 = lasagne.layers.DenseLayer(l_pool1, num_units=20)

    # Branch two
    l_conv2 = lasagne.layers.Conv2DLayer(l_in, num_filters=32, filter_size=(5, 5))
    l_pool2 = lasagne.layers.MaxPool2DLayer(l_conv2, pool_size=(2, 2))
    l_dense2 = lasagne.layers.DenseLayer(l_pool2, num_units=20)

    # Merge
    l_concat = lasagne.layers.ConcatLayer((l_dense1, l_dense2))

    # Output
    l_out = lasagne.layers.DenseLayer(l_concat, num_units=10)

    layers = get_all_layers(l_out)
    print(get_network_str(layers, get_network=False, incomings=True,
                          outgoings=True))
    return None
def visualize_conv(nnmodel):
    train = np.load(PATH)
    import theano
    nnmodel._input_layer.input_var = theano.shared(
        name='input_var',
        value=np.asarray(train, dtype=theano.config.floatX),
        borrow=True)
    lconv = layers.get_all_layers(nnmodel._network)[2]
    filt = layers.get_output(lconv).eval()
    # print np.max(filt[:,6,:,:]), np.min(filt[:,6,:,:])
    filt = filt[500, :]
    kfilt = filt

    import matplotlib.gridspec as gridspec
    # kfilt = np.load('filt.npy')
    idx = range(0, 30, 3)
    for c, j in enumerate(idx):
        filt = kfilt[j:j + 3, :]
        fig = plt.figure(figsize=(12, 12), frameon=True)
        gs = gridspec.GridSpec(1, 3, wspace=0.1, hspace=0.5)
        # fig.add_subplot(1,3,1)
        # map_contour(train[500,0:4096].reshape(64,64),'500hPa Geopotential Height')
        # fig.add_subplot(1,3,2)
        # map_contour(train[500,4096:8192].reshape(64,64),'700hPa Geopotential Height')
        # fig.add_subplot(1,3,3)
        # map_contour(train[500,8192:12288].reshape(64,64),'900hPa Geopotential Height')
        # plt.show()
        # exit(-1)
        for i in range(3):
            print i
            # fig.add_subplot(1,3,i+1)
            sub = plt.subplot(gs[i])
            plt.axis('off')
            sub.set_xticklabels([])
            sub.set_adjustable('box-forced')
            sub.set_yticklabels([])
            cfilter = filt[i, :]
            cfilter = scipy.misc.imresize(cfilter, (64, 64))
            map_contour(cfilter)
        # plt.show()
        fig.savefig("out_conv2_" + str(c) + ".png",
                    bbox_inches='tight', pad_inches=0)
def nll_l2(predictions, targets, net, batch_size, num_samples, rw=None,
           train_clip=False, thresh=3, weight_decay=0.00001, **kwargs):
    if rw is None:
        rw = theano.shared(np.cast[theano.config.floatX](0))

    print('Weight decay:', weight_decay)
    loss = categorical_crossentropy(predictions, targets).mean()
    loss += rg.regularize_layer_params(ll.get_all_layers(net), rg.l2) * weight_decay
    return loss, rw
def build_model_small(input_shape, input_var):
    net = {}
    net['input'] = InputLayer(input_shape, input_var=input_var)
    net['input'].num_filters = input_shape[1]
    net['conv1'] = ConvLayer(net['input'], num_filters=256, filter_size=11,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')
    net['conv2'] = ConvLayer(net['conv1'], num_filters=256, filter_size=7,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')
    net['conv3'] = ConvLayer(net['conv2'], num_filters=396, filter_size=5,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')
    net['conv4'] = ConvLayer(net['conv3'], num_filters=512, filter_size=3,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')
    net['conv5'] = ConvLayer(net['conv4'], num_filters=1024, filter_size=1,
                             nonlinearity=nonlinearities.leaky_rectify, pad='same')
    net['conv5/inverse'] = inverse_convolution_layer(net['conv5'], net['conv5'])
    net['conv4/inverse'] = inverse_convolution_layer(net['conv5/inverse'], net['conv4'])
    net['conv3/inverse'] = inverse_convolution_layer(net['conv4/inverse'], net['conv3'])
    net['conv2/inverse'] = inverse_convolution_layer(net['conv3/inverse'], net['conv2'])
    net['conv1/inverse'] = inverse_convolution_layer(net['conv2/inverse'], net['conv1'])
    net['conv0/inverse'] = ConvLayer(net['conv1/inverse'], num_filters=input_shape[1],
                                     filter_size=1, nonlinearity=nonlinearities.linear,
                                     pad='same')
    net['prob'] = net['conv0/inverse']

    for layer in get_all_layers(net['prob']):
        print layer
        print layer.output_shape

    return net
def draw_to_notebook(layer_or_layers, node_creator=default_create, **kwargs):
    """Draws a network diagram in an IPython notebook.

    Parameters
    ----------
    layer_or_layers : one :class:`Layer` instance or a list of layers
        Either a list of layers or the model in form of the last layer.
    node_creator : callable
        A function that creates a :class:`Node` for a given layer.
    kwargs : keyword arguments
        Those will be passed to ``pydot_graph``, ``node_creator`` and later
        to :class:`Node`.
    """
    from IPython.display import Image
    if isinstance(layer_or_layers, Layer):
        layers = get_all_layers(layer_or_layers)
    else:
        layers = layer_or_layers
    dot = pydot_graph(layers, node_creator=node_creator, **kwargs)
    return Image(dot.create_png())
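# A minimal usage sketch for draw_to_notebook above. It assumes it runs inside
# a Jupyter/IPython notebook with pydot and graphviz available.
def example_draw_to_notebook():
    from lasagne.layers import InputLayer, DenseLayer
    l_in = InputLayer((None, 20))
    l_out = DenseLayer(DenseLayer(l_in, 30), 2)
    # returns an IPython.display.Image, which the notebook renders inline
    return draw_to_notebook(l_out)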
def make_model_graph(layers):
    graph = pydot.Dot('model', splines='line', outputorder='edgesfirst',
                      ranksep=2, nodesep=2)
    clusters = OrderedDict()
    for layer in ll.get_all_layers(layers):
        assert layer.name is not None
        cluster_name = get_cluster_name(layer)
        if cluster_name is not None:
            try:
                cluster = clusters[cluster_name]
            except KeyError:
                clusters[cluster_name] = pydot.Cluster(cluster_name,
                                                       label=cluster_name,
                                                       style='filled',
                                                       color='lightgrey')
                cluster = clusters[cluster_name]
            cluster.add_node(layer_to_node(layer))
        else:
            graph.add_node(layer_to_node(layer))
        for input_layer in get_input_layers(layer):
            input_cluster_name = get_cluster_name(input_layer)
            if cluster_name is not None and input_cluster_name is not None and \
                    cluster_name == input_cluster_name:
                cluster.add_edge(layers_to_edge(input_layer, layer))
            else:
                edge = layers_to_edge(input_layer, layer)
                edge.set_constraint(False)  # pylint: disable=no-member
                edge.set_style('dashed')  # pylint: disable=no-member
                edge.set_color('dimgrey')  # pylint: disable=no-member
                edge.set_headlabel(input_layer.name)  # pylint: disable=no-member
                edge.set_fontcolor('dimgrey')  # pylint: disable=no-member
                cluster.add_edge(edge)
    for cluster in clusters.itervalues():
        graph.add_subgraph(cluster)
    return graph
def save_activations(self):
    if not self.do_save_activations:
        return
    filename = self.experiment_name + "_activations.hdf5"
    mode = 'w' if self.n_iterations() == 0 else 'a'
    f = h5py.File(filename, mode=mode)
    epoch_name = 'epoch{:06d}'.format(self.n_iterations())
    try:
        epoch_group = f.create_group(epoch_name)
    except ValueError:
        self.logger.exception("Cannot save params!")
        f.close()
        return

    layers = get_all_layers(self.layers[-1])
    for layer_i, layer in enumerate(layers):
        # We only care about layers with params
        if not (layer.get_params() or isinstance(layer, FeaturePoolLayer)):
            continue

        output = lasagne.layers.get_output(layer, self.X_val).eval()
        n_features = output.shape[-1]
        seq_length = int(output.shape[0] / self.source.n_seq_per_batch)

        if isinstance(layer, DenseLayer):
            shape = (self.source.n_seq_per_batch, seq_length, n_features)
            output = output.reshape(shape)
        elif isinstance(layer, Conv1DLayer):
            output = output.transpose(0, 2, 1)

        layer_name = 'L{:02d}_{}'.format(layer_i, layer.__class__.__name__)
        epoch_group.create_dataset(
            layer_name, data=output, compression="gzip")

    # save validation data
    if self.n_iterations() == 0:
        f.create_dataset(
            'validation_data', data=self.X_val, compression="gzip")

    f.close()