def _execute(self):
    """Compute pooled triangle-code features for the selected examples and save them.

    Steps, in order:

    1. Load the dataset described by
       ``self.dataset_family[self.which_set][self.size]`` and, if
       ``self.restrict`` is set, keep only rows
       ``restrict[0]:restrict[1]``.
    2. Replace the first item of the stored preprocessing pipeline with a
       stride-1 ``ExtractGridPatches`` of shape ``(size, size)``.
    3. Encode every patch with ``triangle_code`` against ``self.model.mu``.
    4. Reassemble the codes into a topological view and pool it onto a
       ``num_superpixels`` x ``num_superpixels`` grid (``num_superpixels``
       is a module-level global) using ``self.pool_mode``.
    5. Reduce the rectangle ``top[j]..bottom[j]`` x ``left[j]..right[j]`` of
       channel ``idxs[j]`` to one scalar per output feature ``j``.
    6. Write the ``(num_examples, num_output_features)`` float32 matrix with
       ``np.save``; when ``self.chunk_size`` is set the file name receives a
       ``_<letter>`` suffix derived from ``self.chunk_id``.

    Raises
    ------
    ValueError
        If ``self.pool_mode`` is neither ``'mean'`` nor ``'max'``.
    """
    global num_superpixels

    num_output_features = self.num_output_features
    idxs = self.idxs
    top = self.top
    bottom = self.bottom
    left = self.left
    right = self.right
    save_path = self.save_path
    batch_size = self.batch_size
    dataset_family = self.dataset_family
    which_set = self.which_set
    model = self.model
    size = self.size
    pool_mode = self.pool_mode

    nan = 0

    dataset_descriptor = dataset_family[which_set][size]

    dataset = dataset_descriptor.dataset_maker()
    expected_num_examples = dataset_descriptor.num_examples

    full_X = dataset.get_design_matrix()
    num_examples = full_X.shape[0]
    assert num_examples == expected_num_examples

    if self.restrict is not None:
        assert self.restrict[1] <= full_X.shape[0]

        print('restricting to examples ', self.restrict[0], ' through ',
              self.restrict[1], ' exclusive')
        full_X = full_X[self.restrict[0]:self.restrict[1], :]

        assert self.restrict[1] > self.restrict[0]

    # update for after restriction
    num_examples = full_X.shape[0]
    assert num_examples > 0

    # The design matrix is kept in full_X; each batch gets a shallow copy of
    # this stripped dataset with its own rows installed.
    dataset.X = None
    dataset.design_loc = None
    dataset.compress = False

    patchifier = ExtractGridPatches(patch_shape=(size, size),
                                    patch_stride=(1, 1))

    pipeline = serial.load(dataset_descriptor.pipeline_path)
    assert isinstance(pipeline.items[0], ExtractPatches)
    pipeline.items[0] = patchifier

    print('defining features')
    V = T.matrix('V')
    mu = model.mu
    feat = triangle_code(V, mu)
    assert feat.dtype == 'float32'

    print('compiling theano function')
    f = function([V], feat)

    nhid = model.mu.get_value().shape[0]

    if config.device.startswith('gpu') and nhid >= 4000:
        # Use the feature count computed above (rows of mu) so the threshold
        # test and the value handed to halver agree.
        f = halver(f, nhid)

    topo_feat_var = T.TensorType(
        broadcastable=(False, False, False, False), dtype='float32')()
    if pool_mode == 'mean':
        region_features = function([topo_feat_var],
                                   topo_feat_var.mean(axis=(1, 2)))
    elif pool_mode == 'max':
        region_features = function([topo_feat_var],
                                   topo_feat_var.max(axis=(1, 2)))
    else:
        raise ValueError("Unknown pool mode: " + str(pool_mode))
    use_mean = pool_mode == 'mean'

    # Side length of the grid of patch positions per image; the 32 presumably
    # reflects 32x32 input images -- TODO confirm against the dataset.
    ns = 32 - size + 1

    def average_pool(topo_feat, stride):
        """Pool `topo_feat` onto a stride x stride grid with region_features."""

        def point(p):
            # Floor division keeps the slice bounds integral under both
            # Python 2 and Python 3 division semantics.
            return p * ns // stride

        rval = np.zeros((topo_feat.shape[0], stride, stride,
                         topo_feat.shape[3]), dtype='float32')

        for i in range(stride):
            for j in range(stride):
                rval[:, i, j, :] = region_features(
                    topo_feat[:, point(i):point(i + 1),
                              point(j):point(j + 1), :])

        return rval

    output = np.zeros((num_examples, num_output_features), dtype='float32')

    fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                           view_converter=DefaultViewConverter([1, 1, nhid]))

    depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                         patch_shape=(1, 1))

    batch_starts = range(0, num_examples - batch_size + 1, batch_size)
    if len(batch_starts) <= 0:
        # Nothing will be processed; show the sizes that caused it.
        print(num_examples)
        print(batch_size)

    for i in batch_starts:
        print(i)
        t1 = time.time()

        d = copy.copy(dataset)
        d.set_design_matrix(full_X[i:i + batch_size, :])

        t2 = time.time()

        # applying preprocessor
        d.apply_preprocessor(pipeline, can_fit=False)
        X2 = d.get_design_matrix()

        t3 = time.time()

        # running theano function
        feat = f(X2)

        t4 = time.time()

        assert feat.dtype == 'float32'

        feat_dataset = copy.copy(fd)

        if contains_nan(feat):
            nan_mask = np.isnan(feat)
            nan += nan_mask.sum()
            feat[nan_mask] = 0

        feat_dataset.set_design_matrix(feat)

        # reassembling features
        feat_dataset.apply_preprocessor(depatchifier)

        # making topological view
        topo_feat = feat_dataset.get_topological_view()
        assert topo_feat.shape[0] == batch_size

        t5 = time.time()

        # average pooling
        superpixels = average_pool(topo_feat, num_superpixels)

        # The scalar reduction below collapses the batch axis too, so it is
        # only meaningful for one example at a time.
        assert batch_size == 1

        for j in range(num_output_features):
            region = superpixels[:, top[j]:bottom[j] + 1,
                                 left[j]:right[j] + 1, idxs[j]]
            output[i:i + batch_size, j] = (region.mean() if use_mean
                                           else region.max())

        assert output[i:i + batch_size, :].max() < 1e20

        t6 = time.time()

        print((t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5))

    if self.chunk_size is not None:
        assert save_path.endswith('.npy')
        save_path_pieces = save_path.split('.npy')
        assert len(save_path_pieces) == 2
        assert save_path_pieces[1] == ''
        save_path = (save_path_pieces[0] + '_'
                     + chr(ord('A') + self.chunk_id) + '.npy')
    np.save(save_path, output)

    if nan > 0:
        warnings.warn(str(nan) + ' features were nan')
def train_cnn(lambda_l2, dropout1, dropout2, h1_neurons, h2_neurons,
              es_patience, batch_size, X_train, X_train_eyes,
              X_train_headpose, y_train, X_valid, X_valid_eyes,
              X_valid_headpose, y_valid, use_headpose, use_eyes,
              best_weights, use_pretrained_model=False):
    """Build and train the gaze-regression CNN with early stopping.

    The network is conv(16, 3x3) -> maxpool -> conv(32, 2x2) -> maxpool ->
    dropout, optionally concatenated with the eye and/or head-pose vectors,
    followed by two ReLU dense layers and a 2-unit linear output (the x and y
    gaze angles).  Training minimises mean squared error plus
    ``lambda_l2`` times the L2 norm of the parameters using Adamax, one
    mini-batch at a time.  After every pass over the training set the model
    is scored on both sets; training stops once ``es_patience`` epochs have
    elapsed since the best validation RMSE.

    Parameters
    ----------
    lambda_l2 : weight of the L2 penalty.
    dropout1, dropout2 : dropout probabilities after the second pooling layer
        and after the first dense layer.
    h1_neurons, h2_neurons : sizes of the two dense layers.
    es_patience : number of epochs without improvement tolerated.
    batch_size : mini-batch size.
    X_train, X_train_eyes, X_train_headpose, y_train : training inputs/targets.
    X_valid, X_valid_eyes, X_valid_headpose, y_valid : validation inputs/targets.
    use_headpose, use_eyes : whether to concatenate the auxiliary inputs.
    best_weights : parameter values to load when ``use_pretrained_model`` is
        true; otherwise only used as the initial "best" snapshot.
    use_pretrained_model : load ``best_weights`` into the net once the first
        epoch has finished.

    Returns
    -------
    tuple
        ``(best_val_predictions, train_mean_errors, valid_mean_errors,
        train_rmses, valid_rmses, best_valid_rmse, best_valid_mean_error,
        best_weights)``.  ``best_val_predictions`` is ``None`` if the
        validation RMSE never improved (e.g. it was NaN throughout).
    """
    train_set = {'X': X_train, 'eyes': X_train_eyes,
                 'headpose': X_train_headpose, 'y': y_train}
    valid_set = {'X': X_valid, 'eyes': X_valid_eyes,
                 'headpose': X_valid_headpose, 'y': y_valid}

    # ---- network architecture -------------------------------------------
    input_shape = train_set['X'][0].shape
    l_in = lasagne.layers.InputLayer(
        shape=(None, input_shape[0], input_shape[1], input_shape[2]))
    l_conv1 = lasagne.layers.Conv2DLayer(
        l_in, num_filters=16, filter_size=(3, 3),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotNormal(gain='relu'))
    l_pool1 = lasagne.layers.MaxPool2DLayer(l_conv1, pool_size=(2, 2))
    l_conv2 = lasagne.layers.Conv2DLayer(
        l_pool1, num_filters=32, filter_size=(2, 2),
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotNormal(gain='relu'))
    l_pool2 = lasagne.layers.MaxPool2DLayer(l_conv2, pool_size=(2, 2))
    l_pool2_dropout = lasagne.layers.DropoutLayer(l_pool2, p=dropout1)

    eyes_shape = train_set['eyes'][0].shape
    l_in_eyes = lasagne.layers.InputLayer(shape=(None, eyes_shape[0]))
    headpose_shape = train_set['headpose'][0].shape
    l_in_headpose = lasagne.layers.InputLayer(
        shape=(None, headpose_shape[0]))

    # Concatenate eye and/or head-pose information to the flattened
    # convolutional features (eyes first, then head pose).
    extra_inputs = []
    if use_eyes:
        extra_inputs.append(l_in_eyes)
    if use_headpose:
        extra_inputs.append(l_in_headpose)

    if extra_inputs:
        pooled_shape = lasagne.layers.get_output_shape(l_pool2_dropout)
        l_pool2_dropout_reshaped = lasagne.layers.ReshapeLayer(
            l_pool2_dropout,
            (-1, pooled_shape[1] * pooled_shape[2] * pooled_shape[3]))
        l_concat = lasagne.layers.ConcatLayer(
            [l_pool2_dropout_reshaped] + extra_inputs, axis=1)
    else:
        l_concat = l_pool2_dropout

    l_hidden1 = lasagne.layers.DenseLayer(
        l_concat, num_units=h1_neurons,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotNormal(gain='relu'))
    l_hidden1_dropout = lasagne.layers.DropoutLayer(l_hidden1, dropout2)
    l_hidden2 = lasagne.layers.DenseLayer(
        l_hidden1_dropout, num_units=h2_neurons,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotNormal(gain='relu'))

    # Output units are the x and y angles of the gaze.
    l_output = lasagne.layers.DenseLayer(
        l_hidden2, num_units=2,
        nonlinearity=lasagne.nonlinearities.identity)

    # Log and print the per-layer output shapes.
    layer_shapes = lasagne.layers.get_output_shape(
        lasagne.layers.get_all_layers(l_output))
    with open("results.txt", "a") as myfile:
        myfile.write("**********" + '\n')
        myfile.write("net structure:\n" + str(layer_shapes) + '\n')
        myfile.write("**********" + '\n')
    print(layer_shapes)

    # ---- symbolic losses and compiled functions -------------------------
    net_output = lasagne.layers.get_output(l_output)
    true_output = (T.TensorType(theano.config.floatX,
                                (False, False)))('true_output')
    # NOTE(review): `loss` is not referenced again in this function.  It is
    # left in place because building it calls get_output on the dropout
    # layers one extra time, which presumably advances their random streams;
    # confirm that before removing it.
    loss = T.mean(lasagne.objectives.squared_error(net_output, true_output))

    # Training loss uses the stochastic (dropout) pass; the evaluation loss
    # uses the deterministic pass.
    loss_train = T.mean(lasagne.objectives.squared_error(
        lasagne.layers.get_output(l_output, deterministic=False),
        true_output))
    loss_eval = T.mean(lasagne.objectives.squared_error(
        lasagne.layers.get_output(l_output, deterministic=True),
        true_output))

    # Add L2 regularisation to the training loss.
    loss_regularization = lasagne.regularization.regularize_network_params(
        l_output, lasagne.regularization.l2)
    params = lasagne.layers.get_all_params(l_output, trainable=True)
    loss_train = loss_train + lambda_l2 * loss_regularization

    updates = lasagne.updates.adamax(loss_train, params)

    # Unused inputs only warn, because the head-pose and eye inputs may be
    # left out of the graph by design.
    train = theano.function(
        [l_in.input_var, l_in_eyes.input_var, l_in_headpose.input_var,
         true_output],
        loss_train, updates=updates, on_unused_input='warn')
    get_output = theano.function(
        [l_in.input_var, l_in_eyes.input_var, l_in_headpose.input_var],
        lasagne.layers.get_output(l_output, deterministic=True),
        on_unused_input='warn')

    # ---- training loop ---------------------------------------------------
    batch_idx = 0
    epoch = 0
    train_mean_errors = []
    train_rmses = []
    valid_mean_errors = []
    valid_rmses = []
    patience = es_patience
    best_valid_rmse = np.inf
    best_valid_mean_error = np.inf
    best_valid_epoch = 0
    # Stays None if validation RMSE never improves (e.g. NaN predictions),
    # so the return statement cannot hit an unbound local.
    best_val_predictions = None

    # There is no epoch limit: the loop ends only through early stopping.
    while True:
        batch = slice(batch_idx, batch_idx + batch_size)
        train(train_set['X'][batch], train_set['eyes'][batch],
              train_set['headpose'][batch], train_set['y'][batch])
        batch_idx += batch_size

        if batch_idx < train_set['X'].shape[0]:
            continue

        # ---- end of an epoch ----
        batch_idx = 0
        epoch += 1

        if use_pretrained_model and epoch == 1:
            lasagne.layers.set_all_param_values(l_output, best_weights)

        val_predictions = get_output(
            valid_set['X'], valid_set['eyes'], valid_set['headpose'])
        train_predictions = get_output(
            train_set['X'], train_set['eyes'], train_set['headpose'])

        train_mean_error = degrees(
            mean_absolute_error(train_set['y'], train_predictions))
        print("Epoch {} training accuracy (mean error in degrees): {}".
              format(epoch, train_mean_error))
        valid_mean_error = degrees(
            mean_absolute_error(valid_set['y'], val_predictions))
        print("Epoch {} validation accuracy (mean error in degrees): {}".
              format(epoch, valid_mean_error))
        train_mean_errors.append(train_mean_error)
        valid_mean_errors.append(valid_mean_error)

        train_rmse = degrees(
            sqrt(mean_squared_error(train_set['y'], train_predictions)))
        print("Epoch {} training accuracy (RMSE in degrees): {}".format(
            epoch, train_rmse))
        valid_rmse = degrees(
            sqrt(mean_squared_error(valid_set['y'], val_predictions)))

        improved = valid_rmse < best_valid_rmse
        print("Epoch {} validation accuracy (RMSE in degrees): {}".
              format(epoch,
                     colored(valid_rmse, 'green' if improved else 'red')))
        train_rmses.append(train_rmse)
        valid_rmses.append(valid_rmse)

        if improved:
            best_valid_rmse = valid_rmse
            best_valid_mean_error = valid_mean_error
            best_valid_epoch = epoch
            best_weights = lasagne.layers.get_all_param_values(l_output)
            best_val_predictions = val_predictions
        elif best_valid_epoch + patience <= epoch:
            print(colored("Early stopping.", 'blue'))
            print(colored(
                "Best valid rmse was " + str(best_valid_rmse) +
                " at epoch " + str(best_valid_epoch), 'blue'))
            print(colored(
                "Best valid mean error was " + str(best_valid_mean_error) +
                " at epoch " + str(best_valid_epoch), 'blue'))
            lasagne.layers.set_all_param_values(l_output, best_weights)
            break

    # NOTE(review): an earlier revision appended train_rmse/valid_rmse to
    # `train_losses`/`valid_losses` here.  Neither name is defined in this
    # function and the same values are already collected in train_rmses /
    # valid_rmses, so those statements were dropped.  Restore them if they
    # are meant to be module-level accumulators.
    return (best_val_predictions, train_mean_errors, valid_mean_errors,
            train_rmses, valid_rmses, best_valid_rmse,
            best_valid_mean_error, best_weights)
def TensorType(dtype, shape):
    """Return a ``tt.TensorType`` for `dtype` with length-1 axes broadcastable.

    `shape` may be a scalar or a sequence; it is promoted to at least one
    dimension and every entry equal to 1 marks the matching axis as
    broadcastable.  `dtype` is passed on as its string form.
    """
    dtype_name = str(dtype)
    broadcast_pattern = np.atleast_1d(shape) == 1
    return tt.TensorType(dtype_name, broadcast_pattern)
def multinomial(random_state, size=None, n=1, pvals=[0.5, 0.5], ndim=None,
                dtype='int64'):
    """
    Draw counts from one or more multinomial distributions.

    Each one-dimensional slice along the last axis of `pvals` defines one
    distribution.

    Parameters
    ----------
    pvals
        Tensor of shape "nmulti+(L,)"; every slice along the last axis holds
        the probabilities of one multinomial and must satisfy
        numpy.allclose(pvals.sum(axis=-1), 1).
    size
        Vector describing the output shape; it may also fix the "nmulti"
        part of pvals' shape.  A -1 in the k'th position from the right
        borrows the k'th position from the right of nmulti (see the examples
        below).  The default ``None`` means size=nmulti.
    n
        Number of experiments simulated per multinomial.  A scalar or a
        tensor; it is broadcast to shape "nmulti".
    dtype
        dtype of the returned counts.

    Returns
    -------
    tensor
        Tensor with len(size)+1 dimensions and shape[-1]==L, of the requested
        ``dtype``, holding the experiment counts.  Its shape follows from
        both size and pvals.shape, and for the result rval
        "numpy.allclose(rval.sum(axis=-1), n)" holds.

    Examples
    --------
    Simulating n experiments from each multinomial in a batch of size B:

        size=None, pvals.shape=(B,L) --> rval.shape=[B,L]

        rval[i,j] counts possibility j for the i'th distribution (row) of
        pvals.

    Using size:

        size=(1,-1), pvals.shape=(A,B,L) --> rval.shape=[1,B,L], which
        requires A==1.

        rval[k,i,j] counts possibility j for the distribution pvals[k,i].

    Using size to broadcast pvals:

        size=(10, 1, -1), pvals.shape=(A, B, L) --> rval.shape=[10,1,B,L],
        which requires A==1.

        rval[l,k,i,j] counts possibility j for the distribution pvals[k,i]
        in the l'th of 10 draws.

    """
    n = tensor.as_tensor_variable(n)
    pvals = tensor.as_tensor_variable(pvals)

    # Drop the last axis of pvals (a stand-in for pvals[..., 0]) so that the
    # shape inference below only sees the "nmulti" dimensions.
    leading_slice = pvals.T[0].T
    ndim, size, leading_bcast = _infer_ndim_bcast(ndim, size, n,
                                                  leading_slice)

    # The output gains one trailing axis that mirrors pvals' last axis.
    out_bcast = leading_bcast + (pvals.type.broadcastable[-1], )
    out_type = tensor.TensorType(dtype=dtype, broadcastable=out_bcast)

    sampler = RandomFunction(multinomial_helper, out_type, ndim_added=1)
    return sampler(random_state, size, n, pvals)
def make_node(self, x):
    """Return the Apply node for `x`, which must be typed as a GpuArrayType.

    The single output is a fresh ``tensor.TensorType`` variable with the same
    dtype and broadcastable pattern as `x`.

    Raises
    ------
    TypeError
        If ``x.type`` is not a ``GpuArrayType`` instance.
    """
    if isinstance(x.type, GpuArrayType):
        out_type = tensor.TensorType(dtype=x.dtype,
                                     broadcastable=x.broadcastable)
        return Apply(self, [x], [out_type()])
    raise TypeError(x)
def _execute(self):
    """Compute pooled variational-inference features and save one file per pooling grid.

    Steps, in order:

    1. Load the dataset described by
       ``self.dataset_family[self.which_set][self.size]`` (float32 design
       matrix) and, if ``self.restrict`` is set, keep only rows
       ``restrict[0]:restrict[1]``.
    2. Replace the first item of the stored preprocessing pipeline with a
       stride-1 ``ExtractGridPatches`` of shape ``(size, size)``.
    3. Run ``model.e_step.variational_inference`` on the patches and turn its
       ``'H_hat'`` / ``'S_hat'`` outputs into features according to
       ``self.feature_type`` (``'map_hs'``, ``'map_h'``, ``'exp_hs'``,
       ``'exp_hs_split'``, ``'exp_h'`` or ``'exp_h_thresh'``).
    4. Reassemble the features into a topological view and pool it onto a
       ``count`` x ``count`` grid for every ``count`` in
       ``self.pooling_region_counts`` using ``self.pool_mode``.
    5. Save each pooled array to the matching entry of ``self.save_paths``;
       when ``self.chunk_size`` is set the file name receives a ``_<letter>``
       suffix derived from ``self.chunk_id``.

    Raises
    ------
    NotImplementedError
        If ``self.feature_type`` is not one of the supported values.
    ValueError
        If ``self.pool_mode`` is unknown, or if ``self.save_paths`` does not
        have exactly one entry per pooling region count.
    """
    batch_size = self.batch_size
    feature_type = self.feature_type
    pooling_region_counts = self.pooling_region_counts
    dataset_family = self.dataset_family
    which_set = self.which_set
    model = self.model
    size = self.size

    nan = 0

    dataset_descriptor = dataset_family[which_set][size]

    dataset = dataset_descriptor.dataset_maker()
    expected_num_examples = dataset_descriptor.num_examples

    full_X = dataset.get_design_matrix()
    assert full_X.dtype == 'float32'
    num_examples = full_X.shape[0]
    assert num_examples == expected_num_examples

    if self.restrict is not None:
        assert self.restrict[1] <= full_X.shape[0]

        # Single pre-formatted string: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('restricting to examples  %s  through  %s  exclusive'
              % (self.restrict[0], self.restrict[1]))
        full_X = full_X[self.restrict[0]:self.restrict[1], :]

        assert self.restrict[1] > self.restrict[0]

    # update for after restriction
    num_examples = full_X.shape[0]
    assert num_examples > 0

    # The design matrix is kept in full_X; each batch gets a shallow copy of
    # this stripped dataset with its own rows installed.
    dataset.X = None
    dataset.design_loc = None
    dataset.compress = False

    patchifier = ExtractGridPatches(patch_shape=(size, size),
                                    patch_stride=(1, 1))

    pipeline = serial.load(dataset_descriptor.pipeline_path)
    assert isinstance(pipeline.items[0], ExtractPatches)
    pipeline.items[0] = patchifier

    print('defining features')
    V = T.matrix('V')
    assert V.type.dtype == 'float32'
    model.make_pseudoparams()
    d = model.e_step.variational_inference(V=V)

    H = d['H_hat']
    Mu1 = d['S_hat']

    assert H.dtype == 'float32'
    assert Mu1.dtype == 'float32'

    nfeat = model.nhid

    if feature_type == 'map_hs':
        feat = (H > 0.5) * Mu1
    elif feature_type == 'map_h':
        feat = T.cast(H > 0.5, dtype='float32')
    elif feature_type == 'exp_hs':
        feat = H * Mu1
    elif feature_type == 'exp_hs_split':
        # Positive and negative parts become separate channels, doubling
        # the feature count.
        Z = H * Mu1
        pos = T.clip(Z, 0., 1e32)
        neg = T.clip(-Z, 0, 1e32)
        feat = T.concatenate((pos, neg), axis=1)
        nfeat *= 2
    elif feature_type == 'exp_h':
        feat = H
    elif feature_type == 'exp_h_thresh':
        feat = H * (H > .01)
    else:
        raise NotImplementedError()

    assert feat.dtype == 'float32'
    print('compiling theano function')
    f = function([V], feat)

    if config.device.startswith('gpu') and nfeat >= 4000:
        f = halver(f, nfeat)

    topo_feat_var = T.TensorType(
        broadcastable=(False, False, False, False), dtype='float32')()
    if self.pool_mode == 'mean':
        region_feat_var = topo_feat_var.mean(axis=(1, 2))
    elif self.pool_mode == 'max':
        region_feat_var = topo_feat_var.max(axis=(1, 2))
    else:
        raise ValueError("Unknown pool mode: " + self.pool_mode)
    region_features = function([topo_feat_var], region_feat_var)

    # Side length of the grid of patch positions per image; the 32 presumably
    # reflects 32x32 input images -- TODO confirm against the dataset.
    ns = 32 - size + 1

    def average_pool(topo_feat, stride):
        """Pool `topo_feat` onto a stride x stride grid with region_features."""

        def point(p):
            # Floor division keeps the slice bounds integral regardless of
            # which division semantics are in effect.
            return p * ns // stride

        rval = np.zeros((topo_feat.shape[0], stride, stride,
                         topo_feat.shape[3]), dtype='float32')

        for i in range(stride):
            for j in range(stride):
                rval[:, i, j, :] = region_features(
                    topo_feat[:, point(i):point(i + 1),
                              point(j):point(j + 1), :])

        return rval

    outputs = [np.zeros((num_examples, count, count, nfeat), dtype='float32')
               for count in pooling_region_counts]

    assert len(outputs) > 0

    # zip() below would silently drop outputs that have no path; fail before
    # the expensive extraction loop instead.
    save_paths = self.save_paths
    if len(save_paths) != len(outputs):
        raise ValueError("Expected %d save paths (one per pooling region "
                         "count) but got %d"
                         % (len(outputs), len(save_paths)))

    fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                           view_converter=DefaultViewConverter([1, 1, nfeat]))

    depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                         patch_shape=(1, 1))

    batch_starts = range(0, num_examples - batch_size + 1, batch_size)
    if len(batch_starts) <= 0:
        # Nothing will be processed; show the sizes that caused it.
        print(num_examples)
        print(batch_size)

    for i in batch_starts:
        print(i)
        t1 = time.time()

        d = copy.copy(dataset)
        d.set_design_matrix(full_X[i:i + batch_size, :])

        t2 = time.time()

        # applying preprocessor
        d.apply_preprocessor(pipeline, can_fit=False)
        X2 = np.cast['float32'](d.get_design_matrix())

        t3 = time.time()

        # running theano function
        feat = f(X2)

        t4 = time.time()

        assert feat.dtype == 'float32'

        feat_dataset = copy.copy(fd)

        nan_mask = np.isnan(feat)
        if np.any(nan_mask):
            nan += nan_mask.sum()
            feat[nan_mask] = 0

        feat_dataset.set_design_matrix(feat)

        # reassembling features
        feat_dataset.apply_preprocessor(depatchifier)

        # making topological view
        topo_feat = feat_dataset.get_topological_view()
        assert topo_feat.shape[0] == batch_size

        t5 = time.time()

        # average pooling
        for output, count in zip(outputs, pooling_region_counts):
            output[i:i + batch_size, ...] = average_pool(topo_feat, count)

        t6 = time.time()

        print((t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5))

    for output, save_path in zip(outputs, save_paths):
        if self.chunk_size is not None:
            assert save_path.endswith('.npy')
            save_path_pieces = save_path.split('.npy')
            assert len(save_path_pieces) == 2
            assert save_path_pieces[1] == ''
            save_path = (save_path_pieces[0] + '_'
                         + chr(ord('A') + self.chunk_id) + '.npy')
        np.save(save_path, output)

    if nan > 0:
        warnings.warn(str(nan) + ' features were nan')
def make_node(self, x):
    """Return the Apply node for a 2-dimensional input `x`.

    `x` is converted with ``as_tensor_variable`` and must have ``ndim == 2``.
    The single output is a fresh int8 variable whose broadcastable pattern is
    empty (i.e. zero dimensions).
    """
    matrix = theano.tensor.as_tensor_variable(x)
    assert matrix.ndim == 2
    int8_scalar_type = T.TensorType(dtype='int8', broadcastable=[])
    return theano.Apply(self, [matrix], [int8_scalar_type()])
def setUp(self):
    """Create the shared variables, graphs and compiled functions for the Conv3D tests.

    Shared variables ``W``, ``b``, ``rb``, ``V`` and ``d`` feed a ``conv3D``
    graph (``H``) and two ``convTransp3D`` graphs (``transp`` on a free input
    ``otherH`` and ``R`` on ``H``).  The reconstruction objective is the sum
    of squared differences between ``V`` and ``R``; functions for the
    objective, its gradients and several shapes are compiled here.
    """
    super(TestConv3D, self).setUp()
    utt.seed_rng()
    self.rng = N.random.RandomState(utt.fetch_seed())

    # Default mode with Python-implementation checking switched off.
    mode = copy.copy(theano.compile.mode.get_default_mode())
    mode.check_py_code = False

    def labelled(variable, label):
        """Set `variable.name` and hand the variable back."""
        variable.name = label
        return variable

    def recons_grads():
        """Gradients of the objective w.r.t. W, H, V and b (in that order)."""
        return [T.grad(self.reconsObj, wrt)
                for wrt in (self.W, self.H, self.V, self.b)]

    # Parameters and inputs.
    self.W = labelled(
        shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX)), 'W')
    self.b = labelled(shared(N.zeros(1, dtype=floatX)), 'b')
    self.rb = labelled(shared(N.zeros(1, dtype=floatX)), 'rb')
    self.V = labelled(
        shared(N.ndarray(shape=(1, 1, 1, 1, 1), dtype=floatX)), 'V')
    self.d = labelled(shared(N.ndarray(shape=(3, ), dtype=int)), 'd')

    # Forward convolution.
    self.H = labelled(conv3D(self.V, self.W, self.b, self.d), 'H')
    self.H_func = function([], self.H, mode=mode)
    self.H_shape_func = function([], self.H.shape, mode=mode)

    # Transposed convolution applied to an arbitrary input.
    self.RShape = labelled(T.vector(dtype='int64'), 'RShape')
    five_dims = (False, False, False, False, False)
    self.otherH = T.TensorType(floatX, five_dims)(name='otherH')
    self.transp = labelled(
        convTransp3D(self.W, self.rb, self.d, self.otherH, self.RShape),
        'transp')
    self.transp_func = function([self.otherH, self.RShape], self.transp,
                                mode=mode)

    # Reconstruction of V from H.
    self.R = labelled(
        convTransp3D(self.W, self.rb, self.d, self.H, self.RShape), 'R')
    self.R_func = function([self.RShape], self.R, mode=mode)
    # Compiled without an explicit mode, unlike the other functions here.
    self.R_shape_func = function([self.RShape], self.R.shape)

    # Squared reconstruction error.
    residual = labelled(self.V - self.R, 'diff')
    squared = labelled(T.sqr(residual), 'sqr')
    self.reconsObj = labelled(T.sum(squared), 'reconsObj')
    self.reconsObjFunc = function([self.RShape], self.reconsObj, mode=mode)

    # Gradient functions; each one builds its own gradient expressions.
    self.gradientsFunc = function([self.RShape], recons_grads(), mode=mode)
    self.check_c_against_python = function([self.RShape], recons_grads(),
                                           mode='DEBUG_MODE')
    self.dCdW_shape_func = function([self.RShape],
                                    T.grad(self.reconsObj, self.W).shape,
                                    mode=mode)
# Create the output directory and sanity-check the input directory before
# building the local-mean-removal graph.
serial.mkdir(outdir)

paths = os.listdir(base)
if len(paths) != expected_num_images:
    raise AssertionError("Something is wrong with your " + base
                         + " directory. It should contain "
                         + str(expected_num_images)
                         + " image files, but contains " + str(len(paths)))

# Side length of the square smoothing filter.
kernel_shape = 7

from theano import tensor as T
from pylearn2.utils import sharedX
from pylearn2.datasets.preprocessing import gaussian_filter
from theano.tensor.nnet import conv2d

# One image at a time: the first and last axes are broadcastable (length 1),
# the two middle axes are free.
X = T.TensorType(dtype='float32', broadcastable=(True, False, False, True))()
from theano import config
if config.compute_test_value == 'raise':
    X.tag.test_value = np.zeros((1, 32, 32, 1), dtype=X.dtype)
orig_X = X
filter_shape = (1, 1, kernel_shape, kernel_shape)
filters = sharedX(gaussian_filter(kernel_shape).reshape(filter_shape))

# Move the last axis to position 1 for conv2d.
X = X.dimshuffle(0, 3, 1, 2)

convout = conv2d(X, filters=filters, border_mode='full')

# For each pixel, subtract the filter response over its
# kernel_shape x kernel_shape (7x7) neighbourhood.  The 'full' convolution
# grows every spatial side by `mid`, so the central region is cropped back
# to the input size.
mid = kernel_shape // 2
centered_X = X - convout[:, :, mid:-mid, mid:-mid]
def __init__(self, n_in=None, n_out=None, base_network=None, data_map=None, data_map_i=None,
             shared_params_network=None, mask=None, sparse_input=False, target='classes',
             train_flag=False, eval_flag=False):
    """
    :param int n_in: input dimension of the network
    :param dict[str,(int,int)] n_out: output dimensions of the network.
      The first int is the number of classes, the second int is 1 if the data is sparse,
      i.e. indices are given.
    :param dict[str,theano.Variable] data_map: if given, used for x/y (data_map_i is then required)
    :param dict[str,theano.Variable] data_map_i: if given, used for i/j
    :param LayerNetwork|None base_network: optional network to take x/y/i/j/n_in/n_out from.
      data_map takes precedence over base_network.
    :param LayerNetwork|()->LayerNetwork|None shared_params_network: optional network to share
      params with. It is an error if a param cannot be shared.
    :param str mask: e.g. "unity" or None ("dropout")
    :param bool sparse_input: for SourceLayer
    :param str target: default target
    :param bool train_flag: marks that this network is used for training
    :param bool eval_flag: marks that this network is used for evaluation
    """
    # Resolve n_out: a private copy of the argument, or the base network's dict itself.
    if n_out is not None:
        n_out = n_out.copy()
    else:
        assert base_network is not None
        n_out = base_network.n_out

    if n_in is None:
        assert "data" in n_out
        n_in = n_out["data"][0]

    if "data" in n_out:
        assert 1 <= n_out["data"][1] <= 2  # maybe obsolete check...
        data_dim = n_out["data"][1] + 1  # one more because of batch-dim
    else:
        data_dim = 3
        n_out["data"] = (n_in, data_dim - 1)  # small hack: support input-data as target

    # Inputs: explicit data maps win over the base network; otherwise create fresh variables.
    if data_map is not None:
        assert data_map_i is not None
        self.y = data_map
        self.x = data_map["data"]
        self.j = data_map_i
        self.i = data_map_i["data"]
    elif base_network is not None:
        self.x = base_network.x
        self.y = base_network.y
        self.i = base_network.i
        self.j = base_network.j
    else:
        x_dtype = "float32" if data_dim >= 3 else "int32"
        self.x = T.TensorType(x_dtype, ((False,) * data_dim))('x')
        self.y = {"data": self.x}
        self.i = T.bmatrix('i')  # :type: theano.Variable
        self.j = {"data": self.i}

    if base_network is None:
        self.epoch = T.constant(0, name="epoch", dtype="int32")
        self.tags = T.bmatrix('tags')
    else:
        self.epoch = base_network.epoch
        self.tags = base_network.tags

    self.constraints = {}
    self.total_constraints = T.constant(0)
    Layer.initialize_rng()

    self.n_in = n_in
    self.n_out = n_out
    self.hidden = {}  # :type: dict[str,ForwardLayer|RecurrentLayer]
    self.train_params_vars = []  # :type: list[theano.compile.sharedvalue.SharedVariable]
    self.description = None  # :type: LayerNetworkDescription | None
    self.train_param_args = None  # :type: dict[str]
    self.recurrent = False  # any of the from_...() functions will set this
    self.default_mask = mask
    self.sparse_input = sparse_input
    self.default_target = target
    self.train_flag = train_flag
    self.eval_flag = eval_flag

    self.output = {}  # :type: dict[str,FramewiseOutputLayer]
    self.known_grads = {}  # :type: dict[theano.Variable,theano.Variable]
    self.json_content = "{}"
    self.costs = {}
    self.total_cost = T.constant(0)
    self.objective = None
    self.update_step = 0
    self.errors = {}
    self.loss = None
    self.ctc_priors = None
    self.calc_step_base = None
    self.calc_steps = []

    self.base_network = base_network
    self.shared_params_network = shared_params_network
def main():
    """Run leave-subject-out cross-validation of the selected network.

    Reads its configuration from module-level names (``filename``,
    ``subjectsFilename``, ``filename_aug``, ``augment``, ``model``,
    ``init_pars``, ``num_epochs``, ``batch_size``).  For every subject a fold
    is built with that subject's trials held out, a fresh network is trained
    for ``num_epochs`` epochs with Adam on the cross-entropy loss, and the
    test set is evaluated (and the weights dumped) each time the validation
    accuracy improves.  Per-fold scores and per-epoch curves are written with
    ``scipy.io.savemat``.

    Raises
    ------
    ValueError
        If ``model`` is not one of ``'1dconv'``, ``'maxpool'``, ``'lstm'`` or
        ``'mix'``.
    """
    # Load the dataset
    print("Loading data...")
    data, labels = load_data(filename)
    mat = scipy.io.loadmat(subjectsFilename, mat_dtype=True)
    subjNumbers = np.squeeze(mat['subjectNum'])  # subject IDs for each trial

    if augment:
        # Aggregate augmented data and labels
        data_aug, labels_aug = load_data(filename_aug)
        data = np.concatenate((data, data_aug), axis=1)
        labels = np.vstack((labels, labels_aug))

    # Create folds based on subject numbers (leave-subject-out x-validation).
    fold_pairs = []
    for subject in np.unique(subjNumbers):
        held_out = subjNumbers == subject
        # flatnonzero always yields a 1-D index array, also for a subject
        # with a single trial.
        tr = np.flatnonzero(np.bitwise_not(held_out))  # Training indices
        ts = np.flatnonzero(held_out)
        if augment:
            # Include augmented training data: the augmented copies were
            # appended after the originals, offset by the number of trials.
            tr = np.concatenate((tr, tr + subjNumbers.size))
        np.random.shuffle(tr)  # Shuffle indices
        np.random.shuffle(ts)
        fold_pairs.append((tr, ts))

    # Initializing output variables
    validScores, testScores = [], []
    trainLoss = np.zeros((len(fold_pairs), num_epochs))
    validLoss = np.zeros((len(fold_pairs), num_epochs))
    validEpochAccu = np.zeros((len(fold_pairs), num_epochs))

    for foldNum, fold in enumerate(fold_pairs):
        print('Beginning fold {0} out of {1}'.format(foldNum+1, len(fold_pairs)))
        # Divide the dataset into train, validation and test sets
        (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(data, labels, fold)
        X_train = X_train.astype("float32", casting='unsafe')
        X_val = X_val.astype("float32", casting='unsafe')
        X_test = X_test.astype("float32", casting='unsafe')
        # trainMeans = [np.mean(X_train[:, :, i, :, :].flatten()) for i in range(X_train.shape[2])]
        # trainStds = [np.std(X_train[:, :, i, :, :].flatten()) for i in range(X_train.shape[2])]
        # for i in range(len(trainMeans)):
        #     X_train[:, :, i, :, :] = (X_train[:, :, i, :, :] - trainMeans[i]) / trainStds[i]
        #     X_val[:, :, i, :, :] = (X_val[:, :, i, :, :] - trainMeans[i]) / trainStds[i]
        #     X_test[:, :, i, :, :] = (X_test[:, :, i, :, :] - trainMeans[i]) / trainStds[i]
        # X_train = X_train / np.float32(256)
        # X_val = X_val / np.float32(256)
        # X_test = X_test / np.float32(256)

        # Prepare Theano variables for inputs and targets
        input_var = T.TensorType('floatX', ((False,) * 5))()  # Notice the () at the end
        target_var = T.ivector('targets')

        # Create neural network model (depending on the `model` setting)
        print("Building model and compiling functions...")
        if model == '1dconv':
            network = build_convpool_conv1d(input_var)
        elif model == 'maxpool':
            network = build_convpool_max(input_var)
        elif model == 'lstm':
            network = build_convpool_lstm(input_var)
        elif model == 'mix':
            network = build_convpool_mix(input_var)
        else:
            # Without this, an unknown name surfaced later as an unbound
            # `network`.
            raise ValueError("Unknown model type: {0!r}".format(model))

        # Initialize parameters with previously saved ones.
        if init_pars:
            # NOTE(review): this file name ("weigths", no model name) does not
            # match the pattern used by np.savez below -- confirm which files
            # are expected to exist before changing it.
            with np.load('weigths_lasg{0}.npz'.format(foldNum)) as f:
                # Extract CNN parameters only (not the FC layers)
                param_values = [f['arr_%d' % i] for i in range(14)]
                layers = lasagne.layers.get_all_layers(network)
                lasagne.layers.set_all_param_values(layers[83], param_values)

        # Training loss: mean categorical cross-entropy of the stochastic pass.
        prediction = lasagne.layers.get_output(network)
        loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
        loss = loss.mean()
        # We could add some weight decay as well here, see lasagne.regularization.

        # Update expressions for training: Adam with a fixed learning rate.
        params = lasagne.layers.get_all_params(network, trainable=True)
        updates = lasagne.updates.adam(loss, params, learning_rate=0.001)

        # Validation/testing loss: deterministic forward pass, which disables
        # dropout layers.
        test_prediction = lasagne.layers.get_output(network, deterministic=True)
        test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                                target_var)
        test_loss = test_loss.mean()
        # Symbolic classification accuracy (named apart from the numeric
        # accumulator used during test evaluation).
        test_acc_expr = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                               dtype=theano.config.floatX)

        # Training step on a mini-batch: applies the updates and returns the
        # training loss.
        train_fn = theano.function([input_var, target_var], loss, updates=updates)
        # Validation loss and accuracy.
        val_fn = theano.function([input_var, target_var], [test_loss, test_acc_expr])

        # Finally, launch the training loop.
        print("Starting training...")
        best_validation_accu = 0
        # Stays NaN if validation accuracy never rises above 0, in which case
        # the test set is never evaluated for this fold.
        av_test_acc = float('nan')
        # We iterate over epochs:
        for epoch in range(num_epochs):
            # In each epoch, we do a full pass over the training data:
            train_err = 0
            train_batches = 0
            start_time = time.time()
            for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=False):
                inputs, targets = batch
                train_err += train_fn(inputs, targets)
                train_batches += 1

            # And a full pass over the validation data:
            val_err = 0
            val_acc = 0
            val_batches = 0
            for batch in iterate_minibatches(X_val, y_val, batch_size, shuffle=False):
                inputs, targets = batch
                err, acc = val_fn(inputs, targets)
                val_err += err
                val_acc += acc
                val_batches += 1

            av_train_err = train_err / train_batches
            av_val_err = val_err / val_batches
            av_val_acc = val_acc / val_batches
            # Then we print the results for this epoch:
            print("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs, time.time() - start_time))
            print(" training loss:\t\t{:.6f}".format(av_train_err))
            print(" validation loss:\t\t{:.6f}".format(av_val_err))
            print(" validation accuracy:\t\t{:.2f} %".format(av_val_acc * 100))

            trainLoss[foldNum, epoch] = av_train_err
            validLoss[foldNum, epoch] = av_val_err
            validEpochAccu[foldNum, epoch] = av_val_acc * 100

            if av_val_acc > best_validation_accu:
                best_validation_accu = av_val_acc

                # NOTE(review): the test pass is run whenever validation
                # accuracy improves, so the values kept after the epoch loop
                # belong to the best validation epoch (hence "Best test
                # accuracy" below).  Confirm this placement is intended.
                test_err = 0
                test_acc = 0
                test_batches = 0
                for batch in iterate_minibatches(X_test, y_test, batch_size, shuffle=False):
                    inputs, targets = batch
                    err, acc = val_fn(inputs, targets)
                    test_err += err
                    test_acc += acc
                    test_batches += 1

                av_test_err = test_err / test_batches
                av_test_acc = test_acc / test_batches
                print("Final results:")
                print(" test loss:\t\t\t{:.6f}".format(av_test_err))
                print(" test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))
                # Dump the network weights to a file like this:
                np.savez('weights_lasg_{0}_{1}'.format(model, foldNum),
                         *lasagne.layers.get_all_param_values(network))

        validScores.append(best_validation_accu * 100)
        testScores.append(av_test_acc * 100)
        print('-'*50)
        print("Best validation accuracy:\t\t{:.2f} %".format(best_validation_accu * 100))
        print("Best test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))

    scipy.io.savemat('cnn_lasg_{0}_results'.format(model),
                     {'validAccu': validScores,
                      'testAccu': testScores,
                      'trainLoss': trainLoss,
                      'validLoss': validLoss,
                      'validEpochAccu': validEpochAccu
                      })
def _execute(self):
    """Extract pooled sparse-coding features for one dataset split and save them.

    Reads ``save_path``, ``batch_size``, ``dataset_family``, ``which_set``,
    ``size``, ``restrict``, ``chunk_size`` and ``chunk_id`` from ``self``, and
    the module-level globals declared below (plus ``num_filters``, ``M`` and
    ``s``, which are read without a ``global`` declaration).

    For every batch the method: runs the preprocessing pipeline (with its
    first item replaced by a stride-1 grid patch extractor), obtains codes
    ``Z`` through ``M.put`` / ``M.eval`` / ``M.get``, splits them into
    positive and negative parts, zeroes and counts NaNs, reassembles the
    patches into a topological view, mean-pools that view into a
    ``num_superpixels`` x ``num_superpixels`` grid and finally averages one
    rectangle of that grid per output feature.

    The resulting ``(num_examples, num_output_features)`` float32 matrix is
    written with ``np.save``; a warning is issued if any NaN was replaced.
    """
    global num_superpixels
    global num_output_features
    global idxs
    global top
    global bottom
    global left
    global right

    save_path = self.save_path
    batch_size = self.batch_size
    dataset_family = self.dataset_family
    which_set = self.which_set
    size = self.size

    # Running count of NaN feature values that were replaced by 0.
    nan = 0

    dataset_descriptor = dataset_family[which_set][size]

    dataset = dataset_descriptor.dataset_maker()
    expected_num_examples = dataset_descriptor.num_examples

    full_X = dataset.get_design_matrix()
    num_examples = full_X.shape[0]
    assert num_examples == expected_num_examples

    if self.restrict is not None:
        assert self.restrict[1] <= full_X.shape[0]

        print 'restricting to examples ',self.restrict[0],' through ',self.restrict[1],' exclusive'
        full_X = full_X[self.restrict[0]:self.restrict[1],:]

        assert self.restrict[1] > self.restrict[0]

    #update for after restriction
    num_examples = full_X.shape[0]

    assert num_examples > 0

    # The design matrix now lives in full_X; clear it on the dataset object,
    # which is shallow-copied once per batch below.
    dataset.X = None
    dataset.design_loc = None
    dataset.compress = False

    patchifier = ExtractGridPatches( patch_shape = (size,size), patch_stride = (1,1) )

    pipeline = serial.load(dataset_descriptor.pipeline_path)

    # Swap the pipeline's first step (a patch extractor) for the dense grid
    # extractor built above.
    assert isinstance(pipeline.items[0], ExtractPatches)
    pipeline.items[0] = patchifier

    # Symbolic split of the codes into positive and negative parts, each
    # clipped to [0, 1e30], concatenated along axis 1.
    Z = T.matrix('Z')
    pos = T.clip(Z,0.,1e30)
    neg = T.clip(-Z,0.,1e30)

    feat = T.concatenate((pos, neg), axis=1)

    assert feat.dtype == 'float32'
    print 'compiling theano function'
    f = function([Z],feat)

    nhid = 3200 # 2 * num dictionary elems

    # With nhid fixed at 3200 this branch is never taken as written.
    if config.device.startswith('gpu') and nhid >= 4000:
        f = halver(f, nhid)

    topo_feat_var = T.TensorType(broadcastable = (False,False,False,False), dtype='float32')()
    # Mean over axes 1 and 2 of a 4-D tensor.
    region_features = function([topo_feat_var], topo_feat_var.mean(axis=(1,2)) )

    def average_pool( stride ):
        """Mean-pool ``topo_feat`` into a ``stride`` x ``stride`` grid.

        Reads ``topo_feat`` and ``ns`` from the enclosing scope; both are
        bound later in ``_execute``, before the first call.
        """
        def point( p ):
            # Used as a slice bound below; an int under Python 2 division,
            # which the print statements in this function imply.
            return p * ns / stride

        rval = np.zeros( (topo_feat.shape[0], stride, stride, topo_feat.shape[3] ) , dtype = 'float32')

        for i in xrange(stride):
            for j in xrange(stride):
                rval[:,i,j,:] = region_features( topo_feat[:,point(i):point(i+1), point(j):point(j+1),:] )

        return rval

    output = np.zeros((num_examples,num_output_features),dtype='float32')

    # Template dataset, shallow-copied per batch to hold the feature matrix.
    fd = DenseDesignMatrix(X = np.zeros((1,1),dtype='float32'), view_converter = DefaultViewConverter([1, 1, nhid] ) )

    # NOTE(review): the literal 32 presumably is the input image side length
    # -- confirm against the dataset.
    ns = 32 - size + 1
    depatchifier = ReassembleGridPatches( orig_shape = (ns, ns), patch_shape=(1,1) )

    # Diagnostic output when the batch loop below would not run at all.
    if len(range(0,num_examples-batch_size+1,batch_size)) <= 0:
        print num_examples
        print batch_size

    # Only full batches are visited; the body asserts batch_size == 1.
    for i in xrange(0,num_examples-batch_size+1,batch_size):
        print i
        t1 = time.time()

        d = copy.copy(dataset)
        d.set_design_matrix(full_X[i:i+batch_size,:])

        t2 = time.time()

        #print '\tapplying preprocessor'
        d.apply_preprocessor(pipeline, can_fit = False)
        X2 = d.get_design_matrix()

        t3 = time.time()

        #print '\trunning theano function'
        # NOTE(review): M / s look like a handle to an external numeric
        # session that evaluates the command string -- confirm.
        M.put(s,'batch',X2)
        M.eval(s, 'Z = sparse_codes(batch, dictionary, lambda)')
        Z = M.get(s, 'Z')

        feat = f(np.cast['float32'](Z))

        t4 = time.time()

        assert feat.dtype == 'float32'

        feat_dataset = copy.copy(fd)

        # Count NaNs, then replace them with 0 in place.
        if np.any(np.isnan(feat)):
            nan += np.isnan(feat).sum()
            feat[np.isnan(feat)] = 0

        feat_dataset.set_design_matrix(feat)

        #print '\treassembling features'
        feat_dataset.apply_preprocessor(depatchifier)

        #print '\tmaking topological view'
        topo_feat = feat_dataset.get_topological_view()
        assert topo_feat.shape[0] == batch_size

        t5 = time.time()

        #average pooling
        superpixels = average_pool(num_superpixels)

        assert batch_size == 1

        assert superpixels.shape[0] == batch_size
        assert superpixels.shape[1] == num_superpixels
        assert superpixels.shape[2] == num_superpixels
        assert superpixels.shape[3] == 2 * num_filters

        # Output feature j is the mean of channel idxs[j] over the inclusive
        # superpixel rectangle [top[j], bottom[j]] x [left[j], right[j]].
        for j in xrange(num_output_features):
            output[i:i+batch_size, j] = superpixels[:,top[j]:bottom[j]+1, left[j]:right[j]+1, idxs[j]].mean()

        t6 = time.time()

        # Total time for this batch, followed by the time of each stage.
        print (t6-t1, t2-t1, t3-t2, t4-t3, t5-t4, t6-t5)

    # When chunking, insert '_<letter>' before the '.npy' suffix, where the
    # letter is 'A' offset by chunk_id.
    if self.chunk_size is not None:
        assert save_path.endswith('.npy')
        save_path_pieces = save_path.split('.npy')
        assert len(save_path_pieces) == 2
        assert save_path_pieces[1] == ''
        save_path = save_path_pieces[0] + '_' + chr(ord('A')+self.chunk_id)+'.npy'
    np.save(save_path,output)

    if nan > 0:
        warnings.warn(str(nan)+' features were nan')
def make_node(self, mean_anom, eccen):
    """Build the Apply node linking this op to its two symbolic inputs.

    The single output variable has the dtype obtained by upcasting the
    dtypes of ``mean_anom`` and ``eccen``, the same number of dimensions as
    ``mean_anom``, and no broadcastable dimension.
    """
    result_dtype = theano.scalar.upcast(mean_anom.dtype, eccen.dtype)
    no_broadcast = [False] * mean_anom.ndim
    result_type = tt.TensorType(dtype=result_dtype,
                                broadcastable=no_broadcast)
    return gof.Apply(self, [mean_anom, eccen], [result_type()])
def new_tensor(name, ndim, dtype):
    """Return a named symbolic tensor with ``ndim`` non-broadcastable axes.

    ``theano.tensor`` is imported inside the function, as in the original,
    so importing this module does not itself import it.
    """
    from theano.tensor import TensorType as _TensorType

    pattern = (False, ) * ndim
    tensor_type = _TensorType(dtype, pattern)
    return tensor_type(name)
def main(num_epochs=NEPOCH):
    """Build, optionally resume, train and evaluate the SNLI attention model.

    The function loads all SNLI mini-batches into memory, builds a Lasagne
    network (shared embedding -> attend -> compare -> aggregate -> softmax
    over 3 classes), compiles a training function (Adagrad) and a
    deterministic cost function, then trains for ``num_epochs`` epochs.
    Parameters are pickled to ``params/params_<filename>.pkl`` after every
    epoch and reloaded from that file at start-up when it exists.

    A ``KeyboardInterrupt`` during training drops into ``pdb``.

    Parameters
    ----------
    num_epochs : int
        Number of passes over the training batches.
    """
    print("Loading data ...")
    snli = SNLI(batch_size=BSIZE)
    train_batches = list(snli.train_minibatch_generator())
    dev_batches = list(snli.dev_minibatch_generator())
    test_batches = list(snli.test_minibatch_generator())
    # W shape: (# vocab size, WE_DIM); each row is divided by its L2 norm
    # (plus a small constant to avoid division by zero).
    row_norms = numpy.linalg.norm(snli.weight, axis=1).reshape(
        snli.weight.shape[0], 1)
    W_word_embedding = snli.weight / (row_norms + 0.00001)
    del snli

    print("Building network ...")
    ########### input layers ###########
    # hypothesis
    input_var_h = T.TensorType('int32', [False, False])('hypothesis_vector')
    input_var_h.tag.test_value = numpy.hstack(
        (numpy.random.randint(1, 10000, (BSIZE, 18), 'int32'),
         numpy.zeros((BSIZE, 6)).astype('int32')))
    l_in_h = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                       input_var=input_var_h)

    input_mask_h = T.TensorType('int32', [False, False])('hypo_mask')
    input_mask_h.tag.test_value = numpy.hstack(
        (numpy.ones((BSIZE, 18), dtype='int32'),
         numpy.zeros((BSIZE, 6), dtype='int32')))
    input_mask_h.tag.test_value[1, 18:22] = 1
    l_mask_h = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                         input_var=input_mask_h)

    # premise
    input_var_p = T.TensorType('int32', [False, False])('premise_vector')
    input_var_p.tag.test_value = numpy.hstack(
        (numpy.random.randint(1, 10000, (BSIZE, 16), 'int32'),
         numpy.zeros((BSIZE, 3)).astype('int32')))
    l_in_p = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                       input_var=input_var_p)

    input_mask_p = T.TensorType('int32', [False, False])('premise_mask')
    input_mask_p.tag.test_value = numpy.hstack(
        (numpy.ones((BSIZE, 16), dtype='int32'),
         numpy.zeros((BSIZE, 3), dtype='int32')))
    input_mask_p.tag.test_value[1, 16:18] = 1
    l_mask_p = lasagne.layers.InputLayer(shape=(BSIZE, None),
                                         input_var=input_mask_p)
    ###################################

    # output shape (BSIZE, None, WEDIM)
    l_hypo_embed = lasagne.layers.EmbeddingLayer(
        l_in_h,
        input_size=W_word_embedding.shape[0],
        output_size=W_word_embedding.shape[1],
        W=W_word_embedding)

    # The premise embedding shares its weight matrix with the hypothesis one.
    l_prem_embed = lasagne.layers.EmbeddingLayer(
        l_in_p,
        input_size=W_word_embedding.shape[0],
        output_size=W_word_embedding.shape[1],
        W=l_hypo_embed.W)

    # EMBEDING MAPPING: output shape (BSIZE, None, WEMAP)
    l_hypo_reduced_embed = DenseLayer3DInput(l_hypo_embed,
                                             num_units=WEMAP,
                                             b=None,
                                             nonlinearity=None)
    l_hypo_embed_dpout = lasagne.layers.DropoutLayer(l_hypo_reduced_embed,
                                                     p=DPOUT,
                                                     rescale=True)
    l_prem_reduced_embed = DenseLayer3DInput(l_prem_embed,
                                             num_units=WEMAP,
                                             W=l_hypo_reduced_embed.W,
                                             b=None,
                                             nonlinearity=None)
    l_prem_embed_dpout = lasagne.layers.DropoutLayer(l_prem_reduced_embed,
                                                     p=DPOUT,
                                                     rescale=True)

    # ATTEND
    l_hypo_embed_hid1 = DenseLayer3DInput(
        l_hypo_embed_dpout,
        num_units=EMBDHIDA,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_hypo_embed_hid1_dpout = lasagne.layers.DropoutLayer(l_hypo_embed_hid1,
                                                          p=DPOUT,
                                                          rescale=True)
    l_hypo_embed_hid2 = DenseLayer3DInput(
        l_hypo_embed_hid1_dpout,
        num_units=EMBDHIDB,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    l_prem_embed_hid1 = DenseLayer3DInput(
        l_prem_embed_dpout,
        num_units=EMBDHIDA,
        W=l_hypo_embed_hid1.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_prem_embed_hid1_dpout = lasagne.layers.DropoutLayer(l_prem_embed_hid1,
                                                          p=DPOUT,
                                                          rescale=True)
    l_prem_embed_hid2 = DenseLayer3DInput(
        l_prem_embed_hid1_dpout,
        num_units=EMBDHIDB,
        W=l_hypo_embed_hid2.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    # output dim: (BSIZE, NROWx, NROWy)
    # NOTE(review): the hypothesis side feeds its *first* hidden layer while
    # the premise side feeds its *second*; this asymmetry (and the unused
    # l_hypo_embed_hid2 output) looks unintended -- confirm before changing.
    l_e = ComputeEmbeddingPool([l_hypo_embed_hid1, l_prem_embed_hid2])
    # output dim: (BSIZE, NROWy, DIM)
    l_hypo_weighted = AttendOnEmbedding([l_hypo_reduced_embed, l_e],
                                        masks=[l_mask_h, l_mask_p],
                                        direction='col')
    # output dim: (BSIZE, NROWx, DIM)
    l_prem_weighted = AttendOnEmbedding([l_prem_reduced_embed, l_e],
                                        masks=[l_mask_h, l_mask_p],
                                        direction='row')

    # COMPARE
    # output dim: (BSIZE, NROW, 4*LSTMHID)
    l_hypo_premwtd = lasagne.layers.ConcatLayer(
        [l_hypo_reduced_embed, l_prem_weighted], axis=2)
    l_prem_hypowtd = lasagne.layers.ConcatLayer(
        [l_prem_reduced_embed, l_hypo_weighted], axis=2)

    l_hypo_premwtd_dpout = lasagne.layers.DropoutLayer(l_hypo_premwtd,
                                                       p=DPOUT,
                                                       rescale=True)
    l_hypo_comphid1 = DenseLayer3DInput(
        l_hypo_premwtd_dpout,
        num_units=COMPHIDA,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_hypo_comphid1_dpout = lasagne.layers.DropoutLayer(l_hypo_comphid1,
                                                        p=DPOUT,
                                                        rescale=True)
    l_hypo_comphid2 = DenseLayer3DInput(
        l_hypo_comphid1_dpout,
        num_units=COMPHIDB,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    l_prem_hypowtd_dpout = lasagne.layers.DropoutLayer(l_prem_hypowtd,
                                                       p=DPOUT,
                                                       rescale=True)
    l_prem_comphid1 = DenseLayer3DInput(
        l_prem_hypowtd_dpout,
        num_units=COMPHIDA,
        W=l_hypo_comphid1.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_prem_comphid1_dpout = lasagne.layers.DropoutLayer(l_prem_comphid1,
                                                        p=DPOUT,
                                                        rescale=True)
    l_prem_comphid2 = DenseLayer3DInput(
        l_prem_comphid1_dpout,
        num_units=COMPHIDB,
        W=l_hypo_comphid2.W,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)

    # AGGREGATE
    # output dim: (BSIZE, 4*LSTMHID)
    l_hypo_mean = MeanOverDim(l_hypo_comphid2, mask=l_mask_h, dim=1)
    l_prem_mean = MeanOverDim(l_prem_comphid2, mask=l_mask_p, dim=1)

    l_v1v2 = lasagne.layers.ConcatLayer([l_hypo_mean, l_prem_mean], axis=1)

    l_v1v2_dpout = lasagne.layers.DropoutLayer(l_v1v2, p=DPOUT, rescale=True)
    l_outhid1 = lasagne.layers.DenseLayer(
        l_v1v2_dpout,
        num_units=OUTHID,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    l_outhid1_dpout = lasagne.layers.DropoutLayer(l_outhid1,
                                                  p=DPOUT,
                                                  rescale=True)
    l_outhid2 = lasagne.layers.DenseLayer(
        l_outhid1_dpout,
        num_units=OUTHID,
        b=None,
        nonlinearity=lasagne.nonlinearities.rectify)
    # l_outhid2_dpout = lasagne.layers.DropoutLayer(l_outhid2, p=DPOUT, rescale=True)

    l_output = lasagne.layers.DenseLayer(
        l_outhid2,
        num_units=3,
        b=None,
        nonlinearity=lasagne.nonlinearities.softmax)

    ########### target, cost, validation, etc. ##########
    target_values = T.ivector('target_output')
    target_values.tag.test_value = numpy.asarray([1, ] * BSIZE,
                                                 dtype='int32')

    # Stochastic (dropout active) outputs are used for training ...
    network_output = lasagne.layers.get_output(l_output)
    network_prediction = T.argmax(network_output, axis=1)
    error_rate = T.mean(T.neq(network_prediction, target_values))

    # ... and deterministic ones for evaluation.
    network_output_clean = lasagne.layers.get_output(l_output,
                                                     deterministic=True)
    network_prediction_clean = T.argmax(network_output_clean, axis=1)
    error_rate_clean = T.mean(T.neq(network_prediction_clean, target_values))

    cost = T.mean(
        T.nnet.categorical_crossentropy(network_output, target_values))
    cost_clean = T.mean(
        T.nnet.categorical_crossentropy(network_output_clean, target_values))

    # Retrieve all parameters from the network
    all_params = lasagne.layers.get_all_params(l_output)
    if not UPDATEWE:
        all_params.remove(l_hypo_embed.W)

    # Evaluate every shape once and reuse it for the total and the table.
    param_shapes = [param.shape.eval() for param in all_params]
    numparams = sum([numpy.prod(shape) for shape in param_shapes])
    print("Number of params: {}\nName\t\t\tShape\t\t\tSize".format(numparams))
    print("-----------------------------------------------------------------")
    for item, shape in zip(all_params, param_shapes):
        # str() first: a width spec on arbitrary objects is rejected by
        # object.__format__ on Python 3; the printed text is unchanged.
        print("{0:24}{1:24}{2}".format(str(item), str(shape),
                                       numpy.prod(shape)))

    # if exist param file then load params
    look_for = 'params' + os.sep + 'params_' + filename + '.pkl'
    if os.path.isfile(look_for):
        print("Resuming from file: " + look_for)
        # NOTE: unpickling executes arbitrary code; only resume from
        # checkpoint files this program wrote itself.
        with open(look_for, 'rb') as param_file:
            all_param_values = cPickle.load(param_file)
        for p, v in zip(all_params, all_param_values):
            p.set_value(v)

    # Compute SGD updates for training
    print("Computing updates ...")
    updates = lasagne.updates.adagrad(cost, all_params, LR)

    # Theano functions for training and computing cost
    print("Compiling functions ...")
    network_inputs = [
        l_in_h.input_var, l_mask_h.input_var, l_in_p.input_var,
        l_mask_p.input_var, target_values
    ]
    train = theano.function(network_inputs, [cost, error_rate],
                            updates=updates)
    # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))
    compute_cost = theano.function(network_inputs,
                                   [cost_clean, error_rate_clean])

    # mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=False))

    def evaluate(mode):
        """Return the (mean cost, mean error rate) over the 'dev' or 'test' batches."""
        if mode == 'dev':
            data = dev_batches
        elif mode == 'test':
            data = test_batches
        else:
            raise ValueError("mode must be 'dev' or 'test', got %r" % (mode,))

        set_cost = 0.
        set_error_rate = 0.
        # Incremental running mean over the batches seen so far.
        for batches_seen, (hypo, hm, premise, pm, truth) in enumerate(data, 1):
            _cost, _error = compute_cost(hypo, hm, premise, pm, truth)
            set_cost = (1.0 - 1.0 / batches_seen) * set_cost + \
                       1.0 / batches_seen * _cost
            set_error_rate = (1.0 - 1.0 / batches_seen) * set_error_rate + \
                             1.0 / batches_seen * _error

        return set_cost, set_error_rate

    print("Done. Evaluating scratch model ...")
    dev_set_cost, dev_set_error = evaluate('dev')
    print("BEFORE TRAINING: dev cost %f, error %f" %
          (dev_set_cost, dev_set_error))
    print("Training ...")
    try:
        for epoch in range(num_epochs):
            train_set_cost = 0.
            train_set_error = 0.
            start = time.time()

            for batches_seen, (hypo, hm, premise, pm,
                               truth) in enumerate(train_batches, 1):
                _cost, _error = train(hypo, hm, premise, pm, truth)
                train_set_cost = (1.0 - 1.0 / batches_seen) * train_set_cost + \
                                 1.0 / batches_seen * _cost
                train_set_error = (1.0 - 1.0 / batches_seen) * train_set_error + \
                                  1.0 / batches_seen * _error
                if (batches_seen * BSIZE) % 5000 == 0:
                    end = time.time()
                    print("Sample %d %.2fs, lr %.4f, train cost %f, error %f" %
                          (batches_seen * BSIZE, end - start, LR,
                           train_set_cost, train_set_error))
                    start = end

                if (batches_seen * BSIZE) % 100000 == 0:
                    dev_set_cost, dev_set_error = evaluate('dev')
                    print("***dev cost %f, error %f" %
                          (dev_set_cost, dev_set_error))

            # save parameters (same path the resume step looks for)
            all_param_values = [p.get_value() for p in all_params]
            with open(look_for, 'wb') as param_file:
                cPickle.dump(all_param_values, param_file)

            dev_set_cost, dev_set_error = evaluate('dev')
            test_set_cost, test_set_error = evaluate('test')

            print("epoch %d, cost: train %f dev %f test %f;\n"
                  " error train %f dev %f test %f" %
                  (epoch, train_set_cost, dev_set_cost, test_set_cost,
                   train_set_error, dev_set_error, test_set_error))
    except KeyboardInterrupt:
        # Let the user inspect the live state instead of losing it.
        pdb.set_trace()
def make_training_functions(model):
    """Compile the Theano functions used to train and evaluate ``model``.

    Parameters
    ----------
    model : dict
        Must contain the network's output layer under the key ``'l_out'``.

    Returns
    -------
    tfuncs : dict
        Compiled functions: ``'update_iter'`` (one momentum step on a batch,
        returns the regularised loss), ``'error_rate'``, ``'loss'`` and
        ``'pred'`` (all taking a batch index and reading the shared data),
        and ``'dout'`` (deterministic network output for an explicit ``X``).
    tvars : dict
        The symbolic and shared variables the functions were built from, so
        that callers can fill ``X_shared`` / ``y_shared`` and adjust
        ``learning_rate``.

    Hyper-parameters are read from the module-level ``cfg`` mapping
    (``batch_size``, ``learning_rate``, ``reg``, ``momentum``).
    """
    # l_out is the model's output layer.
    l_out = model['l_out']
    # Symbolic index of the mini-batch to read from the shared data.
    batch_index = T.iscalar('batch_index')
    # bct01
    # X is a 5-D tensor, y is a 1-D vector.
    X = T.TensorType('float32', [False]*5)('X')
    y = T.TensorType('int32', [False]*1)('y')
    # Symbolic slice selecting batch `batch_index`; use as arr[batch_slice].
    batch_slice = slice(batch_index*cfg['batch_size'],
                        (batch_index+1)*cfg['batch_size'])
    # Network output for the given input (stochastic layers active).
    out = lasagne.layers.get_output(l_out, X)
    # Same, but deterministic: dropout layers are disabled.
    dout = lasagne.layers.get_output(l_out, X, deterministic=True)

    # All network parameters, used to build the update expressions.
    params = lasagne.layers.get_all_params(l_out)
    l2_norm = lasagne.regularization.regularize_network_params(
        l_out, lasagne.regularization.l2)
    # The learning rate may be given as a dict, in which case the entry at
    # key 0 is the initial value. It is stored in a shared variable so it
    # can be changed after compilation.
    if isinstance(cfg['learning_rate'], dict):
        learning_rate = theano.shared(np.float32(cfg['learning_rate'][0]))
    else:
        learning_rate = theano.shared(np.float32(cfg['learning_rate']))

    softmax_out = T.nnet.softmax(out)
    loss = T.cast(T.mean(T.nnet.categorical_crossentropy(softmax_out, y)),
                  'float32')
    pred = T.argmax(dout, axis=1)
    error_rate = T.cast(T.mean(T.neq(pred, y)), 'float32')

    # Regularised loss: the L2 penalty keeps the weights small.
    # NOTE(review): `loss` is built from the stochastic output `out`, so the
    # 'loss' function below also evaluates with dropout active -- confirm
    # that this is intended.
    reg_loss = loss + cfg['reg']*l2_norm
    # Momentum gradient descent on the parameters.
    updates = lasagne.updates.momentum(reg_loss, params, learning_rate,
                                       cfg['momentum'])
    # Shared buffers holding the data; labels are stored as float32 and cast
    # to int32 when substituted for y.
    X_shared = lasagne.utils.shared_empty(5, dtype='float32')
    y_shared = lasagne.utils.shared_empty(1, dtype='float32')

    # Substitutions feeding the current batch of the shared data.
    batch_givens = {
        X: X_shared[batch_slice],
        y: T.cast(y_shared[batch_slice], 'int32'),
    }

    dout_fn = theano.function([X], dout)
    # Only the batch-indexed predictor is compiled; the original also
    # compiled an explicit-input version that was immediately overwritten.
    pred_fn = theano.function([batch_index], pred, givens={
        X: X_shared[batch_slice],
    })

    update_iter = theano.function([batch_index], reg_loss,
                                  updates=updates, givens=batch_givens)

    error_rate_fn = theano.function([batch_index], error_rate,
                                    givens=batch_givens)
    loss_fn = theano.function([batch_index], reg_loss, givens=batch_givens)
    tfuncs = {'update_iter': update_iter,
              'error_rate': error_rate_fn,
              'loss': loss_fn,
              'dout': dout_fn,
              'pred': pred_fn,
              }
    tvars = {'X': X,
             'y': y,
             'X_shared': X_shared,
             'y_shared': y_shared,
             'batch_slice': batch_slice,
             'batch_index': batch_index,
             'learning_rate': learning_rate,
             }
    return tfuncs, tvars
def TensorType(dtype, shape, broadcastable=None):
    """Create a ``tt.TensorType`` for ``dtype``, inferring broadcasting from ``shape``.

    When ``broadcastable`` is not given, a dimension is marked broadcastable
    exactly when its entry in ``shape`` equals 1. An explicit
    ``broadcastable`` is passed through unchanged and ``shape`` is ignored.
    """
    type_name = str(dtype)
    if broadcastable is not None:
        return tt.TensorType(type_name, broadcastable)
    dims = np.atleast_1d(shape)
    return tt.TensorType(type_name, dims == 1)
def train(self, params, indir, outdir, wdir, fid_lst_tra, fid_lst_val,
          X_vals, Y_vals, cfg, params_savefile, trialstr='', cont=None):
    """Train the model with either the WGAN or the LSE objective.

    Parameters
    ----------
    params : list
        Trainable parameters, used for the LSE updates and for printing.
    indir, outdir, wdir :
        Passed to ``data.load_inoutset`` to load each training batch;
        ``outdir`` is also used to count the training frames.
    fid_lst_tra : list
        File ids of the training set.
    fid_lst_val : list
        Not used by this method.
    X_vals, Y_vals :
        Validation inputs and targets.
    cfg :
        Training configuration. ``train_min_nbepochs`` and
        ``train_max_nbepochs`` are rescaled *in place* when
        ``train_nbepochs_scalewdata`` is set and ``train_batch_lengthmax``
        is not None.
    params_savefile : str
        Path of the best-model file; its stem is also used for the
        ``-last.pkl``, ``-trainingstate-last.pkl`` and figure files.
    trialstr : str
        Label inserted in log lines and figure names.
    cont :
        If truthy and a training-state file exists, training resumes from it.

    Returns
    -------
    dict
        ``epoch_stopped``, ``worst_val``, ``best_epoch`` (-1 if no model
        was saved in this or a resumed run) and ``best_val``.

    Raises
    ------
    ValueError
        Unknown ``self._errtype``, NaN training/validation cost, LSE
        validation cost above ``cfg.train_cancel_validthresh * worst_val``,
        or no model saved during training.
    """
    print('Model initial status before training')
    worst_val = data.cost_0pred_rmse(Y_vals)  # RMSE
    print(" 0-pred validation RMSE = {} (100%)".format(worst_val))
    init_pred_rms = data.prediction_rms(self._model, [X_vals])
    print(' initial RMS of prediction = {}'.format(init_pred_rms))
    init_val = data.cost_model_prediction_rmse(self._model, [X_vals], Y_vals)
    best_val = None
    print(" initial validation RMSE = {} ({:.4f}%)".format(
        init_val, 100.0 * init_val / worst_val))

    nbbatches = int(len(fid_lst_tra) / cfg.train_batch_size)
    print(' using {} batches of {} sentences each'.format(
        nbbatches, cfg.train_batch_size))
    print(' model #parameters={}'.format(self._model.nbParams()))

    # Count the training frames by loading every output file once.
    nbtrainframes = 0
    for fid in fid_lst_tra:
        X = data.loadfile(outdir, fid)
        nbtrainframes += X.shape[0]
    frameshift = 0.005  # TODO
    print(' Training set: {} sentences, #frames={} ({})'.format(
        len(fid_lst_tra), nbtrainframes,
        time.strftime('%H:%M:%S', time.gmtime(
            (nbtrainframes * frameshift)))))
    print(' #parameters/#frames={:.2f}'.format(
        float(self._model.nbParams()) / nbtrainframes))

    if cfg.train_nbepochs_scalewdata and cfg.train_batch_lengthmax is not None:
        # During an epoch, the whole data is _not_ seen by the training since cfg.train_batch_lengthmax is limited and smaller to the sentence size.
        # To compensate for it and make the config below less depedent on the data, the min ans max nbepochs are scaled according to the missing number of frames seen.
        # TODO Should consider only non-silent frames, many recordings have a lot of pre and post silences
        epochcoef = nbtrainframes / float(
            (cfg.train_batch_lengthmax * len(fid_lst_tra)))
        print(' scale number of epochs wrt number of frames')
        cfg.train_min_nbepochs = int(cfg.train_min_nbepochs * epochcoef)
        cfg.train_max_nbepochs = int(cfg.train_max_nbepochs * epochcoef)
        print(' train_min_nbepochs={}'.format(cfg.train_min_nbepochs))
        print(' train_max_nbepochs={}'.format(cfg.train_max_nbepochs))

    if self._errtype == 'WGAN':
        print('Preparing critic for WGAN...')
        critic_input_var = T.tensor3(
            'critic_input'
        )  # Either real data to predict/generate, or, fake data that has been generated

        [critic, layer_critic, layer_cond] = self._model.build_critic(
            critic_input_var,
            self._model._input_values,
            self._model.vocoder,
            self._model.insize,
            use_LSweighting=(cfg.train_LScoef > 0.0),
            LSWGANtransfreqcutoff=self._LSWGANtransfreqcutoff,
            LSWGANtranscoef=self._LSWGANtranscoef,
            use_WGAN_incnoisefeature=self._WGAN_incnoisefeature)

        # Create expression for passing real data through the critic
        real_out = lasagne.layers.get_output(critic)
        # Create expression for passing fake data through the critic
        genout = lasagne.layers.get_output(self._model.net_out)
        indict = {
            layer_critic: lasagne.layers.get_output(self._model.net_out),
            layer_cond: self._model._input_values
        }
        fake_out = lasagne.layers.get_output(critic, indict)

        # Create generator's loss expression
        # Force LSE for low frequencies, otherwise the WGAN noise makes the voice hoarse.
        print('WGAN Weighted LS - Generator part')

        # Per-feature weights of the WGAN term, concatenated in the order
        # f0, spectrum, noise (if any), vuv (if any).
        wganls_weights_els = []
        wganls_weights_els.append([0.0])  # For f0
        specvs = np.arange(self._model.vocoder.specsize(),
                           dtype=theano.config.floatX)
        if cfg.train_LScoef == 0.0:
            wganls_weights_els.append(
                np.ones(self._model.vocoder.specsize())
            )  # No special weighting for spec
        else:
            wganls_weights_els.append(
                nonlin_sigmoidparm(
                    specvs,
                    sp.freq2fwspecidx(self._LSWGANtransfreqcutoff,
                                      self._model.vocoder.fs,
                                      self._model.vocoder.specsize()),
                    self._LSWGANtranscoef))  # For spec

        if self._model.vocoder.noisesize() > 0:
            if self._WGAN_incnoisefeature:
                noisevs = np.arange(self._model.vocoder.noisesize(),
                                    dtype=theano.config.floatX)
                wganls_weights_els.append(
                    nonlin_sigmoidparm(
                        noisevs,
                        sp.freq2fwspecidx(self._LSWGANtransfreqcutoff,
                                          self._model.vocoder.fs,
                                          self._model.vocoder.noisesize()),
                        self._LSWGANtranscoef))  # For noise
            else:
                wganls_weights_els.append(
                    np.zeros(self._model.vocoder.noisesize()))

        if self._model.vocoder.vuvsize() > 0:
            wganls_weights_els.append([0.0])  # For vuv
        wganls_weights_ = np.hstack(wganls_weights_els)

        # TODO build wganls_weights_ for LSE instead for WGAN, for consistency with the paper

        # wganls_weights_ = np.hstack((wganls_weights_, wganls_weights_, wganls_weights_)) # That would be for MLPG using deltas
        wganls_weights_ *= (1.0 - cfg.train_LScoef)

        lserr = lasagne.objectives.squared_error(genout, self._target_values)
        wganls_weights_ls = theano.shared(value=(1.0 - wganls_weights_),
                                          name='wganls_weights_ls')

        wganpart = fake_out * np.mean(
            wganls_weights_
        )  # That's a way to automatically balance the WGAN and LSE costs wrt the LSE spectral weighting
        lsepart = lserr * wganls_weights_ls  # Spectral weighting as complement of the WGAN part spectral weighting

        generator_loss = -wganpart.mean() + lsepart.mean(
        )  # A term in [-oo,oo] and one in [0,oo] ... why not, LSE as to be small enough for WGAN to do something.

        generator_lossratio = abs(wganpart.mean()) / abs(lsepart.mean())

        critic_loss = fake_out.mean() - real_out.mean(
        )  # For clarity: we want to maximum real-fake -> -(real-fake) -> fake-real

        # Improved training for Wasserstein GAN
        epsi = T.TensorType(dtype=theano.config.floatX,
                            broadcastable=(False, True, True))()
        mixed_X = (epsi * genout) + (1 - epsi) * critic_input_var
        indict = {
            layer_critic: mixed_X,
            layer_cond: self._model._input_values
        }
        output_D_mixed = lasagne.layers.get_output(critic, inputs=indict)
        grad_mixed = T.grad(T.sum(output_D_mixed), mixed_X)
        norm_grad_mixed = T.sqrt(T.sum(T.square(grad_mixed), axis=[1, 2]))
        grad_penalty = T.mean(T.square(norm_grad_mixed - 1))
        critic_loss = critic_loss + cfg.train_pg_lambda * grad_penalty

        # Create update expressions for training
        critic_params = lasagne.layers.get_all_params(critic, trainable=True)
        critic_updates = lasagne.updates.adam(
            critic_loss,
            critic_params,
            learning_rate=cfg.train_D_learningrate,
            beta1=cfg.train_D_adam_beta1,
            beta2=cfg.train_D_adam_beta2)
        print(' Critic architecture')
        print_network(critic, critic_params)

        generator_params = lasagne.layers.get_all_params(
            self._model.net_out, trainable=True)
        generator_updates = lasagne.updates.adam(
            generator_loss,
            generator_params,
            learning_rate=cfg.train_G_learningrate,
            beta1=cfg.train_G_adam_beta1,
            beta2=cfg.train_G_adam_beta2)
        self._optim_updates.extend([generator_updates, critic_updates])
        print(' Generator architecture')
        print_network(self._model.net_out, generator_params)

        # Compile functions performing a training step on a mini-batch (according
        # to the updates dictionary) and returning the corresponding score:
        print('Compiling generator training function...')
        generator_train_fn_ins = [self._model._input_values]
        generator_train_fn_ins.append(self._target_values)
        generator_train_fn_outs = [generator_loss, generator_lossratio]
        train_fn = theano.function(generator_train_fn_ins,
                                   generator_train_fn_outs,
                                   updates=generator_updates)
        train_validation_fn = theano.function(generator_train_fn_ins,
                                              generator_loss,
                                              no_default_updates=True)
        print('Compiling critic training function...')
        critic_train_fn_ins = [
            self._model._input_values, critic_input_var, epsi
        ]
        critic_train_fn = theano.function(critic_train_fn_ins,
                                          critic_loss,
                                          updates=critic_updates)
        critic_train_validation_fn = theano.function(
            critic_train_fn_ins, critic_loss, no_default_updates=True)

    elif self._errtype == 'LSE':
        print(' LSE Training')
        print_network(self._model.net_out, params)
        predicttrain_values = lasagne.layers.get_output(
            self._model.net_out, deterministic=False)
        costout = (predicttrain_values - self._target_values)**2

        self.cost = T.mean(
            costout)  # self.cost = T.mean(T.sum(costout, axis=-1)) ?

        print(" creating parameters updates ...")
        updates = lasagne.updates.adam(
            self.cost,
            params,
            learning_rate=float(10**cfg.train_learningrate_log10),
            beta1=float(cfg.train_adam_beta1),
            beta2=float(cfg.train_adam_beta2),
            epsilon=float(10**cfg.train_adam_epsilon_log10))

        self._optim_updates.append(updates)
        print(" compiling training function ...")
        train_fn = theano.function(self._model.inputs + [self._target_values],
                                   self.cost,
                                   updates=updates)
    else:
        raise ValueError('Unknown err type "' + self._errtype +
                         '"')  # pragma: no cover

    costs = defaultdict(list)
    epochs_modelssaved = []
    epochs_durs = []
    nbnodecepochs = 0
    # From here on `generator_updates` is the *count* of generator training
    # steps; the Adam update dict of the same name has already been compiled
    # into train_fn above.
    generator_updates = 0
    epochstart = 1
    trainingstate_file = os.path.splitext(
        params_savefile)[0] + '-trainingstate-last.pkl'
    if cont and os.path.exists(trainingstate_file):
        print(' reloading previous training state ...')
        savedcfg, extras, rngstate = self.loadTrainingState(
            trainingstate_file, cfg)
        np.random.set_state(rngstate)
        cost_val = extras['cost_val']
        # Restoring some local variables
        costs = extras['costs']
        epochs_modelssaved = extras['epochs_modelssaved']
        epochs_durs = extras['epochs_durs']
        generator_updates = extras['generator_updates']
        epochstart = extras['epoch'] + 1
        # Restore the saving criteria only none of those 3 cfg values changed:
        if (savedcfg.train_min_nbepochs == cfg.train_min_nbepochs) and (
                savedcfg.train_max_nbepochs == cfg.train_max_nbepochs
        ) and (savedcfg.train_cancel_nodecepochs ==
               cfg.train_cancel_nodecepochs):
            best_val = extras['best_val']
            nbnodecepochs = extras['nbnodecepochs']

    print_log(" start training ...")
    # Keep `epoch` defined for the return value even when the loop body never
    # runs (a resumed state already at or past cfg.train_max_nbepochs).
    epoch = epochstart - 1
    for epoch in range(epochstart, 1 + cfg.train_max_nbepochs):
        timeepochstart = time.time()
        rndidx = np.arange(
            int(nbbatches * cfg.train_batch_size)
        )  # Need to restart from ordered state to make the shuffling repeatable after reloading training state, the shuffling will be different anyway
        np.random.shuffle(rndidx)
        rndidxb = np.split(rndidx, nbbatches)
        cost_tra = None
        costs_tra_batches = []
        costs_tra_gen_wgan_lse_ratios = []
        costs_tra_critic_batches = []
        load_times = []
        train_times = []
        for k in range(nbbatches):

            timeloadstart = time.time()
            print_tty('\r Training batch {}/{}'.format(1 + k, nbbatches))

            # Load training data online, because data is often too heavy to hold in memory
            fid_lst_trab = [fid_lst_tra[bidx] for bidx in rndidxb[k]]
            X_trab, _, Y_trab, _, W_trab = data.load_inoutset(
                indir,
                outdir,
                wdir,
                fid_lst_trab,
                length=cfg.train_batch_length,
                lengthmax=cfg.train_batch_lengthmax,
                maskpadtype=cfg.train_batch_padtype,
                cropmode=cfg.train_batch_cropmode)

            if 0:  # Plot batch
                import matplotlib.pyplot as plt
                plt.ion()
                plt.imshow(Y_trab[0, ].T,
                           origin='lower',
                           aspect='auto',
                           interpolation='none',
                           cmap='jet')
                from IPython.core.debugger import Pdb
                Pdb().set_trace()

            load_times.append(time.time() - timeloadstart)
            print_tty(' (iter load: {:.6f}s); training '.format(
                load_times[-1]))

            timetrainstart = time.time()
            if self._errtype == 'WGAN':

                random_epsilon = np.random.uniform(
                    size=(cfg.train_batch_size, 1, 1)).astype('float32')
                critic_returns = critic_train_fn(
                    X_trab, Y_trab,
                    random_epsilon)  # Train the criticmnator
                costs_tra_critic_batches.append(float(critic_returns))

                # TODO The params below are supposed to ensure the critic is "almost" fully converged
                #      when training the generator. How to evaluate this? Is it the case currently?
                if (generator_updates < 25) or (generator_updates % 500
                                                == 0):  # TODO Params hardcoded
                    critic_runs = 10  # TODO Params hardcoded 10
                else:
                    critic_runs = 5  # TODO Params hardcoded 5
                # martinarjovsky: "- Loss of the critic should never be negative, since outputing 0 would yeald a better loss so this is a huge red flag."
                # if critic_returns>0 and k%critic_runs==0: # Train only if the estimate of the Wasserstein distance makes sense, and, each N critic iteration TODO Doesn't work well though
                if k % critic_runs == 0:  # Train each N critic iteration
                    # Train the generator
                    trainargs = [X_trab]
                    trainargs.append(Y_trab)
                    [cost_tra, gen_ratio] = train_fn(*trainargs)
                    cost_tra = float(cost_tra)
                    generator_updates += 1

                    if 0:
                        log_plot_samples(
                            Y_vals,
                            Y_preds,
                            nbsamples=nbsamples,
                            fname=os.path.splitext(params_savefile)[0] +
                            '-fig_samples_' + trialstr +
                            '{:07}.png'.format(generator_updates),
                            vocoder=self._model.vocoder,
                            title='E{} I{}'.format(epoch, generator_updates))

            elif self._errtype == 'LSE':
                train_returns = train_fn(X_trab, Y_trab)
                cost_tra = np.sqrt(float(train_returns))

            train_times.append(time.time() - timetrainstart)

            # In WGAN mode the generator is not trained on every batch; the
            # most recent generator cost is then recorded again.
            if cost_tra is not None:
                print_tty('err={:.4f} (iter train: {:.4f}s) '.format(
                    cost_tra, train_times[-1]))
                if np.isnan(cost_tra):  # pragma: no cover
                    print_log(
                        ' previous costs: {}'.format(costs_tra_batches))
                    print_log(' E{} Batch {}/{} train cost = {}'.format(
                        epoch, 1 + k, nbbatches, cost_tra))
                    raise ValueError('ERROR: Training cost is nan!')
                costs_tra_batches.append(cost_tra)
                if self._errtype == 'WGAN':
                    costs_tra_gen_wgan_lse_ratios.append(gen_ratio)
        print_tty('\r \r')
        if self._errtype == 'WGAN':
            costs['model_training'].append(0.1 * np.mean(costs_tra_batches))
            if cfg.train_LScoef > 0:
                costs['model_training_wgan_lse_ratio'].append(
                    0.1 * np.mean(costs_tra_gen_wgan_lse_ratios))
        else:
            costs['model_training'].append(np.mean(costs_tra_batches))

        # Eval validation cost
        cost_validation_rmse = data.cost_model_prediction_rmse(
            self._model, [X_vals], Y_vals)
        costs['model_rmse_validation'].append(cost_validation_rmse)

        if self._errtype == 'WGAN':
            train_validation_fn_args = [X_vals]
            train_validation_fn_args.append(Y_vals)
            costs['model_validation'].append(0.1 * data.cost_model_mfn(
                train_validation_fn, train_validation_fn_args))
            costs['critic_training'].append(
                np.mean(costs_tra_critic_batches))
            random_epsilon = [
                np.random.uniform(size=(1, 1)).astype('float32')
            ] * len(X_vals)
            critic_train_validation_fn_args = [
                X_vals, Y_vals, random_epsilon
            ]
            costs['critic_validation'].append(
                data.cost_model_mfn(critic_train_validation_fn,
                                    critic_train_validation_fn_args))
            # Long-term mean of the critic validation cost over the last
            # cfg.train_validation_ltm_winlen epochs.
            costs['critic_validation_ltm'].append(
                np.mean(costs['critic_validation']
                        [-cfg.train_validation_ltm_winlen:]))

            cost_val = costs['critic_validation_ltm'][-1]
        elif self._errtype == 'LSE':
            cost_val = costs['model_rmse_validation'][-1]

        print_log(
            " E{}/{} {} cost_tra={:.6f} (load:{}s train:{}s) cost_val={:.6f} ({:.4f}% RMSE) {} MiB GPU {} MiB RAM"
            .format(epoch, cfg.train_max_nbepochs, trialstr,
                    costs['model_training'][-1],
                    time2str(np.sum(load_times)),
                    time2str(np.sum(train_times)), cost_val,
                    100 * cost_validation_rmse / worst_val,
                    nvidia_smi_gpu_memused(), proc_memresident()))
        sys.stdout.flush()

        if np.isnan(cost_val):
            raise ValueError('ERROR: Validation cost is nan!')
        if (self._errtype == 'LSE') and (
                cost_val >= cfg.train_cancel_validthresh * worst_val):
            raise ValueError(
                'ERROR: Validation cost blew up! It is higher than {} times the worst possible values'
                .format(cfg.train_cancel_validthresh))

        self._model.saveAllParams(os.path.splitext(params_savefile)[0] +
                                  '-last.pkl',
                                  cfg=cfg,
                                  printfn=print_log,
                                  extras={'cost_val': cost_val})

        # Save model parameters
        if epoch >= cfg.train_min_nbepochs:  # Assume no model is good enough before cfg.train_min_nbepochs
            if ((best_val is None) or (cost_val < best_val)
                ):  # Among all trials of hyper-parameter optimisation
                best_val = cost_val
                self._model.saveAllParams(params_savefile,
                                          cfg=cfg,
                                          printfn=print_log,
                                          extras={'cost_val': cost_val},
                                          infostr='(E{} C{:.4f})'.format(
                                              epoch, best_val))
                epochs_modelssaved.append(epoch)
                nbnodecepochs = 0
            else:
                nbnodecepochs += 1

        if cfg.train_log_plot:
            print_log(' saving plots')
            log_plot_costs(costs,
                           worst_val,
                           fname=os.path.splitext(params_savefile)[0] +
                           '-fig_costs_' + trialstr + '.svg',
                           epochs_modelssaved=epochs_modelssaved)

            nbsamples = 2
            nbsamples = min(nbsamples, len(X_vals))
            Y_preds = []
            for sampli in range(nbsamples):
                # Predict on a batch of one: prepend a length-1 axis.
                Y_preds.append(
                    self._model.predict(
                        np.reshape(X_vals[sampli],
                                   [1] + list(X_vals[sampli].shape)))[0, ])

            plotsuffix = ''
            if len(epochs_modelssaved
                   ) > 0 and epochs_modelssaved[-1] == epoch:
                plotsuffix = '_best'
            else:
                plotsuffix = '_last'
            log_plot_samples(Y_vals,
                             Y_preds,
                             nbsamples=nbsamples,
                             fname=os.path.splitext(params_savefile)[0] +
                             '-fig_samples_' + trialstr + plotsuffix + '.png',
                             vocoder=self._model.vocoder,
                             title='E{}'.format(epoch))

        epochs_durs.append(time.time() - timeepochstart)
        print_log(' ET: {} max TT: {}s train ~time left: {}'.format(
            time2str(epochs_durs[-1]),
            time2str(np.median(epochs_durs[-10:]) * cfg.train_max_nbepochs),
            time2str(
                np.median(epochs_durs[-10:]) *
                (cfg.train_max_nbepochs - epoch))))

        self.saveTrainingState(trainingstate_file,
                               cfg=cfg,
                               printfn=print_log,
                               extras={
                                   'cost_val': cost_val,
                                   'best_val': best_val,
                                   'costs': costs,
                                   'epochs_modelssaved': epochs_modelssaved,
                                   'epochs_durs': epochs_durs,
                                   'nbnodecepochs': nbnodecepochs,
                                   'generator_updates': generator_updates,
                                   'epoch': epoch
                               })

        if nbnodecepochs >= cfg.train_cancel_nodecepochs:  # pragma: no cover
            print_log(
                'WARNING: validation error did not decrease for {} epochs. Early stop!'
                .format(cfg.train_cancel_nodecepochs))
            break

    if best_val is None:
        raise ValueError('No model has been saved during training!')
    return {
        'epoch_stopped': epoch,
        'worst_val': worst_val,
        'best_epoch':
        epochs_modelssaved[-1] if len(epochs_modelssaved) > 0 else -1,
        'best_val': best_val
    }
class DifferentialEquation(Op):
    r"""
    Specify an ordinary differential equation

    .. math::
        \dfrac{dy}{dt} = f(y,t,p) \quad y(t_0) = y_0

    Parameters
    ----------
    func : callable
        Function specifying the differential equation. Must take arguments
        y (n_states,), t (scalar), p (n_theta,)
    times : array
        Array of times at which to evaluate the solution of the differential
        equation.
    n_states : int
        Dimension of the differential equation. For scalar differential
        equations, n_states=1. For vector valued differential equations,
        n_states = number of differential equations in the system.
    n_theta : int
        Number of parameters in the differential equation.
    t0 : float
        Time corresponding to the initial condition

    Examples
    --------
    .. code-block:: python

        def odefunc(y, t, p):
            #Logistic differential equation
            return p[0] * y[0] * (1 - y[0])

        times = np.arange(0.5, 5, 0.5)

        ode_model = DifferentialEquation(func=odefunc, times=times, n_states=1, n_theta=1, t0=0)
    """

    _itypes = [
        tt.TensorType(floatX, (False, )),  # y0 as 1D floatX vector
        tt.TensorType(floatX, (False, )),  # theta as 1D floatX vector
    ]
    _otypes = [
        # model states as floatX of shape (T, S)
        tt.TensorType(floatX, (False, False)),
        # sensitivities as floatX of shape (T, S, len(y0) + len(theta))
        tt.TensorType(floatX, (False, False, False)),
    ]
    __props__ = ("func", "times", "n_states", "n_theta", "t0")

    def __init__(self, func, times, *, n_states, n_theta, t0=0):
        """Validate the arguments and precompute the augmented ODE system.

        Raises
        ------
        ValueError
            If ``func`` is not callable, ``n_states`` is below 1 or
            ``n_theta`` is not positive.
        """
        if not callable(func):
            raise ValueError("Argument func must be callable.")
        if n_states < 1:
            raise ValueError("Argument n_states must be at least 1.")
        if n_theta <= 0:
            raise ValueError("Argument n_theta must be positive.")

        # Public
        self.func = func
        self.t0 = t0
        # Materialise once; everything below reads the tuple so that one-shot
        # iterables are not consumed twice.
        self.times = tuple(times)
        self.n_times = len(self.times)
        self.n_states = n_states
        self.n_theta = n_theta
        self.n_p = n_states + n_theta

        # Private
        # Convert to floatX *before* inserting t0: np.insert casts the inserted
        # value to the dtype of the array, so integer times would otherwise
        # truncate a fractional t0.
        self._augmented_times = np.insert(
            np.asarray(self.times, dtype=floatX), 0, t0)
        self._augmented_func = utils.augment_system(func, self.n_states,
                                                    self.n_theta)
        self._sens_ic = utils.make_sens_ic(self.n_states, self.n_theta, floatX)

        # Cache symbolic sensitivities by the hash of inputs
        self._apply_nodes = {}
        self._output_sensitivities = {}

    def _system(self, Y, t, p):
        r"""This is the function that will be passed to odeint. Solves both ODE and sensitivities.

        Parameters
        ----------
        Y : array
            augmented state vector (n_states + n_states + n_theta)
        t : float
            current time
        p : array
            parameter vector (y0, theta)
        """
        dydt, ddt_dydp = self._augmented_func(Y[:self.n_states], t, p,
                                              Y[self.n_states:])
        return np.concatenate([dydt, ddt_dydp])

    def _simulate(self, y0, theta):
        """Integrate states and sensitivities; return ``(y, sens)`` as floatX arrays.

        ``y`` has shape (n_times, n_states) and ``sens`` has shape
        (n_times, n_states, n_p); the row for ``t0`` is dropped from both.
        """
        # Initial condition comprised of state initial conditions and raveled sensitivity matrix
        s0 = np.concatenate([y0, self._sens_ic])

        # perform the integration
        sol = scipy.integrate.odeint(
            func=self._system,
            y0=s0,
            t=self._augmented_times,
            args=(np.concatenate([y0, theta]), ),
        ).astype(floatX)

        # The solution (first row belongs to t0 and is skipped)
        y = sol[1:, :self.n_states]
        # The sensitivities, reshaped to be a sequence of matrices
        sens = sol[1:, self.n_states:].reshape(self.n_times, self.n_states,
                                               self.n_p)
        return y, sens

    def make_node(self, y0, theta):
        """Create the Apply node and remember its sensitivity output for ``grad``."""
        inputs = (y0, theta)
        ihash = hash(inputs)
        _log.debug("make_node for inputs {}".format(ihash))
        states = self._otypes[0]()
        sens = self._otypes[1]()

        # store symbolic output in dictionary such that it can be accessed in the grad method
        self._output_sensitivities[ihash] = sens
        return Apply(self, inputs, (states, sens))

    def __call__(self, y0, theta, return_sens=False, **kwargs):
        """Apply the Op symbolically.

        Returns the symbolic states, or ``(states, sens)`` when
        ``return_sens`` is true. Raises ``ShapeError`` / ``DtypeError`` /
        ``ValueError`` when inputs or computed test values do not match the
        declared types and shapes.
        """
        if isinstance(y0, (list, tuple)) and len(y0) != self.n_states:
            raise ShapeError("Length of y0 is wrong.",
                             actual=(len(y0), ),
                             expected=(self.n_states, ))
        if isinstance(theta, (list, tuple)) and len(theta) != self.n_theta:
            raise ShapeError("Length of theta is wrong.",
                             actual=(len(theta), ),
                             expected=(self.n_theta, ))

        # convert inputs to tensors (and check their types)
        y0 = tt.cast(tt.unbroadcast(tt.as_tensor_variable(y0), 0), floatX)
        theta = tt.cast(tt.unbroadcast(tt.as_tensor_variable(theta), 0),
                        floatX)
        inputs = [y0, theta]
        for i, (input_val, itype) in enumerate(zip(inputs, self._itypes)):
            if not input_val.type == itype:
                raise ValueError(
                    f"Input {i} of type {input_val.type} does not have the expected type of {itype}"
                )

        # use default implementation to prepare symbolic outputs (via make_node)
        states, sens = super().__call__(y0, theta, **kwargs)

        if theano.config.compute_test_value != "off":
            # compute test values from input test values
            test_states, test_sens = self._simulate(
                y0=get_test_value(y0), theta=get_test_value(theta))

            # check types of simulation result
            if not test_states.dtype == self._otypes[0].dtype:
                raise DtypeError(
                    "Simulated states have the wrong type.",
                    actual=test_states.dtype,
                    expected=self._otypes[0].dtype,
                )
            if not test_sens.dtype == self._otypes[1].dtype:
                raise DtypeError(
                    "Simulated sensitivities have the wrong type.",
                    actual=test_sens.dtype,
                    expected=self._otypes[1].dtype,
                )

            # check shapes of simulation result
            expected_states_shape = (self.n_times, self.n_states)
            expected_sens_shape = (self.n_times, self.n_states, self.n_p)
            if not test_states.shape == expected_states_shape:
                raise ShapeError(
                    "Simulated states have the wrong shape.",
                    actual=test_states.shape,
                    expected=expected_states_shape,
                )
            if not test_sens.shape == expected_sens_shape:
                raise ShapeError(
                    "Simulated sensitivities have the wrong shape.",
                    actual=test_sens.shape,
                    expected=expected_sens_shape,
                )

            # attach results as test values to the outputs
            states.tag.test_value = test_states
            sens.tag.test_value = test_sens

        if return_sens:
            return states, sens
        return states

    def perform(self, node, inputs_storage, output_storage):
        """Numerically evaluate the Op: fill both outputs from one simulation."""
        y0, theta = inputs_storage[0], inputs_storage[1]
        # simulate states and sensitivities in one forward pass
        output_storage[0][0], output_storage[1][0] = self._simulate(y0, theta)

    def infer_shape(self, fgraph, node, input_shapes):
        """Return the static output shapes; they do not depend on the inputs."""
        return [
            (self.n_times, self.n_states),
            (self.n_times, self.n_states, self.n_p),
        ]

    def grad(self, inputs, output_grads):
        """Return symbolic gradients w.r.t. ``y0`` and ``theta`` as two stacked vectors."""
        ihash = hash(tuple(inputs))
        _log.debug("grad w.r.t. inputs {}".format(ihash))

        # fetch symbolic sensitivity output node from cache
        if ihash in self._output_sensitivities:
            sens = self._output_sensitivities[ihash]
        else:
            _log.debug("No cached sensitivities found!")
            _, sens = self.__call__(*inputs, return_sens=True)
        ograds = output_grads[0]

        # for each parameter, multiply sensitivities with the output gradient and sum the result
        # sens is (n_times, n_states, n_p)
        # ograds is (n_times, n_states)
        grads = [tt.sum(sens[:, :, p] * ograds) for p in range(self.n_p)]

        # return separate gradient tensors for y0 and theta inputs
        return tt.stack(grads[:self.n_states]), tt.stack(
            grads[self.n_states:])
positive_set_ratio = inputParamsConfigAll['positive_set_ratio'] dropout = inputParamsConfigAll['dropout'] nonlinearityToUse = inputParamsConfigAll['nonlinearityToUse'] augmentationFlag = inputParamsConfigAll['augmentationFlag'] if nonlinearityToUse == 'relu': nonLinearity = lasagne.nonlinearities.rectify elif nonlinearityToUse == 'tanh': nonLinearity = lasagne.nonlinearities.tanh elif nonlinearityToUse == 'sigmoid': nonLinearity = lasagne.nonlinearities.sigmoid else: raise Exception( 'nonlinearityToUse: Unsupported nonlinearity type has been selected for the network, retry with a supported one!') dtensor5 = T.TensorType('float32', (False,) * 5) input_var = dtensor5('inputs') target_var = T.ivector('targets') inputParamsNetwork = dict(shape=input_shape,dropout=float(dropout), nonLinearity=nonLinearity) ############################## ############################## # And load them again later on like this: with np.load(pathSavedNetwork) as f: param_values = [f['arr_%d' % i] for i in range(len(f.files))] #Reshape the FC layer of saved CNN into FCN form #param_values[4] = param_values[4].reshape((64,32,7,7,5)) W4_new = np.zeros((64,32,7,7,4)).astype('float32') for i in range(0,param_values[4].shape[1]): #weights for each node in FC layer form the columns
def main():
    """Build, compile and train the premise/hypothesis attention classifier.

    Builds the Lasagne graph (shared embedding, one LSTM per sentence, an
    attention layer and a softmax output), compiles a training and a
    validation Theano function, then runs ``NUM_EPOCHS`` epochs, printing
    running training statistics five times per epoch and validation
    statistics after each epoch. Ctrl-C stops training without a traceback.

    All sizes and hyper-parameters are read from module-level names
    (``MAX_LENGTH_PREM``, ``BATCH_SIZE``, ``data_train`` ...). Returns None.
    """
    import sys

    print("Building network ...")
    # Note in Rocktaschel's paper he first used a linear layer to transform wordvector
    # into vector of size K_HIDDEN. I'm assuming that this is equivalent to update W.

    # Input layer for premise
    input_var_type = T.TensorType('int32', [False] * 2)
    var_name = "input"
    input_var_prem = input_var_type(var_name)
    input_var_hypo = input_var_type(var_name)
    l_in_prem = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_PREM),
                                          input_var=input_var_prem)
    # Mask layer for premise
    l_mask_prem = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_PREM))
    # Input layer for hypothesis
    l_in_hypo = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_HYPO),
                                          input_var=input_var_hypo)
    # Mask layer for hypothesis
    l_mask_hypo = lasagne.layers.InputLayer(shape=(None, MAX_LENGTH_HYPO))

    # Word embedding layers: both sentences share one embedding matrix, so
    # they are concatenated along the time axis, embedded, and split again.
    l_in_prem_hypo = lasagne.layers.ConcatLayer([l_in_prem, l_in_hypo],
                                                axis=1)
    l_in_embedding = lasagne.layers.EmbeddingLayer(l_in_prem_hypo,
                                                   VOCAB_SIZE,
                                                   WORD_VECTOR_SIZE,
                                                   W=word_vector_init,
                                                   name='EmbeddingLayer')
    l_in_embedding_dropout = lasagne.layers.DropoutLayer(l_in_embedding,
                                                         p=DROPOUT_RATE,
                                                         rescale=True)
    l_in_prem_embedding = lasagne.layers.SliceLayer(l_in_embedding_dropout,
                                                    slice(0, MAX_LENGTH_PREM),
                                                    axis=1)
    # NOTE(review): the hypothesis slice is taken from the embedding *before*
    # dropout, unlike the premise slice - confirm this asymmetry is intended.
    l_in_hypo_embedding = lasagne.layers.SliceLayer(
        l_in_embedding,
        slice(MAX_LENGTH_PREM, MAX_LENGTH_PREM + MAX_LENGTH_HYPO),
        axis=1)

    # LSTM layer for premise
    l_lstm_prem = lasagne.layers.LSTMLayer_withCellOut(
        l_in_prem_embedding,
        K_HIDDEN,
        peepholes=False,
        grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh,
        mask_input=l_mask_prem,
        only_return_final=False)
    # The slicelayer extracts the cell output of the premise sentence
    l_lstm_prem_out = lasagne.layers.SliceLayer(l_lstm_prem, -1, axis=1)

    # LSTM layer for hypothesis
    # LSTM for premise and LSTM for hypothesis have different parameters
    l_lstm_hypo = lasagne.layers.LSTMLayer(
        l_in_hypo_embedding,
        K_HIDDEN,
        peepholes=False,
        grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh,
        cell_init=l_lstm_prem_out,
        mask_input=l_mask_hypo)
    # Isolate the last hidden unit output
    l_hypo_out = lasagne.layers.SliceLayer(l_lstm_hypo, -1, axis=1)

    # Attention layer
    l_attention = lasagne.layers.AttentionLayer([l_lstm_prem, l_lstm_hypo],
                                                K_HIDDEN,
                                                mask_input=l_mask_prem)
    l_attention_dropout = lasagne.layers.DropoutLayer(l_attention,
                                                      p=DROPOUT_RATE,
                                                      rescale=True)

    # A softmax layer create probability distribution of the prediction
    l_out = lasagne.layers.DenseLayer(
        l_attention_dropout,
        num_units=NUM_LABELS,
        W=lasagne.init.Normal(),
        nonlinearity=lasagne.nonlinearities.softmax)

    # The output of the net: stochastic (dropout on) for training,
    # deterministic (dropout off) for evaluation.
    network_output_train = lasagne.layers.get_output(l_out,
                                                     deterministic=False)
    network_output_test = lasagne.layers.get_output(l_out, deterministic=True)

    # Theano tensor for the targets
    target_values = T.ivector('target_output')

    # The loss function is calculated as the mean of the cross-entropy
    from lasagne.regularization import l2, regularize_layer_params
    l2_penalty = regularize_layer_params(l_out, l2) * REGU
    cost = lasagne.objectives.categorical_crossentropy(
        network_output_train, target_values).mean() + l2_penalty
    # Validation must not see dropout noise, so its loss is built from the
    # deterministic output (same regulariser, so the two losses stay comparable).
    val_cost = lasagne.objectives.categorical_crossentropy(
        network_output_test, target_values).mean() + l2_penalty

    # Retrieve all parameters from the network
    all_params = lasagne.layers.get_all_params(l_out)

    # Compute ADAM updates for training
    print("Computing updates ...")
    updates = lasagne.updates.adam(cost,
                                   all_params,
                                   masks=[('EmbeddingLayer.W',
                                           embedding_w_mask)],
                                   learning_rate=LEARNING_RATE,
                                   beta1=0.9,
                                   beta2=0.999,
                                   epsilon=1e-08)

    # Theano functions for training and computing cost
    inputs = [
        l_in_prem.input_var, l_mask_prem.input_var, l_in_hypo.input_var,
        l_mask_hypo.input_var, target_values
    ]
    train_acc_expr = T.mean(T.eq(T.argmax(network_output_test, axis=1),
                                 target_values),
                            dtype=theano.config.floatX)
    print("Compiling functions ...")
    train = theano.function(inputs, [cost, train_acc_expr],
                            updates=updates,
                            allow_input_downcast=True)

    # Theano function computing the validation loss and accuracy
    val_acc_expr = T.mean(T.eq(T.argmax(network_output_test, axis=1),
                               target_values),
                          dtype=theano.config.floatX)
    validate = theano.function(inputs, [val_cost, val_acc_expr],
                               allow_input_downcast=True)

    print("Training ...")
    sys.stdout.flush()

    # Report five times per epoch. Floor division keeps this an integer on
    # Python 3 as well, and max() avoids a modulo-by-zero on tiny datasets.
    batches_per_sub_epoch = max(1, TRAIN_SIZE // BATCH_SIZE // 5)

    try:
        for epoch in range(NUM_EPOCHS):
            n = 0
            avg_cost = 0.0
            count = 0
            sub_epoch = 0
            acc_sum = 0
            while n < TRAIN_SIZE:
                X_prem, X_prem_mask, X_hypo, X_hypo_mask, y = get_batch_data(
                    n, data_train)
                err, acc = train(X_prem, X_prem_mask, X_hypo, X_hypo_mask, y)
                avg_cost += err
                acc_sum += acc
                n += BATCH_SIZE
                count += 1

                if (n // BATCH_SIZE) % batches_per_sub_epoch == 0:
                    sub_epoch += 1
                    avg_cost /= count
                    print(
                        "Sub epoch {} average loss = {}, accuracy = {}".format(
                            sub_epoch, avg_cost, acc_sum / count * 100))
                    avg_cost = 0
                    count = 0
                    acc_sum = 0

            # Calculate validation accuracy
            m = 0
            val_err = 0
            val_acc_sum = 0
            val_batches = 0
            while m < VAL_SIZE:
                X_prem, X_prem_mask, X_hypo, X_hypo_mask, y = get_batch_data(
                    m, data_val)
                err, acc = validate(X_prem, X_prem_mask, X_hypo, X_hypo_mask,
                                    y)
                val_err += err
                val_acc_sum += acc
                val_batches += 1
                m += BATCH_SIZE

            print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
            print(" validation accuracy:\t\t{:.2f} %".format(
                val_acc_sum / val_batches * 100))
            sys.stdout.flush()

    except KeyboardInterrupt:
        # Deliberate: let the user stop training early without a traceback.
        pass
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import theano
from theano import tensor as T

# Create a symbolic variable from an explicitly constructed TensorType.
# The hand-written rank-3 equivalent would be:
#   tensor3 = T.TensorType(broadcastable=(False, False, False), dtype='float32')
#   x = tensor3()

dtype = 'float32'
ndim = 1
# One non-broadcastable axis per dimension.
broadcast = tuple([False] * ndim)
name = None
x = T.TensorType(dtype=dtype, broadcastable=broadcast)(name)
def _center_pair_filter(filter_shape):
    """Return a float32 (1, 1, X, Y, Z) filter with 0.5 at the two central z entries."""
    weights = np.zeros(filter_shape).astype('float32')
    center_x = int(np.floor(filter_shape[0] / 2.0))
    center_y = int(np.floor(filter_shape[1] / 2.0))
    center_z = int(np.floor(filter_shape[2] / 2.0))
    # With an even z size there is no single centre voxel, so a delta function
    # is impossible; use the two middle z entries as the next best thing.
    weights[center_x, center_y, center_z - 1] = 1
    weights[center_x, center_y, center_z] = 1
    # Halve so that the output range does not change (two entries instead of one).
    weights = weights * 0.5
    return np.reshape(
        weights, (1, 1, filter_shape[0], filter_shape[1], filter_shape[2]))


def _z_cut_points(depth, z_step, z_overlap):
    """Return the flat [start0, end0, start1, end1, ...] list of z chunk bounds.

    Chunks are ``z_step`` slices long and consecutive chunks overlap by
    ``z_overlap`` slices; the last chunk ends at ``depth``.
    """
    if z_overlap >= z_step:
        # The chunk start would never advance and the loop would not terminate.
        raise ValueError('z_depth must be smaller than the z chunk size (%d)'
                         % z_step)
    cuts = [0]
    start = 0
    while True:
        end = start + z_step
        if end > depth:
            cuts.append(depth)
            break
        start = end - z_overlap
        cuts.append(end)
        cuts.append(start)
    return cuts


def CreateLungInteriorMaskFCN(inputParamsConfigLocal, inputParamsLungInteriorMaskFCN):
    """Propagate a case's lung-interior mask through a mask-only copy of the FCN.

    A network is built with ``Build_3dfcn_mask`` using a linear nonlinearity
    and fixed weights (ones for the ordinary layers, a two-point averaging
    filter for the layer that replaces the first fully connected layer), so
    that its output is the input mask as it appears at the network's output
    resolution. The mask volume is pushed through in overlapping z chunks
    which are stitched back together and binarised.

    Parameters
    ----------
    inputParamsConfigLocal : dict
        Network configuration; the keys read here are ``'input_shape'``,
        ``'dropout'``, ``'numberOfLayers'`` (2 or 3) and ``'biasInitVal'``.
    inputParamsLungInteriorMaskFCN : dict
        Keys read here: ``'currentCaseName'`` (e.g.
        ``'p0012_20000101_s3000561.npy'``), ``'masterFolderLidc'``,
        ``'masterFolderLungInterior'``, ``'cutPointFlag'`` (must be 1),
        ``'z_depth'`` (overlap between consecutive z chunks) and
        ``'fcnLayerFilterSize'`` (dimensions 1, 2 odd and dimension 3 even).

    Returns
    -------
    numpy.ndarray
        Integer array that is 1 wherever the propagated mask is positive.

    Raises
    ------
    ValueError
        For an unsupported ``fcnLayerFilterSize``, ``numberOfLayers``,
        ``cutPointFlag`` or ``z_depth``.
    """
    currentCaseName = inputParamsLungInteriorMaskFCN['currentCaseName']
    masterFolderLidc = inputParamsLungInteriorMaskFCN['masterFolderLidc']
    masterFolderLungInterior = inputParamsLungInteriorMaskFCN['masterFolderLungInterior']
    cutPointFlag = inputParamsLungInteriorMaskFCN['cutPointFlag']
    z_depth = inputParamsLungInteriorMaskFCN['z_depth']
    fcnLayerFilterSize = inputParamsLungInteriorMaskFCN['fcnLayerFilterSize']

    # Further below where the fully connected layer filter is being constructed, the
    # way it is defined expects dimensions 1,2 of fcnLayerFilterSize to be odd, and dimension 3 even;
    # if you have to pass a fcnLayerFilterSize that doesn't fit this, write a separate definition for
    # how that filter is defined. Ideally you want a delta function to convolve with, but when
    # that is not possible due to an even dimension, you have to think of something else.
    if ((np.mod(fcnLayerFilterSize[0], 2) != 1)
            or (np.mod(fcnLayerFilterSize[1], 2) != 1)
            or (np.mod(fcnLayerFilterSize[2], 2) != 0)):
        raise ValueError('MaskFCN>>CreateLungInteriorMaskFCN>>fcnLayerFilterSize: expected dimensions 1,2 to be odd, dimension 3 to be even!')

    ########################
    ######Input Params######
    inputParamsConfigAll = inputParamsConfigLocal
    input_shape = inputParamsConfigAll['input_shape']
    dropout = inputParamsConfigAll['dropout']
    numberOfLayers = inputParamsConfigAll['numberOfLayers']
    biasInitVal = inputParamsConfigAll['biasInitVal']

    weight_init = lasagne.init.Normal()  # placeholder: every weight is overwritten below
    biasInit = lasagne.init.Constant(biasInitVal)
    nonLinearity = lasagne.nonlinearities.linear  # linear, since we only want the mask propagated through the model
    inputParamsNetwork = dict(n_layer=numberOfLayers, shape=input_shape,
                              dropout=float(dropout),
                              nonLinearity=nonLinearity, biasInit=biasInit)

    dtensor5 = T.TensorType('float32', (False,) * 5)
    input_var = dtensor5('inputs')
    network_fcn_mask = Build_3dfcn_mask(weight_init, inputParamsNetwork,
                                        fcnLayerFilterSize, input_var)
    # Only needed to read off the filter shape of the converted fully connected layer.
    param_values_fcn_default = lasagne.layers.get_all_param_values(network_fcn_mask)
    ######Input Params######
    ########################

    def unit_weight():
        return np.ones((1, 1, 1, 1, 1)).astype('float32')

    def zero_bias():
        return np.zeros((1,)).astype('float32')

    # Parameter list is [W, b, W, b, ...]; the converted fully connected layer
    # sits at index 4 for a 2-layer network and at index 6 for a 3-layer one.
    if numberOfLayers == 2:
        fc_shape = np.shape(param_values_fcn_default[4])[2:]
        param_values_mask = [
            unit_weight(), zero_bias(),
            unit_weight(), zero_bias(),
            _center_pair_filter(fc_shape), zero_bias(),
            unit_weight(), zero_bias(),
        ]
    elif numberOfLayers == 3:
        fc_shape = np.shape(param_values_fcn_default[6])[2:]
        param_values_mask = [
            unit_weight(), zero_bias(),
            unit_weight(), zero_bias(),
            unit_weight(), zero_bias(),
            _center_pair_filter(fc_shape), zero_bias(),
            unit_weight(), zero_bias(),
        ]
    else:
        raise ValueError('numberOfLayers must be 2 or 3, got %r'
                         % (numberOfLayers,))

    lasagne.layers.set_all_param_values(network_fcn_mask, param_values_mask)  # load the model with the weights/biases

    mask_prediction = lasagne.layers.get_output(network_fcn_mask, deterministic=True)
    val_fn = theano.function([input_var], [mask_prediction])

    ################################################################################
    ######Now load the lung interior mask, and shove it into the network
    ################################################################################
    full_mask_path = os.path.join(masterFolderLungInterior, Path_create(currentCaseName)[:-4])
    mat_name = 'lungInterior_' + currentCaseName[:-4] + '.mat'
    mask_file = os.path.join(full_mask_path, mat_name)
    if os.path.isfile(mask_file):
        lungInteriorData = sio.loadmat(mask_file)
        full_mask = lungInteriorData['currentFullVolBin']
    else:
        # No lung-interior mask for this case: read the corresponding unique
        # mask only to get the volume size, and use an all-zero mask of that size.
        uniqueMask_path = os.path.join(masterFolderLidc, Path_create(currentCaseName)[:-4])
        tmp_name = 'uniqueStats_' + currentCaseName[:-4] + '.mat'
        uniqueStatsData = sio.loadmat(os.path.join(uniqueMask_path, tmp_name))
        unique_mask = uniqueStatsData['allMaxRadiologistMsk']
        full_mask = np.zeros(np.shape(unique_mask))

    # NOTE(review): the original author asks whether the mask should go via
    # int16 before float32 - left as a direct cast.
    currentMask = full_mask.astype('float32')
    currentMask = currentMask.reshape((1, 1, 512, 512, currentMask.shape[2]))

    if cutPointFlag != 1:
        # Previously this fell through to a NameError on the cut-point lists.
        raise ValueError('cutPointFlag must be 1, got %r' % (cutPointFlag,))

    xCutPoints = [0, 512]
    yCutPoints = [0, 512]
    zStep = 80
    zCutPoints = _z_cut_points(currentMask.shape[4], zStep, z_depth)

    # If the last chunk is too thin for the FCN, start it 20 slices earlier
    # and end the previous chunk 20 slices earlier as well. Only possible
    # when there is a previous chunk.
    if len(zCutPoints) >= 4 and (zCutPoints[-1] - zCutPoints[-2]) <= 16:
        zCutPoints[-3] = zCutPoints[-3] - 20
        zCutPoints[-2] = zCutPoints[-2] - 20

    sub_vol_one = None
    for i in range(0, len(xCutPoints) // 2):
        for j in range(0, len(yCutPoints) // 2):
            for k in range(0, len(zCutPoints) // 2):
                xStart = xCutPoints[2 * i]
                xEnd = xCutPoints[2 * i + 1]
                yStart = yCutPoints[2 * j]
                yEnd = yCutPoints[2 * j + 1]
                zStart = zCutPoints[2 * k]
                zEnd = zCutPoints[2 * k + 1]
                print(xStart, xEnd - 1, yStart, yEnd - 1, zStart, zEnd - 1)

                sub_volume = currentMask[0, 0, xStart:xEnd, yStart:yEnd, zStart:zEnd]
                # put subvolume in 5D form for input to FCN
                sub_volume = sub_volume.reshape(
                    (1, 1, sub_volume.shape[0], sub_volume.shape[1], sub_volume.shape[2]))

                test_pred_full_mask = val_fn(sub_volume)[0]
                # go from e.g. (1,1,120,120,25) to (120,120,25)
                tmp_sub_vol = test_pred_full_mask.squeeze()

                if xStart == xCutPoints[0] and yStart == yCutPoints[0]:
                    # NOTE: when the volume is split N times, the size loss caused by the
                    # converted fully connected layer is incurred N times as well. E.g. with a
                    # (9,9,4) filter the whole volume loses -4+1=-3 slices once, but each
                    # sub-volume loses them again.
                    if sub_vol_one is None:
                        sub_vol_one = tmp_sub_vol
                    else:
                        # Append chunks back to back, overwriting the badly convolved slices:
                        # the margin is 2 because there is one max pool in z and the last
                        # 2 slices of a chunk are not correctly convolved.
                        sub_vol_one = np.concatenate(
                            (sub_vol_one[:, :, :-2], tmp_sub_vol[:, :, 3:]), axis=2)

    # convert to binary; values of 0.5 arise from the two-point filter in z
    sub_vol_one_fin = (sub_vol_one > 0.0).astype('int')
    return sub_vol_one_fin
# the same with above. broadcastable pattern indicats dimension of the variable. # True means the length of the axis for that dimension is 1. # empty list is a special case to mean scalar. # pattern interpretation # [] scalar # [True] 1D scalar (vector of length 1) # [True, True] 2D scalar (1x1 matrix) # [False] vector # [False, False] matrix # [False] * n nD tensor # [True, False] row (1xN matrix) # [False, True] column (Mx1 matrix) # [False, True, False] A Mx1xP tensor (a) # [True, False, False] A 1xNxP tensor (b) # [False, False, False] A MxNxP tensor (pattern of a + b) x = T.TensorType(dtype='int32', broadcastable=())('myvar') # config dependent float type (config.floatX is float 64 by default on x86_64) x = T.scalar(name='x', dtype=T.config.floatX) report(x) # 1-dimensional vector (ndarray). v = T.vector(dtype=T.config.floatX, name='v') report(v) # 2-dimensional ndarray in which the number of rows is guaranteed to be 1. v = T.row(name=None, dtype=T.config.floatX) report(v) # 2-dimensional ndarray in which the number of columns is guaranteed to be 1. v = T.col(name=None, dtype=T.config.floatX)
def make_node(self, *inputs):
    """Return the Apply node for this Op.

    Every input is coerced to a tensor variable; the single output is a
    non-broadcastable matrix with the dtype of the first input.
    """
    tensor_inputs = list(map(tt.as_tensor_variable, inputs))
    matrix_type = tt.TensorType(tensor_inputs[0].dtype, (False, False))
    return gof.Apply(self, tensor_inputs, [matrix_type()])
from theano import tensor as T
from theano.sandbox.neighbours import images2neibs

# Build a small graph through images2neibs and request a gradient from it:
# float32 4D input -> 2x2 neighbourhoods -> matrix product -> summed cost.
X = T.TensorType(dtype='float32', broadcastable=(False,) * 4)()
Y = images2neibs(X, (2, 2))
W = T.matrix()
Z = T.dot(Y, W)
cost = T.sum(Z)
T.grad(cost, wrt=W)
def createNetwork(self, networkName, folderName, cnnLayers, kernel_Shapes,
                  intermediate_ConnectedLayers, n_classes, sampleSize_Train,
                  sampleSize_Test, batch_Size, applyBatchNorm,
                  numberEpochToApplyBatchNorm, activationType, dropout_Rates,
                  pooling_Params, weights_Initialization_CNN,
                  weights_Initialization_FCN, weightsFolderName,
                  weightsTrainedIdx, softmax_Temp):
    """Store the model configuration on ``self`` and build the network layers.

    Every argument is copied to an attribute (``pooling_Params`` is stored
    as ``pooling_scales`` and the two ``weights_Initialization_*`` arguments
    as ``weight_Initialization_*``). The receptive field and the central
    voxel counts are then computed, four symbolic 5D float32 inputs are
    created, and ``generateNetworkLayers`` is called with them.
    """
    # ---- Identification ----
    self.networkName = networkName
    self.folderName = folderName

    # ---- Architecture ----
    self.cnnLayers = cnnLayers
    self.n_classes = n_classes
    self.kernel_Shapes = kernel_Shapes
    self.intermediate_ConnectedLayers = intermediate_ConnectedLayers
    self.pooling_scales = pooling_Params
    self.dropout_Rates = dropout_Rates
    self.activationType = activationType
    self.softmax_Temp = softmax_Temp

    # ---- Weights ----
    self.weight_Initialization_CNN = weights_Initialization_CNN
    self.weight_Initialization_FCN = weights_Initialization_FCN
    self.weightsFolderName = weightsFolderName
    self.weightsTrainedIdx = weightsTrainedIdx

    # ---- Sampling / batching ----
    self.batch_Size = batch_Size
    self.sampleSize_Train = sampleSize_Train
    self.sampleSize_Test = sampleSize_Test
    self.applyBatchNorm = applyBatchNorm
    self.numberEpochToApplyBatchNorm = numberEpochToApplyBatchNorm

    # ---- Derived attributes ----
    # Receptive field of the CNN, computed with a stride of 1.
    self.receptiveField = computeReceptiveField(self.kernel_Shapes, 1)
    self.centralVoxelsTrain = getCentralVoxels(self.sampleSize_Train,
                                               self.receptiveField)
    self.centralVoxelsTest = getCentralVoxels(self.sampleSize_Test,
                                              self.receptiveField)

    # --------- Now we build the model -------- #
    print("...[STATUS]: Building the Network model...")

    # Symbolic inputs of the CNN: 5D float32 tensors, no broadcastable axis.
    tensor5 = T.TensorType(dtype='float32', broadcastable=(False,) * 5)
    self.inputNetwork_Train = tensor5()
    self.inputNetwork_Test = tensor5()
    self.inputNetwork_Train_Bottom = tensor5()
    self.inputNetwork_Test_Bottom = tensor5()

    def input_shape_for(sample_size):
        # (batch, single channel, three sample dimensions)
        return (self.batch_Size, 1, sample_size[0], sample_size[1],
                sample_size[2])

    inputSampleShape_Train = input_shape_for(self.sampleSize_Train)
    inputSampleShape_Test = input_shape_for(self.sampleSize_Test)

    print(" - Shape of input subvolume (Training): {}".format(
        inputSampleShape_Train))
    print(" - Shape of input subvolume (Testing): {}".format(
        inputSampleShape_Test))

    # TODO change cnnLayers name by networkLayers
    self.generateNetworkLayers(cnnLayers,
                               kernel_Shapes,
                               self.pooling_scales,
                               inputSampleShape_Train,
                               inputSampleShape_Test,
                               self.inputNetwork_Train,
                               self.inputNetwork_Train_Bottom,
                               self.inputNetwork_Test,
                               self.inputNetwork_Test_Bottom,
                               intermediate_ConnectedLayers)
def train_gan(
        separate_funcs=False,
        D_training_repeats=1,
        G_learning_rate_max=0.0010,
        D_learning_rate_max=0.0010,
        G_smoothing=0.999,
        adam_beta1=0.0,
        adam_beta2=0.99,
        adam_epsilon=1e-8,
        minibatch_default=16,
        minibatch_overrides={},
        rampup_kimg=40 / speed_factor,
        rampdown_kimg=0,
        lod_initial_resolution=4,
        lod_training_kimg=400 / speed_factor,
        lod_transition_kimg=400 / speed_factor,
        #lod_training_kimg = 40,
        #lod_transition_kimg = 40,
        total_kimg=10000 / speed_factor,
        dequantize_reals=False,
        gdrop_beta=0.9,
        gdrop_lim=0.5,
        gdrop_coef=0.2,
        gdrop_exp=2.0,
        drange_net=[-1, 1],
        drange_viz=[-1, 1],
        image_grid_size=None,
        #tick_kimg_default = 1,
        tick_kimg_default=50 / speed_factor,
        tick_kimg_overrides={
            32: 20,
            64: 10,
            128: 10,
            256: 5,
            512: 2,
            1024: 1
        },
        image_snapshot_ticks=4,
        network_snapshot_ticks=40,
        image_grid_type='default',
        #resume_network_pkl = '006-celeb128-progressive-growing/network-snapshot-002009.pkl',
        resume_network_pkl=None,
        resume_kimg=0,
        resume_time=0.0):
    """Train the generator/discriminator pair and write snapshots to disk.

    The function loads the training set, either resumes ``G``/``D`` from
    ``resume_network_pkl`` (relative to ``config.result_dir``) or builds
    them with ``network.Network``, compiles Theano functions, and loops
    until ``total_kimg * 1000`` images have been processed.  Training
    functions are recompiled whenever the integer floor/ceil of the current
    level of detail (``cur_lod``) changes.  Once per "tick" it prints
    progress and, every ``image_snapshot_ticks`` / ``network_snapshot_ticks``
    ticks (and at the end of training), saves an image grid / a network
    pickle into a newly created result sub-directory.  On completion it
    writes ``network-final.pkl`` and an empty ``_training-done.txt``.

    Parameters (grouped by how the body uses them):
        D_training_repeats: number of discriminator steps per generator step.
        G_learning_rate_max, D_learning_rate_max: multiplied by the ramp
            coefficient and stored in the shared learning-rate variables.
        G_smoothing: ``beta`` for ``G.create_temporally_smoothed_version``.
        adam_beta1, adam_beta2, adam_epsilon: forwarded to ``adam``.
        minibatch_default, minibatch_overrides: minibatch size, looked up
            per current resolution with the default as fallback.
        rampup_kimg, rampdown_kimg: forwarded to ``misc.rampup`` /
            ``misc.rampdown_linear``.
        lod_initial_resolution, lod_training_kimg, lod_transition_kimg:
            control the level-of-detail schedule.
        total_kimg: training length, in thousands of images.
        dequantize_reals: if true, uniform noise in [-0.5, 0.5) is added to
            the real images before range adjustment.
        gdrop_beta, gdrop_lim, gdrop_coef, gdrop_exp: control the value
            written to ``D.gdrop_strength`` (when ``D`` has that attribute).
        drange_net: target range passed to ``misc.adjust_dynamic_range``.
        drange_viz: ``drange`` used when saving generated image grids.
        image_grid_size, image_grid_type: snapshot grid layout; only
            ``'default'`` is accepted, anything else raises ``ValueError``.
        tick_kimg_default, tick_kimg_overrides: tick length in kimg, looked
            up per current resolution.
        image_snapshot_ticks, network_snapshot_ticks: snapshot periods.
        resume_network_pkl, resume_kimg, resume_time: resume state.

    NOTE(review): ``separate_funcs`` is accepted but never read in this body.
    NOTE(review): defaults that involve ``speed_factor`` are evaluated once,
    when the ``def`` statement runs, from the module-level name.
    NOTE(review): several defaults are mutable (dict/list).  This function
    itself only reads them; confirm that the callees do not mutate them.
    """

    # Load dataset and build networks.
    training_set, drange_orig = load_dataset()
    # training_set is the object the dataset module produces after parsing
    # the h5 file; drange_orig is training_set.get_dynamic_range()

    if resume_network_pkl:
        print 'Resuming', resume_network_pkl
        G, D, _ = misc.load_pkl(
            os.path.join(config.result_dir, resume_network_pkl))
    else:
        G = network.Network(num_channels=training_set.shape[1],
                            resolution=training_set.shape[2],
                            label_size=training_set.labels.shape[1],
                            **config.G)
        D = network.Network(num_channels=training_set.shape[1],
                            resolution=training_set.shape[2],
                            label_size=training_set.labels.shape[1],
                            **config.D)
    Gs = G.create_temporally_smoothed_version(beta=G_smoothing,
                                              explicit_updates=True)
    # G and D can either be restored from a pkl through misc or be
    # constructed by the network module
    misc.print_network_topology_info(G.output_layers)
    misc.print_network_topology_info(D.output_layers)

    # Setup snapshot image grid.
    # Configure the layout of the images that are written during training
    if image_grid_type == 'default':
        if image_grid_size is None:
            w, h = G.output_shape[3], G.output_shape[2]
            image_grid_size = np.clip(1920 / w, 3, 16), np.clip(1080 / h, 2, 16)
        example_real_images, snapshot_fake_labels = training_set.get_random_minibatch(
            np.prod(image_grid_size), labels=True)
        snapshot_fake_latents = random_latents(np.prod(image_grid_size),
                                               G.input_shape)
    else:
        raise ValueError('Invalid image_grid_type', image_grid_type)

    # Theano input variables and compile generation func.
    print 'Setting up Theano...'
    real_images_var = T.TensorType('float32', [False] *
                                   len(D.input_shape))('real_images_var')
    # <class 'theano.tensor.var.TensorVariable'>
    # print type(real_images_var),real_images_var
    real_labels_var = T.TensorType(
        'float32',
        [False] * len(training_set.labels.shape))('real_labels_var')
    fake_latents_var = T.TensorType('float32', [False] *
                                    len(G.input_shape))('fake_latents_var')
    fake_labels_var = T.TensorType(
        'float32',
        [False] * len(training_set.labels.shape))('fake_labels_var')
    # Every name ending in _var is an input tensor
    G_lrate = theano.shared(np.float32(0.0))
    D_lrate = theano.shared(np.float32(0.0))
    # theano.shared is used here to give the learning rates an initial
    # value; they are overwritten with set_value() inside the training loop
    gen_fn = theano.function([fake_latents_var, fake_labels_var],
                             Gs.eval_nd(fake_latents_var,
                                        fake_labels_var,
                                        ignore_unused_inputs=True),
                             on_unused_input='ignore')
    # gen_fn is a function whose inputs are [fake_latents_var, fake_labels_var]
    # and whose output is Gs.eval_nd(fake_latents_var, fake_labels_var, ignore_unused_inputs=True)
    # The bare string below is a leftover reference copy of the
    # theano.function signature; it is evaluated and discarded at runtime.
    ''' def function(inputs, outputs=None, mode=None, updates=None, givens=None, no_default_updates=False, accept_inplace=False, name=None, rebuild_strict=True, allow_input_downcast=None, profile=None, on_unused_input=None) '''
    # generation function

    # Misc init.
    # Read the full output resolution (as log2)
    resolution_log2 = int(np.round(np.log2(G.output_shape[2])))
    # lod: level of detail
    initial_lod = max(
        resolution_log2 - int(np.round(np.log2(lod_initial_resolution))), 0)
    cur_lod = 0.0
    # Sentinel values that can never equal a real floor/ceil pair, so the
    # training functions are compiled on the first loop iteration.
    min_lod, max_lod = -1.0, -2.0
    fake_score_avg = 0.0

    # Save example images.
    snapshot_fake_images = gen_fn(snapshot_fake_latents, snapshot_fake_labels)
    result_subdir = misc.create_result_subdir(config.result_dir,
                                              config.run_desc)
    misc.save_image_grid(example_real_images,
                         os.path.join(result_subdir, 'reals.png'),
                         drange=drange_orig,
                         grid_size=image_grid_size)
    misc.save_image_grid(snapshot_fake_images,
                         os.path.join(result_subdir, 'fakes%06d.png' % 0),
                         drange=drange_viz,
                         grid_size=image_grid_size)

    # Training loop.
    # This is the actual entry point of the main training.
    # Note that training never leaves the outermost while loop, so
    # operations such as switching resolution necessarily happen inside it.
    # Number of images processed so far
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = 0
    tick_start_nimg = cur_nimg
    tick_start_time = time.time()
    tick_train_out = []
    train_start_time = tick_start_time - resume_time
    while cur_nimg < total_kimg * 1000:

        # Calculate current LOD.
        # Compute the current level of detail
        cur_lod = initial_lod
        if lod_training_kimg or lod_transition_kimg:
            # NOTE(review): speed_factor is applied here in addition to
            # being baked into the lod_*_kimg defaults -- confirm intended.
            tlod = (cur_nimg /
                    (1000.0 / speed_factor)) / (lod_training_kimg +
                                                lod_transition_kimg)
            cur_lod -= np.floor(tlod)
            if lod_transition_kimg:
                cur_lod -= max(
                    1.0 + (np.fmod(tlod, 1.0) - 1.0) *
                    (lod_training_kimg + lod_transition_kimg) /
                    lod_transition_kimg, 0.0)
            cur_lod = max(cur_lod, 0.0)

        # Look up resolution-dependent parameters.
        cur_res = 2**(resolution_log2 - int(np.floor(cur_lod)))
        # current resolution
        minibatch_size = minibatch_overrides.get(cur_res, minibatch_default)
        tick_duration_kimg = tick_kimg_overrides.get(cur_res,
                                                     tick_kimg_default)

        # Update network config.
        # Update learning rates and the networks' level of detail
        lrate_coef = misc.rampup(cur_nimg / 1000.0, rampup_kimg)
        lrate_coef *= misc.rampdown_linear(cur_nimg / 1000.0, total_kimg,
                                           rampdown_kimg)
        G_lrate.set_value(np.float32(lrate_coef * G_learning_rate_max))
        D_lrate.set_value(np.float32(lrate_coef * D_learning_rate_max))
        if hasattr(G, 'cur_lod'):
            G.cur_lod.set_value(np.float32(cur_lod))
        if hasattr(D, 'cur_lod'):
            D.cur_lod.set_value(np.float32(cur_lod))

        # Setup training func for current LOD.
        new_min_lod, new_max_lod = int(np.floor(cur_lod)), int(
            np.ceil(cur_lod))
        #print " cur_lod%f\n min_lod %f\n new_min_lod %f\n max_lod %f\n new_max_lod %f\n"%(cur_lod,min_lod,new_min_lod,max_lod,new_max_lod)
        if min_lod != new_min_lod or max_lod != new_max_lod:
            print 'Compiling training funcs...'
            min_lod, max_lod = new_min_lod, new_max_lod

            # Pre-process reals.
            real_images_expr = real_images_var
            if dequantize_reals:
                rnd = theano.sandbox.rng_mrg.MRG_RandomStreams(
                    lasagne.random.get_rng().randint(1, 2147462579))
                epsilon_noise = rnd.uniform(size=real_images_expr.shape,
                                            low=-0.5,
                                            high=0.5,
                                            dtype='float32')
                real_images_expr = T.cast(
                    real_images_expr, 'float32'
                ) + epsilon_noise  # match original implementation of Improved Wasserstein
            real_images_expr = misc.adjust_dynamic_range(
                real_images_expr, drange_orig, drange_net)
            if min_lod > 0:  # compensate for shrink_based_on_lod
                real_images_expr = T.extra_ops.repeat(real_images_expr,
                                                      2**min_lod,
                                                      axis=2)
                real_images_expr = T.extra_ops.repeat(real_images_expr,
                                                      2**min_lod,
                                                      axis=3)

            # Optimize loss.
            G_loss, D_loss, real_scores_out, fake_scores_out = evaluate_loss(
                G, D, min_lod, max_lod, real_images_expr, real_labels_var,
                fake_latents_var, fake_labels_var, **config.loss)
            G_updates = adam(G_loss,
                             G.trainable_params(),
                             learning_rate=G_lrate,
                             beta1=adam_beta1,
                             beta2=adam_beta2,
                             epsilon=adam_epsilon).items()
            D_updates = adam(D_loss,
                             D.trainable_params(),
                             learning_rate=D_lrate,
                             beta1=adam_beta1,
                             beta2=adam_beta2,
                             epsilon=adam_epsilon).items()

            # The discriminator step also returns the losses and scores;
            # the generator step returns nothing and additionally applies
            # the updates of the smoothed generator Gs.
            D_train_fn = theano.function([
                real_images_var, real_labels_var, fake_latents_var,
                fake_labels_var
            ], [G_loss, D_loss, real_scores_out, fake_scores_out],
                                         updates=D_updates,
                                         on_unused_input='ignore')
            G_train_fn = theano.function([fake_latents_var, fake_labels_var],
                                         [],
                                         updates=G_updates + Gs.updates,
                                         on_unused_input='ignore')

        # Run D_training_repeats discriminator steps, then one generator
        # step.  Only discriminator minibatches advance cur_nimg.
        for idx in xrange(D_training_repeats):
            mb_reals, mb_labels = training_set.get_random_minibatch(
                minibatch_size,
                lod=cur_lod,
                shrink_based_on_lod=True,
                labels=True)
            mb_train_out = D_train_fn(
                mb_reals, mb_labels,
                random_latents(minibatch_size, G.input_shape),
                random_labels(minibatch_size, training_set))
            cur_nimg += minibatch_size
            tick_train_out.append(mb_train_out)
        G_train_fn(random_latents(minibatch_size, G.input_shape),
                   random_labels(minibatch_size, training_set))

        # Fade in D noise if we're close to becoming unstable
        # NOTE(review): index 1 of mb_train_out is D_loss in the output list
        # compiled above, although the variable is named fake_score_* --
        # confirm this is intended.
        fake_score_cur = np.clip(np.mean(mb_train_out[1]), 0.0, 1.0)
        fake_score_avg = fake_score_avg * gdrop_beta + fake_score_cur * (
            1.0 - gdrop_beta)
        gdrop_strength = gdrop_coef * (max(fake_score_avg - gdrop_lim, 0.0)**
                                       gdrop_exp)
        if hasattr(D, 'gdrop_strength'):
            D.gdrop_strength.set_value(np.float32(gdrop_strength))

        # Perform maintenance operations once per tick.
        if cur_nimg >= tick_start_nimg + tick_duration_kimg * 1000 or cur_nimg >= total_kimg * 1000:
            cur_tick += 1
            cur_time = time.time()
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = cur_time - tick_start_time
            tick_start_time = cur_time
            # One mean per output of D_train_fn, taken over every minibatch
            # collected during this tick.
            tick_train_avg = tuple(
                np.mean(np.concatenate([np.asarray(v).flatten()
                                        for v in vals]))
                for vals in zip(*tick_train_out))
            tick_train_out = []

            # Print progress.
            print 'tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-9.1f sec/kimg %-6.1f Dgdrop %-8.4f Gloss %-8.4f Dloss %-8.4f Dreal %-8.4f Dfake %-8.4f' % (
                (cur_tick, cur_nimg / 1000.0, cur_lod, minibatch_size,
                 misc.format_time(cur_time - train_start_time), tick_time,
                 tick_time / tick_kimg, gdrop_strength) + tick_train_avg)

            # Visualize generated images.
            if cur_tick % image_snapshot_ticks == 0 or cur_nimg >= total_kimg * 1000:
                snapshot_fake_images = gen_fn(snapshot_fake_latents,
                                              snapshot_fake_labels)
                misc.save_image_grid(snapshot_fake_images,
                                     os.path.join(
                                         result_subdir,
                                         'fakes%06d.png' % (cur_nimg / 1000)),
                                     drange=drange_viz,
                                     grid_size=image_grid_size)

            # Save network snapshot every N ticks.
            if cur_tick % network_snapshot_ticks == 0 or cur_nimg >= total_kimg * 1000:
                misc.save_pkl(
                    (G, D, Gs),
                    os.path.join(
                        result_subdir,
                        'network-snapshot-%06d.pkl' % (cur_nimg / 1000)))

    # Write final results.
    misc.save_pkl((G, D, Gs), os.path.join(result_subdir,
                                           'network-final.pkl'))
    training_set.close()
    print 'Done.'
    # Create an empty marker file signalling that training finished.
    with open(os.path.join(result_subdir, '_training-done.txt'), 'wt'):
        pass
# set those directories to something meaningful in your environment data_dir = "/home/valor/workspace/DLCV_ProtFun/data/full/processed_single_64/1A0H" grid_file = "/home/valor/workspace/DLCV_ProtFun/data/full/processed_single_64/1A0H/grid.memmap" # visualize the original grid test_grid = np.memmap(grid_file, mode='r', dtype=floatX).reshape( (1, 1, 64, 64, 64)) log.debug(test_grid.shape) viewer = MoleculeView(data_dir=data_dir, data={"density": test_grid[0, 0]}, info={"name": "test"}) viewer.density3d() grid_side = test_grid.shape[3] # initialize the rotation layer input_grid = T.TensorType(floatX, (False, ) * 5)() input_layer = lasagne.layers.InputLayer(shape=(1, 1, grid_side, grid_side, grid_side), input_var=input_grid) rotate_layer = GridRotationLayer(incoming=input_layer, grid_side=grid_side, n_channels=1, interpolation='nearest') # create a small function to test the rotation layer func = theano.function(inputs=[input_grid], outputs=lasagne.layers.get_output(rotate_layer)) # show 10 different rotations of the test grid import time
def create_theano_functions(self, target_var, deterministic_training=False):
    """Compile the training function and set up the monitor functions.

    Builds the symbolic training and test predictions of
    ``self.final_layer``, the mean losses, and the parameter updates, then
    stores the compiled update function in ``self.train_func`` and the list
    of all layer and updater parameters in ``self.all_params``.  Finally
    delegates to ``self.monitor_manager.create_theano_functions``.

    Args:
        target_var: Symbolic target variable.  If ``None``, one is created
            whose dtype and number of dimensions match an example target:
            taken from ``self.dataset.get_dummy_y()`` when the dataset
            offers it, otherwise from the first batch of the test set.
        deterministic_training: Passed as ``deterministic`` to
            ``lasagne.layers.get_output`` for the training prediction.

    Returns:
        None.
    """
    if target_var is None:
        if hasattr(self.dataset, 'get_dummy_y'):
            log.info("Use dataset-supplied dummy y to determine "
                     "shape and type of target variable")
            dummy_y = self.dataset.get_dummy_y()
        else:
            log.info(
                "Automatically determine size of target variable by example..."
            )
            # Get a dummy batch and determine the target size.  The test
            # set is used since it is smaller (memory may be freed sooner).
            # Reloading is disabled for this step only; try/finally makes
            # sure the flag is restored even if loading the split fails.
            was_reloadable = self.dataset.reloadable
            self.dataset.reloadable = False
            try:
                test_set = self.dataset_provider.get_train_valid_test(
                    self.dataset)['test']
            finally:
                self.dataset.reloadable = was_reloadable
            batches = self.iterator.get_batches(test_set, shuffle=False)
            # Builtin next() works for Python 2 and Python 3 iterators.
            dummy_batch = next(batches)
            dummy_y = dummy_batch[1]
            del test_set
            self.dataset.ensure_is_loaded()
        # Tensor with as many dimensions as y; no axis is broadcastable.
        target_type = T.TensorType(dtype=dummy_y.dtype,
                                   broadcastable=[False] *
                                   len(dummy_y.shape))
        target_var = target_type()

    prediction = lasagne.layers.get_output(
        self.final_layer, deterministic=deterministic_training)
    # "test" as in "during testing", not as in "test set"
    test_prediction = lasagne.layers.get_output(self.final_layer,
                                                deterministic=True)

    # The loss function may or may not take the final layer as a third
    # argument; try the two-argument form first.
    # NOTE(review): a TypeError raised *inside* a two-argument loss function
    # also triggers the three-argument retry.
    try:
        loss = self.loss_expression(prediction, target_var).mean()
        test_loss = self.loss_expression(test_prediction,
                                         target_var).mean()
    except TypeError:
        loss = self.loss_expression(prediction, target_var,
                                    self.final_layer).mean()
        test_loss = self.loss_expression(test_prediction, target_var,
                                         self.final_layer).mean()

    # Create parameter update expressions.
    params = lasagne.layers.get_all_params(self.final_layer, trainable=True)
    updates = self.updates_expression(loss, params)
    if self.updates_modifier is not None:
        # Let the modifier post-process the updates (e.g. put norm
        # constraints on the layers).
        updates = self.updates_modifier.modify(updates, self.final_layer)
    input_var = lasagne.layers.get_all_layers(
        self.final_layer)[0].input_var

    # Store all parameters, including updater state such as adam
    # parameters, needed for resetting to the best model after early stop.
    # Layer parameters come first; updater-only parameters are appended.
    self.all_params = lasagne.layers.get_all_params(self.final_layer)
    for param in updates.keys():
        if param not in self.all_params:
            self.all_params.append(param)

    self.train_func = theano.function([input_var, target_var],
                                      updates=updates)
    self.monitor_manager.create_theano_functions(input_var, target_var,
                                                 test_prediction, test_loss)