def __init__(self, input_size, output_size):
    self.input_size = input_size
    self.output_size = output_size
    self.W = sharedX(value=np.zeros((input_size, output_size)), name='W', borrow=True)
    self.b = sharedX(value=np.zeros(output_size), name='b', borrow=True)
def __init__(self, input_size, output_size, name="Softmax"): self.input_size = input_size self.output_size = output_size self.name = name # Regression output weights and biases self.W = sharedX(value=np.zeros((self.input_size, self.output_size)), name=self.name+'_W') self.b = sharedX(value=np.zeros(output_size), name=self.name+'_b')
def __init__(self, dataset, batch_size, noisy_streamlines_sigma=None, seed=1234,
             use_data_augment=True, normalize_target=False, shuffle_streamlines=True,
             resample_streamlines=True, feed_previous_direction=False):
    """
    Parameters
    ----------
    dataset : :class:`TractographyDataset`
        Dataset from which to get the examples.
    batch_size : int
        Nb. of examples per batch.
    seed : int, optional
        Seed for the random generator when shuffling streamlines or adding noise to the streamlines.
    use_data_augment : bool
        If true, perform data augmentation by flipping streamlines.
    normalize_target : bool
        If true, targets will have a norm of one (usually used by the GruRegression model).
    shuffle_streamlines : bool
        Shuffle streamlines in the dataset between each epoch.
    resample_streamlines : bool
        Streamlines in a same batch will all have the same number of points.
        Should always be set to True for now (until the method `_process_batch` supports it).
    feed_previous_direction : bool
        Should the previous direction be appended to the input when making a prediction?
    """
    self.dataset = dataset
    self.batch_size = batch_size
    self.use_augment_by_flipping = use_data_augment
    self.normalize_target = normalize_target
    self.noisy_streamlines_sigma = noisy_streamlines_sigma
    self.use_noisy_streamlines = self.noisy_streamlines_sigma is not None

    self.seed = seed
    self.rng = np.random.RandomState(self.seed)
    self.rng_noise = np.random.RandomState(self.seed + 1)
    self.shuffle_streamlines = shuffle_streamlines
    self.resample_streamlines = resample_streamlines
    self.indices = np.arange(len(self.dataset))

    self.feed_previous_direction = feed_previous_direction

    # Shared variables
    self._shared_batch_inputs = sharedX(np.ndarray((0, 0, 0)))
    self._shared_batch_targets = sharedX(np.ndarray((0, 0, 0)))
    self._shared_batch_mask = sharedX(np.ndarray((0, 0)))

    # Test value
    batch_inputs, batch_targets, batch_mask = self._next_batch(0)
    self.dataset.symb_inputs.tag.test_value = batch_inputs
    self.dataset.symb_mask.tag.test_value = batch_mask

    # Since this batch scheduler creates its own targets.
    if self.dataset.symb_targets is None:
        self.dataset.symb_targets = T.TensorVariable(type=T.TensorType("floatX", [False] * batch_targets.ndim),
                                                     name=self.dataset.name + '_symb_targets')

    self.dataset.symb_targets.tag.test_value = batch_targets
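# Hedged sketch (not from the original code): what the `noisy_streamlines_sigma`
# option above typically amounts to, i.e. jittering every streamline point with
# isotropic Gaussian noise drawn from `rng_noise`. Names below are illustrative.
import numpy as np

rng_noise = np.random.RandomState(1234 + 1)
sigma = 0.1
streamline = np.array([[0., 0., 0.], [1., 0., 0.], [2., 0., 0.]])
noisy_streamline = streamline + rng_noise.normal(scale=sigma, size=streamline.shape)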
def __init__(self, input_size, output_size, activation="identity", name="Dense"): self.input_size = input_size self.output_size = output_size self.name = name self.activation = activation self.activation_fct = factories.make_activation_function(self.activation) # Regression output weights and biases self.W = sharedX(value=np.zeros((self.input_size, self.output_size)), name=self.name+'_W') self.b = sharedX(value=np.zeros(output_size), name=self.name+'_b')
def test_fprop_faster(self):
    activation = "tanh"
    seed = 1234
    repeat = 1000

    layer = LayerLSTM(input_size=DATA['features_size'], hidden_size=DATA['hidden_size'], activation=activation)
    layer.initialize(initer.UniformInitializer(seed))

    layer_fast = LayerLSTMFast(input_size=DATA['features_size'], hidden_size=DATA['hidden_size'], activation=activation)

    # Wi, Wo, Wf, Wm
    layer_fast.W.set_value(np.concatenate([layer.Wi.get_value(), layer.Wo.get_value(),
                                           layer.Wf.get_value(), layer.Wm.get_value()], axis=1))
    layer_fast.U.set_value(np.concatenate([layer.Ui.get_value(), layer.Uo.get_value(),
                                           layer.Uf.get_value(), layer.Um.get_value()], axis=1))

    input = T.matrix('input')
    input.tag.test_value = DATA['batch_one_step']
    last_h = sharedX(DATA['state_h'])
    last_m = sharedX(DATA['state_m'])

    fprop = theano.function([input], layer.fprop(input, last_h, last_m))
    fprop_faster = theano.function([input], layer_fast.fprop(input, last_h, last_m))

    fprop_time = measure("h, m = fprop(DATA['batch_one_step'])", repeat)
    fprop_faster_time = measure("h, m = fprop_faster(DATA['batch_one_step'])", repeat)
    print("fprop time: {:.2f} sec.".format(fprop_time))
    print("fprop faster time: {:.2f} sec.".format(fprop_faster_time))
    print("Speedup: {:.2f}x".format(fprop_time / fprop_faster_time))

    for i in range(DATA['seq_len']):
        h1, m1 = fprop(DATA['batch'][:, i, :])
        h2, m2 = fprop_faster(DATA['batch'][:, i, :])
        assert_array_equal(h1, h2)
        assert_array_equal(m1, m2)
def __init__(self, input_size, output_size, activation="identity", name="DenseNormalized", eps=1e-5): self.input_size = input_size self.output_size = output_size self.name = name self.activation = activation self.activation_fct = factories.make_activation_function(self.activation) self.eps = eps # Regression output weights, biases and gains self.W = sharedX(value=np.zeros((self.input_size, self.output_size)), name=self.name+'_W') self.b = sharedX(value=np.zeros(output_size), name=self.name+'_b') self.g = sharedX(value=np.ones(output_size), name=self.name+'_g')
def __init__(self, input_size, output_size, normed=False, name="Regression", eps=1e-5):
    self.input_size = input_size
    self.output_size = output_size
    self.normed = normed
    self.name = name
    self.eps = eps

    # Regression output weights, biases and gains
    self.W = sharedX(value=np.zeros((self.input_size, self.output_size)), name=self.name + '_W')
    self.b = sharedX(value=np.zeros(output_size), name=self.name + '_b')
    self.g = sharedX(value=np.ones(output_size), name=self.name + '_g')
def __init__(self, eta=0.01, seed=1234):
    super(GradientNoise, self).__init__()
    self._updates = OrderedDict()
    self._seed = seed
    self._srng = RandomStreams(self._seed)

    # Theano's normal distribution function takes the std (sigma_t) instead of the variance (sigma_t^2);
    # the sqrt is therefore applied beforehand to the parameters ( sigma_t = sqrt(eta) / t^(gamma/2) ).
    self._eta = eta ** 0.5
    self._gamma = 0.55 / 2
    self.t = sharedX(1, name='gradient_noise_t')
    self.std = sharedX(self._eta / 1 ** self._gamma, name='gradient_noise_std')
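# Illustrative sketch (an assumption, not part of the original code): the schedule
# above corresponds to annealed gradient noise with variance eta / t^0.55, i.e.
# std = sqrt(eta) / t^0.275. Pre-applying the square roots, as done above, lets
# the std be computed as _eta / t ** _gamma at every update.
import numpy as np

eta, gamma = 0.01, 0.55
for t in [1, 10, 100, 1000]:
    std = (eta / t ** gamma) ** 0.5          # direct formula
    std_pre = eta ** 0.5 / t ** (gamma / 2)  # with the sqrt applied beforehand
    assert np.isclose(std, std_pre)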
def test_sgd():
    # Create simple Nd gaussian functions to optimize. These functions are
    # (perfectly) well-conditioned, so it should take only one gradient step
    # to converge using 1/L, where L is the largest eigenvalue of the Hessian.
    max_epoch = 2
    for N in range(1, 5):
        center = np.arange(1, N + 1)[None, :].astype(floatX)
        param = sharedX(np.zeros((1, N)))
        cost = T.sum(0.5 * T.dot(T.dot((param - center), T.eye(N)), (param - center).T))
        loss = DummyLossWithGradient(cost, param)

        trainer = Trainer(SGD(loss), DummyBatchScheduler())
        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Monitor the gradient of `loss` w.r.t. `param`.
        gparam = tasks.MonitorVariable(loss.gradients[param])
        trainer.append_task(gparam)
        trainer.train()

        # Since the problem is well-conditioned and we use the optimal gradient step 1/L,
        # two epochs should be enough for `param` to be around `center` and the gradients near 0.
        assert_array_almost_equal(param.get_value(), center)
        assert_array_almost_equal(gparam.value, 0.)

    # Create an Nd gaussian function to optimize. This function is not
    # well-conditioned and there exists no perfect gradient step to converge in
    # only one iteration.
    # cost = T.sum(N*0.5*T.dot(T.dot((param-center), np.diag(1./np.arange(1, N+1))), (param-center).T))
    max_epoch = 80
    N = 4
    center = 5 * np.ones((1, N)).astype(floatX)
    param = sharedX(np.zeros((1, N)))
    cost = T.sum(0.5 * T.dot(T.dot((param - center), np.diag(1. / np.arange(1, N + 1))), (param - center).T))
    loss = DummyLossWithGradient(cost, param)

    trainer = Trainer(SGD(loss), DummyBatchScheduler())
    trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))
    # trainer.append_task(tasks.PrintVariable("Loss param : {}", param))
    # trainer.append_task(tasks.PrintVariable("Loss gradient: {}", loss.gradients[param]))

    # Monitor the gradient of `loss` w.r.t. `param`.
    gparam = tasks.MonitorVariable(loss.gradients[param])
    trainer.append_task(gparam)
    trainer.train()

    # This problem is ill-conditioned, so no single step size converges in one
    # iteration; after `max_epoch` epochs, `param` should nonetheless be around
    # `center` and the gradients near 0.
    assert_array_almost_equal(param.get_value(), center, decimal=6)
    assert_array_almost_equal(gparam.value, 0.)
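# Minimal NumPy sketch (not from the original tests) of the claim above: for the
# perfectly conditioned quadratic f(x) = 0.5 * ||x - c||^2, the Hessian is the
# identity, so L = 1 and a single gradient step of size 1/L lands exactly on c.
import numpy as np

c = np.arange(1, 5, dtype=float)  # center of the quadratic
x = np.zeros(4)
x = x - 1.0 * (x - c)             # one step with lr = 1/L = 1; the gradient is x - c
assert np.allclose(x, c)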
def test_fprop_mask_vs_not_mask(self):
    activation = "tanh"
    seed = 1234
    repeat = 100

    lstm = LSTM(input_size=DATA['features_size'],
                hidden_sizes=[DATA['hidden_size']])
    lstm.initialize(initer.UniformInitializer(seed))

    lstm2 = LSTMFast(input_size=DATA['features_size'],
                     hidden_sizes=[DATA['hidden_size']])
    lstm2.mask = sharedX(DATA['mask'])

    # Wi, Wo, Wf, Wm
    # Make sure the weights are the same.
    lstm2.layers_lstm[0].W.set_value(np.concatenate([lstm.layers_lstm[0].Wi.get_value(),
                                                     lstm.layers_lstm[0].Wo.get_value(),
                                                     lstm.layers_lstm[0].Wf.get_value(),
                                                     lstm.layers_lstm[0].Wm.get_value()], axis=1))
    lstm2.layers_lstm[0].U.set_value(np.concatenate([lstm.layers_lstm[0].Ui.get_value(),
                                                     lstm.layers_lstm[0].Uo.get_value(),
                                                     lstm.layers_lstm[0].Uf.get_value(),
                                                     lstm.layers_lstm[0].Um.get_value()], axis=1))

    input = T.tensor3('input')
    input.tag.test_value = DATA['batch']
    fprop = theano.function([input], lstm.get_output(input))
    fprop2 = theano.function([input], lstm2.get_output(input))

    # fprop_time = measure("out = fprop(DATA['batch'])", repeat)
    # print("fprop time: {:.2f} sec.".format(fprop_time))
    out = fprop(DATA['batch'])
    out2 = fprop2(DATA['batch'])

    # Outputs should differ on the padded timesteps...
    assert_true(out.sum() != out2.sum())
    # ... but be identical wherever the mask is 1.
    assert_array_equal(out * DATA['mask'][:, :, None],
                       out2 * DATA['mask'][:, :, None])
def _build_experiment(self, threshold=1):
    # Create an Nd gaussian function to optimize. This function is not
    # well-conditioned and there exists no perfect gradient step to converge in
    # only one iteration.
    N = 4
    center = 5 * np.ones((1, N)).astype(floatX)
    param = sharedX(np.zeros((1, N)))
    cost = T.sum(0.5 * T.dot(T.dot((param - center), np.diag(1. / np.arange(1, N + 1))), (param - center).T))
    loss = DummyLossWithGradient(cost, param)

    gradient_clipping = DirectionClipping(threshold=threshold)
    loss.append_gradient_modifier(gradient_clipping)

    optimizer = SGD(loss)
    trainer = Trainer(optimizer, DummyBatchScheduler())

    # Monitor the directions, the clipped and original gradients, and the gradient norm.
    logger = tasks.Logger(views.MonitorVariable(list(optimizer.directions.values())[0]),
                          views.MonitorVariable(list(loss.gradients.values())[0]),
                          views.MonitorVariable(list(loss.orig_gradients.values())[0]),
                          views.MonitorVariable(gradient_clipping.grad_norm))
    trainer.append_task(logger)

    return trainer, logger, gradient_clipping
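# Hedged sketch of the norm-clipping rule DirectionClipping presumably applies
# (illustrative NumPy, not the library implementation): rescale the gradient
# whenever its L2 norm exceeds the threshold, leaving its direction unchanged.
import numpy as np

def clip_by_norm(grad, threshold=1.0):
    norm = np.sqrt((grad ** 2).sum())
    return grad * (threshold / norm) if norm > threshold else grad

g = np.array([3.0, 4.0])  # norm 5, above the threshold
assert np.isclose(np.linalg.norm(clip_by_norm(g)), 1.0)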
def _build_experiment(self):
    # Create an Nd gaussian function to optimize. This function is not
    # well-conditioned and there exists no perfect gradient step to converge in
    # only one iteration.
    N = 4
    center = 5 * np.ones((1, N)).astype(floatX)
    param = sharedX(np.zeros((1, N)))
    cost = T.sum(0.5 * T.dot(T.dot((param - center), np.diag(1. / np.arange(1, N + 1))), (param - center).T))
    loss = DummyLossWithGradient(cost, param)

    optimizer = SGD(loss)
    direction_modifier = DecreasingLearningRate(lr=self.lr, dc=self.dc)
    optimizer.append_direction_modifier(direction_modifier)
    trainer = Trainer(optimizer, DummyBatchScheduler())

    # Monitor the learning rate.
    logger = tasks.Logger(views.MonitorVariable(list(direction_modifier.parameters.values())[0]))
    trainer.append_task(logger)

    return trainer, logger, direction_modifier
def targets(self, value):
    if value is not None:
        self._targets_shared = sharedX(np.array(value),
                                       name=self.name + "_targets",
                                       keep_on_cpu=self.keep_on_cpu)
    else:
        self._targets_shared = None
def test_adagrad():
    max_epoch = 15

    # Create Nd gaussian functions to optimize. These functions are not
    # well-conditioned and there exists no perfect gradient step to converge in
    # only one iteration.
    for N in range(1, 5):
        center = 5 * np.ones((1, N)).astype(floatX)
        param = sharedX(np.zeros((1, N)))
        cost = T.sum(0.5 * T.dot(T.dot((param - center), np.diag(1. / np.arange(1, N + 1))), (param - center).T))
        loss = DummyLossWithGradient(cost, param)

        # Even with a really high gradient step, AdaGrad can still converge.
        # Actually, it is faster than using the optimal gradient step with SGD.
        optimizer = AdaGrad(loss, lr=100, eps=1e-1)
        trainer = Trainer(optimizer, DummyBatchScheduler())
        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Monitor the gradient of `loss` w.r.t. `param`.
        tracker = tasks.Tracker(loss.gradients[param])
        trainer.append_task(tracker)
        trainer.train()

        # After 15 epochs, param should be around the center and gradients near 0.
        assert_array_almost_equal(param.get_value(), center)
        assert_array_almost_equal(tracker[0], 0.)
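# Illustrative NumPy sketch of why AdaGrad tolerates the huge base step above
# (an assumption about the update rule; the library's eps placement may differ):
# the accumulated squared gradients divide the step per dimension, quickly
# taming lr=100 even though the first iterate overshoots wildly.
import numpy as np

N, lr, eps = 4, 100.0, 1e-1
center = 5 * np.ones(N)
param, acc = np.zeros(N), np.zeros(N)
for _ in range(15):
    grad = (param - center) / np.arange(1, N + 1)  # gradient of the quadratic above
    acc += grad ** 2
    param -= lr * grad / (np.sqrt(acc) + eps)
# `param` ends up close to `center`.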
def __init__(self, dataset, batch_size, k, noisy_streamlines_sigma=None,
             nb_updates_per_epoch=None, seed=1234, include_last_point=False):
    self.dataset = dataset
    self.batch_size = batch_size
    self.k = k
    self.include_last_point = include_last_point
    self.use_augment_by_flipping = True

    self._nb_updates_per_epoch = nb_updates_per_epoch
    self.use_sample_from_bundle = self._nb_updates_per_epoch is not None

    self.noisy_streamlines_sigma = noisy_streamlines_sigma
    self.use_noisy_streamlines = self.noisy_streamlines_sigma is not None

    self.seed = seed
    self.rng = np.random.RandomState(self.seed)
    self.rng_noise = np.random.RandomState(self.seed + 1)

    # No need for a mask since streamlines are going to be resampled.
    self.dataset.symb_mask = None

    # Shared variables
    self._shared_batch_inputs = sharedX(np.ndarray((0, 0, 0)))
    self._shared_batch_targets = sharedX(np.ndarray((0, 0, 0, 0)))

    # Test value
    batch_inputs, batch_targets = self._next_batch(0)
    self.dataset.symb_inputs.tag.test_value = batch_inputs

    # Since this batch scheduler creates its own targets.
    if self.dataset.symb_targets is None:
        self.dataset.symb_targets = T.TensorVariable(type=T.TensorType("floatX", [False] * batch_targets.ndim),
                                                     name=self.dataset.name + '_symb_targets')

    self.dataset.symb_targets.tag.test_value = batch_targets
def register(self, volume):
    volume_id = len(self.volumes)
    shape = np.array(volume.shape[:-1], dtype=floatX)
    strides = np.r_[1, np.cumprod(shape[::-1])[:-1]][::-1]
    self.volumes_strides.append(strides)
    self.volumes.append(sharedX(volume, name='volume_{}'.format(volume_id)))

    # Sanity check: make sure the size of the last dimension is the same for all volumes.
    assert self.data_dimension == volume.shape[-1]
    return volume_id
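# Sketch of what the strides above encode (illustrative values): for a volume of
# spatial shape (D0, D1, D2), the row-major strides are (D1*D2, D2, 1), so a
# voxel index (i, j, k) maps to the flat offset i*D1*D2 + j*D2 + k.
import numpy as np

shape = np.array([4., 5., 6.])
strides = np.r_[1, np.cumprod(shape[::-1])[:-1]][::-1]  # -> [30., 6., 1.]
idx = np.array([2, 3, 1])
flat = int(np.dot(idx, strides))                        # 2*30 + 3*6 + 1 = 79
assert flat == np.ravel_multi_index(tuple(idx), (4, 5, 6))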
def test_fprop(self):
    activation = "tanh"
    seed = 1234
    repeat = 1000

    layer = LayerLSTM(input_size=DATA['features_size'], hidden_size=DATA['hidden_size'], activation=activation)
    layer.initialize(initer.UniformInitializer(seed))

    # input = T.tensor3('input')
    input = T.matrix('input')
    input.tag.test_value = DATA['batch_one_step']
    last_h = sharedX(DATA['state_h'])
    last_m = sharedX(DATA['state_m'])
    fprop = theano.function([input], layer.fprop_faster(input, last_h, last_m))

    fprop_time = measure("h, m = fprop(DATA['batch_one_step'])", repeat)
    print("fprop time: {:.2f} sec.".format(fprop_time))
    h, m = fprop(DATA['batch_one_step'])
def __init__(self, input_size, hidden_size, activation="tanh", name="GRU"): self.input_size = input_size self.hidden_size = hidden_size self.name = name self.activation = activation self.activation_fct = factories.make_activation_function(self.activation) # Input weights (z:update, r:reset) # Concatenation of the weights in that order: Wz, Wr, Wh self.W = sharedX(value=np.zeros((input_size, 3*hidden_size)), name=self.name+'_W') # self.Wh = sharedX(value=np.zeros((input_size, 2*hidden_size)), name=self.name+'_Wh') # Biases (z:update, r:reset) # Concatenation of the biases in that order: bz, br, bh self.b = sharedX(value=np.zeros(3*hidden_size), name=self.name+'_b') # self.bh = sharedX(value=np.zeros(hidden_size), name=self.name+'_bh') # Recurrence weights (z:update, r:reset) # Concatenation of the recurrence weights in that order: Uz, Ur self.U = sharedX(value=np.zeros((hidden_size, 2*hidden_size)), name=self.name+'_U') self.Uh = sharedX(value=np.zeros((hidden_size, hidden_size)), name=self.name+'_Uh')
def __init__(self, dataset, batch_size, seed=1234):
    """
    Parameters
    ----------
    dataset : :class:`MaskClassifierDataset`
        Dataset from which to get the examples.
    batch_size : int
        Nb. of examples per batch.
    seed : int, optional
        Seed for the random generator when shuffling streamlines or adding noise to the streamlines.
    """
    self.dataset = dataset
    self.batch_size = batch_size
    self.indices = np.arange(len(self.dataset))

    self.seed = seed
    self.rng = np.random.RandomState(self.seed)

    # Shared variables
    self._shared_batch_inputs = sharedX(np.ndarray((0, 0)))
    self._shared_batch_targets = sharedX(np.ndarray((0,)))

    # Test value
    batch_inputs, batch_targets = self._next_batch(0)

    # Redefine symbolic variables for single input model
    self.dataset.symb_inputs = T.TensorVariable(type=T.TensorType("floatX", [False] * batch_inputs.ndim),
                                                name=self.dataset.name + '_symb_inputs')
    self.dataset.symb_inputs.tag.test_value = batch_inputs

    # Since this batch scheduler creates its own targets.
    if self.dataset.symb_targets is None:
        self.dataset.symb_targets = T.TensorVariable(type=T.TensorType("floatX", [False] * batch_targets.ndim),
                                                     name=self.dataset.name + '_symb_targets')

    self.dataset.symb_targets.tag.test_value = batch_targets
def __init__(self, dataset, batch_size, use_mask_as_input=False, keep_mask=False, seed=1234):
    """
    Parameters
    ----------
    dataset : `SequenceDataset` object
        Dataset of datasets (one for each bundle).
    batch_size : int
        Number of examples per batch. *Must be greater than the number of
        bundles in `bundles_dataset`.*
    seed : int (optional)
        Seed of the random numbers generator used to sample a different
        autoregressive mask for each example.
    """
    super().__init__(dataset, batch_size)
    self.use_mask_as_input = use_mask_as_input
    self.seed = seed
    self.rng = np.random.RandomState(self.seed)
    self.keep_mask = keep_mask

    # Allocate memory for the autoregressive mask.
    self.mask_shape = (len(dataset),) + self.dataset.input_shape
    self._shared_mask_o_lt_d = sharedX(np.zeros(self.mask_shape), name='autoregressive_mask', keep_on_cpu=True)

    # Add a new attribute: a symbolic variable representing the autoregressive mask.
    self._shared_mask_o_lt_d.set_value(self.generate_autoregressive_mask())
    self.dataset.mask_o_lt_d = T.TensorVariable(type=T.TensorType("floatX", [False] * dataset.inputs.ndim),
                                                name=dataset.name + '_symb_mask')

    # Keep only `batch_size` masks as test values. For debugging Theano graphs.
    self.dataset.mask_o_lt_d.tag.test_value = self._shared_mask_o_lt_d.get_value()[:batch_size]

    if self.use_mask_as_input:
        self.dataset.symb_inputs.tag.test_value = np.concatenate(
            [self.dataset.symb_inputs.tag.test_value * self.dataset.mask_o_lt_d.tag.test_value,
             self.dataset.mask_o_lt_d.tag.test_value], axis=1)
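# Hedged sketch of what an autoregressive mask like `mask_o_lt_d` typically looks
# like for NADE/MADE-style training (the actual `generate_autoregressive_mask` is
# defined elsewhere): for each example, draw an ordering o of the D input
# dimensions and a pivot d, then flag the d dimensions that come before d in o.
import numpy as np

rng = np.random.RandomState(1234)
nb_examples, D = 3, 8
mask_o_lt_d = np.zeros((nb_examples, D))
for i in range(nb_examples):
    ordering = rng.permutation(D)  # random ordering o
    d = rng.randint(D + 1)         # conditioning pivot
    mask_o_lt_d[i, ordering[:d]] = 1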
def __init__(self, input_size, hidden_size, activation="tanh", name="GRU", eps=1e-5): self.input_size = input_size self.hidden_size = hidden_size self.name = name self.activation = activation self.activation_fct = factories.make_activation_function(self.activation) self.eps = eps # Input weights (z:update, r:reset) # Concatenation of the weights in that order: Wz, Wr, Wh self.W = sharedX(value=np.zeros((input_size, 3*hidden_size)), name=self.name+'_W') self.b_x = sharedX(value=np.zeros(3 * hidden_size), name=self.name + '_b_x') self.b_u = sharedX(value=np.zeros(2 * hidden_size), name=self.name + '_b_u') self.b_uh = sharedX(value=np.zeros(hidden_size), name=self.name+'_b_uh') self.g_x = sharedX(value=np.ones(3 * hidden_size), name=self.name + '_g_x') self.g_u = sharedX(value=np.ones(2*hidden_size), name=self.name+'_g_u') self.g_uh = sharedX(value=np.ones(hidden_size), name=self.name+'_g_uh') # Recurrence weights (z:update, r:reset) # Concatenation of the recurrence weights in that order: Uz, Ur self.U = sharedX(value=np.zeros((hidden_size, 2*hidden_size)), name=self.name+'_U') self.Uh = sharedX(value=np.zeros((hidden_size, hidden_size)), name=self.name+'_Uh')
def _build_experiment(self):
    # Create an Nd gaussian function to optimize. This function is not
    # well-conditioned and there exists no perfect gradient step to converge in
    # only one iteration.
    N = 4
    center = 5 * np.ones((1, N)).astype(floatX)
    param = sharedX(np.zeros((1, N)))
    cost = T.sum(0.5 * T.dot(T.dot((param - center), np.diag(1. / np.arange(1, N + 1))), (param - center).T))
    loss = DummyLossWithGradient(cost, param)

    optimizer = SGD(loss)
    direction_modifier = ConstantLearningRate(lr=self.lr)
    optimizer.append_direction_modifier(direction_modifier)
    trainer = Trainer(optimizer, DummyBatchScheduler())

    # Monitor the learning rate.
    logger = tasks.Logger(views.MonitorVariable(list(direction_modifier.parameters.values())[0]))
    trainer.append_task(logger)

    return trainer, logger, direction_modifier
def __init__(self, input_size, hidden_size, activation="tanh", name="LSTM"): self.input_size = input_size self.hidden_size = hidden_size self.name = name self.activation = activation self.activation_fct = factories.make_activation_function(self.activation) # Input weights (i:input, o:output, f:forget, m:memory) # Concatenation of the weights in that order: Wi, Wo, Wf, Wm self.W = sharedX(value=np.zeros((input_size, 4*hidden_size)), name=self.name+'_W') # Biases (i:input, o:output, f:forget, m:memory) # Concatenation of the biases in that order: bi, bo, bf, bm self.b = sharedX(value=np.zeros(4*hidden_size), name=self.name+'_b') # Recurrence weights (i:input, o:output, f:forget, m:memory) # Concatenation of the recurrence weights in that order: Ui, Uo, Uf, Um self.U = sharedX(value=np.zeros((hidden_size, 4*hidden_size)), name=self.name+'_U') # Peepholes (i:input, o:output, f:forget, m:memory) self.Vi = sharedX(value=np.ones(hidden_size), name=self.name+'_Vi') self.Vo = sharedX(value=np.ones(hidden_size), name=self.name+'_Vo') self.Vf = sharedX(value=np.ones(hidden_size), name=self.name+'_Vf')
def test_early_stopping():
    MAX_EPOCH = 100  # Add a max epoch just in case we get an infinite loop.

    class DummyCost(View):
        def __init__(self, initial_cost, costs):
            super().__init__()
            self.initial_cost = initial_cost
            self.costs = costs
            self.cpt = 0

        def update(self, status):
            if status.current_update == 0:
                return self.initial_cost

            cost = self.costs[self.cpt]
            self.cpt += 1
            return cost

    # 20 identical costs but should stop after 9 unchanged epochs.
    constant_cost = DummyCost(1, np.ones(20))
    lookahead = 9

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(constant_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 1.)
    assert_equal(constant_cost.cpt, lookahead)

    # `lookahead` identical costs followed by `lookahead` lower identical costs.
    lookahead = 9
    costs = np.r_[np.ones(lookahead - 1), np.zeros(lookahead + 1)]
    simple_cost = DummyCost(1, costs)

    def callback(task, status):
        # This callback function should be called once after `lookahead` epochs.
        if status.current_epoch != lookahead:
            msg = "Callback should be fired at epoch #{} not #{}.".format(lookahead, status.current_epoch)
            raise NameError(msg)

    early_stopping = stopping_criteria.EarlyStopping(simple_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, 2 * lookahead)
    assert_equal(early_stopping.best_epoch, lookahead)
    assert_equal(early_stopping.best_cost, 0.)

    # 20 increasing costs but should stop after 9 increasing epochs.
    lookahead = 9
    costs = range(20)
    increasing_cost = DummyCost(0, costs)

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(increasing_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 0.)

    # Test `min_nb_epochs`.
    lookahead = 9
    min_nb_epochs = 15
    costs = range(20)
    increasing_cost = DummyCost(0, costs)

    early_stopping = stopping_criteria.EarlyStopping(increasing_cost, lookahead, min_nb_epochs=min_nb_epochs)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, min_nb_epochs)

    # Test that at the end the model is the best one.
    # `lookahead` decreasing costs followed by `lookahead+1` constant identical costs.
    lookahead = 9
    costs = np.r_[-np.arange(lookahead), np.zeros(lookahead + 1)]
    simple_cost = DummyCost(1, costs)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    model = trainer._optimizer.loss.model

    # Add some parameters to the model.
    model.parameters.extend([sharedX(np.zeros(4)), sharedX(np.zeros((3, 5)))])

    # Callback that will change model parameters after each epoch.
    def callback(task, status):
        for param in model.parameters:
            param.set_value(param.get_value() + 1)

    trainer.append_task(tasks.Callback(callback))

    early_stopping = stopping_criteria.EarlyStopping(simple_cost, lookahead)
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    for param in model.parameters:
        assert_array_equal(param.get_value(), lookahead * np.ones_like(param.get_value()))
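# Minimal sketch of the lookahead rule these tests encode (illustrative, not the
# library implementation): stop once `lookahead` consecutive epochs fail to
# improve on the best cost seen so far.
def early_stopping_epoch(costs, lookahead, initial_cost):
    best_cost, best_epoch = initial_cost, 0
    for epoch, cost in enumerate(costs, start=1):
        if cost < best_cost:
            best_cost, best_epoch = cost, epoch
        if epoch - best_epoch >= lookahead:
            return epoch, best_epoch, best_cost
    return len(costs), best_epoch, best_cost

# 20 identical costs, initial cost 1: stops at epoch 9 with best epoch 0.
assert early_stopping_epoch([1.0] * 20, lookahead=9, initial_cost=1.0) == (9, 0, 1.0)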
def inputs(self, value):
    self._inputs_shared = sharedX(value, name=self.name + "_inputs")
def inputs(self, value):
    self._inputs_shared = sharedX(value, name=self.name + "_inputs", keep_on_cpu=self.keep_on_cpu)
def test_early_stopping():
    MAX_EPOCH = 100  # Add a max epoch just in case we get an infinite loop.

    class DummyCost(View):
        def __init__(self, initial_cost, costs):
            super().__init__()
            self.initial_cost = initial_cost
            self.costs = costs
            self.cpt = 0

        def update(self, status):
            if status.current_update == 0:
                return self.initial_cost

            cost = self.costs[self.cpt]
            self.cpt += 1
            return cost

    # 20 identical costs but should stop after 9 unchanged epochs.
    constant_cost = DummyCost(1, np.ones(20))
    lookahead = 9

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(constant_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 1.)
    assert_equal(constant_cost.cpt, lookahead)

    # `lookahead` identical costs followed by `lookahead` lower identical costs.
    lookahead = 9
    costs = np.r_[np.ones(lookahead - 1), np.zeros(lookahead + 1)]
    simple_cost = DummyCost(1, costs)

    def callback(task, status):
        # This callback function should be called once after `lookahead` epochs.
        if status.current_epoch != lookahead:
            msg = "Callback should be fired at epoch #{} not #{}.".format(lookahead, status.current_epoch)
            raise NameError(msg)

    early_stopping = stopping_criteria.EarlyStopping(simple_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, 2 * lookahead)
    assert_equal(early_stopping.best_epoch, lookahead)
    assert_equal(early_stopping.best_cost, 0.)

    # 20 increasing costs but should stop after 9 increasing epochs.
    lookahead = 9
    costs = range(20)
    increasing_cost = DummyCost(0, costs)

    def callback(task, status):
        # This callback function should not be called.
        raise NameError("This callback function should not be called.")

    early_stopping = stopping_criteria.EarlyStopping(increasing_cost, lookahead, callback=callback)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead)
    assert_equal(early_stopping.best_epoch, 0)
    assert_equal(early_stopping.best_cost, 0.)

    # Test `min_nb_epochs`.
    lookahead = 9
    min_nb_epochs = 5
    costs = range(20)
    increasing_cost = DummyCost(0, costs)

    early_stopping = stopping_criteria.EarlyStopping(increasing_cost, lookahead, min_nb_epochs=min_nb_epochs)
    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    assert_equal(trainer.status.current_epoch, lookahead + min_nb_epochs)

    # Test that at the end the model is the best one.
    # `lookahead` decreasing costs followed by `lookahead+1` constant identical costs.
    lookahead = 9
    costs = np.r_[-np.arange(lookahead), np.zeros(lookahead + 1)]
    simple_cost = DummyCost(1, costs)

    trainer = Trainer(DummyOptimizer(), DummyBatchScheduler())
    model = trainer._optimizer.loss.model

    # Add some parameters to the model.
    model.parameters.extend([sharedX(np.zeros(4)), sharedX(np.zeros((3, 5)))])

    # Callback that will change model parameters after each epoch.
    def callback(task, status):
        for param in model.parameters:
            param.set_value(param.get_value() + 1)

    trainer.append_task(tasks.Callback(callback))

    early_stopping = stopping_criteria.EarlyStopping(simple_cost, lookahead)
    trainer.append_task(early_stopping)
    trainer.append_task(stopping_criteria.MaxEpochStopping(MAX_EPOCH))  # To be safe
    trainer.train()

    for param in model.parameters:
        assert_array_equal(param.get_value(), lookahead * np.ones_like(param.get_value()))
def __init__(self, decay_rate=0.99, name="decaying_variable", *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.var = smartutils.sharedX(np.array(1), name=name)
    self.decay_rate = np.array(decay_rate, dtype=theano.config.floatX)
def __init__(self, dataset, batch_size, batch_id, ordering_id, use_mask_as_input=False, seed=1234):
    """
    Parameters
    ----------
    dataset : `SequenceDataset` object
        Dataset of datasets (one for each bundle).
    batch_size : int
        Number of examples per batch. *Must be greater than the number of
        bundles in `bundles_dataset`.*
    seed : int (optional)
        Seed of the random numbers generator used to sample a different
        autoregressive mask for each example.
    """
    super().__init__(dataset)
    self.use_mask_as_input = use_mask_as_input
    self.seed = seed
    self.rng = np.random.RandomState(self.seed)
    self.batch_size = batch_size
    self.batch_id = batch_id
    self.ordering_id = ordering_id

    # Determine the start and the end of the batch that will be used by this batch scheduler.
    assert batch_id * self.batch_size < len(self.dataset)
    self.batch_start = batch_id * self.batch_size
    self.batch_end = min((batch_id + 1) * self.batch_size, len(dataset))

    # Determine the ordering that will be used by this batch scheduler.
    self.d = 0
    self.D = self.dataset.input_shape[0]
    self.ordering = np.arange(self.D)
    for _ in range(ordering_id + 1):
        self.rng.shuffle(self.ordering)

    # Matrix mask that will be used when concatenating the mask.
    self._shared_Moltd = sharedX(np.zeros((self.batch_end - self.batch_start, self.D)), name='Moltd')

    # Vector mask that will be broadcasted across all inputs.
    # self._shared_mod = sharedX(np.zeros((1, self.D)), name='mod')
    self._shared_mod = sharedX(np.zeros((self.D,)), name='mod')

    # Add new attributes: symbolic variables representing the autoregressive masks.
    self.change_masks(self.d)
    self.Moltd = T.TensorVariable(type=T.TensorType("floatX", [False] * dataset.inputs.ndim), name="symb_Moltd")
    self.mod = T.TensorVariable(type=T.TensorType("floatX", [True, False]), name="symb_mod")

    # Keep only `(self.batch_end - self.batch_start)` examples as test values.
    self.dataset.symb_inputs.tag.test_value = self.dataset.inputs.get_value()[:(self.batch_end - self.batch_start)]
    if self.dataset.has_targets:
        self.dataset.symb_targets.tag.test_value = self.dataset.targets.get_value()[:(self.batch_end - self.batch_start)]

    self.Moltd.tag.test_value = self._shared_Moltd.get_value()[:(self.batch_end - self.batch_start)]
    self.mod.tag.test_value = self._shared_mod.get_value()[None, :]

    if self.use_mask_as_input:
        self.dataset.symb_inputs.tag.test_value = np.concatenate(
            [self.dataset.symb_inputs.tag.test_value * self.Moltd.tag.test_value,
             self.Moltd.tag.test_value], axis=1)
def __init__(self, dataset, batch_size, noisy_streamlines_sigma=None, seed=1234,
             use_data_augment=True, normalize_target=False, shuffle_streamlines=True,
             resample_streamlines=True, feed_previous_direction=False):
    """
    Parameters
    ----------
    dataset : :class:`TractographyDataset`
        Dataset from which to get the examples.
    batch_size : int
        Nb. of examples per batch.
    seed : int, optional
        Seed for the random generator when shuffling streamlines or adding noise to the streamlines.
    use_data_augment : bool
        If true, perform data augmentation by flipping streamlines.
    normalize_target : bool
        If true, targets will have a norm of one (usually used by the GruRegression model).
    shuffle_streamlines : bool
        Shuffle streamlines in the dataset between each epoch.
    resample_streamlines : bool
        Streamlines in a same batch will all have the same number of points.
        Should always be set to True for now (until the method `_process_batch` supports it).
    feed_previous_direction : bool
        Should the previous direction be appended to the input when making a prediction?
    """
    self.dataset = dataset
    self.batch_size = batch_size
    self.normalize_target = normalize_target
    self.noisy_streamlines_sigma = noisy_streamlines_sigma
    self.use_noisy_streamlines = self.noisy_streamlines_sigma is not None

    # `use_data_augment` cannot be honored for an FFNN model (or any other non-recurrent model)
    # without `feed_previous_direction`, because the targets are flipped while the inputs stay the same.
    self.use_augment_by_flipping = feed_previous_direction and use_data_augment

    self.seed = seed
    self.rng = np.random.RandomState(self.seed)
    self.rng_noise = np.random.RandomState(self.seed + 1)
    self.shuffle_streamlines = shuffle_streamlines
    self.resample_streamlines = resample_streamlines
    self.indices = np.arange(len(self.dataset))

    self.feed_previous_direction = feed_previous_direction

    # Shared variables
    self._shared_batch_inputs = sharedX(np.ndarray((0, 0)))
    self._shared_batch_targets = sharedX(np.ndarray((0, 0)))

    # Test value
    batch_inputs, batch_targets = self._next_batch(0)

    # Redefine symbolic variables for single input model
    self.dataset.symb_inputs = T.TensorVariable(type=T.TensorType("floatX", [False] * batch_inputs.ndim),
                                                name=self.dataset.name + '_symb_inputs')
    self.dataset.symb_inputs.tag.test_value = batch_inputs

    # Since this batch scheduler creates its own targets.
    if self.dataset.symb_targets is None:
        self.dataset.symb_targets = T.TensorVariable(type=T.TensorType("floatX", [False] * batch_targets.ndim),
                                                     name=self.dataset.name + '_symb_targets')

    self.dataset.symb_targets.tag.test_value = batch_targets
def __init__(self, dataset, batch_size, noisy_streamlines_sigma=None, seed=1234,
             use_data_augment=True, normalize_target=False, shuffle_streamlines=True,
             resample_streamlines=True, feed_previous_direction=False,
             sort_streamlines_by_length=False, learn_to_stop=False):
    """
    Parameters
    ----------
    dataset : :class:`TractographyDataset`
        Dataset from which to get the examples.
    batch_size : int
        Nb. of examples per batch.
    seed : int, optional
        Seed for the random generator when shuffling streamlines or adding noise to the streamlines.
    use_data_augment : bool
        If true, perform data augmentation by flipping streamlines.
    normalize_target : bool
        If true, targets will have a norm of one (usually used by the GruRegression model).
    shuffle_streamlines : bool
        Shuffle streamlines in the dataset between each epoch.
    resample_streamlines : bool
        Streamlines in a same batch will all have the same number of points.
        Should always be set to True for now (until the method `_process_batch` supports it).
    feed_previous_direction : bool
        Should the previous direction be appended to the input when making a prediction?
    sort_streamlines_by_length : bool
        Streamlines will be approximately regrouped according to their length.
    learn_to_stop : bool
        Predict whether the streamline being generated should stop or not.
    """
    self.dataset = dataset
    self.batch_size = batch_size
    self.use_augment_by_flipping = use_data_augment
    self.normalize_target = normalize_target
    self.noisy_streamlines_sigma = noisy_streamlines_sigma
    self.use_noisy_streamlines = self.noisy_streamlines_sigma is not None

    self.seed = seed
    self.rng = np.random.RandomState(self.seed)
    self.rng_noise = np.random.RandomState(self.seed + 1)
    self.shuffle_streamlines = shuffle_streamlines
    self.resample_streamlines = resample_streamlines
    self.sort_streamlines_by_length = sort_streamlines_by_length
    self.feed_previous_direction = feed_previous_direction
    self.learn_to_stop = learn_to_stop

    # Sort streamlines according to their length by default.
    # This should speed up validation.
    self.indices = np.argsort(self.dataset.streamlines._lengths)

    # Shared variables
    self._shared_batch_inputs = sharedX(np.ndarray((0, 0, 0)))
    self._shared_batch_targets = sharedX(np.ndarray((0, 0, 0)))
    self._shared_batch_mask = sharedX(np.ndarray((0, 0)))

    # Test value
    batch_inputs, batch_targets, batch_mask = self._next_batch(0)
    self.dataset.symb_inputs.tag.test_value = batch_inputs
    self.dataset.symb_mask.tag.test_value = batch_mask

    # Since this batch scheduler creates its own targets.
    if self.dataset.symb_targets is None:
        self.dataset.symb_targets = T.TensorVariable(type=T.TensorType("floatX", [False] * batch_targets.ndim),
                                                     name=self.dataset.name + '_symb_targets')

    self.dataset.symb_targets.tag.test_value = batch_targets
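# Hypothetical sketch of what `feed_previous_direction` adds to the inputs (names
# and layout are illustrative, not taken from `_process_batch`): at each step the
# model also receives the normalized direction that led to the current point.
import numpy as np

streamline = np.array([[0., 0., 0.], [1., 0., 0.], [1., 1., 0.]])
directions = np.diff(streamline, axis=0)
directions /= np.sqrt((directions ** 2).sum(axis=1, keepdims=True))
previous_directions = np.vstack([np.zeros((1, 3)), directions])  # undefined at the first point
inputs = np.concatenate([streamline, previous_directions], axis=1)  # shape (3, 6)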