def test_softmax_binary_targets():
    """
    Constructs softmax layers with binary targets and with vector targets
    to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=1)],
        nvis=100
    )
    mlp_vec = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1)],
        nvis=100
    )

    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()
    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)

    cost_bin = theano.function([X, y_bin], mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec], mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    y_bin_data = np.random.randint(low=0, high=10, size=(batch_size, 1))
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size), y_bin_data.flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
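A plain numpy sketch (not pylearn2 code) of why the two costs above agree: for a one-hot target, softmax cross-entropy reduces to indexing the log-probabilities at the target class, which is what the binary-target path computes.

# Hedged sketch: one-hot cross-entropy equals index-based NLL.
logits = np.random.randn(4, 10)
p = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
idx = np.random.randint(0, 10, size=4)
onehot = np.zeros((4, 10))
onehot[np.arange(4), idx] = 1
nll_onehot = -(onehot * np.log(p)).sum(axis=1).mean()
nll_index = -np.log(p[np.arange(4), idx]).mean()
assert np.allclose(nll_onehot, nll_index)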
def test_composite_layer():
    """
    Test the routing functionality of the CompositeLayer
    """
    # Without routing
    composite_layer = CompositeLayer('composite_layer',
                                     [Linear(2, 'h0', irange=0),
                                      Linear(2, 'h1', irange=0),
                                      Linear(2, 'h2', irange=0)])
    mlp = MLP(nvis=2, layers=[composite_layer])
    for i in range(3):
        composite_layer.layers[i].set_weights(
            np.eye(2, dtype=theano.config.floatX)
        )
        composite_layer.layers[i].set_biases(
            np.zeros(2, dtype=theano.config.floatX)
        )
    X = theano.tensor.matrix()
    y = mlp.fprop(X)
    funs = [theano.function([X], y_elem) for y_elem in y]
    x_numeric = np.random.rand(2, 2).astype('float32')
    y_numeric = [f(x_numeric) for f in funs]
    assert np.all(x_numeric == y_numeric)

    # With routing
    for inputs_to_layers in [{0: [1], 1: [2], 2: [0]},
                             {0: [1], 1: [0, 2], 2: []},
                             {0: [], 1: []}]:
        composite_layer = CompositeLayer('composite_layer',
                                         [Linear(2, 'h0', irange=0),
                                          Linear(2, 'h1', irange=0),
                                          Linear(2, 'h2', irange=0)],
                                         inputs_to_layers)
        input_space = CompositeSpace([VectorSpace(dim=2),
                                      VectorSpace(dim=2),
                                      VectorSpace(dim=2)])
        mlp = MLP(input_space=input_space, layers=[composite_layer])
        for i in range(3):
            composite_layer.layers[i].set_weights(
                np.eye(2, dtype=theano.config.floatX)
            )
            composite_layer.layers[i].set_biases(
                np.zeros(2, dtype=theano.config.floatX)
            )
        X = [theano.tensor.matrix() for _ in range(3)]
        y = mlp.fprop(X)
        funs = [theano.function(X, y_elem, on_unused_input='ignore')
                for y_elem in y]
        x_numeric = [np.random.rand(2, 2).astype(theano.config.floatX)
                     for _ in range(3)]
        y_numeric = [f(*x_numeric) for f in funs]
        assert all([all([np.all(x_numeric[i] == y_numeric[j])
                         for j in inputs_to_layers[i]])
                    for i in inputs_to_layers])
def test_kl():
    """
    Test whether the function kl() properly validates its input.
    """
    init_mode = theano.config.compute_test_value
    theano.config.compute_test_value = 'raise'
    try:
        mlp = MLP(layers=[Sigmoid(dim=10, layer_name='Y', irange=0.1)],
                  nvis=10)
        X = mlp.get_input_space().make_theano_batch()
        Y = mlp.get_output_space().make_theano_batch()
        X.tag.test_value = np.random.random(
            get_debug_values(X)[0].shape).astype(theano.config.floatX)
        Y_hat = mlp.fprop(X)

        # This call should not raise any error:
        ave = kl(Y, Y_hat, 1)

        # The following calls should raise ValueError exceptions,
        # because the targets fall outside [0, 1]:
        Y.tag.test_value[2][3] = 1.1
        np.testing.assert_raises(ValueError, kl, Y, Y_hat, 1)
        Y.tag.test_value[2][3] = -0.1
        np.testing.assert_raises(ValueError, kl, Y, Y_hat, 1)
    finally:
        theano.config.compute_test_value = init_mode
def build_mlp_fn(x0, y0, x1, y1, s0, s1, c, axes):
    """
    Creates a theano function to test the WindowLayer

    Parameters
    ----------
    x0: x coordinate of the left of the window
    y0: y coordinate of the top of the window
    x1: x coordinate of the right of the window
    y1: y coordinate of the bottom of the window
    s0: x shape of the images of the input space
    s1: y shape of the images of the input space
    c: number of channels of the input space
    axes: description of the axes of the input space

    Returns
    -------
    f: a theano function applying the window layer
       with window (x0, y0, x1, y1).
    """
    mlp = MLP(layers=[WindowLayer('h0', window=(x0, y0, x1, y1))],
              input_space=Conv2DSpace(shape=(s0, s1),
                                      num_channels=c, axes=axes))
    X = mlp.get_input_space().make_batch_theano()
    f = theano.function([X], mlp.fprop(X))
    return f
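A possible usage sketch with hypothetical values; the expected output shape assumes the window bounds are inclusive, as the parameter names above suggest.

# Hypothetical usage: extract the window with corners (0, 0) and (1, 1)
# from 5x5 single-channel images laid out as ('b', 0, 1, 'c').
f = build_mlp_fn(0, 0, 1, 1, 5, 5, 1, ('b', 0, 1, 'c'))
out = f(np.random.rand(3, 5, 5, 1).astype(theano.config.floatX))
# Expected shape: (3, 2, 2, 1) if the window bounds are inclusive.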
def test_sigmoid_layer_misclass_reporting():
    mlp = MLP(nvis=3, layers=[Sigmoid(layer_name='h0', dim=1, irange=0.005,
                                      monitor_style='classification')])
    target = theano.tensor.matrix(dtype=theano.config.floatX)
    batch = theano.tensor.matrix(dtype=theano.config.floatX)
    rval = mlp.layers[0].get_monitoring_channels_from_state(
        mlp.fprop(batch), target)
    f = theano.function([batch, target],
                        [tensor.gt(mlp.fprop(batch), 0.5),
                         rval['misclass']],
                        allow_input_downcast=True)
    rng = np.random.RandomState(0)

    for _ in range(10):  # repeat a few times for statistical strength
        targets = (rng.uniform(size=(30, 1)) > 0.5).astype('uint8')
        out, misclass = f(rng.normal(size=(30, 3)), targets)
        np.testing.assert_allclose((targets != out).mean(), misclass)
def test_sigmoid_detection_cost():
    # This is only a smoke test: verifies that it compiles and runs,
    # not any particular value.
    rng = np.random.RandomState(0)
    y = (rng.uniform(size=(4, 3)) > 0.5).astype('uint8')
    X = theano.shared(rng.uniform(size=(4, 2)))
    model = MLP(nvis=2, layers=[Sigmoid(monitor_style='detection', dim=3,
                                        layer_name='y', irange=0.8)])
    y_hat = model.fprop(X)
    model.cost(y, y_hat).eval()
def test_identity_layer():
    nvis = 10
    mlp = MLP(nvis=nvis, layers=[util.IdentityLayer(layer_name='ident')])
    X = T.matrix()
    f = theano.function([X], mlp.fprop(X))
    # Use a separate name for the numeric data so it does not shadow
    # the symbolic variable X used to compile f.
    for _ in range(5):
        X_data = np.random.rand(10, nvis).astype(theano.config.floatX)
        yield _test_identity_layer, f, X_data
def test_nested_mlp():
    """
    Constructs a nested MLP and tries to fprop through it
    """
    inner_mlp = MLP(layers=[Linear(10, 'h0', 0.1), Linear(10, 'h1', 0.1)],
                    layer_name='inner_mlp')
    outer_mlp = MLP(layers=[CompositeLayer(layer_name='composite',
                                           layers=[inner_mlp,
                                                   Linear(10, 'h2', 0.1)])],
                    nvis=10)
    X = outer_mlp.get_input_space().make_theano_batch()
    f = theano.function([X], outer_mlp.fprop(X))
    f(np.random.rand(5, 10).astype(theano.config.floatX))
def test_softmax_two_binary_targets():
    """
    Constructs softmax layers with two binary targets and with vector
    targets to check that they give the same cost.
    """
    num_classes = 10
    batch_size = 20
    mlp_bin = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1, binary_target_dim=2)],
        nvis=100
    )
    mlp_vec = MLP(
        layers=[Softmax(num_classes, 's1', irange=0.1)],
        nvis=100
    )

    X = mlp_bin.get_input_space().make_theano_batch()
    y_bin = mlp_bin.get_target_space().make_theano_batch()
    y_vec = mlp_vec.get_target_space().make_theano_batch()
    y_hat_bin = mlp_bin.fprop(X)
    y_hat_vec = mlp_vec.fprop(X)

    cost_bin = theano.function([X, y_bin], mlp_bin.cost(y_bin, y_hat_bin),
                               allow_input_downcast=True)
    cost_vec = theano.function([X, y_vec], mlp_vec.cost(y_vec, y_hat_vec),
                               allow_input_downcast=True)

    X_data = np.random.random(size=(batch_size, 100))
    # binary and vector costs can only match
    # if binary targets are mutually exclusive
    y_bin_data = np.concatenate([np.random.permutation(10)[:2].reshape((1, 2))
                                 for _ in range(batch_size)])
    y_vec_data = np.zeros((batch_size, num_classes))
    y_vec_data[np.arange(batch_size), y_bin_data[:, 0].flatten()] = 1
    y_vec_data[np.arange(batch_size), y_bin_data[:, 1].flatten()] = 1
    np.testing.assert_allclose(cost_bin(X_data, y_bin_data),
                               cost_vec(X_data, y_vec_data))
def check_unimplemented_case(ConvNonlinearity):
    conv_model = MLP(
        input_space=Conv2DSpace(shape=[1, 1], axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv',
                             nonlinearity=ConvNonlinearity,
                             output_channels=1,
                             kernel_shape=[1, 1],
                             pool_shape=[1, 1],
                             pool_stride=[1, 1],
                             irange=1.0)],
        batch_size=1
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)

    # assert_raises must receive the callable and its arguments separately;
    # calling cost() directly would raise before assert_raises could catch
    # it, and assert_raises returns None, so wrapping it in assert is wrong.
    np.testing.assert_raises(NotImplementedError, conv_model.cost, Y, Y_hat)
def test_min_zero():
    """
    This test guards against a bug where the size of the zero buffer used
    with the min_zero flag was specified to have the wrong size. The bug
    only manifested when compiled with optimizations off, because the
    optimizations discard information about the size of the zero buffer.
    """
    mlp = MLP(input_space=VectorSpace(1),
              layers=[Maxout(layer_name="test_layer", num_units=1,
                             num_pieces=2, irange=.05, min_zero=True)])
    X = T.matrix()
    output = mlp.fprop(X)
    # Compile in debug mode so we don't optimize out the size of the
    # buffer of zeros
    f = function([X], output, mode="DEBUG_MODE")
    f(np.zeros((1, 1)).astype(X.dtype))
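A minimal numpy sketch of what min_zero is understood to do here (an assumption based on the flag's name and this test, not pylearn2 source): zero acts as an extra piece in the max, so the output is never negative.

# Hedged sketch: maxout over two pieces per unit plus an implicit zero.
pieces = np.random.randn(5, 2)                 # 5 examples, 2 pieces each
out = np.maximum(pieces.max(axis=1), 0.0)      # max over pieces and zero
assert (out >= 0).all()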
def test_conditional_encode_conditional_parameters():
    """
    Conditional.encode_conditional_params calls its MLP's fprop method
    """
    mlp = MLP(layers=[Linear(layer_name="h", dim=5, irange=0.01,
                             max_col_norm=0.01)])
    conditional = DummyConditional(mlp=mlp, name="conditional")
    vae = DummyVAE()
    conditional.set_vae(vae)
    input_space = VectorSpace(dim=5)
    conditional.initialize_parameters(input_space=input_space, ndim=5)

    X = T.matrix("X")
    mlp_Y1, mlp_Y2 = mlp.fprop(X)
    cond_Y1, cond_Y2 = conditional.encode_conditional_params(X)
    f = theano.function([X], [mlp_Y1, mlp_Y2, cond_Y1, cond_Y2])
    rval = f(as_floatX(numpy.random.uniform(size=(10, 5))))
    numpy.testing.assert_allclose(rval[0], rval[2])
    numpy.testing.assert_allclose(rval[1], rval[3])
def test_cost(self):
    """
    Use an RNN to calculate Mersenne number sequences of different
    lengths and check whether the costs make sense.
    """
    rnn = MLP(input_space=SequenceSpace(VectorSpace(dim=1)),
              layers=[Recurrent(dim=1, layer_name='recurrent',
                                irange=0, nonlinearity=lambda x: x),
                      Linear(dim=1, layer_name='linear', irange=0)])

    W, U, b = rnn.layers[0].get_params()
    W.set_value([[1]])
    U.set_value([[2]])
    W, b = rnn.layers[1].get_params()
    W.set_value([[1]])

    X_data, X_mask = rnn.get_input_space().make_theano_batch()
    y_data, y_mask = rnn.get_output_space().make_theano_batch()
    y_data_hat, y_mask_hat = rnn.fprop((X_data, X_mask))

    seq_len = 20
    X_data_vals = np.ones((seq_len, seq_len, 1))
    X_mask_vals = np.triu(np.ones((seq_len, seq_len)))
    y_data_vals = np.tile((2 ** np.arange(1, seq_len + 1) - 1),
                          (seq_len, 1)).T[:, :, np.newaxis]
    y_mask_vals = np.triu(np.ones((seq_len, seq_len)))

    f = function([X_data, X_mask, y_data, y_mask],
                 rnn.cost((y_data, y_mask), (y_data_hat, y_mask_hat)),
                 allow_input_downcast=True)

    # The cost for two exact sequences should be zero
    assert f(X_data_vals, X_mask_vals, y_data_vals, y_mask_vals) == 0
    # If the input is different, the cost should be non-zero
    assert f(X_data_vals + 1, X_mask_vals, y_data_vals, y_mask_vals) != 0
    # And same for the target data; using squared L2 norm, so should be 1
    assert f(X_data_vals, X_mask_vals, y_data_vals + 1, y_mask_vals) == 1
    # But if the masked data changes, the cost should remain the same
    X_data_vals_plus = X_data_vals + (1 - X_mask_vals[:, :, None])
    assert f(X_data_vals_plus, X_mask_vals, y_data_vals, y_mask_vals) == 0
    y_data_vals_plus = y_data_vals + (1 - y_mask_vals[:, :, None])
    assert f(X_data_vals, X_mask_vals, y_data_vals_plus, y_mask_vals) == 0
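A plain-Python sanity sketch of the recurrence the weights above implement: with W = 1, U = 2, zero biases, and a constant input of 1, a linear recurrent unit produces the Mersenne numbers.

# h_t = x_t + 2 * h_{t-1} with x_t = 1 and h_0 = 0 gives h_t = 2**t - 1.
h = 0
for t in range(1, 21):
    h = 1 + 2 * h
    assert h == 2 ** t - 1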
def test_sigmoid_detection_cost():
    """
    Tests whether the sigmoid convolutional layer returns the right value.
    """
    rng = np.random.RandomState(0)
    sigmoid_nonlin = SigmoidConvNonlinearity(monitor_style="detection")
    (rows, cols) = (10, 10)
    axes = ('c', 0, 1, 'b')
    nchs = 1
    space_shp = (nchs, rows, cols, 1)
    X_vals = np.random.uniform(-0.01, 0.01,
                               size=space_shp).astype(config.floatX)
    X = theano.shared(X_vals, name="X")
    Y_vals = (np.random.uniform(-0.01, 0.01,
                                size=(rows, cols)) > 0.005).astype("uint8")
    Y = theano.shared(Y_vals, name="y_vals")
    conv_elemwise = ConvElemwise(layer_name="h0",
                                 output_channels=1,
                                 irange=.005,
                                 kernel_shape=(1, 1),
                                 max_kernel_norm=0.9,
                                 nonlinearity=sigmoid_nonlin)
    input_space = pylearn2.space.Conv2DSpace(shape=(rows, cols),
                                             num_channels=nchs,
                                             axes=axes)
    model = MLP(batch_size=1, layers=[conv_elemwise],
                input_space=input_space)
    Y_hat = model.fprop(X)
    cost = model.cost(Y, Y_hat).eval()
    assert not (np.isnan(cost) or np.isinf(cost) or
                cost < 0.0 or cost is None), ("cost returns illegal "
                                              "value.")
def test_conv_pooling_nonlin():
    """
    Tests whether the nonlinearity is applied before the pooling.
    """
    rng = np.random.RandomState(0)
    sigm_nonlin = SigmoidConvNonlinearity(monitor_style="detection")
    (rows, cols) = (5, 5)
    axes = ('c', 0, 1, 'b')
    nchs = 1
    space_shp = (nchs, rows, cols, 1)
    X_vals = np.random.uniform(-0.01, 0.01,
                               size=space_shp).astype(config.floatX)
    X = theano.shared(X_vals, name="X")
    conv_elemwise = ConvElemwise(layer_name="h0",
                                 output_channels=1,
                                 pool_type="max",
                                 irange=.005,
                                 kernel_shape=(1, 1),
                                 pool_shape=(1, 1),
                                 pool_stride=(1, 1),
                                 nonlinearity=sigm_nonlin)
    input_space = pylearn2.space.Conv2DSpace(shape=(rows, cols),
                                             num_channels=nchs,
                                             axes=axes)
    model = MLP(batch_size=1, layers=[conv_elemwise],
                input_space=input_space)
    Y_hat = model.fprop(X)
    assert "max" in str(Y_hat.name)
    ancestors = theano.gof.graph.ancestors([Y_hat])
    lcond = ["sigm" in str(anc.owner) for anc in ancestors]
    assert np.array(lcond).nonzero()[0].shape[0] > 0, (
        "Nonlinearity should be applied before pooling.")
def test_fprop(self):
    """
    Use an RNN without non-linearity to create the Mersenne numbers
    (2 ** n - 1) to check whether fprop works correctly.
    """
    rnn = MLP(input_space=SequenceSpace(VectorSpace(dim=1)),
              layers=[Recurrent(dim=1, layer_name='recurrent',
                                irange=0.1, indices=[-1],
                                nonlinearity=lambda x: x)])
    W, U, b = rnn.layers[0].get_params()
    W.set_value([[1]])
    U.set_value([[2]])

    X_data, X_mask = rnn.get_input_space().make_theano_batch()
    y_hat = rnn.fprop((X_data, X_mask))

    seq_len = 20
    X_data_vals = np.ones((seq_len, seq_len, 1))
    X_mask_vals = np.triu(np.ones((seq_len, seq_len)))

    f = function([X_data, X_mask], y_hat, allow_input_downcast=True)
    np.testing.assert_allclose(2 ** np.arange(1, seq_len + 1) - 1,
                               f(X_data_vals, X_mask_vals).flatten())
def check_case(conv_nonlinearity, mlp_nonlinearity, cost_implemented=True):
    """Check that ConvNonLinearity and MLPNonlinearity are consistent.

    This is done by building an MLP with a ConvElemwise layer with the
    supplied non-linearity, an MLP with a dense layer, and checking that
    the outputs (and costs if applicable) are consistent.

    Parameters
    ----------
    conv_nonlinearity: instance of `ConvNonlinearity`
        The non-linearity to provide to a `ConvElemwise` layer.

    mlp_nonlinearity: subclass of `mlp.Linear`
        The fully-connected MLP layer (including non-linearity).

    cost_implemented: bool
        If `True`, check that both costs give consistent results.
        If `False`, check that both costs raise `NotImplementedError`.
    """
    # Create fake data
    np.random.seed(12345)

    r = 31
    s = 21
    shape = [r, s]
    nvis = r * s
    output_channels = 13
    batch_size = 103

    x = np.random.rand(batch_size, r, s, 1)
    y = np.random.randint(2, size=[batch_size, output_channels, 1, 1])
    x = x.astype(config.floatX)
    y = y.astype(config.floatX)

    x_mlp = x.flatten().reshape(batch_size, nvis)
    y_mlp = y.flatten().reshape(batch_size, output_channels)

    # Initialize convnet with random weights.
    conv_model = MLP(
        input_space=Conv2DSpace(shape=shape,
                                axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv',
                             nonlinearity=conv_nonlinearity,
                             output_channels=output_channels,
                             kernel_shape=shape,
                             pool_shape=[1, 1],
                             pool_stride=shape,
                             irange=1.0)],
        batch_size=batch_size
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)
    g = theano.function([X], Y_hat)

    # Construct an equivalent MLP which gives the same output
    # after flattening both.
    mlp_model = MLP(
        layers=[mlp_nonlinearity(dim=output_channels,
                                 layer_name='mlp',
                                 irange=1.0)],
        batch_size=batch_size,
        nvis=nvis
    )

    W, b = conv_model.get_param_values()
    W_mlp = np.zeros(shape=(output_channels, nvis), dtype=config.floatX)
    for k in range(output_channels):
        W_mlp[k] = W[k, 0].flatten()[::-1]
    W_mlp = W_mlp.T
    b_mlp = b.flatten()

    mlp_model.set_param_values([W_mlp, b_mlp])

    X1 = mlp_model.get_input_space().make_theano_batch()
    Y1 = mlp_model.get_target_space().make_theano_batch()
    Y1_hat = mlp_model.fprop(X1)
    f = theano.function([X1], Y1_hat)

    # Check that the two models give the same output
    assert_allclose(f(x_mlp).flatten(), g(x).flatten(),
                    rtol=1e-5, atol=5e-5)

    if cost_implemented:
        # Check that the two models have the same costs
        mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat))
        conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat))
        assert_allclose(conv_cost(x, y), mlp_cost(x_mlp, y_mlp))
    else:
        # Check that both costs are not implemented
        assert_raises(NotImplementedError, conv_model.cost, Y, Y_hat)
        assert_raises(NotImplementedError, mlp_model.cost, Y1, Y1_hat)
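A hedged numpy/scipy sketch (not part of the test suite) of the weight correspondence used above: a 'valid' convolution whose kernel covers the whole image flips the kernel, so it reduces to a dot product with the reversed flattened kernel, which is why W_mlp is built with flatten()[::-1].

import numpy as np
from scipy.signal import convolve2d

x = np.random.rand(31, 21)
w = np.random.rand(31, 21)
conv_out = convolve2d(x, w, mode='valid')[0, 0]   # true convolution, 1x1 out
dense_out = x.flatten().dot(w.flatten()[::-1])    # dot with reversed kernel
assert np.allclose(conv_out, dense_out)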
def check_implemented_case(ConvNonlinearity, MLPNonlinearity):
    # Create fake data
    np.random.seed(12345)

    r = 31
    s = 21
    shape = [r, s]
    nvis = r * s
    output_channels = 13
    batch_size = 103

    x = np.random.rand(batch_size, r, s, 1)
    y = np.random.randint(2, size=[batch_size, output_channels, 1, 1])
    x = x.astype('float32')
    y = y.astype('float32')

    x_mlp = x.flatten().reshape(batch_size, nvis)
    y_mlp = y.flatten().reshape(batch_size, output_channels)

    # Initialize convnet with random weights.
    conv_model = MLP(
        input_space=Conv2DSpace(shape=shape,
                                axes=['b', 0, 1, 'c'],
                                num_channels=1),
        layers=[ConvElemwise(layer_name='conv',
                             nonlinearity=ConvNonlinearity,
                             output_channels=output_channels,
                             kernel_shape=shape,
                             pool_shape=[1, 1],
                             pool_stride=shape,
                             irange=1.0)],
        batch_size=batch_size
    )

    X = conv_model.get_input_space().make_theano_batch()
    Y = conv_model.get_target_space().make_theano_batch()
    Y_hat = conv_model.fprop(X)
    g = theano.function([X], Y_hat)

    # Construct an equivalent MLP which gives the same output
    # after flattening both.
    mlp_model = MLP(
        layers=[MLPNonlinearity(dim=output_channels,
                                layer_name='mlp',
                                irange=1.0)],
        batch_size=batch_size,
        nvis=nvis
    )

    W, b = conv_model.get_param_values()
    W = W.astype('float32')
    b = b.astype('float32')

    W_mlp = np.zeros(shape=(output_channels, nvis))
    for k in range(output_channels):
        W_mlp[k] = W[k, 0].flatten()[::-1]
    W_mlp = W_mlp.T
    b_mlp = b.flatten()

    W_mlp = W_mlp.astype('float32')
    b_mlp = b_mlp.astype('float32')

    mlp_model.set_param_values([W_mlp, b_mlp])

    X1 = mlp_model.get_input_space().make_theano_batch()
    Y1 = mlp_model.get_target_space().make_theano_batch()
    Y1_hat = mlp_model.fprop(X1)
    f = theano.function([X1], Y1_hat)

    # Check that the two models give the same output
    assert np.linalg.norm(f(x_mlp).flatten() - g(x).flatten()) < 10 ** -3

    # Check that the two models have the same costs:
    mlp_cost = theano.function([X1, Y1], mlp_model.cost(Y1, Y1_hat))
    conv_cost = theano.function([X, Y], conv_model.cost(Y, Y_hat))
    assert np.linalg.norm(conv_cost(x, y) - mlp_cost(x_mlp, y_mlp)) < 10 ** -3
class DBL_model(object):
    def __init__(self, algo_id, model_id, num_epoch, num_dim,
                 train_id, test_id):
        self.algo_id = algo_id
        self.model_id = model_id
        self.num_epoch = num_epoch
        self.num_dim = num_dim
        self.train_id = train_id
        self.test_id = test_id
        self.path_train = None
        self.path_test = None
        self.p_data = None
        self.batch_size = None
        self.do_savew = True
        self.param = paramSet()
        self.p_monitor = {}

    def loadData(self, basepath, which_set, data_ind=None):
        self.DataLoader.loadData(self.p_data, basepath, which_set, data_ind)

    def loadWeight(self, fname):
        # load saved parameters and rebuild the model's layers
        if fname[-3:] == 'pkl':
            layer_params = cPickle.load(open(fname))
        elif fname[-3:] == 'mat':
            mat = scipy.io.loadmat(fname)
            layer_params = mat['param']
        else:
            raise ValueError('cannot recognize: ' + fname)

        layer_id = 0
        for layer in self.model.layers:
            # squeeze for matlab structure
            dims = [np.squeeze(layer_params[layer_id][k]).ndim
                    for k in [0, 1]]
            if fname[-3:] == 'mat':
                for id in [0, 1]:
                    if dims[id] == 0:
                        layer_params[layer_id][id] = \
                            layer_params[layer_id][id][0]
            # the higher-dimensional array holds the weights,
            # the other one the biases
            if dims[0] >= dims[1]:
                layer.set_weights(layer_params[layer_id][0])
                layer.set_biases(layer_params[layer_id][1])
            else:
                layer.set_weights(layer_params[layer_id][1])
                layer.set_biases(layer_params[layer_id][0])
            layer_id = layer_id + 1

    def saveWeight(self, pklname):
        # save the model parameters
        layer_params = []
        for layer in self.model.layers:
            param = layer.get_params()
            layer_params.append([param[0].get_value(),
                                 param[1].get_value()])
        cPickle.dump(layer_params, open(pklname, 'wb'))

    def loadAlgo(self, p_algo):
        # set up the training algorithm
        if p_algo.algo_type == 0:
            self.algo = SGD(
                learning_rate=p_algo.learning_rate,
                cost=p_algo.cost,
                batch_size=p_algo.batch_size,
                monitoring_batches=p_algo.monitoring_batches,
                monitoring_dataset=p_algo.monitoring_dataset,
                monitor_iteration_mode=p_algo.monitor_iteration_mode,
                termination_criterion=p_algo.termination_criterion,
                update_callbacks=p_algo.update_callbacks,
                learning_rule=p_algo.learning_rule,
                init_momentum=p_algo.init_momentum,
                set_batch_size=p_algo.set_batch_size,
                train_iteration_mode=p_algo.train_iteration_mode,
                batches_per_iter=p_algo.batches_per_iter,
                theano_function_mode=p_algo.theano_function_mode,
                monitoring_costs=p_algo.monitoring_costs,
                seed=p_algo.seed)
        elif p_algo.algo_type == 1:
            self.algo = BGD(
                cost=p_algo.cost,
                batch_size=p_algo.batch_size,
                batches_per_iter=p_algo.batches_per_iter,
                updates_per_batch=p_algo.updates_per_batch,
                monitoring_batches=p_algo.monitoring_batches,
                monitoring_dataset=p_algo.monitoring_dataset,
                termination_criterion=p_algo.termination_criterion,
                set_batch_size=p_algo.set_batch_size,
                reset_alpha=p_algo.reset_alpha,
                conjugate=p_algo.conjugate,
                min_init_alpha=p_algo.min_init_alpha,
                reset_conjugate=p_algo.reset_conjugate,
                line_search_mode=p_algo.line_search_mode,
                verbose_optimization=p_algo.verbose_optimization,
                scale_step=p_algo.scale_step,
                theano_function_mode=p_algo.theano_function_mode,
                init_alpha=p_algo.init_alpha,
                seed=p_algo.seed)
        self.algo.setup(self.model, self.DataLoader.data['train'])

    def setup(self):
        self.setupParam()
        self.check_setupParam()
        self.dl_id = (str(self.algo_id) + '_' + str(self.model_id) + '_' +
                      str(self.num_dim).strip('[]').replace(', ', '_') +
                      '_' + str(self.train_id) + '_' + str(self.num_epoch))
        self.param_pkl = 'dl_p' + self.dl_id + '.pkl'
        self.result_mat = ('result/' + self.dl_id + '/dl_r' +
                           str(self.test_id) + '.mat')
        self.buildModel()
        self.buildLayer()
        self.DataLoader = DBL_Data()
        self.do_test = True
        print self.param_pkl
        if not os.path.exists(self.param_pkl):
            self.do_test = False
            # training
            self.loadData_train()
            self.buildAlgo()

    def setupParam(self):
        raise NotImplementedError(str(type(self)) +
                                  " does not implement: setupParam().")

    def check_setupParam(self):
        varnames = ['path_train', 'path_test', 'p_data', 'batch_size']
        for varname in varnames:
            if getattr(self, varname) is None:
                raise ValueError('Need to set "' + varname +
                                 '" in setupParam()')

    def buildModel(self):
        raise NotImplementedError(str(type(self)) +
                                  " does not implement: buildModel().")

    def buildAlgo(self):
        raise NotImplementedError(str(type(self)) +
                                  " does not implement: buildAlgo().")

    def train(self):
        raise NotImplementedError(str(type(self)) +
                                  " does not implement: train().")

    def test(self):
        raise NotImplementedError(str(type(self)) +
                                  " does not implement: test().")

    def loadData_train(self):
        raise NotImplementedError(str(type(self)) +
                                  " does not implement: loadData_train().")

    def run(self):
        if self.do_test:
            self.test()
        else:
            # training
            self.train()

    def buildLayer(self):
        # set up the layers
        self.layers = []
        for param in self.p_layers:
            if param[0].param_type == 0:
                self.layers = self.layers + DBL_ConvLayers(param)
            elif param[0].param_type == 1:
                self.layers = self.layers + DBL_FcLayers(param)
            elif param[0].param_type == 2:
                self.layers = self.layers + DBL_CfLayers(param)
        self.model = MLP(self.layers, input_space=self.ishape)

        # load available weights
        pre_dl_id = self.param_pkl[:self.param_pkl.rfind('_') + 1]
        fns = glob.glob(pre_dl_id + '*.pkl')
        epoch_max = 0
        if len(fns) == 0:
            # first time: load the matlab prior if available
            mat_init = ('init_p' + str(self.model_id) + '_' +
                        str(self.train_id) + '.mat')
            if os.path.exists(mat_init):
                print "load initial mat weight: ", mat_init
                self.loadWeight(mat_init)
        else:
            for fn in fns:
                epoch_id = int(fn[fn.rfind('_') + 1:fn.find('.pkl')])
                if epoch_id > epoch_max and epoch_id <= self.num_epoch:
                    epoch_max = epoch_id
            if epoch_max > 0:
                print "load weight at epoch: ", epoch_max
                self.loadWeight(pre_dl_id + str(epoch_max) + '.pkl')
                self.num_epoch -= epoch_max
                self.p_monitor['epoch'] = epoch_max

    def runTrain(self):
        self.loadAlgo(self.p_algo)
        self.train_monitor = trainMonitor(self.model.monitor,
                                          self.p_monitor)
        self.train_monitor.run()
        while self.algo.continue_learning(self.model):
            self.algo.train(self.DataLoader.data['train'])
            self.train_monitor.run()
            if self.do_savew and \
                    (self.train_monitor.monitor._epochs_seen + 1) % 10 == 0:
                self.saveWeight(self.param_pkl)
        if self.do_savew:
            self.saveWeight(self.param_pkl)

    def runTest(self, data_test=None, metric=-1):
        """
        metric: evaluation metric
            0: classification error
            1: L1 regression error
            2: L2 regression error
        """
        if data_test is None:
            data_test = self.DataLoader.data['test']
        batch_size = self.batch_size

        # make batches; pad with zero rows so the total is a
        # multiple of the batch size
        m = data_test.X.shape[0]
        extra = (batch_size - m % batch_size) % batch_size
        assert (m + extra) % batch_size == 0
        if extra > 0:
            data_test.X = np.concatenate(
                (data_test.X,
                 np.zeros((extra, data_test.X.shape[1]),
                          dtype=data_test.X.dtype)),
                axis=0)
            assert data_test.X.shape[0] % batch_size == 0

        X = self.model.get_input_space().make_batch_theano()
        Y = self.model.fprop(X)

        from theano import function
        if metric == 0:
            from theano import tensor as T
            y = T.argmax(Y, axis=1)
            f = function([X], y)
        else:
            f = function([X], Y)

        yhat = []
        for i in xrange(data_test.X.shape[0] / batch_size):
            x_arg = data_test.X[i * batch_size:(i + 1) * batch_size, :]
            if X.ndim > 2:
                x_arg = data_test.get_topological_view(x_arg)
            yhat.append(f(x_arg.astype(X.dtype)))
        yhat = np.concatenate(yhat)
        yhat = yhat[:m]
        data_test.X = data_test.X[:m, :]

        y = data_test.y
        acc = -1
        if y is not None:
            if metric == 0:
                if data_test.y.ndim > 1:
                    y = np.argmax(data_test.y, axis=1)
                assert len(y) == len(yhat)
                acc = float(np.sum(y - yhat == 0)) / m
            elif metric == 1:
                acc = float(np.sum(abs(y - yhat))) / m
            elif metric == 2:
                acc = float(np.sum((y - np.reshape(yhat, y.shape)) ** 2)) / m
        print "acc: ", acc
        return [[yhat], [acc]]
import numpy as np
from theano import config
from theano import function
from theano import tensor

from pylearn2.models.mlp import MLP, Tanh
from pylearn2.sandbox.rnn.space import SequenceSpace
from pylearn2.sandbox.rnn.models.mlp import Recurrent
from pylearn2.space import VectorSpace

mlp = MLP(layers=[Tanh(dim=25, layer_name='pre_rnn', irange=0.01),
                  Recurrent(dim=50, layer_name='recurrent', irange=0.01),
                  Tanh(dim=100, layer_name='h', irange=0.01)],
          input_space=SequenceSpace(VectorSpace(dim=25)))

# Very simple test
input = tensor.tensor3()
output = mlp.fprop(input)
f = function([input], output)
assert f(np.random.rand(10, 5, 25).astype(config.floatX)).shape == (5, 100)
import numpy as np
import theano

from pylearn2.models.mlp import MLP
from pylearn2.space import Conv2DSpace

from adversarial.deconv import Deconv

input_space = Conv2DSpace(shape=(2, 1), num_channels=16,
                          axes=('c', 0, 1, 'b'))
deconv = Deconv(layer_name='deconv', num_channels=1, kernel_shape=(4, 4),
                output_stride=(2, 2), irange=0.)
mlp = MLP(input_space=input_space, layers=[deconv])

X = input_space.make_theano_batch()
f = theano.function([X], mlp.fprop(X))

# Construct dummy filters.
# Just use two for simplicity.
filter1 = np.array([[0, 1, 0, 1],
                    [1, 0, 1, 0],
                    [0, 1, 0, 1],
                    [1, 0, 1, 0]])
filter2 = np.array([[-1, 0, -1, 0],
                    [0, -1, 0, -1],
                    [-1, 0, -1, 0],
                    [0, -1, 0, -1]])

filters_dest = deconv.transformer._filters
new_filters = np.zeros((16, 4, 4), dtype=filters_dest.dtype)
new_filters[0] = filter1
new_filters[1] = filter2
new_filters = new_filters.reshape(16, 4, 4, 1).swapaxes(0, 3)
deconv.transformer._filters.set_value(new_filters)
print "" print "CONFIG: input =", ni, "x", iw, "x", ih, "* ker =", ni, "x", no, "x", kw, "x", kh, "( bs =", bs, ", stride =", dw, ")" conv = MLP( batch_size=bs, input_space=Conv2DSpace((ih, iw), num_channels=ni, axes=("b", "c", 0, 1)), layers=[ConvElemwise(no, (kw, kh), "ConvTest", ConvNonlinearity(), irange=0.1)], ) inputBatch = np.random.randn(bs, ni, ih, iw) sharedX = theano.shared(inputBatch.astype("float32")) sharedY = theano.shared(np.random.randn(bs, no, (ih - kh) / dh + 1, (iw - kw) / dw + 1).astype("float32")) X = theano.tensor.tensor4() Y = conv.fprop(X) fprop = theano.function([], [], givens=[(X, sharedX)], updates=[(sharedY, Y)], on_unused_input="ignore") theano.sandbox.cuda.synchronize() start = time.time() for i in range(steps): fprop() theano.sandbox.cuda.synchronize() tm = (time.time() - start) / steps del fprop del sharedX del conv del sharedY
class MLPTraining:
    def __init__(self, data_path="./datasets/", save_path="training.pkl",
                 simulation_data=None, identifier=0,
                 preprocessor='uniform'):
        self.id = identifier
        self.data_path = data_path
        self.save_path = save_path
        if simulation_data is not None:
            self.sim_data = simulation_data
        else:
            self.sim_data = SimulationData(data_path)
        if not self.sim_data.is_loaded:
            self.sim_data.load_data()
        self.sim_data.preprocessor(kind=preprocessor)
        tmp = self.sim_data.split_train_test()
        self.datasets = {'train': tmp[0], 'test': tmp[1]}
        self.num_simulations = self.sim_data.num_simulations
        self.input_values = self.sim_data.input_values
        self.output_values = self.sim_data.output_values

    def set_structure(self, num_layers=4, shape='linear'):
        structure = []
        lower_number = self.input_values
        for i in range(num_layers):
            upper_number = lower_number
            lower_number = self.input_values - (i + 1) * \
                (self.input_values - self.output_values) / num_layers
            structure.append([upper_number, lower_number])
        self.structure = structure
        return structure

    def get_structure(self):
        return self.structure

    def get_Linear_Layer(self, structure, i=0):
        n_input, n_output = structure
        config = {
            'dim': n_output,
            'layer_name': ("l%d" % i),
            'irange': .5,
            'use_abs_loss': False,
            'use_bias': False,
        }
        return Linear(**config)

    def get_Sigmoid_Layer(self, structure, i=0):
        n_input, n_output = structure
        config = {
            'dim': n_output,
            'layer_name': ("s%d" % i),
            'irange': 0.05,
        }
        return Sigmoid(**config)

    def get_Tanh_Layer(self, structure, i=0):
        n_input, n_output = structure
        config = {
            'dim': n_output,
            'layer_name': ("t%d" % i),
            'irange': 0.05,
        }
        return Tanh(**config)

    def get_layers(self, act_function='linear'):
        self.layers = []
        i = 0
        for pair in self.structure:
            i += 1
            if act_function == 'linear':
                self.layers.append(self.get_Linear_Layer(structure=pair,
                                                         i=i))
            if act_function == 'sigmoid':
                self.layers.append(self.get_Sigmoid_Layer(structure=pair,
                                                          i=i))
            if act_function == 'tanh':
                self.layers.append(self.get_Tanh_Layer(structure=pair,
                                                       i=i))
        return self.layers

    def get_model(self, batch_size):
        vis = self.structure[0][0]
        self.model = MLP(layers=self.layers, nvis=vis,
                         batch_size=batch_size, layer_name=None)
        return self.model

    def set_training_criteria(self, learning_rate=0.05, cost=Default(),
                              batch_size=10, max_epochs=10):
        self.training_alg = SGD(
            learning_rate=learning_rate,
            cost=cost,
            batch_size=batch_size,
            monitoring_dataset=self.datasets,
            termination_criterion=EpochCounter(max_epochs))

    def set_extensions(self, extensions):
        self.extensions = extensions
        # e.g. [MonitorBasedSaveBest(channel_name='objective',
        #       save_path='./training/training_monitor_best.pkl')]

    def set_attributes(self, attributes):
        self.attributes = attributes

    def define_training_experiment(self, save_freq=10):
        self.experiment = Train(dataset=self.datasets['train'],
                                model=self.model,
                                algorithm=self.training_alg,
                                save_path=self.save_path,
                                save_freq=save_freq,
                                allow_overwrite=True,
                                extensions=self.extensions)

    def train_experiment(self):
        self.experiment.main_loop()
        self.save_model()

    def save_model(self):
        self.model = serial.load(self.save_path)

    def predict(self, test=None, X=None, y=None):
        if test is not None:
            x_test = test.X
            y_test = test.y
        else:
            x_test = X
            y_test = y
        X = self.model.get_input_space().make_theano_batch()
        Y = self.model.fprop(X)
        f = theano.function([X], Y)
        y_pred = f(x_test)
        if y_test is not None:
            MSE = np.mean(np.square(y_test - y_pred))
            print "MSE:", MSE
            var = np.mean(np.square(y_test))
            print "Var:", var
            self.plot_prediction(y_test, y_pred)
        else:
            return y_pred

    def plot_prediction(self, y_test, y_pred):
        m = int(np.sqrt(self.output_values)) + 1
        fig, axarr = plt.subplots(m, m)
        r = []
        row = 0
        col = 0
        for i in range(self.output_values):
            x = np.array([])
            y = np.array([])
            for j in range(len(y_test)):
                x = np.append(x, y_test[j][i])
                y = np.append(y, y_pred[j][i])
            slope, intercept, r_value, p_value, std_err = \
                scipy.stats.linregress(x, y)
            r.append(r_value ** 2)
            axarr[row, col].plot(x, y, 'ro')
            col += 1
            if col == m:
                col = 0
                row += 1
        plt.show()
                num_pieces=1,
                kernel_shape=(4, 4),
                pool_shape=(1, 1),
                pool_stride=(1, 1),
                irange=0.05)
deconv = Deconv(layer_name='deconv',
                num_channels=1,
                kernel_shape=(4, 4),
                irange=0.05)

mlp = MLP(input_space=input_space, layers=[conv, deconv])
mlp.layers[1].transformer._filters.set_value(
    mlp.layers[0].transformer._filters.get_value())

x = input_space.make_theano_batch()
out = mlp.fprop(x)
f = theano.function([x], out)

data = MNIST('test')
data_specs = (input_space, 'features')
iter = data.iterator(mode='sequential', batch_size=2,
                     data_specs=data_specs)

pv = patch_viewer.PatchViewer((10, 10), (28, 28))
for item in iter:
    res = f(item)
    pv.add_patch(item[0, :, :, 0])
    pv.add_patch(res[0, :, :, 0])
    pv.show()
    break
def test_flattener_layer():
    # To test the FlattenerLayer we create a very simple feed-forward
    # neural network with two parallel linear layers. We then create two
    # separate feed-forward neural networks with single linear layers. In
    # principle, these two models should be identical if we start from the
    # same parameters. This makes it easy to test that the composite layer
    # works as expected.

    # Create network with composite layers.
    mlp_composite = MLP(
        layers=[
            FlattenerLayer(
                CompositeLayer(
                    'composite',
                    [Linear(2, 'h0', 0.1),
                     Linear(2, 'h1', 0.1)],
                    {0: [0], 1: [1]}
                )
            )
        ],
        input_space=CompositeSpace([VectorSpace(5), VectorSpace(10)]),
        input_source=('features0', 'features1')
    )

    # Create network with single linear layer, corresponding to first
    # layer in the composite network.
    mlp_first_part = MLP(
        layers=[Linear(2, 'h0', 0.1)],
        input_space=VectorSpace(5),
        input_source='features0'
    )

    # Create network with single linear layer, corresponding to second
    # layer in the composite network.
    mlp_second_part = MLP(
        layers=[Linear(2, 'h1', 0.1)],
        input_space=VectorSpace(10),
        input_source='features1'
    )

    # Create dataset which we will test our networks against.
    shared_dataset = np.random.rand(20, 19).astype(theano.config.floatX)

    # Make dataset for composite network.
    dataset_composite = VectorSpacesDataset(
        (shared_dataset[:, 0:5],
         shared_dataset[:, 5:15],
         shared_dataset[:, 15:19]),
        (CompositeSpace([VectorSpace(5),
                         VectorSpace(10),
                         VectorSpace(4)]),
         ('features0', 'features1', 'targets'))
    )

    # Make dataset for first single linear layer network.
    dataset_first_part = VectorSpacesDataset(
        (shared_dataset[:, 0:5],
         shared_dataset[:, 15:17]),
        (CompositeSpace([VectorSpace(5),
                         VectorSpace(2)]),
         ('features0', 'targets'))
    )

    # Make dataset for second single linear layer network.
    dataset_second_part = VectorSpacesDataset(
        (shared_dataset[:, 5:15],
         shared_dataset[:, 17:19]),
        (CompositeSpace([VectorSpace(10),
                         VectorSpace(2)]),
         ('features1', 'targets'))
    )

    # Initialize all MLPs to start from zero weights.
    mlp_composite.layers[0].raw_layer.layers[0].set_weights(
        mlp_composite.layers[0].raw_layer.layers[0].get_weights() * 0.0)
    mlp_composite.layers[0].raw_layer.layers[1].set_weights(
        mlp_composite.layers[0].raw_layer.layers[1].get_weights() * 0.0)
    mlp_first_part.layers[0].set_weights(
        mlp_first_part.layers[0].get_weights() * 0.0)
    mlp_second_part.layers[0].set_weights(
        mlp_second_part.layers[0].get_weights() * 0.0)

    # Train all models with their respective datasets.
    train_composite = Train(dataset_composite, mlp_composite,
                            SGD(0.0001, batch_size=20))
    train_composite.algorithm.termination_criterion = EpochCounter(1)
    train_composite.main_loop()

    train_first_part = Train(dataset_first_part, mlp_first_part,
                             SGD(0.0001, batch_size=20))
    train_first_part.algorithm.termination_criterion = EpochCounter(1)
    train_first_part.main_loop()

    train_second_part = Train(dataset_second_part, mlp_second_part,
                              SGD(0.0001, batch_size=20))
    train_second_part.algorithm.termination_criterion = EpochCounter(1)
    train_second_part.main_loop()

    # Check that the composite feed-forward neural network has learned
    # the same parameters as each individual feed-forward neural network.
    np.testing.assert_allclose(
        mlp_composite.layers[0].raw_layer.layers[0].get_weights(),
        mlp_first_part.layers[0].get_weights())
    np.testing.assert_allclose(
        mlp_composite.layers[0].raw_layer.layers[1].get_weights(),
        mlp_second_part.layers[0].get_weights())

    # Check that we get the same output given the same input on a
    # randomly generated dataset.
    X_composite = mlp_composite.get_input_space().make_theano_batch()
    X_first_part = mlp_first_part.get_input_space().make_theano_batch()
    X_second_part = mlp_second_part.get_input_space().make_theano_batch()

    fprop_composite = theano.function(X_composite,
                                      mlp_composite.fprop(X_composite))
    fprop_first_part = theano.function([X_first_part],
                                       mlp_first_part.fprop(X_first_part))
    fprop_second_part = theano.function([X_second_part],
                                        mlp_second_part.fprop(X_second_part))

    X_data = np.random.random(size=(10, 15)).astype(theano.config.floatX)
    y_data = np.random.randint(low=0, high=10, size=(10, 4))

    np.testing.assert_allclose(
        fprop_composite(X_data[:, 0:5], X_data[:, 5:15])[:, 0:2],
        fprop_first_part(X_data[:, 0:5]))
    np.testing.assert_allclose(
        fprop_composite(X_data[:, 0:5], X_data[:, 5:15])[:, 2:4],
        fprop_second_part(X_data[:, 5:15]))

    # Finally check that calling the internal FlattenerLayer behaves
    # as we would expect. First, retrieve the FlattenerLayer.
    fl = mlp_composite.layers[0]

    # Check that it agrees on the input space.
    assert mlp_composite.get_input_space() == fl.get_input_space()

    # Check that it agrees on the parameters.
    for i in range(0, 4):
        np.testing.assert_allclose(fl.get_params()[i].eval(),
                                   mlp_composite.get_params()[i].eval())
def test_composite_layer():
    """
    Test the routing functionality of the CompositeLayer
    """
    # Without routing
    composite_layer = CompositeLayer('composite_layer',
                                     [Linear(2, 'h0', irange=0),
                                      Linear(2, 'h1', irange=0),
                                      Linear(2, 'h2', irange=0)])
    mlp = MLP(nvis=2, layers=[composite_layer])
    for i in range(3):
        composite_layer.layers[i].set_weights(
            np.eye(2, dtype=theano.config.floatX)
        )
        composite_layer.layers[i].set_biases(
            np.zeros(2, dtype=theano.config.floatX)
        )
    X = tensor.matrix()
    y = mlp.fprop(X)
    funs = [theano.function([X], y_elem) for y_elem in y]
    x_numeric = np.random.rand(2, 2).astype('float32')
    y_numeric = [f(x_numeric) for f in funs]
    assert np.all(x_numeric == y_numeric)

    # With routing
    for inputs_to_layers in [{0: [1], 1: [2], 2: [0]},
                             {0: [1], 1: [0, 2], 2: []},
                             {0: [], 1: []}]:
        composite_layer = CompositeLayer('composite_layer',
                                         [Linear(2, 'h0', irange=0),
                                          Linear(2, 'h1', irange=0),
                                          Linear(2, 'h2', irange=0)],
                                         inputs_to_layers)
        input_space = CompositeSpace([VectorSpace(dim=2),
                                      VectorSpace(dim=2),
                                      VectorSpace(dim=2)])
        input_source = ('features0', 'features1', 'features2')
        mlp = MLP(input_space=input_space, input_source=input_source,
                  layers=[composite_layer])
        for i in range(3):
            composite_layer.layers[i].set_weights(
                np.eye(2, dtype=theano.config.floatX)
            )
            composite_layer.layers[i].set_biases(
                np.zeros(2, dtype=theano.config.floatX)
            )
        X = [tensor.matrix() for _ in range(3)]
        y = mlp.fprop(X)
        funs = [theano.function(X, y_elem, on_unused_input='ignore')
                for y_elem in y]
        x_numeric = [np.random.rand(2, 2).astype(theano.config.floatX)
                     for _ in range(3)]
        y_numeric = [f(*x_numeric) for f in funs]
        assert all([all([np.all(x_numeric[i] == y_numeric[j])
                         for j in inputs_to_layers[i]])
                    for i in inputs_to_layers])

    # Get the weight decay expressions from a composite layer
    composite_layer = CompositeLayer('composite_layer',
                                     [Linear(2, 'h0', irange=0.1),
                                      Linear(2, 'h1', irange=0.1)])
    input_space = VectorSpace(dim=10)
    mlp = MLP(input_space=input_space, layers=[composite_layer])
    for attr, coeff in product(['get_weight_decay', 'get_l1_weight_decay'],
                               [[0.7, 0.3], 0.5]):
        f = theano.function([], getattr(composite_layer, attr)(coeff))
        if is_iterable(coeff):
            g = theano.function(
                [],
                tensor.sum([getattr(layer, attr)(c)
                            for c, layer in zip(coeff,
                                                composite_layer.layers)])
            )
        else:
            g = theano.function(
                [],
                tensor.sum([getattr(layer, attr)(coeff)
                            for layer in composite_layer.layers])
            )
        assert np.allclose(f(), g())
    ann.monitor()
    if not t_algo.continue_learning(ann):
        break

# test: https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/scripts/icml_2013_wrepl/emotions/make_submission.py
ds2 = DataPylearn2([test_set_x, test_set_y], [48, 48, 1], -1)
m = ds2.X.shape[0]
batch_size = 100
extra = (batch_size - m % batch_size) % batch_size
assert (m + extra) % batch_size == 0
if extra > 0:
    ds2.X = np.concatenate((ds2.X,
                            np.zeros((extra, ds2.X.shape[1]),
                                     dtype=ds2.X.dtype)),
                           axis=0)
assert ds2.X.shape[0] % batch_size == 0

X = ann.get_input_space().make_batch_theano()
Y = ann.fprop(X)

from theano import tensor as T
y = T.argmax(Y, axis=1)
from theano import function
f = function([X], y)

y = []
for i in xrange(ds2.X.shape[0] / batch_size):
    x_arg = ds2.X[i * batch_size:(i + 1) * batch_size, :]
    if X.ndim > 2:
        x_arg = ds2.get_topological_view(x_arg)
    y.append(f(x_arg.astype(X.dtype)))
y = np.concatenate(y)
print y[:m]
print test_set_y
import numpy as np
import theano

from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.models.mlp import MLP, Sigmoid, Linear
from pylearn2.termination_criteria import EpochCounter
from pylearn2.training_algorithms.sgd import SGD

n = 200
p = 2
X = np.random.normal(0, 1, (n, p))
y = X[:, 0] * X[:, 1] + np.random.normal(0, .1, n)
y.shape = (n, 1)

ds = DenseDesignMatrix(X=X, y=y)

hidden_layer = Sigmoid(layer_name='hidden', dim=10, irange=.1,
                       init_bias=1.)
output_layer = Linear(dim=1, layer_name='y', irange=.1)
trainer = SGD(learning_rate=.05, batch_size=10,
              termination_criterion=EpochCounter(200))
layers = [hidden_layer, output_layer]
ann = MLP(layers, nvis=2)
trainer.setup(ann, ds)

while True:
    trainer.train(dataset=ds)
    ann.monitor.report_epoch()
    ann.monitor()
    if not trainer.continue_learning(ann):
        break

inputs = X
y_est = ann.fprop(theano.shared(inputs, name='inputs')).eval()
print(y_est.shape)
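A short follow-up sketch (not in the original script): since the targets y are known here, the training-set mean squared error of the fitted network can be checked directly from y_est.

# Hypothetical follow-up: measure the fit on the training data.
mse = np.mean((y_est - y) ** 2)
print("train MSE:", mse)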
def test_composite_layer():
    """
    Test the routing functionality of the CompositeLayer
    """
    # Without routing
    composite_layer = CompositeLayer('composite_layer',
                                     [Linear(2, 'h0', irange=0),
                                      Linear(2, 'h1', irange=0),
                                      Linear(2, 'h2', irange=0)])
    mlp = MLP(nvis=2, layers=[composite_layer])
    for i in range(3):
        composite_layer.layers[i].set_weights(
            np.eye(2, dtype=theano.config.floatX)
        )
        composite_layer.layers[i].set_biases(
            np.zeros(2, dtype=theano.config.floatX)
        )
    X = tensor.matrix()
    y = mlp.fprop(X)
    funs = [theano.function([X], y_elem) for y_elem in y]
    x_numeric = np.random.rand(2, 2).astype('float32')
    y_numeric = [f(x_numeric) for f in funs]
    assert np.all(x_numeric == y_numeric)

    # With routing
    for inputs_to_layers in [{0: [1], 1: [2], 2: [0]},
                             {0: [1], 1: [0, 2], 2: []},
                             {0: [], 1: []}]:
        composite_layer = CompositeLayer('composite_layer',
                                         [Linear(2, 'h0', irange=0),
                                          Linear(2, 'h1', irange=0),
                                          Linear(2, 'h2', irange=0)],
                                         inputs_to_layers)
        input_space = CompositeSpace([VectorSpace(dim=2),
                                      VectorSpace(dim=2),
                                      VectorSpace(dim=2)])
        mlp = MLP(input_space=input_space, layers=[composite_layer])
        for i in range(3):
            composite_layer.layers[i].set_weights(
                np.eye(2, dtype=theano.config.floatX)
            )
            composite_layer.layers[i].set_biases(
                np.zeros(2, dtype=theano.config.floatX)
            )
        X = [tensor.matrix() for _ in range(3)]
        y = mlp.fprop(X)
        funs = [theano.function(X, y_elem, on_unused_input='ignore')
                for y_elem in y]
        x_numeric = [np.random.rand(2, 2).astype(theano.config.floatX)
                     for _ in range(3)]
        y_numeric = [f(*x_numeric) for f in funs]
        assert all([all([np.all(x_numeric[i] == y_numeric[j])
                         for j in inputs_to_layers[i]])
                    for i in inputs_to_layers])

    # Get the weight decay expressions from a composite layer
    composite_layer = CompositeLayer('composite_layer',
                                     [Linear(2, 'h0', irange=0.1),
                                      Linear(2, 'h1', irange=0.1)])
    input_space = VectorSpace(dim=10)
    mlp = MLP(input_space=input_space, layers=[composite_layer])
    for attr, coeff in product(['get_weight_decay', 'get_l1_weight_decay'],
                               [[0.7, 0.3], 0.5]):
        f = theano.function([], getattr(composite_layer, attr)(coeff))
        if is_iterable(coeff):
            g = theano.function(
                [],
                tensor.sum([getattr(layer, attr)(c)
                            for c, layer in zip(coeff,
                                                composite_layer.layers)])
            )
        else:
            g = theano.function(
                [],
                tensor.sum([getattr(layer, attr)(coeff)
                            for layer in composite_layer.layers])
            )
        assert np.allclose(f(), g())
    layers=[ConvElemwise(no, (kw, kh), 'ConvTest',
                         ConvNonlinearity(), irange=0.1)])

inputBatch = np.random.randn(bs, ni, ih, iw)
sharedX = theano.shared(inputBatch.astype('float32'))
sharedY = theano.shared(
    np.random.randn(bs, no,
                    (ih - kh) / dh + 1,
                    (iw - kw) / dw + 1).astype('float32'))

X = theano.tensor.tensor4()
Y = conv.fprop(X)

fprop = theano.function([], [],
                        givens=[(X, sharedX)],
                        updates=[(sharedY, Y)],
                        on_unused_input='ignore')

theano.sandbox.cuda.synchronize()
start = time.time()
for i in range(steps):
    fprop()
theano.sandbox.cuda.synchronize()
tm = (time.time() - start) / steps

del fprop
del sharedX