def get_weights_topo(self):
    """
    Returns a topological view of the weights, with blocks of wxf
    filters interleaved with the corresponding blocks of wyf filters.

    Returns
    -------
    weights : ndarray
        Same as the return value of `get_weights` but formatted as a 4D
        tensor with the axes being (hidden/factor units, rows, columns,
        channels). The number of channels is either 1 or 3 (because the
        filters will be visualized as grayscale or RGB images). At the
        moment the function only supports numbers of factors whose
        square root is exact.
    """
    if (not isinstance(self.input_space.components[0], Conv2DSpace) or
            not isinstance(self.input_space.components[1], Conv2DSpace)):
        raise NotImplementedError()

    wxf = self.wxf.get_value(borrow=False).T
    wyf = self.wyf.get_value(borrow=False).T
    convx = self.input_space.components[0]
    convy = self.input_space.components[1]
    vecx = VectorSpace(self.nvisx)
    vecy = VectorSpace(self.nvisy)

    wxf_view = vecx.np_format_as(
        wxf,
        Conv2DSpace(convx.shape,
                    num_channels=convx.num_channels,
                    axes=('b', 0, 1, 'c')))
    wyf_view = vecy.np_format_as(
        wyf,
        Conv2DSpace(convy.shape,
                    num_channels=convy.num_channels,
                    axes=('b', 0, 1, 'c')))

    h = int(numpy.ceil(numpy.sqrt(self.nfac)))
    new_weights = numpy.zeros((wxf_view.shape[0] * 2,
                               wxf_view.shape[1],
                               wxf_view.shape[2],
                               wxf_view.shape[3]),
                              dtype=wxf_view.dtype)

    t = 0
    while t < (self.nfac // h):
        filter_pair = numpy.concatenate(
            (wxf_view[h * t:h * (t + 1), ...],
             wyf_view[h * t:h * (t + 1), ...]), 0)
        new_weights[h * 2 * t:h * 2 * (t + 1), ...] = filter_pair
        t += 1

    return new_weights

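# A minimal standalone sketch (plain numpy, made-up shapes) of the tiling
# that get_weights_topo performs: blocks of h filters from wxf_view are
# stacked in alternation with the corresponding h filters from wyf_view,
# so each group of 2*h output rows holds h x-filters followed by their
# paired y-filters. `nfac` and the filter shape below are illustrative.
import numpy

nfac, h = 16, 4                          # h = ceil(sqrt(nfac))
wxf_view = numpy.arange(nfac * 3 * 3).reshape(nfac, 3, 3, 1).astype('float32')
wyf_view = -wxf_view                     # stand-in for the y filter bank

new_weights = numpy.zeros((nfac * 2,) + wxf_view.shape[1:],
                          dtype=wxf_view.dtype)
for t in range(nfac // h):
    pair = numpy.concatenate((wxf_view[h * t:h * (t + 1)],
                              wyf_view[h * t:h * (t + 1)]), 0)
    new_weights[h * 2 * t:h * 2 * (t + 1)] = pair

assert (new_weights[:h] == wxf_view[:h]).all()       # x block comes first
assert (new_weights[h:2 * h] == wyf_view[:h]).all()  # then its y block
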
def test_np_format_as_conv2d_vector_conv2d():
    conv2d_space1 = Conv2DSpace(shape=(8, 8), num_channels=3,
                                axes=('c', 'b', 1, 0))
    vector_space = VectorSpace(dim=8 * 8 * 3, sparse=False)
    conv2d_space0 = Conv2DSpace(shape=(8, 8), num_channels=3,
                                axes=('b', 'c', 0, 1))
    data = np.arange(5 * 8 * 8 * 3).reshape(5, 3, 8, 8)

    # Converting via a VectorSpace must agree with converting directly.
    vecval = conv2d_space0.np_format_as(data, vector_space)
    rval1 = vector_space.np_format_as(vecval, conv2d_space1)
    rval2 = conv2d_space0.np_format_as(data, conv2d_space1)
    assert np.allclose(rval1, rval2)

    # ('b', 'c', 0, 1) -> ('c', 'b', 1, 0) is a pure axis transpose.
    nval = data.transpose(1, 0, 3, 2)
    assert np.allclose(nval, rval1)

def test_np_format_as_vector2conv2D():
    vector_space = VectorSpace(dim=8 * 8 * 3, sparse=False)
    conv2d_space = Conv2DSpace(shape=(8, 8), num_channels=3,
                               axes=('b', 'c', 0, 1))
    data = np.arange(5 * 8 * 8 * 3).reshape(5, 8 * 8 * 3)
    rval = vector_space.np_format_as(data, conv2d_space)

    # Get data in a Conv2DSpace with default axes
    new_axes = conv2d_space.default_axes
    axis_to_shape = {'b': 5, 'c': 3, 0: 8, 1: 8}
    new_shape = tuple([axis_to_shape[ax] for ax in new_axes])
    nval = data.reshape(new_shape)
    # Then transpose
    nval = nval.transpose(*[new_axes.index(ax) for ax in conv2d_space.axes])
    assert np.all(rval == nval)

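# Illustrative sketch (plain numpy, independent of pylearn2) of the
# semantics the test above checks: a design-matrix row is reshaped into
# the default ('b', 0, 1, 'c') layout first, then transposed to the
# requested axes. Sizes below (2 examples of 2x2 RGB) are made up.
import numpy as np

data = np.arange(2 * 2 * 2 * 3).reshape(2, 2 * 2 * 3)
as_default = data.reshape(2, 2, 2, 3)        # ('b', 0, 1, 'c')
as_bc01 = as_default.transpose(0, 3, 1, 2)   # ('b', 'c', 0, 1)
assert as_bc01.shape == (2, 3, 2, 2)
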
def test_np_format_as_vector2vector():
    vector_space_initial = VectorSpace(dim=8 * 8 * 3, sparse=False)
    vector_space_final = VectorSpace(dim=8 * 8 * 3, sparse=False)
    data = np.arange(5 * 8 * 8 * 3).reshape(5, 8 * 8 * 3)
    rval = vector_space_initial.np_format_as(data, vector_space_final)
    assert np.all(rval == data)

class StereoViewConverter(object):
    """
    Converts stereo image data between two formats:

      A) A dense design matrix, one stereo pair per row (VectorSpace)
      B) An image pair (CompositeSpace of two Conv2DSpaces)

    Parameters
    ----------
    shape : tuple
        See doc for __init__'s <shape> parameter.
    """

    def __init__(self, shape, axes=None):
        """
        The arguments describe how the data is laid out in the design
        matrix.

        Parameters
        ----------
        shape : tuple
            A tuple of 4 ints, describing the shape of each datum.
            This is the size of each axis in <axes>, excluding the 'b'
            axis.
        axes : tuple
            A tuple of the following elements in any order:
                'b'  batch axis
                's'  stereo axis
                 0   image axis 0 (row)
                 1   image axis 1 (column)
                'c'  channel axis
        """
        shape = tuple(shape)

        if not all(isinstance(s, int) for s in shape):
            raise TypeError("Shape must be a tuple/list of ints")

        if len(shape) != 4:
            raise ValueError("Shape array needs to be of length 4, got %s."
                             % shape)

        datum_axes = list(axes)
        datum_axes.remove('b')
        if shape[datum_axes.index('s')] != 2:
            raise ValueError("Expected 's' axis to have size 2, got %d.\n"
                             "  axes: %s\n"
                             "  shape: %s"
                             % (shape[datum_axes.index('s')], axes, shape))

        self.shape = shape
        self.set_axes(axes)

        def make_conv2d_space(shape, axes):
            shape_axes = list(axes)
            shape_axes.remove('b')
            image_shape = tuple(shape[shape_axes.index(axis)]
                                for axis in (0, 1))
            conv2d_axes = list(axes)
            conv2d_axes.remove('s')
            return Conv2DSpace(shape=image_shape,
                               num_channels=shape[shape_axes.index('c')],
                               axes=conv2d_axes,
                               dtype=None)

        conv2d_space = make_conv2d_space(shape, axes)
        self.topo_space = CompositeSpace((conv2d_space, conv2d_space))
        self.storage_space = VectorSpace(dim=numpy.prod(shape))

    def get_formatted_batch(self, batch, space):
        """
        Returns a batch formatted to a space.

        Parameters
        ----------
        batch : ndarray
            The batch to format.
        space : a pylearn2.space.Space
            The target space to format to.
        """
        return self.storage_space.np_format_as(batch, space)

    def design_mat_to_topo_view(self, design_mat):
        """
        Called by DenseDesignMatrix.get_formatted_view() and
        get_batch_topo().

        Parameters
        ----------
        design_mat : ndarray
        """
        return self.storage_space.np_format_as(design_mat, self.topo_space)

    def design_mat_to_weights_view(self, design_mat):
        """
        Called by DenseDesignMatrix.get_weights_view().

        Parameters
        ----------
        design_mat : ndarray
        """
        return self.design_mat_to_topo_view(design_mat)

    def topo_view_to_design_mat(self, topo_batch):
        """
        Used by DenseDesignMatrix.set_topological_view() and
        DenseDesignMatrix.get_design_mat().

        Parameters
        ----------
        topo_batch : ndarray
        """
        return self.topo_space.np_format_as(topo_batch, self.storage_space)

    def view_shape(self):
        """
        Returns the shape of a single datum: the sizes of the axes in
        `self.axes`, excluding 'b'.
        """
        return self.shape

    def weights_view_shape(self):
        """
        Returns the shape used for weight views; identical to
        view_shape().
        """
        return self.view_shape()

    def set_axes(self, axes):
        """
        Change the order of the axes.

        Parameters
        ----------
        axes : tuple
            Must have length 5, must contain 'b', 's', 0, 1, 'c'.
        """
        axes = tuple(axes)

        if len(axes) != 5:
            raise ValueError("Axes must have 5 elements; got %s" % str(axes))

        for required_axis in ('b', 's', 0, 1, 'c'):
            if required_axis not in axes:
                raise ValueError("Axes must contain 'b', 's', 0, 1, and 'c'. "
                                 "Got %s." % str(axes))

        if axes.index('b') != 0:
            raise ValueError("The 'b' axis must come first (axes = %s)."
                             % str(axes))

        def remove_b_axis(axes):
            axes = list(axes)
            axes.remove('b')
            return tuple(axes)

        if hasattr(self, 'axes'):
            # Reorders the shape vector to match the new axis ordering.
            assert hasattr(self, 'shape')
            old_axes = remove_b_axis(self.axes)  # pylint: disable-msg=E0203
            new_axes = remove_b_axis(axes)
            new_shape = tuple(self.shape[old_axes.index(a)]
                              for a in new_axes)
            self.shape = new_shape

        self.axes = axes

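# Hypothetical usage sketch for the converter above. The shapes, axes,
# and batch size are illustrative only: 16x16 grayscale stereo pairs,
# one pair per design-matrix row.
import numpy
import theano

converter = StereoViewConverter(shape=(2, 16, 16, 1),
                                axes=('b', 's', 0, 1, 'c'))
design_mat = numpy.zeros((10, 2 * 16 * 16 * 1), dtype=theano.config.floatX)
# The topological view is a pair of ('b', 0, 1, 'c') batches,
# each of shape (10, 16, 16, 1).
left, right = converter.design_mat_to_topo_view(design_mat)
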
class HingeLoss(Layer):

    def __init__(self, n_classes, layer_name, irange=None,
                 istdev=None, no_affine=False, sparse_init=None):
        super(HingeLoss, self).__init__()
        self.__dict__.update(locals())
        del self.self

        self.output_space = VectorSpace(n_classes)
        if not self.no_affine:
            self.b = sharedX(np.zeros((n_classes,)), name='hingeloss_b')

    def get_monitoring_channels(self):
        if self.no_affine:
            return OrderedDict()

        W = self.W
        assert W.ndim == 2

        sq_W = T.sqr(W)
        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        return OrderedDict([
            ('row_norms_min', row_norms.min()),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

    @wraps(Layer.get_layer_monitoring_channels)
    def get_layer_monitoring_channels(self, state_below=None,
                                      state=None, targets=None):
        # channels that do not require state information
        # if self.no_affine:
        #     rval = OrderedDict()
        #
        #     W = self.W
        #
        #     assert W.ndim == 2
        #
        #     sq_W = T.sqr(W)
        #
        #     row_norms = T.sqrt(sq_W.sum(axis=1))
        #     col_norms = T.sqrt(sq_W.sum(axis=0))
        #
        #     rval = OrderedDict([('row_norms_min', row_norms.min()),
        #                         ('row_norms_mean', row_norms.mean()),
        #                         ('row_norms_max', row_norms.max()),
        #                         ('col_norms_min', col_norms.min()),
        #                         ('col_norms_mean', col_norms.mean()),
        #                         ('col_norms_max', col_norms.max()), ])
        rval = OrderedDict()

        if (state_below is not None) or (state is not None):
            if state is None:
                state = self.fprop(state_below)

            mx = state.max(axis=1)
            rval.update(OrderedDict([('mean_max_class', mx.mean()),
                                     ('max_max_class', mx.max()),
                                     ('min_max_class', mx.min())]))

            if targets is not None:
                y_hat = self.target_convert(T.argmax(state, axis=1))
                # Assume targets are in [0, 1] as binary one-hot
                y = self.target_convert(T.argmax(targets, axis=1))
                misclass = T.neq(y, y_hat).mean()
                misclass = T.cast(misclass, config.floatX)
                rval['misclass'] = misclass
                rval['nll'] = self.cost(Y_hat=state, Y=targets)

        return rval

    def get_monitoring_channels_from_state(self, state, target=None):
        warnings.warn("Layer.get_monitoring_channels_from_state is "
                      "deprecated. Use get_layer_monitoring_channels "
                      "instead. Layer.get_monitoring_channels_from_state "
                      "will be removed on or after September 24th, 2014",
                      stacklevel=2)

        mx = state.max(axis=1)

        rval = OrderedDict([('mean_max_class', mx.mean()),
                            ('max_max_class', mx.max()),
                            ('min_max_class', mx.min())])

        if target is not None:
            y_hat = self.target_convert(T.argmax(state, axis=1))
            # Assume target is in [0, 1] as binary one-hot
            y = self.target_convert(T.argmax(target, axis=1))
            misclass = T.neq(y, y_hat).mean()
            misclass = T.cast(misclass, config.floatX)
            rval['misclass'] = misclass
            rval['nll'] = self.cost(Y_hat=state, Y=target)

        return rval

    def set_input_space(self, space):
        self.input_space = space

        if not isinstance(space, Space):
            raise TypeError("Expected Space, got " + str(space) +
                            " of type " + str(type(space)))

        self.input_dim = space.get_total_dimension()
        self.needs_reformat = not isinstance(space, VectorSpace)

        desired_dim = self.input_dim
        self.desired_space = VectorSpace(desired_dim)

        if not self.needs_reformat:
            assert self.desired_space == self.input_space

        rng = self.mlp.rng

        if self.no_affine:
            self._params = []
        else:
            if self.irange is not None:
                assert self.istdev is None
                assert self.sparse_init is None
                W = rng.uniform(-self.irange, self.irange,
                                (self.input_dim, self.n_classes))
            elif self.istdev is not None:
                assert self.sparse_init is None
                W = rng.randn(self.input_dim, self.n_classes) * self.istdev
            else:
                assert self.sparse_init is not None
                W = np.zeros((self.input_dim, self.n_classes))
                for i in xrange(self.n_classes):
                    for j in xrange(self.sparse_init):
                        idx = rng.randint(0, self.input_dim)
                        while W[idx, i] != 0.:
                            idx = rng.randint(0, self.input_dim)
                        W[idx, i] = rng.randn()

            self.W = sharedX(W, 'hingeloss_W')

            self._params = [self.b, self.W]

    def get_weights_topo(self):
        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        desired = self.W.get_value().T
        ipt = self.desired_space.np_format_as(desired, self.input_space)
        rval = Conv2DSpace.convert_numpy(ipt, self.input_space.axes,
                                         ('b', 0, 1, 'c'))
        return rval

    def get_weights(self):
        if not isinstance(self.input_space, VectorSpace):
            raise NotImplementedError()

        return self.W.get_value()

    def set_weights(self, weights):
        self.W.set_value(weights)

    def set_biases(self, biases):
        self.b.set_value(biases)

    def get_biases(self):
        return self.b.get_value()

    def get_weights_format(self):
        return ('v', 'h')

    def fprop(self, state_below):
        self.input_space.validate(state_below)

        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        for value in get_debug_values(state_below):
            if (self.mlp.batch_size is not None and
                    value.shape[0] != self.mlp.batch_size):
                raise ValueError("state_below should have batch size " +
                                 str(self.mlp.batch_size) + " but has " +
                                 str(value.shape[0]))

        self.desired_space.validate(state_below)
        assert state_below.ndim == 2

        if not hasattr(self, 'no_affine'):
            self.no_affine = False

        if self.no_affine:
            rval = state_below
        else:
            assert self.W.ndim == 2
            b = self.b
            W = self.W
            rval = T.dot(state_below, W) + b

        for value in get_debug_values(rval):
            if self.mlp.batch_size is not None:
                assert value.shape[0] == self.mlp.batch_size

        return rval

    def target_convert(self, Y):
        """
        Converts targets from {0, 1} to {-1, 1}.
        """
        Y_t = 2. * Y - 1.
        return Y_t

    # def hinge_cost(self, W, Y, Y_hat, C=1.):
    def hinge_cost(self, Y, Y_hat):
        # prob = .5 * T.dot(self.W.T, self.W) + \
        #     C * (T.maximum(1 - Y * Y_hat, 0) ** 2.).sum(axis=1)
        prob = (T.maximum(1 - Y * Y_hat, 0) ** 2.).sum(axis=1)
        return prob

    def cost(self, Y, Y_hat):
        """
        Y must be one-hot binary. Y_hat is a hinge-loss estimate of Y.
        """
        assert hasattr(Y_hat, 'owner')
        owner = Y_hat.owner
        assert owner is not None
        op = owner.op
        if isinstance(op, Print):
            assert len(owner.inputs) == 1
            Y_hat, = owner.inputs
            owner = Y_hat.owner
            op = owner.op
        assert Y_hat.ndim == 2

        Y_t = self.target_convert(Y)
        # prob = self.hinge_cost(self.W, Y_t, Y_hat)
        prob = self.hinge_cost(Y_t, Y_hat)
        assert prob.ndim == 1
        rval = prob.mean()
        return rval

    def cost_matrix(self, Y, Y_hat):
        """
        Y must be one-hot binary. Y_hat is a hinge-loss estimate of Y.
        """
        assert hasattr(Y_hat, 'owner')
        owner = Y_hat.owner
        assert owner is not None
        op = owner.op
        if isinstance(op, Print):
            assert len(owner.inputs) == 1
            Y_hat, = owner.inputs
            owner = Y_hat.owner
            op = owner.op
        assert Y_hat.ndim == 2

        Y_t = self.target_convert(Y)
        # prob = self.hinge_cost(self.W, Y_t, Y_hat)
        prob = self.hinge_cost(Y_t, Y_hat)
        return prob

    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        return coeff * T.sqr(self.W).sum()

    def get_l1_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W = self.W
        return coeff * abs(W).sum()

    @wraps(Layer._modify_updates)
    def _modify_updates(self, updates):
        if self.no_affine:
            return

def train(d=None):
    train_X = np.array(d.train_X)
    train_y = np.array(d.train_Y)
    valid_X = np.array(d.valid_X)
    valid_y = np.array(d.valid_Y)
    test_X = np.array(d.test_X)
    test_y = np.array(d.test_Y)
    nb_classes = len(np.unique(train_y))
    train_y = convert_one_hot(train_y)
    valid_y = convert_one_hot(valid_y)
    # train_set = RotationalDDM(X=train_X, y=train_y)
    train_set = DenseDesignMatrix(X=train_X, y=train_y)
    valid_set = DenseDesignMatrix(X=valid_X, y=valid_y)
    print 'Setting up'
    batch_size = 100
    c0 = mlp.ConvRectifiedLinear(
        layer_name='c0',
        output_channels=64,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[2, 2],
        # W_lr_scale=0.25,
        max_kernel_norm=1.9365)
    c1 = mlp.ConvRectifiedLinear(
        layer_name='c1',
        output_channels=64,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[2, 2],
        # W_lr_scale=0.25,
        max_kernel_norm=1.9365)
    c2 = mlp.ConvRectifiedLinear(
        layer_name='c2',
        output_channels=64,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[5, 4],
        W_lr_scale=0.25,
        # max_kernel_norm=1.9365
    )
    sp0 = mlp.SoftmaxPool(
        detector_layer_dim=16,
        layer_name='sp0',
        pool_size=4,
        sparse_init=512,
    )
    sp1 = mlp.SoftmaxPool(
        detector_layer_dim=16,
        layer_name='sp1',
        pool_size=4,
        sparse_init=512,
    )
    r0 = mlp.RectifiedLinear(
        layer_name='r0',
        dim=512,
        sparse_init=512,
    )
    r1 = mlp.RectifiedLinear(
        layer_name='r1',
        dim=512,
        sparse_init=512,
    )
    s0 = mlp.Sigmoid(
        layer_name='s0',
        dim=500,
        # max_col_norm=1.9365,
        sparse_init=15,
    )
    out = mlp.Softmax(
        n_classes=nb_classes,
        layer_name='output',
        irange=.0,
        # max_col_norm=1.9365,
        # sparse_init=nb_classes,
    )
    epochs = EpochCounter(100)
    layers = [s0, out]
    decay_coeffs = [.00005, .00005, .00005]
    in_space = Conv2DSpace(
        shape=[d.size, d.size],
        num_channels=1,
    )
    vec_space = VectorSpace(d.size ** 2)
    nn = mlp.MLP(
        layers=layers,
        # input_space=in_space,
        nvis=d.size ** 2,
        # batch_size=batch_size,
    )
    trainer = sgd.SGD(
        learning_rate=0.01,
        # cost=SumOfCosts(costs=[
        #     dropout.Dropout(),
        #     MethodCost(method='cost_from_X'),
        #     WeightDecay(decay_coeffs),
        # ]),
        # cost=MethodCost(method='cost_from_X'),
        batch_size=batch_size,
        # train_iteration_mode='even_shuffled_sequential',
        termination_criterion=epochs,
        # learning_rule=learning_rule.Momentum(init_momentum=0.5),
    )
    # Note: the BGD trainer below replaces the SGD trainer defined above.
    trainer = bgd.BGD(
        batch_size=10000,
        line_search_mode='exhaustive',
        conjugate=1,
        updates_per_batch=10,
        termination_criterion=epochs,
    )
    lr_adjustor = LinearDecayOverEpoch(
        start=1,
        saturate=10,
        decay_factor=.1,
    )
    momentum_adjustor = learning_rule.MomentumAdjustor(
        final_momentum=.99,
        start=1,
        saturate=10,
    )
    trainer.setup(nn, train_set)
    print 'Learning'
    test_X = vec_space.np_format_as(test_X, nn.get_input_space())
    train_X = vec_space.np_format_as(train_X, nn.get_input_space())
    i = 0
    X = nn.get_input_space().make_theano_batch()
    Y = nn.fprop(X)
    predict = theano.function([X], Y)
    best = -40
    best_iter = -1
    while trainer.continue_learning(nn):
        print '--------------'
        print 'Training Epoch ' + str(i)
        trainer.train(dataset=train_set)
        nn.monitor()

        print 'Evaluating...'
        predictions = convert_categorical(predict(train_X[:2000]))
        score = accuracy_score(convert_categorical(train_y[:2000]),
                               predictions)
        print 'Score on train: ' + str(score)
        predictions = convert_categorical(predict(test_X))
        score = accuracy_score(test_y, predictions)
        print 'Score on test: ' + str(score)
        best, best_iter = (best, best_iter) if best > score else (score, i)
        print 'Current best: ' + str(best) + ' at iter ' + str(best_iter)
        print classification_report(test_y, predictions)

        print 'Adjusting parameters...'
        # momentum_adjustor.on_monitor(nn, valid_set, trainer)
        # lr_adjustor.on_monitor(nn, valid_set, trainer)
        i += 1
        print ' '

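# train() assumes `d` is a dataset container exposing split arrays and an
# image edge length. A minimal stand-in (attribute names inferred from the
# accesses above; the class itself is hypothetical) could look like:
class DatasetContainer(object):
    def __init__(self, size, train_X, train_Y, valid_X, valid_Y,
                 test_X, test_Y):
        self.size = size        # images are size x size, flattened to rows
        self.train_X, self.train_Y = train_X, train_Y
        self.valid_X, self.valid_Y = valid_X, valid_Y
        self.test_X, self.test_Y = test_X, test_Y
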
class L2SquareHinge(Layer):
    """
    A layer that can apply an affine transformation and use an
    L2-regularized square hinge loss.

    Parameters
    ----------
    n_classes : int
        Number of target classes.
    layer_name : string
        Name of the layer.
    C : float
        Weight on the L2 regularization term.
    irange : float
        If specified, initialize each weight randomly in
        U(-irange, irange).
    istdev : float
        If specified, initialize each weight randomly from N(0, istdev).
    sparse_init : int
        If specified, initialize sparse_init weights per unit from
        N(0, 1).
    W_lr_scale : float
        Scale for weight learning rate.
    b_lr_scale : float
        Scale for bias learning rate.
    max_row_norm : float
        Maximum norm for a row of the weight matrix.
    no_affine : boolean
        If True, the loss is applied directly to the inputs, with no
        affine transformation.
    max_col_norm : float
        Maximum norm for a column of the weight matrix.
    init_bias_target_marginals : dataset
        Take the probability distribution of the targets into account to
        intelligently initialize biases.
    binary_target_dim : int, optional
        If your targets are class labels (i.e. a binary vector) then set
        the number of targets here so that an IndexSpace of the proper
        dimension can be used as the target space. This allows the layer
        to compute the cost much more quickly than if it needs to convert
        the targets into a VectorSpace.
    """

    def __init__(self, n_classes, layer_name, C=0.1,
                 irange=None,
                 istdev=None,
                 sparse_init=None,
                 W_lr_scale=None,
                 b_lr_scale=None,
                 max_row_norm=None,
                 no_affine=False,
                 max_col_norm=None,
                 init_bias_target_marginals=None,
                 binary_target_dim=None):
        super(L2SquareHinge, self).__init__()

        if isinstance(W_lr_scale, str):
            W_lr_scale = float(W_lr_scale)

        self.__dict__.update(locals())
        del self.self
        del self.init_bias_target_marginals

        assert isinstance(n_classes, py_integer_types)

        if binary_target_dim is not None:
            assert isinstance(binary_target_dim, py_integer_types)
            self._has_binary_target = True
            self._target_space = IndexSpace(dim=binary_target_dim,
                                            max_labels=n_classes)
        else:
            self._has_binary_target = False

        self.output_space = VectorSpace(n_classes)

        self.b = sharedX(np.zeros((n_classes,)), name='hinge_b')
        if init_bias_target_marginals:
            y = init_bias_target_marginals.y
            if init_bias_target_marginals.y_labels is None:
                marginals = y.mean(axis=0)
            else:
                # compute class frequencies
                if np.max(y.shape) != np.prod(y.shape):
                    raise AssertionError("Use of "
                                         "`init_bias_target_marginals` "
                                         "requires that each example has "
                                         "a single label.")
                marginals = np.bincount(y.flat) / float(y.shape[0])

            assert marginals.ndim == 1
            b = pseudoinverse_softmax_numpy(marginals).astype(self.b.dtype)
            assert b.ndim == 1
            assert b.dtype == self.b.dtype
            self.b.set_value(b)
        else:
            assert init_bias_target_marginals is None

    @wraps(Layer.get_lr_scalers)
    def get_lr_scalers(self):
        rval = OrderedDict()

        if self.W_lr_scale is not None:
            assert isinstance(self.W_lr_scale, float)
            rval[self.W] = self.W_lr_scale

        if not hasattr(self, 'b_lr_scale'):
            self.b_lr_scale = None

        if self.b_lr_scale is not None:
            assert isinstance(self.b_lr_scale, float)
            rval[self.b] = self.b_lr_scale

        return rval

    @wraps(Layer.get_monitoring_channels)
    def get_monitoring_channels(self):
        warnings.warn("Layer.get_monitoring_channels is "
                      "deprecated. Use get_layer_monitoring_channels "
                      "instead. Layer.get_monitoring_channels "
                      "will be removed on or after September 24th, 2014",
                      stacklevel=2)

        W = self.W
        assert W.ndim == 2
        sq_W = T.sqr(W)
        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        return OrderedDict([
            ('row_norms_min', row_norms.min()),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

    @wraps(Layer.get_monitoring_channels_from_state)
    def get_monitoring_channels_from_state(self, state, target=None):
        warnings.warn("Layer.get_monitoring_channels_from_state is "
                      "deprecated. Use get_layer_monitoring_channels "
                      "instead. Layer.get_monitoring_channels_from_state "
                      "will be removed on or after September 24th, 2014",
                      stacklevel=2)

        # channels that do not require state information
        W = self.W
        assert W.ndim == 2
        sq_W = T.sqr(W)
        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        rval = OrderedDict([
            ('row_norms_min', row_norms.min()),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

        mx = state.max(axis=1)
        rval.update(OrderedDict([('mean_max_class', mx.mean()),
                                 ('max_max_class', mx.max()),
                                 ('min_max_class', mx.min())]))

        if target is not None:
            y_hat = T.argmax(state, axis=1)
            y = T.argmax(target, axis=1)
            misclass = T.neq(y, y_hat).mean()
            misclass = T.cast(misclass, config.floatX)
            rval['misclass'] = misclass
            rval['nll'] = self.cost(Y_hat=state, Y=target)

        return rval

    @wraps(Layer.get_layer_monitoring_channels)
    def get_layer_monitoring_channels(self, state_below=None,
                                      state=None, targets=None):
        # channels that do not require state information
        W = self.W
        assert W.ndim == 2
        sq_W = T.sqr(W)
        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        rval = OrderedDict([
            ('row_norms_min', row_norms.min()),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

        if (state_below is not None) or (state is not None):
            if state is None:
                state = self.fprop(state_below)

            mx = state.max(axis=1)
            rval.update(OrderedDict([('mean_max_class', mx.mean()),
                                     ('max_max_class', mx.max()),
                                     ('min_max_class', mx.min())]))

            if targets is not None:
                y_hat = T.argmax(state, axis=1)
                y = T.argmax(targets, axis=1)
                misclass = T.neq(y, y_hat).mean()
                misclass = T.cast(misclass, config.floatX)
                rval['misclass'] = misclass
                rval['nll'] = self.cost(Y_hat=state, Y=targets)

        return rval

    @wraps(Layer.set_input_space)
    def set_input_space(self, space):
        self.input_space = space

        if not isinstance(space, Space):
            raise TypeError("Expected Space, got " + str(space) +
                            " of type " + str(type(space)))

        self.input_dim = space.get_total_dimension()
        self.needs_reformat = not isinstance(space, VectorSpace)

        desired_dim = self.input_dim
        self.desired_space = VectorSpace(desired_dim)

        if not self.needs_reformat:
            assert self.desired_space == self.input_space

        rng = self.mlp.rng

        if self.no_affine:
            self._params = []
        else:
            print(self.input_dim, self.n_classes)
            if self.irange is not None:
                assert self.istdev is None
                assert self.sparse_init is None
                W = rng.uniform(-self.irange, self.irange,
                                (self.input_dim, self.n_classes))
            elif self.istdev is not None:
                assert self.sparse_init is None
                W = rng.randn(self.input_dim, self.n_classes) * self.istdev
            else:
                assert self.sparse_init is not None
                W = np.zeros((self.input_dim, self.n_classes))
                for i in xrange(self.n_classes):
                    for j in xrange(self.sparse_init):
                        idx = rng.randint(0, self.input_dim)
                        while W[idx, i] != 0.:
                            idx = rng.randint(0, self.input_dim)
                        W[idx, i] = rng.randn()

            self.W = sharedX(W, 'hinge_W')

            self._params = [self.b, self.W]

    @wraps(Layer.get_weights_topo)
    def get_weights_topo(self):
        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        desired = self.W.get_value().T
        ipt = self.desired_space.np_format_as(desired, self.input_space)
        rval = Conv2DSpace.convert_numpy(ipt, self.input_space.axes,
                                         ('b', 0, 1, 'c'))
        return rval

    @wraps(Layer.get_weights)
    def get_weights(self):
        if not isinstance(self.input_space, VectorSpace):
            raise NotImplementedError()

        return self.W.get_value()

    @wraps(Layer.set_weights)
    def set_weights(self, weights):
        self.W.set_value(weights)

    @wraps(Layer.set_biases)
    def set_biases(self, biases):
        self.b.set_value(biases)

    @wraps(Layer.get_biases)
    def get_biases(self):
        return self.b.get_value()

    @wraps(Layer.get_weights_format)
    def get_weights_format(self):
        return ('v', 'h')

    @wraps(Layer.fprop)
    def fprop(self, state_below):
        # Precondition
        self.input_space.validate(state_below)
        if self.needs_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)
        self.desired_space.validate(state_below)
        assert state_below.ndim == 2
        assert self.W.ndim == 2

        # Linear prediction
        rval = T.dot(state_below, self.W) + self.b
        return rval

    def hinge_cost(self, Y, Y_hat):
        # Y = Print(message="Y")(Y)
        # Y_hat = Print(message="Y_hat")(Y_hat)
        # Note: the margin here is written as 1 - (Y - Y_hat), unlike the
        # conventional 1 - Y * Y_hat margin used by HingeLoss above.
        prob = (self.C * self.W.norm(2) +
                (T.maximum(0, 1 - (Y - Y_hat)) ** 2.)).sum(axis=1)
        # self.W = Print(message="W")(self.W)
        # prob = (T.maximum(1 - Y * Y_hat, 0) ** 2.).sum(axis=0)
        # prob = Print(message="prob")(prob)
        return prob

    @wraps(Layer.cost)
    def cost(self, Y, Y_hat):
        return self.hinge_cost(Y, Y_hat).mean()

    # @wraps(Layer.cost_matrix)
    # def cost_matrix(self, Y, Y_hat):
    #     # cost = self._cost(Y, Y_hat)
    #     # if self._has_binary_target:
    #     #     flat_Y = Y.flatten()
    #     #     flat_matrix = T.alloc(0, (Y.shape[0] * cost.shape[1]))
    #     #     flat_indices = flat_Y + T.extra_ops.repeat(
    #     #         T.arange(Y.shape[0]) * cost.shape[1], Y.shape[1]
    #     #     )
    #     #     cost = T.set_subtensor(flat_matrix[flat_indices], flat_Y)
    #     #     return cost
    #     return None

    @wraps(Layer.get_weight_decay)
    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        return coeff * T.sqr(self.W).sum()

    @wraps(Layer.get_l1_weight_decay)
    def get_l1_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W = self.W
        return coeff * abs(W).sum()

    @wraps(Layer._modify_updates)
    def _modify_updates(self, updates):
        if self.no_affine:
            return

        if self.max_row_norm is not None:
            W = self.W
            if W in updates:
                updated_W = updates[W]
                row_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=1))
                desired_norms = T.clip(row_norms, 0, self.max_row_norm)
                scales = desired_norms / (1e-7 + row_norms)
                updates[W] = updated_W * scales.dimshuffle(0, 'x')

        if self.max_col_norm is not None:
            assert self.max_row_norm is None
            W = self.W
            if W in updates:
                updated_W = updates[W]
                col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

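# Illustrative numpy transcription (not part of the class) of what
# L2SquareHinge.hinge_cost computes. Two quirks of the Theano expression
# it mirrors are worth noting: the margin is 1 - (Y - Y_hat) rather than
# the conventional 1 - Y * Y_hat, and the scalar penalty C * ||W||_2 is
# added inside the per-class sum, so it is counted n_classes times.
# All shapes and values below are made up.
import numpy as np

C = 0.1
W = np.ones((4, 3))                    # (input_dim, n_classes)
Y = np.array([[0., 1., 0.]])           # one-hot target
Y_hat = np.array([[-2., 3., 0.5]])     # affine scores from fprop
l2 = np.sqrt((W ** 2).sum())           # W.norm(2) in the Theano code
per_example = (C * l2 + np.maximum(0, 1 - (Y - Y_hat)) ** 2.).sum(axis=1)
cost = per_example.mean()              # what cost() returns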