import warnings

import numpy
import theano


# Batch-normalization layer: keeps a running mean (x_avg) and a learned shift
# (beta), normalizing over the batch axis and all spatial axes.
def constructLayer(self, inputShape, initParams, name, act_func=None,
                   alpha=0.1, lr_scheduler=None, **layerSpecs):
    self.layerName = name
    self.actFunc = act_func
    self.alpha = alpha
    self.params.setLearningRateScheduler(lr_scheduler)
    # normalize over every axis except the channel axis (axis 1)
    self.norm_axis = (0,) + tuple(range(2, len(inputShape)))

    x_avg_values = None
    beta_values = None
    # parameters keep singleton dimensions on the normalized axes so they
    # broadcast against the input
    param_shape = list(inputShape)
    for a in self.norm_axis:
        param_shape[a] = 1
    param_shape = tuple(param_shape)

    if initParams is not None:
        if ('x_avg' in initParams) and (initParams['x_avg'] is not None):
            x_avg_values = initParams['x_avg']
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for x_avg, will use constant"
                          " initialization for x_avg")
    if x_avg_values is None:
        x_avg_values = numpy.zeros(inputShape[1], dtype='float32').reshape(param_shape)
    x_avg_expr = theano.shared(x_avg_values, name='x_avg')

    if initParams is not None:
        if ('beta' in initParams) and (initParams['beta'] is not None):
            beta_values = initParams['beta']
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for beta, will use constant"
                          " initialization for beta")
    if beta_values is None:
        beta_values = numpy.ones(inputShape[1], dtype='float32').reshape(param_shape)
    beta_expr = theano.shared(beta_values, name='beta')

    self.inputShape = inputShape
    self.outputShape = self.inputShape

    # x_avg and x_std should never be tuned through gradient descent
    if 'tune' not in layerSpecs:
        layerSpecs['tune'] = {'beta': True, 'x_avg': False}
    else:
        layerSpecs['tune']['x_avg'] = False
    tune, reg, constraint, lr, mu = setupDefaultLayerOptions(['beta', 'x_avg'],
                                                             layerSpecs)
    self.params.addParameters(params={'beta': beta_expr, 'x_avg': x_avg_expr},
                              tune=tune, regularizer=reg,
                              constraint=constraint, learning_rate=lr,
                              momentum=mu)
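# Illustrative sketch of the broadcasting trick used above: the per-channel
# statistics keep singleton dimensions on every normalized axis so they
# broadcast against the 4-D activations. All shapes here are made up for
# illustration, not taken from any caller of this code.

_x = numpy.random.randn(16, 8, 5, 5).astype('float32')
_norm_axis = (0,) + tuple(range(2, _x.ndim))             # (0, 2, 3)
_param_shape = tuple(1 if a in _norm_axis else d
                     for a, d in enumerate(_x.shape))    # (1, 8, 1, 1)
_x_avg = _x.mean(axis=_norm_axis).reshape(_param_shape)  # per-channel mean
_centered = _x - _x_avg                                  # broadcasts per channel
assert numpy.allclose(_centered.mean(axis=_norm_axis), 0, atol=1e-5)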
# Fully connected layer: flattens any trailing input dimensions and applies an
# affine map with weight matrix W and bias b.
def constructLayer(self, inputShape, initParams, name, w_init, hiddens,
                   b_init=0, act_func=None, lr_scheduler=None,
                   weights_outside=None, ignore_bias=False, **layerSpecs):
    self.layerName = name
    self.wInit = w_init
    self.bInit = b_init
    self.actFunc = act_func
    self.ignore_bias = ignore_bias
    self.params.setLearningRateScheduler(lr_scheduler)
    self.weights_outside = weights_outside
    nHiddenSize = hiddens
    # inputs with more than 2 dimensions are flattened to (batch, features)
    if len(inputShape) > 2:
        self.inputShape = (inputShape[0], numpy.prod(inputShape[1:]))
    else:
        self.inputShape = inputShape
    self.outputShape = (inputShape[0], nHiddenSize)

    W_values = None
    b_values = None
    if initParams is not None:
        if ('W' in initParams) and (initParams['W'] is not None):
            W_values = initParams['W']
            assert len(W_values.shape) == 2, \
                "Initial W dimension does not match, expected a" \
                " 2-dimensional matrix, got %d dimensions" % len(W_values.shape)
            assert W_values.shape == (self.inputShape[-1], self.outputShape[-1]), \
                ("Initial W shape is incorrect, expected: (%d, %d), got: (%d, %d)"
                 % (self.inputShape[-1], self.outputShape[-1],
                    W_values.shape[0], W_values.shape[1]))
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for W, will use random"
                          " initialization for W")
    if initParams is not None:
        if ('b' in initParams) and (initParams['b'] is not None):
            b_values = initParams['b']
            assert len(b_values.shape) == 1, \
                "Initial b dimension does not match, expected a" \
                " vector, got %d dimensions" % len(b_values.shape)
            assert b_values.shape == (self.outputShape[-1],), \
                ("Initial b shape is incorrect, expected: %d, got: %d"
                 % (self.outputShape[-1], b_values.shape[0]))
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for b, will use constant"
                          " initialization for b")

    # W can be shared from outside the layer; otherwise create it here
    if weights_outside is not None:
        W_expr = weights_outside[0]
    else:
        if W_values is None:
            W_values = self.wInit.init(self.inputShape[-1], self.outputShape[-1])
        W_expr = theano.shared(name='W', value=W_values, borrow=True)
    if b_values is None:
        b_values = self.bInit * numpy.ones((self.outputShape[-1],), dtype='float32')
    b_expr = theano.shared(name='b', value=b_values, borrow=True)

    # when the bias is ignored it must not be tuned either
    if self.ignore_bias:
        if 'tune' in layerSpecs:
            layerSpecs['tune']['b'] = False
        else:
            layerSpecs['tune'] = {'W': True, 'b': False}
    tune, reg, constraint, lr, mu = setupDefaultLayerOptions(['W', 'b'], layerSpecs)
    self.params.addParameters(params={'W': W_expr, 'b': b_expr}, tune=tune,
                              regularizer=reg, constraint=constraint,
                              learning_rate=lr, momentum=mu)
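# Illustrative sketch of the map this layer parameterizes: a 4-D input is
# flattened to (batch, features) and pushed through x.dot(W) + b. All
# dimensions below are hypothetical.

_inputShape = (32, 3, 8, 8)
_hiddens = 100
_flat = (_inputShape[0], numpy.prod(_inputShape[1:]))        # (32, 192)
_xin = numpy.random.randn(*_flat).astype('float32')
_W = 0.01 * numpy.random.randn(_flat[1], _hiddens).astype('float32')
_b = numpy.zeros(_hiddens, dtype='float32')
_y = _xin.dot(_W) + _b
assert _y.shape == (_inputShape[0], _hiddens)                # the outputShape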
# Deconvolution (transposed convolution) layer: upsamples its input by
# inverting the geometry of a forward convolution.
def constructLayer(self, inputShape, initParams, name, batch_size, w_init,
                   channels, filter_size, strid_size=1, pad=0, b_init=0,
                   act_func=None, lr_scheduler=None, algo='small',
                   weight_outside=None, **layerSpecs):
    self.layerName = name
    self.batchSize = batch_size
    self.strideSize = strid_size
    self.nPad = pad
    self.wInit = w_init
    self.bInit = b_init
    self.actFunc = act_func
    self.algo = algo
    self.weight_outside = weight_outside
    self.params.setLearningRateScheduler(lr_scheduler)
    nFilters = channels
    filterSize = filter_size
    # this is the inverse of conv, so instead of having (out, in, r, c)
    # we have here (in, out, r, c) so that when we do the grad it does the
    # correct thing
    self.filterShape = (inputShape[1], nFilters, filterSize, filterSize)
    self.inputShape = inputShape
    # calculate output size by inverting the conv output-size formula
    # out = (in - filter + 2*pad) // stride + 1; the -0.5 picks a size
    # consistent with the floor division
    self.outputShape = (self.batchSize, nFilters,
                        int((inputShape[2] - 0.5) * self.strideSize
                            + filterSize - 2 * self.nPad),
                        int((inputShape[3] - 0.5) * self.strideSize
                            + filterSize - 2 * self.nPad))

    W_values = None
    b_values = None
    if initParams is not None:
        if ('W' in initParams) and (initParams['W'] is not None):
            W_values = initParams['W']
            assert len(W_values.shape) == 4, \
                "Initial W dimension does not match, expected a" \
                " 4-dimensional tensor, got %d dimensions" % len(W_values.shape)
            assert W_values.shape == self.filterShape, \
                ("Initial W shape is incorrect, expected: (%d, %d, %d, %d),"
                 " got: (%d, %d, %d, %d)"
                 % (self.filterShape[0], self.filterShape[1],
                    self.filterShape[2], self.filterShape[3],
                    W_values.shape[0], W_values.shape[1],
                    W_values.shape[2], W_values.shape[3]))
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for W, will use random"
                          " initialization for W")
    if initParams is not None:
        if ('b' in initParams) and (initParams['b'] is not None):
            b_values = initParams['b']
            assert len(b_values.shape) == 1, \
                "Initial b dimension does not match, expected a" \
                " vector, got %d dimensions" % len(b_values.shape)
            assert b_values.shape == (self.filterShape[1],), \
                ("Initial b shape is incorrect, expected: %d, got: %d"
                 % (self.filterShape[1], b_values.shape[0]))
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for b, will use constant"
                          " initialization for b")

    if weight_outside is not None:
        W_expr = weight_outside[0]
    else:
        if W_values is None:
            W_values = self.wInit.init(numpy.prod(self.filterShape[1:]),
                                       self.filterShape[0],
                                       numpy.prod(self.filterShape[1:]),
                                       numpy.prod(self.filterShape) // self.filterShape[1])
        W_expr = theano.shared(name='W', value=W_values.reshape(self.filterShape),
                               borrow=True)
    if b_values is None:
        b_values = self.bInit * numpy.ones((self.filterShape[1],), dtype='float32')
    b_expr = theano.shared(name='b', value=b_values, borrow=True)

    tune, reg, constraint, lr, mu = setupDefaultLayerOptions(['W', 'b'], layerSpecs)
    self.params.addParameters(params={'W': W_expr, 'b': b_expr}, tune=tune,
                              regularizer=reg, constraint=constraint,
                              learning_rate=lr, momentum=mu)
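# Illustrative check of the output-size arithmetic above: the transposed
# convolution should map the output size of the matching forward convolution
# back to its input size. The helper names and numbers are hypothetical.

def _conv_out(i, f, s, p):
    # standard conv output size with floor division
    return (i - f + 2 * p) // s + 1

def _deconv_out(o, f, s, p):
    # the formula used in constructLayer above
    return int((o - 0.5) * s + f - 2 * p)

for _i, _f, _s, _p in [(32, 5, 1, 2), (32, 5, 2, 2)]:
    assert _deconv_out(_conv_out(_i, _f, _s, _p), _f, _s, _p) == _i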
# Convolution layer with an optional per-filter scale (gamma), applied as part
# of post-activation normalization.
def constructLayer(self, inputShape, initParams, name, batch_size, w_init,
                   channels, filter_size, strid_size=1, pad=0, b_init=0,
                   act_func=None, lr_scheduler=None, post_act_normalize=True,
                   W_expr=None, **layerSpecs):
    self.layerName = name
    self.batchSize = batch_size
    self.strideSize = strid_size
    self.nPad = pad
    self.wInit = w_init
    self.bInit = b_init
    self.actFunc = act_func
    self.post_act_normalize = post_act_normalize
    self.params.setLearningRateScheduler(lr_scheduler)
    nFilters = channels
    filterSize = filter_size
    self.filterShape = (nFilters, inputShape[1], filterSize, filterSize)
    self.inputShape = inputShape
    # calculate output size: out = (in - filter + 2*pad) // stride + 1
    self.outputShape = (self.batchSize, nFilters,
                        int((inputShape[2] - filterSize + self.nPad * 2)
                            // self.strideSize + 1),
                        int((inputShape[3] - filterSize + self.nPad * 2)
                            // self.strideSize + 1))

    W_values = None
    b_values = None
    gamma_values = None
    if initParams is not None:
        if ('W' in initParams) and (initParams['W'] is not None):
            W_values = initParams['W']
            assert len(W_values.shape) == 4, \
                "Initial W dimension does not match, expected a" \
                " 4-dimensional tensor, got %d dimensions" % len(W_values.shape)
            assert W_values.shape == self.filterShape, \
                ("Initial W shape is incorrect, expected: (%d, %d, %d, %d),"
                 " got: (%d, %d, %d, %d)"
                 % (self.filterShape[0], self.filterShape[1],
                    self.filterShape[2], self.filterShape[3],
                    W_values.shape[0], W_values.shape[1],
                    W_values.shape[2], W_values.shape[3]))
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for W, will use random"
                          " initialization for W")
    if initParams is not None:
        if ('b' in initParams) and (initParams['b'] is not None):
            b_values = initParams['b']
            assert len(b_values.shape) == 1, \
                "Initial b dimension does not match, expected a" \
                " vector, got %d dimensions" % len(b_values.shape)
            assert b_values.shape == (self.filterShape[0],), \
                ("Initial b shape is incorrect, expected: %d, got: %d"
                 % (self.filterShape[0], b_values.shape[0]))
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for b, will use constant"
                          " initialization for b")
    if initParams is not None:
        if ('gamma' in initParams) and (initParams['gamma'] is not None):
            gamma_values = initParams['gamma']
            assert len(gamma_values.shape) == 1, \
                "Initial gamma dimension does not match, expected a" \
                " vector, got %d dimensions" % len(gamma_values.shape)
            assert gamma_values.shape == (self.filterShape[0],), \
                ("Initial gamma shape is incorrect, expected: %d, got: %d"
                 % (self.filterShape[0], gamma_values.shape[0]))
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for gamma, will use constant"
                          " initialization for gamma")
    if gamma_values is None:
        gamma_values = numpy.ones((self.filterShape[0],), dtype='float32')
    gamma_expr = theano.shared(gamma_values, name='gamma')

    # W can be passed in as a ready-made expression; otherwise create it here
    if W_expr is None:
        if W_values is None:
            W_values = self.wInit.init(numpy.prod(self.filterShape[1:]),
                                       self.filterShape[0],
                                       numpy.prod(self.filterShape[1:]),
                                       numpy.prod(self.filterShape) // self.filterShape[1])
        W_expr = theano.shared(name='W', value=W_values.reshape(self.filterShape),
                               borrow=True)
    if b_values is None:
        b_values = self.bInit * numpy.ones((self.filterShape[0],), dtype='float32')
    b_expr = theano.shared(name='b', value=b_values, borrow=True)

    tune, reg, constraint, lr, mu = setupDefaultLayerOptions(['W', 'b', 'gamma'],
                                                             layerSpecs)
    self.params.addParameters(params={'W': W_expr, 'b': b_expr, 'gamma': gamma_expr},
                              tune=tune, regularizer=reg, constraint=constraint,
                              learning_rate=lr, momentum=mu)
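# Worked instance of the output-shape formula above,
# out = (in - filter + 2*pad) // stride + 1, with hypothetical numbers:
# 5x5 filters with pad 2 and stride 1 preserve the spatial size.

_batch, _in_ch, _n_filters, _f, _s, _p = 64, 3, 16, 5, 1, 2
_inShape = (_batch, _in_ch, 32, 32)
_outShape = (_batch, _n_filters,
             (_inShape[2] - _f + 2 * _p) // _s + 1,    # 32
             (_inShape[3] - _f + 2 * _p) // _s + 1)    # 32
assert _outShape == (64, 16, 32, 32)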
# Autoencoder layer: an encoder (W, b) and a decoder (W_prime, b_prime); the
# decoder weights can optionally be tied to the transpose of the encoder's.
def constructLayer(self, inputShape, initParams, name, w_init, hiddens,
                   b_hid_init=0, b_vis_init=0, fact_func=None, bact_func=None,
                   lr_scheduler=None, tie_weights=False, **layerSpecs):
    self.layerName = name
    self.wInit = w_init
    self.bHidInit = b_hid_init
    self.bVisInit = b_vis_init
    self.forwardActFunc = fact_func
    self.backwardActFunc = bact_func
    self.params.setLearningRateScheduler(lr_scheduler)
    self.inputShape = inputShape
    self.tieWeights = tie_weights
    nHiddenSize = hiddens
    # inputs with more than 2 dimensions are flattened to (batch, features)
    if len(inputShape) > 2:
        inputShape = (inputShape[0], numpy.prod(inputShape[1:]))
    self.outputShape = (inputShape[0], nHiddenSize)

    W_values = None
    W_prime_values = None
    b_values = None
    b_prime_values = None
    if initParams is not None:
        if ('W' in initParams) and (initParams['W'] is not None):
            W_values = initParams['W']
            assert len(W_values.shape) == 2, \
                "Initial W dimension does not match, expected a" \
                " 2-dimensional matrix, got %d dimensions" % len(W_values.shape)
            assert W_values.shape == (inputShape[-1], self.outputShape[-1]), \
                ("Initial W shape is incorrect, expected: (%d, %d), got: (%d, %d)"
                 % (inputShape[-1], self.outputShape[-1],
                    W_values.shape[0], W_values.shape[1]))
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for W, will use random"
                          " initialization for W")
    if initParams is not None:
        if not self.tieWeights:
            if ('W_prime' in initParams) and (initParams['W_prime'] is not None):
                W_prime_values = initParams['W_prime']
                assert len(W_prime_values.shape) == 2, \
                    "Initial W_prime dimension does not match, expected a" \
                    " 2-dimensional matrix, got %d dimensions" % len(W_prime_values.shape)
                assert W_prime_values.shape == (self.outputShape[-1], inputShape[-1]), \
                    ("Initial W_prime shape is incorrect, expected: (%d, %d),"
                     " got: (%d, %d)"
                     % (self.outputShape[-1], inputShape[-1],
                        W_prime_values.shape[0], W_prime_values.shape[1]))
            else:
                warnings.warn("initParams provided but did not provide an actual"
                              " initialization value for W_prime, will use random"
                              " initialization for W_prime")
    if initParams is not None:
        if ('b' in initParams) and (initParams['b'] is not None):
            b_values = initParams['b']
            assert len(b_values.shape) == 1, \
                "Initial b dimension does not match, expected a" \
                " vector, got %d dimensions" % len(b_values.shape)
            assert b_values.shape == (self.outputShape[-1],), \
                ("Initial b shape is incorrect, expected: %d, got: %d"
                 % (self.outputShape[-1], b_values.shape[0]))
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for b, will use constant"
                          " initialization for b")
    if initParams is not None:
        if ('b_prime' in initParams) and (initParams['b_prime'] is not None):
            b_prime_values = initParams['b_prime']
            assert len(b_prime_values.shape) == 1, \
                "Initial b_prime dimension does not match, expected a" \
                " vector, got %d dimensions" % len(b_prime_values.shape)
            assert b_prime_values.shape == (inputShape[-1],), \
                ("Initial b_prime shape is incorrect, expected: %d, got: %d"
                 % (inputShape[-1], b_prime_values.shape[0]))
        else:
            warnings.warn("initParams provided but did not provide an actual"
                          " initialization value for b_prime, will use constant"
                          " initialization for b_prime")

    if W_values is None:
        W_values = self.wInit.init(inputShape[-1], self.outputShape[-1])
    W_expr = theano.shared(name='W', value=W_values, borrow=True)
    if self.tieWeights:
        W_prime_expr = W_expr.T
    else:
        if W_prime_values is None:
            W_prime_values = self.wInit.init(self.outputShape[-1], inputShape[-1])
        W_prime_expr = theano.shared(name='W_prime', value=W_prime_values,
                                     borrow=True)
    if b_values is None:
        b_values = self.bHidInit * numpy.ones((self.outputShape[-1],), dtype='float32')
    b_expr = theano.shared(name='b', value=b_values, borrow=True)
    if b_prime_values is None:
        b_prime_values = self.bVisInit * numpy.ones((inputShape[-1],), dtype='float32')
    b_prime_expr = theano.shared(name='b_prime', value=b_prime_values, borrow=True)

    tune, reg, constraint, lr, mu = setupDefaultLayerOptions(
        ['W', 'W_prime', 'b', 'b_prime'], layerSpecs)
    # no need to tune W_prime if it is tied with W
    if self.tieWeights:
        params = {'W': W_expr, 'b': b_expr, 'b_prime': b_prime_expr}
    else:
        params = {'W': W_expr, 'W_prime': W_prime_expr, 'b': b_expr,
                  'b_prime': b_prime_expr}
    self.params.addParameters(params=params, tune=tune, regularizer=reg,
                              constraint=constraint, learning_rate=lr,
                              momentum=mu)
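# Illustrative numpy sketch of the tied-weights case: the decoder matrix is
# just the transpose of the encoder matrix, so only W, b and b_prime are
# learned. Shapes and the tanh activations are hypothetical.

_n_vis, _n_hid = 784, 256
_Wae = 0.01 * numpy.random.randn(_n_vis, _n_hid).astype('float32')
_bae = numpy.zeros(_n_hid, dtype='float32')
_b_prime = numpy.zeros(_n_vis, dtype='float32')
_v = numpy.random.randn(8, _n_vis).astype('float32')
_h = numpy.tanh(_v.dot(_Wae) + _bae)          # encode: (8, 256)
_v_hat = _h.dot(_Wae.T) + _b_prime            # decode with W_prime = W.T
assert _v_hat.shape == _v.shape               # reconstruction matches input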