def log_softmax(z):
  """
  :param theano.Variable z: input, ndim >= 1; softmax is taken over the last axis
  :rtype: theano.Variable
  """
  assert z.ndim >= 1
  if z.ndim <= 2:
    return T.nnet.logsoftmax(z)
  else:
    from returnn.theano.util import time_batch_make_flat
    # Collapse the leading (time, batch) axes into one so we can use the 2-dim op.
    z_flat = time_batch_make_flat(z)
    assert z_flat.ndim == 2
    return T.reshape(T.nnet.logsoftmax(z_flat), z.shape)
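
# A minimal NumPy sketch (illustrative only, not part of this module; the helper
# name is hypothetical) checking the flatten/reshape trick that log_softmax
# relies on: log-softmax over the last axis commutes with collapsing the leading
# (time, batch) axes, so the >2-dim case can safely go through a 2-dim view.
def _check_log_softmax_flatten_trick():
  import numpy
  rng = numpy.random.RandomState(42)
  z = rng.randn(5, 3, 7)  # (time, batch, dim)

  def np_log_softmax(x):
    m = x.max(axis=-1, keepdims=True)  # subtract max for numerical stability
    return x - m - numpy.log(numpy.exp(x - m).sum(axis=-1, keepdims=True))

  direct = np_log_softmax(z)
  via_flat = np_log_softmax(z.reshape(-1, z.shape[-1])).reshape(z.shape)
  assert numpy.allclose(direct, via_flat)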
def add_layer(self, layer):
  """
  :type layer: NetworkHiddenLayer.Layer
  :rtype: NetworkHiddenLayer.Layer
  """
  assert layer.name
  layer_errors = layer.errors()
  if isinstance(layer, OutputLayer) or layer.name == "output" or layer_errors is not None:
    is_output_layer = True
    self.output[layer.name] = layer
  else:
    is_output_layer = False
    self.hidden[layer.name] = layer
  if layer_errors is not None:
    self.errors[layer.name] = layer_errors
  if is_output_layer:
    if getattr(layer, "p_y_given_x", None) is None and layer.output:
      # Small hack for layers which we use as output layers but which don't set this.
      from returnn.theano.util import time_batch_make_flat
      layer.p_y_given_x = layer.output
      layer.p_y_given_x_flat = time_batch_make_flat(layer.output)
  self.declare_train_params()
  return layer
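
# Hedged sketch (hypothetical helper, not in this module): the routing rule
# that add_layer applies, written out as a standalone predicate. A layer counts
# as an output layer if it is an OutputLayer instance, is literally named
# "output", or reports a training error signal via errors(); anything else is
# stored as a hidden layer.
def _is_output_layer(layer, output_layer_cls=None):
  if output_layer_cls is not None and isinstance(layer, output_layer_cls):
    return True
  return layer.name == "output" or layer.errors() is not None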
def __init__(self, sources, n_out, index, y_in=None, target=None, target_index=None,
             sparse=False, cost_scale=1.0, input_scale=1.0,
             L1=0.0, L2=0.0, L2_eye=None, varreg=0.0,
             output_L2_reg=0.0, output_entropy_reg=0.0, output_entropy_exp_reg=0.0,
             with_bias=True,
             mask="unity", dropout=0.0, batch_drop=False, batch_norm=False, bn_use_sample=False,
             layer_drop=0.0, residual=False, carry=False,
             sparse_filtering=False, gradient_scale=1.0, trainable=True, device=None,
             dtype='float32',
             **kwargs):
  """
  :param list[NetworkBaseLayer.Layer] sources: list of source layers
  :param int n_out: output dim of W_in and dim of bias
  :param float L1: l1-param-norm regularization
  :param float L2: l2-param-norm regularization
  :param str mask: "unity" or "dropout"
  :type dropout: float
  """
  super(Layer, self).__init__(**kwargs)
  self.index = index
  self.sources = sources; ":type: list[Layer]"
  self.num_sources = len(sources)
  self.D = max([s.D for s in sources if isinstance(s, Layer)] + [0])
  if mask is None:
    mask = 'none'
  self.set_attr('mask', mask)
  self.set_attr('dropout', dropout)
  self.set_attr('sparse', sparse)
  self.set_attr('bn_use_sample', bn_use_sample)
  self.set_attr('sparse_filtering', sparse_filtering)
  if not trainable:
    self.set_attr('trainable', trainable)  # only store if not default
    self.gradient_scale = 0.0  # just to be sure
  else:
    self.gradient_scale = gradient_scale
  if gradient_scale != 1.0:
    self.set_attr('gradient_scale', gradient_scale)
  self.set_attr('layer_drop', layer_drop)
  assert not carry, "not supported anymore"
  self.set_attr('residual', residual)
  self.set_attr('n_out', n_out)
  self.set_attr('L1', L1)
  self.set_attr('L2', L2)
  if L2_eye:
    self.set_attr('L2_eye', L2_eye)
  self.device = device  # if device else str(theano.config.device)
  for s in self.sources:
    s.transfer_output(self.device)
  self.set_attr('varreg', varreg)
  if output_L2_reg:
    self.set_attr('output_L2_reg', output_L2_reg)
  if output_entropy_reg:
    self.set_attr('output_entropy_reg', output_entropy_reg)
  if output_entropy_exp_reg:
    self.set_attr('output_entropy_exp_reg', output_entropy_exp_reg)
  self.set_attr('batch_norm', batch_norm)
  self.set_attr('input_scale', input_scale)
  if y_in is not None:
    self.y_in = {}
    for k in y_in:
      if not isinstance(y_in[k], T.Variable):
        continue
      self.y_in[k] = time_batch_make_flat(y_in[k])  # TODO: better not flatten here...
      self.y_in[k].n_out = getattr(y_in[k], "n_out", None)
  else:
    self.y_in = None
  self.constraints = T.constant(0)
  if target:
    self.set_attr('target', target)
  if target_index:
    self.set_attr('target_index', target_index)
    assert target_index in self.network.j
    self.index = index = self.network.j[target_index]
  if cost_scale != 1:
    self.set_attr("cost_scale", cost_scale)
  if with_bias:
    self.b = self.add_param(self.create_bias(n_out), 'b_%s' % self.name)
  else:
    self.set_attr('with_bias', False)
    self.b = numpy.float32(0)
  self.mass = T.constant(1., name="mass_%s" % self.name, dtype='float32')
  self.masks = [None] * len(self.sources)
  assert mask in ['dropout', 'unity', 'none'], "invalid mask: %s" % mask
  if mask == "dropout" or (mask == 'none' and dropout > 0):
    assert 0.0 < dropout < 1.0
    # If we apply this mass during training then we don't need any mask or mass for testing.
    # The expected weight should be 1 in
    #   E[x] = mass * (1 - dropout)
    # so mass has to be 1 / (1 - dropout).
    self.mass = T.constant(1.0 / (1.0 - dropout), dtype='float32')
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
    srng = RandomStreams(self.rng.randint(1234) + 1)
    if self.depth > 1:
      self.masks = [T.cast(srng.binomial(n=1, p=1 - dropout, size=(s.attrs['n_out'], self.depth)),
                           theano.config.floatX) for s in self.sources]
    else:
      if batch_drop:
        # A fresh mask per element of the batch, i.e. per output position.
        self.masks = [T.cast(srng.binomial(n=1, p=1 - dropout, size=s.output.shape),
                             theano.config.floatX) for s in self.sources]
      else:
        # One mask per feature dim, shared over time and batch.
        self.masks = [T.cast(srng.binomial(n=1, p=1 - dropout, size=(s.attrs['n_out'],)),
                             theano.config.floatX) for s in self.sources]
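
# Hedged sketch (illustrative only, not part of this module; the helper name is
# hypothetical): a NumPy check of the mass derivation in the comment above.
# With inverted dropout, each unit is kept with probability (1 - dropout) and
# kept activations are scaled by mass = 1 / (1 - dropout), so
# E[mass * m * x] = mass * (1 - dropout) * x = x, and no extra rescaling is
# needed at test time.
def _check_inverted_dropout_expectation(dropout=0.3, n=200000):
  import numpy
  rng = numpy.random.RandomState(1234)
  mask = rng.binomial(n=1, p=1 - dropout, size=n).astype('float32')
  mass = 1.0 / (1.0 - dropout)
  # The mean of the scaled mask should be close to 1, up to sampling noise.
  assert abs(float((mass * mask).mean()) - 1.0) < 1e-2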