def _allocate(self):
    self.W_state = shared_floatx_nans((self.dim, 4 * self.dim),
                                      name='W_state')
    self.W_cell_to_in = shared_floatx_nans((self.dim,),
                                           name='W_cell_to_in')
    self.W_cell_to_forget = shared_floatx_nans((self.dim,),
                                               name='W_cell_to_forget')
    self.W_cell_to_out = shared_floatx_nans((self.dim,),
                                            name='W_cell_to_out')
    # The underscore is required to prevent collision with
    # the `initial_state` application method
    self.initial_state_ = shared_floatx_zeros((self.dim,),
                                              name="initial_state")
    self.initial_cells = shared_floatx_zeros((self.dim,),
                                             name="initial_cells")
    add_role(self.W_state, WEIGHT)
    add_role(self.W_cell_to_in, WEIGHT)
    add_role(self.W_cell_to_forget, WEIGHT)
    add_role(self.W_cell_to_out, WEIGHT)
    add_role(self.initial_state_, INITIAL_STATE)
    add_role(self.initial_cells, INITIAL_STATE)
    # gamma: per-unit gain, initialized to 0.1
    gamma_val = 0.1 * numpy.ones((self.dim,), dtype=config.floatX)
    self.gamma = shared(name='gamma', value=gamma_val)
    add_role(self.gamma, PARAMETER)
    # beta: per-unit shift, initialized to zero
    beta_val = numpy.zeros((self.dim,), dtype=config.floatX)
    self.beta = shared(name='beta', value=beta_val)
    add_role(self.beta, PARAMETER)
    self.parameters = [
        self.W_state, self.W_cell_to_in, self.W_cell_to_forget,
        self.W_cell_to_out, self.initial_state_, self.initial_cells,
        self.gamma, self.beta]

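# A minimal sketch (not part of the original snippets) of how the 4 * dim
# columns of W_state are consumed: following the Blocks LSTM convention,
# the pre-activations are sliced into input-gate, forget-gate,
# cell-candidate, and output-gate blocks, while the W_cell_to_* vectors
# act as diagonal "peephole" connections added to the matching gates.
def slice_gates(activation, dim):
    # activation: (batch, 4 * dim), e.g. states.dot(W_state) + inputs
    return [activation[:, i * dim:(i + 1) * dim] for i in range(4)]
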
def __init__(self, input_dim, n_hidden, n_classes):
    dim_list = [input_dim] + n_hidden
    self.W = []
    self.b = []
    # Hidden layers
    for i in xrange(len(dim_list) - 1):
        in_dim = dim_list[i]
        out_dim = dim_list[i + 1]
        W = shared_floatx_zeros((in_dim, out_dim))
        W.set_value(.01 * (numpy.random.uniform(
            size=W.get_value().shape).astype(floatX) - 0.5))
        W.name = 'W_' + str(i)
        b = shared_floatx_zeros((out_dim,))
        b.name = 'b_' + str(i)
        self.W.append(W)
        self.b.append(b)
    # Output layer
    W = shared_floatx_zeros((n_hidden[-1], n_classes))
    W.set_value(.01 * (numpy.random.uniform(
        size=W.get_value().shape).astype(floatX) - 0.5))
    b = shared_floatx_zeros((n_classes,))
    W.name = 'W_out'
    b.name = 'b_out'
    self.W.append(W)
    self.b.append(b)

def _allocate(self):
    self.W_patch = shared_floatx_nans(
        (np.prod(self.patch_shape) + 4, 4 * self.dim), name="W_patch")
    self.b = shared_floatx_nans((4 * self.dim,), name="b")
    self.W_state = shared_floatx_nans((self.dim, 4 * self.dim),
                                      name="W_state")
    # The underscore is required to prevent collision with
    # the `initial_state` application method
    self.initial_state_ = shared_floatx_zeros((self.dim,),
                                              name="initial_state")
    self.initial_cells = shared_floatx_zeros((self.dim,),
                                             name="initial_cells")
    self.initial_location = shared_floatx_zeros((2,),
                                                name="initial_location")
    self.initial_scale = shared_floatx_zeros((2,), name="initial_scale")
    add_role(self.W_state, WEIGHT)
    add_role(self.b, BIAS)
    add_role(self.W_patch, WEIGHT)
    add_role(self.initial_state_, INITIAL_STATE)
    add_role(self.initial_cells, INITIAL_STATE)
    add_role(self.initial_location, INITIAL_STATE)
    add_role(self.initial_scale, INITIAL_STATE)
    self.parameters = [
        self.W_state, self.W_patch, self.b, self.initial_state_,
        self.initial_cells, self.initial_location, self.initial_scale]

def _allocate(self):
    self.params.append(shared_floatx_zeros(
        (self.input_dim, self.output_dim), name="W"))
    if self.use_bias:
        self.params.append(shared_floatx_zeros((self.output_dim,),
                                               name="b"))

def __init__(self, input_dim, n_hidden, n_classes):
    self.input_dim = input_dim
    self.n_hidden = n_hidden
    self.n_classes = n_classes
    self.neural_arch = [input_dim] + n_hidden + [n_classes]
    self.W = []
    self.b = []
    # Creating weights for each layer
    for n in xrange(len(self.neural_arch) - 1):
        n_in = self.neural_arch[n]
        n_out = self.neural_arch[n + 1]
        W = shared_floatx_zeros((n_in, n_out))
        randomWeightInitialize(W)
        W.name = 'W_' + str(n)
        b = shared_floatx_zeros(n_out)
        b.name = 'b_' + str(n)
        self.W.append(W)
        self.b.append(b)
    self.params = self.W + self.b

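# `randomWeightInitialize` is used but not defined in these snippets. A
# minimal sketch, assuming the same small centered-uniform scheme the other
# constructors in this collection use:
import numpy
from theano import config
floatX = config.floatX

def randomWeightInitialize(W):
    # Overwrite the shared variable in place with values in (-0.005, 0.005).
    W.set_value(.01 * (numpy.random.uniform(
        size=W.get_value().shape).astype(floatX) - 0.5))
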
def _allocate(self):
    self.W_state = shared_floatx_nans((self.dim, 4 * self.dim),
                                      name='W_state')
    self.W_cell_to_in = shared_floatx_nans((self.dim,),
                                           name='W_cell_to_in')
    self.W_cell_to_forget = shared_floatx_nans((self.dim,),
                                               name='W_cell_to_forget')
    self.W_cell_to_out = shared_floatx_nans((self.dim,),
                                            name='W_cell_to_out')
    # The underscore is required to prevent collision with
    # the `initial_state` application method
    self.initial_state_ = shared_floatx_zeros((self.dim,),
                                              name="initial_state")
    self.initial_cells = shared_floatx_zeros((self.dim,),
                                             name="initial_cells")
    add_role(self.W_state, WEIGHT)
    add_role(self.W_cell_to_in, WEIGHT)
    add_role(self.W_cell_to_forget, WEIGHT)
    add_role(self.W_cell_to_out, WEIGHT)
    add_role(self.initial_state_, INITIAL_STATE)
    add_role(self.initial_cells, INITIAL_STATE)
    self.parameters = [
        self.W_state, self.W_cell_to_in, self.W_cell_to_forget,
        self.W_cell_to_out, self.initial_state_, self.initial_cells]

def __init__(self, config, **kwargs):
    super(Model, self).__init__(**kwargs)
    self.config = config
    self.pre_context_embedder = ContextEmbedder(
        config.pre_embedder, name='pre_context_embedder')
    self.post_context_embedder = ContextEmbedder(
        config.post_embedder, name='post_context_embedder')
    in1 = 2 + sum(x[2] for x in config.pre_embedder.dim_embeddings)
    self.input_to_rec = MLP(activations=[Tanh()],
                            dims=[in1, config.hidden_state_dim],
                            name='input_to_rec')
    self.rec = LSTM(dim=config.hidden_state_dim, name='recurrent')
    in2 = config.hidden_state_dim + sum(
        x[2] for x in config.post_embedder.dim_embeddings)
    self.rec_to_output = MLP(activations=[Tanh()], dims=[in2, 2],
                             name='rec_to_output')
    self.sequences = ['latitude', 'latitude_mask', 'longitude']
    self.context = (self.pre_context_embedder.inputs +
                    self.post_context_embedder.inputs)
    self.inputs = self.sequences + self.context
    self.children = [self.pre_context_embedder, self.post_context_embedder,
                     self.input_to_rec, self.rec, self.rec_to_output]
    self.initial_state_ = shared_floatx_zeros((config.hidden_state_dim,),
                                              name="initial_state")
    self.initial_cells = shared_floatx_zeros((config.hidden_state_dim,),
                                             name="initial_cells")

def _allocate(self):
    self.W_patch = shared_floatx_nans(
        (np.prod(self.patch_shape), 4 * self.dim), name='W_input')
    self.W_state = shared_floatx_nans((self.dim, 4 * self.dim),
                                      name='W_state')
    self.W_cell_to_in = shared_floatx_nans((self.dim,),
                                           name='W_cell_to_in')
    self.W_cell_to_forget = shared_floatx_nans((self.dim,),
                                               name='W_cell_to_forget')
    self.W_cell_to_out = shared_floatx_nans((self.dim,),
                                            name='W_cell_to_out')
    # The underscore is required to prevent collision with
    # the `initial_state` application method
    self.initial_state_ = shared_floatx_zeros((self.dim,),
                                              name="initial_state")
    self.initial_cells = shared_floatx_zeros((self.dim,),
                                             name="initial_cells")
    add_role(self.W_state, WEIGHT)
    add_role(self.W_patch, WEIGHT)
    add_role(self.W_cell_to_in, WEIGHT)
    add_role(self.W_cell_to_forget, WEIGHT)
    add_role(self.W_cell_to_out, WEIGHT)
    add_role(self.initial_state_, INITIAL_STATE)
    add_role(self.initial_cells, INITIAL_STATE)
    self.parameters = [
        self.W_state, self.W_cell_to_in, self.W_cell_to_forget,
        self.W_patch, self.W_cell_to_out, self.initial_state_,
        self.initial_cells]

def __init__(self, input_dim):
    # WRITEME
    self.input_dim = input_dim
    self.params = [shared_floatx_zeros((input_dim, 1)),
                   shared_floatx_zeros((1,))]
    self.w = self.params[0]
    self.b = self.params[1]

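# A minimal usage sketch (assumed, not from the original source): the (w, b)
# pair above parameterizes a single-output affine model, e.g. a logistic
# regressor over an input matrix X of shape (batch, input_dim):
import theano.tensor as tensor

def predict_proba(X, w, b):
    # Probability of the positive class, shape (batch, 1).
    return tensor.nnet.sigmoid(tensor.dot(X, w) + b)
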
def __init__(self, input_dim, n_classes):
    self.input_dim = input_dim
    self.params = [shared_floatx_zeros((input_dim, n_classes)),
                   shared_floatx_zeros((n_classes,))]
    self.W = self.params[0]
    self.b = self.params[1]

def initial_states(self, batch_size):
    big_h0_shape = (batch_size, three_tier.N_RNN,
                    three_tier.H0_MULT * three_tier.BIG_DIM)
    last_big_h0 = shared_floatx_zeros(big_h0_shape)
    h0_shape = (batch_size, three_tier.N_RNN,
                three_tier.H0_MULT * three_tier.DIM)
    last_h0 = shared_floatx_zeros(h0_shape)
    return last_h0, last_big_h0

def __init__(self, input_dim, n_classes):
    self.n_classes = n_classes
    self.input_dim = input_dim
    self.params = [shared_floatx_zeros((input_dim, n_classes)),
                   shared_floatx_zeros((n_classes,))]
    self.W = self.params[0]
    self.b = self.params[1]

def __init__(self, input_dim):
    self.input_dim = input_dim
    self.params = [shared_floatx_zeros((input_dim, 1)),
                   shared_floatx_zeros((1,))]
    self.W = self.params[0]
    self.b = self.params[1]

def _allocate(self):
    self.parameters.append(shared_floatx_nans((self.dim, self.dim),
                                               name="W"))
    add_role(self.parameters[0], WEIGHT)
    self.parameters.append(shared_floatx_zeros((self.dim,),
                                                name="initial_state"))
    add_role(self.parameters[1], INITIAL_STATE)
    self.parameters.append(shared_floatx_zeros((1,), name="initial_time"))
    add_role(self.parameters[2], INITIAL_STATE)

def initial_states(self, batch_size):
    initial_h1 = self.cell1.initial_states(batch_size)
    initial_kappa = shared_floatx_zeros((batch_size, self.att_size))
    initial_w = tensor.repeat(self.initial_w[None, :], batch_size, 0)
    last_h1 = shared_floatx_zeros((batch_size, self.rec_h_dim))
    last_w = shared_floatx_zeros((batch_size, self.num_letters))
    use_last_states = shared(numpy.asarray(0., dtype=floatX))
    return (initial_h1, initial_kappa, initial_w,
            last_h1, last_w, use_last_states)

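# A minimal sketch (assumed, not from the original source): a scalar shared
# flag like `use_last_states` above can gate between a learned initial state
# and a cached last state without recompiling the graph:
import numpy

def select_states(use_last, initial, last):
    # use_last is 0. or 1.; the same arithmetic works symbolically on
    # Theano variables.
    return use_last * last + (1. - use_last) * initial

print(select_states(0., numpy.zeros(3), numpy.ones(3)))  # -> initial state
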
def _allocate(self):
    if self.noise_batch_size is not None:
        if self.tied_noise:
            N = shared_floatx_zeros(
                (self.noise_batch_size, self.input_dim[0]), name='N')
        else:
            N = shared_floatx_zeros(
                (self.noise_batch_size,) + self.input_dim, name='N')
        add_role(N, NOISE)
        self.parameters.append(N)

def _allocate(self):
    W = shared_floatx_zeros((self.input_dim, self.output_dim), name='W')
    add_role(W, WEIGHTS)
    self.params.append(W)
    self.add_auxiliary_variable(W.norm(2), name='W_norm')
    if self.use_bias:
        b = shared_floatx_zeros((self.output_dim,), name='b')
        add_role(b, BIASES)
        self.params.append(b)
        self.add_auxiliary_variable(b.norm(2), name='b_norm')

def _initialize(self):
    self.beta = shared_floatx_zeros((self.dim,), name='beta')
    self.gamma = shared_floatx_zeros((self.dim,), name='gamma')
    add_role(self.beta, PARAMETER)
    add_role(self.gamma, PARAMETER)
    self.parameters = [self.gamma, self.beta]
    self.beta_init.initialize(self.beta, self.rng)
    self.gamma_init.initialize(self.gamma, self.rng)

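# A minimal numpy sketch (assumed, not from the original source) of how a
# (gamma, beta) pair like the one allocated above is typically applied:
# normalize the activations, then rescale by gamma and shift by beta.
import numpy

def normalize_scale_shift(x, gamma, beta, eps=1e-5):
    # x: (batch, dim); gamma, beta: (dim,) broadcast over the batch axis.
    mean = x.mean(axis=-1, keepdims=True)
    stdev = numpy.sqrt(x.var(axis=-1, keepdims=True) + eps)
    return gamma * (x - mean) / stdev + beta
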
def _allocate(self):
    W = shared_floatx_zeros(
        (self.num_filters, self.num_channels) + self.filter_size,
        name='W')
    add_role(W, FILTERS)
    self.params.append(W)
    self.add_auxiliary_variable(W.norm(2), name='W_norm')
    if self.use_bias:
        b = shared_floatx_zeros(self.get_dim('output'), name='b')
        add_role(b, BIASES)
        self.params.append(b)
        self.add_auxiliary_variable(b.norm(2), name='b_norm')

def _allocate(self):
    super(GaussianLayerFixedSigma, self)._allocate()
    dim_X, dim_H = self.dim_X, self.dim_H
    self.W_mean = shared_floatx_zeros((dim_H, dim_X), name="W_mean")
    add_role(self.W_mean, WEIGHT)
    self.b_mean = shared_floatx_zeros((dim_X,), name="b_mean")
    add_role(self.b_mean, BIAS)
    self.parameters = [self.W_mean, self.b_mean]

def compile(self):
    """Do not add any more tasks after this function is called.

    Compiles the state update and logprobs theano functions for this
    bucket.
    """
    self.n_tasks = len(self.tasks)
    self.n_finished = 0
    self.all_attended = shared_floatx_zeros((1, 1, 1))
    self.all_masks = shared_floatx_zeros((1, 1))
    self.src_indices = T.ivector()
    givens = self._construct_givens()
    self._compile_next_state_computer(givens)
    self._compile_logprobs_computer(givens)

def _allocate(self):
    # 4.5 * dim columns; this is integral (and the shape valid) only when
    # `dim` is even, so cast explicitly.
    self.W_state = shared_floatx_nans((self.dim, int(4.5 * self.dim)),
                                      name='W_state')
    # The underscore is required to prevent collision with
    # the `initial_state` application method
    self.initial_state_ = shared_floatx_zeros((self.dim,),
                                              name="initial_state")
    self.initial_cells = shared_floatx_zeros((self.num_copies, self.dim),
                                             name="initial_cells")
    add_role(self.W_state, WEIGHT)
    # add_role(self.initial_state_, INITIAL_STATE)
    # add_role(self.initial_cells, INITIAL_STATE)
    self.parameters = [self.W_state]

def __init__(self, inputs, cg, reward_emitter, data, **kwargs):
    self.input_accumulator = shared_floatx_zeros((2, 2), dtype='int64')
    self.gain_accumulator = shared_floatx_zeros((2, 2, 2))
    self.reward_accumulator = shared_floatx_zeros((2, 2, 2), dtype='int64')
    self.dataset = data.get_dataset('train')
    self.inputs = inputs
    self.gains, = VariableFilter(
        applications=[reward_emitter.cost],
        roles=[INPUT], name='readouts')(cg.variables)
    self.reward, = VariableFilter(
        theano_name=reward_emitter.GAIN_MATRIX)(cg.variables)
    kwargs.setdefault('before_training', True)
    kwargs.setdefault('after_batch', True)
    super(LogInputsGains, self).__init__(**kwargs)

def _allocate(self):
    self.params.append(
        shared_floatx_nans((self.dim, self.dim), name='state_to_state'))
    self.params.append(
        shared_floatx_zeros((self.dim,), name="initial_state"))
    add_role(self.params[0], WEIGHT)
    add_role(self.params[1], INITIAL_STATE)

def __init__(self, inputs, data, **kwargs):
    self.accumulator = shared_floatx_zeros((2, 2), dtype='int64')
    self.dataset = data.get_dataset('train')
    self.inputs = inputs
    kwargs.setdefault('before_training', True)
    kwargs.setdefault('after_batch', True)
    super(LogInputs, self).__init__(**kwargs)

def _allocate(self):
    super(GaussianLayer, self)._allocate()
    dim_X, dim_Y, dim_H = self.dim_X, self.dim_Y, self.dim_H
    W_mean = shared_floatx_zeros((dim_H, dim_X), name='W_mean')
    W_ls = shared_floatx_zeros((dim_H, dim_X), name='W_ls')
    add_role(W_mean, WEIGHTS)
    add_role(W_ls, WEIGHTS)
    b_mean = shared_floatx_zeros((dim_X,), name='b_mean')
    b_ls = shared_floatx_zeros((dim_X,), name='b_ls')
    add_role(b_mean, BIASES)
    add_role(b_ls, BIASES)
    self.params = [W_mean, W_ls, b_mean, b_ls]

def _allocate(self):
    def new_param(name):
        return shared_floatx_zeros((self.dim, self.dim), name=name)

    self.params.append(new_param('state_to_state'))
    self.params.append(
        new_param('state_to_update') if self.use_update_gate else None)
    self.params.append(
        new_param('state_to_reset') if self.use_reset_gate else None)

def _create_intpic_histogram_for(param, pic_size, label_count):
    # The pic histogram is a 2d-array of pic_size, kept per parameter
    # entry and per label. For a 2d parameter, that ends up being a 5d
    # tensor; for a 1d parameter, a 4d tensor.
    shape = param.get_value().shape + (label_count,) + pic_size
    buf = shared_floatx_zeros(shape)
    buf.tag.for_parameter = param
    add_role(buf, INTPIC_STATISTICS)
    return buf

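# A small shape check (illustrative, not from the original source): the
# accumulator's rank is param.ndim + 1 + len(pic_size).
import numpy

param_value = numpy.zeros((64, 10))   # a 2d parameter
pic_size = (28, 28)
label_count = 10
shape = param_value.shape + (label_count,) + pic_size
assert shape == (64, 10, 10, 28, 28)
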
def _allocate(self):
    self.W_state = shared_floatx_zeros((self.dim, 4 * self.dim),
                                       name='W_state')
    self.W_cell_to_in = shared_floatx_zeros((self.dim,),
                                            name='W_cell_to_in')
    self.W_cell_to_forget = shared_floatx_zeros((self.dim,),
                                                name='W_cell_to_forget')
    self.W_cell_to_out = shared_floatx_zeros((self.dim,),
                                             name='W_cell_to_out')
    self.biases = shared_floatx_zeros((4 * self.dim,), name='biases')
    add_role(self.W_state, WEIGHTS)
    add_role(self.W_cell_to_in, WEIGHTS)
    add_role(self.W_cell_to_forget, WEIGHTS)
    add_role(self.W_cell_to_out, WEIGHTS)
    add_role(self.biases, BIASES)
    self.params = [self.W_state, self.W_cell_to_in, self.W_cell_to_forget,
                   self.W_cell_to_out, self.biases]

def _allocate(self):
    self.parameters.append(shared_floatx_nans((self.dim, self.dim),
                                               name='state_to_state'))
    self.parameters.append(shared_floatx_nans((self.dim, 2 * self.dim),
                                               name='state_to_gates'))
    self.parameters.append(shared_floatx_zeros((self.dim,),
                                                name="initial_state"))
    for i in range(2):
        if self.parameters[i] is not None:
            add_role(self.parameters[i], WEIGHT)
    add_role(self.parameters[2], INITIAL_STATE)

def __init__(self, filter_size=(3, 3), num_filters=128, num_channels=3,
             step=(1, 1), border_mode='valid', input_shape=(None, None)):
    self.W = shared_floatx_zeros((num_filters, num_channels) + filter_size)
    self.W.set_value(.1 * (numpy.random.uniform(
        size=self.W.get_value().shape).astype(floatX) - 0.5))
    self.b = shared_floatx_zeros((num_filters,))
    self.params = [self.W, self.b]
    self.num_filters = num_filters
    self.num_channels = num_channels
    self.filter_size = filter_size
    self.border_mode = border_mode
    if border_mode not in ['full', 'valid']:
        raise ValueError('Invalid mode: must be `valid` or `full`.')
    self.step = step
    self.set_input_shape(input_shape)

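# A quick shape check (illustrative, not from the original source) for the
# convolution weights allocated above, using blocks.utils directly:
from blocks.utils import shared_floatx_zeros

num_filters, num_channels, filter_size = 32, 3, (5, 5)
W = shared_floatx_zeros((num_filters, num_channels) + filter_size)
b = shared_floatx_zeros((num_filters,))
assert W.get_value().shape == (32, 3, 5, 5)  # one kernel per filter/channel
assert b.get_value().shape == (32,)          # one bias per filter
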
def _allocate(self):
    self.W_state = shared_floatx_nans((self.dim, 4 * self.dim),
                                      name='W_state')
    self.W_cell_to_in = shared_floatx_nans((self.dim,),
                                           name='W_cell_to_in')
    self.W_cell_to_forget = shared_floatx_nans((self.dim,),
                                               name='W_cell_to_forget')
    self.W_cell_to_out = shared_floatx_nans((self.dim,),
                                            name='W_cell_to_out')
    # The underscore is required to prevent collision with
    # the `initial_state` application method
    self.initial_state_ = shared_floatx_zeros((self.dim,),
                                              name="initial_state")
    self.initial_cells = shared_floatx_zeros((self.dim,),
                                             name="initial_cells")
    add_role(self.W_state, WEIGHT)
    add_role(self.W_cell_to_in, WEIGHT)
    add_role(self.W_cell_to_forget, WEIGHT)
    add_role(self.W_cell_to_out, WEIGHT)
    add_role(self.initial_state_, INITIAL_STATE)
    add_role(self.initial_cells, INITIAL_STATE)
    # Layer-norm parameters: the shifts (b1-b3) start at scale_add, the
    # gains (s1-s3) at scale_mul. They must be shared variables, not
    # symbolic expressions, to be trainable.
    scale_add = 0.0
    scale_mul = 1.0
    self.b1 = shared(scale_add * numpy.ones((4 * self.dim,),
                                            dtype=config.floatX), name="b1")
    self.b2 = shared(scale_add * numpy.ones((4 * self.dim,),
                                            dtype=config.floatX), name="b2")
    self.b3 = shared(scale_add * numpy.ones((self.dim,),
                                            dtype=config.floatX), name="b3")
    self.s1 = shared(scale_mul * numpy.ones((4 * self.dim,),
                                            dtype=config.floatX), name="s1")
    self.s2 = shared(scale_mul * numpy.ones((4 * self.dim,),
                                            dtype=config.floatX), name="s2")
    self.s3 = shared(scale_mul * numpy.ones((self.dim,),
                                            dtype=config.floatX), name="s3")
    add_role(self.b1, WEIGHT)
    add_role(self.b2, WEIGHT)
    add_role(self.b3, WEIGHT)
    add_role(self.s1, WEIGHT)
    add_role(self.s2, WEIGHT)
    add_role(self.s3, WEIGHT)
    self.parameters = [
        self.W_state, self.W_cell_to_in, self.W_cell_to_forget,
        self.W_cell_to_out, self.initial_state_, self.initial_cells,
        self.b1, self.b2, self.b3, self.s1, self.s2, self.s3]

def initial_states(self, batch_size):
    initial_h1 = self.rnn1.initial_states(batch_size)
    initial_h2 = self.rnn2.initial_states(batch_size)
    initial_h3 = self.rnn3.initial_states(batch_size)
    last_h1 = shared_floatx_zeros((batch_size, self.rnn_h_dim))
    last_h2 = shared_floatx_zeros((batch_size, self.rnn_h_dim))
    last_h3 = shared_floatx_zeros((batch_size, self.rnn_h_dim))
    # Defined for all
    initial_k = tensor.zeros((batch_size, self.attention_size),
                             dtype=floatX)
    last_k = shared_floatx_zeros((batch_size, self.attention_size))
    # Trainable initial state for w. Why not for k?
    initial_w = tensor.repeat(self.initial_w[None, :], batch_size, 0)
    last_w = shared_floatx_zeros((batch_size, self.encoded_input_dim))
    return (initial_h1, last_h1, initial_h2, last_h2, initial_h3, last_h3,
            initial_w, last_w, initial_k, last_k)

def _allocate(self):
    self.W_ss = shared_floatx_nans((self.dim, 4 * self.dim), name='W_ss')
    self.W_is = shared_floatx_nans((self.dim,), name='W_is')
    # The underscore is required to prevent collision with
    # the `initial_state` application method
    self.initial_state_ = shared_floatx_zeros((self.dim,),
                                              name="initial_state")
    add_role(self.W_ss, WEIGHT)
    add_role(self.W_is, WEIGHT)
    add_role(self.initial_state_, INITIAL_STATE)
    self.parameters = [self.W_ss, self.W_is, self.initial_state_]

def _allocate(self):
    self.params.append(
        shared_floatx_nans((self.dim, self.dim), name='state_to_state'))
    self.params.append(
        shared_floatx_nans((self.dim, self.dim), name='state_to_update'))
    self.params.append(
        shared_floatx_nans((self.dim, self.dim), name='state_to_reset'))
    self.params.append(
        shared_floatx_zeros((self.dim,), name="initial_state"))
    for i in range(3):
        if self.params[i] is not None:
            add_role(self.params[i], WEIGHT)
    add_role(self.params[3], INITIAL_STATE)

def _allocate(self):
    self.W_rz = shared_floatx_nans((self.dim, 2 * self.dim), name='W_rz')
    self.W_htilde = shared_floatx_nans((self.dim, self.dim),
                                       name='W_htilde')
    # The underscore is required to prevent collision with
    # the `initial_state` application method
    self.initial_state_ = shared_floatx_zeros((self.dim,),
                                              name="initial_state")
    add_role(self.W_rz, WEIGHT)
    add_role(self.W_htilde, WEIGHT)
    add_role(self.initial_state_, INITIAL_STATE)
    self.parameters = [self.W_rz, self.W_htilde, self.initial_state_]

def __init__(self, input_dim, n_hidden):
    self.input_dim = input_dim
    self.n_hidden = n_hidden
    self.neural_arch = [input_dim] + n_hidden
    self.W = []
    self.b = []
    # Creating weights for each layer (except the last one)
    for n in xrange(len(self.neural_arch) - 1):
        n_in = self.neural_arch[n]
        n_out = self.neural_arch[n + 1]
        W = shared_floatx_zeros((n_in, n_out))
        randomWeightInitialize(W)
        W.name = 'W_' + str(n)
        b = shared_floatx_zeros(n_out)
        b.name = 'b_' + str(n)
        self.W.append(W)
        self.b.append(b)
    # Creating the last layer
    W = shared_floatx_zeros((n_hidden[-1], 1))
    randomWeightInitialize(W)
    W.name = 'W_out'
    b = shared_floatx_zeros(1)
    b.name = 'b_out'
    self.W.append(W)
    self.b.append(b)
    self.params = self.W + self.b

def _allocate(self): """In addition to the GRU parameters ``state_to_state`` and ``state_to_gates``, add the initial state if the search strategy is "constant". """ self.parameters.append(shared_floatx_nans((self.dim, self.dim), name='state_to_state')) self.parameters.append(shared_floatx_nans((self.dim, 2 * self.dim), name='state_to_gates')) for i in range(2): if self.parameters[i]: add_role(self.parameters[i], WEIGHT) if self.init_strategy == 'constant': self.parameters.append(shared_floatx_zeros((self.dim,), name="initial_state")) add_role(self.parameters[2], INITIAL_STATE)
def _create_maximum_activation_for(output, topn, dims=None):
    # Automatically compute the number of units
    if dims is None:
        dims = get_brick(output).get_dims(['output'])[0]
    if isinstance(dims, numbers.Integral):
        dims = (dims,)
        index = theano.shared(numpy.zeros((topn, dims[0]),
                                          dtype=numpy.int))
        snapshot = None
    else:
        index = theano.shared(numpy.zeros((topn, dims[0], 3),
                                          dtype=numpy.int))
        snapshot = theano.shared(numpy.zeros((topn,) + dims))
    quantity = shared_floatx_zeros((topn, dims[0]))
    index.tag.for_output = output
    add_role(index, MAXIMUM_ACTIVATION_INDEX)
    quantity.tag.for_output = output
    add_role(quantity, MAXIMUM_ACTIVATION_QUANTITY)
    return (dims, quantity, index, snapshot)

def _allocate(self):
    input_dim = ((self.input_dim,)
                 if not isinstance(self.input_dim, collections.Sequence)
                 else self.input_dim)
    broadcastable = (tuple(False for _ in input_dim)
                     if self.broadcastable is None else self.broadcastable)
    if len(input_dim) != len(broadcastable):
        raise ValueError("input_dim and broadcastable must be same length")
    var_dim = tuple(1 if broadcast else dim
                    for dim, broadcast in equizip(input_dim, broadcastable))
    # "gamma", from the Ioffe & Szegedy manuscript.
    self.scale = shared_floatx_nans(var_dim, name='batch_norm_scale',
                                    broadcastable=broadcastable)
    # "beta", from the Ioffe & Szegedy manuscript.
    self.shift = shared_floatx_nans(var_dim, name='batch_norm_shift',
                                    broadcastable=broadcastable)
    add_role(self.scale, BATCH_NORM_SCALE_PARAMETER)
    add_role(self.shift, BATCH_NORM_SHIFT_PARAMETER)
    self.parameters.append(self.scale)
    self.parameters.append(self.shift)
    # These aren't technically parameters, in that they should not be
    # learned using the same cost function as other model parameters.
    self.population_mean = shared_floatx_zeros(
        ((self.n_iter,) if self.n_iter else ()) + var_dim,
        name='population_mean',
        broadcastable=((False,) if self.n_iter else ()) + broadcastable)
    self.population_stdev = shared_floatx(
        numpy.ones(((self.n_iter,) if self.n_iter else ()) + var_dim),
        name='population_stdev',
        broadcastable=((False,) if self.n_iter else ()) + broadcastable)
    add_role(self.population_mean, BATCH_NORM_POPULATION_MEAN)
    add_role(self.population_stdev, BATCH_NORM_POPULATION_STDEV)
    # Normally these would get annotated by an AnnotatingList, but they
    # aren't in self.parameters.
    add_annotation(self.population_mean, self)
    add_annotation(self.population_stdev, self)

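# A minimal numpy sketch (assumed, not from the original source) of how the
# four tensors allocated above combine at inference time: standardize the
# input with the population statistics, then scale and shift.
import numpy

def batch_norm_inference(x, population_mean, population_stdev, scale, shift):
    # x: (batch,) + var_dim; the population arrays broadcast over the
    # batch axis exactly as the `broadcastable` patterns above intend.
    return scale * (x - population_mean) / population_stdev + shift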