Example #1
    def set_params(self):

        dim = self.input_dim
        hdim = self.hidden_dim

        self.input = T.matrix()

        self.W_i = self.init((dim, hdim))
        self.U_i = self.inner_init((hdim, hdim))
        self.b_i = shared_zeros((hdim))

        self.W_f = self.init((dim, hdim))
        self.U_f = self.inner_init((hdim, hdim))
        self.b_f = self.forget_bias_init((hdim))

        self.W_c = self.init((dim, hdim))
        self.U_c = self.inner_init((hdim, hdim))
        self.b_c = shared_zeros((hdim))

        self.W_o = self.init((dim, hdim))
        self.U_o = self.inner_init((hdim, hdim))
        self.b_o = shared_zeros((hdim))

        self.W_x = self.init((hdim, dim))
        self.b_x = shared_zeros((dim))

        self.params = [
            self.W_i, self.U_i, self.b_i, self.W_c, self.U_c, self.b_c,
            self.W_f, self.U_f, self.b_f, self.W_o, self.U_o, self.b_o,
            self.W_x, self.b_x
        ]
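For context, a minimal sketch (not from the original source) of the per-timestep recurrence these parameters would drive, using the standard LSTM gate equations plus the W_x/b_x projection back to input space:

    def step_sketch(self, x_t, h_tm1, c_tm1):
        # standard LSTM gates; the original step function is not shown above
        i = T.nnet.sigmoid(T.dot(x_t, self.W_i) + T.dot(h_tm1, self.U_i) + self.b_i)
        f = T.nnet.sigmoid(T.dot(x_t, self.W_f) + T.dot(h_tm1, self.U_f) + self.b_f)
        c = f * c_tm1 + i * T.tanh(T.dot(x_t, self.W_c) + T.dot(h_tm1, self.U_c) + self.b_c)
        o = T.nnet.sigmoid(T.dot(x_t, self.W_o) + T.dot(h_tm1, self.U_o) + self.b_o)
        h = o * T.tanh(c)
        x_hat = T.dot(h, self.W_x) + self.b_x  # project back to the input dimension
        return h, c, x_hat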
Example #2
    def build(self):
        self.readout.build()
        self.init_h = shared_zeros((self.state_dim, ))
        # the input size differs here: the readout output is concatenated to the input
        input_dim = self.input_shape[2] + self.readout.output_shape[1]

        # copy-paste from keras.recurrent.GRU
        self.W_z = self.init((input_dim, self.state_dim))
        self.U_z = self.inner_init((self.state_dim, self.state_dim))
        self.b_z = shared_zeros((self.state_dim))

        self.W_r = self.init((input_dim, self.state_dim))
        self.U_r = self.inner_init((self.state_dim, self.state_dim))
        self.b_r = shared_zeros((self.state_dim))

        self.W_h = self.init((input_dim, self.state_dim))
        self.U_h = self.inner_init((self.state_dim, self.state_dim))
        self.b_h = shared_zeros((self.state_dim))

        self.params = [
            self.init_h,
            self.W_z,
            self.U_z,
            self.b_z,
            self.W_r,
            self.U_r,
            self.b_r,
            self.W_h,
            self.U_h,
            self.b_h,
        ]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
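Since the weight layout is copied from keras.recurrent.GRU, the matching per-step update would look like this hedged sketch (assuming the layer exposes activation/inner_activation as the keras GRU does; x_t already carries the readout feedback concatenated per the input_dim above):

    def step_sketch(self, x_t, h_tm1):
        z = self.inner_activation(T.dot(x_t, self.W_z) + T.dot(h_tm1, self.U_z) + self.b_z)
        r = self.inner_activation(T.dot(x_t, self.W_r) + T.dot(h_tm1, self.U_r) + self.b_r)
        hh = self.activation(T.dot(x_t, self.W_h) + T.dot(r * h_tm1, self.U_h) + self.b_h)
        return z * h_tm1 + (1 - z) * hh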
Example #3
    def __init__(self, 
                 input_dim, 
                 hidden_dim, 
                 init='glorot_uniform', 
                 activation='linear', 
                 weights=None,
                 corruption_level=0.3):
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.input_dim = input_dim

        
        self.hidden_dim = hidden_dim
        self.output_dim = input_dim

        self.input = T.matrix()
        self.W = self.init((self.input_dim, self.hidden_dim))
        self.b = shared_zeros((self.hidden_dim))
        self.b_prime = shared_zeros((self.input_dim))

        numpy_rng = np.random.RandomState(123)

        self.theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        self.params = [self.W, self.b, self.b_prime]
        self.corruption_level = corruption_level

        if weights is not None:
            self.set_weights(weights)
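The corruption_level and theano_rng fields suggest the usual denoising-autoencoder masking noise; a hedged sketch of the corruption step (not shown in the snippet):

    def get_corrupted_input_sketch(self, x):
        # zero each input unit independently with probability corruption_level
        mask = self.theano_rng.binomial(size=x.shape, n=1,
                                        p=1 - self.corruption_level,
                                        dtype=theano.config.floatX)
        return mask * x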
Example #4
    def _build(self):
        nw = len(
            self.initial_weights) if self.initial_weights is not None else 0

        if self.initial_state is not None:
            self.h = sharedX(self.initial_state[0])
            self.c = sharedX(self.initial_state[1])
            del self.initial_state
        elif self.batch_size is not None:
            self.h = shared_zeros((self.batch_size, self.hidden_dim))
            self.c = shared_zeros((self.batch_size, self.hidden_dim))
        elif self.initial_weights is not None:
            if nw == len(self.params) + 2:
                self.h = sharedX(self.initial_weights[-1])
                self.c = sharedX(self.initial_weights[-2])
                nw -= 2
            else:
                raise Exception("Hidden state not provided in weights")
        else:
            raise Exception(
                "One of the following arguments must be provided for stateful RNNs: hidden_state, batch_size, weights"
            )
        self.state = [self.h, self.c]
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights[:nw])
            del self.initial_weights
Example #5
	def build(self):
		input_dim = self.input_shape[2]
		self.input = T.tensor3()

		self.W_sum = self.init((input_dim, self.output_dim))
		self.U_sum = self.inner_init((self.output_dim, self.output_dim))
		self.b_sum = shared_zeros((self.output_dim))

		self.W_i = self.init((input_dim, self.output_dim))
		self.U_i = self.inner_init((self.output_dim, self.output_dim))
		self.b_i = shared_zeros((self.output_dim))

		self.W_f = self.init((input_dim, self.output_dim))
		self.U_f = self.inner_init((self.output_dim, self.output_dim))
		self.b_f = self.forget_bias_init((self.output_dim))

		self.W_c = self.init((input_dim, self.output_dim))
		self.U_c = self.inner_init((self.output_dim, self.output_dim))
		self.b_c = shared_zeros((self.output_dim))

		self.W_o = self.init((input_dim, self.output_dim))
		self.U_o = self.inner_init((self.output_dim, self.output_dim))
		self.b_o = shared_zeros((self.output_dim))

		self.params = [
			self.W_sum, self.U_sum, self.b_sum,
			self.W_i, self.U_i, self.b_i,
			self.W_c, self.U_c, self.b_c,
			self.W_f, self.U_f, self.b_f,
			self.W_o, self.U_o, self.b_o,
		]

		if self.initial_weights is not None:
			self.set_weights(self.initial_weights)
			del self.initial_weights
Example #6
    def __init__(self, input_dim, output_dim=128,
        init= 'uniform', inner_init='glorot_normal',
        activation='softplus', inner_activation='hard_sigmoid',
        gate_activation= 'tanh',
        weights=None, truncate_gradient=-1, return_sequences=False):

        super(SGU, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences

        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.gate_activation = activations.get(gate_activation)
        self.input = TT.tensor3()

        self.W = self.init((self.input_dim, self.output_dim))
        self.U = self.inner_init((self.output_dim, self.output_dim))
        self.b = shared_zeros((self.output_dim))

        self.W_gate = self.init((self.input_dim, self.output_dim))
        self.b_gate = shared_zeros((self.output_dim))
        self.U_gate = self.inner_init((self.output_dim, self.output_dim))

        self.params = [
            self.W, self.U, self.b,
            self.W_gate, self.b_gate,
            self.U_gate
        ]

        if weights is not None:
            self.set_weights(weights)
Example #7
    def __init__(self, input_dim, states_dim, causes_dim,
                 init='glorot_uniform', inner_init='orthogonal',
                 activation='sigmoid', gate_activation='sigmoid',
                 weights=None, return_mode='states',
                 truncate_gradient=-1, return_sequences=False):
        super(FDPCN, self).__init__()
        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.input_dim = input_dim
        self.states_dim = states_dim
        self.causes_dim = causes_dim
        self.truncate_gradient = truncate_gradient
        self.activation = activations.get(activation)
        self.gate_activation = activations.get(gate_activation)
        self.return_sequences = return_sequences
        self.return_mode = return_mode
        self.input = T.tensor3()

        self.I2S = self.init((self.input_dim, self.states_dim))
        self.S2S = self.inner_init((self.states_dim, self.states_dim))
        self.Sb = shared_zeros((self.states_dim))

        self.S2C = self.init((self.states_dim, self.causes_dim))
        self.C2C = self.inner_init((self.causes_dim, self.causes_dim))
        self.Cb = shared_zeros((self.causes_dim))
        self.CbS = shared_zeros((self.states_dim))
        self.C2S = self.init((self.causes_dim, self.states_dim))
        self.params = [self.I2S, self.S2S, self.Sb,
                       self.C2S, self.C2C, self.Cb, self.S2C, self.CbS]

        if weights is not None:
            self.set_weights(weights)
Example #8
    def build(self):
        self.readout.build()
        self.init_h = shared_zeros((self.state_dim,))
        # the input size differs here: the readout output is concatenated to the input
        input_dim = self.input_shape[2] + self.readout.output_shape[1]

        # copy-paste from keras.recurrent.GRU
        self.W_z = self.init((input_dim, self.state_dim))
        self.U_z = self.inner_init((self.state_dim, self.state_dim))
        self.b_z = shared_zeros((self.state_dim))

        self.W_r = self.init((input_dim, self.state_dim))
        self.U_r = self.inner_init((self.state_dim, self.state_dim))
        self.b_r = shared_zeros((self.state_dim))

        self.W_h = self.init((input_dim, self.state_dim))
        self.U_h = self.inner_init((self.state_dim, self.state_dim))
        self.b_h = shared_zeros((self.state_dim))

        self.trainable_weights = [
            self.init_h,
            self.W_z, self.U_z, self.b_z,
            self.W_r, self.U_r, self.b_r,
            self.W_h, self.U_h, self.b_h,
        ]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #9
    def build(self):
        input_dim = self.input_shape[2]
        self.input = T.tensor3()

        self.W_g = self.init((input_dim, self.output_dim))
        #		self.U_g = sharedX(np.random.uniform(low=-scale, high=scale, size=(self.output_dim, 6 , self.output_dim)))
        self.U_g = self.inner_init((self.output_dim, 6, self.output_dim))
        self.b_g = shared_zeros((self.output_dim))

        self.W_c = self.init((input_dim, self.output_dim))
        self.U_c = self.inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim))

        self.W_o = self.init((input_dim, self.output_dim))
        self.U_o = self.inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim))

        self.EPS = 1e-10

        scalar_init = 1
        scale = 0.01

        #		self.k_parameters = shared_ones((11,))
        self.k_parameters = sharedX(
            np.random.uniform(low=scalar_init - scale,
                              high=scalar_init + scale,
                              size=(11, )))
        # self.sigma_se = shared_scalar(scalar_init)
        # self.sigma_per = shared_scalar(scalar_init)
        # self.sigma_b_lin = shared_scalar(scalar_init)
        # self.sigma_v_lin = shared_scalar(scalar_init)
        # self.sigma_rq = shared_scalar(scalar_init)

        # self.l_se = shared_scalar(scalar_init)
        # self.l_per = shared_scalar(scalar_init)
        # self.l_lin = shared_scalar(scalar_init)
        # self.l_rq = shared_scalar(scalar_init)

        # self.alpha_rq = shared_scalar(scalar_init)
        # self.p_per = shared_scalar(scalar_init)

        self.params = [
            self.k_parameters,
            #			self.sigma_se, self.sigma_per, self.sigma_b_lin, self.sigma_v_lin,self.sigma_rq,
            #			self.l_se, self.l_per, self.l_lin, self.l_rq,
            #			self.alpha_rq, self.p_per,
            self.W_g,
            self.U_g,
            self.b_g,
            self.W_c,
            self.U_c,
            self.b_c,
            self.W_o,
            self.U_o,
            self.b_o,
        ]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #10
    def __init__(self, input_dim, output_dim=128, train_init_cell=True, train_init_h=True,
                 init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one',
                 input_activation='tanh', gate_activation='hard_sigmoid', output_activation='tanh',
                 weights=None, truncate_gradient=-1, return_sequences=False):

        super(LSTMLayer, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences

        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.forget_bias_init = initializations.get(forget_bias_init)
        self.input_activation = activations.get(input_activation)
        self.gate_activation = activations.get(gate_activation)
        self.output_activation = activations.get(output_activation)
        self.input = T.tensor3()
        self.time_range = None

        W_z = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
        R_z = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)
        # self.b_z = shared_zeros(self.output_dim)

        W_i = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
        R_i = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)
        # self.b_i = shared_zeros(self.output_dim)

        W_f = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
        R_f = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)
        # self.b_f = self.forget_bias_init(self.output_dim)

        W_o = self.init((self.input_dim, self.output_dim)).get_value(borrow=True)
        R_o = self.inner_init((self.output_dim, self.output_dim)).get_value(borrow=True)
        # self.b_o = shared_zeros(self.output_dim)

        self.h_m1 = shared_zeros(shape=(1, self.output_dim), name='h0')
        self.c_m1 = shared_zeros(shape=(1, self.output_dim), name='c0')

        W = np.vstack((W_z[np.newaxis, :, :],
                       W_i[np.newaxis, :, :],
                       W_f[np.newaxis, :, :],
                       W_o[np.newaxis, :, :]))  # shape = (4, input_dim, output_dim)
        R = np.vstack((R_z[np.newaxis, :, :],
                       R_i[np.newaxis, :, :],
                       R_f[np.newaxis, :, :],
                       R_o[np.newaxis, :, :]))  # shape = (4, output_dim, output_dim)
        self.W = theano.shared(W, name='Input to hidden weights (zifo)', borrow=True)
        self.R = theano.shared(R, name='Recurrent weights (zifo)', borrow=True)
        self.b = theano.shared(np.zeros(shape=(4, self.output_dim), dtype=theano.config.floatX),
                               name='bias', borrow=True)

        self.params = [self.W, self.R]
        if train_init_cell:
            self.params.append(self.c_m1)
        if train_init_h:
            self.params.append(self.h_m1)

        if weights is not None:
            self.set_weights(weights)
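Stacking the gates into W with shape (4, input_dim, output_dim) and R with shape (4, output_dim, output_dim) allows per-gate indexing; a hedged sketch (hypothetical helper, not part of the original layer) of how a step function could compute one gate's pre-activation:

    def gate_preact_sketch(layer, x_t, h_tm1, k):
        # k indexes the zifo order used above: 0=z, 1=i, 2=f, 3=o
        return T.dot(x_t, layer.W[k]) + T.dot(h_tm1, layer.R[k]) + layer.b[k]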
Example #11
	def build(self):
		input_dim = self.input_shape[2]
		self.input = T.tensor3()
		self.n_param = 0  # initialized here so the += below is well-defined

		# forget gate params
		self.W_xf = self.init((input_dim, self.output_dim))
		# self.U_hf = self.inner_init((input_dim, self.output_dim))
		self.b_f = shared_zeros((self.output_dim))

		
		# input/feature params
		self.W_xz = self.init((input_dim, self.output_dim))
		# self.U_xz = self.inner_init((input_dim, self.output_dim))
		self.b_z = shared_zeros((self.output_dim))
		
		# output params
		self.W_xo = self.init((input_dim, self.output_dim))
		# self.U_xo = self.inner_init((input_dim, self.output_dim))
		self.b_o = shared_zeros((self.output_dim))
		
		self.n_param += 3 * (input_dim + 1) * self.output_dim

		self.params = [
			self.W_xf, self.b_f,
			self.W_xz, self.b_z,
			self.W_xo, self.b_o,
		]

		if self.initial_weights is not None:
			self.set_weights(self.initial_weights)
			del self.initial_weights
Example #12
def get_param_updates(params, grads, lr, method=None, **kwargs):
    rho = 0.95
    epsilon = 1e-6

    accumulators = [shared_zeros(p.get_value().shape) for p in params]
    updates = []

    constraint = kwargs.get('constraint')

    if method == 'adadelta':
        print "Using ADADELTA"
        delta_accumulators = [shared_zeros(p.get_value().shape) for p in params]
        for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
            new_a = rho * a + (1 - rho) * g ** 2 # update accumulator

            # use the new accumulator and the *old* delta_accumulator
            update = g * T.sqrt(d_a + epsilon) / T.sqrt(new_a + epsilon)
            new_p = p - lr * update

            # update delta_accumulator
            new_d_a = rho * d_a + (1 - rho) * update ** 2

            updates.append((p, new_p))
            updates.append((a, new_a))
            updates.append((d_a, new_d_a))

    elif method == 'adagrad':
        print "Using ADAGRAD"
        for p, g, a in zip(params, grads, accumulators):
            new_a = a + g ** 2 # update accumulator

            new_p = p - lr * g / T.sqrt(new_a + epsilon)
            updates.append((p, new_p)) # apply constraints
            updates.append((a, new_a))

    elif method == 'momentum': # Default
        print "Using MOMENTUM"
        momentum = kwargs['momentum']
        for param, gparam in zip(params, grads):
            param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
            gparam_constrained = maxnorm_constraint(gparam)
            param_update_update = momentum*param_update + (1. - momentum)*gparam_constrained
            updates.append((param, param - param_update * lr))
            updates.append((param_update, param_update_update))

    else: # Default
        print "Using DEFAULT"
        for param, gparam in zip(params, grads):
            param_update = maxnorm_constraint(gparam)
            updates.append((param, param - param_update * lr))

    # apply constraints on self.weights update
    # assumes that updates[0] corresponds to self.weights param
    if constraint is not None:
        updates[0] = (updates[0][0], constraint(updates[0][1]))

    return updates
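A minimal usage sketch (assuming Theano is available and the shared_zeros/maxnorm_constraint helpers used above are in scope): the (variable, new_value) pairs returned here plug directly into theano.function as its updates argument.

    import numpy as np
    import theano
    import theano.tensor as T

    x = T.vector()
    w = theano.shared(np.asarray([1., 1., 1.], dtype=theano.config.floatX))
    loss = T.sum((w * x) ** 2)
    grads = T.grad(loss, [w])

    updates = get_param_updates([w], grads, lr=0.1, method='adagrad')
    train_step = theano.function([x], loss, updates=updates)
    train_step(np.asarray([1., 2., 3.], dtype=theano.config.floatX))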
Example #14
    def build(self):
        input_dim = self.input_shape[2]
        self.input = T.tensor3()

        self.W_x2e = self.init((self.n_experts, input_dim, self.output_dim))
        self.W_x2g = self.init((input_dim, self.output_dim))

        self.b_x2e = shared_zeros((self.n_experts, self.output_dim))
        self.b_x2g = shared_zeros((self.output_dim))

        self.W_h2e = shared_zeros(
            (self.n_experts, self.output_dim, self.output_dim))

        scale = 0.05
        self.U_g = sharedX(
            np.random.uniform(low=-scale,
                              high=scale,
                              size=(self.output_dim, self.n_experts,
                                    self.output_dim)))

        self.params = [
            self.W_x2e, self.W_x2g, self.b_x2g, self.b_x2e, self.W_h2e,
            self.U_g
        ]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #15
    def build(self):
        input_dim = self.input_shape[2]

        def init_U(way = self.U_init):
            if way == "identity":
                return theano.shared(np.identity(self.output_dim).astype("float32")*0.6)
            if way == "orthogonal":
                return self.inner_init((self.output_dim, self.output_dim))
            if way == "uniform":
                return self.init((self.output_dim, self.output_dim))

        self.W1 = self.init((input_dim, self.output_dim))
        self.U1 = init_U() 

        self.W2 = self.init((self.output_dim, self.output_dim))
        self.U2 = init_U() 
        #self.V2 = theano.shared(np.zeros((self.output_dim, self.output_dim)).astype('float32'))
        self.V2 = self.init((self.output_dim, self.output_dim))

        self.b1 = shared_zeros((self.output_dim))
        self.b2 = shared_zeros((self.output_dim))

        self.params = [self.W1, self.U1] +\
                      [self.W2, self.U2, self.V2] +\
                      [self.b1, self.b2]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #16
	def build(self):
		input_dim = self.input_shape[2]
		self.input = T.tensor3()

		scale=0.05

		self.W_maxout = sharedX(np.random.uniform(low=-scale, high=scale, size=(self.n_opt, 2 , self.n_pieces)))
		self.b_maxout = shared_zeros((self.output_dim, self.n_opt, self.n_pieces))

		self.W_g = self.init((input_dim, self.output_dim))
		self.U_g = sharedX(np.random.uniform(low=-scale, high=scale, size=(self.output_dim, self.n_opt , self.output_dim)))
		self.b_g = shared_zeros((self.output_dim))

		self.W_c = self.init((input_dim, self.output_dim))
		self.U_c = self.inner_init((self.output_dim, self.output_dim))
		self.b_c = shared_zeros((self.output_dim))

		self.W_o = self.init((input_dim, self.output_dim))
		self.U_o = self.inner_init((self.output_dim, self.output_dim))
		self.b_o = shared_zeros((self.output_dim))

		self.params = [
			self.W_maxout, self.b_maxout,
			self.W_g, self.U_g, self.b_g,
			self.W_c, self.U_c, self.b_c,
			self.W_o, self.U_o, self.b_o,
		]

		if self.initial_weights is not None:
			self.set_weights(self.initial_weights)
			del self.initial_weights
Example #17
    def build(self):
        input_dim = self.input_shape[2]
        self.input = T.tensor3()

        scale = 0.05
        self.W_maxout = sharedX(
            np.random.uniform(low=-scale, high=scale, size=(2, self.n_pieces)))
        self.b_maxout = shared_zeros((self.output_dim, self.n_pieces))

        self.W_c = self.init((input_dim, self.output_dim))
        self.U_c = self.inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim))

        self.W_o = self.init((input_dim, self.output_dim))
        self.U_o = self.inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim))

        self.params = [
            self.W_maxout,
            self.b_maxout,
            self.W_c,
            self.U_c,
            self.b_c,
            self.W_o,
            self.U_o,
            self.b_o,
        ]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #19
    def __init__(self,
                 input_dim,
                 output_dim=128,
                 init='uniform',
                 inner_init='orthogonal',
                 activation='tanh',
                 inner_activation='hard_sigmoid',
                 weights=None,
                 truncate_gradient=-1,
                 return_sequences=False):

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences

        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.input = T.tensor3()

        self.W_i = self.init((self.input_dim, self.output_dim))
        self.U_i = self.inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim))

        self.W_f = self.init((self.input_dim, self.output_dim))
        self.U_f = self.inner_init((self.output_dim, self.output_dim))
        self.b_f = shared_zeros((self.output_dim))

        self.W_c = self.init((self.input_dim, self.output_dim))
        self.U_c = self.inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim))

        self.W_o = self.init((self.input_dim, self.output_dim))
        self.U_o = self.inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim))

        self.params = [
            self.W_i,
            self.U_i,
            self.b_i,
            self.W_c,
            self.U_c,
            self.b_c,
            self.W_f,
            self.U_f,
            self.b_f,
            self.W_o,
            self.U_o,
            self.b_o,
        ]

        # C1, H1: starting C, H values
        self.C1 = T.matrix()
        self.H1 = T.matrix()

        if weights is not None:
            self.set_weights(weights)
Example #20
    def __init__(self, periods, input_dim, output_dim=128,
        init= 'uniform', inner_init='glorot_normal',
        activation='softplus', inner_activation='hard_sigmoid',
        gate_activation= 'tanh',
        weights=None, truncate_gradient=-1, return_sequences=False):

        super(ClockworkSGU, self).__init__()
        self.periods = periods
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences

        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.gate_activation = activations.get(gate_activation)

        self.n = self.output_dim // len(self.periods)

        assert self.output_dim % len(self.periods) == 0

        self.input = TT.tensor3()

        self.W = self.init((self.input_dim, self.output_dim))
        self.b = shared_zeros((self.output_dim))

        self.W_gate = self.init((self.input_dim, self.output_dim))
        self.b_gate = shared_zeros((self.output_dim))


        self.clock_h = {}
        for i, period in enumerate(self.periods):
            self.clock_h[period] = self.inner_init((
                (i + 1) * self.n, self.n
            ))


        self.clock_gates = {}
        for i, period in enumerate(self.periods):
            self.clock_gates[period] = self.inner_init((
                (i + 1) * self.n, self.n

            ))


        self.params = [
            self.W, self.b,
            self.W_gate, self.b_gate,
        ]

        self.params.extend(self.clock_h.values())
        self.params.extend(self.clock_gates.values())


        if weights is not None:
            self.set_weights(weights)
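The assertion above requires output_dim to split evenly into len(self.periods) blocks of n units each: with output_dim=128 and periods=(1, 2, 4, 8), n = 128 // 4 = 32, and the recurrent matrix stored for the i-th period has shape ((i + 1) * n, n), presumably driven by the first i + 1 blocks of the hidden state (the usual clockwork-RNN connectivity).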
Example #21
    def build(self):
        input_dim = self.input_shape[2]
        self.input = T.tensor3()

        self.W_i = self.init((input_dim, self.output_dim))
        self.U_i = self.inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim))

        self.W_f = self.init((input_dim, self.output_dim))
        self.U_f = self.inner_init((self.output_dim, self.output_dim))
        self.b_f = self.forget_bias_init((self.output_dim))

        self.W_c = self.init((input_dim, self.output_dim))
        self.U_c = self.inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim))

        self.W_o = self.init((input_dim, self.output_dim))
        self.U_o = self.inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim))

        self.params = [
            self.W_i,
            self.U_i,
            self.b_i,
            self.W_c,
            self.U_c,
            self.b_c,
            self.W_f,
            self.U_f,
            self.b_f,
            self.W_o,
            self.U_o,
            self.b_o,
        ]
        nw = len(
            self.initial_weights) if self.initial_weights is not None else 0
        if self.initial_state is not None:
            self.h = sharedX(self.initial_state[0])
            self.c = sharedX(self.initial_state[1])
            del self.initial_state
        elif self.batch_size is not None:
            self.h = shared_zeros((self.batch_size, self.output_dim))
            self.c = shared_zeros((self.batch_size, self.output_dim))
        elif self.initial_weights is not None:
            if nw == len(self.params) + 2:
                self.h = sharedX(self.initial_weights[-1])
                self.c = sharedX(self.initial_weights[-2])
                nw -= 2
            else:
                raise Exception("Hidden state not provided in weights")
        else:
            raise Exception(
                "One of the following arguments must be provided for stateful RNNs: hidden_state, batch_size, weights"
            )
        self.state = [self.h, self.c]
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights[:nw])
            del self.initial_weights
Example #22
    def __init__(self, input_dim, output_dim, causes_dim,
                 hid2output,
                 init='glorot_uniform',
                 W_regularizer=None,
                 W_constraint=None,
                 b_regularizer=None,
                 b_constraint=None,
                 activation=lambda X: T.minimum(20, T.maximum(0, X)),
                 activity_regularizer=None,
                 truncate_gradient=-1,
                 weights=None, name=None,
                 return_mode='both',
                 return_sequences=True):
        super(GAE, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.causes_dim = causes_dim
        self.activation = activations.get(activation)
        self.init = initializations.get(init)
        self.truncate_gradient = truncate_gradient
        self.input = T.tensor3()
        self.return_mode = return_mode
        self.return_sequences = return_sequences

        self.V = self.init((input_dim, output_dim))
        self.U = self.init((input_dim, output_dim))
        self.W = self.init((output_dim, causes_dim))
        self.bo = shared_zeros((self.output_dim))
        self.bc = shared_zeros((self.causes_dim))

        self.params = [self.V, self.U, self.W]

        self.regularizers = []
        self.W_regularizer = regularizers.get(W_regularizer)
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)

        self.b_regularizer = regularizers.get(b_regularizer)
        if self.b_regularizer:
            self.b_regularizer.set_param(self.bo)
            self.regularizers.append(self.b_regularizer)

        self.activity_regularizer = regularizers.get(activity_regularizer)
        if self.activity_regularizer:
            self.activity_regularizer.set_layer(self)
            self.regularizers.append(self.activity_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)
        self.constraints = [self.W_constraint, self.b_constraint]

        if weights is not None:
            self.set_weights(weights)

        if name is not None:
            self.set_name(name)
Example #25
    def get_updates(self, params, grads, method=None, **kwargs):
        self.rho = 0.95
        self.epsilon = 1e-6

        accumulators = [shared_zeros(p.get_value().shape) for p in params]
        updates = []

        if method == 'adadelta':
            print "Using ADADELTA"
            delta_accumulators = [
                shared_zeros(p.get_value().shape) for p in params
            ]
            for p, g, a, d_a in zip(params, grads, accumulators,
                                    delta_accumulators):
                new_a = self.rho * a + (1 -
                                        self.rho) * g**2  # update accumulator
                updates.append((a, new_a))

                # use the new accumulator and the *old* delta_accumulator
                update = g * T.sqrt(d_a + self.epsilon) / T.sqrt(new_a +
                                                                 self.epsilon)

                new_p = p - self.lr * update
                updates.append((p, new_p))  # apply constraints

                # update delta_accumulator
                new_d_a = self.rho * d_a + (1 - self.rho) * update**2
                updates.append((d_a, new_d_a))

        elif method == 'adam':
            # unimplemented
            print "Using ADAM"

        elif method == 'adagrad':
            print "Using ADAGRAD"
            for p, g, a in zip(params, grads, accumulators):
                new_a = a + g**2  # update accumulator
                updates.append((a, new_a))

                new_p = p - self.lr * g / T.sqrt(new_a + self.epsilon)
                updates.append((p, new_p))  # apply constraints

        else:  # Default
            print "Using MOMENTUM"
            l_rate = kwargs['l_rate']
            for param, gparam in zip(params, grads):
                param_update = theano.shared(param.get_value() * 0.,
                                             broadcastable=param.broadcastable)
                updates.append((param, param - param_update * l_rate))
                updates.append((param_update, self.momentum * param_update +
                                (1. - self.momentum) * gparam))

        return updates
Example #26
    def build(self):
        self.input = T.tensor4()

        if self.inner_rnn == 'gru':
            self.enc = GRU(
                input_length=self.n_steps,
                input_dim=self._input_shape[0]*2*self.N_enc**2 + self.output_dim,
                output_dim=self.output_dim, init=self.init,
                inner_init=self.inner_init)
            self.dec = GRU(
                input_length=self.n_steps,
                input_dim=self.code_dim, output_dim=self.output_dim,
                init=self.init,
                inner_init=self.inner_init)

        elif self.inner_rnn == 'lstm':
            self.enc = LSTM(
                input_length=self.n_steps,
                input_dim=self._input_shape[0]*2*self.N_enc**2 + self.output_dim,
                output_dim=self.output_dim, init=self.init, inner_init=self.inner_init)
            self.dec = LSTM(
                input_length=self.n_steps,
                input_dim=self.code_dim, output_dim=self.output_dim,
                init=self.init, inner_init=self.inner_init)
        else:
            raise ValueError('This type of inner_rnn is not supported')

        self.enc.build()
        self.dec.build()

        self.init_canvas = shared_zeros(self._input_shape)  # canvas and hidden state
        self.init_h_enc = shared_zeros((self.output_dim))  # initial values
        self.init_h_dec = shared_zeros((self.output_dim))  # should be trained
        self.L_enc = self.enc.init((self.output_dim, 5))  # "read" attention parameters (eq. 21)
        self.L_dec = self.enc.init((self.output_dim, 5))  # "write" attention parameters (eq. 28)
        self.b_enc = shared_zeros((5))  # "read" attention parameters (eq. 21)
        self.b_dec = shared_zeros((5))  # "write" attention parameters (eq. 28)
        self.W_patch = self.enc.init((self.output_dim, self.N_dec**2*self._input_shape[0]))
        self.b_patch = shared_zeros((self.N_dec**2*self._input_shape[0]))
        self.W_mean = self.enc.init((self.output_dim, self.code_dim))
        self.W_sigma = self.enc.init((self.output_dim, self.code_dim))
        self.b_mean = shared_zeros((self.code_dim))
        self.b_sigma = shared_zeros((self.code_dim))
        self.trainable_weights = self.enc.trainable_weights + self.dec.trainable_weights + [
            self.L_enc, self.L_dec, self.b_enc, self.b_dec, self.W_patch,
            self.b_patch, self.W_mean, self.W_sigma, self.b_mean, self.b_sigma,
            self.init_canvas, self.init_h_enc, self.init_h_dec]

        if self.inner_rnn == 'lstm':
            self.init_cell_enc = shared_zeros((self.output_dim))     # initial values
            self.init_cell_dec = shared_zeros((self.output_dim))     # should be trained
            self.trainable_weights = self.trainable_weights + [self.init_cell_dec, self.init_cell_enc]
Example #28
    def __init__(self,
                 input_dim,
                 output_dim=128,
                 init='glorot_uniform',
                 inner_init='orthogonal',
                 forget_bias_init='one',
                 activation='tanh',
                 inner_activation='hard_sigmoid',
                 weights=None,
                 truncate_gradient=-1,
                 return_sequences=False):

        super(LangLSTMLayerV0, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences

        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.forget_bias_init = initializations.get(forget_bias_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.input = T.tensor3()

        self.W_i = self.init((self.input_dim, self.output_dim))
        self.U_i = self.inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros(self.output_dim)

        self.W_f = self.init((self.input_dim, self.output_dim))
        self.U_f = self.inner_init((self.output_dim, self.output_dim))
        self.b_f = self.forget_bias_init(self.output_dim)

        self.W_c = self.init((self.input_dim, self.output_dim))
        self.U_c = self.inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros(self.output_dim)

        self.W_o = self.init((self.input_dim, self.output_dim))
        self.U_o = self.inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros(self.output_dim)

        self.h00 = shared_zeros(shape=(1, self.output_dim))

        self.params = [
            self.W_i, self.U_i, self.b_i, self.W_c, self.U_c, self.b_c,
            self.W_f, self.U_f, self.b_f, self.W_o, self.U_o, self.b_o,
            self.h00
        ]

        if weights is not None:
            self.set_weights(weights)
Example #29
    def build(self):
        input_dim = self.input_shape[2]

        def init_U(way = self.U_init, n = 6):
            U_ = np.zeros((self.output_dim, self.output_dim * n)).astype("float32")
            for k in xrange(n):
                if way == "identity":
                    U_[:, k*self.output_dim: (k+1)*self.output_dim] = np.identity(self.output_dim).astype("float32")*0.95
                if way == "orthogonal":
                    U = self.inner_init((self.output_dim, self.output_dim)).get_value()
                    U_[:, k*self.output_dim: (k+1)*self.output_dim] = U
                if way == "uniform":
                    U = self.init((self.output_dim, self.output_dim), self.v_init).get_value()
                    U_[:, k*self.output_dim: (k+1)*self.output_dim] = U
            return U_

        # U is a big matrix for all the hidden layers
        # for each hidden layer, U = [U_f, U_i, U_o, U_c]
        U = np.zeros((self.output_dim*(self.dp-1), self.output_dim*4)).astype('float32')
        for i in xrange(self.dp-1):
            U[i*self.output_dim:(i+1)*self.output_dim, :] = init_U(n=4)
        self.U = theano.shared(U)
        self.b = shared_zeros((self.dp-1, self.output_dim*4))
        b = np.zeros((self.dp-1, self.output_dim*4), dtype = "float32")
        # important: set b
        b[:, 0:3*self.output_dim] = 5*np.ones((self.dp-1, 3*self.output_dim), dtype = "float32")
        b[:, 0:1*self.output_dim] = -5*np.ones((self.dp-1, self.output_dim), dtype = "float32")
        self.b.set_value(b)

        # U_1 is a big matrix for the low states:
        # U for hid-hid, W for in-hid, V for skew-top-down.
        # [  W_f_u,  W_i, W_o, W_c;
        #    U_f_u,  U_i, U_o, U_c; 
        #    V_f_u,  V_i, V_o, V_c ]
        self.W1 = self.init((input_dim, self.output_dim *4), self.v_init)
        self.U1 = self.init((self.output_dim, self.output_dim * 4), self.v_init)
        self.U1.set_value(init_U(n=4))

        self.b1 = shared_zeros((self.output_dim*4))

        # initialize b so that the entries for U_f_w and V_f_w are -k and those for U_f_u are 1.
        b = np.zeros((self.output_dim*4), dtype = "float32")
        b[0:self.output_dim] = np.ones((self.output_dim), dtype = "float32")
        self.b1.set_value(b)

        self.params = [self.U, self.b, self.W1, self.U1, self.b1]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #30
    def __init__(self, input_dim, hidden_dim, init='glorot_uniform', weights=None, name=None,
        W_regularizer=None, bx_regularizer=None, bh_regularizer=None, #activity_regularizer=None,
        W_constraint=None, bx_constraint=None, bh_constraint=None):

        super(RBM, self).__init__()
        self.init = initializations.get(init)
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim

        self.input = T.matrix()
        self.W = self.init((self.input_dim, self.hidden_dim))
        self.bx = shared_zeros((self.input_dim))
        self.bh = shared_zeros((self.hidden_dim))

        self.params = [self.W, self.bx, self.bh]

        self.regularizers = []

        self.W_regularizer = regularizers.get(W_regularizer)
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)

        self.bx_regularizer = regularizers.get(bx_regularizer)
        if self.bx_regularizer:
            self.bx_regularizer.set_param(self.bx)
            self.regularizers.append(self.bx_regularizer)

        self.bh_regularizer = regularizers.get(bh_regularizer)
        if self.bh_regularizer:
            self.bh_regularizer.set_param(self.bh)
            self.regularizers.append(self.bh_regularizer)

        #self.activity_regularizer = regularizers.get(activity_regularizer)
        #if self.activity_regularizer:
        #    self.activity_regularizer.set_layer(self)
        #    self.regularizers.append(self.activity_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.bx_constraint = constraints.get(bx_constraint)
        self.bh_constraint = constraints.get(bh_constraint)
        self.constraints = [self.W_constraint, self.bx_constraint, self.bh_constraint]

        if weights is not None:
            self.set_weights(weights)

        if name is not None:
            self.set_name(name)

        self.srng = RandomStreams(seed=np.random.randint(10e6))
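A hedged sketch (assumed, not shown in the source) of how W, bh and srng would typically combine for one half-step of Gibbs sampling in this RBM:

    def sample_h_given_x_sketch(self, x):
        # hidden activation probabilities, then a Bernoulli sample
        p_h = T.nnet.sigmoid(T.dot(x, self.W) + self.bh)
        return self.srng.binomial(size=p_h.shape, n=1, p=p_h,
                                  dtype=theano.config.floatX)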
Example #31
    def __init__(self, filter_shape, init_mode = 'glorot_uniform', w_shared = True, n_inputs = 1, regularizers = None, constraints = None):
        self.name = self.__class__.__name__
        self.init = initializations.get(init_mode)
        self.w_shared = w_shared
        self.filter_shape = filter_shape

        self.params_dict = OrderedDict( \
            [('W', [self.init(filter_shape)] if w_shared \
                   else [self.init(filter_shape) for i in xrange(n_inputs)]),
             ('b', [shared_zeros((filter_shape[1],))] if w_shared \
                   else [shared_zeros((filter_shape[1],)) for i in xrange(n_inputs)])])
        self.params = [param for sublist in self.params_dict.values() for param in sublist]
        self.set_constraints(constraints)
        self.set_regularizers(regularizers)
Example #32
    def __init__(self,
                 nb_filter,
                 stack_size,
                 filter_length,
                 init='glorot_uniform',
                 activation='linear',
                 weights=None,
                 image_shape=None,
                 border_mode='valid',
                 subsample_length=1):
        super(Convolution1D, self).__init__()

        nb_row = 1
        nb_col = filter_length
        subsample = (1, subsample_length)
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.subsample = subsample
        self.border_mode = border_mode
        self.image_shape = image_shape
        self.nb_filter = nb_filter
        self.stack_size = stack_size

        self.input = T.tensor4()
        self.W_shape = (nb_filter, stack_size, nb_row, nb_col)
        self.W = self.init(self.W_shape)
        self.b = shared_zeros((nb_filter, ))

        self.params = [self.W, self.b]

        if weights is not None:
            self.set_weights(weights)
Example #34
    def build(self):
        input_shape = self.input_shape
        input_dim = input_shape[2] # embedding dimension
        self.e0 = self.init((input_dim,)) # sentence start
        #print 'e0.type:', e0.type
        #self.e0 = e0.dimshuffle('x', 0, 1) # make the sample axis broadcastable
        #print 'self.e0.type:', self.e0.type
        self.c0 = self.init((self.context_dim,))
        #self.c0 = c0.dimshuffle('x', 0, 1)
        self.en = self.init((input_dim,)) # sentence end
        #self.en = en.dimshuffle('x', 0, 1)
        self.cn = self.init((self.context_dim,))
        #self.cn = cn.dimshuffle('x', 0, 1)

        self.Wl = self.init((self.context_dim, self.context_dim))
        self.Wr = self.init((self.context_dim, self.context_dim))
        self.Wsl = self.init((input_dim, self.context_dim))
        self.Wsr = self.init((input_dim, self.context_dim))
        self.W2 = self.init((input_dim + 2*self.context_dim, self.output_dim))
        self.b2 = shared_zeros((self.output_dim,))

        self.params = [self.e0, self.c0,
                       self.en, self.cn,
                       self.Wl, self.Wr,
                       self.Wsl, self.Wsr,
                       self.W2, self.b2]
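The parameter names (Wl/Wr for context-to-context, Wsl/Wsr for word-to-context, e0/c0 and en/cn for the sentence boundaries) appear to follow the recurrent CNN of Lai et al. (2015); a hedged sketch of the left-context recursion they suggest (the right context would mirror it with Wr/Wsr, scanning from the sentence end):

    def left_context_sketch(self, e_prev, c_prev):
        # c_l(w_i) = f(c_l(w_{i-1}) * Wl + e(w_{i-1}) * Wsl)
        return T.tanh(T.dot(c_prev, self.Wl) + T.dot(e_prev, self.Wsl))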
Example #35
    def __init__(self, input_dim, hidden_dim, init='glorot_uniform', activation='linear', weights=None):
        nvis = input_dim
        nhid = hidden_dim
        W_shape = (nhid, nvis)
        lim = np.sqrt(6. / (2 * nvis + 1))
        W_init = np.random.uniform(-lim, lim, W_shape)
        W = theano.shared(W_init)

        hbias=theano.shared(np.zeros((nhid,1)),broadcastable=[False,True])

        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.input_dim = input_dim
        
        self.hidden_dim = hidden_dim
        self.output_dim = input_dim

        self.input = T.matrix()

        #maybe need to replace the initialization function

        self.W = self.init((self.input_dim, self.hidden_dim))
        self.b = shared_zeros((self.hidden_dim))
        #self.b_tilde = shared_zeros((self.input_dim))

        self.params = [self.W, self.b]

        if weights is not None:
            self.set_weights(weights)
Example #36
    def build(self):
        input_dim = self.input_shape[2]

        self.input = T.matrix()
        self.W = self.init((input_dim, self.output_dim))
        self.b = shared_zeros((self.output_dim, ))

        self.params = [self.W, self.b]

        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)

        if self.b_regularizer:
            self.b_regularizer.set_param(self.b)
            self.regularizers.append(self.b_regularizer)

        if self.activity_regularizer:
            self.activity_regularizer.set_layer(self)
            self.regularizers.append(self.activity_regularizer)

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #37
    def __init__(self,
                 input_dim,
                 hidden_dim,
                 init='glorot_uniform',
                 activation='linear',
                 weights=None):
        nvis = input_dim
        nhid = hidden_dim
        W_shape = nhid, nvis
        lim = np.sqrt(6. / (2 * nvis + 1))
        W_init = np.random.uniform(-lim, lim, W_shape)
        W = theano.shared(W_init)

        hbias = theano.shared(np.zeros((nhid, 1)), broadcastable=[False, True])

        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.input_dim = input_dim

        self.hidden_dim = hidden_dim
        self.output_dim = input_dim

        self.input = T.matrix()

        #maybe need to replace the initialization function

        self.W = self.init((self.input_dim, self.hidden_dim))
        self.b = shared_zeros((self.hidden_dim))
        #self.b_tilde = shared_zeros((self.input_dim))

        self.params = [self.W, self.b]

        if weights is not None:
            self.set_weights(weights)
Example #38
    def build(self):
        input_dim = self.input_shape[2]
        self.input = T.tensor3()

        self.W_i = self.init((input_dim, self.output_dim))
        self.U_i = self.inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim))

        self.W_f = self.init((input_dim, self.output_dim))
        self.U_f = self.inner_init((self.output_dim, self.output_dim))
        self.b_f = self.forget_bias_init((self.output_dim))

        self.W_c = self.init((input_dim, self.output_dim))
        self.U_c = self.inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim))

        self.W_o = self.init((input_dim, self.output_dim))
        self.U_o = self.inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim))

        self.params = [
            self.W_i, self.U_i, self.b_i,
            self.W_c, self.U_c, self.b_c,
            self.W_f, self.U_f, self.b_f,
            self.W_o, self.U_o, self.b_o,
        ]
        nw = len(self.initial_weights) if self.initial_weights is not None else 0
        if self.initial_state is not None:
            self.h = sharedX(self.initial_state[0])
            self.c = sharedX(self.initial_state[1])
            del self.initial_state
        elif self.batch_size is not None:
            self.h = shared_zeros((self.batch_size, self.output_dim))
            self.c = shared_zeros((self.batch_size, self.output_dim))                
        elif self.initial_weights is not None:
            if nw == len(self.params) + 2:
                self.h = sharedX(self.initial_weights[-1])
                self.c = sharedX(self.initial_weights[-2])
                nw -= 2
            else:
                raise Exception("Hidden state not provided in weights")
        else:
            raise Exception("One of the following arguments must be provided for stateful RNNs: hidden_state, batch_size, weights")
        self.state = [self.h, self.c]
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights[:nw])
            del self.initial_weights
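
Usage note: exactly one of initial_state, batch_size, or weights carrying a trailing (c, h) pair must be supplied, checked in that order. A sketch of resetting the carried-over state between sequences (the class name StatefulLSTM is hypothetical):

import numpy as np

layer = StatefulLSTM(output_dim=64, batch_size=32)  # hypothetical stateful layer
layer.build()  # h and c start as zeros of shape (32, 64)
# ... after training on one sequence, clear the running state by hand:
layer.h.set_value(np.zeros((32, 64), dtype='float32'))
layer.c.set_value(np.zeros((32, 64), dtype='float32'))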
Example #39
    def get_updates(self, params, grads, method=None, **kwargs):
        self.rho = 0.95
        self.epsilon = 1e-6

        accumulators = [shared_zeros(p.get_value().shape) for p in params]
        updates=[]

        if method == 'adadelta':
            print "Using ADADELTA"
            delta_accumulators = [shared_zeros(p.get_value().shape) for p in params]
            for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
                new_a = self.rho * a + (1 - self.rho) * g ** 2 # update accumulator
                updates.append((a, new_a))

                # use the new accumulator and the *old* delta_accumulator
                update = g * T.sqrt(d_a + self.epsilon) / T.sqrt(new_a + self.epsilon)

                new_p = p - self.lr * update
                updates.append((p, new_p)) # apply constraints

                # update delta_accumulator
                new_d_a = self.rho * d_a + (1 - self.rho) * update ** 2
                updates.append((d_a, new_d_a))


        elif method == 'adam':
            # unimplemented
            print "Using ADAM"

        elif method == 'adagrad':
            print "Using ADAGRAD"
            for p, g, a in zip(params, grads, accumulators):
                new_a = a + g ** 2 # update accumulator
                updates.append((a, new_a))

                new_p = p - self.lr * g / T.sqrt(new_a + self.epsilon)
                updates.append((p, new_p)) # apply constraints

        else: # Default
            print "Using MOMENTUM"
            l_rate = kwargs['l_rate']
            for param, gparam in zip(params, grads):
                param_update = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
                updates.append((param, param - param_update * l_rate))
                updates.append((param_update, self.momentum*param_update + (1. - self.momentum)*gparam))

        return updates
Example #40
 def __init__(self, *args, **kwargs):
     super(ProdTensor, self).__init__(*args, **kwargs)
     self.W = self.init((self.input_dim, self.output_dim))
     self.C = self.init((self.causes_dim, self.output_dim))
     self.b0 = shared_zeros((self.output_dim))
     self.params[0] = self.W
     self.params[1] = self.C
     self.params = self.params + [self.b0, ]
Example #41
    def __init__(self, input_dim, output_dim=128,
                 init='glorot_uniform', inner_init='orthogonal', forget_bias_init='one',
                 activation='tanh', inner_activation='hard_sigmoid',
                 weights=None, truncate_gradient=-1, return_sequences=False):

        super(LangLSTMLayerV0, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences

        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.forget_bias_init = initializations.get(forget_bias_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.input = T.tensor3()

        self.W_i = self.init((self.input_dim, self.output_dim))
        self.U_i = self.inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros(self.output_dim)

        self.W_f = self.init((self.input_dim, self.output_dim))
        self.U_f = self.inner_init((self.output_dim, self.output_dim))
        self.b_f = self.forget_bias_init(self.output_dim)

        self.W_c = self.init((self.input_dim, self.output_dim))
        self.U_c = self.inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros(self.output_dim)

        self.W_o = self.init((self.input_dim, self.output_dim))
        self.U_o = self.inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros(self.output_dim)

        self.h00 = shared_zeros(shape=(1, self.output_dim))

        self.params = [
            self.W_i, self.U_i, self.b_i,
            self.W_c, self.U_c, self.b_c,
            self.W_f, self.U_f, self.b_f,
            self.W_o, self.U_o, self.b_o,
            self.h00
        ]

        if weights is not None:
            self.set_weights(weights)
Example #42
    def __init__(self, weights):
        super(FixedEmbedding, self).__init__()
        self.input_dim, self.output_dim = weights.shape

        self.input = T.imatrix()
        self.W = shared_zeros((self.input_dim, self.output_dim))
        self.W.set_value(weights)
        self.params = []
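
The lookup this layer implies is plain integer indexing into the frozen matrix; because params is empty, W never receives gradient updates. A self-contained sketch:

import numpy as np
import theano
import theano.tensor as T

W = theano.shared(np.random.randn(1000, 50).astype('float32'))
idx = T.imatrix()                        # (batch, sequence_length)
lookup = theano.function([idx], W[idx])  # -> (batch, sequence_length, 50)
print lookup(np.array([[1, 2], [3, 4]], dtype='int32')).shape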
Example #44
    def __init__(self, input_dim, output_dim=128, 
        init='uniform', inner_init='orthogonal', 
        activation='tanh', inner_activation='hard_sigmoid',
        weights=None, truncate_gradient=-1, return_sequences=False):

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences

        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.input = T.tensor3()

        self.W_i = self.init((self.input_dim, self.output_dim))
        self.U_i = self.inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim))

        self.W_f = self.init((self.input_dim, self.output_dim))
        self.U_f = self.inner_init((self.output_dim, self.output_dim))
        self.b_f = shared_zeros((self.output_dim))

        self.W_c = self.init((self.input_dim, self.output_dim))
        self.U_c = self.inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim))

        self.W_o = self.init((self.input_dim, self.output_dim))
        self.U_o = self.inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim))

        self.params = [
            self.W_i, self.U_i, self.b_i,
            self.W_c, self.U_c, self.b_c,
            self.W_f, self.U_f, self.b_f,
            self.W_o, self.U_o, self.b_o,
        ]

        # C1, H1: starting C, H values
        self.C1 = T.matrix()
        self.H1 = T.matrix()

        if weights is not None:
            self.set_weights(weights)
Example #45
    def build(self):
        input_dim = self.input_shape[2]
        self.input = T.tensor3()

        self.W_sum = self.init((input_dim, self.output_dim))
        self.U_sum = self.inner_init((self.output_dim, self.output_dim))
        self.b_sum = shared_zeros((self.output_dim))

        self.W_i = self.init((input_dim, self.output_dim))
        self.U_i = self.inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim))

        self.W_f = self.init((input_dim, self.output_dim))
        self.U_f = self.inner_init((self.output_dim, self.output_dim))
        self.b_f = self.forget_bias_init((self.output_dim))

        self.W_c = self.init((input_dim, self.output_dim))
        self.U_c = self.inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim))

        self.W_o = self.init((input_dim, self.output_dim))
        self.U_o = self.inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim))

        self.params = [
            self.W_sum,
            self.U_sum,
            self.b_sum,
            self.W_i,
            self.U_i,
            self.b_i,
            self.W_c,
            self.U_c,
            self.b_c,
            self.W_f,
            self.U_f,
            self.b_f,
            self.W_o,
            self.U_o,
            self.b_o,
        ]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #46
    def build(self):
        input_dim = self.input_shape[2]
        self.input = T.tensor3()

        self.W_x2e = self.init((self.n_experts, input_dim, self.output_dim))
        self.W_e2e = self.init((self.output_dim, self.output_dim))
        self.b_x2e = shared_zeros((self.n_experts, self.output_dim))

        self.W_x2g = self.init((input_dim, self.output_dim))
        self.b_x2g = shared_zeros((self.output_dim))

        self.U_g = self.init((self.output_dim, self.n_experts, self.output_dim))

        self.params = [self.W_x2e, self.W_e2e, self.b_x2e, self.W_x2g, self.b_x2g, self.U_g]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #47
    def __init__(self, n_channels, batch_size=30):
        self.n_channels = n_channels
        self.batch_size = batch_size

        self.conv1_W = initializations.uniform((96, n_channels, 7, 7))
        self.conv1_b = shared_zeros((96,))

        self.conv2_W = initializations.uniform((256, 96, 5, 5))
        self.conv2_b = shared_zeros((256,))

        self.conv3_W = initializations.uniform((512, 256, 3, 3))
        self.conv3_b = shared_zeros((512,))

        self.conv4_W = initializations.uniform((512, 512, 3, 3))
        self.conv4_b = shared_zeros((512,))

        self.conv5_W = initializations.uniform((512, 512, 3, 3))
        self.conv5_b = shared_zeros((512,))
Example #48
    def __init__(self, input_dim, output_dim, init='glorot_uniform', activation='linear', weights=None, name=None,
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None, corruption_level=0.0):

        super(DAE, self).__init__()
        self.srng = RandomStreams(seed=np.random.randint(10e6))
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.corruption_level = corruption_level

        self.input = T.matrix()
        self.W = self.init((self.input_dim, self.output_dim))
        self.b = shared_zeros((self.output_dim))
        self.bT = shared_zeros((self.input_dim))

        self.params = [self.W, self.b, self.bT]

        self.regularizers = []
        self.W_regularizer = regularizers.get(W_regularizer)
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)

        self.b_regularizer = regularizers.get(b_regularizer)
        if self.b_regularizer:
            self.b_regularizer.set_param(self.b)
            self.regularizers.append(self.b_regularizer)

        self.activity_regularizer = regularizers.get(activity_regularizer)
        if self.activity_regularizer:
            self.activity_regularizer.set_layer(self)
            self.regularizers.append(self.activity_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)
        self.constraints = [self.W_constraint, self.b_constraint]

        if weights is not None:
            self.set_weights(weights)

        if name is not None:
            self.set_name(name)
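
A hedged sketch of the denoising pass these parameters suggest: tied weights (W encodes, W.T with bT decodes) and masking noise drawn from the layer's srng at corruption_level:

import theano.tensor as T

def dae_forward(x, W, b, bT, srng, corruption_level, activation):
    mask = srng.binomial(x.shape, p=1 - corruption_level, dtype=x.dtype)
    h = activation(T.dot(x * mask, W) + b)   # encode the corrupted input
    return activation(T.dot(h, W.T) + bT)    # reconstruct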
Example #49
    def __init__(self, input_dim, output_dim=128, mem=None,
                 mem_dim=128, init='glorot_uniform', inner_init='orthogonal',
                 activation='sigmoid', inner_activation='hard_sigmoid',
                 weights=None, truncate_gradient=-1, return_sequences=False,
                 return_mode='states'):

        super(GRUM, self).__init__(input_dim, output_dim, init=init,
                                   inner_init=inner_init, activation=activation,
                                   inner_activation=inner_activation,
                                   truncate_gradient=truncate_gradient,
                                   return_sequences=return_sequences)
        if mem is None:
            self.mem = shared_zeros((1, mem_dim))
        else:
            self.mem = mem
        self.mem_dim = mem_dim
        self.return_mode = return_mode

        self.Hm_z = self.init((self.mem_dim, self.output_dim))
        self.Hm_r = self.init((self.mem_dim, self.output_dim))
        self.Hm_h = self.init((self.mem_dim, self.output_dim))

        self.Wm_z = self.init((self.input_dim, self.mem_dim))
        self.Um_z = self.inner_init((self.mem_dim, self.mem_dim))
        self.Vm_z = self.inner_init((self.output_dim, self.mem_dim))
        self.bm_z = shared_zeros((self.mem_dim))

        self.Wm_r = self.init((self.input_dim, self.mem_dim))
        self.Um_r = self.inner_init((self.mem_dim, self.mem_dim))
        self.Vm_r = self.inner_init((self.output_dim, self.mem_dim))
        self.bm_r = shared_zeros((self.mem_dim))

        self.Wm_h = self.init((self.input_dim, self.mem_dim))
        self.Um_h = self.inner_init((self.mem_dim, self.mem_dim))
        self.Vm_h = self.inner_init((self.mem_dim, self.mem_dim))
        self.bm_h = shared_zeros((self.mem_dim))

        self.params = self.params + [
            self.Hm_z, self.Hm_r, self.Hm_h,
            self.Wm_z, self.Um_z, self.bm_z,
            self.Wm_r, self.Um_r, self.bm_r,
            self.Wm_h, self.Um_h, self.bm_h,
        ]
Example #50
    def __init__(self, n_vocab, dim_word, dim_ctx, dim):
        self.n_vocab = n_vocab
        self.dim_word = dim_word
        self.dim_ctx = dim_ctx
        self.dim = dim

        ### Word Embedding ###
        self.Wemb = initializations.uniform((n_vocab, self.dim_word))

        ### LSTM initialization NN ###
        self.Init_state_W = initializations.uniform((self.dim_ctx, self.dim))
        self.Init_state_b = shared_zeros((self.dim))

        self.Init_memory_W = initializations.uniform((self.dim_ctx, self.dim))
        self.Init_memory_b = shared_zeros((self.dim))


        ### Main LSTM ###
        self.lstm_W = initializations.uniform((self.dim_word, self.dim * 4))
        self.lstm_U = sharedX(np.concatenate([ortho_weight(dim),
                                      ortho_weight(dim),
                                      ortho_weight(dim),
                                      ortho_weight(dim)], axis=1))

        self.lstm_b = shared_zeros((self.dim*4))

        self.Wc = initializations.uniform((self.dim_ctx, self.dim*4)) # image -> LSTM hidden
        self.Wc_att = initializations.uniform((self.dim_ctx, self.dim_ctx)) # image passed once through a small net
        self.Wd_att = initializations.uniform((self.dim, self.dim_ctx)) # LSTM hidden -> influence on the image
        self.b_att = shared_zeros((self.dim_ctx))

        self.U_att = initializations.uniform((self.dim_ctx, 1)) # collapse the image's 512 features to one dimension
        self.c_att = shared_zeros((1))

        ### Decoding NeuralNets ###
        self.decode_lstm_W = initializations.uniform((self.dim, self.dim_word))
        self.decode_lstm_b = shared_zeros((self.dim_word))

        self.decode_word_W = initializations.uniform((self.dim_word, n_vocab))
        self.decode_word_b = shared_zeros((n_vocab))

        self.params = [self.Wemb,
                       self.Init_state_W, self.Init_state_b,
                       self.Init_memory_W, self.Init_memory_b,
                       self.lstm_W, self.lstm_U, self.lstm_b,
                       self.Wc, self.Wc_att, self.Wd_att, self.b_att,
                       self.U_att, self.c_att,
                       self.decode_lstm_W, self.decode_lstm_b,
                       self.decode_word_W, self.decode_word_b]

        self.param_names = ['Wemb', 'Init_state_W', 'Init_state_b',
                            'Init_memory_W', 'Init_memory_b',
                            'lstm_W', 'lstm_U', 'lstm_b',
                            'Wc', 'Wc_att', 'Wd_att', 'b_att',
                            'U_att', 'c_att',
                            'decode_lstm_W', 'decode_lstm_b',
                            'decode_word_W', 'decode_word_b']
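
The attention parameters (Wc_att, Wd_att, U_att, c_att) follow the soft-attention scoring of show-attend-and-tell; a sketch under that assumption, with ctx of shape (n, annotations, dim_ctx) and LSTM state h of shape (n, dim):

import theano.tensor as T

def soft_attention(ctx, h, Wc_att, b_att, Wd_att, U_att, c_att):
    pctx = T.dot(ctx, Wc_att) + b_att               # project the annotations
    pstate = T.dot(h, Wd_att)                       # project the LSTM state
    e = T.dot(T.tanh(pctx + pstate[:, None, :]), U_att) + c_att
    alpha = T.nnet.softmax(e.reshape((e.shape[0], e.shape[1])))
    return (ctx * alpha[:, :, None]).sum(axis=1)    # expected context vector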
Example #51
    def build(self):
        input_dim = self.input_shape[2]
        self.input = T.tensor3()

        self.W_x2e = self.init((self.n_experts, input_dim, self.output_dim))
        self.W_e2e = self.init((self.output_dim, self.output_dim))
        self.b_x2e = shared_zeros((self.n_experts, self.output_dim))

        self.W_x2g = self.init((input_dim, self.output_dim))
        self.b_x2g = shared_zeros((self.output_dim))

        self.U_g = self.init(
            (self.output_dim, self.n_experts, self.output_dim))

        self.params = [
            self.W_x2e, self.W_e2e, self.b_x2e, self.W_x2g, self.b_x2g,
            self.U_g
        ]

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
Example #52
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)
        lr = self.lr * (1.0 / (1.0 + self.decay * self.iterations))
        self.updates = [(self.iterations, self.iterations + 1.)]

        for p, g, c in zip(params, grads, constraints):
            m = shared_zeros(p.get_value().shape)  # momentum
            v = self.momentum * m - lr * g  # velocity
            self.updates.append((m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - lr * g
            else:
                new_p = p + v
            c_new_p = _proxOp(c(new_p), self.lr * self.lambdav,
                              self.soft_threshold)
            self.updates.append((p, c_new_p))
        return self.updates
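
_proxOp and soft_threshold are not shown in this snippet; for an L1 penalty the standard proximal operator is coordinate-wise soft-thresholding, sketched here as an assumption:

import theano.tensor as T

def soft_threshold(x, t):
    # shrink every coordinate toward zero by t, clamping at zero
    return T.sgn(x) * T.maximum(T.abs_(x) - t, 0.)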
Example #53
    def __init__(self,
                 input_dim,
                 output_dim,
                 init='uniform',
                 activation='linear',
                 weights=None):
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.input_dim = input_dim
        self.output_dim = output_dim

        self.input = T.tensor3()
        self.W = self.init((self.input_dim, self.output_dim))
        self.b = shared_zeros((self.output_dim))

        self.params = [self.W, self.b]

        if weights is not None:
            self.set_weights(weights)
Example #54
    def __init__(self,
                 n_words=1000,
                 n_embedding=100,
                 lr=0.01,
                 margin=0.1,
                 momentum=0.9,
                 word_to_id=None):
        self.n_embedding = n_embedding
        self.n_lstm_embed = n_embedding
        self.word_embed = n_embedding
        self.lr = lr
        self.momentum = momentum
        self.margin = margin
        self.n_words = n_words
        self.n_D = 3 * self.n_words + 3

        self.word_to_id = word_to_id
        self.id_to_word = dict((v, k) for k, v in word_to_id.iteritems())

        # Question
        x = T.vector('x')
        phi_x = T.vector('phi_x')

        # True statements
        phi_f1_1 = T.vector('phi_f1_1')
        phi_f2_1 = T.vector('phi_f2_1')

        # False statements
        phi_f1_2 = T.vector('phi_f1_2')
        phi_f2_2 = T.vector('phi_f2_2')

        # Supporting memories
        m0 = T.vector('m0')
        m1 = T.vector('m1')
        phi_m0 = T.vector('phi_m0')
        phi_m1 = T.vector('phi_m1')

        # True word
        r = T.vector('r')

        # Word sequence
        words = T.ivector('words')

        # Scoring function
        self.U_O = init_shared_normal(n_embedding, self.n_D, 0.01)

        # Word embedding
        self.L = glorot_uniform((self.n_words, self.word_embed))
        self.Lprime = glorot_uniform((self.n_words, self.n_lstm_embed))

        # LSTM
        self.W_i = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_i = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_i = shared_zeros((self.n_lstm_embed))

        self.W_f = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_f = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_f = shared_zeros((self.n_lstm_embed))

        self.W_c = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_c = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_c = shared_zeros((self.n_lstm_embed))

        self.W_o = glorot_uniform((self.word_embed, self.n_lstm_embed))
        self.U_o = orthogonal((self.n_lstm_embed, self.n_lstm_embed))
        self.b_o = shared_zeros((self.n_lstm_embed))

        mem_cost = self.calc_cost(phi_x, phi_f1_1, phi_f1_2, phi_f2_1,
                                  phi_f2_2, phi_m0)

        lstm_output = self.lstm_cost(words)
        self.predict_function_r = theano.function(inputs=[words],
                                                  outputs=lstm_output,
                                                  allow_input_downcast=True)

        lstm_cost = -T.sum(T.mul(r, T.log(lstm_output)))

        cost = mem_cost + lstm_cost

        params = [
            self.U_O, self.W_i, self.U_i, self.b_i, self.W_f, self.U_f,
            self.b_f, self.W_c, self.U_c, self.b_c, self.W_o, self.U_o,
            self.b_o, self.L, self.Lprime
        ]

        grads = T.grad(cost, params)

        # Parameter updates
        updates = self.get_updates(params, grads, method='adagrad')

        l_rate = T.scalar('l_rate')

        # Theano functions
        self.train_function = theano.function(
            inputs=[
                phi_x, phi_f1_1, phi_f1_2, phi_f2_1, phi_f2_2, phi_m0, r,
                words,
                theano.Param(l_rate, default=self.lr)
            ],
            outputs=cost,
            updates=updates,
            on_unused_input='warn',
            allow_input_downcast=True,
        )
        #mode='FAST_COMPILE')
        #mode='DebugMode')
        #mode=theano.compile.MonitorMode(pre_func=inspect_inputs,post_func=inspect_outputs))

        # Candidate statement for prediction
        phi_f = T.vector('phi_f')

        score_o = self.calc_score_o(phi_x, phi_f)
        self.predict_function_o = theano.function(inputs=[phi_x, phi_f],
                                                  outputs=score_o)
Example #55
    def __init__(self,
                 input_dim,
                 output_dim=128,
                 init='glorot_uniform',
                 inner_init='orthogonal',
                 activation='tanh',
                 inner_activation='hard_sigmoid',
                 weights=None,
                 truncate_gradient=-1,
                 output_mode='sum'):

        super(BiDirectionLSTM, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.output_mode = output_mode  # output_mode is either sum or concatenate

        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.activation = activations.get(activation)
        self.inner_activation = activations.get(inner_activation)
        self.input = T.tensor3()

        # forward weights
        self.W_i = self.init((self.input_dim, self.output_dim))
        self.U_i = self.inner_init((self.output_dim, self.output_dim))
        self.b_i = shared_zeros((self.output_dim))

        self.W_f = self.init((self.input_dim, self.output_dim))
        self.U_f = self.inner_init((self.output_dim, self.output_dim))
        self.b_f = shared_zeros((self.output_dim))

        self.W_c = self.init((self.input_dim, self.output_dim))
        self.U_c = self.inner_init((self.output_dim, self.output_dim))
        self.b_c = shared_zeros((self.output_dim))

        self.W_o = self.init((self.input_dim, self.output_dim))
        self.U_o = self.inner_init((self.output_dim, self.output_dim))
        self.b_o = shared_zeros((self.output_dim))

        # backward weights
        self.Wb_i = self.init((self.input_dim, self.output_dim))
        self.Ub_i = self.inner_init((self.output_dim, self.output_dim))
        self.bb_i = shared_zeros((self.output_dim))

        self.Wb_f = self.init((self.input_dim, self.output_dim))
        self.Ub_f = self.inner_init((self.output_dim, self.output_dim))
        self.bb_f = shared_zeros((self.output_dim))

        self.Wb_c = self.init((self.input_dim, self.output_dim))
        self.Ub_c = self.inner_init((self.output_dim, self.output_dim))
        self.bb_c = shared_zeros((self.output_dim))

        self.Wb_o = self.init((self.input_dim, self.output_dim))
        self.Ub_o = self.inner_init((self.output_dim, self.output_dim))
        self.bb_o = shared_zeros((self.output_dim))

        self.params = [
            self.W_i,
            self.U_i,
            self.b_i,
            self.W_c,
            self.U_c,
            self.b_c,
            self.W_f,
            self.U_f,
            self.b_f,
            self.W_o,
            self.U_o,
            self.b_o,
            self.Wb_i,
            self.Ub_i,
            self.bb_i,
            self.Wb_c,
            self.Ub_c,
            self.bb_c,
            self.Wb_f,
            self.Ub_f,
            self.bb_f,
            self.Wb_o,
            self.Ub_o,
            self.bb_o,
        ]

        if weights is not None:
            self.set_weights(weights)
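
How the forward and backward passes merge is governed by output_mode; a sketch, assumed rather than taken from this layer's get_output, for a time-major layout where the backward pass ran over the reversed sequence:

import theano.tensor as T

def combine(forward_out, backward_out, output_mode):
    if output_mode == 'sum':
        return forward_out + backward_out[::-1]
    elif output_mode == 'concatenate':
        return T.concatenate([forward_out, backward_out[::-1]], axis=-1)
    raise ValueError('output_mode must be "sum" or "concatenate"')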
Example #56
    def __init__(self,
                 input_dim,
                 output_dim=128,
                 train_init_cell=True,
                 train_init_h=True,
                 init='glorot_uniform',
                 inner_init='orthogonal',
                 forget_bias_init='one',
                 input_activation='tanh',
                 gate_activation='hard_sigmoid',
                 output_activation='tanh',
                 weights=None,
                 truncate_gradient=-1,
                 return_sequences=False):

        super(LSTMLayerV0, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.truncate_gradient = truncate_gradient
        self.return_sequences = return_sequences

        self.init = initializations.get(init)
        self.inner_init = initializations.get(inner_init)
        self.forget_bias_init = initializations.get(forget_bias_init)
        self.input_activation = activations.get(input_activation)
        self.gate_activation = activations.get(gate_activation)
        self.output_activation = activations.get(output_activation)
        self.input = T.tensor3()

        W_z = self.init(
            (self.input_dim, self.output_dim)).get_value(borrow=True)
        R_z = self.inner_init(
            (self.output_dim, self.output_dim)).get_value(borrow=True)
        # self.b_z = shared_zeros(self.output_dim)

        W_i = self.init(
            (self.input_dim, self.output_dim)).get_value(borrow=True)
        R_i = self.inner_init(
            (self.output_dim, self.output_dim)).get_value(borrow=True)
        # self.b_i = shared_zeros(self.output_dim)

        W_f = self.init(
            (self.input_dim, self.output_dim)).get_value(borrow=True)
        R_f = self.inner_init(
            (self.output_dim, self.output_dim)).get_value(borrow=True)
        # self.b_f = self.forget_bias_init(self.output_dim)

        W_o = self.init(
            (self.input_dim, self.output_dim)).get_value(borrow=True)
        R_o = self.inner_init(
            (self.output_dim, self.output_dim)).get_value(borrow=True)
        # self.b_o = shared_zeros(self.output_dim)

        self.h_m1 = shared_zeros(shape=(1, self.output_dim), name='h0')
        self.c_m1 = shared_zeros(shape=(1, self.output_dim), name='c0')

        W = np.vstack(
            (W_z[np.newaxis, :, :], W_i[np.newaxis, :, :],
             W_f[np.newaxis, :, :],
             W_o[np.newaxis, :, :]))  # shape = (4, input_dim, output_dim)
        R = np.vstack(
            (R_z[np.newaxis, :, :], R_i[np.newaxis, :, :],
             R_f[np.newaxis, :, :],
             R_o[np.newaxis, :, :]))  # shape = (4, output_dim, output_dim)
        self.W = theano.shared(W,
                               name='Input to hidden weights (zifo)',
                               borrow=True)
        self.R = theano.shared(R, name='Recurrent weights (zifo)', borrow=True)
        self.b = theano.shared(np.zeros(shape=(4, self.output_dim),
                                        dtype=theano.config.floatX),
                               name='bias',
                               borrow=True)

        self.params = [self.W, self.R]
        if train_init_cell:
            self.params.append(self.c_m1)
        if train_init_h:
            self.params.append(self.h_m1)

        if weights is not None:
            self.set_weights(weights)
Example #57
    def build(self):
        input_leng, input_dim = self.input_shape[1:]
        self.input = T.tensor3()

        if self.inner_rnn == 'gru':
            self.rnn = GRU(input_dim=input_dim + self.m_length,
                           input_length=input_leng,
                           output_dim=self.output_dim,
                           init=self.init,
                           inner_init=self.inner_init)
        elif self.inner_rnn == 'lstm':
            self.rnn = LSTM(input_dim=input_dim + self.m_length,
                            input_length=input_leng,
                            output_dim=self.output_dim,
                            init=self.init,
                            inner_init=self.inner_init)
        else:
            raise ValueError('this inner_rnn is not implemented yet.')

        self.rnn.build()

        # initial memory, state, read and write vectors
        self.M = theano.shared((.001 * np.ones((1, )).astype(floatX)))
        self.init_h = shared_zeros((self.output_dim))
        self.init_wr = self.rnn.init((self.n_slots, ))
        self.init_ww = self.rnn.init((self.n_slots, ))

        # write
        self.W_e = self.rnn.init((self.output_dim, self.m_length))  # erase
        self.b_e = shared_zeros((self.m_length))
        self.W_a = self.rnn.init((self.output_dim, self.m_length))  # add
        self.b_a = shared_zeros((self.m_length))

        # get_w  parameters for reading operation
        self.W_k_read = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_read = self.rnn.init((self.m_length, ))
        self.W_c_read = self.rnn.init(
            (self.output_dim,
             3))  # 3 = beta, g, gamma; see eq. 5, 7, 9 in Graves et al., 2014
        self.b_c_read = shared_zeros((3))
        self.W_s_read = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_read = shared_zeros((self.shift_range))

        # get_w  parameters for writing operation
        self.W_k_write = self.rnn.init((self.output_dim, self.m_length))
        self.b_k_write = self.rnn.init((self.m_length, ))
        self.W_c_write = self.rnn.init(
            (self.output_dim, 3))  # 3 = beta, g, gamma see eq. 5, 7, 9
        self.b_c_write = shared_zeros((3))
        self.W_s_write = self.rnn.init((self.output_dim, self.shift_range))
        self.b_s_write = shared_zeros((self.shift_range))

        self.C = _circulant(self.n_slots, self.shift_range)

        self.params = self.rnn.params + [
            self.W_e, self.b_e, self.W_a, self.b_a, self.W_k_read,
            self.b_k_read, self.W_c_read, self.b_c_read, self.W_s_read,
            self.b_s_read, self.W_k_write, self.b_k_write, self.W_s_write,
            self.b_s_write, self.W_c_write, self.b_c_write, self.M,
            self.init_h, self.init_wr, self.init_ww
        ]

        if self.inner_rnn == 'lstm':
            self.init_c = shared_zeros((self.output_dim))
            self.params = self.params + [
                self.init_c,
            ]
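
The W_k_*/W_c_* parameters above feed the NTM addressing pipeline of Graves et al., 2014; a hedged sketch of the content-addressing step (their eq. 5), cosine similarity against memory sharpened by beta:

import theano.tensor as T

def content_address(M, k, beta, eps=1e-6):
    # M: (n_slots, m_length) memory, k: (m_length,) key
    sim = T.dot(M, k) / (M.norm(2, axis=1) * k.norm(2) + eps)
    return T.nnet.softmax(beta * sim.dimshuffle('x', 0))[0]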
Example #58
def get_param_updates(params, grads, lr, method=None, **kwargs):
    rho = 0.95
    epsilon = 1e-6

    accumulators = [shared_zeros(p.get_value().shape) for p in params]
    updates = []

    if 'constraint' in kwargs:
        constraint = kwargs['constraint']
    else:
        constraint = None

    if method == 'adadelta':
        print "Using ADADELTA"
        delta_accumulators = [
            shared_zeros(p.get_value().shape) for p in params
        ]
        for p, g, a, d_a in zip(params, grads, accumulators,
                                delta_accumulators):
            new_a = rho * a + (1 - rho) * g**2  # update accumulator

            # use the new accumulator and the *old* delta_accumulator
            update = g * T.sqrt(d_a + epsilon) / T.sqrt(new_a + epsilon)
            new_p = p - lr * update

            # update delta_accumulator
            new_d_a = rho * d_a + (1 - rho) * update**2

            updates.append((p, new_p))
            updates.append((a, new_a))
            updates.append((d_a, new_d_a))

    elif method == 'adagrad':
        print "Using ADAGRAD"
        for p, g, a in zip(params, grads, accumulators):
            new_a = a + g**2  # update accumulator

            new_p = p - lr * g / T.sqrt(new_a + epsilon)
            updates.append((p, new_p))  # apply constraints
            updates.append((a, new_a))

    elif method == 'momentum':
        print "Using MOMENTUM"
        momentum = kwargs['momentum']
        for param, gparam in zip(params, grads):
            param_update = theano.shared(param.get_value() * 0.,
                                         broadcastable=param.broadcastable)
            gparam_constrained = maxnorm_constraint(gparam)
            param_update_update = momentum * param_update + (
                1. - momentum) * gparam_constrained
            updates.append((param, param - param_update * lr))
            updates.append((param_update, param_update_update))

    else:  # Default
        print "Using DEFAULT"
        for param, gparam in zip(params, grads):
            param_update = maxnorm_constraint(gparam)
            updates.append((param, param - param_update * lr))

    # apply constraints on self.weights update
    # assumes that updates[0] corresponds to self.weights param
    if constraint is not None:
        updates[0] = (updates[0][0], constraint(updates[0][1]))

    return updates
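
The (variable, new_value) pairs returned above are meant for theano.function, which applies them on every call; a minimal usage sketch, assuming shared_zeros from the same module is in scope for the adagrad branch:

import numpy as np
import theano
import theano.tensor as T

x = T.vector()
w = theano.shared(np.zeros(3, dtype=theano.config.floatX))
cost = T.sum((x - w) ** 2)
grads = T.grad(cost, [w])
updates = get_param_updates([w], grads, lr=0.1, method='adagrad')
step = theano.function([x], cost, updates=updates)  # one adagrad step per call
print step(np.ones(3, dtype=theano.config.floatX))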