示例#1
0
def build_network(input_size,hidden_size):
	"""Build the symbolic graph of a sigmoid-hidden, softmax-output network.

	The output layer reuses the transposed input-to-hidden weights, so the
	only parameters returned are that matrix and the output bias.

	Returns:
		``(X, output, parameters)``: the ``imatrix`` input variable, the
		softmax output expression, and ``[weights, output bias]``.
	"""
	X = T.imatrix('X')

	# Shared variables are created in the same order as before.
	encoder_weights = U.create_shared(U.initial_weights(input_size,hidden_size))
	# NOTE(review): allocated but never referenced by the graph below; the
	# decoder uses the transposed encoder weights instead.
	W_hidden_to_output = U.create_shared(U.initial_weights(hidden_size,input_size))
	output_bias = U.create_shared(U.initial_weights(input_size))

	hidden_activation = T.nnet.sigmoid(T.dot(X,encoder_weights))
	decoder_input = T.dot(hidden_activation,encoder_weights.T) + output_bias
	output = T.nnet.softmax(decoder_input)

	return X,output,[encoder_weights,output_bias]
示例#2
0
文件: lstm.py 项目: ticcky/xtrack
    def __init__(self, learning_rate):
        """Create the parameter container, the 's0' input and the weights.

        Args:
            learning_rate: stored unchanged on ``self.learning_rate``.

        Reads ``self.test_s0``, ``self.n_cells``, ``self.input_size`` and
        ``self.clf_size``, none of which are assigned here -- presumably
        class-level attributes; confirm against the class body.
        """
        self.params = Parameters()
        self.learning_rate = learning_rate

        # Symbolic matrix named 's0', with a test value attached to its tag.
        self.s0 = tt.matrix('s0')
        self.s0.tag.test_value = self.test_s0
        # Three weights, each built by initial_weights(n_cells, input_size).
        self.params.w_x = initial_weights(self.n_cells, self.input_size)
        self.params.w_f = initial_weights(self.n_cells, self.input_size)
        self.params.w_i = initial_weights(self.n_cells, self.input_size)

        # Weights and bias sized by clf_size (the bias gets a single dimension).
        self.params.w_clf = initial_weights(self.clf_size, self.n_cells)
        self.params.b_clf = initial_weights(self.clf_size)
示例#3
0
def build_network(input_size, hidden_size):
    """Construct a sigmoid-encoder / softmax-decoder graph sharing one weight matrix.

    Returns:
        ``(X, output, parameters)`` where ``X`` is the ``imatrix`` input,
        ``output`` is the softmax expression and ``parameters`` is
        ``[W_input_to_hidden, b_output]``.
    """
    def new_shared(*shape):
        # Freshly initialised shared variable for the given dimensions.
        return U.create_shared(U.initial_weights(*shape))

    X = T.imatrix('X')
    W_input_to_hidden = new_shared(input_size, hidden_size)
    # NOTE(review): created but not used anywhere in the graph below.
    W_hidden_to_output = new_shared(hidden_size, input_size)
    b_output = new_shared(input_size)

    hidden = T.nnet.sigmoid(T.dot(X, W_input_to_hidden))
    # The decoder multiplies by the transpose of the encoder matrix.
    logits = T.dot(hidden, W_input_to_hidden.T) + b_output

    return X, T.nnet.softmax(logits), [W_input_to_hidden, b_output]
示例#4
0
	def __init__(self,layers_in,layer_out):
		"""Create one weight matrix per input layer plus a zero bias.

		Args:
			layers_in: object whose ``layers`` attribute is iterated; each
				element must expose ``size``.
			layer_out: object exposing ``size``.
		"""
		self.ins = layers_in
		self.out = layer_out

		# Build the shared weights in iteration order of the input layers.
		weight_matrices = []
		for inp in self.ins.layers:
			initial = U.initial_weights(inp.size,self.out.size)
			weight_matrices.append(U.create_shared(initial))
		self.Ws = weight_matrices

		self.bias = U.create_shared(np.zeros(self.out.size))
		# Everything exposed as updatable: all weight matrices, then the bias.
		self.updates = self.Ws + [self.bias]
示例#5
0
def construct_network(context, characters, hidden, mult_hidden):
    """Build the prediction graph and its momentum-style update list.

    Args:
        context: not used by this function.
        characters: size of the character dimension of the weights.
        hidden: size of the hidden dimension of the weights.
        mult_hidden: size of the second hidden dimension used by the
            ``f_char_hidden`` / ``Wf_hidden`` / ``fW_hidden`` weights.

    Returns:
        ``(X, Y, alpha, lr, updates, predictions, weights)``.
    """
    print("Setting up memory...")
    X = T.bvector('X')
    Y = T.bvector('Y')
    float_type = theano.config.floatX
    alpha = T.cast(T.fscalar('alpha'), dtype=float_type)
    lr = T.cast(T.fscalar('lr'), dtype=float_type)

    print("Initialising weights...")
    W_char_hidden = U.create_shared(U.initial_weights(characters, hidden))
    f_char_hidden = U.create_shared(U.initial_weights(characters, mult_hidden))
    b_hidden = U.create_shared(U.initial_weights(hidden))
    Wf_hidden = U.create_shared(U.initial_weights(hidden, mult_hidden))
    fW_hidden = U.create_shared(U.initial_weights(mult_hidden, hidden))
    W_hidden_predict = U.create_shared(U.initial_weights(hidden, characters))
    b_predict = U.create_shared(U.initial_weights(characters))

    print("Constructing graph...")
    # Rows of the two character matrices are looked up by the indices in X.
    hidden_layer = make_hidden(
        hidden,
        W_char_hidden[X],
        f_char_hidden[X],
        Wf_hidden,
        fW_hidden,
        b_hidden,
    )
    predictions = T.nnet.softmax(
        T.dot(hidden_layer, W_hidden_predict) + b_predict)
    weights = [
        W_char_hidden,
        f_char_hidden,
        b_hidden,
        Wf_hidden,
        fW_hidden,
        W_hidden_predict,
        b_predict,
    ]

    # Mean negative log-probability at the target index Y of each row.
    log_predictions = T.log(predictions)
    cost = -T.mean(log_predictions[T.arange(Y.shape[0]), Y])
    gparams = T.grad(cost, weights)

    # One accumulator per weight, zero-initialised with the weight's shape.
    deltas = [U.create_shared(np.zeros(w.get_value().shape)) for w in weights]
    steps = [
        alpha * delta + gparam * lr for delta, gparam in zip(deltas, gparams)
    ]
    # Parameter updates come first, followed by the accumulator updates.
    updates = [(param, param - step) for param, step in zip(weights, steps)]
    updates += [(delta, step) for delta, step in zip(deltas, steps)]
    return X, Y, alpha, lr, updates, predictions, weights
示例#6
0
 def __init__(self, layers_in, layer_out):
     """Allocate per-input weight matrices and a zero bias for the output layer.

     Args:
         layers_in: object whose ``layers`` attribute is iterated; each
             element must expose ``size``.
         layer_out: object exposing ``size``.
     """
     self.ins = layers_in
     self.out = layer_out

     def new_weights(inp):
         # Shared weights sized by this input layer and the output layer.
         return U.create_shared(U.initial_weights(inp.size, self.out.size))

     self.Ws = [new_weights(inp) for inp in self.ins.layers]
     self.bias = U.create_shared(np.zeros(self.out.size))
     # Weight matrices first, bias last.
     self.updates = self.Ws + [self.bias]
示例#7
0
def build_network(input_size, hidden_size):
    """Create the shared parameters and wire them through ``make_rae`` and ``unroll``.

    Returns:
        ``(X, parameters, hidden, hidden1_reproduction, input_reproduction,
        unrolled)`` where ``X`` is the ``dmatrix`` input and ``parameters``
        lists the six shared variables created here.
    """
    def new_shared(*shape):
        # Freshly initialised shared variable for the given dimensions.
        return U.create_shared(U.initial_weights(*shape))

    X = T.dmatrix('X')

    # Creation order is kept: it fixes the order of the initial_weights calls.
    W_input_to_hidden = new_shared(input_size, hidden_size)
    W_hidden_to_hidden = new_shared(hidden_size, hidden_size)
    b_hidden = new_shared(hidden_size)
    initial_hidden = new_shared(hidden_size)
    b_hidden_reproduction = new_shared(hidden_size)
    # NOTE(review): this transposed view is assigned but never used below.
    W_hidden_to_input_reproduction = W_input_to_hidden.T
    b_input_reproduction = new_shared(input_size)

    parameters = [
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
    ]

    hidden, hidden1_reproduction, input_reproduction = make_rae(
        X,
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
    )

    # Unroll from the last hidden state for as many steps as `hidden` has rows.
    last_hidden = hidden[-1]
    step_count = hidden.shape[0]
    unrolled = unroll(
        last_hidden,
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
        step_count,
    )

    return (X, parameters, hidden, hidden1_reproduction, input_reproduction,
            unrolled)
示例#8
0
def build_network(input_size, hidden_size):
    """Build training and evaluation graphs of a one-hidden-layer sigmoid-output net.

    The training graph multiplies the rectified hidden layer by a binomial
    mask (p=0.5, one draw per hidden unit); the evaluation graph scales the
    hidden layer by 0.5 instead.

    Returns:
        ``(X, training_output, evaluation_output, parameters)``.
    """
    srng = RandomStreams(seed=12345)

    X = T.fmatrix('X')
    W_input_to_hidden1 = U.create_shared(
        U.initial_weights(input_size, hidden_size))
    b_hidden1 = U.create_shared(U.initial_weights(hidden_size))
    W_hidden1_to_output = U.create_shared(U.initial_weights(hidden_size))
    b_output = U.create_shared(U.initial_weights(1)[0])

    def rectified_hidden():
        # Affine map followed by zeroing of the non-positive entries.
        pre_activation = T.dot(X, W_input_to_hidden1) + b_hidden1
        return pre_activation * (pre_activation > 0)

    def readout(hidden1):
        return T.nnet.sigmoid(T.dot(hidden1, W_hidden1_to_output) + b_output)

    # The training graph is built first, so the mask is the first srng draw.
    training_output = readout(
        rectified_hidden() * srng.binomial(size=(hidden_size, ), p=0.5))
    evaluation_output = readout(0.5 * rectified_hidden())

    parameters = [W_input_to_hidden1, b_hidden1, W_hidden1_to_output, b_output]

    return X, training_output, evaluation_output, parameters
示例#9
0
def construct_network(context,characters,hidden,mult_hidden):
	"""Create the weights, prediction graph and momentum-style updates.

	Args:
		context: not used by this function.
		characters: size of the character dimension of the weights.
		hidden: size of the hidden dimension of the weights.
		mult_hidden: size of the second hidden dimension used by the
			``f_char_hidden`` / ``Wf_hidden`` / ``fW_hidden`` weights.

	Returns:
		``(X, Y, alpha, lr, updates, predictions, weights)``.
	"""
	print("Setting up memory...")
	X = T.bvector('X')
	Y = T.bvector('Y')
	alpha = T.cast(T.fscalar('alpha'),dtype=theano.config.floatX)
	lr = T.cast(T.fscalar('lr'),dtype=theano.config.floatX)

	print("Initialising weights...")
	W_char_hidden = U.create_shared(U.initial_weights(characters,hidden))
	f_char_hidden = U.create_shared(U.initial_weights(characters,mult_hidden))
	b_hidden = U.create_shared(U.initial_weights(hidden))
	Wf_hidden = U.create_shared(U.initial_weights(hidden,mult_hidden))
	fW_hidden = U.create_shared(U.initial_weights(mult_hidden,hidden))
	W_hidden_predict = U.create_shared(U.initial_weights(hidden,characters))
	b_predict = U.create_shared(U.initial_weights(characters))

	print("Constructing graph...")
	# Both character matrices are indexed by X before being handed over.
	char_rows = W_char_hidden[X]
	mult_rows = f_char_hidden[X]
	hidden_state = make_hidden(hidden,char_rows,mult_rows,Wf_hidden,fW_hidden,b_hidden)
	predictions = T.nnet.softmax(T.dot(hidden_state,W_hidden_predict) + b_predict)
	weights = [
			W_char_hidden, f_char_hidden, b_hidden,
			Wf_hidden, fW_hidden,
			W_hidden_predict, b_predict
		]

	# Mean negative log-probability at the target index Y of each row.
	target_log_probs = T.log(predictions)[T.arange(Y.shape[0]),Y]
	cost = -T.mean(target_log_probs)
	gparams = T.grad(cost,weights)

	# One zero-initialised accumulator per weight.
	deltas = [ U.create_shared(np.zeros(w.get_value().shape)) for w in weights ]

	param_updates = []
	delta_updates = []
	for param,delta,gparam in zip(weights,deltas,gparams):
		step = alpha * delta + gparam * lr
		param_updates.append((param, param - step))
		delta_updates.append((delta, step))
	# Parameter updates come first, followed by the accumulator updates.
	updates = param_updates + delta_updates
	return X,Y,alpha,lr,updates,predictions,weights
示例#10
0
	def __init__(self,inputs,outputs,
				 lr = 0.1,       batch_size = 10,  max_epochs = 100000,
				 momentum = 0.5, validation = 0.1, lambda_2 = 0.001,
				 lr_min = 0.1):
		"""Store the hyper-parameters and allocate weights, bias and their deltas.

		Args:
			inputs: first dimension of the weight matrix.
			outputs: second dimension of the weight matrix; also the bias length.
			lr, lr_min, momentum, lambda_2, batch_size, max_epochs, validation:
				stored unchanged as attributes of the same name.
		"""
		# Hyper-parameters are copied verbatim onto the instance.
		self.lr, self.lr_min = lr, lr_min
		self.momentum = momentum
		self.lambda_2 = lambda_2
		self.batch_size = batch_size
		self.max_epochs = max_epochs
		self.validation = validation

		# Weight matrix and a zero accumulator with the same dimensions.
		weight_shape = (inputs,outputs)
		self.W = U.create_shared(U.initial_weights(*weight_shape))
		self.W_delta = U.create_shared(np.zeros(weight_shape))

		# Bias and its accumulator both start at zero.
		self.bias = U.create_shared(np.zeros(outputs))
		self.bias_delta = U.create_shared(np.zeros(outputs))

		# Parallel lists: deltas[i] is the accumulator for tunables[i].
		self.tunables = [self.W, self.bias]
		self.deltas = [self.W_delta, self.bias_delta]
示例#11
0
def build_network(input_size,hidden_size):
	"""Allocate the shared parameters and build the ``make_rae`` / ``unroll`` graphs.

	Returns:
		``(X, parameters, hidden, hidden1_reproduction, input_reproduction,
		unrolled)`` where ``X`` is the ``dmatrix`` input and ``parameters``
		lists the six shared variables created here.
	"""
	X = T.dmatrix('X')

	# Creation order is kept: it fixes the order of the initial_weights calls.
	W_input_to_hidden = U.create_shared(U.initial_weights(input_size,hidden_size))
	W_hidden_to_hidden = U.create_shared(U.initial_weights(hidden_size,hidden_size))
	b_hidden = U.create_shared(U.initial_weights(hidden_size))
	initial_hidden = U.create_shared(U.initial_weights(hidden_size))
	b_hidden_reproduction = U.create_shared(U.initial_weights(hidden_size))
	# NOTE(review): this transposed view is assigned but never used below.
	W_hidden_to_input_reproduction = W_input_to_hidden.T
	b_input_reproduction = U.create_shared(U.initial_weights(input_size))

	parameters = [
			W_input_to_hidden, W_hidden_to_hidden,
			b_hidden, initial_hidden,
			b_hidden_reproduction, b_input_reproduction,
		]

	rae_args = (
			X,
			W_input_to_hidden,
			W_hidden_to_hidden,
			b_hidden,
			initial_hidden,
			b_hidden_reproduction,
			b_input_reproduction,
		)
	hidden, hidden1_reproduction, input_reproduction = make_rae(*rae_args)

	# Unroll from the last hidden state for as many steps as `hidden` has rows.
	unroll_args = (
			hidden[-1],
			W_input_to_hidden,
			W_hidden_to_hidden,
			b_hidden_reproduction,
			b_input_reproduction,
			hidden.shape[0],
		)
	unrolled = unroll(*unroll_args)

	return X,parameters,hidden,hidden1_reproduction,input_reproduction,unrolled
示例#12
0
	def __init__(self,size):
		"""Initialise the base layer, then add the recurrent weights and start state.

		Args:
			size: forwarded to the parent constructor; also used as both
				dimensions of ``W`` and as the length of ``h0``.
		"""
		super(Recurrent,self).__init__(size)
		self.W = U.create_shared(U.initial_weights(size,size))
		# Zero vector of length `size`.
		self.h0 = U.create_shared(np.zeros((size,)))
		# Only W is listed here; h0 is not.
		self.updates = [self.W]
示例#13
0
def sparse_dot(l,prev,values,W):
	"""Scan step: weighted sum of the rows of ``W`` named by one slice of ``values``.

	Takes ``l`` consecutive rows of ``values`` starting at ``prev``. Column 0
	of each row indexes into ``W`` and column 1 scales the selected row.

	Returns:
		``(sum over the scaled rows, prev + l)``.
	"""
	next_prev = prev+l
	entries = values[T.arange(prev,next_prev)]
	row_indices = entries[:,0]
	# Column vector so that each selected row of W is scaled by one entry.
	scales = entries[:,1].reshape((l,1))
	scaled_rows = W[row_indices]*scales
	return T.sum(scaled_rows,axis=0),next_prev



if __name__ == "__main__":
	# Three rows of integer pairs, converted below by to_sparse_array
	# (defined outside this excerpt). Presumably each pair is
	# (index, value) -- confirm against to_sparse_array.
	M = [[(1,2),(5,3),(10,1)],
		 [(0,2),(3,1)],
		 [(2,2),(8,4)]]
	index  = T.ivector('index')
	values = T.imatrix('values')
	prev   = T.iscalar('prev')
	initial_weights = U.initial_weights(11,3)
	W = U.create_shared(initial_weights)

	# sparse_dot runs once per element of `index`. Its second output is fed
	# back as `prev`; only the first output is kept.
	[output,_],updates = theano.scan(
			sparse_dot,
			sequences     = index,
			outputs_info  = [None,prev],
			non_sequences = [values,W]
		)

	# The scan `updates` are not passed to the compiled function.
	f = theano.function(
			inputs = [index,values,prev],
			outputs = output
		)

	ind,val = to_sparse_array(M)
示例#14
0
import theano
import theano.tensor as T
import numpy         as np
import utils         as U

# Scalar that multiplies the reconstruction's pre-softmax activations.
switch = T.scalar('switch')
# 8x8 identity matrix, used as both the input and the reconstruction target.
A = U.create_shared(np.eye(8))
weights = U.create_shared(U.initial_weights(8,3))
# Encode with `weights` and decode with its transpose.
hidden  = T.nnet.sigmoid(T.dot(A,weights))
recon   = T.nnet.softmax(switch*T.dot(hidden,weights.T))

# Summed squared difference between A and its reconstruction.
cost     = T.sum((A-recon)**2)
gradient = T.grad(cost,wrt=weights)

# Plain gradient step with no explicit learning-rate factor.
updates  = [ (weights, weights - gradient) ]

print "Compiling..."
f = theano.function(
		inputs  = [switch],
		updates = updates,
		outputs = cost
	)
print "Done."
# Each call applies one update and prints the cost. NOTE(review): switch is
# always passed as 0 here, which zeroes the softmax argument.
for _ in xrange(1000000): print f(0)

示例#15
0
			outputs = T.mean(T.neq(T.argmax(predictions, axis=1), Y)),
			updates = updates,
			givens  = {
				X: data,
				Y: labels,
			}
		)
	return train_model



if __name__ == '__main__':
	# CHARACTERS, HIDDEN and CONTEXT are module-level names defined outside
	# this excerpt.
	print "Setting up memory..."
	X = T.bmatrix('X')
	Y = T.bvector('Y')
	# One character-to-hidden matrix per context position, named 'yeah<i>'.
	Ws_char_to_hidden   = [ U.create_shared(U.initial_weights(CHARACTERS,HIDDEN),name='yeah%d'%i) for i in xrange(CONTEXT) ]
	b_hidden            = U.create_shared(U.initial_weights(HIDDEN))
	W_hidden_to_hidden  = U.create_shared(U.initial_weights(HIDDEN,HIDDEN))
	W_hidden_to_predict = U.create_shared(U.initial_weights(HIDDEN,CHARACTERS))
	b_predict           = U.create_shared(U.initial_weights(CHARACTERS))
	# All shared variables created above, gathered into a single list.
	tunables = Ws_char_to_hidden + [
			b_hidden, 
			W_hidden_to_hidden,
			W_hidden_to_predict,
			b_predict
		]

	print "Constructing graph..."
	# Three stages, each consuming the previous stage's result; the helpers
	# are defined outside this excerpt.
	hidden_inputs  = make_hidden_inputs(X,Ws_char_to_hidden,b_hidden)
	hidden_outputs = make_hidden_outputs(hidden_inputs,W_hidden_to_hidden)
	predictions    = make_predictions(hidden_outputs,W_hidden_to_predict,b_predict)
示例#16
0
import theano
import math
import utils
import theano.tensor as T
import numpy as np
import utils as U

# Shared weights initialised from U.initial_weights(8, 3).
initial_weights = U.initial_weights(8, 3)
W = U.create_shared(initial_weights)
# Symbolic inputs: an integer matrix and a matrix of targets.
data = T.imatrix('data')
label = T.matrix('label')


def construct(bits_set, W):
    """Return the sum, along axis 0, of the rows of ``W`` selected by ``bits_set``."""
    selected_rows = W[bits_set]
    return T.sum(selected_rows, axis=0)


# Apply `construct` to each row of `data`; W is passed unchanged to every step.
output, updates = theano.scan(construct, sequences=data, non_sequences=W)

# Half the summed squared error of each row, averaged over rows.
cost = T.mean(0.5 * T.sum((output - label)**2, axis=1))

grad = T.grad(cost, wrt=W)

# Sample data: x is the indices 0..7 as an 8x1 int32 column, and y lists
# the eight 3-bit patterns in counting order.
x = np.arange(8, dtype=np.int32).reshape(8, 1)
y = np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [1, 0, 0], [1, 0, 1],
              [1, 1, 0], [1, 1, 1]],
             dtype=np.float32)

# Each call returns `output` and moves W by 0.5 * grad. The `updates`
# returned by scan are not passed to the function.
f = theano.function(inputs=[data, label],
                    outputs=output,
                    updates=[(W, W - 0.5 * grad)])
import utils         as U
from theano import sparse
from scipy.sparse import csr_matrix
def shared_sparse(arr):
	"""Build a ``theano.sparse`` CSR expression from a CSR matrix's buffers.

	Args:
		arr: object exposing ``data``, ``indices``, ``indptr`` and ``shape``
			(the caller in this file passes a ``scipy.sparse.csr_matrix``).

	Returns:
		The result of ``sparse.CSR(data, indices, indptr, shape)``.
	"""
	# The shape is passed as a NumPy array rather than a tuple.
	return sparse.CSR(arr.data,arr.indices,arr.indptr,np.array(arr.shape))



if __name__ == "__main__":
	# Placeholder input: a 100x100 identity matrix in CSR form.
	training_data   = shared_sparse(csr_matrix(np.eye(100)))

	#training_labels = pickle.load(open('tags.train.data','r'))
	
	# NOTE(review): W is initialised with dimensions (71165, 26920) while
	# training_data is 100x100. Unless U.initial_weights maps these
	# arguments to a compatible shape, the dot product below looks
	# shape-incompatible -- confirm.
	W = U.create_shared(U.initial_weights(71165,26920))
	out  = theano.dot(training_data,W)
	# Function with no inputs that evaluates the product.
	f = theano.function(
			inputs = [],
			outputs = out
		)
	print f()






示例#18
0
def construct_network(context,characters,hidden):
	"""Create the weights, prediction graph and momentum-style updates.

	Args:
		context: number of character-to-hidden weight matrices to create.
		characters: size of the character dimension of the weights.
		hidden: size of the hidden dimension of the weights.

	Returns:
		``(X, Y, alpha, lr, updates, predictions, weights)``.
	"""
	print("Setting up memory...")
	X = T.bmatrix('X')
	Y = T.bvector('Y')

	# int8 vector that is 1 at indices 0 and 1 and 0 everywhere else.
	mask_values = np.zeros(characters,dtype=np.int8)
	mask_values[0] = 1
	mask_values[1] = 1

	float_type = theano.config.floatX
	alpha = T.cast(T.fscalar('alpha'),dtype=float_type)
	lr = T.cast(T.fscalar('lr'),dtype=float_type)

	# One named character-to-hidden matrix per context position.
	Ws_char_to_hidden = []
	for i in xrange(context):
		Ws_char_to_hidden.append(
			U.create_shared(
				U.initial_weights(characters,hidden),
				name='char[%d]'%i
			)
		)

	# Zero the first row of the first matrix.
	first_matrix = Ws_char_to_hidden[0]
	first_values = first_matrix.get_value()
	first_values[0] = 0
	first_matrix.set_value(first_values)

	# The two hidden-to-hidden matrices start as random weights plus identity.
	W_hidden_to_hidden_i = U.create_shared(U.initial_weights(hidden,hidden) + np.eye(hidden))
	b_hidden_i = U.create_shared(U.initial_weights(hidden))
	W_hidden_to_hidden_o = U.create_shared(U.initial_weights(hidden,hidden) + np.eye(hidden))
	b_hidden_o = U.create_shared(U.initial_weights(hidden))
	W_hidden_to_predict = U.create_shared(U.initial_weights(hidden,characters))
	b_predict = U.create_shared(U.initial_weights(characters))
	W_predict_to_hidden = U.create_shared(U.initial_weights(characters,hidden))
	gen_weight_mask = U.create_shared(mask_values,name='mask')

	print("Constructing graph...")
	hidden_inputs = make_char_outputs(X,Ws_char_to_hidden)
	# The mask is looked up by the first column of X.
	first_column_mask = gen_weight_mask[X[:,0]]
	hidden_outputs,predictions = make_hidden_predict_outputs(
			hidden,characters,
			hidden_inputs,
			first_column_mask,
			W_hidden_to_hidden_i,
			b_hidden_i,
			W_hidden_to_hidden_o,
			b_hidden_o,
			W_hidden_to_predict,
			b_predict,
			W_predict_to_hidden
		)

	# gen_weight_mask is left out of the list, so it is never updated.
	weights = Ws_char_to_hidden + [
			W_hidden_to_hidden_i, b_hidden_i,
			W_hidden_to_hidden_o, b_hidden_o,
			W_hidden_to_predict, b_predict,
			W_predict_to_hidden
		]

	# Mean negative log-probability at the target index Y of each row.
	target_log_probs = T.log(predictions)[T.arange(Y.shape[0]),Y]
	cost = -T.mean(target_log_probs)
	gparams = T.grad(cost,weights)

	# One zero-initialised accumulator per weight.
	deltas = [ U.create_shared(np.zeros(w.get_value().shape)) for w in weights ]

	param_updates = []
	delta_updates = []
	for param,delta,gparam in zip(weights,deltas,gparams):
		step = alpha * delta + gparam * lr
		param_updates.append((param, param - step))
		delta_updates.append((delta, step))
	# Parameter updates come first, followed by the accumulator updates.
	updates = param_updates + delta_updates
	return X,Y,alpha,lr,updates,predictions,weights
示例#19
0
import theano
import theano.tensor as T
import numpy         as np
import utils         as U
from numpy_hinton import print_arr
from theano.printing import Print

# Module-level parameters read by pair_combine's step function: one matrix
# for each element of a neighbouring pair, plus a bias.
W1 = U.create_shared(U.initial_weights(10,10))
W2 = U.create_shared(U.initial_weights(10,10))
b  = U.create_shared(U.initial_weights(10))
# Symbolic input matrix.
X = T.dmatrix('X')
def pair_combine(X):
	"""Repeatedly merge neighbouring rows of ``X`` with a scan.

	At step ``i`` the first ``length-i-1`` rows and their right-hand
	neighbours are combined through the module-level ``W1``, ``W2`` and
	``b``, and non-positive entries are zeroed. The result is padded with
	zero rows back to the original row count.

	Returns:
		``(combined[-1,0], combined[0][:-1])``: the first row of the last
		step's output, and the first step's output without its final row.
	"""
	def step(i,inputs):
		length = inputs.shape[0]
		left = inputs[T.arange(0,length-i-1)]
		right = inputs[T.arange(1,length-i)]
		merged = T.dot(left,W1) + T.dot(right,W2) + b
		merged = merged*(merged > 0)
		# Pad with zero rows so every step returns `length` rows.
		padding = T.zeros_like(inputs[:length-merged.shape[0]])
		return T.concatenate([merged,padding])

	row_count = X.shape[0]
	levels,_ = theano.scan(
			step,
			sequences    = [T.arange(row_count)],
			outputs_info = [X],
			n_steps      = row_count-1
		)
	return levels[-1,0], levels[0][:-1]
# Build both outputs of pair_combine and compile a function returning them
# for a given X.
combined, pairwise = pair_combine(X)
f = theano.function(
		inputs = [X],
		outputs = [combined,pairwise]
	)
示例#20
0
import theano
import math
import utils
import theano.tensor as T
import numpy         as np
import utils         as U

# Shared weights initialised from U.initial_weights(8,3).
initial_weights = U.initial_weights(8,3)
W = U.create_shared(initial_weights)
# Symbolic inputs: an integer matrix and a matrix of targets.
data = T.imatrix('data')
label = T.matrix('label')
def construct(bits_set,W):
	"""Return the sum, along axis 0, of the rows of ``W`` selected by ``bits_set``."""
	selected_rows = W[bits_set]
	return T.sum(selected_rows,axis=0)

# Apply `construct` to each row of `data`; W is passed unchanged to every step.
output,updates = theano.scan(
		construct,
		sequences = data,
		non_sequences = W
	)

# Half the summed squared error of each row, averaged over rows.
cost = T.mean(0.5*T.sum((output - label)**2,axis=1))


grad = T.grad(cost,wrt=W)

# Sample input: the indices 0..7 as an 8x1 int32 column.
x = np.arange(8,dtype=np.int32).reshape(8,1)
y = np.array(
		[[0,0,0],
		 [0,0,1],
		 [0,1,0],
		 [0,1,1],
import theano
import math
import pickle
import theano.tensor as T
import numpy as np
import utils as U
from theano import sparse
from scipy.sparse import csr_matrix


def shared_sparse(arr):
    """Build a ``theano.sparse`` CSR expression from a CSR matrix's buffers.

    Args:
        arr: object exposing ``data``, ``indices``, ``indptr`` and ``shape``
            (the caller in this file passes a ``scipy.sparse.csr_matrix``).

    Returns:
        The result of ``sparse.CSR(data, indices, indptr, shape)``.
    """
    # The shape is passed as a NumPy array rather than a tuple.
    return sparse.CSR(arr.data, arr.indices, arr.indptr, np.array(arr.shape))


if __name__ == "__main__":
    # Placeholder input: a 100x100 identity matrix in CSR form.
    training_data = shared_sparse(csr_matrix(np.eye(100)))

    #training_labels = pickle.load(open('tags.train.data','r'))

    # NOTE(review): W is initialised with dimensions (71165, 26920) while
    # training_data is 100x100. Unless U.initial_weights maps these
    # arguments to a compatible shape, the dot product below looks
    # shape-incompatible -- confirm.
    W = U.create_shared(U.initial_weights(71165, 26920))
    out = theano.dot(training_data, W)
    # Function with no inputs that evaluates the product.
    f = theano.function(inputs=[], outputs=out)
    print f()
示例#22
0
        row += 1
        prev += i
    return dense


def sparse_dot(l, prev, values, W):
    """Scan step: weighted sum of the rows of ``W`` named by one slice of ``values``.

    Takes ``l`` consecutive rows of ``values`` starting at ``prev``. Column 0
    of each row indexes into ``W`` and column 1 scales the selected row.

    Returns:
        ``(sum over the scaled rows, prev + l)``.
    """
    next_prev = prev + l
    entries = values[T.arange(prev, next_prev)]
    row_indices = entries[:, 0]
    # Column vector so that each selected row of W is scaled by one entry.
    scales = entries[:, 1].reshape((l, 1))
    scaled_rows = W[row_indices] * scales
    return T.sum(scaled_rows, axis=0), next_prev


if __name__ == "__main__":
    # Three rows of integer pairs, converted below by to_sparse_array
    # (defined outside this excerpt). Presumably each pair is
    # (index, value) -- confirm against to_sparse_array.
    M = [[(1, 2), (5, 3), (10, 1)], [(0, 2), (3, 1)], [(2, 2), (8, 4)]]
    index = T.ivector('index')
    values = T.imatrix('values')
    prev = T.iscalar('prev')
    initial_weights = U.initial_weights(11, 3)
    W = U.create_shared(initial_weights)

    # sparse_dot runs once per element of `index`. Its second output is fed
    # back as `prev`; only the first output is kept.
    [output, _], updates = theano.scan(sparse_dot,
                                       sequences=index,
                                       outputs_info=[None, prev],
                                       non_sequences=[values, W])

    # The scan `updates` are not passed to the compiled function.
    f = theano.function(inputs=[index, values, prev], outputs=output)

    ind, val = to_sparse_array(M)
    print ind, val
    # Evaluate with the running offset `prev` starting at 0.
    print f(ind, val, 0)
示例#23
0
import theano
import theano.tensor as T
import numpy as np
import utils as U
from numpy_hinton import print_arr
from theano.printing import Print

# Module-level parameters read by pair_combine's step function: one matrix
# for each element of a neighbouring pair, plus a bias.
W1 = U.create_shared(U.initial_weights(10, 10))
W2 = U.create_shared(U.initial_weights(10, 10))
b = U.create_shared(U.initial_weights(10))
# Symbolic input matrix.
X = T.dmatrix('X')


def pair_combine(X):
    """Repeatedly merge neighbouring rows of ``X`` with a scan.

    At step ``i`` the first ``length - i - 1`` rows and their right-hand
    neighbours are combined through the module-level ``W1``, ``W2`` and
    ``b``, and non-positive entries are zeroed. The result is padded with
    zero rows back to the original row count.

    Returns:
        ``(combined[-1, 0], combined[0][:-1])``: the first row of the last
        step's output, and the first step's output without its final row.
    """
    def step(i, inputs):
        length = inputs.shape[0]
        left = inputs[T.arange(0, length - i - 1)]
        right = inputs[T.arange(1, length - i)]
        merged = T.dot(left, W1) + T.dot(right, W2) + b
        merged = merged * (merged > 0)
        # Pad with zero rows so every step returns `length` rows.
        padding = T.zeros_like(inputs[:length - merged.shape[0]])
        return T.concatenate([merged, padding])

    row_count = X.shape[0]
    levels, _ = theano.scan(
        step,
        sequences=[T.arange(row_count)],
        outputs_info=[X],
        n_steps=row_count - 1,
    )
    return levels[-1, 0], levels[0][:-1]
示例#24
0
import theano
import theano.tensor as T
import numpy as np
import utils as U

# Scalar that multiplies the reconstruction's pre-softmax activations.
switch = T.scalar('switch')
# 8x8 identity matrix, used as both the input and the reconstruction target.
A = U.create_shared(np.eye(8))
weights = U.create_shared(U.initial_weights(8, 3))
# Encode with `weights` and decode with its transpose.
hidden = T.nnet.sigmoid(T.dot(A, weights))
recon = T.nnet.softmax(switch * T.dot(hidden, weights.T))

# Summed squared difference between A and its reconstruction.
cost = T.sum((A - recon)**2)
gradient = T.grad(cost, wrt=weights)

# Plain gradient step with no explicit learning-rate factor.
updates = [(weights, weights - gradient)]

print "Compiling..."
f = theano.function(inputs=[switch], updates=updates, outputs=cost)
print "Done."
# Each call applies one update and prints the cost. NOTE(review): switch is
# always passed as 0 here, which zeroes the softmax argument.
for _ in xrange(1000000):
    print f(0)