def build_network(input_size, hidden_size):
    """Build a tied-weight autoencoder graph over an integer matrix input.

    The hidden activation is ``sigmoid(X . W)`` and the output is
    ``softmax(hidden . W.T + b_output)``, so the single matrix
    ``W_input_to_hidden`` is used in both directions.

    Args:
        input_size: first dimension given to ``U.initial_weights`` for the
            weight matrix, and the only dimension of the output bias.
        hidden_size: second dimension of the weight matrix.

    Returns:
        A tuple ``(X, output, parameters)`` where ``parameters`` is the list
        ``[W_input_to_hidden, b_output]``.
    """
    X = T.imatrix('X')

    # The decoder reuses the encoder matrix transposed, so no separate
    # hidden-to-output matrix is allocated.
    W_input_to_hidden = U.create_shared(
        U.initial_weights(input_size, hidden_size))
    b_output = U.create_shared(U.initial_weights(input_size))

    hidden = T.nnet.sigmoid(T.dot(X, W_input_to_hidden))
    output = T.nnet.softmax(T.dot(hidden, W_input_to_hidden.T) + b_output)

    parameters = [W_input_to_hidden, b_output]
    return X, output, parameters
def __init__(self, learning_rate):
    """Store the learning rate, declare the ``s0`` input and create weights.

    Args:
        learning_rate: value stored unchanged on ``self.learning_rate``.

    Reads ``self.test_s0``, ``self.n_cells``, ``self.input_size`` and
    ``self.clf_size``, which must already be available on the instance.
    """
    self.params = Parameters()
    self.learning_rate = learning_rate

    # Symbolic matrix input with a test value attached to its tag.
    state = tt.matrix('s0')
    self.s0 = state
    state.tag.test_value = self.test_s0

    # Three weight sets created with the same (n_cells, input_size)
    # arguments, in this order.
    for weight_name in ('w_x', 'w_f', 'w_i'):
        setattr(self.params, weight_name,
                initial_weights(self.n_cells, self.input_size))

    # Weights and bias sized by clf_size.
    self.params.w_clf = initial_weights(self.clf_size, self.n_cells)
    self.params.b_clf = initial_weights(self.clf_size)
def build_network(input_size, hidden_size):
    """Construct the symbolic graph of an autoencoder with shared weights.

    Encoding is ``sigmoid(X . W)``; decoding is
    ``softmax(hidden . W.T + b_output)``. Only one weight matrix exists and
    its transpose serves as the decoder.

    Args:
        input_size: row count requested for the weight matrix and the length
            requested for the output bias.
        hidden_size: column count requested for the weight matrix.

    Returns:
        ``(X, output, parameters)``: the symbolic integer-matrix input, the
        symbolic softmax output, and ``[W_input_to_hidden, b_output]``.
    """
    X = T.imatrix('X')

    # Only the encoder matrix and the output bias are needed: the decoder
    # is the transpose of the encoder, so nothing else is allocated.
    W_input_to_hidden = U.create_shared(
        U.initial_weights(input_size, hidden_size))
    b_output = U.create_shared(U.initial_weights(input_size))

    hidden = T.nnet.sigmoid(T.dot(X, W_input_to_hidden))
    output = T.nnet.softmax(T.dot(hidden, W_input_to_hidden.T) + b_output)

    parameters = [W_input_to_hidden, b_output]
    return X, output, parameters
def __init__(self, layers_in, layer_out):
    """Create one weight matrix per input layer plus a zero bias.

    Args:
        layers_in: object exposing a ``layers`` iterable; each element must
            have a ``size`` attribute.
        layer_out: object with a ``size`` attribute.
    """
    self.ins = layers_in
    self.out = layer_out

    # One shared matrix per incoming layer, requested as
    # (incoming size, outgoing size).
    matrices = []
    for source in self.ins.layers:
        initial = U.initial_weights(source.size, self.out.size)
        matrices.append(U.create_shared(initial))
    self.Ws = matrices

    self.bias = U.create_shared(np.zeros(self.out.size))

    # All weight matrices followed by the bias.
    self.updates = self.Ws + [self.bias]
def construct_network(context, characters, hidden, mult_hidden):
    """Build the prediction graph and its momentum-style update rules.

    Args:
        context: not used inside this function.
        characters: size used for the character-indexed tables and the
            prediction layer.
        hidden: size used for the hidden layer.
        mult_hidden: size used for the ``f_*`` / ``*f_*`` matrices.

    Returns:
        ``(X, Y, alpha, lr, updates, predictions, weights)``.
    """
    def fresh(*shape):
        # Shared variable holding U.initial_weights(*shape).
        return U.create_shared(U.initial_weights(*shape))

    print("Setting up memory...")
    X = T.bvector('X')
    Y = T.bvector('Y')
    float_type = theano.config.floatX
    alpha = T.cast(T.fscalar('alpha'), dtype=float_type)
    lr = T.cast(T.fscalar('lr'), dtype=float_type)

    print("Initialising weights...")
    # Creation order is kept as-is in case U.initial_weights is stateful.
    W_char_hidden = fresh(characters, hidden)
    f_char_hidden = fresh(characters, mult_hidden)
    b_hidden = fresh(hidden)
    Wf_hidden = fresh(hidden, mult_hidden)
    fW_hidden = fresh(mult_hidden, hidden)
    W_hidden_predict = fresh(hidden, characters)
    b_predict = fresh(characters)

    print("Constructing graph...")
    # Rows of the two character tables are selected by the entries of X.
    hidden_layer = make_hidden(
        hidden,
        W_char_hidden[X],
        f_char_hidden[X],
        Wf_hidden,
        fW_hidden,
        b_hidden,
    )
    predictions = T.nnet.softmax(
        T.dot(hidden_layer, W_hidden_predict) + b_predict)

    weights = [
        W_char_hidden, f_char_hidden, b_hidden,
        Wf_hidden, fW_hidden,
        W_hidden_predict, b_predict,
    ]

    # Mean negative log of the prediction entry selected by Y for each row.
    picked_log_probs = T.log(predictions)[T.arange(Y.shape[0]), Y]
    cost = -T.mean(picked_log_probs)
    gparams = T.grad(cost, weights)

    # One zero-initialised accumulator per weight, same shape as the weight.
    deltas = [U.create_shared(np.zeros(w.get_value().shape)) for w in weights]

    def step(delta, gparam):
        return alpha * delta + gparam * lr

    param_updates = [
        (param, param - (step(delta, gparam)))
        for param, delta, gparam in zip(weights, deltas, gparams)
    ]
    delta_updates = [
        (delta, step(delta, gparam))
        for delta, gparam in zip(deltas, gparams)
    ]
    updates = param_updates + delta_updates

    return X, Y, alpha, lr, updates, predictions, weights
def __init__(self, layers_in, layer_out):
    """Set up the connection weights from several input layers to one output.

    Args:
        layers_in: container whose ``layers`` attribute is iterated; every
            element needs a ``size``.
        layer_out: target layer; only its ``size`` is read here.
    """
    self.ins = layers_in
    self.out = layer_out

    def connection(source):
        # Shared matrix requested as (source size, output size).
        return U.create_shared(U.initial_weights(source.size, self.out.size))

    self.Ws = [connection(layer) for layer in self.ins.layers]
    self.bias = U.create_shared(np.zeros(self.out.size))

    # Weight matrices first, bias last.
    self.updates = self.Ws + [self.bias]
def build_network(input_size, hidden_size):
    """Build the symbolic graph of a recurrent autoencoder.

    The reproduction side has its own biases but no matrices of its own:
    only ``W_input_to_hidden`` and ``W_hidden_to_hidden`` are handed to
    ``make_rae`` and ``unroll``.

    Args:
        input_size: size used for the input dimension of the weights and for
            the input-reproduction bias.
        hidden_size: size used for the hidden state and its biases.

    Returns:
        ``(X, parameters, hidden, hidden1_reproduction, input_reproduction,
        unrolled)``. ``parameters`` lists the six shared variables created
        here.
    """
    X = T.dmatrix('X')

    W_input_to_hidden = U.create_shared(
        U.initial_weights(input_size, hidden_size))
    W_hidden_to_hidden = U.create_shared(
        U.initial_weights(hidden_size, hidden_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    initial_hidden = U.create_shared(U.initial_weights(hidden_size))

    b_hidden_reproduction = U.create_shared(U.initial_weights(hidden_size))
    b_input_reproduction = U.create_shared(U.initial_weights(input_size))

    parameters = [
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
    ]

    hidden, hidden1_reproduction, input_reproduction = make_rae(
        X,
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction)

    # Unrolling starts from the last hidden state and is given the number
    # of hidden states as its final argument.
    unrolled = unroll(
        hidden[-1],
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
        hidden.shape[0])

    return (X, parameters, hidden, hidden1_reproduction, input_reproduction,
            unrolled)
def build_network(input_size, hidden_size):
    """Build a one-hidden-layer network with dropout on the hidden units.

    The hidden layer is rectified (``h * (h > 0)``). The output is
    ``sigmoid(hidden . w + b)`` where ``w`` is created with a single
    dimension and ``b`` is element 0 of a length-1 weight draw.

    Args:
        input_size: input dimension of the first weight matrix.
        hidden_size: number of hidden units.

    Returns:
        ``(X, training_output, test_output, parameters)``. The training
        output applies a random 0/1 mask with p=0.5 to the hidden units; the
        test output scales them by 0.5 instead.
    """
    srng = RandomStreams(seed=12345)
    X = T.fmatrix('X')

    W_input_to_hidden1 = U.create_shared(
        U.initial_weights(input_size, hidden_size))
    b_hidden1 = U.create_shared(U.initial_weights(hidden_size))
    W_hidden1_to_output = U.create_shared(U.initial_weights(hidden_size))
    b_output = U.create_shared(U.initial_weights(1)[0])

    def network(training):
        hidden1 = T.dot(X, W_input_to_hidden1) + b_hidden1
        hidden1 = hidden1 * (hidden1 > 0)
        if training:
            # Sample the mask with the full shape of the activations so
            # every row of X gets its own independent dropout pattern. A
            # mask of shape (hidden_size,) would be broadcast and drop the
            # same units for every example in the batch.
            hidden1 = hidden1 * srng.binomial(size=hidden1.shape, p=0.5)
        else:
            # Matches the expected value of the p=0.5 mask used in training.
            hidden1 = 0.5 * hidden1
        output = T.nnet.sigmoid(T.dot(hidden1, W_hidden1_to_output) + b_output)
        return output

    parameters = [W_input_to_hidden1, b_hidden1, W_hidden1_to_output, b_output]
    return X, network(True), network(False), parameters
def construct_network(context, characters, hidden, mult_hidden):
    """Assemble the character-prediction graph together with its updates.

    Args:
        context: accepted but not referenced in the body.
        characters: dimension of the character tables and of the softmax.
        hidden: dimension of the hidden layer.
        mult_hidden: dimension of the ``f_char_hidden`` / ``Wf_hidden`` /
            ``fW_hidden`` matrices.

    Returns:
        ``(X, Y, alpha, lr, updates, predictions, weights)``.
    """
    print("Setting up memory...")
    X = T.bvector('X')
    Y = T.bvector('Y')
    alpha = T.cast(T.fscalar('alpha'), dtype=theano.config.floatX)
    lr = T.cast(T.fscalar('lr'), dtype=theano.config.floatX)

    print("Initialising weights...")
    # Shapes in creation order; the order of U.initial_weights calls is
    # the same as writing each assignment out by hand.
    shapes = [
        (characters, hidden),       # W_char_hidden
        (characters, mult_hidden),  # f_char_hidden
        (hidden,),                  # b_hidden
        (hidden, mult_hidden),      # Wf_hidden
        (mult_hidden, hidden),      # fW_hidden
        (hidden, characters),       # W_hidden_predict
        (characters,),              # b_predict
    ]
    weights = []
    for shape in shapes:
        weights.append(U.create_shared(U.initial_weights(*shape)))
    (W_char_hidden, f_char_hidden, b_hidden,
     Wf_hidden, fW_hidden, W_hidden_predict, b_predict) = weights

    print("Constructing graph...")
    char_rows = W_char_hidden[X]
    factor_rows = f_char_hidden[X]
    hidden_state = make_hidden(
        hidden, char_rows, factor_rows, Wf_hidden, fW_hidden, b_hidden)
    predictions = T.nnet.softmax(
        T.dot(hidden_state, W_hidden_predict) + b_predict)

    # Mean negative log of the prediction entry selected by Y for each row.
    row_ids = T.arange(Y.shape[0])
    cost = -T.mean(T.log(predictions)[row_ids, Y])
    gparams = T.grad(cost, weights)

    # Zero-initialised accumulators, one per weight and of matching shape.
    deltas = []
    for w in weights:
        deltas.append(U.create_shared(np.zeros(w.get_value().shape)))

    updates = []
    for param, delta, gparam in zip(weights, deltas, gparams):
        updates.append((param, param - (alpha * delta + gparam * lr)))
    for delta, gparam in zip(deltas, gparams):
        updates.append((delta, alpha * delta + gparam * lr))

    return X, Y, alpha, lr, updates, predictions, weights
def __init__(self, inputs, outputs, lr=0.1, batch_size=10, max_epochs=100000,
             momentum=0.5, validation=0.1, lambda_2=0.001, lr_min=0.1):
    """Record the training settings and allocate weights, bias and deltas.

    Args:
        inputs: first dimension of the weight matrix.
        outputs: second dimension of the weight matrix and length of the
            bias.
        lr, lr_min, momentum, batch_size, validation, max_epochs, lambda_2:
            stored unchanged as attributes of the same name.
    """
    # Training settings, stored verbatim.
    self.lr = lr
    self.lr_min = lr_min
    self.momentum = momentum
    self.batch_size = batch_size
    self.validation = validation
    self.max_epochs = max_epochs
    self.lambda_2 = lambda_2

    # Trainable values.
    self.W = U.create_shared(U.initial_weights(inputs, outputs))
    self.bias = U.create_shared(np.zeros(outputs))

    # Zero-initialised companions with the same shapes as W and bias.
    self.W_delta = U.create_shared(np.zeros((inputs, outputs)))
    self.bias_delta = U.create_shared(np.zeros(outputs))

    self.tunables = [self.W, self.bias]
    self.deltas = [self.W_delta, self.bias_delta]
def build_network(input_size, hidden_size):
    """Create the shared variables and graph of a recurrent autoencoder.

    Only two weight matrices are created (input-to-hidden and
    hidden-to-hidden). The reproduction path gets separate biases but is
    given the same two matrices through ``make_rae`` and ``unroll``.

    Args:
        input_size: dimension used for the input side of the weights and for
            ``b_input_reproduction``.
        hidden_size: dimension used for the hidden state, ``b_hidden``,
            ``initial_hidden`` and ``b_hidden_reproduction``.

    Returns:
        ``(X, parameters, hidden, hidden1_reproduction, input_reproduction,
        unrolled)``.
    """
    X = T.dmatrix('X')

    W_input_to_hidden = U.create_shared(
        U.initial_weights(input_size, hidden_size))
    W_hidden_to_hidden = U.create_shared(
        U.initial_weights(hidden_size, hidden_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    initial_hidden = U.create_shared(U.initial_weights(hidden_size))

    b_hidden_reproduction = U.create_shared(U.initial_weights(hidden_size))
    b_input_reproduction = U.create_shared(U.initial_weights(input_size))

    parameters = [
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
    ]

    hidden, hidden1_reproduction, input_reproduction = make_rae(
        X,
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
    )

    # The last hidden state seeds the unrolling; the number of hidden
    # states is passed as the final argument.
    unrolled = unroll(
        hidden[-1],
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
        hidden.shape[0],
    )

    return (X, parameters, hidden, hidden1_reproduction, input_reproduction,
            unrolled)
def __init__(self, size):
    """Initialise the base layer, then add a square weight matrix and h0.

    Args:
        size: forwarded to the parent constructor; also both dimensions of
            ``W`` and the length of ``h0``.
    """
    super(Recurrent, self).__init__(size)

    # size x size matrix drawn by U.initial_weights.
    square_weights = U.initial_weights(size, size)
    self.W = U.create_shared(square_weights)

    # Zero vector of length `size`.
    zero_state = np.zeros((size,))
    self.h0 = U.create_shared(zero_state)

    # Only W is listed here; h0 is not.
    self.updates = [self.W]
def sparse_dot(l, prev, values, W):
    """Scan step: weighted sum of the rows of ``W`` named by one slice.

    Rows ``prev .. prev + l - 1`` of ``values`` are read. Column 0 of each
    row selects a row of ``W`` and column 1 scales it.

    Args:
        l: number of ``values`` rows belonging to this step.
        prev: offset of the first row to read.
        values: two-column symbolic matrix of (row index, coefficient).
        W: weight matrix indexed by column 0 of ``values``.

    Returns:
        ``(weighted_sum, next_offset)`` where ``next_offset`` is
        ``prev + l``.
    """
    entries = values[T.arange(prev, prev + l)]
    selected_rows = W[entries[:, 0]]
    coefficients = entries[:, 1].reshape((l, 1))
    weighted_sum = T.sum(selected_rows * coefficients, axis=0)
    next_offset = prev + l
    return weighted_sum, next_offset


if __name__ == "__main__":
    # Each inner list is one row of (index, value) pairs.
    M = [
        [(1, 2), (5, 3), (10, 1)],
        [(0, 2), (3, 1)],
        [(2, 2), (8, 4)],
    ]

    index = T.ivector('index')
    values = T.imatrix('values')
    prev = T.iscalar('prev')

    initial_weights = U.initial_weights(11, 3)
    W = U.create_shared(initial_weights)

    # The first output is collected per step; the second is fed back as
    # the next step's `prev`.
    scan_outputs, updates = theano.scan(
        sparse_dot,
        sequences=index,
        outputs_info=[None, prev],
        non_sequences=[values, W],
    )
    output, _ = scan_outputs

    f = theano.function(inputs=[index, values, prev], outputs=output)

    ind, val = to_sparse_array(M)
import theano
import theano.tensor as T
import numpy as np
import utils as U

# Scalar input that multiplies the decoder pre-activation.
switch = T.scalar('switch')

# 8x8 identity matrix as the data, encoded with a single 8x3 weight matrix.
A = U.create_shared(np.eye(8))
weights = U.create_shared(U.initial_weights(8, 3))

# Encode with `weights`, decode with its transpose.
hidden = T.nnet.sigmoid(T.dot(A, weights))
decoded = T.dot(hidden, weights.T)
recon = T.nnet.softmax(switch * decoded)

# Summed squared reconstruction error.
residual = A - recon
cost = T.sum(residual**2)
gradient = T.grad(cost, wrt=weights)

# Plain gradient step with an implicit step size of 1.
updates = [(weights, weights - gradient)]

print("Compiling...")
f = theano.function(inputs=[switch], updates=updates, outputs=cost)
print("Done.")

# Every iteration is run with switch = 0 and prints the returned cost.
for _ in xrange(1000000):
    print(f(0))
outputs = T.mean(T.neq(T.argmax(predictions, axis=1), Y)), updates = updates, givens = { X: data, Y: labels, } ) return train_model if __name__ == '__main__': print "Setting up memory..." X = T.bmatrix('X') Y = T.bvector('Y') Ws_char_to_hidden = [ U.create_shared(U.initial_weights(CHARACTERS,HIDDEN),name='yeah%d'%i) for i in xrange(CONTEXT) ] b_hidden = U.create_shared(U.initial_weights(HIDDEN)) W_hidden_to_hidden = U.create_shared(U.initial_weights(HIDDEN,HIDDEN)) W_hidden_to_predict = U.create_shared(U.initial_weights(HIDDEN,CHARACTERS)) b_predict = U.create_shared(U.initial_weights(CHARACTERS)) tunables = Ws_char_to_hidden + [ b_hidden, W_hidden_to_hidden, W_hidden_to_predict, b_predict ] print "Constructing graph..." hidden_inputs = make_hidden_inputs(X,Ws_char_to_hidden,b_hidden) hidden_outputs = make_hidden_outputs(hidden_inputs,W_hidden_to_hidden) predictions = make_predictions(hidden_outputs,W_hidden_to_predict,b_predict)
import theano
import math
import utils
import theano.tensor as T
import numpy as np
import utils as U

initial_weights = U.initial_weights(8, 3)
W = U.create_shared(initial_weights)

data = T.imatrix('data')
label = T.matrix('label')


def construct(bits_set, W):
    """Sum the rows of ``W`` selected by the indices in ``bits_set``."""
    selected = W[bits_set]
    return selected.sum(axis=0)


# One `construct` call per row of `data`.
output, updates = theano.scan(
    construct,
    sequences=data,
    non_sequences=W,
)

# Half squared error per row, averaged over rows.
squared_error = T.sum((output - label)**2, axis=1)
cost = T.mean(0.5 * squared_error)
grad = T.grad(cost, wrt=W)

# Inputs: the indices 0..7 as an (8, 1) column.
x = np.arange(8, dtype=np.int32).reshape(8, 1)

# Targets: the 3-bit binary expansion of 0..7, most significant bit first.
y = np.array(
    [
        [0, 0, 0],
        [0, 0, 1],
        [0, 1, 0],
        [0, 1, 1],
        [1, 0, 0],
        [1, 0, 1],
        [1, 1, 0],
        [1, 1, 1],
    ],
    dtype=np.float32,
)

# Each call returns `output` and applies a gradient step of size 0.5 to W.
f = theano.function(
    inputs=[data, label],
    outputs=output,
    updates=[(W, W - 0.5 * grad)],
)
import utils as U
from theano import sparse
from scipy.sparse import csr_matrix


def shared_sparse(arr):
    """Wrap the CSR components of ``arr`` with ``theano.sparse.CSR``.

    Args:
        arr: object exposing ``data``, ``indices``, ``indptr`` and ``shape``
            (a scipy CSR matrix in the script below).

    Returns:
        Whatever ``sparse.CSR(data, indices, indptr, shape)`` returns.
    """
    dims = np.array(arr.shape)
    return sparse.CSR(arr.data, arr.indices, arr.indptr, dims)


if __name__ == "__main__":
    training_data = shared_sparse(csr_matrix(np.eye(100)))
    #training_labels = pickle.load(open('tags.train.data','r'))

    # NOTE(review): training_data above is 100x100 while W is requested as
    # 71165x26920; these shapes do not look compatible for a dot product.
    # Confirm the intended data before running.
    W = U.create_shared(U.initial_weights(71165, 26920))
    out = theano.dot(training_data, W)

    f = theano.function(inputs=[], outputs=out)
    print(f())
def construct_network(context, characters, hidden):
    """Build the context-based character prediction graph and its updates.

    Args:
        context: number of per-position character-to-hidden matrices.
        characters: dimension of the character tables and of the predictions.
        hidden: dimension of the hidden layers.

    Returns:
        ``(X, Y, alpha, lr, updates, predictions, weights)``.
    """
    def fresh(*shape):
        # Shared variable holding U.initial_weights(*shape).
        return U.create_shared(U.initial_weights(*shape))

    def fresh_plus_identity():
        # hidden x hidden initial weights with the identity matrix added.
        return U.create_shared(
            U.initial_weights(hidden, hidden) + np.eye(hidden))

    print("Setting up memory...")
    X = T.bmatrix('X')
    Y = T.bvector('Y')

    # int8 vector over the characters with entries 0 and 1 set to one.
    mask_values = np.zeros(characters, dtype=np.int8)
    mask_values[[0, 1]] = 1

    alpha = T.cast(T.fscalar('alpha'), dtype=theano.config.floatX)
    lr = T.cast(T.fscalar('lr'), dtype=theano.config.floatX)

    # One character-to-hidden table per context position.
    Ws_char_to_hidden = []
    for i in xrange(context):
        Ws_char_to_hidden.append(
            U.create_shared(
                U.initial_weights(characters, hidden),
                name='char[%d]' % i,
            )
        )

    # Row 0 of the first table is overwritten with zeros.
    first_table = Ws_char_to_hidden[0]
    mat = first_table.get_value()
    mat[0] = 0
    first_table.set_value(mat)

    # Creation order is kept as-is in case U.initial_weights is stateful.
    W_hidden_to_hidden_i = fresh_plus_identity()
    b_hidden_i = fresh(hidden)
    W_hidden_to_hidden_o = fresh_plus_identity()
    b_hidden_o = fresh(hidden)
    W_hidden_to_predict = fresh(hidden, characters)
    b_predict = fresh(characters)
    W_predict_to_hidden = fresh(characters, hidden)
    gen_weight_mask = U.create_shared(mask_values, name='mask')

    print("Constructing graph...")
    hidden_inputs = make_char_outputs(X, Ws_char_to_hidden)

    # The mask is looked up with the first column of X.
    first_column_mask = gen_weight_mask[X[:, 0]]
    hidden_outputs, predictions = make_hidden_predict_outputs(
        hidden, characters,
        hidden_inputs,
        first_column_mask,
        W_hidden_to_hidden_i,
        b_hidden_i,
        W_hidden_to_hidden_o,
        b_hidden_o,
        W_hidden_to_predict,
        b_predict,
        W_predict_to_hidden,
    )

    weights = Ws_char_to_hidden + [
        W_hidden_to_hidden_i,
        b_hidden_i,
        W_hidden_to_hidden_o,
        b_hidden_o,
        W_hidden_to_predict,
        b_predict,
        W_predict_to_hidden,
    ]

    # Mean negative log of the prediction entry selected by Y for each row.
    picked_log_probs = T.log(predictions)[T.arange(Y.shape[0]), Y]
    cost = -T.mean(picked_log_probs)
    gparams = T.grad(cost, weights)

    # One zero-initialised accumulator per weight, same shape as the weight.
    deltas = [U.create_shared(np.zeros(w.get_value().shape)) for w in weights]

    def step(delta, gparam):
        return alpha * delta + gparam * lr

    param_updates = [
        (param, param - (step(delta, gparam)))
        for param, delta, gparam in zip(weights, deltas, gparams)
    ]
    delta_updates = [
        (delta, step(delta, gparam))
        for delta, gparam in zip(deltas, gparams)
    ]
    updates = param_updates + delta_updates

    return X, Y, alpha, lr, updates, predictions, weights
import theano
import theano.tensor as T
import numpy as np
import utils as U
from numpy_hinton import print_arr
from theano.printing import Print

W1 = U.create_shared(U.initial_weights(10, 10))
W2 = U.create_shared(U.initial_weights(10, 10))
b = U.create_shared(U.initial_weights(10))

X = T.dmatrix('X')


def pair_combine(X):
    """Repeatedly combine adjacent rows of ``X`` with ``W1``, ``W2``, ``b``.

    At step ``i`` the first ``length - i - 1`` rows are paired with their
    right-hand neighbours, rectified, and padded back with zero rows so the
    carried matrix keeps its row count.

    Returns:
        ``(combined[-1, 0], combined[0][:-1])``: row 0 of the final step and
        all but the last row of the first step.
    """
    def step(i, inputs):
        length = inputs.shape[0]
        left = inputs[T.arange(0, length - i - 1)]
        right = inputs[T.arange(1, length - i)]
        next_level = T.dot(left, W1) + T.dot(right, W2) + b
        # Keep positive entries, zero out the rest.
        next_level = next_level * (next_level > 0)
        # Zero rows restore the original number of rows.
        padding = T.zeros_like(inputs[:length - next_level.shape[0]])
        return T.concatenate([next_level, padding])

    row_count = X.shape[0]
    combined, _ = theano.scan(
        step,
        sequences=[T.arange(row_count)],
        outputs_info=[X],
        n_steps=row_count - 1,
    )
    return combined[-1, 0], combined[0][:-1]


combined, pairwise = pair_combine(X)
f = theano.function(inputs=[X], outputs=[combined, pairwise])
import theano import math import utils import theano.tensor as T import numpy as np import utils as U initial_weights = U.initial_weights(8,3) W = U.create_shared(initial_weights) data = T.imatrix('data') label = T.matrix('label') def construct(bits_set,W): return W[bits_set].sum(axis=0) output,updates = theano.scan( construct, sequences = data, non_sequences = W ) cost = T.mean(0.5*T.sum((output - label)**2,axis=1)) grad = T.grad(cost,wrt=W) x = np.arange(8,dtype=np.int32).reshape(8,1) y = np.array( [[0,0,0], [0,0,1], [0,1,0], [0,1,1],
import theano
import math
import pickle
import theano.tensor as T
import numpy as np
import utils as U
from theano import sparse
from scipy.sparse import csr_matrix


def shared_sparse(arr):
    """Hand the CSR pieces of ``arr`` to ``theano.sparse.CSR``.

    Args:
        arr: object with ``data``, ``indices``, ``indptr`` and ``shape``
            attributes (the script passes a scipy CSR matrix).

    Returns:
        The result of ``sparse.CSR(data, indices, indptr, shape)``.
    """
    components = (arr.data, arr.indices, arr.indptr, np.array(arr.shape))
    return sparse.CSR(*components)


if __name__ == "__main__":
    identity = csr_matrix(np.eye(100))
    training_data = shared_sparse(identity)
    #training_labels = pickle.load(open('tags.train.data','r'))

    # NOTE(review): the data is 100x100 but W is requested as 71165x26920;
    # the two do not look compatible for a dot product. Confirm the intended
    # data before running.
    W = U.create_shared(U.initial_weights(71165, 26920))
    out = theano.dot(training_data, W)

    f = theano.function(inputs=[], outputs=out)
    print(f())
row += 1 prev += i return dense def sparse_dot(l, prev, values, W): row_data = values[T.arange(prev, prev + l)] row_weights = W[row_data[:, 0]] sum_weights = T.sum(row_weights * row_data[:, 1].reshape((l, 1)), axis=0) return sum_weights, prev + l if __name__ == "__main__": M = [[(1, 2), (5, 3), (10, 1)], [(0, 2), (3, 1)], [(2, 2), (8, 4)]] index = T.ivector('index') values = T.imatrix('values') prev = T.iscalar('prev') initial_weights = U.initial_weights(11, 3) W = U.create_shared(initial_weights) [output, _], updates = theano.scan(sparse_dot, sequences=index, outputs_info=[None, prev], non_sequences=[values, W]) f = theano.function(inputs=[index, values, prev], outputs=output) ind, val = to_sparse_array(M) print ind, val print f(ind, val, 0)
import theano
import theano.tensor as T
import numpy as np
import utils as U
from numpy_hinton import print_arr
from theano.printing import Print

W1 = U.create_shared(U.initial_weights(10, 10))
W2 = U.create_shared(U.initial_weights(10, 10))
b = U.create_shared(U.initial_weights(10))

X = T.dmatrix('X')


def pair_combine(X):
    """Fold neighbouring rows of ``X`` together, one level per scan step.

    Step ``i`` combines rows ``0 .. length-i-2`` with rows
    ``1 .. length-i-1`` through ``W1``, ``W2`` and ``b``, rectifies the
    result and appends zero rows so the shape carried by the scan does not
    change.

    Returns:
        A pair: ``combined[-1, 0]`` (row 0 after the last step) and
        ``combined[0][:-1]`` (the first step's result without its last row).
    """
    def step(i, inputs):
        length = inputs.shape[0]
        heads = T.arange(0, length - i - 1)
        tails = T.arange(1, length - i)
        next_level = T.dot(inputs[heads], W1) + T.dot(inputs[tails], W2) + b
        # Keep positive entries, zero out the rest.
        next_level = next_level * (next_level > 0)
        # Pad with as many zero rows as this level has fewer than `inputs`.
        filler = T.zeros_like(inputs[:length - next_level.shape[0]])
        return T.concatenate([next_level, filler])

    levels = X.shape[0] - 1
    combined, _ = theano.scan(
        step,
        sequences=[T.arange(X.shape[0])],
        outputs_info=[X],
        n_steps=levels,
    )
    return combined[-1, 0], combined[0][:-1]
import theano
import theano.tensor as T
import numpy as np
import utils as U

# Scalar that scales the decoder pre-activation before the softmax.
switch = T.scalar('switch')

# Data: the 8x8 identity matrix. Model: one 8x3 weight matrix used for
# encoding and, transposed, for decoding.
A = U.create_shared(np.eye(8))
weights = U.create_shared(U.initial_weights(8, 3))

hidden = T.nnet.sigmoid(T.dot(A, weights))
pre_softmax = switch * T.dot(hidden, weights.T)
recon = T.nnet.softmax(pre_softmax)

# Summed squared difference between the data and its reconstruction.
cost = T.sum((A - recon)**2)
gradient = T.grad(cost, wrt=weights)

# Unscaled gradient step.
updates = [(weights, weights - gradient)]

print("Compiling...")
f = theano.function(
    inputs=[switch],
    updates=updates,
    outputs=cost,
)
print("Done.")

# Each call is made with switch = 0; the returned cost is printed.
for _ in xrange(1000000):
    print(f(0))