# Training driver. X, eps, parameters, gradients, error, f, print_arr and the
# `updates` helper module are all defined outside the visible part of the file.
mu = T.dscalar('mu')
adadelta_updates = updates.adadelta(parameters, gradients, mu, eps)
train = theano.function(
    inputs=[X, eps, mu],
    outputs=error,
    updates=adadelta_updates,
)

# example = np.vstack((np.eye(8), np.eye(8)))
example = np.eye(8)

# From here on `error` holds the latest numeric training error rather than the
# symbolic expression that was compiled into `train` above.
error = 10
lr = 1e-4
t = 0
while error > 1e-4:
    # Present the rows in a fresh random order on every step.
    np.random.shuffle(example)
    # error = train(example, lr, min(1 - 3.0 / (t + 5), 0.999))
    # error = train(example, lr, 0)
    error = train(example, 1e-6, 0.95)  # positional order is (X, eps, mu)
    print(error)
    t += 1

# Inspect what the trained model produces for one more shuffled batch.
np.random.shuffle(example)
hidden, hidden_rep, input_rep, unrlld = f(example)
print_arr(example)
print_arr(unrlld)
print_arr(parameters[1].get_value())
# print_arr(unrlld, hidden)
hidden = T.nnet.sigmoid(T.dot(X, W_input_to_hidden)) output = T.nnet.softmax(T.dot(hidden, W_input_to_hidden.T) + b_output) parameters = [W_input_to_hidden, b_output] return X, output, parameters def build_error(X, output, params): return T.mean((X - output)**2) + sum(0.0001 * T.sum(p**2) for p in params) if __name__ == '__main__': X, output, parameters = build_network(8, 3) error = build_error(X, output, parameters) grads = T.grad(error, wrt=parameters) updates = [(W, W - grad) for W, grad in zip(parameters, grads)] train = theano.function(inputs=[X], outputs=error, updates=updates) test = theano.function( inputs=[X], outputs=output, ) data = np.eye(8, dtype=np.int32) # data = np.vstack((data,)) for _ in xrange(100000): np.random.shuffle(data) print train(data) print_arr(test(np.eye(8, dtype=np.int32))) print_arr(1 / (1 + np.exp(-parameters[0].get_value())), 1)
# Shared parameters used by pair_combine; U, T, theano and print_arr are
# defined outside the visible part of the file.
W1 = U.create_shared(U.initial_weights(10, 10))
W2 = U.create_shared(U.initial_weights(10, 10))
b = U.create_shared(U.initial_weights(10))
X = T.dmatrix('X')


def pair_combine(X):
    """Repeatedly merge adjacent rows of X with a scan.

    Every scan step combines each row with its successor, so the number of
    meaningful rows drops by one per step; the result is zero-padded back to
    the original row count so the scan state keeps a constant shape.

    Returns a pair: the first row of the last step's output, and the first
    step's output without its final row.
    """
    def step(i, inputs):
        length = inputs.shape[0]
        left_rows = inputs[T.arange(0, length - i - 1)]
        right_rows = inputs[T.arange(1, length - i)]
        merged = T.dot(left_rows, W1) + T.dot(right_rows, W2) + b
        # Zero out the non-positive entries.
        merged = merged * (merged > 0)
        # merged = left_rows + right_rows
        # merged = theano.printing.Print('inputs')(merged)
        padding = T.zeros_like(inputs[:length - merged.shape[0]])
        return T.concatenate([merged, padding])

    levels, _ = theano.scan(
        step,
        sequences=[T.arange(X.shape[0])],
        outputs_info=[X],
        n_steps=X.shape[0] - 1,
    )
    top = levels[-1, 0]
    first_level = levels[0][:-1]
    return top, first_level


combined, pairwise = pair_combine(X)
f = theano.function(inputs=[X], outputs=[combined, pairwise])

c, p = f(np.eye(10, dtype=np.float64))
print_arr(c)
print_arr(p)
# Module-level shared variables read by pair_combine's step function.
# U, T, theano and print_arr come from outside the visible chunk.
W1 = U.create_shared(U.initial_weights(10, 10))
W2 = U.create_shared(U.initial_weights(10, 10))
b = U.create_shared(U.initial_weights(10))
X = T.dmatrix('X')


def pair_combine(X):
    """Build a scan that folds neighbouring rows of X together level by level.

    The scan runs X.shape[0] - 1 steps. Two expressions are returned:
    row 0 of the final step's output, and the output of the first step with
    its last row dropped.
    """
    def step(i, inputs):
        n_rows = inputs.shape[0]
        heads = T.arange(0, n_rows - i - 1)
        tails = T.arange(1, n_rows - i)
        pre_activation = (
            T.dot(inputs[heads], W1) + T.dot(inputs[tails], W2) + b
        )
        # Keep positive entries, replace the rest with zero.
        activated = pre_activation * (pre_activation > 0)
        # activated = inputs[heads] + inputs[tails]
        # activated = theano.printing.Print('inputs')(activated)
        # Pad with zero rows so the output has as many rows as `inputs`.
        filler = T.zeros_like(inputs[:n_rows - activated.shape[0]])
        return T.concatenate([activated, filler])

    step_indices = T.arange(X.shape[0])
    history, _ = theano.scan(
        step,
        sequences=[step_indices],
        outputs_info=[X],
        n_steps=X.shape[0] - 1,
    )
    return history[-1, 0], history[0][:-1]


combined, pairwise = pair_combine(X)
f = theano.function(inputs=[X], outputs=[combined, pairwise])

identity = np.eye(10, dtype=np.float64)
c, p = f(identity)
print_arr(c)
print_arr(p)
(hidden[:-1] - hidden1_reproduction[1:])**2) return input_reproduction_sqerror + hidden_reproduction_sqerror if __name__ == '__main__': X, parameters, hidden, hidden1_reproduction, input_reproduction, unrolled = build_network( 8, 24) f = theano.function( inputs=[X], outputs=[hidden, hidden1_reproduction, input_reproduction, unrolled]) error = build_error(X, hidden, hidden1_reproduction, input_reproduction) gradients = T.grad(error, wrt=parameters) updates = [(p, p - 0.5 * g) for p, g in zip(parameters, gradients)] train = theano.function(inputs=[X], updates=updates, outputs=error) example = np.eye(8) for _ in xrange(50000): np.random.shuffle(example) print train(example) np.random.shuffle(example) hidden, hidden_rep, input_rep, unrlld = f(example) print_arr(hidden) print_arr(example) print_arr(unrlld) # print_arr(unrlld,hidden) # print_arr(parameters[4].get_value())
# Training driver; build_network, build_error, theano, T and print_arr are
# defined outside the visible part of the file.
(X, parameters, hidden, hidden1_reproduction,
 input_reproduction, unrolled) = build_network(8, 24)

monitored = [hidden, hidden1_reproduction, input_reproduction, unrolled]
f = theano.function(inputs=[X], outputs=monitored)

error = build_error(X, hidden, hidden1_reproduction, input_reproduction)
gradients = T.grad(error, wrt=parameters)

# Fixed-step gradient descent: every parameter moves by 0.5 * its gradient.
updates = [(p, p - 0.5 * g) for p, g in zip(parameters, gradients)]
train = theano.function(
    inputs=[X],
    outputs=error,
    updates=updates,
)

example = np.eye(8)
for _ in xrange(50000):
    # Shuffle the rows in place, then take one training step on them.
    np.random.shuffle(example)
    print(train(example))

# One final shuffled batch to look at the network's outputs.
np.random.shuffle(example)
hidden, hidden_rep, input_rep, unrlld = f(example)
print_arr(hidden)
print_arr(example)
print_arr(unrlld)
# print_arr(unrlld, hidden)
# print_arr(parameters[4].get_value())
parameters = [W_input_to_hidden,b_output] return X,output,parameters def build_error(X,output,params): return T.mean((X - output)**2) + sum(0.0001*T.sum(p**2) for p in params) if __name__ == '__main__': X,output,parameters = build_network(8,3) error = build_error(X,output,parameters) grads = T.grad(error,wrt=parameters) updates = [ (W,W-grad) for W,grad in zip(parameters,grads) ] train = theano.function( inputs=[X], outputs=error, updates=updates ) test = theano.function( inputs=[X], outputs=output, ) data = np.eye(8,dtype=np.int32) # data = np.vstack((data,)) for _ in xrange(100000): np.random.shuffle(data) print train(data) print_arr(test(np.eye(8,dtype=np.int32))) print_arr(1/(1 + np.exp(-parameters[0].get_value())),1)