def online_mlp_784_500_10():
    HUs = 500
    w = shared(rand(HUs, inputs) * numpy.sqrt(6 / (inputs + HUs)))
    b = shared(zeros(HUs))
    v = shared(zeros(outputs, HUs))
    c = shared(zeros(outputs))
    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]
    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
        dot(tanh(dot(sx, w.T) + b), v.T).dimshuffle('x', 0),
        c,
        sy.dimshuffle('x'))
    cost = nll.mean()
    gw, gb, gv, gc = grad(cost, [w, b, v, c])
    train = function([], [],
                     updates={
                         w: w - lr * gw,
                         b: b - lr * gb,
                         v: v - lr * gv,
                         c: c - lr * gc,
                         si: (si + 1) % n_examples})
    theano.printing.debugprint(train, file=open('foo_train', 'wb'))
    t = time.time()
    train.fn(n_calls=n_examples)
    dt = time.time() - t
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
    reportmodel('mlp_784_500_10_hack', 1, dt)
def online_mlp_784_500_10():
    assert False, "This is old stuff not up to date that you probably don't need"
    HUs = 500
    w = shared(rand(HUs, inputs) * numpy.sqrt(6 / (inputs + HUs)))
    b = shared(zeros(HUs))
    v = shared(zeros(outputs, HUs))
    c = shared(zeros(outputs))
    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]
    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
        dot(tanh(dot(sx, w.T) + b), v.T).dimshuffle('x', 0),
        c,
        sy.dimshuffle('x'))
    cost = nll.mean()
    gw, gb, gv, gc = grad(cost, [w, b, v, c])
    train = function([], [],
                     updates={
                         w: w - lr * gw,
                         b: b - lr * gb,
                         v: v - lr * gv,
                         c: c - lr * gc,
                         si: (si + 1) % n_examples})
    #theano.printing.debugprint(train, file=open('foo_train', 'wb'))
    GlobalBenchReporter.simple_eval_model(train, "mlp_784_500_10_hack")
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
def online_mlp_784_1000_1000_1000_10():
    assert False, "This is old stuff not up to date that you probably don't need"
    w0 = shared(rand(inputs, 1000) * numpy.sqrt(6 / (inputs + 1000)))
    b0 = shared(zeros(1000))
    w1 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)))
    b1 = shared(zeros(1000))
    w2 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)))
    b2 = shared(zeros(1000))
    v = shared(zeros(1000, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, w2, b2, v, c]
    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]
    h0 = tanh(dot(sx, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)
    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
        dot(h2, v).dimshuffle('x', 0),
        c,
        sy.dimshuffle('x'))
    cost = nll.mean()
    gparams = grad(cost, params)
    updates = [(p, p - lr * gp) for p, gp in zip(params, gparams)]
    updates += [(si, (si + 1) % n_examples)]
    train = function([], [], updates=updates)
    #theano.printing.debugprint(train, file=open('foo_train', 'wb'))
    GlobalBenchReporter.simple_eval_model(train, "mlp_784_1000_1000_1000_10_hack")
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
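# A minimal, self-contained sketch of the op the benchmarks above rely on.
# crossentropy_softmax_argmax_1hot_with_bias(x, b, y_idx) fuses
# softmax(x + b), the per-example negative log-likelihood against the
# integer targets y_idx, and the argmax into a single graph node, returning
# the three results in that order (as the unpacking above shows). Shapes
# and values below are illustrative assumptions, not taken from the
# benchmark code.
import numpy
import theano
import theano.tensor as tensor
from theano.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias

x = tensor.dmatrix('x')    # (n_examples, n_classes) pre-bias activations
b = tensor.dvector('b')    # (n_classes,) bias
y = tensor.ivector('y')    # (n_examples,) integer class labels
nll, p_y_given_x, y_pred = crossentropy_softmax_argmax_1hot_with_bias(x, b, y)
f = theano.function([x, b, y], [nll.mean(), p_y_given_x, y_pred])

cost, probs, preds = f(numpy.zeros((4, 10)),
                       numpy.zeros(10),
                       numpy.zeros(4, dtype='int32'))
# With all-zero activations the softmax is uniform (0.1 per class), so
# cost comes out as log(10).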
def test_bug_2009_07_17_borrowed_output():
    """Regression test for a bug where output was borrowed by mistake."""
    a = theano.tensor.dmatrix()
    b = theano.tensor.dmatrix()
    # The output should *NOT* be borrowed.
    g = theano.function([a, b],
                        theano.Out(theano.tensor.dot(a, b), borrow=False))
    x = numpy.zeros((1, 2))
    y = numpy.ones((2, 5))
    z = g(x, y)
    print z          # Should be zero.
    x.fill(1)
    print g(x, y)    # Should be non-zero.
    print z          # Should still be zero.
    assert numpy.linalg.norm(z) == 0

    # The code above was supposed to fail when it was written (or, more
    # accurately, on the next revision, i.e. when it was merged with the
    # rest of the code, i.e. on revision cac9c9e9f08e).
    # However, for some reason, it does not fail anymore at this revision.
    # Thus, a new test (below) was added that exhibits the same issue. Note
    # that it may be better to move it into the test_nnet.py test file if it
    # turns out the bug was caused by
    # 'crossentropy_softmax_argmax_1hot_with_bias', and was not a more
    # general issue.
    test_output_activation_no_bias = theano.tensor.dmatrix()
    test_b2 = theano.tensor.dvector()
    test_target = theano.tensor.ivector()
    nll_softmax_argmax = (
        crossentropy_softmax_argmax_1hot_with_bias(
            test_output_activation_no_bias,
            test_b2,
            test_target))
    output = nll_softmax_argmax[1]
    g = theano.function([test_output_activation_no_bias, test_b2, test_target],
                        theano.Out(output, borrow=False))
    a = numpy.zeros((1, 5))
    b = numpy.ones(5)
    c = numpy.zeros(1, dtype=numpy.int32)
    z = g(a, b, c)
    z_backup = copy.copy(z)
    id_z = id(z)
    print('Output z after first call: %s' % (z, ))
    a[0, 0] = 1
    id_other = id(g(a, b, c))
    print('Output z after second call: %s' % (z, ))
    # Ensure that calling the function again returns a pointer towards a new
    # array.
    assert id_z != id_other
    # Just to be 100% sure, ensure that z was not altered.
    assert (z == z_backup).all()
def test_bug_2009_07_17_borrowed_output():
    # Regression test for a bug where output was borrowed by mistake.
    a = theano.tensor.dmatrix()
    b = theano.tensor.dmatrix()
    # The output should *NOT* be borrowed.
    g = theano.function([a, b],
                        theano.Out(theano.tensor.dot(a, b), borrow=False))
    x = np.zeros((1, 2))
    y = np.ones((2, 5))
    z = g(x, y)
    print(z)          # Should be zero.
    x.fill(1)
    print(g(x, y))    # Should be non-zero.
    print(z)          # Should still be zero.
    assert np.linalg.norm(z) == 0

    # The code above was supposed to fail when it was written (or, more
    # accurately, on the next revision, i.e. when it was merged with the
    # rest of the code, i.e. on revision cac9c9e9f08e).
    # However, for some reason, it does not fail anymore at this revision.
    # Thus, a new test (below) was added that exhibits the same issue. Note
    # that it may be better to move it into the test_nnet.py test file if it
    # turns out the bug was caused by
    # 'crossentropy_softmax_argmax_1hot_with_bias', and was not a more
    # general issue.
    test_output_activation_no_bias = theano.tensor.dmatrix()
    test_b2 = theano.tensor.dvector()
    test_target = theano.tensor.ivector()
    nll_softmax_argmax = (
        crossentropy_softmax_argmax_1hot_with_bias(
            test_output_activation_no_bias,
            test_b2,
            test_target))
    output = nll_softmax_argmax[1]
    g = theano.function([test_output_activation_no_bias, test_b2, test_target],
                        theano.Out(output, borrow=False))
    a = np.zeros((1, 5))
    b = np.ones(5)
    c = np.zeros(1, dtype=np.int32)
    z = g(a, b, c)
    z_backup = copy.copy(z)
    id_z = id(z)
    print('Output z after first call: %s' % (z, ))
    a[0, 0] = 1
    id_other = id(g(a, b, c))
    print('Output z after second call: %s' % (z, ))
    # Ensure that calling the function again returns a pointer towards a new
    # array.
    assert id_z != id_other
    # Just to be 100% sure, ensure that z was not altered.
    assert (z == z_backup).all()
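# The two regression tests above hinge on the semantics of
# theano.Out(..., borrow=...): with borrow=False the compiled function must
# hand back a fresh copy of its result, while with borrow=True it is
# *allowed* (not required) to return and later overwrite an internal output
# buffer. A minimal sketch of the contrast, with illustrative names:
import numpy as np
import theano
import theano.tensor as tensor

m = tensor.dmatrix('m')
f_copy = theano.function([m], theano.Out(m * 2, borrow=False))
f_borrow = theano.function([m], theano.Out(m * 2, borrow=True))

a = np.ones((2, 2))
r1 = f_copy(a)
r2 = f_copy(a)
assert r1 is not r2    # borrow=False: each call returns a fresh array
# f_borrow(a), by contrast, may alias internal storage, so its result
# should be copied before the next call if it needs to be kept around.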
def online_mlp_784_10():
    v = shared(zeros(outputs, inputs))
    c = shared(zeros(outputs))
    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]
    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
        dot(sx, v.T).dimshuffle('x', 0),
        c,
        sy.dimshuffle('x'))
    cost = nll.mean()
    gv, gc = grad(cost, [v, c])
    train = function([], [],
                     updates={
                         v: v - lr * gv,
                         c: c - lr * gc,
                         si: (si + 1) % n_examples})
    theano.printing.debugprint(train, file=open('foo_train', 'wb'))

    # Variant 1: let the compiled function loop n_examples times internally.
    t = time.time()
    train.fn(n_calls=n_examples)
    dt = time.time() - t
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
    reportmodel('mlp_784_10_hack', 1, dt)

    # Variant 2: call through the Python wrapper on every iteration.
    if 1:
        t = time.time()
        for i in xrange(n_examples):
            train()
        dt = time.time() - t
        reportmodel('mlp_784_10_hack2', 1, dt)

    # Variant 3: hoist the .fn attribute lookup out of the loop.
    if 1:
        t = time.time()
        fn = train.fn
        for i in xrange(n_examples):
            fn()
        dt = time.time() - t
        reportmodel('mlp_784_10_hack3', 1, dt)
def online_mlp_784_10():
    assert False, "This is old stuff not up to date that you probably don't need"
    v = shared(zeros(outputs, inputs))
    c = shared(zeros(outputs))
    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]
    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
        dot(sx, v.T).dimshuffle('x', 0),
        c,
        sy.dimshuffle('x'))
    cost = nll.mean()
    gv, gc = grad(cost, [v, c])
    train = function([], [],
                     updates={
                         v: v - lr * gv,
                         c: c - lr * gc,
                         si: (si + 1) % n_examples})
    #theano.printing.debugprint(train, file=open('foo_train', 'wb'))
    GlobalBenchReporter.simple_eval_model(train, 'mlp_784_10_hack')
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
    if 1:
        t = time.time()
        for i in xrange(n_examples):
            train()
        dt = time.time() - t
        reportmodel('mlp_784_10_hack2', 1, dt)
    if 1:
        t = time.time()
        fn = train.fn
        for i in xrange(n_examples):
            fn()
        dt = time.time() - t
        reportmodel('mlp_784_10_hack3', 1, dt)
def online_mlp_784_1000_1000_1000_10():
    w0 = shared(rand(inputs, 1000) * numpy.sqrt(6 / (inputs + 1000)))
    b0 = shared(zeros(1000))
    w1 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)))
    b1 = shared(zeros(1000))
    w2 = shared(rand(1000, 1000) * numpy.sqrt(6 / (1000 + 1000)))
    b2 = shared(zeros(1000))
    v = shared(zeros(1000, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, w2, b2, v, c]
    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]
    h0 = tanh(dot(sx, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)
    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
        dot(h2, v).dimshuffle('x', 0),
        c,
        sy.dimshuffle('x'))
    cost = nll.mean()
    gparams = grad(cost, params)
    updates = [(p, p - lr * gp) for p, gp in zip(params, gparams)]
    updates += [(si, (si + 1) % n_examples)]
    train = function([], [], updates=updates)
    theano.printing.debugprint(train, file=open('foo_train', 'wb'))
    t = time.time()
    train.fn(n_calls=n_examples)
    dt = time.time() - t
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
    reportmodel('mlp_784_1000_1000_1000_10_hack', 1, dt)
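# All of the online_* benchmarks above use the same trick to avoid Python
# overhead per example: the dataset lives in shared variables (data_x,
# data_y), a shared integer si indexes the current example, and the update
# dictionary both applies the SGD step and advances si modulo n_examples,
# so repeated calls to train() sweep the whole training set. A stripped-down
# sketch of just that indexing pattern (toy data; names are illustrative):
import numpy
import theano
import theano.tensor as tensor

data = theano.shared(numpy.arange(5.0))    # toy "dataset"
si = theano.shared(0)                      # current example index
acc = theano.shared(0.0)                   # stand-in for a parameter
step = theano.function([], [],
                       updates={acc: acc + data[si],
                                si: (si + 1) % 5})
for _ in range(5):
    step()
assert acc.get_value() == numpy.arange(5.0).sum()    # visited every example
assert si.get_value() == 0                           # index wrapped around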
w2 = TT.dmatrix()
b2 = TT.dvector()

from theano.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias
from theano.compile.function_module import function

xw1 = theano.dot(w1.T, x.T).T
h = ACTIVATION_FUNCTION(xw1 + b1)
if HLAYERS == 2:
    xwh = theano.dot(wh.T, h.T).T
    h = ACTIVATION_FUNCTION(xwh + bh)
#zero = tensor.zeros_like(x[0,:])
(kl, softmax, argmax) = crossentropy_softmax_argmax_1hot_with_bias(
    theano.dot(h, w2), b2, targety)

if HLAYERS == 2:
    validatefn = function([x, targety, w1, b1, wh, bh, w2, b2],
                          [kl, softmax, argmax, xw1, xwh],
                          mode=COMPILE_MODE)
    (gw1, gb1, gwh, gbh, gw2, gb2) = TT.grad(kl, [w1, b1, wh, bh, w2, b2])
    trainfn = function([x, targety, w1, b1, wh, bh, w2, b2],
                       [kl, softmax, argmax, xw1, xwh,
                        theano.compile.io.Out(gw1, borrow=True),
                        gb1, gwh, gbh, gw2, gb2],
                       mode=COMPILE_MODE)
else:
    validatefn = function([x, targety, w1, b1, w2, b2],
                          [kl, softmax, argmax, xw1],
                          mode=COMPILE_MODE)
b2 = TT.dvector()

from theano.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias
from theano.compile.function_module import function

xw1 = theano.dot(w1.T, x.T).T
h = ACTIVATION_FUNCTION(xw1 + b1)
if HLAYERS == 2:
    xwh = theano.dot(wh.T, h.T).T
    h = ACTIVATION_FUNCTION(xwh + bh)
#zero = tensor.zeros_like(x[0,:])
if HYPERPARAMETERS["locally normalize"]:
    (kl, softmax, argmax) = crossentropy_softmax_argmax_1hot_with_bias(
        theano.dot(h, w2), b2, targety)
else:
    prey = theano.dot(h, w2) + b2
    softmax = nnet.sigmoid(prey)
    kl = -TT.mean(TT.sum(targety * TT.log(softmax)
                         + (1 - targety) * TT.log(1 - softmax), axis=1),
                  axis=0)
    argmax = TT.argmax(softmax)

if HLAYERS == 2:
    validatefn = function([x, targety, w1, b1, wh, bh, w2, b2],
                          [kl, softmax, argmax, xw1, xwh],
                          mode=COMPILE_MODE)
    (gw1, gb1, gwh, gbh, gw2, gb2) = TT.grad(kl, [w1, b1, wh, bh, w2, b2])
    trainfn = function([x, targety, w1, b1, wh, bh, w2, b2],
                       [kl, softmax, argmax, xw1, xwh,
                        theano.compile.io.Out(gw1, borrow=True),
                        gb1, gwh, gbh, gw2, gb2],
                       mode=COMPILE_MODE)
else:
    validatefn = function([x, targety, w1, b1, w2, b2],
                          [kl, softmax, argmax, xw1],
                          mode=COMPILE_MODE)
    (gw1, gb1, gw2, gb2) = TT.grad(kl, [w1, b1, w2, b2])
    trainfn = function([x, targety, w1, b1, w2, b2],
                       [kl, softmax, argmax, xw1,
                        theano.compile.io.Out(gw1, borrow=True),
                        gb1, gw2, gb2],
                       mode=COMPILE_MODE)
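# In the snippet above, the "locally normalize" branch uses the fused op
# while the other branch builds an independent sigmoid cross-entropy per
# class. For reference, the fused branch is equivalent to writing out the
# softmax and negative log-likelihood by hand; a sketch of that equivalence
# (variable names here are illustrative, not taken from the code above):
import theano
import theano.tensor as tensor
from theano.tensor.nnet import (softmax,
                                crossentropy_softmax_argmax_1hot_with_bias)

act = tensor.dmatrix('act')    # (n_examples, n_classes) activations
bias = tensor.dvector('bias')
y = tensor.ivector('y')

# Fused version.
nll_fused, _, _ = crossentropy_softmax_argmax_1hot_with_bias(act, bias, y)

# Hand-written version: softmax, then pick out the target log-probability.
p = softmax(act + bias)
nll_manual = -tensor.log(p)[tensor.arange(y.shape[0]), y]

f = theano.function([act, bias, y], abs(nll_fused - nll_manual).max())
# f(...) should return ~0 for any inputs (up to floating-point error).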