Example #1
    def test_using_gpu_3(self):

        if theano.config.device.find('gpu') > -1:

            from theano import function, config, shared, sandbox, Out
            import theano.tensor as T
            import numpy
            import time

            vlen = 10 * 30 * 70  # 10 x #cores x # threads per core
            iters = 10

            rng = numpy.random.RandomState(22)
            x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
            f = function([],
                         Out(sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)),
                             borrow=True))
            t0 = time.time()
            for i in xrange(iters):
                r = f()
            print 'Looping %d times took' % iters, time.time() - t0, 'seconds'
            print 'Result is', r
            print 'Numpy result is', numpy.asarray(r)
            if numpy.any(
                [isinstance(x.op, T.Elemwise)
                 for x in f.maker.env.toposort()]):
                print 'Used the cpu'
            else:
                print 'Used the gpu'

            assert not numpy.any(
                [isinstance(x.op, T.Elemwise) for x in f.maker.env.toposort()])
Example #2
from __future__ import print_function  # needed for the print(..., end='') calls below

import time

import numpy as np
from theano import Out, config, function, shared, tensor


def benchmark_shared_cpu():
    vlen = 10 * 30 * 700
    iters = 1000

    rng = np.random.RandomState(22)
    x = shared(np.asarray(rng.rand(vlen), config.floatX))
    f1 = function(
        [],
        tensor.exp(x)
    )
    f2 = function(
        [],
        Out(
            tensor.exp(x),
            borrow=True
        )
    )


    t0 = time.time()
    for i in xrange(iters):
        r = f1()
    t1 = time.time()
    no_borrow = t1 - t0
    t0 = time.time()
    for i in xrange(iters):
        r = f2()
    t1 = time.time()

    print('Looping', iters, 'times took', no_borrow, 'seconds without borrow', end='')
    print('and', t1 - t0, 'seconds with borrow.')
Example #3
from theano import In, Out, function


def makeFunc(inList, outList, updates):
    """Compile a Theano function whose inputs and outputs borrow their
    underlying storage (no defensive copies) and allow downcasting."""
    inputs = []
    for i in inList:
        # Borrow the caller's buffer and let Theano downcast if needed.
        inputs.append(In(i, borrow=True, allow_downcast=True))
    outputs = []
    for o in outList:
        # Return views on internal storage instead of copies.
        outputs.append(Out(o, borrow=True))
    return function(inputs=inputs,
                    outputs=outputs,
                    updates=updates,
                    allow_input_downcast=True)
Example #4
# Inverse correlation matrix for the recursive-least-squares update, initialised to I/alpha.
P0 = float32((1.0/alpha)*eye(nRec2Out))

x = shared(x0)
r = shared(tanh(x0))
z = shared(z0)
P = shared(P0)
dts = shared(dt)
wo = shared(zeros((nRec2Out, 1), dtype=float32))
wf = shared(float32(2.0*(random.rand(N, 1))-0.5))
fti = T.scalar('fti')
Ms = shared(M)

# Euler step of the recurrent network state, with the readout z fed back through wf.
xnew = (1.0 - dts) * x + T.dot(Ms, r * dts) + wf * dts * z[0][0]
# Readout: z = wo^T r.
znew = T.dot(T.transpose(wo), r)
update = function([], [Out(z, borrow=True)],
                  updates=[(x, xnew), (r, T.tanh(x)), (z, znew)])

print "Update compiled"

# Recursive-least-squares (FORCE) update of the inverse correlation matrix P
# and the readout weights wo; fti is the target signal at the current step.
k = T.dot(P, r)
rPr = T.dot(T.transpose(r), k)
c = 1.0/(1.0 + rPr)
Pnew = P - T.dot(k, k.T) * c[0][0]
wonew = wo - (z[0][0] - fti) * k * c[0][0]
learn = function([fti], [Out(wo, borrow=True)],
                 updates=[(P, Pnew), (wo, wonew)])

print "Learn compiled"

amp = 1.3
freq = 1/60.0
ft = (amp/1.0)*sin(1.0*math.pi*freq*simtime) + \
Example #5
from theano import function, config, shared, sandbox, Out
import theano.tensor as T
import numpy
import time

vlen = 10 * 30 * 768  # 10 x # cores x # threads per core
iters = 1000

rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([],
        Out(sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)),
            borrow=True))
print f.maker.fgraph.toposort()
t0 = time.time()
for i in xrange(iters):
    r = f()
t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'
print 'Result is', r
print 'Numpy result is', numpy.asarray(r)
if numpy.any([isinstance(x.op, T.Elemwise) for x in f.maker.fgraph.toposort()]):
    print 'Used the cpu'
else:
    print 'Used the gpu'

In this example the run finishes in a little over 0.05 seconds, which is more than a
60x speedup over the CPU implementation.

Setting the borrow flag to False:
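A minimal sketch of the same benchmark with borrowing disabled, assuming the same
imports, vlen, iters and rng as in Example #5; with borrow=False (the default) each
call copies the result out of the function's internal storage, so it runs slower than
the borrow=True version above:

x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
f = function([],
        Out(sandbox.cuda.basic_ops.gpu_from_host(T.exp(x)),
            borrow=False))  # borrow=False: a fresh copy is returned on every call
t0 = time.time()
for i in xrange(iters):
    r = f()
t1 = time.time()
print 'Looping %d times took' % iters, t1 - t0, 'seconds'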
Example #6
def compileModel(data,
                 nInputs,
                 nOutputs,
                 hiddenLayersSize=[1200, 1200],
                 dropoutRates=[0.2, 0.5, 0.5],
                 activation='relu',
                 weightInitMode='normal',
                 regularizer=0.0001):
    """
    Creates a symbolic model given the specified parameters using Theano
    
    Output:
    A list containing three the training, validation and test compiled functions of Theano
    """

    np.random.seed(815)

    x = T.matrix('x')
    y = T.wvector('y')
    learningRate = T.scalar('learningRate')
    regularization = T.scalar('regularization')

    #Data sets
    train_x, train_y = data[0]
    valid_x, valid_y = data[1]
    test_x, test_y = data[2]

    nnet = MLP(x,
               nInputs,
               hiddenLayersSize,
               nOutputs,
               dropoutRates=dropoutRates,
               activation=activation,
               weightInitMode=weightInitMode)

    loss = nnet.loss(y, regularization)
    error = nnet.error(y)

    gParams = T.grad(loss, nnet.params)

    weightUpdates = [(param, param - learningRate * gParam)
                     for param, gParam in zip(nnet.params, gParams)]

    batchIndicesVecctor = T.ivector('batchIndicesVecctor')
    trainF = function([batchIndicesVecctor, learningRate, regularization],
                      Out(sbasic.gpu_from_host(loss), borrow=True),
                      updates=weightUpdates,
                      givens={
                          x: train_x[batchIndicesVecctor],
                          y: train_y[batchIndicesVecctor]
                      })
    validF = function([batchIndicesVecctor],
                      Out(sbasic.gpu_from_host(T.cast(error, T.config.floatX)),
                          borrow=True),
                      givens={
                          x: valid_x[batchIndicesVecctor],
                          y: valid_y[batchIndicesVecctor]
                      })
    testF = function([batchIndicesVecctor],
                     Out(sbasic.gpu_from_host(T.cast(error, T.config.floatX)),
                         borrow=True),
                     givens={
                         x: test_x[batchIndicesVecctor],
                         y: test_y[batchIndicesVecctor]
                     })

    return [trainF, validF, testF]
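A hypothetical usage sketch of compileModel; the MNIST-like sizes, the minibatch of
indices and the layout of data as three pairs of shared variables are assumptions
inferred from the givens above:

# data is assumed to be [(train_x, train_y), (valid_x, valid_y), (test_x, test_y)],
# each element a Theano shared variable, as required by the givens inside compileModel.
trainF, validF, testF = compileModel(data, nInputs=784, nOutputs=10)

batchIndices = np.arange(128, dtype='int32')    # row indices of one minibatch
trainLoss = trainF(batchIndices, 0.01, 0.0001)  # learningRate, regularization
validError = validF(batchIndices)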
Example #7

x = shared(x0)
r = shared(tanh(x0))
z = shared(z0)
P = shared(P0)
dts = shared(dt)
wo = shared(zeros((nRec2Out, 1), dtype=float32))
wf = shared(float32(2.0 * (random.rand(N, 1)) - 0.5))
fti = T.scalar('fti')
Ms = shared(M)
Mins = shared(Min)
I = T.matrix('I')

# Euler step of the recurrent state, here driven by an external input I instead of feedback.
xnew = (1.0 - dts) * x + T.dot(Ms, r * dts) + T.dot(Mins, I)
znew = T.dot(T.transpose(wo), r)
update = function([I], [Out(z, borrow=True)],
                  updates=[(x, xnew), (r, T.tanh(x)), (z, znew)],
                  mode='PROFILE_MODE')

print "Update compiled"

# Recursive-least-squares update; in this variant the recurrent weights Ms
# are adjusted alongside the readout weights wo.
k = T.dot(P, r)
rPr = T.dot(T.transpose(r), k)
c = 1.0 / (1.0 + rPr)
Pnew = P - T.dot(k, k.T) * c[0][0]
dw = (z[0][0] - fti) * k * c[0][0]
wonew = wo - dw
Mnew = Ms + T.tile(dw.T, (N, 1))
learn = function([fti], [Out(wo, borrow=True)],
                 updates=[(P, Pnew), (wo, wonew), (Ms, Mnew)],
                 mode='PROFILE_MODE')