Пример #1
0
def main(dev1, dev2):
    """Time a two-output Theano function on one GPU context versus two.

    ``dev1`` and ``dev2`` are handed to ``init_dev`` and registered under the
    context names ``'ctx1'`` and ``'ctx2'``.  Six 1024x1024 float32 shared
    variables are created (four in ``ctx1``, two in ``ctx2``) and two
    functions are compiled from them:

    * one whose two ``gpu_dot22`` outputs both use ``ctx1`` operands;
    * one that pairs a ``ctx1`` ``gpu_dot22`` with a ``ctx2`` ``gpu_dot22``.

    Each function is called once untimed and then once timed; the elapsed
    wall-clock seconds of the timed call (including waiting on both outputs
    via ``sync()``) are printed.  Nothing is returned.
    """
    shape = (1024, 1024)

    def new_shared(ctx):
        # Fresh random float32 matrix wrapped in a shared variable that is
        # bound to the named context.
        return shared(numpy.random.randn(*shape).astype('float32'),
                      context_name=ctx)

    init_dev(dev1, 'ctx1')
    init_dev(dev2, 'ctx2')

    # Creation order (four ctx1 values, then two ctx2 values) is kept so the
    # random stream is consumed exactly as before.
    val1a, val1b, val1c, val1d = [new_shared('ctx1') for _ in range(4)]
    val2a, val2b = [new_shared('ctx2') for _ in range(2)]

    same_ctx = theano.function(
        [], [gpu_dot22(val1a, val1b), gpu_dot22(val1c, val1d)])
    cross_ctx = theano.function(
        [], [gpu_dot22(val1a, val1b), gpu_dot22(val2a, val2b)])

    for report, fn in (("one ctx %f", same_ctx), ("two ctx %f", cross_ctx)):
        # First call is not timed (presumably a warm-up); its outputs are
        # dropped before the measured call starts.
        outs = fn()
        outs[0].sync()
        outs[1].sync()
        outs = None

        start = time.time()
        outs = fn()
        outs[0].sync()
        outs[1].sync()
        stop = time.time()
        # Release the outputs only after the stop timestamp was taken.
        outs = None

        print(report % (stop - start, ))
Пример #2
0
def main(dev1, dev2):
    """Benchmark two ``gpu_dot22`` outputs in one context against two contexts.

    ``init_dev`` is called for ``dev1`` as ``'ctx1'`` and for ``dev2`` as
    ``'ctx2'``.  Four random 1024x1024 float32 shared variables live in
    ``ctx1`` and two in ``ctx2``.  ``f1`` computes two ``gpu_dot22`` results
    from ``ctx1`` operands only; ``f2`` computes one from ``ctx1`` operands
    and one from ``ctx2`` operands.

    For each function the first call is untimed, the second call is timed
    with ``time.time()`` up to the point where both outputs have been
    synced, and the elapsed seconds are printed.
    """
    def fresh_matrix():
        # New random 1024x1024 float32 host array for every shared variable.
        return numpy.random.randn(1024, 1024).astype('float32')

    def second_call_seconds(fn):
        # Untimed call first; drop its outputs before measuring.
        res = fn()
        res[0].sync()
        res[1].sync()
        res = None

        begin = time.time()
        res = fn()
        res[0].sync()
        res[1].sync()
        end = time.time()
        # ``res`` goes out of scope on return, i.e. after ``end`` was taken.
        return end - begin

    init_dev(dev1, 'ctx1')
    init_dev(dev2, 'ctx2')

    val1a = shared(fresh_matrix(), context_name='ctx1')
    val1b = shared(fresh_matrix(), context_name='ctx1')
    val1c = shared(fresh_matrix(), context_name='ctx1')
    val1d = shared(fresh_matrix(), context_name='ctx1')

    val2a = shared(fresh_matrix(), context_name='ctx2')
    val2b = shared(fresh_matrix(), context_name='ctx2')

    first_out = gpu_dot22(val1a, val1b)
    same_ctx_out = gpu_dot22(val1c, val1d)
    f1 = theano.function([], [first_out, same_ctx_out])

    first_out_again = gpu_dot22(val1a, val1b)
    other_ctx_out = gpu_dot22(val2a, val2b)
    f2 = theano.function([], [first_out_again, other_ctx_out])

    elapsed = second_call_seconds(f1)
    print("one ctx %f" % (elapsed,))

    elapsed = second_call_seconds(f2)
    print("two ctx %f" % (elapsed,))
Пример #3
0
def main(dev1, dev2):
    """Benchmark ``gpu_dot22`` across one or two GPU contexts.

    ``dev1`` and ``dev2`` are initialised through ``init_dev`` as the
    contexts ``'ctx1'`` and ``'ctx2'``.  One random float32 host matrix of
    shape ``(16384, 16384)`` is used as the initial value of every shared
    variable (four targeting ``ctx1``, two targeting ``ctx2``).

    Six functions are compiled and timed in the following scenarios, each
    printing its elapsed wall-clock seconds:

    * ``f1``: two ``gpu_dot22`` outputs, both from ``ctx1`` operands;
    * ``f2``: one ``ctx1`` output and one ``ctx2`` output in one function;
    * ``f3`` + ``f4``: one function per context, launched back to back and
      synced afterwards;
    * ``f5`` + ``f6``: as above, but each output is element ``[0, 0]``
      transferred to ``'cpu'`` and no explicit ``sync()`` is issued;
    * ``f3`` + ``f4`` and ``f5`` + ``f6`` again, each pair run from two
      Python threads.

    Nothing is returned.
    """
    init_dev(dev1, 'ctx1')
    init_dev(dev2, 'ctx2')

    size = 1024 * 16
    data = numpy.random.randn(size, size).astype('float32')
    val1a = theano.shared(data, target='ctx1')
    val1b = theano.shared(data, target='ctx1')
    val1c = theano.shared(data, target='ctx1')
    val1d = theano.shared(data, target='ctx1')

    val2a = theano.shared(data, target='ctx2')
    val2b = theano.shared(data, target='ctx2')

    f1 = theano.function([],
                         [gpu_dot22(val1a, val1b),
                          gpu_dot22(val1c, val1d)])
    f2 = theano.function([],
                         [gpu_dot22(val1a, val1b),
                          gpu_dot22(val2a, val2b)])
    f3 = theano.function([], [gpu_dot22(val1a, val1b)])
    f4 = theano.function([], [gpu_dot22(val2a, val2b)])
    f5 = theano.function([], [gpu_dot22(val1a, val1b)[0, 0].transfer('cpu')])
    f6 = theano.function([], [gpu_dot22(val2a, val2b)[0, 0].transfer('cpu')])

    # pre-execute to load code to GPU.
    r = f1.fn()
    r[0].sync(), r[1].sync()
    r = f2.fn()
    r[0].sync(), r[1].sync()
    r = f3.fn()
    r[0].sync()
    r = f4.fn()
    r[0].sync()
    r = f5.fn()
    r = f6.fn()
    # Drop the last result so it is not kept referenced during the timings.
    r = None

    t = time.time()
    r = f1.fn()
    r[0].sync(), r[1].sync()
    t2 = time.time()
    r = None

    print("one ctx async %f" % (t2 - t, ))

    t = time.time()
    r = f2.fn()
    r[0].sync(), r[1].sync()
    t2 = time.time()
    r = None

    print("two ctx async %f" % (t2 - t, ))

    # Launch both functions before waiting on either output.
    t = time.time()
    r = f3.fn()
    r2 = f4.fn()
    r[0].sync()
    r2[0].sync()
    t2 = time.time()
    # Release both results; previously only ``r`` was cleared, so the f4
    # output stayed referenced while the next measurement ran.
    r = r2 = None

    print("two ctx, 2 fct async %f" % (t2 - t, ))

    # No sync() here: the outputs are transferred to 'cpu' by the functions.
    t = time.time()
    r = f5.fn()
    r2 = f6.fn()
    t2 = time.time()
    r = r2 = None
    print("two ctx, 2 fct with transfer %f" % (t2 - t, ))

    # Multi-thread version
    class myThread(threading.Thread):
        """Thread that calls ``f`` once and optionally syncs its first output."""

        def __init__(self, name, f, sync):
            threading.Thread.__init__(self)
            self.f = f
            self.name = name
            self.sync = sync

        def run(self):
            # NOTE(review): this calls the compiled function itself, whereas
            # the single-threaded timings above call its ``.fn`` attribute.
            r = self.f()
            if self.sync:
                r[0].sync()
            # Keep the result reachable from the thread object.
            self.r = r

    thread1 = myThread("Thread-3", f3, True)
    thread2 = myThread("Thread-4", f4, True)
    t = time.time()
    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()
    t2 = time.time()

    print("two ctx, 2 fct async, 2 threads %f" % (t2 - t, ))

    thread1 = myThread("Thread-5", f5, False)
    thread2 = myThread("Thread-6", f6, False)
    t = time.time()
    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()
    t2 = time.time()

    print("two ctx, 2 fct with transfer, 2 threads %f" % (t2 - t, ))
Пример #4
0
def main(dev1, dev2):
    """Time ``gpu_dot22`` workloads on a single context and across two.

    ``init_dev`` registers ``dev1`` as ``'ctx1'`` and ``dev2`` as ``'ctx2'``.
    A single random float32 array of shape ``(16384, 16384)`` initialises
    all six shared variables (four with ``target='ctx1'``, two with
    ``target='ctx2'``).

    Scenarios timed (elapsed wall-clock seconds are printed for each):

    * ``f1``: two outputs computed from ``ctx1`` operands;
    * ``f2``: one output from ``ctx1`` operands, one from ``ctx2`` operands;
    * ``f3`` then ``f4``: one single-output function per context, both
      started before either output is synced;
    * ``f5`` then ``f6``: the same products reduced to element ``[0, 0]``
      and transferred to ``'cpu'``; no explicit ``sync()`` is called;
    * the ``f3``/``f4`` and ``f5``/``f6`` pairs once more, each function in
      its own Python thread.

    Nothing is returned.
    """
    init_dev(dev1, 'ctx1')
    init_dev(dev2, 'ctx2')

    size = 1024 * 16
    data = numpy.random.randn(size, size).astype('float32')
    val1a = theano.shared(data, target='ctx1')
    val1b = theano.shared(data, target='ctx1')
    val1c = theano.shared(data, target='ctx1')
    val1d = theano.shared(data, target='ctx1')

    val2a = theano.shared(data, target='ctx2')
    val2b = theano.shared(data, target='ctx2')

    f1 = theano.function([], [gpu_dot22(val1a, val1b),
                              gpu_dot22(val1c, val1d)])
    f2 = theano.function([], [gpu_dot22(val1a, val1b),
                              gpu_dot22(val2a, val2b)])
    f3 = theano.function([], [gpu_dot22(val1a, val1b)])
    f4 = theano.function([], [gpu_dot22(val2a, val2b)])
    f5 = theano.function([], [gpu_dot22(val1a, val1b)[0, 0].transfer('cpu')])
    f6 = theano.function([], [gpu_dot22(val2a, val2b)[0, 0].transfer('cpu')])

    # pre-execute to load code to GPU.
    r = f1.fn()
    r[0].sync(), r[1].sync()
    r = f2.fn()
    r[0].sync(), r[1].sync()
    r = f3.fn()
    r[0].sync()
    r = f4.fn()
    r[0].sync()
    r = f5.fn()
    r = f6.fn()
    # Forget the warm-up result before any measurement starts.
    r = None

    t = time.time()
    r = f1.fn()
    r[0].sync(), r[1].sync()
    t2 = time.time()
    r = None

    print("one ctx async %f" % (t2 - t,))

    t = time.time()
    r = f2.fn()
    r[0].sync(), r[1].sync()
    t2 = time.time()
    r = None

    print("two ctx async %f" % (t2 - t,))

    # Both functions are launched first and only then waited on.
    t = time.time()
    r = f3.fn()
    r2 = f4.fn()
    r[0].sync()
    r2[0].sync()
    t2 = time.time()
    # Clear both references. The original cleared only ``r``, which left the
    # f4 output referenced throughout the following measurement.
    r = r2 = None

    print("two ctx, 2 fct async %f" % (t2 - t,))

    # The outputs are already transferred to 'cpu', so no sync() is issued.
    t = time.time()
    r = f5.fn()
    r2 = f6.fn()
    t2 = time.time()
    r = r2 = None
    print("two ctx, 2 fct with transfer %f" % (t2 - t,))

    # Multi-thread version
    class myThread (threading.Thread):
        """Run ``f`` once in a thread; sync its first output if requested."""

        def __init__(self, name, f, sync):
            threading.Thread.__init__(self)
            self.f = f
            self.name = name
            self.sync = sync

        def run(self):
            # NOTE(review): the threaded runs call the function object, not
            # its ``.fn`` attribute as the single-threaded timings do.
            r = self.f()
            if self.sync:
                r[0].sync()
            # Store the result on the thread so it stays reachable after run().
            self.r = r

    thread1 = myThread("Thread-3", f3, True)
    thread2 = myThread("Thread-4", f4, True)
    t = time.time()
    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()
    t2 = time.time()

    print("two ctx, 2 fct async, 2 threads %f" % (t2 - t,))

    thread1 = myThread("Thread-5", f5, False)
    thread2 = myThread("Thread-6", f6, False)
    t = time.time()
    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()
    t2 = time.time()

    print("two ctx, 2 fct with transfer, 2 threads %f" % (t2 - t,))