Python gpu示例，aurora.ndarray.ndarray.gpu Python示例

示例#1

0

显示文件

def test_matrix_multiply():
    """Compare ``gpu_op.matrix_multiply`` with ``np.dot`` for three layouts.

    The cases are: neither operand transposed, only the right operand
    transposed, and both operands transposed. Inputs are random float32
    matrices drawn from [0, 10); results must agree to ``rtol=1e-5``.
    """
    ctx = ndarray.gpu(0)
    # (lhs shape, transpose lhs?, rhs shape, transpose rhs?, output shape)
    cases = (
        ((500, 700), False, (700, 1000), False, (500, 1000)),
        ((1000, 500), False, (2000, 500), True, (1000, 2000)),
        ((500, 1000), True, (2000, 500), True, (1000, 2000)),
    )
    for lhs_shape, lhs_trans, rhs_shape, rhs_trans, out_shape in cases:
        lhs = np.random.uniform(0, 10, size=lhs_shape).astype(np.float32)
        rhs = np.random.uniform(0, 10, size=rhs_shape).astype(np.float32)
        dev_lhs = ndarray.array(lhs, ctx=ctx)
        dev_rhs = ndarray.array(rhs, ctx=ctx)
        dev_out = ndarray.empty(out_shape, ctx=ctx)
        gpu_op.matrix_multiply(dev_lhs, lhs_trans, dev_rhs, rhs_trans,
                               dev_out)
        actual = dev_out.asnumpy()
        # Build the NumPy reference with the same transpose flags.
        ref_lhs = np.transpose(lhs) if lhs_trans else lhs
        ref_rhs = np.transpose(rhs) if rhs_trans else rhs
        np.testing.assert_allclose(np.dot(ref_lhs, ref_rhs),
                                   actual,
                                   rtol=1e-5)

示例#2

0

显示文件

 def _copy_to_gpu(params):
     """Replace each parameter's ``const`` with a copy created on GPU 0.

     Every item of ``params`` is mutated in place: its ``const`` attribute
     is rebound to ``ndarray.array(const, ctx=ndarray.gpu(0))``.

     :param params: iterable of objects exposing a ``const`` attribute
     :return: a new list holding the same (now updated) parameter objects
     """
     device = ndarray.gpu(0)

     def _upload(item):
         # Rebind the attribute and hand the same object back.
         item.const = ndarray.array(item.const, ctx=device)
         return item

     return [_upload(item) for item in params]

示例#3

0

显示文件

def test_matrix_elementwise_sqrt():
    """Check ``gpu_op.matrix_elementwise_sqrt`` against ``np.sqrt``.

    The same device array is passed as both arguments of the op, so the
    result is read back from the input buffer.
    """
    ctx = ndarray.gpu(0)
    host_in = np.random.uniform(0, 10, size=(500, 200)).astype(np.float32)
    dev_buf = ndarray.array(host_in, ctx=ctx)
    gpu_op.matrix_elementwise_sqrt(dev_buf, dev_buf)
    actual = dev_buf.asnumpy()
    expected = np.sqrt(host_in)
    np.testing.assert_allclose(expected, actual, rtol=1e-5)

示例#4

0

显示文件

def test_softmax():
    """Check ``gpu_op.softmax`` against ``au.nn.softmax_func``.

    Uses a random (400, 1000) float32 input drawn from [-5, 5) and
    requires agreement to ``rtol=1e-5``.
    """
    ctx = ndarray.gpu(0)
    dims = (400, 1000)
    host_in = np.random.uniform(-5, 5, dims).astype(np.float32)
    dev_in = ndarray.array(host_in, ctx=ctx)
    dev_out = ndarray.empty(dims, ctx=ctx)
    gpu_op.softmax(dev_in, dev_out)
    actual = dev_out.asnumpy()
    expected = au.nn.softmax_func(host_in)
    np.testing.assert_allclose(expected, actual, rtol=1e-5)

示例#5

0

显示文件

def test_relu():
    """Check ``gpu_op.relu`` against ``np.maximum(x, 0)``.

    Uses a random (2000, 2500) float32 input drawn from [-1, 1) and the
    default tolerance of ``assert_allclose``.
    """
    dims = (2000, 2500)
    ctx = ndarray.gpu(0)
    host_in = np.random.uniform(-1, 1, dims).astype(np.float32)
    dev_in = ndarray.array(host_in, ctx=ctx)
    dev_out = ndarray.empty(dims, ctx=ctx)
    gpu_op.relu(dev_in, dev_out)
    actual = dev_out.asnumpy()
    expected = np.maximum(host_in, 0).astype(np.float32)
    np.testing.assert_allclose(expected, actual)

示例#6

0

显示文件

def test_matrix_elementwise_add_by_const():
    """Check ``gpu_op.matrix_elementwise_add_by_const`` against ``x + val``.

    The matrix is a random (2000, 3000) float32 array from [0, 10) and the
    scalar is a single random draw from [-5, 5).
    """
    dims = (2000, 3000)
    ctx = ndarray.gpu(0)
    host_in = np.random.uniform(0, 10, size=dims).astype(np.float32)
    offset = np.random.uniform(-5, 5)
    dev_in = ndarray.array(host_in, ctx=ctx)
    dev_out = ndarray.empty(dims, ctx=ctx)
    gpu_op.matrix_elementwise_add_by_const(dev_in, offset, dev_out)
    actual = dev_out.asnumpy()
    expected = host_in + offset
    np.testing.assert_allclose(expected, actual, rtol=1e-5)

示例#7

0

显示文件

def test_broadcast_to():
    """Check ``gpu_op.broadcast_to`` against ``np.broadcast_to``.

    A random (200, 300) float32 matrix is broadcast into a
    (130, 200, 300) output and compared with the NumPy result.
    """
    ctx = ndarray.gpu(0)
    src_shape = (200, 300)
    dst_shape = (130, 200, 300)
    host_in = np.random.uniform(-1, 1, src_shape).astype(np.float32)
    dev_in = ndarray.array(host_in, ctx=ctx)
    dev_out = ndarray.empty(dst_shape, ctx=ctx)
    gpu_op.broadcast_to(dev_in, dev_out)
    actual = dev_out.asnumpy()
    expected = np.broadcast_to(host_in, dst_shape)
    np.testing.assert_allclose(expected, actual)

示例#8

0

显示文件

文件： sgd.py 项目： upul/Aurora

 def __init__(self, cost, params, lr=0.1, momentum=0.9, use_gpu=False):
     """Set up the optimizer state: momentum factor and velocity buffers.

     One zero-filled velocity buffer is created per parameter, shaped
     like ``param.const``. With ``use_gpu`` the zeros are built on the
     host from ``param.const.asnumpy()`` and then placed on GPU 0;
     otherwise they are plain NumPy arrays.

     :param cost: forwarded to the base-class constructor
     :param params: iterable of parameters exposing ``const``
     :param lr: forwarded to the base-class constructor as ``lr``
     :param momentum: stored as ``self.momentum``
     :param use_gpu: selects GPU buffers when truthy
     """
     super().__init__(cost, params, lr=lr, use_gpu=use_gpu)
     self.momentum = momentum

     if not use_gpu:
         self.velocity = [np.zeros_like(param.const) for param in params]
         return

     buffers = []
     for param in params:
         host_zeros = np.zeros_like(param.const.asnumpy())
         buffers.append(ndarray.array(host_zeros, ctx=ndarray.gpu(0)))
     self.velocity = buffers

示例#9

0

显示文件

def test_matrix_elementwise_multiply():
    """Check ``gpu_op.matrix_elementwise_multiply`` against ``x * y``.

    Both operands are random (500, 200) float32 matrices from [0, 10);
    results must agree to ``rtol=1e-5``.
    """
    ctx = ndarray.gpu(0)
    dims = (500, 200)
    lhs = np.random.uniform(0, 10, size=dims).astype(np.float32)
    rhs = np.random.uniform(0, 10, size=dims).astype(np.float32)
    dev_lhs = ndarray.array(lhs, ctx=ctx)
    dev_rhs = ndarray.array(rhs, ctx=ctx)
    dev_out = ndarray.empty(dims, ctx=ctx)
    gpu_op.matrix_elementwise_multiply(dev_lhs, dev_rhs, dev_out)
    actual = dev_out.asnumpy()
    expected = lhs * rhs
    np.testing.assert_allclose(expected, actual, rtol=1e-5)

示例#10

0

显示文件

def test_relu_gradient():
    """Check ``gpu_op.relu_gradient`` against a NumPy reference.

    The reference passes the incoming gradient through where the input
    is strictly positive and multiplies it by zero elsewhere.
    """
    dims = (2000, 2500)
    ctx = ndarray.gpu(0)
    host_in = np.random.uniform(-1, 1, dims).astype(np.float32)
    host_grad = np.random.uniform(-5, 5, dims).astype(np.float32)
    dev_in = ndarray.array(host_in, ctx=ctx)
    dev_grad = ndarray.array(host_grad, ctx=ctx)
    dev_out = ndarray.empty(dims, ctx=ctx)
    gpu_op.relu_gradient(dev_in, dev_grad, dev_out)
    actual = dev_out.asnumpy()
    positive_mask = host_in > 0
    expected = np.multiply(positive_mask, host_grad).astype(np.float32)
    np.testing.assert_allclose(expected, actual)

示例#11

0

显示文件

def test_array_set():
    """Check ``gpu_op.array_set`` by filling one buffer with 1.0, then 0.0.

    The same (5000, 2000) device array is reused for both fills and is
    compared with ``np.ones`` / ``np.zeros`` respectively.
    """
    ctx = ndarray.gpu(0)
    dims = (5000, 2000)
    dev_buf = ndarray.empty(dims, ctx=ctx)
    # First pass mimics "ones like", second pass "zeros like".
    for fill_value, make_reference in ((1., np.ones), (0., np.zeros)):
        gpu_op.array_set(dev_buf, fill_value)
        actual = dev_buf.asnumpy()
        np.testing.assert_allclose(make_reference(dims), actual)

示例#12

0

显示文件

文件： adam.py 项目： upul/Aurora

    def __init__(self,
                 cost,
                 params,
                 lr=1e-3,
                 beta1=0.9,
                 beta2=0.995,
                 eps=1e-5,
                 use_gpu=False):
        """Set up the optimizer state: decay rates, buffers and step counter.

        On the GPU path four zero-filled device buffers are allocated per
        parameter (``velocity``, ``momentum``, ``vec_hat``, ``mom_hat``).
        On the CPU path only ``velocity`` and ``momentum`` are created, as
        NumPy arrays.

        ``self.use_gpu`` and ``self.params`` are read after the base-class
        constructor runs; presumably it sets them -- confirm against the
        base class.

        :param cost: forwarded to the base-class constructor
        :param params: iterable of parameters exposing ``const``
        :param lr: forwarded to the base-class constructor
        :param beta1: stored as ``self.beta1``
        :param beta2: stored as ``self.beta2``
        :param eps: stored as ``self.eps``
        :param use_gpu: forwarded to the base-class constructor
        """
        super().__init__(cost, params, lr, use_gpu=use_gpu)
        self.beta1 = beta1
        self.beta2 = beta2

        def _gpu_zeros(param_seq):
            # One zero-filled array on GPU 0 per parameter, shaped like
            # the host copy of that parameter's ``const``.
            buffers = []
            for param in param_seq:
                host_zeros = np.zeros_like(param.const.asnumpy())
                buffers.append(ndarray.array(host_zeros, ctx=ndarray.gpu(0)))
            return buffers

        if self.use_gpu:
            self.velocity = _gpu_zeros(params)
            self.momentum = _gpu_zeros(params)

            self.vec_hat = _gpu_zeros(self.params)
            self.mom_hat = _gpu_zeros(self.params)
        else:
            self.velocity = [np.zeros_like(param.const) for param in params]
            self.momentum = [np.zeros_like(param.const) for param in params]

        self.time = 0
        self.eps = eps

示例#13

0

显示文件

def test_reduce_sum_axis_zero():
    """Check ``gpu_op.reduce_sum_axis_zero`` against ``np.sum(x, axis=0)``.

    Before the final assertion, every element whose relative error
    exceeds 1e-4 is printed as ``index, gpu_value, numpy_value`` to help
    diagnose a failure.
    """
    ctx = ndarray.gpu(0)
    in_shape = (500, 200, 100)
    out_shape = (200, 100)
    host_in = np.random.uniform(0, 20, in_shape).astype(np.float32)
    dev_in = ndarray.array(host_in, ctx=ctx)
    dev_out = ndarray.empty(out_shape, ctx=ctx)
    gpu_op.reduce_sum_axis_zero(dev_in, dev_out)
    actual = dev_out.asnumpy()
    expected = np.sum(host_in, axis=0)
    # Diagnostic pass: report the worst offenders before asserting.
    for index, got in np.ndenumerate(actual):
        want = expected[index]
        if abs((got - want) / want) > 1e-4:
            print(index, got, want)
    np.testing.assert_allclose(expected, actual, rtol=1e-5)

示例#14

0

显示文件

文件： executor.py 项目： upul/Aurora

    def __init__(self, eval_list, use_gpu=False):
        """
        Prepare an executor for a subset of nodes of a computation graph.

        The nodes are stored, a topological order is computed for them with
        ``find_topo_sort``, and the array/shape caches start out as ``None``.

        Parameters:
        -----------
        :param eval_list: nodes whose values need to be computed
        :param use_gpu: when truthy, ``self.ctx`` is set to ``ndarray.gpu(0)``;
            otherwise it stays ``None``
        """
        self.eval_node_list = eval_list
        self.ctx = ndarray.gpu(0) if use_gpu else None

        self.topo_order = find_topo_sort(self.eval_node_list)
        # Caches start empty; nothing in this constructor fills them.
        self.node_to_arr_map = self.node_to_shape_map = self.feed_shapes = None

示例#15

0

显示文件

文件： adam.py 项目： upul/Aurora

    def step(self, feed_dict):
        """Run the executor once and apply one Adam update to every parameter.

        ``exe_output[0]`` is returned as the cost. ``exe_output[i + 1]`` is
        used where the Adam update needs the gradient of ``self.params[i]``
        (presumably the executor returns cost followed by one gradient per
        parameter -- confirm against the executor's eval list).

        The GPU and CPU branches compute the same update:

            momentum = beta1 * momentum + (1 - beta1) * grad
            velocity = beta2 * velocity + (1 - beta2) * grad ** 2
            mom_hat  = momentum / (1 - beta1 ** time)
            vec_hat  = velocity / (1 - beta2 ** time)
            param   += -lr * mom_hat / (sqrt(vec_hat) + eps)

        On the GPU branch the ``exe_output[i + 1]`` arrays are overwritten
        in place while the update is computed.

        :param feed_dict: passed unchanged to ``self.executor.run``
        :return: ``exe_output[0]``; converted with ``asnumpy()`` when
            ``self.use_gpu`` is set
        """
        exe_output = self.executor.run(feed_dict)
        self.time += 1

        if self.use_gpu:
            # Reset the bias-corrected buffers by multiplying them by 0.0
            # (gpu_op calls appear to take the output array as last argument).
            for i in range(len(self.vec_hat)):
                gpu_op.matrix_elementwise_multiply_by_const(
                    self.vec_hat[i], 0.0, self.vec_hat[i])
                gpu_op.matrix_elementwise_multiply_by_const(
                    self.mom_hat[i], 0.0, self.mom_hat[i])

            for i in range(len(self.params)):
                # momentum <- beta1 * momentum
                gpu_op.matrix_elementwise_multiply_by_const(
                    self.momentum[i], self.beta1, self.momentum[i])

                # TODO: (upul) copying dev->host->dev is expensive. We need a better approach.
                # Scratch array for (1 - beta1) * grad, so exe_output[i + 1]
                # stays untouched for the velocity update below.
                tem_gpu_array = ndarray.array(exe_output[i + 1].asnumpy(),
                                              ctx=ndarray.gpu(0))
                gpu_op.matrix_elementwise_multiply_by_const(
                    exe_output[i + 1], (1 - self.beta1), tem_gpu_array)
                # momentum <- momentum + (1 - beta1) * grad
                gpu_op.matrix_elementwise_add(self.momentum[i], tem_gpu_array,
                                              self.momentum[i])
                # mom_hat <- momentum / (1 - beta1 ** time)
                gpu_op.matrix_elementwise_div_by_const(
                    self.momentum[i], (1 - self.beta1**self.time),
                    self.mom_hat[i])

                # velocity <- beta2 * velocity
                gpu_op.matrix_elementwise_multiply_by_const(
                    self.velocity[i], self.beta2, self.velocity[i])
                # From here on exe_output[i + 1] is overwritten in place:
                # first with grad * grad, then with (1 - beta2) * grad ** 2.
                gpu_op.matrix_elementwise_multiply(exe_output[i + 1],
                                                   exe_output[i + 1],
                                                   exe_output[i + 1])
                gpu_op.matrix_elementwise_multiply_by_const(
                    exe_output[i + 1], (1 - self.beta2), exe_output[i + 1])
                # velocity <- velocity + (1 - beta2) * grad ** 2
                gpu_op.matrix_elementwise_add(self.velocity[i],
                                              exe_output[i + 1],
                                              self.velocity[i])
                # vec_hat <- velocity / (1 - beta2 ** time)
                gpu_op.matrix_elementwise_div_by_const(
                    self.velocity[i], (1 - self.beta2**self.time),
                    self.vec_hat[i])

            for i in range(len(self.params)):
                # vec_hat <- sqrt(vec_hat) + eps (the update's denominator)
                gpu_op.matrix_elementwise_sqrt(self.vec_hat[i],
                                               self.vec_hat[i])
                gpu_op.matrix_elementwise_add_by_const(self.vec_hat[i],
                                                       self.eps,
                                                       self.vec_hat[i])

                # mom_hat <- (-lr * mom_hat) / vec_hat, then add to the parameter
                gpu_op.matrix_elementwise_multiply_by_const(
                    self.mom_hat[i], -1 * self.lr, self.mom_hat[i])
                gpu_op.matrix_elementwise_division(self.mom_hat[i],
                                                   self.vec_hat[i],
                                                   self.mom_hat[i])
                gpu_op.matrix_elementwise_add(self.params[i].const,
                                              self.mom_hat[i],
                                              self.params[i].const)

        else:
            # CPU path: the bias-corrected estimates are step-local lists.
            vec_hat = [np.zeros_like(param.const) for param in self.params]
            mom_hat = [np.zeros_like(param.const) for param in self.params]

            for i in range(len(self.params)):
                # First-moment estimate and its bias-corrected value.
                self.momentum[i] = self.beta1 * self.momentum[i] + (
                    1 - self.beta1) * exe_output[i + 1]
                mom_hat[i] = self.momentum[i] / (1 - self.beta1**self.time)

                # Second-moment estimate and its bias-corrected value.
                self.velocity[i] = self.beta2 * self.velocity[i] + (
                    1 - self.beta2) * (exe_output[i + 1]**2)
                vec_hat[i] = self.velocity[i] / (1 - self.beta2**self.time)

            for i in range(len(self.params)):
                # In-place parameter update.
                self.params[i].const += -self.lr * mom_hat[i] / (
                    np.sqrt(vec_hat[i]) + self.eps)

        cost = exe_output[0]
        if self.use_gpu:
            # Convert the cost with asnumpy() before returning it.
            cost = cost.asnumpy()
        return cost