示例#1
0
def test_matrix_elementwise_multiply():
    """Check gpu_op.matrix_elementwise_multiply against NumPy's ``*``."""
    dims = (500, 200)
    device = ndarray.gpu(0)
    lhs = np.random.uniform(0, 10, size=dims).astype(np.float32)
    rhs = np.random.uniform(0, 10, size=dims).astype(np.float32)
    # Both operands are wrapped as arrays on the gpu(0) context; the kernel
    # writes its result into a separately allocated output array.
    dev_lhs = ndarray.array(lhs, ctx=device)
    dev_rhs = ndarray.array(rhs, ctx=device)
    dev_out = ndarray.empty(dims, ctx=device)
    gpu_op.matrix_elementwise_multiply(dev_lhs, dev_rhs, dev_out)
    result = dev_out.asnumpy()
    expected = lhs * rhs
    np.testing.assert_allclose(expected, result, rtol=1e-5)
示例#2
0
def test_relu_gradient():
    """Check gpu_op.relu_gradient against a NumPy mask-and-multiply reference."""
    device = ndarray.gpu(0)
    dims = (2000, 2500)
    inputs = np.random.uniform(-1, 1, dims).astype(np.float32)
    upstream = np.random.uniform(-5, 5, dims).astype(np.float32)
    dev_inputs = ndarray.array(inputs, ctx=device)
    dev_upstream = ndarray.array(upstream, ctx=device)
    dev_out = ndarray.empty(dims, ctx=device)
    gpu_op.relu_gradient(dev_inputs, dev_upstream, dev_out)
    result = dev_out.asnumpy()
    # Reference: the incoming gradient is kept only where the input is > 0.
    expected = ((inputs > 0) * upstream).astype(np.float32)
    np.testing.assert_allclose(expected, result)
示例#3
0
 def _copy_to_gpu(params):
     """Rebind each parameter's ``const`` to an array on gpu(0).

     Every element of ``params`` is mutated in place (its ``const`` attribute
     is replaced by ``ndarray.array(const, ctx=gpu(0))``) and the same objects
     are returned in a new list, in their original order.
     """
     device = ndarray.gpu(0)

     def _move(entry):
         # Replace the stored value with its gpu(0)-context counterpart.
         entry.const = ndarray.array(entry.const, ctx=device)
         return entry

     return [_move(entry) for entry in params]
示例#4
0
def test_matrix_elementwise_sqrt():
    """Check gpu_op.matrix_elementwise_sqrt, used in place, against np.sqrt."""
    dims = (500, 200)
    device = ndarray.gpu(0)
    host = np.random.uniform(0, 10, size=dims).astype(np.float32)
    dev = ndarray.array(host, ctx=device)
    # The same array is passed as input and output, so the kernel result
    # replaces the original contents of ``dev``.
    gpu_op.matrix_elementwise_sqrt(dev, dev)
    result = dev.asnumpy()
    expected = np.sqrt(host)
    np.testing.assert_allclose(expected, result, rtol=1e-5)
示例#5
0
def test_softmax():
    """Check gpu_op.softmax against the au.nn.softmax_func reference."""
    dims = (400, 1000)
    device = ndarray.gpu(0)
    logits = np.random.uniform(-5, 5, dims).astype(np.float32)
    dev_in = ndarray.array(logits, ctx=device)
    dev_out = ndarray.empty(dims, ctx=device)
    gpu_op.softmax(dev_in, dev_out)
    result = dev_out.asnumpy()
    expected = au.nn.softmax_func(logits)
    np.testing.assert_allclose(expected, result, rtol=1e-5)
示例#6
0
def test_relu():
    """Check gpu_op.relu against ``np.maximum(x, 0)``."""
    device = ndarray.gpu(0)
    dims = (2000, 2500)
    inputs = np.random.uniform(-1, 1, dims).astype(np.float32)
    dev_in = ndarray.array(inputs, ctx=device)
    dev_out = ndarray.empty(dims, ctx=device)
    gpu_op.relu(dev_in, dev_out)
    result = dev_out.asnumpy()
    # No rtol is passed, so assert_allclose uses its default tolerance.
    expected = np.maximum(inputs, 0).astype(np.float32)
    np.testing.assert_allclose(expected, result)
示例#7
0
def test_matrix_elementwise_add_by_const():
    """Check gpu_op.matrix_elementwise_add_by_const against ``x + scalar``."""
    device = ndarray.gpu(0)
    dims = (2000, 3000)
    host = np.random.uniform(0, 10, size=dims).astype(np.float32)
    # A single random scalar is added to every element.
    offset = np.random.uniform(-5, 5)
    dev_in = ndarray.array(host, ctx=device)
    dev_out = ndarray.empty(dims, ctx=device)
    gpu_op.matrix_elementwise_add_by_const(dev_in, offset, dev_out)
    result = dev_out.asnumpy()
    expected = host + offset
    np.testing.assert_allclose(expected, result, rtol=1e-5)
示例#8
0
def test_broadcast_to():
    """Check gpu_op.broadcast_to against np.broadcast_to."""
    src_dims = (200, 300)
    dst_dims = (130, 200, 300)
    device = ndarray.gpu(0)
    host = np.random.uniform(-1, 1, src_dims).astype(np.float32)
    dev_in = ndarray.array(host, ctx=device)
    # The output array is allocated with the target shape; the kernel is given
    # only the two arrays.
    dev_out = ndarray.empty(dst_dims, ctx=device)
    gpu_op.broadcast_to(dev_in, dev_out)
    result = dev_out.asnumpy()
    expected = np.broadcast_to(host, dst_dims)
    np.testing.assert_allclose(expected, result)
示例#9
0
文件: sgd.py 项目: upul/Aurora
 def __init__(self, cost, params, lr=0.1, momentum=0.9, use_gpu=False):
     """Set up the optimizer with one zero-filled velocity per parameter.

     ``cost``, ``params``, ``lr`` and ``use_gpu`` are forwarded to the parent
     constructor. ``momentum`` is stored as given. ``self.velocity`` holds one
     zero array per entry of ``params``, shaped like that entry's ``const``:
     NumPy arrays on the CPU path, arrays on gpu(0) when ``use_gpu`` is true.
     """
     super().__init__(cost, params, lr=lr, use_gpu=use_gpu)
     self.momentum = momentum
     if not use_gpu:
         self.velocity = [np.zeros_like(param.const) for param in params]
     else:
         velocity = []
         for param in params:
             # ``const`` is read back via asnumpy() so the zeros take its
             # shape and dtype before being wrapped on gpu(0).
             host_zeros = np.zeros_like(param.const.asnumpy())
             velocity.append(ndarray.array(host_zeros, ctx=ndarray.gpu(0)))
         self.velocity = velocity
示例#10
0
def test_matrix_multiply():
    """Check gpu_op.matrix_multiply against np.dot for three transpose setups.

    Covered cases: no transposes, only the right operand transposed, and both
    operands transposed.
    """
    device = ndarray.gpu(0)
    # Each case: (shape of x, transpose x?, shape of y, transpose y?)
    cases = (
        ((500, 700), False, (700, 1000), False),
        ((1000, 500), False, (2000, 500), True),
        ((500, 1000), True, (2000, 500), True),
    )
    for x_dims, trans_x, y_dims, trans_y in cases:
        lhs = np.random.uniform(0, 10, size=x_dims).astype(np.float32)
        rhs = np.random.uniform(0, 10, size=y_dims).astype(np.float32)
        dev_lhs = ndarray.array(lhs, ctx=device)
        dev_rhs = ndarray.array(rhs, ctx=device)
        # NumPy reference; its shape is also the shape of the output buffer.
        expected = np.dot(
            np.transpose(lhs) if trans_x else lhs,
            np.transpose(rhs) if trans_y else rhs,
        )
        dev_out = ndarray.empty(expected.shape, ctx=device)
        gpu_op.matrix_multiply(dev_lhs, trans_x, dev_rhs, trans_y, dev_out)
        result = dev_out.asnumpy()
        np.testing.assert_allclose(expected, result, rtol=1e-5)
示例#11
0
文件: adam.py 项目: upul/Aurora
    def __init__(self,
                 cost,
                 params,
                 lr=1e-3,
                 beta1=0.9,
                 beta2=0.995,
                 eps=1e-5,
                 use_gpu=False):
        """Set up the optimizer's hyper-parameters and zeroed moment buffers.

        ``cost``, ``params``, ``lr`` and ``use_gpu`` are forwarded to the
        parent constructor; ``beta1``, ``beta2`` and ``eps`` are stored as
        given and the step counter ``self.time`` starts at 0.

        ``self.velocity`` and ``self.momentum`` each hold one zero array per
        parameter, shaped like that parameter's ``const``. On the GPU path two
        more lists, ``self.vec_hat`` and ``self.mom_hat``, are allocated the
        same way; the CPU path does not create them here.
        """
        super().__init__(cost, params, lr, use_gpu=use_gpu)
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.time = 0

        def _gpu_zeros(source):
            # One zero array on gpu(0) per parameter; ``const`` is read back
            # via asnumpy() so the zeros take its shape and dtype.
            return [
                ndarray.array(np.zeros_like(param.const.asnumpy()),
                              ctx=ndarray.gpu(0)) for param in source
            ]

        if not self.use_gpu:
            self.velocity = [np.zeros_like(param.const) for param in params]
            self.momentum = [np.zeros_like(param.const) for param in params]
        else:
            self.velocity = _gpu_zeros(params)
            self.momentum = _gpu_zeros(params)
            self.vec_hat = _gpu_zeros(self.params)
            self.mom_hat = _gpu_zeros(self.params)
示例#12
0
def test_reduce_sum_axis_zero():
    """Check gpu_op.reduce_sum_axis_zero against ``np.sum(x, axis=0)``."""
    ctx = ndarray.gpu(0)
    shape = (500, 200, 100)
    to_shape = (200, 100)
    x = np.random.uniform(0, 20, shape).astype(np.float32)
    arr_x = ndarray.array(x, ctx=ctx)
    arr_y = ndarray.empty(to_shape, ctx=ctx)
    gpu_op.reduce_sum_axis_zero(arr_x, arr_y)
    y = arr_y.asnumpy()
    # The reference is computed once. assert_allclose already reports the
    # mismatching elements on failure, so no per-element debug scan is needed.
    expected = np.sum(x, axis=0)
    np.testing.assert_allclose(expected, y, rtol=1e-5)
示例#13
0
文件: executor.py 项目: upul/Aurora
    def run(self, feed_shapes, convert_to_numpy_ret_vals=False):
        """
        Evaluate the nodes in ``self.eval_node_list`` for the given feed values.

        Parameters
        ----------
        :param feed_shapes: Despite its name, a dictionary mapping nodes to the
            values supplied by the user (a feed dict). When ``self.ctx`` is
            None every value must be an ``np.ndarray``; otherwise a value may
            be an ``np.ndarray`` (wrapped with ``ndarray.array`` on
            ``self.ctx``) or an ``ndarray.NDArray`` (used as is).
        :param convert_to_numpy_ret_vals: When true and ``self.ctx`` is not
            None, each result is converted with ``asnumpy()`` before being
            returned.

        Returns
        -------
        :return: List with the value of every node in ``self.eval_node_list``,
            in that order.

        Raises
        ------
        :raises TypeError: If a fed value has an unsupported type.
        """
        # Assume self.ctx is None implies numpy array and numpy ops.
        use_numpy = self.ctx is None

        node_to_val_map = {}
        for node, value in feed_shapes.items():
            if use_numpy:
                # Explicit raise instead of assert: asserts vanish under -O.
                if not isinstance(value, np.ndarray):
                    raise TypeError(
                        "feed_dict values must be np.ndarray when no ctx is set")
                node_to_val_map[node] = value
            elif isinstance(value, np.ndarray):
                # convert values to ndarray.NDArray if necessary
                node_to_val_map[node] = ndarray.array(value, ctx=self.ctx)
            elif isinstance(value, ndarray.NDArray):
                node_to_val_map[node] = value
            else:
                raise TypeError("feed_dict value type not supported")

        # collect shapes for all fed nodes
        current_shapes = {
            node: value.shape for node, value in node_to_val_map.items()
        }

        # infer shape if the fed shapes changed since last run
        # e.g. call run() on test data after training
        shapes_unchanged = (isinstance(self.feed_shapes, dict)
                            and current_shapes == self.feed_shapes)
        if not shapes_unchanged:
            self.infer_shape(current_shapes)
            self.feed_shapes = current_shapes
            # plan memory if using GPU
            if not use_numpy:
                self.memory_plan(current_shapes)

        # Traverse graph in topo order and compute values for all nodes.
        for node in self.topo_order:
            if node in node_to_val_map:
                # Skip fed nodes. Values already provided by the feed dict.
                continue

            # TODO (upul): following if condition looks like a hack. Find a better approach
            if isinstance(node.op, PlaceholderOp) and node.const is not None:
                node_to_val_map[node] = node.const
                continue

            input_vals = [node_to_val_map[n] for n in node.inputs]
            if use_numpy:
                node_val = np.empty(shape=self.node_to_shape_map[node])
            else:
                node_val = self.node_to_arr_map[node]
            # node_val is modified in-place whether np.ndarray or NDArray
            node.op.compute(node, input_vals, node_val, use_numpy)
            node_to_val_map[node] = node_val

        # Collect node values.
        results = [node_to_val_map[n] for n in self.eval_node_list]
        if not use_numpy and convert_to_numpy_ret_vals:
            return [value.asnumpy() for value in results]
        return results
示例#14
0
文件: adam.py 项目: upul/Aurora
    def step(self, feed_dict):
        """
        Run the executor once and apply one Adam update to every parameter.

        ``self.executor.run(feed_dict)`` must return a sequence whose first
        element is the cost and whose element ``i + 1`` is the value used as
        the gradient for ``self.params[i]`` (assumed to be d(cost)/d(param) --
        confirm against how the executor's eval list is built).

        For each parameter the first and second moment estimates
        (``self.momentum`` / ``self.velocity``) are updated, bias-corrected
        with ``self.time``, and ``param.const`` is updated in place by
        ``-lr * mom_hat / (sqrt(vec_hat) + eps)``.

        On the GPU path the gradient arrays in the executor output are
        overwritten in place (squared and scaled) as part of the computation.

        :param feed_dict: passed unchanged to ``self.executor.run``.
        :return: the cost; on the GPU path it is converted with ``asnumpy()``.
        """
        exe_output = self.executor.run(feed_dict)
        self.time += 1

        # Bias-correction denominators are identical for every parameter.
        mom_correction = 1 - self.beta1**self.time
        vec_correction = 1 - self.beta2**self.time

        if self.use_gpu:
            # vec_hat / mom_hat need no zeroing first: each is fully
            # overwritten by the element-wise kernels below.
            for i in range(len(self.params)):
                grad = exe_output[i + 1]

                # momentum = beta1 * momentum + (1 - beta1) * grad
                gpu_op.matrix_elementwise_multiply_by_const(
                    self.momentum[i], self.beta1, self.momentum[i])
                # mom_hat[i] is overwritten two calls later, so it doubles as
                # the scratch buffer for (1 - beta1) * grad. This avoids the
                # device -> host -> device copy that used to allocate one.
                gpu_op.matrix_elementwise_multiply_by_const(
                    grad, (1 - self.beta1), self.mom_hat[i])
                gpu_op.matrix_elementwise_add(self.momentum[i],
                                              self.mom_hat[i],
                                              self.momentum[i])
                gpu_op.matrix_elementwise_div_by_const(
                    self.momentum[i], mom_correction, self.mom_hat[i])

                # velocity = beta2 * velocity + (1 - beta2) * grad ** 2
                # (grad is squared and scaled in place from here on)
                gpu_op.matrix_elementwise_multiply_by_const(
                    self.velocity[i], self.beta2, self.velocity[i])
                gpu_op.matrix_elementwise_multiply(grad, grad, grad)
                gpu_op.matrix_elementwise_multiply_by_const(
                    grad, (1 - self.beta2), grad)
                gpu_op.matrix_elementwise_add(self.velocity[i], grad,
                                              self.velocity[i])
                gpu_op.matrix_elementwise_div_by_const(
                    self.velocity[i], vec_correction, self.vec_hat[i])

            for i in range(len(self.params)):
                # vec_hat <- sqrt(vec_hat) + eps
                gpu_op.matrix_elementwise_sqrt(self.vec_hat[i],
                                               self.vec_hat[i])
                gpu_op.matrix_elementwise_add_by_const(self.vec_hat[i],
                                                       self.eps,
                                                       self.vec_hat[i])

                # param += (-lr * mom_hat) / vec_hat
                gpu_op.matrix_elementwise_multiply_by_const(
                    self.mom_hat[i], -1 * self.lr, self.mom_hat[i])
                gpu_op.matrix_elementwise_division(self.mom_hat[i],
                                                   self.vec_hat[i],
                                                   self.mom_hat[i])
                gpu_op.matrix_elementwise_add(self.params[i].const,
                                              self.mom_hat[i],
                                              self.params[i].const)

        else:
            mom_hat = []
            vec_hat = []
            for i in range(len(self.params)):
                grad = exe_output[i + 1]

                self.momentum[i] = (self.beta1 * self.momentum[i] +
                                    (1 - self.beta1) * grad)
                mom_hat.append(self.momentum[i] / mom_correction)

                self.velocity[i] = (self.beta2 * self.velocity[i] +
                                    (1 - self.beta2) * (grad**2))
                vec_hat.append(self.velocity[i] / vec_correction)

            # Parameters are only touched after every moment is updated.
            for i, param in enumerate(self.params):
                param.const += -self.lr * mom_hat[i] / (
                    np.sqrt(vec_hat[i]) + self.eps)

        cost = exe_output[0]
        if self.use_gpu:
            cost = cost.asnumpy()
        return cost