Exemplo n.º 1
0
 def test_get_device_from_id(self):
     self.assertEqual(cuda.get_device_from_id(0), cuda.Device(0))
Exemplo n.º 2
0
 def test_numpy_array_async3(self):
     with cuda.Device(1):
         y = cuda.to_gpu(self.x, stream=cuda.Stream.null)
     self.assertIsInstance(y, cuda.ndarray)
     cuda.cupy.testing.assert_array_equal(self.x, y)
     self.assertEqual(int(y.device), 1)
Exemplo n.º 3
0
 def test_get_device_from_id_for_numpy_int(self):
     self.assertEqual(
         cuda.get_device_from_id(numpy.int64(0)), cuda.Device(0))
Exemplo n.º 4
0
 def to_gpu(self, device=None):
     super(Parameter, self).to_gpu(device)
     if self.data is None:
         if device is None:
             device = cuda.Device().id
         self._initial_device = device
Exemplo n.º 5
0
    def backward(self, retain_grad=False):
        """Runs error backpropagation (a.k.a. backprop) from this variable.

        On backprop, :meth:`FunctionNode.backward` is called on each
        :class:`FunctionNode` object appearing in the backward graph starting
        from this variable. The backward graph is represented by backward
        references from variable nodes to their creators, and from function
        nodes to their input variable nodes. The backprop stops at all root
        nodes. Some function nodes set ``None`` as gradients of some inputs,
        where further backprop does not take place at such inputs.

        This method uses :data:`grad` as the initial error array. User can
        manually set a gradient array before calling this method. If
        :data:`data` contains only one element (i.e., it is scalar) and
        :data:`grad` is ``None``, then this method automatically complements
        1.0 as the initial error. This is useful on starting backprop from
        some scalar loss value.

        Note that this method does not support *differentiable backprop*. Use
        :func:`grad` to compute the gradient of gradients.

        Args:
            retain_grad (bool): If ``True``, the gradient arrays of all
                intermediate variables are kept. Otherwise, :data:`grad` of the
                intermediate variables are set to ``None`` on appropriate
                timing, which may reduce the maximum memory consumption.

                In most cases of training some models, the purpose of backprop
                is to compute gradients of parameters, not of all variables,
                and therefore it is recommended to set this flag ``False``.

        """
        self._node._check_old_style_gradient()
        if self.creator_node is None:
            return
        initial_device = None
        if cuda.available and isinstance(self.data, cuda.cupy.ndarray):
            try:
                initial_device = cuda.Device()
            except cuda.cupy.cuda.runtime.CUDARuntimeError as e:
                if e.status != 38:  # cudaErrorNoDevice
                    raise

        is_debug = chainer.is_debug()

        cand_funcs = []
        seen_set = set()
        grads = {}

        # Initialize error by 1, if this is a loss variable
        if self.data.size == 1 and self._grad_var is None:
            with cuda.get_device_from_array(self.data) as device:
                if device is cuda.DummyDevice:
                    self.grad = numpy.ones_like(self.data)
                else:
                    self.grad = cuda.cupy.ones_like(self.data)
        grads[self._node] = self._grad_var

        def add_cand(cand):
            if cand not in seen_set:
                # Negate since heapq is min-heap
                heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
                seen_set.add(cand)

        add_cand(self.creator_node)

        def get_grad(node):
            if node is None:
                return None
            if node in grads:
                return grads[node]
            return node.grad_var

        while cand_funcs:
            _, _, func = heapq.heappop(cand_funcs)
            inputs = func.inputs
            outputs = [y() for y in func.outputs]  # access via weak ref

            in_data = tuple([x.data for x in inputs])
            out_grad = tuple([get_grad(y) for y in outputs])
            out_grad_data = tuple(
                [None if g is None else g.data for g in out_grad])
            hooks = chainer.get_function_hooks()
            if func._n_local_function_hooks != 0:
                hooks = collections.OrderedDict(hooks)
                hooks.update(func.local_function_hooks)
            hooks = hooks.values()  # avoid six for performance

            cuda.get_device_from_array(*in_data).use()
            for hook in hooks:
                hook.backward_preprocess(func, in_data, out_grad_data)

            # Collect the current input gradients.
            #
            # Note (Tokui): When the same variable is passed to multiple input
            # slots (e.g. an expression like ``f(x, x)``), it makes the
            # gradient accumulation complicated since the back-propagated
            # gradients w.r.t. the first and second argument should be
            # accumulated to the current gradient w.r.t. the same variable.
            # In this case, the current implementation passes the current
            # gradient only to the first occurrence of the variable in the
            # input tuple and passes ``None`` to the rest of the occurrences.
            # For example, when the input variables are ``(x, x)``, the
            # input gradient passed to the ``backward_accumulate`` method is
            # ``(gx, None)`` where ``gx`` is the current gradient of ``x``.
            # See also the docstring of ``FunctionNode.backward_accumulate``.
            target_input_indexes = [
                i for i, x in enumerate(inputs) if x.requires_grad
            ]
            target_inputs = [inputs[i] for i in target_input_indexes]
            in_grad = []
            for i, index_i in enumerate(target_input_indexes):
                x = inputs[index_i]
                if x in target_inputs[:i]:
                    # Pass ``None`` for duplicated input variables except for
                    # the first occurrence (see the comment above).
                    gx = None
                elif x in grads:
                    gx = grads[x]
                elif x.creator_node is None:
                    x._check_old_style_gradient()
                    # accumulate the gradient only if the node is a leaf
                    gx = x.grad_var
                else:
                    gx = None
                in_grad.append(gx)

            gxs = func.backward_accumulate(target_input_indexes, out_grad,
                                           in_grad)

            assert len(gxs) == len(in_grad)
            for hook in hooks:
                hook.backward_postprocess(func, in_data, out_grad_data)

            if is_debug:
                for gx in gxs:
                    if gx is None:
                        continue
                    gx_data = gx.data
                    cuda.get_device_from_array(gx_data).use()
                    if cuda.get_array_module(gx_data).isnan(gx_data).any():
                        msg = ('NaN is detected on backward computation of '
                               '{}'.format(func.label))
                        raise RuntimeError(msg)

            if not retain_grad:
                for y in outputs:
                    if y is not None and y is not self.node:
                        grads[y] = None
                        y_var = y.get_variable()
                        if y_var is not None:
                            y_var._grad_var = None

            for i, gx in enumerate(gxs):
                if gx is None:
                    continue

                x = target_inputs[i]
                if not x.requires_grad:
                    continue

                _check_grad_type(func, x, gx.data)

                if x in target_inputs[:i]:
                    # Accumulate the duplicated gradients here. See the comment
                    # above the code that builds ``in_grad``.
                    cur_gx = grads[x]
                    grads[x] = gx if cur_gx is None else gx + cur_gx
                else:
                    grads[x] = gx

                x_var = x.get_variable()
                if x_var is not None:
                    x_var._grad_var = grads[x]

                if x.creator_node is not None:
                    add_cand(x.creator_node)

            del gxs  # to reduce memory usage
            if initial_device is not None:
                initial_device.use()
Exemplo n.º 6
0
 def __call__(self, batch):
     with cuda.Device(self._device_id):
         with chainer.using_config('train', True):
             return self.train(batch)
##################################################################################
# condition 
condTxt = 'DNN:'+DNNmode\
+' STFTchNum:'+str(chNum)\
+' winLen:'+str(winLen)\
+' shift:'+str(shiftLen)\
+' Nfft:'+str(fftLen)\
+' winMode:'+winMode\
+' loss:'+lossMode\
+' stateSize:'+str(stateSize)\
+' hiddenSize:'+str(hiddenSize)

###################################################################################
# DGT setting
with cuda.Device( DEVICE_INFO ):
    window = mm.hannWin(winLen)
    windowD = mm.calcCanonicalDualWindow(window,shiftLen)
        
    dgt = dgt.dgtOnGPU(window,shiftLen,fftLen)
    print(condTxt)
    
##################################################################################
# DNN setting

    inputSize = chNum

    exec('dnnEst = dnnModel.'+DNNmode+'(inputSize, hiddenSize, stateSize).to_gpu( DEVICE_INFO )')
    print("params: "+str(sum(p.data.size for p in dnnEst.params())))
    # Optimizer
    optm_dnn = optimizers.Adam(alpha=lr, beta1=0.9, beta2=0.999, eps=1e-8)
 def fit(self, content_image, style_image, epoch_num, callback=None):
     device_id = None
     if self.device_id >= 0:
         device_id = self.device_id
     with cuda.Device(device_id):
         return self.__fit(content_image, style_image, epoch_num, callback)
Exemplo n.º 9
0
 def test_copy_parameters_from_gpu_to_gpu(self):
     device_id = cuda.Device().id
     self.check_copy_parameters_from(device_id, device_id)
Exemplo n.º 10
0
 def test_copy_parameters_from_gpu_to_cpu(self):
     self.check_copy_parameters_from(cuda.Device().id, -1)
Exemplo n.º 11
0
 def test_linear_model_multi_gpu(self):
     with cuda.Device(0):
         self.assertGreater(
             cuda.to_cpu(self.model.accuracy_gpu(1).data), 0.9)
Exemplo n.º 12
0
    def backward(self, retain_grad=False):
        """Runs error backpropagation (a.k.a. backprop) from this variable.

        On backprop, :meth:`Function.backward` is called on each
        :class:`Function` object appearing in the backward graph starting from
        this variable. The backward graph is represented by backward references
        from variables to their creators, and from functions to their inputs.
        The backprop stops at all root variables. Some functions set ``None``
        as gradients of some inputs, where further backprop does not take place
        at such input variables.

        This method uses :data:`grad` as the initial error array. User can
        manually set a gradient array before calling this method. If
        :data:`data` contains only one element (i.e., it is scalar) and
        :data:`grad` is ``None``, then this method automatically complements
        1.0 as the initial error. This is useful on starting backprop from
        some scalar loss value.

        Args:
            retain_grad (bool): If ``True``, the gradient arrays of all
                intermediate variables are kept. Otherwise, :data:`grad` of the
                intermediate variables are set to ``None`` on appropriate
                timing, which may reduce the maximum memory consumption.

                In most cases of training some models, the purpose of backprop
                is to compute gradients of parameters, not of variables, so it
                is recommended to set this flag ``False``.

        """
        if self.creator is None:
            return
        initial_device = None
        if cuda.available and isinstance(self.data, cuda.cupy.ndarray):
            try:
                initial_device = cuda.Device()
            except cuda.cupy.cuda.runtime.CUDARuntimeError as e:
                if e.status != 38:  # cudaErrorNoDevice
                    raise

        is_debug = chainer.is_debug()

        cand_funcs = []
        seen_set = set()
        seen_vars = set()
        need_copy = set()

        # Initialize error by 1, if this is a loss variable
        if self.data.size == 1 and self.grad is None:
            with cuda.get_device(self.data) as device:
                if device is cuda.DummyDevice:
                    self.grad = numpy.ones_like(self.data)
                else:
                    self.grad = cuda.cupy.ones_like(self.data)

        def add_cand(cand):
            if cand not in seen_set:
                # Negate since heapq is min-heap
                heapq.heappush(cand_funcs, (-cand.rank, len(seen_set), cand))
                seen_set.add(cand)

        add_cand(self.creator)

        while cand_funcs:
            _, _, func = heapq.heappop(cand_funcs)
            outputs = [y() for y in func.outputs]  # access via weak ref

            in_data = tuple([x.data for x in func.inputs])
            out_grad = tuple([None if y is None else y.grad for y in outputs])
            hooks = chainer.get_function_hooks()
            if func._n_local_function_hooks != 0:
                hooks = collections.OrderedDict(hooks)
                hooks.update(func.local_function_hooks)

            cuda.get_device(*(in_data + out_grad)).use()
            for hook in six.itervalues(hooks):
                hook.backward_preprocess(func, in_data, out_grad)
            gxs = func.backward(in_data, out_grad)
            assert len(gxs) == len(in_data)
            for hook in six.itervalues(hooks):
                hook.backward_postprocess(func, in_data, out_grad)

            if is_debug:
                for gx in gxs:
                    if gx is None:
                        continue
                    cuda.get_device(gx).use()
                    if cuda.get_array_module(gx).isnan(gx).any():
                        msg = 'NaN is detected on backward computation'
                        raise RuntimeError(msg)

            if not retain_grad:
                for y in outputs:
                    if y is not None and y is not self:
                        y.grad = None
            for x, gx in zip(func.inputs, gxs):
                if gx is None:
                    continue

                _check_grad_type(func, x, gx)

                # Accumulate the gradient to x. It is a bit tricky to handle
                # branches and parameter gradient accumulation correctly.
                id_x = id(x)
                if x.creator is None:  # leaf
                    if x._grad is None:
                        x.grad = gx
                        need_copy.add(id_x)
                    else:
                        cuda.get_device(gx).use()
                        if id_x in need_copy:
                            x.grad = utils.force_array(x.grad + gx)  # copy
                            need_copy.remove(id_x)
                        else:
                            x._grad += gx
                else:  # not a leaf
                    add_cand(x.creator)
                    if id_x not in seen_vars:  # 1st visit
                        x.grad = gx
                        seen_vars.add(id_x)
                        need_copy.add(id_x)
                    else:
                        cuda.get_device(gx).use()
                        if id_x in need_copy:  # 2nd visit
                            x._grad = utils.force_array(gx + x._grad)  # copied
                            need_copy.remove(id_x)
                        else:  # 3rd or later visit
                            x._grad += gx
            del gxs  # to reduce memory usage
            if initial_device is not None:
                initial_device.use()