Example #1
    def check_shared_exec_group(sparse_embedding):
        # generate an rnn sym with #layers=3
        sym = get_rnn_sym(num_layers=3,
                          num_words=num_words,
                          num_hidden=num_hidden,
                          num_embed=num_embed,
                          seq_len=max_bucket_size,
                          sparse_embedding=sparse_embedding)
        arg_names1 = sym.list_arguments()
        input_names = [name[0] for name in data_shapes
                       ] + [name[0] for name in label_shapes]
        shared_arg_names = [
            name for name in arg_names1 if name not in input_names
        ]
        exec_group1 = DataParallelExecutorGroup(symbol=sym,
                                                contexts=contexts,
                                                workload=workload,
                                                data_shapes=data_shapes,
                                                label_shapes=label_shapes,
                                                param_names=shared_arg_names,
                                                for_training=True,
                                                inputs_need_grad=False)

        # shared_data_arrays should only have input "data" and "softmax_label" arrays
        for i in range(len(contexts)):
            assert len(exec_group1.shared_data_arrays[i]) == len(input_names),\
                "exec_group1.shared_data_arrays[%d] should have the same number of names as in input_names" % i
            for name in input_names:
                assert name in exec_group1.shared_data_arrays[i],\
                    "arg %s should be in exec_group1.shared_data_arrays[%d]" % (name, i)

        # generate an rnn sym with #layers=5
        sym = get_rnn_sym(num_layers=5,
                          num_words=num_words,
                          num_hidden=num_hidden,
                          num_embed=num_embed,
                          seq_len=max_bucket_size,
                          sparse_embedding=sparse_embedding)
        arg_names2 = sym.list_arguments()
        exec_group2 = DataParallelExecutorGroup(symbol=sym,
                                                contexts=contexts,
                                                workload=workload,
                                                data_shapes=data_shapes,
                                                label_shapes=label_shapes,
                                                param_names=shared_arg_names,
                                                for_training=True,
                                                inputs_need_grad=False,
                                                shared_group=exec_group1)
        extra_args = [
            name for name in arg_names2 if name not in shared_arg_names
        ]
        check_shared_grad = not sparse_embedding
        test_shared_exec_group(exec_grp_shared=exec_group1,
                               exec_grp_created=exec_group2,
                               shared_arg_names=shared_arg_names,
                               extra_args=extra_args,
                               check_shared_grad=check_shared_grad)
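In the enclosing test this helper is presumably invoked once per embedding variant; the short driver below is an assumed sketch, not part of the original snippet.

    # Hypothetical driver (assumption): exercise the helper for both the
    # sparse and the dense embedding variants.
    for sparse_embedding in [True, False]:
        check_shared_exec_group(sparse_embedding)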
Example #2
def bind(modQ,
         data_shapes,
         label_shapes=None,
         for_training=True,
         inputs_need_grad=False,
         force_rebind=False,
         shared_module=None,
         grad_req='write'):
    if force_rebind:
        modQ._reset_bind()

    if modQ.binded:
        modQ.logger.warning('Already bound, ignoring bind()')
        return

    modQ.for_training = for_training
    modQ.inputs_need_grad = inputs_need_grad
    modQ.binded = True
    modQ._grad_req = grad_req

    if not for_training:
        assert not inputs_need_grad
    else:
        pass
        # this is not True, as some module might not contain a loss function
        # that consumes the labels
        # assert label_shapes is not None

    modQ._data_shapes, modQ._label_shapes = _parse_data_desc(
        modQ.data_names, modQ.label_names, data_shapes, label_shapes)

    if shared_module is not None:
        assert isinstance(shared_module, Module) and \
               shared_module.binded and shared_module.params_initialized
        shared_group = shared_module._exec_group
    else:
        shared_group = None

    modQ._exec_group = DataParallelExecutorGroup(
        modQ._symbol,
        modQ._context,
        modQ._work_load_list,
        modQ._data_shapes,
        modQ._label_shapes,
        modQ._param_names,
        for_training,
        inputs_need_grad,
        shared_group,
        logger=modQ.logger,
        fixed_param_names=modQ._fixed_param_names,
        grad_req=grad_req,
        state_names=modQ._state_names)
    modQ._total_exec_bytes = modQ._exec_group._total_exec_bytes
    if shared_module is not None:
        modQ.params_initialized = True
        modQ._arg_params = shared_module._arg_params
        modQ._aux_params = shared_module._aux_params
    elif modQ.params_initialized:
        # if the parameters are already initialized, we are re-binding
        # so automatically copy the already initialized params
        modQ._exec_group.set_params(modQ._arg_params, modQ._aux_params)
    else:
        assert modQ._arg_params is None and modQ._aux_params is None
        param_arrays = [
            nd.zeros(x[0].shape, dtype=x[0].dtype, ctx=x[0].context)
            for x in modQ._exec_group.param_arrays
        ]
        modQ._arg_params = {
            name: arr
            for name, arr in zip(modQ._param_names, param_arrays)
        }

        aux_arrays = [
            nd.zeros(x[0].shape, dtype=x[0].dtype, ctx=x[0].context)
            for x in modQ._exec_group.aux_arrays
        ]
        modQ._aux_params = {
            name: arr
            for name, arr in zip(modQ._aux_names, aux_arrays)
        }

    if shared_module is not None and shared_module.optimizer_initialized:
        modQ.borrow_optimizer(shared_module)
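A hedged usage sketch for the standalone `bind` helper above: it mirrors `Module.bind`, so it would typically be handed an `mx.mod.Module` whose private attributes it reads. The symbol and shapes below are illustrative, and `DataParallelExecutorGroup`, `_parse_data_desc`, `Module`, and `nd` are assumed to already be in scope alongside the helper.

import mxnet as mx

# Illustrative network: a single fully connected layer with a softmax output.
data = mx.sym.Variable('data')
fc = mx.sym.FullyConnected(data=data, num_hidden=10, name='fc')
net = mx.sym.SoftmaxOutput(data=fc, name='softmax')

# Bind through the helper instead of calling mod.bind() directly.
mod = mx.mod.Module(symbol=net, context=mx.cpu())
bind(mod,
     data_shapes=[('data', (32, 100))],
     label_shapes=[('softmax_label', (32,))],
     for_training=True)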
Example #3
def test_executor_group():
    def get_rnn_sym(num_layers, num_words, num_hidden, num_embed, seq_len):
        stack = mx.rnn.SequentialRNNCell()
        for i in range(num_layers):
            stack.add(
                mx.rnn.LSTMCell(num_hidden=num_hidden, prefix='lstm_l%d_' % i))
        data = mx.sym.Variable('data')
        label = mx.sym.Variable('softmax_label')
        embed = mx.sym.Embedding(data=data,
                                 input_dim=num_words,
                                 output_dim=num_embed,
                                 name='embed')

        stack.reset()
        outputs, states = stack.unroll(seq_len,
                                       inputs=embed,
                                       merge_outputs=True)

        pred = mx.sym.Reshape(outputs, shape=(-1, num_hidden))
        pred = mx.sym.FullyConnected(data=pred,
                                     num_hidden=num_words,
                                     name='pred')

        label = mx.sym.Reshape(label, shape=(-1, ))
        pred = mx.sym.SoftmaxOutput(data=pred, label=label, name='softmax')
        return pred

    def test_shared_exec_group(exec_grp_shared,
                               exec_grp_created,
                               shared_arg_names=None,
                               extra_args=None):
        # Test shared data arrays
        for i in range(len(exec_grp_shared.execs)):
            # test same shared_data_arrays for two exec groups
            shared_data_array1 = exec_grp_shared.shared_data_arrays[i]
            shared_data_array2 = exec_grp_created.shared_data_arrays[i]
            if extra_args is not None:
                assert len(shared_data_array1) == len(extra_args),\
                    "exec_grp_shared.shared_data_arrays[%d] should have same number of args as extra_args"
            assert len(shared_data_array1) == len(shared_data_array2),\
                "length of shared_data_array of the shared executor group not equal to the created executor group"
            for k, v in shared_data_array1.items():
                if extra_args is not None:
                    assert k in extra_args, "arg %s is not in extra_args" % k
                assert k in shared_data_array2,\
                    "arg %s of the shared executor group not in the shared_data_array of the created executor group" % k
                assert mx.test_utils.same_array(v, shared_data_array2[k])

            for data_name, array in exec_grp_shared.shared_data_arrays[
                    i].items():
                assert data_name in exec_grp_created.shared_data_arrays[i], \
                    "Shared input data '%s' is not in " \
                    "shared_data_arrays of created executor group." % (data_name)
                assert mx.test_utils.same_array(array, exec_grp_created.shared_data_arrays[i][data_name]), \
                    "Shared input data '%s' does not share memory." % (data_name)

            # Test shared argument arrays and gradient arrays
            exec_shared = exec_grp_shared.execs[i]
            exec_created = exec_grp_created.execs[i]
            if shared_arg_names is not None:
                # test shared arguments
                for arg_name in shared_arg_names:
                    assert arg_name in exec_created.arg_dict, \
                        "Shared argument '%s' is not in arg_dict of created executor group." % (arg_name)
                    assert mx.test_utils.same_array(exec_shared.arg_dict[arg_name], exec_created.arg_dict[arg_name]), \
                        "Shared argument '%s' does not share memory." % (arg_name)
                # test shared argument gradients
                for arg_name in shared_arg_names:
                    assert arg_name in exec_created.grad_dict, \
                        "Shared argument gradient '%s' is not in " \
                        "grad_dict of created executor group." % (arg_name)
                    assert mx.test_utils.same_array(exec_shared.grad_dict[arg_name], exec_created.grad_dict[arg_name]), \
                        "Shared argument gradient '%s' does not sharing memory." % (arg_name)

            for arg_name, grad in exec_grp_shared.grad_req.items():
                assert grad == exec_grp_created.grad_req[arg_name], \
                    "Gradient requirements for shared argument '%s' are inconsistent. " \
                    "Shared executor group requires '%s' while created executor group requires '%s'" \
                    %(arg_name, grad, exec_grp_created.grad_req[arg_name])

    contexts = [mx.cpu(0), mx.cpu(1)]
    workload = [1] * len(contexts)
    batch_size = 32
    max_bucket_size = 80
    num_words = 1000
    num_hidden = 100
    num_embed = 200
    data_shapes = [('data', (batch_size, max_bucket_size))]
    label_shapes = [('softmax_label', (batch_size, max_bucket_size))]

    # generate an rnn sym with #layers=3
    sym = get_rnn_sym(num_layers=3,
                      num_words=num_words,
                      num_hidden=num_hidden,
                      num_embed=num_embed,
                      seq_len=max_bucket_size)
    arg_names1 = sym.list_arguments()
    input_names = [name[0] for name in data_shapes
                   ] + [name[0] for name in label_shapes]
    shared_arg_names = [name for name in arg_names1 if name not in input_names]
    exec_group1 = DataParallelExecutorGroup(symbol=sym,
                                            contexts=contexts,
                                            workload=workload,
                                            data_shapes=data_shapes,
                                            label_shapes=label_shapes,
                                            param_names=shared_arg_names,
                                            for_training=True,
                                            inputs_need_grad=False)

    # shared_data_arrays should only have input "data" and "softmax_label" arrays
    for i in range(len(contexts)):
        assert len(exec_group1.shared_data_arrays[i]) == len(input_names),\
            "exec_group1.shared_data_arrays[%d] should have the same number of names as in input_names" % i
        for name in input_names:
            assert name in exec_group1.shared_data_arrays[i],\
                "arg %s should be in exec_group1.shared_data_arrays[%d]" % (name, i)

    # generate an rnn sym with #layers=5
    sym = get_rnn_sym(num_layers=5,
                      num_words=num_words,
                      num_hidden=num_hidden,
                      num_embed=num_embed,
                      seq_len=max_bucket_size)
    arg_names2 = sym.list_arguments()
    exec_group2 = DataParallelExecutorGroup(symbol=sym,
                                            contexts=contexts,
                                            workload=workload,
                                            data_shapes=data_shapes,
                                            label_shapes=label_shapes,
                                            param_names=shared_arg_names,
                                            for_training=True,
                                            inputs_need_grad=False,
                                            shared_group=exec_group1)
    extra_args = [name for name in arg_names2 if name not in shared_arg_names]
    test_shared_exec_group(exec_grp_shared=exec_group1,
                           exec_grp_created=exec_group2,
                           shared_arg_names=shared_arg_names,
                           extra_args=extra_args)
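For context, a minimal preamble the test above presumably relies on; the import path assumes MXNet 1.x, where `DataParallelExecutorGroup` lives in the private `mxnet.module.executor_group` module.

import mxnet as mx
from mxnet.module.executor_group import DataParallelExecutorGroup

# Run the test directly: it binds two executor groups over two CPU contexts
# and checks that parameters and gradients are shared between them.
test_executor_group()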
Example #4
    def bind(self,
             data_shapes,
             label_shapes=None,
             for_training=True,
             inputs_need_grad=False,
             force_rebind=False,
             shared_module=None,
             grad_req='write'):
        """Binds the symbols to construct executors. This is necessary before one
        can perform computation with the module.

        Parameters
        ----------
        data_shapes : list of (str, tuple)
            Typically is ``data_iter.provide_data``.
        label_shapes : list of (str, tuple)
            Typically is ``data_iter.provide_label``.
        for_training : bool
            Default is ``True``. Whether the executors should be bound for training.
        inputs_need_grad : bool
            Default is ``False``. Whether the gradients to the input data need to be computed.
            Typically this is not needed. But this might be needed when implementing composition
            of modules.
        force_rebind : bool
            Default is ``False``. This function does nothing if the executors are already
            bound. But with this ``True``, the executors will be forced to rebind.
        shared_module : Module
            Default is ``None``. This is used in bucketing. When not ``None``, the shared module
            essentially corresponds to a different bucket -- a module with different symbol
            but with the same sets of parameters (e.g. unrolled RNNs with different lengths).
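        grad_req : str, list of str, dict of str to str
            Requirement for gradient accumulation. Can be 'write', 'add', or 'null'
            (defaults to 'write').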
        """
        # force rebinding is typically used when one wants to switch from
        # the training phase to the prediction phase.
        if force_rebind:
            self._reset_bind()

        if self.binded:
            self.logger.warning('Already bound, ignoring bind()')
            return

        self.for_training = for_training
        self.inputs_need_grad = inputs_need_grad
        self.binded = True
        self._grad_req = grad_req

        if not for_training:
            assert not inputs_need_grad
        else:
            pass
            # this is not True, as some module might not contain a loss function
            # that consumes the labels
            # assert label_shapes is not None

        self._data_shapes, self._label_shapes = _parse_data_desc(
            self.data_names, self.label_names, data_shapes, label_shapes)

        if shared_module is not None:
            assert isinstance(shared_module, Module) and \
                    shared_module.binded and shared_module.params_initialized
            shared_group = shared_module._exec_group
            assert len(shared_group.execs) >= len(self._context)
        else:
            shared_group = None

        self._exec_group = DataParallelExecutorGroup(
            self._symbol,
            self._context,
            self._work_load_list,
            self._data_shapes,
            self._label_shapes,
            self._param_names,
            for_training,
            inputs_need_grad,
            shared_group,
            logger=self.logger,
            fixed_param_names=self._fixed_param_names,
            grad_req=grad_req,
            group2ctxs=self._group2ctxs,
            state_names=self._state_names)
        self._total_exec_bytes = self._exec_group._total_exec_bytes
        if shared_module is not None:
            self.params_initialized = True
            self._arg_params = shared_module._arg_params
            self._aux_params = shared_module._aux_params
        elif self.params_initialized:
            # if the parameters are already initialized, we are re-binding
            # so automatically copy the already initialized params
            self._exec_group.set_params(self._arg_params, self._aux_params)
        else:
            assert self._arg_params is None and self._aux_params is None
            param_arrays = [
                zeros(shape=x[0].shape, dtype=x[0].dtype, stype=x[0].stype)
                for x in self._exec_group.param_arrays
            ]
            self._arg_params = {
                name: arr
                for name, arr in zip(self._param_names, param_arrays)
            }

            aux_arrays = [
                zeros(x[0].shape, dtype=x[0].dtype)
                for x in self._exec_group.aux_arrays
            ]
            self._aux_params = {
                name: arr
                for name, arr in zip(self._aux_names, aux_arrays)
            }

        if shared_module is not None and shared_module.optimizer_initialized:
            self.borrow_optimizer(shared_module)
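To illustrate the parameters documented above, here is a minimal sketch of the usual calling sequence; the symbol and shapes are illustrative and the standard `mx.mod.Module` workflow is assumed.

import mxnet as mx

# Illustrative network and shapes; data_shapes/label_shapes normally come
# from data_iter.provide_data and data_iter.provide_label.
data = mx.sym.Variable('data')
fc = mx.sym.FullyConnected(data=data, num_hidden=10, name='fc')
net = mx.sym.SoftmaxOutput(data=fc, name='softmax')

mod = mx.mod.Module(symbol=net, context=mx.cpu())
mod.bind(data_shapes=[('data', (32, 128))],
         label_shapes=[('softmax_label', (32,))],
         for_training=True)
mod.init_params(initializer=mx.init.Xavier())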