Example #1
    def apply(function, *args, **kwargs):
        """Apply function and create a checkpoint."""
        kwargs.pop('preserve_rng_state', True)
        variable_scope = kwargs.pop('variable_scope', 'Buffer')
        original_variable_scope = context.get_variable_scope(True)
        if kwargs:
            raise ValueError('Unexpected keyword arguments: ' +
                             ','.join(arg for arg in kwargs))

        # Run function.
        graph_tape = tapes.Tape()
        graph_tape._tracing = True  # Enable tracing.
        graph_tape._checkpointing = True  # Enable checkpointing.
        graph_tape._original_variable_scope = original_variable_scope
        with grad_mode.no_grad(), graph_tape:
            with context.variable_scope(variable_scope):
                outputs = function(*args)

        # Collect the involved tensors.
        tensor_inputs, tensor_outputs = [], []
        for arg in args:
            if isinstance(arg, Tensor):
                tensor_inputs.append(arg)
        for arg in nest.flatten(outputs):
            if isinstance(arg, Tensor):
                tensor_outputs.append(arg)

        # Fill tape with function context.
        op_tape = tapes.OrderedTape()
        op_handle = workspace.get_workspace().create_handle('Checkpoint')
        op_tape.add_element(proto_util.make_operator_def(
            op_type='Checkpoint',
            name=op_handle,
            inputs=[input.id for input in tensor_inputs],
            outputs=[output.id for output in tensor_outputs],
            defs=[v.SerializeAs() for v in graph_tape.get_elements()],
            buffer_scope=variable_scope,
            to_impl=True))
        op_tape.add_handle(op_handle)
        op_tape.merge_handles(graph_tape.get_handles())

        # Save input tensors for backward.
        for input in tensor_inputs + graph_tape.get_sources():
            op_tape.add_source(input)

        # Save tape for backward.
        for output in tensor_outputs:
            output._tape = op_tape
            output._requires_grad = True

        return outputs
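
The `apply` above implements gradient checkpointing: the wrapped `function` is executed under `no_grad`, the traced operator defs are folded into a single `Checkpoint` operator, and only the input tensors (plus the tape sources) are kept for backward, so intermediate activations are recomputed rather than stored. Below is a minimal, framework-free sketch of that trade-off; all names in it are illustrative and none of them belong to the Dragon API.

import numpy as np

class Checkpointed:
    """Keep only the inputs alive; recompute the forward pass when needed."""

    def __init__(self, function, *inputs):
        self._function = function
        self._inputs = inputs                 # saved for recomputation
        self.output = function(*inputs)       # intermediates are dropped here

    def recompute(self):
        """Re-run the forward pass, as the Checkpoint op does for backward."""
        return self._function(*self._inputs)

def mlp_block(x):
    h = np.maximum(x @ np.full((4, 8), 0.1), 0.0)   # intermediate activation
    return h @ np.full((8, 2), 0.1)

ckpt = Checkpointed(mlp_block, np.ones((3, 4)))
assert np.allclose(ckpt.output, ckpt.recompute())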
Example #2
    def __init__(self,
                 shape=None,
                 dtype='float32',
                 name=None,
                 symbolic=False,
                 **kwargs):
        """Create a ``Tensor``.

        Parameters
        ----------
        shape : Sequence[int], optional
            The tensor shape.
        dtype : str, optional, default='float32'
            The data type.
        name : str, optional
            The tensor name.
        symbolic : bool, optional, default=False
            Whether to initialize as a symbolic tensor.

        """
        self._shape = None if shape is None else tuple(shape)
        self._dtype = None if dtype is None else str(dtype)
        self._is_variable = not symbolic
        self._impl = kwargs.get('impl', None)
        self._deleter = kwargs.get('deleter', None)
        self._tape = None
        self._grad = None
        self._grad_tape = None
        self._requires_grad = False
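        # Without a given implementation, materialize one: variable tensors
        # need a fully determined shape and get storage now, while symbolic
        # tensors only get a placeholder in the 'Tensor' scope.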
        if self._impl is None:
            default_ws = workspace.get_workspace()
            if self._is_variable:
                if self._shape is None or None in self._shape:
                    raise ValueError(
                        'Expected a determined shape to create data.')
                var_scope = context.get_variable_scope()
                self._impl = default_ws.create_tensor(scope=var_scope)
                self._impl.FromShape(self._shape, self._dtype)
                self._deleter = default_ws._handle_pool
            else:
                self._impl = default_ws.create_tensor(scope='Tensor')
                self._deleter = None
        self._name = context.get_name_scope() + name if name else None
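
The constructor branches on `symbolic`: a non-symbolic ("variable") tensor must have a fully determined shape and gets backing storage immediately through `FromShape`, while a symbolic tensor only receives a placeholder implementation in the 'Tensor' scope. A self-contained sketch of that branch, with numpy standing in for the workspace machinery (the class below is illustrative, not the Dragon `Tensor`):

import numpy as np

class MiniTensor:
    def __init__(self, shape=None, dtype='float32', symbolic=False):
        self._shape = None if shape is None else tuple(shape)
        self._dtype = str(dtype)
        self._is_variable = not symbolic
        if self._is_variable:
            if self._shape is None or None in self._shape:
                raise ValueError('Expected a determined shape to create data.')
            self._impl = np.zeros(self._shape, self._dtype)  # storage allocated now
        else:
            self._impl = None                                # placeholder only

MiniTensor(shape=(2, 3))                    # eager: storage allocated
MiniTensor(shape=(None, 3), symbolic=True)  # symbolic: no allocation needed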
Example #3
    def __init__(
        self,
        initial_value,
        trainable=True,
        name=None,
        dtype=None,
        shape=None,
    ):
        """Create a ``Variable``."""
        # Determine the initial value.
        if isinstance(initial_value, Tensor):
            value = initial_value.numpy()
        else:
            value = initial_value
        # Determine the data type and shape.
        dtype = str(dtype) if dtype is not None else dtype
        value = numpy.array(value, dtype, copy=False)
        if shape is not None:
            if value.size == 1:
                # Broadcast with scalar value.
                scalar = value.flatten()[0]
                value = numpy.empty(shape, value.dtype)
                value.fill(scalar)
            else:
                # Reshape.
                value = value.reshape(shape)
        # Initialize tensor from the value.
        default_ws = workspace.get_workspace()
        super(Variable, self).__init__(
            shape=value.shape,
            dtype=value.dtype,
            impl=default_ws.create_tensor(
                scope=context.get_variable_scope()).FromNumpy(value, True),
            deleter=default_ws._handle_pool,
            name=name,
        )
        self.requires_grad = trainable
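
Most of `Variable.__init__` is value normalization: the initial value is coerced to an array, a scalar is broadcast to `shape`, and anything else is reshaped to it before the tensor storage is created from the result. A small self-contained sketch of just that step; the helper name is made up, and `numpy.asarray` is used so the sketch also runs on NumPy versions where `copy=False` is strict.

import numpy as np

def normalize_initial_value(initial_value, dtype=None, shape=None):
    """Coerce to an array, then broadcast a scalar or reshape to `shape`."""
    dtype = str(dtype) if dtype is not None else dtype
    value = np.asarray(initial_value, dtype)
    if shape is not None:
        if value.size == 1:
            scalar = value.flatten()[0]
            value = np.empty(shape, value.dtype)
            value.fill(scalar)              # broadcast the scalar value
        else:
            value = value.reshape(shape)    # reshape to the requested shape
    return value

print(normalize_initial_value(0.5, 'float32', shape=(2, 2)))  # 2x2 filled with 0.5
print(normalize_initial_value([1, 2, 3, 4], shape=(2, 2)))    # [[1, 2], [3, 4]]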
Example #4
    def run(inputs, run_config, **kwargs):
        """Run operator once."""
        graph_tape = tapes.get_tape()
        execute_ws = workspace.get_workspace()

        # Add inputs.
        input_names = []
        enable_grad = False
        for input in inputs:
            input_names.append(input.id)
            if graph_tape and (input.requires_grad
                               or graph_tape.is_target(id(input))):
                enable_grad = True

        # Unify grad modes.
        no_grad = run_config['no_grad']
        enable_grad = enable_grad and not no_grad
        if hasattr(graph_tape, '_exporting'):
            # Ensure the intermediates are saved for the exporting graph.
            no_grad, enable_grad = False, True

        # Add outputs.
        outputs, output_names = [], []
        output_specs = list(kwargs.get('outputs', [None]))
        for i, spec in enumerate(output_specs):
            if spec is None:
                outputs.append(
                    Tensor(device=run_config['device'].copy(),
                           impl=execute_ws.create_tensor(
                               scope=context.get_variable_scope(enable_grad)),
                           deleter=execute_ws._handle_pool))
                output_names.append(outputs[i].id)
            else:
                assert isinstance(spec, Tensor)
                outputs.append(spec)
                output_names.append(spec.id)
                if enable_grad and output_names[-1] not in input_names:
                    raise RuntimeError(
                        'Output that requires gradient is not in inputs.')

        # Specialize def for given inputs and outputs.
        op_name = ''  # Optional operator name.
        op_def = run_config['def'].DeriveTo(input_names, output_names)

        # Record def if grad is enabled.
        if len(inputs) > 0 and not no_grad:
            if enable_grad:
                op_name = execute_ws.create_handle(op_def.type)
                op_def.name = op_name
                graph_tape.add_element(op_def)
                graph_tape.add_handle(op_name)
                for input in inputs:
                    graph_tape.add_source(input)
                for output in outputs:
                    output._requires_grad = True
            else:
                for output in outputs:
                    output._requires_grad = False

        # Ensure the operator is named for the tracing graph.
        if hasattr(graph_tape, '_tracing') and not op_name:
            op_def.name = op_name = execute_ws.create_handle(op_def.type)
            graph_tape.add_handle(op_name)

        # Emit to dispatch this execution.
        for feed_key, value_type in run_config['feed_dict'].items():
            dest = execute_ws.create_tensor(op_name + '/' + feed_key)
            dest.FromNumpy(numpy.array(kwargs[feed_key], value_type), True)
        execute_ws.run_operator(op_def)

        # Return single or repeated outputs.
        return outputs[0] if len(outputs) == 1 else outputs
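
The grad-mode handling in `run` can be read in isolation: gradients are recorded only when at least one input requires them (or is a tape target) and `no_grad` is not forced, and an exporting tape overrides both so the intermediates stay alive. A self-contained restatement of that decision (the function name is illustrative):

def unify_grad_modes(inputs_require_grad, no_grad, exporting=False):
    """Mirror of the grad-mode unification shown above."""
    enable_grad = any(inputs_require_grad) and not no_grad
    if exporting:
        # Keep intermediates for the exporting graph.
        no_grad, enable_grad = False, True
    return no_grad, enable_grad

assert unify_grad_modes([True, False], no_grad=False) == (False, True)
assert unify_grad_modes([True, False], no_grad=True) == (True, False)
assert unify_grad_modes([False], no_grad=True, exporting=True) == (False, True)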
Example #5
    def forward(inputs, run_config, **kwargs):
        """Compute the function outputs."""
        graph_tape = tapes.get_tape()
        execute_ws = workspace.get_workspace()
        device = run_config['device']

        # Add inputs.
        inputs_id = []
        enable_grad = False
        for i, input in enumerate(inputs):
            inputs_id.append(input.id)
            if input.requires_grad:
                enable_grad = True
            if run_config['check_device'] and input._device != device:
                raise RuntimeError(
                    'Mismatched device between function and '
                    'element {} of input tensors. ({} vs. {})'
                    .format(i, device, input._device))

        # Unify grad modes.
        no_grad = run_config['no_grad']
        no_grad = no_grad or not grad_mode.is_grad_enabled()
        enable_grad = enable_grad and not no_grad
        if hasattr(graph_tape, '_exporting'):
            # Ensure the intermediates are saved for the exporting graph.
            no_grad, enable_grad = False, True

        # Add outputs.
        outputs, outputs_id = [], []
        output_specs = kwargs.get('outputs', [None])
        for i, spec in enumerate(output_specs):
            if spec is None:
                outputs.append(Tensor(
                    device=device.copy(),
                    impl=execute_ws.create_tensor(
                        scope=context.get_variable_scope(enable_grad)),
                    deleter=execute_ws._handle_pool))
                outputs_id.append(outputs[i].id)
            else:
                if isinstance(spec, Tensor):
                    spec._device = device.copy()
                    outputs.append(spec)
                    outputs_id.append(spec.id)
                else:
                    outputs_id.append(spec)
                if enable_grad and outputs_id[-1] not in inputs_id:
                    raise RuntimeError(
                        'Output tensor should be in inputs if it requires grad.')

        # Specialize def for given inputs and outputs.
        op_name = ''  # Optional operator name.
        op_def = run_config['def'].DeriveTo(inputs_id, outputs_id)

        # Record def if grad is enabled.
        if len(inputs) > 0 and not no_grad:
            if enable_grad:
                op_tape = tapes.OrderedTape()
                op_name = execute_ws.create_handle(op_def.type)
                op_def.name = op_name
                op_tape.add_element(op_def)
                op_tape.add_handle(op_name)
                for input in inputs:
                    op_tape.add_source(input)
                for output in outputs:
                    op_tape.merge_from(output._tape)
                for output in outputs:
                    output._tape = op_tape
                    output._requires_grad = True
            else:
                for output in outputs:
                    output._requires_grad = False

        # Ensure the operator is named for the tracing graph.
        if hasattr(graph_tape, '_tracing'):
            if not op_name:
                op_name = execute_ws.create_handle(op_def.type)
            op_def.name = op_name
            graph_tape.add_element(op_def)
            graph_tape.add_handle(op_name)

        # Save inputs for the checkpointing graph.
        if hasattr(graph_tape, '_checkpointing'):
            for input in inputs:
                if input._tape:
                    if input._retains_grad:
                        graph_tape.add_source(input)
                elif input._requires_grad:
                    graph_tape.add_source(input)

        # Emit to dispatch this execution.
        for feed_key, value_type in run_config['feed_dict'].items():
            dest = execute_ws.create_tensor(op_name + '/' + feed_key)
            dest.FromNumpy(numpy.array(kwargs[feed_key], value_type), True)
        execute_ws.run_operator(op_def)

        # Return single or repeated outputs.
        return outputs[0] if len(outputs) == 1 else outputs
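
When gradients are enabled, `forward` builds a fresh `OrderedTape`, records the specialized op def and its handle, adds every input as a source, merges any tape already attached to the outputs (presumably how in-place results keep their history), and finally attaches the merged tape to each output. A minimal self-contained sketch of that chaining, with illustrative stand-ins rather than Dragon types:

class MiniTape:
    def __init__(self):
        self.ops, self.sources = [], []

    def add_element(self, op):
        self.ops.append(op)

    def add_source(self, tensor):
        self.sources.append(tensor)

    def merge_from(self, other):
        if other is not None:
            self.ops = other.ops + self.ops
            self.sources = other.sources + self.sources

class MiniVar:
    _tape, _requires_grad = None, False

def record_op(op_def, inputs, outputs):
    """Chain a new op onto the tapes carried by its outputs."""
    tape = MiniTape()
    tape.add_element(op_def)
    for tensor in inputs:
        tape.add_source(tensor)
    for out in outputs:
        tape.merge_from(out._tape)
    for out in outputs:
        out._tape, out._requires_grad = tape, True
    return tape

x, y = MiniVar(), MiniVar()
record_op('MatMul_1', [x], [y])
record_op('Relu_1', [y], [y])    # in-place: 'MatMul_1' stays on y's tape
assert y._tape.ops == ['MatMul_1', 'Relu_1']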