Example 1
    def test_repeat_simple_addition(self, repeat_count: int):
        """Test that a simple x = x + 1 repeated `repeat_count` times will
        produce x = `repeat_count`

        Args:
            repeat_count (int): Number of times to repeat.
        """
        ir = pir.Ir()
        main = ir.main_graph()
        with main:
            # Start from 0 so that `repeat_count` additions of 1 yield `repeat_count`.
            x = pir.constant(0, pir.dtypes.int32)

            add_one = AddOne()
            add_one_graph = ir.create_graph(add_one, x)

            y = ops.repeat(add_one_graph,
                           repeat_count,
                           x,
                           subgraph_in_to_parent_in={})

            d2h = pir.d2h_stream(y.shape, pir.dtypes.int32, name="y_stream")
            ops.host_store(d2h, y)

        r_y = run_ir(ir, 1, d2h.tensor_id(), {})

        assert r_y == repeat_count
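The `AddOne` module and the `run_ir` helper used above are defined elsewhere in the test suite and are not part of this excerpt. A minimal sketch of `AddOne`, assuming it follows the same `pir.Module` pattern as the `ScaleNShift` module in Example 19, could be:

class AddOne(pir.Module):
    def build(self, x: pir.Tensor) -> pir.Tensor:
        # One iteration of x = x + 1; ops.repeat chains this subgraph
        # `repeat_count` times.
        return x + pir.constant(1, pir.dtypes.int32)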
Example 2
    def test_scaled_add_t_t(self, inplace):
        ir = pir.Ir()
        g = ir.main_graph()

        with g:
            X = pir.variable(np.ones((2, 2)), dtype=pir.float32)
            Y = pir.variable(np.ones((2, 2)), dtype=pir.float32)
            if inplace:
                ops.scaled_add_(X, Y, a=pir.constant(0.9), b=pir.variable(0.1))
            else:
                ops.scaled_add(X, Y, a=pir.constant(0.9), b=pir.variable(0.1))
        assert len(g.get_tensors()) == 5
        assert len(g.get_variables()) == 3
        if inplace:
            assert contains_op_of_type("ScaledAddLhsInplace",
                                       _ir.op.ScaledAddLhsInplaceOp, g)
        else:
            assert contains_op_of_type("ScaledAdd", _ir.op.ScaledAddOp, g)
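Several of these tests rely on a `contains_op_of_type` helper that is not shown in this excerpt. A plausible sketch, assuming the `_ir` op bindings expose `opType()`, is:

def contains_op_of_type(op_type: str, pb_op_class, graph: pir.Graph) -> bool:
    # Report whether any op in the graph's underlying popart graph matches
    # both the string op type and the bound _ir op class.
    for pb_op in graph._pb_graph.getOps():
        if pb_op.opType() == op_type and isinstance(pb_op, pb_op_class):
            return True
    return False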
Example 3
def test_tensor_id_conflict():
    ir = pir.Ir()
    main = ir.main_graph()
    with main:
        name0 = pir.variable(1, name="tensor").id
        name1 = pir.variable(1, name="tensor").id
        name2 = pir.constant(1, name="tensor").id
    assert name0 == "tensor"
    ids = [name0, name1, name2]
    assert len(ids) == len(set(ids))
Example 4
    def test_dampened_add_square(self):
        ir = pir.Ir()
        g = ir.main_graph()

        with g:
            a = pir.variable(1)
            b = pir.constant(2)
            c = ops.var_updates.accumulate_square_(a, b, 0.999)
        assert contains_op_of_type("Accumulate", _ir.op.AccumulateOp, g)
        op = g._pb_graph.getOps()[0]
        assert op.getAccumulationType() == _ir.AccumulationType.DampenedAddSquare
Example 5
def test_modified():
    ir = pir.Ir()
    g = ir.main_graph()

    with g, pir.in_sequence():
        x = pir.variable(1)

        sg = ir.create_graph(
            lambda x: ops.var_updates.accumulate_(x, pir.constant(1)), x)

        ops.call(sg, x)  # type: ignore
        # Store x
        x_non_modify_stream = pir.d2h_stream(x.shape, x.dtype)
        ops.host_store(x_non_modify_stream, x)

        info = ops.call_with_info(sg, x)
        info.set_op_input_modified(x)
        x_modify_stream = pir.d2h_stream(x.shape, x.dtype)
        ops.host_store(x_modify_stream, x)

    ir = ir._pb_ir
    dataFlow = popart.DataFlow(batchesPerStep=1,
                               anchorTensors={
                                   x_non_modify_stream.tensor_id():
                                   popart.AnchorReturnType("All"),
                                   x_modify_stream.tensor_id():
                                   popart.AnchorReturnType("All"),
                               })
    ir.setDataFlow(dataFlow)

    opts = ir.getSessionOptions()
    opts.useHostCopyOps = True
    opts.enableExplicitMainLoops = True
    opts.aliasZeroCopy = True
    opts.explicitRecomputation = True

    ir.updateVertices()

    session = popart.InferenceSession.fromIr(
        ir=ir, deviceInfo=tu.create_test_device())

    session.prepareDevice()

    # Create buffers for anchors
    anchors = session.initAnchorArrays()

    # Run the model
    stepio = popart.PyStepIO(inputs={}, outputs=anchors)

    session.weightsFromHost()
    session.run(stepio)

    assert anchors[x_non_modify_stream.tensor_id()] == 1
    assert anchors[x_modify_stream.tensor_id()] == 2
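`tu.create_test_device()` above (and in Example 16) comes from a test utility module that is not included here. Assuming an IPU Model device is sufficient for these tests, a stand-in might look like:

def create_test_device(num_ipus: int = 1):
    # Hypothetical replacement for the test utility: acquire an IPU Model
    # device so the test can run without physical IPU hardware.
    return popart.DeviceManager().createIpuModelDevice({"numIPUs": num_ipus})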
Example 6
    def test_add(self):
        ir = pir.Ir()
        g = ir.main_graph()

        with g:
            a = pir.variable(1)
            b = pir.constant(2)
            c = ops.var_updates.copy_var_update_(a, b)
        assert len(g.get_tensors()) == 3
        assert len(g.get_variables()) == 1
        assert contains_op_of_type("CopyVarUpdate", _ir.op.CopyVarUpdateOp, g)
Example 7
    def test_tensor_fns(self):
        ir = pir.Ir()
        g = ir.main_graph()

        with g:
            a = pir.variable(1)
            a_1 = a.copy_to_ipu(1, 0)
            a_0 = a_1.copy_to_ipu(0)

            c = pir.constant(2)
            c_1 = c.copy_to_ipu(1, 0)
            c_0 = c_1.copy_to_ipu(0)
Example 8
    def test_mean(self):
        ir = pir.Ir()
        g = ir.main_graph()

        with g:
            a = pir.variable(1)
            b = pir.constant(2)
            step = pir.variable(0)
            c = ops.var_updates.accumulate_mean_(a, b, step)
        assert contains_op_of_type("Accumulate", _ir.op.AccumulateOp, g)
        op = g._pb_graph.getOps()[0]
        assert op.getAccumulationType() == _ir.AccumulationType.Mean
Example 9
    def test_add(self):
        ir = pir.Ir()
        g = ir.main_graph()

        with g:
            a = pir.variable(1)
            b = pir.constant(2)
            c = ops.var_updates.accumulate_(a, b)
        assert len(g.get_tensors()) == 3
        assert len(g.get_variables()) == 1
        assert contains_op_of_type("Accumulate", _ir.op.AccumulateOp, g)
        op = g._pb_graph.getOps()[0]
        assert op.getAccumulationType() == _ir.AccumulationType.Add
Example 10
    def test_remote_load_graph(self, use_offset: bool,
                               tensor_shape: Tuple[int, ...], repeats: int,
                               tensor_dtype: dtype, inplace: bool) -> None:
        """Test that the graph is correct when using the remote load op

        Args:
            use_offset (bool): Whether or not to use offset
            tensor_shape (Tuple[int, ...]): The shape of the tensor to be loaded
            repeats (int): The number of tensors potentially stored in the buffer
            tensor_dtype (dtype): The type of the tensors to be loaded
            inplace (bool): Whether or not to use the inplace version of the op
        """
        ir = pir.Ir()
        g = ir.main_graph()

        with g:
            t = pir.variable(
                np.random.rand(*tensor_shape).astype(tensor_dtype.as_numpy()))
            if use_offset:
                offset = pir.constant([1], name='offset')
                # With this option the graph should contain
                # 1. t
                # 2. offset
                # 3. out
                n_tensors = 3
            else:
                offset = None
                # With this option the graph should contain
                # 1. t
                # 2. out
                n_tensors = 2

            rbh = RemoteBufferHandle(remote_buffer_id=1,
                                     tensor_shape=tensor_shape,
                                     tensor_dtype=tensor_dtype,
                                     repeats=repeats)

            op = ops.remote_load if not inplace else ops.remote_load_
            op(t, offset, rbh)

        assert len(g.get_tensors()) == n_tensors
        # Only t is a variable
        assert len(g.get_variables()) == 1
        type_string = "RemoteLoad" if not inplace else "RemoteLoadInplace"
        pb_type = _ir.op.exchange.RemoteLoadOp if not inplace else _ir.op.exchange.RemoteLoadInplaceOp
        assert contains_op_of_type(type_string, pb_type, g)

        # Clean-up so that the RemoteBufferHandle gets reset
        RemoteBufferHandle._buffers = {}
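The test above receives `use_offset`, `tensor_shape`, `repeats`, `tensor_dtype` and `inplace` as arguments, so it is presumably driven by `pytest.mark.parametrize` decorators that are omitted from this excerpt. A hypothetical parametrisation (the values shown are illustrative, not from the original test) could be:

@pytest.mark.parametrize("use_offset", [True, False])
@pytest.mark.parametrize("tensor_shape", [(7, 11, 13)])
@pytest.mark.parametrize("repeats", [3])
@pytest.mark.parametrize("tensor_dtype", [pir.float32, pir.int32])
@pytest.mark.parametrize("inplace", [True, False])
def test_remote_load_graph(self, use_offset, tensor_shape, repeats,
                           tensor_dtype, inplace):
    ...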
Example 11
    def test_from_pb_type(self):
        """Test the from_pb_tensor returns the correct python type"""
        ir = pir.Ir()
        main = ir.main_graph()

        with main:
            a = pir.variable(1)
            c = pir.constant(2)

        assert isinstance(a, Variable)
        new_a = pir.Tensor._from_pb_tensor(a._pb_tensor)
        assert isinstance(new_a, Variable)
        assert isinstance(c, Constant)
        new_c = pir.Tensor._from_pb_tensor(c._pb_tensor)
        assert isinstance(new_c, Constant)
Example 12
    def test_adam_wd_updater(self):
        ir = pir.Ir()
        g = ir.main_graph()

        with g:
            w = pir.variable(1, name='w')
            m = pir.variable(1, name='m')
            v = pir.variable(2, name='v')
            wd = pir.constant(0.2, name='wd')

            updater = ops.var_updates.adam_updater(m,
                                                   v,
                                                   weight=w,
                                                   weight_decay=wd)
        assert len(g.get_tensors()) == 5
        assert contains_op_of_type("AdamUpdater", _ir.op.AdamUpdaterOp, g)
Example 13
    def test_adam_wd_updater_invalid(self):
        ir = pir.Ir()
        g = ir.main_graph()

        with g:
            m = pir.variable(1, name='m')
            v = pir.variable(2, name='v')
            t = pir.variable(1, name='t')
            wd = pir.constant(0.2, name='wd')
            with pytest.raises(ValueError) as excinfo:
                updater = ops.var_updates.adam_updater(m,
                                                       v,
                                                       time_step=t,
                                                       weight_decay=wd)
            message = str(excinfo.value)
        assert "Weight decay requires weight to be not None." in message
Example 14
    def test_adam_bias_wd_updater(self):
        ir = pir.Ir()
        g = ir.main_graph()

        with g:
            m = pir.variable(1, name='m')
            v = pir.variable(2, name='v')
            w = pir.variable(1, name='w')
            t = pir.variable(2, name='t')
            wd = pir.constant(0.2, name='wd')
            b1 = 0.9
            b2 = 0.99
            updater = ops.var_updates.adam_updater(m, v, w, t, wd, b1, b2)

        assert len(g.get_tensors()) == 6
        assert contains_op_of_type("AdamUpdater", _ir.op.AdamUpdaterOp, g)
Example 15
def build_model(
        data: Dict[str, np.ndarray]
) -> Tuple[_ir.Ir, Dict[str, DeviceToHostStream]]:
    """Build a model for storing and loading tensors from the remote buffer.

    Args:
        data (Dict[str, np.ndarray]): Dict of the data to be stored in and loaded from the remote buffer.

    Returns:
    (tuple): tuple containing:

        ir._pb_ir (_ir.Ir): The underlying IR
        d2h_streams (Dict[str, DeviceToHostStream]): The output streams
    """
    ir = pir.Ir()
    main = ir.main_graph()

    with main:
        # Placeholder for tensor ids
        tensors = {}
        # Create variable tensors from the data
        for name in data.keys():
            tensors[name] = pir.variable(data[name], name=name)

        # Placeholder for device to host streams
        d2h_streams = {}

        # Store and load the first tensor without specifying the remote buffer handle or offset
        ops.remote_store(t=tensors["store_in_1"])
        tensors["load_out_1"] = ops.remote_load(t=tensors["load_in_1"])
        tensors["load_out_1_inplace"] = ops.remote_load_(
            t=tensors["load_in_1_inplace"])
        # Anchor the input tensors to the load operator
        d2h_streams = make_anchor(d2h_streams, tensors, "load_in_1")
        d2h_streams = make_anchor(d2h_streams, tensors, "load_in_1_inplace")
        # Anchor the output tensors of the load operator
        d2h_streams = make_anchor(d2h_streams, tensors, "load_out_1")
        d2h_streams = make_anchor(d2h_streams, tensors, "load_out_1_inplace")

        # Store and load the second and third tensors using the same buffer id.
        # Buffer 1 should already be assigned implicitly, so we choose a different id.
        rbh = RemoteBufferHandle(
            remote_buffer_id=42,
            tensor_shape=tensors["store_in_2"]._pb_tensor.info.shape(),
            tensor_dtype=dtype.as_dtype(
                tensors["store_in_2"]._pb_tensor.info.data_type_lcase()),
            repeats=2)
        # Index starts at 0
        offset_tensor_2 = pir.constant(0, name="offset_2")
        offset_tensor_3 = pir.constant(1, name="offset_3")
        ops.remote_store(t=tensors["store_in_2"],
                         offset=offset_tensor_2,
                         remote_buffer_handle=rbh)
        ops.remote_store(t=tensors["store_in_3"],
                         offset=offset_tensor_3,
                         remote_buffer_handle=rbh)
        tensors["load_out_2"] = ops.remote_load(t=tensors["load_in_2"],
                                                offset=offset_tensor_2,
                                                remote_buffer_handle=rbh)
        tensors["load_out_3_inplace"] = ops.remote_load_(
            t=tensors["load_in_3_inplace"],
            offset=offset_tensor_3,
            remote_buffer_handle=rbh)

        # Anchor the input tensors to the load operator
        d2h_streams = make_anchor(d2h_streams, tensors, "load_in_2")
        d2h_streams = make_anchor(d2h_streams, tensors, "load_in_3_inplace")
        # Anchor the output tensors of the load operator
        d2h_streams = make_anchor(d2h_streams, tensors, "load_out_2")
        d2h_streams = make_anchor(d2h_streams, tensors, "load_out_3_inplace")

    return ir._pb_ir, d2h_streams
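`make_anchor` is a helper defined elsewhere in this test file. Based on how it is called above, one plausible sketch creates a device-to-host stream for the named tensor, host-stores the tensor into it, and records the stream under the same name:

def make_anchor(d2h_streams: Dict[str, DeviceToHostStream],
                tensors: Dict[str, pir.Tensor],
                name: str) -> Dict[str, DeviceToHostStream]:
    # Must be called inside the `with main:` context so that the host_store
    # op is added to the main graph.
    t = tensors[name]
    d2h_streams[name] = pir.d2h_stream(t.shape, t.dtype, name=f"{name}_stream")
    ops.host_store(d2h_streams[name], t)
    return d2h_streams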
Example 16
def build_model() -> Tuple[_ir.Ir, pir.HostToDeviceStream,
                           pir.DeviceToHostStream, pir.DeviceToHostStream,
                           pir.DeviceToHostStream, pir.DeviceToHostStream,
                           np.ndarray, np.ndarray]:
    ir = pir.Ir()

    main = ir.main_graph()
    with main:
        x_h2d = pir.h2d_stream(_IN_SHAPE, pir.float32, name="x_stream")
        x = ops.host_load(x_h2d, "x")

        W_data = np.random.normal(0, 0.1, _WEIGHT_SHAPE).astype(np.float32)
        b_data = np.zeros(_BIAS_SHAPE, dtype=np.float32)

        W = pir.variable(W_data, name="W")
        b = pir.variable(b_data, name="b")

        lin = Linear()
        lin_graph = ir.create_graph(lin, x, out_features=_OUT_FEATURES)

        lin_call_info = ops.call_with_info(lin_graph,
                                           x,
                                           subgraph_in_to_parent_in={
                                               lin.W: W,
                                               lin.b: b
                                           })
        y = lin_call_info.get_output_tensors()[0]

        assert y.shape == _OUT_SHAPE

        y_d2h = pir.d2h_stream(y.shape, y.dtype, name="y_stream")
        ops.host_store(y_d2h, y)

    lin_bwd_info = pir.transforms.autodiff.autodiff(lin_graph)
    lin_bwd_graph = lin_bwd_info.graph

    with main:
        grad_seed = pir.constant(np.ones(_OUT_SHAPE, np.float32))
        tensors_required_for_bwd = pir.transforms.autodiff.get_expected_forward_inputs_from_call(
            lin_call_info, lin_bwd_info)
        lin_bwd_call_info = ops.call_with_info(
            lin_bwd_graph,
            grad_seed,
            subgraph_in_to_parent_in=tensors_required_for_bwd)

    ##### Extract parent graph x_grad, W_grad, b_grad

    expected_outputs = lin_bwd_info.expected_outputs
    x_grad, W_grad, b_grad = None, None, None

    sg_x = lin_call_info.op_in_to_subgraph_in_tensor(x)
    sg_W = lin_call_info.op_in_to_subgraph_in_tensor(W)
    sg_b = lin_call_info.op_in_to_subgraph_in_tensor(b)

    def get_grad_tensor_in_main_graph_from_fwdgrad_expected_connection(
            ec: pir.transforms.autodiff.ExpectedConnection) -> pir.Tensor:
        # If (t, FwdGrad) appears at index i in expected_outputs, it is
        # guaranteed that t’ (the grad of t) appears at output index i in the
        # grad graph.
        sg_out_idx = expected_outputs.index(ec)
        op_out_idx = lin_bwd_call_info.subgraph_in_to_op_in_index(sg_out_idx)
        parent_grad = lin_bwd_call_info.get_op_output_tensor(op_out_idx)

        return parent_grad

    for ec in expected_outputs:
        # Should always be the case for expected_outputs
        assert ec.connection_type == pir.transforms.autodiff.ExpectedConnectionType.FwdGrad

        sg_fwd_tensor = ec.fwd_tensor

        if sg_fwd_tensor == sg_x:
            x_grad = get_grad_tensor_in_main_graph_from_fwdgrad_expected_connection(
                ec)
        elif sg_fwd_tensor == sg_W:
            W_grad = get_grad_tensor_in_main_graph_from_fwdgrad_expected_connection(
                ec)
        elif sg_fwd_tensor == sg_b:
            b_grad = get_grad_tensor_in_main_graph_from_fwdgrad_expected_connection(
                ec)

    assert x_grad is not None
    assert W_grad is not None
    assert b_grad is not None

    # HostStore grads and collect d2h streams
    def host_store_and_return_d2h_stream(
            grad: pir.Tensor) -> pir.DeviceToHostStream:
        with main:
            d2h = pir.d2h_stream(grad.shape,
                                 grad.dtype,
                                 name=grad.name + "_stream")
            ops.host_store(d2h, grad)
        return d2h

    x_grad_d2h = host_store_and_return_d2h_stream(x_grad)
    W_grad_d2h = host_store_and_return_d2h_stream(W_grad)
    b_grad_d2h = host_store_and_return_d2h_stream(b_grad)

    assert x_grad_d2h is not None
    assert W_grad_d2h is not None
    assert b_grad_d2h is not None

    return ir._pb_ir, x_h2d, y_d2h, x_grad_d2h, W_grad_d2h, b_grad_d2h, W_data, b_data
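The `Linear` module is defined outside this excerpt. Given the `out_features` argument and the `W` and `b` tensors bound through `subgraph_in_to_parent_in`, a sketch along the lines of the `ScaleNShift` module in Example 19 (assuming `ops.matmul` for the dense layer) would be:

class Linear(pir.Module):
    def __init__(self):
        self.W: pir.Tensor = None
        self.b: pir.Tensor = None

    def build(self, x: pir.Tensor, out_features: int,
              bias: bool = True) -> pir.Tensor:
        # Declare subgraph inputs for the weight and bias, then compute
        # y = x @ W + b.
        self.W = pir.subgraph_input((x.shape[-1], out_features), pir.float32,
                                    "W")
        y = ops.matmul(x, self.W)
        if bias:
            self.b = pir.subgraph_input((out_features, ), pir.float32, "b")
            y = y + self.b
        return y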
Example 17
    def test_remote_store_graph(self, use_offset: bool,
                                use_remote_buffer_id: bool, use_rbh: bool,
                                tensor_shape: Tuple[int, ...], repeats: int,
                                tensor_dtype: dtype) -> None:
        """Test that the graph is correct when using the remote store op.

        Args:
            use_offset (bool): Whether or not to use offset
            use_remote_buffer_id (bool): Whether or not to set the remote buffer_id
            use_rbh (bool): Whether or not to specify the remote buffer handle
            tensor_shape (Tuple[int, ...]): The shape of the tensor to be stored
            repeats (int): The number of tensors to potentially store in the buffer
            tensor_dtype (dtype): The type of the tensors to be stored
        """
        ir = pir.Ir()
        g = ir.main_graph()

        with g:
            t = pir.variable(
                np.random.rand(*tensor_shape).astype(tensor_dtype.as_numpy()))
            if use_offset:
                offset = pir.constant([1], name='offset')
                # With this option the graph should contain
                # 1. t
                # 2. offset
                n_tensors = 2
            else:
                offset = None
                # With this option the graph should contain
                # 1. t
                n_tensors = 1

            remote_buffer_id = 1 if use_remote_buffer_id else -1

            if remote_buffer_id == -1:
                with pytest.raises(NotImplementedError):
                    _ = RemoteBufferHandle(remote_buffer_id=remote_buffer_id,
                                           tensor_shape=tensor_shape,
                                           tensor_dtype=tensor_dtype,
                                           repeats=repeats)
                # Clean-up so that the RemoteBufferHandle gets reset
                RemoteBufferHandle._buffers = {}
                return

            if use_rbh:
                rbh = RemoteBufferHandle(remote_buffer_id=remote_buffer_id,
                                         tensor_shape=tensor_shape,
                                         tensor_dtype=tensor_dtype,
                                         repeats=repeats)
            else:
                rbh = None

            ops.remote_store(t, offset, rbh)

        assert len(g.get_tensors()) == n_tensors
        # Only t is a variable
        assert len(g.get_variables()) == 1
        assert contains_op_of_type("RemoteStore",
                                   _ir.op.exchange.RemoteStoreOp, g)

        # Clean-up so that the RemoteBufferHandle gets reset
        RemoteBufferHandle._buffers = {}
Example 18
# Copyright (c) 2021 Graphcore Ltd. All rights reserved.
import numpy as np
import popart.ir as pir
import popart.ir.ops as ops
import popart

# Creating a model with popart.ir
ir = pir.Ir()
main = ir.main_graph()
with main:
    a = pir.variable(3, dtype=pir.int8, name="variable_a")
    b = pir.constant(1, dtype=pir.int8, name="constant_b")

    # addition
    o = a + b
    # host store
    o_d2h = pir.d2h_stream(o.shape, o.dtype, name="output_stream")
    ops.host_store(o_d2h, o)

dataFlow = popart.DataFlow(
    batchesPerStep=1,
    anchorTensors={o_d2h.tensor_id(): popart.AnchorReturnType("All")})

ir = ir._pb_ir
ir.setDataFlow(dataFlow)
opts = ir.getSessionOptions()
opts.useHostCopyOps = True
opts.enableExplicitMainLoops = True
ir.updateVertices()
ir.setIsPrepared()
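The script above only builds and prepares the IR. To execute it, a session can be created and run in the same way as in Example 5; a sketch, assuming an IPU Model device acquired through popart.DeviceManager, is:

session = popart.InferenceSession.fromIr(
    ir=ir, deviceInfo=popart.DeviceManager().createIpuModelDevice({}))
session.prepareDevice()

# Create buffers for the anchored output and run a single step.
anchors = session.initAnchorArrays()
stepio = popart.PyStepIO(inputs={}, outputs=anchors)
session.weightsFromHost()
session.run(stepio)

# The anchored output should hold a + b, i.e. 3 + 1.
print(anchors[o_d2h.tensor_id()])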
Example 19
def test_subgraph():
    class ScaleNShift(pir.Module):
        def __init__(self):
            self.W: pir.Tensor = None
            self.b: pir.Tensor = None

        def build(self,
                  x: pir.Tensor,
                  out_features: int,
                  bias: bool = True) -> pir.Tensor:
            self.W = pir.subgraph_input((x.shape[-1], out_features),
                                        pir.float32, "W")
            y = ops.mul(x, self.W)
            if bias:
                self.b = pir.subgraph_input((out_features, ), pir.float32, "b")
                y = y + self.b
            return y

    ir = pir.Ir()
    main = ir.main_graph()
    with main:
        h2d = pir.h2d_stream((16, 16), pir.dtypes.float32)
        x = ops.host_load(h2d, "x")

        W = pir.variable(np.random.normal(0, 0.1, (16, 16)), name="W")
        b = pir.variable(np.zeros(16), name="b", dtype=pir.dtypes.float32)

        ss = ScaleNShift()
        ss_graph = ir.create_graph(ss, x, out_features=16)

        call_info = ops.call_with_info(ss_graph,
                                       x,
                                       subgraph_in_to_parent_in={
                                           ss.W: W,
                                           ss.b: b
                                       })

        y = call_info.get_output_tensors()[0]
        d2h = pir.d2h_stream(y.shape, y.dtype)
        ops.host_store(d2h, y)

    assert len(ss_graph.get_input_tensors()) == 3
    assert len(ss_graph.get_output_tensors()) == 1

    ss_bwd_info = pir.transforms.autodiff.autodiff(ss_graph)

    # Check an additional output has been added to the fwd graph.
    assert len(ss_graph.get_output_tensors()) == 2

    bwd_graph = ss_bwd_info.graph

    assert isinstance(bwd_graph, pir.Graph)

    assert len(ss_bwd_info.expected_inputs) == len(
        bwd_graph.get_input_tensors())
    assert len(ss_bwd_info.expected_outputs) == len(
        bwd_graph.get_output_tensors())

    for op in bwd_graph._pb_graph.getOps():
        grad_ops = (_ir.op.SumOp, _ir.op.MulArg0GradOp, _ir.op.MulArg1GradOp,
                    _ir.op.AddArg0GradOp, _ir.op.AddArg1GradOp)
        assert isinstance(op, grad_ops)

    with main:
        grad_seed = pir.constant(np.ones((16, 16), np.float32))
        activations = pir.transforms.autodiff.get_expected_forward_inputs_from_call(
            call_info, ss_bwd_info)
        grads = ops.call(bwd_graph,
                         grad_seed,
                         subgraph_in_to_parent_in=activations)

    assert len(grads) == len(ss_bwd_info.expected_outputs)