Example #1

def get_updated_p1_popart():
    builder = popart.Builder()

    # Computation is out = matmul(c1, p1)
    c1 = builder.addInputTensor(popart.TensorInfo("FLOAT", c1_shape))
    p1 = builder.addInitializedInputTensor(p1_init)
    out = builder.aiOnnx.matmul([c1, p1])

    # Set up a training session.
    device = tu.create_test_device()
    dataFlow = popart.DataFlow(
        1, {
            c1: popart.AnchorReturnType("Final"),
            p1: popart.AnchorReturnType("Final"),
            out: popart.AnchorReturnType("Final")
        })

    # We're testing that losses other than nll/l1 work.
    loss = builder.aiOnnx.reducesum([out])
    optimizer = popart.SGD({
        "defaultLearningRate": (sgd_learning_rate, True),
        "defaultMomentum": (sgd_moment, False),
        "lossScaling": (200, constLossScaling)
    })
    session = popart.TrainingSession(builder.getModelProto(),
                                     deviceInfo=device,
                                     dataFlow=dataFlow,
                                     loss=loss,
                                     optimizer=optimizer)

    session.prepareDevice()
    session.weightsFromHost()

    # Run the popart session to get an answer.
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({c1: c1_init}, anchors)
    session.run(stepio)
    return anchors[c1], anchors[p1], anchors[out]
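
This helper closes over names defined by its enclosing test. A minimal sketch of that assumed context (all values hypothetical), so the snippet can run standalone:

import numpy as np
import popart
import test_util as tu  # the PopART test-suite helper module, assumed importable

c1_shape = [2, 2]                                       # hypothetical
c1_init = np.random.rand(*c1_shape).astype(np.float32)  # hypothetical
p1_init = np.random.rand(2, 2).astype(np.float32)       # hypothetical
sgd_learning_rate = 0.1                                 # hypothetical
sgd_moment = 0.9                                        # hypothetical
constLossScaling = False                                # hypothetical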

Example #2

def test_distributed_replicated_allreduce():
    mpi_params = get_mpi_params()
    mpi_size, mpi_rank = mpi_params

    input_data = np.array(range(10), dtype=np.float32)

    builder = popart.Builder()
    t = builder.addInitializedInputTensor(input_data, "input")
    o = builder.aiGraphcore.replicatedallreduce([t])
    builder.addOutputTensor(o)
    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})
    opts = popart.SessionOptions()
    opts.enableReplicatedGraphs = False
    opts.enableDistributedReplicatedGraphs = True
    opts.globalReplicaOffset = mpi_rank
    opts.globalReplicationFactor = 2

    numIpus = 1

    device = tu.create_test_device(numIpus=numIpus)
    session = popart.InferenceSession(fnModel=proto,
                                      dataFlow=dataFlow,
                                      userOptions=opts,
                                      deviceInfo=device)

    session.prepareDevice()

    anchors = session.initAnchorArrays()

    inputs = {}
    stepio = popart.PyStepIO(inputs, anchors)

    session.run(stepio)

    ground_truth = 2.0 * np.array(range(10), dtype=np.float32)
    assert np.allclose(anchors[o], ground_truth)
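
get_mpi_params comes from the test's MPI helpers; below is a plausible sketch using mpi4py (an assumption about the helper, not its actual source). The test is presumably launched with two processes, e.g. mpirun -np 2 python -m pytest.

from mpi4py import MPI  # assumed dependency of this sketch

def get_mpi_params():
    comm = MPI.COMM_WORLD
    return comm.Get_size(), comm.Get_rank()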

Example #3

def test_stream_on_off(tmpdir):

    builder = popart.Builder()
    shape = popart.TensorInfo("FLOAT16", [2])

    i1 = builder.addInputTensor(shape)
    i2 = builder.addInputTensor(shape)
    o = builder.aiOnnx.add([i1, i2])
    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(
        1, {
            i1: popart.AnchorReturnType("All"),
            i2: popart.AnchorReturnType("All"),
            o: popart.AnchorReturnType("All")
        })

    session = popart.InferenceSession(fnModel=proto,
                                      dataFlow=dataFlow,
                                      deviceInfo=tu.create_test_device())

    session.prepareDevice()

    anchors = session.initAnchorArrays()

    inputs = {
        i1: np.array([1., 3.], dtype=np.float16),
        i2: np.array([7., 8.], dtype=np.float16)
    }
    stepio = popart.PyStepIO(inputs, anchors)

    session.run(stepio)

    # Confirm that anchoring a stream (input) tensor returns its values
    # device-to-host unchanged.
    assert np.allclose(anchors[i1], np.array([1., 3.], dtype=np.float16))
    assert np.allclose(anchors[i2], np.array([7., 8.], dtype=np.float16))

Example #4

def check_op_with_invalid_axes(opset, reduceOp, axis):
    with pytest.raises(popart.popart_exception) as e_info:
        builder = popart.Builder()
        tensor_info = popart.TensorInfo("FLOAT", SHAPE)
        x = builder.addInputTensor(tensor_info, "input")

        # reducemedian returns a list of 2 outputs, so wrap single-output
        # ops in a list as well, for uniform handling
        ys = getattr(getattr(builder, opset), reduceOp)([x], axes=[axis])
        if not isinstance(ys, list):
            ys = [ys]
        for y in ys:
            builder.addOutputTensor(y)
        anchors = {y: popart.AnchorReturnType("ALL") for y in ys}

        proto = builder.getModelProto()
        dataFlow = popart.DataFlow(1, anchors)
        device = popart.DeviceManager().createCpuDevice()

        session = popart.InferenceSession(proto, dataFlow,
                                          device)  # this should throw an error
    assert (e_info.value.args[0] == (
        "Axis {} is out of acceptable range [{}, {}]").format(
            axis, -RANK, RANK - 1))
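
SHAPE and RANK are module-level constants in the original test. A hedged usage sketch with hypothetical values:

SHAPE = [2, 3, 4]  # hypothetical
RANK = len(SHAPE)

# Any axis outside [-RANK, RANK - 1] must raise:
check_op_with_invalid_axes("aiOnnx", "reducesum", RANK)
check_op_with_invalid_axes("aiOnnx", "reducemean", -RANK - 1)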

Example #5

def run_lstm_popart(onnx_file_name, inputs):
    # generate a popart session
    builder = popart.Builder(onnx_file_name)
    outputs = builder.getOutputTensorIds()
    dataFlow = popart.DataFlow(1, outputs)
    device = tu.create_test_device(1)
    s = popart.InferenceSession(fnModel=onnx_file_name,
                                dataFlow=dataFlow,
                                deviceInfo=device)

    anchor_map = s.initAnchorArrays()
    s.prepareDevice()

    # run the popart session
    input_map = {
        'X': inputs[0],
        'initial_h': inputs[1],
        'initial_c': inputs[2]
    }
    stepio = popart.PyStepIO(input_map, anchor_map)
    s.run(stepio)

    return (anchor_map['Y'], anchor_map['Y_h'], anchor_map['Y_c'])

Example #6

def test_enabled_recomputation():
    """
    In this test we check that NO error is thrown when doing pipelining
    if recomputation is enabled
    """
    builder, op0_out, op1_out, op2_out, op3_out, anchor_map = get_simple_linear_model(
    )

    opts = popart.SessionOptions()
    opts.enablePipelining = True
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual
    opts.autoRecomputation = popart.RecomputationType.Standard

    builder.virtualGraph(op0_out, 0)
    builder.virtualGraph(op1_out, 1)
    builder.virtualGraph(op2_out, 1)
    builder.virtualGraph(op3_out, 1)

    session = popart.InferenceSession(fnModel=builder.getModelProto(),
                                      dataFlow=popart.DataFlow(10, anchor_map),
                                      userOptions=opts,
                                      deviceInfo=tu.create_test_device(
                                          numIpus=2, tilesPerIpu=20))

Example #7

def getTrainingSession(fn):
    opts = popart.SessionOptions()
    opts.enableGradientAccumulation = True
    opts.accumulationFactor = accum_factor
    opts.disableGradAccumulationTensorStreams = False

    if explicit_loops:
        opts.enableExplicitMainLoops = True
        opts.aliasZeroCopy = True
        opts.explicitRecomputation = True
        opts.useHostCopyOps = True

    sess = popart.TrainingSession(
        fnModel=fn,
        dataFlow=popart.DataFlow(1, {}),
        deviceInfo=tu.create_test_device(tilesPerIPU=testTilesPerIPU),
        loss=output_name,
        optimizer=adam_optimizer,
        userOptions=opts)
    sess.prepareDevice()

    sess.weightsFromHost()
    return sess
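
getTrainingSession closes over its enclosing test's names; a hypothetical sketch of that context:

accum_factor = 4          # hypothetical
testTilesPerIPU = 16      # hypothetical
explicit_loops = True     # hypothetical
output_name = "loss"      # hypothetical: the model's loss tensor id
adam_optimizer = popart.Adam({"defaultLearningRate": (0.01, False)})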

Example #8

def test_fail_due_to_mismatch_permutation():
    d1 = np.random.randn(10, 20, 30).astype(np.float32)

    builder = popart.Builder()
    d = builder.addInputTensor("FLOAT", d1.shape)
    o = builder.aiOnnx.transpose([d], perm=(0, 2, 1))
    o = builder.aiOnnx.transpose([o], perm=(1, 2, 0))

    sess = popart.InferenceSession(fnModel=builder.getModelProto(),
                                   deviceInfo=tu.create_test_device(),
                                   dataFlow=popart.DataFlow(1, [o]))
    sess.prepareDevice()

    anchors = sess.initAnchorArrays()

    stepio = popart.PyStepIO({d: d1}, anchors)
    sess.weightsFromHost()

    sess.run(stepio)
    ir = json.loads(sess._serializeIr(popart.IrSerializationFormat.JSON))
    assert len(
        list(filter(lambda op: "Transpose" in op["type"],
                    ir["maingraph"]))) == 2

Example #9

def test_no_virtual_graph():

    popart.getLogger().setLevel("TRACE")

    builder = popart.Builder()

    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1]))
    i2 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1]))
    o1 = builder.aiOnnx.add([i1, i2])
    o2 = builder.aiOnnx.add([i1, i2])
    o = builder.aiOnnx.add([o1, o2])
    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    opts = popart.SessionOptions()

    popart.InferenceSession(fnModel=proto,
                            dataFlow=dataFlow,
                            userOptions=opts,
                            deviceInfo=tu.create_test_device())

Example #10

def test_auto_loss_scaling_bin_edge_factor_range(binEdgeLocation):
    """Test if an error is thrown if the binEdgeLocation hyperparameter is 
    outside [0, 1].
    """
    builder = popart.Builder()
    t0 = builder.addInputTensor("FLOAT", [2, 2])
    t1_data = np.random.rand(2, 2).astype(np.float32)
    t1 = builder.addInitializedInputTensor(t1_data)
    mm0 = builder.aiOnnx.matmul([t0, t1])
    loss = builder.aiGraphcore.identityloss([mm0])

    optimizer = popart.SGD({"lossScaling": (2, False)})

    opts = popart.SessionOptions()
    opts.automaticLossScalingSettings.enabled = True
    opts.automaticLossScalingSettings.binEdgeLocation = binEdgeLocation
    opts.automaticLossScalingSettings.thresholdUpperCountProportion = 0.2

    with ExitStack() as stack:
        e_info = None
        if not 0 <= binEdgeLocation <= 1:
            e_info = stack.enter_context(pytest.raises(
                popart.popart_exception))

        session = popart.TrainingSession(builder.getModelProto(),
                                         deviceInfo=tu.create_test_device(),
                                         dataFlow=popart.DataFlow(1, [loss]),
                                         loss=loss,
                                         optimizer=optimizer,
                                         userOptions=opts)

        if e_info:
            assert e_info.value.args[0].startswith(
                "[AutomaticLossScale transform] Out of range value for 'binEdgeLocation'."
            )
        else:
            session.prepareDevice()
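
In the original suite binEdgeLocation arrives via pytest parametrisation, and the body needs ExitStack; a sketch of the presumed scaffolding (decorator values hypothetical):

import pytest
from contextlib import ExitStack  # needed by the test body above

# Presumed decorator on the test above:
# @pytest.mark.parametrize("binEdgeLocation", [-0.1, 0.0, 0.5, 1.0, 1.1])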

Example #11

def test_overwriting_external_data_file():
    # Verify that if calling modelToHost twice, the external data is overwritten
    # correctly, and not corrupted!
    builder = popart.Builder()
    d1 = np.random.rand(3, 3).astype(np.float32)
    i1 = builder.addInitializedInputTensor(d1)
    o = builder.aiOnnx.matmul([i1, i1])
    loss = builder.aiGraphcore.identityloss([o])

    with TemporaryDirectory() as tmpdir:
        tmpfile0 = os.path.join(tmpdir, "model_tensors0.onnx")
        builder.saveInitializersExternally([i1], tmpfile0)

        optimizer = popart.SGD({
            "defaultLearningRate": (0.2, True),
            "defaultMomentum": (0.5, True)
        })

        session = popart.TrainingSession(
            deviceInfo=popart.DeviceManager().createCpuDevice(),
            fnModel=builder.getModelProto(),
            loss=loss,
            optimizer=optimizer,
            dataFlow=popart.DataFlow(1, []))

        session.prepareDevice()
        session.weightsFromHost()
        anchors = session.initAnchorArrays()
        session.run(popart.PyStepIO({}, anchors))

        # Should overwrite external data with the same data
        tmpfile1 = os.path.join(tmpdir, "model0.onnx")
        session.modelToHost(tmpfile1)
        weights0 = np.fromfile(tmpfile0, dtype=np.float32)
        session.modelToHost(tmpfile1)
        weights1 = np.fromfile(tmpfile0, dtype=np.float32)
        assert np.array_equal(weights0, weights1)

Example #12

def test_identity_inference_session(inputShape, inputArray, BPS, art, R,
                                    explicit, expected):
    builder = popart.Builder()

    inInfo = popart.TensorInfo("FLOAT", inputShape)

    i1 = builder.addInputTensor(inInfo)
    o = builder.aiOnnx.identity([i1])
    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    batchesPerStep = BPS
    dataFlow = popart.DataFlow(batchesPerStep, {o: art})

    opts = popart.SessionOptions()
    opts.replicatedGraphCount = R
    opts.enableReplicatedGraphs = R > 1
    opts.enableExplicitMainLoops = explicit
    opts.useHostCopyOps = explicit

    device = tu.create_test_device(numIpus=R)
    session = popart.InferenceSession(fnModel=proto,
                                      dataFlow=dataFlow,
                                      deviceInfo=device,
                                      userOptions=opts)

    session.prepareDevice()

    anchors = session.initAnchorArrays()

    inputs = {i1: np.array(inputArray, dtype=np.float32)}
    stepio = popart.PyStepIO(inputs, anchors)

    session.run(stepio)

    assert (np.array_equal(anchors[o], expected))
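
One hypothetical parametrisation that satisfies the final assertion: with a single batch per step and no replication, "All" anchors simply echo the identity's input.

test_identity_inference_session(inputShape=[2, 2],
                                inputArray=[[1., 2.], [3., 4.]],
                                BPS=1,
                                art=popart.AnchorReturnType("All"),
                                R=1,
                                explicit=False,
                                expected=np.array([[1., 2.], [3., 4.]],
                                                  dtype=np.float32))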

Example #13

def test_ipu_copy_bca2():

    popart.getLogger().setLevel("TRACE")

    builder = popart.Builder()

    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1]))
    i2 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1]))
    o1 = builder.aiOnnx.add([i1, i2])
    o2 = builder.aiOnnx.add([i1, i2])

    o3 = builder.aiOnnx.add([o1, o2])
    o4 = builder.aiOnnx.add([o1, o2])

    o = builder.aiOnnx.add([o3, o4])
    builder.addOutputTensor(o)

    builder.virtualGraph(o1, 0)
    builder.virtualGraph(o2, 0)
    builder.virtualGraph(o3, 1)
    builder.virtualGraph(o4, 1)

    builder.virtualGraph(o, 2)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    opts = popart.SessionOptions()
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual

    s = popart.InferenceSession(fnModel=proto,
                                dataFlow=dataFlow,
                                userOptions=opts,
                                deviceInfo=tu.create_test_device(numIpus=3))

    s.prepareDevice()

Example #14

def get_simple_model_cycle_count(bps):
    builder = popart.Builder()
    # Make the model large enough that the cycle count is dominated
    # by compute and internal exchange (as opposed to host exchange)
    d_shape = [200, 200]
    d0 = builder.addInputTensor(popart.TensorInfo("FLOAT", d_shape))
    out = d0
    for layer in range(100):
        out = builder.aiOnnx.sin([out])

    opts = popart.SessionOptions()
    opts.instrumentWithHardwareCycleCounter = True
    # Verify that we can still measure cycles when data streams
    # (inputs/weights/anchors) are off
    opts.syntheticDataMode = popart.SyntheticDataMode.Zeros

    session = popart.InferenceSession(
        fnModel=builder.getModelProto(),
        dataFlow=popart.DataFlow(bps, {out: popart.AnchorReturnType("All")}),
        userOptions=opts,
        deviceInfo=tu.create_test_device(),
        patterns=popart.Patterns(popart.PatternsLevel.NoPatterns))

    session.prepareDevice()
    anchors = session.initAnchorArrays()
    if bps > 1:
        d_shape.insert(0, bps)
    stepio = popart.PyStepIO({d0: np.random.rand(*d_shape).astype(np.float32)},
                             anchors)
    session.run(stepio)

    cycles = session.getCycleCount()
    cycles_ = session.getCycleCount()
    print("BPS: ", bps, " Cycles: ", cycles)
    # Verify that the tensor is not overwritten when streaming off device
    assert (cycles == cycles_)
    return cycles
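
A hedged usage sketch: with host data streams disabled, the measured cycle count should grow roughly linearly with batches per step.

c1 = get_simple_model_cycle_count(bps=1)
c10 = get_simple_model_cycle_count(bps=10)
assert c10 > c1  # more batches per step, more device cycles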

Example #15

def get_replicated_dropout_session(replication_factor=4,
                                   dsize=10,
                                   num_layers=1,
                                   ratio=0.3,
                                   batches_per_step=1,
                                   seed=0):
    builder = popart.Builder()
    ip = builder.addInputTensor(popart.TensorInfo("FLOAT", [dsize]))
    d__ip = popart.reservedGradientPrefix() + ip
    out = ip
    for layer in range(num_layers):
        [out] = builder.aiOnnx.dropout([out], num_outputs=1, ratio=ratio)
    loss = builder.aiGraphcore.identityloss([out])
    builder.addOutputTensor(loss)

    device = tu.create_test_device(replication_factor)

    dfAnchors = [out, ip, d__ip]
    dfAnchors = {i: popart.AnchorReturnType("All") for i in dfAnchors}

    opts = popart.SessionOptions()
    opts.enableReplicatedGraphs = True
    opts.replicatedGraphCount = replication_factor

    session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                     dataFlow=popart.DataFlow(
                                         batches_per_step, dfAnchors),
                                     optimizer=popart.ConstSGD(0.1),
                                     loss=loss,
                                     userOptions=opts,
                                     deviceInfo=device)

    session.prepareDevice()
    session.setRandomSeed(seed)
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    return session, ip, out, d__ip, anchors
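
A hedged usage sketch: feed identical data to every replica; differing outputs then indicate that each replica drew its own dropout mask (overwhelmingly likely at ratio=0.3).

session, ip, out, d__ip, anchors = get_replicated_dropout_session(
    replication_factor=4, dsize=10)
data = np.ones((4, 10), dtype=np.float32)  # leading replication dimension
session.run(popart.PyStepIO({ip: data}, anchors))
print(anchors[out])  # one row per replica, each with its own mask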

Example #16

def test_np_memory_layout_add_initialized_input_tensor1():
    """ Test that when we create a parameter input with a non-contiguous array
        things still work (first test).
    """
    np.random.seed(1)

    # Build a computational graph. Initialise an input parameter with a transposed
    # input (which happens to be non-contiguous in numpy).
    builder = popart.Builder()
    input1Value = np.random.randint(0, 100, size=(2, 3), dtype='int32')
    input1Value = np.transpose(input1Value, [1, 0])
    input1 = builder.addInitializedInputTensor(input1Value)
    input1 = builder.aiOnnx.identity([input1])
    builder.addOutputTensor(input1)

    # Prepare a session.
    anchorConfig = {input1: popart.AnchorReturnType("ALL")}
    dataFlow = popart.DataFlow(1, anchorConfig)
    deviceConfig = {'numIPUs': 1}
    dm = popart.DeviceManager()
    device = dm.createIpuModelDevice(deviceConfig)
    session = popart.InferenceSession(fnModel=builder.getModelProto(),
                                      dataFlow=dataFlow,
                                      deviceInfo=device)

    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()

    # Run the session.
    stepio = popart.PyStepIO({}, anchors)
    session.run(stepio)

    # Compare outputs.
    assert (anchors[input1] == input1Value
            ).all(), f"Expected {anchors[input1]} to match {input1Value}"

Example #17

def test_valid_recompute_options():
    builder = popart.Builder()

    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1]))
    r1 = builder.aiOnnx.relu([i1])
    o = builder.aiOnnx.relu([r1])

    # specify manual recomputation
    builder.recomputeOutputInBackwardPass(r1)

    # specify auto recomputation as well
    opts = popart.SessionOptions()
    opts.autoRecomputation = popart.RecomputationType.Standard

    with pytest.raises(popart.popart_exception) as e_info:
        session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                         dataFlow=popart.DataFlow(1, [o]),
                                         optimizer=popart.ConstSGD(0.001),
                                         loss=o,
                                         patterns=popart.Patterns([]),
                                         userOptions=opts,
                                         deviceInfo=tu.create_test_device())
    assert (e_info.value.args[0] ==
            "A mixture of auto and manual recomputaion is not supported")

Example #18

def test_execution_report(tmpdir):

    builder = popart.Builder()

    shape = popart.TensorInfo("FLOAT", [1])

    i1 = builder.addInputTensor(shape)
    i2 = builder.addInputTensor(shape)
    o = builder.aiOnnx.add([i1, i2])
    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    session = popart.InferenceSession(
        fnModel=proto,
        dataFlow=dataFlow,
        deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    anchors = session.initAnchorArrays()

    session.prepareDevice()

    d1 = np.array([10.]).astype(np.float32)
    d2 = np.array([11.]).astype(np.float32)
    stepio = popart.PyStepIO({i1: d1, i2: d2}, anchors)

    session.run(stepio, "Test message")

    rep = session.getExecutionReport()

    # Need to convert bytes to string
    details = json.loads(rep.decode("utf-8"))

    assert (details['runs'][0]['name'] == "Test message")

Example #19

def test_view_simplify(a, b, target):
    d1 = np.random.randn(10, 20).astype(np.float32)

    builder = popart.Builder()
    d = builder.addInputTensor("FLOAT", d1.shape)
    o = a(builder, d, [1, *d1.shape])
    o = b(builder, o, [*reversed(d1.shape)])

    opts = popart.SessionOptions()
    # ViewSimplifyPattern only runs when outlining
    opts.enableOutlining = True
    # Set the threshold high so nothing actually gets outlined.
    # This makes it easier to parse the IR.
    opts.outlineThreshold = 100000

    sess = popart.InferenceSession(fnModel=builder.getModelProto(),
                                   deviceInfo=tu.create_test_device(),
                                   dataFlow=popart.DataFlow(1, [o]))
    sess.prepareDevice()

    anchors = sess.initAnchorArrays()

    stepio = popart.PyStepIO({d: d1}, anchors)
    sess.weightsFromHost()

    sess.run(stepio)
    ir = json.loads(sess._serializeIr(popart.IrSerializationFormat.JSON))

    def outputs_o(op):
        return o in map(lambda t: t["name"], op["outputs"])

    def matches_target(op):
        return target in op["type"] and outputs_o(op)

    assert len(list(filter(matches_target, ir["maingraph"]))) == 1
    assert np.allclose(anchors[o].flatten(), d1.flatten())
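
a, b and target come from parametrisation. A hypothetical pair matching the (builder, tensor, shape) signature, built on the Builder's reshape_const helper (an assumption about the parametrisation values, not the originals):

def reshape_cb(builder, t, shape):
    # Inserts a Reshape op with a constant shape tensor.
    return builder.reshape_const(builder.aiOnnx, [t], shape)

# e.g. a = reshape_cb, b = reshape_cb, target = "Reshape":
# two chained reshapes should simplify to a single Reshape producing o.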

Example #20

def test_auto_virtual_graph_train():

    ipus = 2

    popart.getLogger().setLevel("TRACE")

    builder = popart.Builder()

    input_shape = [1, 64]
    input = builder.addInputTensor(popart.TensorInfo("FLOAT16", input_shape))

    x = input
    for i in range(ipus):
        w = builder.addInitializedInputTensor(np.zeros([64, 64], np.float16))
        x = builder.aiOnnx.matmul([x, w])
    output = x
    builder.addOutputTensor(output)

    loss = builder.aiGraphcore.identityloss([output])

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {loss: popart.AnchorReturnType("Final")})

    opts = popart.SessionOptions()
    opts.virtualGraphMode = popart.VirtualGraphMode.Auto

    device = tu.create_test_device(numIpus=ipus)

    popart.TrainingSession(fnModel=proto,
                           dataFlow=dataFlow,
                           userOptions=opts,
                           loss=loss,
                           optimizer=popart.SGD(
                               {"defaultLearningRate": (0.01, True)}),
                           deviceInfo=device)

Example #21

def test_execution_report_reset(tmpdir):

    builder = popart.Builder()

    shape = popart.TensorInfo("FLOAT", [1])

    i1 = builder.addInputTensor(shape)
    i2 = builder.addInputTensor(shape)
    o = builder.aiOnnx.add([i1, i2])
    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    opts = popart.SessionOptions()
    opts.engineOptions = {"debug.instrument": "true"}

    session = popart.InferenceSession(
        fnModel=proto,
        dataFlow=dataFlow,
        userOptions=opts,
        deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    anchors = session.initAnchorArrays()

    session.prepareDevice()

    d1 = np.array([10.]).astype(np.float32)
    d2 = np.array([11.]).astype(np.float32)
    stepio = popart.PyStepIO({i1: d1, i2: d2}, anchors)

    session.run(stepio)

    rep1 = session.getExecutionReport(resetProfile=False)
    rep2 = session.getExecutionReport(resetProfile=False)
    assert len(rep1) == len(rep2)

Example #22

def test_matmul_serialization_invalid_factor(tmpdir):
    lhs_shape = [2, 2]
    rhs_shape = [2, 4]
    lhs_data = np.random.rand(*lhs_shape).astype(np.float32)
    rhs_data = np.random.rand(*rhs_shape).astype(np.float32)

    builder = popart.Builder()

    lhs = builder.addInputTensor(popart.TensorInfo("FLOAT", lhs_shape), "lhs")
    rhs = builder.addInputTensor(popart.TensorInfo("FLOAT", rhs_shape), "rhs")

    o = builder.aiOnnx.matmul([lhs, rhs])
    builder.setSerializeMatMul({o}, "output_channels", 3)

    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    opts = getBaseOptions()

    pat = popart.Patterns(['MatMulOp', 'MatMulRhsGradOp', 'MatMulLhsGradOp'])
    pat.enableRuntimeAsserts(False)

    with pytest.raises(popart.popart_exception) as e_info:
        session = popart.InferenceSession(
            fnModel=proto,
            dataFlow=dataFlow,
            userOptions=opts,
            patterns=pat,
            deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    assert (e_info.value.args[0].startswith(
        "Invalid serialisation factor 3 for output channels dim 4. output_channels dim should be a multple of the serialisation factor"
    ))
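
For contrast, a hedged sketch of an accepted setting: the serialisation factor must divide the dimension being split (here output channels, dim 4).

builder.setSerializeMatMul({o}, "output_channels", 2)  # 4 % 2 == 0: accepted
builder.setSerializeMatMul({o}, "output_channels", 4)  # also accepted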

Example #23

def getAnchors(extraReduction):
    builder = popart.Builder()
    ip = builder.addInitializedInputTensor(ip_data)
    lb = builder.addInputTensor("INT32", lshape)

    sm = builder.aiOnnx.softmax([ip], axis=np.size(lshape))
    if extraReduction:
        nll = builder.aiGraphcore.nllloss(
            [sm, lb], reduction=popart.ReductionType.NoReduction)
        loss = builder.aiOnnx.reducesum([nll])
    else:
        loss = builder.aiGraphcore.nllloss(
            [sm, lb], reduction=popart.ReductionType.Sum)

    anchors = [popart.reservedGradientPrefix() + ip]
    # Anchor 'loss' too, except when testing the SoftmaxGradDirect
    # pattern on its own, which requires that 'loss' is not anchored
    if 'SoftmaxGradDirect' not in patternsList or 'NlllWithSoftmaxGradDirect' in patternsList:
        anchors.append(loss)

    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        loss=loss,
        dataFlow=popart.DataFlow(1, anchors),
        optimizer=popart.ConstSGD(0.1),
        deviceInfo=tu.create_test_device(),
        patterns=popart.Patterns(
            patternsList).enableRuntimeAsserts(False))
    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({lb: lb_data.astype(np.int32)},
                             anchors)
    session.run(stepio)
    return anchors
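
getAnchors closes over its enclosing test's data and pattern list; a hypothetical sketch of that context:

lshape = [2, 2]                                         # hypothetical
nClasses = 3                                            # hypothetical
ip_data = np.random.rand(*lshape, nClasses).astype(np.float32)
lb_data = np.random.randint(0, nClasses, size=lshape)
patternsList = ['PreUniRepl']                           # hypothetical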

Example #24

def test_summary_report_before_execution(tmpdir):

    builder = popart.Builder()

    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1, 2, 32, 32]))
    i2 = builder.addInputTensor(popart.TensorInfo("FLOAT", [1, 2, 32, 32]))
    o = builder.aiOnnx.add([i1, i2])
    builder.addOutputTensor(o)

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o: popart.AnchorReturnType("All")})

    session = popart.InferenceSession(fnModel=proto,
                                      dataFlow=dataFlow,
                                      deviceInfo=tu.create_test_device())

    session.initAnchorArrays()

    with pytest.raises(popart.popart_exception) as e_info:
        session.getSummaryReport()

    assert (e_info.value.args[0].endswith(
        "Session must have been prepared before a report can be fetched"))

Example #25

def run_pt_session(syntheticDataMode, inputType=None, d_shape=[100]):
    builder = popart.Builder()
    if inputType is not None:
        d0_i8 = builder.addInputTensor(
            popart.TensorInfo(inputType.builder_type, d_shape))
        d0 = builder.aiOnnx.cast([d0_i8], "FLOAT")
        in_name = d0_i8
    else:
        d0 = builder.addInputTensor(popart.TensorInfo("FLOAT", d_shape))
        in_name = d0
    p = builder.aiGraphcore.printtensor([d0])

    opts = popart.SessionOptions()
    opts.syntheticDataMode = syntheticDataMode

    session = popart.InferenceSession(fnModel=builder.getModelProto(),
                                      dataFlow=popart.DataFlow(1, [p]),
                                      userOptions=opts,
                                      deviceInfo=tu.create_test_device())

    session.prepareDevice()
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({in_name: np.ones(d_shape)}, anchors)
    session.run(stepio)
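
A hedged usage sketch: with synthetic data enabled, the host-provided ones never reach the device, so the printed tensor shows zeros; with the mode off, the real data streams through.

run_pt_session(popart.SyntheticDataMode.Off)    # prints ones
run_pt_session(popart.SyntheticDataMode.Zeros)  # prints zeros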

Example #26

def run_embedding_layer(args):
    set_library_seeds(args.seed)

    config = bert_config_from_args(args)

    initializers = bert_pretrained_initialisers(config, args)

    logger.info("Building Model")
    # Specifying ai.onnx opset9 for the slice syntax
    # TODO: Change slice to opset10
    model = Bert(config,
                 builder=popart.Builder(opsets={
                     "ai.onnx": 9,
                     "ai.onnx.ml": 1,
                     "ai.graphcore": 1
                 }),
                 initializers=initializers,
                 execution_mode=args.execution_mode)

    # If config.host_embedding is enabled, indices and positions will contain
    # the embedding matrices instead of the index vectors.
    indices, positions, segments, masks, labels = bert_add_inputs(args, model)
    logits = tuple([model.embedding(indices, positions, segments)])

    if args.inference:
        outputs = bert_add_logit_outputs(model, logits)
        writer = None

        dataset = get_bert_dataset(
            model, args, [indices, positions, segments, masks, labels])

        data_flow = popart.DataFlow(dataset.batches_per_step, outputs)

        iteration = Iteration(
            args,
            steps_per_epoch=len(dataset),
            writer=writer,
            recording_steps=args.aggregate_metrics_over_steps)

        request_ipus = bert_required_ipus(args, model)

        device = acquire_device(args, request_ipus)

        session, anchors = bert_inference_session(model, args, data_flow,
                                                  device)
        logger.info("Inference Started")
        inputs = [indices, positions, segments, *masks]
        """bert_infer_loop(args, session,
                        dataset, inputs, logits, anchors,
                        iteration)"""
        save_results = args.task == "SQUAD" and not (args.synthetic_data
                                                     or args.generated_data)

        start_times = defaultdict(list)
        end_times = defaultdict(list)
        # Create the stepio once outside of the inference loop:
        static_data = {}
        if args.low_latency_inference and args.task == "SQUAD":
            stepio = create_callback_stepio(static_data, anchors, start_times,
                                            end_times,
                                            dataset.batches_per_step)
        else:
            stepio = None

        output = []
        logger.info(dataset)
        for data in dataset:
            static_data.update({t: data[t] for t in inputs})
            result = bert_process_infer_data(args, session, static_data,
                                             anchors, logits, iteration,
                                             start_times, end_times, stepio)
            if save_results:
                output.append(result)
            break

        device.detach()
        return output

    return None

Example #27

def test_save_back_externally_saved_tensors():
    """
    Test that initializers (stored externally in the onnx model) that are
    updated in a training session are written back correctly when the onnx
    model is written using the Session API
    Model:
    in0 -
          \
           Matmul0 - Matmul1 - out
          /          /
    w0 --       w1--
    """
    builder = popart.Builder()
    shape = [4, 4]
    elms = np.prod(shape)
    numLayers = 2
    in0 = builder.addInputTensor(popart.TensorInfo("FLOAT", shape))
    initWeights = []
    weightsIds = []
    anchorsDef = {}
    out = in0
    for layer in range(numLayers):
        w_init = np.random.rand(*shape).astype('float32')
        initWeights.append(w_init)
        weightsIds.append(builder.addInitializedInputTensor(w_init))
        anchorsDef[weightsIds[layer]] = popart.AnchorReturnType("All")
        out = builder.aiOnnx.matmul([out, weightsIds[layer]])

    loss = builder.aiGraphcore.identityloss([out])
    tmpdir = tempfile.mkdtemp()
    tmpfile_weights = os.path.join(tmpdir, "weights.onnx")
    builder.saveInitializersExternally(weightsIds, tmpfile_weights)

    # Verify the initial weights are saved correctly
    for layer in range(numLayers):
        saved_weights = np.fromfile(tmpfile_weights,
                                    dtype=np.float32,
                                    count=elms,
                                    offset=layer * elms * 4)
        assert (np.array_equal(initWeights[layer].flatten(), saved_weights))

    opts = popart.SessionOptions()
    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        dataFlow=popart.DataFlow(1, anchorsDef),
        deviceInfo=popart.DeviceManager().createCpuDevice(),
        optimizer=popart.ConstSGD(10),
        loss=loss)

    anchors = session.initAnchorArrays()
    inputs = {in0: np.random.rand(*shape).astype('float32')}
    stepio = popart.PyStepIO(inputs, anchors)

    session.prepareDevice()
    session.weightsFromHost()

    session.run(stepio)

    # Check the weights have been updated
    for layer in range(numLayers):
        assert not np.allclose(anchors[weightsIds[layer]], initWeights[layer])

    # Save the model with updated weights back to disk
    tmpfile_model = os.path.join(tmpdir, "model.onnx")
    session.modelToHost(tmpfile_model)

    # Verify that the file containing tensor data has also been updated
    for layer in range(numLayers):
        saved_weights = np.fromfile(tmpfile_weights,
                                    dtype=np.float32,
                                    count=elms,
                                    offset=layer * elms * 4)
        assert np.array_equal(anchors[weightsIds[layer]].flatten(),
                              saved_weights)

Example #28

def main(args):
    set_library_seeds(args.seed)

    config = bert_config_from_args(args)

    initializers = bert_pretrained_initialisers(config, args)

    logger.info("Building Model")
    # Specifying ai.onnx opset9 for the slice syntax
    model = Bert(config,
                 builder=popart.Builder(opsets={
                     "ai.onnx": 9,
                     "ai.onnx.ml": 1,
                     "ai.graphcore": 1
                 }),
                 initializers=initializers,
                 execution_mode=args.execution_mode)

    # If config.host_embedding is enabled, indices and positions will contain
    # the embedding matrices instead of the index vectors.
    indices, positions, segments, masks, labels = bert_add_inputs(args, model)
    logits = bert_logits_graph(model, indices, positions, segments, masks)

    if args.inference:

        predictions = None
        losses = []
        if args.task == "PRETRAINING":
            # If this is a pretraining session, labels for NSP and MLM are already within the dataset,
            # so we can always calculate prediction performance
            predictions, _ = bert_infer_graph(model,
                                              logits,
                                              include_probs=False)

            if args.inference_lm_perplexity:
                losses = bert_perplexity_graph(model, logits, labels)

            outputs = bert_add_validation_outputs(model, predictions, losses)
        else:
            if args.inference_lm_perplexity:
                raise RuntimeError(
                    "Masked LM perplexity is only supported in pretraining.")

            outputs = bert_add_logit_outputs(model, logits)

        writer = None
    else:
        predictions, probs = bert_infer_graph(model, logits)
        losses = bert_loss_graph(model, probs, labels)
        outputs = bert_add_validation_outputs(model, predictions, losses)
        writer = bert_writer(args)

    embedding_dict, positional_dict = model.get_model_embeddings()

    dataset = get_bert_dataset(model, args,
                               [indices, positions, segments, masks, labels],
                               embedding_dict, positional_dict,
                               config.host_embedding == "MERGE")
    logger.info(f"Dataset length: {len(dataset)}")

    data_flow = popart.DataFlow(dataset.batches_per_step, outputs)

    iteration = Iteration(args,
                          batches_per_step=dataset.batches_per_step,
                          steps_per_epoch=len(dataset),
                          writer=writer,
                          recording_steps=args.aggregate_metrics_over_steps)

    request_ipus, required_ipus = calc_required_ipus(args, model)

    device = acquire_device(args, request_ipus)

    if args.inference:
        session, anchors = bert_inference_session(model, args, data_flow,
                                                  device)
        logger.info("Inference Started")
        inputs = [indices, positions, segments, *masks, *labels]
        bert_infer_loop(args, session, dataset, inputs, logits, anchors,
                        labels, predictions, losses, iteration)
        device.detach()
    else:
        if not args.no_training:
            optimizer_factory = ScheduledOptimizerFactory(
                args, iteration, model.tensors)

            session, anchors = bert_training_session(model, args, data_flow,
                                                     losses, device,
                                                     optimizer_factory)
            logger.info("Training Started")
            bert_train_loop(args, session, writer, dataset, labels,
                            predictions, losses, anchors, iteration,
                            optimizer_factory)

            device.detach()
            logger.info("Training Finished")

    return session, iteration

Example #29

def train(opts):
    if opts.fix_seed:
        print('Fixing the seed for result reproducibility')
        np.random.seed(0)
    train_data, train_labels, test_data, test_labels = load_mnist(
        opts.data_folder)
    # Limit batches_per_step so the test set isn't evaluated more than once.
    max_value = len(test_data) // opts.batch_size
    if max_value < opts.batches_per_step:
        print("(batches-per-step * batch-size) is larger than test set!\n"
              " Reduced batches-per-step to: {}\n".format(max_value))
        opts.batches_per_step = max_value
    training_set = DataSet(opts.batch_size, opts.batches_per_step, train_data,
                           train_labels)
    test_set = DataSet(opts.batch_size, opts.batches_per_step, test_data,
                       test_labels)
    print("Creating ONNX model.")
    model = MNIST_model(hidden_size=opts.hidden_size)
    proto, data_in, labels_in, output, loss = model.create_proto(
        opts.batch_size)
    # Describe how to run the model
    anchor_desc = {
        output: popart.AnchorReturnType("ALL"),
        loss: popart.AnchorReturnType("ALL")
    }
    dataFlow = popart.DataFlow(opts.batches_per_step, anchor_desc)
    # Options
    userOpts = popart.SessionOptions()
    # By default the validation graph will be optimized by converting all
    # variables to constants. Disabling that allows checkpoints to be loaded
    # into the model without recompiling.
    userOpts.constantWeights = False
    # Enable auto-sharding
    if opts.num_ipus > 1:
        userOpts.virtualGraphMode = popart.VirtualGraphMode.Auto
    # Enable pipelining
    if opts.pipeline:
        userOpts.enablePipelining = True
    userOpts.separateCallOpPdfs = False
    device = get_device(opts.num_ipus, opts.simulation)
    training = init_session(proto,
                            loss,
                            dataFlow,
                            userOpts,
                            device,
                            training=True)
    validation = init_session(proto,
                              loss,
                              dataFlow,
                              userOpts,
                              device,
                              training=False)
    print("Running training loop.")
    for i in range(opts.epochs):
        # Training
        training.session.weightsFromHost()
        for step, (data, labels) in enumerate(training_set):
            stepio = popart.PyStepIO({
                data_in: data,
                labels_in: labels
            }, training.anchors)
            training.session.run(
                stepio, 'Epoch ' + str(i) + ' training step ' + str(step))
        aggregated_loss = 0
        aggregated_accuracy = 0
        training.session.modelToHost('ckpt.onnx')
        validation.session.resetHostWeights('ckpt.onnx')
        validation.session.weightsFromHost()
        # Evaluation
        for step, (data, labels) in enumerate(test_set):
            stepio = popart.PyStepIO({
                data_in: data,
                labels_in: labels
            }, validation.anchors)
            validation.session.run(
                stepio, 'Epoch ' + str(i) + ' evaluation step ' + str(step))
            # Loss
            aggregated_loss += np.mean(validation.anchors[loss])
            # Accuracy
            results = np.argmax(
                validation.anchors[output].reshape(
                    [test_set.inputs_per_step, 10]), 1)
            num_correct = np.sum(
                results == labels.reshape([test_set.inputs_per_step]))
            aggregated_accuracy += num_correct / test_set.inputs_per_step
        # Log statistics
        aggregated_loss /= len(test_set)
        aggregated_accuracy /= len(test_set)
        print("Epoch #{}".format(i + 1))
        print("   Loss={0:.4f}".format(aggregated_loss))
        print("   Accuracy={0:.2f}%".format(aggregated_accuracy * 100))

Example #30

import cmdline
import popart
from popart.torch import torchwriter
# we require torch in this file to create the torch Module
import torch

args = cmdline.parse()

nInChans = 3
nOutChans = 10
batchSize = 2
batchesPerStep = 4
anchors = {
    "loss": popart.AnchorReturnType("EveryN", 2),
    "image0": popart.AnchorReturnType("All")
}
dataFlow = popart.DataFlow(batchesPerStep, anchors)
inputShapeInfo = popart.InputShapeInfo()
inputShapeInfo.add("image0",
                   popart.TensorInfo("FLOAT", [batchSize, nInChans, 32, 32]))
inputShapeInfo.add("image1",
                   popart.TensorInfo("FLOAT", [batchSize, nInChans, 32, 32]))
inputShapeInfo.add("label", popart.TensorInfo("INT32", [batchSize]))
inNames = ["image0", "image1"]
cifarInIndices = {"image0": 0, "image1": 0, "label": 1}
outNames = ["loss"]

willowOptPatterns = popart.Patterns(popart.PatternsLevel.All)


def nllloss(logprobs, targets):
    targets = targets.unsqueeze(1)