Exemplo n.º 1
0
def test_save_tensors_externally():
    """Check that initializers can be written to an external file.

    A model with one float32 and one float16 initialized input is built.
    The initializers are then saved externally, once through
    ``popart.GraphTransformer`` and once through ``popart.Builder``, and the
    size and contents of each written file are verified.
    """
    d1 = np.array([1, -1, 6]).astype(np.float32)
    d2 = np.array([7, 4]).astype(np.float16)
    builder = popart.Builder()
    i1 = builder.addInitializedInputTensor(d1)
    i2 = builder.addInitializedInputTensor(d2)
    # The result id is not needed; the op is only added to the model.
    builder.aiOnnx.add([i1, i2])

    def checkFile(file):
        # Check file exists
        assert os.path.exists(file)

        # Check file is of expected size: (3 * 4) + (2 * 2) = 16 bytes
        assert os.path.getsize(file) == 16

        # Read the binary data back in and check the value is as expected:
        # d1 is read from the start of the file, d2 from byte offset 12.
        assert np.array_equal(np.fromfile(file, dtype=np.float32, count=3), d1)
        assert np.array_equal(
            np.fromfile(file, dtype=np.float16, count=2, offset=12), d2)

    # A self-cleaning temporary directory avoids leaving the written files
    # (and the directory itself) behind after the test.
    with tempfile.TemporaryDirectory() as tmpdir:
        # Test GraphTransformer
        tmpfile0 = os.path.join(tmpdir, "model_tensors0.onnx")
        graph_transformer = popart.GraphTransformer(builder.getModelProto())
        graph_transformer.saveInitializersExternally([i1, i2], tmpfile0)
        checkFile(tmpfile0)

        # Test Builder
        tmpfile1 = os.path.join(tmpdir, "model_tensors1.onnx")
        builder.saveInitializersExternally([i1, i2], tmpfile1)
        checkFile(tmpfile1)
Exemplo n.º 2
0
def test_convert_all_fixed_point_initializers_to_constants(tmpdir):
    """Check that int64 initializers stop being model inputs after conversion.

    Two reshape ops are built, each taking a FLOAT data input and an int64
    initialized input. After
    ``convertAllFixedPointInitializersToConstants`` the data inputs must
    still be listed as model inputs and the int64 initializers must not.
    """
    model = popart.Builder()

    data_a = model.addInputTensor(popart.TensorInfo("FLOAT", [2, 3]))
    shape_a = model.addInitializedInputTensor(np.array([1, 6],
                                                       dtype=np.int64))
    reshaped_a = model.aiOnnx.reshape([data_a, shape_a])

    data_b = model.addInputTensor(popart.TensorInfo("FLOAT", [3, 2]))
    shape_b = model.addInitializedInputTensor(np.array([1, 6],
                                                       dtype=np.int64))
    reshaped_b = model.aiOnnx.reshape([data_b, shape_b])

    total = model.aiOnnx.add([reshaped_a, reshaped_b])
    model.addOutputTensor(total)

    transformer = popart.GraphTransformer(model.getModelProto())
    transformer.convertAllFixedPointInitializersToConstants()

    # Re-load the transformed proto to inspect which inputs remain.
    reloaded = popart.Builder(transformer.getModelProto())
    remaining_inputs = reloaded.getInputTensorIds()

    for data_id, shape_id in ((data_a, shape_a), (data_b, shape_b)):
        assert data_id in remaining_inputs
        assert shape_id not in remaining_inputs
Exemplo n.º 3
0
def graph_builder():
    """Load the ONNX model selected by ``FLAGS`` and prepare it for a session.

    The model file is located from ``FLAGS.model_path`` (default
    ``"models"``), ``FLAGS.model_name`` and ``FLAGS.micro_batch_size``. Input
    data is either generated randomly (``FLAGS.synthetic``) or loaded with
    ``load_dataset``. The available memory proportion is set on the output
    of every ``Conv`` node, and the model is converted from floats to halfs.

    Returns:
        A tuple ``(model_proto, data, output, output_id)`` where
        ``model_proto`` is the transformed model, ``data`` maps the first
        input id to its data, ``output`` maps the first output id to an
        ``AnchorReturnType("ALL")`` and ``output_id`` is that output id.
    """
    model_path = FLAGS.model_path or "models"
    proto = f"{model_path}/{FLAGS.model_name}/model_{FLAGS.micro_batch_size}.onnx"

    builder = popart.Builder(proto,
                             opsets={
                                 "ai.onnx": 10,
                                 "ai.onnx.ml": 1,
                                 "ai.graphcore": 1
                             })
    input_id = builder.getInputTensorIds()[0]

    if FLAGS.synthetic:
        input_shape = [int(FLAGS.micro_batch_size), 3, 224, 224]
        data = {
            input_id: np.random.normal(0, 1, input_shape).astype(np.float16)
        }

        if FLAGS.batches_per_step > 1:
            # Prepend a step dimension by repeating the same batch.
            data = {
                k: np.repeat(v[np.newaxis], FLAGS.batches_per_step, 0)
                for k, v in data.items()
            }
    else:
        data = load_dataset([input_id])

    output_id = builder.getOutputTensorIds()[0]
    output = {output_id: popart.AnchorReturnType("ALL")}

    # Collect the output ids of all convolutions. We need these so we can
    # adjust their memory proportions from the default.
    graph_proto = onnx.load(proto).graph
    convolution_output_ids = [
        node.output[0] for node in graph_proto.node if node.op_type == 'Conv'
    ]

    # NOTE(review): this reads FLAGS.batch_size while the rest of the function
    # uses FLAGS.micro_batch_size -- confirm which flag is intended.
    memoryProportion = 0.26 if FLAGS.batch_size == 5 else 0.3
    for conv_output_id in convolution_output_ids:
        builder.setAvailableMemoryProportion(conv_output_id, memoryProportion)

    graph_transformer = popart.GraphTransformer(builder.getModelProto())
    graph_transformer.convertFloatsToHalfs()
    return graph_transformer.getModelProto(), data, output, output_id
Exemplo n.º 4
0
def compile_and_run(image_input, image_output, onnx_model):
    """Run one inference step of ``onnx_model`` on an image and save the result.

    Args:
        image_input: passed to ``load_image`` to obtain the input data.
        image_output: passed to ``save_image`` as the destination.
        onnx_model: the ONNX model handed to ``popart.Builder`` and
            ``popart.GraphTransformer``.
    """
    pixels = load_image(image_input)

    # Inspect the model to find its first input and first output tensor.
    print(onnx_model)
    model_builder = popart.Builder(onnx_model)
    in_id = model_builder.getInputTensorIds()[0]
    out_id = model_builder.getOutputTensorIds()[0]
    print("Input:", in_id, "Output:", out_id)

    transformer = popart.GraphTransformer(onnx_model)
    transformer.convertAllFixedPointInitializersToConstants()

    # Build the forward-pass session from its three ingredients.
    model_proto = transformer.getModelProto()
    data_flow = popart.DataFlow(1, {out_id: popart.AnchorReturnType("All")})
    device = popart.DeviceManager().acquireAvailableDevice(1)
    session = popart.InferenceSession(fnModel=model_proto,
                                      dataFlow=data_flow,
                                      deviceInfo=device)

    print("Compiling...")
    session.prepareDevice()

    # Anchor arrays receive the results of the execution.
    anchors = session.initAnchorArrays()
    step_io = popart.PyStepIO({in_id: pixels.copy()}, anchors)
    session.run(step_io)

    total_execution_cycles(session.getExecutionReport())
    total_tile_sizes(session.getGraphReport())

    result = anchors[out_id]
    save_image(np.squeeze(result), image_output)
Exemplo n.º 5
0
def test_convert_initializers_to_constants(tmpdir):
    """Check conversion of a named initializer to a constant.

    Converting the int64 initializer of a reshape must remove it from the
    model inputs, and asking to convert an id that is not an initializer
    must raise ``popart.popart_exception`` with the expected message.
    """
    model = popart.Builder()

    data_id = model.addInputTensor(popart.TensorInfo("FLOAT", [2, 3]))
    shape_id = model.addInitializedInputTensor(
        np.array([1, 6], dtype=np.int64))
    reshaped = model.aiOnnx.reshape([data_id, shape_id])
    model.addOutputTensor(reshaped)

    transformer = popart.GraphTransformer(model.getModelProto())
    transformer.convertInitializersToConstants([shape_id])

    # An id that is not an initializer is rejected.
    expected_message = "TensorId unknown not in the model initalizers"
    with pytest.raises(popart.popart_exception) as e_info:
        transformer.convertInitializersToConstants(["unknown"])
    assert e_info.value.args[0] == expected_message

    remaining_inputs = popart.Builder(
        transformer.getModelProto()).getInputTensorIds()
    assert data_id in remaining_inputs
    assert shape_id not in remaining_inputs
Exemplo n.º 6
0
    def saveModel(self, fnModel):
        """Export ``self.module`` to an ONNX file at ``fnModel`` and check it.

        Dummy inputs of ones are created from ``self.inputShapeInfo`` for
        every name in ``self.inNames``. Inputs declared as int32 are fed to
        torch as int64; when that happens the exported model is converted
        back from int64 to int32 and re-saved before being checked.
        """
        print("Writing ONNX model to protobuf file %s" % (fnModel, ))
        # Switch to eval mode just to write the onnx model.
        # Note that this might do strange things with batch-normalisation (?)
        self.module.eval()

        tensor_infos = list(map(self.inputShapeInfo.get, self.inNames))

        dummy_inputs = []
        promoted_to_int64 = False
        for tensor_info in tensor_infos:
            dims = tensor_info.shape()
            dtype_name = tensor_info.data_type_lcase()
            if dtype_name == "int32":
                # torch labels must be 'long'
                dtype_name = "int64"
                promoted_to_int64 = True
            ones = np.ones(shape=dims, dtype=dtype_name)
            dummy_inputs.append(torch.from_numpy(ones))

        torch.onnx.export(self.module,
                          dummy_inputs,
                          fnModel,
                          verbose=False,
                          input_names=self.inNames,
                          output_names=self.outNames)

        # 'long' tensors in the model (e.g. when exporting nllloss) must be
        # converted to int32. The conversion is a blanket one: in models with
        # reshape ops the 'shape' tensor would be converted as well, giving a
        # technically invalid onnx model. Hence it only runs when int64
        # tensors are known to be present.
        if promoted_to_int64:
            transformer = popart.GraphTransformer(fnModel)
            transformer.convertINT64ToINT32()
            transformer.convertAllFixedPointInitializersToConstants()
            converted_proto = transformer.getModelProto()
            popart.Builder(converted_proto).saveModelProto(fnModel)

        onnx.checker.check_model(fnModel)
Exemplo n.º 7
0
def _run_impl(torchWriter, patterns, outputdir, cifarInIndices, device,
              device_hw_id, mode, syntheticData, transformations, epochs,
              printAnchorArrays):
    """Run a torch model and its PopART equivalent on CIFAR10 and compare them.

    The torch model is exported to ONNX, optionally transformed, and run in
    a PopART session. In "train" mode the updated weights of both frameworks
    are compared after every epoch; in "infer" mode the outputs are compared.

    Args:
        torchWriter: object providing ``dataFlow``, ``inputShapeInfo``,
            ``samplesPerBatch``, ``outNames``, ``optimizer`` and the methods
            ``saveModel``, ``train`` and ``infer``.
        patterns: passed to the PopART session.
        outputdir: directory where models are written and which is used as
            the session log directory.
        cifarInIndices: dict mapping an input tensor id to the index of the
            corresponding element in one step of loaded data.
        device: one of "cpu", "ipu_model", "sim" or "hw"; any other value is
            passed to the session unchanged.
        device_hw_id: when truthy and ``device == "hw"``, the id of the
            hardware device to acquire.
        mode: "infer" or "train".
        syntheticData: when equal to ``True``, random-normal synthetic data
            mode is enabled in the session options.
        transformations: iterable of transformation names to apply to the
            exported model ("removeUnusedInputs", "prepareNodesForTraining").
        epochs: number of steps to run and compare.
        printAnchorArrays: in "train" mode, print a summary of every anchor
            array after each step.

    Returns:
        The anchor arrays of the PopART session.

    Raises:
        Exception: if ``mode`` is not "infer" or "train".
        RuntimeError: for an unrecognised transformation, or in "train" mode
            when ``torchWriter.outNames`` does not hold exactly one name.
        TestFailureError: if a computed error is NaN or exceeds the margin.
    """

    # Pick a run id one greater than any already present in outputdir.
    runIds = [-1] + [
        int(x.split("runId")[1].split("_")[0])
        for x in os.listdir(outputdir) if "runId" in x
    ]
    baseId = 1 + max(runIds)

    def getFnModel(framework, epoch):
        return os.path.join(
            outputdir,
            "runId%d_%sModel_epoch%s.onnx" % (baseId, framework, epoch))

    def getFnPopArt(epoch):
        return getFnModel("PopArt", epoch)

    def getFnTorch(epoch):
        return getFnModel("Torch", epoch)

    def getFnModel0():
        return os.path.join(outputdir, "runId%d_model0.onnx" % (baseId, ))

    dataFlow = torchWriter.dataFlow
    inputShapeInfo = torchWriter.inputShapeInfo
    validModes = ["infer", "train"]
    if mode not in validModes:
        raise Exception("mode must be one of " + str(validModes))

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # determine what the data directory is
    datadir = "unset"

    dir_path = os.path.dirname(os.path.realpath(__file__))
    path_c10datadir = os.path.join(dir_path, "c10datadir.py")
    if os.path.exists(path_c10datadir):
        import c10datadir
        datadir = c10datadir.c10datadir
    else:
        tmpdir = tempfile.gettempdir()
        datadir = os.path.abspath(os.path.join(tmpdir, 'cifar10data'))
    print("Using datadir=%s" % (datadir))

    if (not os.path.exists(datadir)):
        print(
            "Specified datadir %s does not exist. Consider making it here with os.mkdir(datadir)"
            % (datadir, ))

    print("c10driver: getting data from", datadir)
    trainset = datasets.CIFAR10(root=datadir,
                                train=True,
                                download=False,
                                transform=transform)

    fnModel0 = getFnModel0()

    # write ONNX Model to file
    torchWriter.saveModel(fnModel=fnModel0)

    stepLoader = torch.utils.data.DataLoader(
        trainset,
        # the amount of data loaded for each step.
        # note this is not the batch size, it's the "step" size
        # (samples per step)
        batch_size=torchWriter.samplesPerBatch * dataFlow.batchesPerStep(),
        #non-random data loading
        shuffle=False,
        num_workers=0)

    deviceManager = popart.DeviceManager()

    # Create a CPU device
    if device == "cpu":
        device = deviceManager.createCpuDevice()

    # Create an IPU Model device
    elif device == "ipu_model":

        options = {"compileIPUCode": True, 'numIPUs': 1, 'tilesPerIPU': 4}
        device = deviceManager.createIpuModelDevice(options)

    # Create a Simulator
    elif device == "sim":
        # NOTE(review): the key here is spelled "numIpus" while the IPU model
        # options above use 'numIPUs' -- confirm which spelling is expected.
        options = {"numIpus": 1, "tilesPerIPU": 4}
        device = deviceManager.createSimDevice(options)

    # Get a Hardware Device
    elif device == "hw":
        # Get a hardware device that meets the requirements,
        # may throw if none are available.
        # Will attach to the device
        if device_hw_id:
            device = deviceManager.acquireDeviceById(device_hw_id)
        else:
            device = tu.acquire_ipu()

    # Enumerate available devices
    print("Enumerating devices")
    print("-------------------------------------")
    for idx, d in enumerate(deviceManager.enumerateDevices()):
        print('{0}. {1}'.format(idx, d))
    print("")

    opts = popart.SessionOptions()
    opts.logDir = outputdir
    if syntheticData == True:
        opts.syntheticDataMode = popart.SyntheticDataMode.RandomNormal

    # Without transformations the session is given the model file name,
    # otherwise the transformed model proto.
    modelProtoX = fnModel0
    if transformations:
        gc = popart.GraphTransformer(fnModel0)
        for transformation in transformations:
            print("Running %s transformation pass" % (transformation, ))
            if transformation == "removeUnusedInputs":
                gc.removeUnusedInputs()

            elif transformation == "prepareNodesForTraining":
                gc.prepareNodesForTraining()

            else:
                raise RuntimeError("Unrecognised transformation %s" %
                                   (transformation, ))

        modelProtoX = gc.getModelProto()

    # Reads ONNX model from file and creates backwards graph,
    # performs Ir optimisations

    if mode == 'infer':
        session = popart.InferenceSession(fnModel=modelProtoX,
                                          inputShapeInfo=inputShapeInfo,
                                          dataFlow=dataFlow,
                                          patterns=patterns,
                                          userOptions=opts,
                                          deviceInfo=device)
    else:
        if len(torchWriter.outNames) != 1:
            raise RuntimeError("Expecting single scalar loss tensor")

        # Append output with an identity loss, to reduce to scalar if
        # necessary
        bder = popart.Builder(modelProtoX)
        loss = bder.aiGraphcore.identityloss(
            [torchWriter.outNames[0]], reduction=popart.ReductionType.Sum)
        session = popart.TrainingSession(fnModel=bder.getModelProto(),
                                         inputShapeInfo=inputShapeInfo,
                                         dataFlow=dataFlow,
                                         loss=loss,
                                         optimizer=torchWriter.optimizer,
                                         patterns=patterns,
                                         userOptions=opts,
                                         deviceInfo=device)

    # get the tensor info for the anchors
    anchorArrays = session.initAnchorArrays()

    allDotPrefixes = [x[0:-4] for x in os.listdir(outputdir) if ".dot" in x]
    print("Will generate graph pdfs for all of:")
    print(allDotPrefixes)
    import subprocess
    # set generateFromDots to True to
    # generate pdf figures of the Ir. It
    # requires the 'dot' program
    generateFromDots = False
    if generateFromDots:
        for name in allDotPrefixes:
            dotfile = os.path.join(outputdir, "%s.dot" % (name, ))
            outputfile = os.path.join(outputdir, "%s.pdf" % (name, ))
            log = subprocess.call(
                ["dot", "-T", "pdf", "-o", outputfile, dotfile])
            print("Exit status on `%s' was: %s" % (name, log))

    print("Setting device to IPU, and preparing it")
    session.prepareDevice()

    if mode == "train":
        print("Writing weights to device")
        session.weightsFromHost()

        print("Writing Optimizer tensors to device, if there are any")

    def addStepDimension(data, batchesPerStep):
        # Reshape [step * batch, ...] data to [step, batch, ...].
        if batchesPerStep == 1:
            return data
        else:
            dataShape = np.array(np.shape(data))
            dataShape[0] //= batchesPerStep
            dataShape = np.insert(dataShape, 0, batchesPerStep)
            return np.reshape(data, dataShape)

    def reportTensorError(tensorInd, result):
        reportStr = str(tensorInd) + " :\n"
        reportStr += "  |pA - tA|^2 / (|pA||tA| + 1e-8)  = " + str(
            result) + "\n"
        return reportStr

    def getAnchorTensor(tId, anchorArrays):
        assertStr = "Tensor" + tId + " must be specified as an anchor"
        assert (tId in anchorArrays.keys()), assertStr
        return anchorArrays[tId]

    def subsampleBatches(array, refShape):
        arrayShape = np.shape(array)

        # Every Nth batch
        if len(arrayShape) == len(refShape):
            n = arrayShape[0] // refShape[0]
            return array[n - 1::n]

        # Last batch only
        else:
            return array[-1]

    def getTensorError(tA, pA):
        # pA, tA are corresponding tensors from two models
        pA_shape = np.shape(pA)
        tA_shape = np.shape(tA)
        assert (pA_shape == tA_shape), "Arrays must be same shape"

        ss_err = np.sum((np.array(pA) - np.array(tA))**2)
        ss_pA = np.sum(np.array(pA)**2)
        ss_tA = np.sum(np.array(tA)**2)
        return ss_err / (math.sqrt(ss_pA * ss_tA) + 1.0e-8)

    def checkResult(result, margin):
        if np.isnan(result):
            raise TestFailureError(str(result) + " is NaN")
        elif (result > margin):
            raise TestFailureError(
                str(result) + " is greater than " + str(margin))

    margin = 5.0e-7
    numReports = []

    for epoch in range(epochs):  # loop over the dataset multiple times
        print("Epoch is %d" % (epoch, ))
        # A fresh iterator is created every epoch, so the first step of data
        # from the (unshuffled) loader is taken each time.
        stepData = next(iter(stepLoader))

        # Form the input map for one step's worth of data.
        # Note: data from the torch DataLoader has shape:
        #   [stepSize * batchSize, sampleShape]
        # whereas Popart expects input data of the shape:
        #   [stepSize, batchSize, sampleShape]
        # so we reshape the input array before passing to the stepio
        inputs = {}
        for tenId in cifarInIndices.keys():
            inputs[tenId] = \
                addStepDimension(stepData[cifarInIndices[tenId]].numpy(),
                                 session.dataFlow.batchesPerStep())

        if mode == "train":
            # take batchesPerStep passes (1 step), Torch
            torchWriter.train(inputs)

            # take batchesPerStep passes (1 step), PopArt
            pystepio = popart.PyStepIO(inputs, anchorArrays)
            session.run(pystepio)

            if printAnchorArrays:
                print(
                    "\nAnchor arrays (being printed as printAnchorArrays==True):"
                )
                for name in anchorArrays.keys():
                    arr = anchorArrays[name]
                    print("\nAnchored Array Name=", name, " and Size=",
                          arr.size)

                    if (arr.size < 10):
                        print("\nArray (of size < 10) values are")
                        print(arr)

                    if len(arr.shape) > 1:
                        for i, slice0 in enumerate(arr):
                            print("Sum along axis %d is Sum=%.15f" %
                                  (i, slice0.sum()))

                    print("Total Sum is %.15f" % (arr.sum()))

            # write models to file
            fnTorchModel = getFnTorch(epoch)
            fnPopArtModel = getFnPopArt(epoch)
            torchWriter.saveModel(fnTorchModel)
            session.modelToHost(fnPopArtModel)
            print("Writing models to " + fnTorchModel + " and " +
                  fnPopArtModel)

            # Compare parameters from updated Onnx models
            print("Obtaining popart NumericsReport, A: Torch, B: Popart.")
            # Value comparison: `is 0` would test object identity, which is
            # implementation-dependent for ints.
            if epoch == 0:
                nr = popart.NumericsReport(fnModel0, fnTorchModel, fnModel0,
                                           fnPopArtModel)
            else:
                nr = popart.NumericsReport(getFnTorch(epoch - 1), fnTorchModel,
                                           getFnPopArt(epoch - 1),
                                           fnPopArtModel)

            print(nr.fullReport())
            # One relative error calculated per weight tensor
            for tId, relerror in nr.getRelativeErrors().items():
                checkResult(relerror, margin)

        elif mode == "infer":
            # take batchesPerStep passes (1 step), Torch
            # returns map of outputs for each sample
            # Note: already are of dimension matching the
            # anchors
            torchOutputs = torchWriter.infer(inputs)

            # take batchesPerStep passes (1 step), PopArt
            pystepio = popart.PyStepIO(inputs, anchorArrays)
            session.run(pystepio)

            # Compare torch outputs tensors with popart output from
            # anchor tensor maps
            for nInd, outName in enumerate(torchWriter.outNames):
                # Torch outputs returned for all samples, whereas
                # anchors are returned as specified by the user.
                # Subsample torch outputs to match dimensions
                torchOuput = subsampleBatches(torchOutputs[outName],
                                              np.shape(anchorArrays[outName]))
                result = getTensorError(torchOuput, anchorArrays[outName])
                print(reportTensorError(nInd, result))
                checkResult(result, margin)

    return anchorArrays
Exemplo n.º 8
0
import popart

# Exactly one command-line argument is expected: the ONNX model file.
if len(sys.argv) != 2:
    raise RuntimeError("onnx model file name expected as argument")

model_file = sys.argv[1]

opts = popart.SessionOptions()
opts.logging = dict(all='TRACE')
# Only needed if the IpuModel device below is re-enabled.
options = dict(compileIPUCode=True, numIPUs=1)

# currently, with both Cpu and IpuModel, we have outstanding tasks
# T6384 and T6405, about the conv planner failing (after ~15 mins with Cpu)
device = popart.DeviceManager().createCpuDevice()
#createIpuModelDevice(options)

# TODO: change to not use builder when T6675 is complete
builder = popart.Builder(model_file)
graph_transformer = popart.GraphTransformer(builder.getModelProto())
graph_transformer.convertAllFixedPointInitializersToConstants()

# The anchored output is specific to the task and might need changing.
output = builder.getOutputTensorIds()[0]
dataFlow = popart.DataFlow(1, {output: popart.AnchorReturnType("All")})

# Build the session on the transformed model, then attach and prepare
# the device.
s = popart.Session(graph_transformer.getModelProto(),
                   dataFlow=dataFlow,
                   userOptions=opts)
s.setDevice(device)
s.prepareDevice()
Exemplo n.º 9
0
def test_manual_serialization():
    """Train a manually serialised matmul in PopART and compare with PyTorch.

    Basic model:

        X: data input of shape (N, C0)
        W: weight input of shape (C0, C1)

        Y    = matmul(X, W)
        Z    = relu(Y)
        loss = l1Loss(Z)

    The matmul ((N, C0), (C0, C1)) is serialised by hand into ``f`` smaller
    matmuls ((N, C0/f), (C0/f, C1)) whose results are accumulated. After one
    training step the updated weights are compared with those of an
    equivalent PyTorch model.
    """
    # Array dimensions and serialisation factor.
    N = 12
    C0 = 244
    C1 = 286
    f = 4
    assert (C0 % f == 0)
    slice_width = C0 // f

    # Constructing the model
    builder = popart.Builder()
    # NOTE: T22702 For some seeds this test fails.
    np.random.seed(0)
    wVals = np.array(npr.randn(C0, C1), dtype=np.float32)
    W = builder.addInitializedInputTensor(wVals)
    X = builder.addInputTensor(popart.TensorInfo("FLOAT", [N, C0]))

    def int32_initializer(values):
        # Add an int32 initialized input holding `values`.
        return builder.addInitializedInputTensor(
            np.array(values).astype(np.int32))

    axes = int32_initializer([0, 1])

    Y = None
    for i in range(f):
        # Bounds of the i'th slice along the C0 dimension.
        lower = i * slice_width
        upper = lower + slice_width

        # Slice of size (N, C0/f) out of X
        x_starts = int32_initializer([0, lower])
        x_ends = int32_initializer([N, upper])
        X_slice = builder.aiOnnx.slice([X, x_starts, x_ends, axes])

        # Slice of size (C0/f, C1) out of W
        w_starts = int32_initializer([lower, 0])
        w_ends = int32_initializer([upper, C1])
        W_slice = builder.aiOnnx.slice([W, w_starts, w_ends, axes])

        # Multiply the slices together, accumulating after the first one.
        partial = builder.aiOnnx.matmul([X_slice, W_slice])
        if Y is None:
            Y = partial
        else:
            Y = builder.aiOnnx.add([partial, Y])

    # Finally, the non-linearity
    Z = builder.aiOnnx.relu([Y])

    # This boiler-plate is currently necessary with opset-10 slice
    transformer = popart.GraphTransformer(builder.getModelProto())
    transformer.convertAllFixedPointInitializersToConstants()
    builder = popart.Builder(transformer.getModelProto())

    l1 = builder.aiGraphcore.l1loss([Z], 0.2)
    dataFlow = popart.DataFlow(1, {})
    device = tu.create_test_device()
    userOptions = popart.SessionOptions()

    # To obtain the final dot graph, set userOptions.dotChecks to {"Final"}.

    patterns = popart.Patterns()
    sgd = popart.SGD({"defaultLearningRate": (0.1, True)})

    session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                     dataFlow=dataFlow,
                                     optimizer=sgd,
                                     loss=l1,
                                     patterns=patterns,
                                     userOptions=userOptions,
                                     deviceInfo=device)
    session.prepareDevice()
    session.weightsFromHost()

    # One training step on random input, then read the weights back.
    inputVals = np.array(npr.randn(1 * N * C0), dtype=np.float32)
    session.run(popart.PyStepIO({X: inputVals}, {}))
    session.weightsToHost()
    popart_weights = np.array(-777.0 * np.ones(C0 * C1), dtype=np.float32)
    session.readWeights(popart.PyWeightsIO({W: popart_weights}))

    # A pytorch version to confirm numerical correctness:
    class Net(nn.Module):
        def __init__(self):
            super().__init__()
            self.w0 = torch.nn.Parameter(torch.from_numpy(wVals.copy()))

        def forward(self, x):
            return torch.relu(torch.matmul(x, self.w0))

    net = Net()
    optimizer = optim.SGD(net.parameters(), lr=0.1)

    out = net(torch.from_numpy(inputVals.reshape([N, C0])))
    loss = 0.2 * torch.mean(torch.abs(out))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Compare the weight updates: the difference between the two frameworks
    # must be tiny relative to the size of the updates themselves.
    initial_weights = wVals.flatten()
    torch_weights = net.w0.detach().numpy().flatten()
    baseline0 = np.sum(np.abs(torch_weights - initial_weights))
    baseline1 = np.sum(np.abs(popart_weights - initial_weights))
    error = np.sum(np.abs(np.abs(torch_weights - popart_weights)))
    assert (error / (baseline0 + baseline1) < 1e-6)
Exemplo n.º 10
0
def test_type_cast_INT64ToINT32_clip():
    """
    Check int64 -> int32 conversion with clipping of out-of-range values.

    The model:

                     starts,ends,axes (int64)
                             |
    t0 -----------.------- Slice -- t1 ------.
                  |                          |
    indices ---- Gather ----------- t2 ----- Add - o
     (int64)

    - Gather has two inputs: the int64 'indices' and the data input 't0'.
      Because one input is variable, the const expression util cannot
      evaluate it on host.
    - int64 is not supported on the IPU, so the int64 tensors of the onnx
      model have to be converted to int32.
    - That conversion is only possible when all int64 tensor data fits in
      the range of int32, unless the data is clipped to int32's numeric
      limits.
    - Here the 'starts' tensor of the slice is valid according to the onnx
      spec, but out of range of int32.
    - Clipping it is known to be safe in this case, as the result is
      unchanged.
    """
    data = np.array([[-1, -2, -3], [4, 5, 6], [7, 8, 9]]).astype(np.float32)
    indices = np.array([0, 1]).astype(np.int64)
    axis = 0

    slice_axes = np.array([0], dtype=np.int64)
    # Out of range value for int32!
    slice_starts = np.array([-9223372036854775807], dtype=np.int64)
    slice_ends = np.array([2], dtype=np.int64)

    builder = popart.Builder()

    data_id = builder.addInputTensor("FLOAT", data.shape)
    indices_id = builder.addInputTensor("INT64", indices.shape)
    gathered = builder.aiOnnx.gather([data_id, indices_id], axis)

    axes_id = builder.aiOnnx.constant(slice_axes)
    starts_id = builder.aiOnnx.constant(slice_starts)
    ends_id = builder.aiOnnx.constant(slice_ends)
    sliced = builder.aiOnnx.slice([data_id, starts_id, ends_id, axes_id])

    out_id = builder.aiOnnx.add([gathered, sliced])

    # Convert the int64 model to int32, clipping out-of-range values.
    transformer = popart.GraphTransformer(builder.getModelProto())
    transformer.convertINT64ToINT32(clip=True)
    int32_proto = transformer.getModelProto()

    session = popart.InferenceSession(fnModel=int32_proto,
                                      dataFlow=popart.DataFlow(1, [out_id]),
                                      deviceInfo=tu.create_test_device())
    session.prepareDevice()

    anchors = session.initAnchorArrays()
    session.run(popart.PyStepIO({data_id: data, indices_id: indices}, anchors))

    # Gather and slice both select the first two rows, so the sum is twice
    # the gathered rows.
    expected = 2 * np.take(data, indices, axis=axis)
    assert np.allclose(anchors[out_id], expected)
Exemplo n.º 11
0
        start = time.perf_counter()

        model(dummy_input)

        end = time.perf_counter()

        elapsed = end - start
        timePerEvent = elapsed / n

        print("{:.12f}".format(elapsed))

    else:

        # POPART IMPORT

        graph_transformer = popart.GraphTransformer(onnx_model)

        anchors = {"tag": popart.AnchorReturnType("ALL")}
        dataFeed = popart.DataFlow(1, anchors)
        device = popart.DeviceManager().acquireAvailableDevice(1)

        session = popart.InferenceSession(graph_transformer.getModelProto(),
                                          dataFeed, device)

        session.prepareDevice()

        inferenceAnchors = session.initAnchorArrays()

        inputs = np.random.rand(n, nFeatures, nTracks, 1).astype(np.float32)

        stepio = popart.PyStepIO({"data": inputs}, inferenceAnchors)
Exemplo n.º 12
0
def test_type_cast_BFloatToFloat():
    """Check that ``convertBFloats16ToFloat32`` preserves shapes and data.

    An ONNX model with a single ``ConstantOfShape`` node holding a BFLOAT16
    tensor is written to ``type_test.onnx``, converted with
    ``popart.GraphTransformer`` and saved to ``type_test_modified.onnx``.
    The modified model must pass the ONNX checker, keep its input/output
    shapes, and hold float data matching the truncated original values.
    """

    # Build an onnx proto with a single constant node
    # Create output tensors of the type to cast
    X = helper.make_tensor_value_info('X', TensorProto.BFLOAT16, [3, 2])
    Y = helper.make_tensor_value_info('Y', TensorProto.BFLOAT16, [3, 2])

    # Define the target values as float32 and cast to bytes
    float_values = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]).astype(np.float32)
    float_bytes = float_values.tobytes()

    # Reinterpret the byte string as uint16 values. That way each float is
    # split into 2 sets of 16 bits.
    int16_values = np.frombuffer(float_bytes, dtype=np.uint16)

    # Keep only the second 2 bytes of each float, i.e. every other uint16,
    # and convert back to bytes. We should now have bfloats whose values are
    # close enough to the original floats (precision loss of around 5e-3).
    # NOTE(review): this relies on the low-order bytes coming first in
    # tobytes() output, presumably a little-endian host -- confirm.
    bfloat_as_int16 = int16_values[1::2]
    bfloat = bfloat_as_int16.tobytes()

    # Reference data: widen the bfloats back to float32 by pairing every
    # 16-bit value with 16 zero bits, to check that the cast produces
    # exactly the same "truncated" data.
    bfloat_values = np.frombuffer(bfloat, dtype=np.uint16)
    widened = np.zeros(2 * bfloat_values.size, dtype=np.uint16)
    widened[1::2] = bfloat_values
    float_again = np.frombuffer(widened.tobytes(), dtype=np.float32)

    node_def = onnx.helper.make_node('ConstantOfShape',
                                     inputs=['X'],
                                     outputs=['Y'],
                                     value=onnx.helper.make_tensor(
                                         name='const_tensor',
                                         data_type=onnx.TensorProto.BFLOAT16,
                                         dims=[3, 2],
                                         vals=bfloat,
                                         raw=True))

    # Create the graph (GraphProto)
    graph_def = helper.make_graph(
        [node_def],
        'test-model',
        [X],
        [Y],
    )

    # Create the model (ModelProto)
    onnx_model = helper.make_model(graph_def)

    # Make sure the opset version is version 9 (by default it would be 11
    # which would crash subsequent function calls)
    onnx_model = version_converter.convert_version(onnx_model, 9)

    # Save the model to an onnx file
    onnx.save_model(onnx_model, "type_test.onnx")

    # Load proto into a graph transformer and apply cast
    graph_transformer = popart.GraphTransformer("type_test.onnx")
    graph_transformer.convertBFloats16ToFloat32()

    # Retrieve modified graph proto
    proto = graph_transformer.getModelProto()
    popart.Builder(proto).saveModelProto("type_test_modified.onnx")

    # Load the model as an onnx model again
    modified_onnx_model = onnx.load("type_test_modified.onnx")

    # Make sure the graph is still good
    onnx.checker.check_model(modified_onnx_model)

    # Get only the first input of the input array (there should only be one)
    i = modified_onnx_model.graph.input[0]
    o = modified_onnx_model.graph.output[0]

    input_type = i.type.tensor_type
    output_type = o.type.tensor_type

    # Make sure shapes remain untouched
    assert (input_type.HasField("shape")
            ), "Modified graph input has no shape attribute"
    assert (output_type.HasField("shape")
            ), "Modified graph output has no shape attribute"
    assert (input_type.shape.dim[0].dim_value == 3
            ), "Dimensions were not conserved by cast"
    assert (input_type.shape.dim[1].dim_value == 2
            ), "Dimensions were not conserved by cast"
    assert (output_type.shape.dim[0].dim_value == 3
            ), "Dimensions were not conserved by cast"
    assert (output_type.shape.dim[1].dim_value == 2
            ), "Dimensions were not conserved by cast"

    cast_data = modified_onnx_model.graph.node[0].attribute[0].t.float_data

    # Test whether the new tensor has the right size
    assert (len(cast_data) == 6), "Wrong number of Bytes in casted version."

    # Compare the cast values with the original floats (loosely) and with the
    # widened bfloats (default tolerance)
    assert np.allclose(cast_data, float_values,
                       rtol=1e-2), "Data was not conserved by cast"
    assert np.allclose(cast_data,
                       float_again), "Data was not conserved by cast"
Exemplo n.º 13
0
def test_type_cast_DoubleToFloat():
    """Check that ``GraphTransformer.convertDoublesToFloats`` keeps shapes and
    values when it is applied to a model holding a DOUBLE tensor attribute.

    A one-node ONNX model is built by hand, saved, run through
    ``popart.GraphTransformer``, saved again and re-loaded with onnx so that
    the ``float_data`` of the node's tensor attribute can be compared with
    the original values.
    """
    # Imported locally so the scratch-directory handling below does not
    # depend on what the surrounding module happens to import.
    import os
    import tempfile

    # Build an onnx proto with a single constant node.
    # Create the input/output value infos of the type to cast.
    X = helper.make_tensor_value_info('X', TensorProto.DOUBLE, [3, 2])
    Y = helper.make_tensor_value_info('Y', TensorProto.DOUBLE, [3, 2])

    values = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]).astype(np.double)
    node_def = onnx.helper.make_node(
        'ConstantOfShape',
        inputs=['X'],
        outputs=['Y'],
        value=onnx.helper.make_tensor(name='const_tensor',
                                      data_type=onnx.TensorProto.DOUBLE,
                                      dims=values.shape,
                                      vals=values.flatten(),
                                      raw=False))

    # Create the graph (GraphProto)
    graph_def = helper.make_graph(
        [node_def],
        'test-model',
        [X],
        [Y],
    )

    # Create the model (ModelProto)
    onnx_model = helper.make_model(graph_def)

    # Make sure the opset version is version 9 (by default it would be 11
    # which would crash subsequent function calls)
    onnx_model = version_converter.convert_version(onnx_model, 9)

    # Work in a private scratch directory: fixed file names in the current
    # working directory would collide with the sibling type-cast tests (they
    # use the same names) and would be left behind after the run.
    with tempfile.TemporaryDirectory() as scratch_dir:
        model_path = os.path.join(scratch_dir, "type_test.onnx")
        modified_path = os.path.join(scratch_dir, "type_test_modified.onnx")

        # Serialize the model to disk
        onnx.save_model(onnx_model, model_path)

        # Load proto into a graph transformer and apply cast
        graph_transformer = popart.GraphTransformer(model_path)
        graph_transformer.convertDoublesToFloats()

        # Retrieve the modified graph proto and write it back out
        proto = graph_transformer.getModelProto()
        popart.Builder(proto).saveModelProto(modified_path)

        # Load the model as an onnx model again; it is fully in memory once
        # this returns, so the scratch directory can be removed afterwards.
        modified_onnx_model = onnx.load(modified_path)

    # Make sure the graph is still good
    onnx.checker.check_model(modified_onnx_model)

    # Get the first graph input and output (there should only be one each)
    input_type = modified_onnx_model.graph.input[0].type.tensor_type
    output_type = modified_onnx_model.graph.output[0].type.tensor_type

    # Make sure shapes remain untouched
    assert (input_type.HasField("shape")
            ), "Modified graph input has no shape attribute"
    assert (output_type.HasField("shape")
            ), "Modified graph output has no shape attribute"
    assert (input_type.shape.dim[0].dim_value == 3
            ), "Dimensions were not conserved by cast"
    assert (input_type.shape.dim[1].dim_value == 2
            ), "Dimensions were not conserved by cast"
    assert (output_type.shape.dim[0].dim_value == 3
            ), "Dimensions were not conserved by cast"
    assert (output_type.shape.dim[1].dim_value == 2
            ), "Dimensions were not conserved by cast"

    original_tensor = onnx_model.graph.node[0].attribute[0].t
    cast_tensor = modified_onnx_model.graph.node[0].attribute[0].t

    # Test whether the new tensor has the right size (element counts of the
    # typed data fields before and after the cast must match)
    assert (len(cast_tensor.float_data) == len(original_tensor.double_data)
            ), "Wrong number of Bytes in casted version."

    # Compare the cast constant tensor with the original values
    assert np.allclose(cast_tensor.float_data,
                       values.flatten()), "Data was not conserved by cast"
Exemplo n.º 14
0
def test_type_cast_UINT8ToINT32():
    """Check that ``GraphTransformer.convertUINT8ToINT32`` keeps shapes and
    values when it is applied to a model holding a UINT8 tensor attribute.

    A one-node ONNX model is built by hand with the tensor stored as raw
    bytes, saved, run through ``popart.GraphTransformer``, saved again and
    re-loaded with onnx so that the ``int32_data`` of the node's tensor
    attribute can be compared with the original values.
    """
    # Imported locally so the scratch-directory handling below does not
    # depend on what the surrounding module happens to import.
    import os
    import tempfile

    # Build an onnx proto with a single constant node.
    # Create the input/output value infos of the type to cast.
    X = helper.make_tensor_value_info('X', TensorProto.UINT8, [3, 2])
    Y = helper.make_tensor_value_info('Y', TensorProto.UINT8, [3, 2])

    values = np.array([[1, 2], [3, 4], [5, 6]]).astype(np.uint8)
    # We use ConstantOfShape even though this does not follow the onnx
    # spec, because we need an op that can store several actual values
    # in the graph to make sure the type conversion is successful. Constant
    # would not work here because it has only been available since opset 12.
    # Constant-9 is not compatible with the default opset-version 11
    node_def = onnx.helper.make_node('ConstantOfShape',
                                     inputs=['X'],
                                     outputs=['Y'],
                                     value=onnx.helper.make_tensor(
                                         name='const_tensor',
                                         data_type=onnx.TensorProto.UINT8,
                                         dims=values.shape,
                                         vals=values.tobytes(),
                                         raw=True))

    # Create the graph (GraphProto)
    graph_def = helper.make_graph(
        [node_def],
        'test-model',
        [X],
        [Y],
    )

    # Create the model (ModelProto)
    onnx_model = helper.make_model(graph_def)

    # Make sure the opset version is version 9 (by default it would be 11
    # which would crash subsequent function calls)
    onnx_model = version_converter.convert_version(onnx_model, 9)

    # Work in a private scratch directory: fixed file names in the current
    # working directory would collide with the sibling type-cast tests (they
    # use the same names) and would be left behind after the run.
    with tempfile.TemporaryDirectory() as scratch_dir:
        model_path = os.path.join(scratch_dir, "type_test.onnx")
        modified_path = os.path.join(scratch_dir, "type_test_modified.onnx")

        # Serialize the model to disk
        onnx.save_model(onnx_model, model_path)

        # Load proto into a graph transformer and apply cast
        graph_transformer = popart.GraphTransformer(model_path)
        graph_transformer.convertUINT8ToINT32()

        # Retrieve the modified graph proto and write it back out
        proto = graph_transformer.getModelProto()
        popart.Builder(proto).saveModelProto(modified_path)

        # Load the model as an onnx model again; it is fully in memory once
        # this returns, so the scratch directory can be removed afterwards.
        modified_onnx_model = onnx.load(modified_path)

    # Make sure the graph is still good
    onnx.checker.check_model(modified_onnx_model)

    # Get the first graph input and output (there should only be one each)
    input_type = modified_onnx_model.graph.input[0].type.tensor_type
    output_type = modified_onnx_model.graph.output[0].type.tensor_type

    # Make sure shapes remain untouched
    assert (input_type.HasField("shape")
            ), "Modified graph input has no shape attribute"
    assert (output_type.HasField("shape")
            ), "Modified graph output has no shape attribute"
    assert (input_type.shape.dim[0].dim_value == 3
            ), "Dimensions were not conserved by cast"
    assert (input_type.shape.dim[1].dim_value == 2
            ), "Dimensions were not conserved by cast"
    assert (output_type.shape.dim[0].dim_value == 3
            ), "Dimensions were not conserved by cast"
    assert (output_type.shape.dim[1].dim_value == 2
            ), "Dimensions were not conserved by cast"

    original_tensor = onnx_model.graph.node[0].attribute[0].t
    cast_tensor = modified_onnx_model.graph.node[0].attribute[0].t

    # Test whether the new tensor has the right size. The original tensor is
    # stored as raw bytes; for uint8 that is one byte per element, so the
    # byte length equals the expected number of int32 entries.
    assert (len(cast_tensor.int32_data) == len(original_tensor.raw_data)
            ), "Wrong number of Bytes in casted version."

    # Compare the cast constant tensor with the original values
    assert np.allclose(cast_tensor.int32_data,
                       values.flatten()), "Data was not conserved by cast"
Exemplo n.º 15
0
        start = time.perf_counter()

        net(dummy_input, dummy_init_hc)

        end = time.perf_counter()

        elapsed = end - start
        timePerEvent = elapsed / n

        print("{:.12f}".format(elapsed))

    else:

        # IMPORT INTO POPART

        graph_transformer = popart.GraphTransformer(export_name)

        inputShapeInfo = popart.InputShapeInfo()
        inputShapeInfo.add("data",
                           popart.TensorInfo("FLOAT", [n, nTracks, nFeatures]))
        inputShapeInfo.add("init_hc",
                           popart.TensorInfo("FLOAT", [1, n, nHidden]))

        anchors = {"tag": popart.AnchorReturnType("ALL")}
        dataFeed = popart.DataFlow(1, anchors)
        # device = popart.DeviceManager().createIpuModelDevice({})
        device = popart.DeviceManager().acquireAvailableDevice(1)

        session = popart.InferenceSession(
            graph_transformer.getModelProto(),
            dataFeed,
def _convert_floats_to_halfs(proto):
    """Run popart's ``convertFloatsToHalfs`` graph transform over ``proto``.

    ``proto`` is handed straight to ``popart.GraphTransformer``; the model
    proto reported by the transformer after the conversion is returned.
    """
    transformer = popart.GraphTransformer(proto)
    transformer.convertFloatsToHalfs()
    converted_proto = transformer.getModelProto()
    return converted_proto