def run(enableOutlining):
    dsize = 10
    ratio = 0.5
    batches_per_step = 2
    builder = popart.Builder()
    ip = builder.addInputTensor(popart.TensorInfo("FLOAT", [dsize, dsize]))
    d__ip = popart.reservedGradientPrefix() + ip

    def add_layer(in_id):
        w = builder.addInitializedInputTensor(
            np.ones([dsize, dsize], np.float32))
        # w = builder.aiGraphcore.printtensor([w])
        matmul_id = builder.aiOnnx.matmul([in_id, w])
        return matmul_id

    m1 = add_layer(ip)
    m2 = add_layer(m1)
    m3 = add_layer(m2)

    anchorIds = []
    for i in (ip, m1, m2, m3):
        anchorIds.append(popart.reservedGradientPrefix() + i)

    out = builder.aiGraphcore.identityloss([m3])
    builder.addOutputTensor(out)

    device = tu.create_test_device()

    dfAnchors = {}
    for anchorId in anchorIds:
        dfAnchors.update({anchorId: popart.AnchorReturnType("All")})

    opts = popart.SessionOptions()
    opts.enableOutlining = enableOutlining
    # `subgraphCopyingStrategy` is captured from the enclosing test scope.
    opts.subgraphCopyingStrategy = subgraphCopyingStrategy

    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        dataFlow=popart.DataFlow(batches_per_step, dfAnchors),
        optimizer=popart.ConstSGD(0.1),
        loss=out,
        patterns=popart.Patterns(popart.PatternsLevel.All),
        userOptions=opts,
        deviceInfo=device)

    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()

    ip_data = np.ones((batches_per_step, dsize, dsize), dtype=np.float32)
    stepio = popart.PyStepIO({ip: ip_data}, anchors)

    session.run(stepio)

    return anchors
def test_incomplete_grad():
    # Reproducer for T37001, included as a regression test. This test doesn't
    # actually check any assertions, it just ensures that a code path that
    # previously failed does not result in any exceptions.
    #
    # The problem originally revealed by this test was that an exception was
    # thrown if, for some inputs of a fwd subgraph, the backwards pass creator
    # was not able to create gradients for those inputs (for example, for a
    # seed input). This problem was fixed in the code base by allowing inputs
    # of the fwd subgraph to not have an associated gradient output in the
    # associated bwd subgraph.

    def get_subgraph_builder(builder, weights, labels):
        subgraph_builder = builder.createSubgraphBuilder()
        subgraph_builder.addInputTensorFromParentGraph(weights)
        input = subgraph_builder.addInputTensor(
            popart.TensorInfo("FLOAT16", [4, 32, 1, 64]))
        subgraph_builder.addInputTensorFromParentGraph(labels)

        matmul_out = subgraph_builder.aiOnnx.matmul([input, weights])
        log_probs = subgraph_builder.aiOnnx.logsoftmax([matmul_out], axis=3)
        log_probs_compact = subgraph_builder.aiOnnx.gather(
            [log_probs, labels], axis=3)
        subgraph_builder.addOutputTensor(log_probs_compact)
        return subgraph_builder

    builder = popart.Builder()
    float16_input = builder.addInputTensor(
        popart.TensorInfo("FLOAT16", [4, 32, 1, 64]), "float16_input")
    int32_input = builder.addInputTensor(popart.TensorInfo("INT32", [4, 2]),
                                         "int32_input")
    weights = builder.addInitializedInputTensor(np.zeros([64, 64], np.float16),
                                                "weights")

    fn = get_subgraph_builder(builder, weights, int32_input)
    log_probs_compact = builder.aiGraphcore.call(
        [weights, float16_input, int32_input], 1, fn)[0]
    l1_loss = builder.aiGraphcore.l1loss([log_probs_compact], 1.0)

    optimizer = popart.SGD({
        "defaultLearningRate": (0.1, False),
        "defaultWeightDecay": (0, True)
    })

    training_session = popart.TrainingSession(
        builder.getModelProto(),
        loss=l1_loss,
        deviceInfo=popart.DeviceManager().createIpuModelDevice({}),
        optimizer=optimizer,
        dataFlow=popart.DataFlow(1, {}),
        userOptions=popart.SessionOptions())
def init_train_session(self, optimizer, loss):
    self.session = popart.TrainingSession(fnModel=self.proto,
                                          loss=loss.getIpuIndex(),
                                          deviceInfo=self.deviceInfo,
                                          optimizer=optimizer.gc_optimizer,
                                          dataFlow=self.dataFlow,
                                          userOptions=self.options)
    self.session.prepareDevice()
    self.session.setRandomSeed(bF.get_seed())
    self.session.weightsFromHost()
    self.anchors = self.session.initAnchorArrays()
def test_constants_preserved():
    popart.getLogger().setLevel("TRACE")
    # Check that `session.modelToHost` can be called when using a
    # model with a constant node, without throwing an exception.
    builder = popart.Builder()
    i1 = builder.addInputTensor(popart.TensorInfo("FLOAT", [2, 2]))
    i2 = builder.addInputTensor(popart.TensorInfo("FLOAT", [2, 2]))
    c = builder.aiOnnx.constant(np.array([[1, 2], [3, 4]], dtype=np.float32))
    o1 = builder.aiOnnx.add([i1, i2])
    o2 = builder.aiOnnx.add([o1, c])
    loss = builder.aiGraphcore.identityloss([o2])

    proto = builder.getModelProto()

    anchors = {o2: popart.AnchorReturnType("All")}
    dataFlow = popart.DataFlow(1, anchors)
    optimizer = popart.ConstSGD(0.01)
    opts = popart.SessionOptions()

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=dataFlow,
                                     userOptions=opts,
                                     loss=loss,
                                     optimizer=optimizer,
                                     deviceInfo=tu.create_test_device())

    anchorArrays = session.initAnchorArrays()
    session.prepareDevice()

    inputs = {
        i1: np.array([[2, 2], [2, 2]]).astype(np.float32),
        i2: np.array([[4, 4], [4, 4]]).astype(np.float32),
    }
    pystepio = popart.PyStepIO(inputs, anchorArrays)
    session.run(pystepio)

    session.modelToHost('session_proto.onnx')

    # The models should be the same after training,
    # as there are no trainable parameters.
    with open('session_proto.onnx', 'rb') as f:
        session_proto = f.read()
    assert proto == session_proto

    # Confirm that the output is correct. See T6186, which this tests.
    assert (np.sum(np.abs(anchorArrays[o2] - np.array([[7, 8], [9, 10]]))) <
            1e-8)
def run_test(aliaszerocopy):
    proto, data, x, loss = model()

    options = popart.SessionOptions()
    patterns = popart.Patterns()

    optimizer = popart.SGD({
        "defaultLearningRate": (0.1, True),
        "defaultMomentum": (0.9, True),
        "defaultDampening": (0, True)
    })

    options.enableOutlining = True
    options.outlineThreshold = -np.inf
    options.enableOutliningCopyCostPruning = False
    options.autoRecomputation = popart.RecomputationType.Standard
    options.virtualGraphMode = popart.VirtualGraphMode.ExecutionPhases
    options.explicitRecomputation = True
    options.aliasZeroCopy = aliaszerocopy
    options.executionPhaseSettings.phases = 5

    request_ipus = 2
    device = tu.create_test_device(request_ipus,
                                   pattern=popart.SyncPattern.Full)

    dataFlow = popart.DataFlow(1, {x: popart.AnchorReturnType("ALL")})

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=dataFlow,
                                     userOptions=options,
                                     loss=loss,
                                     optimizer=optimizer,
                                     patterns=patterns,
                                     deviceInfo=device)

    session.prepareDevice()
    session.weightsFromHost()

    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO(data, anchors)

    session.run(stepio)

    # `tmpdir` is captured from the enclosing test scope (a pytest fixture).
    file_path = str(tmpdir / "aliaszerocopy_model_test.onnx")
    session.modelToHost(file_path)
    post_proto = onnx.load(file_path)

    device.detach()

    graph_report = json.loads(session.getGraphReport())
    max_tile_memory = max(graph_report["memory"]["byTile"]["total"])
    total_memory = np.sum(graph_report["memory"]["byTile"]["total"])
    return anchors[x], post_proto, total_memory
def run_test(outlining):
    proto, data, x, loss = model()

    options = popart.SessionOptions()
    patterns = popart.Patterns()

    optimizer = popart.SGD({
        "defaultLearningRate": (0.1, True),
    })

    options.enableOutlining = outlining
    options.outlineThreshold = 10.0
    options.enableGradientAccumulation = True
    options.accumulationFactor = 4
    options.enableReplicatedGraphs = True
    options.replicatedGraphCount = 2
    options.virtualGraphMode = popart.VirtualGraphMode.Manual
    # `pipeline` is captured from the enclosing test scope.
    if pipeline:
        options.enablePipelining = True
        options.autoRecomputation = popart.RecomputationType.Pipeline

    device = tu.create_test_device(4)

    dataFlow = popart.DataFlow(1, {x: popart.AnchorReturnType("ALL")})

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=dataFlow,
                                     userOptions=options,
                                     loss=loss,
                                     optimizer=optimizer,
                                     patterns=patterns,
                                     deviceInfo=device)

    session.prepareDevice()
    session.weightsFromHost()

    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO(data, anchors)

    session.run(stepio)

    # `tmpdir` is captured from the enclosing test scope (a pytest fixture).
    file_path = str(tmpdir / "outlining_execution_context_model.onnx")
    session.modelToHost(file_path)
    post_proto = onnx.load(file_path)

    device.detach()

    graph_report = json.loads(session.getGraphReport())
    max_tile_memory = max(graph_report["memory"]["byTile"]["total"])
    total_memory = np.sum(graph_report["memory"]["byTile"]["total"])
    return session, anchors[x], post_proto, total_memory
def test_sgd_with_float16_model():
    popart.getLogger().setLevel("TRACE")

    input1 = np.zeros((2, 2, 4, 4), dtype=np.float16)
    input2 = np.zeros((2, 2, 3, 3), dtype=np.float16)
    input3 = np.zeros((2, 2, 3, 3), dtype=np.float16)

    builder = popart.Builder()
    inid1 = builder.addInputTensor(popart.TensorInfo(input1))
    inid2 = builder.addInitializedInputTensor(input2)
    inid3 = builder.addInitializedInputTensor(input3)

    c1 = builder.aiOnnx.conv([inid1, inid2],
                             dilations=[1, 1],
                             pads=[1, 1, 1, 1],
                             strides=[1, 1])
    c2 = builder.aiOnnx.conv([c1, inid3],
                             dilations=[1, 1],
                             pads=[1, 1, 1, 1],
                             strides=[1, 1])

    # Reduce to scalar
    out = builder.aiGraphcore.identityloss([c2])

    proto = builder.getModelProto()

    optimizer = popart.SGD({
        "defaultLearningRate": (0.1, False),
        "defaultWeightDecay": (0.1, False),
        "lossScaling": (1000, False)
    })

    anchorNames = {
        popart.reservedGradientPrefix() + inid1:
        popart.AnchorReturnType("All"),
    }

    opts = popart.SessionOptions()

    session = popart.TrainingSession(
        fnModel=proto,
        dataFlow=popart.DataFlow(1, anchorNames),
        loss=out,
        optimizer=optimizer,
        deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    session.prepareDevice()
    session.weightsFromHost()

    anchorArrays = session.initAnchorArrays()

    stepio = popart.PyStepIO({inid1: input1}, anchorArrays)
    session.run(stepio)
def run(inplaceReshape=True):
    # `w0_init` and `input_` are captured from the enclosing test scope.
    builder = popart.Builder()
    in0 = builder.addInputTensor(popart.TensorInfo("FLOAT", [4, 2]))
    w0 = builder.addInitializedInputTensor(w0_init)
    actIn = in0
    with builder.virtualGraph(0):
        actIn = builder.aiOnnx.matmul([actIn, w0])
        actIn = builder.aiOnnx.relu([actIn])
        reshape = builder.aiOnnx.constant(np.asarray([2, 8]))
        actIn = builder.aiOnnx.reshape([actIn, reshape])
        r0 = actIn
    with builder.virtualGraph(1):
        loss = builder.aiGraphcore.identityloss([actIn])
    builder.addOutputTensor(actIn)

    builder.setInplacePreferences(
        r0, {"ReshapeInplace": 10 if inplaceReshape else -10})

    opts = popart.SessionOptions()
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual
    opts.enablePipelining = True
    numIpus = 2

    patterns = popart.Patterns(popart.PatternsLevel.Default)
    patterns.InPlace = True

    device = tu.create_test_device(numIpus=numIpus)
    session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                     dataFlow=popart.DataFlow(4, [actIn]),
                                     deviceInfo=device,
                                     optimizer=popart.ConstSGD(0.1),
                                     loss=loss,
                                     userOptions=opts,
                                     patterns=patterns)

    anchors = session.initAnchorArrays()
    inputs = {in0: input_}
    stepio = popart.PyStepIO(inputs, anchors)

    session.prepareDevice()
    session.weightsFromHost()
    session.run(stepio)

    return np.copy(anchors[actIn])
def run_test():
    builder = popart.Builder()

    lhs = builder.addInputTensor(popart.TensorInfo("FLOAT", lhs_shape), "lhs")
    rhs = builder.addInputTensor(popart.TensorInfo("FLOAT", rhs_shape), "rhs")
    z = builder.addInputTensor(popart.TensorInfo("FLOAT", [2]), "zero")

    t1 = builder.aiOnnx.matmul([lhs, rhs])
    o = builder.aiOnnx.add([z, t1])
    o = builder.aiGraphcore.identityloss([o])

    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(
        1, {
            o:
            popart.AnchorReturnType("All"),
            popart.reservedGradientPrefix() + lhs:
            popart.AnchorReturnType("All"),
            popart.reservedGradientPrefix() + rhs:
            popart.AnchorReturnType("All"),
        })

    opts = popart.SessionOptions()
    opts.reportOptions = {"showExecutionSteps": "true"}

    pat = popart.Patterns(popart.PatternsLevel.Default)

    session = popart.TrainingSession(
        fnModel=proto,
        dataFlow=dataFlow,
        userOptions=opts,
        loss=o,
        optimizer=popart.ConstSGD(0.01),
        patterns=pat,
        deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    session.prepareDevice()

    anchors = session.initAnchorArrays()

    inputs = {lhs: lhs_data, rhs: rhs_data, z: zero_data}
    stepio = popart.PyStepIO(inputs, anchors)

    session.run(stepio)

    return anchors[o]
def test_postnrepl_overzealous_elimination():
    # Reproducer for T36270, included as a regression test. This test doesn't
    # actually do any assertions, it just checks that a code path that
    # previously failed does not result in any exceptions.
    #
    # The bug was that the PostNRepl pattern removed the gradient sum op that
    # produces Gradient_<in0> (which has 1 input) in the backwards subgraph,
    # also rewriting the subgraph itself to use the input of the gradient sum
    # op instead, as it's identical. However, the tensor produced by the op is
    # a graph output that is used by a call op in the main graph. The pattern
    # did not adjust this CallOp or the subgraph's output tensors, and so the
    # CallOp in the main graph failed because it was using a tensor that no
    # longer exists.

    def get_subgraph_builder(b, w):
        builder = b.createSubgraphBuilder()
        builder.addInputTensorFromParentGraph(w)
        in0 = builder.addInputTensor(
            popart.TensorInfo("FLOAT16", [4, 32, 16, 64]))
        x = builder.aiOnnx.matmul([in0, w])
        builder.addOutputTensor(x)
        return builder

    # Build the model and dataflow.
    builder = popart.Builder()
    in0 = builder.addInputTensor(popart.TensorInfo('FLOAT16', [4, 32, 1, 64]),
                                 "in0")
    w = builder.addInitializedInputTensor(np.zeros([64, 64], np.float16),
                                          "weights")
    fn = get_subgraph_builder(builder, w)
    x = builder.aiGraphcore.call([w, in0], 1, fn)[0]
    l1_loss = builder.aiGraphcore.l1loss([x], 1.0)

    optimizer = popart.SGD({
        "defaultLearningRate": (0.1, False),
        "defaultWeightDecay": (0, True)
    })
    device = popart.DeviceManager().createIpuModelDevice({})

    # Create the training session.
    popart.TrainingSession(fnModel=builder.getModelProto(),
                           loss=l1_loss,
                           deviceInfo=device,
                           optimizer=optimizer,
                           dataFlow=popart.DataFlow(1, {}),
                           userOptions=popart.SessionOptions())
def test_batchnorm_shapeinference(op_tester):
    # create test data
    d1 = np.random.rand(1, 3, 2, 2).astype(np.float32) * 100
    scale = np.random.rand(3).astype(np.float32)
    b = np.random.rand(3).astype(np.float32)
    mean = np.random.rand(3).astype(np.float32)
    var = np.random.rand(3).astype(np.float32)
    epsilon = 1e-05
    momentum = 0.1

    builder = popart.Builder()
    i1 = builder.addInputTensor(popart.TensorInfo(d1))
    iScale = builder.addInitializedInputTensor(scale)
    iB = builder.addInitializedInputTensor(b)
    iMean = builder.addInitializedInputTensor(mean)
    iVar = builder.addInitializedInputTensor(var)
    o_y, o_mean, o_var, o_smean, o_svar = builder.aiOnnx.batchnormalization(
        [i1, iScale, iB, iMean, iVar], 5, epsilon, momentum)
    builder.addOutputTensor(o_y)
    builder.addOutputTensor(o_mean)
    builder.addOutputTensor(o_var)
    builder.addOutputTensor(o_smean)
    builder.addOutputTensor(o_svar)
    lossId = builder.aiGraphcore.identityloss([o_y])
    proto = builder.getModelProto()

    anchors = [o_y, o_mean, o_var, o_smean, o_svar]
    art = popart.AnchorReturnType("All")
    dataFlow = popart.DataFlow(1, {a: art for a in anchors})
    device = tu.create_test_device()
    options = popart.SessionOptions()
    options.enableStochasticRounding = False

    # Store the shapes here to make sure we are checking shapes
    # before the IR is complete (i.e. testing onnx shape inference).
    shapes = []
    for a in anchors:
        shapes.append(tuple(builder.getTensorShape(a)))

    session = popart.TrainingSession(fnModel=proto,
                                     loss=lossId,
                                     dataFlow=dataFlow,
                                     deviceInfo=device,
                                     optimizer=popart.ConstSGD(0.01),
                                     userOptions=options)

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    inputs = {i1: d1}
    stepio = popart.PyStepIO(inputs, anchors)
    session.weightsFromHost()

    session.run(stepio)
    stepio = popart.PyStepIO(inputs, anchors)

    # This tests that the shape inference has run.
    for a, b in zip([o_y, o_mean, o_var, o_smean, o_svar], shapes):
        assert anchors[a].shape == b
def getGraph(applyOpToIdentityPattern):
    # `builder` and `idloss` are captured from the enclosing test scope.
    patterns = popart.Patterns(popart.PatternsLevel.All)
    patterns.OpToIdentity = applyOpToIdentityPattern

    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        deviceInfo=popart.DeviceManager().createCpuDevice(),
        dataFlow=popart.DataFlow(1, [idloss]),
        loss=idloss,
        optimizer=popart.ConstSGD(0.1),
        patterns=patterns)

    ir = json.loads(session._serializeIr(popart.IrSerializationFormat.JSON))
    return ir['maingraph']
def getPreparesSession(patterns):
    # `builder`, `ip`, `nll`, `LEARNING_RATE` and `WEIGHT_DECAY` are captured
    # from the enclosing test scope.
    session = popart.TrainingSession(
        fnModel=builder.getModelProto(),
        dataFlow=popart.DataFlow(1, [popart.reservedGradientPrefix() + ip]),
        optimizer=popart.ConstSGD(LEARNING_RATE, WEIGHT_DECAY),
        loss=nll,
        patterns=popart.Patterns(patterns),
        deviceInfo=tu.create_test_device())

    session.prepareDevice()
    session.weightsFromHost()
    return session
def run(model_file_name, batchSerializationFactor):
    bsize = 8
    dsize = 10
    builder = popart.Builder()
    ip = builder.addInputTensor(
        popart.TensorInfo("FLOAT", [bsize, dsize, dsize]))
    d__ip = popart.reservedGradientPrefix() + ip

    def add_layer(in_id):
        w = builder.addInitializedInputTensor(
            np.ones([dsize, dsize], np.float32))
        matmul_id = builder.aiOnnx.matmul([in_id, w])
        return matmul_id

    m1 = add_layer(ip)
    m2 = add_layer(m1)
    m3 = add_layer(m2)

    out = builder.aiGraphcore.l1loss([m3], 0.1)
    builder.addOutputTensor(out)

    device = tu.create_test_device(1)

    dfAnchors = {}

    opts = popart.SessionOptions()
    opts.enableOutlining = True
    opts.batchSerializationSettings.factor = batchSerializationFactor

    proto = builder.getModelProto()
    session = popart.TrainingSession(
        fnModel=proto,
        dataFlow=popart.DataFlow(1, dfAnchors),
        optimizer=popart.ConstSGD(0.1),
        loss=out,
        patterns=popart.Patterns(popart.PatternsLevel.All),
        userOptions=opts,
        deviceInfo=device)

    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()

    ip_data = np.ones((bsize, dsize, dsize), dtype=np.float32)
    stepio = popart.PyStepIO({ip: ip_data}, anchors)

    session.run(stepio)

    # `tmpdir` is captured from the enclosing test scope (a pytest fixture).
    session.modelToHost(str(tmpdir / model_file_name))
def getTrainingSession(fn):
    # `accl_factor`, `output_name` and `optimizer` are captured from the
    # enclosing test scope.
    opts = popart.SessionOptions()
    opts.enableGradientAccumulation = True
    opts.accumulationFactor = accl_factor
    opts.disableGradAccumulationTensorStreams = False
    sess = popart.TrainingSession(fnModel=fn,
                                  dataFlow=popart.DataFlow(1, {}),
                                  deviceInfo=tu.create_test_device(),
                                  loss=output_name,
                                  optimizer=optimizer,
                                  userOptions=opts)
    sess.prepareDevice()
    sess.weightsFromHost()
    return sess
def test_detach_error():
    np.random.seed(0)

    Batchsize = 8
    Classes = 32

    dshape = [Batchsize, 2, 4, 4]
    lshape = [Batchsize]
    wshape = [2, 2, 3, 3]

    ip_data = np.random.rand(*dshape).astype(np.float32)
    w1_data = np.random.rand(*wshape).astype(np.float32)
    lb_data = np.random.randint(Classes, size=lshape)

    builder = popart.Builder()

    input_ = builder.addInputTensor(popart.TensorInfo("FLOAT", dshape),
                                    "input_i1")
    lb = builder.addInputTensor(popart.TensorInfo("INT32", lshape))

    w1 = builder.addInitializedInputTensor(w1_data)
    conv1 = builder.aiOnnx.conv([input_, w1],
                                dilations=[1, 1],
                                pads=[1, 1, 1, 1],
                                strides=[1, 1],
                                debugPrefix="conv")
    o = builder.reshape_const(builder.aiOnnx, [conv1], [Batchsize, Classes])
    o = builder.aiGraphcore.detach([o])
    o = builder.aiOnnx.softmax([o], axis=np.size(lshape))
    loss = builder.aiGraphcore.nllloss([o, lb])

    # Anchoring the gradient of `input_` must fail: detach stops the gradient
    # from being created, so the anchored tensor never exists in the Ir.
    dataFlow = popart.DataFlow(
        1, [o, loss, popart.reservedGradientPrefix() + input_])

    opts = popart.SessionOptions()

    with pytest.raises(popart.popart_exception) as e_info:
        session = popart.TrainingSession(
            fnModel=builder.getModelProto(),
            dataFlow=dataFlow,
            loss=loss,
            optimizer=popart.ConstSGD(LEARNING_RATE, WEIGHT_DECAY),
            userOptions=opts,
            deviceInfo=popart.DeviceManager().createIpuModelDevice({}))

    assert (e_info.value.args[0].startswith(
        f"Anchor tensor `{popart.reservedGradientPrefix() + input_}' not in Ir Tensors."
    ))
def create_session(proto: onnx.ModelProto,
                   data: Mapping[str, np.ndarray],
                   outputs: Optional[Union[str, Iterable[str]]],
                   optimizer: popart.SGD,
                   loss: Optional[Union[popart.Loss,
                                        Iterable[popart.Loss]]] = None,
                   ipus: Optional[int] = None):
    outputs = make_tuple(outputs)
    if loss is not None:
        loss = make_tuple(loss)

    # Setting up the Session
    data_flow = popart.DataFlow(
        1, {output: popart.AnchorReturnType("ALL") for output in outputs})

    options = popart.SessionOptions()
    options.enableGroupedMatmuls = False
    # With an Inference session we are actually testing the fwd pass of
    # training.
    options.constantWeights = False
    options.enableStochasticRounding = False

    if ipus is not None:
        options.enableVirtualGraphs = True
    else:
        ipus = 1

    # Round the requested IPU count up to the next power of two.
    request_ipus = pow(2, math.ceil(math.log2(ipus)))
    device = popart.DeviceManager().acquireAvailableDevice(request_ipus)
    if device is None:
        raise Exception("Failed to acquire IPU.")

    # Note: this helper uses the older PopART session API
    # (`dataFeed`/`losses`/`optimizerFromHost`).
    session = popart.TrainingSession(fnModel=proto,
                                     deviceInfo=device,
                                     dataFeed=data_flow,
                                     userOptions=options,
                                     losses=loss,
                                     optimizer=optimizer)

    session.prepareDevice()
    session.weightsFromHost()
    session.optimizerFromHost()
    session.setRandomSeed(1984)

    anchors = session.initAnchorArrays()

    return session, anchors, device
def runModel(pipeline, recompute):
    # `bps`, `dshape`, `lshape`, `w0_data`, `in0_data` and `in1_data` are
    # captured from the enclosing test scope.
    builder = popart.Builder()
    in0 = builder.addInputTensor("FLOAT", dshape)
    in1 = builder.addInputTensor("INT32", lshape)
    w0 = builder.addInitializedInputTensor(w0_data)
    with builder.virtualGraph(0), builder.pipelineStage(0):
        x = builder.aiOnnx.matmul([in0, w0])
    with builder.virtualGraph(1), builder.pipelineStage(1):
        x = builder.aiOnnx.sqrt([x])
    with builder.virtualGraph(0), builder.pipelineStage(2):
        x = builder.aiOnnx.add([w0, x])
        loss = builder.aiGraphcore.nllloss([x, in1])

    opts = popart.SessionOptions()
    opts.virtualGraphMode = popart.VirtualGraphMode.Manual
    opts.enablePipelining = pipeline

    if pipeline:
        opts.enableGradientAccumulation = True
        opts.accumulationFactor = bps
        test_bps = 1
    else:
        test_bps = bps

    if recompute:
        opts.autoRecomputation = popart.RecomputationType.Pipeline

    session = popart.TrainingSession(
        deviceInfo=popart.DeviceManager().createIpuModelDevice(
            {"numIPUs": "2"}),
        dataFlow=popart.DataFlow(test_bps, [loss]),
        fnModel=builder.getModelProto(),
        loss=loss,
        optimizer=popart.ConstSGD(0.1),
        userOptions=opts)

    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    stepio = popart.PyStepIO({in0: in0_data, in1: in1_data}, anchors)
    session.run(stepio)

    weights = {}
    weights[w0] = np.empty(shape=dshape, dtype=np.float32)
    weightsIo = popart.PyWeightsIO(weights)
    session.weightsToHost()
    session.readWeights(weightsIo)

    return weights[w0]
def init_session(proto, loss, dataFlow, userOpts, device):
    # Create a session to compile and execute the graph.
    optimizer = popart.SGD({"defaultLearningRate": (0.1, False)})
    session = popart.TrainingSession(fnModel=proto,
                                     loss=loss,
                                     deviceInfo=device,
                                     optimizer=optimizer,
                                     dataFlow=dataFlow,
                                     userOptions=userOpts)

    session.prepareDevice()
    session.setRandomSeed(42)

    # Create buffers to receive results from the execution.
    anchors = session.initAnchorArrays()

    # `Session` is assumed to be a small result wrapper (e.g. a namedtuple)
    # defined in the enclosing scope.
    return Session(session, anchors), optimizer
def create_session(inplacing):
    # `data`, `weight`, `input_size` and `constantWeights` are captured from
    # the enclosing test scope.
    builder = popart.Builder()
    input_ = builder.addInputTensor(popart.TensorInfo("FLOAT", data.shape),
                                    "data")
    w = builder.addInitializedInputTensor(weight, 'input_weights')

    shape = builder.aiOnnx.constant(np.array(input_size))
    transpose = builder.aiOnnx.transpose([w], [0, 2, 1, 3], "transpose")
    builder.setInplacePreferences(transpose, {"TransposeInplace": +1e8})

    reshape = builder.aiOnnx.reshape([transpose, shape], "reshape")
    builder.setInplacePreferences(reshape, {"ReshapeInplace": +1e6})

    add = builder.aiOnnx.add([input_, reshape], "add")
    builder.setInplacePreferences(add, {"AddRhsInplace": +1e7})

    loss = builder.aiGraphcore.l1loss([add],
                                      0.1,
                                      reduction=popart.ReductionType.Mean)
    builder.addOutputTensor(loss)

    patterns = popart.Patterns(popart.PatternsLevel.Default)
    patterns.InPlace = inplacing

    opts = popart.SessionOptions()
    opts.constantWeights = constantWeights

    session = popart.TrainingSession(fnModel=builder.getModelProto(),
                                     dataFlow=popart.DataFlow(1, [loss]),
                                     deviceInfo=tu.create_test_device(),
                                     userOptions=opts,
                                     patterns=patterns,
                                     loss=loss,
                                     optimizer=popart.ConstSGD(1e-3))

    session.prepareDevice()
    anchorRets = session.initAnchorArrays()
    inputs = {"data": data.copy()}
    stepio = popart.PyStepIO(inputs, anchorRets)
    session.weightsFromHost()
    return session, stepio, anchorRets
def create_session_anchors(proto,
                           loss,
                           device,
                           dataFlow,
                           options,
                           training,
                           optimizer=None,
                           use_popdist=False):
    """ Create the desired session and compile the graph """

    if training:
        session_type = "training"
        session_kwargs = dict(fnModel=proto,
                              loss=loss,
                              deviceInfo=device,
                              optimizer=optimizer,
                              dataFlow=dataFlow,
                              userOptions=options)
    else:
        session_type = "inference"
        session_kwargs = dict(fnModel=proto,
                              deviceInfo=device,
                              dataFlow=dataFlow,
                              userOptions=options)

    if training:
        if use_popdist:
            hvd = try_import_horovod()
            session = hvd.DistributedTrainingSession(**session_kwargs,
                                                     enableEngineCaching=False)
        else:
            session = popart.TrainingSession(**session_kwargs)
    else:
        session = popart.InferenceSession(**session_kwargs)

    try:
        logger.info("Preparing the {} graph".format(session_type))
        session.prepareDevice()
        logger.info("{0} graph preparation complete.".format(
            session_type.capitalize()))
    except popart.OutOfMemoryException:
        logger.warning("Caught OutOfMemoryException during prepareDevice")
        raise

    if training and use_popdist:
        # Make sure to broadcast the weights when using popdist/poprun.
        hvd.broadcast_weights(session)

    # Create buffers to receive results from the execution.
    anchors = session.initAnchorArrays()

    return session, anchors
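# A minimal usage sketch for create_session_anchors, kept as a comment so it
# cannot affect the tests. It is illustrative only: `proto` and `loss_id` are
# hypothetical stand-ins, not names defined in this file.
#
# device = popart.DeviceManager().createIpuModelDevice({})
# session, anchors = create_session_anchors(
#     proto=proto,
#     loss=loss_id,
#     device=device,
#     dataFlow=popart.DataFlow(1, [loss_id]),
#     options=popart.SessionOptions(),
#     training=True,
#     optimizer=popart.ConstSGD(0.1))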
def test_batchnorm_train_half_fp32var(op_tester):
    # create test data
    d1 = np.random.rand(1, 3, 2, 2).astype(np.float16) * 100
    scale = np.random.rand(3).astype(np.float16)
    b = np.random.rand(3).astype(np.float16)
    mean = np.random.rand(3).astype(np.float16)
    var = np.random.rand(3).astype(np.float32)
    epsilon = 1e-05
    momentum = 0.1

    builder = popart.Builder()
    i1 = builder.addInputTensor(popart.TensorInfo(d1))
    iScale = builder.addInitializedInputTensor(scale)
    iB = builder.addInitializedInputTensor(b)
    iMean = builder.addInitializedInputTensor(mean)
    iVar = builder.addInitializedInputTensor(var)
    o_y, o_mean, o_var, o_smean, o_svar = builder.aiOnnx.batchnormalization(
        [i1, iScale, iB, iMean, iVar], 5, epsilon, momentum)
    builder.addOutputTensor(o_y)
    lossId = builder.aiGraphcore.identityloss([o_y])
    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o_y: popart.AnchorReturnType("All")})
    device = tu.create_test_device()
    options = popart.SessionOptions()
    options.enableStochasticRounding = False

    session = popart.TrainingSession(fnModel=proto,
                                     loss=lossId,
                                     dataFlow=dataFlow,
                                     deviceInfo=device,
                                     optimizer=popart.ConstSGD(0.01),
                                     userOptions=options)

    anchors = session.initAnchorArrays()
    session.prepareDevice()

    inputs = {i1: d1}
    stepio = popart.PyStepIO(inputs, anchors)
    session.weightsFromHost()

    session.run(stepio)
    stepio = popart.PyStepIO(inputs, anchors)
    session.run(stepio)
def test_mini_resnet_like():
    dirpath = os.path.dirname(os.path.realpath(__file__))
    sys.path.append(dirpath + "/../../graph_util")
    batch_size = 1
    training = True
    norm_type = 'BatchNorm'

    # Get model proto
    proto, ip, op = get_resnet18_proto(batch_size, training, norm_type)

    # Create the onnx session
    opts = popart.SessionOptions()

    session = popart.TrainingSession(
        fnModel=proto,
        dataFlow=popart.DataFlow(1, {op: popart.AnchorReturnType("All")}),
        optimizer=popart.ConstSGD(0.001),
        loss=op,
        deviceInfo=tu.create_test_device(),
        userOptions=opts)

    session.prepareDevice()

    graph_report = session.getGraphReport()
    graph_report = json.loads(graph_report)

    total_mem = sum(graph_report['memory']['byTile']['total'])
    max_mem = max(graph_report['memory']['byTile']['totalIncludingGaps'])
    print(f'total_mem: {total_mem}')
    print(f'max_mem: {max_mem}')

    # Check that the total memory is within 5% of the reference.
    ref_total = 67_201_630
    # If it is more than 5% over, it needs investigating.
    assert total_mem / ref_total < 1.05
    # If it is more than 5% under, the reference should probably be updated.
    assert total_mem / ref_total > 0.95

    # Check that the maximum memory is within 5% of the reference.
    ref_max = 134_840
    # If it is more than 5% over, it needs investigating.
    assert max_mem / ref_max < 1.05
    # If it is more than 5% under, the reference should probably be updated.
    assert max_mem / ref_max > 0.95
def get_session(anchorIds, proto, device, loss, bps=1):
    dfAnchors = {}
    for anchorId in anchorIds:
        dfAnchors.update({anchorId: popart.AnchorReturnType("All")})

    session = popart.TrainingSession(fnModel=proto,
                                     dataFlow=popart.DataFlow(bps, dfAnchors),
                                     optimizer=popart.ConstSGD(0.1),
                                     loss=loss,
                                     patterns=popart.Patterns(
                                         popart.PatternsLevel.All),
                                     deviceInfo=device)
    session.prepareDevice()
    session.weightsFromHost()
    anchors = session.initAnchorArrays()
    return session, anchors
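# A minimal usage sketch for get_session, kept as a comment so it cannot
# affect the tests. It is illustrative only: `builder`, `ip` and `loss` are
# hypothetical stand-ins from a typical enclosing test, not names defined
# in this file.
#
# anchorIds = [popart.reservedGradientPrefix() + ip]
# session, anchors = get_session(anchorIds,
#                                builder.getModelProto(),
#                                tu.create_test_device(),
#                                loss,
#                                bps=2)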
def test_invalid_tensor_location_updates():
    # Test to demonstrate the exceptions thrown during incorrect usage of
    # Session::updateExternallySavedTensorLocations.
    builder = popart.Builder()
    d1 = np.random.rand(3, 3).astype(np.float32)
    i1 = builder.addInitializedInputTensor(d1)
    o = builder.aiOnnx.matmul([i1, i1])
    loss = builder.aiGraphcore.identityloss([o])

    with TemporaryDirectory() as tmpdir:
        origpath = os.path.join(tmpdir, "model_tensors0.onnx")
        builder.saveInitializersExternally([i1], origpath)

        optimizer = popart.SGD({
            "defaultLearningRate": (0.2, True),
            "defaultMomentum": (0.5, True)
        })

        session = popart.TrainingSession(
            deviceInfo=popart.DeviceManager().createCpuDevice(),
            fnModel=builder.getModelProto(),
            loss=loss,
            optimizer=optimizer,
            dataFlow=popart.DataFlow(1, []))

        updatedpath0 = os.path.join(tmpdir, "model_tensors1.onnx")

        # Try to update from a path that doesn't exist.
        fakepath = os.path.join(tmpdir, "foo.bar")
        with pytest.raises(popart.popart_exception) as e_info:
            session.updateExternallySavedTensorLocations(
                fakepath, updatedpath0)
        assert ("but file '" + fakepath + "' does not exist"
                in e_info.value.args[0])

        session.updateExternallySavedTensorLocations(origpath, updatedpath0)

        # Try to update from the old (now stale) path.
        updatedpath1 = os.path.join(tmpdir, "model_tensors2.onnx")
        with pytest.raises(popart.popart_exception) as e_info:
            session.updateExternallySavedTensorLocations(
                origpath, updatedpath1)
        assert ("No ONNX model initializers have external location set to"
                in e_info.value.args[0])
def init_session(proto,
                 losses,
                 device,
                 dataFlow,
                 options,
                 training,
                 optimizer=None,
                 gcpLogDir=None):
    # Create a session to compile and execute the graph.
    # Note: this helper uses the older PopART session API
    # (`losses`/`dataFeed`).
    if training:
        session_type = "training"
        session = popart.TrainingSession(fnModel=proto,
                                         losses=losses,
                                         deviceInfo=device,
                                         optimizer=optimizer,
                                         dataFeed=dataFlow,
                                         userOptions=options)
    else:
        session_type = "validation"
        session = popart.InferenceSession(fnModel=proto,
                                          losses=losses,
                                          deviceInfo=device,
                                          dataFeed=dataFlow,
                                          userOptions=options)

    try:
        print("Preparing the {} graph".format(session_type))
        with Timer() as prepareTimer:
            session.prepareDevice()
    except popart.PrepareDeviceException as e:
        print("Caught PrepareDeviceException")
        if gcpLogDir is not None:
            from gcprofile import save_popart_report
            save_popart_report(session, log_dir=gcpLogDir, exception=e)
        raise

    print("{0} graph preparation complete. Duration: {1:.3f} seconds".format(
        session_type.capitalize(), prepareTimer.interval()))

    # Create buffers to receive results from the execution.
    anchors = session.initAnchorArrays()

    return session, anchors
def test_stochastic_rounding():
    # create test data
    d1 = np.random.rand(1, 3, 2, 2).astype(np.float16) * 100
    scale = np.random.rand(3).astype(np.float16)
    b = np.random.rand(3).astype(np.float16)
    mean = np.random.rand(3).astype(np.float16)
    var = np.random.rand(3).astype(np.float16)
    epsilon = 1e-05
    momentum = 0.1

    builder = popart.Builder()
    i1 = builder.addInputTensor(popart.TensorInfo(d1))
    iScale = builder.addInitializedInputTensor(scale)
    iB = builder.addInitializedInputTensor(b)
    iMean = builder.addInitializedInputTensor(mean)
    iVar = builder.addInitializedInputTensor(var)
    [o_y, o_mean, o_var, o_smean, o_svar] = builder.aiOnnx.batchnormalization(
        [i1, iScale, iB, iMean, iVar], 5, epsilon, momentum)
    loss = builder.aiGraphcore.identityloss([o_y])
    builder.addOutputTensor(o_y)
    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(1, {o_y: popart.AnchorReturnType("All")})
    device = tu.create_test_device()
    options = popart.SessionOptions()
    options.enableStochasticRounding = True

    sess = popart.TrainingSession(fnModel=proto,
                                  optimizer=popart.ConstSGD(0.1),
                                  loss=loss,
                                  dataFlow=dataFlow,
                                  deviceInfo=device,
                                  userOptions=options)

    anchors = sess.initAnchorArrays()
    sess.prepareDevice()

    # Confirm that you are able to set the random seed when
    # enableStochasticRounding is true, even though the random seed tensor
    # is not consumed by any op in the Ir.
    sess.setRandomSeed(0)
def trainSession(anchors, optimizer, stepSize):
    popart.getLogger().setLevel("TRACE")

    builder = popart.Builder()

    dataShape = popart.TensorInfo("FLOAT", [1, 2, 4, 4])
    i1 = builder.addInputTensor(dataShape)

    filtInit = np.ones([2, 2, 3, 3], dtype=np.float32)
    i2 = builder.addInitializedInputTensor(filtInit)

    c1 = builder.aiOnnx.conv([i1, i2],
                             dilations=[1, 1],
                             pads=[1, 1, 1, 1],
                             strides=[1, 1])
    c2 = builder.aiOnnx.conv([c1, i2],
                             dilations=[1, 1],
                             pads=[1, 1, 1, 1],
                             strides=[1, 1])

    o = builder.aiGraphcore.l1loss([c2], 0.1)

    proto = builder.getModelProto()

    opts = popart.SessionOptions()

    session = popart.TrainingSession(
        fnModel=proto,
        dataFlow=popart.DataFlow(stepSize, anchors),
        loss=o,
        optimizer=optimizer,
        deviceInfo=tu.create_test_device(opts={"compileIPUCode": False}))

    session.prepareDevice()
    session.weightsFromHost()

    # add step dimension to infeed
    infeedShape = dataShape.shape()
    infeedShape.insert(0, stepSize)
    data = np.ones(infeedShape, dtype=np.float32)
    inputs = {i1: data}

    return session, inputs
def create_session_anchors(proto,
                           loss,
                           device,
                           dataFlow,
                           options,
                           training,
                           optimizer=None,
                           profile=False):
    """ Create the desired session and compile the graph """

    if training:
        session_type = "training"
        session = popart.TrainingSession(fnModel=proto,
                                         loss=loss,
                                         deviceInfo=device,
                                         optimizer=optimizer,
                                         dataFlow=dataFlow,
                                         userOptions=options)
    else:
        session_type = "validation"
        session = popart.InferenceSession(fnModel=proto,
                                          deviceInfo=device,
                                          dataFlow=dataFlow,
                                          userOptions=options)

    try:
        logger.info("Preparing the {} graph".format(session_type))
        session.prepareDevice()
        logger.info("{0} graph preparation complete.".format(
            session_type.capitalize()))
    except popart.OutOfMemoryException as e:
        logger.warning("Caught OutOfMemoryException while preparing the device")
        # Dump the profiled result before raising the exception and exiting.
        if profile:
            from gcprofile import save_popart_report
            save_popart_report(session, exception=e)
        raise

    # Create buffers to receive results from the execution.
    anchors = session.initAnchorArrays()

    return session, anchors
def test_cast_no_grad(npSrcType, builderDstType):
    """Check that CastOp doesn't return a gradient op when the type being
    cast from is not float/half."""
    np.random.seed(0)
    # Is randomly generated data ok here? Also, the tested range is [0, 10],
    # so no negative inputs are tested.
    inputData = np.random.uniform(0, 10, 10).astype(npSrcType)

    builder = popart.Builder()
    input_ = builder.addInputTensor(popart.TensorInfo(inputData))
    output_ = builder.aiOnnx.cast([input_], builderDstType)
    builder.addOutputTensor(output_)
    lossId = builder.aiGraphcore.identityloss([output_])
    proto = builder.getModelProto()

    dataFlow = popart.DataFlow(
        1, {
            output_:
            popart.AnchorReturnType("All"),
            popart.reservedGradientPrefix() + input_:
            popart.AnchorReturnType("All"),
        })

    device = tu.create_test_device()
    patterns = popart.Patterns(['PreUniRepl', 'PostNRepl',
                                'SqrtGradOp']).enableRuntimeAsserts(False)
    options = popart.SessionOptions()
    options.enableStochasticRounding = False

    # Since no gradient is created for `input_`, anchoring its gradient must
    # raise an exception.
    with pytest.raises(popart.popart_exception) as e_info:
        popart.TrainingSession(fnModel=proto,
                               loss=lossId,
                               dataFlow=dataFlow,
                               deviceInfo=device,
                               optimizer=popart.ConstSGD(0.01),
                               patterns=patterns,
                               userOptions=options)

    assert (e_info.value.args[0].startswith(
        f"Anchor tensor `{popart.reservedGradientPrefix() + input_}' not in Ir Tensors."
    ))