def main():
    """Evaluate a Caffe-imported MNIST engine on the test set and print accuracy."""
    images = mnist.get_test_images()
    labels = mnist.get_test_labels()
    n_samples = images.shape[0]
    # Insert a channel axis: (N, H, W) -> (N, 1, H, W).
    images = images.reshape((n_samples, 1) + images.shape[1:])
    # Invert and scale from [0.0, 1.0] to [255.0, 0.0].
    images = (1.0 - images) * 255.0
    # Subtract the mean image computed over the test set.
    # TODO: load the mean image from a binaryproto file instead.
    images = images - np.mean(images, axis=0)

    # Import the Caffe model, registering a custom plugin for layer "ip2".
    logger = turret.loggers.ConsoleLogger()
    builder = turret.InferenceEngineBuilder(logger)
    network = builder.create_network(turret.DataType.FLOAT)
    plugin_factory = turret.caffe.PluginFactory()
    plugin_factory.register_plugin("ip2", fully_connected_factory)
    tensor_map = turret.caffe.import_caffemodel(
        network, "mnist.prototxt", "mnist.caffemodel", plugin_factory)
    network.mark_output("prob", tensor_map["prob"])
    builder.max_batch_size = BATCH_SIZE
    engine = builder.build(network)

    # Run batched inference and count correct argmax predictions.
    ctx = turret.ExecutionContext(engine)
    io_buffer = ctx.create_buffer()
    n_correct = 0
    for batch_start in range(0, n_samples, BATCH_SIZE):
        batch_end = min(batch_start + BATCH_SIZE, n_samples)
        io_buffer.put("data", images[batch_start:batch_end])
        ctx.execute(io_buffer)
        prob = io_buffer.get("prob").reshape((batch_end - batch_start, 10))
        prediction = prob.argmax(axis=1)
        n_correct += np.equal(prediction, labels[batch_start:batch_end]).sum()
    print("Accuracy: {}".format(n_correct / n_samples))
def run_build(args):
    """Build and serialize an inference engine from a Caffe model.

    Args:
        args: parsed CLI arguments providing ``dtype`` ("int8", "half" or
            "float"), ``calibrator``, ``mean``, ``nbatches``, ``deploy``,
            ``model`` and ``dest``.

    Side effects:
        Writes the serialized engine to ``args.dest``. Exits the process
        with a non-zero status on invalid arguments.
    """
    if args.dtype == "int8" and args.calibrator is None:
        sys.stderr.write("calibrator is required for int8 inference.\n")
        sys.exit(-1)
    if args.dtype == "int8":
        dtype = turret.DataType.INT8
        calibrator = create_int8_calibrator(
            args.calibrator, IMAGE_SIZE, np.load(args.mean), args.nbatches)
    elif args.dtype == "half":
        dtype = turret.DataType.HALF
        calibrator = None
    elif args.dtype == "float":
        dtype = turret.DataType.FLOAT
        calibrator = None
    else:
        # Bug fix: an unrecognized dtype previously fell through, leaving
        # ``dtype``/``calibrator`` unbound and raising NameError below.
        # Fail with an explicit message instead.
        sys.stderr.write("unknown dtype: {}\n".format(args.dtype))
        sys.exit(-1)
    logger = turret.loggers.ConsoleLogger()
    builder = turret.InferenceEngineBuilder(logger)
    network = builder.create_network(dtype)
    tensor_set = turret.caffe.import_caffemodel(
        network, args.deploy, args.model)
    network.mark_output("prob", tensor_set["prob"])
    builder.max_batch_size = MAX_BATCH_SIZE
    builder.max_workspace_size = WORKSPACE_SIZE
    builder.int8_calibrator = calibrator
    engine = builder.build(network)
    with open(args.dest, "wb") as f:
        engine.serialize(f)
def execute_inference(inputs, network_generator, max_batch_size=128):
    """Build an engine from ``network_generator`` and run a single inference.

    Args:
        inputs: mapping of input tensor name -> host array to feed.
        network_generator: callable that populates a freshly created network.
        max_batch_size: maximum batch size the engine is built for.

    Returns:
        The contents of the "output" tensor after execution.
    """
    engine_builder = turret.InferenceEngineBuilder(
        turret.loggers.ConsoleLogger(turret.Severity.INFO))
    net = engine_builder.create_network()
    network_generator(net)
    engine_builder.max_batch_size = max_batch_size
    engine_builder.max_workspace_size = 1 << 30
    engine = engine_builder.build(net)
    with turret.ExecutionContext(engine) as context:
        io_buffer = context.create_buffer()
        for name, array in inputs.items():
            io_buffer.put(name, array)
        context.execute(io_buffer)
        return io_buffer.get("output")
def main():
    """Evaluate an INT8 MNIST engine built from HDF5 weights and print accuracy."""
    images = mnist.get_test_images()
    labels = mnist.get_test_labels()
    n_samples = images.shape[0]
    # Insert a channel axis: (N, H, W) -> (N, 1, H, W).
    images = images.reshape((n_samples, 1) + images.shape[1:])

    logger = turret.loggers.ConsoleLogger()
    builder = turret.InferenceEngineBuilder(logger)
    network = builder.create_network(turret.DataType.INT8)
    with h5py.File("model.h5", "r") as h5file:
        def param(name):
            # Materialize the named dataset as an in-memory array.
            return h5file["predictor"][name][:]
        # LeNet-style network: conv/pool x2, then two FC layers + softmax.
        h = network.add_input("input", turret.DataType.FLOAT,
                              turret.Dimensions.CHW(1, 28, 28))
        h = L.convolution_2d(h, param("conv1/W"), param("conv1/b"))
        h = L.max_pooling_2d(h, 2, stride=2)
        h = L.convolution_2d(h, param("conv2/W"), param("conv2/b"))
        h = L.max_pooling_2d(h, 2, stride=2)
        h = L.fully_connected(h, param("fc1/W"), param("fc1/b"))
        h = L.relu(h)
        h = L.fully_connected(h, param("fc2/W"), param("fc2/b"))
        h = L.softmax(h)
        network.mark_output("prob", h)
        builder.max_batch_size = BATCH_SIZE
        builder.max_workspace_size = 2 ** 30
        # Calibrate INT8 ranges on a leading slice of the test images.
        builder.int8_calibrator = turret.Int8Calibrator(
            images[:CALIBRATOR_COUNT], BATCH_SIZE)
        engine = builder.build(network)

    # Run batched inference and count correct argmax predictions.
    ctx = turret.ExecutionContext(engine)
    io_buffer = ctx.create_buffer()
    n_correct = 0
    for batch_start in range(0, n_samples, BATCH_SIZE):
        batch_end = min(batch_start + BATCH_SIZE, n_samples)
        io_buffer.put("input", images[batch_start:batch_end])
        ctx.execute(io_buffer)
        prob = io_buffer.get("prob").reshape((batch_end - batch_start, 10))
        prediction = prob.argmax(axis=1)
        n_correct += np.equal(prediction, labels[batch_start:batch_end]).sum()
    print("Accuracy: {}".format(n_correct / n_samples))
def build_encodeengine(encoder, batch_size, dtype, logger,
                       max_sequence_length=16, workspace_size=2**30):
    """Build the encoder engine (embedding lookup + (bi)LSTM).

    Args:
        encoder: mapping of PyTorch-style parameter names to weight arrays.
        batch_size: maximum batch size the engine supports.
        dtype: turret.DataType used for the network.
        logger: turret logger instance.
        max_sequence_length: maximum number of input tokens.
        workspace_size: builder scratch-memory budget in bytes.

    Returns:
        The built inference engine with outputs "context", "hidden" and "cell".
    """
    sys.stderr.write("------------------------------\n")
    sys.stderr.write(" encoder\n")
    sys.stderr.write("------------------------------\n")
    builder = turret.InferenceEngineBuilder(logger)
    network = builder.create_network(dtype)

    # Extract parameters; a "_reverse" weight marks a bidirectional model.
    embedding = encoder["embedding.weight"]
    is_bidirectional = "lstm.weight_hh_l0_reverse" in encoder
    fwd_weights = [_reorg_lstm_parameters(encoder["lstm.weight_ih_l0"][:],
                                          encoder["lstm.weight_hh_l0"][:])]
    fwd_bias = [_reorg_lstm_parameters(encoder["lstm.bias_ih_l0"][:],
                                       encoder["lstm.bias_hh_l0"][:])]
    bwd_weights = []
    bwd_bias = []
    if is_bidirectional:
        bwd_weights.append(
            _reorg_lstm_parameters(encoder["lstm.weight_ih_l0_reverse"][:],
                                   encoder["lstm.weight_hh_l0_reverse"][:]))
        bwd_bias.append(
            _reorg_lstm_parameters(encoder["lstm.bias_ih_l0_reverse"][:],
                                   encoder["lstm.bias_hh_l0_reverse"][:]))

    # Define the network: token indices -> embedding gather -> (bi)LSTM.
    embedding_const = network.add_constant(embedding)
    h = network.add_input(
        "words", turret.DataType.INT32,
        turret.Dimensions(((1, turret.DimensionType.INDEX),
                           (max_sequence_length, turret.DimensionType.INDEX))))
    h = L.gather(embedding_const, h, 0)
    h_lengths = network.add_input(
        "lengths", turret.DataType.INT32,
        turret.Dimensions(((1, turret.DimensionType.INDEX), )))
    if is_bidirectional:
        context, hidden, cell = L.blstm_v2(
            h, max_sequence_length, fwd_weights, bwd_weights,
            fwd_bias, bwd_bias, sequence_lengths=h_lengths)
    else:
        context, hidden, cell = L.lstm_v2(
            h, max_sequence_length, fwd_weights, fwd_bias,
            sequence_lengths=h_lengths)
    network.mark_output("context", context)
    network.mark_output("hidden", hidden)
    network.mark_output("cell", cell)

    builder.max_batch_size = batch_size
    builder.max_workspace_size = workspace_size
    return builder.build(network)
def build_decodeengine(decoder, batch_size, dtype, logger,
                       max_sequence_length=16, workspace_size=2**30):
    """Build an inference engine for a single decoder step (LSTM + attention).

    Args:
        decoder: mapping of PyTorch-style parameter names to weight arrays.
        batch_size: maximum batch size the engine supports.
        dtype: turret.DataType used for the network.
        logger: turret logger instance.
        max_sequence_length: number of encoder positions attended over.
        workspace_size: builder scratch-memory budget in bytes.

    Returns:
        The built inference engine with outputs "hidden_out", "cell_out"
        and "indices_out".
    """
    sys.stderr.write("------------------------------\n")
    sys.stderr.write(" decoder\n")
    sys.stderr.write("------------------------------\n")
    builder = turret.InferenceEngineBuilder(logger)
    network = builder.create_network(dtype)
    emb = decoder["embedding.weight"]
    # Hidden size taken from the hidden-to-hidden LSTM weight's column count.
    hidden_size = decoder["lstm.weight_hh_l0"].shape[1]
    weights = []
    bias = []
    weights.append(
        _reorg_lstm_parameters(decoder["lstm.weight_ih_l0"],
                               decoder["lstm.weight_hh_l0"]))
    bias.append(
        _reorg_lstm_parameters(decoder["lstm.bias_ih_l0"],
                               decoder["lstm.bias_hh_l0"]))
    tgt = network.add_constant(emb)
    # Embedding and LSTM: look up the previous token's embedding and run
    # one LSTM step, carrying hidden/cell state in and out of the engine.
    h_indices_in = network.add_input(
        "indices_in", turret.DataType.INT32,
        turret.Dimensions(((1, turret.DimensionType.INDEX), )))
    h_indices_in = L.gather(tgt, h_indices_in, 0)
    h_indices_in = L.reshape(h_indices_in,
                             turret.Dimensions.CHW(1, 1, hidden_size))
    h_hidden = network.add_input("hidden_in", turret.DataType.FLOAT,
                                 turret.Dimensions.CHW(1, 1, hidden_size))
    h_cell = network.add_input("cell_in", turret.DataType.FLOAT,
                               turret.Dimensions.CHW(1, 1, hidden_size))
    h, h_hidden, h_cell = L.lstm_v2(h_indices_in, 1, weights, bias,
                                    hidden_state=h_hidden,
                                    cell_state=h_cell)
    network.mark_output("hidden_out", h_hidden)
    network.mark_output("cell_out", h_cell)
    # Attention: score encoder states against the decoder hidden state and
    # mix them into a context vector, which is concatenated onto the output.
    h_hidden_enc = network.add_input(
        "enc_hidden", turret.DataType.FLOAT,
        turret.Dimensions.CHW(1, max_sequence_length, hidden_size))
    h_attn_w = L.elementwise(h_hidden_enc, h_hidden,
                             turret.ElementWiseOperation.PROD)
    h_attn_w = L.reduce(h_attn_w, turret.ReduceOperation.SUM, axes=2)
    h_hidden_enc = L.reshape(
        h_hidden_enc,
        turret.Dimensions.HW(max_sequence_length, hidden_size))
    h_context = L.matrix_multiply(h_attn_w, False, h_hidden_enc, False)
    h_context = L.softmax(h_context)
    h_context = L.reshape(
        h_context,
        turret.Dimensions.CHW(1, h_context.dimensions.shape[0],
                              h_context.dimensions.shape[1]))
    h = L.concat([h, h_context], axis=2)
    # Out, softmax, and log: project to vocabulary size, take log-softmax,
    # then pick the top-1 index as the emitted token.
    out_weights = decoder["out.weight"][:]
    out_bias = decoder["out.bias"][:]
    h = L.fully_connected(h, out_weights, out_bias)
    h = L.softmax(h)
    h = L.unary(h, turret.UnaryOperation.LOG)
    h = L.reshape(
        h, turret.Dimensions(
            ((h.dimensions.shape[0], turret.DimensionType.SPATIAL), )))
    _, h_indices_out = L.top_k(h, turret.TopKOperation.MAX, 1, 1)
    # NOTE(review): workaround — reading .dimensions appears to force shape
    # resolution; removing this line makes the build fail. Confirm with the
    # turret maintainers before cleaning it up.
    h_indices_out.dimensions
    network.mark_output("indices_out", h_indices_out)
    builder.max_batch_size = batch_size
    builder.max_workspace_size = workspace_size
    # build
    engine = builder.build(network)
    return engine
def build_network(network_generator):
    """Create a fresh network and populate it via ``network_generator``.

    Args:
        network_generator: callable invoked with the new network object.

    Returns:
        The populated network (not yet built into an engine).
    """
    console_logger = turret.loggers.ConsoleLogger(turret.Severity.INFO)
    engine_builder = turret.InferenceEngineBuilder(console_logger)
    net = engine_builder.create_network()
    network_generator(net)
    return net
def _create_builder(self):
    """Return a fresh InferenceEngineBuilder wired to a console logger."""
    return turret.InferenceEngineBuilder(turret.loggers.ConsoleLogger())