# ---- inception_v3: ONNX Runtime vs. TASO/cuDNN baseline ----
# warm up
for _, data in enumerate(test_input):
    torch_sess.run([label_name], {input_name: data})

# real run
time_sum = 0
for _, data in enumerate(test_input):
    start = time.time()
    # torch_output = torch_sess.run([label_name], {input_name: data})  # debug
    torch_sess.run([label_name], {input_name: data})
    # print("torch_output:\n{:.6f}".format(torch_output))  # debug
    time_sum += (time.time() - start)
print("ONNX runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
f.write("ONNX runtime inference time before taso: {}sec\n\n".format(time_sum / len(test_input)))

print("taso.load_onnx()")
old_graph = taso.load_onnx("./onnx_models/inception_v3.onnx")
# print("[before opt] taso runtime performance: {}ms".format(old_graph.run_time()))
# taso_tensor_input = old_graph.new_input_with_value(dims=(1, 3, 299, 299))
# numpy_input = np.random.randn(1, 3, 299, 299).astype('f')
old_graph.build_graph()

# warm up
for _, data in enumerate(test_input):
    old_graph.taso_forward(data)

# real run
time_sum = 0
for _, data in enumerate(test_input):
    start = time.time()
    old_graph.taso_forward(data)
    time_sum += (time.time() - start)
print("cuDNN runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
f.write("cuDNN runtime inference time before taso: {}sec\n\n".format(time_sum / len(test_input)))
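# The benchmark fragment above assumes setup along these lines (a sketch, not
# the original code: the log file name and the number/shape of test inputs are
# placeholders, and build_graph/taso_forward are this repo's custom TASO
# extensions). ex_out_size, used by the later fragments, is presumably the
# expected output size; its definition is not shown in the original.
import time

import numpy as np
import onnxruntime
import taso

f = open("bench_results.txt", "a")
torch_sess = onnxruntime.InferenceSession("./onnx_models/inception_v3.onnx")
input_name = torch_sess.get_inputs()[0].name
label_name = torch_sess.get_outputs()[0].name
test_input = [np.random.randn(1, 3, 299, 299).astype('f') for _ in range(100)]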
# ---- resnext50 benchmark (same structure as inception_v3 above) ----
# warm up (the loop header falls just before this fragment in the original
# file; reconstructed from the parallel scripts)
for _, data in enumerate(test_input):
    torch_sess.run([label_name], {input_name: data})

# real run
time_sum = 0
for _, data in enumerate(test_input):
    start = time.time()
    # torch_output = torch_sess.run([label_name], {input_name: data})  # debug
    torch_sess.run([label_name], {input_name: data})
    # print("torch_output:\n{:.6f}".format(torch_output))  # debug
    time_sum += (time.time() - start)
print("ONNX runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
f.write("ONNX runtime inference time before taso: {}sec\n\n".format(time_sum / len(test_input)))

print("taso.load_onnx()")
old_graph = taso.load_onnx("./onnx_models/resnext50.onnx")
# print("[before opt] taso runtime performance: {}ms".format(old_graph.run_time()))
# taso_tensor_input = old_graph.new_input_with_value(dims=(1, 3, 224, 224))
# numpy_input = np.random.randn(1, 3, 224, 224).astype('f')
old_graph.build_graph()

# warm up
for _, data in enumerate(test_input):
    old_graph.taso_forward(data, ex_out_size)

# real run
time_sum = 0
for _, data in enumerate(test_input):
    start = time.time()
    old_graph.taso_forward(data, ex_out_size)
    time_sum += (time.time() - start)
print("cuDNN runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
# ---- generic TASO optimization driver ----
import argparse

import onnx
import taso

parser = argparse.ArgumentParser()
parser.add_argument("-f", "--file", help="Path to input ONNX file", required=True)
args = parser.parse_args()

# graph = taso.load_onnx("/home/ubuntu/taso/onnx/squeezenet1.1.onnx")
# graph = taso.load_onnx("/home/ubuntu/taso/onnx/bertsquad10.onnx")
graph = taso.load_onnx(args.file)
# graph = xflow.load("/home/ubuntu/resnext-101.onnx")
# graph = xflow.load("/home/ubuntu/ONNXModel/inception_v2/model.onnx")

new_graph = taso.optimize(graph, alpha=1.0, budget=100, print_subst=True)
onnx_model = taso.export_onnx(new_graph)
onnx.checker.check_model(onnx_model)
onnx.save(onnx_model, "{}.taso.onnx".format(args.file))
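# Usage sketch (the script name optimize_onnx.py is hypothetical):
#   python optimize_onnx.py --file onnx_models/resnext50.onnx
# which writes the optimized graph to onnx_models/resnext50.onnx.taso.onnx.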
# ---- toy model for exercising TASO substitutions ----
import onnx
import taso
import torch
import torch.nn as nn


class SampleModel(nn.Module):
    def __init__(self):
        super().__init__()
        # NOTE: the original fragment starts inside __init__; conv1/conv2 are
        # assumed to be shape-preserving 3-channel convolutions, since each is
        # applied twice in a row and their outputs are summed.
        self.conv1 = nn.Conv2d(3, 3, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(3, 3, kernel_size=3, padding=1)
        self.relu = nn.ReLU()

    def forward(self, X):
        x = self.conv1(X)
        x = self.conv1(x)
        x = self.relu(x)
        y = self.conv2(X)
        y = self.conv2(y)
        y = self.relu(y)
        x = x + y
        x = x + 1
        x = x + 3
        # x = x * 2
        # x = x * 0.5
        return x


model = SampleModel()
x = torch.randn(1, 3, 24, 24, device='cpu')
torch.onnx.export(model, x, "model.onnx", verbose=False)

graph = taso.load_onnx("./model.onnx")
print("\n cost = {}".format(graph.cost()))
new_graph = taso.optimize(graph, alpha=1.0, budget=1000, print_subst=True)
print("\n optimized_cost = {}".format(new_graph.cost()))
new_model = taso.export_onnx(new_graph)
onnx.save(new_model, "./model_taso.onnx")
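# A quick equivalence check of the two exported graphs (a sketch; assumes
# onnxruntime is installed and both models have a single input and output):
import numpy as np
import onnxruntime as ort

inp = np.random.randn(1, 3, 24, 24).astype(np.float32)
sess_a = ort.InferenceSession("./model.onnx")
sess_b = ort.InferenceSession("./model_taso.onnx")
out_a = sess_a.run(None, {sess_a.get_inputs()[0].name: inp})[0]
out_b = sess_b.run(None, {sess_b.get_inputs()[0].name: inp})[0]
# Substitutions should preserve semantics up to floating-point noise.
np.testing.assert_allclose(out_a, out_b, rtol=1e-4, atol=1e-5)
print("original and TASO-optimized outputs match")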
# ---- vgg19 benchmark (same structure as above) ----
# warm up
for _, data in enumerate(test_input):
    torch_sess.run([label_name], {input_name: data})

# real run
time_sum = 0
for _, data in enumerate(test_input):
    start = time.time()
    # torch_output = torch_sess.run([label_name], {input_name: data})  # debug
    torch_sess.run([label_name], {input_name: data})
    # print("torch_output:\n{:.6f}".format(torch_output))  # debug
    time_sum += (time.time() - start)
print("ONNX runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
f.write("ONNX runtime inference time before taso: {}sec\n\n".format(time_sum / len(test_input)))

print("taso.load_onnx()")
old_graph = taso.load_onnx("./onnx_models/vgg19.onnx")
# print("[before opt] taso runtime performance: {}ms".format(old_graph.run_time()))
# taso_tensor_input = old_graph.new_input_with_value(dims=(1, 3, 224, 224))
# numpy_input = np.random.randn(1, 3, 224, 224).astype('f')
old_graph.build_graph()

# warm up
for _, data in enumerate(test_input):
    res1 = old_graph.taso_forward(data, ex_out_size)

# real run
time_sum = 0
for _, data in enumerate(test_input):
    start = time.time()
    res1 = old_graph.taso_forward(data, ex_out_size)
    time_sum += (time.time() - start)
print("cuDNN runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
f.write("cuDNN runtime inference time before taso: {}sec\n\n".format(time_sum / len(test_input)))
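# res1 is captured but never used above; presumably it is kept so the TASO
# forward output can be checked against ONNX Runtime. A sketch of such a
# check (assumes taso_forward returns a numpy-comparable array):
# ort_out = torch_sess.run([label_name], {input_name: test_input[0]})[0]
# np.testing.assert_allclose(res1, ort_out, rtol=1e-3, atol=1e-4)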
# ---- BERT: compare optimization strategies ----
import taso
import onnx

# 1. evaluate the performance of graph substitution optimizations alone
print("Measuring the performance of graph substitution optimizations (average of 1000 runs)")
graph = taso.load_onnx('bert_graphs/bert_subst_nw.onnx')
print("TASO: end-to-end inference time = {}ms".format(graph.run_time()))
print()

# 2. evaluate the performance of data layout optimizations alone
print("Measuring the performance of data layout optimizations")
graph = taso.load_onnx('bert_graphs/bert_layout_nw.onnx')
print("TASO: end-to-end inference time = {}ms".format(graph.run_time()))
print()

# 3. evaluate the performance of sequential optimizations
print("Measuring the performance of sequential optimizations")
graph = taso.load_onnx('bert_graphs/bert_sequential_nw.onnx')
print("TASO: end-to-end inference time = {}ms".format(graph.run_time()))
print()

# 4. evaluate the performance of joint optimizations
print("Measuring the performance of joint optimizations")
graph = taso.load_onnx('bert_graphs/bert_xflow_nw.onnx')
print("TASO: end-to-end inference time = {}ms".format(graph.run_time()))
print()
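# The four measurements above differ only in the input file; they could
# equally be driven from a table (a sketch, behavior-identical to the code
# above):
for label, path in [
    ("graph substitution optimizations", 'bert_graphs/bert_subst_nw.onnx'),
    ("data layout optimizations", 'bert_graphs/bert_layout_nw.onnx'),
    ("sequential optimizations", 'bert_graphs/bert_sequential_nw.onnx'),
    ("joint optimizations", 'bert_graphs/bert_xflow_nw.onnx'),
]:
    print("Measuring the performance of {}".format(label))
    graph = taso.load_onnx(path)
    print("TASO: end-to-end inference time = {}ms".format(graph.run_time()))
    print()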
# ---- resnext50: run_time() vs. run_forward(), before and after TASO ----
import taso
import onnx

graph = taso.load_onnx('onnx_models/resnext50.onnx')
print("graph.run_time(): {}ms".format(graph.run_time()))
print("graph.run_forward(): {}ms".format(graph.run_forward()))

graph = taso.load_onnx('onnx_models/resnext50_taso.onnx')
print("graph.run_time(): {}ms".format(graph.run_time()))
print("graph.run_forward(): {}ms".format(graph.run_forward()))
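# 'onnx_models/resnext50_taso.onnx' above is assumed to be the output of a
# prior optimization pass along the lines of the driver script earlier in
# this file (a sketch; the alpha/budget values are illustrative, not taken
# from the original scripts):
graph = taso.load_onnx('onnx_models/resnext50.onnx')
new_graph = taso.optimize(graph, alpha=1.0, budget=100)
onnx.save(taso.export_onnx(new_graph), 'onnx_models/resnext50_taso.onnx')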
# ---- alexnet benchmark (same structure as above) ----
# warm up (the loop header falls just before this fragment in the original
# file; reconstructed from the parallel scripts)
for _, data in enumerate(test_input):
    torch_sess.run([label_name], {input_name: data})

# real run
time_sum = 0
for _, data in enumerate(test_input):
    start = time.time()
    # torch_output = torch_sess.run([label_name], {input_name: data})  # debug
    torch_sess.run([label_name], {input_name: data})
    # print("torch_output:\n{:.6f}".format(torch_output))  # debug
    time_sum += (time.time() - start)
print("ONNX runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
f.write("ONNX runtime inference time before taso: {}sec\n\n".format(time_sum / len(test_input)))

print("taso.load_onnx()")
old_graph = taso.load_onnx("./onnx_models/alexnet.onnx")
# print("[before opt] taso runtime performance: {}ms".format(old_graph.run_time()))
# taso_tensor_input = old_graph.new_input_with_value(dims=(1, 3, 256, 256))
# numpy_input = np.random.randn(1, 3, 256, 256).astype('f')
old_graph.build_graph()

# warm up
for _, data in enumerate(test_input):
    old_graph.taso_forward(data, ex_out_size)

# real run
time_sum = 0
for _, data in enumerate(test_input):
    start = time.time()
    old_graph.taso_forward(data, ex_out_size)
    time_sum += (time.time() - start)
print("cuDNN runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
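# Each benchmark above measures only the "before taso" baseline; the matching
# "after taso" half presumably repeats the same timing loop on the optimized
# graph. A sketch (build_graph/taso_forward are this repo's custom TASO
# extensions; the optimize parameters are illustrative):
new_graph = taso.optimize(old_graph, alpha=1.0, budget=100)
new_graph.build_graph()

# warm up
for _, data in enumerate(test_input):
    new_graph.taso_forward(data, ex_out_size)

# real run
time_sum = 0
for _, data in enumerate(test_input):
    start = time.time()
    new_graph.taso_forward(data, ex_out_size)
    time_sum += (time.time() - start)
print("cuDNN runtime inference time after taso: {}sec".format(time_sum / len(test_input)))
f.write("cuDNN runtime inference time after taso: {}sec\n\n".format(time_sum / len(test_input)))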