Example No. 1
    # warm up
    for _, data in enumerate(test_input):
        torch_sess.run([label_name], {input_name: data})
    # real run
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
        # torch_output = torch_sess.run([label_name], {input_name: data})  # debug
        torch_sess.run([label_name], {input_name: data})
        # print("torch_output:\n{}".format(torch_output))  # debug
        time_sum += (time.time() - start)
    print("ONNX runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
    f.write("ONNX runtime inference time before taso: {}sec\n\n".format(time_sum / len(test_input)))


    print("taso.load_onnx()")
    old_graph = taso.load_onnx("./onnx_models/inception_v3.onnx")
    #print("[before opt] taso runtime performance: {}ms".format(old_graph.run_time()))
    #taso_tensor_input = old_graph.new_input_with_value(dims=(1, 3, 299, 299))
    #numpy_input = np.random.randn(1, 3, 299, 299).astype('f')
    old_graph.build_graph()
    # warm up
    for _, data in enumerate(test_input):
        old_graph.taso_forward(data)
    # real run
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
        old_graph.taso_forward(data)
        time_sum += (time.time() - start)
    print("cuDNN runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
    f.write("cuDNN runtime inference time before taso: {}sec\n\n".format(time_sum / len(test_input)))
Example No. 2
    # warm up
    for _, data in enumerate(test_input):
        torch_sess.run([label_name], {input_name: data})
    # real run
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
        # torch_output = torch_sess.run([label_name], {input_name: data})  # debug
        torch_sess.run([label_name], {input_name: data})
        # print("torch_output:\n{}".format(torch_output))  # debug
        time_sum += (time.time() - start)
    print("ONNX runtime inference time before taso: {}sec".format(
        time_sum / len(test_input)))
    f.write("ONNX runtime inference time before taso: {}sec\n\n".format(
        time_sum / len(test_input)))

    print("taso.load_onnx()")
    old_graph = taso.load_onnx("./onnx_models/resnext50.onnx")
    #print("[before opt] taso runtime performance: {}ms".format(old_graph.run_time()))
    #taso_tensor_input = old_graph.new_input_with_value(dims=(1, 3, 224, 224))
    #numpy_input = np.random.randn(1, 3, 224, 224).astype('f')
    old_graph.build_graph()
    # warm up
    for _, data in enumerate(test_input):
        old_graph.taso_forward(data, ex_out_size)
    # real run
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
        old_graph.taso_forward(data, ex_out_size)
        time_sum += (time.time() - start)
    print("cuDNN runtime inference time before taso: {}sec".format(
        time_sum / len(test_input)))
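
Unlike Example No. 1, taso_forward here takes a second argument, ex_out_size, presumably the expected output size of the network. Its definition is not shown in the fragment; a hypothetical value for a 1000-class ImageNet classifier such as ResNeXt-50 might be:

ex_out_size = (1, 1000)  # hypothetical: batch size 1, 1000 ImageNet classes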
Example No. 3
import taso
import onnx
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-f", "--file", help="Path to input ONNX file", required=True)

args = parser.parse_args()

#graph = taso.load_onnx("/home/ubuntu/taso/onnx/squeezenet1.1.onnx")
#graph = taso.load_onnx("/home/ubuntu/taso/onnx/bertsquad10.onnx")
graph = taso.load_onnx(args.file)
#graph = xflow.load("/home/ubuntu/resnext-101.onnx") 
#graph = xflow.load("/home/ubuntu/ONNXModel/inception_v2/model.onnx")
new_graph = taso.optimize(graph, alpha=1.0, budget=100, print_subst=True)
onnx_model = taso.export_onnx(new_graph)
onnx.checker.check_model(onnx_model)
onnx.save(onnx_model, "{}.taso.onnx".format(args.file))
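
Assuming this script is saved as optimize_onnx.py (a name chosen here for illustration), it can be invoked as:

    python optimize_onnx.py -f model.onnx

which verifies the optimized graph with the ONNX checker and writes it next to the input as model.onnx.taso.onnx.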
Example No. 4
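This fragment starts inside the model's __init__. A minimal reconstruction of the assumed preamble (imports, class header, and hypothetical shape-preserving convolutions, chosen so that the x + y addition in forward below is shape-compatible):

import torch
import torch.nn as nn
import onnx
import taso

class SampleModel(nn.Module):
    def __init__(self):
        super().__init__()
        # Hypothetical layer definitions; the originals are not shown in the fragment.
        self.conv1 = nn.Conv2d(3, 3, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(3, 3, kernel_size=5, padding=2)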
        self.relu = nn.ReLU()

    def forward(self, X):
        x = self.conv1(X)
        x = self.conv1(x)
        x = self.relu(x)

        y = self.conv2(X)
        y = self.conv2(y)
        y = self.relu(y)

        x = x+y
        x = x+1
        x = x+3
        # x = x * 2
        # x = x * 0.5

        return x

model = SampleModel()
x = torch.randn(1, 3, 24, 24, device='cpu')
torch.onnx.export(model, x, "model.onnx", verbose=False)
graph = taso.load_onnx("./model.onnx")
print("\n cost = {}".format(graph.cost()))
new_graph = taso.optimize(graph, alpha=1.0, budget=1000, print_subst=True)
print("\n optimized_cost = {}".format(new_graph.cost()))
new_model = taso.export_onnx(new_graph)
onnx.save(new_model, "./model_taso.onnx")
Example No. 5
    # warm up
    for _, data in enumerate(test_input):
        torch_sess.run([label_name], {input_name: data})
    # real run
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
        # torch_output = torch_sess.run([label_name], {input_name: data})  # debug
        torch_sess.run([label_name], {input_name: data})
        # print("torch_output:\n{}".format(torch_output))  # debug
        time_sum += (time.time() - start)
    print("ONNX runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
    f.write("ONNX runtime inference time before taso: {}sec\n\n".format(time_sum / len(test_input)))


    print("taso.load_onnx()")
    old_graph = taso.load_onnx("./onnx_models/vgg19.onnx")
    #print("[before opt] taso runtime performance: {}ms".format(old_graph.run_time()))
    #taso_tensor_input = old_graph.new_input_with_value(dims=(1, 3, 224, 224))
    #numpy_input = np.random.randn(1, 3, 224, 224).astype('f')
    old_graph.build_graph()
    # warm up
    for _, data in enumerate(test_input):
        res1 = old_graph.taso_forward(data, ex_out_size)
    # real run
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
        res1 = old_graph.taso_forward(data, ex_out_size)
        time_sum += (time.time() - start)
    print("cuDNN runtime inference time before taso: {}sec".format(time_sum / len(test_input)))
    f.write("cuDNN runtime inference time before taso: {}sec\n\n".format(time_sum / len(test_input)))
Example No. 6
import taso
import onnx

# 1. Evaluate the performance of graph substitution optimizations alone
print(
    "Measuring the performance of graph substitution optimizations (average of 1000 runs)"
)
graph = taso.load_onnx('bert_graphs/bert_subst_nw.onnx')
print("TASO: end-to-end inference time = {}ms".format(graph.run_time()))
print()

# 2. Evaluate the performance of data layout optimizations alone
print("Measuring the performance of data layout optimizations")
graph = taso.load_onnx('bert_graphs/bert_layout_nw.onnx')
print("TASO: end-to-end inference time = {}ms".format(graph.run_time()))
print()

# 3. Evaluate the performance of sequential optimizations
print("Measuring the performance of sequential optimizations")
graph = taso.load_onnx('bert_graphs/bert_sequential_nw.onnx')
print("TASO: end-to-end inference time = {}ms".format(graph.run_time()))
print()

# 4. Evaluate the performance of joint optimizations
print("Measuring the performance of joint optimizations")
graph = taso.load_onnx('bert_graphs/bert_xflow_nw.onnx')
print("TASO: end-to-end inference time = {}ms".format(graph.run_time()))
print()
Example No. 7
import taso
import onnx

graph = taso.load_onnx('onnx_models/resnext50.onnx')
print("graph.run_time(): {}ms".format(graph.run_time()))
print("graph.run_forward(): {}ms".format(graph.run_forward()))

graph = taso.load_onnx('onnx_models/resnext50_taso.onnx')
print("graph.run_time(): {}ms".format(graph.run_time()))
print("graph.run_forward(): {}ms".format(graph.run_forward()))
Example No. 8
    # warm up
    for _, data in enumerate(test_input):
        torch_sess.run([label_name], {input_name: data})
    # real run
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
        # torch_output = torch_sess.run([label_name], {input_name: data})  # debug
        torch_sess.run([label_name], {input_name: data})
        # print("torch_output:\n{}".format(torch_output))  # debug
        time_sum += (time.time() - start)
    print("ONNX runtime inference time before taso: {}sec".format(
        time_sum / len(test_input)))
    f.write("ONNX runtime inference time before taso: {}sec\n\n".format(
        time_sum / len(test_input)))

    print("taso.load_onnx()")
    old_graph = taso.load_onnx("./onnx_models/alexnet.onnx")
    #print("[before opt] taso runtime performance: {}ms".format(old_graph.run_time()))
    #taso_tensor_input = old_graph.new_input_with_value(dims=(1, 3, 256, 256))
    #numpy_input = np.random.randn(1, 3, 256, 256).astype('f')
    old_graph.build_graph()
    # warm up
    for _, data in enumerate(test_input):
        old_graph.taso_forward(data, ex_out_size)
    # real run
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
        old_graph.taso_forward(data, ex_out_size)
        time_sum += (time.time() - start)
    print("cuDNN runtime inference time before taso: {}sec".format(
        time_sum / len(test_input)))