Example #1
0
File: enas.py  Project: zeta1999/TASO
def main(args):
    """Build the ENAS super-graph from parsed architectures, optionally
    export it to ONNX, then run the TASO optimizer over it."""
    graph = ts.new_graph()
    # Parse the 'x'-separated size spec (e.g. "1x3x32x32") into a dims tuple.
    dims = tuple(int(part) for part in args.input_size.split('x'))
    input = graph.new_input(dims=dims)
    all_w = create_layer_weights(graph, NUM_LAYERS, args.channels)
    all_arcs = parse_arcs(args.input_file)
    if args.num_models is not None:
        # Cap the number of architectures added to the shared graph.
        all_arcs = all_arcs[:args.num_models]

    # Stem convolution: 1x1 conv projecting the input to `channels` maps.
    stem_weight = graph.new_weight(dims=(args.channels, input.dim(1), 1, 1))
    t = graph.conv2d(input=input,
                     weight=stem_weight,
                     strides=(1, 1),
                     padding="SAME",
                     activation="RELU")

    # Stitch every sampled architecture onto the shared stem output.
    for arc in all_arcs:
        create_architecture(arc, graph, t, all_w)

    if args.save_models:
        onnx.save(ts.export_onnx(graph), 'original_model.onnx')

    new_graph = ts.optimize(graph, alpha=1.0, budget=1000)
    if args.save_models:
        onnx.save(ts.export_onnx(new_graph), 'optimized_model.onnx')
Example #2
0
def main(args):
    """Construct the shared-block ResNet graph and optimize it with TASO,
    optionally saving both graphs as ONNX files."""
    graph = ts.new_graph()
    # Input dims arrive as an 'x'-separated string, e.g. "1x64x56x56".
    dims = tuple(int(d) for d in args.input_size.split('x'))
    input = graph.new_input(dims=dims)
    shared_resnet_model(graph, input, args.num_models, args.num_shared_blocks)
    if args.save_graphs:
        onnx.save(ts.export_onnx(graph), 'original_model.onnx')

    new_graph = ts.optimize(graph, alpha=1.0, budget=1000)
    if args.save_graphs:
        onnx.save(ts.export_onnx(new_graph), 'optimized_model.onnx')
Example #3
0
    # Four stages of resnext_block (last arg 32 — presumably the ResNeXt
    # cardinality; confirm against resnext_block's signature). In each
    # later stage only the first block downsamples with stride (2, 2);
    # the rest use stride (1, 1).
    for i in range(3):
        t = resnext_block(graph, t, (1, 1), 128, 32)
    strides = (2, 2)
    for i in range(4):
        t = resnext_block(graph, t, strides, 256, 32)
        strides = (1, 1)
    strides = (2, 2)
    for i in range(6):
        t = resnext_block(graph, t, strides, 512, 32)
        strides = (1, 1)
    strides = (2, 2)
    for i in range(3):
        t = resnext_block(graph, t, strides, 1024, 32)
        strides = (1, 1)

    # Snapshot the unoptimized graph as ONNX before running the optimizer.
    unoptimized_model = ts.export_onnx(graph)
    debug_dir = None
    if args.debug_dir is not None:
        debug_dir = args.debug_dir.resolve()
        # No exist_ok: fails loudly if the debug directory already exists.
        debug_dir.mkdir(parents=True)
    if debug_dir is not None:
        # encode(): export_to_file apparently takes a bytes path — TODO confirm.
        graph.export_to_file(str(debug_dir / "unoptimized.txt").encode())
    if args.export:
        onnx.checker.check_model(unoptimized_model)
        onnx.save(
            unoptimized_model,
            str(args.output_dir / f"resnext50_{batch_size}_unoptimized.onnx"))
    # Run the TASO search; the result is unused in this excerpt
    # (presumably consumed further down, outside our view).
    _optimized_model = ts.optimize(graph,
                                   alpha=args.alpha,
                                   budget=args.budget,
                                   print_subst=args.print_subst)
Example #4
0
    #taso_tensor_input = new_graph.new_input_with_value(dims=(1, 3, 299, 299))
    new_graph.build_graph()
    # Warm-up pass so the timed loop below excludes one-time setup costs.
    for _, data in enumerate(test_input):
        new_graph.taso_forward(data)
    # Timed pass: accumulate per-sample wall-clock forward time.
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
        new_graph.taso_forward(data)
        time_sum += (time.time() - start)
    print("cuDNN runtime inference time after taso optimization: {}sec".format(time_sum / len(test_input)))
    f.write("cuDNN runtime inference time after taso optimization: {}sec\n\n".format(time_sum / len(test_input)))

    # Round-trip the optimized graph through ONNX on disk, then reload it
    # so onnxruntime can execute the same model.
    print("taso.export_onnx()")
    new_model = taso.export_onnx(new_graph)
    onnx.save(new_model, "./onnx_models/inception_v3.onnx")
    print("onnx.load()")
    taso_model = onnx.load("./onnx_models/inception_v3.onnx")
    print("TASO model graph:\n{}".format(onnx.helper.printable_graph(taso_model.graph)))
    print("##### INFERENCE with onnxruntime (after TASO) #####")
    sess = rt.InferenceSession("./onnx_models/inception_v3.onnx")
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name
    # Warm-up pass for onnxruntime as well.
    for _, data in enumerate(test_input):
        sess.run([label_name], {input_name: data})
    # Timed onnxruntime pass (the loop body continues past this excerpt).
    time_sum = 0
    for _, data in enumerate(test_input):
        start = time.time()
Example #5
0
    # transpose the output back, then flatten it to a (64, 1024) matrix
    output = graph.transpose(output,perm=(1,0,2), shuffle=True)
    output = graph.reshape(output, shape=(64, 1024))

    # a final linear layer
    linear = graph.new_weight(dims=(d_model, d_model))
    # NOTE(review): this multiplies `input`, not `output` — the transpose/
    # reshape result above is discarded. Looks like a bug; confirm whether
    # graph.matmul(output, linear) was intended.
    output = graph.matmul(input, linear)
    return output

if __name__ == '__main__':
    # Synthetic random inputs (500 samples of shape (seq_length, hidden_dims)).
    test_input = [np.random.randn(seq_length, hidden_dims) for _ in range(500)]

    graph = ts.new_graph()
    input = graph.new_input(dims=(seq_length, hidden_dims))
    input = graph.relu(input)
    # Stack 12 attention layers (16 heads each) on top of the activated input.
    t = input
    for _ in range(12):
        t = attention(graph, t, 16)

    print(t)

    # Export the graph before optimization, then again after TASO runs.
    onnx.save(ts.export_onnx(graph), "./onnx_models/bert.onnx")

    new_graph = ts.optimize(graph, alpha=1.05, budget=100)

    onnx.save(ts.export_onnx(new_graph), "./onnx_models/bert_taso.onnx")
Example #6
0
    # Tail of the cell: the hard-coded indices ts[6]..ts[9] assume six
    # tensors were already appended earlier in this function (outside
    # this excerpt) — confirm against the full definition.
    ts.append(graph.maxpool2d(input=cur, kernels=(3,3), strides=(2,2), padding="SAME"))
    ts.append(seperable_conv(graph, input=outputs[0], out_channels=out_channels,
              kernels=(3,3), strides=(1,1), padding="SAME"))
    outputs.append(graph.add(ts[6], ts[7]))
    ts.append(graph.avgpool2d(input=outputs[0], kernels=(3,3), strides=(1,1), padding="SAME"))
    ts.append(outputs[1])
    outputs.append(graph.add(ts[8], ts[9]))
    # Concatenate every branch output along axis 1 (presumably channels).
    return graph.concat(1, outputs)

# NASNet-A style graph: conv/maxpool stem, then three stacks of five
# normal cells each followed by a reduction cell that doubles the width.
graph = ts.new_graph()
input = graph.new_input(dims=(1,3,224,224))
weight = graph.new_weight(dims=(64,3,7,7))
input = graph.conv2d(input=input, weight=weight, strides=(2,2),
                 padding="SAME", activation="RELU")
input = graph.maxpool2d(input=input, kernels=(3,3), strides=(2,2), padding="SAME")

out_channels = 128
for i in range(3):
    # Both cell inputs start from the previous reduction output.
    prev, cur = input, input
    for j in range(5):
        prev, cur = cur, normal_cell(graph, prev, cur, out_channels)
    out_channels *= 2
    input = reduction_cell(graph, prev, cur, out_channels)

# Optimize with an unbounded budget, validate, and save the result.
new_graph = ts.optimize(graph, alpha=1.0, budget=-1)
onnx_model = ts.export_onnx(new_graph)
onnx.checker.check_model(onnx_model)
onnx.save(onnx_model, "nasneta_taso.onnx")
Example #7
0
# ResNeXt-50 stages: the first stage keeps stride (1, 1); each later
# stage opens with a stride-(2, 2) downsampling block.
for i in range(3):
    t = resnext_block(graph, t, (1, 1), 128, 32)
for stage_blocks, width in ((4, 256), (6, 512), (3, 1024)):
    strides = (2, 2)
    for i in range(stage_blocks):
        t = resnext_block(graph, t, strides, width, 32)
        strides = (1, 1)

# Export the unoptimized model and run it through onnxruntime.
before_model = ts.export_onnx(graph)
onnx.save(before_model, "./onnx_models/resnext50.onnx")

print("##### INFERENCE (before TASO) #####")
sess1 = rt.InferenceSession("./onnx_models/resnext50.onnx")
input_name = sess1.get_inputs()[0].name
label_name = sess1.get_outputs()[0].name

# Time one pass over the test set and report the per-sample average.
time_sum = 0
for data in test_input:
    start = time.time()
    output1 = sess1.run([label_name], {input_name: data})
    time_sum += (time.time() - start)

print("inference time before taso: {}s".format(time_sum / len(test_input)))
Example #8
0
File: resnet50.py  Project: uwplse/taso
# ResNet-50 body on a (1, 64, 56, 56) input: four stages of resnet_block;
# each stage after the first opens with a stride-(2, 2) downsampling block.
graph = ts.new_graph()
input = graph.new_input(dims=(1, 64, 56, 56))
t = input
for i in range(3):
    t = resnet_block(graph, t, (1, 1), 64)
for num_blocks, width in ((4, 128), (6, 256), (3, 512)):
    strides = (2, 2)
    for i in range(num_blocks):
        t = resnet_block(graph, t, strides, width)
        strides = (1, 1)

# Save the unoptimized model, then compare measured run times
# before and after the TASO optimization pass.
onnx_model = ts.export_onnx(graph)
onnx.save(onnx_model, "resnet50_old.onnx")

old_time = graph.run_time()

new_graph = ts.optimize(graph, alpha=1.0, budget=1000)

new_time = new_graph.run_time()
print("Run time of original graph is: {}".format(old_time))
print("Run time of optimized graph is: {}".format(new_time))