import tempfile

import numpy as np

import tvm
from tvm import auto_scheduler, te, topi


def record_common(dag, s):
    target = tvm.target.Target("llvm")
    task = auto_scheduler.SearchTask(compute_dag=dag,
                                     workload_key="test",
                                     target=target)

    inp = auto_scheduler.measure.MeasureInput(task, s)
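    # MeasureResult(costs, error_no, error_msg, all_cost, timestamp)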
    res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    # Test in-memory record processing.
    record_str = auto_scheduler.measure_record.dump_record_to_string(inp, res)
    r_inp, r_res = auto_scheduler.measure_record.load_record_from_string(
        record_str)
    # For simplicity, only check the workload_key.
    assert inp.task.workload_key == r_inp.task.workload_key
    assert str(res) == str(r_res)

    # Test file-based record processing.
    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [inp], [res])

        log_reader = auto_scheduler.RecordReader(fp.name)
        inputs, _ = log_reader.read_lines()
        assert len(inputs) == 1

        s1 = dag.infer_bound_from_state(s)
        s2 = dag.infer_bound_from_state(inputs[0].state)

        assert s1 == s2
        assert not (s1 == dag.get_init_state())
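
A minimal usage sketch for record_common (a hedged illustration, not part of the original test; the 128x128 matmul shapes, the single parallel step, and the wrapper name are assumptions):

def example_record_common_usage():
    A = te.placeholder((128, 128), name="A")
    B = te.placeholder((128, 128), name="B")
    k = te.reduce_axis((0, 128), name="k")
    C = te.compute((128, 128), lambda i, j: te.sum(A[i][k] * B[k][j], axis=[k]), name="C")
    dag = auto_scheduler.ComputeDAG([A, B, C])
    s = dag.get_init_state()
    # Apply at least one transform step so the recorded state differs from the initial one.
    s.parallel(C, s[C].iters[0])
    record_common(dag, s)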
Example #2
def test_record():
    if not tvm.runtime.enabled("llvm"):
        return

    A = te.placeholder((512, 512), name='A')
    B = te.placeholder((512, 512), name='B')
    k = te.reduce_axis((0, 512), name='k')
    C = te.compute((512, 512), lambda i, j: te.sum(A[i][k] * B[k][j], axis=[k]), name='C')
    D = topi.nn.relu(C)
    k = te.reduce_axis((0, 512), name='k')
    E = te.compute((512, 512), lambda i, j: te.sum(A[i][k] * D[k][j], axis=[k]), name='E')
    F = topi.nn.relu(E)

    dag = auto_scheduler.ComputeDAG([A, B, F])
    s = dag.get_init_state()

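    # Each scheduling primitive below exercises one transform-step type
    # that must round-trip through the record format.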
    # Split
    its0 = s.split(C, s[C].iters[0], [4, 8, 8])
    its1 = s.split(C, s[C].iters[4], [8, 4, 4])
    # Reorder
    s.reorder(C, [its0[0], its1[0], its0[1], its1[1], its0[2], its1[2], its0[3], s[C].iters[8],
                  its1[3]])
    # Fuse
    s.fuse(C, [s[C].iters[0], s[C].iters[1], s[C].iters[2]])
    # Compute at
    s.split(F, s[F].iters[0], [2])
    s.compute_at(E, F, s[F].iters[0])
    # Compute inline
    s.compute_inline(D)
    # Compute root
    s.compute_root(D)
    # Parallel
    s.parallel(C, s[C].iters[0])
    # Thread bind (blockIdx & threadIdx are GPU concepts; bound here only to exercise record serialization)
    s.bind(C, s[C].iters[1], "blockIdx.x")
    s.bind(C, s[C].iters[2], "threadIdx.z")
    s.bind(C, s[C].iters[3], "vthread")
    # Unroll
    s.unroll(C, s[C].iters[4])
    # Vectorize
    s.vectorize(C, s[C].iters[6])

    target = tvm.target.Target("llvm")
    task = auto_scheduler.SearchTask(compute_dag=dag, workload_key="test", target=target)

    inp = auto_scheduler.measure.MeasureInput(task, s)
    res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [inp], [res])

        log_reader = auto_scheduler.RecordReader(fp.name)
        inputs, results = log_reader.read_lines()
        assert len(inputs) == 1

        s1 = dag.infer_bound_from_state(s)
        s2 = dag.infer_bound_from_state(inputs[0].state)

        assert s1 == s2
        assert not (s1 == dag.get_init_state())
Example #3
def test_xgb_model():
    task, inputs, results = get_sample_records(50)

    model = auto_scheduler.XGBModel(num_warmup_sample=-1)
    model.update(inputs, results)
    preds = model.predict(task, [x.state for x in inputs])
    assert len(preds) == len(inputs)

    costs = [np.mean([x.value for x in res.costs]) for res in results]
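    # Normalize: the fastest record gets throughput 1.0; slower ones fall in (0, 1).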
    throughputs = np.min(costs) / costs

    # test regression quality
    rmse = np.sqrt(np.mean([np.square(pred - label) for pred, label in zip(preds, throughputs)]))
    assert rmse <= 0.3

    # test loading a record file
    tmpdir = tvm.contrib.utils.tempdir()
    tmpfile = tmpdir.relpath("test1")
    auto_scheduler.save_records(tmpfile, inputs, results)
    model.update_from_file(tmpfile)

    # test model serialization
    tmpfile = tmpdir.relpath("test2")
    model.save(tmpfile)
    model.load(tmpfile)
Example #4
import hashlib
import json
import os

from tvm.te import ComputeOp, PlaceholderOp
from tvm.topi.utils import get_const_tuple
from tvm.auto_scheduler import MeasureInput, load_records, save_records


def update_file(log_file, tasks):
    new_log_file = log_file

    def get_old_hash_key(dag):
        """Return the hash key of a compute DAG."""
        str_key = ""
        for op in dag.ops:
            t = op.output(0)
            if isinstance(op, PlaceholderOp):
                str_key += "placeholder,"
                str_key += str(get_const_tuple(t.shape)) + ","
                str_key += t.dtype + ";"
            elif isinstance(op, ComputeOp):
                str_key += str(t.op.body) + ","
                str_key += str(get_const_tuple(t.shape)) + ","
                str_key += t.dtype + ";"
            else:
                raise ValueError("Invalid op: " + str(op))

        str_key = str_key.encode(encoding="utf-8")
        return hashlib.md5(str_key).hexdigest()

    # Establish the key mapping
    old_key_to_task = {}
    hit_count = {}
    for idx, task in enumerate(tasks):
        old_key = json.dumps((get_old_hash_key(task.compute_dag), ))
        old_key_to_task[old_key] = task
        hit_count[old_key] = 0
        print("Task %d %s -> %s" % (idx, old_key, task.workload_key))

    # Update the workload key in an existing log file
    new_inputs = []
    new_results = []
    for inp, res in load_records(log_file):
        if inp.task.workload_key not in old_key_to_task:
            print(
                "Ignore key %s in log file due to no corresponding task found"
                % inp.task.workload_key)
            continue
        hit_count[inp.task.workload_key] += 1
        new_inputs.append(
            MeasureInput(old_key_to_task[inp.task.workload_key], inp.state))
        new_results.append(res)

    for key, cnt in hit_count.items():
        print("Old key %s hits %d times" % (key, cnt))

    if os.path.exists(new_log_file):
        os.remove(new_log_file)
    save_records(new_log_file, new_inputs, new_results)
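
A hedged sketch of a possible driver for update_file (get_network is a hypothetical helper, not defined here; auto_scheduler.extract_tasks is the standard task extractor):

# mod, params = get_network()  # hypothetical: returns a Relay module and its parameters
# tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target="llvm")
# update_file("old_records.json", tasks)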
Example #5
def record_common(dag, s):
    target = tvm.target.Target("llvm")
    task = auto_scheduler.SearchTask(compute_dag=dag, workload_key="test", target=target)

    inp = auto_scheduler.measure.MeasureInput(task, s)
    res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [inp], [res])

        log_reader = auto_scheduler.RecordReader(fp.name)
        inputs, results = log_reader.read_lines()
        assert len(inputs) == 1

        s1 = dag.infer_bound_from_state(s)
        s2 = dag.infer_bound_from_state(inputs[0].state)

        assert s1 == s2
        assert not (s1 == dag.get_init_state())
Example #6
def test_xgb_model():
    task, dag, inputs, results = get_sample_records(50)

    model = auto_scheduler.XGBModel(num_warmup_sample=-1)
    model.update(inputs, results)
    preds = model.predict(task, [x.state for x in inputs])
    assert len(preds) == len(inputs)

    costs = [np.mean([x.value for x in res.costs]) for res in results]
    throughputs = np.min(costs) / costs

    rmse = np.sqrt(np.mean([np.square(pred - label) for pred, label in zip(preds, throughputs)]))
    assert rmse <= 0.3

    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, inputs, results)
        model.update_from_file(fp.name)

    with tempfile.NamedTemporaryFile() as fp:
        model.save(fp.name)
        model.load(fp.name)
Example #7
def test_measure_target_host():
    task = auto_scheduler.SearchTask(
        func=matmul_auto_scheduler_test,
        args=(512, 512, 512),
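        # Target(target, host): the second argument is the host target; the test
        # below checks that this aarch64 host survives the record round trip.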
        target=tvm.target.Target("llvm", "llvm -mtriple=aarch64-linux-gnu"),
    )

    inp = auto_scheduler.measure.MeasureInput(task,
                                              task.compute_dag.init_state)
    res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [inp], [res])

        log_reader = auto_scheduler.RecordReader(fp.name)
        inputs, _ = log_reader.read_lines()
        assert len(inputs) == 1

        raw_inp = inputs[0]

        recovered_inp = auto_scheduler.measure.recover_measure_input(raw_inp)
        assert str(recovered_inp.task.target.host) == str(inp.task.target.host)
Example #8
def test_recover_measure_input():
    task = auto_scheduler.SearchTask(
        func=matmul_auto_scheduler_test, args=(512, 512, 512), target="llvm"
    )

    inp = auto_scheduler.measure.MeasureInput(task, task.compute_dag.init_state)
    res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [inp], [res])

        log_reader = auto_scheduler.RecordReader(fp.name)
        inputs, results = log_reader.read_lines()
        assert len(inputs) == 1

        raw_inp = inputs[0]

        correct_inp = auto_scheduler.measure_record.recover_measure_input(raw_inp)
        assert str(correct_inp.task.compute_dag) == str(inp.task.compute_dag)

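        # recover_measure_input rebuilds the compute_dag from the workload key;
        # with rebuild_state=True it also replays the transform steps on the state.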
        correct_inp = auto_scheduler.measure_record.recover_measure_input(
            raw_inp, rebuild_state=True
        )
        assert str(correct_inp.state) == str(inp.state)
Example #9
def test_record():
    if not tvm.runtime.enabled("llvm"):
        return

    A = te.placeholder((512, 512), name='A')
    B = te.placeholder((512, 512), name='B')
    k = te.reduce_axis((0, 512), name='k')
    C = te.compute((512, 512),
                   lambda i, j: te.sum(A[i][k] * B[k][j], axis=[k]),
                   name='C')
    D = topi.nn.relu(C)
    k = te.reduce_axis((0, 512), name='k')
    E = te.compute((512, 512),
                   lambda i, j: te.sum(A[i][k] * D[k][j], axis=[k]),
                   name='E')
    F = topi.nn.relu(E)
    k = te.reduce_axis((0, 512), name='k')
    G = te.compute((512, 512),
                   lambda i, j: te.sum(A[i][k] * F[k][j], axis=[k]),
                   name='G')
    H = topi.nn.relu(G)
    I = topi.nn.relu(H)

    dag = auto_scheduler.ComputeDAG([A, B, I])
    s = dag.get_init_state()

    # Split
    its0 = s.split(C, s[C].iters[0], [4, 8, 8])
    its1 = s.split(C, s[C].iters[4], [8, 4, 4])
    # Reorder
    s.reorder(C, [
        its0[0], its1[0], its0[1], its1[1], its0[2], its1[2], its0[3],
        s[C].iters[8], its1[3]
    ])
    # Fuse
    s.fuse(C, [s[C].iters[0], s[C].iters[1], s[C].iters[2]])
    # Compute at
    s.split(F, s[F].iters[0], [2])
    s.compute_at(E, F, s[F].iters[0])
    # Compute inline
    s.compute_inline(D)
    # Compute root
    s.compute_root(D)
    # Parallel
    s.parallel(C, s[C].iters[0])
    # Thread bind (blockIdx & threadIdx are GPU concepts; bound here only to exercise record serialization)
    s.bind(C, s[C].iters[1], "blockIdx.x")
    s.bind(C, s[C].iters[2], "threadIdx.z")
    s.bind(C, s[C].iters[3], "vthread")
    # Unroll
    s.unroll(C, s[C].iters[4])
    # Vectorize
    s.vectorize(C, s[C].iters[6])
    # Cache Read
    D_global = s.cache_read(D, "global", [E])
    s.compute_at(D_global, E, s[E].iters[2])
    # Cache Write
    s.cache_write(D, "shared")
    # Follow split
    its2 = s.split(G, s[G].iters[0], [4, 2, 8, 4], True)
    split_step0 = len(s.transform_steps) - 1
    s.follow_split(G, s[G].iters[5], split_step0, 4)
    # Follow fused split
    its2 = s.split(H, s[H].iters[0], [4, 2, 8, 4], True)
    split_step1 = len(s.transform_steps) - 1
    its3 = s.split(H, s[H].iters[5], [2, 4, 2, 4], True)
    split_step2 = len(s.transform_steps) - 1
    its = []
    for i0, i1 in zip(its2, its3):
        its.append(i0)
        its.append(i1)
    # Interleave the two splits, then fuse the resulting iterators pairwise;
    # without the reorder, the interleaved list `its` would go unused.
    s.reorder(H, its)
    for i in range(0, 5):
        s.fuse(H, [s[H].iters[i], s[H].iters[i + 1]])
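    # Split I's fused iterator by following the two split steps recorded above.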
    s.follow_fused_split(I, s[I].iters[0], [split_step1, split_step2], 0,
                         False)

    target = tvm.target.Target("llvm")
    task = auto_scheduler.SearchTask(compute_dag=dag, workload_key="test", target=target)

    inp = auto_scheduler.measure.MeasureInput(task, s)
    res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [inp], [res])

        log_reader = auto_scheduler.RecordReader(fp.name)
        inputs, results = log_reader.read_lines()
        assert len(inputs) == 1

        s1 = dag.infer_bound_from_state(s)
        s2 = dag.infer_bound_from_state(inputs[0].state)

        assert s1 == s2
        assert not (s1 == dag.get_init_state())