def record_common(dag, s):
    """Check that a measure record built from state `s` survives serialization.

    The record is round-tripped twice: first through an in-memory string,
    then through a temporary record file.

    Parameters
    ----------
    dag
        Compute DAG used to create the search task and to infer bounds.
    s
        State stored in the measure input; it must differ from the DAG's
        initial state after bound inference, otherwise the last check fails.
    """
    llvm_target = tvm.target.Target("llvm")
    search_task = auto_scheduler.SearchTask(
        compute_dag=dag, workload_key="test", target=llvm_target
    )

    measure_inp = auto_scheduler.measure.MeasureInput(search_task, s)
    measure_res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    # In-memory round trip: dump to a string and parse it back.
    serialized = auto_scheduler.measure_record.dump_record_to_string(
        measure_inp, measure_res
    )
    loaded_inp, loaded_res = auto_scheduler.measure_record.load_record_from_string(
        serialized
    )
    # Comparing only the workload key keeps the input check simple.
    assert measure_inp.task.workload_key == loaded_inp.task.workload_key
    assert str(measure_res) == str(loaded_res)

    # File round trip: write one record, read it back, compare the states.
    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [measure_inp], [measure_res])

        reader = auto_scheduler.RecordReader(fp.name)
        inputs, _ = reader.read_lines()
        assert len(inputs) == 1

        expected_state = dag.infer_bound_from_state(s)
        restored_state = dag.infer_bound_from_state(inputs[0].state)

        assert expected_state == restored_state
        # Guard against a trivially passing comparison of untouched states.
        assert not (expected_state == dag.get_init_state())
def test_record():
    """Round-trip a transformed schedule state through a record file.

    Builds a chain of two matmul + relu stages, applies split, reorder, fuse,
    compute_at, compute_inline, compute_root, parallel, bind, unroll and
    vectorize steps, saves the state as a measure record and checks that the
    state read back matches after bound inference. Returns early when the
    "llvm" runtime is not enabled.
    """
    if not tvm.runtime.enabled("llvm"):
        return

    def matmul(lhs, rhs, name):
        """Build a 512x512 matmul compute op with its own reduce axis."""
        k = te.reduce_axis((0, 512), name="k")
        return te.compute(
            (512, 512),
            lambda i, j: te.sum(lhs[i][k] * rhs[k][j], axis=[k]),
            name=name,
        )

    A = te.placeholder((512, 512), name="A")
    B = te.placeholder((512, 512), name="B")
    C = matmul(A, B, "C")
    D = topi.nn.relu(C)
    # NOTE(review): E is given the name "C" as well; kept as-is, but confirm
    # whether the duplicate name is intentional.
    E = matmul(A, D, "C")
    F = topi.nn.relu(E)

    dag = auto_scheduler.ComputeDAG([A, B, F])
    s = dag.get_init_state()

    # Split
    outer_parts = s.split(C, s[C].iters[0], [4, 8, 8])
    inner_parts = s.split(C, s[C].iters[4], [8, 4, 4])
    # Reorder
    new_order = [
        outer_parts[0],
        inner_parts[0],
        outer_parts[1],
        inner_parts[1],
        outer_parts[2],
        inner_parts[2],
        outer_parts[3],
        s[C].iters[8],
        inner_parts[3],
    ]
    s.reorder(C, new_order)
    # Fuse
    s.fuse(C, [s[C].iters[pos] for pos in range(3)])
    # Compute at
    s.split(F, s[F].iters[0], [2])
    s.compute_at(E, F, s[F].iters[0])
    # Compute inline
    s.compute_inline(D)
    # Compute root
    s.compute_root(D)
    # Parallel
    s.parallel(C, s[C].iters[0])
    # Thread bind (blockIdx & threadIdx are GPU concepts; used here only so
    # the corresponding steps appear in the record)
    for iter_pos, thread_tag in ((1, "blockIdx.x"), (2, "threadIdx.z"), (3, "vthread")):
        s.bind(C, s[C].iters[iter_pos], thread_tag)
    # Unroll
    s.unroll(C, s[C].iters[4])
    # Vectorize
    s.vectorize(C, s[C].iters[6])

    llvm_target = tvm.target.create("llvm")
    search_task = auto_scheduler.SearchTask(dag, "test", llvm_target)

    measure_inp = auto_scheduler.measure.MeasureInput(search_task, s)
    measure_res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [measure_inp], [measure_res])

        reader = auto_scheduler.RecordReader(fp.name)
        inputs, _ = reader.read_lines()
        assert len(inputs) == 1

        expected_state = dag.infer_bound_from_state(s)
        restored_state = dag.infer_bound_from_state(inputs[0].state)

        assert expected_state == restored_state
        # The transformed state must not collapse back to the initial one.
        assert not (expected_state == dag.get_init_state())
def test_xgb_model():
    """Fit an XGBModel on sample records and exercise its file interfaces.

    Checks the prediction count, a loose RMSE bound against normalized
    throughputs, updating from a saved record file, and a save/load cycle.
    """
    task, inputs, results = get_sample_records(50)

    cost_model = auto_scheduler.XGBModel(num_warmup_sample=-1)
    cost_model.update(inputs, results)
    states = [inp.state for inp in inputs]
    preds = cost_model.predict(task, states)
    assert len(preds) == len(inputs)

    # Labels: the best mean cost divided by each record's mean cost.
    costs = [np.mean([x.value for x in res.costs]) for res in results]
    throughputs = np.min(costs) / costs

    # test regression quality
    squared_errors = [
        np.square(pred - label) for pred, label in zip(preds, throughputs)
    ]
    rmse = np.sqrt(np.mean(squared_errors))
    assert rmse <= 0.3

    # test loading a record file
    scratch_dir = tvm.contrib.utils.tempdir()
    records_path = scratch_dir.relpath("test1")
    auto_scheduler.save_records(records_path, inputs, results)
    cost_model.update_from_file(records_path)

    # test model serialization
    model_path = scratch_dir.relpath("test2")
    cost_model.save(model_path)
    cost_model.load(model_path)
def update_file(log_file, tasks):
    """Rewrite `log_file` in place so its records point at the given tasks.

    For every task an "old" workload key is recomputed from its compute DAG
    (JSON-encoded one-element tuple holding an MD5 digest). Records in the
    log whose workload key matches one of these old keys are re-created with
    the matching task; all other records are dropped with a printed notice.
    The key mapping and per-key hit counts are printed as well.

    Parameters
    ----------
    log_file
        Path of the record file to read and then overwrite.
    tasks
        Iterable of tasks; each must expose `compute_dag` and `workload_key`.

    Raises
    ------
    ValueError
        If a compute DAG contains an op that is neither a PlaceholderOp nor
        a ComputeOp.
    """
    # The updated records are written back to the same path they came from.
    new_log_file = log_file

    def get_old_hash_key(dag):
        """Return the hash key of a compute DAG."""
        parts = []
        for op in dag.ops:
            t = op.output(0)
            if isinstance(op, PlaceholderOp):
                parts.append("placeholder,")
            elif isinstance(op, ComputeOp):
                parts.append(str(t.op.body) + ",")
            else:
                # `op` is not a string: convert it explicitly so the intended
                # ValueError is raised rather than a TypeError from `str + op`.
                raise ValueError("Invalid op: " + str(op))
            parts.append(str(get_const_tuple(t.shape)) + ",")
            parts.append(t.dtype + ";")

        str_key = "".join(parts).encode(encoding="utf-8")
        return hashlib.md5(str_key).hexdigest()

    # Establish the key mapping
    old_key_to_task = {}
    hit_count = {}
    for idx, task in enumerate(tasks):
        old_key = json.dumps((get_old_hash_key(task.compute_dag),))
        old_key_to_task[old_key] = task
        hit_count[old_key] = 0
        print("Task %d %s -> %s" % (idx, old_key, task.workload_key))

    # Update the workload key in an existing log file
    new_inputs = []
    new_results = []
    for inp, res in load_records(log_file):
        logged_key = inp.task.workload_key
        if logged_key not in old_key_to_task:
            print(
                "Ignore key %s in log file due to no corresponding task found"
                % logged_key
            )
            continue
        hit_count[logged_key] += 1
        new_inputs.append(MeasureInput(old_key_to_task[logged_key], inp.state))
        new_results.append(res)

    for key, cnt in hit_count.items():
        print("Old key %s hits %d times" % (key, cnt))

    # Remove the old file before saving so the rewritten records replace the
    # old ones. NOTE(review): presumably save_records appends - confirm.
    if os.path.exists(new_log_file):
        os.remove(new_log_file)

    save_records(new_log_file, new_inputs, new_results)
def record_common(dag, s):
    """Check that state `s` survives a save/load cycle through a record file.

    Parameters
    ----------
    dag
        Compute DAG used to create the search task and to infer bounds.
    s
        State stored in the measure input; it must differ from the DAG's
        initial state after bound inference, otherwise the last check fails.
    """
    # NOTE(review): another record_common in this file spells the target as
    # tvm.target.Target("llvm"); confirm which API the pinned TVM provides.
    llvm_target = tvm.target.create("llvm")
    search_task = auto_scheduler.SearchTask(dag, "test", llvm_target)

    measure_inp = auto_scheduler.measure.MeasureInput(search_task, s)
    measure_res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [measure_inp], [measure_res])

        reader = auto_scheduler.RecordReader(fp.name)
        inputs, _ = reader.read_lines()
        assert len(inputs) == 1

        expected_state = dag.infer_bound_from_state(s)
        restored_state = dag.infer_bound_from_state(inputs[0].state)

        assert expected_state == restored_state
        # Guard against a trivially passing comparison of untouched states.
        assert not (expected_state == dag.get_init_state())
def test_xgb_model():
    """Fit an XGBModel on sample records and exercise its file interfaces.

    Checks the prediction count, a loose RMSE bound against normalized
    throughputs, updating from a saved record file, and a save/load cycle,
    each file step using its own temporary file.
    """
    # The DAG returned with the sample records is not needed here.
    task, _dag, inputs, results = get_sample_records(50)

    cost_model = auto_scheduler.XGBModel(num_warmup_sample=-1)
    cost_model.update(inputs, results)
    states = [inp.state for inp in inputs]
    preds = cost_model.predict(task, states)
    assert len(preds) == len(inputs)

    # Labels: the best mean cost divided by each record's mean cost.
    costs = [np.mean([x.value for x in res.costs]) for res in results]
    throughputs = np.min(costs) / costs

    squared_errors = [
        np.square(pred - label) for pred, label in zip(preds, throughputs)
    ]
    rmse = np.sqrt(np.mean(squared_errors))
    assert rmse <= 0.3

    # Update the model from records stored on disk.
    with tempfile.NamedTemporaryFile() as records_fp:
        auto_scheduler.save_records(records_fp.name, inputs, results)
        cost_model.update_from_file(records_fp.name)

    # Save the model and load it back.
    with tempfile.NamedTemporaryFile() as model_fp:
        cost_model.save(model_fp.name)
        cost_model.load(model_fp.name)
def test_measure_target_host():
    """Check that the target host is preserved across a record file round trip.

    A task is created with an explicit host target, saved as a measure
    record, read back and recovered; the recovered task's target host must
    print the same as the original one.
    """
    target_with_host = tvm.target.Target("llvm", "llvm -mtriple=aarch64-linux-gnu")
    search_task = auto_scheduler.SearchTask(
        func=matmul_auto_scheduler_test,
        args=(512, 512, 512),
        target=target_with_host,
    )

    measure_inp = auto_scheduler.measure.MeasureInput(
        search_task, search_task.compute_dag.init_state
    )
    measure_res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [measure_inp], [measure_res])

        reader = auto_scheduler.RecordReader(fp.name)
        inputs, _ = reader.read_lines()
        assert len(inputs) == 1

        loaded_inp = inputs[0]

        recovered_inp = auto_scheduler.measure.recover_measure_input(loaded_inp)
        original_host = measure_inp.task.target.host
        assert str(recovered_inp.task.target.host) == str(original_host)
def test_recover_measure_input():
    """Check that recover_measure_input restores the compute DAG and the state.

    A measure input is saved to a record file and read back. Recovering it
    must give a task whose compute DAG prints the same as the original, and
    with `rebuild_state=True` a state that prints the same as the original.
    """
    search_task = auto_scheduler.create_task(
        matmul_auto_scheduler_test, [512, 512, 512], "llvm"
    )

    measure_inp = auto_scheduler.measure.MeasureInput(
        search_task, search_task.compute_dag.init_state
    )
    measure_res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [measure_inp], [measure_res])

        reader = auto_scheduler.RecordReader(fp.name)
        inputs, _ = reader.read_lines()
        assert len(inputs) == 1

        loaded_inp = inputs[0]

        # Default recovery: only the compute DAG is compared.
        with_dag = auto_scheduler.measure_record.recover_measure_input(loaded_inp)
        assert str(with_dag.task.compute_dag) == str(measure_inp.task.compute_dag)

        # Recovery with state rebuilding: the state is compared.
        with_state = auto_scheduler.measure_record.recover_measure_input(
            loaded_inp, rebuild_state=True
        )
        assert str(with_state.state) == str(measure_inp.state)
def test_record():
    """Round-trip a transformed schedule state through a record file.

    Builds a chain of three matmul stages interleaved with relu stages, then
    applies split, reorder, fuse, compute_at, compute_inline, compute_root,
    parallel, bind, unroll, vectorize, cache_read, cache_write, follow_split
    and follow_fused_split steps. The state is saved as a measure record and
    the state read back must match after bound inference. Returns early when
    the "llvm" runtime is not enabled.
    """
    if not tvm.runtime.enabled("llvm"):
        return

    def matmul(lhs, rhs, name):
        """Build a 512x512 matmul compute op with its own reduce axis."""
        k = te.reduce_axis((0, 512), name="k")
        return te.compute(
            (512, 512),
            lambda i, j: te.sum(lhs[i][k] * rhs[k][j], axis=[k]),
            name=name,
        )

    A = te.placeholder((512, 512), name="A")
    B = te.placeholder((512, 512), name="B")
    C = matmul(A, B, "C")
    D = topi.nn.relu(C)
    E = matmul(A, D, "E")
    F = topi.nn.relu(E)
    G = matmul(A, F, "G")
    H = topi.nn.relu(G)
    I = topi.nn.relu(H)

    dag = auto_scheduler.ComputeDAG([A, B, I])
    s = dag.get_init_state()

    # Split
    outer_parts = s.split(C, s[C].iters[0], [4, 8, 8])
    inner_parts = s.split(C, s[C].iters[4], [8, 4, 4])
    # Reorder
    new_order = [
        outer_parts[0],
        inner_parts[0],
        outer_parts[1],
        inner_parts[1],
        outer_parts[2],
        inner_parts[2],
        outer_parts[3],
        s[C].iters[8],
        inner_parts[3],
    ]
    s.reorder(C, new_order)
    # Fuse
    s.fuse(C, [s[C].iters[pos] for pos in range(3)])
    # Compute at
    s.split(F, s[F].iters[0], [2])
    s.compute_at(E, F, s[F].iters[0])
    # Compute inline
    s.compute_inline(D)
    # Compute root
    s.compute_root(D)
    # Parallel
    s.parallel(C, s[C].iters[0])
    # Thread bind (blockIdx & threadIdx are GPU concepts; used here only so
    # the corresponding steps appear in the record)
    for iter_pos, thread_tag in ((1, "blockIdx.x"), (2, "threadIdx.z"), (3, "vthread")):
        s.bind(C, s[C].iters[iter_pos], thread_tag)
    # Unroll
    s.unroll(C, s[C].iters[4])
    # Vectorize
    s.vectorize(C, s[C].iters[6])
    # Cache Read
    D_global = s.cache_read(D, "global", [E])
    s.compute_at(D_global, E, s[E].iters[2])
    # Cache Write
    s.cache_write(D, "shared")

    # follow_split: split G, then follow that split step by its index
    s.split(G, s[G].iters[0], [4, 2, 8, 4], True)
    g_split_step = len(s.transform_steps) - 1
    s.follow_split(G, s[G].iters[5], g_split_step, 4)

    # follow_fused_split: split H twice, fuse, then follow both split steps
    h_first_parts = s.split(H, s[H].iters[0], [4, 2, 8, 4], True)
    h_first_step = len(s.transform_steps) - 1
    h_second_parts = s.split(H, s[H].iters[5], [2, 4, 2, 4], True)
    h_second_step = len(s.transform_steps) - 1
    # NOTE(review): this interleaved list is never used below; confirm
    # whether a reorder step was intended here.
    interleaved = [
        part for pair in zip(h_first_parts, h_second_parts) for part in pair
    ]
    for idx in range(5):
        s.fuse(H, [s[H].iters[idx], s[H].iters[idx + 1]])
    s.follow_fused_split(I, s[I].iters[0], [h_first_step, h_second_step], 0, False)

    llvm_target = tvm.target.create("llvm")
    search_task = auto_scheduler.SearchTask(dag, "test", llvm_target)

    measure_inp = auto_scheduler.measure.MeasureInput(search_task, s)
    measure_res = auto_scheduler.measure.MeasureResult([0.1], 0, "", 0.2, 1)

    with tempfile.NamedTemporaryFile() as fp:
        auto_scheduler.save_records(fp.name, [measure_inp], [measure_res])

        reader = auto_scheduler.RecordReader(fp.name)
        inputs, _ = reader.read_lines()
        assert len(inputs) == 1

        expected_state = dag.infer_bound_from_state(s)
        restored_state = dag.infer_bound_from_state(inputs[0].state)

        assert expected_state == restored_state
        # The transformed state must not collapse back to the initial one.
        assert not (expected_state == dag.get_init_state())