Пример #1
0
def parse_one_log(best_config_log, new_log_dir):
    """Fold the records of one log file into the module-level ``wkls`` table.

    The workload is derived from the first record of the log (with its target
    replaced by the global ``log_target``) and registered in ``wkls`` as
    ``(new_log_path, heap)`` if it is not there yet.  Every successful record
    is then offered to that heap, which keeps at most ``top_n_cfgs`` entries
    with the lowest mean cost.

    Parameters
    ----------
    best_config_log : str
        Log file passed to ``load_from_file``.
    new_log_dir : str
        Directory used to build the output log path of a new workload.
    """
    target_wkl = None
    for inp, res in load_from_file(best_config_log):
        if target_wkl is None:
            # Update the target string to generate the right SHA2 hash code.
            inp.task.target = inp.target
            target_wkl = Workload.from_task(inp.task)
            target_wkl['target'] = log_target
            if target_wkl not in wkls:
                log_name = gen_log_file_name_from_workload(target_wkl)
                wkls[target_wkl] = ('{0}/{1}'.format(new_log_dir, log_name), [])

        # Failed measurements never enter the ranking.
        if res.error_no != 0:
            continue

        new_inp = MeasureInput(target=log_target,
                               task=inp.task,
                               config=inp.config)
        best_heap = wkls[target_wkl][1]
        mean_cost = np.mean(res.costs)

        # Costs are stored negated, so the heap root is the worst kept config.
        # NOTE(review): two entries with the same mean cost make heapq compare
        # the MeasureInput objects themselves -- confirm they are orderable.
        if len(best_heap) < top_n_cfgs:
            heapq.heappush(best_heap, (-mean_cost, new_inp))
        elif mean_cost < -best_heap[0][0]:
            # Only focus on the best N configs: evict the worst, keep the new.
            heapq.heappop(best_heap)
            heapq.heappush(best_heap, (-mean_cost, new_inp))
Пример #2
0
def batch_loader(log_file, target, batch_size=8):
    """Batch loading measure inputs.

    Parameters
    ----------
    log_file : str
        Log file passed to ``load_from_file``.
    target : str
        Target string; converted once with ``tvm.target.create`` and applied
        to every loaded task and measure input.
    batch_size : int, optional
        Number of measure inputs per yielded batch.

    Yields
    ------
    list of MeasureInput
        Batches of exactly ``batch_size`` inputs, followed by one shorter
        batch holding the remainder if there is any.  An empty batch is never
        yielded.
    """
    tvm_target = tvm.target.create(target)
    batch = []
    for inp, _ in load_from_file(log_file):
        # FIXME (comaniac): If we apply different target (e.g., llvm to cuda) then
        # the task might be missing.
        inp.task.target = tvm_target
        batch.append(MeasureInput(tvm_target, inp.task, inp.config))
        if len(batch) == batch_size:
            yield batch
            batch = []

    # Flush the remainder only when there is one; otherwise an empty log, or a
    # record count that is a multiple of batch_size, would end with an empty
    # batch.
    if batch:
        yield batch
Пример #3
0
    def _fetch_cfg(self):
        """Read and pre-process input schedules.

        Records come from ``self._records``: a string is treated as a log file
        name and loaded with ``load_from_file``, anything else is iterated
        directly.  For every node listed in ``self._in_nodes_dict`` whose op is
        in ``self._target_ops``, ``node_entry["record_candidates"]`` is set to
        at most ``self._max_sch_num`` records of the node's first workload,
        sorted by ascending ``costs[0]`` and holding a single record per
        inferred layout.  Nodes sharing a workload share the same list.
        """
        records = self._records
        if isinstance(records, str):
            records = load_from_file(records)

        # Group the (input, result) pairs by the workload of their task.
        cfg_dict = {}
        for record in records:
            measure_inp, _ = record
            cfg_dict.setdefault(measure_inp.task.workload, []).append(record)

        cache_dict = {}
        for key in self._in_nodes_dict:
            node_entry = self._node_list[key]
            if node_entry["op"] not in self._target_ops:
                continue

            workload = node_entry["workloads"][0]
            if workload in cache_dict:
                # Candidates for this workload were already computed.
                node_entry["record_candidates"] = cache_dict[workload]
                continue

            infer_layout_func = OP2LAYOUT[node_entry["topi_op"][0]]
            layout_tracking_dict = {}
            for record in cfg_dict[workload]:
                in_measure, out_measure = record
                workload = in_measure.task.workload
                cfg = in_measure.config
                # For multiple cfgs which produce the same in/out layouts,
                # only the most efficient one is preserved.
                with self._target:
                    layouts = infer_layout_func(workload, cfg)
                    best_so_far = layout_tracking_dict.get(layouts)
                    if (best_so_far is None
                            or out_measure.costs[0] < best_so_far[1].costs[0]):
                        layout_tracking_dict[layouts] = record

            sorted_records = sorted(layout_tracking_dict.values(),
                                    key=lambda item: item[1].costs[0])
            keep = min(self._max_sch_num, len(sorted_records))
            record_candidates = [sorted_records[i] for i in range(keep)]

            node_entry["record_candidates"] = record_candidates
            cache_dict[workload] = record_candidates
Пример #4
0
def extract_feature_from_file(log_file: str, out_path: str):
    """Parse a log file and extract features to the output files.

    Records are grouped by ``(gen_key_str, gen_target_str, gen_file_str)`` and
    every group is appended, one JSON object per line, to
    ``<out_path>/<target_key>/<file_key>.json`` while holding a ``FileLock``
    on ``<out_file>.lock``.

    Parameters
    ----------
    log_file : str
        Tuning log read with ``load_from_file``.
    out_path : str
        Root directory of the output feature files.

    Returns
    -------
    None on success.  If ``extract_feature`` raises for any record, the
    function returns ``str(err)`` immediately and writes nothing.
    """
    data: Dict[Tuple[str, str, str], List[str]] = {}

    for inp, res in load_from_file(log_file):
        key = (gen_key_str(inp), gen_target_str(inp), gen_file_str(inp))

        try:
            features = extract_feature(inp)
        except Exception as err:  # pylint: disable=broad-except
            return str(err)

        # Compute GFLOP/s
        task = create(inp.task.name, inp.task.args, inp.target)
        if res.error_no == 0:
            features['thrpt'] = np.around(task.flop / 1e9 / np.mean(res.costs),
                                          2).tolist()
        else:
            # Failed measurements are recorded with zero throughput.
            features['thrpt'] = 0

        data.setdefault(key, []).append(json.dumps(features))

    for (_, target_key, file_key), feats in data.items():
        # exist_ok avoids the check-then-create race when several processes
        # write features for the same target at the same time.
        os.makedirs(os.path.join(out_path, target_key), exist_ok=True)
        out_file = '{0}/{1}/{2}.json'.format(out_path, target_key, file_key)
        lock_file = '{0}.lock'.format(out_file)
        with FileLock(lock_file):
            with open(out_file, 'a') as filep:
                for record in feats:
                    filep.write(record)
                    filep.write('\n')
    return None
Пример #5
0
    def benchmark_layout_transform(self,
                                   min_exec_num=100,
                                   timeout=10,
                                   use_rpc=False,
                                   device_key=None,
                                   host="localhost",
                                   port=9190,
                                   n_parallel=1,
                                   build_func='default',
                                   layout_records=None,
                                   target_host=None,
                                   infer_layout=False):
        """Benchmark all possible layout transformation in the graph,
        given a set of schedule candidates for each workload of target operator.

        Parameters
        ----------
        min_exec_num : int, optional
            Minimum number of execution. Final execution time is the average of
            all execution time.

        timeout : int, optional
            Time out for each execution.

        use_rpc : boolean, optional
            Whether to use rpc mode for benchmarking.

        device_key : str, optional
            Remote device key which can be queried by
            python -m tvm.exec.query_rpc_tracker --host=0.0.0.0 --port=9190

        host : str, optional
            IP address used to create RPC tracker on host machine.

        port : int, optional
            Port number used to create RPC tracker on host machine.

        n_parallel: int, optional
            The number of measurement task that can run in parallel.
            Set this according to the number of cpu cores (for compilation) and
            the number of devices you have (for measuring generate code).

        build_func: str or callable, optional
            'default': call default builder. This works for normal target (llvm, cuda)

            'ndk': use Android NDK to create shared library. Use this for android target.

            callable: customized build function for other backends (e.g. VTA).
                      See autotvm/measure/measure_methods.py::default_build_func for example.

        layout_records : str or iterator of (MeasureInput, MeasureResult). optional
            Collection of layout_transform benchmarking records.
            If is str, then it should be the filename of a records log file.
                   Each row of this file is an encoded record pair.
            Otherwise, it is an iterator.

            If this argument is set, graph tuner will first check whether layout_transform
            workload already exists in records and skip benchmarking if possible.

        target_host : str or :any:`tvm.target.Target`, optional
            Host compilation target, if target is device.
            When TVM compiles device specific program such as CUDA,
            we also need host(CPU) side code to interact with the driver
            setup the dimensions and parameters correctly.
            target_host is used to specify the host side codegen target.
            By default, llvm is used if it is enabled,
            otherwise a stackvm interpreter is used.

        infer_layout : bool, optional
            Whether to infer layout transformation time if it doesn't exist in records, instead
            of benchmarking on target device.

            This might bring performance loss comparing to benchmarking layout transformation.

        Raises
        ------
        RuntimeError
            If ``infer_layout`` is set but ``layout_records`` is None or names
            a log file without records, or if ``use_rpc`` is set without a
            ``device_key``.
        """
        self._logger.info("Start to benchmark layout transformation...")
        if layout_records is None and infer_layout:
            raise RuntimeError(
                "Requires some records to infer layout transformation time.")

        if isinstance(layout_records, str):
            # Materialize the loaded records: if load_from_file returns a lazy
            # iterator it is always truthy and the emptiness check below could
            # never fire.
            layout_records = list(load_from_file(layout_records))
            if not layout_records and infer_layout:
                raise RuntimeError(
                    "Records must be non-empty to infer layout transformation time."
                )

        # Accumulate the known records and derive an average time per input
        # element, used below to infer the time of unseen transformations.
        num_flops, total_time = 0, 0
        if layout_records is not None:
            for record in layout_records:
                ltf_wkl = record[0].task.workload
                self._layout_transform_perf_records[ltf_wkl] = record
                input_shape = ltf_wkl[1][1]
                flops = np.prod(input_shape)
                num_flops += flops
                total_time += record[1].costs[0]
        avg_time = total_time / num_flops if num_flops > 0 else 0

        args_list = []

        def _fetch_args_callback(from_node_idx, to_node_idx, from_sch_idx,
                                 to_sch_idx, args):
            """Callback function to fetch layout transform args"""
            _, in_layout, out_layout = args
            if in_layout != out_layout:
                args_list.append(args)

        self._iterate_layout_transform(_fetch_args_callback)

        def _log_to_list(record_list):
            """Callback to log result to a list."""
            def _callback(_, inputs, results):
                """Callback implementation"""
                record_list.append((inputs[0], results[0]))

            return _callback

        builder = autotvm.LocalBuilder(n_parallel=n_parallel,
                                       build_func=build_func)
        runner = autotvm.LocalRunner(number=min_exec_num,
                                     repeat=1,
                                     timeout=timeout)
        if use_rpc:
            if device_key is None:
                raise RuntimeError(
                    "device_key need to be set to use rpc tracker mode.")
            runner = autotvm.measure.RPCRunner(device_key,
                                               host,
                                               port,
                                               n_parallel=n_parallel,
                                               number=min_exec_num,
                                               repeat=1,
                                               timeout=timeout)
        measure_option = autotvm.measure_option(builder=builder, runner=runner)
        for args in args_list:
            data, in_layout, out_layout = args
            args = serialize_args(args)
            ltf_workload = (
                'layout_transform', ) + autotvm.task.args_to_workload(args)
            # Already covered by the provided or previously measured records.
            if ltf_workload in self._layout_transform_perf_records:
                continue

            if infer_layout:
                input_shape = ltf_workload[1][1]
                flops = 1
                for i in input_shape:
                    flops *= i

                # Rule out invalid layout transformations
                out = topi.layout_transform(data, in_layout, out_layout)
                out_flops = 1
                for i in topi.util.get_const_tuple(out.shape):
                    out_flops *= i

                if flops != out_flops:
                    inferred_time = INVALID_LAYOUT_TIME
                else:
                    inferred_time = flops * avg_time

                # Synthetic record: there is no real task or config behind it.
                record_input = MeasureInput(target=self._target,
                                            task=None,
                                            config=None)
                record_output = MeasureResult(costs=(inferred_time, ),
                                              error_no=0,
                                              all_cost=-1,
                                              timestamp=-1)
                self._layout_transform_perf_records[ltf_workload] = (
                    record_input, record_output)
                continue

            records = []
            task = autotvm.task.create(layout_transform,
                                       args=args,
                                       target=self._target,
                                       target_host=target_host)
            task.workload = ltf_workload
            tuner = autotvm.tuner.GridSearchTuner(task)
            tuner.tune(n_trial=1,
                       measure_option=measure_option,
                       callbacks=[_log_to_list(records)])
            # A non-float cost marks a failed measurement; record it with the
            # invalid-layout sentinel time instead.
            if not isinstance(records[0][1].costs[0], float):
                records[0] = (records[0][0], records[0][1]._replace(
                    costs=(INVALID_LAYOUT_TIME, )))
            self._layout_transform_perf_records[ltf_workload] = records[0]

        self._iterate_layout_transform(self._create_matrix_callback)
        self._logger.info("Benchmarking layout transformation successful.")
def run_one_wkl(platform1_log, platform2_dirs):
    """Record how the top configs of one workload shift rank across platforms.

    The best ``top_n_cfgs`` configs (by mean cost) of ``platform1_log`` are
    ranked, then looked up in the ranking built from every matching full log
    found in ``platform2_dirs``.  For each config present on both sides,
    ``rank1 - rank2`` is counted in the module-level ``hist`` dictionary.

    Parameters
    ----------
    platform1_log : str
        Log file of the first platform, read with ``load_from_file``.
    platform2_dirs : list of str
        Directories searched for ``<SHA2>*.json`` logs of the same workload.

    Returns
    -------
    None.  Returns early, after printing a message, when no log of the second
    platform matches the workload.
    """
    target_wkl = None
    log_files = None
    top_cfgs = []
    for inp, res in load_from_file(platform1_log):
        if target_wkl is None:
            # Update the target string to generate the same SHA2 hash code
            # to identify the full config log file.
            inp.task.target = inp.target
            target_wkl = Workload.from_task(inp.task)
            target_wkl['target'] = log_target

            # The full config log file name by Lorien is composed of
            # <SHA2>-<5-byte UUID4>.json
            log_files = []
            for platform2_dir in platform2_dirs:
                pattern = '{0}/{1}*.json'.format(platform2_dir, target_wkl.hash_sha2())
                log_files.extend(glob.glob(pattern))
            if not log_files:
                print('Log missing for %s: %s' % (str(target_wkl), target_wkl.hash_sha2()))
                return

        # Only focus on the best N configs; costs are stored negated so the
        # heap root is the worst config currently kept.
        cfg_str = str(inp.config)
        mean_cost = np.mean(res.costs)
        if len(top_cfgs) < top_n_cfgs:
            heapq.heappush(top_cfgs, (-mean_cost, cfg_str))
        elif mean_cost < -top_cfgs[0][0]:
            heapq.heappop(top_cfgs)
            heapq.heappush(top_cfgs, (-mean_cost, cfg_str))

    # Load all configs of the second platform, de-duplicated by config string
    # and keeping the result with the lower mean cost.
    assert log_files is not None
    best_by_cfg = {}
    for log_file in log_files:
        for inp, res in load_from_file(log_file):
            cfg = str(inp.config)
            if cfg in best_by_cfg:
                old_res = best_by_cfg[cfg][1]
                if not np.mean(res.costs) < np.mean(old_res.costs):
                    res = old_res
            best_by_cfg[cfg] = (inp, res)

    ranked = sorted(best_by_cfg.values(), key=lambda pair: np.mean(pair[1].costs))
    cfg_to_rank_on_p2 = {str(inp.config): rank for rank, (inp, _) in enumerate(ranked)}

    assert target_wkl is not None

    # Undo the negation: ascending mean cost gives the rank on platform 1.
    top_cfgs.sort(key=lambda entry: -entry[0])
    for rank1, (_, cfg_str) in enumerate(top_cfgs):
        # Map the rank from the first platform to the second platform.
        rank2 = cfg_to_rank_on_p2.get(cfg_str)
        if rank2 is None:
            continue
        rank_shift = rank1 - rank2
        hist[rank_shift] = hist.get(rank_shift, 0) + 1

    return
# The config of the first platform.
platform1_dir = sys.argv[1]

# The config folder of the second platform.
# When the target config is missing in the full log,
# we may re-tune it and put it to another log file.
# In short, when two platform2 dirs are specified,
# we will aggregate their logs as the reference.
platform2_dirs = [sys.argv[2]]
if len(sys.argv) == 4:
    platform2_dirs.append(sys.argv[3])

# Take the target string from the first record found in the first platform2
# directory. Keep scanning when a file yields no record, so that an empty
# first file does not leave the target undefined.
log_target = None
for log_file in glob.glob('{}/*'.format(platform2_dirs[0])):
    for inp, res in load_from_file(log_file):
        log_target = str(inp.target)
        break
    if log_target is not None:
        break
print(log_target)

# Histogram of rank shifting counts
hist = {}

def run_one_wkl(platform1_log, platform2_dirs):
    target_wkl = None
    cfgs = []
    log_files = None
    for inp, res in load_from_file(platform1_log):
        # Update the target string to generate the same SHA2 hash code
        # to identify the full config log file.
Пример #8
0
# Number of top configs.
top_n_cfgs = 5000

# The config found by the rank model to be analyzed
best_config_log = sys.argv[1]

# The full config by AutoTVM to be referenced.
all_log_dir = sys.argv[2]

# The target string in the full config log file.
log_target = sys.argv[3]

wkl_to_log_file = {}
missed = 0
total = 0
for inp, res in load_from_file(best_config_log):
    # Update the target string to generate the same SHA2 hash code
    # to identify the full config log file.
    inp.task.target = inp.target
    target_wkl = Workload.from_task(inp.task)
    target_wkl['target'] = log_target
    target_cfg_str = str(inp.config)

    if target_wkl not in wkl_to_log_file:
        # The full config log file name by Lorien is composed of
        # <SHA2>-<5-byte UUID4>.json
        total += 1
        log_path = '{0}/{1}*.json'.format(all_log_dir, target_wkl.hash_sha2())
        log_files = glob.glob(log_path)
        if not log_files:
            print('Log missing for %s' % str(target_wkl))