예제 #1
0
 def do(self):
     """Run the benchmark evaluation for every record of the current step.

     Logs an error and returns when there is no record to evaluate.
     Otherwise each record is broadcast and passed to
     ``_evaluate_single_model``; after the master has been joined the
     report is updated per record, the step records are output and the
     output path is backed up.
     """
     logger.info("BenchmarkPipeStep started...")
     records = self._get_current_step_records()
     if not records:
         logger.error("There is no model to evaluate.")
         return
     self.master = create_master()
     for item in records:
         initial = ReportRecord(
             worker_id=item.worker_id,
             desc=item.desc,
             step_name=item.step_name,
         )
         Report().broadcast(initial)
         self._evaluate_single_model(item)
     # Join the master before the report is updated.
     self.master.join()
     for item in records:
         worker_key = {
             "step_name": item.step_name,
             "worker_id": item.worker_id,
         }
         Report().update_report(worker_key)
     Report().output_step_all_records(
         step_name=General.step_name,
         weights_file=False,
         performance=True,
     )
     self.master.close_client()
     Report().backup_output_path()
예제 #2
0
 def __init__(self):
     """Set up the generator, the report and the master of this step."""
     super().__init__()
     restored = Generator.restore()
     # Use a fresh generator when restoring yielded nothing usable.
     self.generator = restored if restored else Generator()
     Report.restore()
     self.master = create_master(update_func=self.generator.update)
예제 #3
0
 def _broadcast(self, pfms):
     """Broadcast performance values to the record of this worker.

     :param pfms: performance values; merged into the record's existing
         ``performance`` when it is non-empty, assigned to it otherwise
     """
     rec = Report().receive(self.step_name, self.worker_id)
     if not rec.performance:
         rec.performance = pfms
     else:
         rec.performance.update(pfms)
     Report().broadcast(rec)
     logging.info("valid record: {}".format(rec))
예제 #4
0
 def do(self):
     """Sample and dispatch models until the generator is completed.

     Afterwards the master is joined, the pareto front of the current
     step is logged and output, and the master's client is closed.
     """
     logging.debug("NasPipeStep started...")
     while not self.generator.is_completed:
         sampled = self.generator.sample()
         if not sampled:
             # Nothing to dispatch this round; pause before asking again.
             time.sleep(0.2)
             continue
         self._dispatch_trainer(sampled)
     self.master.join()
     front = Report().pareto_front(General.step_name)
     logging.debug("Pareto_front values: %s", front)
     Report().output_pareto_front(General.step_name)
     self.master.close_client()
예제 #5
0
    def search(self):
        """Search one NetworkDesc from search space.

        While ``random_count`` is below ``random_models`` a random sample
        is returned. Afterwards a new encoding is derived from the pareto
        front records, either by mutation or by crossover (chosen at
        random), and decoded through the codec.

        :return: search id, network desc
        :rtype: int, NetworkDesc
        """
        if self.random_count < self.random_models:
            # Sample first: the count is read only after the sample was drawn.
            sampled = self._random_sample()
            return self.random_count, sampled
        front = Report().get_pareto_front_records(self.step_name,
                                                  self.num_individual)
        codes = []
        for rec in front:
            codes.append(
                rec.desc.get('nbit_w_list') + rec.desc.get('nbit_a_list'))
        logging.info("codes=%s", codes)
        if len(codes) >= 2:
            parent_a, parent_b = random.sample(codes, 2)
        else:
            # A single code serves as both parents.
            parent_a = parent_b = codes[0]
        if random.randint(0, 1) == 0:
            # mutate
            child = self.mutatation(parent_a)
        else:
            # crossover; only the first returned encoding is used
            child, _ = self.crossover(parent_a, parent_b)
        self.ea_count += 1
        if self.ea_count % self.num_individual == 0:
            self.ea_epoch += 1
        decoded = self.codec.decode(child)
        return self.random_count + self.ea_count, decoded
예제 #6
0
 def do(self):
     """Run fully train, either distributed or through a local master.

     In the non-distributed case the records of the current step are
     loaded, trained via ``_train_multi_models`` and the step records are
     output. The output path is backed up in both cases.
     """
     logger.info("FullyTrainPipeStep started...")
     trainer_cls = ClassFactory.get_cls('trainer', "Trainer")
     if not trainer_cls.config.distributed:
         records = self._get_current_step_records()
         logger.debug("load pipestep records: {}".format(records))
         self.master = create_master(update_func=Report().update_report)
         self._train_multi_models(records)
         Report().output_step_all_records(
             step_name=self.task.step_name,
             weights_file=True,
             performance=True,
         )
         self.master.close_client()
     else:
         self._do_distributed_fully_train()
     Report().backup_output_path()
예제 #7
0
    def search(self):
        """Search one mutated model.

        A code is picked at random from the pareto front records requested
        with ``['random', 'mutate']``; one position of it is replaced by a
        different value and the resulting description is decoded.

        :return: current number of samples, and the model
        """
        desc = deepcopy(self.search_space)
        search_desc = desc.custom
        front = Report().get_pareto_front_records(['random', 'mutate'])
        codes = [rec.desc['custom']['code'] for rec in front]
        num_ops = len(search_desc.op_names)
        upper_bounds = [
            num_ops, 2, 2, num_ops, num_ops, 5, 5, num_ops, num_ops, 8, 8,
            num_ops, num_ops, 4, 4, 5, 5, 6, 6
        ]
        parent_code = random.choice(codes)
        index = random.randrange(len(upper_bounds))
        # Bound ``index`` belongs to character ``index + 1`` of the code.
        position = index + 1
        candidates = list(range(upper_bounds[index]))
        # Drop the current value so the chosen replacement always differs.
        candidates.pop(int(parent_code[position], 36))
        new_value = random.choice(candidates)
        # NOTE(review): the current value is parsed as base 36 but the new
        # one is written with str(); these only agree for values below 10
        # - confirm that no bound (e.g. num_ops) can exceed 10.
        code_mutated = (parent_code[:position] + str(new_value) +
                        parent_code[position + 1:])
        search_desc['code'] = code_mutated
        search_desc['method'] = "mutate"
        logging.info("Mutate from {} to {}".format(parent_code,
                                                   code_mutated))
        search_desc = self.codec.decode(search_desc)
        self.sample_count += 1
        desc['custom'] = search_desc
        return self.sample_count, desc
예제 #8
0
 def _evaluate_single_model(self, record):
     """Broadcast one model record and submit its enabled evaluators.

     An initial report record is broadcast first. Then, for every
     evaluator enabled in ``EvaluatorConfig`` (GPU and/or Davinci mobile),
     an evaluator instance is created and handed to ``self.master.run``.
     Any exception is logged together with its traceback and swallowed.

     :param record: record providing ``step_name``, ``worker_id``,
         ``desc`` and ``weights_file`` of the model to evaluate
     """
     # Bound before the ``try`` so the error handler below can always log
     # it, even when reading the record's attributes is what failed.
     worker_info = None
     try:
         worker_info = {
             "step_name": record.step_name,
             "worker_id": record.worker_id
         }
         _record = dict(worker_id=record.worker_id,
                        desc=record.desc,
                        step_name=record.step_name)
         _init_record = ReportRecord().load_dict(_record)
         Report().broadcast(_init_record)

         def _submit(evaluator_type, evaluator_name):
             # Build one evaluator for this record and pass it to the master.
             cls_evaluator = ClassFactory.get_cls(evaluator_type,
                                                  evaluator_name)
             evaluator = cls_evaluator(worker_info=worker_info,
                                       model_desc=record.desc,
                                       weights_file=record.weights_file)
             self.master.run(evaluator)

         if EvaluatorConfig.gpu_evaluator_enable:
             _submit(ClassType.GPU_EVALUATOR, "GpuEvaluator")
         if EvaluatorConfig.davinci_mobile_evaluator_enable:
             _submit(ClassType.DAVINCI_MOBILE_EVALUATOR,
                     "DavinciMobileEvaluator")
     except Exception:
         logger.error(
             "Failed to evaluate model, worker info={}".format(worker_info))
         logger.error(traceback.format_exc())
예제 #9
0
 def sample(self):
     """Sample work ids and model descriptions from the search algorithm.

     :return: list of ``(worker_id, model_desc)`` tuples for the samples
         that were not filtered by the quota (may be empty), or None when
         the search algorithm returned nothing
     """
     found = self.search_alg.search()
     if not found:
         return None
     samples = found if isinstance(found, list) else [found]
     if len(samples) == 0:
         return None
     out = []
     for item in samples:
         if isinstance(item, dict):
             desc = item.get("desc")
         else:
             desc = item[1]
         desc = self._decode_hps(desc)
         # Snapshot taken before a "network" desc is merged below.
         model_desc = deepcopy(desc)
         if "modules" in desc:
             PipeStepConfig.model.model_desc = deepcopy(desc)
         elif "network" in desc:
             origin_desc = PipeStepConfig.model.model_desc
             desc = update_dict(desc["network"], origin_desc)
             PipeStepConfig.model.model_desc = deepcopy(desc)
         if not self.quota.is_filtered(desc):
             record = self.record.from_sample(item, desc)
             Report().broadcast(record)
             out.append((record.worker_id, model_desc))
     return out
예제 #10
0
    def search(self):
        """Search one mutated model.

        A code picked at random from the pareto front records is mutated
        until ``num_mutate`` effective changes have been applied.

        :return: dict with ``worker_id`` (current number of samples) and
            ``desc`` (the model)
        """
        desc = deepcopy(self.search_space)
        search_desc = desc.custom
        # TODO: merge sr ea in one pipe step.
        front = Report().get_pareto_front_records(['random', 'mutate'])
        codes = [rec.desc['custom']['code'] for rec in front]
        code_to_mutate = random.choice(codes)
        num_candidates = len(search_desc["candidates"])
        code_mutated = code_to_mutate
        applied = 0
        while applied < self.num_mutate:
            candidate = self.mutate_once(code_mutated, num_candidates)
            if candidate == code_mutated:
                # A mutation that left the code unchanged is not counted.
                continue
            code_mutated = candidate
            applied += 1
        logging.info("Mutate from {} to {}".format(code_to_mutate,
                                                   code_mutated))
        search_desc['code'] = code_mutated
        search_desc['method'] = "mutate"
        desc['custom'] = self.codec.decode(search_desc)
        self.sample_count += 1
        return dict(worker_id=self.sample_count, desc=desc)
예제 #11
0
 def _get_current_step_records(self):
     """Load the records of this step and keep those with a weights file.

     Records come from a models folder when one is configured or when
     this is not the first pipeline step; otherwise from
     ``_load_single_model_records``. Records without a ``weights_file``
     are reported and skipped.

     :return: the kept records, with ``step_name`` set to the current step
     """
     step_name = General.step_name
     models_folder = PipeStepConfig.pipe_step.get("models_folder")
     cur_index = PipelineConfig.steps.index(step_name)
     if cur_index < 1 and not models_folder:
         records = self._load_single_model_records()
     else:
         if not models_folder:
             # Default to the output folder of the previous pipeline step.
             previous_step = PipelineConfig.steps[cur_index - 1]
             models_folder = FileOps.join_path(TaskOps().local_output_path,
                                               previous_step)
         models_folder = models_folder.replace("{local_base_path}",
                                               TaskOps().local_base_path)
         records = Report().load_records_from_model_folder(models_folder)
     final_records = []
     for rec in records:
         if rec.weights_file:
             rec.step_name = General.step_name
             final_records.append(rec)
         else:
             logger.error("Model file is not existed, id={}".format(
                 rec.worker_id))
     logging.debug("Records: {}".format(final_records))
     return final_records
예제 #12
0
 def get_pareto_front(self):
     """Pick one code at random from the pareto front records.

     :return: the ``code`` entry of a randomly chosen record's ``desc``
     """
     front = Report().get_pareto_front_records()
     candidate_codes = [rec.desc['code'] for rec in front]
     return random.choice(candidate_codes)
예제 #13
0
 def _init_report(self):
     """Broadcast the initial report record of this trainer."""
     info = {
         "worker_id": self.trainer.worker_id,
         "desc": self.cfg.model_desc,
         "step_name": self.trainer.step_name,
         "weights_file": self.best_model_file,
     }
     initial = ReportRecord().load_dict(info)
     logging.debug("Broadcast Record=%s", str(initial))
     Report().broadcast(initial)
예제 #14
0
 def __init__(self):
     """Create the search space, search algorithm, report, record and quota."""
     self.step_name = General.step_name
     self.search_space = SearchSpace()
     self.search_alg = SearchAlgorithm(self.search_space)
     self.report = Report()
     record = ReportRecord()
     record.step_name = self.step_name
     self.record = record
     # Objective keys are copied only when the algorithm config has them.
     alg_config = self.search_alg.config
     if hasattr(alg_config, 'objective_keys'):
         self.record.objective_keys = alg_config.objective_keys
     self.quota = QuotaCompare('restrict')
예제 #15
0
 def _simulate_tiny_pipeline(self, cfg_tiny):
     """Simulate tiny pipeline by using one sample one epoch.

     Every step in ``PipelineConfig.steps`` is configured for a single,
     non-distributed epoch with one trial. Only the first step is actually
     run; the runtime of its last record is stored in ``self.epoch_time``
     and the local base path is removed afterwards. A ``.generator`` file
     in the step path is deleted after each step.

     :param cfg_tiny: object whose ``get(step_name)`` returns the tiny
         config of that step
     """
     # Function-scope imports keep this block self-contained.
     import logging
     import shutil

     report = Report()
     for i, step_name in enumerate(PipelineConfig.steps):
         step_cfg = cfg_tiny.get(step_name)
         step_cfg.trainer.distributed = False
         step_cfg.trainer.epochs = 1
         self.restrict_config.trials[step_name] = 1
         General.step_name = step_name
         PipeStepConfig.from_json(step_cfg)
         pipestep = PipeStep()
         if i == 0:
             pipestep.do()
             record = report.get_step_records(step_name)[-1]
             self.epoch_time = record.runtime
             _worker_path = TaskOps().local_base_path
             if os.path.exists(_worker_path):
                 # Remove without spawning a shell, so paths containing
                 # spaces or shell metacharacters are handled correctly.
                 # Stays best-effort, like the former ``rm -rf``.
                 try:
                     if (os.path.isdir(_worker_path)
                             and not os.path.islink(_worker_path)):
                         shutil.rmtree(_worker_path, ignore_errors=True)
                     else:
                         os.remove(_worker_path)
                 except OSError as err:
                     logging.warning("Failed to remove %s: %s",
                                     _worker_path, err)
         if step_cfg.pipe_step.type == 'NasPipeStep':
             self.params_dict[step_name][
                 'max_samples'] = pipestep.generator.search_alg.max_samples
         _file = os.path.join(TaskOps().step_path, ".generator")
         if os.path.exists(_file):
             # Best-effort delete; a failure is reported but not raised.
             try:
                 os.remove(_file)
             except OSError as err:
                 logging.warning("Failed to remove %s: %s", _file, err)
예제 #16
0
    def _init_evaluator(self):
        """Create and add the evaluators of this worker.

        Nothing is done when ``_use_evaluator`` reports that no evaluator
        is used. Otherwise one evaluator per returned class is built with
        the ``desc`` of this worker's report record and added through
        ``add_evaluator``.
        """
        use_evaluator, cls_evaluator_set = self._use_evaluator()
        if use_evaluator:
            model_desc = Report().receive(self.step_name,
                                          self.worker_id).desc
            for evaluator_cls in cls_evaluator_set:
                self.add_evaluator(
                    evaluator_cls(worker_info=self.worker_info,
                                  model_desc=model_desc))
예제 #17
0
 def __init__(self, search_space=None, **kwargs):
     """Init SearchAlgorithm.

     :param search_space: search space stored on the instance and passed
         to the codec
     :param kwargs: optional values loaded into ``self.config``
     """
     super(SearchAlgorithm, self).__init__()
     # modify config by kwargs, using local scope
     if self.config and kwargs:
         self.config.from_json(kwargs)
     self.search_space = search_space
     # A codec is only created when the config names one.
     self.codec = (Codec(search_space, type=self.config.codec)
                   if hasattr(self.config, 'codec') else None)
     logging.debug("Config=%s", self.config)
     self.report = Report()
     record = ReportRecord()
     record.step_name = self.step_name
     self.record = record
     self._get_search_space_list()
예제 #18
0
 def _get_current_step_records(self):
     """Collect the records to train in the current step.

     Records are loaded from a models folder when one is configured or
     when this is not the first pipeline step; otherwise a single record
     ``ReportRecord(step_name, 0)`` is used.

     :return: the records, each with ``step_name`` set to the current step
     """
     step_name = self.task.step_name
     models_folder = PipeStepConfig.pipe_step.get("models_folder")
     cur_index = PipelineConfig.steps.index(step_name)
     if cur_index < 1 and not models_folder:
         records = [ReportRecord(step_name, 0)]
     else:
         if not models_folder:
             # Default to the output folder of the previous pipeline step.
             previous_step = PipelineConfig.steps[cur_index - 1]
             models_folder = FileOps.join_path(TaskOps().local_output_path,
                                               previous_step)
         models_folder = models_folder.replace("{local_base_path}",
                                               TaskOps().local_base_path)
         records = Report().load_records_from_model_folder(models_folder)
     logging.debug("Records: {}".format(records))
     for rec in records:
         rec.step_name = step_name
     return records
예제 #19
0
    def update(self, step_name, worker_id):
        """Feed a worker's record to the search algorithm and persist state.

        The record is received from the report, its serialized form is
        passed to the search algorithm, the report is dumped and this
        object is dumped as well.

        :param step_name: step name
        :param worker_id: current worker id
        :return: None
        """
        report = Report()
        received = report.receive(step_name, worker_id)
        logging.debug("Get Record=%s", str(received))
        serialized = received.serialize()
        self.search_alg.update(serialized)
        report.dump_report(received.step_name, received)
        self.dump()
        logging.info("Update Success. step_name=%s, worker_id=%s",
                     step_name, worker_id)
        best = Report().print_best(step_name=General.step_name)
        logging.info("Best values: %s", best)
예제 #20
0
 def _train_single_model(self, model_desc=None, model_id=None):
     """Create a trainer for one model and run fully train with it.

     :param model_desc: model description; when given, a record for it is
         broadcast before the trainer is created
     :param model_id: id used as the record's ``worker_id`` and passed to
         the trainer as ``id``
     """
     cls_trainer = ClassFactory.get_cls('trainer', "Trainer")
     step_name = self.task.step_name
     if model_desc is None:
         trainer = cls_trainer(None, 0)
     else:
         sample = {
             "worker_id": model_id,
             "desc": model_desc,
             "step_name": step_name,
         }
         initial = ReportRecord().load_dict(sample)
         logging.debug("Broadcast Record=%s", str(initial))
         Report().broadcast(initial)
         trainer = cls_trainer(model_desc=model_desc, id=model_id)
     # resume training
     if vega.is_torch_backend() and General._resume:
         trainer.load_checkpoint = True
         trainer._resume_training = True
     if not cls_trainer.config.distributed:
         self._do_single_fully_train(trainer)
     else:
         self._do_distributed_fully_train()
예제 #21
0
 def _broadcast(self, epoch, performance):
     """Store ``performance`` on this trainer's record and broadcast it.

     :param epoch: not read by this method
     :param performance: value assigned to the record's ``performance``
     """
     trainer = self.trainer
     rec = Report().receive(trainer.step_name, trainer.worker_id)
     rec.performance = performance
     Report().broadcast(rec)
     logging.debug("report_callback record: {}".format(rec))
예제 #22
0
파일: mfkd.py 프로젝트: ylfzr/vega
 def _save_best(self, desc):
     """Broadcast ``desc`` on the record with id ``sample_count + 1``.

     :param desc: description stored on the record; its performance is
         set to ``{"accuracy": 100}``
     """
     next_id = self.sample_count + 1
     best = Report().receive(self.step_name, next_id)
     best.performance = {"accuracy": 100}
     best.desc = desc
     Report().broadcast(best)
예제 #23
0
 def after_train(self, logs=None):
     """Broadcast the record once more and close the report connection.

     :param logs: not read by this method
     """
     self._broadcast(self.epoch)
     trainer = self.trainer
     Report().close(trainer.step_name, trainer.worker_id)
예제 #24
0
 def _broadcast(self, epoch=None):
     """Copy the trainer's current state onto its record and broadcast it.

     Performance keys missing from the objectives are added with 'MIN'
     for 'flops' and 'params' and 'MAX' for everything else.

     :param epoch: not read by this method; the record's epoch is taken
         from ``self.trainer.epochs`` when ``report_on_epoch`` is set
     """
     trainer = self.trainer
     record = Report().receive(trainer.step_name, trainer.worker_id)
     if trainer.config.report_on_epoch:
         record.epoch = trainer.epochs
     # todo: remove in FinedGrainedSpace
     if trainer.config.codec:
         record.desc = trainer.config.codec
     if not record.desc:
         record.desc = trainer.model_desc
     record.performance = trainer.performance
     record.objectives = trainer.valid_metrics.objectives
     if record.performance is not None:
         for key in record.performance:
             if key in record.objectives:
                 continue
             direction = 'MIN' if key in ('flops', 'params') else 'MAX'
             record.objectives.update({key: direction})
     record.model_path = trainer.model_path
     record.checkpoint_path = trainer.checkpoint_file
     record.weights_file = trainer.weights_file
     if trainer.runtime is not None:
         record.runtime = trainer.runtime
     Report().broadcast(record)
     logging.debug("report_callback record: {}".format(record))