Example 1
def teardown_module():
    distributedTool.release()
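
Here `release()` tears down the distributed communication resources that a matching `init()` acquired at module setup; `distributedTool` is a project-local wrapper whose internals are not shown. A minimal sketch of the same pairing against the standard `mindspore.communication.management` API (an assumption, since the wrapper is not visible here):

from mindspore.communication.management import init, release

def setup_module():
    # Acquire HCCL/NCCL communication resources once for the whole test module.
    init()

def teardown_module():
    # Release them after the last test in the module has run.
    release()
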
Example 2
def test(cloud_args=None):
    """test"""
    args = parse_args(cloud_args)
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.platform,
                        save_graphs=False)
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    # init distributed
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                          device_num=args.group_size,
                                          gradients_mean=True)

    args.logger.save_args(args)

    # network
    args.logger.important_info('start create network')
    if os.path.isdir(args.pretrained):
        models = list(glob.glob(os.path.join(args.pretrained, '*.ckpt')))
        print(models)
        if args.graph_ckpt:
            f = lambda x: -1 * int(
                os.path.splitext(os.path.split(x)[-1])[0].split('-')[-1].split(
                    '_')[0])
        else:
            f = lambda x: -1 * int(
                os.path.splitext(os.path.split(x)[-1])[0].split('_')[-1])
        args.models = sorted(models, key=f)
    else:
        args.models = [
            args.pretrained,
        ]

    for model in args.models:
        de_dataset = classification_dataset(args.data_dir,
                                            image_size=args.image_size,
                                            per_batch_size=args.per_batch_size,
                                            max_epoch=1,
                                            rank=args.rank,
                                            group_size=args.group_size,
                                            mode='eval')
        eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True)
        network = get_network(args.backbone,
                              args.num_classes,
                              platform=args.platform)
        if network is None:
            raise NotImplementedError('backbone {} is not implemented'.format(args.backbone))

        load_pretrain_model(model, network, args)

        img_tot = 0
        top1_correct = 0
        top5_correct = 0
        if args.platform == "Ascend":
            network.to_float(mstype.float16)
        else:
            auto_mixed_precision(network)
        network.set_train(False)
        t_end = time.time()
        it = 0
        for data, gt_classes in eval_dataloader:
            output = network(Tensor(data, mstype.float32))
            output = output.asnumpy()

            top1_output = np.argmax(output, (-1))
            top5_output = np.argsort(output)[:, -5:]

            t1_correct = np.equal(top1_output, gt_classes).sum()
            top1_correct += t1_correct
            top5_correct += get_top5_acc(top5_output, gt_classes)
            img_tot += args.per_batch_size

            if args.rank == 0 and it == 0:
                t_end = time.time()
                it = 1
        if args.rank == 0:
            time_used = time.time() - t_end
            fps = (img_tot - args.per_batch_size) * args.group_size / time_used
            args.logger.info(
                'Inference Performance: {:.2f} img/sec'.format(fps))
        results = get_result(args, model, top1_correct, top5_correct, img_tot)
        top1_correct = results[0, 0]
        top5_correct = results[1, 0]
        img_tot = results[2, 0]
        acc1 = 100.0 * top1_correct / img_tot
        acc5 = 100.0 * top5_correct / img_tot
        args.logger.info('after allreduce eval: top1_correct={}, tot={},'
                         'acc={:.2f}%(TOP1)'.format(top1_correct, img_tot,
                                                    acc1))
        args.logger.info('after allreduce eval: top5_correct={}, tot={},'
                         'acc={:.2f}%(TOP5)'.format(top5_correct, img_tot,
                                                    acc5))
    if args.is_distributed:
        release()
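
The evaluation loops in these examples rely on a `get_top5_acc` helper that is not part of the listing. A plausible implementation, assuming it receives the per-sample top-5 class indices produced by `np.argsort(output)[:, -5:]` plus the ground-truth labels, and that despite its name it returns a correct-prediction count (it is accumulated into `top5_correct` and only divided by `img_tot` later):

def get_top5_acc(top5_arg, gt_class):
    # Count samples whose ground-truth label appears among the top-5 predictions.
    sub_count = 0
    for top5, gt in zip(top5_arg, gt_class):
        if gt in top5:
            sub_count += 1
    return sub_count
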
Example 3
    def _ascend_analyse(self):
        """Collect and analyse ascend performance data"""
        release()

        job_id = self._get_profiling_job_id()
        logger.info("Profiling: job id is %s ", job_id)

        source_path = os.path.join(self._output_path, job_id)
        # parse hwts.log.data.45.dev file, and get task profiling data
        hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
        hwts_output_filename = os.path.join(self._output_path,
                                            hwts_output_filename)
        source_path = validate_and_normalize_path(source_path)
        hwts_output_filename = validate_and_normalize_path(
            hwts_output_filename)
        hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename)
        hwtslog_parser.execute()

        # parse Framework file, and get the relation of op and tasks
        framework_parser = FrameworkParser(job_id, self._dev_id,
                                           self._output_path)
        framework_parser.parse()
        op_task_dict = framework_parser.to_task_id_full_op_name_dict()
        if not op_task_dict:
            logger.error("Profiling: fail to parse framework files.")
            return

        # get op compute time from hwts data and framework data, write output_op_compute_time.txt
        opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
        opcompute_output_filename = os.path.join(self._output_path,
                                                 opcompute_output_filename)
        opcompute_output_filename = validate_and_normalize_path(
            opcompute_output_filename)
        optime_parser = OPComputeTimeParser(hwts_output_filename,
                                            opcompute_output_filename,
                                            op_task_dict, self._output_path,
                                            self._dev_id)
        optime_parser.execute()

        # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
        output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt"
        output_data_preprocess_aicpu = os.path.join(
            self._output_path, output_data_preprocess_aicpu)
        output_data_preprocess_aicpu = validate_and_normalize_path(
            output_data_preprocess_aicpu)
        aicpu_data_parser = DataPreProcessParser(source_path,
                                                 output_data_preprocess_aicpu)
        aicpu_data_parser.execute()

        # Parsing minddata AICPU profiling
        MinddataParser.execute(source_path, self._output_path, self._dev_id)

        # parse minddata pipeline operator and queue
        try:
            pipeline_parser = MinddataPipelineParser(self._output_path,
                                                     self._dev_id,
                                                     self._output_path)
            pipeline_parser.parse()
        except ProfilerException as err:
            logger.warning(err.message)

        # analyse op compute time info
        try:
            self._analyser_op_info()
        except ProfilerException as err:
            logger.warning(err.message)

        # analyse step trace info
        points = None
        try:
            points = self._analyse_step_trace(source_path, framework_parser)
        except ProfilerException as err:
            logger.warning(err.message)

        # analyse timeline info
        try:
            self._analyse_timeline(aicpu_data_parser, optime_parser,
                                   source_path)
        except (ProfilerIOException, ProfilerFileNotFoundException,
                RuntimeError) as err:
            logger.warning('Fail to write timeline data: %s', err)

        # analyse memory usage info
        try:
            self._analyse_memory_usage(points)
        except (ProfilerIOException, ProfilerFileNotFoundException,
                ProfilerRawFileException) as err:
            logger.warning(err.message)

        os.environ['PROFILING_MODE'] = 'false'
        context.set_context(enable_profiling=False)
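
`_ascend_analyse` is an internal step of `Profiler.analyse()`; it is not called directly. End-to-end usage follows the docstrings shown in Examples 6 and 7 below; a minimal sketch, assuming an Ascend device with the DEVICE_ID environment variable set and placeholder `net`/`dataset` objects:

import os
from mindspore import context
from mindspore.profiler import Profiler
from mindspore.train.model import Model  # import path assumed for this sketch

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                    device_id=int(os.environ["DEVICE_ID"]))
profiler = Profiler(output_path='./data')  # start collecting before training
model = Model(net)                         # net: a placeholder network
model.train(1, dataset)                    # dataset: a placeholder dataset
profiler.analyse()                         # parse and summarize the raw data
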
Example 4
def test(cloud_args=None):
    """test"""
    args = parse_args(cloud_args)
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.platform,
                        save_graphs=False)
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    # init distributed
    if args.is_distributed:
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()
        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                          device_num=args.group_size,
                                          parameter_broadcast=True,
                                          mirror_mean=True)
    else:
        args.rank = 0
        args.group_size = 1

    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))

    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    # network
    args.logger.important_info('start create network')
    if os.path.isdir(args.pretrained):
        models = list(glob.glob(os.path.join(args.pretrained, '*.ckpt')))
        print(models)
        if args.graph_ckpt:
            f = lambda x: -1 * int(
                os.path.splitext(os.path.split(x)[-1])[0].split('-')[-1].split(
                    '_')[0])
        else:
            f = lambda x: -1 * int(
                os.path.splitext(os.path.split(x)[-1])[0].split('_')[-1])
        args.models = sorted(models, key=f)
    else:
        args.models = [
            args.pretrained,
        ]

    for model in args.models:
        de_dataset = classification_dataset(args.data_dir,
                                            image_size=args.image_size,
                                            per_batch_size=args.per_batch_size,
                                            max_epoch=1,
                                            rank=args.rank,
                                            group_size=args.group_size,
                                            mode='eval')
        eval_dataloader = de_dataset.create_tuple_iterator()
        network = get_network(args.backbone,
                              args.num_classes,
                              platform=args.platform)
        if network is None:
            raise NotImplementedError('backbone {} is not implemented'.format(args.backbone))

        param_dict = load_checkpoint(model)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values

        load_param_into_net(network, param_dict_new)
        args.logger.info('load model {} success'.format(model))

        img_tot = 0
        top1_correct = 0
        top5_correct = 0
        if args.platform == "Ascend":
            network.to_float(mstype.float16)
        else:
            auto_mixed_precision(network)
        network.set_train(False)
        t_end = time.time()
        it = 0
        for data, gt_classes in eval_dataloader:
            output = network(Tensor(data, mstype.float32))
            output = output.asnumpy()

            top1_output = np.argmax(output, (-1))
            top5_output = np.argsort(output)[:, -5:]

            t1_correct = np.equal(top1_output, gt_classes).sum()
            top1_correct += t1_correct
            top5_correct += get_top5_acc(top5_output, gt_classes)
            img_tot += args.per_batch_size

            if args.rank == 0 and it == 0:
                t_end = time.time()
                it = 1
        if args.rank == 0:
            time_used = time.time() - t_end
            fps = (img_tot - args.per_batch_size) * args.group_size / time_used
            args.logger.info(
                'Inference Performance: {:.2f} img/sec'.format(fps))
        results = [[top1_correct], [top5_correct], [img_tot]]
        args.logger.info('before results={}'.format(results))
        if args.is_distributed:
            model_md5 = model.replace('/', '')
            tmp_dir = '/cache'
            if not os.path.exists(tmp_dir):
                os.mkdir(tmp_dir)
            top1_correct_npy = '/cache/top1_rank_{}_{}.npy'.format(
                args.rank, model_md5)
            top5_correct_npy = '/cache/top5_rank_{}_{}.npy'.format(
                args.rank, model_md5)
            img_tot_npy = '/cache/img_tot_rank_{}_{}.npy'.format(
                args.rank, model_md5)
            np.save(top1_correct_npy, top1_correct)
            np.save(top5_correct_npy, top5_correct)
            np.save(img_tot_npy, img_tot)
            while True:
                rank_ok = True
                for other_rank in range(args.group_size):
                    top1_correct_npy = '/cache/top1_rank_{}_{}.npy'.format(
                        other_rank, model_md5)
                    top5_correct_npy = '/cache/top5_rank_{}_{}.npy'.format(
                        other_rank, model_md5)
                    img_tot_npy = '/cache/img_tot_rank_{}_{}.npy'.format(
                        other_rank, model_md5)
                    if not os.path.exists(top1_correct_npy) or not os.path.exists(top5_correct_npy) or \
                       not os.path.exists(img_tot_npy):
                        rank_ok = False
                if rank_ok:
                    break

            top1_correct_all = 0
            top5_correct_all = 0
            img_tot_all = 0
            for other_rank in range(args.group_size):
                top1_correct_npy = '/cache/top1_rank_{}_{}.npy'.format(
                    other_rank, model_md5)
                top5_correct_npy = '/cache/top5_rank_{}_{}.npy'.format(
                    other_rank, model_md5)
                img_tot_npy = '/cache/img_tot_rank_{}_{}.npy'.format(
                    other_rank, model_md5)
                top1_correct_all += np.load(top1_correct_npy)
                top5_correct_all += np.load(top5_correct_npy)
                img_tot_all += np.load(img_tot_npy)
            results = [[top1_correct_all], [top5_correct_all], [img_tot_all]]
            results = np.array(results)
        else:
            results = np.array(results)

        args.logger.info('after results={}'.format(results))
        top1_correct = results[0, 0]
        top5_correct = results[1, 0]
        img_tot = results[2, 0]
        acc1 = 100.0 * top1_correct / img_tot
        acc5 = 100.0 * top5_correct / img_tot
        args.logger.info('after allreduce eval: top1_correct={}, tot={},'
                         'acc={:.2f}%(TOP1)'.format(top1_correct, img_tot,
                                                    acc1))
        args.logger.info('after allreduce eval: top5_correct={}, tot={},'
                         'acc={:.2f}%(TOP5)'.format(top5_correct, img_tot,
                                                    acc5))
    if args.is_distributed:
        release()
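
The `/cache` *.npy exchange above is a hand-rolled all-reduce: each rank publishes its counters as files, spins until every rank's files exist, then sums them. Since the communication group is already initialized here, the same reduction could be expressed with an `AllReduce` cell instead of temporary files; a sketch under that assumption (not the model zoo's actual code):

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P

class SumAcrossRanks(nn.Cell):
    """Sum a tensor over all ranks in the default communication group."""
    def __init__(self):
        super(SumAcrossRanks, self).__init__()
        self.all_reduce = P.AllReduce()  # defaults to ReduceOp.SUM

    def construct(self, x):
        return self.all_reduce(x)

# Local [top1, top5, tot] -> global sums, with no temp files or busy-waiting.
reduce_sum = SumAcrossRanks()
local = Tensor(np.array([top1_correct, top5_correct, img_tot], np.float32))
top1_correct, top5_correct, img_tot = reduce_sum(local).asnumpy()
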
Example 5
    def _release_parallel(self):
        if self._init_parallel_flag:
            release()
Example 6
    def analyse(self):
        """
        Collect and analyse performance data; call this after training or during training.

        Examples:
            >>> import os
            >>> from mindspore import context
            >>> from mindspore.profiler import Profiler
            >>> context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
            ...                     device_id=int(os.environ["DEVICE_ID"]))
            >>> profiler = Profiler(subgraph='all', is_detail=True, is_show_op_path=False, output_path='./data')
            >>> model = Model()
            >>> model.train()
            >>> profiler.analyse()
        """
        release()

        job_id = self._get_profiling_job_id()
        logger.info("Profiling: job id is %s ", job_id)

        source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id)
        # parse hwts.log.data.45.dev file, and get task profiling data
        hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
        hwts_output_filename = os.path.join(self._output_path,
                                            hwts_output_filename)
        hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename)
        result = hwtslog_parser.execute()
        if not result:
            logger.error("Profiling: fail to parse hwts log file.")
            return

        # parse Framework file, and get the relation of op and tasks
        framework_parser = FrameworkParser(job_id, self._dev_id,
                                           self._output_path)
        framework_parser.parse()
        op_task_dict = framework_parser.to_task_id_full_op_name_dict()
        if not op_task_dict:
            logger.error("Profiling: fail to parse framework files.")
            return

        # get op compute time from hwts data and framework data, write output_op_compute_time.txt
        opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
        opcompute_output_filename = os.path.join(self._output_path,
                                                 opcompute_output_filename)
        optime_parser = OPComputeTimeParser(hwts_output_filename,
                                            opcompute_output_filename,
                                            op_task_dict, self._output_path,
                                            self._dev_id)
        optime_parser.execute()

        # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
        output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt"
        output_data_preprocess_aicpu = os.path.join(
            self._output_path, output_data_preprocess_aicpu)
        aicpu_data_parser = DataPreProcessParser(source_path,
                                                 output_data_preprocess_aicpu)
        aicpu_data_parser.execute()

        # Parsing minddata AICPU profiling
        MinddataParser.execute(source_path, self._output_path, self._dev_id)

        # parse minddata pipeline operator and queue
        try:
            pipeline_parser = MinddataPipelineParser(self._output_path,
                                                     self._dev_id,
                                                     self._output_path)
            pipeline_parser.parse()
        except ProfilerException as err:
            logger.warning(err.message)

        # analyse op compute time info
        try:
            self._analyser_op_info()
        except ProfilerException as err:
            logger.warning(err.message)

        # analyse step trace info
        try:
            self._analyse_step_trace(source_path, framework_parser)
        except ProfilerException as err:
            logger.warning(err.message)

        # analyse timeline info
        try:
            self._analyse_timeline(aicpu_data_parser, optime_parser)
        except (ProfilerIOException, ProfilerFileNotFoundException,
                RuntimeError) as err:
            logger.warning('Fail to write timeline data: %s', err)
Example 7
    def analyse(self):
        """
        Collect and analyse performance data; call this after training or during training.

        Examples:
            >>> import os
            >>> from mindspore import context
            >>> from mindspore.profiler import Profiler
            >>> context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
            ...                     device_id=int(os.environ["DEVICE_ID"]))
            >>> profiler = Profiler()
            >>> model = Model()
            >>> model.train()
            >>> profiler.analyse()
        """
        if self._device_target == "GPU":
            if context.get_auto_parallel_context(
                    'device_num') > 1 and self._dev_id != get_rank():
                self._dev_id = get_rank()
                logger.error(
                    'The Profiler object should be initialized after '
                    'set_auto_parallel_context() and init(); please check '
                    'the initialization order.')
            self._gpu_profiler.stop()
            timeline_generator = self._generate_timeline()

            # parse minddata pipeline operator and queue for GPU
            try:
                pipeline_parser = MinddataPipelineParser(
                    self._output_path, self._dev_id, self._output_path)
                pipeline_parser.parse()
            except ProfilerException as err:
                logger.warning(err.message)

            # analyse step trace info
            try:
                self._analyse_step_trace(
                    is_training_mode_flag=timeline_generator.check_op_name(
                        'Gradients'))
            except ProfilerException as err:
                logger.warning(err.message)

            os.environ['PROFILING_MODE'] = 'false'

        elif self._device_target == "Ascend":
            release()

            job_id = self._get_profiling_job_id()
            logger.info("Profiling: job id is %s ", job_id)

            source_path = os.path.join(self._output_path, job_id)
            # parse hwts.log.data.45.dev file, and get task profiling data
            hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
            hwts_output_filename = os.path.join(self._output_path,
                                                hwts_output_filename)
            source_path = validate_and_normalize_path(source_path)
            hwts_output_filename = validate_and_normalize_path(
                hwts_output_filename)
            hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename)
            _ = hwtslog_parser.execute()

            # parse Framework file, and get the relation of op and tasks
            framework_parser = FrameworkParser(job_id, self._dev_id,
                                               self._output_path)
            framework_parser.parse()
            op_task_dict = framework_parser.to_task_id_full_op_name_dict()
            if not op_task_dict:
                logger.error("Profiling: fail to parse framework files.")
                return

            # get op compute time from hwts data and framework data, write output_op_compute_time.txt
            opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
            opcompute_output_filename = os.path.join(
                self._output_path, opcompute_output_filename)
            opcompute_output_filename = validate_and_normalize_path(
                opcompute_output_filename)
            optime_parser = OPComputeTimeParser(hwts_output_filename,
                                                opcompute_output_filename,
                                                op_task_dict,
                                                self._output_path,
                                                self._dev_id)
            optime_parser.execute()

            # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
            output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt"
            output_data_preprocess_aicpu = os.path.join(
                self._output_path, output_data_preprocess_aicpu)
            output_data_preprocess_aicpu = validate_and_normalize_path(
                output_data_preprocess_aicpu)
            aicpu_data_parser = DataPreProcessParser(
                source_path, output_data_preprocess_aicpu)
            aicpu_data_parser.execute()

            # Parsing minddata AICPU profiling
            MinddataParser.execute(source_path, self._output_path,
                                   self._dev_id)

            # parse minddata pipeline operator and queue
            try:
                pipeline_parser = MinddataPipelineParser(
                    self._output_path, self._dev_id, self._output_path)
                pipeline_parser.parse()
            except ProfilerException as err:
                logger.warning(err.message)

            # analyse op compute time info
            try:
                self._analyser_op_info()
            except ProfilerException as err:
                logger.warning(err.message)

            # analyse step trace info
            try:
                self._analyse_step_trace(source_path, framework_parser)
            except ProfilerException as err:
                logger.warning(err.message)

            # analyse timeline info
            try:
                self._analyse_timeline(aicpu_data_parser, optime_parser)
            except (ProfilerIOException, ProfilerFileNotFoundException,
                    RuntimeError) as err:
                logger.warning('Fail to write timeline data: %s', err)

            os.environ['PROFILING_MODE'] = 'false'
            context.set_context(enable_profiling=False)
Example 8
def test_train():
    '''
    Finetune function.
    Run with: pytest -s finetune.py::test_train
    '''
    devid = int(os.getenv('DEVICE_ID'))
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        device_id=devid,
                        enable_mem_reuse=True,
                        enable_task_sink=True)
    # BertCLSTrain for classification
    # BertNERTrain for sequence labeling
    if cfg.task == 'NER':
        if cfg.use_crf:
            netwithloss = BertNER(bert_net_cfg,
                                  True,
                                  num_labels=len(tag_to_index),
                                  use_crf=True,
                                  tag_to_index=tag_to_index,
                                  dropout_prob=0.1)
        else:
            netwithloss = BertNER(bert_net_cfg,
                                  True,
                                  num_labels=cfg.num_labels,
                                  dropout_prob=0.1)
    else:
        netwithloss = BertCLS(bert_net_cfg,
                              True,
                              num_labels=cfg.num_labels,
                              dropout_prob=0.1)
    dataset = get_dataset(bert_net_cfg.batch_size, cfg.epoch_num)
    # optimizer
    steps_per_epoch = dataset.get_dataset_size()
    if cfg.optimizer == 'AdamWeightDecayDynamicLR':
        optimizer = AdamWeightDecayDynamicLR(
            netwithloss.trainable_params(),
            decay_steps=steps_per_epoch * cfg.epoch_num,
            learning_rate=cfg.AdamWeightDecayDynamicLR.learning_rate,
            end_learning_rate=cfg.AdamWeightDecayDynamicLR.end_learning_rate,
            power=cfg.AdamWeightDecayDynamicLR.power,
            warmup_steps=steps_per_epoch,
            weight_decay=cfg.AdamWeightDecayDynamicLR.weight_decay,
            eps=cfg.AdamWeightDecayDynamicLR.eps)
    elif cfg.optimizer == 'Lamb':
        optimizer = Lamb(netwithloss.trainable_params(),
                         decay_steps=steps_per_epoch * cfg.epoch_num,
                         start_learning_rate=cfg.Lamb.start_learning_rate,
                         end_learning_rate=cfg.Lamb.end_learning_rate,
                         power=cfg.Lamb.power,
                         warmup_steps=steps_per_epoch,
                         decay_filter=cfg.Lamb.decay_filter)
    elif cfg.optimizer == 'Momentum':
        optimizer = Momentum(netwithloss.trainable_params(),
                             learning_rate=cfg.Momentum.learning_rate,
                             momentum=cfg.Momentum.momentum)
    else:
        raise Exception("Optimizer not supported.")
    # load checkpoint into network
    ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch,
                                   keep_checkpoint_max=1)
    ckpoint_cb = ModelCheckpoint(prefix=cfg.ckpt_prefix,
                                 directory=cfg.ckpt_dir,
                                 config=ckpt_config)
    param_dict = load_checkpoint(cfg.pre_training_ckpt)
    load_param_into_net(netwithloss, param_dict)

    update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32,
                                             scale_factor=2,
                                             scale_window=1000)
    netwithgrads = BertFinetuneCell(netwithloss,
                                    optimizer=optimizer,
                                    scale_update_cell=update_cell)
    model = Model(netwithgrads)
    model.train(cfg.epoch_num, dataset, callbacks=[LossCallBack(), ckpoint_cb])
    D.release()
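
`LossCallBack` is referenced above but not defined in this example. A plausible minimal version, assuming the standard `mindspore.train.callback.Callback` interface:

from mindspore.train.callback import Callback

class LossCallBack(Callback):
    """Print the loss at the end of every training step."""
    def step_end(self, run_context):
        cb_params = run_context.original_args()
        print("epoch: {}, step: {}, loss: {}".format(
            cb_params.cur_epoch_num, cb_params.cur_step_num,
            cb_params.net_outputs))
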
Example 9
def test(cloud_args=None):
    """
    Network evaluation function. Computes top-1 and top-5 classification accuracy.
    Results are saved under ./outputs by default.
    """
    args = parse_args(cloud_args)

    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args.device_target,
                        save_graphs=True)
    if args.device_target == 'Ascend':
        devid = int(os.getenv('DEVICE_ID'))
        context.set_context(device_id=devid)

    # init distributed
    if args.is_distributed:
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()

    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))

    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    # network
    args.logger.important_info('start create network')
    if os.path.isdir(args.pretrained):
        models = list(glob.glob(os.path.join(args.pretrained, '*.ckpt')))

        f = lambda x: -1 * int(
            os.path.splitext(os.path.split(x)[-1])[0].split('-')[-1].split('_')
            [0])

        args.models = sorted(models, key=f)
    else:
        args.models = [
            args.pretrained,
        ]

    for model in args.models:
        de_dataset = classification_dataset(args.data_dir,
                                            image_size=args.image_size,
                                            per_batch_size=args.per_batch_size,
                                            max_epoch=1,
                                            rank=args.rank,
                                            group_size=args.group_size,
                                            mode='eval')
        eval_dataloader = de_dataset.create_tuple_iterator()
        network = DenseNet121(args.num_classes)

        param_dict = load_checkpoint(model)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        args.logger.info('load model {} success'.format(model))

        if args.device_target == 'Ascend':
            network.add_flags_recursive(fp16=True)

        img_tot = 0
        top1_correct = 0
        top5_correct = 0
        network.set_train(False)
        for data, gt_classes in eval_dataloader:
            output = network(Tensor(data, mstype.float32))
            output = output.asnumpy()
            gt_classes = gt_classes.asnumpy()

            top1_output = np.argmax(output, (-1))
            top5_output = np.argsort(output)[:, -5:]

            t1_correct = np.equal(top1_output, gt_classes).sum()
            top1_correct += t1_correct
            top5_correct += get_top5_acc(top5_output, gt_classes)
            img_tot += args.per_batch_size

        results = [[top1_correct], [top5_correct], [img_tot]]
        args.logger.info('before results={}'.format(results))
        if args.is_distributed:
            results = generate_results(model, args.rank, args.group_size,
                                       top1_correct, top5_correct, img_tot)
            results = np.array(results)
        else:
            results = np.array(results)

        args.logger.info('after results={}'.format(results))
        top1_correct = results[0, 0]
        top5_correct = results[1, 0]
        img_tot = results[2, 0]
        acc1 = 100.0 * top1_correct / img_tot
        acc5 = 100.0 * top5_correct / img_tot
        args.logger.info(
            'after allreduce eval: top1_correct={}, tot={}, acc={:.2f}%'.
            format(top1_correct, img_tot, acc1))
        args.logger.info(
            'after allreduce eval: top5_correct={}, tot={}, acc={:.2f}%'.
            format(top5_correct, img_tot, acc5))
    if args.is_distributed:
        release()
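
Example 9 delegates the cross-rank merge to a `generate_results` helper that is not shown; Example 11 below inlines what is presumably the same file-based exchange. A sketch of such a helper, assuming the `../cache` naming convention from Example 11 (the signature is inferred from the call site, not taken from the original source):

import os
import numpy as np

def generate_results(model, rank, group_size, top1_correct, top5_correct,
                     img_tot, tmp_dir='../cache'):
    """Publish this rank's counters as .npy files, wait for every rank,
    then return the summed [[top1], [top5], [tot]] nested list."""
    tag = model.replace('/', '')
    os.makedirs(tmp_dir, exist_ok=True)
    path = lambda name, r: '{}/{}_rank_{}_{}.npy'.format(tmp_dir, name, r, tag)
    np.save(path('top1', rank), top1_correct)
    np.save(path('top5', rank), top5_correct)
    np.save(path('img_tot', rank), img_tot)
    # Busy-wait until all ranks have published all three files.
    names = ('top1', 'top5', 'img_tot')
    while not all(os.path.exists(path(n, r))
                  for r in range(group_size) for n in names):
        pass
    top1 = sum(np.load(path('top1', r)) for r in range(group_size))
    top5 = sum(np.load(path('top5', r)) for r in range(group_size))
    tot = sum(np.load(path('img_tot', r)) for r in range(group_size))
    return [[top1], [top5], [tot]]
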
Example 10
    def _release_parallel(self):
        release()
Example 11
def test(cloud_args=None):
    """
    Network evaluation function. Computes top-1 and top-5 classification accuracy.
    Results are saved under ./outputs by default.
    """
    args = parse_args(cloud_args)

    # init distributed
    if args.is_distributed:
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()

    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))

    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    # network
    args.logger.important_info('start create network')
    if os.path.isdir(args.pretrained):
        models = list(glob.glob(os.path.join(args.pretrained, '*.ckpt')))

        f = lambda x: -1 * int(
            os.path.splitext(os.path.split(x)[-1])[0].split('-')[-1].split('_')
            [0])

        args.models = sorted(models, key=f)
    else:
        args.models = [
            args.pretrained,
        ]

    for model in args.models:
        de_dataset = classification_dataset(args.data_dir,
                                            image_size=args.image_size,
                                            per_batch_size=args.per_batch_size,
                                            max_epoch=1,
                                            rank=args.rank,
                                            group_size=args.group_size,
                                            mode='eval')
        eval_dataloader = de_dataset.create_tuple_iterator()
        network = DenseNet121(args.num_classes)

        param_dict = load_checkpoint(model)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        args.logger.info('load model {} success'.format(model))

        network.add_flags_recursive(fp16=True)

        img_tot = 0
        top1_correct = 0
        top5_correct = 0
        network.set_train(False)
        for data, gt_classes in eval_dataloader:
            output = network(Tensor(data, mstype.float32))
            output = output.asnumpy()
            gt_classes = gt_classes.asnumpy()

            top1_output = np.argmax(output, (-1))
            top5_output = np.argsort(output)[:, -5:]

            t1_correct = np.equal(top1_output, gt_classes).sum()
            top1_correct += t1_correct
            top5_correct += get_top5_acc(top5_output, gt_classes)
            img_tot += args.per_batch_size

        results = [[top1_correct], [top5_correct], [img_tot]]
        args.logger.info('before results={}'.format(results))
        if args.is_distributed:
            model_md5 = model.replace('/', '')
            tmp_dir = '../cache'
            if not os.path.exists(tmp_dir):
                os.mkdir(tmp_dir)
            top1_correct_npy = '{}/top1_rank_{}_{}.npy'.format(
                tmp_dir, args.rank, model_md5)
            top5_correct_npy = '{}/top5_rank_{}_{}.npy'.format(
                tmp_dir, args.rank, model_md5)
            img_tot_npy = '{}/img_tot_rank_{}_{}.npy'.format(
                tmp_dir, args.rank, model_md5)
            np.save(top1_correct_npy, top1_correct)
            np.save(top5_correct_npy, top5_correct)
            np.save(img_tot_npy, img_tot)
            while True:
                rank_ok = True
                for other_rank in range(args.group_size):
                    top1_correct_npy = '{}/top1_rank_{}_{}.npy'.format(
                        tmp_dir, other_rank, model_md5)
                    top5_correct_npy = '{}/top5_rank_{}_{}.npy'.format(
                        tmp_dir, other_rank, model_md5)
                    img_tot_npy = '{}/img_tot_rank_{}_{}.npy'.format(
                        tmp_dir, other_rank, model_md5)
                    if not os.path.exists(top1_correct_npy) or not os.path.exists(top5_correct_npy) \
                       or not os.path.exists(img_tot_npy):
                        rank_ok = False
                if rank_ok:
                    break

            top1_correct_all = 0
            top5_correct_all = 0
            img_tot_all = 0
            for other_rank in range(args.group_size):
                top1_correct_npy = '{}/top1_rank_{}_{}.npy'.format(
                    tmp_dir, other_rank, model_md5)
                top5_correct_npy = '{}/top5_rank_{}_{}.npy'.format(
                    tmp_dir, other_rank, model_md5)
                img_tot_npy = '{}/img_tot_rank_{}_{}.npy'.format(
                    tmp_dir, other_rank, model_md5)
                top1_correct_all += np.load(top1_correct_npy)
                top5_correct_all += np.load(top5_correct_npy)
                img_tot_all += np.load(img_tot_npy)
            results = [[top1_correct_all], [top5_correct_all], [img_tot_all]]
            results = np.array(results)

        else:
            results = np.array(results)

        args.logger.info('after results={}'.format(results))
        top1_correct = results[0, 0]
        top5_correct = results[1, 0]
        img_tot = results[2, 0]
        acc1 = 100.0 * top1_correct / img_tot
        acc5 = 100.0 * top5_correct / img_tot
        args.logger.info(
            'after allreduce eval: top1_correct={}, tot={}, acc={:.2f}%'.
            format(top1_correct, img_tot, acc1))
        args.logger.info(
            'after allreduce eval: top5_correct={}, tot={}, acc={:.2f}%'.
            format(top5_correct, img_tot, acc5))
    if args.is_distributed:
        release()
Example 12
    def analyse(self):
        """
        Collect and analyse performance data; call this after training or during training.

        Examples:
            >>> import os
            >>> from mindspore import context
            >>> from mindinsight.profiler import Profiler
            >>> context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
            ...                     device_id=int(os.environ["DEVICE_ID"]))
            >>> profiler = Profiler(subgraph='all', is_detail=True, is_show_op_path=False, output_path='./data')
            >>> model = Model(train_network)
            >>> dataset = get_dataset()
            >>> model.train(2, dataset)
            >>> profiler.analyse()
        """

        try:
            from mindspore.communication.management import release
            release()
        except ImportError:
            logger.error("Profiling: fail to import release from mindspore.")

        logger.info("begin profiler analyse")

        job_id = self._get_profiling_job_id()
        if not job_id:
            msg = ("Fail to get profiling job, please check whether job dir was generated under path %s" \
                   % PROFILING_LOG_BASE_PATH)
            raise RuntimeError(msg)

        logger.info("Profiling: job id is %s ", job_id)

        source_path = os.path.join(PROFILING_LOG_BASE_PATH, job_id)
        # parse hwts.log.data.45.dev file, and get task profiling data
        hwts_output_filename = self._hwts_output_filename_target + self._dev_id + ".txt"
        hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
        hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename)
        result = hwtslog_parser.execute()
        if not result:
            logger.error("Profiling: fail to parse hwts log file.")
            return

        # parse Framework file, and get the relation of op and tasks
        framework_parser = FrameworkParser(job_id, self._dev_id, self._output_path)
        framework_parser.parse()
        op_task_dict = framework_parser.to_task_id_full_op_name_dict()
        if not op_task_dict:
            logger.error("Profiling: fail to parse framework files.")
            return

        # get op compute time from hwts data and framework data, write output_op_compute_time.txt
        opcompute_output_filename = self._opcompute_output_filename_target + self._dev_id + ".txt"
        opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename)
        optime_parser = OPComputeTimeParser(hwts_output_filename, opcompute_output_filename, op_task_dict)
        optime_parser.execute()

        # parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
        output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._dev_id + ".txt"
        output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu)
        try:
            aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu)
            aicpu_data_parser.execute()
        except FileNotFoundError as err:
            logger.exception(err)

        # analyse op compute time info
        try:
            self._analyser_op_info()
        except MindInsightException as err:
            logger.error(err.message)