示例#1
0
    def get_npu_classifier(self):
        """Build the Estimator for the configured chip plus training hooks.

        Returns:
            A tuple ``(classifier, training_hooks)``: ``classifier`` is an
            ``NPUEstimator`` when ``self.config.chip == 'npu'`` or a
            ``tf.estimator.Estimator`` for 'gpu'/'cpu'; ``training_hooks``
            is a list containing the logger hook.

        Raises:
            ValueError: If ``self.config.chip`` is not 'npu', 'gpu' or 'cpu'.
        """
        # RunConfig keyword arguments shared by every branch below
        # (previously duplicated three times and prone to drifting apart).
        common_kwargs = dict(
            save_checkpoints_steps=112590,
            session_config=self.sess.estimator_config,
            model_dir=self.config.log_dir,
            keep_checkpoint_max=self.config.max_checkpoint_to_save)

        if self.config.chip == 'npu':
            from npu_bridge.estimator.npu.npu_config import NPURunConfig
            from npu_bridge.estimator.npu.npu_estimator import NPUEstimator

            ## refer to links
            # https://support.huaweicloud.com/tensorflowdevg-cann330alphaXtraining/atlasmprtg_13_0032.html
            # https://support.huaweicloud.com/tensorflowdevg-cann330alphaXtraining/atlastfadapi_07_0004.html
            if self.config.npu_profiling:
                from npu_bridge.estimator.npu.npu_config import ProfilingConfig
                # Profiling output (task trace + AI-CPU) is collected under
                # ./npu_profiling relative to the current working directory.
                profiling_dir = os.path.join(os.getcwd(), "npu_profiling")
                if not os.path.exists(profiling_dir):
                    os.makedirs(profiling_dir)
                profiling_options = '{"output":"%s","task_trace": "on","aicpu": "on"}' % (
                    profiling_dir)
                profiling_config = ProfilingConfig(
                    enable_profiling=True, profiling_options=profiling_options)

                run_config = NPURunConfig(
                    profiling_config=profiling_config,  # enable NPU profiling
                    **common_kwargs)
            else:
                run_config = NPURunConfig(**common_kwargs)

            classifier = NPUEstimator(
                model_fn=self.model.get_estimator_model_func,
                config=run_config)
        elif self.config.chip in ['gpu', 'cpu']:
            run_config = tf.estimator.RunConfig(**common_kwargs)
            classifier = tf.estimator.Estimator(
                model_fn=self.model.get_estimator_model_func,
                config=run_config)
        else:
            # Previously an unrecognized chip fell through and crashed with an
            # opaque UnboundLocalError on `classifier`; fail fast instead.
            raise ValueError(
                "Unsupported chip type: %r (expected 'npu', 'gpu' or 'cpu')"
                % self.config.chip)

        training_hooks = [self.logger]

        return classifier, training_hooks
示例#2
0
 def _init_npu_estimator(self, sess_config):
     """Create the NPUEstimator this worker trains with.

     Checkpoints go to the local worker path; checkpoint/report cadence
     comes from ``self.config``.
     """
     run_cfg = NPURunConfig(
         model_dir=self.get_local_worker_path(),
         save_checkpoints_steps=self.config.save_steps,
         log_step_count_steps=self.config.report_freq,
         session_config=sess_config,
         enable_data_pre_proc=True,
         iterations_per_loop=1)
     self.estimator = NPUEstimator(model_fn=self.model_fn, config=run_cfg)
示例#3
0
def _refresh_model_dir_and_session_config(config, model_dir):
    """Overwrite estimator config by `model_dir` and `session_config` if needed.

    Args:
        config: Original estimator config.
        model_dir: Estimator model checkpoint directory.

    Returns:
        Overwritten estimator config.

    Raises:
        ValueError: Model directory inconsistent between `model_dir` and
            `config`.
    """
    # isinstance(None, NPURunConfig) is False, so this also rejects None.
    if not isinstance(config, NPURunConfig):
        raise ValueError(
            'config must be an instance of `NPURunConfig`, but provided %s.' %
            config)

    # Fill in a default session config when the caller left it unset.
    if config.session_config is None:
        config = NPURunConfig.replace(
            config, session_config=run_config.get_default_session_config())

    model_dir = compat_internal.path_to_str(model_dir)
    configured_dir = getattr(config, 'model_dir', None)

    # Both places set a model_dir: they must agree.
    if (model_dir is not None and configured_dir is not None
            and configured_dir != model_dir):
        raise ValueError(
            "`model_dir` are set both in constructor and `NPURunConfig`, but with "
            "different values. In constructor: '{}', in `NPURunConfig`: "
            "'{}' ".format(model_dir, config.model_dir))

    if model_dir:
        # Constructor value wins (it matches the config value, or the config
        # had none).
        config = NPURunConfig.replace(config, model_dir=model_dir)
    elif configured_dir is None:
        # Neither side specified a directory: fall back to a temp folder.
        model_dir = tempfile.mkdtemp()
        logging.warning('Using temporary folder as model directory: %s',
                        model_dir)
        config = NPURunConfig.replace(config, model_dir=model_dir)

    return config
示例#4
0
def main(unused_argv):
    """Train and evaluate the MNIST CNN classifier on the NPU."""
    set_log()

    # Load the MNIST training and evaluation splits as numpy arrays.
    mnist = input_data.read_data_sets(FLAGS.data_dir, False)
    train_images = mnist.train.images
    train_targets = np.asarray(mnist.train.labels, dtype=np.int32)
    test_images = mnist.test.images
    test_targets = np.asarray(mnist.test.labels, dtype=np.int32)

    # NOTE: to enable NPU profiling, build a ProfilingConfig and pass it
    # via the `profiling_config=` argument below.
    npu_config = NPURunConfig(
        iterations_per_loop=100,
        save_checkpoints_steps=10,
        model_dir=FLAGS.model_dir,
        session_config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False))

    mnist_classifier = NPUEstimator(model_fn=cnn_model_fn,
                                    config=npu_config,
                                    params={},
                                    job_start_file=FLAGS.job_start_file)

    # Log the values in the "Softmax" tensor with label "probabilities".
    logging_hook = tf.train.LoggingTensorHook(
        tensors={"probabilities": "softmax_tensor"}, every_n_iter=50)

    # Train the model.
    mnist_classifier.train(input_fn=train_input_fn(train_images, train_targets),
                           hooks=[logging_hook],
                           steps=FLAGS.train_steps)

    # Evaluate the model and print results.
    print("Evaluate the model...")
    eval_results = mnist_classifier.evaluate(
        input_fn=eval_input_fn(test_images, test_targets),
        steps=FLAGS.eval_steps)
    print("eval_results: ", eval_results)
示例#5
0
    def get_npu_classifier(self):
        """Create a mixed-precision NPUEstimator plus its training hooks.

        Returns:
            A tuple ``(classifier, training_hooks)``: the ``NPUEstimator``
            built from ``self.args`` settings and a list holding the
            logger hook.
        """
        from npu_bridge.estimator.npu.npu_config import NPURunConfig
        from npu_bridge.estimator.npu.npu_estimator import NPUEstimator

        estimator_config = NPURunConfig(
            hcom_parallel=True,
            precision_mode="allow_mix_precision",
            enable_data_pre_proc=True,
            save_checkpoints_steps=self.args.nsteps_per_epoch,
            session_config=self.sess.estimator_config,
            model_dir=self.args.log_dir,
            iterations_per_loop=self.args.iterations_per_loop,
            keep_checkpoint_max=5)

        classifier = NPUEstimator(
            model_fn=self.model.get_estimator_model_func,
            config=estimator_config)

        return classifier, [self.logger]