def setup_bnds_decay_rates(model_name, dataset_name):
  """Setup boundaries & decay rates for the piecewise-constant learning rate schedule.

  NOTE: The bnd_decay_rates here is mgw_size invariant.

  Args:
  * model_name: model's name; 'resnet*' / 'lenet' (cifar_10) or 'resnet*' / 'mobilenet*' (ilsvrc_12)
  * dataset_name: dataset's name; must be one of ['cifar_10', 'ilsvrc_12']

  Returns:
  * init_lr: initial learning rate
  * bnds: iteration indices at which the learning rate decays
  * decay_rates: decay rate applied within each stage
  * finetune_steps: number of fine-tuning iterations

  Raises:
  * NotImplementedError: if the (model, dataset) combination is not supported
  """
  batch_size = FLAGS.batch_size if not FLAGS.enbl_multi_gpu else FLAGS.batch_size * mgw.size()
  nb_batches_per_epoch = int(FLAGS.nb_smpls_train / batch_size)
  mgw_size = int(mgw.size()) if FLAGS.enbl_multi_gpu else 1
  # linearly scale the learning rate with the overall batch size (multi-GPU only)
  init_lr = FLAGS.lrn_rate_init * FLAGS.batch_size * mgw_size / FLAGS.batch_size_norm \
    if FLAGS.enbl_multi_gpu else FLAGS.lrn_rate_init
  bnds, decay_rates = None, None
  if dataset_name == 'cifar_10':
    if model_name.startswith('resnet'):
      bnds = [nb_batches_per_epoch * 15, nb_batches_per_epoch * 40]
      decay_rates = [1e-3, 1e-4, 1e-5]
    elif model_name.startswith('lenet'):
      bnds = [nb_batches_per_epoch * 5, nb_batches_per_epoch * 30]
      decay_rates = [1e-4, 1e-5, 1e-6]
  elif dataset_name == 'ilsvrc_12':
    if model_name.startswith('resnet'):
      bnds = [nb_batches_per_epoch * 5, nb_batches_per_epoch * 20]
      decay_rates = [1e-4, 1e-5, 1e-6]
    elif model_name.startswith('mobilenet'):
      bnds = [nb_batches_per_epoch * 5, nb_batches_per_epoch * 30]
      decay_rates = [1e-4, 1e-5, 1e-6]
  # BUG FIX: previously an unsupported combination fell through and raised a
  # confusing NameError at the return statement; fail fast with a clear message
  if bnds is None:
    raise NotImplementedError('model: {} / dataset: {}'.format(model_name, dataset_name))
  finetune_steps = nb_batches_per_epoch * FLAGS.uql_quant_epochs
  init_lr = init_lr if FLAGS.enbl_warm_start else FLAGS.lrn_rate_init
  return init_lr, bnds, decay_rates, finetune_steps
def __monitor_progress(self, summary, log_rslt, idx_iter, time_step):
  """Monitor the training progress.

  Args:
  * summary: summary protocol buffer
  * log_rslt: logging operations' results
  * idx_iter: index of the training iteration
  * time_step: time step between two summary operations
  """
  # dump summaries so they show up in TensorBoard
  self.sm_writer.add_summary(summary, idx_iter)

  # training throughput in images per second (aggregated over all workers)
  nb_pics_per_sec = FLAGS.batch_size * FLAGS.summ_step / time_step
  if FLAGS.enbl_multi_gpu:
    nb_pics_per_sec *= mgw.size()

  # assemble & display monitored statistics
  stats = []
  for name, value in zip(self.log_op_names, log_rslt):
    stats.append('%s = %.4e' % (name, value))
  tf.logging.info('iter #%d: %s | speed = %.2f pics / sec'
                  % (idx_iter + 1, ' | '.join(stats), nb_pics_per_sec))
def setup_lrn_rate(self, global_step):
  """Setup the learning rate (and number of training iterations)."""
  # overall batch size across all workers
  if FLAGS.enbl_multi_gpu:
    batch_size = FLAGS.batch_size * mgw.size()
  else:
    batch_size = FLAGS.batch_size

  # version-specific learning rate schedule
  if FLAGS.mobilenet_version == 1:
    nb_epochs = 100
    idxs_epoch = [30, 60, 80, 90]
    decay_rates = [1.0, 0.1, 0.01, 0.001, 0.0001]
    lrn_rate = setup_lrn_rate_piecewise_constant(
        global_step, batch_size, idxs_epoch, decay_rates)
  elif FLAGS.mobilenet_version == 2:
    nb_epochs = 412
    epoch_step = 2.5
    decay_rate = 0.98**epoch_step  # which is better, 0.98 OR (0.98 ** epoch_step)?
    lrn_rate = setup_lrn_rate_exponential_decay(
        global_step, batch_size, epoch_step, decay_rate)
  else:
    raise ValueError('invalid MobileNet version: {}'.format(FLAGS.mobilenet_version))

  # identical in both branches, so hoisted out
  nb_iters = int(FLAGS.nb_smpls_train * nb_epochs * FLAGS.nb_epochs_rat / batch_size)
  return lrn_rate, nb_iters
def setup_lrn_rate(self, global_step):
  """Setup the learning rate (and number of training iterations).

  Args:
  * global_step: training iteration counter

  Returns:
  * lrn_rate: learning rate tensor
  * nb_iters: number of training iterations

  Raises:
  * ValueError: if the MobileNet version is neither 1 nor 2
  """
  # overall batch size across all workers
  batch_size = FLAGS.batch_size * (1 if not FLAGS.enbl_multi_gpu else mgw.size())
  if FLAGS.mobilenet_version == 1:
    # CLEANUP: removed dead `nb_epochs = 100` immediately overwritten by `412`;
    # both were unused since nb_iters is hard-coded below
    # epoch step & decay rate are themselves piecewise-constant schedules,
    # fed into an exponential-decay learning rate
    idxs_epoch = [12000, 20000]
    step_rate = [200, 200, 4000]
    epoch_step = setup_lrn_rate_piecewise_constant(
        global_step, batch_size, idxs_epoch, step_rate)
    decay_rates = [0.985, 0.980, 0.505]
    decay_rate = setup_lrn_rate_piecewise_constant(
        global_step, batch_size, idxs_epoch, decay_rates)
    lrn_rate = setup_lrn_rate_exponential_decay(
        global_step, batch_size, epoch_step, decay_rate)
    nb_iters = 30000  # fixed iteration budget
  elif FLAGS.mobilenet_version == 2:
    epoch_step = 500
    decay_rate = 0.9  # which is better, 0.98 OR (0.98 ** epoch_step)?
    lrn_rate = setup_lrn_rate_exponential_decay(
        global_step, batch_size, epoch_step, decay_rate)
    nb_iters = 15000  # fixed iteration budget
  else:
    raise ValueError('invalid MobileNet version: {}'.format(
        FLAGS.mobilenet_version))
  return lrn_rate, nb_iters
def __monitor_progress(self, idx_iter, log_rslt, time_prev):
  """Monitor the training progress (primary worker only).

  Args:
  * idx_iter: index of the training iteration
  * log_rslt: logging operations' results
  * time_prev: timestamp of the previous logging event

  Returns:
  * current timestamp (primary worker), or None (other workers)
  """
  if not self.__is_primary_worker():
    return None

  # training throughput in images per second (aggregated over all workers)
  speed = FLAGS.batch_size * self.tune_global_disp_steps / (timer() - time_prev)
  if FLAGS.enbl_multi_gpu:
    speed *= mgw.size()

  # display monitored statistics
  if self.dataset_name == 'coco2017-pose':
    if FLAGS.enbl_dst:
      lrn_rate, dst_loss, model_loss, loss, total_loss, total_loss_ll_paf, total_loss_ll_heat, total_loss_ll = log_rslt[:8]
      tf.logging.info(
        'iter #%d: lr = %e | dst_loss = %.4f | model_loss = %.4f | loss = %.4f | ll_paf = %.4f | ll_heat = %.4f | ll = %.4f | speed = %.2f pics / sec'
        % (idx_iter + 1, lrn_rate, dst_loss, model_loss, loss, total_loss_ll_paf,
           total_loss_ll_heat, total_loss_ll, speed))
    else:
      lrn_rate, model_loss, loss, total_loss, total_loss_ll_paf, total_loss_ll_heat, total_loss_ll = log_rslt[:7]
      tf.logging.info(
        'iter #%d: lr = %e | model_loss = %.4f | loss = %.4f | ll_paf = %.4f | ll_heat = %.4f | ll = %.4f | speed = %.2f pics / sec'
        % (idx_iter + 1, lrn_rate, model_loss, loss, total_loss_ll_paf,
           total_loss_ll_heat, total_loss_ll, speed))
  else:
    # BUG FIX: this classification branch previously ran unconditionally, so for
    # 'coco2017-pose' the pose statistics were logged and then the classification
    # statistics were mis-unpacked from the same results and logged again
    if FLAGS.enbl_dst:
      lrn_rate, dst_loss, model_loss, loss, acc_top1, acc_top5 = \
        log_rslt[0], log_rslt[1], log_rslt[2], log_rslt[3], log_rslt[4], log_rslt[5]
      tf.logging.info(
        'iter #%d: lr = %e | dst_loss = %e | model_loss = %e | loss = %e | acc_top1 = %e | acc_top5 = %e | speed = %.2f pics / sec '
        % (idx_iter + 1, lrn_rate, dst_loss, model_loss, loss, acc_top1, acc_top5, speed))
    else:
      lrn_rate, model_loss, loss, acc_top1, acc_top5 = \
        log_rslt[0], log_rslt[1], log_rslt[2], log_rslt[3], log_rslt[4]
      tf.logging.info(
        'iter #%d: lr = %e | model_loss = %e | loss = %e | acc_top1 = %e | acc_top5 = %e| speed = %.2f pics / sec'
        % (idx_iter + 1, lrn_rate, model_loss, loss, acc_top1, acc_top5, speed))

  return timer()
def __monitor_progress(self, summary, log_rslt, time_prev, idx_iter):
  """Monitor the training progress (primary worker only).

  Args:
  * summary: summary protocol buffer
  * log_rslt: logging operations' results
  * time_prev: timestamp of the previous logging event
  * idx_iter: index of the training iteration

  Returns:
  * current timestamp (primary worker), or None (other workers)
  """
  # only the primary worker writes summaries & logs
  if not self.is_primary_worker():
    return None

  # write summaries for TensorBoard visualization
  self.sm_writer.add_summary(summary, idx_iter)

  # training throughput in images per second (aggregated over all workers)
  speed = FLAGS.batch_size * FLAGS.summ_step / (timer() - time_prev)
  if FLAGS.enbl_multi_gpu:
    speed *= mgw.size()

  # display monitored statistics
  if FLAGS.enbl_dst:
    lrn_rate, dst_loss, model_loss, loss, acc_top1, acc_top5 = log_rslt[:6]
    tf.logging.info(
      'iter #%d: lr = %e | dst_loss = %.4f | model_loss = %.4f | loss = %.4f | acc_top1 = %.4f | acc_top5 = %.4f | speed = %.2f pics / sec'
      % (idx_iter + 1, lrn_rate, dst_loss, model_loss, loss, acc_top1, acc_top5, speed))
  else:
    lrn_rate, model_loss, loss, acc_top1, acc_top5 = log_rslt[:5]
    tf.logging.info(
      'iter #%d: lr = %e | model_loss = %.4f | loss = %.4f | acc_top1 = %.4f | acc_top5 = %.4f | speed = %.2f pics / sec'
      % (idx_iter + 1, lrn_rate, model_loss, loss, acc_top1, acc_top5, speed))

  return timer()
def setup_lrn_rate(self, global_step):
  """Setup the learning rate (and number of training iterations)."""
  # overall batch size across all workers
  if FLAGS.enbl_multi_gpu:
    batch_size = FLAGS.batch_size * mgw.size()
  else:
    batch_size = FLAGS.batch_size

  # piecewise-constant schedule: decay at epochs 30 / 60 / 80 / 90
  nb_epochs = 100
  idxs_epoch = [30, 60, 80, 90]
  decay_rates = [1.0, 0.1, 0.01, 0.001, 0.0001]
  lrn_rate = setup_lrn_rate_piecewise_constant(
      global_step, batch_size, idxs_epoch, decay_rates)
  nb_iters = int(FLAGS.nb_smpls_train * nb_epochs * FLAGS.nb_epochs_rat / batch_size)
  return lrn_rate, nb_iters
def setup_lrn_rate(self, global_step):
  """Setup the learning rate (and number of training iterations).

  Args:
  * global_step: training iteration counter

  Returns:
  * lrn_rate: learning rate tensor
  * nb_iters: number of training iterations
  """
  # CLEANUP: removed unused local `nb_epochs = 100` and the commented-out
  # dead line `#nb_iters = int(200)`; the iteration budget is fixed below.
  # NOTE(review): idxs_epoch holds fractional epoch boundaries (0.4 / 0.8),
  # unlike the integer epoch indices used by the sibling schedules — confirm intended.
  idxs_epoch = [0.4, 0.8]
  decay_rates = [0.001, 0.0005, 0.0001]
  batch_size = FLAGS.batch_size * (1 if not FLAGS.enbl_multi_gpu else mgw.size())
  lrn_rate = setup_lrn_rate_piecewise_constant(global_step, batch_size, idxs_epoch, decay_rates)
  nb_iters = 12000  # fixed iteration budget (independent of dataset size)
  return lrn_rate, nb_iters
def setup_lrn_rate(global_step, model_name, dataset_name):
  """Setup the learning rate for the given dataset.

  Args:
  * global_step: training iteration counter
  * model_name: model's name; must be one of ['lenet', 'resnet_*', 'mobilenet_v1', 'mobilenet_v2']
  * dataset_name: dataset's name; must be one of ['cifar_10', 'ilsvrc_12']

  Returns:
  * lrn_rate: learning rate
  * nb_batches: number of training mini-batches

  Raises:
  * NotImplementedError: if the model / dataset combination is not supported
  """
  # obtain the overall batch size across all GPUs
  batch_size = FLAGS.batch_size * (mgw.size() if FLAGS.enbl_multi_gpu else 1)

  # choose a learning rate protocol according to the model & dataset combination
  global_step = tf.cast(global_step, tf.int32)
  if dataset_name == 'cifar_10':
    if model_name == 'lenet':
      return setup_lrn_rate_lenet_cifar10(global_step, batch_size)
    if model_name.startswith('resnet'):
      return setup_lrn_rate_resnet_cifar10(global_step, batch_size)
    raise NotImplementedError('model: {} / dataset: {}'.format(model_name, dataset_name))
  if dataset_name == 'ilsvrc_12':
    if model_name.startswith('resnet'):
      return setup_lrn_rate_resnet_ilsvrc12(global_step, batch_size)
    if model_name.startswith('mobilenet_v1'):
      return setup_lrn_rate_mobilenet_v1_ilsvrc12(global_step, batch_size)
    if model_name.startswith('mobilenet_v2'):
      return setup_lrn_rate_mobilenet_v2_ilsvrc12(global_step, batch_size)
    raise NotImplementedError('model: {} / dataset: {}'.format(model_name, dataset_name))
  raise NotImplementedError('dataset: ' + dataset_name)
def __monitor_progress(self, summary, log_rslt, time_prev, idx_iter):
  """Monitor the training progress (primary worker only).

  Args:
  * summary: summary protocol buffer
  * log_rslt: logging operations' results
  * time_prev: timestamp of the previous logging event
  * idx_iter: index of the training iteration

  Returns:
  * current timestamp (primary worker), or None (other workers)
  """
  # early break for non-primary workers
  if not self.is_primary_worker():
    return None

  # write summaries for TensorBoard visualization
  self.sm_writer.add_summary(summary, idx_iter)

  # training throughput in images per second (aggregated over all workers)
  speed = FLAGS.batch_size * FLAGS.summ_step / (timer() - time_prev)
  if FLAGS.enbl_multi_gpu:
    speed *= mgw.size()

  # NOTE: for cifar-10, acc_top5 is 0.
  is_pose = (self.dataset_name == 'coco2017-pose')
  if is_pose and FLAGS.enbl_dst:
    lrn_rate, dst_loss, model_loss, loss, total_loss, total_loss_ll_paf, total_loss_ll_heat, total_loss_ll = log_rslt[:8]
    tf.logging.info(
      'iter #%d: lr = %e | dst_loss = %.4f | model_loss = %.4f | loss = %.4f | ll_paf = %.4f | ll_heat = %.4f | ll = %.4f | speed = %.2f pics / sec'
      % (idx_iter + 1, lrn_rate, dst_loss, model_loss, loss, total_loss_ll_paf,
         total_loss_ll_heat, total_loss_ll, speed))
  elif is_pose:
    lrn_rate, model_loss, loss, total_loss, total_loss_ll_paf, total_loss_ll_heat, total_loss_ll = log_rslt[:7]
    tf.logging.info(
      'iter #%d: lr = %e | model_loss = %.4f | loss = %.4f | ll_paf = %.4f | ll_heat = %.4f | ll = %.4f | speed = %.2f pics / sec'
      % (idx_iter + 1, lrn_rate, model_loss, loss, total_loss_ll_paf,
         total_loss_ll_heat, total_loss_ll, speed))
  elif FLAGS.enbl_dst:
    lrn_rate, dst_loss, model_loss, loss, acc_top1, acc_top5 = log_rslt[:6]
    tf.logging.info(
      'iter #%d: lr = %e | dst_loss = %.4f | model_loss = %.4f | loss = %.4f | acc_top1 = %.4f | acc_top5 = %.4f | speed = %.2f pics / sec'
      % (idx_iter + 1, lrn_rate, dst_loss, model_loss, loss, acc_top1, acc_top5, speed))
  else:
    lrn_rate, model_loss, loss, acc_top1, acc_top5 = log_rslt[:5]
    tf.logging.info(
      'iter #%d: lr = %e | model_loss = %.4f | loss = %.4f | acc_top1 = %.4f | acc_top5 = %.4f | speed = %.2f pics / sec'
      % (idx_iter + 1, lrn_rate, model_loss, loss, acc_top1, acc_top5, speed))

  return timer()
def __monitor_progress(self, summary, log_rslt):
  """Monitor the training progress (primary worker only).

  Args:
  * summary: summary protocol buffer
  * log_rslt: logging operations' results
  """
  # early break for non-primary workers
  if not self.__is_primary_worker():
    return

  # write summaries for TensorBoard visualization
  self.sm_writer.add_summary(summary, self.idx_iter)

  # compute the training speed & display monitored statistics
  lrn_rate, loss, accuracy = log_rslt[:3]
  speed = FLAGS.batch_size * FLAGS.summ_step / (timer() - self.time_prev)
  if FLAGS.enbl_multi_gpu:
    speed *= mgw.size()
  tf.logging.info('iter #%d: lr = %e | loss = %e | speed = %.2f pics / sec'
                  % (self.idx_iter + 1, lrn_rate, loss, speed))
  for key, value in zip(self.accuracy_keys, accuracy):
    tf.logging.info('{} = {}'.format(key, value))
  self.time_prev = timer()
def build(self, enbl_trn_val_split=False):
  '''Build iterator(s) for tf.data.Dataset() object.

  Args:
  * enbl_trn_val_split: whether to split into training & validation subsets

  Returns:
  * iterator_trn: iterator for the training subset
  * iterator_val: iterator for the validation subset
    OR
  * iterator: iterator for the chosen subset (training OR testing)

  Example:
    # build iterator(s)
    dataset = xxxxDataset(is_train=True)  # TF operations are not created
    iterator = dataset.build()  # TF operations are created
      OR
    iterator_trn, iterator_val = dataset.build(enbl_trn_val_split=True)  # for dataset-train only

    # use the iterator to obtain a mini-batch of images & labels
    images, labels = iterator.get_next()
  '''
  # list data files, optionally sharding them across workers
  file_names = tf.data.Dataset.list_files(self.file_pattern, shuffle=True)
  if self.enbl_shard:
    file_names = file_names.shard(mgw.size(), mgw.rank())

  # read & parse records in parallel
  dataset = file_names.apply(tf.contrib.data.parallel_interleave(
    self.dataset_fn, cycle_length=FLAGS.cycle_length))
  dataset = dataset.map(self.parse_fn, num_parallel_calls=FLAGS.nb_threads)

  # single iterator, or separate training / validation iterators
  if not (self.is_train and enbl_trn_val_split):
    return self.__make_iterator(dataset)
  iterator_val = self.__make_iterator(dataset.take(FLAGS.nb_smpls_val))
  iterator_trn = self.__make_iterator(dataset.skip(FLAGS.nb_smpls_val))
  return iterator_trn, iterator_val
def __retrain_network(self):
  """Retrain the network with layerwise regression & network fine-tuning."""
  # split the iteration budget evenly across workers
  nb_workers = mgw.size() if FLAGS.enbl_multi_gpu else 1
  nb_iters_rg = int(math.ceil(FLAGS.ws_nb_iters_rg / nb_workers))
  nb_iters_ft = int(math.ceil(FLAGS.ws_nb_iters_ft / nb_workers))

  # re-train the network with layerwise regression
  time_beg = timer()
  for rg_train_op in self.rg_train_ops:
    for __ in range(nb_iters_rg):
      self.sess_train.run(rg_train_op)
  time_rg = timer() - time_beg

  # re-train the network with global fine-tuning
  time_beg = timer()
  for __ in range(nb_iters_ft):
    self.sess_train.run(self.ft_train_op)
  time_ft = timer() - time_beg

  # display the time consumption
  tf.logging.info('time consumption: %.4f (s) - RG | %.4f (s) - FT' % (time_rg, time_ft))
def __init__(self, dataset_name, weights, statistics, bit_placeholders, ops,
             layerwise_tune_list, sess_train, sess_eval, saver_train, saver_eval,
             barrier_fn):
  """By passing the ops in the learner, we do not need to build the graph again
  for training and testing.

  Args:
  * dataset_name: a string that indicates which dataset to use
  * weights: a list of Tensors, the weights of networks to quantize
  * statistics: a dict, recording the number of weights, activations e.t.c.
  * bit_placeholders: a dict of placeholder Tensors, the input of bits
  * ops: a dict of ops, including train_op, eval_op e.t.c.
  * layerwise_tune_list: a tuple, in which [0] records the layerwise op and
    [1] records the layerwise l2_norm
  * sess_train: a session for train
  * sess_eval: a session for eval
  * saver_train: a Tensorflow Saver for the training graph
  * saver_eval: a Tensorflow Saver for the eval graph
  * barrier_fn: a function that implements barrier
  """
  self.dataset_name = dataset_name
  self.weights = weights
  self.statistics = statistics
  self.bit_placeholders = bit_placeholders
  self.ops = ops
  self.layerwise_tune_ops, self.layerwise_diff = \
    layerwise_tune_list[0], layerwise_tune_list[1]
  self.sess_train = sess_train
  self.sess_eval = sess_eval
  self.saver_train = saver_train
  self.saver_eval = saver_eval
  self.auto_barrier = barrier_fn

  # overall bit budget: total number of weights times the target average bit-width
  self.total_num_weights = sum(self.statistics['num_weights'])
  self.total_bits = self.total_num_weights * FLAGS.uql_equivalent_bits
  self.w_rl_helper = RLHelper(self.sess_train,
                              self.total_bits,
                              self.statistics['num_weights'],
                              self.weights,
                              random_layers=FLAGS.uql_enbl_random_layers)

  # tuning step counts are divided by the worker count so the global work is mgw-invariant
  self.mgw_size = int(mgw.size()) if FLAGS.enbl_multi_gpu else 1
  self.tune_global_steps = int(FLAGS.uql_tune_global_steps / self.mgw_size)
  self.tune_global_disp_steps = int(FLAGS.uql_tune_disp_steps / self.mgw_size)

  # build the RL training graph in a separate tf.Graph with its own session
  with tf.Graph().as_default():
    config = tf.ConfigProto()
    # pin each worker to its own GPU (single-GPU run uses device 0)
    config.gpu_options.visible_device_list = str(mgw.local_rank() \
      if FLAGS.enbl_multi_gpu else 0)
    self.sess_rl = tf.Session(config=config)

    # train an RL agent through multiple roll-outs
    self.s_dims = self.w_rl_helper.s_dims
    self.a_dims = 1
    # replay buffer sized as (#layers) x (a quarter of the roll-out count)
    buff_size = len(self.weights) * int(FLAGS.uql_nb_rlouts // 4)
    # action range is the allowed bit-width span; presumably actions are offsets
    # from uql_w_bit_min — TODO confirm against DdpgAgent's usage
    self.agent = DdpgAgent(self.sess_rl, self.s_dims, self.a_dims,
                           FLAGS.uql_nb_rlouts, buff_size,
                           a_min=0., a_max=FLAGS.uql_w_bit_max-FLAGS.uql_w_bit_min)
def __choose_discr_chns(self):  # pylint: disable=too-many-locals
  """Choose discrimination-aware channels.

  For each block: fine-tune the block, then for every layer in the block greedily
  re-add the most important channels (largest gradient norm) one at a time until
  the pruning ratio drops to FLAGS.dcp_prune_ratio, fine-tuning after each addition.
  """
  # select the most discriminative channels through multiple stages;
  # per-worker iteration counts are scaled so the global work is worker-invariant
  nb_workers = mgw.size() if FLAGS.enbl_multi_gpu else 1
  nb_iters_block = int(FLAGS.dcp_nb_iters_block / nb_workers)
  nb_iters_layer = int(FLAGS.dcp_nb_iters_layer / nb_workers)
  for idx_block in range(self.nb_blocks):
    # fine-tune the current block (summaries written every summ_step iterations)
    for idx_iter in range(nb_iters_block):
      if (idx_iter + 1) % FLAGS.summ_step != 0:
        self.sess_train.run(self.block_train_ops[idx_block])
      else:
        summary, __ = self.sess_train.run([self.summary_op, self.block_train_ops[idx_block]])
        if self.is_primary_worker('global'):
          self.sm_writer.add_summary(summary, nb_iters_block * idx_block + idx_iter)

    # select the most discriminative channels for each layer
    for idx_layer in range(1, self.nb_layers):  # do not prune the first layer
      if self.idxs_layer_to_block[idx_layer] != idx_block:
        continue

      # initialize the mask as all channels are pruned
      # mask axis 2 is assumed to be the input-channel axis — consistent with
      # the (0, 1, 3) reductions below
      mask_shape = self.sess_train.run(tf.shape(self.masks[idx_layer]))
      tf.logging.info('layer #{}: mask\'s shape is {}'.format(idx_layer, mask_shape))
      nb_chns = mask_shape[2]
      grad_norm_mask = np.ones(nb_chns)  # zeroed once a channel has been selected
      mask_vec = np.sum(self.sess_train.run(self.masks[idx_layer]), axis=(0, 1, 3))
      prune_ratio = 1.0 - float(np.count_nonzero(mask_vec)) / mask_vec.size
      tf.logging.info('layer #%d: prune_ratio = %.4f' % (idx_layer, prune_ratio))

      # greedily re-add channels until the target pruning ratio is reached;
      # the first pass always runs (it resets the mask via mask_init_ops)
      is_first_entry = True
      while is_first_entry or prune_ratio > FLAGS.dcp_prune_ratio:
        # choose the most important channel and then update the mask
        grad_norm = self.sess_train.run(self.grad_norms[idx_layer])
        idx_chn_input = np.argmax(grad_norm * grad_norm_mask)
        grad_norm_mask[idx_chn_input] = 0.0  # exclude from future selections
        tf.logging.info('adding channel #%d to the non-pruned set' % idx_chn_input)
        mask_delta = np.zeros(mask_shape)
        mask_delta[:, :, idx_chn_input, :] = 1.0
        if is_first_entry:  # initialize the mask on the first entry only
          is_first_entry = False
          self.sess_train.run(self.mask_init_ops[idx_layer])
        self.sess_train.run(self.mask_updt_ops[idx_layer],
                            feed_dict={self.mask_deltas[idx_layer]: mask_delta})
        self.sess_train.run(self.prune_ops[idx_layer])

        # fine-tune the current layer
        for idx_iter in range(nb_iters_layer):
          self.sess_train.run(self.layer_train_ops[idx_layer])

        # re-compute the pruning ratio
        mask_vec = np.sum(self.sess_train.run(self.masks[idx_layer]), axis=(0, 1, 3))
        prune_ratio = 1.0 - float(np.count_nonzero(mask_vec)) / mask_vec.size
        tf.logging.info('layer #%d: prune_ratio = %.4f' % (idx_layer, prune_ratio))

    # compute overall pruning ratios
    if self.is_primary_worker('global'):
      log_rslt = self.sess_train.run(self.log_op)
      log_str = ' | '.join(['%s = %.4e' % (name, value)
                            for name, value in zip(self.log_op_names, log_rslt)])
      tf.logging.info('block #%d: %s' % (idx_block + 1, log_str))
def __choose_channels(self):  # pylint: disable=too-many-locals
  """Choose channels for all convolutional layers.

  Per-layer pruning ratios come either from a uniform value or from a comma-separated
  file. Each layer is pruned via stochastic proximal gradient descent on a
  sparsity-inducing regularizer, then fine-tuned with the selected channels only.
  """
  # obtain each layer's pruning ratio
  if FLAGS.cpg_prune_ratio_type == 'uniform':
    ratio_list = [FLAGS.cpg_prune_ratio] * self.nb_layers
    if FLAGS.cpg_skip_ht_layers:  # skip the head & tail layers
      ratio_list[0] = 0.0
      ratio_list[-1] = 0.0
  elif FLAGS.cpg_prune_ratio_type == 'list':
    # one comma-separated line of per-layer ratios
    with open(FLAGS.cpg_prune_ratio_file, 'r') as i_file:
      i_line = i_file.readline().strip()
      ratio_list = [float(sub_str) for sub_str in i_line.split(',')]
  else:
    raise ValueError('unrecognized pruning ratio type: ' + FLAGS.cpg_prune_ratio_type)

  # select channels for all convolutional layers;
  # per-worker iteration count is scaled so global work is worker-invariant
  nb_workers = mgw.size() if FLAGS.enbl_multi_gpu else 1
  nb_iters_layer = int(FLAGS.cpg_nb_iters_layer / nb_workers)
  for idx_layer in range(self.nb_layers):
    # skip if no pruning is required
    if ratio_list[idx_layer] == 0.0:
      continue
    if self.is_primary_worker('global'):
      tf.logging.info('layer #%d: pr = %.2f (target)' % (idx_layer, ratio_list[idx_layer]))
      tf.logging.info('mask.shape = {}'.format(self.masks[idx_layer].shape))

    # select channels for the current convolutional layer
    time_prev = timer()
    reg_loss_prev = 0.0
    lrn_rate_pgd = FLAGS.cpg_lrn_rate_pgd_init
    for idx_iter in range(nb_iters_layer):
      # take a stochastic proximal gradient descent step; the pruning percentile
      # ramps linearly up to the layer's target ratio
      prune_perctl = ratio_list[idx_layer] * 100.0 * (idx_iter + 1) / nb_iters_layer
      __, reg_loss = self.sess_train.run(
          [self.layer_ops[idx_layer]['prune'], self.reg_losses[idx_layer]],
          feed_dict={self.lrn_rates_pgd[idx_layer]: lrn_rate_pgd,
                     self.prune_perctls[idx_layer]: prune_perctl})
      mask = self.sess_train.run(self.masks[idx_layer])
      if self.is_primary_worker('global'):
        nb_chns_nnz = np.count_nonzero(np.sum(mask, axis=(0, 1, 3)))
        tf.logging.info(
            'iter %d: nnz-chns = %d | loss = %.2e | lr = %.2e | percentile = %.2f'
            % (idx_iter + 1, nb_chns_nnz, reg_loss, lrn_rate_pgd, prune_perctl))

      # adjust the learning rate: grow while the loss decreases, shrink otherwise
      if reg_loss < reg_loss_prev:
        lrn_rate_pgd *= FLAGS.cpg_lrn_rate_pgd_incr
      else:
        lrn_rate_pgd *= FLAGS.cpg_lrn_rate_pgd_decr
      reg_loss_prev = reg_loss

    # fine-tune with selected channels only
    self.sess_train.run(self.mask_updt_ops[idx_layer])
    for idx_iter in range(nb_iters_layer):
      __, reg_loss = self.sess_train.run(
          [self.layer_ops[idx_layer]['finetune'], self.reg_losses[idx_layer]])
      mask = self.sess_train.run(self.masks[idx_layer])
      if self.is_primary_worker('global'):
        nb_chns_nnz = np.count_nonzero(np.sum(mask, axis=(0, 1, 3)))
        tf.logging.info('iter %d: nnz-chns = %d | loss = %.2e'
                        % (idx_iter + 1, nb_chns_nnz, reg_loss))

    # re-compute the pruning ratio
    mask_vec = np.mean(np.square(self.sess_train.run(self.masks[idx_layer])), axis=(0, 1, 3))
    prune_ratio = 1.0 - float(np.count_nonzero(mask_vec)) / mask_vec.size
    if self.is_primary_worker('global'):
      tf.logging.info('layer #%d: pr = %.2f (actual) | time = %.2f'
                      % (idx_layer, prune_ratio, timer() - time_prev))

  # compute overall pruning ratios
  # NOTE(review): log_str is assembled but never logged here — possibly truncated
  # or intentionally unused; confirm against the original file
  if self.is_primary_worker('global'):
    log_rslt = self.sess_train.run(self.log_op)
    log_str = ' | '.join(['%s = %.4e' % (name, value)
                          for name, value in zip(self.log_op_names, log_rslt)])
def __choose_channels(self):  # pylint: disable=too-many-locals
  """Choose channels for all convolutional layers.

  For each layer, sample (input, output) pairs from the full & pruned networks over
  several mini-batches, then select input channels by solving a sparsity-constrained
  regression problem and write the resulting kernel back into the pruned graph.
  """
  # obtain each layer's pruning ratio
  prune_ratios = [FLAGS.cpr_prune_ratio] * self.nb_conv_layers
  if FLAGS.cpr_skip_frst_layer:
    prune_ratios[0] = 0.0
  if FLAGS.cpr_skip_last_layer:
    prune_ratios[-1] = 0.0

  # select channels for all the convolutional layers
  nb_workers = mgw.size() if FLAGS.enbl_multi_gpu else 1  # NOTE(review): appears unused here
  for idx_layer, (prune_ratio, conv_info) in enumerate(
      zip(prune_ratios, self.conv_info_list)):
    # skip if no pruning is required
    if prune_ratio == 0.0:
      continue
    if self.is_primary_worker('global'):
      tf.logging.info('layer #%d: pr = %.2f (target)' % (idx_layer, prune_ratio))
      tf.logging.info('kernel shape = {}'.format(conv_info['conv_krnl_prnd'].shape))

    # extract the current layer's information
    conv_krnl_full = self.sess_train.run(conv_info['conv_krnl_full'])
    conv_krnl_prnd = self.sess_train.run(conv_info['conv_krnl_prnd'])
    conv_krnl_prnd_ph = conv_info['conv_krnl_prnd_ph']
    update_op = conv_info['update_op']
    input_full_tf = conv_info['input_full']
    input_prnd_tf = conv_info['input_prnd']
    output_full_tf = conv_info['output_full']
    output_prnd_tf = conv_info['output_prnd']
    strides = conv_info['strides']
    padding = conv_info['padding']
    # kernel axis 2 is assumed to be the input-channel axis — TODO confirm
    nb_chns_input = conv_krnl_prnd.shape[2]

    # sample inputs & outputs through multiple mini-batches
    nb_iters_smpl = int(math.ceil(float(FLAGS.cpr_nb_smpl_insts) / FLAGS.batch_size))
    inputs_list = [[] for __ in range(nb_chns_input)]  # one list per input channel
    outputs_list = []
    for idx_iter in range(nb_iters_smpl):
      inputs_full, inputs_prnd, outputs_full, outputs_prnd = \
        self.sess_train.run([input_full_tf, input_prnd_tf, output_full_tf, output_prnd_tf])
      inputs_smpl, outputs_smpl = self.__smpl_inputs_n_outputs(
        conv_krnl_full, conv_krnl_prnd, inputs_full, inputs_prnd,
        outputs_full, outputs_prnd, strides, padding)
      for idx_chn_input in range(nb_chns_input):
        inputs_list[idx_chn_input] += [inputs_smpl[idx_chn_input]]
      outputs_list += [outputs_smpl]
    inputs_np_list = [np.vstack(x) for x in inputs_list]
    outputs_np = np.vstack(outputs_list)

    # choose channels via solving the sparsity-constrained regression problem
    conv_krnl_prnd = self.__solve_sparse_regression(
      inputs_np_list, outputs_np, conv_krnl_prnd, prune_ratio)
    self.sess_train.run(update_op, feed_dict={conv_krnl_prnd_ph: conv_krnl_prnd})

    # evaluate the channel pruned model (per layer, if enabled);
    # the barrier keeps non-primary workers in sync during evaluation
    if FLAGS.cpr_eval_per_layer:
      if self.is_primary_worker('global'):
        self.__save_model(is_train=True)
        self.evaluate()
      self.auto_barrier()

  # evaluate the final channel pruned model
  if not FLAGS.cpr_eval_per_layer:
    if self.is_primary_worker('global'):
      self.__save_model(is_train=True)
      self.evaluate()
    self.auto_barrier()
def __choose_channels(self):  # pylint: disable=too-many-locals
  """Choose channels for all convolutional layers.

  For each (non-skipped) layer: sample (input, output) pairs until the regression
  instance budget is met, sub-sample to exactly FLAGS.cpr_nb_insts_reg instances,
  then select input channels via a sparsity-constrained regression and write the
  resulting kernel back into the pruned graph.
  """
  # obtain each layer's pruning ratio
  prune_ratios = [FLAGS.cpr_prune_ratio] * self.nb_conv_layers
  if FLAGS.cpr_skip_frst_layer:
    prune_ratios[0] = 0.0
  if FLAGS.cpr_skip_last_layer:
    prune_ratios[-1] = 0.0

  # evaluate the model before channel pruning
  # NOTE(review): the `False and` guard deliberately disables this pre-pruning
  # evaluation (only the log line and barrier run) — confirm whether this is a
  # leftover debugging switch
  tf.logging.info('evaluating the model before channel pruning')
  if False and self.is_primary_worker('global'):
    self.__save_model(is_train=True)
    self.evaluate()
  self.auto_barrier()

  # select channels for all the convolutional layers
  nb_workers = mgw.size() if FLAGS.enbl_multi_gpu else 1  # NOTE(review): appears unused here
  skip_names = FLAGS.cpr_skip_op_names.split(
    ',') if FLAGS.cpr_skip_op_names is not None else []
  for idx_layer, (prune_ratio, conv_info) in enumerate(
      zip(prune_ratios, self.conv_info_list)):
    # skip certain layers if no pruning is required (matched by op-name substring)
    enbl_skip = False
    for skip_name in skip_names:
      if skip_name in conv_info['conv_krnl_prnd'].name:
        enbl_skip = True
        break
    if enbl_skip:
      tf.logging.info('skip %s since no pruning is required'
                      % conv_info['conv_krnl_prnd'].name)
      continue

    # display the layer information
    if self.is_primary_worker('global'):
      tf.logging.info('layer #%d: pr = %.2f (target)' % (idx_layer, prune_ratio))
      tf.logging.info('kernel name = {}'.format(conv_info['conv_krnl_prnd'].name))
      tf.logging.info('kernel shape = {}'.format(conv_info['conv_krnl_prnd'].shape))

    # extract the current layer's information
    conv_krnl_full = self.sess_train.run(conv_info['conv_krnl_full'])
    conv_krnl_prnd = self.sess_train.run(conv_info['conv_krnl_prnd'])
    conv_krnl_prnd_ph = conv_info['conv_krnl_prnd_ph']
    update_op = conv_info['update_op']
    input_full_tf = conv_info['input_full']
    input_prnd_tf = conv_info['input_prnd']
    output_full_tf = conv_info['output_full']
    output_prnd_tf = conv_info['output_prnd']
    strides = conv_info['strides']
    padding = conv_info['padding']
    # kernel axis 2 is assumed to be the input-channel axis — TODO confirm
    nb_chns_input = conv_krnl_prnd.shape[2]

    # sample inputs & outputs through multiple mini-batches
    tf.logging.info('sampling inputs & outputs through multiple mini-batches')
    time_beg = timer()
    nb_insts = 0  # number of sampled instances (for regression) collected so far
    inputs_list = [[] for __ in range(nb_chns_input)]  # one list per input channel
    outputs_list = []
    while nb_insts < FLAGS.cpr_nb_insts_reg:
      inputs_full, inputs_prnd, outputs_full, outputs_prnd = \
        self.sess_train.run([input_full_tf, input_prnd_tf, output_full_tf, output_prnd_tf])
      inputs_smpl, outputs_smpl = self.__smpl_inputs_n_outputs(
        conv_krnl_full, conv_krnl_prnd, inputs_full, inputs_prnd,
        outputs_full, outputs_prnd, strides, padding)
      nb_insts += outputs_smpl.shape[0]
      for idx_chn_input in range(nb_chns_input):
        inputs_list[idx_chn_input] += [inputs_smpl[idx_chn_input]]
      outputs_list += [outputs_smpl]
      tf.logging.info('sampled inputs & outputs (%d / %d)'
                      % (nb_insts, FLAGS.cpr_nb_insts_reg))
    # sub-sample (without replacement) down to the exact instance budget
    idxs_inst = np.random.choice(nb_insts, size=(FLAGS.cpr_nb_insts_reg), replace=False)
    inputs_np_list = [np.vstack(x)[idxs_inst] for x in inputs_list]
    outputs_np = np.vstack(outputs_list)[idxs_inst]
    tf.logging.info('time elapsed (sampling): %.4f (s)' % (timer() - time_beg))

    # choose channels via solving the sparsity-constrained regression problem
    tf.logging.info(
      'choosing channels via solving the sparsity-constrained regression problem')
    time_beg = timer()
    conv_krnl_prnd = self.__solve_sparse_regression(
      inputs_np_list, outputs_np, conv_krnl_prnd, prune_ratio)
    self.sess_train.run(update_op, feed_dict={conv_krnl_prnd_ph: conv_krnl_prnd})
    tf.logging.info('time elapsed (selection): %.4f (s)' % (timer() - time_beg))

    # evaluate the channel pruned model (per layer, if enabled);
    # the barrier keeps non-primary workers in sync during evaluation
    tf.logging.info('evaluating the channel pruned model')
    if FLAGS.cpr_eval_per_layer:
      if self.is_primary_worker('global'):
        self.__save_model(is_train=True)
        self.evaluate()
      self.auto_barrier()