Example #1
def _get_model(config, nclasses_train, nclasses_eval):
    with tf.name_scope("MetaTrain"):
        with tf.variable_scope("Model"):
            m = get_model(config.model_class,
                          config,
                          nclasses_train,
                          is_training=True,
                          nshot=FLAGS.nshot)
    with tf.name_scope("MetaValid"):
        with tf.variable_scope("Model", reuse=True):
            mvalid = get_model(config.model_class,
                               config,
                               nclasses_eval,
                               is_training=False,
                               nshot=FLAGS.nshot)
    return m, mvalid
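A minimal usage sketch for the helper above; the `config` object is assumed to come from the project's config loader, and the class counts are illustrative rather than taken from the snippet.

# Hypothetical call site (TF 1.x graph mode): builds a weight-sharing pair of
# meta-training and meta-validation models.
model_train, model_valid = _get_model(config, nclasses_train=64, nclasses_eval=5)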
Example #2
def build_net(config, backbone=None, memory=None, distributed=False):
    """Build a memory based lifelong learning model.

  Args:
    config: Model config.
    backbone: Backbone network.
    memory: Memory network.
  """
    if backbone is None:
        backbone = build_backbone(config)
    if memory is None:
        memory = build_memory_module(config, backbone)
    model = get_model(config.model_class,
                      config,
                      backbone,
                      memory,
                      distributed=distributed)
    return model
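A short usage sketch; it assumes only the builder functions shown in this snippet, and injecting a pre-built backbone is an illustration of the optional arguments rather than something the snippet itself does.

# Let the builder create the backbone and memory module from the config.
model = build_net(config)

# Or inject a pre-built backbone, e.g. to share it with another model.
shared_backbone = build_backbone(config)
model_shared = build_net(config, backbone=shared_backbone)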
Example #3
    def __init__(self,
                 config,
                 x,
                 y,
                 x_b,
                 y_b,
                 x_b_v,
                 y_b_v,
                 num_classes_a,
                 num_classes_b,
                 is_training=True,
                 y_sel=None,
                 ext_wts=None):
        """Attractor model with RBP.

    Args:
      config: Model config object.
      x: Inputs on task A.
      y: Labels on task A.
      x_b: Support inputs on task B.
      y_b: Support labels on task B.
      x_b_v: Query inputs on task B.
      y_b_v: Query labels on task B.
      num_classes_a: Number of classes on task A.
      num_classes_b: Number of classes on task B.
      is_training: Whether in training mode.
      y_sel: Mask on base classes.
      ext_wts: External weights for initialization.
    """
        self._config = config
        self._is_training = is_training
        self._num_classes_a = num_classes_a
        self._num_classes_b = num_classes_b
        self._global_step = None

        if config.backbone_class == 'resnet_backbone':
            bb_config = config.resnet_config
        else:
            assert False, 'Not supported'
        opt_config = config.optimizer_config
        proto_config = config.protonet_config
        transfer_config = config.transfer_config
        ft_opt_config = transfer_config.ft_optimizer_config

        self._backbone = get_model(config.backbone_class, bb_config)
        self._inputs = x
        self._labels = y
        self._labels_all = self._labels

        self._y_sel = y_sel
        self._rnd = np.random.RandomState(0)  # Common random seed.

        # A step counter for the meta training stage.
        global_step = self.global_step

        log.info('LR decay steps {}'.format(opt_config.lr_decay_steps))
        log.info('LR list {}'.format(opt_config.lr_list))

        # Learning rate decay.
        learn_rate = tf.train.piecewise_constant(
            global_step,
            list(np.array(opt_config.lr_decay_steps).astype(np.int64)),
            list(opt_config.lr_list))
        self._learn_rate = learn_rate

        # Class matrix mask.
        self._mask = tf.placeholder(tf.bool, [], name='mask')

        # Optimizer definition.
        opt = self.get_optimizer(opt_config.optimizer, learn_rate)

        # Task A branch.
        with tf.name_scope('TaskA'):
            self.build_task_a(x, y, is_training, ext_wts=ext_wts)
            if is_training:
                grads_and_vars_a = self.build_task_a_grad()
                with tf.variable_scope('Optimizer'):
                    bn_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    with tf.control_dependencies(bn_ops):
                        self._train_op_a = opt.apply_gradients(
                            grads_and_vars_a, global_step=global_step)
        h_size = self._h_size  # Calculated in the function above.
        w_class_a = self.w_class_a
        b_class_a = self.b_class_a

        # The finetuning task.
        self._inputs_b = x_b
        self._labels_b = y_b
        self._inputs_b_v = x_b_v
        self._labels_b_v = y_b_v
        self._labels_b_v_all = y_b_v

        with tf.name_scope('TaskB'):
            self.build_task_b(x_b, y_b, x_b_v, y_sel)
            if is_training:
                grads_and_vars_b = self.build_task_b_grad(x_b_v, y_b_v, y_sel)

        # Task A and Task B cost weights.
        assert transfer_config.cost_a_ratio == 0.0
        assert transfer_config.cost_b_ratio == 1.0
        cost_a_ratio_var = tf.constant(transfer_config.cost_a_ratio,
                                       name='cost_a_ratio',
                                       dtype=self.dtype)
        cost_b_ratio_var = tf.constant(transfer_config.cost_b_ratio,
                                       name='cost_b_ratio',
                                       dtype=self.dtype)

        # Update gradients for meta-learning.
        if is_training:
            total_grads_and_vars_ab = self._aggregate_grads_and_vars(
                [grads_and_vars_a, grads_and_vars_b],
                weights=[cost_a_ratio_var, cost_b_ratio_var])
            with tf.variable_scope('Optimizer'):
                with tf.control_dependencies(bn_ops):
                    self._train_op = opt.apply_gradients(
                        total_grads_and_vars_ab, global_step=global_step)

            if len(grads_and_vars_b) > 0:
                self._train_op_b = opt.apply_gradients(grads_and_vars_b,
                                                       global_step=global_step)
            else:
                self._train_op_b = tf.no_op()

        self._initializer = tf.global_variables_initializer()
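A hedged construction sketch for this constructor (TF 1.x graph mode). The class name `AttractorModel`, the 84x84 input resolution, and the class counts are assumptions for illustration only.

import tensorflow as tf  # TF 1.x

# Placeholders for task A (base classes) and the task B episode.
x = tf.placeholder(tf.float32, [None, 84, 84, 3], name='x')          # task A inputs
y = tf.placeholder(tf.int64, [None], name='y')                       # task A labels
x_b = tf.placeholder(tf.float32, [None, 84, 84, 3], name='x_b')      # task B support inputs
y_b = tf.placeholder(tf.int64, [None], name='y_b')                   # task B support labels
x_b_v = tf.placeholder(tf.float32, [None, 84, 84, 3], name='x_b_v')  # task B query inputs
y_b_v = tf.placeholder(tf.int64, [None], name='y_b_v')               # task B query labels

model = AttractorModel(config, x, y, x_b, y_b, x_b_v, y_b_v,
                       num_classes_a=64, num_classes_b=5, is_training=True)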
Example #4
  def __init__(self,
               config,
               x,
               y,
               x_b,
               y_b,
               x_b_v,
               y_b_v,
               num_classes_a,
               num_classes_b,
               is_training=True,
               ext_wts=None,
               y_sel=None,
               w_class_a=None,
               b_class_a=None,
               nshot=None):
    self._config = config
    self._is_training = is_training
    self._num_classes_a = num_classes_a
    self._num_classes_b = num_classes_b

    if config.backbone_class == 'resnet_backbone':
      bb_config = config.resnet_config
    else:
      assert False, 'Not supported'
    opt_config = config.optimizer_config
    proto_config = config.protonet_config
    transfer_config = config.transfer_config

    self._backbone = get_model(config.backbone_class, bb_config)
    self._inputs = x
    self._labels = y
    # if opt_config.num_gpu > 1:
    #   self._labels_all = allgather(self._labels)
    # else:
    self._labels_all = self._labels
    self._inputs_b = x_b
    self._labels_b = y_b
    self._inputs_b_v = x_b_v
    self._labels_b_v = y_b_v
    # if opt_config.num_gpu > 1:
    #   self._labels_b_v_all = allgather(self._labels_b_v)
    # else:
    self._labels_b_v_all = self._labels_b_v
    self._y_sel = y_sel
    self._mask = tf.placeholder(tf.bool, [], name='mask')

    # global_step = tf.get_variable(
    #     'global_step', shape=[], dtype=tf.int64, trainable=False)
    global_step = tf.contrib.framework.get_or_create_global_step()
    self._global_step = global_step
    log.info('LR decay steps {}'.format(opt_config.lr_decay_steps))
    log.info('LR list {}'.format(opt_config.lr_list))
    learn_rate = tf.train.piecewise_constant(
        global_step, list(
            np.array(opt_config.lr_decay_steps).astype(np.int64)),
        list(opt_config.lr_list))
    self._learn_rate = learn_rate

    opt = self.get_optimizer(opt_config.optimizer, learn_rate)
    # if opt_config.num_gpu > 1:
    #   opt = hvd.DistributedOptimizer(opt)

    with tf.name_scope('TaskA'):
      h_a = self.backbone(x, is_training=is_training, ext_wts=ext_wts)
      self._h_a = h_a

    # Apply BN ops.
    bn_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.name_scope('TaskB'):
      x_b_all = tf.concat([x_b, x_b_v], axis=0)
      if ext_wts is not None:
        h_b_all = self.backbone(
            x_b_all, is_training=is_training, reuse=True, ext_wts=ext_wts)
      else:
        h_b_all = self.backbone(x_b_all, is_training=is_training, reuse=True)

    with tf.name_scope('TaskA'):
      # Calculates hidden activation size.
      h_shape = h_a.get_shape()
      h_size = 1
      for ss in h_shape[1:]:
        h_size *= int(ss)

      if w_class_a is None:
        if ext_wts is not None:
          w_class_a = weight_variable(
              [h_size, num_classes_a],
              init_method='numpy',
              dtype=tf.float32,
              init_param={'val': np.transpose(ext_wts['w_class_a'])},
              wd=config.wd,
              name='w_class_a')
          b_class_a = weight_variable([],
                                      init_method='numpy',
                                      dtype=tf.float32,
                                      init_param={'val': ext_wts['b_class_a']},
                                      wd=0e0,
                                      name='b_class_a')
        else:
          w_class_a = weight_variable([h_size, num_classes_a],
                                      init_method='truncated_normal',
                                      dtype=tf.float32,
                                      init_param={'stddev': 0.01},
                                      wd=bb_config.wd,
                                      name='w_class_a')
          b_class_a = weight_variable([num_classes_a],
                                      init_method='constant',
                                      init_param={'val': 0.0},
                                      name='b_class_a')
        self._w_class_a_orig = w_class_a
        self._b_class_a_orig = b_class_a
      else:
        assert b_class_a is not None
        w_class_a_orig = weight_variable([h_size, num_classes_a],
                                         init_method='truncated_normal',
                                         dtype=tf.float32,
                                         init_param={'stddev': 0.01},
                                         wd=bb_config.wd,
                                         name='w_class_a')
        b_class_a_orig = weight_variable([num_classes_a],
                                         init_method='constant',
                                         init_param={'val': 0.0},
                                         name='b_class_a')
        self._w_class_a_orig = w_class_a_orig
        self._b_class_a_orig = b_class_a_orig

      self._w_class_a = w_class_a
      self._b_class_a = b_class_a
      num_classes_a_dyn = tf.cast(tf.shape(b_class_a)[0], tf.int64)
      num_classes_a_dyn32 = tf.shape(b_class_a)[0]

      if proto_config.cosine_a:
        if proto_config.cosine_tau:
          if ext_wts is None:
            init_val = 10.0
          else:
            init_val = ext_wts['tau'][0]
          tau = weight_variable([],
                                init_method='constant',
                                init_param={'val': init_val},
                                name='tau')
        else:
          tau = tf.constant(1.0)
        w_class_a_norm = self._normalize(w_class_a, 0)
        h_a_norm = self._normalize(h_a, 1)
        dot = tf.matmul(h_a_norm, w_class_a_norm)
        if ext_wts is not None:
          dot += b_class_a
        logits_a = tau * dot
      else:
        logits_a = compute_euc(tf.transpose(w_class_a), h_a)
      self._prediction_a = logits_a
      # if opt_config.num_gpu > 1:
      #   self._prediction_a_all = allgather(self._prediction_a)
      # else:
      self._prediction_a_all = self._prediction_a

      xent_a = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits_a, labels=y)
      cost_a = tf.reduce_mean(xent_a, name='xent')
      self._cost_a = cost_a
      cost_a += self._decay()
      correct_a = tf.equal(tf.argmax(logits_a, axis=1), y)
      self._correct_a = correct_a
      self._acc_a = tf.reduce_mean(tf.cast(correct_a, cost_a.dtype))

    with tf.name_scope('TaskB'):
      h_b = h_b_all[:tf.shape(x_b)[0]]
      h_b_v = h_b_all[tf.shape(x_b)[0]:]

      # Add new axes for the `batch` dimension.
      h_b_ = tf.expand_dims(h_b, 0)
      h_b_v_ = tf.expand_dims(h_b_v, 0)
      y_b_ = tf.expand_dims(y_b, 0)
      y_b_v_ = tf.expand_dims(y_b_v, 0)

      if transfer_config.old_and_new:
        protos_b = self._compute_protos(num_classes_b, h_b_,
                                        y_b_ - num_classes_a)
      else:
        protos_b = self._compute_protos(num_classes_b, h_b_, y_b_)

      w_class_a_ = tf.expand_dims(tf.transpose(w_class_a), 0)
      if proto_config.protos_phi:
        w_p1 = weight_variable([h_size],
                               init_method='constant',
                               dtype=tf.float32,
                               init_param={'val': 1.0},
                               wd=bb_config.wd,
                               name='w_p1')
      if proto_config.cosine_attention:
        w_q = weight_variable([h_size, h_size],
                              init_method='truncated_normal',
                              dtype=tf.float32,
                              init_param={'stddev': 0.1},
                              wd=bb_config.wd,
                              name='w_q')
        k_b = weight_variable([num_classes_a, h_size],
                              init_method='truncated_normal',
                              dtype=tf.float32,
                              init_param={'stddev': 0.1},
                              wd=bb_config.wd,
                              name='k_b')
        tau_q = weight_variable([],
                                init_method='constant',
                                init_param={'val': 10.0},
                                name='tau_q')
        if transfer_config.old_and_new:
          w_class_b = self._compute_protos_attend_fix(
              num_classes_b, h_b_, y_b_ - num_classes_a_dyn, w_q, tau_q, k_b,
              self._w_class_a_orig)
        else:
          w_class_b = self._compute_protos_attend_fix(
              num_classes_b, h_b_, y_b_, w_q, tau_q, k_b, self._w_class_a_orig)
        assert proto_config.protos_phi
        w_p2 = weight_variable([h_size],
                               init_method='constant',
                               dtype=tf.float32,
                               init_param={'val': 1.0},
                               wd=bb_config.wd,
                               name='w_p2')
        self._k_b = tf.expand_dims(w_p2, 1) * self._w_class_a_orig
        self._k_b2 = k_b
        self.bias = w_class_b
        self.new_protos = w_p1 * protos_b
        self.new_bias = w_p2 * w_class_b
        w_class_b = w_p1 * protos_b + w_p2 * w_class_b
        self.protos = protos_b
        self.w_class_b_final = w_class_b
      else:
        w_class_b = protos_b
        if proto_config.protos_phi:
          w_class_b = w_p1 * w_class_b

      self._w_class_b = w_class_b

      if transfer_config.old_and_new:
        w_class_all = tf.concat([w_class_a_, w_class_b], axis=1)
      else:
        w_class_all = w_class_b

      if proto_config.cosine_softmax_tau:
        tau_b = weight_variable([],
                                init_method='constant',
                                init_param={'val': 10.0},
                                name='tau_b')
      else:
        tau_b = tf.constant(1.0)

      if proto_config.similarity == 'euclidean':
        logits_b_v = compute_logits(w_class_all, h_b_v_)
      elif proto_config.similarity == 'cosine':
        logits_b_v = tau_b * compute_logits_cosine(w_class_all, h_b_v_)
      else:
        raise ValueError('Unknown similarity')
      self._logits_b_v = logits_b_v
      self._prediction_b = logits_b_v[0]
      # if opt_config.num_gpu > 1:
      #   self._prediction_b_all = allgather(self._prediction_b)
      # else:
      self._prediction_b_all = self._prediction_b

      # Mask out the old classes.
      def mask_fn():
        bin_mask = tf.expand_dims(
            tf.reduce_sum(
                tf.one_hot(y_sel, num_classes_a + num_classes_b),
                0,
                keep_dims=True), 0)
        logits_b_v_m = logits_b_v * (1.0 - bin_mask)
        logits_b_v_m -= bin_mask * 100.0
        return logits_b_v_m

      # if transfer_config.old_and_new:
      #   logits_b_v = tf.cond(self._mask, mask_fn, lambda: logits_b_v)
      xent_b_v = tf.nn.sparse_softmax_cross_entropy_with_logits(
          logits=logits_b_v, labels=y_b_v_)
      cost_b = tf.reduce_mean(xent_b_v, name='xent')
      self._cost_b = cost_b

    if transfer_config.old_and_new:
      total_cost = cost_b
    else:
      total_cost = (transfer_config.cost_a_ratio * cost_a +
                    transfer_config.cost_b_ratio * cost_b)
    self._total_cost = total_cost

    if not transfer_config.meta_only:
      # assert False, 'let us go for pretrained model first'
      var_list = tf.trainable_variables()
      var_list = list(filter(lambda x: 'phi' in x.name, var_list))
      layers = self.config.transfer_config.meta_layers
      if layers == "all":
        pass
      elif layers == "4":
        keywords = ['TaskB', 'unit_4_']
        filter_fn = lambda x: any([kw in x.name for kw in keywords])
        var_list = list(filter(filter_fn, var_list))
      else:
        raise ValueError('Unknown finetune layers {}'.format(layers))
      [log.info('Slow weights {}'.format(v.name)) for v in var_list]
    else:
      var_list = []

    if proto_config.cosine_softmax_tau:
      var_list += [tau_b]

    if proto_config.cosine_attention:
      var_list += [w_q, tau_q, k_b, w_p2]

    if proto_config.protos_phi:
      var_list += [w_p1]

    if transfer_config.train_wclass_a:
      if proto_config.similarity == 'euclidean':
        var_list += [w_class_a, b_class_a]
      elif proto_config.similarity == 'cosine':
        var_list += [w_class_a]

    if is_training:
      grads_and_vars = opt.compute_gradients(total_cost, var_list)
      with tf.control_dependencies(bn_ops):
        [log.info('BN op {}'.format(op.name)) for op in bn_ops]
        train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)

      grads_and_vars_b = opt.compute_gradients(cost_b, var_list)
      with tf.control_dependencies(bn_ops):
        train_op_b = opt.apply_gradients(
            grads_and_vars_b, global_step=global_step)

      with tf.control_dependencies(bn_ops):
        train_op_a = opt.minimize(cost_a, global_step=global_step)
      self._train_op = train_op
      self._train_op_a = train_op_a
      self._train_op_b = train_op_b
    self._initializer = tf.global_variables_initializer()
    self._w_class_a = w_class_a
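A hedged sketch of one training step against the ops defined in this constructor. It assumes a `model` built from the class above (the class name is not shown in the snippet) with placeholders `x`, `y`, `x_b`, `y_b`, `x_b_v`, `y_b_v` as in the earlier construction sketch; the batch sizes and zero-valued dummy batches are illustrative, and the private attributes are accessed directly only because the snippet does not show the public properties.

import numpy as np

# Dummy batches (e.g. 16 task-A images and a 5-way episode with 5 support
# and 25 query images); real data would come from the episode sampler.
x_np = np.zeros([16, 84, 84, 3], np.float32)
y_np = np.zeros([16], np.int64)
x_b_np = np.zeros([5, 84, 84, 3], np.float32)
y_b_np = np.zeros([5], np.int64)
x_b_v_np = np.zeros([25, 84, 84, 3], np.float32)
y_b_v_np = np.zeros([25], np.int64)

with tf.Session() as sess:
    sess.run(model._initializer)
    # One joint step on the combined cost; `_train_op_a` / `_train_op_b`
    # step on task A or task B alone.
    _, cost = sess.run([model._train_op, model._total_cost],
                       feed_dict={x: x_np, y: y_np,
                                  x_b: x_b_np, y_b: y_b_np,
                                  x_b_v: x_b_v_np, y_b_v: y_b_v_np})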
Example #5
File: run_eval.py Project: k-r-allen/imp
def _get_model(config):
    m = get_model(args.model, config, args.dataset)
    return m.cuda()
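This variant reads `args.model` and `args.dataset` from module-level state and moves the model to the GPU, so it is the PyTorch path. A hedged sketch of the surrounding setup; the defaults and the `load_config` helper are assumptions, not taken from run_eval.py.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, default='imp')         # illustrative default
parser.add_argument('--dataset', type=str, default='omniglot')  # illustrative default
args = parser.parse_args()

config = load_config(args.model)  # hypothetical config loader
model = _get_model(config)        # returns a CUDA model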
Example #6
def build_pretrain_net(config, backbone=None):
    """Builds a regular classification network for pretraining."""
    if backbone is None:
        backbone = build_backbone(config)
    model = get_model("pretrain_net", config, backbone)
    return model
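A short sketch combining this builder with `build_net` from Example #2, assuming both accept the same config object; sharing a single backbone instance is only an illustration of the optional `backbone` argument.

# Build one backbone and reuse it for both the pretraining classifier and
# the memory-based lifelong learner.
backbone = build_backbone(config)
pretrain_model = build_pretrain_net(config, backbone=backbone)
lifelong_model = build_net(config, backbone=backbone)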