Example #1
    def __init__(self, name, in_dim, label_dim, config, dtype=tf.float32):

        super(DNCWriteHeadFeed2, self).__init__(name,
                                                in_dim,
                                                config,
                                                dtype=dtype)

        self._label_dim = label_dim
        use_mlp = False
        with variable_scope(name):
            if use_mlp:
                self._write_query_mlp = ResMLP('write_query_mlp', [
                    in_dim, self._nwrite * self._memory_dim, self._nwrite *
                    self._memory_dim, self._nwrite * self._memory_dim
                ],
                                               dtype=dtype)
                self._write_content_mlp = ResMLP('write_content_mlp', [
                    in_dim, self._nwrite * self._memory_dim, self._nwrite *
                    self._memory_dim, self._nwrite * self._memory_dim
                ],
                                                 dtype=dtype)
            else:
                self._write_query_mlp = Linear('write_query_mlp', in_dim,
                                               self._nwrite * self._memory_dim)
                self._write_content_mlp = Linear(
                    'write_content_mlp', in_dim,
                    self._nwrite * self._memory_dim)
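# A minimal sketch of how a flat write-query output of size nwrite * memory_dim
# could be reshaped into one query per write head; the sizes and the reshape
# are illustrative assumptions, since the consuming code is not shown here.
import tensorflow as tf

nwrite, memory_dim = 2, 16
flat_query = tf.zeros([1, nwrite * memory_dim])   # stand-in for write_query_mlp output
per_head = tf.reshape(flat_query, [-1, nwrite, memory_dim])   # [B, Nw, D]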
Example #2
    def __init__(self, name, nin, nout, dtype=tf.float32):
        super(GRU, self).__init__(dtype=dtype)
        self._nin = nin
        self._nout = nout

        with variable_scope(name):
            self._gates = Linear("gates_linear", nin + nout, 2 * nout)
            self._linear = Linear("linear", nin + nout, nout)
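# A minimal sketch of how the two Linear layers above are typically consumed in
# a GRU step; the gate order (z, r) and the update rule are standard-GRU
# assumptions, since the forward pass is not shown. Plain Dense layers stand in
# for the repository's Linear module.
import tensorflow as tf

nin, nout = 8, 16
gates = tf.keras.layers.Dense(2 * nout)   # stand-in for self._gates
linear = tf.keras.layers.Dense(nout)      # stand-in for self._linear

def gru_step(x, h):
    z, r = tf.split(tf.sigmoid(gates(tf.concat([x, h], axis=-1))), 2, axis=-1)
    h_tilde = tf.tanh(linear(tf.concat([x, r * h], axis=-1)))
    return z * h + (1.0 - z) * h_tilde    # interpolate old and candidate state

h_next = gru_step(tf.zeros([4, nin]), tf.zeros([4, nout]))   # [4, 16]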
Example #3
 def __init__(self, config, backbone, memory, dtype=tf.float32):
     super(LSTMNet, self).__init__(config, backbone, dtype=dtype)
     self._memory = memory
     # Map from memory content to the actual output class.
     if config.fix_unknown:
         self._nclassout = config.num_classes + 1
     else:
         self._nclassout = config.num_classes
     self._readout_layer = Linear('readout', memory.nout, self._nclassout)
Example #4
    def __init__(self, config, wdict=None):
        super(C4PlusFCBackbone, self).__init__(config)
        self.backbone = C4Backbone(config)

        if len(self.config.num_fc_dim) > 1:
            self.fc = MLP('fc', [config.num_filters[-1]] +
                          list(self.config.num_fc_dim),
                          wdict=wdict)
        else:
            self.fc = Linear('fc',
                             config.num_filters[-1],
                             self.config.num_fc_dim[-1],
                             wdict=wdict)  # Hard coded for now.
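# Illustrative sizes only (the real values come from the config): with several
# entries in num_fc_dim the branch above builds an MLP whose layer sizes are
# the backbone output width followed by every entry of num_fc_dim; with a
# single entry it falls back to one Linear layer.
num_filters = [64, 128, 256]        # assumed values
num_fc_dim = [512, 128]             # assumed values
layer_sizes = [num_filters[-1]] + list(num_fc_dim)   # [256, 512, 128]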
Example #5
    def __init__(self,
                 name,
                 layer_size,
                 add_bias=True,
                 bias_init=None,
                 act_func=None,
                 layernorm=False,
                 temp=None,
                 learn_temp=False,
                 dtype=tf.float32,
                 wdict=None):
        super(CosineLastMLP, self).__init__(dtype=dtype)
        self._layers = []
        with variable_scope(name):
            for i in range(len(layer_size) - 1):
                if bias_init is not None and bias_init[i] is not None:

                    # Bind the loop variable as a default argument so the bias
                    # initializer refers to this layer even if called after the loop.
                    def bi(i=i):
                        return tf.zeros([layer_size[i + 1]],
                                        dtype=dtype) + bias_init[i]
                else:
                    bi = None

                if i < len(layer_size) - 2:
                    layer = Linear("layer_{}".format(i),
                                   layer_size[i],
                                   layer_size[i + 1],
                                   b_init=bi,
                                   add_bias=add_bias,
                                   dtype=dtype,
                                   wdict=wdict)
                else:
                    layer = CosineLinear("layer_{}".format(i),
                                         layer_size[i],
                                         layer_size[i + 1],
                                         temp=temp,
                                         learn_temp=learn_temp,
                                         dtype=dtype,
                                         wdict=wdict)
                self._layers.append(layer)
                if layernorm:
                    self._layers.append(
                        LayerNorm("layernorm_{}".format(i),
                                  layer_size[i + 1],
                                  dtype=dtype,
                                  wdict=wdict))
                if i < len(layer_size) - 2:
                    if act_func is None:
                        self._layers.append(tf.nn.relu)
                    else:
                        self._layers.append(act_func[i])
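# The default-argument form `def bi(i=i)` used above matters because Python
# closures capture loop variables late; a minimal standalone demonstration:
fns_late = [lambda: i for i in range(3)]       # every lambda sees the final i
fns_bound = [lambda i=i: i for i in range(3)]  # each lambda binds its own i
print([f() for f in fns_late])    # [2, 2, 2]
print([f() for f in fns_bound])   # [0, 1, 2]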
Example #6
 def __init__(self,
              config,
              backbone,
              memory,
              distributed=False,
              dtype=tf.float32):
     super(LSTMSigmoidNet, self).__init__(config,
                                          backbone,
                                          distributed=distributed,
                                          dtype=dtype)
     assert config.fix_unknown, 'Only unknown is supported'
     self._memory = memory
     self._nclassout = config.num_classes + 1
     self._readout_layer = Linear('readout', memory.nout, self._nclassout)
Example #7
 def __init__(self, name, nin, nout, layernorm=False, dtype=tf.float32):
     super(GRU, self).__init__(dtype=dtype)
     self._nin = nin
     self._nout = nout
     self._layernorm = layernorm
     self._gates = Linear("gates_linear",
                          nin + nout,
                          1,
                          b_init=lambda: -tf.ones(1) * 2.0)
     # self._gates = Linear(
     #     "gates_linear", nin + nout, 1, b_init=lambda: tf.ones(1) * 2.0)
     # self._gates = Linear(
     #     "gates_linear", nin + nout, 1, b_init=lambda: tf.zeros(1))
     if layernorm:
         self._ln = LayerNorm("layernorm", nin + nout, dtype=dtype)
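# The -2.0 bias above starts the single gate near sigmoid(-2) ~ 0.12 (mostly
# closed); the commented-out alternatives would start it near 0.88 and 0.5.
import tensorflow as tf
print(tf.sigmoid(-2.0).numpy(), tf.sigmoid(2.0).numpy(), tf.sigmoid(0.0).numpy())
# ~0.119, ~0.881, 0.5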
Example #8
 def __init__(self,
              name,
              in_filters,
              num_hidden=384,
              out_filters=512,
              data_format="NCHW",
              dtype=tf.float32):
   super(FinalConvModule, self).__init__()
   self._data_format = data_format
   with variable_scope(name):
     self._bn1 = BatchNorm("bn1", in_filters, data_format=data_format)
     self._conv = Conv2D("conv", 1, in_filters, num_hidden,
                         self._stride_arr(1))
     self._bn2 = BatchNorm("bn2", num_hidden, data_format=data_format)
     self._fc = Linear("fc", num_hidden, out_filters)
   self._out_filters = out_filters
Example #9
 def __init__(self, config, backbone, dtype=tf.float32):
     super(PretrainNet, self).__init__()
     self._backbone = backbone
     self._config = config
     assert self.config.num_classes > 0, 'Must specify number of output classes'
     opt_config = self.config.optimizer_config
     gs = tf.Variable(0, dtype=tf.int64, name='step', trainable=False)
     self._step = gs
     self._wd = backbone.config.weight_decay
     self._learn_rate = tf.compat.v1.train.piecewise_constant(
         self.step,
         list(np.array(opt_config.lr_decay_steps).astype(np.int64)),
         list(opt_config.lr_list))
     opt = self._get_optimizer(opt_config.optimizer, self.learn_rate)
     self._optimizer = opt
     out_dim = backbone.get_output_dimension()
     self._fc = Linear("fc", out_dim[-1], config.num_classes, dtype=dtype)
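# The schedule above keeps lr_list[i] until the step passes lr_decay_steps[i],
# so lr_list needs exactly len(lr_decay_steps) + 1 entries. A sketch with
# made-up boundaries and values, using the equivalent TF2-style schedule:
import tensorflow as tf
sched = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[1000, 2000], values=[1e-1, 1e-2, 1e-3])
print(sched(0).numpy(), sched(1500).numpy(), sched(5000).numpy())  # 0.1 0.01 0.001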
Example #10
  def __init__(self, name, nin, nout, layernorm=False, dtype=tf.float32):
    super(LSTM, self).__init__(dtype=dtype)
    self._nin = nin
    self._nout = nout
    self._layernorm = layernorm

    def _b_init():
      return tf.concat(
          [tf.ones([nout], dtype=dtype),
           tf.zeros([3 * nout], dtype=dtype)],
          axis=0)

    with variable_scope(name):
      self._gates = Linear(
          "gates_linear", nin + nout, 4 * nout, b_init=_b_init)
      if layernorm:
        self._ln = LayerNorm("layernorm", 4 * nout, dtype=dtype)
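# _b_init above sets the first nout of the 4*nout gate biases to 1.0 and the
# rest to 0.0. Assuming the first chunk is the forget gate (the forward pass is
# not shown here), this keeps the cell state mostly retained at initialization,
# a common LSTM trick:
import tensorflow as tf
nout = 16
b = tf.concat([tf.ones([nout]), tf.zeros([3 * nout])], axis=0)
forget_b = tf.split(b, 4, axis=0)[0]
print(tf.sigmoid(forget_b)[0].numpy())  # ~0.73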
Example #11
    def __init__(self, name, layer_size, dtype=tf.float32):
        super(ResMLP, self).__init__(dtype=dtype)
        self._layer_size = layer_size
        self._layers = []
        with variable_scope(name):
            for i in range(len(layer_size) - 1):

                # Bind the loop variable as a default argument so the bias
                # initializer refers to this layer even if called after the loop.
                def bi(i=i):
                    return tf.zeros([layer_size[i + 1]], dtype=dtype)

                self._layers.append(
                    Linear("layer_{}".format(i),
                           layer_size[i],
                           layer_size[i + 1],
                           b_init=bi,
                           add_bias=True,
                           dtype=dtype))
Example #12
  def __init__(self, name, layer_size, act_func, bias_init, dtype=tf.float32):
    super(ResMLP, self).__init__(dtype=dtype)
    self._layers = []
    self._layer_size = layer_size
    self._act_func = act_func
    with variable_scope(name):
      for i in range(len(layer_size) - 1):

        if bias_init[i] is None:
          bias_init_ = tf.zeros([layer_size[i + 1]], dtype=dtype)
        else:
          bias_init_ = bias_init[i]

        # Bind the per-layer value as a default argument so the initializer
        # does not pick up a later iteration's bias_init_.
        def bi(bias_init_=bias_init_):
          return bias_init_

        self._layers.append(
            Linear(
                "layer_{}".format(i),
                layer_size[i],
                layer_size[i + 1],
                b_init=bi,
                add_bias=True,
                dtype=dtype))
Example #13
    def __init__(self,
                 name,
                 rnn_memory,
                 proto_memory,
                 readout_type='linear',
                 use_pred_beta_gamma=True,
                 use_feature_fuse=True,
                 use_feature_fuse_gate=True,
                 use_feature_scaling=True,
                 use_feature_memory_only=False,
                 skip_unk_memory_update=False,
                 use_ssl=True,
                 use_ssl_beta_gamma_write=True,
                 use_ssl_temp=True,
                 dtype=tf.float32):
        super(RNNEncoder, self).__init__(dtype=dtype)
        self._rnn_memory = rnn_memory
        self._proto_memory = proto_memory

        # ------------- Feature Fusing Capability Ablation --------------
        self._use_pred_beta_gamma = use_pred_beta_gamma  # CHECK
        self._use_feature_fuse = use_feature_fuse  # CHECK
        self._use_feature_fuse_gate = use_feature_fuse_gate  # CHECK
        self._use_feature_scaling = use_feature_scaling  # CHECK
        self._use_feature_memory_only = use_feature_memory_only  # CHECK

        # ------------- SSL Capability Ablation --------------
        self._skip_unk_memory_update = skip_unk_memory_update  # CHECK
        self._use_ssl = use_ssl  # CHECK
        self._use_ssl_beta_gamma_write = use_ssl_beta_gamma_write  # CHECK
        self._use_ssl_temp = use_ssl_temp  # CHECK

        D_in = self._rnn_memory.memory_dim
        D = self._rnn_memory.in_dim
        self._dim = D

        # h        [D]
        # scale    [D]
        # temp     [1]
        # gamma2   [1]
        # beta2    [1]
        # gamma    [1]
        # beta     [1]
        # x_gate   [1]
        # h_gate   [1]
        bias_init = [
            tf.zeros(D),
            tf.zeros(D),
            tf.zeros([1]),
            tf.zeros([1]),
            tf.zeros([1]) + proto_memory._radius_init,
            tf.zeros([1]),
            tf.zeros([1]) + proto_memory._radius_init_write,
            tf.zeros([1]) + 1.0,
            tf.zeros([1]) - 1.0
        ]
        bias_init = tf.concat(bias_init, axis=0)

        D_out = bias_init.shape[-1]

        def b_init():
            return bias_init

        if readout_type == 'linear':
            log.info("Using linear readout")
            self._readout = Linear('readout', D_in, D_out, b_init=b_init)
        elif readout_type == 'mlp':
            log.info("Using MLP readout")
            self._readout = MLP('readout_mlp', [D_in, D_out, D_out],
                                bias_init=[None, bias_init],
                                act_func=[tf.math.tanh])
        elif readout_type == 'resmlp':
            log.info("Using ResMLP readout")
            self._readout = ResMLP('readout_mlp', [D_in, D_out, D_out, D_out],
                                   bias_init=[None, None, bias_init],
                                   act_func=[swish, swish, None])
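# A sketch of the readout layout implied by the bias_init comment block above
# (D_out = 2*D + 7); the split sizes follow that layout, but the code that
# actually consumes the readout is not shown here, so this is illustrative:
import tensorflow as tf
D = 64
readout_out = tf.zeros([1, 2 * D + 7])   # stand-in for self._readout(...)
(h, scale, temp, gamma2, beta2, gamma, beta, x_gate, h_gate) = tf.split(
    readout_out, [D, D, 1, 1, 1, 1, 1, 1, 1], axis=-1)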
Example #14
    def __init__(self,
                 name,
                 dim,
                 radius_init,
                 max_classes=20,
                 fix_unknown=False,
                 unknown_id=None,
                 similarity="euclidean",
                 static_beta_gamma=True,
                 radius_init_write=None,
                 use_ssl_beta_gamma_write=True,
                 dtype=tf.float32):
        super(SemiSupervisedMinDistProtoMemory,
              self).__init__(name,
                             dim,
                             max_classes=max_classes,
                             fix_unknown=fix_unknown,
                             unknown_id=unknown_id,
                             similarity=similarity,
                             dtype=dtype)

        self._controller_type = 'linear'
        # self._controller_type = 'lstm'
        self._radius_init = radius_init
        log.info('Radius init {}'.format(radius_init))
        if radius_init_write is not None:
            self._radius_init_write = radius_init_write
            log.info('Radius init write {}'.format(radius_init_write))
        else:
            self._radius_init_write = radius_init
        self._use_ssl_beta_gamma_write = use_ssl_beta_gamma_write
        if static_beta_gamma:
            with variable_scope(name):
                self._beta = self._get_variable(
                    "beta", self._get_constant_init([], radius_init))
                self._gamma = self._get_variable(
                    "gamma", self._get_constant_init([], 1.0))

                self._beta2 = self._get_variable(
                    "beta2",
                    self._get_constant_init([], self._radius_init_write))
                self._gamma2 = self._get_variable(
                    "gamma2", self._get_constant_init([], 1.0))

        with variable_scope(name):
            if self._controller_type == 'lstm':
                self._ctrl_lstm = LSTM("ctrl_lstm",
                                       dim,
                                       dim,
                                       layernorm=False,
                                       dtype=dtype)
                self._ctrl_readout = Linear("ctrl_readout",
                                            dim,
                                            1,
                                            w_init=lambda: tf.ones([dim, 1]),
                                            b_init=lambda: tf.zeros([1]))
            elif self._controller_type == 'linear':
                self._ctrl_readout = Linear(
                    "ctrl_readout",
                    dim,
                    1,
                    # w_init=lambda: self._get_normal_init([dim, 1])() * 0.001,
                    w_init=lambda: tf.ones([dim, 1]) * 0.001,
                    b_init=lambda: tf.zeros([1]))
Example #15
    def __init__(self, name, in_dim, config, dtype=tf.float32):
        """Initialize a DNC module.

    Args:
      name: String. Name of the module.
      in_dim: Int. Input dimension.
      memory_dim: Int. Memory dimension.
      controller_dim: Int. Hidden dimension for the controller.
      nslot: Int. Number of memory slots.
      nread: Int. Number of read heads.
      nwrite: Int. Number of write heads.
      controller_type: String. `lstm` or `mlp.
      memory_layernorm: Bool. Whether perform LayerNorm on each memory
        iteration.
      dtype: Data type.
    """
        super(DNC, self).__init__(dtype=dtype)
        log.info('Currently using MANN with separate write attention')
        log.info('Currently using MANN with decay')
        self._in_dim = in_dim
        self._memory_dim = config.memory_dim
        self._controller_dim = config.controller_dim
        self._nslot = config.num_slots
        self._nread = config.num_reads
        self._nwrite = config.num_writes
        self._controller_nstack = config.controller_nstack
        self._controller_type = config.controller_type
        self._similarity_type = config.similarity_type
        with variable_scope(name):
            if config.controller_layernorm:
                log.info('Using LayerNorm in controller module.')
            if config.controller_type == 'lstm':
                self._controller = LSTM("controller_lstm",
                                        in_dim,
                                        config.controller_dim,
                                        layernorm=config.controller_layernorm,
                                        dtype=dtype)
            elif config.controller_type == 'stack_lstm':
                log.info('Use {}-stack LSTM'.format(config.controller_nstack))
                self._controller = StackLSTM(
                    "stack_controller_lstm",
                    in_dim,
                    config.controller_dim,
                    config.controller_nstack,
                    layernorm=config.controller_layernorm,
                    dtype=dtype)
            elif config.controller_type == 'mlp':
                log.info('Use MLP')
                self._controller = MLP(
                    "controller_mlp",
                    [in_dim, config.controller_dim, config.controller_dim],
                    layernorm=config.controller_layernorm,
                    dtype=dtype)
            rnd = np.random.RandomState(0)
            self._rnd = rnd
            self._memory_init = 1e-5 * tf.ones(
                [config.num_slots, config.memory_dim],
                name="memory_init",
                dtype=dtype)

            # N. Item name         Shape    Init    Comment
            # ------------------------------------------------------------
            # 1) read query        N x D    0.0
            # 2) write query       Nw x D   0.0
            # 3) write content     Nw x D   0.0
            # 4) forget gate       N        -2.0    No forget after read
            # 5) write gate        Nw       2.0     Always write
            # 6) interp gate       Nw       -2.0    Always use LRU
            # 7) read temp         N        0.0     Default 1.0
            # 8) write temp        Nw       0.0     Default 1.0
            # 9) erase             M        -2.0    Default no erase
            Nr = self._nread
            Nw = self._nwrite
            D = self._memory_dim
            M = self._nslot

            def ctrl2mem_bias_init():
                AA = tf.zeros([Nr * D + 2 * Nw * D], dtype=self.dtype)
                BB = -2.0 * tf.ones([Nr], dtype=self.dtype)
                CC = 2.0 * tf.ones([Nw], dtype=self.dtype)
                DD = -2.0 * tf.ones([Nw], dtype=self.dtype)
                EE = 0.0 * tf.ones([Nr], dtype=self.dtype)
                FF = 0.0 * tf.ones([Nw], dtype=self.dtype)
                GG = -2.0 * tf.ones([M], dtype=self.dtype)
                return tf.concat([AA, BB, CC, DD, EE, FF, GG], axis=0)

            self._ctrl2mem = Linear("ctrl2mem",
                                    config.controller_dim,
                                    Nr * D + 2 * Nw * D + Nr + 2 * Nw + Nr +
                                    Nw + M,
                                    b_init=ctrl2mem_bias_init)
            if config.memory_layernorm:
                log.info('Using LayerNorm for each memory iteration.')
                self._mem_layernorm = LayerNorm("memory_layernorm",
                                                D,
                                                dtype=dtype)
            else:
                self._mem_layernorm = None
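# A sketch of the ctrl2mem output layout implied by the comment table and
# ctrl2mem_bias_init above; the actual split happens elsewhere in the module,
# and the sizes below are made up for illustration only:
import tensorflow as tf
Nr, Nw, D, M = 2, 1, 16, 8
out = tf.zeros([1, Nr * D + 2 * Nw * D + Nr + 2 * Nw + Nr + Nw + M])
(read_query, write_query, write_content, forget_gate, write_gate, interp_gate,
 read_temp, write_temp, erase) = tf.split(
     out, [Nr * D, Nw * D, Nw * D, Nr, Nw, Nw, Nr, Nw, M], axis=-1)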
Example #16
  def __init__(self,
               name,
               in_dim,
               memory_dim,
               controller_dim,
               nslot,
               nread,
               memory_decay,
               controller_type='lstm',
               memory_layernorm=False,
               controller_layernorm=False,
               controller_nstack=2,
               dtype=tf.float32):
    """Initialize a MANN module.

    Args:
      name: String. Name of the module.
      in_dim: Int. Input dimension.
      memory_dim: Int. Memory dimension.
      controller_dim: Int. Hidden dimension for the controller.
      nslot: Int. Number of memory slots.
      nread: Int. Number of read heads.
      memory_decay: Float. Memory decay coefficient.
      controller_type: String. `lstm` or `stack_lstm`.
      dtype: Data type.
    """
    super(MANN, self).__init__(dtype=dtype)
    self._in_dim = in_dim
    self._memory_dim = memory_dim
    self._controller_dim = controller_dim
    self._nslot = nslot
    self._nread = nread
    self._controller_nstack = controller_nstack
    self._controller_type = controller_type
    with variable_scope(name):
      if controller_layernorm:
        log.info('Using LayerNorm in controller module.')
      if controller_type == 'lstm':
        self._controller = LSTM(
            "controller_lstm",
            in_dim,
            controller_dim,
            layernorm=controller_layernorm,
            dtype=dtype)
      elif controller_type == 'stack_lstm':
        log.info('Use {}-stack LSTM'.format(controller_nstack))
        self._controller = StackLSTM(
            "stack_controller_lstm",
            in_dim,
            controller_dim,
            controller_nstack,
            layernorm=controller_layernorm,
            dtype=dtype)
      rnd = np.random.RandomState(0)
      self._rnd = rnd
      self._gamma = memory_decay
      D = memory_dim
      N = nread
      M = nslot
      self._memory_init = 1e-5 * tf.ones(
          [M, D], name="memory_init", dtype=dtype)

      def ctrl2mem_bias_init():
        zeros = tf.zeros([2 * N * D], dtype=self.dtype)
        ones = -2.0 * tf.ones([N], dtype=self.dtype)
        return tf.concat([zeros, ones], axis=0)

      self._ctrl2mem = Linear(
          "ctrl2mem",
          controller_dim,
          2 * nread * memory_dim + nread,
          b_init=ctrl2mem_bias_init)
      self._temp = tf.Variable(1.0, name="temp", dtype=dtype)
      if memory_layernorm:
        log.info('Using LayerNorm for each memory iteration.')
        self._mem_layernorm = LayerNorm("memory_layernorm", D, dtype=dtype)
      else:
        self._mem_layernorm = None
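# ctrl2mem above emits 2*N*D + N units, matching ctrl2mem_bias_init: two
# [N, D]-sized head parameters with zero bias plus N per-head gates whose -2.0
# bias keeps them near sigmoid(-2) ~ 0.12 at initialization. The names below
# are placeholders; the real split is done elsewhere in the module.
import tensorflow as tf
N, D = 2, 16                          # made-up sizes for illustration
out = tf.zeros([1, 2 * N * D + N])    # stand-in for self._ctrl2mem(h)
head_a, head_b, gates = tf.split(out, [N * D, N * D, N], axis=-1)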