def __init__(self, name, in_dim, label_dim, config, dtype=tf.float32):
  super(DNCWriteHeadFeed2, self).__init__(name, in_dim, config, dtype=dtype)
  self._label_dim = label_dim
  use_mlp = False
  with variable_scope(name):
    if use_mlp:
      self._write_query_mlp = ResMLP(
          'write_query_mlp', [
              in_dim, self._nwrite * self._memory_dim,
              self._nwrite * self._memory_dim, self._nwrite * self._memory_dim
          ],
          dtype=dtype)
      self._write_content_mlp = ResMLP(
          'write_content_mlp', [
              in_dim, self._nwrite * self._memory_dim,
              self._nwrite * self._memory_dim, self._nwrite * self._memory_dim
          ],
          dtype=dtype)
    else:
      self._write_query_mlp = Linear('write_query_mlp', in_dim,
                                     self._nwrite * self._memory_dim)
      self._write_content_mlp = Linear('write_content_mlp', in_dim,
                                       self._nwrite * self._memory_dim)
def __init__(self, name, nin, nout, dtype=tf.float32):
  super(GRU, self).__init__(dtype=dtype)
  self._nin = nin
  self._nout = nout
  with variable_scope(name):
    self._gates = Linear("gates_linear", nin + nout, 2 * nout)
    self._linear = Linear("linear", nin + nout, nout)
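# A minimal standalone sketch (not this class's forward method) of the
# standard GRU update that the `gates_linear` (2 * nout) and `linear` (nout)
# split above suggests, written with raw TensorFlow ops instead of the
# `Linear` module. Gate ordering and activations are assumptions.
import tensorflow as tf


def gru_step_sketch(x, h, w_gates, b_gates, w_cand, b_cand):
  """One GRU step. x: [B, nin], h: [B, nout]."""
  xh = tf.concat([x, h], axis=-1)                       # [B, nin + nout]
  gates = tf.sigmoid(tf.matmul(xh, w_gates) + b_gates)  # [B, 2 * nout]
  z, r = tf.split(gates, 2, axis=-1)                    # update / reset gates
  xrh = tf.concat([x, r * h], axis=-1)                  # reset-gated input
  h_tilde = tf.tanh(tf.matmul(xrh, w_cand) + b_cand)    # candidate state
  return (1.0 - z) * h + z * h_tilde                    # blended new state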
def __init__(self, config, backbone, memory, dtype=tf.float32):
  super(LSTMNet, self).__init__(config, backbone, dtype=dtype)
  self._memory = memory
  # Map from memory content to the actual output class.
  if config.fix_unknown:
    self._nclassout = config.num_classes + 1
  else:
    self._nclassout = config.num_classes
  self._readout_layer = Linear('readout', memory.nout, self._nclassout)
def __init__(self, config, wdict=None):
  super(C4PlusFCBackbone, self).__init__(config)
  self.backbone = C4Backbone(config)
  if len(self.config.num_fc_dim) > 1:
    self.fc = MLP(
        'fc', [config.num_filters[-1]] + list(self.config.num_fc_dim),
        wdict=wdict)
  else:
    # Hard coded for now.
    self.fc = Linear(
        'fc', config.num_filters[-1], self.config.num_fc_dim, wdict=wdict)
def __init__(self,
             name,
             layer_size,
             add_bias=True,
             bias_init=None,
             act_func=None,
             layernorm=False,
             temp=None,
             learn_temp=False,
             dtype=tf.float32,
             wdict=None):
  super(CosineLastMLP, self).__init__(dtype=dtype)
  self._layers = []
  with variable_scope(name):
    for i in range(len(layer_size) - 1):
      if bias_init is not None and bias_init[i] is not None:
        def bi():
          return tf.zeros([layer_size[i + 1]], dtype=dtype) + bias_init[i]
      else:
        bi = None
      if i < len(layer_size) - 2:
        layer = Linear(
            "layer_{}".format(i),
            layer_size[i],
            layer_size[i + 1],
            b_init=bi,
            add_bias=add_bias,
            dtype=dtype,
            wdict=wdict)
      else:
        layer = CosineLinear(
            "layer_{}".format(i),
            layer_size[i],
            layer_size[i + 1],
            temp=temp,
            learn_temp=learn_temp,
            dtype=tf.float32,
            wdict=wdict)
      self._layers.append(layer)
      if layernorm:
        self._layers.append(
            LayerNorm(
                "layernorm_{}".format(i),
                layer_size[i + 1],
                dtype=dtype,
                wdict=wdict))
      if i < len(layer_size) - 2:
        if act_func is None:
          self._layers.append(tf.nn.relu)
        else:
          self._layers.append(act_func[i])
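# A minimal sketch of a temperature-scaled cosine readout, which is presumably
# what the final `CosineLinear` layer above computes (an assumption; the real
# `CosineLinear` may differ, e.g. in epsilon handling or in how `learn_temp`
# is parameterized).
import tensorflow as tf


def cosine_linear_sketch(x, w, temp):
  """x: [B, nin], w: [nin, nout], temp: scalar temperature."""
  x_norm = tf.math.l2_normalize(x, axis=-1)  # unit-norm features
  w_norm = tf.math.l2_normalize(w, axis=0)   # unit-norm class weights
  return temp * tf.matmul(x_norm, w_norm)    # [B, nout] scaled cosines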
def __init__(self,
             config,
             backbone,
             memory,
             distributed=False,
             dtype=tf.float32):
  super(LSTMSigmoidNet, self).__init__(
      config, backbone, distributed=distributed, dtype=dtype)
  assert config.fix_unknown, 'Only unknown is supported'
  self._memory = memory
  self._nclassout = config.num_classes + 1
  self._readout_layer = Linear('readout', memory.nout, self._nclassout)
def __init__(self, name, nin, nout, layernorm=False, dtype=tf.float32):
  super(GRU, self).__init__(dtype=dtype)
  self._nin = nin
  self._nout = nout
  self._layernorm = layernorm
  self._gates = Linear(
      "gates_linear", nin + nout, 1, b_init=lambda: -tf.ones(1) * 2.0)
  # self._gates = Linear(
  #     "gates_linear", nin + nout, 1, b_init=lambda: tf.ones(1) * 2.0)
  # self._gates = Linear(
  #     "gates_linear", nin + nout, 1, b_init=lambda: tf.zeros(1))
  if layernorm:
    self._ln = LayerNorm("layernorm", nin + nout, dtype=dtype)
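# A minimal sketch of the kind of scalar gating a single-output `gates_linear`
# suggests: one sigmoid gate per example blending two vectors. With the -2.0
# bias the gate starts near sigmoid(-2) ~= 0.12, i.e. mostly closed at
# initialization. The blend itself is an assumption, not this class's actual
# forward pass.
import tensorflow as tf


def scalar_gate_blend_sketch(x, h, w_gate, b_gate):
  """x, h: [B, D], w_gate: [2 * D, 1], b_gate: [1]."""
  g = tf.sigmoid(tf.matmul(tf.concat([x, h], axis=-1), w_gate) + b_gate)
  return g * x + (1.0 - g) * h  # [B, D]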
def __init__(self,
             name,
             in_filters,
             num_hidden=384,
             out_filters=512,
             data_format="NCHW",
             dtype=tf.float32):
  super(FinalConvModule, self).__init__()
  self._data_format = data_format
  with variable_scope(name):
    self._bn1 = BatchNorm("bn1", in_filters, data_format=data_format)
    self._conv = Conv2D("conv", 1, in_filters, num_hidden,
                        self._stride_arr(1))
    self._bn2 = BatchNorm("bn2", num_hidden, data_format=data_format)
    self._fc = Linear("fc", num_hidden, out_filters)
    self._out_filters = out_filters
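# A minimal sketch of the kind of forward pass this module's layers suggest
# (ReLU -> 1x1 conv -> ReLU -> global average pool -> fc). The ordering is an
# assumption based on common ResNet-style final blocks, not taken from the
# class's call method; BatchNorm is omitted and NHWC is used for portability.
import tensorflow as tf


def final_conv_sketch(x, conv_kernel, fc_w, fc_b):
  """x: [B, H, W, C_in], conv_kernel: [1, 1, C_in, num_hidden]."""
  h = tf.nn.relu(x)
  h = tf.nn.conv2d(h, conv_kernel, strides=1, padding='SAME')
  h = tf.nn.relu(h)
  h = tf.reduce_mean(h, axis=[1, 2])  # global average pooling
  return tf.matmul(h, fc_w) + fc_b    # [B, out_filters]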
def __init__(self, config, backbone, dtype=tf.float32):
  super(PretrainNet, self).__init__()
  self._backbone = backbone
  self._config = config
  assert self.config.num_classes > 0, 'Must specify number of output classes'
  opt_config = self.config.optimizer_config
  gs = tf.Variable(0, dtype=tf.int64, name='step', trainable=False)
  self._step = gs
  self._wd = backbone.config.weight_decay
  self._learn_rate = tf.compat.v1.train.piecewise_constant(
      self.step, list(np.array(opt_config.lr_decay_steps).astype(np.int64)),
      list(opt_config.lr_list))
  opt = self._get_optimizer(opt_config.optimizer, self.learn_rate)
  self._optimizer = opt
  out_dim = backbone.get_output_dimension()
  self._fc = Linear("fc", out_dim[-1], config.num_classes, dtype=dtype)
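# A small usage sketch of `tf.compat.v1.train.piecewise_constant` as used
# above. The decay steps and learning rates here are hypothetical; in eager
# mode the call returns a no-argument function that yields the current rate.
import numpy as np
import tensorflow as tf

step = tf.Variable(0, dtype=tf.int64, name='step', trainable=False)
boundaries = list(np.array([30000, 40000]).astype(np.int64))
lr = tf.compat.v1.train.piecewise_constant(step, boundaries,
                                           [0.1, 0.01, 0.001])
step.assign(35000)
print(float(lr()))  # 0.01 once the first boundary has been passed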
def __init__(self, name, nin, nout, layernorm=False, dtype=tf.float32):
  super(LSTM, self).__init__(dtype=dtype)
  self._nin = nin
  self._nout = nout
  self._layernorm = layernorm

  def _b_init():
    return tf.concat(
        [tf.ones([nout], dtype=dtype), tf.zeros([3 * nout], dtype=dtype)],
        axis=0)

  with variable_scope(name):
    self._gates = Linear("gates_linear", nin + nout, 4 * nout, b_init=_b_init)
    if layernorm:
      self._ln = LayerNorm("layernorm", 4 * nout, dtype=dtype)
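# A minimal standalone sketch of the LSTM update implied above: one Linear
# maps [x; h] to 4 * nout pre-activations, and `_b_init` puts a +1 bias on the
# first nout units, which suggests the forget gate comes first. The order of
# the remaining chunks and the activations are assumptions.
import tensorflow as tf


def lstm_step_sketch(x, h, c, w, b):
  """x: [B, nin], h, c: [B, nout], w: [nin + nout, 4 * nout], b: [4 * nout]."""
  pre = tf.matmul(tf.concat([x, h], axis=-1), w) + b
  f, i, o, u = tf.split(pre, 4, axis=-1)                   # assumed gate order
  c_new = tf.sigmoid(f) * c + tf.sigmoid(i) * tf.tanh(u)   # +1 forget bias
  h_new = tf.sigmoid(o) * tf.tanh(c_new)
  return h_new, c_new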
def __init__(self, name, layer_size, dtype=tf.float32):
  super(ResMLP, self).__init__(dtype=dtype)
  self._layer_size = layer_size
  self._layers = []
  with variable_scope(name):
    for i in range(len(layer_size) - 1):

      def bi():
        return tf.zeros([layer_size[i + 1]], dtype=dtype)

      self._layers.append(
          Linear(
              "layer_{}".format(i),
              layer_size[i],
              layer_size[i + 1],
              b_init=bi,
              add_bias=True,
              dtype=dtype))
def __init__(self, name, layer_size, act_func, bias_init, dtype=tf.float32):
  super(ResMLP, self).__init__(dtype=dtype)
  self._layers = []
  self._layer_size = layer_size
  self._act_func = act_func
  with variable_scope(name):
    for i in range(len(layer_size) - 1):
      if bias_init[i] is None:
        bias_init_ = tf.zeros([layer_size[i + 1]], dtype=dtype)
      else:
        bias_init_ = bias_init[i]

      def bi():
        return bias_init_

      self._layers.append(
          Linear(
              "layer_{}".format(i),
              layer_size[i],
              layer_size[i + 1],
              b_init=bi,
              add_bias=True,
              dtype=dtype))
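# A minimal sketch of the residual forward pass a `ResMLP` built this way
# presumably applies: each layer's output gets a skip connection when its
# width matches its input. This is an assumption about the class's call
# method, not code taken from it.
import tensorflow as tf


def resmlp_forward_sketch(x, weights, biases, act_funcs):
  """weights[i]: [d_i, d_{i+1}]; act_funcs[i]: callable or None."""
  h = x
  for w, b, act in zip(weights, biases, act_funcs):
    y = tf.matmul(h, w) + b
    if act is not None:
      y = act(y)
    if y.shape[-1] == h.shape[-1]:  # residual connection when widths match
      y = y + h
    h = y
  return h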
def __init__(self,
             name,
             rnn_memory,
             proto_memory,
             readout_type='linear',
             use_pred_beta_gamma=True,
             use_feature_fuse=True,
             use_feature_fuse_gate=True,
             use_feature_scaling=True,
             use_feature_memory_only=False,
             skip_unk_memory_update=False,
             use_ssl=True,
             use_ssl_beta_gamma_write=True,
             use_ssl_temp=True,
             dtype=tf.float32):
  super(RNNEncoder, self).__init__(dtype=dtype)
  self._rnn_memory = rnn_memory
  self._proto_memory = proto_memory

  # ------------- Feature Fusing Capability Ablation --------------
  self._use_pred_beta_gamma = use_pred_beta_gamma  # CHECK
  self._use_feature_fuse = use_feature_fuse  # CHECK
  self._use_feature_fuse_gate = use_feature_fuse_gate  # CHECK
  self._use_feature_scaling = use_feature_scaling  # CHECK
  self._use_feature_memory_only = use_feature_memory_only  # CHECK

  # ------------- SSL Capability Ablation --------------
  self._skip_unk_memory_update = skip_unk_memory_update  # CHECK
  self._use_ssl = use_ssl  # CHECK
  self._use_ssl_beta_gamma_write = use_ssl_beta_gamma_write  # CHECK
  self._use_ssl_temp = use_ssl_temp  # CHECK

  D_in = self._rnn_memory.memory_dim
  D = self._rnn_memory.in_dim
  self._dim = D

  # Readout bias layout:
  #   h       [D]
  #   scale   [D]
  #   temp    [1]
  #   gamma2  [1]
  #   beta2   [1]
  #   gamma   [1]
  #   beta    [1]
  #   x_gate  [1]
  #   h_gate  [1]
  bias_init = [
      tf.zeros(D),
      tf.zeros(D),
      tf.zeros([1]),
      tf.zeros([1]),
      tf.zeros([1]) + proto_memory._radius_init,
      tf.zeros([1]),
      tf.zeros([1]) + proto_memory._radius_init_write,
      tf.zeros([1]) + 1.0,
      tf.zeros([1]) - 1.0
  ]
  bias_init = tf.concat(bias_init, axis=0)
  D_out = bias_init.shape[-1]

  def b_init():
    return bias_init

  if readout_type == 'linear':
    log.info("Using linear readout")
    self._readout = Linear('readout', D_in, D_out, b_init=b_init)
  elif readout_type == 'mlp':
    log.info("Using MLP readout")
    self._readout = MLP(
        'readout_mlp', [D_in, D_out, D_out],
        bias_init=[None, bias_init],
        act_func=[tf.math.tanh])
  elif readout_type == 'resmlp':
    log.info("Using ResMLP readout")
    self._readout = ResMLP(
        'readout_mlp', [D_in, D_out, D_out, D_out],
        bias_init=[None, None, bias_init],
        act_func=[swish, swish, None])
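# A minimal sketch of splitting the readout vector built above back into its
# named pieces, following the bias layout documented in the comment
# (h, scale, temp, gamma2, beta2, gamma, beta, x_gate, h_gate). The actual
# forward method may slice differently.
import tensorflow as tf


def split_readout_sketch(readout, D):
  """readout: [B, 2 * D + 7] -> tuple of named slices."""
  sizes = [D, D, 1, 1, 1, 1, 1, 1, 1]
  h, scale, temp, gamma2, beta2, gamma, beta, x_gate, h_gate = tf.split(
      readout, sizes, axis=-1)
  return h, scale, temp, gamma2, beta2, gamma, beta, x_gate, h_gate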
def __init__(self,
             name,
             dim,
             radius_init,
             max_classes=20,
             fix_unknown=False,
             unknown_id=None,
             similarity="euclidean",
             static_beta_gamma=True,
             radius_init_write=None,
             use_ssl_beta_gamma_write=True,
             dtype=tf.float32):
  super(SemiSupervisedMinDistProtoMemory, self).__init__(
      name,
      dim,
      max_classes=max_classes,
      fix_unknown=fix_unknown,
      unknown_id=unknown_id,
      similarity=similarity,
      dtype=dtype)
  self._controller_type = 'linear'
  # self._controller_type = 'lstm'
  self._radius_init = radius_init
  log.info('Radius init {}'.format(radius_init))
  if radius_init_write is not None:
    self._radius_init_write = radius_init_write
    log.info('Radius init write {}'.format(radius_init_write))
  else:
    self._radius_init_write = radius_init
  self._use_ssl_beta_gamma_write = use_ssl_beta_gamma_write
  if static_beta_gamma:
    with variable_scope(name):
      self._beta = self._get_variable(
          "beta", self._get_constant_init([], radius_init))
      self._gamma = self._get_variable(
          "gamma", self._get_constant_init([], 1.0))
      self._beta2 = self._get_variable(
          "beta2", self._get_constant_init([], self._radius_init_write))
      self._gamma2 = self._get_variable(
          "gamma2", self._get_constant_init([], 1.0))
  with variable_scope(name):
    if self._controller_type == 'lstm':
      self._ctrl_lstm = LSTM(
          "ctrl_lstm", dim, dim, layernorm=False, dtype=dtype)
      self._ctrl_readout = Linear(
          "ctrl_readout",
          dim,
          1,
          w_init=lambda: tf.ones([dim, 1]),
          b_init=lambda: tf.zeros([1]))
    elif self._controller_type == 'linear':
      self._ctrl_readout = Linear(
          "ctrl_readout",
          dim,
          1,
          # w_init=lambda: self._get_normal_init([dim, 1])() * 0.001,
          w_init=lambda: tf.ones([dim, 1]) * 0.001,
          b_init=lambda: tf.zeros([1]))
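# A minimal sketch of how a min-distance prototype memory with `beta`/`gamma`
# thresholds can score an input as unknown: compare the distance to the
# nearest prototype against a learned threshold. This is an illustrative
# assumption about the readout, not the class's actual method.
import tensorflow as tf


def unknown_score_sketch(x, prototypes, beta, gamma):
  """x: [B, D], prototypes: [K, D] -> unknown probability [B]."""
  d = tf.reduce_sum(
      tf.square(tf.expand_dims(x, 1) - tf.expand_dims(prototypes, 0)),
      axis=-1)                               # [B, K] squared distances
  min_d = tf.reduce_min(d, axis=-1)          # distance to the closest proto
  return tf.sigmoid(gamma * (min_d - beta))  # far from all protos -> unknown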
def __init__(self, name, in_dim, config, dtype=tf.float32):
  """Initialize a DNC module.

  Args:
    name: String. Name of the module.
    in_dim: Int. Input dimension.
    config: Memory config object, providing `memory_dim`, `controller_dim`,
      `num_slots`, `num_reads`, `num_writes`, `controller_nstack`,
      `controller_type` (`lstm`, `stack_lstm`, or `mlp`),
      `controller_layernorm`, `memory_layernorm`, and `similarity_type`.
    dtype: Data type.
  """
  super(DNC, self).__init__(dtype=dtype)
  log.info('Currently using MANN with separate write attention')
  log.info('Currently using MANN with decay')
  self._in_dim = in_dim
  self._memory_dim = config.memory_dim
  self._controller_dim = config.controller_dim
  self._nslot = config.num_slots
  self._nread = config.num_reads
  self._nwrite = config.num_writes
  self._controller_nstack = config.controller_nstack
  self._controller_type = config.controller_type
  self._similarity_type = config.similarity_type
  with variable_scope(name):
    if config.controller_layernorm:
      log.info('Using LayerNorm in controller module.')
    if config.controller_type == 'lstm':
      self._controller = LSTM(
          "controller_lstm",
          in_dim,
          config.controller_dim,
          layernorm=config.controller_layernorm,
          dtype=dtype)
    elif config.controller_type == 'stack_lstm':
      log.info('Use {}-stack LSTM'.format(config.controller_nstack))
      self._controller = StackLSTM(
          "stack_controller_lstm",
          in_dim,
          config.controller_dim,
          config.controller_nstack,
          layernorm=config.controller_layernorm,
          dtype=dtype)
    elif config.controller_type == 'mlp':
      log.info('Use MLP')
      self._controller = MLP(
          "controller_mlp",
          [in_dim, config.controller_dim, config.controller_dim],
          layernorm=config.controller_layernorm,
          dtype=dtype)
    rnd = np.random.RandomState(0)
    self._rnd = rnd
    self._memory_init = 1e-5 * tf.ones(
        [config.num_slots, config.memory_dim], name="memory_init", dtype=dtype)

    # Layout and bias init of the controller-to-memory projection:
    #
    #    Item name       Shape     Init   Comment
    # ------------------------------------------------------------
    # 1) read query      Nr x D     0.0
    # 2) write query     Nw x D     0.0
    # 3) write content   Nw x D     0.0
    # 4) forget gate     Nr        -2.0   No forget after read
    # 5) write gate      Nw         2.0   Always write
    # 6) interp gate     Nw        -2.0   Always use LRU
    # 7) read temp       Nr         0.0   Default 1.0
    # 8) write temp      Nw         0.0   Default 1.0
    # 9) erase           M         -2.0   Default no erase
    Nr = self._nread
    Nw = self._nwrite
    D = self._memory_dim
    M = self._nslot

    def ctrl2mem_bias_init():
      AA = tf.zeros([Nr * D + 2 * Nw * D], dtype=self.dtype)  # 1-3) queries
      BB = -2.0 * tf.ones([Nr], dtype=self.dtype)  # 4) forget gate
      CC = 2.0 * tf.ones([Nw], dtype=self.dtype)  # 5) write gate
      DD = -2.0 * tf.ones([Nw], dtype=self.dtype)  # 6) interp gate
      EE = 0.0 * tf.ones([Nr], dtype=self.dtype)  # 7) read temp
      FF = 0.0 * tf.ones([Nw], dtype=self.dtype)  # 8) write temp
      GG = -2.0 * tf.ones([M], dtype=self.dtype)  # 9) erase
      return tf.concat([AA, BB, CC, DD, EE, FF, GG], axis=0)

    self._ctrl2mem = Linear(
        "ctrl2mem",
        config.controller_dim,
        Nr * D + 2 * Nw * D + Nr + 2 * Nw + Nr + Nw + M,
        b_init=ctrl2mem_bias_init)
    if config.memory_layernorm:
      log.info('Using LayerNorm for each memory iteration.')
      self._mem_layernorm = LayerNorm("memory_layernorm", D, dtype=dtype)
    else:
      self._mem_layernorm = None
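# A minimal sketch of splitting the `ctrl2mem` projection according to the
# table above (read query, write query, write content, forget gate, write
# gate, interpolation gate, read/write temperatures, erase). This illustrates
# the layout only; the DNC's actual read/write logic is not reproduced.
import tensorflow as tf


def split_ctrl2mem_sketch(ctrl_out, Nr, Nw, D, M):
  """ctrl_out: [B, Nr*D + 2*Nw*D + 2*Nr + 3*Nw + M]."""
  sizes = [Nr * D, Nw * D, Nw * D, Nr, Nw, Nw, Nr, Nw, M]
  (read_query, write_query, write_content, forget_gate, write_gate,
   interp_gate, read_temp, write_temp, erase) = tf.split(
       ctrl_out, sizes, axis=-1)
  read_query = tf.reshape(read_query, [-1, Nr, D])         # [B, Nr, D]
  write_query = tf.reshape(write_query, [-1, Nw, D])       # [B, Nw, D]
  write_content = tf.reshape(write_content, [-1, Nw, D])   # [B, Nw, D]
  return (read_query, write_query, write_content, forget_gate, write_gate,
          interp_gate, read_temp, write_temp, erase)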
def __init__(self,
             name,
             in_dim,
             memory_dim,
             controller_dim,
             nslot,
             nread,
             memory_decay,
             controller_type='lstm',
             memory_layernorm=False,
             controller_layernorm=False,
             controller_nstack=2,
             dtype=tf.float32):
  """Initialize a MANN module.

  Args:
    name: String. Name of the module.
    in_dim: Int. Input dimension.
    memory_dim: Int. Memory dimension.
    controller_dim: Int. Hidden dimension for the controller.
    nslot: Int. Number of memory slots.
    nread: Int. Number of read heads.
    memory_decay: Float. Memory decay coefficient.
    controller_type: String. `lstm` or `stack_lstm`.
    memory_layernorm: Bool. Whether to apply LayerNorm on each memory
      iteration.
    controller_layernorm: Bool. Whether to apply LayerNorm in the controller.
    controller_nstack: Int. Number of stacked LSTM layers when
      `controller_type` is `stack_lstm`.
    dtype: Data type.
  """
  super(MANN, self).__init__(dtype=dtype)
  self._in_dim = in_dim
  self._memory_dim = memory_dim
  self._controller_dim = controller_dim
  self._nslot = nslot
  self._nread = nread
  self._controller_nstack = controller_nstack
  self._controller_type = controller_type
  with variable_scope(name):
    if controller_layernorm:
      log.info('Using LayerNorm in controller module.')
    if controller_type == 'lstm':
      self._controller = LSTM(
          "controller_lstm",
          in_dim,
          controller_dim,
          layernorm=controller_layernorm,
          dtype=dtype)
    elif controller_type == 'stack_lstm':
      log.info('Use {}-stack LSTM'.format(controller_nstack))
      self._controller = StackLSTM(
          "stack_controller_lstm",
          in_dim,
          controller_dim,
          controller_nstack,
          layernorm=controller_layernorm,
          dtype=dtype)
    rnd = np.random.RandomState(0)
    self._rnd = rnd
    self._gamma = memory_decay
    D = memory_dim
    N = nread
    M = nslot
    self._memory_init = 1e-5 * tf.ones([M, D], name="memory_init", dtype=dtype)

    def ctrl2mem_bias_init():
      # Zero bias for the first 2 * N * D outputs; -2.0 for the last N.
      zeros = tf.zeros([2 * N * D], dtype=self.dtype)
      ones = -2.0 * tf.ones([N], dtype=self.dtype)
      return tf.concat([zeros, ones], axis=0)

    self._ctrl2mem = Linear(
        "ctrl2mem",
        controller_dim,
        2 * nread * memory_dim + nread,
        b_init=ctrl2mem_bias_init)
    self._temp = tf.Variable(1.0, name="temp", dtype=dtype)
    if memory_layernorm:
      log.info('Using LayerNorm for each memory iteration.')
      self._mem_layernorm = LayerNorm("memory_layernorm", D, dtype=dtype)
    else:
      self._mem_layernorm = None
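# A minimal sketch of a content-based (cosine-similarity) memory read of the
# kind MANN-style modules perform: a softmax over temperature-scaled
# similarities between a query and the memory slots, then a weighted sum of
# the rows. An illustrative assumption, not this class's actual read method.
import tensorflow as tf


def content_read_sketch(memory, query, temp=1.0):
  """memory: [M, D], query: [B, D] -> read vector [B, D]."""
  mem_n = tf.math.l2_normalize(memory, axis=-1)
  q_n = tf.math.l2_normalize(query, axis=-1)
  sim = tf.matmul(q_n, mem_n, transpose_b=True)  # [B, M] cosine similarities
  attn = tf.nn.softmax(temp * sim, axis=-1)      # read attention weights
  return tf.matmul(attn, memory)                 # [B, D] read vector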