def __init__(
    self,
    input_dim: IntSpaceType,
    proj_dim: Optional[IntSpaceType],
    num_heads: IntSpaceType,
    qkv_bias: BoolSpaceType = False,
    attn_drop: Optional[float] = None,
    proj_drop: Optional[float] = None,
    use_mask=False,
):
    """Build the query/key/value projections and the optional output projection.

    Args:
        input_dim: Feature size used for both the input and output of the
            q/k/v ``SuperLinear`` layers.
        proj_dim: Output size of the final projection. When ``None`` no
            projection layer is built (``self.proj`` is ``None``) and
            ``self.proj_drop`` is not created at all.
        num_heads: Number of attention heads.
        qkv_bias: Forwarded as ``bias`` to the q/k/v ``SuperLinear`` layers.
        attn_drop: Drop rate handed to ``SuperDrop``; ``None`` (or any falsy
            value) becomes ``0.0``.
        proj_drop: Drop rate handed to ``SuperDropout``; ``None`` (or any
            falsy value) becomes ``0.0``. Only used when ``proj_dim`` is given.
        use_mask: Stored as ``self._use_mask``.

    Raises:
        AssertionError: If the maximum of ``input_dim`` is not a whole
            multiple of the minimum of ``num_heads``.
    """
    # The parent initializer must run before any attribute is attached.
    super(SuperSelfAttention, self).__init__()

    # Remember the raw (possibly search-space valued) configuration.
    self._input_dim = input_dim
    self._proj_dim = proj_dim
    self._num_heads = num_heads
    self._qkv_bias = qkv_bias
    self._use_mask = use_mask
    self._infinity = 1e9

    # Round the largest input size down to a multiple of the smallest head
    # count; the round trip only returns the same number when it divides evenly.
    largest_input = spaces.get_max(input_dim)
    fewest_heads = spaces.get_min(num_heads)
    rounded_input = (largest_input // fewest_heads) * fewest_heads
    assert rounded_input == largest_input

    # Keep the creation order q -> k -> v -> attn_drop -> proj -> proj_drop.
    self.q_fc = SuperLinear(input_dim, input_dim, bias=qkv_bias)
    self.k_fc = SuperLinear(input_dim, input_dim, bias=qkv_bias)
    self.v_fc = SuperLinear(input_dim, input_dim, bias=qkv_bias)

    attn_rate = attn_drop or 0.0
    self.attn_drop = SuperDrop(attn_rate, [-1, -1, -1, -1], recover=True)

    if proj_dim is None:
        self.proj = None
    else:
        self.proj = SuperLinear(input_dim, proj_dim)
        proj_rate = proj_drop or 0.0
        self.proj_drop = SuperDropout(proj_rate)
def test_integer(self):
    """Check indexing, string form, and bounds of an ``Integer`` space."""
    space = Integer(lower=1, upper=4)

    # Index i is expected to hold the value i + 1 for the four positions.
    for index, expected in enumerate(range(1, 5)):
        self.assertEqual(space[index], expected)

    expected_text = "Integer(lower=1, upper=4, default=None)"
    self.assertEqual(expected_text, str(space))

    upper_bound = get_max(space)
    self.assertEqual(upper_bound, 4)
    lower_bound = get_min(space)
    self.assertEqual(lower_bound, 1)
def d_model(self):
    """Return the maximum of the stored ``_d_model``, via ``spaces.get_max``."""
    largest_d_model = spaces.get_max(self._d_model)
    return largest_d_model
def embed_dim(self):
    """Return the maximum of the stored ``_embed_dim``, via ``spaces.get_max``."""
    largest_embed_dim = spaces.get_max(self._embed_dim)
    return largest_embed_dim
def out_features(self):
    """Return the maximum of the stored ``_out_features``, via ``spaces.get_max``."""
    largest_out_features = spaces.get_max(self._out_features)
    return largest_out_features
def in_features(self):
    """Return the maximum of the stored ``_in_features``, via ``spaces.get_max``."""
    largest_in_features = spaces.get_max(self._in_features)
    return largest_in_features
def hidden_multiplier(self):
    """Return the maximum of the stored ``_hidden_multiplier``, via ``spaces.get_max``."""
    largest_multiplier = spaces.get_max(self._hidden_multiplier)
    return largest_multiplier
def proj_dim(self):
    """Return the maximum of the stored ``_proj_dim``, via ``spaces.get_max``."""
    largest_proj_dim = spaces.get_max(self._proj_dim)
    return largest_proj_dim
def input_dim(self):
    """Return the maximum of the stored ``_input_dim``, via ``spaces.get_max``."""
    largest_input_dim = spaces.get_max(self._input_dim)
    return largest_input_dim
def num_heads(self):
    """Return the maximum of the stored ``_num_heads``, via ``spaces.get_max``."""
    largest_num_heads = spaces.get_max(self._num_heads)
    return largest_num_heads
def hidden_dim(self):
    """Return the maximum of the stored ``_hidden_dim``, via ``spaces.get_max``."""
    largest_hidden_dim = spaces.get_max(self._hidden_dim)
    return largest_hidden_dim
def qk_att_dim(self):
    """Return the maximum of the stored ``_qk_att_dim``, via ``spaces.get_max``."""
    largest_qk_att_dim = spaces.get_max(self._qk_att_dim)
    return largest_qk_att_dim