def initialize(self, num_heads, dim_per_head, drop):
    """Create the sub-layers of this attention block.

    Builds, in this order, a ``MultiHeadAtt`` module, two ``M.Dense``
    layers and two ``M.LayerNorm`` layers, then records ``drop`` on the
    instance.

    Args:
        num_heads: Forwarded to ``MultiHeadAtt``; also a factor of the
            size passed to both dense layers.
        dim_per_head: Forwarded to ``MultiHeadAtt``; also a factor of the
            size passed to both dense layers.
        drop: Forwarded to ``MultiHeadAtt`` and kept as ``self.drop``
            (presumably a dropout rate -- confirm against the forward
            pass).
    """
    self.att = MultiHeadAtt(num_heads, dim_per_head, drop)

    # Both dense layers are sized from the same product; the first one
    # receives four times the size given to the second.
    width = dim_per_head * num_heads
    self.l1 = M.Dense(width * 4)
    self.l2 = M.Dense(width)

    # NOTE(review): the meaning of the argument ``1`` is defined by
    # ``M.LayerNorm`` and is not visible here -- confirm before changing.
    self.ln1 = M.LayerNorm(1)
    self.ln2 = M.LayerNorm(1)

    self.drop = drop
def initialize(self, dim, num_heads, mlp_ratio=4, attn_drop=0.0, drop_path=0.0):
    """Create the sub-layers of this attention + MLP block.

    Records ``drop_path`` on the instance, then builds, in this order,
    a layer norm, an ``Attention`` module, a second layer norm and an
    ``MLP`` module.

    Args:
        dim: Forwarded as the first argument of both ``Attention`` and
            ``MLP``.
        num_heads: Forwarded to ``Attention``.
        mlp_ratio: Forwarded to ``MLP``. Defaults to 4.
        attn_drop: Forwarded to ``Attention``. Defaults to 0.0.
        drop_path: Stored as ``self.drop_path``; not otherwise used here
            (presumably consumed by the forward pass -- confirm).
    """
    self.drop_path = drop_path

    # NOTE(review): the meaning of the argument ``1`` is defined by
    # ``M.LayerNorm`` and is not visible here -- confirm before changing.
    make_norm = M.LayerNorm

    self.norm1 = make_norm(1)
    self.attn = Attention(dim, num_heads, attn_drop)

    self.norm2 = make_norm(1)
    self.mlp = MLP(dim, mlp_ratio)
def initialize(self, dim, mlp_ratio=4, drop_path=0.0):
    """Create the sub-layers of this Fourier + MLP block.

    Records ``drop_path`` on the instance, then builds, in this order,
    a layer norm, a ``Fourier`` module, a second layer norm and an
    ``MLP`` module.

    Args:
        dim: Forwarded as the first argument of ``MLP``.
        mlp_ratio: Forwarded to ``MLP``. Defaults to 4.
        drop_path: Stored as ``self.drop_path``; not otherwise used here
            (presumably consumed by the forward pass -- confirm).
    """
    self.drop_path = drop_path

    # NOTE(review): the meaning of the argument ``1`` is defined by
    # ``M.LayerNorm`` and is not visible here -- confirm before changing.
    make_norm = M.LayerNorm

    self.norm1 = make_norm(1)
    self.four = Fourier()  # constructed without arguments

    self.norm2 = make_norm(1)
    self.mlp = MLP(dim, mlp_ratio)