def __init__(self, isize, ncomb=2, hsize=None, dropout=0.0, custom_act=use_adv_act_default, enable_bias=enable_prev_ln_bias_default):

	super(ResidueCombiner, self).__init__()

	_hsize = isize * 2 * ncomb if hsize is None else hsize

	# should dropout be in front of sigmoid or not?
	self.net = nn.Sequential(
		Linear(isize * ncomb, _hsize),
		Custom_Act() if custom_act else nn.Sigmoid(),
		Dropout(dropout, inplace=inplace_after_Custom_Act),
		Linear(_hsize, isize, bias=enable_bias),
		Dropout(dropout, inplace=True),
	) if dropout > 0.0 else nn.Sequential(
		Linear(isize * ncomb, _hsize),
		Custom_Act() if custom_act else nn.Sigmoid(),
		Linear(_hsize, isize, bias=enable_bias),
	)

	self.out_normer = nn.LayerNorm(isize, eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)
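# Illustrative sketch (not part of the original class): how a ResidueCombiner is
# typically applied to fuse `ncomb` representations, by concatenating them, feeding the
# concatenation through self.net, adding the plain sum of the inputs as a residual and
# layer-normalising the result. The actual forward() of ResidueCombiner may differ.
# (torch is assumed to be imported at the top of this module, as elsewhere in this file.)
def _example_residue_combiner(combiner, *xl):

	# xl: ncomb tensors of shape (bsize, seql, isize)
	out = combiner.net(torch.cat(xl, dim=-1))
	for x in xl:
		out = out + x

	return combiner.out_normer(out)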
def __init__(self, isize, hsize=None, dropout=0.0, num_pos=cache_len_default, custom_act=use_adv_act_default):

	super(AverageAttn, self).__init__()

	_hsize = isize if hsize is None else hsize

	self.num_pos = num_pos
	# position-dependent averaging weights, initialised in reset_parameters()
	self.register_buffer('w', torch.Tensor(num_pos, 1))

	self.ffn = nn.Sequential(
		Linear(isize, _hsize),
		Custom_Act() if custom_act else nn.ReLU(inplace=True),
		Dropout(dropout, inplace=inplace_after_Custom_Act),
		Linear(_hsize, isize),
		Dropout(dropout, inplace=True),
	) if dropout > 0.0 else nn.Sequential(
		Linear(isize, _hsize),
		Custom_Act() if custom_act else nn.ReLU(inplace=True),
		Linear(_hsize, isize),
	)

	self.gw = Linear(isize * 2, isize * 2)

	self.reset_parameters()
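# Illustrative sketch (an assumption, following the average attention network of
# Zhang et al., 2018): self-attention is replaced by a cumulative average over the
# current and preceding positions, followed by self.ffn and a gating layer built from
# self.gw. The actual forward() of AverageAttn (and its use of the `w` buffer during
# decoding) may differ.
def _example_average_attn(aattn, x):

	# x: (bsize, seql, isize)
	seql = x.size(1)
	steps = torch.arange(1, seql + 1, dtype=x.dtype, device=x.device).view(1, seql, 1)
	avg = aattn.ffn(x.cumsum(dim=1) / steps)

	# input/forget gating between the original input and its running average
	igate, fgate = aattn.gw(torch.cat((x, avg), dim=-1)).sigmoid().chunk(2, dim=-1)

	return igate * x + fgate * avg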
def __init__(self, isize, hsize=None, dropout=0.0, norm_residual=norm_residual_default, custom_act=use_adv_act_default, enable_bias=enable_prev_ln_bias_default):

	super(PositionwiseFF, self).__init__()

	_hsize = isize * 4 if hsize is None else hsize

	self.net = nn.Sequential(
		Linear(isize, _hsize),
		Custom_Act() if custom_act else nn.ReLU(inplace=True),
		Dropout(dropout, inplace=inplace_after_Custom_Act),
		Linear(_hsize, isize, bias=enable_bias),
		Dropout(dropout, inplace=True),
	) if dropout > 0.0 else nn.Sequential(
		Linear(isize, _hsize),
		Custom_Act() if custom_act else nn.ReLU(inplace=True),
		Linear(_hsize, isize, bias=enable_bias),
	)

	self.normer = nn.LayerNorm(isize, eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)

	self.norm_residual = norm_residual
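# Illustrative sketch (an assumption): the usual pre-norm application of a
# PositionwiseFF block, where norm_residual selects whether the normalised input or the
# raw input feeds the residual connection. The actual forward() of this class may differ.
def _example_positionwise_ff(ff, x):

	# x: (bsize, seql, isize)
	_x = ff.normer(x)

	return ff.net(_x) + (_x if ff.norm_residual else x)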
def __init__(self, isize, osize=None, dropout=0.0, custom_act=use_adv_act_default, enable_bias=enable_prev_ln_bias_default):

	super(GRUCell4RNMT, self).__init__()

	_osize = isize if osize is None else osize

	self.trans = Linear(isize + _osize, _osize * 2, bias=enable_bias)
	self.transi = Linear(isize, _osize)
	self.transh = Linear(_osize, _osize)

	self.normer = nn.LayerNorm((2, _osize), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)

	self.act = Custom_Act() if custom_act else nn.Tanh()
	self.drop = Dropout(dropout, inplace=inplace_after_Custom_Act) if dropout > 0.0 else None

	self.osize = _osize
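# Illustrative sketch (an assumption): one step of a layer-normalised GRU in the spirit
# of GRUCell4RNMT. The joint projection yields the reset/update gates, normalised as a
# (2, osize) block; the gate ordering and the exact update rule here are assumptions
# rather than the class's own forward().
def _example_ln_gru_step(cell, x, hx):

	# x: (bsize, isize), hx: (bsize, osize)
	rg, zg = cell.normer(cell.trans(torch.cat((x, hx), dim=-1)).view(-1, 2, cell.osize)).sigmoid().unbind(dim=-2)
	hidden = cell.act(cell.transi(x) + rg * cell.transh(hx))
	if cell.drop is not None:
		hidden = cell.drop(hidden)

	return (1.0 - zg) * hx + zg * hidden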
def __init__(self, isize, hsize=None, dropout=0.0, custom_act=use_adv_act_default):

	super(DATTNCombiner, self).__init__()

	_hsize = isize * 4 if hsize is None else hsize

	self.net = nn.Sequential(
		Linear(isize * 2, _hsize),
		Dropout(dropout, inplace=True),
		Custom_Act() if custom_act else nn.Sigmoid(),
		Scorer(_hsize, bias=False),
	) if dropout > 0.0 else nn.Sequential(
		Linear(isize * 2, _hsize),
		Custom_Act() if custom_act else nn.Sigmoid(),
		Scorer(_hsize, bias=False),
	)
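# Illustrative sketch (an assumption): DATTNCombiner scores a pair of representations by
# concatenating them and passing the result through its scoring network; how the scores
# are normalised and consumed downstream is outside this sketch.
def _example_dattn_score(combiner, a, b):

	# a, b: (..., isize); returns an unnormalised score per position, with the exact
	# output shape depending on how Scorer shapes its result
	return combiner.net(torch.cat((a, b), dim=-1))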
def __init__(self, isize, num_head=8, osize=None, dropout=0.0, custom_act=use_adv_act_default, enable_bias=enable_prev_ln_bias_default):

	super(MHPLSTMCore, self).__init__()

	_osize = isize if osize is None else osize

	i_head_dim = float2odd(float(isize) / num_head)
	i_hsize = i_head_dim * num_head
	o_head_dim = float2odd(float(_osize) / num_head)
	o_hsize = o_head_dim * num_head

	self.trans_hid = GroupLinear(i_hsize + i_hsize, o_hsize * 3, num_head, bias=enable_bias, shuffle=False, trans_input=False, flatten_output=False)
	self.trans_og = nn.Sequential(
		GroupLinear(i_hsize + o_hsize, o_hsize, num_head, bias=enable_bias, shuffle=False, trans_input=False, flatten_output=False),
		nn.LayerNorm((num_head, o_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters),
	)

	self.normer_csum = nn.LayerNorm((num_head, i_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)
	self.normer_hid = nn.LayerNorm((num_head, 3, o_head_dim), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)

	self.act = Custom_Act() if custom_act else nn.ReLU()#Tanh()
	self.drop = Dropout(dropout, inplace=inplace_after_Custom_Act) if dropout > 0.0 else None

	self.init_cx = nn.Parameter(torch.zeros(1, num_head, o_head_dim))
def __init__(self, isize, osize=None, dropout=0.0, custom_act=use_adv_act_default, enable_bias=enable_prev_ln_bias_default):

	super(LSTMCell4RNMT, self).__init__()

	_osize = isize if osize is None else osize

	# layer normalization is also applied to the computation of the hidden state for efficiency;
	# the linear bias can be disabled since LayerNorm already provides one.
	self.trans = Linear(isize + _osize, _osize * 4, bias=enable_bias)
	self.normer = nn.LayerNorm((4, _osize), eps=ieps_ln_default, elementwise_affine=enable_ln_parameters)

	self.act = Custom_Act() if custom_act else nn.Tanh()
	self.drop = Dropout(dropout, inplace=inplace_after_Custom_Act) if dropout > 0.0 else None

	self.osize = _osize
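# Illustrative sketch (an assumption): one step of a layer-normalised LSTM in the spirit
# of LSTMCell4RNMT. The joint projection produces the four gate pre-activations, which
# are normalised as a (4, osize) block; the gate ordering and activation choices here
# are assumptions rather than the class's own forward().
def _example_ln_lstm_step(cell, x, state):

	# x: (bsize, isize), state: (hx, cx), each of shape (bsize, osize)
	hx, cx = state
	ig, fg, og, hidden = cell.normer(cell.trans(torch.cat((x, hx), dim=-1)).view(-1, 4, cell.osize)).unbind(dim=-2)
	hidden = cell.act(hidden)
	if cell.drop is not None:
		hidden = cell.drop(hidden)
	cx = fg.sigmoid() * cx + ig.sigmoid() * hidden
	# no extra non-linearity on the cell state before the output gate in this sketch
	hx = og.sigmoid() * cx

	return hx, cx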