def __init__(self, config, input_size, num_units,
             use_zoneout=True, use_ln=True, indrop=True, outdrop=True,
             f_bias=1.):
    super().__init__()
    self.input_size = input_size
    self.num_units = num_units
    self.f_bias = f_bias
    self.use_zoneout = use_zoneout
    self.use_ln = use_ln
    self.indrop = indrop
    self.outdrop = outdrop
    x_size = input_size
    h_size = num_units
    self.r_size = r_size = config.r_size
    # Fused weight matrices over the concatenation [x; r; h].
    self.W_full = nn.Parameter(
        helper.orthogonal_initializer(
            [x_size + r_size + h_size, 3 * h_size], scale=1.0))
    self.bias = nn.Parameter(torch.zeros([3 * h_size]))
    self.W_full1 = nn.Parameter(
        helper.orthogonal_initializer([x_size + r_size + h_size, 2], scale=1.0))
    self.bias1 = nn.Parameter(torch.zeros([2]))
    self.W_full2 = nn.Parameter(
        helper.orthogonal_initializer(
            [x_size + r_size + h_size, 1 * h_size], scale=1.0))
    self.bias2 = nn.Parameter(torch.zeros([1 * h_size]))
    # External memory bookkeeping and learned biases.
    self.memcnt = 0
    self.mem_cap = config.mem_cap
    self.tau = 1.
    self.c_bias = nn.Parameter(torch.zeros(1, h_size))
    self.h_bias = nn.Parameter(torch.zeros(1, h_size))
    self.hmem_bias = nn.Parameter(torch.zeros(1, config.mem_cap, r_size))
    # Learned memory keys and attention vector, orthogonally initialised.
    self.keys = nn.Parameter(torch.zeros(config.mem_cap, config.key_size))
    self.vec_a = nn.Parameter(torch.zeros(h_size // 4, 1))
    nn.init.orthogonal_(self.keys)
    nn.init.orthogonal_(self.vec_a)
    self.fc = nn.Linear(
        x_size + r_size + h_size + config.key_size + config.mem_cap, h_size // 4)
    self.fc.weight.data = helper.orthogonal_initializer(
        [self.fc.weight.shape[1], self.fc.weight.shape[0]]).t_()
    torch.nn.init.constant_(self.fc.bias, 0)
    self.fc1 = nn.Linear(h_size + x_size, r_size)
    self.fc1.weight.data = helper.orthogonal_initializer(
        [self.fc1.weight.shape[1], self.fc1.weight.shape[0]]).t_()
    torch.nn.init.constant_(self.fc1.bias, 0)
    self.u_t = None
    self.prev_read_location = None
def __call__(self, x, state):
    h, c = state
    h_size = self.num_units
    x_size = int(x.size()[1])
    # NOTE: these weights are re-created on every call; in a trainable cell they
    # would normally be registered as nn.Parameters in __init__.
    W_xh = helper.orthogonal_initializer([x_size, 4 * h_size], scale=1.0)
    W_hh = helper.orthogonal_initializer([h_size, 4 * h_size], scale=1.0)
    bias = torch.zeros([4 * h_size])
    W_full = torch.cat((W_xh, W_hh), dim=0)
    concat = torch.cat((x, h), dim=1)
    concat = torch.mm(concat, W_full) + bias
    concat = helper.layer_norm_all(concat, 4, h_size)
    # Split the fused pre-activations into input, candidate, forget, output gates.
    i, j, f, o = torch.split(concat, concat.size(1) // 4, dim=1)
    new_c = c * torch.sigmoid(f + self.f_bias) + torch.sigmoid(i) * torch.tanh(j)
    new_h = torch.tanh(helper.layer_norm(new_c)) * torch.sigmoid(o)
    if self.use_zoneout:
        new_h, new_c = helper.zoneout(new_h, new_c, h, c,
                                      self.zoneout_keep_h, self.zoneout_keep_c,
                                      self.is_training)
    return new_h, (new_h, new_c)
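# A minimal usage sketch for the step above. It assumes this __call__ belongs to
# an LSTM-style cell class (hypothetically named `Cell` here) built from one of
# the __init__ signatures in this file; the batch and feature sizes are
# arbitrary illustration values, not the authors' settings.
#
#     cell = Cell(config, input_size=32, num_units=64)
#     x = torch.randn(8, 32)              # one time step: (batch, input_size)
#     h0 = torch.zeros(8, 64)             # initial hidden state
#     c0 = torch.zeros(8, 64)             # initial cell state
#     out, (h1, c1) = cell(x, (h0, c0))   # out is the new hidden state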
def __init__(self, config, input_size, num_units,
             use_zoneout=True, use_ln=True, indrop=True, outdrop=True,
             f_bias=1.):
    super().__init__()
    self.input_size = input_size
    self.num_units = num_units
    self.f_bias = f_bias
    # Regularisation switches are taken from config (the keyword arguments above
    # are kept for interface compatibility but are overridden here).
    self.use_zoneout = config.use_zoneout
    self.zoneout_keep_h = config.zoneout_h
    self.zoneout_keep_c = config.zoneout_c
    self.use_ln = config.use_ln
    x_size = input_size
    h_size = num_units
    self.W_full = nn.Parameter(
        helper.orthogonal_initializer([x_size + 2 * h_size, 5 * h_size], scale=1.0))
    self.bias = nn.Parameter(torch.zeros([5 * h_size]))
    self.W_full1 = nn.Parameter(
        helper.orthogonal_initializer([x_size + 2 * h_size, 2 * h_size], scale=1.0))
    self.bias1 = nn.Parameter(torch.zeros([2 * h_size]))
    # Layer normalisation, dropout, and zoneout regularisers.
    self.ln1 = helper.layernorm(num_units, 5)
    self.ln2 = helper.layernorm(num_units, 1)
    self.ln3 = helper.layernorm(num_units, 2)
    self.ln4 = helper.layernorm(config.mem_cap, 1)
    self.drop = nn.Dropout(p=1 - config.keep_prob)
    self.zoneout = helper.zoneout1(config.zoneout_c)
    # External memory state: slot count, learned keys, and attention vector.
    self.memcnt = 0
    self.mem_cap = config.mem_cap
    self.tau = 1.
    self.keys = nn.Parameter(torch.zeros(config.mem_cap, config.key_size))
    self.vec_a = nn.Parameter(torch.zeros(h_size // 4, 1))
    nn.init.orthogonal_(self.keys)
    nn.init.orthogonal_(self.vec_a)
    self.fc = nn.Linear(
        x_size + 2 * h_size + config.key_size + config.mem_cap, h_size // 4)
    self.fc.weight.data = helper.orthogonal_initializer(
        [self.fc.weight.shape[1], self.fc.weight.shape[0]]).t_()
    torch.nn.init.constant_(self.fc.bias, 0)
    self.u_t = None
    self.prev_read_location = None
def __init__(self, config, input_size, num_units, f_bias=1.):
    super().__init__()
    self.input_size = input_size
    self.num_units = num_units
    self.f_bias = f_bias
    self.use_zoneout = config.use_zoneout
    self.zoneout_keep_h = config.zoneout_h
    self.zoneout_keep_c = config.zoneout_c
    self.use_ln = config.use_ln
    self.use_head = config.use_head
    x_size = input_size
    h_size = num_units
    self.r_size = r_size = config.r_size
    self.W_full = nn.Parameter(
        helper.orthogonal_initializer(
            [x_size + r_size + h_size, r_size + 4 * h_size], scale=1.0))
    self.bias = nn.Parameter(torch.zeros([r_size + 4 * h_size]))
    self.W_full1 = nn.Parameter(
        helper.orthogonal_initializer(
            [x_size + r_size + h_size, r_size + h_size], scale=1.0))
    self.bias1 = nn.Parameter(torch.zeros([r_size + h_size]))
    self.ln1 = helper.layernorm(num_units, 5)
    self.ln2 = helper.layernorm(num_units, 1)
    self.ln3 = helper.layernorm(num_units, 2)
    self.drop = nn.Dropout(p=1 - config.keep_prob)
    self.zoneout = helper.zoneout1(config.zoneout_c)
    self.c_bias = nn.Parameter(torch.randn(1, num_units))
    self.hmem_bias = nn.Parameter(torch.zeros(1, config.mem_cap, r_size))
    torch.nn.init.normal_(self.c_bias)
    # self.time_fac = torch.cat([torch.ones(config.head_size), torch.Tensor([config.time_fac])])
    self.memcnt = 0
    self.mem_cap = config.mem_cap
    self.tau = 0.
    # Produces mem_cap-way location logits from [x; h] for memory addressing.
    self.fc = nn.Linear(x_size + h_size, config.mem_cap)
    self.fc.weight.data = helper.orthogonal_initializer(
        [self.fc.weight.shape[1], self.fc.weight.shape[0]]).t_()
    torch.nn.init.constant_(self.fc.bias, 0.)
    # Projects the hidden state to the r_size memory slot width.
    self.trans = nn.Linear(h_size, config.r_size)
    self.trans.weight.data = helper.orthogonal_initializer(
        [self.trans.weight.shape[1], self.trans.weight.shape[0]]).t_()
    torch.nn.init.constant_(self.trans.bias, 0.)
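# A generic sketch (not necessarily the exact addressing used in the forward
# pass, which is not shown here) of how the mem_cap-way logits from `self.fc`
# could pick one slot of a (batch, mem_cap, r_size) memory `hmem` with a
# straight-through Gumbel-softmax at temperature `tau`:
#
#     logits = self.fc(torch.cat((x, h), dim=1))                  # (batch, mem_cap)
#     read_weights = F.gumbel_softmax(logits, tau=1.0, hard=True) # one-hot read
#     r = torch.bmm(read_weights.unsqueeze(1), hmem).squeeze(1)   # (batch, r_size)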
def __init__(self, input_size, num_units, use_ln=1, use_zoneout=1, f_bias=1.):
    super().__init__()
    self.input_size = input_size
    self.num_units = num_units
    self.f_bias = f_bias
    self.use_ln = use_ln
    self.use_zoneout = use_zoneout
    self.zoneout_keep_h = 0.7
    self.zoneout_keep_c = 0.7
    x_size = input_size
    h_size = num_units
    self.W_full = nn.Parameter(
        helper.orthogonal_initializer([x_size + h_size, 4 * h_size], scale=1.0))
    self.bias = nn.Parameter(torch.zeros([4 * h_size]))
    self.ln1 = helper.layernorm(num_units, 4)
    self.ln2 = helper.layernorm(num_units, 1)
    self.zoneout = helper.zoneout(self.zoneout_keep_h, self.zoneout_keep_c)
def __init__(self, config, input_size, num_units,
             use_zoneout=True, use_ln=True, indrop=True, outdrop=True,
             f_bias=1.):
    super().__init__()
    self.input_size = input_size
    self.num_units = num_units
    self.f_bias = f_bias
    self.use_zoneout = use_zoneout
    self.use_ln = use_ln
    x_size = input_size
    h_size = num_units
    self.r_size = r_size = config.r_size
    self.W_full = nn.Parameter(
        helper.orthogonal_initializer(
            [x_size + r_size + h_size, r_size + 4 * h_size], scale=1.0))
    self.bias = nn.Parameter(torch.zeros([r_size + 4 * h_size]))
    self.W_full1 = nn.Parameter(
        helper.orthogonal_initializer(
            [x_size + r_size + h_size, r_size + h_size], scale=1.0))
    self.bias1 = nn.Parameter(torch.zeros([r_size + h_size]))
    self.trans = nn.Linear(x_size + h_size, r_size)
    torch.nn.init.orthogonal_(self.trans.weight)
    torch.nn.init.constant_(self.trans.bias, 0)
    self.c_bias = nn.Parameter(torch.rand(1, num_units))
    self.hmem_bias = nn.Parameter(torch.zeros(1, config.mem_cap, r_size))
    self.memcnt = 0
    self.mem_cap = config.mem_cap
    self.tau = 1.
    self.fc = nn.Linear(x_size + h_size, config.mem_cap)
    self.fc.weight.data = helper.orthogonal_initializer(
        [self.fc.weight.shape[1], self.fc.weight.shape[0]]).t_()
    torch.nn.init.constant_(self.fc.bias, 0)
def __init__(self, input_size, num_units, f_bias=1.):
    super().__init__()
    self.input_size = input_size
    self.num_units = num_units
    self.f_bias = f_bias
    x_size = input_size
    h_size = num_units
    self.W_full = nn.Parameter(
        helper.orthogonal_initializer([x_size + h_size, 4 * h_size], scale=1.0))
    self.bias = nn.Parameter(torch.zeros([4 * h_size]))
def __init__(self, config, input_size, num_units, f_bias=1.):
    super().__init__()
    self.input_size = input_size
    self.num_units = num_units
    self.f_bias = f_bias
    self.use_ln = config.use_ln
    self.use_zoneout = config.use_zoneout
    x_size = input_size
    h_size = num_units
    self.W_full = nn.Parameter(
        helper.orthogonal_initializer([x_size + h_size, 4 * h_size], scale=1.0))
    self.bias = nn.Parameter(torch.zeros([4 * h_size]))
    self.ln1 = helper.layernorm(num_units, 4)
    self.ln2 = helper.layernorm(num_units, 1)
    self.zoneout = helper.zoneout(config.zoneout_h, config.zoneout_c)
def __init__(self, config, input_size, num_units, output_size,
             use_zoneout=True, use_ln=True):
    super().__init__()
    # Select the recurrent cell named by config.model.
    if config.model == 'armin':
        self.cell = ARMIN(config, input_size, num_units,
                          use_zoneout=use_zoneout, use_ln=use_ln)
    elif config.model == 'tardis':
        self.cell = TARDIS(config, input_size, num_units,
                           use_zoneout=use_zoneout, use_ln=use_ln)
    elif config.model == 'awta':
        self.cell = ARMIN_with_TARDIS_addr(config, input_size, num_units,
                                           use_zoneout=use_zoneout, use_ln=use_ln)
    elif config.model == 'lstm':
        self.cell = LSTMCell(config, input_size, num_units,
                             use_zoneout=use_zoneout, use_ln=use_ln)
    # The memory-augmented cells expose an (r_size + num_units)-dim output, so
    # their output projection is wider than the plain LSTM's.
    if config.model == 'lstm':
        self.fc = nn.Linear(num_units, output_size)
    else:
        self.fc = nn.Linear(config.r_size + num_units, output_size)
    self.fc.weight.data = helper.orthogonal_initializer(
        [self.fc.weight.shape[1], self.fc.weight.shape[0]]).t_()
    torch.nn.init.constant_(self.fc.bias, 0)
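# A minimal construction sketch for the wrapper above, assuming it is the
# __init__ of a module hypothetically named `RNNModel` and that `config` is a
# plain namespace exposing the fields referenced in this file. All field values
# below are illustrative, not the authors' settings.
#
#     from argparse import Namespace
#     config = Namespace(model='armin', r_size=64, mem_cap=4, key_size=16,
#                        use_ln=True, use_zoneout=True,
#                        zoneout_h=0.9, zoneout_c=0.5, keep_prob=0.9,
#                        use_head=False, head_size=1, time_fac=1.)
#     model = RNNModel(config, input_size=128, num_units=256, output_size=10)
#     # model.cell is stepped over the sequence elsewhere; self.fc maps its
#     # (r_size + num_units)-dim output to `output_size` logits.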