Example #1
0
    def __init__(self,
                 config,
                 input_size,
                 num_units,
                 use_zoneout=True,
                 use_ln=True,
                 indrop=True,
                 outdrop=True,
                 f_bias=1.):
        """Construct a memory-augmented recurrent cell.

        Args:
            config: experiment configuration; fields read here are
                ``r_size`` (memory read-vector size), ``mem_cap`` (number
                of memory slots) and ``key_size`` (addressing key size).
            input_size: dimensionality of each input vector x.
            num_units: hidden-state size h.
            use_zoneout, use_ln, indrop, outdrop: feature flags stored
                as-is; they are consumed by the step function (not shown
                in this block).
            f_bias: extra forget-gate bias (stored here; presumably added
                in the recurrence -- confirm in the step function).
        """
        super().__init__()

        self.input_size = input_size
        self.num_units = num_units
        self.f_bias = f_bias

        self.use_zoneout = use_zoneout
        self.use_ln = use_ln
        self.indrop = indrop
        self.outdrop = outdrop

        x_size = input_size
        h_size = num_units
        self.r_size = r_size = config.r_size

        # Fused recurrent weights: the step input is the concatenation
        # [x; r; h] (input, memory read, hidden), mapped to 3*h gate
        # pre-activations in a single matmul.
        self.W_full = nn.Parameter(
            helper.orthogonal_initializer(
                [x_size + r_size + h_size, 3 * h_size], scale=1.0))
        self.bias = nn.Parameter(torch.zeros([3 * h_size]))
        # Second head: 2 scalars per example (gating/interpolation
        # coefficients, presumably -- confirm in the step function).
        self.W_full1 = nn.Parameter(
            helper.orthogonal_initializer([x_size + r_size + h_size, 2],
                                          scale=1.0))
        self.bias1 = nn.Parameter(torch.zeros([2]))
        # Third head: one extra h-sized projection from the same input.
        self.W_full2 = nn.Parameter(
            helper.orthogonal_initializer(
                [x_size + r_size + h_size, 1 * h_size], scale=1.0))
        self.bias2 = nn.Parameter(torch.zeros([1 * h_size]))

        self.memcnt = 0              # memory slots written so far
        self.mem_cap = config.mem_cap
        self.tau = 1.                # temperature, presumably for soft addressing -- confirm

        # Learned initial states and memory bias.
        self.c_bias = nn.Parameter(torch.zeros(1, h_size))
        self.h_bias = nn.Parameter(torch.zeros(1, h_size))
        self.hmem_bias = nn.Parameter(torch.zeros(1, config.mem_cap, r_size))
        # One addressing key per memory slot, orthogonally (re)initialized below.
        self.keys = nn.Parameter(torch.zeros(config.mem_cap, config.key_size))
        self.vec_a = nn.Parameter(torch.zeros(h_size // 4, 1))
        nn.init.orthogonal_(self.keys)
        nn.init.orthogonal_(self.vec_a)
        # Addressing projection. Note the transpose: the initializer builds
        # an [in, out] matrix while nn.Linear stores weight as [out, in].
        self.fc = nn.Linear(
            x_size + r_size + h_size + config.key_size + config.mem_cap,
            h_size // 4)
        self.fc.weight.data = helper.orthogonal_initializer(
            [self.fc.weight.shape[1], self.fc.weight.shape[0]]).t_()
        torch.nn.init.constant_(self.fc.bias, 0)
        self.fc1 = nn.Linear(h_size + x_size, r_size)
        self.fc1.weight.data = helper.orthogonal_initializer(
            [self.fc1.weight.shape[1], self.fc1.weight.shape[0]]).t_()
        torch.nn.init.constant_(self.fc1.bias, 0)
        self.u_t = None                  # per-step usage state, set during stepping
        self.prev_read_location = None   # last read address, set during stepping
Example #2
0
    def __call__(self, x, state):
        """One recurrence step of a layer-normalized LSTM cell.

        Args:
            x: input batch, shape (batch, input_size) -- assumed 2-D.
            state: tuple ``(h, c)`` of previous hidden and cell states.

        Returns:
            ``(new_h, (new_h, new_c))`` -- output plus the next state tuple.

        NOTE(review): ``W_xh``/``W_hh``/``bias`` are re-created (and
        re-randomized) on every call instead of being learned parameters
        stored in ``__init__``; that is almost certainly unintended for
        training -- confirm against the rest of the class.
        """
        h, c = state
        h_size = self.num_units
        x_size = int(x.size()[1])

        W_xh = helper.orthogonal_initializer([x_size, 4 * h_size], scale=1.0)
        W_hh = helper.orthogonal_initializer([h_size, 4 * h_size], scale=1.0)
        bias = torch.zeros([4 * h_size])

        # Fuse the two matmuls into one: [x; h] @ [W_xh; W_hh].
        # Fixed: was np.concatenate on torch tensors, which converts to
        # numpy and then breaks torch.mm/autograd; also removed leftover
        # debug print statements.
        W_full = torch.cat((W_xh, W_hh), dim=0)
        concat = torch.cat((x, h), dim=1)

        concat = torch.mm(concat, W_full) + bias
        concat = helper.layer_norm_all(concat, 4, h_size)

        # Positional split size works across old and new torch versions
        # (the old `split_size=` keyword was renamed upstream).
        i, j, f, o = torch.split(concat, int(concat.size()[1]) // 4, dim=1)

        # Standard LSTM gate equations, with the extra forget-gate bias.
        # torch.sigmoid/torch.tanh replace the deprecated F.sigmoid/F.tanh.
        new_c = c * torch.sigmoid(f + self.f_bias) \
            + torch.sigmoid(i) * torch.tanh(j)
        new_h = torch.tanh(helper.layer_norm(new_c)) * torch.sigmoid(o)

        if self.use_zoneout:
            new_h, new_c = helper.zoneout(new_h, new_c, h, c,
                                          self.zoneout_keep_h,
                                          self.zoneout_keep_c,
                                          self.is_training)

        return new_h, (new_h, new_c)
Example #3
0
File: marnn.py  Project: zoharli/armin
    def __init__(self,
                 config,
                 input_size,
                 num_units,
                 use_zoneout=True,
                 use_ln=True,
                 indrop=True,
                 outdrop=True,
                 f_bias=1.):
        """Construct a memory-augmented recurrent cell (layer-norm variant).

        Args:
            config: experiment configuration; fields read here:
                ``use_zoneout``, ``zoneout_h``, ``zoneout_c``, ``use_ln``,
                ``keep_prob``, ``mem_cap``, ``key_size``.
            input_size: dimensionality of each input vector x.
            num_units: hidden-state size h.
            use_zoneout, use_ln, indrop, outdrop: NOTE(review) -- these
                parameters are accepted but ignored; the config values
                below take precedence. Confirm that this shadowing is
                intentional.
            f_bias: extra forget-gate bias (stored; presumably applied in
                the step function -- confirm there).
        """
        super().__init__()

        self.input_size = input_size
        self.num_units = num_units
        self.f_bias = f_bias

        # Flags come from config, not from the constructor arguments.
        self.use_zoneout = config.use_zoneout
        self.zoneout_keep_h = config.zoneout_h
        self.zoneout_keep_c = config.zoneout_c
        self.use_ln = config.use_ln

        x_size = input_size
        h_size = num_units

        # Fused weights over the concatenated [x; h; h']-sized input
        # (x_size + 2*h_size), producing 5*h and 2*h pre-activations.
        self.W_full = nn.Parameter(
            helper.orthogonal_initializer([x_size + 2 * h_size, 5 * h_size],
                                          scale=1.0))
        self.bias = nn.Parameter(torch.zeros([5 * h_size]))
        self.W_full1 = nn.Parameter(
            helper.orthogonal_initializer([x_size + 2 * h_size, 2 * h_size],
                                          scale=1.0))
        self.bias1 = nn.Parameter(torch.zeros([2 * h_size]))

        # Layer norms over grouped gate blocks (second arg = group count).
        self.ln1 = helper.layernorm(num_units, 5)
        self.ln2 = helper.layernorm(num_units, 1)
        self.ln3 = helper.layernorm(num_units, 2)
        self.ln4 = helper.layernorm(config.mem_cap, 1)
        self.drop = nn.Dropout(p=1 - config.keep_prob)
        self.zoneout = helper.zoneout1(config.zoneout_c)

        self.memcnt = 0              # memory slots written so far
        self.mem_cap = config.mem_cap
        self.tau = 1.                # temperature, presumably for soft addressing -- confirm

        # One addressing key per memory slot, orthogonally (re)initialized below.
        self.keys = nn.Parameter(torch.zeros(config.mem_cap, config.key_size))
        self.vec_a = nn.Parameter(torch.zeros(h_size // 4, 1))
        nn.init.orthogonal_(self.keys)
        nn.init.orthogonal_(self.vec_a)
        # Addressing projection; transpose because the initializer builds
        # [in, out] while nn.Linear stores [out, in].
        self.fc = nn.Linear(
            x_size + 2 * h_size + config.key_size + config.mem_cap,
            h_size // 4)
        self.fc.weight.data = helper.orthogonal_initializer(
            [self.fc.weight.shape[1], self.fc.weight.shape[0]]).t_()
        torch.nn.init.constant_(self.fc.bias, 0)
        self.u_t = None                  # per-step usage state, set during stepping
        self.prev_read_location = None   # last read address, set during stepping
Example #4
0
File: marnn.py  Project: zoharli/armin
    def __init__(self, config, input_size, num_units, f_bias=1.):
        """Construct a memory-augmented recurrent cell with a read head.

        Args:
            config: experiment configuration; fields read here:
                ``use_zoneout``, ``zoneout_h``, ``zoneout_c``, ``use_ln``,
                ``use_head``, ``r_size``, ``keep_prob``, ``mem_cap``.
            input_size: dimensionality of each input vector x.
            num_units: hidden-state size h.
            f_bias: extra forget-gate bias (stored; presumably applied in
                the step function -- confirm there).
        """
        super().__init__()

        self.input_size = input_size
        self.num_units = num_units
        self.f_bias = f_bias

        self.use_zoneout = config.use_zoneout
        self.zoneout_keep_h = config.zoneout_h
        self.zoneout_keep_c = config.zoneout_c
        self.use_ln = config.use_ln
        self.use_head = config.use_head

        x_size = input_size
        h_size = num_units
        self.r_size = r_size = config.r_size

        # Fused weights: step input is [x; r; h] (input, memory read,
        # hidden), producing r_size + 4*h and r_size + h pre-activations.
        self.W_full = nn.Parameter(
            helper.orthogonal_initializer(
                [x_size + r_size + h_size, r_size + 4 * h_size], scale=1.0))
        self.bias = nn.Parameter(torch.zeros([r_size + 4 * h_size]))
        self.W_full1 = nn.Parameter(
            helper.orthogonal_initializer(
                [x_size + r_size + h_size, r_size + h_size], scale=1.0))
        self.bias1 = nn.Parameter(torch.zeros([r_size + h_size]))

        # Layer norms over grouped gate blocks (second arg = group count).
        self.ln1 = helper.layernorm(num_units, 5)
        self.ln2 = helper.layernorm(num_units, 1)
        self.ln3 = helper.layernorm(num_units, 2)
        self.drop = nn.Dropout(p=1 - config.keep_prob)
        self.zoneout = helper.zoneout1(config.zoneout_c)

        # Learned initial cell state and memory bias.
        # NOTE(review): c_bias is initialized twice -- torch.randn here and
        # normal_() two lines below; the second overwrites the first, so
        # one of the two is redundant.
        self.c_bias = nn.Parameter(torch.randn(1, num_units))
        self.hmem_bias = nn.Parameter(torch.zeros(1, config.mem_cap, r_size))
        torch.nn.init.normal_(self.c_bias)

        #self.time_fac=torch.cat([torch.ones(config.head_size),torch.Tensor([config.time_fac])])
        self.memcnt = 0              # memory slots written so far
        self.mem_cap = config.mem_cap
        self.tau = 0.                # temperature, presumably for soft addressing -- confirm
        # Addressing projection over [x; h]; transpose because the
        # initializer builds [in, out] while nn.Linear stores [out, in].
        self.fc = nn.Linear(x_size + h_size, config.mem_cap)
        self.fc.weight.data = helper.orthogonal_initializer(
            [self.fc.weight.shape[1], self.fc.weight.shape[0]]).t_()
        torch.nn.init.constant_(self.fc.bias, 0.)
        # Projection from hidden state to memory read-vector size.
        self.trans = nn.Linear(h_size, config.r_size)
        self.trans.weight.data = helper.orthogonal_initializer(
            [self.trans.weight.shape[1], self.trans.weight.shape[0]]).t_()
        torch.nn.init.constant_(self.trans.bias, 0.)
Example #5
0
    def __init__(self,
                 input_size,
                 num_units,
                 use_ln=1,
                 use_zoneout=1,
                 f_bias=1.):
        """LSTM-style cell with layer norm and zoneout (fixed keep prob 0.7).

        Args:
            input_size: dimensionality of each input vector x.
            num_units: hidden-state size h.
            use_ln: flag enabling layer normalization in the step function.
            use_zoneout: flag enabling zoneout regularization.
            f_bias: extra forget-gate bias applied during the recurrence.
        """
        super().__init__()

        self.input_size = input_size
        self.num_units = num_units
        self.f_bias = f_bias
        self.use_ln = use_ln
        self.use_zoneout = use_zoneout

        # Hard-coded zoneout keep probabilities for h and c.
        self.zoneout_keep_h = 0.7
        self.zoneout_keep_c = 0.7

        in_dim, hid_dim = input_size, num_units
        # One fused, orthogonally-initialized weight for all four gates:
        # [x; h] -> 4*h pre-activations in a single matmul.
        self.W_full = nn.Parameter(helper.orthogonal_initializer(
            [in_dim + hid_dim, 4 * hid_dim], scale=1.0))
        self.bias = nn.Parameter(torch.zeros([4 * hid_dim]))

        self.ln1 = helper.layernorm(num_units, 4)
        self.ln2 = helper.layernorm(num_units, 1)
        self.zoneout = helper.zoneout(self.zoneout_keep_h, self.zoneout_keep_c)
Example #6
0
    def __init__(self,
                 config,
                 input_size,
                 num_units,
                 use_zoneout=True,
                 use_ln=True,
                 indrop=True,
                 outdrop=True,
                 f_bias=1.):
        """Memory-augmented recurrent cell with a [x; h] -> r translation head.

        Args:
            config: experiment configuration; fields read here are
                ``r_size`` and ``mem_cap``.
            input_size: dimensionality of each input vector x.
            num_units: hidden-state size h.
            use_zoneout, use_ln: feature flags stored as-is for the step
                function. ``indrop``/``outdrop`` are accepted but unused here.
            f_bias: extra forget-gate bias for the recurrence.
        """
        super().__init__()

        self.input_size = input_size
        self.num_units = num_units
        self.f_bias = f_bias
        self.use_zoneout = use_zoneout
        self.use_ln = use_ln

        in_dim, hid_dim = input_size, num_units
        self.r_size = read_dim = config.r_size
        fused_in = in_dim + read_dim + hid_dim

        # Fused weights over [x; r; h], yielding r + 4*h and r + h outputs.
        self.W_full = nn.Parameter(helper.orthogonal_initializer(
            [fused_in, read_dim + 4 * hid_dim], scale=1.0))
        self.bias = nn.Parameter(torch.zeros([read_dim + 4 * hid_dim]))
        self.W_full1 = nn.Parameter(helper.orthogonal_initializer(
            [fused_in, read_dim + hid_dim], scale=1.0))
        self.bias1 = nn.Parameter(torch.zeros([read_dim + hid_dim]))

        # Projection from [x; h] to the memory read-vector size.
        self.trans = nn.Linear(in_dim + hid_dim, read_dim)
        torch.nn.init.orthogonal_(self.trans.weight)
        torch.nn.init.constant_(self.trans.bias, 0)

        # Learned initial cell state (uniform init) and memory bias.
        self.c_bias = nn.Parameter(torch.rand(1, num_units))
        self.hmem_bias = nn.Parameter(torch.zeros(1, config.mem_cap, read_dim))

        self.memcnt = 0
        self.mem_cap = config.mem_cap
        self.tau = 1.
        # Addressing head: [x; h] -> one logit per memory slot. The weight
        # is replaced by an orthogonal init built as [in, out] and
        # transposed, since nn.Linear stores weight as [out, in].
        self.fc = nn.Linear(in_dim + hid_dim, config.mem_cap)
        w = self.fc.weight
        self.fc.weight.data = helper.orthogonal_initializer(
            [w.shape[1], w.shape[0]]).t_()
        torch.nn.init.constant_(self.fc.bias, 0)
Example #7
0
    def __init__(self, input_size, num_units, f_bias=1.):
        """Minimal LSTM-style cell: one fused orthogonal weight, zero bias.

        Args:
            input_size: dimensionality of each input vector x.
            num_units: hidden-state size h.
            f_bias: extra forget-gate bias applied in the recurrence.
        """
        super().__init__()

        self.input_size = input_size
        self.num_units = num_units
        self.f_bias = f_bias

        in_dim, hid_dim = input_size, num_units
        # Fused gate weights: [x; h] -> 4*h pre-activations in one matmul.
        self.W_full = nn.Parameter(helper.orthogonal_initializer(
            [in_dim + hid_dim, 4 * hid_dim], scale=1.0))
        self.bias = nn.Parameter(torch.zeros([4 * hid_dim]))
Example #8
0
    def __init__(self, config, input_size, num_units, f_bias=1.):
        """LSTM-style cell with config-driven layer norm and zoneout.

        Args:
            config: experiment configuration; fields read here:
                ``use_ln``, ``use_zoneout``, ``zoneout_h``, ``zoneout_c``.
            input_size: dimensionality of each input vector x.
            num_units: hidden-state size h.
            f_bias: extra forget-gate bias applied in the recurrence.
        """
        super().__init__()

        self.input_size = input_size
        self.num_units = num_units
        self.f_bias = f_bias
        self.use_ln = config.use_ln
        self.use_zoneout = config.use_zoneout

        in_dim, hid_dim = input_size, num_units
        # Fused gate weights: [x; h] -> 4*h pre-activations in one matmul.
        self.W_full = nn.Parameter(helper.orthogonal_initializer(
            [in_dim + hid_dim, 4 * hid_dim], scale=1.0))
        self.bias = nn.Parameter(torch.zeros([4 * hid_dim]))

        self.ln1 = helper.layernorm(num_units, 4)
        self.ln2 = helper.layernorm(num_units, 1)
        self.zoneout = helper.zoneout(config.zoneout_h, config.zoneout_c)
Example #9
0
    def __init__(self,
                 config,
                 input_size,
                 num_units,
                 output_size,
                 use_zoneout=True,
                 use_ln=True):
        """Wrap the configured recurrent cell and an output projection.

        Args:
            config: must provide ``model`` in
                {'armin', 'tardis', 'awta', 'lstm'}; non-lstm models also
                need ``r_size`` for the output projection width.
            input_size: dimensionality of each input vector x.
            num_units: hidden-state size of the wrapped cell.
            output_size: width of the final linear projection.
            use_zoneout, use_ln: forwarded to the cell constructor.

        Raises:
            ValueError: if ``config.model`` is not a recognized cell type.
                (Previously an unknown model silently left ``self.cell``
                unset, failing later with an opaque AttributeError.)
        """
        super().__init__()

        if config.model == 'armin':
            self.cell = ARMIN(config,
                              input_size,
                              num_units,
                              use_zoneout=use_zoneout,
                              use_ln=use_ln)
        elif config.model == 'tardis':
            self.cell = TARDIS(config,
                               input_size,
                               num_units,
                               use_zoneout=use_zoneout,
                               use_ln=use_ln)
        elif config.model == 'awta':
            self.cell = ARMIN_with_TARDIS_addr(config,
                                               input_size,
                                               num_units,
                                               use_zoneout=use_zoneout,
                                               use_ln=use_ln)
        elif config.model == 'lstm':
            self.cell = LSTMCell(config,
                                 input_size,
                                 num_units,
                                 use_zoneout=use_zoneout,
                                 use_ln=use_ln)
        else:
            raise ValueError('unknown model type: %r' % (config.model,))

        # LSTM emits only h; the memory models emit [read; h], hence the
        # wider input to the projection.
        if config.model == 'lstm':
            self.fc = nn.Linear(num_units, output_size)
        else:
            self.fc = nn.Linear(config.r_size + num_units, output_size)
        # Orthogonal re-init; transpose because the initializer builds
        # [in, out] while nn.Linear stores weight as [out, in].
        self.fc.weight.data = helper.orthogonal_initializer(
            [self.fc.weight.shape[1], self.fc.weight.shape[0]]).t_()
        torch.nn.init.constant_(self.fc.bias, 0)