Exemplo n.º 1
0
    def _init_params(self):
        """Create the recurrent layer's weights and collect them in ``self.params``.

        Three input-to-hidden matrices (``W_xz``, ``W_xr``, ``W_xh``), three
        biases (``b_z``, ``b_r``, ``b_h``) and three hidden-to-hidden matrices
        (``W_hz``, ``W_hr``, ``W_hh``) are always created.  When
        ``self.with_context`` is true, the context projections ``W_cz``,
        ``W_cr``, ``W_ch`` and ``W_c_init`` are created and appended too.
        """
        prefix = self.pname
        hid = self.n_hids
        in2hid = (self.n_in, hid)
        hid2hid = (hid, hid)
        bias_shape = (hid, )

        # Input-to-hidden projections.
        self.W_xz = norm_weight(shape=in2hid, name=_p(prefix, 'W_xz'))
        self.W_xr = norm_weight(shape=in2hid, name=_p(prefix, 'W_xr'))
        self.W_xh = norm_weight(shape=in2hid, name=_p(prefix, 'W_xh'))

        # Biases.
        self.b_z = constant_weight(shape=bias_shape, name=_p(prefix, 'b_z'))
        self.b_r = constant_weight(shape=bias_shape, name=_p(prefix, 'b_r'))
        self.b_h = constant_weight(shape=bias_shape, name=_p(prefix, 'b_h'))

        # Hidden-to-hidden (recurrent) projections.
        self.W_hz = ortho_weight(shape=hid2hid, name=_p(prefix, 'W_hz'))
        self.W_hr = ortho_weight(shape=hid2hid, name=_p(prefix, 'W_hr'))
        self.W_hh = ortho_weight(shape=hid2hid, name=_p(prefix, 'W_hh'))

        self.params = [
            self.W_xz, self.W_xr, self.W_xh,
            self.W_hz, self.W_hr, self.W_hh,
            self.b_z, self.b_r, self.b_h,
        ]

        if not self.with_context:
            return

        # Context-to-hidden projections, only needed when a context is fed in.
        ctx2hid = (self.c_hids, hid)
        self.W_cz = norm_weight(shape=ctx2hid, name=_p(prefix, 'W_cz'))
        self.W_cr = norm_weight(shape=ctx2hid, name=_p(prefix, 'W_cr'))
        self.W_ch = norm_weight(shape=ctx2hid, name=_p(prefix, 'W_ch'))
        self.W_c_init = norm_weight(shape=ctx2hid,
                                    name=_p(prefix, 'W_c_init'))

        self.params += [self.W_cz, self.W_cr, self.W_ch, self.W_c_init]
Exemplo n.º 2
0
    def _init_params(self):
        """Create the recurrent layer's weights and collect them in ``self.params``.

        Always created: ``W_xz``/``W_xr``/``W_xh`` (input-to-hidden),
        ``b_z``/``b_r``/``b_h`` (biases) and ``W_hz``/``W_hr``/``W_hh``
        (hidden-to-hidden).

        If ``self.with_layernorm`` is true, a ``*_lnb`` / ``*_lns`` pair is
        added for each of ``xz``, ``xr``, ``xh``, ``z``, ``r`` and ``h``.

        If ``self.with_context`` is true, the context projections ``W_cz``,
        ``W_cr``, ``W_ch`` and ``W_c_init`` are added, followed (when layer
        norm is also enabled) by ``*_lnb`` / ``*_lns`` pairs for ``cz``,
        ``cr`` and ``ch``.

        Parameters are created and registered in exactly that order.
        """
        shape_xh = (self.n_in, self.n_hids)
        shape_hh = (self.n_hids, self.n_hids)
        # A real 1-tuple.  The layer-norm calls used to pass ``(self.n_hids)``,
        # which is just the int ``self.n_hids`` and disagreed with the bias
        # calls in this same method.
        shape_h = (self.n_hids, )

        def layernorm_pair(tag):
            # Build the (``<tag>_lnb``, ``<tag>_lns``) pair, in that order.
            # NOTE(review): ``scale_add`` / ``scale_mul`` are not defined in
            # this method; they are presumably module-level constants - confirm.
            lnb = constant_weight(shape=shape_h, value=scale_add,
                                  name=_p(self.pname, tag + '_lnb'))
            lns = constant_weight(shape=shape_h, value=scale_mul,
                                  name=_p(self.pname, tag + '_lns'))
            return lnb, lns

        self.W_xz = norm_weight(rng=self.rng, shape=shape_xh, name=_p(self.pname, 'W_xz'))
        self.W_xr = norm_weight(rng=self.rng, shape=shape_xh, name=_p(self.pname, 'W_xr'))
        self.W_xh = norm_weight(rng=self.rng, shape=shape_xh, name=_p(self.pname, 'W_xh'))
        self.b_z = constant_weight(shape=shape_h, name=_p(self.pname, 'b_z'))
        self.b_r = constant_weight(shape=shape_h, name=_p(self.pname, 'b_r'))
        self.b_h = constant_weight(shape=shape_h, name=_p(self.pname, 'b_h'))
        self.W_hz = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_hz'))
        self.W_hr = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_hr'))
        self.W_hh = ortho_weight(rng=self.rng, shape=shape_hh, name=_p(self.pname, 'W_hh'))

        self.params = [self.W_xz, self.W_xr, self.W_xh,
                       self.W_hz, self.W_hr, self.W_hh,
                       self.b_z, self.b_r, self.b_h]

        if self.with_layernorm:
            self.W_xz_lnb, self.W_xz_lns = layernorm_pair('xz')
            self.W_xr_lnb, self.W_xr_lns = layernorm_pair('xr')
            self.W_xh_lnb, self.W_xh_lns = layernorm_pair('xh')

            self.W_z_lnb, self.W_z_lns = layernorm_pair('z')
            self.W_r_lnb, self.W_r_lns = layernorm_pair('r')
            self.W_h_lnb, self.W_h_lns = layernorm_pair('h')

            self.params += [self.W_xz_lnb, self.W_xz_lns,
                            self.W_xr_lnb, self.W_xr_lns,
                            self.W_xh_lnb, self.W_xh_lns,
                            self.W_z_lnb, self.W_z_lns,
                            self.W_r_lnb, self.W_r_lns,
                            self.W_h_lnb, self.W_h_lns]

        if self.with_context:
            shape_ch = (self.c_hids, self.n_hids)
            self.W_cz = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_cz'))
            self.W_cr = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_cr'))
            self.W_ch = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_ch'))
            self.W_c_init = norm_weight(rng=self.rng, shape=shape_ch, name=_p(self.pname, 'W_c_init'))

            self.params += [self.W_cz, self.W_cr, self.W_ch, self.W_c_init]

            if self.with_layernorm:
                self.W_cz_lnb, self.W_cz_lns = layernorm_pair('cz')
                self.W_cr_lnb, self.W_cr_lns = layernorm_pair('cr')
                self.W_ch_lnb, self.W_ch_lns = layernorm_pair('ch')

                self.params += [self.W_cz_lnb, self.W_cz_lns,
                                self.W_cr_lnb, self.W_cr_lns,
                                self.W_ch_lnb, self.W_ch_lns]
Exemplo n.º 3
0
    def _init_params(self):
        """Create the two input weight matrices and the shared bias.

        ``W0`` has shape ``(n_in_0, n_out)`` and ``W1`` has shape
        ``(n_in_1, n_out)``.  Both come from ``ortho_weight`` when
        ``self.orth`` is true and from ``norm_weight`` otherwise.  The bias
        ``b`` has shape ``(n_out, )``.  All three are stored in
        ``self.params``.

        Raises:
            ValueError: if ``self.orth`` is true and either input size
                differs from ``n_out``.
        """
        prefix = self.pname
        n_out = self.n_out

        if self.orth:
            # The orthogonal initialiser is only used for square matrices here.
            if not (self.n_in_0 == n_out and self.n_in_1 == n_out):
                raise ValueError('n_in != n_out when require orth in FeedForward')
            make_weight = ortho_weight
        else:
            make_weight = norm_weight

        self.W0 = make_weight(rng=self.rng, shape=(self.n_in_0, n_out),
                              name=_p(prefix, 'W0'))
        self.W1 = make_weight(rng=self.rng, shape=(self.n_in_1, n_out),
                              name=_p(prefix, 'W1'))
        self.b = constant_weight(shape=(n_out, ), name=_p(prefix, 'b'))

        self.params = [self.W0, self.W1, self.b]
Exemplo n.º 4
0
    def _init_params(self):
        """Create the parameters and the ``mkl_gru.GRU`` op for this layer.

        ``W_x`` has shape ``(3 * n_in, n_hids)``, ``W_h`` has shape
        ``(3 * n_hids, n_hids)`` and ``b`` has shape ``(3 * n_hids, )``.
        They are stored in ``self.params`` as ``[W_x, W_h, b]``.  The method
        also builds ``self.GRU_op`` and a zero-filled ``self.h_init_state``.
        """
        prefix = self.pname
        hid = self.n_hids

        self.W_x = norm_weight(shape=(self.n_in * 3, hid),
                               name=_p(prefix, 'W_x'))
        self.b = constant_weight(shape=(hid * 3, ), name=_p(prefix, 'b'))
        self.W_h = ortho_weight(shape=(hid * 3, hid),
                                name=_p(prefix, 'W_h'))
        self.params = [self.W_x, self.W_h, self.b]

        self.GRU_op = mkl_gru.GRU(hid=hid, return_sequences=True)

        # NOTE(review): the (80, 1000) shape is hard-coded and does not depend
        # on n_hids; presumably (batch size, hidden size) - confirm with callers.
        self.h_init_state = numpy.zeros((80, 1000), dtype=numpy.float64)
Exemplo n.º 5
0
    def _init_params(self):
        """Create the weight matrix ``W`` and bias ``b`` of the layer.

        ``W`` has shape ``(n_in, n_out)`` and comes from ``ortho_weight``
        when ``self.orth`` is true, otherwise from ``norm_weight``.  ``b``
        has shape ``(n_out, )``.  Both are stored in ``self.params``.

        Raises:
            ValueError: if ``self.orth`` is true and ``n_in != n_out``.
        """
        prefix = self.pname

        if self.orth and self.n_in != self.n_out:
            raise ValueError('n_in != n_out when require orth in FeedForward')

        make_weight = ortho_weight if self.orth else norm_weight
        self.W = make_weight(rng=self.rng, shape=(self.n_in, self.n_out),
                             name=_p(prefix, 'W'))
        self.b = constant_weight(shape=(self.n_out, ), name=_p(prefix, 'b'))

        self.params = [self.W, self.b]
Exemplo n.º 6
0
    def _init_params(self):
        """Create ``W`` for the first input and a ``FeedForward`` for the second.

        ``W`` has shape ``(n_in_0, n_out)`` and comes from ``ortho_weight``
        when ``self.orth`` is true, otherwise from ``norm_weight``.  A
        ``FeedForward`` sub-layer is then built for ``n_in_1 -> n_out`` and
        its parameters are appended after ``W`` in ``self.params``.

        Raises:
            ValueError: if ``self.orth`` is true and ``n_in_0 != n_out``.
        """
        prefix = self.pname

        if self.orth and self.n_in_0 != self.n_out:
            raise ValueError('n_in != n_out when require orth in FeedForward')

        make_weight = ortho_weight if self.orth else norm_weight
        self.W = make_weight(rng=self.rng, shape=(self.n_in_0, self.n_out),
                             name=_p(prefix, 'W'))
        self.params = [self.W]

        # The second input is handled by a nested layer that owns its own weights.
        self.ff = FeedForward(self.n_in_1, self.n_out, orth=self.orth,
                              rng=self.rng, name=_p(prefix, 'FF_W'))
        self.params.extend(self.ff.params)
0
    def _init_params(self):
        """Create this layer's weights and append them to ``self.params``.

        The ``*zr`` parameters are twice as wide as their ``*h`` counterparts
        (``2 * n_hids`` columns / entries).  Optional extras, each guarded by
        its own flag, are created afterwards in this order: context
        projections (``with_context``), ``struct_begin_tag``
        (``with_begin_tag``), ``struct_end_tag`` (``with_end_tag``) and a
        nested ``GRU`` plus ``ATTENTION`` (``n_att_ctx`` truthy).

        NOTE(review): this method only appends to ``self.params``; the list
        must already exist before it is called - confirm in the constructor.
        """
        prefix = self.pname
        hid = self.n_hids
        n_in = self.n_in

        self.W_xzr = norm_weight(rng=self.rng, shape=(n_in, 2 * hid),
                                 name=_p(prefix, 'W_xzr'))
        self.W_xh = norm_weight(rng=self.rng, shape=(n_in, hid),
                                name=_p(prefix, 'W_xh'))
        self.b_zr = constant_weight(shape=(2 * hid, ), name=_p(prefix, 'b_zr'))
        self.b_h = constant_weight(shape=(hid, ), name=_p(prefix, 'b_h'))
        self.W_hzr = multi_orth(rng=self.rng, size=(hid, 2 * hid),
                                name=_p(prefix, 'W_hzr'))
        self.W_hh = ortho_weight(rng=self.rng, shape=(hid, hid),
                                 name=_p(prefix, 'W_hh'))

        self.params += [
            self.W_xzr, self.W_xh,
            self.W_hzr, self.W_hh,
            self.b_zr, self.b_h,
        ]

        if self.with_context:
            ctx2hid = (self.c_hids, hid)
            self.W_cz = norm_weight(rng=self.rng, shape=ctx2hid,
                                    name=_p(prefix, 'W_cz'))
            self.W_cr = norm_weight(rng=self.rng, shape=ctx2hid,
                                    name=_p(prefix, 'W_cr'))
            self.W_ch = norm_weight(rng=self.rng, shape=ctx2hid,
                                    name=_p(prefix, 'W_ch'))
            self.W_c_init = norm_weight(rng=self.rng, shape=ctx2hid,
                                        name=_p(prefix, 'W_c_init'))

            self.params += [self.W_cz, self.W_cr, self.W_ch, self.W_c_init]

        if self.with_begin_tag:
            # Sized like the hidden state.
            self.struct_begin_tag = constant_weight(
                shape=(hid, ), value=0.,
                name=_p(prefix, 'struct_begin_tag'))
            self.params += [self.struct_begin_tag]

        if self.with_end_tag:
            # Sized like the input.
            self.struct_end_tag = constant_weight(
                shape=(n_in, ), value=0.,
                name=_p(prefix, 'struct_end_tag'))
            self.params += [self.struct_end_tag]

        if self.n_att_ctx:
            self.gru_combine_ctx_h = GRU(self.n_att_ctx, hid, rng=self.rng,
                                         name=_p(prefix, 'gru_combine_ctx_h'))
            self.params.extend(self.gru_combine_ctx_h.params)
            self.attention = ATTENTION(hid, self.rng,
                                       name=_p(prefix, 'att_ctx'))
            self.params.extend(self.attention.params)
Exemplo n.º 8
0
    def _init_params(self):
        """Create every decoder parameter and register it in ``self.params``.

        Parameters are created in this order:

        1. ``W_x*`` / ``b_*`` / ``W_h*`` weights for the ``z``, ``r``, ``h`` paths.
        2. Context projections ``W_cz``, ``W_cr``, ``W_ch``, ``W_c_init`` and
           the bias ``b_c_init``.
        3. The ``*_n1_*`` weights of the extra GRU described in the comments
           below.
        4. Attention weights (only if ``self.with_attention``), followed by
           coverage weights (only if ``self.with_coverage`` as well).
        5. Context-gate weights (only if ``self.with_context_gate``).
        6. Readout weights ``W_o_c``, ``W_o_h``, ``W_o_e`` and ``b_o``.

        The coverage branch is chosen by comparing ``self.coverage_type``
        with ``'linguistic'`` by value (``==``).  The previous identity test
        (``is``) only worked when the string happened to be the same interned
        object.
        """
        shape_xh = (self.n_in, self.n_hids)
        shape_hh = (self.n_hids, self.n_hids)

        self.W_xz = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xz'))
        self.W_xr = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xr'))
        self.W_xh = norm_weight(shape=shape_xh, name=_p(self.pname, 'W_xh'))
        self.b_z = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_z'))
        self.b_r = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_r'))
        self.b_h = constant_weight(shape=(self.n_hids, ),
                                   name=_p(self.pname, 'b_h'))
        self.W_hz = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hz'))
        self.W_hr = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hr'))
        self.W_hh = ortho_weight(shape=shape_hh, name=_p(self.pname, 'W_hh'))

        self.params = [
            self.W_xz, self.W_xr, self.W_xh, self.W_hz, self.W_hr, self.W_hh,
            self.b_z, self.b_r, self.b_h
        ]

        shape_ch = (self.n_cdim, self.n_hids)
        self.W_cz = norm_weight(shape=shape_ch, name=_p(self.pname, 'W_cz'))
        self.W_cr = norm_weight(shape=shape_ch, name=_p(self.pname, 'W_cr'))
        self.W_ch = norm_weight(shape=shape_ch, name=_p(self.pname, 'W_ch'))
        self.W_c_init = norm_weight(shape=shape_ch,
                                    name=_p(self.pname, 'W_c_init'))
        self.b_c_init = constant_weight(shape=(self.n_hids, ),
                                        name=_p(self.pname, 'b_c_init'))

        self.params += [
            self.W_cz, self.W_cr, self.W_ch, self.W_c_init, self.b_c_init
        ]

        # These parameters are created unconditionally so that they exist in
        # both the with_context and the with_attention modes.
        # Modification in this version:
        #   in the paper, e_{i,j} = a(s_{i-1}, h_j)
        #   here,         e_{i,j} = a(GRU(s_{i-1}, y_{i-1}), h_j),
        # which also considers the most recently generated target word.
        # All of the following parameters belong to that introduced GRU.
        self.W_n1_h = ortho_weight(shape=shape_hh,
                                   name=_p(self.pname, 'W_n1_h'))
        self.W_n1_r = ortho_weight(shape=shape_hh,
                                   name=_p(self.pname, 'W_n1_r'))
        self.W_n1_z = ortho_weight(shape=shape_hh,
                                   name=_p(self.pname, 'W_n1_z'))
        self.b_n1_h = constant_weight(shape=(self.n_hids, ),
                                      name=_p(self.pname, 'b_n1_h'))
        self.b_n1_r = constant_weight(shape=(self.n_hids, ),
                                      name=_p(self.pname, 'b_n1_r'))
        self.b_n1_z = constant_weight(shape=(self.n_hids, ),
                                      name=_p(self.pname, 'b_n1_z'))
        self.params += [
            self.W_n1_h, self.W_n1_r, self.W_n1_z, self.b_n1_h, self.b_n1_r,
            self.b_n1_z
        ]

        if self.with_attention:
            self.A_cp = norm_weight(shape=shape_ch,
                                    name=_p(self.pname, 'A_cp'))
            self.B_hp = norm_weight(shape=shape_hh,
                                    name=_p(self.pname, 'B_hp'))
            self.b_tt = constant_weight(shape=(self.n_hids, ),
                                        name=_p(self.pname, 'b_tt'))
            self.D_pe = norm_weight(shape=(self.n_hids, 1),
                                    name=_p(self.pname, 'D_pe'))

            self.params += [self.A_cp, self.B_hp, self.b_tt, self.D_pe]

            # Coverage only works for the attention model.
            if self.with_coverage:
                shape_covh = (self.coverage_dim, self.n_hids)
                # NOTE(review): C_covp is not appended to self.params anywhere
                # in this method - confirm whether that is intentional.
                self.C_covp = norm_weight(shape=shape_covh,
                                          name=_p(self.pname, 'Cov_covp'))

                # Compare by value: ``is`` on a string literal tests object
                # identity and is not guaranteed to hold for equal strings.
                if self.coverage_type == 'linguistic':
                    # For linguistic coverage a fertility model is necessary,
                    # since it yields better translation and alignment quality.
                    self.W_cov_fertility = norm_weight(
                        shape=(self.n_cdim, 1),
                        name=_p(self.pname, 'W_cov_fertility'))
                    self.b_cov_fertility = constant_weight(
                        shape=(1, ), name=_p(self.pname, 'b_cov_fertility'))
                    self.params += [self.W_cov_fertility, self.b_cov_fertility]
                else:
                    # For neural-network-based coverage, gating is necessary.
                    shape_covcov = (self.coverage_dim, self.coverage_dim)
                    self.W_cov_h = ortho_weight(shape=shape_covcov,
                                                name=_p(self.pname, 'W_cov_h'))
                    self.W_cov_r = ortho_weight(shape=shape_covcov,
                                                name=_p(self.pname, 'W_cov_r'))
                    self.W_cov_z = ortho_weight(shape=shape_covcov,
                                                name=_p(self.pname, 'W_cov_z'))
                    self.b_cov_h = constant_weight(
                        shape=(self.coverage_dim, ),
                        name=_p(self.pname, 'b_cov_h'))
                    self.b_cov_r = constant_weight(
                        shape=(self.coverage_dim, ),
                        name=_p(self.pname, 'b_cov_r'))
                    self.b_cov_z = constant_weight(
                        shape=(self.coverage_dim, ),
                        name=_p(self.pname, 'b_cov_z'))

                    self.params += [
                        self.W_cov_h, self.W_cov_r, self.W_cov_z, self.b_cov_h,
                        self.b_cov_r, self.b_cov_z
                    ]

                    # Parameters for the coverage inputs.
                    # Attention probability:
                    self.W_cov_ph = norm_weight(
                        shape=(1, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_ph'))
                    self.W_cov_pr = norm_weight(
                        shape=(1, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_pr'))
                    self.W_cov_pz = norm_weight(
                        shape=(1, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_pz'))
                    # Source annotations:
                    self.W_cov_ch = norm_weight(
                        shape=(self.n_cdim, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_ch'))
                    self.W_cov_cr = norm_weight(
                        shape=(self.n_cdim, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_cr'))
                    self.W_cov_cz = norm_weight(
                        shape=(self.n_cdim, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_cz'))
                    # Previous decoding states:
                    self.W_cov_hh = norm_weight(
                        shape=(self.n_hids, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_hh'))
                    self.W_cov_hr = norm_weight(
                        shape=(self.n_hids, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_hr'))
                    self.W_cov_hz = norm_weight(
                        shape=(self.n_hids, self.coverage_dim),
                        name=_p(self.pname, 'W_cov_hz'))

                    self.params += [
                        self.W_cov_ph, self.W_cov_pr, self.W_cov_pz,
                        self.W_cov_ch, self.W_cov_cr, self.W_cov_cz,
                        self.W_cov_hh, self.W_cov_hr, self.W_cov_hz
                    ]

        # Context gate; works in both the with_attention and with_context modes.
        if self.with_context_gate:
            self.W_ctx_h = norm_weight(shape=(self.n_hids, self.n_hids),
                                       name=_p(self.pname, 'W_ctx_h'))
            self.W_ctx_c = norm_weight(shape=(self.n_cdim, self.n_hids),
                                       name=_p(self.pname, 'W_ctx_c'))
            # NOTE(review): b_ctx is created but not appended to self.params
            # below - confirm whether it is meant to be registered.
            self.b_ctx = constant_weight(shape=(self.n_hids, ),
                                         name=_p(self.pname, 'b_ctx'))
            self.params += [self.W_ctx_h, self.W_ctx_c]

        # Readout.
        n_out = self.n_in * self.maxout_part
        self.W_o_c = norm_weight(shape=(self.n_cdim, n_out),
                                 name=_p(self.pname, 'W_out_c'))
        self.W_o_h = norm_weight(shape=(self.n_hids, n_out),
                                 name=_p(self.pname, 'W_out_h'))
        self.W_o_e = norm_weight(shape=(self.n_in, n_out),
                                 name=_p(self.pname, 'W_out_e'))
        self.b_o = constant_weight(shape=(n_out, ),
                                   name=_p(self.pname, 'b_out_o'))

        self.params += [self.W_o_c, self.W_o_h, self.W_o_e, self.b_o]
Exemplo n.º 9
0
    def _init_params(self):
        """Create the decoder's parameters and register them in ``self.params``.

        Creation order: the ``W_x*`` / ``b_*`` / ``W_h*`` weights, the context
        projections (``W_c*``, ``W_c_init``, ``b_c_init``), the ``*_n1_*``
        weights, the attention weights (only if ``self.with_attention``) and
        the readout weights.
        """
        prefix = self.pname
        hid = self.n_hids
        in2hid = (self.n_in, hid)
        hid2hid = (hid, hid)
        bias_shape = (hid, )

        # These weights combine the most recently generated word with the
        # decoder state, so they cannot be applied with tied weights.
        self.W_xz = norm_weight(shape=in2hid, name=_p(prefix, 'W_xz'))
        self.W_xr = norm_weight(shape=in2hid, name=_p(prefix, 'W_xr'))
        self.W_xh = norm_weight(shape=in2hid, name=_p(prefix, 'W_xh'))
        self.b_z = constant_weight(shape=bias_shape, name=_p(prefix, 'b_z'))
        self.b_r = constant_weight(shape=bias_shape, name=_p(prefix, 'b_r'))
        self.b_h = constant_weight(shape=bias_shape, name=_p(prefix, 'b_h'))
        self.W_hz = ortho_weight(shape=hid2hid, name=_p(prefix, 'W_hz'))
        self.W_hr = ortho_weight(shape=hid2hid, name=_p(prefix, 'W_hr'))
        self.W_hh = ortho_weight(shape=hid2hid, name=_p(prefix, 'W_hh'))

        self.params = [
            self.W_xz, self.W_xr, self.W_xh,
            self.W_hz, self.W_hr, self.W_hh,
            self.b_z, self.b_r, self.b_h,
        ]

        # Generally, parameters of shape (n_cdim, n_hids) are the ones that
        # can be applied with tied weights.
        ctx2hid = (self.n_cdim, hid)
        self.W_cz = norm_weight(shape=ctx2hid, name=_p(prefix, 'W_cz'))
        self.W_cr = norm_weight(shape=ctx2hid, name=_p(prefix, 'W_cr'))
        self.W_ch = norm_weight(shape=ctx2hid, name=_p(prefix, 'W_ch'))
        self.W_c_init = norm_weight(shape=ctx2hid,
                                    name=_p(prefix, 'W_c_init'))
        self.b_c_init = constant_weight(shape=bias_shape,
                                        name=_p(prefix, 'b_c_init'))
        # NOTE(review): an earlier comment said these are skipped when tied
        # weights are used, but this method always registers them - confirm.
        self.params += [
            self.W_cz, self.W_cr, self.W_ch, self.W_c_init, self.b_c_init,
        ]

        # Created unconditionally so that they exist in both the with_context
        # and the with_attention modes.
        #   in the paper: e_{i,j} = a(s_{i-1}, h_j)
        #   here:         e_{i,j} = a(GRU(s_{i-1}, y_{i-1}), h_j)
        # i.e. the most recently generated target word is considered too;
        # the parameters below belong to that introduced GRU.
        self.W_n1_h = ortho_weight(shape=hid2hid, name=_p(prefix, 'W_n1_h'))
        self.W_n1_r = ortho_weight(shape=hid2hid, name=_p(prefix, 'W_n1_r'))
        self.W_n1_z = ortho_weight(shape=hid2hid, name=_p(prefix, 'W_n1_z'))
        self.b_n1_h = constant_weight(shape=bias_shape,
                                      name=_p(prefix, 'b_n1_h'))
        self.b_n1_r = constant_weight(shape=bias_shape,
                                      name=_p(prefix, 'b_n1_r'))
        self.b_n1_z = constant_weight(shape=bias_shape,
                                      name=_p(prefix, 'b_n1_z'))
        self.params += [
            self.W_n1_h, self.W_n1_r, self.W_n1_z,
            self.b_n1_h, self.b_n1_r, self.b_n1_z,
        ]

        if self.with_attention:
            self.A_cp = norm_weight(shape=ctx2hid, name=_p(prefix, 'A_cp'))
            self.B_hp = norm_weight(shape=hid2hid, name=_p(prefix, 'B_hp'))
            self.b_tt = constant_weight(shape=bias_shape,
                                        name=_p(prefix, 'b_tt'))
            self.D_pe = norm_weight(shape=(hid, 1), name=_p(prefix, 'D_pe'))
            self.params += [self.A_cp, self.B_hp, self.b_tt, self.D_pe]

        # Readout.
        # NOTE(review): an earlier comment suggested the readout is not needed
        # for errors on encoder states, yet it is always created - confirm.
        n_out = self.n_in * self.maxout_part
        self.W_o_c = norm_weight(shape=(self.n_cdim, n_out),
                                 name=_p(prefix, 'W_out_c'))
        self.W_o_h = norm_weight(shape=(hid, n_out),
                                 name=_p(prefix, 'W_out_h'))
        self.W_o_e = norm_weight(shape=(self.n_in, n_out),
                                 name=_p(prefix, 'W_out_e'))
        self.b_o = constant_weight(shape=(n_out, ),
                                   name=_p(prefix, 'b_out_o'))

        self.params += [self.W_o_c, self.W_o_h, self.W_o_e, self.b_o]
Exemplo n.º 10
0
    def _init_params(self):
        """Create the decoder's parameters and register them in ``self.params``.

        Creation order: the ``W_x*`` / ``b_*`` / ``W_h*`` weights, the context
        projections (``W_c*``, ``W_c_init``, ``b_c_init``), the ``*_n1_*``
        weights, the attention weights including ``c_tt`` (only if
        ``self.with_attention``) and the readout weights.  Every randomly
        initialised matrix is drawn with ``self.rng``.
        """
        prefix = self.pname
        rng = self.rng
        hid = self.n_hids
        in2hid = (self.n_in, hid)
        hid2hid = (hid, hid)
        bias_shape = (hid, )

        # These weights combine the most recently generated word with the
        # decoder state.
        self.W_xz = norm_weight(rng=rng, shape=in2hid, name=_p(prefix, 'W_xz'))
        self.W_xr = norm_weight(rng=rng, shape=in2hid, name=_p(prefix, 'W_xr'))
        self.W_xh = norm_weight(rng=rng, shape=in2hid, name=_p(prefix, 'W_xh'))
        self.b_z = constant_weight(shape=bias_shape, name=_p(prefix, 'b_z'))
        self.b_r = constant_weight(shape=bias_shape, name=_p(prefix, 'b_r'))
        self.b_h = constant_weight(shape=bias_shape, name=_p(prefix, 'b_h'))
        self.W_hz = ortho_weight(rng=rng, shape=hid2hid, name=_p(prefix, 'W_hz'))
        self.W_hr = ortho_weight(rng=rng, shape=hid2hid, name=_p(prefix, 'W_hr'))
        self.W_hh = ortho_weight(rng=rng, shape=hid2hid, name=_p(prefix, 'W_hh'))

        self.params = [
            self.W_xz, self.W_xr, self.W_xh,
            self.W_hz, self.W_hr, self.W_hh,
            self.b_z, self.b_r, self.b_h,
        ]

        ctx2hid = (self.n_cdim, hid)
        self.W_cz = norm_weight(rng=rng, shape=ctx2hid, name=_p(prefix, 'W_cz'))
        self.W_cr = norm_weight(rng=rng, shape=ctx2hid, name=_p(prefix, 'W_cr'))
        self.W_ch = norm_weight(rng=rng, shape=ctx2hid, name=_p(prefix, 'W_ch'))
        self.W_c_init = norm_weight(rng=rng, shape=ctx2hid,
                                    name=_p(prefix, 'W_c_init'))
        self.b_c_init = constant_weight(shape=bias_shape,
                                        name=_p(prefix, 'b_c_init'))
        # NOTE(review): an earlier comment said these are skipped when tied
        # weights are used, but this method always registers them - confirm.
        self.params += [
            self.W_cz, self.W_cr, self.W_ch, self.W_c_init, self.b_c_init,
        ]

        # Created unconditionally so that they exist in both the with_context
        # and the with_attention modes.
        #   in the paper: e_{i,j} = a(s_{i-1}, h_j)
        #   here:         e_{i,j} = a(GRU(s_{i-1}, y_{i-1}), h_j)
        # i.e. the most recently generated target word is considered too;
        # the parameters below belong to that introduced GRU.
        self.W_n1_h = ortho_weight(rng=rng, shape=hid2hid,
                                   name=_p(prefix, 'W_n1_h'))
        self.W_n1_r = ortho_weight(rng=rng, shape=hid2hid,
                                   name=_p(prefix, 'W_n1_r'))
        self.W_n1_z = ortho_weight(rng=rng, shape=hid2hid,
                                   name=_p(prefix, 'W_n1_z'))
        self.b_n1_h = constant_weight(shape=bias_shape,
                                      name=_p(prefix, 'b_n1_h'))
        self.b_n1_r = constant_weight(shape=bias_shape,
                                      name=_p(prefix, 'b_n1_r'))
        self.b_n1_z = constant_weight(shape=bias_shape,
                                      name=_p(prefix, 'b_n1_z'))
        self.params += [
            self.W_n1_h, self.W_n1_r, self.W_n1_z,
            self.b_n1_h, self.b_n1_r, self.b_n1_z,
        ]

        if self.with_attention:
            self.A_cp = norm_weight(rng=rng, shape=ctx2hid,
                                    name=_p(prefix, 'A_cp'))
            self.B_hp = norm_weight(rng=rng, shape=hid2hid,
                                    name=_p(prefix, 'B_hp'))
            self.b_tt = constant_weight(shape=bias_shape,
                                        name=_p(prefix, 'b_tt'))
            self.D_pe = norm_weight(rng=rng, shape=(hid, 1),
                                    name=_p(prefix, 'D_pe'))
            self.c_tt = constant_weight(shape=(1, ), name=_p(prefix, 'c_tt'))
            self.params += [
                self.A_cp, self.B_hp, self.b_tt, self.D_pe, self.c_tt,
            ]

        # Readout.
        n_out = self.n_in * self.maxout_part
        self.W_o_c = norm_weight(rng=rng, shape=(self.n_cdim, n_out),
                                 name=_p(prefix, 'W_out_c'))
        self.W_o_h = norm_weight(rng=rng, shape=(hid, n_out),
                                 name=_p(prefix, 'W_out_h'))
        self.W_o_e = norm_weight(rng=rng, shape=(self.n_in, n_out),
                                 name=_p(prefix, 'W_out_e'))
        self.b_o = constant_weight(shape=(n_out, ),
                                   name=_p(prefix, 'b_out_o'))

        self.params += [self.W_o_c, self.W_o_h, self.W_o_e, self.b_o]