Example #1
    def __init__(self, inpt, nin, nunits, conv_sz=1,
                 learn_init_state=True):
        # inpt is transposed a priori
        tablet_wd, _ = inpt.shape
        if conv_sz > 1:
            inpt_clipped = inpt[:conv_sz * (tablet_wd // conv_sz), :]
            inpt_conv = inpt_clipped.reshape(
                (tablet_wd // conv_sz, nin * conv_sz))
        else:
            inpt_conv = inpt

        wio = share(init_wts(nin * conv_sz, nunits))  # input to output
        woo = share(init_wts(nunits, nunits))  # output to output
        bo = share(init_wts(nunits))  # output bias
        h0 = share(init_wts(nunits))  # initial hidden state

        def step(in_t, out_tm1):
            return tt.tanh(tt.dot(out_tm1, woo) + tt.dot(in_t, wio) + bo)

        self.output, _ = theano.scan(
            step,
            sequences=[inpt_conv],
            outputs_info=[h0]
        )

        self.params = [wio, woo, bo]
        if learn_init_state:
            self.params += [h0]
        self.nout = nunits
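
A minimal usage sketch for this layer follows. The class name RecurrentLayer is assumed here (the snippet shows only the constructor), and share/init_wts are rnn_ctc project helpers wrapping theano.shared and random weight initialization.

# Usage sketch -- assumptions: the class above is named RecurrentLayer,
# and share/init_wts behave as in the rnn_ctc project.
import numpy as np
import theano
import theano.tensor as tt

x = tt.matrix('x')                        # (seq_len, nin); already transposed as expected
layer = RecurrentLayer(x, nin=20, nunits=50, conv_sz=1)
f = theano.function([x], layer.output)    # compile the forward pass
h = f(np.random.randn(100, 20).astype(theano.config.floatX))
print(h.shape)                            # (100, 50): one hidden state per time step
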
Example #2
File: lstm.py Project: XCHYang/rnn_ctc
    def __init__(self, inpt,
                 nin, nunits,
                 forget=False,
                 actvn_pre='tanh',
                 actvn_post='linear',
                 learn_init_states=True):
        """
        Init
        :param inpt: Lower layer's excitation.
        :param nin: Dimension of lower layer.
        :param nunits: Number of units.
        :param forget: Want a seperate forget gate (or use 1-input)?
        :param actvn_pre: Activation applied to new candidate for cell value.
        :param actvn_post: Activation applied to cell value before output.
        :param learn_init_states: Should the intial states be learnt?
        :return: Output
        """
        # TODO: Incell connections

        num_activations = 3 + forget  # input gate, output gate, cell candidate (+ forget gate)
        w = stacked_wts(nin, nunits, num_activations)      # input-to-hidden weights
        u = stacked_wts(nunits, nunits, num_activations)   # hidden-to-hidden weights
        b = share(np.zeros(num_activations * nunits))      # biases
        out0 = share(np.zeros(nunits))                     # initial output state
        cell0 = share(np.zeros(nunits))                    # initial cell state

        actvn_pre = activation_by_name(actvn_pre)
        actvn_post = activation_by_name(actvn_post)

        def step(in_t, out_tm1, cell_tm1):
            """
            Scan function.
            :param in_t: Current input from bottom layer
            :param out_tm1: Prev output of LSTM layer
            :param cell_tm1: Prev cell value
            :return: Current output and cell value
            """
            tmp = tt.dot(out_tm1, u) + in_t

            inn_gate = sigmoid(tmp[:nunits])
            out_gate = sigmoid(tmp[nunits:2 * nunits])
            fgt_gate = sigmoid(
                tmp[2 * nunits:3 * nunits]) if forget else 1 - inn_gate

            cell_val = actvn_pre(tmp[-nunits:])
            cell_val = fgt_gate * cell_tm1 + inn_gate * cell_val
            out = out_gate * actvn_post(cell_val)

            return out, cell_val

        inpt = tt.dot(inpt, w) + b
        # (seqlen x nin) . (nin x num_activations*nunits) + bias -> seqlen x num_activations*nunits

        rval, updates = th.scan(step,
                                sequences=[inpt],
                                outputs_info=[out0, cell0], )

        self.output = rval[0]
        self.params = [w, u, b]
        if learn_init_states:
            self.params += [out0, cell0]
        self.nout = nunits
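
To make the slicing in step() concrete: the single pre-activation vector of length num_activations * nunits is laid out as [input gate | output gate | (forget gate) | cell candidate]. A small numpy sketch of that layout (sizes are illustrative only):

# Gate-layout sketch; mirrors the slicing done in step() above.
import numpy as np

nunits, forget = 4, True
num_activations = 3 + forget                # 4 blocks when a forget gate is used
tmp = np.arange(num_activations * nunits)   # stand-in for tt.dot(out_tm1, u) + in_t

inn_pre = tmp[:nunits]                      # input-gate pre-activation
out_pre = tmp[nunits:2 * nunits]            # output-gate pre-activation
fgt_pre = tmp[2 * nunits:3 * nunits]        # forget-gate pre-activation (only if forget)
cell_pre = tmp[-nunits:]                    # new cell-candidate pre-activation
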
Example #3
File: lstm.py Project: XCHYang/rnn_ctc
def stacked_wts(n, m, copies, name=None):
    return share(
        np.hstack([orthonormal_wts(n, m) for _ in range(copies)]),
        name=name)
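
stacked_wts places copies orthonormal weight matrices side by side, which is what lets the LSTM above compute all gate pre-activations with one matrix product. A shape illustration (orthonormal_wts is a project helper; plain Gaussian blocks are used below as stand-ins just to show the hstack layout):

# Shape illustration of the stacked weight matrix (stand-in random blocks).
import numpy as np

nin, nunits, copies = 3, 4, 4
blocks = [np.random.randn(nin, nunits) for _ in range(copies)]
stacked = np.hstack(blocks)
print(stacked.shape)                        # (3, 16) == (nin, copies * nunits)
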
Example #4
File: ctc.py Project: XCHYang/rnn_ctc
    def __init__(self, inpt, in_sz, n_classes):
        b = share(init_wts(n_classes))
        w = share(init_wts(in_sz, n_classes))
        self.output = tt.nnet.softmax(tt.dot(inpt, w) + b)
        self.params = [w, b]
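
A usage sketch for this output layer (SoftmaxLayer is an assumed name for the class above; init_wts/share are rnn_ctc project helpers): it maps a (seq_len, in_sz) feature sequence to per-timestep class probabilities, typically with one extra class reserved for the CTC blank.

# Usage sketch -- assumptions: the class above is named SoftmaxLayer,
# and init_wts/share behave as in the rnn_ctc project.
import numpy as np
import theano
import theano.tensor as tt

feats = tt.matrix('feats')                        # (seq_len, in_sz) features from a lower layer
sm = SoftmaxLayer(feats, in_sz=50, n_classes=11)  # e.g. 10 labels + 1 CTC blank
f = theano.function([feats], sm.output)
p = f(np.random.randn(100, 50).astype(theano.config.floatX))
print(p.shape, p.sum(axis=1)[:3])                 # (100, 11); each row sums to 1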