Example #1
def sg_rnn(tensor, opt):
    r"""Applies a simple rnn.
    
    Args:
      tensor: A 3-D `Tensor`.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      bias: Boolean. If True, biases are added.
      ln: Boolean. If True, layer normalization is applied.   
      init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
      last_only: Boolean. If True, the outputs in the last time step are returned.
    
    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape [batch size, time steps, dim].
        If last_only is True, the shape will be [batch size, dim].
    """
    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step function
    def step(h, x):
        # simple rnn
        y = ln(
            tf.matmul(x, w) + tf.matmul(h, u) +
            (b if opt.bias else 0))
        return y

    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step func
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
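
For orientation, here is a minimal NumPy sketch of the recurrence the step function above computes. The shapes, random/identity initial values, and plain Python loop are illustrative assumptions only, not taken from the library; note that, like the code above, no activation is applied inside the step.

import numpy as np

# illustrative shapes -- assumptions for the sketch, not library defaults
batch, time_steps, in_dim, dim = 2, 5, 3, 4

x = np.random.randn(batch, time_steps, in_dim)
W = np.random.randn(in_dim, dim)   # input-to-hidden weight ('W' above)
U = np.eye(dim)                    # hidden-to-hidden weight ('U' above)
b = np.zeros(dim)                  # bias ('b' above)

h = np.zeros((batch, dim))         # zero initial state, as when init_state is None
outputs = []
for t in range(time_steps):
    # one step: h_t = x_t W + h_{t-1} U + b  (layer norm / activation omitted here)
    h = x[:, t, :] @ W + h @ U + b
    outputs.append(h[:, None, :])  # keep a time axis, like sg_expand_dims(dim=1)

out = np.concatenate(outputs, axis=1)  # [batch, time_steps, dim]
last = out[:, -1, :]                   # what last_only=True returns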
Example #2
def sg_rnn(tensor, opt):

    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # permute dimension for scan loop
    xx = tf.transpose(tensor, [1, 0, 2])

    # step func
    def step(h, x):

        # layer normalization
        def ln(xx, opt):
            if opt.ln:
                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(xx, axes=[len(xx.get_shape()) - 1])

                # apply layer normalization ( explicit broadcasting needed )
                broadcast_shape = [-1] + [1] * (len(xx.get_shape()) - 1)
                xx = (xx - tf.reshape(mean, broadcast_shape)) \
                         / tf.reshape(tf.sqrt(variance + tf.sg_eps), broadcast_shape)

                # apply the learned scale and offset
                return gamma * xx + beta
            # no layer normalization: pass the input through unchanged
            return xx

        # apply transform
        y = ln(tf.matmul(x, w) + tf.matmul(h, u) + (b if opt.bias else 0), opt)

        return y

    # loop by scan
    out = tf.scan(step, xx, init_h)

    # recover dimension
    out = tf.transpose(out, [1, 0, 2])

    # last sequence only
    if opt.last_only:
        out = out[:, tensor.get_shape().as_list()[1]-1, :]

    return out
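
The inline ln above normalizes its input over the last axis and then applies the learned scale (gamma) and offset (beta). A minimal NumPy sketch of that computation follows; the epsilon and the shapes are chosen here purely for illustration.

import numpy as np

def layer_norm(x, gamma, beta, eps=1e-8):
    # per-row mean and variance over the last axis
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    # normalize, then apply the learned scale (gamma) and offset (beta)
    return gamma * (x - mean) / np.sqrt(var + eps) + beta

x = np.random.randn(2, 4)   # [batch, dim], sizes chosen for the sketch
gamma = np.ones(4)          # scale, initialized to 1 as above
beta = np.zeros(4)          # offset, initialized to 0 as above
print(layer_norm(x, gamma, beta))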
Example #3
def sg_rnn(tensor, opt):

    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step function
    def step(h, x):
        # simple rnn
        y = ln(
            tf.matmul(x, w) + tf.matmul(h, u) +
            (b if opt.bias else 0))
        return y

    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step func
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
Example #4
def sg_lstm(tensor, opt):
    r"""Applies an LSTM.

    Args:
      tensor: A 3-D `Tensor`.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      bias: Boolean. If True, biases are added.
      ln: Boolean. If True, layer normalization is applied.   
      init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
      last_only: Boolean. If True, the outputs in the last time step are returned.
    
    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape [batch size, time steps, dim].
        If last_only is True, the shape will be [batch size, dim].
    """
    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, c, x):
        # forget gate
        f = tf.sigmoid(
            ln(
                tf.matmul(x, w_f) + tf.matmul(h, u_f) +
                (b_f if opt.bias else 0)))
        # input gate
        i = tf.sigmoid(
            ln(
                tf.matmul(x, w_i) + tf.matmul(h, u_i) +
                (b_i if opt.bias else 0)))
        # new cell value
        cc = tf.tanh(
            ln(
                tf.matmul(x, w_c) + tf.matmul(h, u_c) +
                (b_c if opt.bias else 0)))
        # out gate
        o = tf.sigmoid(
            ln(
                tf.matmul(x, w_o) + tf.matmul(h, u_o) +
                (b_o if opt.bias else 0)))
        # cell update
        cell = f * c + i * cc
        # final output
        y = o * tf.tanh(cell)
        return y, cell

    # parameter initialize
    w_i = init.orthogonal('W_i', (opt.in_dim, opt.dim))
    u_i = init.identity('U_i', opt.dim)
    w_f = init.orthogonal('W_f', (opt.in_dim, opt.dim))
    u_f = init.identity('U_f', opt.dim)
    w_o = init.orthogonal('W_o', (opt.in_dim, opt.dim))
    u_o = init.identity('U_o', opt.dim)
    w_c = init.orthogonal('W_c', (opt.in_dim, opt.dim))
    u_c = init.identity('U_c', opt.dim)
    if opt.bias:
        b_i = init.constant('b_i', opt.dim)
        b_f = init.constant('b_f', opt.dim)
        b_o = init.constant('b_o', opt.dim, value=1)
        b_c = init.constant('b_c', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, c, out = init_h, init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h, c = step(h, c, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
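
A minimal NumPy sketch of a single step of the gate equations above, with layer normalization omitted; the sizes, initial values, and dictionary-of-weights layout are illustrative assumptions, not the library's API.

import numpy as np

batch, in_dim, dim = 2, 3, 4        # illustrative sizes (assumptions)
x = np.random.randn(batch, in_dim)  # input at one time step
h = np.zeros((batch, dim))          # previous hidden state
c = np.zeros((batch, dim))          # previous cell state

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

# one (W, U, b) triple per gate, mirroring W_f/U_f/b_f etc. above
W = {g: np.random.randn(in_dim, dim) for g in 'fico'}
U = {g: np.eye(dim) for g in 'fico'}
b = {g: np.zeros(dim) for g in 'fico'}
b['o'] = np.ones(dim)               # output-gate bias starts at 1, as in the init above

f = sigmoid(x @ W['f'] + h @ U['f'] + b['f'])    # forget gate
i = sigmoid(x @ W['i'] + h @ U['i'] + b['i'])    # input gate
cc = np.tanh(x @ W['c'] + h @ U['c'] + b['c'])   # candidate cell value
o = sigmoid(x @ W['o'] + h @ U['o'] + b['o'])    # output gate

c_new = f * c + i * cc              # cell update
h_new = o * np.tanh(c_new)          # step output, the (y, cell) pair above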
Example #5
def sg_gru(tensor, opt):
    r"""Applies a GRU.
    
    Args:
      tensor: A 3-D `Tensor`.
      in_dim: A positive `integer`. The size of input dimension.
      dim: A positive `integer`. The size of output dimension.
      bias: Boolean. If True, biases are added.
      ln: Boolean. If True, layer normalization is applied.   
      init_state: A 2-D `Tensor`. If None, the initial state is set to zeros.
      last_only: Boolean. If True, the outputs in the last time step are returned.
    
    Returns:
      A `Tensor`. If last_only is False, the output tensor has shape [batch size, time steps, dim].
        If last_only is True, the shape will be [batch size, dim].
    """

    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, x):
        # update gate
        z = tf.sigmoid(
            ln(
                tf.matmul(x, w_z) + tf.matmul(h, u_z) +
                (b_z if opt.bias else 0)))
        # reset gate
        r = tf.sigmoid(
            ln(
                tf.matmul(x, w_r) + tf.matmul(h, u_r) +
                (b_r if opt.bias else 0)))
        # h_hat
        hh = tf.tanh(
            ln(
                tf.matmul(x, w_h) + tf.matmul(r * h, u_h) +
                (b_h if opt.bias else 0)))
        # final output
        y = (1. - z) * h + z * hh
        return y

    # parameter initialize
    w_z = init.orthogonal('W_z', (opt.in_dim, opt.dim))
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', (opt.in_dim, opt.dim))
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', (opt.in_dim, opt.dim))
    u_h = init.identity('U_h', opt.dim)
    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
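
For comparison, a minimal NumPy sketch of one GRU step as written above, with layer normalization omitted; the sizes and initial values are illustrative assumptions only.

import numpy as np

batch, in_dim, dim = 2, 3, 4         # illustrative sizes (assumptions)
x = np.random.randn(batch, in_dim)   # input at one time step
h = np.zeros((batch, dim))           # previous hidden state

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

W = {g: np.random.randn(in_dim, dim) for g in 'zrh'}   # W_z, W_r, W_h
U = {g: np.eye(dim) for g in 'zrh'}                    # U_z, U_r, U_h
b = {g: np.zeros(dim) for g in 'zrh'}                  # b_z, b_r, b_h

z = sigmoid(x @ W['z'] + h @ U['z'] + b['z'])          # update gate
r = sigmoid(x @ W['r'] + h @ U['r'] + b['r'])          # reset gate
hh = np.tanh(x @ W['h'] + (r * h) @ U['h'] + b['h'])   # candidate state h_hat
h_new = (1.0 - z) * h + z * hh                         # final output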
Example #6
def sg_gru(tensor, opt):

    # parameter initialize
    w_z = init.orthogonal('W_z', (opt.in_dim, opt.dim))
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', (opt.in_dim, opt.dim))
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', (opt.in_dim, opt.dim))
    u_h = init.identity('U_h', opt.dim)
    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # permute dimension for scan loop
    xx = tf.transpose(tensor, [1, 0, 2])

    # step func
    def step(h, x):

        # layer normalization
        def ln(xx, opt):
            if opt.ln:
                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(xx, axes=[len(xx.get_shape()) - 1])

                # apply layer normalization ( explicit broadcasting needed )
                broadcast_shape = [-1] + [1] * (len(xx.get_shape()) - 1)
                xx = (xx - tf.reshape(mean, broadcast_shape)) \
                         / tf.reshape(tf.sqrt(variance + tf.sg_eps), broadcast_shape)

                # apply the learned scale and offset
                return gamma * xx + beta
            # no layer normalization: pass the input through unchanged
            return xx

        # update gate
        z = tf.sigmoid(ln(tf.matmul(x, w_z) + tf.matmul(h, u_z) + (b_z if opt.bias else 0), opt))
        # reset gate
        r = tf.sigmoid(ln(tf.matmul(x, w_r) + tf.matmul(h, u_r) + (b_r if opt.bias else 0), opt))
        # h_hat
        hh = tf.tanh(ln(tf.matmul(x, w_h) + tf.matmul(r*h, u_h) + (b_h if opt.bias else 0), opt))
        # final output
        y = (1. - z) * h + z * hh

        return y

    # loop by scan
    out = tf.scan(step, xx, init_h)

    # recover dimension
    out = tf.transpose(out, [1, 0, 2])

    # last sequence only
    if opt.last_only:
        out = out[:, tensor.get_shape().as_list()[1]-1, :]

    return out
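
The scan-based variant above works in time-major order and restores the batch-major layout afterwards. A tiny NumPy sketch of that transpose-scan-transpose shape handling follows; the step used here is a placeholder that merely stands in for the GRU gates, and the sizes are assumptions for illustration.

import numpy as np

batch, time_steps, dim = 2, 5, 4          # illustrative sizes (assumptions)
tensor = np.random.randn(batch, time_steps, dim)
init_h = np.zeros((batch, dim))

# permute to time-major [time, batch, dim], as tf.transpose(tensor, [1, 0, 2]) does
xx = tensor.transpose(1, 0, 2)

h, out = init_h, []
for x in xx:                    # scan over the leading (time) axis
    h = 0.5 * h + 0.5 * x       # placeholder step; the real code applies the GRU gates here
    out.append(h)

out = np.stack(out)             # [time, batch, dim], like the tf.scan result
out = out.transpose(1, 0, 2)    # recover [batch, time, dim]
last = out[:, -1, :]            # what last_only=True keeps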
Example #7
def sg_lstm(tensor, opt):

    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, c, x):
        # forget gate
        f = tf.sigmoid(
            ln(
                tf.matmul(x, w_f) + tf.matmul(h, u_f) +
                (b_f if opt.bias else 0)))
        # input gate
        i = tf.sigmoid(
            ln(
                tf.matmul(x, w_i) + tf.matmul(h, u_i) +
                (b_i if opt.bias else 0)))
        # new cell value
        cc = tf.tanh(
            ln(
                tf.matmul(x, w_c) + tf.matmul(h, u_c) +
                (b_c if opt.bias else 0)))
        # out gate
        o = tf.sigmoid(
            ln(
                tf.matmul(x, w_o) + tf.matmul(h, u_o) +
                (b_o if opt.bias else 0)))
        # cell update
        cell = f * c + i * cc
        # final output
        y = o * tf.tanh(cell)
        return y, cell

    # parameter initialize
    w_i = init.orthogonal('W_i', (opt.in_dim, opt.dim))
    u_i = init.identity('U_i', opt.dim)
    w_f = init.orthogonal('W_f', (opt.in_dim, opt.dim))
    u_f = init.identity('U_f', opt.dim)
    w_o = init.orthogonal('W_o', (opt.in_dim, opt.dim))
    u_o = init.identity('U_o', opt.dim)
    w_c = init.orthogonal('W_c', (opt.in_dim, opt.dim))
    u_c = init.identity('U_c', opt.dim)
    if opt.bias:
        b_i = init.constant('b_i', opt.dim)
        b_f = init.constant('b_f', opt.dim)
        b_o = init.constant('b_o', opt.dim, value=1)
        b_c = init.constant('b_c', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, c, out = init_h, init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h, c = step(h, c, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out
Example #8
def sg_gru(tensor, opt):

    # layer normalization
    ln = lambda v: _ln_rnn(v, gamma, beta) if opt.ln else v

    # step func
    def step(h, x):
        # update gate
        z = tf.sigmoid(
            ln(
                tf.matmul(x, w_z) + tf.matmul(h, u_z) +
                (b_z if opt.bias else 0)))
        # reset gate
        r = tf.sigmoid(
            ln(
                tf.matmul(x, w_r) + tf.matmul(h, u_r) +
                (b_r if opt.bias else 0)))
        # h_hat
        hh = tf.tanh(
            ln(
                tf.matmul(x, w_h) + tf.matmul(r * h, u_h) +
                (b_h if opt.bias else 0)))
        # final output
        y = (1. - z) * h + z * hh
        return y

    # parameter initialize
    w_z = init.orthogonal('W_z', (opt.in_dim, opt.dim))
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', (opt.in_dim, opt.dim))
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', (opt.in_dim, opt.dim))
    u_h = init.identity('U_h', opt.dim)
    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)

    # layer normalization parameters
    if opt.ln:
        # offset, scale parameter
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state
    init_h = opt.init_state if opt.init_state is not None \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # do rnn loop
    h, out = init_h, []
    for i in range(tensor.get_shape().as_list()[1]):
        # apply step function
        h = step(h, tensor[:, i, :])
        # save result
        out.append(h.sg_expand_dims(dim=1))

    # merge tensor
    if opt.last_only:
        out = out[-1].sg_squeeze(dim=1)
    else:
        out = tf.concat(1, out)

    return out