示例#1
0
    def __init__(self,
                 in_dim,
                 dim,
                 forget_bias=1.0,
                 activation=tf.tanh,
                 ln=True,
                 bias=True,
                 dtype=tf.float32,
                 dev='/cpu:0',
                 batch_size=3):

        self._in_dim = in_dim
        self._dim = dim
        self._forget_bias = forget_bias
        self._activation = activation
        self._ln = False
        self._bias = bias
        self._dev = dev
        self._size = self._in_dim * self._dim
        self._initializer = tf.contrib.layers.xavier_initializer(
        )  #tf.random_normal_initializer()
        self._dtype = dtype

        with tf.device(self._dev):
            with tf.variable_scope("lstm") as scp:
                #self.rnn_state = tf.get_variable("rnn_c",(batch_size, self._dim), dtype=tf.sg_floatx,initializer=tf.constant_initializer(0.0),trainable=False)
                #self.rnn_h = tf.get_variable("rnn_h",(batch_size, self._dim), dtype=tf.sg_floatx,initializer=tf.constant_initializer(0.0),trainable=False)
                self.rnn_state, self.rnn_h = tf.zeros(
                    (batch_size, self._dim), dtype=tf.sg_floatx), tf.zeros(
                        (batch_size, self._dim), dtype=tf.sg_floatx)
                w_i2h = tf.get_variable(
                    'w_i2h', (self._in_dim, 4 * self._dim),
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    trainable=True)
                w_h2h = tf.get_variable(
                    'w_h2h', (self._dim, 4 * self._dim),
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    trainable=True)
                w_b = tf.get_variable(
                    'w_b', (1, 4 * self._dim),
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    trainable=True) if self._bias == True else 0.0
                if self._ln:
                    with tf.variable_scope("ln_rnn"):
                        beta = tf.get_variable(
                            'beta',
                            self._dim,
                            dtype=tf.sg_floatx,
                            initializer=tf.constant_initializer(0.0),
                            trainable=True)
                        gamma = tf.get_variable(
                            'gamma',
                            self._dim,
                            dtype=tf.sg_floatx,
                            initializer=tf.constant_initializer(1.0),
                            trainable=True)
示例#2
0
def sg_reuse(tensor, **opt):
    opt = tf.sg_opt(opt)
    assert hasattr(tensor, '_sugar'), 'cannot reuse this node.'
    assert opt.input is not None, 'input is mandatory.'

    # get all nodes in this graph
    nodes, prev = [tensor], tensor._sugar.prev
    while prev is not None:
        nodes = [prev] + nodes
        prev = prev._sugar.prev if hasattr(prev, '_sugar') else None

    # create graph again for this input
    out = opt.input
    for node in nodes[1:]:  # exclude head node
        if node._sugar.is_layer:
            fn = tf.sg_layer_func(node._sugar.func)
            if node._sugar.arg.context_name:
                with tf.variable_scope(node._sugar.arg.context_name):
                    out = fn(
                        out,
                        **(node._sugar.arg +
                           tf.sg_opt(name=node._sugar.name, reuse=True)))
            else:
                out = fn(
                    out,
                    **(node._sugar.arg +
                       tf.sg_opt(name=node._sugar.name, reuse=True)))
        else:
            out = node._sugar.func(out, node._sugar.arg)

    return out
def linear(input_, output_size, scope=None):
    '''
    Linear map: output[k] = sum_i(Matrix[k, i] * args[i] ) + Bias[k]
    Args:
        args: a tensor or a list of 2D, batch x n, Tensors.
    output_size: int, second dimension of W[i].
    scope: VariableScope for the created subgraph; defaults to "Linear".
  Returns:
    A 2D Tensor with shape [batch x output_size] equal to
    sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
  Raises:
    ValueError: if some of the arguments has unspecified or wrong shape.
  '''

    shape = input_.get_shape().as_list()
    if len(shape) != 2:
        raise ValueError("Linear is expecting 2D arguments: %s" % str(shape))
    if not shape[1]:
        raise ValueError("Linear expects shape[1] of arguments: %s" %
                         str(shape))
    input_size = shape[1]

    # Now the computation.
    with tf.variable_scope(scope or "SimpleLinear"):
        matrix = tf.get_variable("Matrix", [output_size, input_size],
                                 dtype=input_.dtype)
        bias_term = tf.get_variable("Bias", [output_size], dtype=input_.dtype)

    return tf.matmul(input_, tf.transpose(matrix)) + bias_term
        def symbols_to_logits_fn(ids, dec_state):
            dec = []
            dec_c, dec_h = [], []
            # (batch x beam_size x decoded_seq)
            ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
            print("dec_state ", dec_state[0].get_shape().as_list())
            for ind in range(beam_size):
                with tf.variable_scope('dec_lstm', reuse=ind > 0
                                       or reuse_vars):
                    w_input = ids[:, ind, -1].sg_lookup(emb=emb_word)
                    dec_state0 = tf.contrib.rnn.LSTMStateTuple(
                        c=dec_state.c[:, ind, :], h=dec_state.h[:, ind, :])
                    dec_out, dec_state_i = dec_cell(w_input, dec_state0)
                    dec_out = tf.expand_dims(dec_out, 1)
                dec_i = dec_out.sg_conv1d_gpus(size=1,
                                               dim=Hp.word_vs,
                                               name="out_conv",
                                               act="linear",
                                               dev=dev,
                                               reuse=ind > 0 or reuse_vars)

                dec.append(tf.squeeze(dec_i, 1))
                dec_c.append(dec_state_i[0])
                dec_h.append(dec_state_i[1])
            return tf.stack(dec, 1), tf.contrib.rnn.LSTMStateTuple(
                tf.stack(dec_c, 1), tf.stack(dec_h, 1))
示例#5
0
    def __call__(self, x_t, state, size, scope=None, reuse_vars=False):

        (prev_c, prev_h) = state
        scope = scope or tf.get_variable_scope()
        print("____reuse_______", reuse_vars)
        with tf.variable_scope(scope, reuse=True):
            w_ic = tf.get_variable("w_ic")
            w_fc = tf.get_variable("w_fc")
            w_oc = tf.get_variable("w_oc")

        with tf.sg_context(dev=self._dev, reuse=reuse_vars):
            i = x_t.sg_conv1d_gpus(name = "ix_",size=size)+\
            prev_h.sg_conv1d_gpus(name = "ih_",size=size)+\
            prev_c*w_ic

            f = x_t.sg_aconv1d_gpus(name = "fx_",size=size)+\
            prev_h.sg_aconv1d_gpus(name = "fh_",size=size)+\
            prev_c*w_fc

            c = x_t.sg_conv1d_gpus(name = "cx_",size=size)+\
            prev_h.sg_conv1d_gpus(name = "ch_",size=size)

            o = x_t.sg_conv1d_gpus(name = "ox_",size=size)+\
            prev_h.sg_conv1d_gpus(name = "oh_",size=size)+\
            prev_c*w_oc

        new_c = prev_c * tf.sigmoid(f) + tf.sigmoid(i) * self._activation(c)
        new_h = self._activation(new_c) * tf.sigmoid(o)

        return (new_c, new_h)
def get_loss(opt):

    # encode audio feature
    with tf.variable_scope("model"):
        logit_clean = get_logit(opt.input[opt.gpu_index], voca_size=voca_size)
        loss_clean = logit_clean.sg_ctc(target=opt.target[opt.gpu_index],
                                        seq_len=opt.seq_len[opt.gpu_index])
    with tf.variable_scope("model", reuse=True):
        logit_noise = get_logit(opt.input_noise[opt.gpu_index],
                                voca_size=voca_size)
        loss_noise = logit_noise.sg_ctc(target=opt.target[opt.gpu_index],
                                        seq_len=opt.seq_len[opt.gpu_index])
    # CTC loss
    loss_penalize = penalize_loss(opt.gamma, opt.lambd, logit_clean,
                                  logit_noise)

    return loss_clean + opt.alpha * loss_noise + loss_penalize
示例#7
0
def get_logit(x, voca_size):
    with tf.variable_scope('wavenet', reuse=tf.AUTO_REUSE):
        # residual block
        def res_block(tensor, size, rate, block, dim=num_dim):

            with tf.sg_context(name='block_%d_%d' % (block, rate)):

                # filter convolution
                conv_filter = tensor.sg_aconv1d(size=size,
                                                rate=rate,
                                                act='tanh',
                                                bn=True,
                                                name='conv_filter')

                # gate convolution
                conv_gate = tensor.sg_aconv1d(size=size,
                                              rate=rate,
                                              act='sigmoid',
                                              bn=True,
                                              name='conv_gate')

                # output by gate multiplying
                out = conv_filter * conv_gate

                # final output
                out = out.sg_conv1d(size=1,
                                    dim=dim,
                                    act='tanh',
                                    bn=True,
                                    name='conv_out')

                # residual and skip output
                return out + tensor, out

        # expand dimension
        with tf.sg_context(name='front'):
            z = x.sg_conv1d(size=1,
                            dim=num_dim,
                            act='tanh',
                            bn=True,
                            name='conv_in')

        # dilated conv block loop
        skip = 0  # skip connections
        for i in range(num_blocks):
            for r in [1, 2, 4, 8, 16]:
                z, s = res_block(z, size=7, rate=r, block=i)
                skip += s

        # final logit layers
        with tf.sg_context(name='logit'):
            logit = (skip.sg_conv1d(size=1, act='tanh', bn=True,
                                    name='conv_1').sg_conv1d(size=1,
                                                             dim=voca_size,
                                                             name='conv_2'))

        return logit
示例#8
0
 def _linear(self, arys):
     scope = tf.get_variable_scope()
     with tf.variable_scope(scope, reuse=True):
         w_i2h = tf.get_variable("w_i2h")
         w_h2h = tf.get_variable("w_h2h")
         w_b = tf.get_variable("w_b") if self._bias == True else 0
     i2h = tf.matmul(arys[0], w_i2h)
     h2h = tf.matmul(arys[1], w_h2h)
     out = i2h + h2h + w_b
     return out
示例#9
0
 def embed(inputs, vocab_size, embed_size, variable_scope):
     '''
     inputs = tf.expand_dims(tf.range(5), 0) => (1, 5)
     _embed(inputs, 5, 10) => (1, 5, 10)
     '''
     with tf.variable_scope(variable_scope):
         lookup_table = tf.get_variable('lookup_table', 
                                        dtype=tf.float32, 
                                        shape=[vocab_size, embed_size],
                                        initializer=tf.truncated_normal_initializer())
     return tf.nn.embedding_lookup(lookup_table, inputs)
示例#10
0
def sg_context(**kwargs):
    global _context
    # set options when enter
    _context = tf.sg_opt(kwargs)
    if _context.name:
        _context.context_name = _context.name
        _context.name = None
        with tf.variable_scope(_context.context_name):
            yield
    else:
        yield
    # clear options when exit
    _context = tf.sg_opt()
示例#11
0
    def __init__(self,
                 seqlen,
                 in_dim,
                 dim,
                 forget_bias=1.0,
                 activation=tf.tanh,
                 ln=True,
                 bias=True,
                 dtype=tf.float32,
                 dev='/cpu:0',
                 batch_size=3):

        self._in_dim = in_dim
        self._dim = dim
        self._forget_bias = forget_bias
        self._activation = activation
        self._ln = ln
        self._dev = dev
        self._seqlen = seqlen
        self._bias = bias
        self._size = int(self._in_dim * self._dim)
        self._initializer = tf.contrib.layers.xavier_initializer(
        )  #tf.random_normal_initializer()
        self._dtype = dtype

        with tf.device(self._dev):
            with tf.variable_scope("clstm") as scp:
                #self.crnn_state = tf.get_variable("crnn_c",(batch_size, seqlen, self._dim), dtype=tf.sg_floatx,initializer=tf.constant_initializer(0.0),trainable=False)
                #self.crnn_h = tf.get_variable("crnn_h",(batch_size, seqlen, self._dim), dtype=tf.sg_floatx,initializer=tf.constant_initializer(0.0),trainable=False)

                w_ic = tf.get_variable(
                    'w_ic', (self._seqlen, self._dim),
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    trainable=True)
                w_fc = tf.get_variable(
                    'w_fc', (self._seqlen, self._dim),
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    trainable=True)
                w_oc = tf.get_variable(
                    'w_oc', (self._seqlen, self._dim),
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    trainable=True)

                self.make_states(batch_size)
示例#12
0
def sg_context(**kwargs):
    r"""Context helper for computational graph building.
    Makes all elements within the with Block share the parameters.

    For example, in the following example, the default value of parameter `bn` will be set to True
    in the all layers within the with block.
    
    ```
    with tf.sg_context(bn=True):
        ...
        ...
    ```

    Args:
      **kwargs:
        in_dim: An integer. The size of input dimension, which is set to the last one by default.
        dim: An integer. The size of output dimension. Has the same value as in_dim by default.
        bn: Boolean. If True, batch normalization is applied.
        ln: Boolean. If True, layer normalization is applied.
        dout: A float of range [0, 100). A dropout rate. Default is 0..
        bias: Boolean. If True (Default), biases are added.
        name: A name for the layer. By default, the function name is assigned.
        act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
        reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
          as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.

    Returns:
      None
    """
    global _context

    # set options when enter
    context_now = tf.sg_opt(kwargs)
    _context += [context_now]

    # if named context
    if context_now.name:
        context_now.scope_name = context_now.name
        context_now.name = None
        with tf.variable_scope(context_now.scope_name):
            yield
    else:
        yield

    # clear options when exit
    del _context[-1]
示例#13
0
    def __call__(self, tensor, state, scope=None):
        (prev_c, prev_h) = state
        # i = input_gate, c = new cell value for update, f = forget_gate, o = output_gate
        lstm_matrix = self._linear([tensor, prev_h])
        i, c, f, o = tf.split(value=lstm_matrix, num_or_size_splits=4, axis=1)
        if self._ln:
            with tf.variable_scope("ln_rnn", reuse=True):
                beta = tf.get_variable('beta')
                gamma = tf.get_variable('gamma')

        ln = lambda v: _ln_rnn(v, gamma, beta) if self._ln else v

        # do rnn loop
        new_c = prev_c * tf.sigmoid(ln(f)) + tf.sigmoid(
            ln(i)) * self._activation(ln(c))
        new_h = self._activation(new_c) * tf.sigmoid(ln(o))

        return (new_c, new_h)
示例#14
0
def sg_reuse(tensor, **opt):
    r""" Reconstruct computational graph of `tensor` so all the parameters 
    can be reused and replace its input tensor with `opt.input`.

    Args:
      tensor: A `Tensor` (automatically given by chaining).
      **opt:
        input: A `Tensor` that will replace the original input tensor.

    Returns:
      Reconstructed tensor nodes.
    """
    opt = tf.sg_opt(opt)
    assert hasattr(tensor, '_sugar'), 'cannot reuse this node.'
    assert opt.input is not None, 'input is mandatory.'

    # get all nodes in this graph
    nodes, prev = [tensor], tensor._sugar.prev
    while prev is not None:
        nodes = [prev] + nodes
        prev = prev._sugar.prev if hasattr(prev, '_sugar') else None

    # create graph again for this input
    out = opt.input
    for node in nodes[1:]:  # exclude head node
        if node._sugar.is_layer:
            fn = tf.sg_layer_func(node._sugar.func)
            if node._sugar.arg.context_name:
                with tf.variable_scope(node._sugar.arg.context_name):
                    out = fn(
                        out,
                        **(node._sugar.arg +
                           tf.sg_opt(name=node._sugar.name, reuse=True)))
            else:
                out = fn(
                    out,
                    **(node._sugar.arg +
                       tf.sg_opt(name=node._sugar.name, reuse=True)))
        else:
            out = node._sugar.func(out, node._sugar.arg)

    return out
def highway(input_,
            size,
            num_layers=1,
            bias=-2.0,
            f=tf.nn.relu,
            scope='Highway'):
    """Highway Network (cf. http://arxiv.org/abs/1505.00387).
    t = sigmoid(Wy + b)
    z = t * g(Wy + b) + (1 - t) * y
    where g is nonlinearity, t is transform gate, and (1 - t) is carry gate.
    """

    with tf.variable_scope(scope):
        for idx in range(num_layers):
            g = f(linear(input_, size, scope='highway_lin_%d' % idx))

            t = tf.sigmoid(
                linear(input_, size, scope='highway_gate_%d' % idx) + bias)

            output = t * g + (1. - t) * input_
            input_ = output

    return output
示例#16
0
    def wrapper(tensor, **kwargs):

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             dout=0)
            # disable bias when normalization on
            opt += tf.sg_opt(bias=not opt.ln)
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.get_collection(tf.GraphKeys.VARIABLES):
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + 'layers/' + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # all layer variables start with 'layers/' prefix
        with tf.variable_scope('layers', reuse=opt.reuse):

            with tf.variable_scope(opt.name):

                # call layer function
                out = func(tensor, opt)

                # apply dropout
                if opt.dout:
                    out = tf.cond(_phase,
                                  lambda: tf.nn.dropout(out, 1 - opt.dout),
                                  lambda: out)

                # rename tensor
                out = tf.identity(out, 'out')

                # add final output summary
                if opt.reuse is None or not opt.reuse:
                    tf.sg_summary_activation(out)

                # save node info for reuse
                out._sugar = tf.sg_opt(func=func,
                                       arg=tf.sg_opt(kwargs) + _context,
                                       prev=tensor,
                                       is_layer=True,
                                       name=opt.name)
                # inject reuse function
                out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
示例#17
0
    def wrapper(tensor, **kwargs):
        r"""Manages arguments of `tf.sg_opt`.

        Args:
          tensor: automatically passed by decorator
          kwargs:
              in_dim: An integer. The size of input dimension, which is set to the last one by default.
              dim: An integer. The size of output dimension. Has the same value as in_dim by default.
              ln: Boolean. If True, layer normalization is applied.
              bias: Boolean. If True, biases are added. As a default, it is set to True
              name: A name for the layer. As a default, the function name is assigned.
              reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
                as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
        """

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             dout=0)
            # disable bias when normalization on
            opt += tf.sg_opt(bias=not opt.ln)
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', 'lyr-')

            # find existing layer names
            exist_layers = []
            for t in tf.global_variables():
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # all layer variables start with 'lyr-' prefix
        with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

            # call layer function
            out = func(tensor, opt)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if scope.reuse:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func,
                                   arg=tf.sg_opt(kwargs) + _context,
                                   prev=tensor,
                                   is_layer=True,
                                   name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
示例#18
0
    def wrapper(tensor, **kwargs):
        r"""Manages arguments of `tf.sg_opt`.
        
        Args:
          tensor: A `tensor` (automatically passed by decorator).
          kwargs:
            shape:  A list of integers. The shape of `tensor`. Inferred if not specified.
            in_dim: An integer. The size of input dimension, which is set to the last one by default.
            dim: An integer. The size of output dimension. Has the same value as in_dim by default.
            bn: Boolean. If True, batch normalization is applied.
            ln: Boolean. If True, layer normalization is applied.
            dout: A float of range [0, 100). A dropout rate. Set to 0 by default.
            bias: Boolean. If True, biases are added. As a default, it is set to True 
            name: A name for the layer. As a default, the function name is assigned.
            act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
            reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope 
              as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
        """

        from . import sg_initializer as init
        from . import sg_activation

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # batch normalization off, layer normalization off, dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             bn=False,
                             ln=False,
                             dout=0)
            assert not (
                opt.bn and opt.ln
            ), 'one of batch normalization and layer normalization is available.'

            # disable bias when normalization on
            opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.global_variables():
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # all layer variables start with 'lyr-' prefix
        with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

            # call layer function
            out = func(tensor, opt)

            # apply batch normalization
            if opt.bn:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim, summary=False)
                gamma = init.constant('gamma', opt.dim, value=1, summary=False)

                # offset, scale parameter
                mean_running = init.constant('mean', opt.dim, summary=False)
                variance_running = init.constant('variance',
                                                 opt.dim,
                                                 value=1,
                                                 summary=False)

                # calc batch mean, variance
                mean, variance = tf.nn.moments(
                    out, axes=range(len(out.get_shape()) - 1))

                # update running mean, variance
                def update_running_stat():
                    decay = 0.99
                    update_op = [
                        mean_running.assign(mean_running * decay + mean *
                                            (1 - decay)),
                        variance_running.assign(variance_running * decay +
                                                variance * (1 - decay))
                    ]
                    with tf.control_dependencies(update_op):
                        return tf.identity(mean), tf.identity(variance)

                # select mean, variance by training phase
                m, v = tf.cond(
                    _phase,
                    update_running_stat,  # updated running stat and batch mean, variance
                    lambda:
                    (mean_running, variance_running))  # saved mean, variance

                # apply batch normalization
                out = tf.nn.batch_normalization(out, m, v, beta, gamma,
                                                tf.sg_eps)

            # apply normalization parameters
            if opt.ln:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim, summary=False)
                gamma = init.constant('gamma', opt.dim, value=1, summary=False)

                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(out,
                                               axes=[len(out.get_shape()) - 1],
                                               keep_dims=True)

                # apply normalization
                out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                # apply parameter
                out = gamma * out + beta

            # apply activation
            if opt.act:
                out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if not scope.reuse:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func,
                                   arg=tf.sg_opt(kwargs) + _context,
                                   prev=tensor,
                                   is_layer=True,
                                   name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
def generate():
    dev = '/cpu:0'
    with tf.device(dev):
        mydir = 'tfrc150char_wrd0704'
        files = [f for f in listdir(mydir) if isfile(join(mydir, f))]
        tfrecords_filename = []
        tfrecords_filename = [join(mydir, 'short_infer3.tfrecords')
                              ]  #[join(mydir, f) for f in tfrecords_filename]
        tfrecords_filename_inf = [join(mydir, '11_3.tfrecords')]

        print(tfrecords_filename)
        filename_queue = tf.train.string_input_producer(tfrecords_filename,
                                                        num_epochs=num_epochs,
                                                        shuffle=True,
                                                        capacity=1)
        infer_queue = tf.train.string_input_producer(tfrecords_filename_inf,
                                                     num_epochs=num_epochs,
                                                     shuffle=True,
                                                     capacity=1)

        optim = tf.train.AdamOptimizer(learning_rate=0.0001,
                                       beta1=0.9,
                                       beta2=0.99)

        # Calculate the gradients for each model tower.
        tower_grads = []
        reuse_vars = False
        with tf.variable_scope("dec_lstm") as scp:
            dec_cell = BasicLSTMCell2(Hp.w_emb_size,
                                      Hp.rnn_hd,
                                      state_is_tuple=True)

        with tf.variable_scope("contx_lstm") as scp:
            cell = BasicLSTMCell2(Hp.hd, Hp.rnn_hd, state_is_tuple=True)
            rnn_cell = tf.contrib.rnn.DropoutWrapper(
                cell,
                input_keep_prob=Hp.keep_prob,
                output_keep_prob=Hp.keep_prob)

        (words, chars) = read_and_decode(filename_queue,
                                         Hp.batch_size * Hp.num_gpus)

        words_splits = tf.split(axis=0,
                                num_or_size_splits=Hp.num_gpus,
                                value=words)
        chars_splits = tf.split(axis=0,
                                num_or_size_splits=Hp.num_gpus,
                                value=chars)

        word_emb = np.loadtxt("glove300d_0704.txt")
        Hp.word_vs = word_emb.shape[0]

        # --------------------------------------------------------------------------------
        with tf.name_scope('%s_%d' % ("tower", 0)) as scope:
            rnn_state = tower_infer_enc(chars_splits[0],
                                        scope,
                                        rnn_cell,
                                        dec_cell,
                                        word_emb,
                                        out_reuse_vars=False,
                                        dev='/cpu:0')

            chars_pl = tf.placeholder(tf.int32, shape=(None, Hp.c_maxlen))
            rnn_state_pl1 = [
                tf.placeholder(tf.float32, shape=(None, Hp.rnn_hd)),
                tf.placeholder(tf.float32, shape=(None, Hp.rnn_hd))
            ]
            rnn_state_pl = tf.contrib.rnn.LSTMStateTuple(
                rnn_state_pl1[0], rnn_state_pl1[1])

            final_ids, rnn_state_dec = tower_infer_dec(chars_pl,
                                                       scope,
                                                       rnn_cell,
                                                       dec_cell,
                                                       word_emb,
                                                       rnn_state_pl,
                                                       out_reuse_vars=False,
                                                       dev='/cpu:0')

        # --------------------------------------------------------------------------------

        saver = tf.train.Saver(tf.trainable_variables())
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.94

        session_config.gpu_options.allow_growth = False

        restore_dir = 'tnsrbrd/hin17d08m_1313g2'  #   lec30d07m_1634g2   lec04d07m_2006g2     lec28d07m_1221g2    lec31d07m_1548g2
        csv_file = join(restore_dir, time.strftime("hin%dd%mm_%H%M.csv"))
        csv_f = open(csv_file, 'a')
        csv_writer = csv.writer(csv_f)

        with tf.Session(config=session_config) as sess:
            sess.run(
                tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer()))

            tf.train.start_queue_runners(sess=sess)
            saver.restore(sess,
                          tf.train.latest_checkpoint(
                              join(restore_dir,
                                   'last_chpt')))  #    lec04d07m_2006g2

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            for ep in range(num_epochs):

                tf.sg_set_infer(sess)
                rnn_state_val, w_txt, ch_txt = sess.run(
                    [rnn_state, words_splits[0], chars_splits[0]],
                    feed_dict={Hp.keep_prob: 1.0})

                predictions = []  #[w_txt[:,2,:]]
                for idx in range(3):
                    char_inpt = word2char_ids(
                        ids_val) if idx != 0 else ch_txt[:, 2, :]
                    ids_val, rnn_state_val = sess.run(
                        [final_ids, rnn_state_dec],
                        feed_dict={
                            Hp.keep_prob: 1.0,
                            rnn_state_pl1[0]: rnn_state_val[0],
                            rnn_state_pl1[1]: rnn_state_val[1],
                            chars_pl: char_inpt
                        })
                    temp = np.zeros((Hp.batch_size, Hp.w_maxlen))
                    for b in range(Hp.batch_size):
                        stop_ind = np.where(ids_val[b] == 2)[0]
                        if stop_ind.size > 0:
                            stop_ind = stop_ind[0]
                            ids_val[b, stop_ind +
                                    1:] = ids_val[b, stop_ind + 1:] * 0
                    temp[:, :ids_val.shape[1]] = ids_val
                    predictions.append(temp)

                # predictions are decode_sent x b x w_maxlen
                predictions = np.array(predictions)
                in_batches = [w_txt[b, :, :] for b in range(Hp.batch_size)]
                res_batches = [
                    predictions[:, b, :] for b in range(Hp.batch_size)
                ]

                for b in range(Hp.batch_size):
                    in_paragraph = idxword2txt(in_batches[b])
                    print("\n INPUT SAMPLE \n")
                    print(in_paragraph)

                    res_paragraph = idxword2txt(res_batches[b])
                    print("\n RESULTS \n")
                    print(res_paragraph)

                    csv_writer.writerow([
                        " ".join(in_paragraph[:3]), " ".join(in_paragraph[3:]),
                        " ".join(res_paragraph)
                    ])

            csv_f.close()
示例#20
0
def tower_loss_manyparams(xx, scope, reu_vars=False):
    # make embedding matrix for source and target
    reu_vars = reu_vars
    with tf.variable_scope('embatch_size', reuse=reu_vars):
        # (vocab_size, latent_dim)
        emb_x = tf.sg_emb(name='emb_x',
                          voca_size=Hp.vs,
                          dim=Hp.hd,
                          dev=self._dev)
        emb_y = tf.sg_emb(name='emb_y',
                          voca_size=Hp.vs,
                          dim=Hp.hd,
                          dev=self._dev)

    xx = tf.cast(xx, tf.int32)

    time = tf.constant(0)
    losses_int = tf.constant(0.0)
    inputs = tf.transpose(xx, perm=[1, 0, 2])
    input_ta = tensor_array_ops.TensorArray(tf.int32,
                                            size=1,
                                            dynamic_size=True,
                                            clear_after_read=False)
    x_sent = input_ta.unstack(inputs)  #each element is (batch, sentlen)

    n_steps = tf.shape(xx)[1]  # number of sentences in paragraph

    # generate first an unconditioned sentence
    n_input = Hp.hd
    subrec1_init = subrec_zero_state(Hp.batch_size, Hp.hd)
    subrec2_init = subrec_zero_state(Hp.batch_size, Hp.hd)

    with tf.variable_scope("mem", reuse=reu_vars) as scp:
        rnn_cell = LSTMCell(in_dim=h, dim=Hp.hd)
        crnn_cell = ConvLSTMCell(seqlen=Hp.maxlen,
                                 in_dim=n_input // 2,
                                 dim=Hp.hd // 2)

    (rnn_state_init, rnn_h_init) = rnn_cell.zero_state(Hp.batch_size)

    #   (batch, sentlen, latentdim/2)
    (crnn_state_init, crnn_h_init) = crnn_cell.zero_state(Hp.batch_size)

    def rnn_cond(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h,
                 losses):
        return tf.less(time, n_steps - 1)

    def rnn_body(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h,
                 losses):
        x = x_sent.read(time)
        y = x_sent.read(time + 1)  #   (batch, sentlen) = (16, 200)

        # shift target by one step for training source
        y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), y[:, :-1]],
                          1)
        reuse_vars = time == tf.constant(0) or reu_vars

        # --------------------------   BYTENET ENCODER   --------------------------

        # embed table lookup
        enc = x.sg_lookup(emb=emb_x)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(num_blocks):
            enc = (enc.sg_res_block(
                size=5, rate=1, name="enc1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=5,
                    rate=2,
                    name="enc2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=5,
                        rate=4,
                        name="enc4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=5,
                            rate=8,
                            name="enc8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=5,
                                rate=16,
                                name="enc16_%d" % (i),
                                reuse_vars=reuse_vars))


# --------------------------   QCNN + QPOOL ENCODER with attention #1  --------------------------

#quasi cnn layer ZFO  [batch * 3, t, dim2 ]
        conv = enc.sg_quasi_conv1d(is_enc=True,
                                   size=3,
                                   name="qconv_1",
                                   reuse_vars=reuse_vars)
        #attention layer
        # recurrent layer # 1 + final encoder hidden state
        subrec1 = tf.tile((subrec1.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
        concat = conv.sg_concat(target=subrec1,
                                axis=0)  # (batch*4, sentlen, latentdim)
        pool = concat.sg_quasi_rnn(is_enc=True,
                                   att=True,
                                   name="qrnn_1",
                                   reuse_vars=reuse_vars)
        subrec1 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   QCNN + QPOOL ENCODER with attention #2  --------------------------

        # quazi cnn ZFO (batch*3, sentlen, latentdim)
        conv = pool.sg_quasi_conv1d(is_enc=True,
                                    size=2,
                                    name="qconv_2",
                                    reuse_vars=reuse_vars)
        # (batch, sentlen-duplicated, latentdim)
        subrec2 = tf.tile((subrec2.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
        # (batch*4, sentlen, latentdim)
        concat = conv.sg_concat(target=subrec2, axis=0)
        pool = concat.sg_quasi_rnn(is_enc=True,
                                   att=True,
                                   name="qrnn_2",
                                   reuse_vars=reuse_vars)
        subrec2 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   ConvLSTM with RESIDUAL connection and MULTIPLICATIVE block   --------------------------

        #residual block
        causal = False  # for encoder
        crnn_input = (pool[:Hp.batch_size, :, :].sg_bypass_gpus(
            name='relu_0', act='relu', bn=(not causal),
            ln=causal).sg_conv1d_gpus(name="dimred_0",
                                      size=1,
                                      dev="/cpu:0",
                                      reuse=reuse_vars,
                                      dim=Hp.hd / 2,
                                      act='relu',
                                      bn=(not causal),
                                      ln=causal))

        # conv LSTM
        with tf.variable_scope("mem/clstm") as scp:
            (crnn_state, crnn_h) = crnn_cell(crnn_input, (crnn_state, crnn_h),
                                             size=5,
                                             reuse_vars=reuse_vars)
        # dimension recover and residual connection
        rnn_input0 = pool[:Hp.batch_size,:,:] + crnn_h\
                    .sg_conv1d_gpus(name = "diminc_0",size=1,dev="/cpu:0", dim=Hp.hd,reuse=reuse_vars, act='relu', bn=(not causal), ln=causal)

        # --------------------------   QCNN + QPOOL ENCODER with attention #3  --------------------------

        # pooling for lstm input
        # quazi cnn ZFO (batch*3, sentlen, latentdim)
        conv = rnn_input0.sg_quasi_conv1d(is_enc=True,
                                          size=2,
                                          name="qconv_3",
                                          reuse_vars=reuse_vars)
        pool = conv.sg_quasi_rnn(is_enc=True,
                                 att=False,
                                 name="qrnn_3",
                                 reuse_vars=reuse_vars)
        rnn_input = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   LSTM with RESIDUAL connection and MULTIPLICATIVE block --------------------------

        # recurrent block
        with tf.variable_scope("mem/lstm") as scp:
            (rnn_state, rnn_h) = rnn_cell(rnn_input, (rnn_state, rnn_h))

        rnn_h2 = tf.tile(((rnn_h + rnn_input).sg_expand_dims(axis=1)),
                         [1, Hp.maxlen, 1])

        # --------------------------   BYTENET DECODER   --------------------------

        # CNN decoder
        dec = y_src.sg_lookup(emb=emb_y).sg_concat(target=rnn_h2, name="dec")

        for i in range(num_blocks):
            dec = (dec.sg_res_block(
                size=3,
                rate=1,
                causal=True,
                name="dec1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=3,
                    rate=2,
                    causal=True,
                    name="dec2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=3,
                        rate=4,
                        causal=True,
                        name="dec4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=3,
                            rate=8,
                            causal=True,
                            name="dec8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=3,
                                rate=16,
                                causal=True,
                                name="dec16_%d" % (i),
                                reuse_vars=reuse_vars))

        # final fully convolution layer for softmax
        dec = dec.sg_conv1d_gpus(size=1,
                                 dim=Hp.vs,
                                 name="out",
                                 summary=False,
                                 dev=self._dev,
                                 reuse=reuse_vars)

        ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
        cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')

        losses = tf.add_n([losses, cross_entropy_mean], name='total_loss')

        return (time + 1, subrec1, subrec2, rnn_state, rnn_h, crnn_state,
                crnn_h, losses)
def tower_infer_dec(chars,
                    scope,
                    rnn_cell,
                    dec_cell,
                    word_emb,
                    rnn_state,
                    out_reuse_vars=False,
                    dev='/cpu:0'):

    with tf.device(dev):
        with tf.variable_scope('embatch_size', reuse=True):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char',
                                 voca_size=Hp.char_vs,
                                 dim=Hp.hd,
                                 dev=dev)
            emb_word = tf.sg_emb(name='emb_word',
                                 emb=word_emb,
                                 voca_size=Hp.word_vs,
                                 dim=300,
                                 dev=dev)

    print(chars)
    ch = chars
    ch = tf.reverse_sequence(input=ch,
                             seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                             seq_dim=1)
    reuse_vars = reuse_vars_enc = True

    # --------------------------   BYTENET ENCODER   --------------------------

    with tf.variable_scope('encoder'):
        # embed table lookup
        enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(Hp.num_blocks):
            enc = (enc.sg_res_block(size=5,
                                    rate=1,
                                    name="enc1_%d" % (i),
                                    is_first=True,
                                    reuse_vars=reuse_vars,
                                    dev=dev).sg_res_block(
                                        size=5,
                                        rate=2,
                                        name="enc2_%d" % (i),
                                        reuse_vars=reuse_vars,
                                        dev=dev).sg_res_block(
                                            size=5,
                                            rate=4,
                                            name="enc4_%d" % (i),
                                            reuse_vars=reuse_vars,
                                            dev=dev).sg_res_block(
                                                size=5,
                                                rate=8,
                                                name="enc8_%d" % (i),
                                                reuse_vars=reuse_vars,
                                                dev=dev).sg_res_block(
                                                    size=5,
                                                    rate=16,
                                                    name="enc16_%d" % (i),
                                                    reuse_vars=reuse_vars,
                                                    dev=dev))
        byte_enc = enc
        # --------------------------   QCNN + QPOOL ENCODER #1  --------------------------

        with tf.variable_scope('quazi'):

            #quasi cnn layer ZFO  [batch * 3, seqlen, dim2 ]
            conv = byte_enc.sg_quasi_conv1d(is_enc=True,
                                            size=4,
                                            name="qconv_1",
                                            dev=dev,
                                            reuse_vars=reuse_vars)
            # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
            pool0 = conv.sg_quasi_rnn(is_enc=False,
                                      att=False,
                                      name="qrnn_1",
                                      reuse_vars=reuse_vars,
                                      dev=dev)

            qpool_last = pool0[:, -1, :]

    # --------------------------   MAXPOOL along time dimension   --------------------------

    inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
    maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1], [1, 1, 1, 1],
                             'VALID')
    maxpool = tf.squeeze(maxpool, [1, 2])

    # --------------------------   HIGHWAY   --------------------------

    concat = qpool_last + maxpool
    with tf.variable_scope('highway', reuse=reuse_vars):
        input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

    # --------------------------   CONTEXT LSTM  --------------------------

    input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)

    with tf.variable_scope('contx_lstm', reuse=reuse_vars):
        output, rnn_state = rnn_cell(input_lstm, rnn_state)

    beam_size = 8
    reuse_vars = out_reuse_vars

    greedy = False
    if greedy:

        dec_state = rnn_state
        dec_out = []
        d_out = tf.constant([1] * Hp.batch_size)
        for idx in range(Hp.w_maxlen):
            w_input = d_out.sg_lookup(emb=emb_word)
            dec_state = tf.contrib.rnn.LSTMStateTuple(c=dec_state.c,
                                                      h=dec_state.h)
            with tf.variable_scope('dec_lstm', reuse=idx > 0 or reuse_vars):
                d_out, dec_state = dec_cell(w_input, dec_state)

            dec_out.append(d_out)
            d_out = tf.expand_dims(d_out, 1).sg_conv1d_gpus(size=1,
                                                            dim=Hp.word_vs,
                                                            name="out_conv",
                                                            act="linear",
                                                            dev=dev,
                                                            reuse=idx > 0
                                                            or reuse_vars)
            d_out = tf.squeeze(d_out).sg_argmax()

        dec_out = tf.stack(dec_out, 1)

        dec = dec_out.sg_conv1d_gpus(size=1,
                                     dim=Hp.word_vs,
                                     name="out_conv",
                                     act="linear",
                                     dev=dev,
                                     reuse=True)
        return dec.sg_argmax(), rnn_state

    else:

        # ------------------ BEAM SEARCH --------------------
        dec_state = tf.contrib.rnn.LSTMStateTuple(
            tf.tile(tf.expand_dims(rnn_state[0], 1), [1, beam_size, 1]),
            tf.tile(tf.expand_dims(rnn_state[1], 1), [1, beam_size, 1]))
        initial_ids = tf.constant([1] * Hp.batch_size)

        def symbols_to_logits_fn(ids, dec_state):
            dec = []
            dec_c, dec_h = [], []
            # (batch x beam_size x decoded_seq)
            ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
            print("dec_state ", dec_state[0].get_shape().as_list())
            for ind in range(beam_size):
                with tf.variable_scope('dec_lstm', reuse=ind > 0
                                       or reuse_vars):
                    w_input = ids[:, ind, -1].sg_lookup(emb=emb_word)
                    dec_state0 = tf.contrib.rnn.LSTMStateTuple(
                        c=dec_state.c[:, ind, :], h=dec_state.h[:, ind, :])
                    dec_out, dec_state_i = dec_cell(w_input, dec_state0)
                    dec_out = tf.expand_dims(dec_out, 1)
                dec_i = dec_out.sg_conv1d_gpus(size=1,
                                               dim=Hp.word_vs,
                                               name="out_conv",
                                               act="linear",
                                               dev=dev,
                                               reuse=ind > 0 or reuse_vars)

                dec.append(tf.squeeze(dec_i, 1))
                dec_c.append(dec_state_i[0])
                dec_h.append(dec_state_i[1])
            return tf.stack(dec, 1), tf.contrib.rnn.LSTMStateTuple(
                tf.stack(dec_c, 1), tf.stack(dec_h, 1))

        final_ids, final_probs = beam_search.beam_search(symbols_to_logits_fn,
                                                         dec_state,
                                                         initial_ids,
                                                         beam_size,
                                                         Hp.w_maxlen - 1,
                                                         Hp.word_vs,
                                                         3.5,
                                                         eos_id=2)

        return final_ids[:, 0, :], rnn_state
    def rnn_body_stat(time, rnn_state):
        ch = chars_sent.read(time)
        ch = tf.reverse_sequence(input=ch,
                                 seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                                 seq_dim=1)
        reuse_vars = out_reuse_vars

        # --------------------------   BYTENET ENCODER   --------------------------

        with tf.variable_scope('encoder'):
            # embed table lookup
            enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
            # loop dilated conv block
            for i in range(Hp.num_blocks):
                enc = (enc.sg_res_block(size=5,
                                        rate=1,
                                        name="enc1_%d" % (i),
                                        is_first=True,
                                        reuse_vars=reuse_vars,
                                        dev=dev).sg_res_block(
                                            size=5,
                                            rate=2,
                                            name="enc2_%d" % (i),
                                            reuse_vars=reuse_vars,
                                            dev=dev).sg_res_block(
                                                size=5,
                                                rate=4,
                                                name="enc4_%d" % (i),
                                                reuse_vars=reuse_vars,
                                                dev=dev).sg_res_block(
                                                    size=5,
                                                    rate=8,
                                                    name="enc8_%d" % (i),
                                                    reuse_vars=reuse_vars,
                                                    dev=dev).sg_res_block(
                                                        size=5,
                                                        rate=16,
                                                        name="enc16_%d" % (i),
                                                        reuse_vars=reuse_vars,
                                                        dev=dev))
            byte_enc = enc
            # --------------------------   QCNN + QPOOL ENCODER #1  --------------------------

            with tf.variable_scope('quazi'):

                #quasi cnn layer ZFO  [batch * 3, seqlen, dim2 ]
                conv = byte_enc.sg_quasi_conv1d(is_enc=True,
                                                size=4,
                                                name="qconv_1",
                                                dev=dev,
                                                reuse_vars=reuse_vars)
                # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
                pool0 = conv.sg_quasi_rnn(is_enc=False,
                                          att=False,
                                          name="qrnn_1",
                                          reuse_vars=reuse_vars,
                                          dev=dev)

                qpool_last = pool0[:, -1, :]

        # --------------------------   MAXPOOL along time dimension   --------------------------

        inpt_maxpl = tf.expand_dims(byte_enc,
                                    1)  # [batch, 1, seqlen, channels]
        maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1],
                                 [1, 1, 1, 1], 'VALID')
        maxpool = tf.squeeze(maxpool, [1, 2])

        # --------------------------   HIGHWAY   --------------------------

        concat = qpool_last + maxpool
        with tf.variable_scope('highway', reuse=reuse_vars):
            input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

        # --------------------------   CONTEXT LSTM  --------------------------
        input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)

        with tf.variable_scope('contx_lstm', reuse=reuse_vars):
            output, rnn_state = rnn_cell(input_lstm, rnn_state)

        return (time + 1, rnn_state)
def tower_infer_enc(chars,
                    scope,
                    rnn_cell,
                    dec_cell,
                    word_emb,
                    out_reuse_vars=False,
                    dev='/cpu:0'):
    out_rvars = out_reuse_vars

    # make embedding matrix for source and target
    with tf.device(dev):
        with tf.variable_scope('embatch_size', reuse=out_reuse_vars):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char',
                                 voca_size=Hp.char_vs,
                                 dim=Hp.hd,
                                 dev=dev)
            emb_word = tf.sg_emb(name='emb_word',
                                 emb=word_emb,
                                 voca_size=Hp.word_vs,
                                 dim=300,
                                 dev=dev)

    chars = tf.cast(chars, tf.int32)

    time = tf.constant(0)

    inputs = tf.transpose(chars, perm=[1, 0, 2])
    input_ta = tensor_array_ops.TensorArray(tf.int32,
                                            size=tf.shape(chars)[1],
                                            dynamic_size=True,
                                            clear_after_read=True)
    chars_sent = input_ta.unstack(inputs)  #each element is (batch, sentlen)

    resp_steps = tf.shape(chars)[1]  # number of sentences in paragraph
    statm_steps = resp_steps // 2

    rnn_state = rnn_cell.zero_state(
        Hp.batch_size, tf.float32)  #rnn_cell.rnn_state, rnn_cell.rnn_h
    maxdecode = 3

    # -------------------------------------------- STATEMENT ENCODING -----------------------------------------------

    def rnn_cond_stat(time, rnn_state):
        return tf.less(time, statm_steps - 1)

    def rnn_body_stat(time, rnn_state):
        ch = chars_sent.read(time)
        ch = tf.reverse_sequence(input=ch,
                                 seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                                 seq_dim=1)
        reuse_vars = out_reuse_vars

        # --------------------------   BYTENET ENCODER   --------------------------

        with tf.variable_scope('encoder'):
            # embed table lookup
            enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
            # loop dilated conv block
            for i in range(Hp.num_blocks):
                enc = (enc.sg_res_block(size=5,
                                        rate=1,
                                        name="enc1_%d" % (i),
                                        is_first=True,
                                        reuse_vars=reuse_vars,
                                        dev=dev).sg_res_block(
                                            size=5,
                                            rate=2,
                                            name="enc2_%d" % (i),
                                            reuse_vars=reuse_vars,
                                            dev=dev).sg_res_block(
                                                size=5,
                                                rate=4,
                                                name="enc4_%d" % (i),
                                                reuse_vars=reuse_vars,
                                                dev=dev).sg_res_block(
                                                    size=5,
                                                    rate=8,
                                                    name="enc8_%d" % (i),
                                                    reuse_vars=reuse_vars,
                                                    dev=dev).sg_res_block(
                                                        size=5,
                                                        rate=16,
                                                        name="enc16_%d" % (i),
                                                        reuse_vars=reuse_vars,
                                                        dev=dev))
            byte_enc = enc
            # --------------------------   QCNN + QPOOL ENCODER #1  --------------------------

            with tf.variable_scope('quazi'):

                #quasi cnn layer ZFO  [batch * 3, seqlen, dim2 ]
                conv = byte_enc.sg_quasi_conv1d(is_enc=True,
                                                size=4,
                                                name="qconv_1",
                                                dev=dev,
                                                reuse_vars=reuse_vars)
                # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
                pool0 = conv.sg_quasi_rnn(is_enc=False,
                                          att=False,
                                          name="qrnn_1",
                                          reuse_vars=reuse_vars,
                                          dev=dev)

                qpool_last = pool0[:, -1, :]

        # --------------------------   MAXPOOL along time dimension   --------------------------

        inpt_maxpl = tf.expand_dims(byte_enc,
                                    1)  # [batch, 1, seqlen, channels]
        maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1],
                                 [1, 1, 1, 1], 'VALID')
        maxpool = tf.squeeze(maxpool, [1, 2])

        # --------------------------   HIGHWAY   --------------------------

        concat = qpool_last + maxpool
        with tf.variable_scope('highway', reuse=reuse_vars):
            input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

        # --------------------------   CONTEXT LSTM  --------------------------
        input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)

        with tf.variable_scope('contx_lstm', reuse=reuse_vars):
            output, rnn_state = rnn_cell(input_lstm, rnn_state)

        return (time + 1, rnn_state)

    loop_vars_stat = [time, rnn_state]

    time, rnn_state = tf.while_loop\
                      (rnn_cond_stat, rnn_body_stat, loop_vars_stat, swap_memory=False)

    return rnn_state
示例#24
0
    def rnn_body(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h,
                 losses):
        x = x_sent.read(time)
        y = x_sent.read(time + 1)  #   (batch, sentlen) = (16, 200)

        # shift target by one step for training source
        y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), y[:, :-1]],
                          1)
        reuse_vars = time == tf.constant(0) or reu_vars

        # --------------------------   BYTENET ENCODER   --------------------------

        # embed table lookup
        enc = x.sg_lookup(emb=emb_x)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(num_blocks):
            enc = (enc.sg_res_block(
                size=5, rate=1, name="enc1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=5,
                    rate=2,
                    name="enc2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=5,
                        rate=4,
                        name="enc4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=5,
                            rate=8,
                            name="enc8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=5,
                                rate=16,
                                name="enc16_%d" % (i),
                                reuse_vars=reuse_vars))


# --------------------------   QCNN + QPOOL ENCODER with attention #1  --------------------------

#quasi cnn layer ZFO  [batch * 3, t, dim2 ]
        conv = enc.sg_quasi_conv1d(is_enc=True,
                                   size=3,
                                   name="qconv_1",
                                   reuse_vars=reuse_vars)
        #attention layer
        # recurrent layer # 1 + final encoder hidden state
        subrec1 = tf.tile((subrec1.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
        concat = conv.sg_concat(target=subrec1,
                                axis=0)  # (batch*4, sentlen, latentdim)
        pool = concat.sg_quasi_rnn(is_enc=True,
                                   att=True,
                                   name="qrnn_1",
                                   reuse_vars=reuse_vars)
        subrec1 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   QCNN + QPOOL ENCODER with attention #2  --------------------------

        # quazi cnn ZFO (batch*3, sentlen, latentdim)
        conv = pool.sg_quasi_conv1d(is_enc=True,
                                    size=2,
                                    name="qconv_2",
                                    reuse_vars=reuse_vars)
        # (batch, sentlen-duplicated, latentdim)
        subrec2 = tf.tile((subrec2.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
        # (batch*4, sentlen, latentdim)
        concat = conv.sg_concat(target=subrec2, axis=0)
        pool = concat.sg_quasi_rnn(is_enc=True,
                                   att=True,
                                   name="qrnn_2",
                                   reuse_vars=reuse_vars)
        subrec2 = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   ConvLSTM with RESIDUAL connection and MULTIPLICATIVE block   --------------------------

        #residual block
        causal = False  # for encoder
        crnn_input = (pool[:Hp.batch_size, :, :].sg_bypass_gpus(
            name='relu_0', act='relu', bn=(not causal),
            ln=causal).sg_conv1d_gpus(name="dimred_0",
                                      size=1,
                                      dev="/cpu:0",
                                      reuse=reuse_vars,
                                      dim=Hp.hd / 2,
                                      act='relu',
                                      bn=(not causal),
                                      ln=causal))

        # conv LSTM
        with tf.variable_scope("mem/clstm") as scp:
            (crnn_state, crnn_h) = crnn_cell(crnn_input, (crnn_state, crnn_h),
                                             size=5,
                                             reuse_vars=reuse_vars)
        # dimension recover and residual connection
        rnn_input0 = pool[:Hp.batch_size,:,:] + crnn_h\
                    .sg_conv1d_gpus(name = "diminc_0",size=1,dev="/cpu:0", dim=Hp.hd,reuse=reuse_vars, act='relu', bn=(not causal), ln=causal)

        # --------------------------   QCNN + QPOOL ENCODER with attention #3  --------------------------

        # pooling for lstm input
        # quazi cnn ZFO (batch*3, sentlen, latentdim)
        conv = rnn_input0.sg_quasi_conv1d(is_enc=True,
                                          size=2,
                                          name="qconv_3",
                                          reuse_vars=reuse_vars)
        pool = conv.sg_quasi_rnn(is_enc=True,
                                 att=False,
                                 name="qrnn_3",
                                 reuse_vars=reuse_vars)
        rnn_input = pool[:Hp.batch_size, -1, :]  # last character in sequence

        # --------------------------   LSTM with RESIDUAL connection and MULTIPLICATIVE block --------------------------

        # recurrent block
        with tf.variable_scope("mem/lstm") as scp:
            (rnn_state, rnn_h) = rnn_cell(rnn_input, (rnn_state, rnn_h))

        rnn_h2 = tf.tile(((rnn_h + rnn_input).sg_expand_dims(axis=1)),
                         [1, Hp.maxlen, 1])

        # --------------------------   BYTENET DECODER   --------------------------

        # CNN decoder
        dec = y_src.sg_lookup(emb=emb_y).sg_concat(target=rnn_h2, name="dec")

        for i in range(num_blocks):
            dec = (dec.sg_res_block(
                size=3,
                rate=1,
                causal=True,
                name="dec1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=3,
                    rate=2,
                    causal=True,
                    name="dec2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=3,
                        rate=4,
                        causal=True,
                        name="dec4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=3,
                            rate=8,
                            causal=True,
                            name="dec8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=3,
                                rate=16,
                                causal=True,
                                name="dec16_%d" % (i),
                                reuse_vars=reuse_vars))

        # final fully convolution layer for softmax
        dec = dec.sg_conv1d_gpus(size=1,
                                 dim=Hp.vs,
                                 name="out",
                                 summary=False,
                                 dev=self._dev,
                                 reuse=reuse_vars)

        ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
        cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')

        losses = tf.add_n([losses, cross_entropy_mean], name='total_loss')

        return (time + 1, subrec1, subrec2, rnn_state, rnn_h, crnn_state,
                crnn_h, losses)
示例#25
0
def tower_loss2_old(xx, scope, reuse_vars=False):

    # make embedding matrix for source and target
    with tf.variable_scope('embs', reuse=reuse_vars):
        emb_x = tf.sg_emb(name='emb_x',
                          voca_size=Hp.vs,
                          dim=Hp.hd,
                          dev=self._dev)
        emb_y = tf.sg_emb(name='emb_y',
                          voca_size=Hp.vs,
                          dim=Hp.hd,
                          dev=self._dev)

    x_sents = tf.unstack(xx, axis=1)  #each element is (batch, sentlen)

    # generate first an unconditioned sentence
    n_input = Hp.hd

    subrec1 = subrec_zero_state(Hp.bs, Hp.hd)
    subrec2 = subrec_zero_state(Hp.bs, Hp.hd)

    rnn_cell = LSTMCell(in_dim=n_input, dim=Hp.hd)
    (rnn_state, rnn_h) = rnn_cell.zero_state(Hp.bs)

    crnn_cell = ConvLSTMCell(in_dim=n_input, dim=Hp.hd)
    (crnn_state, crnn_h) = crnn_cell.zero_state(n_input)

    for sent in range(len(x_sents) - 1):
        y = x_sents[i + 1]
        x = x_sents[i]  #   (batch, sentlen) = (16, 200)
        # shift target by one step for training source
        y_src = tf.concat([tf.zeros((Hp.bs, 1), tf.sg_intx), y[:, :-1]], 1)

        # embed table lookup
        enc = x.sg_lookup(emb=emb_x)  #(batch, sentlen, dim1)
        # loop dilated conv block
        for i in range(num_blocks):
            enc = (enc.sg_res_block(
                size=5, rate=1, name="enc1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=5,
                    rate=2,
                    name="enc2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=5,
                        rate=4,
                        name="enc4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=5,
                            rate=8,
                            name="enc8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=5,
                                rate=16,
                                name="enc16_%d" % (i),
                                reuse_vars=reuse_vars))

        #quasi rnn layer  [batch * 3, t, dim2 ]
        conv = enc.sg_quasi_conv1d(is_enc=True,
                                   size=2,
                                   name="conv1",
                                   reuse_vars=reuse_vars)
        #attention layer
        # recurrent layer # 1 + final encoder hidden state
        concat = subrec1.sg_concat(target=conv, dim=0)
        subrec1 = conv.sg_quasi_rnn(is_enc=True, att=True)

        conv = pool.sg_quasi_conv1d(is_enc=True,
                                    size=2,
                                    name="conv2",
                                    reuse_vars=reuse_vars)
        concat = subrec2.sg_concat(target=conv, dim=0)
        subrec2 = conv.sg_quasi_rnn(is_enc=True, att=True)

        # conv LSTM
        (crnn_state, crnn_h) = crnn_cell(subrec2, (crnn_state, crnn_h), 5)

        # recurrent block
        (rnn_state, rnn_h) = rnn_cell(crnn_h, (rnn_state, rnn_h))

        # CNN decoder
        dec = crnn_h.sg_concat(target=y_src.sg_lookup(emb=emb_y), name="dec")

        for i in range(num_blocks):
            dec = (dec.sg_res_block(
                size=3,
                rate=1,
                causal=True,
                name="dec1_%d" % (i),
                reuse_vars=reuse_vars).sg_res_block(
                    size=3,
                    rate=2,
                    causal=True,
                    name="dec2_%d" % (i),
                    reuse_vars=reuse_vars).sg_res_block(
                        size=3,
                        rate=4,
                        causal=True,
                        name="dec4_%d" % (i),
                        reuse_vars=reuse_vars).sg_res_block(
                            size=3,
                            rate=8,
                            causal=True,
                            name="dec8_%d" % (i),
                            reuse_vars=reuse_vars).sg_res_block(
                                size=3,
                                rate=16,
                                causal=True,
                                name="dec16_%d" % (i),
                                reuse_vars=reuse_vars))

        # final fully convolution layer for softmax
        dec = dec.sg_conv1d_gpus(size=1, dim=Hp.vs,name="out",summary=False,\
          dev = self._dev,reuse=reuse_vars)

        ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
        cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)
    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')

    return total_loss
示例#26
0
    def __init__(self,
                 x,
                 y,
                 num_batch,
                 vocab_size,
                 emb_dim,
                 hidden_dim,
                 max_ep=240,
                 infer_shape=(1, 1),
                 mode="train"):

        self.num_batch = num_batch
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.max_len_infer = 512
        self.max_ep = max_ep

        # reuse = len([t for t in tf.global_variables() if t.name.startswith('gen')]) > 0
        reuse = (mode == 'infer')

        if mode == "train":
            self.x = x
            self.y = y
        elif mode == "infer":
            self.x = tf.placeholder(tf.int32, shape=infer_shape)
            self.y = tf.placeholder(tf.int32, shape=infer_shape)

        with tf.variable_scope("gen_embs", reuse=reuse):
            self.emb_x = tf.get_variable("emb_x",
                                         [self.vocab_size, self.emb_dim])
            self.emb_y = tf.get_variable("emb_y",
                                         [self.vocab_size, self.emb_dim])
            self.X = tf.nn.embedding_lookup(self.emb_x, self.x)
            self.Y = tf.nn.embedding_lookup(self.emb_y, self.y)

        with tf.sg_context(name='gen', reuse=reuse):
            #     self.emb_x = tf.Variable(tf.random_uniform([self.vocab_size, self.emb_dim], 0.0, 1.0), name="emb_x")
            #     self.emb_y = tf.Variable(tf.random_uniform([self.vocab_size, self.emb_dim], 0.0, 1.0), name="emb_y")
            # self.emb_x = tf.sg_emb(name='emb_x', voca_size=self.vocab_size, dim=self.emb_dim)  # (68,16)
            # self.emb_y = tf.sg_emb(name='emb_y', voca_size=self.vocab_size, dim=self.emb_dim)  # (68,16)
            # self.X = self.x.sg_lookup(emb=self.emb_x)  # (8,63,16)
            # self.Y = self.y.sg_lookup(emb=self.emb_y)  # (8,63,16)

            if mode == "train":
                self.lstm_layer = self.X.sg_lstm(in_dim=self.emb_dim,
                                                 dim=self.vocab_size,
                                                 name="lstm")  # (8, 63, 68)
                self.test = self.lstm_layer.sg_softmax(name="testtt")

                print "mazum??"
                print self.test

            elif mode == "infer":
                self.lstm_layer = self.X.sg_lstm(in_dim=self.emb_dim,
                                                 dim=self.vocab_size,
                                                 last_only=True,
                                                 name="lstm")
                self.log_prob = tf.log(self.lstm_layer)

                # next_token: select by distribution probability, preds: select by argmax

                self.multinormed = tf.multinomial(self.log_prob, 1)
                self.next_token = tf.cast(
                    tf.reshape(tf.multinomial(self.log_prob, 1),
                               [1, infer_shape[0]]), tf.int32)
                self.preds = self.lstm_layer.sg_argmax()

        if mode == "train":
            self.loss = self.lstm_layer.sg_ce(target=self.y)
            self.istarget = tf.not_equal(self.y, 0).sg_float()

            self.reduced_loss = (self.loss.sg_sum()) / (
                self.istarget.sg_sum() + 0.0000001)
            tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
示例#27
0
    def wrapper(tensor, **kwargs):
        r"""Manages arguments of `tf.sg_opt`.

        Args:
          tensor: A `tensor` (automatically passed by decorator).
          kwargs:
            shape:  A list of integers. The shape of `tensor`. Inferred if not specified.
            in_dim: An integer. The size of input dimension, which is set to the last one by default.
            dim: An integer. The size of output dimension. Has the same value as in_dim by default.
            bn: Boolean. If True, batch normalization is applied.
            ln: Boolean. If True, layer normalization is applied.
            scale: If true, multiple by a trainable gamma variable. When the activation is
              linear (relu included), this can be disabled because it can be implicitly
              learned by the next layer. The default is True.
            dout: A float of range [0, 100). A dropout rate. Set to 0 by default.
            bias: Boolean. If True, biases are added. As a default, it is set to True
            name: A name for the layer. As a default, the function name is assigned.
            act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
            reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
              as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
            regularizer:  A string. None, 'l1' or 'l2'. The default is None
            summary: If True, summaries are added. The default is True.
        """

        from . import sg_initializer as init
        from . import sg_activation

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + sg_get_context()

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # batch normalization off, layer normalization off, dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             bn=False,
                             ln=False,
                             dout=0,
                             summary=True,
                             scale=True)
            if opt.regularizer == 'l1':
                opt.regularizer = lambda x: tf.reduce_mean(tf.abs(x))
            elif opt.regularizer == 'l2':
                opt.regularizer = lambda x: tf.square(
                    tf.reduce_mean(tf.square(x)))
            else:
                opt.regularizer = None

            assert not (
                opt.bn and opt.ln
            ), 'one of batch normalization and layer normalization is available.'

            # disable bias when normalization on
            opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.global_variables():
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

            # call layer function
            out = func(tensor, opt)
            out_shape = out.get_shape()

            # apply batch normalization
            if opt.bn:
                beta = init.constant('beta', opt.dim, summary=opt.summary)
                gamma = init.constant('gamma',
                                      opt.dim,
                                      value=1,
                                      summary=opt.summary,
                                      trainable=opt.scale)

                # offset, scale parameter ( for inference )
                mean_running = init.constant('mean',
                                             opt.dim,
                                             trainable=False,
                                             summary=opt.summary)
                variance_running = init.constant('variance',
                                                 opt.dim,
                                                 value=1,
                                                 trainable=False,
                                                 summary=opt.summary)

                # use fused batch norm if ndims in [2, 3, 4]
                if out_shape.ndims in [2, 3, 4]:
                    # add HW dims if necessary, fused_batch_norm requires shape to be NHWC
                    if out_shape.ndims == 2:
                        out = tf.expand_dims(out, axis=1)
                        out = tf.expand_dims(out, axis=2)
                    elif out_shape.ndims == 3:
                        out = tf.expand_dims(out, axis=2)

                    fused_eps = tf.sg_eps if tf.sg_eps > 1e-5 else 1e-5
                    out, mean, variance = tf.cond(
                        _phase,
                        lambda: tf.nn.fused_batch_norm(
                            out, gamma, beta, epsilon=fused_eps),
                        lambda: tf.nn.fused_batch_norm(out,
                                                       gamma,
                                                       beta,
                                                       mean=mean_running,
                                                       variance=
                                                       variance_running,
                                                       epsilon=fused_eps,
                                                       is_training=False),
                    )

                    # restore original shape if HW dims was added
                    if out_shape.ndims == 2:
                        out = tf.squeeze(out, axis=[1, 2])
                    elif out_shape.ndims == 3:
                        out = tf.squeeze(out, axis=2)

                # fallback to naive batch norm
                else:
                    mean, variance = tf.nn.moments(
                        out, axes=list(range(len(out.get_shape()) - 1)))
                    out = tf.cond(
                        _phase, lambda: tf.nn.batch_normalization(
                            out, mean, variance, beta, gamma, tf.sg_eps),
                        lambda: tf.nn.batch_normalization(
                            out, mean_running, variance_running, beta, gamma,
                            tf.sg_eps))

                decay = 0.99
                tf.add_to_collection(
                    tf.GraphKeys.UPDATE_OPS,
                    mean_running.assign(mean_running * decay + mean *
                                        (1 - decay)))
                tf.add_to_collection(
                    tf.GraphKeys.UPDATE_OPS,
                    variance_running.assign(variance_running * decay +
                                            variance * (1 - decay)))

            # apply layer normalization
            if opt.ln:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim, summary=opt.summary)
                if opt.scale:
                    gamma = init.constant('gamma',
                                          opt.dim,
                                          value=1,
                                          summary=opt.summary)

                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(out,
                                               axes=[len(out.get_shape()) - 1],
                                               keep_dims=True)

                # apply normalization
                out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                # apply parameter
                if opt.scale:
                    out = gamma * out + beta
                else:
                    out = out + beta

            # apply activation
            if opt.act:
                out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if opt.summary:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func,
                                   arg=tf.sg_opt(kwargs) + sg_get_context(),
                                   prev=tensor,
                                   is_layer=True,
                                   name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
示例#28
0
    def wrapper(tensor, **kwargs):

        import sg_initializer as init
        import sg_activation

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + _context

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # batch normalization off, layer normalization off, dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             bn=False,
                             ln=False,
                             dout=0)
            assert not (
                opt.bn and opt.ln
            ), 'one of batch normalization and layer normalization is available.'

            # disable bias when normalization on
            opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.get_collection(tf.GraphKeys.VARIABLES):
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + 'layers/' + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        # all layer variables start with 'layers/' prefix
        with tf.variable_scope('layers', reuse=opt.reuse):

            with tf.variable_scope(opt.name):

                # call layer function
                out = func(tensor, opt)

                # apply batch normalization
                if opt.bn:
                    # offset, scale parameter
                    beta = init.constant('beta', opt.dim)
                    gamma = init.constant('gamma', opt.dim, value=1)

                    # offset, scale parameter
                    mean_running = init.constant('mean', opt.dim)
                    variance_running = init.constant('variance',
                                                     opt.dim,
                                                     value=1)

                    # calc batch mean, variance
                    mean, variance = tf.nn.moments(
                        out, axes=range(len(out.get_shape()) - 1))

                    # update running mean, variance
                    def update_running_stat():
                        decay = 0.99
                        update_op = [
                            mean_running.assign(mean_running * decay + mean *
                                                (1 - decay)),
                            variance_running.assign(variance_running * decay +
                                                    variance * (1 - decay))
                        ]
                        with tf.control_dependencies(update_op):
                            return tf.identity(mean), tf.identity(variance)

                    # select mean, variance by training phase
                    m, v = tf.cond(
                        _phase,
                        update_running_stat,  # updated running stat and batch mean, variance
                        lambda: (mean_running, variance_running)
                    )  # saved mean, variance

                    # apply batch normalization
                    out = tf.nn.batch_normalization(out, m, v, beta, gamma,
                                                    tf.sg_eps)

                # apply normalization parameters
                if opt.ln:
                    # offset, scale parameter
                    beta = init.constant('beta', opt.dim)
                    gamma = init.constant('gamma', opt.dim, value=1)

                    # calc layer mean, variance for final axis
                    mean, variance = tf.nn.moments(
                        out, axes=[len(out.get_shape()) - 1], keep_dims=True)

                    # apply normalization
                    out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                    # apply parameter
                    out = gamma * out + beta

                # apply activation
                if opt.act:
                    out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

                # apply dropout
                if opt.dout:
                    out = tf.cond(_phase,
                                  lambda: tf.nn.dropout(out, 1 - opt.dout),
                                  lambda: out)

                # rename tensor
                out = tf.identity(out, 'out')

                # add final output summary
                if opt.reuse is None or not opt.reuse:
                    tf.sg_summary_activation(out)

                # save node info for reuse
                out._sugar = tf.sg_opt(func=func,
                                       arg=tf.sg_opt(kwargs) + _context,
                                       prev=tensor,
                                       is_layer=True,
                                       name=opt.name)
                # inject reuse function
                out.sg_reuse = types.MethodType(sg_reuse, out)

        return out