示例#1
0
def _histogram(name, tensor):
    """Add a histogram summary op for `tensor` to the SUMMARIES collection.

    Nothing is registered when either the current TensorFlow variable scope
    or the context returned by `tf.sg_get_context()` has `reuse` set.

    Args:
      name: Name passed to the histogram summary op.
      tensor: Tensor whose values are summarized.
    """
    # Variable scope is checked first; the context is only consulted when the
    # scope is not in reuse mode.
    if tf.get_variable_scope().reuse or tf.sg_get_context().reuse:
        return
    summary_op = gen_logging_ops._histogram_summary(name, tensor)
    tf.add_to_collection(tf.GraphKeys.SUMMARIES, summary_op)
示例#2
0
    def wrapper(tensor, **kwargs):
        r"""Manages arguments of `tf.sg_opt`.

        Parses the keyword options, fills in defaults, picks a layer name,
        calls the wrapped layer function inside a variable scope and then
        optionally applies batch/layer normalization, an activation and
        dropout to its output.

        Args:
          tensor: A `tensor` (automatically passed by decorator).
          kwargs:
            shape:  A list of integers. The shape of `tensor`. Inferred if not specified.
            in_dim: An integer. The size of input dimension, which is set to the last one by default.
            dim: An integer. The size of output dimension. Has the same value as in_dim by default.
            bn: Boolean. If True, batch normalization is applied.
            ln: Boolean. If True, layer normalization is applied.
            dout: A float of range [0, 1). A dropout rate; `1 - dout` is passed to
              `tf.nn.dropout` as the keep probability. Set to 0 by default.
            bias: Boolean. If True, biases are added. As a default, it is set to True
              unless `bn` or `ln` is enabled.
            name: A name for the layer. As a default, the function name is assigned.
            act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
            reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
              as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
            regularizer:  A string. None, 'l1' or 'l2'. The default is None
            summary: If True, summaries are added. The default is True.

        Returns:
          The layer output tensor (renamed to 'out' inside the layer scope),
          carrying a `_sugar` option record and a bound `sg_reuse` method.

        Raises:
          AssertionError: If both `bn` and `ln` are enabled.
        """

        from . import sg_initializer as init
        from . import sg_activation

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + sg_get_context()

        # set default argument
        shape = tensor.get_shape().as_list()
        # batch normalization off, layer normalization off, dropout off
        opt += tf.sg_opt(shape=shape,
                         in_dim=shape[-1],
                         dim=shape[-1],
                         bn=False,
                         ln=False,
                         dout=0,
                         summary=True)

        # translate the regularizer name into a callable (or None)
        if opt.regularizer == 'l1':
            opt.regularizer = lambda x: tf.reduce_mean(tf.abs(x))
        elif opt.regularizer == 'l2':
            opt.regularizer = lambda x: tf.square(
                tf.reduce_mean(tf.square(x)))
        else:
            opt.regularizer = None

        assert not (
            opt.bn and opt.ln
        ), 'one of batch normalization and layer normalization is available.'

        # disable bias when normalization on
        opt += tf.sg_opt(bias=not (opt.bn or opt.ln))

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # the enclosing scope does not change while scanning variables,
            # so the search pattern is built once
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if scope_name else ''
            pattern = prefix + opt.name

            # find existing layer names
            exist_layers = set()
            for t in tf.global_variables():
                i = t.name.rfind(pattern)
                if i >= 0:
                    exist_layers.add(t.name[i:].split('/')[-2])

            # layer name numbering
            if not exist_layers:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max(int(n.split('_')[-1]) for n in exist_layers) + 1)

        with tf.variable_scope(opt.name, reuse=opt.reuse):

            # call layer function
            out = func(tensor, opt)

            # apply batch normalization
            if opt.bn:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim)
                gamma = init.constant('gamma', opt.dim, value=1)

                # calc batch mean, variance over every axis but the last
                mean, variance = tf.nn.moments(
                    out, axes=list(range(len(out.get_shape()) - 1)))

                # offset, scale parameter ( for inference )
                mean_running = init.constant('mean', opt.dim, trainable=False)
                variance_running = init.constant('variance',
                                                 opt.dim,
                                                 value=1,
                                                 trainable=False)

                # add running mean, variance to UPDATE_OP collection
                decay = 0.99
                tf.add_to_collection(
                    tf.GraphKeys.UPDATE_OPS,
                    mean_running.assign(mean_running * decay + mean *
                                        (1 - decay)))
                tf.add_to_collection(
                    tf.GraphKeys.UPDATE_OPS,
                    variance_running.assign(variance_running * decay +
                                            variance * (1 - decay)))

                # select mean, variance by training phase
                m, v = tf.cond(
                    _phase,
                    lambda: (mean, variance),  # batch mean, variance
                    lambda:
                    (mean_running, variance_running))  # saved mean, variance

                # apply batch normalization
                out = tf.nn.batch_normalization(out, m, v, beta, gamma,
                                                tf.sg_eps)

            # apply layer normalization
            if opt.ln:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim)
                gamma = init.constant('gamma', opt.dim, value=1)

                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(out,
                                               axes=[len(out.get_shape()) - 1],
                                               keep_dims=True)

                # apply normalization
                out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                # apply parameter
                out = gamma * out + beta

            # apply activation
            if opt.act:
                out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

            # apply dropout ( only in the branch selected when _phase is true )
            if opt.dout:
                out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func,
                                   arg=tf.sg_opt(kwargs) + sg_get_context(),
                                   prev=tensor,
                                   is_layer=True,
                                   name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
示例#3
0
def tower_loss2_old(xx, scope, reuse_vars=False):
    """Build the total loss of one tower over consecutive sentence pairs.

    `xx` is unstacked along axis 1 and each slice is paired with the slice
    that follows it: the first of the pair is encoded, the second is the
    decoding target. One mean cross-entropy per pair is added to the
    'losses' collection, and the collection entries matching `scope` are
    summed into the returned tensor.

    Args:
      xx: Input tensor; unstacked along axis 1 into per-sentence tensors
        (original comments suggest each slice is (batch, sentlen) —
        TODO confirm against the caller).
      scope: Scope filter passed to `tf.get_collection('losses', scope)`.
      reuse_vars: Reuse flag forwarded to the embedding variable scope and
        to the layer calls.

    Returns:
      A tensor named 'total_loss': the sum of the collected losses.
    """
    # NOTE(review): `self` is not a parameter of this function, so
    # `self._dev` only resolves if a global `self` exists — confirm whether
    # this was meant to be a method.

    # make embedding matrix for source and target
    with tf.variable_scope('embs', reuse=reuse_vars):
        emb_x = tf.sg_emb(name='emb_x',
                          voca_size=Hp.vs,
                          dim=Hp.hd,
                          dev=self._dev)
        emb_y = tf.sg_emb(name='emb_y',
                          voca_size=Hp.vs,
                          dim=Hp.hd,
                          dev=self._dev)

    x_sents = tf.unstack(xx, axis=1)  # each element is (batch, sentlen)

    # generate first an unconditioned sentence
    n_input = Hp.hd

    subrec1 = subrec_zero_state(Hp.bs, Hp.hd)
    subrec2 = subrec_zero_state(Hp.bs, Hp.hd)

    rnn_cell = LSTMCell(in_dim=n_input, dim=Hp.hd)
    (rnn_state, rnn_h) = rnn_cell.zero_state(Hp.bs)

    crnn_cell = ConvLSTMCell(in_dim=n_input, dim=Hp.hd)
    (crnn_state, crnn_h) = crnn_cell.zero_state(n_input)

    # dilation rates of the residual blocks, applied in this order
    dilation_rates = (1, 2, 4, 8, 16)

    # Walk over (source, target) = (sentence k, sentence k + 1). Pairing via
    # zip avoids sharing an index variable with the inner block loops, which
    # previously clobbered (or left undefined) the sentence index.
    for x, y in zip(x_sents[:-1], x_sents[1:]):
        # shift target by one step for training source
        y_src = tf.concat([tf.zeros((Hp.bs, 1), tf.sg_intx), y[:, :-1]], 1)

        # embed table lookup
        enc = x.sg_lookup(emb=emb_x)  # (batch, sentlen, dim1)
        # loop dilated conv block
        # NOTE(review): `num_blocks` is not defined in this function;
        # presumably a module-level constant — verify.
        for block in range(num_blocks):
            for rate in dilation_rates:
                enc = enc.sg_res_block(size=5,
                                       rate=rate,
                                       name="enc%d_%d" % (rate, block),
                                       reuse_vars=reuse_vars)

        # quasi rnn layer  [batch * 3, t, dim2 ]
        conv = enc.sg_quasi_conv1d(is_enc=True,
                                   size=2,
                                   name="conv1",
                                   reuse_vars=reuse_vars)
        # attention layer
        # recurrent layer # 1 + final encoder hidden state
        # NOTE(review): `concat` is built but never consumed below — confirm
        # whether the quasi-RNN was meant to run on it instead of `conv`.
        concat = subrec1.sg_concat(target=conv, dim=0)
        subrec1 = conv.sg_quasi_rnn(is_enc=True, att=True)

        # NOTE(review): `pool` is not defined anywhere in this function;
        # this line fails unless a global `pool` exists — confirm the
        # intended input tensor.
        conv = pool.sg_quasi_conv1d(is_enc=True,
                                    size=2,
                                    name="conv2",
                                    reuse_vars=reuse_vars)
        concat = subrec2.sg_concat(target=conv, dim=0)
        subrec2 = conv.sg_quasi_rnn(is_enc=True, att=True)

        # conv LSTM
        (crnn_state, crnn_h) = crnn_cell(subrec2, (crnn_state, crnn_h), 5)

        # recurrent block
        (rnn_state, rnn_h) = rnn_cell(crnn_h, (rnn_state, rnn_h))

        # CNN decoder
        dec = crnn_h.sg_concat(target=y_src.sg_lookup(emb=emb_y), name="dec")

        for block in range(num_blocks):
            for rate in dilation_rates:
                dec = dec.sg_res_block(size=3,
                                       rate=rate,
                                       causal=True,
                                       name="dec%d_%d" % (rate, block),
                                       reuse_vars=reuse_vars)

        # final fully convolution layer for softmax
        dec = dec.sg_conv1d_gpus(size=1,
                                 dim=Hp.vs,
                                 name="out",
                                 summary=False,
                                 dev=self._dev,
                                 reuse=reuse_vars)

        ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
        cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)
    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')

    return total_loss