Example #1
def conv1d_layer(inputs,
                 kernel_size,
                 channels,
                 activation,
                 is_training,
                 scope=None):
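    """ 1-D convolution (same padding) followed by batch normalization.
    args:
        inputs: Tensor([B, T, D], tf.float32)
        kernel_size: INT, filter width
        channels: INT, output channels
        activation: STR, e.g. "relu"
        is_training: BOOL for batch normalization
            True --- train, False --- prediction
        scope:
    return:
    """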
    scope = scope or "conv1d"
    try:
        use_relu = activation in ["relu", "RELU", "ReLU"]
        activation = tf_utils.get_activation(activation)
    except Exception as e:
        print(e)
        activation = None
        use_relu = False

    kernel_initializer = tf_utils.get_initializer("xavier", is_relu=use_relu)

    with tf.variable_scope(scope):
        output = tf.layers.conv1d(inputs,
                                  filters=channels,
                                  kernel_size=kernel_size,
                                  activation=activation,
                                  kernel_initializer=kernel_initializer,
                                  padding='same')
        output = tf.layers.batch_normalization(output, training=is_training)
        tf.summary.histogram("outputs", output)
        return output
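A minimal usage sketch (assumes TensorFlow 1.x and that conv1d_layer and its tf_utils dependency are importable; the shapes are illustrative):

import tensorflow as tf

inputs = tf.placeholder(tf.float32, shape=[None, None, 80])  # [batch, time, dim]
conv_out = conv1d_layer(inputs,
                        kernel_size=3,
                        channels=128,
                        activation="relu",
                        is_training=True,
                        scope="conv1d_example")
# conv_out: Tensor([B, T, 128]), ReLU conv1d features followed by batch norm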
Example #2
def apply_dense(inputs, params, is_training=False, dropout=0.0, scope=None):
    """
    fully connected layer
    num_params: w: in_dim * num_units, b: num_units

    Args:
        inputs (): input tensor, [batch, len, dim]
        params (): DICT, {"num_units", "activation"}
        is_training (): BOOL for batch normalization (only used when "use_bn" is set)
        dropout (): dropout rate
        scope (): tensorflow variable scope name

    Returns: dense layer outputs, and the set of variables that need initialization

    """
    num_units = int(params["num_units"])

    try:
        activation = params["activation"].lower()
    except KeyError:
        activation = "linear"
    try:
        initializer = params["initializer"].lower()
    except KeyError:
        initializer = "xavier_normal"
    use_bn = "use_bn" in params and params["use_bn"]

    temp = set(tf.global_variables())
    scope = scope or "dense"
    with tf.variable_scope(scope):
        shape = inputs.get_shape()
        input_dim = shape[-1].value
        inputs_2d = tf.reshape(inputs, [-1, input_dim])
        inputs_2d = tf.nn.dropout(inputs_2d, keep_prob=1.0 - dropout)
        initializer = tf_utils.get_initializer(initializer,
                                               is_relu=(activation == "relu"))
        w = tf.get_variable("w",
                            dtype=tf.float32,
                            shape=[input_dim, num_units],
                            initializer=initializer)
        b = tf.get_variable("b",
                            dtype=tf.float32,
                            shape=[1, num_units],
                            initializer=tf.zeros_initializer())
        outputs = tf.matmul(inputs_2d, w) + b

        outputs = tf_utils.get_activation(activation)(outputs)

        outputs = tf.reshape(outputs, [tf.shape(inputs)[0], -1, num_units])

        if use_bn:
            outputs = tf.layers.batch_normalization(outputs,
                                                    training=is_training)

        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("outputs", outputs)
        # outputs = tf.nn.dropout(outputs, keep_prob=1.0-dropout)
    init_var_set = set(tf.global_variables()) - temp
    return outputs, init_var_set
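A usage sketch for apply_dense (same assumptions as the first sketch; the params values are illustrative):

inputs = tf.placeholder(tf.float32, shape=[None, None, 256])
params = {"num_units": "512", "activation": "relu", "initializer": "xavier_normal"}
outputs, new_vars = apply_dense(inputs, params,
                                is_training=False,
                                dropout=0.1,
                                scope="dense_example")
# outputs: [batch, len, 512]; new_vars: the variables created inside the "dense_example" scope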
Example #3
    def check_cell_params(_name, _params):
        _cell_params = dict()
        try:
            _cell_params["num_units"] = int(_params["num_units"])
        except (KeyError, TypeError):
            raise Exception(
                "[ERROR] check_rnn_params: 'num_units' is needed, should be an integer"
            )

        try:
            activation = tf_utils.get_activation(_params["activation"])
            _cell_params["activation"] = activation
        except Exception as e:
            print(e)
            _cell_params["activation"] = tf.tanh

        use_relu = _params.get("activation", "").lower() == "relu"
        try:
            initializer = tf_utils.get_initializer(_params["initializer"],
                                                   is_relu=use_relu)
        except Exception as e:
            #print(e)
            gain = np.sqrt(2) if use_relu else 1.0
            initializer = tf.orthogonal_initializer(gain=gain)

        if _name == "GRUCell":
            # GRUCell does not accept kernel_initializer; passing it raises an error
            # _cell_params["kernel_initializer"] = initializer
            return _cell_params

        _cell_params["initializer"] = initializer

        try:
            if isinstance(_params["use_peepholes"], bool):
                _cell_params["use_peepholes"] = _params["use_peepholes"]
            else:
                _cell_params["use_peepholes"] = (
                    str(_params["use_peepholes"]).upper() in ['TRUE', 'T', 'YES', 'Y'])
        except KeyError:
            # "use_peepholes" not provided: keep the cell's default (False)
            pass

        if "num_proj" in _params:
            _cell_params["num_proj"] = int(_params["num_proj"])
        if "forget_bias" in _params:
            _cell_params["forget_bias"] = float(_params["forget_bias"])
        return _cell_params
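check_cell_params is a nested helper, so it cannot be called on its own; a hypothetical _params dict it would normalize for an LSTM cell could look like this (keys taken from the code above, values illustrative):

_params = {
    "num_units": "256",            # parsed with int()
    "activation": "tanh",          # resolved via tf_utils.get_activation
    "initializer": "orthogonal",   # falls back to tf.orthogonal_initializer on failure
    "use_peepholes": "True",       # string flags such as "True"/"T"/"YES"/"Y" are accepted
    "num_proj": "128",
    "forget_bias": "1.0",
}
# check_cell_params("LSTMCell", _params) then returns keyword arguments for the cell constructor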
Example #4
def highwaynet(inputs,
               num_units,
               activation="relu",
               gate_bias=-1.0,
               scope=None):
    """
        activation(x) * T + x * (1 - T)
    args:
        inputs: Tensor([B, T, D], tf.float32)
        num_units: INT, same dimension as the inputs
        activation: STR, for transform layer
        gate_bias: FLOAT, bias for transform gate, default=-1.0
        scope:
    return:
    """
    scope = scope or "highwaynet"
    try:
        activation = tf_utils.get_activation(activation)
    except Exception as e:
        print(e)
        activation = None

    with tf.variable_scope(scope):
        H = tf.layers.dense(inputs,
                            units=num_units,
                            activation=activation,
                            name='H')
        T = tf.layers.dense(
            inputs,
            units=num_units,
            activation=tf.nn.sigmoid,
            name='T',
            bias_initializer=tf.constant_initializer(gate_bias))
        outputs = tf.add(H * T, inputs * (1.0 - T), "highway_output")
        tf.summary.histogram(scope + "/H", H)
        tf.summary.histogram(scope + "/T", T)
        tf.summary.histogram(scope + "/outputs", outputs)
    return outputs
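A sketch of stacking two highway layers (illustrative; as the docstring notes, num_units must match the input depth):

inputs = tf.placeholder(tf.float32, shape=[None, None, 128])
h = highwaynet(inputs, num_units=128, activation="relu", scope="highway_1")
h = highwaynet(h, num_units=128, activation="relu", scope="highway_2")
# each layer computes relu(dense(x)) * T + x * (1 - T),
# with T = sigmoid(dense(x)) whose bias is initialized to gate_bias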
Example #5
def gated_conv1d(inputs,
                 kernel_size,
                 channels,
                 dilation_rate=1,
                 activation=None,
                 kernel_initializer=None,
                 use_bn=False,
                 use_wn=False,
                 is_training=True,
                 scope=None):
    """ gated conv1d:
                      |--> conv1d --> activation -->|
            inputs -->|                             * --> bn
                      |--> conv1d -->   sigmoid  -->|
    args:
        inputs: Tensor([B, T, D], tf.float32)
        kernel_size: INT, filter width
        channels: INT, output_channels
        dilation_rate: INT,
        activation: STR
        kernel_initializer: STR
        use_bn: whether to use batch normalization
        is_training: BOOL for batch normalization
            True --- train, False --- prediction
        scope:
    return:
    """
    scope = scope or "gated_conv1d"

    use_relu = activation in ["relu", "ReLU", "RELU"]

    try:
        activation = tf_utils.get_activation(activation)
    except Exception as e:
        print(e)
        activation = None

    # if kernel_initializer is None:
    #    kernel_initializer = xavier_initializer(use_relu)

    with tf.variable_scope(scope):
        conv_out = conv1d(inputs,
                          kernel_size,
                          channels,
                          dilation_rate=dilation_rate,
                          kernel_initializer=kernel_initializer,
                          is_relu=use_relu,
                          add_bias=(not use_bn),
                          use_wn=use_wn,
                          scope="filter")

        if activation is not None:
            conv_out = activation(conv_out)

        gated = conv1d(inputs,
                       kernel_size,
                       channels,
                       dilation_rate=dilation_rate,
                       kernel_initializer=kernel_initializer,
                       is_relu=False,
                       add_bias=True,
                       use_wn=use_wn,
                       scope="gate")
        gated_out = tf.nn.sigmoid(gated)
        tf.summary.histogram("%s_gated_out" % scope, gated_out)

        output = conv_out * gated_out

    if use_bn:
        output = tf.layers.batch_normalization(output, training=is_training)
    return output
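A usage sketch (illustrative hyperparameters; the conv1d helper and tf_utils are assumed to come from the same module):

inputs = tf.placeholder(tf.float32, shape=[None, None, 64])
out = gated_conv1d(inputs,
                   kernel_size=3,
                   channels=64,
                   dilation_rate=2,
                   activation="relu",
                   use_bn=True,
                   is_training=True,
                   scope="gated_conv_example")
# out = relu(conv(x)) * sigmoid(conv(x)), followed by batch normalization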
Example #6
def atrous_conv1d(inputs,
                  kernel_size,
                  channels,
                  dilation_rate=1,
                  activation=None,
                  kernel_initializer=None,
                  use_bn=False,
                  use_wn=False,
                  is_training=True,
                  scope=None):
    """ atrous cnn for text
    args:
        inputs: Tensor([B, T, D], tf.float32)
        kernel_size: INT, filter width
        channels: INT, output_channels
        dilation_rate: INT,
        activation: STR
        kernel_initializer: STR
        use_bn: whether to use batch normalization
        is_training: BOOL for batch normalization
            True --- train, False --- prediction
        scope:
    return:
    """
    scope = scope or "atrous_conv1d"

    use_relu = activation in ["relu", "ReLU", "RELU"]
    outputs = inputs

    try:
        activation = tf_utils.get_activation(activation)
    except Exception as e:
        print(e)
        activation = None

    # if kernel_initializer is None:
    #    kernel_initializer = xavier_initializer(use_relu)

    with tf.variable_scope(scope):
        outputs = conv1d(inputs,
                         kernel_size,
                         channels,
                         dilation_rate=dilation_rate,
                         kernel_initializer=kernel_initializer,
                         is_relu=use_relu,
                         add_bias=(not use_bn),
                         use_wn=use_wn)
        '''
        corr_0 = tf_utils.calc_cosine_coef(output[0, :20, :], output[0, :20, :])
        corr_0 = tf.abs(corr_0)
        corr_0 = tf.where(tf.greater(corr_0, 0.1), corr_0, tf.zeros_like(corr_0))
        corr_0 = tf.Print(corr_0, [corr_0])
        tf_utils.plot_2d_tensor(
            corr_0, "%s/outputs_0_self_corr" % scope)

        corr_1 = tf_utils.calc_cosine_coef(output[-1, :20, :], output[-1, :20, :])
        corr_1 = tf.abs(corr_1)
        corr_1 = tf.where(tf.greater(corr_1, 0.1), corr_1, tf.zeros_like(corr_1))
        corr_1 = tf.Print(corr_1, [corr_1])
        tf_utils.plot_2d_tensor(
            corr_1, "%s/outputs_1_self_corr" % scope)
        '''
        tf.summary.histogram("conv_out", outputs)

        if activation is not None:
            outputs = activation(outputs)

        if use_bn:
            outputs = tf.layers.batch_normalization(outputs,
                                                    training=is_training)
        tf.summary.histogram("bn_out", outputs)
        return outputs
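A sketch of a small dilated stack with growing dilation rates (a common pattern; the specific rates and sizes are illustrative):

outputs = tf.placeholder(tf.float32, shape=[None, None, 128])
for i, rate in enumerate([1, 2, 4, 8]):
    outputs = atrous_conv1d(outputs,
                            kernel_size=3,
                            channels=128,
                            dilation_rate=rate,
                            activation="relu",
                            use_bn=True,
                            is_training=True,
                            scope="atrous_conv1d_%d" % i)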
Example #7
    def densely_cnn_block(self,
                          inputs,
                          params,
                          dropout=0.,
                          is_training=False,
                          scope=None):
        """
            inputs --> conv --|--> concat --> activation --> bn --> (pooling) -->
                    |---------|
        args:
            inputs: Tensor([B, T, D], tf.float32)
            params: DICT, {
                    "kernel_size",
                    "channels",
                    "dilation_rate",
                    "initializer",
                    "activation",
                    "use_bn"
            }
            is_training:
            scope:
        return:
        """
        scope = scope or "densely_conv1d"
        kernel_size = int(params["kernel_size"])
        channels = int(params["channels"])
        try:
            dilation_rate = params["dilation_rate"]
        except KeyError:
            dilation_rate = 1

        try:
            _initializer = params["initializer"].lower()
        except KeyError:
            _initializer = "xavier_normal"

        try:
            activation = tf_utils.get_activation(params["activation"])
        except KeyError:
            activation = lambda x: x

        use_bn = "use_bn" in params and params["use_bn"]
        use_wn = "use_wn" in params and params["use_wn"]

        with tf.variable_scope(scope):
            outputs = inputs
            outputs = conv1d(outputs,
                             kernel_size,
                             channels,
                             dilation_rate=dilation_rate,
                             kernel_initializer=_initializer,
                             is_relu=True,
                             use_wn=use_wn,
                             add_bias=(not use_bn))
            outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)
            tf.summary.histogram("%s_conv" % scope, outputs)
            # outputs = activation(outputs)
            if use_bn:
                # outputs = tf_utils.group_norm(outputs, G=32)
                outputs = tf.layers.batch_normalization(outputs,
                                                        training=is_training)
            outputs = activation(outputs)
            # pooling
            '''
            outputs = tf.nn.pool(outputs,
                                 window_shape=[],
                                 pooling_type="AVG",
                                 padding="SAME")
            '''
            tf.summary.histogram("%s_bn" % scope, outputs)

            outputs = tf.concat((inputs, outputs), axis=-1)
            return outputs
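densely_cnn_block is a method, so it needs the enclosing model instance; a hypothetical params dict covering the keys read above might be:

params = {
    "kernel_size": "3",
    "channels": "64",
    "dilation_rate": 1,
    "initializer": "xavier_normal",
    "activation": "relu",
    "use_bn": True,
}
# outputs = self.densely_cnn_block(inputs, params, dropout=0.1, is_training=True)
# each call concatenates its conv features onto its inputs along the last axis,
# so the feature dimension grows by "channels" per block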
Example #8
    def prenet(self, layer_in, params, dropout=0., scope="prenet"):
        """ preproccessing networks, including
            transform feature, position_embedding
            x --> conv --> ReLU(bn) --> conv --|-> concat --> ReLU(bn) -->
                        |----------------------|
        args:
            layer_in: Tensor([B, T, D], tf.float32)
            params: DICT, {"channels", "activation", "initializer", "use_bn"}
            scope:
        return:
            Tensor([B, T, channels * num_layer], tf.float32)
        """
        channels = int(params["channels"])
        if "initializer" not in params:
            _initializer = "xavier_normal"
        else:
            _initializer = params["initializer"].lower()
        use_bn = "use_bn" in params and params["use_bn"]
        use_wn = "use_wn" in params and params["use_wn"]
        try:
            activation = tf_utils.get_activation(params["activation"].lower())
            use_relu = params["activation"].lower() == "relu"
        except KeyError:
            activation = lambda x: x
            use_relu = False

        with tf.variable_scope(scope):
            layer_in = tf.nn.dropout(layer_in, keep_prob=1.0 - dropout)
            conv1 = conv1d(layer_in,
                           kernel_size=1,
                           channels=channels * 2,
                           add_bias=(not use_bn),
                           is_relu=use_relu,
                           use_wn=use_wn,
                           kernel_initializer=_initializer,
                           scope="conv1")
            outputs = conv1
            #outputs = activation(outputs)

            if use_bn:
                outputs = tf.layers.batch_normalization(
                    outputs, training=self._is_training)
            outputs = activation(outputs)

            conv2 = conv1d(outputs,
                           kernel_size=1,
                           channels=channels,
                           add_bias=(not use_bn),
                           is_relu=use_relu,
                           use_wn=use_wn,
                           kernel_initializer=_initializer,
                           scope="conv2")
            '''
            conv2_out = tf.nn.relu(conv2)
            if use_bn:
                conv2_out = tf.layers.batch_normalization(conv2_out, training=self._is_training)
            conv3 = conv1d(conv2_out, kernel_size=1, channels=channels,
                           add_bias=False,
                           is_relu=True,
                           kernel_initializer=_initializer,
                           scope="conv3")
            '''
            #outputs = tf.concat((conv1, conv2), axis=-1)
            outputs = conv2
            # outputs = activation(outputs)
            if use_bn:
                outputs = tf.layers.batch_normalization(
                    outputs, training=self._is_training)
            outputs = activation(outputs)
            return outputs
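Likewise for prenet, a hypothetical params dict covering the keys it reads ("channels", "activation", "initializer", "use_bn", "use_wn"):

params = {
    "channels": "256",
    "activation": "relu",
    "initializer": "xavier_normal",
    "use_bn": True,
    "use_wn": False,
}
# outputs = self.prenet(layer_in, params, dropout=0.5, scope="prenet")
# two 1x1 convolutions (2 * channels, then channels), each followed by bn + activation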