def conv1d_layer(inputs, kernel_size, channels, activation, is_training, scope=None):
    """conv1d --> batch normalization, with Xavier-initialized kernels."""
    scope = scope or "conv1d"
    try:
        use_relu = activation in ["relu", "RELU", "ReLU"]
        activation = tf_utils.get_activation(activation)
    except Exception as e:
        print(e)
        activation = None
        use_relu = False
    kernel_initializer = tf_utils.get_initializer("xavier", is_relu=use_relu)
    with tf.variable_scope(scope):
        output = tf.layers.conv1d(inputs,
                                  filters=channels,
                                  kernel_size=kernel_size,
                                  activation=activation,
                                  kernel_initializer=kernel_initializer,
                                  padding='same')
        output = tf.layers.batch_normalization(output, training=is_training)
        tf.summary.histogram("outputs", output)
    return output
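# Usage sketch (not part of the original module): how conv1d_layer might be
# called. The input dimension (80), kernel width, channel count, and the
# "relu" activation string are illustrative assumptions.
def _example_conv1d_layer():
    # [batch, time, feature] inputs and a bool flag fed at run time
    inputs = tf.placeholder(tf.float32, [None, None, 80], name="example_inputs")
    is_training = tf.placeholder(tf.bool, name="example_is_training")
    # 5-wide kernel projecting to 128 channels; ReLU is applied inside the conv,
    # then batch normalization
    return conv1d_layer(inputs, kernel_size=5, channels=128,
                        activation="relu", is_training=is_training,
                        scope="example_conv1d")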
def apply_dense(inputs, params, is_training=False, dropout=0.0, scope=None):
    """Fully connected layer.

    num_params: w: in_dim * num_units, b: num_units

    Args:
        inputs: input tensor, [batch, len, dim]
        params: DICT, {"num_units", "activation"}
        dropout: dropout rate
        scope: TensorFlow variable scope name

    Returns:
        dense-layer outputs and the set of variables that need initialization
    """
    num_units = int(params["num_units"])
    try:
        activation = params["activation"].lower()
    except KeyError:
        activation = "linear"
    try:
        initializer = params["initializer"].lower()
    except KeyError:
        initializer = "xavier_normal"
    use_bn = "use_bn" in params and params["use_bn"]

    temp = set(tf.global_variables())
    scope = scope or "dense"
    with tf.variable_scope(scope):
        shape = inputs.get_shape()
        input_dim = shape[-1].value
        inputs_2d = tf.reshape(inputs, [-1, input_dim])
        inputs_2d = tf.nn.dropout(inputs_2d, keep_prob=1.0 - dropout)
        initializer = tf_utils.get_initializer(initializer, is_relu=(activation == "relu"))
        w = tf.get_variable("w", dtype=tf.float32, shape=[input_dim, num_units],
                            initializer=initializer)
        b = tf.get_variable("b", dtype=tf.float32, shape=[1, num_units],
                            initializer=tf.zeros_initializer())
        outputs = tf.matmul(inputs_2d, w) + b
        outputs = tf_utils.get_activation(activation)(outputs)
        outputs = tf.reshape(outputs, [tf.shape(inputs)[0], -1, num_units])
        if use_bn:
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
        tf.summary.histogram("weights", w)
        tf.summary.histogram("biases", b)
        tf.summary.histogram("outputs", outputs)
        # outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)
    init_var_set = set(tf.global_variables()) - temp
    return outputs, init_var_set
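# Usage sketch (not part of the original module): calling apply_dense with a
# params dict. The shapes and hyper-parameter values are illustrative
# assumptions; the returned variable set can be initialized on its own when
# the rest of the graph is already initialized.
def _example_apply_dense():
    inputs = tf.placeholder(tf.float32, [None, None, 256], name="example_dense_in")
    params = {"num_units": 512, "activation": "relu",
              "initializer": "xavier_normal", "use_bn": True}
    outputs, init_var_set = apply_dense(inputs, params, is_training=True,
                                        dropout=0.1, scope="example_dense")
    # initialize only the variables created by this layer
    init_op = tf.variables_initializer(list(init_var_set))
    return outputs, init_op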
def check_cell_params(_name, _params):
    """Normalize a raw RNN-cell config dict into constructor kwargs for `_name`."""
    _cell_params = dict()
    try:
        _cell_params["num_units"] = int(_params["num_units"])
    except (KeyError, TypeError):
        raise Exception(
            "[ERROR] check_rnn_params: 'num_units' is needed, should be an integer")

    try:
        activation = tf_utils.get_activation(_params["activation"])
        _cell_params["activation"] = activation
    except Exception as e:
        print(e)
        _cell_params["activation"] = tf.tanh
    use_relu = str(_params.get("activation", "")).lower() == "relu"

    try:
        initializer = tf_utils.get_initializer(_params["initializer"], is_relu=use_relu)
    except Exception as e:
        # print(e)
        gain = np.sqrt(2) if use_relu else 1.0
        initializer = tf.orthogonal_initializer(gain=gain)

    if _name == "GRUCell":
        # GRUCell takes no kernel_initializer here; passing one raises an error
        # _cell_params["kernel_initializer"] = initializer
        return _cell_params

    _cell_params["initializer"] = initializer
    if "use_peepholes" in _params:
        use_peepholes = _params["use_peepholes"]
        if isinstance(use_peepholes, bool):
            _cell_params["use_peepholes"] = use_peepholes
        else:
            _cell_params["use_peepholes"] = str(use_peepholes).upper() in ['TRUE', 'T', 'YES', 'Y']
    if "num_proj" in _params:
        _cell_params["num_proj"] = int(_params["num_proj"])
    if "forget_bias" in _params:
        _cell_params["forget_bias"] = float(_params["forget_bias"])
    return _cell_params
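# Usage sketch (not part of the original module): turning raw (string-valued)
# config entries into cell constructor kwargs. The config values are
# illustrative assumptions; "xavier" follows the initializer name used by
# conv1d_layer above.
def _example_check_cell_params():
    lstm_raw = {"num_units": "256", "activation": "tanh", "initializer": "xavier",
                "use_peepholes": "true", "forget_bias": "1.0"}
    lstm_cell = tf.nn.rnn_cell.LSTMCell(**check_cell_params("LSTMCell", lstm_raw))

    # a GRU config only keeps "num_units" and "activation"
    gru_raw = {"num_units": "256", "activation": "tanh"}
    gru_cell = tf.nn.rnn_cell.GRUCell(**check_cell_params("GRUCell", gru_raw))
    return lstm_cell, gru_cell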
def highwaynet(inputs, num_units, activation="relu", gate_bias=-1.0, scope=None):
    """ activation(x) * T + x * (1 - T)

    args:
        inputs: Tensor([B, T, D], tf.float32)
        num_units: INT, same dimension as inputs
        activation: STR, for the transform layer
        gate_bias: FLOAT, bias for the transform gate, default=-1.0
        scope:
    return:
        Tensor([B, T, num_units], tf.float32)
    """
    scope = scope or "highwaynet"
    try:
        activation = tf_utils.get_activation(activation)
    except Exception as e:
        print(e)
        activation = None
    with tf.variable_scope(scope):
        H = tf.layers.dense(inputs, units=num_units, activation=activation, name='H')
        T = tf.layers.dense(inputs, units=num_units, activation=tf.nn.sigmoid, name='T',
                            bias_initializer=tf.constant_initializer(gate_bias))
        outputs = tf.add(H * T, inputs * (1.0 - T), "highway_output")
        tf.summary.histogram(scope + "/H", H)
        tf.summary.histogram(scope + "/T", T)
        tf.summary.histogram(scope + "/outputs", outputs)
    return outputs
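# Usage sketch (not part of the original module): stacking several highway
# layers, e.g. as a CBHG-style front end. The depth and unit count are
# illustrative assumptions; num_units must equal the last dimension of inputs.
def _example_highway_stack(inputs, num_layers=4, num_units=128):
    outputs = inputs
    for i in range(num_layers):
        outputs = highwaynet(outputs, num_units=num_units, activation="relu",
                             gate_bias=-1.0, scope="highwaynet_%d" % i)
    return outputs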
def gated_conv1d(inputs, kernel_size, channels, dilation_rate=1, activation=None,
                 kernel_initializer=None, use_bn=False, use_wn=False,
                 is_training=True, scope=None):
    """ gated conv1d:

                   |--> conv1d --> activation -->|
        inputs --> |                             * --> bn
                   |--> conv1d --> sigmoid ----->|

    args:
        inputs: Tensor([B, T, D], tf.float32)
        kernel_size: INT, filter width
        channels: INT, output_channels
        dilation_rate: INT
        activation: STR
        kernel_initializer: STR
        use_bn: whether to use batch normalization
        is_training: BOOL for batch normalization
                     True --- train, False --- prediction
        scope:
    return:
    """
    scope = scope or "gated_conv1d"
    use_relu = activation in ["relu", "ReLU", "RELU"]
    try:
        activation = tf_utils.get_activation(activation)
    except Exception as e:
        print(e)
        activation = None
    # if kernel_initializer is None:
    #     kernel_initializer = xavier_initializer(use_relu)
    with tf.variable_scope(scope):
        conv_out = conv1d(inputs, kernel_size, channels,
                          dilation_rate=dilation_rate,
                          kernel_initializer=kernel_initializer,
                          is_relu=use_relu,
                          add_bias=(not use_bn),
                          use_wn=use_wn,
                          scope="filter")
        if activation is not None:
            conv_out = activation(conv_out)
        gated = conv1d(inputs, kernel_size, channels,
                       dilation_rate=dilation_rate,
                       kernel_initializer=kernel_initializer,
                       is_relu=False,
                       add_bias=True,
                       use_wn=use_wn,
                       scope="gate")
        gated_out = tf.nn.sigmoid(gated)
        tf.summary.histogram("%s_gated_out" % scope, gated_out)
        output = conv_out * gated_out
        if use_bn:
            output = tf.layers.batch_normalization(output, training=is_training)
    return output
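# Usage sketch (not part of the original module): a single gated conv block
# where the sigmoid branch gates the filter branch element-wise. Kernel size,
# channel count, dilation rate, and activation are illustrative assumptions.
def _example_gated_conv1d(inputs, is_training):
    return gated_conv1d(inputs, kernel_size=3, channels=256, dilation_rate=2,
                        activation="tanh", use_bn=True,
                        is_training=is_training, scope="example_gated_conv1d")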
def atrous_conv1d(inputs, kernel_size, channels, dilation_rate=1, activation=None,
                  kernel_initializer=None, use_bn=False, use_wn=False,
                  is_training=True, scope=None):
    """ atrous cnn for text

    args:
        inputs: Tensor([B, T, D], tf.float32)
        kernel_size: INT, filter width
        channels: INT, output_channels
        dilation_rate: INT
        activation: STR
        kernel_initializer: STR
        use_bn: whether to use batch normalization
        is_training: BOOL for batch normalization
                     True --- train, False --- prediction
        scope:
    return:
    """
    scope = scope or "atrous_conv1d"
    use_relu = activation in ["relu", "ReLU", "RELU"]
    outputs = inputs
    try:
        activation = tf_utils.get_activation(activation)
    except Exception as e:
        print(e)
        activation = None
    # if kernel_initializer is None:
    #     kernel_initializer = xavier_initializer(use_relu)
    with tf.variable_scope(scope):
        outputs = conv1d(inputs, kernel_size, channels,
                         dilation_rate=dilation_rate,
                         kernel_initializer=kernel_initializer,
                         is_relu=use_relu,
                         add_bias=(not use_bn),
                         use_wn=use_wn)
        '''
        corr_0 = tf_utils.calc_cosine_coef(output[0, :20, :], output[0, :20, :])
        corr_0 = tf.abs(corr_0)
        corr_0 = tf.where(tf.greater(corr_0, 0.1), corr_0, tf.zeros_like(corr_0))
        corr_0 = tf.Print(corr_0, [corr_0])
        tf_utils.plot_2d_tensor(corr_0, "%s/outputs_0_self_corr" % scope)

        corr_1 = tf_utils.calc_cosine_coef(output[-1, :20, :], output[-1, :20, :])
        corr_1 = tf.abs(corr_1)
        corr_1 = tf.where(tf.greater(corr_1, 0.1), corr_1, tf.zeros_like(corr_1))
        corr_1 = tf.Print(corr_1, [corr_1])
        tf_utils.plot_2d_tensor(corr_1, "%s/outputs_1_self_corr" % scope)
        '''
        tf.summary.histogram("conv_out", outputs)
        if activation is not None:
            outputs = activation(outputs)
        if use_bn:
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
            tf.summary.histogram("bn_out", outputs)
    return outputs
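# Usage sketch (not part of the original module): a small stack of dilated
# convolutions with exponentially growing dilation rates (1, 2, 4, 8), so the
# receptive field grows quickly without pooling. Depth and channel count are
# illustrative assumptions.
def _example_atrous_stack(inputs, is_training, num_layers=4, channels=256):
    outputs = inputs
    for i in range(num_layers):
        outputs = atrous_conv1d(outputs, kernel_size=3, channels=channels,
                                dilation_rate=2 ** i, activation="relu",
                                use_bn=True, is_training=is_training,
                                scope="atrous_conv1d_%d" % i)
    return outputs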
def densely_cnn_block(self, inputs, params, dropout=0., is_training=False, scope=None):
    """ densely connected conv1d block:

        inputs --> conv --> dropout --> bn --> activation --> concat --> (pooling) -->
             |------------------------------------------------->|

    args:
        inputs: Tensor([B, T, D], tf.float32)
        params: DICT, {
            "kernel_size", "channels", "dilation_rate",
            "initializer", "activation", "use_bn"
        }
        is_training:
        scope:
    return:
        Tensor([B, T, D + channels], tf.float32)
    """
    scope = scope or "densely_conv1d"
    kernel_size = int(params["kernel_size"])
    channels = int(params["channels"])
    try:
        dilation_rate = params["dilation_rate"]
    except KeyError:
        dilation_rate = 1
    try:
        _initializer = params["initializer"].lower()
    except KeyError:
        _initializer = "xavier_normal"
    try:
        activation = tf_utils.get_activation(params["activation"])
    except KeyError:
        activation = lambda x: x
    use_bn = "use_bn" in params and params["use_bn"]
    use_wn = "use_wn" in params and params["use_wn"]

    with tf.variable_scope(scope):
        outputs = inputs
        outputs = conv1d(outputs, kernel_size, channels,
                         dilation_rate=dilation_rate,
                         kernel_initializer=_initializer,
                         is_relu=True,
                         use_wn=use_wn,
                         add_bias=(not use_bn))
        outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)
        tf.summary.histogram("%s_conv" % scope, outputs)
        # outputs = activation(outputs)
        if use_bn:
            # outputs = tf_utils.group_norm(outputs, G=32)
            outputs = tf.layers.batch_normalization(outputs, training=is_training)
        outputs = activation(outputs)
        # pooling
        '''
        outputs = tf.nn.pool(outputs, window_shape=[], pooling_type="AVG", padding="SAME")
        '''
        tf.summary.histogram("%s_bn" % scope, outputs)
        outputs = tf.concat((inputs, outputs), axis=-1)
    return outputs
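# Usage sketch (not part of the original module): stacking densely connected
# blocks. Because every block concatenates its input with its conv output, the
# feature dimension grows by "channels" per block. Written as a method of the
# same (unshown) class, and the params values are illustrative assumptions.
def _example_densely_cnn_stack(self, inputs, num_blocks=3):
    params = {"kernel_size": 3, "channels": 64, "dilation_rate": 1,
              "activation": "relu", "use_bn": True}
    outputs = inputs
    for i in range(num_blocks):
        outputs = self.densely_cnn_block(outputs, params, dropout=0.1,
                                         is_training=self._is_training,
                                         scope="densely_conv1d_%d" % i)
    return outputs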
def prenet(self, layer_in, params, dropout=0., scope="prenet"):
    """ preprocessing network, including feature transformation and position embedding

        x --> conv --> ReLU(bn) --> conv --> ReLU(bn) -->

        (a concat variant is kept below as commented-out code)

    args:
        layer_in: Tensor([B, T, D], tf.float32)
        params: DICT, {"channels", "activation", "initializer", "use_bn"}
        scope:
    return:
        Tensor([B, T, channels], tf.float32)
    """
    channels = int(params["channels"])
    if "initializer" not in params:
        _initializer = "xavier_normal"
    else:
        _initializer = params["initializer"].lower()
    use_bn = "use_bn" in params and params["use_bn"]
    use_wn = "use_wn" in params and params["use_wn"]
    try:
        activation = tf_utils.get_activation(params["activation"].lower())
        use_relu = params["activation"].lower() == "relu"
    except KeyError:
        activation = lambda x: x
        use_relu = False

    with tf.variable_scope(scope):
        layer_in = tf.nn.dropout(layer_in, keep_prob=1.0 - dropout)
        conv1 = conv1d(layer_in, kernel_size=1, channels=channels * 2,
                       add_bias=(not use_bn),
                       is_relu=use_relu,
                       use_wn=use_wn,
                       kernel_initializer=_initializer,
                       scope="conv1")
        outputs = conv1
        # outputs = activation(outputs)
        if use_bn:
            outputs = tf.layers.batch_normalization(outputs, training=self._is_training)
        outputs = activation(outputs)
        conv2 = conv1d(outputs, kernel_size=1, channels=channels,
                       add_bias=(not use_bn),
                       is_relu=use_relu,
                       use_wn=use_wn,
                       kernel_initializer=_initializer,
                       scope="conv2")
        '''
        conv2_out = tf.nn.relu(conv2)
        if use_bn:
            conv2_out = tf.layers.batch_normalization(conv2_out, training=self._is_training)
        conv3 = conv1d(conv2_out, kernel_size=1, channels=channels,
                       add_bias=False, is_relu=True,
                       kernel_initializer=_initializer, scope="conv3")
        '''
        # outputs = tf.concat((conv1, conv2), axis=-1)
        outputs = conv2
        # outputs = activation(outputs)
        if use_bn:
            outputs = tf.layers.batch_normalization(outputs, training=self._is_training)
        outputs = activation(outputs)
    return outputs