def conv_3d(inputs, filter_size, num_filters, layer_name, stride=1, is_train=True,
            add_batch_norm=False, add_reg=False, activation=tf.identity):
    """
    Create a 3D convolution layer
    :param inputs: input tensor
    :param filter_size: size of the (cubic) filter
    :param num_filters: number of filters (or output feature maps)
    :param layer_name: layer name
    :param stride: convolution filter stride
    :param is_train: boolean to differentiate train and test (useful when applying batch normalization)
    :param add_batch_norm: boolean to use batch norm (or not)
    :param add_reg: boolean to add norm-2 regularization (or not)
    :param activation: type of activation to be applied
    :return: the output tensor
    """
    num_in_channel = get_num_channels(inputs)
    with tf.variable_scope(layer_name):
        shape = [filter_size, filter_size, filter_size, num_in_channel, num_filters]
        weights = weight_variable(layer_name, shape=shape)
        tf.summary.histogram('W', weights)
        layer = tf.nn.conv3d(input=inputs,
                             filter=weights,
                             strides=[1, stride, stride, stride, 1],
                             padding="SAME")
        if add_batch_norm:
            layer = batch_norm(layer, is_train)
        else:
            # Batch norm has its own shift term, so a bias is only added without it.
            biases = bias_variable(layer_name, [num_filters])
            layer += biases
        layer = activation(layer)
        if add_reg:
            # Collect the weights so an L2 penalty can be applied to them later.
            tf.add_to_collection('weights', weights)
    return layer
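conv_3d relies on several helpers that this excerpt does not include. The sketch below shows one plausible TF1-style implementation; the names come from the snippet, but the bodies here are assumptions, not the original project's code.

import tensorflow as tf

def get_num_channels(x):
    # The channel count is the last dimension of the tensor's static shape.
    return int(x.get_shape()[-1])

def weight_variable(name, shape):
    # Truncated-normal initialization; the stddev is an arbitrary choice here.
    return tf.get_variable(name + '_W', shape=shape,
                           initializer=tf.truncated_normal_initializer(stddev=0.01))

def bias_variable(name, shape):
    return tf.get_variable(name + '_b', shape=shape,
                           initializer=tf.constant_initializer(0.0))

def batch_norm(x, is_train):
    # tf.layers.batch_normalization switches between batch statistics (training)
    # and moving averages (inference) via the training flag.
    return tf.layers.batch_normalization(x, training=is_train)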

def BN_Relu_conv_3d(inputs, filter_size, num_filters, layer_name, stride=1, is_train=True,
                    add_batch_norm=True, use_relu=True, add_reg=False):
    """
    Create a pre-activation BN -> ReLU -> 3D convolution layer
    :param inputs: input tensor
    :param filter_size: size of the (cubic) filter
    :param num_filters: number of filters (or output feature maps)
    :param layer_name: layer name
    :param stride: convolution filter stride
    :param is_train: boolean to differentiate train and test (useful when applying batch normalization)
    :param add_batch_norm: boolean to use batch norm (or not)
    :param add_reg: boolean to add norm-2 regularization (or not)
    :param use_relu: boolean to apply ReLU before the convolution (or not)
    :return: the output tensor
    """
    num_in_channel = get_num_channels(inputs)
    with tf.variable_scope(layer_name):
        # Pre-activation ordering: normalize and activate the inputs first,
        # then convolve.
        if add_batch_norm:
            inputs = batch_norm(inputs, is_train)
        if use_relu:
            inputs = tf.nn.relu(inputs)
        shape = [filter_size, filter_size, filter_size, num_in_channel, num_filters]
        weights = weight_variable(layer_name, shape=shape)
        layer = tf.nn.conv3d(input=inputs,
                             filter=weights,
                             strides=[1, stride, stride, stride, 1],
                             padding="SAME")
        if add_reg:
            tf.add_to_collection('weights', weights)
    return layer
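A hypothetical call, just to illustrate the pre-activation ordering; the placeholder shape (batch, depth, height, width, channels) is made up:

# Hypothetical usage with a 5-D placeholder (TF1).
x = tf.placeholder(tf.float32, [None, 32, 32, 32, 16])
y = BN_Relu_conv_3d(x, filter_size=3, num_filters=32,
                    layer_name='bn_relu_conv1', is_train=True)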
Example #3
def main_denoising(wav_files, output_dir, verbose=False, **kwargs):
    """Perform speech enhancement for WAV files in ``wav_dir``.

    Parameters
    ----------
    wav_files : list of str
        Paths to WAV files to enhance.

    output_dir : str
        Path to output directory for enhanced WAV files.

    verbose : bool, optional
        If True, print the full error output to STDERR for files with errors.

    kwargs
        Keyword arguments to pass to ``denoise_wav``.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Load global MVN statistics.
    global_mean_var = sio.loadmat(GLOBAL_MEAN_VAR_MATF)
    global_mean = global_mean_var['global_mean']
    global_var = global_mean_var['global_var']

    # Perform speech enhancement.
    for src_wav_file in wav_files:
        # Perform basic checks of input WAV.
        if not os.path.exists(src_wav_file):
            utils.error('File "%s" does not exist. Skipping.' % src_wav_file)
            continue
        if not utils.is_wav(src_wav_file):
            utils.error('File "%s" is not WAV. Skipping.' % src_wav_file)
            continue
        if utils.get_sr(src_wav_file) != SR:
            utils.error('Sample rate of file "%s" is not %d Hz. Skipping.' %
                        (src_wav_file, SR))
            continue
        if utils.get_num_channels(src_wav_file) != NUM_CHANNELS:
            utils.error('File "%s" is not monochannel. Skipping.' %
                        src_wav_file)
            continue
        if utils.get_bitdepth(src_wav_file) != BITDEPTH:
            utils.error('Bitdepth of file "%s" is not %d. Skipping.' %
                        (src_wav_file, BITDEPTH))
            continue

        # Denoise.
        try:
            bn = os.path.basename(src_wav_file)
            dest_wav_file = os.path.join(output_dir, bn)
            denoise_wav(src_wav_file, dest_wav_file, global_mean, global_var,
                        **kwargs)
            print('Finished processing file "%s".' % src_wav_file)
        except Exception as e:
            msg = 'Problem encountered while processing file "%s". Skipping.' % src_wav_file
            if verbose:
                msg = '%s Full error output:\n%s' % (msg, e)
            utils.error(msg)
            continue
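The utils checks above come from a module that is not part of this example. One plausible way to implement them, assuming the soundfile package (the subtype-to-bitdepth parsing is an assumption that holds for PCM subtypes like 'PCM_16'):

# Sketch of the assumed utils module, built on soundfile.
import sys
import soundfile as sf

def error(msg):
    # Errors go to STDERR so they do not mix with normal progress output.
    print(msg, file=sys.stderr)

def is_wav(path):
    return sf.info(path).format == 'WAV'

def get_sr(path):
    return sf.info(path).samplerate

def get_num_channels(path):
    return sf.info(path).channels

def get_bitdepth(path):
    # PCM subtypes look like 'PCM_16'; the trailing number is the bit depth.
    return int(sf.info(path).subtype.split('_')[-1])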
Example #4
    def down_conv(self, x):
        # Stride-2, kernel-2 convolution halves each spatial dimension while
        # the channel count doubles.
        num_out_channels = get_num_channels(x) * 2
        x = conv_3d(inputs=x,
                    filter_size=2,
                    num_filters=num_out_channels,
                    layer_name='conv_down',
                    stride=2,
                    add_batch_norm=self.conf.use_BN,
                    is_train=self.is_training,
                    activation=self.act_fcn)
        return x
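For example, with SAME padding a (None, 32, 32, 32, 16) input comes out of down_conv as (None, 16, 16, 16, 32): stride 2 gives ceil(32 / 2) = 16 along each spatial dimension, while the channel count doubles (the concrete shape here is only illustrative).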
Example #5
    def up_conv(self, x, out_shape):
        # Stride-2 transposed convolution doubles the spatial dimensions while
        # halving the channel count; out_shape pins the exact output size.
        num_out_channels = get_num_channels(x) // 2
        x = deconv_3d(inputs=x,
                      filter_size=2,
                      num_filters=num_out_channels,
                      layer_name='conv_up',
                      stride=2,
                      add_batch_norm=self.conf.use_BN,
                      is_train=self.is_training,
                      out_shape=out_shape,
                      activation=self.act_fcn)
        return x
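deconv_3d is not shown in these examples. A minimal sketch that mirrors conv_3d above, using tf.nn.conv3d_transpose; the real implementation may differ:

def deconv_3d(inputs, filter_size, num_filters, layer_name, out_shape, stride=1,
              is_train=True, add_batch_norm=False, add_reg=False,
              activation=tf.identity):
    num_in_channel = get_num_channels(inputs)
    with tf.variable_scope(layer_name):
        # Transposed-convolution filters put output channels before input channels.
        shape = [filter_size, filter_size, filter_size, num_filters, num_in_channel]
        weights = weight_variable(layer_name, shape=shape)
        layer = tf.nn.conv3d_transpose(inputs, weights,
                                       output_shape=out_shape,
                                       strides=[1, stride, stride, stride, 1],
                                       padding="SAME")
        if add_batch_norm:
            layer = batch_norm(layer, is_train)
        else:
            layer += bias_variable(layer_name, [num_filters])
        layer = activation(layer)
        if add_reg:
            tf.add_to_collection('weights', weights)
    return layer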
Example #6
    def __init__(self, input_shape, num_actions, env, key=None, folder=None):
        # TODO: clean up the constructor and init signatures
        super(ConvPolicyNet, self).__init__(input_shape, num_actions, env, key,
                                            folder)
        self.conv = nn.Conv2d(in_channels=get_num_channels(),
                              out_channels=128,
                              kernel_size=2)
        o = conv_output_size(input_shape[1], 2, 0, 1)
        self.fc = nn.Linear(128 * o * o, num_actions)
        self.input_shape = input_shape

        self.optimizer = optim.Adam(self.parameters(), lr=10**-5)
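conv_output_size is another helper outside this excerpt; it presumably evaluates the standard convolution output-size formula, floor((size + 2 * padding - kernel_size) / stride) + 1. A minimal sketch:

def conv_output_size(size, kernel_size, padding, stride):
    # Standard formula for the spatial size of a convolution's output.
    return (size + 2 * padding - kernel_size) // stride + 1

With the arguments used above (kernel_size=2, padding=0, stride=1), this yields size - 1.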
Example #7
    def down_conv(self, x):
        num_out_channels = get_num_channels(x)
        x = BN_Relu_conv_2d(inputs=x,
                            filter_size=1,
                            num_filters=num_out_channels,
                            layer_name='conv_down',
                            stride=1,
                            add_batch_norm=self.conf.use_BN,
                            add_reg=self.conf.use_reg,
                            is_train=self.is_training,
                            use_relu=True)
        x = tf.nn.dropout(x, self.keep_prob)
        x = max_pool(x, self.conf.pool_filter_size, name='maxpool')
        return x
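max_pool is also defined elsewhere; a plausible wrapper, assuming a square window whose stride equals its size:

def max_pool(x, ksize, name):
    # Non-overlapping 2D max pooling over the spatial dimensions.
    return tf.nn.max_pool(x, ksize=[1, ksize, ksize, 1],
                          strides=[1, ksize, ksize, 1],
                          padding='SAME', name=name)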
Example #8
    def conv_block_up(self, layer_input, fine_grained_features, num_convolutions):
        # Concatenate the upsampled input with the skip-connection features,
        # then convolve back down to the input's channel count.
        x = tf.concat((layer_input, fine_grained_features), axis=-1)
        n_channels = get_num_channels(layer_input)
        for i in range(num_convolutions):
            x = conv_3d(inputs=x,
                        filter_size=self.k_size,
                        num_filters=n_channels,
                        layer_name='conv_' + str(i + 1),
                        add_batch_norm=self.conf.use_BN,
                        is_train=self.is_training)
            if i == num_convolutions - 1:
                # Residual connection: every conv outputs n_channels maps, so
                # x and layer_input have matching shapes here.
                x = x + layer_input
            x = self.act_fcn(x, name='prelu_' + str(i + 1))
            x = tf.nn.dropout(x, self.keep_prob)
        return x
Example #9
    def __init__(self, input_shape, lr=1e-3, folder=None):
        super(ConvRewardNet, self).__init__(input_shape, lr, folder)
        self.input_shape = input_shape

        # simple net with: 2D convolutional layer -> activation layer -> fully connected layer
        self.conv = nn.Conv2d(in_channels=get_num_channels(),
                              out_channels=64,
                              kernel_size=2)
        o = conv_output_size(input_shape[1], 2, 0, 1)
        self.fc = nn.Linear(64 * o * o + 1, 1)
        self.step_weight = nn.Linear(1, 1)

        # regularization
        self.optimizer = optim.Adam(self.parameters(), lr=lr)
        self.lambda_abs_rewards = 10**-4  # penalty for rewards regularization
Example #10
    def conv_block_down(self, layer_input, num_convolutions):
        x = layer_input
        n_channels = get_num_channels(x)
        if n_channels == 1:
            # Widen a single-channel input to the configured starting width.
            n_channels = self.conf.start_channel_num
        for i in range(num_convolutions):
            x = conv_3d(inputs=x,
                        filter_size=self.k_size,
                        num_filters=n_channels,
                        layer_name='conv_' + str(i + 1),
                        add_batch_norm=self.conf.use_BN,
                        is_train=self.is_training)
            if i == num_convolutions - 1:
                # Residual connection; a single-channel layer_input broadcasts
                # across the channel dimension.
                x = x + layer_input
            x = self.act_fcn(x, name='prelu_' + str(i + 1))
            x = tf.nn.dropout(x, self.keep_prob)
        return x