示例#1
0
def check_audio_decoder_correctness(fmt, dtype):
    """Decode the ``fmt`` test audio files and compare them with ``.npy`` references.

    A file-reader + audio-decoder pipeline is built over the files that
    ``get_files`` returns for ``db/audio/<fmt>`` and is run for a fixed number
    of iterations. Each decoded sample is compared with the array loaded from
    the file that has the same path but a ``.npy`` extension.

    Args:
        fmt: Audio format name. Used as the sub-directory under ``db/audio``
            and also passed to ``get_files``. The value ``'ogg'`` selects a
            relaxed comparison (see below).
        dtype: Forwarded as ``dtype`` to ``fn.decoders.audio``.

    Raises:
        AssertionError: If the shape of a decoded sample differs from its
            reference, or if the contents differ. For ``'ogg'`` one value per
            sample is allowed to be off by more than 1; every other format
            must match exactly.
    """
    batch_size = 16
    niterations = 10

    @pipeline_def(batch_size=batch_size, device_id=0, num_threads=4)
    def audio_decoder_pipe(fnames, dtype, downmix=False):
        # Both operators yield two outputs; only the first of each is used here.
        raw, _ = fn.readers.file(files=fnames)
        samples, _ = fn.decoders.audio(raw, dtype=dtype, downmix=downmix)
        return samples

    audio_files = get_files(os.path.join('db', 'audio', fmt), fmt)
    reference_paths = [os.path.splitext(path)[0] + '.npy' for path in audio_files]

    pipe = audio_decoder_pipe(audio_files, dtype)
    pipe.build()

    for iteration in range(niterations):
        batch = pipe.run()[0]
        first_in_batch = iteration * batch_size
        for offset in range(batch_size):
            # The reference index wraps around once all files have been consumed.
            # NOTE(review): this assumes the reader yields files in list order
            # and restarts from the beginning - confirm against reader defaults.
            file_idx = (first_in_batch + offset) % len(audio_files)
            expected = np.load(reference_paths[file_idx])
            if expected.ndim == 1:
                # Give 1-D references a trailing axis of size 1 before comparing shapes.
                expected = np.expand_dims(expected, 1)
            actual = np.array(batch[offset])
            assert actual.shape == expected.shape

            if fmt != 'ogg':
                np.testing.assert_equal(actual, expected)
                continue

            # For OGG Vorbis, we consider errors any value that is off by more than 1
            # TODO(janton): There is a bug in libsndfile that produces underflow/overflow.
            #               Remove this when the bug is fixed.
            # NOTE(review): the difference is computed in the arrays' own dtype, so
            # for integer dtypes it can wrap around - confirm this is acceptable.
            off_by_more_than_one = np.abs(actual - expected) > 1
            # np.where returns a tuple of index arrays, we just need the first dimension
            nerrors = len(np.where(off_by_more_than_one)[0])
            assert nerrors <= 1
            # TODO(janton): Uncomment this when the bug is fixed
            # np.testing.assert_allclose(actual, expected, atol=1)
示例#2
0
import nvidia.dali
import nvidia.dali.ops as ops
import nvidia.dali.fn as fn
from nvidia.dali.pipeline import pipeline_def
import nvidia.dali.types as types
from test_utils import get_files, to_array
import numpy as np
import librosa
import torch
import math
import random
import os
from nose.tools import nottest

# WAV test files, restricted to those whose path contains '237-134500'
# (filtering librispeech samples).
audio_files = [
    wav_path
    for wav_path in get_files('db/audio/wav', 'wav')
    if '237-134500' in wav_path
]
# One '.npy' path per selected audio file: same path, extension swapped.
npy_files = [os.path.splitext(wav_path)[0] + '.npy' for wav_path in audio_files]
# NOTE(review): presumably the sample rate (Hz) of the .npy data - confirm.
npy_files_sr = 16000


# From DeepLearningExamples
def _convert_samples_to_float32(samples):
    """Convert sample type to float32.
    Audio sample type is usually integer or float-point.
    Integers will be scaled to [-1, 1] in float32.
    """
    float32_samples = samples.astype('float32')
    if samples.dtype in np.sctypes['int']:
        bits = np.iinfo(samples.dtype).bits
示例#3
0
from webdataset_base import generate_temp_index_file as generate_temp_wds_index
import re

import numpy as np
from nose_utils import assert_raises
import os
import glob
from math import ceil, sqrt
import tempfile
import sys
import json
from collections.abc import Iterable

# Root of the test data tree, as returned by get_dali_extra_path().
data_root = get_dali_extra_path()
_db_root = os.path.join(data_root, 'db')
_coco_root = os.path.join(_db_root, 'coco')

# Dataset locations under <data_root>/db.
images_dir = os.path.join(_db_root, 'single', 'jpeg')
caffe_dir = os.path.join(_db_root, 'lmdb')
caffe2_dir = os.path.join(_db_root, 'c2lmdb')
recordio_dir = os.path.join(_db_root, 'recordio')
tfrecord_dir = os.path.join(_db_root, 'tfrecord')
webdataset_dir = os.path.join(_db_root, 'webdataset')
coco_dir = os.path.join(_coco_root, 'images')
coco_annotation = os.path.join(_coco_root, 'instances.json')
sequence_dir = os.path.join(_db_root, 'sequence', 'frames')

# get_files is given a path that does not include data_root.
audio_files = get_files(os.path.join('db', 'audio', 'wav'), 'wav')

batch_size = 2
test_data_shape = [10, 20, 3]


def get_data():
    out = [
示例#4
0
                                      setup_test_numpy_reader_cpu)
from test_detection_pipeline import coco_anchors
from test_utils import check_batch, get_dali_extra_path, get_files, module_functions
from segmentation_test_utils import make_batch_select_masks
from webdataset_base import generate_temp_index_file as generate_temp_wds_index

""" Tests of coverage of eager operators. For each operator results from standard pipeline and
eager version are compared across a couple of iterations.
If you have added a new operator you should add a test here for an eager version of it. Also make
sure you have correctly classified the operator in `dali/python/nvidia/dali/_utils/eager_utils.py`
as stateless, stateful or iterator.
"""

# Root of the test data tree, as returned by get_dali_extra_path().
data_root = get_dali_extra_path()
_db_root = os.path.join(data_root, 'db')
_coco_root = os.path.join(_db_root, 'coco')

# Dataset locations under <data_root>/db.
images_dir = os.path.join(_db_root, 'single', 'jpeg')
caffe_dir = os.path.join(_db_root, 'lmdb')
caffe2_dir = os.path.join(_db_root, 'c2lmdb')
recordio_dir = os.path.join(_db_root, 'recordio')
webdataset_dir = os.path.join(_db_root, 'webdataset')
coco_dir = os.path.join(_coco_root, 'images')
coco_annotation = os.path.join(_coco_root, 'instances.json')
sequence_dir = os.path.join(_db_root, 'sequence', 'frames')

# File lists; get_files is given paths that do not include data_root.
audio_files = get_files(os.path.join('db', 'audio', 'wav'), 'wav')
video_files = get_files(os.path.join('db', 'video', 'vfr'), 'mp4')

# Random generator created without an explicit seed.
rng = np.random.default_rng()

batch_size, data_size = 2, 10
sample_shape = [20, 20, 3]
示例#5
0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import librosa
import numpy as np
import nvidia.dali.types as types
import test_utils
import os
import nvidia.dali.fn as fn
from nvidia.dali import pipeline_def

# WAV files used as input by the tests in this module.
_wav_subdir = os.path.join('db', 'audio', 'wav')
audio_files = test_utils.get_files(_wav_subdir, 'wav')

def trim_ref(cutoff_db, ref, frame_length, hop_length, input_data):
    """Compute a reference (begin, length) region using ``librosa.effects.trim``.

    Args:
        cutoff_db: Threshold in dB; it is negated and passed as ``top_db``.
        ref: Forwarded as ``ref`` to ``librosa.effects.trim``.
        frame_length: Forwarded as ``frame_length``; also used to adjust the
            returned begin and length.
        hop_length: Forwarded as ``hop_length``.
        input_data: Signal passed as ``y`` to ``librosa.effects.trim``.

    Returns:
        A ``(begin, length)`` tuple, each value wrapped with ``np.array``.
        ``length`` is left at 0 when librosa reports an empty interval.
    """
    # Only the interval is needed; the trimmed signal itself is discarded.
    _, interval = librosa.effects.trim(
        y=input_data,
        top_db=-cutoff_db,
        ref=ref,
        frame_length=frame_length,
        hop_length=hop_length,
    )
    first, last = interval[0], interval[1]
    # librosa's trim function calculates power with reference to center of window,
    # while DALI uses beginning of window. Hence the subtraction below
    begin = first - frame_length // 2
    span = last - first
    # A non-empty interval is extended by frame_length - 1; an empty one stays empty.
    length = span + frame_length - 1 if span != 0 else span
    return np.array(begin), np.array(length)

@pipeline_def
def nonsilent_region_pipe(cutoff_value, window_size, reference_power, reset_interval):