示例#1
0
                      rate=SAMPLE_RATE,
                      data=augmented_samples)

    # AddGaussianNoise
    augmenter = Compose(
        [AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(
            output_dir, "AddGaussianNoise_{:03d}.wav".format(i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path,
                      rate=SAMPLE_RATE,
                      data=augmented_samples)

    # TimeStretch
    augmenter = Compose([TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(output_dir,
                                        "TimeStretch_{:03d}.wav".format(i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path,
                      rate=SAMPLE_RATE,
                      data=augmented_samples)

    # PitchShift
    augmenter = Compose([PitchShift(min_semitones=-4, max_semitones=4, p=1.0)])
    for i in range(5):
        output_file_path = os.path.join(output_dir,
                                        "PitchShift_{:03d}.wav".format(i))
        augmented_samples = augmenter(samples=samples, sample_rate=SAMPLE_RATE)
        wavfile.write(output_file_path,
示例#2
0
            "num_runs": 5
        },
        {
            "instance":
            Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0),
            "num_runs":
            5,
            "name":
            "ShiftWithoutRollover",
        },
        {
            "instance": TimeMask(p=1.0),
            "num_runs": 5
        },
        {
            "instance": TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0),
            "num_runs": 5
        },
        {
            "instance": Trim(p=1.0),
            "num_runs": 1
        },
    ]

    for sound_file_path in sound_file_paths:
        samples, sample_rate = load_sound_file(sound_file_path,
                                               sample_rate=None,
                                               mono=False)
        if len(samples.shape) == 2 and samples.shape[0] > samples.shape[1]:
            samples = samples.transpose()
示例#3
0
from python_speech_features import mfcc
import scipy.io.wavfile as wav
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
import os
import random
from tqdm import tqdm

from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift

# Module-level augmentation pipeline chaining four audiomentations transforms:
# additive Gaussian noise, time stretching, pitch shifting and time shifting.
# NOTE(review): every transform is configured with p=0.5, presumably the
# per-call probability that the transform is applied -- confirm against the
# audiomentations documentation.
augmenter = Compose([
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
    TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
    PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
    Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5),
])


def load_noise(path='/home/CAIL/Speaker_R/data/voice/background_noise/'):
    """Load the samples of every WAV file found directly inside *path*.

    Args:
        path: Directory to scan (not recursive).

    Returns:
        A list holding one sample array per ``.wav`` file, as returned by
        ``wav.read``, ordered by file name. The sample rate of each file is
        read but discarded.
    """
    noise = []
    # Sort so the result does not depend on the arbitrary order of os.listdir.
    for filename in sorted(os.listdir(path)):
        # Match on the extension rather than on the substring 'wav', which
        # would also accept names such as 'wavelet.txt' and crash in wav.read.
        if not filename.lower().endswith('.wav'):
            continue
        _, sig = wav.read(os.path.join(path, filename))
        noise.append(sig)
    return noise
def applyTransformations(fileName, output_dir, auxiliarSoundsDir):
    """Write augmented variants of one audio file, one transform at a time.

    Every transform is wrapped in its own ``Compose`` with ``p=1.0`` and
    applied to the samples loaded from *fileName*. Each result is written to
    ``<output_dir>/<name>_<Transform>_<run:03d>.wav``, where ``<name>`` is the
    base file name of *fileName* up to its first dot. ``AddImpulseResponse``
    and ``Normalize`` are run once; every other transform is run five times.

    Args:
        fileName: Path of the audio file to augment; read with
            ``load_wav_file``.
        output_dir: Directory that receives the generated WAV files.
        auxiliarSoundsDir: Directory containing ``helperSounds/ir``,
            ``helperSounds/background_noises`` and
            ``helperSounds/short_noises``.
    """
    # Take the base name first: splitting the whole path on "." yields an
    # empty name for inputs such as "./clip.wav" or "../data/clip.wav".
    name = os.path.basename(fileName).split(".")[0]
    # NOTE(review): samples are written at SAMPLE_RATE, so load_wav_file is
    # presumably expected to return audio at that rate -- confirm.
    samples = load_wav_file(fileName)

    def write_runs(transform, label, num_runs=5):
        """Apply *transform* ``num_runs`` times, saving each result to disk."""
        augmenter = Compose([transform])
        for i in range(num_runs):
            output_file_path = os.path.join(
                output_dir, "{}_{}_{:03d}.wav".format(name, label, i))
            augmented_samples = augmenter(samples=samples,
                                          sample_rate=SAMPLE_RATE)
            wavfile.write(output_file_path,
                          rate=SAMPLE_RATE,
                          data=augmented_samples)

    # Transforms are built one at a time, right before use, so a failure in
    # one of them (e.g. a missing helper directory) still leaves the outputs
    # of the earlier transforms on disk.
    write_runs(
        AddImpulseResponse(p=1.0,
                           ir_path=os.path.join(auxiliarSoundsDir,
                                                "helperSounds/ir")),
        "AddImpulseResponse",
        num_runs=1)
    write_runs(FrequencyMask(p=1.0), "FrequencyMask")
    write_runs(TimeMask(p=1.0), "TimeMask")
    write_runs(AddGaussianSNR(p=1.0), "AddGaussianSNR")
    write_runs(
        AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0),
        "AddGaussianNoise")
    write_runs(TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0), "TimeStretch")
    # The label used to be misspelled "itchShift", producing misnamed files.
    write_runs(PitchShift(min_semitones=-4, max_semitones=4, p=1.0),
               "PitchShift")
    write_runs(Shift(min_fraction=-0.5, max_fraction=0.5, p=1.0), "Shift")
    write_runs(
        Shift(min_fraction=-0.5, max_fraction=0.5, rollover=False, p=1.0),
        "ShiftWithoutRollover")
    write_runs(Normalize(p=1.0), "Normalize", num_runs=1)
    write_runs(ClippingDistortion(p=1.0), "ClippingDistortion")
    write_runs(
        AddBackgroundNoise(sounds_path=os.path.join(
            auxiliarSoundsDir, "helperSounds/background_noises"),
                           p=1.0),
        "AddBackgroundNoise")
    write_runs(
        AddShortNoises(
            sounds_path=os.path.join(auxiliarSoundsDir,
                                     "helperSounds/short_noises"),
            min_snr_in_db=0,
            max_snr_in_db=8,
            min_time_between_sounds=2.0,
            max_time_between_sounds=4.0,
            burst_probability=0.4,
            min_pause_factor_during_burst=0.01,
            max_pause_factor_during_burst=0.95,
            min_fade_in_time=0.005,
            max_fade_in_time=0.08,
            min_fade_out_time=0.01,
            max_fade_out_time=0.1,
            p=1.0,
        ),
        "AddShortNoises")
    def __getitem__(self, idx):
        """Return log-spectrograms for the anchor/positive/negative clips of row *idx*.

        Columns 0, 1 and 2 of ``self.data`` supply the three audio paths. Each
        clip is loaded with ``torchaudio.load`` and, when its own coin flip
        comes up 1, passed through a TimeStretch + Shift pipeline. It is then
        turned into a spectrogram and converted to decibels. Paths, signals,
        sample rates and both spectrogram stages are also stored on ``self``.

        Returns:
            Tuple ``(anchor, positive, negative)`` of log-spectrogram tensors.
        """
        pipeline = Compose([
            TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
            Shift(min_fraction=-0.5, max_fraction=0.5, p=0.5, rollover=False),
        ])

        # One coin flip per clip, drawn up front in anchor/positive/negative
        # order, before any file is read.
        flip_anchor, flip_positive, flip_negative = [
            random.randint(0, 1) for _ in range(3)
        ]

        def maybe_augment(flip, waveform, rate):
            """Run the pipeline on *waveform* only when *flip* is 1."""
            if flip != 1:
                return waveform
            return torch.from_numpy(
                pipeline(samples=waveform.numpy(), sample_rate=rate))

        def to_spectrograms(waveform):
            """Return the (spectrogram, log-spectrogram) pair of *waveform*."""
            linear = torchaudio.transforms.Spectrogram(
                n_fft=320, hop_length=160, win_length=320)(waveform)
            return linear, torchaudio.transforms.AmplitudeToDB()(linear)

        # Only the anchor path is coerced to str.
        self.anchor = str(self.data.iloc[idx, 0])
        self.positive = self.data.iloc[idx, 1]
        self.negative = self.data.iloc[idx, 2]

        self.signalAnchor, self.srAnchor = torchaudio.load(self.anchor)
        self.signalPositive, self.srPositive = torchaudio.load(self.positive)
        self.signalNegative, self.srNegative = torchaudio.load(self.negative)

        self.signalAnchor = maybe_augment(flip_anchor, self.signalAnchor,
                                          self.srAnchor)
        self.signalPositive = maybe_augment(flip_positive,
                                            self.signalPositive,
                                            self.srPositive)
        self.signalNegative = maybe_augment(flip_negative,
                                            self.signalNegative,
                                            self.srNegative)

        self.spectogramAnchor, self.logSpectogramAnchor = to_spectrograms(
            self.signalAnchor)
        self.spectogramPositive, self.logSpectogramPositive = to_spectrograms(
            self.signalPositive)
        self.spectogramNegative, self.logSpectogramNegative = to_spectrograms(
            self.signalNegative)

        # A disabled follow-up step used to convert each log-spectrogram to an
        # RGB PIL image and pass it through self.vision_transform; the raw
        # log-spectrogram tensors are returned instead.
        return (self.logSpectogramAnchor, self.logSpectogramPositive,
                self.logSpectogramNegative)