Exemplo n.º 1
0
def read_mp3(filepath, plot=False, nperseg=5000, noverlap=0):
    """Load an audio file into a time-indexed ``xarray.DataArray``.

    Parameters
    ----------
    filepath : str
        Path of the audio file passed to ``audio2numpy.open_audio``.
    plot : bool
        When true, plot the waveform and the magnitude of its STFT.
    nperseg : int
        Samples per STFT segment (only used when plotting).
    noverlap : int
        Samples shared by neighbouring STFT segments (only used when plotting).

    Returns
    -------
    xarray.DataArray
        The audio samples (first channel only for multi-channel input) with a
        coordinate ``t`` equal to the sample index divided by the sample rate.
    """
    import audio2numpy as _a2n

    # Fix: the caller's path used to be overwritten by a hard-coded file on
    # one developer's machine, so the argument was silently ignored.
    audio, f_s = _a2n.open_audio(filepath)

    t = _np.arange(len(audio)) / f_s

    # Multi-channel input: keep the first channel only.
    samples = audio if len(audio.shape) == 1 else audio[:, 0]
    audio = _xr.DataArray(samples, dims='t', coords=[t])

    if plot:
        fig, ax = _plt.subplots()
        audio.plot(ax=ax)
        stft_results = stft(audio,
                            numberSamplesPerSegment=nperseg,
                            numberSamplesToOverlap=noverlap,
                            plot=False,
                            logScale=True)
        fig, ax = _plt.subplots()
        _np.abs(stft_results).plot(ax=ax)

    return audio
Exemplo n.º 2
0
    def read(self, path: str, depth = LOG_NO_DEPTH) -> None:
        """Load an audio file and fill in the sample, rate, duration and mono attributes.

        ``soundfile`` is tried first; if it raises ``RuntimeError`` the file is
        decoded with ``audio2numpy`` instead.

        Sets ``self.stereo_data`` (channels along the first axis),
        ``self.sample_rate``, ``self.duration``, ``self.channels`` and
        ``self.mono_data``.

        Args:
            path: Location of the audio file.
            depth: Prefix prepended to every log message.
        """
        debug_prefix = "[AudioFile.read]"

        logging.info(f"{depth}{debug_prefix} Reading stereo audio in path [{path}], trying soundfile")
        try:
            self.stereo_data, self.sample_rate = soundfile.read(path)
        except RuntimeError:
            # logging.warn is a deprecated alias; logging.warning is the supported name.
            logging.warning(f"{depth}{debug_prefix} Couldn't read file with soundfile, trying audio2numpy..")
            self.stereo_data, self.sample_rate = audio2numpy.open_audio(path)

        # We need to transpose to a (channels, samples) array
        logging.info(f"{depth}{debug_prefix} Transposing audio data")
        transposed = self.stereo_data.T
        if transposed.ndim == 1:
            # A single-channel file decodes to a 1-D array; give it an explicit
            # channel axis so the shape lookups below do not fail.
            transposed = transposed.reshape(1, -1)
        self.stereo_data = transposed

        # Calculate the duration and see how many channels this audio file has
        self.duration = self.stereo_data.shape[1] / self.sample_rate
        self.channels = self.stereo_data.shape[0]

        # Log some info about the audio file
        logging.info(f"{depth}{debug_prefix} Duration of the audio file = [{self.duration:.2f}s]")
        logging.info(f"{depth}{debug_prefix} Audio sample rate is         [{self.sample_rate}]")
        logging.info(f"{depth}{debug_prefix} Audio data shape is          [{self.stereo_data.shape}]")
        logging.info(f"{depth}{debug_prefix} Audio have                   [{self.channels}]")

        # Get the mono data of the audio
        logging.info(f"{depth}{debug_prefix} Calculating mono audio")
        if self.channels >= 2:
            # Average of the first two channels, as before.
            self.mono_data = (self.stereo_data[0] + self.stereo_data[1]) / 2
        else:
            # Only one channel exists, so it already is the mono signal.
            self.mono_data = self.stereo_data[0]

        # Just make sure the mono data is right..
        logging.info(f"{depth}{debug_prefix} Mono data shape:             [{self.mono_data.shape}]")
Exemplo n.º 3
0
	def run(self):
		"""Consume requests from ``self.media_queue`` until told to stop.

		The string ``'SHUTDOWN'`` clears ``self._keep_listening`` and ends the
		loop. Items shorter than two elements, or whose first element is not
		``'audio'``, are ignored. For an audio request the second element is
		the file to decode and an optional third element is a dict that may
		carry ``device`` and ``volume`` (default 100) for playback. Decoding
		failures are reported on ``self.buffer_queue`` as ``('ERR', text)``.
		"""
		while self._keep_listening:
			request = self.media_queue.get()

			if request == 'SHUTDOWN':
				self._keep_listening = False
				break

			# Nothing to do for malformed or non-audio requests.
			if len(request) < 2:
				continue
			if request[0] != 'audio':
				continue

			options = request[2] if len(request) >= 3 else {}
			device = options.get('device', None)
			volume = options.get('volume', 100)

			try:
				samples, rate = audio2numpy.open_audio(request[1])
			except AudioFormatError:
				for text in ('Invalid File Format:', request[1],
						'Accepted file formats: .wav .mp3'):
					self.buffer_queue.put(('ERR', text))
				continue

			if volume != 100 and type(volume) is int:
				# Convert the percentage into a gain factor and scale in place.
				fraction = volume / 100
				gain = 2 ** ((sqrt(sqrt(sqrt(fraction))) * 192 - 192) / 6)
				numpy.multiply(samples, gain, out=samples, casting='unsafe')

			sounddevice.play(samples, rate, device=device)
			sounddevice.wait()
Exemplo n.º 4
0
def get_audio_list(path_dataset, bucket_name):
    """Download the dataset's audio files and decode each one.

    Returns a list with one ``(signal, sampling_rate)`` tuple per path returned
    by ``download_audio_files``, in the same order.
    """
    decoded = map(open_audio, download_audio_files(path_dataset, bucket_name))
    return [(signal, sampling_rate) for signal, sampling_rate in decoded]
Exemplo n.º 5
0
    def ibm_recog(self, audioname, audiofp):
        """Transcribe an MP3 with IBM Watson Speech to Text and store the outcome.

        The file ``audiofp`` located next to ``audioname`` is streamed to the
        service over a websocket. The word timestamps of the first alternative
        of every result are flattened into one list and the confidences are
        averaged, then handed to ``self.setupIBM``. The audio itself is decoded
        with ``open_audio`` and passed to ``self.initAudio``.

        Args:
            audioname: Path whose directory contains the audio file.
            audiofp: Audio file name; also stored as ``self.audiofp``.
        """
        # NOTE(review): the API key and instance URL are hard-coded in source;
        # they should be loaded from configuration or a secret store.
        authenticator = IAMAuthenticator(
            '6noBhxJHkbRVsgbxsl47v6dFZnJdoRRrDRYte7GgKKxu')
        speech_to_text = SpeechToTextV1(authenticator=authenticator)
        speech_to_text.set_service_url(
            'https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/51085e72-7959-4c18-94cd-d4d874baf61d'
        )
        myRecognizeCallback = MyRecognizeCallback()

        with open(join(dirname(audioname), audiofp), 'rb') as audio_file:
            audio_source = AudioSource(audio_file)

            speech_to_text.recognize_using_websocket(
                audio=audio_source,
                content_type='audio/mp3',
                inactivity_timeout=-1,
                recognize_callback=myRecognizeCallback,
                model='en-US_BroadbandModel',
                timestamps=True,
                smart_formatting=True,
            )

        # NOTE(review): `result` is not defined in this method; presumably the
        # recognize callback publishes it at module level — confirm.
        timestamps = []
        confidences = []
        for r in result:
            best = r.get('alternatives')[0]
            timestamps.extend(best.get('timestamps'))
            confidences.append(best.get('confidence'))

        # Computed once after the loop; an empty result no longer leaves the
        # names unbound (confidence falls back to 0.0 in that case).
        confidence = sum(confidences) / len(confidences) if confidences else 0.0

        # NOTE(review): the upload above opens the file relative to
        # dirname(audioname) while this call uses `audiofp` as given — confirm
        # both resolve to the same file.
        a, sr = open_audio(audiofp)
        self.initAudio(a, sr)
        self.setupIBM(timestamps, confidence)
        self.audiofp = audiofp
Exemplo n.º 6
0
def get_talking(time):
    '''Return the first ``time`` seconds of the bundled talking sample.

    The sample ``test_talking.wav`` is looked up next to this module, truncated
    to ``sampling_rate * time`` samples and passed through ``scale_to_one``.
    According to the original author's note the result is 0-centered with a
    maximum amplitude of 0.5 (this depends on ``scale_to_one``).
    '''
    import os

    # Fix: "<module file>/../name" walks through a regular file, which POSIX
    # systems reject; resolve the module's directory explicitly instead.
    fp = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                      'test_talking.wav')
    song_signal, sampling_rate = open_audio(fp)
    trunc_signal = song_signal[:int(sampling_rate * time)]

    return scale_to_one(trunc_signal)
Exemplo n.º 7
0
def playSoundData(filename: str = "hello.mp3"):
    """Play ``filename`` twice at once: via ``playsound`` and via ``sd``.

    ``playsound`` runs on a worker thread while the calling thread decodes the
    file with ``open_audio`` and plays the samples through ``sd``. The function
    returns after ``sd`` has finished and the worker thread has been joined.
    """
    background = threading.Thread(target=playsound, args=(filename, ))
    background.start()

    samples, rate = open_audio(filename)
    sd.play(samples, rate)
    sd.wait()

    background.join()
Exemplo n.º 8
0
def get_song(time):
    """Return ``time`` seconds of the bundled song sample, flattened.

    The sample ``test_song_3.wav`` is looked up next to this module. The
    excerpt starts at sample 1950000 and spans ``sampling_rate * time``
    samples; the result is passed through ``flatten``.
    """
    import os

    # Fix: "<module file>/../name" walks through a regular file, which POSIX
    # systems reject; resolve the module's directory explicitly instead.
    fp = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                      'test_song_3.wav')

    song_signal, sampling_rate = open_audio(fp)

    start = 1950000
    trunc_signal = song_signal[start:start + int(sampling_rate * time)]

    return flatten(trunc_signal)
Exemplo n.º 9
0
def read_file(filename, directory):
    """Decode ``directory/filename`` and return ``(audio, sampling_rate)``.

    One-dimensional audio is returned unchanged. For audio with more than one
    dimension, the first column is added to the remaining column(s) and the
    result is flattened row by row into a plain Python list.
    """
    samples, rate = open_audio(os.path.join(directory, filename))
    if len(samples.shape) <= 1:
        return samples, rate

    # The first column broadcasts against every other column.
    summed = samples[:, 1:] + samples[:, :1]
    flat = []
    for row in summed:
        flat.extend(row)
    return flat, rate
Exemplo n.º 10
0
    def shortenPause(self, trans, pauseOverlap, RenderSettings):
        """Overlay two tracks and trim the shared pauses to a maximum length.

        Args:
            trans: Sequence whose first two items expose ``audiofp`` (MP3 paths).
            pauseOverlap: Indexable sequence of ``(start, end)`` pauses; the
                values are multiplied by 1000 before slicing the audio.
            RenderSettings: Object exposing ``pauseShortenAmount``; times 1000
                it is the longest pause that is kept.

        The result is exported to ``RawAudio/PauseShort.mp3``, decoded again
        and handed to ``self.initAudio``.
        """
        mspause = RenderSettings.pauseShortenAmount * 1000

        s1 = AudioSegment.from_file(trans[0].audiofp, format='mp3')
        s2 = AudioSegment.from_file(trans[1].audiofp, format='mp3')
        tot = s1.overlay(s2)

        pause_count = len(pauseOverlap)
        if pause_count == 0:
            # No pauses to shorten: keep the mix as is (this used to crash on
            # an unbound `msend`).
            shorty = tot
        else:
            shorty = AudioSegment.empty()

            # Audio before the first pause.
            first_start = 1000 * pauseOverlap[0][0]
            if first_start != 0:
                shorty += tot[:first_start]

            for i in range(pause_count - 1):
                pmsstart = 1000 * pauseOverlap[i][0]
                pmsend = 1000 * pauseOverlap[i][1]
                msstart = 1000 * pauseOverlap[i + 1][0]

                # Keep at most `mspause` of the pause. Fix: a pause exactly
                # `mspause` long used to match neither branch and was dropped.
                shorty += tot[pmsstart:min(pmsend, pmsstart + mspause)]

                # Audio between this pause and the next one.
                shorty += tot[pmsend:msstart]

            # NOTE(review): as before, the final pause is skipped entirely
            # rather than shortened — confirm this is intended.
            shorty += tot[1000 * pauseOverlap[pause_count - 1][1]:]

        shorty.export('RawAudio/PauseShort.mp3', format='mp3')
        a, sr = open_audio('RawAudio/PauseShort.mp3')
        self.initAudio(a, sr)
Exemplo n.º 11
0
    def profanityFilter(self, trans, Renderettings):
        """Overlay two tracks and replace every censored word with a bleep.

        Words equal to ``'****'`` in ``self.words`` (populated by
        ``self.MainFromOthers``) mark the spans to replace; the matching entry
        of ``self.timestamps`` supplies ``(start, end)``, multiplied by 1000
        before slicing. The result is exported to ``RawAudio/cleantest.mp3``,
        decoded again and handed to ``self.initAudio``.

        Args:
            trans: Sequence whose first two items expose ``audiofp`` (MP3 paths).
            Renderettings: Unused.
        """
        cens = 'RawAudio/timcensor.mp3'
        s1 = AudioSegment.from_file(trans[0].audiofp, format='mp3')
        s2 = AudioSegment.from_file(trans[1].audiofp, format='mp3')
        prof = s1.overlay(s2)
        bleep = AudioSegment.from_file(cens, format='mp3')

        self.MainFromOthers(trans)
        badlist = [self.timestamps[i]
                   for i, word in enumerate(self.words) if word == '****']

        if not badlist:
            # Nothing to censor: keep the mix unchanged (this used to raise
            # IndexError on badlist[0]).
            emp = prof
        else:
            emp = AudioSegment.empty()

            # Audio before the first bad word, then its bleep.
            fstart = badlist[0][0] * 1000
            fl = (badlist[0][1] - badlist[0][0]) * 1000
            emp += prof[:fstart]
            emp += bleep[:fl]

            # Fix: with a single bad word the loop below never runs, so the
            # end of the last censored span must be known beforehand.
            ended = badlist[0][1] * 1000

            for i in range(len(badlist) - 1):
                start = badlist[i][1] * 1000
                end = badlist[i + 1][0] * 1000
                ended = badlist[i + 1][1] * 1000

                # Clean audio between two bad words, then the next bleep,
                # capped at 500.
                emp += prof[start:end]
                bleep_length = min(
                    (badlist[i + 1][1] - badlist[i + 1][0]) * 1000, 500)
                emp += bleep[:bleep_length]

            # Remaining audio after the last bad word.
            emp += prof[ended:]

        emp.export('RawAudio/cleantest.mp3', format='mp3')
        a, sr = open_audio('RawAudio/cleantest.mp3')
        self.initAudio(a, sr)
Exemplo n.º 12
0
    def __getitem__(self, idx): 
        """Return ``(features, label)`` for the recording at position ``idx``.

        The file ``self.data[idx]`` is decoded, averaged over its second axis
        when it has one, and resampled to 44100 when its rate differs. The
        first and last 30 seconds are trimmed, a random 30-second window is
        taken, and (when ``self.train`` is set) the pitch is shifted by a
        random number of steps. The harmonic component is transformed with a
        constant-Q transform whose real part becomes a normalized float tensor
        with a leading axis of size 1. The label is ``self.labels[idx]`` as a
        ``LongTensor``.
        """
        hop_length = 1024
        # open audio
        file_path = self.data[idx]
        signal, sampling_rate = open_audio(file_path)
        if len(signal.shape) > 1:
            signal = np.mean(signal, axis = 1)
        if sampling_rate != 44100:
            # Fix: librosa >= 0.10 only accepts the rates as keywords; the
            # keyword names are valid on older releases as well.
            signal = librosa.resample(signal, orig_sr=sampling_rate, target_sr=44100)
            sampling_rate = 44100

        # get 30 second chunk
        len_index_30_sec = int(30 / (1 / sampling_rate))
        # trim first and last 30 seconds
        signal = signal[len_index_30_sec:-len_index_30_sec]
        # random start index
        # NOTE(review): a recording too short to leave more than one window
        # after trimming makes `high` non-positive here — confirm the dataset
        # never contains such files.
        start_index = np.random.randint(low = 0, high = len(signal) - len_index_30_sec)
        signal = signal[start_index:start_index + len_index_30_sec]
        # if training change pitch randomly
        if self.train:
            # `high` is exclusive, so the shift is drawn from -4..3.
            n_steps = np.random.randint(low = -4, high=4)
            signal = librosa.effects.pitch_shift(signal, sr=sampling_rate, n_steps=n_steps)
        # extract harmonic
        data_h = librosa.effects.harmonic(signal)
        # cqt transform
        # NOTE(review): this keeps the real part of the complex CQT rather
        # than its magnitude — confirm that is intended.
        S = np.real(librosa.cqt(data_h, sr=sampling_rate, hop_length=hop_length)).astype(np.float32)

        d = torch.from_numpy(np.expand_dims(S, axis = 0)).type(torch.FloatTensor)
        # normalize
        d = F.normalize(d)
        label = torch.from_numpy(np.array(self.labels[idx])).type(torch.LongTensor)

        return d, label
Exemplo n.º 13
0
def resample_and_save_datasets(path_dataset, bucket_name, files_format,
                               dimension_start, folder_start, song_start,
                               fragment_start):
    """Resample every song at several resolutions, save and upload the fragments.

    For each ``(dimension, rate)`` pair from ``dimension_start`` onwards and
    each folder index from ``folder_start`` up to 8, the songs found in
    ``local_ds/<files_format>/original/<folder + 1>/`` are decoded and split
    with ``resample_song``. Every fragment is written under ``local_ds/mp3/``
    and ``local_ds/wav/`` and uploaded to the same relative path below
    ``path_dataset``.

    ``song_start`` skips that many directory entries in the first folder only.
    ``folder_start`` applies to the first dimension only. ``fragment_start``
    is the first fragment number for every folder of the first dimension;
    later dimensions start at 1. Together these allow resuming an interrupted
    run.

    A song that fails is reported on stdout and skipped; processing continues
    with the next song.
    """
    dimensiones_progresivas = [(4, 750), (8, 1500), (16, 3000), (32, 6000),
                               (64, 12000), (128, 24000), (256, 48000)]
    for dimension in dimensiones_progresivas[dimension_start:]:
        dimension_tag = str(dimension[0]) + "-" + str(dimension[1])
        for folder in range(folder_start, 9):
            cant_fragmentos = fragment_start
            folder_tag = str(folder + 1)
            directory = "local_ds/" + files_format + "/original/" + folder_tag + "/"
            lista_canciones = os.listdir(directory)[song_start:]
            for song_dirname in lista_canciones:
                print("Preparando canción...: " + directory + song_dirname)
                try:
                    signal, sampling_rate = open_audio(directory +
                                                       song_dirname)
                    list_resampled_songs = resample_song(
                        dimension, signal, sampling_rate)
                    for fragment in list_resampled_songs:
                        # Save and upload as mp3 first, then as wav.
                        # NOTE(review): the same `write` call produces both
                        # files; whether the ".mp3" output is really MP3
                        # encoded depends on which `write` is imported.
                        for extension in ("mp3", "wav"):
                            relative_path = (extension + "/" + dimension_tag +
                                             "/" + folder_tag + "/" +
                                             str(cant_fragmentos) + "." +
                                             extension)
                            local_path = "local_ds/" + relative_path
                            os.makedirs(os.path.dirname(local_path),
                                        exist_ok=True)
                            write(local_path, dimension[1], fragment)
                            upload_blob(bucket_name, local_path,
                                        path_dataset + relative_path)
                        cant_fragmentos += 1
                except Exception as error:
                    # Still best-effort, but no longer silent, and Ctrl-C /
                    # SystemExit are no longer swallowed.
                    print("Error al procesar " + directory + song_dirname +
                          ": " + repr(error))
            # reset for the next folder
            song_start = 0
        # reset for the next dimension
        folder_start = 0
        fragment_start = 1
Exemplo n.º 14
0
def dalek_voice(inp):
    """Decode ``inp``, run it through ``mk_mid`` and ``ring_mod`` and play it.

    ``ring_mod`` is called with the constant 30 as its third argument, and the
    result is played with ``sa.play_buffer(voice, 1, 2, sampling_rate)``.
    """
    samples, rate = open_audio(inp)
    modulated = ring_mod(mk_mid(samples, rate), rate, 30)
    sa.play_buffer(modulated, 1, 2, rate)
Exemplo n.º 15
0
def load_mp3(fname: str) -> np.ndarray:
    """Load an mp3 file as a numpy array.

    Raises:
        ValueError: If the file's sampling rate is neither ``FS`` nor ``ORIG``.
    """
    data, sampling_rate = open_audio(fname)
    # An explicit check instead of `assert`, which is stripped under `python -O`.
    if sampling_rate not in (FS, ORIG):
        raise ValueError(
            f"unexpected sampling rate {sampling_rate} for {fname!r}; "
            f"expected {FS} or {ORIG}")
    return data
Exemplo n.º 16
0
 def test_aiff(self):
     """The AIFF example decodes to 128000 samples at a rate of 32000."""
     samples, rate = open_audio("./examples/chord.aif")
     self.assertEqual(128000, samples.shape[0])
     self.assertEqual(32000, rate)
Exemplo n.º 17
0
 def test_wav(self):
     """The WAV example decodes to 16128 samples at a rate of 24000."""
     samples, rate = open_audio("./examples/word.wav")
     self.assertEqual(24000, rate)
     self.assertEqual(16128, samples.shape[0])
Exemplo n.º 18
0
def read(filename, normalized=False):
    """Decode ``filename`` and return ``(signal, sampling_rate)``.

    ``normalized`` is accepted but not used.
    """
    samples, rate = open_audio(filename)
    return samples, rate
Exemplo n.º 19
0
import math

#terminal color
import platform

# Colour the prompts on Linux terminals only; elsewhere the markers are empty.
prefix, suffix = (
    ("\033[36m", "\033[39m") if platform.system() == "Linux" else ("", "")
)

# Input audio file (for example "inputs/sound.mp3").
fp = input(f"{prefix}Name of your input file: {suffix}")
print(f"{prefix}Opening audio file ...{suffix}")
signal, sampling_rate = open_audio(fp)

# Output file (for example "outputs/output.mp4").
output_file = input(f"{prefix}Name of output file: {suffix}")

upscale = int(
    input(f"{prefix}Upscale factor (2x recommended for HD, 4x for 4k): {suffix}")
)

logo_img = input(f"{prefix}Logo image: {suffix}")
print(f"{prefix}Opening logo file ...{suffix}")

print(f"{prefix}Sampling rate of audio file: {suffix}{sampling_rate!s}")

# NOTE(review): `list` shadows the builtin; the name is kept because code
# outside this fragment may rely on it.
list = []


c = 0
Exemplo n.º 20
0
import noisereduce as nr


def plot_fft(f, sample_rate=None):
    """Plot the power spectral density of ``f`` against frequency.

    Only bins 1 up to (but not including) ``floor(n / 2)`` are drawn, and only
    every tenth of those, so the mirrored upper half of the FFT is left out.

    Args:
        f: Sequence of samples.
        sample_rate: Samples per second. Defaults to the module-level ``sr``,
            which is what this function always used before.
    """
    if sample_rate is None:
        sample_rate = sr
    n = len(f)
    fhat = np.fft.fft(f, n)
    print('done')
    dt = 1 / sample_rate
    # fhat * conj(fhat) is |fhat|^2; take the real part so the plot receives
    # real numbers instead of complex values with a zero imaginary part.
    PSD = np.real(fhat * np.conj(fhat)) / n
    # Fix: bin k corresponds to k / (n * dt); the axis used to be scaled by
    # n / dt instead.
    freq = (1 / (dt * n)) * np.arange(n)
    L = np.arange(1, np.floor(n / 2), dtype='int')
    plt.plot(freq[L][::10], PSD[L][::10], alpha=0.1)


# Decode the recording; `sr` is also read by plot_fft as a module-level name.
fp = r'C:\Users\Dell\PycharmProjects\audiobook\org.mp3'  # change to the correct path to your file accordingly
x, sr = open_audio(fp)
# Earlier loader, kept for reference:
# sr, x = read_mp3(r'C:\Users\Dell\PycharmProjects\audiobook\org.mp3')
# Untouched copy of the decoded samples.
x_old = x.copy()
version = 4.0
# Thresholds; they are not used within this fragment.
# thr1 = 1e8
thr1 = 'na'
thr2 = int(278000 * 5)
thr3 = 200
# Sample-index ranges [start, stop) used below to collect noise samples.
# NOTE(review): the second pair starts at 182e5, which is larger than its stop
# value 2e5, so it selects nothing — possibly meant 1.82e5; confirm.
noise_indexes = [[3e5, 3.4e5], [182e5, 2e5], [6.1e5, 6.4e5], [1.41e6, 1.43e6],
                 [1.78e6, 1.87e6], [2.02e6, 2.13e6], [2.7e6, 2.79e6],
                 [2.99e6, 3.08e6], [3.14e6, 3.23e6]]
x_noice = []
# Work on the first column of the decoded audio only.
x_new = x[:, 0]
# Concatenate the samples of every listed range into one flat list.
for noise_tuple in noise_indexes:
    x_noice.extend(x_new[int(noise_tuple[0]):int(noise_tuple[1])])
import os

# Upper bound on the number of samples kept in each saved tensor.
LENGTH_TO_CONSTRUCT = 500000000

SPEECH_PATH = "en/clips"  #Common Voice dataset, https://commonvoice.mozilla.org/en/datasets
NOISE_PATH = "UrbanSound8k/audio/fold"  #UrbanSound8k dataset, https://urbansounddataset.weebly.com/urbansound8k.html

# print(len(os.listdir(SPEECH_PATH)))
# print(len(os.listdir(NOISE_PATH)))

length = 0
sounds_as_tensors = []

# Decode speech clips until enough samples have been collected.
for file_name in os.listdir(SPEECH_PATH):
    if ".mp3" in file_name:
        data, rate = open_audio(SPEECH_PATH + "/" + file_name)
        # Fix: resample from the rate the file was actually decoded at (it
        # used to be assumed to be 48000) and pass the rates by keyword, which
        # librosa >= 0.10 requires.
        data = librosa.resample(data, orig_sr=rate, target_sr=22050)
        length += len(data)
        sound_as_tensor = torch.tensor(data)
        sounds_as_tensors.append(sound_as_tensor)
        if length >= LENGTH_TO_CONSTRUCT:
            break

# Join all clips, cut to the target length and store the result.
speech_tensor = torch.cat(sounds_as_tensors)[:LENGTH_TO_CONSTRUCT]
torch.save(speech_tensor, "SPEECH.pt")
print(speech_tensor.size())

# Reset the accumulators for the next pass.
length = 0
sounds_as_tensors = []

for fold in range(1, 11):
Exemplo n.º 22
0
def speech_file_to_array_fn(batch):
    """Decode ``batch["path"]`` and store the resampled audio in ``batch["speech"]``.

    The decoded samples are converted to a tensor, passed to ``resampler``
    together with the file's sampling rate, and squeezed. The same ``batch``
    object is returned.
    """
    samples, rate = an.open_audio(batch["path"])
    waveform = torch.tensor(samples)
    batch["speech"] = resampler(rate, waveform).squeeze()
    return batch
Exemplo n.º 23
0
#!/usr/bin/env python
# coding: utf-8

# In[2]:


from audio2numpy import open_audio
import numpy as np
import matplotlib.pyplot as plt


# In[3]:


# Decode the source recording.
signal, signal_rate = open_audio("lisergic.mp3")


# In[137]:


# Work on the first column of the decoded audio.
orig = signal[:, 0]
L = len(orig)
# NOTE(review): with two equal-length inputs and the default mode,
# np.correlate yields a single value (the sum of squares), so this looks like
# the L2 norm in a one-element array rather than an RMS — confirm.
orig_rms = np.sqrt(np.correlate(orig, orig))
# NOTE(review): nothing is appended to `noisies` in the lines visible here.
noisies = []
for i in range(3):
  noisy = np.zeros(L)
  # Random shift of up to 1% of the signal length.
  rnd = int(np.random.uniform()*L*0.01)
  print(rnd)
  # Circular shift: move the signal left by `rnd` samples and wrap the first
  # `rnd` samples around to the end.
  noisy[0 : L-rnd] = orig[rnd : ]
  noisy[L - rnd : ] = orig[ : rnd]
  # Add uniform random values scaled by 1/orig_rms.
  noisy += 1/orig_rms*np.random.uniform(size=len(orig))
Exemplo n.º 24
0
                title, artist = ("-".join(tokens[:-1]), tokens[-1].strip()) if len(tokens) > 1 else (title, None)
                lines.append((t, title.strip().replace("/", "-"), artist))
        return lines

# Script entry point: split one audio file into per-track MP3 files using the
# timing information read from a text file.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="ostlyser")
    parser.add_argument("-v", "--version", action="version", version="1.0.0")
    parser.add_argument("-a", "--audio", dest="audio", required=True,
                        help="audio file to break up")
    parser.add_argument("-i", "--input", dest="file", required=True,
                        help="read timing information from a file")
    parser.add_argument("-d", "--delimiter", dest="delim", default=":",
                        help="delimiter to split input file on")
    args = parser.parse_args()

    # Decode the audio and load the original file's tags.
    data, sr = open_audio(args.audio)
    file = eyed3.load(args.audio)
    # Each entry is unpacked below as (start, name, artist).
    lines = parse_file(args, sr)

    for i, (start, name, artist) in enumerate(lines):
        # get slice of the original file that this song represents:
        # it runs up to the next entry's start, or to the end of the data
        # for the last entry
        song = data[start: lines[i + 1][0] if i != len(lines) - 1 else len(data)]
        path = f"{i + 1}_{name}"
        save_mp3(path, song, sr)

        f = eyed3.load(path + ".mp3")
        # copy the tags of the original file
        # NOTE(review): this assigns the same tag object to every track, so
        # the title/artist/track edits below also change `file.tag` — confirm.
        # NOTE(review): no call that saves the tag is visible in this fragment.
        f.tag = file.tag
        f.tag.title = name
        f.tag.artist = artist
        f.tag.track_num = i + 1
Exemplo n.º 25
0
# Last panel of the preceding figure; `sample23` and `x23` are defined
# outside this fragment.
plt.plot(sample23, x23, ':b*')
plt.title('Rational 2/3')

#plt.subplots_adjust(wspace=0.35,hspace=0.9)
plt.tight_layout()
plt.show()
# Sine Wave Manipulation

# Extracting the audio file
# NOTE(review): `ffmpeg` is not used in the lines visible here.
import ffmpeg

from audio2numpy import open_audio

fp = 'sin.wav'
audio, sampling_rate = open_audio(fp)
#mu, sampling_rate = ffmpeg.input(fp)
# Number of decoded samples.
sample_space = len(audio)

# Plotting + Fourier Transform

plt.figure()
# NOTE(review): the 'seaborn' style name was deprecated in Matplotlib 3.6 in
# favour of 'seaborn-v0_8' — confirm against the installed version.
plt.style.use('seaborn')

# Magnitude of the FFT of the decoded audio.
fourier_audio = np.absolute(np.fft.fft(audio))
# NOTE(review): the x axis spans -2*pi..2*pi evenly, but the FFT output is
# not shifted, so the axis may not line up with the bins — confirm.
omega_orginal = np.linspace(-2 * np.pi, 2 * np.pi, len(audio))

plt.subplot(2, 2, (1, 2))
plt.plot(omega_orginal, fourier_audio)
plt.title('Original')
# Down -> Inter