Example #1
def _convert_audio_and_split_sentences(extracted_dir, data_set, dest_dir):
    source_dir = os.path.join(extracted_dir, data_set)
    target_dir = os.path.join(extracted_dir, dest_dir)

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    # Loop over transcription files and split each one
    #
    # The format for each file 1-2.trans.txt is:
    #  1-2-0 transcription of 1-2-0.flac
    #  1-2-1 transcription of 1-2-1.flac
    #  ...
    #
    # Each file is then split into several files:
    #  1-2-0.txt (contains transcription of 1-2-0.flac)
    #  1-2-1.txt (contains transcription of 1-2-1.flac)
    #  ...
    #
    # We also convert the corresponding FLACs to WAV in the same pass
    files = []
    for root, dirnames, filenames in os.walk(source_dir):
        for filename in fnmatch.filter(filenames, "*.trans.txt"):
            trans_filename = os.path.join(root, filename)
            with codecs.open(trans_filename, "r", "utf-8") as fin:
                for line in fin:
                    # Parse each segment line
                    first_space = line.find(" ")
                    seqid, transcript = line[:first_space], line[first_space + 1 :]

                    # We need to do the encode-decode dance here because encode
                    # returns a bytes() object on Python 3, and text_to_char_array
                    # expects a string.
                    transcript = (
                        unicodedata.normalize("NFKD", transcript)
                        .encode("ascii", "ignore")
                        .decode("ascii", "ignore")
                    )

                    transcript = transcript.lower().strip()

                    # Convert corresponding FLAC to a WAV
                    flac_file = os.path.join(root, seqid + ".flac")
                    wav_file = os.path.join(target_dir, seqid + ".wav")
                    if not os.path.exists(wav_file):
                        tfm = Transformer()
                        tfm.set_output_format(rate=SAMPLE_RATE)
                        tfm.build(flac_file, wav_file)
                    wav_filesize = os.path.getsize(wav_file)

                    files.append((os.path.abspath(wav_file), wav_filesize, transcript))

    return pandas.DataFrame(
        data=files, columns=["wav_filename", "wav_filesize", "transcript"]
    )
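The function above returns a pandas DataFrame listing the converted WAVs; below is a minimal, hypothetical driver. The directory names and the CSV path are placeholders, not taken from the original project.

# Convert one LibriSpeech-style split and persist the manifest as CSV (all paths are illustrative).
files = _convert_audio_and_split_sentences(
    extracted_dir="LibriSpeech",
    data_set="train-clean-100",
    dest_dir="train-clean-100-wav",
)
files.to_csv("train-clean-100.csv", index=False)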
Example #2
def preprocess_wav(cls, fpath: Union[str, Path]) -> np.ndarray:
    """Load, resample, normalize and trim a waveform."""
    transformer = Transformer()
    transformer.norm()
    transformer.silence(silence_threshold=1, min_silence_duration=0.1)
    transformer.set_output_format(rate=cls.sample_rate, bits=16, channels=1)
    wav = transformer.build_array(input_filepath=str(fpath))
    wav = wav / (2**15)
    return wav.astype(np.float32)
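Because the method reads cls.sample_rate, it is clearly a classmethod taken from some audio-processing class. The sketch below restates it inside a minimal, assumed class so it is self-contained and callable; the class name and the 16 kHz rate are illustrative assumptions, not part of the original.

import numpy as np
from pathlib import Path
from typing import Union
from sox import Transformer


class AudioPreprocessor:
    """Minimal stand-in for the class the method above came from (assumption)."""

    sample_rate = 16000  # assumed class attribute consulted via cls.sample_rate

    @classmethod
    def preprocess_wav(cls, fpath: Union[str, Path]) -> np.ndarray:
        """Load, resample, normalize and trim a waveform."""
        transformer = Transformer()
        transformer.norm()
        transformer.silence(silence_threshold=1, min_silence_duration=0.1)
        transformer.set_output_format(rate=cls.sample_rate, bits=16, channels=1)
        wav = transformer.build_array(input_filepath=str(fpath))
        return (wav / (2**15)).astype(np.float32)


wav = AudioPreprocessor.preprocess_wav("utterance.wav")  # float32 samples in roughly [-1, 1)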
Example #3
File: dataset.py   Project: s3prl/s3prl
def loadFile(data, max_timestep):
    """Load a waveform with sox, normalize its level, scale it to [-1, 1], and crop it to at most max_timestep samples."""
    transformer = Transformer()
    transformer.norm()
    # transformer.silence(silence_threshold=1, min_silence_duration=0.1)
    transformer.set_output_format(rate=16000, bits=16, channels=1)
    wav = transformer.build_array(input_filepath=str(data))
    wav = torch.tensor(wav / (2**15)).float()
    length = len(wav)
    if length > max_timestep:
        start = 0
        end = max_timestep
        length = max_timestep
        wav = wav[start:end]
    length = torch.tensor(length).long()

    return wav, length
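Since loadFile returns waveforms of varying length (up to max_timestep samples), batching usually requires padding. A hypothetical collate sketch follows; the file paths and the five-second crop length are assumptions for illustration.

import torch
from torch.nn.utils.rnn import pad_sequence

paths = ["spk1.wav", "spk2.wav"]                       # placeholder inputs
items = [loadFile(p, max_timestep=16000 * 5) for p in paths]
wavs, lengths = zip(*items)
batch = pad_sequence(list(wavs), batch_first=True)     # (batch, max_len), zero-padded
lengths = torch.stack(lengths)                         # (batch,)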
Example #4
def sph_to_wav(source_dir, target_dir):
    """Convert .sph files to .wav files."""

    assert path.exists(source_dir) is True

    if not path.exists(target_dir):
        makedirs(target_dir)

    for sph_file in glob(path.join(source_dir, "*.sph")):
        transformer = Transformer()
        if hp.tedlium_rate != 16000:
            transformer.set_output_format(encoding='signed-integer',
                                          channels=1,
                                          rate=hp.tedlium_rate)
        wav_filename = path.splitext(path.basename(sph_file))[0] + ".wav"
        wav_file = path.join(target_dir, wav_filename)
        transformer.build(sph_file, wav_file)
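The converter above relies on a few module-level names that sit outside the snippet (path, makedirs, glob, Transformer, and the project's hp settings). The sketch below shows the assumed imports, a stand-in hp object, and a call with placeholder paths.

from glob import glob
from os import makedirs, path
from sox import Transformer


class hp:                      # stand-in for the project's hyperparameter module (assumption)
    tedlium_rate = 16000       # keep the native 16 kHz rate, so no resampling is applied


sph_to_wav("TEDLIUM/sph", "TEDLIUM/wav")   # placeholder source and target directories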
Example #5
def compressed_wav_to_full(source_dir, target_dir):
    """Convert compressed wav files to full wav files."""

    assert path.exists(source_dir) is True

    if not path.exists(target_dir):
        makedirs(target_dir)

    for compressed_file in glob(path.join(source_dir, "*.wav")):
        transformer = Transformer()
        if hp.callhome_rate == 8000:
            transformer.set_output_format(encoding='signed-integer', channels=1)  # Also set single channel.
        else:  # Do resampling if specified.
            transformer.set_output_format(encoding='signed-integer', channels=1, rate=hp.callhome_rate)
        wav_filename = path.basename(compressed_file)
        wav_file = path.join(target_dir, wav_filename)
        transformer.build(compressed_file, wav_file)
Example #6
File: dataset.py   Project: s3prl/s3prl
def loadFile_thread_exec(data):
    """Load a list of audio files with sox, normalize and trim silence, and randomly crop each waveform to at most max_timestep samples."""
    wavs = []
    lengths = []
    for i in range(len(data)):

        fullPath = data[i]
        transformer = Transformer()
        transformer.norm()
        transformer.silence(silence_threshold=1, min_silence_duration=0.1)
        transformer.set_output_format(rate=16000, bits=16, channels=1)
        wav = transformer.build_array(input_filepath=str(fullPath))
        wav = torch.tensor(wav / (2**15)).float()
        length = len(wav)
        if length > max_timestep:
            start = random.randint(0, int(length - max_timestep))
            end = start + max_timestep
            length = max_timestep
            wav = wav[start:end]
        wavs.append(wav)
        lengths.append(torch.tensor(length).long())
    return wavs, lengths
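The _thread_exec suffix suggests this loader is meant to run over chunks of the file list in parallel; the snippet itself also relies on module-level max_timestep, random, and torch defined elsewhere in dataset.py. Below is a hypothetical driver, with the chunk size, worker count, and file names as assumptions.

from concurrent.futures import ThreadPoolExecutor

file_list = ["a.wav", "b.wav", "c.wav", "d.wav"]                    # placeholder inputs
chunks = [file_list[i:i + 2] for i in range(0, len(file_list), 2)]  # two files per worker
with ThreadPoolExecutor(max_workers=2) as pool:
    results = list(pool.map(loadFile_thread_exec, chunks))
wavs = [w for chunk_wavs, _ in results for w in chunk_wavs]
lengths = [l for _, chunk_lengths in results for l in chunk_lengths]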
Example #7
#!/usr/bin/python3
# Convert every .flac file found under `path` to a 16 kHz .wav alongside it,
# optionally deleting the source .flac afterwards.

import os
import tqdm
from sox import Transformer

SAMPLE_RATE = 16000
remove_flac = False
path = '/home/dsmolen/agh/LibriSpeech/'

i = 0
tq = tqdm.tqdm(os.walk(path, topdown=False))
for root, dirs, files in tq:
    for name in files:
        if name.endswith('.flac'):
            tq.set_postfix(converted=i)
            i += 1
            name = name[:-5]
            flac_file = os.path.join(root, name + ".flac")
            wav_file = os.path.join(root, name + ".wav")
            if not os.path.exists(wav_file):
                tfm = Transformer()
                tfm.set_output_format(rate=SAMPLE_RATE)
                tfm.build(flac_file, wav_file)
            if remove_flac:
                os.remove(flac_file)
Example #8
import tensorflow as tf
import pandas as pd
import os
import unicodedata
import tqdm
import logging
import librosa
import numpy as np
import soundfile
from sox import Transformer
from sklearn.model_selection import train_test_split

logging.basicConfig(level=logging.NOTSET)
logging.getLogger('sox').setLevel(logging.ERROR)
FLAGS = tf.compat.v1.app.flags.FLAGS
tfm = Transformer()
tfm.set_output_format(rate=16000)


def main(_):
    source_dir = FLAGS.source_dir
    data = []
    df_details = pd.read_csv(os.path.join(source_dir, "validated.tsv"),
                             sep="\t",
                             header=0)
    with tqdm.tqdm(total=len(df_details.index)) as bar:
        for i in df_details.index:
            file_name = df_details["path"][i]
            source_file = os.path.join(source_dir, "clips/" + file_name)
            wav_file = os.path.join(
                os.path.dirname(__file__),
                "../data/common-voice-mozilla/Common-Voice-Mozilla/wav-files/"