示例#1
0
文件: timit.py 项目: cmanny/vrae
    def get_sentence_data(self, speaker, sid, spec=False):
        """Load the audio (or its spectrogram) and labels of one sentence.

        Parameters
        ----------
        speaker : key into ``self.speakers``; also used in the folder name.
        sid : sentence id, used as the file stem (``sid + ".WAV"`` etc.).
        spec : when true, return ``spectrogram(...)`` of the samples instead
            of the raw result of ``self._wav``.

        Returns
        -------
        tuple
            ``(data, self._wrd(wrd_file), self._phn(phn_file))`` where
            ``data`` is ``self._wav(wav_file)`` or, with ``spec=True``, the
            spectrogram computed from ``self._wav(wav_file)[1]``.
        """
        # Because the TIMIT dataset uses the NIST SPHERE header we
        # first convert it into a standard WAV if we have not already
        spkr = self.speakers[speaker]
        part = "TRAIN" if spkr["use"] == "TRN" else "TEST"
        folder = os.path.join(
            self.full_path, part, "DR" + spkr["dr"], spkr["sex"] + speaker)
        phn_file = os.path.join(folder, sid + ".PHN")
        wrd_file = os.path.join(folder, sid + ".WRD")
        sph_wav_file = os.path.join(folder, sid + ".WAV")
        wav_file = os.path.join(folder, sid + ".REALWAV")
        if not os.path.exists(wav_file):
            SPHFile(sph_wav_file).write_wav(wav_file)

        if spec:
            # The on-disk spectrogram cache is disabled (nothing is saved or
            # loaded), so always recompute. Previously a stale
            # "<sid>.SPEC_*.npy" file made this branch return ``None`` data.
            wav = self._wav(wav_file)[1]
            data = spectrogram(
                wav.astype('float64'),
                fft_size=self.fft_size * 2,
                step_size=self.window_size,
                log=True,
                thresh=self.thresh
            )
        else:
            data = self._wav(wav_file)

        return data, self._wrd(wrd_file), self._phn(phn_file)
示例#2
0
    def wav_converter(self):
        """Convert every ``*.WAV`` SPHERE file below ``self.dataset_path``.

        The tree is walked as ``dataset_path/<dialect>/<speaker>/*.WAV``.
        For each file the sibling ``.TXT`` file is read; the first two
        fields of its last line are taken as start/end sample indices and
        divided by 16000 to get the time range passed to
        ``SPHFile.write_wav``. Output goes next to the source with a
        lower-case ``.wav`` extension.

        Raises
        ------
        ValueError
            If a ``.TXT`` file contains no lines (previously this surfaced
            as an ``UnboundLocalError``).
        """
        dialects_path = self.dataset_path
        for dialect in os.listdir(dialects_path):
            dialect_path = os.path.join(dialects_path, dialect)
            for speaker in os.listdir(path=dialect_path):
                speaker_path = os.path.join(dialect_path, speaker)

                for wav_file in glob2.glob(speaker_path + '/*.WAV'):
                    sph = SPHFile(wav_file)
                    txt_file = wav_file[:-3] + "TXT"

                    start_time = end_time = None
                    # Context manager so the handle is closed per file.
                    with open(txt_file, 'r') as f:
                        for line in f:
                            words = line.split(" ")
                            start_time = (int(words[0]) / 16000)
                            end_time = (int(words[1]) / 16000)
                    if start_time is None:
                        raise ValueError(
                            "no time range found in {}".format(txt_file))

                    print("writing file ", wav_file)
                    # Swap only the extension; str.replace would also
                    # rewrite ".WAV" inside directory names.
                    sph.write_wav(wav_file[:-4] + ".wav", start_time,
                                  end_time)
def process_wav(path, wav_file):
    """Convert one SPHERE file to WAV and extract MFCC / filter-bank features.

    Parameters
    ----------
    path : directory containing ``wav_file``.
    wav_file : file name; must end in ``wav``.

    Returns
    -------
    tuple
        ``(mfccs, fbs_with_energy, wav, FS)`` where ``fbs_with_energy`` is
        the ``fbank`` features with the energy appended as a last column.

    Raises
    ------
    AssertionError
        If ``wav_file`` does not end in ``wav``.
    """
    if wav_file[-3:] != 'wav':
        # Raised explicitly (same type and message as the former assert) so
        # the check is not stripped when Python runs with -O.
        raise AssertionError(
            'Wrong file name, should be a wav: {}'.format(wav_file))
    sphere_file_path = os.path.join(path, wav_file)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(TMP_PATH, exist_ok=True)

    wav_copy_file_path = os.path.join(TMP_PATH, wav_file) + '_readable'

    try:
        sph = SPHFile(sphere_file_path)
        sph.write_wav(wav_copy_file_path)
        FS, wav = wavfile.read(wav_copy_file_path)
    finally:
        # Never leave the temporary copy behind, even on failure.
        if os.path.exists(wav_copy_file_path):
            os.remove(wav_copy_file_path)

    mfccs = mfcc(wav,
                 FS,
                 winstep=window_step_in_second,
                 winlen=window_size_in_second)
    fbs, energy = fbank(wav,
                        FS,
                        nfilt=n_filter_bank,
                        winstep=window_step_in_second,
                        winlen=window_size_in_second)
    fbs_with_energy = np.concatenate([fbs, energy[:, None]], axis=1)
    return mfccs, fbs_with_energy, wav, FS
示例#4
0
 def move_file_2_wav_n(self, remove_flag=False):
     """Copy all source files into ``self.targ_dir`` and convert them to WAV.

     With ``remove_flag=False`` the files returned by
     ``self.get_dir_frombase`` for ``self.org_train`` and ``self.org_test``
     are copied to ``targ_dir/<i>.wav`` and each copy is converted with
     ``SPHFile.write_wav`` into ``targ_dir/<i><sub_add>.wav``. Nothing is
     done when ``targ_dir`` already contains entries.

     With ``remove_flag=True`` the user is prompted and, on any non-empty
     answer, ``targ_dir`` is deleted recursively.

     Returns ``None`` in every case.
     """
     if remove_flag:
         a = input("the dir:{} will be remove".format(self.targ_dir))
         if a:
             try:
                 # os.remove() cannot delete a directory, and the former
                 # chmod(S_IWOTH) stripped the owner's own access bits.
                 shutil.rmtree(self.targ_dir)
             except PermissionError:
                 print("Permission is dine,after use chomod try to run with sudo")
         return

     train_videos = self.get_dir_frombase(self.org_train)
     print("fine to load train_data's name, length is {}".format(len(train_videos)))
     test_videos = self.get_dir_frombase(self.org_test)
     print("fine to load test_data's name, length is {}".format(len(test_videos)))
     all_videos = train_videos + test_videos

     if os.path.exists(self.targ_dir):
         # Any existing entry (including an earlier converted file) means
         # the directory was populated before: do not touch it.
         if os.listdir(self.targ_dir):
             print("The file might be exsist this function {} might not work"
                   .format(self.move_file_2_wav_n.__name__))
             return
     else:
         os.mkdir(self.targ_dir)

     # Copy and convert through the very same path; previously the copy
     # went to targ_dir + "<i>.WAV" (no separator, upper-case extension)
     # while the conversion read targ_dir + "/<i>.wav".
     copied = []
     for i, fp in enumerate(all_videos):
         target = os.path.join(self.targ_dir, str(i) + ".wav")
         shutil.copy(fp, target)
         copied.append(target)
     for i, fp in enumerate(copied):
         sph = SPHFile(fp)
         # Insert sub_add before the extension only.
         sph.write_wav(filename=fp[:-len(".wav")] + self.sub_add + ".wav")
         print("fin {}".format(i))
     return
示例#5
0
    def __getitem__(self, idx):
        """Return normalised MFCC features for the ``idx``-th audio file.

        The file ``self.audio_root_path + self.audio_filenames[idx]`` is
        read as WAV. If that fails and the name ends in ``.wav``, the
        sibling ``.WAV`` file is converted with ``SPHFile.write_wav`` and
        the read is retried.

        Returns
        -------
        tuple
            ``(torch.FloatTensor(mfcc), nframes)`` where ``mfcc`` is the
            transposed output of ``self.convert_to_fixed_length`` and
            ``nframes`` is ``min(number of MFCC frames, self.max_nframes)``.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        audio_filename = self.audio_filenames[idx]
        audio_path = self.audio_root_path + audio_filename
        try:
            sr, y = io.wavfile.read(audio_path)
        except (OSError, ValueError):
            # Missing file or a non-RIFF header (e.g. NIST SPHERE). A bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            if audio_filename.split('.')[-1] == 'wav':
                audio_filename_sph = '.'.join(
                    audio_filename.split('.')[:-1] + ['WAV'])
                sph = SPHFile(self.audio_root_path + audio_filename_sph)
                sph.write_wav(audio_path)
            sr, y = io.wavfile.read(audio_path)

        y = preemphasis(y, self.coeff)

        n_fft = int(self.window_ms * sr / 1000)
        hop_length = int(self.skip_ms * sr / 1000)
        # ``y`` is keyword-only in recent librosa releases; the keyword
        # form works on old and new versions alike.
        mfcc = librosa.feature.mfcc(y=y,
                                    sr=sr,
                                    n_mfcc=self.n_mfcc,
                                    dct_type=self.dct_type,
                                    n_fft=n_fft,
                                    hop_length=hop_length)
        # Global mean/variance normalisation; EPS guards a zero variance.
        mfcc -= np.mean(mfcc)
        mfcc /= max(np.sqrt(np.var(mfcc)), EPS)
        nframes = min(mfcc.shape[1], self.max_nframes)
        mfcc = self.convert_to_fixed_length(mfcc)
        mfcc = mfcc.T

        # labels = [self.phone2idx[phone_seq[i]] for i in range(min(self.max_nphones, len(phone_seq)))]
        # TODO
        # if self.compute_cmvn:
        return torch.FloatTensor(mfcc), nframes
示例#6
0
def read_audio(current_file, sample_rate=None, mono=True):
    """Load the audio referenced by ``current_file['audio']``.

    Parameters
    ----------
    current_file : dict
        Must provide the key ``'audio'`` (path to the file). An optional
        ``'channel'`` key selects a single, 1-indexed channel.
    sample_rate : int, optional
        Forwarded to ``librosa.load`` as ``sr``; ``None`` by default.
    mono : bool, optional
        When true (default) the samples go through ``librosa.to_mono``.

    Returns
    -------
    y : np.array
        Loaded samples, transposed before being returned.
    sample_rate : int
        Sampling rate reported by ``librosa.load``.
    """
    audio_path = current_file['audio']

    if audio_path[-4:] == '.sph':
        # SPHERE input: convert to a temporary wav file first and let
        # librosa read that copy.
        from sphfile import SPHFile
        sphere = SPHFile(audio_path)
        with tempfile.NamedTemporaryFile() as tmp:
            sphere.write_wav(tmp.name)
            y, sample_rate = librosa.load(tmp.name, sr=sample_rate, mono=False)
    else:
        # Any other format is handed to librosa directly.
        y, sample_rate = librosa.load(audio_path, sr=sample_rate, mono=False)

    # Single-channel data comes back as (n,); make it (1, n).
    if y.ndim == 1:
        y = y.reshape(1, -1)

    # Keep only the requested (1-indexed) channel, if any.
    channel = current_file.get('channel', None)
    if channel is not None:
        y = y[channel - 1, :]

    if mono:
        y = librosa.to_mono(y)

    return y.T, sample_rate
示例#7
0
def read_audio(current_file, sample_rate=None, mono=True):
    """Read an audio file, converting SPHERE input on the fly.

    Parameters
    ----------
    current_file : dict
        Provides ``'audio'`` (file path) and optionally ``'channel'``
        (1-indexed channel to extract).
    sample_rate : int, optional
        Passed to ``librosa.load`` as ``sr``. Defaults to ``None``.
    mono : bool, optional
        Apply ``librosa.to_mono`` to the result. Defaults to True.

    Returns
    -------
    y : np.array
        Audio samples (transposed just before returning).
    sample_rate : int
        Sampling rate returned by ``librosa.load``.
    """
    if current_file['audio'].endswith('.sph'):
        from sphfile import SPHFile
        converter = SPHFile(current_file['audio'])
        # Dump the sphere file to a temporary wav file and load it from
        # there while the temporary file still exists.
        with tempfile.NamedTemporaryFile() as wav_copy:
            converter.write_wav(wav_copy.name)
            loaded = librosa.load(wav_copy.name, sr=sample_rate, mono=False)
    else:
        loaded = librosa.load(current_file['audio'],
                              sr=sample_rate,
                              mono=False)
    y, sample_rate = loaded

    # Mono files arrive as (n,); reshape them to (1, n).
    if y.ndim == 1:
        y = y.reshape(1, -1)

    # Extract a specific channel when one was requested.
    channel = current_file.get('channel', None)
    if channel is not None:
        y = y[channel - 1, :]

    # Down-mix to mono unless the caller opted out.
    if mono:
        y = librosa.to_mono(y)

    return y.T, sample_rate
示例#8
0
  def load_audio(self, idx):
    """Load fixed-length acoustic features for one utterance.

    ``idx`` is first mapped through ``self.keep_indices``. Feature settings
    are read from ``self.configs`` (with defaults) and stored on ``self``.

    If ``self.audio_root_path`` ends in ``json`` the features are read with
    ``kaldiio.load_mat``; otherwise ``<root>/<key>.wav`` is read and a mel
    spectrogram is computed and normalised. When the wav cannot be read,
    ``<root>/<key>.WAV`` is converted with ``SPHFile.write_wav`` and the
    read is retried.

    Returns
    -------
    tuple
        ``(mfcc, phone_boundary)`` if ``self.return_boundary`` is set,
        else ``(mfcc, nframes)``; ``mfcc`` is a ``torch.FloatTensor``.
    """
    idx = self.keep_indices[idx]
    # Extract segment-level acoustic features
    self.n_mfcc = self.configs.get('n_mfcc', 40)
    self.coeff = self.configs.get('coeff', 0.97)
    self.dct_type = self.configs.get('dct_type', 3)
    self.skip_ms = self.configs.get('skip_size', 10)
    self.window_ms = self.configs.get('window_len', 25)

    # Row 0 marks segment starts, row 1 segment ends; times are divided by
    # 10 to obtain frame indices.
    phone_boundary = np.zeros((2, self.max_nframes+1))
    for segment in self.segmentations[idx]:
        start_ms, end_ms = segment
        start_frame, end_frame = int(float(start_ms) / 10), int(float(end_ms) / 10)
        if end_frame > self.max_nframes:
            break
        phone_boundary[0, start_frame] = 1.
        phone_boundary[1, end_frame] = 1.

    if self.audio_root_path.split('.')[-1] == 'json': # Assume kaldi format if audio_root_path is a json file
        mfcc = kaldiio.load_mat(self.audio_keys[idx])
        # NOTE(review): nframes is taken from shape[1] before the transpose
        # below - confirm this is the time axis for the kaldi matrices.
        nframes = min(mfcc.shape[1], self.max_nframes)
        mfcc = self.convert_to_fixed_length(mfcc.T)
    else:
        audio_filename = '{}.wav'.format(self.audio_keys[idx])
        # One path for the first read, the conversion target and the retry
        # (the fallback used to concatenate without a separator).
        wav_path = '{}/{}'.format(self.audio_root_path, audio_filename)
        try:
            sr, y_wav = wavfile.read(wav_path)
        except (OSError, ValueError):
            # Missing or non-RIFF file: convert the SPHERE original. The
            # output must be the .wav path; it was previously written over
            # the .WAV source, so the retry read a file never created.
            sph_path = '{}/{}.WAV'.format(self.audio_root_path,
                                          self.audio_keys[idx])
            SPHFile(sph_path).write_wav(wav_path)
            sr, y_wav = wavfile.read(wav_path)

        y_wav = preemphasis(y_wav, self.coeff)
        n_fft = int(self.window_ms * sr / 1000)
        hop_length = int(self.skip_ms * sr / 1000)
        # mfcc = librosa.feature.mfcc(y_wav, sr=sr, n_mfcc=self.n_mfcc, dct_type=self.dct_type, n_fft=n_fft, hop_length=hop_length)
        # ``y`` is keyword-only in recent librosa releases.
        mfcc = librosa.feature.melspectrogram(y=y_wav, sr=sr, n_mels=self.n_mfcc, n_fft=n_fft, hop_length=hop_length)
        mfcc -= np.mean(mfcc)
        mfcc /= max(np.sqrt(np.var(mfcc)), EPS)
        nframes = min(mfcc.shape[1], self.max_nframes)
        mfcc = self.convert_to_fixed_length(mfcc)

    mfcc = torch.FloatTensor(mfcc)
    if phone_boundary.sum() == 0:
        print('Warning: Caption {} with id {} is empty'.format(idx, self.audio_keys[idx]))
    phone_boundary = torch.FloatTensor(phone_boundary)
    if self.return_boundary:
        return mfcc, phone_boundary
    else:
        return mfcc, nframes
示例#9
0
def GetArrayFromWAV(filename):
    """Read samples from a RIFF WAVE file or a NIST SPHERE file.

    The first four bytes decide the reader: ``b'RIFF'`` goes through
    ``WavFileTool.read``; anything else is opened with ``SPHFile``.

    Returns
    -------
    tuple
        ``(framerate, wavArray)``. For SPHERE input ``wavArray`` is the
        result of ``SPHFile.time_range()`` converted to ``numpy.int16``.
    """
    with open(filename, 'rb') as wavFile:
        header = wavFile.read(4)
    if header == b'RIFF':  # RIFF header, for WAVE files
        framerate, wavArray = WavFileTool.read(filename)
    else:  # NIST header, which uses SPHERE
        sphere = SPHFile(filename)
        framerate = sphere.format['sample_rate']
        # One vectorised conversion instead of calling time_range() twice
        # and copying sample by sample in a Python loop.
        wavArray = numpy.array(sphere.time_range(), dtype=numpy.int16)
    return framerate, wavArray
示例#10
0
def nist2wav(wav_file):
    """Convert one NIST SPHERE ``.WAV`` file to a RIFF ``.wav`` file.

    The sibling ``.TXT`` file is read; the first two fields of its last
    line are taken as start/end sample indices and divided by 16000 to get
    the time range passed to ``SPHFile.write_wav``.

    Parameters
    ----------
    wav_file : path of the SPHERE file, ending in ``.WAV``.

    Raises
    ------
    ValueError
        If the ``.TXT`` file contains no lines (previously this surfaced
        as an ``UnboundLocalError``).
    """
    sph = SPHFile(wav_file)
    txt_file = wav_file[:-3] + "TXT"

    start_time = end_time = None
    # Context manager so the transcript handle is always closed.
    with open(txt_file, 'r') as f:
        for line in f:
            words = line.split(" ")
            start_time = (int(words[0]) / 16000)
            end_time = (int(words[1]) / 16000)
    if start_time is None:
        raise ValueError("no time range found in {}".format(txt_file))

    print("writing file ", wav_file)
    sph.write_wav(wav_file.replace(".WAV", ".wav"), start_time, end_time)
示例#11
0
def convert_and_save(meta, speech_samples, path, mode):
    """Cut every utterance listed in ``meta`` out of its SPHERE recording.

    Parameters
    ----------
    meta : object with ``iterrows()`` yielding rows that provide
        ``'sent_id'``, ``'start_time'`` and ``'end_time'``.
    speech_samples : iterable of SPHERE file paths.
    path, mode : output goes to ``<path>/<mode>/speech/<sent_id>.wav``.

    The source recording is the first entry of ``speech_samples`` whose
    path contains ``'sw0'`` plus the last four characters of the part of
    ``sent_id`` before the first underscore.
    """
    speech_dir = os.path.join(path, mode, 'speech')
    if not os.path.exists(speech_dir):
        os.makedirs(speech_dir, exist_ok=True)

    for _, row in meta.iterrows():
        sent_id = row['sent_id']
        name = 'sw0' + sent_id.split('_')[0][-4:]
        candidates = [fname for fname in speech_samples if name in fname]

        source = SPHFile(candidates[0])
        target = os.path.join(speech_dir, sent_id + '.wav')
        source.write_wav(target, row['start_time'], row['end_time'])
def Pattern_File_Generate_from_SPH(path,
                                   text_List,
                                   token_Index_Dict,
                                   dataset,
                                   spectral_Subtract=False,
                                   display_Prefix='',
                                   range_Ignore=False):
    """Create one pickled pattern per text segment of a SPHERE file.

    Parameters
    ----------
    path : path of the SPHERE file.
    text_List : iterable of ``(start_Time, end_Time, text)`` triples.
    token_Index_Dict : maps each letter of ``text`` to an integer id.
    dataset : dataset label stored in the pattern and the file name.
    spectral_Subtract, range_Ignore : accepted but not used here.
    display_Prefix : prefix printed in front of every progress message.

    Each segment is written to a temporary wav under
    ``<tempdir>/mstts``, passed to ``Mel_Generate`` and, unless that
    returns ``None``, pickled into ``hp.Train.Pattern_Path``.
    """
    sph_Loader = SPHFile(path)

    os.makedirs(os.path.join(tempfile.gettempdir(),
                             'mstts').replace('\\', '/'),
                exist_ok=True)
    base_Name = os.path.splitext(os.path.basename(path))[0]
    for index, (start_Time, end_Time, text) in enumerate(text_List):
        temp_Wav_Path = os.path.join(
            tempfile.gettempdir(), 'mstts', '{}.{}.wav'.format(
                base_Name, index)).replace('\\', '/')
        try:
            sph_Loader.write_wav(temp_Wav_Path, start_Time, end_Time)
            mel = Mel_Generate(temp_Wav_Path)
        finally:
            # The temporary wav is only needed for Mel_Generate; remove it
            # on every path (it used to leak when the segment was ignored).
            if os.path.exists(temp_Wav_Path):
                os.remove(temp_Wav_Path)

        if mel is None:
            print('[{}]'.format(display_Prefix),
                  '{}    {}-{}'.format(path, start_Time, end_Time), '->',
                  'Ignored because of length.')
            # Skip only this segment: the message refers to one time range,
            # whereas the former ``return`` dropped all remaining segments.
            continue

        token = np.array([token_Index_Dict[letter]
                          for letter in text]).astype(np.int32)

        new_Pattern_Dict = {
            'Token': token,
            'Mel': mel,
            'Text': text,
            'Dataset': dataset,
        }

        pickle_File_Name = '{}.{}.{}.PICKLE'.format(
            dataset, base_Name, index).upper()

        with open(
                os.path.join(hp.Train.Pattern_Path,
                             pickle_File_Name).replace("\\", "/"), 'wb') as f:
            pickle.dump(new_Pattern_Dict, f, protocol=2)

        print('[{}]'.format(display_Prefix),
              '{}    {}-{}'.format(path, start_Time, end_Time), '->',
              '{}'.format(pickle_File_Name))
示例#13
0
def timit_sph2wav(path):
    """Convert every ``TIMIT/*/*/*/*.WAV`` SPHERE file to WAV in place.

    Parameters
    ----------
    path : prefix prepended verbatim to ``'TIMIT/...'``, so it is expected
        to end with a path separator.

    A marker file ``TIMIT/done.txt`` is written after a full pass; when it
    already exists nothing is converted. Always returns ``None``.
    """
    done_marker = path + r'TIMIT/done.txt'
    if os.path.exists(done_marker):
        print('The dataset has been already preprocessed')
        return None

    for file_path in glob.glob(path + r'TIMIT/*/*/*/*.WAV'):
        # Only the extension is rewritten for the temporary name.
        tmp_path = file_path[:-len('.WAV')] + 'copy.WAV'
        SPHFile(file_path).write_wav(filename=tmp_path)
        # os.replace swaps the converted file in as one step, so there is
        # no window in which the utterance exists under neither name
        # (as there was with remove() followed by rename()).
        os.replace(tmp_path, file_path)

    with open(done_marker, 'w') as f:
        f.write('The dataset has been preprocessed')
    print('Done')
    return None
示例#14
0
    def from_sphere(sph_path: Pathlike,
                    relative_path_depth: Optional[int] = None) -> 'Recording':
        """
        Build a ``Recording`` from the header of a SPHERE file.

        :param sph_path: Path to the sphere (.sph) file.
        :param relative_path_depth: when a positive int, only that many
            trailing parts of the path (joined with ``/``) are stored in the
            ``AudioSource``; otherwise the path is stored as given.
        :return: a new ``Recording`` whose id is the file stem and whose
            sampling rate, sample count and channels come from the header.
        """
        from sphfile import SPHFile
        sph_path = Path(sph_path)
        header = SPHFile(sph_path).format
        sample_rate = header['sample_rate']
        sample_count = header['sample_count']

        if relative_path_depth is not None and relative_path_depth > 0:
            source = '/'.join(sph_path.parts[-relative_path_depth:])
        else:
            source = str(sph_path)

        audio_source = AudioSource(
            type='file',
            channels=list(range(header['channel_count'])),
            source=source)
        return Recording(
            id=sph_path.stem,
            sampling_rate=sample_rate,
            num_samples=sample_count,
            duration=sample_count / sample_rate,
            sources=[audio_source])
示例#15
0
    def cache_sph2wav(self):
        """
        Return the path of a cached WAV excerpt of ``self._source_file``.

        The cache lives in ``<cwd>/datacache/speech_objects/wav/`` and the
        file is named after the stripped ``self._speech_id``. It is created
        on first use by writing the range ``self._start_time`` to
        ``self._stop_time`` of the SPHERE source; later calls reuse it.
        """
        wav_cache_dir = os.path.join(
            os.getcwd(), 'datacache', 'speech_objects', 'wav/')
        if not os.path.exists(wav_cache_dir):
            os.makedirs(wav_cache_dir, exist_ok=True)

        file_name = '{}.wav'.format(self._speech_id.strip())
        cache_file = os.path.join(wav_cache_dir, file_name)
        if os.path.exists(cache_file):
            # Already converted on an earlier call.
            return cache_file

        # Write only the requested time range of the source file.
        SPHFile(self._source_file).write_wav(
            cache_file, self._start_time, self._stop_time)
        return cache_file
示例#16
0
def NIST_to_wav(input_directory, output_directory):
    """
    convert files in directory to wav files

    Parameters
    ----------
    input_directory: str
        glob pattern matching all files to be converted
    output_directory: str
        directory to output all converted files

    Each output file is named after the input file's base name with its
    extension replaced by ``.wav``.
    """
    import os

    for f in glob.glob(input_directory):
        sph = SPHFile(f)
        # basename/splitext handle names without an extension and
        # platform-specific separators, which the former rfind() slicing
        # did not.
        name = os.path.splitext(os.path.basename(f))[0]
        # join() also works when output_directory lacks a trailing slash.
        sph.write_wav(os.path.join(output_directory, name + ".wav"))
示例#17
0
def transform_spkinfo(targetfp, spk_list):
    """Convert all ``.WAV`` files of every speaker into ``./all_wav/``.

    The tree is walked as ``targetfp/<group>/<speaker>/<file>``. Each
    converted file is named ``<speaker index>_<counter>.wav`` where the
    index is the speaker folder's position in ``spk_list`` and the counter
    numbers that speaker's files from 0.

    Raises
    ------
    ValueError
        If a speaker folder name is not in ``spk_list`` (from
        ``list.index``) or a speaker does not have exactly 10 ``.WAV``
        files.
    """
    out_dir = "./all_wav/"
    # write_wav does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    for i in os.listdir(targetfp):
        local_fp = targetfp + "/" + i
        for j in os.listdir(local_fp):
            counter = 0
            spk_index = spk_list.index(j)
            print(spk_index)
            speaker_fp = local_fp + "/" + j
            # Sorted so that the counter -> file assignment is repeatable.
            for k in sorted(os.listdir(speaker_fp)):
                # Match the extension only, not ".WAV" anywhere in the name.
                if k.endswith(".WAV"):
                    sph = SPHFile(speaker_fp + "/" + k)
                    sph.write_wav(filename=out_dir + str(spk_index) +
                                  "_" + str(counter) + ".wav")
                    counter += 1
            if counter != 10:
                raise ValueError("Not enough speech")
示例#18
0
def load_sphfile(path, sampling_rate, frame_time, hop_time):
    """Return a Wav instance based on the data stored in a Sphere file.

    The file is first converted to a temporary wav in the current working
    directory, named after ``path`` minus its last four characters plus
    ``_tmp.wav``. The temporary file is removed afterwards, including when
    building the ``Wav`` instance fails.
    """
    # Build a temporary copy of the file, converted to actual waveform format.
    tmp_path = './' + os.path.basename(path[:-4]) + '_tmp.wav'
    try:
        SPHFile(path).write_wav(tmp_path)
        # Load data from the waveform file.
        wav = Wav(tmp_path, sampling_rate, frame_time, hop_time)
    finally:
        # Clean up on every path so a failure does not leave the copy behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    return wav
def convert_wav(path):
    """Convert the NIST SPHERE files of every speaker folder to wav.

    For each entry of ``path`` a ``wav_files`` sub-folder is created and
    every file whose path matches ``.*\\.WAV`` is converted into it under
    the same file name. Speakers that already have a ``wav_files`` folder
    are skipped.

    Args:
        path: path to dialect folder in timit directory
    """
    for entry in os.listdir(path):
        speaker = os.path.join(path, entry)
        # Full paths of everything inside the speaker folder.
        sentence_list = [
            os.path.join(speaker, item) for item in os.listdir(speaker)
        ]
        # Destination folder for the converted files.
        wav_folder = os.path.join(speaker, 'wav_files')
        if os.path.exists(wav_folder):
            continue
        os.makedirs(wav_folder)
        for f in sentence_list:
            if not re.match(r'.*\.WAV', f):
                continue
            name = f.split(os.path.sep)[-1]
            SPHFile(f).write_wav(os.path.join(wav_folder, name))
示例#20
0
def main():
    """Convert every file below ``sys.argv[1]`` into ``sys.argv[2]``.

    Each file found by ``os.walk`` is opened with ``SPHFile`` and passed to
    ``create_wav`` together with the destination path
    ``<destination>/<name up to the first dot>.wav``.
    """
    source_directory = sys.argv[1]
    destination_directory = sys.argv[2]

    # Remember the directory each file was found in: os.walk descends into
    # sub-directories, and joining those names with the top directory
    # (as before) produced paths that do not exist.
    files = []
    for (path, dirs, filenames) in os.walk(source_directory):
        files.extend((path, name) for name in filenames)

    for path, name in files:
        sph = SPHFile(os.path.join(path, name))
        dest = name.split('.')[0] + '.wav'
        create_wav(sph, os.path.join(destination_directory, dest))
示例#21
0
def copy_wavs(dest_folder):
    """Convert each speaker's ``.wav`` (sphere) files into ``dest_folder``.

    For every speaker in ``SPEAKERS`` the ``.wav`` files found in
    ``CONSTANTS['mocha_raw_folder']/<speaker>`` are processed in sorted
    order and written under the same file name.

    Returns the converted file names with their last four characters
    removed.
    """
    copied = []
    for speaker in SPEAKERS:
        folder = os.path.join(CONSTANTS['mocha_raw_folder'], speaker)
        wav_names = [
            name for name in os.listdir(folder) if name.endswith('.wav')
        ]
        wav_names.sort()
        for filename in wav_names:
            source = SPHFile(os.path.join(folder, filename))
            source.write_wav(os.path.join(dest_folder, filename))
        copied += wav_names
    # Strip the ".wav" suffix to obtain the utterance names.
    return [name[:-4] for name in copied]
示例#22
0
def convert(args):
    """Split the ``.sph`` recordings into per-segment wav files.

    Reads ``<args.save_path>/TEDLIUM_release-3/data/sph/*.sph`` and, for
    each recording, the ``.stm`` file of the same base name in the sibling
    ``stm`` folder. Every line of the ``.stm`` file produces one wav file
    ``<name>_<line index>.wav`` in ``data/wav`` covering the time range
    given by the 4th and 5th space-separated fields, plus one line
    ``"<wav file> <cleaned text>"`` in ``data/wav/labels.txt``.
    """
    PAUSE_MATCH = re.compile(r'\([0-9]\)')
    NOTATION = re.compile(r'\{[A-Z]*\}')
    print('Converting .sph to wav')
    # splits = ['test']
    labels = []

    root = os.path.join(args.save_path, 'TEDLIUM_release-3', 'data')
    wav_dir = os.path.join(root, 'wav')
    stm_dir = os.path.join(root, 'stm')
    os.makedirs(wav_dir, exist_ok=True)
    sph_files = sorted(glob.glob(os.path.join(root, 'sph/*.sph')))
    with tqdm(sph_files, dynamic_ncols=True, desc="data") as pbar:
        for sph_file in pbar:
            sph = SPHFile(sph_file)
            # Build the transcript path from its parts; a plain
            # str.replace('sph', 'stm') also rewrote "sph" occurring in the
            # save path or in the talk name.
            base = os.path.splitext(os.path.basename(sph_file))[0]
            stm_file = os.path.join(stm_dir, base + '.stm')
            with open(stm_file, 'r') as f:
                for idx, line in enumerate(f):
                    tokens = line.split(' ')
                    start, end = float(tokens[3]), float(tokens[4])
                    name = tokens[0]
                    # Drop the header fields, markup tokens and the
                    # trailing "(<name>..." suffix.
                    text = line.split('male> ')[-1]
                    text = text.split('unknown> ')[-1]
                    text = text.split('NA> ')[-1]
                    text = text.replace('<sil>', '')
                    text = text.replace('<unk>', '')
                    text = text.split('(' + name)[0]
                    text = PAUSE_MATCH.sub('', text)
                    text = NOTATION.sub('', text)
                    text = text.strip()
                    text = ' '.join(text.split())

                    wav_filename = '%s_%d.wav' % (name, idx)
                    assert ' ' not in wav_filename
                    sph.write_wav(os.path.join(wav_dir, wav_filename), start,
                                  end)
                    labels.append('%s %s' % (wav_filename, text))
        with open(os.path.join(wav_dir, 'labels.txt'), 'w') as f:
            f.write('\n'.join(labels))
示例#23
0
def timit_trans(timit_root='/home/lisen/uestc/Research/Dataset/TIMIT'):
    """Convert the TIMIT SPHERE files to WAV and delete the originals.

    A downloaded TIMIT copy may not be directly usable; this converts it.

    Parameters
    ----------
    timit_root : directory containing the ``TRAIN`` and ``TEST`` folders.
        Defaults to the path that used to be hard-coded.

    Every ``<split>/*/*/*.WAV`` file is written to ``<stem>_.wav`` next to
    it and the source file is then removed.
    """
    for split, label in (('TRAIN', 'train'), ('TEST', 'test')):
        sph_files = glob.glob(timit_root + '/' + split + '/*/*/*.WAV')
        print(len(sph_files), label + " utterences")
        for sph_path in sph_files:
            sph = SPHFile(sph_path)
            # The "_" in the new name must be kept (original author's note).
            sph.write_wav(filename=sph_path[:-len(".WAV")] + "_.wav")
            os.remove(sph_path)
    print("Completed")
示例#24
0
def get_length_of_sph_file(sph_path):
    """
    Return the duration of a sph file in seconds.

    The duration is ``sample_count / sample_rate`` taken from the file's
    format header. Example, summing the length of a whole corpus::

        import pathlib

        from openspeechlib.utils.audio import get_length_of_sph_file

        total = 0
        for path in pathlib.Path('/path/to/corpus/train').rglob('*.sph'):
            total += get_length_of_sph_file(path.absolute())
        print(total)

    :param sph_path: path of the sph file.
    :return: duration in seconds, or ``None`` (after logging an error)
        when the file is not found.
    """
    try:
        sph_file = SPHFile(sph_path)
        sph_file.open()
        header = sph_file.format
        n_samples = header['sample_count']
        return n_samples / header['sample_rate']
    except FileNotFoundError:
        LOGGER.error("File not found")
        return None
示例#25
0
def make_recording_callhome(
        sph_path: Pathlike,
        recording_id: Optional[str] = None,
        relative_path_depth: Optional[int] = None,
        sph2pipe_path: Optional[Pathlike] = None
) -> Recording:
    """
    Create a ``Recording`` manifest whose audio source is a ``sph2pipe``
    command rather than a plain file.

    Per the original author, CallHome recordings are compressed with
    shorten, so sph2pipe (e.g. installed via Kaldi) is needed to read them.

    :param sph_path: path to the SPHERE file; its header supplies sampling
        rate, sample count and channel count.
    :param recording_id: id of the recording; defaults to the file stem.
    :param relative_path_depth: when a positive int, only that many trailing
        path parts are placed in the command.
    :param sph2pipe_path: sph2pipe executable; defaults to ``'sph2pipe'``.
    """
    try:
        from sphfile import SPHFile
    except ImportError:
        raise ImportError("Please install sphfile (pip install sphfile) instead and "
                          "try preparing CallHome English again.")

    sph2pipe_path = (
        'sph2pipe' if sph2pipe_path is None else str(sph2pipe_path).strip()
    )
    sph_path = Path(sph_path)
    header = SPHFile(sph_path).format
    sample_rate = header['sample_rate']
    sample_count = header['sample_count']

    if relative_path_depth is not None and relative_path_depth > 0:
        audio_ref = '/'.join(sph_path.parts[-relative_path_depth:])
    else:
        audio_ref = str(sph_path)

    source = AudioSource(
        type='command',
        channels=list(range(header['channel_count'])),
        source=f'{sph2pipe_path} -f wav -p ' + audio_ref
    )
    if recording_id is None:
        recording_id = sph_path.stem
    return Recording(
        id=recording_id,
        sampling_rate=sample_rate,
        num_samples=sample_count,
        duration=sample_count / sample_rate,
        sources=[source]
    )
示例#26
0
# Walk the TIMIT tree: the lower-cased directory name is the speaker id,
# which decides whether an utterance is listed in the dt, tt or tr file.
for root, _, fnames in sorted(os.walk(args.timit_dir)):
    spk = root.split("/")[-1].lower()
    if spk in dt_spks:
        f = dt_f
    elif spk in tt_spks:
        f = tt_f
    else:
        f = tr_f

    for fname in fnames:
        if fname.endswith((".wav", ".WAV")):
            sph_path = "%s/%s" % (root, fname)
            path = "%s/%s_%s" % (wav_dir, spk, fname)
            uttid = "%s_%s" % (spk, os.path.splitext(fname)[0])
            # One "<utterance id> <wav path>" line per file.
            f.write("%s %s\n" % (uttid, path))
            sph = SPHFile(sph_path)
            write_wav(path, sph)

tr_f.close()
dt_f.close()
tt_f.close()

# Parenthesised single-argument form: valid on Python 2 and Python 3
# (the bare print statement is a SyntaxError on Python 3).
print("converted to wav and dumped scp files")

# compute feature
feat_dir = os.path.abspath("%s/%s" % (args.out_dir, args.ftype))
maybe_makedir(feat_dir)


def compute_feature(name):
    cmd = [
示例#27
0
def sph_to_wav(dirpath, filename, output_path):
    """Convert ``dirpath/filename`` (SPHERE) to a wav file in ``output_path``.

    The output keeps the input's base name with the extension replaced by
    ``.wav``.

    Returns
    -------
    str
        Path of the written wav file.
    """
    full_path = os.path.join(dirpath, filename)
    # splitext drops the extension including its dot; the former
    # ``filename[:-3] + ".wav"`` kept the dot and produced "name..wav".
    stem = os.path.splitext(filename)[0]
    new_path = os.path.join(output_path, stem + ".wav")
    sph = SPHFile(full_path)
    sph.write_wav(new_path)
    return new_path
示例#28
0
from sphfile import SPHFile
import glob
import os
""""Convert SPH file to wav"""

# Convert every *.WAV SPHERE file below dialects_path to a RIFF .wav file
# written next to it.
dialects_path = "/home/marcin/Pobrane/TIMIT"
root_dir = os.path.join(dialects_path, '**/*.WAV')
wav_files = glob.glob(root_dir, recursive=True)

for wav_file in wav_files:
    sph = SPHFile(wav_file)
    txt_file = wav_file[:-3] + "TXT"

    # The first two fields of the last transcript line are sample indices;
    # dividing by 16000 gives the time range handed to write_wav.
    start_time = end_time = None
    with open(txt_file, 'r') as f:  # closed after each file
        for line in f:
            words = line.split(" ")
            start_time = (int(words[0])/16000)
            end_time = (int(words[1])/16000)
    if start_time is None:
        # Previously an empty transcript reused the previous file's range
        # (or raised NameError on the first file).
        raise ValueError("no time range found in {}".format(txt_file))

    print("writing file ", wav_file)
    # Swap only the extension; str.replace would also rewrite ".WAV"
    # inside directory names.
    sph.write_wav(wav_file[:-4] + ".wav", start_time, end_time)
示例#29
0
from sphfile import SPHFile

# Command line: <src_dir> holds "sph" and "stm" sub-folders, <dest_dir>
# receives one .wav and one .txt file per recording.
parser = argparse.ArgumentParser()
for arg_name in ("src_dir", "dest_dir"):
    parser.add_argument(arg_name)
args = parser.parse_args()

src = Path(args.src_dir)
dest = Path(args.dest_dir)

for file_sph in (src / "sph").iterdir():
    sph_name = file_sph.name
    file_stm = src / "stm" / sph_name.replace(".sph", ".stm")

    dest.mkdir(parents=True, exist_ok=True)

    # Full-file conversion of the recording.
    wav_target = dest / sph_name.replace(".sph", ".wav")
    SPHFile(str(file_sph)).write_wav(str(wav_target))

    with open(file_stm, "r") as f:
        output = []
        ground_truth = []
        for line in f:
            line_parts = line.split()
            # Everything from the 7th field on is the transcript text.
            line = " ".join(line_parts[6:])
            if line == "ignore_time_segment_in_scoring":
                continue
            output.append(line)
            # Fields 4 and 5 followed by the text; collected but not
            # written anywhere in this fragment.
            ground_truth.append(" ".join([line_parts[3], line_parts[4]] +
                                         line_parts[6:]))

    txt_target = dest / sph_name.replace(".sph", ".txt")
    with open(str(txt_target), "w") as f:
        f.write("\n".join(output))
示例#30
0
def load_sph(f):
    """Open the SPHERE file ``f`` and return ``(sample_rate, content)``.

    ``sample_rate`` is read from the file's format header and ``content``
    is the ``content`` attribute of the ``SPHFile`` object.
    """
    from sphfile import SPHFile
    sphere = SPHFile(f)
    rate = sphere.format['sample_rate']
    samples = sphere.content
    return (rate, samples)
示例#31
0
import os
import sys
#sys.path.append("tracker/deep_sort/deep_sort")
import numpy as np
from path import Path
from sphfile import SPHFile

# Convert every *.WAV file below speech_TEST/TIMIT_TEST into a parallel
# tree whose root is the source's third-level parent with "_WAV" appended;
# the two innermost directory names and the file name are kept.
for file_path in Path('speech_TEST/TIMIT_TEST').walkfiles('*.WAV'):
    parent_dir = os.path.dirname(file_path)
    grandparent_dir = os.path.dirname(parent_dir)
    tree_root = os.path.dirname(grandparent_dir)

    save_path = (tree_root + '_WAV/' + os.path.basename(grandparent_dir) +
                 '/' + os.path.basename(parent_dir) + '/' +
                 os.path.basename(file_path))
    save_dir = os.path.dirname(save_path)

    try:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
    except OSError:
        # Only reported; the conversion below is still attempted.
        print('Error: Creating directory of data')

    SPHFile(file_path).write_wav(save_path)