# Convert the original FDA corpus (headerless big-endian RAW files) into a jbof dataset.
import pathlib
import soundfile
import numpy
import shutil
import jbof  # needed for jbof.create_dataset below

shutil.rmtree('FDA', ignore_errors=True)
root = pathlib.Path('FDA_orig')
README = (root / 'README').read_text()
sentences = {int(k): v for k, v in
             [l.strip().split(' ', 1) for l in (root / 'orthographic.index').open()]}
dataset = jbof.create_dataset('FDA', {'README': README})
import itertools
for file in itertools.chain(root.glob('rl/*.sig'), root.glob('sb/*.sig')):
    speech, speech_samplerate = soundfile.read(
        str(file), samplerate=20_000, channels=1,
        format='RAW', subtype='PCM_16', endian='BIG')
    laryngograph, laryngograph_samplerate = soundfile.read(
        str(file.with_suffix('.lar')), samplerate=20_000, channels=1,
        format='RAW', subtype='PCM_16', endian='BIG')
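# Optional sanity check (not part of the conversion): since the FDA files are
# headerless, the RAW decoding parameters above can be cross-checked by decoding
# one file by hand with numpy. The example path is illustrative.
check_file = root / 'rl' / 'rl001.sig'
if check_file.exists():
    raw = numpy.fromfile(check_file, dtype='>i2')  # big-endian 16-bit PCM
    decoded, _ = soundfile.read(str(check_file), samplerate=20_000, channels=1,
                                format='RAW', subtype='PCM_16', endian='BIG')
    # libsndfile scales 16-bit PCM to floats in [-1, 1) by dividing by 2**15
    assert numpy.allclose(decoded, raw / 2**15)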
import jbof
import pathlib
import numpy
import shutil

shutil.rmtree('QUT_NOISE', ignore_errors=True)
root = pathlib.Path('QUT-NOISE_original')
dataset = jbof.create_dataset(
    'QUT_NOISE',
    {'README': (root / 'docs' / 'README.text').read_text(),
     'LICENSE': (root / 'QUT-NOISE' / 'LICENSE.txt').read_text()})
for noisefile in root.glob('QUT-NOISE/*.wav'):
    item = dataset.add_item(noisefile.stem, {})
    item.add_array_from_file('signal', noisefile, {})
    labelfile = (root / 'QUT-NOISE' / 'labels' / (noisefile.stem + '.lab.txt'))
    labels = []
    for line in labelfile.open():
        start, stop, label = line.split(maxsplit=2)
        labels.append((float(start), float(stop), label))
    labels = numpy.array(labels, dtype=[('start', float), ('stop', float), ('label', 'U32')])
    item.add_array('labels', labels, {})
    impulsefile = (root / 'QUT-NOISE' / 'impulses' / (noisefile.stem + '.imp.txt'))
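# Small usage example for the structured 'labels' array built in the loop above:
# field access by name and per-segment durations. The 'impulse' prefix is only an
# illustration; the actual label strings come from the .lab.txt files.
durations = labels['stop'] - labels['start']
print(f'{len(labels)} segments, total labelled duration {durations.sum():.1f}')
print(labels[numpy.char.startswith(labels['label'], 'impulse')])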
# Continuation: inside a loop over all speech items, schedule every pitch
# algorithm on every signal as a deferred task.
    signal = defer(item).signal
    for algo in algos:
        task = defer(algo, signal, signal.metadata['samplerate'])
        tasklist.schedule(task, metadata=dict(item=item, dataset=datasetname, algo=algo))

for task in tqdm(tasklist.run(nprocesses=16, autokill=600), smoothing=0, desc='processing'):
    pass

# works for all items and algos, except YIN on the TIMIT items SX136GCS0 and SI572DMT0

# collect all pitches into a new dataset:
dataset = jbof.create_dataset('ground truth data')
for task in tqdm(tasklist.done_tasks(), desc='collecting pitches'):
    source_item = task.metadata['item']
    if isinstance(source_item, str):
        print(task._id, type(task._id), source_item, task.metadata['algo'].__name__)
    metadata = source_item.metadata
    metadata['speech_dataset'] = task.metadata['dataset']
    metadata['noise_dataset'] = None
    metadata['algo'] = task.metadata['algo'].__name__
    metadata['speech'] = source_item.name
    itemname = f'{metadata["speech_dataset"]}_{metadata["algo"]}_{metadata["speech"]}'
    if dataset.has_item(itemname):
        dataset.delete_item(itemname)
    item = dataset.add_item(name=itemname, metadata=metadata)
    results = task.returnvalue
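# Sketch: once collected, the per-algorithm items can be grouped back together
# using only jbof calls that already appear in these scripts (all_items,
# item.metadata) and the metadata keys written above.
from collections import defaultdict
by_algo = defaultdict(list)
for item in dataset.all_items():
    by_algo[item.metadata['algo']].append(item.name)
print({algo: len(names) for algo, names in by_algo.items()})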
import jbof
import pathlib
import soundfile
import numpy
from tqdm import tqdm
import shutil

shutil.rmtree('MOCHA-TIMIT_dataset', ignore_errors=True)
root = pathlib.Path('MOCHA-TIMIT_original')
README = (root / 'README_v1.2.txt').read_text()
LICENSE = (root / 'LICENCE.txt').read_text()
sentences = {int(k.strip('.')): v for k, v in
             [l.strip().split(' ', 1) for l in (root / 'mocha-timit.txt').open() if l.strip()]}
dataset = jbof.create_dataset('MOCHA-TIMIT_dataset', {'README': README, 'LICENSE': LICENSE})
import itertools
for file in tqdm(list(itertools.chain(root.glob('*/*.wav'), root.glob('unchecked/*/*.wav'))), smoothing=0):
    # wav:
    speech, speech_samplerate = soundfile.read(str(file))
    # lar:
    laryngograph, laryngograph_samplerate = soundfile.read(str(file.with_suffix('.lar')))
    # lab:
    labels = []
    if file.with_suffix('.lab').exists():
        with file.with_suffix('.lab').open() as f:
            for line in f:
                start, stop, label = line.split()
import jbof
import pathlib
import soundfile
import numpy
import shutil

shutil.rmtree('CMU_Arctic', ignore_errors=True)
root = pathlib.Path('CMU_Arctic_orig')
README = (root / 'CMU_Arctic_Databases.html').read_text()
dataset = jbof.create_dataset('CMU_Arctic', {'README': README})
for directory in root.iterdir():
    if directory.is_file():
        continue
    acronym = directory.name[7:10]
    prompts = {}
    for line in (directory / 'etc' / 'txt.done.data').open('rt'):
        # strip parentheses and newline, then keep the quoted sentence as prompt text
        key, sentence = line.strip(' ()\n').split(maxsplit=1)
        prompts[key] = sentence.strip('" ')
    for wave in directory.glob('wav/*.wav'):
        name = wave.stem
        if name not in prompts:
            print(acronym, name)
        item = dataset.add_item(
            f'{acronym}_{name}',
            metadata={'transcription': prompts.get(name, None)})
        item.add_array_from_file('signal', wave)
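# Illustration of the txt.done.data line format that the prompt parser above
# expects ('( key "sentence" )'); the example line mirrors that shape.
example_line = '( arctic_a0001 "Author of the danger trail, Philip Steels, etc." )\n'
key, sentence = example_line.strip(' ()\n').split(maxsplit=1)
assert key == 'arctic_a0001'
assert sentence.strip('" ') == 'Author of the danger trail, Philip Steels, etc.'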
import jbof
import pathlib
from collections import defaultdict
import re
import numpy
import shutil

shutil.rmtree('PTDB_TUG', ignore_errors=True)
root = pathlib.Path('PTDB_TUG_orig')
dataset = jbof.create_dataset('PTDB_TUG', {
    'Recording Protocol': (root / 'RECORDING-PROTOCOL.txt').read_text(),
    'Speaker Profiles': (root / 'SPEAKER-PROFILES.txt').read_text(),
    'TIMIT Prompts': (root / 'TIMIT-PROMPTS.txt').read_text()})

sentences = {}
pattern = re.compile(r'([^.?!]+[.?!]).*\(([a-z0-9]+)\)')
for line in dataset.metadata['TIMIT Prompts'].split('\n'):
    if line.startswith(';') or not line:
        continue
    line, label = pattern.match(line).groups()
    sentences[label] = line

speaker_profiles = defaultdict(dict)
pattern = re.compile(r'([MF][0-9]{2})\s+([0-9]{2})\s+(Male|Female)\s+'
                     r'(Ireland|USA|Canada|England|South Africa)\s+'
                     r'(sa1,2 sx[0-9]+-[0-9]+\s+si[0-9]+-[0-9]+)\s*'
                     r'(.*)')
for line in dataset.metadata['Speaker Profiles'].split('\n'):
    if line.startswith('Speaker') or line.startswith('-') or not line:
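# Illustration of the prompt regex on one line in the 'sentence (id)' format
# that TIMIT-PROMPTS.txt uses; the sa1 calibration sentence serves as the example.
prompt_pattern = re.compile(r'([^.?!]+[.?!]).*\(([a-z0-9]+)\)')
example = 'She had your dark suit in greasy wash water all year. (sa1)'
text, label = prompt_pattern.match(example).groups()
assert label == 'sa1'
assert text == 'She had your dark suit in greasy wash water all year.'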
import jbof
import pathlib
import soundfile
import numpy
import shutil

shutil.rmtree('KEELE', ignore_errors=True)
root = pathlib.Path('KEELE_orig')
README = (root / 'keele_pitch_database.htm').read_text()
dataset = jbof.create_dataset('KEELE', {'README': README})
for file in root.glob('*.pes'):
    # pet: text transcription
    metadata = {}
    transcription = []
    with file.with_suffix('.pet').open() as f:
        for line in f:
            if line == '\n':
                pass
            elif not line.startswith('LBO'):
                key, value = line.split(':', 1)
                metadata[key] = value.strip()
                if key == 'SAM':
                    metadata['samplerate'] = value.strip()
            else:
                _, line = line.split(':', 1)  # discard 'LBO'
                start, _, stop, text = line.split(',', 3)
                transcription.append({'begin': int(start),
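# Sketch: one way the transcription records collected above could be turned into
# a structured array for item.add_array, mirroring the 'labels' array in the
# QUT-NOISE script. Only the 'begin' field is visible in the (cut-off) loop
# above; the remaining field names here are assumptions.
records = numpy.array(
    [(t['begin'], t.get('end', -1), t.get('text', '')) for t in transcription],
    dtype=[('begin', int), ('end', int), ('text', 'U128')])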
import jbof
import pathlib
import soundfile
import numpy
import re
import shutil

shutil.rmtree('TIMIT_dataset', ignore_errors=True)
root = pathlib.Path('TIMIT_original')
README = (root / 'README.DOC').read_text()
dataset = jbof.create_dataset('TIMIT_dataset', {'README': README})

sentences = {}
for line in (root / 'TIMIT/DOC/PROMPTS.TXT').open():
    if line.startswith(';'):
        continue
    sentence, identifier = line.rsplit('(', maxsplit=1)
    identifier = identifier.strip(' ()\n')
    sentence = sentence.strip()
    sentences[identifier] = sentence

speakers = {}
dialects = {
    1: 'New England',
    2: 'Northern',
    3: 'North Midland',
    4: 'South Midland',
    5: 'Southern',
    6: 'New York City',
    7: 'Western',
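# Illustration of the PROMPTS.TXT parsing above on a line in the
# 'sentence (id)' shape; the sa2 calibration sentence serves as the example.
example = "Don't ask me to carry an oily rag like that. (sa2)\n"
sentence, identifier = example.rsplit('(', maxsplit=1)
assert identifier.strip(' ()\n') == 'sa2'
assert sentence.strip() == "Don't ask me to carry an oily rag like that."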
# (continuation of `labels`, a dict mapping KEELE item names to manually
# determined sentence-break positions in signal samples)
    ],
    m4nw0000=[
        63975, 124940, 221279, 268320, 326651, 352241, 417722, 474547,
        520835, 569381, 627712
    ],
    m5nw0000=[
        69754, 141758, 247965, 307369, 375323, 483330, 560735, 616088,
        673692, 747496
    ])

import KEELE
import shutil

shutil.rmtree('KEELE_mod', ignore_errors=True)
dataset = jbof.create_dataset('KEELE_mod', {'README': KEELE.dataset.metadata['README']})
for item in KEELE.dataset.all_items():
    metadata = item.metadata
    pitch = item.pitch
    pitch_sr = int(pitch.metadata['samplerate'])
    laryngograph = item.laryngograph
    laryngograph_sr = int(laryngograph.metadata['samplerate'])
    signal = item.signal
    signal_sr = int(signal.metadata['samplerate'])
    breaks = [0, *labels[item.name], len(signal)]
    for idx in range(len(breaks) - 1):
        signal_start = breaks[idx]
        signal_stop = breaks[idx + 1]
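# Sketch of the presumable continuation: the break positions are sample indices
# into the signal, so the matching slice boundaries in the pitch and
# laryngograph tracks would be obtained by rescaling with the samplerate
# ratios. This is an assumption about the cut-off part of the loop, not the
# original code.
pitch_start = int(signal_start * pitch_sr / signal_sr)
pitch_stop = int(signal_stop * pitch_sr / signal_sr)
laryngograph_start = int(signal_start * laryngograph_sr / signal_sr)
laryngograph_stop = int(signal_stop * laryngograph_sr / signal_sr)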