# -*- coding: utf-8 -*- from __future__ import unicode_literals, division, print_function #Py2 __author__ = "Daniel van Niekerk" __email__ = "*****@*****.**" import sys, os import copy from glob import glob import pylab as pl #sometimes the limit needs to be increased to pickle large utts... sys.setrecursionlimit(10000) #default is generally 1000 import ttslab from ttslab.hrg import Utterance ttslab.extend(Utterance, "ufuncs_analysis") import speechlabels as sl UTTDIR = "build/utts" UTTDIR2 = "build/qc_utts" RECDIR = "build/halign/labels" def parse_logl_from_recs(u, ul, phoneset, absl=False): frameperiod = 50000 closure_phone = phoneset.features["closure_phone"] phmap = phoneset.map assert u["file_id"] == ul.name
""" from __future__ import unicode_literals, division, print_function #Py2 __author__ = "Daniel van Niekerk" __email__ = "*****@*****.**" import shutil, os from tempfile import mkdtemp from collections import OrderedDict from copy import deepcopy import numpy as np import ttslab from ttslab.trackfile import Track ttslab.extend(Track, "tfuncs_analysis") import speechlabels as sl NONE_WORD = "NONE" WAV_EXT = "wav" FEAT_EXT = "featvecs" TEXTGRID_EXT = "TextGrid" SIG2FV = "sig2fv -coefs melcep -delta melcep -melcep_order 12 -fbank_order 24 -shift %(shift)s -factor 5.0 -preemph 0.97 -otype est %(inputfile)s -o %(outputfile)s" def fill_startendtimes(utt): """ Use 'end' time feature in segments to fill info for other items in other common relations.. """ #segments (are contiguous in time)...
import os import sys from collections import defaultdict from glob import glob import copy from tempfile import mkstemp from ConfigParser import ConfigParser import numpy as np from wav2psmfcc import PMExtractor from make_f0_praat_script import script_writer as F0_PSCWriter from make_f0_praat import f0filler as F0Filler import ttslab import ttslab.hrg as hrg ttslab.extend(hrg.Utterance, "ufuncs_analysis") from ttslab.trackfile import Track SAVE_COMPLETE_UTTS = True #sometimes the limit needs to be increased to pickle large utts... BIGGER_RECURSION_LIMIT = 20000 #default is generally 1000 WAV_DIR = "wavs" PM_DIR = "pm" LPC_DIR = "lpc" F0_DIR = "f0" MCEP_DIR = "mcep" JOIN_DIR = "joincoef" UTT_DIR = "utts" COMPLETE_UTT_DIR = "complete_utts"
import networkx as nx
import matplotlib
# The backend is selected before pyplot is imported, as in the original order.
matplotlib.use('Agg')
import matplotlib.pyplot as plt, mpld3
import numpy as np
from tempfile import mkdtemp
# NOTE(review): there is no "StringIO" module on Python 3 (the class lives in
# "io" there), so this fallback presumably only helps Py2 installs that lack
# cStringIO -- confirm before relying on it under Py3.
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO #Py3
import scipy.signal as ss

import ttslab
from ttslab import hrg
from ttslab.trackfile import Track
ttslab.extend(hrg.Utterance, "ufuncs_analysis")
ttslab.extend(Track, "ttslab.trackfile.funcs.tfuncs_praat")


def itemrepr(item):
    """ Return a short ASCII label for an item...

        The label is the item's "tone" feature when the item is named
        "syl" and has a tone, otherwise the item's "name" feature,
        NFKD-normalised with all non-ASCII characters dropped.

        item: any object supporting `"feat" in item` and `item["feat"]`
              with a "name" feature (presumably an hrg.Item -- confirm
              against callers).
    """
    name = unicodedata.normalize("NFKD", item["name"]).encode('ascii', 'ignore')
    tone = item["tone"] if "tone" in item else None
    # Test for None *before* encoding: encoding first made the None check
    # unreachable and raised AttributeError on items whose tone is None.
    if tone is not None and name == "syl":
        return tone.encode('ascii', 'ignore')
    return name
especially if the voice is slightly hoarse... """ from __future__ import unicode_literals, division, print_function #Py2 __author__ = "Daniel van Niekerk" __email__ = "*****@*****.**" import sys import array import math import numpy as np import ttslab from ttslab.trackfile import Track ttslab.extend(Track, "ttslab.trackfile.funcs.tfuncs_praat") def friendly_log(f): try: return math.log(f) except ValueError: return float('-1e+10') if __name__ == "__main__": fn = sys.argv[1] outfn = sys.argv[2] minf0 = float(sys.argv[3]) maxf0 = float(sys.argv[4]) t = Track() t.get_f0(fn, minpitch=minf0, maxpitch=maxf0, timestep=0.005, fixocterrs=True) #timestep hardcoded here because of hack below...
# -*- coding: utf-8 -*- """ Implements a number of functions used to analyse utterances (research purposes)... """ from __future__ import unicode_literals, division, print_function #Py2 __author__ = "Daniel van Niekerk" __email__ = "*****@*****.**" import shutil, os from tempfile import mkdtemp from collections import OrderedDict import ttslab from ttslab.trackfile import Track ttslab.extend(Track, "tfuncs_analysis") import speechlabels as sl NONE_WORD = "NONE" WAV_EXT = "wav" FEAT_EXT = "featvecs" TEXTGRID_EXT = "TextGrid" SIG2FV = "sig2fv -coefs melcep -delta melcep -melcep_order 12 -fbank_order 24 -shift 0.005 -factor 5.0 -preemph 0.97 -otype est %(inputfile)s -o %(outputfile)s" def fill_startendtimes(utt): """ Use 'end' time feature in segments to fill info for other items in other common relations.. """ #segments (are contiguous in time)...
# -*- coding: utf-8 -*- """ Functions to create HTS labels for synthesis... See: lab_format.pdf in reference HTS training scripts... DEMITASSE: I have renamed the "syllable stress" feature to "tone" """ from __future__ import unicode_literals, division, print_function #Py2 __author__ = "Daniel van Niekerk" __email__ = "*****@*****.**" import ttslab import ttslab.hrg as hrg ttslab.extend(hrg.Item, "ttslab.ifuncs.synthcontext") NONE_STRING = "xxx" def float_to_htk_int(string): """ Converts a string representing a floating point number to an integer (time in 100ns units)... """ try: return int(round(float(string) * 10000000)) except: print(string) raise def htk_int_to_float(string): """ Converts a string representing an integer (time in 100ns units)
# -*- coding: utf-8 -*-
""" Functions to create HTS labels for synthesis...

    See: lab_format.pdf in reference HTS training scripts...
"""
from __future__ import unicode_literals, division, print_function #Py2

__author__ = "Daniel van Niekerk"
__email__ = "*****@*****.**"

import ttslab
import ttslab.hrg as hrg
ttslab.extend(hrg.Item, "ttslab.funcs.ifuncs_hts")

NONE_STRING = "xxx"

#number of 100ns time units in one second (shared by both converters below)
_HTK_UNITS_PER_SECOND = 10000000


def float_to_htk_int(string):
    """ Converts a string representing a floating point number to an
        integer (time in 100ns units)...

        The value is scaled by 10000000 and rounded to the nearest
        integer.

        Raises whatever the conversion raises (TypeError, ValueError
        or OverflowError); the offending input is printed first to
        help locate bad label data.
    """
    try:
        return int(round(float(string) * _HTK_UNITS_PER_SECOND))
    except (TypeError, ValueError, OverflowError):
        #only conversion failures are reported; a bare "except:" would
        #also intercept KeyboardInterrupt and SystemExit
        print(string)
        raise


def htk_int_to_float(string):
    """ Converts a string representing an integer (time in 100ns units)
        to floating point value (time in seconds)...
    """
    return float(string) / _HTK_UNITS_PER_SECOND