def analyze_formants(corpus_context, sound_file, sound_file_path):
    """Measure formant tracks for a sound file and queue Formants rows.

    Uses Praat when a ``praat_path`` is configured on the corpus config,
    otherwise the acousticsim implementation. ``sound_file_path`` may be a
    single wav file, or a directory of pre-cut segment wavs whose filenames
    end in ``-<begin>-<end>`` (presumably produced upstream — the begin/end
    are parsed back out of the name).

    Rows are added to ``corpus_context.sql_session`` but not flushed here.
    """
    praat = getattr(corpus_context.config, 'praat_path', None)
    if praat is not None:
        algorithm = 'praat'
        formant_function = partial(PraatFormants, praatpath=praat,
                                   max_freq=5500, num_formants=5,
                                   win_len=0.025, time_step=0.01)
    else:
        algorithm = 'acousticsim'
        formant_function = partial(ASFormants, max_freq=5500, num_formants=5,
                                   win_len=0.025, time_step=0.01)
    if not os.path.isdir(sound_file_path):
        # Single file: analyze directly; timepoints are already absolute.
        for timepoint, value in formant_function(sound_file_path).items():
            f1, f2, f3 = sanitize_formants(value)
            corpus_context.sql_session.add(
                Formants(sound_file=sound_file, time=timepoint,
                         F1=f1, F2=f2, F3=f3, source=algorithm))
        return
    # Directory of segments: analyze them all via generate_cache, then shift
    # each segment's timepoints back to absolute time using the begin value
    # encoded in its filename.
    segment_paths = [(os.path.join(sound_file_path, entry), )
                     for entry in os.listdir(sound_file_path)]
    cache = generate_cache(segment_paths, formant_function, None,
                           default_njobs(), None, None)
    for segment_path, track in cache.items():
        stem = os.path.splitext(os.path.basename(segment_path))[0]
        _, begin, end = stem.split('-')
        begin = float(begin) - padding
        if begin < 0:
            begin = 0
        end = float(end)
        for timepoint, value in track.items():
            timepoint += begin  # true timepoint
            f1, f2, f3 = sanitize_formants(value)
            corpus_context.sql_session.add(
                Formants(sound_file=sound_file, time=timepoint,
                         F1=f1, F2=f2, F3=f3, source=algorithm))
def analyze_pitch(corpus_context, sound_file, sound_file_path):
    """Measure F0 for a sound file and queue Pitch rows on the SQL session.

    Tracker preference: Reaper (if ``reaper_path`` is configured), then
    Praat (if ``praat_path`` is configured), then acousticsim.
    ``sound_file_path`` may be a single wav file or a directory of pre-cut
    segment wavs whose filenames end in ``-<begin>-<end>``.
    """
    if getattr(corpus_context.config, 'reaper_path', None) is not None:
        pitch_function = partial(ReaperPitch,
                                 reaper = corpus_context.config.reaper_path,
                                 time_step = 0.01, freq_lims = (75,500))
        algorithm = 'reaper'
    elif getattr(corpus_context.config, 'praat_path', None) is not None:
        pitch_function = partial(PraatPitch,
                                 praatpath = corpus_context.config.praat_path,
                                 time_step = 0.01, freq_lims = (75,500))
        algorithm = 'praat'
    else:
        pitch_function = partial(ASPitch, time_step = 0.01,
                                 freq_lims = (75,500))
        algorithm = 'acousticsim'
    if os.path.isdir(sound_file_path):
        # Directory of segments: analyze them all via generate_cache, then
        # shift each segment's timepoints back to absolute time using the
        # begin value encoded in its filename.
        path_mapping = [(os.path.join(sound_file_path, x),)
                        for x in os.listdir(sound_file_path)]
        cache = generate_cache(path_mapping, pitch_function, None,
                               default_njobs(), None, None)
        for k, v in cache.items():
            name = os.path.basename(k)
            name = os.path.splitext(name)[0]
            # Filenames encode the segment span as ...-<begin>-<end>.
            _, begin, end = name.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            end = float(end)  # NOTE(review): parsed but never used
            for timepoint, value in v.items():
                timepoint += begin # true timepoint
                try:
                    # Some trackers yield a 1-element sequence per timepoint.
                    value = value[0]
                except TypeError:
                    pass
                p = Pitch(sound_file = sound_file, time = timepoint,
                          F0 = value, source = algorithm)
                corpus_context.sql_session.add(p)
    else:
        # Single wav file: timepoints are already absolute.
        pitch = pitch_function(sound_file_path)
        pitch.process()
        for timepoint, value in pitch.items():
            try:
                value = value[0]
            except TypeError:
                pass
            p = Pitch(sound_file = sound_file, time = timepoint,
                      F0 = value, source = algorithm)
            corpus_context.sql_session.add(p)
def analyze_formants(corpus_context, sound_file, sound_file_path):
    """Measure formants for a sound file and queue Formants rows.

    NOTE(review): duplicate of the earlier 3-argument ``analyze_formants``
    variant in this file; at import time the later definition shadows the
    earlier one.

    Uses Praat when ``praat_path`` is configured, otherwise acousticsim.
    ``sound_file_path`` may be a single wav file or a directory of pre-cut
    segment wavs whose filenames end in ``-<begin>-<end>``.
    """
    if getattr(corpus_context.config, 'praat_path', None) is not None:
        formant_function = partial(PraatFormants,
                                   praatpath = corpus_context.config.praat_path,
                                   max_freq = 5500, num_formants = 5,
                                   win_len = 0.025, time_step = 0.01)
        algorithm = 'praat'
    else:
        formant_function = partial(ASFormants, max_freq = 5500,
                                   num_formants = 5, win_len = 0.025,
                                   time_step = 0.01)
        algorithm = 'acousticsim'
    if os.path.isdir(sound_file_path):
        # Directory of segments: analyze in parallel via generate_cache.
        path_mapping = [(os.path.join(sound_file_path, x),)
                        for x in os.listdir(sound_file_path)]
        cache = generate_cache(path_mapping, formant_function, None,
                               default_njobs(), None, None)
        for k, v in cache.items():
            name = os.path.basename(k)
            name = os.path.splitext(name)[0]
            # Filenames encode the segment span as ...-<begin>-<end>.
            _, begin, end = name.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            end = float(end)  # NOTE(review): parsed but never used
            for timepoint, value in v.items():
                timepoint += begin # true timepoint
                f1, f2, f3 = sanitize_formants(value)
                f = Formants(sound_file = sound_file, time = timepoint,
                             F1 = f1, F2 = f2, F3 = f3, source = algorithm)
                corpus_context.sql_session.add(f)
    else:
        # Single wav file: timepoints are already absolute.
        formants = formant_function(sound_file_path)
        for timepoint, value in formants.items():
            f1, f2, f3 = sanitize_formants(value)
            f = Formants(sound_file = sound_file, time = timepoint,
                         F1 = f1, F2 = f2, F3 = f3, source = algorithm)
            corpus_context.sql_session.add(f)
def analyze_formants(corpus_context, sound_file):
    """
    Analyzes the formants using different algorithms based on the corpus
    the sound file is from.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        the type of corpus
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        the .wav sound file
    """
    algorithm = corpus_context.config.formant_algorithm
    if algorithm == 'praat':
        if getattr(corpus_context.config, 'praat_path', None) is not None:
            formant_function = partial(
                PraatFormants,
                praatpath=corpus_context.config.praat_path,
                max_freq=5500, num_formants=5, win_len=0.025, time_step=0.01)
        else:
            # Praat requested but no binary configured: silently skip.
            return
    else:
        formant_function = partial(ASFormants, max_freq=5500,
                                   num_formants=5, win_len=0.025,
                                   time_step=0.01)
    if sound_file.duration > 5:
        # Long files: cut into utterance-sized temp wavs and analyze those
        # in parallel, then map timepoints back to absolute file time.
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(
            prob_utt.discourse.name == sound_file.discourse.name).times()
        utterances = q.all()
        outdir = corpus_context.config.temporary_directory(
            sound_file.discourse.name)
        path_mapping = []
        for i, u in enumerate(utterances):
            # Temp chunk name encodes the utterance span for later parsing.
            outpath = os.path.join(
                outdir, 'temp-{}-{}.wav'.format(u['begin'], u['end']))
            if not os.path.exists(outpath):
                extract_audio(sound_file.filepath, outpath, u['begin'],
                              u['end'], padding=padding)
            path_mapping.append((outpath, ))
        cache = generate_cache(path_mapping, formant_function, None,
                               default_njobs() - 1, None, None)
        for k, v in cache.items():
            name = os.path.basename(k)
            name = os.path.splitext(name)[0]
            _, begin, end = name.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            end = float(end)  # NOTE(review): parsed but never used
            for timepoint, value in v.items():
                timepoint += begin  # true timepoint
                f1, f2, f3 = sanitize_formants(value)
                f = Formants(sound_file=sound_file, time=timepoint,
                             F1=f1, F2=f2, F3=f3, source=algorithm)
                corpus_context.sql_session.add(f)
    else:
        # Short files: analyze in one shot; timepoints already absolute.
        formants = formant_function(sound_file.filepath)
        for timepoint, value in formants.items():
            f1, f2, f3 = sanitize_formants(value)
            f = Formants(sound_file=sound_file, time=timepoint,
                         F1=f1, F2=f2, F3=f3, source=algorithm)
            corpus_context.sql_session.add(f)
def analyze_pitch(corpus_context, sound_file):
    """
    Analyzes the pitch using different algorithms based on the corpus
    the sound file is from.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        the type of corpus
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        the .wav sound file
    """
    algorithm = corpus_context.config.pitch_algorithm
    if algorithm == 'reaper':
        if getattr(corpus_context.config, 'reaper_path', None) is not None:
            pitch_function = partial(ReaperPitch,
                                     reaper=corpus_context.config.reaper_path,
                                     time_step=0.01, freq_lims=(75, 500))
        else:
            # Reaper requested but no binary configured: silently skip.
            return
    elif algorithm == 'praat':
        if getattr(corpus_context.config, 'praat_path', None) is not None:
            pitch_function = partial(
                PraatPitch, praatpath=corpus_context.config.praat_path,
                time_step=0.01, freq_lims=(75, 500))
        else:
            return
    else:
        pitch_function = partial(ASPitch, time_step=0.01, freq_lims=(75, 500))
    if sound_file.duration > 5:
        # Long files: cut into utterance-sized temp wavs (with extra padding
        # for pitch tracking) and analyze in parallel.
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(
            prob_utt.discourse.name == sound_file.discourse.name).times()
        utterances = q.all()
        outdir = corpus_context.config.temporary_directory(
            sound_file.discourse.name)
        for i, u in enumerate(utterances):
            # Temp chunk name encodes the utterance span for later parsing.
            outpath = os.path.join(
                outdir, 'temp-{}-{}.wav'.format(u['begin'], u['end']))
            if not os.path.exists(outpath):
                extract_audio(sound_file.filepath, outpath, u['begin'],
                              u['end'], padding=padding * 3)
        path_mapping = [(os.path.join(outdir, x), )
                        for x in os.listdir(outdir)]
        try:
            cache = generate_cache(path_mapping, pitch_function, None,
                                   default_njobs() - 1, None, None)
        except FileNotFoundError:
            # Tracker binary missing at run time: skip analysis.
            return
        for k, v in cache.items():
            name = os.path.basename(k)
            name = os.path.splitext(name)[0]
            _, begin, end = name.split('-')
            begin = float(begin) - padding * 3
            if begin < 0:
                begin = 0
            end = float(end)  # NOTE(review): parsed but never used
            for timepoint, value in v.items():
                timepoint += begin  # true timepoint
                try:
                    # Some trackers yield a 1-element sequence per timepoint.
                    value = value[0]
                except TypeError:
                    pass
                p = 
                Pitch(sound_file=sound_file, time=timepoint, F0=value,
                      source=algorithm)
                corpus_context.sql_session.add(p)
    else:
        try:
            pitch = pitch_function(sound_file.filepath)
        except FileNotFoundError:
            return
        for timepoint, value in pitch.items():
            try:
                value = value[0]
            except TypeError:
                pass
            p = Pitch(sound_file=sound_file, time=timepoint, F0=value,
                      source=algorithm)
            corpus_context.sql_session.add(p)
    # Push queued rows to the database within the current transaction.
    corpus_context.sql_session.flush()
def analyze_formants(corpus_context, sound_file):
    """
    Analyzes the formants using different algorithms based on the corpus
    the sound file is from.

    NOTE(review): duplicate of the earlier 2-argument ``analyze_formants``
    variant in this file; at import time the later definition shadows the
    earlier one.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        the type of corpus
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        the .wav sound file
    """
    algorithm = corpus_context.config.formant_algorithm
    if algorithm == 'praat':
        if getattr(corpus_context.config, 'praat_path', None) is not None:
            formant_function = partial(PraatFormants,
                                       praatpath = corpus_context.config.praat_path,
                                       max_freq = 5500, num_formants = 5,
                                       win_len = 0.025, time_step = 0.01)
        else:
            # Praat requested but no binary configured: silently skip.
            return
    else:
        formant_function = partial(ASFormants, max_freq = 5500,
                                   num_formants = 5, win_len = 0.025,
                                   time_step = 0.01)
    if sound_file.duration > 5:
        # Long files: cut into utterance-sized temp wavs and analyze those
        # in parallel, then map timepoints back to absolute file time.
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(prob_utt.discourse.name == sound_file.discourse.name).times()
        utterances = q.all()
        outdir = corpus_context.config.temporary_directory(sound_file.discourse.name)
        path_mapping = []
        for i, u in enumerate(utterances):
            # Temp chunk name encodes the utterance span for later parsing.
            outpath = os.path.join(outdir,
                                   'temp-{}-{}.wav'.format(u['begin'], u['end']))
            if not os.path.exists(outpath):
                extract_audio(sound_file.filepath, outpath, u['begin'],
                              u['end'], padding = padding)
            path_mapping.append((outpath,))
        cache = generate_cache(path_mapping, formant_function, None,
                               default_njobs() - 1, None, None)
        for k, v in cache.items():
            name = os.path.basename(k)
            name = os.path.splitext(name)[0]
            _, begin, end = name.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            end = float(end)  # NOTE(review): parsed but never used
            for timepoint, value in v.items():
                timepoint += begin # true timepoint
                f1, f2, f3 = sanitize_formants(value)
                f = Formants(sound_file = sound_file, time = timepoint,
                             F1 = f1, F2 = f2, F3 = f3, source = algorithm)
                corpus_context.sql_session.add(f)
    else:
        # Short files: analyze in one shot; timepoints already absolute.
        formants = formant_function(sound_file.filepath)
        for timepoint, value in formants.items():
            f1, f2, f3 = sanitize_formants(value)
            f = Formants(sound_file = sound_file, time = timepoint,
                         F1 = f1, F2 = f2, F3 = f3, source = algorithm)
            corpus_context.sql_session.add(f)
def analyze_pitch(corpus_context, sound_file):
    """
    Analyzes the pitch using different algorithms based on the corpus
    the sound file is from.

    NOTE(review): duplicate of the earlier 2-argument ``analyze_pitch``
    variant in this file; at import time the later definition shadows the
    earlier one.

    Parameters
    ----------
    corpus_context : :class:`polyglotdb.corpus.BaseContext`
        the type of corpus
    sound_file : :class:`polyglotdb.sql.models.SoundFile`
        the .wav sound file
    """
    algorithm = corpus_context.config.pitch_algorithm
    if algorithm == 'reaper':
        if getattr(corpus_context.config, 'reaper_path', None) is not None:
            pitch_function = partial(ReaperPitch,
                                     reaper = corpus_context.config.reaper_path,
                                     time_step = 0.01, freq_lims = (75,500))
        else:
            # Reaper requested but no binary configured: silently skip.
            return
    elif algorithm == 'praat':
        if getattr(corpus_context.config, 'praat_path', None) is not None:
            pitch_function = partial(PraatPitch,
                                     praatpath = corpus_context.config.praat_path,
                                     time_step = 0.01, freq_lims = (75,500))
        else:
            return
    else:
        pitch_function = partial(ASPitch, time_step = 0.01,
                                 freq_lims = (75,500))
    if sound_file.duration > 5:
        # Long files: cut into utterance-sized temp wavs (with extra padding
        # for pitch tracking) and analyze in parallel.
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(prob_utt.discourse.name == sound_file.discourse.name).times()
        utterances = q.all()
        outdir = corpus_context.config.temporary_directory(sound_file.discourse.name)
        for i, u in enumerate(utterances):
            # Temp chunk name encodes the utterance span for later parsing.
            outpath = os.path.join(outdir,
                                   'temp-{}-{}.wav'.format(u['begin'], u['end']))
            if not os.path.exists(outpath):
                extract_audio(sound_file.filepath, outpath, u['begin'],
                              u['end'], padding = padding * 3)
        path_mapping = [(os.path.join(outdir, x),) for x in os.listdir(outdir)]
        try:
            cache = generate_cache(path_mapping, pitch_function, None,
                                   default_njobs() - 1, None, None)
        except FileNotFoundError:
            # Tracker binary missing at run time: skip analysis.
            return
        for k, v in cache.items():
            name = os.path.basename(k)
            name = os.path.splitext(name)[0]
            _, begin, end = name.split('-')
            begin = float(begin) - padding * 3
            if begin < 0:
                begin = 0
            end = float(end)  # NOTE(review): parsed but never used
            for timepoint, value in v.items():
                timepoint += begin # true timepoint
                try:
                    # Some trackers yield a 1-element sequence per timepoint.
                    value = value[0]
                except TypeError:
                    pass
                p = 
                Pitch(sound_file = sound_file, time = timepoint, F0 = value,
                      source = algorithm)
                corpus_context.sql_session.add(p)
    else:
        try:
            pitch = pitch_function(sound_file.filepath)
        except FileNotFoundError:
            return
        for timepoint, value in pitch.items():
            try:
                value = value[0]
            except TypeError:
                pass
            p = Pitch(sound_file = sound_file, time = timepoint, F0 = value,
                      source = algorithm)
            corpus_context.sql_session.add(p)
    # Push queued rows to the database within the current transaction.
    corpus_context.sql_session.flush()
def acoustic_similarity_mapping(path_mapping, **kwargs):
    """Takes in an explicit mapping of full paths to .wav files to have
    acoustic similarity computed.

    Parameters
    ----------
    path_mapping : iterable of iterables
        Explicit mapping of full paths of .wav files, in the form of a list
        of tuples to be compared.
    rep : {'envelopes','mfcc'} or callable, optional
        The type of representation to convert the wav files into before
        comparing for similarity. Amplitude envelopes will be computed when
        'envelopes' is specified, and MFCCs will be computed when 'mfcc' is
        specified (default). A callable is used as-is.
    match_function : {'dtw', 'xcorr', 'dct'} or callable, optional
        How similarity/distance will be calculated. Defaults to 'dtw' to use
        Dynamic Time Warping (can be slower) to compute distance.
        Cross-correlation can be specified with 'xcorr', which computes
        distance as the inverse of a maximum cross-correlation value between
        0 and 1.
    num_filters : int, optional
        The number of frequency filters to use when computing
        representations. Defaults to 8 for amplitude envelopes and 26 for
        MFCCs.
    num_coeffs : int, optional
        The number of coefficients to use for MFCCs (not used for amplitude
        envelopes). Default is 20, which captures speaker-specific
        information, whereas 12 would be more speaker-independent.
    freq_lims : tuple, optional
        A tuple of the minimum frequency and maximum frequency in Hertz to
        use for computing representations. Defaults to (80, 7800) following
        Lewandowski (2012).
    output_sim : bool, optional
        If True, the function will return similarities (inverse distance).
        If False (default), distance measures will be returned instead.

    Returns
    -------
    list of tuples
        Returns a list of tuples corresponding to the `path_mapping` input,
        with a new final element in the tuple being the similarity/distance
        score for that mapping. If ``return_rep`` is truthy, a
        ``(results, cache)`` pair is returned instead.

    Raises
    ------
    ValueError
        If `match_function` is neither a string nor a callable.
    """
    stop_check = kwargs.get('stop_check', None)
    call_back = kwargs.get('call_back', None)
    # Representation builder: a caller-supplied callable wins, otherwise one
    # is constructed from the remaining keyword arguments.
    rep = kwargs.get('rep', 'mfcc')
    if callable(rep):
        to_rep = rep
    else:
        to_rep = _build_to_rep(**kwargs)
    if kwargs.get('use_multi', False):
        num_cores = kwargs.get('num_cores', 1)
        if num_cores == 0:
            # 0 means "auto": use three quarters of the available cores.
            num_cores = int((3 * cpu_count()) / 4)
    else:
        num_cores = 1
    output_sim = kwargs.get('output_sim', False)
    match_function = kwargs.get('match_function', 'dtw')
    cache = kwargs.get('cache', None)
    if isinstance(match_function, str):
        if match_function == 'xcorr':
            dist_func = xcorr_distance
        elif match_function == 'dct':
            dist_func = dct_distance
        else:
            # Any other string (including the default 'dtw') uses DTW.
            dist_func = dtw_distance
    elif callable(match_function):
        dist_func = match_function
    else:
        # FIX: previously a non-str, non-callable match_function left
        # dist_func unbound and triggered a NameError below; fail fast
        # with a clear message instead.
        raise ValueError(
            'match_function must be a string or a callable, not {!r}'.format(
                match_function))
    attributes = kwargs.get('attributes', dict())
    if cache is None:
        cache = generate_cache(path_mapping, to_rep, attributes,
                               num_cores, call_back, stop_check)
    asim = calc_asim(path_mapping, cache, dist_func,
                     output_sim, num_cores, call_back, stop_check)
    if kwargs.get('return_rep', False):
        return asim, cache
    return asim
def analyze_pitch(corpus_context, sound_file, sound_file_path):
    """Measure F0 for a sound file and queue Pitch rows on the SQL session.

    Tracker preference: Reaper (if ``reaper_path`` is configured), then
    Praat (if ``praat_path`` is configured), then acousticsim.

    Parameters
    ----------
    corpus_context : object
        Provides ``config`` (tracker paths) and ``sql_session``.
    sound_file : object
        SQL model instance the Pitch rows are attached to.
    sound_file_path : str
        A single wav file, or a directory of pre-cut segment wavs whose
        filenames end in ``-<begin>-<end>``.
    """
    if getattr(corpus_context.config, 'reaper_path', None) is not None:
        pitch_function = partial(ReaperPitch,
                                 reaper=corpus_context.config.reaper_path,
                                 time_step=0.01, freq_lims=(75, 500))
        algorithm = 'reaper'
        # FIX: removed an unreachable `if ... reaper_path is None: return`
        # guard that lived here; the branch condition above already
        # guarantees reaper_path is not None.
    elif getattr(corpus_context.config, 'praat_path', None) is not None:
        pitch_function = partial(PraatPitch,
                                 praatpath=corpus_context.config.praat_path,
                                 time_step=0.01, freq_lims=(75, 500))
        algorithm = 'praat'
    else:
        pitch_function = partial(ASPitch, time_step=0.01, freq_lims=(75, 500))
        algorithm = 'acousticsim'
    if os.path.isdir(sound_file_path):
        # Directory of segments: analyze them all via generate_cache, then
        # shift each segment's timepoints back to absolute time using the
        # begin value encoded in its filename.
        path_mapping = [(os.path.join(sound_file_path, x), )
                        for x in os.listdir(sound_file_path)]
        try:
            cache = generate_cache(path_mapping, pitch_function, None,
                                   default_njobs(), None, None)
        except FileNotFoundError:
            # Tracker binary missing at run time: skip analysis.
            return
        for k, v in cache.items():
            name = os.path.splitext(os.path.basename(k))[0]
            _, begin, end = name.split('-')
            begin = float(begin) - padding
            if begin < 0:
                begin = 0
            end = float(end)
            for timepoint, value in v.items():
                timepoint += begin  # true timepoint
                try:
                    # Some trackers yield a 1-element sequence per timepoint.
                    value = value[0]
                except TypeError:
                    pass
                p = Pitch(sound_file=sound_file, time=timepoint, F0=value,
                          source=algorithm)
                corpus_context.sql_session.add(p)
    else:
        # Single wav file: timepoints are already absolute.
        try:
            pitch = pitch_function(sound_file_path)
        except FileNotFoundError:
            return
        for timepoint, value in pitch.items():
            try:
                value = value[0]
            except TypeError:
                pass
            p = Pitch(sound_file=sound_file, time=timepoint, F0=value,
                      source=algorithm)
            corpus_context.sql_session.add(p)
    # Push queued rows to the database within the current transaction.
    corpus_context.sql_session.flush()