def analyze_intensity(corpus_context, source='praat', call_back=None, stop_check=None):
    """
    Analyze intensity of an entire utterance, and save the resulting intensity tracks into the database.

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        corpus context to use
    source : str
        program to use for analysis, defaults to 'praat'
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    """
    segment_mapping = generate_utterance_segments(corpus_context, padding=PADDING).grouped_mapping('speaker')
    if call_back is not None:
        call_back('Analyzing files...')
    for i, (speaker, v) in enumerate(segment_mapping.items()):
        # Gender is looked up here but not used by the intensity function in this version
        gender = None
        try:
            q = corpus_context.query_speakers().filter(corpus_context.speaker.name == speaker)
            q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
            gender = q.all()[0]['Gender']
        except SpeakerAttributeError:
            pass
        intensity_function = generate_base_intensity_function(corpus_context)
        output = analyze_segments(v, intensity_function, stop_check=stop_check)
        corpus_context.save_intensity_tracks(output, speaker)

def test_analyze_file_segments_reaper(acoustic_corpus_path, reaper_func):
    mapping = SegmentMapping()
    seg = (acoustic_corpus_path, 1, 2, 0)  # (file_path, begin, end, channel)
    mapping.add_file_segment(*seg)
    output = analyze_segments(mapping, reaper_func)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert all(x >= 1 for x in output[k].keys())
        assert all(x <= 2 for x in output[k].keys())
    mapping[0].properties['padding'] = 0.5
    output = analyze_segments(mapping, reaper_func)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert all(x >= 1 for x in output[k].keys())
        assert all(x <= 2 for x in output[k].keys())

def analyze_track_script(corpus_context, acoustic_name, properties, script_path, duration_threshold=0.01,
                         phone_class=None, arguments=None, call_back=None, file_type='consonant',
                         stop_check=None, multiprocessing=True):
    """Analyze acoustic tracks of utterances (or of a phone class, if given) with a Praat script,
    and save the resulting tracks into the database under the given acoustic measure name."""
    if file_type not in ['consonant', 'vowel', 'low_freq']:
        raise ValueError('File type must be one of: consonant, vowel, or low_freq')
    if acoustic_name not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(corpus_context, acoustic_name, properties)
        corpus_context.encode_hierarchy()
    if call_back is not None:
        call_back('Analyzing phones...')
    if phone_class is None:
        segment_mapping = generate_utterance_segments(corpus_context, padding=PADDING)
    else:
        segment_mapping = generate_segments(corpus_context, corpus_context.phone_name, phone_class,
                                            file_type=file_type, padding=PADDING,
                                            duration_threshold=duration_threshold)
    segment_mapping = segment_mapping.grouped_mapping('speaker')
    praat_path = corpus_context.config.praat_path
    script_function = generate_praat_script_function(praat_path, script_path, arguments=arguments)
    for i, ((speaker,), v) in enumerate(segment_mapping.items()):
        output = analyze_segments(v, script_function, stop_check=stop_check, multiprocessing=multiprocessing)
        corpus_context.save_acoustic_tracks(acoustic_name, output, speaker)

def test_analyze_file_segments_reaper(acoustic_corpus_path, reaper_func):
    mapping = SegmentMapping()
    seg = (acoustic_corpus_path, 1, 2, 0)  # (file_path, begin, end, channel)
    mapping.add_file_segment(*seg)
    output = analyze_segments(mapping, reaper_func, multiprocessing=False)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert all(x >= 1 for x in output[k].keys())
        assert all(x <= 2 for x in output[k].keys())
    mapping[0].properties['padding'] = 0.5
    output = analyze_segments(mapping, reaper_func, multiprocessing=False)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert all(x >= 1 for x in output[k].keys())
        assert all(x <= 2 for x in output[k].keys())

def analyze_intensity(corpus_context, source='praat', call_back=None, stop_check=None, multiprocessing=True):
    """
    Analyze intensity of an entire utterance, and save the resulting intensity tracks into the database.

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        corpus context to use
    source : str
        program to use for analysis, defaults to 'praat'
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        flag to use multiprocessing rather than threading
    """
    segment_mapping = generate_utterance_segments(corpus_context, padding=PADDING, file_type='consonant')
    segment_mapping = segment_mapping.grouped_mapping('speaker')
    if call_back is not None:
        call_back('Analyzing files...')
    if 'intensity' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(corpus_context, 'intensity', [('Intensity', float)])
        corpus_context.encode_hierarchy()
    for i, ((speaker,), v) in enumerate(segment_mapping.items()):
        intensity_function = generate_base_intensity_function(corpus_context)
        output = analyze_segments(v, intensity_function, stop_check=stop_check, multiprocessing=multiprocessing)
        corpus_context.save_acoustic_tracks('intensity', output, speaker)

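# A minimal usage sketch for the function above. The corpus name is illustrative,
# and the track-column query mirrors polyglotdb's documented pitch-track pattern;
# treat that last line as an assumption rather than a guaranteed API:
#
#     from polyglotdb import CorpusContext
#
#     with CorpusContext('my_corpus') as c:
#         analyze_intensity(c)
#         q = c.query_graph(c.utterance).columns(c.utterance.intensity.track)
#         print(q.all())
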
def analyze_formant_tracks(corpus_context, vowel_label=None, source='praat', call_back=None, stop_check=None,
                           multiprocessing=True):
    """
    Analyze formants of an entire utterance, and save the resulting formant tracks into the database.

    Parameters
    ----------
    corpus_context : CorpusContext
        corpus context to use
    vowel_label : str, optional
        The subset of phones to analyze.
    source : str
        program to use for analysis, defaults to 'praat'
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        flag to use multiprocessing rather than threading
    """
    if vowel_label is None:
        segment_mapping = generate_utterance_segments(corpus_context, padding=PADDING)
    else:
        if not corpus_context.hierarchy.has_type_subset('phone', vowel_label):
            raise Exception('Phones do not have a "{}" subset.'.format(vowel_label))
        segment_mapping = generate_vowel_segments(corpus_context, padding=0, vowel_label=vowel_label)
    if 'formants' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(corpus_context, 'formants',
                                                         [('F1', float), ('F2', float), ('F3', float)])
        corpus_context.encode_hierarchy()
    segment_mapping = segment_mapping.grouped_mapping('speaker')
    if call_back is not None:
        call_back('Analyzing files...')
    for i, ((speaker,), v) in enumerate(segment_mapping.items()):
        # Look up the speaker's gender, if encoded, to pick gendered formant settings
        gender = None
        try:
            q = corpus_context.query_speakers().filter(corpus_context.speaker.name == speaker)
            q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
            gender = q.all()[0]['Gender']
        except SpeakerAttributeError:
            pass
        if gender is not None:
            formant_function = generate_base_formants_function(corpus_context, gender=gender, source=source)
        else:
            formant_function = generate_base_formants_function(corpus_context, source=source)
        output = analyze_segments(v, formant_function, stop_check=stop_check, multiprocessing=multiprocessing)
        corpus_context.save_acoustic_tracks('formants', output, speaker)

def analyze_script(corpus_context, phone_class, script_path, duration_threshold=0.01, arguments=None,
                   call_back=None, stop_check=None, multiprocessing=True):
    """
    Perform acoustic analysis of phones using an input praat script.

    Saves the measurement results from the praat script into the database under the same
    names as the Praat output columns.

    Praat script requirements:

    - the only input is the full path to the sound file containing (only) the phone
    - the script prints the output to the Praat Info window in two rows (i.e. two lines)
    - the first row is a space-separated list of measurement names: these are the names
      that will be saved into the database
    - the second row is a space-separated list of the value for each measurement

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        corpus context to use
    phone_class : str
        the name of an already encoded phone class, on which the analysis will be run
    script_path : str
        full path to the praat script
    duration_threshold : float
        Minimum duration of segments to be analyzed
    arguments : list
        a list containing any arguments to the praat script (currently not working)
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        flag to use multiprocessing rather than threading
    """
    if call_back is not None:
        call_back('Analyzing phones...')
    # NOTE: these locals are set up for the CSV import but unused in this version
    directory = corpus_context.config.temporary_directory('csv')
    csv_name = 'analyze_script_import.csv'
    needs_header = True
    output_types = {}
    header = ['id', 'begin', 'end']
    time_section = time.time()
    segment_mapping = generate_segments(corpus_context, corpus_context.phone_name, phone_class,
                                        file_type='consonant', padding=0,
                                        duration_threshold=duration_threshold)
    if call_back is not None:
        call_back("generate segments took: " + str(time.time() - time_section))
    praat_path = corpus_context.config.praat_path
    script_function = generate_praat_script_function(praat_path, script_path, arguments=arguments)
    time_section = time.time()
    output = analyze_segments(segment_mapping.segments, script_function,
                              stop_check=stop_check, multiprocessing=multiprocessing)
    if call_back is not None:
        call_back("time analyzing segments: " + str(time.time() - time_section))
    header = sorted(list(output.values())[0].keys())
    header_info = {h: float for h in header}
    point_measures_to_csv(corpus_context, output, header)
    point_measures_from_csv(corpus_context, header_info)
    return [x for x in header if x != 'id']

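# A sketch of a Praat script that satisfies the two-line output contract described
# above. The measurement names here are made up for illustration; all that matters
# is that the first Info line names the columns and the second gives their values:
#
#     form Variables
#         sentence filename
#     endform
#     Read from file: filename$
#     dur = Get total duration
#     inten = Get intensity (dB)
#     writeInfoLine: "duration mean_intensity"
#     appendInfoLine: dur, " ", inten
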
def extract_and_save_formant_tracks(corpus_context, data, num_formants=False, stop_check=None, multiprocessing=True):
    """Takes a dictionary mapping each vowel segment to its best formant parameters,
    then re-measures the formants as tracks rather than as single points."""
    # Dictionary of SegmentMapping objects, one per number of formants
    segment_mappings = {}
    save_padding = 0.02
    for k, v in data.items():
        k.begin -= save_padding
        k.end += save_padding
        if "num_formants" in v:
            n_formants = v["num_formants"]
        else:
            # There were not enough samples, so fall back to the default number of formants
            n_formants = 5
        if n_formants not in segment_mappings:
            segment_mappings[n_formants] = SegmentMapping()
        segment_mappings[n_formants].segments.append(k)
    outputs = {}
    for n_formants in segment_mappings:
        func = PraatSegmentFormantTrackFunction(praat_path=corpus_context.config.praat_path,
                                                max_frequency=5500, num_formants=n_formants,
                                                window_length=0.025, time_step=0.01)
        # Analyze all segments that share this formant ceiling
        output = analyze_segments(segment_mappings[n_formants], func,
                                  stop_check=stop_check, multiprocessing=multiprocessing)
        outputs.update(output)
    formant_tracks = ['F1', 'F2', 'F3', 'B1', 'B2', 'B3']
    tracks = {}
    for k, v in outputs.items():
        vowel_id = k.properties["id"]  # currently unused
        track = Track()
        # Renamed the loop variable from `time` to avoid shadowing the time module
        for time_point, formants in v.items():
            tp = TimePoint(time_point)
            for f in formant_tracks:
                tp.add_value(f, formants[f])
            track.add(tp)
        if k["speaker"] not in tracks:
            tracks[k["speaker"]] = {}
        tracks[k["speaker"]][k] = track
    if 'formants' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(corpus_context, 'formants',
                                                         [(x, float) for x in formant_tracks])
    for speaker, track_dict in tracks.items():
        corpus_context.save_acoustic_tracks('formants', track_dict, speaker)

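# The Track/TimePoint pattern used above, in isolation (values are illustrative):
#
#     track = Track()
#     tp = TimePoint(0.15)        # time in seconds
#     tp.add_value('F1', 512.0)   # formant value in Hz
#     tp.add_value('B1', 80.0)    # bandwidth in Hz
#     track.add(tp)
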
def analyze_discourse_pitch(corpus_context, discourse, pitch_source='praat', min_pitch=50, max_pitch=500, **kwargs):
    print(kwargs)
    statement = '''MATCH (s:Speaker:{corpus_name})-[r:speaks_in]->(d:Discourse:{corpus_name})
                WHERE d.name = {{discourse_name}}
                RETURN d, s, r'''.format(corpus_name=corpus_context.cypher_safe_name)
    results = corpus_context.execute_cypher(statement, discourse_name=discourse)
    segment_mapping = SegmentMapping()
    for r in results:
        channel = r['r']['channel']
        speaker = r['s']['name']
        discourse = r['d']['name']
        file_path = r['d']['vowel_file_path']
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(prob_utt.discourse.name == discourse)
        q = q.filter(prob_utt.speaker.name == speaker)
        utterances = q.all()
        for u in utterances:
            segment_mapping.add_file_segment(file_path, u.begin, u.end, channel, padding=PADDING)
    path = None
    if pitch_source == 'praat':
        path = corpus_context.config.praat_path
        # kwargs = {'silence_threshold': 0.03,
        #           'voicing_threshold': 0.45, 'octave_cost': 0.01, 'octave_jump_cost': 0.35,
        #           'voiced_unvoiced_cost': 0.14}
    elif pitch_source == 'reaper':
        path = corpus_context.config.reaper_path
    pitch_function = generate_pitch_function(pitch_source, min_pitch, max_pitch, path=path, pulses=True)
    track = {}
    pulses = set()
    # Pass the populated SegmentMapping (the original passed an empty, unused `segments` list)
    output = analyze_segments(segment_mapping, pitch_function)
    print(output)
    for v in output.values():
        track.update(v[0])
        pulses.update(v[1])
    return track, sorted(pulses)

def analyze_vowel_formant_tracks(corpus_context, source='praat', call_back=None, stop_check=None,
                                 vowel_label='vowel', multiprocessing=True):
    """
    Analyze formants of individual vowels, and save the resulting formant tracks into the database
    for each phone.

    Parameters
    ----------
    corpus_context : CorpusContext
        corpus context to use
    source : str
        program to use for analysis, defaults to 'praat'
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    vowel_label : str
        The subset of phones to analyze.
    multiprocessing : bool
        flag to use multiprocessing rather than threading
    """
    if not corpus_context.hierarchy.has_type_subset('phone', vowel_label):
        raise Exception('Phones do not have a "{}" subset.'.format(vowel_label))
    # gets segment mapping of phones that are vowels
    segment_mapping = generate_vowel_segments(corpus_context, padding=0,
                                              vowel_label=vowel_label).grouped_mapping('speaker')
    if call_back is not None:
        call_back('Analyzing files...')
    # goes through each phone and: makes a formant function, analyzes the phone, and saves the tracks
    for i, ((speaker,), v) in enumerate(segment_mapping.items()):
        gender = None
        try:
            q = corpus_context.query_speakers().filter(corpus_context.speaker.name == speaker)
            q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
            gender = q.all()[0]['Gender']
        except SpeakerAttributeError:
            pass
        if gender is not None:
            formant_function = generate_base_formants_function(corpus_context, gender=gender, source=source)
        else:
            formant_function = generate_base_formants_function(corpus_context, source=source)
        output = analyze_segments(v, formant_function, stop_check=stop_check, multiprocessing=multiprocessing)
        corpus_context.save_formant_tracks(output, speaker)
    if 'formants' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.acoustics.add('formants')
        corpus_context.encode_hierarchy()

def analyze_formant_points(corpus_context, call_back=None, stop_check=None, vowel_label='vowel',
                           duration_threshold=None, multiprocessing=True):
    """First pass of the algorithm; generates prototypes.

    Parameters
    ----------
    corpus_context : :class:`polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus.
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    vowel_label : str
        The subset of phones to analyze.
    duration_threshold : float, optional
        Segments with length shorter than this value (in milliseconds) will not be analyzed.
    multiprocessing : bool
        flag to use multiprocessing rather than threading

    Returns
    -------
    dict
        Track data
    """
    # ------------- Step 1: Prototypes -------------
    if not corpus_context.hierarchy.has_type_subset('phone', vowel_label):
        raise Exception('Phones do not have a "{}" subset.'.format(vowel_label))
    # Gets segment mapping of phones that are vowels
    segment_mapping = generate_vowel_segments(corpus_context, duration_threshold=duration_threshold,
                                              padding=.25, vowel_label=vowel_label)
    if call_back is not None:
        call_back('Analyzing files...')
    # Make formant function
    formant_function = generate_formants_point_function(corpus_context)
    # Analyze the phone
    output = analyze_segments(segment_mapping, formant_function,
                              stop_check=stop_check, multiprocessing=multiprocessing)
    return output

def analyze_formant_tracks(corpus_context, source='praat', call_back=None, stop_check=None, multiprocessing=True):
    """
    Analyze formants of an entire utterance, and save the resulting formant tracks into the database.

    Parameters
    ----------
    corpus_context : CorpusContext
        corpus context to use
    source : str
        program to use for analysis, defaults to 'praat'
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        flag to use multiprocessing rather than threading
    """
    segment_mapping = generate_utterance_segments(corpus_context, padding=PADDING)
    property_key = 'speaker'
    # Bucket segments by speaker (note: this dictionary is not used further below)
    data = {x: [] for x in segment_mapping.levels(property_key)}
    for s in segment_mapping.segments:
        data[s[property_key]].append(s)
    segment_mapping = segment_mapping.grouped_mapping('speaker')
    if call_back is not None:
        call_back('Analyzing files...')
    for i, ((speaker,), v) in enumerate(segment_mapping.items()):
        gender = None
        try:
            q = corpus_context.query_speakers().filter(corpus_context.speaker.name == speaker)
            q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
            gender = q.all()[0]['Gender']
        except SpeakerAttributeError:
            pass
        if gender is not None:
            formant_function = generate_base_formants_function(corpus_context, gender=gender, source=source)
        else:
            formant_function = generate_base_formants_function(corpus_context, source=source)
        output = analyze_segments(v, formant_function, stop_check=stop_check, multiprocessing=multiprocessing)
        corpus_context.save_formant_tracks(output, speaker)
    if 'formants' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.acoustics.add('formants')
        corpus_context.encode_hierarchy()

def analyze_vowel_formant_tracks(corpus_context, source='praat', call_back=None, stop_check=None,
                                 vowel_inventory=None):
    """
    Analyze formants of individual vowels, and save the resulting formant tracks into the database
    for each phone.

    Parameters
    ----------
    corpus_context : CorpusContext
        corpus context to use
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    vowel_inventory : list of strings
        list of vowels used to encode a class 'vowel', optional;
        if not given, it's assumed that 'vowel' is already a phone class
    """
    # encodes vowel inventory into a phone class if it's specified
    if vowel_inventory is not None:
        corpus_context.encode_class(vowel_inventory, 'vowel')
    # gets segment mapping of phones that are vowels
    segment_mapping = generate_vowel_segments(corpus_context, padding=0).grouped_mapping('speaker')
    if call_back is not None:
        call_back('Analyzing files...')
    # goes through each phone and: makes a formant function, analyzes the phone, and saves the tracks
    for i, (speaker, v) in enumerate(segment_mapping.items()):
        gender = None
        try:
            q = corpus_context.query_speakers().filter(corpus_context.speaker.name == speaker)
            q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
            gender = q.all()[0]['Gender']
        except SpeakerAttributeError:
            pass
        if gender is not None:
            formant_function = generate_base_formants_function(corpus_context, gender=gender, source=source)
        else:
            formant_function = generate_base_formants_function(corpus_context, source=source)
        output = analyze_segments(v, formant_function, stop_check=stop_check)
        corpus_context.save_formant_tracks(output, speaker)

def analyze_formant_points(corpus_context, call_back=None, stop_check=None, vowel_inventory=None,
                           duration_threshold=None):
    """First pass of the algorithm; generates prototypes.

    Parameters
    ----------
    corpus_context : :class:`polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus.
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    vowel_inventory : list
        A list of all the vowels (in strings) used in the corpus.
    duration_threshold : float, optional
        Segments with length shorter than this value (in milliseconds) will not be analyzed.

    Returns
    -------
    dict
        Track data
    """
    # ------------- Step 1: Prototypes -------------
    # Encodes vowel inventory into a phone class if it's specified
    if vowel_inventory is not None:
        corpus_context.encode_class(vowel_inventory, 'vowel')
    # Gets segment mapping of phones that are vowels
    segment_mapping = generate_vowel_segments(corpus_context, duration_threshold=duration_threshold, padding=.25)
    if call_back is not None:
        call_back('Analyzing files...')
    # Make formant function
    formant_function = generate_formants_point_function(corpus_context)
    # Analyze the phone
    output = analyze_segments(segment_mapping, formant_function, stop_check=stop_check)
    return output

def analyze_pitch(corpus_context, source='praat', call_back=None, stop_check=None, multiprocessing=True):
    """
    Analyze pitch of all utterances in a corpus and save the resulting tracks into the database.

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.CorpusContext`
        corpus context to use
    source : str
        program to use for analysis, 'praat' or 'reaper'
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        flag to use multiprocessing rather than threading
    """
    absolute_min_pitch = 50
    absolute_max_pitch = 500
    if 'utterance' not in corpus_context.hierarchy:
        raise Exception('Must encode utterances before pitch can be analyzed')
    segment_mapping = generate_utterance_segments(corpus_context, padding=PADDING).grouped_mapping('speaker')
    num_speakers = len(segment_mapping)
    algorithm = corpus_context.config.pitch_algorithm
    path = None
    if source == 'praat':
        path = corpus_context.config.praat_path
        # kwargs = {'silence_threshold': 0.03,
        #           'voicing_threshold': 0.45, 'octave_cost': 0.01, 'octave_jump_cost': 0.35,
        #           'voiced_unvoiced_cost': 0.14}
    elif source == 'reaper':
        path = corpus_context.config.reaper_path
        # kwargs = None
    pitch_function = generate_pitch_function(source, absolute_min_pitch, absolute_max_pitch, path=path)
    if algorithm == 'speaker_adjusted':
        # First pass with the absolute bounds to estimate each speaker's mean and SD
        speaker_data = {}
        if call_back is not None:
            call_back('Getting original speaker means and SDs...')
        for i, ((k,), v) in enumerate(segment_mapping.items()):
            if call_back is not None:
                call_back('Analyzing speaker {} ({} of {})'.format(k, i, num_speakers))
            output = analyze_segments(v, pitch_function, stop_check=stop_check, multiprocessing=multiprocessing)
            sum_pitch = 0
            sum_square_pitch = 0
            n = 0
            for seg, track in output.items():
                for t, v in track.items():
                    v = v['F0']
                    if v is not None and v > 0:  # only voiced frames
                        n += 1
                        sum_pitch += v
                        sum_square_pitch += v * v
            speaker_data[k] = [sum_pitch / n,
                               math.sqrt((n * sum_square_pitch - sum_pitch * sum_pitch) / (n * (n - 1)))]
    for i, ((speaker,), v) in enumerate(segment_mapping.items()):
        if call_back is not None:
            call_back('Analyzing speaker {} ({} of {})'.format(speaker, i, num_speakers))
        if algorithm == 'gendered':
            min_pitch = absolute_min_pitch
            max_pitch = absolute_max_pitch
            try:
                q = corpus_context.query_speakers().filter(corpus_context.speaker.name == speaker)
                q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
                gender = q.all()[0]['Gender']
                if gender is not None:
                    if gender.lower()[0] == 'f':
                        min_pitch = 100
                    else:
                        max_pitch = 400
            except SpeakerAttributeError:
                pass
            pitch_function = generate_pitch_function(source, min_pitch, max_pitch, path=path)
        elif algorithm == 'speaker_adjusted':
            # Clamp the speaker's mean +/- 3 SD range to the absolute bounds
            mean_pitch, sd_pitch = speaker_data[speaker]
            min_pitch = int(mean_pitch - 3 * sd_pitch)
            max_pitch = int(mean_pitch + 3 * sd_pitch)
            if min_pitch < absolute_min_pitch:
                min_pitch = absolute_min_pitch
            if max_pitch > absolute_max_pitch:
                max_pitch = absolute_max_pitch
            pitch_function = generate_pitch_function(source, min_pitch, max_pitch, path=path)
        output = analyze_segments(v, pitch_function, stop_check=stop_check, multiprocessing=multiprocessing)
        corpus_context.save_pitch_tracks(output, speaker)
    # Record when pitch was last edited and register the measure in the hierarchy
    corpus_context.hierarchy.add_token_properties(corpus_context, 'utterance', [('pitch_last_edited', int)])
    corpus_context.encode_hierarchy()
    today = datetime.utcnow()
    corpus_context.query_graph(corpus_context.utterance).set_properties(pitch_last_edited=today.timestamp())
    corpus_context.hierarchy.acoustics.add('pitch')
    corpus_context.encode_hierarchy()

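# The speaker-adjusted bounds above reduce to a small pure function. A minimal
# sketch for clarity; the helper name is illustrative and not part of the module,
# and the defaults mirror absolute_min_pitch/absolute_max_pitch in analyze_pitch:
def speaker_adjusted_pitch_bounds(mean_pitch, sd_pitch, absolute_min_pitch=50, absolute_max_pitch=500):
    """Clamp a speaker's mean +/- 3 SD F0 range to the absolute limits."""
    min_pitch = max(int(mean_pitch - 3 * sd_pitch), absolute_min_pitch)
    max_pitch = min(int(mean_pitch + 3 * sd_pitch), absolute_max_pitch)
    return min_pitch, max_pitch

# For example, a speaker with mean F0 of 210 Hz and SD of 35 Hz gets a search
# range of (105, 315).
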
def analyze_pitch(corpus_context, call_back=None, stop_check=None):
    absolute_min_pitch = 55
    absolute_max_pitch = 480
    if 'utterance' not in corpus_context.hierarchy:
        raise Exception('Must encode utterances before pitch can be analyzed')
    segment_mapping = generate_utterance_segments(corpus_context, padding=PADDING).grouped_mapping('speaker')
    num_speakers = len(segment_mapping)
    algorithm = corpus_context.config.pitch_algorithm
    path = None
    if corpus_context.config.pitch_source == 'praat':
        path = corpus_context.config.praat_path
        # kwargs = {'silence_threshold': 0.03,
        #           'voicing_threshold': 0.45, 'octave_cost': 0.01, 'octave_jump_cost': 0.35,
        #           'voiced_unvoiced_cost': 0.14}
    elif corpus_context.config.pitch_source == 'reaper':
        path = corpus_context.config.reaper_path
        # kwargs = None
    pitch_function = generate_pitch_function(corpus_context.config.pitch_source,
                                             absolute_min_pitch, absolute_max_pitch, path=path)
    if algorithm == 'speaker_adjusted':
        speaker_data = {}
        if call_back is not None:
            call_back('Getting original speaker means and SDs...')
        for i, (k, v) in enumerate(segment_mapping.items()):
            if call_back is not None:
                call_back('Analyzing speaker {} ({} of {})'.format(k, i, num_speakers))
            output = analyze_segments(v, pitch_function, stop_check=stop_check)
            sum_pitch = 0
            sum_square_pitch = 0
            n = 0
            for seg, track in output.items():
                for t, v in track.items():
                    v = v['F0']
                    if v is not None and v > 0:  # only voiced frames
                        n += 1
                        sum_pitch += v
                        sum_square_pitch += v * v
            speaker_data[k] = [sum_pitch / n,
                               math.sqrt((n * sum_square_pitch - sum_pitch * sum_pitch) / (n * (n - 1)))]
    for i, (speaker, v) in enumerate(segment_mapping.items()):
        if call_back is not None:
            call_back('Analyzing speaker {} ({} of {})'.format(speaker, i, num_speakers))
        if algorithm == 'gendered':
            min_pitch = absolute_min_pitch
            max_pitch = absolute_max_pitch
            try:
                q = corpus_context.query_speakers().filter(corpus_context.speaker.name == speaker)
                q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
                gender = q.all()[0]['Gender']
                if gender is not None:
                    if gender.lower()[0] == 'f':
                        min_pitch = 100
                    else:
                        max_pitch = 400
            except SpeakerAttributeError:
                pass
            pitch_function = generate_pitch_function(corpus_context.config.pitch_source,
                                                     min_pitch, max_pitch, path=path)
        elif algorithm == 'speaker_adjusted':
            mean_pitch, sd_pitch = speaker_data[speaker]
            min_pitch = int(mean_pitch - 3 * sd_pitch)
            max_pitch = int(mean_pitch + 3 * sd_pitch)
            if min_pitch < absolute_min_pitch:
                min_pitch = absolute_min_pitch
            if max_pitch > absolute_max_pitch:
                max_pitch = absolute_max_pitch
            pitch_function = generate_pitch_function(corpus_context.config.pitch_source,
                                                     min_pitch, max_pitch, path=path)
        output = analyze_segments(v, pitch_function, stop_check=stop_check)
        corpus_context.save_pitch_tracks(output, speaker)

def analyze_formant_points_refinement(corpus_context, vowel_label='vowel', duration_threshold=0, num_iterations=1,
                                      call_back=None, stop_check=None, vowel_prototypes_path='',
                                      drop_formant=False, multiprocessing=True):
    """Extracts F1, F2, F3 and B1, B2, B3.

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus.
    vowel_label : str
        The subset of phones to analyze.
    duration_threshold : float, optional
        Segments with length shorter than this value (in milliseconds) will not be analyzed.
    num_iterations : int, optional
        How many times the algorithm should iterate before returning values.
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    vowel_prototypes_path : str, optional
        Path to a prototypes file; if empty or nonexistent, prototypes are estimated from the data.
    drop_formant : bool, optional
        If True, also generate leave-one-out candidates in which one measured formant is dropped.
    multiprocessing : bool
        Flag to use multiprocessing, otherwise will use threading

    Returns
    -------
    prototype_metadata : dict
        Means of F1, F2, F3, B1, B2, B3 and covariance matrices per vowel class.
    """
    if not corpus_context.hierarchy.has_type_subset('phone', vowel_label):
        raise Exception('Phones do not have a "{}" subset.'.format(vowel_label))
    # ------------- Step 2: Varying formants -------------
    use_vowel_prototypes = vowel_prototypes_path and os.path.exists(vowel_prototypes_path)
    base_formant_columns = ['F1', 'F2', 'F3', 'B1', 'B2', 'B3']
    if use_vowel_prototypes:
        vowel_prototype_metadata, prototype_parameters = read_prototypes(vowel_prototypes_path)
    else:
        prototype_parameters = base_formant_columns
    # Gets segment mapping of phones that are vowels
    segment_mapping = generate_vowel_segments(corpus_context, duration_threshold=duration_threshold,
                                              padding=0.1, vowel_label=vowel_label)
    best_data = {}
    # There used to be a single list of columns shared by the output and the prototypes;
    # prototype_parameters (columns in the prototypes) and output_columns (columns in the
    # output) are now kept separate.
    output_columns = ['F1', 'F2', 'F3', 'B1', 'B2', 'B3', 'A1', 'A2', 'A3', 'Ax', 'A1A2diff', 'A2A3diff']
    print('output_columns:', output_columns)
    log_output = []
    log_output.append(','.join(['speaker', 'vowel', 'n', 'iterations']))
    # Measure with varying levels of formants
    min_formants = 4  # Off by one error, due to how Praat measures it from F0
    # This really measures with 3 formants: F1, F2, F3. And so on.
    if drop_formant:
        max_formants = 8
    else:
        max_formants = 7
    default_formant = 5
    formant_function = generate_variable_formants_point_function(corpus_context, min_formants, max_formants)
    best_prototype_metadata = {}
    # For each vowel token, collect the formant measurements,
    # then pick the best candidate, the one closest to the averages gotten from prototypes
    total_speaker_vowel_pairs = len(segment_mapping.grouped_mapping('speaker', 'label').items())
    for i, ((speaker, vowel), seg) in enumerate(segment_mapping.grouped_mapping('speaker', 'label').items()):
        if len(seg) == 0:
            continue
        print(speaker + ' ' + vowel + ': ' + str(i + 1) + ' of ' + str(total_speaker_vowel_pairs)
              + ': ' + str(len(seg)) + ' tokens')
        # Analyze the phone
        output = analyze_segments(seg, formant_function, stop_check=stop_check, multiprocessing=multiprocessing)
        if len(seg) < 6:
            print("Not enough observations of vowel {}, at least 6 are needed, only found {}.".format(
                vowel, len(seg)))
            for s, data in output.items():
                best_track = data[default_formant]
                best_data[s] = {k: best_track[k] for k in base_formant_columns}
            continue
        if drop_formant:
            # Add all the leave-one-out candidates
            for s, data in output.items():
                new_data = {}
                ignored_candidates = []
                for candidate, measurements in data.items():
                    # Fit amplitude against log2(frequency) to estimate the expected amplitude
                    # at each formant, backing off when fewer formants were measured
                    try:
                        As = [measurements['A1'], measurements['A2'], measurements['A3'], measurements['A4']]
                        Fs = [math.log2(measurements['F1']), math.log2(measurements['F2']),
                              math.log2(measurements['F3']), math.log2(measurements['F4'])]
                        Farray = np.array([Fs, np.ones(len(Fs))])
                        [slope, intercept] = np.linalg.lstsq(Farray.T, As)[0]
                    except Exception:
                        try:
                            As = [measurements['A1'], measurements['A2'], measurements['A3']]
                            Fs = [math.log2(measurements['F1']), math.log2(measurements['F2']),
                                  math.log2(measurements['F3'])]
                            Farray = np.array([Fs, np.ones(len(Fs))])
                            [slope, intercept] = np.linalg.lstsq(Farray.T, As)[0]
                        except Exception:
                            try:
                                As = [measurements['A1'], measurements['A2']]
                                Fs = [math.log2(measurements['F1']), math.log2(measurements['F2'])]
                                [slope, intercept] = [0, 0]
                            except Exception:
                                # Lack of formants for these settings
                                ignored_candidates.append(candidate)
                                continue
                    for leave_out in range(1, 1 + min(3, candidate)):
                        new_measurements = {}
                        new_measurements['Ax'] = measurements['A' + str(leave_out)]
                        candidate_name = str(candidate) + 'x' + str(leave_out)
                        # A formant is droppable if its amplitude falls below the fitted line
                        if leave_out < len(As) and As[leave_out - 1] < intercept + slope * Fs[leave_out - 1]:
                            this_is_droppable = True
                        else:
                            this_is_droppable = False
                        if this_is_droppable:
                            # Renumber the remaining formants to close the gap
                            for parameter in measurements.keys():
                                if int(parameter[-1]) < leave_out:
                                    new_measurements[parameter] = measurements[parameter]
                                elif int(parameter[-1]) > leave_out:
                                    new_measurements[parameter[0] + str(int(parameter[-1]) - 1)] = \
                                        measurements[parameter]
                            new_data[candidate_name] = new_measurements
                    data[candidate]['Ax'] = data[candidate]['A4']
                data = {k: v for k, v in data.items() if k not in ignored_candidates}
                output[s] = {**data, **new_data}
        else:
            for s, data in output.items():
                for candidate, measurements in data.items():
                    output[s][candidate]['Ax'] = output[s][candidate]['A4']
        output = {k: v for k, v in output.items() if v}
        for s, data in output.items():
            for candidate, measurements in data.items():
                try:
                    output[s][candidate]['A1A2diff'] = data[candidate]['A1'] - data[candidate]['A2']
                    try:
                        output[s][candidate]['A2A3diff'] = data[candidate]['A2'] - data[candidate]['A3']
                    except Exception:
                        try:
                            output[s][candidate]['A2A3diff'] = data[candidate]['A2']
                        except Exception:
                            output[s][candidate]['A2A3diff'] = 0
                except Exception:
                    try:
                        output[s][candidate]['A1A2diff'] = data[candidate]['A1']
                    except Exception:
                        output[s][candidate]['A1A2diff'] = 0
                    output[s][candidate]['A2A3diff'] = 0
        selected_tracks = {}
        for s, data in output.items():
            try:
                selected_tracks[s] = data[default_formant]
            except Exception:
                print(s)
                print(data)
                raise
        if not use_vowel_prototypes:
            print('no prototypes, using get_mean_SD()')
            prev_prototype_metadata = get_mean_SD(selected_tracks, prototype_parameters)
        elif vowel not in vowel_prototype_metadata:
            print('no prototype for', vowel, 'so using get_mean_SD()')
            prev_prototype_metadata = get_mean_SD(selected_tracks, prototype_parameters)
        else:
            prev_prototype_metadata = vowel_prototype_metadata
        if num_iterations > 1 and len(seg) < 6:
            print("Skipping iterations for vowel {}, at least 6 tokens are needed, only found {}.".format(
                vowel, len(seg)))
            my_iterations = [0]
        else:
            my_iterations = range(num_iterations)
        for _ in my_iterations:
            best_numbers = []
            selected_tracks = {}
            prototype_means = prev_prototype_metadata[vowel][0]
            # Get Mahalanobis distance between every new observation and the sample/means
            covariance = np.array(prev_prototype_metadata[vowel][1])
            inverse_covariance = np.linalg.pinv(covariance)
            best_number = 5
            for s, data in output.items():
                best_distance = math.inf
                best_track = 0
                for number, point in data.items():
                    point = [point[x] if point[x] else 0 for x in prototype_parameters]
                    distance = get_mahalanobis(prototype_means, point, inverse_covariance)
                    if distance < best_distance:
                        # Update "best" measures when new best distance is found
                        best_distance = distance
                        best_track = point
                        best_number = number
                selected_tracks[s] = {k: best_track[j] for j, k in enumerate(prototype_parameters)}
                best_data[s] = {}
                for output_column in output_columns:
                    best_data[s][output_column] = output[s][best_number][output_column]
                # num_formants drops the 'x' suffix of leave-one-out candidates
                best_data[s]['num_formants'] = float(str(best_number).split('x')[0])
                best_data[s]['Fx'] = int(str(best_number)[0])
                if 'x' in str(best_number):
                    best_data[s]['drop_formant'] = int(str(best_number).split('x')[-1])
                else:
                    best_data[s]['drop_formant'] = 0
                best_numbers.append(best_number)
            if len(seg) >= 6:
                prototype_metadata = get_mean_SD(selected_tracks, prototype_parameters)
                prev_prototype_metadata = prototype_metadata
                best_prototype_metadata.update(prototype_metadata)
            # Stop early once no token changes its best candidate between iterations
            if _ > 0:
                changed_numbers = 0
                for j, bn in enumerate(best_numbers):
                    if bn != last_iteration_best_numbers[j]:
                        changed_numbers += 1
                if changed_numbers == 0:
                    break
            last_iteration_best_numbers = best_numbers
        log_output.append(','.join([speaker, vowel, str(len(output)), str(_ + 1)]))
    with open('iterations_log.csv', 'a') as f:
        for line in log_output:
            f.write(line + '\n')
    save_formant_point_data(corpus_context, best_data, num_formants=True)
    corpus_context.cache_hierarchy()
    return best_prototype_metadata

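# get_mahalanobis (used in the refinement loop above) computes the standard
# Mahalanobis distance. A minimal numpy sketch of the same computation under that
# assumption, taking the pseudo-inverse of the covariance exactly as above:
def mahalanobis_sketch(means, point, inverse_covariance):
    """Distance between an observation and a sample mean, scaled by covariance."""
    diff = np.array(point, dtype=float) - np.array(means, dtype=float)
    return float(np.sqrt(diff @ inverse_covariance @ diff))
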
def analyze_vot(corpus_context, classifier, stop_label='stops', vot_min=5, vot_max=100,
                window_min=-30, window_max=30, overwrite_edited=False,
                call_back=None, stop_check=None, multiprocessing=False):
    """
    Analyze VOT for stops using a pretrained AutoVOT classifier.

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.corpus.AudioContext`
        corpus context to use
    classifier : str
        Path to an AutoVOT classifier model
    stop_label : str
        Label of subset to analyze
    vot_min : int
        Minimum VOT in ms
    vot_max : int
        Maximum VOT in ms
    window_min : int
        Window minimum in ms
    window_max : int
        Window maximum in ms
    overwrite_edited : bool
        Whether to update VOTs that have the property `edited` set to True
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        Flag to use multiprocessing, otherwise will use threading
    """
    if not corpus_context.hierarchy.has_token_subset('phone', stop_label) and \
            not corpus_context.hierarchy.has_type_subset('phone', stop_label):
        raise Exception('Phones do not have a "{}" subset.'.format(stop_label))
    already_encoded_vots = corpus_context.hierarchy.has_subannotation_type("vot")
    stop_mapping = generate_segments(corpus_context, annotation_type='phone', subset=stop_label,
                                     padding=PADDING, file_type="consonant",
                                     fetch_subannotations=True).grouped_mapping('discourse')
    segment_mapping = SegmentMapping()
    vot_func = AutoVOTAnalysisFunction(classifier_to_use=classifier,
                                       min_vot_length=vot_min, max_vot_length=vot_max,
                                       window_min=window_min, window_max=window_max)
    for discourse in corpus_context.discourses:
        if (discourse,) in stop_mapping:
            sf = corpus_context.discourse_sound_file(discourse)
            speaker_mapped_stops = {}
            for x in stop_mapping[(discourse,)]:
                if already_encoded_vots:
                    if "vot" in x["subannotations"]:
                        vot = x["subannotations"]["vot"]
                    else:
                        vot = None
                    if vot is not None:
                        # Skip "edited" VOTs unless we're given the go-ahead to overwrite them
                        if not overwrite_edited and hasattr(vot, "edited") and vot.edited:
                            continue
                        stop_info = (x["begin"], x["end"], x["id"], x["subannotations"]["vot"].id)
                    else:
                        stop_info = (x["begin"], x["end"], x["id"], "new_vot")
                else:
                    stop_info = (x["begin"], x["end"], x["id"])
                if x["speaker"] in speaker_mapped_stops:
                    speaker_mapped_stops[x["speaker"]].append(stop_info)
                else:
                    speaker_mapped_stops[x["speaker"]] = [stop_info]
            for speaker in speaker_mapped_stops:
                segment_mapping.add_file_segment(sf["consonant_file_path"],
                                                 0, sf["duration"], sf["channel"],
                                                 name="{}-{}".format(speaker, discourse),
                                                 vot_marks=speaker_mapped_stops[speaker])
    output = analyze_segments(segment_mapping.segments, vot_func,
                              stop_check=stop_check, multiprocessing=multiprocessing)
    if already_encoded_vots:
        new_data = []
        updated_data = []
        custom_props = [(prop, get_default_for_type(val))
                        for prop, val in corpus_context.hierarchy.subannotation_properties["vot"]
                        if prop not in ["begin", "id", "end", "confidence"]]
        all_props = [x[0] for x in custom_props] + ["id", "begin", "end", "confidence"]
        for discourse, discourse_output in output.items():
            for (begin, end, confidence, stop_id, vot_id) in discourse_output:
                if vot_id == "new_vot":
                    props = {"id": str(uuid1()), "begin": begin, "end": begin + end,
                             "annotated_id": stop_id, "confidence": confidence}
                    for prop, val in custom_props:
                        props[prop] = val
                    new_data.append(props)
                else:
                    props = {"id": vot_id,
                             "props": {"begin": begin, "end": begin + end, "confidence": confidence}}
                    for prop, val in custom_props:
                        props["props"][prop] = val
                    updated_data.append(props)
        if updated_data:
            statement = """
            UNWIND {{data}} as d
            MERGE (n:vot:{corpus_name} {{id: d.id}})
            SET n += d.props
            """.format(corpus_name=corpus_context.cypher_safe_name)
            corpus_context.execute_cypher(statement, data=updated_data)
        if new_data:
            default_node = ", ".join(["{}: d.{}".format(p, p) for p in all_props])
            statement = """
            UNWIND {{data}} as d
            MATCH (annotated:phone:{corpus_name} {{id: d.annotated_id}})
            CREATE (annotated) <-[:annotates]-(annotation:vot:{corpus_name} {{{default_node}}})
            """.format(corpus_name=corpus_context.cypher_safe_name, default_node=default_node)
            corpus_context.execute_cypher(statement, data=new_data)
    else:
        list_of_stops = []
        property_types = [("begin", float), ("end", float), ("confidence", float)]
        for discourse, discourse_output in output.items():
            for (begin, end, confidence, stop_id) in discourse_output:
                list_of_stops.append({"begin": begin, "end": begin + end, "id": uuid1(),
                                      "confidence": confidence, "annotated_id": stop_id})
        corpus_context.import_subannotations(list_of_stops, property_types, "vot", "phone")

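# Usage sketch for the function above. The corpus name, phone labels, and
# classifier path are illustrative; it assumes 'stops' has been encoded as a
# phone subset (encode_class, used elsewhere in this module, is one way):
#
#     with CorpusContext('my_corpus') as c:
#         c.encode_class(['p', 't', 'k'], 'stops')
#         analyze_vot(c, classifier='/path/to/model.classifier', stop_label='stops')
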
def analyze_vot(corpus_context, stop_label='stops',
                classifier="/autovot/experiments/models/bb_jasa.classifier",
                vot_min=5, vot_max=100, window_min=-30, window_max=30,
                call_back=None, stop_check=None, multiprocessing=False):
    """
    Analyze VOT for stops using a pretrained AutoVOT classifier.

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.CorpusContext`
        corpus context to use
    stop_label : str
        Label of subset to analyze
    classifier : str
        Path to an AutoVOT classifier model
    vot_min : int
        Minimum VOT in ms
    vot_max : int
        Maximum VOT in ms
    window_min : int
        Window minimum in ms
    window_max : int
        Window maximum in ms
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        Flag to use multiprocessing, otherwise will use threading
    """
    if not corpus_context.hierarchy.has_token_subset('phone', stop_label) and \
            not corpus_context.hierarchy.has_type_subset('phone', stop_label):
        raise Exception('Phones do not have a "{}" subset.'.format(stop_label))
    stop_mapping = generate_segments(corpus_context, annotation_type='phone', subset=stop_label,
                                     padding=PADDING, file_type="consonant").grouped_mapping('discourse')
    segment_mapping = SegmentMapping()
    vot_func = AutoVOTAnalysisFunction(classifier_to_use=classifier,
                                       min_vot_length=vot_min, max_vot_length=vot_max,
                                       window_min=window_min, window_max=window_max)
    for discourse in corpus_context.discourses:
        if (discourse,) in stop_mapping:
            sf = corpus_context.discourse_sound_file(discourse)
            speaker_mapped_stops = {}
            discourse_speakers = set()
            for x in stop_mapping[(discourse,)]:
                if x["speaker"] in speaker_mapped_stops:
                    speaker_mapped_stops[x["speaker"]].append((x["begin"], x["end"], x["id"]))
                else:
                    speaker_mapped_stops[x["speaker"]] = [(x["begin"], x["end"], x["id"])]
                discourse_speakers.add(x["speaker"])
            for speaker in discourse_speakers:
                segment_mapping.add_file_segment(sf["consonant_file_path"],
                                                 sf["speech_begin"], sf["speech_end"], sf["channel"],
                                                 name="{}-{}".format(speaker, discourse),
                                                 vot_marks=speaker_mapped_stops[speaker])
    output = analyze_segments(segment_mapping.segments, vot_func,
                              stop_check=stop_check, multiprocessing=multiprocessing)
    list_of_stops = []
    property_types = [("begin", float), ("end", float), ("confidence", float)]
    for discourse, discourse_output in output.items():
        for (begin, end, confidence, stop_id) in discourse_output:
            list_of_stops.append({"begin": begin, "end": begin + end, "id": uuid1(),
                                  "confidence": confidence, "annotated_id": stop_id})
    corpus_context.import_subannotations(list_of_stops, property_types, "vot", "phone")

def analyze_formant_points_refinement(corpus_context, vowel_inventory, duration_threshold=0, num_iterations=1,
                                      call_back=None, stop_check=None):
    """Extracts F1, F2, F3 and B1, B2, B3.

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus.
    vowel_inventory : list
        A list of vowels contained in the corpus.
    duration_threshold : float, optional
        Segments with length shorter than this value (in milliseconds) will not be analyzed.
    num_iterations : int, optional
        How many times the algorithm should iterate before returning values.

    Returns
    -------
    prototype_metadata : dict
        Means of F1, F2, F3, B1, B2, B3 and covariance matrices per vowel class.
    """
    # ------------- Step 2: Varying formants -------------
    # Encodes vowel inventory into a phone class if it's specified
    if vowel_inventory is not None:
        corpus_context.encode_class(vowel_inventory, 'vowel')
    # Gets segment mapping of phones that are vowels
    segment_mapping = generate_vowel_segments(corpus_context, duration_threshold=duration_threshold, padding=0.1)
    best_data = {}
    columns = ['F1', 'F2', 'F3', 'B1', 'B2', 'B3']
    # Measure with varying levels of formants
    min_formants = 4  # Off by one error, due to how Praat measures it from F0
    # This really measures with 3 formants: F1, F2, F3. And so on.
    max_formants = 7
    default_formant = 5
    formant_function = generate_variable_formants_point_function(corpus_context, min_formants, max_formants)
    best_prototype_metadata = {}
    # For each vowel token, collect the formant measurements,
    # then pick the best track, the one closest to the averages gotten from prototypes
    for i, (vowel, seg) in enumerate(segment_mapping.grouped_mapping('label').items()):
        # Analyze the phone
        output = analyze_segments(seg, formant_function, stop_check=stop_check)
        if len(seg) < 6:
            print("Not enough observations of vowel {}, at least 6 are needed, only found {}.".format(
                vowel, len(seg)))
            for s, data in output.items():
                best_track = data[default_formant]
                best_data[s] = {k: best_track[k] for k in columns}
            continue
        selected_tracks = {}
        for s, data in output.items():
            selected_tracks[s] = data[default_formant]
        prev_prototype_metadata = get_mean_SD(selected_tracks)
        for _ in range(num_iterations):
            selected_tracks = {}
            prototype_means = prev_prototype_metadata[vowel][0]
            # Get Mahalanobis distance between every new observation and the sample/means
            covariance = np.array(prev_prototype_metadata[vowel][1])
            inverse_covariance = np.linalg.pinv(covariance)
            best_number = 5
            for s, data in output.items():
                best_distance = math.inf
                best_track = 0
                for number, point in data.items():
                    point = [point[x] if point[x] else 0 for x in columns]
                    distance = get_mahalanobis(prototype_means, point, inverse_covariance)
                    if distance < best_distance:
                        # Update "best" measures when new best distance is found
                        best_distance = distance
                        best_track = point
                        best_number = number
                selected_tracks[s] = {k: best_track[j] for j, k in enumerate(columns)}
                best_data[s] = {k: best_track[j] for j, k in enumerate(columns)}
                best_data[s]['num_formants'] = best_number
            prototype_metadata = get_mean_SD(selected_tracks)
            prev_prototype_metadata = prototype_metadata
            best_prototype_metadata.update(prototype_metadata)
    save_formant_point_data(corpus_context, best_data, num_formants=True)
    corpus_context.cache_hierarchy()
    return best_prototype_metadata

def analyze_script(corpus_context, phone_class=None, subset=None, annotation_type=None, script_path=None,
                   duration_threshold=0.01, arguments=None, call_back=None, file_type='consonant',
                   stop_check=None, multiprocessing=True):
    """
    Perform acoustic analysis of phones using an input praat script.

    Saves the measurement results from the praat script into the database under the same
    names as the Praat output columns.

    Praat script requirements:

    - the only input is the full path to the sound file containing (only) the phone
    - the script prints the output to the Praat Info window in two rows (i.e. two lines)
    - the first row is a space-separated list of measurement names: these are the names
      that will be saved into the database
    - the second row is a space-separated list of the value for each measurement

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        corpus context to use
    phone_class : str
        DEPRECATED, the name of an already encoded subset of phones on which the analysis will be run
    subset : str, optional
        the name of an already encoded subset of an annotation type, on which the analysis will be run
    annotation_type : str
        the type of annotation that the analysis will go over
    script_path : str
        full path to the praat script
    duration_threshold : float
        Minimum duration of segments to be analyzed
    file_type : str
        File type to use for the script (consonant = 16kHz sample rate, vowel = 11kHz, low_freq = 1200 Hz)
    arguments : list
        a list containing any arguments to the praat script (currently not working)
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        Flag to use multiprocessing, otherwise will use threading
    """
    if file_type not in ['consonant', 'vowel', 'low_freq']:
        raise ValueError('File type must be one of: consonant, vowel, or low_freq')
    if phone_class is not None:
        raise DeprecationWarning("The phone_class parameter has now been deprecated, "
                                 "please use annotation_type='phone' and subset='{}'".format(phone_class))
        # NOTE: unreachable after the raise above
        annotation_type = corpus_context.phone_name
        subset = phone_class
    if call_back is not None:
        call_back('Analyzing {}...'.format(annotation_type))
    time_section = time.time()
    segment_mapping = generate_segments(corpus_context, annotation_type, subset, file_type=file_type,
                                        padding=0, duration_threshold=duration_threshold)
    if call_back is not None:
        call_back("generate segments took: " + str(time.time() - time_section))
    praat_path = corpus_context.config.praat_path
    script_function = generate_praat_script_function(praat_path, script_path, arguments=arguments)
    time_section = time.time()
    output = analyze_segments(segment_mapping.segments, script_function,
                              stop_check=stop_check, multiprocessing=multiprocessing)
    if call_back is not None:
        call_back("time analyzing segments: " + str(time.time() - time_section))
    header = sorted(list(output.values())[0].keys())
    header_info = {h: float for h in header}
    point_measures_to_csv(corpus_context, output, header)
    point_measures_from_csv(corpus_context, header_info, annotation_type=annotation_type)
    return [x for x in header if x != 'id']

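# Usage sketch for the current signature. The subset name, phone labels, and
# script path are illustrative; the returned list holds the measurement column
# names that were saved into the database:
#
#     with CorpusContext('my_corpus') as c:
#         c.encode_class(['s', 'z', 'S', 'Z'], 'sibilant')
#         measures = analyze_script(c, subset='sibilant', annotation_type='phone',
#                                   script_path='/path/to/sibilant.praat')
#         print(measures)
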