def analyze_utterance_pitch(corpus_context, utterance, source='praat', min_pitch=50, max_pitch=500, **kwargs):
    """Analyze pitch for a single utterance and return the resulting F0 track."""
    if isinstance(utterance, str):
        utterance_id = utterance
    else:
        utterance_id = utterance.id

    padding = kwargs.pop('padding', None)
    if padding is None:
        padding = PADDING
    utt_type = corpus_context.hierarchy.highest
    statement = '''MATCH (s:Speaker:{corpus_name})-[r:speaks_in]->(d:Discourse:{corpus_name}),
            (u:{utt_type}:{corpus_name})-[:spoken_by]->(s),
            (u)-[:spoken_in]->(d)
            WHERE u.id = {{utterance_id}}
            RETURN u, d, r'''.format(corpus_name=corpus_context.cypher_safe_name, utt_type=utt_type)
    results = corpus_context.execute_cypher(statement, utterance_id=utterance_id)

    segment_mapping = SegmentMapping()
    for r in results:
        channel = r['r']['channel']
        file_path = r['d']['vowel_file_path']
        u = r['u']
        segment_mapping.add_file_segment(file_path, u['begin'], u['end'], channel, padding=padding)

    path = None
    if source == 'praat':
        path = corpus_context.config.praat_path
    elif source == 'reaper':
        path = corpus_context.config.reaper_path
    pitch_function = generate_pitch_function(source, min_pitch, max_pitch, path=path)

    track = Track()
    for seg in segment_mapping:
        output = pitch_function(seg)
        for k, v in output.items():
            # Skip unvoiced or invalid frames
            if v['F0'] is None or v['F0'] <= 0:
                continue
            p = TimePoint(k)
            p.add_value('F0', v['F0'])
            track.add(p)

    if 'pitch' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(corpus_context, 'pitch', [('F0', float)])
        corpus_context.encode_hierarchy()
    return track
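# A minimal usage sketch for analyze_utterance_pitch; the corpus name and pitch
# bounds below are hypothetical, and a running PolyglotDB instance with Praat
# configured is assumed:
#
#     with CorpusContext('my_corpus') as c:
#         utt = c.query_graph(c.utterance).all()[0]
#         track = analyze_utterance_pitch(c, utt, source='praat',
#                                         min_pitch=60, max_pitch=400)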
def analyze_discourse_pitch(corpus_context, discourse, pitch_source='praat', min_pitch=50, max_pitch=500, **kwargs):
    """Analyze pitch for all utterances in a discourse, returning the combined track and pulse times."""
    statement = '''MATCH (s:Speaker:{corpus_name})-[r:speaks_in]->(d:Discourse:{corpus_name})
            WHERE d.name = {{discourse_name}}
            RETURN d, s, r'''.format(corpus_name=corpus_context.cypher_safe_name)
    results = corpus_context.execute_cypher(statement, discourse_name=discourse)

    segment_mapping = SegmentMapping()
    for r in results:
        channel = r['r']['channel']
        speaker = r['s']['name']
        discourse = r['d']['name']
        file_path = r['d']['vowel_file_path']
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(prob_utt.discourse.name == discourse)
        q = q.filter(prob_utt.speaker.name == speaker)
        utterances = q.all()
        for u in utterances:
            segment_mapping.add_file_segment(file_path, u.begin, u.end, channel, padding=PADDING)

    path = None
    if pitch_source == 'praat':
        path = corpus_context.config.praat_path
    elif pitch_source == 'reaper':
        path = corpus_context.config.reaper_path
    pitch_function = generate_pitch_function(pitch_source, min_pitch, max_pitch, path=path, pulses=True)

    track = {}
    pulses = set()
    output = analyze_segments(segment_mapping, pitch_function)
    for v in output.values():
        track.update(v[0])
        pulses.update(v[1])
    return track, sorted(pulses)
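# A sketch of consuming the two return values of analyze_discourse_pitch; the
# corpus and discourse names are hypothetical:
#
#     with CorpusContext('my_corpus') as c:
#         track, pulses = analyze_discourse_pitch(c, 'my_discourse',
#                                                 pitch_source='reaper')
#     # track is a dict mapping times to pitch output; pulses is a sorted
#     # list of glottal pulse times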
def test_analyze_file_segments_reaper(acoustic_corpus_path, reaper_func):
    mapping = SegmentMapping()
    seg = (acoustic_corpus_path, 1, 2, 0)
    mapping.add_file_segment(*seg)
    output = analyze_segments(mapping, reaper_func, multiprocessing=False)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert all(x >= 1 for x in output[k].keys())
        assert all(x <= 2 for x in output[k].keys())

    # Padding should not extend returned times beyond the segment bounds
    mapping[0].properties['padding'] = 0.5
    output = analyze_segments(mapping, reaper_func, multiprocessing=False)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert all(x >= 1 for x in output[k].keys())
        assert all(x <= 2 for x in output[k].keys())
def extract_and_save_formant_tracks(corpus_context, data, num_formants=False, stop_check=None, multiprocessing=True):
    """Takes a dictionary mapping segments to the best parameters for each vowel,
    then recalculates the formants as tracks rather than as points."""
    # Dictionary of SegmentMapping objects, one per number of formants
    segment_mappings = {}
    save_padding = 0.02
    for k, v in data.items():
        k.begin -= save_padding
        k.end += save_padding
        if "num_formants" in v:
            n_formants = v["num_formants"]
        else:
            # There were not enough samples, so use the default number of formants
            n_formants = 5
        if n_formants not in segment_mappings:
            segment_mappings[n_formants] = SegmentMapping()
        segment_mappings[n_formants].segments.append(k)

    outputs = {}
    for n_formants in segment_mappings:
        func = PraatSegmentFormantTrackFunction(
            praat_path=corpus_context.config.praat_path,
            max_frequency=5500,
            num_formants=n_formants,
            window_length=0.025,
            time_step=0.01)
        # Analyze the phones for this formant count
        output = analyze_segments(segment_mappings[n_formants], func,
                                  stop_check=stop_check, multiprocessing=multiprocessing)
        outputs.update(output)

    formant_tracks = ['F1', 'F2', 'F3', 'B1', 'B2', 'B3']
    tracks = {}
    for k, v in outputs.items():
        vowel_id = k.properties["id"]
        track = Track()
        for time, formants in v.items():
            tp = TimePoint(time)
            for f in formant_tracks:
                tp.add_value(f, formants[f])
            track.add(tp)
        if k["speaker"] not in tracks:
            tracks[k["speaker"]] = {}
        tracks[k["speaker"]][k] = track

    if 'formants' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(
            corpus_context, 'formants', [(x, float) for x in formant_tracks])
    for speaker, track_dict in tracks.items():
        corpus_context.save_acoustic_tracks('formants', track_dict, speaker)
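# A sketch of the ``data`` argument expected by extract_and_save_formant_tracks,
# assuming a mapping of segments produced elsewhere (e.g. by generate_segments);
# the per-segment parameter dicts shown are illustrative:
#
#     data = {seg: {"num_formants": 5} for seg in mapping}
#     extract_and_save_formant_tracks(corpus_context, data)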
def test_grouping():
    mapping = SegmentMapping()
    speakers = ['speaker1', 'speaker2']
    vowels = ['aa', 'ae', 'iy']
    for s in speakers:
        if s == 'speaker1':
            vs = ['aa', 'ae']
        else:
            vs = ['aa', 'iy']
        for v in vs:
            mapping.add_file_segment('path', 0, 1, 0, speaker=s, vowel=v)

    groups = mapping.grouped_mapping('speaker', 'vowel')
    assert len(groups) == 6
    for g, lists in groups.items():
        assert g[0] in speakers
        assert g[1] in vowels

    groups = mapping.grouped_mapping('vowel', 'speaker')
    assert len(groups) == 6
    for g, lists in groups.items():
        assert g[0] in vowels
        assert g[1] in speakers
def analyze_vot(corpus_context, classifier, stop_label='stops',
                vot_min=5, vot_max=100,
                window_min=-30, window_max=30,
                overwrite_edited=False,
                call_back=None, stop_check=None, multiprocessing=False):
    """
    Analyze VOT for stops using a pretrained AutoVOT classifier.

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.corpus.AudioContext`
    classifier : str
        Path to an AutoVOT classifier model
    stop_label : str
        Label of subset to analyze
    vot_min : int
        Minimum VOT in ms
    vot_max : int
        Maximum VOT in ms
    window_min : int
        Window minimum in ms
    window_max : int
        Window maximum in ms
    overwrite_edited : bool
        Whether to update VOTs that have the property ``edited`` set to True
    call_back : callable
        Call back function, optional
    stop_check : callable
        Stop check function, optional
    multiprocessing : bool
        Flag to use multiprocessing, otherwise will use threading
    """
    if not corpus_context.hierarchy.has_token_subset('phone', stop_label) and \
            not corpus_context.hierarchy.has_type_subset('phone', stop_label):
        raise Exception('Phones do not have a "{}" subset.'.format(stop_label))
    already_encoded_vots = corpus_context.hierarchy.has_subannotation_type("vot")

    stop_mapping = generate_segments(corpus_context, annotation_type='phone', subset=stop_label,
                                     padding=PADDING, file_type="consonant",
                                     fetch_subannotations=True).grouped_mapping('discourse')
    segment_mapping = SegmentMapping()
    vot_func = AutoVOTAnalysisFunction(classifier_to_use=classifier,
                                       min_vot_length=vot_min, max_vot_length=vot_max,
                                       window_min=window_min, window_max=window_max)
    for discourse in corpus_context.discourses:
        if (discourse,) in stop_mapping:
            sf = corpus_context.discourse_sound_file(discourse)
            speaker_mapped_stops = {}
            for x in stop_mapping[(discourse,)]:
                if already_encoded_vots:
                    if "vot" in x["subannotations"]:
                        vot = x["subannotations"]["vot"]
                    else:
                        vot = None
                    if vot is not None:
                        # Skip "edited" VOTs unless we're given the go-ahead to overwrite them
                        if not overwrite_edited and hasattr(vot, "edited") and vot.edited:
                            continue
                        stop_info = (x["begin"], x["end"], x["id"], vot.id)
                    else:
                        stop_info = (x["begin"], x["end"], x["id"], "new_vot")
                else:
                    stop_info = (x["begin"], x["end"], x["id"])
                if x["speaker"] in speaker_mapped_stops:
                    speaker_mapped_stops[x["speaker"]].append(stop_info)
                else:
                    speaker_mapped_stops[x["speaker"]] = [stop_info]
            for speaker in speaker_mapped_stops:
                segment_mapping.add_file_segment(sf["consonant_file_path"],
                                                 0, sf["duration"], sf["channel"],
                                                 name="{}-{}".format(speaker, discourse),
                                                 vot_marks=speaker_mapped_stops[speaker])

    output = analyze_segments(segment_mapping.segments, vot_func,
                              stop_check=stop_check, multiprocessing=multiprocessing)

    if already_encoded_vots:
        new_data = []
        updated_data = []
        custom_props = [(prop, get_default_for_type(val))
                        for prop, val in corpus_context.hierarchy.subannotation_properties["vot"]
                        if prop not in ["begin", "id", "end", "confidence"]]
        all_props = [x[0] for x in custom_props] + ["id", "begin", "end", "confidence"]
        for discourse, discourse_output in output.items():
            for (begin, end, confidence, stop_id, vot_id) in discourse_output:
                if vot_id == "new_vot":
                    props = {"id": str(uuid1()), "begin": begin, "end": begin + end,
                             "annotated_id": stop_id, "confidence": confidence}
                    for prop, val in custom_props:
                        props[prop] = val
                    new_data.append(props)
                else:
                    props = {"id": vot_id,
                             "props": {"begin": begin, "end": begin + end, "confidence": confidence}}
                    for prop, val in custom_props:
                        props["props"][prop] = val
                    updated_data.append(props)
        if updated_data:
            statement = """
            UNWIND {{data}} as d
            MERGE (n:vot:{corpus_name} {{id: d.id}})
            SET n += d.props
            """.format(corpus_name=corpus_context.cypher_safe_name)
            corpus_context.execute_cypher(statement, data=updated_data)
        if new_data:
            default_node = ", ".join(["{}: d.{}".format(p, p) for p in all_props])
            statement = """
            UNWIND {{data}} as d
            MATCH (annotated:phone:{corpus_name} {{id: d.annotated_id}})
            CREATE (annotated)<-[:annotates]-(annotation:vot:{corpus_name} {{{default_node}}})
            """.format(corpus_name=corpus_context.cypher_safe_name, default_node=default_node)
            corpus_context.execute_cypher(statement, data=new_data)
    else:
        list_of_stops = []
        property_types = [("begin", float), ("end", float), ("confidence", float)]
        for discourse, discourse_output in output.items():
            for (begin, end, confidence, stop_id) in discourse_output:
                list_of_stops.append({"begin": begin, "end": begin + end,
                                      "id": uuid1(), "confidence": confidence,
                                      "annotated_id": stop_id})
        corpus_context.import_subannotations(list_of_stops, property_types, "vot", "phone")
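# A usage sketch for analyze_vot; the classifier path is hypothetical and the
# corpus is assumed to already have a 'stops' phone subset encoded:
#
#     with CorpusContext('my_corpus') as c:
#         analyze_vot(c, '/path/to/model.classifier', stop_label='stops',
#                     vot_min=15, vot_max=250, overwrite_edited=False)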
def generate_segments(corpus_context, annotation_type='utterance', subset=None, file_type='vowel',
                      duration_threshold=0.001, padding=0, fetch_subannotations=False):
    """
    Generate segment vectors for an annotation type, to be used as input to analyze_file_segments.

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.corpus.context.CorpusContext`
        The CorpusContext object of the corpus
    annotation_type : str, optional
        The type of annotation to use in generating segments, defaults to utterance
    subset : str, optional
        Specify a subset to use for generating segments
    file_type : str, optional
        One of 'low_freq', 'vowel', or 'consonant', specifies the type of audio file to use
    duration_threshold : float, optional
        Segments with duration shorter than this value (in seconds) will not be included
    padding : float, optional
        Amount of padding (in seconds) to add around each segment
    fetch_subannotations : bool, optional
        Whether to also fetch subannotations for each segment

    Returns
    -------
    SegmentMapping
        Object containing segments to be analyzed
    """
    if annotation_type not in corpus_context.hierarchy.annotation_types:
        raise Exception('The corpus does not have a "{}" annotation type.'.format(annotation_type))
    if subset is not None and not corpus_context.hierarchy.has_type_subset(annotation_type, subset) \
            and not corpus_context.hierarchy.has_token_subset(annotation_type, subset):
        raise Exception('The "{}" annotation type does not have a "{}" subset.'.format(annotation_type, subset))
    speakers = corpus_context.speakers
    segment_mapping = SegmentMapping()
    for s in speakers:
        statement = '''MATCH (s:Speaker:{corpus_name})-[r:speaks_in]->(d:Discourse:{corpus_name})
                WHERE s.name = {{speaker_name}}
                RETURN d, r'''.format(corpus_name=corpus_context.cypher_safe_name)
        results = corpus_context.execute_cypher(statement, speaker_name=s)
        for r in results:
            channel = r['r']['channel']
            discourse = r['d']['name']
            if file_type == 'vowel':
                file_path = r['d']['vowel_file_path']
            elif file_type == 'low_freq':
                file_path = r['d']['low_freq_file_path']
            else:
                file_path = r['d']['consonant_file_path']
            if file_path is None:
                print("Skipping discourse {} because no wav file exists.".format(discourse))
                continue
            discourse_duration = r['d']['duration']
            at = getattr(corpus_context, annotation_type)
            qr = corpus_context.query_graph(at)
            if subset is not None:
                qr = qr.filter(at.subset == subset)
            qr = qr.filter(at.discourse.name == discourse)
            qr = qr.filter(at.end <= discourse_duration)
            qr = qr.filter(at.begin != at.end)  # Skip zero-duration segments if they exist
            if duration_threshold is not None:
                qr = qr.filter(at.duration >= duration_threshold)
            qr = qr.filter(at.speaker.name == s)
            if annotation_type != 'utterance' and 'utterance' in corpus_context.hierarchy.annotation_types:
                qr = qr.preload(at.utterance)
            else:
                qr = qr.preload(at.discourse)
            if fetch_subannotations:
                for t in corpus_context.hierarchy.annotation_types:
                    if t in corpus_context.hierarchy.subannotations:
                        for sa in corpus_context.hierarchy.subannotations[t]:
                            if t == 'utterance':
                                qr = qr.preload(getattr(corpus_context.utterance, sa))
                            else:
                                qr = qr.preload(getattr(getattr(corpus_context.utterance, t), sa))
            if qr.count() == 0:
                continue
            annotations = qr.all()
            if annotations is not None:
                for a in annotations:
                    if annotation_type == 'utterance':
                        utt_id = a.id
                    elif 'utterance' not in corpus_context.hierarchy.annotation_types:
                        utt_id = None
                    else:
                        utt_id = a.utterance.id
                    if fetch_subannotations:
                        # Get subannotations too
                        subannotations = {}
                        if annotation_type in corpus_context.hierarchy.subannotations and \
                                corpus_context.hierarchy.subannotations[annotation_type]:
                            for sa in corpus_context.hierarchy.subannotations[annotation_type]:
                                if getattr(a, sa):
                                    subannotations[sa] = getattr(a, sa)[0]
                        segment_mapping.add_file_segment(file_path, a.begin, a.end,
                                                         label=a.label, id=a.id, utterance_id=utt_id,
                                                         discourse=discourse, channel=channel, speaker=s,
                                                         annotation_type=annotation_type, padding=padding,
                                                         subannotations=subannotations)
                    else:
                        segment_mapping.add_file_segment(file_path, a.begin, a.end,
                                                         label=a.label, id=a.id, utterance_id=utt_id,
                                                         discourse=discourse, channel=channel, speaker=s,
                                                         annotation_type=annotation_type, padding=padding)
    return segment_mapping
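# A usage sketch for generate_segments; the corpus name and the 'syllabic'
# subset are hypothetical:
#
#     with CorpusContext('my_corpus') as c:
#         mapping = generate_segments(c, annotation_type='phone',
#                                     subset='syllabic', file_type='vowel',
#                                     padding=0.1)
#         for (speaker,), segs in mapping.grouped_mapping('speaker').items():
#             print(speaker, len(segs))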
def analyze_vot(corpus_context, stop_label='stops',
                classifier="/autovot/experiments/models/bb_jasa.classifier",
                vot_min=5, vot_max=100,
                window_min=-30, window_max=30,
                call_back=None, stop_check=None, multiprocessing=False):
    """
    Analyze VOT for stops using a pretrained AutoVOT classifier.

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.CorpusContext`
    stop_label : str
        Label of subset to analyze
    classifier : str
        Path to an AutoVOT classifier model
    vot_min : int
        Minimum VOT in ms
    vot_max : int
        Maximum VOT in ms
    window_min : int
        Window minimum in ms
    window_max : int
        Window maximum in ms
    call_back : callable
        Call back function, optional
    stop_check : callable
        Stop check function, optional
    multiprocessing : bool
        Flag to use multiprocessing, otherwise will use threading
    """
    if not corpus_context.hierarchy.has_token_subset('phone', stop_label) and \
            not corpus_context.hierarchy.has_type_subset('phone', stop_label):
        raise Exception('Phones do not have a "{}" subset.'.format(stop_label))
    stop_mapping = generate_segments(corpus_context, annotation_type='phone', subset=stop_label,
                                     padding=PADDING, file_type="consonant").grouped_mapping('discourse')
    segment_mapping = SegmentMapping()
    vot_func = AutoVOTAnalysisFunction(classifier_to_use=classifier,
                                       min_vot_length=vot_min, max_vot_length=vot_max,
                                       window_min=window_min, window_max=window_max)
    for discourse in corpus_context.discourses:
        if (discourse,) in stop_mapping:
            sf = corpus_context.discourse_sound_file(discourse)
            speaker_mapped_stops = {}
            discourse_speakers = set()
            for x in stop_mapping[(discourse,)]:
                if x["speaker"] in speaker_mapped_stops:
                    speaker_mapped_stops[x["speaker"]].append((x["begin"], x["end"], x["id"]))
                else:
                    speaker_mapped_stops[x["speaker"]] = [(x["begin"], x["end"], x["id"])]
                discourse_speakers.add(x["speaker"])
            for speaker in discourse_speakers:
                segment_mapping.add_file_segment(sf["consonant_file_path"],
                                                 sf["speech_begin"], sf["speech_end"], sf["channel"],
                                                 name="{}-{}".format(speaker, discourse),
                                                 vot_marks=speaker_mapped_stops[speaker])

    output = analyze_segments(segment_mapping.segments, vot_func,
                              stop_check=stop_check, multiprocessing=multiprocessing)
    list_of_stops = []
    property_types = [("begin", float), ("end", float), ("confidence", float)]
    for discourse, discourse_output in output.items():
        for (begin, end, confidence, stop_id) in discourse_output:
            list_of_stops.append({"begin": begin, "end": begin + end,
                                  "id": uuid1(), "confidence": confidence,
                                  "annotated_id": stop_id})
    corpus_context.import_subannotations(list_of_stops, property_types, "vot", "phone")