Example #1
def analyze_utterance_pitch(corpus_context,
                            utterance,
                            source='praat',
                            min_pitch=50,
                            max_pitch=500,
                            **kwargs):
    """Compute and return an F0 track for a single utterance using Praat or REAPER."""
    if isinstance(utterance, str):
        utterance_id = utterance
    else:
        utterance_id = utterance.id
    padding = kwargs.pop('padding', None)
    if padding is None:
        padding = PADDING
    utt_type = corpus_context.hierarchy.highest
    statement = '''MATCH (s:Speaker:{corpus_name})-[r:speaks_in]->(d:Discourse:{corpus_name}),
                (u:{utt_type}:{corpus_name})-[:spoken_by]->(s),
                (u)-[:spoken_in]->(d)
                WHERE u.id = {{utterance_id}}
                RETURN u, d, r'''.format(
        corpus_name=corpus_context.cypher_safe_name, utt_type=utt_type)
    results = corpus_context.execute_cypher(statement,
                                            utterance_id=utterance_id)
    segment_mapping = SegmentMapping()
    for r in results:
        channel = r['r']['channel']
        file_path = r['d']['vowel_file_path']
        u = r['u']
        segment_mapping.add_file_segment(file_path,
                                         u['begin'],
                                         u['end'],
                                         channel,
                                         padding=padding)

    path = None
    if source == 'praat':
        path = corpus_context.config.praat_path
    elif source == 'reaper':
        path = corpus_context.config.reaper_path
    pitch_function = generate_pitch_function(source,
                                             min_pitch,
                                             max_pitch,
                                             path=path)

    track = Track()
    for seg in segment_mapping:
        output = pitch_function(seg)

        for k, v in output.items():
            if v['F0'] is None or v['F0'] <= 0:
                continue
            p = TimePoint(k)
            p.add_value('F0', v['F0'])
            track.add(p)
    if 'pitch' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(
            corpus_context, 'pitch', [('F0', float)])
        corpus_context.encode_hierarchy()
    return track
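A minimal usage sketch for the function above; the corpus name, Praat location, and utterance ID are illustrative placeholders rather than part of the original example:

# Sketch only: corpus name, Praat path, and utterance ID are placeholders.
from polyglotdb import CorpusContext

with CorpusContext('my_corpus') as c:              # assumed corpus name / default connection
    c.config.praat_path = '/usr/local/bin/praat'   # assumed Praat binary location
    utterance_id = 'some-utterance-uuid'           # placeholder utterance ID
    track = analyze_utterance_pitch(c, utterance_id,
                                    source='praat',
                                    min_pitch=60,
                                    max_pitch=400)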
Example #2
def analyze_discourse_pitch(corpus_context,
                            discourse,
                            pitch_source='praat',
                            min_pitch=50,
                            max_pitch=500,
                            **kwargs):
    """Compute a pitch track and pulse times for a whole discourse."""
    print(kwargs)
    statement = '''MATCH (s:Speaker:{corpus_name})-[r:speaks_in]->(d:Discourse:{corpus_name})
                WHERE d.name = {{discourse_name}}
                RETURN d, s, r'''.format(
        corpus_name=corpus_context.cypher_safe_name)
    results = corpus_context.execute_cypher(statement,
                                            discourse_name=discourse)
    segment_mapping = SegmentMapping()
    for r in results:
        channel = r['r']['channel']
        speaker = r['s']['name']

        discourse = r['d']['name']
        file_path = r['d']['vowel_file_path']
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(prob_utt.discourse.name == discourse)
        q = q.filter(prob_utt.speaker.name == speaker)
        utterances = q.all()
        for u in utterances:
            segment_mapping.add_file_segment(file_path,
                                             u.begin,
                                             u.end,
                                             channel,
                                             padding=PADDING)

    path = None
    if pitch_source == 'praat':
        path = corpus_context.config.praat_path
        # kwargs = {'silence_threshold': 0.03,
        #          'voicing_threshold': 0.45, 'octave_cost': 0.01, 'octave_jump_cost': 0.35,
        #          'voiced_unvoiced_cost': 0.14}
    elif pitch_source == 'reaper':
        path = corpus_context.config.reaper_path
    pitch_function = generate_pitch_function(pitch_source,
                                             min_pitch,
                                             max_pitch,
                                             path=path,
                                             pulses=True)
    track = {}
    pulses = set()
    output = analyze_segments(segment_mapping, pitch_function)
    print(output)
    for v in output.values():
        track.update(v[0])
        pulses.update(v[1])
    return track, sorted(pulses)
Example #3
def test_analyze_file_segments_reaper(acoustic_corpus_path, reaper_func):
    mapping = SegmentMapping()
    seg = (acoustic_corpus_path, 1, 2, 0)
    mapping.add_file_segment(*seg)
    output = analyze_segments(mapping, reaper_func, multiprocessing=False)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert (all(x >= 1 for x in output[k].keys()))
        assert (all(x <= 2 for x in output[k].keys()))

    mapping[0].properties['padding'] = 0.5
    output = analyze_segments(mapping, reaper_func, multiprocessing=False)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert (all(x >= 1 for x in output[k].keys()))
        assert (all(x <= 2 for x in output[k].keys()))
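The same SegmentMapping/analyze_segments pattern works outside the test fixtures. The sketch below is illustrative only: the wav path and Praat location are placeholders, and it assumes generate_pitch_function from Examples #1-2 is importable.

# Sketch only: the wav path and Praat location are placeholders.
mapping = SegmentMapping()
mapping.add_file_segment('/tmp/example.wav', 1.0, 2.0, 0)   # path, begin, end, channel
mapping[0].properties['padding'] = 0.5                       # widen the analysis window

pitch_function = generate_pitch_function('praat', 50, 500,
                                         path='/usr/local/bin/praat')
output = analyze_segments(mapping, pitch_function, multiprocessing=False)
for segment, values in output.items():
    print(segment, sorted(values.keys()))                    # times of the F0 samples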
Example #4
def test_analyze_file_segments_reaper(acoustic_corpus_path, reaper_func):
    mapping = SegmentMapping()
    seg = (acoustic_corpus_path, 1, 2, 0)
    mapping.add_file_segment(*seg)
    output = analyze_segments(mapping, reaper_func)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert (all(x >= 1 for x in output[k].keys()))
        assert (all(x <= 2 for x in output[k].keys()))

    mapping[0].properties['padding'] = 0.5
    output = analyze_segments(mapping, reaper_func)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert (all(x >= 1 for x in output[k].keys()))
        assert (all(x <= 2 for x in output[k].keys()))
Example #5
def extract_and_save_formant_tracks(corpus_context,
                                    data,
                                    num_formants=False,
                                    stop_check=None,
                                    multiprocessing=True):
    '''Take a dictionary mapping each vowel segment to its best formant parameters, then
    recalculate the formants as tracks rather than as single points.'''
    # Dictionary of SegmentMapping objects, one per number of formants
    segment_mappings = {}
    save_padding = 0.02
    for k, v in data.items():
        k.begin -= save_padding
        k.end += save_padding
        if "num_formants" in v:
            n_formants = v["num_formants"]
        else:
            # There were not enough samples, so use the default number of formants
            n_formants = 5
        if n_formants not in segment_mappings:
            segment_mappings[n_formants] = SegmentMapping()
        segment_mappings[n_formants].segments.append(k)
    outputs = {}
    for n_formants in segment_mappings:
        func = PraatSegmentFormantTrackFunction(
            praat_path=corpus_context.config.praat_path,
            max_frequency=5500,
            num_formants=n_formants,
            window_length=0.025,
            time_step=0.01)

        output = analyze_segments(
            segment_mappings[n_formants],
            func,
            stop_check=stop_check,
            multiprocessing=multiprocessing)  # Analyze the phone
        outputs.update(output)
    formant_tracks = ['F1', 'F2', 'F3', 'B1', 'B2', 'B3']
    tracks = {}
    for k, v in outputs.items():
        vowel_id = k.properties["id"]
        track = Track()
        for time, formants in v.items():
            tp = TimePoint(time)
            for f in formant_tracks:
                tp.add_value(f, formants[f])
            track.add(tp)
        if k["speaker"] not in tracks:
            tracks[k["speaker"]] = {}
        tracks[k["speaker"]][k] = track

    if 'formants' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(
            corpus_context, 'formants', [(x, float) for x in formant_tracks])

    for speaker, track_dict in tracks.items():
        corpus_context.save_acoustic_tracks('formants', track_dict, speaker)
Example #6
def test_grouping():
    mapping = SegmentMapping()
    speakers = ['speaker1', 'speaker2']
    vowels = ['aa', 'ae', 'iy']
    for s in speakers:
        if s == 'speaker1':
            vs = ['aa', 'ae']
        else:
            vs = ['aa', 'iy']
        for v in vs:
            mapping.add_file_segment('path', 0, 1, 0, speaker=s, vowel=v)

    groups = mapping.grouped_mapping('speaker', 'vowel')
    assert len(groups) == 6
    for g, lists in groups.items():
        assert g[0] in speakers
        assert g[1] in vowels

    groups = mapping.grouped_mapping('vowel', 'speaker')
    assert len(groups) == 6
    for g, lists in groups.items():
        assert g[0] in vowels
        assert g[1] in speakers
Example #7
def analyze_vot(corpus_context, classifier, stop_label='stops',
                  vot_min=5,
                  vot_max=100,
                  window_min=-30,
                  window_max=30,
                  overwrite_edited=False,
                  call_back=None,
                  stop_check=None, multiprocessing=False):
    """
    Analyze VOT for stops using a pretrained AutoVOT classifier.

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.corpus.AudioContext`
    classifier : str
        Path to an AutoVOT classifier model
    stop_label : str
        Label of subset to analyze
    vot_min : int
        Minimum VOT in ms
    vot_max : int
        Maximum VOT in ms
    window_min : int
        Window minimum in ms
    window_max : int
        Window maximum in ms
    overwrite_edited : bool
        Whether to update VOTs that already have the property ``edited`` set to True
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        Flag to use multiprocessing, otherwise will use threading
    """
    if not corpus_context.hierarchy.has_token_subset('phone', stop_label) and not corpus_context.hierarchy.has_type_subset('phone', stop_label):
        raise Exception('Phones do not have a "{}" subset.'.format(stop_label))

    already_encoded_vots = corpus_context.hierarchy.has_subannotation_type("vot")

    stop_mapping = generate_segments(corpus_context, annotation_type='phone', subset=stop_label, padding=PADDING, file_type="consonant", fetch_subannotations=True).grouped_mapping('discourse')
    segment_mapping = SegmentMapping()
    vot_func = AutoVOTAnalysisFunction(classifier_to_use=classifier,
            min_vot_length=vot_min,
            max_vot_length=vot_max,
            window_min=window_min,
            window_max=window_max
            )
    for discourse in corpus_context.discourses:
        if (discourse,) in stop_mapping:
            sf = corpus_context.discourse_sound_file(discourse)
            speaker_mapped_stops = {}
            for x in stop_mapping[(discourse,)]:
                if already_encoded_vots:
                    if "vot" in x["subannotations"]:
                        vot = x["subannotations"]["vot"]
                    else:
                        vot = None 

                    if vot is not None:
                        #Skip "edited" vots unless we're given the go-ahead to overwrite them
                        if not overwrite_edited and hasattr(vot, "edited") and vot.edited:
                            continue

                        stop_info = (x["begin"], x["end"], x["id"], x["subannotations"]["vot"].id)
                    else:
                        stop_info = (x["begin"], x["end"], x["id"], "new_vot")
                else:
                    stop_info = (x["begin"], x["end"], x["id"])

                if x["speaker"] in speaker_mapped_stops:
                    speaker_mapped_stops[x["speaker"]].append(stop_info)
                else:
                    speaker_mapped_stops[x["speaker"]] = [stop_info]
            for speaker in speaker_mapped_stops:
                segment_mapping.add_file_segment(sf["consonant_file_path"],
                                                 0, sf["duration"], sf["channel"],
                                                 name="{}-{}".format(speaker, discourse),
                                                 vot_marks=speaker_mapped_stops[speaker])
    output = analyze_segments(segment_mapping.segments, vot_func, stop_check=stop_check, multiprocessing=multiprocessing)


    if already_encoded_vots:
        new_data = []
        updated_data = []
        custom_props = [(prop, get_default_for_type(val)) for prop, val in corpus_context.hierarchy.subannotation_properties["vot"] \
                if prop not in ["begin", "id", "end", "confidence"]]
        all_props = [x[0] for x in custom_props]+["id", "begin", "end", "confidence"]

        for discourse, discourse_output in output.items():
            for (begin, end, confidence, stop_id, vot_id) in discourse_output:
                if vot_id == "new_vot":
                    props = {"id":str(uuid1()),
                             "begin":begin,
                             "end":begin+end,
                             "annotated_id":stop_id,
                             "confidence":confidence}
                    for prop, val in custom_props:
                        props[prop] = val
                    new_data.append(props)
                else:
                    props = {"id":vot_id,
                             "props":{"begin":begin,
                                 "end":begin+end,
                                 "confidence":confidence}}
                    for prop, val in custom_props:
                        props["props"][prop] = val
                    updated_data.append(props)
        if updated_data:
            statement = """
            UNWIND {{data}} as d
            MERGE (n:vot:{corpus_name} {{id: d.id}})
            SET n += d.props
            """.format(corpus_name=corpus_context.cypher_safe_name)
            corpus_context.execute_cypher(statement, data=updated_data)

        if new_data:
            default_node = ", ".join(["{}: d.{}".format(p, p) for p in all_props])
            statement = """
            UNWIND {{data}} as d
            MATCH (annotated:phone:{corpus_name} {{id: d.annotated_id}})
            CREATE (annotated) <-[:annotates]-(annotation:vot:{corpus_name}
                {{{default_node}}})
            """.format(corpus_name=corpus_context.cypher_safe_name, default_node=default_node)
            corpus_context.execute_cypher(statement, data=new_data)
    else:
        list_of_stops = []
        property_types = [("begin", float), ("end", float), ("confidence", float)]
        for discourse, discourse_output in output.items():
            for (begin, end, confidence, stop_id) in discourse_output:
                list_of_stops.append({"begin":begin,
                                      "end":begin+end,
                                      "id":uuid1(),
                                      "confidence":confidence,
                                      "annotated_id":stop_id})

        corpus_context.import_subannotations(list_of_stops, property_types, "vot", "phone")
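A hedged usage sketch for this function; the corpus name, classifier path, and parameter values are placeholders, and it presumes a phone subset called 'stops' has already been encoded in the corpus.

# Sketch only: corpus name and classifier path are placeholders; a 'stops'
# phone subset is assumed to exist already.
from polyglotdb import CorpusContext

with CorpusContext('my_corpus') as c:
    analyze_vot(c, '/path/to/autovot.classifier',
                stop_label='stops',
                vot_min=5, vot_max=100,
                window_min=-30, window_max=30,
                overwrite_edited=False,
                multiprocessing=True)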
Example #8
def generate_segments(corpus_context,
                      annotation_type='utterance',
                      subset=None,
                      file_type='vowel',
                      duration_threshold=0.001,
                      padding=0,
                      fetch_subannotations=False):
    """
    Generate segment vectors for an annotation type, to be used as input to analyze_file_segments.

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.CorpusContext`
        The CorpusContext object of the corpus
    annotation_type : str, optional
        The type of annotation to use in generating segments, defaults to utterance
    subset : str, optional
        Specify a subset to use for generating segments
    file_type : str, optional
        One of 'low_freq', 'vowel', or 'consonant', specifies the type of audio file to use
    duration_threshold : float, optional
        Segments with duration shorter than this value (in seconds) will not be included
    padding : float, optional
        Padding (in seconds) stored on each segment to widen the analysis window
    fetch_subannotations : bool, optional
        If True, preload and attach subannotations to each generated segment

    Returns
    -------
    SegmentMapping
        Object containing segments to be analyzed
    """
    if annotation_type not in corpus_context.hierarchy.annotation_types:
        raise Exception('Annotation type "{}" not found in the corpus hierarchy.'.format(annotation_type))
    if subset is not None and not corpus_context.hierarchy.has_type_subset(
            annotation_type,
            subset) and not corpus_context.hierarchy.has_token_subset(
                annotation_type, subset):
        raise Exception('The {} annotation type has no "{}" subset.'.format(annotation_type, subset))
    speakers = corpus_context.speakers
    segment_mapping = SegmentMapping()
    for s in speakers:
        statement = '''MATCH (s:Speaker:{corpus_name})-[r:speaks_in]->(d:Discourse:{corpus_name})
                    WHERE s.name = {{speaker_name}}
                    RETURN d, r'''.format(
            corpus_name=corpus_context.cypher_safe_name)
        results = corpus_context.execute_cypher(statement, speaker_name=s)
        for r in results:
            channel = r['r']['channel']
            discourse = r['d']['name']
            if file_type == 'vowel':
                file_path = r['d']['vowel_file_path']
            elif file_type == 'low_freq':
                file_path = r['d']['low_freq_file_path']
            else:
                file_path = r['d']['consonant_file_path']
            if file_path is None:
                print(
                    "Skipping discourse {} because no wav file exists.".format(
                        discourse))
                continue
            discourse_duration = r['d']['duration']
            at = getattr(corpus_context, annotation_type)
            qr = corpus_context.query_graph(at)
            if subset is not None:
                qr = qr.filter(at.subset == subset)
            qr = qr.filter(at.discourse.name == discourse)
            qr = qr.filter(at.end <= discourse_duration)
            qr = qr.filter(at.begin !=
                           at.end)  # Skip zero duration segments if they exist
            if duration_threshold is not None:
                qr = qr.filter(at.duration >= duration_threshold)
            qr = qr.filter(at.speaker.name == s)
            if annotation_type != 'utterance' and 'utterance' in corpus_context.hierarchy.annotation_types:
                qr.preload(at.utterance)
            else:
                qr.preload(at.discourse)
            if fetch_subannotations:
                for t in corpus_context.hierarchy.annotation_types:
                    if t in corpus_context.hierarchy.subannotations:
                        # ``sub`` avoids shadowing the speaker loop variable ``s``
                        for sub in corpus_context.hierarchy.subannotations[t]:
                            if t == 'utterance':
                                qr = qr.preload(
                                    getattr(corpus_context.utterance, sub))
                            else:
                                qr = qr.preload(
                                    getattr(
                                        getattr(corpus_context.utterance, t),
                                        sub))

            if qr.count() == 0:
                continue
            annotations = qr.all()
            if annotations is not None:
                for a in annotations:
                    if annotation_type == 'utterance':
                        utt_id = a.id
                    elif 'utterance' not in corpus_context.hierarchy.annotation_types:
                        utt_id = None
                    else:
                        utt_id = a.utterance.id
                    if fetch_subannotations:
                        #Get subannotations too
                        subannotations = {}
                        if annotation_type in corpus_context.hierarchy.subannotations and corpus_context.hierarchy.subannotations[
                                annotation_type]:
                            # ``sub`` avoids shadowing the speaker loop variable ``s``
                            for sub in corpus_context.hierarchy.subannotations[
                                    annotation_type]:
                                if getattr(a, sub):
                                    subannotations[sub] = getattr(a, sub)[0]
                        segment_mapping.add_file_segment(
                            file_path,
                            a.begin,
                            a.end,
                            label=a.label,
                            id=a.id,
                            utterance_id=utt_id,
                            discourse=discourse,
                            channel=channel,
                            speaker=s,
                            annotation_type=annotation_type,
                            padding=padding,
                            subannotations=subannotations)
                    else:
                        segment_mapping.add_file_segment(
                            file_path,
                            a.begin,
                            a.end,
                            label=a.label,
                            id=a.id,
                            utterance_id=utt_id,
                            discourse=discourse,
                            channel=channel,
                            speaker=s,
                            annotation_type=annotation_type,
                            padding=padding)
    return segment_mapping
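An end-to-end sketch of how this helper is typically combined with the pieces seen in the other examples; the corpus name, 'vowel' subset label, Praat settings, and padding value are illustrative assumptions.

# Sketch only: corpus name, subset label, and Praat settings are placeholders.
from polyglotdb import CorpusContext

with CorpusContext('my_corpus') as c:
    mapping = generate_segments(c, annotation_type='phone', subset='vowel',
                                file_type='vowel', padding=0.1)
    func = PraatSegmentFormantTrackFunction(praat_path=c.config.praat_path,
                                            max_frequency=5500,
                                            num_formants=5,
                                            window_length=0.025,
                                            time_step=0.01)
    output = analyze_segments(mapping, func, multiprocessing=True)   # segment -> {time: formants}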
Example #9
def analyze_vot(corpus_context,
                stop_label='stops',
                classifier="/autovot/experiments/models/bb_jasa.classifier",
                vot_min=5,
                vot_max=100,
                window_min=-30,
                window_max=30,
                call_back=None,
                stop_check=None,
                multiprocessing=False):
    """

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.CorpusContext`
    source
    call_back
    stop_check

    Returns
    -------

    """
    if not corpus_context.hierarchy.has_token_subset(
            'phone',
            stop_label) and not corpus_context.hierarchy.has_type_subset(
                'phone', stop_label):
        raise Exception('Phones do not have a "{}" subset.'.format(stop_label))
    stop_mapping = generate_segments(
        corpus_context,
        annotation_type='phone',
        subset=stop_label,
        padding=PADDING,
        file_type="consonant").grouped_mapping('discourse')
    segment_mapping = SegmentMapping()
    vot_func = AutoVOTAnalysisFunction(classifier_to_use=classifier,
                                       min_vot_length=vot_min,
                                       max_vot_length=vot_max,
                                       window_min=window_min,
                                       window_max=window_max)
    for discourse in corpus_context.discourses:
        if (discourse, ) in stop_mapping:
            sf = corpus_context.discourse_sound_file(discourse)
            speaker_mapped_stops = {}
            discourse_speakers = set()
            for x in stop_mapping[(discourse, )]:
                if x["speaker"] in speaker_mapped_stops:
                    speaker_mapped_stops[x["speaker"]].append(
                        (x["begin"], x["end"], x["id"]))
                else:
                    speaker_mapped_stops[x["speaker"]] = [(x["begin"],
                                                           x["end"], x["id"])]
                    discourse_speakers.add(x["speaker"])
            for speaker in discourse_speakers:
                segment_mapping.add_file_segment(sf["consonant_file_path"],
                                                 sf["speech_begin"], sf["speech_end"],
                                                 sf["channel"],
                                                 name="{}-{}".format(speaker, discourse),
                                                 vot_marks=speaker_mapped_stops[speaker])
    output = analyze_segments(segment_mapping.segments,
                              vot_func,
                              stop_check=stop_check,
                              multiprocessing=multiprocessing)

    list_of_stops = []
    property_types = [("begin", float), ("end", float), ("confidence", float)]
    for discourse, discourse_output in output.items():
        for (begin, end, confidence, stop_id) in discourse_output:
            list_of_stops.append({
                "begin": begin,
                "end": begin + end,
                "id": uuid1(),
                "confidence": confidence,
                "annotated_id": stop_id
            })

    corpus_context.import_subannotations(list_of_stops, property_types, "vot",
                                         "phone")