Example #1
def analyze_intensity(corpus_context,
                      source='praat',
                      call_back=None,
                      stop_check=None):
    """
    Analyze intensity of an entire utterance, and save the resulting intensity tracks into the database.

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        corpus context to use
    source : str
        program used for the analysis, defaults to 'praat'
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    """
    segment_mapping = generate_utterance_segments(
        corpus_context, padding=PADDING).grouped_mapping('speaker')
    if call_back is not None:
        call_back('Analyzing files...')
    for i, (speaker, v) in enumerate(segment_mapping.items()):
        gender = None
        try:
            q = corpus_context.query_speakers().filter(
                corpus_context.speaker.name == speaker)
            q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
            gender = q.all()[0]['Gender']
        except SpeakerAttributeError:
            pass
        intensity_function = generate_base_intensity_function(corpus_context)
        output = analyze_segments(v, intensity_function, stop_check=stop_check)
        corpus_context.save_intensity_tracks(output, speaker)
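
A minimal usage sketch (hedged: the corpus name is hypothetical, and analyze_intensity is assumed importable from the module that defines it):

from polyglotdb import CorpusContext

with CorpusContext('my_corpus') as c:
    # print is a simple stand-in for a GUI progress callback
    analyze_intensity(c, call_back=print)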
Example #2
def test_analyze_file_segments_reaper(acoustic_corpus_path, reaper_func):
    mapping = SegmentMapping()
    seg = (acoustic_corpus_path, 1, 2, 0)
    mapping.add_file_segment(*seg)
    output = analyze_segments(mapping, reaper_func)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert (all(x >= 1 for x in output[k].keys()))
        assert (all(x <= 2 for x in output[k].keys()))

    mapping[0].properties['padding'] = 0.5
    output = analyze_segments(mapping, reaper_func)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert (all(x >= 1 for x in output[k].keys()))
        assert (all(x <= 2 for x in output[k].keys()))
Example #3
def analyze_track_script(corpus_context,
                         acoustic_name,
                         properties,
                         script_path,
                         duration_threshold=0.01,
                         phone_class=None,
                         arguments=None,
                         call_back=None,
                         file_type='consonant',
                         stop_check=None, multiprocessing=True):
    """Run a user-supplied Praat script over segments (whole utterances, or the
    phones in phone_class) and save the resulting acoustic tracks into the
    database, grouped by speaker."""
    if file_type not in ['consonant', 'vowel', 'low_freq']:
        raise ValueError('File type must be one of: consonant, vowel, or low_freq')
    if acoustic_name not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(corpus_context, acoustic_name, properties)
        corpus_context.encode_hierarchy()
    if call_back is not None:
        call_back('Analyzing phones...')
    if phone_class is None:
        segment_mapping = generate_utterance_segments(corpus_context, padding=PADDING)
    else:
        segment_mapping = generate_segments(corpus_context, corpus_context.phone_name, phone_class, file_type=file_type,
                                            padding=PADDING, duration_threshold=duration_threshold)

    segment_mapping = segment_mapping.grouped_mapping('speaker')
    praat_path = corpus_context.config.praat_path
    script_function = generate_praat_script_function(praat_path, script_path, arguments=arguments)
    for i, ((speaker,), v) in enumerate(segment_mapping.items()):
        output = analyze_segments(v, script_function, stop_check=stop_check, multiprocessing=multiprocessing)
        corpus_context.save_acoustic_tracks(acoustic_name, output, speaker)
Example #4
def test_analyze_file_segments_reaper(acoustic_corpus_path, reaper_func):
    mapping = SegmentMapping()
    seg = (acoustic_corpus_path, 1, 2, 0)
    mapping.add_file_segment(*seg)
    output = analyze_segments(mapping, reaper_func, multiprocessing=False)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert (all(x >= 1 for x in output[k].keys()))
        assert (all(x <= 2 for x in output[k].keys()))

    mapping[0].properties['padding'] = 0.5
    output = analyze_segments(mapping, reaper_func, multiprocessing=False)
    for k in output.keys():
        print(sorted(output[k].keys()))
        assert (all(x >= 1 for x in output[k].keys()))
        assert (all(x <= 2 for x in output[k].keys()))
Example #5
def analyze_intensity(corpus_context,
                      source='praat',
                      call_back=None,
                      stop_check=None,
                      multiprocessing=True):
    """
    Analyze intensity of an entire utterance, and save the resulting intensity tracks into the database.

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        corpus context to use
    source : str
        program used for the analysis, defaults to 'praat'
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        flag to use multiprocessing, otherwise will use threading
    """
    segment_mapping = generate_utterance_segments(corpus_context,
                                                  padding=PADDING,
                                                  file_type='consonant')
    segment_mapping = segment_mapping.grouped_mapping('speaker')
    if call_back is not None:
        call_back('Analyzing files...')
    if 'intensity' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(
            corpus_context, 'intensity', [('Intensity', float)])
        corpus_context.encode_hierarchy()
    for i, ((speaker, ), v) in enumerate(segment_mapping.items()):
        intensity_function = generate_base_intensity_function(corpus_context)
        output = analyze_segments(v,
                                  intensity_function,
                                  stop_check=stop_check,
                                  multiprocessing=multiprocessing)
        corpus_context.save_acoustic_tracks('intensity', output, speaker)
Example #6
def analyze_formant_tracks(corpus_context,
                           vowel_label=None,
                           source='praat',
                           call_back=None,
                           stop_check=None,
                           multiprocessing=True):
    """
    Analyze formants of an entire utterance, and save the resulting formant tracks into the database.

    Parameters
    ----------
    corpus_context : CorpusContext
        corpus context to use
    vowel_label : str, optional
        The subset of phones to analyze.
    source : str
        program used for the analysis, defaults to 'praat'
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        flag to use multiprocessing, otherwise will use threading
    """
    if vowel_label is None:
        segment_mapping = generate_utterance_segments(corpus_context,
                                                      padding=PADDING)
    else:
        if not corpus_context.hierarchy.has_type_subset('phone', vowel_label):
            raise Exception(
                'Phones do not have a "{}" subset.'.format(vowel_label))
        segment_mapping = generate_vowel_segments(corpus_context,
                                                  padding=0,
                                                  vowel_label=vowel_label)
    if 'formants' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(
            corpus_context, 'formants', [('F1', float), ('F2', float),
                                         ('F3', float)])
        corpus_context.encode_hierarchy()
    segment_mapping = segment_mapping.grouped_mapping('speaker')
    if call_back is not None:
        call_back('Analyzing files...')
    for i, ((speaker, ), v) in enumerate(segment_mapping.items()):
        gender = None
        try:
            q = corpus_context.query_speakers().filter(
                corpus_context.speaker.name == speaker)
            q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
            gender = q.all()[0]['Gender']
        except SpeakerAttributeError:
            pass
        if gender is not None:
            formant_function = generate_base_formants_function(corpus_context,
                                                               gender=gender,
                                                               source=source)
        else:
            formant_function = generate_base_formants_function(corpus_context,
                                                               source=source)
        output = analyze_segments(v,
                                  formant_function,
                                  stop_check=stop_check,
                                  multiprocessing=multiprocessing)
        corpus_context.save_acoustic_tracks('formants', output, speaker)
Example #7
def analyze_script(corpus_context,
                   phone_class,
                   script_path,
                   duration_threshold=0.01,
                   arguments=None,
                   call_back=None,
                   stop_check=None, multiprocessing=True):
    """
    Perform acoustic analysis of phones using an input praat script.

    Saves the measurement results from the Praat script into the database under
    the same names as the Praat output columns.

    Praat script requirements:
        - the only input is the full path to the sound file containing (only) the phone
        - the script prints its output to the Praat Info window in two rows (i.e. two lines):
            - the first row is a space-separated list of measurement names; these are the names that will be saved into the database
            - the second row is a space-separated list of the value for each measurement

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        corpus context to use
    phone_class : str
        the name of an already encoded phone class, on which the analysis will be run
    script_path : str
        full path to the praat script
    duration_threshold : float, optional
        segments shorter than this value will not be analyzed
    arguments : list
        a list containing any arguments to the praat script (currently not working)
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        flag to use multiprocessing, otherwise will use threading
    """
    # print("analyzing sibilants")
    if call_back is not None:
        call_back('Analyzing phones...')
    directory = corpus_context.config.temporary_directory('csv')
    csv_name = 'analyze_script_import.csv'
    needs_header = True
    output_types = {}
    header = ['id', 'begin', 'end']
    time_section = time.time()
    segment_mapping = generate_segments(corpus_context, corpus_context.phone_name, phone_class, file_type='consonant',
                                        padding=0, duration_threshold=duration_threshold)
    if call_back is not None:
        call_back("generate segments took: " + str(time.time() - time_section))
    praat_path = corpus_context.config.praat_path
    script_function = generate_praat_script_function(praat_path, script_path, arguments=arguments)
    time_section = time.time()
    output = analyze_segments(segment_mapping.segments, script_function, stop_check=stop_check, multiprocessing=multiprocessing)
    if call_back is not None:
        call_back("time analyzing segments: " + str(time.time() - time_section))
    header = sorted(list(output.values())[0].keys())
    header_info = {h: float for h in header}
    point_measures_to_csv(corpus_context, output, header)
    point_measures_from_csv(corpus_context, header_info)
    return [x for x in header if x != 'id']
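
The Praat-script contract above can be illustrated with a hedged usage sketch; the script path, the 'sibilant' phone class, and the measurement names in the comment are assumptions, showing only the shape of the Info-window output the script is expected to print:

from polyglotdb import CorpusContext

# Expected Praat Info window output, two space-separated rows, e.g.:
#   cog peak slope spread
#   4521.3 5102.8 -0.42 1830.5
with CorpusContext('my_corpus') as c:
    # analyze_script reads the Praat binary location from the corpus config
    measures = analyze_script(c, 'sibilant', '/path/to/sibilant.praat')
    print(measures)  # measurement names now stored in the database (minus 'id')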
Example #8
def extract_and_save_formant_tracks(corpus_context,
                                    data,
                                    num_formants=False,
                                    stop_check=None,
                                    multiprocessing=True):
    '''Take a dictionary mapping each vowel segment to its best formant
    parameters, then recalculate the formants as tracks rather than as points.'''
    # Dictionary of segment mapping objects, where each n_formants has its own SegmentMapping
    segment_mappings = {}
    save_padding = 0.02
    for k, v in data.items():
        k.begin -= save_padding
        k.end += save_padding
        if "num_formants" in v:
            n_formants = v["num_formants"]
        else:
            # There were not enough samples, so use the default number of formants
            n_formants = 5
        if n_formants not in segment_mappings:
            segment_mappings[n_formants] = SegmentMapping()
        segment_mappings[n_formants].segments.append(k)
    outputs = {}
    for n_formants in segment_mappings:
        func = PraatSegmentFormantTrackFunction(
            praat_path=corpus_context.config.praat_path,
            max_frequency=5500,
            num_formants=n_formants,
            window_length=0.025,
            time_step=0.01)

        output = analyze_segments(
            segment_mappings[n_formants],
            func,
            stop_check=stop_check,
            multiprocessing=multiprocessing)  # Analyze the phone
        outputs.update(output)
    formant_tracks = ['F1', 'F2', 'F3', 'B1', 'B2', 'B3']
    tracks = {}
    for k, v in outputs.items():
        vowel_id = k.properties["id"]
        track = Track()
        for time_point, formants in v.items():
            tp = TimePoint(time_point)
            for f in formant_tracks:
                tp.add_value(f, formants[f])
            track.add(tp)
        if not k["speaker"] in tracks:
            tracks[k["speaker"]] = {}
        tracks[k["speaker"]][k] = track

    if 'formants' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(
            corpus_context, 'formants', [(x, float) for x in formant_tracks])

    for speaker, track_dict in tracks.items():
        corpus_context.save_acoustic_tracks('formants', track_dict, speaker)
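
The Track/TimePoint pattern used above is worth isolating; a minimal sketch, assuming the classes behave as they are used in this example (the import path is an assumption):

from polyglotdb.acoustics.classes import Track, TimePoint

track = Track()
for t, f1, f2 in [(0.01, 512.0, 1480.0), (0.02, 505.5, 1502.3)]:
    tp = TimePoint(t)          # one sampled frame
    tp.add_value('F1', f1)     # named measurement at that frame
    tp.add_value('F2', f2)
    track.add(tp)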
Example #9
def analyze_discourse_pitch(corpus_context,
                            discourse,
                            pitch_source='praat',
                            min_pitch=50,
                            max_pitch=500,
                            **kwargs):
    print(kwargs)
    statement = '''MATCH (s:Speaker:{corpus_name})-[r:speaks_in]->(d:Discourse:{corpus_name})
                WHERE d.name = {{discourse_name}}
                RETURN d, s, r'''.format(
        corpus_name=corpus_context.cypher_safe_name)
    results = corpus_context.execute_cypher(statement,
                                            discourse_name=discourse)
    segment_mapping = SegmentMapping()
    for r in results:
        channel = r['r']['channel']
        speaker = r['s']['name']

        discourse = r['d']['name']
        file_path = r['d']['vowel_file_path']
        atype = corpus_context.hierarchy.highest
        prob_utt = getattr(corpus_context, atype)
        q = corpus_context.query_graph(prob_utt)
        q = q.filter(prob_utt.discourse.name == discourse)
        q = q.filter(prob_utt.speaker.name == speaker)
        utterances = q.all()
        for u in utterances:
            segment_mapping.add_file_segment(file_path,
                                             u.begin,
                                             u.end,
                                             channel,
                                             padding=PADDING)

    path = None
    if pitch_source == 'praat':
        path = corpus_context.config.praat_path
        # kwargs = {'silence_threshold': 0.03,
        #          'voicing_threshold': 0.45, 'octave_cost': 0.01, 'octave_jump_cost': 0.35,
        #          'voiced_unvoiced_cost': 0.14}
    elif pitch_source == 'reaper':
        path = corpus_context.config.reaper_path
    pitch_function = generate_pitch_function(pitch_source,
                                             min_pitch,
                                             max_pitch,
                                             path=path,
                                             pulses=True)
    track = {}
    pulses = set()
    output = analyze_segments(segment_mapping, pitch_function)
    print(output)
    for v in output.values():
        track.update(v[0])
        pulses.update(v[1])
    return track, sorted(pulses)
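
A hedged usage sketch for the discourse-level pitch analysis (the corpus and discourse names are hypothetical):

from polyglotdb import CorpusContext

with CorpusContext('my_corpus') as c:
    track, pulses = analyze_discourse_pitch(c, 'interview_01',
                                            pitch_source='praat',
                                            min_pitch=60, max_pitch=350)
    # track maps time points to pitch measurements; pulses is a sorted list of pulse times
    print(len(track), len(pulses))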
Example #10
def analyze_vowel_formant_tracks(corpus_context,
                                 source='praat',
                                 call_back=None,
                                 stop_check=None,
                                 vowel_label='vowel',
                                 multiprocessing=True):
    """
    Analyze formants of individual vowels, and save the resulting formant tracks into the database for each phone.

    Parameters
    ----------
    corpus_context : CorpusContext
        corpus context to use
    source : str
        program used for the analysis, defaults to 'praat'
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    vowel_label : str
        The subset of phones to analyze.
    multiprocessing : bool
        flag to use multiprocessing, otherwise will use threading
    """
    if not corpus_context.hierarchy.has_type_subset('phone', vowel_label):
        raise Exception(
            'Phones do not have a "{}" subset.'.format(vowel_label))
    # gets segment mapping of phones that are vowels
    segment_mapping = generate_vowel_segments(
        corpus_context, padding=0,
        vowel_label=vowel_label).grouped_mapping('speaker')

    if call_back is not None:
        call_back('Analyzing files...')
    # goes through each speaker and: makes a formant function, analyzes the phones, and saves the tracks
    for i, ((speaker, ), v) in enumerate(segment_mapping.items()):
        gender = None
        try:
            q = corpus_context.query_speakers().filter(
                corpus_context.speaker.name == speaker)
            q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
            gender = q.all()[0]['Gender']
        except SpeakerAttributeError:
            pass
        if gender is not None:
            formant_function = generate_base_formants_function(corpus_context,
                                                               gender=gender,
                                                               source=source)
        else:
            formant_function = generate_base_formants_function(corpus_context,
                                                               source=source)
        output = analyze_segments(v,
                                  formant_function,
                                  stop_check=stop_check,
                                  multiprocessing=multiprocessing)
        corpus_context.save_formant_tracks(output, speaker)
    if 'formants' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.acoustics.add('formants')
        corpus_context.encode_hierarchy()
Example #11
def analyze_formant_points(corpus_context,
                           call_back=None,
                           stop_check=None,
                           vowel_label='vowel',
                           duration_threshold=None,
                           multiprocessing=True):
    """First pass of the algorithm; generates prototypes.

    Parameters
    ----------
    corpus_context : :class:`polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus.
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    vowel_label : str
        The subset of phones to analyze.
    duration_threshold : float, optional
        Segments with length shorter than this value (in milliseconds) will not be analyzed.
    multiprocessing : bool
        flag to use multiprocessing, otherwise will use threading

    Returns
    -------
    dict
        Track data
    """
    # ------------- Step 1: Prototypes -------------
    if not corpus_context.hierarchy.has_type_subset('phone', vowel_label):
        raise Exception(
            'Phones do not have a "{}" subset.'.format(vowel_label))

    # Gets segment mapping of phones that are vowels

    segment_mapping = generate_vowel_segments(
        corpus_context,
        duration_threshold=duration_threshold,
        padding=.25,
        vowel_label=vowel_label)

    if call_back is not None:
        call_back('Analyzing files...')

    formant_function = generate_formants_point_function(
        corpus_context)  # Make formant function
    output = analyze_segments(
        segment_mapping,
        formant_function,
        stop_check=stop_check,
        multiprocessing=multiprocessing)  # Analyze the phone
    return output
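
A hedged usage sketch (corpus name hypothetical); the returned dict maps each vowel segment to its point measurements:

from polyglotdb import CorpusContext

with CorpusContext('my_corpus') as c:
    output = analyze_formant_points(c, vowel_label='vowel')
    # Inspect a few segments and their measured values
    for seg, measurements in list(output.items())[:3]:
        print(seg, measurements)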
Example #12
def analyze_formant_tracks(corpus_context,
                           source='praat',
                           call_back=None,
                           stop_check=None,
                           multiprocessing=True):
    """
    Analyze formants of an entire utterance, and save the resulting formant tracks into the database.

    Parameters
    ----------
    corpus_context : CorpusContext
        corpus context to use
    source : str
        program used for the analysis, defaults to 'praat'
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        flag to use multiprocessing, otherwise will use threading
    """
    segment_mapping = generate_utterance_segments(corpus_context,
                                                  padding=PADDING)
    segment_mapping = segment_mapping.grouped_mapping('speaker')
    if call_back is not None:
        call_back('Analyzing files...')
    for i, ((speaker, ), v) in enumerate(segment_mapping.items()):
        gender = None
        try:
            q = corpus_context.query_speakers().filter(
                corpus_context.speaker.name == speaker)
            q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
            gender = q.all()[0]['Gender']
        except SpeakerAttributeError:
            pass
        if gender is not None:
            formant_function = generate_base_formants_function(corpus_context,
                                                               gender=gender,
                                                               source=source)
        else:
            formant_function = generate_base_formants_function(corpus_context,
                                                               source=source)
        output = analyze_segments(v,
                                  formant_function,
                                  stop_check=stop_check,
                                  multiprocessing=multiprocessing)
        corpus_context.save_formant_tracks(output, speaker)
    if 'formants' not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.acoustics.add('formants')
        corpus_context.encode_hierarchy()
Example #13
def analyze_vowel_formant_tracks(corpus_context,
                                 source='praat',
                                 call_back=None,
                                 stop_check=None,
                                 vowel_inventory=None):
    """
    Analyze formants of individual vowels, and save the resulting formant tracks into the database for each phone.

    Parameters
    ----------
    corpus_context : CorpusContext
        corpus context to use
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    vowel_inventory : list of strings
        list of vowels used to encode a class 'vowel', optional.
        if not used, it's assumed that 'vowel' is already a phone class
    """
    # encodes vowel inventory into a phone class if it's specified
    if vowel_inventory is not None:
        corpus_context.encode_class(vowel_inventory, 'vowel')
    # gets segment mapping of phones that are vowels
    segment_mapping = generate_vowel_segments(
        corpus_context, padding=0).grouped_mapping('speaker')

    if call_back is not None:
        call_back('Analyzing files...')
    # goes through each speaker and: makes a formant function, analyzes the phones, and saves the tracks
    for i, (speaker, v) in enumerate(segment_mapping.items()):
        gender = None
        try:
            q = corpus_context.query_speakers().filter(
                corpus_context.speaker.name == speaker)
            q = q.columns(corpus_context.speaker.gender.column_name('Gender'))
            gender = q.all()[0]['Gender']
        except SpeakerAttributeError:
            pass
        if gender is not None:
            formant_function = generate_base_formants_function(corpus_context,
                                                               gender=gender,
                                                               source=source)
        else:
            formant_function = generate_base_formants_function(corpus_context,
                                                               source=source)
        output = analyze_segments(v, formant_function, stop_check=stop_check)
        corpus_context.save_formant_tracks(output, speaker)
Example #14
def analyze_formant_points(corpus_context,
                           call_back=None,
                           stop_check=None,
                           vowel_inventory=None,
                           duration_threshold=None):
    """First pass of the algorithm; generates prototypes.

    Parameters
    ----------
    corpus_context : :class:`polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus.
    call_back : callable
        Information about callback.
    stop_check : string
        Information about stop check.
    vowel_inventory : list
        A list of all the vowels (in strings) used in the corpus.
    duration_threshold : float, optional
        Segments with length shorter than this value (in milliseconds) will not be analyzed.

    Returns
    -------
    dict
        Track data
    """
    # ------------- Step 1: Prototypes -------------
    # Encodes vowel inventory into a phone class if it's specified
    if vowel_inventory is not None:
        corpus_context.encode_class(vowel_inventory, 'vowel')

    # Gets segment mapping of phones that are vowels

    segment_mapping = generate_vowel_segments(
        corpus_context, duration_threshold=duration_threshold, padding=.25)

    if call_back is not None:
        call_back('Analyzing files...')

    formant_function = generate_formants_point_function(
        corpus_context)  # Make formant function
    output = analyze_segments(segment_mapping,
                              formant_function,
                              stop_check=stop_check)  # Analyze the phone
    return output
Example #15
def analyze_pitch(corpus_context,
                  source='praat',
                  call_back=None,
                  stop_check=None,
                  multiprocessing=True):
    """

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.CorpusContext`
    source
    call_back
    stop_check

    Returns
    -------

    """
    absolute_min_pitch = 50
    absolute_max_pitch = 500
    if 'utterance' not in corpus_context.hierarchy:
        raise Exception('Must encode utterances before pitch can be analyzed')
    segment_mapping = generate_utterance_segments(
        corpus_context, padding=PADDING).grouped_mapping('speaker')
    num_speakers = len(segment_mapping)
    algorithm = corpus_context.config.pitch_algorithm
    path = None
    if source == 'praat':
        path = corpus_context.config.praat_path
        # kwargs = {'silence_threshold': 0.03,
        #          'voicing_threshold': 0.45, 'octave_cost': 0.01, 'octave_jump_cost': 0.35,
        #          'voiced_unvoiced_cost': 0.14}
    elif source == 'reaper':
        path = corpus_context.config.reaper_path
        # kwargs = None
    pitch_function = generate_pitch_function(source,
                                             absolute_min_pitch,
                                             absolute_max_pitch,
                                             path=path)
    if algorithm == 'speaker_adjusted':
        speaker_data = {}
        if call_back is not None:
            call_back('Getting original speaker means and SDs...')
        for i, ((k, ), v) in enumerate(segment_mapping.items()):
            if call_back is not None:
                call_back('Analyzing speaker {} ({} of {})'.format(
                    k, i, num_speakers))
            output = analyze_segments(v,
                                      pitch_function,
                                      stop_check=stop_check,
                                      multiprocessing=multiprocessing)

            sum_pitch = 0
            sum_square_pitch = 0
            n = 0
            for seg, track in output.items():
                for t, v in track.items():
                    v = v['F0']

                    if v is not None and v > 0:  # only voiced frames

                        n += 1
                        sum_pitch += v
                        sum_square_pitch += v * v
            speaker_data[k] = [
                sum_pitch / n,
                math.sqrt((n * sum_square_pitch - sum_pitch * sum_pitch) /
                          (n * (n - 1)))
            ]

    for i, ((speaker, ), v) in enumerate(segment_mapping.items()):
        if call_back is not None:
            call_back('Analyzing speaker {} ({} of {})'.format(
                speaker, i, num_speakers))
        if algorithm == 'gendered':
            min_pitch = absolute_min_pitch
            max_pitch = absolute_max_pitch
            try:
                q = corpus_context.query_speakers().filter(
                    corpus_context.speaker.name == speaker)
                q = q.columns(
                    corpus_context.speaker.gender.column_name('Gender'))
                gender = q.all()[0]['Gender']
                if gender is not None:
                    if gender.lower()[0] == 'f':
                        min_pitch = 100
                    else:
                        max_pitch = 400
            except SpeakerAttributeError:
                pass
            pitch_function = generate_pitch_function(source,
                                                     min_pitch,
                                                     max_pitch,
                                                     path=path)
        elif algorithm == 'speaker_adjusted':
            mean_pitch, sd_pitch = speaker_data[speaker]
            min_pitch = int(mean_pitch - 3 * sd_pitch)
            max_pitch = int(mean_pitch + 3 * sd_pitch)
            if min_pitch < absolute_min_pitch:
                min_pitch = absolute_min_pitch
            if max_pitch > absolute_max_pitch:
                max_pitch = absolute_max_pitch
            pitch_function = generate_pitch_function(source,
                                                     min_pitch,
                                                     max_pitch,
                                                     path=path)
        output = analyze_segments(v,
                                  pitch_function,
                                  stop_check=stop_check,
                                  multiprocessing=multiprocessing)
        corpus_context.save_pitch_tracks(output, speaker)
    # Once all speakers are analyzed, record when pitch was last edited and
    # register the pitch track type
    corpus_context.hierarchy.add_token_properties(
        corpus_context, 'utterance', [('pitch_last_edited', int)])
    corpus_context.encode_hierarchy()
    today = datetime.utcnow()
    corpus_context.query_graph(corpus_context.utterance).set_properties(
        pitch_last_edited=today.timestamp())
    corpus_context.hierarchy.acoustics.add('pitch')
    corpus_context.encode_hierarchy()
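
The speaker_adjusted branch derives each speaker's pitch range from single-pass sums: mean = Σx/n and SD = sqrt((n·Σx² − (Σx)²)/(n·(n − 1))). A standalone sketch of the same arithmetic:

import math

def one_pass_mean_sd(values):
    # Same formula as the speaker_adjusted branch above; needs n >= 2
    n = len(values)
    sum_x = sum(values)
    sum_sq = sum(v * v for v in values)
    mean = sum_x / n
    sd = math.sqrt((n * sum_sq - sum_x * sum_x) / (n * (n - 1)))
    return mean, sd

mean, sd = one_pass_mean_sd([110.0, 122.5, 98.7, 131.2])
print(int(mean - 3 * sd), int(mean + 3 * sd))  # the speaker's search range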
Example #16
def analyze_pitch(corpus_context, call_back=None, stop_check=None):
    """Analyze pitch of all utterances, using the pitch source and algorithm
    set in the corpus configuration, and save pitch tracks into the database."""
    absolute_min_pitch = 55
    absolute_max_pitch = 480
    if 'utterance' not in corpus_context.hierarchy:
        raise Exception('Must encode utterances before pitch can be analyzed')
    segment_mapping = generate_utterance_segments(
        corpus_context, padding=PADDING).grouped_mapping('speaker')
    num_speakers = len(segment_mapping)
    algorithm = corpus_context.config.pitch_algorithm
    path = None
    if corpus_context.config.pitch_source == 'praat':
        path = corpus_context.config.praat_path
        # kwargs = {'silence_threshold': 0.03,
        #          'voicing_threshold': 0.45, 'octave_cost': 0.01, 'octave_jump_cost': 0.35,
        #          'voiced_unvoiced_cost': 0.14}
    elif corpus_context.config.pitch_source == 'reaper':
        path = corpus_context.config.reaper_path
        # kwargs = None
    pitch_function = generate_pitch_function(
        corpus_context.config.pitch_source,
        absolute_min_pitch,
        absolute_max_pitch,
        path=path)
    if algorithm == 'speaker_adjusted':
        speaker_data = {}
        if call_back is not None:
            call_back('Getting original speaker means and SDs...')
        for i, (k, v) in enumerate(segment_mapping.items()):
            if call_back is not None:
                call_back('Analyzing speaker {} ({} of {})'.format(
                    k, i, num_speakers))
            output = analyze_segments(v, pitch_function, stop_check=stop_check)

            sum_pitch = 0
            sum_square_pitch = 0
            n = 0
            for seg, track in output.items():
                for t, v in track.items():
                    v = v['F0']

                    if v is not None and v > 0:  # only voiced frames

                        n += 1
                        sum_pitch += v
                        sum_square_pitch += v * v
            speaker_data[k] = [
                sum_pitch / n,
                math.sqrt((n * sum_square_pitch - sum_pitch * sum_pitch) /
                          (n * (n - 1)))
            ]

    for i, (speaker, v) in enumerate(segment_mapping.items()):
        if call_back is not None:
            call_back('Analyzing speaker {} ({} of {})'.format(
                speaker, i, num_speakers))
        if algorithm == 'gendered':
            min_pitch = absolute_min_pitch
            max_pitch = absolute_max_pitch
            try:
                q = corpus_context.query_speakers().filter(
                    corpus_context.speaker.name == speaker)
                q = q.columns(
                    corpus_context.speaker.gender.column_name('Gender'))
                gender = q.all()[0]['Gender']
                if gender is not None:
                    if gender.lower()[0] == 'f':
                        min_pitch = 100
                    else:
                        max_pitch = 400
            except SpeakerAttributeError:
                pass
            pitch_function = generate_pitch_function(
                corpus_context.config.pitch_source,
                min_pitch,
                max_pitch,
                path=path)
        elif algorithm == 'speaker_adjusted':
            mean_pitch, sd_pitch = speaker_data[speaker]
            min_pitch = int(mean_pitch - 3 * sd_pitch)
            max_pitch = int(mean_pitch + 3 * sd_pitch)
            if min_pitch < absolute_min_pitch:
                min_pitch = absolute_min_pitch
            if max_pitch > absolute_max_pitch:
                max_pitch = absolute_max_pitch
            pitch_function = generate_pitch_function(
                corpus_context.config.pitch_source,
                min_pitch,
                max_pitch,
                path=path)
        output = analyze_segments(v, pitch_function, stop_check=stop_check)
        corpus_context.save_pitch_tracks(output, speaker)
Example #17
def analyze_formant_points_refinement(corpus_context,
                                      vowel_label='vowel',
                                      duration_threshold=0,
                                      num_iterations=1,
                                      call_back=None,
                                      stop_check=None,
                                      vowel_prototypes_path='',
                                      drop_formant=False,
                                      multiprocessing=True):
    """Extracts F1, F2, F3 and B1, B2, B3.

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus.
    vowel_label : str
        The subset of phones to analyze.
    duration_threshold : float, optional
        Segments with length shorter than this value (in milliseconds) will not be analyzed.
    num_iterations : int, optional
        How many times the algorithm should iterate before returning values.
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    vowel_prototypes_path : str, optional
        path to a file of vowel prototypes; if empty or nonexistent, prototypes are estimated from the data
    drop_formant : bool, optional
        whether to also consider leave-one-out candidates in which one formant is dropped
    multiprocessing : bool
        flag to use multiprocessing, otherwise will use threading

    Returns
    -------
    prototype_metadata : dict
        Means of F1, F2, F3, B1, B2, B3 and covariance matrices per vowel class.
    """
    if not corpus_context.hierarchy.has_type_subset('phone', vowel_label):
        raise Exception(
            'Phones do not have a "{}" subset.'.format(vowel_label))
    # ------------- Step 2: Varying formants -------------
    # Encodes vowel inventory into a phone class if it's specified

    use_vowel_prototypes = vowel_prototypes_path and os.path.exists(
        vowel_prototypes_path)
    base_formant_columns = ['F1', 'F2', 'F3', 'B1', 'B2', 'B3']
    if use_vowel_prototypes:
        vowel_prototype_metadata, prototype_parameters = read_prototypes(
            vowel_prototypes_path)
    else:
        prototype_parameters = base_formant_columns

    # Gets segment mapping of phones that are vowels
    segment_mapping = generate_vowel_segments(
        corpus_context,
        duration_threshold=duration_threshold,
        padding=0.1,
        vowel_label=vowel_label)
    best_data = {}

    # we used to have just columns, a list of output columns and prototype columns. Now these are not the same thing
    # so we have extra_columns (a list of columns in the output but not the prototypes) and prototype_parameters (a list of columns in the prototypes)
    # columns = ['F1', 'F2', 'F3', 'B1', 'B2', 'B3']
    # extra_columns = ['A1', 'A2', 'A3', 'Ax']
    output_columns = [
        'F1', 'F2', 'F3', 'B1', 'B2', 'B3', 'A1', 'A2', 'A3', 'Ax', 'A1A2diff',
        'A2A3diff'
    ]

    # print ('columns:', columns)
    # print ('extra_columns:', extra_columns)
    print('output_columns:', output_columns)

    log_output = []
    log_output.append(','.join(['speaker', 'vowel', 'n', 'iterations']))
    # Measure with varying levels of formants
    min_formants = 4  # Off by one error, due to how Praat measures it from F0
    # This really measures with 3 formants: F1, F2, F3. And so on.
    if drop_formant:
        max_formants = 8
    else:
        max_formants = 7
    default_formant = 5
    formant_function = generate_variable_formants_point_function(
        corpus_context, min_formants, max_formants)
    best_prototype_metadata = {}

    # For each vowel token, collect the formant measurements
    # Pick the best track that is closest to the averages gotten from prototypes

    total_speaker_vowel_pairs = len(
        segment_mapping.grouped_mapping('speaker', 'label').items())
    for i, ((speaker, vowel), seg) in enumerate(
            segment_mapping.grouped_mapping('speaker', 'label').items()):

        if len(seg) == 0:
            continue
        print(speaker + ' ' + vowel + ': ' + str(i + 1) + ' of ' +
              str(total_speaker_vowel_pairs) + ': ' + str(len(seg)) +
              ' tokens')
        output = analyze_segments(
            seg,
            formant_function,
            stop_check=stop_check,
            multiprocessing=multiprocessing)  # Analyze the phone

        if len(seg) < 6:
            print(
                "Not enough observations of vowel {}, at least 6 are needed, only found {}."
                .format(vowel, len(seg)))
            for s, data in output.items():
                best_track = data[default_formant]
                best_data[s] = {
                    k: best_track[k]
                    for j, k in enumerate(base_formant_columns)
                }
            continue

        if drop_formant:
            # ADD ALL THE LEAVE-ONE-OUT CANDIDATES
            for s, data in output.items():
                new_data = {}
                ignored_candidates = []
                for candidate, measurements in data.items():

                    try:
                        As = [
                            measurements['A1'], measurements['A2'],
                            measurements['A3'], measurements['A4']
                        ]
                        Fs = [
                            math.log2(measurements['F1']),
                            math.log2(measurements['F2']),
                            math.log2(measurements['F3']),
                            math.log2(measurements['F4'])
                        ]
                        Farray = np.array([Fs, np.ones(len(Fs))])
                        [slope, intercept] = np.linalg.lstsq(Farray.T, As)[0]

                    except Exception:
                        try:
                            As = [
                                measurements['A1'], measurements['A2'],
                                measurements['A3']
                            ]
                            Fs = [
                                math.log2(measurements['F1']),
                                math.log2(measurements['F2']),
                                math.log2(measurements['F3'])
                            ]
                            Farray = np.array([Fs, np.ones(len(Fs))])
                            [slope, intercept] = np.linalg.lstsq(Farray.T,
                                                                 As)[0]

                        except Exception:
                            try:
                                As = [measurements['A1'], measurements['A2']]
                                Fs = [
                                    math.log2(measurements['F1']),
                                    math.log2(measurements['F2'])
                                ]
                                [slope, intercept] = [0, 0]
                            except Exception:
                                # Lack of formants for these settings
                                ignored_candidates.append(candidate)
                                continue

                    for leave_out in range(1, 1 + min(3, candidate)):
                        new_measurements = {}
                        new_measurements['Ax'] = measurements['A' +
                                                              str(leave_out)]
                        candidate_name = str(candidate) + 'x' + str(leave_out)

                        if leave_out < len(As) and As[
                                leave_out -
                                1] < intercept + slope * Fs[leave_out - 1]:
                            this_is_droppable = True
                        else:
                            this_is_droppable = False
                        if this_is_droppable:
                            for parameter in measurements.keys():
                                if int(parameter[-1]) < leave_out:
                                    new_measurements[parameter] = measurements[
                                        parameter]
                                elif int(parameter[-1]) > leave_out:
                                    new_measurements[
                                        parameter[0] +
                                        str(int(parameter[-1]) -
                                            1)] = measurements[parameter]
                            new_data[candidate_name] = new_measurements

                    data[candidate]['Ax'] = data[candidate]['A4']
                data = {
                    k: v
                    for k, v in data.items() if k not in ignored_candidates
                }
                output[s] = {**data, **new_data}
        else:
            for s, data in output.items():
                for candidate, measurements in data.items():
                    output[s][candidate]['Ax'] = output[s][candidate]['A4']
        output = {k: v for k, v in output.items() if v}

        for s, data in output.items():
            for candidate, measurements in data.items():
                try:
                    output[s][candidate]['A1A2diff'] = data[candidate][
                        'A1'] - data[candidate]['A2']
                    try:
                        output[s][candidate]['A2A3diff'] = data[candidate][
                            'A2'] - data[candidate]['A3']
                    except Exception:
                        try:
                            output[s][candidate]['A2A3diff'] = data[candidate][
                                'A2']
                        except Exception:
                            output[s][candidate]['A2A3diff'] = 0
                except Exception:
                    try:
                        output[s][candidate]['A1A2diff'] = data[candidate][
                            'A1']
                    except Exception:
                        output[s][candidate]['A1A2diff'] = 0
                    output[s][candidate]['A2A3diff'] = 0
        selected_tracks = {}
        for s, data in output.items():

            try:
                selected_tracks[s] = data[default_formant]
            except Exception:
                print(s)
                print(data)
                raise
        if not use_vowel_prototypes:
            print('no prototypes, using get_mean_SD()')
            prev_prototype_metadata = get_mean_SD(selected_tracks,
                                                  prototype_parameters)
        elif vowel not in vowel_prototype_metadata:
            print('no prototype for', vowel, 'so using get_mean_SD()')
            prev_prototype_metadata = get_mean_SD(selected_tracks,
                                                  prototype_parameters)
        else:
            prev_prototype_metadata = vowel_prototype_metadata

        if num_iterations > 1 and len(seg) < 6:
            print(
                "Skipping iterations for vowel {}, at least 6 tokens are needed, only found {}."
                .format(vowel, len(seg)))
            my_iterations = [0]
        else:
            my_iterations = range(num_iterations)
        for iteration in my_iterations:
            best_numbers = []
            selected_tracks = {}
            prototype_means = prev_prototype_metadata[vowel][0]
            # Get Mahalanobis distance between every new observation and the sample/means
            covariance = np.array(prev_prototype_metadata[vowel][1])
            inverse_covariance = np.linalg.pinv(covariance)
            best_number = 5
            for s, data in output.items():
                best_distance = math.inf
                best_track = 0
                for number, point in data.items():
                    point = [
                        point[x] if point[x] else 0
                        for x in prototype_parameters
                    ]
                    distance = get_mahalanobis(prototype_means, point,
                                               inverse_covariance)
                    if distance < best_distance:  # Update "best" measures when new best distance is found
                        best_distance = distance
                        best_track = point
                        best_number = number
                # selected_tracks[s] = {k: best_track[i] for i, k in enumerate(columns)}
                selected_tracks[s] = {
                    k: best_track[i]
                    for i, k in enumerate(prototype_parameters)
                }
                # best_data[s] = {k: best_track[i] for i, k in enumerate(output_columns)}
                # best_data[s] = {k: best_track[i] for i, k in enumerate(columns)}
                best_data[s] = {}
                for output_column in output_columns:
                    best_data[s][output_column] = output[s][best_number][
                        output_column]

                best_data[s]['num_formants'] = float(
                    str(best_number).split('x')[0])
                best_data[s]['Fx'] = int(str(best_number)[0])
                if 'x' in str(best_number):
                    best_data[s]['drop_formant'] = int(
                        str(best_number).split('x')[-1])
                else:
                    best_data[s]['drop_formant'] = 0

                best_numbers.append(best_number)

            if len(seg) >= 6:
                prototype_metadata = get_mean_SD(selected_tracks,
                                                 prototype_parameters)
                prev_prototype_metadata = prototype_metadata
                best_prototype_metadata.update(prototype_metadata)

            if iteration > 0:
                changed_numbers = 0
                for i, bn in enumerate(best_numbers):
                    if bn != last_iteration_best_numbers[i]:
                        changed_numbers += 1
                if changed_numbers == 0:
                    break
            last_iteration_best_numbers = best_numbers
        log_output.append(','.join(
            [speaker, vowel, str(len(output)),
             str(iteration + 1)]))

    with open('iterations_log.csv', 'a') as f:
        for i in log_output:
            f.write(i + '\n')

    save_formant_point_data(corpus_context, best_data, num_formants=True)
    corpus_context.cache_hierarchy()
    return best_prototype_metadata
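
get_mahalanobis is imported from elsewhere in the module; a minimal NumPy sketch consistent with how it is called above (a mean vector, a candidate point, and a pseudo-inverted covariance matrix) — an assumed implementation, not the library's:

import numpy as np

def get_mahalanobis(prototype_means, point, inverse_covariance):
    # Assumed implementation: sqrt((x - mu)^T · Sigma^+ · (x - mu))
    diff = np.asarray(point, dtype=float) - np.asarray(prototype_means, dtype=float)
    return float(np.sqrt(diff @ inverse_covariance @ diff))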
Example #18
def analyze_vot(corpus_context, classifier, stop_label='stops',
                  vot_min=5,
                  vot_max=100,
                  window_min=-30,
                  window_max=30,
                  overwrite_edited=False,
                  call_back=None,
                  stop_check=None, multiprocessing=False):
    """
    Analyze VOT for stops using a pretrained AutoVOT classifier.

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.corpus.AudioContext`
    classifier : str
        Path to an AutoVOT classifier model
    stop_label : str
        Label of subset to analyze
    vot_min : int
        Minimum VOT in ms
    vot_max : int
        Maximum VOT in ms
    window_min : int
        Window minimum in ms
    window_max : int
        Window maximum in ms
    overwrite_edited : bool
        Whether to update VOTs that have the property `edited` set to True
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        Flag to use multiprocessing, otherwise will use threading
    """
    if not corpus_context.hierarchy.has_token_subset('phone', stop_label) and not corpus_context.hierarchy.has_type_subset('phone', stop_label):
        raise Exception('Phones do not have a "{}" subset.'.format(stop_label))

    already_encoded_vots = corpus_context.hierarchy.has_subannotation_type("vot")

    stop_mapping = generate_segments(corpus_context, annotation_type='phone', subset=stop_label, padding=PADDING, file_type="consonant", fetch_subannotations=True).grouped_mapping('discourse')
    segment_mapping = SegmentMapping()
    vot_func = AutoVOTAnalysisFunction(classifier_to_use=classifier,
            min_vot_length=vot_min,
            max_vot_length=vot_max,
            window_min=window_min,
            window_max=window_max
            )
    for discourse in corpus_context.discourses:
        if (discourse,) in stop_mapping:
            sf = corpus_context.discourse_sound_file(discourse)
            speaker_mapped_stops = {}
            for x in stop_mapping[(discourse,)]:
                if already_encoded_vots:
                    if "vot" in x["subannotations"]:
                        vot = x["subannotations"]["vot"]
                    else:
                        vot = None 

                    if vot is not None:
                        #Skip "edited" vots unless we're given the go-ahead to overwrite them
                        if not overwrite_edited and hasattr(vot, "edited") and vot.edited:
                            continue

                        stop_info = (x["begin"], x["end"], x["id"], x["subannotations"]["vot"].id)
                    else:
                        stop_info = (x["begin"], x["end"], x["id"], "new_vot")
                else:
                    stop_info = (x["begin"], x["end"], x["id"])

                if x["speaker"] in speaker_mapped_stops:
                    speaker_mapped_stops[x["speaker"]].append(stop_info)
                else:
                    speaker_mapped_stops[x["speaker"]] = [stop_info]
            for speaker in speaker_mapped_stops:
                segment_mapping.add_file_segment(sf["consonant_file_path"],
                                                 0, sf["duration"], sf["channel"],
                                                 name="{}-{}".format(speaker, discourse),
                                                 vot_marks=speaker_mapped_stops[speaker])
    output = analyze_segments(segment_mapping.segments, vot_func, stop_check=stop_check, multiprocessing=multiprocessing)


    if already_encoded_vots:
        new_data = []
        updated_data = []
        custom_props = [(prop, get_default_for_type(val)) for prop, val in corpus_context.hierarchy.subannotation_properties["vot"] \
                if prop not in ["begin", "id", "end", "confidence"]]
        all_props = [x[0] for x in custom_props]+["id", "begin", "end", "confidence"]

        for discourse, discourse_output in output.items():
            for (begin, end, confidence, stop_id, vot_id) in discourse_output:
                if vot_id == "new_vot":
                    props = {"id":str(uuid1()),
                             "begin":begin,
                             "end":begin+end,
                             "annotated_id":stop_id,
                             "confidence":confidence}
                    for prop, val in custom_props:
                        props[prop] = val
                    new_data.append(props)
                else:
                    props = {"id":vot_id,
                             "props":{"begin":begin,
                                 "end":begin+end,
                                 "confidence":confidence}}
                    for prop, val in custom_props:
                        props["props"][prop] = val
                    updated_data.append(props)
        if updated_data:
            statement = """
            UNWIND {{data}} as d
            MERGE (n:vot:{corpus_name} {{id: d.id}})
            SET n += d.props
            """.format(corpus_name=corpus_context.cypher_safe_name)
            corpus_context.execute_cypher(statement, data=updated_data)

        if new_data:
            default_node = ", ".join(["{}: d.{}".format(p, p) for p in all_props])
            statement = """
            UNWIND {{data}} as d
            MATCH (annotated:phone:{corpus_name} {{id: d.annotated_id}})
            CREATE (annotated) <-[:annotates]-(annotation:vot:{corpus_name}
                {{{default_node}}})
            """.format(corpus_name=corpus_context.cypher_safe_name, default_node=default_node)
            corpus_context.execute_cypher(statement, data=new_data)
    else:
        list_of_stops = []
        property_types = [("begin", float), ("end", float), ("confidence", float)]
        for discourse, discourse_output in output.items():
            for (begin, end, confidence, stop_id) in discourse_output:
                list_of_stops.append({"begin":begin,
                                      "end":begin+end,
                                      "id":uuid1(),
                                      "confidence":confidence,
                                      "annotated_id":stop_id})

        corpus_context.import_subannotations(list_of_stops, property_types, "vot", "phone")
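
A hedged usage sketch for the VOT analysis (the classifier path and corpus name are hypothetical; the 'stops' subset must already be encoded):

from polyglotdb import CorpusContext

with CorpusContext('my_corpus') as c:
    analyze_vot(c, classifier='/path/to/model.classifier',
                stop_label='stops',
                vot_min=5, vot_max=100,
                window_min=-30, window_max=30)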
Example #19
def analyze_vot(corpus_context,
                stop_label='stops',
                classifier="/autovot/experiments/models/bb_jasa.classifier",
                vot_min=5,
                vot_max=100,
                window_min=-30,
                window_max=30,
                call_back=None,
                stop_check=None,
                multiprocessing=False):
    """

    Parameters
    ----------
    corpus_context : :class:`~polyglotdb.CorpusContext`
    source
    call_back
    stop_check

    Returns
    -------

    """
    if not corpus_context.hierarchy.has_token_subset(
            'phone',
            stop_label) and not corpus_context.hierarchy.has_type_subset(
                'phone', stop_label):
        raise Exception('Phones do not have a "{}" subset.'.format(stop_label))
    stop_mapping = generate_segments(
        corpus_context,
        annotation_type='phone',
        subset=stop_label,
        padding=PADDING,
        file_type="consonant").grouped_mapping('discourse')
    segment_mapping = SegmentMapping()
    vot_func = AutoVOTAnalysisFunction(classifier_to_use=classifier,
                                       min_vot_length=vot_min,
                                       max_vot_length=vot_max,
                                       window_min=window_min,
                                       window_max=window_max)
    for discourse in corpus_context.discourses:
        if (discourse, ) in stop_mapping:
            sf = corpus_context.discourse_sound_file(discourse)
            speaker_mapped_stops = {}
            discourse_speakers = set()
            for x in stop_mapping[(discourse, )]:
                if x["speaker"] in speaker_mapped_stops:
                    speaker_mapped_stops[x["speaker"]].append(
                        (x["begin"], x["end"], x["id"]))
                else:
                    speaker_mapped_stops[x["speaker"]] = [(x["begin"],
                                                           x["end"], x["id"])]
                    discourse_speakers.add(x["speaker"])
            for speaker in discourse_speakers:
                segment_mapping.add_file_segment(
                    sf["consonant_file_path"],
                    sf["speech_begin"], sf["speech_end"], sf["channel"],
                    name="{}-{}".format(speaker, discourse),
                    vot_marks=speaker_mapped_stops[speaker])
    output = analyze_segments(segment_mapping.segments,
                              vot_func,
                              stop_check=stop_check,
                              multiprocessing=multiprocessing)

    list_of_stops = []
    property_types = [("begin", float), ("end", float), ("confidence", float)]
    for discourse, discourse_output in output.items():
        for (begin, end, confidence, stop_id) in discourse_output:
            list_of_stops.append({
                "begin": begin,
                "end": begin + end,
                "id": uuid1(),
                "confidence": confidence,
                "annotated_id": stop_id
            })

    corpus_context.import_subannotations(list_of_stops, property_types, "vot",
                                         "phone")
Exemplo n.º 20
0
def analyze_formant_points_refinement(corpus_context,
                                      vowel_inventory,
                                      duration_threshold=0,
                                      num_iterations=1,
                                      call_back=None,
                                      stop_check=None):
    """Extracts F1, F2, F3 and B1, B2, B3.

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus.
    vowel_inventory : list
        A list of vowels contained in the corpus.
    duration_threshold : float, optional
        Segments with length shorter than this value (in milliseconds) will not be analyzed.
    num_iterations : int, optional
        How many times the algorithm should iterate before returning values.

    Returns
    -------
    prototype_metadata : dict
        Means of F1, F2, F3, B1, B2, B3 and covariance matrices per vowel class.
    """
    # Encode the vowel inventory into a phone class if it's specified
    if vowel_inventory is not None:
        corpus_context.encode_class(vowel_inventory, 'vowel')

    # ------------- Step 2: Varying formants -------------
    # Get the segment mapping of phones that are vowels
    segment_mapping = generate_vowel_segments(
        corpus_context, duration_threshold=duration_threshold, padding=0.1)
    best_data = {}
    columns = ['F1', 'F2', 'F3', 'B1', 'B2', 'B3']
    # Measure with varying levels of formants
    # Praat counts formants from F0, so these settings are off by one:
    # min_formants = 4 really measures 3 formants (F1, F2, F3), and so on.
    min_formants = 4
    max_formants = 7
    default_formant = 5
    formant_function = generate_variable_formants_point_function(
        corpus_context, min_formants, max_formants)
    best_prototype_metadata = {}
    # For each vowel class, collect formant measurements for every token,
    # then pick the track closest to the prototype averages.
    for i, (vowel, seg) in enumerate(
            segment_mapping.grouped_mapping('label').items()):

        output = analyze_segments(seg, formant_function,
                                  stop_check=stop_check)  # Analyze the phone

        if len(seg) < 6:
            print(
                "Not enough observations of vowel {}; at least 6 are needed, but only {} were found."
                .format(vowel, len(seg)))
            for s, data in output.items():
                best_track = data[default_formant]
                best_data[s] = {k: best_track[k] for k in columns}
            continue
        selected_tracks = {}
        for s, data in output.items():
            selected_tracks[s] = data[default_formant]
        prev_prototype_metadata = get_mean_SD(selected_tracks)

        for _ in range(num_iterations):
            selected_tracks = {}
            prototype_means = prev_prototype_metadata[vowel][0]
            # Get Mahalanobis distance between every new observation and the sample/means
            covariance = np.array(prev_prototype_metadata[vowel][1])
            inverse_covariance = np.linalg.pinv(covariance)
            best_number = 5
            for s, data in output.items():
                best_distance = math.inf
                best_track = 0
                for number, point in data.items():
                    point = [point[x] if point[x] else 0 for x in columns]

                    distance = get_mahalanobis(prototype_means, point,
                                               inverse_covariance)
                    if distance < best_distance:  # Update "best" measures when new best distance is found
                        best_distance = distance
                        best_track = point
                        best_number = number
                # Store the winning measurement once; copy it so that adding
                # num_formants below does not leak into selected_tracks.
                track_dict = {k: best_track[i] for i, k in enumerate(columns)}
                selected_tracks[s] = track_dict
                best_data[s] = dict(track_dict)
                best_data[s]['num_formants'] = best_number
            prototype_metadata = get_mean_SD(selected_tracks)
            prev_prototype_metadata = prototype_metadata
            best_prototype_metadata.update(prototype_metadata)

    save_formant_point_data(corpus_context, best_data, num_formants=True)
    corpus_context.cache_hierarchy()
    return best_prototype_metadata
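
The refinement loop above scores each candidate measurement by its Mahalanobis distance from the running per-vowel prototype. get_mahalanobis itself is not shown in this listing, so here is a self-contained sketch of the computation it presumably performs (all numbers are illustrative):

import numpy as np

def mahalanobis_sketch(means, point, inverse_covariance):
    # sqrt((x - mu)^T S^-1 (x - mu)); the code above uses np.linalg.pinv so
    # that near-singular covariance matrices (few observations) stay usable.
    diff = np.array(point, dtype=float) - np.array(means, dtype=float)
    return float(np.sqrt(diff @ inverse_covariance @ diff))

# Six columns, matching F1, F2, F3, B1, B2, B3:
means = [500.0, 1500.0, 2500.0, 80.0, 120.0, 160.0]
point = [520.0, 1480.0, 2550.0, 90.0, 110.0, 150.0]
inv_cov = np.linalg.pinv(np.diag([100.0, 400.0, 900.0, 25.0, 36.0, 49.0]))
print(mahalanobis_sketch(means, point, inv_cov))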
Exemplo n.º 21
0
def analyze_script(corpus_context,
                   phone_class=None,
                   subset=None,
                   annotation_type=None,
                   script_path=None,
                   duration_threshold=0.01,
                   arguments=None,
                   call_back=None,
                   file_type='consonant',
                   stop_check=None,
                   multiprocessing=True):
    """
    Perform acoustic analysis of phones using an input praat script.

    Saves the measurement results from the Praat script into the database under the same names as the Praat output columns.

    Praat script requirements:

    - the only input is the full path to the sound file containing (only) the phone
    - the script prints the output to the Praat Info window in two rows (i.e. two lines).
    - the first row is a space-separated list of measurement names: these are the names that will be saved into the database
    - the second row is a space-separated list of the corresponding values, one per measurement
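
    For instance, a conforming script measuring duration and mean intensity might print (measurement names and values here are purely illustrative):

        duration intensity
        0.142 65.3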

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        corpus context to use
    phone_class : str
        DEPRECATED, the name of an already encoded subset of phones on which the analysis will be run
    subset : str, optional
        the name of an already encoded subset of an annotation type, on which the analysis will be run
    annotation_type : str
        the type of annotation that the analysis will go over
    script_path : str
        full path to the Praat script
    duration_threshold : float
        Minimum duration of segments to be analyzed
    file_type : str
        File type to use for the script (consonant = 16kHz sample rate, vowel = 11kHz, low_freq = 1200 Hz)
    arguments : list
        a list containing any arguments to the Praat script (currently not working)
    call_back : callable
        call back function, optional
    stop_check : callable
        stop check function, optional
    multiprocessing : bool
        Flag to use multiprocessing, otherwise will use threading
    """
    if file_type not in ['consonant', 'vowel', 'low_freq']:
        raise ValueError(
            'File type must be one of: consonant, vowel, or low_freq')

    if phone_class is not None:
        # Issue the deprecation via warnings.warn (assumes `warnings` is
        # imported at module level); raising DeprecationWarning would make
        # the fallback assignments below unreachable.
        warnings.warn(
            "The phone_class parameter has been deprecated, please use annotation_type='phone' and subset='{}'"
            .format(phone_class), DeprecationWarning)
        annotation_type = corpus_context.phone_name
        subset = phone_class

    if call_back is not None:
        call_back('Analyzing {}...'.format(annotation_type))
    time_section = time.time()
    segment_mapping = generate_segments(corpus_context,
                                        annotation_type,
                                        subset,
                                        file_type=file_type,
                                        padding=0,
                                        duration_threshold=duration_threshold)
    if call_back is not None:
        call_back("Generating segments took: " + str(time.time() - time_section))
    praat_path = corpus_context.config.praat_path
    script_function = generate_praat_script_function(praat_path,
                                                     script_path,
                                                     arguments=arguments)
    time_section = time.time()
    output = analyze_segments(segment_mapping.segments,
                              script_function,
                              stop_check=stop_check,
                              multiprocessing=multiprocessing)
    if call_back is not None:
        call_back("Analyzing segments took: " +
                  str(time.time() - time_section))
    header = sorted(list(output.values())[0].keys())
    header_info = {h: float for h in header}
    point_measures_to_csv(corpus_context, output, header)
    point_measures_from_csv(corpus_context,
                            header_info,
                            annotation_type=annotation_type)
    return [x for x in header if x != 'id']
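
Finally, a usage sketch for analyze_script; the corpus name, subset labels, and script path are hypothetical, and the script must follow the two-row output convention described in the docstring:

from polyglotdb import CorpusContext

with CorpusContext("my_corpus") as c:
    # Assumes c.config.praat_path points at a Praat binary, as required by
    # generate_praat_script_function above.
    c.encode_class(["S", "Z", "SH", "ZH"], "sibilants")
    measures = analyze_script(c,
                              annotation_type="phone",
                              subset="sibilants",
                              script_path="/path/to/sibilant.praat")
    print(measures)  # the Praat output column names, minus 'id'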