Exemplo n.º 1
0
 def run(self):
     """Compute functional load for each requested segment pair and emit the outcome.

     Settings are read from ``self.kwargs``. The keys ``context``,
     ``algorithm``, ``corpus``, ``sequence_type``, ``type_token``,
     ``frequency_cutoff``, ``segment_pairs`` and (optionally)
     ``output_filename`` are popped from that dict, so it is modified in
     place; whatever remains is forwarded as keyword arguments to the
     selected functional-load function.

     Signals emitted:

     * ``errorEncountered`` -- if the computation raises; a non-``PCTError``
       exception is wrapped in ``PCTPythonError`` first.
     * ``finishedCancelling`` -- if ``self.stopped`` is set once the loop ends.
     * ``dataReady`` -- with ``self.results`` otherwise.

     Raises
     ------
     ValueError
         If ``context`` matches none of the ``ContextWidget`` values
         handled below.
     """
     kwargs = self.kwargs
     self.results = []
     context = kwargs.pop('context')
     # 'min_pairs' selects the minimal-pair functions; any other value
     # falls back to the delta-H functions.
     if kwargs.pop('algorithm') == 'min_pairs':
         func = FL.minpair_fl
         rel_func = FL.relative_minpair_fl
     else:
         func = FL.deltah_fl
         rel_func = FL.relative_deltah_fl
     if context == ContextWidget.canonical_value:
         cm = CanonicalVariantContext
     elif context == ContextWidget.frequent_value:
         cm = MostFrequentVariantContext
     elif context == ContextWidget.separate_value:
         cm = SeparatedTokensVariantContext
     elif context == ContextWidget.relative_value:
         cm = WeightedVariantContext
     else:
         # Fail with a clear message instead of an UnboundLocalError on
         # `cm` a few lines further down.
         raise ValueError('Unknown variant context: {!r}'.format(context))
     with cm(kwargs.pop('corpus'),
             kwargs.pop('sequence_type'),
             kwargs.pop('type_token'),
             frequency_threshold=kwargs.pop('frequency_cutoff')) as c:
         try:
             pairs = kwargs.pop('segment_pairs')
             output_filename = kwargs.pop('output_filename', None)
             to_output = []
             outf = None
             if output_filename is not None:
                 outf = open(output_filename, mode='w', encoding='utf-8-sig')
             try:
                 if outf is not None:
                     save_minimal_pairs(outf, [], write_header=True)
                 for pair in pairs:
                     if len(pair) == 1:
                         # Single segment: relative functional load. Nothing
                         # is queued for the output file in this branch.
                         res = rel_func(c, pair[0], **kwargs)
                     else:
                         # A pair of lists/tuples is zipped into several
                         # segment pairs; anything else is one pair.
                         if isinstance(pair[0], (list, tuple)):
                             in_list = list(zip(pair[0], pair[1]))
                         else:
                             in_list = [pair]
                         res = func(c, in_list, **kwargs)
                         if outf is not None:
                             to_output.append((pair, res[1]))
                     # Checked after the computation so that the result of
                     # a cancelled iteration is discarded.
                     if self.stopped:
                         break
                     self.results.append(res)
                 if outf is not None:
                     save_minimal_pairs(outf, to_output)
             finally:
                 # Close the file even when a computation raises, so the
                 # handle is never leaked.
                 if outf is not None:
                     outf.close()
         except PCTError as e:
             self.errorEncountered.emit(e)
             return
         except Exception as e:
             e = PCTPythonError(e)
             self.errorEncountered.emit(e)
             return
     if self.stopped:
         self.finishedCancelling.emit()
         return
     self.dataReady.emit(self.results)
Exemplo n.º 2
0
 def run(self):
     """Compute functional load for each requested segment pair and emit the outcome.

     Settings are read from ``self.kwargs``. The keys ``context``,
     ``algorithm``, ``corpus``, ``sequence_type``, ``type_token``,
     ``frequency_cutoff``, ``segment_pairs`` and (optionally)
     ``output_filename`` are popped from that dict, so it is modified in
     place; whatever remains is forwarded as keyword arguments to the
     selected functional-load function.

     Signals emitted:

     * ``errorEncountered`` -- if the computation raises; a non-``PCTError``
       exception is wrapped in ``PCTPythonError`` first.
     * ``finishedCancelling`` -- if ``self.stopped`` is set once the loop ends.
     * ``dataReady`` -- with ``self.results`` otherwise.

     Raises
     ------
     ValueError
         If ``context`` matches none of the ``ContextWidget`` values
         handled below.
     """
     kwargs = self.kwargs
     self.results = []
     context = kwargs.pop('context')
     # 'min_pairs' selects the minimal-pair functions; any other value
     # falls back to the delta-H functions.
     if kwargs.pop('algorithm') == 'min_pairs':
         func = FL.minpair_fl
         rel_func = FL.relative_minpair_fl
     else:
         func = FL.deltah_fl
         rel_func = FL.relative_deltah_fl
     if context == ContextWidget.canonical_value:
         cm = CanonicalVariantContext
     elif context == ContextWidget.frequent_value:
         cm = MostFrequentVariantContext
     elif context == ContextWidget.separate_value:
         cm = SeparatedTokensVariantContext
     elif context == ContextWidget.relative_value:
         cm = WeightedVariantContext
     else:
         # Fail with a clear message instead of an UnboundLocalError on
         # `cm` a few lines further down.
         raise ValueError('Unknown variant context: {!r}'.format(context))
     with cm(kwargs.pop('corpus'),
             kwargs.pop('sequence_type'),
             kwargs.pop('type_token'),
             frequency_threshold=kwargs.pop('frequency_cutoff')) as c:
         try:
             pairs = kwargs.pop('segment_pairs')
             output_filename = kwargs.pop('output_filename', None)
             to_output = []
             outf = None
             if output_filename is not None:
                 outf = open(output_filename, mode='w', encoding='utf-8-sig')
             try:
                 if outf is not None:
                     save_minimal_pairs(outf, [], write_header=True)
                 for pair in pairs:
                     if len(pair) == 1:
                         # Single segment: relative functional load. Nothing
                         # is queued for the output file in this branch.
                         res = rel_func(c, pair[0], **kwargs)
                     else:
                         # A pair of lists/tuples is zipped into several
                         # segment pairs; anything else is one pair.
                         if isinstance(pair[0], (list, tuple)):
                             in_list = list(zip(pair[0], pair[1]))
                         else:
                             in_list = [pair]
                         res = func(c, in_list, **kwargs)
                         if outf is not None:
                             to_output.append((pair, res[1]))
                     # Checked after the computation so that the result of
                     # a cancelled iteration is discarded.
                     if self.stopped:
                         break
                     self.results.append(res)
                 if outf is not None:
                     save_minimal_pairs(outf, to_output)
             finally:
                 # Close the file even when a computation raises, so the
                 # handle is never leaked.
                 if outf is not None:
                     outf.close()
         except PCTError as e:
             self.errorEncountered.emit(e)
             return
         except Exception as e:
             e = PCTPythonError(e)
             self.errorEncountered.emit(e)
             return
     if self.stopped:
         self.finishedCancelling.emit()
         return
     self.dataReady.emit(self.results)
Exemplo n.º 3
0
def relative_minpair_fl(corpus_context,
                        segment,
                        relative_count_to_relevant_sounds=False,
                        relative_count_to_whole_corpus=True,
                        distinguish_homophones=False,
                        output_filename=None,
                        environment_filter=None,
                        prevent_normalization=False,
                        stop_check=None,
                        call_back=None):
    """Calculate the average functional load of the contrasts between a
    segment and all other segments, as a count of minimal pairs.

    `minpair_fl` is called once for every pairing of `segment` with another
    symbol of the inventory, and the first element of each result is
    averaged over the number of pairings.

    Parameters
    ----------
    corpus_context : CorpusContext
        Context manager for a corpus
    segment : str
        The target segment.
    relative_count_to_relevant_sounds : bool, optional
        If True, divide the number of minimal pairs
        by the total number of words that contain either of the two segments.
    relative_count_to_whole_corpus : bool, optional
        If True, divide the number of minimal pairs by the total number of words
        in the corpus (regardless of whether those words contain the target sounds).
        Defaults to True.
    distinguish_homophones : bool, optional
        If False, then you'll count sock~shock (sock=clothing) and
        sock~shock (sock=punch) as just one minimal pair; but if True,
        you'll overcount alternative spellings of the same word, e.g.
        axel~actual and axle~actual. False is the value used by Wedel et al.
    output_filename : optional
        If not None, the collected ``(segment pair, second element of the
        minpair_fl result)`` entries are handed to `save_minimal_pairs`
        together with this value.
    environment_filter : EnvironmentFilter
        Allows the user to restrict the neutralization process to segments in
        particular segmental contexts
    prevent_normalization : bool, optional
        Forwarded unchanged to `minpair_fl`.
    stop_check : callable, optional
        Optional function to check whether to gracefully terminate early
    call_back : callable, optional
        Optional function to supply progress information during the function

    Returns
    -------
    float
        The sum of the per-contrast values (first element of each
        `minpair_fl` result) divided by the number of contrasts. How each
        per-contrast value is scaled is decided by `minpair_fl` from the
        ``relative_count_*`` flags. Returns 0.0 when the inventory offers
        no other segment to contrast with.
    """
    # Pair the target with every other inventory symbol. '#' is skipped --
    # presumably the word-boundary symbol; confirm against the inventory.
    segment_pairs = [(segment, other.symbol)
                     for other in corpus_context.inventory
                     if other.symbol != segment and other.symbol != '#']

    results = []
    to_output = []
    for sp in segment_pairs:
        res = minpair_fl(
            corpus_context, [sp],
            relative_count_to_relevant_sounds=relative_count_to_relevant_sounds,
            relative_count_to_whole_corpus=relative_count_to_whole_corpus,
            distinguish_homophones=distinguish_homophones,
            environment_filter=environment_filter,
            prevent_normalization=prevent_normalization,
            stop_check=stop_check,
            call_back=call_back)
        # NOTE(review): assumes minpair_fl always returns an indexable
        # result, even when stop_check requests termination -- confirm.
        results.append(res[0])

        if output_filename is not None:
            to_output.append((sp, res[1]))
    if output_filename is not None:
        save_minimal_pairs(output_filename, to_output)
    if not segment_pairs:
        # Nothing to average over; avoid a ZeroDivisionError.
        return 0.0
    return sum(results) / len(segment_pairs)
def relative_minpair_fl(corpus_context, segment,
                        relative_count_to_relevant_sounds=False, relative_count_to_whole_corpus=True,
                        distinguish_homophones=False, output_filename=None, environment_filter=None,
                        prevent_normalization=False, stop_check=None, call_back=None):
    """Calculate the average functional load of the contrasts between a
    segment and all other segments, as a count of minimal pairs.

    `minpair_fl` is called once for every pairing of `segment` with another
    symbol of the inventory, and the first element of each result is
    averaged over the number of pairings.

    Parameters
    ----------
    corpus_context : CorpusContext
        Context manager for a corpus
    segment : str
        The target segment.
    relative_count_to_relevant_sounds : bool, optional
        If True, divide the number of minimal pairs
        by the total number of words that contain either of the two segments.
    relative_count_to_whole_corpus : bool, optional
        If True, divide the number of minimal pairs by the total number of words
        in the corpus (regardless of whether those words contain the target sounds).
        Defaults to True.
    distinguish_homophones : bool, optional
        If False, then you'll count sock~shock (sock=clothing) and
        sock~shock (sock=punch) as just one minimal pair; but if True,
        you'll overcount alternative spellings of the same word, e.g.
        axel~actual and axle~actual. False is the value used by Wedel et al.
    output_filename : optional
        If not None, the collected ``(segment pair, second element of the
        minpair_fl result)`` entries are handed to `save_minimal_pairs`
        together with this value.
    environment_filter : EnvironmentFilter
        Allows the user to restrict the neutralization process to segments in
        particular segmental contexts
    prevent_normalization : bool, optional
        Forwarded unchanged to `minpair_fl`.
    stop_check : callable, optional
        Optional function to check whether to gracefully terminate early
    call_back : callable, optional
        Optional function to supply progress information during the function

    Returns
    -------
    float
        The sum of the per-contrast values (first element of each
        `minpair_fl` result) divided by the number of contrasts. How each
        per-contrast value is scaled is decided by `minpair_fl` from the
        ``relative_count_*`` flags. Returns 0.0 when the inventory offers
        no other segment to contrast with.
    """
    # Pair the target with every other inventory symbol. '#' is skipped --
    # presumably the word-boundary symbol; confirm against the inventory.
    segment_pairs = [(segment, other.symbol)
                     for other in corpus_context.inventory
                     if other.symbol != segment and other.symbol != '#']

    results = []
    to_output = []
    for sp in segment_pairs:
        res = minpair_fl(corpus_context, [sp],
                         relative_count_to_relevant_sounds=relative_count_to_relevant_sounds,
                         relative_count_to_whole_corpus=relative_count_to_whole_corpus,
                         distinguish_homophones=distinguish_homophones,
                         environment_filter=environment_filter,
                         prevent_normalization=prevent_normalization,
                         stop_check=stop_check, call_back=call_back)
        # NOTE(review): assumes minpair_fl always returns an indexable
        # result, even when stop_check requests termination -- confirm.
        results.append(res[0])

        if output_filename is not None:
            to_output.append((sp, res[1]))
    if output_filename is not None:
        save_minimal_pairs(output_filename, to_output)
    if not segment_pairs:
        # Nothing to average over; avoid a ZeroDivisionError.
        return 0.0
    return sum(results) / len(segment_pairs)