Пример #1
0
    def train(self, corpus):
        """Train on segmented utterances.

        Collects the statistics below from ``corpus`` and stores them on the
        instance:

        * ``phoneme_counts`` / ``phoneme_freqs``: the corpus phoneme counts
          and the result of ``counter_freqs`` applied to them.
        * ``diphone_freqs``: ``counter_freqs`` of the corpus diphone counts,
          used as P(xy).
        * ``boundary_diphone_probs``: P(xy|#), keyed first by boundary label
          (True/False) and then by diphone.
        * ``diphone_boundary_probs``: P(#|xy), keyed by diphone.
        * ``p_boundary``: P(#), the fraction of observed diphones labeled as
          a boundary.
        * ``initial_freqs`` / ``final_freqs``: P(x|initial) and P(x|final).

        Args:
            corpus: Object exposing ``phoneme_counts``, ``diphone_counts``,
                ``diphone_boundaries`` (an iterable of iterables of
                ``(diphone, label)`` pairs) and ``outside_phoneme_counts``
                (an ``(initial_counts, final_counts)`` pair).

        Raises:
            AssertionError: If a computed probability falls outside [0, 1] or
                a P(xy|#) distribution does not sum to approximately 1.
        """
        # Get phoneme and boundary information
        self.phoneme_counts = corpus.phoneme_counts
        self.phoneme_freqs = counter_freqs(self.phoneme_counts)

        # Get diphone information
        self.diphone_freqs = counter_freqs(corpus.diphone_counts)

        ## Compute P(xy|#) and P(#|xy)
        # For P(#|xy), make a list of 1 for True, 0 for False to make
        # summing easier
        diphone_outcomes = defaultdict(list)
        # For P(xy|#), track all diphones with each label
        boundary_diphone_counts = {True: Counter(), False: Counter()}
        # Count it all up
        total_boundaries = 0
        total_diphones = 0
        for diphone, label in chain.from_iterable(corpus.diphone_boundaries):
            boundary_diphone_counts[label][diphone] += 1
            label = int(label)
            diphone_outcomes[diphone].append(label)
            total_diphones += 1
            total_boundaries += label

        # P(xy|#)
        self.boundary_diphone_probs = \
            {label: counter_freqs(counts) for label, counts in boundary_diphone_counts.items()}
        assert all(0 <= prob <= 1.0 for diphone_probs in self.boundary_diphone_probs.values()
                   for prob in diphone_probs.values())
        # If this assertion fails but the value is very close to 1.0, this just means
        # there is a lot of accumulated rounding error.
        assert all(.999 < sum(diphone_probs.values()) < 1.001
                   for diphone_probs in self.boundary_diphone_probs.values())

        # P(#|xy)
        # Explicit float division so the result is a true proportion whether
        # or not ``from __future__ import division`` is active; ``items()``
        # works on both Python 2 and Python 3 dicts.
        self.diphone_boundary_probs = \
            {diphone: sum(outcomes) / float(len(outcomes))
             for diphone, outcomes in diphone_outcomes.items()}
        assert all(0 <= prob <= 1.0 for prob in self.diphone_boundary_probs.values())

        # P(#)
        self.p_boundary = total_boundaries / float(total_diphones)
        assert 0 <= self.p_boundary <= 1.0

        ## Get phrase initial/final counts
        initial_counts, final_counts = corpus.outside_phoneme_counts
        # P(x|initial)
        self.initial_freqs = counter_freqs(initial_counts)
        # P(x|final)
        self.final_freqs = counter_freqs(final_counts)
def phoneme_features(corpus, out_csv):
    """Write one row per phoneme with its probability and frequency rank.

    The header row is ``('phoneme', 'prob', 'rank')``. Phonemes are written
    in descending order of probability, and ``rank`` starts at 1.

    Args:
        corpus: Object exposing ``phoneme_counts``.
        out_csv: Writer object with a ``writerow`` method.
    """
    # Turn the raw counts into probabilities
    probabilities = counter_freqs(corpus.phoneme_counts)

    # Header first
    out_csv.writerow(('phoneme', 'prob', 'rank'))

    # Most probable phoneme gets rank 1
    by_probability = sorted(probabilities.items(), key=itemgetter(1), reverse=True)
    for rank, (phoneme, prob) in enumerate(by_probability, 1):
        out_csv.writerow((phoneme, prob, rank))
def diphone_features(corpus, out_csv):
    """Write one row per observed diphone token to a CSV.

    Each row holds the diphone joined into a single string, its probability
    from ``counter_freqs`` over the corpus diphone counts, and its boundary
    label passed through ``convert_r_bool``.

    Args:
        corpus: Object exposing ``diphone_boundaries`` (an iterable of
            iterables of ``(diphone, label)`` pairs) and ``diphone_counts``.
        out_csv: Writer object with a ``writerow`` method.
    """
    # Flatten the per-utterance sequences into one stream of labeled diphones
    labeled_diphones = chain.from_iterable(corpus.diphone_boundaries)
    # Turn the raw counts into probabilities
    probabilities = counter_freqs(corpus.diphone_counts)

    # Header first
    header = ('diphone', 'prob', BOUNDARY_HEADER)
    out_csv.writerow(header)

    # One row per diphone token
    for pair, is_boundary in labeled_diphones:
        row = (''.join(pair), probabilities[pair], convert_r_bool(is_boundary))
        out_csv.writerow(row)
def dibs_features(corpus, out_csv):
    """Write information for the DiBs segmentation model to a CSV.

    One row is written per diphone token in ``corpus.diphone_boundaries``,
    with the columns:

    * ``diphone``: the diphone joined into a single string.
    * ``prob.true``: observed P(#|xy), the fraction of occurrences of the
      diphone that were labeled as a boundary.
    * ``prob.dibs``: P(x|final) * P(y|initial) * 0.28 / P(xy).
    * ``prob.est1``: P(x|final) * P(y|initial) * 0.28 / (P(x) * P(y)).
    * ``prob.est2``: P(x) * P(final|x) * P(y|initial) / P(xy).
    * ``score``: 2 * P(x|final) * P(y|initial) / P(xy).
    * the boundary label, passed through ``convert_r_bool``.

    Args:
        corpus: Object exposing ``phoneme_counts``, ``diphone_counts``,
            ``diphone_boundaries`` (an iterable of iterables of
            ``(diphone, label)`` pairs) and ``outside_phoneme_counts``
            (an ``(initial_counts, final_counts)`` pair).
        out_csv: Writer object with a ``writerow`` method.

    Raises:
        AssertionError: If a computed probability falls outside [0, 1] or
            a P(xy|#) distribution does not sum to approximately 1.
    """
    # P(#) assumed by the Daland-style estimates: the mean value explored in
    # their study
    assumed_p_boundary = 0.28

    # Get phoneme and boundary information
    phoneme_counts = corpus.phoneme_counts
    phoneme_freq = counter_freqs(phoneme_counts)

    # Get diphone information
    diphone_freq = counter_freqs(corpus.diphone_counts)

    ## Compute P(xy|#) and P(#|xy)
    # For P(#|xy), make a list of 1 for True, 0 for False to make
    # summing easier
    diphone_outcomes = defaultdict(list)
    # For P(xy|#), track all diphones with each label
    boundary_diphone_counts = {True: Counter(), False: Counter()}
    # Count it all up
    total_boundaries = 0
    total_diphones = 0
    for diphone, label in chain.from_iterable(corpus.diphone_boundaries):
        boundary_diphone_counts[label][diphone] += 1
        label = int(label)
        diphone_outcomes[diphone].append(label)
        total_diphones += 1
        total_boundaries += label

    # P(xy|#)
    boundary_diphone_probs = \
        {label: counter_freqs(counts) for label, counts in boundary_diphone_counts.items()}
    assert all(0 <= prob <= 1.0 for diphone_probs in boundary_diphone_probs.values()
               for prob in diphone_probs.values())
    # If this assertion fails but the value is very close to 1.0, this just means
    # there is a lot of accumulated rounding error.
    assert all(.99 < sum(diphone_probs.values()) < 1.01
               for diphone_probs in boundary_diphone_probs.values())

    # P(#|xy)
    # Explicit float division so the result is a true proportion whether or
    # not ``from __future__ import division`` is active; ``items()`` works on
    # both Python 2 and Python 3 dicts.
    diphone_boundary_probs = \
        {diphone: sum(outcomes) / float(len(outcomes))
         for diphone, outcomes in diphone_outcomes.items()}
    assert all(0 <= prob <= 1.0 for prob in diphone_boundary_probs.values())

    # P(#)
    p_boundary = total_boundaries / float(total_diphones)
    assert 0 <= p_boundary <= 1.0

    ## Get phrase initial/final counts
    initial_counts, final_counts = corpus.outside_phoneme_counts
    # P(x|initial)
    initial_freq = counter_freqs(initial_counts)
    # P(x|final)
    final_freq = counter_freqs(final_counts)

    # Output information for each boundary
    out_csv.writerow(('diphone', 'prob.true', 'prob.dibs', 'prob.est1', 'prob.est2', 'score',
                      BOUNDARY_HEADER))
    for diphone, label in chain.from_iterable(corpus.diphone_boundaries):
        # Estimate P(x|final) and P(y|initial) for a diphone xy; phonemes never
        # seen in that position get probability 0
        phone1, phone2 = diphone
        p_phone1_final = final_freq[phone1] if phone1 in final_freq else 0.0
        p_phone2_init = initial_freq[phone2] if phone2 in initial_freq else 0.0

        # P(xy), shared by several of the estimates below
        p_diphone = diphone_freq[diphone]
        assert 1.0 >= p_diphone >= 0.0

        # Compute the DiBS score
        dibs_score = (2.0 * p_phone1_final * p_phone2_init) / p_diphone

        # True P(#|xy)
        # If you want to do it by Bayes' rule, it would be:
        # P(xy|#) * P(#) / P(xy)
        # (boundary_diphone_probs[True][diphone] * p_boundary) / diphone_freq[diphone]
        true_prob = diphone_boundary_probs[diphone]

        # Compute Daland's estimated P(#|xy) under the assumed P(#)
        dibs_prob = (p_phone1_final * p_phone2_init * assumed_p_boundary) / p_diphone

        # A way to estimate with a more normal normalization in the denominator
        est1_prob = ((p_phone1_final * p_phone2_init * assumed_p_boundary) /
                     (phoneme_freq[phone1] * phoneme_freq[phone2]))

        # Another way of estimating:
        # P(x) * P(#|x) * P(y|#) / P(xy)
        # P(x) * P(final|x) * P(y|initial) / P(xy)
        # Both operands are raw counts, so force float division to avoid
        # truncating the ratio to 0 under integer division.
        try:
            p_final_given_phone1 = final_counts[phone1] / float(phoneme_counts[phone1])
        except KeyError:
            p_final_given_phone1 = 0.0
        est2_prob = ((phoneme_freq[phone1] * p_final_given_phone1 * p_phone2_init) /
                     p_diphone)

        out_csv.writerow((''.join(diphone), true_prob, dibs_prob, est1_prob,
                          est2_prob, dibs_score, convert_r_bool(label)))