Пример #1
0
    def do_mstep(self, curriter):
        """Re-estimate Z, Wf, Wt and H and prune the resulting bases.

        For every parameter the same recipe is applied: the accumulated
        statistics (``self.VR*``) are offset by the matching ``alpha - 1``
        term, passed through ``_fix_negative_values``, normalized, and
        finally refined by ``_apply_entropic_prior_and_normalize`` using
        the matching ``beta`` and ``self.nu``.

        Parameters
        ----------
        curriter : int
            Forwarded unchanged to ``_prune_undeeded_bases``
            (presumably the current iteration number -- confirm against
            the caller).

        Returns
        -------
        Whatever ``_prune_undeeded_bases(Wf, Wt, Z, H, curriter)`` returns.
        """
        def reestimate(evidence, beta, **axis_kw):
            # Normalize the evidence to get a starting point, then apply
            # the entropic prior along the same axis (if one is given).
            start = normalize(evidence, **axis_kw)
            return self._apply_entropic_prior_and_normalize(
                start, evidence, beta, nu=self.nu, **axis_kw)

        # Z: evidence pooled from both the Wf and Wt statistics.
        z_evidence = self._fix_negative_values(
            self.VRWf.sum(0) + self.VRWt.sum(1) + self.alphaZ - 1)
        Z = reestimate(z_evidence, self.betaZ)

        # Both W evidences are computed before either factor is updated.
        wf_evidence = self._fix_negative_values(
            self.VRWf + self.alphaWf - 1)
        wt_evidence = self._fix_negative_values(
            self.VRWt + self.alphaWt - 1)
        Wf = reestimate(wf_evidence, self.betaWf, axis=0)
        Wt = reestimate(wt_evidence, self.betaWt, axis=1)

        # H: the statistics are stored with their axes rotated, so bring
        # them into (1, 2, 0) order before adding the prior.
        h_evidence = self._fix_negative_values(
            self.VRH.transpose((1, 2, 0)) + self.alphaH - 1)
        H = reestimate(h_evidence, self.betaH, axis=[1, 2])

        return self._prune_undeeded_bases(Wf, Wt, Z, H, curriter)
Пример #2
0
def _best_siplca_analysis(seq, rank, win, nrep, kwargs):
    """Run plca.SIPLCA.analyze `nrep` times and keep the best result.

    The best result is the one whose last tuple element (unpacked as
    `div` by the caller) is smallest.
    """
    outputs = [plca.SIPLCA.analyze(seq, rank=rank, win=win, **kwargs)
               for _ in range(nrep)]
    return outputs[np.argmin([x[-1] for x in outputs])]


def segment_song(seq, rank=4, win=32, seed=None,
                 nrep=1, minsegments=3, maxlowen=10, maxretries=5,
                 uninformativeWinit=False, uninformativeHinit=True,
                 normalize_frames=True, viterbi_segmenter=False,
                 align_downbeats=False, **kwargs):
    r"""Segment the given feature sequence using SI-PLCA

    Parameters
    ----------
    seq : array, shape (F, T)
        Feature sequence to segment.  The input is copied, not modified.
    rank : int
        Number of patterns (unique segments) to search for.
    win : int
        Length of patterns in frames.
    seed : int
        Random number generator seed.  Defaults to None.
    nrep : int
        Number of times to repeat the analysis.  The repetition with
        the lowest reconstruction error is returned.  Defaults to 1.
    minsegments : int
        Minimum number of segments in the output.  The analysis is
        repeated until the output contains at least `minsegments`
        segments or `maxretries` is reached.  Defaults to 3.
    maxlowen : int
        Maximum number of low energy frames in the SIPLCA
        reconstruction.  The analysis is repeated if it contains too
        many gaps.  If `seq` itself contains more low energy frames
        than this, the limit is raised to that count.  Defaults to 10.
    maxretries : int
        Maximum number of retries to perform if `minsegments` or
        `maxlowen` are not satisfied.  Defaults to 5.
    uninformativeWinit : boolean
        If True, `W` is initialized to have a flat distribution.
        Defaults to False.
    uninformativeHinit : boolean
        If True, `H` is initialized to have a flat distribution.
        Defaults to True.
    normalize_frames : boolean
        If True, normalizes each frame of `seq` so that the maximum
        value is 1.  Defaults to True.
    viterbi_segmenter : boolean
        If True, uses the Viterbi algorithm to convert the SIPLCA
        decomposition into a segmentation, otherwise uses the process
        described in [1].  Defaults to False.
    align_downbeats : boolean
        If True, postprocess the SIPLCA analysis to find the optimal
        alignments of the components of W with V.  I.e. try to align
        the first column of W to the downbeats in the song.  Defaults
        to False.
    kwargs : dict
        Keyword arguments passed to plca.SIPLCA.analyze.  See
        plca.SIPLCA for more details.  If both `alphaWcutoff` and
        `alphaWslope` are given they are replaced by an `alphaW`
        built with create_sparse_W_prior.  The (possibly updated)
        kwargs are also forwarded to the segmentation function.

    Returns
    -------
    labels : array, length `T`
        Segment label for each frame of `seq`.
    W : array, shape (`F`, `rank`, `win`)
        Set of `F` x `win` shift-invariant basis functions found in `seq`.
    Z : array, length `rank`
        Set of mixing weights for each basis.
    H : array, shape (`rank`, `T`)
        Activations of each basis in time.
    segfun : array, shape (`rank`, `T`)
        Raw segmentation function used to generate segment labels from
        SI-PLCA decomposition.  Corresponds to $\ell_k(t)$ in [1].
    norm : float
        Normalization constant to make `seq` sum to 1.

    Notes
    -----
    The experimental results reported in [1] were found using the
    default values for all keyword arguments while varying kwargs.

    """
    eps = np.finfo(float).eps

    seq = seq.copy()
    if normalize_frames:
        seq /= seq.max(0) + eps

    logger.info('Using random seed %s.', seed)
    np.random.seed(seed)

    F, T = seq.shape
    if 'alphaWcutoff' in kwargs and 'alphaWslope' in kwargs:
        kwargs['alphaW'] = create_sparse_W_prior((F, win),
                                                 kwargs['alphaWcutoff'],
                                                 kwargs['alphaWslope'])
        del kwargs['alphaWcutoff']
        del kwargs['alphaWslope']

    if uninformativeWinit:
        kwargs['initW'] = np.ones((F, rank, win)) / (F * win)
    if uninformativeHinit:
        kwargs['initH'] = np.ones((rank, T)) / T

    W, Z, H, norm, recon, div = _best_siplca_analysis(
        seq, rank, win, nrep, kwargs)

    # Need to rerun segmentation if there are too few segments or
    # if there are too many gaps in recon (i.e. H)
    lowen = F * eps
    # Never demand fewer low energy frames than the input itself has.
    nlowen_seq = np.sum(seq.sum(0) <= lowen)
    if nlowen_seq > maxlowen:
        maxlowen = nlowen_seq
    nlowen_recon = np.sum(recon.sum(0) <= lowen)
    nretries = maxretries
    while (len(Z) < minsegments or nlowen_recon > maxlowen) and nretries > 0:
        nretries -= 1
        logger.info('Redoing SIPLCA analysis (len(Z) = %d, number of '
                    'low energy frames = %d).', len(Z), nlowen_recon)
        W, Z, H, norm, recon, div = _best_siplca_analysis(
            seq, rank, win, nrep, kwargs)
        nlowen_recon = np.sum(recon.sum(0) <= lowen)

    if align_downbeats:
        alignedW = plca.normalize(find_downbeat(seq, W) + 0.1 * eps, 1)
        # The analysis may have returned fewer bases than requested.
        rank = len(Z)
        if uninformativeHinit:
            kwargs['initH'] = np.ones((rank, T)) / T
        if 'alphaZ' in kwargs:
            kwargs['alphaZ'] = 0
        # kwargs may already contain 'initW' (set above when
        # uninformativeWinit is True, or supplied by the caller).  Passing
        # initW=alignedW next to **kwargs would then raise TypeError for a
        # duplicate keyword, so override it in a copy used for this call
        # only; kwargs itself stays as it was for the segmentation step.
        realign_kwargs = dict(kwargs, initW=alignedW)
        W, Z, H, norm, recon, div = plca.SIPLCA.analyze(
            seq, rank=rank, win=win, **realign_kwargs)

    if viterbi_segmenter:
        segmentation_function = nmf_analysis_to_segmentation_using_viterbi_path
    else:
        segmentation_function = nmf_analysis_to_segmentation
    labels, segfun = segmentation_function(seq, win, W, Z, H, **kwargs)

    return labels, W, Z, H, segfun, norm
Пример #3
0
 def initialize(self):
     """Return random, normalized starting values ``(Wf, Wt, Z, H)``.

     ``Z`` and ``H`` are taken from ``SIPLCA2.initialize``; the ``W`` it
     returns is discarded and replaced by two freshly drawn factors:
     ``Wf`` of shape ``(self.F, self.rank)`` normalized along axis 0 and
     ``Wt`` of shape ``(self.rank, self.winT)`` normalized along axis 1.
     """
     # The parent initializer runs first so the order of random draws
     # stays: parent, then Wf, then Wt.
     _unused_W, Z, H = SIPLCA2.initialize(self)

     random_Wf = np.random.rand(self.F, self.rank)
     Wf = normalize(random_Wf, 0)

     random_Wt = np.random.rand(self.rank, self.winT)
     Wt = normalize(random_Wt, 1)

     return Wf, Wt, Z, H
Пример #4
0
def _best_siplca_analysis(seq, rank, win, nrep, kwargs):
    """Run plca.SIPLCA.analyze `nrep` times and keep the best result.

    The best result is the one whose last tuple element (unpacked as
    `div` by the caller) is smallest.
    """
    outputs = [
        plca.SIPLCA.analyze(seq, rank=rank, win=win, **kwargs)
        for _ in range(nrep)
    ]
    return outputs[np.argmin([x[-1] for x in outputs])]


def segment_song(seq,
                 rank=4,
                 win=32,
                 seed=None,
                 nrep=1,
                 minsegments=3,
                 maxlowen=10,
                 maxretries=5,
                 uninformativeWinit=False,
                 uninformativeHinit=True,
                 normalize_frames=True,
                 viterbi_segmenter=False,
                 align_downbeats=False,
                 **kwargs):
    r"""Segment the given feature sequence using SI-PLCA

    Parameters
    ----------
    seq : array, shape (F, T)
        Feature sequence to segment.  The input is copied, not modified.
    rank : int
        Number of patterns (unique segments) to search for.
    win : int
        Length of patterns in frames.
    seed : int
        Random number generator seed.  Defaults to None.
    nrep : int
        Number of times to repeat the analysis.  The repetition with
        the lowest reconstruction error is returned.  Defaults to 1.
    minsegments : int
        Minimum number of segments in the output.  The analysis is
        repeated until the output contains at least `minsegments`
        segments or `maxretries` is reached.  Defaults to 3.
    maxlowen : int
        Maximum number of low energy frames in the SIPLCA
        reconstruction.  The analysis is repeated if it contains too
        many gaps.  If `seq` itself contains more low energy frames
        than this, the limit is raised to that count.  Defaults to 10.
    maxretries : int
        Maximum number of retries to perform if `minsegments` or
        `maxlowen` are not satisfied.  Defaults to 5.
    uninformativeWinit : boolean
        If True, `W` is initialized to have a flat distribution.
        Defaults to False.
    uninformativeHinit : boolean
        If True, `H` is initialized to have a flat distribution.
        Defaults to True.
    normalize_frames : boolean
        If True, normalizes each frame of `seq` so that the maximum
        value is 1.  Defaults to True.
    viterbi_segmenter : boolean
        If True, uses the Viterbi algorithm to convert the SIPLCA
        decomposition into a segmentation, otherwise uses the process
        described in [1].  Defaults to False.
    align_downbeats : boolean
        If True, postprocess the SIPLCA analysis to find the optimal
        alignments of the components of W with V.  I.e. try to align
        the first column of W to the downbeats in the song.  Defaults
        to False.
    kwargs : dict
        Keyword arguments passed to plca.SIPLCA.analyze.  See
        plca.SIPLCA for more details.  If both `alphaWcutoff` and
        `alphaWslope` are given they are replaced by an `alphaW`
        built with create_sparse_W_prior.  The (possibly updated)
        kwargs are also forwarded to the segmentation function.

    Returns
    -------
    labels : array, length `T`
        Segment label for each frame of `seq`.
    W : array, shape (`F`, `rank`, `win`)
        Set of `F` x `win` shift-invariant basis functions found in `seq`.
    Z : array, length `rank`
        Set of mixing weights for each basis.
    H : array, shape (`rank`, `T`)
        Activations of each basis in time.
    segfun : array, shape (`rank`, `T`)
        Raw segmentation function used to generate segment labels from
        SI-PLCA decomposition.  Corresponds to $\ell_k(t)$ in [1].
    norm : float
        Normalization constant to make `seq` sum to 1.

    Notes
    -----
    The experimental results reported in [1] were found using the
    default values for all keyword arguments while varying kwargs.

    """
    eps = np.finfo(float).eps

    seq = seq.copy()
    if normalize_frames:
        seq /= seq.max(0) + eps

    logger.info('Using random seed %s.', seed)
    np.random.seed(seed)

    F, T = seq.shape
    if 'alphaWcutoff' in kwargs and 'alphaWslope' in kwargs:
        kwargs['alphaW'] = create_sparse_W_prior(
            (F, win), kwargs['alphaWcutoff'], kwargs['alphaWslope'])
        del kwargs['alphaWcutoff']
        del kwargs['alphaWslope']

    if uninformativeWinit:
        kwargs['initW'] = np.ones((F, rank, win)) / (F * win)
    if uninformativeHinit:
        kwargs['initH'] = np.ones((rank, T)) / T

    W, Z, H, norm, recon, div = _best_siplca_analysis(
        seq, rank, win, nrep, kwargs)

    # Need to rerun segmentation if there are too few segments or
    # if there are too many gaps in recon (i.e. H)
    lowen = F * eps
    # Never demand fewer low energy frames than the input itself has.
    nlowen_seq = np.sum(seq.sum(0) <= lowen)
    if nlowen_seq > maxlowen:
        maxlowen = nlowen_seq
    nlowen_recon = np.sum(recon.sum(0) <= lowen)
    nretries = maxretries
    while (len(Z) < minsegments or nlowen_recon > maxlowen) and nretries > 0:
        nretries -= 1
        logger.info(
            'Redoing SIPLCA analysis (len(Z) = %d, number of '
            'low energy frames = %d).', len(Z), nlowen_recon)
        W, Z, H, norm, recon, div = _best_siplca_analysis(
            seq, rank, win, nrep, kwargs)
        nlowen_recon = np.sum(recon.sum(0) <= lowen)

    if align_downbeats:
        alignedW = plca.normalize(find_downbeat(seq, W) + 0.1 * eps, 1)
        # The analysis may have returned fewer bases than requested.
        rank = len(Z)
        if uninformativeHinit:
            kwargs['initH'] = np.ones((rank, T)) / T
        if 'alphaZ' in kwargs:
            kwargs['alphaZ'] = 0
        # kwargs may already contain 'initW' (set above when
        # uninformativeWinit is True, or supplied by the caller).  Passing
        # initW=alignedW next to **kwargs would then raise TypeError for a
        # duplicate keyword, so override it in a copy used for this call
        # only; kwargs itself stays as it was for the segmentation step.
        realign_kwargs = dict(kwargs, initW=alignedW)
        W, Z, H, norm, recon, div = plca.SIPLCA.analyze(seq,
                                                        rank=rank,
                                                        win=win,
                                                        **realign_kwargs)

    if viterbi_segmenter:
        segmentation_function = nmf_analysis_to_segmentation_using_viterbi_path
    else:
        segmentation_function = nmf_analysis_to_segmentation
    labels, segfun = segmentation_function(seq, win, W, Z, H, **kwargs)

    return labels, W, Z, H, segfun, norm