Exemplo n.º 1
0
    def processFlat(self):
        """Run the VMO-based flat segmentation on the preprocessed features.

        Returns
        -------
        est_idxs : np.array(N)
            Estimated indices of the segment boundaries, in frame indices.
        est_labels : np.array(N-1)
            Estimated labels for the segments.
        """
        # Features arrive as array(n_frames, n_features); normalize along
        # axis 0, then run librosa's stack_memory on the transposed matrix
        # and transpose back.
        normalized = librosa.util.normalize(self._preprocess(), axis=0)
        F = librosa.feature.stack_memory(normalized.T).T
        n_dims = F.shape[1]

        # Search for a threshold, then build the oracle with the value stored
        # at [0][1] of the search result.
        search = vmo.find_threshold(F, dim=n_dims)
        best_threshold = search[0][1]
        oracle = vmo.build_oracle(F,
                                  flag='a',
                                  threshold=best_threshold,
                                  dim=n_dims)

        raw_bounds, raw_labels = segmentation(
            oracle,
            method=self.config['method'],
            connectivity=self.config['connectivity'])

        # Post process estimations; the last raw label is dropped before
        # handing the labels over.
        est_idxs, est_labels = self._postprocess(raw_bounds, raw_labels[:-1])

        # Sanity check: boundaries must span the first to the last frame.
        assert est_idxs[0] == 0
        assert est_idxs[-1] == F.shape[0] - 1
        return est_idxs, est_labels
Exemplo n.º 2
0
def vmo_routine(feature):
    """Build a VMO oracle for ``feature`` with an automatically found threshold.

    ``vmo.find_threshold`` is run first; the value at ``[0][1]`` of its
    result is then passed as ``threshold`` to ``vmo.build_oracle``.

    Parameters
    ----------
    feature : 2-D array
        Feature matrix; ``feature.shape[1]`` is passed as ``dim``
        (presumably frames x feature dimensions -- confirm with the caller).

    Returns
    -------
    The oracle returned by ``vmo.build_oracle``.
    """
    n_dims = feature.shape[1]
    search = vmo.find_threshold(feature, dim=n_dims)
    best_threshold = search[0][1]
    return vmo.build_oracle(feature,
                            flag='a',
                            threshold=best_threshold,
                            dim=n_dims)
Exemplo n.º 3
0
def _self_sim_vmo(feature, d, m='lrs'):
    """Build a VMO oracle for ``feature`` and return its self-similarity.

    Parameters
    ----------
    feature : 2-D array
        Feature matrix; ``feature.shape[1]`` is passed as ``dim``.
    d : str
        Distance function name forwarded as ``dfunc``. It also selects the
        threshold search range ``r`` handed to ``vmo.find_threshold``.
    m : str
        ``method`` argument forwarded to ``vse.create_selfsim``.

    Returns
    -------
    Whatever ``vse.create_selfsim`` returns for the built oracle.
    """
    # Threshold search range per distance metric
    # (presumably (start, stop, step) -- confirm against vmo.find_threshold).
    unit_range = (0., 1., 0.02)
    search_ranges = {
        'cosine': unit_range,
        'correlation': unit_range,
        'euclidean': (0., 30., .5),
        'sqeuclidean': (0., 800., 16.),
    }
    # The original test `d == 'cosine' or 'correlation'` was always truthy,
    # so every metric silently got the unit range. Unknown metrics keep that
    # range so callers that relied on it do not start failing.
    r = search_ranges.get(d, unit_range)

    ideal_t = vmo.find_threshold(feature, r=r, flag='a', dfunc=d,
                                 dim=feature.shape[1])

    oracle = vmo.build_oracle(feature,
                              flag='a',
                              threshold=ideal_t[0][1],
                              dfunc=d, dim=feature.shape[1])
    return vse.create_selfsim(oracle, method=m)
Exemplo n.º 4
0
    def generate(self, audio_path, sr=44100, hop_length=512):
        """Segmentation of an audio recording using the Constant Q Transform
        (CQT)

        The result is stored on the instance: ``self._oracle`` holds the
        oracle built from the CQT features and ``self._segmentation`` holds
        one ``Section`` per estimated interval. Nothing is returned.

        Args:
            audio_path (str): A string representing the path of the audio file
            sr (int): Sampling rate (default: 44100)
            hop_length (int): Number of samples between successive CQT columns
            (default: 512)
        """

        # Extract feature
        y, sr = librosa.load(audio_path, sr=sr)
        cqt = librosa.core.cqt(y, sr=sr, hop_length=hop_length)

        # Synthetic beat grid: one boundary every 30 CQT frames. The grid is
        # built over the number of CQT columns rather than the number of audio
        # samples, because these are frame indices into `cqt`.
        beat = range(1, cqt.shape[1], 30)
        cqt_sync = librosa.feature.sync(cqt, beat, aggregate=np.median)
        cqt_stack = librosa.feature.stack_memory(cqt_sync, n_steps=3)

        # The oracle functions take the transposed feature matrix, with the
        # number of rows of cqt_stack passed as dim.
        frames = cqt_stack.T
        n_dims = cqt_stack.shape[0]

        # Find ideal threshold
        r = (0, 1.1, 0.1)
        threshold = vmo.find_threshold(frames, r=r, dim=n_dims)
        ideal_t = threshold[0][1]

        # Build Oracle
        cqt_vmo = vmo.build_oracle(frames,
                                   flag='a',
                                   threshold=ideal_t,
                                   dim=n_dims)

        # Symbol spectral
        method = 'symbol_spectral'
        est_boundaries, est_labels = van.segmentation(cqt_vmo,
                                                      method=method,
                                                      connectivity='lrs')
        # Consecutive boundaries delimit one interval each.
        est_intervals = zip(est_boundaries[:-1], est_boundaries[1:])
        # Map each label to its index among the sorted unique labels.
        _, invind = np.unique(est_labels, return_inverse=True)

        # Setting attributes
        self._oracle = cqt_vmo
        self._segmentation = [
            Section(label, Region(begin, end))
            for label, (begin, end) in zip(invind, est_intervals)
        ]
Exemplo n.º 5
0
Arquivo: main.py Projeto: wangsix/msaf
def vmo_routine(feature):
    """Return a VMO oracle for ``feature`` built at the searched threshold.

    The threshold is the value at ``[0][1]`` of the result of
    ``vmo.find_threshold``; ``feature.shape[1]`` is passed as ``dim`` to both
    vmo calls.
    """
    dim = feature.shape[1]
    threshold = vmo.find_threshold(feature, dim=dim)[0][1]
    return vmo.build_oracle(feature, flag='a', threshold=threshold, dim=dim)
Exemplo n.º 6
0
    for i, p in enumerate(audio_test['pattern']):
        for _p in p:
            start = _p[0] - audio_test['info'][2][0]
            end = _p[1] - audio_test['info'][2][0]
            len_list.append(end - start)
            ground[i][start:end + 1] = i + 1
    min_len = int(
        min(len_list) * len(subbeat_mat[ind]) /
        (audio_test['info'][2][1] - audio_test['info'][2][0])) + 1
    min_len_list.append(min_len)
print min_len_list

# Time the oracle construction and repeated-pattern search over five items.
start_time = time.time()
for ind in range(5):
    chroma_frames = feature_mat[ind].transpose()
    # Threshold search range handed to vmo.find_threshold.
    r = (0.0, 1.0, 0.01)
    ideal_v_inv = vmo.find_threshold(chroma_frames,
                                     r=r,
                                     flag='a',
                                     dfunc='other',
                                     dfunc_handle=trnspose_inv,
                                     VERBOSE=False)
    # Build the oracle at the threshold stored at [0][1] of the search result.
    oracle_inv = vmo.build_oracle(chroma_frames,
                                  flag='a',
                                  threshold=ideal_v_inv[0][1],
                                  feature='chroma',
                                  dfunc='other',
                                  dfunc_handle=trnspose_inv)
    # Only the result of the last iteration stays bound to `pattern`.
    pattern = van.find_repeated_patterns(oracle_inv, lower=5)
# Call form of print: same output on Python 2, valid syntax on Python 3.
print(str(time.time() - start_time))
    # NOTE(review): this snippet is a fragment; `y`, `sr`, `hop_size`, `C`,
    # `r`, `audio_test`, `pre` and `trnspose_inv` are defined outside the
    # visible code.
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_size)

    ### Sub-Beat-Synchronous Chromagram
    # For every pair of consecutive beats, keep the beat itself and the
    # midpoint to the next beat (linspace with num=2 and endpoint=False),
    # truncated to integer frame indices.
    subbeats = []
    for bs, be in zip(beats[:-1], beats[1:]):
        subbeats.extend(np.linspace(bs, be, num=2, endpoint=False).astype("int").tolist())
    # Same subdivision between the last beat and the last column of C.
    subbeats.extend(np.linspace(beats[-1], C.shape[1], num=2, endpoint=False).astype("int").tolist())
    # Aggregate C between sub-beat boundaries using the median.
    C_sync = librosa.feature.sync(C, subbeats, aggregate=np.median)
    # The closing boundary is appended only after the sync call.
    subbeats.append(C.shape[1])
    # Machine epsilon keeps the logarithm finite where C_sync is zero.
    feature = np.log(C_sync + np.finfo(float).eps)
    # presumably sklearn.preprocessing.normalize -- TODO confirm what `pre` is
    feature = pre.normalize(feature, axis=0)

    ### Create VMO
    # The vmo calls receive the transposed feature matrix, with its second
    # dimension passed as `dim`.
    chroma_frames = feature.transpose()
    ideal_v_inv = vmo.find_threshold(
        chroma_frames, r=r, flag="a", dfunc="other", dfunc_handle=trnspose_inv, dim=chroma_frames.shape[1]
    )
    # Build the oracle at the threshold stored at [0][1] of the search result.
    oracle_inv = vmo.build_oracle(
        chroma_frames,
        flag="a",
        threshold=ideal_v_inv[0][1],
        feature="chroma",
        dfunc="other",
        dfunc_handle=trnspose_inv,
        dim=chroma_frames.shape[1],
    )

    ### Gather Ground Truth from Dataset

    # One row per annotated pattern; the column count is the info[2] span.
    ground = np.zeros((len(audio_test["pattern"]), audio_test["info"][2][1] - audio_test["info"][2][0]))
    len_list = []
Exemplo n.º 8
0
    # NOTE(review): this snippet is a fragment; `y`, `sr`, `fft_size`,
    # `hop_size`, `r`, `audio_test`, `pre` and `trnspose_inv` are defined
    # outside the visible code.
    C = librosa.feature.chromagram(y=y, sr=sr, n_fft=fft_size, hop_length=hop_size)
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_size)
    
    ### Sub-Beat-Synchronous Chromagram
    # For every pair of consecutive beats, keep the beat itself and the
    # midpoint to the next beat (linspace with num=2 and endpoint=False),
    # truncated to integer frame indices.
    subbeats = []
    for bs, be in zip(beats[:-1],beats[1:]):
        subbeats.extend(np.linspace(bs, be, num=2, endpoint = False).astype('int').tolist())
    # Same subdivision between the last beat and the last column of C.
    subbeats.extend(np.linspace(beats[-1], C.shape[1], num=2, endpoint=False).astype('int').tolist())
    # Aggregate C between sub-beat boundaries using the median.
    C_sync = librosa.feature.sync(C, subbeats, aggregate=np.median)
    # The closing boundary is appended only after the sync call.
    subbeats.append(C.shape[1])
    # Machine epsilon keeps the logarithm finite where C_sync is zero.
    feature = np.log(C_sync+np.finfo(float).eps)
    # presumably sklearn.preprocessing.normalize -- TODO confirm what `pre` is
    feature = pre.normalize(feature, axis=0)
    
    ### Create VMO
    # The vmo calls receive the transposed feature matrix, with its second
    # dimension passed as `dim`.
    chroma_frames = feature.transpose()
    ideal_v_inv = vmo.find_threshold(chroma_frames, r=r, flag='a',
                                     dfunc='other', dfunc_handle=trnspose_inv, dim=chroma_frames.shape[1])
    # Build the oracle at the threshold stored at [0][1] of the search result.
    oracle_inv= vmo.build_oracle(chroma_frames, flag='a',
                                threshold=ideal_v_inv[0][1],
                                feature='chroma', dfunc='other', dfunc_handle=trnspose_inv,
                                dim=chroma_frames.shape[1])

    ### Gather Ground Truth from Dataset
        
    # One row per annotated pattern; the column count is the info[2] span.
    ground = np.zeros((len(audio_test['pattern']), audio_test['info'][2][1]-audio_test['info'][2][0]))
    len_list = []
    for i,p in enumerate(audio_test['pattern']):
        for _p in p:
            # Shift occurrence bounds so they are relative to info[2][0].
            start = _p[0] - audio_test['info'][2][0]
            end = _p[1] - audio_test['info'][2][0]
            len_list.append(end-start)
            # Mark the occurrence (end inclusive) with the 1-based pattern id.
            ground[i][start:end+1] = i+1
Exemplo n.º 9
0
# Show the `chroma` matrix as an image (no interpolation, automatic aspect
# ratio) with a title and axis labels; `chroma` is defined outside this snippet.
plt.imshow(chroma, interpolation='nearest', aspect='auto')
plt.title('Chroma', fontsize=18)
plt.xlabel('Analysis Frame', fontsize=14)
plt.ylabel('Chroma', fontsize=14)
plt.tight_layout()

# <markdowncell>

# ### VMO - Variable Markov Oracle

# <codecell>

# Threshold search range handed to vmo.find_threshold.
r = (0., 0.8, 0.02)
ideal_t = vmo.find_threshold(features,
                             r=r,
                             flag='a',
                             dfunc='euclidean',
                             dim=features.shape[1])

# ideal_t[1] holds pairs whose second element is plotted on the threshold
# axis and whose first element is plotted on the IR axis.
x_a = [i[1] for i in ideal_t[1]]
y_a = [i[0] for i in ideal_t[1]]
plt.figure()
plt.plot(x_a, y_a, linewidth=2)
plt.title('IR vs. Threshold Value', fontsize=18)
# Positional True instead of b='on': newer matplotlib renamed the `b` keyword
# to `visible`, while the positional form works across versions.
plt.grid(True)
plt.xlabel('Threshold', fontsize=14)
plt.ylabel('IR', fontsize=14)

# <markdowncell>

# Build the best oracle by choosing the ideal threshold (the one that gives the most informative oracle).
    
# For each of the five items, build the ground-truth matrix and record a
# rescaled minimum pattern length.
min_len_list = []
for ind in range(5):
    audio_test = audio_list[ind]
    # One row per annotated pattern; the column count is the info[2] span.
    ground = np.zeros((len(audio_test['pattern']), audio_test['info'][2][1]-audio_test['info'][2][0]))
    len_list = []
    for i,p in enumerate(audio_test['pattern']):
        for _p in p:
            # Shift occurrence bounds so they are relative to info[2][0].
            start = _p[0] - audio_test['info'][2][0]
            end = _p[1] - audio_test['info'][2][0]
            len_list.append(end-start)
            # Mark the occurrence (end inclusive) with the 1-based pattern id.
            ground[i][start:end+1] = i+1
    # Shortest occurrence length rescaled by the ratio of len(subbeat_mat[ind])
    # to the info[2] span, plus one.
    min_len = int(min(len_list)*len(subbeat_mat[ind])/(audio_test['info'][2][1]-audio_test['info'][2][0]))+1
    min_len_list.append(min_len)
# Call form of print: same output on Python 2, valid syntax on Python 3.
print(min_len_list)

# Time the oracle construction and repeated-pattern search over five items.
start_time = time.time()
for ind in range(5):
    chroma_frames = feature_mat[ind].transpose()
    # Threshold search range handed to vmo.find_threshold.
    r = (0.0, 1.0, 0.01)
    ideal_v_inv = vmo.find_threshold(chroma_frames, r=r, flag='a',
                                     dfunc='other', dfunc_handle=trnspose_inv,
                                     VERBOSE=False)
    # Build the oracle at the threshold stored at [0][1] of the search result.
    oracle_inv = vmo.build_oracle(chroma_frames, flag='a',
                                  threshold=ideal_v_inv[0][1],
                                  feature='chroma', dfunc='other',
                                  dfunc_handle=trnspose_inv)
    # Only the result of the last iteration stays bound to `pattern`.
    pattern = van.find_repeated_patterns(oracle_inv, lower=5)
# Call form of print: same output on Python 2, valid syntax on Python 3.
print(str(time.time()-start_time))