def processFlat(self):
    """Segment the track with a Variable Markov Oracle built on its features.

    Returns
    -------
    est_idxs : np.array(N)
        Estimated indices of the segment boundaries, in frame indices.
    est_labels : np.array(N-1)
        Estimated labels for the segments.
    """
    # Features arrive as array(n_frames, n_features); normalize them, then
    # stack delayed copies (stack_memory works on the transposed layout).
    features = librosa.util.normalize(self._preprocess(), axis=0)
    stacked = librosa.feature.stack_memory(features.T)
    F = stacked.T
    n_dims = F.shape[1]

    # Search for the oracle threshold, then build the oracle with it.
    search = vmo.find_threshold(F, dim=n_dims)
    best_threshold = search[0][1]
    oracle = vmo.build_oracle(F, flag='a', threshold=best_threshold,
                              dim=n_dims)

    my_bounds, my_labels = segmentation(
        oracle,
        method=self.config['method'],
        connectivity=self.config['connectivity'])

    # Post process estimations (the last label is dropped before the call).
    est_idxs, est_labels = self._postprocess(my_bounds, my_labels[:-1])

    # Sanity check: boundaries must span the first to the last frame.
    assert est_idxs[0] == 0 and est_idxs[-1] == F.shape[0] - 1

    return est_idxs, est_labels
def vmo_routine(feature):
    """Build a VMO oracle for ``feature`` using a searched threshold.

    Parameters
    ----------
    feature : 2-D array-like
        Observation sequence; ``feature.shape[1]`` is passed to vmo as the
        feature dimensionality.

    Returns
    -------
    oracle
        The object returned by ``vmo.build_oracle`` (flag ``'a'``).
    """
    n_dims = feature.shape[1]
    # The threshold is read from element [0][1] of the search result.
    search = vmo.find_threshold(feature, dim=n_dims)
    return vmo.build_oracle(feature, flag='a', threshold=search[0][1],
                            dim=n_dims)
def _self_sim_vmo(feature, d, m='lrs'):
    """Build a VMO oracle with distance ``d`` and return its self-similarity.

    Parameters
    ----------
    feature : 2-D array-like
        Observation sequence; ``feature.shape[1]`` is passed to vmo as the
        feature dimensionality.
    d : str
        Distance function name forwarded to vmo as ``dfunc``. It also selects
        the threshold search range: ``'cosine'``/``'correlation'``,
        ``'euclidean'`` and ``'sqeuclidean'`` each have their own range.
    m : str, optional
        Method forwarded to ``vse.create_selfsim`` (default ``'lrs'``).

    Returns
    -------
    The value returned by ``vse.create_selfsim`` for the built oracle.
    """
    # Threshold search range per distance function
    # (presumably (start, stop, step) -- confirm against vmo.find_threshold).
    unit_range = (0., 1., 0.02)
    search_ranges = {
        'cosine': unit_range,
        'correlation': unit_range,
        'euclidean': (0., 30., .5),
        'sqeuclidean': (0., 800., 16.),
    }
    # The previous test `d == 'cosine' or 'correlation'` was always true, so
    # every metric silently got the unit range. Unlisted metrics keep that
    # range so existing callers continue to work instead of failing.
    r = search_ranges.get(d, unit_range)

    n_dims = feature.shape[1]
    ideal_t = vmo.find_threshold(feature, r=r, flag='a', dfunc=d, dim=n_dims)
    oracle = vmo.build_oracle(feature, flag='a', threshold=ideal_t[0][1],
                              dfunc=d, dim=n_dims)
    return vse.create_selfsim(oracle, method=m)
def generate(self, audio_path, sr=44100, hop_length=512):
    """Segment an audio recording from its Constant Q Transform (CQT).

    The result is stored on the instance: ``self._oracle`` receives the
    oracle and ``self._segmentation`` a list of ``Section`` objects. Nothing
    is returned.

    Args:
        audio_path (str): Path of the audio file.
        sr (int): Sampling rate (default: 44100).
        hop_length (int): Number of samples between successive CQT columns
            (default: 512).
    """
    # --- Feature extraction ---
    y, sr = librosa.load(audio_path, sr=sr)
    cqt = librosa.core.cqt(y, sr=sr, hop_length=hop_length)

    # Synthetic beat grid: every 30th index, starting at 1.
    # NOTE(review): the grid runs up to len(y) (presumably the sample count)
    # but is used to aggregate CQT columns, and the earlier comment spoke of
    # 33 frames while the step is 30 -- confirm the intended grid.
    beat = range(1, len(y), 30)
    cqt_sync = librosa.feature.sync(cqt, beat, aggregate=np.median)
    cqt_stack = librosa.feature.stack_memory(cqt_sync, n_steps=3)

    # --- Threshold search and oracle construction ---
    observations = cqt_stack.T
    n_dims = cqt_stack.shape[0]
    threshold = vmo.find_threshold(observations, r=(0, 1.1, 0.1), dim=n_dims)
    ideal_t = threshold[0][1]
    cqt_vmo = vmo.build_oracle(observations, flag='a', threshold=ideal_t,
                               dim=n_dims)

    # --- Segmentation (symbol spectral) ---
    est_boundaries, est_labels = van.segmentation(
        cqt_vmo, method='symbol_spectral', connectivity='lrs')
    # Only the inverse indices are needed: they relabel segments as 0..k-1.
    _, invind = np.unique(est_labels, return_inverse=True)

    # --- Store results ---
    self._oracle = cqt_vmo
    self._segmentation = [
        Section(label, Region(begin, end))
        for label, begin, end in zip(invind,
                                     est_boundaries[:-1],
                                     est_boundaries[1:])
    ]
# Fragment of a flat evaluation script (Python 2 `print` statements) whose
# line structure was collapsed onto a single line.
#
# Part 1 - ground truth: for every pattern `i` and each occurrence `_p`, the
#   (start, end) pair is shifted by audio_test['info'][2][0], its length is
#   appended to len_list, and ground[i][start:end + 1] is set to i + 1.
#   min_len rescales the shortest occurrence length by
#   len(subbeat_mat[ind]) / (info[2][1] - info[2][0]) and adds 1.
# NOTE(review): `ind`, `ground`, `len_list` and `min_len_list` are used before
#   any binding visible here -- presumably an enclosing `for ind ...` loop
#   starts above this fragment; confirm before re-indenting.
#
# Part 2 - timing: for each of 5 feature matrices, search a threshold over
#   r = (0.0, 1.0, 0.01) with the custom distance `trnspose_inv`, build the
#   oracle, and call van.find_repeated_patterns(lower=5); the elapsed
#   wall-clock time since start_time is printed.
for i, p in enumerate(audio_test['pattern']): for _p in p: start = _p[0] - audio_test['info'][2][0] end = _p[1] - audio_test['info'][2][0] len_list.append(end - start) ground[i][start:end + 1] = i + 1 min_len = int( min(len_list) * len(subbeat_mat[ind]) / (audio_test['info'][2][1] - audio_test['info'][2][0])) + 1 min_len_list.append(min_len) print min_len_list start_time = time.time() for ind in range(5): chroma_frames = feature_mat[ind].transpose() r = (0.0, 1.0, 0.01) ideal_v_inv = vmo.find_threshold(chroma_frames, r=r, flag='a', dfunc='other', dfunc_handle=trnspose_inv, VERBOSE=False) oracle_inv = vmo.build_oracle(chroma_frames, flag='a', threshold=ideal_v_inv[0][1], feature='chroma', dfunc='other', dfunc_handle=trnspose_inv) pattern = van.find_repeated_patterns(oracle_inv, lower=5) print str(time.time() - start_time)
tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_size)

### Sub-Beat-Synchronous Chromagram
# Every inter-beat interval contributes two evenly spaced points (its start
# and its midpoint, end excluded); the last interval runs from the final beat
# to the number of chroma columns.
subbeats = []
for bs, be in zip(beats[:-1], beats[1:]):
    halves = np.linspace(bs, be, num=2, endpoint=False)
    subbeats.extend(halves.astype("int").tolist())
halves = np.linspace(beats[-1], C.shape[1], num=2, endpoint=False)
subbeats.extend(halves.astype("int").tolist())

C_sync = librosa.feature.sync(C, subbeats, aggregate=np.median)
# Appended only after the sync call, so the aggregation above does not see it.
subbeats.append(C.shape[1])

# Log-compress (eps keeps log() away from zero), then normalize along axis 0.
feature = pre.normalize(np.log(C_sync + np.finfo(float).eps), axis=0)

### Create VMO
chroma_frames = feature.transpose()
# Options shared by the threshold search and the oracle construction.
vmo_kwargs = dict(
    flag="a",
    dfunc="other",
    dfunc_handle=trnspose_inv,
    dim=chroma_frames.shape[1],
)
ideal_v_inv = vmo.find_threshold(chroma_frames, r=r, **vmo_kwargs)
oracle_inv = vmo.build_oracle(
    chroma_frames,
    threshold=ideal_v_inv[0][1],
    feature="chroma",
    **vmo_kwargs
)

### Gather Ground Truth from Dataset
# One row per annotated pattern; the width is info[2][1] - info[2][0].
info_start, info_end = audio_test["info"][2][0], audio_test["info"][2][1]
ground = np.zeros((len(audio_test["pattern"]), info_end - info_start))
len_list = []
C = librosa.feature.chromagram(y=y, sr=sr, n_fft=fft_size,
                               hop_length=hop_size)
tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_size)

### Sub-Beat-Synchronous Chromagram
# Every inter-beat interval contributes two evenly spaced points (its start
# and its midpoint, end excluded); the last interval runs from the final beat
# to the number of chroma columns.
subbeats = []
for bs, be in zip(beats[:-1], beats[1:]):
    halves = np.linspace(bs, be, num=2, endpoint=False)
    subbeats.extend(halves.astype('int').tolist())
halves = np.linspace(beats[-1], C.shape[1], num=2, endpoint=False)
subbeats.extend(halves.astype('int').tolist())

C_sync = librosa.feature.sync(C, subbeats, aggregate=np.median)
# Appended only after the sync call, so the aggregation above does not see it.
subbeats.append(C.shape[1])

# Log-compress (eps keeps log() away from zero), then normalize along axis 0.
feature = pre.normalize(np.log(C_sync + np.finfo(float).eps), axis=0)

### Create VMO
chroma_frames = feature.transpose()
# Options shared by the threshold search and the oracle construction.
vmo_kwargs = dict(
    flag='a',
    dfunc='other',
    dfunc_handle=trnspose_inv,
    dim=chroma_frames.shape[1],
)
ideal_v_inv = vmo.find_threshold(chroma_frames, r=r, **vmo_kwargs)
oracle_inv = vmo.build_oracle(
    chroma_frames,
    threshold=ideal_v_inv[0][1],
    feature='chroma',
    **vmo_kwargs
)

### Gather Ground Truth from Dataset
# One row per annotated pattern; the width is info[2][1] - info[2][0].
info_start, info_end = audio_test['info'][2][0], audio_test['info'][2][1]
ground = np.zeros((len(audio_test['pattern']), info_end - info_start))
len_list = []
for i, p in enumerate(audio_test['pattern']):
    for _p in p:
        # Shift each occurrence so it is relative to info[2][0].
        start, end = _p[0] - info_start, _p[1] - info_start
        len_list.append(end - start)
        # Mark the occurrence (end inclusive) with the 1-based pattern id.
        ground[i, start:end + 1] = i + 1
plt.imshow(chroma, interpolation='nearest', aspect='auto')
plt.title('Chroma', fontsize=18)
plt.xlabel('Analysis Frame', fontsize=14)
plt.ylabel('Chroma', fontsize=14)
plt.tight_layout()

# <markdowncell>

# ### VMO - Variable Markov Oracle

# <codecell>

r = (0., 0.8, 0.02)
ideal_t = vmo.find_threshold(
    features,
    r=r,
    flag='a',
    dfunc='euclidean',
    dim=features.shape[1],
)

# ideal_t[1] holds one pair per tested threshold; element 1 is plotted on
# the threshold axis and element 0 on the IR axis.
ir_curve = ideal_t[1]
x_a = [i[1] for i in ir_curve]
y_a = [i[0] for i in ir_curve]

plt.figure()
plt.plot(x_a, y_a, linewidth=2)
plt.title('IR vs. Threshold Value', fontsize=18)
plt.grid(b='on')
plt.xlabel('Threshold', fontsize=14)
plt.ylabel('IR', fontsize=14)

# <markdowncell>

# Build the best oracle by choosing the ideal threshold (the one that gives the most informative oracle).
# NOTE(review): the original indentation was lost; both print calls are
# placed after their loops (summary output) -- confirm against the script.

# --- Minimum pattern length per recording, rescaled to sub-beat frames ---
min_len_list = []
for ind in range(5):
    audio_test = audio_list[ind]
    info_start, info_end = audio_test['info'][2][0], audio_test['info'][2][1]

    # One row per annotated pattern; the width is info[2][1] - info[2][0].
    ground = np.zeros((len(audio_test['pattern']), info_end - info_start))
    len_list = []
    for i, p in enumerate(audio_test['pattern']):
        for _p in p:
            # Shift each occurrence so it is relative to info[2][0].
            start, end = _p[0] - info_start, _p[1] - info_start
            len_list.append(end - start)
            # Mark the occurrence (end inclusive) with the 1-based pattern id.
            ground[i, start:end + 1] = i + 1

    # Shortest occurrence, scaled by (number of sub-beats / info span), plus 1.
    min_len = int(min(len_list) * len(subbeat_mat[ind])
                  / (info_end - info_start)) + 1
    min_len_list.append(min_len)
print(min_len_list)

# --- Timed oracle construction and repeated-pattern search ---
start_time = time.time()
for ind in range(5):
    chroma_frames = feature_mat[ind].transpose()
    r = (0.0, 1.0, 0.01)
    # Options shared by the threshold search and the oracle construction.
    vmo_kwargs = dict(flag='a', dfunc='other', dfunc_handle=trnspose_inv)
    ideal_v_inv = vmo.find_threshold(chroma_frames, r=r, VERBOSE=False,
                                     **vmo_kwargs)
    oracle_inv = vmo.build_oracle(chroma_frames,
                                  threshold=ideal_v_inv[0][1],
                                  feature='chroma',
                                  **vmo_kwargs)
    pattern = van.find_repeated_patterns(oracle_inv, lower=5)
print(str(time.time() - start_time))