def show_spectrogram(y, sr, n_fft, nmels, hopl, AW=False):
    """Compute, display and return a log-scaled mel power spectrogram.

    Parameters
    ----------
    y : audio time series handed to ``lbr.feature.melspectrogram``.
    sr : sampling rate of ``y``.
    n_fft : FFT size used for the mel spectrogram.
    nmels : number of mel bands.
    hopl : hop length, used both for the analysis and for the time axis
        of the plot.
    AW : bool
        When true, a per-band weighting (in dB) produced by
        ``itu_r_468_amplitude_weight_dB`` is added to the log spectrogram.

    Returns
    -------
    log_S : the (optionally weighted) log-amplitude mel spectrogram.
    """
    # Use the caller's n_fft; it used to be silently replaced by 2048.
    S = lbr.feature.melspectrogram(y, sr=sr, n_fft=n_fft,
                                   hop_length=hopl, n_mels=nmels)
    log_S = lbr.logamplitude(S, ref_power=np.max)

    if AW:
        # Frequencies of the mel bands, needed to evaluate the weighting curve.
        mel_freqs = lbr.mel_frequencies(n_mels=nmels, fmin=0, fmax=sr/2)
        itu_r_468 = itu_r_468_amplitude_weight_dB()
        # One weighting coefficient per band, broadcast over all frames.
        log_aw = np.array(itu_r_468(mel_freqs))
        log_S = log_S + log_aw[:, np.newaxis]

    # The plot must use the analysis hop length, otherwise the time axis
    # is scaled incorrectly (it used to be fixed at 64).
    lbr.display.specshow(log_S, sr=sr, hop_length=hopl,
                         x_axis='time', y_axis='mel')
    plt.title('mel power spectrogram')
    plt.tight_layout()
    plt.show()
    return log_S
def F_Mel(fre_f, audio_conf):
    '''
    Resample a log spectrum at mel-spaced positions by linear interpolation.

    Input:
        fre_f : FloatTensor log spectrum; dimension 1 is the frequency axis
                (assumed 2-D, frames x bins -- TODO confirm with callers)
        audio_conf : dict; "sample_rate" and "window_size" are read
    Output:
        mel_f : FloatTensor, the spectrum sampled at the mel positions
    '''
    n_mels = fre_f.size(1)
    # Fractional index into the frequency axis for every mel position
    # (mel frequency scaled by the configured window size).
    mel_bin = librosa.mel_frequencies(
        n_mels=n_mels, fmin=0,
        fmax=audio_conf["sample_rate"] / 2) * audio_conf["window_size"]

    # The interpolation taps are the same for every frame: compute them once.
    last = n_mels - 1
    taps = []
    for pos in mel_bin:
        left = int(math.floor(pos))
        # Clamp the upper neighbour: at the top of the axis `left` is already
        # the last bin and `left + 1` would run past the end of the frame.
        right = min(left + 1, last)
        taps.append((left, right, pos - left))

    mel_f = [
        # linear interpolation between the two neighbouring bins
        [(frame[right] - frame[left]) * frac + frame[left]
         for left, right, frac in taps]
        for frame in fre_f.numpy().tolist()
    ]
    return torch.FloatTensor(mel_f)
def centroid(spectrum, config=None):
    '''
    Computes spectral centroid feature.

    Parameters
    ----------
    spectrum : np.ndarray [shape=(n_bins, n_frames)]
        Spectrum from which the feature is computed.
    config : dict, optional
        Configuration dictionary. For full list of parameters with their
        description, see README file. This function reads 'spectrum.type'
        and, for 'cqt' and 'mel' spectra, 'spectrum.n_bins'. Defaults to an
        empty dictionary.

    Returns
    -------
    feature : np.ndarray [shape=(n_frames,)]
        Computed spectral centroid feature.
    '''
    # A fresh dict per call instead of a shared mutable default argument.
    if config is None:
        config = {}

    spectrum_type = get(config, 'spectrum.type')

    # Bin frequencies matching the spectrum type; None lets librosa fall back
    # to its own default frequencies.
    freq = None
    if spectrum_type == 'cqt':
        freq = librosa.cqt_frequencies(get(config, 'spectrum.n_bins'),
                                       fmin=librosa.note_to_hz('C1'))
    elif spectrum_type == 'mel':
        freq = librosa.mel_frequencies(n_mels=get(config, 'spectrum.n_bins'),
                                       htk=True)

    return librosa.feature.spectral_centroid(S=spectrum, freq=freq)[0]
def apply_mask_to_audio(mask, y, sr):
    """Attenuate mel-spaced frequency bands of ``y`` according to ``mask``.

    A 1-D mask holds one value per band: each band is removed with a
    band-stop filter and the result is blended with the running signal,
    ``value * signal + (1 - value) * filtered``. A 2-D mask holds one such
    1-D mask per time window; the signal is cut into equally long windows
    that are processed one after another. The input array is not modified;
    a processed copy is returned.
    """
    y = np.copy(y)
    rank = len(mask.shape)

    if rank == 1:
        band_count = len(mask)
        # band_count + 1 edges delimit band_count adjacent bands.
        edges = librosa.mel_frequencies(band_count + 1,
                                        LOWER_FREQUENCY_LIMIT,
                                        UPPER_FREQUENCY_LIMIT)
        bandstop = obspy.signal.filter.bandstop
        for keep, f_low, f_high in zip(mask, edges[:-1], edges[1:]):
            notched = bandstop(y, f_low, f_high, sr)
            y = keep * y + (1 - keep) * notched
    elif rank == 2:
        total = len(y)
        hop = int(np.round(total / len(mask)))
        # zip stops at whichever runs out first: mask rows or window starts.
        for row, begin in zip(mask, range(0, total, hop)):
            stop = begin + hop
            y[begin:stop] = apply_mask_to_audio(row, y[begin:stop], sr)

    return y
def _linear_to_mel(num_freq, num_mel, sample_rate):
    """Build an area-normalised linear-to-mel weight matrix.

    The triangular filters come from
    ``tf.signal.linear_to_mel_weight_matrix`` spanning 0 Hz to
    ``sample_rate / 2``; every filter is then scaled by
    ``2 / (upper_edge - lower_edge)``.

    Args:
        num_freq: number of linear spectrogram bins.
        num_mel: number of mel bands.
        sample_rate: sampling rate in Hz.

    Returns:
        The weight matrix returned by TensorFlow, scaled per mel band.
    """
    nyquist = sample_rate / 2
    # The band edges used for the normalisation have to be the edges of the
    # filters being normalised: same frequency range as the TensorFlow call
    # below and the HTK mel scale that TensorFlow documents for it.
    # Previously librosa's defaults (0-11025 Hz, Slaney scale) were used
    # regardless of `sample_rate`.
    mel_f = librosa.mel_frequencies(num_mel + 2, fmin=0.0, fmax=nyquist,
                                    htk=True)
    enorm = 2.0 / (mel_f[2:num_mel + 2] - mel_f[:num_mel])
    return tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_mel,
        num_spectrogram_bins=num_freq,
        sample_rate=sample_rate,
        lower_edge_hertz=0.0,
        upper_edge_hertz=nyquist) * enorm
def mel_weight(S, power):
    """Apply perceptual weighting to ``|S| ** power`` and normalise it.

    The per-row frequencies needed by ``librosa.perceptual_weighting`` are
    kept in the module-level ``_mel_freqs`` cache. The weighted result is
    shifted by ``hparams.ref_level_db`` and passed through ``_normalize``.

    Args:
        S: spectrogram whose first axis is the frequency axis.
        power: exponent applied to the magnitude of ``S``.

    Returns:
        The output of ``_normalize`` for the weighted, level-shifted input.
    """
    global _mel_freqs
    n_bins = S.shape[0]
    # Rebuild the cache when the number of rows changes; a stale cache of a
    # different length no longer matches the rows of `S`.
    if _mel_freqs is None or len(_mel_freqs) != n_bins:
        _mel_freqs = librosa.mel_frequencies(n_bins, fmin=hparams.fmin)

    weighted = librosa.perceptual_weighting(
        np.abs(S)**power, _mel_freqs, ref=hparams.ref_level_db)
    return _normalize(weighted - hparams.ref_level_db)
def get_mel_index(pitch, hparams):
    """Return the mel-band index whose frequency is nearest to `pitch`.

    The band frequencies span the MIDI range
    [constants.MIN_TIMBRE_PITCH, constants.MAX_TIMBRE_PITCH] with
    constants.TIMBRE_SPEC_BANDS bands; `pitch` is a MIDI value exposing
    `.numpy()`.
    """
    lowest_hz = librosa.midi_to_hz(constants.MIN_TIMBRE_PITCH)
    highest_hz = librosa.midi_to_hz(constants.MAX_TIMBRE_PITCH)
    band_hz = librosa.mel_frequencies(
        constants.TIMBRE_SPEC_BANDS,
        fmin=lowest_hz,
        fmax=highest_hz,
        htk=hparams.spec_mel_htk)

    target_hz = librosa.midi_to_hz(pitch.numpy())
    distance = np.abs(band_hz - target_hz)
    return distance.argmin()
def mel(sr, n_fft, n_mels=128, fmin=0.0, fmax=None, htk=False):
    """Create a Filterbank matrix to combine FFT bins into Mel-frequency bins

    :usage:
        >>> mel_fb = librosa.filters.mel(22050, 2048)

        >>> # Or clip the maximum frequency to 8KHz
        >>> mel_fb = librosa.filters.mel(22050, 2048, fmax=8000)

    :parameters:
      - sr : int
          sampling rate of the incoming signal
      - n_fft : int
          number of FFT components
      - n_mels : int
          number of Mel bands
      - fmin : float
          lowest frequency (in Hz)
      - fmax : float
          highest frequency (in Hz); defaults to ``sr / 2``
      - htk : bool
          use HTK formula instead of Slaney

    :returns:
      - M : np.ndarray, shape=(n_mels, 1 + n_fft/2)
          Mel transform matrix
    """
    if fmax is None:
        fmax = sr / 2.0

    # Initialize the weights
    size = int(1 + n_fft // 2)
    n_mels = int(n_mels)
    weights = np.zeros((n_mels, size))

    # Center freqs of each FFT bin
    fftfreqs = np.arange(size, dtype=float) * sr / n_fft

    # 'Center freqs' of mel bands - uniformly spaced between limits;
    # extra=True yields the n_mels + 2 edges the triangles need.
    freqs = librosa.mel_frequencies(n_mels, fmin=fmin, fmax=fmax,
                                    htk=htk, extra=True)

    # Slaney-style mel is scaled to be approx constant energy per channel
    enorm = 2.0 / (freqs[2:n_mels+2] - freqs[:n_mels])

    # `range` instead of the Python-2-only `xrange`.
    for i in range(n_mels):
        # lower and upper slopes for all bins
        lower = (fftfreqs - freqs[i]) / (freqs[i+1] - freqs[i])
        upper = (freqs[i+2] - fftfreqs) / (freqs[i+2] - freqs[i+1])

        # .. then intersect them with each other and zero
        weights[i] = np.maximum(0, np.minimum(lower, upper)) * enorm[i]

    return weights
def melfilter(frames, sr, n_fft, n_mels=128, fmin=0.0, fmax=None, htk=True, norm=None):
    """Project spectrum frames onto a hand-built triangular mel filterbank.

    The filterbank is built locally and cross-checked against
    ``librosa.filters.mel`` (and the two FFT-frequency helpers against each
    other) before use; the function also prints part of the fifth filter as
    a debugging aid.

    Raises ``ParameterError`` when ``norm`` is neither None, 1 nor infinity.
    Returns ``numpy.dot(frames, weights.T)``.
    """
    if fmax is None:
        fmax = float(sr) / 2

    norm_supported = norm is None or norm == 1 or norm == numpy.inf
    if not norm_supported:
        raise ParameterError('Unsupported norm: {}'.format(repr(norm)))

    # Initialize the weights
    n_mels = int(n_mels)
    n_bins = int(1 + n_fft // 2)
    weights = numpy.zeros((n_mels, n_bins))

    # Center freqs of each FFT bin; both helpers must agree.
    fftfreqs = fft_freqs(sr=sr, n_fft=n_fft)
    fftfreqs2 = fft_freqs2(sr=sr, n_fft=n_fft)
    assert fftfreqs.shape == fftfreqs2.shape
    numpy.testing.assert_almost_equal(fftfreqs, fftfreqs2)

    # 'Center freqs' of mel bands - uniformly spaced between limits
    mel_f = librosa.mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk)
    fdiff = numpy.diff(mel_f)

    for band in range(n_mels):
        # rising and falling slopes for all bins
        rising = -(mel_f[band] - fftfreqs) / fdiff[band]
        falling = (mel_f[band + 2] - fftfreqs) / fdiff[band + 1]

        # .. then intersect them with each other and zero
        triangle = numpy.maximum(0, numpy.minimum(rising, falling))
        if band == 4:
            print('wei', band, triangle[10:40])
        weights[band] = triangle

    # The local filterbank has to match librosa's reference implementation.
    refweighs = librosa.filters.mel(sr, n_fft, n_mels, fmin, fmax, htk=htk, norm=norm)
    numpy.testing.assert_allclose(weights, refweighs)

    return numpy.dot(frames, weights.T)
def showspec(spec):
    r"""Draw a spectrogram on time / mel-frequency axes.

    Arguments:
        spec (2d numpy array): The spectrogram to display. Its second axis
            is treated as the frame axis.
    """
    plt.figure(figsize=(16, 4))

    frame_count = spec.shape[1]
    frame_times = librosa.frames_to_time(np.arange(frame_count),
                                         constants.sr, constants.hl)
    band_freqs = librosa.mel_frequencies(n_mels=constants.nb,
                                         fmin=constants.fm,
                                         htk=constants.htk)

    plt.pcolormesh(frame_times, band_freqs, spec)
def spectrogram(y, power, pcen=False):
    """Return a perceptually weighted, normalised spectrogram and the raw STFT.

    Args:
        y: audio signal.
        power: exponent applied to the STFT magnitude before weighting.
        pcen: accepted for interface compatibility; not used here.

    Returns:
        A tuple ``(normalised, stftS)``. ``normalised`` is
        ``_normalize`` applied to the weighted spectrogram shifted by
        ``hparams.ref_level_db``; it is computed from the pre-emphasised
        signal when ``hparams.use_preemphasis`` is set. ``stftS`` is the
        STFT of the signal as passed in (no pre-emphasis).
    """
    global _mel_freqs
    stftS = librosa.stft(y, n_fft=hparams.fft_size, hop_length=hparams.hop_size)

    if hparams.use_preemphasis:
        y = preemphasis(y)
        S = librosa.stft(y, n_fft=hparams.fft_size, hop_length=hparams.hop_size)
    else:
        # Same signal, same parameters: reuse the transform computed above
        # instead of running a second identical STFT. `S` is only read below.
        S = stftS

    n_bins = S.shape[0]
    # Rebuild the shared cache when the number of rows changes; a stale cache
    # of a different length no longer matches the rows of `S`.
    if _mel_freqs is None or len(_mel_freqs) != n_bins:
        _mel_freqs = librosa.mel_frequencies(n_bins, fmin=hparams.fmin)

    _S = librosa.perceptual_weighting(
        np.abs(S)**power, _mel_freqs, ref=hparams.ref_level_db)
    return _normalize(_S - hparams.ref_level_db), stftS
def __init__(self, f_min, f_max, n_mels, sigmoid_range=(3, 12), pad_mels=20, band_attention_mode=False):
    """Store the configuration and precompute the mel band frequencies.

    Every argument is kept on the instance under its own name;
    ``self.mel_freqs`` holds ``n_mels`` mel-spaced frequencies between
    ``f_min`` and ``f_max`` as a NumPy array.
    """
    self.sigmoid_range = sigmoid_range

    # Frequency layout of the mel axis.
    self.f_min = f_min
    self.f_max = f_max
    self.n_mels = n_mels

    self.pad_mels = pad_mels
    self.band_attention_mode = band_attention_mode

    band_frequencies = librosa.mel_frequencies(n_mels, f_min, f_max)
    self.mel_freqs = np.asarray(band_frequencies)
def fig4301(x, fig_w, fig_h, path_fig=None, verbose=False):
    """Draw `x` as a magma heat map with fixed time / frequency tick labels.

    `x` is transposed and flipped vertically before plotting. Tick positions
    and labels are hard-coded: x ticks at 0..500 labelled 0..10 and y ticks
    derived from 64 default librosa mel frequencies (presumably a 10 s,
    500-frame, 64-band input -- TODO confirm with callers).

    Parameters
    ----------
    x : 2-D array-like to display.
    fig_w, fig_h : figure width and height passed to ``plt.figure``.
    path_fig : optional path; when truthy the figure is saved there.
    verbose : when true the figure is shown without blocking.

    The figure is always closed before returning, also when saving or
    showing fails.
    """
    import librosa

    fig = plt.figure(figsize=(fig_w, fig_h), tight_layout=True)
    try:
        ax = sns.heatmap(
            x.T[::-1],
            cbar=True,
            cbar_kws={"pad": .02},
            linewidths=0.0,
            rasterized=True,
            cmap="magma",
        )
        ax.collections[0].colorbar.ax.tick_params(length=0, pad=1)
        ax.tick_params(
            left=False,
            bottom=False,
            length=1,
            pad=1,
            width=1,
        )

        # Fixed tick layout (the former `ax.get_xticks()` result was
        # overwritten immediately and has been dropped).
        xticks = np.around(np.linspace(0, 500, 11), decimals=0).astype(int)
        xticklabels = np.around(
            np.linspace(0, 10, len(xticks)), decimals=0).astype(int)

        yticks = np.around(np.linspace(0, 64, 5), decimals=0).astype(int)[:-1]
        # Labels in kHz, reversed because the image rows are flipped.
        yticklabels = np.around(
            librosa.mel_frequencies(64)[np.linspace(0, 63, 5).astype(int)] / 1000,
            decimals=0).astype(int)[::-1][:-1]

        ax.set(
            title=None,
            xlabel="Time (s)",
            xticks=xticks,
            xticklabels=xticklabels,
            ylabel="Frequency (kHz)",
            yticks=yticks,
            yticklabels=yticklabels,
        )
        ax.xaxis.set_tick_params(rotation='auto')
        ax.yaxis.set_tick_params(rotation='auto')

        plt.tight_layout()
        if path_fig:
            plt.savefig(path_fig)
        if verbose:
            plt.show(block=False)
    finally:
        # Release the figure even if drawing, saving or showing raised.
        plt.close(fig)
def prepare_mel_matrix(hparams, rate, return_numpy=True, GPU_backend=False):
    """ Create mel filter

    Builds a linear-to-mel weight matrix with TensorFlow, scales every mel
    band by ``2 / (upper_edge - lower_edge)`` and finally divides every mel
    band (column) by its sum over the spectrogram bins.

    Args:
        hparams: object providing ``num_mel_bins``, ``n_fft``,
            ``mel_lower_edge_hertz`` and ``mel_upper_edge_hertz``.
        rate: sample rate handed to TensorFlow.
        return_numpy: return ``mel_matrix.numpy()`` instead of the tensor.
        GPU_backend: when false and TensorFlow has not been imported yet,
            CUDA devices are hidden through environment variables before
            the import.

    Returns:
        The mel matrix as a NumPy array or as a TensorFlow tensor.
    """
    # TensorFlow registers itself in sys.modules as "tensorflow"; the former
    # check for "tf" never matched, so the environment was rewritten on every
    # call. The import itself must always run: `tf` is a local name here.
    if "tensorflow" not in sys.modules and not GPU_backend:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
    import tensorflow as tf

    # create a filter to convolve with the spectrogram
    mel_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=hparams.num_mel_bins,
        num_spectrogram_bins=int(hparams.n_fft / 2) + 1,
        sample_rate=rate,
        lower_edge_hertz=hparams.mel_lower_edge_hertz,
        upper_edge_hertz=hparams.mel_upper_edge_hertz,
        dtype=tf.dtypes.float32,
        name=None,
    )

    # gets the center frequencies of mel bands
    mel_f = mel_frequencies(
        n_mels=hparams.num_mel_bins + 2,
        fmin=hparams.mel_lower_edge_hertz,
        fmax=hparams.mel_upper_edge_hertz,
    )

    # Slaney-style mel is scaled to be approx constant energy per channel (from librosa)
    enorm = tf.dtypes.cast(
        tf.expand_dims(
            tf.constant(
                2.0
                / (mel_f[2 : hparams.num_mel_bins + 2] - mel_f[: hparams.num_mel_bins])
            ),
            0,
        ),
        tf.float32,
    )

    mel_matrix = tf.multiply(mel_matrix, enorm)
    # NOTE(review): dividing each column by its own sum appears to cancel the
    # per-band `enorm` factor applied just above -- confirm this is intended.
    mel_matrix = tf.divide(mel_matrix, tf.reduce_sum(mel_matrix, axis=0))

    if return_numpy:
        return mel_matrix.numpy()
    return mel_matrix
def main():
    """Render a 3-D log-mel surface plot for one room2reverb example.

    The example name is read from ``sys.argv[1]``. The matching label image
    is copied next to the output, the audio is peak-normalised and trimmed
    to the span whose magnitude exceeds 1e-5, and the log-mel spectrogram
    (first band dropped) is saved as a surface plot under ``output/images/``.
    """
    src_dir = "../datasets/room2reverb/test_A/"
    data_dir = "../datasets/room2reverb/test_B/"
    if not os.path.isdir("output/images/"):
        os.makedirs("output/images/")
    set_style()

    name = sys.argv[1]
    example = os.path.join(data_dir, "%s_img.wav" % name)
    src = os.path.join(src_dir, "%s_label.jpg" % name)
    output = "output/images/%s_spec.png" % name
    src_output = "output/images/%s_input.jpg" % name

    audio, sr = soundfile.read(example)
    shutil.copy2(src, src_output)

    # Peak-normalise, then keep the span between the first and the last
    # sample above the threshold.
    audio /= numpy.abs(audio).max()
    active = numpy.where(numpy.abs(audio) > 0.00001)[0]
    audio = audio[active[0]:active[-1]]

    log_mel = numpy.log(librosa.feature.melspectrogram(audio) + 1e-8)[1:, :]

    fig = pyplot.figure(figsize=(8, 4))
    ax = fig.gca(projection="3d")

    # Column vector of band frequencies and row vector of frame times, so
    # both broadcast against the spectrogram.
    freqs = librosa.mel_frequencies()[1:, None]
    duration = len(audio) / sr
    times = (numpy.linspace(0, 1, log_mel.shape[1]) * duration)[None, :]

    ax.plot_surface(times, freqs, log_mel, cmap="coolwarm")
    ax.set_ylim(freqs[-1], freqs[0])
    ax.set_zticks([])
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Frequency (Hz)")

    # Fully transparent axis lines and panes.
    bg = (0, 0, 0, 0)
    for axis in (ax.w_zaxis, ax.w_yaxis):
        axis.line.set_color(bg)
    for axis in (ax.w_zaxis, ax.w_yaxis, ax.w_xaxis):
        axis.set_pane_color(bg)
    ax.grid(False)

    pyplot.savefig(output, bbox_inches="tight")
def F_Mel(fre_f, audio_conf):
    '''
    Resample a log spectrum at mel-spaced positions by linear interpolation.

    Input:
        fre_f : FloatTensor log spectrum; dimension 1 is the frequency axis
                (assumed 2-D, frames x bins -- TODO confirm with callers)
        audio_conf : dict; "sample_rate" and "window_size" are read
    Output:
        mel_f : FloatTensor, the spectrum sampled at the mel positions
    '''
    n_mels = fre_f.size(1)
    # Fractional index into the frequency axis for every mel position
    # (mel frequency scaled by the configured window size).
    mel_bin = librosa.mel_frequencies(
        n_mels=n_mels, fmin=0,
        fmax=audio_conf["sample_rate"]/2) * audio_conf["window_size"]

    # The interpolation taps are the same for every frame: compute them once.
    last = n_mels - 1
    taps = []
    for pos in mel_bin:
        left = int(math.floor(pos))
        # Clamp the upper neighbour: at the top of the axis `left` is already
        # the last bin and `left + 1` would run past the end of the frame.
        right = min(left + 1, last)
        taps.append((left, right, pos - left))

    mel_f = [
        # linear interpolation between the two neighbouring bins
        [(frame[right] - frame[left]) * frac + frame[left]
         for left, right, frac in taps]
        for frame in fre_f.numpy().tolist()
    ]
    return torch.FloatTensor(mel_f)
import torch
import librosa
from model.metrics import Lwlrap

mels = librosa.mel_frequencies(n_mels=256, fmin=50, fmax=15000)

# Two hand-written samples, each a (prediction scores, one-hot target) pair.
s1 = torch.tensor([[0.5, 0.3, 0.9, 0.1]]), torch.tensor([[0, 0, 1, 0]])
s2 = torch.tensor([[0.5, 0.7, 0.9, 0.1]]), torch.tensor([[0, 1, 0, 0]])

# Feed both samples to the metric and print the aggregated value.
lwlrap = Lwlrap(None)
for prediction, target in (s1, s2):
    lwlrap.update({'prediction': prediction, 'target': target})

print(lwlrap.compute())
def showdata(audiobeats, duration=None, offset=None, beatfinder=None, device=None, showpred=True):
    r"""Displays the data pointed by an AudioBeats object. It shows the onset
    envelope, the onsets, the onsets selected as beats, the ground truth
    beats, and the spectrogram.

    Arguments:
        audiobeats (AudioBeats): The `AudioBeats` object to display.
        duration (float): To see a smaller portion of the data, this can be
            set to a lower value than the duration of `audiobeats`. Defaults
            to `audiobeats.duration`.
        offset (float): Display only the data starting from `offset` (combine
            with `duration` to see a smaller portion of the data). Defaults
            to 0.
        beatfinder (BeatFinder): A model, if available. When given, the
            probability of each onset being a beat is drawn.
        device (torch.device): The device of the model.
        showpred (bool): To show the predicted beats.
    """
    spec, onsets, isbeat, beats = audiobeats.get_data()

    # Identity checks: `None` is a singleton and must not be compared by value.
    if duration is None:
        duration = audiobeats.duration
    if offset is None:
        offset = 0
    if showpred:
        pred_beats, _ = audiobeats.predicted_beats()

    onsets_times = librosa.frames_to_time(onsets, constants.sr, constants.hl)
    onsets_selected = onsets[isbeat == 1]
    onsets_selected_times = librosa.frames_to_time(onsets_selected, constants.sr, constants.hl)
    onset_envelope = utils.onset_strength(spec=spec)
    times = librosa.frames_to_time(np.arange(len(onset_envelope)), constants.sr, constants.hl)

    plt.figure(figsize=(16, 8))
    plt.subplots_adjust(hspace=0)

    # Top strip: ground truth, predicted beats and selected onsets, stacked
    # in unit-height rows.
    plt.subplot(4, 1, 1)
    if audiobeats.beats_file:
        plt.vlines(beats, 2, 3, color='g', label='Ground truth\nbeats')
        plt.ylim(0, 3)
    else:
        plt.ylim(0, 2)
    if showpred:
        plt.vlines(pred_beats, 1, 2, color='b', label='Predicted beats')
        plt.vlines(onsets_selected_times, 0, 1, color='m', linestyles='-', alpha=1,
                   label='Onsets selected\nas beats')
    else:
        # Without predictions the selected onsets take the middle row.
        plt.vlines(onsets_selected_times, 1, 2, color='m', linestyles='-', alpha=1,
                   label='Onsets selected\nas beats')
        plt.ylim(1, 3)
    plt.xlim(offset, offset + duration)
    plt.xticks([], [])
    plt.yticks([], [])
    plt.legend(frameon=True, framealpha=0.75, bbox_to_anchor=(1.15, 1))

    # Second strip: onsets, beat probabilities (or selected onsets) and the
    # onset envelope.
    plt.subplot(4, 1, 2)
    plt.vlines(onsets_times, 0, 1, color='k', linestyles='--', alpha=0.3, label='Onsets')
    if beatfinder:
        probs = audiobeats.probabilities(beatfinder, device)
        plt.vlines(onsets_times, 0, probs[onsets], color='r', linewidths=7, alpha=0.25,
                   label='Probability of the\nonset to be a beat')
    else:
        plt.vlines(onsets_selected_times, 0, 1, color='m', linestyles='--', alpha=1,
                   label='Onsets selected\nas beats')
    plt.plot(times, onset_envelope, label='Onset envelope')
    plt.xlim(offset, offset + duration)
    plt.ylim(0, 1)
    plt.xticks([], [])
    plt.legend(frameon=True, framealpha=0.75, bbox_to_anchor=(1.15, 1))

    # Lower half: the spectrogram.
    plt.subplot(2, 1, 2)
    freq = librosa.mel_frequencies(n_mels=constants.nb, fmin=constants.fm, htk=constants.htk)
    plt.pcolormesh(times, freq, spec)
    plt.xlabel('Time [seconds]')
    plt.ylabel('Frequency [Hz]')
    plt.xlim(offset, offset + duration)
# show_debug_img: OpenCV debugging view of a mel spectrogram with annotation boxes.
#
# Both `mel` and `orig_mel` are flipped vertically; `mel` (scaled by 255) and a
# min-max normalised copy of `orig_mel` are colour-mapped with COLORMAP_MAGMA and
# shown in the windows 'mel' and 'mel_norm', after which the method waits in
# cv2.waitKey(0). For the rows of `rows`, the helper `_draw_rect` draws a rectangle
# and a 'tp|' / 'fp|' + key label in colour (0, 255, 0):
#   * horizontal extent: the row's t_min / t_max multiplied by
#     self.sampling_rate / self.hop_length,
#   * vertical extent: self.n_mels minus the index of the mel frequency nearest
#     to the row's f_min / f_max (`find_nearest_idx`).
# Prints 'Missing draw!' when the `draw` flag was never set.
#
# NOTE(review): this definition is stored flattened on two physical lines, so the
# nesting of the three `_draw_rect` calls under the recording-id comparison, and
# where each inline `#` comment ends, cannot be determined here -- the code is
# kept verbatim.
# NOTE(review): `t_min` / `t_max` appear only in what looks like commented-out
# code -- confirm they are intentionally unused.
# NOTE(review): `first_row_id = first_row_id or ...` overwrites the stored id
# whenever it is falsy (e.g. 0) -- confirm recording ids are never falsy.
def show_debug_img(self, mel, orig_mel, rows: pd.DataFrame, t_min, t_max): mel_frequencies = librosa.mel_frequencies(self.n_mels, self.fmin, self.fmax) def find_nearest_idx(array, value): array = np.asarray(array) idx = (np.abs(array - value)).argmin() return idx def _draw_rect(mel, name, row_t_min, row_t_max, f_min, f_max): mel_min = find_nearest_idx(mel_frequencies, f_min) mel_max = find_nearest_idx(mel_frequencies, f_max) mel_min = self.n_mels - mel_min mel_max = self.n_mels - mel_max # x1 = row_t_min - t_min # x2 = min(row_t_max, t_max) - t_min x1 = row_t_min x2 = row_t_max cv2.rectangle(mel, (x1, mel_max), (x2, mel_min), (0, 255, 0)) cv2.putText(mel, name, (x1, mel_max + 16 - 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0)) orig_mel = orig_mel[::-1, :] mel_norm = np.zeros_like(orig_mel) cv2.normalize(orig_mel, mel_norm, 0, 255, cv2.NORM_MINMAX) mel_norm = cv2.applyColorMap(mel_norm.astype(np.uint8), cv2.COLORMAP_MAGMA) mel = cv2.applyColorMap((mel[::-1, :] * 255).astype(np.uint8), cv2.COLORMAP_MAGMA) # mel = cv2.cvtColor(mel[::-1, :], cv2.COLOR_GRAY2BGR) orig_mel = cv2.cvtColor(orig_mel, cv2.COLOR_GRAY2BGR) # mel_norm = cv2.cvtColor(mel_norm, cv2.COLOR_GRAY2BGR) # mel = cv2.cvtColor(mel[::-1, :], cv2.COLOR_GRAY2BGR) draw = False first_row_id = None for row_id, row in rows.iterrows(): # type:pd.DataFrame first_row_id = first_row_id or row[SampleDataset.k_recording_id] row_t_min = int(row[SampleDataset.k_t_min] * self.sampling_rate / self.hop_length) row_t_max = int(row[SampleDataset.k_t_max] * self.sampling_rate / self.hop_length) # if row_t_min <= t_max and row_t_max >= t_min: if 1: draw = True name = ('tp|' if row[SampleDataset.k_is_tp] else 'fp|') + row[SampleDataset.k_key] # row_t_min, row_t_max = max(row_t_min, t_min), min(row_t_max, t_max) rect_info = name, row_t_min, row_t_max, row[ SampleDataset.k_f_min], row[SampleDataset.k_f_max] if first_row_id == row[SampleDataset.k_recording_id]: _draw_rect(orig_mel, *rect_info) _draw_rect(mel_norm, *rect_info) 
_draw_rect(mel, *rect_info) if not draw: print('Missing draw!') # cv2.imshow('orig_mel', orig_mel) cv2.imshow('mel_norm', mel_norm) cv2.imshow('mel', mel) # cv2.moveWindow('orig_mel', 0, 0) cv2.moveWindow('mel_norm', 0, (orig_mel.shape[0] + 32) * 0) cv2.moveWindow('mel', 0, (orig_mel.shape[0] + 32) * 1) cv2.waitKey(0)
# Audio / image geometry used to build the mel spectrogram images.
TIME = CFG.duration
SR = 48000
FMIN = 40
FMAX = SR // 2
IMAGE_WIDTH = 456
IMAGE_HEIGHT = 456
N_MELS = IMAGE_HEIGHT
HOP_SIZE = 512
WINDOW_SIZE = 512 * 6

# Find the lowest f_min and the highest f_max of every species.
species_fmin = (
    traint.groupby("species_id")["f_min"]
    .agg(min)
    .reset_index()
)
species_fmax = (
    traint.groupby("species_id")["f_max"]
    .agg(max)
    .reset_index()
)
species_fmin_fmax = species_fmin.merge(species_fmax, on="species_id")

# Frequency of every mel band (one band per image row).
MEL_FREQ = librosa.mel_frequencies(n_mels=IMAGE_HEIGHT, fmin=FMIN, fmax=FMAX)


def search_bin(value):
    """Return the index just below the first mel band not lower than `value`.

    Scans MEL_FREQ in order and returns ``i - 1`` for the first band ``i``
    whose frequency is not below `value`; returns 0 when every band is below
    `value`.
    """
    for index, frequency in enumerate(MEL_FREQ):
        if not (frequency < value):
            return index - 1
    return 0


# Find the coordinates after conversion to the mel spectrogram
# https://akifukka.hatenablog.com/entry/text2speech2
def mel_frequencies(self) -> List[float]:
    """Return the mel band edge frequencies up to half the sample rate.

    Two more values than ``self.mel_frequency_count`` are requested from
    librosa.
    """
    # according to librosa.filters.mel code
    edge_count = self.mel_frequency_count + 2
    nyquist = self.sample_rate / 2
    return librosa.mel_frequencies(edge_count, fmax=nyquist)
def main():
    """Streamlit page: waveform, mel spectrogram and spectrum of an upload.

    Nothing beyond the title and the uploader is rendered until a file has
    been uploaded. The sidebar sliders select the time range to analyse, the
    mel spectrogram parameters and the moving-average window of the spectrum.
    """
    st.title('audio visualizer')
    uploaded_file = st.sidebar.file_uploader(
        "audio file upload (only monoral audio!)")
    if uploaded_file is None:
        return

    wav, sr = librosa.load(uploaded_file, sr=None)
    wav_seconds = int(len(wav) / sr)
    st.write('sampling rate = ', sr, 'Hz')
    st.audio(uploaded_file)

    # --- sidebar controls -------------------------------------------------
    st.sidebar.title('sound waveform')
    tgt_ranges = st.sidebar.slider("target range(s)", 0, wav_seconds,
                                   (0, wav_seconds))
    st.sidebar.title('melspectrogram')
    hop_len = st.sidebar.slider('hop len', min_value=128, max_value=2048,
                                step=128, value=1024)
    win_len = st.sidebar.slider('win len', min_value=512, max_value=4096,
                                step=256, value=2048)
    n_mel = st.sidebar.slider('mel num', min_value=64, max_value=256,
                              step=8, value=128)
    st.sidebar.title('spectrum')
    ave_win_len = st.sidebar.slider('ave win len', min_value=2, max_value=500,
                                    step=2, value=100)

    # --- waveform (decimated by HOP) with the selected range highlighted ---
    start_s, stop_s = tgt_ranges[0], tgt_ranges[1]
    fig = go.Figure()
    x_wav = np.arange(len(wav)) / sr
    decimated = wav[::HOP]
    fig.add_trace(go.Scatter(y=decimated, name="wav"))
    fig.add_vrect(x0=int(start_s * sr / HOP),
                  x1=int(stop_s * sr / HOP),
                  fillcolor="LightSalmon",
                  opacity=0.5,
                  layer="below",
                  line_width=0)
    time_axis = dict(
        tickmode='array',
        tickvals=[1, int(len(decimated) / 2), len(decimated)],
        ticktext=[str(0), str(int(wav_seconds / 2)), str(wav_seconds)],
        title="time(s)")
    fig.update_layout(title="sound waveform",
                      width=GRAPH_WIDTH,
                      height=GRAPH_HEIGHT,
                      xaxis=time_axis)
    st.plotly_chart(fig)

    wav_element = wav[start_s * sr:stop_s * sr]

    # --- melspectrogram -----------------------------------------------------
    mel = calc_melspectrogram(wav_element, sr, win_len, hop_len, n_mel)
    mel_bins = librosa.mel_frequencies(n_mel, 0, int(sr / 2))
    n_rows = mel.shape[0]
    # The image is flipped vertically, so labels run from high to low.
    freq_axis = dict(
        tickmode='array',
        tickvals=[1, int(n_rows / 4), int(n_rows / 2), int(n_rows - 1)],
        ticktext=[
            str(int(mel_bins[int(n_rows - 1)])),
            str(int(mel_bins[int(3 * n_rows / 4)])),
            str(int(mel_bins[int(n_rows / 2)])),
            str(0)
        ],
        title="frequency(Hz)")
    fig = px.imshow(np.flipud(mel), aspect='auto')
    fig.update_layout(title="melspectrogram",
                      width=GRAPH_WIDTH,
                      height=GRAPH_HEIGHT,
                      xaxis=dict(showticklabels=False),
                      yaxis=freq_axis)
    st.write(fig)

    # --- spectrum (moving-averaged power) -----------------------------------
    s_power, freqs = calc_spectrum(wav_element, sr)
    smoothed = move_ave(s_power, ave_win_len)
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=freqs, y=smoothed, mode='lines'))
    fig.update_layout(title="spectrum",
                      width=GRAPH_WIDTH,
                      height=GRAPH_HEIGHT,
                      xaxis=dict(title="frequency(Hz)"),
                      yaxis=dict(title="power"))
    st.write(fig)
}) return labels, Z, clstrs_full, pc_corr_clstrs plt.style.use('mb') plt.rcParams.update({'font.size': 15}) # In[4]: from librosa import mel_frequencies mel_freqs = mel_frequencies(48, fmax=8000) def plot_Ws(Ws, corrs=None, vmax=None, vmin=None): if vmin is None or vmax is None: vmin = Ws.min() vmax = Ws.max() n_rows = np.ceil(Ws.shape[0] / 5).astype('int') fig, axes = plt.subplots(n_rows, 5, figsize=(20, n_rows * 3.75), constrained_layout=True) axes = axes.flatten() for n in range(Ws.shape[0]): mappable = axes[n].imshow(Ws[n].T, aspect='auto', origin='lower', cmap='viridis', vmin=vmin, vmax=vmax) plt.colorbar(mappable) if corrs: fig.suptitle('{0:.2f}'.format(corrs)) return fig