def _extract_metadata(tid):
    """Extract metadata from one audio file."""
    metadata = pd.Series(name=tid)

    try:
        path = utils.get_audio_path(os.environ.get('AUDIO_DIR'), tid)
        f = mutagen.File(path)
        x, sr = librosa.load(path, sr=None, mono=False)
        assert f.info.channels == (x.shape[0] if x.ndim > 1 else 1)
        assert f.info.sample_rate == sr

        mode = {
            mutagen.mp3.BitrateMode.CBR: 'CBR',
            mutagen.mp3.BitrateMode.VBR: 'VBR',
            mutagen.mp3.BitrateMode.ABR: 'ABR',
            mutagen.mp3.BitrateMode.UNKNOWN: 'UNKNOWN',
        }
        metadata['bit_rate'] = f.info.bitrate
        metadata['mode'] = mode[f.info.bitrate_mode]
        metadata['channels'] = f.info.channels
        metadata['sample_rate'] = f.info.sample_rate
        metadata['samples'] = x.shape[-1]

    except Exception as e:
        print('{}: {}'.format(tid, repr(e)))
        metadata['bit_rate'] = 0
        metadata['mode'] = 'UNKNOWN'
        metadata['channels'] = 0
        metadata['sample_rate'] = 0
        metadata['samples'] = 0

    return metadata
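# A minimal driver sketch (not from the original source): _extract_metadata()
# handles one track, so it is presumably mapped over all track ids, e.g. with a
# multiprocessing pool, and the per-track Series are then stacked into one
# DataFrame. The 'raw_tracks.csv' filename and the pool size are assumptions
# for illustration only.
def _collect_metadata_sketch():
    import multiprocessing
    tracks = pd.read_csv('raw_tracks.csv', index_col=0)
    with multiprocessing.Pool(4) as pool:
        series = pool.map(_extract_metadata, tracks.index)
    # Each Series becomes one row, indexed by its name (the track id).
    return pd.DataFrame(series)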
def download_data(args):
    # Resolve args.path to an absolute path from the root directory, then append fma_full.
    dst_dir = os.path.join(os.path.abspath(args.path), 'fma_full')
    tracks = pd.read_csv('raw_tracks.csv', index_col=0)
    _create_subdirs(dst_dir, tracks)

    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))
    not_found = pickle.load(open('not_found.pickle', 'rb'))
    not_found['audio'] = []

    # Download missing tracks.
    collected = 0
    for tid in tqdm(tracks.index):
        # Get the path of an audio file from its track id.
        dst = utils.get_audio_path(dst_dir, tid)
        # Download the tracks missing from the directory (initially empty, so everything is downloaded).
        if not os.path.exists(dst):
            try:
                fma.download_track(tracks.at[tid, 'track_file'], dst)
                collected += 1
            except:  # requests.HTTPError
                not_found['audio'].append(tid)

    # Serialize the result to record the tracks that could not be found.
    pickle.dump(not_found, open('not_found.pickle', 'wb'))

    existing = len(tracks) - collected - len(not_found['audio'])
    print('audio: {} collected, {} existing, {} not found'.format(
        collected, existing, len(not_found['audio'])))
def worker(args):
    track = args[0]
    spectrograms_CREATE = args[1]

    # Loading track data.
    try:
        track_pathname = utils.get_audio_path(AUDIO_DIR, track)
        data, _ = librosa.load(track_pathname, duration=DURATION, sr=SAMPLING_RATE)

        for spectrogram_filename in spectrograms_CREATE:
            # Creating the spectrogram graph container.
            figure = PLT.figure()
            PLT.axis('off')

            # Generating the spectrogram.
            image = spectrograms_CREATE[spectrogram_filename](data, None, SAMPLING_RATE)

            # Saving the spectrogram to a file.
            PLT.savefig(spectrogram_filename, bbox_inches='tight', pad_inches=0, dpi=100)

            # Clearing memory to prevent leaks.
            PLT.cla()
            PLT.clf()
            PLT.close('all')
            gc.collect()

    # Informs the leader process whether generating spectrograms for the current track has failed.
    except Exception as E:
        return track

    return None
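# A hedged usage sketch (not from the original source): worker() takes a
# (track_id, spectrograms_CREATE) tuple and returns the track id on failure or
# None on success, so the leader process presumably dispatches jobs and
# collects the failed ids roughly like this. The function and argument names
# below are illustrative assumptions.
def _run_spectrogram_workers_sketch(track_ids, spectrogram_funcs):
    import multiprocessing
    jobs = [(tid, spectrogram_funcs) for tid in track_ids]
    with multiprocessing.Pool() as pool:
        results = pool.map(worker, jobs)
    # Keep only the non-None results, i.e. the tracks that failed.
    failed = [tid for tid in results if tid is not None]
    return failed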
def download_data(args):
    dst_dir = os.path.join(os.path.abspath(args.path), 'fma_full')
    tracks = pd.read_csv('raw_tracks.csv', index_col=0)
    _create_subdirs(dst_dir, tracks)

    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))
    not_found = pickle.load(open('not_found.pickle', 'rb'))
    not_found['audio'] = []

    # Download missing tracks.
    collected = 0
    for tid in tqdm(tracks.index):
        dst = utils.get_audio_path(dst_dir, tid)
        if not os.path.exists(dst):
            try:
                fma.download_track(tracks.at[tid, 'track_file'], dst)
                collected += 1
            except:  # requests.HTTPError
                not_found['audio'].append(tid)

    pickle.dump(not_found, open('not_found.pickle', 'wb'))

    existing = len(tracks) - collected - len(not_found['audio'])
    print('audio: {} collected, {} existing, {} not found'.format(
        collected, existing, len(not_found['audio'])))
def _build_track_features(k):
    """Build a numpy array of numerical features for a given track."""
    # https://medium.com/@tanveer9812/mfccs-made-easy-7ef383006040
    # https://towardsdatascience.com/getting-to-know-the-mel-spectrogram-31bca3e2d9d0
    track_id, mfc_kwargs, rebuild_existing = k
    try:
        track_data_path = _get_track_data_path(track_id, mfc_kwargs)
        if os.path.exists(track_data_path):
            if rebuild_existing:
                # Delete in case the data was bad; it will be rebuilt anyway if good.
                os.remove(track_data_path)
            else:
                # Append and skip.
                return (track_id, track_data_path)

        # A comment claimed that this function doesn't work correctly.
        track_filename = utils.get_audio_path(AUDIO_DIR, track_id)
        track_x = _build_features_for_file(track_filename, mfc_kwargs)
        # print(f"Processing {i}/{len(tracks)}, {track_filename=}", end="\r")

        # Save the data.
        _save_track_data(track_data_path, track_x)
        # print(track_id, track_x.shape, track_info.shape, track_filename, sample_rate, flush=True)
        return (track_id, track_data_path)
    except Exception as e:
        print(f"Track {track_id} broke with error {e}", flush=True)
        return None
def trim_audio(dst_dir):
    dst_dir = os.path.abspath(dst_dir)
    fma_full = os.path.join(dst_dir, 'fma_full')
    fma_large = os.path.join(dst_dir, 'fma_large')
    tracks = pd.read_csv('raw_tracks.csv', index_col=0)
    _create_subdirs(fma_large, tracks)

    not_found = pickle.load(open('not_found.pickle', 'rb'))
    not_found['clips'] = []

    for tid in tqdm(tracks.index):
        duration = convert_duration(tracks.at[tid, 'track_duration'])
        src = utils.get_audio_path(fma_full, tid)
        dst = utils.get_audio_path(fma_large, tid)
        if tid in not_found['audio']:
            continue
        elif os.path.exists(dst):
            continue
        elif duration <= 30:
            shutil.copyfile(src, dst)
        else:
            start = duration // 2 - 15
            command = ['ffmpeg', '-i', src,
                       '-ss', str(start), '-t', '30',
                       '-acodec', 'copy', dst]
            try:
                sp.run(command, check=True, stderr=sp.DEVNULL)
            except sp.CalledProcessError:
                not_found['clips'].append(tid)

    for tid in not_found['clips']:
        try:
            os.remove(utils.get_audio_path(fma_large, tid))
        except FileNotFoundError:
            pass

    pickle.dump(not_found, open('not_found.pickle', 'wb'))
def trim_audio(args):
    path = os.path.abspath(args.path)
    fma_full = os.path.join(path, 'fma_full')
    fma_large = os.path.join(path, 'fma_large')
    tracks = pd.read_csv('mp3_metadata.csv', index_col=0)
    _create_subdirs(fma_large, tracks)

    not_found = pickle.load(open('not_found.pickle', 'rb'))
    not_found['clips'] = []

    for tid, track in tqdm(tracks.iterrows(), total=len(tracks)):
        duration = track['samples'] / track['sample_rate']
        src = utils.get_audio_path(fma_full, tid)
        dst = utils.get_audio_path(fma_large, tid)
        if os.path.exists(dst):
            continue
        elif duration <= 30:
            shutil.copyfile(src, dst)
        else:
            start = int(duration // 2 - 15)
            command = ['ffmpeg', '-i', src,
                       '-ss', str(start), '-t', '30',
                       '-acodec', 'copy', dst]
            try:
                sp.run(command, check=True, stderr=sp.DEVNULL)
            except sp.CalledProcessError:
                not_found['clips'].append(tid)

    for tid in not_found['clips']:
        try:
            os.remove(utils.get_audio_path(fma_large, tid))
        except FileNotFoundError:
            pass

    pickle.dump(not_found, open('not_found.pickle', 'wb'))
def download_data(dst_dir):
    dst_dir = os.path.abspath(dst_dir)
    tracks = pd.read_csv('raw_tracks.csv', index_col=0)
    _create_subdirs(dst_dir, tracks)

    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))
    not_found = pickle.load(open('not_found.pickle', 'rb'))
    not_found['audio'] = []

    # Download missing tracks.
    for tid in tqdm(tracks.index):
        dst = utils.get_audio_path(dst_dir, tid)
        if not os.path.exists(dst):
            try:
                fma.download_track(tracks.at[tid, 'track_file'], dst)
            except:  # requests.HTTPError
                not_found['audio'].append(tid)

    pickle.dump(not_found, open('not_found.pickle', 'wb'))
def _extract_metadata(tid, path):
    """Extract metadata from one audio file."""
    # Create a Series named after the track id (tid).
    metadata = pd.Series(name=tid)

    try:
        path = utils.get_audio_path(path, tid)
        # mutagen handles the audio metadata.
        f = mutagen.File(path)
        # Return the signal and sample rate of the audio.
        x, sr = librosa.load(path, sr=None, mono=False)
        # Cross-check the values extracted by librosa and mutagen.
        assert f.info.channels == (x.shape[0] if x.ndim > 1 else 1)
        assert f.info.sample_rate == sr

        mode = {
            mutagen.mp3.BitrateMode.CBR: 'CBR',
            mutagen.mp3.BitrateMode.VBR: 'VBR',
            mutagen.mp3.BitrateMode.ABR: 'ABR',
            mutagen.mp3.BitrateMode.UNKNOWN: 'UNKNOWN',
        }
        # Store the metadata.
        metadata['bit_rate'] = f.info.bitrate
        metadata['mode'] = mode[f.info.bitrate_mode]
        metadata['channels'] = f.info.channels
        metadata['sample_rate'] = f.info.sample_rate
        metadata['samples'] = x.shape[-1]

    except Exception as e:
        # Store null values for files that could not be analyzed.
        print('{}: {}'.format(tid, repr(e)))
        metadata['bit_rate'] = 0
        metadata['mode'] = 'ERROR'
        metadata['channels'] = 0
        metadata['sample_rate'] = 0
        metadata['samples'] = 0

    return metadata
def compute_features(tid):

    features = pd.Series(index=columns(), dtype=np.float32, name=tid)

    # Catch warnings as exceptions (audioread leaks file descriptors).
    warnings.filterwarnings('error', module='librosa')

    def feature_stats(name, values):
        features[name, 'mean'] = np.mean(values, axis=1)
        features[name, 'std'] = np.std(values, axis=1)
        features[name, 'skew'] = stats.skew(values, axis=1)
        features[name, 'kurtosis'] = stats.kurtosis(values, axis=1)
        features[name, 'median'] = np.median(values, axis=1)
        features[name, 'min'] = np.min(values, axis=1)
        features[name, 'max'] = np.max(values, axis=1)

    try:
        filepath = utils.get_audio_path(os.environ.get('AUDIO_DIR'), tid)
        x, sr = librosa.load(filepath, sr=None, mono=True)  # kaiser_fast

        f = librosa.feature.zero_crossing_rate(x, frame_length=2048, hop_length=512)
        feature_stats('zcr', f)

        cqt = np.abs(librosa.cqt(x, sr=sr, hop_length=512, bins_per_octave=12,
                                 n_bins=7 * 12, tuning=None))
        assert cqt.shape[0] == 7 * 12
        assert np.ceil(len(x) / 512) <= cqt.shape[1] <= np.ceil(len(x) / 512) + 1

        f = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
        feature_stats('chroma_cqt', f)
        f = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)
        feature_stats('chroma_cens', f)
        f = librosa.feature.tonnetz(chroma=f)
        feature_stats('tonnetz', f)

        del cqt
        stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))
        assert stft.shape[0] == 1 + 2048 // 2
        assert np.ceil(len(x) / 512) <= stft.shape[1] <= np.ceil(len(x) / 512) + 1
        del x

        f = librosa.feature.chroma_stft(S=stft**2, n_chroma=12)
        feature_stats('chroma_stft', f)

        f = librosa.feature.rmse(S=stft)
        feature_stats('rmse', f)

        f = librosa.feature.spectral_centroid(S=stft)
        feature_stats('spectral_centroid', f)
        f = librosa.feature.spectral_bandwidth(S=stft)
        feature_stats('spectral_bandwidth', f)
        f = librosa.feature.spectral_contrast(S=stft, n_bands=6)
        feature_stats('spectral_contrast', f)
        f = librosa.feature.spectral_rolloff(S=stft)
        feature_stats('spectral_rolloff', f)

        mel = librosa.feature.melspectrogram(sr=sr, S=stft**2)
        del stft
        f = librosa.feature.mfcc(S=librosa.power_to_db(mel), n_mfcc=20)
        feature_stats('mfcc', f)

    except Exception as e:
        print('{}: {}'.format(tid, repr(e)))

    return features
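# A minimal aggregation sketch, assuming compute_features() is mapped over a
# list of track ids and the per-track Series are stacked into one DataFrame.
# The pool usage and the 'features.csv' output name are assumptions, not
# necessarily what the original pipeline used.
def _compute_all_features_sketch(track_ids):
    import multiprocessing
    with multiprocessing.Pool() as pool:
        series = pool.map(compute_features, track_ids)
    # Rows are track ids; columns are the (feature, statistic) MultiIndex from columns().
    features = pd.DataFrame(series)
    features.to_csv('features.csv', float_format='%.8e')
    return features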
def make_spec(x, y):
    audio_dir = "/media/ravi/507412DD7412C59E/fma_small/"
    spec_out_dir = "/media/ravi/507412DD7412C59E/spectrogram/"
    aud_path = utils.get_audio_path(audio_dir, x)
    out_path = spec_out_dir + "{}/{}".format(y, x)
    return (song_to_spec(aud_path), out_path)
def get_filepaths(subset):
    filepaths = []
    tids = tracks.index[tracks['set', 'subset'] <= subset]
    for tid in tids:
        filepaths.append(utils.get_audio_path('', tid))
    return filepaths
def autosample(audiofilename, genres, num_frequencies=3, segsize=.5,
               stride=3, low_feather_size=.1, high_feather_size=.001):
    audio = load_audio(audiofilename)
    segment_length = int(segsize * p.samplerate)
    # audio = audio[:-(len(audio)%segment_length)]
    audio_split = torch.split(audio, segment_length)
    fft_split = [torch.rfft(seg, 1) for idx, seg in enumerate(audio_split)]
    partitions = [decompose(seg, num_frequencies) for seg in fft_split]

    # genre, song_id, seg_idx, score
    recreation_data = torch.zeros(len(audio_split), num_frequencies, 4)

    tracks = utils.load(f'{p.meta_dir}/tracks.csv')
    small = tracks['set', 'subset'] <= 'small'
    train = tracks['set', 'split'] == 'training'

    for g, genre in enumerate(tqdm(genres)):
        dataset = torch.load(
            f"data/{genre}_{str(segsize).replace('.', '')}_{stride}")
        idx_to_id = torch.load(
            f"data/idx{genre}_{str(segsize).replace('.', '')}_{stride}")
        for i, (part, seg) in enumerate(zip(partitions, fft_split)):
            differences = torch.norm(dataset - seg, dim=3)
            per_freq_differences = [
                torch.norm(differences[:, :, part[f]:part[f + 1]], dim=2)
                for f in range(len(part) - 1)
            ]
            best_per_freq = [
                torch.argmin(diff) for diff in per_freq_differences
            ]
            best_per_freq = [[k // dataset.shape[1], k % dataset.shape[1]]
                             for k in best_per_freq]
            for j, (song_idx, seg_idx) in enumerate(best_per_freq):
                value = per_freq_differences[j][best_per_freq[j][0],
                                                best_per_freq[j][1]]
                if recreation_data[i, j, 0] == 0 or recreation_data[i, j, 3] > value:
                    song_id = idx_to_id[song_idx.cpu().numpy()]
                    recreation_data[i, j] = torch.Tensor(
                        [g, song_id, seg_idx, value])

    result = torch.zeros_like(audio)
    if num_frequencies != 1:
        feather_sizes = [low_feather_size - i * (low_feather_size - high_feather_size)
                         / (num_frequencies - 1) for i in range(num_frequencies)]
    else:
        feather_sizes = [low_feather_size]
    feather_length = int(low_feather_size * p.samplerate)
    ramps = []
    flens = []
    for fsize in feather_sizes:
        flen = int(fsize * p.samplerate // 2 * 2)
        ramps.append(torch.cat([torch.arange(flen).float() / flen,
                                torch.ones(segment_length - flen).float(),
                                torch.arange(flen, 0, -1).float() / flen]))
        flens.append(flen)

    for i, (segment, part) in enumerate(
            tqdm(zip(recreation_data, partitions), total=len(recreation_data))):
        res_start = i * segment_length - feather_length // 2
        res_start_t = max(res_start, 0)
        res_end = (i + 1) * segment_length + feather_length // 2
        res_end_t = min(res_end, result.shape[0])
        for j, (freq_seg, ramp, flen) in enumerate(zip(segment, ramps, flens)):
            # Load the audio.
            song_id = int(freq_seg[1])
            filepath = utils.get_audio_path(p.audio_dir, song_id)
            tosample = load_audio(filepath)
            # Get the relevant region.
            loc_start = int(freq_seg[2]) * segment_length - flen // 2
            loc_end = loc_start + segment_length + flen
            loc_start_t = int(max(loc_start, 0))
            loc_end_t = int(min(loc_end, tosample.shape[0]))
            to_sample_segment = torch.zeros(segment_length + feather_length)
            s = loc_start_t - loc_start + feather_length // 2 - flen // 2
            e = to_sample_segment.shape[0] - (loc_end - loc_end_t) - (
                feather_length // 2 - flen // 2)
            to_sample_segment[s:e] = tosample[loc_start_t:loc_end_t]
            # Band-pass filter.
            to_sample_fourier = torch.rfft(to_sample_segment, 1)
            to_sample_fourier[:part[j]] = 0
            to_sample_fourier[part[j + 1]:] = 0
            filtered_segment = torch.irfft(
                to_sample_fourier, 1, signal_sizes=to_sample_segment.shape)
            # Feather the edges.
            s1 = feather_length // 2 - flen // 2
            e1 = filtered_segment.shape[0] - s1
            filtered_segment[s1:e1] *= ramp
            # Add to the result.
            result[res_start_t:res_end_t] += filtered_segment[
                res_start_t - res_start:filtered_segment.shape[0] - (res_end - res_end_t)]

    return result, recreation_data
plot('track', 'comments', 20, 224)

medium_subset['album'].describe()

plt.figure(figsize=(17, 10))
plot('album', 'listens', 100e3, 221)
plot('album', 'favorites', 100, 223)
plot('album', 'comments', 20, 224)

medium_subset['artist'].describe()

plt.figure(figsize=(17, 5))
plot('artist', 'favorites', 100, 121)
plot('artist', 'comments', 20, 122)

# Checking audio files.
AUDIO_DIR = 'C:/MusicClassification/fma_medium'
filename = utils.get_audio_path(AUDIO_DIR, 2)

# Reading the audio file with librosa.
x, sr = librosa.load(filename, sr=None, mono=True)
print('Duration: {:.2f}s, {} samples'.format(x.shape[-1] / sr, x.size))


# FFT plot.
def custom_fft(y, fs):
    T = 1.0 / fs
    N = y.shape[0]
    yf = fft(y)
    xf = np.linspace(0.0, 1.0 / (2.0 * T), N // 2)
    # The FFT of a real signal is symmetric, so keep only the first half;
    # it is also complex, so take the magnitude (abs).
    vals = 2.0 / N * np.abs(yf[0:N // 2])
    return xf, vals
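# Hedged usage sketch (not from the original notebook): plotting the spectrum
# returned by custom_fft for the clip loaded above. The figure size and axis
# labels are illustrative assumptions.
xf, vals = custom_fft(x, sr)
plt.figure(figsize=(12, 4))
plt.plot(xf, vals)
plt.xlabel('Frequency [Hz]')
plt.ylabel('Magnitude')
plt.show()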
def build_features_for_track_id(self, track_id):
    """Convenience wrapper around build_features_for_track_file for the features of one track."""
    track_path = utils.get_audio_path(AUDIO_DIR, track_id)
    return self.build_features_for_track_file(track_path)
plt.subplot(2, 1, 1)
plt.semilogy(features[0].T, label=name[0])
plt.xticks([])
plt.xlim([0, features[0].shape[-1]])
plt.legend(loc='best')

plt.subplot(2, 1, 2)
librosa.display.specshow(features[1], x_axis='time')
plt.colorbar()
plt.title(name[1])

plt.tight_layout()
plt.show()

tids = utils.get_fs_tids('fma_small')
filepath = utils.get_audio_path('fma_small', tids[0])
x, sr = librosa.load(filepath, sr=None, mono=True, res_type='kaiser_fast')

cqt = np.abs(librosa.cqt(x, sr=sr, hop_length=512, bins_per_octave=12,
                         n_bins=7 * 12, tuning=None))
assert cqt.shape[0] == 7 * 12
assert np.ceil(len(x) / 512) <= cqt.shape[1] <= np.ceil(len(x) / 512) + 1

chroma_cqt = librosa.feature.chroma_cqt(C=cqt, n_chroma=12, n_octaves=7)
chroma_cens = librosa.feature.chroma_cens(C=cqt, n_chroma=12, n_octaves=7)