def download_metadata():
    """Collect track, album, artist and genre metadata and save it to disk.

    Queries the Free Music Archive API (key read from the ``FMA_KEY``
    environment variable) for every track ID below the most recent one, then
    for every album and artist referenced by the collected tracks, and
    finally for all genres.

    Side effects:
        Writes ``raw_tracks.csv``, ``raw_albums.csv``, ``raw_artists.csv``
        and ``raw_genres.csv`` in the current working directory, plus
        ``not_found.pickle`` holding a dict with the ``'tracks'``,
        ``'albums'`` and ``'artists'`` entries returned as the second
        element of each ``fma.get_all`` call.
    """
    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))

    max_tid = int(fma.get_recent_tracks()[0][0])
    print('Largest track id: {}'.format(max_tid))

    not_found = {}

    # NOTE(review): trange(max_tid) iterates 0 .. max_tid - 1, so the largest
    # track ID itself is never queried -- confirm whether that is intended.
    id_range = trange(max_tid, desc='tracks')
    tracks, not_found['tracks'] = fma.get_all('track', id_range)

    id_range = tqdm(tracks['album_id'].unique(), desc='albums')
    albums, not_found['albums'] = fma.get_all('album', id_range)

    id_range = tqdm(tracks['artist_id'].unique(), desc='artists')
    artists, not_found['artists'] = fma.get_all('artist', id_range)

    genres = fma.get_all_genres()

    # Explicit (name, frame) pairs instead of eval() on the variable name.
    datasets = (
        ('tracks', tracks),
        ('albums', albums),
        ('artists', artists),
        ('genres', genres),
    )
    for name, frame in datasets:
        # Sort rows, then columns, so the CSV layout is deterministic.
        frame.sort_index(axis=0, inplace=True)
        frame.sort_index(axis=1, inplace=True)
        frame.to_csv('raw_' + name + '.csv')

    # Context manager guarantees the pickle is flushed and closed.
    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)
def download_data(args):
    """Download the audio of every track listed in ``raw_tracks.csv``.

    Tracks whose destination file already exists are skipped, so the
    function can be re-run to resume an interrupted download.

    Args:
        args: Object exposing a ``path`` attribute (presumably the parsed
            command-line namespace -- confirm against the caller). Audio is
            stored under ``<abspath(args.path)>/fma_full``.

    Side effects:
        Reads ``raw_tracks.csv`` and ``not_found.pickle`` from the current
        working directory, creates sub-directories via ``_create_subdirs``,
        downloads audio files, rewrites ``not_found.pickle`` with a fresh
        ``'audio'`` entry (list of track IDs whose download failed) and
        prints a one-line summary.
    """
    dst_dir = os.path.join(os.path.abspath(args.path), 'fma_full')
    tracks = pd.read_csv('raw_tracks.csv', index_col=0)
    _create_subdirs(dst_dir, tracks)

    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))

    # not_found.pickle is produced by this script's metadata step; never
    # point this at a pickle from an untrusted source.
    with open('not_found.pickle', 'rb') as f:
        not_found = pickle.load(f)
    not_found['audio'] = []

    # Download missing tracks.
    collected = 0
    for tid in tqdm(tracks.index):
        dst = utils.get_audio_path(dst_dir, tid)
        if os.path.exists(dst):
            continue
        try:
            fma.download_track(tracks.at[tid, 'track_file'], dst)
        except Exception:  # typically requests.HTTPError
            # Best effort: remember the failure and keep going. A bare
            # ``except:`` would also swallow KeyboardInterrupt/SystemExit
            # and make the loop impossible to stop cleanly.
            not_found['audio'].append(tid)
        else:
            collected += 1

    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)

    # Everything neither downloaded now nor failed was already on disk.
    existing = len(tracks) - collected - len(not_found['audio'])
    print('audio: {} collected, {} existing, {} not found'.format(
        collected, existing, len(not_found['audio'])))
def download_metadata(args):
    """Collect track, album, artist and genre metadata and save it to disk.

    Args:
        args: Object exposing ``tid_min`` and ``tid_max`` attributes
            (presumably the parsed command-line namespace -- confirm against
            the caller). When ``tid_max`` is ``None`` it is replaced, in
            place on ``args``, by the ID of the most recent track reported
            by the API.

    Side effects:
        Writes ``raw_tracks.csv``, ``raw_albums.csv``, ``raw_artists.csv``
        and ``raw_genres.csv`` in the current working directory, plus
        ``not_found.pickle`` holding a dict with the ``'tracks'``,
        ``'albums'`` and ``'artists'`` entries returned as the second
        element of each ``fma.get_all`` call. Prints progress and a
        per-dataset summary.
    """
    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))

    if args.tid_max is None:
        args.tid_max = int(fma.get_recent_tracks()[0][0])
    message = 'Collecting metadata from track ID {} to {}.'
    print(message.format(args.tid_min, args.tid_max))

    not_found = {}

    # NOTE(review): the upper bound of trange is exclusive, so track ID
    # args.tid_max itself is not queried although the message above reads
    # "to tid_max" -- confirm whether that is intended.
    id_range = trange(args.tid_min, args.tid_max, desc='tracks')
    tracks, not_found['tracks'] = fma.get_all('track', id_range)

    id_range = tqdm(tracks['album_id'].unique(), desc='albums')
    albums, not_found['albums'] = fma.get_all('album', id_range)

    id_range = tqdm(tracks['artist_id'].unique(), desc='artists')
    artists, not_found['artists'] = fma.get_all('artist', id_range)

    genres = fma.get_all_genres()

    # Explicit (name, frame) pairs instead of eval() on the variable name.
    datasets = (
        ('tracks', tracks),
        ('albums', albums),
        ('artists', artists),
        ('genres', genres),
    )
    for name, frame in datasets:
        # Sort rows, then columns, so the CSV layout is deterministic.
        frame.sort_index(axis=0, inplace=True)
        frame.sort_index(axis=1, inplace=True)
        frame.to_csv('raw_' + name + '.csv')
        # Genres are fetched in one call and have no not-found list.
        if name != 'genres':
            print('{}: {} collected, {} not found'.format(
                name, len(frame), len(not_found[name])))

    # Context manager guarantees the pickle is flushed and closed.
    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)
def download_data(args):
    """Download the audio of every track listed in ``raw_tracks.csv``.

    Tracks whose destination file already exists are skipped, so the
    function can be re-run to resume an interrupted download.

    Args:
        args: Object exposing a ``path`` attribute (presumably the parsed
            command-line namespace -- confirm against the caller).

    Side effects:
        Reads ``raw_tracks.csv`` and ``not_found.pickle`` from the current
        working directory, creates sub-directories via ``_create_subdirs``,
        downloads audio files, rewrites ``not_found.pickle`` with a fresh
        ``'audio'`` entry (list of track IDs whose download failed) and
        prints a one-line summary.
    """
    # Resolve the given path to an absolute one, then append 'fma_full'.
    dst_dir = os.path.join(os.path.abspath(args.path), 'fma_full')
    tracks = pd.read_csv('raw_tracks.csv', index_col=0)
    _create_subdirs(dst_dir, tracks)

    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))

    # not_found.pickle is produced by this script's metadata step; never
    # point this at a pickle from an untrusted source.
    with open('not_found.pickle', 'rb') as f:
        not_found = pickle.load(f)
    not_found['audio'] = []

    # Download missing tracks.
    collected = 0
    for tid in tqdm(tracks.index):
        # Destination path of the audio file for this track ID.
        dst = utils.get_audio_path(dst_dir, tid)
        # Only fetch audio that is not on disk yet (on a first run the
        # directory is empty, so everything is downloaded).
        if os.path.exists(dst):
            continue
        try:
            fma.download_track(tracks.at[tid, 'track_file'], dst)
        except Exception:  # typically requests.HTTPError
            # Best effort: remember the failure and keep going. A bare
            # ``except:`` would also swallow KeyboardInterrupt/SystemExit
            # and make the loop impossible to stop cleanly.
            not_found['audio'].append(tid)
        else:
            collected += 1

    # Persist the result so the tracks whose audio could not be found are
    # recorded.
    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)

    # Everything neither downloaded now nor failed was already on disk.
    existing = len(tracks) - collected - len(not_found['audio'])
    print('audio: {} collected, {} existing, {} not found'.format(
        collected, existing, len(not_found['audio'])))
def download_data(dst_dir):
    """Download the audio of every track listed in ``raw_tracks.csv``.

    Tracks whose destination file already exists are skipped, so the
    function can be re-run to resume an interrupted download.

    Args:
        dst_dir: Directory under which audio files are stored; it is
            converted to an absolute path before use.

    Side effects:
        Reads ``raw_tracks.csv`` and ``not_found.pickle`` from the current
        working directory, creates sub-directories via ``_create_subdirs``,
        downloads audio files and rewrites ``not_found.pickle`` with a fresh
        ``'audio'`` entry (list of track IDs whose download failed).
    """
    dst_dir = os.path.abspath(dst_dir)
    tracks = pd.read_csv('raw_tracks.csv', index_col=0)
    _create_subdirs(dst_dir, tracks)

    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))

    # not_found.pickle is produced by this script's metadata step; never
    # point this at a pickle from an untrusted source.
    with open('not_found.pickle', 'rb') as f:
        not_found = pickle.load(f)
    not_found['audio'] = []

    # Download missing tracks.
    for tid in tqdm(tracks.index):
        dst = utils.get_audio_path(dst_dir, tid)
        if os.path.exists(dst):
            continue
        try:
            fma.download_track(tracks.at[tid, 'track_file'], dst)
        except Exception:  # typically requests.HTTPError
            # Best effort: remember the failure and keep going. A bare
            # ``except:`` would also swallow KeyboardInterrupt/SystemExit
            # and make the loop impossible to stop cleanly.
            not_found['audio'].append(tid)

    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)
def download_metadata(args):
    """Collect track, album, artist and genre metadata and save it to disk.

    Args:
        args: Object exposing ``tid_min`` and ``tid_max`` attributes
            (presumably the parsed command-line namespace -- confirm against
            the caller). When ``tid_max`` is ``None`` it is replaced, in
            place on ``args``, by the ID of the most recent track reported
            by the API.

    Side effects:
        Writes ``raw_tracks.csv``, ``raw_albums.csv``, ``raw_artists.csv``
        and ``raw_genres.csv`` in the current working directory, plus
        ``not_found.pickle`` holding a dict with the ``'tracks'``,
        ``'albums'`` and ``'artists'`` entries returned as the second
        element of each ``fma.get_all`` call. Prints progress and a
        per-dataset summary.
    """
    # Create the FreeMusicArchive object from utils (the FMA API client).
    fma = utils.FreeMusicArchive(os.environ.get('FMA_KEY'))

    # Default the upper bound to the most recently added track.
    if args.tid_max is None:
        args.tid_max = int(fma.get_recent_tracks()[0][0])
    message = 'Collecting metadata from track ID {} to {}.'
    print(message.format(args.tid_min, args.tid_max))

    not_found = {}

    # Progress-bar range over the track IDs to query.
    # NOTE(review): the upper bound of trange is exclusive, so track ID
    # args.tid_max itself is not queried although the message above reads
    # "to tid_max" -- confirm whether that is intended.
    id_range = trange(args.tid_min, args.tid_max, desc='tracks')
    # get_all returns the collected track information (used below as a
    # DataFrame) together with what could not be found.
    tracks, not_found['tracks'] = fma.get_all('track', id_range)

    # Albums and artists are derived from the IDs referenced by the tracks.
    id_range = tqdm(tracks['album_id'].unique(), desc='albums')
    albums, not_found['albums'] = fma.get_all('album', id_range)

    id_range = tqdm(tracks['artist_id'].unique(), desc='artists')
    artists, not_found['artists'] = fma.get_all('artist', id_range)

    genres = fma.get_all_genres()

    # Explicit (name, frame) pairs instead of eval() on the variable name.
    datasets = (
        ('tracks', tracks),
        ('albums', albums),
        ('artists', artists),
        ('genres', genres),
    )
    # For each dataset, order its rows and columns.
    for name, frame in datasets:
        frame.sort_index(axis=0, inplace=True)
        frame.sort_index(axis=1, inplace=True)
        # Export to raw_tracks.csv, raw_albums.csv, raw_artists.csv,
        # raw_genres.csv.
        frame.to_csv('raw_' + name + '.csv')
        # Report how many entries were not found in the archive; genres are
        # fetched in one call and have no not-found list.
        if name != 'genres':
            print('{}: {} collected, {} not found'.format(
                name, len(frame), len(not_found[name])))

    # Context manager guarantees the pickle is flushed and closed.
    with open('not_found.pickle', 'wb') as f:
        pickle.dump(not_found, f)