def create_track_file(maindir, trackid, track, song, artist, mbconnect=None):
    """
    Main function to create an HDF5 song file.
    You got to have the track, song and artist already.
    If you pass an open connection to the musicbrainz database, we also use it.
    Returns True if song was created, False otherwise.
    False can mean another thread is already doing that song.
    We also check whether the path exists.

    The file is first written to '<final path>_tmp' and only moved to its
    final location once it has been completely filled. Any failure while
    building the file is printed and retried after SLEEPTIME seconds, up to
    100 attempts.

    INPUT
       maindir    - main directory of the Million Song Dataset
       trackid    - Echo Nest track id of the track object
       track      - pyechonest track object
       song       - pyechonest song object
       artist     - pyechonest artist object
       mbconnect  - open musicbrainz pg connection
    RETURN
       True if a track file was created, False otherwise
    RAISES
       KeyboardInterrupt - re-raised after the tmp file and the final file
                           have been removed
    """
    hdf5_path = os.path.join(maindir, path_from_trackid(trackid))
    if os.path.exists(hdf5_path):
        return False  # file already exists, no stress
    hdf5_path_tmp = hdf5_path + '_tmp'
    # lock the file
    if not get_lock_track(trackid):
        return False  # someone is taking care of that file
    if os.path.exists(hdf5_path):
        release_lock_track(trackid)
        return False  # got the lock too late, file exists
    # count errors (=tries), stop after 100 tries
    try_cnt = 0
    # create file and fill it
    try:
        while True:  # try until we make it work!
            try:
                # we try one more time
                try_cnt += 1
                parent_dir = os.path.dirname(hdf5_path)
                if not os.path.isdir(parent_dir):
                    os.makedirs(parent_dir)
                # check / delete tmp file if exist
                if os.path.isfile(hdf5_path_tmp):
                    os.remove(hdf5_path_tmp)
                # create tmp file
                HDF5.create_song_file(hdf5_path_tmp)
                h5 = HDF5.open_h5_file_append(hdf5_path_tmp)
                HDF5.fill_hdf5_from_artist(h5, artist)
                HDF5.fill_hdf5_from_song(h5, song)
                HDF5.fill_hdf5_from_track(h5, track)
                if mbconnect is not None:
                    HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
                # TODO billboard? lastfm? ...?
                h5.close()
            except KeyboardInterrupt:
                close_creation()
                raise
            # we dont panic, delete file, wait and retry
            except Exception as e:
                # close hdf5; h5 may not be bound yet (NameError) and the
                # close itself is allowed to fail with ValueError.
                # The exception types must be a tuple: the old form
                # 'except NameError, ValueError' only caught NameError.
                try:
                    h5.close()
                except (NameError, ValueError):
                    pass
                # delete path; os.remove reports failures (e.g. the tmp file
                # was never created) as OSError, which is distinct from
                # IOError on Python 2, so both are caught
                try:
                    os.remove(hdf5_path_tmp)
                except (IOError, OSError):
                    pass
                # print and wait
                print('ERROR creating track: %s on %s (pid=%s)'
                      % (trackid, time.ctime(), os.getpid()))
                print(e)
                if try_cnt < 100:
                    print('(try again in %s seconds)' % (SLEEPTIME,))
                    time.sleep(SLEEPTIME)
                    continue
                # give up
                else:
                    print('we give up after %s tries' % (try_cnt,))
                    release_lock_track(trackid)
                    return False
            # move tmp file to real file
            shutil.move(hdf5_path_tmp, hdf5_path)
            # release lock
            release_lock_track(trackid)
            break
    # KeyboardInterrupt, we delete file, clean things up
    except KeyboardInterrupt:
        # NOTE(review): the track lock is not released on this path;
        # presumably close_creation() or the caller deals with it - confirm.
        # close hdf5
        try:
            h5.close()
        except (NameError, ValueError):
            pass
        # delete path
        try:
            if os.path.isfile(hdf5_path_tmp):
                os.remove(hdf5_path_tmp)
            if os.path.isfile(hdf5_path):
                os.remove(hdf5_path)
        except (IOError, OSError):
            pass
        raise
    # the file was filled and moved in place: report success as documented
    return True
# Build one summary HDF5 file ('output') from every '.h5' file found under
# 'maindir'. Both names are expected to be set before this point.

# sanity checks; a failed check exits with a non-zero status so that calling
# scripts can detect the error
if not os.path.isdir(maindir):
    print('ERROR: directory', maindir, 'does not exists.')
    sys.exit(1)
if os.path.isfile(output):
    print('ERROR: file', output, 'exists, delete or provide a new filename.')
    sys.exit(1)
# start time
t1 = time.time()
# get all h5 files
allh5 = get_all_files(maindir, ext='.h5')
print('found', len(allh5), 'H5 files.')
# create summary file
HDF5.create_aggregate_file(output, expectedrows=len(allh5), summaryfile=True)
print('Summary file created, we start filling it.')
# fill it; the handle is closed even if filling raises
h5 = HDF5.open_h5_file_append(output)
try:
    HDF5.fill_hdf5_aggregate_file(h5, allh5, summaryfile=True)
finally:
    h5.close()
# done!
stimelength = str(datetime.timedelta(seconds=time.time() - t1))
print('Summarized', len(allh5), 'files in:', stimelength)
# Build one summary HDF5 file ('output') from every '.h5' file found under
# 'maindir'. Both names are expected to be set before this point.

# sanity checks; a failed check exits with a non-zero status so that calling
# scripts can detect the error
if not os.path.isdir(maindir):
    print('ERROR: directory', maindir, 'does not exists.')
    sys.exit(1)
if os.path.isfile(output):
    print('ERROR: file', output, 'exists, delete or provide a new filename.')
    sys.exit(1)
# start time
t1 = time.time()
# get all h5 files
allh5 = get_all_files(maindir, ext='.h5')
print('found', len(allh5), 'H5 files.')
# create summary file
HDF5.create_aggregate_file(output, expectedrows=len(allh5), summaryfile=True)
print('Summary file created, we start filling it.')
# fill it; the handle is closed even if filling raises
h5 = HDF5.open_h5_file_append(output)
try:
    HDF5.fill_hdf5_aggregate_file(h5, allh5, summaryfile=True)
finally:
    h5.close()
# done!
stimelength = str(datetime.timedelta(seconds=time.time() - t1))
print('Summarized', len(allh5), 'files in:', stimelength)
def convert_one_song(audiofile, output, mbconnect=None, verbose=0,
                     DESTROYAUDIO=False):
    """
    PRINCIPAL FUNCTION
    Converts one given audio file to hdf5 format (saved in 'output')
    by uploading it to The Echo Nest API
    INPUT
         audiofile   - path to a typical audio file (wav, mp3, ...)
            output   - nonexisting hdf5 path
         mbconnect   - if not None, open connection to musicbrainz server
           verbose   - if >0 display more information
      DESTROYAUDIO   - Careful! deletes audio file if everything went well
    RETURN
       1 if we think a song is created, 0 otherwise
       (0 is returned when 'audiofile' does not exist or 'output' already
       exists; errors raised by the API or HDF5 calls are not caught here)
    """
    # inputs + sanity checks
    if not os.path.exists(audiofile):
        # report the path we were given (the old message referenced an
        # undefined name and raised NameError instead of returning 0)
        print('ERROR: song file does not exist:', audiofile)
        return 0
    if os.path.exists(output):
        print('ERROR: hdf5 output file already exist:', output,
              ', delete or choose new path')
        return 0
    # get EN track / song / artist for that song
    if verbose > 0:
        print('get analysis for file:', audiofile)
    track = trackEN.track_from_filename(audiofile)
    song_id = track.song_id
    song = songEN.Song(song_id)
    if verbose > 0:
        print('found song:', song.title, '(', song_id, ')')
    artist_id = song.artist_id
    artist = artistEN.Artist(artist_id)
    if verbose > 0:
        print('found artist:', artist.name, '(', artist_id, ')')
    # hack to fill missing values: give the track an empty string for each
    # attribute it lacks
    for attr_name in ('foreign_id', 'foreign_release_id'):
        if not hasattr(track, attr_name):
            setattr(track, attr_name, '')
            if verbose > 0:
                print('no track ' + attr_name + ' found')
    # create HDF5 file
    if verbose > 0:
        print('create HDF5 file:', output)
    HDF5.create_song_file(output, force=False)
    # fill hdf5 file from track
    if verbose > 0:
        if mbconnect is None:
            print('fill HDF5 file with info from track/song/artist')
        else:
            print('fill HDF5 file with info from track/song/artist/musicbrainz')
    h5 = HDF5.open_h5_file_append(output)
    # make sure the handle is closed even when one of the fill steps raises
    try:
        HDF5.fill_hdf5_from_artist(h5, artist)
        HDF5.fill_hdf5_from_song(h5, song)
        HDF5.fill_hdf5_from_track(h5, track)
        if mbconnect is not None:
            HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
    finally:
        h5.close()
    # done; the audio file is only removed once the hdf5 file is complete
    if DESTROYAUDIO:
        if verbose > 0:
            print('We remove audio file:', audiofile)
        os.remove(audiofile)
    return 1
def convert_one_song(audiofile, output, mbconnect=None, verbose=0,
                     DESTROYAUDIO=False):
    """
    PRINCIPAL FUNCTION
    Converts one given audio file to hdf5 format (saved in 'output')
    by uploading it to The Echo Nest API
    INPUT
         audiofile   - path to a typical audio file (wav, mp3, ...)
            output   - nonexisting hdf5 path
         mbconnect   - if not None, open connection to musicbrainz server
           verbose   - if >0 display more information
      DESTROYAUDIO   - Careful! deletes audio file if everything went well
    RETURN
       1 if we think a song is created, 0 otherwise
       (0 is returned when 'audiofile' does not exist or 'output' already
       exists; errors raised by the API or HDF5 calls are not caught here)
    """
    # inputs + sanity checks
    if not os.path.exists(audiofile):
        # report the path we were given (the old message referenced an
        # undefined name and raised NameError instead of returning 0)
        print('ERROR: song file does not exist:', audiofile)
        return 0
    if os.path.exists(output):
        print('ERROR: hdf5 output file already exist:', output,
              ', delete or choose new path')
        return 0
    # get EN track / song / artist for that song
    # (messages are passed as separate print arguments; wrapping them in an
    # extra pair of parentheses printed the repr of a tuple instead)
    if verbose > 0:
        print('get analysis for file:', audiofile)
    track = trackEN.track_from_filename(audiofile)
    song_id = track.song_id
    song = songEN.Song(song_id)
    if verbose > 0:
        print('found song:', song.title, '(', song_id, ')')
    artist_id = song.artist_id
    artist = artistEN.Artist(artist_id)
    if verbose > 0:
        print('found artist:', artist.name, '(', artist_id, ')')
    # hack to fill missing values: give the track an empty string for each
    # attribute it lacks
    for attr_name in ('foreign_id', 'foreign_release_id'):
        if not hasattr(track, attr_name):
            setattr(track, attr_name, '')
            if verbose > 0:
                print('no track ' + attr_name + ' found')
    # create HDF5 file
    if verbose > 0:
        print('create HDF5 file:', output)
    HDF5.create_song_file(output, force=False)
    # fill hdf5 file from track
    if verbose > 0:
        if mbconnect is None:
            print('fill HDF5 file with info from track/song/artist')
        else:
            print('fill HDF5 file with info from track/song/artist/musicbrainz')
    h5 = HDF5.open_h5_file_append(output)
    # make sure the handle is closed even when one of the fill steps raises
    try:
        HDF5.fill_hdf5_from_artist(h5, artist)
        HDF5.fill_hdf5_from_song(h5, song)
        HDF5.fill_hdf5_from_track(h5, track)
        if mbconnect is not None:
            HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
    finally:
        h5.close()
    # done; the audio file is only removed once the hdf5 file is complete
    if DESTROYAUDIO:
        if verbose > 0:
            print('We remove audio file:', audiofile)
        os.remove(audiofile)
    return 1
def create_track_file(maindir, trackid, track, song, artist, mbconnect=None):
    """
    Main function to create an HDF5 song file.
    You got to have the track, song and artist already.
    If you pass an open connection to the musicbrainz database, we also use it.
    Returns True if song was created, False otherwise.
    False can mean another thread is already doing that song.
    We also check whether the path exists.

    The file is first written to '<final path>_tmp' and only moved to its
    final location once it has been completely filled. Any failure while
    building the file is printed and retried after SLEEPTIME seconds, up to
    100 attempts.

    INPUT
       maindir    - main directory of the Million Song Dataset
       trackid    - Echo Nest track id of the track object
       track      - pyechonest track object
       song       - pyechonest song object
       artist     - pyechonest artist object
       mbconnect  - open musicbrainz pg connection
    RETURN
       True if a track file was created, False otherwise
    RAISES
       KeyboardInterrupt - re-raised after the tmp file and the final file
                           have been removed
    """
    hdf5_path = os.path.join(maindir, path_from_trackid(trackid))
    if os.path.exists(hdf5_path):
        return False  # file already exists, no stress
    hdf5_path_tmp = hdf5_path + '_tmp'
    # lock the file
    if not get_lock_track(trackid):
        return False  # someone is taking care of that file
    if os.path.exists(hdf5_path):
        release_lock_track(trackid)
        return False  # got the lock too late, file exists
    # count errors (=tries), stop after 100 tries
    try_cnt = 0
    # create file and fill it
    try:
        while True:  # try until we make it work!
            try:
                # we try one more time
                try_cnt += 1
                parent_dir = os.path.dirname(hdf5_path)
                if not os.path.isdir(parent_dir):
                    os.makedirs(parent_dir)
                # check / delete tmp file if exist
                if os.path.isfile(hdf5_path_tmp):
                    os.remove(hdf5_path_tmp)
                # create tmp file
                HDF5.create_song_file(hdf5_path_tmp)
                h5 = HDF5.open_h5_file_append(hdf5_path_tmp)
                HDF5.fill_hdf5_from_artist(h5, artist)
                HDF5.fill_hdf5_from_song(h5, song)
                HDF5.fill_hdf5_from_track(h5, track)
                if mbconnect is not None:
                    HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
                # TODO billboard? lastfm? ...?
                h5.close()
            except KeyboardInterrupt:
                close_creation()
                raise
            # we dont panic, delete file, wait and retry
            except Exception as e:
                # close hdf5; h5 may not be bound yet (NameError) and the
                # close itself is allowed to fail with ValueError.
                # The exception types must be a tuple: the old form
                # 'except NameError, ValueError' only caught NameError.
                try:
                    h5.close()
                except (NameError, ValueError):
                    pass
                # delete path; os.remove reports failures (e.g. the tmp file
                # was never created) as OSError, which is distinct from
                # IOError on Python 2, so both are caught
                try:
                    os.remove(hdf5_path_tmp)
                except (IOError, OSError):
                    pass
                # print and wait
                print('ERROR creating track: %s on %s (pid=%s)'
                      % (trackid, time.ctime(), os.getpid()))
                print(e)
                if try_cnt < 100:
                    print('(try again in %s seconds)' % (SLEEPTIME,))
                    time.sleep(SLEEPTIME)
                    continue
                # give up
                else:
                    print('we give up after %s tries' % (try_cnt,))
                    release_lock_track(trackid)
                    return False
            # move tmp file to real file
            shutil.move(hdf5_path_tmp, hdf5_path)
            # release lock
            release_lock_track(trackid)
            break
    # KeyboardInterrupt, we delete file, clean things up
    except KeyboardInterrupt:
        # NOTE(review): the track lock is not released on this path;
        # presumably close_creation() or the caller deals with it - confirm.
        # close hdf5
        try:
            h5.close()
        except (NameError, ValueError):
            pass
        # delete path
        try:
            if os.path.isfile(hdf5_path_tmp):
                os.remove(hdf5_path_tmp)
            if os.path.isfile(hdf5_path):
                os.remove(hdf5_path)
        except (IOError, OSError):
            pass
        raise
    # the file was filled and moved in place: report success as documented
    return True