Пример #1
0
def create_track_file(maindir, trackid, track, song, artist, mbconnect=None):
    """
    Main function to create an HDF5 song file.
    You got to have the track, song and artist already.
    If you pass an open connection to the musicbrainz database, we also use it.
    The file is written to '<final path>_tmp' first and only moved to its
    final path once it has been completely filled.
    A failed attempt is retried after SLEEPTIME seconds, up to 100 tries.
    INPUT
       maindir      - main directory of the Million Song Dataset
       trackid      - Echo Nest track id of the track object
       track        - pyechonest track object
       song         - pyechonest song object
       artist       - pyechonest artist object
       mbconnect    - open musicbrainz pg connection
    RETURN
       True if a track file was created, False otherwise.
       False means: the file already exists, the lock could not be
       obtained (another thread is doing that song), or we gave up
       after 100 failed tries.
    RAISE
       KeyboardInterrupt - re-raised after the temporary and final files
                           have been removed
    """
    hdf5_path = os.path.join(maindir, path_from_trackid(trackid))
    if os.path.exists(hdf5_path):
        return False  # file already exists, no stress
    hdf5_path_tmp = hdf5_path + '_tmp'
    # lock the file
    if not get_lock_track(trackid):
        return False  # someone is taking care of that file
    if os.path.exists(hdf5_path):
        release_lock_track(trackid)
        return False  # got the lock too late, file exists
    # count errors (=tries), stop after 100 tries
    try_cnt = 0
    # handle of the temporary file; None whenever no file was opened
    h5 = None
    # create file and fill it
    try:
        while True:  # try until we make it work!
            # forget the handle of a previous attempt, so that a failure
            # before the open below does not touch a stale file object
            h5 = None
            try:
                # we try one more time
                try_cnt += 1
                hdf5_dir = os.path.split(hdf5_path)[0]
                if not os.path.isdir(hdf5_dir):
                    os.makedirs(hdf5_dir)
                # check / delete tmp file if exist
                if os.path.isfile(hdf5_path_tmp):
                    os.remove(hdf5_path_tmp)
                # create tmp file
                HDF5.create_song_file(hdf5_path_tmp)
                h5 = HDF5.open_h5_file_append(hdf5_path_tmp)
                HDF5.fill_hdf5_from_artist(h5, artist)
                HDF5.fill_hdf5_from_song(h5, song)
                HDF5.fill_hdf5_from_track(h5, track)
                if mbconnect is not None:
                    HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
                # TODO billboard? lastfm? ...?
                h5.close()
            except KeyboardInterrupt:
                close_creation()
                raise
            # we dont panic, delete file, wait and retry
            except Exception as e:
                # close hdf5 (it may already be closed)
                if h5 is not None:
                    try:
                        h5.close()
                    except ValueError:
                        pass
                # delete path; os.remove signals failure with OSError,
                # e.g. when the tmp file was never created
                try:
                    os.remove(hdf5_path_tmp)
                except OSError:
                    pass
                # print and wait
                print('ERROR creating track: %s on %s (pid=%s)'
                      % (trackid, time.ctime(), os.getpid()))
                print(e)
                # give up
                if try_cnt >= 100:
                    print('we give up after %s tries' % (try_cnt,))
                    release_lock_track(trackid)
                    return False
                print('(try again in %s seconds)' % (SLEEPTIME,))
                time.sleep(SLEEPTIME)
                continue
            # move tmp file to real file
            shutil.move(hdf5_path_tmp, hdf5_path)
            # release lock
            release_lock_track(trackid)
            return True
    # KeyboardInterrupt, we delete file, clean things up
    except KeyboardInterrupt:
        # close hdf5
        if h5 is not None:
            try:
                h5.close()
            except ValueError:
                pass
        # delete path
        try:
            if os.path.isfile(hdf5_path_tmp):
                os.remove(hdf5_path_tmp)
            if os.path.isfile(hdf5_path):
                os.remove(hdf5_path)
        except OSError:
            pass
        raise
Пример #2
0
    # Build one summary HDF5 file ('output') from every .h5 file that
    # get_all_files returns for 'maindir'.
    # NOTE(review): the enclosing definition is not visible here; 'maindir'
    # and 'output' are presumably set by the caller / argument parsing.

    # sanity checks
    # (messages are passed to print as separate arguments, not as a single
    # tuple, so the text is printed rather than the tuple's repr)
    if not os.path.isdir(maindir):
        print('ERROR: directory', maindir, 'does not exists.')
        sys.exit(0)
    if os.path.isfile(output):
        print('ERROR: file', output,
              'exists, delete or provide a new filename.')
        sys.exit(0)

    # start time
    t1 = time.time()

    # get all h5 files
    allh5 = get_all_files(maindir, ext='.h5')
    print('found', len(allh5), 'H5 files.')

    # create summary file
    HDF5.create_aggregate_file(output,
                               expectedrows=len(allh5),
                               summaryfile=True)
    print('Summary file created, we start filling it.')

    # fill it
    h5 = HDF5.open_h5_file_append(output)
    HDF5.fill_hdf5_aggregate_file(h5, allh5, summaryfile=True)
    h5.close()

    # done!
    stimelength = str(datetime.timedelta(seconds=time.time() - t1))
    print('Summarized', len(allh5), 'files in:', stimelength)
Пример #3
0
    # Build one summary HDF5 file ('output') from the .h5 files found for
    # 'maindir', and report how long it took.
    # NOTE(review): the enclosing definition is not visible here; 'maindir'
    # and 'output' are presumably set by the caller / argument parsing.

    # sanity checks: the input directory must exist and the output file
    # must not; in both failure cases we print a message and exit
    # (with status 0)
    if not os.path.isdir(maindir):
        print('ERROR: directory',maindir,'does not exists.')
        sys.exit(0)
    if os.path.isfile(output):
        print('ERROR: file',output,'exists, delete or provide a new filename.')
        sys.exit(0)

    # start time, used for the elapsed-time report at the end
    t1 = time.time()

    # get all h5 files
    # (presumably the paths of the '.h5' files under maindir -- verify
    # against get_all_files)
    allh5 = get_all_files(maindir,ext='.h5')
    print('found',len(allh5),'H5 files.')

    # create summary file, telling it how many rows (one per input file
    # found) to expect
    HDF5.create_aggregate_file(output,expectedrows=len(allh5),
                               summaryfile=True)
    print('Summary file created, we start filling it.')

    # fill it: reopen the new file in append mode, add the data of all
    # input files, then close it
    h5 = HDF5.open_h5_file_append(output)
    HDF5.fill_hdf5_aggregate_file(h5,allh5,summaryfile=True)
    h5.close()

    # done! report the elapsed wall-clock time as a timedelta string
    stimelength = str(datetime.timedelta(seconds=time.time()-t1))
    print('Summarized',len(allh5),'files in:',stimelength)
    
Пример #4
0
def convert_one_song(audiofile,output,mbconnect=None,verbose=0,DESTROYAUDIO=False):
    """
    PRINCIPAL FUNCTION
    Converts one given audio file to hdf5 format (saved in 'output')
    by uploading it to The Echo Nest API
    INPUT
         audiofile   - path to a typical audio file (wav, mp3, ...)
            output   - nonexisting hdf5 path
         mbconnect   - if not None, open connection to musicbrainz server
           verbose   - if >0 display more information
      DESTROYAUDIO   - Careful! deletes audio file if everything went well
    RETURN
       1 if we think a song is created, 0 otherwise
       (0 is returned, after printing an error, when 'audiofile' does not
       exist or when 'output' already exists)
    """
    # inputs + sanity checks
    if not os.path.exists(audiofile):
        # report the path that was actually tested
        print('ERROR: song file does not exist:',audiofile)
        return 0
    if os.path.exists(output):
        print('ERROR: hdf5 output file already exist:',output,', delete or choose new path')
        return 0
    # get EN track / song / artist for that song
    if verbose>0: print('get analysis for file:',audiofile)
    track = trackEN.track_from_filename(audiofile)
    song_id = track.song_id
    song = songEN.Song(song_id)
    if verbose>0: print('found song:',song.title,'(',song_id,')')
    artist_id = song.artist_id
    artist = artistEN.Artist(artist_id)
    if verbose>0: print('found artist:',artist.name,'(',artist_id,')')
    # hack to fill missing values: make sure both attributes exist on the
    # track object, defaulting to an empty string
    for attr_name in ('foreign_id','foreign_release_id'):
        try:
            getattr(track,attr_name)
        except AttributeError:
            setattr(track,attr_name,'')
            if verbose>0: print('no track %s found' % attr_name)
    # create HDF5 file
    if verbose>0: print('create HDF5 file:',output)
    HDF5.create_song_file(output,force=False)
    # fill hdf5 file from track
    if verbose>0:
        if mbconnect is None:
            print('fill HDF5 file with info from track/song/artist')
        else:
            print('fill HDF5 file with info from track/song/artist/musicbrainz')
    h5 = HDF5.open_h5_file_append(output)
    try:
        HDF5.fill_hdf5_from_artist(h5,artist)
        HDF5.fill_hdf5_from_song(h5,song)
        HDF5.fill_hdf5_from_track(h5,track)
        if mbconnect is not None:
            HDF5.fill_hdf5_from_musicbrainz(h5,mbconnect)
    finally:
        # close the file even if one of the fill steps raised
        h5.close()
    # done
    if DESTROYAUDIO:
        if verbose>0: print('We remove audio file:',audiofile)
        os.remove(audiofile)
    return 1
Пример #5
0
def convert_one_song(audiofile,
                     output,
                     mbconnect=None,
                     verbose=0,
                     DESTROYAUDIO=False):
    """
    PRINCIPAL FUNCTION
    Converts one given audio file to hdf5 format (saved in 'output')
    by uploading it to The Echo Nest API
    INPUT
         audiofile   - path to a typical audio file (wav, mp3, ...)
            output   - nonexisting hdf5 path
         mbconnect   - if not None, open connection to musicbrainz server
           verbose   - if >0 display more information
      DESTROYAUDIO   - Careful! deletes audio file if everything went well
    RETURN
       1 if we think a song is created, 0 otherwise
       (0 is returned, after printing an error, when 'audiofile' does not
       exist or when 'output' already exists)
    """
    # Messages are passed to print as separate arguments, not as a single
    # tuple, so the text is printed rather than the tuple's repr.
    # inputs + sanity checks
    if not os.path.exists(audiofile):
        # report the path that was actually tested
        print('ERROR: song file does not exist:', audiofile)
        return 0
    if os.path.exists(output):
        print('ERROR: hdf5 output file already exist:', output,
              ', delete or choose new path')
        return 0
    # get EN track / song / artist for that song
    if verbose > 0:
        print('get analysis for file:', audiofile)
    track = trackEN.track_from_filename(audiofile)
    song_id = track.song_id
    song = songEN.Song(song_id)
    if verbose > 0:
        print('found song:', song.title, '(', song_id, ')')
    artist_id = song.artist_id
    artist = artistEN.Artist(artist_id)
    if verbose > 0:
        print('found artist:', artist.name, '(', artist_id, ')')
    # hack to fill missing values: make sure both attributes exist on the
    # track object, defaulting to an empty string
    for attr_name in ('foreign_id', 'foreign_release_id'):
        try:
            getattr(track, attr_name)
        except AttributeError:
            setattr(track, attr_name, '')
            if verbose > 0:
                print('no track %s found' % attr_name)
    # create HDF5 file
    if verbose > 0:
        print('create HDF5 file:', output)
    HDF5.create_song_file(output, force=False)
    # fill hdf5 file from track
    if verbose > 0:
        if mbconnect is None:
            print('fill HDF5 file with info from track/song/artist')
        else:
            print(
                'fill HDF5 file with info from track/song/artist/musicbrainz')
    h5 = HDF5.open_h5_file_append(output)
    try:
        HDF5.fill_hdf5_from_artist(h5, artist)
        HDF5.fill_hdf5_from_song(h5, song)
        HDF5.fill_hdf5_from_track(h5, track)
        if mbconnect is not None:
            HDF5.fill_hdf5_from_musicbrainz(h5, mbconnect)
    finally:
        # close the file even if one of the fill steps raised
        h5.close()
    # done
    if DESTROYAUDIO:
        if verbose > 0:
            print('We remove audio file:', audiofile)
        os.remove(audiofile)
    return 1
Пример #6
0
def create_track_file(maindir,trackid,track,song,artist,mbconnect=None):
    """
    Main function to create an HDF5 song file.
    You got to have the track, song and artist already.
    If you pass an open connection to the musicbrainz database, we also use it.
    The file is written to '<final path>_tmp' first and only moved to its
    final path once it has been completely filled.
    A failed attempt is retried after SLEEPTIME seconds, up to 100 tries.
    INPUT
       maindir      - main directory of the Million Song Dataset
       trackid      - Echo Nest track id of the track object
       track        - pyechonest track object
       song         - pyechonest song object
       artist       - pyechonest artist object
       mbconnect    - open musicbrainz pg connection
    RETURN
       True if a track file was created, False otherwise.
       False means: the file already exists, the lock could not be
       obtained (another thread is doing that song), or we gave up
       after 100 failed tries.
    RAISE
       KeyboardInterrupt - re-raised after the temporary and final files
                           have been removed
    """
    hdf5_path = os.path.join(maindir,path_from_trackid(trackid))
    if os.path.exists(hdf5_path):
        return False # file already exists, no stress
    hdf5_path_tmp = hdf5_path + '_tmp'
    # lock the file
    if not get_lock_track(trackid):
        return False # someone is taking care of that file
    if os.path.exists(hdf5_path):
        release_lock_track(trackid)
        return False # got the lock too late, file exists
    # count errors (=tries), stop after 100 tries
    try_cnt = 0
    # handle of the temporary file; None whenever no file was opened
    h5 = None
    # create file and fill it
    try:
        while True: # try until we make it work!
            # forget the handle of a previous attempt, so that a failure
            # before the open below does not touch a stale file object
            h5 = None
            try:
                # we try one more time
                try_cnt += 1
                hdf5_dir = os.path.split(hdf5_path)[0]
                if not os.path.isdir(hdf5_dir):
                    os.makedirs(hdf5_dir)
                # check / delete tmp file if exist
                if os.path.isfile(hdf5_path_tmp):
                    os.remove(hdf5_path_tmp)
                # create tmp file
                HDF5.create_song_file(hdf5_path_tmp)
                h5 = HDF5.open_h5_file_append(hdf5_path_tmp)
                HDF5.fill_hdf5_from_artist(h5,artist)
                HDF5.fill_hdf5_from_song(h5,song)
                HDF5.fill_hdf5_from_track(h5,track)
                if mbconnect is not None:
                    HDF5.fill_hdf5_from_musicbrainz(h5,mbconnect)
                # TODO billboard? lastfm? ...?
                h5.close()
            except KeyboardInterrupt:
                close_creation()
                raise
            # we dont panic, delete file, wait and retry
            except Exception as e:
                # close hdf5 (it may already be closed)
                if h5 is not None:
                    try:
                        h5.close()
                    except ValueError:
                        pass
                # delete path; os.remove signals failure with OSError,
                # e.g. when the tmp file was never created
                try:
                    os.remove(hdf5_path_tmp)
                except OSError:
                    pass
                # print and wait
                print('ERROR creating track: %s on %s (pid=%s)'
                      % (trackid,time.ctime(),os.getpid()))
                print(e)
                # give up
                if try_cnt >= 100:
                    print('we give up after %s tries' % (try_cnt,))
                    release_lock_track(trackid)
                    return False
                print('(try again in %s seconds)' % (SLEEPTIME,))
                time.sleep(SLEEPTIME)
                continue
            # move tmp file to real file
            shutil.move(hdf5_path_tmp, hdf5_path)
            # release lock
            release_lock_track(trackid)
            return True
    # KeyboardInterrupt, we delete file, clean things up
    except KeyboardInterrupt:
        # close hdf5
        if h5 is not None:
            try:
                h5.close()
            except ValueError:
                pass
        # delete path
        try:
            if os.path.isfile(hdf5_path_tmp):
                os.remove(hdf5_path_tmp)
            if os.path.isfile(hdf5_path):
                os.remove(hdf5_path)
        except OSError:
            pass
        raise