Пример #1
0
def add_data_to_artists_rel_table(con, data, song_index, artist_id):
    command = "INSERT INTO ArtistsRel (artist1_id, artist2_id) VALUES (?, ?);"
    similar_artists = h5.get_similar_artists(data, song_index)
    cursor = con.cursor()
    cursor.executemany(command, [(artist_id, similar_artist)
                                 for similar_artist in similar_artists])
    con.commit()
def get_fields(files):
    tracks = []
    counts = {}
    field_counts = []
    for file in files:
        h5 = hdf5_getters.open_h5_file_read(file)
        t = {}
        t['artist_familiarity'] = hdf5_getters.get_artist_familiarity(
            h5)  # estimation
        t['artist_hotttnesss'] = hdf5_getters.get_artist_hotttnesss(
            h5)  # estimation
        t['artist_name'] = hdf5_getters.get_artist_name(h5)  # artist name
        t['release'] = hdf5_getters.get_release(h5)  # album name
        t['title'] = hdf5_getters.get_title(h5)  # title
        t['len_similar_artists'] = len(
            hdf5_getters.get_similar_artists(h5))  # number of similar artists
        t['analysis_sample_rate'] = hdf5_getters.get_analysis_sample_rate(
            h5)  # sample rate of the audio used ?????????
        t['duration'] = hdf5_getters.get_duration(h5)  # seconds
        t['key'] = hdf5_getters.get_key(h5)  # key the song is in
        t['key_confidence'] = hdf5_getters.get_key_confidence(
            h5)  # confidence measure
        t['loudness'] = hdf5_getters.get_loudness(h5)  # overall loudness in dB
        t['mode_confidence'] = hdf5_getters.get_mode_confidence(
            h5)  # confidence measure
        t['start_of_fade_out'] = hdf5_getters.get_start_of_fade_out(
            h5)  # time in sec
        t['tempo'] = hdf5_getters.get_tempo(h5)  # estimated tempo in BPM
        t['time_signature'] = hdf5_getters.get_time_signature(
            h5)  # estimate of number of beats per bar, e.g. 4
        t['year'] = hdf5_getters.get_year(
            h5)  # song release year from MusicBrainz or 0

        timbre = hdf5_getters.get_segments_timbre(
            h5)  # 2D float array, texture features (MFCC+PCA-like)
        t['segments_timbre'] = timbre
        t['timbre_avg'] = timbre.mean(axis=0)  # list of 12 averages
        cov_mat_timbre = np.cov(timbre, rowvar=False)
        cov_timbre = []
        for i in range(len(cov_mat_timbre)):
            for j in range(len(cov_mat_timbre) - i):
                cov_timbre.append(cov_mat_timbre[i][j])
        t['timbre_cov'] = cov_timbre  # list of 78 covariances

        pitch = hdf5_getters.get_segments_pitches(
            h5)  # 2D float array, chroma feature, one value per note
        t['segments_pitch'] = pitch
        t['pitch_avg'] = pitch.mean(axis=0)  # list of 12 averages
        cov_mat_pitch = np.cov(pitch, rowvar=False)
        cov_pitch = []
        for i in range(len(cov_mat_pitch)):
            for j in range(len(cov_mat_pitch) - i):
                cov_pitch.append(cov_mat_timbre[i][j])
        t['pitch_cov'] = cov_pitch  # list of 78 covariances

        # seg_pitch = hdf5_getters.get_segments_pitches(h5)  # 2D float array, chroma feature, one value per note
        # print(seg_pitch.shape)

        # t['artist_latitude'] = hdf5_getters.get_artist_latitude(h5)  # float, ????????????????????????????????????????
        # t['artist_longitude'] = hdf5_getters.get_artist_longitude(h5)  # float, ??????????????????????????????????????
        # t['artist_location'] = hdf5_getters.get_artist_location(h5)  # location name
        # t['song_hotttnesss'] = hdf5_getters.get_song_hotttnesss(h5)  # estimation
        # t['danceability'] = hdf5_getters.get_danceability(h5)  # estimation
        # t['end_of_fade_in'] = hdf5_getters.get_end_of_fade_in(h5)  # seconds at the beginning of the song
        # t['energy'] = hdf5_getters.get_energy(h5)  # energy from listener point of view
        # t['mode'] = hdf5_getters.get_mode(h5)  # major or minor
        # t['time_signature_confidence'] = hdf5_getters.get_time_signature_confidence(h5)  # confidence measure
        # t['artist_mbtags_count'] = len(hdf5_getters.get_artist_mbtags_count(h5))  # array int, tag counts for musicbrainz tags
        # bad types or non arithmatic numbers
        '''
        # t['audio_md5'] = hdf5_getters.get_audio_md5(h5)  # hash code of the audio used for the analysis by The Echo Nest
        # t['artist_terms_weight'] = hdf5_getters.get_artist_terms_weight(h5)  # array float, echonest tags weight ?????
        # t['artist_terms_freq'] = hdf5_getters.get_artist_terms_freq(h5)  # array float, echonest tags freqs ??????????
        # t['artist_terms'] = hdf5_getters.get_artist_terms(h5)  # array string, echonest tags ?????????????????????????
        # t['artist_id'] = hdf5_getters.get_artist_id(h5)  # echonest id
        # t['artist_mbid'] = hdf5_getters.get_artist_mbid(h5)  # musicbrainz id
        # t['artist_playmeid'] = hdf5_getters.get_artist_playmeid(h5)  # playme id
        # t['artist_7digitalid'] = hdf5_getters.get_artist_7digitalid(h5)  # 7digital id
        # t['release_7digitalid'] = hdf5_getters.get_release_7digitalid(h5)  # 7digital id
        # t['song_id'] = hdf5_getters.get_song_id(h5)  # echonest id
        # t['track_7digitalid'] = hdf5_getters.get_track_7digitalid(h5)  # 7digital id
        # t['similar_artists'] = hdf5_getters.get_similar_artists(h5)  # string array of sim artist ids
        # t['track_id'] = hdf5_getters.get_track_id(h5)  # echonest track id
        # t['segments_start'] = hdf5_getters.get_segments_start(h5)  # array floats, musical events, ~ note onsets
        # t['segments_confidence'] = hdf5_getters.get_segments_confidence(h5)  # array floats, confidence measure
        # t['segments_pitches'] = hdf5_getters.get_segments_pitches(h5)  # 2D float array, chroma feature, one value per note
        # t['segments_timbre'] = hdf5_getters.get_segments_timbre(h5)  # 2D float array, texture features (MFCC+PCA-like)
        # t['segments_loudness_max'] = hdf5_getters.get_segments_loudness_max(h5)  # float array, max dB value
        # t['segments_loudness_max_time'] = hdf5_getters.get_segments_loudness_max_time(h5)  # float array, time of max dB value, i.e. end of attack
        # t['segments_loudness_start'] = hdf5_getters.get_segments_loudness_start(h5)  # array float, dB value at onset
        # t['sections_start'] = hdf5_getters.get_sections_start(h5)  # array float, largest grouping in a song, e.g. verse
        # t['sections_confidence'] = hdf5_getters.get_sections_confidence(h5)  # array float, confidence measure
        # t['beats_start'] = hdf5_getters.get_beats_start(h5)  # array float, result of beat tracking
        # t['beats_confidence'] = hdf5_getters.get_beats_confidence(h5)  # array float, confidence measure
        # t['bars_start'] = hdf5_getters.get_bars_start(h5)  # array float, beginning of bars, usually on a beat
        # t['bars_confidence'] = hdf5_getters.get_bars_confidence(h5)  # array float, confidence measure
        # t['tatums_start'] = hdf5_getters.get_tatums_start(h5)  # array float, smallest rythmic element
        # t['tatums_confidence'] = hdf5_getters.get_tatums_confidence(h5)  # array float, confidence measure
        # t['artist_mbtags'] = hdf5_getters.get_artist_mbtags(h5)  # array string, tags from musicbrainz.org 
        '''
        h5.close()

        for key, value in t.items():
            if isinstance(value, float) and math.isnan(value):
                pass
            if type(value) is np.ndarray:
                if key in counts.keys():
                    counts[key] += 1
                else:
                    counts[key] = 1
            elif value:
                if key in counts.keys():
                    counts[key] += 1
                else:
                    counts[key] = 1
            elif key not in counts.keys():
                counts[key] = 0

        count = 0
        for key, value in t.items():
            if isinstance(value, float) and math.isnan(value):
                pass
            elif type(value) is np.ndarray:
                count += 1
            elif value:
                count += 1
        field_counts.append(count)

        # progress bar
        if num_of_tracks >= 100:
            i = files.index(file) + 1
            scale = num_of_tracks / 100
            if i % math.ceil(len(files) * .05) == 0:
                sys.stdout.write('\r')
                # the exact output you're looking for:
                sys.stdout.write("Loading dataframe: [%-100s] %d%%" %
                                 ('=' * int(i // scale), 1 / scale * i))
                sys.stdout.flush()
                time.sleep(.01)

        tracks.append(t)
    print()
    return tracks, counts, field_counts
Пример #3
0
import os
import hdf5_getters
rootdir = '../MillionSongSubset/data/A/A/A'
for subdir, dirs, files in os.walk(rootdir):
   for file in files:
      if file.endswith(".h5"):
         h5 = hdf5_getters.open_h5_file_read(subdir+"/"+file)
         similar = hdf5_getters.get_similar_artists(h5)
         for i in range (0,99):
            print similar[i]
         h5.close()
def writeSingleHDF5FileToTxtFile(songHDF5FileName):
    global maximumArtistNameLen
    global maximumArtistTagLen
    global maximumSongNameLen
    global maximumAlbumNameLen
    """
    This function does 3 simple things:
    - open the song file
    - get artist ID and put it
    - close the file
    """
    songHDF5File = GETTERS.open_h5_file_read(songHDF5FileName)

    songID = GETTERS.get_song_id(songHDF5File)
    songName = GETTERS.get_title(songHDF5File)
    artistID = GETTERS.get_artist_id(songHDF5File)
    songAlbum = GETTERS.get_release(songHDF5File)
    songYear = GETTERS.get_year(songHDF5File)
    songTempo = GETTERS.get_tempo(songHDF5File)
    songDanceability = GETTERS.get_danceability(songHDF5File)
    songDuration = GETTERS.get_duration(songHDF5File)
    songEnergy = GETTERS.get_energy(songHDF5File)
    songKey = GETTERS.get_key(songHDF5File)
    songLoudness = GETTERS.get_loudness(songHDF5File)
    songMode = GETTERS.get_mode(songHDF5File)
    songTimeSignature = GETTERS.get_time_signature(songHDF5File)

    songsTableFile.write(songID + "\t" + songName + "\t" + artistID + "\t" +
                         songAlbum + "\t" + str(songYear) + "\t" +
                         str(songTempo) + "\t" + str(songDanceability) + "\t" +
                         str(songDuration) + "\t" + str(songEnergy) + "\t" +
                         str(songKey) + "\t" + str(songLoudness) + "\t" +
                         str(songMode) + "\t" + str(songTimeSignature) +
                         "\t\n")

    artistName = GETTERS.get_artist_name(songHDF5File)
    artistFamiliarity = GETTERS.get_artist_familiarity(songHDF5File)
    artistTagsArray = GETTERS.get_artist_mbtags(songHDF5File)

    artistsTableFile.write(artistID + "\t" + artistName + "\t" +
                           str(artistFamiliarity) + "\t\n")

    if len(songName) > maximumSongNameLen:
        maximumSongNameLen = len(songName)

    if len(songAlbum) > maximumAlbumNameLen:
        maximumAlbumNameLen = len(songAlbum)

    if len(artistName) > maximumArtistNameLen:
        maximumArtistNameLen = len(artistName)

    for artistTag in artistTagsArray:
        if artistTag in allowedTagsSet:

            artistsTagsTableFile.write(artistID + "\t" + artistTag + "\t\n")
            if artistTag not in tagsSet:
                tagsTableFile.write(artistTag + "\t\n")
                tagsSet.add(artistTag)
            if len(artistTag) > maximumArtistTagLen:
                maximumArtistTagLen = len(artistTag)

    similarArtists = GETTERS.get_similar_artists(songHDF5File)

    for similarArtist in similarArtists:
        similarArtistsPairsList.add((artistID, similarArtist))

    artistsIDsSet.add(artistID)
    artistsNamesSet.add(artistName)

    songHDF5File.close()
Пример #5
0
                mode = GETTERS.get_mode(h5, i)
                mode_confidence = GETTERS.get_mode_confidence(h5, i)
                release = GETTERS.get_release(h5, i)
                release_7digitalid = GETTERS.get_release_7digitalid(h5, i)
                #sections_confidence = ','.join(str(e) for e in GETTERS.get_sections_confidence(h5, i)) # array
                #sections_start = ','.join(str(e) for e in GETTERS.get_sections_start(h5, i)) # array
                #segments_confidence = ','.join(str(e) for e in GETTERS.get_segments_confidence(h5, i)) # array
                #segments_loudness_max = ','.join(str(e) for e in GETTERS.get_segments_loudness_max(h5, i)) # array
                #segments_loudness_max_time = ','.join(str(e) for e in GETTERS.get_segments_loudness_max_time(h5, i)) # array
                #segments_loudness_start = ','.join(str(e) for e in GETTERS.get_segments_loudness_start(h5, i)) # array
                #segments_pitches = ','.join(str(e) for e in GETTERS.get_segments_pitches(h5, i)) # array
                #segments_start = ','.join(str(e) for e in GETTERS.get_segments_start(h5, i)) # array
                #segments_timbre = ','.join(str(e) for e in GETTERS.get_segments_timbre(h5, i)) # array
                similar_artists = ','.join(
                    str(e)
                    for e in GETTERS.get_similar_artists(h5, i))  # array
                song_hotttnesss = GETTERS.get_song_hotttnesss(h5, i)
                song_id = GETTERS.get_song_id(h5, i)
                start_of_fade_out = GETTERS.get_start_of_fade_out(h5, i)
                #tatums_confidence = ','.join(str(e) for e in GETTERS.get_tatums_confidence(h5, i)) # array
                #tatums_start = ','.join(str(e) for e in GETTERS.get_tatums_start(h5, i)) # array
                tempo = GETTERS.get_tempo(h5, i)
                time_signature = GETTERS.get_time_signature(h5, i)
                time_signature_confidence = GETTERS.get_time_signature_confidence(
                    h5, i)
                title = GETTERS.get_title(h5, i)
                track_7digitalid = GETTERS.get_track_7digitalid(h5, i)
                track_id = GETTERS.get_track_id(h5, i)
                year = GETTERS.get_year(h5, i)
                loops += 1
Пример #6
0
def fill_attributes(song, songH5File):

    #----------------------------non array attributes-------------------------------
    song.analysisSampleRate = str(
        hdf5_getters.get_analysis_sample_rate(songH5File))
    song.artistDigitalID = str(hdf5_getters.get_artist_7digitalid(songH5File))
    song.artistFamiliarity = str(
        hdf5_getters.get_artist_familiarity(songH5File))
    song.artistHotness = str(hdf5_getters.get_artist_hottness(songH5File))
    song.artistID = str(hdf5_getters.get_artist_id(songH5File))
    song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
    song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
    song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
    song.artistmbID = str(hdf5_getters.get_artist_mbid(songH5File))
    song.artistName = str(hdf5_getters.get_artist_name(songH5File))
    song.artistPlayMeID = str(hdf5_getters.get_artist_playmeid(songH5File))
    song.audioMD5 = str(hdf5_getters.get_audio_md5(songH5File))
    song.danceability = str(hdf5_getters.get_danceability(songH5File))
    song.duration = str(hdf5_getters.get_duration(songH5File))
    song.endOfFadeIn = str(hdf5_getters.get_end_of_fade_in(songH5File))
    song.energy = str(hdf5_getters.get_energy(songH5File))
    song.key = str(hdf5_getters.get_key(songH5File))
    song.keyConfidence = str(hdf5_getters.get_key_confidence(songH5File))
    song.segementsConfidence = str(
        hdf5_getters.get_segments_confidence(songH5File))
    song.segementsConfidence = str(
        hdf5_getters.get_sections_confidence(songH5File))
    song.loudness = str(hdf5_getters.get_loudness(songH5File))
    song.mode = str(hdf5_getters.get_mode(songH5File))
    song.modeConfidence = str(hdf5_getters.get_mode_confidence(songH5File))
    song.release = str(hdf5_getters.get_release(songH5File))
    song.releaseDigitalID = str(
        hdf5_getters.get_release_7digitalid(songH5File))
    song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
    song.startOfFadeOut = str(hdf5_getters.get_start_of_fade_out(songH5File))
    song.tempo = str(hdf5_getters.get_tempo(songH5File))
    song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
    song.timeSignatureConfidence = str(
        hdf5_getters.get_time_signature_confidence(songH5File))
    song.title = str(hdf5_getters.get_title(songH5File))
    song.trackID = str(hdf5_getters.get_track_id(songH5File))
    song.trackDigitalID = str(hdf5_getters.get_track_7digitalid(songH5File))
    song.year = str(hdf5_getters.get_year(songH5File))

    #-------------------------------array attributes--------------------------------------
    #array float
    song.beatsStart_mean, song.beatsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_beats_start(songH5File))
    #array float
    song.artistTermsFreq_mean, song.artistTermsFreq_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_terms_freq(songH5File))
    #array float
    song.artistTermsWeight_mean, song.artistTermsWeight_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_terms_weight(songH5File))
    #array int
    song.artistmbTagsCount_mean, song.artistmbTagsCount_var = convert_array_to_meanvar(
        hdf5_getters.get_artist_mbtags_count(songH5File))
    #array float
    song.barsConfidence_mean, song.barsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_bars_confidence(songH5File))
    #array float
    song.barsStart_mean, song.barsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_bars_start(songH5File))
    #array float
    song.beatsConfidence_mean, song.beatsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_beats_confidence(songH5File))
    #array float
    song.sectionsConfidence_mean, song.sectionsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_sections_confidence(songH5File))
    #array float
    song.sectionsStart_mean, song.sectionsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_sections_start(songH5File))
    #array float
    song.segmentsConfidence_mean, song.segmentsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_confidence(songH5File))
    #array float
    song.segmentsLoudness_mean, song.segmentsLoudness_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_max(songH5File))
    #array float
    song.segmentsLoudnessMaxTime_mean, song.segmentsLoudnessMaxTime_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_max_time(songH5File))
    #array float
    song.segmentsLoudnessMaxStart_mean, song.segmentsLoudnessMaxStart_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_loudness_start(songH5File))
    #array float
    song.segmentsStart_mean, song.segmentsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_segments_start(songH5File))
    #array float
    song.tatumsConfidence_mean, song.tatumsConfidence_var = convert_array_to_meanvar(
        hdf5_getters.get_tatums_confidence(songH5File))
    #array float
    song.tatumsStart_mean, song.tatumsStart_var = convert_array_to_meanvar(
        hdf5_getters.get_tatums_start(songH5File))
    #array2d float
    song.segmentsTimbre_mean, song.segmentsTimbre_var = covert_2darray_to_meanvar(
        hdf5_getters.get_segments_timbre(songH5File))
    #array2d float
    song.segmentsPitches_mean, song.segmentsPitches_var = covert_2darray_to_meanvar(
        hdf5_getters.get_segments_pitches(songH5File))

    #------------------------array string attributes------------------------
    song.similarArtists = convert_array_to_string(
        hdf5_getters.get_similar_artists(songH5File))  #array string
    song.artistTerms = convert_array_to_string(
        hdf5_getters.get_artist_terms(songH5File))  #array string
    song.artistmbTags = convert_array_to_string(
        hdf5_getters.get_artist_mbtags(songH5File))  #array string

    return song
Пример #7
0
def main():
    outputFile1 = open('SongCSV.csv', 'w')
    csvRowString = ""

    #################################################
    #if you want to prompt the user for the order of attributes in the csv,
    #leave the prompt boolean set to True
    #else, set 'prompt' to False and set the order of attributes in the 'else'
    #clause
    prompt = False
    #################################################
    if prompt == True:
        while prompt:

            prompt = False

            csvAttributeString = raw_input("\n\nIn what order would you like the colums of the CSV file?\n" +
                "Please delineate with commas. The options are: " +
                "AlbumName, AlbumID, ArtistID, ArtistLatitude, ArtistLocation, ArtistLongitude,"+
                " ArtistName, Danceability, Duration, KeySignature, KeySignatureConfidence, Tempo," +
                " SongID, TimeSignature, TimeSignatureConfidence, Title, and Year.\n\n" +
                "For example, you may write \"Title, Tempo, Duration\"...\n\n" +
                "...or exit by typing 'exit'.\n\n")

            csvAttributeList = re.split('\W+', csvAttributeString)
            for i, v in enumerate(csvAttributeList):
                csvAttributeList[i] = csvAttributeList[i].lower()

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"


                if attribute == 'AlbumID'.lower():
                    csvRowString += 'AlbumID'
                elif attribute == 'AlbumName'.lower():
                    csvRowString += 'AlbumName'
                elif attribute == 'ArtistID'.lower():
                    csvRowString += 'ArtistID'
                elif attribute == 'ArtistLatitude'.lower():
                    csvRowString += 'ArtistLatitude'
                elif attribute == 'ArtistLocation'.lower():
                    csvRowString += 'ArtistLocation'
                elif attribute == 'ArtistLongitude'.lower():
                    csvRowString += 'ArtistLongitude'
                elif attribute == 'ArtistName'.lower():
                    csvRowString += 'ArtistName'
                elif attribute == 'Danceability'.lower():
                    csvRowString += 'Danceability'
                elif attribute == 'Duration'.lower():
                    csvRowString += 'Duration'
                elif attribute == 'ArtistHotttnesss'.lower():
                    csvRowString += 'ArtistHotttnesss'
                elif attribute == 'KeySignature'.lower():
                    csvRowString += 'KeySignature'
                elif attribute == 'KeySignatureConfidence'.lower():
                    csvRowString += 'KeySignatureConfidence'
                elif attribute == 'SongID'.lower():
                    csvRowString += "SongID"
                elif attribute == 'SongHotttnesss'.lower():
                    csvRowString += 'SongHotttnesss'
                elif attribute == 'Tempo'.lower():
                    csvRowString += 'Tempo'
                elif attribute == 'Loudness'.lower():
                    csvRowString += 'Loudness'
                elif attribute == 'Energy'.lower():
                    csvRowString += 'Energy'
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += 'TimeSignature'
                elif attribute == 'TimeSignatureConfidence'.lower():
                    csvRowString += 'TimeSignatureConfidence'
                elif attribute == 'Title'.lower():
                    csvRowString += 'Title'
                elif attribute == 'Year'.lower():
                    csvRowString += 'Year'
                elif attribute == 'Exit'.lower():
                    sys.exit()
                else:
                    prompt = True
                    print "=============="
                    print "I believe there has been an error with the input."
                    print "=============="
                    break

                csvRowString += ","

            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex-1]
            csvRowString += "\n"
            outputFile1.write(csvRowString);
            csvRowString = ""
    #else, if you want to hard code the order of the csv file and not prompt
    #the user, 
    else:
        #################################################
        #change the order of the csv file here
        #Default is to list all available attributes (in alphabetical order)
        csvRowString = ("SongID,AlbumID,AlbumName,ArtistID,ArtistHotttnesss,ArtistFamiliarity,ArtistLatitude,ArtistLocation,"+
            "ArtistLongitude,ArtistName,Danceability,Duration,KeySignature,"+
            "KeySignatureConfidence,Tempo,TimeSignature,TimeSignatureConfidence,"+
            "Title,SongHotttnesss,Loudness,Energy,Year,SimilarArtists,Genre,Audio")
        #################################################

        csvAttributeList = re.split('\W+', csvRowString)
        for i, v in enumerate(csvAttributeList):
            csvAttributeList[i] = csvAttributeList[i].lower()
        outputFile1.write("SongNumber,");
        outputFile1.write(csvRowString + "\n");
        csvRowString = ""  

    #################################################

    #TODO Enter base folder here
    #Set the basedir here, the root directory from which the search
    #for files stored in a (hierarchical data structure) will originate
    basedir = "C:\Users\NadavSpitzer\Documents\מדמח\שנה ג'\סמסטר א'\סדנה במסדי נתונים\DB\MillionSongSubset\data" # "." As the default means the current directory
    ext = ".h5" #Set the extension here. H5 is the extension for HDF5 files.
    #################################################

    #FOR LOOP
    for root, dirs, files in os.walk(basedir):        
        files = glob.glob(os.path.join(root,'*'+ext))
        for f in files:
            print f

            songH5File = hdf5_getters.open_h5_file_read(f)
            song = Song(str(hdf5_getters.get_song_id(songH5File)))

            testDanceability = hdf5_getters.get_danceability(songH5File)
            #print type(testDanceability)
            #print ("Here is the danceability: ") + str(testDanceability)

            song.artistID = str(hdf5_getters.get_artist_id(songH5File))
            song.albumID = str(hdf5_getters.get_release_7digitalid(songH5File))
            song.albumName = str(hdf5_getters.get_release(songH5File))
            song.artistLatitude = str(hdf5_getters.get_artist_latitude(songH5File))
            song.artistLocation = str(hdf5_getters.get_artist_location(songH5File))
            song.artistLongitude = str(hdf5_getters.get_artist_longitude(songH5File))
            song.artistName = str(hdf5_getters.get_artist_name(songH5File))
            song.danceability = str(hdf5_getters.get_danceability(songH5File))
            song.duration = str(hdf5_getters.get_duration(songH5File))
            song.artistHotttnesss = str(hdf5_getters.get_artist_hotttnesss(songH5File))
            song.keySignature = str(hdf5_getters.get_key(songH5File))
            song.keySignatureConfidence = str(hdf5_getters.get_key_confidence(songH5File))
            song.lyrics = None
            song.popularity = None
            song.tempo = str(hdf5_getters.get_tempo(songH5File))
            song.timeSignature = str(hdf5_getters.get_time_signature(songH5File))
            song.timeSignatureConfidence = str(hdf5_getters.get_time_signature_confidence(songH5File))
            song.songHotttnesss = str(hdf5_getters.get_song_hotttnesss(songH5File))
            song.title = str(hdf5_getters.get_title(songH5File))
            song.year = str(hdf5_getters.get_year(songH5File))
            song.setGenreList(songH5File)
            song.songLoudness = str(hdf5_getters.get_loudness(songH5File))
            song.songEnergy = str(hdf5_getters.get_energy(songH5File))
            song.artistFmiliarity = str(hdf5_getters.get_artist_familiarity(songH5File))
            song.similarArtists = hdf5_getters.get_similar_artists(songH5File)
            song.audio = hdf5_getters.get_audio_md5(songH5File)

            #print song count
            csvRowString += str(song.songCount) + ","

            for attribute in csvAttributeList:
                # print "Here is the attribute: " + attribute + " \n"

                if attribute == 'AlbumID'.lower():
                    csvRowString += song.albumID
                elif attribute == 'AlbumName'.lower():
                    albumName = song.albumName
                    albumName = albumName.replace(',',"")
                    csvRowString += "\"" + albumName + "\""
                elif attribute == 'ArtistID'.lower():
                    csvRowString += "\"" + song.artistID + "\""
                elif attribute == 'ArtistLatitude'.lower():
                    latitude = song.artistLatitude
                    if latitude == 'nan':
                        latitude = ''
                    csvRowString += latitude
                elif attribute == 'ArtistLocation'.lower():
                    location = song.artistLocation
                    location = location.replace(',','')
                    csvRowString += "\"" + location + "\""
                elif attribute == 'ArtistLongitude'.lower():
                    longitude = song.artistLongitude
                    if longitude == 'nan':
                        longitude = ''
                    csvRowString += longitude                
                elif attribute == 'ArtistName'.lower():
                    csvRowString += "\"" + song.artistName + "\""                
                elif attribute == 'Danceability'.lower():
                    csvRowString += song.danceability
                elif attribute == 'Duration'.lower():
                    csvRowString += song.duration
                elif attribute == 'ArtistHotttnesss'.lower():
                    csvRowString += song.artistHotttnesss
                elif attribute == 'ArtistFamiliarity'.lower():
                    csvRowString += song.artistFmiliarity
                elif attribute == 'KeySignature'.lower():
                    csvRowString += song.keySignature
                elif attribute == 'KeySignatureConfidence'.lower():
                    # print "key sig conf: " + song.timeSignatureConfidence                                 
                    csvRowString += song.keySignatureConfidence
                elif attribute == 'SongID'.lower():
                    csvRowString += "\"" + song.id + "\""
                elif attribute == 'SongHotttnesss'.lower():
                    csvRowString += song.songHotttnesss
                elif attribute == 'Tempo'.lower():
                    # print "Tempo: " + song.tempo
                    csvRowString += song.tempo
                elif attribute == 'Loudness'.lower():
                    csvRowString += song.songLoudness
                elif attribute == 'Energy'.lower():
                    csvRowString += song.songEnergy
                elif attribute == 'TimeSignature'.lower():
                    csvRowString += song.timeSignature
                elif attribute == 'TimeSignatureConfidence'.lower():
                    # print "time sig conf: " + song.timeSignatureConfidence                                   
                    csvRowString += song.timeSignatureConfidence
                elif attribute == 'Title'.lower():
                    csvRowString += "\"" + song.title + "\""
                elif attribute == 'Year'.lower():
                    csvRowString += song.year
                elif attribute == 'SimilarArtists'.lower():
                    csvRowString += "\""
                    for s in song.similarArtists:
                        csvRowString += "\"\"" + str(s) + "\"\"" + ","
                    csvRowString = csvRowString.rstrip(",")
                    csvRowString += "\""
                elif attribute == 'Genre'.lower():
                    csvRowString += "\""
                    for g in song.genreList:
                        csvRowString += "\"\"" + str(g) + "\"\"" + ","
                    csvRowString = csvRowString.rstrip(",")
                    csvRowString += "\""
                elif attribute == 'Audio'.lower():
                    csvRowString += song.audio
                else:
                    csvRowString += "Erm. This didn't work. Error. :( :(\n"

                csvRowString += ","

            #Remove the final comma from each row in the csv
            lastIndex = len(csvRowString)
            csvRowString = csvRowString[0:lastIndex-1]
            csvRowString += "\n"
            outputFile1.write(csvRowString)
            csvRowString = ""

            songH5File.close()

    outputFile1.close()
            song.endOfFadeIn = remove_trap_characters(
                str(hdf5_getters.get_end_of_fade_in(songH5File)))
            song.startOfFadeOut = remove_trap_characters(
                str(hdf5_getters.get_start_of_fade_out(songH5File)))
            song.energy = remove_trap_characters(
                str(hdf5_getters.get_energy(songH5File)))
            song.release = remove_trap_characters(
                str(hdf5_getters.get_release(songH5File)))
            song.release7digitalid = remove_trap_characters(
                str(hdf5_getters.get_release_7digitalid(songH5File)))
            song.songHotness = remove_trap_characters(
                str(hdf5_getters.get_song_hotttnesss(songH5File)))
            song.track7digitalid = remove_trap_characters(
                str(hdf5_getters.get_track_7digitalid(songH5File)))

            temp = hdf5_getters.get_similar_artists(songH5File)
            song.similarartists = remove_trap_characters(str(list(list(temp))))
            song.similarArtistsCount = get_list_length(temp)
            song.loudness = remove_trap_characters(
                str(hdf5_getters.get_loudness(songH5File)))
            song.mode = remove_trap_characters(
                str(hdf5_getters.get_mode(songH5File)))
            song.modeConfidence = remove_trap_characters(
                str(hdf5_getters.get_mode_confidence(songH5File)))
            song.artistName = remove_trap_characters(
                str(hdf5_getters.get_artist_name(songH5File)))
            song.danceability = remove_trap_characters(
                str(hdf5_getters.get_danceability(songH5File)))
            song.duration = remove_trap_characters(
                str(hdf5_getters.get_duration(songH5File)))
            song.keySignature = remove_trap_characters(
Пример #9
0
                            row += [h5.get_artist_terms_freq(ds)]
                            row += [h5.get_artist_terms_weight(ds)]
                            row += [h5.get_danceability(ds)]
                            row += [h5.get_energy(ds)]
                            row += [h5.get_key(ds)]
                            row += [h5.get_mode(ds)]
                            row += [h5.get_loudness(ds)]
                            row += [
                                parent_folder + '/' + sub_folder + '/' +
                                child_folder + '/'
                            ]
                            row += [file]

                            row += [h5.get_duration(ds)]
                            row += [h5.get_artist_familiarity(ds)]
                            row += [h5.get_similar_artists(ds)]
                            row += [h5.get_artist_id(ds)]
                            row += [h5.get_title(ds)]
                            row += [h5.get_song_hotttnesss(ds)]
                            row += [h5.get_year(ds)]
                            row += [h5.get_artist_latitude(ds)]
                            row += [h5.get_artist_longitude(ds)]
                            row += [
                                get_midi_name_from_matched(
                                    file[:-3], matched_scores)
                            ]

                            ds.close()

                            csv_writer.writerow(row)
Пример #10
0
            key_confidence = GETTERS.get_key_confidence(h5, i)
            loudness = GETTERS.get_loudness(h5, i)
            mode = GETTERS.get_mode(h5, i)
            mode_confidence = GETTERS.get_mode_confidence(h5, i)
            release = GETTERS.get_release(h5, i)
            release_7digitalid = GETTERS.get_release_7digitalid(h5, i)
            #sections_confidence = ','.join(str(e) for e in GETTERS.get_sections_confidence(h5, i)) # array
            #sections_start = ','.join(str(e) for e in GETTERS.get_sections_start(h5, i)) # array
            #segments_confidence = ','.join(str(e) for e in GETTERS.get_segments_confidence(h5, i)) # array
            #segments_loudness_max = ','.join(str(e) for e in GETTERS.get_segments_loudness_max(h5, i)) # array
            #segments_loudness_max_time = ','.join(str(e) for e in GETTERS.get_segments_loudness_max_time(h5, i)) # array
            #segments_loudness_start = ','.join(str(e) for e in GETTERS.get_segments_loudness_start(h5, i)) # array
            #segments_pitches = ','.join(str(e) for e in GETTERS.get_segments_pitches(h5, i)) # array
            #segments_start = ','.join(str(e) for e in GETTERS.get_segments_start(h5, i)) # array
            #segments_timbre = ','.join(str(e) for e in GETTERS.get_segments_timbre(h5, i)) # array
            similar_artists = ','.join(str(e) for e in GETTERS.get_similar_artists(h5, i)) # array
            song_hotttnesss = GETTERS.get_song_hotttnesss(h5, i)
            song_id = GETTERS.get_song_id(h5, i)
            start_of_fade_out = GETTERS.get_start_of_fade_out(h5, i)
            #tatums_confidence = ','.join(str(e) for e in GETTERS.get_tatums_confidence(h5, i)) # array
            #tatums_start = ','.join(str(e) for e in GETTERS.get_tatums_start(h5, i)) # array
            tempo = GETTERS.get_tempo(h5, i)
            time_signature = GETTERS.get_time_signature(h5, i)
            time_signature_confidence = GETTERS.get_time_signature_confidence(h5, i)
            title = GETTERS.get_title(h5, i)
            track_7digitalid = GETTERS.get_track_7digitalid(h5, i)
            track_id = GETTERS.get_track_id(h5, i)
            year = GETTERS.get_year(h5, i)
            loops += 1

def hd5_single_random_file_parser():
    # Open an h5 file in read mode
    h5 = hdf5_getters.open_h5_file_read(
        '/home/skalogerakis/Documents/MillionSong/MillionSongSubset/A/M/G/TRAMGDX12903CEF79F.h5'
    )

    function_tracker = filter(
        lambda x: x.startswith('get'),
        hdf5_getters.__dict__.keys())  # Detects all the getter functions

    for f in function_tracker:  # Print everything in function tracker
        print(f)

    # First effort to check what each field contains.
    print()  # 55 available fields (exluding number of songs fields)
    print("Num of songs -- ",
          hdf5_getters.get_num_songs(h5))  # One song per file
    print("Title -- ",
          hdf5_getters.get_title(h5))  # Print the title of a specific h5 file
    print("Artist familiarity -- ", hdf5_getters.get_artist_familiarity(h5))
    print("Artist hotness -- ", hdf5_getters.get_artist_hotttnesss(h5))
    print("Artist ID -- ", hdf5_getters.get_artist_id(h5))
    print("Artist mbID -- ", hdf5_getters.get_artist_mbid(h5))
    print("Artist playmeid -- ", hdf5_getters.get_artist_playmeid(h5))
    print("Artist 7DigitalID -- ", hdf5_getters.get_artist_7digitalid(h5))
    print("Artist latitude -- ", hdf5_getters.get_artist_latitude(h5))
    print("Artist longitude -- ", hdf5_getters.get_artist_longitude(h5))
    print("Artist location -- ", hdf5_getters.get_artist_location(h5))
    print("Artist Name -- ", hdf5_getters.get_artist_name(h5))
    print("Release -- ", hdf5_getters.get_release(h5))
    print("Release 7DigitalID -- ", hdf5_getters.get_release_7digitalid(h5))
    print("Song ID -- ", hdf5_getters.get_song_id(h5))
    print("Song Hotness -- ", hdf5_getters.get_song_hotttnesss(h5))
    print("Track 7Digital -- ", hdf5_getters.get_track_7digitalid(h5))
    print("Similar artists -- ", hdf5_getters.get_similar_artists(h5))
    print("Artist terms -- ", hdf5_getters.get_artist_terms(h5))
    print("Artist terms freq -- ", hdf5_getters.get_artist_terms_freq(h5))
    print("Artist terms weight -- ", hdf5_getters.get_artist_terms_weight(h5))
    print("Analysis sample rate -- ",
          hdf5_getters.get_analysis_sample_rate(h5))
    print("Audio md5 -- ", hdf5_getters.get_audio_md5(h5))
    print("Danceability -- ", hdf5_getters.get_danceability(h5))
    print("Duration -- ", hdf5_getters.get_duration(h5))
    print("End of Fade -- ", hdf5_getters.get_end_of_fade_in(h5))
    print("Energy -- ", hdf5_getters.get_energy(h5))
    print("Key -- ", hdf5_getters.get_key(h5))
    print("Key Confidence -- ", hdf5_getters.get_key_confidence(h5))
    print("Loudness -- ", hdf5_getters.get_loudness(h5))
    print("Mode -- ", hdf5_getters.get_mode(h5))
    print("Mode Confidence -- ", hdf5_getters.get_mode_confidence(h5))
    print("Start of fade out -- ", hdf5_getters.get_start_of_fade_out(h5))
    print("Tempo -- ", hdf5_getters.get_tempo(h5))
    print("Time signature -- ", hdf5_getters.get_time_signature(h5))
    print("Time signature confidence -- ",
          hdf5_getters.get_time_signature_confidence(h5))
    print("Track ID -- ", hdf5_getters.get_track_id(h5))
    print("Segments Start -- ", hdf5_getters.get_segments_start(h5))
    print("Segments Confidence -- ", hdf5_getters.get_segments_confidence(h5))
    print("Segments Pitches -- ", hdf5_getters.get_segments_pitches(h5))
    print("Segments Timbre -- ", hdf5_getters.get_segments_timbre(h5))
    print("Segments Loudness max -- ",
          hdf5_getters.get_segments_loudness_max(h5))
    print("Segments Loudness max time-- ",
          hdf5_getters.get_segments_loudness_max_time(h5))
    print("Segments Loudness start -- ",
          hdf5_getters.get_segments_loudness_start(h5))
    print("Sections start -- ", hdf5_getters.get_sections_start(h5))
    print("Sections Confidence -- ", hdf5_getters.get_sections_confidence(h5))
    print("Beats start -- ", hdf5_getters.get_beats_start(h5))
    print("Beats confidence -- ", hdf5_getters.get_beats_confidence(h5))
    print("Bars start -- ", hdf5_getters.get_bars_start(h5))
    print("Bars confidence -- ", hdf5_getters.get_bars_confidence(h5))
    print("Tatums start -- ", hdf5_getters.get_tatums_start(h5))
    print("Tatums confidence -- ", hdf5_getters.get_tatums_confidence(h5))
    print("Artist mbtags -- ", hdf5_getters.get_artist_mbtags(h5))
    print("Artist mbtags count -- ", hdf5_getters.get_artist_mbtags_count(h5))
    print("Year -- ", hdf5_getters.get_year(h5))

    fields = ['Title', 'Artist ID']

    with open('Tester2.csv', 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=';')

        # writing the fields
        csv_writer.writerow(fields)

        # writing the data rows
        csv_writer.writerow(
            [hdf5_getters.get_title(h5),
             hdf5_getters.get_artist_id(h5)])

    h5.close()  # close h5 when completed in the end
Пример #12
0
def getInfo(files):
    data = []
    build_str = ''
    with open(sys.argv[1], 'r') as f:
        contents = f.read()
        c = contents.split()
    f.close()
    print("creating csv with following fields:" + contents)
    for i in c:
        build_str = build_str + i + ','
    build_str = build_str[:-1]
    build_str = build_str + '\n'
    for fil in files:
        curFile = getters.open_h5_file_read(fil)
        d2 = {}
        get_table = {'track_id': getters.get_track_id(curFile), 'segments_pitches': getters.get_segments_pitches(curFile), 'time_signature_confidence': getters.get_time_signature_confidence(curFile), 'song_hotttnesss': getters.get_song_hotttnesss(curFile), 'artist_longitude': getters.get_artist_longitude(curFile), 'tatums_confidence': getters.get_tatums_confidence(curFile), 'num_songs': getters.get_num_songs(curFile), 'duration': getters.get_duration(curFile), 'start_of_fade_out': getters.get_start_of_fade_out(curFile), 'artist_name': getters.get_artist_name(curFile), 'similar_artists': getters.get_similar_artists(curFile), 'artist_mbtags': getters.get_artist_mbtags(curFile), 'artist_terms_freq': getters.get_artist_terms_freq(curFile), 'release': getters.get_release(curFile), 'song_id': getters.get_song_id(curFile), 'track_7digitalid': getters.get_track_7digitalid(curFile), 'title': getters.get_title(curFile), 'artist_latitude': getters.get_artist_latitude(curFile), 'energy': getters.get_energy(curFile), 'key': getters.get_key(curFile), 'release_7digitalid': getters.get_release_7digitalid(curFile), 'artist_mbid': getters.get_artist_mbid(curFile), 'segments_confidence': getters.get_segments_confidence(curFile), 'artist_hotttnesss': getters.get_artist_hotttnesss(curFile), 'time_signature': getters.get_time_signature(curFile), 'segments_loudness_max_time': getters.get_segments_loudness_max_time(curFile), 'mode': getters.get_mode(curFile), 'segments_loudness_start': getters.get_segments_loudness_start(curFile), 'tempo': getters.get_tempo(curFile), 'key_confidence': getters.get_key_confidence(curFile), 'analysis_sample_rate': getters.get_analysis_sample_rate(curFile), 'bars_confidence': getters.get_bars_confidence(curFile), 'artist_playmeid': getters.get_artist_playmeid(curFile), 'artist_terms_weight': getters.get_artist_terms_weight(curFile), 'segments_start': getters.get_segments_start(curFile), 'artist_location': getters.get_artist_location(curFile), 'loudness': getters.get_loudness(curFile), 'year': getters.get_year(curFile), 'artist_7digitalid': getters.get_artist_7digitalid(curFile), 'audio_md5': getters.get_audio_md5(curFile), 'segments_timbre': getters.get_segments_timbre(curFile), 'mode_confidence': getters.get_mode_confidence(curFile), 'end_of_fade_in': getters.get_end_of_fade_in(curFile), 'danceability': getters.get_danceability(curFile), 'artist_familiarity': getters.get_artist_familiarity(curFile), 'artist_mbtags_count': getters.get_artist_mbtags_count(curFile), 'tatums_start': getters.get_tatums_start(curFile), 'artist_id': getters.get_artist_id(curFile), 'segments_loudness_max': getters.get_segments_loudness_max(curFile), 'bars_start': getters.get_bars_start(curFile), 'beats_start': getters.get_beats_start(curFile), 'artist_terms': getters.get_artist_terms(curFile), 'sections_start': getters.get_sections_start(curFile), 'beats_confidence': getters.get_beats_confidence(curFile), 'sections_confidence': getters.get_sections_confidence(curFile)}
        tid = fil.split('/')[-1].split('.')[0]
        # print(c)
        for i in c:
            if i in get_table: 
               d2[i] = get_table[i]
               d2[i] = str(d2[i]).replace('\n','')  
               build_str = build_str + d2[i] + ','
            else:
                print('error: unspecified field')
                exit(0)
        build_str = build_str[:-1]
        # print(build_str[:-1])
        build_str = build_str + '\n'
        curFile.close()
    build_str = build_str.replace('b','').replace("'",'').replace('"','')  
    return (build_str)