def mergeDictionnaries():
    """Merge the temporary dictionaries into the two final dictionaries.

    Every file matching ``savePath + "*.txt"`` is loaded with
    ``LU.loadDictionary``.  Files whose *file name* contains ``"All"`` are
    merged into the all-genres dictionary, files whose name contains
    ``"Main"`` into the main-genre dictionary.  When the same key appears in
    several files, the file listed later by ``glob`` wins.

    The merged dictionaries are saved under ``AllGenresDicName`` and
    ``MainGenreDicName`` in ``savePath``.  Afterwards every globbed file whose
    name contains both ``"-"`` and ``"temp"`` is deleted.
    """
    print("----Merging generated dictionaries----")
    files = glob.glob(savePath + "*.txt")
    print("Nb files : " + str(len(files)))

    ArtistsMainDic = {}
    ArtistsAllDic = {}
    for file in files:
        # Strip only the extension; str.replace would also remove a ".txt"
        # occurring elsewhere in the path.
        filename = os.path.splitext(file)[0]
        dictTemp = LU.loadDictionary(filename, path="")
        # Classify on the file name alone so that directory names containing
        # "All" or "Main" cannot misroute a dictionary.
        basename = os.path.basename(file)
        if "All" in basename:
            ArtistsAllDic.update(dictTemp)
        if "Main" in basename:
            ArtistsMainDic.update(dictTemp)

    print("Artist dictionaries loaded. Merging...", end="")
    LU.saveDictionary(ArtistsAllDic, AllGenresDicName, savePath, enc="UTF-8")
    LU.saveDictionary(ArtistsMainDic, MainGenreDicName, savePath, enc="UTF-8")
    print("done.")

    print("Deleting temp files...")
    for file in files:
        basename = os.path.basename(file)
        # Only the file name decides whether a file is temporary; a "-" or
        # "temp" in a parent directory must not trigger deletion.
        if "-" in basename and "temp" in basename:
            print("Deleting temp file " + file)
            os.remove(file)
    print("DONE.")
def expand(df_src, df_dst="completeWithCoordinates.csv", dic_file="locationDic"):
    """Add a coordinates column to a CSV file and write the result to a new CSV.

    Args:
        df_src: Path of the CSV file to read.
        df_dst: Path of the CSV file to write (written without the index).
        dic_file: Name of the location dictionary, loaded with
            ``lu.loadDictionary`` and saved again with ``lu.saveDictionary``
            after the coordinates column has been added.

    A few hard-coded entries are inserted into the location dictionary
    before ``addCoordinatesColumn`` is called.  The values are pairs of
    floats, presumably (latitude, longitude) -- TODO confirm.
    """
    df = pd.read_csv(df_src)
    if 'Unnamed: 0' in df.columns:
        # Keyword form: the positional ``axis`` argument of DataFrame.drop is
        # no longer accepted by recent pandas versions.
        df = df.drop(columns='Unnamed: 0')

    dic = lu.loadDictionary(dic_file)
    # Manual overrides for specific location names.
    dic["Palais-des-Congrès"] = (47.134881, 7.248004000000001)
    dic["Il_Caffè"] = (46.9382202, 7.787970900000001)
    dic["Festivalgelände"] = (47.4222173, 9.3395195)
    dic["Festivalgelände-am-Rotten"] = dic["Festivalgelände"]
    dic["Römerareal"] = (47.136266, 7.30622)

    addCoordinatesColumn(df, dic, replace=True, inplace=True)
    # The dictionary is saved after the call; presumably addCoordinatesColumn
    # may add entries to it -- verify against its implementation.
    lu.saveDictionary(dic, dic_file)
    df.to_csv(df_dst, index=False)
def createDictionnaryFromArtists(artists):
    """Build the genre dictionary from the artists' Spotify genres and save it.

    Args:
        artists: DataFrame whose rows expose a ``genres_spotify`` field holding
            the textual form of a Python literal (it is parsed with
            ``literal_eval``).

    Returns:
        The dictionary returned by ``AE.updateDictionnary`` after it has been
        fed every genre collected from ``artists``.  The same dictionary is
        saved with ``LU.saveDictionary`` under ``filename_genres``.

    Rows whose ``genres_spotify`` value cannot be parsed are reported on
    stdout and skipped.
    """
    print("Creating dictionnary from genres downloaded from Spotify...")
    dic = AE.createDictionnary()
    genres_list = []
    for _, row in artists.iterrows():
        raw_genres = row.genres_spotify
        try:
            genres = literal_eval(raw_genres)
            if genres is not None:
                genres_list += genres
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # str() keeps the report itself from failing when the cell is not
            # a string (e.g. a NaN float for a missing value).
            print("Error reading " + str(raw_genres))
    dic = AE.updateDictionnary(genres_list, dic)
    LU.saveDictionary(dic, filename_genres, PATH_DIC, encoding)
    return dic
def downloadGenresWikipediaAndRA(Artists, dictionaryOfGenres, dictionaryWiki=None,
                                 dictionaryRA=None, begin=0, end=100000):
    """Fill the ``genres_wiki`` and ``genres_ra`` columns of ``Artists``.

    For every index label ``i`` in ``range(begin, end)`` the artist name is
    read from the ``artist`` column and its genres are taken from the caches
    (``dictionaryWiki`` / ``dictionaryRA``) when present.  Otherwise they are
    fetched with ``AE.getGenresFromWikipedia`` / ``AE.getGenresFromRA`` and
    added to the caches.

    Args:
        Artists: DataFrame with an ``artist`` column; modified in place.
        dictionaryOfGenres: Passed through unchanged to the ``AE`` fetchers.
        dictionaryWiki: Optional cache mapping artist -> Wikipedia genres.
            Updated in place when provided.
        dictionaryRA: Optional cache mapping artist -> Resident Advisor
            genres.  Updated in place when provided.
        begin: First index label to process.
        end: One past the last index label; capped at the number of rows.

    Returns:
        The same ``Artists`` DataFrame.  Genres are stored as ``str(...)`` of
        the fetched value, and as the string ``"None"`` when the value is
        missing or empty.

    Side effects:
        Both caches and the DataFrame (to ``PATH_ARTISTS``) are written to
        disk on every processed index that is a multiple of 100, and once
        more at the end.
    """
    end = min(end, Artists.shape[0])
    print("Downloading genres from Wikipedia and Resident Advisor of "+str(end-begin)+" artist...")

    if dictionaryWiki is None:
        dictionaryWiki = {}
    if dictionaryRA is None:
        dictionaryRA = {}

    def save_progress():
        # Persist both caches and the current state of the DataFrame.
        LU.saveDictionary(dictionaryWiki, filename_wiki_dic, PATH_DIC, encoding)
        LU.saveDictionary(dictionaryRA, filename_ra_dic, PATH_DIC, encoding)
        Artists.to_csv(PATH_ARTISTS)

    for i in range(begin, end):
        # Indices 39700..39900 are deliberately skipped; the original code
        # only labels this range "Bugs" without further explanation.
        if 39700 <= i <= 39900:
            continue

        row_mask = Artists.index == i
        artist = Artists[row_mask]["artist"].values[0]

        # Genres from Wikipedia (cached when available).
        if artist in dictionaryWiki:
            genres_wiki = dictionaryWiki[artist]
        else:
            genres_wiki = AE.getGenresFromWikipedia(artist, dictionaryOfGenres)
            dictionaryWiki[artist] = genres_wiki

        # Genres from Resident Advisor (cached when available).
        if artist in dictionaryRA:
            genres_ra = dictionaryRA[artist]
        else:
            genres_ra = AE.getGenresFromRA(artist, dictionaryOfGenres)
            dictionaryRA[artist] = genres_ra

        # Normalise "nothing found" to None so that the stored text is "None".
        if genres_wiki is None or len(genres_wiki) < 1:
            genres_wiki = None
        if genres_ra is None or len(genres_ra) < 1:
            genres_ra = None

        Artists.loc[row_mask, "genres_wiki"] = str(genres_wiki)
        Artists.loc[row_mask, "genres_ra"] = str(genres_ra)

        if i % 100 == 0:
            print(str(i))
            save_progress()

    # Final save of the caches and the DataFrame.
    save_progress()
    return Artists
def downloadGenresSpotify(artistsDF, dictionarySpotify=None, begin=0, end=100000):
    """Build the artists DataFrame and fill its ``genres_spotify`` column.

    A new DataFrame is derived from ``artistsDF`` (duplicates dropped, index
    reset, only the ``artist`` column kept) and extended with the empty
    columns ``genres_spotify``, ``genres_ra``, ``genres_wiki``,
    ``genres_events``, ``main_genres``, ``top3_genres`` and ``genre``.

    For every index label in ``range(begin, end)`` the Spotify genres are
    taken from ``dictionarySpotify`` when cached, otherwise fetched with
    ``AE.getGenresFromSpotify`` and added to the cache.

    Args:
        artistsDF: DataFrame containing an ``artist`` column; not modified.
        dictionarySpotify: Optional cache mapping artist -> Spotify genres.
            Updated in place when provided.
        begin: First index label to process.
        end: One past the last index label; capped at the number of rows.

    Returns:
        The new DataFrame.  ``genres_spotify`` holds ``str(...)`` of the
        genres, and the string ``"None"`` when they are missing or empty.

    Side effects:
        The cache is saved with ``LU.saveDictionary`` on every index that is
        a multiple of 100 and once more at the end.
    """
    Artists = artistsDF.copy()
    Artists = Artists.drop_duplicates().reset_index()[["artist"]]
    Artists.columns = ["artist"]
    for column in ("genres_spotify", "genres_ra", "genres_wiki",
                   "genres_events", "main_genres", "top3_genres", "genre"):
        Artists[column] = None

    end = min(end, Artists.shape[0])
    if dictionarySpotify is None:
        dictionarySpotify = {}

    print("Downloading genres of "+str(end-begin)+" artist from Spotify...")
    for i in range(begin, end):
        row_mask = Artists.index == i
        artist = Artists[row_mask]["artist"].values[0]

        if artist in dictionarySpotify:
            genres_spotify = dictionarySpotify[artist]
        else:
            genres_spotify = AE.getGenresFromSpotify(artist)
            dictionarySpotify[artist] = genres_spotify

        # A cached or fetched value may be None; guard before calling len()
        # so a missing result is recorded instead of raising TypeError.
        if genres_spotify is None or len(genres_spotify) < 1:
            genres_spotify = None
        Artists.loc[row_mask, "genres_spotify"] = str(genres_spotify)

        if i % 100 == 0:
            print(i)
            LU.saveDictionary(dictionarySpotify, filename_spotify_dic, PATH_DIC, encoding)

    LU.saveDictionary(dictionarySpotify, filename_spotify_dic, PATH_DIC, encoding)
    return Artists
# Getting the dictionary of Wikipedia genres (None when it cannot be loaded)
try:
    WikiDic = LU.loadDictionary(filename_wiki_dic, PATH_DIC, encoding)
except Exception:
    print("Can't find the dictionary of Wikipedia genres.")
    WikiDic = None

# Getting the dictionary of genres (None when it cannot be loaded)
try:
    GenresDic = LU.loadDictionary(filename_genres, PATH_DIC, encoding)
except Exception:
    print("Can't find the dictionary of genres.")
    GenresDic = None

# Running the pipeline
artists, Dataframe, GenresDic, SpotifyDic, RADic, WikiDic = artistsPipeline(
    Dataframe, GenresDic, SpotifyDic, RADic, WikiDic)

# Saving the Dataframe of artists.
# ``encoding`` must be passed by keyword: the second positional parameter of
# DataFrame.to_csv is the field separator, not the encoding.
print("Saving artists dataframe to : "+PATH_ARTISTS)
artists.to_csv(PATH_ARTISTS, encoding=encoding)

print("Saving cleaned dataframe to :"+PATH_DF)
Dataframe.to_csv(PATH_DF, encoding=encoding)

# Saving dictionaries
print("Saving dictionaries to : "+PATH_DIC)
LU.saveDictionary(GenresDic, filename_genres, PATH_DIC, encoding)
LU.saveDictionary(SpotifyDic, filename_spotify_dic, PATH_DIC, encoding)
LU.saveDictionary(RADic, filename_ra_dic, PATH_DIC, encoding)
LU.saveDictionary(WikiDic, filename_wiki_dic, PATH_DIC, encoding)
def ExportGenres(ClubDataFrame, init, end):
    """Assign a genre to the events ``ClubDataFrame[init:end]``.

    For each event the ``LineUp`` value is split into artists with
    ``SplitLineup``.  Each artist's main genre is read from the temporary
    artist dictionaries when already known, otherwise computed with
    ``AE.getGenre`` / ``AE.getMaxGenre`` and stored in those dictionaries.
    The event's ``Genre`` is ``AE.getMaxGenre`` over its artists' main genres.

    Args:
        ClubDataFrame: DataFrame with a ``LineUp`` column.  Modified in
            place: ``Genre`` and ``All Genres`` columns are (re)created, and
            for events with at least one artist ``Genre``, ``All Genres`` and
            ``LineUp`` are overwritten (the latter two with ``str(...)`` of
            the lists).
        init: Start of the row slice to process.
        end: End (exclusive) of the row slice to process.

    Side effects:
        The temporary dictionaries ``ArtistDicoMain-<init>-<end>-temp`` and
        ``ArtistDicoAll-<init>-<end>-temp`` are saved in ``savePath`` every
        50 events and once at the end.  ``mergeDictionnaries()`` is then
        called.
    """
    filenameMain = "ArtistDicoMain-" + str(init) + "-" + str(end) + "-temp"
    filenameAll = "ArtistDicoAll-" + str(init) + "-" + str(end) + "-temp"

    # Loading artists dictionaries (resume from a previous run when possible)
    ArtistsSet = set()
    ArtistDicoMain = {}
    ArtistDicoAll = {}
    try:
        ArtistDicoMain = LU.loadDictionary(filenameMain, path=savePath, enc="UTF-8")
        ArtistDicoAll = LU.loadDictionary(filenameAll, path=savePath, enc="UTF-8")
        ArtistsSet = set(ArtistDicoMain.keys())
    except Exception:
        print("Cannot find dictionnaries")

    # Loading genre dictionary
    # NOTE(review): the loaded dictionary is not used in this function; the
    # call only reports whether the file can be loaded.
    try:
        print("Loading genre dictionnary : " + dictionaryPath + dictionary + ".txt")
        LU.loadDictionary(dictionary, dictionaryPath, enc="UTF-8")
    except Exception:
        print("Cannot find genre dictionnary : " + dictionaryPath + dictionary + ".txt")

    ClubDataFrame["Genre"] = None
    ClubDataFrame["All Genres"] = None

    print("Retrieving genre for events[" + str(init) + "," + str(end) + "] :")
    processed = 0
    for row_label, row in ClubDataFrame[init:end].iterrows():
        artists = SplitLineup(row["LineUp"])
        genres = []
        for artist in artists:
            if artist in ArtistsSet:
                mainGenre = ArtistDicoMain.get(artist)
            else:
                # Updating dictionaries with the newly seen artist
                ArtistsSet.add(artist)
                allGenres = AE.getGenre(artist, ReturnAllGenres=True)
                mainGenre = AE.getMaxGenre(allGenres)
                ArtistDicoMain[artist] = mainGenre
                ArtistDicoAll[artist] = allGenres
            # Adding to the line-up genres
            genres.append(mainGenre)

        if not genres:
            print(artists)
        else:
            # Updating the dataframe.  DataFrame.set_value was removed in
            # pandas 1.0; .at is the label-based scalar setter replacing it.
            ClubDataFrame.at[row_label, "Genre"] = AE.getMaxGenre(genres)
            ClubDataFrame.at[row_label, "All Genres"] = str(genres)
            ClubDataFrame.at[row_label, "LineUp"] = str(artists)

        processed += 1
        if processed % 10 == 0:
            print(str(processed))
        if processed % 50 == 0:
            # Periodic checkpoint of the artist dictionaries
            LU.saveDictionary(ArtistDicoMain, filenameMain, path=savePath, enc="UTF-8")
            LU.saveDictionary(ArtistDicoAll, filenameAll, path=savePath, enc="UTF-8")

    print("Extraction finished. Saving dictionnaries..")
    LU.saveDictionary(ArtistDicoMain, filenameMain, path=savePath, enc="UTF-8")
    LU.saveDictionary(ArtistDicoAll, filenameAll, path=savePath, enc="UTF-8")
    print("Finished.")
    mergeDictionnaries()