示例#1
0
def mergeDictionnaries():
    """Merge the temporary artist dictionaries into two combined files.

    Every ``*.txt`` file matched by ``savePath + "*.txt"`` is loaded with
    ``LU.loadDictionary``. Files whose name contains ``"All"`` are merged
    into one dictionary and files whose name contains ``"Main"`` into
    another; when a key appears in several files, the file loaded last
    wins. The merged dictionaries are saved in ``savePath`` under
    ``AllGenresDicName`` and ``MainGenreDicName``, after which every matched
    file whose name contains both ``"-"`` and ``"temp"`` is deleted.

    Files are classified by their base name only, so characters that happen
    to appear in the ``savePath`` directory can neither misroute a
    dictionary nor cause non-temporary files to be deleted.
    """
    print("----Merging generated dictionaries----")
    files = glob.glob(savePath + "*.txt")
    print("Nb files : " + str(len(files)))
    ArtistsMainDic = dict()
    ArtistsAllDic = dict()

    for file in files:
        # Strip only the trailing extension; str.replace would also remove
        # a ".txt" occurring earlier in the path.
        filename = os.path.splitext(file)[0]
        basename = os.path.basename(file)
        dictTemp = LU.loadDictionary(filename, path="")

        if "All" in basename:
            ArtistsAllDic.update(dictTemp)

        if "Main" in basename:
            ArtistsMainDic.update(dictTemp)

    print("Artist dictionaries loaded. Merging...", end="")
    LU.saveDictionary(ArtistsAllDic, AllGenresDicName, savePath, enc="UTF-8")
    LU.saveDictionary(ArtistsMainDic, MainGenreDicName, savePath, enc="UTF-8")
    print("done.")

    print("Deleting temp files...")
    for file in files:
        basename = os.path.basename(file)
        if "-" in basename and "temp" in basename:
            print("Deleting temp file " + file)
            os.remove(file)
    print("DONE.")
示例#2
0
def expand(df_src, df_dst="completeWithCoordinates.csv", dic_file="locationDic"):
    """Add coordinates to the CSV at *df_src* and write the result to *df_dst*.

    The CSV is read with pandas and a stray ``'Unnamed: 0'`` column is
    dropped when present. The dictionary *dic_file* is loaded through
    ``lu.loadDictionary``, a handful of hard-coded entries are added or
    overwritten, and ``addCoordinatesColumn`` is called in place on the
    frame. The dictionary is then saved back under *dic_file* and the frame
    is written to *df_dst* without its index.

    Args:
        df_src: Path of the CSV file to read.
        df_dst: Path of the CSV file to write.
        dic_file: Name of the dictionary passed to ``lu.loadDictionary`` and
            ``lu.saveDictionary``.
    """
    df = pd.read_csv(df_src)
    if 'Unnamed: 0' in df.columns:
        # Keyword form: the positional ``axis`` argument of DataFrame.drop
        # is no longer accepted by recent pandas releases.
        df = df.drop(columns=['Unnamed: 0'])

    dic = lu.loadDictionary(dic_file)
    # Manual overrides for specific venues; the tuples are presumably
    # (latitude, longitude) -- TODO confirm against addCoordinatesColumn.
    dic["Palais-des-Congrès"] = (47.134881, 7.248004000000001)
    dic["Il_Caffè"] = (46.9382202, 7.787970900000001)
    dic["Festivalgelände"] = (47.4222173, 9.3395195)
    dic["Festivalgelände-am-Rotten"] = dic["Festivalgelände"]
    dic["Römerareal"] = (47.136266, 7.30622)

    # inplace=True: the frame written below relies on df being modified here.
    addCoordinatesColumn(df, dic, replace=True, inplace=True)
    lu.saveDictionary(dic, dic_file)
    df.to_csv(df_dst, index=False)
示例#3
0
def createDictionnaryFromArtists(artists):
    """Build and save the genre dictionary from the artists' Spotify genres.

    Each row's ``genres_spotify`` value is parsed with ``literal_eval``.
    Every parsed value that is not ``None`` is appended to one flat list,
    which is passed to ``AE.updateDictionnary`` together with a dictionary
    obtained from ``AE.createDictionnary``. The result is saved with
    ``LU.saveDictionary`` and returned.

    Rows whose value cannot be parsed are reported on stdout and skipped.

    Args:
        artists: DataFrame with a ``genres_spotify`` column.

    Returns:
        The dictionary returned by ``AE.updateDictionnary``.
    """
    print("Creating dictionnary from genres downloaded from Spotify...")
    dic = AE.createDictionnary()
    genres_list = []
    for _, row in artists.iterrows():
        raw_genres = row.genres_spotify
        try:
            genres = literal_eval(raw_genres)
            if genres is not None:
                genres_list += genres
        except Exception:
            # str(): the cell may not be a string (e.g. NaN), in which case
            # plain concatenation would raise inside the error handler.
            print("Error reading " + str(raw_genres))

    dic = AE.updateDictionnary(genres_list, dic)
    LU.saveDictionary(dic, filename_genres, PATH_DIC, encoding)
    return dic
示例#4
0
def downloadGenresWikipediaAndRA(Artists,dictionaryOfGenres,dictionaryWiki=None, dictionaryRA=None, begin=0,end=100000):
    """Fill ``genres_wiki`` and ``genres_ra`` for the rows ``begin..end-1``.

    For each row index in the range, the artist's genres are taken from the
    supplied cache dictionaries when present, otherwise fetched through
    ``AE.getGenresFromWikipedia`` / ``AE.getGenresFromRA`` and added to the
    caches. Missing or empty results are stored as the string ``"None"``.
    The caches and the frame are saved every time the row index is a
    multiple of 100, and once more at the end.

    Row indices 39700 to 39900 (inclusive) are skipped; the original code
    only labels this range "Bugs".

    Args:
        Artists: DataFrame with an ``artist`` column; modified in place.
        dictionaryOfGenres: Passed through to the two ``AE`` fetchers.
        dictionaryWiki: Optional artist -> genres cache for Wikipedia.
        dictionaryRA: Optional artist -> genres cache for Resident Advisor.
        begin: First row index to process.
        end: One past the last row index; clamped to the number of rows.

    Returns:
        The same ``Artists`` frame that was passed in.
    """
    end = min(end, Artists.shape[0])

    print("Downloading genres from Wikipedia and Resident Advisor of "+str(end-begin)+" artist...")

    if dictionaryWiki is None:
        dictionaryWiki = {}
    if dictionaryRA is None:
        dictionaryRA = {}

    for i in range(begin, end):
        # Known-problematic range, skipped entirely.
        if 39700 <= i <= 39900:
            continue

        row_mask = Artists.index == i
        artist = Artists[row_mask]["artist"].values[0]

        # Wikipedia genres: cache first, network otherwise.
        if artist in dictionaryWiki:
            genres_wiki = dictionaryWiki[artist]
        else:
            genres_wiki = AE.getGenresFromWikipedia(artist, dictionaryOfGenres)
            dictionaryWiki[artist] = genres_wiki

        # Resident Advisor genres: same cache-then-fetch scheme.
        if artist in dictionaryRA:
            genres_ra = dictionaryRA[artist]
        else:
            genres_ra = AE.getGenresFromRA(artist, dictionaryOfGenres)
            dictionaryRA[artist] = genres_ra

        # Collapse "nothing found" (None or empty) to None before storing.
        if genres_wiki is None or len(genres_wiki) < 1:
            genres_wiki = None
        if genres_ra is None or len(genres_ra) < 1:
            genres_ra = None

        Artists.loc[row_mask, "genres_wiki"] = str(genres_wiki)
        Artists.loc[row_mask, "genres_ra"] = str(genres_ra)

        # Periodic checkpoint so an interrupted run keeps its progress.
        if i % 100 == 0:
            print(str(i))
            LU.saveDictionary(dictionaryWiki, filename_wiki_dic, PATH_DIC, encoding)
            LU.saveDictionary(dictionaryRA, filename_ra_dic, PATH_DIC, encoding)
            Artists.to_csv(PATH_ARTISTS)

    # Final save of the frame and both caches.
    Artists.to_csv(PATH_ARTISTS)
    LU.saveDictionary(dictionaryWiki, filename_wiki_dic, PATH_DIC, encoding)
    LU.saveDictionary(dictionaryRA, filename_ra_dic, PATH_DIC, encoding)
    return Artists
示例#5
0
def downloadGenresSpotify(artistsDF,dictionarySpotify=None,begin=0,end=100000):
    """Build the artists frame and fill its ``genres_spotify`` column.

    A de-duplicated copy of *artistsDF* is reduced to its ``artist`` column,
    and the genre columns are added with ``None`` as their initial value.
    For each row index in ``begin..end-1`` the artist's genres are taken
    from *dictionarySpotify* when cached, otherwise fetched through
    ``AE.getGenresFromSpotify`` and added to the cache. Missing or empty
    results are stored as the string ``"None"``. The cache is saved every
    time the row index is a multiple of 100, and once more at the end.

    Args:
        artistsDF: DataFrame with an ``artist`` column; not modified.
        dictionarySpotify: Optional artist -> genres cache.
        begin: First row index to process.
        end: One past the last row index; clamped to the number of rows.

    Returns:
        The newly built artists DataFrame.
    """
    Artists = artistsDF.copy()
    Artists = Artists.drop_duplicates().reset_index()[["artist"]]
    Artists.columns = ["artist"]
    for column in (
        "genres_spotify",
        "genres_ra",
        "genres_wiki",
        "genres_events",
        "main_genres",
        "top3_genres",
        "genre",
    ):
        Artists[column] = None

    end = min(end, Artists.shape[0])

    if dictionarySpotify is None:
        dictionarySpotify = {}

    print("Downloading genres of "+str(end-begin)+" artist from Spotify...")
    for i in range(begin, end):
        row_mask = Artists.index == i
        artist = Artists.loc[row_mask, "artist"].values[0]

        if artist in dictionarySpotify:
            genres_spotify = dictionarySpotify[artist]
        else:
            genres_spotify = AE.getGenresFromSpotify(artist)
            dictionarySpotify[artist] = genres_spotify

        # Guard against None (e.g. a cached miss) before calling len(),
        # matching the handling in downloadGenresWikipediaAndRA.
        if genres_spotify is None or len(genres_spotify) < 1:
            genres_spotify = None
        Artists.loc[row_mask, "genres_spotify"] = str(genres_spotify)

        # Periodic checkpoint of the cache.
        if i % 100 == 0:
            print(i)
            LU.saveDictionary(dictionarySpotify, filename_spotify_dic, PATH_DIC, encoding)

    LU.saveDictionary(dictionarySpotify, filename_spotify_dic, PATH_DIC, encoding)
    return Artists
示例#6
0
	try:
		WikiDic = LU.loadDictionary(filename_wiki_dic,PATH_DIC,encoding)
	except:
		print("Can't find the dictionary of Wikipedia genres.")
		WikiDic = None
	
	#Getting the dictionary of genres
	try:
		GenresDic = LU.loadDictionary(filename_genres,PATH_DIC,encoding)
	except:
		print("Can't find the dictionary of genres.")
		GenresDic = None
		
	#Running the pipeline
	artists,Dataframe, GenresDic, SpotifyDic, RADic, WikiDic = artistsPipeline(Dataframe, GenresDic, SpotifyDic, RADic, WikiDic)
	
	print("Saving artists dataframe to : "+PATH_ARTISTS)
	#Saving the Dataframe of artists
	artists.to_csv(PATH_ARTISTS,encoding)
	
	print("Saving cleaned dataframe to :"+PATH_DF)
	Dataframe.to_csv(PATH_DF,encoding)
	
	print("Saving dictionaries to : "+PATH_DIC)
	#Saving dictionaries
	LU.saveDictionary(GenresDic,filename_genres,PATH_DIC,encoding)
	LU.saveDictionary(SpotifyDic,filename_spotify_dic,PATH_DIC,encoding)
	LU.saveDictionary(RADic,filename_ra_dic,PATH_DIC,encoding)
	LU.saveDictionary(WikiDic,filename_wiki_dic,PATH_DIC,encoding)
	
	
示例#7
0
def ExportGenres(ClubDataFrame, init, end):
    """Fill the ``Genre`` and ``All Genres`` columns for rows ``init:end``.

    For each event row in the slice, the ``LineUp`` value is split into
    artists with ``SplitLineup``. Each artist's main genre comes from the
    temporary per-range dictionaries when already known, otherwise from
    ``AE.getGenre`` / ``AE.getMaxGenre``, and is then cached. The event's
    ``Genre`` is ``AE.getMaxGenre`` over its artists' main genres,
    ``All Genres`` is the string form of that list, and ``LineUp`` is
    overwritten with the string form of the artist list.

    The temporary dictionaries are saved every 50 processed rows and at the
    end, after which ``mergeDictionnaries`` is called.

    Args:
        ClubDataFrame: DataFrame with a ``LineUp`` column; modified in place.
        init: Start of the positional row slice.
        end: End (exclusive) of the positional row slice.
    """
    ArtistsSet = set()
    ArtistDicoMain = dict()
    ArtistDicoAll = dict()

    filenameMain = "ArtistDicoMain-" + str(init) + "-" + str(end) + "-temp"
    filenameAll = "ArtistDicoAll-" + str(init) + "-" + str(end) + "-temp"

    # Resume from a previous run's temporary dictionaries when available.
    try:
        ArtistDicoMain = LU.loadDictionary(filenameMain,
                                           path=savePath,
                                           enc="UTF-8")
        ArtistDicoAll = LU.loadDictionary(filenameAll,
                                          path=savePath,
                                          enc="UTF-8")
        ArtistsSet = set(ArtistDicoMain.keys())
    except Exception:
        print("Cannot find dictionnaries")

    # NOTE(review): the loaded genre dictionary is not used here; the call
    # is kept in case it has side effects or serves as an existence check.
    try:
        print("Loading genre dictionnary : " + dictionaryPath + dictionary +
              ".txt")
        LU.loadDictionary(dictionary, dictionaryPath, enc="UTF-8")
    except Exception:
        print("Cannot find genre dictionnary : " + dictionaryPath +
              dictionary + ".txt")

    ClubDataFrame["Genre"] = None
    ClubDataFrame["All Genres"] = None

    print("Retrieving genre for events[" + str(init) + "," + str(end) + "] :")

    rows = ClubDataFrame[init:end].iterrows()
    for processed, (row_id, row) in enumerate(rows, start=1):
        genres = []
        artists = SplitLineup(row["LineUp"])

        for artist in artists:
            if artist in ArtistsSet:
                mainGenre = ArtistDicoMain.get(artist)
            else:
                # Unknown artist: fetch its genres and cache them.
                ArtistsSet.add(artist)
                allGenres = AE.getGenre(artist, ReturnAllGenres=True)
                mainGenre = AE.getMaxGenre(allGenres)
                ArtistDicoMain[artist] = mainGenre
                ArtistDicoAll[artist] = allGenres

            genres.append(mainGenre)

        if not genres:
            print(artists)
        else:
            maxGenre = AE.getMaxGenre(genres)
            # .at replaces DataFrame.set_value, which was removed in
            # pandas 1.0; it writes into the caller's frame in place.
            ClubDataFrame.at[row_id, "Genre"] = maxGenre
            ClubDataFrame.at[row_id, "All Genres"] = str(genres)
            ClubDataFrame.at[row_id, "LineUp"] = str(artists)

        if processed % 10 == 0:
            print(str(processed))
        if processed % 50 == 0:
            # Periodic checkpoint so an interrupted run keeps its progress.
            LU.saveDictionary(ArtistDicoMain,
                              filenameMain,
                              path=savePath,
                              enc="UTF-8")
            LU.saveDictionary(ArtistDicoAll,
                              filenameAll,
                              path=savePath,
                              enc="UTF-8")

    print("Extraction finished. Saving dictionnaries..")
    LU.saveDictionary(ArtistDicoMain, filenameMain, path=savePath, enc="UTF-8")
    LU.saveDictionary(ArtistDicoAll, filenameAll, path=savePath, enc="UTF-8")
    print("Finished.")
    mergeDictionnaries()