def getSongListFromXML(XMLFileDir):
    """Return the unique (title, artist) pairs found in an XML document.

    Every ``song`` element below the document root is read.  Its ``title``
    and ``artist`` attributes are passed through ``cleanTitle`` and
    ``cleanAtrist``; pairs that are equal after cleaning are reported only
    once, at the position of their first occurrence.

    XMLFileDir -- the source handed to ``ET.parse``.

    Returns a list of ``(title, artist)`` tuples.

    Raises KeyError if a ``song`` element lacks a ``title`` or ``artist``
    attribute.
    """
    root = ET.parse(XMLFileDir).getroot()
    seen = set()
    songList = []
    for song in root.iter('song'):
        entry = (cleanTitle(song.attrib['title']),
                 cleanAtrist(song.attrib['artist']))
        # Deduplicate on the tuple itself rather than on a joined
        # 'title_artist' string: the joined form cannot tell
        # ('a_b', 'c') from ('a', 'b_c') and would drop one of them.
        if entry not in seen:
            seen.add(entry)
            songList.append(entry)
    return songList
def getTop50ChartFromURL(self, url):
    """Download a chart page and return its rows sorted by rank.

    The document at ``url`` is fetched with ``urllib2`` and parsed with
    BeautifulSoup.  Each ``tr`` element is read as one chart entry whose
    first three children hold the rank, the title and the artist.
    ``self`` is not used.

    Returns a list of ``(rank, title, artist)`` tuples ordered by ascending
    rank.  ``rank`` is an int; title and artist are the cell texts passed
    through ``cleanTitle`` and ``cleanAtrist``.

    Raises ValueError if the text of a row's first child is not an integer.

    NOTE(review): every ``tr`` is treated as a data row, so a header row
    would fail the ``int()`` conversion -- presumably the page has none;
    confirm against the real markup.
    """
    page = urllib2.urlopen(url)
    try:
        html = page.read()
    finally:
        # Release the HTTP connection even when the read fails; the
        # response object is not closed automatically.
        page.close()

    soup = BeautifulSoup(html)
    chart = []
    for row in soup.findAll('tr'):
        cells = row.contents
        rank = int(cells[0].text.strip())
        title = cleanTitle(cells[1].text)
        artist = cleanAtrist(cells[2].text)
        chart.append((rank, title, artist))

    # Sort on rank only, so entries sharing a rank keep their page order.
    chart.sort(key=lambda entry: entry[0])
    return chart
def getSalesChartFromURL(self, URL):
    """Download a sales chart page and return its entries sorted by rank.

    The document at ``URL`` is fetched with ``urllib2`` and parsed with
    BeautifulSoup.  Each element whose ``class`` attribute is
    ``latc_song`` is read as one chart entry.  ``self`` is not used.

    The children of an entry are read by position:
      0 -- current rank
      2 -- last week's rank, or the marker text 'New' / 'RE'
      3 -- nested markup holding the title and the artist
      6 -- peak position
      7 -- weeks on chart

    Returns a list of
    ``(title, artist, rank, lastWeek, peak, weeksOnChart)`` tuples ordered
    by ascending rank.  ``lastWeek`` is ``None`` when the page shows
    'New', equals ``rank`` when the page shows 'RE', and is otherwise the
    result of ``stringToInt`` on the cell text.  Title and artist are
    passed through ``cleanTitle`` and ``cleanAtrist``.
    """
    page = urllib2.urlopen(URL)
    try:
        html = page.read()
    finally:
        # Release the HTTP connection even when the read fails; the
        # response object is not closed automatically.
        page.close()

    soup = BeautifulSoup(html)
    chart = []
    for row in soup.findAll(attrs={'class': 'latc_song'}):
        cells = row.contents
        rank = stringToInt(cells[0].text)

        lastWeekText = cells[2].text
        if lastWeekText == 'New':
            lastWeek = None
        elif lastWeekText == 'RE':
            # For the 'RE' marker the current rank is reused as the
            # last-week value.
            lastWeek = rank
        else:
            lastWeek = stringToInt(lastWeekText)

        peak = stringToInt(cells[6].text)
        weeksOnChart = stringToInt(cells[7].text)

        # Title and artist are siblings deep inside the fourth child;
        # walk down to their shared parent once instead of twice.
        songInfo = cells[3].contents[0].contents[0].contents[2]
        title = cleanTitle(songInfo.contents[0].text)
        artist = cleanAtrist(songInfo.contents[1].text)

        chart.append((title, artist, rank, lastWeek, peak, weeksOnChart))

    # Sort on rank only (tuple index 2), so entries sharing a rank keep
    # their page order.
    chart.sort(key=lambda entry: entry[2])
    return chart