while commentFeed is not None: for comment in commentFeed.entry: commentText = comment.content.text commentDate = dateStringToSaturday(comment.updated.text) commentList.append({'week' : commentDate, 'comment' : commentText}) next_link = commentFeed.GetNextLink() if next_link is None: commentFeed = None else: commentFeed = self.client.GetYouTubeVideoCommentFeed(next_link.href) except Exception, e: print e return commentList def extractYoutubeCommentsToDB(self, songList): db = DBController() for song in songList: try: searchVideoName = song['title'] + ' ' + song['artist'] videoID = self.getVideoID(searchVideoName) comments = self.getComments(videoID) db.insertCommentToDB(song['id'], comments) except Exception as e: print e continue if __name__ == '__main__': extractor = YoutubeCommentsExtractor() db = DBController() songList = db.getSongByWeek(lastSaturday()) extractor.extractYoutubeCommentsToDB(songList)
title = cleanTitle(song.contents[3].contents[0].contents[0].contents[2].contents[0].text) artist = cleanAtrist(song.contents[3].contents[0].contents[0].contents[2].contents[1].text) item = (title, artist, rank, lastWeek, peak, weeksOnChart) chart.append(item) chart.sort(key= lambda song : song[2]) return chart def extractSalesRankToDB(self, beginDate=datetime.today(), endDate=datetime.today()): if beginDate < datetime(2007, 1, 1) or endDate > datetime.today(): raise Exception('Invalid input date!') beginDate = dateToSaturday(beginDate) endDate = dateToSaturday(endDate) endDate = endDate - timedelta(days=7) if endDate > datetime.today() else endDate iterDate = beginDate db = DBController() while iterDate <= endDate: if db.checkSalesRankExistInDB(iterDate): iterDate = iterDate + timedelta(days = 7) continue URL = self.getURL(iterDate) chart = self.getSalesChartFromURL(URL) db.insertSalesChartToDB(iterDate, chart) iterDate = iterDate + timedelta(days = 7) def getURL(self, date): return 'http://top40-charts.com/chart.php?cid=27&date=' + date.strftime('%Y-%m-%d') if __name__ == '__main__': extractor = SalesChartExtractor() extractor.extractSalesRankToDB(lastSaturday())