def scrape(self):
    """Go through the list of links to scrape and update the db.

    For each stored reddit URL, fetch the current notification count and
    persist it only when it has grown since the last scrape.
    """
    parser = RedditParser()
    urls = self.getUrls("reddit.com")
    # Materialize the result set up front so updateDb() calls during the
    # loop cannot interfere with a lazily-evaluated query/iterator.
    for url in list(urls):
        old_count = url.comments
        new_count = parser.getNotificationsFromUrl(url.url)
        # NOTE: was a Python 2 `print` statement; converted to the
        # print() function for Python 3 compatibility.
        print(old_count, new_count)
        if new_count > old_count:
            self.updateDb(url.url, new_count)
class MasterParser:
    """Dispatches notification scraping to the site-specific parser for a URL.

    Currently only www.reddit.com is supported; other sites raise.
    """

    def __init__(self):
        # Site-specific parser instances; presumably RedditParser is
        # defined elsewhere in this project.
        self.redditParser = RedditParser()

    def parseFromUrl(self, url):
        """Return the notification count (int) for *url*.

        Raises:
            ValueError: if the URL's host is not www.reddit.com
                (no other site parsers are implemented yet).
        """
        urlparts = urlparse(url)
        if urlparts.netloc == "www.reddit.com":
            return int(self.redditParser.getNotificationsFromUrl(url))
        # TODO: Add other parsers
        # NOTE: was Python 2 `raise Exception, "msg"` syntax (a SyntaxError
        # in Python 3). ValueError is more specific and remains a subclass
        # of Exception, so existing `except Exception` callers still work.
        raise ValueError("Site is not reddit.com")