def run(self):
    global cnt
    while True:
        url = self.fetchqueue.get()        # blocks until a URL is queued
        data = urlopener(url)
        parseComments(data)
        self.fetchqueue.task_done()
        # NOTE: both the print statement and the unguarded increment
        # race between worker threads
        print "[%3d / %d] completed" % (cnt + 1, len(linklist))
        cnt += 1
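The revision below swaps the print statement for a single stderr.write() call. Presumably that is the point of the second version: in CPython 2 a print statement may emit the text and the trailing newline as separate writes, so progress lines from different threads can interleave, while one write() of a pre-built string usually comes out intact. The shared counter itself is still unsynchronized (see the sketch after the next listing).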
def run(self):
    global cnt
    while True:
        url = self.fetchqueue.get()
        data = urlopener(url)
        parseComments(data)
        self.fetchqueue.task_done()
        # one write() call per line, so thread output no longer interleaves
        # (needs: from sys import stderr)
        textout = "[%3d / %d] completed\n" % (cnt + 1, len(linklist))
        stderr.write(textout)
        cnt += 1
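Writing to stderr only fixes the interleaved output; cnt += 1 is still a read-modify-write race, so two threads can report the same count. A minimal sketch of one way to guard it with a lock, assuming a module-level cntlock (a name not in the original):

from sys import stderr
from threading import Lock

cntlock = Lock()  # hypothetical lock guarding the shared counter

def run(self):
    global cnt
    while True:
        url = self.fetchqueue.get()
        data = urlopener(url)
        parseComments(data)
        self.fetchqueue.task_done()
        with cntlock:
            # increment and report under the lock, so each thread
            # sees and prints a unique count
            cnt += 1
            stderr.write("[%3d / %d] completed\n" % (cnt, len(linklist)))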
def main(url):
    global comments, linklist
    data = urlopener(url)
    if data is None:
        print "Zero data"
        exit(1)
    commentcount = parseCommentsTotalCount(data)
    if commentcount == -1:
        print "No customer reviews available or not a reviews page"
        print "(or an ugly malfunction)"
        exit(1)
    elif commentcount <= 10:
        # Only one page of comments, so parse it directly
        commentarea = commentsStartStopLineNmbr(data)  # returns (start, end) of comments area
        if commentarea is not None:
            parseComments(data)
        return 0
    totalcommPages, baseUrl = parsePagesTotal(data)  # returns (pagecount, lastpageurl)
    if totalcommPages == -1:
        print "Are you sure that link is a reviews page?"
        exit(1)
    linklist = generatePageLinks(baseUrl, totalcommPages)
    # Start the Fetcher daemon threads, then feed them the page links
    for i in range(MAX_THREADS):
        t = Fetcher(fetchqueue)
        t.setDaemon(True)
        t.start()
    for url in linklist:
        fetchqueue.put(url)
    fetchqueue.join()   # block until every queued page has been parsed
    return 0
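For reference, these fragments rely on several module-level names defined elsewhere. A minimal sketch of that scaffolding, assuming Python 2 (Queue, urllib2); the names MAX_THREADS, fetchqueue, linklist, comments, cnt, urlopener and Fetcher are taken from the snippets above, while the bodies are illustrative guesses, not the original implementation:

from Queue import Queue
from threading import Thread
from sys import stderr
import urllib2

MAX_THREADS = 4      # assumed value; the original defines this elsewhere
fetchqueue = Queue() # work queue shared by main() and the Fetcher threads
linklist = []        # page URLs, filled in by main()
comments = []        # parsed comments, presumably filled by parseComments()
cnt = 0              # pages completed so far

def urlopener(url):
    # Return the page body, or None on failure (main() checks for None)
    try:
        return urllib2.urlopen(url).read()
    except urllib2.URLError:
        return None

class Fetcher(Thread):
    # Worker thread; its run() is the method shown at the top of this section
    def __init__(self, queue):
        Thread.__init__(self)
        self.fetchqueue = queue

Because the threads are daemons, they die with the main thread once fetchqueue.join() returns; no explicit shutdown signal is needed.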