Example #1
    def run(self):
        global cnt

        while True:
            # Block until a URL is available on the shared fetch queue
            url = self.fetchqueue.get()
            data = urlopener(url)
            parseComments(data)
            self.fetchqueue.task_done()
            # Progress report; cnt and linklist are shared module-level state
            print("[%3d / %d] completed" % (cnt + 1, len(linklist)))
            cnt += 1
Example #2
    def run(self):
        global cnt

        while True:
            # Block until a URL is available on the shared fetch queue
            url = self.fetchqueue.get()
            data = urlopener(url)
            parseComments(data)
            self.fetchqueue.task_done()
            # Same as Example #1, but progress goes to stderr
            # (assumes "from sys import stderr" at module level)
            textout = "[%3d / %d] completed\n" % (cnt + 1, len(linklist))
            stderr.write(textout)
            cnt += 1
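Both run() variants above update the shared counter cnt from several worker threads without a lock, so the reported progress numbers can interleave or repeat under contention. Below is a minimal, self-contained sketch of a lock-protected counter; cnt_lock, total, and report_progress are illustrative names and are not part of the original project.

import threading

cnt = 0
cnt_lock = threading.Lock()
total = 25  # stands in for len(linklist)

def report_progress():
    # Serialize the read-modify-write on cnt so counts are never skipped or duplicated
    global cnt
    with cnt_lock:
        cnt += 1
        print("[%3d / %d] completed" % (cnt, total))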
Example #3
def main(url):
    global comments, linklist

    data = urlopener(url)
    if data is None:
        print("Zero data")
        exit(1)

    commentcount = parseCommentsTotalCount(data)
    if commentcount == -1:
        print("No customer reviews available or not in the reviews page")
        print("(or an ugly malfunction)")
        exit(1)
    elif commentcount <= 10:
        # Only one page of reviews: parse it directly and return
        commentarea = commentsStartStopLineNmbr(data)  # returns (start, end) of comments area
        if commentarea is not None:
            parseComments(data)
            return

    # More than one page: build the full list of review-page URLs
    totalcommPages, baseUrl = parsePagesTotal(data)  # returns (pagecount, lastpageurl)
    if totalcommPages == -1:
        print("That link surely is a comment page?")
        exit(1)
    linklist = generatePageLinks(baseUrl, totalcommPages)

    # Start the Fetcher daemon threads, feed them the links, and wait for the queue to drain
    for i in range(MAX_THREADS):
        t = Fetcher(fetchqueue)
        t.setDaemon(True)
        t.start()
    for link in linklist:
        fetchqueue.put(link)
    fetchqueue.join()

    return 0
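All three examples depend on module-level scaffolding that the page does not show: the Fetcher thread class, the shared queue, the progress counter, and the helper functions. The sketch below is an assumption of what that setup roughly looks like; the placeholder bodies, the MAX_THREADS value, and the import layout are guesses rather than the original code, and the parsing helpers (parseCommentsTotalCount, parsePagesTotal, generatePageLinks, commentsStartStopLineNmbr) are omitted.

import threading
from Queue import Queue   # Python 2; on Python 3 this would be "from queue import Queue"

MAX_THREADS = 4            # assumed value
fetchqueue = Queue()
linklist = []
comments = []
cnt = 0

def urlopener(url):
    # Placeholder: the real project fetches the review page and returns its contents
    raise NotImplementedError

def parseComments(data):
    # Placeholder: the real project extracts review text from the page into comments
    raise NotImplementedError

class Fetcher(threading.Thread):
    # Worker thread that consumes URLs from the shared queue; its run() is shown in Examples #1-#2
    def __init__(self, queue):
        threading.Thread.__init__(self)
        self.fetchqueue = queue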