Пример #1
0
def generateLinks(listOfSiteObjects, tag):
        """Build the result-page URLs for ``tag`` on every given site.

        Each site object is dispatched on its ``queryType`` attribute:

        * ``'JSON'`` -- the regular HTML listing is fetched to find the number
          of the last page, that last page is fetched to count its posts, and
          one URL per page is generated.
        * ``'XML'`` -- a one-post query is fetched and the ``count`` attribute
          of its ``posts`` element is used as the number of images.

        Sites with any other ``queryType`` are ignored, and a ``'JSON'`` site
        whose listing contains no "Last Page" link is skipped.

        Args:
            listOfSiteObjects: iterable of objects exposing ``queryType``,
                ``siteRoot``, ``siteQuery`` and ``pageFlag``.
            tag: the search tag, inserted verbatim into the generated URLs.

        Returns:
            A tuple ``(pageLinkList, totalNumberOfImages)``: the list of page
            URLs and the sum of the image counts over all handled sites.
        """
        pageLinkList = []
        totalNumberOfImages = 0

        # Page sizes the counting arithmetic below is built on.
        jsonPageSize = 20
        xmlPageSize = 100

        # The tag is escaped so that regex metacharacters in it (for example
        # the parentheses in "name_(series)") are matched literally instead of
        # being interpreted as pattern syntax.
        lastPagePattern = re.compile(
            r'<link href="/post\?page=(\d+)&amp;tags=%s" rel="last" title="Last Page"'
            % re.escape(tag))

        for obj in listOfSiteObjects:
                if obj.queryType == 'JSON':
                        # Retrieve source of the regular page
                        temporarySource = sourceRequester.getSource(
                            obj.siteRoot + '?tags=%s' % tag)

                        try:
                                lastPageMatch = lastPagePattern.search(
                                    temporarySource)
                        except TypeError:
                                # The source is not searchable text (presumably
                                # a failed request -- verify against
                                # sourceRequester); skip this site.
                                continue
                        if lastPageMatch is None:
                                # No "Last Page" link in the listing.
                                continue
                        lastPage = lastPageMatch.group(1)

                        # Fetch the last page to count the posts it holds.
                        temporarySource = sourceRequester.getSource(
                            obj.siteRoot + '?tags=%s' % tag + obj.pageFlag + lastPage)

                        # Every page before the last one is counted as full.
                        numberOfImages = (
                            jsonPageSize * (int(lastPage) - 1)
                            + len(re.findall(r'Post\.register\(\{', temporarySource)))

                        totalNumberOfImages += numberOfImages

                        # NOTE(review): when the last page is exactly full this
                        # yields one page more than ``lastPage`` -- confirm
                        # whether that extra request is intended.
                        for pageNumber in range(1, numberOfImages // jsonPageSize + 2):
                                pageLinkList.append(
                                    obj.siteRoot + obj.siteQuery + tag + obj.pageFlag + str(pageNumber))

                elif obj.queryType == 'XML':
                        temporarySource = sourceRequester.getSource(
                            obj.siteRoot + obj.siteQuery + tag + '&limit=1')
                        numberOfImages = 0

                        # Stop at the first completed 'posts' element; its
                        # 'count' attribute is taken as the number of images.
                        for ev, el in etree.iterparse(FileIO(temporarySource)):
                                if el.tag == 'posts':
                                        numberOfImages = int(
                                            el.attrib['count'])
                                        break

                        totalNumberOfImages += numberOfImages

                        if numberOfImages > xmlPageSize:
                                # NOTE(review): pages are numbered from 1 here
                                # but the single-page case below uses "0" --
                                # confirm the site's page indexing.
                                for pageNumber in range(1, numberOfImages // xmlPageSize + 2):
                                        pageLinkList.append(
                                            obj.siteRoot + obj.siteQuery + tag + obj.pageFlag + str(pageNumber))
                        else:
                                pageLinkList.append(
                                    obj.siteRoot + obj.siteQuery + tag + obj.pageFlag + "0")
        return (pageLinkList, totalNumberOfImages)
Пример #2
0
def work(appObject):
        """Collect image links for the cached sites/tags, then download them.

        The controls are disabled through ``appObject.enabler`` while the job
        runs. Every result page is fetched and parsed, and the first link seen
        for each key is kept. If ``appObject.is_running`` turns false during
        that phase the job is cancelled and ``None`` is returned. Otherwise
        ``appObject.num_threads`` threads running ``downloadWorker`` are
        started on the shared list of links and joined before the controls are
        re-enabled.
        """
        appObject.enabler('DISABLED')
        printToLabel(appObject, 'Gathering links to all the pages...')
        site_objects = booruInitializer.initialize(appObject.cacheSites)
        # generateLinks returns (page URLs, image count); only the URLs are used.
        page_urls = linkGenerator.generateLinks(
            site_objects, appObject.cacheTags)[0]
        page_total = len(page_urls)

        unique_links = {}

        # Phase 1: walk every result page and merge the links it yields.
        pages_done = 0
        for page_url in page_urls:
                if not appObject.is_running:
                        # cancel button was hit
                        printToLabel(appObject, "Link gathering cancelled")
                        appObject.enabler('NORMAL')
                        return None

                page_links = sourceParser.parse(
                    sourceRequester.getSource(page_url))

                # Keep the first link recorded for a key; later pages never
                # overwrite it.
                for image_key, image_link in page_links.items():
                        unique_links.setdefault(image_key, image_link)

                pages_done += 1
                progress = (pages_done, page_total, len(unique_links))
                printToLabel(
                    appObject,
                    'Visited %d out of %d pages so far.\nGot %d links to unique images so far.' % progress,
                    freeEndLine=0)

        # Phase 2: download the images. Every worker receives the same list
        # object.
        pending_links = list(unique_links.values())
        workers = []

        for _ in range(appObject.num_threads):
                worker = threading.Thread(
                    target=downloadWorker, args=(appObject, pending_links))
                workers.append(worker)
                worker.start()

        # Block until every worker has finished.
        for worker in workers:
                worker.join()

        appObject.enabler('NORMAL')
        remaining = len(pending_links)
        printToLabel(
            appObject, "Downloading terminated, %d links remaining" % remaining)