Exemplo n.º 1
0
def _pageLinksFor(obj, tag, numberOfImages, imagesPerPage):
	"""Return the result-page URLs (pages numbered from 1) for one site.

	The page count is ``numberOfImages // imagesPerPage + 1``.  Floor division
	is used so the count stays an integer on Python 3 as well as Python 2
	(``range`` rejects floats).
	"""
	# NOTE(review): when numberOfImages is an exact multiple of imagesPerPage
	# this formula yields one extra trailing page (presumably an empty one);
	# kept as-is so the generated page list matches the historical behaviour.
	numberOfPages = numberOfImages // imagesPerPage + 1
	urlPrefix = obj.siteRoot + obj.siteQuery + tag + obj.pageFlag
	return [urlPrefix + str(pageNumber) for pageNumber in range(1, numberOfPages + 1)]


def generateLinks(listOfSiteObjects, tag):
	"""Build the URLs of every result page for `tag` on the given sites.

	Each site object is probed through ``sourceRequester.getSource`` to work
	out how many images match `tag`; one URL per result page is then generated.
	Only sites whose ``queryType`` is ``'JSON'`` or ``'XML'`` are handled; any
	other site, and any site whose image count cannot be extracted from the
	probe response, is skipped.

	Args:
		listOfSiteObjects: iterable of site descriptors exposing the
			attributes ``queryType``, ``siteRoot``, ``siteQuery`` and
			``pageFlag``.
		tag: search tag; inserted verbatim into the generated URLs.

	Returns:
		A tuple ``(pageLinkList, totalNumberOfImages)``: the list of page URLs
		and the summed image count over all processed sites.
	"""
	jsonImagesPerPage = 20
	xmlImagesPerPage = 100

	pageLinkList = []
	totalNumberOfImages = 0

	# The tag has to be matched literally, so escape regex metacharacters such
	# as '+' or parentheses before embedding it in the pattern.
	lastPagePattern = (r'<link href="/post\?page=(\d+)&amp;tags=%s" rel="last" title="Last Page"'
		% re.escape(tag))

	for obj in listOfSiteObjects:
		if obj.queryType == 'JSON':
			# Retrieve the source of the regular (first) page
			searchUrl = obj.siteRoot + '?tags=%s' % tag
			firstPageSource = sourceRequester.getSource(searchUrl)

			try:
				lastPage = re.search(lastPagePattern, firstPageSource).group(1)
			except (AttributeError, TypeError):
				# No "last page" link in the response (search returned None),
				# or the response is not something re can scan: skip the site.
				continue

			# Count the posts registered on the last page; every page before
			# it is assumed to hold jsonImagesPerPage images.
			lastPageSource = sourceRequester.getSource(searchUrl + obj.pageFlag + lastPage)
			imagesOnLastPage = len(re.findall(r'Post\.register\(\{', lastPageSource))
			numberOfImages = jsonImagesPerPage * (int(lastPage) - 1) + imagesOnLastPage

			totalNumberOfImages += numberOfImages
			pageLinkList.extend(_pageLinksFor(obj, tag, numberOfImages, jsonImagesPerPage))

		elif obj.queryType == 'XML':
			# Ask for a single post; the total is read from the count attribute.
			countSource = sourceRequester.getSource(obj.siteRoot + obj.siteQuery + tag + '&limit=1')
			try:
				numberOfImages = int(re.search(r'<posts count="(\d+)" offset="0">', countSource).group(1))
			except (AttributeError, TypeError):
				# Count element missing or unusable response: skip the site.
				continue

			totalNumberOfImages += numberOfImages
			pageLinkList.extend(_pageLinksFor(obj, tag, numberOfImages, xmlImagesPerPage))

	return (pageLinkList, totalNumberOfImages)
Exemplo n.º 2
0
def work(appObject):
	"""Gather image links for the cached sites/tags, then download them.

	The controls are disabled through ``appObject.enabler('DISABLED')`` for the
	duration of the job and are always switched back to ``'NORMAL'`` afterwards,
	including when link gathering is cancelled or an exception escapes.

	Steps:
	  1. Build the list of result pages with ``linkGenerator.generateLinks``.
	  2. Fetch and parse each page, keeping the first link seen for each key.
	     The loop stops early when ``appObject.is_running`` becomes false.
	  3. Start ``appObject.num_threads`` threads running ``downloadWorker`` on
	     the shared list of links and wait for all of them.

	Args:
		appObject: application object providing ``enabler``, ``cacheSites``,
			``cacheTags``, ``is_running`` and ``num_threads``.

	Returns:
		None in every case.
	"""
	appObject.enabler('DISABLED')
	try:
		printToLabel(appObject, 'Gathering links to all the pages...')
		objectList = booruInitializer.initialize(appObject.cacheSites)
		pageLinkList = linkGenerator.generateLinks(objectList, appObject.cacheTags)[0]

		imageLinkDictionary = {}

		# gathering links to the images we want
		for visited, page in enumerate(pageLinkList, 1):
			if not appObject.is_running:
				# cancel button was hit
				printToLabel(appObject, "Link gathering cancelled")
				return None

			pageSource = sourceRequester.getSource(page)
			for key, link in sourceParser.parse(pageSource).items():
				# keep the first link recorded for a key; later duplicates are ignored
				imageLinkDictionary.setdefault(key, link)

			printToLabel(appObject, 'Visited %d out of %d pages so far.\nGot %d links to unique images so far.'
				     % (visited, len(pageLinkList), len(imageLinkDictionary)), freeEndLine = 0)

		# download images
		# NOTE(review): the same list object is handed to every worker;
		# presumably downloadWorker removes the links it has handled - confirm.
		llink = list(imageLinkDictionary.values())
		l_thread = []

		# spawn multiple threads
		for _ in range(appObject.num_threads):
			dl_thread = threading.Thread(target = downloadWorker, args = (appObject, llink))
			l_thread.append(dl_thread)
			dl_thread.start()

		# wait for all threads to finish
		for dl_thread in l_thread:
			dl_thread.join()
	finally:
		# Re-enable the controls on every exit path so that a failure while
		# fetching/parsing cannot leave the interface permanently disabled.
		appObject.enabler('NORMAL')

	printToLabel(appObject, "Downloading terminated, %d links remaining" % (len(llink)))