Example #1
    def scanSingleDir(self, dirPath):
        self.log.info("Dir %s", dirPath)
        items = os.listdir(dirPath)
        items.sort()
        for item in items:
            item = os.path.join(dirPath, item)
            if os.path.isfile(item):
                fPath, fName = os.path.split(item)
                guessName = nt.guessSeriesFromFilename(fName)

                dirName = fPath.strip("/").split("/")[-1]
                guess2 = nt.guessSeriesFromFilename(dirName)

                dist = lv.distance(guessName, guess2)

                # Assumption: The filename probably has shit tacked onto it.
                # Therefore, the allowable edit distance delta is the extent to
                # which the filename is longer than the dirname.
                normed = dist - (len(guessName) - len(guess2))
                if normed > 0:
                    self.log.warning("Wat: %s",
                                     (normed, item, guessName, guess2))
                elif normed < 0:
                    self.log.error("Wat: %s",
                                   (normed, item, guessName, guess2))
                else:
                    if guess2 in nt.dirNameProxy and nt.dirNameProxy[guess2][
                            "fqPath"]:
                        itemInfo = nt.dirNameProxy[guess2]
                        # print(itemInfo)
                        if itemInfo["fqPath"] != dirPath:
                            dstDir = itemInfo["fqPath"]
                            print("Move file '%s' from:" % fName)

                            print("	Src = '%s'" % fPath)
                            print("	Dst = '%s'" % dstDir)

                            dstPath = os.path.join(dstDir, fName)

                            try:
                                shutil.move(item, dstPath)

                                # Set pron to True, to prevent accidental uploading.
                                processDownload.processDownload(
                                    guess2,
                                    dstPath,
                                    deleteDups=True,
                                    includePHash=True,
                                    pron=True,
                                    crossReference=False)

                            except KeyboardInterrupt:
                                shutil.move(dstPath, item)
                                raise
                    else:
                        print("No match: ", fName)
Example #2
    def getLink(self, link):
        sourceUrl = link["sourceUrl"]
        seriesName = link["seriesName"]
        originFileName = link["originName"]

        self.updateDbEntry(sourceUrl, dlState=1)
        self.log.info("Downloading = '%s', '%s'", seriesName, originFileName)
        dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

        if link["flags"] == None:
            link["flags"] = ""

        if newDir:
            self.updateDbEntry(sourceUrl, flags=" ".join([link["flags"], "haddir"]))
            self.conn.commit()

        try:
            content, headerName = self.getLinkFile(sourceUrl)
        except Exception:
            self.log.error("Unrecoverable error retrieving content %s", link)
            self.log.error("Traceback: %s", traceback.format_exc())

            self.updateDbEntry(sourceUrl, dlState=-1)
            return

        headerName = urllib.parse.unquote(headerName)

        fName = "%s - %s" % (originFileName, headerName)
        fName = nt.makeFilenameSafe(fName)

        fName, ext = os.path.splitext(fName)
        fName = "%s [CXC Scans]%s" % (fName, ext)

        fqFName = os.path.join(dlPath, fName)
        self.log.info("SaveName = %s", fqFName)

        loop = 1
        baseName, ext = os.path.splitext(fName)
        while os.path.exists(fqFName):
            fName = "%s (%d)%s" % (baseName, loop, ext)
            fqFName = os.path.join(dlPath, fName)
            loop += 1
        self.log.info("Writing file")

        filePath, fileName = os.path.split(fqFName)

        try:
            with open(fqFName, "wb") as fp:
                fp.write(content)
        except TypeError:
            self.log.error("Failure trying to retrieve content from source %s", sourceUrl)
            self.updateDbEntry(sourceUrl, dlState=-4, downloadPath=filePath, fileName=fileName)
            return
            # self.log.info( filePath)

        dedupState = processDownload.processDownload(seriesName, fqFName, deleteDups=True)

        self.log.info("Done")
        self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, tags=dedupState)
        return
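
The collision loop (as fixed above) recurs in several of these examples and can be factored into a helper. A sketch, with the hypothetical name `uniquePath`:

import os

def uniquePath(dirPath, fName):
    # Append " (1)", " (2)", ... before the extension until no file with
    # that name exists in dirPath.
    base, ext = os.path.splitext(fName)
    candidate = os.path.join(dirPath, fName)
    loop = 1
    while os.path.exists(candidate):
        candidate = os.path.join(dirPath, "%s (%d)%s" % (base, loop, ext))
        loop += 1
    return candidate
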
Example #3
	def getLink(self, linkDict):
		try:
			linkDict = self.getDownloadInfo(linkDict)
			images = self.getImages(linkDict)
			title  = linkDict['title']
			artist = linkDict['artist']

		except webFunctions.ContentError:
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-2, downloadPath="ERROR", fileName="ERROR: FAILED")
			return False

		if images and title:
			fileN = title+" "+artist+".zip"
			fileN = nt.makeFilenameSafe(fileN)


			# self.log.info("geturl with processing", fileN)
			wholePath = os.path.join(linkDict["dirPath"], fileN)
			wholePath = self.insertCountIfFilenameExists(wholePath)
			self.log.info("Complete filepath: %s", wholePath)

			# Write all downloaded files to the archive.

			try:
				arch = zipfile.ZipFile(wholePath, "w")
			except OSError:
				title = title.encode('ascii','ignore').decode('ascii')
				fileN = title+".zip"
				fileN = nt.makeFilenameSafe(fileN)
				wholePath = os.path.join(linkDict["dirPath"], fileN)
				arch = zipfile.ZipFile(wholePath, "w")

			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			self.log.info("Successfully Saved to path: %s", wholePath)


			self.updateDbEntry(linkDict["sourceUrl"], downloadPath=linkDict["dirPath"], fileName=fileN)

			# Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
			dedupState = processDownload.processDownload(None, wholePath, pron=True, deleteDups=True, includePHash=True, rowId=linkDict['dbId'])
			self.log.info( "Done")

			if dedupState:
				self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)


			self.updateDbEntry(linkDict["sourceUrl"], dlState=2)


			return wholePath

		else:

			self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

			return False
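
The OSError fallback above handles titles the filesystem cannot encode. The same pattern in isolation, as a sketch (the helper name is illustrative):

import os
import zipfile

def openArchive(dirPath, title):
    # Try the full title first; if the filesystem rejects the name,
    # retry with the title reduced to plain ASCII.
    try:
        return zipfile.ZipFile(os.path.join(dirPath, title + ".zip"), "w")
    except OSError:
        safeTitle = title.encode('ascii', 'ignore').decode('ascii')
        return zipfile.ZipFile(os.path.join(dirPath, safeTitle + ".zip"), "w")
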
Example #4
	def getLink(self, link):
		sourceUrl, originFileName = link["sourceUrl"], link["originName"]

		self.log.info( "Should retreive: %s, url - %s", originFileName, sourceUrl)

		self.updateDbEntry(sourceUrl, dlState=1)
		self.conn.commit()

		fileUrl = self.getDownloadUrl(sourceUrl)
		if fileUrl is None:
			self.log.warning("Could not find url!")
			self.deleteRowsByValue(sourceUrl=sourceUrl)
			return


		try:
			content, hName = self.getLinkFile(fileUrl, sourceUrl)
		except Exception:
			self.log.error("Unrecoverable error retrieving content %s", link)
			self.log.error("Traceback: %s", traceback.format_exc())

			self.updateDbEntry(sourceUrl, dlState=-1)
			return

		# print("Content type = ", type(content))


		# And fix %xx crap
		hName = urllib.parse.unquote(hName)

		fName = "%s - %s" % (originFileName, hName)
		fName = nt.makeFilenameSafe(fName)

		fqFName = os.path.join(link["targetDir"], fName)
		self.log.info( "SaveName = %s", fqFName)


		loop = 1
		while os.path.exists(fqFName):
			fName = "%s - (%d) - %s" % (originFileName, loop,  hName)
			fqFName = os.path.join(link["targetDir"], fName)
			loop += 1
		self.log.info( "Writing file")

		filePath, fileName = os.path.split(fqFName)

		try:
			with open(fqFName, "wb") as fp:
				fp.write(content)
		except TypeError:
			self.log.error("Failure trying to retrieve content from source %s", sourceUrl)
			return
		#self.log.info( filePath)

		dedupState = processDownload.processDownload(link["seriesName"], fqFName, deleteDups=True, includePHash=True)
		self.log.info( "Done")

		self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, tags=dedupState)
		return
Example #5
    def doDownload(self, linkDict, retag=False):

        downloadUrl = self.getDownloadUrl(linkDict["dlPage"], linkDict["sourceUrl"])

        if downloadUrl:

            fCont, fName = self.wg.getFileAndName(downloadUrl)

            # self.log.info(len(content))
            if linkDict["originName"] in fName:
                fileN = fName
            else:
                fileN = "%s - %s.zip" % (linkDict["originName"], fName)
                fileN = fileN.replace(".zip .zip", ".zip")

            fileN = nt.makeFilenameSafe(fileN)

            chop = len(fileN) - 4

            wholePath = "ERROR"
            while True:

                try:
                    fileN = fileN[:chop] + fileN[-4:]
                    # self.log.info("geturl with processing", fileN)
                    wholePath = os.path.join(linkDict["dirPath"], fileN)
                    self.log.info("Complete filepath: %s", wholePath)

                    # Write all downloaded files to the archive.
                    with open(wholePath, "wb") as fp:
                        fp.write(fCont)
                    self.log.info("Successfully Saved to path: %s", wholePath)
                    break
                except IOError:
                    chop = chop - 1
                    self.log.warning("Truncating file length to %s characters.", chop)

            if not linkDict["tags"]:
                linkDict["tags"] = ""

            self.updateDbEntry(linkDict["sourceUrl"], downloadPath=linkDict["dirPath"], fileName=fileN)

            # Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
            dedupState = processDownload.processDownload(linkDict["seriesName"], wholePath, pron=True)
            self.log.info("Done")

            if dedupState:
                self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)

            self.updateDbEntry(linkDict["sourceUrl"], dlState=2)
            self.conn.commit()

        else:

            self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

            self.conn.commit()
            return False
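
The chop loop above retries the write with a progressively shorter basename whenever the OS rejects an over-long path, always keeping the 4-character extension. A standalone sketch of that retry (the helper name is hypothetical):

import os

def writeTruncating(dirPath, fileN, data):
    # Shave one character off the basename per IOError; the last four
    # characters (the ".zip" extension) are preserved on every attempt.
    chop = len(fileN) - 4
    while True:
        candidate = fileN[:chop] + fileN[-4:]
        wholePath = os.path.join(dirPath, candidate)
        try:
            with open(wholePath, "wb") as fp:
                fp.write(data)
            return wholePath
        except IOError:
            chop -= 1
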
Example #6
	def fetchLinkList(self, itemList):

		dbInt = utilities.EmptyRetreivalDb.ScraperDbTool()

		try:
			for item in itemList:

				srcStr = 'import-{hash}'.format(hash=hashlib.md5(item.encode("utf-8")).hexdigest())
				itemtags = self.extractTags(item)
				if itemtags is None or itemtags == "None":
					itemtags = []
				fPath = os.path.join(settings.djMoeDir, "imported")

				if not os.path.exists(fPath):
					os.makedirs(fPath)

				srcPath = os.path.join(self.sourcePath, item)
				dstPath = os.path.join(fPath, item)

				if os.path.exists(dstPath):
					raise ValueError("Destination path already exists? = '%s'" % dstPath)

				# print("os.path.exists", os.path.exists(srcPath), os.path.exists(dstPath))

				# print("Item '%s' '%s' '%s'" % (srcPath, dstPath, itemtags))
				shutil.move(srcPath, dstPath)
				dbInt.insertIntoDb(retreivalTime=200,
									sourceUrl=srcStr,
									originName=item,
									dlState=2,
									downloadPath=fPath,
									fileName=item,
									seriesName="imported")




				dedupState = processDownload.processDownload("imported", dstPath, pron=True, deleteDups=True)


				tags = dedupState + ' ' + ' '.join(itemtags)
				tags = tags.strip()
				if dedupState:
					dbInt.addTags(sourceUrl=srcStr, tags=tags)

				self.log.info( "Done")

				if not runStatus.run:
					self.log.info( "Breaking due to exit flag being set")
					break

		except Exception:
			self.log.critical("Exception!")
			traceback.print_exc()
			self.log.critical(traceback.format_exc())
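
Imported files have no real source URL, so the example keys each database row on an md5 of the filename instead. The key scheme in isolation (helper name is illustrative):

import hashlib

def importKey(filename):
    # Stable, unique stand-in for a sourceUrl, derived from the name.
    return 'import-{hash}'.format(hash=hashlib.md5(filename.encode("utf-8")).hexdigest())

print(importKey("Some Title (2014).zip"))
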
Example #7
    def getFile(self, file_data):

        row = self.getRowsByValue(sourceUrl=file_data["baseUrl"],
                                  limitByKey=False)
        if row and row[0]['dlState'] != 0:
            return
        if not row:
            self.insertIntoDb(retreivalTime=time.time(),
                              sourceUrl=file_data["baseUrl"],
                              originName=file_data["title"],
                              dlState=1,
                              seriesName=file_data["title"])

        image_links = self.getFileInfo(file_data)

        images = []
        for imagen, imageurl in image_links:
            imdat = self.get_image(imageurl, file_data['xor_key'])
            images.append((imagen, imdat))

            # filen = nt.makeFilenameSafe(file_data['title'] + " - " + imagen)
            # with open(filen, "wb") as fp:
            # 	fp.write(imdat)

        fileN = '{series} - c{chapNo:03.0f} [MangaBox].zip'.format(
            series=file_data['title'], chapNo=file_data['chapter'])
        fileN = nt.makeFilenameSafe(fileN)

        dlPath, newDir = self.locateOrCreateDirectoryForSeries(
            file_data["title"])
        wholePath = os.path.join(dlPath, fileN)

        if newDir:
            self.updateDbEntry(file_data["baseUrl"], flags="haddir")
            self.conn.commit()

        arch = zipfile.ZipFile(wholePath, "w")
        for imageName, imageContent in images:
            arch.writestr(imageName, imageContent)
        arch.close()

        self.log.info("Successfully Saved to path: %s", wholePath)

        dedupState = processDownload.processDownload(file_data["title"],
                                                     wholePath,
                                                     deleteDups=True)
        if dedupState:
            self.addTags(sourceUrl=file_data["baseUrl"], tags=dedupState)

        self.updateDbEntry(file_data["baseUrl"],
                           dlState=2,
                           downloadPath=dlPath,
                           fileName=fileN,
                           originName=fileN)

        self.conn.commit()
        self.log.info("Done")
Example #8
    def doDownload(self, linkDict, retag=False):

        images = self.fetchImages(linkDict)
        # images = ['wat']
        # print(linkDict)
        # self.log.info(len(content))

        if images:
            linkDict["chapterNo"] = float(linkDict["chapterNo"])
            fileN = '{series} - c{chapNo:06.1f} - {sourceName} [crunchyroll].zip'.format(
                series=linkDict['seriesName'],
                chapNo=linkDict["chapterNo"],
                sourceName=linkDict['originName'])
            fileN = nt.makeFilenameSafe(fileN)

            # self.log.info("geturl with processing", fileN)
            wholePath = os.path.join(linkDict["dirPath"], fileN)
            self.log.info("Complete filepath: %s", wholePath)

            #Write all downloaded files to the archive.
            arch = zipfile.ZipFile(wholePath, "w")
            for imageName, imageContent in images:
                arch.writestr(imageName, imageContent)
            arch.close()

            self.log.info("Successfully Saved to path: %s", wholePath)

            if not linkDict["tags"]:
                linkDict["tags"] = ""

            dedupState = processDownload.processDownload(
                linkDict["seriesName"], wholePath, deleteDups=True)
            self.log.info("Done")

            if dedupState:
                self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)

            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=2,
                               downloadPath=linkDict["dirPath"],
                               fileName=fileN,
                               originName=fileN)

            self.conn.commit()
            return wholePath

        else:

            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-1,
                               downloadPath="ERROR",
                               fileName="ERROR: FAILED")

            self.conn.commit()
            return False
Example #9
	def doDownload(self, linkDict, retag=False):

		images = self.fetchImages(linkDict)


		# self.log.info(len(content))

		if images:
			fileN = linkDict['originName']+".zip"
			fileN = nt.makeFilenameSafe(fileN)


			# self.log.info("geturl with processing", fileN)
			wholePath = os.path.join(linkDict["dirPath"], fileN)
			self.log.info("Complete filepath: %s", wholePath)

			# Write all downloaded files to the archive.
			arch = zipfile.ZipFile(wholePath, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			self.log.info("Successfully Saved to path: %s", wholePath)

			if not linkDict["tags"]:
				linkDict["tags"] = ""



			self.updateDbEntry(linkDict["sourceUrl"], downloadPath=linkDict["dirPath"], fileName=fileN)


			# Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
			dedupState = processDownload.processDownload(None, wholePath, pron=True, deleteDups=True, includePHash=True)
			self.log.info( "Done")

			if dedupState:
				self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)


			self.updateDbEntry(linkDict["sourceUrl"], dlState=2)
			self.conn.commit()

			return wholePath

		else:

			self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

			self.conn.commit()
			return False
Example #10
	def xdcc_receive_finish(self):
		self.log.info("XDCC Transfer starting!")
		self.changeState("xdcc finished")


		dedupState = processDownload.processDownload(self.currentItem["seriesName"], self.currentItem["downloadPath"], deleteDups=True)
		self.log.info( "Done")

		self.db.addTags(dbId=self.currentItem["dbId"], tags=dedupState)
		if dedupState != "damaged":
			self.db.updateDbEntry(self.currentItem["sourceUrl"], dlState=2)
		else:
			self.db.updateDbEntry(self.currentItem["sourceUrl"], dlState=-10)
Example #11
    def doDownload(self, seriesName, dlurl, chapter_name):

        row = self.getRowsByValue(sourceUrl=dlurl, limitByKey=False)
        if row and row[0]['dlState'] != 0:
            return

        if not row:
            self.insertIntoDb(retreivalTime=time.time(),
                              sourceUrl=dlurl,
                              originName=seriesName,
                              dlState=1,
                              seriesName=seriesName)

        fctnt, fname = self.wg.getFileAndName(dlurl)

        fileN = '{series} - {chap} [YoManga].zip'.format(series=seriesName,
                                                         chap=chapter_name)
        fileN = nt.makeFilenameSafe(fileN)

        dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)
        wholePath = os.path.join(dlPath, fileN)

        self.log.info("Source name: %s", fname)
        self.log.info("Generated name: %s", fileN)

        if newDir:
            self.updateDbEntry(dlurl, flags="haddir")
            self.conn.commit()

        with open(wholePath, "wb") as fp:
            fp.write(fctnt)

        self.log.info("Successfully Saved to path: %s", wholePath)

        dedupState = processDownload.processDownload(seriesName,
                                                     wholePath,
                                                     deleteDups=True)
        if dedupState:
            self.addTags(sourceUrl=dlurl, tags=dedupState)

        self.updateDbEntry(dlurl,
                           dlState=2,
                           downloadPath=dlPath,
                           fileName=fileN,
                           originName=fileN)

        self.conn.commit()
Example #12
	def getLink(self, link):
		sourceUrl  = link["sourceUrl"]
		seriesName = link["seriesName"]
		chapterVol = link["originName"]


		try:
			self.log.info("Should retrieve url - %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=1)

			imageUrls = self.getImageUrls(sourceUrl)
			if not imageUrls:
				self.log.critical("Failure retrieving content at %s", sourceUrl)
				self.log.critical("Page not found - 404")
				self.updateDbEntry(sourceUrl, dlState=-1)
				return

			self.log.info("Downloading = '%s', '%s'", seriesName, chapterVol)
			dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

			if link["flags"] == None:
				link["flags"] = ""

			if newDir:
				self.updateDbEntry(sourceUrl, flags=" ".join([link["flags"], "haddir"]))
				self.conn.commit()

			chapterName = nt.makeFilenameSafe(chapterVol)

			fqFName = os.path.join(dlPath, chapterName+"[MangaCow].zip")

			loop = 1
			prefix, ext = os.path.splitext(fqFName)
			while os.path.exists(fqFName):
				fqFName = "%s (%d)%s" % (prefix, loop, ext)
				loop += 1
			self.log.info("Saving to archive = %s", fqFName)

			images = []
			for imgUrl, referrerUrl in imageUrls:
				imageName, imageContent = self.getImage(imgUrl, referrerUrl)

				images.append([imageName, imageContent])

				if not runStatus.run:
					self.log.info( "Breaking due to exit flag being set")
					self.updateDbEntry(sourceUrl, dlState=0)
					return

			self.log.info("Creating archive with %s images", len(images))

			if not images:
				self.updateDbEntry(sourceUrl, dlState=-1, seriesName=seriesName, originName=chapterVol, tags="error-404")
				return

			#Write all downloaded files to the archive.
			arch = zipfile.ZipFile(fqFName, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			dedupState = processDownload.processDownload(seriesName, fqFName, deleteDups=True, includePHash=True, phashThresh=6)
			self.log.info( "Done")

			filePath, fileName = os.path.split(fqFName)
			self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, seriesName=seriesName, originName=chapterVol, tags=dedupState)
			return

		except Exception:
			self.log.critical("Failure retrieving content at %s", sourceUrl)
			self.log.critical("Traceback = %s", traceback.format_exc())
			self.updateDbEntry(sourceUrl, dlState=-1)
Example #13
    def doDownload(self, linkDict, link, retag=False):

        downloadUrl = self.getDownloadUrl(linkDict['dlPage'],
                                          linkDict["sourceUrl"])

        if downloadUrl:

            fCont, fName = self.wg.getFileAndName(downloadUrl)

            # self.log.info(len(content))
            if linkDict['originName'] in fName:
                fileN = fName
            else:
                fileN = '%s - %s.zip' % (linkDict['originName'], fName)
                fileN = fileN.replace('.zip .zip', '.zip')

            fileN = nt.makeFilenameSafe(fileN)

            chop = len(fileN) - 4

            wholePath = "ERROR"
            while True:

                try:
                    fileN = fileN[:chop] + fileN[-4:]
                    # self.log.info("geturl with processing", fileN)
                    wholePath = os.path.join(linkDict["dirPath"], fileN)
                    wholePath = self.insertCountIfFilenameExists(wholePath)
                    self.log.info("Complete filepath: %s", wholePath)

                    #Write all downloaded files to the archive.
                    with open(wholePath, "wb") as fp:
                        fp.write(fCont)
                    fileN = os.path.split(wholePath)[-1]
                    self.log.info("Successfully Saved to path: %s", wholePath)
                    break
                except IOError:
                    chop = chop - 1
                    self.log.warning("Truncating file length to %s characters.",
                                     chop)

            if not linkDict["tags"]:
                linkDict["tags"] = ""

            self.updateDbEntry(linkDict["sourceUrl"],
                               downloadPath=linkDict["dirPath"],
                               fileName=fileN)

            # Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
            dedupState = processDownload.processDownload(
                linkDict["seriesName"],
                wholePath,
                pron=True,
                rowId=link['dbId'])
            self.log.info("Done")

            if dedupState:
                self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)

            self.updateDbEntry(linkDict["sourceUrl"], dlState=2)

        else:

            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-1,
                               downloadPath="ERROR",
                               fileName="ERROR: FAILED")
            return False
Example #14
	def doDownload(self, linkDict, retag=False):

		images = self.fetchImages(linkDict)


		# self.log.info(len(content))

		if images:
			fileN = linkDict['originName']+".zip"
			fileN = nt.makeFilenameSafe(fileN)


			# self.log.info("geturl with processing", fileN)
			wholePath = os.path.join(linkDict["dirPath"], fileN)
			self.log.info("Complete filepath: %s", wholePath)

					#Write all downloaded files to the archive.


			chop = len(fileN)-4
			wholePath = "ERROR"
			while 1:

				try:
					fileN = fileN[:chop]+fileN[-4:]
					# self.log.info("geturl with processing", fileN)
					wholePath = os.path.join(linkDict["dirPath"], fileN)
					self.log.info("Complete filepath: %s", wholePath)

					#Write all downloaded files to the archive.

					arch = zipfile.ZipFile(wholePath, "w")
					for imageName, imageContent in images:
						arch.writestr(imageName, imageContent)
					arch.close()

					self.log.info("Successfully Saved to path: %s", wholePath)
					break
				except IOError:
					chop = chop - 1
					self.log.warning("Truncating file length to %s characters.", chop)

			if not linkDict["tags"]:
				linkDict["tags"] = ""

			self.updateDbEntry(linkDict["sourceUrl"], downloadPath=linkDict["dirPath"], fileName=fileN)


			# Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
			dedupState = processDownload.processDownload(linkDict["seriesName"], wholePath, pron=True)
			self.log.info( "Done")

			if dedupState:
				self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)


			self.updateDbEntry(linkDict["sourceUrl"], dlState=2)
			self.conn.commit()

			return wholePath

		else:

			self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

			self.conn.commit()
			return False
Example #15
	def getLink(self, link):


		sourceUrl  = link["sourceUrl"]
		print("Link", link)






		try:
			self.log.info( "Should retreive url - %s", sourceUrl)
			# self.updateDbEntry(sourceUrl, dlState=1)

			imageUrls, meta = self.getImageUrlsInfo(sourceUrl)

			self.updateDbEntry(sourceUrl, seriesName=meta['series_name'])
			seriesName = meta['series_name']

			link["originName"] = meta['rel_name']

			if not imageUrls:
				self.log.critical("Failure retrieving content at %s", sourceUrl)
				self.log.critical("Page not found - 404")
				# self.updateDbEntry(sourceUrl, dlState=-1)
				return

			self.log.info("Downloading = '%s', '%s' (%s images)", seriesName, link["originName"], len(imageUrls))
			dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

			if link["flags"] == None:
				link["flags"] = ""

			if newDir:
				# self.updateDbEntry(sourceUrl, flags=" ".join([link["flags"], "haddir"]))
				self.conn.commit()

			chapterName = nt.makeFilenameSafe(link["originName"])

			fqFName = os.path.join(dlPath, chapterName+" [Comic Zenon].zip")

			loop = 1
			prefix, ext = os.path.splitext(fqFName)
			while os.path.exists(fqFName):
				fqFName = "%s (%d)%s" % (prefix, loop,  ext)
				loop += 1
			self.log.info("Saving to archive = %s", fqFName)

			images = []
			imgCnt = 1
			for imgUrl in imageUrls:
				imageName, imageContent = self.getImage(imgUrl, sourceUrl)
				imageName = "{num:03.0f} - {srcName}".format(num=imgCnt, srcName=imageName)
				imgCnt += 1
				images.append([imageName, imageContent])

				if not runStatus.run:
					self.log.info( "Breaking due to exit flag being set")
					# self.updateDbEntry(sourceUrl, dlState=0)
					return

			self.log.info("Creating archive with %s images", len(images))

			if not images:
				# self.updateDbEntry(sourceUrl, dlState=-1, tags="error-404")
				return

			#Write all downloaded files to the archive.
			arch = zipfile.ZipFile(fqFName, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			dedupState = processDownload.processDownload(seriesName, fqFName, deleteDups=True, includePHash=True)
			self.log.info( "Done")

			filePath, fileName = os.path.split(fqFName)
			# self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, tags=dedupState)
			return

		except Exception:
			self.log.critical("Failure retrieving content at %s", sourceUrl)
			self.log.critical("Traceback = %s", traceback.format_exc())
			# self.updateDbEntry(sourceUrl, dlState=-1)
			raise
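
Pages are renamed with a zero-padded counter so the archive entries sort in reading order regardless of the source filenames. The numbering scheme on its own (the example uses a float format, {num:03.0f}; an integer format does the same job here):

names = ["cover.jpg", "p1.jpg", "p2.jpg"]
numbered = ["{num:03d} - {srcName}".format(num=i, srcName=n)
            for i, n in enumerate(names, start=1)]
print(numbered)  # ['001 - cover.jpg', '002 - p1.jpg', '003 - p2.jpg']
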
Example #16
    def getLink(self, link):
        sourceUrl = link["sourceUrl"]

        try:
            self.log.info("Should retreive url - %s", sourceUrl)
            self.updateDbEntry(sourceUrl, dlState=1)

            seriesName, chapterVol, imageUrls = self.getContainerPages(
                sourceUrl)
            if not seriesName and not chapterVol and not imageUrls:
                self.log.critical("Failure on retreiving content at %s",
                                  sourceUrl)
                self.log.critical("Page not found - 404")
                self.updateDbEntry(sourceUrl, dlState=-1)
                return

            self.log.info("Downloading = '%s', '%s'", seriesName, chapterVol)
            dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

            if link["flags"] == None:
                link["flags"] = ""

            if newDir:
                self.updateDbEntry(sourceUrl,
                                   flags=" ".join([link["flags"], "haddir"]))

            chapterNameRaw = " - ".join((seriesName, chapterVol))
            chapterName = nt.makeFilenameSafe(chapterNameRaw)

            fqFName = os.path.join(dlPath, chapterName + " [batoto].zip")

            loop = 1
            while os.path.exists(fqFName):
                fName = "%s [batoto] - (%d).zip" % (chapterName, loop)
                fqFName = os.path.join(dlPath, fName)
                loop += 1
            self.log.info("Saving to archive = %s", fqFName)

            images = []
            for imgUrl in imageUrls:
                self.log.info("Fetching content for item: %s", imgUrl)
                imageName, imageContent = self.getImage(
                    imgUrl, "http://bato.to/reader")

                images.append([imageName, imageContent])

                if not runStatus.run:
                    self.log.info("Breaking due to exit flag being set")
                    self.updateDbEntry(sourceUrl, dlState=0)
                    return

            self.log.info("Creating archive with %s images", len(images))

            if not images:
                self.updateDbEntry(sourceUrl,
                                   dlState=-1,
                                   seriesName=seriesName,
                                   originName=chapterNameRaw,
                                   tags="error-404")
                return

            #Write all downloaded files to the archive.
            arch = zipfile.ZipFile(fqFName, "w")
            for imageName, imageContent in images:
                arch.writestr(imageName, imageContent)
            arch.close()

            dedupState = processDownload.processDownload(seriesName,
                                                         fqFName,
                                                         deleteDups=True,
                                                         includePHash=False,
                                                         rowId=link['dbId'])
            self.log.info("Done")

            filePath, fileName = os.path.split(fqFName)
            self.updateDbEntry(sourceUrl,
                               dlState=2,
                               downloadPath=filePath,
                               fileName=fileName,
                               seriesName=seriesName,
                               originName=chapterNameRaw,
                               tags=dedupState)
            return

        except Exception:
            self.log.critical("Failure on retreiving content at %s", sourceUrl)
            self.log.critical("Traceback = %s", traceback.format_exc())
            self.updateDbEntry(sourceUrl, dlState=-1)
Example #17
    def doDownload(self, linkDict):

        images = []
        containerUrl = linkDict["sourceUrl"] + "/read"

        if "http://www.fakku.net/videos/" in containerUrl:
            self.log.warning("Cannot download video items.")
            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-5,
                               downloadPath="Video",
                               fileName="ERROR: Video",
                               lastUpdate=time.time())
            return False

        if "http://www.fakku.net/games/" in containerUrl:
            self.log.warning("Cannot download game items.")
            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-6,
                               downloadPath="Game",
                               fileName="ERROR: Game",
                               lastUpdate=time.time())
            return False

        try:
            imagePage = self.wg.getpage(
                containerUrl, addlHeaders={'Referer': linkDict["sourceUrl"]})
        except urllib.error.URLError:
            self.log.warning("Failure to retrieve base page!")
            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-1,
                               downloadPath="ERROR",
                               fileName="ERROR",
                               lastUpdate=time.time())
            return False

        if "This content has been disabled due to a DMCA takedown notice, it is no longer available to download or read online in your region." in imagePage:
            self.log.warning(
                "Assholes have DMCAed this item. Not available anymore.")
            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-4,
                               downloadPath="DMCA",
                               fileName="ERROR: DMCAed",
                               lastUpdate=time.time())
            return False

        if "Content does not exist." in imagePage:
            self.log.warning("Page removed?.")
            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-7,
                               downloadPath="REMOVED",
                               fileName="ERROR: File removed",
                               lastUpdate=time.time())
            return False

        # F**k you Fakku, don't include pay-content in your free gallery system.
        if "You must purchase this book in order to read it." in imagePage:
            self.log.warning("Page removed?.")
            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-7,
                               downloadPath="REMOVED",
                               fileName="ERROR: Paywalled. ",
                               lastUpdate=time.time())
            return False

        # So...... Fakku's reader is completely javascript driven. No (easily) parseable shit here.
        # Therefore: WE DESCEND TO THE LEVEL OF REGEXBOMINATIONS!
        pathFormatterRe = re.compile(
            r"return '(//t\.fakku\.net/images/.+/.+/.+?/images/)' \+ x \+ '(\.jpg|\.gif|\.png)';",
            re.IGNORECASE)

        # We need to know how many images there are, but there is no convenient way to access this information.
        # The fakku code internally uses the length of the thumbnail array for the number of images, so
        # we extract that array, parse it (since it's javascript, variables are JSON, after all), and
        # just look at the length ourselves as well.
        thumbsListRe = re.compile(r"window\.params\.thumbs = (\[.+?\]);",
                                  re.IGNORECASE)

        thumbs = thumbsListRe.search(imagePage)
        pathFormatter = pathFormatterRe.search(imagePage)

        if not thumbs:
            self.log.error("Could not find thumbnail array on page!")
            self.log.error("URL: '%s'", containerUrl)
            return False

        if not pathFormatter:
            self.log.error("Could not find pathformatter on page!")
            self.log.error("URL: '%s'", containerUrl)
            return False

        items = json.loads(thumbs.group(1))

        prefix, postfix = pathFormatter.group(1), pathFormatter.group(2)
        print("pathFormatter = ", prefix, postfix)

        imageUrls = []
        for x in range(len(items)):
            item = '{prefix}{num:03d}{postfix}'.format(prefix=prefix,
                                                       num=x + 1,
                                                       postfix=postfix)
            imageUrls.append(item)

        # print("Prepared image URLs = ")
        # print(imageUrls)

        # print(linkDict)

        images = []
        try:
            for imageUrl in imageUrls:

                imagePath = urllib.parse.urlsplit(imageUrl)[2]
                imageFileName = imagePath.split("/")[-1]
                if imageUrl.startswith("//"):
                    imageUrl = "https:" + imageUrl
                imageData = self.wg.getpage(
                    imageUrl, addlHeaders={'Referer': containerUrl})

                images.append((imageFileName, imageData))
                # Find next page
        except urllib.error.URLError:
            self.log.error("Failure retrieving item images.")
            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-1,
                               downloadPath="ERROR",
                               fileName="ERROR: Could not retrieve images!",
                               lastUpdate=time.time())
                               lastUpdate=time.time())

            self.conn.commit()
            return False

        # self.log.info(len(content))

        if images:
            fileN = linkDict["originName"] + ".zip"
            fileN = nt.makeFilenameSafe(fileN)

            # self.log.info("geturl with processing", fileN)
            wholePath = os.path.join(linkDict["dirPath"], fileN)
            self.log.info("Complete filepath: %s", wholePath)

            #Write all downloaded files to the archive.
            arch = zipfile.ZipFile(wholePath, "w")
            for imageName, imageContent in images:
                arch.writestr(imageName, imageContent)
            arch.close()

            self.log.info("Successfully Saved to path: %s", wholePath)

            if not linkDict["tags"]:
                linkDict["tags"] = ""

            self.updateDbEntry(linkDict["sourceUrl"],
                               downloadPath=linkDict["dirPath"],
                               fileName=fileN)

            # Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
            dedupState = processDownload.processDownload(None,
                                                         wholePath,
                                                         pron=True,
                                                         deleteDups=True,
                                                         includePHash=True)
            self.log.info("Done")

            if dedupState:
                self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)

            self.updateDbEntry(linkDict["sourceUrl"], dlState=2)
            self.conn.commit()

            return wholePath

        else:

            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-1,
                               downloadPath="ERROR",
                               fileName="ERROR: FAILED",
                               lastUpdate=time.time())

            self.conn.commit()
            return False
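
The thumbnail array is a JavaScript literal that happens to be valid JSON, which is why it can be parsed directly. A sketch of that extraction against a synthetic page snippet (the helper name is hypothetical):

import json
import re

thumbsListRe = re.compile(r"window\.params\.thumbs = (\[.+?\]);", re.IGNORECASE)

def thumbCount(pageHtml):
    # The reader's internal page count is the length of its thumbnail list.
    match = thumbsListRe.search(pageHtml)
    if not match:
        return None
    return len(json.loads(match.group(1)))

print(thumbCount('window.params.thumbs = ["/t/1.jpg", "/t/2.jpg"];'))  # 2
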
Example #18
    def doDownload(self, linkDict):

        images = []
        title = None
        nextPage = linkDict["dlLink"]

        while nextPage:
            gatewayPage = self.wg.getpage(
                nextPage, addlHeaders={'Referer': linkDict["sourceUrl"]})

            soup = bs4.BeautifulSoup(gatewayPage, "lxml")
            titleCont = soup.find("div", class_="image-menu")

            title = titleCont.h1.get_text()
            title = title.replace("Reading ", "")
            title, dummy = title.rsplit(" Page ", 1)
            title = title.strip()

            imageUrl = soup.find("img", class_="b")
            imageUrl = urllib.parse.urljoin(self.urlBase, imageUrl["src"])

            imagePath = urllib.parse.urlsplit(imageUrl)[2]
            imageFileName = imagePath.split("/")[-1]

            imageData = self.wg.getpage(imageUrl,
                                        addlHeaders={'Referer': nextPage})

            images.append((imageFileName, imageData))
            # Find next page
            nextPageLink = soup.find("a", class_="link-next")
            if not nextPageLink:
                nextPage = None
            elif nextPageLink["href"].startswith(
                    "/finish/"):  # Break on the last image.
                nextPage = None
            else:
                nextPage = urllib.parse.urljoin(self.urlBase,
                                                nextPageLink["href"])

        # self.log.info(len(content))

        if images and title:
            fileN = title + ".zip"
            fileN = nt.makeFilenameSafe(fileN)

            # self.log.info("geturl with processing", fileN)
            wholePath = os.path.join(linkDict["dirPath"], fileN)
            self.log.info("Complete filepath: %s", wholePath)

            #Write all downloaded files to the archive.
            try:
                arch = zipfile.ZipFile(wholePath, "w")
            except OSError:
                title = title.encode('ascii', 'ignore').decode('ascii')
                fileN = title + ".zip"
                fileN = nt.makeFilenameSafe(fileN)
                wholePath = os.path.join(linkDict["dirPath"], fileN)
                arch = zipfile.ZipFile(wholePath, "w")

            for imageName, imageContent in images:
                arch.writestr(imageName, imageContent)
            arch.close()

            self.log.info("Successfully Saved to path: %s", wholePath)

            self.updateDbEntry(linkDict["sourceUrl"],
                               downloadPath=linkDict["dirPath"],
                               fileName=fileN)

            # Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
            dedupState = processDownload.processDownload(None,
                                                         wholePath,
                                                         pron=True,
                                                         deleteDups=True,
                                                         includePHash=True)
            self.log.info("Done")

            if dedupState:
                self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)

            self.updateDbEntry(linkDict["sourceUrl"], dlState=2)
            self.conn.commit()

            return wholePath

        else:

            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-1,
                               downloadPath="ERROR",
                               fileName="ERROR: FAILED")

            self.conn.commit()
            return False
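
The pagination walk above keys off a link-next anchor and treats a /finish/ href as the end of the gallery. The next-page resolution in isolation, as a sketch (hypothetical helper name; lxml parser as in the example):

import urllib.parse
import bs4

def nextPageUrl(pageHtml, urlBase):
    # Returns the absolute URL of the following page, or None when the
    # "next" link is missing or points at the /finish/ terminator.
    soup = bs4.BeautifulSoup(pageHtml, "lxml")
    link = soup.find("a", class_="link-next")
    if not link or link["href"].startswith("/finish/"):
        return None
    return urllib.parse.urljoin(urlBase, link["href"])
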
Example #19
	def getLink(self, link):


		sourceUrl  = link["sourceUrl"]
		print("Link", link)



		seriesName = link['seriesName']


		try:
			self.log.info( "Should retreive url - %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=1)

			seriesName = nt.getCanonicalMangaUpdatesName(seriesName)


			self.log.info("Downloading = '%s', '%s'", seriesName, link["originName"])
			dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

			if link["flags"] == None:
				link["flags"] = ""

			if newDir:
				self.updateDbEntry(sourceUrl, flags=" ".join([link["flags"], "haddir"]))
				self.conn.commit()

			chapterName = nt.makeFilenameSafe(link["originName"])

			fqFName = os.path.join(dlPath, chapterName+" [MangaHere].zip")

			loop = 1
			prefix, ext = os.path.splitext(fqFName)
			while os.path.exists(fqFName):
				fqFName = "%s (%d)%s" % (prefix, loop,  ext)
				loop += 1
			self.log.info("Saving to archive = %s", fqFName)

			images = self.proceduralGetImages(sourceUrl)

			self.log.info("Creating archive with %s images", len(images))

			if not images:
				self.updateDbEntry(sourceUrl, dlState=-1, tags="error-404")
				return

			#Write all downloaded files to the archive.
			arch = zipfile.ZipFile(fqFName, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			dedupState = processDownload.processDownload(seriesName, fqFName, deleteDups=True, includePHash=True)
			self.log.info( "Done")

			filePath, fileName = os.path.split(fqFName)
			self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, tags=dedupState)
			return

		except Exception:
			self.log.critical("Failure retrieving content at %s", sourceUrl)
			self.log.critical("Traceback = %s", traceback.format_exc())
			self.updateDbEntry(sourceUrl, dlState=-1)
			raise
Example #20
	def getLink(self, link):


		seriesName = link["seriesName"]
		seriesName = seriesName.replace("[", "(").replace("]", ")")
		safeBaseName = nt.makeFilenameSafe(link["seriesName"])



		if seriesName in nt.dirNameProxy:
			self.log.info( "Have target dir for '%s' Dir = '%s'", seriesName, nt.dirNameProxy[seriesName]['fqPath'])
			link["targetDir"] = nt.dirNameProxy[seriesName]["fqPath"]
		else:
			self.log.info( "Don't have target dir for: %s Using default for: %s, full name = %s", seriesName, link["seriesName"], link["originName"])
			targetDir = os.path.join(settings.mkSettings["dirs"]['mDlDir'], safeBaseName)
			if not os.path.exists(targetDir):
				try:
					os.makedirs(targetDir)
					link["targetDir"] = targetDir
					self.updateDbEntry(link["sourceUrl"],flags=" ".join([link["flags"], "newdir"]))
					self.conn.commit()

					self.conn.commit()
				except OSError:
					self.log.critical("Directory creation failed?")
					self.log.critical(traceback.format_exc())
			else:
				self.log.warning("Directory not found in dir-dict, but it exists!")
				self.log.warning("Directory-Path: %s", targetDir)
				link["targetDir"] = targetDir

				self.updateDbEntry(link["sourceUrl"],flags=" ".join([link["flags"], "haddir"]))
				self.conn.commit()




		sourceUrl, originFileName = link["sourceUrl"], link["originName"]

		self.log.info( "Should retreive: %s, url - %s", originFileName, sourceUrl)

		self.updateDbEntry(sourceUrl, dlState=1)
		self.conn.commit()


		try:
			content, hName = self.getLinkFile(sourceUrl)
		except Exception:
			self.log.error("Unrecoverable error retrieving content %s", link)
			self.log.error("Traceback: %s", traceback.format_exc())

			self.updateDbEntry(sourceUrl, dlState=-1)
			return

		# print("Content type = ", type(content))


		# And fix %xx crap
		hName = urllib.parse.unquote(hName)

		fName = "%s - %s" % (originFileName, hName)
		fName = nt.makeFilenameSafe(fName)

		fqFName = os.path.join(link["targetDir"], fName)
		self.log.info( "SaveName = %s", fqFName)


		loop = 1
		while os.path.exists(fqFName):
			fName = "%s - (%d) - %s" % (originFileName, loop,  hName)
			fqFName = os.path.join(link["targetDir"], fName)
			loop += 1
		self.log.info( "Writing file")

		filePath, fileName = os.path.split(fqFName)

		try:
			chop = len(fileName)-4

			wholePath = "ERROR"
			while True:

				try:
					fileName = fileName[:chop]+fileName[-4:]
					# self.log.info("geturl with processing", fileName)
					wholePath = os.path.join(filePath, fileName)
					self.log.info("Complete filepath: %s", wholePath)

					#Write all downloaded files to the archive.
					with open(wholePath, "wb") as fp:
						fp.write(content)
					self.log.info("Successfully Saved to path: %s", wholePath)
					break
				except IOError:
					chop = chop - 1
					if chop < 200:
						raise RuntimeError("Don't know what's going on, but a filename was truncated too far!")
					self.log.warning("Truncating file length to %s characters.", chop)

		except TypeError:
			self.log.error("Failure trying to retrieve content from source %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=-4, downloadPath=filePath, fileName=fileName)
			return
		#self.log.info( filePath)

		ext = os.path.splitext(fileName)[-1]
		imageExts = ["jpg", "png", "bmp"]
		if not any(ext.endswith(ex) for ex in imageExts):
			# We don't want to upload the file we just downloaded, so specify doUpload as false.
			dedupState = processDownload.processDownload(False, fqFName, deleteDups=True, doUpload=False)
		else:
			dedupState = ""

		self.log.info( "Done")
		self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, tags=dedupState)
		return
Example #26
    def getLink(self, link):

        sourceUrl = link["sourceUrl"]
        print("Link", link)

        seriesName = link['seriesName']

        try:
            self.log.info("Should retreive url - %s", sourceUrl)
            self.updateDbEntry(sourceUrl, dlState=1)

            imageUrls = self.getImageUrls(sourceUrl)
            if not imageUrls:
                self.log.critical("Failure on retreiving content at %s",
                                  sourceUrl)
                self.log.critical("Page not found - 404")
                self.updateDbEntry(sourceUrl, dlState=-1)
                return

            self.log.info("Downloading = '%s', '%s' ('%s images)", seriesName,
                          link["originName"], len(imageUrls))
            dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

            if link["flags"] == None:
                link["flags"] = ""

            if newDir:
                self.updateDbEntry(sourceUrl,
                                   flags=" ".join([link["flags"], "haddir"]))

            chapterName = nt.makeFilenameSafe(link["originName"])

            fqFName = os.path.join(dlPath, chapterName + " [KissManga].zip")

            loop = 1
            prefix, ext = os.path.splitext(fqFName)
            while os.path.exists(fqFName):
                fqFName = "%s (%d)%s" % (prefix, loop, ext)
                loop += 1
            self.log.info("Saving to archive = %s", fqFName)

            images = []
            imgCnt = 1
            for imgUrl in imageUrls:
                imageName, imageContent = self.getImage(imgUrl, sourceUrl)
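                # Zero-pad the page counter so archive members sort in reading order.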
                imageName = "{num:03.0f} - {srcName}".format(num=imgCnt,
                                                             srcName=imageName)
                imgCnt += 1
                images.append([imageName, imageContent])

                if not runStatus.run:
                    self.log.info("Breaking due to exit flag being set")
                    self.updateDbEntry(sourceUrl, dlState=0)
                    return

            self.log.info("Creating archive with %s images", len(images))

            if not images:
                self.updateDbEntry(sourceUrl, dlState=-1, tags="error-404")
                return

            #Write all downloaded files to the archive.
            arch = zipfile.ZipFile(fqFName, "w")
            for imageName, imageContent in images:
                arch.writestr(imageName, imageContent)
            arch.close()

            dedupState = processDownload.processDownload(seriesName,
                                                         fqFName,
                                                         deleteDups=True,
                                                         includePHash=True,
                                                         rowId=link['dbId'])
            self.log.info("Done")

            filePath, fileName = os.path.split(fqFName)
            self.updateDbEntry(sourceUrl,
                               dlState=2,
                               downloadPath=filePath,
                               fileName=fileName,
                               tags=dedupState)
            return

        except SystemExit:
            print("SystemExit!")
            raise

        except Exception:
            self.log.critical("Failure on retreiving content at %s", sourceUrl)
            self.log.critical("Traceback = %s", traceback.format_exc())
            self.updateDbEntry(sourceUrl, dlState=-1)
Example #27
    def doDownload(self, linkDict, link):

        contentUrl = urllib.parse.urljoin(
            self.urlBase, "zipf.php?token={token}&hash={hash}".format(
                token=linkDict["contentId"], hash=linkDict["dlToken"]))
        print("Fetching: ", contentUrl, " Referer ", linkDict["sourceUrl"])
        content, handle = self.wg.getpage(contentUrl,
                                          returnMultiple=True,
                                          addlHeaders={
                                              'Referer': linkDict["sourceUrl"],
                                              "Host": "doujins.com"
                                          })

        # self.log.info(len(content))

        if handle:
            # self.log.info("handle = ", handle)
            # self.log.info("geturl", handle.geturl())
            urlFileN = urllib.parse.unquote(
                urllib.parse.urlparse(handle.geturl())[2].split("/")[-1])
            urlFileN = bs4.UnicodeDammit(urlFileN).unicode_markup

            # DjMoe is apparently returning "zip.php" for ALL filenames.
            # Blargh
            if urlFileN == "zipf.php":
                urlFileN = ".zip"
                fileN = "%s%s" % (linkDict["originName"], urlFileN)
            else:
                self.log.error("Unknown file extension?")
                self.log.error("Dict filename = %s", linkDict["originName"])
                self.log.error("URL filename = %s", urlFileN)
                fileN = "%s - %s" % (linkDict["originName"], urlFileN)

            fileN = nt.makeFilenameSafe(fileN)

            # self.log.info("geturl with processing", fileN)
            wholePath = os.path.join(linkDict["dirPath"], fileN)
            wholePath = self.insertCountIfFilenameExists(wholePath)
            self.log.info("Complete filepath: %s", wholePath)

            fp = open(wholePath, "wb")
            fp.write(content)
            fp.close()
            self.log.info("Successfully Saved to path: %s", wholePath)

            self.updateDbEntry(linkDict["contentId"],
                               downloadPath=linkDict["dirPath"],
                               fileName=fileN)

            # Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
            dedupState = processDownload.processDownload(None,
                                                         wholePath,
                                                         pron=True,
                                                         deleteDups=True,
                                                         includePHash=True,
                                                         rowId=link['dbId'])
            self.log.info("Done")

            if dedupState:
                self.addTags(sourceUrl=linkDict["contentId"], tags=dedupState)

            self.updateDbEntry(linkDict["contentId"], dlState=2)

            return wholePath

        else:

            self.updateDbEntry(linkDict["contentId"],
                               dlState=-1,
                               downloadPath="ERROR",
                               fileName="ERROR: FAILED")

            # cur.execute('UPDATE djmoe SET downloaded=1 WHERE contentID=?;', (linkDict["contentId"], ))
            # cur.execute('UPDATE djmoe SET dlPath=?, dlName=?, itemTags=?  WHERE contentID=?;', ("ERROR", 'ERROR: FAILED', "N/A", linkDict["contentId"]))
            # self.log.info("fetchall = ", cur.fetchall())
            return False
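
Note: the insertCountIfFilenameExists helper used above is not defined in any
of these examples. A minimal sketch of what it plausibly does, inferred only
from its call sites (so an assumption, not the project's actual
implementation):

    import os

    def insertCountIfFilenameExists(wholePath):
        # Inferred helper: if the target path is free, return it unchanged.
        # Otherwise splice an incrementing counter in front of the extension
        # until an unused name is found, mirroring the "(n)" loops above.
        prefix, ext = os.path.splitext(wholePath)
        loop = 1
        while os.path.exists(wholePath):
            wholePath = "%s (%d)%s" % (prefix, loop, ext)
            loop += 1
        return wholePath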
Example #28
    def getLink(self, link):

        sourceUrl = link["sourceUrl"]
        print("Link", link)

        seriesName = link['seriesName']

        try:
            self.log.info("Should retreive url - %s", sourceUrl)
            self.updateDbEntry(sourceUrl, dlState=1)

            seriesName = nt.getCanonicalMangaUpdatesName(seriesName)

            self.log.info("Downloading = '%s', '%s'", seriesName,
                          link["originName"])
            dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

            if link["flags"] == None:
                link["flags"] = ""

            if newDir:
                self.updateDbEntry(sourceUrl,
                                   flags=" ".join([link["flags"], "haddir"]))
                self.conn.commit()

            chapterName = nt.makeFilenameSafe(link["originName"])

            fqFName = os.path.join(dlPath, chapterName + " [MangaHere].zip")

            loop = 1
            prefix, ext = os.path.splitext(fqFName)
            while os.path.exists(fqFName):
                fqFName = "%s (%d)%s" % (prefix, loop, ext)
                loop += 1
            self.log.info("Saving to archive = %s", fqFName)

            images = self.proceduralGetImages(sourceUrl)

            self.log.info("Creating archive with %s images", len(images))

            if not images:
                self.updateDbEntry(sourceUrl, dlState=-1, tags="error-404")
                return

            #Write all downloaded files to the archive.
            arch = zipfile.ZipFile(fqFName, "w")
            for imageName, imageContent in images:
                arch.writestr(imageName, imageContent)
            arch.close()

            dedupState = processDownload.processDownload(seriesName,
                                                         fqFName,
                                                         deleteDups=True,
                                                         includePHash=True)
            self.log.info("Done")

            filePath, fileName = os.path.split(fqFName)
            self.updateDbEntry(sourceUrl,
                               dlState=2,
                               downloadPath=filePath,
                               fileName=fileName,
                               tags=dedupState)
            return

        except Exception:
            self.log.critical("Failure on retreiving content at %s", sourceUrl)
            self.log.critical("Traceback = %s", traceback.format_exc())
            self.updateDbEntry(sourceUrl, dlState=-1)
            raise
Example #29
	def doDownload(self, linkDict):

		contentUrl = urllib.parse.urljoin(self.urlBase, "/zip.php?token=%s" % linkDict["contentId"])
		content, handle = self.wg.getpage(contentUrl, returnMultiple=True, addlHeaders={'Referer': linkDict["sourceUrl"]})

		# self.log.info(len(content))

		if handle:
			# self.log.info("handle = ", handle)
			# self.log.info("geturl", handle.geturl())
			urlFileN = urllib.parse.unquote(urllib.parse.urlparse(handle.geturl())[2].split("/")[-1])
			urlFileN = bs4.UnicodeDammit(urlFileN).unicode_markup
			urlFileN.encode("utf-8")

			# DjMoe is apparently returning "zip.php" for ALL filenames.
			# Blargh
			if urlFileN == "zip.php":
				urlFileN = ".zip"
				fileN = "%s%s" % (linkDict["originName"], urlFileN)
			else:
				self.log.error("Unknown file extension?")
				self.log.error("Dict filename = %s", linkDict["originName"])
				self.log.error("URL filename = %s", urlFileN)
				fileN = "%s - %s" % (linkDict["originName"], urlFileN)

			fileN = nt.makeFilenameSafe(fileN)


			# self.log.info("geturl with processing", fileN)
			wholePath = os.path.join(linkDict["dirPath"], fileN)
			self.log.info("Complete filepath: %s", wholePath)

			fp = open(wholePath, "wb")
			fp.write(content)
			fp.close()
			self.log.info("Successfully Saved to path: %s", wholePath)


			self.updateDbEntry(linkDict["contentId"], downloadPath=linkDict["dirPath"], fileName=fileN)

			# Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
			dedupState = processDownload.processDownload(None, wholePath, pron=True, deleteDups=True)
			self.log.info( "Done")

			if dedupState:
				self.addTags(sourceUrl=linkDict["contentId"], tags=dedupState)

			self.updateDbEntry(linkDict["contentId"], dlState=2)
			self.conn.commit()

		else:

			self.updateDbEntry(linkDict["contentId"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

			# cur.execute('UPDATE djmoe SET downloaded=1 WHERE contentID=?;', (linkDict["contentId"], ))
			# cur.execute('UPDATE djmoe SET dlPath=?, dlName=?, itemTags=?  WHERE contentID=?;', ("ERROR", 'ERROR: FAILED', "N/A", linkDict["contentId"]))
			# self.log.info("fetchall = ", cur.fetchall())
			self.conn.commit()
Example #30
	def doDownload(self, linkDict):


		images = []
		containerUrl = linkDict["sourceUrl"]+"/read"

		if "http://www.fakku.net/videos/" in containerUrl:
			self.log.warning("Cannot download video items.")
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-5, downloadPath="Video", fileName="ERROR: Video", lastUpdate=time.time())
			return False

		if "http://www.fakku.net/games/" in containerUrl:
			self.log.warning("Cannot download game items.")
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-6, downloadPath="Game", fileName="ERROR: Game", lastUpdate=time.time())
			return False

		try:
			imagePage = self.wg.getpage(containerUrl, addlHeaders={'Referer': linkDict["sourceUrl"]})
		except urllib.error.URLError:
			self.log.warning("Failure to retreive base page!.")
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR", lastUpdate=time.time())
			return False


		if "This content has been disabled due to a DMCA takedown notice, it is no longer available to download or read online in your region." in imagePage:
			self.log.warning("Assholes have DMCAed this item. Not available anymore.")
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-4, downloadPath="DMCA", fileName="ERROR: DMCAed", lastUpdate=time.time())
			return False

		if "Content does not exist." in imagePage:
			self.log.warning("Page removed?.")
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-7, downloadPath="REMOVED", fileName="ERROR: File removed", lastUpdate=time.time())
			return False

		# F**k you Fakku, don't include pay-content in your free gallery system.
		if "You must purchase this book in order to read it." in imagePage:
			self.log.warning("Page removed?.")
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-7, downloadPath="REMOVED", fileName="ERROR: Paywalled. ", lastUpdate=time.time())
			return False


		# F**k you Fakku, don't include pay-content in your free gallery system.
		if "Enter your account information below." in imagePage:
			self.log.warning("Subscription bullshit?.")
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-7, downloadPath="REMOVED", fileName="ERROR: Paywalled. ", lastUpdate=time.time())
			return False

		# So...... Fakku's reader is completely javascript driven. No (easily) parseable shit here.
		# Therefore: WE DESCEND TO THE LEVEL OF REGEXBOMINATIONS!
		pathFormatterRe = re.compile(r"return '(https://t\.fakku\.net/images/.+/.+/.+?/images/)' \+ x \+ '(\.jpg|\.gif|\.png)';", re.IGNORECASE)
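		# Group 1 of the regex captures the image-path prefix, group 2 the file
		# extension; each page URL is then prefix + zero-padded page index + extension.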

		# We need to know how many images there are, but there is no convenient way to access this information.
		# The fakku code internally uses the length of the thumbnail array for the number of images, so
		# we extract that array, parse it (since it's javascript, variables are JSON, after all), and
		# just look at the length ourselves as well.
		thumbsListRe    = re.compile(r"window\.params\.thumbs = (\[.+?\]);", re.IGNORECASE)

		thumbs        = thumbsListRe.search(imagePage)
		pathFormatter = pathFormatterRe.search(imagePage)


		if not thumbs:
			self.log.error("Could not find thumbnail array on page!")
			self.log.error("URL: '%s'", containerUrl)
			return False

		if not pathFormatter:
			self.log.error("Could not find pathformatter on page!")
			self.log.error("URL: '%s'", containerUrl)
			return False

		items = json.loads(thumbs.group(1))

		prefix, postfix = pathFormatter.group(1), pathFormatter.group(2)
		print("pathFormatter = ", prefix, postfix)


		imageUrls = []
		for x in range(len(items)):
			item = '{prefix}{num:03d}{postfix}'.format(prefix=prefix, num=x+1, postfix=postfix)
			imageUrls.append(item)

		# print("Prepared image URLs = ")
		# print(imageUrls)

		# print(linkDict)

		images = []
		try:
			for imageUrl in imageUrls:

				imagePath = urllib.parse.urlsplit(imageUrl)[2]
				imageFileName = imagePath.split("/")[-1]
				if imageUrl.startswith("//"):
					imageUrl = "https:" + imageUrl
				imageData = self.wg.getpage(imageUrl, addlHeaders={'Referer': containerUrl})

				images.append((imageFileName, imageData))
				# Find next page
		except urllib.error.URLError:
			self.log.error("Failure retreiving item images.")
			self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: Could not retreive images!", lastUpdate=time.time())

			self.conn.commit()
			return False


		# self.log.info(len(content))

		if images:
			fileN = linkDict["originName"]+".zip"
			fileN = nt.makeFilenameSafe(fileN)


			# self.log.info("geturl with processing", fileN)
			wholePath = os.path.join(linkDict["dirPath"], fileN)
			self.log.info("Complete filepath: %s", wholePath)

			#Write all downloaded files to the archive.
			arch = zipfile.ZipFile(wholePath, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			self.log.info("Successfully Saved to path: %s", wholePath)

			if not linkDict["tags"]:
				linkDict["tags"] = ""

			self.updateDbEntry(linkDict["sourceUrl"], downloadPath=linkDict["dirPath"], fileName=fileN)


			# Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
			dedupState = processDownload.processDownload(None, wholePath, pron=True, deleteDups=True, includePHash=True)
			self.log.info( "Done")

			if dedupState:
				self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)


			self.updateDbEntry(linkDict["sourceUrl"], dlState=2)
			self.conn.commit()


			return wholePath

		else:

			self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED", lastUpdate=time.time())

			self.conn.commit()
			return False
Example #31
	def getLink(self, link):
		sourceUrl  = link["sourceUrl"]
		seriesName = link["seriesName"]
		chapterVol = link["originName"]

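		# Round-trip through ASCII so a URL containing non-ASCII characters
		# fails loudly here rather than deep inside the HTTP machinery.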
		sourceUrl = sourceUrl.encode("ascii").decode('ascii')

		# print("Item:", link)
		try:
			self.log.info( "Should retreive url - %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=1)

			chapterVol, imageUrls = self.getImageUrls(sourceUrl)
			if not imageUrls:
				self.log.critical("Failure on retreiving content at %s", sourceUrl)
				self.log.critical("No images found on page!")
				self.updateDbEntry(sourceUrl, dlState=-1)
				return


			self.log.info("Downloading = '%s', '%s'", seriesName, chapterVol)
			dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

			if link["flags"] == None:
				link["flags"] = ""

			if newDir:
				self.updateDbEntry(sourceUrl, flags=" ".join([link["flags"], "haddir"]), originName=chapterVol)
				# self.conn.commit()
			self.updateDbEntry(sourceUrl, originName=chapterVol)

			chapterName = nt.makeFilenameSafe(chapterVol)

			fqFName = os.path.join(dlPath, chapterName+"[Sura's Place].zip")

			loop = 1
			prefix, ext = os.path.splitext(fqFName)
			while os.path.exists(fqFName):
				fqFName = "%s (%d)%s" % (prefix, loop, ext)
				loop += 1
			self.log.info("Saving to archive = %s", fqFName)

			images = []
			imgCnt = 1

			for imgUrl, referrerUrl in imageUrls:
				imageName, imageContent = self.getImage(imgUrl, referrerUrl)
				imageName = "{num:03.0f} - {srcName}".format(num=imgCnt, srcName=imageName)
				imgCnt += 1

				images.append([imageName, imageContent])

				if not runStatus.run:
					self.log.info( "Breaking due to exit flag being set")
					self.updateDbEntry(sourceUrl, dlState=0)
					return

			self.log.info("Creating archive with %s images", len(images))

			if not images:
				self.updateDbEntry(sourceUrl, dlState=-1, seriesName=seriesName, originName=chapterVol, tags="error-404")
				return

			#Write all downloaded files to the archive.
			arch = zipfile.ZipFile(fqFName, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			dedupState = processDownload.processDownload(seriesName, fqFName, deleteDups=True, includePHash=True)
			self.log.info( "Done")

			filePath, fileName = os.path.split(fqFName)
			self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, seriesName=seriesName, originName=chapterVol, tags=dedupState)
			return

		except Exception:
			self.log.critical("Failure on retreiving content at %s", sourceUrl)
			self.log.critical("Traceback = %s", traceback.format_exc())
			self.updateDbEntry(sourceUrl, dlState=-1)
Example #32
    def doDownload(self, linkDict, link, retag=False):

        images = self.fetchImages(linkDict)

        # self.log.info(len(content))

        if images:
            fileN = linkDict['originName'] + ".zip"
            fileN = nt.makeFilenameSafe(fileN)

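            # If the assembled filename is too long for the filesystem, the
            # loop below trims the name stem until the archive can be created.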
            # self.log.info("geturl with processing", fileN)
            wholePath = os.path.join(linkDict["dirPath"], fileN)
            self.log.info("Complete filepath: %s", wholePath)

            #Write all downloaded files to the archive.

            chop = len(fileN) - 4
            wholePath = "ERROR"
            while 1:

                try:
                    fileN = fileN[:chop] + fileN[-4:]
                    # self.log.info("geturl with processing", fileN)
                    wholePath = os.path.join(linkDict["dirPath"], fileN)
                    wholePath = self.insertCountIfFilenameExists(wholePath)
                    self.log.info("Complete filepath: %s", wholePath)

                    #Write all downloaded files to the archive.

                    arch = zipfile.ZipFile(wholePath, "w")
                    for imageName, imageContent in images:
                        arch.writestr(imageName, imageContent)
                    arch.close()

                    self.log.info("Successfully Saved to path: %s", wholePath)
                    break
                except IOError:
                    chop = chop - 1
                    if chop < 200:
                        raise RuntimeError("Don't know what's going on, but a file truncated too far!")
                    self.log.warning("Truncating file length to %s characters.", chop)

            if not linkDict["tags"]:
                linkDict["tags"] = ""

            self.updateDbEntry(linkDict["sourceUrl"],
                               downloadPath=linkDict["dirPath"],
                               fileName=fileN)

            # Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
            dedupState = processDownload.processDownload(
                linkDict["seriesName"],
                wholePath,
                pron=True,
                rowId=link['dbId'])
            self.log.info("Done")

            if dedupState:
                self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)

            self.updateDbEntry(linkDict["sourceUrl"], dlState=2)

            return wholePath

        else:

            self.updateDbEntry(linkDict["sourceUrl"],
                               dlState=-1,
                               downloadPath="ERROR",
                               fileName="ERROR: FAILED")

            return False
Example #33
	def getLink(self, link):


		seriesName = link["seriesName"]
		seriesName = seriesName.replace("[", "(").replace("]", ")")
		safeBaseName = nt.makeFilenameSafe(link["seriesName"])

		if seriesName in nt.dirNameProxy:
			self.log.info( "Have target dir for '%s' Dir = '%s'", seriesName, nt.dirNameProxy[seriesName]['fqPath'])
			link["targetDir"] = nt.dirNameProxy[seriesName]["fqPath"]
		else:
			self.log.info( "Don't have target dir for: %s Using default for: %s, full name = %s", seriesName, link["seriesName"], link["originName"])
			targetDir = os.path.join(settings.jzSettings["dirs"]['mDlDir'], safeBaseName)
			if not os.path.exists(targetDir):
				try:
					os.makedirs(targetDir)
					link["targetDir"] = targetDir
					self.updateDbEntry(link["sourceUrl"],flags=" ".join([link["flags"], "newdir"]))
					self.conn.commit()

					self.conn.commit()
				except OSError:
					self.log.critical("Directory creation failed?")
					self.log.critical(traceback.format_exc())
			else:
				self.log.warning("Directory not found in dir-dict, but it exists!")
				self.log.warning("Directory-Path: %s", targetDir)
				link["targetDir"] = targetDir

				self.updateDbEntry(link["sourceUrl"],flags=" ".join([link["flags"], "haddir"]))
				self.conn.commit()




		sourceUrl, originFileName = link["sourceUrl"], link["originName"]

		self.log.info( "Should retreive: %s, url - %s", originFileName, sourceUrl)

		self.updateDbEntry(sourceUrl, dlState=1)
		self.conn.commit()


		try:
			content, hName = self.getLinkFile(sourceUrl)
		except Exception:
			self.log.error("Unrecoverable error retrieving content %s", link)
			self.log.error("Traceback: %s", traceback.format_exc())

			self.updateDbEntry(sourceUrl, dlState=-1)
			return

		# print("Content type = ", type(content))


		# And fix %xx crap
		hName = urllib.parse.unquote(hName)

		fName = "%s - %s" % (originFileName, hName)
		fName = nt.makeFilenameSafe(fName)

		fqFName = os.path.join(link["targetDir"], fName)
		self.log.info( "SaveName = %s", fqFName)


		loop = 1
		while os.path.exists(fqFName):
			fName = "%s - (%d) - %s" % (originFileName, loop,  hName)
			fqFName = os.path.join(link["targetDir"], fName)
			loop += 1
		self.log.info( "Writing file")

		filePath, fileName = os.path.split(fqFName)

		try:
			with open(fqFName, "wb") as fp:
				fp.write(content)
		except TypeError:
			self.log.error("Failure trying to retreive content from source %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=-4, downloadPath=filePath, fileName=fileName)
			return
		#self.log.info( filePath)

		ext = os.path.splitext(fileName)[-1]
		imageExts = ["jpg", "png", "bmp"]
		if not any([ext.endswith(ex) for ex in imageExts]):
			dedupState = processDownload.processDownload(False, fqFName, deleteDups=True)
		else:
			dedupState = ""

		self.log.info( "Done")
		self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, tags=dedupState)
		return
Example #34
    def getLink(self, link):
        sourceUrl, originFileName = link["sourceUrl"], link["originName"]

        self.log.info("Should retreive: %s, url - %s", originFileName,
                      sourceUrl)

        self.updateDbEntry(sourceUrl, dlState=1)
        self.conn.commit()

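        # Resolve the actual file URL from the landing page; rows with no
        # resolvable URL are treated as dead and purged from the database.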
        fileUrl = self.getDownloadUrl(sourceUrl)
        if fileUrl is None:
            self.log.warning("Could not find url!")
            self.deleteRowsByValue(sourceUrl=sourceUrl)
            return

        try:
            content, hName = self.getLinkFile(fileUrl, sourceUrl)
        except Exception:
            self.log.error("Unrecoverable error retrieving content %s", link)
            self.log.error("Traceback: %s", traceback.format_exc())

            self.updateDbEntry(sourceUrl, dlState=-1)
            return

        # print("Content type = ", type(content))

        # And fix %xx crap
        hName = urllib.parse.unquote(hName)

        fName = "%s - %s" % (originFileName, hName)
        fName = nt.makeFilenameSafe(fName)

        fqFName = os.path.join(link["targetDir"], fName)
        self.log.info("SaveName = %s", fqFName)

        loop = 1
        while os.path.exists(fqFName):
            fName = "%s - (%d) - %s" % (originFileName, loop, hName)
            fqFName = os.path.join(link["targetDir"], fName)
            loop += 1
        self.log.info("Writing file")

        filePath, fileName = os.path.split(fqFName)

        try:
            with open(fqFName, "wb") as fp:
                fp.write(content)
        except TypeError:
            self.log.error("Failure trying to retreive content from source %s",
                           sourceUrl)
            return
        #self.log.info( filePath)

        dedupState = processDownload.processDownload(link["seriesName"],
                                                     fqFName,
                                                     deleteDups=True,
                                                     includePHash=True)
        self.log.info("Done")

        self.updateDbEntry(sourceUrl,
                           dlState=2,
                           downloadPath=filePath,
                           fileName=fileName,
                           tags=dedupState)
        return
Example #35
    def getLink(self, link):
        sourceUrl = link["sourceUrl"]
        seriesName = link["seriesName"]
        originFileName = link["originName"]

        self.updateDbEntry(sourceUrl, dlState=1)
        self.log.info("Downloading = '%s', '%s'", seriesName, originFileName)
        dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

        if link["flags"] == None:
            link["flags"] = ""

        if newDir:
            self.updateDbEntry(sourceUrl,
                               flags=" ".join([link["flags"], "haddir"]))
            self.conn.commit()

        try:
            content, headerName = self.getLinkFile(sourceUrl)
        except Exception:
            self.log.error("Unrecoverable error retrieving content %s", link)
            self.log.error("Traceback: %s", traceback.format_exc())

            self.updateDbEntry(sourceUrl, dlState=-1)
            return

        headerName = urllib.parse.unquote(headerName)

        fName = "%s - %s" % (originFileName, headerName)
        fName = nt.makeFilenameSafe(fName)

        fName, ext = os.path.splitext(fName)
        fName = "%s [CXC Scans]%s" % (fName, ext)

        fqFName = os.path.join(dlPath, fName)
        self.log.info("SaveName = %s", fqFName)

        loop = 1
        prefix, ext = os.path.splitext(fName)
        while os.path.exists(fqFName):
            fName = "%s (%d)%s" % (prefix, loop, ext)
            fqFName = os.path.join(dlPath, fName)
            loop += 1
        self.log.info("Writing file")

        filePath, fileName = os.path.split(fqFName)

        try:
            with open(fqFName, "wb") as fp:
                fp.write(content)
        except TypeError:
            self.log.error("Failure trying to retreive content from source %s",
                           sourceUrl)
            self.updateDbEntry(sourceUrl,
                               dlState=-4,
                               downloadPath=filePath,
                               fileName=fileName)
            return
        #self.log.info( filePath)

        dedupState = processDownload.processDownload(seriesName,
                                                     fqFName,
                                                     deleteDups=True)

        self.log.info("Done")
        self.updateDbEntry(sourceUrl,
                           dlState=2,
                           downloadPath=filePath,
                           fileName=fileName,
                           tags=dedupState)
        return
Example #36
    def getLink(self, link):

        seriesName = link["seriesName"]
        seriesName = seriesName.replace("[", "(").replace("]", ")")
        safeBaseName = nt.makeFilenameSafe(link["seriesName"])

        if seriesName in nt.dirNameProxy:
            self.log.info("Have target dir for '%s' Dir = '%s'", seriesName,
                          nt.dirNameProxy[seriesName]['fqPath'])
            link["targetDir"] = nt.dirNameProxy[seriesName]["fqPath"]
        else:
            self.log.info(
                "Don't have target dir for: %s Using default for: %s, full name = %s",
                seriesName, link["seriesName"], link["originName"])
            targetDir = os.path.join(settings.jzSettings["dirs"]['mDlDir'],
                                     safeBaseName)
            if not os.path.exists(targetDir):
                try:
                    os.makedirs(targetDir)
                    link["targetDir"] = targetDir
                    self.updateDbEntry(link["sourceUrl"],
                                       flags=" ".join(
                                           [link["flags"], "newdir"]))
                    self.conn.commit()
                except OSError:
                    self.log.critical("Directory creation failed?")
                    self.log.critical(traceback.format_exc())
            else:
                self.log.warning(
                    "Directory not found in dir-dict, but it exists!")
                self.log.warning("Directory-Path: %s", targetDir)
                link["targetDir"] = targetDir

                self.updateDbEntry(link["sourceUrl"],
                                   flags=" ".join([link["flags"], "haddir"]))
                self.conn.commit()

        sourceUrl, originFileName = link["sourceUrl"], link["originName"]

        self.log.info("Should retreive: %s, url - %s", originFileName,
                      sourceUrl)

        self.updateDbEntry(sourceUrl, dlState=1)
        self.conn.commit()

        try:
            content, hName = self.getLinkFile(sourceUrl)
        except Exception:
            self.log.error("Unrecoverable error retrieving content %s", link)
            self.log.error("Traceback: %s", traceback.format_exc())

            self.updateDbEntry(sourceUrl, dlState=-1)
            return

        # print("Content type = ", type(content))

        # And fix %xx crap
        hName = urllib.parse.unquote(hName)

        fName = "%s - %s" % (originFileName, hName)
        fName = nt.makeFilenameSafe(fName)

        fqFName = os.path.join(link["targetDir"], fName)
        self.log.info("SaveName = %s", fqFName)

        loop = 1
        while os.path.exists(fqFName):
            fName = "%s - (%d) - %s" % (originFileName, loop, hName)
            fqFName = os.path.join(link["targetDir"], fName)
            loop += 1
        self.log.info("Writing file")

        filePath, fileName = os.path.split(fqFName)

        try:
            with open(fqFName, "wb") as fp:
                fp.write(content)
        except TypeError:
            self.log.error("Failure trying to retreive content from source %s",
                           sourceUrl)
            self.updateDbEntry(sourceUrl,
                               dlState=-4,
                               downloadPath=filePath,
                               fileName=fileName)
            return
        #self.log.info( filePath)

        ext = os.path.splitext(fileName)[-1]
        imageExts = ["jpg", "png", "bmp"]
        if not any([ext.endswith(ex) for ex in imageExts]):
            dedupState = processDownload.processDownload(False,
                                                         fqFName,
                                                         deleteDups=True)
        else:
            dedupState = ""

        self.log.info("Done")
        self.updateDbEntry(sourceUrl,
                           dlState=2,
                           downloadPath=filePath,
                           fileName=fileName,
                           tags=dedupState)
        return
Example #37
	def getLink(self, link):
		sourceUrl  = link["sourceUrl"]
		seriesName = link["seriesName"]
		chapterVol = link["originName"]


		try:
			self.log.info( "Should retreive url - %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=1)

			imageUrls = self.getImageUrls(sourceUrl)
			if not imageUrls:
				self.log.critical("Failure on retreiving content at %s", sourceUrl)
				self.log.critical("Page not found - 404")
				self.updateDbEntry(sourceUrl, dlState=-1)
				return



			self.log.info("Downloading = '%s', '%s' ('%s images)", seriesName, chapterVol, len(imageUrls))
			dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

			if link["flags"] == None:
				link["flags"] = ""

			if newDir:
				self.updateDbEntry(sourceUrl, flags=" ".join([link["flags"], "haddir"]))
				self.conn.commit()

			chapterName = nt.makeFilenameSafe(chapterVol)

			fqFName = os.path.join(dlPath, chapterName+"["+self.groupName+"].zip")

			loop = 1
			prefix, ext = os.path.splitext(fqFName)
			while os.path.exists(fqFName):
				fqFName = "%s (%d)%s" % (prefix, loop, ext)
				loop += 1
			self.log.info("Saving to archive = %s", fqFName)

			images = []
			for imageName, imgUrl, referrerUrl in imageUrls:
				dummy_imageName, imageContent = self.getImage(imgUrl, referrerUrl)
				images.append([imageName, imageContent])

				if not runStatus.run:
					self.log.info( "Breaking due to exit flag being set")
					self.updateDbEntry(sourceUrl, dlState=0)
					return

			self.log.info("Creating archive with %s images", len(images))

			if not images:
				self.updateDbEntry(sourceUrl, dlState=-1, seriesName=seriesName, originName=chapterVol, tags="error-404")
				return

			#Write all downloaded files to the archive.
			arch = zipfile.ZipFile(fqFName, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			filePath, fileName = os.path.split(fqFName)
			self.updateDbEntry(sourceUrl, downloadPath=filePath, fileName=fileName)

			dedupState = processDownload.processDownload(seriesName, fqFName, deleteDups=True)
			self.log.info( "Done")

			self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, seriesName=seriesName, originName=chapterVol, tags=dedupState)
			return

		except Exception:
			self.log.critical("Failure on retreiving content at %s", sourceUrl)
			self.log.critical("Traceback = %s", traceback.format_exc())
			self.updateDbEntry(sourceUrl, dlState=-1)
Example #38
	def getLink(self, link):

		seriesName = link["seriesName"]
		seriesName = seriesName.replace("[", "(").replace("]", ")")
		safeBaseName = nt.makeFilenameSafe(link["seriesName"])

		if seriesName in nt.dirNameProxy:
			self.log.info( "Have target dir for '%s' Dir = '%s'", seriesName, nt.dirNameProxy[seriesName]['fqPath'])
			link["targetDir"] = nt.dirNameProxy[seriesName]["fqPath"]
		else:
			self.log.info( "Don't have target dir for: %s Using default for: %s, full name = %s", seriesName, link["seriesName"], link["originName"])
			targetDir = os.path.join(settings.mkSettings["dirs"]['mDlDir'], safeBaseName)
			if not os.path.exists(targetDir):
				try:
					os.makedirs(targetDir)
					link["targetDir"] = targetDir
					self.updateDbEntry(link["sourceUrl"],flags=" ".join([link["flags"], "newdir"]))

				except OSError:
					self.log.critical("Directory creation failed?")
					self.log.critical(traceback.format_exc())
			else:
				self.log.warning("Directory not found in dir-dict, but it exists!")
				self.log.warning("Directory-Path: %s", targetDir)
				link["targetDir"] = targetDir

				self.updateDbEntry(link["sourceUrl"],flags=" ".join([link["flags"], "haddir"]))

		sourceUrl, originFileName = link["sourceUrl"], link["originName"]

		self.log.info( "Should retreive: %s, url - %s", originFileName, sourceUrl)

		self.updateDbEntry(sourceUrl, dlState=1)


		try:
			content, hName = self.getLinkFile(sourceUrl)
		except Exception:
			self.log.error("Unrecoverable error retrieving content %s", link)
			self.log.error("Traceback: %s", traceback.format_exc())

			self.updateDbEntry(sourceUrl, dlState=-1)
			return

		# print("Content type = ", type(content))


		# And fix %xx crap
		hName = urllib.parse.unquote(hName)

		fName = "%s - %s" % (originFileName, hName)
		fName = nt.makeFilenameSafe(fName)

		fqFName = os.path.join(link["targetDir"], fName)
		self.log.info( "SaveName = %s", fqFName)


		loop = 1
		while os.path.exists(fqFName):
			fName = "%s - (%d) - %s" % (originFileName, loop,  hName)
			fqFName = os.path.join(link["targetDir"], fName)
			loop += 1
		self.log.info( "Writing file")

		filePath, fileName = os.path.split(fqFName)

		try:
			chop = len(fileName) - 4

			wholePath = "ERROR"
			while True:

				try:
					fileName = fileName[:chop] + fileName[-4:]
					# self.log.info("geturl with processing", fileName)
					wholePath = os.path.join(filePath, fileName)
					self.log.info("Complete filepath: %s", wholePath)

					# Write the downloaded file to disk.
					with open(wholePath, "wb") as fp:
						fp.write(content)
					self.log.info("Successfully Saved to path: %s", wholePath)
					break
				except IOError:
					chop = chop - 1
					if chop < 200:
						raise RuntimeError("Don't know what's going on, but a file truncated too far!")
					self.log.warning("Truncating file length to %s characters.", chop)

		except TypeError:
			self.log.error("Failure trying to retreive content from source %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=-4, downloadPath=filePath, fileName=fileName)
			return
		#self.log.info( filePath)

		ext = os.path.splitext(fileName)[-1]
		imageExts = ["jpg", "png", "bmp"]
		if not any([ext.endswith(ex) for ex in imageExts]):
			# We don't want to upload the file we just downloaded, so specify doUpload as false.
			dedupState = processDownload.processDownload(False, fqFName, deleteDups=True, doUpload=False, rowId=link['dbId'])
		else:
			dedupState = ""

		self.log.info( "Done")
		self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, tags=dedupState)
		return
Example #39
	def getLink(self, link):
		sourceUrl = link["sourceUrl"]


		try:
			self.log.info( "Should retreive url - %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=1)

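			# getContainerPages() returns (seriesName, chapterVol, imageUrls);
			# all three come back empty when the page could not be retrieved.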
			seriesName, chapterVol, imageUrls = self.getContainerPages(sourceUrl)
			if not seriesName and not chapterVol and not imageUrls:
				self.log.critical("Failure on retreiving content at %s", sourceUrl)
				self.log.critical("Page not found - 404")
				self.updateDbEntry(sourceUrl, dlState=-1)
				return

			self.log.info("Downloading = '%s', '%s'", seriesName, chapterVol)
			dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

			if link["flags"] == None:
				link["flags"] = ""

			if newDir:
				self.updateDbEntry(sourceUrl, flags=" ".join([link["flags"], "haddir"]))
				self.conn.commit()

			chapterNameRaw = " - ".join((seriesName, chapterVol))
			chapterName = nt.makeFilenameSafe(chapterNameRaw)

			fqFName = os.path.join(dlPath, chapterName+" [batoto].zip")

			loop = 1
			while os.path.exists(fqFName):
				fName = "%s - (%d).zip" % (chapterName, loop)
				fqFName = os.path.join(dlPath, fName)
				loop += 1
			self.log.info("Saving to archive = %s", fqFName)

			images = []
			for imgUrl in imageUrls:
				self.log.info("Fetching content for item: %s", imgUrl)
				imageName, imageContent = self.getImage(imgUrl, "http://bato.to/reader")

				images.append([imageName, imageContent])

				if not runStatus.run:
					self.log.info( "Breaking due to exit flag being set")
					self.updateDbEntry(sourceUrl, dlState=0)
					return

			self.log.info("Creating archive with %s images", len(images))

			if not images:
				self.updateDbEntry(sourceUrl, dlState=-1, seriesName=seriesName, originName=chapterNameRaw, tags="error-404")
				return

			#Write all downloaded files to the archive.
			arch = zipfile.ZipFile(fqFName, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			dedupState = processDownload.processDownload(seriesName, fqFName, deleteDups=True, includePHash=False)
			self.log.info( "Done")

			filePath, fileName = os.path.split(fqFName)
			self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, seriesName=seriesName, originName=chapterNameRaw, tags=dedupState)
			return

		except Exception:
			self.log.critical("Failure on retreiving content at %s", sourceUrl)
			self.log.critical("Traceback = %s", traceback.format_exc())
			self.updateDbEntry(sourceUrl, dlState=-1)
Example #40
	def doDownload(self, linkDict):


		images = []
		title = None
		nextPage = linkDict["dlLink"]

		while nextPage:
			gatewayPage = self.wg.getpage(nextPage, addlHeaders={'Referer': linkDict["sourceUrl"]})

			soup = bs4.BeautifulSoup(gatewayPage, "lxml")
			titleCont = soup.find("div", class_="image-menu")

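			# The page title reads "Reading <title> Page <n>"; strip the fixed
			# wrapper text to recover the bare gallery title.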
			title = titleCont.h1.get_text()
			title = title.replace("Reading ", "")
			title, dummy = title.rsplit(" Page ", 1)
			title = title.strip()


			imageUrl = soup.find("img", class_="b")
			imageUrl = urllib.parse.urljoin(self.urlBase, imageUrl["src"])

			imagePath = urllib.parse.urlsplit(imageUrl)[2]
			imageFileName = imagePath.split("/")[-1]


			imageData = self.wg.getpage(imageUrl, addlHeaders={'Referer': nextPage})

			images.append((imageFileName, imageData))
			# Find next page
			nextPageLink = soup.find("a", class_="link-next")
			if not nextPageLink:
				nextPage = None
			elif nextPageLink["href"].startswith("/finish/"):    # Break on the last image.
				nextPage = None
			else:
				nextPage = urllib.parse.urljoin(self.urlBase, nextPageLink["href"])


		# self.log.info(len(content))

		if images and title:
			fileN = title+".zip"
			fileN = nt.makeFilenameSafe(fileN)


			# self.log.info("geturl with processing", fileN)
			wholePath = os.path.join(linkDict["dirPath"], fileN)
			self.log.info("Complete filepath: %s", wholePath)

			#Write all downloaded files to the archive.
			try:
				arch = zipfile.ZipFile(wholePath, "w")
			except OSError:
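				# Some filesystems reject non-ASCII filenames; strip the title
				# down to ASCII and retry creating the archive.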
				title = title.encode('ascii','ignore').decode('ascii')
				fileN = title+".zip"
				fileN = nt.makeFilenameSafe(fileN)
				wholePath = os.path.join(linkDict["dirPath"], fileN)
				arch = zipfile.ZipFile(wholePath, "w")

			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			self.log.info("Successfully Saved to path: %s", wholePath)


			self.updateDbEntry(linkDict["sourceUrl"], downloadPath=linkDict["dirPath"], fileName=fileN)

			# Deduper uses the path info for relinking, so we have to dedup the item after updating the downloadPath and fileN
			dedupState = processDownload.processDownload(None, wholePath, pron=True, deleteDups=True, includePHash=True)
			self.log.info( "Done")

			if dedupState:
				self.addTags(sourceUrl=linkDict["sourceUrl"], tags=dedupState)


			self.updateDbEntry(linkDict["sourceUrl"], dlState=2)
			self.conn.commit()


			return wholePath

		else:

			self.updateDbEntry(linkDict["sourceUrl"], dlState=-1, downloadPath="ERROR", fileName="ERROR: FAILED")

			self.conn.commit()
			return False