Example #1
    def processDownload(self,
                        seriesName,
                        archivePath,
                        pathPositiveFilter=None,
                        crossReference=True,
                        doUpload=True,
                        **kwargs):

        if self.mon_con:
            self.mon_con.incr('processed-download', 1)

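        # 'phashThresh' caps the perceptual-hash distance at which two images
        # still count as duplicate matches; PHASH_DISTANCE is the module
        # default, and callers may override it per call via kwargs.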
        if 'phashThresh' in kwargs:
            phashThreshIn = kwargs.pop('phashThresh')
            self.log.warning("Phash search distance overridden!")
            self.log.warning("Search distance = %s", phashThreshIn)
            for line in traceback.format_stack():
                self.log.warning(line.rstrip())

        else:
            phashThreshIn = PHASH_DISTANCE
            self.log.info("Phash search distance = %s", phashThreshIn)

        if 'dedupMove' in kwargs:
            moveToPath = kwargs.pop('dedupMove')
        else:
            moveToPath = False

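        # A 'dedupMove' destination skips the archive-cleaning pass below and
        # is handed to the dedup checker's process() call (presumably so that
        # duplicates are moved there rather than deleted in place).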
        if moveToPath:
            retTags = ""
        else:
            archCleaner = MangaCMS.cleaner.archCleaner.ArchCleaner()
            try:
                retTags, archivePath_updated = archCleaner.processNewArchive(
                    archivePath, **kwargs)
                if archivePath_updated != archivePath:
                    self._crossLink(archivePath, archivePath_updated)
                    archivePath = archivePath_updated

            except Exception:
                self.log.critical("Error processing archive '%s'", archivePath)
                self.log.critical(traceback.format_exc())
                retTags = "corrupt unprocessable"
                self.mon_con.incr('corrupt-archive', 1)

        with self.db.session_context() as sess:

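            # Everything below -- path filtering, dedup, cross-linking, and
            # upload bookkeeping -- runs inside a single DB session context.
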
            # Limit dedup matches to the served directories.
            if not pathPositiveFilter:
                self.log.info(
                    "Using manga download folders for path filtering.")
                pathPositiveFilter = [
                    item['dir'] for item in settings.mangaFolders.values()
                ]

            # Let the remote deduper do its thing.
            # It will delete duplicates automatically.

            phashThresh = phashThreshIn

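            # Retry loop: run the dedup check, and on a 'phash-conflict'
            # result step the threshold down by one and try again, until the
            # check passes cleanly, the archive is deleted as a duplicate, or
            # the threshold bottoms out at zero.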
            while True:
                dc = deduplicator.archChecker.ArchChecker(
                    archivePath,
                    phashDistance=phashThresh,
                    pathPositiveFilter=pathPositiveFilter,
                    lock=False)
                retTagsTmp, bestMatch, intersections = dc.process(
                    moveToPath=moveToPath)

                if 'deleted' in retTagsTmp:
                    self.mon_con.incr('deleted-archive', 1)
                    break
                if phashThresh == 0:
                    self.mon_con.incr('phash-exhausted', 1)
                    break
                if 'phash-conflict' not in retTagsTmp:
                    break
                if phashThresh < phashThreshIn:
                    self.mon_con.incr('phash-thresh-reduced', 1)
                    retTagsTmp += " phash-thresh-reduced phash-thresh-%s" % phashThresh
                phashThresh = phashThresh - 1
                self.log.warning(
                    "Phash conflict! Reducing search threshold to %s to try to work around.",
                    phashThresh)

            retTags += " " + retTagsTmp
            retTags = retTags.strip()

            if "phash-duplicate" in retTags:
                self.mon_con.incr('phash-duplicate', 1)

            elif 'deleted' in retTags:
                self.mon_con.incr('binary-duplicate', 1)

            if bestMatch and crossReference:
                isPhash = False
                if "phash-duplicate" in retTags:
                    isPhash = True

                self._crossLink(archivePath, bestMatch, isPhash=isPhash)

            # processNewArchive returns "damaged" or "duplicate" for the corresponding archive states.
            # Since we don't want to upload archives that are either, we skip unless retTags is
            # empty (or just "fewfiles"). Also, don't upload p**n.
            if (self.is_manga
                    and (not retTags or retTags == "fewfiles")
                    and seriesName
                    and doUpload):
                try:
                    self.log.info("Trying to upload file '%s'.", archivePath)
                    up.uploadFile(seriesName, archivePath)
                    retTags += " uploaded"
                    self.mon_con.incr('uploaded', 1)
                except ConnectionRefusedError:
                    self.log.warning(
                        "Uploading file failed! Connection Refused!")
                    for line in traceback.format_exc().split("\n"):
                        self.log.error("\t%s", line)
                except Exception:
                    self.log.error("Uploading file failed! Unknown Error!")
                    for line in traceback.format_exc().split("\n"):
                        self.log.error("\t%s", line)
            else:
                self.log.info("File not slated for upload: '%s' (tags: '%s')",
                              archivePath, retTags)

            if retTags:
                self.log.info("Applying tags to archive: '%s'", retTags)
            if "deleted" in retTags:
                self.log.warning("Item was deleted!")
        return retTags.strip()
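
For context, a minimal sketch of how this method might be invoked. The
'fetcher' object and the paths below are hypothetical, not from the codebase;
'phashThresh' and 'dedupMove' are the optional kwargs the method pops.

    # Hypothetical caller -- names and paths are illustrative only.
    tags = fetcher.processDownload(
        seriesName="Some Series",
        archivePath="/downloads/some-series-c001.zip",
        phashThresh=4,   # optional override of the PHASH_DISTANCE default
    )
    if "deleted" in tags.split():
        pass  # the archive was removed (or moved) as a duplicate
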
Example #2
    def processDownload(self,
                        seriesName,
                        archivePath,
                        deleteDups=False,
                        includePHash=False,
                        pathPositiveFilter=None,
                        crossReference=True,
                        doUpload=True,
                        rowId=None,
                        **kwargs):

        if 'phashThresh' in kwargs:
            phashThresh = kwargs.pop('phashThresh')
            self.log.warning("Phash search distance overridden!")
            self.log.warning("Search distance = %s", phashThresh)
            for line in traceback.format_stack():
                self.log.warning(line.rstrip())

        else:
            phashThresh = PHASH_DISTANCE
            self.log.info("Phash search distance = %s", phashThresh)

        if 'dedupMove' in kwargs:
            moveToPath = kwargs.pop('dedupMove')
        else:
            moveToPath = False

        if moveToPath:
            retTags = ""
        else:
            archCleaner = ac.ArchCleaner()
            try:
                retTags, archivePath = archCleaner.processNewArchive(
                    archivePath, **kwargs)
            except Exception:
                self.log.critical("Error processing archive '%s'", archivePath)
                self.log.critical(traceback.format_exc())
                retTags = "corrupt unprocessable"

        # Limit dedup matches to the served directories.
        if not pathPositiveFilter:
            self.log.info("Using manga download folders for path filtering.")
            pathPositiveFilter = [
                item['dir'] for item in settings.mangaFolders.values()
            ]

        # Let the remote deduper do its thing.
        # It will delete duplicates automatically.
        dc = deduplicator.archChecker.ArchChecker(
            archivePath,
            phashDistance=phashThresh,
            pathPositiveFilter=pathPositiveFilter,
            lock=False)
        retTagsTmp, bestMatch, intersections = dc.process(
            moveToPath=moveToPath)
        retTags += " " + retTagsTmp
        retTags = retTags.strip()

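        # Cross-link the new archive against its best dedup match. (Assumption:
        # crossLink records the relationship in the DB, and rowId ties it back
        # to the originating download row.)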
        if bestMatch and crossReference:
            isPhash = False
            if "phash-duplicate" in retTags:
                isPhash = True
            self.crossLink(archivePath,
                           bestMatch,
                           isPhash=isPhash,
                           rowId=rowId)

        # try:
        # 	self.scanIntersectingArchives(os.path.split(archivePath)[0], intersections, phashThresh, moveToPath)
        # except Exception:
        # 	self.log.error("Failure in scanIntersectingArchives()?")
        # 	for line in traceback.format_exc().split("\n"):
        # 		self.log.error(line)
        # 	self.log.error("Ignoring exception")

        # processNewArchive returns "damaged" or "duplicate" for the corresponding archive states.
        # Since we don't want to upload archives that are either, we skip unless retTags is
        # empty (or just "fewfiles"). Also, don't upload p**n.
        if ((not self.pron)
                and (not retTags or retTags == "fewfiles")
                and seriesName
                and doUpload):
            try:
                self.log.info("Trying to upload file '%s'.", archivePath)
                up.uploadFile(seriesName, archivePath)
                retTags += " uploaded"
            except ConnectionRefusedError:
                self.log.warning("Uploading file failed! Connection Refused!")
                for line in traceback.format_exc().split("\n"):
                    self.log.error("\t%s", line)
            except Exception:
                self.log.error("Uploading file failed! Unknown Error!")
                for line in traceback.format_exc().split("\n"):
                    self.log.error("\t%s", line)
        else:
            self.log.info("File not slated for upload: '%s' (tags: '%s')",
                          archivePath, retTags)

        if retTags:
            self.log.info("Applying tags to archive: '%s'", retTags)
        if "deleted" in retTags:
            self.log.warning("Item was deleted!")
        return retTags.strip()
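
The phashDistance these calls pass down is, assuming the common 64-bit DCT
perceptual-hash scheme (the deduplicator module may differ), a Hamming
distance between hash integers. A standalone sketch with made-up hash values:

    # 'phash distance' as the Hamming distance between two 64-bit
    # perceptual hashes. The hash values are invented for illustration.
    def phash_distance(a: int, b: int) -> int:
        return bin(a ^ b).count("1")

    h1 = 0xD1D1B5A58FC0B078
    h2 = 0xD1D1B5A58FC0B07A   # differs from h1 in a single bit
    assert phash_distance(h1, h2) == 1   # well inside a threshold of 4
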
Example #3
	def processDownload(self, seriesName, archivePath, deleteDups=False, includePHash=False, pathFilter=None, **kwargs):

		if 'phashThresh' in kwargs:
			phashThresh = kwargs.pop('phashThresh')
			self.log.warning("Phash search distance overridden!")
			self.log.warning("Search distance = %s", phashThresh)
			for line in traceback.format_stack():
				self.log.warning(line.rstrip())

		else:
			phashThresh = PHASH_DISTANCE
			self.log.info("Phash search distance = %s", phashThresh)

		if 'dedupMove' in kwargs:
			moveToPath = kwargs.pop('dedupMove')
		else:
			moveToPath = False

		if moveToPath:
			retTags = ""
		else:
			archCleaner = ac.ArchCleaner()
			try:
				retTags, archivePath = archCleaner.processNewArchive(archivePath, **kwargs)
			except Exception:
				self.log.critical("Error processing archive '%s'", archivePath)
				self.log.critical(traceback.format_exc())
				retTags = "corrupt unprocessable"



		# Limit dedup matches to the served directories.
		if not pathFilter:
			pathFilter = [item['dir'] for item in settings.mangaFolders.values()]

		# Let the remote deduper do its thing.
		# It will delete duplicates automatically.
		dc = deduplicator.archChecker.ArchChecker(archivePath, phashDistance=phashThresh, pathFilter=pathFilter, lock=False)
		retTagsTmp, bestMatch, intersections = dc.process(moveToPath=moveToPath)
		retTags += " " + retTagsTmp
		retTags = retTags.strip()

		if bestMatch:
			isPhash = False
			if "phash-duplicate" in retTags:
				isPhash = True
			self.crossLink(archivePath, bestMatch, isPhash=isPhash)


		# try:
		# 	self.scanIntersectingArchives(os.path.split(archivePath)[0], intersections, phashThresh, moveToPath)
		# except Exception:
		# 	self.log.error("Failure in scanIntersectingArchives()?")
		# 	for line in traceback.format_exc().split("\n"):
		# 		self.log.error(line)
		# 	self.log.error("Ignoring exception")


		# processNewArchive returns "damaged" or "duplicate" for the corresponding archive states.
		# Since we don't want to upload archives that are either, we skip if retTags is anything other than "".
		# Also, don't upload p**n.
		if (not self.pron) and (not retTags) and seriesName:
			try:
				self.log.info("Trying to upload file '%s'.", archivePath)
				up.uploadFile(seriesName, archivePath)
				retTags += " uploaded"
			except ConnectionRefusedError:
				self.log.warning("Uploading file failed! Connection Refused!")
			except Exception:
				self.log.error("Uploading file failed! Unknown Error!")
				self.log.error(traceback.format_exc())
		else:
			self.log.info("File not slated for upload: '%s' (tags: '%s')", archivePath, retTags)

		if retTags:
			self.log.info("Applying tags to archive: '%s'", retTags)
		return retTags.strip()
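
All three versions report their result through a single space-separated tag
string. A minimal sketch of consuming the return value, using tag tokens taken
from the code above:

    tags = set("phash-duplicate deleted".split())
    if "deleted" in tags:
        print("archive was removed as a duplicate")
    elif not tags or tags == {"fewfiles"}:
        print("archive is eligible for upload")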