def processDownload(self, seriesName, archivePath, pathPositiveFilter=None, crossReference=True, doUpload=True, **kwargs):

    if self.mon_con:
        self.mon_con.incr('processed-download', 1)

    if 'phashThresh' in kwargs:
        phashThreshIn = kwargs.pop('phashThresh')
        self.log.warning("Phash search distance overridden!")
        self.log.warning("Search distance = %s", phashThreshIn)
        for line in traceback.format_stack():
            self.log.warning(line.rstrip())
    else:
        phashThreshIn = PHASH_DISTANCE
        self.log.info("Phash search distance = %s", phashThreshIn)

    if 'dedupMove' in kwargs:
        moveToPath = kwargs.pop('dedupMove')
    else:
        moveToPath = False

    if moveToPath:
        retTags = ""
    else:
        archCleaner = MangaCMS.cleaner.archCleaner.ArchCleaner()
        try:
            retTags, archivePath_updated = archCleaner.processNewArchive(archivePath, **kwargs)
            if archivePath_updated != archivePath:
                self._crossLink(archivePath, archivePath_updated)
            archivePath = archivePath_updated
        except Exception:
            self.log.critical("Error processing archive '%s'", archivePath)
            self.log.critical(traceback.format_exc())
            retTags = "corrupt unprocessable"
            self.mon_con.incr('corrupt-archive', 1)

    with self.db.session_context() as sess:

        # Limit dedup matches to the served directories.
        if not pathPositiveFilter:
            self.log.info("Using manga download folders for path filtering.")
            pathPositiveFilter = [item['dir'] for item in settings.mangaFolders.values()]

        # Let the remote deduper do its thing.
        # It will delete duplicates automatically.
        phashThresh = phashThreshIn
        while True:
            dc = deduplicator.archChecker.ArchChecker(
                archivePath,
                phashDistance=phashThresh,
                pathPositiveFilter=pathPositiveFilter,
                lock=False)

            retTagsTmp, bestMatch, intersections = dc.process(moveToPath=moveToPath)

            if 'deleted' in retTagsTmp:
                self.mon_con.incr('deleted-archive', 1)
                break
            if phashThresh == 0:
                self.mon_con.incr('phash-exhausted', 1)
                break
            if 'phash-conflict' not in retTagsTmp:
                break

            if phashThresh < phashThreshIn:
                self.mon_con.incr('phash-thresh-reduced', 1)
                retTagsTmp += " phash-thresh-reduced phash-thresh-%s" % phashThresh

            phashThresh = phashThresh - 1
            self.log.warning("Phash conflict! Reducing search threshold to %s to try to work around.", phashThresh)

        retTags += " " + retTagsTmp
        retTags = retTags.strip()

        if "phash-duplicate" in retTags:
            self.mon_con.incr('phash-duplicate', 1)
        elif 'deleted' in retTags:
            self.mon_con.incr('binary-duplicate', 1)

        if bestMatch and crossReference:
            isPhash = False
            if "phash-duplicate" in retTags:
                isPhash = True
            self._crossLink(archivePath, bestMatch, isPhash=isPhash)

        # processNewArchive returns "damaged" or "duplicate" for the corresponding archive states.
        # Since we don't want to upload archives that are either, we only upload when retTags is
        # empty (or just "fewfiles"). Also, don't upload p**n.
        if self.is_manga and (not retTags or retTags == "fewfiles") and seriesName and doUpload:
            try:
                self.log.info("Trying to upload file '%s'.", archivePath)
                up.uploadFile(seriesName, archivePath)
                retTags += " uploaded"
                self.mon_con.incr('uploaded', 1)
            except ConnectionRefusedError:
                self.log.warning("Uploading file failed! Connection Refused!")
                for line in traceback.format_exc().split("\n"):
                    self.log.error("    %s", line)
            except Exception:
                self.log.error("Uploading file failed! Unknown Error!")
                for line in traceback.format_exc().split("\n"):
                    self.log.error("    %s", line)
        else:
            self.log.info("File not slated for upload: '%s' (tags: '%s')", archivePath, retTags)

    if retTags:
        self.log.info("Applying tags to archive: '%s'", retTags)
    if "deleted" in retTags:
        self.log.warning("Item was deleted!")

    return retTags.strip()

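# Illustrative call sketch, not part of the original module: shows how the newest
# processDownload() signature above might be driven. The plugin class, series name,
# and archive path are hypothetical placeholders; the keyword arguments and the
# returned space-separated tag string are taken from the code above.
plugin = SomeSeriesPlugin()  # hypothetical object exposing processDownload()
tags = plugin.processDownload(
    seriesName="Example Series",
    archivePath="/media/manga/Example Series/c001.zip",
    doUpload=False,    # skip the remote upload step
    phashThresh=4,     # override PHASH_DISTANCE; logs a warning plus a stack trace
)
if "deleted" in tags.split():
    print("archive was removed as a duplicate")
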
def processDownload(self, seriesName, archivePath, deleteDups=False, includePHash=False, pathPositiveFilter=None, crossReference=True, doUpload=True, rowId=None, **kwargs):

    if 'phashThresh' in kwargs:
        phashThresh = kwargs.pop('phashThresh')
        self.log.warning("Phash search distance overridden!")
        self.log.warning("Search distance = %s", phashThresh)
        for line in traceback.format_stack():
            self.log.warning(line.rstrip())
    else:
        phashThresh = PHASH_DISTANCE
        self.log.info("Phash search distance = %s", phashThresh)

    if 'dedupMove' in kwargs:
        moveToPath = kwargs.pop('dedupMove')
    else:
        moveToPath = False

    if moveToPath:
        retTags = ""
    else:
        archCleaner = ac.ArchCleaner()
        try:
            retTags, archivePath = archCleaner.processNewArchive(archivePath, **kwargs)
        except Exception:
            self.log.critical("Error processing archive '%s'", archivePath)
            self.log.critical(traceback.format_exc())
            retTags = "corrupt unprocessable"

    # Limit dedup matches to the served directories.
    if not pathPositiveFilter:
        self.log.info("Using manga download folders for path filtering.")
        pathPositiveFilter = [item['dir'] for item in settings.mangaFolders.values()]

    # Let the remote deduper do its thing.
    # It will delete duplicates automatically.
    dc = deduplicator.archChecker.ArchChecker(
        archivePath,
        phashDistance=phashThresh,
        pathPositiveFilter=pathPositiveFilter,
        lock=False)

    retTagsTmp, bestMatch, intersections = dc.process(moveToPath=moveToPath)

    retTags += " " + retTagsTmp
    retTags = retTags.strip()

    if bestMatch and crossReference:
        isPhash = False
        if "phash-duplicate" in retTags:
            isPhash = True
        self.crossLink(archivePath, bestMatch, isPhash=isPhash, rowId=rowId)

    # try:
    #     self.scanIntersectingArchives(os.path.split(archivePath)[0], intersections, phashThresh, moveToPath)
    # except Exception:
    #     self.log.error("Failure in scanIntersectingArchives()?")
    #     for line in traceback.format_exc().split("\n"):
    #         self.log.error(line)
    #     self.log.error("Ignoring exception")

    # processNewArchive returns "damaged" or "duplicate" for the corresponding archive states.
    # Since we don't want to upload archives that are either, we only upload when retTags is
    # empty (or just "fewfiles"). Also, don't upload p**n.
    if (not self.pron) and (not retTags or retTags == "fewfiles") and seriesName and doUpload:
        try:
            self.log.info("Trying to upload file '%s'.", archivePath)
            up.uploadFile(seriesName, archivePath)
            retTags += " uploaded"
        except ConnectionRefusedError:
            self.log.warning("Uploading file failed! Connection Refused!")
            for line in traceback.format_exc().split("\n"):
                self.log.error("    %s", line)
        except Exception:
            self.log.error("Uploading file failed! Unknown Error!")
            for line in traceback.format_exc().split("\n"):
                self.log.error("    %s", line)
    else:
        self.log.info("File not slated for upload: '%s' (tags: '%s')", archivePath, retTags)

    if retTags:
        self.log.info("Applying tags to archive: '%s'", retTags)
    if "deleted" in retTags:
        self.log.warning("Item was deleted!")

    return retTags.strip()

def processDownload(self, seriesName, archivePath, deleteDups=False, includePHash=False, pathFilter=None, **kwargs):

    if 'phashThresh' in kwargs:
        phashThresh = kwargs.pop('phashThresh')
        self.log.warning("Phash search distance overridden!")
        self.log.warning("Search distance = %s", phashThresh)
        for line in traceback.format_stack():
            self.log.warning(line.rstrip())
    else:
        phashThresh = PHASH_DISTANCE
        self.log.info("Phash search distance = %s", phashThresh)

    if 'dedupMove' in kwargs:
        moveToPath = kwargs.pop('dedupMove')
    else:
        moveToPath = False

    if moveToPath:
        retTags = ""
    else:
        archCleaner = ac.ArchCleaner()
        try:
            retTags, archivePath = archCleaner.processNewArchive(archivePath, **kwargs)
        except Exception:
            self.log.critical("Error processing archive '%s'", archivePath)
            self.log.critical(traceback.format_exc())
            retTags = "corrupt unprocessable"

    # Limit dedup matches to the served directories.
    if not pathFilter:
        pathFilter = [item['dir'] for item in settings.mangaFolders.values()]

    # Let the remote deduper do its thing.
    # It will delete duplicates automatically.
    dc = deduplicator.archChecker.ArchChecker(
        archivePath,
        phashDistance=phashThresh,
        pathFilter=pathFilter,
        lock=False)

    retTagsTmp, bestMatch, intersections = dc.process(moveToPath=moveToPath)

    retTags += " " + retTagsTmp
    retTags = retTags.strip()

    if bestMatch:
        isPhash = False
        if "phash-duplicate" in retTags:
            isPhash = True
        self.crossLink(archivePath, bestMatch, isPhash=isPhash)

    # try:
    #     self.scanIntersectingArchives(os.path.split(archivePath)[0], intersections, phashThresh, moveToPath)
    # except Exception:
    #     self.log.error("Failure in scanIntersectingArchives()?")
    #     for line in traceback.format_exc().split("\n"):
    #         self.log.error(line)
    #     self.log.error("Ignoring exception")

    # processNewArchive returns "damaged" or "duplicate" for the corresponding archive states.
    # Since we don't want to upload archives that are either, we skip the upload if retTags is
    # anything other than "". Also, don't upload p**n.
    if (not self.pron) and (not retTags) and seriesName:
        try:
            self.log.info("Trying to upload file '%s'.", archivePath)
            up.uploadFile(seriesName, archivePath)
            retTags += " uploaded"
        except ConnectionRefusedError:
            self.log.warning("Uploading file failed! Connection Refused!")
        except Exception:
            self.log.error("Uploading file failed! Unknown Error!")
            self.log.error(traceback.format_exc())
    else:
        self.log.info("File not slated for upload: '%s' (tags: '%s')", archivePath, retTags)

    if retTags:
        self.log.info("Applying tags to archive: '%s'", retTags)

    return retTags.strip()
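
# Illustrative sketch, not from the original source: every processDownload() variant above
# returns a space-separated tag string, so a caller can inspect it as a set. The tag names
# used here ("corrupt", "unprocessable", "deleted", "fewfiles", "uploaded") are the ones the
# functions above actually emit; the helper name is hypothetical.
def archive_needs_attention(ret_tags):
    # Flag archives that were corrupt/unprocessable or deleted as duplicates.
    tags = set(ret_tags.split())
    return bool(tags & {"corrupt", "unprocessable", "deleted"})

assert archive_needs_attention("corrupt unprocessable") is True
assert archive_needs_attention("fewfiles uploaded") is False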