Python getCanonicalMangaUpdatesName示例，nameTools.getCanonicalMangaUpdatesName Python示例

示例#1

0

显示文件

文件： autoOrganize.py 项目： gregseb/MangaCMS

def consolicateSeriesToSingleDir():
    print(
        "Looking for series directories that can be flattened to a single dir")
    idLut = nt.MtNamesMapWrapper("buId->fsName")
    db = DbInterface()
    for key, luDict in nt.dirNameProxy.iteritems():
        # print("Key = ", key)
        mId = db.getIdFromDirName(key)

        # Skip cases where we have no match
        if not mId:
            continue

        dups = set()
        for name in idLut[mId]:
            cName = nt.prepFilenameForMatching(name)

            # Skip if it's one of the manga names that falls apart under the directory name cleaning mechanism
            if not cName:
                continue

            if cName in nt.dirNameProxy:
                dups.add(cName)
                db.getIdFromDirName(cName)
        if len(dups) > 1:
            row = db.getRowByValue(buId=mId)
            targetName = nt.prepFilenameForMatching(row["buName"])
            dest = nt.dirNameProxy[targetName]
            if luDict["dirKey"] != targetName and dest["fqPath"]:
                print("baseName = ", row["buName"], ", id = ", mId,
                      ", names = ", dups)

                print("	URL: https://www.mangaupdates.com/series.html?id=%s" %
                      (mId, ))
                print(" Dir 1 ", luDict["fqPath"])
                print(" Dir 2 ", dest["fqPath"])

                dirName = os.path.split(luDict["fqPath"])[-1]
                dir2Name = os.path.split(dest["fqPath"])[-1]

                print("	1:	", dirName, ' ->',
                      nt.getCanonicalMangaUpdatesName(dirName))
                print("	2:	", dir2Name, ' ->',
                      nt.getCanonicalMangaUpdatesName(dir2Name))
                print("	1:	({num} items)".format(
                    num=len(os.listdir(luDict["fqPath"]))))
                print("	2:	({num} items)".format(
                    num=len(os.listdir(dest["fqPath"]))))

                doMove = query_response(
                    "move files ('f' dir 1 -> dir 2. 'r' dir 2 -> dir 1. 'n' do not move)?"
                )
                if doMove == "forward":
                    moveFiles(luDict["fqPath"], dest["fqPath"])
                    os.rmdir(luDict["fqPath"])
                elif doMove == "reverse":
                    moveFiles(dest["fqPath"], luDict["fqPath"])
                    os.rmdir(dest["fqPath"])

示例#2

0

显示文件

文件： test-nametools.py 项目： GJdan/MangaCMS

	def test_choice(self):
		print("Verifying directory linking mechanism")
		print(nt.dirNameProxy["Kurogane"]["fqPath"], nt.getCanonicalMangaUpdatesName(nt.dirNameProxy["Kurogane"]["fqPath"]))
		print(nt.dirNameProxy["Kyoumen no Silhouette"]["fqPath"], nt.getCanonicalMangaUpdatesName(nt.dirNameProxy["Kyoumen no Silhouette"]["fqPath"]))
		print(nt.dirNameProxy["Neko Ane "]["fqPath"], nt.getCanonicalMangaUpdatesName(nt.dirNameProxy["Neko Ane "]["fqPath"]))
		print(nt.dirNameProxy["Rescue Me"]["fqPath"], nt.getCanonicalMangaUpdatesName(nt.dirNameProxy["Rescue Me"]["fqPath"]))
		print(nt.dirNameProxy["Maken Ki!"]["fqPath"], nt.getCanonicalMangaUpdatesName(nt.dirNameProxy["Maken Ki!"]["fqPath"]))
		print(nt.dirNameProxy[":REverSAL"]["fqPath"], nt.getCanonicalMangaUpdatesName(nt.dirNameProxy[":REverSAL"]["fqPath"]))
		print(nt.dirNameProxy["Silva"]["fqPath"], nt.getCanonicalMangaUpdatesName(nt.dirNameProxy["Silva"]["fqPath"]))
		print(nt.dirNameProxy["Kouya ni Kemono Doukokusu"]["fqPath"], nt.getCanonicalMangaUpdatesName(nt.dirNameProxy["Kouya ni Kemono Doukokusu"]["fqPath"]))
		print(nt.dirNameProxy["Koukaku no Regios - Missing Mail"]["fqPath"], nt.getCanonicalMangaUpdatesName(nt.dirNameProxy["Koukaku no Regios - Missing Mail"]["fqPath"]))
		print(nt.dirNameProxy["Kuraudo (NOUJOU Junichi) "]["fqPath"], nt.getCanonicalMangaUpdatesName(nt.dirNameProxy["Kuraudo (NOUJOU Junichi) "]["fqPath"]))

示例#3

0

显示文件

文件： autoOrganize.py 项目： GDXN/MangaCMS

def consolicateSeriesToSingleDir():
	print("Looking for series directories that can be flattened to a single dir")
	idLut = nt.MtNamesMapWrapper("buId->fsName")
	db = DbInterface()
	for key, luDict in nt.dirNameProxy.iteritems():
		# print("Key = ", key)
		mId = db.getIdFromDirName(key)

		# Skip cases where we have no match
		if not mId:
			continue

		dups = set()
		for name in idLut[mId]:
			cName = nt.prepFilenameForMatching(name)

			# Skip if it's one of the manga names that falls apart under the directory name cleaning mechanism
			if not cName:
				continue

			if cName in nt.dirNameProxy:
				dups.add(cName)
				db.getIdFromDirName(cName)
		if len(dups) > 1:
			row = db.getRowByValue(buId=mId)
			targetName = nt.prepFilenameForMatching(row["buName"])
			dest = nt.dirNameProxy[targetName]
			if luDict["dirKey"] != targetName and dest["fqPath"]:
				print("baseName = ", row["buName"], ", id = ", mId, ", names = ", dups)

				print("	URL: https://www.mangaupdates.com/series.html?id=%s" % (mId, ))
				print(" Dir 1 ", luDict["fqPath"])
				print(" Dir 2 ", dest["fqPath"])

				dirName = os.path.split(luDict["fqPath"])[-1]
				dir2Name = os.path.split(dest["fqPath"])[-1]

				print("	1:	", dirName, ' ->', nt.getCanonicalMangaUpdatesName(dirName))
				print("	2:	", dir2Name, ' ->', nt.getCanonicalMangaUpdatesName(dir2Name))
				print("	1:	({num} items)".format(num=len(os.listdir(luDict["fqPath"]))))
				print("	2:	({num} items)".format(num=len(os.listdir(dest["fqPath"]))))


				doMove = query_response("move files ('f' dir 1 -> dir 2. 'r' dir 2 -> dir 1. 'n' do not move)?")
				if doMove == "forward":
					moveFiles(luDict["fqPath"], dest["fqPath"])
					os.rmdir(luDict["fqPath"])
				elif doMove == "reverse":
					moveFiles(dest["fqPath"], luDict["fqPath"])
					os.rmdir(dest["fqPath"])

示例#4

0

显示文件

文件： TwistedHelRun.py 项目： nothing628/MangaCMS

	def getChaptersFromSeriesPage(self, inUrl):

		soup = self.wg.getSoup(inUrl)

		if 'The following content is intended for mature' in soup.get_text():
			self.log.info("Adult check page. Confirming...")
			soup = self.wg.getSoup(inUrl, postData={"adult": "true"})

		mainDiv = soup.find('div', id='series_right')

		seriesName = mainDiv.h1.get_text()

		seriesName = nt.getCanonicalMangaUpdatesName(seriesName)

		# No idea why chapters are class 'staff_link'. Huh.
		chapters = mainDiv.find_all('div', class_='staff_link')


		ret = []
		for chapter in chapters:
			item = {}
			item['originName'] = "{series} - {file}".format(series=seriesName, file=chapter.a.get_text())
			item['sourceUrl']  = chapter.a['href']
			item['seriesName'] = seriesName
			item['retreivalTime'] = time.time()    # Fukkit, just use the current date.
			ret.append(item)
		return ret

示例#5

0

显示文件

文件： FeedLoader.py 项目： xiaodoudou/MangaCMS

	def process_tree_elements(self, elements, cum_path="/"):
		ret = []

		for element in elements:
			if element['type'] == "report":
				continue
			elif element['type'] == 'directory':
				item_path = os.path.join(cum_path, element['name'])
				ret.extend(self.process_tree_elements(element['contents'], item_path))
			elif element['type'] == 'file':
				item_path = os.path.join(cum_path, element['name'])

				if any([item_path.startswith(prefix) for prefix in MASK_PATHS]):
					continue

				# Parse out the series name if we're in a directory we understand,
				# otherwise just assume the dir name is the series.
				match = re.search(r'/Manga/[^/]/[^/]{2}/[^/]{4}/([^/]+)/', item_path)
				if match:
					sname = match.group(1)
				else:
					sname = os.path.split(cum_path)[-1]

				item = {
					'source_id'   : urllib.parse.urljoin(self.url_base, item_path),
					'origin_name' : element['name'],
					'series_name' : nt.getCanonicalMangaUpdatesName(sname),
				}
				ret.append(item)
			else:
				self.log.error("Unknown element type: '%s'", element)

		return ret

示例#6

0

显示文件

文件： LoaderBase.py 项目： nothing628/MangaCMS

    def _processLinksIntoDB(self, linksDicts):

        self.log.info("Inserting...", )

        newItems = 0
        for link in linksDicts:
            if link is None:
                print("linksDicts", linksDicts)
                print("WAT")
                continue

            row = self.getRowsByValue(sourceUrl=link["sourceUrl"],
                                      limitByKey=False)

            if not row:
                newItems += 1

                if not "dlState" in link:
                    link['dlState'] = 0

                # Patch series name.
                if 'seriesName' in link and self.shouldCanonize:
                    link["seriesName"] = nt.getCanonicalMangaUpdatesName(
                        link["seriesName"])

                self.insertIntoDb(**link)

                self.log.info("New item: %s", link)

        if self.mon_con:
            self.mon_con.incr('new_links', newItems)

        self.log.info("Done (%s new items)", newItems)

        return newItems

示例#7

0

显示文件

文件： MangaScraperDbBase.py 项目： cbunch/MangaCMS

    def updateDbEntryById(self,
                          rowId=None,
                          dbId=None,
                          commit=True,
                          cur=None,
                          **kwargs):
        if dbId is None:
            assert rowId is not None
            dbId = rowId

        # Patch series name.
        if "seriesName" in kwargs and kwargs[
                "seriesName"] and self.shouldCanonize:
            kwargs["seriesName"] = nt.getCanonicalMangaUpdatesName(
                kwargs["seriesName"])

        query, queryArguments = self.generateUpdateQuery(dbId=dbId, **kwargs)

        if self.QUERY_DEBUG:
            print("Query = ", query)
            print("Args = ", queryArguments)

        if cur:
            cur.execute(query, queryArguments)
        else:
            with self.transaction(commit=commit) as cur:
                cur.execute(query, queryArguments)
                print("ret =", cur.rowcount)

示例#8

0

显示文件

文件： uploader.py 项目： GodOfConquest/MangaCMS

	def loadRemoteDirectory(self, fullPath, aggregate=False):
		ret = {}

		for dirName, stats in self.ftp.mlsd(fullPath):

			# Skip items that aren't directories
			if stats["type"]!="dir":
				continue

			canonName = nt.getCanonicalMangaUpdatesName(dirName)
			matchingName = nt.prepFilenameForMatching(canonName)

			fqPath = os.path.join(fullPath, dirName)

			# matchName = os.path.split(ret[matchingName])[-1]

			if matchingName in ret:
				# if aggregate:
				# 	fqPath = self.aggregateDirs(fullPath, dirName, matchName)
				# else:
				if COMPLAIN_ABOUT_DUPS:
					self.log.warning("Duplicate directories for series '%s'!", canonName)
					self.log.warning("	'%s'", dirName)
					self.log.warning("	'%s'", matchingName)
				ret[matchingName] = fqPath

			else:
				ret[matchingName] = fqPath

		return ret

示例#9

0

显示文件

文件： uploader.py 项目： gregseb/MangaCMS

	def loadRemoteDirectory(self, fullPath, aggregate=False):
		ret = {}

		for dirName, stats in self.ftp.mlsd(fullPath):

			# Skip items that aren't directories
			if stats["type"]!="dir":
				continue

			canonName = nt.getCanonicalMangaUpdatesName(dirName)
			matchingName = nt.prepFilenameForMatching(canonName)

			fqPath = os.path.join(fullPath, dirName)

			# matchName = os.path.split(ret[matchingName])[-1]

			if matchingName in ret:
				# if aggregate:
				# 	fqPath = self.aggregateDirs(fullPath, dirName, matchName)
				# else:
				if COMPLAIN_ABOUT_DUPS:
					self.log.warning("Duplicate directories for series '%s'!", canonName)
					self.log.warning("	'%s'", dirName)
					self.log.warning("	'%s'", matchingName)
				ret[matchingName] = fqPath

			else:
				ret[matchingName] = fqPath

		return ret

示例#10

0

显示文件

文件： uploader.py 项目： GDXN/MangaCMS

	def getUploadDirectory(self, seriesName):

		ulDir = self.getExistingDir(seriesName)

		if not ulDir:
			seriesName   = nt.getCanonicalMangaUpdatesName(seriesName)
			safeFilename = nt.makeFilenameSafe(seriesName)
			matchName    = nt.prepFilenameForMatching(seriesName)
			matchName    = matchName.encode('utf-8', 'ignore').decode('utf-8')

			self.checkInitDirs()
			if matchName in self.mainDirs:
				ulDir = self.mainDirs[matchName][0]
			elif seriesName in self.mainDirs:
				ulDir = self.mainDirs[seriesName][0]
			else:

				self.log.info("Need to create container directory for %s", seriesName)
				ulDir = os.path.join(settings.mkSettings["uploadContainerDir"], settings.mkSettings["uploadDir"], safeFilename)
				try:
					self.sftp.mkdir(ulDir)
				except OSError as e:
					# If the error is just a "directory exists" warning, ignore it silently
					if str(e) == 'OSError: File already exists':
						pass
					else:
						self.log.warn("Error creating directory?")
						self.log.warn(traceback.format_exc())


		return ulDir

示例#11

0

显示文件

文件： uploader.py 项目： GodOfConquest/MangaCMS

	def getUploadDirectory(self, seriesName):

		ulDir = self.getExistingDir(seriesName)

		if not ulDir:
			seriesName = nt.getCanonicalMangaUpdatesName(seriesName)
			safeFilename = nt.makeFilenameSafe(seriesName)
			matchName = nt.prepFilenameForMatching(seriesName)
			matchName = matchName.encode('latin-1', 'ignore').decode('latin-1')

			self.checkInitDirs()
			if matchName in self.unsortedDirs:
				ulDir = self.unsortedDirs[matchName]
			elif safeFilename in self.unsortedDirs:
				ulDir = self.unsortedDirs[safeFilename]
			else:

				self.log.info("Need to create container directory for %s", seriesName)
				ulDir = os.path.join(settings.mkSettings["uploadContainerDir"], settings.mkSettings["uploadDir"], safeFilename)
				try:
					self.ftp.mkd(ulDir)
				except ftplib.error_perm as e:
					# If the error is just a "directory exists" warning, ignore it silently
					if str(e).startswith("550") and str(e).endswith('File exists'):
						pass
					else:
						self.log.warn("Error creating directory?")
						self.log.warn(traceback.format_exc())


		return ulDir

示例#12

0

显示文件

	def getChaptersFromSeriesPage(self, inUrl):

		soup = self.wg.getSoup(inUrl)

		if 'The following content is intended for mature' in soup.get_text():
			self.log.info("Adult check page. Confirming...")
			soup = self.wg.getSoup(inUrl, postData={"adult": "true"})

		mainDiv = soup.find('div', id='series_right')

		seriesName = mainDiv.h1.get_text()

		seriesName = nt.getCanonicalMangaUpdatesName(seriesName)

		# No idea why chapters are class 'staff_link'. Huh.
		chapters = mainDiv.find_all('div', class_='staff_link')


		ret = []
		for chapter in chapters:
			item = {}
			item['originName'] = "{series} - {file}".format(series=seriesName, file=chapter.a.get_text())
			item['sourceUrl']  = chapter.a['href']
			item['seriesName'] = seriesName
			item['retreivalTime'] = time.time()    # Fukkit, just use the current date.
			ret.append(item)
		return ret

示例#13

0

显示文件

文件： uploader.py 项目： GDXN/MangaCMS

	def getDoujinshiUploadDirectory(self, seriesName):
		ulDir = self.getExistingDir(seriesName)

		if not ulDir:
			seriesName = nt.getCanonicalMangaUpdatesName(seriesName)
			safeFilename = nt.makeFilenameSafe(seriesName)
			matchName = nt.prepFilenameForMatching(seriesName)
			matchName = matchName.encode('latin-1', 'ignore').decode('latin-1')

			self.checkInitDirs()
			if matchName in self.unsortedDirs:
				ulDir = self.unsortedDirs[matchName]
			elif safeFilename in self.unsortedDirs:
				ulDir = self.unsortedDirs[safeFilename]
			else:

				self.log.info("Need to create container directory for %s", seriesName)
				ulDir = os.path.join(settings.mkSettings["uploadContainerDir"], settings.mkSettings["uploadDir"], safeFilename)
				try:
					self.sftp.mkdir(ulDir)
				except ftplib.error_perm:
					self.log.warn("Directory exists?")
					self.log.warn(traceback.format_exc())


		return ulDir

示例#14

0

显示文件

文件： ContentLoader.py 项目： Kadantte/MangaCMS-1

    def locateOrCreateDirectoryForSeries(self, seriesName):

        if self.shouldCanonize and self.is_manga:
            canonSeriesName = nt.getCanonicalMangaUpdatesName(seriesName)
        else:
            canonSeriesName = seriesName

        safeBaseName = nt.makeFilenameSafe(canonSeriesName)

        targetDir = os.path.join(settings.mkSettings["dirs"]['bookDir'],
                                 safeBaseName)
        if not os.path.exists(targetDir):
            self.log.info("Don't have target dir for: %s, full name = %s",
                          canonSeriesName, seriesName)
            try:
                os.makedirs(targetDir)
                return targetDir, True

            except FileExistsError:
                # Probably means the directory was concurrently created by another thread in the background?
                self.log.critical("Directory doesn't exist, and yet it does?")
                self.log.critical(traceback.format_exc())
            except OSError:
                self.log.critical("Directory creation failed?")
                self.log.critical(traceback.format_exc())

        else:
            self.log.info("Directory exists.")
            self.log.info("Directory not found in dir-dict, but it exists!")
            self.log.info("Directory-Path: %s", targetDir)
            self.log.info("Base series name: %s", seriesName)
            self.log.info("Canonized series name: %s", canonSeriesName)
            self.log.info("Safe canonized name: %s", safeBaseName)
        return targetDir, False

示例#15

0

显示文件

文件： MangaScraperDbBase.py 项目： cbunch/MangaCMS

    def updateDbEntry(self, sourceUrl, commit=True, **kwargs):
        cur = kwargs.pop('cur', None)

        # Patch series name.
        if "seriesName" in kwargs and kwargs[
                "seriesName"] and self.shouldCanonize:
            kwargs["seriesName"] = nt.getCanonicalMangaUpdatesName(
                kwargs["seriesName"])

        # Clamp the retreivaltime to now, so parsing issues that result in invalid, future
        # time-stamps don't cause posts to stick to the top of the post list.
        if 'retreivalTime' in kwargs:
            if kwargs['retreivalTime'] > time.time():
                kwargs['retreivalTime'] = time.time()

        query, queryArguments = self.generateUpdateQuery(sourceUrl=sourceUrl,
                                                         **kwargs)

        if self.QUERY_DEBUG:
            print("Query = ", query)
            print("Args = ", queryArguments)

        if cur is not None:
            cur.execute(query, queryArguments)
        else:
            with self.transaction(commit=commit) as cur:
                cur.execute(query, queryArguments)

示例#16

0

显示文件

文件： SeriesRetreivalDbBase.py 项目： GJdan/MangaCMS

	def updateSeriesDbEntryById(self, rowId, commit=True, **kwargs):

		# Patch series name.
		if "seriesName" in kwargs and kwargs["seriesName"]:
			kwargs["seriesName"] = nt.getCanonicalMangaUpdatesName(kwargs["seriesName"])

		queries = []
		qArgs = []
		for key in kwargs.keys():
			if key not in self.validSeriesKwargs:
				raise ValueError("Invalid keyword argument: %s" % key)
			else:
				queries.append("{k}=%s".format(k=key))
				qArgs.append(kwargs[key])

		qArgs.append(rowId)

		column = ", ".join(queries)


		query = '''UPDATE {tableName} SET {v} WHERE dbId=%s;'''.format(tableName=self.seriesTableName, v=column)

		if QUERY_DEBUG:
			print("Query = ", query)
			print("Args = ", qArgs)

		with self.conn.cursor() as cur:

			if commit:
				cur.execute("BEGIN;")

			cur.execute(query, qArgs)

			if commit:
				cur.execute("COMMIT;")

示例#17

0

显示文件

	def getUploadDirectory(self, seriesName):

		ulDir = self.getExistingDir(seriesName)

		if not ulDir:
			seriesName   = nt.getCanonicalMangaUpdatesName(seriesName)
			safeFilename = nt.makeFilenameSafe(seriesName)
			matchName    = nt.prepFilenameForMatching(seriesName)
			matchName    = matchName.encode('utf-8', 'ignore').decode('utf-8')

			self.checkInitDirs()
			if matchName in self.mainDirs:
				ulDir = self.mainDirs[matchName][0]
			elif seriesName in self.mainDirs:
				ulDir = self.mainDirs[seriesName][0]
			else:

				self.log.info("Need to create container directory for %s", seriesName)
				ulDir = os.path.join(settings.mkSettings["uploadContainerDir"], settings.mkSettings["uploadDir"], safeFilename)
				try:
					self.sftp.mkdir(ulDir)
				except OSError as e:
					# If the error is just a "directory exists" warning, ignore it silently
					if str(e) == 'OSError: File already exists':
						pass
					else:
						self.log.warn("Error creating directory?")
						self.log.warn(traceback.format_exc())


		return ulDir

示例#18

0

显示文件

	def getDoujinshiUploadDirectory(self, seriesName):
		ulDir = self.getExistingDir(seriesName)

		if not ulDir:
			seriesName = nt.getCanonicalMangaUpdatesName(seriesName)
			safeFilename = nt.makeFilenameSafe(seriesName)
			matchName = nt.prepFilenameForMatching(seriesName)
			matchName = matchName.encode('latin-1', 'ignore').decode('latin-1')

			self.checkInitDirs()
			if matchName in self.unsortedDirs:
				ulDir = self.unsortedDirs[matchName]
			elif safeFilename in self.unsortedDirs:
				ulDir = self.unsortedDirs[safeFilename]
			else:

				self.log.info("Need to create container directory for %s", seriesName)
				ulDir = os.path.join(settings.mkSettings["uploadContainerDir"], settings.mkSettings["uploadDir"], safeFilename)
				try:
					self.sftp.mkdir(ulDir)
				except ftplib.error_perm:
					self.log.warn("Directory exists?")
					self.log.warn(traceback.format_exc())


		return ulDir

示例#19

0

显示文件

    def aggregateDirs(self, pathBase_1, pathBase_2, dir1, dir2):
        canonName = nt.getCanonicalMangaUpdatesName(dir1)
        canonNameAlt = nt.getCanonicalMangaUpdatesName(dir2)
        cname1 = nt.prepFilenameForMatching(canonName)
        cname2 = nt.prepFilenameForMatching(canonNameAlt)
        if canonName.lower() != canonNameAlt.lower():
            self.log.critical(
                "Error in uploading file. Name lookup via MangaUpdates table not commutative!"
            )
            self.log.critical("First returned value    '%s'", canonName)
            self.log.critical("For directory with path '%s'", dir1)
            self.log.critical("Second returned value   '%s'", canonNameAlt)
            self.log.critical("For directory with path '%s'", dir2)
            self.log.critical("After cleaning: '%s', '%s', equal: '%s'",
                              cname1, cname2, cname1 == cname2)

            raise CanonMismatch("Identical and yet not? '%s' - '%s'" %
                                (canonName, canonNameAlt))
        self.log.info("Aggregating directories for canon name '%s':",
                      canonName)

        n1 = lv.distance(dir1, canonName)
        n2 = lv.distance(dir2, canonName)

        self.log.info("	%s - '%s'", n1, dir1)
        self.log.info("	%s - '%s'", n2, dir2)

        # I'm using less then or equal, so situations where
        # both names are equadistant get aggregated anyways.
        if n1 <= n2:
            src = os.path.join(pathBase_2, dir2)
            dst = os.path.join(pathBase_1, dir1)
        else:
            src = os.path.join(pathBase_1, dir1)
            dst = os.path.join(pathBase_2, dir2)

        self.moveItemsInDir(src, dst)
        self.log.info("Removing directory '%s'", src)
        try:
            self.sftp.mkdir("/Admin cleanup/autoclean dirs")
        except:
            pass
        self.sftp.rename(
            src, "/Admin cleanup/autoclean dirs/garbage dir %s" %
            src.replace("/", ";").replace(" ", "_"))

        return dst

示例#20

0

显示文件

文件： mkFeedLoader.py 项目： GJdan/MangaCMS

	def getItemsFromContainer(self, dirName, dirUrl):

		# Skip the needs sorting directory.
		if dirName == 'Needs sorting':
			return [], []
		if dirName == 'Admin Cleanup':
			return [], []
		if dirName == 'Raws':
			return [], []
		if dirName == 'Requests':
			return [], []
		if dirName == '_Autouploads':
			return [], []


		self.log.info("Original name - %s", dirName)

		bracketStripRe = re.compile(r"(\[.*?\])")
		dirName = bracketStripRe.sub(" ", dirName)
		while dirName.find("  ")+1:
			dirName = dirName.replace("  ", " ")
		dirName = dirName.strip()

		if not dirName:
			self.log.critical("Empty dirname = '%s', baseURL = '%s'", dirName, dirUrl)
			raise ValueError("No dir name for directory!")

		dirName = nt.getCanonicalMangaUpdatesName(dirName)

		self.log.info("Canonical name - %s", dirName)
		self.log.info("Fetching items for directory '%s'", dirName)

		self.log.info("Using URL '%s'", dirUrl)
		try:
			itemPage = self.wg.getpage(dirUrl)
		except urllib.error.URLError:
			self.log.error("Could not fetch page '%s'", dirUrl)
			return [], []

		soup = bs4.BeautifulSoup(itemPage)




		itemRet = []
		dirRet  = []

		for row in soup.find_all("tr"):

			dirDat, itemDat = self.parseRow(row, dirUrl, dirName)

			if dirDat:
				dirRet.append(dirDat)

			if itemDat:
				itemRet.append(itemDat)


		return dirRet, itemRet

示例#21

0

显示文件

文件： ContentLoader.py 项目： cbunch/MangaCMS

	def extractFilename(self, inString):
		title, dummy_blurb = inString.rsplit("|", 1)
		# title, chapter = title.rsplit("-", 1)

		# Unescape htmlescaped items in the name/chapter
		ps = html.parser.HTMLParser()
		title = ps.unescape(title)

		vol = None
		chap = None
		volChap = None

		try:
			if " vol " in title.lower():
				title, volChap = title.rsplit(" vol ", 1)
				vol, dummy = volChap.strip().split(" ", 1)
		except ValueError:
			self.log.error("Could not parse volume number from title %s", title)
			traceback.print_exc()


		try:
			if volChap and " ch " in volChap:
				dummy, chap = volChap.rsplit(" ch ", 1)

			elif " ch " in title:
				title, chap = title.rsplit(" ch ", 1)

		except ValueError:
			self.log.error("Could not parse chapter number from title %s", title)
			traceback.print_exc()

		if chap:
			if "Page" in chap:
				chap, dummy = chap.split("Page", 1)

		elif title and "Page" in title:
			title, dummy = title.split("Page", 1)

		title = title.rstrip(" -")
		# haveLookup = nt.haveCanonicalMangaUpdatesName(title)
		# if not haveLookup:
		# 	self.log.warning("Did not find title '%s' in MangaUpdates database!", title)
		title = nt.getCanonicalMangaUpdatesName(title).strip()


		volChap = []

		if vol:
			volChap.append("v{}".format(vol))
		if chap:
			volChap.append("c{}".format(chap))

		chapter = " ".join(volChap)

		return title, chapter.strip()

示例#22

0

显示文件

文件： btContentLoader.py 项目： MyAnimeDays/MangaCMS

	def extractFilename(self, inString):
		title, dummy_blurb = inString.rsplit("|", 1)
		# title, chapter = title.rsplit("-", 1)

		# Unescape htmlescaped items in the name/chapter
		ps = html.parser.HTMLParser()
		title = ps.unescape(title)

		vol = None
		chap = None
		volChap = None

		try:
			if " vol " in title.lower():
				title, volChap = title.rsplit(" vol ", 1)
				vol, dummy = volChap.strip().split(" ", 1)
		except ValueError:
			self.log.error("Could not parse volume number from title %s", title)
			traceback.print_exc()


		try:
			if volChap and " ch " in volChap:
				dummy, chap = volChap.rsplit(" ch ", 1)

			elif " ch " in title:
				title, chap = title.rsplit(" ch ", 1)

		except ValueError:
			self.log.error("Could not parse chapter number from title %s", title)
			traceback.print_exc()

		if chap:
			if "Page" in chap:
				chap, dummy = chap.split("Page", 1)

		elif title and "Page" in title:
			title, dummy = title.split("Page", 1)

		title = title.rstrip(" -")
		# haveLookup = nt.haveCanonicalMangaUpdatesName(title)
		# if not haveLookup:
		# 	self.log.warning("Did not find title '%s' in MangaUpdates database!", title)
		title = nt.getCanonicalMangaUpdatesName(title).strip()


		volChap = []

		if vol:
			volChap.append("v{}".format(vol))
		if chap:
			volChap.append("c{}".format(chap))

		chapter = " ".join(volChap)

		return title, chapter.strip()

示例#23

0

显示文件

文件： __main__.py 项目： naafx8/MangaCMS

def two_arg_lookup(val):
	print("Passed name = '%s'" % val)
	import nameTools as nt
	haveLookup = nt.haveCanonicalMangaUpdatesName(val)
	if not haveLookup:
		print("Item not found in MangaUpdates name synonym table")
		print("Processed item as searched = '%s'" % nt.prepFilenameForMatching(val))
	else:
		print("Item found in lookup table!")
		print("Canonical name = '%s'" % nt.getCanonicalMangaUpdatesName(val) )

示例#24

0

显示文件

    def getItemsFromContainer(self, dirName, dirUrl):

        # Skip the needs sorting directory.
        if dirName == 'Needs sorting':
            return [], []
        if dirName == 'Admin Cleanup':
            return [], []
        if dirName == 'Raws':
            return [], []
        if dirName == 'Requests':
            return [], []
        if dirName == '_Autouploads':
            return [], []

        self.log.info("Original name - %s", dirName)

        bracketStripRe = re.compile(r"(\[.*?\])")
        dirName = bracketStripRe.sub(" ", dirName)
        while dirName.find("  ") + 1:
            dirName = dirName.replace("  ", " ")
        dirName = dirName.strip()

        if not dirName:
            self.log.critical("Empty dirname = '%s', baseURL = '%s'", dirName,
                              dirUrl)
            raise ValueError("No dir name for directory!")

        dirName = nt.getCanonicalMangaUpdatesName(dirName)

        self.log.info("Canonical name - %s", dirName)
        self.log.info("Fetching items for directory '%s'", dirName)

        self.log.info("Using URL '%s'", dirUrl)
        try:
            itemPage = self.wg.getpage(dirUrl)
        except urllib.error.URLError:
            self.log.error("Could not fetch page '%s'", dirUrl)
            return [], []

        soup = bs4.BeautifulSoup(itemPage)

        itemRet = []
        dirRet = []

        for row in soup.find_all("tr"):

            dirDat, itemDat = self.parseRow(row, dirUrl, dirName)

            if dirDat:
                dirRet.append(dirDat)

            if itemDat:
                itemRet.append(itemDat)

        return dirRet, itemRet

示例#25

0

显示文件

文件： uploader.py 项目： GDXN/MangaCMS

	def aggregateDirs(self, pathBase_1, pathBase_2, dir1, dir2):
		canonName    = nt.getCanonicalMangaUpdatesName(dir1)
		canonNameAlt = nt.getCanonicalMangaUpdatesName(dir2)
		cname1 = nt.prepFilenameForMatching(canonName)
		cname2 = nt.prepFilenameForMatching(canonNameAlt)
		if canonName.lower() != canonNameAlt.lower():
			self.log.critical("Error in uploading file. Name lookup via MangaUpdates table not commutative!")
			self.log.critical("First returned value    '%s'", canonName)
			self.log.critical("For directory with path '%s'", dir1)
			self.log.critical("Second returned value   '%s'", canonNameAlt)
			self.log.critical("For directory with path '%s'", dir2)
			self.log.critical("After cleaning: '%s', '%s', equal: '%s'", cname1, cname2, cname1 == cname2)


			raise CanonMismatch("Identical and yet not? '%s' - '%s'" % (canonName, canonNameAlt))
		self.log.info("Aggregating directories for canon name '%s':", canonName)

		n1 = lv.distance(dir1, canonName)
		n2 = lv.distance(dir2, canonName)

		self.log.info("	%s - '%s'", n1, dir1)
		self.log.info("	%s - '%s'", n2, dir2)

		# I'm using less then or equal, so situations where
		# both names are equadistant get aggregated anyways.
		if n1 <= n2:
			src = os.path.join(pathBase_2, dir2)
			dst = os.path.join(pathBase_1, dir1)
		else:
			src = os.path.join(pathBase_1, dir1)
			dst = os.path.join(pathBase_2, dir2)


		self.moveItemsInDir(src, dst)
		self.log.info("Removing directory '%s'", src)
		try:
			self.sftp.mkdir("/Admin cleanup/autoclean dirs")
		except:
			pass
		self.sftp.rename(src, "/Admin cleanup/autoclean dirs/garbage dir %s" % src.replace("/", ";").replace(" ", "_"))

		return dst

示例#26

0

显示文件

文件： test-nametools.py 项目： oliuz/MangaCMS

 def test_choice(self):
     print("Verifying directory linking mechanism")
     print(
         nt.dirNameProxy["Kurogane"]["fqPath"],
         nt.getCanonicalMangaUpdatesName(
             nt.dirNameProxy["Kurogane"]["fqPath"]))
     print(
         nt.dirNameProxy["Kyoumen no Silhouette"]["fqPath"],
         nt.getCanonicalMangaUpdatesName(
             nt.dirNameProxy["Kyoumen no Silhouette"]["fqPath"]))
     print(
         nt.dirNameProxy["Neko Ane "]["fqPath"],
         nt.getCanonicalMangaUpdatesName(
             nt.dirNameProxy["Neko Ane "]["fqPath"]))
     print(
         nt.dirNameProxy["Rescue Me"]["fqPath"],
         nt.getCanonicalMangaUpdatesName(
             nt.dirNameProxy["Rescue Me"]["fqPath"]))
     print(
         nt.dirNameProxy["Maken Ki!"]["fqPath"],
         nt.getCanonicalMangaUpdatesName(
             nt.dirNameProxy["Maken Ki!"]["fqPath"]))
     print(
         nt.dirNameProxy[":REverSAL"]["fqPath"],
         nt.getCanonicalMangaUpdatesName(
             nt.dirNameProxy[":REverSAL"]["fqPath"]))
     print(
         nt.dirNameProxy["Silva"]["fqPath"],
         nt.getCanonicalMangaUpdatesName(
             nt.dirNameProxy["Silva"]["fqPath"]))
     print(
         nt.dirNameProxy["Kouya ni Kemono Doukokusu"]["fqPath"],
         nt.getCanonicalMangaUpdatesName(
             nt.dirNameProxy["Kouya ni Kemono Doukokusu"]["fqPath"]))
     print(
         nt.dirNameProxy["Koukaku no Regios - Missing Mail"]["fqPath"],
         nt.getCanonicalMangaUpdatesName(
             nt.dirNameProxy["Koukaku no Regios - Missing Mail"]["fqPath"]))
     print(
         nt.dirNameProxy["Kuraudo (NOUJOU Junichi) "]["fqPath"],
         nt.getCanonicalMangaUpdatesName(
             nt.dirNameProxy["Kuraudo (NOUJOU Junichi) "]["fqPath"]))

示例#27

0

显示文件

文件： uploader.py 项目： Gazzilow/MangaCMS

    def aggregateDirs(self, pathBase, dir1, dir2):
        canonName = nt.getCanonicalMangaUpdatesName(dir1)
        canonNameAlt = nt.getCanonicalMangaUpdatesName(dir2)
        if canonName.lower() != canonNameAlt.lower():
            self.log.critical(
                "Error in uploading file. Name lookup via MangaUpdates table not commutative!"
            )
            self.log.critical("First returned value    '%s'", canonName)
            self.log.critical("For directory with path '%s'", dir1)
            self.log.critical("Second returned value   '%s'", canonNameAlt)
            self.log.critical("For directory with path '%s'", dir2)

            raise ValueError("Identical and yet not? '%s' - '%s'" %
                             (canonName, canonNameAlt))
        self.log.info("Aggregating directories for canon name '%s':",
                      canonName)

        n1 = lv.distance(dir1, canonName)
        n2 = lv.distance(dir2, canonName)

        self.log.info("	%s - '%s'", n1, dir1)
        self.log.info("	%s - '%s'", n2, dir2)

        # I'm using less then or equal, so situations where
        # both names are equadistant get aggregated anyways.
        if n1 <= n2:
            src = dir2
            dst = dir1
        else:
            src = dir1
            dst = dir2

        src = os.path.join(pathBase, src)
        dst = os.path.join(pathBase, dst)

        self.moveItemsInDir(src, dst)
        self.log.info("Removing directory '%s'", src)
        # self.ftp.rmd(src)
        # self.ftp.rename(src, "/Admin Cleanup/garbage dir %s" % id(src))

        return dst

示例#28

0

显示文件

文件： ContentLoader.py 项目： Kadantte/MangaCMS-1

    def get_link(self, link_row_id):

        with self.row_context(dbid=link_row_id) as row:
            series_name = row.series_name
            chapter_name = row.origin_name
            source_url = row.source_id
            row.state = 'fetching'

        try:
            self.log.info("Downloading = '%s', '%s'", series_name,
                          chapter_name)
            file_contents, name_from_source = self.wg.getFileAndName(
                source_url, addlHeaders={'Referer': 'https://mangazuki.co/'})

            series_name = nt.getCanonicalMangaUpdatesName(series_name)
            dlPath, newDir = self.locateOrCreateDirectoryForSeries(series_name)

            if name_from_source.endswith(".zip"):
                name_from_source = name_from_source[:-4]
            fname = "{} - {} [MangaZuki].zip".format(chapter_name,
                                                     name_from_source)

            fqFName = os.path.join(dlPath, fname)

            # This call also inserts the file parameters into the row
            with self.row_sess_context(dbid=link_row_id) as row_tup:
                row, sess = row_tup
                row.dirstate = "had_dir" if newDir is False else 'created_dir'
                fqFName = self.save_archive(row, sess, fqFName, file_contents)

            with self.row_context(dbid=link_row_id) as row:
                row.state = 'processing'

            # We don't want to upload the file we just downloaded, so specify doUpload as false.
            # As a result of this, the seriesName paramerer also no longer matters
            self.processDownload(seriesName=False,
                                 archivePath=fqFName,
                                 doUpload=False)

            self.log.info("Done")
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'complete'
                row.downloaded_at = datetime.datetime.now()
                row.last_checked = datetime.datetime.now()

        except Exception:
            self.log.critical("Failure on retrieving content at %s",
                              source_url)
            self.log.critical("Traceback = %s", traceback.format_exc())
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'error'
                row.err_str = traceback.format_exc()
            raise

示例#29

0

显示文件

文件： ContentLoader.py 项目： GDXN/MangaCMS

	def getSeries(self, markup):
		soup = bs4.BeautifulSoup(markup, "lxml")
		title = soup.find("h3", id='chapter-title')

		if title.b.find('a'):
			title = title.b.a.get_text()

		else:
			title = title.b.get_text()

		title = nt.getCanonicalMangaUpdatesName(title)
		print("Title '%s'" % title)
		return title

示例#30

0

显示文件

文件： ContentLoader.py 项目： Kadantte/MangaCMS-1

    def getSeries(self, markup):
        soup = bs4.BeautifulSoup(markup, "lxml")
        title = soup.find("h3", id='chapter-title')

        if title.b.find('a'):
            title = title.b.a.get_text()

        else:
            title = title.b.get_text()

        title = nt.getCanonicalMangaUpdatesName(title)
        print("Title '%s'" % title)
        return title

示例#31

0

显示文件

	def updateDbEntryById(self, rowId, commit=True, **kwargs):

		# Patch series name.
		if "seriesName" in kwargs and kwargs["seriesName"] and self.shouldCanonize:
			kwargs["seriesName"] = nt.getCanonicalMangaUpdatesName(kwargs["seriesName"])

		query, queryArguments = self.generateUpdateQuery(dbId=rowId, **kwargs)

		if self.QUERY_DEBUG:
			print("Query = ", query)
			print("Args = ", queryArguments)

		with self.transaction(commit=commit) as cur:
			cur.execute(query, queryArguments)

示例#32

0

显示文件

    def get_link(self, link_row_id):

        with self.row_context(dbid=link_row_id) as row:
            series_name = row.series_name
            chapter_name = row.origin_name
            source_url = row.source_id
            row.state = 'fetching'

        series_name = nt.getCanonicalMangaUpdatesName(series_name)

        self.log.info("Should retreive url - %s", source_url)

        images = self.proceduralGetImages(source_url)

        if not images:
            self.log.critical("Failure on retrieving content at %s",
                              source_url)
            self.log.critical("Page not found - 404")
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'error'
                row.err_str = "error-404"
                return

        imgCnt = 1
        for imageName, imageContent in images:

            imageName = "{num:03.0f} - {srcName}".format(num=imgCnt,
                                                         srcName=imageName)
            imgCnt += 1
            images.append([imageName, imageContent])

            if not runStatus.run:
                self.log.info("Breaking due to exit flag being set")
                with self.row_context(dbid=link_row_id) as row:
                    row.state = 'new'
                    return

        if not images:
            self.log.error("No images! Download failed?")
            with self.row_context(dbid=link_row_id) as row:
                row.state = 'error'
                row.err_str = "error-404"
                return

        self.save_manga_image_set(link_row_id,
                                  series_name,
                                  chapter_name,
                                  images,
                                  source_name='MangaHere')

示例#33

0

显示文件

文件： RetreivalDbBase.py 项目： gregseb/MangaCMS

	def updateDbEntry(self, sourceUrl, commit=True, **kwargs):

		# Patch series name.
		if "seriesName" in kwargs and kwargs["seriesName"] and self.shouldCanonize:
			kwargs["seriesName"] = nt.getCanonicalMangaUpdatesName(kwargs["seriesName"])

		query, queryArguments = self.generateUpdateQuery(sourceUrl=sourceUrl, **kwargs)

		if self.QUERY_DEBUG:
			print("Query = ", query)
			print("Args = ", queryArguments)

		with self.conn.cursor() as cur:
			with transaction(cur, commit=commit):
				cur.execute(query, queryArguments)

示例#34

0

显示文件

文件： RetreivalDbBase.py 项目： kajeagentspi/MangaCMS

    def updateDbEntryById(self, rowId, commit=True, **kwargs):

        # Patch series name.
        if "seriesName" in kwargs and kwargs["seriesName"] and self.shouldCanonize:
            kwargs["seriesName"] = nt.getCanonicalMangaUpdatesName(kwargs["seriesName"])

        query, queryArguments = self.generateUpdateQuery(dbId=rowId, **kwargs)

        if self.QUERY_DEBUG:
            print("Query = ", query)
            print("Args = ", queryArguments)

        with self.conn.cursor() as cur:
            with transaction(cur, commit=commit):
                cur.execute(query, queryArguments)

示例#35

0

显示文件

文件： uploader.py 项目： gregseb/MangaCMS

	def aggregateDirs(self, pathBase, dir1, dir2):
		canonName = nt.getCanonicalMangaUpdatesName(dir1)
		canonNameAlt = nt.getCanonicalMangaUpdatesName(dir2)
		if canonName != canonNameAlt:
			self.log.critical("Error in uploading file. Name lookup via MangaUpdates table not commutative!")
			self.log.critical("First returned value    '%s'", canonName)
			self.log.critical("For directory with path '%s'", dir1)
			self.log.critical("Second returned value   '%s'", canonNameAlt)
			self.log.critical("For directory with path '%s'", dir2)

			raise ValueError("Identical and yet not?")
		self.log.info("Aggregating directories for canon name '%s':", canonName)

		n1 = lv.distance(dir1, canonName)
		n2 = lv.distance(dir2, canonName)

		self.log.info("	%s - '%s'", n1, dir1)
		self.log.info("	%s - '%s'", n2, dir2)

		# I'm using less then or equal, so situations where
		# both names are equadistant get aggregated anyways.
		if n1 <= n2:
			src = dir2
			dst = dir1
		else:
			src = dir1
			dst = dir2

		src = os.path.join(pathBase, src)
		dst = os.path.join(pathBase, dst)

		self.moveItemsInDir(src, dst)
		self.log.info("Removing directory '%s'", src)
		self.ftp.rmd(src)

		return dst

示例#36

0

显示文件

文件： LoaderBase.py 项目： naafx8/MangaCMS

	def _process_links_into_db(self, linksDicts):

		self.log.info( "Inserting...")


		newItems = 0
		with self.db.session_context() as sess:
			for link in linksDicts:

				self._check_keys(link)

				tags = link.pop("tags", [])
				assert isinstance(tags, (list, tuple)), "tags must be a list or tuple!"

				if 'series_name' in link and self.shouldCanonize:
					link["series_name"] = nt.getCanonicalMangaUpdatesName(link["series_name"])

				have = sess.query(self.target_table)                            \
					.filter(self.target_table.source_site == self.plugin_key)     \
					.filter(self.target_table.source_id == link["source_id"]) \
					.scalar()

				if not have:
					newItems += 1
					have = self.target_table(
							state       = 'new',            # Should be set automatically.
							source_site = self.plugin_key,
							first_seen  = datetime.datetime.now(),
							**link
						)

					sess.add(have)

					if newItems % 10000 == 0:
						self.log.info("Added %s rows, doing incremental commit!", newItems)
						sess.commit()
				try:
					self.update_tags(tags=tags, row=have)
				except ScrapeExceptions.UnwantedContentError:
					self.log.info("How does something have masked tags on insertion?")
					sess.delete(have)

		if self.mon_con:
			self.mon_con.incr('new_links', newItems)

		self.log.info( "Done (%s new items, %s total)", newItems, len(linksDicts))

		return newItems

示例#37

0

显示文件

文件： DbLoader.py 项目： cbunch/MangaCMS

	def getFeed(self):
		treedata = self.wg.getJson(self.tree_api)
		assert 'contents' in treedata
		assert treedata['name'] == 'mango'
		assert treedata['type'] == 'directory'
		data_unfiltered = self.process_tree_elements(treedata['contents'])

		data = []
		for sName, filen in data_unfiltered:
			if not any([filen.startswith(prefix) for prefix in MASK_PATHS]):
				assert filen.startswith(STRIP_PREFIX)
				filen = filen[len(STRIP_PREFIX):]
				sName = nt.getCanonicalMangaUpdatesName(sName)
				data.append((sName, filen))

		return data

示例#38

0

显示文件

文件： FeedLoader.py 项目： GDXN/MangaCMS

	def getFeed(self):
		treedata = self.wg.getJson(self.tree_api)
		assert 'contents' in treedata
		assert treedata['name'] == 'mango'
		assert treedata['type'] == 'directory'
		data_unfiltered = self.process_tree_elements(treedata['contents'])

		data = []
		for sName, filen in data_unfiltered:
			assert filen.startswith(STRIP_PREFIX)
			filen = filen[len(STRIP_PREFIX):]
			if not any([filen.startswith(prefix) for prefix in MASK_PATHS]):
				sName = nt.getCanonicalMangaUpdatesName(sName)
				data.append((sName, filen))

		return data

示例#39

0

显示文件

文件： FetchBot.py 项目： gregseb/MangaCMS

    def getDownloadPath(self, item, fName):

        if not item['seriesName']:
            self.log.info("No series set for item. Guessing from filename:")
            self.log.info("Filename = '%s'", fName)
            bareName = nt.guessSeriesFromFilename(fName)

            # if not matchName or not matchName in nt.dirNameProxy:
            if not nt.haveCanonicalMangaUpdatesName(bareName):
                item["seriesName"] = settings.ircBot["unknown-series"]
            else:
                item["seriesName"] = nt.getCanonicalMangaUpdatesName(bareName)

            self.log.info("Guessed  = '%s'. Updating series information",
                          item['seriesName'])
            self.updateDbEntry(item["sourceUrl"],
                               seriesName=item["seriesName"])

        dlPath, newDir = self.locateOrCreateDirectoryForSeries(
            item["seriesName"])

        if item["flags"] == None:
            item["flags"] = ""

        if newDir:
            self.updateDbEntry(item["sourceUrl"],
                               flags=" ".join([item["flags"], "haddir"]))
            self.conn.commit()

        fqFName = os.path.join(dlPath, fName)

        loop = 1

        fName, ext = os.path.splitext(fName)

        while os.path.exists(fqFName):
            fName = "%s - (%d).%s" % (fName, loop, ext)
            fqFName = os.path.join(dlPath, fName)
            loop += 1
        self.log.info("Saving to archive = %s", fqFName)

        self.updateDbEntry(item["sourceUrl"],
                           downloadPath=dlPath,
                           fileName=fName,
                           originName=fName)

        return fqFName

示例#40

0

显示文件

文件： RetreivalDbBase.py 项目： gregseb/MangaCMS

	def processLinksIntoDB(self, linksDicts):

		self.log.info( "Inserting...",)
		newItems = 0
		for link in linksDicts:
			if link is None:
				print("linksDicts", linksDicts)
				print("WAT")

			row = self.getRowsByValue(sourceUrl=link["sourceUrl"], limitByKey=False)

			if not row:
				newItems += 1

				if not "dlState" in link:
					link['dlState'] = 0

				# Patch series name.
				if 'seriesName' in link and self.shouldCanonize:
					link["seriesName"] = nt.getCanonicalMangaUpdatesName(link["seriesName"])


				# Using fancy dict hijinks now. Old call below for reference.

				# self.insertIntoDb(retreivalTime = link["date"],
				# 					sourceUrl   = link["dlLink"],
				# 					originName  = link["dlName"],
				# 					dlState     = 0,
				# 					seriesName  = link["baseName"],
				# 					flags       = flagStr)

				self.insertIntoDb(**link)


				self.log.info("New item: %s", link)




		self.log.info( "Done")
		self.log.info( "Committing...",)
		self.conn.commit()
		self.log.info( "Committed")

		return newItems

示例#41

0

显示文件

文件： cleanDb.py 项目： GJdan/MangaCMS

	def findIfMigrated(self, filePath):
		dirPath, fileName = os.path.split(filePath)

		series = dirPath.split("/")[-1]
		series = nt.getCanonicalMangaUpdatesName(series)
		otherDir = nt.dirNameProxy[series]

		if not otherDir["fqPath"]:
			return False
		if otherDir["fqPath"] == dirPath:
			return False

		newPath = os.path.join(otherDir["fqPath"], fileName)
		if os.path.exists(newPath):
			print("File moved!")
			return otherDir["fqPath"]

		return False

示例#42

0

显示文件

文件： cleanDb.py 项目： cbunch/MangaCMS

	def findIfMigrated(self, filePath):
		dirPath, fileName = os.path.split(filePath)

		series = dirPath.split("/")[-1]
		series = nt.getCanonicalMangaUpdatesName(series)
		otherDir = nt.dirNameProxy[series]

		if not otherDir["fqPath"]:
			return False
		if otherDir["fqPath"] == dirPath:
			return False

		newPath = os.path.join(otherDir["fqPath"], fileName)
		if os.path.exists(newPath):
			print("File moved!")
			return otherDir["fqPath"]

		return False

示例#43

0

显示文件

文件： FetchBot.py 项目： nothing628/MangaCMS

	def getDownloadPath(self, item, fName):

		if not item['seriesName']:
			self.log.info("No series set for item. Guessing from filename:")
			self.log.info("Filename = '%s'", fName)
			bareName = nt.guessSeriesFromFilename(fName)

			# if not matchName or not matchName in nt.dirNameProxy:
			if not nt.haveCanonicalMangaUpdatesName(bareName):
				item["seriesName"] = settings.ircBot["unknown-series"]
			else:
				item["seriesName"] = nt.getCanonicalMangaUpdatesName(bareName)

			self.log.info("Guessed  = '%s'. Updating series information", item['seriesName'])
			self.updateDbEntry(item["sourceUrl"], seriesName=item["seriesName"])


		dlPath, newDir = self.locateOrCreateDirectoryForSeries(item["seriesName"])

		if item["flags"] == None:
			item["flags"] = ""

		if newDir:
			self.updateDbEntry(item["sourceUrl"], flags=" ".join([item["flags"], "haddir"]))
			self.conn.commit()

		fqFName = os.path.join(dlPath, fName)

		loop = 1

		fName, ext = os.path.splitext(fName)

		while os.path.exists(fqFName):
			fName = "%s - (%d).%s" % (fName, loop, ext)
			fqFName = os.path.join(dlPath, fName)
			loop += 1
		self.log.info("Saving to archive = %s", fqFName)


		self.updateDbEntry(item["sourceUrl"], downloadPath=dlPath, fileName=fName, originName=fName)

		return fqFName

示例#44

0

显示文件

文件： RetreivalBase.py 项目： nothing628/MangaCMS

    def locateOrCreateDirectoryForSeries(self, seriesName):

        if self.shouldCanonize:
            canonSeriesName = nt.getCanonicalMangaUpdatesName(seriesName)
        else:
            canonSeriesName = seriesName

        safeBaseName = nt.makeFilenameSafe(canonSeriesName)

        if canonSeriesName in nt.dirNameProxy:
            self.log.info("Have target dir for '%s' Dir = '%s'",
                          canonSeriesName,
                          nt.dirNameProxy[canonSeriesName]['fqPath'])
            return nt.dirNameProxy[canonSeriesName]["fqPath"], False
        else:
            self.log.info("Don't have target dir for: %s, full name = %s",
                          canonSeriesName, seriesName)
            targetDir = os.path.join(settings.baseDir, safeBaseName)
            if not os.path.exists(targetDir):
                try:
                    os.makedirs(targetDir)
                    return targetDir, True

                except FileExistsError:
                    # Probably means the directory was concurrently created by another thread in the background?
                    self.log.critical(
                        "Directory doesn't exist, and yet it does?")
                    self.log.critical(traceback.format_exc())
                    pass
                except OSError:
                    self.log.critical("Directory creation failed?")
                    self.log.critical(traceback.format_exc())

            else:
                self.log.warning(
                    "Directory not found in dir-dict, but it exists!")
                self.log.warning("Directory-Path: %s", targetDir)
                self.log.warning("Base series name: %s", seriesName)
                self.log.warning("Canonized series name: %s", canonSeriesName)
                self.log.warning("Safe canonized name: %s", safeBaseName)
            return targetDir, False

示例#45

0

显示文件

文件： RetreivalDbBase.py 项目： kajeagentspi/MangaCMS

    def updateDbEntry(self, sourceUrl, commit=True, **kwargs):

        # Patch series name.
        if "seriesName" in kwargs and kwargs["seriesName"] and self.shouldCanonize:
            kwargs["seriesName"] = nt.getCanonicalMangaUpdatesName(kwargs["seriesName"])

            # Clamp the retreivaltime to now, so parsing issues that result in invalid, future
            # time-stamps don't cause posts to stick to the top of the post list.
        if "retreivalTime" in kwargs:
            if kwargs["retreivalTime"] > time.time():
                kwargs["retreivalTime"] = time.time()

        query, queryArguments = self.generateUpdateQuery(sourceUrl=sourceUrl, **kwargs)

        if self.QUERY_DEBUG:
            print("Query = ", query)
            print("Args = ", queryArguments)

        with self.conn.cursor() as cur:
            with transaction(cur, commit=commit):
                cur.execute(query, queryArguments)

示例#46

0

显示文件

文件： uploader.py 项目： Gazzilow/MangaCMS

    def loadRemoteDirectory(self, fullPath, aggregate=False):
        ret = {}

        for dirName, stats in self.ftp.mlsd(fullPath):

            dirName = ftfy.fix_text(dirName)
            # Skip items that aren't directories
            if stats["type"] != "dir":
                continue

            canonName = nt.getCanonicalMangaUpdatesName(dirName)
            matchingName = nt.prepFilenameForMatching(canonName)

            fqPath = os.path.join(fullPath, dirName)

            if matchingName in ret:
                if aggregate:
                    matchName = os.path.split(ret[matchingName])[-1]
                    try:
                        fqPath = self.aggregateDirs(fullPath, dirName,
                                                    matchName)
                    except ValueError:
                        traceback.print_exc()
                    except ftplib.error_perm:
                        traceback.print_exc()
                else:
                    if COMPLAIN_ABOUT_DUPS:
                        self.log.warning(
                            "Duplicate directories for series '%s'!",
                            canonName)
                        self.log.warning("	'%s/%s'", fullPath, dirName)
                        self.log.warning("	'%s/%s'", fullPath, matchingName)
                ret[matchingName] = fqPath

            else:
                ret[matchingName] = fqPath

        return ret

示例#47

0

显示文件

文件： RetreivalDbBase.py 项目： kajeagentspi/MangaCMS

    def locateOrCreateDirectoryForSeries(self, seriesName):

        if self.shouldCanonize:
            canonSeriesName = nt.getCanonicalMangaUpdatesName(seriesName)
        else:
            canonSeriesName = seriesName

        safeBaseName = nt.makeFilenameSafe(canonSeriesName)

        if canonSeriesName in nt.dirNameProxy:
            self.log.info(
                "Have target dir for '%s' Dir = '%s'", canonSeriesName, nt.dirNameProxy[canonSeriesName]["fqPath"]
            )
            return nt.dirNameProxy[canonSeriesName]["fqPath"], False
        else:
            self.log.info("Don't have target dir for: %s, full name = %s", canonSeriesName, seriesName)
            targetDir = os.path.join(settings.baseDir, safeBaseName)
            if not os.path.exists(targetDir):
                try:
                    os.makedirs(targetDir)
                    return targetDir, True

                except FileExistsError:
                    # Probably means the directory was concurrently created by another thread in the background?
                    self.log.critical("Directory doesn't exist, and yet it does?")
                    self.log.critical(traceback.format_exc())
                    pass
                except OSError:
                    self.log.critical("Directory creation failed?")
                    self.log.critical(traceback.format_exc())

            else:
                self.log.warning("Directory not found in dir-dict, but it exists!")
                self.log.warning("Directory-Path: %s", targetDir)
                self.log.warning("Base series name: %s", seriesName)
                self.log.warning("Canonized series name: %s", canonSeriesName)
                self.log.warning("Safe canonized name: %s", safeBaseName)
            return targetDir, False

示例#48

0

显示文件

文件： LoaderBase.py 项目： GDXN/MangaCMS

	def _processLinksIntoDB(self, linksDicts):

		self.log.info( "Inserting...",)


		newItems = 0
		for link in linksDicts:
			if link is None:
				print("linksDicts", linksDicts)
				print("WAT")
				continue

			row = self.getRowsByValue(sourceUrl=link["sourceUrl"], limitByKey=False)

			if not row:
				newItems += 1

				if not "dlState" in link:
					link['dlState'] = 0

				# Patch series name.
				if 'seriesName' in link and self.shouldCanonize:
					link["seriesName"] = nt.getCanonicalMangaUpdatesName(link["seriesName"])


				self.insertIntoDb(**link)


				self.log.info("New item: %s", link)


		if self.mon_con:
			self.mon_con.incr('new_links', newItems)

		self.log.info( "Done (%s new items)", newItems)

		return newItems

示例#49

0

显示文件

    def updateSeriesDbEntryById(self, rowId, commit=True, **kwargs):

        # Patch series name.
        if "seriesName" in kwargs and kwargs["seriesName"]:
            kwargs["seriesName"] = nt.getCanonicalMangaUpdatesName(
                kwargs["seriesName"])

        queries = []
        qArgs = []
        for key in kwargs.keys():
            if key not in self.validSeriesKwargs:
                raise ValueError("Invalid keyword argument: %s" % key)
            else:
                queries.append("{k}=%s".format(k=key))
                qArgs.append(kwargs[key])

        qArgs.append(rowId)

        column = ", ".join(queries)

        query = '''UPDATE {tableName} SET {v} WHERE dbId=%s;'''.format(
            tableName=self.seriesTableName, v=column)

        if QUERY_DEBUG:
            print("Query = ", query)
            print("Args = ", qArgs)

        with self.context_cursor() as cur:

            if commit:
                cur.execute("BEGIN;")

            cur.execute(query, qArgs)

            if commit:
                cur.execute("COMMIT;")

示例#50

0

显示文件

文件： autoOrganize.py 项目： GDXN/MangaCMS

def consolidateMangaFolders(dirPath, smartMode=True):


	idLut = nt.MtNamesMapWrapper("fsName->buId")

	pc = PathCleaner()
	
	count = 0
	print("Dir", dirPath)
	items = os.listdir(dirPath)
	items.sort()
	for item in items:
		item = os.path.join(dirPath, item)
		if os.path.isdir(item):
			fPath, dirName = os.path.split(item)

			lookup = nt.dirNameProxy[dirName]
			if lookup["fqPath"] != item:
				print()
				print()
				print("------------------------------------------------------")
				canonName = nt.getCanonicalMangaUpdatesName(dirName)
				print("Duplicate Directory '%s' - Canon = '%s'" % (dirName, canonName))

				count += 1

				mtId = idLut[nt.prepFilenameForMatching(dirName)]
				for num in mtId:
					print("	URL: https://www.mangaupdates.com/series.html?id=%s" % (num, ))

				fPath, dir2Name = os.path.split(lookup["fqPath"])


				if not os.path.exists(item):
					print("'%s' has been removed. Skipping" % item)
					continue
				if not os.path.exists(lookup["fqPath"]):
					print("'%s' has been removed. Skipping" % lookup["fqPath"])
					continue


				n1 = lv.distance(dirName, canonName)
				n2 = lv.distance(dir2Name, canonName)

				r1 = abs(nt.extractRatingToFloat(dirName))
				r2 = abs(nt.extractRatingToFloat(dir2Name))

				if "[complete]" in dirName.lower():
					r1 += 0.1
				if "[complete]" in dir2Name.lower():
					r2 += 0.1

				if "[wtf]" in dirName.lower():
					r1 += 0.2
				if "[wtf]" in dir2Name.lower():
					r2 += 0.2


				print("	1: ", item)
				print("	2: ", lookup["fqPath"])
				print("	1:	", dirName, ' ->', nt.getCanonicalMangaUpdatesName(dirName))
				print("	2:	", dir2Name, ' ->', nt.getCanonicalMangaUpdatesName(dir2Name))
				print("	1:	({num} items)(distance {dist})(rating {rat})".format(num=len(os.listdir(item)), dist=n1, rat=r1))
				print("	2:	({num} items)(distance {dist})(rating {rat})".format(num=len(os.listdir(lookup["fqPath"])), dist=n2, rat=r2))



				mtId2 = idLut[nt.prepFilenameForMatching(dir2Name)]
				if mtId != mtId2:
					print("DISCORDANT ID NUMBERS - {num1}, {num2}!".format(num1=mtId, num2=mtId2))
					for num in mtId2:
						print("	URL: https://www.mangaupdates.com/series.html?id=%s" % (num, ))

					continue

				if r1 > r2:
					doMove = "reverse"
				elif r2 > r1:
					doMove = "forward"
				else:
					doMove = ''

				if not doMove or not smartMode:
					doMove = query_response("move files ('f' dir 1 -> dir 2. 'r' dir 1 <- dir 2. 'l' use levenshtein distance. 'n' do not move)?")

				if doMove == "forward":
					print("Forward move")
					fromDir = item
					toDir   = lookup["fqPath"]
				elif doMove == "reverse":
					print("Reverse move")
					fromDir = lookup["fqPath"]
					toDir   = item
				elif doMove == "levenshtein":
					print("Levenshtein distance chooser")


					# I'm using less then or equal, so situations where
					# both names are equadistant get aggregated anyways.
					if n1 <= n2:
						fromDir = lookup["fqPath"]
						toDir   = item
					else:
						fromDir = item
						toDir   = lookup["fqPath"]

				else:
					print("Skipping")
					continue

				print("moving from: '%s' " % fromDir)
				print("         to: '%s' " % toDir)



				items = os.listdir(fromDir)
				for item in items:
					fromPath = os.path.join(fromDir, item)
					toPath   = os.path.join(toDir, item)

					loop = 2
					while os.path.exists(toPath):
						pathBase, ext = os.path.splitext(toPath)
						print("	Duplicate file!")
						toPath = "{start} ({loop}){ext}".format(start=pathBase, loop=loop, ext=ext)
					print("		Moving: ", item)
					print("		From: ", fromPath)
					print("		To:   ", toPath)
					pc.moveFile(fromPath, toPath)

					try:
						pc.moveFile(fromPath, toPath)
					except psycopg2.IntegrityError:
						print("Error moving item in dedup database")

						# pc.deletePath(toPath)

					shutil.move(fromPath, toPath)


				print("Deleting directory")
				os.rmdir(fromDir)

	print("total items", count)

示例#51

0

显示文件

文件： ContentLoader.py 项目： GJdan/MangaCMS

	def getLink(self, link):


		sourceUrl  = link["sourceUrl"]
		print("Link", link)



		seriesName = link['seriesName']


		try:
			self.log.info( "Should retreive url - %s", sourceUrl)
			self.updateDbEntry(sourceUrl, dlState=1)

			seriesName = nt.getCanonicalMangaUpdatesName(seriesName)


			self.log.info("Downloading = '%s', '%s'", seriesName, link["originName"])
			dlPath, newDir = self.locateOrCreateDirectoryForSeries(seriesName)

			if link["flags"] == None:
				link["flags"] = ""

			if newDir:
				self.updateDbEntry(sourceUrl, flags=" ".join([link["flags"], "haddir"]))
				self.conn.commit()

			chapterName = nt.makeFilenameSafe(link["originName"])

			fqFName = os.path.join(dlPath, chapterName+" [MangaHere].zip")

			loop = 1
			prefix, ext = os.path.splitext(fqFName)
			while os.path.exists(fqFName):
				fqFName = "%s (%d)%s" % (prefix, loop,  ext)
				loop += 1
			self.log.info("Saving to archive = %s", fqFName)

			images = self.proceduralGetImages(sourceUrl)

			self.log.info("Creating archive with %s images", len(images))

			if not images:
				self.updateDbEntry(sourceUrl, dlState=-1, tags="error-404")
				return

			#Write all downloaded files to the archive.
			arch = zipfile.ZipFile(fqFName, "w")
			for imageName, imageContent in images:
				arch.writestr(imageName, imageContent)
			arch.close()


			dedupState = processDownload.processDownload(seriesName, fqFName, deleteDups=True, includePHash=True)
			self.log.info( "Done")

			filePath, fileName = os.path.split(fqFName)
			self.updateDbEntry(sourceUrl, dlState=2, downloadPath=filePath, fileName=fileName, tags=dedupState)
			return



		except Exception:
			self.log.critical("Failure on retreiving content at %s", sourceUrl)
			self.log.critical("Traceback = %s", traceback.format_exc())
			self.updateDbEntry(sourceUrl, dlState=-1)
			raise

示例#52

0

显示文件

文件： uploader.py 项目： GDXN/MangaCMS

	def loadRemoteDirectory(self, fullPath, aggregate=False):
		ret = {}
		dirs = self.wg.getpage("https://manga.madokami.al/stupidapi/fakedirs")

		requirePrefix = splitall(fullPath)

		badwords = [
			'Non-English',
			'Oneshots',
			'Raws',
			'Novels',
			'_Doujinshi',
			'AutoUploaded from Assorted Sources',
		]

		rows = [tmp for tmp in
					[splitall(item) for item in
						[item[1:] if item.startswith("./") else item for item in dirs.split("\n")]
					]
				if
					(
							len(tmp) >= len(requirePrefix)
						and
							all([tmp[x] == requirePrefix[x] for x in range(len(requirePrefix))])
						and
							not any([badword in tmp for badword in badwords]))
				]


		print(len(rows))
		for line in rows:
			if len(line) == 6:

				dirName = line[-1]
				if not dirName:
					continue
				canonName = nt.getCanonicalMangaUpdatesName(dirName)
				matchingName = nt.prepFilenameForMatching(canonName)

				# prepFilenameForMatching can result in empty directory names in some cases.
				# Detect that, and don't bother with it if that happened.
				if not matchingName:
					continue
				fqPath   = os.path.join(*line)
				fullPath = os.path.join(*line[:-1])

				if matchingName in ret:
					tmp = ret[matchingName]
					matchpath, matchName = os.path.split(tmp[-1])
					if isinstance(tmp, list):
						tmp = tmp.pop()
					if aggregate:
						try:
							fqPath = self.aggregateDirs(fullPath, matchpath,dirName, matchName)
						except CanonMismatch:
							pass
						except ValueError:
							traceback.print_exc()
						except ftplib.error_perm:
							traceback.print_exc()
						except PermissionError:
							traceback.print_exc()
					else:
						if COMPLAIN_ABOUT_DUPS:
							self.log.warning("Duplicate directories for series '%s'!", canonName)
							self.log.warning("	'%s/%s'", fullPath, dirName)
							self.log.warning("	'%s/%s'", matchpath, matchName)
					ret[matchingName].append(fqPath)

				else:
					ret[matchingName] = [fqPath]

		return ret

示例#53

0

显示文件

文件： autoOrganize.py 项目： gregseb/MangaCMS

def consolidateMangaFolders(dirPath, smartMode=True):

    idLut = nt.MtNamesMapWrapper("fsName->buId")

    pc = PathCleaner()
    pc.openDB()

    count = 0
    print("Dir", dirPath)
    items = os.listdir(dirPath)
    items.sort()
    for item in items:
        item = os.path.join(dirPath, item)
        if os.path.isdir(item):
            fPath, dirName = os.path.split(item)

            lookup = nt.dirNameProxy[dirName]
            if lookup["fqPath"] != item:
                print()
                print()
                print("------------------------------------------------------")
                canonName = nt.getCanonicalMangaUpdatesName(dirName)
                print("Duplicate Directory '%s' - Canon = '%s'" %
                      (dirName, canonName))

                count += 1

                mtId = idLut[nt.prepFilenameForMatching(dirName)]
                for num in mtId:
                    print(
                        "	URL: https://www.mangaupdates.com/series.html?id=%s"
                        % (num, ))

                fPath, dir2Name = os.path.split(lookup["fqPath"])

                if not os.path.exists(item):
                    print("'%s' has been removed. Skipping" % item)
                    continue
                if not os.path.exists(lookup["fqPath"]):
                    print("'%s' has been removed. Skipping" % lookup["fqPath"])
                    continue

                n1 = lv.distance(dirName, canonName)
                n2 = lv.distance(dir2Name, canonName)

                r1 = abs(nt.extractRatingToFloat(dirName))
                r2 = abs(nt.extractRatingToFloat(dir2Name))

                if "[complete]" in dirName.lower():
                    r1 += 0.1
                if "[complete]" in dir2Name.lower():
                    r2 += 0.1

                if "[wtf]" in dirName.lower():
                    r1 += 0.2
                if "[wtf]" in dir2Name.lower():
                    r2 += 0.2

                print("	1: ", item)
                print("	2: ", lookup["fqPath"])
                print("	1:	", dirName, ' ->',
                      nt.getCanonicalMangaUpdatesName(dirName))
                print("	2:	", dir2Name, ' ->',
                      nt.getCanonicalMangaUpdatesName(dir2Name))
                print(
                    "	1:	({num} items)(distance {dist})(rating {rat})".format(
                        num=len(os.listdir(item)), dist=n1, rat=r1))
                print(
                    "	2:	({num} items)(distance {dist})(rating {rat})".format(
                        num=len(os.listdir(lookup["fqPath"])), dist=n2,
                        rat=r2))

                mtId2 = idLut[nt.prepFilenameForMatching(dir2Name)]
                if mtId != mtId2:
                    print("DISCORDANT ID NUMBERS - {num1}, {num2}!".format(
                        num1=mtId, num2=mtId2))
                    for num in mtId2:
                        print(
                            "	URL: https://www.mangaupdates.com/series.html?id=%s"
                            % (num, ))

                    continue

                if r1 > r2:
                    doMove = "reverse"
                elif r2 > r1:
                    doMove = "forward"
                else:
                    doMove = ''

                if not doMove or not smartMode:
                    doMove = query_response(
                        "move files ('f' dir 1 -> dir 2. 'r' dir 1 <- dir 2. 'l' use levenshtein distance. 'n' do not move)?"
                    )

                if doMove == "forward":
                    print("Forward move")
                    fromDir = item
                    toDir = lookup["fqPath"]
                elif doMove == "reverse":
                    print("Reverse move")
                    fromDir = lookup["fqPath"]
                    toDir = item
                elif doMove == "levenshtein":
                    print("Levenshtein distance chooser")

                    # I'm using less then or equal, so situations where
                    # both names are equadistant get aggregated anyways.
                    if n1 <= n2:
                        fromDir = lookup["fqPath"]
                        toDir = item
                    else:
                        fromDir = item
                        toDir = lookup["fqPath"]

                else:
                    print("Skipping")
                    continue

                print("moving from: '%s' " % fromDir)
                print("         to: '%s' " % toDir)

                items = os.listdir(fromDir)
                for item in items:
                    fromPath = os.path.join(fromDir, item)
                    toPath = os.path.join(toDir, item)

                    loop = 2
                    while os.path.exists(toPath):
                        pathBase, ext = os.path.splitext(toPath)
                        print("	Duplicate file!")
                        toPath = "{start} ({loop}){ext}".format(start=pathBase,
                                                                loop=loop,
                                                                ext=ext)
                    print("		Moving: ", item)
                    print("		From: ", fromPath)
                    print("		To:   ", toPath)
                    pc.moveFile(fromPath, toPath)

                    try:
                        pc.moveFile(fromPath, toPath)
                    except psycopg2.IntegrityError:
                        print("Error moving item in dedup database")

                        # pc.deletePath(toPath)

                    shutil.move(fromPath, toPath)

                print("Deleting directory")
                os.rmdir(fromDir)

    print("total items", count)

示例#54

0

显示文件

文件： mkFeedLoader.py 项目： GJdan/MangaCMS

	def processLinksIntoDB(self, linksDicts, isPicked=False):

		self.log.info( "Inserting...",)
		newItems = 0
		oldItems = 0
		for link in linksDicts:
			if link is None:
				print("linksDicts", linksDicts)
				print("WAT")

			rows = self.getRowsByValue(originName  = link["dlName"])    #We only look at filenames to determine uniqueness,
			if not rows:
				rows = self.getRowsByValue(sourceUrl  = link["dlLink"])    #Check against URLs as well, so we don't break the UNIQUE constraint

			if not rows:
				newItems += 1

				# Patch series name.
				seriesName = nt.getCanonicalMangaUpdatesName(link["baseName"])

				# Flags has to be an empty string, because the DB is annoying.
				# TL;DR, comparing with LIKE in a column that has NULLs in it is somewhat broken.
				self.insertIntoDb(retreivalTime = link["date"],
									sourceUrl   = link["dlLink"],
									originName  = link["dlName"],
									dlState     = 0,
									seriesName  = seriesName,
									flags       = '',
									commit      = False)  # Defer commiting changes to speed things up



				self.log.info("New item: %s", (link["date"], link["dlLink"], link["baseName"], link["dlName"]))

			elif len(rows) > 1:
				self.log.warning("Have more then one item for filename! Wat?")
				self.log.warning("Info dict for file:")
				self.log.warning("'%s'", link)
				self.log.warning("Found rows:")
				self.log.warning("'%s'", rows)
			elif len(rows) == 1:
				row = rows.pop()
				if row["sourceUrl"] != link["dlLink"]:
					self.log.info("File has been moved!")
					self.log.info("File: '%s'", link)
					self.updateDbEntryById(row["dbId"], sourceUrl = link["dlLink"])
				else:
					oldItems += 1
					# self.log.info("Existing item: %s", (link["date"], link["dlName"]))

			else:
				row = row.pop()

		self.log.info( "Done")

		if newItems:

			self.log.info( "Committing...",)
			self.conn.commit()
			self.log.info( "Committed")
		else:
			self.log.info("No new items, %s old items.", oldItems)


		return newItems

示例#55

0

显示文件

文件： cleanDb.py 项目： GJdan/MangaCMS

	def consolidateSeriesNaming(self):


		cur = self.conn.cursor()
		# cur.execute("BEGIN;")
		# print("Querying")
		# cur.execute("SELECT DISTINCT(seriesName) FROM {tableName};".format(tableName=self.tableName))
		# print("Queried. Fetching results")
		# ret = cur.fetchall()
		# cur.execute("COMMIT;")
		# print("Have results. Processing")

		# for item in ret:
		# 	item = item[0]
		# 	if not item:
		# 		continue

		# 	mId = nt.getMangaUpdatesId(item)
		# 	if not mId:
		# 		print("Item '{old}', '{new}', mid:{mid}".format(old=item, new=nt.getCanonicalMangaUpdatesName(item), mid=mId))
		# print("Total: ", len(ret))

		items = ["Murciélago", "Murcielago", "Murciélago"]

		for item in items:
			print("------", item, nt.getCanonicalMangaUpdatesName(item), nt.haveCanonicalMangaUpdatesName(item))

		# cur.execute("BEGIN;")
		# print("Querying")
		# cur.execute("SELECT DISTINCT ON (buname) buname, buId FROM mangaseries ORDER BY buname, buid;")
		# print("Queried. Fetching results")
		# ret = cur.fetchall()
		# cur.execute("COMMIT;")
		# print("Have results. Processing")

		# cur.execute("BEGIN;")

		# missing = 0
		# for item in ret:
		# 	buName, buId = item
		# 	if not buName:
		# 		continue

		# 	cur.execute("SELECT * FROM munamelist WHERE name=%s;", (buName, ))
		# 	ret = cur.fetchall()
		# 	# mId = nt.getMangaUpdatesId(buName)

		# 	if not ret:
		# 		print("Item missing '{item}', mid:{mid}".format(item=item, mid=ret))
		# 		self.insertNames(buId, [buName])
		# 		missing += 1

		# 	if not runStatus.run:
		# 		break
		# 		# print("Item '{old}', '{new}', mid:{mid}".format(old=item, new=nt.getCanonicalMangaUpdatesName(item), mid=mId))
		# print("Total: ", len(ret))
		# print("Missing: ", missing)


		# for  dbId, sourceUrl in ret:
		# 	if "batoto" in sourceUrl.lower():
		# 		sourceUrl = sourceUrl.replace("http://www.batoto.net/", "http://bato.to/")
		# 		print("Link", sourceUrl)

		# 		cur.execute("SELECT dbId FROM {tableName} WHERE sourceUrl=%s;".format(tableName=self.tableName), (sourceUrl, ))
		# 		ret = cur.fetchall()
		# 		if not ret:
		# 			print("Updating")
		# 			cur.execute("UPDATE {tableName} SET sourceUrl=%s WHERE dbId=%s;".format(tableName=self.tableName), (sourceUrl, dbId))

		# 		else:
		# 			print("Replacing")
		# 			cur.execute("DELETE FROM {tableName} WHERE sourceUrl=%s;".format(tableName=self.tableName), (sourceUrl, ))
		# 			cur.execute("UPDATE {tableName} SET sourceUrl=%s WHERE dbId=%s;".format(tableName=self.tableName), (sourceUrl, dbId))


		cur.execute("COMMIT;")

示例#56

0

显示文件

文件： __main__.py 项目： GDXN/MangaCMS

def parseTwoArgCall(cmd, val):
	if cmd == "import":
		if not os.path.exists(val):
			print("Passed path '%s' does not exist!" % val)
			return
		autoImporter.importDirectories(val)

	elif cmd == "organize":
		if not os.path.exists(val):
			print("Passed path '%s' does not exist!" % val)
			return
		autOrg.organizeFolder(val)

	elif cmd == "run":
		utilities.runPlugin.runPlugin(val)

	elif cmd == "rename":
		if not os.path.exists(val):
			print("Passed path '%s' does not exist!" % val)
			return
		autOrg.renameSeriesToMatchMangaUpdates(val)

	elif cmd == "lookup":
		print("Passed name = '%s'" % val)
		import nameTools as nt
		haveLookup = nt.haveCanonicalMangaUpdatesName(val)
		if not haveLookup:
			print("Item not found in MangaUpdates name synonym table")
			print("Processed item as searched = '%s'" % nt.prepFilenameForMatching(val))
		else:
			print("Item found in lookup table!")
			print("Canonical name = '%s'" % nt.getCanonicalMangaUpdatesName(val) )

	elif cmd == "purge-dir":
		if not os.path.exists(val):
			print("Passed path '%s' does not exist!" % val)
			return
		utilities.dedupDir.purgeDedupTemps(val)
	elif cmd == "purge-dir-phash":
		if not os.path.exists(val):
			print("Passed path '%s' does not exist!" % val)
			return
		utilities.dedupDir.purgeDedupTempsPhash(val)

	elif cmd == "dirs-restore":
		if not os.path.exists(val):
			print("Passed path '%s' does not exist!" % val)
			return
		utilities.dedupDir.runRestoreDeduper(val)

	elif cmd == "sort-dir-contents":
		if not os.path.exists(val):
			print("Passed path '%s' does not exist!" % val)
			return
		utilities.approxFileSorter.scanDirectories(val)


	elif cmd == "clean-archives":
		if not os.path.exists(val):
			print("Passed path '%s' does not exist!" % val)
			return
		utilities.cleanFiles.cleanArchives(val)

	else:
		print("Did not understand command!")
		print("Sys.argv = ", sys.argv)

示例#57

0

显示文件

	def loadRemoteDirectory(self, fullPath, aggregate=False):
		ret = {}
		dirs = self.wg.getpage("https://manga.madokami.al/stupidapi/fakedirs")

		requirePrefix = splitall(fullPath)

		badwords = [
			'Non-English',
			'Oneshots',
			'Raws',
			'Novels',
			'_Doujinshi',
			'AutoUploaded from Assorted Sources',
		]

		rows = [tmp for tmp in
					[splitall(item) for item in
						[item[1:] if item.startswith("./") else item for item in dirs.split("\n")]
					]
				if
					(
							len(tmp) >= len(requirePrefix)
						and
							all([tmp[x] == requirePrefix[x] for x in range(len(requirePrefix))])
						and
							not any([badword in tmp for badword in badwords]))
				]


		print(len(rows))
		for line in rows:
			if len(line) == 6:

				dirName = line[-1]
				if not dirName:
					continue
				canonName = nt.getCanonicalMangaUpdatesName(dirName)
				matchingName = nt.prepFilenameForMatching(canonName)

				# prepFilenameForMatching can result in empty directory names in some cases.
				# Detect that, and don't bother with it if that happened.
				if not matchingName:
					continue
				fqPath   = os.path.join(*line)
				fullPath = os.path.join(*line[:-1])

				if matchingName in ret:
					tmp = ret[matchingName]
					matchpath, matchName = os.path.split(tmp[-1])
					if isinstance(tmp, list):
						tmp = tmp.pop()
					if aggregate:
						try:
							fqPath = self.aggregateDirs(fullPath, matchpath,dirName, matchName)
						except CanonMismatch:
							pass
						except ValueError:
							traceback.print_exc()
						except ftplib.error_perm:
							traceback.print_exc()
						except PermissionError:
							traceback.print_exc()
					else:
						if COMPLAIN_ABOUT_DUPS:
							self.log.warning("Duplicate directories for series '%s'!", canonName)
							self.log.warning("	'%s/%s'", fullPath, dirName)
							self.log.warning("	'%s/%s'", matchpath, matchName)
					ret[matchingName].append(fqPath)

				else:
					ret[matchingName] = [fqPath]

		return ret