Python fixFormatting示例，util.fixFormatting Python示例

示例#1

0

显示文件

文件： xmlparser.py 项目： PalmerCurling/manga_downloader

	def downloadManga(self):
		"""Download every series listed in the XML file, then save the document.

		Parses ``self.xmlfile_path`` and creates one ``SiteParserThread`` per
		``MangaSeries`` element. The threads are started and joined one at a
		time, after which the DOM is serialized back over the same XML file.

		Side effects: sets ``self.options.auto`` to True and overwrites the
		``site``, ``manga``, ``downloadPath``, ``lastDownloaded`` (and, when no
		output directory was given, ``outputDir``) attributes of
		``self.options`` for every series.
		"""
		print("Parsing XML File...")
		dom = minidom.parse(self.xmlfile_path)

		threadPool = []
		self.options.auto = True

		# Default OutputDir is the ./MangaName
		SetOutputPathToName_Flag = (self.options.outputDir == 'DEFAULT_VALUE')

		for node in dom.getElementsByTagName("MangaSeries"):
			name = getText(node.getElementsByTagName('name')[0])
			site = getText(node.getElementsByTagName('HostSite')[0])

			# Both elements below are optional; a missing tag yields an empty list.
			try:
				lastDownloaded = getText(node.getElementsByTagName('LastChapterDownloaded')[0])
			except IndexError:
				lastDownloaded = ""

			try:
				download_path = getText(node.getElementsByTagName('downloadPath')[0])
			except IndexError:
				download_path = ('./' + fixFormatting(name))

			# NOTE(review): the same options object is reused for every series, so this
			# presumably relies on SiteParserThread reading its values during
			# construction - confirm against SiteParserThread.
			self.options.site = site
			self.options.manga = name
			self.options.downloadPath = download_path
			self.options.lastDownloaded = lastDownloaded
			if SetOutputPathToName_Flag:
				self.options.outputDir = download_path

			# Because the SiteParserThread constructor parses the site to retrieve which
			# chapters to download, starting each thread right after constructing it
			# would be faster. The disadvantage is that the print statements would
			# intermingle with the progress bar, making the output very difficult to
			# follow, so the threads are only collected here and run afterwards.
			threadPool.append(SiteParserThread(self.options, dom, node))

		# Run the downloads strictly one after another.
		for thread in threadPool:
			thread.start()
			thread.join()

		# The context manager guarantees the file is flushed and closed.
		with open(self.xmlfile_path, 'w') as f:
			f.write(dom.toxml())

示例#2

0

显示文件

文件： xmlparser.py 项目： vidyuthd/manga_downloader

    def downloadManga(self):
        """Download every series listed in the XML file, then save the document.

        Parses ``self.xmlfile_path`` and creates one ``SiteParserThread`` per
        ``MangaSeries`` element, each with its own shallow copy of
        ``self.options``. The threads are started and joined one at a time.
        Afterwards the original XML file is renamed to ``<path>_bak``, the DOM
        is written back to the original path as UTF-8, and the backup is
        removed.

        Side effects: sets ``self.options.auto`` to True.
        """
        print("Parsing XML File...")
        if self.verbose_FLAG:
            print("XML Path = %s" % self.xmlfile_path)

        dom = minidom.parse(self.xmlfile_path)

        threadPool = []
        self.options.auto = True

        # Default OutputDir is the ./MangaName
        SetOutputPathToName_Flag = (self.options.outputDir == 'DEFAULT_VALUE')

        for node in dom.getElementsByTagName("MangaSeries"):
            # Per-series copy so one series' settings never leak into another.
            seriesOptions = copy.copy(self.options)
            seriesOptions.manga = getText(node.getElementsByTagName('name')[0])
            seriesOptions.site = getText(
                node.getElementsByTagName('HostSite')[0])

            # Both elements below are optional; a missing tag yields an empty list.
            try:
                lastDownloaded = getText(
                    node.getElementsByTagName('LastChapterDownloaded')[0])
            except IndexError:
                lastDownloaded = ""

            try:
                download_path = getText(
                    node.getElementsByTagName('downloadPath')[0])
            except IndexError:
                download_path = ('./' + fixFormatting(
                    seriesOptions.manga, seriesOptions.spaceToken))

            # A relative per-series path is resolved under the download
            # directory given in the options, when one was given.
            if (self.options.downloadPath != 'DEFAULT_VALUE'
                    and not os.path.isabs(download_path)):
                download_path = os.path.join(self.options.downloadPath,
                                             download_path)

            seriesOptions.downloadPath = download_path
            seriesOptions.lastDownloaded = lastDownloaded
            if SetOutputPathToName_Flag:
                seriesOptions.outputDir = download_path

            # Because the SiteParserThread constructor parses the site to retrieve which
            # chapters to download, starting each thread right after constructing it
            # would be faster. The disadvantage is that the print statements would
            # intermingle with the progress bar, making the output very difficult to
            # follow, so the threads are only collected here and run afterwards.
            threadPool.append(SiteParserThread(seriesOptions, dom, node))

        # Run the downloads strictly one after another.
        for thread in threadPool:
            thread.start()
            thread.join()

        # Serialize before touching the file system so that a serialization
        # failure cannot leave the XML file renamed away. Blank lines emitted
        # by toprettyxml() are dropped.
        outputStr = '\n'.join(
            [line for line in dom.toprettyxml().split('\n') if line.strip()])

        # Back up the file
        backupFileName = self.xmlfile_path + "_bak"
        os.rename(self.xmlfile_path, backupFileName)

        # The payload is encoded bytes, so the file must be opened in binary
        # mode (a text-mode handle rejects bytes on Python 3). The context
        # manager flushes and closes the file before the backup is removed.
        with open(self.xmlfile_path, 'wb') as f:
            f.write(outputStr.encode('utf-8'))

        # The file was successfully saved, so the backup can be removed
        os.remove(backupFileName)

示例#3

0

显示文件

文件： xmlparser.py 项目： GDXN/manga_downloader

	def downloadManga(self):
		"""Download every series listed in the XML file, then save the document.

		Parses ``self.xmlfile_path`` and creates one ``SiteParserThread`` per
		``MangaSeries`` element, each with its own shallow copy of
		``self.options``. The threads are started and joined one at a time.
		Afterwards the original XML file is renamed to ``<path>_bak``, the DOM
		is written back to the original path as UTF-8, and the backup is
		removed.

		Side effects: sets ``self.options.auto`` to True.
		"""
		# Local import: the file's top-level imports are not visible from here.
		import copy

		print("Parsing XML File...")
		if self.verbose_FLAG:
			print("XML Path = %s" % self.xmlfile_path)

		dom = minidom.parse(self.xmlfile_path)

		threadPool = []
		self.options.auto = True

		# Default OutputDir is the ./MangaName
		SetOutputPathToName_Flag = (self.options.outputDir == 'DEFAULT_VALUE')

		for node in dom.getElementsByTagName("MangaSeries"):
			# Work on a per-series copy. Aliasing self.options here would overwrite
			# self.options.downloadPath, and the check below would then nest every
			# later series under the previous series' path.
			seriesOptions = copy.copy(self.options)
			seriesOptions.manga = getText(node.getElementsByTagName('name')[0])
			seriesOptions.site = getText(node.getElementsByTagName('HostSite')[0])

			# Both elements below are optional; a missing tag yields an empty list.
			try:
				lastDownloaded = getText(node.getElementsByTagName('LastChapterDownloaded')[0])
			except IndexError:
				lastDownloaded = ""

			try:
				download_path = getText(node.getElementsByTagName('downloadPath')[0])
			except IndexError:
				download_path = ('./' + fixFormatting(seriesOptions.manga, seriesOptions.spaceToken))

			# A relative per-series path is resolved under the download directory
			# given in the options, when one was given.
			if self.options.downloadPath != 'DEFAULT_VALUE' and not os.path.isabs(download_path):
				download_path = os.path.join(self.options.downloadPath, download_path)

			seriesOptions.downloadPath = download_path
			seriesOptions.lastDownloaded = lastDownloaded
			if SetOutputPathToName_Flag:
				seriesOptions.outputDir = download_path

			# Because the SiteParserThread constructor parses the site to retrieve which
			# chapters to download, starting each thread right after constructing it
			# would be faster. The disadvantage is that the print statements would
			# intermingle with the progress bar, making the output very difficult to
			# follow, so the threads are only collected here and run afterwards.
			threadPool.append(SiteParserThread(seriesOptions, dom, node))

		# Run the downloads strictly one after another.
		for thread in threadPool:
			thread.start()
			thread.join()

		# Serialize before touching the file system so that a serialization failure
		# cannot leave the XML file renamed away. Blank lines emitted by
		# toprettyxml() are dropped.
		outputStr = '\n'.join([line for line in dom.toprettyxml().split('\n') if line.strip()])

		# Back up the file
		backupFileName = self.xmlfile_path + "_bak"
		os.rename(self.xmlfile_path, backupFileName)

		# The payload is encoded bytes, so the file must be opened in binary mode
		# (a text-mode handle rejects bytes on Python 3). The context manager
		# flushes and closes the file before the backup is removed.
		with open(self.xmlfile_path, 'wb') as f:
			f.write(outputStr.encode('utf-8'))

		# The file was successfully saved, so the backup can be removed
		os.remove(backupFileName)

示例#4

0

显示文件

文件： manga.py 项目： EmperorArthur/manga_downloader

def main():
	"""Parse the command line and run the requested conversion or download.

	Three modes are supported, checked in this order:

	* ``--convertDirectory``: convert the images in ``--inputDirectory``.
	* ``--xml``: hand the options to ``MangaXmlParser`` and download from it.
	* otherwise: build one ``SiteParserThread`` per positional manga name and
	  run them one after another.

	Raises:
		InvalidSite: if the selected site is not a valid key of ``siteDict``.
	"""
	printLicenseInfo()

	# for easier parsing, adds free --help and --version
	# optparse (v2.3-v2.7) was chosen over argparse (v2.7+) for compatibility (and relative similarity) reasons
	# and over getopt(v?) for additional functionality
	parser = optparse.OptionParser(
		usage='usage: %prog [options] <manga name>',
		version=('Manga Downloader %s' % VERSION))

	# NOTE(review): the default below is registered as 'useShortName', while the
	# --useShortName option stores into 'useShortName_FLAG'. Confirm which
	# attribute the consumers read before unifying the two names.
	parser.set_defaults(
		all_chapters_FLAG=False,
		auto=False,
		conversion_FLAG=False,
		convert_Directory=False,
		device='Kindle 3',
		downloadFormat='.cbz',
		downloadPath='DEFAULT_VALUE',
		inputDir=None,
		outputDir='DEFAULT_VALUE',
		overwrite_FLAG=False,
		verbose_FLAG=False,
		timeLogging_FLAG=False,
		maxChapterThreads=3,
		useShortName=False,
		spaceToken='.',
		proxy=None,
		siteSelect=0)

	parser.add_option(
		'--all',
		action='store_true',
		dest='all_chapters_FLAG',
		help='Download all available chapters.')

	parser.add_option(
		'-d', '--directory',
		dest='downloadPath',
		help='The destination download directory.  Defaults to the directory of the script.')

	parser.add_option(
		'--overwrite',
		action='store_true',
		dest='overwrite_FLAG',
		help='Overwrites previous copies of downloaded chapters.')

	parser.add_option(
		'--verbose',
		action='store_true',
		dest='verbose_FLAG',
		help='Verbose Output.')

	parser.add_option(
		'-x', '--xml',
		dest='xmlfile_path',
		help='Parses the .xml file and downloads all chapters newer than the last chapter downloaded for the listed mangas.')

	parser.add_option(
		'-c', '--convertFiles',
		action='store_true',
		dest='conversion_FLAG',
		help='Converts downloaded files to a Format/Size acceptable to the device specified by the --device parameter.')

	parser.add_option(
		'--device',
		dest='device',
		help='Specifies the conversion device. Omitting this option default to %default.')

	parser.add_option(
		'--convertDirectory',
		action='store_true',
		dest='convert_Directory',
		help='Converts the image files stored in the directory specified by --inputDirectory. Stores the converted images in the directory specified by --outputDirectory')

	parser.add_option(
		'--inputDirectory',
		dest='inputDir',
		help='The directory containing the images to convert when --convertDirectory is specified.')

	parser.add_option(
		'--outputDirectory',
		dest='outputDir',
		help='The directory to store the images when --convertDirectory is specified.')

	parser.add_option(
		'-z', '--zip',
		action='store_const',
		dest='downloadFormat',
		const='.zip',
		help='Downloads using .zip compression.  Omitting this option defaults to %default.')

	parser.add_option(
		'-t', '--threads',
		dest='maxChapterThreads',
		help='Limits the number of chapter threads to the value specified.')

	parser.add_option(
		'--timeLogging',
		action='store_true',
		dest='timeLogging_FLAG',
		help='Output time logging.')

	parser.add_option(
		'--useShortName',
		action='store_true',
		dest='useShortName_FLAG',
		help='To support devices that limit the size of the filename, this parameter uses a short name')

	parser.add_option(
		'--spaceToken',
		dest='spaceToken',
		help='Specifies the character used to replace spaces in the manga name.')

	parser.add_option(
		'--proxy',
		dest='proxy',
		help='Specifies the proxy.')

	parser.add_option(
		'-s', '--site',
		dest='siteSelect',
		help='Specifies the site to download from.')

	(options, args) = parser.parse_args()

	# Both values arrive as strings from the command line; anything that is not
	# an integer falls back to a safe default instead of aborting.
	try:
		options.siteSelect = int(options.siteSelect)
	except (TypeError, ValueError):
		options.siteSelect = 0

	try:
		options.maxChapterThreads = int(options.maxChapterThreads)
	except (TypeError, ValueError):
		options.maxChapterThreads = 2

	if options.maxChapterThreads <= 0:
		options.maxChapterThreads = 2

	# A manga name is only optional in the directory-conversion and XML modes.
	if len(args) == 0 and not (options.convert_Directory or options.xmlfile_path is not None):
		parser.error('Manga not specified.')

	SetDownloadPathToName_Flag = False
	SetOutputPathToDefault_Flag = False
	if len(args) > 0:
		# Default Directory is the ./MangaName
		if options.downloadPath == 'DEFAULT_VALUE':
			SetDownloadPathToName_Flag = True

		# Default outputDir is the ./MangaName
		if options.outputDir == 'DEFAULT_VALUE':
			SetOutputPathToDefault_Flag = True

	PILAvailable = isImageLibAvailable()
	# Check if PIL Library is available if either of convert Flags are set
	if (not PILAvailable) and (options.convert_Directory or options.conversion_FLAG):
		print ("\nConversion Functionality Not available.\nMust install the PIL (Python Image Library)")
		sys.exit()
	elif PILAvailable:
		from ConvertPackage.ConvertFile import convertFile

	if options.convert_Directory:
		# Without this check os.path.abspath(None) fails with an unhelpful traceback.
		if options.inputDir is None:
			parser.error('--inputDirectory must be specified with --convertDirectory.')
		# Resolved before the chdir below so it stays relative to the caller's directory.
		options.inputDir = os.path.abspath(options.inputDir)

	# Changes the working directory to the script location
	if os.path.dirname(sys.argv[0]) != "":
		os.chdir(os.path.dirname(sys.argv[0]))

	options.outputMgr = progressBarManager()
	options.outputMgr.start()
	try:
		if options.convert_Directory:
			if options.outputDir == 'DEFAULT_VALUE':
				options.outputDir = '.'
			print("Converting Files: %s" % options.inputDir)
			convertFile.convert(options.outputMgr, options.inputDir, options.outputDir, options.device, options.verbose_FLAG)

		elif options.xmlfile_path is not None:
			xmlParser = MangaXmlParser(options)
			xmlParser.downloadManga()
		else:
			threadPool = []
			for manga in args:
				print( manga )
				options.manga = manga

				if SetDownloadPathToName_Flag:
					options.downloadPath = ('./' + fixFormatting(options.manga, options.spaceToken))

				if SetOutputPathToDefault_Flag:
					options.outputDir = options.downloadPath

				options.downloadPath = os.path.realpath(options.downloadPath) + os.sep

				# site selection
				if options.siteSelect == 0:
					print('Which site?')
					for i in siteDict:
						print(siteDict[i][1])

					# Python3 fix - removal of raw_input()
					try:
						options.siteSelect = raw_input()
					except NameError:
						options.siteSelect = input()

				# ValueError covers a non-numeric answer typed at the prompt above.
				try:
					options.site = siteDict[int(options.siteSelect)][0]
				except (KeyError, ValueError):
					raise InvalidSite('Site selection invalid.')

				threadPool.append(SiteParserThread(options, None, None))

			# Run the downloads strictly one after another.
			for thread in threadPool:
				thread.start()
				thread.join()
	finally:
		# Must always stop the manager
		options.outputMgr.stop()

示例#5

0

显示文件

文件： manga.py 项目： kiptenai/manga_downloader

def main():
    """Parse the command line and run the requested conversion or download.

    Three modes are supported, checked in this order:

    * ``--convertDirectory``: convert the images in ``--inputDirectory``.
    * ``--xml``: hand the options to ``MangaXmlParser`` and download from it.
    * otherwise: build one ``SiteParserThread`` per positional manga name and
      run them one after another.

    Raises:
        InvalidSite: if the selected site is not a valid key of ``siteDict``.
    """
    printLicenseInfo()

    # for easier parsing, adds free --help and --version
    # optparse (v2.3-v2.7) was chosen over argparse (v2.7+) for compatibility (and relative similarity) reasons
    # and over getopt(v?) for additional functionality
    parser = optparse.OptionParser(usage='usage: %prog [options] <manga name>',
                                   version=('Manga Downloader %s' % VERSION))

    # NOTE(review): the default below is registered as 'useShortName', while
    # the --useShortName option stores into 'useShortName_FLAG'. Confirm which
    # attribute the consumers read before unifying the two names.
    parser.set_defaults(all_chapters_FLAG=False,
                        auto=False,
                        conversion_FLAG=False,
                        convert_Directory=False,
                        device='Kindle 3',
                        downloadFormat='.cbz',
                        downloadPath='DEFAULT_VALUE',
                        inputDir=None,
                        outputDir='DEFAULT_VALUE',
                        overwrite_FLAG=False,
                        verbose_FLAG=False,
                        timeLogging_FLAG=False,
                        maxChapterThreads=3,
                        useShortName=False,
                        spaceToken='.',
                        proxy=None,
                        siteSelect=0)

    parser.add_option('--all',
                      action='store_true',
                      dest='all_chapters_FLAG',
                      help='Download all available chapters.')

    parser.add_option(
        '-d',
        '--directory',
        dest='downloadPath',
        help=
        'The destination download directory.  Defaults to the directory of the script.'
    )

    parser.add_option(
        '--overwrite',
        action='store_true',
        dest='overwrite_FLAG',
        help='Overwrites previous copies of downloaded chapters.')

    parser.add_option('--verbose',
                      action='store_true',
                      dest='verbose_FLAG',
                      help='Verbose Output.')

    parser.add_option(
        '-x',
        '--xml',
        dest='xmlfile_path',
        help=
        'Parses the .xml file and downloads all chapters newer than the last chapter downloaded for the listed mangas.'
    )

    parser.add_option(
        '-c',
        '--convertFiles',
        action='store_true',
        dest='conversion_FLAG',
        help=
        'Converts downloaded files to a Format/Size acceptable to the device specified by the --device parameter.'
    )

    parser.add_option(
        '--device',
        dest='device',
        help=
        'Specifies the conversion device. Omitting this option default to %default.'
    )

    parser.add_option(
        '--convertDirectory',
        action='store_true',
        dest='convert_Directory',
        help=
        'Converts the image files stored in the directory specified by --inputDirectory. Stores the converted images in the directory specified by --outputDirectory'
    )

    parser.add_option(
        '--inputDirectory',
        dest='inputDir',
        help=
        'The directory containing the images to convert when --convertDirectory is specified.'
    )

    parser.add_option(
        '--outputDirectory',
        dest='outputDir',
        help=
        'The directory to store the images when --convertDirectory is specified.'
    )

    parser.add_option(
        '-z',
        '--zip',
        action='store_const',
        dest='downloadFormat',
        const='.zip',
        help=
        'Downloads using .zip compression.  Omitting this option defaults to %default.'
    )

    parser.add_option(
        '-t',
        '--threads',
        dest='maxChapterThreads',
        help='Limits the number of chapter threads to the value specified.')

    parser.add_option('--timeLogging',
                      action='store_true',
                      dest='timeLogging_FLAG',
                      help='Output time logging.')

    parser.add_option(
        '--useShortName',
        action='store_true',
        dest='useShortName_FLAG',
        help=
        'To support devices that limit the size of the filename, this parameter uses a short name'
    )

    parser.add_option(
        '--spaceToken',
        dest='spaceToken',
        help='Specifies the character used to replace spaces in the manga name.'
    )

    parser.add_option('--proxy', dest='proxy', help='Specifies the proxy.')

    parser.add_option('-s',
                      '--site',
                      dest='siteSelect',
                      help='Specifies the site to download from.')

    (options, args) = parser.parse_args()

    # Both values arrive as strings from the command line; anything that is
    # not an integer falls back to a safe default instead of aborting.
    try:
        options.siteSelect = int(options.siteSelect)
    except (TypeError, ValueError):
        options.siteSelect = 0

    try:
        options.maxChapterThreads = int(options.maxChapterThreads)
    except (TypeError, ValueError):
        options.maxChapterThreads = 2

    if options.maxChapterThreads <= 0:
        options.maxChapterThreads = 2

    # A manga name is only optional in the directory-conversion and XML modes.
    if (len(args) == 0 and not (options.convert_Directory
                                or options.xmlfile_path is not None)):
        parser.error('Manga not specified.')

    SetDownloadPathToName_Flag = False
    SetOutputPathToDefault_Flag = False
    if len(args) > 0:

        # Default Directory is the ./MangaName
        if options.downloadPath == 'DEFAULT_VALUE':
            SetDownloadPathToName_Flag = True

        # Default outputDir is the ./MangaName
        if options.outputDir == 'DEFAULT_VALUE':
            SetOutputPathToDefault_Flag = True

    PILAvailable = isImageLibAvailable()
    # Check if PIL Library is available if either of convert Flags are set
    if ((not PILAvailable)
            and (options.convert_Directory or options.conversion_FLAG)):
        print(
            "\nConversion Functionality Not available.\nMust install the PIL (Python Image Library)"
        )
        sys.exit()
    elif PILAvailable:
        from ConvertPackage.ConvertFile import convertFile

    if options.convert_Directory:
        # Without this check os.path.abspath(None) fails with an unhelpful
        # traceback.
        if options.inputDir is None:
            parser.error(
                '--inputDirectory must be specified with --convertDirectory.')
        # Resolved before the chdir below so it stays relative to the
        # caller's directory.
        options.inputDir = os.path.abspath(options.inputDir)

    # Changes the working directory to the script location
    if os.path.dirname(sys.argv[0]) != "":
        os.chdir(os.path.dirname(sys.argv[0]))

    options.outputMgr = progressBarManager()
    options.outputMgr.start()
    try:
        if options.convert_Directory:
            if options.outputDir == 'DEFAULT_VALUE':
                options.outputDir = '.'
            print("Converting Files: %s" % options.inputDir)
            convertFile.convert(options.outputMgr, options.inputDir,
                                options.outputDir, options.device,
                                options.verbose_FLAG)

        elif options.xmlfile_path is not None:
            xmlParser = MangaXmlParser(options)
            xmlParser.downloadManga()
        else:
            threadPool = []
            for manga in args:
                print(manga)
                options.manga = manga

                if SetDownloadPathToName_Flag:
                    options.downloadPath = (
                        './' +
                        fixFormatting(options.manga, options.spaceToken))

                if SetOutputPathToDefault_Flag:
                    options.outputDir = options.downloadPath

                options.downloadPath = os.path.realpath(
                    options.downloadPath) + os.sep

                # site selection
                if options.siteSelect == 0:
                    print('Which site?')
                    for i in siteDict:
                        print(siteDict[i][1])

                    # Python3 fix - removal of raw_input()
                    try:
                        options.siteSelect = raw_input()
                    except NameError:
                        options.siteSelect = input()

                # ValueError covers a non-numeric answer typed at the prompt
                # above.
                try:
                    options.site = siteDict[int(options.siteSelect)][0]
                except (KeyError, ValueError):
                    raise InvalidSite('Site selection invalid.')

                threadPool.append(SiteParserThread(options, None, None))

            # Run the downloads strictly one after another.
            for thread in threadPool:
                thread.start()
                thread.join()
    finally:
        # Must always stop the manager
        options.outputMgr.stop()

示例#6

0

显示文件

文件： otakuworks.py 项目： PalmerCurling/manga_downloader

	def parseSite(self):
		"""Search OtakuWorks for ``self.manga`` and pick the chapters to download.

		Fills ``self.chapters`` with ``(read_url, title, number)`` tuples, oldest
		release first. In interactive mode the user selects the chapters; in
		auto mode every chapter after the one whose title equals
		``self.lastDownloaded`` is appended to ``self.chapters_to_download``.

		Raises:
			self.MangaNotFound: if the page states the series was licensed and removed.
			self.NoUpdates: in auto mode, when no chapter follows the last downloaded one.
		"""
		print('Beginning OtakuWorks check: %s' % self.manga)
		searchTerms = '+'.join(self.manga.split())
		source = getSourceCode('http://www.otakuworks.com/search/%s' % searchTerms)

		# An empty result list means either no hits or that the search already
		# redirected to the manga homepage; only a non-empty list needs a choice.
		matches = OtakuWorks.re_getMangas.findall(source)
		if matches:
			source = getSourceCode(self.selectFromResults(matches))

		removedNotice = 'has been licensed and as per request all releases under it have been removed.'
		if removedNotice in source:
			raise self.MangaNotFound('It has been removed.')

		# The pattern embeds the manga name, so it cannot be pre-compiled.
		nameSlug = '-'.join(fixFormatting(self.manga, '.').replace('_', ' ').split())
		chapterPattern = re.compile('a href="([^>]*%s[^>]*)">([^<]*#([^<]*))</a>' % nameSlug)
		self.chapters = chapterPattern.findall(source)
		self.chapters.reverse()

		firstNew = 0
		for position, (link, title, number) in enumerate(self.chapters):
			self.chapters[position] = ('http://www.otakuworks.com' + link + '/read', title, number)
			if self.auto:
				# Remember the slot right after the last downloaded chapter.
				if self.lastDownloaded == title:
					firstNew = position + 1
			else:
				print('(%i) %s' % (position + 1, title))

		total = len(self.chapters)

		if self.auto:
			if firstNew == total:
				raise self.NoUpdates
			for position in range(firstNew, total):
				self.chapters_to_download.append(position)
		else:
			self.chapters_to_download = self.selectChapters(self.chapters)
		return