def _download_manga(self):
    """Download new chapters for every series listed in the bookmark JSON file.

    Reads ``self.options.json_file_path``, runs an automatic (non-interactive)
    download for each entry in ``manga_series``, records the timestamp and last
    chapter downloaded for successful series, then rewrites the JSON file
    (backing up the original first).
    """
    cprint("Parsing JSON File...", 'white', attrs=['bold'], file=sys.stdout)
    if self.options.verbose_FLAG:
        cprint("JSON Path = %s" % self.options.json_file_path, 'white', attrs=['bold'], file=sys.stdout)

    with open(self.options.json_file_path) as data:
        configuration = json.load(data)

    # JSON-driven runs are always non-interactive.
    self.options.auto = True

    # Optional top-level 'configuration' section selects a notifier backend.
    if 'configuration' in configuration:
        self.options.notificator = NotificationFactory.get_instance(configuration['configuration']['notificator'])

    # Default OutputDir is the ./MangaName
    set_output_path_to_name = False
    if self.options.outputDir == 'DEFAULT_VALUE':
        set_output_path_to_name = True

    for manga in configuration['manga_series']:
        # Each series gets a shallow copy of the global options so per-series
        # fields (name, site, paths) don't leak between iterations.
        series_options = copy.copy(self.options)
        series_options.manga = manga['name']
        series_options.site = manga['host_site']
        last_downloaded = manga.get('last_chapter_downloaded', "")
        download_path = manga.get('download_path',
                                  ('./' + fix_formatting(series_options.manga, series_options.spaceToken)))

        # A user-supplied -d directory prefixes any relative per-series path.
        if self.options.downloadPath != 'DEFAULT_VALUE' and not os.path.isabs(download_path):
            download_path = os.path.join(self.options.downloadPath, download_path)

        series_options.downloadPath = download_path
        series_options.lastDownloaded = last_downloaded
        if set_output_path_to_name:
            series_options.outputDir = download_path

        serie = MangaDownloader(series_options)
        # result is truthy when at least one new chapter was fetched;
        # last_chapter is the identifier to resume from next run.
        result, last_chapter = serie.download_new_chapters()

        if result:
            t = datetime.datetime.today()
            timestamp = "%d-%02d-%02d %02d:%02d:%02d" % (t.year, t.month, t.day, t.hour, t.minute, t.second)
            manga['timestamp'] = timestamp
            manga['last_chapter_downloaded'] = last_chapter

    # Backs up file
    backup_file_name = self.options.json_file_path + "_bak"
    os.rename(self.options.json_file_path, backup_file_name)

    with open(self.options.json_file_path, 'w') as outfile:
        json.dump(configuration, outfile, indent=4, sort_keys=True)

    # The file was succesfully saved and now remove backup
    os.remove(backup_file_name)
def main():
    """Command-line entry point: parse options, then dispatch to directory
    conversion, JSON-driven batch download, or interactive per-manga download.

    Fixes over the previous revision:
    - ``set_defaults`` now also sets ``useShortName_FLAG`` (the actual dest of
      the ``--useShortName`` option); the old ``useShortName`` key is kept for
      backward compatibility with any code still reading it.
    - The bare ``except:`` around the thread-count conversion is narrowed to
      ``(TypeError, ValueError)`` so it no longer swallows KeyboardInterrupt
      and SystemExit.
    """
    # Initialize Colorama
    init()

    # Load available plugins into the site-selection menu (1-based keys).
    i = 1
    for plugin_name in SiteParserFactory.Instance().plugins:
        siteDict[str(i)] = plugin_name
        i += 1

    print_license_info()

    # for easier parsing, adds free --help and --version
    # optparse (v2.3-v2.7) was chosen over argparse (v2.7+) for compatibility (and relative similarity) reasons
    # and over getopt(v?) for additional functionality
    parser = optparse.OptionParser(usage='usage: %prog [options] <manga name>',
                                   version=('Manga Downloader %s' % VERSION))
    parser.set_defaults(
        all_chapters_FLAG=False,
        auto=False,
        conversion_FLAG=False,
        convert_Directory=False,
        device='Kindle 3',
        downloadFormat='.cbz',
        downloadPath='DEFAULT_VALUE',
        inputDir=None,
        outputDir='DEFAULT_VALUE',
        overwrite_FLAG=False,
        verbose_FLAG=False,
        timeLogging_FLAG=False,
        maxChapterThreads=3,
        useShortName=False,        # legacy key, kept for compatibility
        useShortName_FLAG=False,   # matches the dest of --useShortName
        spaceToken='.',
        proxy=None,
        check_every_minutes=-1,
        no_progress_bars=False
    )

    parser.add_option('--all', action='store_true', dest='all_chapters_FLAG',
                      help='Download all available chapters.')
    parser.add_option('-d', '--directory', dest='downloadPath',
                      help='The destination download directory. Defaults to the directory of the script.')
    parser.add_option('--overwrite', action='store_true', dest='overwrite_FLAG',
                      help='Overwrites previous copies of downloaded chapters.')
    parser.add_option('--verbose', action='store_true', dest='verbose_FLAG',
                      help='Verbose Output.')
    parser.add_option('-j', '--json', dest='json_file_path',
                      help='Parses the .json file and downloads all chapters newer than the last chapter downloaded for'
                           ' the listed mangas.')
    parser.add_option('-c', '--convertFiles', action='store_true', dest='conversion_FLAG',
                      help='Converts downloaded files to a Format/Size acceptable to the device specified by the '
                           '--device parameter.')
    parser.add_option('--device', dest='device',
                      help='Specifies the conversion device. Omitting this option default to %default.')
    parser.add_option('--convertDirectory', action='store_true', dest='convert_Directory',
                      help='Converts the image files stored in the directory specified by --inputDirectory. Stores the '
                           'converted images in the directory specified by --outputDirectory')
    parser.add_option('--inputDirectory', dest='inputDir',
                      help='The directory containing the images to convert when --convertDirectory is specified.')
    parser.add_option('--outputDirectory', dest='outputDir',
                      help='The directory to store the images when --convertDirectory is specified.')
    parser.add_option('-z', '--zip', action='store_const', dest='downloadFormat', const='.zip',
                      help='Downloads using .zip compression. Omitting this option defaults to %default.')
    parser.add_option('-t', '--threads', dest='maxChapterThreads',
                      help='Limits the number of chapter threads to the value specified.')
    parser.add_option('--timeLogging', action='store_true', dest='timeLogging_FLAG',
                      help='Output time logging.')
    parser.add_option('--useShortName', action='store_true', dest='useShortName_FLAG',
                      help='To support devices that limit the size of the filename, this parameter uses a short name')
    parser.add_option('--spaceToken', dest='spaceToken',
                      help='Specifies the character used to replace spaces in the manga name.')
    parser.add_option('--proxy', dest='proxy',
                      help='Specifies the proxy.')
    parser.add_option('--checkEveryMinutes', dest='check_every_minutes',
                      help='When used with -x sets the time in minutes between checks for your bookmarked manga.',
                      type="int")
    parser.add_option('--noProgressBars', action='store_true', dest='no_progress_bars',
                      help='Disable progress bars.')

    (options, args) = parser.parse_args()

    # -t accepts arbitrary text; fall back to 2 threads on any non-integer.
    try:
        options.maxChapterThreads = int(options.maxChapterThreads)
    except (TypeError, ValueError):
        options.maxChapterThreads = 2
    if options.maxChapterThreads <= 0:
        options.maxChapterThreads = 2

    # A manga name is required unless converting a directory or running from JSON.
    if len(args) == 0 and (not (options.convert_Directory or options.json_file_path is not None)):
        parser.error('Manga not specified.')

    set_download_path_to_name_flag = False
    set_output_path_to_default_flag = False
    if len(args) > 0:
        # Default Directory is the ./MangaName
        if options.downloadPath == 'DEFAULT_VALUE':
            set_download_path_to_name_flag = True
        # Default outputDir is the ./MangaName
        if options.outputDir == 'DEFAULT_VALUE':
            set_output_path_to_default_flag = True

    pil_available = is_image_lib_available()
    # Check if PIL Library is available if either of convert Flags are set
    if (not pil_available) and (options.convert_Directory or options.conversion_FLAG):
        print("\nConversion Functionality Not available.\nMust install the PIL (Python Image Library)")
        sys.exit()
    elif pil_available:
        from convert.convert_file import ConvertFile

    if options.convert_Directory:
        options.inputDir = os.path.abspath(options.inputDir)

    # Changes the working directory to the script location
    if os.path.dirname(sys.argv[0]) != "":
        os.chdir(os.path.dirname(sys.argv[0]))

    options.notificator = None
    options.outputMgr = ProgressBarManager()
    if not options.no_progress_bars:
        options.outputMgr.start()

    try:
        if options.convert_Directory:
            if options.outputDir == 'DEFAULT_VALUE':
                options.outputDir = '.'
            print("Converting Files: %s" % options.inputDir)
            ConvertFile.convert(options.outputMgr, options.inputDir, options.outputDir,
                                options.device, options.verbose_FLAG)
        elif options.json_file_path is not None:
            json_parser = MangaJsonParser(options)
            json_parser.download_manga()
        else:
            for manga in args:
                series_options = copy.copy(options)
                print(manga)
                series_options.manga = manga
                if set_download_path_to_name_flag:
                    series_options.downloadPath = (
                        './' + fix_formatting(series_options.manga, series_options.spaceToken))
                if set_output_path_to_default_flag:
                    series_options.outputDir = series_options.downloadPath
                series_options.downloadPath = os.path.realpath(series_options.downloadPath) + os.sep

                # site selection
                print('\nWhich site?')
                for index in siteDict:
                    print('(%s) %s' % (index, siteDict[index]))

                # Python3 fix - removal of raw_input()
                try:
                    site = raw_input()
                except NameError:
                    site = input()

                try:
                    series_options.site = siteDict[site]
                except KeyError:
                    raise InvalidSite('Site selection invalid.')

                serie = MangaDownloader(series_options)
                serie.download_new_chapters()
    except KeyboardInterrupt:
        sys.exit(0)
    finally:
        # Must always stop the manager
        if not options.no_progress_bars:
            options.outputMgr.stop()
def get_manga_url(self):
    """Return the guessed series URL: <base>/manga/<first-char>/<formatted-name>."""
    first_char = self.options.manga[0]
    slug = fix_formatting(self.options.manga, '_',
                          remove_special_chars=False,
                          lower_case=True,
                          use_ignore_chars=False)
    return '%s/manga/%s/%s' % (self.base_url, first_char, slug)
def parse_site(self, url):
    """Locate the series page, scrape its chapter list, and populate
    ``self.chapters`` / ``self.chapters_to_download``.

    Fix: ``sorted(..., cmp=...)`` used the Python-2-only ``cmp`` keyword,
    which raises TypeError on Python 3; replaced with
    ``functools.cmp_to_key`` (available since 2.7, so still backward
    compatible). Regex literals with ``\\d``/``\\.`` escapes are now raw
    strings (identical patterns, no Py3 invalid-escape warnings).
    """
    from functools import cmp_to_key

    source = get_source_code(url, self.options.proxy)

    if source is None or 'the page you have requested can' in source:
        # The guessed URL failed:
        # do a 'begins-with' search, then a 'contains' search
        url = '%s/search.php?name=%s' % (self.base_url, '+'.join(self.options.manga.split()))
        try:
            source = get_source_code(url, self.options.proxy)
            # The site rate-limits repeated searches; back off and retry once.
            if 'Sorry you have just searched, please try 5 seconds later.' in source:
                print('Searched too soon, waiting 5 seconds...')
                time.sleep(5)
            series_results = []
            if source is not None:
                series_results = MangaHere.re_get_series.findall(source)
            if 0 == len(series_results):
                url = '%s/search.php?name=%s' % (self.base_url, '+'.join(self.options.manga.split()))
                source = get_source_code(url, self.options.proxy)
                if source is not None:
                    series_results = MangaHere.re_get_series.findall(source)
            # 0 results
        except AttributeError:
            raise self.MangaNotFound('It doesn\'t exist, or cannot be resolved by autocorrect.')
        else:
            keyword = self.select_from_results(series_results)
            url = '%s/manga/%s/' % (self.base_url, keyword)
            source = get_source_code(url, self.options.proxy)
    else:
        # The Guess worked
        keyword = fix_formatting(self.options.manga, '_', remove_special_chars=True,
                                 lower_case=True, use_ignore_chars=False)

    # other check for manga removal if our initial guess for the name was wrong
    if 'it is not available in' in source or "It's not available in" in source:
        raise self.MangaLicenced('It has been removed.')

    # that's nice of them
    # url = 'http://www.mangahere.com/cache/manga/%s/chapters.js' % keyword
    # source = getSourceCode(url, self.proxy)

    # chapters is a 2-tuple
    # chapters[0] contains the chapter URL
    # chapters[1] contains the chapter title
    is_chapter_only = False

    # can't pre-compile this because relies on class name
    re_get_chapters = re.compile(
        r'a.*?href="http://.*?mangahere.*?/manga/%s/(v[\d]+)/(c[\d]+(\.[\d]+)?)/[^"]*?"' % keyword)
    self.chapters = re_get_chapters.findall(source)
    if not self.chapters:
        # Some series have no volume grouping; fall back to chapter-only URLs.
        is_chapter_only = True
        re_get_chapters = re.compile(
            r'a.*?href="http://.*?mangahere.*?/manga/%s/(c[\d]+(\.[\d]+)?)/[^"]*?"' % keyword)
        self.chapters = re_get_chapters.findall(source)

    # Sort chapters by volume and chapter number. Needed because next chapter isn't always accurate.
    self.chapters = sorted(self.chapters, key=cmp_to_key(self.chapter_compare))

    lower_range = 0
    if is_chapter_only:
        for i in range(0, len(self.chapters)):
            if self.options.auto:
                # Resume after the last chapter downloaded in a previous run.
                if self.options.lastDownloaded == self.chapters[i][0]:
                    lower_range = i + 1
            ch_number = self.re_non_decimal.sub('', self.chapters[i][0])
            self.chapters[i] = ('%s/manga/%s/%s' % (self.base_url, keyword, self.chapters[i][0]),
                                self.chapters[i][0], ch_number)
    else:
        for i in range(0, len(self.chapters)):
            ch_number = self.re_non_decimal.sub('', self.chapters[i][1])
            self.chapters[i] = (
                '%s/manga/%s/%s/%s' % (self.base_url, keyword, self.chapters[i][0], self.chapters[i][1]),
                self.chapters[i][0] + "." + self.chapters[i][1], ch_number)
            if self.options.auto:
                if self.options.lastDownloaded == self.chapters[i][1]:
                    lower_range = i + 1

    upper_range = len(self.chapters)

    # Validate whether the last chapter is available
    source = get_source_code(self.chapters[upper_range - 1][0], self.options.proxy)
    if ('not available yet' in source) or ('Sorry, the page you have requested can’t be found' in source):
        # If the last chapter is not available remove it from the list
        del self.chapters[upper_range - 1]
        upper_range -= 1

    # which ones do we want?
    if not self.options.auto:
        for i in range(0, upper_range):
            if is_chapter_only:
                print('(%i) %s' % (i + 1, self.chapters[i][0]))
            else:
                print('(%i) %s' % (i + 1, self.chapters[i][1]))
        self.chapters_to_download = self.select_chapters(self.chapters)
    # XML component
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def parse_site(self, url):
    """Locate the MangaFox series page, scrape its chapter list, and populate
    ``self.chapters`` / ``self.chapters_to_download``.

    Falls back to the site search (begins-with, then contains) when the
    guessed URL redirects or 404s.
    """
    # True -> also return the final URL after redirects, so a redirect
    # away from the guessed page can be detected below.
    source, redirect_url = get_source_code(url, self.options.proxy, True)

    if redirect_url != url or source is None or "the page you have requested cannot be found" in source:
        # Could not find the manga page by guessing
        # Use the website search
        # name_method=bw -> 'begins with' search
        url = "%s/search.php?name_method=bw&name=%s&is_completed=&advopts=1" % (
            self.base_url,
            "+".join(self.options.manga.split()),
        )
        if self.options.verbose_FLAG:
            print(url)
        try:
            source = get_source_code(url, self.options.proxy)
            series_results = []
            if source is not None:
                series_results = MangaFox.re_get_series.findall(source)
            if 0 == len(series_results):
                # name_method=cw -> 'contains' search (broader fallback)
                url = "%s/search.php?name_method=cw&name=%s&is_completed=&advopts=1" % (
                    self.base_url,
                    "+".join(self.options.manga.split()),
                )
                if self.options.verbose_FLAG:
                    print(url)
                source = get_source_code(url, self.options.proxy)
                if source is not None:
                    series_results = MangaFox.re_get_series.findall(source)
            # 0 results
        except AttributeError:
            raise self.MangaNotFound("It doesn't exist, or cannot be resolved by autocorrect.")
        else:
            keyword = self.select_from_results(series_results)
            if self.options.verbose_FLAG:
                print("Keyword: %s" % keyword)
            # NOTE(review): base_url appears to be a format string with a
            # %s placeholder for the series keyword — confirm against the
            # class definition.
            url = self.base_url % keyword
            if self.options.verbose_FLAG:
                print("URL: %s" % url)
            source = get_source_code(url, self.options.proxy)
            if source is None:
                raise self.MangaNotFound("Search Failed to find Manga.")
    else:
        # The Guess worked
        keyword = fix_formatting(self.options.manga)
        if self.options.verbose_FLAG:
            print("Keyword: %s" % keyword)

    if "it is not available in Manga Fox." in source:
        raise self.MangaNotFound("It has been removed.")

    # that's nice of them
    # url = 'http://mangafox.me/cache/manga/%s/chapters.js' % keyword
    # source = getSourceCode(url, self.proxy)

    # chapters is a 2-tuple
    # chapters[0] contains the chapter URL
    # chapters[1] contains the chapter title
    is_chapter_only = False

    # can't pre-compile this because relies on class name
    re_get_chapters = re.compile(
        'a href="http://.*?mangafox.*?/manga/%s/(v[\d]+)/(c[\d]+)/[^"]*?" title' % keyword)
    self.chapters = re_get_chapters.findall(source)
    if not self.chapters:
        # Some series have no volume grouping; fall back to chapter-only URLs.
        if self.options.verbose_FLAG:
            print("Trying chapter only regex")
        is_chapter_only = True
        re_get_chapters = re.compile('a href="http://.*?mangafox.*?/manga/%s/(c[\d]+)/[^"]*?" title' % keyword)
        self.chapters = re_get_chapters.findall(source)

    # The page lists newest first; reverse into ascending order.
    self.chapters.reverse()

    lower_range = 0

    if is_chapter_only:
        for i in range(0, len(self.chapters)):
            if self.options.verbose_FLAG:
                print("%s" % self.chapters[i])
            if not self.options.auto:
                print("(%i) %s" % (i + 1, self.chapters[i]))
            else:
                # Resume after the last chapter downloaded in a previous run.
                if self.options.lastDownloaded == self.chapters[i]:
                    lower_range = i + 1
            self.chapters[i] = (
                "%s/manga/%s/%s" % (self.base_url, keyword, self.chapters[i]),
                self.chapters[i],
                self.chapters[i],
            )
    else:
        for i in range(0, len(self.chapters)):
            if self.options.verbose_FLAG:
                print("%s %s" % (self.chapters[i][0], self.chapters[i][1]))
            self.chapters[i] = (
                "%s/manga/%s/%s/%s" % (self.base_url, keyword, self.chapters[i][0], self.chapters[i][1]),
                self.chapters[i][0] + "." + self.chapters[i][1],
                self.chapters[i][1],
            )
            if not self.options.auto:
                print("(%i) %s" % (i + 1, self.chapters[i][1]))
            else:
                if self.options.lastDownloaded == self.chapters[i][1]:
                    lower_range = i + 1

    upper_range = len(self.chapters)

    # which ones do we want?
    if not self.options.auto:
        self.chapters_to_download = self.select_chapters(self.chapters)
    # XML component
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def get_manga_url(self):
    """Return the guessed series URL: <base>/manga/<formatted-name>/."""
    slug = fix_formatting(self.options.manga, "_",
                          remove_special_chars=True,
                          lower_case=True,
                          use_ignore_chars=False)
    return "%s/manga/%s/" % (self.base_url, slug)