def _download_manga(self):
    """Download new chapters for every series listed in the bookmark JSON file.

    Reads ``self.options.json_file_path``, runs an automatic (non-interactive)
    download for each entry in ``manga_series``, records the timestamp and last
    chapter downloaded for successful series, then rewrites the JSON file
    (backing up the original first).
    """
    cprint("Parsing JSON File...", 'white', attrs=['bold'], file=sys.stdout)
    if self.options.verbose_FLAG:
        cprint("JSON Path = %s" % self.options.json_file_path, 'white', attrs=['bold'], file=sys.stdout)

    with open(self.options.json_file_path) as data:
        configuration = json.load(data)

    # JSON-driven runs are always non-interactive.
    self.options.auto = True

    # Optional top-level 'configuration' section selects a notifier backend.
    if 'configuration' in configuration:
        self.options.notificator = NotificationFactory.get_instance(configuration['configuration']['notificator'])

    # Default OutputDir is the ./MangaName
    set_output_path_to_name = False
    if self.options.outputDir == 'DEFAULT_VALUE':
        set_output_path_to_name = True

    for manga in configuration['manga_series']:
        # Each series gets a shallow copy of the global options so per-series
        # fields (name, site, paths) don't leak between iterations.
        series_options = copy.copy(self.options)
        series_options.manga = manga['name']
        series_options.site = manga['host_site']
        last_downloaded = manga.get('last_chapter_downloaded', "")
        download_path = manga.get('download_path',
                                  ('./' + fix_formatting(series_options.manga, series_options.spaceToken)))

        # A user-supplied -d directory prefixes any relative per-series path.
        if self.options.downloadPath != 'DEFAULT_VALUE' and not os.path.isabs(download_path):
            download_path = os.path.join(self.options.downloadPath, download_path)

        series_options.downloadPath = download_path
        series_options.lastDownloaded = last_downloaded
        if set_output_path_to_name:
            series_options.outputDir = download_path

        serie = MangaDownloader(series_options)
        # result is truthy when at least one new chapter was fetched;
        # last_chapter is the identifier to resume from next run.
        result, last_chapter = serie.download_new_chapters()

        if result:
            t = datetime.datetime.today()
            timestamp = "%d-%02d-%02d %02d:%02d:%02d" % (t.year, t.month, t.day, t.hour, t.minute, t.second)
            manga['timestamp'] = timestamp
            manga['last_chapter_downloaded'] = last_chapter

    # Backs up file
    backup_file_name = self.options.json_file_path + "_bak"
    os.rename(self.options.json_file_path, backup_file_name)

    with open(self.options.json_file_path, 'w') as outfile:
        json.dump(configuration, outfile, indent=4, sort_keys=True)

    # The file was succesfully saved and now remove backup
    os.remove(backup_file_name)
def main():
    """Command-line entry point: parse options, then dispatch to directory
    conversion, JSON-driven batch download, or interactive per-manga download.

    Fixes over the previous revision:
    - ``set_defaults`` now also sets ``useShortName_FLAG`` (the actual dest of
      the ``--useShortName`` option); the old ``useShortName`` key is kept for
      backward compatibility with any code still reading it.
    - The bare ``except:`` around the thread-count conversion is narrowed to
      ``(TypeError, ValueError)`` so it no longer swallows KeyboardInterrupt
      and SystemExit.
    """
    # Initialize Colorama
    init()

    # Load available plugins into the site-selection menu (1-based keys).
    i = 1
    for plugin_name in SiteParserFactory.Instance().plugins:
        siteDict[str(i)] = plugin_name
        i += 1

    print_license_info()

    # for easier parsing, adds free --help and --version
    # optparse (v2.3-v2.7) was chosen over argparse (v2.7+) for compatibility (and relative similarity) reasons
    # and over getopt(v?) for additional functionality
    parser = optparse.OptionParser(usage='usage: %prog [options] <manga name>',
                                   version=('Manga Downloader %s' % VERSION))
    parser.set_defaults(
        all_chapters_FLAG=False,
        auto=False,
        conversion_FLAG=False,
        convert_Directory=False,
        device='Kindle 3',
        downloadFormat='.cbz',
        downloadPath='DEFAULT_VALUE',
        inputDir=None,
        outputDir='DEFAULT_VALUE',
        overwrite_FLAG=False,
        verbose_FLAG=False,
        timeLogging_FLAG=False,
        maxChapterThreads=3,
        useShortName=False,        # legacy key, kept for compatibility
        useShortName_FLAG=False,   # matches the dest of --useShortName
        spaceToken='.',
        proxy=None,
        check_every_minutes=-1,
        no_progress_bars=False
    )

    parser.add_option('--all', action='store_true', dest='all_chapters_FLAG',
                      help='Download all available chapters.')
    parser.add_option('-d', '--directory', dest='downloadPath',
                      help='The destination download directory. Defaults to the directory of the script.')
    parser.add_option('--overwrite', action='store_true', dest='overwrite_FLAG',
                      help='Overwrites previous copies of downloaded chapters.')
    parser.add_option('--verbose', action='store_true', dest='verbose_FLAG',
                      help='Verbose Output.')
    parser.add_option('-j', '--json', dest='json_file_path',
                      help='Parses the .json file and downloads all chapters newer than the last chapter downloaded for'
                           ' the listed mangas.')
    parser.add_option('-c', '--convertFiles', action='store_true', dest='conversion_FLAG',
                      help='Converts downloaded files to a Format/Size acceptable to the device specified by the '
                           '--device parameter.')
    parser.add_option('--device', dest='device',
                      help='Specifies the conversion device. Omitting this option default to %default.')
    parser.add_option('--convertDirectory', action='store_true', dest='convert_Directory',
                      help='Converts the image files stored in the directory specified by --inputDirectory. Stores the '
                           'converted images in the directory specified by --outputDirectory')
    parser.add_option('--inputDirectory', dest='inputDir',
                      help='The directory containing the images to convert when --convertDirectory is specified.')
    parser.add_option('--outputDirectory', dest='outputDir',
                      help='The directory to store the images when --convertDirectory is specified.')
    parser.add_option('-z', '--zip', action='store_const', dest='downloadFormat', const='.zip',
                      help='Downloads using .zip compression. Omitting this option defaults to %default.')
    parser.add_option('-t', '--threads', dest='maxChapterThreads',
                      help='Limits the number of chapter threads to the value specified.')
    parser.add_option('--timeLogging', action='store_true', dest='timeLogging_FLAG',
                      help='Output time logging.')
    parser.add_option('--useShortName', action='store_true', dest='useShortName_FLAG',
                      help='To support devices that limit the size of the filename, this parameter uses a short name')
    parser.add_option('--spaceToken', dest='spaceToken',
                      help='Specifies the character used to replace spaces in the manga name.')
    parser.add_option('--proxy', dest='proxy',
                      help='Specifies the proxy.')
    parser.add_option('--checkEveryMinutes', dest='check_every_minutes',
                      help='When used with -x sets the time in minutes between checks for your bookmarked manga.',
                      type="int")
    parser.add_option('--noProgressBars', action='store_true', dest='no_progress_bars',
                      help='Disable progress bars.')

    (options, args) = parser.parse_args()

    # -t accepts arbitrary text; fall back to 2 threads on any non-integer.
    try:
        options.maxChapterThreads = int(options.maxChapterThreads)
    except (TypeError, ValueError):
        options.maxChapterThreads = 2
    if options.maxChapterThreads <= 0:
        options.maxChapterThreads = 2

    # A manga name is required unless converting a directory or running from JSON.
    if len(args) == 0 and (not (options.convert_Directory or options.json_file_path is not None)):
        parser.error('Manga not specified.')

    set_download_path_to_name_flag = False
    set_output_path_to_default_flag = False
    if len(args) > 0:
        # Default Directory is the ./MangaName
        if options.downloadPath == 'DEFAULT_VALUE':
            set_download_path_to_name_flag = True
        # Default outputDir is the ./MangaName
        if options.outputDir == 'DEFAULT_VALUE':
            set_output_path_to_default_flag = True

    pil_available = is_image_lib_available()
    # Check if PIL Library is available if either of convert Flags are set
    if (not pil_available) and (options.convert_Directory or options.conversion_FLAG):
        print("\nConversion Functionality Not available.\nMust install the PIL (Python Image Library)")
        sys.exit()
    elif pil_available:
        from convert.convert_file import ConvertFile

    if options.convert_Directory:
        options.inputDir = os.path.abspath(options.inputDir)

    # Changes the working directory to the script location
    if os.path.dirname(sys.argv[0]) != "":
        os.chdir(os.path.dirname(sys.argv[0]))

    options.notificator = None
    options.outputMgr = ProgressBarManager()
    if not options.no_progress_bars:
        options.outputMgr.start()

    try:
        if options.convert_Directory:
            if options.outputDir == 'DEFAULT_VALUE':
                options.outputDir = '.'
            print("Converting Files: %s" % options.inputDir)
            ConvertFile.convert(options.outputMgr, options.inputDir, options.outputDir,
                                options.device, options.verbose_FLAG)
        elif options.json_file_path is not None:
            json_parser = MangaJsonParser(options)
            json_parser.download_manga()
        else:
            for manga in args:
                series_options = copy.copy(options)
                print(manga)
                series_options.manga = manga
                if set_download_path_to_name_flag:
                    series_options.downloadPath = (
                        './' + fix_formatting(series_options.manga, series_options.spaceToken))
                if set_output_path_to_default_flag:
                    series_options.outputDir = series_options.downloadPath
                series_options.downloadPath = os.path.realpath(series_options.downloadPath) + os.sep

                # site selection
                print('\nWhich site?')
                for index in siteDict:
                    print('(%s) %s' % (index, siteDict[index]))

                # Python3 fix - removal of raw_input()
                try:
                    site = raw_input()
                except NameError:
                    site = input()

                try:
                    series_options.site = siteDict[site]
                except KeyError:
                    raise InvalidSite('Site selection invalid.')

                serie = MangaDownloader(series_options)
                serie.download_new_chapters()
    except KeyboardInterrupt:
        sys.exit(0)
    finally:
        # Must always stop the manager
        if not options.no_progress_bars:
            options.outputMgr.stop()
def get_manga_url(self):
    """Return the guessed series URL: <base>/manga/<first-char>/<formatted-name>."""
    first_char = self.options.manga[0]
    slug = fix_formatting(self.options.manga, '_',
                          remove_special_chars=False,
                          lower_case=True,
                          use_ignore_chars=False)
    return '%s/manga/%s/%s' % (self.base_url, first_char, slug)
def parse_site(self, url):
    """Locate the series page, scrape its chapter list, and populate
    ``self.chapters`` / ``self.chapters_to_download``.

    Fix: ``sorted(..., cmp=...)`` used the Python-2-only ``cmp`` keyword,
    which raises TypeError on Python 3; replaced with
    ``functools.cmp_to_key`` (available since 2.7, so still backward
    compatible). Regex literals with ``\\d``/``\\.`` escapes are now raw
    strings (identical patterns, no Py3 invalid-escape warnings).
    """
    from functools import cmp_to_key

    source = get_source_code(url, self.options.proxy)

    if source is None or 'the page you have requested can' in source:
        # The guessed URL failed:
        # do a 'begins-with' search, then a 'contains' search
        url = '%s/search.php?name=%s' % (self.base_url, '+'.join(self.options.manga.split()))
        try:
            source = get_source_code(url, self.options.proxy)
            # The site rate-limits repeated searches; back off and retry once.
            if 'Sorry you have just searched, please try 5 seconds later.' in source:
                print('Searched too soon, waiting 5 seconds...')
                time.sleep(5)
            series_results = []
            if source is not None:
                series_results = MangaHere.re_get_series.findall(source)
            if 0 == len(series_results):
                url = '%s/search.php?name=%s' % (self.base_url, '+'.join(self.options.manga.split()))
                source = get_source_code(url, self.options.proxy)
                if source is not None:
                    series_results = MangaHere.re_get_series.findall(source)
            # 0 results
        except AttributeError:
            raise self.MangaNotFound('It doesn\'t exist, or cannot be resolved by autocorrect.')
        else:
            keyword = self.select_from_results(series_results)
            url = '%s/manga/%s/' % (self.base_url, keyword)
            source = get_source_code(url, self.options.proxy)
    else:
        # The Guess worked
        keyword = fix_formatting(self.options.manga, '_', remove_special_chars=True,
                                 lower_case=True, use_ignore_chars=False)

    # other check for manga removal if our initial guess for the name was wrong
    if 'it is not available in' in source or "It's not available in" in source:
        raise self.MangaLicenced('It has been removed.')

    # that's nice of them
    # url = 'http://www.mangahere.com/cache/manga/%s/chapters.js' % keyword
    # source = getSourceCode(url, self.proxy)

    # chapters is a 2-tuple
    # chapters[0] contains the chapter URL
    # chapters[1] contains the chapter title
    is_chapter_only = False

    # can't pre-compile this because relies on class name
    re_get_chapters = re.compile(
        r'a.*?href="http://.*?mangahere.*?/manga/%s/(v[\d]+)/(c[\d]+(\.[\d]+)?)/[^"]*?"' % keyword)
    self.chapters = re_get_chapters.findall(source)
    if not self.chapters:
        # Some series have no volume grouping; fall back to chapter-only URLs.
        is_chapter_only = True
        re_get_chapters = re.compile(
            r'a.*?href="http://.*?mangahere.*?/manga/%s/(c[\d]+(\.[\d]+)?)/[^"]*?"' % keyword)
        self.chapters = re_get_chapters.findall(source)

    # Sort chapters by volume and chapter number. Needed because next chapter isn't always accurate.
    self.chapters = sorted(self.chapters, key=cmp_to_key(self.chapter_compare))

    lower_range = 0
    if is_chapter_only:
        for i in range(0, len(self.chapters)):
            if self.options.auto:
                # Resume after the last chapter downloaded in a previous run.
                if self.options.lastDownloaded == self.chapters[i][0]:
                    lower_range = i + 1
            ch_number = self.re_non_decimal.sub('', self.chapters[i][0])
            self.chapters[i] = ('%s/manga/%s/%s' % (self.base_url, keyword, self.chapters[i][0]),
                                self.chapters[i][0], ch_number)
    else:
        for i in range(0, len(self.chapters)):
            ch_number = self.re_non_decimal.sub('', self.chapters[i][1])
            self.chapters[i] = (
                '%s/manga/%s/%s/%s' % (self.base_url, keyword, self.chapters[i][0], self.chapters[i][1]),
                self.chapters[i][0] + "." + self.chapters[i][1], ch_number)
            if self.options.auto:
                if self.options.lastDownloaded == self.chapters[i][1]:
                    lower_range = i + 1

    upper_range = len(self.chapters)

    # Validate whether the last chapter is available
    source = get_source_code(self.chapters[upper_range - 1][0], self.options.proxy)
    if ('not available yet' in source) or ('Sorry, the page you have requested can’t be found' in source):
        # If the last chapter is not available remove it from the list
        del self.chapters[upper_range - 1]
        upper_range -= 1

    # which ones do we want?
    if not self.options.auto:
        for i in range(0, upper_range):
            if is_chapter_only:
                print('(%i) %s' % (i + 1, self.chapters[i][0]))
            else:
                print('(%i) %s' % (i + 1, self.chapters[i][1]))
        self.chapters_to_download = self.select_chapters(self.chapters)
    # XML component
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def parse_site(self, url):
    """Locate the MangaFox series page, scrape its chapter list, and populate
    ``self.chapters`` / ``self.chapters_to_download``.

    Falls back to the site search (begins-with, then contains) when the
    guessed URL redirects or 404s.
    """
    # True -> also return the final URL after redirects, so a redirect
    # away from the guessed page can be detected below.
    source, redirect_url = get_source_code(url, self.options.proxy, True)

    if redirect_url != url or source is None or "the page you have requested cannot be found" in source:
        # Could not find the manga page by guessing
        # Use the website search
        # name_method=bw -> 'begins with' search
        url = "%s/search.php?name_method=bw&name=%s&is_completed=&advopts=1" % (
            self.base_url,
            "+".join(self.options.manga.split()),
        )
        if self.options.verbose_FLAG:
            print(url)
        try:
            source = get_source_code(url, self.options.proxy)
            series_results = []
            if source is not None:
                series_results = MangaFox.re_get_series.findall(source)
            if 0 == len(series_results):
                # name_method=cw -> 'contains' search (broader fallback)
                url = "%s/search.php?name_method=cw&name=%s&is_completed=&advopts=1" % (
                    self.base_url,
                    "+".join(self.options.manga.split()),
                )
                if self.options.verbose_FLAG:
                    print(url)
                source = get_source_code(url, self.options.proxy)
                if source is not None:
                    series_results = MangaFox.re_get_series.findall(source)
            # 0 results
        except AttributeError:
            raise self.MangaNotFound("It doesn't exist, or cannot be resolved by autocorrect.")
        else:
            keyword = self.select_from_results(series_results)
            if self.options.verbose_FLAG:
                print("Keyword: %s" % keyword)
            # NOTE(review): base_url appears to be a format string with a
            # %s placeholder for the series keyword — confirm against the
            # class definition.
            url = self.base_url % keyword
            if self.options.verbose_FLAG:
                print("URL: %s" % url)
            source = get_source_code(url, self.options.proxy)
            if source is None:
                raise self.MangaNotFound("Search Failed to find Manga.")
    else:
        # The Guess worked
        keyword = fix_formatting(self.options.manga)
        if self.options.verbose_FLAG:
            print("Keyword: %s" % keyword)

    if "it is not available in Manga Fox." in source:
        raise self.MangaNotFound("It has been removed.")

    # that's nice of them
    # url = 'http://mangafox.me/cache/manga/%s/chapters.js' % keyword
    # source = getSourceCode(url, self.proxy)

    # chapters is a 2-tuple
    # chapters[0] contains the chapter URL
    # chapters[1] contains the chapter title
    is_chapter_only = False

    # can't pre-compile this because relies on class name
    re_get_chapters = re.compile(
        'a href="http://.*?mangafox.*?/manga/%s/(v[\d]+)/(c[\d]+)/[^"]*?" title' % keyword)
    self.chapters = re_get_chapters.findall(source)
    if not self.chapters:
        # Some series have no volume grouping; fall back to chapter-only URLs.
        if self.options.verbose_FLAG:
            print("Trying chapter only regex")
        is_chapter_only = True
        re_get_chapters = re.compile('a href="http://.*?mangafox.*?/manga/%s/(c[\d]+)/[^"]*?" title' % keyword)
        self.chapters = re_get_chapters.findall(source)

    # The page lists newest first; reverse into ascending order.
    self.chapters.reverse()

    lower_range = 0

    if is_chapter_only:
        for i in range(0, len(self.chapters)):
            if self.options.verbose_FLAG:
                print("%s" % self.chapters[i])
            if not self.options.auto:
                print("(%i) %s" % (i + 1, self.chapters[i]))
            else:
                # Resume after the last chapter downloaded in a previous run.
                if self.options.lastDownloaded == self.chapters[i]:
                    lower_range = i + 1
            self.chapters[i] = (
                "%s/manga/%s/%s" % (self.base_url, keyword, self.chapters[i]),
                self.chapters[i],
                self.chapters[i],
            )
    else:
        for i in range(0, len(self.chapters)):
            if self.options.verbose_FLAG:
                print("%s %s" % (self.chapters[i][0], self.chapters[i][1]))
            self.chapters[i] = (
                "%s/manga/%s/%s/%s" % (self.base_url, keyword, self.chapters[i][0], self.chapters[i][1]),
                self.chapters[i][0] + "." + self.chapters[i][1],
                self.chapters[i][1],
            )
            if not self.options.auto:
                print("(%i) %s" % (i + 1, self.chapters[i][1]))
            else:
                if self.options.lastDownloaded == self.chapters[i][1]:
                    lower_range = i + 1

    upper_range = len(self.chapters)

    # which ones do we want?
    if not self.options.auto:
        self.chapters_to_download = self.select_chapters(self.chapters)
    # XML component
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def get_manga_url(self):
    """Return the guessed series URL: <base>/manga/<formatted-name>/."""
    slug = fix_formatting(self.options.manga, "_",
                          remove_special_chars=True,
                          lower_case=True,
                          use_ignore_chars=False)
    return "%s/manga/%s/" % (self.base_url, slug)