def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("nhentai_no")
    parser.add_argument("-c", "--concurrent_count", metavar='n', type=int, default=20)
    args = parser.parse_args()
    nhentai_no = args.nhentai_no
    concurrent_count = args.concurrent_count

    print("Downloading gallery id#{}...".format(nhentai_no))
    meta_scraper = MetadataScraper(nhentai_no)
    metadata, link_generator = meta_scraper.get_info()

    if metadata and link_generator:
        print("Title: {}\t Pages: {}".format(metadata['title'], metadata['pages']))
        download_paths = download_base_paths + [metadata['title']]
        prepare_folder(download_paths)
        downloaders = [
            Downloader(page_link, download_paths)
            for page_link in link_generator
        ]
        # Download the pages concurrently with a worker pool.
        with mp.Pool(concurrent_count) as p:
            p.map(download, downloaders)
    else:
        print("No metadata was retrieved. Exiting...")
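A minimal sketch of how this main() would typically be wired up as a script entry point. The guard below, and an invocation like python nhentai_downloader.py <gallery_id> -c 10, assume a script name and module layout not shown here; the function also relies on argparse, multiprocessing (as mp), MetadataScraper, Downloader, prepare_folder, download and download_base_paths being imported or defined elsewhere in the file.

# Hypothetical entry point; names outside main() are assumed to exist in this module.
if __name__ == '__main__':
    main()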
def test_flush_cache(self):
    my_downloader = Downloader()

def test_custom_dir(self):
    my_downloader = Downloader()

def test_extension(self):
    my_downloader = Downloader(('.jpg', ))

def test_download(self):
    """Uncomment the line for testing the downloads."""
    my_downloader = Downloader()

def test_search_urls(self):
    """Uncomment the line for testing URLs."""
    my_downloader = Downloader()
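One possible way to flesh out the cache-related stub above, assuming the flush_cache()/get_urls() API shown in the usage example further below; the expectation that get_urls() is empty after a flush is an assumption, not confirmed by the source.

import unittest

from scraper import Downloader


class DownloaderCacheTest(unittest.TestCase):
    def test_flush_cache_empties_cache(self):
        # Assumes get_urls() returns an empty collection once the cache is
        # flushed. Populating the cache first would need a (network-bound)
        # search_urls() call, so the sketch keeps to the flush itself.
        my_downloader = Downloader()
        my_downloader.flush_cache()
        self.assertEqual(len(my_downloader.get_urls()), 0)


if __name__ == '__main__':
    unittest.main()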
logging.basicConfig(
    handlers=[
        logging.FileHandler('scraper.log', 'a', 'utf-8'),
        logging.StreamHandler(sys.stdout)
    ],
    level=logging.INFO,
    format='[%(asctime)s] [%(levelname)s]: %(message)s')

# Determine the source from which scraping will be done.
# Only one source will be used; priority: LIST > URL.
source = None
if bool(Config.DOWNLOAD_LIST):
    source = Config.DOWNLOAD_LIST
elif Config.START_URL:
    source = Config.START_URL
else:
    logging.error('Source not specified for the scraper')
    sys.exit(-1)

downloader = Downloader(source, Config.REDIS_URI, Config.DOWNLOAD_LIMIT)
parser = Parser(Config.REDIS_URI, Config.PARSE_LIMIT)
writer = Writer(Config.OUTPUT_FILE_PATH, Config.REDIS_URI)

# Run each pipeline stage in its own process.
downloader_process = Process(target=downloader.run)
downloader_process.start()

parser_process = Process(target=parser.run)
parser_process.start()

writer_process = Process(target=writer.run)
writer_process.start()

writer_process.join()
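For orientation, a rough sketch of the Config object this runner reads from. Only the attribute names come from the code above; every value below is a placeholder assumption, not taken from the source.

class Config:
    # Attribute names mirror the runner above; all values are placeholders.
    DOWNLOAD_LIST = []                          # explicit URL list (takes priority)
    START_URL = 'https://example.com/start'     # fallback single start URL
    REDIS_URI = 'redis://localhost:6379/0'      # shared queue / cache backend
    DOWNLOAD_LIMIT = 100                        # cap on pages the Downloader fetches
    PARSE_LIMIT = 100                           # cap on pages the Parser processes
    OUTPUT_FILE_PATH = 'output.json'            # where the Writer stores results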
from scraper import Downloader

my_downloader = Downloader()
my_downloader.search_urls('Landscapes', limit=10, verbose=True)

# Get list of saved URLs in cache
# print(my_downloader.get_urls())

# Print the whole cache
# print(my_downloader.cached_urls)

# Search + download files
# my_downloader.download('spaceship', limit=2)

# Now download all the searched pictures
# my_downloader.download(download_cache=True)

# Flush cache
# my_downloader.flush_cache()

# Change directory
# my_downloader.directory = 'my_dir/'

# Change file extension type
# my_downloader.extensions = '.jpg'
# print(my_downloader.extensions)
# my_downloader.download('laptop', limit=10, verbose=True)