Example #1
import argparse
import multiprocessing as mp

# MetadataScraper, Downloader, download, prepare_folder and
# download_base_paths are assumed to be defined elsewhere in the module.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("nhentai_no")
    parser.add_argument("-c",
                        "--concurrent_count",
                        metavar='n',
                        type=int,
                        default=20)

    args = parser.parse_args()

    nhentai_no = args.nhentai_no
    concurrent_count = args.concurrent_count

    print("Downloading gallery id#{}...".format(nhentai_no))
    meta_scraper = MetadataScraper(nhentai_no)
    metadata, link_generator = meta_scraper.get_info()
    if metadata and link_generator:
        print("Title: {}\t Pages: {}".format(metadata['title'],
                                             metadata['pages']))
        download_paths = download_base_paths + [metadata['title']]
        prepare_folder(download_paths)

        downloaders = [
            Downloader(page_link, download_paths)
            for page_link in link_generator
        ]

        with mp.Pool(concurrent_count) as p:
            p.map(download, downloaders)

    else:
        print("no metadata is retrieved. exiting...")
Example #2
 def test_flush_cache(self):
     my_downloader = Downloader()
Example #3
 def test_custom_dir(self):
     my_downloader = Downloader()
Example #4
 def test_extension(self):
     my_downloader = Downloader(('.jpg', ))
Example #5
 def test_download(self):
     """Uncomment THe line for testing the downloads """
     my_downloader = Downloader()
Example #6
 def test_search_urls(self):
     """Uncomment The line for testing URLs """
     my_downloader = Downloader()
Example #7
import logging
import sys
from multiprocessing import Process

# Config, Downloader, Parser and Writer are assumed to come from the
# surrounding scraper package.

logging.basicConfig(handlers=[
    logging.FileHandler('scraper.log', 'a', 'utf-8'),
    logging.StreamHandler(sys.stdout)
],
                    level=logging.INFO,
                    format='[%(asctime)s] [%(levelname)s]: %(message)s')

# Determine the source the scraper will start from.
# Only one source is used; the download list takes priority over the start URL.
source = None
if Config.DOWNLOAD_LIST:
    source = Config.DOWNLOAD_LIST
elif Config.START_URL:
    source = Config.START_URL
else:
    logging.error('Source not specified for the scraper')
    sys.exit(-1)

downloader = Downloader(source, Config.REDIS_URI, Config.DOWNLOAD_LIMIT)
parser = Parser(Config.REDIS_URI, Config.PARSE_LIMIT)
writer = Writer(Config.OUTPUT_FILE_PATH, Config.REDIS_URI)

# Run the downloader, parser and writer as separate processes; the main
# process blocks until the writer finishes.
downloader_process = Process(target=downloader.run)
downloader_process.start()

parser_process = Process(target=parser.run)
parser_process.start()

writer_process = Process(target=writer.run)
writer_process.start()
writer_process.join()
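
Example #7 reads every setting from a Config object that the listing does not
include. A minimal sketch of the fields the snippet uses, assuming they are
plain class attributes filled from environment variables (the attribute names
are taken from the snippet above; the defaults are placeholders, not the
project's real values):

import os


class Config:
    DOWNLOAD_LIST = os.environ.get('DOWNLOAD_LIST')    # optional list of start pages
    START_URL = os.environ.get('START_URL')            # fallback single start URL
    REDIS_URI = os.environ.get('REDIS_URI', 'redis://localhost:6379/0')
    DOWNLOAD_LIMIT = int(os.environ.get('DOWNLOAD_LIMIT', '100'))
    PARSE_LIMIT = int(os.environ.get('PARSE_LIMIT', '100'))
    OUTPUT_FILE_PATH = os.environ.get('OUTPUT_FILE_PATH', 'output.json')

With settings like these, the downloader, parser and writer presumably pass
work to each other through Redis, which is why each stage can run in its own
process and only the writer needs to be joined before the script exits.
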
Example #8
from scraper import Downloader

my_downloader = Downloader()
my_downloader.search_urls('Landscapes', limit=10, verbose=True)

# Get the list of saved URLs in the cache
# print(my_downloader.get_urls())
#
# # Print the whole cache
# print(my_downloader.cached_urls)
#
# # Search and download in one call
# my_downloader.download('spaceship', limit=2)
#
# # Now download all the searched pictures
# my_downloader.download(download_cache=True)
#
# # Flush the cache
# my_downloader.flush_cache()
#
# # Change the download directory
# my_downloader.directory = 'my_dir/'
# # Change the file extension type
# my_downloader.extensions = '.jpg'
# print(my_downloader.extensions)
# my_downloader.download('laptop', limit=10, verbose=True)