def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    # By placing the executor inside a with block, the executor's shutdown
    # method will be called, cleaning up threads.
    #
    # By default, the executor sets the number of workers to 5 times the
    # number of CPUs.
    with ThreadPoolExecutor() as executor:
        # Create a new partially applied function that stores the directory
        # argument.
        #
        # This allows the download_link function that normally takes two
        # arguments to work with the map function that expects a function of a
        # single argument.
        fn = partial(download_link, download_dir)
        # Executes fn concurrently using threads on the links iterable. The
        # timeout is for the entire process, not a single call, so downloading
        # all images must complete within 30 seconds.
        executor.map(fn, links, timeout=30)
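# A minimal sketch of one subtlety in the variant above: the 30 second timeout
# only takes effect when the iterator returned by executor.map is consumed, so
# draining it is what actually surfaces TimeoutError or exceptions raised by
# download_link. The names fn, download_link, download_dir and links are taken
# from the snippet above.
with ThreadPoolExecutor() as executor:
    fn = partial(download_link, download_dir)
    for _ in executor.map(fn, links, timeout=30):
        pass  # consuming results here propagates timeouts and worker exceptions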
def main():
    ts = time()
    download_dir = setup_download_dir()
    links = [l for l in get_links() if l.endswith('.csv')]
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    queue = Queue()
    # Create 3 worker threads
    for x in range(3):
        worker = DownloadWorker(queue)
        # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()
    # Put the tasks into the queue as a tuple
    for link in links:
        logger.info('Queueing {}'.format(link))
        queue.put((download_dir, link))
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    logging.info('Took %s', time() - ts)
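# The DownloadWorker class used by the queue-based variants is defined
# elsewhere in the source; a minimal sketch of what such a worker plausibly
# looks like, assuming the standard Queue consumer pattern and the
# two-argument download_link above:
from queue import Queue
from threading import Thread


class DownloadWorker(Thread):

    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            # Block until the main thread hands us a (directory, link) tuple
            directory, link = self.queue.get()
            try:
                download_link(directory, link)
            finally:
                # Lets queue.join() in main() know this task is finished
                self.queue.task_done()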
def main(): print("In Main") ts = time() download_dir = setup_download_dir() links = [l for l in get_links('c53645e1e12ad62') if l.endswith('.jpg')] for link in links: download_link(download_dir, link) print('Took {}s'.format(time() -ts))
def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    q = Queue(connection=Redis(host='localhost', port=6379))
    for link in links:
        q.enqueue(download_link, download_dir, link)
async def main():
    client_id = 'f8f603617f590ed'
    download_dir = setup_download_dir()
    # We use a session to take advantage of TCP keep-alive.
    # Set a 3 second read and connect timeout. Default is 5 minutes.
    async with aiohttp.ClientSession(conn_timeout=3, read_timeout=3) as session:
        tasks = [async_download_link(session, download_dir, l) for l in get_links(client_id)]
        # gather aggregates all the tasks and schedules them in the event loop
        await asyncio.gather(*tasks, return_exceptions=True)
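# Note: conn_timeout and read_timeout are deprecated in aiohttp 3.x in favor
# of a single ClientTimeout object. A minimal sketch of the equivalent setup,
# reusing async_download_link, get_links and the other names from the snippet
# above:
import asyncio
import aiohttp


async def main_with_client_timeout(client_id, download_dir):
    # sock_connect/sock_read mirror the 3 second connect and read timeouts
    timeout = aiohttp.ClientTimeout(sock_connect=3, sock_read=3)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        tasks = [async_download_link(session, download_dir, l)
                 for l in get_links(client_id)]
        await asyncio.gather(*tasks, return_exceptions=True)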
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
def main():
    st = time()
    client_id = 'ee43c9d73f7dcc9'
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable")
    download_dir = setup_download_dir()
    loop = asyncio.get_event_loop()
    # Instead of asyncio.async you can use loop.create_task, but loop.create_task is only available
    # in Python >= 3.4.2
    tasks = [asyncio.async(async_download_link(download_dir, l)) for l in get_links(client_id)]
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    print('Took {}s'.format(time() - st))
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    logging.info('Took %s seconds', time() - ts)
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = (l for l in get_links(client_id) if l.endswith('.jpg'))
    for link in links:
        download_link(download_dir, link)
    logging.info('Took %s seconds', time() - ts)
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
def main():
    ts = time()
    client_id = 'bef2d9292d6bcbd'
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    logging.info('Took %s seconds', time() - ts)
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    loop = asyncio.get_event_loop()
    # Instead of asyncio.async you can also use loop.create_task, but loop.create_task is only available
    # in Python >= 3.4.2
    tasks = [asyncio.async(async_download_link(download_dir, l)) for l in get_links(client_id)]
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    logging.info('Took %s seconds', time() - ts)
def main():
    ts = time()
    download_dir = setup_download_dir()
    links = ['http://img3.6comic.com:99/2/103/861/001_mdh.jpg',
             'http://img3.6comic.com:99/2/103/861/002_9uj.jpg',
             'http://img3.6comic.com:99/2/103/861/003_c8x.jpg',
             'http://img3.6comic.com:99/2/103/861/004_y3b.jpg',
             'http://img3.6comic.com:99/2/103/861/005_hu6.jpg']
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    download = partial(download_link, download_dir)
    num_processes = 8  # assumed worker count; num_processes was undefined in the original snippet
    with Pool(num_processes) as p:
        p.map(download, links)
    print('Took {}s'.format(time() - ts))
def main():
    ts = time()
    try:
        import key
        client_id = key.client_id
    except ImportError:
        logger.error('Cannot import client_id')
        return
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    logger.info(f"Took {time() - ts} seconds")
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    logging.info('Took %s seconds', time() - ts)
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    download = partial(download_link, download_dir)
    with Pool(4) as p:
        p.map(download, links)
    logger.info('Took: %s', time() - ts)
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = (l for l in get_links(client_id) if l.endswith('.jpg'))
    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    logging.info('Took %s seconds', time() - ts)
def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    q = Queue(connection=Redis(host='localhost', port=6379))
    for link in links:
        # Puts the job on a Redis server, which can be on another machine.
        # Run rqworker in a terminal window to start a worker listening on the default queue;
        # rqworker queue_name will listen on that named queue.
        q.enqueue(download_link, download_dir, link)
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    print(links, len(links))
    download = partial(download_link, download_dir)
    with ThreadPoolExecutor() as ex:
        ex.map(download, links)
    logging.info('Took %s seconds', time() - ts)
async def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    # We use a session to take advantage of TCP keep-alive.
    # Set a 3 second read and connect timeout. Default is 5 minutes.
    async with aiohttp.ClientSession(conn_timeout=3, read_timeout=3) as session:
        tasks = [async_download_link(session, download_dir, l) for l in get_links(client_id)]
        # gather aggregates all the tasks and schedules them in the event loop
        await asyncio.gather(*tasks, return_exceptions=True)
def main():
    ts = time()
    download_dir = setup_download_dir()
    links = [
        'http://img3.6comic.com:99/2/103/861/001_mdh.jpg',
        'http://img3.6comic.com:99/2/103/861/002_9uj.jpg',
        'http://img3.6comic.com:99/2/103/861/003_c8x.jpg',
        'http://img3.6comic.com:99/2/103/861/004_y3b.jpg',
        'http://img3.6comic.com:99/2/103/861/005_hu6.jpg'
    ]
    q = Queue(connection=Redis())
    result = [q.enqueue(download_link, download_dir, url) for url in links]
    print('Took {}s'.format(time() - ts))
def main():
    ts = time()
    hello()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    print(download_dir)
    testurl = u'http://i.imgur.com/i5QjTPA.jpg'
    download_link(download_dir, testurl)
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
def main():
    ts = time()
    download_dir = setup_download_dir()
    links = [
        'http://img3.6comic.com:99/2/103/861/001_mdh.jpg',
        'http://img3.6comic.com:99/2/103/861/002_9uj.jpg',
        'http://img3.6comic.com:99/2/103/861/003_c8x.jpg',
        'http://img3.6comic.com:99/2/103/861/004_y3b.jpg',
        'http://img3.6comic.com:99/2/103/861/005_hu6.jpg'
    ]
    jobs = [gevent.spawn(download_link, download_dir, _url) for _url in links]
    gevent.wait(jobs)
    print('Took {}s'.format(time() - ts))
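# For the gevent variant above to actually overlap the downloads, the standard
# library must be monkey-patched before any sockets are created; otherwise
# each blocking download_link call still runs serially. This assumes
# download_link uses blocking standard-library I/O:
from gevent import monkey
monkey.patch_all()  # must run before urllib/requests/sockets are imported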
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    loop = asyncio.get_event_loop()
    # Instead of asyncio.async you can also use loop.create_task, but loop.create_task is only available
    # in Python >= 3.4.2
    tasks = [asyncio.async(async_download_link(download_dir, l)) for l in get_links(client_id) if l.endswith('.jpg')]
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    logger.info('Took %s seconds to complete', time() - ts)


if __name__ == '__main__':
    main()
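# asyncio.async() stopped working once async became a reserved keyword in
# Python 3.7, so the event-loop variants above only run on older interpreters.
# A sketch of the same fan-out on modern Python, assuming the same
# async_download_link coroutine and helpers from the snippets above:
import asyncio


async def modern_main(client_id, download_dir):
    tasks = [asyncio.create_task(async_download_link(download_dir, l))
             for l in get_links(client_id) if l.endswith('.jpg')]
    await asyncio.gather(*tasks, return_exceptions=True)

# asyncio.run(modern_main(client_id, download_dir)) replaces the manual
# get_event_loop()/run_until_complete()/close() sequence.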
def main():
    ts = time()
    url1 = 'https://item.taobao.com/item.htm?spm=a217l.8087239.620352.3.512Gng&id=536843329282'
    url2 = 'https://item.taobao.com/item.htm?spm=a217l.8087239.620352.4.512Gng&id=44022485238'
    download_dir = setup_download_dir('process_imgs')
    links = list(chain(
        get_links(url1),
        get_links(url2),
    ))
    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    print('Downloaded {} images in total'.format(len(links)))
    print('Took {}s'.format(time() - ts))
def main():
    ts = time()
    download_dir = setup_download_dir()
    links = [
        'http://img3.6comic.com:99/2/103/861/001_mdh.jpg',
        'http://img3.6comic.com:99/2/103/861/002_9uj.jpg',
        'http://img3.6comic.com:99/2/103/861/003_c8x.jpg',
        'http://img3.6comic.com:99/2/103/861/004_y3b.jpg',
        'http://img3.6comic.com:99/2/103/861/005_hu6.jpg'
    ]
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        url_futures = {
            executor.submit(download_link, download_dir, url): url
            for url in links
        }
        concurrent.futures.wait(url_futures)
    print('Took {}s'.format(time() - ts))
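# A sketch of a common extension of the pattern above: instead of a bare
# wait(), iterate concurrent.futures.as_completed so each download can be
# checked for errors as it finishes, using the futures -> url mapping built in
# the snippet above:
for future in concurrent.futures.as_completed(url_futures):
    url = url_futures[future]
    try:
        future.result()  # re-raises any exception from download_link
    except Exception as exc:
        print('{} failed: {}'.format(url, exc))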
def main():
    ts = time()
    url1 = 'http://www.toutiao.com/a6333981316853907714'
    url2 = 'http://www.toutiao.com/a6334459308533350658'
    url3 = 'http://www.toutiao.com/a6313664289211924737'
    url4 = 'http://www.toutiao.com/a6334337170774458625'
    url5 = 'http://www.toutiao.com/a6334486705982996738'
    download_dir = setup_download_dir('thread_imgs')
    # Create a queue to communicate with the worker threads
    queue = Queue()
    links = list(chain(
        get_links(url1),
        get_links(url2),
        get_links(url3),
        get_links(url4),
        get_links(url5),
    ))
    # Create 16 worker threads
    for x in range(16):
        worker = DownloadWorker(queue)
        # Setting daemon to True will let the main thread exit even though the
        # workers are blocking
        worker.daemon = True
        worker.start()
    # Put the tasks into the queue as a tuple
    i = 1
    for link in links:
        queue.put((download_dir, link))
        print(i, link)
        i += 1
    # Causes the main thread to wait for the queue to finish processing all
    # the tasks
    queue.join()
    print('Total photos {}'.format(len(links)))
    print('Took {}s'.format(time() - ts))
def main():
    ts = time()
    url1 = 'http://www.toutiao.com/a6333981316853907714'
    url2 = 'http://www.toutiao.com/a6334459308533350658'
    url3 = 'http://www.toutiao.com/a6313664289211924737'
    url4 = 'http://www.toutiao.com/a6334337170774458625'
    url5 = 'http://www.toutiao.com/a6334486705982996738'
    download_dir = setup_download_dir('single_imgs')
    links = list(chain(
        get_links(url1),
        get_links(url2),
        get_links(url3),
        get_links(url4),
        get_links(url5),
    ))
    for link in links:
        download_link(download_dir, link)
    print('Downloaded {} images in total'.format(len(links)))
    print('Took {}s'.format(time() - ts))
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    # Create a queue to communicate with worker threads
    queue = Queue()
    # Create 8 threads
    for x in range(8):
        worker = DownloadWorker(queue)
        worker.daemon = True
        worker.start()
    # Put tasks into the queue as a tuple
    for link in links:
        logger.info('Queuing {}'.format(link))
        queue.put((download_dir, link))
    # Make the main thread wait
    queue.join()
    logger.info('Took %s', time() - ts)
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = (l for l in get_links(client_id) if l.endswith('.jpg'))
    # Create a queue to communicate with the worker threads
    queue = Queue()
    # Create 8 worker threads
    for x in range(8):
        worker = DownloadWorker(queue)
        # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()
    # Put the tasks into the queue as a tuple
    for link in links:
        logger.info('Queueing {}'.format(link))
        queue.put((download_dir, link))
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    logging.info('Took %s', time() - ts)
def main():
    ts = time()
    url1 = 'http://www.toutiao.com/a6333981316853907714'
    url2 = 'http://www.toutiao.com/a6334459308533350658'
    url3 = 'http://www.toutiao.com/a6313664289211924737'
    url4 = 'http://www.toutiao.com/a6334337170774458625'
    url5 = 'http://www.toutiao.com/a6334486705982996738'
    download_dir = setup_download_dir('process_imgs')
    links = list(chain(
        get_links(url1),
        get_links(url2),
        get_links(url3),
        get_links(url4),
        get_links(url5),
    ))
    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    print('Downloaded {} images in total'.format(len(links)))
    print('Took {}s'.format(time() - ts))
def main():
    ts = time()
    client_id = IMGUR_CLIENT_ID
    if not client_id:
        raise Exception("Need a valid IMGUR_CLIENT_ID to use the API!")
    download_dir = setup_download_dir()
    # Get only image links from the API
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    # Create a queue to communicate with the worker threads
    queue = Queue()
    for x in range(8):  # Create 8 worker threads
        logging.info('Starting thread %s', x)
        worker = DownloadWorker(queue)
        # The main thread can exit even though workers are blocked
        worker.daemon = True
        worker.start()
    # Create a task in the queue for each image link
    for link in links:
        logger.info('Queueing {}'.format(link))
        queue.put((download_dir, link))
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    print('Execution time: {} seconds.'.format(time() - ts))
import logging
from functools import partial
from multiprocessing import Pool

from download import get_links, setup_download_dir, download_link, CLIENT_ID

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

PROCESSES = 4

if __name__ == '__main__':
    download_dir = setup_download_dir('images')
    download = partial(download_link, download_dir)
    links = get_links(CLIENT_ID)
    with Pool(PROCESSES) as p:
        p.map(download, links)
import logging
import os
from time import time

from download import setup_download_dir, get_links, download_link

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s|%(levelname)s|%(threadName)s|%(message)s')
log = logging.getLogger(__name__)

if __name__ == '__main__':
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID env variable")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    log.info('Took %s seconds', time() - ts)