Example #1
def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)

    # By placing the executor inside a with block, the executor's shutdown method
    # will be called, cleaning up threads.
    #
    # By default, the executor sets the number of workers to 5 times the number
    # of CPUs (on Python 3.8+ the default is min(32, os.cpu_count() + 4)).
    with ThreadPoolExecutor() as executor:

        # Create a new partially applied function that stores the directory
        # argument.
        #
        # This allows the download_link function that normally takes two
        # arguments to work with the map function that expects a function of a
        # single argument.
        fn = partial(download_link, download_dir)

        # Executes fn concurrently using threads on the links iterable. The
        # timeout is for the entire process, not a single call, so downloading
        # all images must complete within 30 seconds.
        executor.map(fn, links, timeout=30)
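One subtlety in Example #1: executor.map() returns a lazy iterator, and the 30 second timeout (as well as any exception raised inside download_link) only surfaces when that iterator is consumed. A small sketch of a variant that does consume it, assuming the same download_link, setup_download_dir and get_links helpers used throughout these examples:

import os
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeout
from functools import partial

def main():
    download_dir = setup_download_dir()
    links = get_links(os.getenv('IMGUR_CLIENT_ID'))
    fn = partial(download_link, download_dir)
    with ThreadPoolExecutor() as executor:
        try:
            # Iterating the results is what actually enforces the timeout and
            # re-raises exceptions from the worker threads.
            for _ in executor.map(fn, links, timeout=30):
                pass
        except FuturesTimeout:
            print('Downloads did not finish within 30 seconds')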
Example #2
def main():
    ts = time()
    download_dir = setup_download_dir()
    links = [l for l in get_links() if l.endswith('.csv')]
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
Example #3
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)

    queue = Queue()

    # Creating 3 worker threads
    for x in range(3):
        worker = DownloadWorker(queue)

        # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()

    # Put the tasks into the queue as a tuple
    for link in links:
        logger.info('Queueing {}'.format(link))
        queue.put((download_dir, link))

    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    logging.info('Took %s', time() - ts)
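The queue-based examples all rely on a DownloadWorker thread class defined elsewhere in the source project. A minimal sketch of what such a worker typically looks like, reusing the download_link helper; treat it as an illustration of the pattern rather than the original implementation:

from threading import Thread

class DownloadWorker(Thread):
    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            # Block until a (directory, link) tuple is available on the queue
            directory, link = self.queue.get()
            try:
                download_link(directory, link)
            finally:
                # Always mark the task as done so queue.join() can return
                self.queue.task_done()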
Example #4
def main():
    print("In Main")
    ts = time()
    download_dir = setup_download_dir()
    links = [l for l in get_links('c53645e1e12ad62') if l.endswith('.jpg')]
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
Example #5
def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    q = Queue(connection=Redis(host='localhost', port=6379))
    for link in links:
        q.enqueue(download_link, download_dir, link)
Example #7
async def main():
    client_id = 'f8f603617f590ed'
    download_dir = setup_download_dir()
    # We use a session to take advantage of TCP keep-alive.
    # Set a 3 second read and connect timeout (the default is 5 minutes).
    # Note: newer aiohttp releases prefer a single ClientTimeout object;
    # conn_timeout and read_timeout are deprecated there.
    async with aiohttp.ClientSession(conn_timeout=3, read_timeout=3) as session:
        tasks = [(async_download_link(session, download_dir, l)) for l in get_links(client_id)]
        # gather aggregates all the tasks and schedules them in the event loop
        await asyncio.gather(*tasks, return_exceptions=True)
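The async_download_link coroutine used here is assumed to exist in the surrounding module and is not shown in these snippets. A rough sketch of what the session-based variant might look like with aiohttp (a hypothetical helper, writing the body with plain blocking file I/O for brevity):

import os
import aiohttp

async def async_download_link(session, directory, link):
    download_path = os.path.join(str(directory), os.path.basename(link))
    async with session.get(link) as response:
        # read() buffers the whole body in memory, which is fine for small images
        data = await response.read()
    with open(download_path, 'wb') as f:
        f.write(data)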
Example #8
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
Example #9
def main():
    ts = time()
    client_id = 'ee43c9d73f7dcc9'
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    loop = asyncio.get_event_loop()
    # asyncio.async was renamed to asyncio.ensure_future and is a syntax error on
    # modern Python; loop.create_task is also available in Python >= 3.4.2.
    tasks = [asyncio.ensure_future(async_download_link(download_dir, l)) for l in get_links(client_id)]
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    print('Took {}s'.format(time() - ts))
Example #10
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    logging.info('Took %s seconds', time() - ts)
Example #11
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = (l for l in get_links(client_id) if l.endswith('.jpg'))
    for link in links:
        download_link(download_dir, link)
    logging.info('Took %s seconds', time() - ts)
Example #12
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception('Couldn\'t find IMGUR_CLIENT_ID environment variable!')
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
Example #13
def main():
    ts = time()
    client_id = 'bef2d9292d6bcbd'
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    logging.info('Took %s seconds', time() - ts)
Example #14
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    loop = asyncio.get_event_loop()
    # asyncio.async was renamed to asyncio.ensure_future and is a syntax error on
    # modern Python; loop.create_task is also available in Python >= 3.4.2.
    tasks = [asyncio.ensure_future(async_download_link(download_dir, l)) for l in get_links(client_id)]
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    logger.info('Took %s seconds to complete', time() - ts)
Example #15
def main():
    ts = time()
    download_dir = setup_download_dir()
    links = ['http://img3.6comic.com:99/2/103/861/001_mdh.jpg',
            'http://img3.6comic.com:99/2/103/861/002_9uj.jpg',
            'http://img3.6comic.com:99/2/103/861/003_c8x.jpg',
            'http://img3.6comic.com:99/2/103/861/004_y3b.jpg',
            'http://img3.6comic.com:99/2/103/861/005_hu6.jpg']
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
Example #16
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception('Couldn\'t find IMGUR_CLIENT_ID environment variable!')
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    download = partial(download_link, download_dir)
    with Pool(num_processes) as p:
        p.map(download, links)
    print('Took {}s'.format(time() - ts))
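With multiprocessing.Pool the mapped function and its partial arguments must be picklable, and on platforms that spawn workers instead of forking (Windows, recent macOS) the pool has to be created under an if __name__ == '__main__' guard, as Example #37 below also does. A minimal sketch of that shape with the same helpers (num_processes is just an illustrative constant here):

import os
from functools import partial
from multiprocessing import Pool

num_processes = 8  # illustrative; often derived from os.cpu_count()

def run_downloads(links):
    download_dir = setup_download_dir()
    # download_link must live at module level so child processes can import it
    download = partial(download_link, download_dir)
    with Pool(num_processes) as p:
        p.map(download, links)

if __name__ == '__main__':
    run_downloads(get_links(os.getenv('IMGUR_CLIENT_ID')))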
Example #17
def main():
    ts = time()
    try:
        client_id = key.client_id
    except AttributeError:
        # key is expected to be a module exposing client_id; bail out if it is missing
        logger.error('Cannot import client_id')
        return
    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)
    logger.info(f"Took {time() - ts} seconds")
Example #18
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    logging.info('Took %s seconds', time() - ts)
Example #19
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    download = partial(download_link, download_dir)
    with Pool(4) as p:
        p.map(download, links)
    logger.info('Took: %s', time() - ts)
Example #20
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = (l for l in get_links(client_id) if l.endswith('.jpg'))
    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    logging.info('Took %s seconds', time() - ts)
Example #21
def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    q = Queue(connection=Redis(host='localhost', port=6379))
    for link in links:
        # Puts the job on a Redis server, which can live on another machine.
        # Running 'rqworker' in a terminal starts a worker listening on the default
        # queue; 'rqworker queue_name' listens on that named queue instead
        # (see the sketch after this example).
        q.enqueue(download_link, download_dir, link)
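q.enqueue() returns immediately with a Job handle; the download itself runs in whichever rqworker process picks the job up. A short sketch of keeping and polling those handles, assuming the same queue setup, download_link, download_dir and links as above (the attribute names are standard rq ones, but worth checking against the installed version):

from redis import Redis
from rq import Queue

q = Queue(connection=Redis(host='localhost', port=6379))
jobs = [q.enqueue(download_link, download_dir, link) for link in links]

# Later, or from another process, inspect the job handles
for job in jobs:
    print(job.id, job.get_status())  # e.g. queued, started, finished, failed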
Example #22
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable")
    download_dir = setup_download_dir()
    links = get_links(client_id)
    print(links, len(links))
    download = partial(download_link, download_dir)
    with ThreadPoolExecutor() as ex:
        ex.map(download, links)
    logging.info('Took %s seconds', time() - ts)
Example #23
async def main():
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    # We use a session to take advantage of tcp keep-alive
    # Set a 3 second read and connect timeout. Default is 5 minutes
    async with aiohttp.ClientSession(conn_timeout=3,
                                     read_timeout=3) as session:
        tasks = [(async_download_link(session, download_dir, l))
                 for l in get_links(client_id)]
        # gather aggregates all the tasks and schedules them in the event loop
        await asyncio.gather(*tasks, return_exceptions=True)
Example #24
def main():
    ts = time()
    download_dir = setup_download_dir()
    links = [
        'http://img3.6comic.com:99/2/103/861/001_mdh.jpg',
        'http://img3.6comic.com:99/2/103/861/002_9uj.jpg',
        'http://img3.6comic.com:99/2/103/861/003_c8x.jpg',
        'http://img3.6comic.com:99/2/103/861/004_y3b.jpg',
        'http://img3.6comic.com:99/2/103/861/005_hu6.jpg'
    ]

    q = Queue(connection=Redis())
    result = [q.enqueue(download_link, download_dir, url) for url in links]
    print('Took {}s'.format(time() - ts))
Example #25
def main():
    ts = time()
    hello()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    print(download_dir)
    testurl = u'http://i.imgur.com/i5QjTPA.jpg'
    download_link(download_dir, testurl)
    for link in links:
        download_link(download_dir, link)
    print('Took {}s'.format(time() - ts))
Example #26
def main():
    ts = time()
    download_dir = setup_download_dir()
    links = [
        'http://img3.6comic.com:99/2/103/861/001_mdh.jpg',
        'http://img3.6comic.com:99/2/103/861/002_9uj.jpg',
        'http://img3.6comic.com:99/2/103/861/003_c8x.jpg',
        'http://img3.6comic.com:99/2/103/861/004_y3b.jpg',
        'http://img3.6comic.com:99/2/103/861/005_hu6.jpg'
    ]

    jobs = [gevent.spawn(download_link, download_dir, _url) for _url in links]
    gevent.wait(jobs)
    print('Took {}s'.format(time() - ts))
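For gevent to actually overlap these downloads, the blocking socket calls inside download_link (requests, urllib, etc.) have to be made cooperative, which is normally done by monkey-patching the standard library before anything else is imported. A minimal sketch of that setup around the same spawn/wait pattern, with download_link assumed from the surrounding module:

from gevent import monkey
monkey.patch_all()  # must run before requests/urllib/socket are imported

import gevent

def run_downloads(download_dir, links):
    # Spawn one greenlet per URL and wait for all of them to finish
    jobs = [gevent.spawn(download_link, download_dir, url) for url in links]
    gevent.wait(jobs)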
Example #27
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    loop = asyncio.get_event_loop()
    # asyncio.async was renamed to asyncio.ensure_future and is a syntax error on
    # modern Python; loop.create_task is also available in Python >= 3.4.2.
    tasks = [asyncio.ensure_future(async_download_link(download_dir, l)) for l in get_links(client_id) if l.endswith('.jpg')]
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
    logger.info('Took %s seconds to complete', time() - ts)


if __name__ == '__main__':
    main()
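On Python 3.7+ the same program is usually written with asyncio.run() and asyncio.gather() instead of driving the event loop by hand. A rough equivalent of Example #27 in that style, with logger and the download helpers assumed from the surrounding module:

import asyncio
import os
from time import time

async def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    tasks = [async_download_link(download_dir, l)
             for l in get_links(client_id) if l.endswith('.jpg')]
    # gather schedules the coroutines and waits for all of them to finish
    await asyncio.gather(*tasks)
    logger.info('Took %s seconds to complete', time() - ts)

if __name__ == '__main__':
    asyncio.run(main())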
Example #29
def main():
    ts = time()

    url1 = 'https://item.taobao.com/item.htm?spm=a217l.8087239.620352.3.512Gng&id=536843329282'
    url2 = 'https://item.taobao.com/item.htm?spm=a217l.8087239.620352.4.512Gng&id=44022485238'

    download_dir = setup_download_dir('process_imgs')
    links = list(chain(
        get_links(url1),
        get_links(url2),
    ))

    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    print('Downloaded {} images in total'.format(len(links)))
    print('Took {}s'.format(time() - ts))
Example #30
def main():
    ts = time()
    download_dir = setup_download_dir()
    links = [
        'http://img3.6comic.com:99/2/103/861/001_mdh.jpg',
        'http://img3.6comic.com:99/2/103/861/002_9uj.jpg',
        'http://img3.6comic.com:99/2/103/861/003_c8x.jpg',
        'http://img3.6comic.com:99/2/103/861/004_y3b.jpg',
        'http://img3.6comic.com:99/2/103/861/005_hu6.jpg'
    ]

    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        url_futures = {
            executor.submit(download_link, download_dir, url): url
            for url in links
        }
    concurrent.futures.wait(url_futures)
    print('Took {}s'.format(time() - ts))
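The url_futures dict in Example #30 maps each Future back to its URL, but the results are never read. The usual pattern is concurrent.futures.as_completed, which yields futures as they finish and lets failures be attributed to specific URLs; a short sketch with the same helpers assumed:

import concurrent.futures

def run_downloads(download_dir, links):
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        url_futures = {
            executor.submit(download_link, download_dir, url): url
            for url in links
        }
        for future in concurrent.futures.as_completed(url_futures):
            url = url_futures[future]
            try:
                future.result()  # re-raises any exception from the worker thread
            except Exception as exc:
                print('{} failed: {}'.format(url, exc))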
Example #31
def main():
    ts = time()

    url1 = 'http://www.toutiao.com/a6333981316853907714'
    url2 = 'http://www.toutiao.com/a6334459308533350658'
    url3 = 'http://www.toutiao.com/a6313664289211924737'
    url4 = 'http://www.toutiao.com/a6334337170774458625'
    url5 = 'http://www.toutiao.com/a6334486705982996738'
    download_dir = setup_download_dir('thread_imgs')
    # Create a queue to communicate with the worker threads
    queue = Queue()

    links = list(
        chain(
            get_links(url1),
            get_links(url2),
            get_links(url3),
            get_links(url4),
            get_links(url5),
        ))

    # Create 16 worker threads
    for x in range(16):
        worker = DownloadWorker(queue)
        # Setting daemon to True will let the main thread exit even though the
        # workers are blocking
        worker.daemon = True
        worker.start()

    # Put the tasks into the queue as a tuple
    for i, link in enumerate(links, 1):
        queue.put((download_dir, link))
        print(i, link)

    # Causes the main thread to wait for the queue to finish processing all
    # the tasks
    queue.join()
    print('Total photos {}'.format(len(links)))
    print('Took {}s'.format(time() - ts))
Example #32
def main():
    ts = time()

    url1 = 'http://www.toutiao.com/a6333981316853907714'
    url2 = 'http://www.toutiao.com/a6334459308533350658'
    url3 = 'http://www.toutiao.com/a6313664289211924737'
    url4 = 'http://www.toutiao.com/a6334337170774458625'
    url5 = 'http://www.toutiao.com/a6334486705982996738'
    download_dir = setup_download_dir('single_imgs')
    links = list(
        chain(
            get_links(url1),
            get_links(url2),
            get_links(url3),
            get_links(url4),
            get_links(url5),
        ))
    for link in links:
        download_link(download_dir, link)
    print('Downloaded {} images in total'.format(len(links)))
    print('Took {}s'.format(time() - ts))
Example #33
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable")

    download_dir = setup_download_dir()
    links = get_links(client_id)
    # create a queue to communicate with worker threads
    queue = Queue()
    # Create 8 threads
    for x in range(8):
        worker = DownloadWorker(queue)
        worker.daemon = True
        worker.start()
    # Put tasks into queue as a tuple
    for link in links:
        logger.info('Queuing {}'.format(link))
        queue.put((download_dir, link))
    # Make the main thread wait
    queue.join()
    logger.info('Took %s', time() - ts)
Example #34
def main():
    ts = time()
    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID environment variable!")
    download_dir = setup_download_dir()
    links = (l for l in get_links(client_id) if l.endswith('.jpg'))
    # Create a queue to communicate with the worker threads
    queue = Queue()
    # Create 8 worker threads
    for x in range(8):
        worker = DownloadWorker(queue)
        # Setting daemon to True will let the main thread exit even though the workers are blocking
        worker.daemon = True
        worker.start()
    # Put the tasks into the queue as a tuple
    for link in links:
        logger.info('Queueing {}'.format(link))
        queue.put((download_dir, link))
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    logging.info('Took %s', time() - ts)
Example #35
def main():
    ts = time()

    url1 = 'http://www.toutiao.com/a6333981316853907714'
    url2 = 'http://www.toutiao.com/a6334459308533350658'
    url3 = 'http://www.toutiao.com/a6313664289211924737'
    url4 = 'http://www.toutiao.com/a6334337170774458625'
    url5 = 'http://www.toutiao.com/a6334486705982996738'
    download_dir = setup_download_dir('process_imgs')
    links = list(
        chain(
            get_links(url1),
            get_links(url2),
            get_links(url3),
            get_links(url4),
            get_links(url5),
        ))

    download = partial(download_link, download_dir)
    with Pool(8) as p:
        p.map(download, links)
    print('Downloaded {} images in total'.format(len(links)))
    print('Took {}s'.format(time() - ts))
Example #36
def main():
    ts = time()
    client_id = IMGUR_CLIENT_ID
    if not client_id:
        raise Exception("Need a valid IMGUR_CLIENT_ID to use the API!")
    download_dir = setup_download_dir()
    # get only image links from the API
    links = [l for l in get_links(client_id) if l.endswith('.jpg')]
    # Create a queue to communicate with the worker threads
    queue = Queue()  
    for x in range(8):
        # Create 8 worker threads
        logging.info('Starting thread %s', x)
        worker = DownloadWorker(queue)
        # main thread can exit even though workers are blocked
        worker.daemon = True
        worker.start()
    # Create a task in the queue for each image link
    for link in links:
        logger.info('Queueing {}'.format(link))
        queue.put((download_dir, link))
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    print('Execution time: {} seconds.'.format(time() - ts))
Example #37
import logging
from functools import partial
from multiprocessing import Pool
from download import (get_links, setup_download_dir,
                      download_link, CLIENT_ID)

logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

logger = logging.getLogger(__name__)

PROCESSES = 4

if __name__ == '__main__':
    download_dir = setup_download_dir('images')
    download = partial(download_link, download_dir)
    links = get_links(CLIENT_ID)
    with Pool(PROCESSES) as p:
        p.map(download, links)
Example #38
import logging
import os
from time import time

from download import setup_download_dir, get_links, download_link

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s|%(levelname)s|%(threadName)s|%(message)s')
log = logging.getLogger(__name__)

if __name__ == '__main__':
    ts = time()

    client_id = os.getenv('IMGUR_CLIENT_ID')
    if not client_id:
        raise Exception("Couldn't find IMGUR_CLIENT_ID env variable")

    download_dir = setup_download_dir()
    links = get_links(client_id)
    for link in links:
        download_link(download_dir, link)

    log.info('Took %s seconds', time() - ts)