import itertools
from multiprocessing.dummy import Manager, Pool as ThreadPool


def process_venue_list(venue_id_list):
    # Thread pool of 5 workers; the Manager-backed dict is shared with the
    # workers so progress can be observed while the map runs.
    pool = ThreadPool(5)
    m = Manager()
    d = m.dict()

    # Mark every venue as unprocessed before the workers start.
    for venue_id in venue_id_list:
        d[venue_id] = 'none'

    print("[.] Processing %s venues" % len(venue_id_list))
    # process_venue receives (venue_id, d) tuples; process_venue and
    # monitor_map_progress are defined elsewhere in the project.
    result = pool.map_async(process_venue,
                            zip(venue_id_list, itertools.repeat(d)))
    monitor_map_progress(result, d, len(venue_id_list))

    result.wait()
    _ = result.get()

    print("[x] Done with %s venues" % len(venue_id_list))
    def get_init_params(self, manager: Manager):
        # Method excerpt: build init parameters for a worker, using a
        # Manager-backed dict for shared state when a manager is supplied.
        d = {}
        if manager is not None:
            d = manager.dict()

        return None, self.zoom_offset, self.max_zoom_level, d
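
# A hypothetical call site for the excerpt above (the object and unpacked
# names are assumptions, not from the original):
#     _, zoom_offset, max_zoom, shared = layer.get_init_params(Manager())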
Example #3
import json
import logging
from multiprocessing.dummy import Manager
import random
from urllib.parse import urlparse, parse_qs
from config.config import BASEPYCURLCONFIG
from core import MySpider
from core.Spider import CrawlJob

__author__ = 'Florian'
m = Manager()
# Shared dict tracking pages that have already been crawled
# (multiprocessing.dummy is thread-based, so this is a plain dict).
crawled = m.dict()

logger = logging.getLogger("logger")
# Records at WARNING and above are appended to clubs.jsonl;
# DEBUG and above are echoed to the console.
fh = logging.FileHandler("clubs.jsonl", 'a+')
simpleFormat = logging.Formatter("%(message)s")
fh.setFormatter(simpleFormat)
fh.setLevel(logging.WARNING)
ch = logging.StreamHandler()
ch.setFormatter(simpleFormat)
ch.setLevel(logging.DEBUG)
logger.addHandler(fh)
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)
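
# With this handler setup, the data file doubles as the scraper's output
# sink: scraped records are emitted at WARNING so they land in clubs.jsonl,
# while DEBUG chatter only reaches the console. A hypothetical usage
# (field names are assumptions, not from the original):
#     logger.warning(json.dumps({"club": "FC Example", "source_url": url}))
#     logger.debug("queueing %s", url)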


@MySpider.QueueInitializer.register()
def seeds():
    # Build the initial list of crawl jobs, one per federation.
    jobs = []
    logger = logging.getLogger("logger")
    for federation in [