示例#1
0
# --- Settings read from the already-parsed `config` object ---
# [Cache] Images_min_limit, parsed as an integer.
# NOTE(review): presumably the cache size below which a refill is triggered;
# confirm against the code that reads this value.
min_cache_imgs_before_refill = config.getint("Cache", "Images_min_limit")
# [General] Useragent; handed to Crawler.request_headers() below as the
# 'User-Agent' entry.
user_agent = config.get("General", "Useragent")
# [Logging] Verbosity; upper-cased and used as the logger level below.
logverbosity = config.get("Logging", "Verbosity")

# --- Logging setup ---
# The logger name and the log file path both come from the [Logging] section.
logger = logging.getLogger(config.get("Logging", "Log_name"))
hdlr = logging.FileHandler(config.get("Logging", "File"))
# Each record is written as: timestamp, level name, message.
hdlr.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
logger.addHandler(hdlr)
# The configured verbosity is upper-cased before being applied, so a
# lower-case value such as "debug" becomes "DEBUG".
logger.setLevel(logverbosity.upper())

# --- Call throttling state ---
# Each *_last timestamp starts one full timeout in the past, so the timeout
# already counts as elapsed at start-up.
# NOTE(review): presumably this lets the first flush/reset call through
# without waiting; confirm against the handlers that read these values.
call_flush_timeout = 10  # value in seconds
call_flush_last = time.time() - call_flush_timeout

call_reset_timeout = 10  # value in seconds
call_reset_last = time.time() - call_reset_timeout

# --- Crawler class configuration ---
# Both calls are made on the Crawler class itself, not on an instance.
Crawler.request_headers({'User-Agent': user_agent})
Crawler.set_logger(logger)

### configure the crawlers
from crawler.reddit import Reddit
from crawler.soupio import SoupIO
from crawler.pr0gramm import Pr0gramm
from crawler.ninegag import NineGag
from crawler.instagram import Instagram
from crawler.fourchan import Fourchan
from crawler.giphy import Giphy
from crawler.bildschirmarbeiter import Bildschirmarbeiter


def get_crawlers(configuration, section):
    """