def crawl(site, maxpage=None):
    """Crawl the index pages of an exchange *site* and parse the listings.

    Reads the site's config via ``get_conf``, upserts the Exchange record,
    then for each listing type ('result', 'offer', 'stock') fetches the
    index page(s) plus paginated pages 2..N and feeds each page through
    ``fix_javascript`` and ``parse_index``.

    :param site: site key understood by ``get_conf``.
    :param maxpage: optional global cap on pages per type; ``None`` means
        use each type's own configured ``maxpage``.
    """
    # Route traffic through the configured HTTP proxy, if any.
    proxy = ybk.config.conf.get('proxy')
    if proxy:
        session.proxies = {'http': proxy}

    conf = get_conf(site)
    ex = Exchange({
        'name': conf['name'],
        'url': conf['url'],
        'abbr': conf['abbr'],
    })
    ex.upsert()

    for type_ in ['result', 'offer', 'stock']:
        tconf = conf.get(type_)
        if not tconf:
            continue

        # BUG FIX: the original reassigned the *maxpage* parameter here,
        # so the clamp computed for the first type leaked into every
        # subsequent type. Use a per-type local instead.
        if maxpage is None:
            pages = tconf['maxpage']
        else:
            pages = min(maxpage, tconf['maxpage'])

        # 'index' may be a single URL or a list of URLs.
        index = tconf['index']
        if not isinstance(index, list):
            index = [index]

        # Index page(s) first, then paginated pages 2..pages.
        urls = list(index)
        urls.extend(tconf['page'].format(page=page)
                    for page in range(2, pages + 1))

        for url in urls:
            # (connect, read) timeouts; fix_javascript normalizes any
            # JS-obfuscated payload before parsing.
            content = session.get(url, timeout=(5, 10)).content
            content = fix_javascript(url, content)
            parse_index(ex, type_, content, tconf)