def run(self):
    """Generate cookies for every account that does not have any stored yet.

    Opens the ``accounts`` and ``cookies`` stores for ``self.website``,
    then logs in each account whose username is missing from the cookies
    store via ``BilibiliCookies(username, password).cookie()``.

    Only a truthy result is written back. A failed login leaves the
    cookies store untouched, so the account is retried on the next run
    instead of having an empty value saved for it.
    """
    accounts = RedisClient('accounts', self.website)
    cookies = RedisClient('cookies', self.website)
    accounts_usernames = accounts.all_keys()
    # Build the lookup set once; the loop below only needs membership tests.
    cookies_usernames = set(cookies.all_keys())
    for username in accounts_usernames:
        if username in cookies_usernames:
            continue
        password = accounts.get(username)
        # NOTE(review): this prints the account password in plain text;
        # consider masking it if this output ever reaches shared logs.
        print('正在生成Cookies, 账号:{} 密码: {}'.format(username, password))
        new_cookies = BilibiliCookies(username, password).cookie()
        if not new_cookies:
            print('生成Cookies失败')
            continue
        print('生成Cookies成功')
        cookies.set(username, new_cookies)
def start():
    """Start the crawler without coroutines.

    Builds a ``GetIpPv`` from a ``RedisClient`` and a ``MysqlClient``.
    While ``get_num()`` stays truthy, it takes one domain at a time and
    prints the result returned for it.
    """
    redis_store = RedisClient('url', '127.0.0.1', None)
    mysql_store = MysqlClient()
    crawler = GetIpPv(redis_store, mysql_store)
    while True:
        if not crawler.get_num():
            break
        print(crawler.get_result(crawler.get_domain()))
def schedule_getter(self, cycle=GETTER_CYCLE):
    """Fetch proxies periodically.

    Loops forever. Each round announces the crawl, runs the
    ``crawl_xdaili`` getter, calls ``clear()`` on the Redis client and
    then sleeps.

    Args:
        cycle: Seconds to sleep between two rounds.
    """
    proxy_getter = Getter()
    store = RedisClient()
    while True:
        print(' 开始抓取代理 ')
        proxy_getter.run_specific('crawl_xdaili')
        store.clear()
        time.sleep(cycle)
def start_coro():
    """Start the crawler using coroutines.

    Builds a ``GetIpPv`` from a ``RedisClient`` and a ``MysqlClient`` and
    runs ``download()`` on the asyncio event loop until it completes.
    The loop is closed afterwards whether or not the download succeeded.
    """
    crawler = GetIpPv(RedisClient('url', '127.0.0.1', None), MysqlClient())
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(crawler.download())
    finally:
        loop.close()
def init_module():
    """Create the module-level ``redis_client``, ``detector`` and ``booter``.

    All three are built from the module's configuration constants. The
    detector and the booter share the one Redis client created here.
    """
    global redis_client, detector, booter

    redis_client = RedisClient(
        host=REDIS_HOST,
        port=REDIS_PORT,
        password=REDIS_PASSWORD,
        s_key=REDIS_KEY,
        num=GET_NUM,
    )
    proxy_source = XiCiProxyHelper(quantity=QUANTITY, threshold=THRESHOLD)
    detector = Detector(redis_client, test_url=TEST_URL)
    booter = Booter(redis_client, proxy_source, capacity=CAPACITY)
def __init__(self, test_url=url):
    """Store the test URL, open a Redis client and prepare request headers.

    Args:
        test_url: URL kept on the instance as ``self.test_url``.
    """
    self.test_url = test_url
    self.db = RedisClient()

    # Browser-like User-Agent (Edge 84 on Windows 10), split for line length.
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/84.0.4147.135 Safari/537.36 Edg/84.0.522.63'
    )
    self.headers = {
        'User-Agent': user_agent,
        'Origin': 'https://www.bilibili.com',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    }
from crawl import XiCiProxyHelper
from storage import Booter, RedisClient
from detector import Detector

if __name__ == "__main__":
    # Manual end-to-end run: run the booter, run the detector, report how
    # many entries the client counts, show them, then remove range 0..100.
    helper = XiCiProxyHelper(quantity=40, threshold=1.000)
    client = RedisClient()

    Booter(client, helper).run()
    Detector(client).run()

    total = client.count()
    print('一共有{}'.format(total))
    client.show()
    client.remove_by_range(0, 100)
def get_domain(self):
    """Pop and return the next domain from ``self.redis_db``."""
    return self.redis_db.pop()

def get_rest_domain_num(self):
    """Return the number of domains still waiting in ``self.redis_db``."""
    return self.redis_db.get_num()

def save(self, text):
    """Append ``text`` to ``title.txt`` in the current working directory."""
    with open('title.txt', 'a+') as f:
        f.write(text)

def download(self):
    """Fetch and record one result line per queued domain until none remain.

    For each domain the page is requested, its encoding is set from the
    detected ``apparent_encoding``, and ``url;<parsed text>`` is saved.

    Failures are best-effort: any exception while fetching, parsing or
    saving is logged with its traceback, and a bare ``url;`` line is
    saved so the domain still appears in the output.
    """
    while self.get_rest_domain_num():
        url = self.get_domain()
        logging.info('req ' + url)
        try:
            response = self.get_page(url)
            response.encoding = response.apparent_encoding
            logging.info(response.status_code)
            doc = self.parse(response)
            self.save(url + ';' + doc + '\n')
        except Exception:
            # Keep crawling, but leave a trace of what went wrong instead
            # of discarding the error silently.
            logging.exception('failed to process %s', url)
            self.save(url + ';\n')


if __name__ == '__main__':
    tc = ThemeCrawler(RedisClient('url', '127.0.0.1', None), MysqlClient())
    tc.download()
def __init__(self, website='default'):
    """Open the cookies and accounts stores for one website.

    Args:
        website: Site name passed to both ``RedisClient`` instances.
    """
    self.website = website
    site = self.website
    # Both stores are created for the same site and differ only in type.
    self.cookies_db = RedisClient('cookies', site)
    self.accounts_db = RedisClient('accounts', site)
def conn():
    """Return the ``RedisClient`` cached on ``g``, creating it on first use."""
    if hasattr(g, 'conn'):
        return g.conn
    g.conn = RedisClient()
    return g.conn
def get_coon():
    """Return the ``RedisClient`` stored as ``g.redis``, creating it if absent."""
    already_open = hasattr(g, 'redis')
    if not already_open:
        g.redis = RedisClient()
    return g.redis
def __init__(self):
    """Create the instance's Redis client with default settings."""
    client = RedisClient()
    self.redis = client
def __init__(self):
    """Create the Redis client and the crawler used by this instance."""
    client = RedisClient()
    self.redis = client
    fetcher = Crawler()
    self.crawler = fetcher
from storage import RedisClient

# One-off loader: add every line of ul.txt (trailing whitespace stripped)
# to the client created below.
rc = RedisClient('url', '127.0.0.1', None)

entries = []
with open('ul.txt', 'r') as source:
    for raw_line in source:
        entries.append(raw_line.rstrip())

# The file is read completely and closed before anything is added.
for entry in entries:
    rc.add(entry)
'Please input config file path(if you use default file type \'d\'.): ') if path == 'd': path = 'proxy.conf' sure = input( 'Are you sure the config file in \'{}\'. [y/n]: '.format(path)) if sure == 'y': break cfg = ConfigParser() cfg.read(path) REDIS_HOST = try_to_get_options(cfg.get, 'redis', 'host') REDIS_PORT = try_to_get_options(cfg.getint, 'redis', 'port') REDIS_PASSWORD = try_to_get_options(cfg.get, 'redis', 'password') REDIS_KEY = try_to_get_options(cfg.get, 'redis', 'key') redis_client = RedisClient(host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD, s_key=REDIS_KEY) count = redis_client.count() if count == 0: print('Already cleaning!') else: redis_client.show() sure = input( 'Are you sure remove that data? amount {} items! [y/n]: '.format( count)) if sure == 'y': redis_client.remove_by_range(0, 100) else: print('Good luck! Bye Bye')