Exemplo n.º 1
0
 def run(self):
     """Generate cookies for every account that has no stored cookies yet.

     Reads all usernames from the 'accounts' and 'cookies' stores of
     ``self.website``. For each account missing from the cookies store it
     asks ``BilibiliCookies`` for new cookies and saves them only when the
     result is truthy, so a failed attempt is retried on the next run
     instead of being recorded as an empty entry.
     """
     accounts = RedisClient('accounts', self.website)
     cookies = RedisClient('cookies', self.website)
     accounts_usernames = accounts.all_keys()
     # Build the lookup set once so each membership test is O(1).
     cookies_usernames = set(cookies.all_keys())
     for username in accounts_usernames:
         if username in cookies_usernames:
             continue
         password = accounts.get(username)
         # NOTE(review): this prints the plaintext password; consider
         # masking it if this output ends up in shared logs.
         print('正在生成Cookies, 账号:{} 密码: {}'.format(username, password))
         new_cookies = BilibiliCookies(username, password).cookie()
         if not new_cookies:
             print('生成Cookies失败')
             # Do not store a failed result: the account must stay absent
             # from the cookies store so it is picked up again later.
             continue
         print('生成Cookies成功')
         cookies.set(username, new_cookies)
Exemplo n.º 2
0
def start():
    """Start the crawler without coroutines.

    Builds the Redis and MySQL clients, wraps them in ``GetIpPv`` and, for
    as long as ``get_num()`` returns a truthy value, fetches one domain via
    ``get_domain()`` and prints the value of ``get_result()`` for it.
    """
    redis_store = RedisClient('url', '127.0.0.1', None)
    mysql_store = MysqlClient()
    crawler = GetIpPv(redis_store, mysql_store)
    while True:
        if not crawler.get_num():
            break
        result = crawler.get_result(crawler.get_domain())
        print(result)
Exemplo n.º 3
0
 def schedule_getter(self, cycle=GETTER_CYCLE):
     """Fetch proxies on a fixed schedule.

     Loops forever. Each round prints a banner, runs the 'crawl_xdaili'
     crawler through the getter, calls ``clear()`` on the Redis client and
     then sleeps for ``cycle`` (handed to ``time.sleep``, i.e. seconds).
     """
     proxy_getter = Getter()
     store = RedisClient()
     while True:
         print(' 开始抓取代理 ')
         proxy_getter.run_specific('crawl_xdaili')
         # NOTE(review): clear() runs right after the crawl; confirm what it
         # removes against the RedisClient implementation.
         store.clear()
         time.sleep(cycle)
Exemplo n.º 4
0
def start_coro():
    """Start the crawler using coroutines.

    Builds the Redis and MySQL clients, wraps them in ``GetIpPv`` and runs
    its ``download()`` on the asyncio event loop until it completes. The
    loop is closed afterwards even if the download raises.
    """
    redis_store = RedisClient('url', '127.0.0.1', None)
    mysql_store = MysqlClient()
    crawler = GetIpPv(redis_store, mysql_store)
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(crawler.download())
    finally:
        # Always release the loop's resources, on success or failure.
        loop.close()
Exemplo n.º 5
0
def init_module():
    """Create the module-wide Redis client, detector and booter.

    Rebinds the globals ``redis_client``, ``detector`` and ``booter`` using
    the module-level configuration constants. The detector and the booter
    share the same Redis client instance.
    """
    global redis_client, detector, booter

    redis_options = dict(
        host=REDIS_HOST,
        port=REDIS_PORT,
        password=REDIS_PASSWORD,
        s_key=REDIS_KEY,
        num=GET_NUM,
    )
    redis_client = RedisClient(**redis_options)
    proxy_source = XiCiProxyHelper(quantity=QUANTITY, threshold=THRESHOLD)
    detector = Detector(redis_client, test_url=TEST_URL)
    booter = Booter(redis_client, proxy_source, capacity=CAPACITY)
Exemplo n.º 6
0
 def __init__(self, test_url=url):
     """Store the test URL, open a Redis client and set the request headers.

     Args:
         test_url: URL kept on the instance as ``self.test_url``; defaults
             to the module-level ``url``.
     """
     # Browser-like headers (Edge on Windows 10) with a bilibili Origin.
     browser_headers = {
         'User-Agent': (
             'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
             'Chrome/84.0.4147.135 Safari/537.36 Edg/84.0.522.63'
         ),
         'Origin': 'https://www.bilibili.com',
         'accept-encoding': 'gzip, deflate, br',
         'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
     }
     self.test_url = test_url
     self.db = RedisClient()
     self.headers = browser_headers
Exemplo n.º 7
0
from crawl import XiCiProxyHelper
from storage import Booter, RedisClient
from detector import Detector

if __name__ == "__main__":
    # Manual end-to-end run: boot the pool, run the detector, report what
    # is stored, then remove the 0-100 range.
    proxy_source = XiCiProxyHelper(quantity=40, threshold=1.0)
    store = RedisClient()

    Booter(store, proxy_source).run()
    Detector(store).run()

    total = store.count()
    print('一共有{}'.format(total))
    store.show()

    store.remove_by_range(0, 100)
Exemplo n.º 8
0
    def get_domain(self):
        """Get a domain name: pop one item from ``self.redis_db`` and return it."""
        domain = self.redis_db.pop()
        return domain

    def get_rest_domain_num(self):
        """Return the number of remaining domains, as reported by ``self.redis_db.get_num()``."""
        remaining = self.redis_db.get_num()
        return remaining

    def save(self, text):
        """Append ``text`` to ``title.txt`` in the current working directory.

        The file is written as UTF-8 so that non-ASCII text is stored the
        same way on every platform instead of depending on the locale's
        default encoding, which can raise ``UnicodeEncodeError``.

        Args:
            text: The string to append; no newline is added.
        """
        # Append-only access is all that is needed; the file is never read.
        with open('title.txt', 'a', encoding='utf-8') as f:
            f.write(text)

    def download(self):
        """Process every remaining domain and record the result for each.

        While ``get_rest_domain_num()`` is truthy, one URL is taken with
        ``get_domain()``, fetched with ``get_page()``, parsed with
        ``parse()`` and saved as ``"<url>;<doc>"``. If any of those steps
        raises, the failure is logged with its traceback and the URL is
        saved as ``"<url>;"`` so that one bad domain does not stop the run.
        """
        while self.get_rest_domain_num():
            url = self.get_domain()
            logging.info('req %s', url)
            try:
                response = self.get_page(url)
                # Decode the body with the encoding detected from its
                # content rather than the one the response declared.
                response.encoding = response.apparent_encoding
                logging.info(response.status_code)
                doc = self.parse(response)
                self.save(url + ';' + doc + '\n')
            except Exception:
                # Deliberate best-effort: keep crawling, but leave a trace
                # of what went wrong instead of swallowing it silently.
                logging.exception('failed to process %s', url)
                self.save(url + ';\n')


if __name__ == '__main__':
    # Script entry point: crawl using the local Redis 'url' store and MySQL.
    url_store = RedisClient('url', '127.0.0.1', None)
    mysql_store = MysqlClient()
    ThemeCrawler(url_store, mysql_store).download()
Exemplo n.º 9
0
 def __init__(self, website='default'):
     """Open the cookies and accounts stores for one website.

     Args:
         website: Name passed as the second argument to both
             ``RedisClient`` instances; defaults to ``'default'``.
     """
     self.website = website
     # Two separate clients: one per store type, same website.
     self.cookies_db = RedisClient('cookies', self.website)
     self.accounts_db = RedisClient('accounts', self.website)
Exemplo n.º 10
0
def conn():
    """Return the Redis client cached on ``g``, creating it on first use.

    ``g`` is presumably Flask's application-context global (its import is
    not visible here). The client is stored as ``g.conn`` so that repeated
    calls reuse the same instance.

    Returns:
        The ``RedisClient`` stored on ``g.conn``.
    """
    if not hasattr(g, 'conn'):
        g.conn = RedisClient()
    # Return outside the `if`: previously only the very first call returned
    # the client and every later call fell through and returned None.
    return g.conn
Exemplo n.º 11
0
def get_coon():
    """Return the Redis client stored on ``g.redis``, creating it if absent."""
    if hasattr(g, 'redis'):
        return g.redis
    # First use: create the client and keep it on `g` for later calls.
    g.redis = RedisClient()
    return g.redis
Exemplo n.º 12
0
 def __init__(self):
     """Create the Redis client and keep it on ``self.redis``."""
     client = RedisClient()
     self.redis = client
Exemplo n.º 13
0
 def __init__(self):
     """Create the Redis client and the crawler this object works with."""
     # Storage first, then the crawler, as two independent collaborators.
     self.redis = RedisClient()
     self.crawler = Crawler()
Exemplo n.º 14
0
from storage import RedisClient

# Load every line of ul.txt into the local Redis 'url' store.
rc = RedisClient('url', '127.0.0.1', None)

# Read the whole file first so it is closed before Redis is touched.
with open('ul.txt', 'r') as url_file:
    n = []
    for raw_line in url_file:
        # Strip trailing whitespace, including the line terminator.
        n.append(raw_line.rstrip())

for x in n:
    rc.add(x)
Exemplo n.º 15
0
        'Please input config file path(if you use default file type \'d\'.): ')
    if path == 'd':
        path = 'proxy.conf'
    sure = input(
        'Are you sure the config file in \'{}\'. [y/n]: '.format(path))
    if sure == 'y':
        break

# Read the Redis connection settings from the chosen config file.
cfg = ConfigParser()
cfg.read(path)
REDIS_HOST = try_to_get_options(cfg.get, 'redis', 'host')
REDIS_PORT = try_to_get_options(cfg.getint, 'redis', 'port')
REDIS_PASSWORD = try_to_get_options(cfg.get, 'redis', 'password')
REDIS_KEY = try_to_get_options(cfg.get, 'redis', 'key')

redis_client = RedisClient(
    host=REDIS_HOST,
    port=REDIS_PORT,
    password=REDIS_PASSWORD,
    s_key=REDIS_KEY,
)

# Show what is stored and ask for confirmation before removing anything.
count = redis_client.count()
if count != 0:
    redis_client.show()
    prompt = 'Are you sure remove that data? amount {} items! [y/n]: '.format(
        count)
    sure = input(prompt)
    if sure != 'y':
        print('Good luck! Bye Bye')
    else:
        redis_client.remove_by_range(0, 100)
else:
    print('Already cleaning!')