Example #1
def fetch_kxdaili() -> list:
    '''
    Fetch kxdaili (kaixin) proxies
    '''
    base_url = 'http://www.kxdaili.com/dailiip/1/{}.html#ip'
    try:
        proxies = []
        for page in range(1, 4):
            url = base_url.format(page)
            html = get_html(url)
            tmp = [i.text for i in html.xpath('//tbody/tr/td')]
            res = [
                i for i in tmp
                if i and (i.isdigit() or ''.join(i.split('.')).isdigit())
            ]
            ips = [v for idx, v in enumerate(res) if idx % 2 == 0]
            ports = [v for idx, v in enumerate(res) if idx % 2 == 1]
            for ip, port in zip(ips, ports):
                proxies.append('{}:{}'.format(ip, port))
    except Exception as e:
        logger.exception('error')
        traceback.print_exc()
        proxies = []
    if not proxies:
        logger.warning('failed to fetch kxdaili')
    return proxies
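These fetchers all rely on a module-level get_html helper (plus logger and traceback) that is not shown in the snippets. A minimal sketch of such a helper, assuming requests and lxml are available; the function name, headers, and timeout are illustrative assumptions, not the project's actual implementation:

import requests
from lxml import html as lxml_html


def get_html(url: str, timeout: int = 10):
    '''
    Hypothetical helper: download a page and return an lxml element tree,
    which supports the .xpath() calls used by the fetchers in these examples.
    '''
    headers = {'User-Agent': 'Mozilla/5.0'}  # assumed; the real headers may differ
    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()
    return lxml_html.fromstring(resp.text)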
Example #2
def fetch_kuaidaili() -> list:
    '''
    Fetch kuaidaili proxies
    '''
    base_url = 'https://www.kuaidaili.com/free/inha/{}/'
    try:
        proxies = []
        for page in range(1, 3):
            url = base_url.format(page)
            html = get_html(url)
            ips = html.xpath(
                '//div[@id="list"]/table/tbody/tr/td[@data-title="IP"]/text()')
            ports = html.xpath(
                '//div[@id="list"]/table/tbody/tr/td[@data-title="PORT"]/text()'
            )
            for ip, port in zip(ips, ports):
                proxies.append('{}:{}'.format(ip, port))
            sleep(3)
    except Exception as e:
        logger.exception('error')
        traceback.print_exc()
        proxies = []
    if not proxies:
        logger.warning('failed to fetch kuaidaili')
    return proxies
Example #3
def fetch_3366ip() -> list:
    '''
    Fetch 3366ip proxies
    '''
    base_url = 'http://www.ip3366.net/free/?stype=1&page={}'
    try:
        proxies = []
        for page in range(1, 4):
            url = base_url.format(page)
            html = get_html(url)
            tmp = [i.text for i in html.xpath('//tbody/tr/td')]
            res = [
                i for i in tmp
                if i and (i.isdigit() or ''.join(i.split('.')).isdigit())
            ]
            ips = [v for idx, v in enumerate(res) if idx % 2 == 0]
            ports = [v for idx, v in enumerate(res) if idx % 2 == 1]
            for ip, port in zip(ips, ports):
                proxies.append('{}:{}'.format(ip, port))
    except Exception as e:
        logger.exception('error')
        traceback.print_exc()
        proxies = []
    if not proxies:
        logger.warning('failed to fetch 3366ip')
    return proxies
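Examples #1, #3, #10 and #11 repeat the same parsing pattern: keep the numeric-looking table cells, then pair alternating entries as ip:port. If that logic were to be shared, a small helper along these lines could replace the duplicated code (pair_ip_port is a hypothetical name, not part of the original project):

def pair_ip_port(cells: list) -> list:
    '''
    Hypothetical helper: keep ports (all digits) and dotted IPs (digits once
    the dots are removed), then join alternating entries into 'ip:port'.
    '''
    res = [
        c for c in cells
        if c and (c.isdigit() or ''.join(c.split('.')).isdigit())
    ]
    # Even positions hold IPs, odd positions hold the matching ports.
    return ['{}:{}'.format(ip, port) for ip, port in zip(res[0::2], res[1::2])]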
Example #4
    def set_proxy(self, request):
        '''
        Assign a proxy to the request
        '''
        proxy = ProxyDatabase()
        count = proxy.get_valid_count()
        if count < self.proxies_min_count:
            self.fetch_proxies()
        if 'proxy' in request.meta:
            invalid_proxy = request.meta['proxy'].split('//')[1]
            logger.info('{} will be marked invalid'.format(invalid_proxy))
            proxy.set_false(invalid_proxy)
            refetch_due = datetime.now() > (
                self.last_no_proxy_time +
                timedelta(minutes=self.fetch_proxy_timedelta))
            if int(proxy.get_valid_count()) < self.proxies_min_count and refetch_due:
                logger.warning(
                    'only {} valid proxies left, start to fetch'.format(
                        proxy.get_valid_count()))
                self.fetch_proxies()
            if int(proxy.get_valid_count()) == 0:
                logger.warning('all proxies invalid, fetching new ones.')
                self.fetch_proxies()
        request.meta['proxy'] = 'http://' + proxy.get_one()
        logger.info('request proxy changed to {}'.format(request.meta['proxy']))
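set_proxy reads and writes request.meta['proxy'], which suggests it lives in a Scrapy downloader middleware. A sketch of how such a middleware might be wired up; the class name, the threshold values, and the settings path are illustrative assumptions, and the set_proxy method from Example #4 (plus fetch_proxies and last_no_proxy_time) is assumed to be defined on the same class:

class ProxyMiddleware:
    proxies_min_count = 10      # assumed threshold, not taken from the original code
    fetch_proxy_timedelta = 10  # assumed minutes between forced refetches

    def process_request(self, request, spider):
        # Scrapy calls this hook for every outgoing request; delegating to the
        # set_proxy method shown in Example #4 attaches a proxy to the request.
        self.set_proxy(request)
        return None


# settings.py (sketch): enable the middleware at an arbitrary priority.
# DOWNLOADER_MIDDLEWARES = {
#     'myproject.middlewares.ProxyMiddleware': 543,
# }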
Example #5
    def clear(self):
        '''
        Delete all invalidated proxies from the table
        '''
        try:
            sql = "DELETE FROM %s WHERE valid=0"
            self.cur.execute(sql % self.db_name)
            self.conn.commit()
        except Exception as e:
            logger.warning('clear fail')
            traceback.print_exc()
Example #6
    def add_items(self, item: list):
        '''
        Add multiple proxies to the table
        '''
        try:
            sql = "INSERT INTO %s (IP_PORT, valid) VALUES ('%s', 1)"
            for i in item:
                logger.info(sql % (self.db_name, i))
                self.cur.execute(sql % (self.db_name, i))
            self.conn.commit()
        except Exception as e:
            self.conn.rollback()
            logger.warning('add_items fail')
            traceback.print_exc()
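add_items interpolates the proxy string straight into the SQL text. Since those values come from scraped pages, a parameterized variant is safer; a sketch assuming a DB-API driver that uses %s placeholders (e.g. pymysql), where only the table name still has to be formatted in. The method name is hypothetical, not part of the original class:

    def add_items_safe(self, items: list):
        '''
        Hypothetical variant of add_items: bind the proxy value as a query
        parameter so the driver handles quoting and escaping.
        '''
        sql = "INSERT INTO {} (IP_PORT, valid) VALUES (%s, 1)".format(self.db_name)
        try:
            self.cur.executemany(sql, [(i,) for i in items])
            self.conn.commit()
        except Exception:
            self.conn.rollback()
            logger.warning('add_items_safe fail')
            traceback.print_exc()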
Example #7
def fetch_66ip() -> list:
    '''
    Fetch 66ip proxies
    '''
    url = 'http://www.66ip.cn/nmtq.php?getnum=512&isp=0&anonymoustype=0&start=&ports=&export=&ipaddress=&area=0&proxytype=2&api=66ip'
    try:
        html = get_html(url)
        ip_port = html.xpath('string(//body)').split('\r\n\t\t')[1:-2]
        proxies = list(ip_port)
    except Exception as e:
        logger.exception('error')
        traceback.print_exc()
        proxies = []
    if not proxies:
        logger.warning('failed to fetch 66ip')
    return proxies
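fetch_66ip depends on the exact '\r\n\t\t' layout of the 66ip text response. A regex that pulls every ip:port pair out of the body would be less brittle; a sketch (extract_ip_ports is a hypothetical helper, not part of the original code):

import re

IP_PORT_RE = re.compile(r'\d{1,3}(?:\.\d{1,3}){3}:\d{2,5}')


def extract_ip_ports(text: str) -> list:
    '''
    Hypothetical helper: find every ip:port pair in a blob of text,
    regardless of the surrounding whitespace or markup.
    '''
    return IP_PORT_RE.findall(text)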
Example #8
def fetch_mimvp() -> list:
    '''
    Fetch mimvp proxies
    '''
    url = 'https://proxy.mimvp.com/free.php?proxy=in_hp'
    try:
        html = get_html(url)
        ips = html.xpath(
            '//div[@class="free-list"]/table/tbody/td[@class="tbl-proxy-ip"]/text()'
        )
        ports = [80] * len(ips)
        proxies = []
        for ip, port in zip(ips, ports):
            proxies.append('{}:{}'.format(ip, port))
    except Exception as e:
        logger.exception('error')
        traceback.print_exc()
        proxies = []
    if not proxies:
        logger.warning('failed to fetch mimvp')
    return proxies
Example #9
def fetch_xici() -> list:
    '''
    Fetch xici proxies
    '''
    base_url = 'http://www.xicidaili.com/nn/{}'
    try:
        proxies = []
        for page in range(1, 3):
            url = base_url.format(page)
            html = get_html(url)
            ips = html.xpath('//tr[@class="odd"]/td[2]/text()')
            ports = html.xpath('//tr[@class="odd"]/td[3]/text()')
            for ip, port in zip(ips, ports):
                proxies.append('{}:{}'.format(ip, port))
    except Exception as e:
        logger.exception('error')
        traceback.print_exc()
        proxies = []
    if not proxies:
        logger.warning('failed to fetch xici')
    return proxies
Example #10
def fetch_ip181() -> list:
    '''
    Fetch ip181 proxies
    '''
    url = 'http://www.ip181.com/'
    try:
        html = get_html(url)
        tmp = [i.text for i in html.xpath('//tbody/tr/td')]
        res = [
            i for i in tmp
            if i and (i.isdigit() or ''.join(i.split('.')).isdigit())
        ]
        ips = [v for idx, v in enumerate(res) if idx % 2 == 0]
        ports = [v for idx, v in enumerate(res) if idx % 2 == 1]
        proxies = []
        for ip, port in zip(ips, ports):
            proxies.append('{}:{}'.format(ip, port))
    except Exception as e:
        logger.exception('error')
        traceback.print_exc()
        proxies = []
    if not proxies:
        logger.warning('failed to fetch ip181')
    return proxies
Example #11
def fetch_data5u() -> list:
    '''
    Fetch data5u proxies
    '''
    url = 'http://www.data5u.com/free/gngn/index.shtml'
    try:
        html = get_html(url)
        tmp = [i.text for i in html.xpath('//ul[@class="l2"]/span/li')]
        res = [
            i for i in tmp
            if i and (i.isdigit() or ''.join(i.split('.')).isdigit())
        ]
        ips = [v for idx, v in enumerate(res) if idx % 2 == 0]
        ports = [v for idx, v in enumerate(res) if idx % 2 == 1]
        proxies = []
        for ip, port in zip(ips, ports):
            proxies.append('{}:{}'.format(ip, port))
    except Exception as e:
        logger.exception('error')
        traceback.print_exc()
        proxies = []
    if not proxies:
        logger.warning('failed to fetch data5u')
    return proxies
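All of the fetchers return a list of 'ip:port' strings, so an aggregator can simply union their results. A sketch of such a dispatcher (fetch_all is a hypothetical name; the project's own fetch_proxies, referenced in Example #4, may be organized differently):

def fetch_all() -> list:
    '''
    Hypothetical aggregator: run every fetcher and return the deduplicated
    union of the proxies they found.
    '''
    fetchers = [
        fetch_kxdaili, fetch_kuaidaili, fetch_3366ip, fetch_66ip,
        fetch_mimvp, fetch_xici, fetch_ip181, fetch_data5u,
    ]
    proxies = set()
    for fetch in fetchers:
        proxies.update(fetch())
    return list(proxies)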