Exemplo n.º 1
0
def get_toppost100(params):
    """Fetch one page of the top-post listing and record each card's image.

    The page at ``toppost100_url`` is requested through a proxy obtained from
    ``cpn.get_proxy_ip()``.  Inside the ``ul`` whose class is
    ``l-clearfix gridList workImageCards js-workTopList`` every ``li`` is
    inspected; for each one a ``name + "Θ" + img`` record is handed to
    ``cpn.write_str_data`` together with ``pic_urls_file``.

    The whole attempt is repeated with a freshly requested proxy whenever
    anything raises, so this function only returns after one attempt has
    completed.

    Args:
        params: Query parameters forwarded to ``requests.get``.

    Returns:
        None.
    """
    while True:
        proxy_ip = cpn.get_proxy_ip()
        try:
            resp = requests.get(toppost100_url,
                                params=params,
                                headers=toppost100_headers,
                                proxies=proxy_ip,
                                timeout=5)
            print("抓取:" + resp.request.url)
            soup = cpn.get_bs(resp.text)
            ul = soup.find(
                'ul',
                attrs={
                    'class':
                    'l-clearfix gridList workImageCards js-workTopList'
                })
            # If the list container is missing, ``ul`` is None and the
            # attribute access below raises; the handler at the bottom then
            # retries with another proxy.
            lis = ul.findAll('li')

            # Parse every card BEFORE writing anything.  Writing while
            # parsing meant that an error on a later card re-ran the whole
            # request and wrote the earlier cards a second time.
            records = []
            for li in lis:
                img_tag = li.find('img', attrs={'class': 'cardImage'})
                src = img_tag.get('src') if img_tag is not None else None
                if src is None:
                    # No card image or no src attribute: skip this item
                    # instead of aborting (and endlessly retrying) the page.
                    continue
                # Drop the last four characters of the src
                # (NOTE(review): presumably a size/format suffix - confirm).
                img = src[:-4]
                if img == '':
                    continue
                name_tag = li.find('p', attrs={'class': 'fz14 text cut'})
                name = ''
                if name_tag is not None:
                    name = name_tag.get_text().strip()
                if name == '':
                    # Untitled work: fall back to the current Unix timestamp.
                    name = str(int(time.time()))
                records.append(name + "Θ" + img)

            for record in records:
                cpn.write_str_data(record, pic_urls_file)
            return None
        except Exception as e:
            # Best effort: report the failure and try again with a new proxy.
            print(threading.current_thread().name + "~" + str(e))
Exemplo n.º 2
0
def get_ajax_data(data):
    """POST ``data`` to ``ajax_url`` and record each returned card's image.

    The request goes through a proxy obtained from ``cpn.get_proxy_ip()``.
    Every ``li`` in the response is inspected; for each one that carries a
    ``cardImage`` image a ``name + "Θ" + img`` record is handed to
    ``cpn.write_str_data`` together with ``pic_urls_file``.

    The whole attempt is repeated with a freshly requested proxy whenever
    anything raises, so this function only returns after one attempt has
    completed.

    Args:
        data: Form payload forwarded to ``requests.post``.

    Returns:
        None.
    """
    while True:
        proxy_ip = cpn.get_proxy_ip()
        try:
            resp = requests.post(ajax_url,
                                 data=data,
                                 headers=ajax_headers,
                                 proxies=proxy_ip,
                                 timeout=5)
            soup = cpn.get_bs(resp.text)

            # Parse every item BEFORE writing anything.  Writing while
            # parsing meant that an error on a later item re-ran the whole
            # request and wrote the earlier items a second time.
            records = []
            for li in soup.findAll('li'):
                img_tag = li.find('img', attrs={'class': 'cardImage'})
                src = img_tag.get('src') if img_tag is not None else None
                if src is None:
                    # An <li> without a card image (or without a src) used to
                    # raise and trigger an endless retry; just skip it.
                    continue
                # Drop the last four characters of the src
                # (NOTE(review): presumably a size/format suffix - confirm).
                img = src[:-4]
                if img == '':
                    continue
                name_tag = li.find('p', attrs={'class': 'fz14 text cut'})
                name = ''
                if name_tag is not None:
                    name = name_tag.get_text().strip()
                if name == '':
                    # Untitled work: fall back to the current Unix timestamp.
                    name = str(int(time.time()))
                records.append(name + "Θ" + img)

            for record in records:
                cpn.write_str_data(record, pic_urls_file)
            return None
        except Exception as e:
            # Best effort: report the failure and try again with a new proxy.
            print(threading.current_thread().name + "~" + str(e))
Exemplo n.º 3
0
def get_article_url(url):
    """Record the article links listed on the page at ``url``.

    The page is downloaded once (no retry).  Within the ``div`` whose id is
    ``article_list``, each ``span`` of class ``link_title`` contributes one
    entry: ``base_url`` followed by the ``href`` of the span's first anchor,
    passed to ``cpn.write_str_data`` together with ``articles_file``.

    Any exception raised along the way is printed and swallowed; entries
    written before the failure are kept.

    Args:
        url: Address of the listing page to download.

    Returns:
        None.
    """
    try:
        page = requests.get(url, headers=headers, timeout=5)
        if page is None:
            return None
        print("解析:" + page.request.url)
        listing = cpn.get_bs(page.text).find('div', attrs={'id': 'article_list'})
        title_spans = listing.findAll('span', attrs={'class': 'link_title'})
        for title in title_spans:
            cpn.write_str_data(base_url + title.find('a')['href'],
                               articles_file)
        return None
    except Exception as err:
        print(str(err))
Exemplo n.º 4
0
def get_pics(count):
    """Fetch page ``count`` of the thread and record its image URLs.

    ``tiezi_url`` is requested with the query parameters ``pn`` (the page
    number), ``ajax`` and ``t`` (the current Unix timestamp, recomputed on
    every attempt).  The ``src`` of every ``img`` of class ``BDE_Image`` in
    the response is handed to ``cpn.write_str_data`` together with
    ``pic_urls_file``.

    The attempt is repeated until one completes without raising.

    Args:
        count: Page number sent as the ``pn`` query parameter.

    Returns:
        None.
    """
    while True:
        params = {'pn': count, 'ajax': '1', 't': int(time.time())}
        try:
            resp = requests.get(tiezi_url,
                                headers=headers,
                                timeout=5,
                                params=params)
            soup = cpn.get_bs(resp.text)
            imgs = soup.findAll('img', attrs={'class': 'BDE_Image'})

            # Collect first, write afterwards: an <img> without a src used to
            # raise half-way through, and the retry then wrote the earlier
            # URLs a second time.
            srcs = []
            for img in imgs:
                src = img.get('src')
                if src is not None:
                    srcs.append(src)

            for src in srcs:
                cpn.write_str_data(src, pic_urls_file)
            return None
        except Exception as e:
            # Still best effort (retry), but no longer silent: a persistent
            # failure previously spun forever with no output at all.
            print(str(e))
Exemplo n.º 5
0
def get_pics(count):
    """Fetch page ``count`` of the thread and record its image URLs.

    ``tiezi_url`` is requested with the query parameters ``pn`` (the page
    number), ``ajax`` and ``t`` (the current Unix timestamp, recomputed on
    every attempt).  The ``src`` of every ``img`` of class ``BDE_Image`` in
    the response is handed to ``cpn.write_str_data`` together with
    ``pic_urls_file``.

    The attempt is repeated until one completes without raising.

    Args:
        count: Page number sent as the ``pn`` query parameter.

    Returns:
        None.
    """
    while True:
        params = {
            'pn': count,
            'ajax': '1',
            't': int(time.time())
        }
        try:
            resp = requests.get(tiezi_url, headers=headers, timeout=5, params=params)
            soup = cpn.get_bs(resp.text)
            imgs = soup.findAll('img', attrs={'class': 'BDE_Image'})

            # Resolve every src before writing any of them, and skip images
            # that have none: a missing src used to raise mid-loop, after
            # which the retry wrote the earlier URLs again.
            candidates = (img.get('src') for img in imgs)
            srcs = [src for src in candidates if src is not None]

            for src in srcs:
                cpn.write_str_data(src, pic_urls_file)
            return None
        except Exception as e:
            # Still best effort (retry), but report the error instead of
            # looping silently forever on a persistent failure.
            print(str(e))
Exemplo n.º 6
0
def get_ajax_data(data):
    """POST ``data`` to ``ajax_url`` and record each returned card's image.

    The request goes through a proxy obtained from ``cpn.get_proxy_ip()``.
    Every ``li`` in the response is inspected; for each one that carries a
    ``cardImage`` image a ``name + "Θ" + img`` record is handed to
    ``cpn.write_str_data`` together with ``pic_urls_file``.

    The whole attempt is repeated with a freshly requested proxy whenever
    anything raises, so this function only returns after one attempt has
    completed.

    Args:
        data: Form payload forwarded to ``requests.post``.

    Returns:
        None.
    """
    while True:
        proxy_ip = cpn.get_proxy_ip()
        try:
            resp = requests.post(ajax_url, data=data, headers=ajax_headers, proxies=proxy_ip, timeout=5)
            soup = cpn.get_bs(resp.text)

            # Build the full list of records first; only write once the whole
            # response parsed cleanly, so a retry never duplicates rows.
            records = []
            for li in soup.findAll('li'):
                img_tag = li.find('img', attrs={'class': 'cardImage'})
                if img_tag is None:
                    # Not a card item; previously this raised and forced an
                    # endless re-fetch of the same response.
                    continue
                src = img_tag.get('src')
                if src is None:
                    continue
                # Drop the last four characters of the src
                # (NOTE(review): presumably a size/format suffix - confirm).
                img = src[:-4]
                if img == '':
                    continue
                name_tag = li.find('p', attrs={'class': 'fz14 text cut'})
                name = name_tag.get_text().strip() if name_tag is not None else ''
                if name == '':
                    # Untitled work: fall back to the current Unix timestamp.
                    name = str(int(time.time()))
                records.append(name + "Θ" + img)

            for record in records:
                cpn.write_str_data(record, pic_urls_file)
            return None
        except Exception as e:
            # Best effort: report the failure and try again with a new proxy.
            print(threading.current_thread().name + "~" + str(e))
Exemplo n.º 7
0
def get_toppost100(params):
    """Fetch one page of the top-post listing and record each card's image.

    The page at ``toppost100_url`` is requested through a proxy obtained from
    ``cpn.get_proxy_ip()``.  Inside the ``ul`` whose class is
    ``l-clearfix gridList workImageCards js-workTopList`` every ``li`` is
    inspected; for each one a ``name + "Θ" + img`` record is handed to
    ``cpn.write_str_data`` together with ``pic_urls_file``.

    The whole attempt is repeated with a freshly requested proxy whenever
    anything raises, so this function only returns after one attempt has
    completed.

    Args:
        params: Query parameters forwarded to ``requests.get``.

    Returns:
        None.
    """
    while True:
        proxy_ip = cpn.get_proxy_ip()
        try:
            resp = requests.get(toppost100_url, params=params, headers=toppost100_headers, proxies=proxy_ip, timeout=5)
            print("抓取:" + resp.request.url)
            soup = cpn.get_bs(resp.text)
            ul = soup.find('ul', attrs={'class': 'l-clearfix gridList workImageCards js-workTopList'})
            # A missing container leaves ``ul`` as None; the attribute access
            # below then raises and the handler retries with another proxy.
            lis = ul.findAll('li')

            # Build the full list of records first; only write once the whole
            # page parsed cleanly, so a retry never duplicates rows.
            records = []
            for li in lis:
                img_tag = li.find('img', attrs={'class': 'cardImage'})
                if img_tag is None:
                    # Item without a card image: skip it rather than abort
                    # (and endlessly re-fetch) the page.
                    continue
                src = img_tag.get('src')
                if src is None:
                    continue
                # Drop the last four characters of the src
                # (NOTE(review): presumably a size/format suffix - confirm).
                img = src[:-4]
                if img == '':
                    continue
                name_tag = li.find('p', attrs={'class': 'fz14 text cut'})
                name = name_tag.get_text().strip() if name_tag is not None else ''
                if name == '':
                    # Untitled work: fall back to the current Unix timestamp.
                    name = str(int(time.time()))
                records.append(name + "Θ" + img)

            for record in records:
                cpn.write_str_data(record, pic_urls_file)
            return None
        except Exception as e:
            # Best effort: report the failure and try again with a new proxy.
            print(threading.current_thread().name + "~" + str(e))