Exemplo n.º 1
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = "7sDream"

import functools
import re
import os

import requests
from bs4 import BeautifulSoup as _Bs

try:
    # Probe for lxml: it is a much faster parser backend than the stdlib one.
    # A plain import (rather than __import__) makes the intent explicit.
    import lxml  # noqa: F401 -- imported only to test availability

    def BeautifulSoup(makeup):
        """Parse *makeup* with BeautifulSoup using the lxml backend."""
        return _Bs(makeup, "lxml")
except ImportError:
    def BeautifulSoup(makeup):
        """Parse *makeup* with BeautifulSoup using the built-in html.parser."""
        return _Bs(makeup, "html.parser")

# Headers sent with every request so Zhihu sees a regular in-browser
# XMLHttpRequest coming from a desktop Firefox.
Default_Header = {
    "X-Requested-With": "XMLHttpRequest",
    "Referer": "http://www.zhihu.com",
    "User-Agent": ("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; "
                   "rv:39.0) Gecko/20100101 Firefox/39.0"),
    "Host": "www.zhihu.com",
}

# Site root plus the endpoints this client talks to, all derived from it.
Zhihu_URL = "http://www.zhihu.com"
Login_URL = Zhihu_URL + "/login/email"
Captcha_URL_Prefix = Zhihu_URL + "/captcha.gif?r="
Get_Profile_Card_URL = Zhihu_URL + "/node/MemberProfileCardV2"
Get_More_Answer_URL = Zhihu_URL + "/node/QuestionAnswerListV2"
Get_More_Followers_URL = Zhihu_URL + "/node/ProfileFollowersListV2"
Exemplo n.º 2
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import functools
import re
import os

from requests import Session
from bs4 import BeautifulSoup as _Bs
from bs4 import Tag, NavigableString

try:
    # Probe for lxml: it is a much faster parser backend than the stdlib one.
    # A plain import (rather than __import__) makes the intent explicit.
    import lxml  # noqa: F401 -- imported only to test availability

    def BeautifulSoup(makeup):
        """Parse *makeup* with BeautifulSoup using the lxml backend."""
        return _Bs(makeup, 'lxml')
except ImportError:
    def BeautifulSoup(makeup):
        """Parse *makeup* with BeautifulSoup using the built-in html.parser."""
        return _Bs(makeup, 'html.parser')

# Headers attached to every request: mimic an AJAX call issued by a
# desktop Firefox browser.
Default_Header = {
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': 'http://www.zhihu.com',
    'User-Agent': ('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; '
                   'rv:39.0) Gecko/20100101 Firefox/39.0'),
    'Host': 'www.zhihu.com',
}

# Base URL and the API endpoints built on top of it.
Zhihu_URL = 'http://www.zhihu.com'
Login_URL = '{}/login/email'.format(Zhihu_URL)
Captcha_URL_Prefix = '{}/captcha.gif?r='.format(Zhihu_URL)
Get_Profile_Card_URL = '{}/node/MemberProfileCardV2'.format(Zhihu_URL)
Question_Get_More_Answer_URL = '{}/node/QuestionAnswerListV2'.format(Zhihu_URL)
Answer_Add_Comment_URL = '{}/node/AnswerCommentAddV2'.format(Zhihu_URL)
Exemplo n.º 3
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import functools
import re
import os

from requests import Session
from bs4 import BeautifulSoup as _Bs
from bs4 import Tag, NavigableString
from requests.packages.urllib3.util import Retry

try:
    # Probe for lxml: it is a much faster parser backend than the stdlib one.
    # A plain import (rather than __import__) makes the intent explicit.
    import lxml  # noqa: F401 -- imported only to test availability

    def BeautifulSoup(makeup):
        """Parse *makeup* with BeautifulSoup using the lxml backend."""
        return _Bs(makeup, 'lxml')
except ImportError:
    def BeautifulSoup(makeup):
        """Parse *makeup* with BeautifulSoup using the built-in html.parser."""
        return _Bs(makeup, 'html.parser')

# Headers attached to every request so the server sees a browser-originated
# XMLHttpRequest (desktop Firefox).
Default_Header = {
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': 'http://www.zhihu.com',
    'User-Agent': ('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; '
                   'rv:39.0) Gecko/20100101 Firefox/39.0'),
    'Host': 'www.zhihu.com',
}

# HTTPS site root; every endpoint below is rooted at it.
Zhihu_URL = 'https://www.zhihu.com'
Login_URL = '{}/login/email'.format(Zhihu_URL)
Captcha_URL_Prefix = '{}/captcha.gif?r='.format(Zhihu_URL)
Get_Profile_Card_URL = '{}/node/MemberProfileCardV2'.format(Zhihu_URL)
Question_Get_More_Answer_URL = '{}/node/QuestionAnswerListV2'.format(Zhihu_URL)
Answer_Add_Comment_URL = '{}/node/AnswerCommentAddV2'.format(Zhihu_URL)
Answer_Comment_Box_URL = '{}/node/AnswerCommentBoxV2'.format(Zhihu_URL)
Exemplo n.º 4
0
"""Usage:link-verifier.py <pageUrl>
Example:
    linkVerifier.py https://www.python.org
"""

import re
import sys
import chardet
import requests
from urllib.parse import urlparse, urlunparse
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup as _Bs

try:
    # Probe for lxml: it is a much faster parser backend than the stdlib one.
    # A plain import (rather than __import__) makes the intent explicit.
    import lxml  # noqa: F401 -- imported only to test availability

    def BeautifulSoup(markup):
        """Parse *markup* with BeautifulSoup using the lxml backend."""
        return _Bs(markup, 'lxml')
except ImportError:
    def BeautifulSoup(markup):
        """Parse *markup* with BeautifulSoup using the built-in html.parser."""
        return _Bs(markup, 'html.parser')

# Seconds to wait on a single HTTP request before giving up.
TIMEOUT = 6

# Identify as a desktop Firefox so servers don't reject the probe outright.
HEADERS = {
    'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64; rv:48.0) '
                   'Gecko/20100101 Firefox/48.0'),
}

# Loose matcher for absolute, protocol-relative, and bare URLs
# (http/https/ftp/file); last char class excludes trailing punctuation.
PATTERN_URL = re.compile(
    r'(?:(?:(?:https?|ftp|file):)?//)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]'
)


def download(url, params=None, **kwargs):
    if 'timeout' not in kwargs: