#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Shared constants for the zhihu client: default HTTP request headers,
API endpoint URLs, and a BeautifulSoup factory that prefers lxml."""

__author__ = "7sDream"

import functools
import re
import os

import requests
from bs4 import BeautifulSoup as _Bs

try:
    # Prefer the faster C-based lxml parser when it is installed.
    __import__("lxml")

    def BeautifulSoup(makeup):
        """Parse *makeup* (an HTML string) with the lxml backend."""
        return _Bs(makeup, "lxml")
except ImportError:
    def BeautifulSoup(makeup):
        """Parse *makeup* with the pure-Python html.parser fallback."""
        return _Bs(makeup, "html.parser")

# Headers sent with every request; X-Requested-With makes the server treat
# requests as AJAX calls, which changes the response format it returns.
Default_Header = {
    "X-Requested-With": "XMLHttpRequest",
    "Referer": "http://www.zhihu.com",
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; "
                  "rv:39.0) Gecko/20100101 Firefox/39.0",
    "Host": "www.zhihu.com",
}

Zhihu_URL = "http://www.zhihu.com"
Login_URL = Zhihu_URL + "/login/email"
Captcha_URL_Prefix = Zhihu_URL + "/captcha.gif?r="
# Node endpoints — built from Zhihu_URL for consistency with the URLs above
# (same resulting strings as before).
Get_Profile_Card_URL = Zhihu_URL + "/node/MemberProfileCardV2"
Get_More_Answer_URL = Zhihu_URL + "/node/QuestionAnswerListV2"
Get_More_Followers_URL = Zhihu_URL + "/node/ProfileFollowersListV2"
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Shared constants for the zhihu client: default HTTP request headers,
API endpoint URLs, and a BeautifulSoup factory that prefers lxml."""

import functools
import re
import os

from requests import Session
from bs4 import BeautifulSoup as _Bs
from bs4 import Tag, NavigableString

try:
    # Prefer the faster C-based lxml parser when it is installed.
    __import__('lxml')

    def BeautifulSoup(makeup):
        """Parse *makeup* (an HTML string) with the lxml backend."""
        return _Bs(makeup, 'lxml')
except ImportError:
    def BeautifulSoup(makeup):
        """Parse *makeup* with the pure-Python html.parser fallback."""
        return _Bs(makeup, 'html.parser')

# Headers sent with every request; X-Requested-With makes the server treat
# requests as AJAX calls, which changes the response format it returns.
Default_Header = {
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': 'http://www.zhihu.com',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; '
                  'rv:39.0) Gecko/20100101 Firefox/39.0',
    'Host': 'www.zhihu.com',
}

Zhihu_URL = 'http://www.zhihu.com'
Login_URL = Zhihu_URL + '/login/email'
Captcha_URL_Prefix = Zhihu_URL + '/captcha.gif?r='
Get_Profile_Card_URL = Zhihu_URL + '/node/MemberProfileCardV2'
Question_Get_More_Answer_URL = Zhihu_URL + '/node/QuestionAnswerListV2'
Answer_Add_Comment_URL = Zhihu_URL + '/node/AnswerCommentAddV2'
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Shared constants for the zhihu client: default HTTP request headers,
API endpoint URLs, and a BeautifulSoup factory that prefers lxml."""

import functools
import re
import os

from requests import Session
from bs4 import BeautifulSoup as _Bs
from bs4 import Tag, NavigableString

# `requests.packages.urllib3` is a deprecated vendored alias that newer
# requests releases no longer ship; fall back to importing urllib3 directly.
try:
    from requests.packages.urllib3.util import Retry
except ImportError:
    from urllib3.util import Retry

try:
    # Prefer the faster C-based lxml parser when it is installed.
    __import__('lxml')

    def BeautifulSoup(makeup):
        """Parse *makeup* (an HTML string) with the lxml backend."""
        return _Bs(makeup, 'lxml')
except ImportError:
    def BeautifulSoup(makeup):
        """Parse *makeup* with the pure-Python html.parser fallback."""
        return _Bs(makeup, 'html.parser')

# Headers sent with every request; X-Requested-With makes the server treat
# requests as AJAX calls, which changes the response format it returns.
# NOTE(review): Referer/Host stay on plain http while Zhihu_URL below is
# https — presumably the server accepts both; confirm before changing.
Default_Header = {
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': 'http://www.zhihu.com',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; '
                  'rv:39.0) Gecko/20100101 Firefox/39.0',
    'Host': 'www.zhihu.com',
}

Zhihu_URL = 'https://www.zhihu.com'
Login_URL = Zhihu_URL + '/login/email'
Captcha_URL_Prefix = Zhihu_URL + '/captcha.gif?r='
Get_Profile_Card_URL = Zhihu_URL + '/node/MemberProfileCardV2'
Question_Get_More_Answer_URL = Zhihu_URL + '/node/QuestionAnswerListV2'
Answer_Add_Comment_URL = Zhihu_URL + '/node/AnswerCommentAddV2'
Answer_Comment_Box_URL = Zhihu_URL + '/node/AnswerCommentBoxV2'
"""Verify that the hyperlinks on a web page are reachable.

Usage:
    link-verifier.py <pageUrl>

Example:
    link-verifier.py https://www.python.org
"""

import re
import sys
import chardet
import requests
from urllib.parse import urlparse, urlunparse
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup as _Bs

try:
    # Prefer the faster C-based lxml parser when it is installed.
    __import__('lxml')
    BeautifulSoup = (lambda markup: _Bs(markup, 'lxml'))
except ImportError:
    # Fall back to the stdlib pure-Python parser.
    BeautifulSoup = (lambda markup: _Bs(markup, 'html.parser'))

# Default per-request timeout, in seconds.
TIMEOUT = 6

# Headers sent with every request (browser-like User-Agent).
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:48.0) Gecko/20100101 Firefox/48.0'
}

# Matches absolute, protocol-relative, and bare URLs; the final character
# class excludes trailing punctuation such as '.' or ',' from the match.
PATTERN_URL = re.compile(
    r'(?:(?:(?:https?|ftp|file):)?//)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]'
)


def download(url, params=None, **kwargs):
    # Fetch *url* via requests; fills in a default timeout when the caller
    # did not pass one.  NOTE(review): definition continues past this chunk.
    if 'timeout' not in kwargs: