def download_proxy(testUrl, proxyFilePath, counts, startPage=1, timeout=5):
    '''
    Scrape proxy listing pages, check the collected proxies against
    ``testUrl`` and write the ones that passed to ``proxyFilePath``.

    :param testUrl: URL handed to every ``ProxyCheck`` worker; used to check
                    whether a proxy can request it normally
    :param proxyFilePath: path of the file the checked proxies are written to
                          (one entry per line, existing content is replaced)
    :param counts: number of listing pages to fetch
    :param startPage: number of the first listing page
    :param timeout: timeout handed to every ``ProxyCheck`` worker
    '''
    # Stage 1: start one ProxyGet worker per listing page, then wait for all.
    # ``range`` (not ``xrange``) so the function runs on Python 2 and 3.
    getLoop = []
    for page in range(startPage, startPage + counts):
        worker = ProxyGet("http://www.xicidaili.com/nn/%d" % page)
        getLoop.append(worker)
        worker.start()
    for worker in getLoop:
        worker.join()
    # NOTE(review): the workers presumably fill _RAW_PROXY_LIST; the list is
    # module-level and is not cleared here, so repeated calls accumulate.
    logger.info("raw proxy list - %s" % len(_RAW_PROXY_LIST))
    logger.debug('check url [-] %s' % testUrl)

    # Stage 2: check the raw proxies in batches of 10, one worker per batch.
    checkLoop = []
    for offset in range(0, len(_RAW_PROXY_LIST), 10):
        worker = ProxyCheck(_RAW_PROXY_LIST[offset:offset + 10], testUrl, timeout)
        checkLoop.append(worker)
        worker.start()
    for worker in checkLoop:
        worker.join()
    logger.info("checked proxy list - %s" % len(_CHECKED_PROXY_LIST))

    # Stage 3: persist the checked proxies, one per line.
    with open(proxyFilePath, 'w') as f:
        f.writelines(proxy + '\n' for proxy in _CHECKED_PROXY_LIST)
    logger.info("write to file succeed [-] %s" % proxyFilePath)
def getProxy(self):
    '''
    Download ``self.target`` and append every match of ``_PATTERN`` in the
    page to ``_RAW_PROXY_LIST`` as an ``(agent, ip, port)`` tuple.

    The agent (third regex group) is lower-cased; ip and port are the first
    and second groups and are stored as matched.
    '''
    logger.debug('target [-] %s' % self.target)
    session = requests.session()
    try:
        response = session.get(url=self.target, headers=_HEADERS)
        response.encoding = 'utf-8'
        page = response.text
    finally:
        # Release the session's pooled connections; the original never
        # closed it, leaking one connection pool per call.
        session.close()
    for row in re.findall(_PATTERN, page, re.DOTALL):
        ip = row[0]
        port = row[1]
        agent = row[2].lower()
        _RAW_PROXY_LIST.append((agent, ip, port))
def checkProxy(self):
    '''
    Request ``self.testUrl`` through every proxy in ``self.proxyList`` and
    append the URL of each proxy that answered with HTTP 200 to
    ``_CHECKED_PROXY_LIST``.

    Each entry of ``self.proxyList`` is indexed as ``(scheme, ip, port)``.
    Proxies whose request raises, or that answer with another status code,
    are skipped silently (best effort).
    '''
    session = requests.session()
    try:
        for proxy in self.proxyList:
            scheme = proxy[0]
            proxyUrl = "%s://%s:%s" % (proxy[0], proxy[1], proxy[2])
            try:
                res = session.get(url=self.testUrl, proxies={scheme: proxyUrl},
                                  headers=_HEADERS, timeout=self.timeout)
            except Exception:
                # A failing proxy is expected here; ``Exception`` (not a bare
                # ``except``) lets SystemExit/KeyboardInterrupt propagate.
                continue
            if res.status_code == 200:
                logger.debug('checked [-] %s' % proxyUrl)
                _CHECKED_PROXY_LIST.append(proxyUrl)
    finally:
        # Release the session's pooled connections once the batch is done.
        session.close()
def download(self, method, url, proxyEnable=False, **kwargs):
    '''
    Send an HTTP request with ``requests.request``, optionally through a
    proxy taken from ``self.proxies``.

    When ``proxyEnable`` is true and ``self.proxies`` is not empty, the
    proxies are used in rotation: the first entry is moved to the end of the
    list and used for the request. If the request raises ``ProxyError`` the
    next proxy is tried; every proxy is tried at most once per call.

    :param method: 'GET','POST','PUT','DELETE','HEAD','OPTIONS'
    :param url: url
    :param proxyEnable: use proxy or not
    :param params: (optional) Dictionary or bytes to be sent in the query
        string for the :class:`Request`
    :param data: (optional) Dictionary, bytes, or file-like object to send in
        the body of the :class:`Request`
    :param json: (optional) json data to send in the body of the
        :class:`Request`
    :param headers: (optional) Dictionary of HTTP Headers to send with the
        :class:`Request`
    :param cookies: (optional) Dict or CookieJar object to send with the
        :class:`Request`
    :param files: (optional) Dictionary of ``'name': file-like-objects`` (or
        ``{'name': file-tuple}``) for multipart encoding upload.
        ``file-tuple`` can be a 2-tuple ``('filename', fileobj)``, 3-tuple
        ``('filename', fileobj, 'content_type')`` or a 4-tuple
        ``('filename', fileobj, 'content_type', custom_headers)``, where
        ``'content-type'`` is a string defining the content type of the given
        file and ``custom_headers`` a dict-like object containing additional
        headers to add for the file
    :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth
    :param timeout: (optional) How long to wait for the server to send data
        before giving up, as a float, or a
        ``(connect timeout, read timeout)`` tuple
    :param allow_redirects: (optional) Boolean. Set to True if
        POST/PUT/DELETE redirect following is allowed
    :param proxies: (optional) Dictionary mapping protocol to the URL of the
        proxy. Do not combine with ``proxyEnable=True``: the rotating proxy
        is passed under the same keyword.
    :param verify: (optional) whether the SSL cert will be verified. A
        CA_BUNDLE path can also be provided. Defaults to ``True``
    :param stream: (optional) if ``False``, the response content will be
        immediately downloaded
    :param cert: (optional) if String, path to ssl client cert file (.pem).
        If Tuple, ('cert', 'key') pair
    :return: the Response, or None if the request failed
    '''
    if not proxyEnable or not self.proxies:
        if proxyEnable:
            logger.warning('No initialization proxy file or proxy file is not available')
        try:
            return requests.request(method, url, **kwargs)
        except Exception as e:
            logger.warning(e)
            return None

    # One full rotation at most. The original retried by calling itself on
    # every ProxyError, which never terminated (short of the recursion limit)
    # when all proxies were dead.
    for _ in range(len(self.proxies)):
        try:
            # Rotate: take the first proxy and put it back at the end.
            oneProxy = self.proxies.pop(0)
            self.proxies.append(oneProxy)
            scheme = oneProxy.split(":")[0]
            # Log the URL string itself; ``dict.values()[0]`` is not valid
            # on Python 3.
            logger.debug('USE PROXY [-] %s' % oneProxy)
            return requests.request(method, url, proxies={scheme: oneProxy}, **kwargs)
        except ProxyError:
            continue
        except Exception as e:
            logger.warning(e)
            return None
    logger.warning('All proxies failed with ProxyError [-] %s' % url)
    return None
# coding: utf-8
from dplog import logger

# ---- See README.md for the meaning of each parameter ----
# NOTE(review): 10 presumably corresponds to logging.DEBUG - confirm in README.
logger.LOG_LEVEL = 10

# ---- Parameters: console output (changing these is not recommended) ----
logger.IS_CONSOLE = (True, True)
logger.COLOR_ERROR = ("red", None, "bold")
logger.COLOR_WARNING = ("yellow", None, "bold")
logger.COLOR_INFO = ("cyan", None, "bold")
logger.COLOR_DEBUG = ("green", None, "bold")

# ---- Parameters: log file output ----
# Every file target is set to None; targets are assigned left to right.
logger.FILE_ERROR = logger.FILE_WARNING = logger.FILE_INFO = None
logger.FILE_DEBUG = logger.FILE_LOG = None
logger.FILE_MAX_BYTES = 128 * 1024 ** 2  # 128 MiB
logger.FILE_BACKUP_COUNT = 10

# ---- Parameters: log format (changing these is not recommended) ----
logger.LOG_FORMAT = "[%(levelname)s] %(asctime)s %(message)s"
logger.TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
logger.FULL_FILE_PATH = False

if __name__ == "__main__":
    # Demo: emit one sample message through each level's function.
    for emit in (logger.error, logger.warning, logger.debug, logger.info):
        emit("123456789")