__author__ = 'zhangxa'

from curl import Curl
import pycurl
from html.parser import HTMLParser
from htmlParser.htmlParser import UrlHtmlParser
from download.downFile import DownFile
from urlHandler.urlHandler import UrlBaseHandler
from urlQueue.urlQueue import UrlQueue

start_url = "http://www.pcgames.com.cn/"

# Fetch the start page once at import time so `data`/`info` are available
# to the rest of the script.
c = Curl()
c.set_url(start_url)
data = c.get()
info = c.info()
#print(info)


def get_charset(c_type):
    """Extract the charset name from a Content-Type header value.

    :param c_type: Content-Type header string, e.g.
                   ``"text/html; charset=gb2312"`` (may be ``None``).
    :return: the charset substring if present, ``None`` if the header
             carries no ``charset=`` marker, or ``'UTF-8'`` if parsing
             blows up on a malformed value.
    """
    charset = None
    try:
        if c_type and 'charset' in c_type:
            start = c_type.find('charset=')
            charset_str = c_type[start:]
            # The charset token may be followed by further parameters
            # separated by a space; cut there if one exists.
            end = charset_str.find(' ')
            if end > -1:
                charset = charset_str[len('charset='):end]
            else:
                charset = charset_str[len('charset='):]
    except Exception:
        # Malformed header: fall back to UTF-8 (matches the sibling copy
        # of this helper elsewhere in the project).
        return 'UTF-8'
    # BUGFIX: the original fell off the end and always returned None.
    return charset
def pycurl_detection(url, ip):
    """Probe *url* through a specific *ip* and report transfer metrics.

    The hostname in the URL is replaced with the resolved IP so the
    request hits that server directly, while the original host is kept
    in the ``Host`` header.

    :param url: request URL
    :param ip: dig-resolved IP to contact directly
    :return: ``(http_code, total_time, size_download, speed_download,
             redirect_count)`` on success, ``(-1, -1, 0, 0, 0)`` on failure.
    """
    c = None  # so `finally` can close safely even if Curl() itself raises
    try:
        domain = getDomain(url)
        # BUGFIX: this assignment was commented out although `path` is
        # used in the request line below (NameError).
        # NOTE(review): assumes getPath() is in scope like getDomain() —
        # TODO confirm against the module's imports.
        path = getPath(url)
        new_url = url.replace(domain, ip)
        header = [
            'GET %s HTTP/1.1' % path,
            'Host: %s' % domain,
            'Accept: */*',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Accept-Encoding: gzip, deflate',
            'Connection: keep-alive',
            'Cache-Control: no-cache',
            'User-Agent: Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
        ]
        if url.find("baidu.com") > 0:
            # Drop the fake User-Agent for baidu.com requests.
            header.pop(len(header) - 1)
        c = Curl(fakeheaders=header)
        c.get(new_url)
        http_code = c.get_info(pycurl.HTTP_CODE)            # HTTP status code
        size_download = c.get_info(pycurl.SIZE_DOWNLOAD)    # bytes downloaded
        speed_download = c.get_info(pycurl.SPEED_DOWNLOAD)  # average download speed
        file_time = c.get_info(pycurl.INFO_FILETIME)        # remote document time
        namelookup_time = c.get_info(pycurl.NAMELOOKUP_TIME)      # DNS resolution time
        content_time = c.get_info(pycurl.CONNECT_TIME)            # time to establish connection
        pretransfer_time = c.get_info(pycurl.PRETRANSFER_TIME)    # connect -> ready to transfer
        starttransfer_time = c.get_info(pycurl.STARTTRANSFER_TIME)  # connect -> first byte
        total_time = c.get_info(pycurl.TOTAL_TIME)          # total transfer time
        redirect_time = c.get_info(pycurl.REDIRECT_TIME)    # time spent on redirects
        redirect_url = c.get_info(pycurl.REDIRECT_URL)      # final redirect URL
        redirect_count = c.get_info(pycurl.REDIRECT_COUNT)  # number of redirects
        primary_ip = ''    # c.get_info(pycurl.PRIMARY_IP)
        primary_port = ''  # c.get_info(pycurl.PRIMARY_PORT)
        local_ip = ''      # c.get_info(pycurl.LOCAL_IP)
        local_port = ''    # c.get_info(pycurl.LOCAL_PORT)
        info = c.info()
        # Renamed from `str` to avoid shadowing the builtin.
        report = '''
url:%s,ip:%s,size_download:%s,speed_download:%s,file_time:%s,redirect_count:%s, 
namelookup_time:%s,content_time:%s,pretransfer_time:%s,starttransfer_time:%s,total_time:%s,redirect_time:%s
redirect url:%s,count:%s
primary ip:%s,port:%s
local ip:%s,port:%s
info:%s
''' % (url, ip, size_download, speed_download, file_time, redirect_count,
       namelookup_time, content_time, pretransfer_time, starttransfer_time,
       total_time, redirect_time, redirect_url, redirect_count,
       primary_ip, primary_port, local_ip, local_port, info)
        print(report)
    except Exception as e:
        msg = "def pycurl_detection(%s,%s) Exception %s" % (url, ip, e.args)
        print(msg)
        logging.exception(msg)
        return -1, -1, 0, 0, 0
    finally:
        # BUGFIX: the original put the success `return` inside `finally`,
        # which silently overrode the except-branch return and raised
        # NameError on `http_code` when the request failed early.
        if c is not None:
            c.close()
    return http_code, "%.3f" % total_time, size_download, speed_download, redirect_count
from curl import Curl
# BUGFIX: pycurl is referenced below (pycurl.OS_ERRNO) but was never imported.
import pycurl
import os
import sys

if __name__ == "__main__":
    # Default to the libcurl homepage when no URL is given on the command line.
    if len(sys.argv) < 2:
        url = 'http://curl.haxx.se'
    else:
        url = sys.argv[1]
    c = Curl()
    c.get(url)
    # BUGFIX: converted Python 2 `print` statements to Python 3 calls;
    # the rest of this file is Python 3 (html.parser, print(...)).
    print(c.body())
    print('=' * 74 + '\n')
    import pprint
    pprint.pprint(c.info())
    print(c.get_info(pycurl.OS_ERRNO))
    print(c.info()['os-errno'])
    c.close()
__author__ = 'zhangxa'

from curl import Curl
import pycurl
from html.parser import HTMLParser
from htmlParser.htmlParser import UrlHtmlParser
from download.downFile import DownFile
from urlHandler.urlHandler import UrlBaseHandler
from urlQueue.urlQueue import UrlQueue

start_url = "http://www.pcgames.com.cn/"

# Fetch the start page once at import time so `data`/`info` are available
# to the rest of the script.
c = Curl()
c.set_url(start_url)
data = c.get()
info = c.info()
#print(info)


def get_charset(c_type):
    """Extract the charset name from a Content-Type header value.

    :param c_type: Content-Type header string, e.g.
                   ``"text/html; charset=gb2312"`` (may be ``None``).
    :return: the charset substring if present, ``None`` if the header
             carries no ``charset=`` marker, or ``'UTF-8'`` if parsing
             blows up on a malformed value.
    """
    charset = None
    try:
        if c_type and 'charset' in c_type:
            start = c_type.find('charset=')
            charset_str = c_type[start:]
            # The charset token may be followed by further parameters
            # separated by a space; cut there if one exists.
            end = charset_str.find(' ')
            if end > -1:
                charset = charset_str[len('charset='):end]
            else:
                charset = charset_str[len('charset='):]
    except Exception:  # was a bare except; narrowed without changing behavior
        return 'UTF-8'
    # BUGFIX: the original fell off the end and always returned None,
    # discarding the charset it had just parsed.
    return charset