def cmd5_get_result(self, r_url, r_header, r_data, rps):
    """Interpret a cmd5.com lookup response, solving the captcha when asked.

    Args:
        r_url: URL the lookup form is posted to.
        r_header: Headers sent with the lookup POST.
        r_data: Form fields of the lookup POST. The recognised captcha text
            is written into it under
            ``ctl00$ContentPlaceHolder1$TextBoxCode`` before re-posting.
        rps: Text nodes extracted from the result span of the last response.

    Returns:
        A ``(status, message)`` tuple: ``(1, rps[0])`` on success, otherwise
        ``(0, reason)``.
    """
    if not rps:
        # Nothing was extracted from the page; indexing rps[0] would raise.
        return 0, "未查询到结果"

    if "验证码错误" in rps:
        # Give up before fetching yet another captcha image.
        if self.cmd5_repe > 4:
            return 0, "验证码错误"

        # One extra "?" per attempt makes every captcha URL distinct.
        request_img_url = ("https://www.cmd5.com/checkcode.aspx/0?"
                           + self.cmd5_repe * "?")
        request_img_header = {
            "Host": "www.cmd5.com",
            "User-Agent":
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0",
            "Accept": "image/webp,*/*",
            "Accept-Language":
            "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
            "Accept-Encoding": "gzip, deflate, br",
            "Referer": "https://www.cmd5.com/",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "TE": "Trailers",
        }
        response_img = host.get(url=request_img_url,
                                headers=request_img_header)

        # Write the captcha to disk and close the handle before it is read
        # back by the recogniser.
        img_path = self.path + "/img/cimg.gif"
        with open(img_path, "wb") as img_file:
            img_file.write(response_img.content)

        code_demo = Code(img_path=img_path,
                         name="cmd5",
                         code_num=4,
                         search_num=1)
        code_result = code_demo.get_code()

        if code_result["code"] == 0:
            # Recognition failed: retry with a fresh captcha image.
            self.cmd5_repe += 1
            return self.cmd5_get_result(r_url, r_header, r_data, rps)

        # Re-submit the lookup form together with the recognised captcha.
        r_data["ctl00$ContentPlaceHolder1$TextBoxCode"] = code_result["result"]
        response_md5 = host.post(url=r_url, headers=r_header, data=r_data)
        html_md5 = HTML(response_md5.text)
        text_md5 = html_md5.xpath(
            '//*[@id="ctl00_ContentPlaceHolder1_table3"]/tr/td/div/span//text()'
        )
        self.cmd5_repe += 1
        return self.cmd5_get_result(r_url, r_header, r_data, text_md5)

    if rps[0] == "请":
        # NOTE(review): presumably the start of the site's "please log in"
        # prompt - confirm against a live response.
        return 0, "需要登录"

    return 1, rps[0]
def get_crt(self):
    """Fetch the crt.sh page for ``self.key`` and scrape it into ``self.result``.

    Returns:
        ``{"value": None}`` when both the crt.sh ID and the summary have
        length zero; otherwise ``self.result`` with the keys ``crt_shID``,
        ``Summary``, ``Certificate_Transparency``, ``Revocation``,
        ``RevoCertificate_Fingerprints`` and ``"Certificate "`` filled in.
    """
    req_url = "https://crt.sh/?q=" + self.key
    req_header = {
        "Host": "crt.sh",
        "User-Agent":
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language":
        "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    # Headers must go by keyword: on a requests-style get() the second
    # positional argument is `params`, which would put these values in the
    # query string instead of the request headers.
    res = host.get(url=req_url, headers=req_header)
    html = HTML(res.text)

    crt_sh_id = xfilter(html.xpath("//table[2]/tr[1]/td/a/text()"))
    summary = xfilter(html.xpath("//table[2]/tr[2]/td/text()"))
    self.result["crt_shID"] = crt_sh_id
    self.result["Summary"] = summary
    if not len(crt_sh_id) and not len(summary):
        # NOTE(review): looks like a leftover debug print; kept so output
        # stays unchanged.
        print({"value": None})
        return ({"value": None})

    # Certificate Transparency table. The row-count offset and start index
    # are passed straight to get_table - presumably they skip heading rows;
    # confirm against get_table.
    ct_td_text = "//table[2]/tr[3]/td/div/table/tr/td[1]/table/tr"
    ct_rows = html.xpath(ct_td_text)
    ct_th = html.xpath(ct_td_text + "/th/text()")
    self.result["Certificate_Transparency"] = get_table(
        len(ct_rows) - 2, 3, ct_th, html, ct_td_text)

    # Revocation table.
    rt_td_text = "//table[2]/tr[4]/td/table/tr"
    rt_rows = html.xpath(rt_td_text)
    rt_th = html.xpath(rt_td_text + "/th/text()")
    self.result["Revocation"] = get_table(
        len(rt_rows) - 1, 2, rt_th, html, rt_td_text)

    # Certificate fingerprints table; the only one that also passes
    # self.table_gdata to get_table.
    rf_td_text = "//table[2]/tr[5]/td/table/tr"
    rf_rows = html.xpath(rf_td_text)
    rf_th = html.xpath(rf_td_text + "/th/text()")
    self.result["RevoCertificate_Fingerprints"] = get_table(
        len(rf_rows), 1, rf_th, html, rf_td_text, self.table_gdata)

    # The trailing space in this key is kept: callers may already read it.
    self.result["Certificate "] = html.xpath("//table[2]/tr[6]/td//text()")
    return self.result
def md5_tellyou(self):
    """Look up ``self.Textmd5`` on md5.tellyou.top.

    The landing page is fetched first to read the hidden form fields, which
    are then posted back together with the hash.

    Returns:
        ``(1, text)`` with the first text node of the result span, or
        ``(0, reason)`` when the response contains no such node.
    """
    request_url = "http://md5.tellyou.top/default.html"
    response = host.get(request_url)
    html = HTML(response.text)

    # Hidden form fields that are echoed back in the POST below. Each xpath
    # result is passed on as returned (a list of matches).
    VIEWSTATE = html.xpath('//*[@id="__VIEWSTATE"]/@value')
    VIEWSTATEGENERATOR = html.xpath(
        '//*[@id="__VIEWSTATEGENERATOR"]/@value')
    EVENTVALIDATION = html.xpath('//*[@id="__EVENTVALIDATION"]/@value')
    MD5GET = "正在处理"

    request_header = {
        "Host": "md5.tellyou.top",
        "User-Agent":
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language":
        "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate",
        "Referer": "http://md5.tellyou.top/default.html",
        "Content-Type": "application/x-www-form-urlencoded",
        # NOTE(review): fixed value that does not track the real body size;
        # confirm whether the HTTP client overrides it.
        "Content-Length": "1512",
        "Origin": "http://md5.tellyou.top",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }
    request_data = {
        "__VIEWSTATE": VIEWSTATE,
        "__VIEWSTATEGENERATOR": VIEWSTATEGENERATOR,
        "__EVENTVALIDATION": EVENTVALIDATION,
        "Textmd5": self.Textmd5,
        "MD5GET": MD5GET,
    }
    response_md5 = host.post(url=request_url,
                             headers=request_header,
                             data=request_data)
    html_md5 = HTML(response_md5.text)
    md5_text = html_md5.xpath('//td[@class="styleh"]/span[2]//text()')
    if not md5_text:
        # The result span is missing; report a failure instead of raising
        # IndexError on md5_text[0].
        return 0, "未查询到结果"
    return 1, md5_text[0]
def get_icp(self):
    """Collect ICP filing data from icp.chinaz.com into ``self.result``.

    Steps:
        1. Fetch the page for ``self.url_icp_companyName_q`` and store the
           scraped fields under ``icp_data`` (``None`` when the lookup of the
           ``first`` element yields ``None``).
        2. Post ``self.url_icp_companyName`` to ``/Home/QiYeData`` and store
           the returned ``data`` under ``company_data``.
        3. When that call returned code 200 with data, query the company's
           active and cancelled filings and store them under
           ``company_other_icp`` and ``company_delicp``.

    Returns:
        ``self.result``.
    """
    lookup_url = "http://icp.chinaz.com/" + self.url_icp_companyName_q
    headers = {
        "Host": "icp.chinaz.com",
        "User-Agent":
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language":
        "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        "Referer": lookup_url,
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    page = HTML(host.get(url=lookup_url, headers=headers).text)

    def first_block(path):
        # Evaluate an xpath relative to the element with id "first".
        return xfilter(page.xpath('//*[@id="first"]/' + path))

    if xfilter(page.xpath('//*[@id="first"]')) is None:
        self.result["icp_data"] = None
    else:
        company_name = first_block("li[1]/p/a/text()")
        unit_properties = first_block("li[2]/p/strong/text()")
        icp_id = first_block("li[3]/p/font/text()")
        web_name = first_block("li[4]/p/text()")
        owner_label = first_block("li[5]/span/text()")
        # When the fifth item carries this label, the home URL and the
        # verification time are read one list item further down.
        if owner_label == "网站负责人":
            url_item, time_item = 6, 8
        else:
            url_item, time_item = 5, 7
        web_home_url = first_block("li[%d]/p/text()" % url_item)
        verify_time = first_block("li[%d]/p/text()" % time_item)
        self.result["icp_data"] = {
            "company_name": company_name,
            "unit_properties": unit_properties,
            "icp_id": icp_id,
            "web_name": web_name,
            "web_home_isture": owner_label,
            "web_home_url": web_home_url,
            "verify_time": verify_time,
        }

    # Switch the same header dict over to the AJAX form-post variant.
    headers.update({
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "X-Requested-With": "XMLHttpRequest",
        "Content-Length": "13",
        "Origin": "http://icp.chinaz.com",
    })
    company_reply = json.loads(
        host.post(url="http://icp.chinaz.com/Home/QiYeData",
                  data={"Kw": self.url_icp_companyName},
                  headers=headers).text)

    company_found = (company_reply["code"] == 200
                     and company_reply["data"] is not None)
    self.result["company_data"] = company_reply["data"]
    if not company_found:
        return self.result

    headers["Content-Length"] = "150"
    # First the company's ICP filings, then its cancelled ICP filings.
    paged_queries = (
        ("http://icp.chinaz.com/Home/PageData", "company_other_icp"),
        ("http://icp.chinaz.com/Home/PageDelData", "company_delicp"),
    )
    for endpoint, result_key in paged_queries:
        form = {
            "pageNo": "1",
            "pageSize": "10",
            "Kw": company_reply["data"]["companyName"],
        }
        reply = json.loads(
            host.post(url=endpoint, data=form, headers=headers).text)
        if reply["code"] == 200:
            self.result[result_key] = self.get_page_data(
                reply["data"], endpoint, form, headers, reply['amount'],
                reply['pageSize'])
        else:
            self.result[result_key] = reply["data"]
    return self.result
def md5_cmd5(self):
    """Submit ``self.Textmd5`` to cmd5.com and return the decoded result.

    The landing page is fetched first to read the hidden form fields, which
    are posted back together with the hash. The text extracted from the
    response is handed to ``cmd5_get_result``.

    Returns:
        Whatever ``self.cmd5_get_result`` returns for the response.
    """
    request_url = "https://www.cmd5.com/"
    landing_page = HTML(host.get(request_url).text)

    def hidden_value(element_id):
        # First @value of the element with this id, or "" when absent.
        matches = landing_page.xpath('//*[@id="%s"]/@value' % element_id)
        return matches[0] if matches else ""

    form_fields = {
        "__EVENTTARGET": hidden_value("__EVENTTARGET"),
        "__EVENTARGUMENT": hidden_value("__EVENTARGUMENT"),
        "__VIEWSTATE": hidden_value("__VIEWSTATE"),
        "__VIEWSTATEGENERATOR": hidden_value("__VIEWSTATEGENERATOR"),
        "ctl00$ContentPlaceHolder1$TextBoxInput": self.Textmd5,
        "ctl00$ContentPlaceHolder1$InputHashType": "md5",
        "ctl00$ContentPlaceHolder1$Button1": "查询",
        "ctl00$ContentPlaceHolder1$HiddenField1":
        hidden_value("ctl00_ContentPlaceHolder1_HiddenField1"),
        "ctl00$ContentPlaceHolder1$HiddenField2":
        hidden_value("ctl00_ContentPlaceHolder1_HiddenField2"),
    }
    post_headers = {
        "Host": "www.cmd5.com",
        "User-Agent":
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language":
        "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
        "Accept-Encoding": "gzip, deflate, br",
        "Content-Type": "application/x-www-form-urlencoded",
        "Content-Length": "2631",
        "Origin": "https://www.cmd5.com",
        "Connection": "keep-alive",
        "Referer": "https://www.cmd5.com/",
        "Upgrade-Insecure-Requests": "1",
    }

    reply = host.post(url=request_url,
                      headers=post_headers,
                      data=form_fields)
    result_text = HTML(reply.text).xpath(
        '//*[@id="ctl00_ContentPlaceHolder1_table3"]/tr/td/div/span//text()'
    )
    return self.cmd5_get_result(request_url, post_headers, form_fields,
                                result_text)
def md5_pmd5(self):
    """Look up ``self.Textmd5`` through the pmd5.com API.

    A captcha image is downloaded and recognised, then the API is queried
    with the recognised text and the cookies of the captcha response. Failed
    recognition and a 403 reply each trigger a retry, counted in
    ``self.pmd5_repe``.

    Returns:
        ``(1, plaintext)`` on success, otherwise ``(0, reason)``. ``reason``
        is a message string, or the decoded API reply when the API answers
        with a code other than 0 or 403.
    """
    request_img_url = "https://api.pmd5.com/pmd5api/checkcode"
    res = host.get(url=request_img_url)
    if res.status_code != 200:
        return 0, "pmd5_request_error"

    if len(res.text) < 100:
        # A short body is a JSON error message, not a captcha image.
        res_text = json.loads(res.text)
        return 0, res_text["msg"]

    # Stop retrying before spending another file write and OCR pass.
    if self.pmd5_repe > 15:
        return 0, "验证码错误"

    # Write the captcha to disk and close the handle before it is read back
    # by the recogniser.
    img_path = self.path + '/img/img.png'
    with open(img_path, 'wb') as img_file:
        img_file.write(res.content)

    code_demo = Code(img_path=img_path, code_num=4, search_num=3)
    code_result = code_demo.get_code()
    if code_result["code"] == 0:
        # Recognition failed: wait briefly and start over with a new image.
        time.sleep(1)
        self.pmd5_repe += 1
        return (self.md5_pmd5())

    # The API call must carry the cookies issued with the captcha image.
    cookie = utils.dict_from_cookiejar(res.cookies)
    ck = "".join(k + "=" + v + ";" for k, v in cookie.items())

    request_img_header = {
        "accept": "*/*",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9",
        "cookie": ck,
        "origin": "https://pmd5.com",
        "referer": "https://pmd5.com/",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent":
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36",
    }
    request_md5_url = ("https://api.pmd5.com/pmd5api/pmd5?checkcode="
                       + code_result["result"] + "&pwd=" + self.Textmd5)
    reponse_md5 = host.get(url=request_md5_url, headers=request_img_header)
    result = json.loads(reponse_md5.text)

    if result["code"] == 403:
        # NOTE(review): presumably the captcha was rejected - confirm
        # against the API. Back off, then retry.
        time.sleep(2)
        self.pmd5_repe += 1
        return (self.md5_pmd5())
    if result["code"] == 0:
        return 1, result["result"][self.Textmd5]
    return 0, result