def _work(self, entry_url):
    try:
        logger.info("[{}] req => {}".format(len(self.done_url_list), entry_url))
        # Skip URLs whose extension is on the ignore list (images, archives, etc.)
        if utils.url_ext(entry_url) in self.ignore_ext:
            return URLsimilarList()

        conn = utils.http_req(entry_url)

        # Follow a single same-site redirect manually so the Location target
        # is recorded and fetched instead of the redirecting URL.
        if conn.status_code in [301, 302, 307]:
            _url = urljoin(entry_url, conn.headers.get("Location", "")).strip()
            _url = utils.normal_url(_url)
            if _url is None:
                return URLsimilarList()

            url_info = URLinfo(entry_url, _url, URLTYPE.document)
            if utils.same_netloc(entry_url, _url) and (url_info not in self.done_url_list):
                entry_url = _url
                logger.info("[{}] req 302 => {}".format(len(self.done_url_list), entry_url))
                conn = utils.http_req(_url)
                self.done_url_list.add(url_info)
                self.all_url_list.add(url_info)

        html = conn.content
        # Only parse HTML responses.
        if "html" not in conn.headers.get("Content-Type", "").lower():
            return URLsimilarList()

        dom = pq(html)
        ret_url = URLsimilarList()
        # Extract candidate links from every configured tag/attribute pair.
        for tag in self.tagMap:
            items = dom(tag['name']).items()
            for i in items:
                _url = urljoin(entry_url, i.attr(tag['attr'])).strip()
                _url = utils.normal_url(_url)
                if _url is None:
                    continue

                _type = tag["type"]
                if utils.same_netloc(_url, entry_url):
                    url_info = URLinfo(entry_url, _url, _type)
                    ret_url.add(url_info)
                    self.all_url_list.add(url_info)

        return ret_url

    except Exception as e:
        logger.error("error on {} {}".format(entry_url, e))
        return URLsimilarList()

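# The loop above iterates over self.tagMap, a list of tag descriptors whose
# exact definition lives elsewhere in the project. The following is only a
# hedged sketch of its assumed shape, inferred from how "name", "attr" and
# "type" are used in _work(); the URLTYPE members shown are assumptions.
#
# self.tagMap = [
#     {"name": "a",      "attr": "href", "type": URLTYPE.document},
#     {"name": "script", "attr": "src",  "type": URLTYPE.js},
#     {"name": "img",    "attr": "src",  "type": URLTYPE.img},
# ]
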
def quota(self):
    auth = (self.auth_email, self.auth_key)
    conn = utils.http_req(self.quota_api, auth=auth)
    data = conn.json()
    count = data["user"]["counts"]["search_api"]
    limit = data["user"]["limits"]["search_api"]
    return count, limit

def work(self, site):
    _, hostname, _ = get_host(site)
    conn = utils.http_req(site)
    item = {
        "site": site,
        "hostname": hostname,
        "ip": "",
        "title": utils.get_title(conn.content),
        "status": conn.status_code,
        "headers": utils.get_headers(conn),
        "http_server": conn.headers.get("Server", ""),
        "body_length": len(conn.content),
        "finger": [],
        "favicon": fetch_favicon(site)
    }

    domain_parsed = utils.domain_parsed(hostname)
    if domain_parsed:
        item["fld"] = domain_parsed["fld"]

    ips = utils.get_ip(hostname)
    if ips:
        item["ip"] = ips[0]
    else:
        # hostname is already an IP literal when it cannot be resolved as a domain.
        item["ip"] = hostname

    self.site_info_list.append(item)

    # Follow a redirect only when the target stays under the same site prefix,
    # and record the redirect target as its own site entry.
    if conn.status_code in (301, 302):
        url_302 = urljoin(site, conn.headers.get("Location", ""))
        if url_302 != site and url_302.startswith(site):
            self.work(url_302)

def get_domains():
    url = "http://10.0.83.77:5018/domain/?task_id=5f2298aa6591e770f69e8f62&source=altdns&size=2000"
    data = utils.http_req(url).json()
    items = data["items"]
    domains = [x["domain"] for x in items]
    print(domains)
    return services.probe_http(domains)

def __init__(self, cluster):
    # Log in to Sunstone once and keep the session cookie for later requests.
    self.api = cluster.sunstone_api.replace('http://', '')
    cookie_req = http_req(host=self.api, uri='/login', method='POST',
                          headers={"Authorization": cluster.sunstone_auth})
    for x in cookie_req.getheaders():
        if x[0] == "set-cookie":
            cookie = x[1].split(';')[0]

    self.headers = {"Cookie": cookie}

def search(self, domain):
    param = {"output": "json", "q": domain}
    data = utils.http_req(self.url, 'get', params=param, timeout=(30.1, 50.1)).json()
    return data

def result_num(self):
    url = self.search_url.format(page=0, keyword=quote(self.keyword))
    # logger.info("search url {}".format(url))
    html = utils.http_req(url).text
    # Cache the first page so run() does not fetch it twice.
    self.first_html = html
    result = re.findall(self.num_pattern, html)
    # The result count is rendered with thousands separators, e.g. "1,234".
    num = int("".join(result[0].split(",")))
    self.search_result_num = num
    return num

def get_favicon_data(self, favicon_url):
    conn = http_req(favicon_url)
    if "/favicon.ico" in favicon_url:
        # The default /favicon.ico path must really be served as an icon.
        if conn.headers.get("Content-Type", "") == "image/x-icon":
            data = self.encode_bas64_lines(conn.content)
            return data
    else:
        # Icons declared in <link> tags may use any image type.
        if "image" in conn.headers.get("Content-Type", ""):
            data = self.encode_bas64_lines(conn.content)
            return data

def check(self, url):
    conn = utils.http_req(url, method="head", timeout=self.timeout)
    if conn.status_code == 400:
        return None

    if (conn.status_code >= 501) and (conn.status_code < 600):
        return None

    # Some interception/WAF pages answer 403 with a fixed block page;
    # fetch the body and drop those.
    if conn.status_code == 403:
        conn2 = utils.http_req(url)
        check = b'</title><style type="text/css">body{margin:5% auto 0 auto;padding:0 18px}'
        if check in conn2.content:
            return None

    item = {
        "status": conn.status_code,
        "content-type": conn.headers.get("Content-Type", "")
    }
    return item

def get_favicon_data(self, favicon_url):
    conn = http_req(favicon_url)
    if conn.status_code != 200:
        return

    # Ignore tiny responses that are unlikely to be a real icon.
    if len(conn.content) <= 80:
        logger.debug("favicon content len le 80")
        return

    if "image" in conn.headers.get("Content-Type", ""):
        data = self.encode_bas64_lines(conn.content)
        return data

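# encode_bas64_lines() is defined elsewhere in the project. Below is a minimal
# sketch of one common favicon-hash pattern it is assumed to follow
# (Shodan-style: base64 with line breaks fed to mmh3). Illustrative only, not
# the project's exact implementation.
import base64
import mmh3


def encode_bas64_lines_sketch(data: bytes) -> int:
    # encodebytes() inserts a newline every 76 characters, matching the
    # favicon-hash convention popularized by Shodan.
    b64 = base64.encodebytes(data)
    return mmh3.hash(b64)
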
def get_urls():
    url = "http://10.0.83.77:5018/site/?page=1&hostname=baidu.com&size=6000"
    data = utils.http_req(url).json()
    items = data["items"]
    urls = []
    print(len(items))
    for item in items:
        urls.append(item["site"])

    with open("../arl_tool/urls2.txt", "w") as f:
        for x in set(urls):
            f.write(x + "\n")

def test_fetch_fingerprint(self):
    site = "https://www.baidu.com/"
    conn = utils.http_req(site)
    headers = utils.get_headers(conn)
    title = utils.get_title(conn.content)
    finger_list = [
        {
            "name": "百度测试",
            "rule": {
                "html": ["百度"],
                "title": [],
                "headers": [],
                "favicon_hash": []
            }
        },
        {
            "name": "百度测试2",
            "rule": {
                "html": [],
                "title": ["百度222222", "百度"],
                "headers": [],
                "favicon_hash": []
            }
        },
        {
            "name": "百度测试3",
            "rule": {
                "html": [],
                "title": [],
                "headers": ["TTTBAIDUIDTTT", "BAIDUID"],
                "favicon_hash": []
            }
        },
        {
            "name": "百度测试4",
            "rule": {
                "html": [],
                "title": [],
                "headers": [],
                "favicon_hash": [789, 123456]
            }
        }
    ]
    finger_list.extend(load_fingerprint())
    result = fetch_fingerprint(content=conn.content, headers=headers,
                               title=title, favicon_hash=789,
                               finger_list=finger_list)
    self.assertTrue(len(result) >= 4)
    self.assertTrue(result[0] == finger_list[0]["name"])
    self.assertTrue(result[3] == finger_list[3]["name"])

def match_urls(self, html):
    dom = pq(html)
    result_items = dom(self.pq_query).items()
    urls_result = [item.attr("href") for item in result_items]
    urls = set()
    for u in urls_result:
        try:
            # Search-engine result links are redirectors; resolve the real
            # target from the Location header of a HEAD request.
            resp = utils.http_req(u, "head")
            real_url = resp.headers.get('Location')
            if real_url:
                urls.add(real_url)
        except Exception as e:
            logger.exception(e)

    return list(urls)

def search_subdomain(self, target):
    params = {"query": "*.{}".format(target)}
    auth = (self.auth_email, self.auth_key)
    conn = utils.http_req(self.subdomain_api, params=params,
                          auth=auth, timeout=(20, 120))
    data = conn.json()
    subdomains = []
    for item in data['subdomains']:
        # Drop leading/trailing wildcard and dot characters before rebuilding the FQDN.
        item = item.strip("*.")
        domain = "{}.{}".format(item, target)
        if utils.domain_parsed(domain):
            subdomains.append(domain)

    return list(set(subdomains))

def find_icon_url_from_html(self):
    conn = http_req(self.url)
    if b"<link" not in conn.content:
        return

    d = pq(conn.content)
    links = d('link').items()
    icon_link_list = []
    for link in links:
        if link.attr("href") and 'icon' in (link.attr("rel") or ""):
            icon_link_list.append(link)

    # Prefer a rel="shortcut icon" link, otherwise fall back to the first icon link.
    for link in icon_link_list:
        if "shortcut" in link.attr("rel"):
            return urljoin(self.url, link.attr('href'))

    if icon_link_list:
        return urljoin(self.url, icon_link_list[0].attr('href'))

def run(self):
    self.result_num()
    logger.info("baidu search {} results found for keyword {}".format(
        self.search_result_num, self.keyword))
    urls = []
    # Baidu paginates with an offset of 10 results per page.
    for page in range(1, min(int(self.search_result_num / 10) + 2, self.page_num + 1)):
        if page == 1:
            # The first page was already fetched and cached by result_num().
            _urls = self.match_urls(self.first_html)
            logger.info("baidu first url result {}".format(len(_urls)))
        else:
            time.sleep(self.default_interval)
            url = self.search_url.format(page=(page - 1) * 10,
                                         keyword=quote(self.keyword))
            html = utils.http_req(url).text
            _urls = self.match_urls(html)
            logger.info("baidu search url {}, result {}".format(url, len(_urls)))

        urls.extend(_urls)

    return urls

def match_urls(self, html):
    dom = pq(html)
    result_items = dom(self.pq_query).items()
    urls_result = [urljoin(self.base_search_url, item.attr("href"))
                   for item in result_items]
    urls = set()
    if urls_result:
        for u in urls_result:
            try:
                # Resolve the engine's redirect link without following it.
                resp = utils.http_req(u, "head", allow_redirects=False, verify=False)
                real_url = resp.headers.get('Location')
                if real_url:
                    urls.add(real_url)
            except Exception:
                continue

    return list(urls)

def req(self):
    content = b''
    conn = utils.http_req(self.url.url, 'get', timeout=(3, 6), stream=True)
    self.conn = conn
    start_time = time.time()
    # Read the body in chunks with both a wall-clock limit and a size cap,
    # so huge or slow responses cannot block the crawler.
    for data in conn.iter_content(chunk_size=512):
        if time.time() - start_time >= self.read_timeout:
            break

        content += data
        if len(content) >= int(self.max_length):
            break

    self.status_code = conn.status_code
    self.content = content[:self.max_length]
    # Fall back to the downloaded length when the server sent no Content-Length.
    content_len = self.conn.headers.get("Content-Length", len(self.content))
    self.conn.headers["Content-Length"] = content_len
    conn.close()
    return self.status_code, self.content

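# A standalone sketch of the same capped streaming read using plain requests
# (assumption: utils.http_req passes stream=True through to requests). It only
# illustrates the size-and-time bounded download pattern used by req() above.
import time

import requests


def read_capped(url, max_length=100 * 1024, read_timeout=10):
    content = b""
    with requests.get(url, stream=True, timeout=(3, 6)) as conn:
        start = time.time()
        for chunk in conn.iter_content(chunk_size=512):
            # Stop on either the wall-clock limit or the size cap.
            if time.time() - start >= read_timeout:
                break
            content += chunk
            if len(content) >= max_length:
                break
        return conn.status_code, content[:max_length]
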
def dingding_send(msg, access_token, secret, msgtype="text", title="灯塔消息推送"):
    ding_url = "https://oapi.dingtalk.com/robot/send?access_token={}".format(access_token)

    # Sign the request the way DingTalk robots require: HMAC-SHA256 over
    # "<timestamp>\n<secret>", base64 encoded and URL quoted.
    timestamp = str(round(time.time() * 1000))
    secret_enc = secret.encode('utf-8')
    string_to_sign = '{}\n{}'.format(timestamp, secret)
    string_to_sign_enc = string_to_sign.encode('utf-8')
    hmac_code = hmac.new(secret_enc, string_to_sign_enc, digestmod=hashlib.sha256).digest()
    sign = urllib.parse.quote_plus(base64.b64encode(hmac_code))

    param = "&timestamp={}&sign={}".format(timestamp, sign)
    ding_url = ding_url + param

    send_json = {
        "msgtype": msgtype,
        "text": {
            "content": msg
        },
        "markdown": {
            "title": title,
            "text": msg
        }
    }
    conn = http_req(ding_url, method='post', json=send_json)
    return conn.json()

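# Hedged usage sketch for dingding_send(). The token and secret below are
# placeholders, not real credentials; the webhook must be created with the
# "sign" security option for the signature above to be accepted.
#
# resp = dingding_send(
#     msg="ARL task finished",
#     access_token="<robot-access-token>",   # placeholder
#     secret="<robot-sign-secret>",          # placeholder
# )
# print(resp)
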
def _api(self, url):
    data = utils.http_req(url, 'get', params=self.param).json()
    return data

def compute_delete(self, id):
    return http_req(host=self.api, uri=('/compute/%s' % id),
                    method='DELETE', headers=self.headers)

def compute_create(self, template):
    return http_req(host=self.api, uri='/compute', method='POST',
                    params=template, headers=self.headers)

def host(self):
    return http_req(host=self.api, uri='/host', headers=self.headers)

def vm_action(self, vm_id, params):
    return http_req(host=self.api, uri=('/vm/%s/action' % vm_id),
                    method='POST', params=params, headers=self.headers)

def vm_id(self, vm_id):
    return http_req(host=self.api, uri=('/vm/%s' % vm_id), headers=self.headers)

def vm(self):
    return http_req(host=self.api, uri='/vm', headers=self.headers)

def datastore_id(self, ds_id):
    return http_req(host=self.api, uri=('/datastore/%s' % ds_id), headers=self.headers)

def image_id(self, image_id, method='GET'):
    return http_req(host=self.api, uri=('/image/%s' % image_id),
                    method=method, headers=self.headers)

def vnet(self):
    return http_req(host=self.api, uri='/vnet', headers=self.headers)

def image_action(self, image_id, params, method='POST'):
    return http_req(host=self.api, uri=('/image/%s/action' % image_id),
                    method=method, params=params, headers=self.headers)

def datastore(self):
    return http_req(host=self.api, uri='/datastore', headers=self.headers)

def image(self, method='GET', params=''):
    return http_req(host=self.api, uri='/image', method=method,
                    params=params, headers=self.headers)

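# Hedged usage sketch of the Sunstone wrapper methods above. `SunstoneClient`
# and `cluster` are stand-in names (assumptions); the real class and cluster
# object come from elsewhere in the project.
#
# client = SunstoneClient(cluster)   # runs the /login flow from __init__
# vms = client.vm()                  # GET /vm
# one_vm = client.vm_id("42")        # GET /vm/42
# client.compute_delete("42")        # DELETE /compute/42
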