def getAllDomainName(realUrlSet):
    """Return the set of "proto://host" roots for every URL in realUrlSet."""
    domainNameSet = set()
    for url in realUrlSet:
        # BUGFIX: dropped a dead `urllib2.splithost(url)` call whose result
        # was discarded; split once into scheme and "//host/path" instead.
        proto, rest = urllib.splittype(url)
        domainName, rest = urllib.splithost(rest)
        domainNameSet.add(proto + "://" + domainName)
    return domainNameSet
return #客户端方法 def csqueryConents(self,csdompagination): return csdompagination def csqueryPagination(self,csdom,pagesPath): pages=[] for index,item in enumerate(pagesPath): csquery=item cspage=csdom.Select(csquery) if cspage: #找到所有的a标签 children=cspage.Find("a") if children.Length>0: for i in range(0,children.Length): cshyper=CsQuery.CQ.Create(children[i]) href=cshyper.Attr("href") text=cshyper.Text() pagelamda=self.config.cfgContent.Options.PageLamda if pagelamda: str=pagelamda(cshyper,href,text) print "this is type for url : %s" % (type(str)) if str: href=str print " this is true" else: print "this is false" continue if href and href[0:1]=="/": proto, rest = urllib2.splittype(self.config.cfgUrl) host, rest = urllib2.splithost(rest) href=proto+"://"+host+href elif href and href[0:1]=="?": proto, rest = urllib2.splittype(self.config.cfgUrl) host, rest = urllib2.splithost(rest) p=rest.split("?") p[1]=href[1:] href=proto+"://"+host+"?".join(p) elif href.find("http")==-1: proto, rest = urllib2.splittype(self.config.cfgUrl) host, rest = urllib2.splithost(rest) p_rest=rest.split("/") p_rest[len(p_rest)-1]=href href=proto+"://"+host+"/".join(p_rest) scale=self.config.cfgContent.Options.PageSimilarity rate=0.0 simlilar=StringHelper.LevenshteinDistance(self.__url,href,rate) print "this is simliar :%f " % simlilar[1] if href and simlilar[1]>scale and simlilar[1]<1: pages.append(href)
def _hook(self, request):
    """urllib2 pre-send hook: fill in the standard headers on `request`.

    Adds Content-type/Content-length for POST bodies (length is skipped
    when `conf.chunk` is set — presumably chunked uploads, TODO confirm),
    a Host header derived from the selector when going through a proxy,
    and any default headers from the parent opener.  Returns the request.
    """
    host = request.get_host()
    if not host:
        raise urllib2.URLError('no host given')

    if request.has_data():  # POST
        data = request.get_data()
        if not request.has_header('Content-type'):
            request.add_unredirected_header(
                'Content-type', 'application/x-www-form-urlencoded')
        if not request.has_header('Content-length') and not conf.chunk:
            request.add_unredirected_header(
                'Content-length', '%d' % len(data))

    sel_host = host
    if request.has_proxy():
        # when proxied, the selector holds the full URL; Host must name
        # the real target, not the proxy
        scheme, sel = urllib2.splittype(request.get_selector())
        sel_host, sel_path = urllib2.splithost(sel)
    if not request.has_header('Host'):
        request.add_unredirected_header('Host', sel_host)

    for name, value in self.parent.addheaders:
        name = name.capitalize()
        if not request.has_header(name):
            request.add_unredirected_header(name, value)
    return request
def download_file(self, url):
    """Download the file an injected URL points at and store it.

    Returns (file_name, file_sha256); both None when the download failed.
    For HTTPS hosts the server certificate is fetched and stored as well.
    """
    injectd_url = self.extract_url(urllib2.unquote(url))
    # BUGFIX: file_sha256 was unbound on the IOError path, so the final
    # return raised NameError instead of reporting the failure.
    file_name = file_sha256 = None
    try:
        req = urllib2.Request(injectd_url)
        # Set User-Agent to look more credible
        req.add_unredirected_header('User-Agent', '-')
        # FIXME: We need a timeout on read here
        injected_file = urllib2.urlopen(req, timeout=4).read()
        # If the file is hosted on a SSL enabled host get the certificate
        if re.match('^https', injectd_url, re.IGNORECASE):
            proto, rest = urllib2.splittype(injectd_url)
            host, rest = urllib2.splithost(rest)
            host, port = urllib2.splitport(host)
            if port is None:
                port = 443
            cert_file = ssl.get_server_certificate((host, int(port)))
            cert_name = self.store_file(cert_file)
    except IOError as e:
        logger.exception(
            "Failed to fetch injected file, I/O error: {0}".format(e))
        # TODO: We want to handle the case where we can't download
        # the injected file but pretend to be vulnerable.
    else:
        file_name, file_sha256 = self.store_file(injected_file)
    return file_name, file_sha256
def file_or_url_context(resource_name):
    """Yield name of file from the given resource (i.e. file or url).

    URLs are downloaded to a named temporary file (removed afterwards);
    plain paths are yielded unchanged.
    """
    if is_url(resource_name):
        _, ext = os.path.splitext(resource_name)
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as f:
                # set an explicit Host header in the shared HEADER dict
                proto, rest = urllib2.splittype(resource_name)
                HOST, rest = urllib2.splithost(rest)
                HEADER['Host'] = HOST
                req = urllib2.Request(resource_name, headers=HEADER)
                u = urllib2.urlopen(req, timeout=4)
                f.write(u.read())
            # f must be closed before yielding
            yield f.name
        finally:
            os.remove(f.name)
    else:
        yield resource_name

# from skimage.io import imread
# fname = "https://imgsa.baidu.com/forum/w%3D580/sign=e960450646086e066aa83f4332097b5a/36844b59252dd42a79cdd89c093b5bb5c8eab874.jpg"
# with file_or_url_context(fname) as f:
#     img = imread(f)
#     print img.shape
def get_cookie(url): """ 获取该的可用cookie :param url: :return: """ domain = urllib2.splithost(urllib2.splittype(url)[1])[0] domain_list = ['.' + domain, domain] if len(domain.split('.')) > 2: dot_index = domain.find('.') domain_list.append(domain[dot_index:]) domain_list.append(domain[dot_index + 1:]) print domain_list conn = None cookie_str = None try: conn = sqlite3.connect(r'%s\Google\Chrome\User Data\Default\Cookies' % os.getenv('LOCALAPPDATA')) cursor = conn.cursor() sql = 'select host_key, name, value, encrypted_value, path from cookies where host_key in (%s)' % ','.join(['"%s"' % x for x in domain_list]) row_list = cursor.execute(sql).fetchall() cookie_list = [] for host_key, name, value, encrypted_value, path in row_list: decrypted_value = win32crypt.CryptUnprotectData(encrypted_value, None, None, None, 0)[1].decode(print_charset) or value cookie_list.append(name + '=' + decrypted_value) cookie_str = '; '.join(cookie_list) except Exception: raise CookieException() finally: conn.close() print cookie_str return cookie_str, domain
def doQuery(self,query,name): # urllib doesn't honor user Content-type, use urllib2 garbage, path = urllib2.splittype(FieldVal(self.site,"url")) host, selector = urllib2.splithost(path) response=False try: errmsg= "** An ERROR occurred attempting HTTPS connection to" h = httplib.HTTPSConnection(host, timeout=5) errmsg= "** An ERROR occurred sending POST request to" p = h.request('POST', selector, query, {"Content-type": "application/x-ofx", "Accept": "*/*, application/x-ofx"} ) errmsg= "** An ERROR occurred retrieving POST response from" #allow up to 30 secs for the server response (it has to assemble the statement) h.sock.settimeout(30) response = h.getresponse().read() f = file(name,"w") f.write(response) f.close() except Exception as inst: self.status = False print errmsg, host print " Exception type:", type(inst) print " Exception Val :", inst if response: print " HTTPS ResponseCode :", response.status print " HTTPS ResponseReason:", response.reason if h: h.close()
def get_host_from_url(url):
    """Convert `url` to its host name (e.g. "http://a.b/c" -> "a.b")."""
    root_proto, root_rest = urllib2.splittype(url)
    root_host, root_rest = urllib2.splithost(root_rest)
    return root_host
def doQuery(self, query, name): # urllib doesn't honor user Content-type, use urllib2 garbage, path = urllib2.splittype(FieldVal(self.site, "url")) host, selector = urllib2.splithost(path) response = False try: errmsg = "** An ERROR occurred attempting HTTPS connection to" h = httplib.HTTPSConnection(host, timeout=5) errmsg = "** An ERROR occurred sending POST request to" p = h.request( 'POST', selector, query, { "Content-type": "application/x-ofx", "Accept": "*/*, application/x-ofx" }) errmsg = "** An ERROR occurred retrieving POST response from" #allow up to 30 secs for the server response (it has to assemble the statement) h.sock.settimeout(30) response = h.getresponse().read() f = file(name, "w") f.write(response) f.close() except Exception as inst: self.status = False print errmsg, host print " Exception type:", type(inst) print " Exception Val :", inst if response: print " HTTPS ResponseCode :", response.status print " HTTPS ResponseReason:", response.reason if h: h.close()
def url_splits(url): domain_splits = [] path_splits = [] try: if url: protocol, rest = urllib2.splittype(url) if not protocol: rest = '//' + rest host, rest = urllib2.splithost(rest) #域部分解析 if host: splits = host.split('.') if splits: index_list = range(len(splits)) index_list.reverse() for index in index_list: if not splits[index]: splits.remove('') domain_splits += splits #路径部分解析 if rest: rest = urlparse.urlparse(rest) splits = rest.path.split('/') if splits: index_list = range(len(splits)) index_list.reverse() for index in index_list: if not splits[index]: splits.remove('') path_splits += splits except Exception, e: log.msg(traceback.format_exc(), level=log.ERROR)
def download_file(self, url):
    """Fetch the file referenced by an injected URL and store it.

    Returns (file_name, file_sha256), both None on download failure.
    For HTTPS URLs the server certificate is stored too.
    """
    injectd_url = self.extract_url(urllib2.unquote(url))
    # BUGFIX: file_sha256 was never assigned on the error path, making the
    # final return raise NameError after an IOError.
    file_name = file_sha256 = None
    try:
        req = urllib2.Request(injectd_url)
        # Set User-Agent to look more credible
        req.add_unredirected_header('User-Agent', '-')
        # FIXME: We need a timeout on read here
        injected_file = urllib2.urlopen(req, timeout=4).read()
        # If the file is hosted on a SSL enabled host get the certificate
        if re.match('^https', injectd_url, re.IGNORECASE):
            proto, rest = urllib2.splittype(injectd_url)
            host, rest = urllib2.splithost(rest)
            host, port = urllib2.splitport(host)
            if port is None:
                port = 443
            cert_file = ssl.get_server_certificate((host, int(port)))
            cert_name = self.store_file(cert_file)
    except IOError as e:
        logger.exception(
            "Failed to fetch injected file, I/O error: {0}".format(e))
        # TODO: We want to handle the case where we can't download
        # the injected file but pretend to be vulnerable.
    else:
        file_name, file_sha256 = self.store_file(injected_file)
    return file_name, file_sha256
def get_local_name(url):
    """Derive a local file name from `url`: basename of the path, else the host."""
    url = url.strip()
    url = re.sub('[\/]+$', '', url)  # strip trailing slashes
    rest = urllib2.splittype(url)[1]
    host, rest = urllib2.splithost(rest)
    if rest is None or rest == '':
        return host
    return os.path.basename(rest)
def get_domain(url):
    """Return the registered domain of `url` via get_tld, falling back to the raw host."""
    try:
        return get_tld(url)
    except:
        # original comment said "delete all \s+"; note that "".join on a
        # plain string is a no-op — it only strips whitespace if `url` is
        # a pre-split sequence (NOTE(review): confirm intended input type)
        base_url = "".join(url)
        protocol, rest = urllib2.splittype(base_url)
        host, rest = urllib2.splithost(rest)
        return host
echo(content) except Exception as err: pass finally: return content def pageCsContentImage(self,cspage): """本地img替换为完全img路径""" proto, rest = urllib2.splittype(self.config.cfgUrl) host, rest = urllib2.splithost(rest) csimgs=cspage.Find("img")
def _setup_server(self, server=None): if server: host, path = urllib2.splithost(urllib2.splittype(server)[-1]) if not path: path = '/' self.client_con = python_webdav.client.Client(host, webdav_path=path) self.client_con.set_connection('wibble', 'fish') else: print "I need a server!" self.client_con = None
def url_size(url):
    """Return the Content-Length reported by a HEAD request to an http URL."""
    import httplib, urllib2
    proto, url = urllib2.splittype(url)
    assert (proto.lower() == 'http')
    host, path = urllib2.splithost(url)
    # http://stackoverflow.com/questions/107405/how-do-you-send-a-head-http-request-in-python
    conn = httplib.HTTPConnection(host)
    conn.request('HEAD', path)
    res = conn.getresponse()
    # FIXME: Follow any redirects
    return int(res.getheader('content-length'))
def lamda(self, csblock):
    """Extract and absolutize the href of a CsQuery block.

    csblock[0] is the anchor element; csblock[1] is a fallback href string.
    Root-relative links are prefixed with the configured site's scheme+host.
    """
    href = csblock[0].Attr("href")
    if not href:
        href = csblock[1]
    if href:
        # strip escaping artifacts left in the scraped attribute
        href = href.replace("\\", "").replace("\"", "")
    if href and href[0:1] == "/":
        proto, rest = urllib2.splittype(self.config.cfgUrl)
        host, rest = urllib2.splithost(rest)
        href = proto + "://" + host + href
    return href
def url_size(url):
    """HEAD-request an http URL and return its reported Content-Length."""
    import httplib, urllib2
    proto, url = urllib2.splittype(url)
    assert(proto.lower() == 'http')
    host, path = urllib2.splithost(url)
    # http://stackoverflow.com/questions/107405/how-do-you-send-a-head-http-request-in-python
    conn = httplib.HTTPConnection(host)
    conn.request('HEAD', path)
    res = conn.getresponse()
    # FIXME: Follow any redirects
    return int(res.getheader('content-length'))
def _get_pingback_server(self, target): " Try to find the target's pingback xmlrpc server address " # first try to find the pingback server in the HTTP header try: host, path = urllib2.splithost(urllib2.splittype(target)[1]) conn = httplib.HTTPConnection(host) conn.request('HEAD', path) res = conn.getresponse() server = dict(res.getheaders()).get('x-pingback') except Exception, e: raise PingbackClientError(e.message)
def url_split(url): protocol = None domain = None rest = None try: protocol, rest = urllib2.splittype(url) if not protocol: protocol = 'https' rest = '//' + rest domain, rest = urllib2.splithost(rest) except Exception, e: log.msg(traceback.format_exc(), level=log.ERROR)
def __init__(self, url):
    """Decompose `url` into schema, user/password, host, port, path, query and tag.

    NOTE(review): self.user/self.password are only set when the URL carries
    credentials — readers must not assume the attributes always exist.
    """
    self.url = url
    self.schema, url = urllib2.splittype(url)
    host, path = urllib2.splithost(url)
    userpass, host = urllib2.splituser(host)
    if userpass:
        self.user, self.password = urllib2.splitpasswd(userpass)
    path, self.querystring = urllib.splitquery(path)
    # list of "k=v" pairs; empty when there is no query string
    self.query = self.querystring and self.querystring.split('&') or []
    self.host, self.port = urllib2.splitport(host)
    path, self.tag = urllib2.splittag(path)
    self.path = path.strip('/')
def get_host(url): ''' 通过url获取域名 :param url: 带获取的url地址 :return: host结果 ''' proto, rest = urllib2.splittype(url) res, rest = urllib2.splithost(rest) if res: return res else: print "获取host" + url + "失败" sys.exit(0)
def get_host(url): """ 通过url获取域名 :param url: 带获取的url地址 :return: host结果 """ proto, rest = urllib2.splittype(url) res, rest = urllib2.splithost(rest) if res: return res else: print "获取host" + url + "失败" sys.exit(0)
def format_and_filter_urls(base_url, url):
    """Resolve `url` against `base_url` and drop any #fragment."""
    # convert partial urls into complete ones
    if url.startswith('/'):
        # root-absolute url
        base_url = "".join(base_url.split())  # remove all whitespace
        protocol, rest = urllib2.splittype(base_url)
        host, rest = urllib2.splithost(rest)
        url = (protocol + "://" + host).rstrip('/') + "/" + url.lstrip('/')
    if url.startswith('.') or not url.startswith('http'):
        # relative url
        url = base_url.rstrip('/') + "/" + url.lstrip('./')
    # drop the anchor/fragment
    return url.split('#')[0]
def go(url): protocol, address=urllib2.splittype(url) # print protocol,address if protocol == "http": global host; host,path=urllib2.splithost(address) # print host,path; content = getPageContent(url); soup = BeautifulSoup(content,'html.parser'); getAllImage(soup); getAllHyperlink(soup); else : print 'URL is not http'
def go(url): protocol, address = urllib2.splittype(url) # print protocol,address if protocol == "http": global host host, path = urllib2.splithost(address) # print host,path; content = getPageContent(url) soup = BeautifulSoup(content, 'html.parser') getAllImage(soup) getAllHyperlink(soup) else: print 'URL is not http'
def gain_links(url='http://www.jianshu.com/p/05cfea46e4fd'):
    """Scrape all non-javascript links from `url`, absolutize them and write links_list.txt."""
    html_page = urllib2.urlopen(url)
    links = BeautifulSoup(html_page).findAll('a')
    links = [
        i.get('href') for i in links
        if i.get('href') and not i.get('href').startswith('javascript:')
    ]
    proto, rest = urllib2.splittype(url)  # extract the scheme from the url
    domain = urllib2.splithost(rest)[0]  # extract the host from the url
    # complete each link: site-absolute ("/x"), fragment ("#x") or already absolute
    links = map(
        lambda i: proto + '://' + domain + i
        if i[0] == '/' else url + i if i[0] == '#' else i, links)
    with open('links_list.txt', 'w') as f:
        f.write('\n'.join(links))
def getHtmlByUrl(url):
    """Fetch `url`, record its host:port in the global `domains`, return the body.

    Best-effort: all failures are swallowed and None is returned.
    NOTE(review): splitport yields port=None when the URL has no explicit
    port, so int(port) raises and the host is silently skipped — confirm
    that is intended.
    """
    global domains
    try:
        u = urllib2.urlopen(url, timeout=10.0)
        content = u.read()
        if content != "":
            try:
                proto, rest = urllib2.splittype(url)
                host, rest = urllib2.splithost(rest)
                host, port = urllib2.splitport(host)
                domains[host] = int(port)
            except:
                pass
        return content
    except:
        pass
def getHtmlByUrl(url):
    """Download `url` (10s timeout), registering host:port in the global `domains`.

    Returns the page body, or None on any failure (best-effort design).
    NOTE(review): URLs without an explicit port make int(None) raise inside
    the inner try, so those hosts are never recorded — verify intent.
    """
    global domains
    try:
        u = urllib2.urlopen(url, timeout=10.0)
        content = u.read()
        if content != "":
            try:
                proto, rest = urllib2.splittype(url)
                host, rest = urllib2.splithost(rest)
                host, port = urllib2.splitport(host)
                domains[host] = int(port)
            except:
                pass
        return content
    except:
        pass
def __init__(self, development=False, user=None, password=None,
             industry=None, message=None, api_url=None, bin_code=None,
             merchant=None, host=None, terminal=None, target=None):
    """Hold PaymenTech point-of-sale credentials and pick the service endpoint.

    In production (development=False) the host/api_url are derived from the
    production target URL; in development mode hard-coded Orbital test
    endpoint values are used unless explicitly overridden.
    """
    # PaymenTech point of sales data
    self.user = user
    self.password = password
    self.industry = industry
    self.message = message
    self.bin_code = bin_code
    self.merchant = merchant
    self.terminal = terminal
    # Service options
    self.development = development
    self.target = target
    self.host = host
    self.api_url = api_url
    # dev:  https://orbitalvar1.paymentech.net/authorize:443
    # prod: https://orbital1.paymentech.net/authorize
    if self.development is False:
        if not self.target:
            # production
            self.target = "https://orbital1.paymentech.net/authorize"
        self.host, self.api_url = \
            urllib2.splithost(urllib2.splittype(self.target)[1])
    else:
        if not self.target:
            # development
            self.target = "https://orbitalvar1.paymentech.net/authorize"
        if not self.host:
            self.host = "orbitalvar1.paymentech.net/authorize:443"
        if not self.api_url:
            self.api_url = "/"
def indexDocs(self, indexfile, writer): t1 = FieldType() t1.setIndexed(False) t1.setStored(True) t1.setTokenized(False) t1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS) t2 = FieldType() t2.setIndexed(True) t2.setStored(False) t2.setTokenized(True) t2.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) index_info = open(indexfile) for line in index_info.readlines(): info = line.split('\t', 2) "adding", info[0] try: path = os.path.join(root, info[0]) file = open(path) #contents = unicode(file.read(), 'gbk') contents = file.read() file.close() proto, rest = urllib2.splittype(info[1]) domain, rest = urllib2.splithost(rest) if not domain: domain = "unknown" if domain[0:4] == "www.": domain = domain[4:] doc = Document() doc.add(Field("name", info[0], t1)) doc.add(Field("path", os.path.abspath(path), t1)) doc.add(Field("url", info[1], t1)) doc.add(Field("title", info[2], t1)) doc.add( Field("site", domain, Field.Store.YES, Field.Index.ANALYZED)) if len(contents) > 0: doc.add(Field("contents", contents, t2)) else: print "warning: no content in %s" % info[0] writer.addDocument(doc) except Exception, e: print "Failed in indexDocs:", e finally:
def get_domain(self):
    """Scan page links and register hosts bound to self.value in the global `domains`."""
    info = self.driver.find_elements(By.XPATH, self.search_key)
    for i in info:
        link = i.get_attribute('href')
        if link is None:
            continue
        try:
            proto, rest = urllib2.splittype(link)
            host, rest = urllib2.splithost(rest)
            host, port = urllib2.splitport(host)
            if port is None:
                port = 80  # default http port
            if host not in domains:
                # keep only hosts actually bound to the target ip
                if check_ip_bind(host, self.value):
                    domains[host] = port
        except:
            pass  # best-effort: ignore malformed links
def doQuery(self, query):
    """POST the OFX `query` to the institution's URL; return the response body."""
    logging.info('Builder.doQuery')
    # N.B. urllib doesn't honor user Content-type, use urllib2
    i = self.institution
    bank = i.dsn
    garbage, path = urllib2.splittype(bank['url'])
    host, selector = urllib2.splithost(path)
    h = httplib.HTTPSConnection(host)
    h.request('POST', selector, query,
              {"Content-type": "application/x-ofx",
               "Accept": "*/*, application/x-ofx"})
    res = h.getresponse()
    response = res.read()
    res.close()
    return response
def response(self): query = urlparse.parse_qs(self.environ["QUERY_STRING"]) url = query.get("url", [""])[0] if not re.match('http://', url): url = 'http://' + url domain, uri = urllib2.splithost(url.lstrip('https:')) print domain, uri if not domain or not uri: status = "" else: user = auth.authdomain(self.environ, domain) if user != self.environ["USER"]: return user status = self.purge(domain, uri) if self.environ["HTTP_HOST"] == "api.dpool.cluster.sina.com.cn": return ("application/json", json.dumps(status)) print status return ("text/html", web.template(self.environ, "purge.html", {"status": status, "user": self.environ["USER"], "key": util.userkey(self.environ["USER"])}))
def page_parse(self, response): items = [] try: page = response.request.meta['page'] if 'page' in response.request.meta else 1 page = int(page) if page > int(self.max_search_page): return items kw_id = response.request.meta['kw_id'] if 'kw_id' in response.request.meta else None pg_id = response.request.meta['pg_id'] if 'pg_id' in response.request.meta else None cat_id = response.request.meta['cat_id'] if 'cat_id' in response.request.meta else None subject_id = response.request.meta['subject_id'] if 'subject_id' in response.request.meta else None url = response.request.url protocol, rest = urllib2.splittype(url) if not protocol: protocol = 'http' rest = '//' + rest if rest: domain, rest = urllib2.splithost(rest) if domain: url_prefix = protocol + '://' + domain else: url_prefix = self.iqiyi_url_prefix body = response.body #用户 results = iqiyi_url_extract.user_url_extract(url, body) if results: for result in results: items.append(Request(url=result, callback=self.user_parse, meta={'page':1, 'kw_id':kw_id, 'pg_id':pg_id, 'cat_id':cat_id, 'subject_id':subject_id})) #播放 results = iqiyi_url_extract.video_url_extract(url, body) if results: for result in results: items.append(Request(url=result, callback=self.video_parse, meta={'page':1, 'kw_id':kw_id, 'pg_id':pg_id, 'cat_id':cat_id, 'subject_id':subject_id})) #下一页 next_pages = response.xpath('//div[@class="mod-page"]//a[@data-key="down"]/@href').extract() if next_pages: for href in next_pages: if href.startswith('/'): href = url_prefix + href items.append(Request(url=href, callback=self.page_parse, meta={'page': page+1, 'kw_id':kw_id, 'pg_id':pg_id, 'cat_id':cat_id, 'subject_id':subject_id})) except Exception, e: log.msg(traceback.format_exc(), level=log.ERROR)
def doQuery(self,query,name): # N.B. urllib doesn't honor user Content-type, use urllib2 garbage, path = urllib2.splittype(self.config["url"]) host, selector = urllib2.splithost(path) h = httplib.HTTPSConnection(host) h.request('POST', selector, query, { "Content-type": "application/x-ofx", "Accept": "*/*, application/x-ofx" }) if 1: res = h.getresponse() response = res.read() res.close() with open(name,"w") as f: f.write(response) else: print self.config["url"] print query
def bing_get(domain): trytime = 0 f = 1 domainsbing = [] #bing里面获取的数据不是很完全 while True: try: req = urllib2.Request( 'http://cn.bing.com/search?count=50&q=site:' + domain + '&first=' + str(f)) req.add_header('User-Agent', random_useragent()) res = urllib2.urlopen(req, timeout=30) src = res.read() TempD = re.findall('<cite>(.*?)<\/cite>', src) for item in TempD: item = item.split('<strong>')[0] item += domain try: if not (item.startswith('http://') or item.startswith('https://')): item = "http://" + item proto, rest = urllib2.splittype(item) host, rest = urllib2.splithost(rest) host, port = urllib2.splitport(host) if port == None: item = host else: item = host + ":" + port except: print traceback.format_exc() pass domainsbing.append(item) if f < 500 and re.search('class="sb_pagN"', src) is not None: f = int(f) + 50 else: subdomainbing = {}.fromkeys(domainsbing).keys() return subdomainbing break except Exception, e: pass trytime += 1 if trytime > 3: return domainsbing
def getDomains(info):
    """Parse hosts out of Bing result markup `info` into the global `domains` map."""
    global domains
    if info != "":
        # narrow to the results list
        match = re.search(r'<ol[^>]*id="b_results">([\s\S]*?)</ol>', info)
        if match:
            info = match.group(1)
            match = re.findall('<a[^>]*href="([^"]*)"[^>]*>', info)
            if len(match) > 0:
                for a in match:
                    try:
                        proto, rest = urllib2.splittype(a)
                        host, rest = urllib2.splithost(rest)
                        host, port = urllib2.splitport(host)
                        if port == None:
                            port = 80  # default http port
                        if not domains.has_key(host):
                            domains[host] = port
                    except:
                        pass  # ignore unparsable hrefs
def getDomains(info):
    """Extract every linked host from Bing's result list and record it in `domains`."""
    global domains
    if info != "":
        # restrict the scan to the <ol id="b_results"> block
        match = re.search(r'<ol[^>]*id="b_results">([\s\S]*?)</ol>', info)
        if match:
            info = match.group(1)
            match = re.findall('<a[^>]*href="([^"]*)"[^>]*>', info)
            if len(match) > 0:
                for a in match:
                    try:
                        proto, rest = urllib2.splittype(a)
                        host, rest = urllib2.splithost(rest)
                        host, port = urllib2.splitport(host)
                        if port == None:
                            port = 80  # assume http default
                        if not domains.has_key(host):
                            domains[host] = port
                    except:
                        pass  # skip hrefs that fail to parse
def getDomains(ip, page):
    """Query dns.aizhan.com for domains hosted on `ip` (page `page`) into `domains`.

    NOTE(review): the except arm returns after the first failure even
    though it is structured as a retry loop (`if trytime > 0`) — confirm
    whether more retries were intended.
    """
    global domains
    trytime = 0
    while True:
        try:
            request = urllib2.Request(
                "http://dns.aizhan.com/index.php?r=index/domains&ip=" + ip +
                "&page=" + str(page))
            request.add_header(
                'User-Agent',
                'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0'
            )
            request.add_header('Accept-encoding', 'gzip')
            # spoof origin headers so the request looks like a browser at `ip`
            request.add_header('X-FORWARDED-FOR', ip)
            request.add_header('Referer', request.get_full_url())
            u = urllib2.urlopen(request, timeout=30)
            content = ''
            if u.info().get('Content-Encoding') == 'gzip':
                buf = StringIO(u.read())
                f = gzip.GzipFile(fileobj=buf)
                content = f.read()
            else:
                content = u.read()
            type = sys.getfilesystemencoding()
            content = content.decode("UTF-8").encode(type)
            domaintemp = json.loads(content, encoding="utf-8")
            for d in domaintemp["domains"]:
                try:
                    proto, rest = urllib2.splittype("http://" + str(d))
                    host, rest = urllib2.splithost(rest)
                    host, port = urllib2.splitport(host)
                    if port == None:
                        port = 80
                    if not domains.has_key(host):
                        domains[host] = port
                except:
                    pass
            return
        except:
            trytime += 1
            if trytime > 0:
                return
def _build_request(url):
    """Build and open a urllib2 request for `url`, handling inline basic auth."""
    # Detect basic auth
    # Adapted from python-feedparser
    # BUGFIX: user_passwd was unbound (NameError) when the URL had no host part
    user_passwd = None
    urltype, rest = urllib2.splittype(url)
    realhost, rest = urllib2.splithost(rest)
    if realhost:
        user_passwd, realhost = urllib2.splituser(realhost)
        if user_passwd:
            # rebuild the URL without the inline credentials
            url = '%s://%s%s' % (urltype, realhost, rest)

    # Start request
    req = urllib2.Request(url)

    # Add headers
    req.add_header('User-Agent', 'SABnzbd+/%s' % sabnzbd.version.__version__)
    # some hosts serve broken gzip; skip Accept-encoding for them
    if not any(item in url for item in _BAD_GZ_HOSTS):
        req.add_header('Accept-encoding', 'gzip')
    if user_passwd:
        req.add_header('Authorization',
                       'Basic ' + user_passwd.encode('base64').strip())
    return urllib2.urlopen(req)
def doQuery(self, query, name): # N.B. urllib doesn't honor user Content-type, use urllib2 garbage, path = urllib2.splittype(self.config["url"]) host, selector = urllib2.splithost(path) h = httplib.HTTPSConnection(host) h.request( 'POST', selector, query, { "Content-type": "application/x-ofx", "Accept": "*/*, application/x-ofx" }) if 1: res = h.getresponse() response = res.read() res.close() with open(name, "w") as f: f.write(response) else: print self.config["url"] print query
def bing_get(domain): trytime = 0 f = 1 domainsbing = [] #bing里面获取的数据不是很完全 while True: try: req=urllib2.Request('http://cn.bing.com/search?count=50&q=site:'+domain+'&first='+str(f)) req.add_header('User-Agent',random_useragent()) res=urllib2.urlopen(req, timeout = 30) src=res.read() TempD=re.findall('<cite>(.*?)<\/cite>',src) for item in TempD: item=item.split('<strong>')[0] item += domain try: if not (item.startswith('http://') or item.startswith('https://')): item = "http://" + item proto, rest = urllib2.splittype(item) host, rest = urllib2.splithost(rest) host, port = urllib2.splitport(host) if port == None: item = host else: item = host + ":" + port except: print traceback.format_exc() pass domainsbing.append(item) if f<500 and re.search('class="sb_pagN"',src) is not None: f = int(f)+50 else: subdomainbing={}.fromkeys(domainsbing).keys() return subdomainbing break except Exception, e: pass trytime+=1 if trytime>3: return domainsbing
]} class FunctionHelper(object): @staticmethod def urlSegments(baseindex): result={ "proto":"", "host":"", "path":[], "query":"" } proto, rest = urllib2.splittype(baseindex) host, rest = urllib2.splithost(rest) result["proto"]=proto result["host"]=host path=rest if rest.find("?")>-1: res=rest.split("?") path=res[0] query=res[1] result["query"]=query
def post(self, query):
    """POST the OFX `query` to the institution URL; return the raw response body."""
    # N.B. urllib doesn't honor user Content-type, use urllib2
    i = self.institution
    logging.debug('posting data to %s' % i.url)
    logging.debug('---- request ----')
    logging.debug(query)
    garbage, path = urllib2.splittype(i.url)
    host, selector = urllib2.splithost(path)
    h = httplib.HTTPSConnection(host)
    h.request('POST', selector, query,
              {"Content-type": "application/x-ofx",
               "Accept": "*/*, application/x-ofx"})
    res = h.getresponse()
    response = res.read()
    logging.debug('---- response ----')
    logging.debug(res.__dict__)
    logging.debug(response)
    res.close()
    return response
def getDomains(ip,page): global domains trytime = 0 while True: try: request = urllib2.Request("http://dns.aizhan.com/index.php?r=index/domains&ip="+ ip +"&page="+ str(page) ) request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0') request.add_header('Accept-encoding', 'gzip') request.add_header('X-FORWARDED-FOR', ip) request.add_header('Referer', request.get_full_url()) u = urllib2.urlopen(request , timeout = 30) content = '' if u.info().get('Content-Encoding') == 'gzip': buf = StringIO(u.read()) f = gzip.GzipFile(fileobj=buf) content = f.read() else: content = u.read() type = sys.getfilesystemencoding() content = content.decode("UTF-8").encode(type) domaintemp = json.loads(content,encoding="utf-8") for d in domaintemp["domains"]: try: proto, rest = urllib2.splittype("http://"+ str(d)) host, rest = urllib2.splithost(rest) host, port = urllib2.splitport(host) if port == None: port = 80 if not domains.has_key(host): domains[host] = port except: pass return except: trytime+=1 if trytime>0: return
def purge(environ): ctype = 'text/plain; charset=utf-8' response_body = '' request_uri = environ['REQUEST_URI'] if not re.match("/purge\?url=.*", request_uri): response_body = script.response(os.path.join(environ['DOCUMENT_ROOT'], 'app/purge/README'), {'host': environ['HTTP_HOST']}) return (ctype, response_body) url = request_uri.split('/purge?url=')[1] mc = memcache.Client(environ['SINASRV_MEMCACHED_SERVERS'].split(' ')) try: client = environ['HTTP_X_FORWARDED_FOR'] except: client = environ['REMOTE_ADDR'] #for url in query['url']: if mc.get(url) or mc.get(client): print mc.get(client) response_body = '%s: Purge too often.\n' % url clientlog(environ, client, response_body) return (ctype, response_body) mc.set(client, 1, time=60) name, path = urllib2.splithost('//' + url.split('//')[-1]) response = {} try: ip_list = query['ip'] except: ip_list = getip(environ, mc) print ip_list for ip in ip_list: response.update({ip: request(environ, ip, 8899, name, path, client, url)}) if 1 not in response.values(): response_body += '%s: Purge OK.\n' % url mc.set(url, 1, time=300) else: response_body += '%s: Purge Failed.\n' % url clientlog(environ, client, response_body) return (ctype, response_body)
def cmdLineParser(): """Implementation to WPHardening.""" usage = "usage: python %prog [options]" version = colored('WPHardening', 'green') + ' version' + \ colored(' 1.5', 'yellow') + '\n' parser = OptionParser(usage, version=version) parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="Active verbose mode output results") parser.add_option("--update", action="store_true", dest="update", default=False, help="Check for WPHardening latest stable version") target = OptionGroup(parser, "Target", "This option must be " "specified to modify the package WordPress.") target.add_option("-d", "--dir", dest="path", help="**REQUIRED** -" " Working Directory.", metavar="DIRECTORY") target.add_option("--load-conf", dest="loadconf", metavar="FILE", help="Load file configuration.") hardening = OptionGroup(parser, "Hardening", "Different tools to" " hardening WordPress.") hardening.add_option("-c", "--chmod", action="store_true", dest="chmod", help="Chmod 755 in directory and 644 in files.") hardening.add_option("-r", "--remove", action="store_true", dest="remove", help="Remove files and directory.") hardening.add_option("-b", "--robots", action="store_true", dest="robots", help="Create file robots.txt") hardening.add_option("-f", "--fingerprinting", action="store_true", dest="finger", help="Deleted fingerprinting " "WordPress.") hardening.add_option("-t", "--timthumb", action="store_true", dest="timthumb", help="Find the library TimThumb.") hardening.add_option("--chown", action="store", type="string", dest="chown", metavar="user:group", help="Changing " "file and directory owner.") hardening.add_option("--wp-config", action="store_true", dest="wpconfig", help="Wizard generated wp-config.php") hardening.add_option("--plugins", action="store_true", dest="plugins", help="Download Plugins Security.") hardening.add_option("--proxy", action="store", type="string", dest="proxy", help="Use a HTTP proxy to connect to " "the target url for 
--plugins and --wp-config.") hardening.add_option("--indexes", action="store_true", dest="indexes", help="It allows you to display the contents of " "directories.") hardening.add_option("--minify", action="store_true", dest="minify", help="Compressing static file .css and .js") hardening.add_option("--malware-scan", action="store_true", dest="malwares", help="Malware Scan in WordPress " "project.") miscellaneous = OptionGroup(parser, "Miscellaneous") miscellaneous.add_option("-o", "--output", help="Write log report to " "FILE.log", metavar="FILE", dest="output") parser.add_option_group(target) parser.add_option_group(hardening) parser.add_option_group(miscellaneous) cmdBanner() (options, args) = parser.parse_args() if options.loadconf is not None: options.path = loadConfWordPress(options.loadconf).getDirectory() options.chmod = loadConfWordPress(options.loadconf).getChmod() options.robots = loadConfWordPress(options.loadconf).getRobots() options.finger = loadConfWordPress( options.loadconf ).getFingerprinting() options.wpconfig = loadConfWordPress(options.loadconf).getWpConfig() options.indexes = loadConfWordPress(options.loadconf).getIndexes() options.timthumb = loadConfWordPress(options.loadconf).getTimthumb() options.malwares = loadConfWordPress(options.loadconf).getMalwareScan() options.output = loadConfWordPress(options.loadconf).getOutput() if options.output is None: filename = 'wphardening.log' else: filename = options.output log = registerLog(filename) log.setConfigure() if options.update: log.add("Check for WPHardening latest stable version") updateWPHardening(os.path.abspath(".")).update() sys.exit() if options.path is None: log.add("Did not specify a working directory.") parser.print_help() sys.exit() options.path = os.path.abspath(options.path) if os.path.exists(options.path): if checkWordpress(options.path, options.verbose).isWordPress(): if options.chown is not None: changeOwner = chownWordPress( options.path, options.chown, options.verbose ) if 
changeOwner.isValid(): changeOwner.changeOwner() if options.chmod is not None: chmodWordPress( options.path, options.verbose ).changePermisions() if options.robots is not None: robotsWordPress(options.path).createRobots() if options.finger is not None: deleteVersionWordPress(options.path).delete() fingerprintingWordPress( options.path, options.verbose ).searchStaticFile() if options.wpconfig is not None: if options.proxy is not None: protocolo, rest = urllib2.splittype(options.proxy) if protocolo is None: raise ValueError("unknown URL type: %s") % \ (options.proxy) host, rest = urllib2.splithost(rest) host, port = urllib2.splitport(host) if port is None: raise ValueError("unknown protocol for %s") % \ (options.proxy) puerto = int(port) asdf = wpconfigWordPress(options.path, options.proxy) else: asdf = wpconfigWordPress(options.path, options.proxy) asdf.createConfig() if options.indexes is not None: indexesWordPress(options.path, options.verbose).createIndexes() if options.timthumb is not None: timthumbWordPress(options.path).checkTimbthumb() if options.plugins is not None: if options.proxy is not None: protocolo, rest = urllib2.splittype(options.proxy) if protocolo is None: raise ValueError("unknown URL type: %s") % \ (options.proxy) host, rest = urllib2.splithost(rest) host, port = urllib2.splitport(host) if port is None: raise ValueError("unknown protocol for %s") % \ (options.proxy) puerto = int(port) asdf = pluginsWordPress(options.path, options.proxy) else: asdf = pluginsWordPress(options.path, options.proxy) asdf.questions() if options.malwares is not None: malwareScanWordPress(options.path).scan() if options.remove is not None: removeWordPress(options.path).delete() if options.minify is not None: minifyWordPress(options.path, options.verbose).minify() else: log.add("Could not find the specified directory.") print colored('\nCould not find the specified directory.\n', 'red')
def get_host(url):
    """Return the host[:port] component of *url*, or None when absent.

    ``urllib2.splithost`` yields ``None`` when the URL carries no
    network location, which is passed through unchanged.
    """
    scheme, remainder = urllib2.splittype(url)
    netloc, path = urllib2.splithost(remainder)
    if not netloc:
        return None
    return netloc
# NOTE(review): ad-hoc CLI client. Usage: script <url> [ip override].
# It splits <url> into protocol/host/path with urllib2 helpers; when an
# [ip override] argument is supplied it is used instead of DNS, otherwise
# the hostname itself is used. MyResolver ignores its argument and always
# returns the module-level `ip`, and MyHTTPConnection.connect dials
# (MyResolver(host), port) directly via socket.create_connection so the
# Host header still carries the original hostname.
# NOTE(review): MyHTTPSConnection.connect's body continues beyond this
# excerpt — presumably it wraps the same resolver trick in SSL; verify
# against the full file before relying on it.
if len(sys.argv) < 2: print "Usage: %s <url> [ip override]" % sys.argv[0] print " Logs in to the server and requests the provided URL. If provided," print " this uses the supplied IP rather than what would be returned by" print " a standard DNS request." print print " Example: %s http://nu.futurebazaar.com/ 10.0.5.33" % sys.argv[0] print " The above would request the home page of nu.futurebazaar.com" print " from 10.0.5.33." print sys.exit(1) url = sys.argv[1] protocol, remainder = urllib2.splittype(url) host, path = urllib2.splithost(remainder) if len(sys.argv) > 2: ip = sys.argv[2] else: ip = host def MyResolver(host): return ip class MyHTTPConnection(httplib.HTTPConnection): def connect(self): self.sock = socket.create_connection((MyResolver(self.host), self.port), self.timeout) class MyHTTPSConnection(httplib.HTTPSConnection): def connect(self):
def getURLBase(url):
    """Return the ``scheme://host`` prefix of *url*.

    E.g. ``getURLBase('http://example.com/a/b')`` -> ``'http://example.com'``.
    Fix: the original called ``urllib2.splittype(url)`` twice, re-parsing
    the same URL; parse once and reuse both results.
    """
    method, rest = urllib2.splittype(url)
    host = urllib2.splithost(rest)[0]
    # NOTE(review): like the original, this raises TypeError when *url*
    # has no scheme or no host (None ends up in the concatenation).
    return method + '://' + host
def main(): usage = "usage: %prog [options] arg" version = colored('WP Hardening', 'green') + ' version' + \ colored(' 1.1', 'yellow') parser = OptionParser(usage, version=version) parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", help="active verbose mode output results", ) group1 = OptionGroup( parser, "Target", "This option must be specified to modify the package WordPress." ) group1.add_option( "-d", "--dir", dest="path", help="**REQUIRED** - Working Directory.", metavar="DIRECTORY" ) parser.add_option_group(group1) group2 = OptionGroup( parser, "Hardening", "Different tools to hardening WordPress." ) group2.add_option( "-c", "--chmod", action="store_true", dest="chmod", help="Chmod 755 in directory and 644 in files." ) group2.add_option( "-r", "--remove", action="store_true", dest="remove", help="Remove files and directory." ) group2.add_option( "-b", "--robots", action="store_true", dest="robots", help="Create file robots.txt" ) group2.add_option( "-f", "--fingerprinting", action="store_true", dest="finger", help="Deleted fingerprinting WordPress." ) group2.add_option( "--wp-config", action="store_true", dest="wpconfig", help="Generated wp-config.php" ) group2.add_option( "--delete-version", action="store_true", dest="delete_version", help="Deleted version WordPress." ) group2.add_option( "--plugins", action="store_true", dest="plugins", help="Download Plugins Security." ) group2.add_option( "--proxy", action="store", type="string", dest="proxy", help="Use a HTTP proxy to connect to the target url for --plugins and \ --wp-config." ) group2.add_option( "--indexes", action="store_true", dest="indexes", help="It allows you to display the contents of directories." 
) parser.add_option_group(group2) group3 = OptionGroup( parser, "Miscellaneous", ) group3.add_option( "-o", "--output", help="Write log report to FILE.log", metavar="FILE", dest="output" ) parser.add_option_group(group3) (options, args) = parser.parse_args() if options.output is None: filename = 'wphardening.log' else: filename = options.output log = registerLog(filename) log.setConfigure() if options.path is None: log.add("Did not specify a working directory.") parser.print_help() sys.exit() options.path = os.path.abspath(options.path) if os.path.exists(options.path): wordpress = checkWordpress(options.path) if wordpress.isWordPress(): log.add(options.path + " This project directory is a WordPress.") print colored(options.path, 'yellow') + ' -', \ colored('\nThis project directory is a WordPress.', 'green') if options.delete_version is not None: asdf = deleteVersionWordPress(options.path) asdf.delete() if options.chmod is not None: asdf = chmodWordPress(options.path) asdf.changePermisions() if options.remove is not None: qwer = removeWordPress(options.path) qwer.delete() if options.robots is not None: zxcv = robotsWordPress(options.path) zxcv.createRobots() if options.finger is not None: asdf = fingerprintingWordPress(options.path) asdf.searchStaticFile() if options.wpconfig is not None: if options.proxy is not None: protocolo, rest = urllib2.splittype(options.proxy) if protocolo is None: raise ValueError("unknown URL type: %s") % \ (options.proxy) host, rest = urllib2.splithost(rest) host, port = urllib2.splitport(host) if port is None: raise ValueError("unknown protocol for %s") % \ (options.proxy) puerto = int(port) asdf = wpconfigWordPress(options.path, options.proxy) else: asdf = wpconfigWordPress(options.path, options.proxy) asdf.createConfig() if options.indexes is not None: asdf = indexesWordPress(options.path) asdf.createIndexes() if options.plugins is not None: if options.proxy is not None: protocolo, rest = urllib2.splittype(options.proxy) if protocolo 
is None: raise ValueError("unknown URL type: %s") % \ (options.proxy) host, rest = urllib2.splithost(rest) host, port = urllib2.splitport(host) if port is None: raise ValueError("unknown protocol for %s") % \ (options.proxy) puerto = int(port) asdf = pluginsWordPress(options.path, options.proxy) else: asdf = pluginsWordPress(options.path, options.proxy) asdf.questions() else: log.add( options.path + " This Project directory is not a WordPress." ) print colored(options.path, 'yellow') + ' -', \ colored('This Project directory is not a WordPress.\n', 'red') else: log.add("Could not find the specified directory.") print colored('\nCould not find the specified directory.\n', 'red')
def cmdLineParser(): """Implementation to WPHardening.""" version_wph = "1.6" usage = "usage: python %prog [options]" version = colored('WPHardening', 'green') + ' version ' + \ colored(version_wph, 'yellow') + '\n' parser = OptionParser(usage, version=version) parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="Active verbose mode output results") parser.add_option("--update", action="store_true", dest="update", default=False, help="Check for WPHardening latest stable version") target = OptionGroup(parser, "Target", "This option must be " "specified to modify the package WordPress.") target.add_option("-d", "--dir", dest="path", help="**REQUIRED** -" " Working Directory.", metavar="DIRECTORY") target.add_option("--load-conf", dest="loadconf", metavar="FILE", help="Load file configuration.") hardening = OptionGroup(parser, "Hardening", "Different tools to" " hardening WordPress.") hardening.add_option("-c", "--chmod", action="store_true", dest="chmod", help="Chmod 755 in directory and 644 in files.") hardening.add_option("-r", "--remove", action="store_true", dest="remove", help="Remove files and directory.") hardening.add_option("-b", "--robots", action="store_true", dest="robots", help="Create file robots.txt") hardening.add_option("-f", "--fingerprinting", action="store_true", dest="finger", help="Deleted fingerprinting " "WordPress.") hardening.add_option("-t", "--timthumb", action="store_true", dest="timthumb", help="Find the library TimThumb.") hardening.add_option("--chown", action="store", type="string", dest="chown", metavar="user:group", help="Changing " "file and directory owner.") hardening.add_option("--wp-config", action="store_true", dest="wpconfig", help="Wizard generated wp-config.php") hardening.add_option("--plugins", action="store_true", dest="plugins", help="Download Plugins Security.") hardening.add_option("--proxy", action="store", type="string", dest="proxy", help="Use a HTTP proxy to connect to " 
"the target url for --plugins and --wp-config.") hardening.add_option("--indexes", action="store_true", dest="indexes", help="It deny you to display the contents of " "directories.") hardening.add_option("--minify", action="store_true", dest="minify", help="Compressing static file .css and .js") hardening.add_option("--malware-scan", action="store_true", dest="malwares", help="Malware Scan in WordPress " "project.") hardening.add_option("--6g-firewall", action="store_true", dest="sixg", help="6G Firewall.") hardening.add_option("--rest-api", action="store_true", dest="api", help="Disable REST API.") miscellaneous = OptionGroup(parser, "Miscellaneous") miscellaneous.add_option("-o", "--output", help="Write log report to " "FILE.log", metavar="FILE", dest="output") parser.add_option_group(target) parser.add_option_group(hardening) parser.add_option_group(miscellaneous) cmdBanner() (options, args) = parser.parse_args() if options.loadconf is not None: options.path = loadConfWordPress(options.loadconf).getDirectory() options.chmod = loadConfWordPress(options.loadconf).getChmod() options.robots = loadConfWordPress(options.loadconf).getRobots() options.finger = loadConfWordPress( options.loadconf ).getFingerprinting() options.wpconfig = loadConfWordPress(options.loadconf).getWpConfig() options.indexes = loadConfWordPress(options.loadconf).getIndexes() options.timthumb = loadConfWordPress(options.loadconf).getTimthumb() options.malwares = loadConfWordPress(options.loadconf).getMalwareScan() options.output = loadConfWordPress(options.loadconf).getOutput() if options.output is None: filename = 'wphardening.log' else: filename = options.output log = registerLog(filename) log.setConfigure() if options.update: log.add("Check for WPHardening latest stable version") updateWPHardening(os.path.abspath(".")).update() sys.exit() if options.path is None: log.add("Did not specify a working directory.") parser.print_help() sys.exit() options.path = os.path.abspath(options.path) if 
os.path.exists(options.path): fname = "output.html" context = { 'directory': options.path, 'version': version_wph } if checkWordpress(options.path, options.verbose).isWordPress(): if options.chown is not None: changeOwner = chownWordPress( options.path, options.chown, options.verbose ) if changeOwner.isValid(): changeOwner.changeOwner() context['chown'] = options.chown if options.chmod is not None: chmodWordPress( options.path, options.verbose ).changePermisions() context['chmod'] = True if options.robots is not None: robotsWordPress(options.path).createRobots() context['robots'] = True if options.finger is not None: deleteVersionWordPress(options.path).delete() fingerprintingWordPress( options.path, options.verbose ).searchStaticFile() context['finger'] = True if options.wpconfig is not None: if options.proxy is not None: protocolo, rest = urllib2.splittype(options.proxy) if protocolo is None: raise ValueError("unknown URL type: %s") % \ (options.proxy) host, rest = urllib2.splithost(rest) host, port = urllib2.splitport(host) if port is None: raise ValueError("unknown protocol for %s") % \ (options.proxy) puerto = int(port) asdf = wpconfigWordPress(options.path, options.proxy) else: asdf = wpconfigWordPress(options.path, options.proxy) asdf.createConfig() if options.indexes is not None: indexesWordPress(options.path, options.verbose).createIndexes() context['indexes'] = True if options.timthumb is not None: timthumbWordPress(options.path).checkTimbthumb() if options.plugins is not None: if options.proxy is not None: protocolo, rest = urllib2.splittype(options.proxy) if protocolo is None: raise ValueError("unknown URL type: %s") % \ (options.proxy) host, rest = urllib2.splithost(rest) host, port = urllib2.splitport(host) if port is None: raise ValueError("unknown protocol for %s") % \ (options.proxy) puerto = int(port) asdf = pluginsWordPress(options.path, options.proxy) else: asdf = pluginsWordPress(options.path, options.proxy) asdf.questions() if options.malwares 
is not None: malwareScanWordPress(options.path).scan() if options.remove is not None: removeWordPress(options.path).delete() context['remove'] = True if options.minify is not None: minifyWordPress(options.path, options.verbose).minify() if options.sixg is not None: sixgWordPress(options.path, options.verbose).createFirewall() context['sixg'] = True if options.api is not None: restApiWordPress(options.path).disableRestApi() context['api'] = True # output jinja2 with open(fname, 'w') as f: html = render_template('index.html.tmpl', context) f.write(html) else: log.add("Could not find the specified directory.") print colored('\nCould not find the specified directory.\n', 'red')