def __init__(self, reply):
    """Capture URL, body, cookies, verb and headers of a network reply.

    `reply` must provide the accessors used below (`url()`,
    `rawHeader()`, `rawHeaderList()`, `operation()`) as well as the
    attributes `data`, `content_type`, `content` and `orig_request`.

    Raises KeyError when `reply.operation()` is not present in the verb
    map defined in this method.
    """
    # save shortcuts to URL details
    self.url = reply.url()
    self.host = self.url.host()
    self.path = self.url.path()
    self.qs = self.url.queryItems()
    self.data = reply.data
    self.content_type = reply.content_type
    self.content = common.to_unicode(str(reply.content))
    try:
        self.parsed_content = parser.parse(self.content, self.content_type)
    except ValueError:
        # A body that cannot be parsed is reported and stored as None.
        # Single-argument parenthesised print behaves the same on
        # Python 2 and Python 3.
        print('Error parsing URL with lxml: {}'.format(self.url.toString()))
        self.parsed_content = None
    self.columns = None
    self.cookies = QNetworkCookie.parseCookies(reply.rawHeader('Set-Cookie'))

    # map of Qt verbs
    verbs = {
        QNetworkAccessManager.HeadOperation: 'HEAD',
        QNetworkAccessManager.GetOperation: 'GET',
        QNetworkAccessManager.PutOperation: 'PUT',
        QNetworkAccessManager.PostOperation: 'POST',
        QNetworkAccessManager.DeleteOperation: 'DELETE',
        QNetworkAccessManager.CustomOperation: 'CUSTOM',
    }
    self.verb = verbs[reply.operation()]

    # save request details
    request = reply.orig_request
    self.request_headers = [
        (header, request.rawHeader(header))
        for header in request.rawHeaderList()
    ]
    # Response header values have to be read from the reply itself; the
    # previous code looked them up on the request object.
    self.response_headers = [
        (header, reply.rawHeader(header))
        for header in reply.rawHeaderList()
    ]
def loadCookies(self, cookies):
    """
    Public method to restore the saved cookies.

    The serialized data starts with a 16 bit version number followed by
    a 32 bit value, after which the raw cookies are read one by one
    until the stream is exhausted.

    @param cookies byte array containing the saved cookies (QByteArray)
    @return list of cookies; empty if there is no data or the stored
        version differs from JAR_VERSION
    """
    if cookies.isEmpty():
        return []

    restored = []
    # keep the byte array bound to a name while the stream reads from it
    buf = QByteArray(cookies)
    reader = QDataStream(buf, QIODevice.ReadOnly)

    if reader.readUInt16() != self.JAR_VERSION:
        return []

    # the 32 bit value is read to advance the stream; it is not used
    reader.readUInt32()

    chunk = QByteArray()
    while not reader.atEnd():
        reader >> chunk
        restored.extend(QNetworkCookie.parseCookies(chunk))

    return restored
def __init__(self, url, app):
    """Set up the page with the shared cookie jar and call `bind()`.

    url -- stored as `_url`; the cookies parsed from the module-level
           `_cookie` value are registered in the jar for this URL
    app -- stored as `_app`
    """
    # Both names are only read here. The statement previously listed
    # `cookie`, which this method never touches.
    global _cookieJar, _cookie
    self._url = url
    self._app = app
    QWebPage.__init__(self)
    self.networkAccessManager().setCookieJar(_cookieJar)
    _cookieJar.setCookiesFromUrl(
        QNetworkCookie.parseCookies(_cookie), QUrl(url))
    self.bind()
def load(self):
    """Load the cookies stored in the cookie file into this jar.

    Nothing happens when the cookie file does not exist. An existing but
    empty file clears the jar; otherwise the file content is parsed with
    `QNetworkCookie.parseCookies` and installed via `setAllCookies`.
    """
    # NOTE(review): the original source was collapsed onto one line, so
    # it is not visible whether the `else` belonged to the existence
    # check or to the size check. It is attached to the size check here
    # - confirm against the upstream file.
    cookieFile = self._cookiesFilePath()
    if not os.path.exists(cookieFile):
        return

    if not os.stat(cookieFile).st_size:
        self.setAllCookies([])
        return

    # Read through a context manager so the handle is closed right away
    # instead of whenever the file object happens to be collected.
    with open(cookieFile) as handle:
        raw = handle.read()
    self.setAllCookies(QNetworkCookie.parseCookies(QByteArray(raw)))
def import_raw_cookie_list(self, raw_cookie_list):
    """Merge raw cookie strings into the global cookie jar.

    Every entry of `raw_cookie_list` is parsed with
    `QNetworkCookie.parseCookies`; cookies not already in the jar are
    appended. The jar is then rewritten and
    `signal_cookie_jar_updated()` is called.
    """
    jar = self.get_global_cookie_jar()

    # merge cookies
    merged = jar.allCookies()
    for raw in raw_cookie_list:
        for parsed in QNetworkCookie.parseCookies(raw):
            if parsed in merged:
                continue
            merged.append(parsed)

    jar.setAllCookies(merged)
    self.signal_cookie_jar_updated()
def _parse_cookie_attribs_into_QtCookies_list(self, cookie_attrs, default_domain): #ugly, but works around bugs in parseCookies cookies = [] for cookie_attr in cookie_attrs: # parsing every attribute on its own because parser seems to be <censored>! tmp_cookie_list = QNetworkCookie.parseCookies(cookie_attr) if tmp_cookie_list: tmp_cookie = tmp_cookie_list[0] if not tmp_cookie.domain(): tmp_cookie.setDomain(QString(default_domain)) cookies.append(tmp_cookie) return cookies
def load_cookies_from_file(self, filename):
    """Read cookies from an XML cookie file and return them as a list.

    The file may be plain (`.xml`), bzip2 compressed (`.xml.bz2`) or xz
    compressed (`.xml.xz`). Cookies are taken from the text of `raw`
    elements found inside `cookie` elements inside a `cookies` element;
    a `raw` element with `encoding="base64"` is decoded first.

    Raises Exception for any other file name suffix. Errors raised
    while parsing propagate to the caller; the file is closed either
    way.
    """
    if filename.endswith('.xml.bz2'):
        source = bz2.BZ2File(filename, 'r')
    elif filename.endswith('.xml.xz'):
        source = lzma.LZMAFile(filename, 'r')
    elif filename.endswith('.xml'):
        source = open(filename, 'rb')
    else:
        raise Exception('unhandled file type: %s' % (filename))

    cookieList = []
    in_cookies = False
    in_cookie = False
    # try/finally closes the source on parse errors too; previously it
    # was closed only after the iterator had been exhausted.
    try:
        # http://effbot.org/zone/element-iterparse.htm#incremental-parsing
        context = etree.iterparse(
            source, events=('start', 'end'), huge_tree=True)
        for event, elem in context:
            tag = elem.tag
            if not in_cookies and 'cookies' == tag and 'start' == event:
                in_cookies = True
            elif in_cookies and 'cookie' == tag and 'start' == event:
                in_cookie = True
            elif in_cookie and 'raw' == tag and 'end' == event:
                value = str(elem.text)
                encoding = None
                if 'encoding' in elem.attrib:
                    encoding = str(elem.attrib['encoding'])
                if 'base64' == encoding:
                    value = base64.b64decode(value)
                cookieList.extend(QNetworkCookie.parseCookies(value))
            elif 'cookies' == tag and 'end' == event:
                in_cookies = False
            elif in_cookie and 'cookie' == tag and 'end' == event:
                in_cookie = False
    finally:
        source.close()
    return cookieList
def _createCookieJarfromInjectedResponse(self, default_domain):
    """Build a new cookie jar from the headers of `_getCookieHeaders()`.

    Each header is parsed on its own. If the first cookie of a parse
    result has no domain it is given `default_domain`. All parsed
    cookies are stored in the returned `QNetworkCookieJar`.
    """
    # ugly, but works around bugs in parseCookies
    cookies = []
    for cookie_header in self._getCookieHeaders():
        tmp_cookieList = QNetworkCookie.parseCookies(cookie_header)
        if not tmp_cookieList:
            # A header that yields no cookie used to raise IndexError on
            # the [0] access below; skip it instead.
            continue
        tmp_cookie = tmp_cookieList[0]
        if not tmp_cookie.domain():
            tmp_cookie.setDomain(QString(default_domain))
        # extend in place rather than rebuilding the list per header
        cookies.extend(tmp_cookieList)
    cj = QNetworkCookieJar()
    cj.setAllCookies(cookies)
    return cj
def __init__(self, url, app):
    """Set up the page, bind it, run the application and set handlers.

    url -- stored as `_url`; the cookies parsed from the module-level
           `_cookie` value are registered in the jar for this URL
    app -- stored as `_app`; its `exec_()` is called from here
    """
    # Both names are only read here. The statement previously listed
    # `cookie`, which this method never touches.
    global _cookieJar, _cookie
    self._url = url
    self._app = app
    QWebPage.__init__(self)
    self.networkAccessManager().setCookieJar(_cookieJar)
    _cookieJar.setCookiesFromUrl(
        QNetworkCookie.parseCookies(_cookie), QUrl(url))
    self.bind()
    self._app.exec_()
    # NOTE(review): the handlers below are assigned only after exec_()
    # has returned - confirm that this ordering is intended.
    self.user_agent_for_url = user_agent
    self.js_alert = alert
    self.js_prompt = alert
    self.js_confirm = alert
    self.js_console_message = console_message
def __init__(self, url, app):
    """Set up the page, bind it, run the application and set handlers.

    url -- stored as `_url`; the cookies parsed from the module-level
           `_cookie` value are registered in the jar for this URL
    app -- stored as `_app`; its `exec_()` is called from here
    """
    # Both names are only read here. The statement previously listed
    # `cookie`, which this method never touches.
    global _cookieJar, _cookie
    self._url = url
    self._app = app
    QWebPage.__init__(self)
    self.networkAccessManager().setCookieJar(_cookieJar)
    _cookieJar.setCookiesFromUrl(
        QNetworkCookie.parseCookies(_cookie), QUrl(url))
    self.bind()
    self._app.exec_()
    # NOTE(review): the handlers below are assigned only after exec_()
    # has returned - confirm that this ordering is intended.
    self.user_agent_for_url = user_agent
    self.js_alert = alert
    self.js_prompt = alert
    self.js_confirm = alert
    self.js_console_message = console_message
def load_cookies_from_file(self, filename):
    """Read cookies from an XML cookie file and return them as a list.

    The file may be plain (`.xml`), bzip2 compressed (`.xml.bz2`) or xz
    compressed (`.xml.xz`). Cookies are taken from the text of `raw`
    elements found inside `cookie` elements inside a `cookies` element;
    a `raw` element with `encoding="base64"` is decoded first.

    Raises Exception for any other file name suffix. Errors raised
    while parsing propagate to the caller; the file is closed either
    way.
    """
    if filename.endswith('.xml.bz2'):
        source = bz2.BZ2File(filename, 'r')
    elif filename.endswith('.xml.xz'):
        source = lzma.LZMAFile(filename, 'r')
    elif filename.endswith('.xml'):
        source = open(filename, 'rb')
    else:
        raise Exception('unhandled file type: %s' % (filename))

    cookieList = []
    in_cookies = False
    in_cookie = False
    # try/finally closes the source on parse errors too; previously it
    # was closed only after the iterator had been exhausted.
    try:
        # http://effbot.org/zone/element-iterparse.htm#incremental-parsing
        context = etree.iterparse(
            source, events=('start', 'end'), huge_tree=True)
        for event, elem in context:
            tag = elem.tag
            if not in_cookies and 'cookies' == tag and 'start' == event:
                in_cookies = True
            elif in_cookies and 'cookie' == tag and 'start' == event:
                in_cookie = True
            elif in_cookie and 'raw' == tag and 'end' == event:
                value = str(elem.text)
                encoding = None
                if 'encoding' in elem.attrib:
                    encoding = str(elem.attrib['encoding'])
                if 'base64' == encoding:
                    value = base64.b64decode(value)
                cookieList.extend(QNetworkCookie.parseCookies(value))
            elif 'cookies' == tag and 'end' == event:
                in_cookies = False
            elif in_cookie and 'cookie' == tag and 'end' == event:
                in_cookie = False
    finally:
        source.close()
    return cookieList
def __init__(self, parent=None, allowed=None, storage=None):
    """ Load cookies from a file

    `allowed` is stored on the instance (an empty list when falsy).
    When `storage` is given, every line of that file is parsed as
    cookies and the combined result replaces the jar's content. An
    IOError while reading is reported on stdout and otherwise ignored.
    """
    super(CookieJar, self).__init__(parent)
    print("INIT CookieJar")
    self.allowed = allowed or []
    if not storage:
        return
    try:
        with open(storage, "r") as readfile:
            per_line = [QNetworkCookie.parseCookies(line)
                        for line in readfile.readlines()]
            # flatten the per-line results into a single list
            flat = []
            for parsed in per_line:
                flat.extend(parsed)
            self.setAllCookies(flat)
    except IOError:
        print("LOAD COOKIES: empty?")
def __init__(self, parent=None, allowed=None, storage=None):
    """ Load cookies from a file

    `allowed` is stored on the instance (an empty list when falsy).
    When `storage` is given, every line of that file is parsed as
    cookies and the combined result replaces the jar's content. An
    IOError while reading is reported on stdout and otherwise ignored.
    """
    super(CookieJar, self).__init__(parent)
    print("INIT CookieJar")
    self.allowed = allowed or []
    if not storage:
        return
    try:
        with open(storage, "r") as readfile:
            per_line = [QNetworkCookie.parseCookies(line)
                        for line in readfile.readlines()]
            # flatten the per-line results into a single list
            flat = []
            for parsed in per_line:
                flat.extend(parsed)
            self.setAllCookies(flat)
    except IOError:
        print("LOAD COOKIES: empty?")
def doUpdateSiteMap(self):
    """Add sitemap rows fetched from `self.Data` to the tree view model.

    Returns immediately when `self.qlock` cannot be acquired. When
    `self.fillAll` is set, the model is cleared and `self.lastId` reset
    before fetching. For every row (statuses 400, 404, 500 and 501 are
    skipped) any cookies matched in the response headers are stored in
    the global cookie jar, and nodes for host, host location, each
    directory, the file name and the query string are found or created.
    The lock is released when the method ends.
    """
    if not self.qlock.tryLock():
        return
    try:
        if self.fillAll:
            self.fillAll = False
            self.treeViewModel.clearModel()
            self.lastId = 0

        rows = self.Data.get_sitemap_info(self.cursor, self.lastId)
        global_cookie_jar = self.framework.get_global_cookie_jar()

        for count, row in enumerate(rows, 1):
            # give other threads a chance every 100 rows
            if count % 100 == 0:
                self.yieldCurrentThread()

            fields = [value or '' for value in row]
            Id = str(fields[0])
            try:
                self.lastId = int(Id)
            except ValueError:
                pass

            # XXX: review all for bytes usage
            raw_url = fields[1]
            if isinstance(raw_url, bytes):
                url = str(raw_url, 'utf-8', 'ignore')
            else:
                url = str(raw_url)
            status = str(fields[2])
            response_headers = str(fields[3])
            request_headers = str(fields[4])

            # TODO: make configurable
            if status in ('400', '404', '500', '501'):
                continue

            # TODO:
            cookie_match = self.re_set_cookie.search(response_headers)
            if cookie_match:
                global_cookie_jar.setCookiesFromUrl(
                    QNetworkCookie.parseCookies(cookie_match.group(1)),
                    QUrl.fromEncoded(url))

            parsed = urlparse.urlsplit(url)
            if parsed.hostname:
                hostname = parsed.hostname
            else:
                # no host in the URL: fall back to the request headers
                hostname = ''
                host_match = self.re_host_name.search(request_headers)
                if host_match:
                    hostname = host_match.group(1).rstrip()
            hostname = hostname.lower()

            hostloc = urlparse.urlunsplit(
                (parsed.scheme, parsed.netloc, '/', '', ''))
            rootNode = self.treeViewModel.findOrAddNode(hostname)
            parentNode = rootNode.findOrAddNode(self.treeViewModel, hostloc)
            parentNode.setResponseId(None, hostloc)

            # add directories
            pathval = parsed.path
            lastSlash = 0
            slash = pathval.find('/', 1)
            while slash >= 0:
                dirname = pathval[lastSlash + 1:slash + 1]
                parentNode = parentNode.findOrAddNode(
                    self.treeViewModel, dirname)
                dir_url = urlparse.urlunsplit(
                    (parsed.scheme, parsed.netloc,
                     pathval[0:slash + 1], '', ''))
                parentNode.setResponseId(None, dir_url)
                lastSlash = slash
                slash = pathval.find('/', slash + 1)

            # add file element
            if lastSlash + 1 < len(pathval):
                filename = pathval[lastSlash + 1:]
                parentNode = parentNode.findOrAddNode(
                    self.treeViewModel, filename)
                file_url = urlparse.urlunsplit(
                    (parsed.scheme, parsed.netloc, pathval, '', ''))
                parentNode.setResponseId(None, file_url)

            # add query
            if parsed.query:
                parentNode = parentNode.findOrAddNode(
                    self.treeViewModel, '?' + parsed.query)

            # store the latest Id
            # TODO: should determine best candidate to display
            parentNode.setResponseId(Id, url)
    finally:
        self.qlock.unlock()
def doUpdateSiteMap(self):
    """Add sitemap rows fetched from `self.Data` to the tree view model.

    Returns immediately when `self.qlock` cannot be acquired. When
    `self.fillAll` is set, the model is cleared and `self.lastId` reset
    before fetching. For every row (statuses 400, 404, 500 and 501 are
    skipped) any cookies matched in the response headers are stored in
    the global cookie jar, and nodes for host, host location, each
    directory, the file name and the query string are found or created.
    The lock is released when the method ends.
    """
    if not self.qlock.tryLock():
        return
    try:
        if self.fillAll:
            self.fillAll = False
            self.treeViewModel.clearModel()
            self.lastId = 0

        rows = self.Data.get_sitemap_info(self.cursor, self.lastId)
        global_cookie_jar = self.framework.get_global_cookie_jar()

        for count, row in enumerate(rows, 1):
            # give other threads a chance every 100 rows
            if count % 100 == 0:
                self.yieldCurrentThread()

            fields = [value or "" for value in row]
            Id = str(fields[0])
            try:
                self.lastId = int(Id)
            except ValueError:
                pass

            # XXX: review all for bytes usage
            raw_url = fields[1]
            if isinstance(raw_url, bytes):
                url = str(raw_url, "utf-8", "ignore")
            else:
                url = str(raw_url)
            status = str(fields[2])
            response_headers = str(fields[3])
            request_headers = str(fields[4])

            # TODO: make configurable
            if status in ("400", "404", "500", "501"):
                continue

            # TODO:
            cookie_match = self.re_set_cookie.search(response_headers)
            if cookie_match:
                global_cookie_jar.setCookiesFromUrl(
                    QNetworkCookie.parseCookies(cookie_match.group(1)),
                    QUrl.fromEncoded(url))

            parsed = urlparse.urlsplit(url)
            if parsed.hostname:
                hostname = parsed.hostname
            else:
                # no host in the URL: fall back to the request headers
                hostname = ""
                host_match = self.re_host_name.search(request_headers)
                if host_match:
                    hostname = host_match.group(1).rstrip()
            hostname = hostname.lower()

            hostloc = urlparse.urlunsplit(
                (parsed.scheme, parsed.netloc, "/", "", ""))
            rootNode = self.treeViewModel.findOrAddNode(hostname)
            parentNode = rootNode.findOrAddNode(self.treeViewModel, hostloc)
            parentNode.setResponseId(None, hostloc)

            # add directories
            pathval = parsed.path
            lastSlash = 0
            slash = pathval.find("/", 1)
            while slash >= 0:
                dirname = pathval[lastSlash + 1:slash + 1]
                parentNode = parentNode.findOrAddNode(
                    self.treeViewModel, dirname)
                dir_url = urlparse.urlunsplit(
                    (parsed.scheme, parsed.netloc,
                     pathval[0:slash + 1], "", ""))
                parentNode.setResponseId(None, dir_url)
                lastSlash = slash
                slash = pathval.find("/", slash + 1)

            # add file element
            if lastSlash + 1 < len(pathval):
                filename = pathval[lastSlash + 1:]
                parentNode = parentNode.findOrAddNode(
                    self.treeViewModel, filename)
                file_url = urlparse.urlunsplit(
                    (parsed.scheme, parsed.netloc, pathval, "", ""))
                parentNode.setResponseId(None, file_url)

            # add query
            if parsed.query:
                parentNode = parentNode.findOrAddNode(
                    self.treeViewModel, "?" + parsed.query)

            # store the latest Id
            # TODO: should determine best candidate to display
            parentNode.setResponseId(Id, url)
    finally:
        self.qlock.unlock()