Пример #1
0
 def __init__(self, reply):
     """Capture the details of a network reply as plain attributes.

     reply -- object providing url(), data, content_type, content,
              rawHeader(), rawHeaderList(), operation() and orig_request

     Raises KeyError if reply.operation() is not one of the mapped verbs.
     """
     # save shortcuts to URL details
     self.url = reply.url()
     self.host = self.url.host()
     self.path = self.url.path()
     self.qs = self.url.queryItems()
     self.data = reply.data
     self.content_type = reply.content_type
     self.content = common.to_unicode(str(reply.content))
     try:
         self.parsed_content = parser.parse(self.content, self.content_type)
     except ValueError:
         # parsing is best effort: report the URL and continue without a tree
         print('Error parsing URL with lxml: {}'.format(self.url.toString()))
         self.parsed_content = None
     self.columns = None
     self.cookies = QNetworkCookie.parseCookies(reply.rawHeader('Set-Cookie'))

     # map of Qt verbs
     verbs = {
         QNetworkAccessManager.HeadOperation: 'HEAD',
         QNetworkAccessManager.GetOperation: 'GET',
         QNetworkAccessManager.PutOperation: 'PUT',
         QNetworkAccessManager.PostOperation: 'POST',
         QNetworkAccessManager.DeleteOperation: 'DELETE',
         QNetworkAccessManager.CustomOperation: 'CUSTOM',
     }
     self.verb = verbs[reply.operation()]

     # save request details
     request = reply.orig_request
     self.request_headers = [
         (header, request.rawHeader(header))
         for header in request.rawHeaderList()
     ]
     # Response header values must be read from the reply itself; looking
     # them up on the request returned the wrong (usually empty) values.
     self.response_headers = [
         (header, reply.rawHeader(header))
         for header in reply.rawHeaderList()
     ]
Пример #2
0
 def loadCookies(self, cookies):
     """
     Public method to decode previously saved cookies.

     @param cookies byte array containing the saved cookies (QByteArray)
     @return list of cookies; empty if the byte array is empty or the
         stored version differs from JAR_VERSION
     """
     if cookies.isEmpty():
         return []

     # keep the copy in a named local for as long as the stream reads from it
     buf = QByteArray(cookies)
     stream = QDataStream(buf, QIODevice.ReadOnly)

     if stream.readUInt16() != self.JAR_VERSION:
         return []

     # The stored cookie count is not used, but it still has to be read so
     # the stream is positioned at the first raw cookie.
     stream.readUInt32()

     restored = []
     chunk = QByteArray()
     while not stream.atEnd():
         stream >> chunk
         restored.extend(QNetworkCookie.parseCookies(chunk))

     return restored
Пример #3
0
	def __init__(self, url, app):
		"""Set up the page for *url* and seed the shared cookie jar.

		url -- address the parsed cookies are associated with
		app -- application object, stored on the instance as _app
		"""
		# The module-level _cookieJar and _cookie are only read here, so no
		# global declaration is required.
		self._url = url
		self._app = app
		QWebPage.__init__(self)
		self.networkAccessManager().setCookieJar(_cookieJar)
		initial_cookies = QNetworkCookie.parseCookies(_cookie)
		_cookieJar.setCookiesFromUrl(initial_cookies, QUrl(url))
		self.bind()
Пример #4
0
    def load(self):
        """Replace the jar's cookies with those stored in the cookie file.

        A missing file resets the jar to an empty list. An existing but
        zero-length file leaves the jar untouched.
        """
        cookieFile = self._cookiesFilePath()

        if not os.path.exists(cookieFile):
            self.setAllCookies([])
            return

        if os.stat(cookieFile).st_size:
            # The context manager closes the handle deterministically; the
            # previous bare open().read() left that to garbage collection.
            with open(cookieFile) as handle:
                raw = handle.read()
            self.setAllCookies(
                QNetworkCookie.parseCookies(QByteArray(raw)))
Пример #5
0
    def import_raw_cookie_list(self, raw_cookie_list):
        """Merge cookies parsed from raw strings into the global cookie jar.

        Every entry of *raw_cookie_list* is parsed with
        QNetworkCookie.parseCookies. Cookies not already present in the jar
        are appended, the jar is rewritten with the merged list, and
        signal_cookie_jar_updated() is called.
        """
        jar = self.get_global_cookie_jar()
        merged = jar.allCookies()

        for raw_cookie in raw_cookie_list:
            for parsed in QNetworkCookie.parseCookies(raw_cookie):
                if parsed in merged:
                    continue
                merged.append(parsed)

        jar.setAllCookies(merged)
        self.signal_cookie_jar_updated()
	def _parse_cookie_attribs_into_QtCookies_list(self, cookie_attrs, default_domain):
		"""Turn cookie attribute strings into a list of Qt cookies.

		Each entry of *cookie_attrs* is parsed separately (a workaround for
		parseCookies bugs). Only the first cookie of each parse result is
		kept, and it receives *default_domain* when it has no domain.
		"""
		parsed_cookies = []

		for attr in cookie_attrs:
			candidates = QNetworkCookie.parseCookies(attr)
			if not candidates:
				continue
			first = candidates[0]
			if not first.domain():
				first.setDomain(QString(default_domain))
			parsed_cookies.append(first)

		return parsed_cookies
Пример #7
0
    def load_cookies_from_file(self, filename):
        """Read cookies from an XML file and return them as a list.

        *filename* must end in '.xml', '.xml.bz2' or '.xml.xz'; any other
        name raises Exception. The text of every <raw> element found inside
        a <cookie> element is passed to QNetworkCookie.parseCookies, after
        base64-decoding when the element has encoding="base64".

        The source file is closed even when parsing fails part-way.
        """
        if filename.endswith('.xml.bz2'):
            source = bz2.BZ2File(filename, 'r')
        elif filename.endswith('.xml.xz'):
            source = lzma.LZMAFile(filename, 'r')
        elif filename.endswith('.xml'):
            source = open(filename, 'rb')
        else:
            raise Exception('unhandled file type: %s' % (filename))

        cookieList = []
        in_cookies = False
        in_cookie = False
        try:
            # http://effbot.org/zone/element-iterparse.htm#incremental-parsing
            context = etree.iterparse(source,
                                      events=('start', 'end'),
                                      huge_tree=True)
            for event, elem in context:
                tag = elem.tag
                if not in_cookies and 'cookies' == tag and 'start' == event:
                    in_cookies = True
                elif in_cookies and 'cookie' == tag and 'start' == event:
                    in_cookie = True
                elif in_cookie and 'raw' == tag and 'end' == event:
                    value = str(elem.text)
                    encoding = None
                    if 'encoding' in elem.attrib:
                        encoding = str(elem.attrib['encoding'])
                    if 'base64' == encoding:
                        value = base64.b64decode(value)
                    cookieList.extend(QNetworkCookie.parseCookies(value))
                elif 'cookies' == tag and 'end' == event:
                    in_cookies = False
                elif in_cookie and 'cookie' == tag and 'end' == event:
                    in_cookie = False
        finally:
            # previously the file was closed only on normal exhaustion, so a
            # parse error leaked the handle
            source.close()

        return cookieList
	def _createCookieJarfromInjectedResponse(self, default_domain):
		"""Build a QNetworkCookieJar from the headers of _getCookieHeaders().

		Each header is parsed on its own. The first cookie of every parse
		result receives *default_domain* when it has no domain. Headers that
		yield no cookies are skipped.

		Returns the newly created QNetworkCookieJar.
		"""
		#ugly, but works around bugs in parseCookies
		cookies = []

		for cookie_header in self._getCookieHeaders():
			tmp_cookieList = QNetworkCookie.parseCookies(cookie_header)
			if not tmp_cookieList:
				# nothing parsed: indexing [0] below would raise IndexError
				continue
			tmp_cookie = tmp_cookieList[0]
			if not tmp_cookie.domain():
				tmp_cookie.setDomain(QString(default_domain))

			# extend in place instead of rebuilding the list on every header
			cookies.extend(tmp_cookieList)

		cj = QNetworkCookieJar()
		cj.setAllCookies(cookies)
		return cj
Пример #9
0
    def __init__(self, url, app):
        """Set up the page for *url*, seed the shared cookie jar and run *app*.

        url -- address the parsed cookies are associated with
        app -- application object; stored as _app and started via exec_()
        """
        # The module-level _cookieJar and _cookie are only read here, so no
        # global declaration is required.
        self._url = url
        self._app = app
        QWebPage.__init__(self)
        self.networkAccessManager().setCookieJar(_cookieJar)
        initial_cookies = QNetworkCookie.parseCookies(_cookie)
        _cookieJar.setCookiesFromUrl(initial_cookies, QUrl(url))
        self.bind()
        # NOTE(review): the handler attributes below are assigned only after
        # exec_() returns (presumably once the event loop has finished) --
        # confirm this ordering is intended.
        self._app.exec_()

        self.user_agent_for_url = user_agent
        self.js_alert = self.js_prompt = self.js_confirm = alert
        self.js_console_message = console_message
Пример #10
0
    def __init__(self, url, app):
        """Prepare the web page for *url*, load the cookies and start *app*.

        url -- address the parsed cookies are associated with
        app -- application object; kept as _app and run through exec_()
        """
        # _cookieJar and _cookie are module-level names that are only read
        # in this method, hence no global statement.
        self._url, self._app = url, app
        QWebPage.__init__(self)
        self.networkAccessManager().setCookieJar(_cookieJar)
        seed_cookies = QNetworkCookie.parseCookies(_cookie)
        target_url = QUrl(url)
        _cookieJar.setCookiesFromUrl(seed_cookies, target_url)
        self.bind()
        # NOTE(review): everything after exec_() runs only once that call
        # returns, so the handlers are attached late -- confirm intended.
        self._app.exec_()

        self.user_agent_for_url = user_agent
        for handler_name in ('js_alert', 'js_prompt', 'js_confirm'):
            setattr(self, handler_name, alert)
        self.js_console_message = console_message
Пример #11
0
    def load_cookies_from_file(self, filename):
        """Read cookies from an XML file and return them as a list.

        *filename* must end in '.xml', '.xml.bz2' or '.xml.xz'; any other
        name raises Exception. The text of every <raw> element found inside
        a <cookie> element is passed to QNetworkCookie.parseCookies, after
        base64-decoding when the element has encoding="base64".

        The source file is closed even when parsing fails part-way.
        """
        if filename.endswith('.xml.bz2'):
            source = bz2.BZ2File(filename, 'r')
        elif filename.endswith('.xml.xz'):
            source = lzma.LZMAFile(filename, 'r')
        elif filename.endswith('.xml'):
            source = open(filename, 'rb')
        else:
            raise Exception('unhandled file type: %s' % (filename))

        cookieList = []
        in_cookies = False
        in_cookie = False
        try:
            # http://effbot.org/zone/element-iterparse.htm#incremental-parsing
            context = etree.iterparse(source, events=('start', 'end'), huge_tree=True)
            for event, elem in context:
                tag = elem.tag
                if not in_cookies and 'cookies' == tag and 'start' == event:
                    in_cookies = True
                elif in_cookies and 'cookie' == tag and 'start' == event:
                    in_cookie = True
                elif in_cookie and 'raw' == tag and 'end' == event:
                    value = str(elem.text)
                    encoding = None
                    if 'encoding' in elem.attrib:
                        encoding = str(elem.attrib['encoding'])
                    if 'base64' == encoding:
                        value = base64.b64decode(value)
                    cookieList.extend(QNetworkCookie.parseCookies(value))
                elif 'cookies' == tag and 'end' == event:
                    in_cookies = False
                elif in_cookie and 'cookie' == tag and 'end' == event:
                    in_cookie = False
        finally:
            # previously the file was closed only on normal exhaustion, so a
            # parse error leaked the handle
            source.close()

        return cookieList
Пример #12
0
    def __init__(self, parent=None, allowed=None, storage=None):
        """Initialise the jar and optionally load cookies from a file.

        parent  -- forwarded to the base class constructor
        allowed -- stored as self.allowed; a falsy value becomes []
        storage -- path of a file whose lines are each parsed with
                   QNetworkCookie.parseCookies; skipped when falsy

        An IOError while reading *storage* is reported on stdout and
        otherwise ignored.
        """
        super(CookieJar, self).__init__(parent)
        print("INIT CookieJar")
        self.allowed = allowed if allowed else []
        if not storage:
            return
        try:
            with open(storage, "r") as readfile:
                loaded = []
                for line in readfile.readlines():
                    # each line may yield several cookies
                    loaded.extend(QNetworkCookie.parseCookies(line))
                self.setAllCookies(loaded)
        except IOError:
            print("LOAD COOKIES: empty?")
Пример #13
0
    def __init__(self, parent=None, allowed=None, storage=None):
        """Create the cookie jar, optionally pre-filled from *storage*.

        parent  -- passed through to the base class constructor
        allowed -- kept as self.allowed ([] when a falsy value is given)
        storage -- optional path; every line of the file is handed to
                   QNetworkCookie.parseCookies and the results are combined

        If the file cannot be read (IOError) a message is printed and the
        jar is left as constructed.
        """
        super(CookieJar, self).__init__(parent)
        print("INIT CookieJar")
        self.allowed = allowed or []
        if storage:
            try:
                with open(storage, "r") as readfile:
                    per_line = list(
                        map(QNetworkCookie.parseCookies, readfile.readlines()))
                    flattened = []
                    for parsed in per_line:
                        flattened.extend(parsed)
                    self.setAllCookies(flattened)
            except IOError:
                print("LOAD COOKIES: empty?")
Пример #14
0
    def doUpdateSiteMap(self):
        """Add the rows newer than self.lastId to the site-map tree.

        Returns immediately when self.qlock cannot be acquired. Otherwise,
        for every row from Data.get_sitemap_info: records the row id as
        lastId, skips responses with status 400/404/500/501, feeds any
        Set-Cookie match into the global cookie jar, and inserts the URL into
        treeViewModel as host -> scheme://netloc/ -> directories -> file ->
        query. The lock is released on every exit path.
        """
        if not self.qlock.tryLock():
            return
        try:
            if self.fillAll:
                # full rebuild requested: start again from an empty model
                self.fillAll = False
                self.treeViewModel.clearModel()
                self.lastId = 0

            rows = self.Data.get_sitemap_info(self.cursor, self.lastId)
            global_cookie_jar = self.framework.get_global_cookie_jar()

            # TODO: make configurable
            skipped_statuses = ('400', '404', '500', '501')

            for count, row in enumerate(rows, 1):
                if count % 100 == 0:
                    self.yieldCurrentThread()

                fields = [value or '' for value in row]

                Id = str(fields[0])
                try:
                    self.lastId = int(Id)
                except ValueError:
                    pass

                # XXX: review all for bytes usage
                raw_url = fields[1]
                if isinstance(raw_url, bytes):
                    url = str(raw_url, 'utf-8', 'ignore')
                else:
                    url = str(raw_url)
                status = str(fields[2])
                response_headers = str(fields[3])
                request_headers = str(fields[4])
                if status in skipped_statuses:
                    continue

                # TODO:
                cookie_match = self.re_set_cookie.search(response_headers)
                if cookie_match:
                    global_cookie_jar.setCookiesFromUrl(
                        QNetworkCookie.parseCookies(cookie_match.group(1)),
                        QUrl.fromEncoded(url))

                parsed = urlparse.urlsplit(url)
                hostname = parsed.hostname
                if not hostname:
                    # fall back to the host named in the request headers
                    hostname = ''
                    host_match = self.re_host_name.search(request_headers)
                    if host_match:
                        hostname = host_match.group(1).rstrip()
                hostname = hostname.lower()

                scheme = parsed.scheme
                netloc = parsed.netloc
                pathval = parsed.path
                hostloc = urlparse.urlunsplit((scheme, netloc, '/', '', ''))

                rootNode = self.treeViewModel.findOrAddNode(hostname)
                node = rootNode.findOrAddNode(self.treeViewModel, hostloc)
                node.setResponseId(None, hostloc)

                # add directories: one node per path segment ending in '/'
                lastSlash = 0
                slash = pathval.find('/', 1)
                while slash >= 0:
                    dirname = pathval[lastSlash + 1:slash + 1]
                    node = node.findOrAddNode(self.treeViewModel, dirname)
                    dir_url = urlparse.urlunsplit(
                        (scheme, netloc, pathval[:slash + 1], '', ''))
                    node.setResponseId(None, dir_url)
                    lastSlash = slash
                    slash = pathval.find('/', slash + 1)

                # add file element
                if lastSlash + 1 < len(pathval):
                    node = node.findOrAddNode(
                        self.treeViewModel, pathval[lastSlash + 1:])
                    file_url = urlparse.urlunsplit(
                        (scheme, netloc, pathval, '', ''))
                    node.setResponseId(None, file_url)

                # add query
                if parsed.query:
                    node = node.findOrAddNode(
                        self.treeViewModel, '?' + parsed.query)

                # store the latest Id
                # TODO: should determine best candidate to display
                node.setResponseId(Id, url)

        finally:
            self.qlock.unlock()
Пример #15
0
    def doUpdateSiteMap(self):
        """Merge newly recorded responses into the site-map tree model.

        Does nothing when self.qlock is already held. Rows are fetched via
        Data.get_sitemap_info starting after self.lastId. Each row updates
        lastId, may contribute Set-Cookie data to the global cookie jar, and
        (unless its status is 400, 404, 500 or 501) is placed in
        treeViewModel under host / scheme://netloc/ / directories / file /
        query. The lock is always released before returning.
        """
        if not self.qlock.tryLock():
            return
        try:
            if self.fillAll:
                # a full refill was requested, so drop the current model
                self.fillAll = False
                self.treeViewModel.clearModel()
                self.lastId = 0

            rows = self.Data.get_sitemap_info(self.cursor, self.lastId)
            global_cookie_jar = self.framework.get_global_cookie_jar()

            # TODO: make configurable
            ignored_statuses = ("400", "404", "500", "501")

            for count, row in enumerate(rows, 1):
                if count % 100 == 0:
                    self.yieldCurrentThread()

                columns = [cell or "" for cell in row]

                Id = str(columns[0])
                try:
                    self.lastId = int(Id)
                except ValueError:
                    pass

                # XXX: review all for bytes usage
                raw_url = columns[1]
                if isinstance(raw_url, bytes):
                    url = str(raw_url, "utf-8", "ignore")
                else:
                    url = str(raw_url)
                status = str(columns[2])
                response_headers = str(columns[3])
                request_headers = str(columns[4])
                if status in ignored_statuses:
                    continue

                # TODO:
                cookie_match = self.re_set_cookie.search(response_headers)
                if cookie_match:
                    parsed_cookies = QNetworkCookie.parseCookies(cookie_match.group(1))
                    global_cookie_jar.setCookiesFromUrl(parsed_cookies, QUrl.fromEncoded(url))

                parsed = urlparse.urlsplit(url)
                hostname = parsed.hostname
                if not hostname:
                    # no host in the URL: try the request headers instead
                    hostname = ""
                    host_match = self.re_host_name.search(request_headers)
                    if host_match:
                        hostname = host_match.group(1).rstrip()
                hostname = hostname.lower()

                scheme, netloc, pathval = parsed.scheme, parsed.netloc, parsed.path
                hostloc = urlparse.urlunsplit((scheme, netloc, "/", "", ""))

                rootNode = self.treeViewModel.findOrAddNode(hostname)
                current = rootNode.findOrAddNode(self.treeViewModel, hostloc)
                current.setResponseId(None, hostloc)

                # add directories: walk every '/' after the leading one
                lastSlash = 0
                slash = pathval.find("/", 1)
                while slash >= 0:
                    current = current.findOrAddNode(self.treeViewModel, pathval[lastSlash + 1 : slash + 1])
                    current.setResponseId(
                        None, urlparse.urlunsplit((scheme, netloc, pathval[: slash + 1], "", ""))
                    )
                    lastSlash = slash
                    slash = pathval.find("/", slash + 1)

                # add file element
                if lastSlash + 1 < len(pathval):
                    current = current.findOrAddNode(self.treeViewModel, pathval[lastSlash + 1 :])
                    current.setResponseId(None, urlparse.urlunsplit((scheme, netloc, pathval, "", "")))

                # add query
                if parsed.query:
                    current = current.findOrAddNode(self.treeViewModel, "?" + parsed.query)

                # store the latest Id
                # TODO: should determine best candidate to display
                current.setResponseId(Id, url)

        finally:
            self.qlock.unlock()