class CookieTransport(xmlrpclib.Transport):
    '''A subclass of xmlrpclib.Transport that supports cookies.

    Cookies live in ``cookiejar``.  When that attribute is left at its
    default of ``None`` a plain ``CookieJar`` is created the first time a
    request needs one.  ``scheme`` is used to build the URL that cookie
    matching is performed against.
    '''

    cookiejar = None
    scheme = 'http'

    # Cribbed from xmlrpclib.Transport.send_user_agent
    def send_cookies(self, connection, cookie_request):
        '''Send the cookies that apply to ``cookie_request``.

        :param connection: object with a ``putheader(name, value)`` method
        :param cookie_request: urllib2-style request used for cookie matching

        A missing cookiejar is created (and, being empty, sends nothing);
        an empty cookiejar sends nothing either.
        '''
        if self.cookiejar is None:
            self.cookiejar = CookieJar()
        elif self.cookiejar:
            # Let the cookiejar figure out what cookies are appropriate
            self.cookiejar.add_cookie_header(cookie_request)
            # Pull the cookie headers out of the request object and put
            # them over the connection
            for name, value in cookie_request.header_items():
                if name.startswith('Cookie'):
                    connection.putheader(name, value)

    # This is just python 2.7's xmlrpclib.Transport.single_request, with
    # additions noted below to send cookies along with the request
    def single_request_with_cookies(self, host, handler, request_body,
                                    verbose=0):
        '''Perform one XML-RPC request, sending and storing cookies.

        :param host: target host, used both for the connection and the URL
        :param handler: request path on the host
        :param request_body: serialized XML-RPC request
        :param verbose: debug flag handed to the underlying connection
        :returns: the result of ``parse_response`` for a 200 response
        :raises xmlrpclib.ProtocolError: for any non-200 response

        Errors raised while saving the cookiejar propagate to the caller.
        '''
        # ADDED: construct the URL and Request object for proper cookie
        # handling
        request_url = "%s://%s%s" % (self.scheme, host, handler)
        cookie_request = urllib2.Request(request_url)

        # The Python 3 branch below does not call send_cookies(), so make
        # sure a jar exists before extract_cookies() is reached; otherwise
        # a transport without a preset jar fails with AttributeError.
        if self.cookiejar is None:
            self.cookiejar = CookieJar()

        h = None
        try:
            if six.PY2:
                h = self.make_connection(host)
                if verbose:
                    h.set_debuglevel(1)

                self.send_request(h, handler, request_body)
                self.send_host(h, host)
                self.send_cookies(h, cookie_request)  # ADDED
                self.send_user_agent(h)
                self.send_content(h, request_body)
            else:
                # Python 3 xmlrpc.client.Transport makes its own connection.
                # NOTE: this branch does not call send_cookies().
                h = self.send_request(host, handler, request_body, verbose)

            response = h.getresponse()

            # ADDED: parse headers and get cookies here
            cookie_response = CookieResponse(response.msg)
            # Okay, extract the cookies from the headers
            self.cookiejar.extract_cookies(cookie_response, cookie_request)
            # And write back any changes (only jars that offer save())
            if hasattr(self.cookiejar, 'save'):
                self.cookiejar.save(self.cookiejar.filename)

            if response.status == 200:
                self.verbose = verbose
                return self.parse_response(response)

            # Drain the body of a failed response before reporting it
            if response.getheader("content-length", 0):
                response.read()
            raise xmlrpclib.ProtocolError(
                host + handler,
                response.status, response.reason,
                response.msg,
            )
        finally:
            # h stays None when the connection could not be set up
            if h is not None:
                h.close()

    # Override the appropriate request method
    single_request = single_request_with_cookies  # python 2.7+
class CookieManager(object):
    """
    Each Grab instance has `cookies` attribute that is instance of
    `CookieManager` class.

    That class contains helpful methods to create, load, save cookies from/to
    different places.
    """

    __slots__ = ('cookiejar',)

    def __init__(self, cookiejar=None):
        """
        :param cookiejar: existing jar to wrap; a new empty `CookieJar`
            is created when omitted
        """
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = CookieJar()
        # self.disable_cookiejar_lock(self.cookiejar)

    # def disable_cookiejar_lock(self, cj):
    #     cj._cookies_lock = dummy_threading.RLock()

    def set(self, name, value, domain, **kwargs):
        """Add new cookie or replace existing cookie with same parameters.

        :param name: name of cookie
        :param value: value of cookie
        :param domain: domain of cookie; 'localhost' is stored as ''
        :param kwargs: extra attributes of cookie
        """
        if domain == 'localhost':
            domain = ''

        self.cookiejar.set_cookie(create_cookie(name, value, domain, **kwargs))

    def update(self, cookies):
        """Copy every cookie from `cookies` into this manager.

        :param cookies: a `CookieJar` or another `CookieManager`
        :raises GrabMisuseError: for any other argument type
        """
        if isinstance(cookies, CookieJar):
            for cookie in cookies:
                self.cookiejar.set_cookie(cookie)
        elif isinstance(cookies, CookieManager):
            for cookie in cookies.cookiejar:
                self.cookiejar.set_cookie(cookie)
        else:
            raise GrabMisuseError('Unknown type of cookies argument: %s'
                                  % type(cookies))

    @classmethod
    def from_cookie_list(cls, clist):
        """Build a manager from an iterable of cookie objects."""
        jar = CookieJar()
        for cookie in clist:
            jar.set_cookie(cookie)
        return cls(jar)

    def clear(self):
        """Drop all cookies by replacing the jar with a new empty one."""
        self.cookiejar = CookieJar()

    def __getstate__(self):
        """Return the slot values, with the jar replaced by a list of its
        cookies under the `_cookiejar_cookies` key."""
        state = {}
        for cls in type(self).mro():
            cls_slots = getattr(cls, '__slots__', ())
            for slot in cls_slots:
                if slot != '__weakref__':
                    if hasattr(self, slot):
                        state[slot] = getattr(self, slot)

        state['_cookiejar_cookies'] = list(self.cookiejar)
        del state['cookiejar']

        return state

    def __setstate__(self, state):
        """Restore slot values and rebuild the jar from the cookie list.

        The passed `state` mapping is left unmodified.
        """
        jar = CookieJar()
        for cookie in state['_cookiejar_cookies']:
            jar.set_cookie(cookie)

        for slot, value in state.items():
            # The jar is always rebuilt from the cookie list
            if slot not in ('_cookiejar_cookies', 'cookiejar'):
                setattr(self, slot, value)
        self.cookiejar = jar

    def __getitem__(self, key):
        """Return the value of the first cookie named `key`.

        :raises KeyError: when no cookie has that name
        """
        for cookie in self.cookiejar:
            if cookie.name == key:
                return cookie.value
        raise KeyError(key)

    def items(self):
        """Return a list of (name, value) pairs for all cookies."""
        return [(cookie.name, cookie.value) for cookie in self.cookiejar]

    def load_from_file(self, path):
        """
        Load cookies from the file.

        Content of file should be a JSON-serialized list of dicts.
        An empty file loads no cookies.
        """

        with open(path) as inf:
            data = inf.read()
        items = json.loads(data) if data else []
        for item in items:
            extra = dict((x, y) for x, y in item.items()
                         if x not in ('name', 'value', 'domain'))
            self.set(item['name'], item['value'], item['domain'], **extra)

    def get_dict(self):
        """Return a list with one dict per cookie, holding the attributes
        named in `COOKIE_ATTRS`."""
        return [dict((x, getattr(cookie, x)) for x in COOKIE_ATTRS)
                for cookie in self.cookiejar]

    def save_to_file(self, path):
        """
        Dump all cookies to file.

        Cookies are dumped as a JSON-serialized list of dicts, the same
        structure that `get_dict` returns.
        """

        with open(path, 'w') as out:
            out.write(json.dumps(self.get_dict()))

    def get_cookie_header(self, req):
        """
        :param req: object with httplib.Request interface
            Actually, it has to have `url` and `headers` attributes
        :returns: the 'Cookie' entry of the headers the jar added for
            `req`, as looked up with `.get('Cookie')`
        """
        mocked_req = MockRequest(req)
        self.cookiejar.add_cookie_header(mocked_req)
        return mocked_req.get_new_headers().get('Cookie')
class CookieManager(object):
    """
    Each Grab instance has `cookies` attribute that is instance of
    `CookieManager` class.

    That class contains helpful methods to create, load, save cookies from/to
    different places.
    """

    __slots__ = ('cookiejar', )

    def __init__(self, cookiejar=None):
        """
        :param cookiejar: existing jar to wrap; a new empty `CookieJar`
            is created when omitted
        """
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = CookieJar()
        # self.disable_cookiejar_lock(self.cookiejar)

    # def disable_cookiejar_lock(self, cj):
    #     cj._cookies_lock = dummy_threading.RLock()

    def set(self, name, value, domain, **kwargs):
        """Add new cookie or replace existing cookie with same parameters.

        :param name: name of cookie
        :param value: value of cookie
        :param domain: domain of cookie; 'localhost' is stored as ''
        :param kwargs: extra attributes of cookie
        """
        if domain == 'localhost':
            domain = ''

        self.cookiejar.set_cookie(create_cookie(name, value, domain, **kwargs))

    def update(self, cookies):
        """Copy every cookie from `cookies` into this manager.

        :param cookies: a `CookieJar` or another `CookieManager`
        :raises GrabMisuseError: for any other argument type
        """
        if isinstance(cookies, CookieJar):
            for cookie in cookies:
                self.cookiejar.set_cookie(cookie)
        elif isinstance(cookies, CookieManager):
            for cookie in cookies.cookiejar:
                self.cookiejar.set_cookie(cookie)
        else:
            raise GrabMisuseError('Unknown type of cookies argument: %s'
                                  % type(cookies))

    @classmethod
    def from_cookie_list(cls, clist):
        """Build a manager from an iterable of cookie objects."""
        jar = CookieJar()
        for cookie in clist:
            jar.set_cookie(cookie)
        return cls(jar)

    def clear(self):
        """Drop all cookies by replacing the jar with a new empty one."""
        self.cookiejar = CookieJar()

    def __getstate__(self):
        """Return the slot values, with the jar replaced by a list of its
        cookies under the `_cookiejar_cookies` key."""
        state = {}
        for cls in type(self).mro():
            cls_slots = getattr(cls, '__slots__', ())
            for slot in cls_slots:
                if slot != '__weakref__':
                    if hasattr(self, slot):
                        state[slot] = getattr(self, slot)

        state['_cookiejar_cookies'] = list(self.cookiejar)
        del state['cookiejar']

        return state

    def __setstate__(self, state):
        """Restore slot values and rebuild the jar from the cookie list.

        The passed `state` mapping is left unmodified.
        """
        jar = CookieJar()
        for cookie in state['_cookiejar_cookies']:
            jar.set_cookie(cookie)

        for slot, value in state.items():
            # The jar is always rebuilt from the cookie list
            if slot not in ('_cookiejar_cookies', 'cookiejar'):
                setattr(self, slot, value)
        self.cookiejar = jar

    def __getitem__(self, key):
        """Return the value of the first cookie named `key`.

        :raises KeyError: when no cookie has that name
        """
        for cookie in self.cookiejar:
            if cookie.name == key:
                return cookie.value
        raise KeyError(key)

    def items(self):
        """Return a list of (name, value) pairs for all cookies."""
        return [(cookie.name, cookie.value) for cookie in self.cookiejar]

    def load_from_file(self, path):
        """
        Load cookies from the file.

        Content of file should be a JSON-serialized list of dicts.
        An empty file loads no cookies.
        """

        with open(path) as inf:
            data = inf.read()
        items = json.loads(data) if data else []
        for item in items:
            extra = dict((x, y) for x, y in item.items()
                         if x not in ('name', 'value', 'domain'))
            self.set(item['name'], item['value'], item['domain'], **extra)

    def get_dict(self):
        """Return a list with one dict per cookie, holding the attributes
        named in `COOKIE_ATTRS`."""
        return [dict((x, getattr(cookie, x)) for x in COOKIE_ATTRS)
                for cookie in self.cookiejar]

    def save_to_file(self, path):
        """
        Dump all cookies to file.

        Cookies are dumped as a JSON-serialized list of dicts, the same
        structure that `get_dict` returns.
        """

        with open(path, 'w') as out:
            out.write(json.dumps(self.get_dict()))

    def get_cookie_header(self, req):
        """
        :param req: object with httplib.Request interface
            Actually, it has to have `url` and `headers` attributes
        :returns: the 'Cookie' entry of the headers the jar added for
            `req`, as looked up with `.get('Cookie')`
        """
        mocked_req = MockRequest(req)
        self.cookiejar.add_cookie_header(mocked_req)
        return mocked_req.get_new_headers().get('Cookie')
class HttpTransport(Transport):
    """
    HTTP transport using urllib2.  Provided basic http transport
    that provides for cookies, proxies but no authentication.
    """

    def __init__(self, **kwargs):
        """
        @param kwargs: Keyword arguments.
            - B{proxy} - An http proxy to be specified on requests.
                 The proxy is defined as {protocol:proxy,}
                    - type: I{dict}
                    - default: {}
            - B{timeout} - Set the url open timeout (seconds).
                    - type: I{float}
                    - default: 90
        """
        Transport.__init__(self)
        Unskin(self.options).update(kwargs)
        self.cookiejar = CookieJar()
        self.proxy = {}
        self.urlopener = None

    def open(self, request):
        """
        Open the url of the request and return the opened object.
        @param request: A transport request; only its I{url} is used.
        @return: The object returned by L{u2open}.
        @raise TransportError: When urllib reports an HTTP error.
        """
        try:
            url = request.url
            log.debug('opening (%s)', url)
            u2request = urllib.request.Request(url)
            self.proxy = self.options.proxy
            return self.u2open(u2request)
        except urllib.error.HTTPError as e:
            raise TransportError(str(e), e.code, e.fp)

    def send(self, request):
        """
        Send the request message and return the reply.
        Cookies held in the cookiejar are added to the request and
        cookies found in the response are stored back into it.
        @param request: A transport request providing I{url}, I{message}
            and I{headers}; its headers are updated with those that
            were actually sent.
        @return: A L{Reply} with code 200, or None when the server
            answered with HTTP 202 or 204.
        @raise TransportError: On any other HTTP error.
        """
        result = None
        url = request.url
        msg = request.message
        headers = request.headers
        try:
            u2request = urllib.request.Request(url, msg, headers)
            self.addcookies(u2request)
            self.proxy = self.options.proxy
            request.headers.update(u2request.headers)
            log.debug('sending:\n%s', request)
            fp = self.u2open(u2request)
            self.getcookies(fp, u2request)
            result = Reply(200, fp.headers.__dict__, fp.read())
            log.debug('received:\n%s', result)
        except urllib.error.HTTPError as e:
            if e.code in (202, 204):
                # accepted / no content: there is no reply body to return
                result = None
            else:
                raise TransportError(e.msg, e.code, e.fp)
        return result

    def addcookies(self, u2request):
        """
        Add cookies in the cookiejar to the request.
        @param u2request: A urllib2 request.
        @type u2request: urllib2.Request.
        """
        self.cookiejar.add_cookie_header(u2request)

    def getcookies(self, fp, u2request):
        """
        Extract cookies from the response and store them in the cookiejar.
        @param fp: The response object returned by L{u2open}.
        @param u2request: The urllib2 request that produced the response.
        @type u2request: urllib2.Request.
        """
        self.cookiejar.extract_cookies(fp, u2request)

    def u2open(self, u2request):
        """
        Open a connection.
        @param u2request: A urllib2 request.
        @type u2request: urllib2.Request.
        @return: The opened file-like urllib2 object.
        @rtype: fp
        """
        tm = self.options.timeout
        url = self.u2opener()
        return url.open(u2request, timeout=tm)

    def u2opener(self):
        """
        Create a urllib opener.
        A new opener is built on every call unless I{urlopener} is set.
        @return: An opener.
        @rtype: I{OpenerDirector}
        """
        if self.urlopener is None:
            return urllib.request.build_opener(*self.u2handlers())
        else:
            return self.urlopener

    def u2handlers(self):
        """
        Get a collection of urllib handlers.
        @return: A list of handlers to be installed in the opener.
        @rtype: [Handler,...]
        """
        handlers = []
        handlers.append(urllib.request.ProxyHandler(self.proxy))
        return handlers

    def __deepcopy__(self, memo=None):
        """
        Create a new transport of the same class carrying a copy of
        this transport's options.
        @param memo: Accepted for the copy protocol; not used.
        @return: The new transport.
        """
        clone = self.__class__()
        p = Unskin(self.options)
        cp = Unskin(clone.options)
        cp.update(p)
        return clone