def test_cookiejar(self):
    """Check cookie identity, explicit domains and per-domain jar storage."""
    first = create_cookie('foo', 'bar', self.server.address)
    second = create_cookie('foo', 'bar', self.server.address)
    # Two cookies built from the same arguments must not compare equal.
    self.assertFalse(first == second)

    dotted = create_cookie('foo', 'bar', domain='.dumpz.org')
    self.assertEqual(dotted.domain, '.dumpz.org')

    # The same name/value stored for two domains gives two jar entries.
    storage = CookieJar()
    for host in ('foo.com', 'bar.com'):
        storage.set_cookie(create_cookie('foo', 'bar', domain=host))
    self.assertEqual(len(storage), 2)
def test_cookiejar(self):
    """Check cookie identity, explicit domains and per-domain jar storage."""
    c1 = create_cookie('foo', 'bar')
    c2 = create_cookie('foo', 'bar')
    # Two cookies built from the same arguments must not compare equal.
    self.assertFalse(c1 == c2)

    c = create_cookie('foo', 'bar', domain='.dumpz.org')
    # `assertEquals` is a deprecated alias (removed in Python 3.12);
    # `assertEqual` is the supported spelling.
    self.assertEqual(c.domain, '.dumpz.org')

    # The same name/value stored for two domains gives two jar entries.
    cj = CookieJar()
    cj.set_cookie(create_cookie('foo', 'bar', domain='foo.com'))
    cj.set_cookie(create_cookie('foo', 'bar', domain='bar.com'))
    self.assertEqual(len(cj), 2)
def test_cookiejar(self):
    """Check cookie identity, explicit domains and per-domain jar storage."""
    c1 = create_cookie('foo', 'bar', self.server.address)
    c2 = create_cookie('foo', 'bar', self.server.address)
    # Two cookies built from the same arguments must not compare equal.
    self.assertFalse(c1 == c2)

    c = create_cookie('foo', 'bar', domain='.dumpz.org')
    # `assertEquals` is a deprecated alias (removed in Python 3.12);
    # `assertEqual` is the supported spelling.
    self.assertEqual(c.domain, '.dumpz.org')

    # The same name/value stored for two domains gives two jar entries.
    cj = CookieJar()
    cj.set_cookie(create_cookie('foo', 'bar', domain='foo.com'))
    cj.set_cookie(create_cookie('foo', 'bar', domain='bar.com'))
    self.assertEqual(len(cj), 2)
def extract_cookiejar(self):
    """
    Collect the cookies known to the pycurl handle.

    Returns a `CookieJar` holding one cookie per entry reported by
    `pycurl.INFO_COOKIELIST`.
    """
    # Each entry is a tab-separated record, for example:
    #   www.google.com\tFALSE\t/accounts/\tFALSE\t0
    #   \tGoogleAccountsLocale_session\ten
    # Field positions:
    #   0 - domain
    #   1 - whether or not all machines under that domain can read the
    #       cookie's information
    #   2 - path
    #   3 - secure flag: whether or not a secure connection (HTTPS) is
    #       required to read the cookie
    #   4 - expiration timestamp
    #   5 - name
    #   6 - value
    prefix = '#httponly_'
    jar = CookieJar()
    for record in self.curl.getinfo(pycurl.INFO_COOKIELIST):
        fields = record.split('\t')
        host = fields[0].lower()
        # HttpOnly cookies are reported with a marker glued to the domain.
        is_httponly = host.startswith(prefix)
        if is_httponly:
            host = host.replace(prefix, '')
        # NOTE(review): an expiry field of "0" is forwarded as expires=0,
        # not None -- presumably curl uses 0 for session cookies; confirm
        # that create_cookie/CookieJar treat that as intended.
        jar.set_cookie(create_cookie(
            name=fields[5],
            value=fields[6],
            domain=host,
            path=fields[2],
            secure=fields[3] == "TRUE",
            expires=int(fields[4]) if fields[4] else None,
            httponly=is_httponly,
        ))
    return jar
def extract_cookiejar(self):
    """
    Return the cookies of the pycurl instance as a `CookieJar` object.
    """
    # `INFO_COOKIELIST` yields tab-separated lines such as:
    #   www.google.com\tFALSE\t/accounts/\tFALSE\t0
    #   \tGoogleAccountsLocale_session\ten
    # The columns are, in order:
    #   * domain
    #   * whether or not all machines under that domain can read the
    #     cookie's information
    #   * path
    #   * secure flag (HTTPS required to read the cookie)
    #   * expiration timestamp
    #   * name
    #   * value
    result = CookieJar()
    for entry in self.curl.getinfo(pycurl.INFO_COOKIELIST):
        parts = entry.split("\t")
        raw_domain = parts[0].lower()
        # A "#httponly_" marker on the domain flags an HttpOnly cookie.
        http_only = raw_domain.startswith("#httponly_")
        cookie_domain = (
            raw_domain.replace("#httponly_", "") if http_only else raw_domain
        )
        # NOTE(review): "0" in the expiry column becomes expires=0 (only an
        # empty column maps to None) -- confirm this is the wanted handling
        # of cookies without a real expiration time.
        parsed = create_cookie(
            name=parts[5],
            value=parts[6],
            domain=cookie_domain,
            path=parts[2],
            secure=parts[3] == "TRUE",
            expires=int(parts[4]) if parts[4] else None,
            httponly=http_only,
        )
        result.set_cookie(parsed)
    return result
class CookieManager(object):
    """
    Each Grab instance has `cookies` attribute that is instance of
    `CookieManager` class.

    That class contains helpful methods to create, load, save cookies from/to
    different places.
    """

    __slots__ = ('cookiejar',)

    def __init__(self, cookiejar=None):
        """Wrap `cookiejar`, or start with an empty `CookieJar` if omitted."""
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = CookieJar()

    def set(self, name, value, domain, **kwargs):
        """Add new cookie or replace existing cookie with same parameters.

        :param name: name of cookie
        :param value: value of cookie
        :param domain: domain of cookie; 'localhost' is stored as an
            empty domain
        :param kwargs: extra attributes of cookie
        """
        if domain == 'localhost':
            domain = ''
        self.cookiejar.set_cookie(create_cookie(name, value, domain, **kwargs))

    def update(self, cookies):
        """Copy every cookie from a `CookieJar` or another `CookieManager`.

        :raises GrabMisuseError: if `cookies` is of any other type
        """
        if isinstance(cookies, CookieJar):
            for cookie in cookies:
                self.cookiejar.set_cookie(cookie)
        elif isinstance(cookies, CookieManager):
            for cookie in cookies.cookiejar:
                self.cookiejar.set_cookie(cookie)
        else:
            raise GrabMisuseError('Unknown type of cookies argument: %s'
                                  % type(cookies))

    @classmethod
    def from_cookie_list(cls, clist):
        """Build a manager whose jar holds the cookie objects in `clist`."""
        cj = CookieJar()
        for cookie in clist:
            cj.set_cookie(cookie)
        return cls(cj)

    def clear(self):
        """Drop all cookies by replacing the jar with an empty one."""
        self.cookiejar = CookieJar()

    def __getstate__(self):
        """Return slot values for pickling, with the jar stored as a list.

        The jar itself is replaced by the plain list of its cookies under
        the `_cookiejar_cookies` key.
        """
        state = {}
        for klass in type(self).mro():
            for slot in getattr(klass, '__slots__', ()):
                if slot != '__weakref__' and hasattr(self, slot):
                    state[slot] = getattr(self, slot)
        state['_cookiejar_cookies'] = list(self.cookiejar)
        del state['cookiejar']
        return state

    def __setstate__(self, state):
        """Restore slots from `state`, rebuilding the jar from its cookies.

        The `state` mapping passed in is left unmodified.
        """
        jar = CookieJar()
        for cookie in state['_cookiejar_cookies']:
            jar.set_cookie(cookie)
        self.cookiejar = jar
        for slot, value in state.items():
            # The jar was rebuilt above; the cookie list is not a slot.
            if slot not in ('_cookiejar_cookies', 'cookiejar'):
                setattr(self, slot, value)

    def __getitem__(self, key):
        """Return the value of the first cookie named `key`.

        :raises KeyError: if no cookie has that name
        """
        for cookie in self.cookiejar:
            if cookie.name == key:
                return cookie.value
        # Carry the missing name so the error is self-explanatory.
        raise KeyError(key)

    def items(self):
        """Return a list of `(name, value)` pairs for all cookies."""
        return [(cookie.name, cookie.value) for cookie in self.cookiejar]

    def load_from_file(self, path):
        """
        Load cookies from the file.

        Content of file should be a JSON-serialized list of dicts.
        An empty file loads no cookies.
        """
        with open(path) as inf:
            data = inf.read()
        items = json.loads(data) if data else []
        for item in items:
            extra = {key: val for key, val in item.items()
                     if key not in ('name', 'value', 'domain')}
            self.set(item['name'], item['value'], item['domain'], **extra)

    def get_dict(self):
        """Return a list with one dict of `COOKIE_ATTRS` values per cookie.

        Despite the name, the result is a list, not a dict.
        """
        return [{attr: getattr(cookie, attr) for attr in COOKIE_ATTRS}
                for cookie in self.cookiejar]

    def save_to_file(self, path):
        """
        Dump all cookies to file.

        Cookies are dumped as a JSON-serialized list of dicts (the output
        of `get_dict`).
        """
        with open(path, 'w') as out:
            out.write(json.dumps(self.get_dict()))
def from_cookie_list(cls, clist):
    """Return `cls` wrapping a new `CookieJar` filled from `clist`.

    :param clist: iterable of cookie objects to store in the jar
    """
    populated = CookieJar()
    for entry in clist:
        populated.set_cookie(entry)
    return cls(populated)
class ReviewBoardServer(object):
    """Represents a Review Board server we are communicating with.

    Provides methods for executing HTTP requests on a Review Board
    server's Web API.

    The ``auth_callback`` parameter can be used to specify a callable
    which will be called when authentication fails. This callable will
    be passed the realm, and url of the Review Board server and should
    return a 2-tuple of username, password. The user can be prompted
    for their credentials using this mechanism.
    """

    def __init__(self, url, cookie_file=None, username=None, password=None,
                 api_token=None, agent=None, session=None,
                 disable_proxy=False, auth_callback=None,
                 otp_token_callback=None, verify_ssl=True,
                 save_cookies=True, ext_auth_cookies=None):
        """Set up cookies, authentication and the global URL opener.

        Args:
            url: Base URL of the server; ``api/`` is appended to it.
            cookie_file: Passed to ``create_cookie_jar`` when cookies are
                saved.
            username, password, api_token, auth_callback,
            otp_token_callback: Handed to ``ReviewBoardHTTPPasswordMgr``.
            agent: Value for the ``User-agent`` header; defaults to
                ``RBTools/<package version>``.
            session: If set, stored as the ``RB_COOKIE_NAME`` cookie.
            disable_proxy: If true, an empty ``ProxyHandler`` is installed.
            verify_ssl: If false, HTTPS uses an unverified SSL context.
            save_cookies: Whether cookies are loaded from and saved to
                the cookie file.
            ext_auth_cookies: Optional cookie file loaded into the jar in
                addition to the regular one.
        """
        if not url.endswith('/'):
            url += '/'

        self.url = url + 'api/'

        self.save_cookies = save_cookies
        self.ext_auth_cookies = ext_auth_cookies

        if self.save_cookies:
            self.cookie_jar, self.cookie_file = create_cookie_jar(
                cookie_file=cookie_file)

            try:
                self.cookie_jar.load(ignore_expires=True)
            except IOError:
                # A missing or unreadable cookie file is not an error.
                pass
        else:
            self.cookie_jar = CookieJar()
            self.cookie_file = None

        if self.ext_auth_cookies:
            # NOTE(review): with save_cookies=False the jar is a plain
            # CookieJar; if that is the standard-library class it has no
            # load() method, so this call would raise AttributeError --
            # confirm and pick a file-backed jar for that case.
            try:
                self.cookie_jar.load(ext_auth_cookies, ignore_expires=True)
            except IOError as e:
                logging.critical('There was an error while loading a '
                                 'cookie file: %s', e)

        # Get the cookie domain from the url. If the domain
        # does not contain a '.' (e.g. 'localhost'), we assume
        # it is a local domain and suffix it (See RFC 2109).
        parsed_url = urlparse(url)
        self.domain = parsed_url[1].partition(':')[0]  # Remove Port.

        if self.domain.count('.') < 1:
            self.domain = '%s.local' % self.domain

        if session:
            cookie = Cookie(
                version=0,
                name=RB_COOKIE_NAME,
                value=session,
                port=None,
                port_specified=False,
                domain=self.domain,
                domain_specified=True,
                domain_initial_dot=True,
                path=parsed_url[2],
                path_specified=True,
                secure=False,
                expires=None,
                discard=False,
                comment=None,
                comment_url=None,
                rest={'HttpOnly': None})
            self.cookie_jar.set_cookie(cookie)

            if self.save_cookies:
                self.cookie_jar.save()

        if username:
            # If the username parameter is given, we have to clear the session
            # cookie manually or it will override the username:password
            # combination retrieved from the authentication callback.
            try:
                self.cookie_jar.clear(self.domain, parsed_url[2],
                                      RB_COOKIE_NAME)
            except KeyError:
                pass

        # Set up the HTTP libraries to support all of the features we need.
        password_mgr = ReviewBoardHTTPPasswordMgr(self.url,
                                                  username,
                                                  password,
                                                  api_token,
                                                  auth_callback,
                                                  otp_token_callback)
        self.preset_auth_handler = PresetHTTPAuthHandler(self.url,
                                                         password_mgr)

        handlers = []

        if not verify_ssl:
            context = ssl._create_unverified_context()
            handlers.append(HTTPSHandler(context=context))

        if disable_proxy:
            handlers.append(ProxyHandler({}))

        handlers += [
            HTTPCookieProcessor(self.cookie_jar),
            ReviewBoardHTTPBasicAuthHandler(password_mgr),
            HTTPDigestAuthHandler(password_mgr),
            self.preset_auth_handler,
            ReviewBoardHTTPErrorProcessor(),
        ]

        if agent:
            self.agent = agent
        else:
            self.agent = ('RBTools/' + get_package_version()).encode('utf-8')

        opener = build_opener(*handlers)
        opener.addheaders = [
            (str('User-agent'), self._agent_header_value(self.agent)),
        ]
        install_opener(opener)

        self._cache = None
        self._urlopen = urlopen

    @staticmethod
    def _agent_header_value(agent):
        """Return ``agent`` as a native string for use as a header value.

        Calling ``str()`` on a bytes object under Python 3 yields its repr
        (``"b'...'"``), so bytes are decoded first. On Python 2 ``bytes``
        is ``str`` and the value passes through unchanged.
        """
        if isinstance(agent, bytes) and not isinstance(agent, str):
            agent = agent.decode('utf-8')

        return str(agent)

    def enable_cache(self, cache_location=None, in_memory=False):
        """Enable caching for all future HTTP requests.

        The cache will be created at the default location if none is
        provided.

        If the in_memory parameter is True, the cache will be created in
        memory instead of on disk. This overrides the cache_location
        parameter.
        """
        if not self._cache:
            self._cache = APICache(create_db_in_memory=in_memory,
                                   db_location=cache_location)

            self._urlopen = self._cache.make_request

    def login(self, username, password):
        """Reset the user information"""
        self.preset_auth_handler.reset(username, password)

    def logout(self):
        """Logs the user out of the session."""
        self.preset_auth_handler.reset(None, None)
        self.make_request(HttpRequest('%ssession/' % self.url,
                                      method='DELETE'))
        self.cookie_jar.clear(self.domain)

        if self.save_cookies:
            self.cookie_jar.save()

    def process_error(self, http_status, data):
        """Processes an error, raising an APIError with the information.

        This never returns normally: a JSON payload raises the error built
        by ``create_api_error``; a payload that is not valid JSON raises a
        plain ``APIError`` carrying the raw data.
        """
        # In Python 3, the data can be bytes, not str, and json.loads
        # explicitly requires decoded strings.
        data = force_unicode(data)

        try:
            rsp = json_loads(data)

            assert rsp['stat'] == 'fail'

            logging.debug('Got API Error %d (HTTP code %d): %s',
                          rsp['err']['code'], http_status, rsp['err']['msg'])
            logging.debug('Error data: %r', rsp)

            raise create_api_error(http_status, rsp['err']['code'], rsp,
                                   rsp['err']['msg'])
        except ValueError:
            logging.debug('Got HTTP error: %s: %s', http_status, data)
            raise APIError(http_status, None, None, data)

    def make_request(self, request):
        """Perform an http request.

        The request argument should be an instance of
        'rbtools.api.request.HttpRequest'.

        Returns the response object produced by the opener (or the cache,
        when enabled). HTTP errors are re-raised through ``process_error``;
        URL errors are raised as ``ServerInterfaceError``.
        """
        try:
            content_type, body = request.encode_multipart_formdata()
            headers = request.headers

            if body:
                headers.update({
                    'Content-Type': content_type,
                    'Content-Length': str(len(body)),
                })
            else:
                headers['Content-Length'] = '0'

            rsp = self._urlopen(
                Request(request.url, body, headers, request.method))
        except HTTPError as e:
            # process_error always raises, so `rsp` is bound below.
            self.process_error(e.code, e.read())
        except URLError as e:
            raise ServerInterfaceError('%s' % e.reason)

        if self.save_cookies:
            try:
                self.cookie_jar.save()
            except IOError:
                # Saving cookies is best-effort; the response still counts.
                pass

        return rsp
class ReviewBoardServer(object):
    """Represents a Review Board server we are communicating with.

    Provides methods for executing HTTP requests on a Review Board
    server's Web API.

    The ``auth_callback`` parameter can be used to specify a callable
    which will be called when authentication fails. This callable will
    be passed the realm, and url of the Review Board server and should
    return a 2-tuple of username, password. The user can be prompted
    for their credentials using this mechanism.
    """

    def __init__(self, url, cookie_file=None, username=None, password=None,
                 api_token=None, agent=None, session=None,
                 disable_proxy=False, auth_callback=None,
                 otp_token_callback=None, verify_ssl=True,
                 save_cookies=True):
        """Set up cookies, authentication and the global URL opener.

        Args:
            url: Base URL of the server; ``api/`` is appended to it.
            cookie_file: Passed to ``create_cookie_jar`` when cookies are
                saved.
            username, password, api_token, auth_callback,
            otp_token_callback: Handed to ``ReviewBoardHTTPPasswordMgr``.
            agent: Value for the ``User-agent`` header; defaults to
                ``RBTools/<package version>`` encoded as UTF-8.
            session: If set, stored as the ``RB_COOKIE_NAME`` cookie.
            disable_proxy: If true, an empty ``ProxyHandler`` is installed.
            verify_ssl: If false, HTTPS uses an unverified SSL context.
            save_cookies: Whether cookies are loaded from and saved to
                the cookie file.
        """
        if not url.endswith('/'):
            url += '/'

        self.url = url + 'api/'

        self.save_cookies = save_cookies

        if self.save_cookies:
            self.cookie_jar, self.cookie_file = create_cookie_jar(
                cookie_file=cookie_file)

            try:
                self.cookie_jar.load(ignore_expires=True)
            except IOError:
                # A missing or unreadable cookie file is not an error.
                pass
        else:
            self.cookie_jar = CookieJar()
            self.cookie_file = None

        # Get the cookie domain from the url. If the domain
        # does not contain a '.' (e.g. 'localhost'), we assume
        # it is a local domain and suffix it (See RFC 2109).
        parsed_url = urlparse(url)
        self.domain = parsed_url[1].partition(':')[0]  # Remove Port.

        if self.domain.count('.') < 1:
            self.domain = '%s.local' % self.domain

        if session:
            cookie = Cookie(
                version=0,
                name=RB_COOKIE_NAME,
                value=session,
                port=None,
                port_specified=False,
                domain=self.domain,
                domain_specified=True,
                domain_initial_dot=True,
                path=parsed_url[2],
                path_specified=True,
                secure=False,
                expires=None,
                discard=False,
                comment=None,
                comment_url=None,
                rest={'HttpOnly': None})
            self.cookie_jar.set_cookie(cookie)

            if self.save_cookies:
                self.cookie_jar.save()

        if username:
            # If the username parameter is given, we have to clear the session
            # cookie manually or it will override the username:password
            # combination retrieved from the authentication callback.
            try:
                self.cookie_jar.clear(self.domain, parsed_url[2],
                                      RB_COOKIE_NAME)
            except KeyError:
                pass

        # Set up the HTTP libraries to support all of the features we need.
        password_mgr = ReviewBoardHTTPPasswordMgr(self.url,
                                                  username,
                                                  password,
                                                  api_token,
                                                  auth_callback,
                                                  otp_token_callback)
        self.preset_auth_handler = PresetHTTPAuthHandler(self.url,
                                                         password_mgr)

        handlers = []

        if not verify_ssl:
            context = ssl._create_unverified_context()
            handlers.append(HTTPSHandler(context=context))

        if disable_proxy:
            handlers.append(ProxyHandler({}))

        handlers += [
            HTTPCookieProcessor(self.cookie_jar),
            ReviewBoardHTTPBasicAuthHandler(password_mgr),
            HTTPDigestAuthHandler(password_mgr),
            self.preset_auth_handler,
            ReviewBoardHTTPErrorProcessor(),
        ]

        if agent:
            self.agent = agent
        else:
            self.agent = ('RBTools/' + get_package_version()).encode('utf-8')

        opener = build_opener(*handlers)
        opener.addheaders = [
            (b'User-agent', self.agent),
        ]
        install_opener(opener)

        self._cache = None
        self._urlopen = urlopen

    def enable_cache(self, cache_location=None, in_memory=False):
        """Enable caching for all future HTTP requests.

        The cache will be created at the default location if none is
        provided.

        If the in_memory parameter is True, the cache will be created in
        memory instead of on disk. This overrides the cache_location
        parameter.
        """
        if not self._cache:
            self._cache = APICache(create_db_in_memory=in_memory,
                                   db_location=cache_location)

            self._urlopen = self._cache.make_request

    def login(self, username, password):
        """Reset the user information"""
        self.preset_auth_handler.reset(username, password)

    def logout(self):
        """Logs the user out of the session."""
        self.preset_auth_handler.reset(None, None)
        self.make_request(HttpRequest('%ssession/' % self.url,
                                      method='DELETE'))
        self.cookie_jar.clear(self.domain)

        if self.save_cookies:
            self.cookie_jar.save()

    def process_error(self, http_status, data):
        """Processes an error, raising an APIError with the information.

        This never returns normally: a JSON payload raises the error built
        by ``create_api_error``; a payload that is not valid JSON raises a
        plain ``APIError`` carrying the raw data.
        """
        try:
            rsp = json_loads(data)

            assert rsp['stat'] == 'fail'

            # Pass the values as logging arguments so the message is only
            # formatted when debug logging is actually enabled.
            logging.debug('Got API Error %d (HTTP code %d): %s',
                          rsp['err']['code'], http_status, rsp['err']['msg'])
            logging.debug('Error data: %r', rsp)

            raise create_api_error(http_status, rsp['err']['code'], rsp,
                                   rsp['err']['msg'])
        except ValueError:
            logging.debug('Got HTTP error: %s: %s', http_status, data)
            raise APIError(http_status, None, None, data)

    def make_request(self, request):
        """Perform an http request.

        The request argument should be an instance of
        'rbtools.api.request.HttpRequest'.

        Returns the response object produced by the opener (or the cache,
        when enabled). HTTP errors are re-raised through ``process_error``;
        URL errors are raised as ``ServerInterfaceError``.
        """
        try:
            content_type, body = request.encode_multipart_formdata()
            headers = request.headers

            if body:
                headers.update({
                    b'Content-Type': content_type,
                    b'Content-Length': str(len(body)),
                })
            else:
                headers[b'Content-Length'] = '0'

            r = Request(request.url.encode('utf-8'), body, headers,
                        request.method.encode('utf-8'))
            rsp = self._urlopen(r)
        except HTTPError as e:
            # process_error always raises, so `rsp` is bound below.
            self.process_error(e.code, e.read())
        except URLError as e:
            raise ServerInterfaceError('%s' % e.reason)

        if self.save_cookies:
            try:
                self.cookie_jar.save()
            except IOError:
                # Saving cookies is best-effort; the response still counts.
                pass

        return rsp
def from_cookie_list(cls, clist):
    """Create a `cls` instance around a fresh `CookieJar`.

    Every cookie object found in `clist` is stored in the jar before the
    jar is handed to `cls`.
    """
    collected = CookieJar()
    for item in clist:
        collected.set_cookie(item)
    return cls(collected)
class CookieManager(object):
    """
    Each Grab instance has `cookies` attribute that is instance of
    `CookieManager` class.

    That class contains helpful methods to create, load, save cookies from/to
    different places.
    """

    __slots__ = ('cookiejar', )

    def __init__(self, cookiejar=None):
        """Wrap `cookiejar`, or start with an empty `CookieJar` if omitted."""
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = CookieJar()

    def set(self, name, value, domain, **kwargs):
        """Add new cookie or replace existing cookie with same parameters.

        :param name: name of cookie
        :param value: value of cookie
        :param domain: domain of cookie; 'localhost' is stored as an
            empty domain
        :param kwargs: extra attributes of cookie
        """
        if domain == 'localhost':
            domain = ''
        self.cookiejar.set_cookie(create_cookie(name, value, domain, **kwargs))

    def update(self, cookies):
        """Copy every cookie from a `CookieJar` or another `CookieManager`.

        :raises GrabMisuseError: if `cookies` is of any other type
        """
        if isinstance(cookies, CookieJar):
            for cookie in cookies:
                self.cookiejar.set_cookie(cookie)
        elif isinstance(cookies, CookieManager):
            for cookie in cookies.cookiejar:
                self.cookiejar.set_cookie(cookie)
        else:
            raise GrabMisuseError('Unknown type of cookies argument: %s'
                                  % type(cookies))

    @classmethod
    def from_cookie_list(cls, clist):
        """Build a manager whose jar holds the cookie objects in `clist`."""
        jar = CookieJar()
        for cookie in clist:
            jar.set_cookie(cookie)
        return cls(jar)

    def clear(self):
        """Drop all cookies by replacing the jar with an empty one."""
        self.cookiejar = CookieJar()

    def __getstate__(self):
        """Return slot values for pickling, with the jar stored as a list.

        The jar itself is replaced by the plain list of its cookies under
        the `_cookiejar_cookies` key.
        """
        state = {}
        for klass in type(self).mro():
            for slot in getattr(klass, '__slots__', ()):
                if slot != '__weakref__' and hasattr(self, slot):
                    state[slot] = getattr(self, slot)
        state['_cookiejar_cookies'] = list(self.cookiejar)
        del state['cookiejar']
        return state

    def __setstate__(self, state):
        """Restore slots from `state`, rebuilding the jar from its cookies.

        The `state` mapping passed in is left unmodified.
        """
        jar = CookieJar()
        for cookie in state['_cookiejar_cookies']:
            jar.set_cookie(cookie)
        self.cookiejar = jar
        for slot, value in state.items():
            # The jar was rebuilt above; the cookie list is not a slot.
            if slot not in ('_cookiejar_cookies', 'cookiejar'):
                setattr(self, slot, value)

    def __getitem__(self, key):
        """Return the value of the first cookie named `key`.

        :raises KeyError: if no cookie has that name
        """
        for cookie in self.cookiejar:
            if cookie.name == key:
                return cookie.value
        # Carry the missing name so the error is self-explanatory.
        raise KeyError(key)

    def items(self):
        """Return a list of `(name, value)` pairs for all cookies."""
        return [(cookie.name, cookie.value) for cookie in self.cookiejar]

    def load_from_file(self, path):
        """
        Load cookies from the file.

        Content of file should be a JSON-serialized list of dicts.
        An empty file loads no cookies.
        """
        with open(path) as inf:
            data = inf.read()
        items = json.loads(data) if data else []
        for item in items:
            extra = {key: val for key, val in item.items()
                     if key not in ('name', 'value', 'domain')}
            self.set(item['name'], item['value'], item['domain'], **extra)

    def get_dict(self):
        """Return a list with one dict of `COOKIE_ATTRS` values per cookie.

        Despite the name, the result is a list, not a dict.
        """
        return [{attr: getattr(cookie, attr) for attr in COOKIE_ATTRS}
                for cookie in self.cookiejar]

    def save_to_file(self, path):
        """
        Dump all cookies to file.

        Cookies are dumped as a JSON-serialized list of dicts (the output
        of `get_dict`).
        """
        with open(path, 'w') as out:
            out.write(json.dumps(self.get_dict()))

    def get_cookie_header(self, req):
        """
        Return the `Cookie` header value the jar produces for `req`.

        :param req: object with httplib.Request interface
            Actually, it have to have `url` and `headers` attributes

        The request is wrapped in `MockRequest`; the result is whatever
        `get_new_headers()` holds under 'Cookie' after the jar has added
        its header, or None if that key is absent.
        """
        mocked_req = MockRequest(req)
        self.cookiejar.add_cookie_header(mocked_req)
        return mocked_req.get_new_headers().get('Cookie')
def phantomjs_download(preq, **kw):
    """Run `preq` through PhantomJS and return the resulting `Response`.

    :param preq: prepared request; handed to `Phantomjs.prepare` and used
        for `resp.url` when PhantomJS returns nothing
    :param kw: extra options forwarded to `Phantomjs.prepare`
    :return: `Response` built from the PhantomJS result. When PhantomJS
        returns no data the response has status code 574; on
        `PhantomjsTimeout` it has status code 572. Both carry empty content.
    """
    p = Phantomjs()
    p.prepare(preq, kw)
    resp = Response()
    try:
        data = p.run()
        if data:
            rdata = data['resp']
            try:
                rdata['content'] = rdata['content'].encode('utf8')
            except (KeyError, AttributeError, TypeError, UnicodeError):
                # Best effort: content that is missing or cannot be
                # encoded is left as PhantomJS returned it.
                pass

            cookies = rdata.get('cookies')
            if cookies:
                new_cookies = CookieJar()
                for cookie in cookies:
                    c = create_cookie(
                        cookie['name'],
                        cookie['value'],
                        domain=cookie['domain'],
                        path=cookie['path'],
                        secure=cookie['secure'],
                        # A cookie without an 'expiry' key (presumably a
                        # session cookie) must not abort the whole
                        # download with a KeyError.
                        expires=cookie.get('expiry'),
                    )
                    new_cookies.set_cookie(c)
                rdata['cookies'] = new_cookies

            resp = Response.from_dict(rdata)
            resp.meta = data['meta']
        else:
            resp.url = preq.url
            resp.status_code = 574
            resp.reason = 'phantomjs:return None'
            resp._content = ''
    except PhantomjsTimeout:
        print('PhantomjsTimeout')
        resp.status_code = 572
        resp.reason = 'phantomjs:exception:timeout'
        resp._content = ''

    print('messages: %s' % ('\n---------\n'.join(p.messages)))
    return resp