def __init__(self):
    """Set up the NetEase client: browser-like headers, a requests session,
    and a persistent cookie jar; an expired cookie wipes the jar and the
    cached user record."""
    self.header = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip,deflate,sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'music.163.com',
        'Referer': 'http://music.163.com',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
    }
    self.storage = Storage()
    cookie_jar = LWPCookieJar(self.storage.cookie_path)
    try:
        # load() raises OSError when the cookie file does not exist yet
        # (first run) and LoadError when it is malformed; start with an
        # empty jar instead of crashing.
        cookie_jar.load()
    except Exception:
        cookie_jar.clear()
    self.session = requests.Session()
    self.session.cookies = cookie_jar
    for cookie in cookie_jar:
        if cookie.is_expired():
            # One expired cookie invalidates the whole session: drop the
            # jar and forget the cached user credentials.
            cookie_jar.clear()
            self.storage.database['user'] = {
                'username': '',
                'password': '',
                'user_id': '',
                'nickname': '',
            }
            self.storage.save()
            break
def authenticate(url: str, cookies: str, username: str, password: str, headers: Headers) -> Tuple[str, List[Role]]:
    """
    Authenticate with user credentials to IdP.

    Args:
        url: ECP endpoint URL for the IdP.
        cookies: A path to a cookie jar.
        username: Username to provide to the IdP.
        password: Password to provide to the IdP.
        headers: Optional headers to provide to the IdP.

    Returns:
        A base 64 encoded SAML assertion string, and a list of tuples
        containing a SAML provider ARN and a Role ARN.
    """
    jar = LWPCookieJar(cookies)
    soap = saml_login(url, jar, username, password, headers)
    mesg = "Successfully authenticated with username/password"
    logger.info(mesg + " to endpoint: " + url)
    # secure_touch presumably creates the jar file with restrictive
    # permissions before cookies are written — confirm in its definition.
    secure_touch(cookies)
    jar.save(ignore_discard=True)
    logger.info("Saved cookies to jar: " + jar.filename)
    return parse_soap_response(soap)
def __init__(self, lang="en", domain=None, result_per_page = 10, agents = None, pause=2.0, safe="off", use_cookie=None):
    """Configure a Google search client.

    :param lang: result language code.
    :param domain: Google domain; a non-string (sequence) means one is
        picked at random via RandString — confirm its exact semantics.
    :param result_per_page: number of results requested per page.
    :param agents: User-Agent string (desktop Chrome UA by default).
    :param pause: seconds to sleep between HTTP requests.
    :param safe: SafeSearch setting.
    :param use_cookie: when falsy, cookies persist in ~/.google-cookie.
    """
    if domain is None:
        domain = GSearch.DOMAIN
    if not isinstance(domain, str):
        # it's an array: randomly select one domain
        domain = RandString(domain)
    if agents is None:
        agents = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36'
    self._results_per_page = result_per_page
    self._lang = lang
    self._domain = str(domain)
    self._ori_domain = domain
    self._agent = agents
    self._pause = pause  # Lapse to wait between HTTP requests
    self._safe = safe
    self._use_cookie = use_cookie
    self._session = requests.Session()
    if not use_cookie:
        # Persist cookies in the user's home directory.
        home_folder = os.getenv('HOME')
        if not home_folder:
            home_folder = os.getenv('USERHOME')
            if not home_folder:
                home_folder = '.'  # Use the current folder on error.
        self._cookie_jar = LWPCookieJar(os.path.join(home_folder, '.google-cookie'))
        self._session.cookies = self._cookie_jar
        try:
            self._cookie_jar.load()
        except Exception:
            # missing/corrupt cookie file: start with an empty jar
            pass
    else:
        self._cookie_jar = None
def __init__(self):
    """Initialise the client: default request headers, a requests session
    and an on-disk cookie jar; any expired cookie resets jar and user."""
    self.header = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip,deflate,sdch",
        "Accept-Language": "zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "music.163.com",
        "Referer": "http://music.163.com",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    }
    self.storage = Storage()
    cookie_jar = LWPCookieJar(self.storage.cookie_path)
    try:
        # Fix: load() raises OSError/LoadError when the cookie file is
        # missing (first run) or corrupt; fall back to an empty jar.
        cookie_jar.load()
    except Exception:
        cookie_jar.clear()
    self.session = requests.Session()
    self.session.cookies = cookie_jar
    for cookie in cookie_jar:
        if cookie.is_expired():
            # Any expired cookie invalidates the login: clear the jar and
            # the stored user record.
            cookie_jar.clear()
            self.storage.database["user"] = {
                "username": "",
                "password": "",
                "user_id": "",
                "nickname": "",
            }
            self.storage.save()
            break
def refresh(url: str, cookies: str) -> Tuple[str, List[Role]]:
    """
    Reauthenticate with cookies to IdP.

    Args:
        url: ECP endpoint URL for the IdP.
        cookies: A path to a cookie jar.

    Returns:
        A base 64 encoded SAML assertion string, and a list of tuples
        containing a SAML provider ARN and a role ARN.
    """
    cookie_jar = LWPCookieJar(cookies)
    try:
        cookie_jar.load(ignore_discard=True)
        logger.info("Loaded cookie jar: " + cookies)
    except FileNotFoundError:
        # No jar on disk means there is nothing to refresh from.
        raise MissingCookieJar(url)
    soap_response = saml_login(url, cookie_jar)
    logger.info("Successfully authenticated with cookies" + " to endpoint: " + url)
    return parse_soap_response(soap_response)
def use_cookies_local(filename):
    """Load cookies saved in LWP format at *filename* and fetch baidu.com
    with them, printing the decoded response body.

    Fix: the original ignored the ``filename`` parameter and always loaded
    the hard-coded ``'cookies.txt'``.
    """
    cookie = LWPCookieJar()
    cookie.load(filename, ignore_discard=True, ignore_expires=True)
    cookie_handler = urllib.request.HTTPCookieProcessor(cookie)
    opener = urllib.request.build_opener(cookie_handler)
    response = opener.open('https://www.baidu.com')
    print(response.read().decode('utf-8'))
def logIn(self):
    """
    Login failed using mechanicalsoup and robobrowser.
    (for other mechanize alternatives see:
    https://stackoverflow.com/questions/2662705/are-there-any-alternatives-to-mechanize-in-python )
    So fall back to mechanize (however this only support Python 2.x)
    """
    cj = LWPCookieJar()
    if not os.path.isfile(constants.COOKIE_FILE):
        # No saved cookies: run the python2-only mechanize login helper in
        # a subprocess, which is expected to create COOKIE_FILE.
        email, pw = settingsManagerSingleton.getCredentialsForService(
            "amazon")
        python2 = subprocess.check_output(["which", "python2"])
        python2 = python2.decode("utf-8").replace("\n", "")
        LOG.info("Using python interpreter %s" % python2)
        ret = subprocess.call([python2, "amazon-login.py", email, pw])
        if ret != 0:
            raise ApplicationException("Login failed.")
    cj.load(constants.COOKIE_FILE, ignore_discard=True, ignore_expires=True)
    # pass LWPCookieJar directly to requests
    # see https://stackoverflow.com/a/16859266
    self.session.cookies = cj
def wrapper(*args, **kwargs):
    """
    :desc: Wrapper to check if user is logged in, if the stored
           cookies contain cookie named `acct` and is not expired.
    """
    is_login = False
    resp = {'success': False, 'message': 'You are not logged in!'}
    if os.path.exists(COOKIES_FILE_PATH):
        cookiejar = LWPCookieJar(filename=COOKIES_FILE_PATH)
        cookiejar.load()
        for cookie in cookiejar:
            if cookie.name == 'acct':
                # cookie.expires is a UTC epoch timestamp. Fixes two
                # bugs: the comparison was inverted (an *expired* cookie
                # counted as logged in) and the UTC expiry was compared
                # against local time.
                expiry_time_obj = datetime.utcfromtimestamp(cookie.expires)
                if expiry_time_obj > datetime.utcnow():
                    is_login = True
        if not is_login:
            # stale/expired cookie file is useless: remove it
            os.remove(COOKIES_FILE_PATH)
        else:
            return func(*args, **kwargs)
    return resp
def __init__(self, domain=None, agents=None, lang="en", use_cookie=None):
    """Set up the translate client; when use_cookie is falsy, cookies are
    persisted in ~/.google-cookie (falling back to USERHOME, then '.')."""
    self._agent = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36'
        if agents is None else agents
    )
    self._lang = lang
    self._domain = GTranslate.DOMAIN if domain is None else domain
    self._use_cookie = use_cookie
    self._session = requests.Session()
    if use_cookie:
        self._cookie_jar = None
    else:
        home = os.getenv('HOME') or os.getenv('USERHOME') or '.'
        self._cookie_jar = LWPCookieJar(os.path.join(home, '.google-cookie'))
        self._session.cookies = self._cookie_jar
        try:
            self._cookie_jar.load()
        except Exception:
            # missing or unreadable cookie file: start empty
            pass
def get_session(config, session=None):
    """Return an HTML session whose cookies come from the on-disk jar or,
    failing that, from a fresh selenium-driven login.

    Args:
        config: object exposing COOKIE_FILE.
        session: optional pre-built session to reuse as-is.
    """
    cookies = None
    if session is None:  # fixed: `== None` -> identity test
        try:
            cookies = LWPCookieJar(config.COOKIE_FILE)
            cookies.load(ignore_discard=True, ignore_expires=True)
        except Exception:
            # couldn't read the file; generate new cookies below
            # (narrowed from a bare `except:` that also swallowed
            # KeyboardInterrupt/SystemExit)
            log.debug("exception during cookie load")
            cookies = None
        if cookies is None:
            cookies = _refresh_cookies_using_selenium(config)
        if cookies is None:
            log.fatal("Could not log into volunteer connection")
            sys.exit(1)
    if session is None:
        session = requests_html.HTMLSession()
        session.cookies = cookies
    return session
def cookie_from_str(cookie_str):
    """Parse a string in LWP (Set-Cookie3) format into an LWPCookieJar.

    Relies on module-level HEADER, BOOLEAN_ATTRS and VALUE_ATTRS —
    presumably mirroring http.cookiejar's LWP loader; confirm they match.
    NOTE(review): the `while 1` loop only stops at an empty line; input
    without a trailing empty line raises IndexError — confirm callers
    always provide one.
    """
    from http.cookiejar import split_header_words, LWPCookieJar, LoadError, Cookie, iso2time
    import time
    cookie_str = cookie_str.split('\n')
    cookie = LWPCookieJar()
    index = 0
    while 1:
        line = cookie_str[index]
        index += 1
        if line == "":
            break
        # skip any line that is not a "Set-Cookie3:" record
        if not line.startswith(HEADER):
            continue
        line = line[len(HEADER):].strip()
        for data in split_header_words([line]):
            # first key=value pair is the cookie name/value itself
            name, value = data[0]
            standard = {}
            rest = {}
            # boolean attributes default to False unless present
            for k in BOOLEAN_ATTRS:
                standard[k] = False
            for k, v in data[1:]:
                if k is not None:
                    lc = k.lower()
                else:
                    lc = None
                # normalise known attribute names to lowercase
                if (lc in VALUE_ATTRS) or (lc in BOOLEAN_ATTRS):
                    k = lc
                if k in BOOLEAN_ATTRS:
                    if v is None:
                        v = True  # bare flag means True
                    standard[k] = v
                elif k in VALUE_ATTRS:
                    standard[k] = v
                else:
                    rest[k] = v  # unknown attrs go into the rest dict
            h = standard.get
            expires = h("expires")
            discard = h("discard")
            if expires is not None:
                expires = iso2time(expires)
            if expires is None:
                # a cookie with no (parseable) expiry is session-only
                discard = True
            domain = h("domain")
            domain_specified = domain.startswith(".")
            c = Cookie(h("version"), name, value,
                       h("port"), h("port_spec"),
                       domain, domain_specified, h("domain_dot"),
                       h("path"), h("path_spec"),
                       h("secure"),
                       expires,
                       discard,
                       h("comment"),
                       h("commenturl"),
                       rest)
            cookie.set_cookie(c)
    return cookie
def __init__(self):
    """
    Create a new LWPCookieJar that uses an in-memory "file."

    The contents of this file can be retrieved by calling the
    'getvalue' method.
    """
    LWPCookieJar.__init__(self)
    # backing store for the in-memory "file"; presumably written by a
    # save() override elsewhere in the class — confirm.
    self._string = ""
def _setup_cookies(self):
    # all requests should use the same cookie jar
    # Builds a cookie-aware urllib opener and installs it process-wide,
    # so plain urlopen() calls share self.cookiejar too.
    self.cookiejar = LWPCookieJar(self.cookiefile)
    if os.path.isfile(self.cookiefile):
        # only load when the file exists; load() would raise otherwise
        self.cookiejar.load(ignore_discard=True)
    self.cookieprocessor = HTTPCookieProcessor(self.cookiejar)
    self.opener = build_opener(self.cookieprocessor)
    install_opener(self.opener)
def load_cookies(self, cookies):
    """Copy every cookie persisted at ``self.filename`` into *cookies*.

    A missing or unreadable file is silently ignored (OSError also covers
    http.cookiejar.LoadError).
    """
    try:
        stored = LWPCookieJar(self.filename)
        stored.load()
        for item in stored:
            cookies.set_cookie(copy.copy(item))
    except OSError:
        pass
def cookies(request):
    """Parametrised fixture: cookies as a loaded LWPCookieJar ('cj') or as
    raw objects parsed from a JSON file ('raw')."""
    kind = request.param
    if kind == 'cj':
        jar = LWPCookieJar(CJ_PATH)
        jar.load()
        return jar, kind
    if kind == 'raw':
        with open(COOKIES_PATH) as fh:
            raw = json.load(fh)
        return raw, kind
def save_cookies(self, cookie_storage):
    """Save to cookielib's CookieJar or Set-Cookie3 format text file.

    :param cookie_storage: file location string or CookieJar instance.
    """
    def toPyCookieJar(QtCookieJar, PyCookieJar):
        # copy every Qt cookie into the Python jar
        for c in QtCookieJar.allCookies():
            PyCookieJar.set_cookie(toPyCookie(c))

    def toPyCookie(QtCookie):
        # translate a Qt network cookie into an http.cookiejar.Cookie;
        # Cookie() takes its fields positionally in this exact order
        port = None
        port_specified = False
        secure = QtCookie.isSecure()
        name = str(QtCookie.name())
        value = str(QtCookie.value())
        v = str(QtCookie.path())
        path_specified = bool(v != "")
        path = v if path_specified else None
        v = str(QtCookie.domain())
        domain_specified = bool(v != "")
        domain = v
        if domain_specified:
            domain_initial_dot = v.startswith('.')
        else:
            domain_initial_dot = None
        v = int(QtCookie.expirationDate().toTime_t())
        # Long type boundary on 32bit platforms; avoid ValueError
        expires = 2147483647 if v > 2147483647 else v
        rest = {}
        discard = False
        return Cookie(
            0, name, value, port, port_specified, domain,
            domain_specified, domain_initial_dot, path, path_specified,
            secure, expires, discard, None, None, rest,
        )

    # duck-type on the class name so both str paths and any *CookieJar
    # subclass are accepted without importing Qt/cookielib types here
    if cookie_storage.__class__.__name__ == 'str':
        cj = LWPCookieJar(cookie_storage)
        toPyCookieJar(self.cookie_jar, cj)
        cj.save()
    elif cookie_storage.__class__.__name__.endswith('CookieJar'):
        toPyCookieJar(self.cookie_jar, cookie_storage)
    else:
        raise ValueError('unsupported cookie_storage type.')
def prepareNetwork():
    """Build a Twisted CookieAgent backed by a persistent connection pool.

    NOTE(review): cookieJar.load() raises OSError when 'photo.cookie' does
    not exist — presumably the file is created beforehand; confirm.
    """
    cookieJar = LWPCookieJar('photo.cookie')
    cookieJar.load()
    pool = HTTPConnectionPool(reactor, persistent=True)
    # keep up to 15 cached persistent connections per host
    pool.maxPersistentPerHost = 15
    agent = CookieAgent(Agent(reactor, pool=pool), cookieJar)
    return agent
def build_opener(apiurl, user, password, cookie_path, debuglevel=0, capath=None, cafile=None, headers=()):
    """build urllib opener for given name/password

    it creates
      * HTTPSHandler with proper ssl context
      * HTTPCookieProcessor with a link to cookiejar
      * HTTPBasicAuthHandler with user/password
      * proxyhandler which respects no_proxy variable
    """
    handlers = list()

    if hasattr(ssl, "SSLContext"):
        # allow only sslv3 and tlsv1, but not sslv2
        ctx = ssl.SSLContext(protocol=ssl.PROTOCOL_SSLv23)
        ctx.options |= ssl.OP_NO_SSLv2
        ctx.verify_mode = ssl.CERT_REQUIRED
        ctx.set_default_verify_paths()
        if cafile or capath:
            # Fix: load_verify_locations() returns None and signals
            # failure by raising; the old `!= -1` check rejected every
            # successful call.
            try:
                ctx.load_verify_locations(capath=capath, cafile=cafile)
            except ssl.SSLError as e:
                raise Exception("load_verify_locations failed for capath={}, cafile={}".format(capath, cafile)) from e
        # TODO: debuglevel
        httpshandler = HTTPSHandler(debuglevel=debuglevel, context=ctx, check_hostname=True)
        handlers.append(httpshandler)

    cookie_file = os.path.expanduser(cookie_path)
    cookiejar = LWPCookieJar(cookie_file)
    try:
        cookiejar.load(ignore_discard=True)
    except (OSError, IOError, AttributeError):
        try:
            # create an empty owner-only cookie file for next time and
            # keep the (empty) LWPCookieJar. Fix: the old code left
            # `cookiejar` unbound when os.open succeeded.
            os.close(os.open(cookie_file, os.O_WRONLY | os.O_CREAT, 0o600))
        except OSError:
            # cannot create the file: fall back to RAM-based cookies
            cookiejar = CookieJar()
    handlers.append(HTTPCookieProcessor(cookiejar))

    authhandler = HTTPBasicAuthHandler(
        HTTPPasswordMgrWithDefaultRealm())
    authhandler.add_password(None, apiurl, bytes(user, "utf-8"), bytes(password, "ascii"))
    handlers.append(authhandler)

    # proxy handling
    if not proxy_bypass(apiurl):
        proxyhandler = ProxyHandler()
    else:
        proxyhandler = ProxyHandler({})
    handlers.append(proxyhandler)

    opener = _build_opener(*handlers)
    from bslib import __version__
    opener.addheaders = [("User-agent", "bslib/{}".format(__version__)), ]
    for h in headers:
        # Fix: addheaders is a list; the old code *called* it.
        opener.addheaders.append(h)
    return opener
def __init__(self, profile, netloc):
    """Store profile/netloc and wire an opener with this object as an
    extra handler plus an in-memory cookie jar.

    NOTE(review): `opener` is a local and is discarded when __init__
    returns — presumably it should be kept on self (or installed
    globally); confirm against callers.
    """
    self.profile = profile
    self.netloc = netloc
    self._cookiejar = LWPCookieJar()
    # credential is filled in later (lazily) — confirm where
    self._credential = None
    opener = build_opener()
    # register both the cookie processor and this object (which is
    # expected to implement the urllib handler protocol)
    for h in (HTTPCookieProcessor(self._cookiejar), self):
        opener.add_handler(h)
    opener.addheaders = [('User-Agent', USER_AGENT)]
def load_cookie(self, file_name=None, **kwargs):
    """Merge cookies stored in LWP format at *file_name* (defaulting to the
    configured cookie file) into this client's jar; a malformed file raises."""
    target = file_name or self._cookie_file
    if not path_exists(target):
        return
    try:
        disk_jar = LWPCookieJar()
        disk_jar.load(target, **kwargs)
        self._cookie_jar.update(disk_jar)
    except LoadError:
        raise Exception('oops!')
def get_random_cookie():
    """Load a randomly chosen saved cookie file and return it as a requests
    cookie jar together with the path it came from."""
    chosen_path = abs_path + r'/cookies/' + get_cookie_path()
    lwp_jar = LWPCookieJar()
    lwp_jar.load(chosen_path, ignore_discard=True, ignore_expires=True)
    # round-trip through a plain dict to obtain a requests-native jar
    as_dict = requests.utils.dict_from_cookiejar(lwp_jar)
    return requests.utils.cookiejar_from_dict(as_dict), chosen_path
def save_cookie_on_file(path):
    """Fetch baidu.com and persist the received cookies to *path* in LWP
    (Set-Cookie3) format.

    LWPCookieJar writes the libwww-perl cookie format; MozillaCookieJar
    would be the Netscape-format alternative.
    """
    jar = LWPCookieJar(path)
    handler = urllib.request.HTTPCookieProcessor(jar)
    opener = urllib.request.build_opener(handler)
    response = opener.open('https://www.baidu.com')
    jar.save(ignore_discard=True, ignore_expires=True)
def cookiejar_create(self, cookiejar_file, session):
    """Write a fresh LWP cookie jar file containing a single session cookie.

    :param cookiejar_file: open file object; only its .name is used by
        the jar, .flush() syncs it afterwards.
    :param session: the cookie value stored under self.COOKIE_NAME.
    """
    cookie_jar = LWPCookieJar(cookiejar_file.name)
    # Cookie's positional args: version, name, value, port,
    # port_specified, domain, domain_specified, domain_initial_dot,
    # path, path_specified, secure, expires, discard, comment,
    # comment_url, rest
    cookie_jar.set_cookie(Cookie(0, self.COOKIE_NAME, session, None, False, '', False, True, '/', True, True, None, None, None, None, {}))
    cookie_jar.save()
    cookiejar_file.flush()
def __init__(self, cookie_file: Optional[Path] = None) -> None:
    """Create a new cookie jar at the given path.

    If the path is None, the cookies will not be persisted.
    """
    self._cookies: LWPCookieJar
    self._cookies = (
        LWPCookieJar() if cookie_file is None
        else LWPCookieJar(str(cookie_file.resolve()))
    )
class CookieJar:
    """A cookie jar that can be persisted."""

    def __init__(self, cookie_file: Optional[Path] = None) -> None:
        """Create a new cookie jar at the given path.

        If the path is None, the cookies will not be persisted.
        """
        self._cookies: LWPCookieJar
        if cookie_file is None:
            self._cookies = LWPCookieJar()
        else:
            self._cookies = LWPCookieJar(str(cookie_file.resolve()))

    @property
    def cookies(self) -> LWPCookieJar:
        """Return the requests cookie jar."""
        return self._cookies

    def load_cookies(self) -> None:
        """Load all cookies from the file given in the constructor."""
        # filename is None exactly when no cookie_file was given
        if self._cookies.filename is None:
            return
        try:
            LOGGER.info("Loading old cookies from %s", self._cookies.filename)
            self._cookies.load(ignore_discard=True)
        except (FileNotFoundError, LoadError):
            # missing or malformed jar: proceed with an empty one
            LOGGER.warning(
                "No valid cookie file found at %s, continuing with no cookies",
                self._cookies.filename)

    def save_cookies(self, reason: Optional[str] = None) -> None:
        """Save the cookies in the file given in the constructor."""
        if self._cookies.filename is None:
            return
        if reason is None:
            LOGGER.info("Saving cookies")
        else:
            LOGGER.info("Saving cookies (%s)", reason)
        # TODO figure out why ignore_discard is set
        # TODO possibly catch a few more exceptions
        self._cookies.save(ignore_discard=True)

    def create_session(self) -> requests.Session:
        """Create a new session using the cookie jar."""
        sess = requests.Session()
        # From the request docs: "All requests code should work out of the box
        # with externally provided instances of CookieJar, e.g. LWPCookieJar
        # and FileCookieJar."
        sess.cookies = self.cookies  # type: ignore
        return sess
class SimpleCookieConnector(ConnectorMixin, BaseConnector):
    """A urllib-based connector whose cookies persist in a file."""

    def __init__(self, cookie_file, delay=3, extra_headers=None):
        """
        Keywords
        --------
        - cookie_file
        """
        super(SimpleCookieConnector, self).__init__(delay, extra_headers)
        self._cookie_file = cookie_file
        self._cookie_jar = LWPCookieJar(filename=self._cookie_file)
        self._cookie_processor = HTTPCookieProcessor(self._cookie_jar)
        self._opener = build_opener(self._cookie_processor)
        self._last_request = None
        self._last_response = None
        self._last_content = ''

    def request(self, url, method='GET', params=None, data=None, headers=None):
        """Issue a GET or POST and return the decoded body ('' if no url)."""
        if not url:
            return ''
        params = params or {}
        data = data or {}
        # Fix: dict.update() returns None, so the original
        # `headers = (headers or {}).update(self._extra_headers)` always
        # passed headers=None to Request(), dropping every header.
        merged_headers = dict(headers or {})
        merged_headers.update(self._extra_headers or {})
        if method.upper() == 'GET':
            _url = self.create_get_url(url, params)
            return self.resolve(Request(_url, data=None, headers=merged_headers))
        elif method.upper() == 'POST':
            return self.resolve(Request(url, data=bytes(urlencode(data), 'utf-8'), headers=merged_headers))

    def resolve(self, request):
        """Execute *request*, record request/response/content, honour the
        configured inter-request delay, and return the decoded content."""
        self._last_request = request
        self._last_response = self._opener.open(self._last_request)
        content = self._last_response.read()
        self._last_response.close()
        sleep(self._delay)
        charset = self.detect_charset(headers=self._last_response.headers, content=content)
        self._last_content = content.decode(charset)
        return self._last_content

    def save_cookie(self):
        """Persist the jar to the configured cookie file."""
        self._cookie_jar.save(self._cookie_file)

    def get_cookie(self, name):
        """Return the first cookie with *name*, or None."""
        for cookie in self._cookie_jar:
            if cookie.name == name:
                return cookie
        return None
def init_basicauth(config, config_mtime):
    """initialize urllib2 with the credentials for Basic Authentication"""

    def filterhdrs(meth, ishdr, *hdrs):
        # this is so ugly but httplib doesn't use
        # a logger object or such
        def new_method(self, *args, **kwargs):
            # check if this is a recursive call (note: we do not
            # have to care about thread safety)
            is_rec_call = getattr(self, '_orig_stdout', None) is not None
            try:
                if not is_rec_call:
                    # capture everything the wrapped method prints so
                    # sensitive headers can be scrubbed before output
                    self._orig_stdout = sys.stdout
                    sys.stdout = StringIO()
                meth(self, *args, **kwargs)
                hdr = sys.stdout.getvalue()
            finally:
                # restore original stdout
                if not is_rec_call:
                    sys.stdout = self._orig_stdout
                    del self._orig_stdout
            for i in hdrs:
                if ishdr:
                    hdr = re.sub(r'%s:[^\\r]*\\r\\n' % i, '', hdr)
                else:
                    hdr = re.sub(i, '', hdr)
            sys.stdout.write(hdr)
        new_method.__name__ = meth.__name__
        return new_method

    if config['http_debug'] and not config['http_full_debug']:
        # scrub Cookie/Authorization values from the http debug stream
        HTTPConnection.send = filterhdrs(HTTPConnection.send, True, 'Cookie', 'Authorization')
        HTTPResponse.begin = filterhdrs(HTTPResponse.begin, False, 'header: Set-Cookie.*\n')

    if config['http_debug']:
        # brute force
        def urllib2_debug_init(self, debuglevel=0):
            self._debuglevel = 1
        AbstractHTTPHandler.__init__ = urllib2_debug_init

    cookie_file = os.path.expanduser(config['cookiejar'])
    global cookiejar
    cookiejar = LWPCookieJar(cookie_file)
    try:
        cookiejar.load(ignore_discard=True)
        # a config newer than the cookie file invalidates stored cookies
        if int(round(config_mtime)) > int(os.stat(cookie_file).st_mtime):
            cookiejar.clear()
            cookiejar.save()
    except IOError:
        try:
            # create an empty owner-only cookie file for next time
            fd = os.open(cookie_file, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
            os.close(fd)
        except IOError:
            # hmm is any good reason why we should catch the IOError?
            #print 'Unable to create cookiejar file: \'%s\'. Using RAM-based cookies.' % cookie_file
            cookiejar = CookieJar()
def init_basicauth(config):
    """initialize urllib2 with the credentials for Basic Authentication"""

    def filterhdrs(meth, ishdr, *hdrs):
        # this is so ugly but httplib doesn't use
        # a logger object or such
        def new_method(*args, **kwargs):
            # capture everything the wrapped method prints so sensitive
            # headers can be scrubbed before reaching the real stdout
            stdout = sys.stdout
            sys.stdout = StringIO()
            meth(*args, **kwargs)
            hdr = sys.stdout.getvalue()
            sys.stdout = stdout
            for i in hdrs:
                if ishdr:
                    hdr = re.sub(r'%s:[^\\r]*\\r\\n' % i, '', hdr)
                else:
                    hdr = re.sub(i, '', hdr)
            sys.stdout.write(hdr)
        new_method.__name__ = meth.__name__
        return new_method

    if config['http_debug'] and not config['http_full_debug']:
        # scrub Cookie/Authorization values from the http debug stream
        HTTPConnection.send = filterhdrs(HTTPConnection.send, True, 'Cookie', 'Authorization')
        HTTPResponse.begin = filterhdrs(HTTPResponse.begin, False, 'header: Set-Cookie.*\n')

    if sys.version_info < (2, 6):
        # HTTPS proxy is not supported in old urllib2. It only leads to an error
        # or, at best, a warning.
        if 'https_proxy' in os.environ:
            del os.environ['https_proxy']
        if 'HTTPS_PROXY' in os.environ:
            del os.environ['HTTPS_PROXY']

    if config['http_debug']:
        # brute force
        def urllib2_debug_init(self, debuglevel=0):
            self._debuglevel = 1
        AbstractHTTPHandler.__init__ = urllib2_debug_init

    cookie_file = os.path.expanduser(config['cookiejar'])
    global cookiejar
    cookiejar = LWPCookieJar(cookie_file)
    try:
        cookiejar.load(ignore_discard=True)
    except IOError:
        try:
            # create an empty cookie file readable only by the owner
            open(cookie_file, 'w').close()
            os.chmod(cookie_file, 0o600)
        except:
            #print 'Unable to create cookiejar file: \'%s\'. Using RAM-based cookies.' % cookie_file
            cookiejar = CookieJar()
def __init__(self):
    """Resolve the user's home directory (HOME, then USERHOME, then '.')
    and load the persistent .google-cookie jar from it, starting empty on
    any load failure."""
    self.home_folder = os.getenv('HOME')
    if not self.home_folder:
        self.home_folder = os.getenv('USERHOME')
    # Fix: the original tested the undefined local `home_folder` here,
    # raising NameError whenever HOME was unset.
    if not self.home_folder:
        self.home_folder = '.'  # Use the current folder on error.
    self.cookie_jar = LWPCookieJar(os.path.join(self.home_folder, '.google-cookie'))
    try:
        self.cookie_jar.load()
    except Exception:
        # missing or corrupt cookie file: continue with an empty jar
        pass
def init_basicauth(config):
    """initialize urllib2 with the credentials for Basic Authentication"""

    def filterhdrs(meth, ishdr, *hdrs):
        # this is so ugly but httplib doesn't use
        # a logger object or such
        def new_method(self, *args, **kwargs):
            # redirect stdout so the wrapped method's debug output can be
            # scrubbed of sensitive headers before being written out
            self._orig_stdout = sys.stdout
            sys.stdout = StringIO()
            meth(self, *args, **kwargs)
            hdr = sys.stdout.getvalue()
            sys.stdout = self._orig_stdout
            del self._orig_stdout
            for i in hdrs:
                if ishdr:
                    hdr = re.sub(r'%s:[^\\r]*\\r\\n' % i, '', hdr)
                else:
                    hdr = re.sub(i, '', hdr)
            sys.stdout.write(hdr)
        new_method.__name__ = meth.__name__
        return new_method

    if config['http_debug'] and not config['http_full_debug']:
        # scrub Cookie/Authorization values from the http debug stream
        HTTPConnection.send = filterhdrs(HTTPConnection.send, True, 'Cookie', 'Authorization')
        HTTPResponse.begin = filterhdrs(HTTPResponse.begin, False, 'header: Set-Cookie.*\n')

    if sys.version_info < (2, 6):
        # HTTPS proxy is not supported in old urllib2. It only leads to an error
        # or, at best, a warning.
        if 'https_proxy' in os.environ:
            del os.environ['https_proxy']
        if 'HTTPS_PROXY' in os.environ:
            del os.environ['HTTPS_PROXY']

    if config['http_debug']:
        # brute force
        def urllib2_debug_init(self, debuglevel=0):
            self._debuglevel = 1
        AbstractHTTPHandler.__init__ = urllib2_debug_init

    cookie_file = os.path.expanduser(config['cookiejar'])
    global cookiejar
    cookiejar = LWPCookieJar(cookie_file)
    try:
        cookiejar.load(ignore_discard=True)
    except IOError:
        try:
            # create an empty cookie file readable only by the owner
            open(cookie_file, 'w').close()
            os.chmod(cookie_file, 0o600)
        except:
            #print 'Unable to create cookiejar file: \'%s\'. Using RAM-based cookies.' % cookie_file
            cookiejar = CookieJar()
def __init__(self, cfg):
    """Load the package-local .google-cookie jar (when present) and warm up
    a session against google.com."""
    super().__init__(cfg)
    cookie_path = Path(__file__).parent.parent.joinpath(".google-cookie")
    self.cookie_jar = None
    if cookie_path.exists():
        self.cookie_jar = LWPCookieJar(cookie_path)
        # noinspection PyBroadException
        try:
            self.cookie_jar.load(ignore_discard=True, ignore_expires=True)
        except Exception:
            # unreadable/corrupt jar: keep the (empty) jar object
            pass
    self.get_page("https://www.google.com/")
def wrapper(*args, **kwargs):
    """Gate the wrapped call behind a stored-cookie check; a missing or
    empty jar yields a 401 payload (and removes the stale empty file)."""
    is_logged_in = False
    if os.path.exists(COOKIES_FILE_PATH):
        jar = LWPCookieJar(filename=COOKIES_FILE_PATH)
        jar.load()
        if len(jar):
            is_logged_in = True
        else:
            os.remove(COOKIES_FILE_PATH)
    if not is_logged_in:
        return [{'code': 401}]
    return func(*args, **kwargs)
def wrapper(client, method, url, body=None, headers=None):
    """Inject the cookies stored (LWP string form) in client.credential
    into the outgoing request, perform it, then write the updated cookie
    state back to the credential store."""
    cookiejar = LWPCookieJar()
    # _really_load (private API) parses an in-memory LWP-format string;
    # "cookies.txt" is only used in error messages. The two True flags
    # are ignore_discard / ignore_expires.
    cookiejar._really_load(
        StringIO("#LWP-Cookies-2.0\n" + client.credential.get(field,'')),
        "cookies.txt",True,True)
    req = Request(url, body, headers or {}, method=method)
    cookiejar.clear_expired_cookies()
    cookiejar.add_cookie_header(req)
    status, headers, body = request(client,req.method,req.full_url,req.data,dict(req.header_items()))
    # wrap the raw response so extract_cookies() can read Set-Cookie
    response = addinfourl(None, headers, req.full_url, status)
    cookiejar.extract_cookies(response,req)
    client.credential[field] = cookiejar.as_lwp_str()
    return (status, headers, body)
def __init__(self, provider_id, proxies=None, *args, **kwargs):
    """Create a provider session whose cookies persist in COOKIES_FILE.

    :param provider_id: identifier of the backing provider.
    :param proxies: optional proxy mapping, stored in self._state and
        applied via self.proxies when given.
    """
    self.provider_id = provider_id
    self.session = self.SESSION_CLASS()
    self.cookies = LWPCookieJar()
    if not os.path.exists(self.COOKIES_FILE):
        # seed an empty jar file so the unconditional load() below succeeds
        self.cookies.save(self.COOKIES_FILE)
    self.cookies.load(self.COOKIES_FILE, ignore_discard=True)
    self.session.headers = self.HEADERS
    self._state = AttrDict([("proxies", proxies)])
    if proxies:
        self.proxies = proxies
    # response caching is opt-in; off by default
    self._cache_responses = False
def __init__(self):
    """Prepare the NetEase client: browser-like headers, a requests session
    backed by a persistent cookie jar; any expired cookie clears the jar
    and the cached user record."""
    self.header = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip,deflate,sdch",
        "Accept-Language": "zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "music.163.com",
        "Referer": "http://music.163.com",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    }
    self.storage = Storage()
    jar = LWPCookieJar(self.storage.cookie_path)
    try:
        jar.load()
    except Exception as e:
        print(e)
        jar.clear()
    self.session = requests.Session()
    self.session.cookies = jar
    # stop at the first expired cookie: invalidate the whole jar and
    # forget the cached user
    if any(item.is_expired() for item in jar):
        jar.clear()
        self.storage.database["user"] = {
            "username": "",
            "password": "",
            "user_id": "",
            "nickname": "",
        }
        self.storage.save()
def __init__(self, username, password=None, cookie_dir='cookies'):
    """12306 client: one cookie file per user plus a browser-like session."""
    self.username = username
    self.password = password
    self.cookie_path = os.path.join(cookie_dir, '{0}.txt'.format(username))
    jar = LWPCookieJar(self.cookie_path)
    if os.path.exists(self.cookie_path):
        jar.load()
    session = requests.Session()
    session.cookies = jar
    session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130110 Firefox/21.0'
    session.headers['Referer'] = 'http://dynamic.12306.cn/otsweb/order/querySingleAction.do?method=init'
    self.session = session
    # shorthand aliases used throughout the client
    self.urlget = session.get
    self.urlpost = session.post
    self.query = Options()
def __init__(self):
    """Build a cookie-aware urllib opener with browser-like headers;
    cookies persist in cookiejar.txt under the profile directory."""
    self.cookie_filename = os.path.join(__profile__, "cookiejar.txt")
    self.cookie_jar = LWPCookieJar(self.cookie_filename)
    if os.access(self.cookie_filename, os.F_OK):
        self.cookie_jar.load()
    self.opener = build_opener(HTTPCookieProcessor(self.cookie_jar))
    default_headers = [
        ('Accept-Encoding', 'gzip'),
        ('Accept-Language', 'en-us,en;q=0.5'),
        ('Pragma', 'no-cache'),
        ('Cache-Control', 'no-cache'),
        ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36'),
    ]
    self.opener.addheaders = default_headers
def __init__(self, args):
    """Set up a Bugzilla CLI session from parsed arguments: terminal
    width, credentials, cookie jar, API token and the XML-RPC proxy."""
    self.columns = args.columns or terminal_width()
    self.user = args.user
    self.password = args.password
    self.passwordcmd = args.passwordcmd
    self.skip_auth = args.skip_auth

    cookie_file = os.path.join(os.environ['HOME'], DEFAULT_COOKIE_FILE)
    self.cookiejar = LWPCookieJar(cookie_file)
    try:
        self.cookiejar.load()
    except IOError:
        pass  # no saved cookies yet

    self.token_file = os.path.join(os.environ['HOME'], DEFAULT_TOKEN_FILE)
    try:
        # `with` fixes the leaked file handle of the bare open().read()
        with open(self.token_file) as fh:
            self.token = fh.read().strip()
    except IOError:
        self.token = None

    # Fix: two-arg getattr raised AttributeError when the sub-command's
    # namespace had no `encoding` attribute; supply a default instead.
    if getattr(args, 'encoding', None):
        log_info("The --encoding option is deprecated.");
    log_info("Using %s " % args.base)
    self.bz = BugzillaProxy(args.base, cookiejar=self.cookiejar)
def make_plans_connection(self): """ Connects to plans, prompting for passwords if necessary """ # create a cookie self.cookie = LWPCookieJar( os.path.join(self.profile_dir, '%s.cookie' % self.username)) try: self.cookie.load() # fails with IOError if it does not exist except IOError: pass # no cookie saved for this user # create plans connection using cookie pc = PlansConnection(self.cookie, base_url=self.config.get('login', 'url')) if pc.plans_login(): pass # we're still logged in else: # we're not logged in, prompt for password if necessary password = (self.args['password'] or getpass("[%s]'s password: " % self.username)) success = pc.plans_login(self.username, password) if not success: print('Failed to log in as [%s].' % self.username, file=sys.stderr) sys.exit(1) return pc
def __init__(self, user="", password="", cookieDir='.', proxy_IP=''):
    """Weibo search client: per-user cookie file, proxied urllib opener
    with browser-like headers, and the base64-encoded user id for login."""
    http_header = [
        ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.65 Safari/537.36'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Charset', 'utf-8,ISO-8859-1;q=0.7,*;q=0.3'),
        ('Accept-Encoding', 'none'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Connection', 'keep-alive'),
        ('Host', 's.weibo.com'),
    ]
    self.cookieFile = cookieDir + '/' + user
    # (re)initialise the cookie file with the LWP magic line so the
    # unconditional load() below accepts it even on a first run
    # (`with` also fixes the previously unclosed file handle)
    with open(self.cookieFile, 'w') as ouf:
        ouf.write('#LWP-Cookies-2.0')
    self.cj = LWPCookieJar(self.cookieFile)
    self.cj.load(ignore_discard=True, ignore_expires=True)
    self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj))
    self.opener.add_handler(urllib.request.ProxyHandler(proxies={"http": proxy_IP}))
    self.opener.addheaders = http_header
    # get user information from params
    self.user, self.password = user, password
    # encode userid. Fix: base64.encodestring was removed in Python 3.9;
    # encodebytes is the supported equivalent.
    userid = bytes(urllib.parse.quote(self.user), 'utf-8')
    self.encode_userid = base64.encodebytes(userid)[:-1]
def __init__(self, cookie_path=None):
    """Cookie-aware opener: an in-memory LWP jar (optionally bound to a
    file via set_cookie_path) and a Firefox User-agent header."""
    self.opener = build_opener()
    self.cookiejar = LWPCookieJar()
    self.set_cookie_path(cookie_path)
    self.opener.add_handler(HTTPCookieProcessor(self.cookiejar))
    user_agent = 'Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko/20120101 Firefox/11.0'
    self.opener.addheaders = [('User-agent', user_agent)]
    self.last_request = None
    self.last_response = None
def get_csrftoken(client):
    """Return the csrftoken cookie value held in client.credential,
    fetching leetcode.com first when no cookie string is cached yet
    (returns None when the fetch fails or no token is present)."""
    if "cookie" not in client.credential:
        status, headers, body = client.get(
            "https://leetcode.com/", request=request_with_credential)
        if status != 200:
            return
    from http.cookiejar import LWPCookieJar
    from io import StringIO
    jar = LWPCookieJar()
    # parse the cached LWP-format cookie string in memory; the file name
    # argument only appears in error messages
    jar._really_load(
        StringIO("#LWP-Cookies-2.0\n" + client.credential["cookie"]),
        "cookies.txt", True, True)
    for item in jar:
        if item.name == 'csrftoken':
            return item.value
    return
def login(self, data, url=None):
    """Log in to zhihu with a phone-number or e-mail account.

    :param data: dict with "account" and "password" keys.
    :param url: optional override of the login endpoint.

    On success the session cookies are saved to cookie.txt; on a captcha
    error a new captcha is fetched and the login is retried.
    """
    # Fix: the original pattern "^1{0-9}{10}$" treated {0-9} as a literal
    # brace expression; a mainland phone number is 1 + ten digits.
    if re.match(r"^1[0-9]{10}$", data["account"]):
        account_type = "phone_num"
        url_login = url or "https://www.zhihu.com/login/phone_num"
    elif re.match(r".+@.+\.com", data["account"]):
        account_type = "email"
        url_login = url or "https://www.zhihu.com/login/email"
    else:
        print("账号类型错误")
        # Fix: the original fell through here with url_login unbound,
        # raising NameError on the post below.
        return
    self._data = {
        "_xsrf": self._session.cookies.get("_xsrf", ""),
        "password": data.get("password", ""),
        "captcha": self._captcha,
        "remember_me": "true",
        account_type: data.get("account", "")
    }
    self._headers["X-Xsrftoken"] = self._session.cookies.get("_xsrf", "")
    self._r = self._session.post(url_login, data=self._data, headers=self._headers)
    if self._r.status_code != 200:
        print("提交数据失败")
        return
    self._response_json = json.loads(self._r.content.decode("utf-8"))
    if self._response_json["r"] == 0:
        print(self._response_json["msg"])
        # save cookies
        lwpcookie = LWPCookieJar('cookie.txt')
        cookiejar_from_dict({
            c.name: c.value for c in self._session.cookies}, lwpcookie)
        lwpcookie.save(ignore_discard=True)
    elif self._response_json["errcode"] in [1991829, 100005]:
        # captcha required/expired: refresh it and retry.
        # Fix: the bare self.login() raised TypeError (missing `data`).
        print(self._response_json["msg"])
        self.get_captcha()
        self.login(data, url)
    else:
        print("未知的错误")
def __init__(self, username=None, password=None, cookiefile=None, logger=None):
    """HTTP client with persistent cookies; performs a fresh login when no
    cookie file exists and credentials were supplied."""
    self.bandwidth = 0
    self.logger = logger or SilentLogger()
    self.cookiefile = cookiefile or COOKIEFILE
    self.cj = LWPCookieJar()
    have_cookies = os.path.isfile(self.cookiefile)
    if have_cookies:
        self.cj.load(self.cookiefile)
    self.opener = build_opener(HTTPCookieProcessor(self.cj))
    self.opener.addheaders = [("User-Agent", USER_AGENT),
                              ("Accept-Encoding", "gzip")]
    if not have_cookies and username is not None and password is not None:
        self.logger.log("Logging in")
        self.log_in(username, password)
    self.html_encoding = "utf-8"
def __init__(self, args):
    """Bugzilla client setup from an args mapping: credentials, persisted
    cookie jar, cached API token and the XML-RPC proxy."""
    self.user = args['user']
    self.password = args['password']
    self.skip_auth = args['skip_auth']

    cookie_file = os.path.join(os.environ['HOME'], DEFAULT_COOKIE_FILE)
    self.cookiejar = LWPCookieJar(cookie_file)
    try:
        self.cookiejar.load()
    except IOError:
        pass  # no cookies saved yet

    self.token_file = os.path.join(os.environ['HOME'], DEFAULT_TOKEN_FILE)
    try:
        # Fix: the bare open().read() leaked the file handle.
        with open(self.token_file) as fh:
            self.token = fh.read().strip()
    except IOError:
        self.token = None

    log_info("Using %s " % args['url'])
    self.bz = BugzillaProxy(args['url'], cookiejar=self.cookiejar)
def __init__(self, domain = None, agents = None, lang="en", use_cookie=None):
    """Prepare the HTTP session and, unless a cookie pool is supplied,
    a cookie jar persisted in the user's home directory."""
    self._agent = agents if agents is not None else (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36')
    self._lang = lang
    self._domain = GTranslate.DOMAIN if domain is None else domain
    self._use_cookie = use_cookie
    self._session = requests.Session()
    if use_cookie:
        # Cookies will be drawn from the caller-supplied pool per request.
        self._cookie_jar = None
    else:
        # Persist cookies in the home directory ('.' as a last resort).
        folder = os.getenv('HOME')
        if not folder:
            folder = os.getenv('USERHOME')
            if not folder:
                folder = '.'
        self._cookie_jar = LWPCookieJar(os.path.join(folder, '.google-cookie'))
        self._session.cookies = self._cookie_jar
        try:
            self._cookie_jar.load()
        except Exception:
            pass
class Opener(DefaultOpener):
    """Advanced Opener object: urllib opener with LWP cookie persistence."""

    def __init__(self, cookie_path=None):
        # The jar is bound to cookie_path; saved cookies are loaded eagerly.
        self.opener = build_opener()
        self.cookiejar = LWPCookieJar(cookie_path)
        # Short-circuit chain: load only when a path was given and exists.
        cookie_path and os.path.exists(cookie_path) and self.cookiejar.load()
        self.opener.add_handler(HTTPCookieProcessor(self.cookiejar))
        self.opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko/20120101 Firefox/11.0')]

    def set_headers(self, arg={}, **kwargs):
        """Set request headers.

        @arg(pairs/dict): header pairs
        @kwargs(mixed): additional header values
        """
        if not arg and not kwargs:
            return
        # Merge over the current defaults; later values win.
        headers = dict(self.opener.addheaders)
        headers.update(arg)
        headers.update(kwargs)
        self.opener.addheaders = list(headers.items())

    def set_cookies(self, *args, **kwargs):
        """Set cookies.

        @args(pairs/dict): cookie field mappings
        @kwargs(mixed): cookie fields
        """
        # NOTE(review): DefaultCookiePolicy is a cookie *policy*, not a
        # Cookie; passing it to set_cookie() looks wrong — this probably
        # meant to construct http.cookiejar.Cookie objects.  Confirm
        # before relying on this method.
        for arg in args:
            cookie = DefaultCookiePolicy(**dict(arg))
            self.cookiejar.set_cookie(cookie)
        kwargs and self.cookiejar.set_cookie(DefaultCookiePolicy(**kwargs))

    def save_cookies(self, cookie_path=None):
        # Save to the explicit path, else to the jar's own filename.
        if cookie_path or self.cookiejar.filename:
            self.cookiejar.save(cookie_path)
def __init__(self, cookie_path=None):
    """Build an opener with cookie handling and a Firefox User-agent."""
    self.opener = build_opener()
    self.cookiejar = LWPCookieJar(cookie_path)
    # Load previously saved cookies only when a path was given and exists.
    if cookie_path and os.path.exists(cookie_path):
        self.cookiejar.load()
    self.opener.add_handler(HTTPCookieProcessor(self.cookiejar))
    self.opener.addheaders = [
        ('User-agent',
         'Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko/20120101 Firefox/11.0'),
    ]
class GSearch:
    """Scraper for Google web-search result pages."""

    DOMAIN = "www.google.com.hk"
    # Matches the "About 1,234,567 results" counter on the results page.
    NUM_RE = re.compile(r"([\d,]+)")

    def __init__(self, lang="en", domain=None, result_per_page=10,
                 agents=None, pause=2.0, safe="off", use_cookie=None):
        """
        :param lang: interface language (``hl`` parameter).
        :param domain: Google host, or a list of hosts to pick from randomly.
        :param result_per_page: results requested per page.
        :param agents: User-Agent string to send.
        :param pause: seconds to wait between HTTP requests.
        :param safe: SafeSearch setting ("off"/"on").
        :param use_cookie: optional pool of cookie jars; when falsy, a jar
            persisted in the user's home folder is used instead.
        """
        if domain is None:
            domain = GSearch.DOMAIN
        if not isinstance(domain, str):  # it's an array of candidate hosts
            domain = RandString(domain)  # randomly select a domain
        if agents is None:
            agents = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36'
        self._results_per_page = result_per_page
        self._lang = lang
        self._domain = str(domain)
        self._ori_domain = domain
        self._agent = agents
        self._pause = pause  # lapse to wait between HTTP requests
        self._safe = safe
        self._use_cookie = use_cookie
        self._session = requests.Session()
        if not use_cookie:
            # Persist cookies under the user's home folder.
            home_folder = os.getenv('HOME')
            if not home_folder:
                home_folder = os.getenv('USERHOME')
                if not home_folder:
                    home_folder = '.'  # use the current folder on error
            self._cookie_jar = LWPCookieJar(os.path.join(home_folder, '.google-cookie'))
            self._session.cookies = self._cookie_jar
            try:
                self._cookie_jar.load()
            except Exception:
                pass
        else:
            self._cookie_jar = None

    def reset_domain(self):
        """Restore the domain chosen at construction time."""
        self._domain = str(self._ori_domain)

    def __call__(self, query, tbs='0', start=0, stop=None, pause=2.0,
                 extra_params=None, tpe='', debug=False):
        """
        Run a search and yield results.

        :param query: str, query string
        :param tbs: str, time limit, e.g. "qdr:h" (last hour),
            "qdr:d" (last 24 hours), "qdr:m" (last month)
        :param start: int, first result to retrieve.
        :param stop: int, last result to retrieve (None = no limit).
        :param extra_params: extra GET parameters, e.g. {'filter': '0'}
            to stop Google filtering similar results.
        :param tpe: search type ('vid', 'isch', 'nws', 'shop', 'bks', 'app').
        :return: generator; first element is the total hit count, second is
            the list of related searches, then (title, url, body) tuples.
        """
        # BUG FIX: default changed from a shared mutable {} to None.
        if extra_params is None:
            extra_params = {}
        if self._use_cookie:
            self._cookie_jar = random.choice(self._use_cookie)
        query = quote_plus(query)
        # Check extra_params for overlap with the built-in parameters.
        for builtin_param in ('hl', 'q', 'btnG', 'tbs', 'safe', 'tbm'):
            if builtin_param in extra_params.keys():
                # BUG FIX: the message is now actually %-formatted; the
                # original passed the name as a second ValueError argument.
                raise ValueError(
                    'GET parameter "%s" is overlapping with the built-in GET parameter'
                    % builtin_param)
        # Grab the cookie from the home page.
        self.page(self.home_url(), debug)
        havent_yield = True
        url = self.gurl(query, tbs, tpe, start)
        while not stop or start < stop:
            # Append extra GET parameters to the URL.
            # BUG FIX: the original did `url += url + (...)`, which doubled
            # the whole URL for every extra parameter.
            for k, v in extra_params.items():
                url += '&%s=%s' % (k, v)
            # Sleep between requests.
            time.sleep(float(pause))
            # Request the Google Search results page.
            html, code = self.page(url, debug)
            if debug:
                print("page:%s is crawled" % url, file=sys.stderr)
            if code != 200:
                if debug:
                    print("status code is %d" % code)
                    print("content:%s" % html)
                return {"start": start, "query": query, "status": code}
            # Parse the response and process every anchored URL.
            soup = BeautifulSoup(html)
            total_num = int(self.NUM_RE.findall(
                soup.select("#resultStats")[0].text)[0].replace(",", ""))
            if havent_yield:
                yield total_num
                yield [x.text for x in soup.select(".brs_col a")]
                havent_yield = False
            for res in soup.select(".g .rc"):
                e = res.select(".r a")[0]
                res_title = e.text
                res_url = e["href"]
                res_body = res.select(".st")[0]
                yield (res_title, res_url, res_body)
            # End if there are no more results.
            if not soup.find(id='nav'):
                break
            # Prepare the URL for the next request.
            start += self._results_per_page
            url = self.gurl(query, tbs, tpe, start)

    def home_url(self):
        """URL of the Google home page (used to pick up cookies)."""
        return "http://%s/" % (str(self._domain))

    def gurl(self, query, tbs, tpe, start):
        """Build the results-page URL for the given offset."""
        if start:
            return self.next_page_url(query, start, tbs, tpe)
        return self.search_url(query, tbs, tpe)

    def search_url(self, query, tbs, tpe):
        """First-page URL; omits num= when using the default page size."""
        if self._results_per_page == 10:
            return self.search_no_num_url(query, tbs, tpe)
        else:
            return self.search_num_url(query, tbs, tpe)

    def search_no_num_url(self, query, tbs, tpe):
        return "http://%s/search?hl=%s&q=%s&btnG=Google+Search&tbs=%s&safe=%s&tbm=%s" % (str(self._domain), self._lang, query, tbs, self._safe, tpe)

    def search_num_url(self, query, tbs, tpe):
        return "http://%s/search?hl=%s&q=%s&num=%d&btnG=Google+Search&tbs=%s&safe=%s&tbm=%s" % (str(self._domain), self._lang, query, self._results_per_page, tbs, self._safe, tpe)

    def next_page_url(self, query, start, tbs, tpe):
        """Follow-up-page URL; omits num= when using the default page size."""
        if self._results_per_page == 10:
            return self.next_page_no_num_url(query, start, tbs, tpe)
        else:
            return self.next_page_num_url(query, start, tbs, tpe)

    def next_page_no_num_url(self, query, start, tbs, tpe):
        return "http://%s/search?hl=%s&q=%s&start=%d&tbs=%s&safe=%s&tbm=%s" % (str(self._domain), self._lang, query, start, tbs, self._safe, tpe)

    def next_page_num_url(self, query, start, tbs, tpe):
        return "http://%s/search?hl=%s&q=%s&num=%d&start=%d&tbs=%s&safe=%s&tbm=%s" % (str(self._domain), self._lang, query, self._results_per_page, start, tbs, self._safe, tpe)

    def page(self, url, debug=False):
        """GET *url* and return (body_text, status_code)."""
        agent = str(self._agent)
        if debug:
            print("user-agent:%s" % agent)
        if self._use_cookie:
            res = self._session.get(url, headers={'User-Agent': agent},
                                    cookies=self._cookie_jar, verify=False)
        else:
            res = self._session.get(url, headers={'User-Agent': agent}, verify=False)
        return res.text, res.status_code

    def filter_result(self, link):
        """Return *link* if it is a real (non-Google) result URL, else None."""
        try:
            # Valid results are absolute URLs not pointing to a Google domain
            # like images.google.com or googleusercontent.com
            o = urlparse(link, 'http')
            if o.netloc and 'google' not in o.netloc:
                return link
            # Decode hidden URLs.
            if link.startswith('/url?'):
                link = parse_qs(o.query)['q'][0]
                o = urlparse(link, 'http')
                if o.netloc and 'google' not in o.netloc:
                    return link
        # Otherwise, or on error, return None.
        except Exception:
            pass
        return None
class GTranslate:
    """Thin client for Google Translate's unofficial JSON endpoint."""

    DOMAIN = "translate.googleapis.com"

    def __init__(self, domain = None, agents = None, lang="en", use_cookie=None):
        """Set up the HTTP session and, unless a cookie pool is given,
        a cookie jar persisted in the user's home directory."""
        if domain is None:
            domain = GTranslate.DOMAIN
        if agents is None:
            agents = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.155 Safari/537.36'
        self._agent = agents
        self._lang = lang
        self._domain = domain
        self._use_cookie = use_cookie
        self._session = requests.Session()
        if use_cookie:
            # Cookies will be drawn from the caller-supplied pool later.
            self._cookie_jar = None
        else:
            folder = os.getenv('HOME')
            if not folder:
                folder = os.getenv('USERHOME')
                if not folder:
                    folder = '.'  # current folder as a last resort
            self._cookie_jar = LWPCookieJar(os.path.join(folder, '.google-cookie'))
            self._session.cookies = self._cookie_jar
            try:
                self._cookie_jar.load()
            except Exception:
                pass

    def __call__(self, message, lang_to='en', lang_from="auto", raw=False):
        """
        Crawl Google Translate.

        :param message: text to translate
        :param lang_to: destination language
        :param lang_from: source language ("auto" to detect)
        :param raw: when True, return the decoded JSON payload untouched
        :return: dict of extracted fields, or the raw payload
        """
        if self._use_cookie:
            self._cookie_jar = random.choice(self._use_cookie)
        if lang_to not in languages:
            raise Exception("Language %s is not supported as lang_to." % lang_to)
        if lang_from not in languages and lang_from != 'auto':
            raise Exception("Language %s is not supported as lang_from." % lang_from)
        encoded = quote(message)
        endpoint = self.translate_url(encoded, lang_from, lang_to)
        # The endpoint elides empty array slots ("[,,1]"); patch them with
        # nulls so json.loads accepts the payload.
        payload = json.loads(re.sub(r"(,|\[)(?=,|])", "\\1 null", self.page(endpoint)))
        if raw:
            return payload
        return dict(
            res=self._res(payload),
            lang_detect=self._lang_detect(payload),
            ref=self._ref(payload),
            example=self._example(payload),
            pos=self._pos(payload),
            tpos=self._transed_pos(payload),
        )

    def _res(self, data):
        # translated text segments
        return data[0]

    def _lang_detect(self, data):
        # (language, confidence) pairs
        return list(zip(data[8][0], data[8][2]))

    def _ref(self, data):
        return data[14]

    def _example(self, data):
        return data[13]

    def _pos(self, data):
        entries = data[12]
        return {} if entries is None else {e[0]: e[1] for e in entries}

    def _transed_pos(self, data):
        entries = data[1]
        return {} if entries is None else {e[0]: e[1] for e in entries}

    def page(self, url, debug=False):
        """GET *url* and return the response body text."""
        agent = str(self._agent)
        if debug:
            print("user-agent:%s" % agent)
        kwargs = {'headers': {'User-Agent': agent}, 'verify': False}
        if self._use_cookie:
            kwargs['cookies'] = self._cookie_jar
        return self._session.get(url, **kwargs).text

    def translate_url(self, txt, f, t):
        """Build the translate_a/single URL for *txt* from lang *f* to *t*."""
        return "http://%s/translate_a/single?client=gtx&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&dt=at&sl=%s&tl=%s&q=%s" % (self._domain, f, t, txt)
class Usr_login:
    """Log in to Sina Weibo through the SSO endpoint (rsa2 scheme)."""

    def __init__(self, user="", password="", cookieDir='.', proxy_IP=''):
        """
        :param user: account name (e-mail/phone).
        :param password: account password.
        :param cookieDir: directory where the per-user cookie file lives.
        :param proxy_IP: optional http proxy "host:port".
        """
        http_header = [
            ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.65 Safari/537.36'),
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
            ('Accept-Charset', 'utf-8,ISO-8859-1;q=0.7,*;q=0.3'),
            ('Accept-Encoding', 'none'),
            ('Accept-Language', 'en-US,en;q=0.8'),
            ('Connection', 'keep-alive'),
            ('Host', 's.weibo.com'),
        ]
        self.cookieFile = cookieDir + '/' + user
        # Seed the cookie file with the LWP magic header so that
        # LWPCookieJar.load() accepts it.
        # NOTE(review): this truncates any previously saved cookies on
        # every construction — presumably a deliberate fresh login; confirm.
        # BUG FIX: use a context manager instead of a dangling handle.
        with open(self.cookieFile, 'w') as ouf:
            ouf.write('#LWP-Cookies-2.0')
        self.cj = LWPCookieJar(self.cookieFile)
        self.cj.load(ignore_discard=True, ignore_expires=True)
        self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cj))
        self.opener.add_handler(urllib.request.ProxyHandler(proxies={"http": proxy_IP}))
        self.opener.addheaders = http_header
        # get user information from params
        self.user, self.password = user, password
        # encode userid: base64 of the url-quoted account name.
        # BUG FIX: base64.encodestring was removed in Python 3.9;
        # encodebytes is the identical replacement.
        userid = bytes(urllib.parse.quote(self.user), 'utf-8')
        self.encode_userid = base64.encodebytes(userid)[:-1]

    # https to login
    # rsa2
    def ssologin(self):
        """Perform the two-step SSO login.

        Returns (True, cookie_file) on success, or (False, reason) on
        failure/timeout.
        """
        # Step 1: prelogin — fetch pubkey, rsakv, servertime and nonce.
        url = "https://login.sina.com.cn/sso/prelogin.php?entry=sso&callback=sinaSSOController.preloginCallBack&su=&rsakt=mod&client=ssologin.js(v1.4.11)"
        time.sleep(0.5)
        print('Pubkey, rsakv, servertime and nonce obtained')
        res = self.opener.open(url).read().decode('utf-8')
        res = res[res.find("(") + 1:-1]  # strip the JSONP wrapper
        data = json.loads(res)
        nonce = data["nonce"]
        pubkey = data["pubkey"]
        rsakv = data["rsakv"]
        servertime = data["servertime"]
        # init rsa object; 65537 is the conventional public exponent
        rsaPublickey = int(pubkey, 16)
        key = rsa.PublicKey(rsaPublickey, 65537)
        message = str(servertime) + '\t' + str(nonce) + '\n' + str(self.password)
        message = bytes(message, "utf-8")
        sp = rsa.encrypt(message, key)
        sp = binascii.b2a_hex(sp)
        # Step 2: post the encrypted credentials.
        baseurl = "https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.11)"
        params = {"entry": "sinaoauth", "gateway": 1, "from": "", "savestate": 0,
                  "useticket": 1, "vsnf": 0, "s": 1, "su": self.encode_userid,
                  "service": "sinaoauth", "servertime": servertime, "nonce": nonce,
                  "pwencode": "rsa2", "rsakv": rsakv, "sp": sp, "encoding": "UTF-8",
                  "callback": "sinaSSOController.loginCallBack", "cdult": 2, "prelt": 83,
                  "returntype": "TEXT"}
        # BUG FIX: baseurl already carries "?client=...", so extra
        # parameters must be joined with "&"; the original appended a
        # second "?", producing a malformed URL.
        rurl = baseurl + "&" + urllib.parse.urlencode(params)
        time.sleep(0.5)
        res = self.opener.open(rurl).read()
        res = res.decode('utf-8')
        print('Login responsed obtained')
        self.cj.save(ignore_discard=True, ignore_expires=True)
        pos1 = res.find('(')
        pos2 = res.find(')')
        js = json.loads(res[pos1 + 1:pos2])
        retcode = js["retcode"]
        print(js)
        if retcode == '0':
            # Follow the first cross-domain URL so cookies are set site-wide.
            url = js["crossDomainUrlList"][0]
            time.sleep(0.5)
            try:
                self.opener.open(url, timeout=60)
            except Exception:  # narrowed from a bare except
                return False, 'Connection Timeout'
            self.cj.save(ignore_discard=True, ignore_expires=True)
            self.opener.close()
            return True, self.cookieFile
        else:
            self.opener.close()
            return False, str(js)
class Opener:
    """urllib-based opener with cookie persistence and header helpers."""

    def __init__(self, cookie_path=None):
        self.opener = build_opener()
        self.cookiejar = LWPCookieJar()
        self.set_cookie_path(cookie_path)
        self.opener.add_handler(HTTPCookieProcessor(self.cookiejar))
        self.opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko/20120101 Firefox/11.0')]
        self.last_request = None
        self.last_response = None

    @property
    def request_info(self):
        # placeholder — not implemented yet
        pass

    @property
    def response_info(self):
        # placeholder — not implemented yet
        pass

    def set_cookie_path(self, path):
        """Point the jar at *path* and load it if the file exists."""
        self.cookiejar.filename = path
        try:
            os.path.exists(path) and self.cookiejar.load()
        except Exception:  # narrowed from a bare except; also swallows path=None
            pass

    def set_headers(self, arg=None, **kwargs):
        """Merge *arg* (mapping/pairs) and keyword args into the defaults.

        BUG FIX: the mutable default ``arg={}`` was replaced by None.
        """
        arg = {} if arg is None else arg
        if not arg and not kwargs:
            return
        headers = dict(self.opener.addheaders)
        headers.update(arg)
        headers.update(kwargs)
        self.opener.addheaders = list(headers.items())

    def set_cookies(self, *args, **kwargs):
        """Set cookies from field mappings.

        NOTE(review): DefaultCookiePolicy is a cookie *policy*, not a
        Cookie; passing it to set_cookie() looks wrong (probably should
        build http.cookiejar.Cookie objects).  Preserved as-is because
        the intended Cookie fields are unknown from this file.
        """
        for arg in args:
            cookie = DefaultCookiePolicy(**arg)
            self.cookiejar.set_cookie(cookie)
        kwargs and self.cookiejar.set_cookie(DefaultCookiePolicy(**kwargs))

    def save_cookies(self, cookie_path=None):
        """Save cookies to *cookie_path*, or to the jar's own filename."""
        if cookie_path or self.cookiejar.filename:
            self.cookiejar.save(cookie_path)

    def urlopen(self, url, param=None, data=None, headers=None, proxies=None,
                timeout=None, encoding='utf8', errors='strict'):
        """Open the target URL and return an HTTPResponse object.

        @url(str/Request): target URL.
        @param(str/dict/pairs tuple): query string to append.
        @data(bytes/str/dict): POST data.
        @headers(dict): HTTP request headers.
        @proxies(dict): e.g. {'http': 'xx.xx.xx.xx:3128'}.
        @timeout(int): request timeout.
        @encoding/errors(str): URL encoding.
        """
        headers = headers or {}
        proxies = proxies or {}
        if param:
            full_url = url.get_full_url() if isinstance(url, Request) else url
            url_parse_dict = urlparse(full_url)._asdict()
            # BUG FIX: urlencode's 2nd/3rd positional parameters are doseq
            # and safe — encoding/errors must be passed by keyword.
            extra = (param if isinstance(param, str)
                     else urlencode(param, encoding=encoding, errors=errors))
            base_query = url_parse_dict.get('query')
            # BUG FIX: join with '&' when the URL already has a query
            # string; the original concatenated them directly.
            url_parse_dict['query'] = base_query + '&' + extra if base_query else extra
            full_url = urlunparse(url_parse_dict.values())
            request = Request(full_url)
        else:
            request = url if isinstance(url, Request) else Request(url)
        if data:
            if isinstance(data, bytes):
                request.data = data
            elif isinstance(data, str):
                request.data = data.encode(encoding, errors)
            else:
                request.data = urlencode(data).encode(encoding, errors)
        for key, value in headers.items():
            request.add_header(key, value)
        for proxy_type, proxy_host in proxies.items():
            request.set_proxy(proxy_host, proxy_type)
        self.last_request = request
        self.last_response = self.opener.open(request, timeout=timeout)
        return self.last_response

    def clear(self):
        """Drop references to the last request/response pair."""
        self.last_request = None
        self.last_response = None
BeautifulSoup = None # URL templates to make Google searches. url_home = "http://www.google.%(tld)s/" url_search = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&tbm=%(tpe)s" url_next_page = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&tbm=%(tpe)s" url_search_num = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&tbm=%(tpe)s" url_next_page_num = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&tbm=%(tpe)s" # Cookie jar. Stored at the user's home folder. home_folder = os.getenv('HOME') if not home_folder: home_folder = os.getenv('USERHOME') if not home_folder: home_folder = '.' # Use the current folder on error. cookie_jar = LWPCookieJar(os.path.join(home_folder, '.google-cookie')) try: cookie_jar.load() except Exception: pass class GoogleResult(): def __init__(self): self.title = "" self.url = "" # Request the given URL and return the response page, using the cookie jar. def get_page(url):
class ClansSession(object):
    """
    This object is created on each `clans` incantation as a storage
    place for configuration info, command-line arguments, and other
    stuff
    """

    def __init__(self, profile_dir=None):
        # profile folder: either passed directly (for testing only),
        # set by CLANS_DIR environment variable, or the standard user
        # data directory for this OS
        self.profile_dir = profile_dir or (
            os.environ.get('CLANS_DIR', '') or
            appdirs.user_data_dir(appname='clans', appauthor='baldwint'))
        # config file location: in data directory
        self.config_loc = os.path.join(self.profile_dir, 'clans.cfg')
        # load config, extensions, and define command line args
        self.config = self._load_config()
        self.extensions = self._load_extensions()
        self.formatters = self._load_formatters()
        self.commands = self._load_commands()
        # let extensions modify command list
        self.hook('post_load_commands')

    def run(self, argv=None):
        """
        Clans main function, run with specified arguments
        """
        # get command line arguments
        self.args = self.commands.main.parse_args(argv)
        self.args = vars(self.args)
        # let command line args override equivalent config file settings
        self.username = (self.args['username'] or
                         self.config.get('login', 'username'))
        try:
            # pass execution to the subcommand
            func = self.args['func']
            func(self)
        finally:
            # do this part always, even if subcommand fails
            self.finish()

    def _load_config(self):
        # set config file defaults
        config = ConfigParser()
        config.add_section('login')
        config.set('login', 'username', '')
        config.set('login', 'url', 'https://www.grinnellplans.com')
        config.add_section('clans')
        # text editor: either specified in config file, or $EDITOR, or pico
        config.set('clans', 'editor', os.environ.get('EDITOR', 'pico'))
        config.set('clans', 'format', 'raw')
        config.set('clans', 'timezone', '')
        config.set('clans', 'date_format', '')
        # create profile directory if it doesn't exist
        try:
            # 0700 for secure-ish cookie storage.
            os.makedirs(self.profile_dir, 0o700)
        except OSError:
            pass  # already exists
        # read user's config file, if present
        config.read(self.config_loc)
        return config

    def _load_extensions(self):
        """
        Load Clans extensions.

        reads the config file for extension information, and loads
        extensions as python modules into an ordered dictionary.
        """
        extensions = OrderedDict()
        if self.config.has_section('extensions'):
            for name, path in self.config.items('extensions'):
                try:
                    if not path:
                        # if no value is specified,
                        # assume it is for a built-in extension
                        path = 'clans.ext.%s' % name
                    mod = importlib.import_module(path)
                    assert mod.__name__ == path
                except ImportError:
                    print('Failed to load extension "%s".' % name,
                          file=sys.stderr)
                else:
                    extensions[name] = mod
        return extensions

    def _load_formatters(self):
        """
        Load output formatters.
        """
        formatters = {
            'raw': clans.fmt.RawFormatter,
            'json': clans.fmt.JSONFormatter,
            'text': clans.fmt.TextFormatter,
            'color': clans.fmt.ColorFormatter,
        }
        return formatters

    def hook(self, name, *args, **kwargs):
        """
        Call the method named ``name`` in every loaded extension.

        Returns: a list of return values.
        """
        def run_hook(ext, name):
            func = getattr(ext, name, None)
            if func is not None:
                return func(self, *args, **kwargs)
            else:
                return None
        results = [run_hook(ext, name) for ext in self.extensions.values()]
        return results

    def _load_commands(self):
        # define command line arguments
        # globals: options/arguments inherited by all parsers, including root
        global_parser = argparse.ArgumentParser(add_help=False)
        global_parser.add_argument(
            '-u', '--username', dest='username', default='',
            help='GrinnellPlans username, no brackets.')
        global_parser.add_argument(
            '-p', '--password', dest='password', default='',
            help='GrinnellPlans password. Omit for secure entry.')
        global_parser.add_argument(
            '--logout', dest='logout',
            action='store_true', default=False,
            help='Log out before quitting.')
        global_parser.add_argument(
            '--version', action='version',
            version='%(prog)s ' + clans.__version__,
            help='Show clans version number and exit.')
        # filters: options/arguments for those commands that format text
        filter_parser = argparse.ArgumentParser(add_help=False)
        filter_parser.add_argument(
            '--format', dest='fmt',
            default=self.config.get('clans', 'format'),
            choices=self.formatters,
            help="Display format to use")
        # main parser: has subcommands for everything
        commands = CommandSet(
            description=__doc__,
            parents=[global_parser],
            formatter_class=argparse.RawTextHelpFormatter)
        # edit parser: options/arguments for editing plans
        commands.add_command(
            'edit', edit, parents=[global_parser],
            description='Opens your plan for editing in a text editor.',
            help='Edit your plan.')
        commands["edit"].add_argument(
            '--from-file', dest='source_file', default=False,
            metavar='FILE',
            help="Replace plan with the contents of FILE. "
                 "Skips interactive editing."
                 " Use with caution!")
        # read parser
        commands.add_command(
            'read', read, parents=[global_parser, filter_parser],
            description="Read someone else's plan.",
            help="Read a plan.",)
        commands["read"].add_argument(
            'plan', default=False, metavar='PLAN',
            help="Name of plan to be read.")
        # autoread list parser
        commands.add_command(
            'list', autoread, parents=[global_parser, filter_parser],
            description="Display unread plans on your autoread list.",
            help="Display autoread list.",)
        # quicklove parser
        commands.add_command(
            'love', love, parents=[global_parser, filter_parser],
            description="Search for other users giving you planlove.",
            help="Check quicklove.",)
        # search parser
        commands.add_command(
            'search', search, parents=[global_parser, filter_parser],
            description="Search plans for any word or phrase.",
            help="Search plans for any word or phrase.",)
        commands["search"].add_argument(
            'term', default=False, metavar='TERM',
            help="Term to search for.")
        commands["search"].add_argument(
            '-l', '--love', dest='love',
            action='store_true', default=False,
            help="Restrict search to planlove.")
        # watch parser
        commands.add_command(
            'watch', watch, parents=[global_parser, filter_parser],
            description="See recently updated plans.",
            help="See recently updated plans.",)
        commands["watch"].add_argument(
            'hours', type=int, nargs='?', default=12, metavar='HOURS',
            help="Specify how many hours' worth of plan updates to show.")
        # config parser
        commands.add_command(
            'config', config, parents=[global_parser, filter_parser],
            description="The clans config file sets the default"
                        " behavior of the client."
                        " (Not to be confused with Plans preferences!)",
            help="Edit clans configuration file.")
        commands["config"].add_argument(
            '--dir', dest='profile_dir',
            action='store_true', default=False,
            help="Print the path to the clans profile directory.")
        return commands

    def make_plans_connection(self):
        """
        Connects to plans, prompting for passwords if necessary
        """
        # create a cookie
        self.cookie = LWPCookieJar(
            os.path.join(self.profile_dir, '%s.cookie' % self.username))
        try:
            self.cookie.load()  # fails with IOError if it does not exist
        except IOError:
            pass  # no cookie saved for this user
        # create plans connection using cookie
        pc = PlansConnection(self.cookie,
                             base_url=self.config.get('login', 'url'))
        if pc.plans_login():
            pass  # we're still logged in
        else:
            # we're not logged in, prompt for password if necessary
            # NOTE(review): the source is corrupted below (a "******"
            # redaction destroyed the tail of this method and the header
            # of the formatter method); preserved verbatim.
            password = (self.args['password'] or
                        getpass("[%s]'s password: "******"""
        Initialize and return the appropriate output formatter.
        """
        Fmt = self.formatters[self.args['fmt']]
        kwargs = {}
        kwargs['timezone'] = self.config.get('clans', 'timezone') or None
        kwargs['date_format'] = self.config.get('clans', 'date_format') or None
        # if a key is None, remove it and rely on the default
        kwargs = dict((k,v) for k,v in kwargs.items() if v is not None)
        fmt = Fmt(**kwargs)
        return fmt

    def finish(self):
        """
        Cookie-related cleanup. Either save the updated cookie, or
        delete it to log out
        """
        if not hasattr(self, 'cookie'):
            return  # no plans connection was made
        elif self.args['logout']:
            os.unlink(self.cookie.filename)
        else:
            # save cookie
            self.cookie.save()
class CookiePageGetter(object):
    """Fetch pages through an opener that persists cookies to disk."""

    def __init__(self, username=None, password=None, cookiefile=None, logger=None):
        self.bandwidth = 0  # total bytes received so far
        self.logger = logger or SilentLogger()
        self.cookiefile = cookiefile or COOKIEFILE
        self.cj = LWPCookieJar()
        if os.path.isfile(self.cookiefile):
            self.cj.load(self.cookiefile)
        self.opener = build_opener(HTTPCookieProcessor(self.cj))
        self.opener.addheaders = [("User-Agent", USER_AGENT)]
        self.opener.addheaders.append(("Accept-Encoding", "gzip"))
        if not os.path.isfile(self.cookiefile) and username is not None and password is not None:
            self.logger.log("Logging in")
            self.log_in(username, password)
        self.html_encoding = "utf-8"

    def log_in(self, username, password):
        # Stub for subclasses.  BUG FIX: the original took no arguments
        # and raised TypeError when __init__ called it with credentials.
        pass

    def _read_in_chunks(self, response):
        """Read the whole response in 1 KiB chunks, counting bandwidth."""
        data = bytes()
        while True:
            small_data = response.read(1024)
            self.bandwidth += len(small_data)
            if len(small_data) == 0:
                break
            data += small_data
        return data

    def _open_with_retry(self, request):
        # sometimes there are timeouts and such - try to open the page
        # 10 times until giving up
        for i in range(10):
            try:
                response = self.opener.open(request)
                if i != 0:
                    self.logger.log("* Recovered from open error after %d tries" % i)
                return response
            except Exception:  # narrowed from a bare except; still best-effort
                pass
        return None

    def _prepare_request(self, request, additional_headers, post_data):
        """Apply extra headers and POST data to *request* in place."""
        if additional_headers is not None:
            for k, v in additional_headers.items():
                request.add_header(k, v)
        if post_data is not None:
            # BUG FIX: Request.add_data() only existed in Python 2; on
            # Python 3 the payload is assigned to the .data attribute.
            request.data = urlencode(post_data).encode("ascii")

    def _request(self, url, additional_headers=None, post_data=None):
        """Fetch *url*, persist cookies, and return the (gunzipped) body."""
        request = Request(url)
        self._prepare_request(request, additional_headers, post_data)
        response = self._open_with_retry(request)
        if response is None:
            self.logger.log("* ERROR Could not open <%s>" % url)
            return bytes()
        data = self._read_in_chunks(response)
        self.cj.save(self.cookiefile)
        # handle gzip'd data
        if response.info().get("Content-Encoding") == "gzip":
            gzip_stream = BytesIO(data)
            gzip_file = gzip.GzipFile(fileobj=gzip_stream)
            data = gzip_file.read()
        self._update_encoding(response)
        return data

    def get_page_html(self, url, additional_headers=None, post_data=None):
        """Fetch *url* and decode it with the last seen charset."""
        data = self._request(url, additional_headers, post_data)
        data = data.decode(self.html_encoding)
        return data

    def download_binary(self, url, fname):
        """Fetch *url* and write the raw bytes to *fname*."""
        data = self._request(url)
        with open(fname, "wb") as out:  # BUG FIX: close the output file
            out.write(data)

    def _update_encoding(self, response):
        # Remember the charset advertised in the Content-Type header.
        content_type = response.info().get('content-type')
        if content_type is None:
            return
        matchobj = re.search("charset=([^;]+)", content_type)
        if matchobj is None:
            return
        self.html_encoding = matchobj.group(1)

    def _format_size(self, bytes):
        """Human-readable rendering of a byte count, e.g. '1 MB, 1 B'."""
        suffixes = ['T', 'G', 'M', 'K', '']
        bytes = [bytes]
        for i in range(len(suffixes) - 1):
            bytes = list(divmod(bytes[0], 1024)) + bytes[1:]
        return ', '.join(["%d %sB" % (val, suf)
                          for val, suf in zip(bytes, suffixes) if val != 0])

    def log_bandwidth(self):
        self.logger.log("%s transferred" % (self._format_size(self.bandwidth)))
def init_basicauth(config):
    """initialize urllib2 with the credentials for Basic Authentication"""

    def filterhdrs(meth, ishdr, *hdrs):
        # this is so ugly but httplib doesn't use
        # a logger object or such
        # Wraps *meth* so its debug output is captured from stdout and the
        # named sensitive headers are stripped before being re-emitted.
        def new_method(self, *args, **kwargs):
            # check if this is a recursive call (note: we do not
            # have to care about thread safety)
            is_rec_call = getattr(self, '_orig_stdout', None) is not None
            try:
                if not is_rec_call:
                    self._orig_stdout = sys.stdout
                    sys.stdout = StringIO()
                meth(self, *args, **kwargs)
                hdr = sys.stdout.getvalue()
            finally:
                # restore original stdout
                if not is_rec_call:
                    sys.stdout = self._orig_stdout
                    del self._orig_stdout
            for i in hdrs:
                if ishdr:
                    # NOTE: the doubled backslashes are intentional — the
                    # debug output contains literal "\r\n" escape text.
                    hdr = re.sub(r'%s:[^\\r]*\\r\\n' % i, '', hdr)
                else:
                    hdr = re.sub(i, '', hdr)
            sys.stdout.write(hdr)
        new_method.__name__ = meth.__name__
        return new_method

    if config['http_debug'] and not config['http_full_debug']:
        # Redact Cookie/Authorization values from http debug traces.
        HTTPConnection.send = filterhdrs(HTTPConnection.send, True, 'Cookie', 'Authorization')
        HTTPResponse.begin = filterhdrs(HTTPResponse.begin, False, 'header: Set-Cookie.*\n')

    if sys.version_info < (2, 6):
        # HTTPS proxy is not supported in old urllib2. It only leads to an error
        # or, at best, a warning.
        if 'https_proxy' in os.environ:
            del os.environ['https_proxy']
        if 'HTTPS_PROXY' in os.environ:
            del os.environ['HTTPS_PROXY']

    if config['http_debug']:
        # brute force
        def urllib2_debug_init(self, debuglevel=0):
            self._debuglevel = 1
        AbstractHTTPHandler.__init__ = urllib2_debug_init

    cookie_file = os.path.expanduser(config['cookiejar'])
    global cookiejar
    cookiejar = LWPCookieJar(cookie_file)
    try:
        cookiejar.load(ignore_discard=True)
    except IOError:
        try:
            # create an empty, owner-only-readable jar file (0600)
            fd = os.open(cookie_file, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600)
            os.close(fd)
        except IOError:
            # hmm is any good reason why we should catch the IOError?
            #print 'Unable to create cookiejar file: \'%s\'. Using RAM-based cookies.' % cookie_file
            cookiejar = CookieJar()
class PrettyBugz:
    """User-friendly front end for a Bugzilla XML-RPC endpoint.

    Persists session cookies and the login token under $HOME, transparently
    re-authenticates when the server reports "login required", and
    pretty-prints results for the common operations (search, get, post,
    modify, attach, attachment download).
    """

    def __init__(self, args):
        # Credentials / auth behavior come from the parsed CLI arguments.
        self.user = args['user']
        self.password = args['password']
        self.skip_auth = args['skip_auth']

        # Session cookies live in the user's home directory.
        cookie_file = os.path.join(os.environ['HOME'], DEFAULT_COOKIE_FILE)
        self.cookiejar = LWPCookieJar(cookie_file)
        try:
            self.cookiejar.load()
        except IOError:
            # No cookie file yet -- start with an empty jar.
            pass

        # A previously saved login token lets us skip a login round trip.
        self.token_file = os.path.join(os.environ['HOME'], DEFAULT_TOKEN_FILE)
        try:
            with open(self.token_file) as fd:
                self.token = fd.read().strip()
        except IOError:
            self.token = None

        log_info("Using %s " % args['url'])
        self.bz = BugzillaProxy(args['url'], cookiejar=self.cookiejar)

    def set_token(self, *args):
        """Inject the saved Bugzilla token into the first params dict, if any.

        Returns *args* unchanged when there are no args or no token.
        """
        if args and self.token:
            args[0]['Bugzilla_token'] = self.token
        return args

    def call_bz(self, method, *args):
        """Attempt to call method with args. Log in if authentication
        is required.
        """
        try:
            return method(*self.set_token(*args))
        except xmlrpc.client.Fault as fault:
            # Fault code 410 means login required
            if fault.faultCode == 410 and not self.skip_auth:
                self.login()
                return method(*self.set_token(*args))
            raise

    def login(self, args=None):
        """Authenticate a session with the stored username/password."""
        params = {}
        params['login'] = self.user
        params['password'] = self.password
        if args is not None:
            params['remember'] = True
        log_info('Logging in')
        try:
            self.bz.User.login(params)
        except xmlrpc.client.Fault as fault:
            # NOTE(review): this handler and logout() below were fused and
            # redacted ('******') in the source; reconstructed minimally.
            raise BugzError("Can't login: " + fault.faultString)

    def logout(self, args=None):
        """Terminate the authenticated session."""
        log_info('Logging out')
        try:
            self.bz.User.logout()
        except xmlrpc.client.Fault as fault:
            raise BugzError("Failed to logout: " + fault.faultString)

    def search(self, args):
        """Performs a search on the bugzilla database with the keywords
        given on the title (or the body if specified).
        """
        valid_keys = ['alias', 'assigned_to', 'component', 'creator',
                      'limit', 'offset', 'op_sys', 'platform',
                      'priority', 'product', 'resolution',
                      'severity', 'status', 'version', 'whiteboard']

        # (option, value) pairs, collected purely for logging.
        search_opts = sorted([(opt, val)
                              for opt, val in list(args.__dict__.items())
                              if val is not None and opt in valid_keys])

        params = {}
        for key in args.__dict__:
            if key in valid_keys and getattr(args, key) is not None:
                params[key] = getattr(args, key)

        # Robustness fix: only join args.terms when it is actually set;
        # ' '.join(None) would raise TypeError.
        if getattr(args, 'terms'):
            params['summary'] = args.terms
            search_term = ' '.join(args.terms).strip()
        else:
            search_term = ''

        if not (params or search_term):
            raise BugzError('Please give search terms or options.')

        if search_term:
            log_msg = 'Searching for \'%s\' ' % search_term
        else:
            log_msg = 'Searching for bugs '
        if search_opts:
            log_info(log_msg + 'with the following options:')
            for opt, val in search_opts:
                log_info(' %-20s = %s' % (opt, val))
        else:
            log_info(log_msg)

        if 'status' not in params:
            # Default to open bugs only.
            params['status'] = ['CONFIRMED', 'IN_PROGRESS', 'UNCONFIRMED']
        elif any(x in ('all', 'ALL') for x in params['status']):
            # 'all' means: no status filter at all.  (Fix: the original
            # deleted the key inside a loop over the list, raising
            # KeyError when 'all' appeared more than once.)
            del params['status']

        result = self.call_bz(self.bz.Bug.search, params)['bugs']

        if not result:
            log_info('No bugs found.')
        else:
            self.list_bugs(result, args)

    def get(self, args):
        """ Fetch bug details given the bug id """
        log_info('Getting bug %s ..' % args['bugid'])
        try:
            result = self.call_bz(self.bz.Bug.get, {'ids': [args['bugid']]})
        except xmlrpc.client.Fault as fault:
            raise BugzError("Can't get bug #" + str(args['bugid']) + ": "
                            + fault.faultString)
        # NOTE(review): args is accessed both as a mapping (args['bugid'])
        # and via attributes (args.attachments) -- mirrors the caller's
        # argparse namespace; verify before changing.
        for bug in result['bugs']:
            self.show_bug_info(bug, args.attachments, args.comments)

    def post(self, args):
        """Post a new bug"""
        params = {
            'product': args['product'],
            'component': args['component'],
            'summary': args['summary'],
            'description': args['description'],
            'version': args['version'],
        }
        result = self.call_bz(self.bz.Bug.create, params)
        log_info('Bug %d submitted' % result['id'])
        return result

    def modify(self, args):
        """Modify an existing bug (eg. adding a comment or changing
        resolution.)
        """
        params = {'ids': args['bugid']}
        if args['cc_add'] is not None:
            params['cc'] = {'add': args['cc_add']}
        if args['comment'] is not None:
            params['comment'] = {'body': args['comment']}
        # 'ids' alone is not a change.
        if len(params) < 2:
            raise BugzError('No changes were specified')
        result = self.call_bz(self.bz.Bug.update, params)
        for bug in result['bugs']:
            changes = bug['changes']
            if not changes:
                log_info('Added comment to bug %s' % bug['id'])
            else:
                log_info('Modified the following fields in bug %s'
                         % bug['id'])
                for key in changes:
                    log_info('%-12s: removed %s'
                             % (key, changes[key]['removed']))
                    log_info('%-12s: added %s'
                             % (key, changes[key]['added']))

    def attachment(self, args):
        """ Download or view an attachment given the id."""
        log_info('Getting attachment %s' % args.attachid)
        params = {'attachment_ids': [args.attachid]}
        result = self.call_bz(self.bz.Bug.attachments, params)
        result = result['attachments'][args.attachid]
        action = {True: 'Viewing', False: 'Saving'}
        log_info('%s attachment: "%s"'
                 % (action[args.view], result['file_name']))
        # Strip '..' and any directory part so a malicious server-supplied
        # name cannot escape the current directory.
        safe_filename = os.path.basename(
            re.sub(r'\.\.', '', result['file_name']))
        if args.view:
            print(result['data'].data)
        else:
            # BUG FIX: the existence check previously tested the raw
            # result['file_name'] while the write used safe_filename.
            if os.path.exists(safe_filename):
                raise RuntimeError('Filename already exists')
            with open(safe_filename, 'wb') as fd:
                fd.write(result['data'].data)

    def attach(self, args):
        """ Attach a file to a bug given a filename. """
        filename = args['filename']
        summary = os.path.basename(filename)
        comment = args['comment']
        if not os.path.exists(filename):
            raise BugzError('File not found: %s' % filename)
        params = {}
        params['ids'] = args['bugid']
        with open(filename, 'rb') as fd:
            params['data'] = xmlrpc.client.Binary(fd.read())
        params['file_name'] = os.path.basename(filename)
        params['summary'] = summary
        params['content_type'] = args['content_type']
        params['comment'] = comment
        self.call_bz(self.bz.Bug.add_attachment, params)
        log_info("'%s' has been attached to bug %s"
                 % (filename, args['bugid']))

    def list_bugs(self, buglist, args):
        """Print a one-line summary per bug, truncated to self.columns."""
        for bug in buglist:
            assignee = bug['assigned_to'].split('@')[0]
            line = '%s' % (bug['id'])
            if args.show_status:
                line = '%s %-12s' % (line, bug['status'])
            if args.show_priority:
                line = '%s %-12s' % (line, bug['priority'])
            if args.show_severity:
                line = '%s %-12s' % (line, bug['severity'])
            line = '%s %-20s' % (line, assignee)
            line = '%s %s' % (line, bug['summary'])
            # NOTE(review): self.columns is never assigned in the visible
            # __init__ -- presumably set elsewhere (terminal width); verify.
            print(line[:self.columns])
        log_info("%i bug(s) found." % len(buglist))

    def show_bug_info(self, bug, show_attachments, show_comments):
        """Pretty-print the fields and, optionally, the attachments and
        comments of a single bug dict returned by Bug.get.
        """
        FieldMap = {
            'alias': 'Alias',
            'summary': 'Title',
            'status': 'Status',
            'resolution': 'Resolution',
            'product': 'Product',
            'component': 'Component',
            'version': 'Version',
            'platform': 'Hardware',
            'op_sys': 'OpSystem',
            'priority': 'Priority',
            'severity': 'Severity',
            'target_milestone': 'TargetMilestone',
            'assigned_to': 'AssignedTo',
            'url': 'URL',
            'whiteboard': 'Whiteboard',
            'keywords': 'Keywords',
            'depends_on': 'dependsOn',
            'blocks': 'Blocks',
            'creation_time': 'Reported',
            'creator': 'Reporter',
            'last_change_time': 'Updated',
            'cc': 'CC',
            'see_also': 'See Also',
        }
        SkipFields = ['is_open', 'id', 'is_confirmed',
                      'is_creator_accessible', 'is_cc_accessible',
                      'update_token']

        for field in bug:
            if field in SkipFields:
                continue
            desc = FieldMap.get(field, field)
            value = bug[field]
            if field in ['cc', 'see_also']:
                for x in value:
                    print('%-12s: %s' % (desc, x))
            elif isinstance(value, list):
                s = ', '.join(["%s" % x for x in value])
                if s:
                    print('%-12s: %s' % (desc, s))
            elif value is not None and value != '':
                print('%-12s: %s' % (desc, value))

        if show_attachments:
            bug_attachments = self.call_bz(self.bz.Bug.attachments,
                                           {'ids': [bug['id']]})
            bug_attachments = bug_attachments['bugs']['%s' % bug['id']]
            print('%-12s: %d' % ('Attachments', len(bug_attachments)))
            print()
            for attachment in bug_attachments:
                print('[Attachment] [%s] [%s]'
                      % (attachment['id'], attachment['summary']))

        if show_comments:
            bug_comments = self.call_bz(self.bz.Bug.comments,
                                        {'ids': [bug['id']]})
            bug_comments = bug_comments['bugs']['%s' % bug['id']]['comments']
            print()
            print('%-12s: %d' % ('Comments', len(bug_comments)))
            print()
            wrapper = textwrap.TextWrapper(width=self.columns,
                                           break_long_words=False,
                                           break_on_hyphens=False)
            for i, comment in enumerate(bug_comments):
                who = comment['creator']
                when = comment['time']
                what = comment['text']
                if what is None:
                    what = ''
                # NOTE(review): this header string straddled a corrupted
                # line break in the source; reconstructed as one literal.
                print('[Comment #%d] %s : %s' % (i, who, when))
                print('-' * (self.columns - 1))
                # Print a wrapped version of over-long lines.
                for line in what.split('\n'):
                    if len(line) < self.columns:
                        print(line)
                    else:
                        for shortline in wrapper.wrap(line):
                            print(shortline)
                print()