# Imports assumed by this snippet (Python 3 names; LoginError and
# UnrestrictionError are project-specific exceptions defined elsewhere).
from http.cookiejar import MozillaCookieJar
from json import load
from os import path
from urllib.parse import urlencode
from urllib.request import build_opener, HTTPCookieProcessor


class RDWorker:
    """
    Worker class to perform Real-Debrid related actions:
    - format login info so it can be used by Real-Debrid
    - login
    - unrestrict links
    - keep cookies
    """
    _endpoint = 'http://www.real-debrid.com/ajax/%s'

    def __init__(self, cookie_file):
        self._cookie_file = cookie_file
        self.cookies = MozillaCookieJar(self._cookie_file)

    def login(self, username, password_hash):
        """
        Log into Real-Debrid. password_hash must be an MD5 hash of the password string.
        :param username:
        :param password_hash:
        :return:
        :raise:
        """
        if path.isfile(self._cookie_file):
            self.cookies.load(self._cookie_file)
            for cookie in self.cookies:
                if cookie.name == 'auth' and not cookie.is_expired():
                    return  # no need for a new cookie

        # Request a new cookie if no valid cookie is found or if it has expired.
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        try:
            response = opener.open(self._endpoint % 'login.php?%s'
                                   % urlencode({'user': username, 'pass': password_hash}))
            resp = load(response)
            opener.close()
            if resp['error'] == 0:
                self.cookies.save(self._cookie_file)
            else:
                raise LoginError(resp['message'].encode('utf-8'), resp['error'])
        except Exception as e:
            raise Exception('Login failed: %s' % str(e))

    def unrestrict(self, link, password=''):
        """
        Unrestrict a download URL. Returns a tuple of the unrestricted URL and the filename.
        :param link: URL to unrestrict
        :param password: password to use for the unrestriction
        :return:
        :raise:
        """
        opener = build_opener(HTTPCookieProcessor(self.cookies))
        response = opener.open(self._endpoint % 'unrestrict.php?%s'
                               % urlencode({'link': link, 'password': password}))
        resp = load(response)
        opener.close()
        if resp['error'] == 0:
            info = resp['generated_links'][0]
            return info[2], info[0].replace('/', '_')
        else:
            raise UnrestrictionError(resp['message'].encode('utf-8'), resp['error'])
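# Usage sketch for the RDWorker above (an assumption, not part of the original
# snippet): log in with an MD5 hash of the password, then unrestrict a link.
# The credentials, cookie path and hoster URL below are placeholders.
import hashlib

worker = RDWorker('rd_cookies.txt')
password_hash = hashlib.md5('my-password'.encode('utf-8')).hexdigest()
worker.login('my-user', password_hash)
url, filename = worker.unrestrict('http://example-hoster.com/file/123')
print(url, filename)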
def __init__(self, mobile, password=None, status='0',
             cachefile='Fetion.cache', cookiesfile=''):
    '''Login status codes:
       online: 400  invisible: 0  busy: 600  away: 100
    '''
    if cachefile:
        self.cache = Cache(cachefile)
    if not cookiesfile:
        cookiesfile = '%s.cookies' % mobile

    cookiejar = MozillaCookieJar(filename=cookiesfile)
    # Make sure the cookies file exists and starts with the Netscape header,
    # otherwise MozillaCookieJar.load() refuses to read it.
    try:
        f = open(cookiesfile)
    except IOError:
        f = open(cookiesfile, 'w')
        f.write(MozillaCookieJar.header)
    finally:
        f.close()
    cookiejar.load(filename=cookiesfile)

    cookie_processor = HTTPCookieProcessor(cookiejar)
    self.opener = build_opener(cookie_processor, HTTPHandler)
    self.mobile, self.password = mobile, password

    if not self.alive():
        if self._login():
            cookiejar.save()
    self.changestatus(status)
def __init__(self, mobile, password=None, status='0',
             cachefile='Fetion.cache', cookiesfile=''):
    '''Login status codes:
       online: 400  invisible: 0  busy: 600  away: 100
    '''
    if cachefile:
        self.cache = Cache(cachefile)
    if not cookiesfile:
        cookiesfile = '%s.cookies' % mobile

    cookiejar = MozillaCookieJar(filename=cookiesfile)
    # Seed the cookies file with the Netscape header on first run,
    # so that load() does not fail on a missing or empty file.
    if not os.path.isfile(cookiesfile):
        open(cookiesfile, 'w').write(MozillaCookieJar.header)
    cookiejar.load(filename=cookiesfile)

    cookie_processor = HTTPCookieProcessor(cookiejar)
    self.opener = build_opener(cookie_processor, HTTPHandler)
    self.mobile, self.password = mobile, password

    if not self.alive():
        self._login()
        cookiejar.save()
    self.changestatus(status)
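# Minimal standalone sketch of the bootstrap pattern used in the two snippets
# above (assumes Python 3 names; 'cookies.txt' is a placeholder path):
# MozillaCookieJar.load() raises LoadError on a file that lacks the Netscape
# header line, so the file is seeded with MozillaCookieJar.header before the
# first load.
import os
from http.cookiejar import MozillaCookieJar
from urllib.request import build_opener, HTTPCookieProcessor

cookiesfile = 'cookies.txt'
jar = MozillaCookieJar(filename=cookiesfile)
if not os.path.isfile(cookiesfile):
    with open(cookiesfile, 'w') as f:
        f.write(MozillaCookieJar.header)  # "# Netscape HTTP Cookie File ..." magic line
jar.load(cookiesfile)
opener = build_opener(HTTPCookieProcessor(jar))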
def _get_cookie_headers(cls):
    jar = MozillaCookieJar(config.netflix.cookies_path)
    jar.load()
    cookies = []
    for cookie in jar:
        cookies.append('='.join((cookie.name, cookie.value)))
    return cookies
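# Hypothetical follow-up (not in the original snippet): the name=value pairs
# returned by _get_cookie_headers() can be joined into a single Cookie request
# header. The values below are placeholders.
cookie_pairs = ['NetflixId=abc123', 'SecureNetflixId=def456']
headers = {'Cookie': '; '.join(cookie_pairs)}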
def GetWithCookie(url, cookie_name, data='', retry=3):
    global PATH_TMP, ACGINDEX_UA
    try:
        cj = MozillaCookieJar(PATH_TMP + cookie_name)
        try:
            cj.load(PATH_TMP + cookie_name)
        except:
            pass  # no cookies yet, just carry on
        ckproc = urllib2.HTTPCookieProcessor(cj)
        AmagamiSS = urllib2.build_opener(ckproc)
        AmagamiSS.addheaders = [ACGINDEX_UA]

        if data != '':
            request = urllib2.Request(url=url, data=data)
            res = AmagamiSS.open(request)
            cj.save()  # only save newly obtained cookies after a POST
        else:
            res = AmagamiSS.open(url)

        return Haruka.GetContent(res)
    except:
        # Up to 3 reconnection attempts; give up after 3 timeouts
        if retry > 0:
            return Haruka.GetWithCookie(url, cookie_name, data, retry - 1)
        else:
            return False
class WebBrowser(object):
    '''Keeps cookies in memory, emulating a web browser.
    Note: it does not currently execute JavaScript.'''

    def __init__(self, uAgent=None, headers=None):
        '''uAgent is the user-agent string.'''
        self.cookie_j = MozillaCookieJar()
        if uAgent is None:
            uAgent = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/32.0.1700.107 Safari/537.36')
        self.opener = build_opener(HTTPCookieProcessor(self.cookie_j))
        self.user_agent = uAgent
        self.opener.addheaders = [('User-Agent', self.user_agent)]
        self.timeout = 25
        socket.setdefaulttimeout(self.timeout)

    def newtree(f):
        # Decorator: parse the fetched response into an lxml HTML tree.
        return lambda *a, **k: etree.parse(f(*a, **k), parser=etree.HTMLParser())

    @newtree
    def fetch(self, url, data=None, headers=None, method='POST'):
        '''Fetches the contents of the web page given in url.
        To send data via POST, pass it (already encoded) in data.'''
        if headers:
            self.opener.addheaders = headers
        if not (data is None or isinstance(data, str)):
            data = urllib.urlencode(data)
        if method == 'POST':
            self.last_seen = self.opener.open(url, data)
        elif method == 'GET':
            if data is None:
                self.last_seen = self.opener.open(url)
            else:
                self.last_seen = self.opener.open(url + '?' + data)
        else:
            raise ValueError('Unsupported HTTP method: %s' % method)
        return self.last_seen

    def geturl(self):
        return self.last_seen.geturl()

    def save_cookies(self, path):
        '''Saves the in-memory cookies to disk; path is the cookie file.'''
        self.cookie_j.save(path, ignore_discard=True, ignore_expires=True)

    def load_cookies(self, path):
        '''Loads cookies from disk into memory; path is the cookie file.'''
        self.cookie_j.load(path, ignore_discard=True, ignore_expires=True)

    def print_cookies(self):
        for cookie in self.cookie_j:
            print cookie.name, cookie.value
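# Usage sketch for the WebBrowser class above (assumption: Python 2, since the
# class uses urllib and print statements; 'example_cookies.txt' is a placeholder).
browser = WebBrowser()
tree = browser.fetch('http://example.com/', method='GET')  # lxml tree via @newtree
print browser.geturl()
browser.save_cookies('example_cookies.txt')

# Later, restore the same session in a fresh instance:
browser2 = WebBrowser()
browser2.load_cookies('example_cookies.txt')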
def LIVE(url, relogin=False):
    if not (settings['username'] and settings['password']):
        # "Error" / "Please set up a moja.markiza.sk account"
        xbmcgui.Dialog().ok('Chyba', 'Nastavte prosím moja.markiza.sk konto', '', '')
        xbmcplugin.setResolvedUrl(int(sys.argv[1]), False, xbmcgui.ListItem())
        raise RuntimeError

    cj = MozillaCookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

    if not relogin:
        try:
            cj.load(cookiepath)
        except IOError:
            relogin = True

    if relogin:
        response = opener.open(loginurl).read()
        token = re.search(r'name=\"_token_\" value=\"(\S+?)\">', response).group(1)
        logindata = urllib.urlencode({
            'email': settings['username'],
            'password': settings['password'],
            '_token_': token,
            '_do': 'content1-loginForm-form-submit'
        }) + '&login=Prihl%C3%A1si%C5%A5+sa'
        opener.open(loginurl, logindata)
        log('Saving cookies')
        cj.save(cookiepath)

    response = opener.open(url).read()
    # e.g. https://videoarchiv.markiza.sk/api/v1/user/live
    link = re.search(r'<iframe src=\"(\S+?)\"', response).group(1)
    link = link.replace('&amp;', '&')
    response = opener.open(link).read()

    if '<iframe src=\"' not in response:
        # Handle expired cookies
        if relogin:
            # Loop protection: we already re-logged in once
            # "Error" / "Check your login credentials"
            xbmcgui.Dialog().ok('Chyba', 'Skontrolujte prihlasovacie údaje', '', '')
            raise RuntimeError
        else:
            LIVE(url, relogin=True)
            return

    opener.addheaders = [('Referer', link)]
    # e.g. https://media.cms.markiza.sk/embed/
    link = re.search(r'<iframe src=\"(\S+?)\"', response).group(1)
    response = opener.open(link).read()

    if '<title>Error</title>' in response:
        # e.g. "Video nie je dostupné vo vašej krajine" (video not available in your country)
        error = re.search('<h2 class="e-title">(.*?)</h2>', response).group(1)
        xbmcgui.Dialog().ok('Chyba', error, '', '')
        raise RuntimeError

    # e.g. https://h1-s6.c.markiza.sk/hls/markiza-sd-master.m3u8
    link = re.search(r'\"hls\": \"(\S+?)\"', response).group(1)
    response = opener.open(link).read()

    cookies = '|Cookie='
    for cookie in cj:
        cookies += cookie.name + '=' + cookie.value + ';'
    cookies = cookies[:-1]

    play_item = xbmcgui.ListItem(path=link + cookies)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, listitem=play_item)
def main(*args):
    # Populate our options, -h/--help is already there for you.
    usage = "usage: %prog [options] URL"
    optp = optparse.OptionParser(usage=usage)
    optp.add_option("-u", "--username",
                    help="the username to login as.")
    optp.add_option("-d", "--storedir", dest="store_dir",
                    help="the directory to store the certificate/key and "
                         "config file",
                    metavar="DIR",
                    default=path.join(homedir, ".shibboleth"))
    optp.add_option("-i", "--idp",
                    help="unique ID of the IdP used to log in")
    optp.add_option('-v', '--verbose', dest='verbose', action='count',
                    help="Increase verbosity (specify multiple times for more)")

    # Parse the arguments (defaults to parsing sys.argv).
    opts, args = optp.parse_args()

    # Here would be a good place to check what came in on the command line and
    # call optp.error("Useful message") to exit if all is not well.
    log_level = logging.WARNING  # default
    if opts.verbose == 1:
        log_level = logging.INFO
    elif opts.verbose >= 2:
        log_level = logging.DEBUG

    # Set up basic configuration, out to stderr with a reasonable default format.
    logging.basicConfig(level=log_level)

    if not args:
        optp.print_help()
        return

    if not path.exists(opts.store_dir):
        os.mkdir(opts.store_dir)

    sp = args[0]
    idp = Idp(opts.idp)
    c = CredentialManager()
    if opts.username:
        c.username = opts.username

    # If the cookies file exists, load it.
    cookies_file = path.join(opts.store_dir, 'cookies.txt')
    cj = MozillaCookieJar(filename=cookies_file)
    if path.exists(cookies_file):
        cj.load()

    shibboleth = Shibboleth(idp, c, cj)
    shibboleth.openurl(sp)
    print("Successfully authenticated to %s" % sp)
    cj.save()
def main(*args):
    # Populate our options, -h/--help is already there for you.
    usage = "usage: %prog [options] URL"
    optp = optparse.OptionParser(usage=usage)
    optp.add_option("-d", "--storedir", dest="store_dir",
                    help="the directory to store the certificate/key and "
                         "config file",
                    metavar="DIR",
                    default=path.join(homedir, ".shibboleth"))
    optp.add_option('-v', '--verbose', dest='verbose', action='count',
                    help="Increase verbosity (specify multiple times for more)")

    # Parse the arguments (defaults to parsing sys.argv).
    opts, args = optp.parse_args()

    # Here would be a good place to check what came in on the command line and
    # call optp.error("Useful message") to exit if all is not well.
    log_level = logging.WARNING  # default
    if opts.verbose == 1:
        log_level = logging.INFO
    elif opts.verbose >= 2:
        log_level = logging.DEBUG

    # Set up basic configuration, out to stderr with a reasonable
    # default format.
    logging.basicConfig(level=log_level)

    if not path.exists(opts.store_dir):
        os.mkdir(opts.store_dir)

    if args:
        sp = args[0]

    # If the cookies file exists, load it.
    cookies_file = path.join(opts.store_dir, 'cookies.txt')
    cj = MozillaCookieJar(filename=cookies_file)
    if path.exists(cookies_file):
        cj.load()

    # Collect the Shibboleth session logout endpoints from the stored cookies.
    logout_urls = []
    for cookie in cj:
        if cookie.name.startswith('_shibsession_') or \
           cookie.name.startswith('_shibstate_'):
            logout_urls.append(
                "https://%s/Shibboleth.sso/Logout" % cookie.domain)
    logout_urls = list(set(logout_urls))

    opener = urllib2.build_opener(HTTPCookieProcessor(cookiejar=cj))
    for url in logout_urls:
        request = urllib2.Request(url)
        log.debug("GET: %s" % request.get_full_url())
        response = opener.open(request)

    cj.save()
def load_cookies3():
    """
    Load cookies: cookies.txt -> load()  (MozillaCookieJar format)
    """
    save_to_txt()
    cj = MozillaCookieJar()
    # Both flags must be True here, otherwise the login fails.
    cj.load('localCookiesMoz.txt', ignore_discard=True, ignore_expires=True)
    for index, cookie in enumerate(cj):  # show the cookies
        print('[', index, ']', cookie)
    return cj
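# Follow-up sketch (an assumption, not from the original snippet): wire the jar
# returned by load_cookies3() into an opener so subsequent requests send those
# cookies automatically.
from urllib.request import build_opener, HTTPCookieProcessor, install_opener

cj = load_cookies3()
opener = build_opener(HTTPCookieProcessor(cj))
install_opener(opener)  # urlopen() will now carry the loaded cookies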
def Get(url, data='', refer='http://www.pixiv.net/', retry=3):
    global ABS_PATH
    cj = MozillaCookieJar(ABS_PATH + 'pixiv.cookie.txt')
    try:
        cj.load(ABS_PATH + 'pixiv.cookie.txt')
    except:
        pass  # no cookies yet, just carry on

    ckproc = urllib2.HTTPCookieProcessor(cj)
    opener = urllib2.build_opener(ckproc)
    opener.addheaders = [
        ('Accept', '*/*'),
        ('Accept-Language', 'zh-CN,zh;q=0.8'),
        ('Accept-Charset', 'UTF-8,*;q=0.5'),
        ('Accept-Encoding', 'gzip,deflate'),
        ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31'),
        ('Referer', refer)
    ]

    # Prevent weibo from switching to the English version for overseas visitors
    if 'weibo.com' in url:
        opener.addheaders = [('Cookie', 'lang=zh-cn; SUB=Af3TZPWScES9bnItTjr2Ahd5zd6Niw2rzxab0hB4mX3uLwL2MikEk1FZIrAi5RvgAfCWhPyBL4jbuHRggucLT4hUQowTTAZ0ta7TYSBaNttSmZr6c7UIFYgtxRirRyJ6Ww%3D%3D; UV5PAGE=usr512_114; UV5=usrmdins311164')]

    debug('Network: url - ' + url)
    try:
        # Send the request
        if data != '':
            debug('Network: post')
            debug(data)
            request = urllib2.Request(url=url, data=data)
            res = opener.open(request, timeout=15)
            cj.save()  # only save newly obtained cookies after a POST
        else:
            debug('Network: get')
            res = opener.open(url, timeout=15)
        debug('Network: Status Code - ' + str(res.getcode()))
        return GetContent(res)
    except Exception, e:
        # Retry automatically, at most 3 times per image
        if retry > 0:
            return Get(url, data, refer, retry - 1)
        else:
            log(e, 'Error: unable to get %s' % url)
            return False
class NRK:
    def __init__(self):
        policy = DefaultCookiePolicy(
            rfc2965=True,
            strict_ns_domain=DefaultCookiePolicy.DomainStrict)
        self.cj = MozillaCookieJar(".cookies", policy)
        try:
            self.cj.load()
        except IOError, e:
            if e.errno != 2:
                raise e
            # else: ignore "File not found"
        self.opener = build_opener(HTTPCookieProcessor(self.cj))
        self.init()
        #self.login()
        self.setspeed()
def login_test(self, provider):
    with self.app.test_request_context('https://localhost.admin.eutaxia.eu:5000/login',
                                       base_url='https://localhost.admin.eutaxia.eu:5000/'):
        resp = oauth.authorize(provider)
        assert resp.status_code == 302
        location = resp.headers['Location']
        session_data = dict(flask.session)

    cj = MozillaCookieJar(os.path.join(os.path.dirname(__file__),
                                       'cookies.%s.txt' % provider))
    cj.load()

    class NoRedirectHandler(HTTPRedirectHandler):
        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            if newurl.startswith('https://localhost.admin.eutaxia.eu:5000/login/%s' % provider):
                raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
            return HTTPRedirectHandler.redirect_request(self, req, fp, code, msg,
                                                        hdrs, newurl)

    opener = build_opener(HTTPCookieProcessor(cj), NoRedirectHandler())
    try:
        res = opener.open(location)
    except HTTPError as err:
        assert err.code == 302
        url = err.hdrs['Location']
        assert url.startswith('https://localhost.admin.eutaxia.eu:5000/login/%s' % provider)
    else:
        if provider == 'windowslive':
            # Unfortunately we can't configure Windows Live to accept two separate
            # redirect URLs
            return
        else:
            assert False, 'Wrong redirect'

    with self.app.test_client() as c:
        with c.session_transaction() as session:
            session.update(session_data)
        query_string = urlparse(url).query
        resp = c.get('/login/%s' % provider, query_string=query_string)
        assert resp.status_code == 666
def check_kilnauth_token(ui, url):
    cookiepath = _get_path('hgcookies')
    if (not os.path.exists(cookiepath)) or (not os.path.isdir(cookiepath)):
        return ''

    cookiepath = os.path.join(cookiepath,
                              md5(get_username(get_dest(ui))).hexdigest())
    try:
        if not os.path.exists(cookiepath):
            return ''
        cj = MozillaCookieJar(cookiepath)
    except IOError:
        return ''

    domain = get_domain(url)
    cj.load(ignore_discard=True, ignore_expires=True)
    for cookie in cj:
        if domain == cookie.domain:
            if cookie.name == 'fbToken':
                return cookie.value
def get_cookie(uname, pword, cookie_jar_path):
    print(cookie_jar_path)
    if os.path.isfile(cookie_jar_path):
        cookie_jar = MozillaCookieJar()
        cookie_jar.load(cookie_jar_path)

        # Make sure the cookie is still valid
        print('******************* FIRST COOKIE Check ******************')
        if check_cookie(cookie_jar):
            print(" > Re-using previous cookie jar.")
            print('******************* END FIRST COOKIE Check ******************')
            return cookie_jar
        else:
            print(" > Could not validate old cookie Jar")
            cookie_jar = get_new_cookie(uname, pword, cookie_jar_path)
            check_cookie(cookie_jar)
    else:
        print('Could not find existing cookie jar -- Creating a new one')
        cookie_jar = get_new_cookie(uname, pword, cookie_jar_path)
    return cookie_jar
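# Usage sketch for get_cookie() above (assumption: check_cookie() and
# get_new_cookie() are defined elsewhere in the same module; the credentials
# are placeholders, and the jar path mirrors the one used by the
# bulk_downloader class later in this collection).
import os

jar_path = os.path.join(os.path.expanduser('~'), '.bulk_download_cookiejar.txt')
cookie_jar = get_cookie('my_earthdata_user', 'my_password', jar_path)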
class bulk_downloader: def __init__(self): # List of files to download self.files = [ "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20200110T101421_20200110T101446_019753_025598_C902-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191229T101421_20191229T101446_019578_025007_DB2A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191217T101422_20191217T101447_019403_024A73_D2A9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191205T101422_20191205T101447_019228_0244DD_9778-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191123T101423_20191123T101448_019053_023F55_95B6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191111T101423_20191111T101448_018878_0239B4_3FCF-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191030T101423_20191030T101448_018703_02340F_3D8D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191018T101423_20191018T101448_018528_022E97_0AEB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20191006T101423_20191006T101448_018353_022937_B959-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190912T101422_20190912T101447_018003_021E50_B3FB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190831T101421_20190831T101446_017828_0218D8_1ADE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190819T101421_20190819T101446_017653_021365_B751-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190807T101420_20190807T101445_017478_020DEF_A757-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20190801T101514_20190801T101539_028374_0334DB_E6C0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20190801T101449_20190801T101514_028374_0334DB_6CA1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190726T101419_20190726T101444_017303_0208A8_2D9C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190714T101419_20190714T101444_017128_020394_A8B6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190702T101418_20190702T101443_016953_01FE6B_BE7D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190620T101417_20190620T101442_016778_01F93E_D609-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190608T101416_20190608T101441_016603_01F407_282F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190527T101416_20190527T101441_016428_01EECF_79D2-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190515T101415_20190515T101440_016253_01E971_7A00-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190503T101415_20190503T101440_016078_01E3E6_D149-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190421T101414_20190421T101439_015903_01DE0C_E919-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190409T101414_20190409T101439_015728_01D843_E7B3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190328T101413_20190328T101438_015553_01D27A_7404-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190316T101413_20190316T101438_015378_01CCBE_781F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190304T101413_20190304T101438_015203_01C713_17EF-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190220T101413_20190220T101438_015028_01C151_EA49-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190208T101413_20190208T101438_014853_01BB8C_940D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190127T101414_20190127T101439_014678_01B5D2_3B0A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190115T101414_20190115T101439_014503_01B03A_4439-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20190103T101414_20190103T101439_014328_01AA92_7D9B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181222T101415_20181222T101440_014153_01A4CF_3F05-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181210T101415_20181210T101440_013978_019F03_1C29-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181128T101416_20181128T101441_013803_01995A_6DD3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181116T101416_20181116T101441_013628_0193C1_FE12-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181104T101416_20181104T101441_013453_018E4D_0014-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181023T101420_20181023T101445_013278_0188CC_5952-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181023T101355_20181023T101420_013278_0188CC_0FA6-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20181011T101417_20181011T101442_013103_01835D_D0A0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180929T101416_20180929T101441_012928_017E0F_226F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180917T101416_20180917T101441_012753_0178B3_B66A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180905T101415_20180905T101440_012578_017358_3259-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180824T101415_20180824T101440_012403_016DE5_85C3-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180812T101414_20180812T101439_012228_01687D_BCA9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180731T101414_20180731T101439_012053_01631A_ADBC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180719T101413_20180719T101438_011878_015DD1_3E69-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180707T101412_20180707T101437_011703_015872_5055-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180625T101411_20180625T101436_011528_015300_5709-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180613T101411_20180613T101436_011353_014D8E_1799-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180601T101410_20180601T101435_011178_014821_B178-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180520T101409_20180520T101434_011003_014273_5667-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180508T101408_20180508T101433_010828_013CCB_18C3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180426T101408_20180426T101433_010653_013720_457C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180414T101407_20180414T101432_010478_01318E_FB0A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180402T101407_20180402T101432_010303_012BEA_9E94-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180321T101406_20180321T101431_010128_012640_6D69-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180309T101406_20180309T101431_009953_01208C_4F7B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180225T101406_20180225T101431_009778_011AAD_2181-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180213T101407_20180213T101432_009603_0114ED_D868-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180201T101407_20180201T101432_009428_010F21_C8FA-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180120T101407_20180120T101432_009253_010968_4DBE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20180108T101408_20180108T101433_009078_0103B1_EB1D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171227T101408_20171227T101433_008903_00FDFF_A4F1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171215T101409_20171215T101434_008728_00F863_906F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171203T101409_20171203T101434_008553_00F2D6_B8D7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171121T101409_20171121T101434_008378_00ED57_D6D0-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171028T101410_20171028T101435_008028_00E2F3_6DFC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171016T101410_20171016T101435_007853_00DDE1_829D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20171004T101409_20171004T101434_007678_00D8F4_5C9C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170922T101409_20170922T101434_007503_00D3F7_9FEC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170910T101409_20170910T101434_007328_00CED7_2D8E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170829T101408_20170829T101433_007153_00C9B8_96C9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170817T101408_20170817T101433_006978_00C4A9_5D92-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170805T101407_20170805T101432_006803_00BF8B_4F73-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170724T101407_20170724T101432_006628_00BA88_2017-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170712T101406_20170712T101431_006453_00B58B_7674-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170630T101405_20170630T101430_006278_00B098_CAC7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170618T101404_20170618T101429_006103_00AB89_7D52-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170606T101404_20170606T101429_005928_00A666_6411-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170525T101403_20170525T101428_005753_00A14F_A827-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170513T101402_20170513T101427_005578_009C52_4E38-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170501T101402_20170501T101427_005403_009788_B5E9-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170419T101401_20170419T101426_005228_009270_5637-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170407T101401_20170407T101426_005053_008D67_BB68-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170326T101400_20170326T101425_004878_008859_36E8-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170314T101400_20170314T101425_004703_008359_7A42-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170302T101400_20170302T101425_004528_007E2B_F8A2-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170206T101400_20170206T101425_004178_0073C4_69B1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20170113T101401_20170113T101426_003828_00695B_0B49-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SDV_20161220T101403_20161220T101428_003478_005F1B_E6DD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161126T101403_20161126T101428_003128_005520_5BBB-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161102T101404_20161102T101429_002778_004B45_F931-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20161009T101404_20161009T101429_002428_00419A_2FD0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1B_IW_GRDH_1SSV_20160927T101404_20160927T101429_002253_003CAB_BC6E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160909T101423_20160909T101448_012974_01487C_40C0-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160909T101448_20160909T101513_012974_01487C_E55B-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160816T101434_20160816T101503_012624_013CE1_AA51-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160723T101444_20160723T101513_012274_013152_9A67-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160629T101426_20160629T101455_011924_0125E4_7F71-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160629T101455_20160629T101520_011924_0125E4_D66A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160605T101440_20160605T101505_011574_011AE7_0C49-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160512T101439_20160512T101504_011224_010F91_FCE1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160418T101435_20160418T101500_010874_01048F_89B1-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160325T101434_20160325T101459_010524_00FA2B_4EAD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160301T101434_20160301T101459_010174_00F035_3B54-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160206T101440_20160206T101505_009824_00E617_C31E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20160206T101415_20160206T101440_009824_00E617_D79A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20160113T101434_20160113T101459_009474_00DBEE_5DBA-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151220T101435_20151220T101500_009124_00D1EE_CFED-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151102T101442_20151102T101507_008424_00BE79_E2E7-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20151009T101442_20151009T101507_008074_00B50C_12FD-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150915T101441_20150915T101506_007724_00ABB3_226C-PREDORB-10m-power-filt-rtc-gamma.zip", 
"https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150822T101441_20150822T101506_007374_00A234_599D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150729T101439_20150729T101504_007024_0098B2_E48E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150705T101438_20150705T101503_006674_008EB2_3496-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20150518T101442_20150518T101507_005974_007B3F_EFEC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SDV_20150518T101417_20150518T101442_005974_007B3F_DF42-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150424T101426_20150424T101451_005624_00734A_AD5A-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150331T101444_20150331T101509_005274_006AB8_213D-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150331T101419_20150331T101444_005274_006AB8_EBF3-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150307T101444_20150307T101509_004924_006269_7919-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150307T101419_20150307T101444_004924_006269_9089-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150211T101443_20150211T101508_004574_005A0A_8FEE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150211T101418_20150211T101443_004574_005A0A_4D0C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150118T101444_20150118T101509_004224_00522A_42D5-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20150118T101419_20150118T101444_004224_00522A_26FE-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141225T101439_20141225T101504_003874_004A4B_D1FC-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141225T101414_20141225T101439_003874_004A4B_367E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141201T101440_20141201T101505_003524_004254_556F-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141201T101415_20141201T101440_003524_004254_5C25-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141107T101441_20141107T101506_003174_003A78_745C-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141107T101416_20141107T101441_003174_003A78_5D83-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141014T101419_20141014T101444_002824_0032F1_B89E-PREDORB-10m-power-filt-rtc-gamma.zip", "https://hyp3-download.asf.alaska.edu/asf/data/S1A_IW_GRDH_1SSV_20141014T101444_20141014T101509_002824_0032F1_B54D-PREDORB-10m-power-filt-rtc-gamma.zip" ] # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") self.cookie_jar 
= None self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://auth.asf.alaska.edu/login' } # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: print( "WARNING: Cannot write to current path! Check permissions for {0}" .format(os.getcwd())) exit(-1) # For SSL self.context = {} # Check if user handed in a Metalink or CSV: if len(sys.argv) > 0: download_files = [] input_files = [] for arg in sys.argv[1:]: if arg == '--insecure': try: ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE self.context['context'] = ctx except AttributeError: # Python 2.6 won't complain about SSL Validation pass elif arg.endswith('.metalink') or arg.endswith('.csv'): if os.path.isfile(arg): input_files.append(arg) if arg.endswith('.metalink'): new_files = self.process_metalink(arg) else: new_files = self.process_csv(arg) if new_files is not None: for file_url in (new_files): download_files.append(file_url) else: print( " > I cannot find the input file you specified: {0}" .format(arg)) else: print( " > Command line argument '{0}' makes no sense, ignoring." .format(arg)) if len(input_files) > 0: if len(download_files) > 0: print(" > Processing {0} downloads from {1} input files. ". format(len(download_files), len(input_files))) self.files = download_files else: print( " > I see you asked me to download files from {0} input files, but they had no downloads!" .format(len(input_files))) print(" > I'm super confused and exiting.") exit(-1) # Make sure cookie_jar is good to go! self.get_cookie() # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] # Get and validate a cookie def get_cookie(self): if os.path.isfile(self.cookie_jar_path): self.cookie_jar = MozillaCookieJar() self.cookie_jar.load(self.cookie_jar_path) # make sure cookie is still valid if self.check_cookie(): print(" > Re-using previous cookie jar.") return True else: print(" > Could not validate old cookie Jar") # We don't have a valid cookie, prompt user or creds print( "No existing URS cookie found, please enter Earthdata username & password:"******"(Credentials will not be stored, saved or logged anywhere)") # Keep trying 'till user gets the right U:P while self.check_cookie() is False: self.get_new_cookie() return True # Validate cookie before we begin def check_cookie(self): if self.cookie_jar is None: print(" > Cookiejar is bunk: {0}".format(self.cookie_jar)) return False # File we know is valid, used to validate cookie file_check = 'https://urs.earthdata.nasa.gov/profile' # Apply custom Redirect Hanlder opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) install_opener(opener) # Attempt a HEAD request request = Request(file_check) request.get_method = lambda: 'HEAD' try: print(" > attempting to download {0}".format(file_check)) response = urlopen(request, timeout=30) resp_code = response.getcode() # Make sure we're logged in if not self.check_cookie_is_logged_in(self.cookie_jar): return False # Save cookiejar self.cookie_jar.save(self.cookie_jar_path) except HTTPError: # If we ge this error, again, it likely means the user has not agreed to current EULA print("\nIMPORTANT: ") print( "Your user appears to lack permissions to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. 
In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # This return codes indicate the USER has not been approved to download the data if resp_code in (300, 301, 302, 303): try: redir_url = response.info().getheader('Location') except AttributeError: redir_url = response.getheader('Location') #Funky Test env: if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): print("Cough, cough. It's dusty in this test env!") return True print("Redirect ({0}) occured, invalid cookie value!".format( resp_code)) return False # These are successes! if resp_code in (200, 307): return True return False def get_new_cookie(self): # Start by prompting user to input their credentials # Another Python2/3 workaround try: new_username = raw_input("Username: "******"Username: "******"Password (will not be displayed): ") # Build URS4 Cookie request auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4[ 'client'] + '&redirect_uri=' + self.asf_urs4[ 'redir'] + '&response_type=code&state=' try: #python2 user_pass = base64.b64encode( bytes(new_username + ":" + new_password)) except TypeError: #python3 user_pass = base64.b64encode( bytes(new_username + ":" + new_password, "utf-8")) user_pass = user_pass.decode("utf-8") # Authenticate against URS, grab all the cookies self.cookie_jar = MozillaCookieJar() opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request( auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) # Watch out cookie rejection! try: response = opener.open(request) except HTTPError as e: if "WWW-Authenticate" in e.headers and "Please enter your Earthdata Login credentials" in e.headers[ "WWW-Authenticate"]: print( " > Username and Password combo was not successful. Please try again." ) return False else: # If an error happens here, the user most likely has not confirmed EULA. print( "\nIMPORTANT: There was an error obtaining a download cookie!" ) print( "Your user appears to lack permission to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) except URLError as e: print( "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. " ) print("Try cookie generation later.") exit(-1) # Did we get a cookie? if self.check_cookie_is_logged_in(self.cookie_jar): #COOKIE SUCCESS! self.cookie_jar.save(self.cookie_jar_path) return True # if we aren't successful generating the cookie, nothing will work. Stop here! print( "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again." ) print("Response was {0}.".format(response.getcode())) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): for cookie in cj: if cookie.name == 'urs_user_already_logged': # Only get this cookie if we logged in successfully! 
return True return False # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): # see if we've already download this file and if it is that it is the correct size download_file = os.path.basename(url).split('?')[0] if os.path.isfile(download_file): try: request = Request(url) request.get_method = lambda: 'HEAD' response = urlopen(request, timeout=30) remote_size = self.get_total_size(response) # Check that we were able to derive a size. if remote_size: local_size = os.path.getsize(download_file) if remote_size < (local_size + (local_size * .01)) and remote_size > ( local_size - (local_size * .01)): print( " > Download file {0} exists! \n > Skipping download of {1}. " .format(download_file, url)) return None, None #partial file size wasn't full file size, lets blow away the chunk and start again print( " > Found {0} but it wasn't fully downloaded. Removing file and downloading again." .format(download_file)) os.remove(download_file) except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None except HTTPError as e: if e.code == 401: print( " > IMPORTANT: Your user may not have permission to download this type of data!" ) else: print(" > Unknown Error, Could not get file HEAD: {0}". format(e)) except URLError as e: print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None # attempt https connection try: request = Request(url) response = urlopen(request, timeout=30) # Watch for redirect if response.geturl() != url: # See if we were redirect BACK to URS for re-auth. if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl( ): if recursion: print( " > Entering seemingly endless auth loop. Aborting. " ) return False, None # make this easier. If there is no app_type=401, add it new_auth_url = response.geturl() if "app_type" not in new_auth_url: new_auth_url += "&app_type=401" print( " > While attempting to download {0}....".format(url)) print(" > Need to obtain new cookie from {0}".format( new_auth_url)) old_cookies = [cookie.name for cookie in self.cookie_jar] opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request(new_auth_url) try: response = opener.open(request) for cookie in self.cookie_jar: if cookie.name not in old_cookies: print(" > Saved new cookie: {0}".format( cookie.name)) # A little hack to save session cookies if cookie.discard: cookie.expires = int( time.time()) + 60 * 60 * 24 * 30 print( " > Saving session Cookie that should have been discarded! " ) self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) return False, None # Okay, now we have more cookies! Lets try again, recursively! 
print(" > Attempting download again with new cookies!") return self.download_file_with_cookiejar(url, file_count, total, recursion=True) print( " > 'Temporary' Redirect download @ Remote archive:\n > {0}" .format(response.geturl())) # seems to be working print("({0}/{1}) Downloading {2}".format(file_count, total, url)) # Open our local file for writing and build status bar tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') self.chunk_read(response, tf, report_hook=self.chunk_report) # Reset download status sys.stdout.write('\n') tempfile_name = tf.name tf.close() #handle errors except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) if e.code == 401: print( " > IMPORTANT: Your user does not have permission to download this type of data!" ) if e.code == 403: print(" > Got a 403 Error trying to download this file. ") print( " > You MAY need to log in this app and agree to a EULA. ") return False, None except URLError as e: print("URL Error (from GET): {0}, {1}, {2}".format( e, e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None except socket.timeout as e: print(" > timeout requesting: {0}; {1}".format(url, e)) return False, None except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None # Return the file size shutil.copy(tempfile_name, download_file) os.remove(tempfile_name) file_size = self.get_total_size(response) actual_size = os.path.getsize(download_file) if file_size is None: # We were unable to calculate file size. file_size = actual_size return actual_size, file_size def get_redirect_url_from_error(self, error): find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") print("error file was: {}".format(error)) redirect_url = find_redirect.search(error) if redirect_url: print("Found: {0}".format(redirect_url.group(0))) return (redirect_url.group(0)) return None # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): if file_size is not None: percent = float(bytes_so_far) / file_size percent = round(percent * 100, 2) sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, file_size, percent)) else: # We couldn't figure out the size. sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): file_size = self.get_total_size(response) bytes_so_far = 0 while 1: try: chunk = response.read(chunk_size) except: sys.stdout.write("\n > There was an error reading data. 
\n") break try: local_file.write(chunk) except TypeError: local_file.write(chunk.decode(local_file.encoding)) bytes_so_far += len(chunk) if not chunk: break if report_hook: report_hook(bytes_so_far, file_size) return bytes_so_far def get_total_size(self, response): try: file_size = response.info().getheader('Content-Length').strip() except AttributeError: try: file_size = response.getheader('Content-Length').strip() except AttributeError: print("> Problem getting size") return None return int(file_size) # Get download urls from a metalink file def process_metalink(self, ml_file): print("Processing metalink file: {0}".format(ml_file)) with open(ml_file, 'r') as ml: xml = ml.read() # Hack to remove annoying namespace it = ET.iterparse(StringIO(xml)) for _, el in it: if '}' in el.tag: el.tag = el.tag.split('}', 1)[1] # strip all namespaces root = it.root dl_urls = [] ml_files = root.find('files') for dl in ml_files: dl_urls.append(dl.find('resources').find('url').text) if len(dl_urls) > 0: return dl_urls else: return None # Get download urls from a csv file def process_csv(self, csv_file): print("Processing csv file: {0}".format(csv_file)) dl_urls = [] with open(csv_file, 'r') as csvf: try: csvr = csv.DictReader(csvf) for row in csvr: dl_urls.append(row['URL']) except csv.Error as e: print( "WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) return None except KeyError as e: print( "WARNING: Could not find URL column in file %s. Skipping." % (csv_file)) if len(dl_urls) > 0: return dl_urls else: return None # Download all the files in the list def download_files(self): for file_name in self.files: # make sure we haven't ctrl+c'd or some other abort trap if abort == True: raise SystemExit # download counter self.cnt += 1 # set a timer start = time.time() # run download size, total_size = self.download_file_with_cookiejar( file_name, self.cnt, len(self.files)) # calculte rate end = time.time() # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed rate = (size / 1024**2) / elapsed print( "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec" .format(size, elapsed, rate)) # add up metrics self.total_bytes += size self.total_time += elapsed self.success.append({'file': file_name, 'size': size}) else: print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) def print_summary(self): # Print summary: print("\n\nDownload Summary ") print( "--------------------------------------------------------------------------------" ) print(" Successes: {0} files, {1} bytes ".format( len(self.success), self.total_bytes)) for success_file in self.success: print(" - {0} {1:.2f}MB".format( success_file['file'], (success_file['size'] / 1024.0**2))) if len(self.failed) > 0: print(" Failures: {0} files".format(len(self.failed))) for failed_file in self.failed: print(" - {0}".format(failed_file)) if len(self.skipped) > 0: print(" Skipped: {0} files".format(len(self.skipped))) for skipped_file in self.skipped: print(" - {0}".format(skipped_file)) if len(self.success) > 0: print(" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes / 1024.0**2) / self.total_time)) print( "--------------------------------------------------------------------------------" )
class Bilibili(): name = u'哔哩哔哩 (Bilibili)' api_url = 'http://interface.bilibili.com/playurl?' bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?' SEC1 = '94aba54af9065f71de72f5508f1cd42e' SEC2 = '9b288147e5474dd2aa67085f716c560d' supported_stream_profile = [u'流畅', u'高清', u'超清'] stream_types = [{ 'id': 'hdflv' }, { 'id': 'flv' }, { 'id': 'hdmp4' }, { 'id': 'mp4' }, { 'id': 'live' }, { 'id': 'vc' }] fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1) def __init__(self, appkey=APPKEY, appsecret=APPSECRET, width=720, height=480): self.defaultHeader = {'Referer': 'http://www.bilibili.com'} #self.defaultHeader = {} self.appkey = appkey self.appsecret = appsecret self.WIDTH = width self.HEIGHT = height self.is_login = False cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie' self.cj = MozillaCookieJar(cookie_path) if os.path.isfile(cookie_path): self.cj.load() key = None for ck in self.cj: if ck.name == 'DedeUserID': key = ck.value break if key is not None: self.is_login = True self.mid = str(key) opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj)) urllib2.install_opener(opener) try: os.remove(self._get_tmp_dir() + '/tmp.ass') except: pass def _get_tmp_dir(self): try: return tempfile.gettempdir() except: return '' def get_captcha(self, path=None): key = None for ck in self.cj: if ck.name == 'sid': key = ck.value break if key is None: get_html( LOGIN_CAPTCHA_URL.format(random()), headers={'Referer': 'https://passport.bilibili.com/login'}) result = get_html( LOGIN_CAPTCHA_URL.format(random()), decoded=False, headers={'Referer': 'https://passport.bilibili.com/login'}) if path is None: path = tempfile.gettempdir() + '/captcha.jpg' with open(path, 'wb') as f: f.write(result) return path def get_encryped_pwd(self, pwd): import rsa result = loads( get_html( LOGIN_HASH_URL.format(random()), headers={'Referer': 'https://passport.bilibili.com/login'})) pwd = result['hash'] + pwd key = result['key'] pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key) pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key) pwd = base64.b64encode(pwd) pwd = urllib.quote(pwd) return pwd def api_sign(self, params): params['appkey'] = self.appkey data = '' keys = params.keys() # must sorted. 
urllib.urlencode(params) doesn't work keys.sort() for key in keys: data += '{}={}&'.format(key, urllib.quote(str(params[key]))) data = data[:-1] # remove last '&' if self.appsecret is None: return data m = hashlib.md5() m.update(data + self.appsecret) return data + '&sign=' + m.hexdigest() def get_category_from_web_page(self): category_dict = {'0': {'title': u'全部', 'url': HOME_URL}} node = category_dict['0'] url = node['url'] result = BeautifulSoup(get_html(url), "html.parser").findAll('li', {'class': 'm-i'}) for item in result: if len(item['class']) != 1: continue tid = item['data-tid'] title = item.em.contents[0] url = 'http:' + item.a['href'] category_dict[tid] = {'title': title, 'url': url} node['subs'].append(tid) #Fix video and movie if '11' not in category_dict['0']['subs']: category_dict['0']['subs'].append('11') if '23' not in category_dict['0']['subs']: category_dict['0']['subs'].append('23') category_dict['11'] = { 'title': u'电视剧', 'url': 'http://bangumi.bilibili.com/tv/' } category_dict['23'] = { 'title': u'电影', 'url': 'http://bangumi.bilibili.com/movie/' } for sub in category_dict['0']['subs']: node = category_dict[sub] url = node['url'] result = BeautifulSoup(get_html(url), "html.parser").select('ul.n_num li') for item in result[1:]: if not item.has_attr('tid'): continue if not hasattr(item, 'a'): continue if item.has_attr('class'): continue tid = item['tid'] title = item.a.contents[0] if item.a['href'][:2] == '//': url = 'http:' + item.a['href'] else: url = HOME_URL + item.a['href'] category_dict[tid] = {'title': title, 'url': url} node['subs'].append(tid) return category_dict def get_category(self, tid='0'): items = [{tid: {'title': '全部', 'url': CATEGORY[tid]['url']}}] for sub in CATEGORY[tid]['subs']: items.append({sub: CATEGORY[sub]}) return items def get_category_name(self, tid): return CATEGORY[str(tid)]['title'] def get_order(self): return ORDER def get_category_by_tag(self, tag=0, tid=0, page=1, pagesize=20): if tag == 0: url = LIST_BY_ALL.format(tid, pagesize, page) else: url = LIST_BY_TAG.format(tag, tid, pagesize, page) results = loads(get_html(url)) return results def get_category_list(self, tid=0, order='default', days=30, page=1, pagesize=20): params = { 'tid': tid, 'order': order, 'days': days, 'page': page, 'pagesize': pagesize } url = LIST_URL.format(self.api_sign(params)) result = loads(get_html(url, headers=self.defaultHeader)) results = [] for i in range(pagesize): if result['list'].has_key(str(i)): results.append(result['list'][str(i)]) else: continue return results, result['pages'] def get_my_info(self): if self.is_login == False: return [] result = loads(get_html(MY_INFO_URL)) return result['data'] def get_bangumi_chase(self, page=1, pagesize=20): if self.is_login == False: return [] url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize) result = loads(get_html(url, headers=self.defaultHeader)) return result['data']['result'], result['data']['pages'] def get_bangumi_detail(self, season_id): url = BANGUMI_SEASON_URL.format(season_id) result = get_html(url, headers=self.defaultHeader) if result[0] != '{': start = result.find('(') + 1 end = result.find(');') result = result[start:end] result = loads(result) return result['result'] def get_history(self, page=1, pagesize=20): if self.is_login == False: return [] url = HISTORY_URL.format(page, pagesize) result = loads(get_html(url, headers=self.defaultHeader)) if len(result['data']) >= int(pagesize): total_page = int(page) + 1 else: total_page = int(page) return result['data'], total_page def 
get_dynamic(self, page=1, pagesize=20): if self.is_login == False: return [] url = DYNAMIC_URL.format(pagesize, page) result = loads(get_html(url, headers=self.defaultHeader)) total_page = int( (result['data']['page']['count'] + pagesize - 1) / pagesize) return result['data']['feeds'], total_page def get_attention(self, page=1, pagesize=20): if self.is_login == False: return [] url = ATTENTION_URL.format(self.mid, page, pagesize) result = loads(get_html(url)) return result['data']['list'] def get_attention_video(self, mid, tid=0, page=1, pagesize=20): if self.is_login == False: return [] url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid) result = loads(get_html(url, headers=self.defaultHeader)) return result['data'], result['data']['pages'] def get_attention_channel(self, mid): if self.is_login == False: return [] url = ATTENTION_CHANNEL_URL.format(mid) result = loads(get_html(url, headers=self.defaultHeader)) return result['data']['list'] def get_fav_box(self): if self.is_login == False: return [] url = FAV_BOX_URL.format(self.mid) result = loads(get_html(url, headers=self.defaultHeader)) return result['data']['list'] def get_fav(self, fav_box, page=1, pagesize=20): if self.is_login == False: return [] url = FAV_URL.format(self.mid, page, pagesize, fav_box) result = loads(get_html(url, headers=self.defaultHeader)) return result['data']['vlist'], result['data']['pages'] def login(self, userid, pwd, captcha): #utils.get_html('http://www.bilibili.com') if self.is_login == True: return True, '' pwd = self.get_encryped_pwd(pwd) data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format( captcha, userid, pwd) result = get_html( LOGIN_URL, data, { 'Origin': 'https://passport.bilibili.com', 'Referer': 'https://passport.bilibili.com/login' }) key = None for ck in self.cj: if ck.name == 'DedeUserID': key = ck.value break if key is None: return False, LOGIN_ERROR_MAP[loads(result)['code']] self.cj.save() self.is_login = True self.mid = str(key) return True, '' def logout(self): self.cj.clear() self.cj.save() self.is_login = False def get_av_list_detail(self, aid, page=1, fav=0, pagesize=20): params = {'id': aid, 'page': page} if fav != 0: params['fav'] = fav url = VIEW_URL.format(self.api_sign(params)) result = loads(get_html(url, headers=self.defaultHeader)) results = [result] if (int(page) < result['pages']) and (pagesize > 1): results += self.get_av_list_detail(aid, int(page) + 1, fav, pagesize=pagesize - 1)[0] return results, result['pages'] def get_av_list(self, aid): url = AV_URL.format(aid) try: page = get_html(url) result = loads(page) except: result = {} return result # 调用niconvert生成弹幕的ass文件 def parse_subtitle(self, cid): page_full_url = COMMENT_URL.format(cid) website = create_website(page_full_url) if website is None: return '' else: text = website.ass_subtitles_text(font_name=u'黑体', font_size=24, resolution='%d:%d' % (self.WIDTH, self.HEIGHT), line_count=12, bottom_margin=0, tune_seconds=0) f = open(self._get_tmp_dir() + '/tmp.ass', 'w') f.write(text.encode('utf8')) f.close() return 'tmp.ass' def get_video_urls(self, cid): m = hashlib.md5() m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER)) url = INTERFACE_URL.format(str(cid), m.hexdigest()) doc = parseString(get_html(url)) urls = [] for durl in doc.getElementsByTagName('durl'): u = durl.getElementsByTagName('url')[0].firstChild.nodeValue if re.match(r'.*\.qqvideo\.tc\.qq\.com', url): re.sub(r'.*\.qqvideo\.tc', 'http://vsrc.store', u) urls.append(u) #urls.append(u + 
'|Referer={}'.format(urllib.quote('https://www.bilibili.com/'))) return urls def add_history(self, aid, cid): url = ADD_HISTORY_URL.format(str(cid), str(aid)) get_html(url) def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs): ts = str(int(time.time())) if not bangumi: params_str = 'cid={}&player=1&quality={}&ts={}'.format( cid, quality, ts) chksum = hashlib.md5(bytes(params_str + self.SEC1)).hexdigest() api_url = self.api_url + params_str + '&sign=' + chksum else: mod = 'movie' if bangumi_movie else 'bangumi' params_str = 'cid={}&module={}&player=1&quality={}&ts={}'.format( cid, mod, quality, ts) chksum = hashlib.md5(bytes(params_str + self.SEC2)).hexdigest() api_url = self.bangumi_api_url + params_str + '&sign=' + chksum return get_html(api_url) def download_by_vid(self, cid, bangumi, **kwargs): stream_id = kwargs.get('stream_id') if stream_id and stream_id in self.fmt2qlt: quality = stream_id else: quality = 'hdflv' if bangumi else 'flv' level = kwargs.get('level', 0) xml = self.api_req(cid, level, bangumi, **kwargs) doc = parseString(xml) urls = [] for durl in doc.getElementsByTagName('durl'): u = durl.getElementsByTagName('url')[0].firstChild.nodeValue #urls.append(u) urls.append( urllib.quote_plus(u + '|Referer=https://www.bilibili.com')) return urls def entry(self, **kwargs): # tencent player tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"', self.page) if tc_flashvars: tc_flashvars = tc_flashvars.group(1) if tc_flashvars is not None: self.out = True return qq_download_by_vid(tc_flashvars, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) cid = re.search(r'cid=(\d+)', self.page).group(1) if cid is not None: return self.download_by_vid(cid, False, **kwargs) else: # flashvars? flashvars = re.search(r'flashvars="([^"]+)"', self.page).group(1) if flashvars is None: raise Exception('Unsupported page {}'.format(self.url)) param = flashvars.split('&')[0] t, cid = param.split('=') t = t.strip() cid = cid.strip() if t == 'vid': sina_download_by_vid(cid, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) elif t == 'ykid': youku_download_by_vid(cid, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) elif t == 'uid': tudou_download_by_id(cid, self.title, output_dir=kwargs['output_dir'], merge=kwargs['merge'], info_only=kwargs['info_only']) else: raise NotImplementedError( 'Unknown flashvars {}'.format(flashvars)) return def movie_entry(self, **kwargs): patt = r"var\s*aid\s*=\s*'(\d+)'" aid = re.search(patt, self.page).group(1) page_list = loads( get_html( 'http://www.bilibili.com/widget/getPageList?aid={}'.format( aid))) # better ideas for bangumi_movie titles? 
self.title = page_list[0]['pagename'] return self.download_by_vid(page_list[0]['cid'], True, bangumi_movie=True, **kwargs) def get_video_from_url(self, url, **kwargs): self.url = url_locations(url) frag = urlparse(self.url).fragment # http://www.bilibili.com/video/av3141144/index_2.html#page=3 if frag: hit = re.search(r'page=(\d+)', frag) if hit is not None: page = hit.group(1) av_id = re.search(r'av(\d+)', self.url).group(1) self.url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format( av_id, page) self.page = get_html(self.url) if 'bangumi.bilibili.com/movie' in self.url: return self.movie_entry(**kwargs) elif 'bangumi.bilibili.com' in self.url: return self.bangumi_entry(**kwargs) elif 'live.bilibili.com' in self.url: return self.live_entry(**kwargs) elif 'vc.bilibili.com' in self.url: return self.vc_entry(**kwargs) else: return self.entry(**kwargs) def bangumi_entry(self, **kwargs): pass def live_entry(self, **kwargs): pass def vc_entry(self, **kwargs): pass
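# Hedged sketch (not part of the original snippet): the signing scheme used by
# api_sign() in the Bilibili class above, shown standalone. Keys are sorted,
# joined into a query string, and an md5 of (query + appsecret) is appended as
# `sign`. The appkey/appsecret values in the example call are placeholders.
import hashlib
import urllib

def sign_params(params, appkey, appsecret):
    params = dict(params, appkey=appkey)
    query = '&'.join('{}={}'.format(k, urllib.quote(str(params[k])))
                     for k in sorted(params))
    if appsecret is None:
        return query
    return query + '&sign=' + hashlib.md5(query + appsecret).hexdigest()

# Example: sign_params({'tid': 0, 'page': 1}, 'my_appkey', 'my_appsecret')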
class Session(requests.Session): """ Session for making API requests and interacting with the filesystem """ def __init__(self): super(Session, self).__init__() self.trust_env = False cookie_file = os.path.expanduser('~/.danabox/cookies.txt') cookie_dir = os.path.dirname(cookie_file) self.cookies = MozillaCookieJar(cookie_file) # Create the $HOME/.danabox dir if it doesn't exist if not os.path.isdir(cookie_dir): os.mkdir(cookie_dir, 0700) # Load existing cookies if the cookies.txt exists if os.path.isfile(cookie_file): self.cookies.load() self.cookies.clear_expired_cookies() def clear(self): """Clear cookies""" try: self.cookies.clear() self.cookies.save() except KeyError: pass def git_root(self): """ Return the absolute path from the git repository root If no git repository exists, raise an EnvironmentError """ try: git_root = subprocess.check_output( ['git', 'rev-parse', '--show-toplevel'], stderr=subprocess.PIPE).strip('\n') except subprocess.CalledProcessError: raise EnvironmentError('Current directory is not a git repository') return git_root def get_app(self): """ Return the application name for the current directory The application is determined by parsing `git remote -v` output for the origin remote. Because Danabox only allows deployment of public Github repos we can create unique app names from a combination of the Github user's name and the repo name. Eg; '[email protected]:opdemand/example-ruby-sinatra.git' becomes 'opdemand-example--ruby--sinatra' If no application is found, raise an EnvironmentError. """ git_root = self.git_root() remotes = subprocess.check_output(['git', 'remote', '-v'], cwd=git_root) if remotes is None: raise EnvironmentError('No git remotes found.') for remote in remotes.splitlines(): if 'github.com' in remote: url = remote.split()[1] break if url is None: raise EnvironmentError('No Github remotes found.') pieces = url.split('/') owner = pieces[-2].split(':')[-1] repo = pieces[-1].replace('.git', '') app_raw = owner + '/' + repo app_name = app_raw.replace('-', '--').replace('/', '-') return app_name app = property(get_app) def request(self, *args, **kwargs): """ Issue an HTTP request with proper cookie handling including `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>` """ for cookie in self.cookies: if cookie.name == 'csrftoken': if 'headers' in kwargs: kwargs['headers']['X-CSRFToken'] = cookie.value else: kwargs['headers'] = {'X-CSRFToken': cookie.value} break response = super(Session, self).request(*args, **kwargs) self.cookies.save() return response
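# Hedged sketch: the CSRF handling done by Session.request() above, shown with a
# plain requests.Session instead of the subclass. The pattern is simply "copy the
# csrftoken cookie into an X-CSRFToken header before writing requests"; the URL
# and form data in any call to this helper are placeholders.
import requests

def post_with_csrf(session, url, data):
    token = session.cookies.get('csrftoken')  # None if the cookie is absent
    headers = {'X-CSRFToken': token} if token else {}
    return session.post(url, data=data, headers=headers)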
# way it won't abort unless the user has configured it. https_handler = urllib2.HTTPSHandler if options.ca_certs: from caslib.validating_https import ValidatingHTTPSConnection class HTTPSConnection(ValidatingHTTPSConnection): ca_certs = options.ca_certs https_handler = HTTPSConnection.HTTPSHandler opener = urllib2.build_opener(https_handler) if options.cookiejar: cookiejar = MozillaCookieJar(os.path.expanduser(options.cookiejar)) try: cookiejar.load(ignore_discard=True) except IOError: pass opener.add_handler(urllib2.HTTPCookieProcessor(cookiejar=cookiejar)) if not options.verbose: logging.basicConfig(level=logging.WARNING) elif options.verbose == 1: logging.basicConfig(level=logging.INFO) else: logging.basicConfig(level=logging.DEBUG) userdb = NRAOUserDB(options.location, options.username, options.password, opener) for key in args: if options.query_by == DATABASE_ID: user = userdb.get_user_data(database_id=key)
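# Hedged sketch: the counterpart of the cookiejar.load(ignore_discard=True) call
# above. Session cookies are normally discarded, so they only survive across runs
# if both load() and save() are told to keep them. The filename is a placeholder.
from cookielib import MozillaCookieJar

jar = MozillaCookieJar('cookies.txt')
try:
    jar.load(ignore_discard=True)
except IOError:
    pass  # no cookie file yet
# ... perform authenticated requests through an opener that uses this jar ...
jar.save(ignore_discard=True)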
class Bilibili(): def __init__(self, appkey = APPKEY, appsecret = APPSECRET): self.appkey = appkey self.appsecret = appsecret self.is_login = False cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie' self.cj = MozillaCookieJar(cookie_path) if os.path.isfile(cookie_path): self.cj.load() if requests.utils.dict_from_cookiejar(self.cj).has_key('DedeUserID'): self.is_login = True self.mid = str(requests.utils.dict_from_cookiejar(self.cj)['DedeUserID']) opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj)) urllib2.install_opener(opener) def get_captcha(self, path = None): if not requests.utils.dict_from_cookiejar(self.cj).has_key('sid'): utils.get_page_content(LOGIN_CAPTCHA_URL.format(random.random()), headers = {'Referer':'https://passport.bilibili.com/login'}) result = utils.get_page_content(LOGIN_CAPTCHA_URL.format(random.random()), headers = {'Referer':'https://passport.bilibili.com/login'}) if path == None: path = tempfile.gettempdir() + '/captcha.jpg' with open(path, 'wb') as f: f.write(result) return path def get_encryped_pwd(self, pwd): import rsa result = json.loads(utils.get_page_content(LOGIN_HASH_URL.format(random.random()), headers={'Referer':'https://passport.bilibili.com/login'})) pwd = result['hash'] + pwd key = result['key'] pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key) pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key) pwd = base64.b64encode(pwd) pwd = urllib.quote(pwd) return pwd def api_sign(self, params): params['appkey']=self.appkey data = "" keys = params.keys() keys.sort() for key in keys: if data != "": data += "&" value = params[key] if type(value) == int: value = str(value) data += key + "=" + str(urllib.quote(value)) if self.appsecret == None: return data m = hashlib.md5() m.update(data + self.appsecret) return data + '&sign=' + m.hexdigest() def get_category_from_web_page(self): category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs':[]}} node = category_dict['0'] url = node['url'] result = BeautifulSoup(utils.get_page_content(url), "html.parser").findAll('li', {'class': 'm-i'}) for item in result: if len(item['class']) != 1: continue tid = item['data-tid'] title = item.em.contents[0] url = 'http:' + item.a['href'] category_dict[tid] = {'title': title, 'url': url, 'subs':[]} node['subs'].append(tid) #Fix video and movie if '11' not in category_dict['0']['subs']: category_dict['0']['subs'].append('11') if '23' not in category_dict['0']['subs']: category_dict['0']['subs'].append('23') category_dict['11'] = {'title': u'电视剧', 'url': 'http://bangumi.bilibili.com/tv/', 'subs': []} category_dict['23'] = {'title': u'电影', 'url': 'http://bangumi.bilibili.com/movie/', 'subs': []} for sub in category_dict['0']['subs']: node = category_dict[sub] url = node['url'] result = BeautifulSoup(utils.get_page_content(url), "html.parser").select('ul.n_num li') for item in result[1:]: if not item.has_attr('tid'): continue if not hasattr(item, 'a'): continue if item.has_attr('class'): continue tid = item['tid'] title = item.a.contents[0] if item.a['href'][:2] == '//': url = 'http:' + item.a['href'] else: url = HOME_URL + item.a['href'] category_dict[tid] = {'title': title, 'url': url, 'subs':[]} node['subs'].append(tid) return category_dict def get_category(self, tid = '0'): items = [{tid: {'title': '全部', 'url': CATEGORY[tid]['url'], 'subs': []}}] for sub in CATEGORY[tid]['subs']: items.append({sub: CATEGORY[sub]}) return items def get_category_name(self, tid): return CATEGORY[str(tid)]['title'] def get_order(self): return ORDER def 
get_category_list(self, tid = 0, order = 'default', days = 30, page = 1, pagesize = 10): params = {'tid': tid, 'order': order, 'days': days, 'page': page, 'pagesize': pagesize} url = LIST_URL.format(self.api_sign(params)) result = json.loads(utils.get_page_content(url)) results = [] for i in range(pagesize): if result['list'].has_key(str(i)): results.append(result['list'][str(i)]) else: break return results, result['pages'] def get_my_info(self): if self.is_login == False: return [] result = json.loads(utils.get_page_content(MY_INFO_URL)) return result['data'] def get_bangumi_chase(self, page = 1, pagesize = 10): if self.is_login == False: return [] url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize) result = json.loads(utils.get_page_content(url)) return result['data']['result'], result['data']['pages'] def get_bangumi_detail(self, season_id): url = BANGUMI_SEASON_URL.format(season_id) result = utils.get_page_content(url) if result[0] != '{': start = result.find('(') + 1 end = result.find(');') result = result[start:end] result = json.loads(result) return result['result'] def get_history(self, page = 1, pagesize = 10): if self.is_login == False: return [] url = HISTORY_URL.format(page, pagesize) result = json.loads(utils.get_page_content(url)) if len(result['data']) >= int(pagesize): total_page = int(page) + 1 else: total_page = int(page) return result['data'], total_page def get_dynamic(self, page = 1, pagesize = 10): if self.is_login == False: return [] url = DYNAMIC_URL.format(pagesize, page) result = json.loads(utils.get_page_content(url)) total_page = int((result['data']['page']['count'] + pagesize - 1) / pagesize) return result['data']['feeds'], total_page def get_attention(self, page = 1, pagesize = 10): if self.is_login == False: return [] url = ATTENTION_URL.format(self.mid, page, pagesize) result = json.loads(utils.get_page_content(url)) return result['data']['list'], result['data']['pages'] def get_attention_video(self, mid, tid = 0, page = 1, pagesize = 10): if self.is_login == False: return [] url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid) result = json.loads(utils.get_page_content(url)) return result['data'], result['data']['pages'] def get_attention_channel(self, mid): if self.is_login == False: return [] url = ATTENTION_CHANNEL_URL.format(mid) result = json.loads(utils.get_page_content(url)) return result['data']['list'] def get_attention_channel_list(self, mid, cid, page = 1, pagesize = 10): if self.is_login == False: return [] url = ATTENTION_CHANNEL_LIST_URL.format(mid, cid, page, pagesize) result = json.loads(utils.get_page_content(url)) return result['data']['list'], result['data']['total'] def get_fav_box(self): if self.is_login == False: return [] url = FAV_BOX_URL.format(self.mid) result = json.loads(utils.get_page_content(url)) return result['data']['list'] def get_fav(self, fav_box, page = 1, pagesize = 10): if self.is_login == False: return [] url = FAV_URL.format(self.mid, page, pagesize, fav_box) result = json.loads(utils.get_page_content(url)) return result['data']['vlist'], result['data']['pages'] def login(self, userid, pwd, captcha): #utils.get_page_content('http://www.bilibili.com') if self.is_login == True: return True, '' pwd = self.get_encryped_pwd(pwd) data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(captcha, userid, pwd) result = utils.get_page_content(LOGIN_URL, data, {'Origin':'https://passport.bilibili.com', 'Referer':'https://passport.bilibili.com/login'}) if not 
requests.utils.dict_from_cookiejar(self.cj).has_key('DedeUserID'): return False, LOGIN_ERROR_MAP[json.loads(result)['code']] self.cj.save() self.is_login = True self.mid = str(requests.utils.dict_from_cookiejar(self.cj)['DedeUserID']) return True, '' def logout(self): self.cj.clear() self.cj.save() self.is_login = False def get_av_list_detail(self, aid, page = 1, fav = 0, pagesize = 10): params = {'id': aid, 'page': page} if fav != 0: params['fav'] = fav url = VIEW_URL.format(self.api_sign(params)) result = json.loads(utils.get_page_content(url)) results = [result] if (int(page) < result['pages']) and (pagesize > 1): results += self.get_av_list_detail(aid, int(page) + 1, fav, pagesize = pagesize - 1)[0] return results, result['pages'] def get_av_list(self, aid): url = AV_URL.format(aid) result = json.loads(utils.get_page_content(url)) return result def get_video_urls(self, cid): m = hashlib.md5() m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER)) url = INTERFACE_URL.format(str(cid), m.hexdigest()) doc = minidom.parseString(utils.get_page_content(url)) urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue for durl in doc.getElementsByTagName('durl')] urls = [url if not re.match(r'.*\.qqvideo\.tc\.qq\.com', url) else re.sub(r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', url) for url in urls] return urls def add_history(self, aid, cid): url = ADD_HISTORY_URL.format(str(cid), str(aid)) utils.get_page_content(url)
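# Hedged sketch: the login-state check used in __init__ and login() above. A
# cookielib jar has no dict lookup, so requests.utils.dict_from_cookiejar() is
# used to find a cookie by name. The cookie name matches the snippet; the path
# passed in is whatever the caller uses.
import os
from cookielib import MozillaCookieJar
import requests

def load_login_state(cookie_path):
    cj = MozillaCookieJar(cookie_path)
    if os.path.isfile(cookie_path):
        cj.load()
    cookies = requests.utils.dict_from_cookiejar(cj)
    mid = cookies.get('DedeUserID')
    return cj, (mid is not None), mid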
class CookieWay: def __init__(self): self.cookiejar = MozillaCookieJar() def load(self, file="cookie.txt"): self.cookiejar.load(file, ignore_discard=True, ignore_expires=True) def save(self, file="cookie.txt"): self.cookiejar.save(file, ignore_discard=True, ignore_expires=True) def torequestscj(self, s): for item in self.cookiejar: cookiesobject = requests.cookies.create_cookie(domain=item.domain, name=item.name, value=item.value) s.cookies.set_cookie(cookiesobject) def toseleniumcj(self, driver): domains = [] for item in self.cookiejar: if item.domain not in domains: domains.append(item.domain) for i in range(len(domains)): if domains[i][0:1] == ".": domains[i] = domains[i][1:] domains = list(set(domains)) for item in domains: driver.get("https://" + item) for item2 in self.cookiejar: if item2.domain == item or item2.domain == "." + item: cookie_dict = { 'domain': item2.domain, 'name': item2.name, 'value': item2.value, 'secure': item2.secure } if item2.path_specified: cookie_dict['path'] = item2.path driver.add_cookie(cookie_dict) def sele2resq(self, driver, s): self.selcj_cj(driver) self.torequestscj(s) def resq2sele(self, s, driver): self.reqcj_cj(s) self.toseleniumcj(driver) def selcj_cj(self, driver): cookie = driver.get_cookies() for s_cookie in cookie: self.cookiejar.set_cookie( Cookie( version=0, name=s_cookie['name'], value=s_cookie['value'], port='80', port_specified=False, domain=s_cookie['domain'], domain_specified=True, domain_initial_dot=False, path=s_cookie['path'], path_specified=True, secure=s_cookie['secure'], expires="2069592763", # s_cookie['expiry'] discard=False, comment=None, comment_url=None, rest=None, rfc2109=False)) def reqcj_cj(self, s): for s_cookie in s.cookies: self.cookiejar.set_cookie( Cookie( version=0, name=s_cookie.name, value=s_cookie.value, port='80', port_specified=False, domain=s_cookie.domain, domain_specified=True, domain_initial_dot=False, path="/", path_specified=True, secure=True, expires="2069592763", # s_cookie['expiry'] discard=False, comment=None, comment_url=None, rest=None, rfc2109=False))
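# Hedged usage sketch for the CookieWay helper above: load a Netscape-format
# cookie.txt, copy the cookies into a requests session, and write any cookies
# the session picked up back to disk. "cookie.txt" and the URL are placeholders.
import requests

cw = CookieWay()
cw.load("cookie.txt")

s = requests.Session()
cw.torequestscj(s)           # jar -> requests session
s.get("https://example.com")
cw.reqcj_cj(s)               # requests session -> jar
cw.save("cookie.txt")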
class HttpScan(DummyScan): def __init__(self, args): super(HttpScan, self).__init__(args) self.session = requesocks.session() adapters.DEFAULT_RETRIES = self.args.max_retries self.tor = None if self.args.tor: self.out.log("Enabling TOR") self.tor = Torify() self.session.proxies = {'http': 'socks5://127.0.0.1:9050', 'https': 'socks5://127.0.0.1:9050'} if self.args.check_tor: # Check TOR self.out.log("Checking IP via TOR") rip, tip = self.tor.check_ip(verbose=True) if tip is None: self.out.log('TOR is not working properly!', logging.ERROR) exit(-1) if self.args.cookies is not None: if path.exists(self.args.cookies) and path.isfile(self.args.cookies): self.cookies = MozillaCookieJar(self.args.cookies) self.cookies.load() else: # self.out.log('Could not find cookie file: %s' % self.args.load_cookies, logging.ERROR) self.cookies = Cookies.from_request(self.args.cookies) else: self.cookies = None self.ua = UserAgent() if self.args.user_agent is None else self.args.user_agent def filter(self, response): if response is None: return False # Filter responses and save responses that are matching ignore, allow rules if (self.args.allow is None and self.args.ignore is None) or \ (self.args.allow is not None and response.status_code in self.args.allow) or \ (self.args.ignore is not None and response.status_code not in self.args.ignore): # TODO: add regex search return True return False def scan_url(self, url): # TODO: add options r = None ex = None try: r = self.session.get(url) except Exception as e: ex = e finally: self.cb_response(url, r, ex) return r, ex def scan_host(self, host, urls): res = [] for u in urls: url = get_full_url(host, u) r, ex = self.scan_url(url) self.out.logger.write_response(url, r, ex) if self.filter(r): self.out.write(url, r, ex) res.append((url, r, ex)) return res def cb_scan_done(self, future): pass def cb_response(self, url, reponse, exception): pass
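# Hedged sketch: the response filter from HttpScan.filter() above, as a pure
# function over the status code. allow/ignore are optional collections of status
# codes; when neither is given, every response passes.
def status_allowed(status_code, allow=None, ignore=None):
    if allow is None and ignore is None:
        return True
    if allow is not None and status_code in allow:
        return True
    if ignore is not None and status_code not in ignore:
        return True
    return False

# status_allowed(200, allow=[200, 301])  -> True
# status_allowed(404, ignore=[404])      -> False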
class SimpleCrawler: USER_AGENT = 'SimpleCrawler/0.1' HEADERS = { 'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip', 'Connection': 'keep-alive' } CONTENT_TYPE_PAT = re.compile(r'([^\s;]+)(.*charset=([^\s;]+))?', re.I) def __init__(self, starturl, index_html='', maxlevel=1, cookie_file=None, acldb=None, urldb=None, default_charset=None, delay=0, timeout=300, debug=0): (proto, self.hostport, _x, _y, _z) = urlsplit(starturl) # assert proto == 'http' #Thread.__init__(self) self.debug = debug self.index_html = index_html if cookie_file: self.cookiejar = MozillaCookieJar(cookie_file) self.cookiejar.load() else: self.cookiejar = None self.robotstxt = RobotFileParser() self.robotstxt.set_url(urljoin(starturl, '/robots.txt')) # self.robotstxt.read() self.conn = None self.urldb = urldb self.acldb = acldb self.curlevel = 0 self.delay = delay self.timeout = timeout self.default_charset = default_charset if starturl.endswith('/'): starturl += self.index_html self.urls = [(starturl, maxlevel)] self.crawled = {} # 1:injected, 2:crawled return def accept_url(self, url): if url.endswith('/'): url += self.index_html if self.acldb and not self.acldb.allowed(url): return None return url def inject_url(self, url): if (not self.curlevel) or (not url) or (url in self.crawled): return False if not self.robotstxt.can_fetch(self.USER_AGENT, url): if self.debug: print >> stderr, 'DISALLOW: %r' % url return None if self.debug: print >> stderr, 'INJECT: %r' % url self.crawled[url] = 1 self.urls.append((url, self.curlevel - 1)) return True def get1(self, url, maxretry=5, maxredirect=5): if self.debug: print >> stderr, 'GET: %r' % url # loop for rtry in range(maxredirect): # forge urllib2.Request object. req = Request(url) # add cookie headers if necessary. if self.cookiejar: self.cookiejar.add_cookie_header(req) headers = req.unredirected_hdrs headers.update(self.HEADERS) else: headers = self.HEADERS # get response. for ctry in range(maxretry): try: if not self.conn: print >> stderr, 'Making connection: %r...' % ( self.hostport, ) self.conn = HTTPConnection(self.hostport) self.conn.request('GET', req.get_selector().replace(' ', ''), '', headers) # self.conn.sock.settimeout(self.timeout) resp = self.conn.getresponse() break except BadStatusLine, x: # connection closed unexpectedly print >> stderr, 'Connection closed unexpectedly.' # it restarts the connection... self.conn.close() self.conn = None except socket.error, x: # connection closed unexpectedly print >> stderr, 'Socket error:', x self.conn.close() self.conn = None else:
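# Hedged sketch: the manual cookie handling SimpleCrawler.get1() performs when it
# talks to httplib directly instead of going through a urllib2 opener. The jar
# writes a Cookie header onto the Request, and those headers are then handed to
# HTTPConnection.request() by hand. URL and cookie file are placeholders.
from cookielib import MozillaCookieJar
from urllib2 import Request

jar = MozillaCookieJar('cookies.txt')
try:
    jar.load()
except IOError:
    pass
req = Request('http://example.com/index.html')
jar.add_cookie_header(req)           # adds a Cookie header if the jar has matches
headers = dict(req.unredirected_hdrs)
headers.setdefault('User-Agent', 'SimpleCrawler/0.1')
# headers can now be passed to httplib.HTTPConnection.request('GET', path, '', headers)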
class BasisRetr: """The main entry points, once a BasisRetr object has been created, are: 1) GetDayData()-- download metrics, activity, sleep data for a single day from the basis website and save it, 2) GetActivityCsvForMonth()-- download activity summaries for an entire month, and 3) GetSleepCsvForMonth()--download sleep summaries for an entire month.""" LOGIN_URL = 'https://app.mybasis.com/login' UID_URL = 'https://app.mybasis.com/api/v1/user/me.json' METRICS_URL = 'https://app.mybasis.com/api/v1/chart/{userid}.json?interval=60&units=s&start_date={date}&start_offset=0&end_offset=0&summary=true&bodystates=true&heartrate=true&steps=true&calories=true&gsr=true&skin_temp=true&air_temp=true' ACTIVITIES_URL ='https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=run,walk,bike,sleep' SLEEP_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=sleep' SLEEP_EVENTS_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?type=sleep&event.type=toss_and_turn&expand=activities.stages,activities.events' DATE_FORMAT = "%04d-%02d-%02d" # save-to filename. date is prefix, format is suffix MO_ACTIVITY_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}_basis_activities_summary.csv" MO_SLEEP_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}_basis_sleep_summary.csv" # day sleep and activity filenames (for month summaries) DAY_ACTIVITY_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}-{dy:02d}_basis_activities.json" DAY_SLEEP_FNAME_TEMPLATE = "{yr:04d}-{mo:02d}-{dy:02d}_basis_sleep.json" DAY_JSON_FNAME_TEMPLATE = "{date}_basis_{typ}.json" METRICS_FNAME_TEMPLATE = "{date}_basis_metrics.{ext}" SLEEP_FNAME_TEMPLATE= "{date}_basis_sleep.{format}" def __init__(self, loadconfig = None): # create config info self.cfg = Config(cfg_items = CFG_ITEMS) if loadconfig: self.cfg.Load() else: # if config file doesn't exist, save the defaults loaded above self.cfg.Save() #saves # url opener for website retrieves opener = urllib2.build_opener() self.cj = MozillaCookieJar(self.cfg.cookie_filename)#BasisRetr.COOKIE_FILENAME) self.session_cookie = None if os.path.exists(self.cfg.cookie_filename):#BasisRetr.COOKIE_FILENAME): self.cj.load() self.CheckSessionCookie() # set session cookie if it exists and hasn't expired # need to use build_opener to submit cookies and post form data self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj)) def GetDayData(self, yr, mo, day, typ, save_csv, override_cache = False, act_metr= True): """Main entry method for getting a day's worth of data, formatting, then saving it. typ is the type of data: metrics, activities, or sleep. Data is always saved in json format, but if save_csv is True, save to csv as well as json. override_cache ignores any already downloaded json. 
act_metr, if True, saves sleep and activity state along with metrics.""" date = BasisRetr.DATE_FORMAT % (yr, mo, day) ydate = self.GetYesterdayDateAsString(yr, mo, day) self.Status("Checking Login") self.CheckLogin() # ensure we're logged in self.Status("getting {} for {}".format(typ,date)) # figure out which data to get data = None # filename cfname = "{date}_basis_{typ}.csv".format(date=date, typ=typ) # if needed, download json data from website and save to file if typ == 'metrics': mjdata = self.RetrieveJsonOrCached(date, 'metrics', override_cache) ### MOVE THIS ERROR CHECKING INTO THE ABOVE METHOD if type(mjdata) == str or mjdata == None: # simple error checking self.Status('OnGetDayData: Metrics json conversion failed.') print mjdata[:500] return # also load up actities if typ == 'activities' or act_metr: ajdata = self.RetrieveJsonOrCached(date, 'activities', override_cache) if type(ajdata) == str or ajdata == None: # simple error checking self.Status('OnGetDayData: Activities json conversion failed.') print ajdata[:500] return if typ == 'sleep' or act_metr: sjdata = self.RetrieveJsonOrCached(date, 'sleep', override_cache) if type(sjdata) == str or sjdata == None: # simple error checking self.Status('OnGetDayData: Sleep json conversion failed.') print sjdata[:500] return if act_metr: # add yesterday's sleep data sjdata2= self.RetrieveJsonOrCached(ydate, 'sleep') # Next, turn the list of python objects into a csv file. # If asked to (via act_metr), collect sleep and activity type, then add them to each timestamp. cdata = None if save_csv: if typ == 'activities' or act_metr: act_list = self.JsonActivitiesToList(ajdata) cdata = self.CreateCSVFromList(self.cfg.csv_activity_colnames, act_list) if typ == 'sleep' or act_metr: sleep_evts_list = self.JsonSleepEventsToList(sjdata) cdata = self.CreateCSVFromList(self.cfg.csv_sleep_evt_colnames, sleep_evts_list) if act_metr: # prepend yesterday's sleep events as they may start before midnight. sleep_evts_list[:0] = self.JsonSleepEventsToList(sjdata2) if typ == 'metrics': metrics_list = self.JsonMetricsToList(mjdata) if act_metr: # add activities to metrics self.AddActivityTypeToMetrics(metrics_list, act_list, sleep_evts_list) header = self.cfg.csv_metrics_colnames + self.cfg.csv_activity_type_colnames else: header = self.cfg.csv_metrics_colnames cdata = self.CreateCSVFromList(header, metrics_list) # If we were able to make a csv file, save it. if cdata: fpath = os.path.join(os.path.abspath(self.cfg.savedir), cfname) self.SaveData(cdata, fpath) self.Status("Saved "+typ+" csv file at "+fpath) def CheckLogin(self): # the test below gives HTTP Error 401: Unauthorized if don't get cookie each time # I wonder if cookielib::FileCookieJar might do the right thing # i.e., save all cookies, not just session cookie. if not self.cfg.userid or not self.cfg.session_token: self.Login() def Login(self, login = None, passwd = None): """Log in to basis website to get session (access) token via cookie. Don't need to pass in loginid and password if want to use stored info.""" if login: self.cfg.loginid = login if passwd: self.cfg.passwd = passwd form_data = {'next': 'https://app.mybasis.com', 'submit': 'Login', 'username': self.cfg.loginid, 'password': self.cfg.passwd} enc_form_data = urllib.urlencode(form_data) f = self.opener.open(BasisRetr.LOGIN_URL, enc_form_data) content = f.read() #$ do we need to close f? 
m = re.search('error_string\s*=\s*"(.+)"', content, re.MULTILINE) if m: raise Exception(m.group(1)) self.CheckSessionCookie() # make sure we got the access token if not self.cfg.session_token: self.Status("Didn't find an access token in:"+["({}={}), ".format(c.name.c.value) for c in self.cj]) else: self.Status("Logged in, Got Access Token = "+self.cfg.session_token) def CheckSessionCookie(self): for cookie in self.cj: if cookie.name == 'access_token': self.cfg.session_token = cookie.value def GetUserID(self): """Retrieve the long hex string that uniquely identifies a user from the Basis website.""" if not self.cfg.session_token: raise Exception('no token', 'no access token found-may be internet connectivity or bad login info.') self.opener.addheaders = [('X-Basis-Authorization', "OAuth "+self.cfg.session_token)] f = self.opener.open(BasisRetr.UID_URL) content = f.read() jresult = json.loads(content) self.cfg.userid= None if 'id' in jresult: self.cfg.userid = jresult['id'] def GetYesterdayDateAsString(self, yr, mo, day): """Need yesterday's date to get sleep events for a given calendar day. This is because sleep events, as downloaded from the Basis Website, start from the prior evening, when you actually went to sleep.""" tday, tmo, tyr = day-1, mo, yr if tday <1: # previous month tmo -= 1 if tmo < 1: # previous year tyr -= 1 tmo = 12 # once we adjusted the month, find the last day of that month tday = calendar.monthrange(tyr, tmo)[1] tdate = BasisRetr.DATE_FORMAT % (tyr, tmo, tday) return tdate def RetrieveMetricsJsonForDay(self, date): # Need userid in order to get metrics if not self.cfg.userid: self.Status("BasisRetr::GetMetrics: No userid available; getting from website.") self.GetUserID() self.Status("Retrieved userid from website.") # Form the URL url = BasisRetr.METRICS_URL.format(date=date,userid=self.cfg.userid) return self.GetJsonData(url) def RetrieveActivitiesJsonForDay(self, date): url = BasisRetr.ACTIVITIES_URL.format(date = date) return self.GetJsonData(url) def RetrieveSleepSummaryJsonForDay(self, date): url = BasisRetr.SLEEP_URL.format(date=date) return self.GetJsonData(url) def RetrieveSleepEventsJsonForDay(self,date): url = BasisRetr.SLEEP_EVENTS_URL.format(date=date) return self.GetJsonData(url) def GetJsonStorageDir(self): """Allow json storage dir to be absolute or relative (to csv dir) path.""" if os.path.isabs(self.cfg.jsondir): return self.cfg.jsondir else: return os.path.join(os.path.abspath(self.cfg.savedir), self.cfg.jsondir) def RetrieveJsonOrCached(self, date, typ, user_override_cache = None): """If json file exists in json dir, then just read that. Otherwise, download from basis website. If override_cache is set, always download from website.""" fname = BasisRetr.DAY_JSON_FNAME_TEMPLATE.format(date=date, typ=typ) fpath = os.path.join(self.GetJsonStorageDir(), fname) # don't use cache if the saved data is very recent-- what's saved may have been before the end of the day. 
if os.path.isfile(fpath): # these calculations are in seconds since epoch days_prev = 3600*24*self.cfg.nocache_days last_mod_time = os.path.getmtime(fpath) target_time = time.mktime(datetime.datetime.strptime(date, "%Y-%m-%d").timetuple()) force_override_cache = last_mod_time - target_time < days_prev # if file exists and we've said via UI, "don't override the cache", then read json from cache if os.path.isfile(fpath) and not user_override_cache and not force_override_cache: with open(fpath, "r") as f: data = f.read() jdata = json.loads(data) else: # retrieve data from website if typ == 'metrics': jdata = self.RetrieveMetricsJsonForDay(date) elif typ == 'activities': jdata = self.RetrieveActivitiesJsonForDay(date) elif typ == 'sleep': jdata = self.RetrieveSleepEventsJsonForDay(date) elif typ == 'sleep_summary': jdata = self.RetrieveSleepSummaryJsonForDay(date) #json_path = os.path.join(self.GetJsonStorageDir(), fname) # make sure directory exists if not os.path.isdir(self.GetJsonStorageDir()): os.makedirs(self.GetJsonStorageDir()) self.SaveData(json.dumps(jdata), fpath) return jdata def GetJsonData(self, url): if DEBUG: print url if True: try: f = self.opener.open(url) jresult= json.loads(f.read()) except urllib2.HTTPError as e: reason = BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code] jresult = {'code': e.code, 'error':reason, 'url':url} # callback (if available) to UI manager to ensure it doesn't freeze if hasattr(self, 'FreezePrevention'): self.FreezePrevention() if 'code' in jresult and jresult['code'] == 401: # unauthorized. try logging in self.Status("Auth error, Logging in for new session token.") self.Login() try: # try again f = self.opener.open(url) jresult= json.loads(f.read()) except urllib2.HTTPError as e: reason = BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code] jresult = {'code': e.code, 'error':reason, 'url':url} return jresult def SaveData(self, data, fpath): try: fh = file(os.path.abspath(fpath), "w") fh.write(data) except IOError, v: self.Status("problem saving file to:"+fpath+"\n--Error: "+`v`) try: # if problem is on open, then fh doesn't exist. fh.close() except: pass
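# Hedged sketch: the cache check from RetrieveJsonOrCached() above. A cached json
# file is treated as stale ("force override") when it was written less than
# nocache_days after the day it describes, because that day may not have been
# over yet when the file was saved. fpath, date_str and nocache_days are
# placeholders supplied by the caller.
import os, time, datetime

def cache_is_stale(fpath, date_str, nocache_days=3):
    if not os.path.isfile(fpath):
        return True  # nothing cached yet
    seconds_prev = 3600 * 24 * nocache_days
    last_mod_time = os.path.getmtime(fpath)
    target_time = time.mktime(
        datetime.datetime.strptime(date_str, "%Y-%m-%d").timetuple())
    return last_mod_time - target_time < seconds_prev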
class HttpScanner(object): def __init__(self, args): """ Initialise HTTP scanner :param args: :return: """ self.args = args self.output = HttpScannerOutput(args) self._init_scan_options() # Reading files self.output.write_log("Reading files and deduplicating.", logging.INFO) self.hosts = self._file_to_list(args.hosts) self.urls = self._file_to_list(args.urls) # self._calc_urls() out = 'Loaded %i hosts %i urls' % (self.hosts_count, self.urls_count) if self.args.ports is not None: out += ' %i ports' % len(self.args.ports) self.output.print_and_log(out) if self.args.ports is not None and not self.args.syn: new_hosts = [] for host in self.hosts: for port in self.args.ports: # print(host, port) new_hosts.append(helper.generate_url(host, port)) self.hosts = new_hosts # self._calc_urls() self.output.print_and_log('%i full urls to scan' % self.full_urls_count) # Queue and workers self.hosts_queue = JoinableQueue() self.workers = [] def _file_to_list(self, filename, dedup=True): """ Get list from file :param filename: file to read :return: list of lines """ if not path.exists(filename) or not path.isfile(filename): self.output.print_and_log('File %s not found!' % filename, logging.ERROR) exit(-1) # Preparing lines list lines = filter(lambda line: line is not None and len(line) > 0, open(filename).read().split('\n')) if len(lines) == 0: self.output.print_and_log('File %s is empty!' % filename, logging.ERROR) exit(-1) return helper.deduplicate(lines) if dedup else lines def _init_scan_options(self): # Session self.session = session() self.session.timeout = self.args.timeout self.session.verify = False # TODO: debug and check # self.session.mount("http://", HTTPAdapter(max_retries=self.args.max_retries)) # self.session.mount("https://", HTTPAdapter(max_retries=self.args.max_retries)) # http://stackoverflow.com/questions/15431044/can-i-set-max-retries-for-requests-request # Max retries adapters.DEFAULT_RETRIES = self.args.max_retries # TOR if self.args.tor: self.output.write_log("TOR usage detected. Making some checks.") self.session.proxies = { 'http': 'socks5://127.0.0.1:9050', 'https': 'socks5://127.0.0.1:9050' } url = 'http://ifconfig.me/ip' real_ip, tor_ip = None, None # Ger real IP address try: real_ip = get(url).text.strip() except Exception as exception: self.output.print_and_log("Couldn't get real IP address. Check yout internet connection.", logging.ERROR) self.output.write_log(str(exception), logging.ERROR) exit(-1) # Get TOR IP address try: tor_ip = self.session.get(url).text.strip() except Exception as exception: self.output.print_and_log("TOR socks proxy doesn't seem to be working.", logging.ERROR) self.output.write_log(str(exception), logging.ERROR) exit(-1) # Show IP addresses self.output.print_and_log('Real IP: %s TOR IP: %s' % (real_ip, tor_ip)) if real_ip == tor_ip: self.output.print_and_log("TOR doesn't work! 
Stop to be secure.", logging.ERROR) exit(-1) # Proxy if self.args.proxy is not None: self.session.proxies = {"https": self.args.proxy, "http": self.args.proxy} # Auth if self.args.auth is not None: items = self.args.auth.split(':') self.session.auth = (items[0], items[1]) # Cookies self.cookies = {} if self.args.cookies is not None: self.cookies = Cookies.from_request(self.args.cookies) # Cookies from file if self.args.load_cookies is not None: if not path.exists(self.args.load_cookies) or not path.isfile(self.args.load_cookies): self.output.print_and_log('Could not find cookie file: %s' % self.args.load_cookies, logging.ERROR) exit(-1) self.cookies = MozillaCookieJar(self.args.load_cookies) self.cookies.load() self.session.cookies = self.cookies # User-Agent self.ua = UserAgent() if self.args.random_agent else None def worker(self, worker_id): self.output.write_log('Worker %i started.' % worker_id) while not self.hosts_queue.empty(): host = self.hosts_queue.get() try: self.scan_host(worker_id, host) finally: self.output.write_log('Worker %i finished.' % worker_id) self.hosts_queue.task_done() def _head_available(self, host): """ Determine if HEAD requests is allowed :param host: :return: """ # Trying to use OPTIONS request try: response = self.session.options(host, headers=self._fill_headers()) o = response.headers['allow'] if 'allow' in response.headers else None if o is not None and o.find('HEAD') != -1: return True except: # TODO: fix pass try: return False if self.session.head(host, headers=self._fill_headers()).status_code == 405 else True except: # TODO: fix return False def scan_host(self, worker_id, host): # check if resolvable ip = helper.url_to_ip(host) if ip is None: self.output.write_log('Could not resolve %s Skipping...' % host, logging.WARNING) self.output.urls_scanned += len(self.urls) return # Check for HEAD host_url = helper.host_to_url(host) head_available = False if self.args.head: head_available = self._head_available(host) if head_available: self.output.write_log('HEAD is supported for %s' % host) errors_count, urls_scanned = 0, 0 for url in self.urls: full_url = urljoin(host_url, url) r = self.scan_url(full_url, head_available) urls_scanned += 1 self.output.urls_scanned += 1 # Output r['worker'] = worker_id self.output.write(**r) if r['exception'] is not None: errors_count += 1 # Skip host on errors if self.args.skip is not None and errors_count == self.args.skip: self.output.write_log('Errors limit reached on %s Skipping other urls.' % host, logging.WARNING) self.output.urls_scanned += len(self.urls) - urls_scanned break # cookies bugfix? 
self.session.cookies.clear() def _fill_headers(self): # Fill UserAgent in headers headers = {} if self.args.user_agent is not None: headers['User-agent'] = self.args.user_agent elif self.args.random_agent: headers['User-agent'] = self.ua.random # Fill Referer in headers if self.args.referer is not None: headers['Referer'] = self.args.referer return headers def _parse_response(self, url, response, exception): res = {'url': url, 'response': response, 'exception': exception} if response is None or exception is not None: res.update({ 'status': -1, 'length': -1, }) return res try: length = int(response.headers['content-length']) if 'content-length' in response.headers else len( response.text) except Exception as exception: self.output.write_log( "Exception while getting content length for URL: %s Exception: %s" % (url, str(exception)), logging.ERROR) length = 0 res.update({ 'status': response.status_code, 'length': length, }) return res def scan_url(self, url, use_head=False): self.output.write_log('Scanning %s' % url, logging.DEBUG) # Query URL and handle exceptions response, exception = None, None method = 'HEAD' if use_head else 'GET' try: # TODO: add support for user:password in URL response = self.session.request(method, url, headers=self._fill_headers(), allow_redirects=self.args.allow_redirects) except ConnectionError as ex: self.output.write_log('Connection error while quering %s' % url, logging.ERROR) exception = ex except HTTPError as ex: self.output.write_log('HTTP error while quering %s' % url, logging.ERROR) exception = ex except Timeout as ex: self.output.write_log('Timeout while quering %s' % url, logging.ERROR) exception = ex except TooManyRedirects as ex: self.output.write_log('Too many redirects while quering %s' % url, logging.ERROR) exception = ex except Exception as ex: self.output.write_log('Unknown exception while quering %s' % url, logging.ERROR) exception = ex # print('cookies: %s' % self.cookies) print('session.cookies: %s' % self.session.cookies) # self.session.cookies = self.cookies return self._parse_response(url, response, exception) def signal_handler(self): """ Signal hdndler :return: """ # TODO: add saving status via pickle self.output.print_and_log('Signal caught. Stopping...', logging.WARNING) self.stop() exit(signal.SIGINT) def _calc_urls(self): # Calculations self.urls_count = len(self.urls) self.hosts_count = len(self.hosts) self.full_urls_count = len(self.urls) * len(self.hosts) self.output.args.urls_count = self.full_urls_count def start(self): """ Start mulithreaded scan :return: """ # Set signal handler gevent.signal(signal.SIGTERM, self.signal_handler) gevent.signal(signal.SIGINT, self.signal_handler) gevent.signal(signal.SIGQUIT, self.signal_handler) # ICMP scan if self.args.icmp: if geteuid() != 0: self.output.print_and_log('To use ICMP scan option you must run as root. Skipping ICMP scan', logging.WARNING) else: self.output.print_and_log('Starting ICMP scan.') self.hosts = helper.icmp_scan(self.hosts, self.args.timeout) self._calc_urls() self.output.print_and_log('After ICMP scan %i hosts %i urls loaded, %i urls to scan' % (self.hosts_count, self.urls_count, self.full_urls_count)) # SYN scan if self.args.syn: if self.args.tor or self.args.proxy is not None: self.output.print_and_log('SYN scan via tor or proxy is impossible!', logging.WARNING) self.output.print_and_log('Stopping to prevent deanonymization!', logging.WARNING) exit(-1) if geteuid() != 0: self.output.print_and_log('To use SYN scan option you must run as root. 
Skipping SYN scan', logging.WARNING) else: self.output.print_and_log('Starting SYN scan.') self.hosts = helper.syn_scan(self.hosts, self.args.ports, self.args.timeout) self._calc_urls() self.output.print_and_log('After SYN scan %i hosts %i urls loaded, %i urls to scan' % (self.hosts_count, self.urls_count, self.full_urls_count)) # Check threads count vs hosts count if self.args.threads > self.hosts_count: self.output.write_log('Too many threads! Fixing threads count to %i' % self.hosts_count, logging.WARNING) threads_count = self.hosts_count else: threads_count = self.args.threads # Output urls count self.output.args.urls_count = self.full_urls_count # Start workers self.workers = [spawn(self.worker, i) for i in range(threads_count)] # Fill and join queue [self.hosts_queue.put(host) for host in self.hosts] self.hosts_queue.join() def stop(self): """ Stop scan :return: """ # TODO: stop correctly gevent.killall(self.workers)
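# Hedged sketch: the HEAD-support probe from _head_available() above, written
# against a plain requests session. The OPTIONS Allow header is consulted first;
# if that is inconclusive, a HEAD request is sent and a 405 means "not allowed".
# host_url is a placeholder supplied by the caller.
import requests

def head_available(session, host_url):
    try:
        allow = session.options(host_url).headers.get('allow', '')
        if 'HEAD' in allow:
            return True
    except requests.RequestException:
        pass
    try:
        return session.head(host_url).status_code != 405
    except requests.RequestException:
        return False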
Skipping SYN scan', logging.WARNING) else: self.output.print_and_log('Starting SYN scan.') self.hosts = helper.syn_scan(self.hosts, self.args.ports, self.args.timeout) self._calc_urls() self.output.print_and_log( 'After SYN scan %i hosts %i urls loaded, %i urls to scan' % (self.hosts_count, self.urls_count, self.full_urls_count)) # Check threds count vs hosts count if self.args.threads > self.hosts_count: self.output.write_log( 'Too many threads! Fixing threads count to %i' % self.hosts_count, logging.WARNING) threads_count = self.hosts_count else: threads_count = self.args.threads # Output urls count self.output.args.urls_count = self.full_urls_count # Start workers self.workers = [spawn(self.worker, i) for i in range(threads_count)] # Fill and join queue [self.hosts_queue.put(host) for host in self.hosts] self.hosts_queue.join() def stop(self): """ Stop scan :return: """ # TODO: stop correctly gevent.killall(self.workers)
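# A minimal, self-contained sketch (Python 3) of the cookie handling used by the
# scanner above: load a Netscape/Mozilla-format cookies.txt into a MozillaCookieJar
# and hand it to a requests session, then save whatever the server sets back.
# The file name and URL are illustrative, not taken from the scanner's options.
from http.cookiejar import MozillaCookieJar
import requests

jar = MozillaCookieJar('cookies.txt')
jar.load(ignore_discard=True, ignore_expires=True)   # raises if the file is missing or malformed

session = requests.Session()
session.cookies = jar                                 # requests accepts any CookieJar instance
response = session.get('http://example.com/')
jar.save(ignore_discard=True)                         # persist cookies set during the request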
class SimpleCrawler: USER_AGENT = 'SimpleCrawler/0.1' HEADERS = { 'User-Agent': USER_AGENT, 'Accept-Encoding': 'gzip', 'Connection': 'keep-alive' } CONTENT_TYPE_PAT = re.compile(r'([^\s;]+)(.*charset=([^\s;]+))?', re.I) def __init__(self, starturl, index_html='', maxlevel=1, cookie_file=None, acldb=None, urldb=None, default_charset=None, delay=0, timeout=300, debug=0): (proto, self.hostport, _x, _y, _z) = urlsplit(starturl) assert proto == 'http' #Thread.__init__(self) self.debug = debug self.index_html = index_html if cookie_file: self.cookiejar = MozillaCookieJar(cookie_file) self.cookiejar.load() else: self.cookiejar = None self.robotstxt = RobotFileParser() self.robotstxt.set_url(urljoin(starturl, '/robots.txt')) self.robotstxt.read() self.conn = None self.urldb = urldb self.acldb = acldb self.curlevel = 0 self.delay = delay self.timeout = timeout self.default_charset = default_charset if starturl.endswith('/'): starturl += self.index_html self.urls = [(starturl, maxlevel)] self.crawled = {} # 1:injected, 2:crawled return def accept_url(self, url): if url.endswith('/'): url += self.index_html if self.acldb and not self.acldb.allowed(url): return None return url def inject_url(self, url): if (not self.curlevel) or (not url) or (url in self.crawled): return False if not self.robotstxt.can_fetch(self.USER_AGENT, url): if self.debug: print >>stderr, 'DISALLOW: %r' % url return None if self.debug: print >>stderr, 'INJECT: %r' % url self.crawled[url] = 1 self.urls.append((url, self.curlevel-1)) return True def get1(self, url, maxretry=3, maxredirect=3): if self.debug: print >>stderr, 'GET: %r' % url # loop for rtry in range(maxredirect): # forge urllib2.Request object. req = Request(url) # add cookie headers if necessary. if self.cookiejar: self.cookiejar.add_cookie_header(req) headers = req.unredirected_hdrs headers.update(self.HEADERS) else: headers = self.HEADERS # get response. for ctry in range(maxretry): try: if not self.conn: print >>stderr, 'Making connection: %r...' % (self.hostport,) self.conn = HTTPConnection(self.hostport) self.conn.request('GET', req.get_selector().replace(' ',''), '', headers) self.conn.sock.settimeout(self.timeout) resp = self.conn.getresponse() break except BadStatusLine, x: # connection closed unexpectedly print >>stderr, 'Connection closed unexpectedly.' # it restarts the connection... self.conn.close() self.conn = None except socket.error, x: # connection closed unexpectedly print >>stderr, 'Socket error:', x self.conn.close() self.conn = None else:
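# The crawler above attaches cookies to a hand-built Request with add_cookie_header()
# and then issues the request over a raw HTTP connection. A condensed Python 3 sketch
# of that pattern (host, path and cookie file are illustrative):
from http.cookiejar import MozillaCookieJar
from urllib.request import Request
from http.client import HTTPConnection

jar = MozillaCookieJar('cookies.txt')
jar.load()

req = Request('http://example.com/index.html')
jar.add_cookie_header(req)                 # fills the request's unredirected Cookie header
headers = dict(req.unredirected_hdrs)
headers['User-Agent'] = 'SimpleCrawler/0.1'

conn = HTTPConnection('example.com')
conn.request('GET', '/index.html', None, headers)
print(conn.getresponse().status)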
class querier(object): proxy = None def _get_page(self, pagerequest): """Return the data for a page on scholar.google.com""" # Note that we include a sleep to avoid overloading the scholar server time.sleep(max((5, random.uniform(0, 5)))) # resp = _SESSION.get(pagerequest, headers=_HEADERS, cookies=_COOKIES) req = Request(url=pagerequest, headers=_HEADERS) if self.proxy: r = self.proxy.get(pagerequest, options={}) while not r['body']: # Error on request through the proxy print("Error while communicating, trying again ...") r = self.proxy.get(pagerequest, options={}) return r['body'] hdl = urlopen(req) return hdl.read() # if html.status_code == 200: # return resp.text # else: # raise Exception('Error: {0} {1}'.format(html.status_code, html.reason)) def _get_soup(self, pagerequest): """Return the BeautifulSoup for a page on scholar.google.com""" html = self._get_page(pagerequest) return BeautifulSoup(html, 'html.parser') def _search_scholar_soup(self, soup): """Generator that returns Publication objects from the search page""" while True: for row in soup.find_all('div', 'gs_r'): # Small fix: leave out the first entry, which is the article being searched if row.find('div', class_='gs_ri') is None: continue yield Publication(row, self, 'scholar') if soup.find(class_='gs_ico gs_ico_nav_next'): url = soup.find(class_='gs_ico gs_ico_nav_next').parent['href'] soup = self._get_soup(_HOST + url) else: break def _search_citation_soup(self, soup): """Generator that returns Author objects from the author search page""" while True: for row in soup.find_all('div', 'gsc_1usr'): yield Author(row) next_button = soup.find( class_= 'gs_btnPR gs_in_ib gs_btn_half gs_btn_lsb gs_btn_srt gsc_pgn_pnx' ) if next_button and 'disabled' not in next_button.attrs: url = next_button['onclick'][17:-1] url = codecs.getdecoder("unicode_escape")(url)[0] soup = self._get_soup(_HOST + url) else: break def search_pubs_query(self, query, years=None): """Search by scholar query and return a generator of Publication objects""" url = _PUBSEARCH.format(requests.utils.quote(query.encode('utf8'))) if years is not None: if isinstance(years, list): url += '&as_ylo=' + str(min(years)) + '&as_yhi=' + str( max(years)) if isinstance(years, int): url += '&as_ylo=' + str(years) + '&as_yhi=' + str(years) soup = self._get_soup(_HOST + url) return self._search_scholar_soup(soup) def search_author(self, name): """Search by author name and return a generator of Author objects""" url = _AUTHSEARCH.format(requests.utils.quote(name)) soup = self._get_soup(_HOST + url) return self._search_citation_soup(soup) def search_keyword(self, keyword): """Search by keyword and return a generator of Author objects""" url = _KEYWORDSEARCH.format(requests.utils.quote(keyword)) soup = self._get_soup(_HOST + url) return self._search_citation_soup(soup) def search_pubs_custom_url(self, url): """Search by custom URL and return a generator of Publication objects URL should be of the form '/scholar?q=...'""" soup = self._get_soup(_HOST + url) return self._search_scholar_soup(soup) def search_author_custom_url(self, url): """Search by custom URL and return a generator of Publication objects URL should be of the form '/citation?q=...'""" soup = self._get_soup(_HOST + url) return self._search_citation_soup(soup) def save_cookies(self): """ This stores the latest cookies we're using to disk, for reuse in a later session. 
""" try: self.cjar.save(self.cFile, ignore_discard=True) return True except Exception as msg: return False def set_proxy(self, token): from proxycrawl import ProxyCrawlAPI self.proxy = ProxyCrawlAPI({'token': token}) def reset_proxy(self): self.proxy = None def __init__(self, cookie_file=''): self.cjar = MozillaCookieJar() self.cFile = cookie_file # If we have a cookie file, load it: if self.cFile and exists(self.cFile): try: self.cjar.load(self.cFile, ignore_discard=True) except Exception as msg: self.cjar = MozillaCookieJar() # Just to be safe self.opener = build_opener(HTTPCookieProcessor(self.cjar)) def __del__(self): self.save_cookies()
def get_url(url, config, additional_headers=None, additional_query_string=None, post_data=None, fail_silent=False, no_cache=False, return_json_errors=[], return_final_url=False, cookie_file=None): response_content = '' request_hash = sha512( (url + dumps(additional_headers) + dumps(additional_query_string) + dumps(post_data)).encode('utf-8')).hexdigest() final_url = url if xbmc_helper().get_bool_setting('debug_requests') is True: xbmc_helper().log_debug( 'get_url - url: {} headers {} query {} post {} no_cache {} silent {} request_hash {} return_json_errors {}, cookie_file', url, additional_headers, additional_query_string, post_data, no_cache, fail_silent, request_hash, return_json_errors, cookie_file) if no_cache is True: etags_data = None else: etags_data = get_etags_data(request_hash) try: headers = { 'Accept-Encoding': 'gzip, deflate', 'User-Agent': config['USER_AGENT'], 'Accept': '*/*', } if additional_headers is not None: headers.update(additional_headers) if config.get('http_headers', None) is not None: headers.update(config.get('http_headers', [])) if etags_data is not None: headers.update({'If-None-Match': etags_data['etag']}) if additional_query_string is not None: _url = compat._format('{}{}{}', url, '?' if url.find('?') == -1 else '&', urlencode(additional_query_string)) url = _url if isinstance(post_data, dict): post_data = urlencode(post_data) cookie_processor = None cookie_jar = None if cookie_file is not None: cookie_jar = MozillaCookieJar(cookie_file) try: cookie_jar.load() except LoadError: xbmc_helper().log_debug('Failed to load from cookiefile {} with error {} - new session?', cookie_file, LoadError.strerror) pass cookie_processor = HTTPCookieProcessor(cookie_jar) if xbmc_helper().get_bool_setting('use_https_proxy') is True and xbmc_helper().get_text_setting( 'https_proxy_host') != '' and xbmc_helper().get_int_setting('https_proxy_port') != 0: proxy_uri = compat._format('{}:{}', xbmc_helper().get_text_setting('https_proxy_host'), xbmc_helper().get_text_setting('https_proxy_port')) xbmc_helper().log_debug('Using proxy uri {}', proxy_uri) prxy_handler = ProxyHandler({ 'http': proxy_uri, 'https': proxy_uri, }) if cookie_processor is None: install_opener(build_opener(prxy_handler)) else: install_opener(build_opener(prxy_handler, cookie_processor)) elif cookie_processor is not None: install_opener(build_opener(cookie_processor)) if post_data is not None: request = Request(url, data=post_data.encode('utf-8'), headers=headers) else: request = Request(url, headers=headers) response = urlopen(request, timeout=40) if response.info().get('Content-Encoding') == 'gzip': response_content = compat._decode(GzipFile(fileobj=BytesIO(response.read())).read()) else: response_content = compat._decode(response.read()) if cookie_jar is not None: cookie_jar.save() final_url = response.geturl() _etag = response.info().get('etag', None) if no_cache is False and _etag is not None: set_etags_data(request_hash, _etag, response_content) except HTTPError as http_error: if http_error.code == 304 and etags_data.get('data', None) is not None: response_content = etags_data.get('data') else: try: if http_error.info().get('Content-Encoding') == 'gzip': error_body = compat._decode(GzipFile(fileobj=BytesIO(http_error.read())).read()) else: error_body = compat._decode(http_error.read()) xbmc_helper().log_debug('HTTP ERROR: {}', error_body) json_errors = loads(error_body) xbmc_helper().log_debug('JSON ERRORS: {}', json_errors) has_decoded_error = False if isinstance(json_errors, dict) and 'errors' not in 
json_errors.keys() and 'code' in json_errors.keys(): json_errors = {'errors': [json_errors]} elif isinstance(json_errors, list) and len(json_errors) == 1 and isinstance(json_errors[0], dict): json_errors = {'errors': json_errors} err_str = str(http_error.code) return_errors = [] if isinstance(json_errors, dict): for error in json_errors.get('errors', []): if 'msg' in error.keys(): err_str = compat._format('{}|{}', err_str, error.get('msg')) has_decoded_error = True if 'code' in error.keys() and error['code'] in return_json_errors: return_errors.append(error['code']) has_decoded_error = True xbmc_helper().log_debug('return_json_errors {}', return_errors) if len(return_errors) > 0: response_content = dumps({'json_errors': return_errors}) elif has_decoded_error is True: xbmc_helper().notification( 'Error', err_str, ) exit(0) except Exception: raise http_error except Exception as e: xbmc_helper().log_error('Failed to load url: {} headers {} post_data {} - Exception: {}', url, headers, post_data, e) if fail_silent is True: pass else: xbmc_helper().notification(compat._format(xbmc_helper().translation('ERROR'), 'URL Access'), compat._format(xbmc_helper().translation('MSG_NO_ACCESS_TO_URL'), str(url))) exit(0) if return_final_url: return final_url, response_content return response_content
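# get_url() above assembles its opener from an optional ProxyHandler and an
# optional HTTPCookieProcessor, tolerating a cookie file that cannot be loaded yet.
# A condensed sketch of that assembly (proxy address and file name are placeholders):
from http.cookiejar import MozillaCookieJar, LoadError
from urllib.request import HTTPCookieProcessor, ProxyHandler, build_opener, install_opener

handlers = []

cookie_jar = MozillaCookieJar('addon_cookies.txt')
try:
    cookie_jar.load()
except (LoadError, OSError):
    pass                                   # new session: file missing or not yet valid
handlers.append(HTTPCookieProcessor(cookie_jar))

proxy_uri = None                           # e.g. '127.0.0.1:8118' when a proxy is configured
if proxy_uri:
    handlers.append(ProxyHandler({'http': proxy_uri, 'https': proxy_uri}))

install_opener(build_opener(*handlers))    # later urlopen() calls use these handlers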
class Session(requests.Session): """ Session for making API requests and interacting with the filesystem """ def __init__(self): super(Session, self).__init__() self.trust_env = False cookie_file = os.path.expanduser('~/.deis/cookies.txt') cookie_dir = os.path.dirname(cookie_file) self.cookies = MozillaCookieJar(cookie_file) # Create the $HOME/.deis dir if it doesn't exist if not os.path.isdir(cookie_dir): os.mkdir(cookie_dir, 0700) # Load existing cookies if the cookies.txt exists if os.path.isfile(cookie_file): self.cookies.load() self.cookies.clear_expired_cookies() def git_root(self): """ Return the absolute path from the git repository root If no git repository exists, raise an EnvironmentError """ try: git_root = subprocess.check_output( ['git', 'rev-parse', '--show-toplevel'], stderr=subprocess.PIPE).strip('\n') except subprocess.CalledProcessError: raise EnvironmentError('Current directory is not a git repository') return git_root def get_formation(self): """ Return the formation name for the current directory The formation is determined by parsing `git remote -v` output. If no formation is found, raise an EnvironmentError. """ git_root = self.git_root() # try to match a deis remote remotes = subprocess.check_output(['git', 'remote', '-v'], cwd=git_root) m = re.match(r'^deis\W+(?P<url>\S+)\W+\(', remotes, re.MULTILINE) if not m: raise EnvironmentError( 'Could not find deis remote in `git remote -v`') url = m.groupdict()['url'] m = re.match('\S+:(?P<formation>[a-z0-9-]+)(.git)?', url) if not m: raise EnvironmentError("Could not parse: {url}".format(**locals())) return m.groupdict()['formation'] formation = property(get_formation) def request(self, *args, **kwargs): """ Issue an HTTP request with proper cookie handling including `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>` """ for cookie in self.cookies: if cookie.name == 'csrftoken': if 'headers' in kwargs: kwargs['headers']['X-CSRFToken'] = cookie.value else: kwargs['headers'] = {'X-CSRFToken': cookie.value} break response = super(Session, self).request(*args, **kwargs) self.cookies.save() return response
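# The request() override above copies the Django csrftoken cookie into an
# X-CSRFToken header before delegating to requests. The same idea as a plain
# helper function (URL is illustrative):
import requests

def request_with_csrf(session, method, url, **kwargs):
    for cookie in session.cookies:
        if cookie.name == 'csrftoken':
            kwargs.setdefault('headers', {})['X-CSRFToken'] = cookie.value
            break
    return session.request(method, url, **kwargs)

# usage:
#   s = requests.Session()
#   request_with_csrf(s, 'POST', 'https://deis.example.com/api/apps/', json={})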
class bulk_downloader: def __init__(self, id, username, password, table_name): # List of files to download if id[:3] == 'S1A': self.files = [ 'https://datapool.asf.alaska.edu/GRD_HD/SA/{}.zip'.format(id) ] elif id[:3] == 'S1B': self.files = [ 'https://datapool.asf.alaska.edu/GRD_HD/SB/{}.zip'.format(id) ] else: print('no identified sensor: {}'.format(id)) logging.error('sensor not identified: {}'.format(id)) return self.username = username self.password = password self.table_name = table_name self.save_to = os.getenv('IMAGES_PATH') # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join( os.path.dirname(os.path.abspath('__file__')), '.bulk_download_cookiejar.txt') self.cookie_jar = None self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://vertex-retired.daac.asf.alaska.edu/services/urs4_token_request' \ } # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: print( 'WARNING: Cannot write to current path! Check permissions for {0}' .format(os.getcwd())) exit(-1) # For SSL self.context = {} # Make sure cookie_jar is good to go! self.get_cookie() # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] # Get and validate a cookie def get_cookie(self): # remove the cookie_jar_path file if its older than a day date_created = cookie_creation_date() if date_created: dx = datetime.now() - date_created hour = dx.total_seconds() / (3600) if hour > 10: print('cookie greater than 10 hours so removing it') os.remove(self.cookie_jar_path) if os.path.isfile(self.cookie_jar_path): self.cookie_jar = MozillaCookieJar() self.cookie_jar.load(self.cookie_jar_path) # make sure cookie is still valid if self.check_cookie(): print(' > Re-using previous cookie jar.') return True else: print(' > Could not validate old cookie Jar') # We don't have a valid cookie, prompt user or creds print('No existing URS cookie found, creating one') print('(Credentials will not be stored, saved or logged anywhere)') # Keep trying 'till user gets the right U:P while self.check_cookie() is False: self.get_new_cookie() return True # Validate cookie before we begin def check_cookie(self): if self.cookie_jar is None: print(' > Cookiejar is bunk: {0}'.format(self.cookie_jar)) return False # File we know is valid, used to validate cookie file_check = 'https://urs.earthdata.nasa.gov/profile' # Apply custom Redirect Hanlder opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) install_opener(opener) # Attempt a HEAD request request = Request(file_check) request.get_method = lambda: 'HEAD' try: print(' > attempting to download {0}'.format(file_check)) response = urlopen(request, timeout=30) resp_code = response.getcode() # Make sure we're logged in if not self.check_cookie_is_logged_in(self.cookie_jar): return False # Save cookiejar self.cookie_jar.save(self.cookie_jar_path) except HTTPError: # If we ge this error, again, it likely means the user has not agreed to current EULA print('\nIMPORTANT: ') print( 'Your user appears to lack permissions to download data from the ASF Datapool.' ) print( '\n\nNew users: you must first log into Vertex and accept the EULA. 
In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov' ) exit(-1) # This return codes indicate the USER has not been approved to download the data if resp_code in (300, 301, 302, 303): try: redir_url = response.info().getheader('Location') except AttributeError: redir_url = response.getheader('Location') # Funky Test env: if ('vertex-retired.daac.asf.alaska.edu' in redir_url and 'test' in self.asf_urs4['redir']): print("Cough, cough. It's dusty in this test env!") return True print('Redirect ({0}) occurred, invalid cookie value!'.format( resp_code)) return False # These are successes! if resp_code in (200, 307): return True return False def get_new_cookie(self): # Start by prompting user to input their credentials new_username = self.username new_password = self.password # Build URS4 Cookie request auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4['client'] + '&redirect_uri=' + \ self.asf_urs4['redir'] + '&response_type=code&state=' try: # python2 user_pass = base64.b64encode( bytes(new_username + ':' + new_password)) except TypeError: # python3 user_pass = base64.b64encode( bytes(new_username + ':' + new_password, 'utf-8')) user_pass = user_pass.decode('utf-8') # Authenticate against URS, grab all the cookies self.cookie_jar = MozillaCookieJar() opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request( auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) # Watch out cookie rejection! try: response = opener.open(request) except HTTPError as e: if e.code == 401: print( " > Username and Password combo was not successful. Please try again." ) return False else: # If an error happens here, the user most likely has not confirmed EULA. print( "\nIMPORTANT: There was an error obtaining a download cookie!" ) print( "Your user appears to lack permission to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) except URLError as e: print( "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. " ) print("Try cookie generation later.") exit(-1) # Did we get a cookie? if self.check_cookie_is_logged_in(self.cookie_jar): # COOKIE SUCCESS! print('cookie saved') self.cookie_jar.save(self.cookie_jar_path) save_cookie_creation_date() return True # if we aren't successful generating the cookie, nothing will work. Stop here! print( "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again." ) print("Response was {0}.".format(response.getcode())) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): for cookie in cj: if cookie.name == 'urs_user_already_logged': # Only get this cookie if we logged in successfully! 
return True return False # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): # see if we've already download this file and if it is that it is the correct size download_file = os.path.basename(url).split('?')[0] if os.path.isfile(os.path.join(self.save_to, download_file)): try: request = Request(url) request.get_method = lambda: 'HEAD' response = urlopen(request, timeout=30) remote_size = self.get_total_size(response) # Check that we were able to derive a size. if remote_size: local_size = os.path.getsize( os.path.join(self.save_to, download_file)) if remote_size < (local_size + (local_size * .01)) and remote_size > ( local_size - (local_size * .01)): print( " > Download file {0} exists! \n > Skipping download of {1}. " .format(download_file, url)) return None, None # partial file size wasn't full file size, lets blow away the chunk and start again print( " > Found {0} but it wasn't fully downloaded. Removing file and downloading again." .format(download_file)) os.remove(os.path.join(self.save_to, download_file)) except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None except HTTPError as e: if e.code == 401: print( " > IMPORTANT: Your user may not have permission to download this type of data!" ) else: print(" > Unknown Error, Could not get file HEAD: {0}". format(e)) except URLError as e: print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None # attempt https connection try: request = Request(url) response = urlopen(request, timeout=30) # Watch for redirect if response.geturl() != url: # See if we were redirect BACK to URS for re-auth. if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl( ): if recursion: print( " > Entering seemingly endless auth loop. Aborting. " ) return False, None # make this easier. If there is no app_type=401, add it new_auth_url = response.geturl() if "app_type" not in new_auth_url: new_auth_url += "&app_type=401" print( " > While attempting to download {0}....".format(url)) print(" > Need to obtain new cookie from {0}".format( new_auth_url)) old_cookies = [cookie.name for cookie in self.cookie_jar] opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request(new_auth_url) try: response = opener.open(request) for cookie in self.cookie_jar: if cookie.name not in old_cookies: print(" > Saved new cookie: {0}".format( cookie.name)) # A little hack to save session cookies if cookie.discard: cookie.expires = int( time.time()) + 60 * 60 * 24 * 30 print( " > Saving session Cookie that should have been discarded! " ) self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) return False, None # Okay, now we have more cookies! Lets try again, recursively! 
print(" > Attempting download again with new cookies!") return self.download_file_with_cookiejar(url, file_count, total, recursion=True) print( " > 'Temporary' Redirect download @ Remote archive:\n > {0}" .format(response.geturl())) # seems to be working print("({0}/{1}) Downloading {2}".format(file_count, total, url)) # Open our local file for writing and build status bar tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') self.chunk_read(response, tf, report_hook=self.chunk_report) # Reset download status sys.stdout.write('\n') tempfile_name = tf.name tf.close() # handle errors except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) if e.code == 401: print( " > IMPORTANT: Your user does not have permission to download this type of data!" ) if e.code == 403: print(" > Got a 403 Error trying to download this file. ") print( " > You MAY need to log in this app and agree to a EULA. ") return False, None except URLError as e: print("URL Error (from GET): {0}, {1}, {2}".format( e, e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None # Return the file size shutil.copy(tempfile_name, os.path.join(self.save_to, download_file)) os.remove(tempfile_name) file_size = self.get_total_size(response) actual_size = os.path.getsize(os.path.join(self.save_to, download_file)) if file_size is None: # We were unable to calculate file size. file_size = actual_size return actual_size, file_size # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): if file_size is not None: percent = float(bytes_so_far) / file_size percent = round(percent * 100, 2) sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, file_size, percent)) else: # We couldn't figure out the size. sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): file_size = self.get_total_size(response) bytes_so_far = 0 while 1: try: chunk = response.read(chunk_size) except: sys.stdout.write("\n > There was an error reading data. 
\n") break try: local_file.write(chunk) except TypeError: local_file.write(chunk.decode(local_file.encoding)) bytes_so_far += len(chunk) if not chunk: break if report_hook: report_hook(bytes_so_far, file_size) return bytes_so_far def get_total_size(self, response): try: file_size = response.info().getheader('Content-Length').strip() except AttributeError: try: file_size = response.getheader('Content-Length').strip() except AttributeError: print("> Problem getting size") return None return int(file_size) # Download all the files in the list def download_files(self): for file_name in self.files: # make sure we haven't ctrl+c'd or some other abort trap if abort == True: raise SystemExit # download counter self.cnt += 1 # set a timer start = time.time() # run download size, total_size = self.download_file_with_cookiejar( file_name, self.cnt, len(self.files)) # calculte rate end = time.time() # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed rate = (size / 1024**2) / elapsed print( "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec" .format(size, elapsed, rate)) # add up metrics self.total_bytes += size self.total_time += elapsed self.success.append({'file': file_name, 'size': size}) else: print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) def print_summary(self, rid): # Print summary: print("\n\nDownload Summary ") print( "--------------------------------------------------------------------------------" ) print(" Successes: {0} files, {1} bytes ".format( len(self.success), self.total_bytes)) for success_file in self.success: print(" - {0} {1:.2f}MB".format( success_file['file'], (success_file['size'] / 1024.0**2))) if len(self.failed) > 0: print(" Failures: {0} files".format(len(self.failed))) for failed_file in self.failed: print(" - {0}".format(failed_file)) if len(self.skipped) > 0: print(" Skipped: {0} files".format(len(self.skipped))) for skipped_file in self.skipped: print(" - {0}".format(skipped_file)) if len(self.success) > 0: print(" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes / 1024.0**2) / self.total_time)) print( "--------------------------------------------------------------------------------\n\n" ) # since we are downloading one file at a time! if len(self.success) > 0: try: conn, cur = connect_to_db() cur.execute( "UPDATE {} SET downloaded=TRUE WHERE rid={}".format( self.table_name, rid)) conn.commit() close_connection(conn, cur) except Exception as e: print('error inserting into db because {}'.format(e)) logging.error(e) # ideally should not end up here but anyway if len(self.skipped) > 0: try: conn, cur = connect_to_db() cur.execute( "UPDATE {} SET downloaded=FALSE WHERE rid={}".format( self.table_name, rid)) conn.commit() close_connection(conn, cur) except Exception as e: print('error inserting into db because {}'.format(e)) logging.error(e)
def load_cookies_from_mozilla(self, filename):
    ns_cookiejar = MozillaCookieJar()
    ns_cookiejar.load(filename, ignore_discard=True, ignore_expires=True)
    return ns_cookiejar
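# Possible usage of the helper above (Python 3): install the returned jar into a
# urllib opener so the saved cookies accompany every request. File name and URL
# are illustrative.
from http.cookiejar import MozillaCookieJar
from urllib.request import HTTPCookieProcessor, build_opener

jar = MozillaCookieJar()
jar.load('cookies.txt', ignore_discard=True, ignore_expires=True)
opener = build_opener(HTTPCookieProcessor(jar))
with opener.open('http://example.com/') as resp:
    body = resp.read()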
class LSession(): def __init__(self,cookiefile = None, proxy = None, timeout = 10, retime = 30,sleept = 3): self.timeout=timeout self.retime=retime self.sleept=sleept #proxy '1.234.77.96:80' if cookiefile == None: self.cookiejar = CookieJar() else: self.cookiejar = MozillaCookieJar(filename=cookiefile) #self.cookiejar =cookielib.LWPCookieJar(filename=cookiefile) if not os.path.isfile(cookiefile): open(cookiefile, 'w').write(MozillaCookieJar.header) #open(cookiefile, 'w').write('#abc\n') pass self.cookiejar.load(filename=cookiefile,ignore_discard=True) #print "ck:",self.cookiejar self.cookie_processor = HTTPCookieProcessor(self.cookiejar) self.opener=build_opener(urllib2.HTTPRedirectHandler(),self.cookie_processor) if proxy : self.opener.add_handler(ProxyHandler({"http" : proxy})) #for posting a file try: import MultipartPostHandler #for posting a file,need installed self.opener.add_handler(MultipartPostHandler.MultipartPostHandler()) except NameError as e:print e self.response=None self.request=None self.header=[] def add_header(self,k,v) : self.header.append((k,v)) def build_request(self,url,params=None): self.request=Request(url,params) if not self.response is None:self.request.add_header('Referer',self.url()) #self.request.add_header('User-Agent', # 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 \ # (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25') #NokiaE63/UC Browser7.9.0.102/28/355/UCWEB #self.request.add_header('User-Agent','NokiaE63/UC Browser7.9.0.102/28/355/UCWEB') self.request.add_header('User-Agent','Opera/9.80 (J2ME/MIDP; Opera Mini/1.0/886; U; en) Presto/2.4.15') while self.header : _k,_v = self.header.pop() self.request.add_header(_k,_v) #Mobile/7B405 #self.request.add_header('User-Agent','Mobile/7B405') return self.request def __del__(self) : self.save_cookie() def urlopen(self,req): retime=self.retime while retime > 0: try: return self.opener.open(req,timeout=self.timeout) except Exception as e: retime -= 1 traceback.print_exc(file=sys.stdout) print 'Wait and retry...%d'%(self.retime-retime) sleep(self.sleept) def savefile(self,filename,url): self.response=self.urlopen(self.build_request(url)) CHUNK = 50 * 1024 with open(filename, 'wb') as fp: while True: chunk = self.response.read(CHUNK) if not chunk: break fp.write(chunk) def post(self,url,post_data): self.response=self.urlopen(self.build_request(url,urlencode(post_data))) return self.response def post_raw(self,url,post_data): self.response=self.urlopen(self.build_request(url,post_data)) return self.response def post_file(self,url,params): self.response=self.urlopen(self.build_request(url, params)) return self.response def get(self,url): self.response=self.urlopen(self.build_request(url)) #import urllib #print urllib.urlopen('http://mrozekma.com/302test.php').geturl() # import requests # r=requests.get(url) # print r.content return self.response def text(self,dec='gbk',enc='utf') : return self.response.read().decode(dec).encode(enc) def url(self) : return self.response.url def logout(self) : self.cookiejar.clear() def Verify_proxy(self) : pass def show_cookie(self): #print self.cookiejar for i in self.cookiejar: print i def save_cookie(self): # if hasattr(self.cookiejar,'save'):#in case non cookiejar # self.cookiejar.save(ignore_discard=True, ignore_expires=False) try: self.cookiejar.save(ignore_discard=True, ignore_expires=False) except Exception as e: traceback.print_exc(file=sys.stdout)
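# LSession above seeds a brand-new cookies file with the Netscape-format header
# line so that the subsequent load() does not fail on an empty file. That idiom
# in isolation (Python 3; the file name is illustrative):
import os
from http.cookiejar import MozillaCookieJar

cookie_file = 'session_cookies.txt'
jar = MozillaCookieJar(filename=cookie_file)
if not os.path.isfile(cookie_file):
    with open(cookie_file, 'w') as f:
        f.write(MozillaCookieJar.header)          # "# Netscape HTTP Cookie File" preamble
jar.load(filename=cookie_file, ignore_discard=True)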
def gen_login_cookie():
    cookie = MozillaCookieJar()
    cookie.load('cookies.txt', ignore_discard=True, ignore_expires=True)
    return cookie
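# Possible usage of gen_login_cookie() above: requests accepts a CookieJar
# directly through the cookies argument, so the saved login travels with the call.
# The URL is illustrative.
import requests

resp = requests.get('http://example.com/dashboard', cookies=gen_login_cookie())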
class bulk_downloader: def __init__(self): # List of files to download self.files = [ "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200626T041209_20200626T041234_033183_03D816_7063.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200626T041144_20200626T041209_033183_03D816_0D5E.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200614T041208_20200614T041233_033008_03D2C4_2DC4.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200614T041143_20200614T041208_033008_03D2C4_584D.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200602T041208_20200602T041233_032833_03CD92_6A43.zip", "https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20200602T041143_20200602T041208_032833_03CD92_5A25.zip" ] # Local stash of cookies so we don't always have to ask self.cookie_jar_path = os.path.join(os.path.expanduser('~'), ".bulk_download_cookiejar.txt") self.cookie_jar = None self.asf_urs4 = { 'url': 'https://urs.earthdata.nasa.gov/oauth/authorize', 'client': 'BO_n7nTIlMljdvU6kRRB3g', 'redir': 'https://auth.asf.alaska.edu/login' } # Make sure we can write it our current directory if os.access(os.getcwd(), os.W_OK) is False: print( "WARNING: Cannot write to current path! Check permissions for {0}" .format(os.getcwd())) exit(-1) # For SSL self.context = {} # Check if user handed in a Metalink or CSV: if len(sys.argv) > 0: download_files = [] input_files = [] for arg in sys.argv[1:]: if arg == '--insecure': try: ctx = ssl.create_default_context() ctx.check_hostname = False ctx.verify_mode = ssl.CERT_NONE self.context['context'] = ctx except AttributeError: # Python 2.6 won't complain about SSL Validation pass elif arg.endswith('.metalink') or arg.endswith('.csv'): if os.path.isfile(arg): input_files.append(arg) if arg.endswith('.metalink'): new_files = self.process_metalink(arg) else: new_files = self.process_csv(arg) if new_files is not None: for file_url in (new_files): download_files.append(file_url) else: print( " > I cannot find the input file you specified: {0}" .format(arg)) else: print( " > Command line argument '{0}' makes no sense, ignoring." .format(arg)) if len(input_files) > 0: if len(download_files) > 0: print(" > Processing {0} downloads from {1} input files. ". format(len(download_files), len(input_files))) self.files = download_files else: print( " > I see you asked me to download files from {0} input files, but they had no downloads!" .format(len(input_files))) print(" > I'm super confused and exiting.") exit(-1) # Make sure cookie_jar is good to go! 
self.get_cookie() # summary self.total_bytes = 0 self.total_time = 0 self.cnt = 0 self.success = [] self.failed = [] self.skipped = [] # Get and validate a cookie def get_cookie(self): if os.path.isfile(self.cookie_jar_path): self.cookie_jar = MozillaCookieJar() self.cookie_jar.load(self.cookie_jar_path) # make sure cookie is still valid if self.check_cookie(): print(" > Re-using previous cookie jar.") return True else: print(" > Could not validate old cookie Jar") # We don't have a valid cookie, prompt user or creds print( "No existing URS cookie found, please enter Earthdata username & password:"******"(Credentials will not be stored, saved or logged anywhere)") # Keep trying 'till user gets the right U:P while self.check_cookie() is False: self.get_new_cookie() return True # Validate cookie before we begin def check_cookie(self): if self.cookie_jar is None: print(" > Cookiejar is bunk: {0}".format(self.cookie_jar)) return False # File we know is valid, used to validate cookie file_check = 'https://urs.earthdata.nasa.gov/profile' # Apply custom Redirect Hanlder opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) install_opener(opener) # Attempt a HEAD request request = Request(file_check) request.get_method = lambda: 'HEAD' try: print(" > attempting to download {0}".format(file_check)) response = urlopen(request, timeout=30) resp_code = response.getcode() # Make sure we're logged in if not self.check_cookie_is_logged_in(self.cookie_jar): return False # Save cookiejar self.cookie_jar.save(self.cookie_jar_path) except HTTPError: # If we ge this error, again, it likely means the user has not agreed to current EULA print("\nIMPORTANT: ") print( "Your user appears to lack permissions to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # This return codes indicate the USER has not been approved to download the data if resp_code in (300, 301, 302, 303): try: redir_url = response.info().getheader('Location') except AttributeError: redir_url = response.getheader('Location') #Funky Test env: if ("vertex-retired.daac.asf.alaska.edu" in redir_url and "test" in self.asf_urs4['redir']): print("Cough, cough. It's dusty in this test env!") return True print("Redirect ({0}) occured, invalid cookie value!".format( resp_code)) return False # These are successes! if resp_code in (200, 307): return True return False def get_new_cookie(self): # Start by prompting user to input their credentials # Another Python2/3 workaround try: new_username = raw_input("Username: "******"Username: "******"Password (will not be displayed): ") # Build URS4 Cookie request auth_cookie_url = self.asf_urs4['url'] + '?client_id=' + self.asf_urs4[ 'client'] + '&redirect_uri=' + self.asf_urs4[ 'redir'] + '&response_type=code&state=' try: #python2 user_pass = base64.b64encode( bytes(new_username + ":" + new_password)) except TypeError: #python3 user_pass = base64.b64encode( bytes(new_username + ":" + new_password, "utf-8")) user_pass = user_pass.decode("utf-8") # Authenticate against URS, grab all the cookies self.cookie_jar = MozillaCookieJar() opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request( auth_cookie_url, headers={"Authorization": "Basic {0}".format(user_pass)}) # Watch out cookie rejection! 
try: response = opener.open(request) except HTTPError as e: if "WWW-Authenticate" in e.headers and "Please enter your Earthdata Login credentials" in e.headers[ "WWW-Authenticate"]: print( " > Username and Password combo was not successful. Please try again." ) return False else: # If an error happens here, the user most likely has not confirmed EULA. print( "\nIMPORTANT: There was an error obtaining a download cookie!" ) print( "Your user appears to lack permission to download data from the ASF Datapool." ) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) except URLError as e: print( "\nIMPORTANT: There was a problem communicating with URS, unable to obtain cookie. " ) print("Try cookie generation later.") exit(-1) # Did we get a cookie? if self.check_cookie_is_logged_in(self.cookie_jar): #COOKIE SUCCESS! self.cookie_jar.save(self.cookie_jar_path) return True # if we aren't successful generating the cookie, nothing will work. Stop here! print( "WARNING: Could not generate new cookie! Cannot proceed. Please try Username and Password again." ) print("Response was {0}.".format(response.getcode())) print( "\n\nNew users: you must first log into Vertex and accept the EULA. In addition, your Study Area must be set at Earthdata https://urs.earthdata.nasa.gov" ) exit(-1) # make sure we're logged into URS def check_cookie_is_logged_in(self, cj): for cookie in cj: if cookie.name == 'urs_user_already_logged': # Only get this cookie if we logged in successfully! return True return False # Download the file def download_file_with_cookiejar(self, url, file_count, total, recursion=False): # see if we've already download this file and if it is that it is the correct size download_file = os.path.basename(url).split('?')[0] if os.path.isfile(download_file): try: request = Request(url) request.get_method = lambda: 'HEAD' response = urlopen(request, timeout=30) remote_size = self.get_total_size(response) # Check that we were able to derive a size. if remote_size: local_size = os.path.getsize(download_file) if remote_size < (local_size + (local_size * .01)) and remote_size > ( local_size - (local_size * .01)): print( " > Download file {0} exists! \n > Skipping download of {1}. " .format(download_file, url)) return None, None #partial file size wasn't full file size, lets blow away the chunk and start again print( " > Found {0} but it wasn't fully downloaded. Removing file and downloading again." .format(download_file)) os.remove(download_file) except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None except HTTPError as e: if e.code == 401: print( " > IMPORTANT: Your user may not have permission to download this type of data!" ) else: print(" > Unknown Error, Could not get file HEAD: {0}". format(e)) except URLError as e: print("URL Error (from HEAD): {0}, {1}".format(e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None # attempt https connection try: request = Request(url) response = urlopen(request, timeout=30) # Watch for redirect if response.geturl() != url: # See if we were redirect BACK to URS for re-auth. 
if 'https://urs.earthdata.nasa.gov/oauth/authorize' in response.geturl( ): if recursion: print( " > Entering seemingly endless auth loop. Aborting. " ) return False, None # make this easier. If there is no app_type=401, add it new_auth_url = response.geturl() if "app_type" not in new_auth_url: new_auth_url += "&app_type=401" print( " > While attempting to download {0}....".format(url)) print(" > Need to obtain new cookie from {0}".format( new_auth_url)) old_cookies = [cookie.name for cookie in self.cookie_jar] opener = build_opener(HTTPCookieProcessor(self.cookie_jar), HTTPHandler(), HTTPSHandler(**self.context)) request = Request(new_auth_url) try: response = opener.open(request) for cookie in self.cookie_jar: if cookie.name not in old_cookies: print(" > Saved new cookie: {0}".format( cookie.name)) # A little hack to save session cookies if cookie.discard: cookie.expires = int( time.time()) + 60 * 60 * 24 * 30 print( " > Saving session Cookie that should have been discarded! " ) self.cookie_jar.save(self.cookie_jar_path, ignore_discard=True, ignore_expires=True) except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) return False, None # Okay, now we have more cookies! Lets try again, recursively! print(" > Attempting download again with new cookies!") return self.download_file_with_cookiejar(url, file_count, total, recursion=True) print( " > 'Temporary' Redirect download @ Remote archive:\n > {0}" .format(response.geturl())) # seems to be working print("({0}/{1}) Downloading {2}".format(file_count, total, url)) # Open our local file for writing and build status bar tf = tempfile.NamedTemporaryFile(mode='w+b', delete=False, dir='.') self.chunk_read(response, tf, report_hook=self.chunk_report) # Reset download status sys.stdout.write('\n') tempfile_name = tf.name tf.close() #handle errors except HTTPError as e: print("HTTP Error: {0}, {1}".format(e.code, url)) if e.code == 401: print( " > IMPORTANT: Your user does not have permission to download this type of data!" ) if e.code == 403: print(" > Got a 403 Error trying to download this file. ") print( " > You MAY need to log in this app and agree to a EULA. ") return False, None except URLError as e: print("URL Error (from GET): {0}, {1}, {2}".format( e, e.reason, url)) if "ssl.c" in "{0}".format(e.reason): print( "IMPORTANT: Remote location may not be accepting your SSL configuration. This is a terminal error." ) return False, None except socket.timeout as e: print(" > timeout requesting: {0}; {1}".format(url, e)) return False, None except ssl.CertificateError as e: print(" > ERROR: {0}".format(e)) print( " > Could not validate SSL Cert. You may be able to overcome this using the --insecure flag" ) return False, None # Return the file size shutil.copy(tempfile_name, download_file) os.remove(tempfile_name) file_size = self.get_total_size(response) actual_size = os.path.getsize(download_file) if file_size is None: # We were unable to calculate file size. 
file_size = actual_size return actual_size, file_size def get_redirect_url_from_error(self, error): find_redirect = re.compile(r"id=\"redir_link\"\s+href=\"(\S+)\"") print("error file was: {}".format(error)) redirect_url = find_redirect.search(error) if redirect_url: print("Found: {0}".format(redirect_url.group(0))) return (redirect_url.group(0)) return None # chunk_report taken from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_report(self, bytes_so_far, file_size): if file_size is not None: percent = float(bytes_so_far) / file_size percent = round(percent * 100, 2) sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" % (bytes_so_far, file_size, percent)) else: # We couldn't figure out the size. sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far)) # chunk_read modified from http://stackoverflow.com/questions/2028517/python-urllib2-progress-hook def chunk_read(self, response, local_file, chunk_size=8192, report_hook=None): file_size = self.get_total_size(response) bytes_so_far = 0 while 1: try: chunk = response.read(chunk_size) except: sys.stdout.write("\n > There was an error reading data. \n") break try: local_file.write(chunk) except TypeError: local_file.write(chunk.decode(local_file.encoding)) bytes_so_far += len(chunk) if not chunk: break if report_hook: report_hook(bytes_so_far, file_size) return bytes_so_far def get_total_size(self, response): try: file_size = response.info().getheader('Content-Length').strip() except AttributeError: try: file_size = response.getheader('Content-Length').strip() except AttributeError: print("> Problem getting size") return None return int(file_size) # Get download urls from a metalink file def process_metalink(self, ml_file): print("Processing metalink file: {0}".format(ml_file)) with open(ml_file, 'r') as ml: xml = ml.read() # Hack to remove annoying namespace it = ET.iterparse(StringIO(xml)) for _, el in it: if '}' in el.tag: el.tag = el.tag.split('}', 1)[1] # strip all namespaces root = it.root dl_urls = [] ml_files = root.find('files') for dl in ml_files: dl_urls.append(dl.find('resources').find('url').text) if len(dl_urls) > 0: return dl_urls else: return None # Get download urls from a csv file def process_csv(self, csv_file): print("Processing csv file: {0}".format(csv_file)) dl_urls = [] with open(csv_file, 'r') as csvf: try: csvr = csv.DictReader(csvf) for row in csvr: dl_urls.append(row['URL']) except csv.Error as e: print( "WARNING: Could not parse file %s, line %d: %s. Skipping." % (csv_file, csvr.line_num, e)) return None except KeyError as e: print( "WARNING: Could not find URL column in file %s. Skipping." % (csv_file)) if len(dl_urls) > 0: return dl_urls else: return None # Download all the files in the list def download_files(self): for file_name in self.files: # make sure we haven't ctrl+c'd or some other abort trap if abort == True: raise SystemExit # download counter self.cnt += 1 # set a timer start = time.time() # run download size, total_size = self.download_file_with_cookiejar( file_name, self.cnt, len(self.files)) # calculte rate end = time.time() # stats: if size is None: self.skipped.append(file_name) # Check to see that the download didn't error and is the correct size elif size is not False and (total_size < (size + (size * .01)) and total_size > (size - (size * .01))): # Download was good! 
elapsed = end - start elapsed = 1.0 if elapsed < 1 else elapsed rate = (size / 1024**2) / elapsed print( "Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec" .format(size, elapsed, rate)) # add up metrics self.total_bytes += size self.total_time += elapsed self.success.append({'file': file_name, 'size': size}) else: print("There was a problem downloading {0}".format(file_name)) self.failed.append(file_name) def print_summary(self): # Print summary: print("\n\nDownload Summary ") print( "--------------------------------------------------------------------------------" ) print(" Successes: {0} files, {1} bytes ".format( len(self.success), self.total_bytes)) for success_file in self.success: print(" - {0} {1:.2f}MB".format( success_file['file'], (success_file['size'] / 1024.0**2))) if len(self.failed) > 0: print(" Failures: {0} files".format(len(self.failed))) for failed_file in self.failed: print(" - {0}".format(failed_file)) if len(self.skipped) > 0: print(" Skipped: {0} files".format(len(self.skipped))) for skipped_file in self.skipped: print(" - {0}".format(skipped_file)) if len(self.success) > 0: print(" Average Rate: {0:.2f}MB/sec".format( (self.total_bytes / 1024.0**2) / self.total_time)) print( "--------------------------------------------------------------------------------" )
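# The downloader above keeps session cookies by giving discardable cookies an
# explicit expiry and saving with ignore_discard=True, so re-authentication is
# not needed on the next run. That trick in isolation (Python 3):
import time
from http.cookiejar import MozillaCookieJar

jar = MozillaCookieJar()
# ... after authenticating, the jar may contain session-only cookies ...
for cookie in jar:
    if cookie.discard:
        cookie.expires = int(time.time()) + 60 * 60 * 24 * 30   # keep for ~30 days
jar.save('.bulk_download_cookiejar.txt', ignore_discard=True, ignore_expires=True)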
class Json_RPC(object): def __init__(self): #self.cookie_jar=CookieJar() self.cookie_jar=MozillaCookieJar() self.opener=urllib2.build_opener( urllib2.HTTPCookieProcessor(self.cookie_jar), #urllib2.HTTPHandler(debuglevel=1), #urllib2.HTTPSHandler(debuglevel=1), ) def load_cookie(self,filename): ''' Load Cookie from file ''' self.cookie_jar.load(filename,ignore_discard=True) def save_cookie(self,filename): ''' Save Cookie to file ''' self.cookie_jar.save(filename,ignore_discard=True) def json_rpc(self,url,method="GET",**kwargs): ''' Performs a json rpc to url and return python-native result will extract dict or list from result Example: try{callback({'result':0,'data':[]});}catch(e){} will be transcode to {"result":0,"data":[]} See also: http_rpc ''' ret=self.http_rpc(url,method,**kwargs) ret=sub(r'try{(.*)}catch\(.*\){.*};?',r'\1',ret) ret=(search(r'{.+}',ret) or search(r'\[.+\]',ret)).group() #ret=sub(r"'",r'"',ret) ret=loads(ret) return ret def http_rpc(self,url,method="GET",**kwargs): ''' Perfoms a http rpc to url and return raw result url base url to rpc method 'GET' or 'POST' query query string passing by a dict data post data passing by a dict file post files passing by a list of 3-tuple: key, filename, data ( this indicates multipart/form-data ) ''' kwe=Entity(kwargs) if method not in ['GET','POST']: raise RPCError("Method not in GET or POST") if kwe.query: url+="?"+urlencode(kwe.query) if method=='GET': request=Request(url) elif kwe.file: content_type,data=multipart_encode(kwe.data,kwe.file) request=Request(url,data) request.add_header('Content-Type', content_type) elif kwe.data: data=urlencode(kwe.data) request=Request(url,data) else: raise RPCError("POST with no data") request.add_header('User-Agent', "Mozilla/5.0 (Ubuntu; X11; Linux x86_64; rv:8.0) Gecko/20100101 Firefox/8.0" ) request.add_header('Accept-Charset',"UTF-8") response=self.opener.open(request) ret=response.read() response.close() #print "\033[33m"+str(self.cookie_jar)+"\033[0m" # FIXME: An Ugly hack to Tencent server's charset indicator using BOM header if ret.startswith('\xef\xbb\xbf'): ret=ret[3:] return ret
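# Json_RPC.json_rpc() above unwraps a JSONP-style "try{callback(...)}catch" response
# before decoding it. A standalone sketch of that unwrapping (the sample payload is
# the one quoted in the docstring; the single-to-double quote fix is added so the
# sample actually parses):
import re
from json import loads

raw = "try{callback({'result':0,'data':[]});}catch(e){}"
body = re.sub(r'try{(.*)}catch\(.*\){.*};?', r'\1', raw)
match = re.search(r'{.+}', body) or re.search(r'\[.+\]', body)
payload = match.group().replace("'", '"')
print(loads(payload))          # {'result': 0, 'data': []}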
    c = {}
    for v in args:
        if type(v) == type({}):
            c.update(v)
    return c


user_agent = {'User-agent': "Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"}

cookie_file = "/dev/shm/urllib2_cookies.txt"
policy = DefaultCookiePolicy()

global _cookieJar
_cookieJar = MozillaCookieJar(cookie_file, policy)
if os.path.exists(cookie_file):
    _cookieJar.load()

DEBUG_LEVEL = 2
_http = urllib2.HTTPHandler()
_http.set_http_debuglevel(DEBUG_LEVEL)
_https = urllib2.HTTPSHandler()
_https.set_http_debuglevel(DEBUG_LEVEL)
_cookies = urllib2.HTTPCookieProcessor(_cookieJar)
urllib2.install_opener(urllib2.build_opener(_http, _https, _cookies))

ajax_header = {"X-Requested-With": "XMLHttpRequest"}
json_header = {"Accept": "application/json, text/javascript, */*; q=0.01"}
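
# Illustrative sketch (not part of the original source): once the opener above has been
# installed with install_opener, plain urllib2 calls share _cookieJar, and saving the jar
# persists the session across runs. The URL below is a placeholder.
headers = {}
headers.update(user_agent)
headers.update(ajax_header)
request = urllib2.Request('http://example.com/', headers=headers)
response = urllib2.urlopen(request)      # goes through the installed cookie-aware opener
body = response.read()
_cookieJar.save()                        # write any new cookies back to cookie_file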
class BasisRetr:
    """The main entry points, once a BasisRetr object has been created, are:
    1) GetDayData() -- download metrics, activity, and sleep data for a single day from the Basis website and save it
    2) GetActivityCsvForMonth() -- download activity summaries for an entire month
    3) GetSleepCsvForMonth() -- download sleep summaries for an entire month."""

    LOGIN_URL = 'https://app.mybasis.com/login'
    METRICS_URL = 'https://app.mybasis.com/api/v1/metricsday/me?day={date}&padding=0&bodystates=true&heartrate=true&steps=true&calories=true&gsr=true&skin_temp=true&air_temp=true'
    ACTIVITIES_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=run,walk,bike,sleep'
    SLEEP_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?expand=activities&type=sleep'
    SLEEP_EVENTS_URL = 'https://app.mybasis.com/api/v2/users/me/days/{date}/activities?type=sleep&event.type=toss_and_turn&expand=activities.stages,activities.events'

    def __init__(self, loadconfig=None):
        # create config info
        self.app_state = Config(defaults=DEFAULTS, fpath=STATE_FILEPATH)
        self.has_error = False
        if loadconfig:
            self.app_state.Load()
        else:
            # if config file doesn't exist, save the defaults loaded above
            self.app_state.Save()
        self.CFG = Config2()
        err_text = self.CFG.Parse(CFG_FILEPATH)
        if err_text:
            print 'Config file read error: ' + err_text
        # url opener for website retrieves
        self.cj = MozillaCookieJar(self.CFG.cookie_filename)
        self.session_cookie = None
        if os.path.exists(self.CFG.cookie_filename):
            self.cj.load()
            self.CheckSessionCookie()  # set session cookie if it exists and hasn't expired
        # need to use build_opener to submit cookies and post form data
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))

    def GetDayData(self, yr, mo, day, typ, save_csv, override_cache=False, act_metr=True):
        """Main entry method for getting a day's worth of data, formatting it, then saving it.
        typ is the type of data: metrics, activities, or sleep. Data is always saved in json
        format, but if save_csv is True, save to csv as well as json. override_cache ignores
        any already-downloaded json. act_metr, if True, saves sleep and activity state along
        with metrics."""
        date = self.YrMoDyToString(yr, mo, day)
        # Need yesterday's date to get sleep events for a given calendar day. This is because
        # sleep events, as downloaded from the Basis website, start from the prior evening,
        # when you actually went to sleep.
        ydate = self.YrMoDyToString(*self.GetYesterday(yr, mo, day))
        self.Status("Checking Login")
        if not self.CheckLogin():  # ensure we're logged in
            return False
        self.Status("getting {} for {}".format(typ, date))

        # figure out which data to get
        data = None
        # automatically override cache if day is incomplete (doesn't have 24 hrs of data)
        if not self.DayMetricsJsonIsComplete(date):
            override_cache = True
        # if needed, download json data from website and save to file
        if typ == 'metrics':
            mjdata = self.RetrieveJsonOrCached(date, 'metrics', override_cache)
            if not mjdata:  # there was an error
                return False
            ### MOVE THIS ERROR CHECKING INTO THE ABOVE METHOD
            if type(mjdata) == str or mjdata == None:  # simple error checking
                self.Status('OnGetDayData: Metrics json conversion failed.')
                return False
        # also load up activities
        if typ == 'activities' or act_metr:
            ajdata = self.RetrieveJsonOrCached(date, 'activities', override_cache)
            if type(ajdata) == str or ajdata == None:  # simple error checking
                self.Status('OnGetDayData: Activities json conversion failed.')
                return False
        if typ == 'sleep' or act_metr:
            sjdata = self.RetrieveJsonOrCached(date, 'sleep', override_cache)
            if type(sjdata) == str or sjdata == None:  # simple error checking
                self.Status('OnGetDayData: Sleep json conversion failed.')
                return False
            if act_metr:  # add yesterday's sleep data
                sjdata2 = self.RetrieveJsonOrCached(ydate, 'sleep')

        # Next, turn the list of python objects into a csv file.
        # If asked to (via act_metr), collect sleep and activity type, then add them to each timestamp.
        cdata = None
        if save_csv:
            if typ == 'activities' or act_metr:
                act_list = self.JsonActivitiesToList(ajdata)
                cdata = self.CreateCSVFromList(self.CFG.csv.activity_colnames, act_list)
            if typ == 'sleep' or act_metr:
                sleep_evts_list = self.JsonSleepEventsToList(sjdata)
                cdata = self.CreateCSVFromList(self.CFG.csv.sleep_evt_colnames, sleep_evts_list)
                if act_metr:
                    # prepend yesterday's sleep events as they may start before midnight.
                    sleep_evts_list[:0] = self.JsonSleepEventsToList(sjdata2)
            if typ == 'metrics':
                if u'error' in mjdata:
                    err = mjdata[u'error']
                    self.Status("HTTP response error ({}, # {}): {}".format(err[0], mjdata[u'code'], err[1]))
                    return
                metrics_list = self.JsonMetricsToList(mjdata)
                if act_metr:  # add activities to metrics
                    self.AddActivityTypeToMetrics(metrics_list, act_list, sleep_evts_list)
                    header = self.CFG.csv.metrics_colnames + self.CFG.csv.activity_type_colnames
                else:
                    header = self.CFG.csv.metrics_colnames
                cdata = self.CreateCSVFromList(header, metrics_list)
        # If we were able to make a csv file, save it.
        if cdata:
            fpath = self.PathForFile(date, typ, 'csv')
            fname = os.path.split(fpath)[1]
            self.SaveData(cdata, fpath)
            self.Status("Saved " + typ + " csv file " + fname)
        return True  # success

    def PathForFile(self, dt, typ, fmt):
        cfname = self.CFG.day_fname_template.format(date=dt, typ=typ, fmt=fmt)
        folder = self.app_state.savedir if fmt == 'csv' else self.GetJsonStorageDir()
        fpath = os.path.join(os.path.abspath(folder), cfname)
        return fpath

    ##
    ## TODO: How to deal with a sync that reaches back before you registered with Basis?
    ##
    def Sync(self, do_csv, override_cache, act_metr=True, callback=None):
        """Secondary entry point. Catch up to the current day. Downloads any missing or
        incomplete days, going back self.app_state.sync days."""
        # Download what we have for today. It won't be complete, but you can at least get the data.
        today = datetime.date.today()
        yr, mo, dy = today.year, today.month, today.day
        file_count = 0  # tally # of files actually changed
        if not self.CheckLogin():  # make sure we're logged in correctly before starting
            return
        for days in range(self.CFG.sync_days):
            # see if files already exist
            dt = self.YrMoDyToString(yr, mo, dy)
            self.Status('Sync: checking ' + dt)
            fpath = self.PathForFile(dt, 'metrics', 'csv')
            # if the file doesn't exist or the day is incomplete, download it
            if not os.path.isfile(fpath) or not self.DayMetricsJsonIsComplete(dt):
                # download day.
                # if override_cache is True, then we would always re-download all days. Don't let that happen.
                if not self.GetDayData(yr, mo, dy, 'metrics', do_csv, override_cache=False, act_metr=act_metr):
                    return  # quit if problem
                file_count += 1
            if callable(callback):
                # callback (if available) to UI manager to prevent freeze
                callback(yr, mo, dy)
            # loop change: yesterday.
            yr, mo, dy = self.GetYesterday(yr, mo, dy)
        # Done. Let user know.
        self.Status('Sync done; {} files updated'.format(file_count if file_count > 0 else 'no'))

    def CheckLogin(self):
        """Check to see if login is needed; if so, log in."""
        elapsed_hr = (time.time() - self.app_state.login_timestamp) / 3600
        if self.CheckSessionCookie() and self.app_state.session_token and elapsed_hr < self.CFG.login_timeout_hrs:
            success = True
        else:
            try:
                self.Login()
                success = True
            except Exception, v:
                self.Status('Login difficulty: ' + repr(v[0]))
                success = False
        if success:
            self.app_state.login_timestamp = time.time()
        return success
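
# Illustrative sketch (not part of the original source): driving the two entry points
# described in the BasisRetr docstring. The date and argument values are assumptions.
retr = BasisRetr(loadconfig=True)
# one specific day, saved as json + csv, with activity/sleep state merged into the metrics
retr.GetDayData(2014, 7, 15, 'metrics', save_csv=True, act_metr=True)
# or catch up on recent missing/incomplete days
retr.Sync(do_csv=True, override_cache=False)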
class AOJClient(object):

    def __init__(self, cookie_file_path='aoj-cookie.txt'):
        self.cookie_file_path = cookie_file_path
        self.cookiejar = MozillaCookieJar()
        if os.path.isfile(cookie_file_path):
            self.cookiejar.load(cookie_file_path)
        self.opener = urllib2.build_opener(
            urllib2.HTTPRedirectHandler(),
            urllib2.HTTPHandler(),
            urllib2.HTTPSHandler(),
            urllib2.HTTPCookieProcessor(self.cookiejar))

    def get_csrf_token(self, url):
        request = urllib2.Request(url=url)
        response = self.opener.open(request)
        data = response.read()
        return REGEXP_CSRF.findall(data)[0]

    def refresh_session(self):
        print 'Not Logged In!'
        context = {'csrfmiddlewaretoken': self.get_csrf_token(LOGIN_URL),
                   'username': raw_input('Username: '),
                   'password': getpass.getpass('Password: ')}
        request = urllib2.Request(url=SITE_PREFIX + 'accounts/login/',
                                  data=urllib.urlencode(context))
        self.opener.open(request)
        self.cookiejar.save(self.cookie_file_path)

    def check_problem_exist(self, problem_name):
        try:
            request = urllib2.Request(url=PROB_PREFIX + 'read/' + problem_name)
            response = self.opener.open(request)
        except urllib2.HTTPError as err:
            if err.code == 404:  # Not Found
                raise AOJProblemNotExist
            else:
                raise

    def detect_language(self, source_file):
        if '.' in source_file:
            selected_language = source_file[source_file.rfind('.') + 1:]
        else:
            selected_language = ''
        while selected_language not in LANGUAGES:
            selected_language = raw_input('Please select your language: (' +
                                          '/'.join(LANGUAGES) + ') ? ').strip().lower()
        return selected_language

    def submit(self, submission):
        self.check_problem_exist(submission.problem)

        context = {}
        context['language'] = self.detect_language(submission.source)
        context['csrfmiddlewaretoken'] = self.get_csrf_token(url=PROB_PREFIX + 'submit/' + submission.problem)
        try:
            with open(submission.source) as f:
                context['source'] = f.read()
        except IOError:
            raise AOJFileNotExist()

        def try_submit(first=True):
            if not first:
                self.refresh_session()
            request = urllib2.Request(url=PROB_PREFIX + 'submit/' + submission.problem,
                                      data=urllib.urlencode(context))
            response = self.opener.open(request)
            if not response.geturl().lower().startswith(LOGIN_URL):
                print 'Submission Complete!'
                return
            try_submit(first=False)

        try_submit()

    def get_submission_list(self, problem_name):
        self.check_problem_exist(problem_name)

        request = urllib2.Request(url=SITE_PREFIX + 'judge/submission/recent/?problem=' + problem_name)
        response = self.opener.open(request)

        try:
            import lxml.html
        except ImportError:
            print 'lxml library is needed for parsing HTML'
            return

        html = lxml.html.fromstring(unicode(response.read().decode('utf8')))
        context = {}
        fields = ('id', 'problem', 'user', 'language', 'length', 'state', 'stats', 'submitted_on')
        length = {'id': 9, 'problem': 15, 'user': 15, 'language': 5, 'length': 7,
                  'state': 15, 'stats': 7, 'submitted_on': 15}
        template = u'%(id)s %(problem)s %(user)s %(language)s %(length)s %(state)s %(stats)s %(submitted_on)s'

        def width(string):
            return sum(1 + (unicodedata.east_asian_width(c) in 'WF') for c in string)

        for tr in html.cssselect('table.submission_list tr'):
            for field in fields:
                element = tr.find_class(field)
                if element:
                    context[field] = unicode(element[0].text_content().strip())
                else:
                    context[field] = u''
                context[field] = ' ' * (length[field] - width(context[field])) + context[field]
            print template % context
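
# Illustrative sketch (not part of the original source): the submission object passed to
# submit() above is assumed to expose .problem and .source; a minimal stand-in is shown.
# The problem name and source path are placeholders.
class Submission(object):
    def __init__(self, problem, source):
        self.problem = problem   # problem name on the judge
        self.source = source     # path to the local source file

client = AOJClient(cookie_file_path='aoj-cookie.txt')
client.submit(Submission('hello_world', 'solution.py'))   # prompts for login if the session expired
client.get_submission_list('hello_world')                 # prints a formatted recent-submission table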